Fast and robust atproto CAR file processing in rust
14
fork

Configure Feed

Select the types of activity you want to include in your feed.

redb disk version

phil 3af074fe 72add79c

+833
+116
Cargo.lock
··· 126 126 ] 127 127 128 128 [[package]] 129 + name = "bincode" 130 + version = "2.0.1" 131 + source = "registry+https://github.com/rust-lang/crates.io-index" 132 + checksum = "36eaf5d7b090263e8150820482d5d93cd964a81e4019913c972f4edcc6edb740" 133 + dependencies = [ 134 + "bincode_derive", 135 + "serde", 136 + "unty", 137 + ] 138 + 139 + [[package]] 140 + name = "bincode_derive" 141 + version = "2.0.1" 142 + source = "registry+https://github.com/rust-lang/crates.io-index" 143 + checksum = "bf95709a440f45e986983918d0e8a1f30a9b1df04918fc828670606804ac3c09" 144 + dependencies = [ 145 + "virtue", 146 + ] 147 + 148 + [[package]] 129 149 name = "bitflags" 130 150 version = "2.9.4" 131 151 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 387 407 ] 388 408 389 409 [[package]] 410 + name = "fallible-iterator" 411 + version = "0.3.0" 412 + source = "registry+https://github.com/rust-lang/crates.io-index" 413 + checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" 414 + 415 + [[package]] 416 + name = "fallible-streaming-iterator" 417 + version = "0.1.9" 418 + source = "registry+https://github.com/rust-lang/crates.io-index" 419 + checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" 420 + 421 + [[package]] 422 + name = "foldhash" 423 + version = "0.1.5" 424 + source = "registry+https://github.com/rust-lang/crates.io-index" 425 + checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" 426 + 427 + [[package]] 390 428 name = "futures" 391 429 version = "0.3.31" 392 430 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 493 531 ] 494 532 495 533 [[package]] 534 + name = "hashbrown" 535 + version = "0.15.5" 536 + source = "registry+https://github.com/rust-lang/crates.io-index" 537 + checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" 538 + dependencies = [ 539 + "foldhash", 540 + ] 541 + 542 + [[package]] 543 + name = "hashlink" 544 + version = "0.10.0" 545 + source = "registry+https://github.com/rust-lang/crates.io-index" 546 + checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1" 547 + dependencies = [ 548 + "hashbrown", 549 + ] 550 + 551 + [[package]] 496 552 name = "heck" 497 553 version = "0.5.0" 498 554 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 598 654 checksum = "58f929b4d672ea937a23a1ab494143d968337a5f47e56d0815df1e0890ddf174" 599 655 600 656 [[package]] 657 + name = "libsqlite3-sys" 658 + version = "0.35.0" 659 + source = "registry+https://github.com/rust-lang/crates.io-index" 660 + checksum = "133c182a6a2c87864fe97778797e46c7e999672690dc9fa3ee8e241aa4a9c13f" 661 + dependencies = [ 662 + "pkg-config", 663 + "vcpkg", 664 + ] 665 + 666 + [[package]] 601 667 name = "lock_api" 602 668 version = "0.4.14" 603 669 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 744 810 checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" 745 811 746 812 [[package]] 813 + name = "pkg-config" 814 + version = "0.3.32" 815 + source = "registry+https://github.com/rust-lang/crates.io-index" 816 + checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" 817 + 818 + [[package]] 747 819 name = "plotters" 748 820 version = "0.3.7" 749 821 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 825 897 ] 826 898 827 899 [[package]] 900 + name = "redb" 901 + version = "3.1.0" 902 + source = "registry+https://github.com/rust-lang/crates.io-index" 903 + checksum = "ae323eb086579a3769daa2c753bb96deb95993c534711e0dbe881b5192906a06" 904 + dependencies = [ 905 + "libc", 906 + ] 907 + 908 + [[package]] 828 909 name = "redox_syscall" 829 910 version = "0.5.18" 830 911 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 866 947 name = "repo-stream" 867 948 version = "0.1.1" 868 949 dependencies = [ 950 + "bincode", 869 951 "clap", 870 952 "criterion", 871 953 "env_logger", ··· 875 957 "iroh-car", 876 958 "log", 877 959 "multibase", 960 + "redb", 961 + "rusqlite", 878 962 "serde", 879 963 "serde_bytes", 880 964 "serde_ipld_dagcbor", ··· 883 967 ] 884 968 885 969 [[package]] 970 + name = "rusqlite" 971 + version = "0.37.0" 972 + source = "registry+https://github.com/rust-lang/crates.io-index" 973 + checksum = "165ca6e57b20e1351573e3729b958bc62f0e48025386970b6e4d29e7a7e71f3f" 974 + dependencies = [ 975 + "bitflags", 976 + "fallible-iterator", 977 + "fallible-streaming-iterator", 978 + "hashlink", 979 + "libsqlite3-sys", 980 + "smallvec", 981 + ] 982 + 983 + [[package]] 886 984 name = "rustc-demangle" 887 985 version = "0.1.26" 888 986 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1139 1237 checksum = "eb066959b24b5196ae73cb057f45598450d2c5f71460e98c49b738086eff9c06" 1140 1238 1141 1239 [[package]] 1240 + name = "unty" 1241 + version = "0.0.4" 1242 + source = "registry+https://github.com/rust-lang/crates.io-index" 1243 + checksum = "6d49784317cd0d1ee7ec5c716dd598ec5b4483ea832a2dced265471cc0f690ae" 1244 + 1245 + [[package]] 1142 1246 name = "utf8parse" 1143 1247 version = "0.2.2" 1144 1248 source = "registry+https://github.com/rust-lang/crates.io-index" 1145 1249 checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" 1250 + 1251 + [[package]] 1252 + name = "vcpkg" 1253 + version = "0.2.15" 1254 + source = "registry+https://github.com/rust-lang/crates.io-index" 1255 + checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" 1256 + 1257 + [[package]] 1258 + name = "virtue" 1259 + version = "0.0.18" 1260 + source = "registry+https://github.com/rust-lang/crates.io-index" 1261 + checksum = "051eb1abcf10076295e815102942cc58f9d5e3b4560e46e53c21e8ff6f3af7b1" 1146 1262 1147 1263 [[package]] 1148 1264 name = "walkdir"
+3
Cargo.toml
··· 7 7 repository = "https://tangled.org/@microcosm.blue/repo-stream" 8 8 9 9 [dependencies] 10 + bincode = { version = "2.0.1", features = ["serde"] } 10 11 futures = "0.3.31" 11 12 futures-core = "0.3.31" 12 13 ipld-core = { version = "0.4.2", features = ["serde"] } 13 14 iroh-car = "0.5.1" 14 15 log = "0.4.28" 15 16 multibase = "0.9.2" 17 + redb = "3.1.0" 18 + rusqlite = "0.37.0" 16 19 serde = { version = "1.0.228", features = ["derive"] } 17 20 serde_bytes = "0.11.19" 18 21 serde_ipld_dagcbor = "0.6.4"
+57
examples/disk-read-file/main.rs
··· 1 + extern crate repo_stream; 2 + use clap::Parser; 3 + use futures::TryStreamExt; 4 + use iroh_car::CarReader; 5 + use std::convert::Infallible; 6 + use std::path::PathBuf; 7 + 8 + type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>; 9 + 10 + #[derive(Debug, Parser)] 11 + struct Args { 12 + #[arg()] 13 + car: PathBuf, 14 + #[arg()] 15 + tmpfile: PathBuf, 16 + } 17 + 18 + #[tokio::main] 19 + async fn main() -> Result<()> { 20 + env_logger::init(); 21 + 22 + let Args { car, tmpfile } = Args::parse(); 23 + let reader = tokio::fs::File::open(car).await?; 24 + let reader = tokio::io::BufReader::new(reader); 25 + 26 + println!("hello!"); 27 + 28 + let reader = CarReader::new(reader).await?; 29 + 30 + let redb_store = repo_stream::disk_redb::RedbStore::new(tmpfile)?; 31 + 32 + let root = reader 33 + .header() 34 + .roots() 35 + .first() 36 + .ok_or("missing root")? 37 + .clone(); 38 + log::debug!("root: {root:?}"); 39 + 40 + // let stream = Box::pin(reader.stream()); 41 + let stream = std::pin::pin!(reader.stream()); 42 + 43 + let (commit, v) = repo_stream::disk_drive::Vehicle::init(root, stream, redb_store, |block| { 44 + Ok::<_, Infallible>(block.len()) 45 + }) 46 + .await?; 47 + let mut record_stream = std::pin::pin!(v.stream()); 48 + 49 + log::info!("got commit: {commit:?}"); 50 + 51 + while let Some((rkey, _rec)) = record_stream.try_next().await? { 52 + log::info!("got {rkey:?}"); 53 + } 54 + log::info!("bye!"); 55 + 56 + Ok(()) 57 + }
+201
src/disk_drive.rs
··· 1 + use futures::Stream; 2 + use futures::TryStreamExt; 3 + use std::error::Error; 4 + 5 + use crate::disk_walk::{Step, Trip, Walker}; 6 + use crate::mst::Commit; 7 + use crate::mst::Node; 8 + 9 + use ipld_core::cid::Cid; 10 + use serde::{Deserialize, Serialize, de::DeserializeOwned}; 11 + 12 + /// Errors that can happen while consuming and emitting blocks and records 13 + #[derive(Debug, thiserror::Error)] 14 + pub enum DriveError { 15 + #[error("Failed to initialize CarReader: {0}")] 16 + CarReader(#[from] iroh_car::Error), 17 + #[error("Car block stream error: {0}")] 18 + CarBlockError(Box<dyn Error>), 19 + #[error("Failed to decode commit block: {0}")] 20 + BadCommit(Box<dyn Error>), 21 + #[error("The Commit block reference by the root was not found")] 22 + MissingCommit, 23 + #[error("The MST block {0} could not be found")] 24 + MissingBlock(Cid), 25 + #[error("Failed to walk the mst tree: {0}")] 26 + Tripped(#[from] Trip), 27 + } 28 + 29 + #[derive(Debug, Clone, Serialize, Deserialize)] 30 + pub enum MaybeProcessedBlock<T: Clone + Serialize> { 31 + /// A block that's *probably* a Node (but we can't know yet) 32 + /// 33 + /// It *can be* a record that suspiciously looks a lot like a node, so we 34 + /// cannot eagerly turn it into a Node. We only know for sure what it is 35 + /// when we actually walk down the MST 36 + Raw(Vec<u8>), 37 + /// A processed record from a block that was definitely not a Node 38 + /// 39 + /// If we _never_ needed this block, then we may have wasted a bit of effort 40 + /// trying to process it. Oh well. 41 + /// 42 + /// Processing has to be fallible because the CAR can have totally-unused 43 + /// blocks, which can just be garbage. since we're eagerly trying to process 44 + /// record blocks without knowing for sure that they *are* records, we 45 + /// discard any definitely-not-nodes that fail processing and keep their 46 + /// error in the buffer for them. if we later try to retreive them as a 47 + /// record, then we can surface the error. 48 + /// 49 + /// The error type is `String` because we don't really want to put 50 + /// any constraints like `Serialize` on the error type, and `Error` 51 + /// at least requires `Display`. It's a compromise. 52 + ProcessedOk(T), 53 + Unprocessable(String), 54 + } 55 + 56 + pub trait BlockStore<MPB: Serialize + DeserializeOwned> { 57 + fn put(&self, key: Cid, value: MPB); // unwraps for now 58 + fn get(&self, key: Cid) -> Option<MPB>; 59 + } 60 + 61 + type CarBlock<E> = Result<(Cid, Vec<u8>), E>; 62 + 63 + /// The core driver between the block stream and MST walker 64 + pub struct Vehicle<SE, S, T, BS, P, PE> 65 + where 66 + SE: Error + 'static, 67 + S: Stream<Item = CarBlock<SE>>, 68 + T: Clone + Serialize + DeserializeOwned, 69 + BS: BlockStore<MaybeProcessedBlock<T>>, 70 + P: Fn(&[u8]) -> Result<T, PE>, 71 + PE: Error, 72 + { 73 + block_stream: S, 74 + block_store: BS, 75 + walker: Walker, 76 + process: P, 77 + } 78 + 79 + impl<SE, S, T, BS, P, PE> Vehicle<SE, S, T, BS, P, PE> 80 + where 81 + SE: Error + 'static, 82 + S: Stream<Item = CarBlock<SE>> + Unpin, 83 + T: Clone + Serialize + DeserializeOwned, 84 + BS: BlockStore<MaybeProcessedBlock<T>>, 85 + P: Fn(&[u8]) -> Result<T, PE>, 86 + PE: Error, 87 + { 88 + /// Set up the stream 89 + /// 90 + /// This will eagerly consume blocks until the `Commit` object is found. 91 + /// *Usually* the it's the first block, but there is no guarantee. 92 + /// 93 + /// ### Parameters 94 + /// 95 + /// `root`: CID of the commit object that is the root of the MST 96 + /// 97 + /// `block_stream`: Input stream of raw CAR blocks 98 + /// 99 + /// `process`: record-transforming callback: 100 + /// 101 + /// For tasks where records can be quickly processed into a *smaller* 102 + /// useful representation, you can do that eagerly as blocks come in by 103 + /// passing the processor as a callback here. This can reduce overall 104 + /// memory usage. 105 + pub async fn init( 106 + root: Cid, 107 + mut block_stream: S, 108 + block_store: BS, 109 + process: P, 110 + ) -> Result<(Commit, Self), DriveError> { 111 + let mut commit = None; 112 + 113 + while let Some((cid, data)) = block_stream 114 + .try_next() 115 + .await 116 + .map_err(|e| DriveError::CarBlockError(e.into()))? 117 + { 118 + if cid == root { 119 + let c: Commit = serde_ipld_dagcbor::from_slice(&data) 120 + .map_err(|e| DriveError::BadCommit(e.into()))?; 121 + commit = Some(c); 122 + break; 123 + } else { 124 + block_store.put( 125 + cid, 126 + if Node::could_be(&data) { 127 + MaybeProcessedBlock::Raw(data) 128 + } else { 129 + match process(&data) { 130 + Ok(t) => MaybeProcessedBlock::ProcessedOk(t), 131 + Err(e) => MaybeProcessedBlock::Unprocessable(e.to_string()), 132 + } 133 + }, 134 + ); 135 + } 136 + } 137 + 138 + // we either broke out or read all the blocks without finding the commit... 139 + let commit = commit.ok_or(DriveError::MissingCommit)?; 140 + 141 + let walker = Walker::new(commit.data); 142 + 143 + let me = Self { 144 + block_stream, 145 + block_store, 146 + walker, 147 + process, 148 + }; 149 + Ok((commit, me)) 150 + } 151 + 152 + async fn drive_until(&mut self, cid_needed: Cid) -> Result<(), DriveError> { 153 + while let Some((cid, data)) = self 154 + .block_stream 155 + .try_next() 156 + .await 157 + .map_err(|e| DriveError::CarBlockError(e.into()))? 158 + { 159 + self.block_store.put( 160 + cid, 161 + if Node::could_be(&data) { 162 + MaybeProcessedBlock::Raw(data) 163 + } else { 164 + match (self.process)(&data) { 165 + Ok(t) => MaybeProcessedBlock::ProcessedOk(t), 166 + Err(e) => MaybeProcessedBlock::Unprocessable(e.to_string()), 167 + } 168 + }, 169 + ); 170 + if cid == cid_needed { 171 + return Ok(()); 172 + } 173 + } 174 + 175 + // if we never found the block 176 + Err(DriveError::MissingBlock(cid_needed)) 177 + } 178 + 179 + /// Manually step through the record outputs 180 + pub async fn next_record(&mut self) -> Result<Option<(String, T)>, DriveError> { 181 + loop { 182 + // walk as far as we can until we run out of blocks or find a record 183 + let cid_needed = match self.walker.step(&mut self.block_store, &self.process)? { 184 + Step::Rest(cid) => cid, 185 + Step::Finish => return Ok(None), 186 + Step::Step { rkey, data } => return Ok(Some((rkey, data))), 187 + }; 188 + 189 + // load blocks until we reach that cid 190 + self.drive_until(cid_needed).await?; 191 + } 192 + } 193 + 194 + /// Convert to a futures::stream of record outputs 195 + pub fn stream(self) -> impl Stream<Item = Result<(String, T), DriveError>> { 196 + futures::stream::try_unfold(self, |mut this| async move { 197 + let maybe_record = this.next_record().await?; 198 + Ok(maybe_record.map(|b| (b, this))) 199 + }) 200 + } 201 + }
+50
src/disk_redb.rs
··· 1 + use crate::disk_drive::BlockStore; 2 + use ipld_core::cid::Cid; 3 + use redb::{Database, Error, ReadableDatabase, TableDefinition}; 4 + use serde::{Serialize, de::DeserializeOwned}; 5 + use std::path::Path; 6 + 7 + const TABLE: TableDefinition<&[u8], &[u8]> = TableDefinition::new("blocks"); 8 + 9 + pub struct RedbStore { 10 + db: Database, 11 + } 12 + 13 + impl RedbStore { 14 + pub fn new(path: impl AsRef<Path>) -> Result<Self, Error> { 15 + let db = Database::create(path)?; 16 + Ok(Self { db }) 17 + } 18 + } 19 + 20 + // TODO: clean up on drop 21 + 22 + impl<MPB: Serialize + DeserializeOwned> BlockStore<MPB> for RedbStore { 23 + fn put(&self, c: Cid, t: MPB) { 24 + let key_bytes = c.to_bytes(); 25 + let val_bytes = bincode::serde::encode_to_vec(t, bincode::config::standard()).unwrap(); 26 + 27 + let mut tx = self.db.begin_write().unwrap(); 28 + tx.set_durability(redb::Durability::None).unwrap(); 29 + { 30 + let mut table = tx.open_table(TABLE).unwrap(); 31 + table.insert(&*key_bytes, &*val_bytes).unwrap(); 32 + } 33 + tx.commit().unwrap(); 34 + } 35 + fn get(&self, c: Cid) -> Option<MPB> { 36 + let key_bytes = c.to_bytes(); 37 + let tx = self.db.begin_read().unwrap(); 38 + let table = match tx.open_table(TABLE) { 39 + Ok(t) => t, 40 + Err(redb::TableError::TableDoesNotExist(_)) => return None, 41 + e => e.unwrap(), 42 + }; 43 + let maybe_val_bytes = table.get(&*key_bytes).unwrap()?; 44 + let (t, n): (MPB, usize) = 45 + bincode::serde::decode_from_slice(maybe_val_bytes.value(), bincode::config::standard()) 46 + .unwrap(); 47 + assert_eq!(maybe_val_bytes.value().len(), n); 48 + Some(t) 49 + } 50 + }
+403
src/disk_walk.rs
··· 1 + //! Depth-first MST traversal 2 + 3 + use crate::disk_drive::{BlockStore, MaybeProcessedBlock}; 4 + use crate::mst::Node; 5 + 6 + use ipld_core::cid::Cid; 7 + use serde::{Serialize, de::DeserializeOwned}; 8 + use std::error::Error; 9 + 10 + /// Errors that can happen while walking 11 + #[derive(Debug, thiserror::Error)] 12 + pub enum Trip { 13 + #[error("empty mst nodes are not allowed")] 14 + NodeEmpty, 15 + #[error("Failed to decode commit block: {0}")] 16 + BadCommit(Box<dyn std::error::Error>), 17 + #[error("Action node error: {0}")] 18 + RkeyError(#[from] RkeyError), 19 + #[error("Process failed: {0}")] 20 + ProcessFailed(String), 21 + #[error("Encountered an rkey out of order while walking the MST")] 22 + RkeyOutOfOrder, 23 + } 24 + 25 + /// Errors from invalid Rkeys 26 + #[derive(Debug, thiserror::Error)] 27 + pub enum RkeyError { 28 + #[error("Failed to compute an rkey due to invalid prefix_len")] 29 + EntryPrefixOutOfbounds, 30 + #[error("RKey was not utf-8")] 31 + EntryRkeyNotUtf8(#[from] std::string::FromUtf8Error), 32 + } 33 + 34 + /// Walker outputs 35 + #[derive(Debug)] 36 + pub enum Step<T: Serialize + DeserializeOwned> { 37 + /// We need a CID but it's not in the block store 38 + /// 39 + /// Give the needed CID to the driver so it can load blocks until it's found 40 + Rest(Cid), 41 + /// Reached the end of the MST! yay! 42 + Finish, 43 + /// A record was found! 44 + Step { rkey: String, data: T }, 45 + } 46 + 47 + #[derive(Debug, Clone, PartialEq)] 48 + enum Need { 49 + Node(Cid), 50 + Record { rkey: String, cid: Cid }, 51 + } 52 + 53 + fn push_from_node(stack: &mut Vec<Need>, node: &Node) -> Result<(), RkeyError> { 54 + let mut entries = Vec::with_capacity(node.entries.len()); 55 + 56 + let mut prefix = vec![]; 57 + for entry in &node.entries { 58 + let mut rkey = vec![]; 59 + let pre_checked = prefix 60 + .get(..entry.prefix_len) 61 + .ok_or(RkeyError::EntryPrefixOutOfbounds)?; 62 + rkey.extend_from_slice(pre_checked); 63 + rkey.extend_from_slice(&entry.keysuffix); 64 + prefix = rkey.clone(); 65 + 66 + entries.push(Need::Record { 67 + rkey: String::from_utf8(rkey)?, 68 + cid: entry.value, 69 + }); 70 + if let Some(ref tree) = entry.tree { 71 + entries.push(Need::Node(*tree)); 72 + } 73 + } 74 + 75 + entries.reverse(); 76 + stack.append(&mut entries); 77 + 78 + if let Some(tree) = node.left { 79 + stack.push(Need::Node(tree)); 80 + } 81 + Ok(()) 82 + } 83 + 84 + /// Traverser of an atproto MST 85 + /// 86 + /// Walks the tree from left-to-right in depth-first order 87 + #[derive(Debug)] 88 + pub struct Walker { 89 + stack: Vec<Need>, 90 + prev: String, 91 + } 92 + 93 + impl Walker { 94 + pub fn new(tree_root_cid: Cid) -> Self { 95 + Self { 96 + stack: vec![Need::Node(tree_root_cid)], 97 + prev: "".to_string(), 98 + } 99 + } 100 + 101 + /// Advance through nodes until we find a record or can't go further 102 + pub fn step<T: Clone + Serialize + DeserializeOwned, E: Error>( 103 + &mut self, 104 + block_store: &mut impl BlockStore<MaybeProcessedBlock<T>>, 105 + process: impl Fn(&[u8]) -> Result<T, E>, 106 + ) -> Result<Step<T>, Trip> { 107 + loop { 108 + let Some(mut need) = self.stack.last() else { 109 + log::trace!("tried to walk but we're actually done."); 110 + return Ok(Step::Finish); 111 + }; 112 + 113 + match &mut need { 114 + Need::Node(cid) => { 115 + log::trace!("need node {cid:?}"); 116 + let Some(mpb) = block_store.get(*cid) else { 117 + log::trace!("node not found, resting"); 118 + return Ok(Step::Rest(*cid)); 119 + }; 120 + 121 + let MaybeProcessedBlock::<T>::Raw(block) = mpb else { 122 + return Err(Trip::BadCommit("failed commit fingerprint".into())); 123 + }; 124 + let node = serde_ipld_dagcbor::from_slice::<Node>(&block) 125 + .map_err(|e| Trip::BadCommit(e.into()))?; 126 + 127 + // found node, make sure we remember 128 + self.stack.pop(); 129 + 130 + // queue up work on the found node next 131 + push_from_node(&mut self.stack, &node)?; 132 + } 133 + Need::Record { rkey, cid } => { 134 + log::trace!("need record {cid:?}"); 135 + let Some(mpb) = block_store.get(*cid) else { 136 + log::trace!("record block not found, resting"); 137 + return Ok(Step::Rest(*cid)); 138 + }; 139 + let rkey = rkey.clone(); 140 + let data = match mpb { 141 + MaybeProcessedBlock::Raw(data) => match process(&data) { 142 + Ok(t) => Ok(t), 143 + Err(e) => Err(Trip::ProcessFailed(e.to_string())), 144 + }, 145 + MaybeProcessedBlock::ProcessedOk(t) => Ok(t.clone()), 146 + MaybeProcessedBlock::Unprocessable(s) => { 147 + return Err(Trip::ProcessFailed(s.clone())); 148 + } 149 + }; 150 + 151 + // found node, make sure we remember 152 + self.stack.pop(); 153 + 154 + log::trace!("emitting a block as a step. depth={}", self.stack.len()); 155 + 156 + let data = data.map_err(|e| Trip::ProcessFailed(e.to_string()))?; 157 + 158 + // rkeys *must* be in order or else the tree is invalid (or 159 + // we have a bug) 160 + if rkey <= self.prev { 161 + return Err(Trip::RkeyOutOfOrder); 162 + } 163 + self.prev = rkey.clone(); 164 + 165 + return Ok(Step::Step { rkey, data }); 166 + } 167 + } 168 + } 169 + } 170 + } 171 + 172 + #[cfg(test)] 173 + mod test { 174 + use super::*; 175 + // use crate::mst::Entry; 176 + 177 + fn cid1() -> Cid { 178 + "bafyreihixenvk3ahqbytas4hk4a26w43bh6eo3w6usjqtxkpzsvi655a3m" 179 + .parse() 180 + .unwrap() 181 + } 182 + // fn cid2() -> Cid { 183 + // "QmY7Yh4UquoXHLPFo2XbhXkhBvFoPwmQUSa92pxnxjQuPU" 184 + // .parse() 185 + // .unwrap() 186 + // } 187 + // fn cid3() -> Cid { 188 + // "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi" 189 + // .parse() 190 + // .unwrap() 191 + // } 192 + // fn cid4() -> Cid { 193 + // "QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR" 194 + // .parse() 195 + // .unwrap() 196 + // } 197 + // fn cid5() -> Cid { 198 + // "QmSnuWmxptJZdLJpKRarxBMS2Ju2oANVrgbr2xWbie9b2D" 199 + // .parse() 200 + // .unwrap() 201 + // } 202 + // fn cid6() -> Cid { 203 + // "QmdmQXB2mzChmMeKY47C43LxUdg1NDJ5MWcKMKxDu7RgQm" 204 + // .parse() 205 + // .unwrap() 206 + // } 207 + // fn cid7() -> Cid { 208 + // "bafybeiaysi4s6lnjev27ln5icwm6tueaw2vdykrtjkwiphwekaywqhcjze" 209 + // .parse() 210 + // .unwrap() 211 + // } 212 + // fn cid8() -> Cid { 213 + // "bafyreif3tfdpr5n4jdrbielmcapwvbpcthepfkwq2vwonmlhirbjmotedi" 214 + // .parse() 215 + // .unwrap() 216 + // } 217 + // fn cid9() -> Cid { 218 + // "bafyreicnokmhmrnlp2wjhyk2haep4tqxiptwfrp2rrs7rzq7uk766chqvq" 219 + // .parse() 220 + // .unwrap() 221 + // } 222 + 223 + #[test] 224 + fn test_next_from_node_empty() { 225 + let node = Node { 226 + left: None, 227 + entries: vec![], 228 + }; 229 + let mut stack = vec![]; 230 + push_from_node(&mut stack, &node).unwrap(); 231 + assert_eq!(stack.last(), None); 232 + } 233 + 234 + #[test] 235 + fn test_needs_from_node_just_left() { 236 + let node = Node { 237 + left: Some(cid1()), 238 + entries: vec![], 239 + }; 240 + let mut stack = vec![]; 241 + push_from_node(&mut stack, &node).unwrap(); 242 + assert_eq!(stack.last(), Some(Need::Node(cid1())).as_ref()); 243 + } 244 + 245 + // #[test] 246 + // fn test_needs_from_node_just_one_record() { 247 + // let node = Node { 248 + // left: None, 249 + // entries: vec![Entry { 250 + // keysuffix: "asdf".into(), 251 + // prefix_len: 0, 252 + // value: cid1(), 253 + // tree: None, 254 + // }], 255 + // }; 256 + // assert_eq!( 257 + // needs_from_node(node).unwrap(), 258 + // vec![Need::Record { 259 + // rkey: "asdf".into(), 260 + // cid: cid1(), 261 + // },] 262 + // ); 263 + // } 264 + 265 + // #[test] 266 + // fn test_needs_from_node_two_records() { 267 + // let node = Node { 268 + // left: None, 269 + // entries: vec![ 270 + // Entry { 271 + // keysuffix: "asdf".into(), 272 + // prefix_len: 0, 273 + // value: cid1(), 274 + // tree: None, 275 + // }, 276 + // Entry { 277 + // keysuffix: "gh".into(), 278 + // prefix_len: 2, 279 + // value: cid2(), 280 + // tree: None, 281 + // }, 282 + // ], 283 + // }; 284 + // assert_eq!( 285 + // needs_from_node(node).unwrap(), 286 + // vec![ 287 + // Need::Record { 288 + // rkey: "asdf".into(), 289 + // cid: cid1(), 290 + // }, 291 + // Need::Record { 292 + // rkey: "asgh".into(), 293 + // cid: cid2(), 294 + // }, 295 + // ] 296 + // ); 297 + // } 298 + 299 + // #[test] 300 + // fn test_needs_from_node_with_both() { 301 + // let node = Node { 302 + // left: None, 303 + // entries: vec![Entry { 304 + // keysuffix: "asdf".into(), 305 + // prefix_len: 0, 306 + // value: cid1(), 307 + // tree: Some(cid2()), 308 + // }], 309 + // }; 310 + // assert_eq!( 311 + // needs_from_node(node).unwrap(), 312 + // vec![ 313 + // Need::Record { 314 + // rkey: "asdf".into(), 315 + // cid: cid1(), 316 + // }, 317 + // Need::Node(cid2()), 318 + // ] 319 + // ); 320 + // } 321 + 322 + // #[test] 323 + // fn test_needs_from_node_left_and_record() { 324 + // let node = Node { 325 + // left: Some(cid1()), 326 + // entries: vec![Entry { 327 + // keysuffix: "asdf".into(), 328 + // prefix_len: 0, 329 + // value: cid2(), 330 + // tree: None, 331 + // }], 332 + // }; 333 + // assert_eq!( 334 + // needs_from_node(node).unwrap(), 335 + // vec![ 336 + // Need::Node(cid1()), 337 + // Need::Record { 338 + // rkey: "asdf".into(), 339 + // cid: cid2(), 340 + // }, 341 + // ] 342 + // ); 343 + // } 344 + 345 + // #[test] 346 + // fn test_needs_from_full_node() { 347 + // let node = Node { 348 + // left: Some(cid1()), 349 + // entries: vec![ 350 + // Entry { 351 + // keysuffix: "asdf".into(), 352 + // prefix_len: 0, 353 + // value: cid2(), 354 + // tree: Some(cid3()), 355 + // }, 356 + // Entry { 357 + // keysuffix: "ghi".into(), 358 + // prefix_len: 1, 359 + // value: cid4(), 360 + // tree: Some(cid5()), 361 + // }, 362 + // Entry { 363 + // keysuffix: "jkl".into(), 364 + // prefix_len: 2, 365 + // value: cid6(), 366 + // tree: Some(cid7()), 367 + // }, 368 + // Entry { 369 + // keysuffix: "mno".into(), 370 + // prefix_len: 4, 371 + // value: cid8(), 372 + // tree: Some(cid9()), 373 + // }, 374 + // ], 375 + // }; 376 + // assert_eq!( 377 + // needs_from_node(node).unwrap(), 378 + // vec![ 379 + // Need::Node(cid1()), 380 + // Need::Record { 381 + // rkey: "asdf".into(), 382 + // cid: cid2(), 383 + // }, 384 + // Need::Node(cid3()), 385 + // Need::Record { 386 + // rkey: "aghi".into(), 387 + // cid: cid4(), 388 + // }, 389 + // Need::Node(cid5()), 390 + // Need::Record { 391 + // rkey: "agjkl".into(), 392 + // cid: cid6(), 393 + // }, 394 + // Need::Node(cid7()), 395 + // Need::Record { 396 + // rkey: "agjkmno".into(), 397 + // cid: cid8(), 398 + // }, 399 + // Need::Node(cid9()), 400 + // ] 401 + // ); 402 + // } 403 + }
+3
src/lib.rs
··· 2 2 //! 3 3 //! For now see the [examples](https://tangled.org/@microcosm.blue/repo-stream/tree/main/examples) 4 4 5 + pub mod disk_drive; 6 + pub mod disk_redb; 7 + pub mod disk_walk; 5 8 pub mod drive; 6 9 pub mod mst; 7 10 pub mod walk;