Fast and robust atproto CAR file processing in Rust
14
fork

Configure Feed

Select the types of activity you want to include in your feed.

initial slice api

phil b31e2808 5f249f1f

+236
+2
src/lib.rs
··· 77 77 pub mod disk; 78 78 pub mod mem; 79 79 pub mod mst; 80 + pub mod slice; 80 81 pub mod walk; 81 82 82 83 pub use disk::{DiskBuilder, DiskDriver, DiskError, DiskStore, DriveError}; 83 84 pub use mem::{DriverBuilder, LoadError, MemCar, PartialCar}; 84 85 pub use mst::Commit; 86 + pub use slice::{SliceError, SliceProof, SliceWalker}; 85 87 pub use walk::{MstError, Output, WalkError, WalkItem, noop}; 86 88 87 89 pub type Bytes = Vec<u8>;
+234
src/slice.rs
··· 1 + //! Proven-range walking of CAR slices 2 + 3 + use crate::{ 4 + RepoPath, 5 + mem::MemCar, 6 + walk::{Output, WalkError, WalkItem}, 7 + }; 8 + use cid::Cid; 9 + use std::ops::{Bound, RangeBounds}; 10 + 11 + /// Errors from [`MemCar::walk_slice`] 12 + #[derive(Debug, thiserror::Error)] 13 + pub enum SliceError { 14 + #[error("walk error: {0}")] 15 + Walk(#[from] WalkError), 16 + /// A record within the requested range has no block in the CAR 17 + #[error("record block absent within range: key={key:?} cid={cid}")] 18 + IncompleteRange { key: RepoPath, cid: Cid }, 19 + /// An MST node block is absent within the requested range 20 + #[error("MST node block absent within range: cid={cid}")] 21 + MissingNode { cid: Cid }, 22 + /// Proof failed: preceding key does not bound the lower end of the range 23 + #[error("preceding key {preceding:?} violates lower bound")] 24 + BadPrecedingKey { preceding: RepoPath }, 25 + /// Proof failed: following key does not bound the upper end of the range 26 + #[error("following key {following:?} violates upper bound")] 27 + BadFollowingKey { following: RepoPath }, 28 + } 29 + 30 + /// Proof that the walked range is complete. 31 + /// 32 + /// Returned by [`SliceWalker::finish`]. 33 + pub struct SliceProof { 34 + /// Key immediately before the lower bound in the full tree, 35 + /// or `None` if the range starts at the tree's leftmost record. 36 + pub preceding_key: Option<RepoPath>, 37 + /// Key immediately after the upper bound in the full tree, 38 + /// or `None` if the range ends at the tree's rightmost record. 39 + pub following_key: Option<RepoPath>, 40 + } 41 + 42 + enum SliceState { 43 + Before, 44 + In, 45 + Done, 46 + } 47 + 48 + /// Iterator-like walker over a proven range of the MST. 49 + /// 50 + /// Created by [`MemCar::walk_slice`]. Call [`SliceWalker::next`] to yield 51 + /// records, then [`SliceWalker::finish`] to validate the proof. 
52 + pub struct SliceWalker<'a> { 53 + mem_car: &'a mut MemCar, 54 + lower: Bound<String>, 55 + upper: Bound<String>, 56 + preceding_key: Option<RepoPath>, 57 + following_key: Option<RepoPath>, 58 + state: SliceState, 59 + } 60 + 61 + impl SliceWalker<'_> { 62 + /// Yield the next in-range record. 63 + /// 64 + /// Transparently skips boundary items outside the range. Returns 65 + /// `Ok(None)` when the range is exhausted. Errors on any missing block 66 + /// within the range, or on an MST node absent after the first in-range 67 + /// record (which would leave the range unproven). 68 + /// Yield the next in-range record. 69 + /// 70 + /// Transparently skips boundary items outside the range. Returns 71 + /// `Ok(None)` when the range is exhausted — proof validation runs 72 + /// automatically before returning `None`, so the `while let` pattern 73 + /// is sufficient: 74 + /// 75 + /// ```ignore 76 + /// while let Some(output) = walker.next()? { ... } 77 + /// // proof has been validated; any violation surfaces as Err before None 78 + /// ``` 79 + /// 80 + /// Errors on any missing block within the range, on an MST node absent 81 + /// after the first in-range record, or on a proof violation. 82 + pub fn next(&mut self) -> Result<Option<Output>, SliceError> { 83 + if matches!(self.state, SliceState::Done) { 84 + return Ok(None); 85 + } 86 + loop { 87 + match self.mem_car.next()? 
{ 88 + None => { 89 + self.state = SliceState::Done; 90 + validate_lower(self.preceding_key.as_deref(), &self.lower)?; 91 + validate_upper(self.following_key.as_deref(), &self.upper)?; 92 + return Ok(None); 93 + } 94 + Some(WalkItem::MissingSubtree { cid }) => { 95 + if matches!(self.state, SliceState::In) { 96 + return Err(SliceError::MissingNode { cid }); 97 + } 98 + // Before: boundary subtree outside the range, skip 99 + } 100 + Some(WalkItem::MissingRecord { key, cid }) => { 101 + if is_before(&key, &self.lower) { 102 + self.preceding_key = Some(key); 103 + } else if is_after(&key, &self.upper) { 104 + self.following_key = Some(key); 105 + self.state = SliceState::Done; 106 + validate_lower(self.preceding_key.as_deref(), &self.lower)?; 107 + validate_upper(self.following_key.as_deref(), &self.upper)?; 108 + return Ok(None); 109 + } else { 110 + return Err(SliceError::IncompleteRange { key, cid }); 111 + } 112 + } 113 + Some(WalkItem::Record(out)) => { 114 + if is_before(&out.key, &self.lower) { 115 + self.preceding_key = Some(out.key); 116 + } else if is_after(&out.key, &self.upper) { 117 + self.following_key = Some(out.key); 118 + self.state = SliceState::Done; 119 + validate_lower(self.preceding_key.as_deref(), &self.lower)?; 120 + validate_upper(self.following_key.as_deref(), &self.upper)?; 121 + return Ok(None); 122 + } else { 123 + self.state = SliceState::In; 124 + return Ok(Some(out)); 125 + } 126 + } 127 + } 128 + } 129 + } 130 + 131 + /// Drive any remaining walk to completion and return the proof keys. 132 + /// 133 + /// Useful when breaking out of the [`next`] loop early and still wanting 134 + /// the proof. Drives remaining boundary items (O(log n) at most), with 135 + /// proof validation happening inside `next` as usual. 
136 + pub fn finish(mut self) -> Result<SliceProof, SliceError> { 137 + while self.next()?.is_some() {} 138 + Ok(SliceProof { 139 + preceding_key: self.preceding_key, 140 + following_key: self.following_key, 141 + }) 142 + } 143 + } 144 + 145 + impl MemCar { 146 + /// Walk a proven range of the MST. 147 + /// 148 + /// Returns a [`SliceWalker`] that yields records within `range` in key 149 + /// order. After the loop, call [`SliceWalker::finish`] to validate that 150 + /// the adjacent keys bound the range correctly. 151 + /// 152 + /// Accepts standard Rust range expressions: 153 + /// - `"a".."b"` — exclusive upper bound 154 + /// - `"a"..="b"` — inclusive upper bound 155 + /// - `"a"..` — from `a` to end of tree 156 + /// - `.."b"` — from start of tree to just before `b` 157 + /// - `..` — entire tree 158 + pub fn walk_slice<'r>(&mut self, range: impl RangeBounds<&'r str>) -> SliceWalker<'_> { 159 + let lower = bound_to_owned(range.start_bound()); 160 + let upper = bound_to_owned(range.end_bound()); 161 + SliceWalker { 162 + mem_car: self, 163 + lower, 164 + upper, 165 + preceding_key: None, 166 + following_key: None, 167 + state: SliceState::Before, 168 + } 169 + } 170 + } 171 + 172 + // --------------------------------------------------------------------------- 173 + // Helpers 174 + // --------------------------------------------------------------------------- 175 + 176 + fn bound_to_owned(b: Bound<&&str>) -> Bound<String> { 177 + match b { 178 + Bound::Unbounded => Bound::Unbounded, 179 + Bound::Included(s) => Bound::Included((*s).to_owned()), 180 + Bound::Excluded(s) => Bound::Excluded((*s).to_owned()), 181 + } 182 + } 183 + 184 + fn is_before(key: &str, lower: &Bound<String>) -> bool { 185 + match lower { 186 + Bound::Unbounded => false, 187 + Bound::Included(l) => key < l.as_str(), 188 + Bound::Excluded(l) => key <= l.as_str(), 189 + } 190 + } 191 + 192 + fn is_after(key: &str, upper: &Bound<String>) -> bool { 193 + match upper { 194 + Bound::Unbounded => 
false, 195 + Bound::Included(u) => key > u.as_str(), 196 + Bound::Excluded(u) => key >= u.as_str(), 197 + } 198 + } 199 + 200 + fn validate_lower(preceding: Option<&str>, lower: &Bound<String>) -> Result<(), SliceError> { 201 + let ok = match (preceding, lower) { 202 + (None, _) => true, 203 + (Some(p), Bound::Unbounded) => { 204 + unreachable!("is_before always false for Unbounded, but got {p:?}") 205 + } 206 + (Some(p), Bound::Included(l)) => p < l.as_str(), 207 + (Some(p), Bound::Excluded(l)) => p <= l.as_str(), 208 + }; 209 + if ok { 210 + Ok(()) 211 + } else { 212 + Err(SliceError::BadPrecedingKey { 213 + preceding: preceding.unwrap().to_owned(), 214 + }) 215 + } 216 + } 217 + 218 + fn validate_upper(following: Option<&str>, upper: &Bound<String>) -> Result<(), SliceError> { 219 + let ok = match (following, upper) { 220 + (None, _) => true, 221 + (Some(f), Bound::Unbounded) => { 222 + unreachable!("is_after always false for Unbounded, but got {f:?}") 223 + } 224 + (Some(f), Bound::Included(u)) => f > u.as_str(), 225 + (Some(f), Bound::Excluded(u)) => f >= u.as_str(), 226 + }; 227 + if ok { 228 + Ok(()) 229 + } else { 230 + Err(SliceError::BadFollowingKey { 231 + following: following.unwrap().to_owned(), 232 + }) 233 + } 234 + }