Fast and robust atproto CAR file processing in Rust
14
fork

Configure Feed

Select the types of activity you want to include in your feed.

simplify slice walker by scanning on construction

phil 87079eaf b31e2808

+168 -73
+168 -73
src/slice.rs
··· 8 8 use cid::Cid; 9 9 use std::ops::{Bound, RangeBounds}; 10 10 11 - /// Errors from [`MemCar::walk_slice`] 11 + /// Errors from [`MemCar::walk_slice`] and friends 12 12 #[derive(Debug, thiserror::Error)] 13 13 pub enum SliceError { 14 14 #[error("walk error: {0}")] ··· 39 39 pub following_key: Option<RepoPath>, 40 40 } 41 41 42 - enum SliceState { 43 - Before, 44 - In, 45 - Done, 46 - } 47 - 48 42 /// Iterator-like walker over a proven range of the MST. 49 43 /// 50 - /// Created by [`MemCar::walk_slice`]. Call [`SliceWalker::next`] to yield 51 - /// records, then [`SliceWalker::finish`] to validate the proof. 44 + /// Created by [`MemCar::walk_slice`] and related methods. Call 45 + /// [`SliceWalker::next`] to yield records; proof validation runs 46 + /// automatically before `next` returns `Ok(None)`. 52 47 pub struct SliceWalker<'a> { 53 48 mem_car: &'a mut MemCar, 54 - lower: Bound<String>, 55 49 upper: Bound<String>, 56 50 preceding_key: Option<RepoPath>, 57 51 following_key: Option<RepoPath>, 58 - state: SliceState, 52 + /// First in-range item found during construction, buffered for the first `next()` call. 53 + buffered: Option<Output>, 54 + done: bool, 59 55 } 60 56 61 - impl SliceWalker<'_> { 62 - /// Yield the next in-range record. 57 + impl<'a> SliceWalker<'a> { 58 + /// Walk to the lower bound, establishing `preceding_key` from boundary items. 63 59 /// 64 - /// Transparently skips boundary items outside the range. Returns 65 - /// `Ok(None)` when the range is exhausted. Errors on any missing block 66 - /// within the range, or on an MST node absent after the first in-range 67 - /// record (which would leave the range unproven). 60 + /// Consumes all pre-range items here so that `next` only ever sees 61 + /// in-range or post-range items. 62 + /// 63 + /// We walk rather than seek so that boundary nodes are fully visited and 64 + /// `preceding_key` is set correctly for CAR-slice proofs. 
(Walker::seek's 65 + /// SkipSubtree optimisation would skip the boundary node whose MissingRecord 66 + /// entry carries the preceding key.) 67 + fn new( 68 + mem_car: &'a mut MemCar, 69 + lower: Bound<String>, 70 + upper: Bound<String>, 71 + ) -> Result<Self, SliceError> { 72 + let mut preceding_key = None; 73 + let mut following_key = None; 74 + let mut buffered = None; 75 + let mut done = false; 76 + 77 + loop { 78 + match mem_car.next()? { 79 + None => { 80 + done = true; 81 + break; 82 + } 83 + Some(WalkItem::MissingSubtree { .. }) => { 84 + // Boundary subtree entirely before the range — safe to skip. 85 + } 86 + Some(WalkItem::MissingRecord { key, cid }) => { 87 + if is_before(&key, &lower) { 88 + preceding_key = Some(key); 89 + } else if is_after(&key, &upper) { 90 + following_key = Some(key); 91 + done = true; 92 + break; 93 + } else { 94 + return Err(SliceError::IncompleteRange { key, cid }); 95 + } 96 + } 97 + Some(WalkItem::Record(out)) => { 98 + if is_before(&out.key, &lower) { 99 + preceding_key = Some(out.key); 100 + } else if is_after(&out.key, &upper) { 101 + following_key = Some(out.key); 102 + done = true; 103 + break; 104 + } else { 105 + buffered = Some(out); 106 + break; 107 + } 108 + } 109 + } 110 + } 111 + 112 + validate_lower(preceding_key.as_deref(), &lower)?; 113 + if done { 114 + validate_upper(following_key.as_deref(), &upper)?; 115 + } 116 + 117 + Ok(Self { 118 + mem_car, 119 + upper, 120 + preceding_key, 121 + following_key, 122 + buffered, 123 + done, 124 + }) 125 + } 126 + 68 127 /// Yield the next in-range record. 69 128 /// 70 - /// Transparently skips boundary items outside the range. 
Returns 71 - /// `Ok(None)` when the range is exhausted — proof validation runs 129 + /// Returns `Ok(None)` when the range is exhausted — proof validation runs 72 130 /// automatically before returning `None`, so the `while let` pattern 73 - /// is sufficient: 131 + /// is sufficient and safe: 74 132 /// 75 133 /// ```ignore 76 134 /// while let Some(output) = walker.next()? { ... } 77 - /// // proof has been validated; any violation surfaces as Err before None 135 + /// // any proof violation surfaced as Err before None was returned 78 136 /// ``` 79 137 /// 80 138 /// Errors on any missing block within the range, on an MST node absent 81 - /// after the first in-range record, or on a proof violation. 139 + /// within the range, or on a proof violation. 82 140 pub fn next(&mut self) -> Result<Option<Output>, SliceError> { 83 - if matches!(self.state, SliceState::Done) { 141 + if self.done { 84 142 return Ok(None); 85 143 } 86 - loop { 87 - match self.mem_car.next()? { 88 - None => { 89 - self.state = SliceState::Done; 90 - validate_lower(self.preceding_key.as_deref(), &self.lower)?; 144 + 145 + if let Some(out) = self.buffered.take() { 146 + return Ok(Some(out)); 147 + } 148 + 149 + match self.mem_car.next()? { 150 + None => { 151 + self.done = true; 152 + validate_upper(self.following_key.as_deref(), &self.upper)?; 153 + Ok(None) 154 + } 155 + Some(WalkItem::MissingSubtree { cid }) => { 156 + // Any missing subtree after the range starts is an error: 157 + // we can't prove the range is complete without it. 
158 + Err(SliceError::MissingNode { cid }) 159 + } 160 + Some(WalkItem::MissingRecord { key, cid }) => { 161 + if is_after(&key, &self.upper) { 162 + self.following_key = Some(key); 163 + self.done = true; 91 164 validate_upper(self.following_key.as_deref(), &self.upper)?; 92 - return Ok(None); 93 - } 94 - Some(WalkItem::MissingSubtree { cid }) => { 95 - if matches!(self.state, SliceState::In) { 96 - return Err(SliceError::MissingNode { cid }); 97 - } 98 - // Before: boundary subtree outside the range, skip 99 - } 100 - Some(WalkItem::MissingRecord { key, cid }) => { 101 - if is_before(&key, &self.lower) { 102 - self.preceding_key = Some(key); 103 - } else if is_after(&key, &self.upper) { 104 - self.following_key = Some(key); 105 - self.state = SliceState::Done; 106 - validate_lower(self.preceding_key.as_deref(), &self.lower)?; 107 - validate_upper(self.following_key.as_deref(), &self.upper)?; 108 - return Ok(None); 109 - } else { 110 - return Err(SliceError::IncompleteRange { key, cid }); 111 - } 165 + Ok(None) 166 + } else { 167 + Err(SliceError::IncompleteRange { key, cid }) 112 168 } 113 - Some(WalkItem::Record(out)) => { 114 - if is_before(&out.key, &self.lower) { 115 - self.preceding_key = Some(out.key); 116 - } else if is_after(&out.key, &self.upper) { 117 - self.following_key = Some(out.key); 118 - self.state = SliceState::Done; 119 - validate_lower(self.preceding_key.as_deref(), &self.lower)?; 120 - validate_upper(self.following_key.as_deref(), &self.upper)?; 121 - return Ok(None); 122 - } else { 123 - self.state = SliceState::In; 124 - return Ok(Some(out)); 125 - } 169 + } 170 + Some(WalkItem::Record(out)) => { 171 + if is_after(&out.key, &self.upper) { 172 + self.following_key = Some(out.key); 173 + self.done = true; 174 + validate_upper(self.following_key.as_deref(), &self.upper)?; 175 + Ok(None) 176 + } else { 177 + Ok(Some(out)) 126 178 } 127 179 } 128 180 } ··· 146 198 /// Walk a proven range of the MST. 
147 199 /// 148 200 /// Returns a [`SliceWalker`] that yields records within `range` in key 149 - /// order. After the loop, call [`SliceWalker::finish`] to validate that 150 - /// the adjacent keys bound the range correctly. 201 + /// order. Proof validation runs automatically when `next` returns `None`. 151 202 /// 152 203 /// Accepts standard Rust range expressions: 153 204 /// - `"a".."b"` — exclusive upper bound 154 205 /// - `"a"..="b"` — inclusive upper bound 155 206 /// - `"a"..` — from `a` to end of tree 156 207 /// - `.."b"` — from start of tree to just before `b` 157 - /// - `..` — entire tree 158 - pub fn walk_slice<'r>(&mut self, range: impl RangeBounds<&'r str>) -> SliceWalker<'_> { 208 + /// - `..` — entire tree (equivalent to [`full`](MemCar::full)) 209 + pub fn walk_slice<'r>( 210 + &mut self, 211 + range: impl RangeBounds<&'r str>, 212 + ) -> Result<SliceWalker<'_>, SliceError> { 159 213 let lower = bound_to_owned(range.start_bound()); 160 214 let upper = bound_to_owned(range.end_bound()); 161 - SliceWalker { 162 - mem_car: self, 163 - lower, 164 - upper, 165 - preceding_key: None, 166 - following_key: None, 167 - state: SliceState::Before, 168 - } 215 + SliceWalker::new(self, lower, upper) 216 + } 217 + 218 + /// Walk the entire MST, proving that no records are missing. 219 + pub fn full(&mut self) -> Result<SliceWalker<'_>, SliceError> { 220 + SliceWalker::new(self, Bound::Unbounded, Bound::Unbounded) 221 + } 222 + 223 + /// Walk all records whose key starts with `pre`, proving the range is complete. 224 + /// 225 + /// The exclusive upper bound is computed by incrementing the last character 226 + /// of `pre`, so all keys with that prefix — and only those keys — are in range. 
227 + pub fn prefix(&mut self, pre: &str) -> Result<SliceWalker<'_>, SliceError> { 228 + let lower = Bound::Included(pre.to_owned()); 229 + let upper = prefix_upper(pre); 230 + SliceWalker::new(self, lower, upper) 231 + } 232 + 233 + /// Fetch a single record by exact key, proving its presence or absence. 234 + /// 235 + /// - `Ok(Some(output))` — record is present 236 + /// - `Ok(None)` — record is provably absent (adjacent MST keys bound it) 237 + /// - `Err(SliceError::IncompleteRange)` — the MST has an entry for this 238 + /// key but the block is absent; absence cannot be proven 239 + /// - Other `Err` variants for MST structural issues 240 + pub fn get(&mut self, key: &str) -> Result<Option<Output>, SliceError> { 241 + let mut walker = SliceWalker::new( 242 + self, 243 + Bound::Included(key.to_owned()), 244 + Bound::Included(key.to_owned()), 245 + )?; 246 + let record = walker.next()?; 247 + walker.finish()?; 248 + Ok(record) 169 249 } 170 250 } 171 251 ··· 179 259 Bound::Included(s) => Bound::Included((*s).to_owned()), 180 260 Bound::Excluded(s) => Bound::Excluded((*s).to_owned()), 181 261 } 262 + } 263 + 264 + /// Compute the exclusive upper bound for a prefix: the smallest string that 265 + /// does not start with `pre`. Found by incrementing the last character. 266 + fn prefix_upper(pre: &str) -> Bound<String> { 267 + let mut s = pre.to_owned(); 268 + while let Some(last) = s.chars().next_back() { 269 + s.pop(); 270 + if let Some(next) = char::from_u32(last as u32 + 1) { 271 + s.push(next); 272 + return Bound::Excluded(s); 273 + } 274 + // last char was U+10FFFF; try the previous one 275 + } 276 + Bound::Unbounded // pre was empty or all U+10FFFF 182 277 } 183 278 184 279 fn is_before(key: &str, lower: &Bound<String>) -> bool {