simplify slice walker by scanning on construction · microcosm.blue/repo-stream@87079ea

+168 -73

1 changed file

expand all

src

slice.rs

+168 -73

src/slice.rs

··· 8 8 use cid::Cid; 9 9 use std::ops::{Bound, RangeBounds}; 10 10 11 - /// Errors from [`MemCar::walk_slice`] 11 + /// Errors from [`MemCar::walk_slice`] and friends 12 12 #[derive(Debug, thiserror::Error)] 13 13 pub enum SliceError { 14 14 #[error("walk error: {0}")] ··· 39 39 pub following_key: Option<RepoPath>, 40 40 } 41 41 42 - enum SliceState { 43 - Before, 44 - In, 45 - Done, 46 - } 47 - 48 42 /// Iterator-like walker over a proven range of the MST. 49 43 /// 50 - /// Created by [`MemCar::walk_slice`]. Call [`SliceWalker::next`] to yield 51 - /// records, then [`SliceWalker::finish`] to validate the proof. 44 + /// Created by [`MemCar::walk_slice`] and related methods. Call 45 + /// [`SliceWalker::next`] to yield records; proof validation runs 46 + /// automatically before `next` returns `Ok(None)`. 52 47 pub struct SliceWalker<'a> { 53 48 mem_car: &'a mut MemCar, 54 - lower: Bound<String>, 55 49 upper: Bound<String>, 56 50 preceding_key: Option<RepoPath>, 57 51 following_key: Option<RepoPath>, 58 - state: SliceState, 52 + /// First in-range item found during construction, buffered for the first `next()` call. 53 + buffered: Option<Output>, 54 + done: bool, 59 55 } 60 56 61 - impl SliceWalker<'_> { 62 - /// Yield the next in-range record. 57 + impl<'a> SliceWalker<'a> { 58 + /// Walk to the lower bound, establishing `preceding_key` from boundary items. 63 59 /// 64 - /// Transparently skips boundary items outside the range. Returns 65 - /// `Ok(None)` when the range is exhausted. Errors on any missing block 66 - /// within the range, or on an MST node absent after the first in-range 67 - /// record (which would leave the range unproven). 60 + /// Consumes all pre-range items here so that `next` only ever sees 61 + /// in-range or post-range items. 62 + /// 63 + /// We walk rather than seek so that boundary nodes are fully visited and 64 + /// `preceding_key` is set correctly for CAR-slice proofs. (Walker::seek's 65 + /// SkipSubtree optimisation would skip the boundary node whose MissingRecord 66 + /// entry carries the preceding key.) 67 + fn new( 68 + mem_car: &'a mut MemCar, 69 + lower: Bound<String>, 70 + upper: Bound<String>, 71 + ) -> Result<Self, SliceError> { 72 + let mut preceding_key = None; 73 + let mut following_key = None; 74 + let mut buffered = None; 75 + let mut done = false; 76 + 77 + loop { 78 + match mem_car.next()? { 79 + None => { 80 + done = true; 81 + break; 82 + } 83 + Some(WalkItem::MissingSubtree { .. }) => { 84 + // Boundary subtree entirely before the range — safe to skip. 85 + } 86 + Some(WalkItem::MissingRecord { key, cid }) => { 87 + if is_before(&key, &lower) { 88 + preceding_key = Some(key); 89 + } else if is_after(&key, &upper) { 90 + following_key = Some(key); 91 + done = true; 92 + break; 93 + } else { 94 + return Err(SliceError::IncompleteRange { key, cid }); 95 + } 96 + } 97 + Some(WalkItem::Record(out)) => { 98 + if is_before(&out.key, &lower) { 99 + preceding_key = Some(out.key); 100 + } else if is_after(&out.key, &upper) { 101 + following_key = Some(out.key); 102 + done = true; 103 + break; 104 + } else { 105 + buffered = Some(out); 106 + break; 107 + } 108 + } 109 + } 110 + } 111 + 112 + validate_lower(preceding_key.as_deref(), &lower)?; 113 + if done { 114 + validate_upper(following_key.as_deref(), &upper)?; 115 + } 116 + 117 + Ok(Self { 118 + mem_car, 119 + upper, 120 + preceding_key, 121 + following_key, 122 + buffered, 123 + done, 124 + }) 125 + } 126 + 68 127 /// Yield the next in-range record. 69 128 /// 70 - /// Transparently skips boundary items outside the range. Returns 71 - /// `Ok(None)` when the range is exhausted — proof validation runs 129 + /// Returns `Ok(None)` when the range is exhausted — proof validation runs 72 130 /// automatically before returning `None`, so the `while let` pattern 73 - /// is sufficient: 131 + /// is sufficient and safe: 74 132 /// 75 133 /// ```ignore 76 134 /// while let Some(output) = walker.next()? { ... } 77 - /// // proof has been validated; any violation surfaces as Err before None 135 + /// // any proof violation surfaced as Err before None was returned 78 136 /// ``` 79 137 /// 80 138 /// Errors on any missing block within the range, on an MST node absent 81 - /// after the first in-range record, or on a proof violation. 139 + /// within the range, or on a proof violation. 82 140 pub fn next(&mut self) -> Result<Option<Output>, SliceError> { 83 - if matches!(self.state, SliceState::Done) { 141 + if self.done { 84 142 return Ok(None); 85 143 } 86 - loop { 87 - match self.mem_car.next()? { 88 - None => { 89 - self.state = SliceState::Done; 90 - validate_lower(self.preceding_key.as_deref(), &self.lower)?; 144 + 145 + if let Some(out) = self.buffered.take() { 146 + return Ok(Some(out)); 147 + } 148 + 149 + match self.mem_car.next()? { 150 + None => { 151 + self.done = true; 152 + validate_upper(self.following_key.as_deref(), &self.upper)?; 153 + Ok(None) 154 + } 155 + Some(WalkItem::MissingSubtree { cid }) => { 156 + // Any missing subtree after the range starts is an error: 157 + // we can't prove the range is complete without it. 158 + Err(SliceError::MissingNode { cid }) 159 + } 160 + Some(WalkItem::MissingRecord { key, cid }) => { 161 + if is_after(&key, &self.upper) { 162 + self.following_key = Some(key); 163 + self.done = true; 91 164 validate_upper(self.following_key.as_deref(), &self.upper)?; 92 - return Ok(None); 93 - } 94 - Some(WalkItem::MissingSubtree { cid }) => { 95 - if matches!(self.state, SliceState::In) { 96 - return Err(SliceError::MissingNode { cid }); 97 - } 98 - // Before: boundary subtree outside the range, skip 99 - } 100 - Some(WalkItem::MissingRecord { key, cid }) => { 101 - if is_before(&key, &self.lower) { 102 - self.preceding_key = Some(key); 103 - } else if is_after(&key, &self.upper) { 104 - self.following_key = Some(key); 105 - self.state = SliceState::Done; 106 - validate_lower(self.preceding_key.as_deref(), &self.lower)?; 107 - validate_upper(self.following_key.as_deref(), &self.upper)?; 108 - return Ok(None); 109 - } else { 110 - return Err(SliceError::IncompleteRange { key, cid }); 111 - } 165 + Ok(None) 166 + } else { 167 + Err(SliceError::IncompleteRange { key, cid }) 112 168 } 113 - Some(WalkItem::Record(out)) => { 114 - if is_before(&out.key, &self.lower) { 115 - self.preceding_key = Some(out.key); 116 - } else if is_after(&out.key, &self.upper) { 117 - self.following_key = Some(out.key); 118 - self.state = SliceState::Done; 119 - validate_lower(self.preceding_key.as_deref(), &self.lower)?; 120 - validate_upper(self.following_key.as_deref(), &self.upper)?; 121 - return Ok(None); 122 - } else { 123 - self.state = SliceState::In; 124 - return Ok(Some(out)); 125 - } 169 + } 170 + Some(WalkItem::Record(out)) => { 171 + if is_after(&out.key, &self.upper) { 172 + self.following_key = Some(out.key); 173 + self.done = true; 174 + validate_upper(self.following_key.as_deref(), &self.upper)?; 175 + Ok(None) 176 + } else { 177 + Ok(Some(out)) 126 178 } 127 179 } 128 180 } ··· 146 198 /// Walk a proven range of the MST. 147 199 /// 148 200 /// Returns a [`SliceWalker`] that yields records within `range` in key 149 - /// order. After the loop, call [`SliceWalker::finish`] to validate that 150 - /// the adjacent keys bound the range correctly. 201 + /// order. Proof validation runs automatically when `next` returns `None`. 151 202 /// 152 203 /// Accepts standard Rust range expressions: 153 204 /// - `"a".."b"` — exclusive upper bound 154 205 /// - `"a"..="b"` — inclusive upper bound 155 206 /// - `"a"..` — from `a` to end of tree 156 207 /// - `.."b"` — from start of tree to just before `b` 157 - /// - `..` — entire tree 158 - pub fn walk_slice<'r>(&mut self, range: impl RangeBounds<&'r str>) -> SliceWalker<'_> { 208 + /// - `..` — entire tree (equivalent to [`full`](MemCar::full)) 209 + pub fn walk_slice<'r>( 210 + &mut self, 211 + range: impl RangeBounds<&'r str>, 212 + ) -> Result<SliceWalker<'_>, SliceError> { 159 213 let lower = bound_to_owned(range.start_bound()); 160 214 let upper = bound_to_owned(range.end_bound()); 161 - SliceWalker { 162 - mem_car: self, 163 - lower, 164 - upper, 165 - preceding_key: None, 166 - following_key: None, 167 - state: SliceState::Before, 168 - } 215 + SliceWalker::new(self, lower, upper) 216 + } 217 + 218 + /// Walk the entire MST, proving that no records are missing. 219 + pub fn full(&mut self) -> Result<SliceWalker<'_>, SliceError> { 220 + SliceWalker::new(self, Bound::Unbounded, Bound::Unbounded) 221 + } 222 + 223 + /// Walk all records whose key starts with `pre`, proving the range is complete. 224 + /// 225 + /// The exclusive upper bound is computed by incrementing the last character 226 + /// of `pre`, so all keys with that prefix — and only those keys — are in range. 227 + pub fn prefix(&mut self, pre: &str) -> Result<SliceWalker<'_>, SliceError> { 228 + let lower = Bound::Included(pre.to_owned()); 229 + let upper = prefix_upper(pre); 230 + SliceWalker::new(self, lower, upper) 231 + } 232 + 233 + /// Fetch a single record by exact key, proving its presence or absence. 234 + /// 235 + /// - `Ok(Some(output))` — record is present 236 + /// - `Ok(None)` — record is provably absent (adjacent MST keys bound it) 237 + /// - `Err(SliceError::IncompleteRange)` — the MST has an entry for this 238 + /// key but the block is absent; absence cannot be proven 239 + /// - Other `Err` variants for MST structural issues 240 + pub fn get(&mut self, key: &str) -> Result<Option<Output>, SliceError> { 241 + let mut walker = SliceWalker::new( 242 + self, 243 + Bound::Included(key.to_owned()), 244 + Bound::Included(key.to_owned()), 245 + )?; 246 + let record = walker.next()?; 247 + walker.finish()?; 248 + Ok(record) 169 249 } 170 250 } 171 251 ··· 179 259 Bound::Included(s) => Bound::Included((*s).to_owned()), 180 260 Bound::Excluded(s) => Bound::Excluded((*s).to_owned()), 181 261 } 262 + } 263 + 264 + /// Compute the exclusive upper bound for a prefix: the smallest string that 265 + /// does not start with `pre`. Found by incrementing the last character. 266 + fn prefix_upper(pre: &str) -> Bound<String> { 267 + let mut s = pre.to_owned(); 268 + while let Some(last) = s.chars().next_back() { 269 + s.pop(); 270 + if let Some(next) = char::from_u32(last as u32 + 1) { 271 + s.push(next); 272 + return Bound::Excluded(s); 273 + } 274 + // last char was U+10FFFF; try the previous one 275 + } 276 + Bound::Unbounded // pre was empty or all U+10FFFF 182 277 } 183 278 184 279 fn is_before(key: &str, lower: &Bound<String>) -> bool {

Configure Feed

Configure Feed