···11[package]
22name = "repo-stream"
33-version = "0.5.0-alpha.2"
33+version = "0.5.0-alpha.3"
44edition = "2024"
55license = "MIT OR Apache-2.0"
66description = "Fast and robust atproto CAR file processing"
+42-4
src/mem.rs
···285285 /// Record CIDs come directly from MST node entries — record blocks are never
286286 /// looked up. MST node blocks are still fetched to traverse the tree.
287287 ///
288288- /// Returns `Ok(None)` when the walk is complete. Returns
289289- /// `Err(WalkError::MissingNode)` if a child MST node block is absent.
288288+ /// **Not strict**: if a child MST node block is absent, the subtree is silently
289289+ /// skipped. Use [`next_keys_strict`] to error instead.
290290+ ///
291291+ /// Returns `Ok(None)` when the walk is complete.
290292 pub fn next_keys(&mut self) -> Result<Option<(RepoPath, Cid)>, WalkError> {
291293 self.walker.step_keys(&self.blocks)
292294 }
···295297 ///
296298 /// Like [`Self::next_keys`] but collects up to `n` pairs in one call.
297299 ///
298298- /// Returns `Ok(None)` when the walk is complete. Returns
299299- /// `Err(WalkError::MissingNode)` if a child MST node block is absent.
300300+ /// **Not strict**: if a child MST node block is absent, the subtree is silently
301301+ /// skipped. Use [`next_chunk_keys_strict`] to error instead.
302302+ ///
303303+ /// Returns `Ok(None)` when the walk is complete.
300304 pub fn next_chunk_keys(&mut self, n: usize) -> Result<Option<Vec<(RepoPath, Cid)>>, WalkError> {
301305 let mut out = Vec::with_capacity(n);
302306 for _ in 0..n {
303307 match self.walker.step_keys(&self.blocks)? {
308308+ Some(pair) => out.push(pair),
309309+ None => break,
310310+ }
311311+ }
312312+ if out.is_empty() {
313313+ Ok(None)
314314+ } else {
315315+ Ok(Some(out))
316316+ }
317317+ }
318318+319319+ /// Get the next key and CID from the walk, without fetching record blocks.
320320+ ///
321321+ /// Like [`next_keys`] but returns `Err(WalkError::MissingNode)` if a child
322322+ /// MST node block is absent rather than silently skipping the subtree.
323323+ ///
324324+ /// Returns `Ok(None)` when the walk is complete.
325325+ pub fn next_keys_strict(&mut self) -> Result<Option<(RepoPath, Cid)>, WalkError> {
326326+ self.walker.step_keys_strict(&self.blocks)
327327+ }
328328+329329+ /// Collect up to `n` key+CID pairs, without fetching record blocks.
330330+ ///
331331+ /// Like [`Self::next_chunk_keys`] but returns `Err(WalkError::MissingNode)` if a
332332+ /// child MST node block is absent rather than silently skipping the subtree.
333333+ ///
334334+ /// Returns `Ok(None)` when the walk is complete.
335335+ pub fn next_chunk_keys_strict(
336336+ &mut self,
337337+ n: usize,
338338+ ) -> Result<Option<Vec<(RepoPath, Cid)>>, WalkError> {
339339+ let mut out = Vec::with_capacity(n);
340340+ for _ in 0..n {
341341+ match self.walker.step_keys_strict(&self.blocks)? {
304342 Some(pair) => out.push(pair),
305343 None => break,
306344 }
+27-4
src/walk.rs
···3737 }
3838 }
39394040+ #[allow(clippy::len_without_is_empty)]
4041 pub fn len(&self) -> usize {
4142 match self {
4243 MaybeProcessedBlock::Raw(b) | MaybeProcessedBlock::Processed(b) => b.len(),
···307308 /// Returns the key and CID of each record directly from the MST node entries.
308309 /// MST node blocks are still fetched to traverse the tree structure.
309310 ///
310310- /// Returns `Err(WalkError::MissingNode)` if a child MST node block is absent.
311311+ /// If a child MST node block is absent, the subtree is silently skipped.
312312+ /// Use [`step_keys_strict`] to error instead.
311313 pub fn step_keys(
312314 &mut self,
313315 blocks: &HashMap<ObjectLink, MaybeProcessedBlock>,
314316 ) -> Result<Option<(RepoPath, Cid)>, WalkError> {
317317+ self.step_keys_impl(blocks, false)
318318+ }
319319+320320+ /// Like [`step_keys`], but returns `Err(WalkError::MissingNode)` if a child
321321+ /// MST node block is absent rather than silently skipping the subtree.
322322+ pub fn step_keys_strict(
323323+ &mut self,
324324+ blocks: &HashMap<ObjectLink, MaybeProcessedBlock>,
325325+ ) -> Result<Option<(RepoPath, Cid)>, WalkError> {
326326+ self.step_keys_impl(blocks, true)
327327+ }
328328+329329+ fn step_keys_impl(
330330+ &mut self,
331331+ blocks: &HashMap<ObjectLink, MaybeProcessedBlock>,
332332+ strict: bool,
333333+ ) -> Result<Option<(RepoPath, Cid)>, WalkError> {
315334 while let Some(NodeThing { link, kind }) = self.next_todo() {
316335 match kind {
317336 ThingKind::Record(key) => {
···326345 }
327346 ThingKind::ChildNode => {
328347 let Some(mpb) = blocks.get(&link) else {
329329- return Err(WalkError::MissingNode {
330330- cid: Box::new(link.into()),
331331- });
348348+ if strict {
349349+ return Err(WalkError::MissingNode {
350350+ cid: Box::new(link.into()),
351351+ });
352352+ } else {
353353+ continue;
354354+ }
332355 };
333356 let MaybeProcessedBlock::Raw(data) = mpb else {
334357 return Err(WalkError::BadCommitFingerprint);
+2-2
tests/non-huge-cars.rs
···9696 assert_eq!(count_keys(MIDSIZE_CAR).await, 11585);
9797}
98989999-/// Verify that next_chunk_keys returns the same (key, cid) pairs as next_chunk_strict.
9999+/// Verify that next_chunk_keys_strict returns the same (key, cid) pairs as next_chunk_strict.
100100#[tokio::test]
101101async fn test_next_chunk_keys_agrees_with_strict() {
102102 let mut mc_strict = DriverBuilder::new()
···118118 }
119119120120 let mut from_keys = Vec::new();
121121- while let Some(pairs) = mc_keys.next_chunk_keys(256).unwrap() {
121121+ while let Some(pairs) = mc_keys.next_chunk_keys_strict(256).unwrap() {
122122 from_keys.extend(pairs);
123123 }
124124