···3232 log::info!("got commit: {:?}", mem_car.commit);
33333434 while let Step::Value(records) = mem_car.next_chunk(256)? {
3535- for Output { rkey: _, cid: _, data: _ } in records {
3535+ for Output { key: _, cid: _, data: _ } in records {
3636 // process records
3737 }
3838 }
+6-6
examples/read-slice/main.rs
···2626 "\nthis slice is from {}, repo rev {}",
2727 mem_car.commit.did, mem_car.commit.rev
2828 );
2929- if let Some(rkey) = &mem_car.prev_rkey {
3030- println!(" -> key immediately before CAR slice: {rkey}");
2929+ if let Some(key) = &mem_car.prev_key {
3030+ println!(" -> key immediately before CAR slice: {key}");
3131 } else {
3232 println!(
3333 " -> no key preceding the CAR slice, so it includes the leading edge of the tree."
···3838 let end = loop {
3939 match mem_car.next_chunk(256)? {
4040 Step::Value(chunk) => {
4141- for Output { cid, rkey, .. } in chunk {
4141+ for Output { cid, key, .. } in chunk {
4242 print!(" SHA256 ");
4343 for byte in cid.to_bytes().iter().skip(4).take(5) {
4444 print!("{byte:02x}");
4545 }
4646- println!("...\t{rkey}");
4646+ println!("...\t{key}");
4747 }
4848 }
4949 Step::End(e) => break e,
···5151 };
52525353 println!("done walking records present in the slice.");
5454- if let Some(rkey) = end {
5555- println!(" -> key immediately after CAR slice: {rkey}");
5454+ if let Some(key) = end {
5555+ println!(" -> key immediately after CAR slice: {key}");
5656 } else {
5757 println!(
5858 " -> no key following the CAR slice, so it includes the trailing edge of the tree."
+3-3
readme.md
···3232 // if all blocks fit within memory
3333 Ok(mut mem_car) => {
3434 while let Step::Value(chunk) = mem_car.next_chunk(256)? {
3535- for Output { rkey: _, cid: _, data } in chunk {
3535+ for Output { key: _, cid: _, data } in chunk {
3636 let size = usize::from_ne_bytes(<[u8; 8]>::try_from(data).unwrap());
3737 total_size += size;
3838 }
···4444 // set up a disk store we can spill to
4545 let store = DiskBuilder::new().open("some/path.db".into()).await?;
4646 // do the spilling, get back a disk driver
4747- let (_commit, _prev_rkey, mut driver) = partial.finish_loading(store).await?;
4747+ let (_commit, _prev_key, mut driver) = partial.finish_loading(store).await?;
48484949 while let Step::Value(chunk) = driver.next_chunk(256).await? {
5050- for Output { rkey: _, cid: _, data } in chunk {
5050+ for Output { key: _, cid: _, data } in chunk {
5151 let size = usize::from_ne_bytes(<[u8; 8]>::try_from(data).unwrap());
5252 total_size += size;
5353 }
+92-60
src/drive.rs
···11//! Consume a CAR from an AsyncRead, producing an ordered stream of records
2233-use crate::link::{NodeThing, ObjectLink, ThingKind};
33+use crate::link::ObjectLink;
44use crate::{
55- Bytes, HashMap, Rkey, Step,
55+ Bytes, HashMap, RepoPath, Step,
66 block::{MaybeProcessedBlock, noop},
77 disk::{DiskError, DiskStore},
88 mst::MstNode,
99- walk::{MstError, Output},
99+ walk::{MstError, Output, WalkItem},
1010};
1111use cid::Cid;
1212use iroh_car::CarReader;
···1717use crate::walk::{WalkError, Walker};
1818use thiserror::Error;
19192020-/// An in-order chunk of Rkey + CID + (processed) Block
2020+/// An in-order chunk of RepoPath + CID + (processed) Block
2121pub type BlockChunk = Vec<Output>;
22222323/// Errors that can occur while loading a CAR into memory
···167167 };
168168 let mut walker = Walker::new(root_node);
169169170170- let prev_rkey = walker.step_to_edge(&mem_blocks)?;
170170+ let prev_key = walker.step_to_edge(&mem_blocks)?;
171171172172 Ok(MemCar {
173173 commit,
174174- prev_rkey,
174174+ prev_key,
175175 blocks: mem_blocks,
176176 walker,
177177 process,
178178- next_missing: None,
178178+ trailing_key: None,
179179 })
180180}
181181···183183#[derive(Debug)]
184184pub struct MemCar {
185185 pub commit: Commit,
186186- /// For CAR slices: the rkey of the last record before this slice's leading edge.
186186+ /// For CAR slices: the key of the last record before this slice's leading edge.
187187 /// `None` if this slice (or full CAR) starts from the leftmost record in the tree.
188188- pub prev_rkey: Option<Rkey>,
188188+ pub prev_key: Option<RepoPath>,
189189 pub blocks: HashMap<ObjectLink, MaybeProcessedBlock>,
190190 walker: Walker,
191191 process: fn(Bytes) -> Bytes,
192192- next_missing: Option<NodeThing>,
192192+ /// `None` = no gap encountered yet; `Some(k)` = trailing edge determined.
193193+ trailing_key: Option<Option<RepoPath>>,
193194}
194195195196impl MemCar {
···197198 /// Seek forward to the first record at or after `target`.
198199 ///
199200 /// Uses the MST structure to skip entire subtrees efficiently.
200200- /// After this returns, the next `next_chunk` call will start at or after `target`.
201201+ /// After this returns, the next `next` or `next_chunk` call will start at or after `target`.
201202 pub fn seek(&mut self, target: &str) -> Result<(), WalkError> {
202203 self.walker.seek(target, &self.blocks)
203204 }
204205205205- /// Get the next record
206206- pub fn next(&mut self) -> Result<Option<Output>, WalkError> {
207207- todo!()
206206+ /// Walk forward past any gaps to determine the trailing edge key.
207207+ ///
208208+ /// The first record key encountered after a gap (whether the record's block
209209+ /// is present or missing) is the trailing edge — the first key not covered
210210+ /// by this slice. Sets `self.trailing_key` and returns it.
211211+ fn find_trailing_edge(&mut self) -> Result<Option<RepoPath>, WalkError> {
212212+ let trailing = loop {
213213+ match self.walker.step(&self.blocks, self.process)? {
214214+ Some(WalkItem::Record(r)) => break Some(r.key),
215215+ Some(WalkItem::MissingRecord { key, .. }) => break Some(key),
216216+ Some(WalkItem::MissingSubtree { .. }) => continue,
217217+ None => break None,
218218+ }
219219+ };
220220+ self.trailing_key = Some(trailing.clone());
221221+ Ok(trailing)
208222 }
209223210210- /// Iterate up to `n` records in rkey order.
224224+ /// Get the next record.
211225 ///
212212- /// Returns `Step::Value(records)` while records remain, then `Step::End(next_rkey)`
213213- /// where `next_rkey` is the first rkey after the slice (for CAR slices), or `None`.
214214- pub fn next_chunk(&mut self, n: usize) -> Result<Step<BlockChunk>, WalkError> {
215215- if let Some(ref mut missing) = self.next_missing {
216216- while let Step::Value(sparse_out) =
217217- self.walker.step_sparse(&self.blocks, self.process)?
218218- {
219219- if missing.kind == ThingKind::ChildNode {
220220- *missing = NodeThing {
221221- link: sparse_out.cid.into(),
222222- kind: ThingKind::Record(sparse_out.rkey),
223223- };
224224- }
226226+ /// Returns `Step::Value(output)` for each record in key order, then
227227+ /// `Step::End(None)` at the end of a full CAR, or `Step::End(Some(key))`
228228+ /// for CAR slices where `key` is the first key immediately after the slice.
229229+ pub fn next(&mut self) -> Result<Step, WalkError> {
230230+ if let Some(trailing) = &self.trailing_key {
231231+ return Ok(Step::End(trailing.clone()));
232232+ }
233233+ match self.walker.step(&self.blocks, self.process)? {
234234+ Some(WalkItem::Record(out)) => Ok(Step::Value(out)),
235235+ Some(WalkItem::MissingRecord { key, .. }) => {
236236+ self.trailing_key = Some(Some(key.clone()));
237237+ Ok(Step::End(Some(key)))
225238 }
226226- return Ok(match &missing.kind {
227227- ThingKind::ChildNode => Step::End(None),
228228- ThingKind::Record(rkey) => Step::End(Some(rkey.clone())),
229229- });
239239+ Some(WalkItem::MissingSubtree { .. }) => {
240240+ let trailing = self.find_trailing_edge()?;
241241+ Ok(Step::End(trailing))
242242+ }
243243+ None => {
244244+ self.trailing_key = Some(None);
245245+ Ok(Step::End(None))
246246+ }
247247+ }
248248+ }
249249+250250+ /// Iterate up to `n` records in key order.
251251+ ///
252252+ /// Returns `Step::Value(records)` while records remain, then `Step::End(next_key)`
253253+ /// where `next_key` is the first key after the slice (for CAR slices), or `None`.
254254+ pub fn next_chunk(&mut self, n: usize) -> Result<Step<BlockChunk>, WalkError> {
255255+ if let Some(trailing) = &self.trailing_key {
256256+ return Ok(Step::End(trailing.clone()));
230257 }
231258 let mut out = Vec::with_capacity(n);
232259 for _ in 0..n {
233233- match self.walker.step(&self.blocks, self.process) {
234234- Ok(Step::Value(record)) => out.push(record),
235235- Ok(Step::End(None)) => break,
236236- Ok(Step::End(_)) => unreachable!(),
237237- Err(WalkError::MissingBlock(missing)) => {
238238- self.next_missing = Some(*missing);
260260+ match self.walker.step(&self.blocks, self.process)? {
261261+ Some(WalkItem::Record(record)) => out.push(record),
262262+ Some(WalkItem::MissingRecord { key, .. }) => {
263263+ self.trailing_key = Some(Some(key.clone()));
239264 return Ok(Step::Value(out)); // may be empty
240265 }
241241- Err(other) => return Err(other),
266266+ Some(WalkItem::MissingSubtree { .. }) => {
267267+ let trailing = self.find_trailing_edge()?;
268268+ self.trailing_key = Some(trailing);
269269+ return Ok(Step::Value(out)); // may be empty
270270+ }
271271+ None => break,
242272 }
243273 }
244274 if out.is_empty() {
275275+ self.trailing_key = Some(None);
245276 Ok(Step::End(None))
246277 } else {
247278 Ok(Step::Value(out))
···268299 MissingRoot,
269300 #[error("Storage error")]
270301 StorageError(#[from] DiskError),
302302+ #[error("Unexpected missing block: {0:?}")]
303303+ MissingBlock(Cid),
271304 #[error("Tried to send on a closed channel")]
272305 ChannelSendError,
273306 #[error("Failed to join a task: {0}")]
···284317 pub async fn finish_loading(
285318 mut self,
286319 mut store: DiskStore,
287287- ) -> Result<(Commit, Option<Rkey>, DiskDriver), DriveError> {
320320+ ) -> Result<(Commit, Option<RepoPath>, DiskDriver), DriveError> {
288321 store = tokio::task::spawn(async move {
289322 let kvs = self
290323 .blocks
···391424}
392425393426impl DiskDriver {
394394- /// Walk the MST returning up to `n` rkey + record pairs
427427+ /// Walk the MST returning up to `n` key + record pairs
395428 ///
396429 /// ```no_run
397430 /// # use repo_stream::{drive::{DiskDriver, DriveError, _get_fake_disk_driver}, Step, noop};
···400433 /// # let mut disk_driver = _get_fake_disk_driver();
401434 /// while let Step::Value(outputs) = disk_driver.next_chunk(256).await? {
402435 /// for output in outputs {
403403- /// println!("{}: size={}", output.rkey, output.data.len());
436436+ /// println!("{}: size={}", output.key, output.data.len());
404437 /// }
405438 /// }
406439 /// # Ok(())
···416449 let mut out = Vec::with_capacity(n);
417450418451 for _ in 0..n {
419419- let step = match state.walker.disk_step(&state.store, process) {
420420- Ok(s) => s,
421421- Err(e) => {
422422- return (state, Err(e.into()));
452452+ match state.walker.disk_step(&state.store, process) {
453453+ Err(e) => return (state, Err(e.into())),
454454+ Ok(Some(WalkItem::Record(output))) => out.push(output),
455455+ Ok(Some(WalkItem::MissingRecord { cid, .. }))
456456+ | Ok(Some(WalkItem::MissingSubtree { cid })) => {
457457+ return (state, Err(DriveError::MissingBlock(cid)));
423458 }
424424- };
425425- let Step::Value(output) = step else {
426426- break;
427427- };
428428- out.push(output);
459459+ Ok(None) => break,
460460+ }
429461 }
430462431463 (state, Ok::<_, DriveError>(out))
···454486 let mut out: BlockChunk = Vec::with_capacity(n);
455487456488 for _ in 0..n {
457457- let step = match walker.disk_step(store, self.process) {
458458- Ok(s) => s,
489489+ match walker.disk_step(store, self.process) {
459490 Err(e) => return tx.blocking_send(Err(e.into())),
460460- };
461461-462462- let Step::Value(output) = step else {
463463- break;
464464- };
465465- out.push(output);
491491+ Ok(Some(WalkItem::Record(output))) => out.push(output),
492492+ Ok(Some(WalkItem::MissingRecord { cid, .. }))
493493+ | Ok(Some(WalkItem::MissingSubtree { cid })) => {
494494+ return tx.blocking_send(Err(DriveError::MissingBlock(cid)));
495495+ }
496496+ Ok(None) => break,
497497+ }
466498 }
467499468500 if out.is_empty() {
···486518 /// let outputs = recvd?;
487519 /// let Step::Value(outputs) = outputs else { break; };
488520 /// for output in outputs {
489489- /// println!("{}: size={}", output.rkey, output.data.len());
521521+ /// println!("{}: size={}", output.key, output.data.len());
490522 /// }
491523 ///
492524 /// }
+3-3
src/lib.rs
···99transformed into a smaller representation to save memory.
10101111Once blocks are loaded, the MST is walked and emitted as chunks of
1212-`(rkey, cid, processed_block)` records in left-to-right order.
1212+`(key, cid, processed_block)` records in left-to-right order.
13131414Some MST validations are applied:
1515- Keys must appear in order
···8686pub use drive::{DriveError, DriverBuilder, LoadError, MemCar, PartialCar};
8787pub use link::NodeThing;
8888pub use mst::Commit;
8989-pub use walk::{Output, Step, WalkError};
8989+pub use walk::{Output, Step, WalkError, WalkItem};
90909191pub type Bytes = Vec<u8>;
92929393-pub type Rkey = String;
9393+pub type RepoPath = String;
94949595#[cfg(feature = "hashbrown")]
9696pub(crate) use hashbrown::HashMap;