···99static GLOBAL: MiMalloc = MiMalloc;
10101111use clap::Parser;
1212-use repo_stream::{DiskBuilder, Driver, DriverBuilder, Step};
1212+use repo_stream::{DiskBuilder, DriverBuilder, LoadError, Step};
1313use std::path::PathBuf;
1414use std::time::Instant;
1515···37373838 // in this example we only bother handling CARs that are too big for memory
3939 // `noop` helper means: do no block processing, store the raw blocks
4040- let driver = match DriverBuilder::new()
4040+ let partial = match DriverBuilder::new()
4141 .with_mem_limit_mb(32) // how much memory can be used before disk spill
4242 .load_car(reader)
4343- .await?
4343+ .await
4444 {
4545- Driver::Memory(_, _, _) => panic!("try this on a bigger car"),
4646- Driver::Disk(big_stuff) => {
4747- // we reach here if the repo was too big and needs to be spilled to
4848- // disk to continue
4949-5050- // set up a disk store we can spill to
5151- let disk_store = DiskBuilder::new().open(tmpfile).await?;
5252-5353- // do the spilling, get back a (similar) driver
5454- let (commit, _, driver) = big_stuff.finish_loading(disk_store).await?;
4545+ Ok(_mem_car) => panic!("try this on a bigger car"),
4646+ Err(LoadError::MemoryLimitReached(partial)) => partial,
4747+ Err(e) => return Err(e.into()),
4848+ };
55495656- // at this point you might want to fetch the account's signing key
5757- // via the DID from the commit, and then verify the signature.
5858- log::warn!("big's comit ({:?}): {:?}", t0.elapsed(), commit);
5050+ // set up a disk store we can spill to
5151+ let disk_store = DiskBuilder::new().open(tmpfile).await?;
59526060- // log::info!("now is good time to check mem usage...");
6161- // tokio::time::sleep(std::time::Duration::from_secs(15)).await;
5353+ // do the spilling, get back a disk driver
5454+ let (commit, _, driver) = partial.finish_loading(disk_store).await?;
62556363- // pop the driver back out to get some code indentation relief
6464- driver
6565- }
6666- };
5656+ // at this point you might want to fetch the account's signing key
5757+ // via the DID from the commit, and then verify the signature.
5858+ log::warn!("big's commit ({:?}): {:?}", t0.elapsed(), commit);
67596860 // collect some random stats about the blocks
6961 let mut n = 0;
···11-/*!
22-Read a CAR slice in memory and show some info about it.
33-*/
44-55-extern crate repo_stream;
66-use repo_stream::{Driver, DriverBuilder};
77-88-type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
99-1010-#[tokio::main]
1111-async fn main() -> Result<()> {
1212- env_logger::init();
1313- let reader = tokio::io::BufReader::new(tokio::io::stdin());
1414-1515- let (commit, driver) = match DriverBuilder::new()
1616- .with_block_processor(|block| block.len().to_ne_bytes().to_vec())
1717- .load_car(reader)
1818- .await?
1919- {
2020- Driver::Memory(commit, _, mem_driver) => (commit, mem_driver),
2121- Driver::Disk(_) => panic!("this example doesn't handle big CARs"),
2222- };
2323-2424- println!(
2525- "\nthis slice is from {}, repo rev {}\n\n",
2626- commit.did, commit.rev
2727- );
2828-2929- driver.viz(commit.data)?;
3030-3131- Ok(())
3232-}
+7-16
examples/read-file/main.rs
···4455extern crate repo_stream;
66use clap::Parser;
77-use repo_stream::{Driver, DriverBuilder, Output, Step};
77+use repo_stream::{DriverBuilder, Output, Step};
88use std::path::PathBuf;
991010type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
···2323 let reader = tokio::fs::File::open(file).await?;
2424 let reader = tokio::io::BufReader::new(reader);
25252626- let (commit, mut driver) = match DriverBuilder::new()
2626+ let mut mem_car = DriverBuilder::new()
2727 .with_mem_limit_mb(1000)
2828 .with_block_processor(|block| block.len().to_ne_bytes().to_vec())
2929 .load_car(reader)
3030- .await?
3131- {
3232- Driver::Memory(commit, _, mem_driver) => (commit, mem_driver),
3333- Driver::Disk(_) => panic!("this example doesn't handle big CARs"),
3434- };
3030+ .await?;
35313636- log::info!("got commit: {commit:?}");
3232+ log::info!("got commit: {:?}", mem_car.commit);
37333838- while let Step::Value(records) = driver.next_chunk(256).await? {
3939- for Output { rkey, cid, data } in records {
4040- // let size = usize::from_ne_bytes(data.try_into().unwrap());
4141- // print!("0x");
4242- // for byte in cid.to_bytes() {
4343- // print!("{byte:>02x}");
4444- // }
4545- // println!(": {rkey} => record of len {}", size);
3434+ while let Step::Value(records) = mem_car.next_chunk(256)? {
3535+ for Output { rkey: _, cid: _, data: _ } in records {
3636+ // process records
4637 }
4738 }
4839
+9-8
examples/read-slice/main.rs
···33*/
4455extern crate repo_stream;
66-use repo_stream::{Driver, DriverBuilder, Output, Step};
66+use repo_stream::{DriverBuilder, LoadError, Output, Step};
7788type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
99···1212 env_logger::init();
1313 let reader = tokio::io::BufReader::new(tokio::io::stdin());
14141515- let (commit, prev_rkey, mut driver) = match DriverBuilder::new()
1515+ let mut mem_car = match DriverBuilder::new()
1616 .with_block_processor(|block| block.len().to_ne_bytes().to_vec())
1717 .load_car(reader)
1818- .await?
1818+ .await
1919 {
2020- Driver::Memory(commit, prev, mem_driver) => (commit, prev, mem_driver),
2121- Driver::Disk(_) => panic!("this example doesn't handle big CARs"),
2020+ Ok(mc) => mc,
2121+ Err(LoadError::MemoryLimitReached(_)) => panic!("this example doesn't handle big CARs"),
2222+ Err(e) => return Err(e.into()),
2223 };
23242425 println!(
2526 "\nthis slice is from {}, repo rev {}",
2626- commit.did, commit.rev
2727+ mem_car.commit.did, mem_car.commit.rev
2728 );
2828- if let Some(rkey) = prev_rkey {
2929+ if let Some(rkey) = &mem_car.prev_rkey {
2930 println!(" -> key immediately before CAR slice: {rkey}");
3031 } else {
3132 println!(
···35363637 println!("included records:");
3738 let end = loop {
3838- match driver.next_chunk(256).await? {
3939+ match mem_car.next_chunk(256)? {
3940 Step::Value(chunk) => {
4041 for Output { cid, rkey, .. } in chunk {
4142 print!(" SHA256 ");
+103
hacking.md
···11+things are changing a lot right now, but hopefully this file won't get too out of date.
22+33+the latest published repo-stream release works well but it turns out there's still a lot left to do
44+55+66+### memory limit
77+88+the last release had a kind of singular concept of a "memory limit", after which it would refuse to process in-memory and give you the partial state to finish dealing with using the disk driver.
1010+the idea for sending the state back to the user was that disk resources might be something they'd want to constrain, while allowing high concurrency of in-memory processing (e.g., for large network backfills)
1111+1212+one problem is that the high cost of disk spilling leads to high memory limits. queuing partial work, loaded to the max mem limit, can end up occupying a lot of memory! we probably actually need to pull the concept of a "disk worker" back to something like a "high-resource worker":
1313+1414+- lower mem limit for normal in-memory processing
1515+- queue lower-limit partial state for an available high-resource worker
1616+- high resource worker might continue trying to process in-memory to a higher limit before disk spilling
1717+1818+this should keep things a little more under control without giving up higher-memory in-memory performance -- generally everything should behave more predictably hopefully.
1919+2020+2121+### disk spilling
2222+2323+the switch to fjall was a major performance boost compared to sqlite, but it might not be the ultimate best fit for this
2424+2525+- its WAL can't be turned off. we don't need a WAL. WAL writes go at least into the OS page cache, and while they might never hit disk they still use resources. if the page cache needs to evict, (high overall memory utilization, what we are designing for!), then this could suddenly increase IO, slowing down the high-resource worker, increasing contention over disk bandwidth, and using more disk space.
2626+- its memory isn't super well under control. the amount it actually uses is currently higher than what the user configures with repo-stream (an internal repo-stream problem mostly, not fully fjall's problem) -- some of its impressive performance is probably due to this.
2727+- it launches background workers for compaction etc (extra resource usage, maybe some unfair perf vs sqlite comes from here too)
2828+- it opens a lot of files. if we keep fjall, we should make a global database instance and have individual workers create and drop keyspaces in it, instead of opening many fjall dbs and making the user's app hit ulimit.
2929+3030+i'm interested in seeing whether using fjall's LSM-Tree storage engine directly might help address all of these points.
3131+3232+other storage engines have been tested (redb, microsoft's new neat one, candystore, heed, cask) and so far fjall and sqlite have kept the best balance of controllable resource usage and performance. but i'm still interested in new ones to try.
3333+3434+new ones to try:
3535+ - https://github.com/arthurprs/canopydb: B+ tree so not holding my breath but let's see
3636+3737+sekoia has some nice ideas for a custom storage engine for repo-stream: that's what we'll ultimately switch to most likely!
3838+3939+4040+### partial CAR files
4242+this is the big one currently: repo-stream originally assumed it was working with full CAR exports (every MST link present), but that's not the case for CAR slices (from `com.atproto.sync.getRecord`) or firehose commits (`com.atproto.sync.subscribeRepos` contains sparse trees), and it won't be the case in the future for the sync1.1 collection-subset repo export.
4343+4444+my original attempt focused too closely on CAR slices for `getRecord`, making annoying assumptions that limited it. instead we really just need richer APIs. the `getRecord` case and a collection-subset case could both be served by a range iterator, where getRecord would just tighten the bounds to one exact key. more below.
4545+4646+4747+### (de)serialization
4848+4949+there is a custom MST node deserializer right now which tries to parse the node directly into local data structures. it might have been a very small perf win, but annoyingly it means we lost *serialization* functionality.
5050+5151+we could (maybe should?) implement a custom serializer. or we could just go back to the original `derive` impl so we get it back for free.
5252+5353+(i had been thinking that the custom derive would eventually lead to a custom CBOR binary parser specialized for MST nodes -- i really don't see why not since the subset we need to handle is very small. but anyway.)
5454+5555+it turns out there are use-cases for emitting not just records but MST nodes as well from repo-stream: for example, to build a converter to STAR formats.
5656+5757+so we need to at least have a proper `Node` type we can emit, and ideally that thing derives `serde::Serialize`.
5858+5959+6060+### iroh-car
6262+iroh-car is good but annoyingly async. since storage engines in rust are mostly sync, it creates a bit of friction. wrapping its async calls in a blocking executor might be ok but kind of annoying. also most projects will probably wind up using it in an overall async context.
6363+6464+i want to fork iroh-car and refactor it to a sans-io core, with sync/async wrapping interfaces.
6565+6666+6767+### richer apis
6868+6969+the apis kind of go out in a few dimensions
7070+7171+- output MST nodes or not
7272+- output record contents or just keys and CIDs
7373+- chunked APIs or individual
7474+- failure on missing blocks or Optional output values
7575+7676+feels like there should be
7777+7878+low-level:
7979+8080+- iterate all blocks forward, optional everything
8181+- seek: skip to some part of the tree
8282+8383+for now leaving reverse iteration out for reconsideration if a use-case arises
8484+8585+higher-level
8686+8787+- function to iterate all records, expecting them to all be there (output: (key, cid, contents))
8888+- function to iterate over a range of bounds
8989+- function to get a specific key
9090+- function to iterate over a prefix (with validation of proven correct start/stop bounds?)
9191+9292+9393+### MST validity
9494+9595+- maximum number of entries should be 200 (see previous work with Sekoia)
9696+- maximum number of entries in a two-level subtree should be 800 or whatever (get real number, again prev work)
9797+9898+we should also try to make a standards push to get those limits explicitly stated in the spec, to avoid hurting interop.
9999+100100+101101+### processor function
102102+103103+TODO: describe
+17-10
readme.md
···1111[sponsor-badge]: https://img.shields.io/badge/at-microcosm-b820f9?labelColor=b820f9&logo=githubsponsors&logoColor=fff
12121313```rust no_run
1414-use repo_stream::{Driver, DriverBuilder, DriveError, DiskBuilder, Output, Step};
1414+use repo_stream::{DriverBuilder, LoadError, DiskBuilder, Output, Step};
15151616#[tokio::main]
1717async fn main() -> Result<(), Box<dyn std::error::Error>> {
···2727 .with_block_processor( // block processing: just extract the raw record size
2828 |rec| rec.len().to_ne_bytes().to_vec())
2929 .load_car(reader)
3030- .await?
3030+ .await
3131 {
3232-3332 // if all blocks fit within memory
3434- Driver::Memory(_commit, _prev_rkey, mut driver) => {
3535- while let Step::Value(chunk) = driver.next_chunk(256).await? {
3333+ Ok(mut mem_car) => {
3434+ while let Step::Value(chunk) = mem_car.next_chunk(256)? {
3635 for Output { rkey: _, cid: _, data } in chunk {
3737- let size = usize::from_ne_bytes(data.try_into().unwrap());
3636+ let size = usize::from_ne_bytes(<[u8; 8]>::try_from(data).unwrap());
3837 total_size += size;
3938 }
4039 }
4140 },
42414342 // if the CAR was too big for in-memory processing
4444- Driver::Disk(paused) => {
4343+ Err(LoadError::MemoryLimitReached(partial)) => {
4544 // set up a disk store we can spill to
4645 let store = DiskBuilder::new().open("some/path.db".into()).await?;
4747- // do the spilling, get back a (similar) driver
4848- let (_commit, _prev_rkey, mut driver) = paused.finish_loading(store).await?;
4646+ // do the spilling, get back a disk driver
4747+ let (_commit, _prev_rkey, mut driver) = partial.finish_loading(store).await?;
49485049 while let Step::Value(chunk) = driver.next_chunk(256).await? {
5150 for Output { rkey: _, cid: _, data } in chunk {
5252- let size = usize::from_ne_bytes(data.try_into().unwrap());
5151+ let size = usize::from_ne_bytes(<[u8; 8]>::try_from(data).unwrap());
5352 total_size += size;
5453 }
5554 }
5655 }
5656+5757+ Err(e) => return Err(e.into()),
5758 };
5859 println!("sum of size of all records: {total_size}");
5960 Ok(())
7172- [ ] since the disk k/v get/set interface is now so similar to HashMap (blocking, no transactions), it's probably possible to make a single `Driver` and move the thread stuff from the disk one to generic helper functions. (might create async footguns though)
7273- [ ] fork iroh-car into a sync version so we can drop tokio as a hard requirement, and offer async via wrapper helper things
7374- [ ] feature-flag the sha2 crate for hmac-sha256? if someone wanted fewer deps?? then maybe make `hashbrown` also optional vs builtin hashmap?
7575+7676+7777+## contributing
7878+7979+see ['./hacking.md'](./hacking.md)
8080+74817582-----
7683
+85
src/block.rs
···11+use crate::{Bytes, mst::MstNode};
22+33+#[derive(Debug, Clone)]
44+pub enum MaybeProcessedBlock {
55+ /// A block that's *probably* a Node (but we can't know yet)
66+ ///
77+ /// It *can be* a record that suspiciously looks a lot like a node, so we
88+ /// cannot eagerly turn it into a Node. We only know for sure what it is
99+ /// when we actually walk down the MST
1010+ Raw(Bytes),
1111+ /// A processed record from a block that was definitely not a Node
1212+ ///
1313+ /// Processing has to be fallible because the CAR can have totally-unused
1414+ /// blocks, which can just be garbage. since we're eagerly trying to process
1515+ /// record blocks without knowing for sure that they *are* records, we
1616+ /// discard any definitely-not-nodes that fail processing and keep their
1717+ /// error in the buffer for them. if we later try to retreive them as a
1818+ /// record, then we can surface the error.
1919+ ///
2020+ /// If we _never_ needed this block, then we may have wasted a bit of effort
2121+ /// trying to process it. Oh well.
2222+ ///
2323+ /// There's an alternative here, which would be to kick unprocessable blocks
2424+ /// back to Raw, or maybe even a new RawUnprocessable variant. Then we could
2525+ /// surface the typed error later if needed by trying to reprocess.
2626+ Processed(Bytes),
2727+}
2828+2929+impl MaybeProcessedBlock {
3030+ pub fn to_node(&self) -> Option<MstNode> {
3131+ let Self::Raw(bytes) = self else {
3232+ return None;
3333+ };
3434+ serde_ipld_dagcbor::from_slice(bytes).ok()
3535+ }
3636+ pub fn unknown_depth(&self) -> bool {
3737+ let Self::Raw(bytes) = self else {
3838+ return false;
3939+ };
4040+ let Ok(node) = serde_ipld_dagcbor::from_slice::<MstNode>(bytes) else {
4141+ return false;
4242+ };
4343+ node.depth.is_none()
4444+ }
4545+ pub(crate) fn maybe(process: fn(Bytes) -> Bytes, data: Bytes) -> Self {
4646+ if MstNode::could_be(&data) {
4747+ MaybeProcessedBlock::Raw(data)
4848+ } else {
4949+ MaybeProcessedBlock::Processed(process(data))
5050+ }
5151+ }
5252+ pub(crate) fn len(&self) -> usize {
5353+ match self {
5454+ MaybeProcessedBlock::Raw(b) => b.len(),
5555+ MaybeProcessedBlock::Processed(b) => b.len(),
5656+ }
5757+ }
5858+ pub(crate) fn into_bytes(self) -> Bytes {
5959+ match self {
6060+ MaybeProcessedBlock::Raw(mut b) => {
6161+ b.push(0x00);
6262+ b
6363+ }
6464+ MaybeProcessedBlock::Processed(mut b) => {
6565+ b.push(0x01);
6666+ b
6767+ }
6868+ }
6969+ }
7070+ pub(crate) fn from_bytes(mut b: Bytes) -> Self {
7171+ // TODO: make sure bytes is not empty, that it's explicitly 0 or 1, etc
7272+ let suffix = b.pop().unwrap();
7373+ if suffix == 0x00 {
7474+ MaybeProcessedBlock::Raw(b)
7575+ } else {
7676+ MaybeProcessedBlock::Processed(b)
7777+ }
7878+ }
7979+}
8080+8181+/// Processor that just returns the raw blocks
8282+#[inline]
8383+pub fn noop(block: Bytes) -> Bytes {
8484+ block
8585+}
+163-271
src/drive.rs
···33use crate::link::{NodeThing, ObjectLink, ThingKind};
44use crate::{
55 Bytes, HashMap, Rkey, Step,
66+ block::{MaybeProcessedBlock, noop},
67 disk::{DiskError, DiskStore},
78 mst::MstNode,
89 walk::{MstError, Output},
···14151516use crate::mst::Commit;
1617use crate::walk::{WalkError, Walker};
1818+use thiserror::Error;
17191818-/// Errors that can happen while consuming and emitting blocks and records
1919-#[derive(Debug, thiserror::Error)]
2020-pub enum DriveError {
2121- #[error("Error from iroh_car: {0}")]
2020+/// An in-order chunk of Rkey + CID + (processed) Block
2121+pub type BlockChunk = Vec<Output>;
2222+2323+/// Errors that can occur while loading a CAR into memory
2424+#[derive(Debug, Error)]
2525+pub enum LoadError<R: AsyncRead + Unpin> {
2626+ #[error("failed reading CAR: {0}")]
2227 CarReader(#[from] iroh_car::Error),
2323- #[error("Failed to decode commit block: {0}")]
2828+ #[error("failed to decode cbor: {0}")]
2429 BadBlock(#[from] serde_ipld_dagcbor::DecodeError<Infallible>),
2525- #[error("The Commit block reference by the root was not found")]
3030+ #[error("missing commit")]
2631 MissingCommit,
2727- #[error("Failed to walk the mst tree: {0}")]
2828- WalkError(#[from] WalkError),
2929- #[error("CAR file had no roots")]
3232+ #[error("missing mst root node")]
3033 MissingRoot,
3131- #[error("Storage error")]
3232- StorageError(#[from] DiskError),
3333- #[error("Tried to send on a closed channel")]
3434- ChannelSendError, // SendError takes <T> which we don't need
3535- #[error("Failed to join a task: {0}")]
3636- JoinError(#[from] tokio::task::JoinError),
3737-}
3838-3939-impl From<MstError> for DriveError {
4040- fn from(me: MstError) -> DriveError {
4141- DriveError::WalkError(WalkError::MstError(me))
4242- }
4343-}
4444-4545-/// An in-order chunk of Rkey + CID + (processed) Block
4646-pub type BlockChunk = Vec<Output>;
4747-4848-#[derive(Debug, Clone)]
4949-pub enum MaybeProcessedBlock {
5050- /// A block that's *probably* a Node (but we can't know yet)
3434+ #[error("failed to walk mst: {0}")]
3535+ WalkError(#[from] WalkError),
3636+ /// The memory limit was reached before all blocks were loaded.
5137 ///
5252- /// It *can be* a record that suspiciously looks a lot like a node, so we
5353- /// cannot eagerly turn it into a Node. We only know for sure what it is
5454- /// when we actually walk down the MST
5555- Raw(Bytes),
5656- /// A processed record from a block that was definitely not a Node
5757- ///
5858- /// Processing has to be fallible because the CAR can have totally-unused
5959- /// blocks, which can just be garbage. since we're eagerly trying to process
6060- /// record blocks without knowing for sure that they *are* records, we
6161- /// discard any definitely-not-nodes that fail processing and keep their
6262- /// error in the buffer for them. if we later try to retreive them as a
6363- /// record, then we can surface the error.
6464- ///
6565- /// If we _never_ needed this block, then we may have wasted a bit of effort
6666- /// trying to process it. Oh well.
6767- ///
6868- /// There's an alternative here, which would be to kick unprocessable blocks
6969- /// back to Raw, or maybe even a new RawUnprocessable variant. Then we could
7070- /// surface the typed error later if needed by trying to reprocess.
7171- Processed(Bytes),
3838+ /// The partial state is returned so the caller can decide what to do
3939+ /// (e.g. resume with disk storage via `PartialCar::finish_loading`).
4040+ #[error("partially loaded car")]
4141+ MemoryLimitReached(PartialCar<R>),
7242}
73437474-impl MaybeProcessedBlock {
7575- pub fn to_node(&self) -> Option<MstNode> {
7676- let Self::Raw(bytes) = self else {
7777- return None;
7878- };
7979- serde_ipld_dagcbor::from_slice(bytes).ok()
8080- }
8181- pub fn unknown_depth(&self) -> bool {
8282- let Self::Raw(bytes) = self else {
8383- return false;
8484- };
8585- let Ok(node) = serde_ipld_dagcbor::from_slice::<MstNode>(bytes) else {
8686- return false;
8787- };
8888- node.depth.is_none()
8989- }
9090- pub(crate) fn maybe(process: fn(Bytes) -> Bytes, data: Bytes) -> Self {
9191- if MstNode::could_be(&data) {
9292- MaybeProcessedBlock::Raw(data)
9393- } else {
9494- MaybeProcessedBlock::Processed(process(data))
9595- }
9696- }
9797- pub(crate) fn len(&self) -> usize {
9898- match self {
9999- MaybeProcessedBlock::Raw(b) => b.len(),
100100- MaybeProcessedBlock::Processed(b) => b.len(),
101101- }
102102- }
103103- pub(crate) fn into_bytes(self) -> Bytes {
104104- match self {
105105- MaybeProcessedBlock::Raw(mut b) => {
106106- b.push(0x00);
107107- b
108108- }
109109- MaybeProcessedBlock::Processed(mut b) => {
110110- b.push(0x01);
111111- b
112112- }
113113- }
114114- }
115115- pub(crate) fn from_bytes(mut b: Bytes) -> Self {
116116- // TODO: make sure bytes is not empty, that it's explicitly 0 or 1, etc
117117- let suffix = b.pop().unwrap();
118118- if suffix == 0x00 {
119119- MaybeProcessedBlock::Raw(b)
120120- } else {
121121- MaybeProcessedBlock::Processed(b)
122122- }
123123- }
124124-}
12544126126-/// Read a CAR file, buffering blocks in memory or to disk
127127-pub enum Driver<R: AsyncRead + Unpin> {
128128- /// All blocks fit within the memory limit
129129- ///
130130- /// You probably want to check the commit's signature. You can go ahead and
131131- /// walk the MST right away.
132132- Memory(Commit, Option<Rkey>, MemDriver),
133133- /// Blocks exceed the memory limit
134134- ///
135135- /// You'll need to provide a disk storage to continue. The commit will be
136136- /// returned and can be validated only once all blocks are loaded.
137137- Disk(NeedDisk<R>),
138138-}
139139-140140-/// Processor that just returns the raw blocks
141141-#[inline]
142142-pub fn noop(block: Bytes) -> Bytes {
143143- block
4545+/// A partially memory-loaded CAR file that hit the memory limit mid-stream.
4646+///
4747+/// Can be resumed with disk storage via `finish_loading`, or discarded.
4848+#[derive(Debug)]
4949+pub struct PartialCar<R: AsyncRead + Unpin> {
5050+ pub(crate) car: CarReader<R>,
5151+ pub(crate) root: Cid,
5252+ pub(crate) process: fn(Bytes) -> Bytes,
5353+ pub(crate) max_size: usize,
5454+ pub(crate) blocks: HashMap<ObjectLink, MaybeProcessedBlock>,
5555+ /// The commit block, if it was seen before the memory limit was reached
5656+ pub commit: Option<Commit>,
14457}
1455814659/// Builder-style driver setup
···15366impl Default for DriverBuilder {
15467 fn default() -> Self {
15568 Self {
156156- mem_limit_mb: 16,
6969+ mem_limit_mb: 10,
15770 block_processor: noop,
15871 }
15972 }
···16477 pub fn new() -> Self {
16578 Default::default()
16679 }
8080+16781 /// Set the in-memory size limit, in MiB
16882 ///
169169- /// Default: 16 MiB
8383+ /// Default: 10 MiB
17084 pub fn with_mem_limit_mb(mut self, new_limit: usize) -> Self {
17185 self.mem_limit_mb = new_limit;
17286 self
···17589 /// Set the block processor
17690 ///
17791 /// Default: noop, raw blocks will be emitted
178178- pub fn with_block_processor(mut self, new_processor: fn(Bytes) -> Bytes) -> DriverBuilder {
9292+ pub fn with_block_processor(mut self, new_processor: fn(Bytes) -> Bytes) -> Self {
17993 self.block_processor = new_processor;
18094 self
18195 }
18296183183- /// Begin processing an atproto MST from a CAR file
184184- pub async fn load_car<R: AsyncRead + Unpin>(&self, reader: R) -> Result<Driver<R>, DriveError> {
185185- Driver::load_car(reader, self.block_processor, self.mem_limit_mb).await
186186- }
187187-}
188188-189189-impl<R: AsyncRead + Unpin> Driver<R> {
190190-191191- /// Begin processing an atproto MST from a CAR file
192192- ///
193193- /// Blocks will be loaded, processed, and buffered in memory. If the entire
194194- /// processed size is under the `mem_limit_mb` limit, a `Driver::Memory`
195195- /// will be returned along with a `Commit` ready for validation.
9797+ /// Load an atproto repository CAR into memory.
19698 ///
197197- /// If the `mem_limit_mb` limit is reached before loading all blocks, the
198198- /// partial state will be returned as `Driver::Disk(needed)`, which can be
199199- /// resumed by providing a `SqliteStorage` for on-disk block storage.
200200- pub async fn load_car(
9999+ /// Returns a `MemCar` ready for walking. If the blocks exceed the memory
100100+ /// limit, returns `Err(LoadError::MemoryLimitReached(partial))` containing
101101+ /// the partial state, which can be resumed with disk storage.
102102+ pub async fn load_car<R: AsyncRead + Unpin>(
103103+ &self,
201104 reader: R,
202202- process: fn(Bytes) -> Bytes,
203203- mem_limit_mb: usize,
204204- ) -> Result<Driver<R>, DriveError> {
205205- let mut block_count = 0;
105105+ ) -> Result<MemCar, LoadError<R>> {
106106+ load_car(reader, self.block_processor, self.mem_limit_mb).await
107107+ }
108108+}
206109207207- let max_size = mem_limit_mb * 2_usize.pow(20);
208208- let mut mem_blocks = HashMap::new();
110110+async fn load_car<R: AsyncRead + Unpin>(
111111+ reader: R,
112112+ process: fn(Bytes) -> Bytes,
113113+ mem_limit_mb: usize,
114114+) -> Result<MemCar, LoadError<R>> {
115115+ let mut block_count = 0;
209116210210- let mut car = CarReader::new(reader).await?;
117117+ let max_size = mem_limit_mb * 2_usize.pow(20);
118118+ let mut mem_blocks = HashMap::new();
211119212212- let roots = car.header().roots();
213213- assert_eq!(roots.len(), 1);
120120+ let mut car = CarReader::new(reader).await?;
214121215215- let root = *roots.first().ok_or(DriveError::MissingRoot)?;
216216- log::debug!("root: {root:?}");
122122+ let roots = car.header().roots();
123123+ assert_eq!(roots.len(), 1);
217124218218- let mut commit = None;
125125+ let root = *roots.first().ok_or(LoadError::MissingRoot)?;
126126+ log::debug!("root: {root:?}");
219127220220- // try to load all the blocks into memory
221221- let mut mem_size = 0;
222222- while let Some((cid, data)) = car.next_block().await? {
223223- block_count += 1;
224224- // the root commit is a Special Third Kind of block that we need to make
225225- // sure not to optimistically send to the processing function
226226- if cid == root {
227227- let c: Commit = serde_ipld_dagcbor::from_slice(&data)?;
228228- commit = Some(c);
229229- continue;
230230- }
128128+ let mut commit = None;
231129232232- // remaining possible types: node, record, other. optimistically process
233233- let maybe_processed = MaybeProcessedBlock::maybe(process, data);
130130+ let mut mem_size = 0;
131131+ while let Some((cid, data)) = car.next_block().await? {
132132+ block_count += 1;
133133+ // The root commit block is handled separately — never passed to the processor
134134+ if cid == root {
135135+ let c: Commit = serde_ipld_dagcbor::from_slice(&data)?;
136136+ commit = Some(c);
137137+ continue;
138138+ }
234139235235- // stash (maybe processed) blocks in memory as long as we have room
236236- mem_size += maybe_processed.len();
237237- mem_blocks.insert(cid.into(), maybe_processed);
238238- if mem_size >= max_size {
239239- log::debug!("blocks loaded before disk needed: {block_count}");
140140+ let maybe_processed = MaybeProcessedBlock::maybe(process, data);
240141241241- return Ok(Driver::Disk(NeedDisk {
242242- car,
243243- root,
244244- process,
245245- max_size,
246246- mem_blocks,
247247- commit,
248248- }));
249249- }
142142+ mem_size += maybe_processed.len();
143143+ mem_blocks.insert(cid.into(), maybe_processed);
144144+ if mem_size >= max_size {
145145+ log::debug!("blocks loaded before memory limit: {block_count}");
146146+ return Err(LoadError::MemoryLimitReached(PartialCar {
147147+ car,
148148+ root,
149149+ process,
150150+ max_size,
151151+ blocks: mem_blocks,
152152+ commit,
153153+ }));
250154 }
155155+ }
251156252252- log::debug!("blocks: {block_count}");
157157+ log::debug!("blocks: {block_count}");
253158254254- // all blocks loaded and we fit in memory! hopefully we found the commit...
255255- let commit = commit.ok_or(DriveError::MissingCommit)?;
159159+ let commit = commit.ok_or(LoadError::MissingCommit)?;
256160257257- // the commit always must point to a Node; empty node => empty MST special case
258258- let root_node: MstNode = match mem_blocks
259259- .get(&commit.data)
260260- .ok_or(DriveError::MissingCommit)?
261261- {
262262- MaybeProcessedBlock::Processed(_) => Err(WalkError::BadCommitFingerprint)?,
263263- MaybeProcessedBlock::Raw(bytes) => serde_ipld_dagcbor::from_slice(bytes)?,
264264- };
265265- let mut walker = Walker::new(root_node);
161161+ let root_node: MstNode = match mem_blocks
162162+ .get(&commit.data)
163163+ .ok_or(LoadError::MissingCommit)?
164164+ {
165165+ MaybeProcessedBlock::Processed(_) => Err(WalkError::BadCommitFingerprint)?,
166166+ MaybeProcessedBlock::Raw(bytes) => serde_ipld_dagcbor::from_slice(bytes)?,
167167+ };
168168+ let mut walker = Walker::new(root_node);
266169267267- // eprintln!("going to edge...");
268268- let edge = walker.step_to_edge(&mem_blocks)?;
269269- // eprintln!("got edge? {edge:?}");
170170+ let prev_rkey = walker.step_to_edge(&mem_blocks)?;
270171271271- Ok(Driver::Memory(
272272- commit,
273273- edge,
274274- MemDriver {
275275- blocks: mem_blocks,
276276- walker,
277277- process,
278278- next_missing: None,
279279- },
280280- ))
281281- }
172172+ Ok(MemCar {
173173+ commit,
174174+ prev_rkey,
175175+ blocks: mem_blocks,
176176+ walker,
177177+ process,
178178+ next_missing: None,
179179+ })
282180}
283181284284-/// The core driver between the block stream and MST walker
285285-///
286286-/// In the future, PDSs will export CARs in a stream-friendly order that will
287287-/// enable processing them with tiny memory overhead. But that future is not
288288-/// here yet.
289289-///
290290-/// CARs are almost always in a stream-unfriendly order, so I'm reverting the
291291-/// optimistic stream features: we load all block first, then walk the MST.
292292-///
293293-/// This makes things much simpler: we only need to worry about spilling to disk
294294-/// in one place, and we always have a reasonable expecatation about how much
295295-/// work the init function will do. We can drop the CAR reader before walking,
296296-/// so the sync/async boundaries become a little easier to work around.
182182+/// A fully loaded in-memory CAR file, ready for MST walking.
297183#[derive(Debug)]
298298-pub struct MemDriver {
184184+pub struct MemCar {
185185+ pub commit: Commit,
186186+ /// For CAR slices: the rkey of the last record before this slice's leading edge.
187187+ /// `None` if this slice (or full CAR) starts from the leftmost record in the tree.
188188+ pub prev_rkey: Option<Rkey>,
299189 pub blocks: HashMap<ObjectLink, MaybeProcessedBlock>,
300190 walker: Walker,
301301- process: fn(Bytes) -> Bytes, // TODO: impl Fn(bytes) -> Bytes?
191191+ process: fn(Bytes) -> Bytes,
302192 next_missing: Option<NodeThing>,
303193}
304194305305-impl MemDriver {
306306- pub fn viz(&self, tree: ObjectLink) -> Result<(), WalkError> {
307307- self.walker.viz(&self.blocks, tree)
195195+impl MemCar {
196196+197197+ /// Seek forward to the first record at or after `target`.
198198+ ///
199199+ /// Uses the MST structure to skip entire subtrees efficiently.
200200+ /// After this returns, the next `next_chunk` call will start at or after `target`.
201201+ pub fn seek(&mut self, target: &str) -> Result<(), WalkError> {
202202+ self.walker.seek(target, &self.blocks)
203203+ }
204204+205205+ /// Get the next record (not yet implemented: the body is `todo!()` and panics if called)
206206+ pub fn next(&mut self) -> Result<Option<Output>, WalkError> {
207207+ todo!()
308208 }
309309- /// Step through the record outputs, in rkey order
310310- pub async fn next_chunk(&mut self, n: usize) -> Result<Step<BlockChunk>, DriveError> {
209209+210210+ /// Iterate up to `n` records in rkey order.
211211+ ///
212212+ /// Returns `Step::Value(records)` while records remain, then `Step::End(next_rkey)`
213213+ /// where `next_rkey` is the first rkey after the slice (for CAR slices), or `None`.
214214+ pub fn next_chunk(&mut self, n: usize) -> Result<Step<BlockChunk>, WalkError> {
311215 if let Some(ref mut missing) = self.next_missing {
312216 while let Step::Value(sparse_out) =
313217 self.walker.step_sparse(&self.blocks, self.process)?
···319223 };
320224 }
321225 }
322322- // TODO: l asdflkja slfkja lkdfj lakjd f
323323- // TODO: make the walker finish walking to verify no more present blocks (oops sparse tree)
324324- // HACK: just get the last rkey if it's there -- i think we might actually need to walk for it though
325325- // ...and walk to verify rkey order of the rest of the nodes anyway?
326226 return Ok(match &missing.kind {
327227 ThingKind::ChildNode => Step::End(None),
328228 ThingKind::Record(rkey) => Step::End(Some(rkey.clone())),
329229 });
330230 }
331231 let mut out = Vec::with_capacity(n);
332332- // let mut err;
333232 for _ in 0..n {
334233 match self.walker.step(&self.blocks, self.process) {
335234 Ok(Step::Value(record)) => out.push(record),
336235 Ok(Step::End(None)) => break,
337337- Ok(Step::End(_)) => todo!("actually this should be unreachable?"),
236236+ Ok(Step::End(_)) => unreachable!(),
338237 Err(WalkError::MissingBlock(missing)) => {
339238 self.next_missing = Some(*missing);
340340- return Ok(Step::Value(out)); // nb: might be empty!
239239+ return Ok(Step::Value(out)); // may be empty
341240 }
342342- Err(other) => return Err(other.into()),
241241+ Err(other) => return Err(other),
343242 }
344243 }
345244 if out.is_empty() {
···350249 }
351250}
352251353353-/// A partially memory-loaded car file that needs disk spillover to continue
354354-pub struct NeedDisk<R: AsyncRead + Unpin> {
355355- car: CarReader<R>,
356356- root: Cid,
357357- process: fn(Bytes) -> Bytes,
358358- max_size: usize,
359359- mem_blocks: HashMap<ObjectLink, MaybeProcessedBlock>,
360360- pub commit: Option<Commit>,
252252+// ---------------------------------------------------------------------------
253253+// Disk path (kept for future wiring, not yet part of the primary API)
254254+// ---------------------------------------------------------------------------
255255+256256+/// Errors that can happen while consuming blocks via the disk path
257257+#[derive(Debug, thiserror::Error)]
258258+pub enum DriveError {
259259+ #[error("Error from iroh_car: {0}")]
260260+ CarReader(#[from] iroh_car::Error),
261261+ #[error("Failed to decode commit block: {0}")]
262262+ BadBlock(#[from] serde_ipld_dagcbor::DecodeError<Infallible>),
263263+ #[error("The Commit block reference by the root was not found")]
264264+ MissingCommit,
265265+ #[error("Failed to walk the mst tree: {0}")]
266266+ WalkError(#[from] WalkError),
267267+ #[error("CAR file had no roots")]
268268+ MissingRoot,
269269+ #[error("Storage error")]
270270+ StorageError(#[from] DiskError),
271271+ #[error("Tried to send on a closed channel")]
272272+ ChannelSendError,
273273+ #[error("Failed to join a task: {0}")]
274274+ JoinError(#[from] tokio::task::JoinError),
361275}
362276363363-impl<R: AsyncRead + Unpin> NeedDisk<R> {
277277+impl From<MstError> for DriveError {
278278+ fn from(me: MstError) -> DriveError {
279279+ DriveError::WalkError(WalkError::MstError(me))
280280+ }
281281+}
282282+283283+impl<R: AsyncRead + Unpin> PartialCar<R> {
364284 pub async fn finish_loading(
365285 mut self,
366286 mut store: DiskStore,
367287 ) -> Result<(Commit, Option<Rkey>, DiskDriver), DriveError> {
368368- // move store in and back out so we can manage lifetimes
369369- // dump mem blocks into the store
370288 store = tokio::task::spawn(async move {
371289 let kvs = self
372372- .mem_blocks
290290+ .blocks
373291 .into_iter()
374292 .map(|(k, v)| (k.to_bytes(), v.into_bytes()));
375293···388306 store.put_many(kvs)?;
389307 }
390308 Ok::<_, DriveError>(store)
391391- }); // await later
309309+ });
392310393393- // dump the rest to disk (in chunks)
394311 log::debug!("dumping the rest of the stream...");
395312 loop {
396313 let mut mem_size = 0;
···399316 let Some((cid, data)) = self.car.next_block().await? else {
400317 break;
401318 };
402402- // we still gotta keep checking for the root since we might not have it
403319 if cid == self.root {
404320 let c: Commit = serde_ipld_dagcbor::from_slice(&data)?;
405321 self.commit = Some(c);
···409325 let link = cid.into();
410326 let data = Bytes::from(data);
411327412412- // remaining possible types: node, record, other. optimistically process
413413- // TODO: get the actual in-memory size to compute disk spill
414328 let maybe_processed = MaybeProcessedBlock::maybe(self.process, data);
415329 mem_size += maybe_processed.len();
416330 chunk.push((link, maybe_processed));
417331 if mem_size >= (self.max_size / 2) {
418418- // soooooo if we're setting the db cache to max_size and then letting
419419- // multiple chunks in the queue that are >= max_size, then at any time
420420- // we might be using some multiple of max_size?
421332 break;
422333 }
423334 }
···498409 pub async fn next_chunk(&mut self, n: usize) -> Result<Step<Vec<Output>>, DriveError> {
499410 let process = self.process;
500411501501- // state should only *ever* be None transiently while inside here
502412 let mut state = self.state.take().expect("DiskDriver must have Some(state)");
503413504504- // the big pain here is that we don't want to leave self.state in an
505505- // invalid state (None), so all the error paths have to make sure it
506506- // comes out again.
507414 let (state, res) =
508415 tokio::task::spawn_blocking(move || -> (BigState, Result<BlockChunk, DriveError>) {
509416 let mut out = Vec::with_capacity(n);
510417511418 for _ in 0..n {
512512- // walk as far as we can until we run out of blocks or find a record
513419 let step = match state.walker.disk_step(&state.store, process) {
514420 Ok(s) => s,
515421 Err(e) => {
···524430525431 (state, Ok::<_, DriveError>(out))
526432 })
527527- .await?; // on tokio JoinError, we'll be left with invalid state :(
433433+ .await?;
528434529529- // *must* restore state before dealing with the actual result
530435 self.state = Some(state);
531436532437 let out = res?;
···549454 let mut out: BlockChunk = Vec::with_capacity(n);
550455551456 for _ in 0..n {
552552- // walk as far as we can until we run out of blocks or find a record
553553-554457 let step = match walker.disk_step(store, self.process) {
555458 Ok(s) => s,
556459 Err(e) => return tx.blocking_send(Err(e.into())),
···573476574477 /// Spawn the disk reading task into a tokio blocking thread
575478 ///
576576- /// The idea is to avoid so much sending back and forth to the blocking
577577- /// thread, letting a blocking task do all the disk reading work and sending
578578- /// records and rkeys back through an `mpsc` channel instead.
579579- ///
580580- /// This might also allow the disk work to continue while processing the
581581- /// records. It's still not yet clear if this method actually has much
582582- /// benefit over just using `.next_chunk(n)`.
583583- ///
584479 /// ```no_run
585480 /// # use repo_stream::{drive::{DiskDriver, DriveError, _get_fake_disk_driver}, Step, noop};
586481 /// # #[tokio::main]
···607502 ) {
608503 let (tx, rx) = mpsc::channel::<Result<Step<BlockChunk>, DriveError>>(1);
609504610610- // sketch: this worker is going to be allowed to execute without a join handle
611505 let chan_task = tokio::task::spawn_blocking(move || {
612506 if let Err(mpsc::error::SendError(_)) = self.read_tx_blocking(n, tx) {
613507 log::debug!("big car reader exited early due to dropped receiver channel");
···619513 }
620514621515 /// Reset the disk storage so it can be reused.
622622- ///
623623- /// The store is returned, so it can be reused for another `DiskDriver`.
624516 pub async fn reset_store(mut self) -> Result<DiskStore, DriveError> {
625517 let BigState { store, .. } = self.state.take().expect("valid state");
626518 store.reset().await?;
+48-50
src/lib.rs
···11/*!
22A robust CAR file -> MST walker for atproto
3344-Small CARs have their blocks buffered in memory. If a configurable memory limit
55-is reached while reading blocks, CAR reading is suspended, and can be continued
66-by providing disk storage to buffer the CAR blocks instead.
44+Blocks are buffered in memory up to a configurable limit (default 10 MiB).
55+If the limit is reached, `load_car` returns `Err(LoadError::MemoryLimitReached(partial))`
66+containing the partial state, which can later be resumed with disk storage via `PartialCar::finish_loading`.
7788-A `process` function can be provided for tasks where records are transformed
99-into a smaller representation, to save memory (and disk) during block reading.
88+A `block_processor` function can be provided for tasks where records are
99+transformed into a smaller representation to save memory.
10101111-Once blocks are loaded, the MST is walked and emitted as chunks of pairs of
1212-`(rkey, processed_block)` pairs, in order (depth first, left-to-right).
1111+Once blocks are loaded, the MST is walked and emitted as chunks of
1212+`(rkey, cid, processed_block)` records in left-to-right order.
13131414-Some MST validations are applied
1414+Some MST validations are applied:
1515- Keys must appear in order
1616- Keys must be at the correct MST tree depth
17171818`iroh_car` additionally applies a block size limit of `2MiB`.
19192020```
2121-use repo_stream::{Driver, DriverBuilder, DiskBuilder, Step};
2121+use repo_stream::{DriverBuilder, Step};
22222323# #[tokio::main]
2424# async fn main() -> Result<(), Box<dyn std::error::Error>> {
2525# let reader = include_bytes!("../car-samples/tiny.car").as_slice();
2626let mut total_size = 0;
27272828-match DriverBuilder::new()
2828+let mut mem_car = DriverBuilder::new()
2929 .with_mem_limit_mb(10)
3030- .with_block_processor(
3131- |rec| rec.len().to_ne_bytes().to_vec()
3232- ) // block processing: just extract the raw record size
3030+ .with_block_processor(|rec| rec.len().to_ne_bytes().to_vec())
3331 .load_car(reader)
3434- .await?
3535-{
3232+ .await?;
36333737- // if all blocks fit within memory
3838- Driver::Memory(_commit, _prev_rkey, mut driver) => {
3939- while let Step::Value(chunk) = driver.next_chunk(256).await? {
4040- for output in chunk {
4141- let size = usize::from_ne_bytes(output.data.try_into().unwrap());
4242-4343- total_size += size;
4444- }
4545- }
4646- },
3434+while let Step::Value(chunk) = mem_car.next_chunk(256)? {
3535+ for output in chunk {
3636+ let size = usize::from_ne_bytes(output.data.try_into().unwrap());
3737+ total_size += size;
3838+ }
3939+}
4040+println!("sum of size of all records: {total_size}");
4141+# Ok(())
4242+# }
4343+```
47444848- // if the CAR was too big for in-memory processing
4949- Driver::Disk(paused) => {
5050- // set up a disk store we can spill to
5151- let store = DiskBuilder::new().open("some/path.db".into()).await?;
5252- // do the spilling, get back a (similar) driver
5353- let (_commit, _prev_rkey, mut driver) = paused.finish_loading(store).await?;
4545+If the CAR is too large for memory, handle the `MemoryLimitReached` error:
54465555- while let Step::Value(chunk) = driver.next_chunk(256).await? {
5656- for output in chunk {
5757- let size = usize::from_ne_bytes(output.data.try_into().unwrap());
4747+```no_run
4848+use repo_stream::{DriverBuilder, LoadError, Step};
58495959- total_size += size;
6060- }
5050+# #[tokio::main]
5151+# async fn main() -> Result<(), Box<dyn std::error::Error>> {
5252+# let reader = tokio::io::stdin();
5353+match DriverBuilder::new()
5454+ .with_mem_limit_mb(10)
5555+ .load_car(reader)
5656+ .await
5757+{
5858+ Ok(mut mem_car) => {
5959+ while let Step::Value(chunk) = mem_car.next_chunk(256)? {
6060+ // process records
6161 }
6262 }
6363-};
6464-println!("sum of size of all records: {total_size}");
6363+ Err(LoadError::MemoryLimitReached(partial)) => {
6464+ // resume with disk storage (see DiskBuilder)
6565+ eprintln!("CAR too large for memory");
6666+ }
6767+ Err(e) => return Err(e.into()),
6868+}
6569# Ok(())
6670# }
6771```
68726969-Disk spilling suspends and returns a `Driver::Disk(paused)` instead of going
7070-ahead and eagerly using disk I/O. This means you have to write a bit more code
7171-to handle both cases, but it allows you to have finer control over resource
7272-usage. For example, you can drive a number of parallel memory CAR workers, and
7373-separately have a different number of disk workers picking up suspended disk
7474-tasks from a queue.
7575-7673Find more [examples in the repo](https://tangled.org/@microcosm.blue/repo-stream/tree/main/examples).
77747875*/
79768080-pub mod mst;
8181-mod walk;
8282-7777+pub mod block;
8378pub mod disk;
8479pub mod drive;
8580pub mod link;
8181+pub mod mst;
8282+pub mod walk;
86838784pub use disk::{DiskBuilder, DiskError, DiskStore};
8888-pub use drive::{DriveError, Driver, DriverBuilder, NeedDisk, noop};
8585+pub use block::noop;
8686+pub use drive::{DriveError, DriverBuilder, LoadError, MemCar, PartialCar};
8987pub use link::NodeThing;
9088pub use mst::Commit;
9191-pub use walk::{Output, Step};
8989+pub use walk::{Output, Step, WalkError};
92909391pub type Bytes = Vec<u8>;
9492
+101-124
src/walk.rs
···2233use crate::link::{NodeThing, ObjectLink, ThingKind};
44use crate::mst::{Depth, MstNode};
55-use crate::{Bytes, HashMap, Rkey, disk::DiskStore, drive::MaybeProcessedBlock, noop};
55+use crate::{Bytes, HashMap, Rkey, disk::DiskStore, block::MaybeProcessedBlock, noop};
66use cid::Cid;
77use std::convert::Infallible;
88···8888 }
8989 }
90909191- pub fn viz(
9292- &self,
9393- blocks: &HashMap<ObjectLink, MaybeProcessedBlock>,
9494- root_link: ObjectLink,
9595- ) -> Result<(), WalkError> {
9696- let root_block = blocks.get(&root_link).ok_or(WalkError::MissingBlock(
9797- NodeThing {
9898- link: root_link.clone(),
9999- kind: ThingKind::ChildNode,
100100- }
101101- .into(),
102102- ))?;
103103-104104- let root_node: MstNode = match root_block {
105105- MaybeProcessedBlock::Processed(_) => return Err(WalkError::BadCommitFingerprint),
106106- MaybeProcessedBlock::Raw(bytes) => serde_ipld_dagcbor::from_slice(bytes)?,
107107- };
108108-109109- let mut positions = HashMap::new();
110110- let mut w = Walker::new(root_node.clone());
111111-112112- let mut pos_idx = 0;
113113- while let Step::Value(Output { rkey, .. }) = w.step_sparse(blocks, noop)? {
114114- positions.insert(rkey, pos_idx);
115115- pos_idx += 1;
116116- }
117117-118118- Self::vnext(
119119- root_node.depth.unwrap(),
120120- vec![root_link],
121121- blocks,
122122- &positions,
123123- )?;
124124-125125- Ok(())
126126- }
127127-128128- pub fn vnext(
129129- level: u32,
130130- links: Vec<ObjectLink>,
131131- blocks: &HashMap<ObjectLink, MaybeProcessedBlock>,
132132- positions: &HashMap<Rkey, usize>,
133133- ) -> Result<Vec<usize>, WalkError> {
134134- let mut offsets = Vec::new();
135135- let mut level_keys = Vec::new();
136136- let mut child_links = Vec::new();
137137-138138- for link in links {
139139- println!(
140140- "\n{level}~{}..",
141141- link.to_bytes()
142142- .iter()
143143- .take(5)
144144- .map(|c| format!("{c:02x}"))
145145- .collect::<Vec<_>>()
146146- .join("")
147147- );
148148-149149- let Some(mpb) = blocks.get(&link) else {
150150- // TODO: drop an 'x' for missing node
151151- continue;
152152- };
153153- let node: MstNode = match mpb {
154154- MaybeProcessedBlock::Processed(_) => return Err(WalkError::BadCommitFingerprint),
155155- MaybeProcessedBlock::Raw(bytes) => serde_ipld_dagcbor::from_slice(bytes)?,
156156- };
157157-158158- let mut last_key = "".to_string();
159159- let mut last_was_record = true;
160160- for thing in node.things {
161161- let mut node_keys = Vec::new();
162162-163163- let has = blocks.contains_key(&thing.link);
164164-165165- match thing.kind {
166166- ThingKind::ChildNode => {
167167- if has {
168168- child_links.push(thing.link);
169169- last_was_record = false;
170170- }
171171- }
172172- ThingKind::Record(key) => {
173173- let us = positions[&key];
174174-175175- if !last_was_record && last_key.is_empty() {
176176- let them = positions[&last_key];
177177- for i in 0..(them - 1) {
178178- if i < (us + 1) {
179179- print!(" ");
180180- } else {
181181- print!("~~");
182182- }
183183- }
184184- println!("~");
185185- }
186186-187187- for _ in 0..us {
188188- print!(" ");
189189- }
190190- if has {
191191- print!("O");
192192- } else {
193193- print!("x");
194194- }
195195- println!(" {key}");
196196- node_keys.push(key.clone());
197197- last_key = key;
198198- last_was_record = true;
199199- }
200200- }
201201- level_keys.push(node_keys);
202202- }
203203-204204- offsets.push(1);
205205- }
206206-207207- if !child_links.is_empty() {
208208- Self::vnext(level - 1, child_links, blocks, positions)?; // TODO use offsets
209209- }
210210-211211- Ok(offsets)
212212- }
213213-21491 fn mpb_step(
21592 &mut self,
21693 thing: NodeThing,
···369246 Ok(z) => {
370247 log::info!("apparently we are too far at {z:?}");
371248 return Ok(rkey_prev); // oop real record, mutant went too far
249249+ }
250250+ }
251251+ }
252252+ }
253253+254254+ /// Skip forward to the first record at or after `target`, without emitting anything.
255255+ ///
256256+ /// Uses the tree structure to skip entire subtrees that are provably before `target`,
257257+ /// only loading child nodes on the path to `target`. O(depth × branching_factor).
258258+ ///
259259+ /// After this returns `Ok(())`, the next call to `step` will yield the first record
260260+ /// at or after `target`, or `Step::End` if no such record exists.
261261+ pub fn seek(
262262+ &mut self,
263263+ target: &str,
264264+ blocks: &HashMap<ObjectLink, MaybeProcessedBlock>,
265265+ ) -> Result<(), WalkError> {
266266+ // Classify what to do next without holding a borrow through the action
267267+ enum SeekStep {
268268+ Done,
269269+ EmptyLevel,
270270+ SkipRecord(Rkey),
271271+ SkipSubtree,
272272+ Descend,
273273+ }
274274+275275+ loop {
276276+ let next = match self.todo.last() {
277277+ None => return Ok(()),
278278+ Some(level) => {
279279+ let n = level.len();
280280+ if n == 0 {
281281+ SeekStep::EmptyLevel
282282+ } else {
283283+ match &level[n - 1].kind {
284284+ ThingKind::Record(k) if k.as_str() >= target => SeekStep::Done,
285285+ ThingKind::Record(k) => SeekStep::SkipRecord(k.clone()),
286286+ ThingKind::ChildNode => {
287287+ // The right-bounding record for this child node is at n-2.
288288+ // All keys in this subtree are < right_bound, so we can skip
289289+ // the whole subtree if right_bound <= target.
290290+ let can_skip = n >= 2
291291+ && matches!(
292292+ &level[n - 2].kind,
293293+ ThingKind::Record(k) if k.as_str() <= target
294294+ );
295295+ if can_skip {
296296+ SeekStep::SkipSubtree
297297+ } else {
298298+ SeekStep::Descend
299299+ }
300300+ }
301301+ }
302302+ }
303303+ }
304304+ }; // borrow of self.todo released here
305305+306306+ match next {
307307+ SeekStep::Done => return Ok(()),
308308+ SeekStep::EmptyLevel => {
309309+ self.todo.pop();
310310+ }
311311+ SeekStep::SkipRecord(key) => {
312312+ self.todo.last_mut().unwrap().pop();
313313+ self.prev_rkey = key;
314314+ }
315315+ SeekStep::SkipSubtree => {
316316+ self.todo.last_mut().unwrap().pop();
317317+ }
318318+ SeekStep::Descend => {
319319+ let child = self.todo.last_mut().unwrap().pop().unwrap();
320320+ // Note: self.todo borrow released before push below
321321+322322+ let Some(mpb) = blocks.get(&child.link) else {
323323+ return Err(WalkError::MissingBlock(child.into()));
324324+ };
325325+ let MaybeProcessedBlock::Raw(data) = mpb else {
326326+ return Err(WalkError::BadCommitFingerprint);
327327+ };
328328+ let node: MstNode =
329329+ serde_ipld_dagcbor::from_slice(data).map_err(WalkError::BadCommit)?;
330330+ if node.is_empty() {
331331+ return Err(WalkError::MstError(MstError::EmptyNode));
332332+ }
333333+ // Depth validation mirrors mpb_step: todo still has the (possibly empty)
334334+ // parent level, so todo.len()-1 is the parent's depth delta from root.
335335+ let current_depth = self.root_depth - (self.todo.len() - 1) as u32;
336336+ let next_depth = current_depth
337337+ .checked_sub(1)
338338+ .ok_or(MstError::DepthUnderflow)?;
339339+ if let Some(d) = node.depth
340340+ && d != next_depth
341341+ {
342342+ return Err(WalkError::MstError(MstError::WrongDepth {
343343+ depth: d,
344344+ expected: next_depth,
345345+ }));
346346+ }
347347+ self.links += node.things.len();
348348+ self.todo.push(node.things);
372349 }
373350 }
374351 }
+11-13
tests/car-slices.rs
···11extern crate repo_stream;
22-use repo_stream::{Driver, Output, Step};
22+use repo_stream::{DriverBuilder, LoadError, Output, Step};
3344const RECORD_SLICE: &'static [u8] = include_bytes!("../car-samples/slice-one.car");
55const RECORD_NODE_FIRST_KEY: &'static [u8] =
···1616 expect_rkey: Option<&str>,
1717 expect_proceeding: Option<&str>,
1818) {
1919- let (mut driver, before) = match Driver::load_car(
2020- bytes,
2121- |block| block.len().to_ne_bytes().to_vec(),
2222- 10, /* MiB */
2323- )
2424- .await
2525- .unwrap()
1919+ let mut mem_car = match DriverBuilder::new()
2020+ .with_block_processor(|block| block.len().to_ne_bytes().to_vec())
2121+ .load_car(bytes)
2222+ .await
2623 {
2727- Driver::Memory(_commit, before, mem_driver) => (mem_driver, before),
2828- Driver::Disk(_) => panic!("too big"),
2424+ Ok(mc) => mc,
2525+ Err(LoadError::MemoryLimitReached(_)) => panic!("too big"),
2626+ Err(e) => panic!("{e}"),
2927 };
30283131- assert_eq!(before.as_deref(), expect_preceeding);
2929+ assert_eq!(mem_car.prev_rkey.as_deref(), expect_preceeding);
32303331 let mut found_records = 0;
3432 let mut sum = 0;
3533 let mut found_expected_rkey = false;
3634 let mut prev_rkey = "".to_string();
37353838- while let Ok(step) = driver.next_chunk(256).await {
3939- match step {
3636+ loop {
3737+ match mem_car.next_chunk(256).unwrap() {
4038 Step::Value(records) => {
4139 for Output { rkey, cid: _, data } in records {
4240 found_records += 1;
+8-13
tests/non-huge-cars.rs
···11extern crate repo_stream;
22-use repo_stream::{Driver, Output, Step};
22+use repo_stream::{DriverBuilder, Output, Step};
3344const EMPTY_CAR: &'static [u8] = include_bytes!("../car-samples/empty.car");
55const TINY_CAR: &'static [u8] = include_bytes!("../car-samples/tiny.car");
···1212 expected_sum: usize,
1313 expect_profile: bool,
1414) {
1515- let mut driver = match Driver::load_car(
1616- bytes,
1717- |block| block.len().to_ne_bytes().to_vec(),
1818- 10, /* MiB */
1919- )
2020- .await
2121- .unwrap()
2222- {
2323- Driver::Memory(_commit, _, mem_driver) => mem_driver,
2424- Driver::Disk(_) => panic!("too big"),
2525- };
1515+ let mut mem_car = DriverBuilder::new()
1616+ .with_mem_limit_mb(10)
1717+ .with_block_processor(|block| block.len().to_ne_bytes().to_vec())
1818+ .load_car(bytes)
1919+ .await
2020+ .expect("should fit in memory");
26212722 let mut records = 0;
2823 let mut sum = 0;
2924 let mut found_bsky_profile = false;
3025 let mut prev_rkey = "".to_string();
31263232- while let Step::Value(pairs) = driver.next_chunk(256).await.unwrap() {
2727+ while let Step::Value(pairs) = mem_car.next_chunk(256).unwrap() {
3328 for Output { rkey, cid: _, data } in pairs {
3429 records += 1;
3530