···11[package]
22name = "repo-stream"
33-version = "0.4.0"
33+version = "0.5.0-alpha.1"
44edition = "2024"
55license = "MIT OR Apache-2.0"
66description = "Fast and robust atproto CAR file processing"
+42-33
benches/node-counts.rs
···143143 .build()
144144 .expect("Creating runtime failed");
145145146146- // let cars = [
147147- // ("empty", EMPTY_CAR),
148148- // ("tiny", TINY_CAR),
149149- // ("little", LITTLE_CAR),
150150- // ("midsize", MIDSIZE_CAR),
151151- // ];
146146+ let cars = [
147147+ ("empty", EMPTY_CAR),
148148+ ("tiny", TINY_CAR),
149149+ ("little", LITTLE_CAR),
150150+ ("midsize", MIDSIZE_CAR),
151151+ ];
152152153153- // // Sanity-check: both approaches agree on record count.
154154- // for (name, bytes) in cars {
155155- // let a = rt.block_on(count_records_filter(bytes));
156156- // let b = rt.block_on(count_records_separate(bytes));
157157- // assert_eq!(a, b, "approaches disagree on record count for {name}");
158158- // let (records, nodes) = rt.block_on(count_records_and_nodes(bytes));
159159- // println!("{name}: {records} records, {nodes} nodes");
160160- // }
153153+ // Sanity-check: both approaches agree on record count.
154154+ for (name, bytes) in cars {
155155+ let a = rt.block_on(count_records_filter(bytes));
156156+ let b = rt.block_on(count_records_separate(bytes));
157157+ assert_eq!(a, b, "approaches disagree on record count for {name}");
158158+ let (records, nodes) = rt.block_on(count_records_and_nodes(bytes));
159159+ println!("{name}: {records} records, {nodes} nodes");
160160+ }
161161162162- // let mut group = c.benchmark_group("node-counts");
162162+ let mut group = c.benchmark_group("node-counts");
163163164164- // for (name, bytes) in cars {
165165- // group.bench_with_input(
166166- // BenchmarkId::new("records-filter-approach", name),
167167- // bytes,
168168- // |b, bytes| b.to_async(&rt).iter(async || count_records_filter(bytes).await),
169169- // );
170170- // group.bench_with_input(
171171- // BenchmarkId::new("records-separate-approach", name),
172172- // bytes,
173173- // |b, bytes| b.to_async(&rt).iter(async || count_records_separate(bytes).await),
174174- // );
175175- // group.bench_with_input(
176176- // BenchmarkId::new("records-and-nodes", name),
177177- // bytes,
178178- // |b, bytes| b.to_async(&rt).iter(async || count_records_and_nodes(bytes).await),
179179- // );
180180- // }
164164+ for (name, bytes) in cars {
165165+ group.bench_with_input(
166166+ BenchmarkId::new("records-filter-approach", name),
167167+ bytes,
168168+ |b, bytes| {
169169+ b.to_async(&rt)
170170+ .iter(async || count_records_filter(bytes).await)
171171+ },
172172+ );
173173+ group.bench_with_input(
174174+ BenchmarkId::new("records-separate-approach", name),
175175+ bytes,
176176+ |b, bytes| {
177177+ b.to_async(&rt)
178178+ .iter(async || count_records_separate(bytes).await)
179179+ },
180180+ );
181181+ group.bench_with_input(
182182+ BenchmarkId::new("records-and-nodes", name),
183183+ bytes,
184184+ |b, bytes| {
185185+ b.to_async(&rt)
186186+ .iter(async || count_records_and_nodes(bytes).await)
187187+ },
188188+ );
189189+ }
181190182182- // group.finish();
191191+ group.finish();
183192184193 if let Ok(huge_car) = std::env::var("HUGE_CAR") {
185194 let path: std::path::PathBuf = huge_car.into();
+55-1
tests/car-slices.rs
···11extern crate repo_stream;
22-use repo_stream::{DriverBuilder, LoadError, Output, WalkItem};
22+use repo_stream::{DriverBuilder, LoadError, Output, SliceError, WalkItem};
3344const RECORD_SLICE: &[u8] = include_bytes!("../car-samples/slice-one.car");
55const RECORD_NODE_FIRST_KEY: &[u8] = include_bytes!("../car-samples/slice-node-first-key.car");
···120120 Some("app.bsky.feed.post/3lbn6of6qxc2a"),
121121 )
122122 .await
123123+}
124124+125125+/// Test the SliceWalker API directly: walk_slice yields the single in-range
126126+/// record, then finish() reports the preceding and following proof keys.
127127+#[tokio::test]
128128+async fn test_walk_slice_api() {
129129+ // Known from test_record_slice_car: the slice contains exactly one record
130130+ // ("app.bsky.feed.like/3mcg72x6bi32z") bounded by two MissingRecord neighbours.
131131+ let key = "app.bsky.feed.like/3mcg72x6bi32z";
132132+ let expected_preceding = "app.bsky.feed.like/3mcfzfbpaml27";
133133+ let expected_following = "app.bsky.feed.like/3mcga2o2efq27";
134134+135135+ let mut mem_car = DriverBuilder::new()
136136+ .load_car(RECORD_SLICE)
137137+ .await
138138+ .expect("should load");
139139+140140+ let mut walker = mem_car.walk_slice(key..=key).unwrap();
141141+ let record = walker.next().unwrap().expect("should find the record");
142142+ assert_eq!(record.key, key);
143143+144144+ // Next call should return None and lock in the following key.
145145+ assert!(walker.next().unwrap().is_none());
146146+147147+ let proof = walker.finish().unwrap();
148148+ assert_eq!(proof.preceding_key.as_deref(), Some(expected_preceding));
149149+ assert_eq!(proof.following_key.as_deref(), Some(expected_following));
150150+}
151151+152152+/// Looking up a key that is absent from the slice via get() must never yield a
153153+/// record; Ok(None), IncompleteRange, and MissingNode are all accepted outcomes.
154154+#[tokio::test]
155155+async fn test_walk_slice_absent_key() {
156156+ // This key is absent from the slice (between the two MissingRecord neighbours).
157157+ // SliceWalker should prove absence by finding the bounding neighbours.
158158+ let absent = "app.bsky.feed.like/3mcg72x6bi32z-absent";
159159+160160+ let mut mem_car = DriverBuilder::new()
161161+ .load_car(RECORD_SLICE)
162162+ .await
163163+ .expect("should load");
164164+165165+ // Use get() which is the idiomatic API for single-key lookup.
166166+ let result = mem_car.get(absent);
167167+ // Should return Ok(None) (provably absent), Err(IncompleteRange), or Err(MissingNode)
168168+ // depending on whether the slice's MST nodes bound the key. Any of these is valid;
169169+ // we just assert it doesn't panic or return Ok(Some(_)).
170170+ match result {
171171+ Ok(None) => {} // proven absent
172172+ Err(SliceError::IncompleteRange { .. }) => {} // block missing within range
173173+ Err(SliceError::MissingNode { .. }) => {} // subtree missing, can't prove
174174+ Ok(Some(output)) => panic!("unexpected record for absent key: {}", output.key),
175175+ Err(e) => panic!("unexpected error: {e}"),
176176+ }
123177}
124178125179#[tokio::test]
+187-1
tests/non-huge-cars.rs
···11extern crate repo_stream;
22-use repo_stream::{DriverBuilder, Output};
22+use repo_stream::{DriverBuilder, Output, WalkItem};
3344const EMPTY_CAR: &'static [u8] = include_bytes!("../car-samples/empty.car");
55const TINY_CAR: &'static [u8] = include_bytes!("../car-samples/tiny.car");
···6464async fn test_midsize_car() {
6565 test_car(MIDSIZE_CAR, 11585, 3741393, true).await
6666}
6767+6868+// ---------------------------------------------------------------------------
6969+// next_chunk_keys tests
7070+// ---------------------------------------------------------------------------
7171+7272+async fn count_keys(bytes: &[u8]) -> usize {
7373+ let mut mem_car = DriverBuilder::new()
7474+ .with_mem_limit_mb(10)
7575+ .load_car(bytes)
7676+ .await
7777+ .expect("should fit in memory");
7878+7979+ let mut count = 0;
8080+ let mut prev_key = String::new();
8181+ while let Some(pairs) = mem_car.next_chunk_keys(256).unwrap() {
8282+ for (key, _cid) in pairs {
8383+ assert!(key > prev_key, "next_chunk_keys keys must be in order");
8484+ prev_key = key;
8585+ count += 1;
8686+ }
8787+ }
8888+ count
8989+}
9090+9191+#[tokio::test]
9292+async fn test_next_chunk_keys_counts() {
9393+ assert_eq!(count_keys(EMPTY_CAR).await, 0);
9494+ assert_eq!(count_keys(TINY_CAR).await, 8);
9595+ assert_eq!(count_keys(LITTLE_CAR).await, 278);
9696+ assert_eq!(count_keys(MIDSIZE_CAR).await, 11585);
9797+}
9898+9999+/// Verify that next_chunk_keys returns the same (key, cid) pairs as next_chunk_strict.
100100+#[tokio::test]
101101+async fn test_next_chunk_keys_agrees_with_strict() {
102102+ let mut mc_strict = DriverBuilder::new()
103103+ .with_mem_limit_mb(10)
104104+ .load_car(TINY_CAR)
105105+ .await
106106+ .unwrap();
107107+ let mut mc_keys = DriverBuilder::new()
108108+ .with_mem_limit_mb(10)
109109+ .load_car(TINY_CAR)
110110+ .await
111111+ .unwrap();
112112+113113+ let mut from_strict = Vec::new();
114114+ while let Some(chunk) = mc_strict.next_chunk_strict(256).unwrap() {
115115+ for output in chunk {
116116+ from_strict.push((output.key, output.cid));
117117+ }
118118+ }
119119+120120+ let mut from_keys = Vec::new();
121121+ while let Some(pairs) = mc_keys.next_chunk_keys(256).unwrap() {
122122+ from_keys.extend(pairs);
123123+ }
124124+125125+ assert_eq!(from_strict, from_keys);
126126+}
127127+128128+// ---------------------------------------------------------------------------
129129+// next_chunk_with_nodes tests
130130+// ---------------------------------------------------------------------------
131131+132132+async fn with_nodes_counts(bytes: &[u8]) -> (usize, usize) {
133133+ let mut mem_car = DriverBuilder::new()
134134+ .with_mem_limit_mb(10)
135135+ .load_car(bytes)
136136+ .await
137137+ .expect("should fit in memory");
138138+139139+ let mut records = 0;
140140+ let mut nodes = 0;
141141+ let mut first_item_is_node = None;
142142+143143+ while let Some(items) = mem_car.next_chunk_with_nodes(256).unwrap() {
144144+ for item in &items {
145145+ if first_item_is_node.is_none() {
146146+ first_item_is_node = Some(matches!(item, WalkItem::Node { .. }));
147147+ }
148148+ }
149149+ for item in items {
150150+ match item {
151151+ WalkItem::Record(_) => records += 1,
152152+ WalkItem::Node { .. } => nodes += 1,
153153+ _ => {}
154154+ }
155155+ }
156156+ }
157157+ // The root MST node must always be the first item emitted.
158158+ assert_eq!(
159159+ first_item_is_node,
160160+ Some(true),
161161+ "first item from next_chunk_with_nodes must be a Node"
162162+ );
163163+ (records, nodes)
164164+}
165165+166166+#[tokio::test]
167167+async fn test_next_chunk_with_nodes_counts() {
168168+ // Record counts must match the strict walk.
169169+ let (records, nodes) = with_nodes_counts(EMPTY_CAR).await;
170170+ assert_eq!(records, 0);
171171+ assert_eq!(nodes, 1, "empty MST still has a root node block");
172172+173173+ assert_eq!(with_nodes_counts(TINY_CAR).await.0, 8);
174174+ assert_eq!(with_nodes_counts(LITTLE_CAR).await.0, 278);
175175+ assert_eq!(with_nodes_counts(MIDSIZE_CAR).await.0, 11585);
176176+177177+ // Non-empty CARs have multiple nodes.
178178+ assert!(with_nodes_counts(TINY_CAR).await.1 > 1);
179179+ assert!(with_nodes_counts(LITTLE_CAR).await.1 > 1);
180180+ assert!(with_nodes_counts(MIDSIZE_CAR).await.1 > 1);
181181+}
182182+183183+// ---------------------------------------------------------------------------
184184+// SliceWalker tests on full CARs
185185+// ---------------------------------------------------------------------------
186186+187187+#[tokio::test]
188188+async fn test_full_walker() {
189189+ for (bytes, expected) in [(EMPTY_CAR, 0), (TINY_CAR, 8), (LITTLE_CAR, 278)] {
190190+ let mut mem_car = DriverBuilder::new()
191191+ .with_mem_limit_mb(10)
192192+ .load_car(bytes)
193193+ .await
194194+ .unwrap();
195195+196196+ let mut walker = mem_car.full().unwrap();
197197+ let mut count = 0;
198198+ let mut prev_key = String::new();
199199+ while let Some(output) = walker.next().unwrap() {
200200+ assert!(output.key > prev_key, "full() keys must be in order");
201201+ prev_key = output.key;
202202+ count += 1;
203203+ }
204204+ assert_eq!(count, expected);
205205+206206+ let proof = walker.finish().unwrap();
207207+ assert!(
208208+ proof.preceding_key.is_none(),
209209+ "full walk has no preceding key"
210210+ );
211211+ assert!(
212212+ proof.following_key.is_none(),
213213+ "full walk has no following key"
214214+ );
215215+ }
216216+}
217217+218218+#[tokio::test]
219219+async fn test_get_present_key() {
220220+ let mut mem_car = DriverBuilder::new()
221221+ .with_mem_limit_mb(10)
222222+ .load_car(TINY_CAR)
223223+ .await
224224+ .unwrap();
225225+226226+ let result = mem_car.get("app.bsky.actor.profile/self").unwrap();
227227+ assert!(result.is_some());
228228+ assert_eq!(result.unwrap().key, "app.bsky.actor.profile/self");
229229+}
230230+231231+#[tokio::test]
232232+async fn test_prefix_walker() {
233233+ let mut mem_car = DriverBuilder::new()
234234+ .with_mem_limit_mb(10)
235235+ .load_car(TINY_CAR)
236236+ .await
237237+ .unwrap();
238238+239239+ let mut walker = mem_car.prefix("app.bsky.actor.profile").unwrap();
240240+ let mut count = 0;
241241+ while let Some(output) = walker.next().unwrap() {
242242+ assert!(
243243+ output.key.starts_with("app.bsky.actor.profile/"),
244244+ "prefix walker must only yield matching keys"
245245+ );
246246+ count += 1;
247247+ }
248248+ assert_eq!(
249249+ count, 1,
250250+ "tiny.car has exactly one app.bsky.actor.profile record"
251251+ );
252252+}