Fast and robust atproto CAR file processing in rust
14
fork

Configure Feed

Select the types of activity you want to include in your feed.

note some sketchy timing data

phil 69c80a12 4cb98218

+73 -2
+4
Cargo.toml
··· 30 30 [[bench]] 31 31 name = "non-huge-cars" 32 32 harness = false 33 + 34 + [[bench]] 35 + name = "huge-car" 36 + harness = false
+50
benches/huge-car.rs
··· 1 + extern crate repo_stream; 2 + use futures::TryStreamExt; 3 + use iroh_car::CarReader; 4 + use std::convert::Infallible; 5 + use std::path::{Path, PathBuf}; 6 + 7 + use criterion::{Criterion, criterion_group, criterion_main}; 8 + 9 + pub fn criterion_benchmark(c: &mut Criterion) { 10 + let rt = tokio::runtime::Builder::new_multi_thread() 11 + .enable_all() 12 + .build() 13 + .expect("Creating runtime failed"); 14 + 15 + let filename = std::env::var("HUGE_CAR").expect("HUGE_CAR env var"); 16 + let filename: PathBuf = filename.try_into().unwrap(); 17 + 18 + c.bench_function("huge-car", |b| { 19 + b.to_async(&rt).iter(async || drive_car(&filename).await) 20 + }); 21 + } 22 + 23 + async fn drive_car(filename: impl AsRef<Path>) { 24 + let reader = tokio::fs::File::open(filename).await.unwrap(); 25 + let reader = tokio::io::BufReader::new(reader); 26 + let reader = CarReader::new(reader).await.unwrap(); 27 + 28 + let root = reader 29 + .header() 30 + .roots() 31 + .first() 32 + .ok_or("missing root") 33 + .unwrap() 34 + .clone(); 35 + 36 + let stream = std::pin::pin!(reader.stream()); 37 + 38 + let (_commit, v) = 39 + repo_stream::drive::Vehicle::init(root, stream, |block| Ok::<_, Infallible>(block.len())) 40 + .await 41 + .unwrap(); 42 + let mut record_stream = std::pin::pin!(v.stream()); 43 + 44 + while let Some(_) = record_stream.try_next().await.unwrap() { 45 + // just here for the drive 46 + } 47 + } 48 + 49 + criterion_group!(benches, criterion_benchmark); 50 + criterion_main!(benches);
+19 -2
readme.md
··· 3 3 Fast and (aspirationally) robust atproto CAR file processing in rust 4 4 5 5 6 + current car processing times (records processed into their length usize, phil's dev machine): 7 + 8 + - 128MiB CAR file: `347ms` 9 + - 5.0MiB: `6.1ms` 10 + - 279KiB: `139us` 11 + - 3.4KiB: `4.9us` 12 + 13 + 14 + running the huge-car benchmark 15 + 16 + - to avoid committing it to the repo, you have to pass it in through the env for now. 17 + 18 + ```bash 19 + HUGE_CAR=~/Downloads/did_plc_redacted.car cargo bench -- huge-car 20 + ``` 21 + 22 + 6 23 todo 7 24 8 - - [ ] car file test fixtures & validation tests 25 + - [x] car file test fixtures & validation tests 9 26 - [ ] make sure we can get the did and signature out for verification 10 27 - [ ] spec compliance todos 11 28 - [ ] assert that keys are ordered and fail if not ··· 13 30 - [ ] performance todos 14 31 - [ ] consume the serialized nodes into a mutable efficient format 15 32 - [ ] maybe customize the deserialize impl to do that directly? 16 - - [ ] benchmark and profile 33 + - [x] benchmark and profile 17 34 - [ ] robustness todos 18 35 - [ ] swap the blocks hashmap for a BlockStore trait that can be dumped to redb 19 36 - [ ] maybe keep the redb function behind a feature flag?