Fast and robust atproto CAR file processing in Rust
14
fork

Configure Feed

Select the types of activity you want to include in your feed.

bench it

actually can't really beat multihash

oh well

phil 4d8603cc d2e96dfb

+50 -13
+4
Cargo.toml
··· 48 48 # [[bench]] 49 49 # name = "leading" 50 50 # harness = false 51 + 52 + [[bench]] 53 + name = "cid-check" 54 + harness = false
+39
benches/cid-check.rs
··· 1 + use criterion::{Criterion, criterion_group, criterion_main}; 2 + use multihash_codetable::{Code, MultihashDigest}; 3 + use cid::Cid; 4 + use sha2::{Digest, Sha256}; 5 + 6 + fn multihash_verify(given: Cid, block: &[u8]) -> bool { 7 + let calculated = Cid::new_v1(0x71, Code::Sha2_256.digest(block)); 8 + calculated == given 9 + } 10 + 11 + fn effortful_verify(given: Cid, block: &[u8]) -> bool { 12 + // we know we're in atproto, so we can make a few assumptions 13 + if given.version() != cid::Version::V1 { 14 + return false; 15 + } 16 + let (codec, given_digest, _) = given.hash().into_inner(); 17 + if codec != 0x12 { 18 + return false; 19 + } 20 + given_digest[..32] == *Sha256::digest(block) 21 + } 22 + 23 + fn fastloose_verify(given: Cid, block: &[u8]) -> bool { 24 + let (_, given_digest, _) = given.hash().into_inner(); 25 + given_digest[..32] == *Sha256::digest(block) 26 + } 27 + 28 + pub fn criterion_benchmark(c: &mut Criterion) { 29 + let some_bytes: Vec<u8> = vec![0x1a, 0x00, 0xAA, 0x39, 0x8C].repeat(100); 30 + let cid = Cid::new_v1(0x71, Code::Sha2_256.digest(&some_bytes)); 31 + 32 + let mut g = c.benchmark_group("CID check"); 33 + g.bench_function("multihash", |b| b.iter(|| multihash_verify(cid, &some_bytes))); 34 + g.bench_function("effortful", |b| b.iter(|| effortful_verify(cid, &some_bytes))); 35 + g.bench_function("fastloose", |b| b.iter(|| fastloose_verify(cid, &some_bytes))); 36 + } 37 + 38 + criterion_group!(benches, criterion_benchmark); 39 + criterion_main!(benches);
+7 -13
src/drive.rs
··· 1 1 //! Consume a CAR from an AsyncRead, producing an ordered stream of records 2 2 3 + use multihash_codetable::{MultihashDigest, Code}; 3 4 use crate::{ 4 5 Bytes, HashMap, 5 6 disk::{DiskError, DiskStore}, ··· 10 11 use iroh_car::CarReader; 11 12 use std::convert::Infallible; 12 13 use tokio::{io::AsyncRead, sync::mpsc}; 13 - use sha2::{Digest, Sha256}; 14 14 15 15 use crate::mst::Commit; 16 16 use crate::walk::{WalkError, Walker}; ··· 122 122 #[inline] 123 123 pub fn noop(block: Bytes) -> Bytes { 124 124 block 125 + } 126 + 127 + // iroh-car doesn't verify CIDs!!!!!! 128 + #[inline(always)] 129 + fn verify_block(given: Cid, block: &[u8]) -> bool { 130 + Cid::new_v1(0x71, Code::Sha2_256.digest(block)) == given 125 131 } 126 132 127 133 /// Builder-style driver setup ··· 302 308 max_size: usize, 303 309 mem_blocks: HashMap<Cid, MaybeProcessedBlock>, 304 310 pub commit: Option<Commit>, 305 - } 306 - 307 - fn verify_block(given: Cid, block: &[u8]) -> bool { 308 - // we know we're in atproto, so we can make a few assumptions 309 - if given.version() != cid::Version::V1 { 310 - return false; 311 - } 312 - let (codec, given_digest, _) = given.hash().into_inner(); 313 - if codec != 0x12 { 314 - return false; 315 - } 316 - given_digest[..32] == *Sha256::digest(block) 317 311 } 318 312 319 313 impl<R: AsyncRead + Unpin> NeedDisk<R> {