Fast and robust atproto CAR file processing in rust
14
fork

Configure Feed

Select the types of activity you want to include in your feed.

more cleanup and doc tweaks

phil 04f10b18 1cd633b5

+18 -202
+12 -2
readme.md
··· 1 1 # repo-stream 2 2 3 - Fast and (aspirationally) robust atproto CAR file processing in rust 3 + Efficient and robust atproto CAR file processing in rust 4 + 5 + todo 6 + 7 + - [ ] get an *empty* car for the test suite 8 + - [ ] implement a max size on disk limit 9 + 10 + 11 + ----- 12 + 13 + older stuff (to clean up): 4 14 5 15 6 16 current car processing times (records processed into their length usize, phil's dev machine): ··· 27 37 -> yeah the commit is returned from init 28 38 - [ ] spec compliance todos 29 39 - [x] assert that keys are ordered and fail if not 30 - - [ ] verify node mst depth from key (possibly pending [interop test fixes](https://github.com/bluesky-social/atproto-interop-tests/issues/5)) 40 + - [x] verify node mst depth from key (possibly pending [interop test fixes](https://github.com/bluesky-social/atproto-interop-tests/issues/5)) 31 41 - [ ] performance todos 32 42 - [x] consume the serialized nodes into a mutable efficient format 33 43 - [ ] maybe customize the deserialize impl to do that directly?
+6
src/process.rs
··· 11 11 approximate total off-stack size of the type. (the on-stack size will be added 12 12 automatically via `std::mem::get_size`). 13 13 14 + Note that it is **not guaranteed** that the `process` function will run on a 15 + block before storing it in memory or on disk: it's not possible to know if a 16 + block is a record without actually walking the MST, so the best we can do is 17 + apply `process` to any block that we know *cannot* be an MST node, and otherwise 18 + store the raw block bytes. 19 + 14 20 Here's a silly processing function that just collects 'eyy's found in the raw 15 21 record bytes 16 22
-200
src/walk.rs
··· 304 304 #[cfg(test)] 305 305 mod test { 306 306 use super::*; 307 - // use crate::mst::Entry; 308 307 309 308 fn cid1() -> Cid { 310 309 "bafyreihixenvk3ahqbytas4hk4a26w43bh6eo3w6usjqtxkpzsvi655a3m" 311 310 .parse() 312 311 .unwrap() 313 312 } 314 - // fn cid2() -> Cid { 315 - // "QmY7Yh4UquoXHLPFo2XbhXkhBvFoPwmQUSa92pxnxjQuPU" 316 - // .parse() 317 - // .unwrap() 318 - // } 319 - // fn cid3() -> Cid { 320 - // "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi" 321 - // .parse() 322 - // .unwrap() 323 - // } 324 - // fn cid4() -> Cid { 325 - // "QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR" 326 - // .parse() 327 - // .unwrap() 328 - // } 329 - // fn cid5() -> Cid { 330 - // "QmSnuWmxptJZdLJpKRarxBMS2Ju2oANVrgbr2xWbie9b2D" 331 - // .parse() 332 - // .unwrap() 333 - // } 334 - // fn cid6() -> Cid { 335 - // "QmdmQXB2mzChmMeKY47C43LxUdg1NDJ5MWcKMKxDu7RgQm" 336 - // .parse() 337 - // .unwrap() 338 - // } 339 - // fn cid7() -> Cid { 340 - // "bafybeiaysi4s6lnjev27ln5icwm6tueaw2vdykrtjkwiphwekaywqhcjze" 341 - // .parse() 342 - // .unwrap() 343 - // } 344 - // fn cid8() -> Cid { 345 - // "bafyreif3tfdpr5n4jdrbielmcapwvbpcthepfkwq2vwonmlhirbjmotedi" 346 - // .parse() 347 - // .unwrap() 348 - // } 349 - // fn cid9() -> Cid { 350 - // "bafyreicnokmhmrnlp2wjhyk2haep4tqxiptwfrp2rrs7rzq7uk766chqvq" 351 - // .parse() 352 - // .unwrap() 353 - // } 354 313 355 314 #[test] 356 315 fn test_depth_spec_0() { ··· 441 400 .as_ref() 442 401 ); 443 402 } 444 - 445 - // #[test] 446 - // fn test_needs_from_node_just_one_record() { 447 - // let node = Node { 448 - // left: None, 449 - // entries: vec![Entry { 450 - // keysuffix: "asdf".into(), 451 - // prefix_len: 0, 452 - // value: cid1(), 453 - // tree: None, 454 - // }], 455 - // }; 456 - // assert_eq!( 457 - // needs_from_node(node).unwrap(), 458 - // vec![Need::Record { 459 - // rkey: "asdf".into(), 460 - // cid: cid1(), 461 - // },] 462 - // ); 463 - // } 464 - 465 - // #[test] 466 - // fn 
test_needs_from_node_two_records() { 467 - // let node = Node { 468 - // left: None, 469 - // entries: vec![ 470 - // Entry { 471 - // keysuffix: "asdf".into(), 472 - // prefix_len: 0, 473 - // value: cid1(), 474 - // tree: None, 475 - // }, 476 - // Entry { 477 - // keysuffix: "gh".into(), 478 - // prefix_len: 2, 479 - // value: cid2(), 480 - // tree: None, 481 - // }, 482 - // ], 483 - // }; 484 - // assert_eq!( 485 - // needs_from_node(node).unwrap(), 486 - // vec![ 487 - // Need::Record { 488 - // rkey: "asdf".into(), 489 - // cid: cid1(), 490 - // }, 491 - // Need::Record { 492 - // rkey: "asgh".into(), 493 - // cid: cid2(), 494 - // }, 495 - // ] 496 - // ); 497 - // } 498 - 499 - // #[test] 500 - // fn test_needs_from_node_with_both() { 501 - // let node = Node { 502 - // left: None, 503 - // entries: vec![Entry { 504 - // keysuffix: "asdf".into(), 505 - // prefix_len: 0, 506 - // value: cid1(), 507 - // tree: Some(cid2()), 508 - // }], 509 - // }; 510 - // assert_eq!( 511 - // needs_from_node(node).unwrap(), 512 - // vec![ 513 - // Need::Record { 514 - // rkey: "asdf".into(), 515 - // cid: cid1(), 516 - // }, 517 - // Need::Node(cid2()), 518 - // ] 519 - // ); 520 - // } 521 - 522 - // #[test] 523 - // fn test_needs_from_node_left_and_record() { 524 - // let node = Node { 525 - // left: Some(cid1()), 526 - // entries: vec![Entry { 527 - // keysuffix: "asdf".into(), 528 - // prefix_len: 0, 529 - // value: cid2(), 530 - // tree: None, 531 - // }], 532 - // }; 533 - // assert_eq!( 534 - // needs_from_node(node).unwrap(), 535 - // vec![ 536 - // Need::Node(cid1()), 537 - // Need::Record { 538 - // rkey: "asdf".into(), 539 - // cid: cid2(), 540 - // }, 541 - // ] 542 - // ); 543 - // } 544 - 545 - // #[test] 546 - // fn test_needs_from_full_node() { 547 - // let node = Node { 548 - // left: Some(cid1()), 549 - // entries: vec![ 550 - // Entry { 551 - // keysuffix: "asdf".into(), 552 - // prefix_len: 0, 553 - // value: cid2(), 554 - // tree: Some(cid3()), 555 - 
// }, 556 - // Entry { 557 - // keysuffix: "ghi".into(), 558 - // prefix_len: 1, 559 - // value: cid4(), 560 - // tree: Some(cid5()), 561 - // }, 562 - // Entry { 563 - // keysuffix: "jkl".into(), 564 - // prefix_len: 2, 565 - // value: cid6(), 566 - // tree: Some(cid7()), 567 - // }, 568 - // Entry { 569 - // keysuffix: "mno".into(), 570 - // prefix_len: 4, 571 - // value: cid8(), 572 - // tree: Some(cid9()), 573 - // }, 574 - // ], 575 - // }; 576 - // assert_eq!( 577 - // needs_from_node(node).unwrap(), 578 - // vec![ 579 - // Need::Node(cid1()), 580 - // Need::Record { 581 - // rkey: "asdf".into(), 582 - // cid: cid2(), 583 - // }, 584 - // Need::Node(cid3()), 585 - // Need::Record { 586 - // rkey: "aghi".into(), 587 - // cid: cid4(), 588 - // }, 589 - // Need::Node(cid5()), 590 - // Need::Record { 591 - // rkey: "agjkl".into(), 592 - // cid: cid6(), 593 - // }, 594 - // Need::Node(cid7()), 595 - // Need::Record { 596 - // rkey: "agjkmno".into(), 597 - // cid: cid8(), 598 - // }, 599 - // Need::Node(cid9()), 600 - // ] 601 - // ); 602 - // } 603 403 }