lightweight com.atproto.sync.listReposByCollection
45
fork

Configure Feed

Select the types of activity you want to include in your feed.

clean up dead code

phil 58cad518 40379369

+189 -154
+2 -26
src/mst/mod.rs
··· 14 14 15 15 enum SpanLen { 16 16 /// all collections are known 17 - #[allow(dead_code)] 17 + #[allow(dead_code)] // used in tests 18 18 Exactly(usize), 19 19 /// at least one gap exists 20 - #[allow(dead_code)] 20 + #[allow(dead_code)] // used in tests 21 21 AtLeast(usize), 22 22 } 23 23 ··· 50 50 /// the span has *no* gaps 51 51 fn is_complete(&self) -> bool { 52 52 matches!(self.len(), SpanLen::Exactly(_)) 53 - } 54 - fn contains(&self, k: &T) -> Option<bool> { 55 - if self.things.contains_key(k) { 56 - return Some(true); 57 - } 58 - // try to gap_after from the key before this one, if present 59 - // (when btree_cursors land we can do nicer with that) 60 - let falls_in_gap = self 61 - .things 62 - .range(..k) // exclusive range: all keys lex-before us 63 - .next_back() // take the closest previous one 64 - .map(|(_, gap_after)| gap_after) // if it existed, find out if it had a gap after 65 - .unwrap_or(&self.gap_before); // no before-key: span starts with gap? 66 - 67 - if *falls_in_gap { None } else { Some(false) } 68 - } 69 - /// definitive answer about whether it's *possible* for `k` to be in span 70 - /// 71 - /// key exist -> true 72 - /// key falls in a gap -> true (it's possible!) 73 - /// key falls after a key without a gap after -> false (not possible!) 74 - #[allow(dead_code)] 75 - fn may_contain(&self, k: &T) -> bool { 76 - self.contains(k).unwrap_or(true) 77 53 } 78 54 }
+127 -90
src/mst/mortality.rs
··· 17 17 //! Multi-op commits are handled because all ops are considered together: a key 18 18 //! is a "survivor" only if it is visible AND not in the deleted set. 19 19 20 + use std::collections::{HashMap, HashSet}; 20 21 use std::ops::Bound; 21 - use std::collections::HashSet; 22 22 23 23 use jacquard_api::com_atproto::sync::subscribe_repos::{RepoOp, RepoOpAction}; 24 24 use jacquard_common::types::string::Nsid; 25 - use repo_stream::{MemCar, WalkItem}; 26 25 use repo_stream::{DriverBuilder, JacquardLoadError, WalkError}; 26 + use repo_stream::{MemCar, WalkItem}; 27 27 use tracing::error; 28 28 29 29 use super::Span; ··· 51 51 52 52 impl KeySpan { 53 53 pub fn left_of(&self, key: &str) -> Existence { 54 - self 55 - .things 54 + self.things 56 55 .range(..key.to_string()) 57 56 .next_back() 58 - .map(|(k, gap)| if *gap { Existence::Uncertain } else { Existence::Yes(k.clone()) }) 59 - .unwrap_or(if self.gap_before { Existence::Uncertain } else { Existence::No }) 57 + .map(|(k, gap)| { 58 + if *gap { 59 + Existence::Uncertain 60 + } else { 61 + Existence::Yes(k.clone()) 62 + } 63 + }) 64 + .unwrap_or(if self.gap_before { 65 + Existence::Uncertain 66 + } else { 67 + Existence::No 68 + }) 60 69 } 61 70 pub fn right_of(&self, key: &str, left: &Existence) -> Existence { 62 71 if let Some(gap_after) = self.things.get(key) { ··· 70 79 .range((Bound::Excluded(key.to_string()), Bound::Unbounded)) 71 80 .next() 72 81 .map(|(k, _)| Existence::Yes(k.to_string())) 73 - .unwrap_or(Existence::No) 82 + .unwrap_or(Existence::No); 74 83 } 75 84 // the key does not exist 76 85 if *left == Existence::Uncertain { ··· 79 88 return Existence::Uncertain; 80 89 } 81 90 // we're not in a gap, so we have certainty about what's next 82 - self 83 - .things 91 + self.things 84 92 .range((Bound::Excluded(key.to_string()), Bound::Unbounded)) 85 93 .next() 86 94 .map(|(k, _)| Existence::Yes(k.clone())) ··· 170 178 true 171 179 } 172 180 173 - /// Collect every MST leaf path visible in a (possibly partial) CAR. 174 - /// 175 - /// Uses `next_keys()` which silently skips subtrees whose MST node blocks are 176 - /// absent, so this works on both full and proof-only CARs. 177 - #[cfg(test)] 178 - fn collect_visible_paths(parsed: jacquard_repo::car::reader::ParsedCar) -> Result<Vec<String>> { 179 - let mut car = DriverBuilder::new().load_jacquard_parsed_car(parsed)?; 180 - let mut visible = Vec::new(); 181 - while let Some((path, _)) = car.next_keys()? { 182 - visible.push(path.to_string()); 183 - } 184 - Ok(visible) 185 - } 186 - 187 181 /// Result of [`extract`]. 182 + #[derive(Debug, Default)] 188 183 pub struct ExtractResult { 189 184 /// Collections newly created in this commit (all pre-existing neighbours 190 185 /// were absent from the proof, or the proof covers the full range). 191 - pub born: Vec<Nsid<'static>>, 186 + pub born: HashMap<Nsid<'static>, bool>, 192 187 /// Collections fully deleted in this commit (proof covers the full range). 193 - pub died: Vec<Nsid<'static>>, 194 - /// True when a possible collection death could not be confirmed because the 195 - /// CAR proof has gaps that could hide surviving keys. The caller should 196 - /// queue a full-repo resync to verify the current collection state. 197 - pub needs_resync: bool, 188 + pub died: HashMap<Nsid<'static>, bool>, 198 189 } 199 190 200 191 /// Walk the partial CAR's MST to detect which collections are newly added ··· 236 227 } 237 228 238 229 if creates.is_empty() && deletes.is_empty() { 239 - return Ok(ExtractResult { born: vec![], died: vec![], needs_resync: false }); 230 + return Ok(ExtractResult::default()); 240 231 } 241 232 242 233 // ── Walk the partial CAR's MST to build a gap-aware key span ───────────── 243 - let mut mem_car = DriverBuilder::new().load_jacquard_parsed_car(parsed)?; 234 + let mut mem_car = DriverBuilder::new() 235 + .with_block_processor(|_| vec![]) // drop record blocks as much as possible 236 + .load_jacquard_parsed_car(parsed)?; 244 237 let span = span_from_slice(&mut mem_car)?; 245 238 246 239 // ── Check collection death ──────────────────────────────────────────────── 247 240 // A collection died iff all visible keys in it are being deleted AND 248 241 // the span proves there are no hidden surviving keys. 249 - let mut died: Vec<Nsid<'static>> = Vec::new(); 250 - let mut needs_resync = false; 251 - for coll in deletes 252 - .iter() 253 - .filter_map(|p| p.split_once('/').map(|(c, _)| c)) 254 - .collect::<HashSet<_>>() 255 - { 256 - let prefix = format!("{coll}/"); 242 + let mut died = HashMap::new(); 243 + let mut delete_collections_seen = HashSet::new(); 244 + for mst_key in &deletes { 245 + let (collection, _) = mst_key 246 + .split_once('/') 247 + .ok_or(MstMortalityError::InvalidData(format!( 248 + "mst key missing '/': {mst_key}" 249 + )))?; 250 + if !delete_collections_seen.insert(collection.to_string()) { 251 + continue; 252 + } 253 + let nsid = collection 254 + .parse() 255 + .map_err(|e| MstMortalityError::InvalidData(format!("bad nsid: {e}")))?; 256 + 257 + let collection_terminated = format!("{collection}/"); 257 258 let has_survivor = span 258 259 .things 259 - .range(prefix.clone()..) 260 - .take_while(|(k, _)| k.starts_with(&prefix)) 260 + .range(collection_terminated.clone()..) 261 + .take_while(|(k, _)| k.starts_with(&collection_terminated)) 261 262 .any(|(k, _)| !deletes.contains(k.as_str())); 262 263 if has_survivor { 263 264 continue; 264 265 } 265 - if !can_prove_complete_coverage(&span, &prefix) { 266 - // Possible death but the CAR has gaps — we can't confirm. Request 267 - // a full-repo resync so the caller can reconcile the index later. 268 - metrics::counter!("lightrail_mortality_unproven_total", "kind" => "death") 269 - .increment(1); 270 - error!(collection = coll, "possible collection death unproven due to MST gaps"); 271 - needs_resync = true; 272 - continue; 273 - } 274 - if let Ok(nsid) = Nsid::new_owned(coll) { 275 - died.push(nsid); 266 + if can_prove_complete_coverage(&span, &collection_terminated) { 267 + died.insert(nsid, true); 268 + } else { 269 + died.insert(nsid, false); 270 + metrics::counter!("lightrail_mortality_unproven_total", "kind" => "death").increment(1); 271 + error!( 272 + collection = collection, 273 + "possible collection death unproven due to MST gaps" 274 + ); 276 275 } 277 276 } 278 277 ··· 281 280 // the span proves there are no hidden pre-existing keys. 282 281 // If the proof has gaps, we still record the birth — false positives here 283 282 // are harmless (the index entry already exists or the resync will fix it). 284 - let mut born: Vec<Nsid<'static>> = Vec::new(); 285 - for coll in creates 286 - .iter() 287 - .filter_map(|p| p.split_once('/').map(|(c, _)| c)) 288 - .collect::<HashSet<_>>() 289 - { 290 - let prefix = format!("{coll}/"); 283 + let mut born = HashMap::new(); 284 + let mut created_collections_seen = HashSet::new(); 285 + for mst_key in &creates { 286 + let (collection, _) = mst_key 287 + .split_once('/') 288 + .ok_or(MstMortalityError::InvalidData(format!( 289 + "mst key missing '/': {mst_key}" 290 + )))?; 291 + if !created_collections_seen.insert(collection.to_string()) { 292 + continue; 293 + } 294 + let nsid = collection 295 + .parse() 296 + .map_err(|e| MstMortalityError::InvalidData(format!("bad nsid: {e}")))?; 297 + 298 + let collection_terminated = format!("{collection}/"); 291 299 let has_preexisting = span 292 300 .things 293 - .range(prefix.clone()..) 294 - .take_while(|(k, _)| k.starts_with(&prefix)) 301 + .range(collection_terminated.clone()..) 302 + .take_while(|(k, _)| k.starts_with(&collection_terminated)) 295 303 .any(|(k, _)| !creates.contains(k.as_str())); 296 304 if has_preexisting { 297 305 continue; 298 306 } 299 - if !can_prove_complete_coverage(&span, &prefix) { 300 - metrics::counter!("lightrail_mortality_unproven_total", "kind" => "birth") 301 - .increment(1); 307 + if can_prove_complete_coverage(&span, &collection_terminated) { 308 + born.insert(nsid, true); 309 + } else { 310 + born.insert(nsid, false); 311 + metrics::counter!("lightrail_mortality_unproven_total", "kind" => "birth").increment(1); 302 312 error!( 303 - collection = coll, 313 + collection, 304 314 "possible collection birth unproven due to MST gaps" 305 315 ); 306 - // Fall through: include the birth anyway. Spurious births are 307 - // idempotent (index insert is a blind overwrite); false negatives 308 - // would silently drop new collections from the index. 309 - } 310 - if let Ok(nsid) = Nsid::new_owned(coll) { 311 - born.push(nsid); 312 316 } 313 317 } 314 318 315 - Ok(ExtractResult { born, died, needs_resync }) 319 + Ok(ExtractResult { born, died }) 320 + } 321 + 322 + /// Collect every MST leaf path visible in a (possibly partial) CAR. 323 + /// 324 + /// Uses `next_keys()` which silently skips subtrees whose MST node blocks are 325 + /// absent, so this works on both full and proof-only CARs. 326 + #[cfg(test)] 327 + fn collect_visible_paths(parsed: jacquard_repo::car::reader::ParsedCar) -> Result<Vec<String>> { 328 + let mut car = DriverBuilder::new().load_jacquard_parsed_car(parsed)?; 329 + let mut visible = Vec::new(); 330 + while let Some((path, _)) = car.next_keys()? { 331 + visible.push(path.to_string()); 332 + } 333 + Ok(visible) 316 334 } 317 335 318 336 #[cfg(test)] ··· 417 435 // CAR contains only the created key → no preexisting neighbours. 418 436 let parsed = make_parsed_car(&["app.bsky.feed.post/abc123"]).await; 419 437 let ops = [op_create("app.bsky.feed.post/abc123")]; 420 - let ExtractResult { born, died, .. } = extract(&ops, parsed).unwrap(); 421 - assert_eq!(born, vec![nsid("app.bsky.feed.post")]); 438 + let ExtractResult { born, died } = extract(&ops, parsed).unwrap(); 439 + assert_eq!( 440 + born, 441 + HashMap::from_iter([(nsid("app.bsky.feed.post"), true)]) 442 + ); 422 443 assert!(died.is_empty()); 423 444 } 424 445 ··· 441 462 async fn collection_dies_when_last_key_deleted() { 442 463 let parsed = make_parsed_car(&["app.bsky.feed.post/abc123"]).await; 443 464 let ops = [op_delete("app.bsky.feed.post/abc123")]; 444 - let ExtractResult { born, died, .. } = extract(&ops, parsed).unwrap(); 465 + let ExtractResult { born, died } = extract(&ops, parsed).unwrap(); 445 466 assert!(born.is_empty()); 446 - assert_eq!(died, vec![nsid("app.bsky.feed.post")]); 467 + assert_eq!( 468 + died, 469 + HashMap::from_iter([(nsid("app.bsky.feed.post"), true)]) 470 + ); 447 471 } 448 472 449 473 #[tokio::test] ··· 472 496 op_create("app.bsky.feed.post/abc123"), 473 497 op_delete("app.bsky.graph.follow/old"), 474 498 ]; 475 - let ExtractResult { mut born, mut died, .. } = extract(&ops, parsed).unwrap(); 476 - born.sort_unstable(); 477 - died.sort_unstable(); 478 - assert_eq!(born, vec![nsid("app.bsky.feed.post")]); 479 - assert_eq!(died, vec![nsid("app.bsky.graph.follow")]); 499 + let ExtractResult { born, died } = extract(&ops, parsed).unwrap(); 500 + assert_eq!( 501 + born, 502 + HashMap::from_iter([(nsid("app.bsky.feed.post"), true)]) 503 + ); 504 + assert_eq!( 505 + died, 506 + HashMap::from_iter([(nsid("app.bsky.graph.follow"), true)]) 507 + ); 480 508 } 481 509 482 510 // --------------------------------------------------------------------------- ··· 544 572 // coverage of the collection. 545 573 546 574 #[tokio::test] 547 - async fn gap_suppresses_death_and_sets_needs_resync() { 575 + async fn gap_suppresses_death_and_sets_unprovable_deaths() { 548 576 let parsed = make_root_only_parsed_car(&[ 549 577 "app.bsky.feed.post/454397e440ec", // layer 4 — visible in root 550 578 "app.bsky.feed.post/aaa", // layer 0 — hidden behind right child ··· 552 580 .await; 553 581 // Delete only the visible key; the hidden sibling might be a survivor. 554 582 let ops = [op_delete("app.bsky.feed.post/454397e440ec")]; 555 - let ExtractResult { born, died, needs_resync } = extract(&ops, parsed).unwrap(); 583 + let ExtractResult { born, died } = extract(&ops, parsed).unwrap(); 556 584 assert!(born.is_empty(), "unexpected birth: {born:?}"); 557 - assert!(died.is_empty(), "death declared despite gap: {died:?}"); 558 - assert!(needs_resync, "needs_resync should be set when death is unproven"); 585 + assert_eq!( 586 + died, 587 + HashMap::from_iter([(nsid("app.bsky.feed.post"), false),]), 588 + "death declared despite (unprovable): {died:?}" 589 + ); 559 590 } 560 591 561 592 #[tokio::test] ··· 568 599 // Create only the visible key; gap can't prove no pre-existing keys exist, 569 600 // but we include the birth anyway (false positives are idempotent). 570 601 let ops = [op_create("app.bsky.feed.post/454397e440ec")]; 571 - let ExtractResult { born, died, needs_resync } = extract(&ops, parsed).unwrap(); 572 - assert_eq!(born, vec![nsid("app.bsky.feed.post")], "birth should be included despite gap"); 602 + let ExtractResult { born, died } = extract(&ops, parsed).unwrap(); 603 + assert_eq!( 604 + born, 605 + HashMap::from_iter([(nsid("app.bsky.feed.post"), false)]), 606 + "birth should be included despite gap" 607 + ); 573 608 assert!(died.is_empty(), "unexpected death: {died:?}"); 574 - assert!(!needs_resync, "needs_resync should not be set for unproven birth"); 575 609 } 576 610 } 577 611 ··· 693 727 #[test] 694 728 fn right_of_key_not_in_span_left_uncertain() { 695 729 let s = span(false, &[("a/r1", true), ("c/r1", false)]); 696 - assert_eq!(s.right_of("b/r1", &Existence::Uncertain), Existence::Uncertain); 730 + assert_eq!( 731 + s.right_of("b/r1", &Existence::Uncertain), 732 + Existence::Uncertain 733 + ); 697 734 } 698 735 699 736 // Key is NOT in the span; left is definite → look for the next key to the right. ··· 787 824 788 825 #[cfg(test)] 789 826 mod fixture_tests { 790 - use super::{collect_visible_paths, extract, ExtractResult}; 827 + use super::{ExtractResult, collect_visible_paths, extract}; 791 828 use std::collections::{BTreeMap, HashSet}; 792 829 use std::sync::Arc; 793 830
+60 -38
src/sync/firehose/commit_event.rs
··· 17 17 //! resync if the bytes don't match `prev.prev_data`. 18 18 19 19 use jacquard_api::com_atproto::sync::subscribe_repos::Commit; 20 - use jacquard_common::types::{string::Did, string::Nsid, tid::Tid}; 20 + use jacquard_common::types::{string::Did, tid::Tid}; 21 21 use jacquard_repo::commit::firehose::validate_v1_1; 22 22 use tracing::{debug, error, info, warn}; 23 23 24 24 use super::validate::{self, CarDrop}; 25 25 use crate::identity::Resolver; 26 + use crate::mst::{self, mortality::ExtractResult}; 26 27 use crate::storage::{ 27 28 self, DbRef, 28 29 pds_host::{self, Sync11Mode}, ··· 160 161 metrics::histogram!("lightrail_commit_ops").record(commit.ops.len() as f64); 161 162 162 163 // ── Collection birth/death detection ───────────────────────────────────── 163 - let crate::mst::mortality::ExtractResult { born, died, needs_resync } = 164 - crate::mst::mortality::extract(&commit.ops, parsed_clone)?; 164 + let mortality = mst::mortality::extract(&commit.ops, parsed_clone)?; 165 165 166 166 // ── Steps 6–9: Blocking storage checks + repo_prev update ─────────────── 167 167 let db = db.clone(); ··· 185 185 since, 186 186 incoming_prev_data, 187 187 new_mst_root_bytes, 188 - born, 189 - died, 190 - needs_resync, 188 + mortality, 191 189 pds_host, 192 190 current_mode: pds_mode, 193 191 }, ··· 362 360 since: Option<Tid>, 363 361 incoming_prev_data: Option<Vec<u8>>, 364 362 new_mst_root_bytes: Vec<u8>, 365 - born: Vec<Nsid<'static>>, 366 - died: Vec<Nsid<'static>>, 367 - needs_resync: bool, 363 + mortality: ExtractResult, 368 364 pds_host: Option<Host>, 369 365 current_mode: Sync11Mode, 370 366 } ··· 383 379 since, 384 380 incoming_prev_data, 385 381 new_mst_root_bytes, 386 - born, 387 - died, 388 - needs_resync, 382 + mortality, 389 383 pds_host, 390 384 current_mode, 391 385 }: ValidationState, ··· 417 411 } 418 412 } 419 413 420 - if !born.is_empty() { 421 - let names = born 422 - .iter() 423 - .map(|n| n.as_str()) 424 - .collect::<Vec<_>>() 425 - .join(", "); 426 - info!(did = %did, collections = names, "collection birth"); 414 + if !mortality.born.is_empty() { 415 + let names = mortality.born.keys().collect::<Vec<_>>(); 416 + info!(did = %did, collections = ?names, "collection birth"); 427 417 } 428 - if !died.is_empty() { 429 - let names = died 430 - .iter() 431 - .map(|n| n.as_str()) 432 - .collect::<Vec<_>>() 433 - .join(", "); 434 - info!(did = %did, collections = names, "collection death"); 418 + if !mortality.died.is_empty() { 419 + let names = mortality.died.keys().collect::<Vec<_>>(); 420 + info!(did = %did, collections = ?names, "collection death"); 435 421 } 436 422 437 423 // All checks passed — atomically update the prev_data and the collection 438 424 // index (born → insert, died → remove). Also record each born collection 439 425 // in the global collection list (blind overwrite, never deleted). 440 - let n_born = born.len() as u64; 441 - let n_died = died.len() as u64; 426 + let n_born = mortality.born.len() as u64; 427 + let n_died = mortality.died.len() as u64; 442 428 let mut batch = db.database.batch(); 443 429 storage::repo::put_prev_into( 444 430 &mut batch, ··· 449 435 prev_data: new_mst_root_bytes, 450 436 }, 451 437 ); 452 - for coll in born { 438 + for coll in mortality.born.keys() { 453 439 // TODO(temporary): detect spurious births to confirm pre-sync1.1 hypothesis 454 440 if storage::collection_index::has_collection(db, &did, coll.clone())? { 455 441 if incoming_prev_data.is_some() { 456 442 error!( 457 443 did = %did, 458 - collection = coll.as_str(), 459 - "spurious birth on sync1.1 commit — collection already indexed (unexpected)" 444 + collection = %coll, 445 + sync11 = true, 446 + "spurious birth, already indexed" 460 447 ); 461 448 } else { 462 - warn!( 449 + error!( 463 450 did = %did, 464 - collection = coll.as_str(), 465 - "spurious birth on pre-sync1.1 commit (no prevData) — expected" 451 + collection = %coll, 452 + sync11 = false, 453 + "spurious birth, already indexed" 466 454 ); 467 455 } 468 456 } 469 457 storage::collection_index::insert_into(&mut batch, db, &did, coll.clone()); 470 458 } 471 - for coll in died { 472 - storage::collection_index::remove_into(&mut batch, db, &did, coll); 459 + let mut all_deaths_proven = true; 460 + for (coll, proven) in &mortality.died { 461 + // TODO(remove): detect phandom deaths??? 462 + if !storage::collection_index::has_collection(db, &did, coll.clone())? { 463 + if incoming_prev_data.is_some() { 464 + error!( 465 + did = %did, 466 + collection = %coll, 467 + proven, 468 + sync11 = true, 469 + "phantom death, collection not indexed" 470 + ); 471 + } else { 472 + error!( 473 + did = %did, 474 + collection = %coll, 475 + proven, 476 + sync11 = false, 477 + "phantom death, collection not indexed" 478 + ); 479 + } 480 + } 481 + 482 + if !proven { 483 + all_deaths_proven = false; 484 + continue; 485 + } 486 + 487 + storage::collection_index::remove_into(&mut batch, db, &did, coll.clone()); 473 488 } 474 489 475 490 // If mortality detection found a possible-but-unproven collection death, 476 491 // queue a full-repo resync so the index can be reconciled once we have 477 492 // a complete view of the repo's current MST. 478 - if needs_resync { 493 + if !all_deaths_proven { 494 + let maybes: Vec<_> = mortality 495 + .died 496 + .iter() 497 + .filter(|(_, p)| !*p) 498 + .map(|(k, _)| k) 499 + .collect(); 500 + error!(did = %did, maybe_deaths = ?maybes, "queuing resync due to unprovable death"); 479 501 storage::resync_queue::enqueue_into( 480 502 &mut batch, 481 503 db, ··· 483 505 &crate::storage::resync_queue::ResyncItem { 484 506 did: did.clone(), 485 507 retry_count: 0, 486 - retry_reason: "possible collection death unproven due to MST gaps".to_string(), 508 + retry_reason: "unprovable_death".to_string(), 487 509 commit_cbor: vec![], 488 510 }, 489 511 );