lightweight com.atproto.sync.listReposByCollection
45
fork

Configure Feed

Select the types of activity you want to include in your feed.

collection sort: account for the trailing slash at sort time

instead of always building a new string with a slash appended

phil 093b7c35 e8279bb3

+73 -21
+2 -2
hacking.md
··· 134 134 - [x] config: per-host request rate self-throttling `--crawl-qps` (name from collectiondir) 135 135 - [x] resync: estimate CAR size from `getRecord` mst height; `getRepo` if it's likely very small 136 136 - [x] admin view of backfill state etc 137 + - [x] metrics for db size 138 + - [~] make backfill go _really fast_ 137 139 - [ ] special did:web ident cache behaviour to keep reusing a stale resolution on failure 138 140 - [ ] vanity stats for optimizations, like how many in-flight repos were saved from resync due to high-water-mark firehose cursor persistence 139 141 - [ ] if the upstream is a PDS (check with describeServer?) then make only accept events for DIDs that have it as their PDS ··· 141 143 - [ ] combine the throttled http client instance, the db, and the admin info into an appstate fineeeee 142 144 - [ ] bad word filtering? (collectiondir has it) 143 145 - [ ] check response headers and adjust self-throttling rate limits per-host if present 144 - - [ ] make backfill go _really fast_ 145 146 - [ ] clean up commit validation (eg we're checking signatures twice, lenient handling is weird) 146 - - [ ] metrics for db size 147 147 148 148 going to be annoying but doable 149 149 - [ ] multi-relay subscriber
+71 -19
src/mst/slice_tricks.rs
··· 17 17 18 18 type Result<T> = std::result::Result<T, MstSliceTricksError>; 19 19 20 - /// quick hack wrapper to make collections sort the way they do in-MST 20 + /// hacky wrapper to make collections sort the way they do in-MST 21 21 /// 22 22 /// mst keys are `<collection>/<rkey>` and `/` is lex-after `.`, so 23 23 /// - `sh.tangled.issue.comment/<any rkey>` comes before ··· 25 25 /// 26 26 /// there is probably a nice way to implement PartialOrd, but... we're just 27 27 /// going to tack a `/` on the end and call it a day 28 - #[derive(Debug, PartialEq, PartialOrd, Eq, Ord)] 29 - pub struct TerminatedNsid(String); 28 + #[derive(Debug, PartialEq, Eq)] 29 + pub struct SortableCollection(String); 30 + 31 + impl Ord for SortableCollection { 32 + fn cmp(&self, other: &Self) -> Ordering { 33 + let (s, o) = (&self.0, &other.0); 34 + 35 + if s.len() < o.len() 36 + && let Some(o_suffix) = o.strip_prefix(s) 37 + { 38 + return "/".cmp(o_suffix); 39 + } 40 + 41 + if o.len() < s.len() 42 + && let Some(s_suffix) = s.strip_prefix(o) 43 + { 44 + return s_suffix.cmp("/"); 45 + } 46 + 47 + s.cmp(o) 48 + } 49 + } 30 50 31 - impl<'a> From<&Nsid<'a>> for TerminatedNsid { 32 - fn from(nsid: &Nsid<'a>) -> TerminatedNsid { 33 - let mut s = nsid.to_string(); 34 - s.push('/'); 35 - TerminatedNsid(s) 51 + impl PartialOrd for SortableCollection { 52 + fn partial_cmp(&self, other: &SortableCollection) -> Option<Ordering> { 53 + Some(self.cmp(other)) 36 54 } 37 55 } 38 56 39 - impl From<&TerminatedNsid> for Nsid<'static> { 57 + impl<'a> From<&Nsid<'a>> for SortableCollection { 58 + fn from(nsid: &Nsid<'a>) -> SortableCollection { 59 + let s = nsid.to_string(); 60 + SortableCollection(s) 61 + } 62 + } 63 + 64 + impl From<&SortableCollection> for Nsid<'static> { 40 65 /// go back to jacquard typed (unchecked) 41 66 /// 42 67 /// panics if missing the '/' suffix or if the nsid got messed up 43 - fn from(TerminatedNsid(s): &TerminatedNsid) -> Nsid<'static> { 44 - let unslashed = s 45 - 
.strip_suffix('/') 46 - .expect("BUG: TerminatedNsid without trailing slash"); 47 - Nsid::from(unslashed.to_string()) 68 + fn from(SortableCollection(s): &SortableCollection) -> Nsid<'static> { 69 + Nsid::from(s.clone()) 48 70 } 49 71 } 50 72 51 73 /// represent the collections across a whole, possibly sparse, repo 52 - type CollectionSpan = Span<TerminatedNsid>; 74 + type CollectionSpan = Span<SortableCollection>; 53 75 54 76 impl CollectionSpan { 55 77 /// get a list of NSIDs if the span has no gaps ··· 64 86 /// 65 87 /// each NSID from the set must either be in span, or in a gap of it 66 88 pub fn could_cover(&self, collections: &BTreeSet<Nsid<'_>>) -> bool { 67 - let mut candidates = collections.iter().map(Into::<TerminatedNsid>::into); 89 + let mut candidates = collections.iter().map(Into::<SortableCollection>::into); 68 90 let Some(mut candidate) = candidates.next() else { 69 91 return true; // empty set can always be covered, even by a zero-gap 70 92 }; ··· 217 239 gap_before, 218 240 things: things 219 241 .iter() 220 - .map(|(k, v)| (TerminatedNsid(format!("{k}/")), *v)) 242 + .map(|(k, v)| { 243 + let nsid = Nsid::from(k.to_string()); 244 + ((&nsid).into(), *v) 245 + }) 221 246 .collect(), 222 247 } 223 248 } ··· 226 251 names.iter().map(|s| Nsid::from(s.to_string())).collect() 227 252 } 228 253 254 + // --- nsid-sorting wrapper --------------------------------------------- 255 + 256 + #[test] 257 + fn compare_nsids() { 258 + for (us, them, expected) in [ 259 + ("sh.tangled.repo", "sh.tangled.repo", Ordering::Equal), 260 + ("sh.tangled.repo", "sh.tangled.repp", Ordering::Less), 261 + ("sh.tangled.repo", "sh.tangled.repoa", Ordering::Less), 262 + ("sh.tangled.repo", "sh.tangled.req.issue", Ordering::Less), 263 + ("sh.tangled.repo", "sh.tangled.rep", Ordering::Greater), 264 + ("sh.tangled.repo", "sh.tangled.repn", Ordering::Greater), 265 + ( 266 + "sh.tangled.repo", 267 + "sh.tangled.repo.issue", 268 + Ordering::Greater, 269 + ), // the surprising one: `/` 
always follows nsid, sorts after `.` 270 + ] { 271 + let us_nsid: SortableCollection = (&nsid(us)).into(); 272 + let them_nsid: SortableCollection = (&nsid(them)).into(); 273 + assert_eq!( 274 + us_nsid.cmp(&them_nsid), 275 + expected, 276 + "nsid {us:?} should be {expected:?} than {them:?} by repo path sort" 277 + ); 278 + } 279 + } 280 + 229 281 // --- empty query set -------------------------------------------------- 230 282 231 283 #[test] ··· 338 390 assert!(s.could_cover(&nsids(&["a.b.a", "a.b.b", "a.b.c"]))); 339 391 } 340 392 341 - // --- TerminatedNsid ordering ('.' < '/') ------------------------------ 393 + // --- SortableCollection ordering ('.' < '/') ------------------------------ 342 394 343 395 // sub-namespaces sort BEFORE their parent in MST order because 344 396 // "a.b.c.d/" < "a.b.c/" (at the branch point, '.' = 46 < '/' = 47) ··· 578 630 579 631 #[tokio::test] 580 632 async fn span_sub_namespace_sorts_before_parent_in_mst_order() { 581 - // '.' (0x2E) < '/' (0x2F), so TerminatedNsid ordering gives: 633 + // '.' (0x2E) < '/' (0x2F), so SortableCollection ordering gives: 582 634 // "sh.tangled.repo.issue/" < "sh.tangled.repo/" 583 635 let mut car = make_mem_car(&["sh.tangled.repo/self", "sh.tangled.repo.issue/abc123"]).await; 584 636 let span = span_from_slice(&mut car).unwrap();