···134134- [x] config: per-host request rate self-throttling `--crawl-qps` (name from collectiondir)
135135- [x] resync: estimate CAR size from `getRecord` mst height; `getRepo` if it's likely very small
136136- [x] admin view of backfill state etc
137137+- [x] metrics for db size
138138+- [~] make backfill go _really fast_
137139- [ ] special did:web ident cache behaviour to keep reusing a stale resolution on failure
138140- [ ] vanity stats for optimizations, like how many in-flight repos were saved from resync due to high-water-mark firehose cursor persistence
139141- [ ] if the upstream is a PDS (check with describeServer?) then only accept events for DIDs that have it as their PDS
···141143- [ ] combine the throttled http client instance, the db, and the admin info into an appstate fineeeee
142144- [ ] bad word filtering? (collectiondir has it)
143145- [ ] check response headers and adjust self-throttling rate limits per-host if present
144144-- [ ] make backfill go _really fast_
145146- [ ] clean up commit validation (eg we're checking signatures twice, lenient handling is weird)
146146-- [ ] metrics for db size
147147148148going to be annoying but doable
149149- [ ] multi-relay subscriber
+71-19
src/mst/slice_tricks.rs
···17171818type Result<T> = std::result::Result<T, MstSliceTricksError>;
19192020-/// quick hack wrapper to make collections sort the way they do in-MST
2020+/// hacky wrapper to make collections sort the way they do in-MST
2121///
2222/// mst keys are `<collection>/<rkey>` and `/` is lex-after `.`, so
2323/// - `sh.tangled.issue.comment/<any rkey>` comes before
···2525///
2626/// rather than storing a trailing `/`, the hand-written `Ord` impl compares
2727/// two collections as if each one were followed by `/`
2828-#[derive(Debug, PartialEq, PartialOrd, Eq, Ord)]
2929-pub struct TerminatedNsid(String);
2828+#[derive(Debug, PartialEq, Eq)]
2929+pub struct SortableCollection(String);
3030+3131+impl Ord for SortableCollection {
3232+ fn cmp(&self, other: &Self) -> Ordering {
3333+ let (s, o) = (&self.0, &other.0);
3434+3535+ if s.len() < o.len()
3636+ && let Some(o_suffix) = o.strip_prefix(s)
3737+ {
3838+ return "/".cmp(o_suffix);
3939+ }
4040+4141+ if o.len() < s.len()
4242+ && let Some(s_suffix) = s.strip_prefix(o)
4343+ {
4444+ return s_suffix.cmp("/");
4545+ }
4646+4747+ s.cmp(o)
4848+ }
4949+}
30503131-impl<'a> From<&Nsid<'a>> for TerminatedNsid {
3232- fn from(nsid: &Nsid<'a>) -> TerminatedNsid {
3333- let mut s = nsid.to_string();
3434- s.push('/');
3535- TerminatedNsid(s)
5151+impl PartialOrd for SortableCollection {
5252+ fn partial_cmp(&self, other: &SortableCollection) -> Option<Ordering> {
5353+ Some(self.cmp(other))
3654 }
3755}
38563939-impl From<&TerminatedNsid> for Nsid<'static> {
5757+impl<'a> From<&Nsid<'a>> for SortableCollection {
5858+ fn from(nsid: &Nsid<'a>) -> SortableCollection {
5959+ let s = nsid.to_string();
6060+ SortableCollection(s)
6161+ }
6262+}
6363+6464+impl From<&SortableCollection> for Nsid<'static> {
4065 /// go back to jacquard typed (unchecked)
4166 ///
4267 /// no '/' suffix is stored any more, so nothing is stripped here; may still panic if the nsid got messed up
4343- fn from(TerminatedNsid(s): &TerminatedNsid) -> Nsid<'static> {
4444- let unslashed = s
4545- .strip_suffix('/')
4646- .expect("BUG: TerminatedNsid without trailing slash");
4747- Nsid::from(unslashed.to_string())
6868+ fn from(SortableCollection(s): &SortableCollection) -> Nsid<'static> {
6969+ Nsid::from(s.clone())
4870 }
4971}
50725173/// represent the collections across a whole, possibly sparse, repo
5252-type CollectionSpan = Span<TerminatedNsid>;
7474+type CollectionSpan = Span<SortableCollection>;
53755476impl CollectionSpan {
5577 /// get a list of NSIDs if the span has no gaps
···6486 ///
6587 /// each NSID from the set must either be in span, or in a gap of it
6688 pub fn could_cover(&self, collections: &BTreeSet<Nsid<'_>>) -> bool {
6767- let mut candidates = collections.iter().map(Into::<TerminatedNsid>::into);
8989+ let mut candidates = collections.iter().map(Into::<SortableCollection>::into);
6890 let Some(mut candidate) = candidates.next() else {
6991 return true; // empty set can always be covered, even by a zero-gap
7092 };
···217239 gap_before,
218240 things: things
219241 .iter()
220220- .map(|(k, v)| (TerminatedNsid(format!("{k}/")), *v))
242242+ .map(|(k, v)| {
243243+ let nsid = Nsid::from(k.to_string());
244244+ ((&nsid).into(), *v)
245245+ })
221246 .collect(),
222247 }
223248 }
···226251 names.iter().map(|s| Nsid::from(s.to_string())).collect()
227252 }
228253254254+ // --- nsid-sorting wrapper ---------------------------------------------
255255+256256+ #[test]
257257+ fn compare_nsids() {
258258+ for (us, them, expected) in [
259259+ ("sh.tangled.repo", "sh.tangled.repo", Ordering::Equal),
260260+ ("sh.tangled.repo", "sh.tangled.repp", Ordering::Less),
261261+ ("sh.tangled.repo", "sh.tangled.repoa", Ordering::Less),
262262+ ("sh.tangled.repo", "sh.tangled.req.issue", Ordering::Less),
263263+ ("sh.tangled.repo", "sh.tangled.rep", Ordering::Greater),
264264+ ("sh.tangled.repo", "sh.tangled.repn", Ordering::Greater),
265265+ (
266266+ "sh.tangled.repo",
267267+ "sh.tangled.repo.issue",
268268+ Ordering::Greater,
269269+ ), // the surprising one: `/` always follows nsid, sorts after `.`
270270+ ] {
271271+ let us_nsid: SortableCollection = (&nsid(us)).into();
272272+ let them_nsid: SortableCollection = (&nsid(them)).into();
273273+ assert_eq!(
274274+ us_nsid.cmp(&them_nsid),
275275+ expected,
276276+ "nsid {us:?} should be {expected:?} than {them:?} by repo path sort"
277277+ );
278278+ }
279279+ }
280280+229281 // --- empty query set --------------------------------------------------
230282231283 #[test]
···338390 assert!(s.could_cover(&nsids(&["a.b.a", "a.b.b", "a.b.c"])));
339391 }
340392341341- // --- TerminatedNsid ordering ('.' < '/') ------------------------------
393393+ // --- SortableCollection ordering ('.' < '/') ------------------------------
342394343395 // sub-namespaces sort BEFORE their parent in MST order because
344396 // "a.b.c.d/" < "a.b.c/" (at the branch point, '.' = 46 < '/' = 47)
···578630579631 #[tokio::test]
580632 async fn span_sub_namespace_sorts_before_parent_in_mst_order() {
581581- // '.' (0x2E) < '/' (0x2F), so TerminatedNsid ordering gives:
633633+ // '.' (0x2E) < '/' (0x2F), so SortableCollection ordering gives:
582634 // "sh.tangled.repo.issue/" < "sh.tangled.repo/"
583635 let mut car = make_mem_car(&["sh.tangled.repo/self", "sh.tangled.repo.issue/abc123"]).await;
584636 let span = span_from_slice(&mut car).unwrap();