lightweight com.atproto.sync.listReposByCollection
45
fork

Configure Feed

Select the types of activity you want to include in your feed.

wip: slice_tricks

phil 60d08740 c2c8b0f3

+380
+69
src/mst/mod.rs
··· 2 2 3 3 pub mod collections; 4 4 pub mod mortality; 5 + pub mod slice_tricks; 6 + 7 + use std::collections::BTreeMap; 8 + 9 + struct Span<T: Ord> { 10 + gap_before: bool, 11 + things: BTreeMap<T, bool>, // gap after 12 + } 13 + 14 + enum SpanLen { 15 + /// all collections are known 16 + Exactly(usize), 17 + /// at least one gap exists 18 + AtLeast(usize), 19 + } 20 + 21 + impl<T: Ord> Span<T> { 22 + fn empty() -> Self { 23 + Self { 24 + gap_before: false, 25 + things: Default::default(), 26 + } 27 + } 28 + fn len(&self) -> SpanLen { 29 + let known_len = self.things.len(); 30 + if self.gap_before || self.things.values().any(|gap_after| *gap_after) { 31 + SpanLen::AtLeast(known_len) 32 + } else { 33 + SpanLen::Exactly(known_len) 34 + } 35 + } 36 + /// check if any collections are present 37 + /// 38 + /// None if we can't be sure (we only have a gap) 39 + fn is_empty(&self) -> Option<bool> { 40 + match self.len() { 41 + SpanLen::AtLeast(0) => None, 42 + SpanLen::Exactly(0) => Some(true), 43 + _ => Some(false), 44 + } 45 + } 46 + /// the span has *no* gaps 47 + fn is_complete(&self) -> bool { 48 + matches!(self.len(), SpanLen::Exactly(_)) 49 + } 50 + fn contains(&self, k: &T) -> Option<bool> { 51 + if self.things.contains_key(k) { 52 + return Some(true); 53 + } 54 + // try to gap_after from the key before this one, if present 55 + // (when btree_cursors land we can do nicer with that) 56 + let falls_in_gap = self 57 + .things 58 + .range(..k) // exclusive range: all keys lex-before us 59 + .next_back() // take the closest previous one 60 + .map(|(_, gap_after)| gap_after) // if it existed, find out if it had a gap after 61 + .unwrap_or(&self.gap_before); // no before-key: span starts with gap? 62 + 63 + if *falls_in_gap { None } else { Some(false) } 64 + } 65 + /// definitive answer about whether it's *possible* for `k` to be in span 66 + /// 67 + /// key exist -> true 68 + /// key falls in a gap -> true (it's possible!) 69 + /// key falls after a key without a gap after -> false (not possible!) 70 + fn may_contain(&self, k: &T) -> bool { 71 + self.contains(k).unwrap_or(true) 72 + } 73 + }
+311
src/mst/slice_tricks.rs
··· 1 + //! glean more from a `sync.getRecord` slice than we have any right to 2 + 3 + use super::Span; 4 + use jacquard_common::types::string::Nsid; 5 + use repo_stream::{MemCar, Output as WalkOutput, WalkItem}; 6 + use std::cmp::Ordering; 7 + use std::collections::BTreeSet; 8 + 9 + #[derive(Debug, thiserror::Error)] 10 + pub enum MstSliceTricksError { 11 + #[error("repo-stream WalkError: {0}")] 12 + WalkError(#[from] repo_stream::WalkError), 13 + #[error("bad repo path: {0}")] 14 + BadPath(String), 15 + } 16 + 17 + type Result<T> = std::result::Result<T, MstSliceTricksError>; 18 + 19 + /// quick hack wrapper to make collections sort the way they do in-MST 20 + /// 21 + /// mst keys are `<collection>/<rkey>` and `/` is lex-after `.`, so 22 + /// - `sh.tangled.issue.comment/<any rkey>` comes before 23 + /// - `sh.tangled.issue/<any rkey>` 24 + /// 25 + /// there is probably a nice way to implement PartialOrd, but... we're just 26 + /// going to tack a `/` on the end and call it a day 27 + #[derive(Debug, PartialEq, PartialOrd, Eq, Ord)] 28 + struct TerminatedNsid(String); 29 + 30 + impl<'a> From<&Nsid<'a>> for TerminatedNsid { 31 + fn from(nsid: &Nsid<'a>) -> TerminatedNsid { 32 + let mut s = nsid.to_string(); 33 + s.push('/'); 34 + TerminatedNsid(s) 35 + } 36 + } 37 + 38 + impl From<&TerminatedNsid> for Nsid<'static> { 39 + /// go back to jacquard typed (unchecked) 40 + /// 41 + /// panics if missing the '/' suffix or if the nsid got messed up 42 + fn from(TerminatedNsid(s): &TerminatedNsid) -> Nsid<'static> { 43 + let unslashed = s 44 + .strip_suffix('/') 45 + .expect("BUG: TerminatedNsid without trailing slash"); 46 + Nsid::from(unslashed.to_string()) 47 + } 48 + } 49 + 50 + /// represent the collections across a whole, possibly sparse, repo 51 + type CollectionSpan = Span<TerminatedNsid>; 52 + 53 + impl CollectionSpan { 54 + // fn contains_nsid(&self, collection: &Nsid<'_>) -> Option<bool> { 55 + // self.contains((&collection).into()) 56 + // } 57 + // fn may_contain_nsid(&self, collection: &Nsid<'_>) -> bool { 58 + // self.may_contain(&collection.clone().into()) 59 + // } 60 + /// get a list of NSIDs if the span has no gaps 61 + fn complete(&self) -> Option<Vec<Nsid<'static>>> { 62 + self.is_complete() 63 + .then(|| self.things.keys().map(Into::into).collect()) 64 + } 65 + /// whether it's possible that this span covers some NSIDs 66 + /// 67 + /// each NSID from the set must either be in span, or in a gap of it 68 + fn could_cover(&self, collections: &BTreeSet<Nsid<'_>>) -> bool { 69 + let mut candidates = collections.iter().map(Into::<TerminatedNsid>::into); 70 + let Some(mut candidate) = candidates.next() else { 71 + return true; // empty set can always be covered, even by a zero-gap 72 + }; 73 + 74 + let mut in_gap = self.gap_before; 75 + let mut spans = self.things.iter(); 76 + let Some((mut next_key, mut gap_after)) = spans.next() else { 77 + return in_gap; // one big gap => covers all, else we span nothing 78 + }; 79 + 80 + // walk the spans and collections together (both are sorted) to check 81 + // each collection against span keys and gaps 82 + loop { 83 + match candidate.cmp(next_key) { 84 + Ordering::Less if !in_gap => return false, 85 + Ordering::Less => { 86 + candidate = match candidates.next() { 87 + Some(c) => c, 88 + None => return true, 89 + } 90 + } 91 + Ordering::Equal => { 92 + in_gap = *gap_after; 93 + (next_key, gap_after) = match spans.next() { 94 + Some(n) => n, 95 + None => return candidates.next().is_none(), 96 + }; 97 + candidate = match candidates.next() { 98 + Some(c) => c, 99 + None => return true, 100 + } 101 + } 102 + Ordering::Greater => { 103 + in_gap = *gap_after; 104 + (next_key, gap_after) = match spans.next() { 105 + Some(n) => n, 106 + None => return *gap_after, // trailing gap accepts all greater candidates 107 + } 108 + } 109 + } 110 + } 111 + } 112 + } 113 + 114 + #[cfg(test)] 115 + mod tests { 116 + use super::*; 117 + use jacquard_common::types::string::Nsid; 118 + use std::collections::BTreeSet; 119 + 120 + fn make_span(gap_before: bool, things: &[(&str, bool)]) -> CollectionSpan { 121 + CollectionSpan { 122 + gap_before, 123 + things: things 124 + .iter() 125 + .map(|(k, v)| (TerminatedNsid(format!("{k}/")), *v)) 126 + .collect(), 127 + } 128 + } 129 + 130 + fn nsids(names: &[&str]) -> BTreeSet<Nsid<'static>> { 131 + names.iter().map(|s| Nsid::from(s.to_string())).collect() 132 + } 133 + 134 + // --- empty query set -------------------------------------------------- 135 + 136 + #[test] 137 + fn empty_set_always_covered() { 138 + // even a zero-gap span covers the empty set 139 + assert!(make_span(false, &[]).could_cover(&nsids(&[]))); 140 + assert!(make_span(true, &[("a.b.c", false)]).could_cover(&nsids(&[]))); 141 + } 142 + 143 + // --- empty span ------------------------------------------------------- 144 + 145 + #[test] 146 + fn one_big_gap_covers_anything() { 147 + // gap_before=true with no known keys = "we know nothing, anything is possible" 148 + let s = make_span(true, &[]); 149 + assert!(s.could_cover(&nsids(&["a.b.c"]))); 150 + assert!(s.could_cover(&nsids(&["a.b.c", "a.b.d"]))); 151 + } 152 + 153 + #[test] 154 + fn empty_span_no_gap_covers_nothing() { 155 + let s = make_span(false, &[]); 156 + assert!(!s.could_cover(&nsids(&["a.b.c"]))); 157 + } 158 + 159 + // --- single known key ------------------------------------------------- 160 + 161 + #[test] 162 + fn exact_match() { 163 + let s = make_span(false, &[("a.b.c", false)]); 164 + assert!(s.could_cover(&nsids(&["a.b.c"]))); 165 + } 166 + 167 + #[test] 168 + fn gap_before_covers_collection_lex_before_first_key() { 169 + let s = make_span(true, &[("a.b.c", false)]); 170 + assert!(s.could_cover(&nsids(&["a.b.a"]))); // "a.b.a" < "a.b.c" 171 + } 172 + 173 + #[test] 174 + fn no_gap_before_rejects_collection_lex_before_first_key() { 175 + let s = make_span(false, &[("a.b.c", false)]); 176 + assert!(!s.could_cover(&nsids(&["a.b.a"]))); 177 + } 178 + 179 + #[test] 180 + fn no_gap_after_rejects_collection_lex_after_last_key() { 181 + let s = make_span(false, &[("a.b.c", false)]); 182 + assert!(!s.could_cover(&nsids(&["a.b.d"]))); 183 + } 184 + 185 + #[test] 186 + fn gap_after_last_key_covers_trailing_collection() { 187 + // Greater branch exhausting spans returns *gap_after, so the trailing 188 + // gap is correctly consulted. 189 + let s = make_span(false, &[("a.b.c", true)]); 190 + assert!(s.could_cover(&nsids(&["a.b.d"]))); 191 + } 192 + 193 + // NOTE: bug — Equal on the last span key returns candidates.next().is_none(), 194 + // ignoring gap_after. So a trailing candidate after an exact match is rejected 195 + // even when gap_after=true. 196 + #[test] 197 + fn gap_after_last_matched_key_does_not_cover_remaining_candidates() { 198 + let s = make_span(false, &[("a.b.c", true)]); 199 + assert!(!s.could_cover(&nsids(&["a.b.c", "a.b.d"]))); 200 + } 201 + 202 + // --- gap between two keys --------------------------------------------- 203 + 204 + #[test] 205 + fn gap_between_keys_covers_middle_collection() { 206 + let s = make_span(false, &[("a.b.a", true), ("a.b.c", false)]); 207 + assert!(s.could_cover(&nsids(&["a.b.b"]))); 208 + } 209 + 210 + #[test] 211 + fn no_gap_between_keys_rejects_middle_collection() { 212 + let s = make_span(false, &[("a.b.a", false), ("a.b.c", false)]); 213 + assert!(!s.could_cover(&nsids(&["a.b.b"]))); 214 + } 215 + 216 + // --- multiple collections in query set -------------------------------- 217 + 218 + // NOTE: bug — Equal advances the span but not the candidate. After matching 219 + // "a.b.a/", the same candidate is compared to "a.b.b/" → Less with in_gap=false 220 + // → false. Consecutive exact matches always fail when there are more span keys. 221 + #[test] 222 + fn consecutive_exact_matches_return_false() { 223 + let s = make_span( 224 + false, 225 + &[("a.b.a", false), ("a.b.b", false), ("a.b.c", false)], 226 + ); 227 + assert!(s.could_cover(&nsids(&["a.b.a", "a.b.b", "a.b.c"]))); 228 + } 229 + 230 + #[test] 231 + fn subset_of_exact_matches_returns_false() { 232 + // same root cause: after matching "a.b.a/", candidate stays "a.b.a/" and 233 + // compares Less to "a.b.b/" with no gap → false, even though "a.b.c" would match 234 + let s = make_span( 235 + false, 236 + &[("a.b.a", false), ("a.b.b", false), ("a.b.c", false)], 237 + ); 238 + assert!(s.could_cover(&nsids(&["a.b.a", "a.b.c"]))); // skip "a.b.b" 239 + } 240 + 241 + #[test] 242 + fn one_missing_key_no_gap_rejects() { 243 + let s = make_span(false, &[("a.b.a", false), ("a.b.c", false)]); 244 + assert!(!s.could_cover(&nsids(&["a.b.a", "a.b.b", "a.b.c"]))); 245 + } 246 + 247 + // a mix: some known, one in a gap 248 + #[test] 249 + fn known_key_plus_collection_in_adjacent_gap() { 250 + let s = make_span(false, &[("a.b.a", true), ("a.b.c", false)]); 251 + assert!(s.could_cover(&nsids(&["a.b.a", "a.b.b", "a.b.c"]))); 252 + } 253 + 254 + // --- TerminatedNsid ordering ('.' < '/') ------------------------------ 255 + 256 + // sub-namespaces sort BEFORE their parent in MST order because 257 + // "a.b.c.d/" < "a.b.c/" (at the branch point, '.' = 46 < '/' = 47) 258 + #[test] 259 + fn sub_namespace_sorts_before_parent_no_gap_before() { 260 + // span knows about "a.b.c" but not "a.b.c.d" 261 + // "a.b.c.d" is lex-before "a.b.c" in MST order, falls before first key 262 + let s = make_span(false, &[("a.b.c", false)]); 263 + assert!(!s.could_cover(&nsids(&["a.b.c.d"]))); 264 + } 265 + 266 + #[test] 267 + fn sub_namespace_covered_by_gap_before() { 268 + let s = make_span(true, &[("a.b.c", false)]); 269 + assert!(s.could_cover(&nsids(&["a.b.c.d"]))); 270 + } 271 + } 272 + 273 + fn span_from_slice(car: &mut MemCar) -> Result<CollectionSpan> { 274 + let mut prev_gap = false; 275 + let mut prev_collection = None; 276 + 277 + let mut span = CollectionSpan::empty(); 278 + 279 + while let Some(item) = car.next()? { 280 + match item { 281 + WalkItem::MissingSubtree { .. } => { 282 + prev_gap = true; 283 + } 284 + WalkItem::Record(WalkOutput { key, .. }) | WalkItem::MissingRecord { key, .. } => { 285 + let collection: Nsid<'_> = key 286 + .parse() 287 + .map_err(|e| MstSliceTricksError::BadPath(format!("nsid parse: {e}")))?; 288 + let terminated = (&collection).into(); 289 + 290 + if let Some(prev) = prev_collection { 291 + // last-from-collection wins setting gap_after 292 + span.things.insert(prev, prev_gap); 293 + } else { 294 + span.gap_before = prev_gap; 295 + } 296 + 297 + prev_collection = Some(terminated); 298 + } 299 + WalkItem::Node { .. } => unreachable!("repostream mem::next doesn't output nodes"), 300 + } 301 + } 302 + 303 + if let Some(prev) = prev_collection { 304 + // last-from-collection wins setting gap_after 305 + span.things.insert(prev, prev_gap); 306 + } else { 307 + span.gap_before = prev_gap; 308 + } 309 + 310 + Ok(span) 311 + }