···8787 - [x] swap in repo-stream
8888- [x] actually wire in the resync buffer (oops)
8989- [x] make sure we're doing the right thing on decode errors (seems we are, tungstenite closes connection)
9090-- [~] "deep crawl" mode for relays
9090+- [x] "deep crawl" mode for relays
9191 - [x] listHosts -> listRepos on host instead of relying on relay listRepos
9292- - [ ] defensive loop-cursor handling
9393-- [ ] lenient pre-sync1.1
9494- - [ ] *don't* allow non-validating commits that look like sync1.1
9595- - [ ] ratchet by PDS host: be lenient if we have never seen a sync1.1-looking commit, always strict after we see one.
9696- - [ ] boooo we might need more handling for pre-sync1.1 repos if they don't include adjacent keys
9292+ - [x] defensive loop-cursor handling
9393+- [x] lenient pre-sync1.1
9494+ - [x] *don't* allow non-validating commits that look like sync1.1
9595+ - [x] ratchet by PDS host: be lenient if we have never seen a sync1.1-looking commit, always strict after we see one.
9696+ - [?] boooo we might need more handling for pre-sync1.1 repos if they don't include adjacent keys
9797- [ ] resync free hints from first phony getRecord
9898 - [ ] short-circuit: tiny repos may incidentally return their entire CAR for getRecord
9999 - [ ] estimate CAR size and `getRecord` if it's likely very small (bypass `describeRepo`)
100100-- [ ] add a `--heavy` mode that always uses `getRepo` and never `describeRepo`
101100- [ ] commit CAR handling: generate a list of keys with gaps noted, to reliably detect missing adjacent keys
102101- [ ] account status convergence: if we receive commits from apparently-inactive accounts, should we check upstream status to make sure we're not stale?
103102- [ ] split the keyspace: put the rbc/cbr indexes on a second keyspace with larger block size, expect hits on main keyspace
···106105107106108107very much still todo but i'm getting tired
109109-- [ ] multi-relay listener
108108+- [ ] add a `--heavy` mode that always uses `getRepo` and never `describeRepo`
109109+- [ ] multi-relay subscriber
110110- [ ] special did:web behaviour to keep reusing a stale resolution on failure
111111- [ ] admin view of backfill state etc
112112- [ ] vanity stats for optimizations, like how many in-flight repos were saved from resync due to high-water-mark firehose cursor persistence
+30-19
src/sync/backfill.rs
···4343/// as fatal and abort the walk immediately. Transient errors are retried up to
4444/// `MAX_PAGE_FAILURES` times before giving up. Returns `Ok(true)` when the
4545/// full walk completes, `Ok(false)` if cancelled or the host gives up.
4646+///
4747+/// passing in `resolver` implies that this is a PDS, not a relay -- treat with
4848+/// lower trust: verify DIDs belong on that host and check for evil cursor.
4649pub async fn run(
4750 host: Host,
4851 db: DbRef,
···8487 let page_len = dids.len();
8588 let now = unix_now();
86899090+ // resolver is passed in for untrusted hosts so we can filter out dids that the host isn't authoritative over
9191+ // TODO: if *many* dids are cleared (probably as a percentage?) we might mark this host sketchy
8792 let dids = match &resolver {
8893 Some(r) => validate_dids(dids, r, &host, &token).await,
8994 None => dids,
···108113 "backfill page"
109114 );
110115111111- match next_cursor {
112112- Some(c) => cursor = Some(c),
113113- None => {
114114- let db = db.clone();
115115- let host_owned = host.clone();
116116- tokio::task::spawn_blocking(move || {
117117- set(
118118- &db,
119119- &host_owned,
120120- &BackfillProgress {
121121- cursor: String::new(),
122122- completed_at: Some(now.to_string()),
123123- },
124124- )
125125- })
126126- .await??;
127127- info!(host = %host, total_queued, "backfill complete");
128128- return Ok(true);
129129- }
116116+ if next_cursor != cursor {
117117+ cursor = next_cursor;
118118+ continue;
130119 }
120120+121121+ if let Some(c) = next_cursor {
122122+ warn!(host = %host, cursor = c, "evil cursor! (unchanged), bailing on this host.");
123123+ // TODO: mark host as not trustworthy
124124+ };
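125125+126126+ // persist final progress: clear the cursor and stamp completed_at. NOTE(review): also reached after the unchanged-cursor bail above, so that host is recorded as complete -- confirm intended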
127127+ let db = db.clone();
128128+ let host_owned = host.clone();
129129+ tokio::task::spawn_blocking(move || {
130130+ set(
131131+ &db,
132132+ &host_owned,
133133+ &BackfillProgress {
134134+ cursor: "".to_string(),
135135+ completed_at: Some(now.to_string()),
136136+ },
137137+ )
138138+ })
139139+ .await??;
140140+ info!(host = %host, total_queued, "backfill complete");
141141+ return Ok(true);
131142 }
132143}
133144