···3939use std::time::SystemTime;
40404141const PAGE_LIMIT: i64 = 500;
4242-/// Delay between retry attempts after a transient page failure.
4343-const RETRY_DELAY_SECS: u64 = 10;
4444-/// Maximum consecutive transient failures before giving up on this host.
4545-const MAX_PAGE_FAILURES: u32 = 3;
4242+4343+// ---------------------------------------------------------------------------
4444+// Mode + retry policy
4545+// ---------------------------------------------------------------------------
4646+4747+/// Which host role this backfill is running against.
4848+///
4949+/// Bundles two decisions that always move together:
5050+/// - whether to validate DIDs against the host (trust model), and
5151+/// - how patiently to retry transient page failures (criticality).
5252+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
5353+pub enum BackfillMode {
5454+ /// Primary relay / upstream host. Trusted for DID listings; retried
5555+ /// generously because a transient outage here will take the whole service
5656+ /// down when the task exits.
5757+ Relay,
5858+ /// Untrusted PDS discovered via deep crawl. DIDs are validated against the
5959+ /// host, and a few transient failures are enough to move on.
6060+ DeepCrawl,
6161+}
6262+6363+impl BackfillMode {
6464+ /// True when listed DIDs must be independently verified to actually live
6565+ /// on `host` before we trust their account status.
6666+ fn validates_dids(self) -> bool {
6767+ matches!(self, Self::DeepCrawl)
6868+ }
6969+7070+ fn retry_policy(self) -> RetryPolicy {
7171+ match self {
7272+ Self::Relay => RetryPolicy::RELAY,
7373+ Self::DeepCrawl => RetryPolicy::DEEP_CRAWL,
7474+ }
7575+ }
7676+}
/// How many transient page failures to tolerate and how long to wait between
/// attempts.
///
/// The policy is a small plain-data value, so it is `Copy` and comparable;
/// `Debug` lets it be included in log output and assertions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct RetryPolicy {
    /// Number of transient failures at which the caller gives up. The failure
    /// that reaches this count is not followed by a wait, so a budget of `n`
    /// means `n` attempts separated by `n - 1` delays.
    max_page_failures: u32,
    /// Pause inserted after a failed attempt before the next one.
    retry_delay: Duration,
}

impl RetryPolicy {
    /// Relay budget: up to 60 attempts spaced 30s apart, i.e. 59 waits or
    /// roughly 29.5 minutes of retrying before giving up and triggering
    /// service shutdown. Enough to absorb typical relay hiccups without
    /// flapping.
    const RELAY: Self = Self {
        max_page_failures: 60,
        retry_delay: Duration::from_secs(30),
    };

    /// Deep-crawl budget: up to 3 attempts spaced 10s apart (2 waits). Fast
    /// give-up so dead PDSes don't monopolise crawl workers.
    const DEEP_CRAWL: Self = Self {
        max_page_failures: 3,
        retry_delay: Duration::from_secs(10),
    };
}
469947100// ---------------------------------------------------------------------------
48101// Listed account state
···7612977130/// Walk `listRepos` on `host` and enqueue new repos for resync.
78131///
7979-/// When `validate` is true (deep crawl / untrusted PDS), DIDs are verified to
8080-/// actually live on `host` and non-matching ones are rejected. Validated DIDs
8181-/// are considered authoritative for account status — if the PDS says an account
8282-/// is deactivated, we trust it even if our local state is Active. Without
8383-/// validation (relay backfill), we only write non-active status for accounts
8484-/// we haven't seen before, to avoid overwriting Active state with stale relay
8585-/// data (e.g. accounts that migrated off and appear deactivated on the old PDS).
132132+/// `mode` selects both the trust model (whether DIDs are validated against
133133+/// `host`) and the retry policy (how long we retry transient page failures
134134+/// before giving up). See [`BackfillMode`].
135135+///
136136+/// In `DeepCrawl` mode, DIDs are verified to actually live on `host` and
137137+/// non-matching ones are rejected. Validated DIDs are considered authoritative
138138+/// for account status — if the PDS says an account is deactivated, we trust
139139+/// it even if our local state is Active. In `Relay` mode we only write
140140+/// non-active status for accounts we haven't seen before, to avoid
141141+/// overwriting Active state with stale relay data (e.g. accounts that
142142+/// migrated off and appear deactivated on the old PDS).
86143pub async fn run(
87144 host: Host,
88145 db: DbRef,
89146 client: ThrottledClient,
90147 token: CancellationToken,
91148 resolver: Arc<Resolver>,
9292- validate: bool,
149149+ mode: BackfillMode,
93150 discovery_queue: Arc<DiscoveryQueue>,
94151) -> Result<bool> {
152152+ let validate = mode.validates_dids();
153153+ let retry_policy = mode.retry_policy();
95154 let base: jacquard_common::url::Url = format!("https://{host}")
96155 .parse()
97156 .map_err(|e: jacquard_common::url::ParseError| crate::error::Error::Other(e.to_string()))?;
···117176 return Ok(false);
118177 }
119178120120- let (dids, next_cursor) =
121121- match fetch_page(&host, &base, cursor.as_deref(), &client, &token).await {
122122- Some(page) => page,
123123- None => return Ok(false),
124124- };
179179+ let (dids, next_cursor) = match fetch_page(
180180+ &host,
181181+ &base,
182182+ cursor.as_deref(),
183183+ &client,
184184+ &token,
185185+ &retry_policy,
186186+ )
187187+ .await
188188+ {
189189+ Some(page) => page,
190190+ None => return Ok(false),
191191+ };
125192126193 let page_len = dids.len();
127194···228295229296/// Fetch one `listRepos` page with retry logic.
230297///
231231-/// Returns `None` if the host gives a 4xx, exceeds `MAX_PAGE_FAILURES`
298298+/// Returns `None` if the host gives a 4xx, exceeds `policy.max_page_failures`
232299/// transient errors, or the token is cancelled (including mid-request).
233300async fn fetch_page(
234301 host: &Host,
···236303 cursor: Option<&str>,
237304 client: &ThrottledClient,
238305 token: &CancellationToken,
306306+ policy: &RetryPolicy,
239307) -> Option<(Vec<(Did<'static>, RepoState)>, Option<String>)> {
240308 let req = ListRepos {
241309 cursor: cursor.map(Into::into),
···281349 Some(page) => return Some(page),
282350 None => {
283351 failures += 1;
284284- if failures >= MAX_PAGE_FAILURES {
285285- warn!(host = %host, failures,
286286- "listRepos page failed {MAX_PAGE_FAILURES} times; giving up on this host");
352352+ if failures >= policy.max_page_failures {
353353+ warn!(host = %host, failures, max = policy.max_page_failures,
354354+ "listRepos page failed too many times; giving up on this host");
287355 return None;
288356 }
289289- if !token.sleep(Duration::from_secs(RETRY_DELAY_SECS)).await {
357357+ if !token.sleep(policy.retry_delay).await {
290358 return None;
291359 }
292360 }