···11use jacquard_common::IntoStatic;
22-use jacquard_common::types::cid::Cid;
32use jacquard_common::types::crypto::PublicKey;
43use jacquard_repo::MemoryBlockStore;
54use jacquard_repo::Mst;
···5049 #[error("field mismatch in {field}")]
5150 FieldMismatch { field: &'static str },
5251 /// signature verification failed.
5353- /// `refreshed: false` means the key has not been re-fetched yet;
5454- /// the caller should refresh and retry with the same commit.
5552 #[error("signature verification failed")]
5656- SigFailure { refreshed: bool },
5353+ SigFailure,
5754 /// a block, op count, or record exceeds the ATProto size limits
5855 #[error("size limit exceeded: {0}")]
5956 SizeLimitExceeded(SizeLimitKind),
···8077 FieldMismatch { field: &'static str },
8178 /// signature verification failed
8279 #[error("signature verification failed")]
8383- SigFailure { refreshed: bool },
8080+ SigFailure,
8481}
85828683/// indicates that the commit's chain pointers do not match the last known repo state.
/// this is not a hard rejection so callers can decide what they want to do
#[derive(Default, Debug)]
pub struct ChainBreak {
    /// msg.since is present and does not match the last known rev
    pub since_mismatch: bool,
    /// msg.prevData is absent, or present and does not match the previous
    /// commit's MST root (see `breaks_chain`)
    pub prev_data_mismatch: bool,
}
94929393+impl ChainBreak {
9494+ pub fn is_broken(&self) -> bool {
9595+ self.since_mismatch || self.prev_data_mismatch
9696+ }
9797+}
/// a successfully validated `#commit` message, carrying pre-parsed data for apply_commit
pub struct ValidatedCommit<'c> {
    /// the raw firehose commit message this validation covers
    pub commit: &'c Commit<'c>,
    /// result of parse_car_bytes, already done so apply_commit does not re-parse
    pub parsed_blocks: ParsedCar,
    /// deserialized commit object (signature-verified when a key was supplied)
    pub commit_obj: AtpCommit<'static>,
    /// which chain pointers, if any, are inconsistent with the last known state;
    /// a break is not a hard rejection — callers decide how to respond
    pub chain_break: ChainBreak,
}
/// a successfully validated `#sync` message
pub struct ValidatedSync {
    /// deserialized commit object (signature-verified when a key was supplied);
    /// callers use its `data` CID and `rev` to detect noop and stale syncs
    pub commit_obj: AtpCommit<'static>,
}
113112114113pub struct ValidationOptions {
···127126 }
128127}
/// bundles validation configuration for the ingest worker.
/// all methods panic if called outside a tokio runtime context.
pub struct ValidationContext<'a> {
    /// validation knobs (e.g. whether to run MST inversion) shared across calls
    pub opts: &'a ValidationOptions,
}
impl ValidationContext<'_> {
    /// validate a `#commit` message against the last known repo state.
    /// thin wrapper over the free `validate_commit`, forwarding `self.opts`.
    pub fn validate_commit<'c>(
        &self,
        msg: &'c Commit<'c>,
        repo_state: &RepoState,
        signing_key: Option<&PublicKey>,
    ) -> Result<ValidatedCommit<'c>, CommitValidationError> {
        validate_commit(msg, repo_state, signing_key, self.opts)
    }

    /// validate a `#sync` message. thin wrapper over the free `validate_sync`;
    /// takes no repo state — staleness checks are left to the caller.
    pub fn validate_sync(
        &self,
        msg: &Sync<'_>,
        signing_key: Option<&PublicKey>,
    ) -> Result<ValidatedSync, SyncValidationError> {
        validate_sync(msg, signing_key)
    }
}
152152+130153/// validate an incoming `#commit` message.
131154///
132155/// on success, returns a `ValidatedCommit` carrying pre-parsed data so that
···135158/// chain-break (since/prevData mismatch) is NOT an error. callers check
136159/// `validated.chain_break.is_some()` and decide how to respond.
137160///
138138-/// - `repo_state`: `None` for the first-ever commit for this DID.
139161/// - `signing_key`: `None` when signature verification is disabled.
162162+///
163163+/// panics if called outside a tokio runtime context.
140164pub fn validate_commit<'c>(
141165 msg: &'c Commit<'c>,
142142- repo_state: Option<&RepoState>,
166166+ repo_state: &RepoState,
143167 signing_key: Option<&PublicKey>,
144168 opts: &ValidationOptions,
145145- handle: &tokio::runtime::Handle,
146169) -> Result<ValidatedCommit<'c>, CommitValidationError> {
170170+ let handle = tokio::runtime::Handle::current();
147171 const MAX_BLOCKS_BYTES: usize = 2_097_152; // 2 MiB
148172 const MAX_OPS: usize = 200;
149173 const MAX_RECORD_BYTES: usize = 1_048_576; // 1 MiB
···161185 }
162186163187 // 2. stale rev, skip if msg.rev <= last known rev (lexicographic order)
164164- if let Some(state) = repo_state {
165165- if let Some(root) = &state.root {
166166- if msg.rev.as_str() <= root.rev.to_tid().as_str() {
167167- return Err(CommitValidationError::StaleRev);
168168- }
188188+ if let Some(root) = &repo_state.root {
189189+ if msg.rev.as_str() <= root.rev.to_tid().as_str() {
190190+ return Err(CommitValidationError::StaleRev);
169191 }
170192 }
171193···203225 if let Some(key) = signing_key {
204226 commit_obj
205227 .verify(key)
206206- .map_err(|_| CommitValidationError::SigFailure { refreshed: false })?;
228228+ .map_err(|_| CommitValidationError::SigFailure)?;
207229 }
208230209231 let commit_obj = commit_obj.into_static();
210232211233 // 8. chain break checks
212212- let chain_break = chain_break_check(msg, repo_state);
234234+ let chain_break = repo_state
235235+ .root
236236+ .as_ref()
237237+ .map(|r| breaks_chain(msg, r))
238238+ .unwrap_or_default();
213239214240 // 9–10. per-record size limits and basic CBOR validity
215241 for op in &msg.ops {
···236262237263 // 11. MST inversion
238264 if opts.verify_mst {
239239- verify_mst(msg, &parsed, &commit_obj, handle).map_err(CommitValidationError::MstInvalid)?;
265265+ verify_mst(msg, &parsed, &commit_obj, &handle)
266266+ .map_err(CommitValidationError::MstInvalid)?;
240267 }
241268242269 Ok(ValidatedCommit {
···247274 })
248275}
249276250250-/// validate an incoming `#sync` message.
251251-///
252252-/// replaces `ops::verify_sync_event`, adding field consistency checks.
277277+/// panics if called outside a tokio runtime context.
253278pub fn validate_sync<'c>(
254279 msg: &'c Sync<'c>,
255280 signing_key: Option<&PublicKey>,
256256- handle: &tokio::runtime::Handle,
257281) -> Result<ValidatedSync, SyncValidationError> {
282282+ let handle = tokio::runtime::Handle::current();
258283 const MAX_BLOCKS_BYTES: usize = 2_097_152;
259284260285 // 1. size limit
···287312 if let Some(key) = signing_key {
288313 commit_obj
289314 .verify(key)
290290- .map_err(|_| SyncValidationError::SigFailure { refreshed: false })?;
315315+ .map_err(|_| SyncValidationError::SigFailure)?;
291316 }
292317293318 Ok(ValidatedSync {
294294- data_cid: Cid::ipld(commit_obj.data).into_static(),
295295- rev: commit_obj.rev.to_string(),
319319+ commit_obj: commit_obj.into_static(),
296320 })
297321}
298322299299-/// compare msg chain pointers against known repo state and return a `ChainBreak` if inconsistent.
300300-fn chain_break_check(msg: &Commit<'_>, repo_state: Option<&RepoState>) -> Option<ChainBreak> {
301301- let state = repo_state?;
302302- let root = state.root.as_ref()?;
303303-323323+fn breaks_chain(msg: &Commit<'_>, root: &crate::types::Commit) -> ChainBreak {
304324 // since should equal the rev of the previous commit; only flag when since is present and wrong
305325 let since_mismatch = msg
306326 .since
···317337 None => true, // no prev_data but we have a previous state is a chain break
318338 };
319339320320- if since_mismatch || prev_data_mismatch {
321321- Some(ChainBreak {
322322- since_mismatch,
323323- prev_data_mismatch,
324324- })
325325- } else {
326326- None
340340+ ChainBreak {
341341+ since_mismatch,
342342+ prev_data_mismatch,
327343 }
328344}
329345
+198-259
src/ingest/worker.rs
···11+use super::*;
12use crate::db::{self, keys};
23use crate::filter::FilterMode;
34use crate::ingest::stream::{Account, Commit, Identity, SubscribeReposMessage, Sync};
45use crate::ingest::validation::{
55- CommitValidationError, SyncValidationError, ValidationOptions, validate_commit, validate_sync,
66+ CommitValidationError, SyncValidationError, ValidatedCommit, ValidatedSync, ValidationContext,
77+ ValidationOptions,
68};
77-use crate::ingest::{BufferRx, IngestMessage};
89use crate::ops;
910use crate::resolver::{NoSigningKeyError, ResolverError};
1011use crate::state::AppState;
···14151516use jacquard_common::IntoStatic;
1617use jacquard_common::cowstr::ToCowStr;
1717-use jacquard_common::types::crypto::PublicKey;
1818use jacquard_common::types::did::Did;
1919use jacquard_repo::error::CommitError;
2020use miette::{Diagnostic, IntoDiagnostic, Result};
2121use rand::Rng;
2222-use smol_str::ToSmolStr;
2322use std::collections::hash_map::DefaultHasher;
2423use std::hash::{Hash, Hasher};
2524use std::sync::Arc;
2625use std::sync::atomic::Ordering::SeqCst;
2726use thiserror::Error;
2727+use tokio::runtime::Handle;
2828use tokio::sync::mpsc;
2929use tracing::{debug, error, info, trace, warn};
3030···5252 }
5353}
54545555-enum HostAuthorityOutcome {
5656- /// stored pds matched the source host immediately.
5757- Authorized,
5858- /// pds migrated: doc now points to this host, but our stored state was stale. trigger backfill.
5959- Migration,
6060- /// host did not match even after doc resolution. reject the message.
6161- WrongHost,
6262-}
6363-6455// gate returned by check_repo_state, tells the shard loop what to do with the message
6556enum ProcessGate<'s, 'c> {
6657 // did not exist in db, newly queued for backfill, drop
···10192 added_blocks: &'a mut i64,
10293 records_delta: &'a mut i64,
10394 broadcast_events: &'a mut Vec<BroadcastEvent>,
104104- handle: &'a tokio::runtime::Handle,
105105- validation_opts: &'a ValidationOptions,
9595+ vctx: ValidationContext<'a>,
10696}
1079710898impl FirehoseWorker {
···127117 // starts the worker threads and the main dispatch loop
128118 // the dispatch loop reads from the firehose channel and
129119 // distributes messages to shards based on the hash of the DID
130130- pub fn run(mut self, handle: tokio::runtime::Handle) -> Result<()> {
120120+ pub fn run(mut self, handle: Handle) -> Result<()> {
131121 let mut shards = Vec::with_capacity(self.num_shards);
132122133123 for i in 0..self.num_shards {
···191181 state: Arc<AppState>,
192182 verify_signatures: bool,
193183 ephemeral: bool,
194194- handle: tokio::runtime::Handle,
184184+ handle: Handle,
195185 validation_opts: Arc<ValidationOptions>,
196186 ) {
197187 let _guard = handle.enter();
···212202 added_blocks: &mut added_blocks,
213203 records_delta: &mut records_delta,
214204 broadcast_events: &mut broadcast_events,
215215- handle: &handle,
205205+ vctx: ValidationContext {
206206+ opts: &validation_opts,
207207+ },
216208 verify_signatures,
217209 ephemeral,
218218- validation_opts: &validation_opts,
219210 };
220211221212 match msg {
···258249 }
259250 }
260251 }
261261- IngestMessage::Firehose { relay, is_pds, msg } => {
262262- let _span = tracing::info_span!("firehose", relay = %relay).entered();
263263- // only enforce host authority when the source is a direct PDS connection
264264- let source_host = is_pds.then(|| relay.host_str()).flatten();
252252+ IngestMessage::Firehose {
253253+ relay: firehose,
254254+ is_pds,
255255+ msg,
256256+ } => {
257257+ let _span = tracing::info_span!("firehose", relay = %firehose).entered();
265258 let (did, seq) = match &msg {
266259 SubscribeReposMessage::Commit(c) => (&c.repo, c.seq),
267260 SubscribeReposMessage::Identity(i) => (&i.did, i.seq),
···278271 }
279272 error!(did = %did, err = %e, "error in check_repo_state");
280273 state
281281- .relay_cursors
282282- .peek_with(&relay, |_, c| c.store(seq, SeqCst));
274274+ .firehose_cursors
275275+ .peek_with(&firehose, |_, c| c.store(seq, SeqCst));
283276 continue;
284277 }
285278 };
···299292 }
300293 }
301294 ProcessGate::Ready(mut repo_state) => {
295295+ // first validate the pds host
296296+ if let Some(host) = firehose.host_str()
297297+ && is_pds
298298+ {
299299+ let authority = match Self::check_host_authority(
300300+ &mut ctx,
301301+ did,
302302+ &mut repo_state,
303303+ host,
304304+ ) {
305305+ Ok(a) => a,
306306+ Err(e) => {
307307+ error!(did = %did, err = %e, "failed to check host authority");
308308+ state
309309+ .firehose_cursors
310310+ .peek_with(&firehose, |_, c| c.store(seq, SeqCst));
311311+ continue;
312312+ }
313313+ };
314314+ match authority {
315315+ AuthorityOutcome::Authorized => {}
316316+ AuthorityOutcome::WasStale => {
317317+ // pds migrated: our data may be stale, backfill from the new host
318318+ warn!(did = %did, source_host = host, "pds migration detected, triggering backfill");
319319+ if let Err(e) =
320320+ Self::trigger_backfill(&mut ctx, did, repo_state)
321321+ {
322322+ error!(did = %did, err = %e, "failed to trigger backfill");
323323+ } else if let SubscribeReposMessage::Commit(commit) = &msg {
324324+ if let Err(e) = ops::persist_to_resync_buffer(
325325+ &state.db, did, commit,
326326+ ) {
327327+ error!(
328328+ did = %did, err = %e,
329329+ "failed to persist commit to resync_buffer"
330330+ );
331331+ }
332332+ }
333333+ state
334334+ .firehose_cursors
335335+ .peek_with(&firehose, |_, c| c.store(seq, SeqCst));
336336+ continue;
337337+ }
338338+ // todo: ideally ban pds
339339+ AuthorityOutcome::WrongHost { expected } => {
340340+ warn!(did = %did, got = host, expected = %expected, "commit rejected: wrong host");
341341+ state
342342+ .firehose_cursors
343343+ .peek_with(&firehose, |_, c| c.store(seq, SeqCst));
344344+ continue;
345345+ }
346346+ }
347347+ }
348348+302349 let pre_status = repo_state.status.clone();
303350304351 // if it was in deactivated/takendown/suspended state, we can mark it
···332379 "failed to transition inactive repo to synced"
333380 );
334381 state
335335- .relay_cursors
336336- .peek_with(&relay, |_, c| c.store(seq, SeqCst));
382382+ .firehose_cursors
383383+ .peek_with(&firehose, |_, c| {
384384+ c.store(seq, SeqCst)
385385+ });
337386 continue;
338387 }
339388 }
···341390 }
342391 }
343392344344- match Self::process_message(
345345- &mut ctx,
346346- &msg,
347347- did,
348348- repo_state,
349349- pre_status,
350350- source_host,
351351- ) {
393393+ match Self::process_message(&mut ctx, &msg, did, repo_state, pre_status)
394394+ {
352395 Ok(RepoProcessResult::Ok(_)) => {}
353396 Ok(RepoProcessResult::Deleted) => {
354397 state.db.update_count("repos", -1);
···386429 }
387430 }
388431389389- // todo: consider not using seqcst
390432 state
391391- .relay_cursors
392392- .peek_with(&relay, |_, c| c.store(seq, SeqCst));
433433+ .firehose_cursors
434434+ .peek_with(&firehose, |_, c| c.store(seq, SeqCst));
393435 }
394436 }
395437···407449 let _ = state.db.event_tx.send(evt);
408450 }
409451410410- state.db.inner.persist(fjall::PersistMode::Buffer).ok();
452452+ // state.db.inner.persist(fjall::PersistMode::Buffer).ok();
411453 }
412454 }
413455···428470 did: &Did,
429471 repo_state: RepoState<'s>,
430472 pre_status: RepoStatus,
431431- source_host: Option<&str>,
432473 ) -> Result<RepoProcessResult<'s, 'c>, IngestError> {
433474 match msg {
434475 SubscribeReposMessage::Commit(commit) => {
435476 trace!(did = %did, "processing commit");
436436- Self::handle_commit(ctx, did, repo_state, commit, source_host)
477477+ Self::handle_commit(ctx, did, repo_state, commit)
437478 }
438479 SubscribeReposMessage::Sync(sync) => {
439480 debug!(did = %did, "processing sync");
440440- Self::handle_sync(ctx, did, repo_state, sync, source_host)
481481+ Self::handle_sync(ctx, did, repo_state, sync)
441482 }
442483 SubscribeReposMessage::Identity(identity) => {
443484 debug!(did = %did, "processing identity");
···459500 did: &Did,
460501 mut repo_state: RepoState<'s>,
461502 commit: &'c Commit<'c>,
462462- source_host: Option<&str>,
463503 ) -> Result<RepoProcessResult<'s, 'c>, IngestError> {
464504 repo_state.advance_message_time(commit.time.0.timestamp_millis());
465505466466- if let Some(host) = source_host {
467467- match Self::check_host_authority(ctx, did, &mut repo_state, host)? {
468468- HostAuthorityOutcome::Authorized => {}
469469- HostAuthorityOutcome::Migration => {
470470- // pds migrated: our data may be stale, backfill from the new host
471471- warn!(did = %did, source_host = host, "pds migration detected, triggering backfill");
472472- let mut batch = ctx.state.db.inner.batch();
473473- let _repo_state = ops::update_repo_status(
474474- &mut batch,
475475- &ctx.state.db,
476476- did,
477477- repo_state,
478478- RepoStatus::Backfilling,
479479- )?;
480480- batch.commit().into_diagnostic()?;
481481- ctx.state
482482- .db
483483- .update_gauge_diff(&GaugeState::Synced, &GaugeState::Pending);
484484- ctx.state.notify_backfill();
485485- return Ok(RepoProcessResult::NeedsBackfill(Some(commit)));
486486- }
487487- // todo: ideally ban pds
488488- HostAuthorityOutcome::WrongHost => {
489489- warn!(did = %did, source_host = host, pds = ?repo_state.pds, "commit rejected: wrong host");
490490- return Ok(RepoProcessResult::Ok(repo_state));
491491- }
492492- }
493493- }
494494-495495- // validate the commit: stale rev, size limits, future rev, CAR parse, field
496496- // consistency, signature, and chain-break detection
497497- let signing_key = Self::fetch_key(ctx, did)?;
498498- let validated = match validate_commit(
499499- commit,
500500- Some(&repo_state),
501501- signing_key.as_ref(),
502502- ctx.validation_opts,
503503- ctx.handle,
504504- ) {
505505- Ok(v) => v,
506506- Err(CommitValidationError::StaleRev) => {
507507- debug!(
508508- did = %did,
509509- commit_rev = %commit.rev,
510510- "skipping replayed commit"
511511- );
512512- return Ok(RepoProcessResult::Ok(repo_state));
513513- }
514514- Err(CommitValidationError::SigFailure { .. }) => {
515515- // refresh key and retry once
516516- Self::refresh_doc(ctx, &mut repo_state, did)?;
517517- let refreshed_key = Self::fetch_key(ctx, did)?;
518518- match validate_commit(
519519- commit,
520520- Some(&repo_state),
521521- refreshed_key.as_ref(),
522522- ctx.validation_opts,
523523- ctx.handle,
524524- ) {
525525- Ok(v) => v,
526526- Err(e) => {
527527- warn!(did = %did, err = %e, "commit rejected after key refresh");
528528- return Ok(RepoProcessResult::Ok(repo_state));
529529- }
530530- }
531531- }
532532- Err(e) => {
533533- warn!(did = %did, err = %e, "commit rejected");
534534- return Ok(RepoProcessResult::Ok(repo_state));
535535- }
506506+ let Some(validated) = ctx.validate_commit(did, &mut repo_state, commit)? else {
507507+ return Ok(RepoProcessResult::Ok(repo_state));
536508 };
537509538538- // chain break: prev_data or since mismatch against last known state → backfill
539539- if let Some(cb) = &validated.chain_break {
510510+ if validated.chain_break.is_broken() {
540511 warn!(
541512 did = %did,
542542- since_mismatch = cb.since_mismatch,
543543- prev_data_mismatch = cb.prev_data_mismatch,
513513+ broken = ?validated.chain_break,
544514 "chain break detected, triggering backfill"
545515 );
546546- let mut batch = ctx.state.db.inner.batch();
547547- let _repo_state = ops::update_repo_status(
548548- &mut batch,
549549- &ctx.state.db,
550550- did,
551551- repo_state,
552552- RepoStatus::Backfilling,
553553- )?;
554554- batch.commit().into_diagnostic()?;
555555- ctx.state
556556- .db
557557- .update_gauge_diff(&GaugeState::Synced, &GaugeState::Pending);
558558- ctx.state.notify_backfill();
516516+ Self::trigger_backfill(ctx, did, repo_state)?;
517517+ // not updating repo state root commit since we are backfilling anyway
559518 return Ok(RepoProcessResult::NeedsBackfill(Some(commit)));
560519 }
561520···582541 did: &Did,
583542 mut repo_state: RepoState<'s>,
584543 sync: &'c Sync<'c>,
585585- source_host: Option<&str>,
586544 ) -> Result<RepoProcessResult<'s, 'c>, IngestError> {
587545 repo_state.advance_message_time(sync.time.0.timestamp_millis());
588546589589- if let Some(host) = source_host {
590590- match Self::check_host_authority(ctx, did, &mut repo_state, host)? {
591591- HostAuthorityOutcome::Authorized | HostAuthorityOutcome::Migration => {
592592- // migration is fine here — sync already triggers a backfill below
593593- }
594594- // todo: ideally ban pds
595595- HostAuthorityOutcome::WrongHost => {
596596- warn!(did = %did, source_host = host, pds = ?repo_state.pds, "sync rejected: wrong host");
597597- return Ok(RepoProcessResult::Ok(repo_state));
598598- }
599599- }
600600- }
601601-602602- // validate: size limit, CAR parse, field consistency, signature
603603- let signing_key = Self::fetch_key(ctx, did)?;
604604- let validated = match validate_sync(sync, signing_key.as_ref(), ctx.handle) {
605605- Ok(v) => v,
606606- Err(SyncValidationError::SigFailure { .. }) => {
607607- // refresh key and retry once (same pattern as handle_commit)
608608- Self::refresh_doc(ctx, &mut repo_state, did)?;
609609- let refreshed_key = Self::fetch_key(ctx, did)?;
610610- match validate_sync(sync, refreshed_key.as_ref(), ctx.handle) {
611611- Ok(v) => v,
612612- Err(e) => {
613613- warn!(did = %did, err = %e, "sync rejected after key refresh");
614614- return Ok(RepoProcessResult::Ok(repo_state));
615615- }
616616- }
617617- }
618618- Err(e) => {
619619- warn!(did = %did, err = %e, "sync rejected");
620620- return Ok(RepoProcessResult::Ok(repo_state));
621621- }
547547+ let Some(validated) = ctx.validate_sync(did, &mut repo_state, sync)? else {
548548+ return Ok(RepoProcessResult::Ok(repo_state));
622549 };
623550624551 // skip noop syncs (data CID unchanged)
625552 if let Some(current_commit) = &repo_state.root {
626626- if current_commit.data == validated.data_cid.to_ipld().expect("valid cid") {
553553+ if current_commit.data == validated.commit_obj.data {
627554 debug!(did = %did, "skipping noop sync");
628555 return Ok(RepoProcessResult::Ok(repo_state));
629556 }
630557631631- if validated.rev.as_str() <= current_commit.rev.to_tid().as_str() {
558558+ if validated.commit_obj.rev.as_str() <= current_commit.rev.to_tid().as_str() {
632559 debug!(did = %did, "skipping replayed sync");
633560 return Ok(RepoProcessResult::Ok(repo_state));
634561 }
635562 }
563563+ // not updating repo state root commit since we are backfilling anyway
636564637565 warn!(did = %did, "sync event, triggering backfill");
638638- let mut batch = ctx.state.db.inner.batch();
639639- repo_state = ops::update_repo_status(
640640- &mut batch,
641641- &ctx.state.db,
642642- did,
643643- repo_state,
644644- RepoStatus::Backfilling,
645645- )?;
646646- batch.commit().into_diagnostic()?;
647647- ctx.state
648648- .db
649649- .update_gauge_diff(&GaugeState::Synced, &GaugeState::Pending);
650650- ctx.state.notify_backfill();
566566+ let repo_state = Self::trigger_backfill(ctx, did, repo_state)?;
651567 Ok(RepoProcessResult::Ok(repo_state))
652568 }
653569···664580 }
665581 repo_state.advance_message_time(event_ms);
666582583583+ // todo: make this match relay sync behaviour
667584 let changed = if identity.handle.is_none() {
668585 // no handle sent is basically "invalidate your caches"
669586 ctx.state.resolver.invalidate_sync(did);
670670- let doc = ctx.handle.block_on(ctx.state.resolver.resolve_doc(did))?;
587587+ let doc = Handle::current().block_on(ctx.state.resolver.resolve_doc(did))?;
671588 repo_state.update_from_doc(doc)
672589 } else {
673590 let old_handle = repo_state.handle.clone();
···724641 status: account.status.as_ref().map(|s| s.to_cowstr().into_static()),
725642 };
726643727727- Self::refresh_doc(ctx, &mut repo_state, did)?;
644644+ ctx.refresh_doc(&mut repo_state, did)?;
728645729646 if !account.active {
730647 use crate::ingest::stream::AccountStatus;
···735652 return Ok(RepoProcessResult::Deleted);
736653 }
737654 status => {
738738- let target_status = match status {
739739- Some(status) => match status {
740740- AccountStatus::Deleted => {
741741- unreachable!("deleted account status is handled before")
742742- }
743743- AccountStatus::Takendown => RepoStatus::Takendown,
744744- AccountStatus::Suspended => RepoStatus::Suspended,
745745- AccountStatus::Deactivated => RepoStatus::Deactivated,
746746- AccountStatus::Throttled => RepoStatus::Error("throttled".into()),
747747- AccountStatus::Desynchronized => {
748748- RepoStatus::Error("desynchronized".into())
749749- }
750750- AccountStatus::Other(s) => {
751751- warn!(
752752- did = %did, status = %s,
753753- "unknown account status, will put in error state"
754754- );
755755- RepoStatus::Error(s.to_smolstr())
756756- }
757757- },
758758- None => {
759759- warn!(did = %did, "account inactive but no status provided");
760760- RepoStatus::Error("unknown".into())
761761- }
762762- };
655655+ let target_status = inactive_account_repo_status(did, status);
763656764657 if repo_state.status == target_status {
765658 debug!(did = %did, ?target_status, "account status unchanged");
···923816 let commit: Commit = rmp_serde::from_slice(&value).into_diagnostic()?;
924817925818 // buffered commits have already been source-checked on arrival; skip host check
926926- let res = Self::handle_commit(ctx, did, repo_state, &commit, None);
819819+ let res = Self::handle_commit(ctx, did, repo_state, &commit);
927820 let res = match res {
928821 Ok(r) => r,
929822 Err(e) => {
···952845 Ok(RepoProcessResult::Ok(repo_state))
953846 }
954847955955- /// check that `source_host` is the authoritative PDS for `did`.
956956- ///
957957- /// - `Authorized`: stored pds matched immediately (fast path).
958958- /// - `Migration`: stored pds was wrong but doc resolved to this host; caller should backfill.
959959- /// - `WrongHost`: host did not match even after doc resolution; caller should reject.
848848+ // transitions repo to Backfilling, commits the status change immediately (separate from
849849+ // ctx.batch), updates the gauge, and pings the backfill worker. returns the updated state.
850850+ fn trigger_backfill<'s>(
851851+ ctx: &mut WorkerContext,
852852+ did: &Did,
853853+ repo_state: RepoState<'s>,
854854+ ) -> Result<RepoState<'s>, IngestError> {
855855+ let mut batch = ctx.state.db.inner.batch();
856856+ let repo_state = ops::update_repo_status(
857857+ &mut batch,
858858+ &ctx.state.db,
859859+ did,
860860+ repo_state,
861861+ RepoStatus::Backfilling,
862862+ )?;
863863+ batch.commit().into_diagnostic()?;
864864+ ctx.state
865865+ .db
866866+ .update_gauge_diff(&GaugeState::Synced, &GaugeState::Pending);
867867+ ctx.state.notify_backfill();
868868+ Ok(repo_state)
869869+ }
870870+960871 fn check_host_authority(
961872 ctx: &mut WorkerContext,
962873 did: &Did,
963874 repo_state: &mut RepoState,
964875 source_host: &str,
965965- ) -> Result<HostAuthorityOutcome, IngestError> {
966966- let pds_host = repo_state
967967- .pds
968968- .as_deref()
969969- .and_then(|pds| url::Url::parse(pds).ok())
970970- .and_then(|u| u.host_str().map(str::to_owned));
971971-972972- if pds_host.as_deref() == Some(source_host) {
973973- return Ok(HostAuthorityOutcome::Authorized);
876876+ ) -> Result<AuthorityOutcome, IngestError> {
877877+ let outcome =
878878+ super::check_host_authority(&ctx.state.resolver, did, repo_state, source_host)?;
879879+ if !matches!(outcome, AuthorityOutcome::Authorized) {
880880+ ctx.batch.insert(
881881+ &ctx.state.db.repos,
882882+ keys::repo_key(did),
883883+ crate::db::ser_repo_state(repo_state)?,
884884+ );
974885 }
975975-976976- // unknown pds or host mismatch — resolve doc to verify or detect a migration
977977- Self::refresh_doc(ctx, repo_state, did)?;
978978-979979- let updated_host = repo_state
980980- .pds
981981- .as_deref()
982982- .and_then(|pds| url::Url::parse(pds).ok())
983983- .and_then(|u| u.host_str().map(str::to_owned));
984984-985985- if updated_host.as_deref() == Some(source_host) {
986986- Ok(HostAuthorityOutcome::Migration)
987987- } else {
988988- Ok(HostAuthorityOutcome::WrongHost)
989989- }
886886+ Ok(outcome)
990887 }
888888+}
991889992992- // refreshes the handle, pds url and signing key of a did
993993- fn refresh_doc(
994994- ctx: &mut WorkerContext,
995995- repo_state: &mut RepoState,
996996- did: &Did,
997997- ) -> Result<(), IngestError> {
998998- ctx.state.resolver.invalidate_sync(did);
999999- let doc = ctx.handle.block_on(ctx.state.resolver.resolve_doc(did))?;
10001000- repo_state.update_from_doc(doc);
10011001- repo_state.touch();
10021002- ctx.batch.insert(
10031003- &ctx.state.db.repos,
890890+impl WorkerContext<'_> {
891891+ fn refresh_doc(&mut self, repo_state: &mut RepoState, did: &Did) -> Result<(), IngestError> {
892892+ super::refresh_doc(&self.state.resolver, did, repo_state)?;
893893+ self.batch.insert(
894894+ &self.state.db.repos,
1004895 keys::repo_key(did),
10051005- crate::db::ser_repo_state(&repo_state)?,
896896+ crate::db::ser_repo_state(repo_state)?,
1006897 );
1007898 Ok(())
1008899 }
100990010101010- fn fetch_key(
10111011- ctx: &WorkerContext,
901901+ fn fetch_key(&self, did: &Did) -> Result<Option<PublicKey<'static>>> {
902902+ super::fetch_key(&self.state.resolver, self.verify_signatures, did)
903903+ }
904904+905905+ fn validate_commit<'s, 'c>(
906906+ &mut self,
1012907 did: &Did,
10131013- ) -> Result<Option<PublicKey<'static>>, IngestError> {
10141014- if ctx.verify_signatures {
10151015- let key = ctx
10161016- .handle
10171017- .block_on(ctx.state.resolver.resolve_signing_key(did))?;
10181018- Ok(Some(key))
10191019- } else {
10201020- Ok(None)
908908+ repo_state: &mut RepoState<'s>,
909909+ commit: &'c Commit<'c>,
910910+ ) -> Result<Option<ValidatedCommit<'c>>, IngestError> {
911911+ let key = self.fetch_key(did)?;
912912+ match self.vctx.validate_commit(commit, repo_state, key.as_ref()) {
913913+ Ok(v) => return Ok(Some(v)),
914914+ Err(CommitValidationError::StaleRev) => {
915915+ debug!(did = %did, commit_rev = %commit.rev, "skipping replayed commit");
916916+ return Ok(None);
917917+ }
918918+ Err(CommitValidationError::SigFailure) => {}
919919+ Err(e) => {
920920+ warn!(did = %did, err = %e, "commit rejected");
921921+ return Ok(None);
922922+ }
923923+ }
924924+925925+ self.refresh_doc(repo_state, did)?;
926926+ let key = self.fetch_key(did)?;
927927+ match self.vctx.validate_commit(commit, repo_state, key.as_ref()) {
928928+ Ok(v) => Ok(Some(v)),
929929+ Err(e) => {
930930+ warn!(did = %did, err = %e, "commit rejected after key refresh");
931931+ Ok(None)
932932+ }
933933+ }
934934+ }
935935+936936+ fn validate_sync<'s>(
937937+ &mut self,
938938+ did: &Did,
939939+ repo_state: &mut RepoState<'s>,
940940+ sync: &Sync<'_>,
941941+ ) -> Result<Option<ValidatedSync>, IngestError> {
942942+ let key = self.fetch_key(did)?;
943943+ match self.vctx.validate_sync(sync, key.as_ref()) {
944944+ Ok(v) => return Ok(Some(v)),
945945+ Err(SyncValidationError::SigFailure) => {}
946946+ Err(e) => {
947947+ warn!(did = %did, err = %e, "sync rejected");
948948+ return Ok(None);
949949+ }
950950+ }
951951+952952+ self.refresh_doc(repo_state, did)?;
953953+ let key = self.fetch_key(did)?;
954954+ match self.vctx.validate_sync(sync, key.as_ref()) {
955955+ Ok(v) => Ok(Some(v)),
956956+ Err(e) => {
957957+ warn!(did = %did, err = %e, "sync rejected after key refresh");
958958+ Ok(None)
959959+ }
1021960 }
1022961 }
1023962}
+8
src/lib.rs
···33pub mod filter;
44pub mod types;
5566+#[cfg(all(feature = "relay", feature = "events", not(debug_assertions)))]
77+compile_error!("`relay` and `events` features are mutually exclusive");
88+99+#[cfg(all(feature = "relay", feature = "backlinks", not(debug_assertions)))]
1010+compile_error!("`relay` and `backlinks` features are mutually exclusive");
1111+612pub(crate) mod api;
1313+#[cfg(feature = "events")]
714pub(crate) mod backfill;
815#[cfg(feature = "backlinks")]
916pub(crate) mod backlinks;
1017pub(crate) mod crawler;
1118pub(crate) mod db;
1219pub(crate) mod ingest;
2020+#[cfg(feature = "events")]
1321pub(crate) mod ops;
1422pub(crate) mod resolver;
1523pub(crate) mod state;
+2-4
src/state.rs
···1616 pub db: Db,
1717 pub resolver: Resolver,
1818 pub(crate) filter: FilterHandle,
1919- /// per-relay firehose cursors. values use interior mutability so they can be
2020- /// updated through the lock-free `peek_with` reads in the ingest worker.
2121- pub relay_cursors: scc::HashIndex<Url, AtomicI64>,
1919+ pub firehose_cursors: scc::HashIndex<Url, AtomicI64>,
2220 pub backfill_notify: Notify,
2321 pub crawler_enabled: watch::Sender<bool>,
2422 pub firehose_enabled: watch::Sender<bool>,
···5351 db,
5452 resolver,
5553 filter,
5656- relay_cursors,
5454+ firehose_cursors: relay_cursors,
5755 backfill_notify: Notify::new(),
5856 crawler_enabled,
5957 firehose_enabled,