···263263 also returns the handle, PDS URL and the atproto signing key (these are not
264264 available until the repo has been backfilled at least once).
265265- `PUT /repos`: explicitly track repositories. accepts an NDJSON body of `{"did": "..."}` (or JSON array of the same).
266266-- `DELETE /repos`: untrack repositories. accepts an NDJSON body of `{"did": "..."}` (or JSON array of the same).
266266+ only affects repositories that are not known or are untracked.
267267+ returns a JSON array of the DIDs that were tracked.
268268+- `DELETE /repos`: untrack repositories.
269269+ accepts an NDJSON body of `{"did": "..."}` (or JSON array of the same).
270270+ only affects repositories that are currently tracked.
271271+ returns a JSON array of the DIDs that were untracked.
272272+- `POST /repos/resync`: force a new backfill for one or more repositories.
273273+ accepts an NDJSON body of `{"did": "..."}` (or JSON array of the same).
274274+ only affects repositories hydrant already knows about; a known but untracked repository is marked as tracked again.
275275+ returns a JSON array of the DIDs that were queued.
267276268277### database operations
269278
···11use std::sync::Arc;
2233use chrono::{DateTime, Utc};
44+use fjall::OwnedWriteBatch;
45use jacquard_common::cowstr::ToCowStr;
56use jacquard_common::types::cid::{Cid, IpldCid};
67use jacquard_common::types::ident::AtIdentifier;
···1314use url::Url;
14151516use crate::db::types::DbRkey;
1616-use crate::db::{self, keys, ser_repo_state};
1717+use crate::db::{self, Db, keys, ser_repo_state};
1718use crate::state::AppState;
1819use crate::types::{GaugeState, RepoState, RepoStatus};
1920···8485 })
8586 }
86878787- /// fetch the current state of a single repository. returns `None` if hydrant
8888- /// has never seen this DID.
8888+ /// fetch the current state of a repository.
8989+ /// returns `None` if hydrant has never seen this repository.
8990 pub async fn info(&self, did: &Did<'_>) -> Result<Option<RepoInfo>> {
9091 self.get(did)?.info().await
9192 }
92939494+ fn _resync(
9595+ db: &Db,
9696+ did: &Did<'_>,
9797+ batch: &mut OwnedWriteBatch,
9898+ transitions: &mut Vec<(GaugeState, GaugeState)>,
9999+ ) -> Result<bool> {
100100+ let did_key = keys::repo_key(did);
101101+ let repo_bytes = db.repos.get(&did_key).into_diagnostic()?;
102102+ let existing = repo_bytes
103103+ .as_deref()
104104+ .map(db::deser_repo_state)
105105+ .transpose()?;
106106+107107+ if let Some(mut repo_state) = existing {
108108+ let resync = db.resync.get(&did_key).into_diagnostic()?;
109109+ let old = db::Db::repo_gauge_state(&repo_state, resync.as_deref());
110110+ repo_state.tracked = true;
111111+ repo_state.status = RepoStatus::Backfilling;
112112+ batch.insert(&db.repos, &did_key, ser_repo_state(&repo_state)?);
113113+ batch.insert(
114114+ &db.pending,
115115+ keys::pending_key(repo_state.index_id),
116116+ &did_key,
117117+ );
118118+ batch.remove(&db.resync, &did_key);
119119+ transitions.push((old, GaugeState::Pending));
120120+ return Ok(true);
121121+ }
122122+123123+ Ok(false)
124124+ }
125125+126126+ /// request one or more repositories to be resynced.
127127+ ///
128128+ /// note that they may not immediately start backfilling if:
129129+ /// - other repos already filled the backfill concurrency limit,
130130+ /// - or there are many repos pending already.
131131+ pub async fn resync(
132132+ &self,
133133+ dids: impl IntoIterator<Item = Did<'_>>,
134134+ ) -> Result<Vec<Did<'static>>> {
135135+ let dids: Vec<Did<'static>> = dids.into_iter().map(|d| d.into_static()).collect();
136136+ let state = self.0.clone();
137137+138138+ let (queued, transitions) = tokio::task::spawn_blocking(move || {
139139+ let db = &state.db;
140140+ let mut batch = db.inner.batch();
141141+ let mut queued: Vec<Did<'static>> = Vec::new();
142142+ let mut transitions: Vec<(GaugeState, GaugeState)> = Vec::new();
143143+144144+ for did in dids {
145145+ if Self::_resync(db, &did, &mut batch, &mut transitions)? {
146146+ queued.push(did);
147147+ }
148148+ }
149149+150150+ batch.commit().into_diagnostic()?;
151151+ Ok::<_, miette::Report>((queued, transitions))
152152+ })
153153+ .await
154154+ .into_diagnostic()??;
155155+156156+ for (old, new) in transitions {
157157+ self.0.db.update_gauge_diff_async(&old, &new).await;
158158+ }
159159+ if !queued.is_empty() {
160160+ self.0.notify_backfill();
161161+ }
162162+163163+ Ok(queued)
164164+ }
165165+93166 /// explicitly track one or more repositories, enqueuing them for backfill if needed.
94167 ///
9595- /// - if a DID is new, a fresh [`RepoState`] is created and backfill is queued.
9696- /// - if a DID is already known but untracked, it is marked tracked and re-enqueued.
9797- /// - if a DID is already tracked, this is a no-op.
9898- pub async fn track(&self, dids: impl IntoIterator<Item = Did<'_>>) -> Result<()> {
168168+ /// - if a repo is new, a fresh [`RepoState`] is created and backfill is queued.
169169+ /// - if a repo is already known but untracked, it is marked tracked and re-enqueued.
170170+ /// - if a repo is already tracked, this is a no-op.
171171+ pub async fn track(
172172+ &self,
173173+ dids: impl IntoIterator<Item = Did<'_>>,
174174+ ) -> Result<Vec<Did<'static>>> {
99175 let dids: Vec<Did<'static>> = dids.into_iter().map(|d| d.into_static()).collect();
100176 let state = self.0.clone();
101177102102- let (new_count, transitions) = tokio::task::spawn_blocking(move || {
178178+ let (new_count, queued, transitions) = tokio::task::spawn_blocking(move || {
103179 let db = &state.db;
104180 let mut batch = db.inner.batch();
105181 let mut added = 0i64;
182182+ let mut queued: Vec<Did<'static>> = Vec::new();
106183 let mut transitions: Vec<(GaugeState, GaugeState)> = Vec::new();
107184 let mut rng = rand::rng();
108185109109- for did in &dids {
110110- let did_key = keys::repo_key(did);
186186+ for did in dids {
187187+ let did_key = keys::repo_key(&did);
111188 let repo_bytes = db.repos.get(&did_key).into_diagnostic()?;
112189 let existing = repo_bytes
113190 .as_deref()
114191 .map(db::deser_repo_state)
115192 .transpose()?;
116193117117- if let Some(mut repo_state) = existing {
118118- if !repo_state.tracked {
119119- let resync = db.resync.get(&did_key).into_diagnostic()?;
120120- let old = db::Db::repo_gauge_state(&repo_state, resync.as_deref());
121121- repo_state.tracked = true;
122122- batch.insert(&db.repos, &did_key, ser_repo_state(&repo_state)?);
123123- batch.insert(
124124- &db.pending,
125125- keys::pending_key(repo_state.index_id),
126126- &did_key,
127127- );
128128- batch.remove(&db.resync, &did_key);
129129- transitions.push((old, GaugeState::Pending));
194194+ if let Some(repo_state) = existing {
195195+ // the double read here is an ok tradeoff, the block will be in read-cache anyway
196196+ if !repo_state.tracked && Self::_resync(db, &did, &mut batch, &mut transitions)?
197197+ {
198198+ queued.push(did);
130199 }
131200 } else {
132201 let repo_state = RepoState::backfilling(rng.next_u64());
···137206 &did_key,
138207 );
139208 added += 1;
209209+ queued.push(did);
140210 transitions.push((GaugeState::Synced, GaugeState::Pending));
141211 }
142212 }
143213144214 batch.commit().into_diagnostic()?;
145145- Ok::<_, miette::Report>((added, transitions))
215215+ Ok::<_, miette::Report>((added, queued, transitions))
146216 })
147217 .await
148218 .into_diagnostic()??;
···154224 self.0.db.update_gauge_diff_async(&old, &new).await;
155225 }
156226 self.0.notify_backfill();
157157- Ok(())
227227+ Ok(queued)
158228 }
159229160230 /// stop tracking one or more repositories. hydrant will stop processing new events
161231 /// for them and remove them from the pending/resync queues, but existing indexed
162232 /// records are **not** deleted.
163163- pub async fn untrack(&self, dids: impl IntoIterator<Item = Did<'_>>) -> Result<()> {
233233+ pub async fn untrack(
234234+ &self,
235235+ dids: impl IntoIterator<Item = Did<'_>>,
236236+ ) -> Result<Vec<Did<'static>>> {
164237 let dids: Vec<Did<'static>> = dids.into_iter().map(|d| d.into_static()).collect();
165238 let state = self.0.clone();
166239167167- let gauge_decrements = tokio::task::spawn_blocking(move || {
240240+ let (untracked, gauge_decrements) = tokio::task::spawn_blocking(move || {
168241 let db = &state.db;
169242 let mut batch = db.inner.batch();
243243+ let mut untracked: Vec<Did<'static>> = Vec::new();
170244 let mut gauge_decrements = Vec::new();
171245172172- for did in &dids {
173173- let did_key = keys::repo_key(did);
246246+ for did in dids {
247247+ let did_key = keys::repo_key(&did);
174248 let repo_bytes = db.repos.get(&did_key).into_diagnostic()?;
175249 let existing = repo_bytes
176250 .as_deref()
···189263 if old != GaugeState::Synced {
190264 gauge_decrements.push(old);
191265 }
266266+ untracked.push(did);
192267 }
193268 }
194269 }
195270196271 batch.commit().into_diagnostic()?;
197197- Ok::<_, miette::Report>(gauge_decrements)
272272+ Ok::<_, miette::Report>((untracked, gauge_decrements))
198273 })
199274 .await
200275 .into_diagnostic()??;
···205280 .update_gauge_diff_async(&gauge, &GaugeState::Synced)
206281 .await;
207282 }
208208- Ok(())
283283+ Ok(untracked)
209284 }
210285}
211286