···3434 - [x] #account: update account state
3535 - [x] #commit and #sync
3636- [x] make sure blocking db calls are in `spawn_blocking`!!
3737-- [~] db queries
3737+- [x] db queries
3838- [~] configuration
3939 - [~] copy applicable from tap
4040 - [ ] copy applicable from collectiondir
4141-- [~] sync1.1!!!
4141+- [x] sync1.1!!!
4242 - [x] verify #commit event
4343- - [ ] verify #sync event
4444- - [ ] inductive proof for #commits
4545-- [ ] actually firehose-index!!
4646- - [ ] extract collections-added/removed directly from CAR slice
4343+ - [x] verify #sync event
4444+ - [x] inductive proof for #commits
4545+- [~] actually firehose-index!!
4646+ - [x] extract collections-added/removed directly from CAR slice
4747 - [ ] (spend some time on tests here)
4848- - [ ] do the thing (write them to the db)
4848+ - [x] do the thing (write them to the db)
4949+ - [ ] swap in repo-stream
4950- [ ] lenient sync1.1
5051 - [ ] *don't* allow non-validating commits that look like sync1.1
5152 - [ ] ratchet by PDS host: be lenient if we have never seen a sync1.1-looking commit, always strict after we see one.
+94
src/server/list_repos.rs
···11+//! XRPC handlers.
22+//!
33+//! Uses `jacquard-axum`'s `ExtractXrpc` extractor to deserialise query parameters
44+//! directly into the lexicon-generated request types.
55+66+use axum::{
77+ Json,
88+ extract::State,
99+ http::StatusCode,
1010+ response::{IntoResponse, Response},
1111+};
1212+use jacquard_api::com_atproto::sync::list_repos::{ListReposOutput, ListReposRequest, Repo};
1313+use jacquard_axum::ExtractXrpc;
1414+use jacquard_common::types::string::Did;
1515+use serde_json::json;
1616+1717+use crate::storage::{DbRef, error::StorageError};
/// Failure modes of the `list_repos` handler.
///
/// Each variant is turned into an HTTP response by the [`IntoResponse`] impl
/// for this type.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ListReposError {
    /// The `cursor` query parameter could not be parsed as a DID.
    BadCursor,
    /// A storage-layer operation failed. The underlying error is not carried
    /// here; the `From<StorageError>` conversion logs it before discarding it.
    StorageError,
}
2323+2424+impl From<StorageError> for ListReposError {
2525+ fn from(e: StorageError) -> Self {
2626+ tracing::error!("Storage error: {e:#}");
2727+ Self::StorageError
2828+ }
2929+}
3030+3131+impl IntoResponse for ListReposError {
3232+ fn into_response(self) -> Response {
3333+ match self {
3434+ ListReposError::BadCursor => (
3535+ StatusCode::BAD_REQUEST,
3636+ Json(json!({
3737+ "error": "InvalidRequest",
3838+ "message": "the provided cursor was not valid",
3939+ })),
4040+ ),
4141+ ListReposError::StorageError => (
4242+ StatusCode::INTERNAL_SERVER_ERROR,
4343+ Json(json!({
4444+ "error": "InternalError",
4545+ "message": "Storage issue",
4646+ })),
4747+ ),
4848+ }
4949+ .into_response()
5050+ }
5151+}
5252+5353+/// Handler for `GET /xrpc/com.atproto.sync.listReposByCollection`.
5454+///
5555+/// Performs a cursor-paginated prefix scan over the rbc keyspace, returning
5656+/// up to `limit` DIDs that have at least one record in `collection`.
5757+///
5858+/// The cursor is the last DID from the previous page. On each request we
5959+/// scan for `limit + 1` results: if the extra result appears there is a next
6060+/// page, and we return the last DID of the current page as the next cursor.
6161+///
6262+/// the `limit` parameter is clamped at 10,000 instead of 2,000 as defined in
6363+/// the lexicon, because bluesky's own collectiondir only clamps at 10k.
6464+pub async fn list_repos(
6565+ State(db): State<DbRef>,
6666+ ExtractXrpc(req): ExtractXrpc<ListReposRequest>,
6767+) -> Result<Json<ListReposOutput<'static>>, ListReposError> {
6868+ let limit = req.limit.unwrap_or(500).clamp(1, 10_000) as usize;
6969+7070+ // Parse the cursor as a DID, if one was provided.
7171+ let cursor = req
7272+ .cursor
7373+ .map(Did::new_owned)
7474+ .transpose()
7575+ .map_err(|_| ListReposError::BadCursor)?;
7676+7777+ let (dids, next) = crate::storage::collection_index::scan_rbc_active(&db, None, cursor, limit)?;
7878+7979+ let repos = dids
8080+ .into_iter()
8181+ .map(|did| Repo {
8282+ did,
8383+ extra_data: None,
8484+ })
8585+ .collect();
8686+8787+ let next_cursor = next.map(|cursor| cursor.into());
8888+8989+ Ok(Json(ListReposOutput {
9090+ cursor: next_cursor,
9191+ repos,
9292+ extra_data: None,
9393+ }))
9494+}
···212212 Ok(db.ks.get(cbr(did, collection))?.is_some())
213213}
214214215215+/// Like [`scan_rbc`] but skips DIDs whose [`AccountStatus`] is not `Active`.
216216+///
217217+/// Each DID in the rbc scan receives a point-read against the repo info key to
218218+/// check its status before being included in the result. The returned cursor is
219219+/// the first DID of the next page (inclusive), so inactive DIDs interspersed
220220+/// with subsequent pages are filtered on the next call.
221221+pub fn scan_rbc_active(
222222+ db: &DbRef,
223223+ collection: Nsid<'_>,
224224+ cursor: Option<Did<'_>>,
225225+ limit: usize,
226226+) -> StorageResult<(Vec<Did<'static>>, Option<Did<'static>>)> {
227227+ use crate::storage::repo::{AccountStatus, get_status};
228228+229229+ let prefix = rbc_prefix(collection.clone());
230230+ let prefix_len = prefix.len();
231231+ let lower_did = cursor.map(rbc_suffix).unwrap_or_default();
232232+ let mut ranger = db.ks.range(prefixed_range(prefix.clone(), lower_did..));
233233+234234+ let mut dids = Vec::with_capacity(limit);
235235+ for guard in ranger.by_ref() {
236236+ let (k, _v) = guard.into_inner()?;
237237+ let did = rbc_parse_did(&k, prefix_len)?;
238238+239239+ if !matches!(get_status(db, did.clone())?, Some(AccountStatus::Active)) {
240240+ continue;
241241+ }
242242+243243+ dids.push(did);
244244+ if dids.len() >= limit {
245245+ break;
246246+ }
247247+ }
248248+249249+ let next = if let Some(guard) = ranger.next() {
250250+ let key = guard.key()?;
251251+ Some(rbc_parse_did(&key, prefix_len)?)
252252+ } else {
253253+ None
254254+ };
255255+256256+ Ok((dids, next))
257257+}
258258+215259/// Insert a `(collection, did)` pair into both the rbc and cbr indexes.
216260pub fn insert(db: &DbRef, did: Did<'_>, collection: Nsid<'_>) -> StorageResult<()> {
217261 let mut batch = db.database.batch();
+14
src/storage/repo.rs
···222222 Ok(RepoPrev { rev, prev_data })
223223}
224224225225+/// Read only the [`AccountStatus`] for `did` — cheaper than [`get`] because it
226226+/// skips the `RepoPrev` read and doesn't take a snapshot.
227227+///
228228+/// Returns `None` if the repo is not indexed.
229229+pub fn get_status(db: &DbRef, did: Did<'_>) -> StorageResult<Option<AccountStatus>> {
230230+ let k = key(did);
231231+ let Some(bytes) = db.ks.get(&k)? else {
232232+ return Ok(None);
233233+ };
234234+ let key_str = String::from_utf8_lossy(&k);
235235+ let info = decode_repo_info(&bytes, &key_str)?;
236236+ Ok(Some(info.status))
237237+}
238238+225239/// Insert a [`RepoInfo`] with `state = Pending` for `did` if no record exists.
226240///
227241/// Returns `true` if a new record was inserted, `false` if one already existed.