···1111};
1212use jacquard_api::com_atproto::sync::list_repos::{ListReposOutput, ListReposRequest, Repo};
1313use jacquard_axum::ExtractXrpc;
1414-use jacquard_common::types::string::Did;
1414+use jacquard_common::CowStr;
1515+use jacquard_common::types::string::{Cid as JCid, Did};
1516use serde_json::json;
16171718use crate::storage::{DbRef, error::StorageError};
···5051 }
5152}
52535353-/// Handler for `GET /xrpc/com.atproto.sync.listReposByCollection`.
5454+/// Handler for `GET /xrpc/com.atproto.sync.listRepos`.
5455///
5555-/// Performs a cursor-paginated prefix scan over the rbc keyspace, returning
5656-/// up to `limit` DIDs that have at least one record in `collection`.
5656+/// Performs a cursor-paginated scan over the repo keyspace, returning up to
5757+/// `limit` repos with their current `head` CID, `rev` TID, and account status.
5758///
5858-/// The cursor is the last DID from the previous page. On each request we
5959-/// scan for `limit + 1` results: if the extra result appears there is a next
6060-/// page, and we return the last DID of the current page as the next cursor.
5959+/// The cursor is the first DID of the next page (inclusive). On each request we
6060+/// scan for `limit` repos; if the scan yields a next DID, it is returned as
6161+/// the cursor for the following page.
6162///
6262-/// the `limit` parameter is clamped at 10,000 instead of 2,000 as defined in
6363-/// the lexicon, because bluesky's own collectiondir only clamps at 10k.
6363+/// Repos without a chain tip (not yet synced, so no `head`/`rev` available)
6464+/// are omitted from the response, but still count against the scan window for
6565+/// cursor advancement.
6666+///
6767+/// The `limit` parameter defaults to 500 and is clamped to the range
6868+/// 1..=1000.
6469pub async fn list_repos(
6570 State(db): State<DbRef>,
6671 ExtractXrpc(req): ExtractXrpc<ListReposRequest>,
6772) -> Result<Json<ListReposOutput<'static>>, ListReposError> {
6868- let limit = req.limit.unwrap_or(500).clamp(1, 10_000) as usize;
7373+ let limit = req.limit.unwrap_or(500).clamp(1, 1000) as usize;
69747070- // Parse the cursor as a DID, if one was provided.
7175 let cursor = req
7276 .cursor
7377 .map(Did::new_owned)
7478 .transpose()
7579 .map_err(|_| ListReposError::BadCursor)?;
76807777- let (dids, next) = crate::storage::collection_index::scan_rbc_active(&db, None, cursor, limit)?;
8181+ let (entries, next) = tokio::task::spawn_blocking({
8282+ let db = db.clone();
8383+ move || crate::storage::repo::scan_repos(&db, cursor, limit)
8484+ })
8585+ .await
8686+ .map_err(|_| ListReposError::StorageError)??;
78877979- let repos = dids
8888+ let repos: Vec<Repo<'static>> = entries
8089 .into_iter()
8181- .map(|did| Repo {
8282- did,
8383- extra_data: None,
9090+ .filter_map(|(did, info, prev)| {
9191+ let prev = prev?;
9292+ // Parse the stored MST-root CID bytes back into a typed CID.
9393+ // Use the Str variant explicitly: jacquard-common's Cid::Ipld
9494+ // delegates serialization to the `cid` crate which emits raw bytes
9595+ // in JSON rather than a string.
9696+ let head = match JCid::new_owned(&prev.prev_data) {
9797+ Ok(c) => JCid::Str(CowStr::copy_from_str(c.as_str())),
9898+ Err(e) => {
9999+ tracing::warn!(
100100+ did = %did.as_str(),
101101+ error = %e,
102102+ "invalid CID bytes in repo prev; skipping repo in listRepos",
103103+ );
104104+ return None;
105105+ }
106106+ };
107107+ Some(Repo {
108108+ active: Some(info.status.is_active()),
109109+ did,
110110+ head,
111111+ rev: prev.rev,
112112+ status: info.status.status().map(CowStr::copy_from_str),
113113+ extra_data: None,
114114+ })
84115 })
85116 .collect();
861178787- let next_cursor = next.map(|cursor| cursor.into());
118118+ let next_cursor = next.map(|did| did.into());
8811989120 Ok(Json(ListReposOutput {
90121 cursor: next_cursor,
···331331 Ok(())
332332}
333333334334+/// Iterate over repos in the `rep` keyspace, starting at `cursor` (inclusive).
335335+///
336336+/// Returns at most `limit` entries. `next` is the first DID of the next page,
337337+/// suitable for use as the cursor on the next request.
338338+///
339339+/// All repos are returned regardless of account status or sync state. Repos
340340+/// that have not yet been synced have `None` for the [`RepoPrev`] field.
341341+pub fn scan_repos(
342342+ db: &DbRef,
343343+ cursor: Option<Did<'_>>,
344344+ limit: usize,
345345+) -> StorageResult<(
346346+ Vec<(Did<'static>, RepoInfo, Option<RepoPrev>)>,
347347+ Option<Did<'static>>,
348348+)> {
349349+ let prefix_len = PREFIX_REPO.len();
350350+351351+ let start_key: Vec<u8> = {
352352+ let mut k = PREFIX_REPO.to_vec();
353353+ if let Some(ref did) = cursor {
354354+ k.extend_from_slice(did.as_str().as_bytes());
355355+ }
356356+ k
357357+ };
358358+359359+ let mut ranger = db.ks.range(start_key..);
360360+ let mut entries = Vec::with_capacity(limit);
361361+362362+ for guard in ranger.by_ref() {
363363+ let (k, v) = guard.into_inner()?;
364364+ if !k.starts_with(&PREFIX_REPO) {
365365+ break;
366366+ }
367367+ let did_str = std::str::from_utf8(&k[prefix_len..]).map_err(|_| StorageError::Corrupt {
368368+ key: String::from_utf8_lossy(&k).to_string(),
369369+ reason: "non-UTF-8 DID in repo key",
370370+ })?;
371371+ let did = Did::new_owned(did_str).map_err(|_| StorageError::Corrupt {
372372+ key: String::from_utf8_lossy(&k).to_string(),
373373+ reason: "invalid DID in repo key",
374374+ })?;
375375+ let key_str = String::from_utf8_lossy(&k).into_owned();
376376+ let info = decode_repo_info(&v, &key_str)?;
377377+ let pk = prev_key(did.clone());
378378+ let prev = db
379379+ .ks
380380+ .get(&pk)?
381381+ .map(|b| {
382382+ let pk_str = String::from_utf8_lossy(&pk).into_owned();
383383+ decode_repo_prev(&b, &pk_str)
384384+ })
385385+ .transpose()?;
386386+ entries.push((did, info, prev));
387387+ if entries.len() >= limit {
388388+ break;
389389+ }
390390+ }
391391+392392+ let next = loop {
393393+ let Some(guard) = ranger.next() else {
394394+ break None;
395395+ };
396396+ let key = guard.key()?;
397397+ if !key.starts_with(&PREFIX_REPO) {
398398+ break None;
399399+ }
400400+ let did_str = match std::str::from_utf8(&key[prefix_len..]) {
401401+ Ok(s) => s,
402402+ Err(_) => continue,
403403+ };
404404+ match Did::new_owned(did_str) {
405405+ Ok(did) => break Some(did),
406406+ Err(_) => continue,
407407+ }
408408+ };
409409+410410+ Ok((entries, next))
411411+}
412412+334413#[cfg(test)]
335414mod tests {
336415 use super::*;