···21212222use crate::storage::{DbRef, error::StorageError};
/// Failure modes of the `listReposByCollection` handler.
///
/// Each variant maps to one HTTP error response via the `IntoResponse` impl.
#[derive(Debug)]
pub enum ListReposByCollectionError {
    /// The `cursor` query parameter could not be parsed as a DID.
    BadCursor,
    /// The storage layer reported an error; details are logged, not returned.
    StorageError,
}
2828+2929+impl From<StorageError> for ListReposByCollectionError {
3030+ fn from(e: StorageError) -> Self {
3131+ tracing::error!("Storage error: {e:#}");
3232+ Self::StorageError
3333+ }
3434+}
3535+3636+impl IntoResponse for ListReposByCollectionError {
3737+ fn into_response(self) -> Response {
3838+ match self {
3939+ ListReposByCollectionError::BadCursor => (
4040+ StatusCode::BAD_REQUEST,
4141+ Json(json!({
4242+ "error": "InvalidRequest",
4343+ "message": "the provided cursor was not valid",
4444+ })),
4545+ ),
4646+ ListReposByCollectionError::StorageError => (
4747+ StatusCode::INTERNAL_SERVER_ERROR,
4848+ Json(json!({
4949+ "error": "InternalError",
5050+ "message": "Storage issue",
5151+ })),
5252+ ),
5353+ }
5454+ .into_response()
5555+ }
5656+}
5757+2458/// Handler for `GET /xrpc/com.atproto.sync.listReposByCollection`.
2559///
2660/// Performs a cursor-paginated prefix scan over the rbc keyspace, returning
···2963/// The cursor is the last DID from the previous page. On each request we
3064/// scan for `limit + 1` results: if the extra result appears there is a next
3165/// page, and we return the last DID of the current page as the next cursor.
6666+///
6767+/// the `limit` parameter is clamped at 10,000 instead of 2,000 as defined in
6868+/// the lexicon, because bluesky's own collectiondir only clamps at 10k.
3269pub async fn list_repos_by_collection(
3370 State(db): State<DbRef>,
3471 ExtractXrpc(req): ExtractXrpc<ListReposByCollectionRequest>,
3535-) -> Result<Json<ListReposByCollectionOutput<'static>>, StatusCode> {
3636- let limit = req.limit.unwrap_or(500).clamp(1, 2000) as usize;
7272+) -> Result<Json<ListReposByCollectionOutput<'static>>, ListReposByCollectionError> {
7373+ let limit = req.limit.unwrap_or(500).clamp(1, 10_000) as usize;
37743875 // Parse the cursor as a DID, if one was provided.
3939- let cursor: Option<Did<'static>> = req
7676+ let cursor = req
4077 .cursor
4141- .as_ref()
4278 .map(Did::new_owned)
4379 .transpose()
4444- .map_err(|_| StatusCode::BAD_REQUEST)?;
8080+ .map_err(|_| ListReposByCollectionError::BadCursor)?;
45814646- // Scan one extra to detect whether a next page exists.
4747- let mut dids = crate::storage::list_by::scan_rbc(&db, req.collection, cursor, limit + 1)
4848- .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
4949-5050- // If we got more than limit results, set the next-page cursor to the last
5151- // DID of the current page and drop the extra.
5252- let next_cursor = if dids.len() > limit {
5353- let cursor_did = dids[limit - 1].clone();
5454- dids.truncate(limit);
5555- Some(cursor_did.into()) // Did<'static> → CowStr<'static>
5656- } else {
5757- None
5858- };
8282+ let (dids, next) = crate::storage::list_by::scan_rbc(&db, req.collection, cursor, limit)?;
59836084 let repos = dids
6185 .into_iter()
···6488 extra_data: None,
6589 })
6690 .collect();
9191+9292+ let next_cursor = next.map(|cursor| cursor.into());
67936894 Ok(Json(ListReposByCollectionOutput {
6995 cursor: next_cursor,
+4-2
src/storage/cursor.rs
···1111/// Returns `None` if no cursor has been persisted yet (start from live).
1212pub fn get_subscribe_cursor(db: &DbRef, host: &str) -> StorageResult<Option<u64>> {
1313 let key = keys::subscribe_cursor(host);
1414+ let key_str = String::from_utf8_lossy(&key).to_string();
1415 match db.ks.get(key)? {
1516 None => Ok(None),
1617 Some(v) => {
1717- let bytes: [u8; 8] = v.as_ref().try_into().map_err(|e| {
1818- StorageError::CorruptedCursor(format!("array from slice failed: {e}"))
1818+ let bytes: [u8; 8] = v.as_ref().try_into().map_err(|_| StorageError::Corrupt {
1919+ key: key_str,
2020+ reason: "asdf",
1921 })?;
2022 Ok(Some(u64::from_be_bytes(bytes)))
2123 }
···66use jacquard_common::types::nsid::Nsid;
77use jacquard_common::types::string::Did;
8899-// Partition key prefixes
99+use super::{StorageError, StorageResult};
1010+1111+// Partition key prefixes: must be unique
1012pub const PREFIX_RBC: &[u8] = b"rbc";
1113pub const PREFIX_CBR: &[u8] = b"cbr";
1214pub const PREFIX_REPO: &[u8] = b"repo";
···1517pub const PREFIX_LIST_REPOS: &[u8] = b"listRepos";
1618pub const PREFIX_RESYNC_QUEUE: &[u8] = b"repoResyncQueue";
17191818-const SEP: u8 = b'\0';
2020+const NUL_SEP: u8 = b'\0';
19212022/// `"rbc"\0<collection>\0<did>` — main collection index.
2123///
···2527 let d = did.as_str();
2628 let mut key = Vec::with_capacity(PREFIX_RBC.len() + 1 + col.len() + 1 + d.len());
2729 key.extend_from_slice(PREFIX_RBC);
2828- key.push(SEP);
3030+ key.push(NUL_SEP);
2931 key.extend_from_slice(col.as_bytes());
3030- key.push(SEP);
3232+ key.push(NUL_SEP);
3133 key.extend_from_slice(d.as_bytes());
3234 key
3335}
···3739 let col = collection.as_str();
3840 let mut key = Vec::with_capacity(PREFIX_RBC.len() + 1 + col.len() + 1);
3941 key.extend_from_slice(PREFIX_RBC);
4040- key.push(SEP);
4242+ key.push(NUL_SEP);
4143 key.extend_from_slice(col.as_bytes());
4242- key.push(SEP);
4444+ key.push(NUL_SEP);
4345 key
4446}
45474646-/// Parse the DID suffix from a full rbc key, given the prefix length.
4848+/// the serialized DID, no null prefix or terminator
4749///
4848-/// Returns `None` if the suffix bytes are not valid UTF-8 or not a valid DID.
4949-/// Typically used after prefix-scanning the rbc partition.
5050-pub fn rbc_parse_did(key: &[u8], prefix_len: usize) -> Option<Did<'static>> {
5151- let did_str = std::str::from_utf8(key.get(prefix_len..)?).ok()?;
5252- Did::new_owned(did_str).ok()
5050+/// rbc_prefix has a trailing null-terminator so this can be joined directly
5151+pub fn rbc_suffix(did: Did<'_>) -> Vec<u8> {
5252+ did.as_str().as_bytes().to_vec()
5353+}
5454+5555+/// Parse the DID suffix from a full rbc key, given the prefix length.
5656+pub fn rbc_parse_did(key: &[u8], prefix_len: usize) -> StorageResult<Did<'static>> {
5757+ let key_str = String::from_utf8_lossy(key);
5858+ let suffix = key.get(prefix_len..).ok_or(StorageError::Corrupt {
5959+ key: key_str.to_string(),
6060+ reason: "invalid prefix when parsing did in rbc",
6161+ })?;
6262+ let did_str = std::str::from_utf8(suffix).map_err(|_| StorageError::Corrupt {
6363+ key: key_str.to_string(),
6464+ reason: "invalid string suffix for DID in rbc suffix",
6565+ })?;
6666+ let did = Did::new_owned(did_str).map_err(|_| StorageError::Corrupt {
6767+ key: key_str.to_string(),
6868+ reason: "invalid DID in rbc suffix",
6969+ })?;
7070+ Ok(did)
5371}
54725573/// `"cbr"\0<did>\0<collection>` — reversed index for per-repo collection lookup.
···6078 let col = collection.as_str();
6179 let mut key = Vec::with_capacity(PREFIX_CBR.len() + 1 + d.len() + 1 + col.len());
6280 key.extend_from_slice(PREFIX_CBR);
6363- key.push(SEP);
8181+ key.push(NUL_SEP);
6482 key.extend_from_slice(d.as_bytes());
6565- key.push(SEP);
8383+ key.push(NUL_SEP);
6684 key.extend_from_slice(col.as_bytes());
6785 key
6886}
···7290 let d = did.as_str();
7391 let mut key = Vec::with_capacity(PREFIX_CBR.len() + 1 + d.len() + 1);
7492 key.extend_from_slice(PREFIX_CBR);
7575- key.push(SEP);
9393+ key.push(NUL_SEP);
7694 key.extend_from_slice(d.as_bytes());
7777- key.push(SEP);
9595+ key.push(NUL_SEP);
7896 key
7997}
8098···83101/// Returns `None` if the suffix bytes are not valid UTF-8 or not a valid NSID.
84102/// Typically used after prefix-scanning the cbr partition.
85103pub fn cbr_parse_collection(key: &[u8], prefix_len: usize) -> Option<Nsid<'static>> {
104104+ // TODO: we should error on unparseable!
86105 let nsid_str = std::str::from_utf8(key.get(prefix_len..)?).ok()?;
87106 Nsid::new_owned(nsid_str).ok()
88107}
···92111 let d = did.as_str();
93112 let mut key = Vec::with_capacity(PREFIX_REPO.len() + 1 + d.len());
94113 key.extend_from_slice(PREFIX_REPO);
9595- key.push(SEP);
114114+ key.push(NUL_SEP);
96115 key.extend_from_slice(d.as_bytes());
97116 key
98117}
···102121 let d = did.as_str();
103122 let mut key = Vec::with_capacity(PREFIX_REPO_PREV.len() + 1 + d.len());
104123 key.extend_from_slice(PREFIX_REPO_PREV);
105105- key.push(SEP);
124124+ key.push(NUL_SEP);
106125 key.extend_from_slice(d.as_bytes());
107126 key
108127}
···113132 let mut key =
114133 Vec::with_capacity(PREFIX_SUBSCRIBE_REPOS.len() + 1 + host.len() + 1 + SUFFIX.len());
115134 key.extend_from_slice(PREFIX_SUBSCRIBE_REPOS);
116116- key.push(SEP);
135135+ key.push(NUL_SEP);
117136 key.extend_from_slice(host.as_bytes());
118118- key.push(SEP);
137137+ key.push(NUL_SEP);
119138 key.extend_from_slice(SUFFIX);
120139 key
121140}
···124143pub fn list_repos_progress(host: &str) -> Vec<u8> {
125144 let mut key = Vec::with_capacity(PREFIX_LIST_REPOS.len() + 1 + host.len());
126145 key.extend_from_slice(PREFIX_LIST_REPOS);
127127- key.push(SEP);
146146+ key.push(NUL_SEP);
128147 key.extend_from_slice(host.as_bytes());
129148 key
130149}
···136155 let d = did.as_str();
137156 let mut key = Vec::with_capacity(PREFIX_RESYNC_QUEUE.len() + 1 + 8 + 1 + d.len());
138157 key.extend_from_slice(PREFIX_RESYNC_QUEUE);
139139- key.push(SEP);
158158+ key.push(NUL_SEP);
140159 key.extend_from_slice(&ts_be.to_be_bytes());
141141- key.push(SEP);
160160+ key.push(NUL_SEP);
142161 key.extend_from_slice(d.as_bytes());
143162 key
144163}
145164165165+/// `"repoResyncQueue"\0` — prefix for scanning the entire resync queue.
166166+pub fn resync_queue_prefix_all() -> Vec<u8> {
167167+ let mut key = Vec::with_capacity(PREFIX_RESYNC_QUEUE.len() + 1);
168168+ key.extend_from_slice(PREFIX_RESYNC_QUEUE);
169169+ key.push(NUL_SEP);
170170+ key
171171+}
172172+146173/// `"repoResyncQueue"\0<ts_be:u64>\0` — prefix for scanning resync queue up to a timestamp.
147174pub fn resync_queue_prefix(ts_be: u64) -> Vec<u8> {
148175 let mut key = Vec::with_capacity(PREFIX_RESYNC_QUEUE.len() + 1 + 8 + 1);
149176 key.extend_from_slice(PREFIX_RESYNC_QUEUE);
150150- key.push(SEP);
177177+ key.push(NUL_SEP);
151178 key.extend_from_slice(&ts_be.to_be_bytes());
152152- key.push(SEP);
179179+ key.push(NUL_SEP);
153180 key
154181}
155182···158185/// Key layout: `"repoResyncQueue"\0<ts_be:u64>\0<did>`
159186/// Returns `None` if the key is structurally invalid.
160187pub fn resync_queue_parse(key: &[u8]) -> Option<(u64, Did<'static>)> {
188188+ // TODO: error on unparseable!
161189 let rest = key
162190 .strip_prefix(PREFIX_RESYNC_QUEUE)?
163163- .strip_prefix(&[SEP])?;
191191+ .strip_prefix(&[NUL_SEP])?;
164192 if rest.len() < 9 {
165193 // Need at least 8 bytes for timestamp + 1 separator byte.
166194 return None;
167195 }
168196 let ts = u64::from_be_bytes(rest[..8].try_into().ok()?);
169169- let rest = rest[8..].strip_prefix(&[SEP])?;
197197+ let rest = rest[8..].strip_prefix(&[NUL_SEP])?;
170198 let did_str = std::str::from_utf8(rest).ok()?;
171199 Did::new_owned(did_str).ok().map(|did| (ts, did))
172200}
···228256 let d = did("did:web:example.com");
229257 let key = rbc(col.clone(), d.clone());
230258 let prefix_len = rbc_prefix(col).len();
231231- assert_eq!(rbc_parse_did(&key, prefix_len), Some(d));
259259+ assert_eq!(rbc_parse_did(&key, prefix_len), Ok(d));
232260 }
233261234262 #[test]
···236264 let prefix = rbc_prefix(nsid("app.bsky.feed.post"));
237265 // Prefix alone has no DID suffix — but it is valid UTF-8 empty string,
238266 // which is not a valid DID, so new_owned should reject it.
239239- assert!(rbc_parse_did(&prefix, prefix.len()).is_none());
267267+ assert_eq!(
268268+ rbc_parse_did(&prefix, prefix.len()),
269269+ Err(StorageError::Corrupt {
270270+ key: "rbc\0app.bsky.feed.post\0".to_string(),
271271+ reason: "invalid DID in rbc suffix",
272272+ })
273273+ );
240274 }
241275242276 // --- cbr ---
+84-89
src/storage/list_by.rs
···66use jacquard_common::types::string::Did;
7788use crate::storage::{DbRef, StorageResult, keys};
99+use fjall::util::prefixed_range;
1010+1111+/// Iterate over DIDs in the rbc index for `collection`, starting after `cursor`.
1212+///
1313+/// Returns at most `limit` DIDs.
1414+pub fn scan_rbc(
1515+ db: &DbRef,
1616+ collection: Nsid<'_>,
1717+ cursor: Option<Did<'_>>,
1818+ limit: usize,
1919+) -> StorageResult<(Vec<Did<'static>>, Option<Did<'static>>)> {
2020+ let prefix = keys::rbc_prefix(collection.clone());
2121+ let prefix_len = prefix.len();
2222+2323+ let lower_did = cursor.clone().map(keys::rbc_suffix).unwrap_or(vec![]);
2424+ let mut ranger = db.ks.range(prefixed_range(prefix.clone(), lower_did..));
2525+2626+ let mut dids = Vec::with_capacity(limit);
2727+ for guard in ranger.by_ref() {
2828+ let (k, _v) = guard.into_inner()?;
2929+ assert!(k.starts_with(&prefix));
3030+ let did = keys::rbc_parse_did(&k, prefix_len)?;
3131+ dids.push(did);
3232+ if dids.len() >= limit {
3333+ break;
3434+ }
3535+ }
3636+3737+ let next = if let Some(guard) = ranger.next() {
3838+ let key = guard.key()?;
3939+ let did = keys::rbc_parse_did(&key, prefix_len)?;
4040+ Some(did)
4141+ } else {
4242+ None
4343+ };
4444+4545+ Ok((dids, next))
4646+}
4747+4848+/// Iterate over collections in the cbr index for `did`, starting after `cursor`.
4949+///
5050+/// Returns at most `limit` NSIDs.
5151+///
5252+/// TODO: we can fjall range to the collection's next-after-max (might even be
5353+/// exposed now?) or maybe use prefix + seek for the start?
5454+pub fn scan_cbr(
5555+ db: &DbRef,
5656+ did: Did<'_>,
5757+ cursor: Option<Nsid<'_>>,
5858+ limit: usize,
5959+) -> StorageResult<Vec<Nsid<'static>>> {
6060+ let prefix = keys::cbr_prefix(did.clone());
6161+ let prefix_len = prefix.len();
6262+6363+ let start_key: Vec<u8> = match cursor {
6464+ None => prefix.clone(),
6565+ Some(ref col) => keys::cbr(did.clone(), col.clone()),
6666+ };
6767+6868+ let mut cols = Vec::with_capacity(limit);
6969+ for guard in db.ks.range(start_key..) {
7070+ let (k, _v) = guard.into_inner()?;
7171+ if !k.starts_with(&prefix) {
7272+ break;
7373+ }
7474+ if cols.len() >= limit {
7575+ break;
7676+ }
7777+ // Skip the cursor key itself.
7878+ if cursor
7979+ .clone()
8080+ .is_some_and(|c| k.as_ref() == keys::cbr(did.clone(), c).as_slice())
8181+ {
8282+ continue;
8383+ }
8484+ if let Some(col) = keys::cbr_parse_collection(&k, prefix_len) {
8585+ cols.push(col);
8686+ }
8787+ }
8888+ Ok(cols)
8989+}
9901091/// Add a `(collection, did)` pair to both indexes within an existing batch.
1192///
···250331251332 assert_eq!(
252333 scan_rbc(&db, nsid("app.bsky.actor.profile"), None, 10).unwrap(),
253253- vec![] as Vec<Did<'static>>,
334334+ (vec![], None),
254335 "removed collection should not appear in rbc",
255336 );
256337 assert_eq!(
257338 scan_rbc(&db, nsid("app.bsky.feed.post"), None, 10).unwrap(),
258258- vec![d.clone()],
339339+ (vec![d.clone()], None),
259340 "unchanged collection should remain in rbc",
260341 );
261342 assert_eq!(
262343 scan_rbc(&db, nsid("app.bsky.graph.follow"), None, 10).unwrap(),
263263- vec![d.clone()],
344344+ (vec![d.clone()], None),
264345 "added collection should appear in rbc",
265346 );
266347 }
···281362 assert_eq!(collections_for(&db, &bob), vec![nsid("app.bsky.feed.post")]);
282363 }
283364}
284284-285285-/// Iterate over DIDs in the rbc index for `collection`, starting after `cursor`.
286286-///
287287-/// Returns at most `limit` DIDs.
288288-///
289289-/// TODO: we can fjall range to the collection's next-after-max (might even be
290290-/// exposed now?) or maybe use prefix + seek for the start?
291291-pub fn scan_rbc(
292292- db: &DbRef,
293293- collection: Nsid<'_>,
294294- cursor: Option<Did<'_>>,
295295- limit: usize,
296296-) -> StorageResult<Vec<Did<'static>>> {
297297- let prefix = keys::rbc_prefix(collection.clone());
298298- let prefix_len = prefix.len();
299299-300300- let start_key: Vec<u8> = match cursor {
301301- None => prefix.clone(),
302302- Some(ref did) => keys::rbc(collection.clone(), did.clone()),
303303- };
304304-305305- let mut dids = Vec::with_capacity(limit);
306306- for guard in db.ks.range(start_key..) {
307307- let (k, _v) = guard.into_inner()?;
308308- if !k.starts_with(&prefix) {
309309- break;
310310- }
311311- if dids.len() >= limit {
312312- break;
313313- }
314314- // Skip the cursor key itself.
315315- if cursor
316316- .clone()
317317- .is_some_and(|c| k.as_ref() == keys::rbc(collection.clone(), c).as_slice())
318318- {
319319- continue;
320320- }
321321- if let Some(did) = keys::rbc_parse_did(&k, prefix_len) {
322322- dids.push(did);
323323- }
324324- }
325325- Ok(dids)
326326-}
327327-328328-/// Iterate over collections in the cbr index for `did`, starting after `cursor`.
329329-///
330330-/// Returns at most `limit` NSIDs.
331331-///
332332-/// TODO: we can fjall range to the collection's next-after-max (might even be
333333-/// exposed now?) or maybe use prefix + seek for the start?
334334-pub fn scan_cbr(
335335- db: &DbRef,
336336- did: Did<'_>,
337337- cursor: Option<Nsid<'_>>,
338338- limit: usize,
339339-) -> StorageResult<Vec<Nsid<'static>>> {
340340- let prefix = keys::cbr_prefix(did.clone());
341341- let prefix_len = prefix.len();
342342-343343- let start_key: Vec<u8> = match cursor {
344344- None => prefix.clone(),
345345- Some(ref col) => keys::cbr(did.clone(), col.clone()),
346346- };
347347-348348- let mut cols = Vec::with_capacity(limit);
349349- for guard in db.ks.range(start_key..) {
350350- let (k, _v) = guard.into_inner()?;
351351- if !k.starts_with(&prefix) {
352352- break;
353353- }
354354- if cols.len() >= limit {
355355- break;
356356- }
357357- // Skip the cursor key itself.
358358- if cursor
359359- .clone()
360360- .is_some_and(|c| k.as_ref() == keys::cbr(did.clone(), c).as_slice())
361361- {
362362- continue;
363363- }
364364- if let Some(col) = keys::cbr_parse_collection(&k, prefix_len) {
365365- cols.push(col);
366366- }
367367- }
368368- Ok(cols)
369369-}
+262-23
src/storage/repo.rs
···11//! Per-repo state storage.
2233use fjall::Readable;
44-use jacquard_common::types::{string::Cid, string::Did, tid::Tid};
44+use jacquard_common::types::{string::Did, tid::Tid};
5566-use crate::storage::{DbRef, error::StorageResult, keys};
66+use crate::storage::{
77+ DbRef,
88+ error::{StorageError, StorageResult},
99+ keys,
1010+};
711812/// tap's "RepoState" type
913#[derive(Debug, Clone, PartialEq, Eq)]
···3135 RepoState::Error => "error",
3236 }
3337 }
3838+3939+ fn from_str(s: &str) -> Option<Self> {
4040+ Some(match s {
4141+ "pending" => RepoState::Pending,
4242+ "desynchronized" => RepoState::Desynchronized,
4343+ "resyncing" => RepoState::Resyncing,
4444+ "active" => RepoState::Active,
4545+ "takendown" => RepoState::Takendown,
4646+ "suspended" => RepoState::Suspended,
4747+ "deactivated" => RepoState::Deactivated,
4848+ "error" => RepoState::Error,
4949+ _ => return None,
5050+ })
5151+ }
3452}
35533654/// tap's "AccountStatus" type
···4866 pub fn is_active(&self) -> bool {
4967 matches!(self, AccountStatus::Active)
5068 }
6969+5170 pub fn status(&self) -> Option<&str> {
5271 match self {
5372 AccountStatus::Active => None,
···5776 AccountStatus::Deleted => Some("deleted"),
5877 }
5978 }
7979+8080+ pub fn as_str(&self) -> &str {
8181+ match self {
8282+ AccountStatus::Active => "active",
8383+ AccountStatus::Takendown => "takendown",
8484+ AccountStatus::Suspended => "suspended",
8585+ AccountStatus::Deactivated => "deactivated",
8686+ AccountStatus::Deleted => "deleted",
8787+ }
8888+ }
8989+9090+ fn from_str(s: &str) -> Option<Self> {
9191+ Some(match s {
9292+ "active" => AccountStatus::Active,
9393+ "takendown" => AccountStatus::Takendown,
9494+ "suspended" => AccountStatus::Suspended,
9595+ "deactivated" => AccountStatus::Deactivated,
9696+ "deleted" => AccountStatus::Deleted,
9797+ _ => return None,
9898+ })
9999+ }
60100}
6110162102/// Stored info for a repository.
···67107 pub error: Option<String>,
68108}
69109110110+/// Wire format: `<state>\0<status>[\0<error>]`
111111+///
112112+/// Neither state nor status strings contain `\0`, so splitting on the first two
113113+/// `\0` bytes is unambiguous. The error field is absent when state != Error.
114114+fn encode_repo_info(info: &RepoInfo) -> Vec<u8> {
115115+ let mut v = Vec::new();
116116+ v.extend_from_slice(info.state.as_str().as_bytes());
117117+ v.push(b'\0');
118118+ v.extend_from_slice(info.status.as_str().as_bytes());
119119+ if let Some(err) = &info.error {
120120+ v.push(b'\0');
121121+ v.extend_from_slice(err.as_bytes());
122122+ }
123123+ v
124124+}
125125+126126+fn decode_repo_info(bytes: &[u8], key: &str) -> StorageResult<RepoInfo> {
127127+ let s = std::str::from_utf8(bytes).map_err(|_| StorageError::Corrupt {
128128+ key: key.to_owned(),
129129+ reason: "not valid UTF-8",
130130+ })?;
131131+ let mut parts = s.splitn(3, '\0');
132132+ let state = parts
133133+ .next()
134134+ .and_then(RepoState::from_str)
135135+ .ok_or(StorageError::Corrupt {
136136+ key: key.to_owned(),
137137+ reason: "invalid state",
138138+ })?;
139139+ let status = parts
140140+ .next()
141141+ .and_then(AccountStatus::from_str)
142142+ .ok_or(StorageError::Corrupt {
143143+ key: key.to_owned(),
144144+ reason: "invalid status",
145145+ })?;
146146+ let error = parts.next().map(str::to_owned);
147147+ Ok(RepoInfo {
148148+ state,
149149+ status,
150150+ error,
151151+ })
152152+}
153153+154154+/// Transient sync state for proof validation.
155155+///
156156+/// Updated on every firehose commit — the hot write path.
157157+#[derive(Debug, Clone)]
158158+pub struct RepoPrev {
159159+ /// The last-seen `rev`; serialized as the fixed 13-byte prefix of the stored value.
160160+ pub rev: Tid,
161161+ /// The last-seen commit CID as raw multihash bytes (from firehose CBOR); stored verbatim after the `rev`.
162162+ pub prev_data: Vec<u8>,
163163+}
164164+165165+/// Wire format: `[13 bytes ASCII TID][raw CID bytes]`
166166+///
167167+/// TIDs are always exactly 13 base32-sortable ASCII characters, so the TID
168168+/// length is a fixed-width prefix — no separator byte needed.
169169+fn encode_repo_prev(prev: &RepoPrev) -> Vec<u8> {
170170+ let rev = prev.rev.as_str().as_bytes();
171171+ debug_assert_eq!(rev.len(), 13, "TID must be exactly 13 bytes");
172172+ let mut v = Vec::with_capacity(13 + prev.prev_data.len());
173173+ v.extend_from_slice(rev);
174174+ v.extend_from_slice(&prev.prev_data);
175175+ v
176176+}
177177+178178+fn decode_repo_prev(bytes: &[u8], key: &str) -> StorageResult<RepoPrev> {
179179+ if bytes.len() < 13 {
180180+ return Err(StorageError::Corrupt {
181181+ key: key.to_owned(),
182182+ reason: "too short for TID",
183183+ });
184184+ }
185185+ let rev_str = std::str::from_utf8(&bytes[..13]).map_err(|_| StorageError::Corrupt {
186186+ key: key.to_owned(),
187187+ reason: "TID not UTF-8",
188188+ })?;
189189+ let rev = Tid::new(rev_str).map_err(|_| StorageError::Corrupt {
190190+ key: key.to_owned(),
191191+ reason: "invalid TID",
192192+ })?;
193193+ let prev_data = bytes[13..].to_vec();
194194+ Ok(RepoPrev { rev, prev_data })
195195+}
196196+70197/// Retrieve both [`RepoInfo`] and [`RepoPrev`] for a `did`.
71198///
72199/// `None` if the repo is not indexed.
73200pub fn get(db: &DbRef, did: Did<'_>) -> StorageResult<Option<(RepoInfo, Option<RepoPrev>)>> {
201201+ let info_key = keys::repo(did.clone());
202202+ let prev_key = keys::repo_prev(did);
74203 let snapshot = db.database.snapshot();
7575- Ok(snapshot
7676- .get(&db.ks, keys::repo(did.clone()))?
7777- .map(|info| {
7878- let prev = snapshot.get(&db.ks, keys::repo_prev(did))?;
7979- Ok((info, prev))
204204+ let Some(info_bytes) = snapshot.get(&db.ks, &info_key)? else {
205205+ return Ok(None);
206206+ };
207207+ let key_str = String::from_utf8_lossy(&info_key);
208208+ let info = decode_repo_info(&info_bytes, &key_str)?;
209209+ let prev = snapshot
210210+ .get(&db.ks, &prev_key)?
211211+ .map(|b| {
212212+ let key_str = String::from_utf8_lossy(&prev_key);
213213+ decode_repo_prev(&b, &key_str)
80214 })
8181- .transpose()?)
215215+ .transpose()?;
216216+ Ok(Some((info, prev)))
82217}
8321884219/// Write a [`RepoInfo`] for `did`.
8585-pub fn put_info(_db: &DbRef, _did: Did<'_>, _record: &RepoInfo) -> StorageResult<()> {
8686- todo!("serialize RepoInfo and write to fjall")
8787-}
8888-8989-/// Transient sync state for proof validation.
9090-#[derive(Debug, Clone)]
9191-pub struct RepoPrev {
9292- /// The last-seen `rev` string.
9393- pub rev: Tid,
9494- /// The last-seen `prevData` CID (as raw bytes).
9595- pub prev_data: Cid<'static>,
220220+pub fn put_info(db: &DbRef, did: Did<'_>, info: &RepoInfo) -> StorageResult<()> {
221221+ let key = keys::repo(did);
222222+ db.ks.insert(key, encode_repo_info(info))?;
223223+ Ok(())
96224}
9722598226/// Read the transient [`RepoPrev`] for `did`.
9999-pub fn get_prev(_db: &DbRef, _did: Did<'_>) -> StorageResult<Option<RepoPrev>> {
100100- todo!("deserialize RepoPrev")
227227+pub fn get_prev(db: &DbRef, did: Did<'_>) -> StorageResult<Option<RepoPrev>> {
228228+ let key = keys::repo_prev(did);
229229+ db.ks
230230+ .get(&key)?
231231+ .map(|b| {
232232+ let k = String::from_utf8_lossy(&key).into_owned();
233233+ decode_repo_prev(&b, &k)
234234+ })
235235+ .transpose()
101236}
102237103238/// Write the transient [`RepoPrev`] for `did`.
104104-pub fn put_prev(_db: &DbRef, _did: Did<'_>, _prev: &RepoPrev) -> StorageResult<()> {
105105- todo!("serialize and write RepoPrev")
239239+pub fn put_prev(db: &DbRef, did: Did<'_>, prev: &RepoPrev) -> StorageResult<()> {
240240+ let key = keys::repo_prev(did);
241241+ db.ks.insert(key, encode_repo_prev(prev))?;
242242+ Ok(())
106243}
107244108245/// Delete the transient [`RepoPrev`] for `did`.
···111248 db.ks.remove(key)?;
112249 Ok(())
113250}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::storage::open_temporary;

    const EXAMPLE_DID: &str = "did:web:example.com";
    const EXAMPLE_REV: &str = "3lczouzaqmo2e";

    fn did(s: &str) -> Did<'static> {
        Did::new_owned(s).unwrap()
    }

    fn tid(s: &str) -> Tid {
        Tid::new(s).unwrap()
    }

    /// A healthy repo: active state, active account, no error message.
    fn active_info() -> RepoInfo {
        RepoInfo {
            state: RepoState::Active,
            status: AccountStatus::Active,
            error: None,
        }
    }

    // --- codec round-trips (no database) ---

    #[test]
    fn repo_info_roundtrips_active() {
        let bytes = encode_repo_info(&active_info());
        let back = decode_repo_info(&bytes, "test").unwrap();
        assert_eq!(back.state, RepoState::Active);
        assert_eq!(back.status, AccountStatus::Active);
        assert!(back.error.is_none());
    }

    #[test]
    fn repo_info_roundtrips_error_with_message() {
        let original = RepoInfo {
            state: RepoState::Error,
            status: AccountStatus::Suspended,
            error: Some("something went wrong".to_owned()),
        };
        let bytes = encode_repo_info(&original);
        let back = decode_repo_info(&bytes, "test").unwrap();
        assert_eq!(back.state, RepoState::Error);
        assert_eq!(back.status, AccountStatus::Suspended);
        assert_eq!(back.error.as_deref(), Some("something went wrong"));
    }

    #[test]
    fn repo_prev_roundtrips() {
        let cid_bytes = vec![0x01, 0x71, 0x12, 0x20, 0xde, 0xad, 0xbe, 0xef];
        let original = RepoPrev {
            rev: tid(EXAMPLE_REV),
            prev_data: cid_bytes.clone(),
        };
        let bytes = encode_repo_prev(&original);
        let back = decode_repo_prev(&bytes, "test").unwrap();
        assert_eq!(back.rev.as_str(), "3lczouzaqmo2e");
        assert_eq!(back.prev_data, cid_bytes);
    }

    // --- storage round-trips (temporary database) ---

    #[test]
    fn put_and_get_repo_info() {
        let db = open_temporary().unwrap();
        let d = did(EXAMPLE_DID);
        put_info(&db, d.clone(), &active_info()).unwrap();

        let (retrieved, prev) = get(&db, d).unwrap().unwrap();
        assert_eq!(retrieved.state, RepoState::Active);
        assert!(prev.is_none());
    }

    #[test]
    fn put_and_get_prev() {
        let db = open_temporary().unwrap();
        let d = did(EXAMPLE_DID);
        // Need a RepoInfo first for get() to return Some.
        put_info(&db, d.clone(), &active_info()).unwrap();

        let prev = RepoPrev {
            rev: tid(EXAMPLE_REV),
            prev_data: vec![1, 2, 3, 4],
        };
        put_prev(&db, d.clone(), &prev).unwrap();

        let (_, stored_prev) = get(&db, d.clone()).unwrap().unwrap();
        let stored_prev = stored_prev.unwrap();
        assert_eq!(stored_prev.rev.as_str(), "3lczouzaqmo2e");
        assert_eq!(stored_prev.prev_data, vec![1, 2, 3, 4]);

        // Deleting the prev entry must leave the RepoInfo readable.
        delete_prev(&db, d.clone()).unwrap();
        let (_, no_prev) = get(&db, d).unwrap().unwrap();
        assert!(no_prev.is_none());
    }
}
+79-11
src/storage/resync.rs
···11//! Timestamp-ordered resync queue.
22//!
33//! Keys: `"repoResyncQueue"\0<ts_be:u64>\0<did>`
44-//! Values: CBOR payload with the triggering commit, retry count, and retry reason.
44+//! Values: `[u16 BE retry_count][u16 BE reason_len][reason_bytes][commit_cbor_bytes]`
5566use jacquard_common::types::string::Did;
7788-use crate::storage::{DbRef, StorageResult};
88+use crate::storage::{
99+ DbRef,
1010+ error::{StorageError, StorageResult},
1111+ keys,
1212+};
9131014/// An item waiting in the resync queue.
1115#[derive(Debug, Clone)]
···1721 pub commit_cbor: Vec<u8>,
1822}
19232424+/// Wire format: `[u16 BE retry_count][u16 BE reason_len][reason_bytes][commit_cbor_bytes]`
2525+fn encode(item: &ResyncItem) -> Vec<u8> {
2626+ let reason = item.retry_reason.as_bytes();
2727+ let mut v = Vec::with_capacity(2 + 2 + reason.len() + item.commit_cbor.len());
2828+ v.extend_from_slice(&item.retry_count.to_be_bytes());
2929+ v.extend_from_slice(&(reason.len() as u16).to_be_bytes());
3030+ v.extend_from_slice(reason);
3131+ v.extend_from_slice(&item.commit_cbor);
3232+ v
3333+}
3434+3535+fn decode(bytes: &[u8], key: &str, did: Did<'static>) -> StorageResult<ResyncItem> {
3636+ if bytes.len() < 4 {
3737+ return Err(StorageError::Corrupt {
3838+ key: key.to_owned(),
3939+ reason: "value too short",
4040+ });
4141+ }
4242+ let retry_count = u16::from_be_bytes([bytes[0], bytes[1]]);
4343+ let reason_len = u16::from_be_bytes([bytes[2], bytes[3]]) as usize;
4444+ let rest = &bytes[4..];
4545+ if rest.len() < reason_len {
4646+ return Err(StorageError::Corrupt {
4747+ key: key.to_owned(),
4848+ reason: "reason truncated",
4949+ });
5050+ }
5151+ let retry_reason = std::str::from_utf8(&rest[..reason_len])
5252+ .map_err(|_| StorageError::Corrupt {
5353+ key: key.to_owned(),
5454+ reason: "reason not UTF-8",
5555+ })?
5656+ .to_owned();
5757+ let commit_cbor = rest[reason_len..].to_vec();
5858+ Ok(ResyncItem {
5959+ did,
6060+ retry_count,
6161+ retry_reason,
6262+ commit_cbor,
6363+ })
6464+}
6565+2066/// Enqueue a repo for resync at the given Unix timestamp (seconds).
2121-pub fn enqueue(_db: &DbRef, _ts: u64, _item: &ResyncItem) -> StorageResult<()> {
2222- todo!("serialize ResyncItem to CBOR and insert into resync partition")
6767+pub fn enqueue(db: &DbRef, ts: u64, item: &ResyncItem) -> StorageResult<()> {
6868+ let key = keys::resync_queue(ts, item.did.clone());
6969+ db.ks.insert(key, encode(item))?;
7070+ Ok(())
2371}
24722573/// Dequeue and return the next item whose timestamp is ≤ `now`.
2674///
2775/// Removes the entry from the queue atomically before returning it.
2828-pub fn dequeue_ready(_db: &DbRef, _now: u64) -> StorageResult<Option<ResyncItem>> {
2929- todo!("scan resync partition up to `now`, remove and return the first entry")
3030-}
3131-3232-/// Remove all queue entries for `did` (e.g., after a successful resync).
3333-pub fn remove_did(_db: &DbRef, _did: Did<'_>) -> StorageResult<()> {
3434- todo!("scan resync partition and remove all entries matching did")
7676+///
7777+/// TODO: no, this is not atomic currently
7878+///
7979+/// note: deleted accounts aren't removed from the resync queue so we need to
8080+/// check that (or does the caller deal with it?)
8181+///
8282+/// TODO: we actually want to pass in an optional cursor so we can efficiently
8383+/// skip over tombstones. we don't have to persist the cursor to disk, but the
8484+/// caller can hold it in memory over the app's lifetime so we only pay the tomb
8585+/// scan cost once on startup.
8686+pub fn dequeue_ready(db: &DbRef, now: u64) -> StorageResult<Option<ResyncItem>> {
8787+ let prefix = keys::resync_queue_prefix_all();
8888+ for guard in db.ks.prefix(&prefix) {
8989+ let (key_slice, val_slice) = guard.into_inner()?;
9090+ let key_bytes: &[u8] = key_slice.as_ref();
9191+ let Some((ts, did)) = keys::resync_queue_parse(key_bytes) else {
9292+ continue;
9393+ };
9494+ if ts > now {
9595+ break; // queue is ordered; no earlier entries remain
9696+ }
9797+ let key_str = String::from_utf8_lossy(key_bytes).into_owned();
9898+ let item = decode(val_slice.as_ref(), &key_str, did)?;
9999+ db.ks.remove(key_bytes)?;
100100+ return Ok(Some(item));
101101+ }
102102+ Ok(None)
35103}