···1414jacquard-repo = "0.9.6"
1515metrics = "0.24.3"
1616metrics-exporter-prometheus = { version = "0.18.1", features = ["http-listener"] }
1717+serde = { version = "1", features = ["derive"] }
1718thiserror = "2.0.18"
1819tokio = { version = "1.49.0", features = ["full"] }
+41
src/api/handler.rs
···11+//! XRPC handlers.
22+//!
33+//! Uses `jacquard-axum`'s `ExtractXrpc` extractor to deserialise query parameters
44+//! directly into the lexicon-generated request types.
55+66+use axum::{extract::State, http::StatusCode, Json};
77+use jacquard_api::com_atproto::sync::{
88+ get_repo_status::{GetRepoStatusOutput, GetRepoStatusRequest},
99+ list_repos_by_collection::{ListReposByCollectionOutput, ListReposByCollectionRequest},
1010+};
1111+use jacquard_axum::ExtractXrpc;
1212+1313+use crate::db::DbRef;
/// Handler for `GET /xrpc/com.atproto.sync.listReposByCollection`.
///
/// Intended behaviour (not yet implemented): perform a cursor-paginated prefix
/// scan over the rbc keyspace via `db::index::scan_rbc`, returning up to
/// `limit` (default 500, max 2000) DIDs that have at least one record in the
/// requested collection.
///
/// Both extracted arguments are currently unused, hence the `_` prefixes.
///
/// # Panics
///
/// The body is a `todo!` placeholder, so the handler panics whenever it runs.
pub async fn list_repos_by_collection(
    State(_db): State<DbRef>,
    ExtractXrpc(_req): ExtractXrpc<ListReposByCollectionRequest>,
) -> Result<Json<ListReposByCollectionOutput<'static>>, StatusCode> {
    // Request fields the implementation is expected to read:
    // req.collection — Nsid<'static>
    // req.cursor     — Option<CowStr<'static>>
    // req.limit      — Option<i64>
    todo!("scan rbc index and return paginated repo list")
}
/// Handler for `GET /xrpc/com.atproto.sync.getRepoStatus`.
///
/// Intended behaviour (not yet implemented): look up the per-repo state from
/// `db::repo::get` and return whether the repo is active, responding with a
/// `RepoNotFound` XRPC error (404) if the DID has never been indexed.
///
/// NOTE(review): the error type in the signature is a bare `StatusCode`, which
/// carries no XRPC error name — confirm how `RepoNotFound` is meant to be
/// surfaced before implementing.
///
/// # Panics
///
/// The body is a `todo!` placeholder, so the handler panics whenever it runs.
pub async fn get_repo_status(
    State(_db): State<DbRef>,
    ExtractXrpc(_req): ExtractXrpc<GetRepoStatusRequest>,
) -> Result<Json<GetRepoStatusOutput<'static>>, StatusCode> {
    // Request field the implementation is expected to read:
    // req.did — Did<'static>
    todo!("look up RepoRecord in db and return active/status")
}
+32
src/api/mod.rs
···11+//! XRPC API server.
22+//!
33+//! Serves XRPC endpoints via axum routers built with `jacquard-axum`'s
44+//! `IntoRouter` helper.
55+66+mod handler;
77+88+use std::net::SocketAddr;
99+1010+use jacquard_api::com_atproto::sync::{
1111+ get_repo_status::GetRepoStatusRequest,
1212+ list_repos_by_collection::ListReposByCollectionRequest,
1313+};
1414+use jacquard_axum::IntoRouter;
1515+1616+use crate::db::DbRef;
1717+use crate::error::Result;
1818+1919+/// Build and serve the axum application on `addr`.
2020+///
2121+/// Routes:
2222+/// GET /xrpc/com.atproto.sync.listReposByCollection
2323+/// GET /xrpc/com.atproto.sync.getRepoStatus
2424+pub async fn serve(addr: SocketAddr, db: DbRef) -> Result<()> {
2525+ let app = ListReposByCollectionRequest::into_router(handler::list_repos_by_collection)
2626+ .merge(GetRepoStatusRequest::into_router(handler::get_repo_status))
2727+ .with_state(db);
2828+2929+ let listener = tokio::net::TcpListener::bind(addr).await?;
3030+ axum::serve(listener, app).await?;
3131+ Ok(())
3232+}
+17
src/backfill/list_repos.rs
···11+//! Walk `com.atproto.sync.listRepos` with cursor pagination.
22+//!
33+//! For each DID encountered, either enqueues it for probing (large repos) or
44+//! dispatches it to the small-repo fast path.
55+66+use crate::db::DbRef;
77+use crate::error::Result;
/// Walk the full `listRepos` feed for `host`, persisting progress after each
/// page so it can be resumed on restart.
///
/// Intended behaviour (not yet implemented): use `jacquard-api`'s
/// `com_atproto::sync::list_repos` XRPC call with cursor pagination, writing
/// per-page progress via `db::cursor::set_list_repos_progress`.
///
/// # Panics
///
/// The body is a `todo!` placeholder, so calling this function panics.
pub async fn run(host: &str, db: DbRef) -> Result<()> {
    // Bind the arguments so the stub compiles without unused-variable warnings.
    let _ = (host, db);
    todo!("paginate listRepos, probe each DID, persist cursor after each page")
}
+23
src/backfill/mod.rs
···11+//! Backfill subsystem.
22+//!
33+//! Walks `com.atproto.sync.listRepos` and probes each repository to populate
44+//! the rbc/cbr index before or alongside the live firehose feed.
55+//!
66+//! Large repos are enumerated via binary-search `getRecord` probing (`probe`).
77+//! Small repos take the fast path of fetching the full repo CAR (`small_repo`).
88+99+pub mod list_repos;
1010+pub mod probe;
1111+pub mod small_repo;
1212+1313+use crate::db::DbRef;
1414+use crate::error::Result;
/// Run the backfill subsystem for `host`.
///
/// Delegates directly to [`list_repos::run`], which is documented to resume
/// from the last-saved listRepos cursor and page through all repos. The
/// returned future resolves when that walk finishes: with `Ok(())` once the
/// full backfill completes, or with an error if a fatal error occurs.
pub async fn run(host: String, db: DbRef) -> Result<()> {
    list_repos::run(&host, db).await
}
+21
src/backfill/probe.rs
···11+//! Binary-search `getRecord` probing for large-repo backfill.
22+//!
33+//! Since every ATProto collection has a known minimum and maximum possible rkey,
44+//! `getRecord` returns adjacent keys even when the requested key does not exist.
55+//! This lets us binary-search the MST to enumerate all collections without
66+//! fetching the full repo CAR.
77+88+use crate::db::DbRef;
99+use crate::error::Result;
/// Probe `did` to enumerate its collections via `getRecord` binary search.
///
/// Planned algorithm (not yet implemented):
///
/// 1. Issue a `getRecord` for the midpoint of the NSID key space.
/// 2. Feed the returned CAR slice to `mst::adjacent::extract_adjacent`.
/// 3. Use adjacent keys to narrow the search and recurse until all collection
///    boundaries are discovered.
/// 4. Write results to the rbc/cbr index via `db::index::insert`.
///
/// # Panics
///
/// The body is a `todo!` placeholder, so calling this function panics.
pub async fn probe_repo(host: &str, did: &str, db: DbRef) -> Result<()> {
    // Bind the arguments so the stub compiles without unused-variable warnings.
    let _ = (host, did, db);
    todo!("binary-search getRecord probing to enumerate collections")
}
+20
src/backfill/small_repo.rs
···11+//! Fast path for small repositories: fetch the full repo CAR and extract all
22+//! collections in a single pass.
33+//!
44+//! Small repos are detected by inspecting MST node levels from an initial
55+//! `getRecord` probe — a high-level node present in a partial CAR slice means
66+//! the repo is large; absence of high-level nodes suggests a small repo.
77+88+use crate::db::DbRef;
99+use crate::error::Result;
/// Fetch the entire repo CAR for `did` via `com.atproto.sync.getRepo` and
/// extract all collection NSIDs in one pass.
///
/// Planned behaviour (not yet implemented): stream the CAR response (via
/// `iroh-car` or `jacquard-repo`'s CAR reader), walk every MST leaf to collect
/// all record keys, group them by collection, and write the result to the
/// rbc/cbr index.
///
/// # Panics
///
/// The body is a `todo!` placeholder, so calling this function panics.
pub async fn index_small_repo(host: &str, did: &str, db: DbRef) -> Result<()> {
    // Bind the arguments so the stub compiles without unused-variable warnings.
    let _ = (host, did, db);
    todo!("stream getRepo CAR, parse all MST leaves, index collections")
}
+46
src/db/cursor.rs
···11+//! Firehose cursor and listRepos backfill progress tracking.
22+33+use crate::db::{keys, DbRef};
44+use crate::error::Result;
55+66+/// Read the last-saved firehose cursor for `host`.
77+///
88+/// Returns `None` if no cursor has been persisted yet (start from live).
99+pub fn get_subscribe_cursor(db: &DbRef, host: &str) -> Result<Option<u64>> {
1010+ let key = keys::subscribe_cursor(host);
1111+ match db.cursors.get(key)? {
1212+ None => Ok(None),
1313+ Some(v) => {
1414+ let bytes: [u8; 8] = v.as_ref().try_into().map_err(|_| {
1515+ crate::error::Error::Other("corrupt cursor value".into())
1616+ })?;
1717+ Ok(Some(u64::from_be_bytes(bytes)))
1818+ }
1919+ }
2020+}
2121+2222+/// Persist the firehose cursor for `host`.
2323+pub fn set_subscribe_cursor(db: &DbRef, host: &str, cursor: u64) -> Result<()> {
2424+ let key = keys::subscribe_cursor(host);
2525+ db.cursors.insert(key, cursor.to_be_bytes())?;
2626+ Ok(())
2727+}
/// Progress record for the listRepos backfill walk.
///
/// Read and written through `get_list_repos_progress` /
/// `set_list_repos_progress`; the on-disk encoding is not defined yet because
/// both functions are still stubs.
#[derive(Debug, Clone)]
pub struct ListReposProgress {
    /// The current pagination cursor (empty string = start from beginning).
    pub cursor: String,
    /// Set when the full walk has completed at least once.
    // NOTE(review): presumably a timestamp string — the format is not fixed
    // anywhere in this file; confirm before relying on it.
    pub completed_at: Option<String>,
}
/// Read the listRepos backfill progress for `host`.
///
/// The signature implies `Ok(None)` when no progress has been stored.
///
/// # Panics
///
/// Not yet implemented: the body is a `todo!` placeholder and panics when called.
pub fn get_list_repos_progress(_db: &DbRef, _host: &str) -> Result<Option<ListReposProgress>> {
    todo!("deserialize ListReposProgress")
}
/// Write the listRepos backfill progress for `host`.
///
/// # Panics
///
/// Not yet implemented: the body is a `todo!` placeholder and panics when called.
pub fn set_list_repos_progress(_db: &DbRef, _host: &str, _progress: &ListReposProgress) -> Result<()> {
    todo!("serialize and write ListReposProgress")
}
+86
src/db/index.rs
···11+//! rbc / cbr index operations.
22+33+use crate::db::{keys, DbRef};
44+use crate::error::Result;
55+66+/// Insert a `(collection, did)` pair into both the rbc and cbr indexes.
77+pub fn insert(db: &DbRef, did: &str, collection: &str) -> Result<()> {
88+ let mut batch = db.database.batch();
99+ batch.insert(&db.rbc, keys::rbc(collection, did), b"");
1010+ batch.insert(&db.cbr, keys::cbr(did, collection), b"");
1111+ batch.commit()?;
1212+ Ok(())
1313+}
1414+1515+/// Remove a `(collection, did)` pair from both indexes.
1616+pub fn remove(db: &DbRef, did: &str, collection: &str) -> Result<()> {
1717+ let mut batch = db.database.batch();
1818+ batch.remove(&db.rbc, keys::rbc(collection, did));
1919+ batch.remove(&db.cbr, keys::cbr(did, collection));
2020+ batch.commit()?;
2121+ Ok(())
2222+}
2323+2424+/// Remove all index entries for `did` (account deletion / full resync).
2525+///
2626+/// Reads all collections from the cbr index, then deletes both sides in a batch.
2727+pub fn remove_all(db: &DbRef, did: &str) -> Result<()> {
2828+ let prefix = keys::cbr_prefix(did);
2929+ // fjall::Iter yields Guard; call into_inner() to get the KvPair.
3030+ let collections: Vec<Vec<u8>> = db
3131+ .cbr
3232+ .prefix(prefix)
3333+ .map(|guard| guard.into_inner().map(|(k, _v)| k.to_vec()))
3434+ .collect::<fjall::Result<_>>()?;
3535+3636+ if collections.is_empty() {
3737+ return Ok(());
3838+ }
3939+4040+ let mut batch = db.database.batch();
4141+ let prefix_len = keys::cbr_prefix(did).len();
4242+ for cbr_key in &collections {
4343+ // Extract the collection suffix from the cbr key.
4444+ // Key layout: "cbr"\0<did>\0<collection>
4545+ let col_bytes: &[u8] = &cbr_key[prefix_len..];
4646+ if let Ok(collection) = std::str::from_utf8(col_bytes) {
4747+ batch.remove(&db.rbc, keys::rbc(collection, did));
4848+ }
4949+ batch.remove(&db.cbr, cbr_key.as_slice());
5050+ }
5151+ batch.commit()?;
5252+ Ok(())
5353+}
5454+5555+/// Iterate over DIDs in the rbc index for `collection`, starting after `cursor`.
5656+///
5757+/// Returns at most `limit` DID strings.
5858+pub fn scan_rbc(db: &DbRef, collection: &str, cursor: Option<&str>, limit: usize) -> Result<Vec<String>> {
5959+ let prefix = keys::rbc_prefix(collection);
6060+ let prefix_len = prefix.len();
6161+6262+ let start_key: Vec<u8> = match cursor {
6363+ None => prefix.clone(),
6464+ Some(did) => keys::rbc(collection, did),
6565+ };
6666+6767+ let mut dids = Vec::with_capacity(limit);
6868+ for guard in db.rbc.range(start_key..) {
6969+ let (k, _v) = guard.into_inner()?;
7070+ if !k.starts_with(&prefix) {
7171+ break;
7272+ }
7373+ if dids.len() >= limit {
7474+ break;
7575+ }
7676+ // Skip the cursor key itself.
7777+ if cursor.is_some_and(|c| k.as_ref() == keys::rbc(collection, c).as_slice()) {
7878+ continue;
7979+ }
8080+ let col_bytes: &[u8] = &k[prefix_len..];
8181+ if let Ok(did) = std::str::from_utf8(col_bytes) {
8282+ dids.push(did.to_owned());
8383+ }
8484+ }
8585+ Ok(dids)
8686+}
···11+pub mod cursor;
22+pub mod index;
33+pub mod keys;
44+pub mod repo;
55+pub mod resync;
66+77+use std::path::Path;
88+use std::sync::Arc;
99+1010+use crate::error::Result;
/// Shared handle to the fjall database and its per-concern keyspaces.
///
/// In fjall 3.x, `Database` is the top-level multi-keyspace container and
/// `Keyspace` is an individual column-family (the old `PartitionHandle`).
///
/// All fields are `pub(crate)`: the `db::*` submodules access the keyspaces
/// directly, and `database` is used to create cross-keyspace write batches.
pub struct Db {
    /// The owning database; used for `batch()` in `db::index`.
    pub(crate) database: fjall::Database,
    /// Main collection index: `rbc\0<collection>\0<did>`.
    pub(crate) rbc: fjall::Keyspace,
    /// Reversed index: `cbr\0<did>\0<collection>`.
    pub(crate) cbr: fjall::Keyspace,
    /// Per-repo state + metadata.
    pub(crate) repos: fjall::Keyspace,
    /// Firehose cursor + listRepos progress.
    pub(crate) cursors: fjall::Keyspace,
    /// Timestamp-ordered resync queue.
    pub(crate) resync: fjall::Keyspace,
}

/// Cheaply-cloneable reference to the shared database.
///
/// An `Arc`, so cloning only bumps a reference count; each subsystem started
/// from `main` receives its own clone.
pub type DbRef = Arc<Db>;
3232+3333+/// Open (or create) the fjall database at `path` and return a shared handle.
3434+pub fn open(path: &Path) -> Result<DbRef> {
3535+ let database = fjall::Database::builder(path).open()?;
3636+3737+ let rbc = database.keyspace("rbc", fjall::KeyspaceCreateOptions::default)?;
3838+ let cbr = database.keyspace("cbr", fjall::KeyspaceCreateOptions::default)?;
3939+ let repos = database.keyspace("repos", fjall::KeyspaceCreateOptions::default)?;
4040+ let cursors = database.keyspace("cursors", fjall::KeyspaceCreateOptions::default)?;
4141+ let resync = database.keyspace("resync", fjall::KeyspaceCreateOptions::default)?;
4242+4343+ Ok(Arc::new(Db {
4444+ database,
4545+ rbc,
4646+ cbr,
4747+ repos,
4848+ cursors,
4949+ resync,
5050+ }))
5151+}
+72
src/db/repo.rs
···11+//! Per-repo state storage.
22+33+use crate::db::{keys, DbRef};
44+use crate::error::Result;
/// High-level lifecycle state of a repo.
///
/// Stored as the `state` field of [`RepoRecord`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RepoState {
    /// Repo is being indexed for the first time.
    Indexing,
    /// Repo is fully indexed and up to date.
    Active,
    /// Repo has been tombstoned / deactivated.
    Tombstoned,
}
/// Processing status used to drive backfill / resync decisions.
///
/// Stored as the `status` field of [`RepoRecord`], separately from the
/// lifecycle [`RepoState`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RepoStatus {
    /// No record yet.
    Unknown,
    /// Queued for backfill probing.
    Pending,
    /// Backfill complete.
    Complete,
    /// An error occurred; the `error` field of [`RepoRecord`] has details.
    Error,
}
/// Stored record for a repository.
///
/// Read and written through [`get`] and [`put`]; the serialized form is not
/// defined yet because both are still stubs.
#[derive(Debug, Clone)]
pub struct RepoRecord {
    /// Lifecycle state of the repo.
    pub state: RepoState,
    /// Backfill / resync processing status.
    pub status: RepoStatus,
    /// Error details; per [`RepoStatus::Error`]'s documentation this is where
    /// the failure description lives.
    // NOTE(review): presumably `None` for every other status — confirm.
    pub error: Option<String>,
}
/// Read the [`RepoRecord`] for `did`, returning `None` if no record exists.
///
/// # Panics
///
/// Not yet implemented: the body is a `todo!` placeholder and panics when called.
pub fn get(_db: &DbRef, _did: &str) -> Result<Option<RepoRecord>> {
    todo!("deserialize RepoRecord from fjall value")
}
/// Write a [`RepoRecord`] for `did`.
///
/// # Panics
///
/// Not yet implemented: the body is a `todo!` placeholder and panics when called.
pub fn put(_db: &DbRef, _did: &str, _record: &RepoRecord) -> Result<()> {
    todo!("serialize RepoRecord and write to fjall")
}
/// Transient sync state for proof validation.
///
/// Managed through [`get_prev`], [`put_prev`] and [`delete_prev`]; the latter
/// removes the entry stored under `keys::repo_prev(did)` in the `repos`
/// keyspace.
#[derive(Debug, Clone)]
pub struct RepoPrev {
    /// The last-seen `rev` string.
    pub rev: String,
    /// The last-seen `prevData` CID (as raw bytes).
    pub prev_data: Vec<u8>,
}
/// Read the transient [`RepoPrev`] for `did`.
///
/// The signature implies `Ok(None)` when nothing is stored for `did`.
///
/// # Panics
///
/// Not yet implemented: the body is a `todo!` placeholder and panics when called.
pub fn get_prev(_db: &DbRef, _did: &str) -> Result<Option<RepoPrev>> {
    todo!("deserialize RepoPrev")
}
/// Write the transient [`RepoPrev`] for `did`.
///
/// # Panics
///
/// Not yet implemented: the body is a `todo!` placeholder and panics when called.
pub fn put_prev(_db: &DbRef, _did: &str, _prev: &RepoPrev) -> Result<()> {
    todo!("serialize and write RepoPrev")
}
6666+6767+/// Delete the transient [`RepoPrev`] for `did`.
6868+pub fn delete_prev(db: &DbRef, did: &str) -> Result<()> {
6969+ let key = keys::repo_prev(did);
7070+ db.repos.remove(key)?;
7171+ Ok(())
7272+}
+34
src/db/resync.rs
···11+//! Timestamp-ordered resync queue.
22+//!
33+//! Keys: `"repoResyncQueue"\0<ts_be:u64>\0<did>`
44+//! Values: CBOR payload with the triggering commit, retry count, and retry reason.
55+66+use crate::db::DbRef;
77+use crate::error::Result;
/// An item waiting in the resync queue.
///
/// Per the module documentation, the queue value is a CBOR payload carrying
/// the triggering commit, the retry count and the retry reason.
#[derive(Debug, Clone)]
pub struct ResyncItem {
    /// DID of the repo to resync.
    pub did: String,
    /// Retry counter for this item.
    // NOTE(review): presumably the number of attempts so far — confirm once
    // `enqueue` / `dequeue_ready` are implemented.
    pub retry_count: u16,
    /// Reason recorded for the retry.
    pub retry_reason: String,
    /// Raw CBOR of the triggering firehose commit.
    pub commit_cbor: Vec<u8>,
}
/// Enqueue a repo for resync at the given Unix timestamp (seconds).
///
/// # Panics
///
/// Not yet implemented: the body is a `todo!` placeholder and panics when called.
pub fn enqueue(_db: &DbRef, _ts: u64, _item: &ResyncItem) -> Result<()> {
    todo!("serialize ResyncItem to CBOR and insert into resync partition")
}
/// Dequeue and return the next item whose timestamp is ≤ `now`.
///
/// Intended contract: the entry is removed from the queue atomically before
/// it is returned. The signature implies `Ok(None)` when nothing is ready.
///
/// # Panics
///
/// Not yet implemented: the body is a `todo!` placeholder and panics when called.
pub fn dequeue_ready(_db: &DbRef, _now: u64) -> Result<Option<ResyncItem>> {
    todo!("scan resync partition up to `now`, remove and return the first entry")
}
/// Remove all queue entries for `did` (e.g., after a successful resync).
///
/// # Panics
///
/// Not yet implemented: the body is a `todo!` placeholder and panics when called.
pub fn remove_did(_db: &DbRef, _did: &str) -> Result<()> {
    todo!("scan resync partition and remove all entries matching did")
}
···11+//! Firehose subsystem.
22+//!
33+//! Connects to an ATProto relay, validates incoming commits via sync1.1 inductive
44+//! proofs, and updates the rbc/cbr index on collection additions/removals.
55+66+mod subscriber;
77+88+pub use subscriber::Subscriber;
99+1010+use crate::db::DbRef;
1111+use crate::error::Result;
/// Build the firehose [`Subscriber`] for `host` and run it until it returns.
///
/// This is the top-level entry point called from `main`. No task is spawned
/// here: the subscriber is driven inline by whoever awaits this future. The
/// subscriber is documented to handle reconnection internally, so this future
/// is only expected to resolve on a fatal error.
pub async fn run(host: String, db: DbRef) -> Result<()> {
    // `Subscriber::run` takes `&mut self`, hence the mutable binding.
    let mut sub = Subscriber::new(host, db);
    sub.run().await
}
+51
src/firehose/subscriber.rs
···11+//! Firehose WebSocket subscriber.
22+//!
33+//! Connects to an ATProto relay using `jacquard-common`'s `SubscriptionExt` +
44+//! `TungsteniteClient`, persists/restores the sequence cursor via `db::cursor`,
55+//! and dispatches decoded events to the appropriate handlers.
66+77+use crate::db::DbRef;
88+use crate::error::Result;
/// Manages a single WebSocket connection to a relay firehose.
pub struct Subscriber {
    /// Relay host to connect to, as passed to [`Subscriber::new`].
    host: String,
    /// Shared database handle, as passed to [`Subscriber::new`].
    db: DbRef,
}
impl Subscriber {
    /// Create a subscriber for `host` backed by `db`.
    ///
    /// Only stores the arguments; no connection is opened here.
    pub fn new(host: String, db: DbRef) -> Self {
        Self { host, db }
    }

    /// Connect and run the subscriber loop, reconnecting on disconnect.
    ///
    /// Intended behaviour (not yet implemented): restore the last cursor from
    /// `db::cursor::get_subscribe_cursor` before connecting, and persist it
    /// after each successfully processed event.
    ///
    /// # Panics
    ///
    /// The body is a `todo!` placeholder, so calling this method panics.
    pub async fn run(&mut self) -> Result<()> {
        todo!(
            "connect via jacquard_common::TungsteniteClient + SubscriptionExt; \
             decode events with decode_cbor_msg; dispatch #commit / #sync / #account"
        )
    }

    /// Process a single `#commit` firehose event.
    ///
    /// Intended behaviour (not yet implemented): validate the inductive proof
    /// via `jacquard_repo::commit::proof::verify_proofs`, then diff the MST to
    /// determine collection additions/removals.
    ///
    /// # Panics
    ///
    /// The body is a `todo!` placeholder, so calling this method panics.
    async fn handle_commit(&self, _event_bytes: &[u8]) -> Result<()> {
        todo!("validate sync1.1 proof and update rbc/cbr index")
    }

    /// Process a single `#sync` firehose event.
    ///
    /// Intended behaviour (not yet implemented): probe the repo to diff
    /// against recorded collections.
    ///
    /// # Panics
    ///
    /// The body is a `todo!` placeholder, so calling this method panics.
    async fn handle_sync(&self, _event_bytes: &[u8]) -> Result<()> {
        todo!("probe repo to detect collection changes after #sync")
    }

    /// Process an `#account` (tombstone / reactivation) event.
    ///
    /// # Panics
    ///
    /// The body is a `todo!` placeholder, so calling this method panics.
    async fn handle_account(&self, _event_bytes: &[u8]) -> Result<()> {
        todo!("update RepoState and remove index entries if tombstoned")
    }
}
+41-2
src/main.rs
···11-fn main() {
22- println!("Hello, world!");
11+mod api;
22+mod backfill;
33+mod db;
44+mod error;
55+mod firehose;
66+mod mst;
77+88+use clap::Parser;
99+use std::net::SocketAddr;
1010+use std::path::PathBuf;
1111+1212+use error::Result;
// Command-line configuration, parsed with clap's derive API. Every option can
// also be supplied through the environment variable named in its `env = ...`.
//
// NOTE: clap turns the `///` comments on the fields below into `--help` text,
// so treat them as user-facing strings rather than internal documentation.
#[derive(Parser, Debug)]
#[command(name = "lightrail", about = "listReposByCollection indexing service")]
struct Args {
    /// ATProto relay or PDS host to subscribe to.
    // No default is declared, so this option must be provided.
    #[arg(long, env = "LIGHTRAIL_SUBSCRIBE")]
    subscribe: String,

    /// Path to the fjall database directory.
    #[arg(long, env = "LIGHTRAIL_DB_PATH", default_value = "lightrail.db")]
    db_path: PathBuf,

    /// TCP address for the XRPC API server.
    #[arg(long, env = "LIGHTRAIL_LISTEN", default_value = "0.0.0.0:3000")]
    listen: SocketAddr,
}
2929+3030+#[tokio::main]
3131+async fn main() -> Result<()> {
3232+ let args = Args::parse();
3333+ let db = db::open(&args.db_path)?;
3434+3535+ tokio::select! {
3636+ result = firehose::run(args.subscribe.clone(), db.clone()) => result?,
3737+ result = backfill::run(args.subscribe, db.clone()) => result?,
3838+ result = api::serve(args.listen, db) => result?,
3939+ }
4040+4141+ Ok(())
342}
+33
src/mst/adjacent.rs
···11+//! Extract adjacent MST keys from a CAR slice.
22+//!
33+//! Given the raw blocks included in a `getRecord` or firehose commit CAR slice,
44+//! this module uses `jacquard-repo` primitives to find neighbouring record keys
55+//! and detect collection boundaries.
/// The adjacent keys returned by the MST for a given probe key.
///
/// This is the result type of [`extract_adjacent`].
#[derive(Debug, Clone)]
pub struct AdjacentKeys {
    /// The key immediately before the probe key in the MST, if any.
    pub prev: Option<String>,
    /// The key immediately after the probe key in the MST, if any.
    pub next: Option<String>,
}
/// Extract adjacent keys for `probe_key` from the given CAR block bytes.
///
/// Intended behaviour (not yet implemented): load the blocks into an
/// in-memory block store and use `MstCursor` from `jacquard-repo` to walk the
/// MST and find neighbours.
///
/// # Panics
///
/// The body is a `todo!` placeholder, so calling this function panics.
pub fn extract_adjacent(_car_bytes: &[u8], _probe_key: &str) -> crate::error::Result<AdjacentKeys> {
    todo!("load CAR blocks into MemoryBlockStore, use MstCursor to find adjacent keys")
}
/// Estimate whether this is a small repo by inspecting the MST level of the
/// highest-level node present in `car_bytes`.
///
/// Intended behaviour (not yet implemented): use
/// `jacquard_repo::mst::util::layer_for_key()` on the highest-level key found
/// in the partial MST slice. Small repos are statistically unlikely to contain
/// high-level keys, since every CAR slice must include all maximum-level keys
/// of the repository.
///
/// NOTE(review): how the observed level is compared against
/// `_threshold_level` (inclusive or exclusive) is not defined yet.
///
/// # Panics
///
/// The body is a `todo!` placeholder, so calling this function panics.
pub fn is_small_repo(_car_bytes: &[u8], _threshold_level: u8) -> crate::error::Result<bool> {
    todo!("parse MST nodes from CAR blocks and check maximum key level")
}
+8
src/mst/mod.rs
···11+//! MST (Merkle Search Tree) utilities.
22+//!
33+//! Thin wrappers around `jacquard-repo` primitives for:
44+//! - extracting adjacent keys from CAR slices
55+//! - detecting collection boundaries
66+//! - small-repo heuristic via MST level inspection
77+88+pub mod adjacent;