very fast at protocol indexer with flexible filtering, xrpc queries, cursor-backed event stream, and more, built on fjall
rust fjall at-protocol atproto indexer
58
fork

Configure Feed

Select the types of activity you want to include in your feed.

[all] re-organize types and feature gates

dawn 9af943f2 c9e93d07

+1187 -1001
+1 -1
src/api/crawler.rs
··· 8 8 use url::Url; 9 9 10 10 use crate::config::{CrawlerMode, CrawlerSource}; 11 - use crate::control::{CrawlerSourceInfo, Hydrant}; 11 + use crate::control::{Hydrant, crawler::CrawlerSourceInfo}; 12 12 13 13 pub fn router() -> Router<Hydrant> { 14 14 Router::new()
+18 -5
src/api/debug.rs
··· 8 8 http::StatusCode, 9 9 }; 10 10 use jacquard_common::types::cid::Cid; 11 + #[cfg(feature = "indexer")] 11 12 use jacquard_common::types::ident::AtIdentifier; 12 13 use serde::{Deserialize, Serialize}; 13 14 use serde_json::Value; 14 15 use std::str::FromStr; 15 16 use std::sync::Arc; 16 17 18 + #[cfg(feature = "indexer")] 17 19 #[derive(Deserialize)] 18 20 pub struct DebugCountRequest { 19 21 pub did: String, 20 22 pub collection: String, 21 23 } 22 24 25 + #[cfg(feature = "indexer")] 23 26 #[derive(Serialize)] 24 27 pub struct DebugCountResponse { 25 28 pub count: usize, 26 29 } 27 30 28 31 pub fn router() -> axum::Router<Arc<AppState>> { 29 - axum::Router::new() 30 - .route("/debug/count", get(handle_debug_count)) 32 + let r = axum::Router::new() 31 33 .route("/debug/get", get(handle_debug_get)) 32 34 .route("/debug/iter", get(handle_debug_iter)) 33 35 .route("/debug/compact", post(handle_debug_compact)) ··· 35 37 "/debug/ephemeral_ttl_tick", 36 38 post(handle_debug_ephemeral_ttl_tick), 37 39 ) 38 - .route("/debug/seed_watermark", post(handle_debug_seed_watermark)) 40 + .route("/debug/seed_watermark", post(handle_debug_seed_watermark)); 41 + 42 + #[cfg(feature = "indexer")] 43 + let r = r.route("/debug/count", get(handle_debug_count)); 44 + 45 + r 39 46 } 40 47 48 + #[cfg(feature = "indexer")] 41 49 pub async fn handle_debug_count( 42 50 State(state): State<Arc<AppState>>, 43 51 Query(req): Query<DebugCountRequest>, ··· 263 271 fn get_keyspace_by_name(db: &crate::db::Db, name: &str) -> Result<fjall::Keyspace, StatusCode> { 264 272 match name { 265 273 "repos" => Ok(db.repos.clone()), 274 + "counts" => Ok(db.counts.clone()), 275 + "cursors" => Ok(db.cursors.clone()), 276 + #[cfg(feature = "indexer")] 266 277 "blocks" => Ok(db.blocks.clone()), 267 - "cursors" => Ok(db.cursors.clone()), 278 + #[cfg(feature = "indexer")] 268 279 "pending" => Ok(db.pending.clone()), 280 + #[cfg(feature = "indexer")] 269 281 "resync" => Ok(db.resync.clone()), 282 + #[cfg(feature = "indexer")] 270 283 "events" => Ok(db.events.clone()), 271 - "counts" => Ok(db.counts.clone()), 284 + #[cfg(feature = "indexer")] 272 285 "records" => Ok(db.records.clone()), 273 286 _ => Err(StatusCode::BAD_REQUEST), 274 287 }
+8
src/api/ingestion.rs
··· 15 15 16 16 #[derive(Serialize)] 17 17 pub struct IngestionStatus { 18 + #[cfg(feature = "indexer")] 18 19 pub crawler: bool, 19 20 pub firehose: bool, 21 + #[cfg(feature = "indexer")] 20 22 pub backfill: bool, 21 23 } 22 24 23 25 pub async fn get_ingestion(State(hydrant): State<Hydrant>) -> Json<IngestionStatus> { 24 26 Json(IngestionStatus { 27 + #[cfg(feature = "indexer")] 25 28 crawler: hydrant.crawler.is_enabled(), 26 29 firehose: hydrant.firehose.is_enabled(), 30 + #[cfg(feature = "indexer")] 27 31 backfill: hydrant.backfill.is_enabled(), 28 32 }) 29 33 } 30 34 31 35 #[derive(Deserialize)] 32 36 pub struct IngestionPatch { 37 + #[cfg(feature = "indexer")] 33 38 #[serde(default)] 34 39 pub crawler: Option<bool>, 35 40 #[serde(default)] 36 41 pub firehose: Option<bool>, 42 + #[cfg(feature = "indexer")] 37 43 #[serde(default)] 38 44 pub backfill: Option<bool>, 39 45 } ··· 42 48 State(hydrant): State<Hydrant>, 43 49 Json(body): Json<IngestionPatch>, 44 50 ) -> StatusCode { 51 + #[cfg(feature = "indexer")] 45 52 if let Some(crawler) = body.crawler { 46 53 if crawler { 47 54 hydrant.crawler.enable(); ··· 56 63 hydrant.firehose.disable(); 57 64 } 58 65 } 66 + #[cfg(feature = "indexer")] 59 67 if let Some(backfill) = body.backfill { 60 68 if backfill { 61 69 hydrant.backfill.enable();
+8 -2
src/api/mod.rs
··· 5 5 use tower_http::cors::CorsLayer; 6 6 use tower_http::trace::TraceLayer; 7 7 8 + #[cfg(feature = "indexer")] 8 9 mod crawler; 9 10 mod db; 10 11 mod debug; ··· 25 26 .route("/stats", get(stats::get_stats)); 26 27 #[cfg(feature = "indexer")] 27 28 let app = app.nest("/stream", stream::router()); 28 - let app = app 29 + #[allow(unused_mut)] 30 + let mut app = app 29 31 .merge(xrpc::router()) 30 32 .merge(filter::router()) 31 33 .merge(pds::router()) 32 34 .merge(repos::router()) 33 35 .merge(ingestion::router()) 34 - .merge(crawler::router()) 35 36 .merge(firehose::router()) 36 37 .merge(db::router()); 38 + 39 + #[cfg(feature = "indexer")] 40 + { 41 + app = app.merge(crawler::router()); 42 + } 37 43 38 44 #[cfg(feature = "backlinks")] 39 45 let app = app.merge(crate::backlinks::api::router());
+18 -4
src/api/repos.rs
··· 1 1 use std::str::FromStr; 2 2 3 3 use crate::control::{Hydrant, RepoInfo}; 4 + #[cfg(feature = "indexer")] 5 + use axum::routing::{delete, post, put}; 4 6 use axum::{ 5 7 Json, Router, 6 8 body::Body, 7 9 extract::{Path, Query, State}, 8 10 http::{HeaderMap, StatusCode, header}, 9 11 response::{IntoResponse, Response}, 10 - routing::{delete, get, post, put}, 12 + routing::get, 11 13 }; 12 14 use jacquard_common::types::did::Did; 13 15 use miette::IntoDiagnostic; 14 16 use serde::Deserialize; 15 17 16 18 pub fn router() -> Router<Hydrant> { 17 - Router::new() 19 + #[allow(unused_mut)] 20 + let r = Router::new() 18 21 .route("/repos", get(handle_get_repos)) 22 + .route("/repos/{did}", get(handle_get_repo)); 23 + 24 + #[cfg(feature = "indexer")] 25 + let r = r 19 26 .route("/repos/resync", post(handle_post_resync)) 20 - .route("/repos/{did}", get(handle_get_repo)) 21 27 .route("/repos", put(handle_put_repos)) 22 - .route("/repos", delete(handle_delete_repos)) 28 + .route("/repos", delete(handle_delete_repos)); 29 + 30 + r 23 31 } 24 32 33 + #[cfg(feature = "indexer")] 25 34 #[derive(Deserialize, Debug)] 26 35 pub struct RepoRequest { 27 36 pub did: String, ··· 90 99 .ok_or_else(|| (StatusCode::NOT_FOUND, "repository not found".to_string())) 91 100 } 92 101 102 + #[cfg(feature = "indexer")] 93 103 pub async fn handle_put_repos( 94 104 State(hydrant): State<Hydrant>, 95 105 headers: HeaderMap, ··· 111 121 Ok(did_list_response(queued, &headers)) 112 122 } 113 123 124 + #[cfg(feature = "indexer")] 114 125 pub async fn handle_delete_repos( 115 126 State(hydrant): State<Hydrant>, 116 127 headers: HeaderMap, ··· 132 143 Ok(did_list_response(untracked, &headers)) 133 144 } 134 145 146 + #[cfg(feature = "indexer")] 135 147 pub async fn handle_post_resync( 136 148 State(hydrant): State<Hydrant>, 137 149 headers: HeaderMap, ··· 159 171 contains_json(header::ACCEPT) || contains_json(header::CONTENT_TYPE) 160 172 } 161 173 174 + #[cfg(feature = "indexer")] 162 175 fn did_list_response(dids: Vec<Did<'static>>, headers: &HeaderMap) -> Response { 163 176 if prefers_json(headers) { 164 177 let body: Vec<String> = dids.into_iter().map(|d| d.to_string()).collect(); ··· 173 186 } 174 187 } 175 188 189 + #[cfg(feature = "indexer")] 176 190 async fn parse_body( 177 191 body: Body, 178 192 headers: &HeaderMap,
+9 -12
src/api/xrpc/mod.rs
··· 2 2 use axum::extract::FromRequest; 3 3 use axum::response::IntoResponse; 4 4 use axum::routing::get; 5 + #[cfg(feature = "relay")] 6 + use axum::routing::post; 5 7 use axum::{Json, Router, extract::State, http::StatusCode}; 6 8 use jacquard_api::com_atproto::sync::get_host_status::GetHostStatusRequest; 7 9 use jacquard_api::com_atproto::sync::get_latest_commit::GetLatestCommitRequest; 8 10 use jacquard_api::com_atproto::sync::get_repo_status::GetRepoStatusRequest; 9 11 use jacquard_api::com_atproto::sync::list_hosts::ListHostsRequest; 10 12 use jacquard_api::com_atproto::sync::list_repos::ListReposRequest; 13 + #[cfg(feature = "indexer")] 11 14 use jacquard_common::types::ident::AtIdentifier; 15 + #[cfg(feature = "indexer")] 16 + use jacquard_common::types::string::AtUri; 12 17 use jacquard_common::xrpc::XrpcResp; 18 + use jacquard_common::xrpc::{GenericXrpcError, XrpcError}; 13 19 use jacquard_common::xrpc::{XrpcEndpoint, XrpcMethod}; 14 20 use jacquard_common::{IntoStatic, xrpc::XrpcRequest}; 15 - use jacquard_common::{ 16 - types::string::AtUri, 17 - xrpc::{GenericXrpcError, XrpcError}, 18 - }; 19 21 use serde::{Deserialize, Serialize}; 20 22 use smol_str::ToSmolStr; 21 23 use std::fmt::Display; ··· 84 86 85 87 #[cfg(feature = "relay")] 86 88 let r = r 87 - .route( 88 - SubscribeReposEndpoint::PATH, 89 - axum::routing::get(subscribe_repos::handle), 90 - ) 91 - .route( 92 - RequestCrawlRequest::PATH, 93 - axum::routing::get(subscribe_repos::handle), 94 - ); 89 + .route(SubscribeReposEndpoint::PATH, get(subscribe_repos::handle)) 90 + .route(RequestCrawlRequest::PATH, post(request_crawl::handle)); 95 91 96 92 r 97 93 } ··· 177 173 } 178 174 } 179 175 176 + #[cfg(feature = "indexer")] 180 177 fn upstream_error<E: std::error::Error + IntoStatic>( 181 178 nsid: &'static str, 182 179 message: impl Display,
+5 -6
src/backfill/manager.rs
··· 43 43 continue; 44 44 } 45 45 }; 46 - let mut metadata = crate::db::deser_repo_metadata(&metadata_bytes)?; 46 + let mut metadata = crate::db::deser_repo_meta(&metadata_bytes)?; 47 47 48 48 // move from resync back into pending 49 49 batch.remove(&state.db.resync, key.clone()); ··· 58 58 batch.insert( 59 59 &state.db.repo_metadata, 60 60 &metadata_key, 61 - crate::db::ser_repo_metadata(&metadata)?, 61 + crate::db::ser_repo_meta(&metadata)?, 62 62 ); 63 63 64 64 transitions.push((GaugeState::Resync(None), GaugeState::Pending)); ··· 133 133 continue; 134 134 } 135 135 }; 136 - let mut metadata = match crate::db::deser_repo_metadata( 137 - metadata_bytes.as_ref(), 138 - ) { 136 + let mut metadata = match crate::db::deser_repo_meta(metadata_bytes.as_ref()) 137 + { 139 138 Ok(m) => m, 140 139 Err(e) => { 141 140 error!(did = %did, err = %e, "failed to deserialize repo metadata"); ··· 153 152 keys::pending_key(metadata.index_id), 154 153 key.clone(), 155 154 ); 156 - let serialized_metadata = match crate::db::ser_repo_metadata(&metadata) { 155 + let serialized_metadata = match crate::db::ser_repo_meta(&metadata) { 157 156 Ok(s) => s, 158 157 Err(e) => { 159 158 error!(did = %did, err = %e, "failed to serialize repo metadata");
+3 -3
src/backfill/mod.rs
··· 742 742 .get(&metadata_key) 743 743 .into_diagnostic()? 744 744 .ok_or_else(|| miette::miette!("repo metadata not found for {}", did))?; 745 - let mut metadata = crate::db::deser_repo_metadata(&metadata_bytes)?; 745 + let mut metadata = crate::db::deser_repo_meta(&metadata_bytes)?; 746 746 metadata.tracked = true; 747 747 batch.insert( 748 748 &app_state.db.repo_metadata, 749 749 &metadata_key, 750 - crate::db::ser_repo_metadata(&metadata)?, 750 + crate::db::ser_repo_meta(&metadata)?, 751 751 ); 752 752 753 753 // add the counts ··· 771 771 .get(&metadata_key) 772 772 .into_diagnostic()? 773 773 .ok_or_else(|| miette::miette!("repo metadata not found for {}", did))?; 774 - let metadata = crate::db::deser_repo_metadata(metadata_bytes.as_ref())?; 774 + let metadata = crate::db::deser_repo_meta(metadata_bytes.as_ref())?; 775 775 776 776 let Some((_state, records_cnt_delta, added_blocks, count)) = result else { 777 777 // signal mode: no signal-matching records found, clean up the optimistically-added repo
+75
src/control/indexer.rs
··· 1 + use super::*; 2 + 3 + /// a stream of [`Event`]s. returned by [`Hydrant::subscribe`]. 4 + /// 5 + /// implements [`futures::Stream`] and can be used with `StreamExt::next`, 6 + /// `while let Some(evt) = stream.next().await`, `forward`, etc. 7 + /// the stream terminates when the underlying channel closes (i.e. hydrant shuts down). 8 + pub struct EventStream(mpsc::Receiver<Event>); 9 + 10 + impl Stream for EventStream { 11 + type Item = Event; 12 + 13 + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> { 14 + self.0.poll_recv(cx) 15 + } 16 + } 17 + 18 + /// runtime control over the backfill worker component. 19 + /// 20 + /// the backfill worker fetches full repo CAR files from each repo's PDS for any 21 + /// repository in the pending queue, parses the MST, and inserts all matching records 22 + /// into the database. concurrency is bounded by `HYDRANT_BACKFILL_CONCURRENCY_LIMIT`. 23 + #[derive(Clone)] 24 + pub struct BackfillHandle(Arc<AppState>); 25 + 26 + impl BackfillHandle { 27 + pub(crate) fn new(state: Arc<AppState>) -> Self { 28 + Self(state) 29 + } 30 + 31 + /// enable the backfill worker, no-op if already enabled. 32 + pub fn enable(&self) { 33 + self.0.backfill_enabled.send_replace(true); 34 + } 35 + /// disable the backfill worker, in-flight repos complete before pausing. 36 + pub fn disable(&self) { 37 + self.0.backfill_enabled.send_replace(false); 38 + } 39 + /// returns the current enabled state of the backfill worker. 40 + pub fn is_enabled(&self) -> bool { 41 + *self.0.backfill_enabled.borrow() 42 + } 43 + } 44 + 45 + impl Hydrant { 46 + /// subscribe to the ordered event stream. 47 + /// 48 + /// returns an [`EventStream`] that implements [`futures::Stream`]. 49 + /// 50 + /// - if `cursor` is `None`, streaming starts from the current head (live tail only). 51 + /// - if `cursor` is `Some(id)`, all persisted `record` events from that ID onward are 52 + /// replayed first, then the stream will switch to live tailing. 53 + /// 54 + /// `identity` and `account` events are ephemeral and are never replayed from a cursor, 55 + /// only live ones are delivered. use [`ReposControl::info`] to fetch current state for 56 + /// a specific repository. 57 + /// 58 + /// multiple concurrent subscribers each receive a full independent copy of the stream. 59 + /// the stream ends when the `EventStream` is dropped. 60 + pub fn subscribe(&self, cursor: Option<u64>) -> EventStream { 61 + let (tx, rx) = mpsc::channel(500); 62 + let state = self.state.clone(); 63 + let runtime = tokio::runtime::Handle::current(); 64 + 65 + std::thread::Builder::new() 66 + .name("hydrant-stream".into()) 67 + .spawn(move || { 68 + let _g = runtime.enter(); 69 + event_stream_thread(state, tx, cursor); 70 + }) 71 + .expect("failed to spawn stream thread"); 72 + 73 + EventStream(rx) 74 + } 75 + }
+83 -164
src/control/mod.rs
··· 1 1 #![allow(unused_imports)] 2 2 3 + #[cfg(feature = "indexer")] 3 4 pub(crate) mod crawler; 4 5 pub(crate) mod filter; 5 6 pub(crate) mod firehose; ··· 8 9 mod seed; 9 10 pub(crate) mod stream; 10 11 11 - pub use crawler::{CrawlerHandle, CrawlerSourceInfo}; 12 + #[cfg(feature = "indexer")] 13 + mod indexer; 14 + #[cfg(feature = "indexer")] 15 + pub use indexer::*; 16 + 17 + #[cfg(feature = "relay")] 18 + mod relay; 19 + #[cfg(feature = "relay")] 20 + pub use relay::*; 21 + 12 22 pub use filter::{FilterControl, FilterPatch, FilterSnapshot}; 13 23 pub use firehose::{FirehoseHandle, FirehoseSourceInfo}; 14 24 pub use pds::{PdsControl, PdsTierAssignment, PdsTierDefinition}; ··· 30 40 #[cfg(feature = "indexer")] 31 41 use crate::backfill::BackfillWorker; 32 42 use crate::config::{Config, SignatureVerification}; 33 - use crate::db::{ 34 - self, filter as db_filter, keys, load_persisted_crawler_sources, 35 - load_persisted_firehose_sources, 36 - }; 43 + #[cfg(feature = "indexer")] 44 + use crate::db::load_persisted_crawler_sources; 45 + use crate::db::{self, filter as db_filter, keys, load_persisted_firehose_sources}; 37 46 use crate::filter::FilterMode; 38 47 #[cfg(feature = "indexer")] 39 48 use crate::ingest::indexer::FirehoseWorker; 40 49 use crate::state::AppState; 41 50 use crate::types::MarshallableEvt; 42 51 43 - use crawler::{CrawlerShared, spawn_crawler_producer}; 44 52 use firehose::{FirehoseShared, spawn_firehose_ingestor}; 45 53 #[cfg(feature = "indexer")] 46 54 use stream::event_stream_thread; ··· 86 94 /// ``` 87 95 #[derive(Clone)] 88 96 pub struct Hydrant { 89 - pub crawler: CrawlerHandle, 97 + #[cfg(feature = "indexer")] 98 + pub crawler: crawler::CrawlerHandle, 90 99 pub firehose: FirehoseHandle, 100 + #[cfg(feature = "indexer")] 91 101 pub backfill: BackfillHandle, 92 102 pub filter: FilterControl, 93 103 pub pds: PdsControl, ··· 160 170 state.filter.store(Arc::new(new_filter)); 161 171 } 162 172 163 - // 4. set crawler enabled state from config, evaluated against the post-patch filter 164 - let post_patch_crawler = match config.enable_crawler { 165 - Some(b) => b, 166 - None => { 167 - state.filter.load().mode == FilterMode::Full || !config.crawler_sources.is_empty() 168 - } 169 - }; 170 - state.crawler_enabled.send_replace(post_patch_crawler); 173 + #[cfg(feature = "indexer")] 174 + { 175 + // 4. set crawler enabled state from config, evaluated against the post-patch filter 176 + let post_patch_crawler = match config.enable_crawler { 177 + Some(b) => b, 178 + None => { 179 + state.filter.load().mode == FilterMode::Full 180 + || !config.crawler_sources.is_empty() 181 + } 182 + }; 183 + state.crawler_enabled.send_replace(post_patch_crawler); 184 + } 171 185 172 186 let state = Arc::new(state); 173 187 174 188 Ok(Self { 175 - crawler: CrawlerHandle { 189 + #[cfg(feature = "indexer")] 190 + crawler: crawler::CrawlerHandle { 176 191 state: state.clone(), 177 192 shared: Arc::new(std::sync::OnceLock::new()), 178 193 tasks: Arc::new(scc::HashMap::new()), 179 194 persisted: Arc::new(scc::HashSet::new()), 180 195 }, 181 196 firehose: FirehoseHandle::new(state.clone()), 182 - backfill: BackfillHandle(state.clone()), 197 + #[cfg(feature = "indexer")] 198 + backfill: BackfillHandle::new(state.clone()), 183 199 filter: FilterControl(state.clone()), 184 200 pds: pds::PdsControl(state.clone()), 185 201 repos: ReposControl(state.clone()), ··· 209 225 pub fn run(&self) -> Result<impl Future<Output = Result<()>>> { 210 226 let state = self.state.clone(); 211 227 let config = self.config.clone(); 228 + #[cfg(feature = "indexer")] 212 229 let crawler = self.crawler.clone(); 213 230 let firehose = self.firehose.clone(); 214 231 ··· 357 374 let (fatal_tx_inner, mut fatal_rx) = watch::channel(None); 358 375 let fatal_tx = Arc::new(fatal_tx_inner); 359 376 360 - info!( 361 - crawler_enabled = *state.crawler_enabled.borrow(), 362 - firehose_enabled = *state.firehose_enabled.borrow(), 363 - filter_mode = ?state.filter.load().mode, 364 - "starting ingestion" 365 - ); 366 - 367 377 // 10. set shared and spawn firehose ingestors 368 378 firehose 369 379 .shared ··· 516 526 // set shared objects so CrawlerHandle methods can use them 517 527 crawler 518 528 .shared 519 - .set(CrawlerShared { 529 + .set(crawler::CrawlerShared { 520 530 http, 521 531 checker, 522 532 in_flight, ··· 530 540 // spawn initial sources from config 531 541 for source in config.crawler_sources.iter() { 532 542 let enabled_rx = state.crawler_enabled.subscribe(); 533 - let handle = spawn_crawler_producer( 543 + let handle = crawler::spawn_crawler_producer( 534 544 source, 535 545 &shared.http, 536 546 &state, ··· 556 566 continue; 557 567 } 558 568 let enabled_rx = state.crawler_enabled.subscribe(); 559 - let handle = spawn_crawler_producer( 569 + let handle = crawler::spawn_crawler_producer( 560 570 source, 561 571 &shared.http, 562 572 &state, ··· 662 672 Ok(fut) 663 673 } 664 674 665 - /// subscribe to the ordered event stream. 666 - /// 667 - /// returns an [`EventStream`] that implements [`futures::Stream`]. 668 - /// 669 - /// - if `cursor` is `None`, streaming starts from the current head (live tail only). 670 - /// - if `cursor` is `Some(id)`, all persisted `record` events from that ID onward are 671 - /// replayed first, then the stream will switch to live tailing. 672 - /// 673 - /// `identity` and `account` events are ephemeral and are never replayed from a cursor, 674 - /// only live ones are delivered. use [`ReposControl::info`] to fetch current state for 675 - /// a specific repository. 676 - /// 677 - /// multiple concurrent subscribers each receive a full independent copy of the stream. 678 - /// the stream ends when the `EventStream` is dropped. 679 - #[cfg(feature = "indexer")] 680 - pub fn subscribe(&self, cursor: Option<u64>) -> EventStream { 681 - let (tx, rx) = mpsc::channel(500); 682 - let state = self.state.clone(); 683 - let runtime = tokio::runtime::Handle::current(); 684 - 685 - std::thread::Builder::new() 686 - .name("hydrant-stream".into()) 687 - .spawn(move || { 688 - let _g = runtime.enter(); 689 - event_stream_thread(state, tx, cursor); 690 - }) 691 - .expect("failed to spawn stream thread"); 692 - 693 - EventStream(rx) 694 - } 695 - 696 - /// subscribe to the relay's ordered `subscribeRepos` event stream. 697 - /// 698 - /// returns a [`RelayEventStream`] that yields pre-encoded CBOR binary frames 699 - /// ready to forward directly to ATProto clients via WebSocket. 700 - /// 701 - /// - if `cursor` is `None`, streaming starts from the current head (live tail only). 702 - /// - if `cursor` is `Some(seq)`, all persisted events from that seq onward are replayed first. 703 - #[cfg(feature = "relay")] 704 - pub fn subscribe_repos(&self, cursor: Option<u64>) -> RelayEventStream { 705 - let (tx, rx) = mpsc::channel(500); 706 - let state = self.state.clone(); 707 - let runtime = tokio::runtime::Handle::current(); 708 - 709 - std::thread::Builder::new() 710 - .name("hydrant-relay-stream".into()) 711 - .spawn(move || { 712 - let _g = runtime.enter(); 713 - relay_stream_thread(state, tx, cursor); 714 - }) 715 - .expect("failed to spawn relay stream thread"); 716 - 717 - RelayEventStream(rx) 718 - } 719 - 720 675 /// return database counts and on-disk sizes for all keyspaces. 721 676 /// 722 677 /// counts include: `repos`, `pending`, `resync`, `records`, `blocks`, `events`, ··· 726 681 pub async fn stats(&self) -> Result<StatsResponse> { 727 682 let state = self.state.clone(); 728 683 729 - // todo: update stats, only return necessary info on relay vs indexer modes 730 - // (and ephemeral indexer) 731 - let mut counts: BTreeMap<&'static str, u64> = futures::future::join_all( 732 - [ 733 - "repos", 734 - "pending", 735 - "records", 736 - "blocks", 737 - "resync", 738 - "error_ratelimited", 739 - "error_transport", 740 - "error_generic", 741 - ] 742 - .into_iter() 743 - .map(|name| { 684 + #[allow(unused_mut)] 685 + let mut count_keys = vec![ 686 + "repos", 687 + "error_ratelimited", 688 + "error_transport", 689 + "error_generic", 690 + ]; 691 + 692 + #[cfg(feature = "indexer")] 693 + { 694 + count_keys.push("pending"); 695 + count_keys.push("records"); 696 + count_keys.push("blocks"); 697 + count_keys.push("resync"); 698 + } 699 + 700 + let mut counts: BTreeMap<&'static str, u64> = 701 + futures::future::join_all(count_keys.into_iter().map(|name| { 744 702 let state = state.clone(); 745 703 async move { (name, state.db.get_count(name).await) } 746 - }), 747 - ) 748 - .await 749 - .into_iter() 750 - .collect(); 704 + })) 705 + .await 706 + .into_iter() 707 + .collect(); 751 708 709 + #[cfg(feature = "indexer")] 752 710 counts.insert("events", state.db.events.approximate_len() as u64); 753 711 712 + #[cfg(feature = "relay")] 713 + counts.insert( 714 + "relay_events", 715 + state.db.relay_events.approximate_len() as u64, 716 + ); 717 + 754 718 let sizes = tokio::task::spawn_blocking(move || { 755 719 let mut s = BTreeMap::new(); 756 720 s.insert("repos", state.db.repos.disk_space()); 757 - s.insert("records", state.db.records.disk_space()); 758 - s.insert("blocks", state.db.blocks.disk_space()); 759 721 s.insert("cursors", state.db.cursors.disk_space()); 760 - s.insert("pending", state.db.pending.disk_space()); 761 - s.insert("resync", state.db.resync.disk_space()); 762 - s.insert("resync_buffer", state.db.resync_buffer.disk_space()); 763 - s.insert("events", state.db.events.disk_space()); 764 722 s.insert("counts", state.db.counts.disk_space()); 765 723 s.insert("filter", state.db.filter.disk_space()); 766 724 s.insert("crawler", state.db.crawler.disk_space()); 725 + 726 + #[cfg(feature = "indexer")] 727 + { 728 + s.insert("records", state.db.records.disk_space()); 729 + s.insert("blocks", state.db.blocks.disk_space()); 730 + s.insert("pending", state.db.pending.disk_space()); 731 + s.insert("resync", state.db.resync.disk_space()); 732 + s.insert("resync_buffer", state.db.resync_buffer.disk_space()); 733 + s.insert("events", state.db.events.disk_space()); 734 + } 735 + 736 + #[cfg(feature = "relay")] 737 + s.insert("relay_events", state.db.relay_events.disk_space()); 738 + 739 + #[cfg(feature = "backlinks")] 740 + s.insert("backlinks", state.db.backlinks.disk_space()); 741 + 767 742 s 768 743 }) 769 744 .await ··· 890 865 } 891 866 } 892 867 893 - /// a stream of [`Event`]s. returned by [`Hydrant::subscribe`]. 894 - /// 895 - /// implements [`futures::Stream`] and can be used with `StreamExt::next`, 896 - /// `while let Some(evt) = stream.next().await`, `forward`, etc. 897 - /// the stream terminates when the underlying channel closes (i.e. hydrant shuts down). 898 - #[cfg(feature = "indexer")] 899 - pub struct EventStream(mpsc::Receiver<Event>); 900 - 901 - #[cfg(feature = "indexer")] 902 - impl Stream for EventStream { 903 - type Item = Event; 904 - 905 - fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> { 906 - self.0.poll_recv(cx) 907 - } 908 - } 909 - 910 - /// the relay event stream produced by [`Hydrant::subscribe_repos`]. 911 - #[cfg(feature = "relay")] 912 - pub struct RelayEventStream(mpsc::Receiver<bytes::Bytes>); 913 - 914 - #[cfg(feature = "relay")] 915 - impl futures::Stream for RelayEventStream { 916 - type Item = bytes::Bytes; 917 - 918 - fn poll_next( 919 - mut self: std::pin::Pin<&mut Self>, 920 - cx: &mut std::task::Context<'_>, 921 - ) -> std::task::Poll<Option<Self::Item>> { 922 - self.0.poll_recv(cx) 923 - } 924 - } 925 - 926 868 /// database statistics returned by [`Hydrant::stats`]. 927 869 #[derive(serde::Serialize)] 928 870 pub struct StatsResponse { ··· 930 872 pub counts: BTreeMap<&'static str, u64>, 931 873 /// on-disk size in bytes per keyspace 932 874 pub sizes: BTreeMap<&'static str, u64>, 933 - } 934 - 935 - /// runtime control over the backfill worker component. 936 - /// 937 - /// the backfill worker fetches full repo CAR files from each repo's PDS for any 938 - /// repository in the pending queue, parses the MST, and inserts all matching records 939 - /// into the database. concurrency is bounded by `HYDRANT_BACKFILL_CONCURRENCY_LIMIT`. 940 - #[derive(Clone)] 941 - pub struct BackfillHandle(Arc<AppState>); 942 - 943 - impl BackfillHandle { 944 - /// enable the backfill worker, no-op if already enabled. 945 - pub fn enable(&self) { 946 - self.0.backfill_enabled.send_replace(true); 947 - } 948 - /// disable the backfill worker, in-flight repos complete before pausing. 949 - pub fn disable(&self) { 950 - self.0.backfill_enabled.send_replace(false); 951 - } 952 - /// returns the current enabled state of the backfill worker. 953 - pub fn is_enabled(&self) -> bool { 954 - *self.0.backfill_enabled.borrow() 955 - } 956 875 } 957 876 958 877 /// control over database maintenance operations.
+40
src/control/relay.rs
··· 1 + use super::*; 2 + 3 + /// the relay event stream produced by [`Hydrant::subscribe_repos`]. 4 + pub struct RelayEventStream(mpsc::Receiver<bytes::Bytes>); 5 + 6 + impl futures::Stream for RelayEventStream { 7 + type Item = bytes::Bytes; 8 + 9 + fn poll_next( 10 + mut self: std::pin::Pin<&mut Self>, 11 + cx: &mut std::task::Context<'_>, 12 + ) -> std::task::Poll<Option<Self::Item>> { 13 + self.0.poll_recv(cx) 14 + } 15 + } 16 + 17 + impl Hydrant { 18 + /// subscribe to the relay's ordered `subscribeRepos` event stream. 19 + /// 20 + /// returns a [`RelayEventStream`] that yields pre-encoded CBOR binary frames 21 + /// ready to forward directly to ATProto clients via WebSocket. 22 + /// 23 + /// - if `cursor` is `None`, streaming starts from the current head (live tail only). 24 + /// - if `cursor` is `Some(seq)`, all persisted events from that seq onward are replayed first. 25 + pub fn subscribe_repos(&self, cursor: Option<u64>) -> RelayEventStream { 26 + let (tx, rx) = mpsc::channel(500); 27 + let state = self.state.clone(); 28 + let runtime = tokio::runtime::Handle::current(); 29 + 30 + std::thread::Builder::new() 31 + .name("hydrant-relay-stream".into()) 32 + .spawn(move || { 33 + let _g = runtime.enter(); 34 + relay_stream_thread(state, tx, cursor); 35 + }) 36 + .expect("failed to spawn relay stream thread"); 37 + 38 + RelayEventStream(rx) 39 + } 40 + }
+22 -357
src/control/repos.rs src/control/repos/indexer.rs
··· 1 - use std::collections::HashMap; 2 - use std::sync::Arc; 3 - 4 - use chrono::{DateTime, Utc}; 5 - use fjall::OwnedWriteBatch; 6 - use futures::TryFutureExt; 7 - use jacquard_common::cowstr::ToCowStr; 8 - use jacquard_common::types::cid::{Cid, IpldCid}; 9 - use jacquard_common::types::ident::AtIdentifier; 10 - use jacquard_common::types::nsid::Nsid; 11 - use jacquard_common::types::string::{Did, Handle, Rkey}; 12 - use jacquard_common::types::tid::Tid; 13 - use jacquard_common::{CowStr, Data, IntoStatic}; 14 - use miette::{Context, IntoDiagnostic, Result}; 1 + use futures::{FutureExt, TryFutureExt}; 15 2 use rand::Rng; 16 - use smol_str::ToSmolStr; 17 - use url::Url; 18 3 19 - use crate::db::types::{DbRkey, DidKey, TrimmedDid}; 20 - use crate::db::{self, Db, keys}; 21 - use crate::state::AppState; 22 - use crate::types::{GaugeState, RepoMetadata, RepoState, RepoStatus}; 23 - use crate::util::invalid_handle; 24 - 25 - /// information about a tracked or known repository. returned by [`ReposControl`] methods. 26 - #[derive(Debug, Clone, serde::Serialize)] 27 - pub struct RepoInfo { 28 - /// the DID of the repository. 29 - pub did: Did<'static>, 30 - /// the status of the repository. 31 - #[serde(serialize_with = "crate::util::repo_status_serialize_str")] 32 - pub status: RepoStatus, 33 - /// whether this repository is tracked or not. 34 - /// untracked repositories are not updated and they stay frozen. 35 - pub tracked: bool, 36 - /// the revision of the root commit of this repository. 37 - #[serde(skip_serializing_if = "Option::is_none")] 38 - pub rev: Option<Tid>, 39 - /// the CID of the MST root of this repository. 40 - #[serde(serialize_with = "crate::util::opt_cid_serialize_str")] 41 - #[serde(skip_serializing_if = "Option::is_none")] 42 - pub data: Option<IpldCid>, 43 - /// the handle for the DID of this repository. 44 - /// 45 - /// note that this handle is not bi-directionally verified. 46 - #[serde(skip_serializing_if = "Option::is_none")] 47 - pub handle: Option<Handle<'static>>, 48 - /// the URL for the PDS in which this repository is hosted on. 49 - #[serde(skip_serializing_if = "Option::is_none")] 50 - pub pds: Option<Url>, 51 - /// ATProto signing key of this repository. 52 - #[serde(serialize_with = "crate::util::opt_did_key_serialize_str")] 53 - #[serde(skip_serializing_if = "Option::is_none")] 54 - pub signing_key: Option<DidKey<'static>>, 55 - /// when this repository was last touched (status update, commit ingested, etc.). 56 - #[serde(skip_serializing_if = "Option::is_none")] 57 - pub last_updated_at: Option<DateTime<Utc>>, 58 - /// the time of the last message gotten from the firehose for this repository. 59 - /// this is equal to the `time` field. 60 - #[serde(skip_serializing_if = "Option::is_none")] 61 - pub last_message_at: Option<DateTime<Utc>>, 62 - } 63 - 64 - /// control over which repositories are tracked and access to their state. 65 - /// 66 - /// in `filter` mode, a repo is only indexed if it either matches a signal or is 67 - /// explicitly tracked via [`ReposControl::track`]. in `full` mode all repos are 68 - /// indexed and tracking is implicit. 69 - /// 70 - /// tracking a DID that hydrant has never seen enqueues an immediate backfill. 71 - /// tracking a DID that hydrant already knows about (but has marked untracked) 72 - /// re-enqueues it for backfill. 73 - #[derive(Clone)] 74 - pub struct ReposControl(pub(super) Arc<AppState>); 4 + use super::*; 75 5 76 6 impl ReposControl { 77 - pub(crate) fn iter_states( 78 - &self, 79 - cursor: Option<&Did<'_>>, 80 - ) -> impl Iterator<Item = Result<(Did<'static>, RepoState<'static>, crate::types::RepoMetadata)>> 81 - { 82 - let start_bound = if let Some(cursor) = cursor { 83 - let did_key = keys::repo_key(cursor); 84 - std::ops::Bound::Excluded(did_key) 85 - } else { 86 - std::ops::Bound::Unbounded 87 - }; 88 - 89 - let state = self.0.clone(); 90 - self.0 91 - .db 92 - .repos 93 - .range((start_bound, std::ops::Bound::Unbounded)) 94 - .map(move |g| { 95 - let (k, v) = g.into_inner().into_diagnostic()?; 96 - let repo_state = crate::db::deser_repo_state(&v)?.into_static(); 97 - let did = TrimmedDid::try_from(k.as_ref())?.to_did(); 98 - let metadata_key = keys::repo_metadata_key(&did); 99 - let metadata = state 100 - .db 101 - .repo_metadata 102 - .get(&metadata_key) 103 - .into_diagnostic()? 104 - .ok_or_else(|| miette::miette!("repo metadata not found for {}", did))?; 105 - let metadata = crate::db::deser_repo_metadata(metadata.as_ref())?; 106 - Ok((did, repo_state, metadata)) 107 - }) 108 - } 109 - 110 - /// iterates through all repositories, returning their state. 111 - pub fn iter(&self, cursor: Option<&Did<'_>>) -> impl Iterator<Item = Result<RepoInfo>> { 112 - self.iter_states(cursor) 113 - .map(|r| r.map(|(did, s, m)| repo_state_to_info(did, s, m.tracked))) 114 - } 115 - 116 - #[allow(dead_code)] 117 7 /// iterates through pending repositories, returning their state. 118 - fn iter_pending(&self, cursor: Option<u64>) -> impl Iterator<Item = Result<(u64, RepoInfo)>> { 8 + #[allow(dead_code)] 9 + pub(crate) fn iter_pending( 10 + &self, 11 + cursor: Option<u64>, 12 + ) -> impl Iterator<Item = Result<(u64, RepoInfo)>> { 119 13 let start_bound = if let Some(cursor) = cursor { 120 14 std::ops::Bound::Excluded(cursor.to_be_bytes().to_vec()) 121 15 } else { ··· 151 45 .get(&metadata_key) 152 46 .into_diagnostic()? 153 47 .ok_or_else(|| miette::miette!("repo metadata not found for {}", did))?; 154 - let metadata = crate::db::deser_repo_metadata(metadata.as_ref())?; 48 + let metadata = crate::db::deser_repo_meta(metadata.as_ref())?; 155 49 Ok(Some(( 156 50 id, 157 51 repo_state_to_info(did, repo_state.into_static(), metadata.tracked), ··· 162 56 } 163 57 164 58 #[allow(dead_code)] 165 - fn iter_resync(&self, cursor: Option<&Did<'_>>) -> impl Iterator<Item = Result<RepoInfo>> { 59 + pub(crate) fn iter_resync( 60 + &self, 61 + cursor: Option<&Did<'_>>, 62 + ) -> impl Iterator<Item = Result<RepoInfo>> { 166 63 let start_bound = if let Some(cursor) = cursor { 167 64 let did_key = keys::repo_key(cursor); 168 65 std::ops::Bound::Excluded(did_key) ··· 192 89 .get(&metadata_key) 193 90 .into_diagnostic()? 194 91 .ok_or_else(|| miette::miette!("repo metadata not found for {}", did))?; 195 - let metadata = crate::db::deser_repo_metadata(metadata.as_ref())?; 92 + let metadata = crate::db::deser_repo_meta(metadata.as_ref())?; 196 93 Ok(Some(repo_state_to_info( 197 94 did, 198 95 repo_state.into_static(), ··· 203 100 .flatten() 204 101 } 205 102 206 - /// gets a handle for a repository to read from it. 207 - pub fn get<'i>(&self, did: &Did<'i>) -> RepoHandle<'i> { 208 - RepoHandle { 209 - state: self.0.clone(), 210 - did: did.clone(), 211 - } 212 - } 213 - 214 - /// same as [`ReposControl::get`] but allows you to pass in an identifier that can be 215 - /// either a handle or a DID. 216 - pub async fn resolve(&self, repo: &AtIdentifier<'_>) -> Result<RepoHandle<'static>> { 217 - let did = self.0.resolver.resolve_did(repo).await?; 218 - Ok(RepoHandle { 219 - state: self.0.clone(), 220 - did, 221 - }) 222 - } 223 - 224 - /// fetch the current state of a repository. 225 - /// returns `None` if hydrant has never seen this repository. 226 - pub async fn info(&self, did: &Did<'_>) -> Result<Option<RepoInfo>> { 227 - self.get(did).info().await 228 - } 229 - 230 - fn _resync( 103 + pub(crate) fn _resync( 231 104 db: &Db, 232 105 did: &Did<'_>, 233 - batch: &mut OwnedWriteBatch, 106 + batch: &mut fjall::OwnedWriteBatch, 234 107 transitions: &mut Vec<(GaugeState, GaugeState)>, 235 108 ) -> Result<bool> { 236 109 let did_key = keys::repo_key(did); ··· 248 121 .get(&metadata_key) 249 122 .into_diagnostic()? 250 123 .ok_or_else(|| miette::miette!("repo metadata not found for {}", did))?; 251 - let mut metadata = crate::db::deser_repo_metadata(&metadata_bytes)?; 124 + let mut metadata = crate::db::deser_repo_meta(&metadata_bytes)?; 252 125 253 126 // skip if already in pending queue 254 127 let is_pending = db ··· 269 142 batch.insert( 270 143 &db.repo_metadata, 271 144 &metadata_key, 272 - crate::db::ser_repo_metadata(&metadata)?, 145 + crate::db::ser_repo_meta(&metadata)?, 273 146 ); 274 147 transitions.push((old, GaugeState::Pending)); 275 148 return Ok(true); ··· 341 214 let mut added = 0i64; 342 215 let mut queued: Vec<Did<'static>> = Vec::new(); 343 216 let mut transitions: Vec<(GaugeState, GaugeState)> = Vec::new(); 344 - let mut rng = rand::rng(); 345 217 346 218 for did in dids { 347 219 let did_key = keys::repo_key(&did); ··· 349 221 350 222 let metadata_bytes = db.repo_metadata.get(&metadata_key).into_diagnostic()?; 351 223 let existing_metadata = metadata_bytes 352 - .map(|b| crate::db::deser_repo_metadata(&b)) 224 + .map(|b| crate::db::deser_repo_meta(&b)) 353 225 .transpose()?; 354 226 355 227 if let Some(metadata) = existing_metadata { ··· 358 230 } 359 231 } else { 360 232 let repo_state = RepoState::backfilling(); 361 - let metadata = RepoMetadata::backfilling(rng.next_u64()); 233 + let metadata = RepoMetadata::backfilling(rand::random()); 362 234 batch.insert(&db.repos, &did_key, crate::db::ser_repo_state(&repo_state)?); 363 235 batch.insert( 364 236 &db.repo_metadata, 365 237 &metadata_key, 366 - crate::db::ser_repo_metadata(&metadata)?, 238 + crate::db::ser_repo_meta(&metadata)?, 367 239 ); 368 240 batch.insert(&db.pending, keys::pending_key(metadata.index_id), &did_key); 369 241 added += 1; ··· 418 290 if let Some(repo_state) = existing { 419 291 let metadata_bytes = db.repo_metadata.get(&metadata_key).into_diagnostic()?; 420 292 let existing_metadata = metadata_bytes 421 - .map(|b| crate::db::deser_repo_metadata(&b)) 293 + .map(|b| crate::db::deser_repo_meta(&b)) 422 294 .transpose()?; 423 295 424 296 if let Some(mut metadata) = existing_metadata { ··· 429 301 batch.insert( 430 302 &db.repo_metadata, 431 303 &metadata_key, 432 - crate::db::ser_repo_metadata(&metadata)?, 304 + crate::db::ser_repo_meta(&metadata)?, 433 305 ); 434 306 batch.remove(&db.pending, keys::pending_key(metadata.index_id)); 435 307 batch.remove(&db.resync, &did_key); ··· 459 331 } 460 332 } 461 333 462 - pub(crate) fn repo_state_to_info(did: Did<'static>, s: RepoState<'_>, tracked: bool) -> RepoInfo { 463 - let (rev, data) = s 464 - .root 465 - .map(|c| (Some(c.rev.to_tid()), Some(c.data))) 466 - .unwrap_or_default(); 467 - RepoInfo { 468 - did, 469 - status: s.status, 470 - tracked, 471 - rev, 472 - data, 473 - handle: s.handle.map(|h| h.into_static()), 474 - pds: s.pds.and_then(|p| p.parse().ok()), 475 - signing_key: s.signing_key.map(|k| k.into_static()), 476 - last_updated_at: DateTime::from_timestamp_secs(s.last_updated_at), 477 - last_message_at: s.last_message_time.and_then(DateTime::from_timestamp_secs), 478 - } 479 - } 480 - 481 - pub struct Record { 482 - pub did: Did<'static>, 483 - pub cid: Cid<'static>, 484 - pub value: Data<'static>, 485 - } 486 - 487 - pub struct ListedRecord { 488 - pub rkey: Rkey<'static>, 489 - pub cid: Cid<'static>, 490 - pub value: Data<'static>, 491 - } 492 - 493 - pub struct RecordList { 494 - pub records: Vec<ListedRecord>, 495 - pub cursor: Option<Rkey<'static>>, 496 - } 497 - 498 - #[derive(Debug, thiserror::Error)] 499 - pub enum MiniDocError { 500 - #[error("repo is not synced yet")] 501 - NotSynced, 502 - #[error("repo not found")] 503 - RepoNotFound, 504 - #[error("could not resolve identity")] 505 - CouldNotResolveIdentity, 506 - #[error("{0}")] 507 - Other(miette::Error), 508 - } 509 - 510 - /// a mini doc with a bi-directionally verified handle. 511 - pub struct MiniDoc<'i> { 512 - /// the did. 513 - pub did: Did<'i>, 514 - /// the handle. if verification fails or no handle is found, 515 - /// this will be "handle.invalid". 516 - pub handle: Handle<'i>, 517 - /// the url of the PDS of this repo. 518 - pub pds: Url, 519 - /// the atproto signing key of this repo. 520 - pub signing_key: DidKey<'i>, 521 - } 522 - 523 - /// handle to access data related to this repository. 524 - #[derive(Clone)] 525 - pub struct RepoHandle<'i> { 526 - state: Arc<AppState>, 527 - pub did: Did<'i>, 528 - } 529 - 530 334 impl<'i> RepoHandle<'i> { 531 - pub(crate) async fn state(&self) -> Result<Option<RepoState<'static>>> { 532 - let did_key = keys::repo_key(&self.did); 533 - let app_state = self.state.clone(); 534 - 535 - tokio::task::spawn_blocking(move || { 536 - let bytes = app_state.db.repos.get(&did_key).into_diagnostic()?; 537 - bytes 538 - .as_deref() 539 - .map(db::deser_repo_state) 540 - .transpose() 541 - .map(|opt| opt.map(IntoStatic::into_static)) 542 - }) 543 - .await 544 - .into_diagnostic()? 545 - } 546 - 547 - /// fetch the current state of this repository. 548 - /// returns `None` if hydrant has never seen this repository. 549 - pub async fn info(&self) -> Result<Option<RepoInfo>> { 550 - let did = self.did.clone().into_static(); 551 - let did_key = keys::repo_key(&did); 552 - let metadata_key = keys::repo_metadata_key(&did); 553 - let app_state = self.state.clone(); 554 - 555 - tokio::task::spawn_blocking(move || { 556 - let state_bytes = app_state.db.repos.get(&did_key).into_diagnostic()?; 557 - let Some(state_bytes) = state_bytes else { 558 - return Ok(None); 559 - }; 560 - let repo_state = crate::db::deser_repo_state(&state_bytes)?; 561 - 562 - let metadata_bytes = app_state 563 - .db 564 - .repo_metadata 565 - .get(&metadata_key) 566 - .into_diagnostic()? 567 - .ok_or_else(|| miette::miette!("repo metadata not found for {}", did))?; 568 - let metadata = crate::db::deser_repo_metadata(&metadata_bytes)?; 569 - 570 - Ok(Some(repo_state_to_info(did, repo_state, metadata.tracked))) 571 - }) 572 - .await 573 - .into_diagnostic()? 574 - } 575 - 576 - /// returns the collections of this repository and the number of records it has in each. 577 - pub async fn collections(&self) -> Result<HashMap<Nsid<'static>, u64>> { 578 - let did = self.did.clone().into_static(); 579 - let state = self.state.clone(); 580 - 581 - tokio::task::spawn_blocking(move || { 582 - let prefix = keys::did_collection_prefix(&did); 583 - let mut res = HashMap::new(); 584 - for item in state.db.counts.prefix(&prefix) { 585 - let (k, v) = item.into_inner().into_diagnostic()?; 586 - let col = k 587 - .strip_prefix(prefix.as_slice()) 588 - .ok_or_else(|| miette::miette!("invalid collection count key: {k:?}")) 589 - .and_then(|r| std::str::from_utf8(r).into_diagnostic()) 590 - .and_then(|n| Nsid::new(n).into_diagnostic())? 591 - .into_static(); 592 - let count = u64::from_be_bytes( 593 - v.as_ref() 594 - .try_into() 595 - .into_diagnostic() 596 - .wrap_err("expected to be count (8 bytes)")?, 597 - ); 598 - res.insert(col, count); 599 - } 600 - Ok(res) 601 - }) 602 - .await 603 - .into_diagnostic()? 604 - } 605 - 606 - /// returns a bi-directionally validated mini doc. 607 - pub async fn mini_doc(&self) -> Result<MiniDoc<'static>, MiniDocError> { 608 - let Some(info) = self.info().await.map_err(MiniDocError::Other)? else { 609 - return Err(MiniDocError::RepoNotFound); 610 - }; 611 - 612 - // check if repo is still backfilling (in pending) 613 - let metadata_key = keys::repo_metadata_key(&self.did); 614 - let app_state = self.state.clone(); 615 - 616 - let is_pending = tokio::task::spawn_blocking(move || { 617 - let metadata_bytes = app_state 618 - .db 619 - .repo_metadata 620 - .get(&metadata_key) 621 - .into_diagnostic()?; 622 - let Some(metadata_bytes) = metadata_bytes else { 623 - return Ok::<_, miette::Report>(false); 624 - }; 625 - let metadata = crate::db::deser_repo_metadata(metadata_bytes.as_ref())?; 626 - Ok(app_state 627 - .db 628 - .pending 629 - .get(crate::db::keys::pending_key(metadata.index_id)) 630 - .into_diagnostic()? 631 - .is_some()) 632 - }) 633 - .await 634 - .map_err(|e| MiniDocError::Other(miette::miette!(e)))? 635 - .map_err(MiniDocError::Other)?; 636 - 637 - if is_pending { 638 - return Err(MiniDocError::NotSynced); 639 - } 640 - 641 - let pds = info 642 - .pds 643 - .ok_or_else(|| MiniDocError::CouldNotResolveIdentity)?; 644 - let signing_key = info 645 - .signing_key 646 - .ok_or_else(|| MiniDocError::CouldNotResolveIdentity)? 647 - .into_static(); 648 - 649 - let handle = if let Some(h) = info.handle { 650 - let is_valid = self 651 - .state 652 - .resolver 653 - .verify_handle(&self.did, &h) 654 - .await 655 - .into_diagnostic() 656 - .map_err(MiniDocError::Other)?; 657 - is_valid.then_some(h).unwrap_or_else(invalid_handle) 658 - } else { 659 - invalid_handle() 660 - }; 661 - 662 - Ok(MiniDoc { 663 - did: self.did.clone().into_static(), 664 - handle, 665 - pds, 666 - signing_key, 667 - }) 668 - } 669 - 670 335 /// gets a record from this repository. 671 336 pub async fn get_record(&self, collection: &str, rkey: &str) -> Result<Option<Record>> { 672 337 let did = self.did.clone().into_static();
+358
src/control/repos/mod.rs
··· 1 + use std::collections::HashMap; 2 + use std::sync::Arc; 3 + 4 + use chrono::{DateTime, Utc}; 5 + use jacquard_common::cowstr::ToCowStr; 6 + use jacquard_common::types::cid::{Cid, IpldCid}; 7 + use jacquard_common::types::ident::AtIdentifier; 8 + use jacquard_common::types::nsid::Nsid; 9 + use jacquard_common::types::string::{Did, Handle, Rkey}; 10 + use jacquard_common::types::tid::Tid; 11 + use jacquard_common::{CowStr, Data, IntoStatic}; 12 + use miette::{Context, IntoDiagnostic, Result, WrapErr}; 13 + use smol_str::ToSmolStr; 14 + use url::Url; 15 + 16 + use crate::db::types::{DbRkey, DidKey, TrimmedDid}; 17 + use crate::db::{self, Db, keys}; 18 + use crate::state::AppState; 19 + #[cfg(feature = "indexer")] 20 + use crate::types::GaugeState; 21 + use crate::types::{RepoMetadata, RepoState, RepoStatus}; 22 + use crate::util::invalid_handle; 23 + 24 + #[cfg(feature = "indexer")] 25 + mod indexer; 26 + 27 + #[cfg(feature = "indexer")] 28 + pub use indexer::*; 29 + 30 + /// information about a tracked or known repository. returned by [`ReposControl`] methods. 31 + #[derive(Debug, Clone, serde::Serialize)] 32 + pub struct RepoInfo { 33 + /// the DID of the repository. 34 + pub did: Did<'static>, 35 + /// the status of the repository. 36 + #[serde(serialize_with = "crate::util::repo_status_serialize_str")] 37 + pub status: RepoStatus, 38 + /// whether this repository is tracked or not. 39 + /// untracked repositories are not updated and they stay frozen. 40 + pub tracked: bool, 41 + /// the revision of the root commit of this repository. 42 + #[serde(skip_serializing_if = "Option::is_none")] 43 + pub rev: Option<Tid>, 44 + /// the CID of the MST root of this repository. 45 + #[serde(serialize_with = "crate::util::opt_cid_serialize_str")] 46 + #[serde(skip_serializing_if = "Option::is_none")] 47 + pub data: Option<IpldCid>, 48 + /// the handle for the DID of this repository. 49 + /// 50 + /// note that this handle is not bi-directionally verified. 51 + #[serde(skip_serializing_if = "Option::is_none")] 52 + pub handle: Option<Handle<'static>>, 53 + /// the URL for the PDS in which this repository is hosted on. 54 + #[serde(skip_serializing_if = "Option::is_none")] 55 + pub pds: Option<Url>, 56 + /// ATProto signing key of this repository. 57 + #[serde(serialize_with = "crate::util::opt_did_key_serialize_str")] 58 + #[serde(skip_serializing_if = "Option::is_none")] 59 + pub signing_key: Option<DidKey<'static>>, 60 + /// when this repository was last touched (status update, commit ingested, etc.). 61 + #[serde(skip_serializing_if = "Option::is_none")] 62 + pub last_updated_at: Option<DateTime<Utc>>, 63 + /// the time of the last message gotten from the firehose for this repository. 64 + /// this is equal to the `time` field. 65 + #[serde(skip_serializing_if = "Option::is_none")] 66 + pub last_message_at: Option<DateTime<Utc>>, 67 + } 68 + 69 + /// control over which repositories are tracked and access to their state. 70 + /// 71 + /// in `filter` mode, a repo is only indexed if it either matches a signal or is 72 + /// explicitly tracked via [`ReposControl::track`]. in `full` mode all repos are 73 + /// indexed and tracking is implicit. 74 + /// 75 + /// tracking a DID that hydrant has never seen enqueues an immediate backfill. 76 + /// tracking a DID that hydrant already knows about (but has marked untracked) 77 + /// re-enqueues it for backfill. 78 + #[derive(Clone)] 79 + pub struct ReposControl(pub(super) Arc<AppState>); 80 + 81 + impl ReposControl { 82 + pub(crate) fn iter_states( 83 + &self, 84 + cursor: Option<&Did<'_>>, 85 + ) -> impl Iterator<Item = Result<(Did<'static>, RepoState<'static>, crate::types::RepoMetadata)>> 86 + { 87 + let start_bound = if let Some(cursor) = cursor { 88 + let did_key = keys::repo_key(cursor); 89 + std::ops::Bound::Excluded(did_key) 90 + } else { 91 + std::ops::Bound::Unbounded 92 + }; 93 + 94 + let state = self.0.clone(); 95 + self.0 96 + .db 97 + .repos 98 + .range((start_bound, std::ops::Bound::Unbounded)) 99 + .map(move |g| { 100 + let (k, v) = g.into_inner().into_diagnostic()?; 101 + let repo_state = crate::db::deser_repo_state(&v)?.into_static(); 102 + let did = TrimmedDid::try_from(k.as_ref())?.to_did(); 103 + let metadata_key = keys::repo_metadata_key(&did); 104 + let metadata = state 105 + .db 106 + .repo_metadata 107 + .get(&metadata_key) 108 + .into_diagnostic()? 109 + .ok_or_else(|| miette::miette!("repo metadata not found for {}", did))?; 110 + let metadata = crate::db::deser_repo_meta(metadata.as_ref())?; 111 + Ok((did, repo_state, metadata)) 112 + }) 113 + } 114 + 115 + /// iterates through all repositories, returning their state. 116 + pub fn iter(&self, cursor: Option<&Did<'_>>) -> impl Iterator<Item = Result<RepoInfo>> { 117 + self.iter_states(cursor) 118 + .map(|r| r.map(|(did, s, m)| repo_state_to_info(did, s, m.tracked))) 119 + } 120 + 121 + /// gets a handle for a repository to read from it. 122 + pub fn get<'i>(&self, did: &Did<'i>) -> RepoHandle<'i> { 123 + RepoHandle { 124 + state: self.0.clone(), 125 + did: did.clone(), 126 + } 127 + } 128 + 129 + /// same as [`ReposControl::get`] but allows you to pass in an identifier that can be 130 + /// either a handle or a DID. 131 + pub async fn resolve(&self, repo: &AtIdentifier<'_>) -> Result<RepoHandle<'static>> { 132 + let did = self.0.resolver.resolve_did(repo).await?; 133 + Ok(RepoHandle { 134 + state: self.0.clone(), 135 + did, 136 + }) 137 + } 138 + 139 + /// fetch the current state of a repository. 140 + /// returns `None` if hydrant has never seen this repository. 141 + pub async fn info(&self, did: &Did<'_>) -> Result<Option<RepoInfo>> { 142 + self.get(did).info().await 143 + } 144 + } 145 + 146 + pub(crate) fn repo_state_to_info(did: Did<'static>, s: RepoState<'_>, tracked: bool) -> RepoInfo { 147 + let (rev, data) = s 148 + .root 149 + .map(|c| (Some(c.rev.to_tid()), Some(c.data))) 150 + .unwrap_or_default(); 151 + RepoInfo { 152 + did, 153 + status: s.status, 154 + tracked, 155 + rev, 156 + data, 157 + handle: s.handle.map(|h| h.into_static()), 158 + pds: s.pds.and_then(|p| p.parse().ok()), 159 + signing_key: s.signing_key.map(|k| k.into_static()), 160 + last_updated_at: DateTime::from_timestamp_secs(s.last_updated_at), 161 + last_message_at: s.last_message_time.and_then(DateTime::from_timestamp_secs), 162 + } 163 + } 164 + 165 + pub struct Record { 166 + pub did: Did<'static>, 167 + pub cid: Cid<'static>, 168 + pub value: Data<'static>, 169 + } 170 + 171 + pub struct ListedRecord { 172 + pub rkey: Rkey<'static>, 173 + pub cid: Cid<'static>, 174 + pub value: Data<'static>, 175 + } 176 + 177 + pub struct RecordList { 178 + pub records: Vec<ListedRecord>, 179 + pub cursor: Option<Rkey<'static>>, 180 + } 181 + 182 + #[derive(Debug, thiserror::Error)] 183 + pub enum MiniDocError { 184 + #[error("repo is not synced yet")] 185 + NotSynced, 186 + #[error("repo not found")] 187 + RepoNotFound, 188 + #[error("could not resolve identity")] 189 + CouldNotResolveIdentity, 190 + #[error("{0}")] 191 + Other(miette::Error), 192 + } 193 + 194 + /// a mini doc with a bi-directionally verified handle. 195 + pub struct MiniDoc<'i> { 196 + /// the did. 197 + pub did: Did<'i>, 198 + /// the handle. if verification fails or no handle is found, 199 + /// this will be "handle.invalid". 200 + pub handle: Handle<'i>, 201 + /// the url of the PDS of this repo. 202 + pub pds: Url, 203 + /// the atproto signing key of this repo. 204 + pub signing_key: DidKey<'i>, 205 + } 206 + 207 + /// handle to access data related to this repository. 208 + #[derive(Clone)] 209 + pub struct RepoHandle<'i> { 210 + state: Arc<AppState>, 211 + pub did: Did<'i>, 212 + } 213 + 214 + impl<'i> RepoHandle<'i> { 215 + pub(crate) async fn state(&self) -> Result<Option<RepoState<'static>>> { 216 + let did_key = keys::repo_key(&self.did); 217 + let app_state = self.state.clone(); 218 + 219 + tokio::task::spawn_blocking(move || { 220 + let bytes = app_state.db.repos.get(&did_key).into_diagnostic()?; 221 + bytes 222 + .as_deref() 223 + .map(db::deser_repo_state) 224 + .transpose() 225 + .map(|opt| opt.map(IntoStatic::into_static)) 226 + }) 227 + .await 228 + .into_diagnostic()? 229 + } 230 + 231 + /// fetch the current state of this repository. 232 + /// returns `None` if hydrant has never seen this repository. 233 + pub async fn info(&self) -> Result<Option<RepoInfo>> { 234 + let did = self.did.clone().into_static(); 235 + let did_key = keys::repo_key(&did); 236 + let metadata_key = keys::repo_metadata_key(&did); 237 + let app_state = self.state.clone(); 238 + 239 + tokio::task::spawn_blocking(move || { 240 + let state_bytes = app_state.db.repos.get(&did_key).into_diagnostic()?; 241 + let Some(state_bytes) = state_bytes else { 242 + return Ok(None); 243 + }; 244 + let repo_state = crate::db::deser_repo_state(&state_bytes)?; 245 + 246 + let metadata_bytes = app_state 247 + .db 248 + .repo_metadata 249 + .get(&metadata_key) 250 + .into_diagnostic()? 251 + .ok_or_else(|| miette::miette!("repo metadata not found for {}", did))?; 252 + let metadata = crate::db::deser_repo_meta(&metadata_bytes)?; 253 + 254 + Ok(Some(repo_state_to_info(did, repo_state, metadata.tracked))) 255 + }) 256 + .await 257 + .into_diagnostic()? 258 + } 259 + 260 + /// returns the collections of this repository and the number of records it has in each. 261 + pub async fn collections(&self) -> Result<HashMap<Nsid<'static>, u64>> { 262 + let did = self.did.clone().into_static(); 263 + let state = self.state.clone(); 264 + 265 + tokio::task::spawn_blocking(move || { 266 + let prefix = keys::did_collection_prefix(&did); 267 + let mut res = HashMap::new(); 268 + for item in state.db.counts.prefix(&prefix) { 269 + let (k, v) = item.into_inner().into_diagnostic()?; 270 + let col = k 271 + .strip_prefix(prefix.as_slice()) 272 + .ok_or_else(|| miette::miette!("invalid collection count key: {k:?}")) 273 + .and_then(|r| std::str::from_utf8(r).into_diagnostic()) 274 + .and_then(|n| Nsid::new(n).into_diagnostic())? 275 + .into_static(); 276 + let count = u64::from_be_bytes( 277 + v.as_ref() 278 + .try_into() 279 + .into_diagnostic() 280 + .wrap_err("expected to be count (8 bytes)")?, 281 + ); 282 + res.insert(col, count); 283 + } 284 + Ok(res) 285 + }) 286 + .await 287 + .into_diagnostic()? 288 + } 289 + 290 + /// returns a bi-directionally validated mini doc. 291 + pub async fn mini_doc(&self) -> Result<MiniDoc<'static>, MiniDocError> { 292 + let Some(info) = self.info().await.map_err(MiniDocError::Other)? else { 293 + return Err(MiniDocError::RepoNotFound); 294 + }; 295 + 296 + // check if repo is still backfilling (in pending) 297 + #[cfg(feature = "indexer")] 298 + let is_pending = { 299 + let metadata_key = keys::repo_metadata_key(&self.did); 300 + let app_state = self.state.clone(); 301 + tokio::task::spawn_blocking(move || { 302 + let metadata_bytes = app_state 303 + .db 304 + .repo_metadata 305 + .get(&metadata_key) 306 + .into_diagnostic()?; 307 + let Some(metadata_bytes) = metadata_bytes else { 308 + return Ok::<_, miette::Report>(false); 309 + }; 310 + 311 + let metadata = crate::db::deser_repo_meta(metadata_bytes.as_ref())?; 312 + return Ok(app_state 313 + .db 314 + .pending 315 + .get(crate::db::keys::pending_key(metadata.index_id)) 316 + .into_diagnostic()? 317 + .is_some()); 318 + }) 319 + .await 320 + .map_err(|e| MiniDocError::Other(miette::miette!(e)))? 321 + .map_err(MiniDocError::Other)? 322 + }; 323 + #[cfg(feature = "relay")] 324 + let is_pending = false; 325 + 326 + if is_pending { 327 + return Err(MiniDocError::NotSynced); 328 + } 329 + 330 + let pds = info 331 + .pds 332 + .ok_or_else(|| MiniDocError::CouldNotResolveIdentity)?; 333 + let signing_key = info 334 + .signing_key 335 + .ok_or_else(|| MiniDocError::CouldNotResolveIdentity)? 336 + .into_static(); 337 + 338 + let handle = if let Some(h) = info.handle { 339 + let is_valid = self 340 + .state 341 + .resolver 342 + .verify_handle(&self.did, &h) 343 + .await 344 + .into_diagnostic() 345 + .map_err(MiniDocError::Other)?; 346 + is_valid.then_some(h).unwrap_or_else(invalid_handle) 347 + } else { 348 + invalid_handle() 349 + }; 350 + 351 + Ok(MiniDoc { 352 + did: self.did.clone().into_static(), 353 + handle, 354 + pds, 355 + signing_key, 356 + }) 357 + } 358 + }
+1 -1
src/control/seed.rs
··· 59 59 let ks = state.db.cursors.clone(); 60 60 let key = cursor_key.clone(); 61 61 match db::Db::get(ks, key).await { 62 - Ok(Some(b)) => rmp_serde::from_slice::<String>(&b).ok(), 62 + Ok(Some(b)) => rmp_serde::from_slice::<String>(b.as_ref()).ok(), 63 63 Ok(None) => None, 64 64 Err(e) => { 65 65 warn!(err = %e, "failed to load seed cursor, starting from scratch");
+3 -1
src/crawler/worker.rs
··· 161 161 batch.insert( 162 162 &app_state.db.repo_metadata, 163 163 &metadata_key, 164 - crate::db::ser_repo_metadata(&metadata)?, 164 + crate::db::ser_repo_meta(&metadata)?, 165 165 ); 166 + #[cfg(feature = "indexer")] 166 167 batch.insert( 167 168 &app_state.db.pending, 168 169 keys::pending_key(metadata.index_id), ··· 204 205 .db 205 206 .update_count_async("pending", count as i64) 206 207 .await; 208 + #[cfg(feature = "indexer")] 207 209 self.state.notify_backfill(); 208 210 } 209 211
+166
src/db/indexer.rs
··· 1 + use crate::types::{GaugeState, RepoStatus, ResyncState}; 2 + use fjall::{Keyspace, OwnedWriteBatch}; 3 + use jacquard_common::IntoStatic; 4 + use jacquard_common::types::string::Did; 5 + use miette::{IntoDiagnostic, Result, WrapErr}; 6 + use url::Url; 7 + 8 + use crate::db::{Db, deser_repo_state, keys, ser_repo_state}; 9 + use crate::types::RepoState; 10 + 11 + impl Db { 12 + pub(crate) fn update_gauge_diff(&self, old: &GaugeState, new: &GaugeState) { 13 + update_gauge_diff_impl!(self, old, new, update_count); 14 + } 15 + 16 + pub(crate) async fn update_gauge_diff_async(&self, old: &GaugeState, new: &GaugeState) { 17 + update_gauge_diff_impl!(self, old, new, update_count_async, await); 18 + } 19 + 20 + pub(crate) fn update_repo_state<F, T>( 21 + batch: &mut OwnedWriteBatch, 22 + repos: &Keyspace, 23 + did: &Did<'_>, 24 + f: F, 25 + ) -> Result<Option<(RepoState<'static>, T)>> 26 + where 27 + F: FnOnce(&mut RepoState, (&[u8], &mut fjall::OwnedWriteBatch)) -> Result<(bool, T)>, 28 + { 29 + let key = keys::repo_key(did); 30 + if let Some(bytes) = repos.get(&key).into_diagnostic()? { 31 + let mut state: RepoState = deser_repo_state(bytes.as_ref())?.into_static(); 32 + let (changed, result) = f(&mut state, (key.as_slice(), batch))?; 33 + if changed { 34 + batch.insert(repos, key, ser_repo_state(&state)?); 35 + } 36 + Ok(Some((state, result))) 37 + } else { 38 + Ok(None) 39 + } 40 + } 41 + 42 + pub(crate) async fn update_repo_state_async<F, T>( 43 + &self, 44 + did: &Did<'_>, 45 + f: F, 46 + ) -> Result<Option<(RepoState<'static>, T)>> 47 + where 48 + F: FnOnce(&mut RepoState, (&[u8], &mut fjall::OwnedWriteBatch)) -> Result<(bool, T)> 49 + + Send 50 + + 'static, 51 + T: Send + 'static, 52 + { 53 + let mut batch = self.inner.batch(); 54 + let repos = self.repos.clone(); 55 + let did = did.clone().into_static(); 56 + 57 + tokio::task::spawn_blocking(move || { 58 + let Some((state, t)) = Self::update_repo_state(&mut batch, &repos, &did, f)? else { 59 + return Ok(None); 60 + }; 61 + batch.commit().into_diagnostic()?; 62 + Ok(Some((state, t))) 63 + }) 64 + .await 65 + .into_diagnostic()? 66 + } 67 + 68 + pub(crate) fn repo_gauge_state( 69 + repo_state: &RepoState, 70 + resync_bytes: Option<&[u8]>, 71 + ) -> GaugeState { 72 + match repo_state.status { 73 + RepoStatus::Synced => GaugeState::Synced, 74 + RepoStatus::Error(_) 75 + | RepoStatus::Deactivated 76 + | RepoStatus::Takendown 77 + | RepoStatus::Suspended 78 + | RepoStatus::Deleted 79 + | RepoStatus::Desynchronized 80 + | RepoStatus::Throttled => resync_bytes 81 + .and_then(|b| rmp_serde::from_slice::<ResyncState>(b).ok()) 82 + .and_then(|s| match s { 83 + ResyncState::Error { kind, .. } => Some(GaugeState::Resync(Some(kind))), 84 + _ => None, 85 + }) 86 + .unwrap_or(GaugeState::Resync(None)), 87 + } 88 + } 89 + } 90 + 91 + pub fn set_record_count( 92 + batch: &mut OwnedWriteBatch, 93 + db: &Db, 94 + did: &Did<'_>, 95 + collection: &str, 96 + count: u64, 97 + ) { 98 + let key = keys::count_collection_key(did, collection); 99 + batch.insert(&db.counts, key, count.to_be_bytes()); 100 + } 101 + 102 + pub fn update_record_count( 103 + batch: &mut OwnedWriteBatch, 104 + db: &Db, 105 + did: &Did<'_>, 106 + collection: &str, 107 + delta: i64, 108 + ) -> Result<()> { 109 + let key = keys::count_collection_key(did, collection); 110 + let count = db 111 + .counts 112 + .get(&key) 113 + .into_diagnostic()? 114 + .map(|v| -> Result<_> { 115 + Ok(u64::from_be_bytes( 116 + v.as_ref() 117 + .try_into() 118 + .into_diagnostic() 119 + .wrap_err("expected to be count (8 bytes)")?, 120 + )) 121 + }) 122 + .transpose()? 123 + .unwrap_or(0); 124 + let new_count = if delta >= 0 { 125 + count.saturating_add(delta as u64) 126 + } else { 127 + count.saturating_sub(delta.unsigned_abs()) 128 + }; 129 + batch.insert(&db.counts, key, new_count.to_be_bytes()); 130 + Ok(()) 131 + } 132 + 133 + pub fn get_record_count(db: &Db, did: &Did<'_>, collection: &str) -> Result<u64> { 134 + let key = keys::count_collection_key(did, collection); 135 + let count = db 136 + .counts 137 + .get(&key) 138 + .into_diagnostic()? 139 + .map(|v| -> Result<_> { 140 + Ok(u64::from_be_bytes( 141 + v.as_ref() 142 + .try_into() 143 + .into_diagnostic() 144 + .wrap_err("expected to be count (8 bytes)")?, 145 + )) 146 + }) 147 + .transpose()?; 148 + Ok(count.unwrap_or(0)) 149 + } 150 + 151 + pub fn load_persisted_crawler_sources( 152 + db: &crate::db::Db, 153 + ) -> Result<Vec<crate::config::CrawlerSource>> { 154 + use crate::db::keys::CRAWLER_SOURCE_PREFIX; 155 + 156 + let mut sources = Vec::new(); 157 + for entry in db.crawler.prefix(CRAWLER_SOURCE_PREFIX) { 158 + let (key, val) = entry.into_inner().into_diagnostic()?; 159 + let url_bytes = &key[CRAWLER_SOURCE_PREFIX.len()..]; 160 + let url_str = std::str::from_utf8(url_bytes).into_diagnostic()?; 161 + let url = Url::parse(url_str).into_diagnostic()?; 162 + let mode: crate::config::CrawlerMode = rmp_serde::from_slice(&val).into_diagnostic()?; 163 + sources.push(crate::config::CrawlerSource { url, mode }); 164 + } 165 + Ok(sources) 166 + }
+165
src/db/keys/indexer.rs
··· 1 + use jacquard_common::types::string::Did; 2 + use smol_str::SmolStr; 3 + 4 + use super::SEP; 5 + use crate::db::types::{DbRkey, DbTid, TrimmedDid}; 6 + 7 + pub const EVENT_WATERMARK_PREFIX: &[u8] = b"ewm|"; 8 + 9 + pub fn pending_key(id: u64) -> [u8; 8] { 10 + id.to_be_bytes() 11 + } 12 + 13 + pub fn event_watermark_key(timestamp_secs: u64) -> Vec<u8> { 14 + let mut key = Vec::with_capacity(EVENT_WATERMARK_PREFIX.len() + 8); 15 + key.extend_from_slice(EVENT_WATERMARK_PREFIX); 16 + key.extend_from_slice(&timestamp_secs.to_be_bytes()); 17 + key 18 + } 19 + 20 + // prefix format: {DID}| (DID trimmed) 21 + pub fn record_prefix_did(did: &Did) -> Vec<u8> { 22 + let repo = TrimmedDid::from(did); 23 + let mut prefix = Vec::with_capacity(repo.len() + 1); 24 + repo.write_to_vec(&mut prefix); 25 + prefix.push(SEP); 26 + prefix 27 + } 28 + 29 + // prefix format: {DID}|{collection}| 30 + pub fn record_prefix_collection(did: &Did, collection: &str) -> Vec<u8> { 31 + let repo = TrimmedDid::from(did); 32 + let mut prefix = Vec::with_capacity(repo.len() + 1 + collection.len() + 1); 33 + repo.write_to_vec(&mut prefix); 34 + prefix.push(SEP); 35 + prefix.extend_from_slice(collection.as_bytes()); 36 + prefix.push(SEP); 37 + prefix 38 + } 39 + 40 + // key format: {DID}|{collection}|{rkey} 41 + pub fn record_key(did: &Did, collection: &str, rkey: &DbRkey) -> Vec<u8> { 42 + let repo = TrimmedDid::from(did); 43 + let mut key = Vec::with_capacity(repo.len() + 1 + collection.len() + 1 + rkey.len() + 1); 44 + repo.write_to_vec(&mut key); 45 + key.push(SEP); 46 + key.extend_from_slice(collection.as_bytes()); 47 + key.push(SEP); 48 + write_rkey(&mut key, rkey); 49 + key 50 + } 51 + 52 + pub fn write_rkey(buf: &mut Vec<u8>, rkey: &DbRkey) { 53 + match rkey { 54 + DbRkey::Tid(tid) => { 55 + buf.push(b't'); 56 + buf.extend_from_slice(tid.as_bytes()); 57 + } 58 + DbRkey::Str(s) => { 59 + buf.push(b's'); 60 + buf.extend_from_slice(s.as_bytes()); 61 + } 62 + } 63 + } 64 + 65 + pub fn parse_rkey(raw: &[u8]) -> miette::Result<DbRkey> { 66 + let Some(kind) = raw.first() else { 67 + miette::bail!("record key is empty"); 68 + }; 69 + let rkey = match kind { 70 + b't' => { 71 + DbRkey::Tid(DbTid::new_from_bytes(raw[1..].try_into().map_err(|e| { 72 + miette::miette!("record key '{raw:?}' is invalid: {e}") 73 + })?)) 74 + } 75 + b's' => DbRkey::Str(SmolStr::new( 76 + std::str::from_utf8(&raw[1..]) 77 + .map_err(|e| miette::miette!("record key '{raw:?}' is invalid: {e}"))?, 78 + )), 79 + _ => miette::bail!("invalid record key kind: {}", *kind as char), 80 + }; 81 + Ok(rkey) 82 + } 83 + 84 + // key format: r|{DID}|{collection} (DID trimmed) 85 + pub fn count_collection_key(did: &Did, collection: &str) -> Vec<u8> { 86 + let mut key = super::did_collection_prefix(did); 87 + key.extend_from_slice(collection.as_bytes()); 88 + key 89 + } 90 + 91 + // key format: {DID}|{rev} 92 + pub fn resync_buffer_key(did: &Did, rev: DbTid) -> Vec<u8> { 93 + let repo = TrimmedDid::from(did); 94 + let mut key = Vec::with_capacity(repo.len() + 1 + 8); 95 + repo.write_to_vec(&mut key); 96 + key.push(SEP); 97 + key.extend_from_slice(&rev.as_bytes()); 98 + key 99 + } 100 + 101 + // prefix format: {DID}| (DID trimmed) 102 + pub fn resync_buffer_prefix(did: &Did) -> Vec<u8> { 103 + let repo = TrimmedDid::from(did); 104 + let mut prefix = Vec::with_capacity(repo.len() + 1); 105 + repo.write_to_vec(&mut prefix); 106 + prefix.push(SEP); 107 + prefix 108 + } 109 + 110 + /// key format: `ret|<did bytes>` 111 + pub const CRAWLER_RETRY_PREFIX: &[u8] = b"ret|"; 112 + 113 + pub fn crawler_retry_key(did: &Did) -> Vec<u8> { 114 + let repo = TrimmedDid::from(did); 115 + let mut key = Vec::with_capacity(CRAWLER_RETRY_PREFIX.len() + repo.len()); 116 + key.extend_from_slice(CRAWLER_RETRY_PREFIX); 117 + repo.write_to_vec(&mut key); 118 + key 119 + } 120 + 121 + pub fn crawler_retry_parse_key(key: &[u8]) -> miette::Result<TrimmedDid<'_>> { 122 + TrimmedDid::try_from(&key[CRAWLER_RETRY_PREFIX.len()..]) 123 + } 124 + 125 + pub const CRAWLER_CURSOR_PREFIX: &[u8] = b"crawler_cursor|"; 126 + 127 + pub fn crawler_cursor_key(relay: &str) -> Vec<u8> { 128 + let mut key = CRAWLER_CURSOR_PREFIX.to_vec(); 129 + key.extend_from_slice(relay.as_bytes()); 130 + key 131 + } 132 + 133 + pub const BY_COLLECTION_CURSOR_PREFIX: &[u8] = b"by_collection_cursor|"; 134 + 135 + /// prefix for all by-collection cursors belonging to a given index URL. 136 + pub fn by_collection_cursor_prefix(url: &str) -> Vec<u8> { 137 + let mut prefix = BY_COLLECTION_CURSOR_PREFIX.to_vec(); 138 + prefix.extend_from_slice(url.as_bytes()); 139 + prefix.push(SEP); 140 + prefix 141 + } 142 + 143 + pub fn by_collection_cursor_key(url: &str, collection: &str) -> Vec<u8> { 144 + let mut key = by_collection_cursor_prefix(url); 145 + key.extend_from_slice(collection.as_bytes()); 146 + key 147 + } 148 + 149 + pub const CRAWLER_SOURCE_PREFIX: &[u8] = b"src|"; 150 + 151 + pub fn crawler_source_key(url: &str) -> Vec<u8> { 152 + let mut key = Vec::with_capacity(CRAWLER_SOURCE_PREFIX.len() + url.len()); 153 + key.extend_from_slice(CRAWLER_SOURCE_PREFIX); 154 + key.extend_from_slice(url.as_bytes()); 155 + key 156 + } 157 + 158 + // key format: {collection}|{cid_bytes} 159 + pub fn block_key(collection: &str, cid: &[u8]) -> Vec<u8> { 160 + let mut key = Vec::with_capacity(collection.len() + 1 + cid.len()); 161 + key.extend_from_slice(collection.as_bytes()); 162 + key.push(SEP); 163 + key.extend_from_slice(cid); 164 + key 165 + }
+8 -166
src/db/keys/mod.rs
··· 1 1 use jacquard_common::types::string::Did; 2 - use smol_str::SmolStr; 3 2 4 - use crate::db::types::{DbRkey, DbTid, TrimmedDid}; 3 + use crate::db::types::TrimmedDid; 5 4 6 5 pub mod v1; 7 6 ··· 11 10 pub const SEP: u8 = b'|'; 12 11 13 12 #[cfg(feature = "indexer")] 14 - pub const EVENT_WATERMARK_PREFIX: &[u8] = b"ewm|"; 13 + pub mod indexer; 14 + #[cfg(feature = "indexer")] 15 + pub use indexer::*; 15 16 16 17 #[cfg(feature = "relay")] 17 18 pub const RELAY_EVENT_WATERMARK_PREFIX: &[u8] = b"rwm|"; ··· 35 36 vec 36 37 } 37 38 38 - pub fn pending_key(id: u64) -> [u8; 8] { 39 - id.to_be_bytes() 40 - } 41 - 42 - #[cfg(feature = "indexer")] 43 - pub fn event_watermark_key(timestamp_secs: u64) -> Vec<u8> { 44 - let mut key = Vec::with_capacity(EVENT_WATERMARK_PREFIX.len() + 8); 45 - key.extend_from_slice(EVENT_WATERMARK_PREFIX); 46 - key.extend_from_slice(&timestamp_secs.to_be_bytes()); 47 - key 39 + #[cfg(feature = "relay")] 40 + /// key format: {SEQ} (u64 big-endian), mirroring event_key 41 + pub fn relay_event_key(seq: u64) -> [u8; 8] { 42 + seq.to_be_bytes() 48 43 } 49 44 50 45 #[cfg(feature = "relay")] ··· 55 50 key 56 51 } 57 52 58 - // prefix format: {DID}| (DID trimmed) 59 - pub fn record_prefix_did(did: &Did) -> Vec<u8> { 60 - let repo = TrimmedDid::from(did); 61 - let mut prefix = Vec::with_capacity(repo.len() + 1); 62 - repo.write_to_vec(&mut prefix); 63 - prefix.push(SEP); 64 - prefix 65 - } 66 - 67 - // prefix format: {DID}|{collection}| 68 - pub fn record_prefix_collection(did: &Did, collection: &str) -> Vec<u8> { 69 - let repo = TrimmedDid::from(did); 70 - let mut prefix = Vec::with_capacity(repo.len() + 1 + collection.len() + 1); 71 - repo.write_to_vec(&mut prefix); 72 - prefix.push(SEP); 73 - prefix.extend_from_slice(collection.as_bytes()); 74 - prefix.push(SEP); 75 - prefix 76 - } 77 - 78 - // key format: {DID}|{collection}|{rkey} 79 - pub fn record_key(did: &Did, collection: &str, rkey: &DbRkey) -> Vec<u8> { 80 - let repo = TrimmedDid::from(did); 81 - let mut key = Vec::with_capacity(repo.len() + 1 + collection.len() + 1 + rkey.len() + 1); 82 - repo.write_to_vec(&mut key); 83 - key.push(SEP); 84 - key.extend_from_slice(collection.as_bytes()); 85 - key.push(SEP); 86 - write_rkey(&mut key, rkey); 87 - key 88 - } 89 - 90 - pub fn write_rkey(buf: &mut Vec<u8>, rkey: &DbRkey) { 91 - match rkey { 92 - DbRkey::Tid(tid) => { 93 - buf.push(b't'); 94 - buf.extend_from_slice(tid.as_bytes()); 95 - } 96 - DbRkey::Str(s) => { 97 - buf.push(b's'); 98 - buf.extend_from_slice(s.as_bytes()); 99 - } 100 - } 101 - } 102 - 103 - pub fn parse_rkey(raw: &[u8]) -> miette::Result<DbRkey> { 104 - let Some(kind) = raw.first() else { 105 - miette::bail!("record key is empty"); 106 - }; 107 - let rkey = match kind { 108 - b't' => { 109 - DbRkey::Tid(DbTid::new_from_bytes(raw[1..].try_into().map_err(|e| { 110 - miette::miette!("record key '{raw:?}' is invalid: {e}") 111 - })?)) 112 - } 113 - b's' => DbRkey::Str(SmolStr::new( 114 - std::str::from_utf8(&raw[1..]) 115 - .map_err(|e| miette::miette!("record key '{raw:?}' is invalid: {e}"))?, 116 - )), 117 - _ => miette::bail!("invalid record key kind: {}", *kind as char), 118 - }; 119 - Ok(rkey) 120 - } 121 - 122 53 // key format: {SEQ} 123 54 pub fn event_key(seq: u64) -> [u8; 8] { 124 55 seq.to_be_bytes() ··· 146 77 key 147 78 } 148 79 149 - // key format: r|{DID}|{collection} (DID trimmed) 150 - pub fn count_collection_key(did: &Did, collection: &str) -> Vec<u8> { 151 - let mut key = did_collection_prefix(did); 152 - key.extend_from_slice(collection.as_bytes()); 153 - key 154 - } 155 - 156 - // key format: {DID}|{rev} 157 - pub fn resync_buffer_key(did: &Did, rev: DbTid) -> Vec<u8> { 158 - let repo = TrimmedDid::from(did); 159 - let mut key = Vec::with_capacity(repo.len() + 1 + 8); 160 - repo.write_to_vec(&mut key); 161 - key.push(SEP); 162 - key.extend_from_slice(&rev.as_bytes()); 163 - key 164 - } 165 - 166 - // prefix format: {DID}| (DID trimmed) 167 - pub fn resync_buffer_prefix(did: &Did) -> Vec<u8> { 168 - let repo = TrimmedDid::from(did); 169 - let mut prefix = Vec::with_capacity(repo.len() + 1); 170 - repo.write_to_vec(&mut prefix); 171 - prefix.push(SEP); 172 - prefix 173 - } 174 - 175 - /// key format: `ret|<did bytes>` 176 - pub const CRAWLER_RETRY_PREFIX: &[u8] = b"ret|"; 177 - 178 - pub fn crawler_retry_key(did: &Did) -> Vec<u8> { 179 - let repo = TrimmedDid::from(did); 180 - let mut key = Vec::with_capacity(CRAWLER_RETRY_PREFIX.len() + repo.len()); 181 - key.extend_from_slice(CRAWLER_RETRY_PREFIX); 182 - repo.write_to_vec(&mut key); 183 - key 184 - } 185 - 186 - pub fn crawler_retry_parse_key(key: &[u8]) -> miette::Result<TrimmedDid<'_>> { 187 - TrimmedDid::try_from(&key[CRAWLER_RETRY_PREFIX.len()..]) 188 - } 189 - 190 - pub const CRAWLER_CURSOR_PREFIX: &[u8] = b"crawler_cursor|"; 191 - 192 - pub fn crawler_cursor_key(relay: &str) -> Vec<u8> { 193 - let mut key = CRAWLER_CURSOR_PREFIX.to_vec(); 194 - key.extend_from_slice(relay.as_bytes()); 195 - key 196 - } 197 - 198 - pub const BY_COLLECTION_CURSOR_PREFIX: &[u8] = b"by_collection_cursor|"; 199 - 200 - /// prefix for all by-collection cursors belonging to a given index URL. 201 - pub fn by_collection_cursor_prefix(url: &str) -> Vec<u8> { 202 - let mut prefix = BY_COLLECTION_CURSOR_PREFIX.to_vec(); 203 - prefix.extend_from_slice(url.as_bytes()); 204 - prefix.push(SEP); 205 - prefix 206 - } 207 - 208 - pub fn by_collection_cursor_key(url: &str, collection: &str) -> Vec<u8> { 209 - let mut key = by_collection_cursor_prefix(url); 210 - key.extend_from_slice(collection.as_bytes()); 211 - key 212 - } 213 - 214 - pub const CRAWLER_SOURCE_PREFIX: &[u8] = b"src|"; 215 - 216 - pub fn crawler_source_key(url: &str) -> Vec<u8> { 217 - let mut key = Vec::with_capacity(CRAWLER_SOURCE_PREFIX.len() + url.len()); 218 - key.extend_from_slice(CRAWLER_SOURCE_PREFIX); 219 - key.extend_from_slice(url.as_bytes()); 220 - key 221 - } 222 - 223 80 pub const SEED_CURSOR_PREFIX: &[u8] = b"seed_cursor|"; 224 81 225 82 pub fn seed_cursor_key(url: &str) -> Vec<u8> { ··· 242 99 pub fn pds_account_count_key(host: &str) -> String { 243 100 format!("p|{host}") 244 101 } 245 - 246 - #[cfg(feature = "relay")] 247 - /// key format: {SEQ} (u64 big-endian), mirroring event_key 248 - pub fn relay_event_key(seq: u64) -> [u8; 8] { 249 - seq.to_be_bytes() 250 - } 251 - 252 - // key format: {collection}|{cid_bytes} 253 - pub fn block_key(collection: &str, cid: &[u8]) -> Vec<u8> { 254 - let mut key = Vec::with_capacity(collection.len() + 1 + cid.len()); 255 - key.extend_from_slice(collection.as_bytes()); 256 - key.push(SEP); 257 - key.extend_from_slice(cid); 258 - key 259 - }
+85 -212
src/db/mod.rs
··· 10 10 use fjall::{ 11 11 CompressionType, Database, Keyspace, KeyspaceCreateOptions, OwnedWriteBatch, PersistMode, Slice, 12 12 }; 13 - use jacquard_common::IntoStatic; 14 - use jacquard_common::types::string::Did; 15 13 use lsm_tree::compaction::Factory; 16 14 use miette::{Context, IntoDiagnostic, Result}; 17 15 use scc::HashMap; ··· 43 41 pub inner: Arc<Database>, 44 42 pub path: std::path::PathBuf, 45 43 pub repos: Keyspace, 44 + pub repo_metadata: Keyspace, 45 + pub cursors: Keyspace, 46 + pub counts: Keyspace, 47 + pub filter: Keyspace, 48 + pub crawler: Keyspace, 49 + #[cfg(feature = "indexer")] 46 50 pub records: Keyspace, 51 + #[cfg(feature = "indexer")] 47 52 pub blocks: Keyspace, 48 - pub cursors: Keyspace, 53 + #[cfg(feature = "indexer")] 49 54 pub pending: Keyspace, 55 + #[cfg(feature = "indexer")] 50 56 pub resync: Keyspace, 57 + #[cfg(feature = "indexer")] 51 58 pub resync_buffer: Keyspace, 52 - pub repo_metadata: Keyspace, 59 + #[cfg(feature = "indexer")] 53 60 pub events: Keyspace, 54 - pub counts: Keyspace, 55 - pub filter: Keyspace, 56 - pub crawler: Keyspace, 57 61 #[cfg(feature = "backlinks")] 58 62 pub backlinks: Keyspace, 59 63 #[cfg(feature = "indexer")] ··· 69 73 pub counts_map: HashMap<SmolStr, u64>, 70 74 } 71 75 76 + #[cfg(feature = "indexer")] 72 77 macro_rules! update_gauge_diff_impl { 73 78 ($self:ident, $old:ident, $new:ident, $update_method:ident $(, $await:tt)?) => {{ 74 79 use crate::types::GaugeState; ··· 226 231 // did plc are random so the interval wont rlly matter 227 232 .data_block_restart_interval_policy(RestartIntervalPolicy::new([2, 4])), 228 233 )?; 234 + #[cfg(feature = "indexer")] 229 235 let pending = open_ks( 230 236 "pending", 231 237 opts() ··· 239 245 // ids are sequential and share prefix so we can use large interval to save space 240 246 .data_block_restart_interval_policy(RestartIntervalPolicy::all(64)), 241 247 )?; 248 + #[cfg(feature = "indexer")] 242 249 let resync = open_ks( 243 250 "resync", 244 251 opts() ··· 253 260 .data_block_restart_interval_policy(RestartIntervalPolicy::all(4)), 254 261 )?; 255 262 // this is used in non-ephemeral mode 263 + #[cfg(feature = "indexer")] 256 264 let blocks = open_ks( 257 265 "blocks", 258 266 opts() ··· 276 284 ])) 277 285 .data_block_restart_interval_policy(RestartIntervalPolicy::new([8, 16, 32])), 278 286 )?; 287 + #[cfg(feature = "indexer")] 279 288 let records = open_ks( 280 289 "records", 281 290 opts() ··· 302 311 .data_block_compression_policy(CompressionPolicy::disabled()) 303 312 .data_block_restart_interval_policy(RestartIntervalPolicy::all(1)), 304 313 )?; 314 + #[cfg(feature = "indexer")] 305 315 let resync_buffer = open_ks( 306 316 "resync_buffer", 307 317 opts() ··· 313 323 .data_block_compression_policy(CompressionPolicy::disabled()) 314 324 .data_block_restart_interval_policy(RestartIntervalPolicy::all(16)), 315 325 )?; 326 + #[cfg(feature = "indexer")] 316 327 let events = open_ks( 317 328 "events", 318 329 opts() ··· 432 443 inner: db, 433 444 path: cfg.database_path.clone(), 434 445 repos, 446 + repo_metadata, 447 + #[cfg(feature = "indexer")] 435 448 records, 449 + #[cfg(feature = "indexer")] 436 450 blocks, 437 451 cursors, 452 + #[cfg(feature = "indexer")] 438 453 pending, 454 + #[cfg(feature = "indexer")] 439 455 resync, 456 + #[cfg(feature = "indexer")] 440 457 resync_buffer, 441 - repo_metadata, 458 + #[cfg(feature = "indexer")] 442 459 events, 443 460 counts, 444 461 filter, ··· 510 527 511 528 pub fn train_dict(&self, ks_name: &str) -> Result<()> { 512 529 let ks = match ks_name { 530 + #[cfg(feature = "indexer")] 513 531 "blocks" => &self.blocks, 532 + #[cfg(feature = "indexer")] 514 533 "events" => &self.events, 515 534 "repos" => &self.repos, 516 535 #[cfg(feature = "backlinks")] ··· 527 546 }; 528 547 529 548 let samples: Vec<Vec<u8>> = if ks_name == "blocks" { 530 - // sample up to 200 data blocks per collection, discovered lazily in the predicate 531 - let per_collection_limit = 200usize; 532 - let collection_counts: RefCell<std::collections::HashMap<Vec<u8>, usize>> = 533 - RefCell::new(std::collections::HashMap::new()); 549 + #[cfg(not(feature = "indexer"))] 550 + miette::bail!("indexer feature required for blocks keyspace training"); 551 + 552 + #[cfg(feature = "indexer")] 553 + { 554 + // sample up to 200 data blocks per collection, discovered lazily in the predicate 555 + let per_collection_limit = 200usize; 556 + let collection_counts: RefCell<std::collections::HashMap<Vec<u8>, usize>> = 557 + RefCell::new(std::collections::HashMap::new()); 534 558 535 - let new = ks 536 - .sample_data_blocks(5000, |first, _last| { 537 - let Some(sep_idx) = first.iter().position(|&b| b == keys::SEP) else { 538 - return false; 539 - }; 540 - let mut counts = collection_counts.borrow_mut(); 541 - let count = counts.entry(first[..sep_idx].to_vec()).or_insert(0); 542 - if *count >= per_collection_limit { 543 - return false; 544 - } 545 - *count += 1; 546 - true 547 - }) 548 - .into_diagnostic()?; 559 + let new = ks 560 + .sample_data_blocks(5000, |first, _last| { 561 + let Some(sep_idx) = first.iter().position(|&b| b == keys::SEP) else { 562 + return false; 563 + }; 564 + let mut counts = collection_counts.borrow_mut(); 565 + let count = counts.entry(first[..sep_idx].to_vec()).or_insert(0); 566 + if *count >= per_collection_limit { 567 + return false; 568 + } 569 + *count += 1; 570 + true 571 + }) 572 + .into_diagnostic()?; 549 573 550 - new.into_iter().map(|s| s.to_vec()).collect() 574 + new.into_iter().map(|s| s.to_vec()).collect() 575 + } 551 576 } else { 552 577 let mut seen_keys = HashSet::new(); 553 578 let captured_keys = RefCell::new(Vec::new()); ··· 605 630 .await 606 631 .into_diagnostic()? 607 632 }; 608 - tokio::try_join!( 633 + 634 + let mut tasks = vec![ 609 635 compact(self.repos.clone()), 610 - compact(self.records.clone()), 611 - compact(self.blocks.clone()), 612 636 compact(self.cursors.clone()), 613 - compact(self.pending.clone()), 614 - compact(self.resync.clone()), 615 - compact(self.resync_buffer.clone()), 616 637 compact(self.repo_metadata.clone()), 617 - compact(self.events.clone()), 618 638 compact(self.counts.clone()), 619 639 compact(self.filter.clone()), 620 640 compact(self.crawler.clone()), 621 - )?; 641 + ]; 642 + 643 + #[cfg(feature = "indexer")] 644 + { 645 + tasks.push(compact(self.records.clone())); 646 + tasks.push(compact(self.blocks.clone())); 647 + tasks.push(compact(self.pending.clone())); 648 + tasks.push(compact(self.resync.clone())); 649 + tasks.push(compact(self.resync_buffer.clone())); 650 + tasks.push(compact(self.events.clone())); 651 + } 652 + 622 653 #[cfg(feature = "relay")] 623 - compact(self.relay_events.clone()).await?; 654 + tasks.push(compact(self.relay_events.clone())); 655 + 624 656 #[cfg(feature = "backlinks")] 625 - compact(self.backlinks.clone()).await?; 657 + tasks.push(compact(self.backlinks.clone())); 658 + 659 + futures::future::try_join_all(tasks).await?; 660 + 626 661 Ok(()) 627 662 } 628 663 ··· 654 689 .into_diagnostic()? 655 690 } 656 691 692 + #[allow(dead_code)] 657 693 pub async fn contains_key(ks: Keyspace, key: impl Into<Slice>) -> Result<bool> { 658 694 let key = key.into(); 659 695 tokio::task::spawn_blocking(move || ks.contains_key(key).into_diagnostic()) ··· 711 747 .await 712 748 .unwrap_or(0) 713 749 } 750 + } 714 751 715 - pub(crate) fn update_gauge_diff( 716 - &self, 717 - old: &crate::types::GaugeState, 718 - new: &crate::types::GaugeState, 719 - ) { 720 - update_gauge_diff_impl!(self, old, new, update_count); 721 - } 752 + #[cfg(feature = "indexer")] 753 + mod indexer; 722 754 723 - pub(crate) async fn update_gauge_diff_async( 724 - &self, 725 - old: &crate::types::GaugeState, 726 - new: &crate::types::GaugeState, 727 - ) { 728 - update_gauge_diff_impl!(self, old, new, update_count_async, await); 729 - } 755 + #[cfg(feature = "indexer")] 756 + pub use indexer::*; 730 757 731 - pub(crate) fn update_repo_state<F, T>( 732 - batch: &mut OwnedWriteBatch, 733 - repos: &Keyspace, 734 - did: &Did<'_>, 735 - f: F, 736 - ) -> Result<Option<(RepoState<'static>, T)>> 737 - where 738 - F: FnOnce(&mut RepoState, (&[u8], &mut fjall::OwnedWriteBatch)) -> Result<(bool, T)>, 739 - { 740 - let key = keys::repo_key(did); 741 - if let Some(bytes) = repos.get(&key).into_diagnostic()? { 742 - let mut state: RepoState = deser_repo_state(bytes.as_ref())?.into_static(); 743 - let (changed, result) = f(&mut state, (key.as_slice(), batch))?; 744 - if changed { 745 - batch.insert(repos, key, ser_repo_state(&state)?); 746 - } 747 - Ok(Some((state, result))) 748 - } else { 749 - Ok(None) 750 - } 751 - } 752 - 753 - pub(crate) async fn update_repo_state_async<F, T>( 754 - &self, 755 - did: &Did<'_>, 756 - f: F, 757 - ) -> Result<Option<(RepoState<'static>, T)>> 758 - where 759 - F: FnOnce(&mut RepoState, (&[u8], &mut fjall::OwnedWriteBatch)) -> Result<(bool, T)> 760 - + Send 761 - + 'static, 762 - T: Send + 'static, 763 - { 764 - let mut batch = self.inner.batch(); 765 - let repos = self.repos.clone(); 766 - let did = did.clone().into_static(); 767 - 768 - tokio::task::spawn_blocking(move || { 769 - let Some((state, t)) = Self::update_repo_state(&mut batch, &repos, &did, f)? else { 770 - return Ok(None); 771 - }; 772 - batch.commit().into_diagnostic()?; 773 - Ok(Some((state, t))) 774 - }) 775 - .await 776 - .into_diagnostic()? 777 - } 778 - 779 - pub(crate) fn repo_gauge_state( 780 - repo_state: &RepoState, 781 - resync_bytes: Option<&[u8]>, 782 - ) -> crate::types::GaugeState { 783 - match repo_state.status { 784 - crate::types::RepoStatus::Synced => crate::types::GaugeState::Synced, 785 - crate::types::RepoStatus::Error(_) 786 - | crate::types::RepoStatus::Deactivated 787 - | crate::types::RepoStatus::Takendown 788 - | crate::types::RepoStatus::Suspended 789 - | crate::types::RepoStatus::Deleted 790 - | crate::types::RepoStatus::Desynchronized 791 - | crate::types::RepoStatus::Throttled => { 792 - if let Some(resync_bytes) = resync_bytes { 793 - if let Ok(crate::types::ResyncState::Error { kind, .. }) = 794 - rmp_serde::from_slice::<crate::types::ResyncState>(resync_bytes) 795 - { 796 - crate::types::GaugeState::Resync(Some(kind)) 797 - } else { 798 - crate::types::GaugeState::Resync(None) 799 - } 800 - } else { 801 - crate::types::GaugeState::Resync(None) 802 - } 803 - } 804 - } 805 - } 758 + #[derive(serde::Serialize, serde::Deserialize, Default)] 759 + pub(crate) struct FirehoseSourceMeta { 760 + #[serde(default)] 761 + pub(crate) is_pds: bool, 806 762 } 807 763 808 764 pub fn set_firehose_cursor(db: &Db, relay: &Url, cursor: i64) -> Result<()> { ··· 818 774 let key = keys::firehose_cursor_key_from_url(relay); 819 775 Db::get(db.cursors.clone(), key) 820 776 .await? 821 - .map(|v| { 777 + .map(|v: Slice| { 822 778 Ok(i64::from_be_bytes( 823 779 v.as_ref() 824 780 .try_into() ··· 829 785 .transpose() 830 786 } 831 787 832 - pub fn ser_repo_metadata(state: &RepoMetadata) -> Result<Vec<u8>> { 788 + pub fn ser_repo_meta(state: &RepoMetadata) -> Result<Vec<u8>> { 833 789 rmp_serde::to_vec(&state).into_diagnostic() 834 790 } 835 791 836 - pub fn deser_repo_metadata(bytes: &[u8]) -> Result<RepoMetadata> { 792 + pub fn deser_repo_meta(bytes: &[u8]) -> Result<RepoMetadata> { 837 793 rmp_serde::from_slice(bytes).into_diagnostic() 838 794 } 839 795 ··· 873 829 batch.commit().into_diagnostic() 874 830 } 875 831 876 - pub fn set_record_count( 877 - batch: &mut OwnedWriteBatch, 878 - db: &Db, 879 - did: &Did<'_>, 880 - collection: &str, 881 - count: u64, 882 - ) { 883 - let key = keys::count_collection_key(did, collection); 884 - batch.insert(&db.counts, key, count.to_be_bytes()); 885 - } 886 - 887 - pub fn update_record_count( 888 - batch: &mut OwnedWriteBatch, 889 - db: &Db, 890 - did: &Did<'_>, 891 - collection: &str, 892 - delta: i64, 893 - ) -> Result<()> { 894 - let key = keys::count_collection_key(did, collection); 895 - let count = db 896 - .counts 897 - .get(&key) 898 - .into_diagnostic()? 899 - .map(|v| -> Result<_> { 900 - Ok(u64::from_be_bytes( 901 - v.as_ref() 902 - .try_into() 903 - .into_diagnostic() 904 - .wrap_err("expected to be count (8 bytes)")?, 905 - )) 906 - }) 907 - .transpose()? 908 - .unwrap_or(0); 909 - let new_count = if delta >= 0 { 910 - count.saturating_add(delta as u64) 911 - } else { 912 - count.saturating_sub(delta.unsigned_abs()) 913 - }; 914 - batch.insert(&db.counts, key, new_count.to_be_bytes()); 915 - Ok(()) 916 - } 917 - 918 - pub fn get_record_count(db: &Db, did: &Did<'_>, collection: &str) -> Result<u64> { 919 - let key = keys::count_collection_key(did, collection); 920 - let count = db 921 - .counts 922 - .get(&key) 923 - .into_diagnostic()? 924 - .map(|v| -> Result<_> { 925 - Ok(u64::from_be_bytes( 926 - v.as_ref() 927 - .try_into() 928 - .into_diagnostic() 929 - .wrap_err("expected to be count (8 bytes)")?, 930 - )) 931 - }) 932 - .transpose()?; 933 - Ok(count.unwrap_or(0)) 934 - } 935 - 936 - #[derive(serde::Serialize, serde::Deserialize, Default)] 937 - pub(crate) struct FirehoseSourceMeta { 938 - #[serde(default)] 939 - pub(crate) is_pds: bool, 940 - } 941 - 942 832 pub fn load_persisted_firehose_sources( 943 833 db: &crate::db::Db, 944 834 ) -> Result<Vec<crate::config::FirehoseSource>> { ··· 959 849 } 960 850 Ok(sources) 961 851 } 962 - 963 - pub fn load_persisted_crawler_sources( 964 - db: &crate::db::Db, 965 - ) -> Result<Vec<crate::config::CrawlerSource>> { 966 - use crate::db::keys::CRAWLER_SOURCE_PREFIX; 967 - 968 - let mut sources = Vec::new(); 969 - for entry in db.crawler.prefix(CRAWLER_SOURCE_PREFIX) { 970 - let (key, val) = entry.into_inner().into_diagnostic()?; 971 - let url_bytes = &key[CRAWLER_SOURCE_PREFIX.len()..]; 972 - let url_str = std::str::from_utf8(url_bytes).into_diagnostic()?; 973 - let url = Url::parse(url_str).into_diagnostic()?; 974 - let mode: crate::config::CrawlerMode = rmp_serde::from_slice(&val).into_diagnostic()?; 975 - sources.push(crate::config::CrawlerSource { url, mode }); 976 - } 977 - Ok(sources) 978 - }
+23 -16
src/filter.rs
··· 30 30 collections: Vec::new(), 31 31 } 32 32 } 33 + } 33 34 34 - pub fn matches_collection(&self, collection: &str) -> bool { 35 - if self.collections.is_empty() { 36 - return true; 35 + #[cfg(feature = "indexer")] 36 + mod indexer { 37 + use super::*; 38 + 39 + impl FilterConfig { 40 + pub fn matches_collection(&self, collection: &str) -> bool { 41 + if self.collections.is_empty() { 42 + return true; 43 + } 44 + self.collections.iter().any(|p| nsid_matches(p, collection)) 37 45 } 38 - self.collections.iter().any(|p| nsid_matches(p, collection)) 39 - } 46 + 47 + pub fn matches_signal(&self, collection: &str) -> bool { 48 + self.signals.iter().any(|p| nsid_matches(p, collection)) 49 + } 40 50 41 - pub fn matches_signal(&self, collection: &str) -> bool { 42 - self.signals.iter().any(|p| nsid_matches(p, collection)) 51 + pub fn check_signals(&self) -> bool { 52 + self.mode == FilterMode::Filter && !self.signals.is_empty() 53 + } 43 54 } 44 55 45 - pub fn check_signals(&self) -> bool { 46 - self.mode == FilterMode::Filter && !self.signals.is_empty() 56 + fn nsid_matches(pattern: &str, col: &str) -> bool { 57 + pattern 58 + .strip_suffix(".*") 59 + .map(|prefix| col == prefix || col.starts_with(prefix)) 60 + .unwrap_or_else(|| col == pattern) 47 61 } 48 62 } 49 - 50 - fn nsid_matches(pattern: &str, col: &str) -> bool { 51 - pattern 52 - .strip_suffix(".*") 53 - .map(|prefix| col == prefix || col.starts_with(prefix)) 54 - .unwrap_or_else(|| col == pattern) 55 - }
+1 -1
src/ingest/firehose.rs
··· 288 288 .get(&metadata_key) 289 289 .into_diagnostic()? 290 290 { 291 - let metadata = crate::db::deser_repo_metadata(bytes.as_ref())?; 291 + let metadata = crate::db::deser_repo_meta(bytes.as_ref())?; 292 292 293 293 if metadata.tracked { 294 294 trace!(did = %did, "tracked repo, processing");
+4 -4
src/ingest/indexer.rs
··· 1 1 use super::*; 2 - use crate::db::{self, keys, ser_repo_metadata}; 2 + use crate::db::{self, keys, ser_repo_meta}; 3 3 use crate::ingest::stream::{Account, Commit, Identity}; 4 4 use crate::ingest::validation; 5 5 use crate::resolver::{NoSigningKeyError, ResolverError}; ··· 434 434 let metadata_key = keys::repo_metadata_key(did); 435 435 let metadata_bytes = db.repo_metadata.get(&metadata_key).into_diagnostic()?; 436 436 let is_backfilling = if let Some(metadata_bytes) = metadata_bytes { 437 - let metadata = crate::db::deser_repo_metadata(metadata_bytes.as_ref())?; 437 + let metadata = crate::db::deser_repo_meta(metadata_bytes.as_ref())?; 438 438 db.pending 439 439 .get(keys::pending_key(metadata.index_id)) 440 440 .into_diagnostic()? ··· 616 616 .repo_metadata 617 617 .get(&meta_key) 618 618 .into_diagnostic()? 619 - .map(|b| crate::db::deser_repo_metadata(&b)) 619 + .map(|b| crate::db::deser_repo_meta(&b)) 620 620 .transpose()?; 621 621 let had_metadata = existing_metadata.is_some(); 622 622 let mut metadata = existing_metadata.unwrap_or_else(|| RepoMetadata { ··· 634 634 635 635 metadata.index_id = rand::random::<u64>(); 636 636 batch.insert(&db.pending, keys::pending_key(metadata.index_id), &repo_key); 637 - batch.insert(&db.repo_metadata, &meta_key, ser_repo_metadata(&metadata)?); 637 + batch.insert(&db.repo_metadata, &meta_key, ser_repo_meta(&metadata)?); 638 638 batch.commit().into_diagnostic()?; 639 639 640 640 if !was_pending {
+4 -1
src/ingest/relay.rs
··· 304 304 .. 305 305 } = validated; 306 306 307 + #[cfg(not(feature = "indexer"))] 308 + let _ = parsed_blocks; 309 + 307 310 if chain_break.is_broken() { 308 311 // chain breaks are not grounds for blocking when acting as a relay 309 312 debug!(broken = ?chain_break, "chain break, forwarding anyway"); ··· 782 785 .repo_metadata 783 786 .get(&metadata_key) 784 787 .into_diagnostic()? 785 - .map(|bytes| db::deser_repo_metadata(&bytes)) 788 + .map(|bytes| db::deser_repo_meta(&bytes)) 786 789 .transpose()?; 787 790 788 791 if metadata.map_or(false, |m| !m.tracked) {
+1
src/lib.rs
··· 13 13 pub(crate) mod backfill; 14 14 #[cfg(feature = "backlinks")] 15 15 pub(crate) mod backlinks; 16 + #[cfg(feature = "indexer")] 16 17 pub(crate) mod crawler; 17 18 pub(crate) mod db; 18 19 pub(crate) mod ingest;
+2 -2
src/ops.rs
··· 72 72 73 73 let metadata_bytes = db.repo_metadata.get(&metadata_key).into_diagnostic()?; 74 74 if let Some(metadata_bytes) = metadata_bytes { 75 - let metadata = db::deser_repo_metadata(&metadata_bytes)?; 75 + let metadata = db::deser_repo_meta(&metadata_bytes)?; 76 76 batch.remove(&db.pending, keys::pending_key(metadata.index_id)); 77 77 } 78 78 ··· 131 131 132 132 let metadata_bytes = db.repo_metadata.get(&metadata_key).into_diagnostic()?; 133 133 if let Some(metadata_bytes) = metadata_bytes { 134 - let metadata = db::deser_repo_metadata(&metadata_bytes)?; 134 + let metadata = db::deser_repo_meta(&metadata_bytes)?; 135 135 let pending_key = keys::pending_key(metadata.index_id); 136 136 137 137 // manage queues
+23 -1
src/state.rs
··· 6 6 use arc_swap::ArcSwap; 7 7 use miette::Result; 8 8 use smol_str::SmolStr; 9 - use tokio::sync::{Notify, watch}; 9 + #[cfg(feature = "indexer")] 10 + use tokio::sync::Notify; 11 + use tokio::sync::watch; 10 12 use url::Url; 11 13 12 14 use crate::{ ··· 27 29 pub(crate) pds_tiers: PdsTierHandle, 28 30 pub(crate) rate_tiers: HashMap<String, RateTier>, 29 31 pub firehose_cursors: scc::HashIndex<Url, AtomicI64>, 32 + #[cfg(feature = "indexer")] 30 33 pub backfill_notify: Notify, 34 + #[cfg(feature = "indexer")] 31 35 pub crawler_enabled: watch::Sender<bool>, 32 36 pub firehose_enabled: watch::Sender<bool>, 37 + #[cfg(feature = "indexer")] 33 38 pub backfill_enabled: watch::Sender<bool>, 34 39 pub ephemeral_ttl: Duration, 35 40 pub throttler: Throttler, ··· 41 46 let resolver = Resolver::new(config.plc_urls.clone(), config.identity_cache_size); 42 47 let filter_config = crate::db::filter::load(&db.filter)?; 43 48 49 + #[cfg(feature = "indexer")] 44 50 let crawler_default = match config.enable_crawler { 45 51 Some(b) => b, 46 52 // default: enabled if full-network mode, or if crawler sources are configured ··· 69 75 70 76 let relay_cursors = scc::HashIndex::new(); 71 77 78 + #[cfg(feature = "indexer")] 72 79 let (crawler_enabled, _) = watch::channel(crawler_default); 73 80 let (firehose_enabled, _) = watch::channel(config.enable_firehose); 81 + #[cfg(feature = "indexer")] 74 82 let (backfill_enabled, _) = watch::channel(true); 75 83 76 84 Ok(Self { ··· 80 88 pds_tiers, 81 89 rate_tiers: config.rate_tiers.clone(), 82 90 firehose_cursors: relay_cursors, 91 + #[cfg(feature = "indexer")] 83 92 backfill_notify: Notify::new(), 93 + #[cfg(feature = "indexer")] 84 94 crawler_enabled, 85 95 firehose_enabled, 96 + #[cfg(feature = "indexer")] 86 97 backfill_enabled, 87 98 ephemeral_ttl: config.ephemeral_ttl.clone(), 88 99 throttler: Throttler::new(), 89 100 }) 90 101 } 91 102 103 + #[cfg(feature = "indexer")] 92 104 pub fn notify_backfill(&self) { 93 105 self.backfill_notify.notify_one(); 94 106 } ··· 115 127 F: FnOnce() -> Fut, 116 128 Fut: Future<Output = T>, 117 129 { 130 + #[cfg(feature = "indexer")] 118 131 let crawler_was = *self.crawler_enabled.borrow(); 119 132 let firehose_was = *self.firehose_enabled.borrow(); 133 + #[cfg(feature = "indexer")] 120 134 let backfill_was = *self.backfill_enabled.borrow(); 135 + 136 + #[cfg(feature = "indexer")] 121 137 self.crawler_enabled.send_replace(false); 122 138 self.firehose_enabled.send_replace(false); 139 + #[cfg(feature = "indexer")] 123 140 self.backfill_enabled.send_replace(false); 141 + 124 142 let result = f().await; 143 + 144 + #[cfg(feature = "indexer")] 125 145 self.crawler_enabled.send_replace(crawler_was); 126 146 self.firehose_enabled.send_replace(firehose_was); 147 + #[cfg(feature = "indexer")] 127 148 self.backfill_enabled.send_replace(backfill_was); 149 + 128 150 result 129 151 } 130 152 }
+51 -42
src/types.rs
··· 169 169 } 170 170 } 171 171 172 - impl RepoMetadata { 173 - pub fn backfilling(index_id: u64) -> Self { 174 - Self { 175 - index_id, 176 - tracked: true, 172 + impl RepoMetadata {} 173 + 174 + #[cfg(feature = "indexer")] 175 + mod indexer { 176 + use super::*; 177 + 178 + impl RepoMetadata { 179 + pub fn backfilling(index_id: u64) -> Self { 180 + Self { 181 + index_id, 182 + tracked: true, 183 + } 184 + } 185 + } 186 + 187 + impl ResyncState { 188 + pub fn next_backoff(retry_count: u32) -> i64 { 189 + // exponential backoff: 1m, 2m, 4m, 8m... up to 1h 190 + let base = 60; 191 + let cap = 3600; 192 + let mult = 2u64.pow(retry_count.min(10)) as i64; 193 + let delay = (base * mult).min(cap); 194 + 195 + // add +/- 10% jitter 196 + let jitter = (rand::random::<f64>() * 0.2 - 0.1) * delay as f64; 197 + let delay = (delay as f64 + jitter) as i64; 198 + 199 + chrono::Utc::now().timestamp() + delay 200 + } 201 + } 202 + 203 + #[derive(Clone, Debug)] 204 + pub(crate) enum BroadcastEvent { 205 + #[allow(dead_code)] 206 + Persisted(u64), 207 + Ephemeral(Box<MarshallableEvt<'static>>), 208 + } 209 + 210 + #[derive(Debug, PartialEq, Eq, Clone, Copy)] 211 + pub(crate) enum GaugeState { 212 + Synced, 213 + Pending, 214 + Resync(Option<ResyncErrorKind>), 215 + } 216 + 217 + impl GaugeState { 218 + pub fn is_resync(&self) -> bool { 219 + matches!(self, GaugeState::Resync(_)) 177 220 } 178 221 } 179 222 } 223 + 224 + #[cfg(feature = "indexer")] 225 + pub(crate) use indexer::*; 180 226 181 227 impl<'i> RepoState<'i> { 182 228 pub fn backfilling() -> Self { ··· 250 296 }, 251 297 } 252 298 253 - impl ResyncState { 254 - pub fn next_backoff(retry_count: u32) -> i64 { 255 - // exponential backoff: 1m, 2m, 4m, 8m... up to 1h 256 - let base = 60; 257 - let cap = 3600; 258 - let mult = 2u64.pow(retry_count.min(10)) as i64; 259 - let delay = (base * mult).min(cap); 260 - 261 - // add +/- 10% jitter 262 - let jitter = (rand::random::<f64>() * 0.2 - 0.1) * delay as f64; 263 - let delay = (delay as f64 + jitter) as i64; 264 - 265 - chrono::Utc::now().timestamp() + delay 266 - } 267 - } 268 - 269 299 #[derive(Debug, Serialize, Clone)] 270 300 pub enum EventType { 271 301 Record, ··· 302 332 #[serde(borrow)] 303 333 #[serde(skip_serializing_if = "Option::is_none")] 304 334 pub account: Option<AccountEvt<'i>>, 305 - } 306 - 307 - #[cfg(feature = "indexer")] 308 - #[derive(Clone, Debug)] 309 - pub(crate) enum BroadcastEvent { 310 - #[allow(dead_code)] 311 - Persisted(u64), 312 - Ephemeral(Box<MarshallableEvt<'static>>), 313 335 } 314 336 315 337 #[derive(Debug, Serialize, Clone)] ··· 390 412 #[serde(default)] 391 413 #[serde(skip_serializing_if = "StoredData::is_nothing")] 392 414 pub data: StoredData, 393 - } 394 - 395 - #[derive(Debug, PartialEq, Eq, Clone, Copy)] 396 - pub(crate) enum GaugeState { 397 - Synced, 398 - Pending, 399 - Resync(Option<ResyncErrorKind>), 400 - } 401 - 402 - impl GaugeState { 403 - pub fn is_resync(&self) -> bool { 404 - matches!(self, GaugeState::Resync(_)) 405 - } 406 415 } 407 416 408 417 #[cfg(feature = "relay")]
+2
src/util/mod.rs
··· 1 + #![allow(dead_code)] 2 + 1 3 use std::{hash::Hash, time::Duration}; 2 4 3 5 use jacquard_common::{deps::fluent_uri, types::string::Handle};