lightweight com.atproto.sync.listReposByCollection
45
fork

Configure Feed

Select the types of activity you want to include in your feed.

high-level stats

phil 78dc0b7b 61c6d1cd

+456 -23
+10
Cargo.lock
··· 392 392 ] 393 393 394 394 [[package]] 395 + name = "cardinality-estimator-safe" 396 + version = "4.0.2" 397 + dependencies = [ 398 + "enum_dispatch", 399 + "serde", 400 + ] 401 + 402 + [[package]] 395 403 name = "cargo-platform" 396 404 version = "0.1.9" 397 405 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2330 2338 "axum", 2331 2339 "base64", 2332 2340 "bytes", 2341 + "cardinality-estimator-safe", 2333 2342 "cid", 2334 2343 "clap", 2335 2344 "dashmap 5.5.3", ··· 2348 2357 "metrics-exporter-prometheus", 2349 2358 "mini-moka", 2350 2359 "n0-future", 2360 + "postcard", 2351 2361 "repo-stream", 2352 2362 "reqwest", 2353 2363 "rustls",
+2
Cargo.toml
··· 7 7 axum = "0.8.8" 8 8 base64 = "0.22.1" 9 9 bytes = "1" 10 + cardinality-estimator-safe = { path = "../cardinality-estimator", features = ["with_serde"] } 10 11 cid = { version = "0.11", default-features = false, features = ["alloc"] } 11 12 clap = { version = "4.5.60", features = ["derive", "env"] } 12 13 dashmap = "5" ··· 25 26 metrics-exporter-prometheus = { version = "0.18.1", features = ["http-listener"] } 26 27 mini-moka = "0.10" 27 28 n0-future = "0.1" 29 + postcard = { version = "1", features = ["alloc"] } 28 30 repo-stream = { version = "0.5.0-alpha.5", features = ["jacquard"] } 29 31 reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"] } 30 32 rustls = { version = "0.23", default-features = false, features = ["aws-lc-rs"] }
+21
src/main.rs
··· 11 11 use lightrail::identity; 12 12 use lightrail::storage; 13 13 use lightrail::sync::{self, backfill, firehose, resync}; 14 + use lightrail::util::TokenExt; 14 15 15 16 #[derive(Parser, Debug)] 16 17 #[command(name = "lightrail", about = "listReposByCollection indexing service")] ··· 227 228 } 228 229 }); 229 230 231 + tasks.spawn({ 232 + let db = db.clone(); 233 + let token = token.clone(); 234 + async move { 235 + let mut interval = tokio::time::interval(Duration::from_secs(60)); 236 + interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay); 237 + while token.run(interval.tick()).await.is_some() { 238 + if let Err(e) = storage::meta::save(&db) { 239 + warn!(error = %e, "failed to periodically save meta stats"); 240 + } 241 + } 242 + info!("meta stats done."); 243 + Ok(()) 244 + } 245 + }); 246 + 230 247 if args.deep_crawl { 231 248 tasks.spawn({ 232 249 let token = token.clone(); ··· 270 287 if error.is_none() { 271 288 error = into_error(r); 272 289 } 290 + } 291 + 292 + if let Err(e) = storage::meta::save(&db) { 293 + warn!(error = %e, "failed to save meta stats on shutdown"); 273 294 } 274 295 275 296 error.map_or(Ok(()), Err)
+45 -13
src/server/admin.rs
··· 9 9 use serde_json::json; 10 10 use subtle::ConstantTimeEq; 11 11 12 - use crate::storage::{DbRef, backfill_progress, error::StorageError, repo, resync_queue}; 12 + use std::sync::atomic::Ordering; 13 + 14 + use crate::storage::{DbRef, backfill_progress, error::StorageError}; 13 15 14 16 use super::AdminConfig; 15 17 16 18 #[derive(Serialize)] 17 19 pub(super) struct AdminStatus { 18 - repos_synced: u64, 19 - resync_queue_depth: u64, 20 + // ── Persistent counters (survive restarts) ──────────────────────────── 21 + first_startup_secs: u64, 22 + startup_count: u64, 23 + repos_queued_total: u64, 24 + collection_births_total: u64, 25 + collection_deaths_total: u64, 26 + resyncs_completed_total: u64, 27 + resync_queue_depth: i64, 28 + resync_buffer_count: i64, 29 + // ── Cardinality estimates (HLL sketches, ~1.6% error at large N) ────── 30 + distinct_collections: usize, 31 + distinct_accounts_all: usize, 32 + distinct_accounts_resynced: usize, 33 + distinct_accounts_commit_strict: usize, 34 + distinct_accounts_commit_lenient: usize, 35 + distinct_accounts_desynced: usize, 36 + distinct_pds_hosts: usize, 37 + // ── Backfill progress (live from storage) ───────────────────────────── 20 38 upstream_backfill_complete: bool, 21 39 upstream_backfill_completed_at: Option<String>, 22 40 } ··· 65 83 } 66 84 67 85 let host = config.subscribe_host.clone(); 68 - let (repos_synced, queue_depth, backfill) = tokio::task::spawn_blocking(move || { 69 - let repos_synced = repo::count_repos(&db) as u64; 70 - let queue_depth = resync_queue::count_queued(&db) as u64; 71 - let backfill = backfill_progress::get(&db, &host)?; 72 - Ok::<_, StorageError>((repos_synced, queue_depth, backfill)) 73 - }) 74 - .await 75 - .unwrap()?; 86 + let db2 = db.clone(); 87 + let backfill = tokio::task::spawn_blocking(move || backfill_progress::get(&db2, &host)) 88 + .await 89 + .unwrap()?; 76 90 91 + let s = &db.stats; 77 92 Ok(Json(AdminStatus { 78 - repos_synced, 79 - resync_queue_depth: queue_depth, 93 + first_startup_secs: s.first_startup_secs.load(Ordering::Relaxed), 94 + startup_count: s.startup_count.load(Ordering::Relaxed), 95 + repos_queued_total: s.repos_queued_total.load(Ordering::Relaxed), 96 + collection_births_total: s.collection_births_total.load(Ordering::Relaxed), 97 + collection_deaths_total: s.collection_deaths_total.load(Ordering::Relaxed), 98 + resyncs_completed_total: s.resyncs_completed_total.load(Ordering::Relaxed), 99 + resync_queue_depth: s.resync_queue_depth.load(Ordering::Relaxed), 100 + resync_buffer_count: s.resync_buffer_count.load(Ordering::Relaxed), 101 + distinct_collections: s.sketch_collections.lock().unwrap().estimate(), 102 + distinct_accounts_all: s.sketch_accounts_all.lock().unwrap().estimate(), 103 + distinct_accounts_resynced: s.sketch_accounts_resynced.lock().unwrap().estimate(), 104 + distinct_accounts_commit_strict: s.sketch_accounts_commit_strict.lock().unwrap().estimate(), 105 + distinct_accounts_commit_lenient: s 106 + .sketch_accounts_commit_lenient 107 + .lock() 108 + .unwrap() 109 + .estimate(), 110 + distinct_accounts_desynced: s.sketch_accounts_desynced.lock().unwrap().estimate(), 111 + distinct_pds_hosts: s.sketch_pds_hosts.lock().unwrap().estimate(), 80 112 upstream_backfill_complete: backfill 81 113 .as_ref() 82 114 .and_then(|b| b.completed_at.as_ref())
+2 -1
src/storage/collection_index.rs
··· 16 16 use jacquard_common::types::string::Did; 17 17 use tracing::warn; 18 18 19 - use crate::storage::{DbRef, PREFIX_CBR, PREFIX_RBC, StorageResult, error::StorageError}; 19 + use crate::storage::{DbRef, PREFIX_CBR, PREFIX_RBC, StorageResult, error::StorageError, meta}; 20 20 21 21 const NUL: u8 = b'\0'; 22 22 ··· 193 193 did: &Did<'_>, 194 194 collection: Nsid<'_>, 195 195 ) { 196 + meta::insert_str(&db.stats.sketch_collections, collection.as_str()); 196 197 batch.insert(&db.index_ks, rbc(collection.clone(), did), b""); 197 198 batch.insert(&db.index_ks, cbr(did, collection), b""); 198 199 }
+320
src/storage/meta.rs
··· 1 + //! Persistent system statistics and cardinality sketches. 2 + //! 3 + //! Values are held in memory as atomics/mutexes, loaded from storage on startup, 4 + //! and persisted periodically (every 60 s) and on clean shutdown. Prometheus 5 + //! metrics reset on restart; these stats give a long-running overview across 6 + //! restarts without precision requirements. 7 + //! 8 + //! Keys use the `met` prefix in the default keyspace. 9 + 10 + use std::collections::hash_map::DefaultHasher; 11 + use std::sync::atomic::{AtomicI64, AtomicU64, Ordering}; 12 + use std::sync::{Arc, Mutex}; 13 + 14 + use cardinality_estimator_safe::{Element, Sketch}; 15 + use tracing::error; 16 + 17 + use super::{DbRef, PREFIX_META, error::StorageResult}; 18 + 19 + type Sk = Sketch<12, 6>; 20 + 21 + pub struct Stats { 22 + // ── Counters ───────────────────────────────────────────────────────────── 23 + /// Unix timestamp of the very first startup; set once, never updated. 24 + pub first_startup_secs: AtomicU64, 25 + pub startup_count: AtomicU64, 26 + /// New repos ever enqueued for resync. 27 + pub repos_queued_total: AtomicU64, 28 + pub collection_births_total: AtomicU64, 29 + pub collection_deaths_total: AtomicU64, 30 + /// Successful full resyncs completed. 31 + pub resyncs_completed_total: AtomicU64, 32 + /// Approximate queue depth; may drift slightly after a crash. 33 + pub resync_queue_depth: AtomicI64, 34 + /// Approximate count of buffered in-flight events. 35 + pub resync_buffer_count: AtomicI64, 36 + 37 + // ── Cardinality sketches (~3 KiB each at full HLL) ──────────────────────── 38 + /// Distinct NSID strings ever indexed. 39 + /// BOOOOoooooo mutexes (they should be ok, we only grab them briefly for writes and serializes) 40 + pub sketch_collections: Mutex<Sk>, 41 + /// All DIDs ever seen. 42 + pub sketch_accounts_all: Mutex<Sk>, 43 + /// DIDs with ≥1 successful resync. 44 + pub sketch_accounts_resynced: Mutex<Sk>, 45 + /// DIDs indexed in sync1.1 strict mode. 46 + pub sketch_accounts_commit_strict: Mutex<Sk>, 47 + /// DIDs indexed in sync1.1 lenient mode. 48 + pub sketch_accounts_commit_lenient: Mutex<Sk>, 49 + /// DIDs desynchronized by commit failure. 50 + pub sketch_accounts_desynced: Mutex<Sk>, 51 + /// Distinct PDS hostnames seen. 52 + pub sketch_pds_hosts: Mutex<Sk>, 53 + } 54 + 55 + impl Default for Stats { 56 + fn default() -> Self { 57 + Stats { 58 + first_startup_secs: AtomicU64::new(0), 59 + startup_count: AtomicU64::new(0), 60 + repos_queued_total: AtomicU64::new(0), 61 + collection_births_total: AtomicU64::new(0), 62 + collection_deaths_total: AtomicU64::new(0), 63 + resyncs_completed_total: AtomicU64::new(0), 64 + resync_queue_depth: AtomicI64::new(0), 65 + resync_buffer_count: AtomicI64::new(0), 66 + sketch_collections: Mutex::new(Sk::default()), 67 + sketch_accounts_all: Mutex::new(Sk::default()), 68 + sketch_accounts_resynced: Mutex::new(Sk::default()), 69 + sketch_accounts_commit_strict: Mutex::new(Sk::default()), 70 + sketch_accounts_commit_lenient: Mutex::new(Sk::default()), 71 + sketch_accounts_desynced: Mutex::new(Sk::default()), 72 + sketch_pds_hosts: Mutex::new(Sk::default()), 73 + } 74 + } 75 + } 76 + 77 + pub type StatsRef = Arc<Stats>; 78 + 79 + /// Insert a string into a sketch. 80 + pub fn insert_str(sketch: &Mutex<Sk>, s: &str) { 81 + sketch 82 + .lock() 83 + .unwrap() 84 + .insert(Element::from_hasher_default::<DefaultHasher>(s)); 85 + } 86 + 87 + // --------------------------------------------------------------------------- 88 + // Key constants 89 + // --------------------------------------------------------------------------- 90 + 91 + const K_FIRST_STARTUP: &[u8] = b"first_startup"; 92 + const K_STARTUP_COUNT: &[u8] = b"startup_count"; 93 + const K_REPOS_QUEUED: &[u8] = b"repos_queued_total"; 94 + const K_BIRTHS: &[u8] = b"births_total"; 95 + const K_DEATHS: &[u8] = b"deaths_total"; 96 + const K_RESYNCS_DONE: &[u8] = b"resyncs_completed_total"; 97 + const K_QUEUE_DEPTH: &[u8] = b"queue_depth"; 98 + const K_BUFFER_COUNT: &[u8] = b"buffer_count"; 99 + const K_SK_COLLECTIONS: &[u8] = b"sketch_collections"; 100 + const K_SK_ACCOUNTS_ALL: &[u8] = b"sketch_accounts_all"; 101 + const K_SK_ACCOUNTS_RSYNCD: &[u8] = b"sketch_accounts_resynced"; 102 + const K_SK_ACCOUNTS_S11: &[u8] = b"sketch_accounts_commit_strict"; 103 + const K_SK_ACCOUNTS_LNT: &[u8] = b"sketch_accounts_commit_lenient"; 104 + const K_SK_ACCOUNTS_DSY: &[u8] = b"sketch_accounts_desynced"; 105 + const K_SK_PDS_HOSTS: &[u8] = b"sketch_pds_hosts"; 106 + 107 + // --------------------------------------------------------------------------- 108 + // Storage helpers 109 + // --------------------------------------------------------------------------- 110 + 111 + fn full_key(suffix: &[u8]) -> Vec<u8> { 112 + let mut k = Vec::with_capacity(PREFIX_META.len() + suffix.len()); 113 + k.extend_from_slice(&PREFIX_META); 114 + k.extend_from_slice(suffix); 115 + k 116 + } 117 + 118 + fn read_u64(ks: &fjall::Keyspace, suffix: &[u8]) -> StorageResult<u64> { 119 + match ks.get(full_key(suffix))? { 120 + None => Ok(0), 121 + Some(v) if v.len() >= 8 => Ok(u64::from_be_bytes(v[..8].try_into().unwrap())), 122 + Some(_) => Ok(0), 123 + } 124 + } 125 + 126 + fn write_u64(ks: &fjall::Keyspace, suffix: &[u8], val: u64) -> StorageResult<()> { 127 + ks.insert(full_key(suffix), val.to_be_bytes())?; 128 + Ok(()) 129 + } 130 + 131 + fn read_i64(ks: &fjall::Keyspace, suffix: &[u8]) -> StorageResult<i64> { 132 + Ok(read_u64(ks, suffix)? as i64) 133 + } 134 + 135 + fn write_i64(ks: &fjall::Keyspace, suffix: &[u8], val: i64) -> StorageResult<()> { 136 + write_u64(ks, suffix, val as u64) 137 + } 138 + 139 + fn read_sketch(ks: &fjall::Keyspace, suffix: &[u8]) -> Sk { 140 + let Some(bytes) = ks.get(full_key(suffix)).ok().flatten() else { 141 + return Sk::default(); 142 + }; 143 + match postcard::from_bytes(bytes.as_ref()) { 144 + Ok(sk) => sk, 145 + Err(e) => { 146 + error!(key = ?suffix, error = %e, "failed to deserialize cardinality sketch; using fresh sketch"); 147 + Sk::default() 148 + } 149 + } 150 + } 151 + 152 + fn write_sketch(ks: &fjall::Keyspace, suffix: &[u8], sketch: &Mutex<Sk>) -> StorageResult<()> { 153 + let bytes = postcard::to_stdvec(&*sketch.lock().unwrap()).unwrap_or_default(); 154 + ks.insert(full_key(suffix), bytes)?; 155 + Ok(()) 156 + } 157 + 158 + // --------------------------------------------------------------------------- 159 + // Public API 160 + // --------------------------------------------------------------------------- 161 + 162 + /// Load stats from storage on startup. 163 + /// 164 + /// Missing keys default to zero / empty sketch. After loading: 165 + /// - If `first_startup_secs` is 0, writes the current time immediately. 166 + /// - Increments `startup_count` and writes it immediately. 167 + pub(super) fn load(ks: &fjall::Keyspace) -> StorageResult<StatsRef> { 168 + let stats = Stats { 169 + first_startup_secs: AtomicU64::new(read_u64(ks, K_FIRST_STARTUP)?), 170 + startup_count: AtomicU64::new(read_u64(ks, K_STARTUP_COUNT)?), 171 + repos_queued_total: AtomicU64::new(read_u64(ks, K_REPOS_QUEUED)?), 172 + collection_births_total: AtomicU64::new(read_u64(ks, K_BIRTHS)?), 173 + collection_deaths_total: AtomicU64::new(read_u64(ks, K_DEATHS)?), 174 + resyncs_completed_total: AtomicU64::new(read_u64(ks, K_RESYNCS_DONE)?), 175 + resync_queue_depth: AtomicI64::new(read_i64(ks, K_QUEUE_DEPTH)?), 176 + resync_buffer_count: AtomicI64::new(read_i64(ks, K_BUFFER_COUNT)?), 177 + sketch_collections: Mutex::new(read_sketch(ks, K_SK_COLLECTIONS)), 178 + sketch_accounts_all: Mutex::new(read_sketch(ks, K_SK_ACCOUNTS_ALL)), 179 + sketch_accounts_resynced: Mutex::new(read_sketch(ks, K_SK_ACCOUNTS_RSYNCD)), 180 + sketch_accounts_commit_strict: Mutex::new(read_sketch(ks, K_SK_ACCOUNTS_S11)), 181 + sketch_accounts_commit_lenient: Mutex::new(read_sketch(ks, K_SK_ACCOUNTS_LNT)), 182 + sketch_accounts_desynced: Mutex::new(read_sketch(ks, K_SK_ACCOUNTS_DSY)), 183 + sketch_pds_hosts: Mutex::new(read_sketch(ks, K_SK_PDS_HOSTS)), 184 + }; 185 + 186 + // Set first_startup_secs if this is the very first startup. 187 + if stats.first_startup_secs.load(Ordering::Relaxed) == 0 { 188 + let now = crate::util::unix_now(); 189 + stats.first_startup_secs.store(now, Ordering::Relaxed); 190 + write_u64(ks, K_FIRST_STARTUP, now)?; 191 + } 192 + 193 + // Bump startup_count. 194 + let new_count = stats.startup_count.fetch_add(1, Ordering::Relaxed) + 1; 195 + write_u64(ks, K_STARTUP_COUNT, new_count)?; 196 + 197 + Ok(Arc::new(stats)) 198 + } 199 + 200 + /// Persist all stats to storage. 201 + pub fn save(db: &DbRef) -> StorageResult<()> { 202 + let s = &db.stats; 203 + let ks = &db.ks; 204 + 205 + write_u64( 206 + ks, 207 + K_FIRST_STARTUP, 208 + s.first_startup_secs.load(Ordering::Relaxed), 209 + )?; 210 + write_u64(ks, K_STARTUP_COUNT, s.startup_count.load(Ordering::Relaxed))?; 211 + write_u64( 212 + ks, 213 + K_REPOS_QUEUED, 214 + s.repos_queued_total.load(Ordering::Relaxed), 215 + )?; 216 + write_u64( 217 + ks, 218 + K_BIRTHS, 219 + s.collection_births_total.load(Ordering::Relaxed), 220 + )?; 221 + write_u64( 222 + ks, 223 + K_DEATHS, 224 + s.collection_deaths_total.load(Ordering::Relaxed), 225 + )?; 226 + write_u64( 227 + ks, 228 + K_RESYNCS_DONE, 229 + s.resyncs_completed_total.load(Ordering::Relaxed), 230 + )?; 231 + write_i64( 232 + ks, 233 + K_QUEUE_DEPTH, 234 + s.resync_queue_depth.load(Ordering::Relaxed), 235 + )?; 236 + write_i64( 237 + ks, 238 + K_BUFFER_COUNT, 239 + s.resync_buffer_count.load(Ordering::Relaxed), 240 + )?; 241 + 242 + write_sketch(ks, K_SK_COLLECTIONS, &s.sketch_collections)?; 243 + write_sketch(ks, K_SK_ACCOUNTS_ALL, &s.sketch_accounts_all)?; 244 + write_sketch(ks, K_SK_ACCOUNTS_RSYNCD, &s.sketch_accounts_resynced)?; 245 + write_sketch(ks, K_SK_ACCOUNTS_S11, &s.sketch_accounts_commit_strict)?; 246 + write_sketch(ks, K_SK_ACCOUNTS_LNT, &s.sketch_accounts_commit_lenient)?; 247 + write_sketch(ks, K_SK_ACCOUNTS_DSY, &s.sketch_accounts_desynced)?; 248 + write_sketch(ks, K_SK_PDS_HOSTS, &s.sketch_pds_hosts)?; 249 + 250 + Ok(()) 251 + } 252 + 253 + // --------------------------------------------------------------------------- 254 + // Tests 255 + // --------------------------------------------------------------------------- 256 + 257 + #[cfg(test)] 258 + mod tests { 259 + use super::*; 260 + use crate::storage::open_temporary; 261 + 262 + #[test] 263 + fn first_startup_set_on_open() { 264 + // open_temporary already calls meta::load internally. 265 + let db = open_temporary().unwrap(); 266 + assert!(db.stats.first_startup_secs.load(Ordering::Relaxed) > 0); 267 + assert_eq!(db.stats.startup_count.load(Ordering::Relaxed), 1); 268 + } 269 + 270 + #[test] 271 + fn startup_count_increments_on_reload() { 272 + let db = open_temporary().unwrap(); 273 + let first_ts = db.stats.first_startup_secs.load(Ordering::Relaxed); 274 + assert_eq!(db.stats.startup_count.load(Ordering::Relaxed), 1); 275 + 276 + // Simulate a second startup: save current stats, then call load again. 277 + save(&db).unwrap(); 278 + let s2 = load(&db.ks).unwrap(); 279 + assert_eq!(s2.startup_count.load(Ordering::Relaxed), 2); 280 + // first_startup_secs must not change. 281 + assert_eq!(s2.first_startup_secs.load(Ordering::Relaxed), first_ts); 282 + } 283 + 284 + #[test] 285 + fn save_and_load_counters_roundtrip() { 286 + let db = open_temporary().unwrap(); 287 + 288 + db.stats.repos_queued_total.store(42, Ordering::Relaxed); 289 + db.stats.collection_births_total.store(7, Ordering::Relaxed); 290 + db.stats.collection_deaths_total.store(3, Ordering::Relaxed); 291 + db.stats 292 + .resyncs_completed_total 293 + .store(100, Ordering::Relaxed); 294 + db.stats.resync_queue_depth.store(-5, Ordering::Relaxed); 295 + 296 + save(&db).unwrap(); 297 + 298 + let s2 = load(&db.ks).unwrap(); 299 + assert_eq!(s2.repos_queued_total.load(Ordering::Relaxed), 42); 300 + assert_eq!(s2.collection_births_total.load(Ordering::Relaxed), 7); 301 + assert_eq!(s2.collection_deaths_total.load(Ordering::Relaxed), 3); 302 + assert_eq!(s2.resyncs_completed_total.load(Ordering::Relaxed), 100); 303 + assert_eq!(s2.resync_queue_depth.load(Ordering::Relaxed), -5); 304 + } 305 + 306 + #[test] 307 + fn sketch_roundtrips_through_save_load() { 308 + let db = open_temporary().unwrap(); 309 + 310 + insert_str(&db.stats.sketch_collections, "app.bsky.feed.post"); 311 + insert_str(&db.stats.sketch_collections, "app.bsky.actor.profile"); 312 + let est_before = db.stats.sketch_collections.lock().unwrap().estimate(); 313 + 314 + save(&db).unwrap(); 315 + 316 + let s2 = load(&db.ks).unwrap(); 317 + let est_after = s2.sketch_collections.lock().unwrap().estimate(); 318 + assert_eq!(est_before, est_after); 319 + } 320 + }
+8
src/storage/mod.rs
··· 3 3 pub mod error; 4 4 pub mod firehose_cursor; 5 5 pub mod list_hosts_cursor; 6 + pub mod meta; 6 7 pub mod pds_host; 7 8 pub mod repo; 8 9 pub mod resync_buffer; 9 10 pub mod resync_queue; 10 11 11 12 pub(crate) use error::{StorageError, StorageResult}; 13 + pub(crate) use meta::StatsRef; 12 14 pub(crate) use repo::Account; 13 15 14 16 // --------------------------------------------------------------------------- ··· 42 44 pub(super) const PREFIX_PDS_HOST: KeyPrefix = *b"pdh"; 43 45 /// listHosts walk cursor (per upstream relay host). See [`list_hosts_cursor`]. 44 46 pub(super) const PREFIX_LIST_HOSTS: KeyPrefix = *b"lhs"; 47 + /// Persistent system stats and cardinality sketches. See [`meta`]. 48 + pub(super) const PREFIX_META: KeyPrefix = *b"met"; 45 49 46 50 use std::path::Path; 47 51 use std::sync::Arc; ··· 60 64 /// across sequential reads) and Lz4 compression at all levels (higher 61 65 /// on-disk density means more data fits in the block cache). 62 66 pub(crate) index_ks: fjall::Keyspace, 67 + /// Persistent system stats and cardinality sketches, loaded on open. 68 + pub(crate) stats: StatsRef, 63 69 } 64 70 65 71 /// Cheaply-cloneable reference to the shared database. ··· 103 109 fjall::CompressionType::Lz4, 104 110 )) 105 111 })?; 112 + let stats = meta::load(&ks)?; 106 113 Ok(Arc::new(Db { 107 114 database, 108 115 ks, 109 116 index_ks, 117 + stats, 110 118 })) 111 119 }
+2
src/storage/pds_host.rs
··· 7 7 use crate::storage::{ 8 8 DbRef, PREFIX_PDS_HOST, 9 9 error::{StorageError, StorageResult}, 10 + meta, 10 11 }; 11 12 12 13 // --------------------------------------------------------------------------- ··· 178 179 179 180 /// Add a [`PdsHostInfo`] write to an existing batch. 180 181 pub fn put_into(batch: &mut fjall::OwnedWriteBatch, db: &DbRef, host: &Host, info: &PdsHostInfo) { 182 + meta::insert_str(&db.stats.sketch_pds_hosts, &host.to_string()); 181 183 batch.insert(&db.ks, key(host), encode(info)); 182 184 } 183 185
+4
src/storage/resync_buffer.rs
··· 9 9 //! removals could use a weak/single delete to avoid full tombstones. Fjall 10 10 //! 3.0.3 does not yet expose that API; switch to it when available. 11 11 12 + use std::sync::atomic::Ordering; 13 + 12 14 use jacquard_common::types::string::Did; 13 15 use tracing::debug; 14 16 ··· 56 58 ); 57 59 let key = key(did, seq); 58 60 db.ks.insert(key, cbor)?; 61 + db.stats.resync_buffer_count.fetch_add(1, Ordering::Relaxed); 59 62 Ok(()) 60 63 } 61 64 ··· 93 96 debug!(did = did.as_str(), seq, "ack buffered event"); 94 97 let key = key(did, seq); 95 98 db.ks.remove(key)?; 99 + db.stats.resync_buffer_count.fetch_sub(1, Ordering::Relaxed); 96 100 Ok(()) 97 101 } 98 102
+3
src/storage/resync_queue.rs
··· 4 4 //! Values: `[u16 BE retry_count][u16 BE reason_len][reason_bytes][commit_cbor_bytes]` 5 5 6 6 use std::collections::HashSet; 7 + use std::sync::atomic::Ordering; 7 8 8 9 use fjall::util::prefixed_range; 9 10 use jacquard_common::types::string::Did; ··· 164 165 ); 165 166 } 166 167 batch.insert(&db.ks, key(ts, &item.did), encode(item)); 168 + db.stats.resync_queue_depth.fetch_add(1, Ordering::Relaxed); 167 169 } 168 170 169 171 /// Count the total number of entries currently in the resync queue. ··· 297 299 batch.insert(&db.ks, &repo_key, repo::encode_repo_info(&new_info)); 298 300 batch.commit()?; 299 301 302 + db.stats.resync_queue_depth.fetch_sub(1, Ordering::Relaxed); 300 303 trace!( 301 304 did = item.did.as_str(), 302 305 reason = %item.retry_reason,
+2
src/sync/backfill.rs
··· 30 30 }, 31 31 util::{TokenExt, unix_now}, 32 32 }; 33 + use std::sync::atomic::Ordering; 33 34 34 35 const PAGE_LIMIT: i64 = 500; 35 36 /// Delay between retry attempts after a transient page failure. ··· 280 281 commit_cbor: vec![], 281 282 }; 282 283 storage::resync_queue::enqueue(db, now, &item)?; 284 + db.stats.repos_queued_total.fetch_add(1, Ordering::Relaxed); 283 285 count += 1; 284 286 } 285 287 }
+21 -2
src/sync/firehose/commit_event.rs
··· 24 24 use super::validate::{self, CarDrop}; 25 25 use crate::identity::Resolver; 26 26 use crate::mst::{self, mortality::ExtractResult}; 27 + use std::sync::atomic::Ordering; 28 + 27 29 use crate::storage::{ 28 - self, DbRef, 30 + self, DbRef, meta, 29 31 pds_host::{self, Sync11Mode}, 30 32 repo::{AccountStatus, RepoInfo, RepoPrev, RepoState}, 31 33 }; ··· 308 310 batch 309 311 .commit() 310 312 .map_err(Into::<crate::storage::StorageError>::into)?; 313 + db.stats.repos_queued_total.fetch_add(1, Ordering::Relaxed); 311 314 metrics::counter!("lightrail_commit_dropped_total", "reason" => "unknown_repo") 312 315 .increment(1); 313 316 return Ok(Step2Result::Drop); ··· 529 532 .map_err(Into::<crate::storage::StorageError>::into)?; 530 533 531 534 if n_born > 0 { 535 + db.stats 536 + .collection_births_total 537 + .fetch_add(n_born, Ordering::Relaxed); 532 538 metrics::counter!("lightrail_collection_births_total", "source" => "firehose") 533 539 .increment(n_born); 534 540 } 535 541 if n_died > 0 { 542 + db.stats 543 + .collection_deaths_total 544 + .fetch_add(n_died, Ordering::Relaxed); 536 545 metrics::counter!("lightrail_collection_deaths_total", "source" => "firehose") 537 546 .increment(n_died); 538 547 } 548 + meta::insert_str(&db.stats.sketch_accounts_all, did.as_str()); 549 + match current_mode { 550 + Sync11Mode::Strict => { 551 + meta::insert_str(&db.stats.sketch_accounts_commit_strict, did.as_str()) 552 + } 553 + Sync11Mode::Lenient => { 554 + meta::insert_str(&db.stats.sketch_accounts_commit_lenient, did.as_str()) 555 + } 556 + } 539 557 metrics::counter!("lightrail_commits_indexed_total", "mode" => current_mode.as_str()) 540 558 .increment(1); 541 559 ··· 565 583 db, 566 584 crate::util::unix_now(), 567 585 &crate::storage::resync_queue::ResyncItem { 568 - did, 586 + did: did.clone(), 569 587 retry_count: 0, 570 588 retry_reason: reason.to_string(), 571 589 commit_cbor: vec![], ··· 574 592 batch 575 593 .commit() 576 594 .map_err(Into::<crate::storage::StorageError>::into)?; 595 + meta::insert_str(&db.stats.sketch_accounts_desynced, did.as_str()); 577 596 Ok(()) 578 597 }
+8 -1
src/sync/resync/dispatcher.rs
··· 20 20 use tokio::task::{Id as TaskId, JoinSet}; 21 21 use tracing::{debug, error, info, trace, warn}; 22 22 23 + use std::sync::atomic::Ordering; 24 + 23 25 use crate::error::Result; 24 26 use crate::storage::{ 25 - self, DbRef, 27 + self, DbRef, meta, 26 28 repo::{AccountStatus, RepoInfo, RepoState}, 27 29 resync_queue::ResyncItem, 28 30 }; ··· 335 337 WorkerOutcome::Success => { 336 338 metrics::counter!("lightrail_resync_completed_total", "outcome" => "success") 337 339 .increment(1); 340 + db.stats 341 + .resyncs_completed_total 342 + .fetch_add(1, Ordering::Relaxed); 343 + meta::insert_str(&db.stats.sketch_accounts_resynced, did.as_str()); 344 + meta::insert_str(&db.stats.sketch_accounts_all, did.as_str()); 338 345 // Transition to Active first so buffered commits see the correct 339 346 // state when they're replayed through the normal pipeline. 340 347 transition_state(db.clone(), did.clone(), RepoState::Active, None).await?;
+8 -6
src/util.rs
··· 1 1 use std::time::{SystemTime, UNIX_EPOCH}; 2 + use tokio::time::{Duration, sleep}; 3 + use tokio_util::sync::CancellationToken; 2 4 3 5 pub fn unix_now() -> u64 { 4 6 SystemTime::now() ··· 9 11 10 12 pub trait TokenExt { 11 13 /// alias for CancellationToken::run_until_cancelled 14 + /// 15 + /// returns None if the token cancels before the future completes 12 16 fn run<F: Future>(&self, fut: F) -> impl Future<Output = Option<F::Output>>; 13 17 /// sleep that ends early if if cancelled 14 18 /// 15 19 /// returns `true` when completed after the full sleep completed 16 - fn sleep(&self, d: tokio::time::Duration) -> impl Future<Output = bool>; 20 + fn sleep(&self, d: Duration) -> impl Future<Output = bool>; 17 21 } 18 22 19 - impl TokenExt for tokio_util::sync::CancellationToken { 23 + impl TokenExt for CancellationToken { 20 24 async fn run<F: Future>(&self, f: F) -> Option<F::Output> { 21 25 self.run_until_cancelled(f).await 22 26 } 23 - async fn sleep(&self, d: tokio::time::Duration) -> bool { 24 - self.run_until_cancelled(tokio::time::sleep(d)) 25 - .await 26 - .is_some() 27 + async fn sleep(&self, d: Duration) -> bool { 28 + self.run_until_cancelled(sleep(d)).await.is_some() 27 29 } 28 30 }