try some db stuff · microcosm.blue/lightrail@f9db2b1

+16 -1

src/main.rs

··· 106 106 #[arg(long, env = "LIGHTRAIL_FJALL_CACHE_MB", default_value_t = 256)] 107 107 fjall_cache_mb: u64, 108 108 109 + /// Number of fjall background worker threads (flush + compaction). 110 + /// Defaults to fjall's own heuristic (min(CPU cores, 4)). 111 + #[arg(long, env = "LIGHTRAIL_FJALL_WORKER_THREADS")] 112 + fjall_worker_threads: Option<usize>, 113 + 109 114 /// Max concurrent per-PDS listRepos workers during deep crawl. 110 115 #[arg( 111 116 long, ··· 153 158 install_metrics(addr)?; 154 159 } 155 160 156 - let db = storage::open(&args.db_path, args.fjall_cache_mb)?; 161 + let db = storage::open( 162 + &args.db_path, 163 + args.fjall_cache_mb, 164 + args.fjall_worker_threads, 165 + )?; 157 166 let client = lightrail::http::build_client(args.crawl_qps); 158 167 159 168 let dispatcher_state: resync::dispatcher::DispatcherState = std::sync::Arc::new( ··· 274 283 let mut interval = tokio::time::interval(Duration::from_secs(60)); 275 284 interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay); 276 285 while token.run(interval.tick()).await.is_some() { 286 + // Flush the journal BufWriter to the OS page cache (no fsync). We use 287 + // manual_journal_persist so individual writes skip this; 288 + // doing it here batches many writes into one flush. 289 + if let Err(e) = db.persist_journal() { 290 + warn!(error = %e, "failed to persist journal buffer"); 291 + } 277 292 if let Err(e) = storage::meta::save(&db) { 278 293 warn!(error = %e, "failed to periodically save meta stats"); 279 294 }

+38 -5

src/storage/mod.rs

··· 87 87 } 88 88 89 89 impl Db { 90 + /// Flush the journal write buffer to the OS page cache. 91 + /// 92 + /// With `manual_journal_persist` enabled, individual writes skip the flush; 93 + /// call this periodically to batch many writes into a single flush. 94 + /// Does NOT fsync — a process crash may lose up to one flush interval of 95 + /// writes (an OS crash or power loss possibly more), which is acceptable since all data can be re-fetched. 96 + pub fn persist_journal(&self) -> StorageResult<()> { 97 + self.database.persist(fjall::PersistMode::Buffer)?; 98 + Ok(()) 99 + } 100 + 90 101 /// Collect a snapshot of fjall storage stats. 91 102 pub fn storage_stats(&self) -> StorageStats { 92 103 StorageStats { ··· 105 116 pub type DbRef = Arc<Db>; 106 117 107 118 /// Open (or create) the fjall database at `path` and return a shared handle. 108 - pub fn open(path: &Path, cache_mb: u64) -> StorageResult<DbRef> { 109 - open_inner(path, DbConfig::ForReal { cache_mb }) 119 + /// 120 + /// `worker_threads`: number of fjall background threads for flush + compaction. 121 + /// `None` uses fjall's own default (`min(cores, 4)`). 122 + pub fn open(path: &Path, cache_mb: u64, worker_threads: Option<usize>) -> StorageResult<DbRef> { 123 + open_inner( 124 + path, 125 + DbConfig::ForReal { 126 + cache_mb, 127 + worker_threads, 128 + }, 129 + ) 110 130 } 111 131 112 132 enum DbConfig { ··· 114 134 #[allow(dead_code)] 115 135 Testing, 116 136 /// bumpable cache for prod 117 - ForReal { cache_mb: u64 }, 137 + ForReal { 138 + cache_mb: u64, 139 + worker_threads: Option<usize>, 140 + }, 118 141 } 119 142 120 143 /// Open a temporary database that deletes itself on drop. For tests only. 
··· 128 151 } 129 152 130 153 fn open_inner(path: &Path, config: DbConfig) -> StorageResult<DbRef> { 131 - let builder = fjall::Database::builder(path); 154 + let builder = fjall::Database::builder(path).manual_journal_persist(true); 132 155 let builder = match config { 133 156 DbConfig::Testing => builder.temporary(true), 134 - DbConfig::ForReal { cache_mb } => builder.cache_size(cache_mb * 2_u64.pow(20)), 157 + DbConfig::ForReal { 158 + cache_mb, 159 + worker_threads, 160 + } => { 161 + let b = builder.cache_size(cache_mb * 2_u64.pow(20)); 162 + if let Some(n) = worker_threads { 163 + b.worker_threads(n) 164 + } else { 165 + b 166 + } 167 + } 135 168 }; 136 169 let database = builder.open()?; 137 170 let ks = database.keyspace("default", fjall::KeyspaceCreateOptions::default)?;

+5 -4

src/storage/resync_buffer.rs

··· 6 6 //! is newer than the fetched rev, and acks (deletes) it as it goes. 7 7 //! 8 8 //! Keys are written exactly once — firehose sequence numbers are unique — so 9 - //! removals could use a weak/single delete to avoid full tombstones. Fjall 10 - //! 3.0.3 does not yet expose that API; switch to it when available. 9 + //! removals use fjall's weak tombstone (`remove_weak`) which is annihilated 10 + //! on first compaction contact with the corresponding insert, avoiding the 11 + //! write amplification of full tombstones propagating through all LSM levels. 11 12 12 13 use std::sync::atomic::Ordering; 13 14 ··· 96 97 let mut count: u64 = 0; 97 98 for guard in db.ks.prefix(&prefix) { 98 99 let (key_slice, _) = guard.into_inner()?; 99 - db.ks.remove(key_slice.as_ref())?; 100 + db.ks.remove_weak(key_slice.as_ref())?; 100 101 db.stats.resync_buffer_count.fetch_sub(1, Ordering::Relaxed); 101 102 metrics::gauge!("lightrail_resync_buffer_depth").decrement(1); 102 103 count += 1; ··· 113 114 pub fn ack_buffer_entry(db: &DbRef, did: Did<'_>, seq: u64) -> StorageResult<()> { 114 115 debug!(did = did.as_str(), seq, "ack buffered event"); 115 116 let key = key(did, seq); 116 - db.ks.remove(key)?; 117 + db.ks.remove_weak(key)?; 117 118 db.stats.resync_buffer_count.fetch_sub(1, Ordering::Relaxed); 118 119 metrics::gauge!("lightrail_resync_buffer_depth").decrement(1); 119 120 Ok(())

+2 -2

src/storage/resync_queue.rs

··· 235 235 retry = item.retry_count, 236 236 "dequeue resync" 237 237 ); 238 - db.ks.remove(key_bytes)?; 238 + db.ks.remove_weak(key_bytes)?; 239 239 let next_since = key_bytes 240 240 .get(prefix.len()..) 241 241 .expect("a resync queue key must start with the resync queue prefix"); ··· 304 304 305 305 // Atomically: remove from queue + write state=Resyncing. 306 306 let mut batch = db.database.batch(); 307 - batch.remove(&db.ks, key_bytes); 307 + batch.remove_weak(&db.ks, key_bytes); 308 308 batch.insert(&db.ks, &repo_key, repo::encode_repo_info(&new_info)); 309 309 batch.commit()?; 310 310

Configure Feed

Configure Feed