···11use std::net::SocketAddr;
22use std::path::PathBuf;
33+use std::time::Duration;
3445use clap::Parser;
56use tokio::task::JoinSet;
···4344 )]
4445 slingshot_url: Option<jacquard_common::url::Url>,
45464646- /// Max identities kept in in-process identity cache
4747- #[arg(long, env = "LIGHTRAIL_IDENT_CACHE_SIZE", default_value_t = 1_000_000)]
4747+ /// Max identities kept in in-process identity cache.
4848+ #[arg(long, env = "LIGHTRAIL_IDENT_CACHE_SIZE", default_value_t = 2_000_000)]
4849 ident_cache_size: u64,
49505151+ /// Maximum concurrent firehose commit worker tasks.
5252+ #[arg(long, env = "LIGHTRAIL_MAX_FIREHOSE_WORKERS", default_value_t = 10)]
5353+ max_firehose_workers: usize,
5454+5555+ /// Maximum concurrent resync worker tasks.
5656+ #[arg(long, env = "LIGHTRAIL_MAX_RESYNC_WORKERS", default_value_t = 16)]
5757+ max_resync_workers: usize,
5858+5959+ /// How often to flush the firehose cursor watermark to storage, in seconds.
6060+ #[arg(long, env = "LIGHTRAIL_CURSOR_SAVE_INTERVAL", default_value_t = 1)]
6161+ cursor_save_interval_secs: u64,
6262+6363+ /// HTTP timeout for describeRepo + getLatestCommit during resync, in seconds.
6464+ #[arg(
6565+ long,
6666+ env = "LIGHTRAIL_DESCRIBE_REPO_FETCH_TIMEOUT",
6767+ default_value_t = 30
6868+ )]
6969+ describe_repo_fetch_timeout_secs: u64,
7070+7171+ /// HTTP timeout for getRepo (full CAR download) during resync, in seconds.
7272+ #[arg(long, env = "LIGHTRAIL_GET_REPO_FETCH_TIMEOUT", default_value_t = 300)]
7373+ get_repo_fetch_timeout_secs: u64,
7474+5075 /// TCP address for the Prometheus metrics HTTP endpoint.
5176 /// If not set, metrics are not exported.
5277 #[arg(long, env = "LIGHTRAIL_METRICS_BIND", num_args = 0..=1, default_missing_value = "0.0.0.0:6789")]
5378 metrics_bind: Option<SocketAddr>,
7979+8080+ /// Admin password for privileged API endpoints.
8181+ #[arg(long, env = "LIGHTRAIL_ADMIN_PASSWORD")]
8282+ admin_password: Option<String>,
54835584 /// Log an error when a commit claims a collection birth but the index
5685 /// already has that collection for the DID (temporary diagnostic flag).
···103132 let resolver = resolver.clone();
104133 let validate_births = args.validate_births;
105134 async move {
106106- let mut sub = firehose::Subscriber::new(host, db, resolver, validate_births);
135135+ let mut sub = firehose::Subscriber::new(
136136+ host,
137137+ db,
138138+ resolver,
139139+ validate_births,
140140+ args.max_firehose_workers,
141141+ Duration::from_secs(args.cursor_save_interval_secs),
142142+ );
107143 sub.run(token).await
108144 }
109145 });
···124160 let token = token.clone();
125161 let db = db.clone();
126162 let resolver = resolver.clone();
127127- async move { resync::dispatcher::run(resolver, db, 20, token).await }
163163+ async move {
164164+ resync::dispatcher::run(
165165+ resolver,
166166+ db,
167167+ args.max_resync_workers,
168168+ Duration::from_secs(args.describe_repo_fetch_timeout_secs),
169169+ Duration::from_secs(args.get_repo_fetch_timeout_secs),
170170+ token,
171171+ )
172172+ .await
173173+ }
128174 });
129175130176 tasks.spawn({
+10
src/sync/firehose/commit_event.rs
···496496 // All checks passed — atomically update the chain tip and the collection
497497 // index (born → insert, died → remove). Also record each born collection
498498 // in the global collection list (blind overwrite, never deleted).
499499+ let n_born = born.len() as u64;
500500+ let n_died = died.len() as u64;
499501 let mut batch = db.database.batch();
500502 storage::repo::put_prev_into(
501503 &mut batch,
···516518 batch
517519 .commit()
518520 .map_err(Into::<crate::storage::StorageError>::into)?;
521521+522522+ if n_born > 0 {
523523+ metrics::counter!("lightrail_collection_births_total").increment(n_born);
524524+ }
525525+ if n_died > 0 {
526526+ metrics::counter!("lightrail_collection_deaths_total").increment(n_died);
527527+ }
528528+ metrics::counter!("lightrail_commits_indexed_total").increment(1);
519529520530 Ok(())
521531}
+8-8
src/sync/firehose/mod.rs
···4343/// Maximum reconnect delay.
4444const MAX_BACKOFF_SECS: u64 = 64;
45454646-/// How often to flush the watermark cursor to storage.
4747-const CURSOR_FLUSH_INTERVAL: Duration = Duration::from_secs(1);
4848-4949-/// Maximum commit worker tasks running concurrently.
5050-const MAX_COMMIT_WORKERS: usize = 16;
5246/// Manages a single logical connection to a relay firehose, with reconnection.
5347pub struct Subscriber {
5448 host: Host, // presumably the relay this subscriber connects to — TODO confirm against `Host`
5549 db: DbRef, // storage handle; cloned when building the commit dispatcher and around the cursor flush check
5650 resolver: Arc<crate::identity::Resolver>, // shared identity resolver; a clone is passed to `CommitDispatcher::new`
5751 validate_births: bool, // forwarded unchanged to `CommitDispatcher::new`; the CLI describes it as a temporary diagnostic flag
5252+ max_workers: usize, // worker limit passed to `CommitDispatcher::new` (supplied by the caller instead of a fixed constant)
5353+ cursor_save_interval: Duration, // once `cursor_tick.elapsed()` reaches this, stalled work is evicted and the watermark is recomputed
5854}
59556056impl Subscriber {
···6359 db: DbRef,
6460 resolver: Arc<crate::identity::Resolver>,
6561 validate_births: bool,
6262+ max_workers: usize,
6363+ cursor_save_interval: Duration,
6664 ) -> Self {
6765 Self {
6866 host,
6967 db,
7068 resolver,
7169 validate_births,
7070+ max_workers,
7171+ cursor_save_interval,
7272 }
7373 }
7474···8989 let mut dispatcher = CommitDispatcher::new(
9090 self.resolver.clone(),
9191 self.db.clone(),
9292- MAX_COMMIT_WORKERS,
9292+ self.max_workers,
9393 self.validate_births,
9494 );
9595 // When Some, use this cursor on the next reconnect instead of loading
···234234 Event::Worker(None) => {} // JoinSet drained (shouldn't happen with guard)
235235 }
236236237237- if cursor_tick.elapsed() >= CURSOR_FLUSH_INTERVAL {
237237+ if cursor_tick.elapsed() >= self.cursor_save_interval {
238238 dispatcher.evict_stalled();
239239 let wm = dispatcher.watermark(last_seq);
240240 let db = self.db.clone();