···100100101101#[derive(Debug, Clone, Copy)]
102102pub enum SignatureVerification {
103103+ /// verify the signature of every commit, both those received from the firehose and those read while backfilling a repo from a PDS.
103104 Full,
105105+ /// verify commit signatures only while backfilling a repo from a PDS; commits from the firehose are not verified.
104106 BackfillOnly,
107107+ /// skip signature verification entirely.
105108 None,
106109}
107110···129132130133#[derive(Debug, Clone)]
131134pub struct Config {
135135+ /// path to the database folder. set via `HYDRANT_DATABASE_PATH`.
132136 pub database_path: PathBuf,
133133- pub relays: Vec<Url>,
134134- pub plc_urls: Vec<Url>,
137137+ /// if `true`, discovers and indexes all repositories in the network.
138138+ /// set via `HYDRANT_FULL_NETWORK`.
135139 pub full_network: bool,
140140+ /// if `true`, no records are stored; events are deleted after `ephemeral_ttl`.
141141+ /// set via `HYDRANT_EPHEMERAL`.
136142 pub ephemeral: bool,
143143+ /// how long events are retained in ephemeral mode before deletion.
144144+ /// set via `HYDRANT_EPHEMERAL_TTL` (humantime duration, e.g. `60min`).
137145 pub ephemeral_ttl: Duration,
146146+147147+ /// relay URLs used for firehose ingestion. set via `HYDRANT_RELAY_HOST` (single)
148148+ /// or `HYDRANT_RELAY_HOSTS` (comma-separated; takes precedence).
149149+ pub relays: Vec<Url>,
150150+ /// base URL(s) of the PLC directory.
151151+ /// defaults to `https://plc.wtf`, or `https://plc.directory` in full-network mode.
152152+ /// set via `HYDRANT_PLC_URL` (comma-separated for multiple URLs).
153153+ pub plc_urls: Vec<Url>,
154154+ /// whether to ingest events from relay firehose subscriptions.
155155+ /// set via `HYDRANT_ENABLE_FIREHOSE`.
156156+ pub enable_firehose: bool,
157157+ /// number of concurrent workers processing firehose events.
158158+ /// set via `HYDRANT_FIREHOSE_WORKERS`.
159159+ pub firehose_workers: usize,
160160+ /// how often the firehose cursor is persisted to disk.
161161+ /// set via `HYDRANT_CURSOR_SAVE_INTERVAL` (humantime duration, e.g. `3sec`).
138162 pub cursor_save_interval: Duration,
163163+ /// timeout for fetching a full repository CAR during backfill.
164164+ /// set via `HYDRANT_REPO_FETCH_TIMEOUT` (humantime duration, e.g. `5min`).
139165 pub repo_fetch_timeout: Duration,
140140- pub cache_size: u64,
166166+ /// maximum number of concurrent backfill tasks.
167167+ /// set via `HYDRANT_BACKFILL_CONCURRENCY_LIMIT`.
141168 pub backfill_concurrency_limit: usize,
142142- pub data_compression: Compression,
143143- pub journal_compression: Compression,
144144- pub verify_signatures: SignatureVerification,
145145- pub identity_cache_size: u64,
146146- pub enable_firehose: bool,
169169+170170+ /// whether to run the network crawler. `None` defers to the default for the current mode.
171171+ /// set via `HYDRANT_ENABLE_CRAWLER`.
147172 pub enable_crawler: Option<bool>,
148148- pub firehose_workers: usize,
149149- pub db_worker_threads: usize,
150150- pub db_max_journaling_size_mb: u64,
151151- pub db_blocks_memtable_size_mb: u64,
152152- pub db_repos_memtable_size_mb: u64,
153153- pub db_events_memtable_size_mb: u64,
154154- pub db_records_memtable_size_mb: u64,
173173+ /// maximum number of repos allowed in the backfill pending queue before the crawler pauses.
174174+ /// set via `HYDRANT_CRAWLER_MAX_PENDING_REPOS`.
155175 pub crawler_max_pending_repos: usize,
176176+ /// pending queue size at which the crawler resumes after being paused.
177177+ /// set via `HYDRANT_CRAWLER_RESUME_PENDING_REPOS`.
156178 pub crawler_resume_pending_repos: usize,
157157- pub filter_signals: Option<Vec<String>>,
158158- pub filter_collections: Option<Vec<String>>,
159159- pub filter_excludes: Option<Vec<String>>,
160160- /// enable backlinks indexing (only meaningful in non-ephemeral mode).
161161- /// set via `HYDRANT_ENABLE_BACKLINKS=true`.
162162- pub enable_backlinks: bool,
163179 /// crawler sources: each entry pairs a URL with a discovery mode.
164180 ///
165181 /// set via `HYDRANT_CRAWLER_URLS` as a comma-separated list of `[mode::]url` entries,
···168184 /// `by_collection` otherwise). defaults to the relay hosts with the default mode.
169185 /// set to an empty string to disable crawling entirely.
170186 pub crawler_sources: Vec<CrawlerSource>,
187187+188188+ /// signature verification level for incoming commits.
189189+ /// set via `HYDRANT_VERIFY_SIGNATURES` (`full`, `backfill-only`, or `none`).
190190+ pub verify_signatures: SignatureVerification,
191191+ /// number of resolved identities to keep in the in-memory LRU cache.
192192+ /// set via `HYDRANT_IDENTITY_CACHE_SIZE`.
193193+ pub identity_cache_size: u64,
194194+195195+ /// NSID patterns that trigger auto-discovery in filter mode (e.g. `app.bsky.feed.post`).
196196+ /// set via `HYDRANT_FILTER_SIGNALS` as a comma-separated list.
197197+ pub filter_signals: Option<Vec<String>>,
198198+ /// NSID patterns used to filter which record collections are stored.
199199+ /// if `None`, all collections are stored. set via `HYDRANT_FILTER_COLLECTIONS`.
200200+ pub filter_collections: Option<Vec<String>>,
201201+ /// DIDs that are always skipped, regardless of mode.
202202+ /// set via `HYDRANT_FILTER_EXCLUDES` as a comma-separated list.
203203+ pub filter_excludes: Option<Vec<String>>,
204204+205205+ /// enable backlinks indexing (only meaningful in non-ephemeral mode).
206206+ /// set via `HYDRANT_ENABLE_BACKLINKS=true`.
207207+ pub enable_backlinks: bool,
208208+209209+ /// db internals, tune only if you know what you're doing.
210210+ ///
211211+ /// size of the fjall block cache in MB. set via `HYDRANT_CACHE_SIZE`.
212212+ pub cache_size: u64,
213213+ /// db internals, tune only if you know what you're doing.
214214+ ///
215215+ /// compression algorithm for data keyspaces (blocks, records, repos, events).
216216+ /// set via `HYDRANT_DATA_COMPRESSION` (`lz4`, `zstd`, or `none`).
217217+ pub data_compression: Compression,
218218+ /// db internals, tune only if you know what you're doing.
219219+ ///
220220+ /// compression algorithm for the fjall journal.
221221+ /// set via `HYDRANT_JOURNAL_COMPRESSION` (`lz4`, `zstd`, or `none`).
222222+ pub journal_compression: Compression,
223223+ /// db internals, tune only if you know what you're doing.
224224+ ///
225225+ /// number of background threads used by the fjall storage engine.
226226+ /// set via `HYDRANT_DB_WORKER_THREADS`.
227227+ pub db_worker_threads: usize,
228228+ /// db internals, tune only if you know what you're doing.
229229+ ///
230230+ /// maximum total size of the fjall journal in MB before a flush is forced.
231231+ /// set via `HYDRANT_DB_MAX_JOURNALING_SIZE_MB`.
232232+ pub db_max_journaling_size_mb: u64,
233233+ /// db internals, tune only if you know what you're doing.
234234+ ///
235235+ /// in-memory write buffer (memtable) size for the blocks keyspace in MB.
236236+ /// set via `HYDRANT_DB_BLOCKS_MEMTABLE_SIZE_MB`.
237237+ pub db_blocks_memtable_size_mb: u64,
238238+ /// db internals, tune only if you know what you're doing.
239239+ ///
240240+ /// in-memory write buffer (memtable) size for the repos keyspace in MB.
241241+ /// set via `HYDRANT_DB_REPOS_MEMTABLE_SIZE_MB`.
242242+ pub db_repos_memtable_size_mb: u64,
243243+ /// db internals, tune only if you know what you're doing.
244244+ ///
245245+ /// in-memory write buffer (memtable) size for the events keyspace in MB.
246246+ /// set via `HYDRANT_DB_EVENTS_MEMTABLE_SIZE_MB`.
247247+ pub db_events_memtable_size_mb: u64,
248248+ /// db internals, tune only if you know what you're doing.
249249+ ///
250250+ /// in-memory write buffer (memtable) size for the records keyspace in MB.
251251+ /// set via `HYDRANT_DB_RECORDS_MEMTABLE_SIZE_MB`.
252252+ pub db_records_memtable_size_mb: u64,
171253}
172254173255impl Default for Config {
···175257 const BASE_MEMTABLE_MB: u64 = 32;
176258 Self {
177259 database_path: PathBuf::from("./hydrant.db"),
178178- relays: vec![Url::parse("wss://relay.fire.hose.cam/").unwrap()],
179179- plc_urls: vec![Url::parse("https://plc.wtf").unwrap()],
180260 full_network: false,
181261 ephemeral: false,
182262 ephemeral_ttl: Duration::from_secs(3600),
263263+ relays: vec![Url::parse("wss://relay.fire.hose.cam/").unwrap()],
264264+ plc_urls: vec![Url::parse("https://plc.wtf").unwrap()],
265265+ enable_firehose: true,
266266+ firehose_workers: 8,
183267 cursor_save_interval: Duration::from_secs(3),
184268 repo_fetch_timeout: Duration::from_secs(300),
185185- cache_size: 256,
186269 backfill_concurrency_limit: 16,
187187- data_compression: Compression::Lz4,
188188- journal_compression: Compression::Lz4,
270270+ enable_crawler: None,
271271+ crawler_max_pending_repos: 2000,
272272+ crawler_resume_pending_repos: 1000,
273273+ crawler_sources: vec![CrawlerSource {
274274+ url: Url::parse("https://lightrail.microcosm.blue").unwrap(),
275275+ mode: CrawlerMode::ByCollection,
276276+ }],
189277 verify_signatures: SignatureVerification::Full,
190278 identity_cache_size: 1_000_000,
191191- enable_firehose: true,
192192- enable_crawler: None,
193193- firehose_workers: 8,
279279+ filter_signals: None,
280280+ filter_collections: None,
281281+ filter_excludes: None,
282282+ enable_backlinks: false,
283283+ cache_size: 256,
284284+ data_compression: Compression::Lz4,
285285+ journal_compression: Compression::Lz4,
194286 db_worker_threads: 4,
195287 db_max_journaling_size_mb: 400,
196288 db_blocks_memtable_size_mb: BASE_MEMTABLE_MB,
197289 db_repos_memtable_size_mb: BASE_MEMTABLE_MB / 2,
198290 db_events_memtable_size_mb: BASE_MEMTABLE_MB,
199291 db_records_memtable_size_mb: BASE_MEMTABLE_MB / 3 * 2,
200200- crawler_max_pending_repos: 2000,
201201- crawler_resume_pending_repos: 1000,
202202- filter_signals: None,
203203- filter_collections: None,
204204- filter_excludes: None,
205205- enable_backlinks: false,
206206- crawler_sources: vec![CrawlerSource {
207207- url: Url::parse("https://lightrail.microcosm.blue").unwrap(),
208208- mode: CrawlerMode::ByCollection,
209209- }],
210292 }
211293 }
212294}
···218300 Self {
219301 full_network: true,
220302 plc_urls: vec![Url::parse("https://plc.directory").unwrap()],
303303+ firehose_workers: 24,
221304 backfill_concurrency_limit: 64,
222222- firehose_workers: 24,
305305+ crawler_sources: vec![CrawlerSource {
306306+ url: Url::parse("wss://relay.fire.hose.cam/").unwrap(),
307307+ mode: CrawlerMode::Relay,
308308+ }],
223309 db_worker_threads: 8,
224310 db_max_journaling_size_mb: 1024,
225311 db_blocks_memtable_size_mb: BASE_MEMTABLE_MB,
226226- db_events_memtable_size_mb: BASE_MEMTABLE_MB,
227312 db_repos_memtable_size_mb: BASE_MEMTABLE_MB / 2,
313313+ db_events_memtable_size_mb: BASE_MEMTABLE_MB,
228314 db_records_memtable_size_mb: BASE_MEMTABLE_MB / 3 * 2,
229229- crawler_sources: vec![CrawlerSource {
230230- url: Url::parse("wss://relay.fire.hose.cam/").unwrap(),
231231- mode: CrawlerMode::Relay,
232232- }],
233315 ..Self::default()
234316 }
235317 }
···387469388470 Ok(Self {
389471 database_path,
390390- relays: relay_hosts,
391391- plc_urls,
472472+ full_network,
392473 ephemeral,
393474 ephemeral_ttl,
394394- full_network,
475475+ relays: relay_hosts,
476476+ plc_urls,
477477+ enable_firehose,
478478+ firehose_workers,
395479 cursor_save_interval,
396480 repo_fetch_timeout,
397397- cache_size,
398481 backfill_concurrency_limit,
399399- data_compression,
400400- journal_compression,
482482+ enable_crawler,
483483+ crawler_max_pending_repos,
484484+ crawler_resume_pending_repos,
485485+ crawler_sources,
401486 verify_signatures,
402487 identity_cache_size,
403403- enable_firehose,
404404- enable_crawler,
405405- firehose_workers,
488488+ filter_signals,
489489+ filter_collections,
490490+ filter_excludes,
491491+ enable_backlinks,
492492+ cache_size,
493493+ data_compression,
494494+ journal_compression,
406495 db_worker_threads,
407496 db_max_journaling_size_mb,
408497 db_blocks_memtable_size_mb,
409498 db_repos_memtable_size_mb,
410499 db_events_memtable_size_mb,
411500 db_records_memtable_size_mb,
412412- crawler_max_pending_repos,
413413- crawler_resume_pending_repos,
414414- filter_signals,
415415- filter_collections,
416416- filter_excludes,
417417- enable_backlinks,
418418- crawler_sources,
419501 })
420502 }
421503}