very fast AT Protocol indexer with flexible filtering, XRPC queries, a cursor-backed event stream, and more, built on fjall
rust fjall at-protocol atproto indexer
58
fork

Configure Feed

Select the types of activity you want to include in your feed.

[config] comment fields and types

dawn 76f20936 a882c07e

+143 -61
+143 -61
src/config.rs
··· 100 100 101 101 #[derive(Debug, Clone, Copy)] 102 102 pub enum SignatureVerification { 103 + /// verify all commits, from the firehose and when backfilling a repo from a PDS. 103 104 Full, 105 + /// only verify commits when backfilling a repo from a PDS. 104 106 BackfillOnly, 107 + /// don't verify anything. 105 108 None, 106 109 } 107 110 ··· 129 132 130 133 #[derive(Debug, Clone)] 131 134 pub struct Config { 135 + /// path to the database folder. set via `HYDRANT_DATABASE_PATH`. 132 136 pub database_path: PathBuf, 133 - pub relays: Vec<Url>, 134 - pub plc_urls: Vec<Url>, 137 + /// if `true`, discovers and indexes all repositories in the network. 138 + /// set via `HYDRANT_FULL_NETWORK`. 135 139 pub full_network: bool, 140 + /// if `true`, no records are stored; events are deleted after `ephemeral_ttl`. 141 + /// set via `HYDRANT_EPHEMERAL`. 136 142 pub ephemeral: bool, 143 + /// how long events are retained in ephemeral mode before deletion. 144 + /// set via `HYDRANT_EPHEMERAL_TTL` (humantime duration, e.g. `60min`). 137 145 pub ephemeral_ttl: Duration, 146 + 147 + /// relay URLs used for firehose ingestion. set via `HYDRANT_RELAY_HOST` (single) 148 + /// or `HYDRANT_RELAY_HOSTS` (comma-separated; takes precedence). 149 + pub relays: Vec<Url>, 150 + /// base URL(s) of the PLC directory (comma-separated for multiple). 151 + /// defaults to `https://plc.wtf`, or `https://plc.directory` in full-network mode. 152 + /// set via `HYDRANT_PLC_URL`. 153 + pub plc_urls: Vec<Url>, 154 + /// whether to ingest events from relay firehose subscriptions. 155 + /// set via `HYDRANT_ENABLE_FIREHOSE`. 156 + pub enable_firehose: bool, 157 + /// number of concurrent workers processing firehose events. 158 + /// set via `HYDRANT_FIREHOSE_WORKERS`. 159 + pub firehose_workers: usize, 160 + /// how often the firehose cursor is persisted to disk. 161 + /// set via `HYDRANT_CURSOR_SAVE_INTERVAL` (humantime duration, e.g. `3sec`). 
138 162 pub cursor_save_interval: Duration, 163 + /// timeout for fetching a full repository CAR during backfill. 164 + /// set via `HYDRANT_REPO_FETCH_TIMEOUT` (humantime duration, e.g. `5min`). 139 165 pub repo_fetch_timeout: Duration, 140 - pub cache_size: u64, 166 + /// maximum number of concurrent backfill tasks. 167 + /// set via `HYDRANT_BACKFILL_CONCURRENCY_LIMIT`. 141 168 pub backfill_concurrency_limit: usize, 142 - pub data_compression: Compression, 143 - pub journal_compression: Compression, 144 - pub verify_signatures: SignatureVerification, 145 - pub identity_cache_size: u64, 146 - pub enable_firehose: bool, 169 + 170 + /// whether to run the network crawler. `None` defers to the default for the current mode. 171 + /// set via `HYDRANT_ENABLE_CRAWLER`. 147 172 pub enable_crawler: Option<bool>, 148 - pub firehose_workers: usize, 149 - pub db_worker_threads: usize, 150 - pub db_max_journaling_size_mb: u64, 151 - pub db_blocks_memtable_size_mb: u64, 152 - pub db_repos_memtable_size_mb: u64, 153 - pub db_events_memtable_size_mb: u64, 154 - pub db_records_memtable_size_mb: u64, 173 + /// maximum number of repos allowed in the backfill pending queue before the crawler pauses. 174 + /// set via `HYDRANT_CRAWLER_MAX_PENDING_REPOS`. 155 175 pub crawler_max_pending_repos: usize, 176 + /// pending queue size at which the crawler resumes after being paused. 177 + /// set via `HYDRANT_CRAWLER_RESUME_PENDING_REPOS`. 156 178 pub crawler_resume_pending_repos: usize, 157 - pub filter_signals: Option<Vec<String>>, 158 - pub filter_collections: Option<Vec<String>>, 159 - pub filter_excludes: Option<Vec<String>>, 160 - /// enable backlinks indexing (only meaningful in non-ephemeral mode). 161 - /// set via `HYDRANT_ENABLE_BACKLINKS=true`. 162 - pub enable_backlinks: bool, 163 179 /// crawler sources: each entry pairs a URL with a discovery mode. 
164 180 /// 165 181 /// set via `HYDRANT_CRAWLER_URLS` as a comma-separated list of `[mode::]url` entries, ··· 168 184 /// `by_collection` otherwise). defaults to the relay hosts with the default mode. 169 185 /// set to an empty string to disable crawling entirely. 170 186 pub crawler_sources: Vec<CrawlerSource>, 187 + 188 + /// signature verification level for incoming commits. 189 + /// set via `HYDRANT_VERIFY_SIGNATURES` (`full`, `backfill-only`, or `none`). 190 + pub verify_signatures: SignatureVerification, 191 + /// number of resolved identities to keep in the in-memory LRU cache. 192 + /// set via `HYDRANT_IDENTITY_CACHE_SIZE`. 193 + pub identity_cache_size: u64, 194 + 195 + /// NSID patterns that trigger auto-discovery in filter mode (e.g. `app.bsky.feed.post`). 196 + /// set via `HYDRANT_FILTER_SIGNALS` as a comma-separated list. 197 + pub filter_signals: Option<Vec<String>>, 198 + /// NSID patterns used to filter which record collections are stored. 199 + /// if `None`, all collections are stored. set via `HYDRANT_FILTER_COLLECTIONS`. 200 + pub filter_collections: Option<Vec<String>>, 201 + /// DIDs that are always skipped, regardless of mode. 202 + /// set via `HYDRANT_FILTER_EXCLUDES` as a comma-separated list. 203 + pub filter_excludes: Option<Vec<String>>, 204 + 205 + /// enable backlinks indexing (only meaningful in non-ephemeral mode). 206 + /// set via `HYDRANT_ENABLE_BACKLINKS=true`. 207 + pub enable_backlinks: bool, 208 + 209 + /// db internals, tune only if you know what you're doing. 210 + /// 211 + /// size of the fjall block cache in MB. set via `HYDRANT_CACHE_SIZE`. 212 + pub cache_size: u64, 213 + /// db internals, tune only if you know what you're doing. 214 + /// 215 + /// compression algorithm for data keyspaces (blocks, records, repos, events). 216 + /// set via `HYDRANT_DATA_COMPRESSION` (`lz4`, `zstd`, or `none`). 217 + pub data_compression: Compression, 218 + /// db internals, tune only if you know what you're doing. 
219 + /// 220 + /// compression algorithm for the fjall journal. 221 + /// set via `HYDRANT_JOURNAL_COMPRESSION` (`lz4`, `zstd`, or `none`). 222 + pub journal_compression: Compression, 223 + /// db internals, tune only if you know what you're doing. 224 + /// 225 + /// number of background threads used by the fjall storage engine. 226 + /// set via `HYDRANT_DB_WORKER_THREADS`. 227 + pub db_worker_threads: usize, 228 + /// db internals, tune only if you know what you're doing. 229 + /// 230 + /// maximum total size of the fjall journal in MB before a flush is forced. 231 + /// set via `HYDRANT_DB_MAX_JOURNALING_SIZE_MB`. 232 + pub db_max_journaling_size_mb: u64, 233 + /// db internals, tune only if you know what you're doing. 234 + /// 235 + /// in-memory write buffer (memtable) size for the blocks keyspace in MB. 236 + /// set via `HYDRANT_DB_BLOCKS_MEMTABLE_SIZE_MB`. 237 + pub db_blocks_memtable_size_mb: u64, 238 + /// db internals, tune only if you know what you're doing. 239 + /// 240 + /// in-memory write buffer (memtable) size for the repos keyspace in MB. 241 + /// set via `HYDRANT_DB_REPOS_MEMTABLE_SIZE_MB`. 242 + pub db_repos_memtable_size_mb: u64, 243 + /// db internals, tune only if you know what you're doing. 244 + /// 245 + /// in-memory write buffer (memtable) size for the events keyspace in MB. 246 + /// set via `HYDRANT_DB_EVENTS_MEMTABLE_SIZE_MB`. 247 + pub db_events_memtable_size_mb: u64, 248 + /// db internals, tune only if you know what you're doing. 249 + /// 250 + /// in-memory write buffer (memtable) size for the records keyspace in MB. 251 + /// set via `HYDRANT_DB_RECORDS_MEMTABLE_SIZE_MB`. 
252 + pub db_records_memtable_size_mb: u64, 171 253 } 172 254 173 255 impl Default for Config { ··· 175 257 const BASE_MEMTABLE_MB: u64 = 32; 176 258 Self { 177 259 database_path: PathBuf::from("./hydrant.db"), 178 - relays: vec![Url::parse("wss://relay.fire.hose.cam/").unwrap()], 179 - plc_urls: vec![Url::parse("https://plc.wtf").unwrap()], 180 260 full_network: false, 181 261 ephemeral: false, 182 262 ephemeral_ttl: Duration::from_secs(3600), 263 + relays: vec![Url::parse("wss://relay.fire.hose.cam/").unwrap()], 264 + plc_urls: vec![Url::parse("https://plc.wtf").unwrap()], 265 + enable_firehose: true, 266 + firehose_workers: 8, 183 267 cursor_save_interval: Duration::from_secs(3), 184 268 repo_fetch_timeout: Duration::from_secs(300), 185 - cache_size: 256, 186 269 backfill_concurrency_limit: 16, 187 - data_compression: Compression::Lz4, 188 - journal_compression: Compression::Lz4, 270 + enable_crawler: None, 271 + crawler_max_pending_repos: 2000, 272 + crawler_resume_pending_repos: 1000, 273 + crawler_sources: vec![CrawlerSource { 274 + url: Url::parse("https://lightrail.microcosm.blue").unwrap(), 275 + mode: CrawlerMode::ByCollection, 276 + }], 189 277 verify_signatures: SignatureVerification::Full, 190 278 identity_cache_size: 1_000_000, 191 - enable_firehose: true, 192 - enable_crawler: None, 193 - firehose_workers: 8, 279 + filter_signals: None, 280 + filter_collections: None, 281 + filter_excludes: None, 282 + enable_backlinks: false, 283 + cache_size: 256, 284 + data_compression: Compression::Lz4, 285 + journal_compression: Compression::Lz4, 194 286 db_worker_threads: 4, 195 287 db_max_journaling_size_mb: 400, 196 288 db_blocks_memtable_size_mb: BASE_MEMTABLE_MB, 197 289 db_repos_memtable_size_mb: BASE_MEMTABLE_MB / 2, 198 290 db_events_memtable_size_mb: BASE_MEMTABLE_MB, 199 291 db_records_memtable_size_mb: BASE_MEMTABLE_MB / 3 * 2, 200 - crawler_max_pending_repos: 2000, 201 - crawler_resume_pending_repos: 1000, 202 - filter_signals: None, 203 - 
filter_collections: None, 204 - filter_excludes: None, 205 - enable_backlinks: false, 206 - crawler_sources: vec![CrawlerSource { 207 - url: Url::parse("https://lightrail.microcosm.blue").unwrap(), 208 - mode: CrawlerMode::ByCollection, 209 - }], 210 292 } 211 293 } 212 294 } ··· 218 300 Self { 219 301 full_network: true, 220 302 plc_urls: vec![Url::parse("https://plc.directory").unwrap()], 303 + firehose_workers: 24, 221 304 backfill_concurrency_limit: 64, 222 - firehose_workers: 24, 305 + crawler_sources: vec![CrawlerSource { 306 + url: Url::parse("wss://relay.fire.hose.cam/").unwrap(), 307 + mode: CrawlerMode::Relay, 308 + }], 223 309 db_worker_threads: 8, 224 310 db_max_journaling_size_mb: 1024, 225 311 db_blocks_memtable_size_mb: BASE_MEMTABLE_MB, 226 - db_events_memtable_size_mb: BASE_MEMTABLE_MB, 227 312 db_repos_memtable_size_mb: BASE_MEMTABLE_MB / 2, 313 + db_events_memtable_size_mb: BASE_MEMTABLE_MB, 228 314 db_records_memtable_size_mb: BASE_MEMTABLE_MB / 3 * 2, 229 - crawler_sources: vec![CrawlerSource { 230 - url: Url::parse("wss://relay.fire.hose.cam/").unwrap(), 231 - mode: CrawlerMode::Relay, 232 - }], 233 315 ..Self::default() 234 316 } 235 317 } ··· 387 469 388 470 Ok(Self { 389 471 database_path, 390 - relays: relay_hosts, 391 - plc_urls, 472 + full_network, 392 473 ephemeral, 393 474 ephemeral_ttl, 394 - full_network, 475 + relays: relay_hosts, 476 + plc_urls, 477 + enable_firehose, 478 + firehose_workers, 395 479 cursor_save_interval, 396 480 repo_fetch_timeout, 397 - cache_size, 398 481 backfill_concurrency_limit, 399 - data_compression, 400 - journal_compression, 482 + enable_crawler, 483 + crawler_max_pending_repos, 484 + crawler_resume_pending_repos, 485 + crawler_sources, 401 486 verify_signatures, 402 487 identity_cache_size, 403 - enable_firehose, 404 - enable_crawler, 405 - firehose_workers, 488 + filter_signals, 489 + filter_collections, 490 + filter_excludes, 491 + enable_backlinks, 492 + cache_size, 493 + data_compression, 494 + 
journal_compression, 406 495 db_worker_threads, 407 496 db_max_journaling_size_mb, 408 497 db_blocks_memtable_size_mb, 409 498 db_repos_memtable_size_mb, 410 499 db_events_memtable_size_mb, 411 500 db_records_memtable_size_mb, 412 - crawler_max_pending_repos, 413 - crawler_resume_pending_repos, 414 - filter_signals, 415 - filter_collections, 416 - filter_excludes, 417 - enable_backlinks, 418 - crawler_sources, 419 501 }) 420 502 } 421 503 }