···168168169169returns `{ count }`.
170170171171+#### blue.microcosm.links.*
172172+173173+hydrant implements a subset of the [microcosm constellation](https://constellation.microcosm.blue/) API when it's built with the `backlinks` cargo feature (`cargo build --features backlinks`).
174174+175175+when enabled, hydrant indexes all AT URI and DID references found inside stored records into a reverse index. this lets you efficiently answer "what records link to this subject?".
176176+177177+##### blue.microcosm.links.getBacklinks
178178+179179+return records that link to a given subject.
180180+181181+| param | required | description |
182182+| :--- | :--- | :--- |
183183+| `subject` | yes | AT URI or DID to look up backlinks for. |
184184+| `source` | no | filter by source collection, e.g. `app.bsky.feed.like`. also accepts the `collection:path` form to additionally filter by field path, e.g. `app.bsky.feed.like:subject.uri`. the path is matched against the dotted field path within the record (a leading `.` is prepended automatically). |
185185+| `limit` | no | max results to return (default 50, max 100). |
186186+| `cursor` | no | opaque pagination cursor from a previous response. |
187187+| `reverse` | no | if `true`, return results in descending rkey order instead of ascending (default `false`). |
188188+189189+returns `{ backlinks: [{ uri, cid }], cursor? }`.
190190+191191+results are ordered by the rkey of the source record (ascending by default, descending when `reverse=true`). for records with TID rkeys, the cursor remains stable when new records are inserted.
192192+193193+##### blue.microcosm.links.getBacklinksCount
194194+195195+return the number of records that link to a given subject.
196196+197197+| param | required | description |
198198+| :--- | :--- | :--- |
199199+| `subject` | yes | AT URI or DID to count backlinks for. |
200200+| `source` | no | filter by source collection (same format as `getBacklinks`). |
201201+202202+returns `{ count }`.
203203+171204### event stream
172205173206- `GET /stream`: subscribe to the event stream.
···4848 pub counts: Keyspace,
4949 pub filter: Keyspace,
5050 pub crawler: Keyspace,
5151+ #[cfg(feature = "backlinks")]
5252+ pub backlinks: Keyspace,
5153 pub event_tx: broadcast::Sender<BroadcastEvent>,
5254 pub next_event_id: Arc<AtomicU64>,
5355 pub counts_map: HashMap<SmolStr, u64>,
···99101 }};
100102}
/// Converts a size given in kibibytes to bytes.
///
/// The multiplication saturates at `u32::MAX` rather than overflowing, so an
/// oversized input can never wrap around to a small byte count in release
/// builds.
const fn kb(v: u32) -> u32 {
    v.saturating_mul(1024)
}
/// Converts a size given in mebibytes to bytes.
///
/// The multiplication saturates at `u64::MAX` rather than overflowing, so a
/// very large (e.g. config-supplied) value can never wrap around to a small
/// byte count in release builds.
const fn mb(v: u64) -> u64 {
    v.saturating_mul(1024 * 1024)
}
110110+102111impl Db {
103112 pub fn open(cfg: &crate::config::Config) -> Result<Self> {
104104- const fn kb(v: u32) -> u32 {
105105- v * 1024
106106- }
107107- const fn mb(v: u64) -> u64 {
108108- v * 1024 * 1024
109109- }
110110-111113 let db = Database::builder(&cfg.database_path)
112114 .cache_size(cfg.cache_size * 2_u64.pow(20) / 2)
113115 .manual_journal_persist(true)
···152154 }
153155 None
154156 };
155155- let dicts = ["repos", "blocks", "events"].into_iter().fold(
157157+ let dicts = ["repos", "blocks", "events", "backlinks"].into_iter().fold(
156158 std::collections::HashMap::new(),
157159 |mut acc, name| {
158160 let Some(dict) = load_dict(name) else {
···258260 // cids aren't compressible; most rkeys are TIDs, so they will get compressed
259261 // by prefix truncation anyway
260262 .data_block_compression_policy(CompressionPolicy::disabled())
261261- .data_block_restart_interval_policy(RestartIntervalPolicy::new([9, 18])),
263263+ .data_block_restart_interval_policy(RestartIntervalPolicy::new([16, 32])),
262264 )?;
263265 let cursors = open_ks(
264266 "cursors",
···354356 .data_block_restart_interval_policy(RestartIntervalPolicy::all(2)),
355357 )?;
356358359359+ #[cfg(feature = "backlinks")]
360360+ let backlinks = open_ks(
361361+ "backlinks",
362362+ opts()
363363+ // let's assume backlinks lookups hit; getBacklinks will use an iterator anyway,
364364+ // so it's okay to disable the bloom filter
365365+ .expect_point_read_hits(true)
366366+ .max_memtable_size(mb(cfg.db_records_memtable_size_mb))
367367+ // same as records basically
368368+ .data_block_size_policy(BlockSizePolicy::new([kb(16), kb(32)]))
369369+ .data_block_compression_policy(CompressionPolicy::new([
370370+ CompressionType::None,
371371+ get_compression("backlinks", 3),
372372+ ]))
373373+ .data_block_restart_interval_policy(RestartIntervalPolicy::new([16, 32])),
374374+ )?;
375375+357376 // when adding new keyspaces, make sure to add them to the /stats endpoint
358377 // and also update any relevant /debug/* endpoints
359378···411430 counts,
412431 filter,
413432 crawler,
433433+ #[cfg(feature = "backlinks")]
434434+ backlinks,
414435 event_tx,
415436 counts_map,
416437 next_event_id: Arc::new(AtomicU64::new(last_id + 1)),
···422443 "blocks" => &self.blocks,
423444 "events" => &self.events,
424445 "repos" => &self.repos,
446446+ "backlinks" => &self.backlinks,
425447 _ => miette::bail!("unknown keyspace for training: {ks_name}"),
426448 };
427449···523545 compact(self.filter.clone()),
524546 compact(self.crawler.clone()),
525547 )?;
548548+ #[cfg(feature = "backlinks")]
549549+ compact(self.backlinks.clone()).await?;
526550 Ok(())
527551 }
528552
+2
src/lib.rs
···5566pub(crate) mod api;
77pub(crate) mod backfill;
88+#[cfg(feature = "backlinks")]
99+pub(crate) mod backlinks;
810pub(crate) mod crawler;
911pub(crate) mod db;
1012pub(crate) mod ingest;