···237237| `ENABLE_CRAWLER` | `true` if full network or crawler sources are configured, `false` otherwise | whether to actively query the network for unknown repositories. |
238238| `CRAWLER_MAX_PENDING_REPOS` | `2000` | max pending repos for crawler. |
239239| `CRAWLER_RESUME_PENDING_REPOS` | `1000` | resume threshold for crawler pending repos. |
240240-| `TRUSTED_HOSTS` | | comma-separated list of PDS hostnames to pre-assign to the `trusted` rate tier at startup. hosts not listed here use the `default` tier unless assigned via the API. |
241240| `RATE_TIERS` | | comma-separated list of named rate tier definitions in `name:base/mul/hourly/daily[/account_limit]` format (e.g. `trusted:5000/10.0/18000000/432000000/10000000`). the optional account limit prevents new accounts from being created on this PDS once reached. built-in tiers (`default`, `trusted`) are always present and can be overridden. |
241241+| `TIER_RULES` | | comma-separated ordered list of glob rules in `pattern:tier_name` format (e.g. `*.bsky.network:trusted`). rules are evaluated in order; first match wins. explicit API assignments via `PUT /pds/tiers` take precedence over rules; the `default` tier is the final fallback. uses standard glob wildcards (`*`, `?`) matched against the PDS hostname. |
242242243243## build features
244244···407407- `GET /pds/tiers`: list all current tier assignments alongside the available
408408 tier definitions.
409409 - returns `{ "assignments": [{ "host": string, "tier": string }], "rate_tiers": { <name>: { "per_second_base": int, "per_second_account_mul": float, "per_hour": int, "per_day": int } } }`.
410410- - `assignments` only contains PDSes with an explicit assignment; any PDS not
411411- listed uses the `default` tier.
410410+ - `assignments` only contains PDSes with an explicit API assignment. hosts without one resolve via glob rules or fall back to `default`.
412411- `PUT /pds/tiers`: assign a PDS to a named rate tier.
413412 - body: `{ "host": string, "tier": string }`.
414413 - `host` is the PDS hostname (e.g. `pds.example.com`).
415414 - `tier` must be one of the configured tier names. returns `400` if unknown.
416415 - assignments are persisted to the database and survive restarts.
417416 - re-assigning the same host updates the tier in place without creating a duplicate.
418418-- `DELETE /pds/tiers`: remove an explicit tier assignment for a PDS, reverting
419419- it to the `default` tier.
417417+- `DELETE /pds/tiers`: remove an explicit tier assignment for a PDS.
420418 - query parameter: `?host=<hostname>` (e.g. `?host=pds.example.com`).
419419+  - reverts the host to glob-rule resolution (not necessarily `default` — a matching `TIER_RULES` pattern still applies).
421420 - returns `200` even if no assignment existed.
422421- `GET /pds/rate-tiers`: list the available rate tier definitions.
423422 - returns a map of tier name to `{ "per_second_base", "per_second_account_mul", "per_hour", "per_day", "account_limit" }`.
424423425425-hosts listed in `TRUSTED_HOSTS` are seeded as `trusted` at startup, but only
426426-when no database assignment already exists for that host — DB entries always win.
427427-the seed is not written to the database, so it is re-applied on every restart.
428428-consequences: if you remove a host from `TRUSTED_HOSTS` and it has no DB entry,
429429-it will revert to `default` on the next restart. if you explicitly assign a host
430430-via the API (which writes to the DB), that assignment persists regardless of
431431-`TRUSTED_HOSTS`. if you delete a host's DB assignment via the API while it is
432432-still listed in `TRUSTED_HOSTS`, it will be re-seeded as `trusted` on the next
433433-restart.
424424+tiers are resolved in this order:
425425+426426+1. **explicit API assignment**, set via `PUT /pds/tiers`, stored in the database, survives restarts.
427427+2. **glob rules**, from `TIER_RULES`, evaluated in order; first match wins.
428428+3. **`default` tier**, applied if no rule or explicit assignment matches.
429429+430430+deleting an API assignment reverts the host to glob-rule resolution, not necessarily back to `default`. if a rule like `*.bsky.network:trusted` matches the host, it will become trusted again without any further action.
434431435432### repository management
436433
+4-2
src/api/xrpc/com_atproto_describe_repo.rs
···11use futures::TryFutureExt;
22-use jacquard_api::com_atproto::repo::describe_repo::{DescribeRepoOutput, DescribeRepoRequest};
22+use jacquard_api::com_atproto::repo::describe_repo::{
33+ DescribeRepoOutput, DescribeRepoRequest, DescribeRepoResponse,
44+};
3546use crate::util::invalid_handle;
57···911 State(hydrant): State<Hydrant>,
1012 ExtractXrpc(req): ExtractXrpc<DescribeRepoRequest>,
1113) -> XrpcResult<Json<DescribeRepoOutput<'static>>> {
1212- let nsid = "com.atproto.repo.describeRepo";
1414+ let nsid = DescribeRepoResponse::NSID;
1315 let resolver = &hydrant.state.resolver;
14161517 let did = resolver
+55-26
src/config.rs
···11+use crate::pds_meta::{TierPolicy, TierRule};
12use miette::Result;
23use serde::{Deserialize, Serialize};
33-use smol_str::ToSmolStr;
44+use smol_str::{SmolStr, ToSmolStr};
45use std::collections::HashMap;
56use std::fmt;
67use std::path::PathBuf;
···375376 ///
376377 /// set via `HYDRANT_SEED_HOSTS` as a comma-separated list of base URLs.
377378 pub seed_hosts: Vec<Url>,
378378- /// list of trusted PDS/relay hosts to pre-assign to the "trusted" rate tier at startup.
379379- /// set via `HYDRANT_TRUSTED_HOSTS` as a comma-separated list of hostnames.
380380- /// hosts not present in this list use the "default" tier unless assigned via the API.
381381- pub trusted_hosts: Vec<String>,
382379 /// named rate tier definitions for PDS rate limiting.
383380 ///
384381 /// built-in tiers ("default" and "trusted") are always present and may be overridden.
385382 /// set via `HYDRANT_RATE_TIERS` as a comma-separated list of `name:base/mul/hourly/daily` entries,
386383 /// e.g. `trusted:5000/10.0/18000000/432000000,custom:100/1.0/7200000/172800000`.
387387- pub rate_tiers: HashMap<String, RateTier>,
384384+ ///
385385+ /// built from `HYDRANT_TIER_RULES` and `HYDRANT_RATE_TIERS` at startup.
386386+ pub tier_policy: TierPolicy,
387387+388388+ /// glob rules mapping host patterns to named rate tiers.
389389+ ///
390390+ /// set via `HYDRANT_TIER_RULES` as a comma-separated list of `pattern:tiername` entries,
391391+ /// e.g. `*.bsky.network:trusted,pds.example.com:custom`. rules are evaluated in order;
392392+ /// api-assigned per-host overrides always take priority over these rules.
393393+ pub tier_rules: Vec<(String, String)>,
388394389395 /// db internals, tune only if you know what you're doing.
390396 ///
···478484 filter_collections: None,
479485 filter_excludes: None,
480486 enable_backlinks: false,
481481- trusted_hosts: vec![],
482482- rate_tiers: {
483483- let mut m = HashMap::new();
484484- m.insert("default".to_string(), RateTier::default_tier());
485485- m.insert("trusted".to_string(), RateTier::trusted());
486486- m
487487+ tier_rules: vec![],
488488+ tier_policy: {
489489+ let mut tiers = HashMap::new();
490490+ tiers.insert(SmolStr::new("default"), RateTier::default_tier());
491491+ tiers.insert(SmolStr::new("trusted"), RateTier::trusted());
492492+ TierPolicy {
493493+ tiers,
494494+ rules: vec![],
495495+ }
487496 },
488497 cache_size: 256,
489498 data_compression: Compression::Zstd,
···651660652661 let enable_backlinks: bool = cfg!("ENABLE_BACKLINKS", defaults.enable_backlinks);
653662654654- // start with built-in tiers, then layer in any env-defined overrides.
663663+ // start with built-in tier definitions, then layer in any env-defined overrides.
655664 // format: HYDRANT_RATE_TIERS=name:base/mul/hourly/daily,...
656656- let mut rate_tiers = defaults.rate_tiers.clone();
665665+ let mut tiers = defaults.tier_policy.tiers.clone();
657666 if let Ok(s) = std::env::var("HYDRANT_RATE_TIERS") {
658667 for entry in s.split(',') {
659668 let entry = entry.trim();
660669 if let Some((name, spec)) = entry.split_once(':') {
661670 match RateTier::parse(spec) {
662671 Some(tier) => {
663663- rate_tiers.insert(name.trim().to_string(), tier);
672672+ tiers.insert(SmolStr::new(name.trim()), tier);
664673 }
665674 None => tracing::warn!(
666675 "ignoring invalid rate tier '{name}': expected base/mul/hourly/daily format"
···688697 })
689698 .unwrap_or_else(|| defaults.seed_hosts.clone());
690699691691- let trusted_hosts = std::env::var("HYDRANT_TRUSTED_HOSTS")
692692- .ok()
693693- .map(|s| {
694694- s.split(',')
695695- .map(|s| s.trim().to_string())
696696- .filter(|s| !s.is_empty())
697697- .collect()
698698- })
699699- .unwrap_or_else(|| defaults.trusted_hosts.clone());
700700+ // build ordered glob rules from HYDRANT_TIER_RULES
701701+ let mut rules: Vec<TierRule> = vec![];
702702+ let mut tier_rules: Vec<(String, String)> = vec![];
703703+ if let Ok(s) = std::env::var("HYDRANT_TIER_RULES") {
704704+ for entry in s.split(',') {
705705+ let entry = entry.trim();
706706+ if entry.is_empty() {
707707+ continue;
708708+ }
709709+ if let Some((pattern_str, tier_name)) = entry.split_once(':') {
710710+ let pattern_str = pattern_str.trim();
711711+ let tier_name = tier_name.trim();
712712+ match glob::Pattern::new(pattern_str) {
713713+ Ok(pattern) => {
714714+ rules.push(TierRule {
715715+ pattern,
716716+ tier_name: SmolStr::new(tier_name),
717717+ });
718718+ tier_rules.push((pattern_str.to_string(), tier_name.to_string()));
719719+ }
720720+ Err(e) => tracing::warn!(
721721+ "ignoring invalid tier rule pattern '{pattern_str}': {e}"
722722+ ),
723723+ }
724724+ }
725725+ }
726726+ }
727727+728728+ let tier_policy = TierPolicy { tiers, rules };
700729701730 let default_mode = CrawlerMode::default_for(full_network);
702731 let crawler_sources = match std::env::var("HYDRANT_CRAWLER_URLS") {
···743772 filter_collections,
744773 filter_excludes,
745774 enable_backlinks,
746746- trusted_hosts,
747747- rate_tiers,
775775+ tier_policy,
776776+ tier_rules,
748777 cache_size,
749778 data_compression,
750779 journal_compression,
+6
src/control/firehose.rs
···199199200200 let _ = self.persisted.insert_async(url.clone()).await;
201201202202+ // reset failure state so the fresh task gets a clean slate.
203203+ // if the previous task exited after max failures, the failure counter
204204+ // would otherwise cause the new task to exit immediately.
205205+ let throttle = self.state.throttler.get_handle(&url).await;
206206+ throttle.record_success();
207207+202208 self.spawn_firehose_ingestor(&FirehoseSource { url, is_pds }, shared, false)
203209 .await?;
204210
+49-29
src/control/pds.rs
···44use miette::{IntoDiagnostic, Result};
55use serde::Serialize;
66use smol_str::SmolStr;
77+use tracing::debug;
7889use crate::config::RateTier;
1010+use crate::db::keys::pds_account_count_key;
911use crate::db::pds_meta as db_pds;
1010-use crate::pds_meta::{HostStatus, PdsMeta};
1212+use crate::pds_meta::{HostDesc, HostStatus, PdsMeta};
1113use crate::state::AppState;
12141315/// a single PDS-to-tier assignment.
···5052 G: FnOnce(&mut PdsMeta),
5153 {
5254 let state = self.0.clone();
5353- tokio::task::spawn_blocking(move || {
5555+ tokio::task::spawn_blocking(move || -> Result<()> {
5456 let mut batch = state.db.inner.batch();
5557 db_op(&mut batch, &state.db.filter);
5658 batch.commit().into_diagnostic()?;
···6668 Ok(())
6769 }
68706969- fn check_limit_transition(&self, host: &str, account_limit: Option<u64>) -> Option<HostStatus> {
7070- let count_key = crate::db::keys::pds_account_count_key(host);
7171- let count = self.0.db.get_count_sync(&count_key);
7272- let current_status = self.0.pds_meta.load().status(host);
7373- current_status.check_limit_transition(count, account_limit)
7474- }
7575-7676- /// list all current per-PDS tier assignments.
7171+ /// list all current per-PDS tier assignments (explicit api-assigned overrides only).
7772 pub async fn list_tiers(&self) -> HashMap<String, String> {
7873 let snapshot = self.0.pds_meta.load();
7974 snapshot
8075 .hosts
8176 .iter()
8282- .filter_map(|(host, desc)| desc.tier.as_ref().map(|t| (host.clone(), t.to_string())))
7777+ .filter_map(|(host, desc): (&String, &HostDesc)| {
7878+ desc.tier
7979+ .as_ref()
8080+ .map(|t: &smol_str::SmolStr| (host.clone(), t.to_string()))
8181+ })
8382 .collect()
8483 }
85848686- /// returns the assigned tier for `host`, or "default" if none is assigned.
8585+    /// returns the assigned tier for `host`, or "default" if none is assigned.
8786 pub fn get_tier(&self, host: impl AsRef<str>) -> String {
8887 let snapshot = self.0.pds_meta.load();
8988 snapshot
···105104 snapshot
106105 .hosts
107106 .iter()
108108- .filter_map(|(host, desc)| {
107107+ .filter_map(|(host, desc): (&String, &crate::pds_meta::HostDesc)| {
109108 matches!(desc.status, HostStatus::Banned).then(|| host.clone())
110109 })
111110 .collect()
···114113 /// list all configured rate tier definitions.
115114 pub fn list_rate_tiers(&self) -> HashMap<String, PdsTierDefinition> {
116115 self.0
117117- .rate_tiers
116116+ .tier_policy
117117+ .tiers
118118 .iter()
119119- .map(|(name, tier)| (name.clone(), PdsTierDefinition::from(*tier)))
119119+ .map(|(name, tier): (&smol_str::SmolStr, &RateTier)| {
120120+ (name.to_string(), PdsTierDefinition::from(*tier))
121121+ })
120122 .collect()
121123 }
122124123123- /// assign `host` to `tier`, persisting the change to the database.
125125+ /// assign `host` to `tier`.
124126 /// returns an error if `tier` is not a known tier name.
125127 pub async fn set_tier(&self, host: impl AsRef<str>, tier: String) -> Result<()> {
126126- if !self.0.rate_tiers.contains_key(&tier) {
128128+ if !self.0.tier_policy.tiers.contains_key(tier.as_str()) {
127129 miette::bail!(
128130 "unknown tier '{tier}'; known tiers: {:?}",
129129- self.0.rate_tiers.keys().collect::<Vec<_>>()
131131+ self.0.tier_policy.tiers.keys().collect::<Vec<_>>()
130132 );
131133 }
132134···134136 let host_clone = host.clone();
135137 let tier_clone = tier.clone();
136138137137- let new_tier_limit = self.0.rate_tiers.get(&tier).unwrap().account_limit;
138138- let maybe_status = self.check_limit_transition(&host, new_tier_limit);
139139+ // read the new tier's account limit and check for a status transition,
140140+ // now that the override is about to change.
141141+ let new_tier_limit = self
142142+ .0
143143+ .tier_policy
144144+ .tiers
145145+ .get(tier.as_str())
146146+ .unwrap()
147147+ .account_limit;
148148+ let count = self.0.db.get_count_sync(&pds_account_count_key(&host));
149149+ let current_status = self.0.pds_meta.load().status(&host);
150150+ let maybe_status = current_status.check_limit_transition(count, new_tier_limit);
139151140152 self.update(
141153 move |batch, ks| {
···156168 .await
157169 }
158170159159- /// remove any explicit tier assignment for `host`, reverting it to the default tier.
171171+ /// remove any explicit tier assignment for `host`, reverting it to the matched rule or default.
160172 pub async fn remove_tier(&self, host: impl AsRef<str>) -> Result<()> {
161173 let host = host.as_ref().to_string();
162174 let host_clone = host.clone();
163175164164- let default_tier_limit = self
165165- .0
166166- .rate_tiers
167167- .get("default")
168168- .and_then(|t| t.account_limit);
169169- let maybe_status = self.check_limit_transition(&host, default_tier_limit);
176176+ // after removing the override, the effective tier is determined by glob rules.
177177+ // resolve it without the override to get the correct limit.
178178+ let effective_limit = self.0.tier_policy.resolve(&host, None).account_limit;
179179+ let count = self.0.db.get_count_sync(&pds_account_count_key(&host));
180180+ let current_status = self.0.pds_meta.load().status(&host);
181181+ let maybe_status = current_status.check_limit_transition(count, effective_limit);
182182+ debug!(
183183+ host,
184184+ ?current_status,
185185+ ?effective_limit,
186186+ count,
187187+ ?maybe_status,
188188+ "remove_tier: computed status transition"
189189+ );
170190171191 self.update(
172192 move |batch, ks| {
···187207 .await
188208 }
189209190190- /// ban `host`, persisting the change to the database.
210210+    /// ban `host`.
191211 pub async fn ban(&self, host: impl AsRef<str>) -> Result<()> {
192212 let host = host.as_ref().to_string();
193213 let host_clone = host.clone();
···204224 .await
205225 }
206226207207- /// unban `host`, removing it from the database.
227227+    /// unban `host`.
208228 pub async fn unban(&self, host: impl AsRef<str>) -> Result<()> {
209229 let host = host.as_ref().to_string();
210230 let host_clone = host.clone();
+16-6
src/ingest/firehose.rs
···130130 if banned {
131131 break Ok(());
132132 }
133133- meta.tier_for(host, &self.state.rate_tiers)
133133+ let override_name = meta.hosts.get(host).and_then(|h| h.tier.as_ref());
134134+ self.state.tier_policy.resolve(host, override_name)
134135 };
135136 let accounts = self.state.db.get_count(&count_key).await;
136137 tokio::select! {
···162163 }
163164 _ = &mut active_sleep, if !marked_active => {
164165 marked_active = true;
165165- // only reset failure state once the stream has been healthy for
166166- // a full window — prevents hosts that connect but immediately
167167- // send garbage from resetting their backoff on every attempt
166166+ // only reset failure state once the stream has been healthy for a bit
167167+                    // so we don't get in a "connects successfully, sends garbage" situation
168168 self.throttle.record_success();
169169 if self.is_pds {
170170 let (current_status, tier) = {
171171 let meta = self.state.pds_meta.load();
172172- (meta.status(host), meta.tier_for(host, &self.state.rate_tiers))
172172+ let override_name = meta.hosts.get(host).and_then(|h| h.tier.as_ref());
173173+ (meta.status(host), self.state.tier_policy.resolve(host, override_name))
173174 };
174175 if current_status == HostStatus::Banned {
175176 break Ok(());
176177 }
177178 let count = self.state.db.get_count_sync(&count_key);
178179 let new_status = tier.account_limit.is_some_and(|l| count >= l)
179179- .then_some(HostStatus::Throttled).unwrap_or(HostStatus::Active);
180180+ .then_some(HostStatus::Throttled)
181181+ .unwrap_or(HostStatus::Active);
182182+ debug!(
183183+ host,
184184+ ?current_status,
185185+ account_limit = ?tier.account_limit,
186186+ count,
187187+ ?new_status,
188188+ "active_sleep: computed status transition"
189189+ );
180190181191 if current_status != new_status {
182192 if let Err(e) = self.set_host_status(new_status) {
+12-38
src/ingest/relay.rs
···1818use tracing::{debug, error, info, info_span, trace, warn};
1919use url::Url;
20202121+use crate::db::keys::pds_account_count_key;
2122use crate::db::{self, keys};
2223use crate::ingest::stream::AccountStatus;
2324#[cfg(feature = "relay")]
···523524 // update per-PDS active account count on transitions
524525 if is_pds {
525526 if let Some(host) = firehose.host_str() {
526526- let count_key = keys::pds_account_count_key(host);
527527+ let count_key = pds_account_count_key(host);
527528 let changed = if !was_active && repo_state.active {
528529 Some(ctx.state.db.update_count(&count_key, 1))
529530 } else if was_active && !repo_state.active {
···533534 };
534535535536 if let Some(count) = changed {
536536- let (current_status, limit) = {
537537- let meta = ctx.state.pds_meta.load();
538538- (
539539- meta.status(host),
540540- meta.tier_for(host, &ctx.state.rate_tiers).account_limit,
541541- )
542542- };
543543-544544- if let Some(status) = current_status.check_limit_transition(count, limit) {
545545- debug!(%host, count, ?limit, ?status, "account count crossed limit, shifting status");
546546- if let Err(e) = crate::db::pds_meta::set_status(
547547- &mut ctx.batch,
548548- &ctx.state.db.filter,
549549- host,
550550- status,
551551- ) {
552552- error!(err = %e, "failed to write host status");
553553- } else {
554554- crate::pds_meta::PdsMeta::update_host(&ctx.state.pds_meta, host, |h| {
555555- h.status = status
556556- });
557557- }
537537+ let mut batch_for_status = ctx.state.db.inner.batch();
538538+ ctx.state
539539+ .apply_host_limit_status(&mut batch_for_status, host, count);
540540+ if let Err(e) = batch_for_status.commit() {
541541+ error!(%host, err = %e, "failed to commit host status update");
558542 }
559543 }
560544 }
···878862 }
879863880864 if let Some(host) = msg.firehose.host_str() {
881881- let tier = self
882882- .state
883883- .pds_meta
884884- .load()
885885- .tier_for(host, &self.state.rate_tiers);
886886- if let Some(limit) = tier.account_limit {
887887- let count = self
888888- .state
889889- .db
890890- .get_count_sync(&crate::db::keys::pds_account_count_key(host));
891891- if count >= limit {
892892- warn!(did = %did, host, count, limit, "account limit reached for host, dropping new account");
893893- return Ok(None);
894894- }
865865+ let count = self.state.db.get_count_sync(&pds_account_count_key(host));
866866+ if self.state.is_over_account_limit(host, count) {
867867+ warn!(did = %did, host, count, "account limit reached for host, dropping new account");
868868+ return Ok(None);
895869 }
896870 }
897871 }
···924898 // track initial active state for per-PDS rate limiting
925899 if msg.is_pds && repo_state.active {
926900 if let Some(host) = msg.firehose.host_str() {
927927- db.update_count(&keys::pds_account_count_key(host), 1);
901901+ db.update_count(&pds_account_count_key(host), 1);
928902 }
929903 }
930904