···
 | `CRAWLER_MAX_PENDING_REPOS` | `2000` | max pending repos for crawler. |
 | `CRAWLER_RESUME_PENDING_REPOS` | `1000` | resume threshold for crawler pending repos. |
 | `TRUSTED_HOSTS` | | comma-separated list of PDS hostnames to pre-assign to the `trusted` rate tier at startup. hosts not listed here use the `default` tier unless assigned via the API. |
-| `RATE_TIERS` | | comma-separated list of named rate tier definitions in `name:base/mul/hourly/daily` format (e.g. `trusted:5000/10.0/18000000/432000000`). built-in tiers (`default`, `trusted`) are always present and can be overridden. |
+| `RATE_TIERS` | | comma-separated list of named rate tier definitions in `name:base/mul/hourly/daily[/account_limit]` format (e.g. `trusted:5000/10.0/18000000/432000000/10000000`). once a PDS reaches the optional account limit, hydrant rejects new account creation events from it. built-in tiers (`default`, `trusted`) are always present and can be overridden. |

 ## build features
···
 the per-second limit scales with the number of active accounts on the PDS:
 `max(per_second_base, accounts × per_second_account_mul)`.

+you can also define an optional `account_limit` for a rate tier. if a PDS
+exceeds this number of active accounts, hydrant will reject any new account
+creation events from it.
+
+the built-in tiers are defined as follows:
+- `default`: `50` per sec (floor), `+0.5` per account. max `3_600_000`/hr, `86_400_000`/day. `100` account limit.
+- `trusted`: `5000` per sec (floor), `+10.0` per account. max `18_000_000`/hr, `432_000_000`/day. `10_000_000` account limit.
+
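
as a quick illustration of the scaling rule and the built-in tier values above, here is a standalone sketch (not code from this PR):

```rust
// effective per-second limit: max(per_second_base, accounts × per_second_account_mul)
fn per_second(base: u64, mul: f64, accounts: u64) -> u64 {
    base.max((accounts as f64 * mul) as u64)
}

fn main() {
    assert_eq!(per_second(50, 0.5, 80), 50); // `default`: 80 × 0.5 = 40, the floor wins
    assert_eq!(per_second(50, 0.5, 1_000), 500); // `default`: 1000 × 0.5
    assert_eq!(per_second(5_000, 10.0, 200_000), 2_000_000); // `trusted`: 200k × 10.0
}
```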
 - `GET /pds/tiers`: list all current tier assignments alongside the available
   tier definitions.
   - returns `{ "assignments": [{ "host": string, "tier": string }], "rate_tiers": { <name>: { "per_second_base": int, "per_second_account_mul": float, "per_hour": int, "per_day": int } } }`.
···
   - re-assigning the same host updates the tier in place without creating a duplicate.
 - `DELETE /pds/tiers`: remove an explicit tier assignment for a PDS, reverting
   it to the `default` tier.
-  - body: `{ "host": string }`.
+  - query parameter: `?host=<hostname>` (e.g. `?host=pds.example.com`).
   - returns `200` even if no assignment existed.
 - `GET /pds/rate-tiers`: list the available rate tier definitions.
-  - returns a map of tier name to `{ "per_second_base", "per_second_account_mul", "per_hour", "per_day" }`.
+  - returns a map of tier name to `{ "per_second_base", "per_second_account_mul", "per_hour", "per_day", "account_limit" }`.
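
a hypothetical client-side sketch of the endpoints above (reqwest with the `json` feature, plus tokio and serde_json; the base URL is an assumed local instance, and the PUT body shape is the one exercised in tests/api.nu):

```rust
use serde_json::json;

#[tokio::main]
async fn main() -> Result<(), reqwest::Error> {
    let base = "http://localhost:3000"; // assumption: a local hydrant instance
    let client = reqwest::Client::new();

    // assign a PDS to the `trusted` tier
    client
        .put(format!("{base}/pds/tiers"))
        .json(&json!({ "host": "pds.example.com", "tier": "trusted" }))
        .send()
        .await?;

    // revert it to `default`: the host is now a query parameter, not a JSON body
    client
        .delete(format!("{base}/pds/tiers?host=pds.example.com"))
        .send()
        .await?;
    Ok(())
}
```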

 hosts listed in `TRUSTED_HOSTS` are seeded as `trusted` at startup, but only
 when no database assignment already exists for that host — DB entries always win.
···
     pub per_hour: u64,
     /// per-day limit.
     pub per_day: u64,
+    /// maximum number of active accounts for this host before hydrant stops
+    /// tracking new accounts from it.
+    pub account_limit: Option<u64>,
 }

 impl RateTier {
···
             per_second_account_mul: 10.0,
             per_hour: 5000 * 3600,
             per_day: 5000 * 86400,
+            account_limit: Some(10_000_000),
         }
     }
···
             per_second_account_mul: 0.5,
             per_hour: 1000 * 3600,
             per_day: 1000 * 86400,
+            account_limit: Some(100),
         }
     }

-    /// parse `base/mul/hourly/daily` format used by `HYDRANT_RATE_TIERS`.
+    /// parse `base/mul/hourly/daily[/account_limit]` format used by `HYDRANT_RATE_TIERS`.
     fn parse(s: &str) -> Option<Self> {
         let parts: Vec<&str> = s.split('/').collect();
-        if parts.len() != 4 {
+        if parts.len() < 4 || parts.len() > 5 {
             return None;
         }
         Some(Self {
···
             per_second_account_mul: parts[1].parse().ok()?,
             per_hour: parts[2].parse().ok()?,
             per_day: parts[3].parse().ok()?,
+            account_limit: parts.get(4).and_then(|p| p.parse().ok()),
         })
     }
 }
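
a standalone sanity check of the extended format (it mirrors the parse logic above; the name/spec split on `:` follows the `HYDRANT_RATE_TIERS` format documented earlier):

```rust
fn main() {
    let entry = "trusted:5000/10.0/18000000/432000000/10000000";
    let (name, spec) = entry.split_once(':').unwrap();
    let parts: Vec<&str> = spec.split('/').collect();

    assert_eq!(name, "trusted");
    assert_eq!(parts[0].parse::<u64>().unwrap(), 5_000); // per_second_base
    assert_eq!(parts[1].parse::<f64>().unwrap(), 10.0); // per_second_account_mul
    assert_eq!(parts[2].parse::<u64>().unwrap(), 18_000_000); // per_hour
    assert_eq!(parts[3].parse::<u64>().unwrap(), 432_000_000); // per_day
    // the fifth field is optional; when absent there is no account limit
    assert_eq!(
        parts.get(4).and_then(|p| p.parse::<u64>().ok()),
        Some(10_000_000)
    );
}
```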
+31-15
src/control/mod.rs
···
 #[cfg(feature = "relay")]
 use stream::relay_stream_thread;

+#[derive(Debug, Clone)]
 /// information about a host hydrant is consuming from.
 pub struct Host {
     /// hostname of the host.
···
     pub seq: i64,
     /// the amount of accounts hydrant has seen from this host.
     pub account_count: u64,
-    /// whether this host is banned or not.
-    pub is_banned: bool,
+    /// the status of this host in hydrant.
+    pub status: crate::pds_meta::HostStatus,
 }

 /// an event emitted by the hydrant event stream.
···
     tokio::task::spawn_blocking(move || {
         let key = keys::firehose_cursor_key(&hostname);
-        let Some(seq) = state.db.cursors.get(&key).into_diagnostic()? else {
-            return Ok(None);
-        };
-        let seq = i64::from_be_bytes(
-            seq.as_ref()
-                .try_into()
-                .into_diagnostic()
-                .wrap_err("cursor value is not 8 bytes")?,
-        );
+
+        let mut seq = 0;
+        if let Some(cursor_bytes) = state.db.cursors.get(&key).into_diagnostic()? {
+            seq = i64::from_be_bytes(cursor_bytes.as_ref().try_into().into_diagnostic()?);
+        } else {
+            // no cursor yet: check whether it's explicitly tracked in the hosts map
+            // (e.g. recently added via the API with no messages seen yet)
+            let meta = state.pds_meta.load();
+            if !meta.hosts.contains_key(hostname.as_str()) {
+                // also allow it if it has an active firehose ingestor
+                let mut found_in_cursors = false;
+                state.firehose_cursors.iter_sync(|u, _| {
+                    if u.host_str() == Some(hostname.as_str()) {
+                        found_in_cursors = true;
+                    }
+                    !found_in_cursors // keep iterating until found
+                });
+
+                if !found_in_cursors {
+                    return Ok(None);
+                }
+            }
+        }
+
         let account_count = state
             .db
             .get_count_sync(&keys::pds_account_count_key(&hostname));
-        let is_banned = state.pds_meta.load().is_banned(&hostname);
+        let status = state.pds_meta.load().status(&hostname);

         Ok(Some(Host {
             name: hostname.into(),
             seq,
             account_count,
-            is_banned,
+            status,
         }))
     })
     .await
···
         let account_count = state
             .db
             .get_count_sync(&keys::pds_account_count_key(hostname));
-        let is_banned = state.pds_meta.load().is_banned(&hostname);
+        let status = state.pds_meta.load().status(hostname);
         hosts.push(Host {
             name: hostname.into(),
             seq,
             account_count,
-            is_banned,
+            status,
         });
     }

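for reference, `crate::pds_meta::HostStatus` itself is not shown in this diff. a hypothetical shape consistent with the status strings asserted in tests/pds_status.nu (`active`, `offline`, `throttled`) might look like:

```rust
// hypothetical sketch only — the real definition lives in crate::pds_meta and is
// not part of this diff; variant names are inferred from the test assertions.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum HostStatus {
    /// consuming normally.
    #[default]
    Active,
    /// repeated hard failures; the host is unreachable.
    Offline,
    /// the host exceeded its rate tier limits.
    Throttled,
}
```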
···
         return (did, retry_state.into());
     }
     if is_throttle_worthy(&e) {
-        if let Some(mins) = throttle.record_failure() {
-            warn!(url = %pds_url, mins, "throttling pds due to hard failure");
+        if let Some(secs) = throttle.record_failure() {
+            warn!(url = %pds_url, secs, "throttling pds due to hard failure");
         }
         let mut retry_state = throttle.to_retry_state();
         retry_state.status = e.status();
···
-use std::collections::{HashMap, HashSet};
+use std::collections::HashMap;
 use std::future::Future;
 use std::sync::atomic::AtomicI64;
 use std::time::Duration;
···
     let filter = new_filter_handle(filter_config);

-    // load persisted per-PDS tier assignments from the filter keyspace.
-    // trusted_hosts from config are merged in as defaults (not persisted here; they seed
-    // only if the host has no existing assignment in the DB).
-    let mut tiers: HashMap<String, SmolStr> = crate::db::pds_meta::load_tiers(&db.filter)
+    let tiers: HashMap<String, SmolStr> = crate::db::pds_meta::load_tiers(&db.filter)
         .unwrap_or_default()
         .into_iter()
         .map(|(host, tier)| (host.to_string(), tier))
         .collect();
+
+    let statuses: HashMap<String, crate::pds_meta::HostStatus> =
+        crate::db::pds_meta::load_statuses(&db.filter)
+            .unwrap_or_default()
+            .into_iter()
+            .map(|(host, stat)| (host.to_string(), stat))
+            .collect();
+
+    // merge persisted tiers and statuses into per-host descriptors.
+    let mut hosts = HashMap::new();
+    for (host, tier) in tiers {
+        hosts
+            .entry(host)
+            .or_insert_with(crate::pds_meta::HostDesc::default)
+            .tier = Some(tier);
+    }
+    for (host, stat) in statuses {
+        hosts
+            .entry(host)
+            .or_insert_with(crate::pds_meta::HostDesc::default)
+            .status = stat;
+    }
+    // trusted_hosts from config seed a tier only when the DB has none for that host.
     for host in &config.trusted_hosts {
-        tiers
+        let entry = hosts
             .entry(host.clone())
-            .or_insert_with(|| SmolStr::new("trusted"));
+            .or_insert_with(crate::pds_meta::HostDesc::default);
+        if entry.tier.is_none() {
+            entry.tier = Some(SmolStr::new("trusted"));
+        }
     }

-    let banned: HashSet<String> = crate::db::pds_meta::load_banned(&db.filter)
-        .unwrap_or_default()
-        .into_iter()
-        .map(|host| host.to_string())
-        .collect();
-
-    let pds_meta = new_pds_handle(PdsMeta { tiers, banned });
+    let pds_meta = new_pds_handle(PdsMeta { hosts });

     let relay_cursors = scc::HashIndex::new();

+17-11
src/util/throttle.rs
···
     }

     /// called on hard failures (timeout, TLS error, bad gateway, etc).
-    /// returns throttle duration in minutes if this is a *new* throttle,
-    /// and notifies all in-flight tasks to cancel immediately.
+    /// always increments `consecutive_failures`. only sets a new `throttled_until`
+    /// (and notifies waiters) when not already throttled, returning the new
+    /// throttle duration in seconds in that case.
     pub fn record_failure(&self) -> Option<u64> {
-        if self.is_throttled() {
-            return None;
-        }
-
         let failures = self
             .state
             .consecutive_failures
             .fetch_add(1, Ordering::AcqRel)
             + 1;

-        // 30 min, 60 min, 120 min, ... capped at ~512 hours
-        let base_minutes = 30u64;
+        if self.is_throttled() {
+            return None;
+        }
+
+        // 15s, 30s, 60s, ... capped at 300s (clamped to 1s in debug builds)
+        let base_secs = 15u64;
         let exponent = (failures as u32).saturating_sub(1);
-        let minutes = base_minutes * 2u64.pow(exponent.min(10));
-        let until = chrono::Utc::now().timestamp() + (minutes * 60) as i64;
+        let secs = (base_secs * 2u64.pow(exponent.min(10))).min(300);
+        #[cfg(debug_assertions)]
+        let secs = secs.min(1);
+
+        let until = chrono::Utc::now().timestamp() + secs as i64;

         self.state.throttled_until.store(until, Ordering::Release);
         self.state.failure_notify.notify_waiters();

-        Some(minutes)
+        Some(secs)
     }

     /// returns current timeout duration — 3s, 6s, or 12s depending on prior timeouts.
     pub fn timeout(&self) -> Duration {
         let n = self.state.consecutive_timeouts.load(Ordering::Acquire);
         Duration::from_secs(3 * 2u64.pow(n.min(2) as u32))
+    }
+
+    /// returns the number of consecutive hard failures recorded so far.
+    pub fn consecutive_failures(&self) -> usize {
+        self.state.consecutive_failures.load(Ordering::Acquire) as usize
     }

     /// returns whether the timeout attempts are exhausted
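
the net effect of the minutes-to-seconds change, as a standalone sketch of the release-mode schedule (it mirrors the arithmetic above; debug builds clamp every duration to 1s):

```rust
fn throttle_secs(consecutive_failures: u32) -> u64 {
    let base_secs = 15u64;
    let exponent = consecutive_failures.saturating_sub(1);
    (base_secs * 2u64.pow(exponent.min(10))).min(300)
}

fn main() {
    assert_eq!(throttle_secs(1), 15);
    assert_eq!(throttle_secs(2), 30);
    assert_eq!(throttle_secs(3), 60);
    assert_eq!(throttle_secs(5), 240);
    assert_eq!(throttle_secs(6), 300); // capped from here on
}
```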
+4-10
tests/api.nu
···
     let rate_tiers = (http get $"($url)/pds/rate-tiers")
     for tier_name in ["default", "trusted"] {
         let tier = ($rate_tiers | get $tier_name)
-        for field in ["per_second_base", "per_second_account_mul", "per_hour", "per_day"] {
+        for field in ["per_second_base", "per_second_account_mul", "per_hour", "per_day", "account_limit"] {
             if not ($field in $tier) {
                 fail $"($tier_name) tier missing field ($field)" $pid
             }
···

     # remove the first host
     print " DELETE /pds/tiers (first host)..."
-    http delete -f -e -t application/json $"($url)/pds/tiers" --data {
-        host: "pds.example.com"
-    } | assert-status 200 "DELETE /pds/tiers" $pid
+    http delete -f -e $"($url)/pds/tiers?host=pds.example.com" | assert-status 200 "DELETE /pds/tiers" $pid
     let after_del = (http get $"($url)/pds/tiers")
     if ($after_del.assignments | columns | length) != 1 {
         fail $"expected 1 assignment after delete, got ($after_del.assignments | columns | length)" $pid
···
     print " ok: correct host removed, other assignment intact"

     # remove the second host
-    http delete -f -e -t application/json $"($url)/pds/tiers" --data {
-        host: "other.example.com"
-    } | assert-status 200 "DELETE /pds/tiers second" $pid
+    http delete -f -e $"($url)/pds/tiers?host=other.example.com" | assert-status 200 "DELETE /pds/tiers second" $pid

     # deleting a non-existent host is idempotent (returns 200, not an error)
     print " DELETE /pds/tiers (non-existent, expect 200)..."
-    http delete -f -e -t application/json $"($url)/pds/tiers" --data {
-        host: "pds.example.com"
-    } | assert-status 200 "DELETE /pds/tiers non-existent" $pid
+    http delete -f -e $"($url)/pds/tiers?host=pds.example.com" | assert-status 200 "DELETE /pds/tiers non-existent" $pid
     let after_idempotent = (http get $"($url)/pds/tiers")
     if ($after_idempotent.assignments | columns | length) != 0 {
         fail "expected empty assignments after cleanup" $pid
+12
tests/mock_pds.nu
+export def start-mock-pds [port: int] {
+    # kill any stale process from a previous failed run holding this port
+    try { bash -c $"fuser -k ($port)/tcp" } catch {}
+    sleep 100ms
+    let log_file = (mktemp)
+    let pid = (bash -c $"websocat -s ($port) >($log_file) 2>&1 & echo $!" | str trim | into int)
+    { pid: $pid, log: $log_file }
+}
+
+export def stop-mock-pds [handle: record] {
+    try { kill $handle.pid }
+}
+139
tests/pds_status.nu
+source common.nu
+
+source mock_pds.nu
+
+def main [] {
+    let port = resolve-test-port 3033
+    let url = $"http://localhost:($port)"
+    let binary = build-hydrant
+    let db = (mktemp -d -t hydrant_test.XXXXXX)
+
+    let instance = (with-env {
+        HYDRANT_RELAY_HOSTS: "",
+        HYDRANT_CRAWLER_URLS: "",
+        HYDRANT_RATE_TIERS: "custom:1/1/1/1/0"
+    } {
+        start-hydrant $binary $db $port
+    })
+    if not (wait-for-api $url) {
+        fail "hydrant did not start" $instance.pid
+    }
+
+    let mock_port = resolve-test-mock-port 9999
+    let mock_host = "127.0.0.1"
+
+    # kill any stale listener on the mock port from a previous failed run
+    try { bash -c $"fuser -k ($mock_port)/tcp" } catch {}
+    sleep 100ms
+
+    print "adding offline mock pds via firehose sources..."
+    http post -t application/json $"($url)/firehose/sources" {
+        url: $"ws://($mock_host):($mock_port)/",
+        is_pds: true
+    }
+
+    print "checking status transitions to Offline..."
+    mut offline = false
+
+    # the throttle backoff caps at 1 second in debug builds, and it takes
+    # 4 consecutive failures to mark a host as offline, so the transition
+    # should take roughly 4 * 1 = 4 seconds at most.
+    for i in 1..20 {
+        let res = (http get -fe $"($url)/xrpc/com.atproto.sync.getHostStatus?hostname=($mock_host)")
+        if $res.status == 200 {
+            if $res.body.status == "offline" {
+                $offline = true
+                break
+            }
+            if $res.body.status == "active" {
+                print $" ... currently ($res.body.status), waiting for offline"
+            }
+        } else {
+            print $" ... could not get status, waiting: ($res.status)"
+        }
+        sleep 2sec
+    }
+
+    if not $offline {
+        fail "host did not transition to offline within time limit" $instance.pid
+    }
+    print "ok: host transitioned to offline successfully."
+
+    print "starting mock pds websocket server..."
+    let mock_pds_handle = (start-mock-pds $mock_port)
+
+    print "checking status transitions back to Active..."
+    mut active = false
+
+    # now wait for it to successfully reconnect and for the 1s active_sleep to pass.
+    for i in 1..20 {
+        let res = (http get -fe $"($url)/xrpc/com.atproto.sync.getHostStatus?hostname=($mock_host)")
+        if $res.status == 200 {
+            if $res.body.status == "active" {
+                $active = true
+                break
+            }
+            if $res.body.status == "offline" {
+                print $" ... currently ($res.body.status), waiting for active"
+            }
+        } else {
+            print $" ... could not get status, waiting: ($res.status)"
+        }
+        sleep 2sec
+    }
+
+    if $active {
+        print "ok: host transitioned to active successfully."
+    } else {
+        stop-mock-pds $mock_pds_handle
+        try { kill $instance.pid }
+        fail "host did not transition to active within time limit"
+    }
+
+    print "checking status transitions to Throttled..."
+    let put_res = (http put -fe -t application/json $"($url)/pds/tiers" {
+        host: $mock_host,
+        tier: "custom"
+    })
+    if $put_res.status != 200 {
+        print $"PUT /pds/tiers failed with status ($put_res.status)"
+        print $put_res.body
+        stop-mock-pds $mock_pds_handle
+        try { kill $instance.pid }
+        fail "failed to change tier"
+    }
+
+    # since we updated the tier via the API, the status should change immediately
+    mut throttled = false
+    let res = (http get -fe $"($url)/xrpc/com.atproto.sync.getHostStatus?hostname=($mock_host)")
+    if $res.status == 200 and $res.body.status == "throttled" {
+        $throttled = true
+    }
+
+    if not $throttled {
+        stop-mock-pds $mock_pds_handle
+        try { kill $instance.pid }
+        fail "host did not transition to throttled after tier update"
+    }
+    print "ok: host transitioned to throttled successfully."
+
+    print "checking status transitions back to Active when limits loosen..."
+    http delete -fe $"($url)/pds/tiers?host=($mock_host)"
+
+    # the status should change back immediately
+    mut re_active = false
+    let res = (http get -fe $"($url)/xrpc/com.atproto.sync.getHostStatus?hostname=($mock_host)")
+    if $res.status == 200 and $res.body.status == "active" {
+        $re_active = true
+    }
+
+    stop-mock-pds $mock_pds_handle
+    try { kill $instance.pid }
+
+    if $re_active {
+        print "ok: host transitioned back to active successfully."
+        exit 0
+    } else {
+        fail "host did not transition back to active after tier removed"
+    }
+}