···99//! resync pipeline resolves DIDs to PDS endpoints before fetching, workers
1010//! will naturally be rate-limited against the correct PDS host.
11111212+use std::collections::HashMap;
1213use std::num::NonZeroU32;
1313-use std::sync::Arc;
1414+use std::sync::{
1515+ Arc,
1616+ atomic::{AtomicU64, Ordering},
1717+};
1418use std::time::Duration;
15191616-use dashmap::DashMap;
1717-use governor::{DefaultDirectRateLimiter, Quota, RateLimiter};
2020+use dashmap::{DashMap, DashSet};
2121+use governor::{
2222+ DefaultDirectRateLimiter, Quota, RateLimiter,
2323+ clock::{Clock, QuantaClock},
2424+};
1825use jacquard_common::http_client::{HttpClient, HttpClientExt};
1926use jacquard_common::stream::{ByteStream, StreamError};
2727+use tracing::warn;
20282121-const THROTTLE_JITTER_MS: f32 = 16.;
2929+const THROTTLE_JITTER_MS: f32 = 16.0;
3030+3131+struct HostLimiting {
3232+ limiter: DefaultDirectRateLimiter,
3333+ limiting: DashSet<(u64, String)>,
3434+}
3535+3636+impl HostLimiting {
3737+ fn new(quota: Quota) -> Self {
3838+ Self {
3939+ limiter: RateLimiter::direct(quota),
4040+ limiting: DashSet::new(),
4141+ }
4242+ }
4343+}
22442345/// State shared across all clones of a [`ThrottledClient`].
2424-struct Shared {
2525- /// Duration of one token at the configured rate (= 1s / rate).
2626- token_interval: Duration,
4646+struct Limiters {
4747+ current_id: AtomicU64,
2748 /// One GCRA limiter per hostname. Entries are never evicted; the number
2849 /// of distinct hosts contacted is small enough to be unbounded in memory.
2929- limiters: DashMap<String, Arc<DefaultDirectRateLimiter>>,
5050+ limiters: DashMap<String, Arc<HostLimiting>>,
3051 /// Per-host quota, kept for creating new limiters on demand.
3152 quota: Quota,
3253}
33543434-impl Shared {
3535- fn get_or_create_limiter(&self, host: &str) -> Arc<DefaultDirectRateLimiter> {
3636- Arc::clone(
5555+impl Limiters {
5656+ fn get_or_create_limiter(&self, host: &str) -> (u64, Arc<HostLimiting>) {
5757+ let id = self.current_id.fetch_add(1, Ordering::Relaxed); // just needs to be different each time
5858+ let limiting = Arc::clone(
3759 self.limiters
3860 .entry(host.to_string())
3939- .or_insert_with(|| Arc::new(RateLimiter::direct(self.quota)))
6161+ .or_insert_with(|| Arc::new(HostLimiting::new(self.quota)))
4062 .value(),
4141- )
6363+ );
6464+ (id, limiting)
4265 }
43664444- fn jittered_interval(&self) -> Duration {
4545- let secs = fastrand::f32() * THROTTLE_JITTER_MS / 1000.0;
4646- self.token_interval + Duration::from_secs_f32(secs)
6767+ async fn limit(&self, host: &str, path: &str) {
6868+ let (id, limiting) = self.get_or_create_limiter(host);
6969+ let mut throttled = false;
7070+ while let Err(not_until) = limiting.limiter.check() {
7171+ if !throttled {
7272+ throttled = true;
7373+ metrics::gauge!("lightrail_http_host_throttling").increment(1);
7474+ limiting.limiting.insert((id, path.to_string()));
7575+ }
7676+ let min_wait = not_until.wait_time_from(QuantaClock::default().now());
7777+ let jitter = Duration::from_millis((fastrand::f32() * THROTTLE_JITTER_MS) as u64);
7878+ let wait_total = min_wait + jitter;
7979+ metrics::counter!("lightrail_http_throttle_time_total_ms")
8080+ .increment(wait_total.as_millis() as u64);
8181+ tokio::time::sleep(wait_total).await;
8282+ }
8383+ if throttled {
8484+ metrics::gauge!("lightrail_http_host_throttling").decrement(1);
8585+ limiting.limiting.remove(&(id, path.to_string()));
8686+ }
4787 }
4888}
4989···5696#[derive(Clone)]
5797pub struct ThrottledClient {
5898 inner: reqwest::Client,
5959- shared: Arc<Shared>,
9999+ limiters: Arc<Limiters>,
60100}
6110162102impl ThrottledClient {
···72112 let quota = Quota::per_second(rate_per_second);
73113 Self {
74114 inner,
7575- shared: Arc::new(Shared {
7676- token_interval: Duration::from_secs(1) / rate_per_second.get(),
115115+ limiters: Arc::new(Limiters {
116116+ current_id: AtomicU64::new(0),
77117 limiters: DashMap::new(),
78118 quota,
79119 }),
···81121 }
82122}
83123124124+impl ThrottledClient {
125125+ /// Return the set of all PDS hostnames that have rate limiters.
126126+ pub fn currently_limiting(&self) -> HashMap<String, HashMap<String, u64>> {
127127+ self.limiters
128128+ .limiters
129129+ .iter()
130130+ .map(|entry| {
131131+ let (k, v) = entry.pair();
132132+ let path_counts = v.limiting.iter().map(|entry| entry.key().1.clone()).fold(
133133+ HashMap::new(),
134134+ |mut acc: std::collections::HashMap<_, u64>, path| {
135135+ *acc.entry(path.to_string()).or_default() += 1;
136136+ acc
137137+ },
138138+ );
139139+ (k.to_string(), path_counts)
140140+ })
141141+ .filter(|(_, v)| !v.is_empty())
142142+ .collect()
143143+ }
144144+}
145145+84146/// Build the shared HTTP client used for all outbound ATProto requests.
85147pub fn build_client(rate_per_sec: NonZeroU32) -> ThrottledClient {
86148 ThrottledClient::new(rate_per_sec)
···96158 let (parts, body) = request.into_parts();
9715998160 if let Some(host) = parts.uri.host() {
9999- let limiter = self.shared.get_or_create_limiter(host);
100100- while limiter.check().is_err() {
101101- metrics::gauge!("lightrail_http_host_throttling").increment(1);
102102- // i think we should be limiter.until_ready_with_jitter().await!
103103- tokio::time::sleep(self.shared.jittered_interval()).await;
104104- metrics::gauge!("lightrail_http_host_throttling").decrement(1);
105105- }
161161+ self.limiters.limit(host, parts.uri.path()).await;
162162+ } else {
163163+ warn!(uri = %parts.uri, "failed to get host for rate limiting");
106164 }
107165108166 let mut req = self
···133191 request: http::Request<Vec<u8>>,
134192 ) -> Result<http::Response<ByteStream>, Self::Error> {
135193 let (parts, body) = request.into_parts();
194194+136195 if let Some(host) = parts.uri.host() {
137137- let limiter = self.shared.get_or_create_limiter(host);
138138- while limiter.check().is_err() {
139139- metrics::gauge!("lightrail_http_host_throttling").increment(1);
140140- tokio::time::sleep(self.shared.jittered_interval()).await;
141141- metrics::gauge!("lightrail_http_host_throttling").decrement(1);
142142- }
196196+ self.limiters.limit(host, parts.uri.path()).await;
197197+ } else {
198198+ warn!(uri = %parts.uri, "failed to get host for rate limiting");
143199 }
200200+144201 metrics::gauge!("lightrail_http_requests_in_flight").increment(1);
145202 // decremented in get_repo (sketttch)
146203 self.inner
···158215 S: n0_future::Stream<Item = Result<bytes::Bytes, StreamError>> + Send + 'static,
159216 {
160217 if let Some(host) = parts.uri.host() {
161161- let limiter = self.shared.get_or_create_limiter(host);
162162- while limiter.check().is_err() {
163163- metrics::gauge!("lightrail_http_host_throttling").increment(1);
164164- tokio::time::sleep(self.shared.jittered_interval()).await;
165165- metrics::gauge!("lightrail_http_host_throttling").decrement(1);
166166- }
218218+ self.limiters.limit(host, parts.uri.path()).await;
219219+ } else {
220220+ warn!(uri = %parts.uri, "failed to get host for rate limiting");
167221 }
222222+168223 metrics::gauge!("lightrail_http_requests_in_flight").increment(1);
169224 // decremented in get_repo (sketttch)
170225 self.inner.send_http_bidirectional(parts, body).await
+6
src/identity.rs
···205205 self.cache.insert(did, pds, pubkey);
206206 }
207207208208+ /// Look up `did` in the cache without making network calls.
209209+ /// Returns `None` on cache miss.
210210+ pub fn resolve_cached(&self, did: &Did<'_>) -> Option<Arc<CachedIdentity>> {
211211+ self.cache.get(did)
212212+ }
213213+208214 /// Evict `did` from the identity cache.
209215 ///
210216 /// Called when a `#identity` firehose event is received, after all