···9595 - [x] ratchet by PDS host: be lenient if we have never seen a sync1.1-looking commit, always strict after we see one.
9696 - [?] boooo we might need more handling for pre-sync1.1 repos if they don't include adjacent keys
9797- [x] split the keyspace: put the rbc/cbr indexes on a second keyspace with larger block size, expect hits on main keyspace
9898-- [ ] firehose websocket
9898+- [x] firehose websocket
9999 - [-] ~~ping/pong (unless jacquard is already doing it):~~ seems like no but we can skip it
100100- - [ ] no-events-received timeout reconnect
100100+ - [x] no-events-received timeout reconnect
101101- [ ] resync short-circuit: tiny repos may actually return their entire CAR for getRecord
102102- [ ] commit CAR handling: generate a list of keys with gaps noted, to reliably detect missing adjacent keys
103103- [ ] account status convergence: if we receive commits from apparently-inactive accounts, should we check upstream status to make sure we're not stale?
+17
src/sync/firehose/mod.rs
···45454646/// Maximum reconnect delay.
4747const MAX_BACKOFF_SECS: u64 = 64;
4848+/// Reconnect if no message is received for this long.
4949+const IDLE_TIMEOUT: Duration = Duration::from_secs(180);
48504951/// Manages a single logical connection to a relay firehose, with reconnection.
5052pub struct Subscriber {
···138140139141 // `_sink` kept alive so the write half of the WebSocket isn't dropped.
140142 let (_sink, mut messages) = stream.into_stream();
143143+ let mut last_message_at = Instant::now();
141144142145 loop {
143146 // Unified select! driving both the WebSocket stream and worker
···159162 >,
160163 >,
161164 ),
165165+ Timeout,
162166 }
163167164168 let event = tokio::select! {
···166170 r = messages.next() => Event::Stream(r),
167171 r = dispatcher.workers_mut().join_next_with_id(),
168172 if dispatcher.has_workers() => Event::Worker(r),
173173+ _ = tokio::time::sleep(
174174+ IDLE_TIMEOUT.saturating_sub(last_message_at.elapsed())
175175+ ) => Event::Timeout,
169176 };
170177171178 match event {
···174181 continue 'reconnect;
175182 }
176183 Event::Stream(Some(Err(e))) => {
184184+ last_message_at = Instant::now();
177185 match e.kind() {
178186 // Application-level decode failure on a healthy
179187 // WebSocket connection: jacquard's filter_map stream
···213221 }
214222 }
215223 Event::Stream(Some(Ok(msg))) => {
224224+ last_message_at = Instant::now();
216225 backoff_secs = 1;
217226 if let Some(seq) = self.dispatch(msg, &mut dispatcher) {
218227 last_seq = seq;
···225234 dispatcher.handle_completion(join_err.id(), Err(join_err));
226235 }
227236 Event::Worker(None) => {} // JoinSet drained (shouldn't happen with guard)
237237+ Event::Timeout => {
238238+ warn!(
239239+ host = %self.host,
240240+ timeout_secs = IDLE_TIMEOUT.as_secs(),
241241+ "firehose idle timeout; reconnecting",
242242+ );
243243+ continue 'reconnect;
244244+ }
228245 }
229246230247 if cursor_tick.elapsed() >= self.cursor_save_interval {