···1616//!
1717//! Reconnection is handled here with exponential backoff (1 s … 64 s). A
1818//! graceful server close (`StreamErrorKind::Closed`) skips the backoff and
1919-//! reconnects immediately. The backoff counter resets to 1 s after any
2020-//! successfully processed event.
1919+//! reconnects immediately. Application-level decode errors (`Decode`,
2020+//! `WrongMessageFormat`) leave the WebSocket connection alive; those are
2121+//! logged and counted but do not trigger a reconnect. The backoff counter
2222+//! resets to 1 s after any successfully processed event.
21232224mod account_event;
2325pub(crate) mod commit_event;
···8587 // and the watermark doesn't regress.
8688 let mut dispatcher =
8789 CommitDispatcher::new(self.resolver.clone(), self.db.clone(), self.max_workers);
8888- // When Some, use this cursor on the next reconnect instead of loading
8989- // from storage — used to step past an un-decodable frame one position
9090- // at a time. Consumed by `take()` each iteration; set again only on a
9191- // fresh Decode error.
9292- let mut skip_cursor: Option<u64> = None;
9390 let mut last_seq: i64 = 0;
9491 let mut cursor_tick = Instant::now();
9592···9895 return Ok(());
9996 }
10097101101- // On a decode error we skip forward from wherever we connected
102102- // last; otherwise reload from storage so we pick up the latest
103103- // persisted watermark.
104104- let connect_cursor: Option<u64> = if let Some(skip) = skip_cursor.take() {
105105- Some(skip)
106106- } else {
9898+ let connect_cursor: Option<u64> = {
10799 let db = self.db.clone();
108100 let host = self.host.clone();
109101 tokio::task::spawn_blocking(move || storage::firehose_cursor::get(&db, &host))
···182174 continue 'reconnect;
183175 }
184176 Event::Stream(Some(Err(e))) => {
185185- let delay = match e.kind() {
186186- StreamErrorKind::Closed => 0,
187187- StreamErrorKind::Decode => {
188188- // Step past the un-decodable frame. We don't
189189- // know its seq; use whichever high-water mark
190190- // is furthest ahead — last_seq if we processed
191191- // events in this session, connect_cursor if the
192192- // error hit before any good events arrived.
193193- let hwm = connect_cursor.unwrap_or(0).max(last_seq as u64);
194194- skip_cursor = Some(hwm + 1);
195195- 0
177177+ match e.kind() {
178178+ // Application-level decode failure on a healthy
179179+ // WebSocket connection: jacquard's filter_map stream
180180+ // continues yielding after this error, so just log,
181181+ // count, and continue reading.
182182+ StreamErrorKind::Decode | StreamErrorKind::WrongMessageFormat => {
183183+ metrics::counter!("lightrail_firehose_decode_errors_total")
184184+ .increment(1);
185185+ warn!(
186186+ error = %e,
187187+ host = %self.host,
188188+ "firehose message decode error; skipping frame",
189189+ );
190190+ // No reconnect — the WebSocket connection is
191191+ // still alive.
192192+ }
193193+ // Graceful server close: reconnect immediately.
194194+ StreamErrorKind::Closed => {
195195+ info!(host = %self.host, "firehose stream closed; reconnecting");
196196+ continue 'reconnect;
196197 }
197197- _ => backoff_secs,
198198- };
199199- warn!(
200200- error = %e,
201201- host = %self.host,
202202- delay_secs = delay,
203203- skip_cursor,
204204- "firehose stream error; will reconnect",
205205- );
206206- if delay > 0 {
207207- tokio::select! {
208208- _ = token.cancelled() => return Ok(()),
209209- _ = tokio::time::sleep(Duration::from_secs(delay)) => {}
198198+ // Transport / protocol error: the connection is
199199+ // broken; reconnect with backoff.
200200+ _ => {
201201+ warn!(
202202+ error = %e,
203203+ host = %self.host,
204204+ backoff_secs,
205205+ "firehose stream error; will reconnect",
206206+ );
207207+ tokio::select! {
208208+ _ = token.cancelled() => return Ok(()),
209209+ _ = tokio::time::sleep(
210210+ Duration::from_secs(backoff_secs)
211211+ ) => {}
212212+ }
213213+ backoff_secs = (backoff_secs * 2).min(MAX_BACKOFF_SECS);
214214+ continue 'reconnect;
210215 }
211211- backoff_secs = (backoff_secs * 2).min(MAX_BACKOFF_SECS);
212216 }
213213- continue 'reconnect;
214217 }
215218 Event::Stream(Some(Ok(msg))) => {
216219 backoff_secs = 1;