Stitch any CI into Tangled
83
fork

Configure Feed

Select the types of activity you want to include in your feed.

events fan out

+685 -17
+137
events.go
··· 1 + package main 2 + 3 + // In-process event broker for the /events websocket fan-out. 4 + // 5 + // Lifecycle of an outbound event: 6 + // 7 + // publisher (e.g. Buildkite webhook handler) 8 + // │ 9 + // ▼ 10 + // broker.Publish ─── store.InsertEvent ──▶ events table (rowid = cursor) 11 + // │ 12 + // └── notify() ──▶ each subscriber's signal channel 13 + // │ 14 + // ▼ 15 + // /events handler wakes, 16 + // calls store.EventsAfter(cursor), 17 + // writes envelope JSON to its websocket. 18 + // 19 + // Subscribers don't receive events through the channel directly — only 20 + // a "wake up" signal. They re-read from the store using the cursor they 21 + // last delivered. This means: 22 + // 23 + // - slow clients can't make us drop events (they just lag behind in 24 + // rowid space and catch up on their own pace), 25 + // - reconnecting clients can resume by passing ?cursor=N, hitting the 26 + // same EventsAfter path used for live deliveries, 27 + // - we never have to bound a per-subscriber buffer. 28 + // 29 + // This mirrors the upstream Tangled spindle's notifier+stream design 30 + // (see tangled.org/core/spindle/stream.go), which is the source of 31 + // truth for the wire format on /events. 32 + 33 + import ( 34 + "context" 35 + "encoding/json" 36 + "sync" 37 + ) 38 + 39 + // eventsEnvelope is the wire shape we emit on /events frames. It must 40 + // match the upstream Tangled spindle byte-for-byte so the appview's 41 + // eventconsumer treats us as a drop-in source. 42 + // 43 + // Upstream defines this shape in two places that don't quite agree: 44 + // 45 + // - The producer (tangled.org/core/spindle/stream.go, streamPipelines) 46 + // marshals an inline map[string]any with lowercase keys. 47 + // - The consumer (tangled.org/core/eventconsumer.Message) is exported 48 + // but its Rkey/Nsid fields are missing JSON tags, so reusing it for 49 + // marshalling here would emit "Rkey"/"Nsid" — accepted on read only 50 + // because Go's json package matches field names case-insensitively. 51 + // 52 + // Defining our own struct keeps the wire output identical to the 53 + // upstream producer and lets every site that emits an event (the 54 + // /events handler today, future re-publishers tomorrow) share one 55 + // canonical type. 56 + // 57 + // Event is held as RawMessage so callers can splice a stored record 58 + // body straight in without an unmarshal/remarshal round-trip. 59 + type eventsEnvelope struct { 60 + Rkey string `json:"rkey"` 61 + Nsid string `json:"nsid"` 62 + Event json.RawMessage `json:"event"` 63 + Created int64 `json:"created"` 64 + } 65 + 66 + // broker fans out event-table writes to connected /events subscribers. 67 + // Construct with newBroker; safe for concurrent use. 68 + type broker struct { 69 + st *store 70 + 71 + mu sync.Mutex 72 + subs map[chan struct{}]struct{} 73 + } 74 + 75 + // newBroker returns a broker bound to st. The store is used both for 76 + // durable writes in Publish and for cursor-based reads in EventsAfter 77 + // from the /events handler. 78 + func newBroker(st *store) *broker { 79 + return &broker{ 80 + st: st, 81 + subs: make(map[chan struct{}]struct{}), 82 + } 83 + } 84 + 85 + // Subscribe registers a new subscriber and returns its signal channel. 86 + // The channel is buffered with a capacity of 1: notify() does a 87 + // non-blocking send, so a pending notification simply coalesces with 88 + // the next one rather than blocking the publisher. Subscribers must 89 + // call Unsubscribe when done to free the slot. 90 + func (b *broker) Subscribe() chan struct{} { 91 + ch := make(chan struct{}, 1) 92 + b.mu.Lock() 93 + b.subs[ch] = struct{}{} 94 + b.mu.Unlock() 95 + return ch 96 + } 97 + 98 + // Unsubscribe removes ch from the broker. Safe to call with a channel 99 + // that was never subscribed (no-op) or to call more than once. 100 + func (b *broker) Unsubscribe(ch chan struct{}) { 101 + b.mu.Lock() 102 + delete(b.subs, ch) 103 + b.mu.Unlock() 104 + } 105 + 106 + // Publish persists an event and wakes every subscriber. The returned 107 + // int64 is the assigned cursor (rowid) for the new row, useful in tests 108 + // and for any caller that wants to log "published as cursor=N". 109 + // 110 + // eventJSON is the record body — typically a marshalled 111 + // tangled.PipelineStatus. The caller is responsible for choosing rkey 112 + // (atproto record key) and nsid (collection NSID). 113 + func (b *broker) Publish(ctx context.Context, rkey, nsid string, eventJSON []byte) (int64, error) { 114 + created, err := b.st.InsertEvent(ctx, rkey, nsid, eventJSON) 115 + if err != nil { 116 + return 0, err 117 + } 118 + b.notify() 119 + return created, nil 120 + } 121 + 122 + // notify sends a non-blocking signal to every subscriber. Held lock 123 + // covers iteration only — the send itself is O(1) and never blocks 124 + // because of the buffered channel + default case. 125 + func (b *broker) notify() { 126 + b.mu.Lock() 127 + defer b.mu.Unlock() 128 + for ch := range b.subs { 129 + select { 130 + case ch <- struct{}{}: 131 + default: 132 + // A previous signal hasn't been drained yet; coalesce — 133 + // the subscriber will catch up on the next read since it 134 + // queries by cursor, not by message count. 135 + } 136 + } 137 + }
+335
events_test.go
··· 1 + package main 2 + 3 + // Tests for the /events fan-out: the store's event log methods, the 4 + // in-process broker, and the eventsHandler's wire output. The handler 5 + // test boots a real httptest server and a real gorilla websocket client 6 + // so we exercise the upgrade + envelope codec end to end. 7 + 8 + import ( 9 + "context" 10 + "encoding/json" 11 + "io" 12 + "log/slog" 13 + "net/http" 14 + "net/http/httptest" 15 + "net/url" 16 + "strconv" 17 + "strings" 18 + "testing" 19 + "time" 20 + 21 + "github.com/gorilla/websocket" 22 + ) 23 + 24 + // TestEventsLogRoundtrip covers InsertEvent / EventsAfter together 25 + // because they're a tightly-coupled pair: the cursor returned by Insert 26 + // is the same value EventsAfter must accept to skip past that row. 27 + func TestEventsLogRoundtrip(t *testing.T) { 28 + s := newTestStore(t) 29 + ctx := context.Background() 30 + 31 + // Empty log → empty slice (not nil), so callers can range freely. 32 + got, err := s.EventsAfter(ctx, 0) 33 + if err != nil { 34 + t.Fatalf("EventsAfter empty: %v", err) 35 + } 36 + if got == nil || len(got) != 0 { 37 + t.Fatalf("empty log: got %v, want empty non-nil slice", got) 38 + } 39 + 40 + c1, err := s.InsertEvent(ctx, "rk1", "sh.tangled.pipeline.status", []byte(`{"a":1}`)) 41 + if err != nil { 42 + t.Fatalf("insert 1: %v", err) 43 + } 44 + c2, err := s.InsertEvent(ctx, "rk2", "sh.tangled.pipeline.status", []byte(`{"a":2}`)) 45 + if err != nil { 46 + t.Fatalf("insert 2: %v", err) 47 + } 48 + if c2 <= c1 { 49 + t.Fatalf("cursors must be monotonically increasing: c1=%d c2=%d", c1, c2) 50 + } 51 + 52 + // cursor=0 returns everything; cursor=c1 skips the first row. 53 + got, err = s.EventsAfter(ctx, 0) 54 + if err != nil { 55 + t.Fatalf("EventsAfter 0: %v", err) 56 + } 57 + if len(got) != 2 || got[0].Created != c1 || got[1].Created != c2 { 58 + t.Fatalf("EventsAfter(0) = %+v, want both rows in order", got) 59 + } 60 + if got[0].Rkey != "rk1" || got[1].Rkey != "rk2" { 61 + t.Fatalf("rkey order wrong: %q %q", got[0].Rkey, got[1].Rkey) 62 + } 63 + // json.RawMessage round-trip — matters because the /events handler 64 + // splices these straight into the envelope. 65 + if string(got[0].EventJSON) != `{"a":1}` { 66 + t.Fatalf("event_json round-trip = %q", got[0].EventJSON) 67 + } 68 + 69 + got, err = s.EventsAfter(ctx, c1) 70 + if err != nil { 71 + t.Fatalf("EventsAfter c1: %v", err) 72 + } 73 + if len(got) != 1 || got[0].Created != c2 { 74 + t.Fatalf("EventsAfter(c1) = %+v, want only row c2", got) 75 + } 76 + } 77 + 78 + // TestBrokerPublishWakesSubscribers asserts the core invariant: a 79 + // Publish causes Subscribe()'d channels to fire (at least once) and the 80 + // row is durably visible via EventsAfter so the subscriber can drain 81 + // it. Two subscribers cover the multi-fanout case. 82 + func TestBrokerPublishWakesSubscribers(t *testing.T) { 83 + s := newTestStore(t) 84 + ctx := context.Background() 85 + br := newBroker(s) 86 + 87 + a := br.Subscribe() 88 + b := br.Subscribe() 89 + defer br.Unsubscribe(a) 90 + defer br.Unsubscribe(b) 91 + 92 + cursor, err := br.Publish(ctx, "rk", "sh.tangled.pipeline.status", []byte(`{}`)) 93 + if err != nil { 94 + t.Fatalf("publish: %v", err) 95 + } 96 + if cursor <= 0 { 97 + t.Fatalf("publish cursor = %d, want > 0", cursor) 98 + } 99 + 100 + // Both subscribers must receive the wake-up promptly. Use a 101 + // generous timeout so flaky CI doesn't false-alarm. 102 + for name, ch := range map[string]chan struct{}{"a": a, "b": b} { 103 + select { 104 + case <-ch: 105 + case <-time.After(time.Second): 106 + t.Fatalf("subscriber %s did not receive signal", name) 107 + } 108 + } 109 + 110 + rows, err := s.EventsAfter(ctx, 0) 111 + if err != nil { 112 + t.Fatalf("EventsAfter: %v", err) 113 + } 114 + if len(rows) != 1 || rows[0].Created != cursor { 115 + t.Fatalf("after publish, EventsAfter(0) = %+v, want one row with cursor=%d", rows, cursor) 116 + } 117 + } 118 + 119 + // TestBrokerCoalescesPendingSignal ensures Publish never blocks on a 120 + // subscriber that hasn't drained its channel: a second Publish while 121 + // the first signal is still pending must coalesce, not deadlock. This 122 + // is the property that lets slow clients lag without backpressuring 123 + // the rest of the system. 124 + func TestBrokerCoalescesPendingSignal(t *testing.T) { 125 + s := newTestStore(t) 126 + ctx := context.Background() 127 + br := newBroker(s) 128 + 129 + ch := br.Subscribe() 130 + defer br.Unsubscribe(ch) 131 + 132 + // First publish lands a pending signal in ch (cap=1). 133 + if _, err := br.Publish(ctx, "rk1", "n", []byte(`{}`)); err != nil { 134 + t.Fatalf("publish 1: %v", err) 135 + } 136 + // Second publish must succeed immediately — without a default 137 + // branch in notify(), this would block forever waiting on the 138 + // unread ch. 139 + done := make(chan error, 1) 140 + go func() { 141 + _, err := br.Publish(ctx, "rk2", "n", []byte(`{}`)) 142 + done <- err 143 + }() 144 + select { 145 + case err := <-done: 146 + if err != nil { 147 + t.Fatalf("publish 2: %v", err) 148 + } 149 + case <-time.After(time.Second): 150 + t.Fatal("Publish blocked on un-drained subscriber") 151 + } 152 + 153 + // One drain is enough — cursor-based catch-up will pick up *both* 154 + // rows in a single EventsAfter call. 155 + <-ch 156 + rows, err := s.EventsAfter(ctx, 0) 157 + if err != nil { 158 + t.Fatalf("EventsAfter: %v", err) 159 + } 160 + if len(rows) != 2 { 161 + t.Fatalf("expected 2 rows after 2 publishes, got %d", len(rows)) 162 + } 163 + } 164 + 165 + // TestBrokerUnsubscribeStopsDelivery confirms an unsubscribed channel 166 + // no longer receives wake-ups. Without this we'd leak signals to dead 167 + // websockets and (worse) hold their channels in the broker map. 168 + func TestBrokerUnsubscribeStopsDelivery(t *testing.T) { 169 + s := newTestStore(t) 170 + ctx := context.Background() 171 + br := newBroker(s) 172 + 173 + ch := br.Subscribe() 174 + br.Unsubscribe(ch) 175 + 176 + if _, err := br.Publish(ctx, "rk", "n", []byte(`{}`)); err != nil { 177 + t.Fatalf("publish: %v", err) 178 + } 179 + select { 180 + case <-ch: 181 + t.Fatal("unsubscribed channel still received signal") 182 + case <-time.After(50 * time.Millisecond): 183 + } 184 + } 185 + 186 + // TestEventsHandlerStreamsLiveAndBackfill exercises the full HTTP 187 + // surface: open a websocket, observe a backfill of pre-existing rows, 188 + // publish a new row, observe it arrive live, then reconnect with a 189 + // cursor and observe only events strictly after that cursor. 190 + func TestEventsHandlerStreamsLiveAndBackfill(t *testing.T) { 191 + s := newTestStore(t) 192 + br := newBroker(s) 193 + ctx := context.Background() 194 + 195 + // Seed two rows so the first connection has something to backfill. 196 + pre1, err := br.Publish(ctx, "rk-pre1", "sh.tangled.pipeline.status", []byte(`{"i":1}`)) 197 + if err != nil { 198 + t.Fatalf("seed 1: %v", err) 199 + } 200 + pre2, err := br.Publish(ctx, "rk-pre2", "sh.tangled.pipeline.status", []byte(`{"i":2}`)) 201 + if err != nil { 202 + t.Fatalf("seed 2: %v", err) 203 + } 204 + 205 + // Boot the handler behind an httptest server. Using a discarding 206 + // logger keeps test output quiet. 207 + logger := slog.New(slog.NewTextHandler(io.Discard, nil)) 208 + srv := httptest.NewServer(eventsHandler(logger, br)) 209 + t.Cleanup(srv.Close) 210 + 211 + // First connection: no cursor, expect both seeded rows plus a 212 + // freshly published live row. 213 + c1 := dialEvents(t, srv.URL, 0) 214 + defer c1.Close() 215 + 216 + got1 := readEnvelope(t, c1) 217 + if got1.Created != pre1 || got1.Rkey != "rk-pre1" { 218 + t.Fatalf("first frame = %+v, want pre1 (cursor=%d)", got1, pre1) 219 + } 220 + got2 := readEnvelope(t, c1) 221 + if got2.Created != pre2 || got2.Rkey != "rk-pre2" { 222 + t.Fatalf("second frame = %+v, want pre2 (cursor=%d)", got2, pre2) 223 + } 224 + // Verify the wire envelope's `event` field round-trips as the raw 225 + // record body, not a re-encoded blob. 226 + if strings.TrimSpace(string(got2.Event)) != `{"i":2}` { 227 + t.Fatalf("event body = %q, want %q", got2.Event, `{"i":2}`) 228 + } 229 + 230 + // Live publish — handler should wake on broker signal and emit it. 231 + live, err := br.Publish(ctx, "rk-live", "sh.tangled.pipeline.status", []byte(`{"i":3}`)) 232 + if err != nil { 233 + t.Fatalf("live publish: %v", err) 234 + } 235 + got3 := readEnvelope(t, c1) 236 + if got3.Created != live || got3.Rkey != "rk-live" { 237 + t.Fatalf("live frame = %+v, want rk-live (cursor=%d)", got3, live) 238 + } 239 + 240 + // Second connection with cursor=pre2: must skip pre1 and pre2, 241 + // receive only the live row. No timeout is set; if the handler 242 + // over-delivers we'll fail in readEnvelope's assert below. 243 + c2 := dialEvents(t, srv.URL, pre2) 244 + defer c2.Close() 245 + got := readEnvelope(t, c2) 246 + if got.Created != live || got.Rkey != "rk-live" { 247 + t.Fatalf("cursor-resume frame = %+v, want rk-live (cursor=%d)", got, live) 248 + } 249 + } 250 + 251 + // TestEventsHandlerBadCursorStartsFromZero confirms a malformed cursor 252 + // query parameter doesn't 4xx the upgrade — the handler logs and falls 253 + // back to the full backfill, matching the upstream spindle's behaviour. 254 + func TestEventsHandlerBadCursorStartsFromZero(t *testing.T) { 255 + s := newTestStore(t) 256 + br := newBroker(s) 257 + ctx := context.Background() 258 + 259 + if _, err := br.Publish(ctx, "rk", "sh.tangled.pipeline.status", []byte(`{}`)); err != nil { 260 + t.Fatalf("publish: %v", err) 261 + } 262 + 263 + logger := slog.New(slog.NewTextHandler(io.Discard, nil)) 264 + srv := httptest.NewServer(eventsHandler(logger, br)) 265 + t.Cleanup(srv.Close) 266 + 267 + // Build the URL by hand so we can inject a non-numeric cursor. 268 + u, _ := url.Parse(srv.URL) 269 + u.Scheme = "ws" 270 + q := u.Query() 271 + q.Set("cursor", "not-a-number") 272 + u.RawQuery = q.Encode() 273 + 274 + conn, _, err := websocket.DefaultDialer.Dial(u.String(), nil) 275 + if err != nil { 276 + t.Fatalf("dial: %v", err) 277 + } 278 + defer conn.Close() 279 + 280 + conn.SetReadDeadline(time.Now().Add(2 * time.Second)) 281 + _, msg, err := conn.ReadMessage() 282 + if err != nil { 283 + t.Fatalf("read: %v", err) 284 + } 285 + var env eventsEnvelope 286 + if err := json.Unmarshal(msg, &env); err != nil { 287 + t.Fatalf("unmarshal: %v", err) 288 + } 289 + if env.Rkey != "rk" { 290 + t.Fatalf("expected backfill of seeded row, got envelope %+v", env) 291 + } 292 + } 293 + 294 + // dialEvents opens a websocket against an httptest server (which 295 + // returns http://) by rewriting the scheme to ws://. cursor=0 omits the 296 + // query parameter entirely so we exercise the "no cursor" code path. 297 + func dialEvents(t *testing.T, base string, cursor int64) *websocket.Conn { 298 + t.Helper() 299 + u, err := url.Parse(base) 300 + if err != nil { 301 + t.Fatalf("parse url: %v", err) 302 + } 303 + u.Scheme = "ws" 304 + if cursor != 0 { 305 + q := u.Query() 306 + q.Set("cursor", strconv.FormatInt(cursor, 10)) 307 + u.RawQuery = q.Encode() 308 + } 309 + conn, _, err := websocket.DefaultDialer.Dial(u.String(), http.Header{}) 310 + if err != nil { 311 + t.Fatalf("dial %s: %v", u, err) 312 + } 313 + return conn 314 + } 315 + 316 + // readEnvelope reads one TextMessage frame, decodes it as the spindle 317 + // wire envelope, and returns it. It enforces a read deadline so a 318 + // handler bug that fails to flush events doesn't hang the test forever. 319 + func readEnvelope(t *testing.T, conn *websocket.Conn) eventsEnvelope { 320 + t.Helper() 321 + conn.SetReadDeadline(time.Now().Add(2 * time.Second)) 322 + mt, msg, err := conn.ReadMessage() 323 + if err != nil { 324 + t.Fatalf("read: %v", err) 325 + } 326 + if mt != websocket.TextMessage { 327 + t.Fatalf("frame type = %d, want TextMessage", mt) 328 + } 329 + var env eventsEnvelope 330 + if err := json.Unmarshal(msg, &env); err != nil { 331 + t.Fatalf("decode envelope: %v (raw: %s)", err, msg) 332 + } 333 + return env 334 + } 335 +
+101 -16
http.go
··· 21 21 "fmt" 22 22 "log/slog" 23 23 "net/http" 24 + "strconv" 24 25 "time" 25 26 26 27 "github.com/gorilla/websocket" ··· 31 32 // cancelled or the listener returns a fatal error. On ctx cancellation it 32 33 // performs a graceful shutdown with a bounded timeout. 33 34 // 34 - // The logger is read from ctx via loggerFrom. 35 - func runHTTP(ctx context.Context, cfg config) error { 35 + // The logger is read from ctx via loggerFrom. The broker is the 36 + // in-process pub/sub used by /events to fan published records out to 37 + // connected websocket subscribers. 38 + func runHTTP(ctx context.Context, cfg config, br *broker) error { 36 39 logger := loggerFrom(ctx) 37 40 38 41 mux := http.NewServeMux() 39 42 mux.HandleFunc("GET /", rootHandler()) 40 - mux.HandleFunc("GET /events", eventsHandler(logger)) 43 + mux.HandleFunc("GET /events", eventsHandler(logger, br)) 41 44 mux.HandleFunc("GET /xrpc/"+tangled.OwnerNSID, ownerHandler(logger, cfg.OwnerDID)) 42 45 mux.HandleFunc("POST /webhooks/buildkite", buildkiteWebhookHandler()) 43 46 ··· 97 100 } 98 101 } 99 102 100 - // eventsHandler upgrades to a WebSocket and emits no events yet. It exists 101 - // so the Tangled appview can connect; once we wire up Buildkite webhooks 102 - // this is where sh.tangled.pipeline.status frames will be sent. 103 + // eventsHandler upgrades to a WebSocket and streams persisted records 104 + // to the connected client. The wire protocol mirrors the upstream 105 + // Tangled spindle so the appview's eventconsumer treats us as a 106 + // drop-in source: 107 + // 108 + // - Optional ?cursor=<int64> resumes after that rowid; absent or 0 109 + // means "from the beginning of our retained log". 110 + // - We do a backfill pass first (everything with created > cursor), 111 + // then loop: on each broker signal, drain new rows; on a 30s 112 + // timer, write a websocket ping so intermediaries don't idle the 113 + // connection out. 103 114 // 104 - // We send a periodic ping to keep intermediaries (load balancers, tunnels) 105 - // from idling the connection, and watch for client reads to detect a 106 - // disconnect. 107 - func eventsHandler(logger *slog.Logger) http.HandlerFunc { 115 + // We subscribe to the broker *before* the backfill so any Publish that 116 + // races between the cursor read and the loop entry is captured by the 117 + // pending channel signal — the loop will see it on its first iteration 118 + // and call streamEvents again, which is idempotent on the cursor. 119 + func eventsHandler(logger *slog.Logger, br *broker) http.HandlerFunc { 108 120 upgrader := websocket.Upgrader{ 109 121 ReadBufferSize: 1024, 110 122 WriteBufferSize: 1024, ··· 116 128 return 117 129 } 118 130 defer conn.Close() 119 - logger.Debug("events client connected", "remote", r.RemoteAddr) 131 + 132 + // Parse the resume cursor up front. An unparseable cursor is a 133 + // client bug, but rather than 4xx the upgraded connection we 134 + // log it and start from zero — same behaviour as the upstream 135 + // spindle. 136 + var cursor int64 137 + if raw := r.URL.Query().Get("cursor"); raw != "" { 138 + parsed, err := strconv.ParseInt(raw, 10, 64) 139 + if err != nil { 140 + logger.Warn("events: bad cursor, starting from 0", 141 + "cursor", raw, "err", err, 142 + ) 143 + } else { 144 + cursor = parsed 145 + } 146 + } 147 + logger.Debug("events client connected", 148 + "remote", r.RemoteAddr, "cursor", cursor, 149 + ) 150 + 151 + // Subscribe before the backfill so a Publish that races between 152 + // the EventsAfter read and our select loop is captured by the 153 + // pending channel signal — we'll re-drain on the first wake-up. 154 + sig := br.Subscribe() 155 + defer br.Unsubscribe(sig) 120 156 121 157 ctx, cancel := context.WithCancel(r.Context()) 122 158 defer cancel() 123 159 124 - // Detect client disconnect by trying to read; we don't expect any 125 - // payloads from the client, so any read result (including EOF) 126 - // signals the connection has gone away. 160 + // Detect client disconnect by trying to read; we don't expect 161 + // any payloads from the client, so any read outcome (including 162 + // EOF) signals the connection has gone away. 127 163 go func() { 128 164 for { 129 165 if _, _, err := conn.NextReader(); err != nil { ··· 133 169 } 134 170 }() 135 171 172 + // Initial backfill. If this fails the connection is unusable 173 + // (we can't promise ordering after a partial write) so just 174 + // return and let the client reconnect with the same cursor. 175 + if err := streamEvents(ctx, conn, br.st, &cursor); err != nil { 176 + logger.Debug("events backfill ended", "err", err, "cursor", cursor) 177 + return 178 + } 179 + 136 180 ticker := time.NewTicker(30 * time.Second) 137 181 defer ticker.Stop() 138 182 for { 139 183 select { 140 184 case <-ctx.Done(): 141 - logger.Debug("events client disconnected", "remote", r.RemoteAddr) 185 + logger.Debug("events client disconnected", 186 + "remote", r.RemoteAddr, "cursor", cursor, 187 + ) 142 188 return 189 + case <-sig: 190 + if err := streamEvents(ctx, conn, br.st, &cursor); err != nil { 191 + logger.Debug("events stream ended", "err", err, "cursor", cursor) 192 + return 193 + } 143 194 case <-ticker.C: 144 - if err := conn.WriteControl(websocket.PingMessage, nil, time.Now().Add(time.Second)); err != nil { 195 + if err := conn.WriteControl( 196 + websocket.PingMessage, nil, 197 + time.Now().Add(time.Second), 198 + ); err != nil { 145 199 logger.Debug("events ping failed", "err", err) 146 200 return 147 201 } ··· 149 203 } 150 204 } 151 205 } 206 + 207 + // streamEvents drains every event row with `created > *cursor`, writes 208 + // each as a wire envelope frame, and advances *cursor in lockstep. The 209 + // cursor is updated *after* the write succeeds so a half-flushed batch 210 + // (interrupted by a websocket error) replays cleanly on the next 211 + // connection. 212 + // 213 + // It is safe to call repeatedly: when there are no new rows the query 214 + // returns an empty slice and we noop. 215 + func streamEvents(ctx context.Context, conn *websocket.Conn, st *store, cursor *int64) error { 216 + rows, err := st.EventsAfter(ctx, *cursor) 217 + if err != nil { 218 + return fmt.Errorf("read events: %w", err) 219 + } 220 + for _, row := range rows { 221 + frame, err := json.Marshal(eventsEnvelope{ 222 + Rkey: row.Rkey, 223 + Nsid: row.Nsid, 224 + Event: row.EventJSON, 225 + Created: row.Created, 226 + }) 227 + if err != nil { 228 + return fmt.Errorf("marshal envelope: %w", err) 229 + } 230 + if err := conn.WriteMessage(websocket.TextMessage, frame); err != nil { 231 + return fmt.Errorf("write frame: %w", err) 232 + } 233 + *cursor = row.Created 234 + } 235 + return nil 236 + }
+7 -1
main.go
··· 108 108 }() 109 109 logger.Info("store open", "path", cfg.DBPath) 110 110 111 + // In-process broker for the /events fan-out. Wraps the store so 112 + // publishes are durable and reconnecting subscribers can resume by 113 + // cursor. Constructed before the consumers in case we ever want 114 + // them to publish synthetic status events at startup. 115 + br := newBroker(st) 116 + 111 117 // Start the knot event-stream consumer first so the jetstream 112 118 // loop has somewhere to register newly-observed knots into. 113 119 knots, err := startKnotConsumer(ctx, cfg, st) ··· 127 133 128 134 // Run the HTTP server. This blocks until ctx is cancelled or the 129 135 // listener errors. 130 - if err := runHTTP(ctx, cfg); err != nil { 136 + if err := runHTTP(ctx, cfg, br); err != nil { 131 137 logger.Error("http server error", "err", err) 132 138 os.Exit(1) 133 139 }
+84
store.go
··· 21 21 import ( 22 22 "context" 23 23 "database/sql" 24 + "encoding/json" 24 25 "errors" 25 26 "fmt" 26 27 "strconv" 28 + "time" 27 29 28 30 _ "github.com/mattn/go-sqlite3" 29 31 ) ··· 275 277 } 276 278 return nil 277 279 } 280 + 281 + // EventRow is one row of the events table. It represents an outbound 282 + // record we want to deliver to /events websocket subscribers, in the 283 + // shape callers actually need (raw record JSON, not stringly-typed). 284 + type EventRow struct { 285 + // Created is the assigned monotonic rowid; doubles as the cursor 286 + // value subscribers use to resume. 287 + Created int64 288 + // Rkey is the ATProto record key. For sh.tangled.pipeline.status 289 + // records this is the rkey we mint when publishing. 290 + Rkey string 291 + // Nsid is the lexicon collection (e.g. sh.tangled.pipeline.status). 292 + Nsid string 293 + // EventJSON is the record body verbatim — held as RawMessage so 294 + // the /events handler can splice it into the wire envelope without 295 + // an unmarshal/remarshal round-trip. 296 + EventJSON json.RawMessage 297 + } 298 + 299 + // InsertEvent appends an event row and returns its assigned `created` 300 + // (rowid) cursor. Storage is the source of truth for fan-out, so we 301 + // write here even if zero subscribers are connected — a subscriber that 302 + // connects later (with an old cursor) will pick the row up via 303 + // EventsAfter. 304 + // 305 + // eventJSON must be a valid JSON object; we store it verbatim. Length 306 + // validation is intentionally absent — the schema accepts arbitrary 307 + // TEXT and SQLite handles huge blobs fine for our scale. 308 + func (s *store) InsertEvent(ctx context.Context, rkey, nsid string, eventJSON []byte) (int64, error) { 309 + res, err := s.db.ExecContext(ctx, 310 + `INSERT INTO events (rkey, nsid, event_json, inserted_at) 311 + VALUES (?, ?, ?, ?)`, 312 + rkey, nsid, string(eventJSON), 313 + time.Now().UTC().Format(time.RFC3339Nano), 314 + ) 315 + if err != nil { 316 + return 0, fmt.Errorf("insert event: %w", err) 317 + } 318 + id, err := res.LastInsertId() 319 + if err != nil { 320 + return 0, fmt.Errorf("event last insert id: %w", err) 321 + } 322 + return id, nil 323 + } 324 + 325 + // EventsAfter returns every event row with `created` strictly greater 326 + // than cursor, in cursor order. Used by /events to backfill a 327 + // reconnecting subscriber and to drain newly-published rows on each 328 + // broker notification. 329 + // 330 + // Pass cursor=0 to get the full log from the beginning, which is what 331 + // happens when a subscriber connects without a ?cursor= query param. 332 + func (s *store) EventsAfter(ctx context.Context, cursor int64) ([]EventRow, error) { 333 + rows, err := s.db.QueryContext(ctx, 334 + `SELECT created, rkey, nsid, event_json 335 + FROM events 336 + WHERE created > ? 337 + ORDER BY created ASC`, 338 + cursor, 339 + ) 340 + if err != nil { 341 + return nil, fmt.Errorf("query events: %w", err) 342 + } 343 + defer rows.Close() 344 + 345 + out := []EventRow{} 346 + for rows.Next() { 347 + var ( 348 + ev EventRow 349 + raw string 350 + ) 351 + if err := rows.Scan(&ev.Created, &ev.Rkey, &ev.Nsid, &raw); err != nil { 352 + return nil, fmt.Errorf("scan event: %w", err) 353 + } 354 + ev.EventJSON = json.RawMessage(raw) 355 + out = append(out, ev) 356 + } 357 + if err := rows.Err(); err != nil { 358 + return nil, fmt.Errorf("iterate events: %w", err) 359 + } 360 + return out, nil 361 + }
+21
store_migrate.go
··· 57 57 created_at TEXT NOT NULL, 58 58 PRIMARY KEY (did, rkey) 59 59 ); 60 + 61 + -- Outbound event log. Each row is one record we want to fan out to 62 + -- connected /events websocket subscribers (typically the Tangled 63 + -- appview) — today only sh.tangled.pipeline.status. 64 + -- 65 + -- We persist instead of pushing through an in-memory channel so that 66 + -- (a) a reconnecting subscriber can resume from a cursor without 67 + -- missing events that happened during the gap, and 68 + -- (b) slow subscribers can't make us drop events for fast ones — they 69 + -- simply lag behind in the rowid space. 70 + -- 71 + -- AUTOINCREMENT (vs plain INTEGER PRIMARY KEY) guarantees rowids 72 + -- strictly increase and never get reused if a row is ever deleted, so 73 + -- treating the created column as a monotonic cursor is safe forever. 74 + CREATE TABLE IF NOT EXISTS events ( 75 + created INTEGER PRIMARY KEY AUTOINCREMENT, 76 + rkey TEXT NOT NULL, 77 + nsid TEXT NOT NULL, 78 + event_json TEXT NOT NULL, 79 + inserted_at TEXT NOT NULL 80 + ); 60 81 ` 61 82 62 83 // migrate applies the schema. Safe to call repeatedly.