···218218 firehoseConsumer := firehose.NewConsumer(firehoseConfig, feedIndex)
219219 firehoseConsumer.Start(ctx)
220220221221+ // Create and start profile watcher (separate Jetstream connection filtered
222222+ // to app.bsky.actor.profile events for known Arabica users only)
223223+ profileWatcher := firehose.NewProfileWatcher(firehoseConfig, feedIndex)
224224+ profileWatcher.Start(ctx)
225225+221226 // Wire up the feed service to use the firehose index
222227 adapter := firehose.NewFeedIndexAdapter(feedIndex)
223228 feedService.SetFirehoseIndex(adapter)
···327332 // This ensures users are added to the feed even if they had an existing session
328333 oauthManager.SetOnAuthSuccess(func(did string) {
329334 feedRegistry.Register(did)
335335+ profileWatcher.Watch(did)
330336 // Backfill the user's records (BackfillUser creates its own span
331337 // only when there is actual work to do, avoiding empty traces for
332338 // already-backfilled users)
+4
internal/firehose/config.go
···1414 "wss://jetstream2.us-west.bsky.network/subscribe",
1515}
16161717+// NSIDBlueskyProfile is the AT Protocol collection for user profile records.
1818+// Watched by ProfileWatcher (separate connection) for known Arabica users only.
1919+const NSIDBlueskyProfile = "app.bsky.actor.profile"
2020+1721// ArabicaCollections lists all Arabica lexicon collections to filter for
1822var ArabicaCollections = []string{
1923 atproto.NSIDBrew,
+10
internal/firehose/index.go
···10691069 return profile, nil
10701070}
1071107110721072+// InvalidateProfile removes a DID's profile from both the in-memory and persistent
10731073+// caches. The next GetProfile call will re-fetch from the API.
10741074+func (idx *FeedIndex) InvalidateProfile(did string) {
10751075+ idx.profileCacheMu.Lock()
10761076+ delete(idx.profileCache, did)
10771077+ idx.profileCacheMu.Unlock()
10781078+10791079+ _, _ = idx.db.Exec(`DELETE FROM profiles WHERE did = ?`, did)
10801080+}
10811081+10721082// GetKnownDIDs returns all DIDs that have created Arabica records
10731083func (idx *FeedIndex) GetKnownDIDs(ctx context.Context) ([]string, error) {
10741084 rows, err := idx.db.QueryContext(ctx, `SELECT did FROM known_dids`)
+251
internal/firehose/profile_watcher.go
···11+package firehose
22+33+import (
44+ "context"
55+ "encoding/json"
66+ "fmt"
77+ "net/url"
88+ "sync"
99+ "time"
1010+1111+ "github.com/gorilla/websocket"
1212+ "github.com/rs/zerolog/log"
1313+)
// ProfileWatcher is a dedicated Jetstream connection that subscribes to
// app.bsky.actor.profile events for known Arabica users only. Because Jetstream
// uses AND semantics when both wantedCollections and wantedDids are set, this
// must be a separate connection from the main consumer (which has no DID filter).
//
// Construct with NewProfileWatcher; the zero value is not usable because
// watchedDIDs and stopCh must be initialised.
type ProfileWatcher struct {
	// index is the feed index whose profile cache is invalidated when a
	// watched user's profile record changes.
	index *FeedIndex
	// endpoints are the Jetstream websocket URLs the run loop rotates through.
	endpoints []string

	// conn is the live websocket connection, or nil while disconnected.
	// Guarded by connMu, which also serialises data writes on the connection.
	conn   *websocket.Conn
	connMu sync.Mutex

	// watchedDIDs is the set of DIDs sent to Jetstream as wantedDids.
	// Guarded by watchedDIDsMu.
	watchedDIDs   map[string]struct{}
	watchedDIDsMu sync.RWMutex

	// endpointIdx is the index into endpoints used for the next connection
	// attempt; it is only touched by the run loop.
	endpointIdx int
	// stopCh is closed by Stop to ask the background goroutine to exit.
	stopCh chan struct{}
	// wg tracks the background goroutine started by Start so Stop can wait
	// for it.
	wg sync.WaitGroup
}
// profileOptionsUpdate is the JSON message written to an open Jetstream
// connection to replace its subscription filters without reconnecting.
type profileOptionsUpdate struct {
	// Type is set to "options_update" by sendOptionsUpdate.
	Type string `json:"type"`
	// Payload carries the complete new filter set; it replaces the filters
	// currently active on the connection rather than adding to them, which is
	// why sendOptionsUpdate always sends every watched DID.
	Payload struct {
		WantedCollections []string `json:"wantedCollections"`
		WantedDids        []string `json:"wantedDids"`
	} `json:"payload"`
}
4141+4242+// NewProfileWatcher creates a ProfileWatcher seeded with all currently known
4343+// Arabica DIDs from the index.
4444+func NewProfileWatcher(config *Config, index *FeedIndex) *ProfileWatcher {
4545+ dids, _ := index.GetKnownDIDs(context.Background())
4646+ watched := make(map[string]struct{}, len(dids))
4747+ for _, did := range dids {
4848+ watched[did] = struct{}{}
4949+ }
5050+ return &ProfileWatcher{
5151+ index: index,
5252+ endpoints: config.Endpoints,
5353+ watchedDIDs: watched,
5454+ stopCh: make(chan struct{}),
5555+ }
5656+}
5757+5858+// Watch adds a DID to the subscription. If connected, an options update is sent
5959+// immediately so Jetstream begins delivering that user's profile events.
6060+func (pw *ProfileWatcher) Watch(did string) {
6161+ pw.watchedDIDsMu.Lock()
6262+ _, already := pw.watchedDIDs[did]
6363+ pw.watchedDIDs[did] = struct{}{}
6464+ pw.watchedDIDsMu.Unlock()
6565+6666+ if !already {
6767+ pw.sendOptionsUpdate()
6868+ }
6969+}
7070+7171+// Start begins the profile watcher in a background goroutine. It will reconnect
7272+// automatically on failure, rotating through endpoints with exponential backoff.
7373+func (pw *ProfileWatcher) Start(ctx context.Context) {
7474+ pw.wg.Add(1)
7575+ go func() {
7676+ defer pw.wg.Done()
7777+ pw.run(ctx)
7878+ }()
7979+}
8080+8181+// Stop gracefully shuts down the watcher.
8282+func (pw *ProfileWatcher) Stop() {
8383+ close(pw.stopCh)
8484+ pw.connMu.Lock()
8585+ if pw.conn != nil {
8686+ pw.conn.Close()
8787+ }
8888+ pw.connMu.Unlock()
8989+ pw.wg.Wait()
9090+}
9191+9292+func (pw *ProfileWatcher) run(ctx context.Context) {
9393+ backoff := time.Second
9494+ maxBackoff := 30 * time.Second
9595+9696+ for {
9797+ select {
9898+ case <-ctx.Done():
9999+ return
100100+ case <-pw.stopCh:
101101+ return
102102+ default:
103103+ }
104104+105105+ // Skip connecting if we have no DIDs to watch yet — wait for the first Watch() call
106106+ pw.watchedDIDsMu.RLock()
107107+ n := len(pw.watchedDIDs)
108108+ pw.watchedDIDsMu.RUnlock()
109109+ if n == 0 {
110110+ select {
111111+ case <-ctx.Done():
112112+ return
113113+ case <-pw.stopCh:
114114+ return
115115+ case <-time.After(5 * time.Second):
116116+ }
117117+ continue
118118+ }
119119+120120+ endpoint := pw.endpoints[pw.endpointIdx]
121121+ err := pw.connectAndConsume(ctx, endpoint)
122122+123123+ if err != nil {
124124+ log.Warn().Err(err).Str("endpoint", endpoint).Msg("profile watcher: connection error")
125125+ pw.endpointIdx = (pw.endpointIdx + 1) % len(pw.endpoints)
126126+127127+ select {
128128+ case <-ctx.Done():
129129+ return
130130+ case <-pw.stopCh:
131131+ return
132132+ case <-time.After(backoff):
133133+ }
134134+135135+ backoff *= 2
136136+ if backoff > maxBackoff {
137137+ backoff = maxBackoff
138138+ }
139139+ } else {
140140+ backoff = time.Second
141141+ }
142142+ }
143143+}
144144+145145+func (pw *ProfileWatcher) connectAndConsume(ctx context.Context, endpoint string) error {
146146+ wsURL := pw.buildURL(endpoint)
147147+ log.Info().Str("url", wsURL).Msg("profile watcher: connecting")
148148+149149+ dialer := websocket.Dialer{HandshakeTimeout: 10 * time.Second}
150150+ conn, _, err := dialer.DialContext(ctx, wsURL, nil)
151151+ if err != nil {
152152+ return fmt.Errorf("failed to connect: %w", err)
153153+ }
154154+155155+ pw.connMu.Lock()
156156+ pw.conn = conn
157157+ pw.connMu.Unlock()
158158+159159+ log.Info().Str("endpoint", endpoint).Msg("profile watcher: connected")
160160+161161+ defer func() {
162162+ pw.connMu.Lock()
163163+ if pw.conn != nil {
164164+ pw.conn.Close()
165165+ pw.conn = nil
166166+ }
167167+ pw.connMu.Unlock()
168168+ }()
169169+170170+ for {
171171+ select {
172172+ case <-ctx.Done():
173173+ return ctx.Err()
174174+ case <-pw.stopCh:
175175+ return nil
176176+ default:
177177+ }
178178+179179+ conn.SetReadDeadline(time.Now().Add(60 * time.Second))
180180+ _, message, err := conn.ReadMessage()
181181+ if err != nil {
182182+ return fmt.Errorf("read error: %w", err)
183183+ }
184184+185185+ pw.processMessage(message)
186186+ }
187187+}
188188+189189+func (pw *ProfileWatcher) buildURL(endpoint string) string {
190190+ u, _ := url.Parse(endpoint)
191191+ q := u.Query()
192192+ q.Set("wantedCollections", NSIDBlueskyProfile)
193193+194194+ pw.watchedDIDsMu.RLock()
195195+ for did := range pw.watchedDIDs {
196196+ q.Add("wantedDids", did)
197197+ }
198198+ pw.watchedDIDsMu.RUnlock()
199199+200200+ u.RawQuery = q.Encode()
201201+ return u.String()
202202+}
203203+204204+func (pw *ProfileWatcher) sendOptionsUpdate() {
205205+ pw.connMu.Lock()
206206+ conn := pw.conn
207207+ pw.connMu.Unlock()
208208+209209+ if conn == nil {
210210+ return // will be applied via URL on next reconnect
211211+ }
212212+213213+ pw.watchedDIDsMu.RLock()
214214+ dids := make([]string, 0, len(pw.watchedDIDs))
215215+ for did := range pw.watchedDIDs {
216216+ dids = append(dids, did)
217217+ }
218218+ pw.watchedDIDsMu.RUnlock()
219219+220220+ var msg profileOptionsUpdate
221221+ msg.Type = "options_update"
222222+ msg.Payload.WantedCollections = []string{NSIDBlueskyProfile}
223223+ msg.Payload.WantedDids = dids
224224+225225+ data, err := json.Marshal(msg)
226226+ if err != nil {
227227+ return
228228+ }
229229+230230+ pw.connMu.Lock()
231231+ defer pw.connMu.Unlock()
232232+ if pw.conn != nil {
233233+ if err := pw.conn.WriteMessage(websocket.TextMessage, data); err != nil {
234234+ log.Warn().Err(err).Msg("profile watcher: failed to send options update")
235235+ }
236236+ }
237237+}
238238+239239+func (pw *ProfileWatcher) processMessage(data []byte) {
240240+ var event JetstreamEvent
241241+ if err := json.Unmarshal(data, &event); err != nil || event.Kind != "commit" || event.Commit == nil {
242242+ return
243243+ }
244244+ if event.Commit.Collection != NSIDBlueskyProfile {
245245+ return
246246+ }
247247+ if event.Commit.Operation == "create" || event.Commit.Operation == "update" {
248248+ pw.index.InvalidateProfile(event.DID)
249249+ log.Debug().Str("did", event.DID).Msg("profile watcher: invalidated profile cache")
250250+ }
251251+}
+8-36
internal/handlers/handlers.go
···88 "net/http"
99 "strconv"
1010 "strings"
1111- "sync"
1212- "time"
13111412 "arabica/internal/atproto"
1513 "arabica/internal/database"
···2725 "github.com/rs/zerolog/log"
2826)
29273030-// profileCacheTTL controls how long user profiles are cached before re-fetching.
3131-// Profiles (avatar, display name) change infrequently so 1 hour is reasonable.
3232-const profileCacheTTL = 1 * time.Hour
3333-3434-// cachedProfile holds a user profile with its fetch timestamp.
3535-type cachedProfile struct {
3636- profile *bff.UserProfile
3737- cachedAt time.Time
3838-}
39284029// Config holds handler configuration options
4130type Config struct {
···7059 pdsAdminURL string
7160 pdsAdminToken string
72617373- // profileCache caches user profiles (avatar, handle) by DID to avoid
7474- // hitting the Bluesky API on every page load.
7575- profileCache map[string]*cachedProfile
7676- profileCacheMu sync.RWMutex
7762}
78637964// NewHandler creates a new Handler with all required dependencies.
···9378 config: config,
9479 feedService: feedService,
9580 feedRegistry: feedRegistry,
9696- profileCache: make(map[string]*cachedProfile),
9781 }
9882}
9983···216200}
217201218202// getUserProfile fetches the profile for an authenticated user.
219219-// Results are cached by DID for profileCacheTTL to avoid hitting the
220220-// Bluesky API on every page load.
203203+// Routes through feedIndex (invalidated by ProfileWatcher on profile updates)
204204+// so the header stays fresh without a separate cache layer.
221205// Returns nil if unable to fetch profile (non-fatal error).
222206func (h *Handler) getUserProfile(ctx context.Context, did string) *bff.UserProfile {
223207 if did == "" {
224208 return nil
225209 }
226210227227- // Check cache
228228- h.profileCacheMu.RLock()
229229- if cached, ok := h.profileCache[did]; ok && time.Since(cached.cachedAt) < profileCacheTTL {
230230- h.profileCacheMu.RUnlock()
231231- return cached.profile
211211+ var profile *atproto.Profile
212212+ var err error
213213+ if h.feedIndex != nil {
214214+ profile, err = h.feedIndex.GetProfile(ctx, did)
215215+ } else {
216216+ profile, err = atproto.NewPublicClient().GetProfile(ctx, did)
232217 }
233233- h.profileCacheMu.RUnlock()
234234-235235- publicClient := atproto.NewPublicClient()
236236- profile, err := publicClient.GetProfile(ctx, did)
237218 if err != nil {
238219 log.Warn().Err(err).Str("did", did).Msg("Failed to fetch user profile for header")
239220 return nil
···248229 if profile.Avatar != nil {
249230 userProfile.Avatar = *profile.Avatar
250231 }
251251-252252- // Store in cache
253253- h.profileCacheMu.Lock()
254254- h.profileCache[did] = &cachedProfile{
255255- profile: userProfile,
256256- cachedAt: time.Now(),
257257- }
258258- h.profileCacheMu.Unlock()
259259-260232 return userProfile
261233}
262234