Select the types of activity you want to include in your feed.
Create Sonar, an ATProto RepoSync Stats Generator (#217)
Sonar is an ATProto SubscribeRepos firehose listener that emits
Prometheus metrics to characterize the kind and volume of content moving
through the firehose.
···2121 go build ./cmd/stress
2222 go build ./cmd/fakermaker
2323 go build ./cmd/labelmaker
2424+ go build -o ./sonar-cli ./cmd/sonar
24252526.PHONY: all
2627all: build
···7475.PHONY: run-dev-search
7576run-dev-search: .env ## Runs search daemon for local dev
7677 GOLOG_LOG_LEVEL=info go run ./cmd/palomar run
7878+7979+.PHONY: sonar-up
8080+sonar-up: # Runs sonar docker container
8181+ docker compose -f cmd/sonar/docker-compose.yml up --build -d || docker-compose -f cmd/sonar/docker-compose.yml up --build -d
+29
cmd/sonar/Dockerfile
···11+# Stage 1: Build the Go binary
22+FROM golang:1.20.5-alpine AS builder
33+44+# Install SSL ca certificates.
55+RUN apk update && apk add --no-cache ca-certificates && update-ca-certificates
66+77+# Create a directory for the application
88+WORKDIR /app
99+1010+# Fetch dependencies
1111+COPY go.mod go.sum ./
1212+RUN go mod download
1313+1414+COPY . .
1515+1616+# Build the application
1717+RUN CGO_ENABLED=0 GOOS=linux go build -o /sonar ./cmd/sonar
1818+1919+# Stage 2: Build a minimal Docker image
2020+FROM scratch
2121+2222+# Import the SSL certificates from the first stage.
2323+COPY --from=builder /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
2424+2525+# Copy the binary from the first stage.
2626+COPY --from=builder /sonar /sonar
2727+2828+# Set the startup command to run the binary
2929+CMD ["/sonar"]
···11+package sonar
22+33+import (
44+ "github.com/prometheus/client_golang/prometheus"
55+ "github.com/prometheus/client_golang/prometheus/promauto"
66+)
77+88+// Initialize Prometheus Metrics for total number of posts processed
99+var eventsProcessedCounter = promauto.NewCounterVec(prometheus.CounterOpts{
1010+ Name: "sonar_events_processed_total",
1111+ Help: "The total number of firehose events processed by Sonar",
1212+}, []string{"event_type"})
1313+1414+var rebasesProcessedCounter = promauto.NewCounter(prometheus.CounterOpts{
1515+ Name: "sonar_rebases_processed_total",
1616+ Help: "The total number of rebase operations processed by Sonar",
1717+})
1818+1919+var recordsProcessedCounter = promauto.NewCounterVec(prometheus.CounterOpts{
2020+ Name: "sonar_records_processed_total",
2121+ Help: "The total number of records processed by Sonar",
2222+}, []string{"record_type"})
2323+2424+var quoteRepostsProcessedCounter = promauto.NewCounter(prometheus.CounterOpts{
2525+ Name: "sonar_quote_reposts_processed_total",
2626+ Help: "The total number quote repost operations processed by Sonar",
2727+})
2828+2929+var opsProcessedCounter = promauto.NewCounterVec(prometheus.CounterOpts{
3030+ Name: "sonar_ops_processed_total",
3131+ Help: "The total number of repo operations processed by Sonar",
3232+}, []string{"kind", "op_path"})
3333+3434+// Initialize Prometheus metrics for duration of processing events
3535+var eventProcessingDurationHistogram = promauto.NewHistogram(prometheus.HistogramOpts{
3636+ Name: "sonar_event_processing_duration_seconds",
3737+ Help: "The amount of time it takes to process a firehose event",
3838+ Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
3939+})
4040+4141+var lastSeqGauge = promauto.NewGauge(prometheus.GaugeOpts{
4242+ Name: "sonar_last_seq",
4343+ Help: "The last sequence number processed",
4444+})
4545+4646+var lastSeqProcessedAtGauge = promauto.NewGauge(prometheus.GaugeOpts{
4747+ Name: "sonar_last_seq_processed_at",
4848+ Help: "The timestamp of the last sequence number processed",
4949+})
5050+5151+var lastSeqCreatedAtGauge = promauto.NewGauge(prometheus.GaugeOpts{
5252+ Name: "sonar_last_seq_created_at",
5353+ Help: "The timestamp of the last sequence number created",
5454+})
5555+5656+var lastSeqCommittedAtGauge = promauto.NewGauge(prometheus.GaugeOpts{
5757+ Name: "sonar_last_seq_committed_at",
5858+ Help: "The commit timestamp of the last sequence number processed",
5959+})
+316
sonar/sonar.go
···11+package sonar
22+33+import (
44+ "bytes"
55+ "context"
66+ "fmt"
77+ "os"
88+ "strings"
99+ "sync"
1010+ "time"
1111+1212+ comatproto "github.com/bluesky-social/indigo/api/atproto"
1313+ "github.com/bluesky-social/indigo/api/bsky"
1414+ lexutil "github.com/bluesky-social/indigo/lex/util"
1515+ "github.com/bluesky-social/indigo/util"
1616+ "github.com/goccy/go-json"
1717+ "github.com/labstack/gommon/log"
1818+1919+ "github.com/bluesky-social/indigo/events"
2020+ "github.com/bluesky-social/indigo/repo"
2121+ "github.com/bluesky-social/indigo/repomgr"
2222+ "go.opentelemetry.io/otel"
2323+ "go.uber.org/zap"
2424+)
2525+2626+type Sonar struct {
2727+ Progress *Progress
2828+ ProgMux sync.Mutex
2929+ Logger *zap.SugaredLogger
3030+ CursorFile string
3131+}
3232+3333+type Progress struct {
3434+ LastSeq int64 `json:"last_seq"`
3535+ LastSeqProcessedAt time.Time `json:"last_seq_processed_at"`
3636+}
3737+3838+func (s *Sonar) WriteCursorFile() error {
3939+ // Marshal the cursor file
4040+ s.ProgMux.Lock()
4141+ data, err := json.Marshal(s.Progress)
4242+ s.ProgMux.Unlock()
4343+ if err != nil {
4444+ return fmt.Errorf("failed to marshal cursor file: %+v", err)
4545+ }
4646+4747+ // Write the cursor file
4848+ err = os.WriteFile(s.CursorFile, data, 0644)
4949+ if err != nil {
5050+ return fmt.Errorf("failed to write cursor file: %+v", err)
5151+ }
5252+5353+ return nil
5454+}
5555+5656+func (s *Sonar) ReadCursorFile() error {
5757+ // Read the cursor file
5858+ data, err := os.ReadFile(s.CursorFile)
5959+ if err != nil {
6060+ return fmt.Errorf("failed to read cursor file: %+v", err)
6161+ }
6262+6363+ // Unmarshal the cursor file
6464+ s.ProgMux.Lock()
6565+ err = json.Unmarshal(data, s.Progress)
6666+ s.ProgMux.Unlock()
6767+ if err != nil {
6868+ return fmt.Errorf("failed to unmarshal cursor file: %+v", err)
6969+ }
7070+7171+ return nil
7272+}
7373+7474+func NewSonar(logger *zap.SugaredLogger, cursorFile string) (*Sonar, error) {
7575+ s := Sonar{
7676+ Progress: &Progress{
7777+ LastSeq: -1,
7878+ },
7979+ Logger: logger,
8080+ ProgMux: sync.Mutex{},
8181+ CursorFile: cursorFile,
8282+ }
8383+8484+ // Check to see if the cursor file exists
8585+ if _, err := os.Stat(cursorFile); os.IsNotExist(err) {
8686+ logger.Infof("cursor file does not exist, creating %s", cursorFile)
8787+ // Create the cursor file
8888+ err := s.WriteCursorFile()
8989+ if err != nil {
9090+ return nil, fmt.Errorf("failed to write cursor file: %+v", err)
9191+ }
9292+ } else {
9393+ // Read the cursor file
9494+ err := s.ReadCursorFile()
9595+ if err != nil {
9696+ logger.Errorf("read cursor file, will start drinking from live: %+v", err.Error())
9797+ }
9898+ }
9999+100100+ return &s, nil
101101+}
102102+103103+func (s *Sonar) HandleStreamEvent(ctx context.Context, xe *events.XRPCStreamEvent) error {
104104+ ctx, span := otel.Tracer("sonar").Start(ctx, "HandleStreamEvent")
105105+ defer span.End()
106106+107107+ switch {
108108+ case xe.RepoCommit != nil:
109109+ eventsProcessedCounter.WithLabelValues("repo_commit").Inc()
110110+ return s.HandleRepoCommit(ctx, xe.RepoCommit)
111111+ case xe.RepoHandle != nil:
112112+ eventsProcessedCounter.WithLabelValues("repo_handle").Inc()
113113+ now := time.Now()
114114+ s.ProgMux.Lock()
115115+ s.Progress.LastSeq = xe.RepoHandle.Seq
116116+ s.Progress.LastSeqProcessedAt = now
117117+ s.ProgMux.Unlock()
118118+ // Parse time from the event time string
119119+ t, err := time.Parse(time.RFC3339, xe.RepoHandle.Time)
120120+ if err != nil {
121121+ log.Errorf("error parsing time: %+v", err)
122122+ return nil
123123+ }
124124+ lastSeqCommittedAtGauge.Set(float64(t.UnixNano()))
125125+ lastSeqProcessedAtGauge.Set(float64(now.UnixNano()))
126126+ lastSeqGauge.Set(float64(xe.RepoHandle.Seq))
127127+ case xe.RepoInfo != nil:
128128+ eventsProcessedCounter.WithLabelValues("repo_info").Inc()
129129+ case xe.RepoMigrate != nil:
130130+ eventsProcessedCounter.WithLabelValues("repo_migrate").Inc()
131131+ now := time.Now()
132132+ s.ProgMux.Lock()
133133+ s.Progress.LastSeq = xe.RepoMigrate.Seq
134134+ s.Progress.LastSeqProcessedAt = time.Now()
135135+ s.ProgMux.Unlock()
136136+ // Parse time from the event time string
137137+ t, err := time.Parse(time.RFC3339, xe.RepoMigrate.Time)
138138+ if err != nil {
139139+ log.Errorf("error parsing time: %+v", err)
140140+ return nil
141141+ }
142142+ lastSeqCommittedAtGauge.Set(float64(t.UnixNano()))
143143+ lastSeqProcessedAtGauge.Set(float64(now.UnixNano()))
144144+ lastSeqGauge.Set(float64(xe.RepoHandle.Seq))
145145+ case xe.RepoTombstone != nil:
146146+ eventsProcessedCounter.WithLabelValues("repo_tombstone").Inc()
147147+ case xe.LabelInfo != nil:
148148+ eventsProcessedCounter.WithLabelValues("label_info").Inc()
149149+ case xe.LabelLabels != nil:
150150+ eventsProcessedCounter.WithLabelValues("label_labels").Inc()
151151+ case xe.Error != nil:
152152+ eventsProcessedCounter.WithLabelValues("error").Inc()
153153+ }
154154+ return nil
155155+}
156156+157157+func (s *Sonar) HandleRepoCommit(ctx context.Context, evt *comatproto.SyncSubscribeRepos_Commit) error {
158158+ ctx, span := otel.Tracer("sonar").Start(ctx, "HandleRepoCommit")
159159+ defer span.End()
160160+161161+ start := time.Now()
162162+163163+ s.ProgMux.Lock()
164164+ s.Progress.LastSeq = evt.Seq
165165+ s.Progress.LastSeqProcessedAt = start
166166+ s.ProgMux.Unlock()
167167+168168+ lastSeqGauge.Set(float64(evt.Seq))
169169+170170+ log := s.Logger.With("repo", evt.Repo, "seq", evt.Seq, "commit", evt.Commit)
171171+172172+ rr, err := repo.ReadRepoFromCar(ctx, bytes.NewReader(evt.Blocks))
173173+ if err != nil {
174174+ log.Errorf("failed to read repo from car: %+v\n", err)
175175+ return nil
176176+ }
177177+178178+ if evt.Rebase {
179179+ log.Debug("rebase")
180180+ rebasesProcessedCounter.Inc()
181181+ }
182182+183183+ // Parse time from the event time string
184184+ t, err := time.Parse(time.RFC3339, evt.Time)
185185+ if err != nil {
186186+ log.Errorf("error parsing time: %+v", err)
187187+ return nil
188188+ }
189189+190190+ lastSeqCommittedAtGauge.Set(float64(t.UnixNano()))
191191+ lastSeqProcessedAtGauge.Set(float64(start.UnixNano()))
192192+193193+ for _, op := range evt.Ops {
194194+ collection := strings.Split(op.Path, "/")[0]
195195+196196+ ek := repomgr.EventKind(op.Action)
197197+ log = log.With("action", op.Action, "collection", collection)
198198+199199+ opsProcessedCounter.WithLabelValues(op.Action, collection).Inc()
200200+201201+ switch ek {
202202+ case repomgr.EvtKindCreateRecord, repomgr.EvtKindUpdateRecord:
203203+ // Grab the record from the merkel tree
204204+ rc, rec, err := rr.GetRecord(ctx, op.Path)
205205+ if err != nil {
206206+ e := fmt.Errorf("getting record %s (%s) within seq %d for %s: %w", op.Path, *op.Cid, evt.Seq, evt.Repo, err)
207207+ log.Errorf("failed to get a record from the event: %+v\n", e)
208208+ break
209209+ }
210210+211211+ // Verify that the record cid matches the cid in the event
212212+ if lexutil.LexLink(rc) != *op.Cid {
213213+ e := fmt.Errorf("mismatch in record and op cid: %s != %s", rc, *op.Cid)
214214+ log.Errorf("failed to LexLink the record in the event: %+v\n", e)
215215+ break
216216+ }
217217+218218+ // Unpack the record and process it
219219+ switch rec := rec.(type) {
220220+ case *bsky.FeedPost:
221221+ recordsProcessedCounter.WithLabelValues("feed_post").Inc()
222222+ if rec.Embed != nil && rec.Embed.EmbedRecord != nil && rec.Embed.EmbedRecord.Record != nil {
223223+ quoteRepostsProcessedCounter.Inc()
224224+ }
225225+ // Parse time from the event time string
226226+ recCreatedAt, err := time.Parse(util.ISO8601, evt.Time)
227227+ if err != nil {
228228+ log.Errorf("error parsing time: %+v", err)
229229+ continue
230230+ }
231231+ lastSeqCreatedAtGauge.Set(float64(recCreatedAt.UnixNano()))
232232+ case *bsky.FeedLike:
233233+ recordsProcessedCounter.WithLabelValues("feed_like").Inc()
234234+ // Parse time from the event time string
235235+ recCreatedAt, err := time.Parse(util.ISO8601, evt.Time)
236236+ if err != nil {
237237+ log.Errorf("error parsing time: %+v", err)
238238+ continue
239239+ }
240240+ lastSeqCreatedAtGauge.Set(float64(recCreatedAt.UnixNano()))
241241+ case *bsky.FeedRepost:
242242+ recordsProcessedCounter.WithLabelValues("feed_repost").Inc()
243243+ // Parse time from the event time string
244244+ recCreatedAt, err := time.Parse(util.ISO8601, evt.Time)
245245+ if err != nil {
246246+ log.Errorf("error parsing time: %+v", err)
247247+ continue
248248+ }
249249+ lastSeqCreatedAtGauge.Set(float64(recCreatedAt.UnixNano()))
250250+ case *bsky.GraphBlock:
251251+ recordsProcessedCounter.WithLabelValues("graph_block").Inc()
252252+ // Parse time from the event time string
253253+ recCreatedAt, err := time.Parse(util.ISO8601, evt.Time)
254254+ if err != nil {
255255+ log.Errorf("error parsing time: %+v", err)
256256+ continue
257257+ }
258258+ lastSeqCreatedAtGauge.Set(float64(recCreatedAt.UnixNano()))
259259+ case *bsky.GraphFollow:
260260+ recordsProcessedCounter.WithLabelValues("graph_follow").Inc()
261261+ // Parse time from the event time string
262262+ recCreatedAt, err := time.Parse(util.ISO8601, evt.Time)
263263+ if err != nil {
264264+ log.Errorf("error parsing time: %+v", err)
265265+ continue
266266+ }
267267+ lastSeqCreatedAtGauge.Set(float64(recCreatedAt.UnixNano()))
268268+ case *bsky.ActorProfile:
269269+ recordsProcessedCounter.WithLabelValues("actor_profile").Inc()
270270+ // Parse time from the event time string
271271+ recCreatedAt, err := time.Parse(util.ISO8601, evt.Time)
272272+ if err != nil {
273273+ log.Errorf("error parsing time: %+v", err)
274274+ continue
275275+ }
276276+ lastSeqCreatedAtGauge.Set(float64(recCreatedAt.UnixNano()))
277277+ case *bsky.FeedGenerator:
278278+ recordsProcessedCounter.WithLabelValues("feed_generator").Inc()
279279+ // Parse time from the event time string
280280+ recCreatedAt, err := time.Parse(util.ISO8601, evt.Time)
281281+ if err != nil {
282282+ log.Errorf("error parsing time: %+v", err)
283283+ continue
284284+ }
285285+ lastSeqCreatedAtGauge.Set(float64(recCreatedAt.UnixNano()))
286286+ case *bsky.GraphList:
287287+ recordsProcessedCounter.WithLabelValues("graph_list").Inc()
288288+ // Parse time from the event time string
289289+ recCreatedAt, err := time.Parse(util.ISO8601, evt.Time)
290290+ if err != nil {
291291+ log.Errorf("error parsing time: %+v", err)
292292+ continue
293293+ }
294294+ lastSeqCreatedAtGauge.Set(float64(recCreatedAt.UnixNano()))
295295+ case *bsky.GraphListitem:
296296+ recordsProcessedCounter.WithLabelValues("graph_listitem").Inc()
297297+ // Parse time from the event time string
298298+ recCreatedAt, err := time.Parse(util.ISO8601, evt.Time)
299299+ if err != nil {
300300+ log.Errorf("error parsing time: %+v", err)
301301+ continue
302302+ }
303303+ lastSeqCreatedAtGauge.Set(float64(recCreatedAt.UnixNano()))
304304+ default:
305305+ log.Warnf("unknown record type: %+v", rec)
306306+ }
307307+308308+ case repomgr.EvtKindDeleteRecord:
309309+ default:
310310+ log.Warnf("unknown event kind from op action: %+v", op.Action)
311311+ }
312312+ }
313313+314314+ eventProcessingDurationHistogram.Observe(time.Since(start).Seconds())
315315+ return nil
316316+}