this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Track buffers on incoming and outgoing queues (#246)

Some additional fixes to speed up the BGS in here too.

authored by

Jaz and committed by
GitHub
b734acd1 9108e264

+225 -63
+3 -1
bgs/bgs.go
··· 481 481 } 482 482 }() 483 483 484 - evts, cleanup, err := bgs.events.Subscribe(ctx, func(evt *events.XRPCStreamEvent) bool { return true }, since) 484 + ident := c.RealIP() + "-" + c.Request().UserAgent() 485 + 486 + evts, cleanup, err := bgs.events.Subscribe(ctx, ident, func(evt *events.XRPCStreamEvent) bool { return true }, since) 485 487 if err != nil { 486 488 return err 487 489 }
+24 -7
bgs/fedmgr.go
··· 388 388 }, 389 389 } 390 390 391 - pool := events.NewConsumerPool(32, 20, rsc.EventHandler) 391 + pool := events.NewConsumerPool(32, 20, con.RemoteAddr().String(), rsc.EventHandler) 392 392 return events.HandleRepoStream(ctx, con, pool) 393 393 } 394 394 ··· 399 399 return nil 400 400 } 401 401 402 + type cursorSnapshot struct { 403 + id uint 404 + cursor int64 405 + } 406 + 402 407 // flushCursors updates the PDS cursors in the DB for all active subscriptions 403 408 func (s *Slurper) flushCursors(ctx context.Context) []error { 404 409 ctx, span := otel.Tracer("feedmgr").Start(ctx, "flushCursors") 405 410 defer span.End() 406 411 412 + var cursors []cursorSnapshot 413 + 407 414 s.lk.Lock() 408 - defer s.lk.Unlock() 415 + // Iterate over active subs and copy the current cursor 416 + for _, sub := range s.active { 417 + sub.lk.RLock() 418 + cursors = append(cursors, cursorSnapshot{ 419 + id: sub.pds.ID, 420 + cursor: sub.pds.Cursor, 421 + }) 422 + sub.lk.RUnlock() 423 + } 424 + s.lk.Unlock() 409 425 410 426 errs := []error{} 411 427 412 - // Iterate over active subs and update the PDS cursor in the DB 413 - for _, sub := range s.active { 414 - sub.lk.RLock() 415 - if err := s.db.WithContext(ctx).Model(models.PDS{}).Where("id = ?", sub.pds.ID).UpdateColumn("cursor", sub.pds.Cursor).Error; err != nil { 428 + tx := s.db.WithContext(ctx).Begin() 429 + for _, cursor := range cursors { 430 + if err := tx.WithContext(ctx).Model(models.PDS{}).Where("id = ?", cursor.id).UpdateColumn("cursor", cursor.cursor).Error; err != nil { 416 431 errs = append(errs, err) 417 432 } 418 - sub.lk.RUnlock() 433 + } 434 + if err := tx.WithContext(ctx).Commit().Error; err != nil { 435 + errs = append(errs, err) 419 436 } 420 437 421 438 return errs
+18 -17
carstore/bs.go
··· 69 69 70 70 Root models.DbCID `gorm:"index"` 71 71 DataStart int64 72 - Seq int `gorm:"index"` 72 + Seq int `gorm:"index:idx_car_shards_seq;index:idx_car_shards_usr_seq,priority:2,sort:desc"` 73 73 Path string 74 - Usr models.Uid `gorm:"index"` 74 + Usr models.Uid `gorm:"index:idx_car_shards_usr;index:idx_car_shards_usr_seq,priority:1"` 75 75 Rebase bool 76 76 } 77 77 ··· 275 275 276 276 var lastShard CarShard 277 277 // this is often slow (which is why we're caching it) but could be sped up with an extra index: 278 - // CREATE INDEX idx_car_shards_usr_id ON car_shards (usr, id DESC); 279 - if err := cs.meta.WithContext(ctx).Model(CarShard{}).Limit(1).Order("id desc").Find(&lastShard, "usr = ?", user).Error; err != nil { 278 + // CREATE INDEX idx_car_shards_usr_id ON car_shards (usr, seq DESC); 279 + if err := cs.meta.WithContext(ctx).Model(CarShard{}).Limit(1).Order("seq desc").Find(&lastShard, "usr = ?", user).Error; err != nil { 280 280 //if err := cs.meta.Model(CarShard{}).Where("user = ?", user).Last(&lastShard).Error; err != nil { 281 281 //if err != gorm.ErrRecordNotFound { 282 282 return nil, err ··· 653 653 654 654 // TODO: there should be a way to create the shard and block_refs that 655 655 // reference it in the same query, would save a lot of time 656 - if err := ds.cs.meta.WithContext(ctx).Transaction(func(tx *gorm.DB) error { 657 - if err := tx.WithContext(ctx).Create(shard).Error; err != nil { 658 - return fmt.Errorf("failed to create shard in DB tx: %w", err) 659 - } 660 - ds.cs.putLastShardCache(ds.user, shard) 656 + tx := ds.cs.meta.WithContext(ctx).Begin() 661 657 662 - for _, ref := range brefs { 663 - ref["shard"] = shard.ID 664 - } 658 + if err := tx.WithContext(ctx).Create(shard).Error; err != nil { 659 + return fmt.Errorf("failed to create shard in DB tx: %w", err) 660 + } 661 + ds.cs.putLastShardCache(ds.user, shard) 662 + 663 + for _, ref := range brefs { 664 + ref["shard"] = shard.ID 665 + } 665 666 666 - if err := createBlockRefs(ctx, tx, brefs); err != nil { 667 - return fmt.Errorf("failed to create block refs: %w", err) 668 - } 667 + if err := createBlockRefs(ctx, tx, brefs); err != nil { 668 + return fmt.Errorf("failed to create block refs: %w", err) 669 + } 669 670 670 - return nil 671 - }); err != nil { 671 + err := tx.WithContext(ctx).Commit().Error 672 + if err != nil { 672 673 return fmt.Errorf("failed to commit shard DB transaction: %w", err) 673 674 } 674 675
+1 -1
cmd/sonar/main.go
··· 107 107 108 108 wg := sync.WaitGroup{} 109 109 110 - pool := events.NewConsumerPool(cctx.Int("worker-count"), cctx.Int("max-queue-size"), s.HandleStreamEvent) 110 + pool := events.NewConsumerPool(cctx.Int("worker-count"), cctx.Int("max-queue-size"), u.Host, s.HandleStreamEvent) 111 111 112 112 // Start a goroutine to manage the cursor file, saving the current cursor every 5 seconds. 113 113 go func() {
+76 -28
cmd/supercollider/main.go
··· 10 10 "path/filepath" 11 11 "strconv" 12 12 "strings" 13 + "sync" 13 14 "syscall" 14 15 "time" 15 16 ··· 42 43 "github.com/prometheus/client_golang/prometheus" 43 44 "github.com/prometheus/client_golang/prometheus/promauto" 44 45 "github.com/prometheus/client_golang/prometheus/promhttp" 46 + "gorm.io/driver/postgres" 45 47 "gorm.io/driver/sqlite" 46 48 "gorm.io/gorm" 47 49 ··· 90 92 Usage: "hostname of this server (forward *.hostname DNS records to this server)", 91 93 Value: "supercollider.jazco.io", 92 94 EnvVars: []string{"SUPERCOLLIDER_HOST"}, 95 + }, 96 + &cli.StringFlag{ 97 + Name: "postgres-url", 98 + Usage: "postgres connection string for CarDB (if not set, will use sqlite in-memory)", 99 + EnvVars: []string{"SUPERCOLLIDER_POSTGRES_URL"}, 93 100 }, 94 101 &cli.BoolFlag{ 95 102 Name: "use-ssl", ··· 166 173 em := events.NewEventManager(events.NewYoloPersister()) 167 174 168 175 // Configure the repomanager and keypair for our fake accounts 169 - repoman, privkey, err := initSpeedyRepoMan() 176 + repoman, privkey, err := initSpeedyRepoMan(cctx.String("postgres-url")) 170 177 if err != nil { 171 178 log.Fatalf("failed to init repo manager: %+v\n", err) 172 179 } ··· 253 260 } 254 261 } 255 262 256 - go s.EventGenerationLoop(ctx) 263 + go s.EventGenerationLoop(ctx, cctx.String("postgres-url") != "") 257 264 258 265 listenAddress := fmt.Sprintf(":%d", port) 259 266 if cctx.Bool("use-ssl") { ··· 285 292 return db, nil 286 293 } 287 294 295 + // Configure a Postgres SqliteDB 296 + func setupPostgresDb(p string) (*gorm.DB, error) { 297 + db, err := gorm.Open(postgres.Open(p), &gorm.Config{}) 298 + if err != nil { 299 + return nil, fmt.Errorf("failed to open db: %w", err) 300 + } 301 + 302 + return db, nil 303 + } 304 + 288 305 // Stand up a Repo Manager with a Web DID Resolver 289 - func initSpeedyRepoMan() (*repomgr.RepoManager, *godid.PrivKey, error) { 306 + func initSpeedyRepoMan(postgresString string) (*repomgr.RepoManager, *godid.PrivKey, error) { 290 307 dir, err := os.MkdirTemp("", "supercollider") 291 308 if err != nil { 292 309 return nil, nil, err 293 310 } 294 311 295 - cardb, err := setupDb("file::memory:?cache=shared") 296 - if err != nil { 297 - return nil, nil, err 312 + var cardb *gorm.DB 313 + if postgresString != "" { 314 + cardb, err = setupPostgresDb(postgresString) 315 + if err != nil { 316 + return nil, nil, err 317 + } 318 + } else { 319 + cardb, err = setupDb("file::memory:?cache=shared") 320 + if err != nil { 321 + return nil, nil, err 322 + } 298 323 } 299 324 300 325 cspath := filepath.Join(dir, "carstore") ··· 362 387 // Event Generation Loop and Control 363 388 364 389 // EventGenerationLoop is the main loop for generating events 365 - func (s *Server) EventGenerationLoop(ctx context.Context) { 390 + func (s *Server) EventGenerationLoop(ctx context.Context, concurrent bool) { 366 391 running := false 367 392 totalEmittedEvents := 0 368 393 ··· 397 422 } 398 423 } 399 424 400 - for i := 0; i < s.TotalDesiredEvents; i++ { 401 - totalEmittedEvents++ 402 - if i%40_000 == 0 { 403 - s.Logger.Infof("emitted %d events\n", totalEmittedEvents) 404 - } 405 - 406 - // Wait for the limiter to allow us to emit another event 407 - limiter.Wait(ctx) 408 - 409 - _, _, err := s.RepoManager.CreateRecord(ctx, models.Uid(i%len(s.Dids)+1), "app.bsky.feed.post", &bsky.FeedPost{ 410 - Text: "cats", 411 - }) 412 - if err != nil { 413 - s.Logger.Errorf("failed to create record: %+v\n", err) 414 - } else { 415 - eventsGeneratedCounter.Inc() 425 + if concurrent { 426 + recordsPerActor := s.TotalDesiredEvents / len(s.Dids) 427 + wg := sync.WaitGroup{} 428 + for i := 0; i < len(s.Dids); i++ { 429 + wg.Add(1) 430 + go func(i int) { 431 + for j := 0; j < recordsPerActor; j++ { 432 + limiter.Wait(ctx) 433 + _, _, err := s.RepoManager.CreateRecord(ctx, models.Uid(i+1), "app.bsky.feed.post", &bsky.FeedPost{ 434 + Text: "cats", 435 + }) 436 + if err != nil { 437 + s.Logger.Errorf("failed to create record: %+v\n", err) 438 + } else { 439 + eventsGeneratedCounter.Inc() 440 + } 441 + select { 442 + case <-ctx.Done(): 443 + return 444 + default: 445 + } 446 + } 447 + }(i) 416 448 } 417 - select { 418 - case <-ctx.Done(): 419 - return 420 - default: 449 + wg.Wait() 450 + } else { 451 + for i := 0; i < s.TotalDesiredEvents; i++ { 452 + limiter.Wait(ctx) 453 + _, _, err := s.RepoManager.CreateRecord(ctx, models.Uid(i%len(s.Dids)+1), "app.bsky.feed.post", &bsky.FeedPost{ 454 + Text: "cats", 455 + }) 456 + if err != nil { 457 + s.Logger.Errorf("failed to create record: %+v\n", err) 458 + } else { 459 + eventsGeneratedCounter.Inc() 460 + } 461 + select { 462 + case <-ctx.Done(): 463 + return 464 + default: 465 + } 421 466 } 422 467 } 468 + 423 469 s.Logger.Infof("emitted %d events, stopping\n", totalEmittedEvents) 424 470 s.EventControl <- "stop" 425 471 break ··· 503 549 504 550 ctx := c.Request().Context() 505 551 506 - evts, cancel, err := s.Events.Subscribe(ctx, func(evt *events.XRPCStreamEvent) bool { 552 + ident := c.Request().RemoteAddr + "-" + c.Request().UserAgent() 553 + 554 + evts, cancel, err := s.Events.Subscribe(ctx, ident, func(evt *events.XRPCStreamEvent) bool { 507 555 return true 508 556 }, cursor) 509 557 if err != nil {
+25 -1
events/consumer.go
··· 3 3 import ( 4 4 "context" 5 5 "fmt" 6 + "io" 6 7 "time" 7 8 8 9 comatproto "github.com/bluesky-social/indigo/api/atproto" 9 10 label "github.com/bluesky-social/indigo/api/label" 11 + "github.com/prometheus/client_golang/prometheus" 10 12 11 13 "github.com/gorilla/websocket" 12 14 ) ··· 45 47 } 46 48 } 47 49 50 + type instrumentedReader struct { 51 + r io.Reader 52 + addr string 53 + bytesCounter prometheus.Counter 54 + } 55 + 56 + func (sr *instrumentedReader) Read(p []byte) (int, error) { 57 + n, err := sr.r.Read(p) 58 + sr.bytesCounter.Add(float64(n)) 59 + return n, err 60 + } 61 + 48 62 func HandleRepoStream(ctx context.Context, con *websocket.Conn, sched Scheduler) error { 49 63 ctx, cancel := context.WithCancel(ctx) 50 64 defer cancel() 65 + 66 + remoteAddr := con.RemoteAddr().String() 51 67 52 68 go func() { 53 69 t := time.NewTicker(time.Second * 30) ··· 73 89 return ctx.Err() 74 90 default: 75 91 } 76 - mt, r, err := con.NextReader() 92 + mt, rawReader, err := con.NextReader() 77 93 if err != nil { 78 94 return err 79 95 } ··· 85 101 // ok 86 102 } 87 103 104 + r := &instrumentedReader{ 105 + r: rawReader, 106 + addr: remoteAddr, 107 + bytesCounter: bytesFromStreamCounter.WithLabelValues(remoteAddr), 108 + } 109 + 88 110 var header EventHeader 89 111 if err := header.UnmarshalCBOR(r); err != nil { 90 112 return fmt.Errorf("reading header: %w", err) 91 113 } 114 + 115 + eventsFromStreamCounter.WithLabelValues(remoteAddr).Inc() 92 116 93 117 switch header.Op { 94 118 case EvtKindMessage:
+14 -4
events/events.go
··· 9 9 comatproto "github.com/bluesky-social/indigo/api/atproto" 10 10 label "github.com/bluesky-social/indigo/api/label" 11 11 "github.com/bluesky-social/indigo/models" 12 + "github.com/prometheus/client_golang/prometheus" 12 13 13 14 logging "github.com/ipfs/go-log" 14 15 "go.opentelemetry.io/otel" ··· 59 60 // directly to the bgs, and have rebroadcasting proxies instead 60 61 for _, s := range em.subs { 61 62 if s.filter(evt) { 63 + s.enqueuedCounter.Inc() 62 64 select { 63 65 case s.outgoing <- evt: 64 66 case <-s.done: ··· 68 70 default: 69 71 log.Warnf("event overflow (%d)", len(s.outgoing)) 70 72 } 73 + s.broadcastCounter.Inc() 71 74 } 72 75 } 73 76 } ··· 84 87 filter func(*XRPCStreamEvent) bool 85 88 86 89 done chan struct{} 90 + 91 + ident string 92 + enqueuedCounter prometheus.Counter 93 + broadcastCounter prometheus.Counter 87 94 } 88 95 89 96 const ( ··· 127 134 128 135 var ErrPlaybackShutdown = fmt.Errorf("playback shutting down") 129 136 130 - func (em *EventManager) Subscribe(ctx context.Context, filter func(*XRPCStreamEvent) bool, since *int64) (<-chan *XRPCStreamEvent, func(), error) { 137 + func (em *EventManager) Subscribe(ctx context.Context, ident string, filter func(*XRPCStreamEvent) bool, since *int64) (<-chan *XRPCStreamEvent, func(), error) { 131 138 if filter == nil { 132 139 filter = func(*XRPCStreamEvent) bool { return true } 133 140 } 134 141 135 142 done := make(chan struct{}) 136 143 sub := &Subscriber{ 137 - outgoing: make(chan *XRPCStreamEvent, em.bufferSize), 138 - filter: filter, 139 - done: done, 144 + ident: ident, 145 + outgoing: make(chan *XRPCStreamEvent, em.bufferSize), 146 + filter: filter, 147 + done: done, 148 + enqueuedCounter: eventsEnqueued.WithLabelValues(ident), 149 + broadcastCounter: eventsBroadcast.WithLabelValues(ident), 140 150 } 141 151 142 152 go func() {
+41
events/metrics.go
··· 1 + package events 2 + 3 + import ( 4 + "github.com/prometheus/client_golang/prometheus" 5 + "github.com/prometheus/client_golang/prometheus/promauto" 6 + ) 7 + 8 + var eventsFromStreamCounter = promauto.NewCounterVec(prometheus.CounterOpts{ 9 + Name: "repo_stream_events_received_total", 10 + Help: "Total number of events received from the stream", 11 + }, []string{"remote_addr"}) 12 + 13 + var bytesFromStreamCounter = promauto.NewCounterVec(prometheus.CounterOpts{ 14 + Name: "repo_stream_bytes_total", 15 + Help: "Total bytes received from the stream", 16 + }, []string{"remote_addr"}) 17 + 18 + var workItemsAdded = promauto.NewCounterVec(prometheus.CounterOpts{ 19 + Name: "work_items_added_total", 20 + Help: "Total number of work items added to the consumer pool", 21 + }, []string{"pool"}) 22 + 23 + var workItemsProcessed = promauto.NewCounterVec(prometheus.CounterOpts{ 24 + Name: "work_items_processed_total", 25 + Help: "Total number of work items processed by the consumer pool", 26 + }, []string{"pool"}) 27 + 28 + var workItemsActive = promauto.NewCounterVec(prometheus.CounterOpts{ 29 + Name: "work_items_active_total", 30 + Help: "Total number of work items passed into a worker", 31 + }, []string{"pool"}) 32 + 33 + var eventsEnqueued = promauto.NewCounterVec(prometheus.CounterOpts{ 34 + Name: "events_enqueued_for_broadcast_total", 35 + Help: "Total number of events enqueued to broadcast to subscribers", 36 + }, []string{"pool"}) 37 + 38 + var eventsBroadcast = promauto.NewCounterVec(prometheus.CounterOpts{ 39 + Name: "events_broadcast_total", 40 + Help: "Total number of events broadcast to subscribers", 41 + }, []string{"pool"})
+8 -1
events/parallel.go
··· 27 27 28 28 lk sync.Mutex 29 29 active map[string][]*consumerTask 30 + 31 + ident string 30 32 } 31 33 32 - func NewConsumerPool(maxC, maxQ int, do func(context.Context, *XRPCStreamEvent) error) *ParallelConsumerPool { 34 + func NewConsumerPool(maxC, maxQ int, ident string, do func(context.Context, *XRPCStreamEvent) error) *ParallelConsumerPool { 33 35 p := &ParallelConsumerPool{ 34 36 maxConcurrency: maxC, 35 37 maxQueue: maxQ, ··· 38 40 39 41 feeder: make(chan *consumerTask), 40 42 active: make(map[string][]*consumerTask), 43 + 44 + ident: ident, 41 45 } 42 46 43 47 for i := 0; i < maxC; i++ { ··· 53 57 } 54 58 55 59 func (p *ParallelConsumerPool) AddWork(ctx context.Context, repo string, val *XRPCStreamEvent) error { 60 + workItemsAdded.WithLabelValues(p.ident).Inc() 56 61 t := &consumerTask{ 57 62 repo: repo, 58 63 val: val, ··· 80 85 func (p *ParallelConsumerPool) worker() { 81 86 for work := range p.feeder { 82 87 for work != nil { 88 + workItemsActive.WithLabelValues(p.ident).Inc() 83 89 if err := p.do(context.TODO(), work.val); err != nil { 84 90 log.Errorf("event handler failed: %s", err) 85 91 } 92 + workItemsProcessed.WithLabelValues(p.ident).Inc() 86 93 87 94 p.lk.Lock() 88 95 rem, ok := p.active[work.repo]
+3 -1
labeler/ws_endpoints.go
··· 28 28 return fmt.Errorf("upgrading websocket: %w", err) 29 29 } 30 30 31 - evts, cancel, err := s.evtmgr.Subscribe(ctx, func(evt *events.XRPCStreamEvent) bool { 31 + ident := c.RealIP() + "-" + c.Request().UserAgent() 32 + 33 + evts, cancel, err := s.evtmgr.Subscribe(ctx, ident, func(evt *events.XRPCStreamEvent) bool { 32 34 return true 33 35 }, since) 34 36 if err != nil {
+3 -1
pds/server.go
··· 618 618 619 619 ctx := c.Request().Context() 620 620 621 - evts, cancel, err := s.events.Subscribe(ctx, func(evt *events.XRPCStreamEvent) bool { 621 + ident := c.RealIP() + "-" + c.Request().UserAgent() 622 + 623 + evts, cancel, err := s.events.Subscribe(ctx, ident, func(evt *events.XRPCStreamEvent) bool { 622 624 if !s.enforcePeering { 623 625 return true 624 626 }
+8
repomgr/memheadstore.go
··· 3 3 import ( 4 4 "context" 5 5 "fmt" 6 + "sync" 6 7 7 8 "github.com/bluesky-social/indigo/models" 8 9 ··· 11 12 12 13 type MemHeadStore struct { 13 14 heads map[models.Uid]cid.Cid 15 + lk sync.RWMutex 14 16 } 15 17 16 18 func NewMemHeadStore() *MemHeadStore { ··· 20 22 } 21 23 22 24 func (hs *MemHeadStore) GetUserRepoHead(ctx context.Context, user models.Uid) (cid.Cid, error) { 25 + hs.lk.RLock() 26 + defer hs.lk.RUnlock() 23 27 h, ok := hs.heads[user] 24 28 if !ok { 25 29 return cid.Undef, fmt.Errorf("user head not found") ··· 29 33 } 30 34 31 35 func (hs *MemHeadStore) UpdateUserRepoHead(ctx context.Context, user models.Uid, root cid.Cid) error { 36 + hs.lk.Lock() 37 + defer hs.lk.Unlock() 32 38 _, ok := hs.heads[user] 33 39 if !ok { 34 40 return fmt.Errorf("cannot update user head if it doesnt exist already") ··· 39 45 } 40 46 41 47 func (hs *MemHeadStore) InitUser(ctx context.Context, user models.Uid, root cid.Cid) error { 48 + hs.lk.Lock() 49 + defer hs.lk.Unlock() 42 50 _, ok := hs.heads[user] 43 51 if ok { 44 52 return fmt.Errorf("cannot init user head if it exists already")
+1 -1
search/server.go
··· 195 195 }, 196 196 } 197 197 198 - return events.HandleRepoStream(ctx, con, events.NewConsumerPool(8, 32, rsc.EventHandler)) 198 + return events.HandleRepoStream(ctx, con, events.NewConsumerPool(8, 32, s.bgshost, rsc.EventHandler)) 199 199 } 200 200 201 201 func (s *Server) handleOp(ctx context.Context, op repomgr.EventKind, seq int64, path string, did string, rcid *cid.Cid, rec any) error {