this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Add an Autoscaling consumer pool to the events package

+238 -1
+2 -1
bgs/fedmgr.go
··· 10 10 11 11 comatproto "github.com/bluesky-social/indigo/api/atproto" 12 12 "github.com/bluesky-social/indigo/events" 13 + "github.com/bluesky-social/indigo/events/autoscaling" 13 14 "github.com/bluesky-social/indigo/models" 14 15 "go.opentelemetry.io/otel" 15 16 ··· 388 389 }, 389 390 } 390 391 391 - pool := events.NewConsumerPool(32, 20, con.RemoteAddr().String(), rsc.EventHandler) 392 + pool := autoscaling.NewConsumerPool(1, 360, con.RemoteAddr().String(), rsc.EventHandler) 392 393 return events.HandleRepoStream(ctx, con, pool) 393 394 } 394 395
+149
events/autoscaling/autoscaling.go
··· 1 + package autoscaling 2 + 3 + import ( 4 + "context" 5 + "sync" 6 + "time" 7 + 8 + "github.com/bluesky-social/indigo/events" 9 + "github.com/labstack/gommon/log" 10 + "github.com/prometheus/client_golang/prometheus" 11 + ) 12 + 13 + type ConsumerPool struct { 14 + concurrency int 15 + maxConcurrency int 16 + 17 + do func(context.Context, *events.XRPCStreamEvent) error 18 + 19 + feeder chan *consumerTask 20 + 21 + lk sync.Mutex 22 + active map[string][]*consumerTask 23 + 24 + ident string 25 + 26 + // metrics 27 + itemsAdded prometheus.Counter 28 + itemsProcessed prometheus.Counter 29 + itemsActive prometheus.Counter 30 + workersAcrive prometheus.Gauge 31 + 32 + // autoscaling 33 + throughputManager *ThroughputManager 34 + } 35 + 36 + func NewConsumerPool(concurrency, maxC int, ident string, do func(context.Context, *events.XRPCStreamEvent) error) *ConsumerPool { 37 + p := &ConsumerPool{ 38 + concurrency: concurrency, 39 + maxConcurrency: maxC, 40 + 41 + do: do, 42 + 43 + feeder: make(chan *consumerTask), 44 + active: make(map[string][]*consumerTask), 45 + 46 + ident: ident, 47 + 48 + itemsAdded: workItemsAdded.WithLabelValues(ident, "autoscaling"), 49 + itemsProcessed: workItemsProcessed.WithLabelValues(ident, "autoscaling"), 50 + itemsActive: workItemsActive.WithLabelValues(ident, "autoscaling"), 51 + 52 + // autoscaling 53 + // By default, the ThroughputManager will calculate the average throughput over the last 60 seconds. 
54 + throughputManager: NewThroughputManager(60), 55 + } 56 + 57 + for i := 0; i < concurrency; i++ { 58 + go p.worker() 59 + } 60 + 61 + go p.autoscale() 62 + 63 + return p 64 + } 65 + 66 + // Add autoscaling function 67 + func (p *ConsumerPool) autoscale() { 68 + p.throughputManager.Start() 69 + tick := time.NewTicker(time.Second * 5) // adjust as needed 70 + for range tick.C { 71 + avg := p.throughputManager.AvgThroughput() 72 + if avg > float64(p.concurrency) && p.concurrency < p.maxConcurrency { 73 + p.concurrency++ 74 + go p.worker() 75 + } else if avg < float64(p.concurrency-1) && p.concurrency > 1 { 76 + p.concurrency-- 77 + p.feeder <- &consumerTask{signal: "stop"} 78 + } 79 + } 80 + } 81 + 82 + type consumerTask struct { 83 + repo string 84 + val *events.XRPCStreamEvent 85 + signal string 86 + } 87 + 88 + func (p *ConsumerPool) AddWork(ctx context.Context, repo string, val *events.XRPCStreamEvent) error { 89 + p.itemsAdded.Inc() 90 + p.throughputManager.Add(1) 91 + t := &consumerTask{ 92 + repo: repo, 93 + val: val, 94 + } 95 + p.lk.Lock() 96 + 97 + a, ok := p.active[repo] 98 + if ok { 99 + p.active[repo] = append(a, t) 100 + p.lk.Unlock() 101 + return nil 102 + } 103 + 104 + p.active[repo] = []*consumerTask{} 105 + p.lk.Unlock() 106 + 107 + select { 108 + case p.feeder <- t: 109 + return nil 110 + case <-ctx.Done(): 111 + return ctx.Err() 112 + } 113 + } 114 + 115 + func (p *ConsumerPool) worker() { 116 + log.Infof("starting autoscaling worker for %s", p.ident) 117 + p.workersAcrive.Inc() 118 + for work := range p.feeder { 119 + for work != nil { 120 + // Check if the work item contains a signal to stop the worker. 
121 + if work.signal == "stop" { 122 + log.Infof("stopping autoscaling worker for %s", p.ident) 123 + p.workersAcrive.Dec() 124 + return 125 + } 126 + 127 + p.itemsActive.Inc() 128 + if err := p.do(context.TODO(), work.val); err != nil { 129 + log.Errorf("event handler failed: %s", err) 130 + } 131 + p.itemsProcessed.Inc() 132 + 133 + p.lk.Lock() 134 + rem, ok := p.active[work.repo] 135 + if !ok { 136 + log.Errorf("should always have an 'active' entry if a worker is processing a job") 137 + } 138 + 139 + if len(rem) == 0 { 140 + delete(p.active, work.repo) 141 + work = nil 142 + } else { 143 + work = rem[0] 144 + p.active[work.repo] = rem[1:] 145 + } 146 + p.lk.Unlock() 147 + } 148 + } 149 + }
+26
events/autoscaling/metrics.go
··· 1 + package autoscaling 2 + 3 + import ( 4 + "github.com/prometheus/client_golang/prometheus" 5 + "github.com/prometheus/client_golang/prometheus/promauto" 6 + ) 7 + 8 + var workItemsAdded = promauto.NewCounterVec(prometheus.CounterOpts{ 9 + Name: "indigo_pool_work_items_added_total", 10 + Help: "Total number of work items added to the consumer pool", 11 + }, []string{"pool", "pool_type"}) 12 + 13 + var workItemsProcessed = promauto.NewCounterVec(prometheus.CounterOpts{ 14 + Name: "indigo_pool_work_items_processed_total", 15 + Help: "Total number of work items processed by the consumer pool", 16 + }, []string{"pool", "pool_type"}) 17 + 18 + var workItemsActive = promauto.NewCounterVec(prometheus.CounterOpts{ 19 + Name: "indigo_pool_work_items_active_total", 20 + Help: "Total number of work items passed into a worker", 21 + }, []string{"pool", "pool_type"}) 22 + 23 + var workersActive = promauto.NewGaugeVec(prometheus.GaugeOpts{ 24 + Name: "indigo_pool_workers_active", 25 + Help: "Number of workers currently active", 26 + }, []string{"pool", "pool_type"})
+61
events/autoscaling/throughput.go
// ThroughputManager tracks how many items were recorded per second over a
// sliding window of interval seconds, using a circular buffer with one bucket
// per second.
type ThroughputManager struct {
	mu       sync.Mutex
	circular []int // one bucket per second of the window
	pos      int   // index of the bucket for the current second
	sum      int   // running total of all buckets
	interval int   // window length in seconds; always >= 1 when built by NewThroughputManager

	startOnce sync.Once
	stopOnce  sync.Once
	done      chan struct{} // closed by Stop to end the shifting goroutine
}

// NewThroughputManager creates a new ThroughputManager that averages over the
// last interval seconds. A non-positive interval is treated as 1, since a
// zero-length window would make Add index an empty buffer and the rotation
// divide by zero.
func NewThroughputManager(interval int) *ThroughputManager {
	if interval < 1 {
		interval = 1
	}
	return &ThroughputManager{
		circular: make([]int, interval),
		interval: interval,
		done:     make(chan struct{}),
	}
}

// Add records n items in the bucket for the current second.
func (m *ThroughputManager) Add(n int) {
	m.mu.Lock()
	defer m.mu.Unlock()

	// increment the current position's value
	m.circular[m.pos] += n
	m.sum += n
}

// AvgThroughput returns the average number of items recorded per second over
// the window. The divisor is always the full window length, so the value is
// an underestimate until the manager has been running for interval seconds.
func (m *ThroughputManager) AvgThroughput() float64 {
	m.mu.Lock()
	defer m.mu.Unlock()

	return float64(m.sum) / float64(m.interval)
}

// rotate advances to the next bucket and discards whatever that bucket held,
// i.e. the count recorded interval seconds ago.
func (m *ThroughputManager) rotate() {
	m.mu.Lock()
	defer m.mu.Unlock()

	m.pos = (m.pos + 1) % m.interval
	m.sum -= m.circular[m.pos]
	m.circular[m.pos] = 0
}

// shift rotates the circular buffer once per second until Stop is called.
func (m *ThroughputManager) shift() {
	tick := time.NewTicker(time.Second)
	defer tick.Stop()

	for {
		select {
		case <-tick.C:
			m.rotate()
		case <-m.done:
			return
		}
	}
}

// Start starts the ThroughputManager.
// It launches a goroutine that ticks every second, shifting the position in
// the circular buffer. Calling Start more than once has no further effect, so
// the window never rotates faster than once per second.
func (m *ThroughputManager) Start() {
	m.startOnce.Do(func() {
		go m.shift()
	})
}

// Stop terminates the goroutine launched by Start and releases its ticker.
// It is safe to call Stop multiple times, and before Start.
func (m *ThroughputManager) Stop() {
	m.stopOnce.Do(func() {
		if m.done != nil {
			close(m.done)
		}
	})
}