this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

update metrics code

+73 -21
+28 -6
cmd/domesday/metrics.go
··· 6 6 ) 7 7 8 8 var handleCacheHits = promauto.NewCounter(prometheus.CounterOpts{ 9 - Name: "atproto_redis_resolver_handle_cache_hits", 9 + Name: "atproto_resolver_handle_cache_hits", 10 10 Help: "Number of cache hits for ATProto handle resolutions", 11 11 }) 12 12 13 13 var handleCacheMisses = promauto.NewCounter(prometheus.CounterOpts{ 14 - Name: "atproto_redis_resolver_handle_cache_misses", 14 + Name: "atproto_resolver_handle_cache_misses", 15 15 Help: "Number of cache misses for ATProto handle resolutions", 16 16 }) 17 17 18 18 var handleRequestsCoalesced = promauto.NewCounter(prometheus.CounterOpts{ 19 - Name: "atproto_redis_resolver_handle_requests_coalesced", 19 + Name: "atproto_resolver_handle_requests_coalesced", 20 20 Help: "Number of handle requests coalesced", 21 21 }) 22 + 23 + var handleResolutionErrors = promauto.NewCounter(prometheus.CounterOpts{ 24 + Name: "atproto_resolver_handle_resolution_errors", 25 + Help: "Number of non-cached handle resolution errors", 26 + }) 27 + 28 + var handleResolveDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{ 29 + Name: "atproto_resolver_handle_duration", 30 + Help: "Time to resolve a handle from network (not cached)", 31 + Buckets: prometheus.ExponentialBucketsRange(0.001, 2, 15), 32 + }, []string{"status"}) 22 33 23 34 var didCacheHits = promauto.NewCounter(prometheus.CounterOpts{ 24 - Name: "atproto_redis_resolver_did_cache_hits", 35 + Name: "atproto_resolver_did_cache_hits", 25 36 Help: "Number of cache hits for ATProto DID resolutions", 26 37 }) 27 38 28 39 var didCacheMisses = promauto.NewCounter(prometheus.CounterOpts{ 29 - Name: "atproto_redis_resolver_did_cache_misses", 40 + Name: "atproto_resolver_did_cache_misses", 30 41 Help: "Number of cache misses for ATProto DID resolutions", 31 42 }) 32 43 33 44 var didRequestsCoalesced = promauto.NewCounter(prometheus.CounterOpts{ 34 - Name: "atproto_redis_resolver_did_requests_coalesced", 45 + Name: "atproto_resolver_did_requests_coalesced", 35 46 Help: "Number of DID requests coalesced", 36 47 }) 48 + 49 + var didResolutionErrors = promauto.NewCounter(prometheus.CounterOpts{ 50 + Name: "atproto_resolver_did_resolution_errors", 51 + Help: "Number of non-cached DID resolution errors", 52 + }) 53 + 54 + var didResolveDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{ 55 + Name: "atproto_resolver_did_duration", 56 + Help: "Time to resolve a DID from network (not cached)", 57 + Buckets: prometheus.ExponentialBucketsRange(0.001, 2, 15), 58 + }, []string{"status"})
+31 -3
cmd/domesday/resolver.go
··· 26 26 ErrTTL time.Duration 27 27 HitTTL time.Duration 28 28 InvalidHandleTTL time.Duration 29 + Logger *slog.Logger 29 30 30 31 handleCache *cache.Cache 31 32 didCache *cache.Cache ··· 99 100 } 100 101 101 102 func (d *RedisResolver) refreshHandle(ctx context.Context, h syntax.Handle) handleEntry { 103 + start := time.Now() 102 104 did, err := d.Inner.ResolveHandle(ctx, h) 105 + duration := time.Since(start) 106 + 107 + if err != nil { 108 + d.Logger.Info("handle resolution failed", "handle", h, "duration", duration, "err", err) 109 + handleResolutionErrors.Inc() 110 + handleResolveDuration.WithLabelValues("fail").Observe(time.Since(start).Seconds()) 111 + } else { 112 + handleResolveDuration.WithLabelValues("success").Observe(time.Since(start).Seconds()) 113 + } 114 + if duration.Seconds() > 5.0 { 115 + d.Logger.Info("slow handle resolution", "handle", h, "duration", duration) 116 + } 117 + 103 118 he := handleEntry{ 104 119 Updated: time.Now(), 105 120 DID: &did, ··· 112 127 TTL: d.ErrTTL, 113 128 }) 114 129 if err != nil { 115 - slog.Error("identity cache write failed", "cache", "handle", "err", err) 130 + d.Logger.Error("identity cache write failed", "cache", "handle", "err", err) 116 131 } 117 132 return he 118 133 } 119 134 120 135 func (d *RedisResolver) refreshDID(ctx context.Context, did syntax.DID) didEntry { 121 - 136 + start := time.Now() 122 137 rawDoc, err := d.Inner.ResolveDIDRaw(ctx, did) 138 + duration := time.Since(start) 139 + 140 + if err != nil { 141 + d.Logger.Info("DID resolution failed", "did", did, "duration", duration, "err", err) 142 + didResolutionErrors.Inc() 143 + didResolveDuration.WithLabelValues("fail").Observe(time.Since(start).Seconds()) 144 + } else { 145 + didResolveDuration.WithLabelValues("success").Observe(time.Since(start).Seconds()) 146 + } 147 + if duration.Seconds() > 5.0 { 148 + d.Logger.Info("slow DID resolution", "did", did, "duration", duration) 149 + } 150 + 123 151 // persist the DID lookup error, instead of processing it immediately 124 152 entry := didEntry{ 125 153 Updated: time.Now(), ··· 134 162 TTL: d.HitTTL, 135 163 }) 136 164 if err != nil { 137 - slog.Error("DID cache write failed", "cache", "did", "did", did, "err", err) 165 + d.Logger.Error("DID cache write failed", "cache", "did", "did", did, "err", err) 138 166 } 139 167 return entry 140 168 }
+14 -12
cmd/domesday/server.go
··· 13 13 "time" 14 14 15 15 "github.com/bluesky-social/indigo/atproto/identity" 16 - //"github.com/bluesky-social/indigo/atproto/identity/redisdir" 17 16 18 17 "github.com/labstack/echo/v4" 19 18 "github.com/labstack/echo/v4/middleware" ··· 83 82 if err != nil { 84 83 return nil, err 85 84 } 85 + redisDir.Logger = logger 86 86 87 87 // configure redis client (for firehose consumer) 88 88 redisOpt, err := redis.ParseURL(config.RedisURL) ··· 149 149 } 150 150 151 151 func (srv *Server) RunAPI() error { 152 - slog.Info("starting server", "bind", srv.httpd.Addr) 152 + srv.logger.Info("starting server", "bind", srv.httpd.Addr) 153 153 go func() { 154 154 if err := srv.httpd.ListenAndServe(); err != nil { 155 155 if !errors.Is(err, http.ErrServerClosed) { 156 - slog.Error("HTTP server shutting down unexpectedly", "err", err) 156 + srv.logger.Error("HTTP server shutting down unexpectedly", "err", err) 157 157 } 158 158 } 159 159 }() 160 160 161 161 // Wait for a signal to exit. 162 - slog.Info("registering OS exit signal handler") 162 + srv.logger.Info("registering OS exit signal handler") 163 163 quit := make(chan struct{}) 164 164 exitSignals := make(chan os.Signal, 1) 165 165 signal.Notify(exitSignals, syscall.SIGINT, syscall.SIGTERM) 166 166 go func() { 167 167 sig := <-exitSignals 168 - slog.Info("received OS exit signal", "signal", sig) 168 + srv.logger.Info("received OS exit signal", "signal", sig) 169 169 170 170 // Shut down the HTTP server 171 171 if err := srv.Shutdown(); err != nil { 172 - slog.Error("HTTP server shutdown error", "err", err) 172 + srv.logger.Error("HTTP server shutdown error", "err", err) 173 173 } 174 174 175 175 // Trigger the return that causes an exit. 176 176 close(quit) 177 177 }() 178 178 <-quit 179 - slog.Info("graceful shutdown complete") 179 + srv.logger.Info("graceful shutdown complete") 180 180 return nil 181 181 } 182 182 183 - func (srv *Server) RunMetrics(listen string) error { 184 - http.Handle("/metrics", promhttp.Handler()) 185 - return http.ListenAndServe(listen, nil) 183 + func (srv *Server) RunMetrics(bind string) error { 184 + p := "/metrics" 185 + srv.logger.Info("starting metrics endpoint", "bind", bind, "path", p) 186 + http.Handle(p, promhttp.Handler()) 187 + return http.ListenAndServe(bind, nil) 186 188 } 187 189 188 190 func (srv *Server) Shutdown() error { 189 - slog.Info("shutting down") 191 + srv.logger.Info("shutting down") 190 192 191 193 ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) 192 194 defer cancel() ··· 207 209 errorMessage = fmt.Sprintf("%s", he.Message) 208 210 } 209 211 if code >= 500 { 210 - slog.Warn("domesday-http-internal-error", "err", err) 212 + srv.logger.Warn("domesday-http-internal-error", "err", err) 211 213 } 212 214 if !c.Response().Committed { 213 215 c.JSON(code, GenericError{Error: "InternalError", Message: errorMessage})