alf: the atproto Latency Fabric alf.fly.dev/
7
fork

Configure Feed

Select the types of activity you want to include in your feed.

at main 122 lines 4.0 kB view raw
// ABOUTME: Prometheus metrics for ALF (Atproto Latency Fabric) service monitoring

import { Registry, Gauge, Counter, Histogram, collectDefaultMetrics } from 'prom-client';

/** Custom registry for this service; every metric defined below is registered on it. */
export const registry = new Registry();

// Default Node.js metrics (memory, CPU, event loop, etc.), collected into the
// service registry with the `alf_` prefix.
collectDefaultMetrics({
  register: registry,
  prefix: 'alf_',
});

// ----------------------------------------------------------------------------
// Histogram bucket boundaries (module-private)
// ----------------------------------------------------------------------------

const KIB = 1024;
const MIB = 1024 * KIB;

// Upper bounds in bytes: 1 KiB, 10 KiB, 100 KiB, 500 KiB, 1 MiB, 2 MiB, 5 MiB.
const BLOB_SIZE_BUCKETS_BYTES = [
  1 * KIB,
  10 * KIB,
  100 * KIB,
  500 * KIB,
  1 * MIB,
  2 * MIB,
  5 * MIB,
];

// Upper bounds in seconds for a single draft publish operation.
const PUBLISH_DURATION_BUCKETS_SECONDS = [0.1, 0.5, 1, 2, 5, 10, 30, 60];

// Upper bounds in seconds for an HTTP request (1 ms up to 10 s).
const HTTP_DURATION_BUCKETS_SECONDS = [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 2, 5, 10];

// ----------------------------------------------------------------------------
// Draft operation metrics
// ----------------------------------------------------------------------------

/** Counter of drafts created, labelled by `collection`. */
export const draftsCreatedTotal = new Counter({
  name: 'alf_drafts_created_total',
  help: 'Total drafts created',
  labelNames: ['collection'] as const,
  registers: [registry],
});

/** Counter of drafts updated (no labels). */
export const draftsUpdatedTotal = new Counter({
  name: 'alf_drafts_updated_total',
  help: 'Total drafts updated',
  registers: [registry],
});

/** Counter of drafts cancelled (no labels). */
export const draftsCancelledTotal = new Counter({
  name: 'alf_drafts_cancelled_total',
  help: 'Total drafts cancelled',
  registers: [registry],
});

// ----------------------------------------------------------------------------
// Blob storage metrics
// ----------------------------------------------------------------------------

/** Counter of blobs stored in the ALF blob store. */
export const blobsStoredTotal = new Counter({
  name: 'alf_blobs_stored_total',
  help: 'Total blobs stored in the ALF blob store',
  registers: [registry],
});

/** Histogram of stored blob sizes, in bytes. */
export const blobSizeBytes = new Histogram({
  name: 'alf_blob_size_bytes',
  help: 'Distribution of stored blob sizes in bytes',
  buckets: BLOB_SIZE_BUCKETS_BYTES,
  registers: [registry],
});

// ----------------------------------------------------------------------------
// Scheduler metrics
// ----------------------------------------------------------------------------

/** Counter of scheduler wakeup polls. */
export const schedulerWakeupsTotal = new Counter({
  name: 'alf_scheduler_wakeups_total',
  help: 'Total scheduler wakeup polls',
  registers: [registry],
});

/**
 * Counter of publish attempts, labelled by `result`.
 * Intended `result` values: 'success', 'retry', 'failed', 'skipped'.
 */
export const schedulerPublishesTotal = new Counter({
  name: 'alf_scheduler_publishes_total',
  help: 'Total publish attempts',
  labelNames: ['result'] as const,
  registers: [registry],
});

/** Histogram of individual draft publish durations, in seconds. */
export const publishDurationSeconds = new Histogram({
  name: 'alf_publish_duration_seconds',
  help: 'Duration of individual draft publish operations in seconds',
  buckets: PUBLISH_DURATION_BUCKETS_SECONDS,
  registers: [registry],
});

// ----------------------------------------------------------------------------
// Authentication metrics
// ----------------------------------------------------------------------------

/**
 * Counter of authentication verification attempts, labelled by `result`.
 * Intended `result` values: 'success', 'failure'.
 */
export const authVerificationsTotal = new Counter({
  name: 'alf_auth_verifications_total',
  help: 'Total authentication verification attempts',
  labelNames: ['result'] as const,
  registers: [registry],
});

// ----------------------------------------------------------------------------
// HTTP server metrics
// ----------------------------------------------------------------------------

/** Counter of HTTP requests, labelled by `method`, `endpoint` and `status_code`. */
export const httpRequestsTotal = new Counter({
  name: 'alf_http_requests_total',
  help: 'Total HTTP requests',
  labelNames: ['method', 'endpoint', 'status_code'] as const,
  registers: [registry],
});

/** Histogram of HTTP request durations in seconds, labelled by `method` and `endpoint`. */
export const httpRequestDuration = new Histogram({
  name: 'alf_http_request_duration_seconds',
  help: 'HTTP request duration in seconds',
  labelNames: ['method', 'endpoint'] as const,
  buckets: HTTP_DURATION_BUCKETS_SECONDS,
  registers: [registry],
});

// ----------------------------------------------------------------------------
// System health metrics
// ----------------------------------------------------------------------------

/** Gauge for the server state: 0 = down, 1 = up. */
export const serverState = new Gauge({
  name: 'alf_server_state',
  help: 'Server state (0=down, 1=up)',
  registers: [registry],
});

/** Gauge holding the Unix timestamp of the last server startup. */
export const startupTimestamp = new Gauge({
  name: 'alf_startup_timestamp',
  help: 'Unix timestamp of last server startup',
  registers: [registry],
});