alf: the atproto Latency Fabric
alf.fly.dev/
1// ABOUTME: Prometheus metrics for ALF (Atproto Latency Fabric) service monitoring
2
3import { Registry, Gauge, Counter, Histogram, collectDefaultMetrics } from 'prom-client';
4
/**
 * Service-specific Prometheus registry.
 *
 * Every metric declared in this module is attached to this registry rather
 * than to prom-client's global default one.
 */
export const registry = new Registry();

// Register the default Node.js metrics (memory, CPU, event loop, etc.) on the
// custom registry, with every metric name prefixed by `alf_`.
collectDefaultMetrics({ prefix: 'alf_', register: registry });
13
14// ============================================================================
15// DRAFT OPERATION METRICS
16// ============================================================================
17
/**
 * Counter of drafts created, partitioned by the `collection` label.
 */
export const draftsCreatedTotal = new Counter<'collection'>({
  registers: [registry],
  name: 'alf_drafts_created_total',
  help: 'Total drafts created',
  labelNames: ['collection'],
});
24
/**
 * Counter of draft updates. Declared without labels.
 */
export const draftsUpdatedTotal = new Counter({
  registers: [registry],
  name: 'alf_drafts_updated_total',
  help: 'Total drafts updated',
});
30
/**
 * Counter of draft cancellations. Declared without labels.
 */
export const draftsCancelledTotal = new Counter({
  registers: [registry],
  name: 'alf_drafts_cancelled_total',
  help: 'Total drafts cancelled',
});
36
37// ============================================================================
38// BLOB STORAGE METRICS
39// ============================================================================
40
/**
 * Counter of blobs written to the ALF blob store. Declared without labels.
 */
export const blobsStoredTotal = new Counter({
  registers: [registry],
  name: 'alf_blobs_stored_total',
  help: 'Total blobs stored in the ALF blob store',
});
46
/**
 * Histogram of stored blob sizes, in bytes.
 *
 * Bucket upper bounds are 1 KiB, 10 KiB, 100 KiB, 500 KiB, 1 MiB, 2 MiB and
 * 5 MiB, i.e. 1024 through 5242880 bytes.
 */
export const blobSizeBytes = new Histogram({
  registers: [registry],
  name: 'alf_blob_size_bytes',
  help: 'Distribution of stored blob sizes in bytes',
  // Expressed in KiB and scaled to bytes; the products are exact integers.
  buckets: [1, 10, 100, 500, 1024, 2048, 5120].map((kib) => kib * 1024),
});
53
54// ============================================================================
55// SCHEDULER METRICS
56// ============================================================================
57
/**
 * Counter of scheduler wakeup polls. Declared without labels.
 */
export const schedulerWakeupsTotal = new Counter({
  registers: [registry],
  name: 'alf_scheduler_wakeups_total',
  help: 'Total scheduler wakeup polls',
});
63
/**
 * Counter of publish attempts, partitioned by the `result` label.
 *
 * Intended `result` values: 'success', 'retry', 'failed', 'skipped'.
 */
export const schedulerPublishesTotal = new Counter<'result'>({
  registers: [registry],
  name: 'alf_scheduler_publishes_total',
  help: 'Total publish attempts',
  labelNames: ['result'],
});
70
/**
 * Histogram of how long an individual draft publish operation takes, in
 * seconds. Bucket upper bounds range from 0.1 s to 60 s.
 */
export const publishDurationSeconds = new Histogram({
  registers: [registry],
  name: 'alf_publish_duration_seconds',
  help: 'Duration of individual draft publish operations in seconds',
  buckets: [
    0.1, 0.5, // sub-second
    1, 2, 5, // seconds
    10, 30, 60, // up to one minute
  ],
});
77
78// ============================================================================
79// AUTHENTICATION METRICS
80// ============================================================================
81
/**
 * Counter of authentication verification attempts, partitioned by the
 * `result` label.
 *
 * Intended `result` values: 'success', 'failure'.
 */
export const authVerificationsTotal = new Counter<'result'>({
  registers: [registry],
  name: 'alf_auth_verifications_total',
  help: 'Total authentication verification attempts',
  labelNames: ['result'],
});
88
89// ============================================================================
90// HTTP SERVER METRICS
91// ============================================================================
92
/**
 * Counter of HTTP requests, partitioned by the `method`, `endpoint` and
 * `status_code` labels.
 */
export const httpRequestsTotal = new Counter<'method' | 'endpoint' | 'status_code'>({
  registers: [registry],
  name: 'alf_http_requests_total',
  help: 'Total HTTP requests',
  labelNames: ['method', 'endpoint', 'status_code'],
});
99
/**
 * Histogram of HTTP request durations, in seconds, partitioned by the
 * `method` and `endpoint` labels. Bucket upper bounds range from 1 ms to 10 s.
 */
export const httpRequestDuration = new Histogram<'method' | 'endpoint'>({
  registers: [registry],
  name: 'alf_http_request_duration_seconds',
  help: 'HTTP request duration in seconds',
  labelNames: ['method', 'endpoint'],
  buckets: [
    0.001, 0.005, 0.01, 0.05, // milliseconds
    0.1, 0.5, // sub-second
    1, 2, 5, 10, // seconds
  ],
});
107
108// ============================================================================
109// SYSTEM HEALTH METRICS
110// ============================================================================
111
/**
 * Gauge describing the server state: 0 means down, 1 means up.
 * The value is set by code outside this module.
 */
export const serverState = new Gauge({
  registers: [registry],
  name: 'alf_server_state',
  help: 'Server state (0=down, 1=up)',
});
117
/**
 * Gauge holding the Unix timestamp of the most recent server startup.
 *
 * NOTE(review): the unit (seconds vs. milliseconds) depends on the caller
 * that sets this gauge and is not visible here — confirm.
 */
export const startupTimestamp = new Gauge({
  registers: [registry],
  name: 'alf_startup_timestamp',
  help: 'Unix timestamp of last server startup',
});