perlsky is a Perl 5 implementation of an AT Protocol Personal Data Server.
13
fork

Configure Feed

Select the types of activity you want to include in your feed.

Expose dedicated XRPC error metrics

alice 584a007b 0f96f06d

+94 -50
+6
docs/METRICS.md
··· 14 14 Counts HTTP XRPC requests by method, NSID, endpoint type, and status. 15 15 - `perlsky_xrpc_request_duration_seconds` 16 16 Histogram for HTTP XRPC latency with the same labels. 17 + - `perlsky_xrpc_errors_total` 18 + Counts rendered XRPC failures by method, NSID, endpoint type, status, and error code. 19 + - `perlsky_xrpc_unhandled_exceptions_total` 20 + Counts true unhandled exceptions on XRPC routes by method, NSID, and endpoint type. 17 21 - `perlsky_subscription_connections_total` 18 22 Counts websocket subscription opens by NSID. 19 23 - `perlsky_subscription_active` ··· 74 78 ## Suggested Alerts 75 79 76 80 - high error rate on `perlsky_xrpc_requests_total` 81 + - spikes in `perlsky_xrpc_errors_total` for a specific `nsid` or `error` 82 + - any growth in `perlsky_xrpc_unhandled_exceptions_total` 77 83 - sustained increase in `perlsky_xrpc_request_duration_seconds` 78 84 - non-zero `perlsky_subscription_active` with no corresponding frame growth 79 85 - crawler errors from `perlsky_crawler_requests_total{result="error"}`
+10
lib/ATProto/PDS/Metrics.pm
··· 25 25 [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10], 26 26 ); 27 27 $self->_register_counter( 28 + 'perlsky_xrpc_errors_total', 29 + 'Total rendered XRPC errors by method, endpoint, type, status, and error code.', 30 + [qw(method nsid endpoint_type status error)], 31 + ); 32 + $self->_register_counter( 33 + 'perlsky_xrpc_unhandled_exceptions_total', 34 + 'Total unhandled internal exceptions on XRPC routes by method, endpoint, and type.', 35 + [qw(method nsid endpoint_type)], 36 + ); 37 + $self->_register_counter( 28 38 'perlsky_service_proxy_requests_total', 29 39 'Total service-proxy requests handled locally or upstream by NSID, source, and status.', 30 40 [qw(nsid source status)],
+40 -50
lib/ATProto/PDS/XRPC/Dispatcher.pm
··· 78 78 $labels, 79 79 ); 80 80 }; 81 + my $observe_error = sub ($status, $error, $endpoint_type = 'unknown', $error_nsid = $c->stash('nsid') // 'unknown') { 82 + $c->app->metrics->increment_counter('perlsky_xrpc_errors_total', 1, { 83 + method => $method, 84 + nsid => $error_nsid, 85 + endpoint_type => $endpoint_type, 86 + status => $status, 87 + error => $error, 88 + }); 89 + }; 90 + my $render_error = sub ($status, $error, $message, $endpoint_type = 'unknown', $error_nsid = $c->stash('nsid') // 'unknown') { 91 + $finish_metrics->($status, $endpoint_type, $error_nsid); 92 + $observe_error->($status, $error, $endpoint_type, $error_nsid); 93 + return $c->render( 94 + status => $status, 95 + json => { 96 + error => $error, 97 + message => $message, 98 + }, 99 + ); 100 + }; 101 + my $render_internal_error = sub ($err, $endpoint_type = 'unknown', $error_nsid = $c->stash('nsid') // 'unknown') { 102 + my $message = "$err"; 103 + chomp $message; 104 + $c->app->log->error("Unhandled XRPC exception for $error_nsid: $message"); 105 + $c->app->metrics->increment_counter('perlsky_xrpc_unhandled_exceptions_total', 1, { 106 + method => $method, 107 + nsid => $error_nsid, 108 + endpoint_type => $endpoint_type, 109 + }); 110 + return $render_error->(500, 'InternalServerError', 'Internal server error', $endpoint_type, $error_nsid); 111 + }; 81 112 82 113 my $endpoint = $by_id{$nsid}; 83 114 unless ($endpoint) { 84 115 my $proxied_status = eval { $c->service_proxy->proxy_xrpc_request($c, $nsid) }; 85 116 if (my $err = $@) { 86 117 if (ref($err) eq 'HASH' && $err->{error}) { 87 - $finish_metrics->($err->{status} // 400, 'proxy', $nsid); 88 - return $c->render( 89 - status => $err->{status} // 400, 90 - json => { 91 - error => $err->{error}, 92 - message => $err->{message} // $err->{error}, 93 - }, 94 - ); 118 + return $render_error->($err->{status} // 400, $err->{error}, $err->{message} // $err->{error}, 'proxy', $nsid); 95 119 } 96 - die $err; 120 + return $render_internal_error->($err, 'proxy', $nsid); 97 121 } 98 122 99 123 if (defined $proxied_status) { ··· 101 125 return; 102 126 } 103 127 104 - $finish_metrics->(404); 105 - return $c->render( 106 - status => 404, 107 - json => { 108 - error => 'UnknownMethod', 109 - message => 'Unknown XRPC method', 110 - }, 111 - ); 128 + return $render_error->(404, 'UnknownMethod', 'Unknown XRPC method'); 112 129 } 113 130 114 131 if ($endpoint->{type} eq 'subscription') { 115 - $finish_metrics->(426, $endpoint->{type}, $endpoint->{id}); 116 - return $c->render( 117 - status => 426, 118 - json => { 119 - error => 'UpgradeRequired', 120 - message => "$endpoint->{id} requires a websocket upgrade", 121 - }, 122 - ); 132 + return $render_error->(426, 'UpgradeRequired', "$endpoint->{id} requires a websocket upgrade", $endpoint->{type}, $endpoint->{id}); 123 133 } 124 134 125 135 if ($endpoint->{type} eq 'query' && $c->req->method ne 'GET') { 126 - $finish_metrics->(405, $endpoint->{type}, $endpoint->{id}); 127 - return $c->render( 128 - status => 405, 129 - json => { 130 - error => 'MethodNotAllowed', 131 - message => "$endpoint->{id} expects GET", 132 - }, 133 - ); 136 + return $render_error->(405, 'MethodNotAllowed', "$endpoint->{id} expects GET", $endpoint->{type}, $endpoint->{id}); 134 137 } 135 138 136 139 if ($endpoint->{type} eq 'procedure' && $c->req->method ne 'POST') { 137 - $finish_metrics->(405, $endpoint->{type}, $endpoint->{id}); 138 - return $c->render( 139 - status => 405, 140 - json => { 141 - error => 'MethodNotAllowed', 142 - message => "$endpoint->{id} expects POST", 143 - }, 144 - ); 140 + return $render_error->(405, 'MethodNotAllowed', "$endpoint->{id} expects POST", $endpoint->{type}, $endpoint->{id}); 145 141 } 146 142 147 143 my $handler = $c->app->api_registry->handler_for($endpoint->{id}); 148 144 unless ($handler) { 149 145 $finish_metrics->(501, $endpoint->{type}, $endpoint->{id}); 146 + $observe_error->(501, 'NotImplemented', $endpoint->{type}, $endpoint->{id}); 150 147 return $c->render( 151 148 status => 501, 152 149 json => { ··· 161 158 my $result = eval { $handler->($c, $endpoint) }; 162 159 if (my $err = $@) { 163 160 if (ref($err) eq 'HASH' && $err->{error}) { 164 - $finish_metrics->($err->{status} // 400, $endpoint->{type}, $endpoint->{id}); 165 - return $c->render( 166 - status => $err->{status} // 400, 167 - json => { 168 - error => $err->{error}, 169 - message => $err->{message} // $err->{error}, 170 - }, 171 - ); 161 + return $render_error->($err->{status} // 400, $err->{error}, $err->{message} // $err->{error}, $endpoint->{type}, $endpoint->{id}); 172 162 } 173 - die $err; 163 + return $render_internal_error->($err, $endpoint->{type}, $endpoint->{id}); 174 164 } 175 165 176 166 if (!defined $result) {
+38
t/metrics.t
··· 99 99 Authorization => "Bearer $access", 100 100 })->status_is(200); 101 101 102 + $t->post_ok('/xrpc/com.atproto.server.createSession' => json => { 103 + identifier => 'alice.test', 104 + password => 'wrong-password', 105 + })->status_is(401) 106 + ->json_is('/error' => 'AuthRequired'); 107 + 108 + $t->get_ok('/xrpc/example.unsupported.method') 109 + ->status_is(404) 110 + ->json_is('/error' => 'UnknownMethod'); 111 + 112 + $app->api_registry->register('com.atproto.server.describeServer', sub { 113 + die "forced metrics failure\n"; 114 + }); 115 + 116 + $t->get_ok('/xrpc/com.atproto.server.describeServer') 117 + ->status_is(500) 118 + ->json_is('/error' => 'InternalServerError'); 119 + 102 120 $t->websocket_ok('/xrpc/com.atproto.sync.subscribeRepos') 103 121 ->finish_ok; 104 122 ··· 122 140 $metrics, 123 141 qr/perlsky_xrpc_request_duration_seconds_count\{endpoint_type="procedure",method="POST",nsid="com\.atproto\.server\.createAccount",status="200"\} 1\b/, 124 142 'createAccount latency histogram is exported', 143 + ); 144 + like( 145 + $metrics, 146 + qr/perlsky_xrpc_errors_total\{endpoint_type="procedure",error="AuthRequired",method="POST",nsid="com\.atproto\.server\.createSession",status="401"\} 1\b/, 147 + 'handled XRPC errors are exported with their error code', 148 + ); 149 + like( 150 + $metrics, 151 + qr/perlsky_xrpc_errors_total\{endpoint_type="unknown",error="UnknownMethod",method="GET",nsid="example\.unsupported\.method",status="404"\} 1\b/, 152 + 'unknown-method XRPC errors are exported', 153 + ); 154 + like( 155 + $metrics, 156 + qr/perlsky_xrpc_errors_total\{endpoint_type="query",error="InternalServerError",method="GET",nsid="com\.atproto\.server\.describeServer",status="500"\} 1\b/, 157 + 'internal XRPC failures are exported as 500 errors', 158 + ); 159 + like( 160 + $metrics, 161 + qr/perlsky_xrpc_unhandled_exceptions_total\{endpoint_type="query",method="GET",nsid="com\.atproto\.server\.describeServer"\} 1\b/, 162 + 'unhandled XRPC exceptions are exported separately', 125 163 ); 126 164 like( 127 165 $metrics,