my own status page
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: overhaul the triage reports

+137 -77
+58
src/db.ts
··· 324 324 return rows.results as unknown as Incident[]; 325 325 } 326 326 327 + export async function getRecentResolvedIncidentsWithUpdates(db: D1Database, days: number): Promise<IncidentWithUpdates[]> { 328 + const since = Math.floor(Date.now() / 1000) - days * 24 * 60 * 60; 329 + const rows = await db 330 + .prepare( 331 + `SELECT i.*, u.id as update_id, u.status as update_status, u.message as update_message, u.created_at as update_created_at 332 + FROM incidents i 333 + LEFT JOIN incident_updates u ON u.incident_id = i.id 334 + WHERE i.status = 'resolved' AND i.resolved_at >= ? 335 + ORDER BY i.resolved_at DESC, u.created_at ASC`, 336 + ) 337 + .bind(since) 338 + .all(); 339 + 340 + const incidentMap = new Map<number, IncidentWithUpdates>(); 341 + for (const row of rows.results) { 342 + const id = row.id as number; 343 + if (!incidentMap.has(id)) { 344 + incidentMap.set(id, { 345 + id, 346 + service_id: row.service_id as string, 347 + title: row.title as string, 348 + status: row.status as string, 349 + severity: row.severity as string, 350 + triage_report: row.triage_report as string | null, 351 + github_repo: row.github_repo as string | null, 352 + github_issue_number: row.github_issue_number as number | null, 353 + started_at: row.started_at as number, 354 + resolved_at: row.resolved_at as number | null, 355 + created_at: row.created_at as number, 356 + updated_at: row.updated_at as number, 357 + updates: [], 358 + }); 359 + } 360 + if (row.update_id) { 361 + incidentMap.get(id)!.updates.push({ 362 + id: row.update_id as number, 363 + incident_id: id, 364 + status: row.update_status as string, 365 + message: row.update_message as string, 366 + created_at: row.update_created_at as number, 367 + }); 368 + } 369 + } 370 + return Array.from(incidentMap.values()); 371 + } 372 + 327 373 export async function getIncident(db: D1Database, id: number): Promise<IncidentWithUpdates | null> { 328 374 const incident = await db 329 375 .prepare("SELECT * FROM incidents WHERE id = ?") ··· 376 422 .first(); 377 423 return (row as unknown as Incident) ?? null; 378 424 } 425 + 426 + export async function getRecentlyResolvedIncidents( 427 + db: D1Database, 428 + withinSeconds: number, 429 + ): Promise<Incident[]> { 430 + const since = Math.floor(Date.now() / 1000) - withinSeconds; 431 + const rows = await db 432 + .prepare("SELECT * FROM incidents WHERE status = 'resolved' AND resolved_at >= ? ORDER BY resolved_at DESC") 433 + .bind(since) 434 + .all(); 435 + return rows.results as unknown as Incident[]; 436 + }
+21 -31
src/github.ts
··· 81 81 } 82 82 } 83 83 84 - export async function editIssueBody( 85 - token: string, 86 - owner: string, 87 - repo: string, 88 - issueNumber: number, 89 - body: string, 90 - ): Promise<void> { 91 - const res = await fetch( 92 - `https://api.github.com/repos/${owner}/${repo}/issues/${issueNumber}`, 93 - { 94 - method: "PATCH", 95 - headers: { 96 - Authorization: `Bearer ${token}`, 97 - Accept: "application/vnd.github+json", 98 - "User-Agent": "infra-status-worker", 99 - }, 100 - body: JSON.stringify({ body }), 101 - }, 102 - ); 103 - if (!res.ok) { 104 - const text = await res.text(); 105 - throw new Error(`GitHub edit issue failed: ${res.status} ${text}`); 106 - } 107 - } 108 - 109 84 interface GitHubIssue { 110 85 state: string; 111 86 body: string | null; ··· 161 136 const issue = await fetchIssue(token, parsed.owner, parsed.repo, incident.github_issue_number); 162 137 if (issue.state === "closed" && incident.status !== "resolved") { 163 138 const now = Math.floor(Date.now() / 1000); 139 + // Fetch latest comments to find the closing message 140 + const kvKey = `gh_sync:${incident.id}:last`; 141 + const lastSeen = await kv.get(kvKey); 142 + const comments = await fetchComments(token, parsed.owner, parsed.repo, incident.github_issue_number, lastSeen ?? undefined); 143 + const human = comments.filter((c) => c.user.type !== "Bot" && !c.body.startsWith("Automated incident detected") && !c.body.startsWith("## Triage Report") && !c.body.startsWith("Service recovered automatically")); 144 + 145 + // Use the last human comment as the resolve message, or fall back to generic 146 + const resolveMsg = human.length > 0 ? human[human.length - 1].body : "Issue closed on GitHub"; 147 + 148 + // Add any earlier human comments as investigating updates 149 + for (const comment of human.slice(0, -1)) { 150 + await addIncidentUpdate(db, incident.id, "investigating", comment.body); 151 + } 152 + 164 153 await updateIncident(db, incident.id, { status: "resolved", resolved_at: now }); 165 - await addIncidentUpdate(db, incident.id, "resolved", "Issue closed on GitHub"); 154 + await addIncidentUpdate(db, incident.id, "resolved", resolveMsg); 155 + 156 + // Track sync position 157 + if (comments.length > 0) { 158 + const latest = comments[comments.length - 1].created_at; 159 + await kv.put(kvKey, latest, { expirationTtl: 86400 * 7 }); 160 + } 166 161 continue; 167 - } 168 - 169 - // Sync issue body edits back to triage_report 170 - if (issue.body && issue.body !== incident.triage_report) { 171 - await updateIncident(db, incident.id, { triage_report: issue.body }); 172 162 } 173 163 174 164 // Sync new comments since last check
+11 -8
src/health.ts
··· 4 4 5 5 export type Status = "up" | "degraded" | "misconfigured" | "timeout" | "down" | "unknown"; 6 6 7 - interface HealthResult { 7 + export interface HealthResult { 8 8 status: Status; 9 9 latency_ms: number; 10 + status_code?: number; 11 + error?: string; 10 12 } 11 13 12 14 export async function checkHealth(service: Service): Promise<HealthResult> { ··· 24 26 const latency_ms = Date.now() - start; 25 27 26 28 if (res.status >= 400 && res.status < 500) { 27 - return { status: "misconfigured", latency_ms }; 29 + return { status: "misconfigured", latency_ms, status_code: res.status }; 28 30 } 29 31 if (res.status === 502 || res.status === 504) { 30 - return { status: "down", latency_ms }; 32 + return { status: "down", latency_ms, status_code: res.status }; 31 33 } 32 34 if (res.status >= 500) { 33 - return { status: "degraded", latency_ms }; 35 + return { status: "degraded", latency_ms, status_code: res.status }; 34 36 } 35 37 if (res.status >= 200 && res.status < 300) { 36 38 if (latency_ms > SLOW_THRESHOLD_MS) { 37 - return { status: "degraded", latency_ms }; 39 + return { status: "degraded", latency_ms, status_code: res.status }; 38 40 } 39 - return { status: "up", latency_ms }; 41 + return { status: "up", latency_ms, status_code: res.status }; 40 42 } 41 - return { status: "down", latency_ms }; 43 + return { status: "down", latency_ms, status_code: res.status }; 42 44 } catch (err) { 43 45 const latency_ms = Date.now() - start; 44 46 const isTimeout = err instanceof DOMException && err.name === "TimeoutError"; 45 - return { status: isTimeout ? "timeout" : "down", latency_ms }; 47 + const error = isTimeout ? "Request timed out after 10s" : err instanceof Error ? err.message : "Unknown error"; 48 + return { status: isTimeout ? "timeout" : "down", latency_ms, error }; 46 49 } 47 50 }
+5 -3
src/index.ts
··· 1 1 import type { Env } from "./types"; 2 2 import { getManifest } from "./manifest"; 3 3 import { checkHealth } from "./health"; 4 - import { insertPing, getLatestPing, pruneOldPings, createIncident, updateIncident, addIncidentUpdate, getActiveIncidentForService, getActiveIncidents, getRecentlyResolvedIncident, setIncidentGitHub } from "./db"; 4 + import { insertPing, getLatestPing, pruneOldPings, createIncident, updateIncident, addIncidentUpdate, getActiveIncidentForService, getActiveIncidents, getRecentlyResolvedIncident, getRecentlyResolvedIncidents, setIncidentGitHub } from "./db"; 5 5 import { refreshDevices } from "./tailscale"; 6 6 import { handleStatusRoute } from "./routes/status"; 7 7 import { handleFavicon } from "./routes/favicon"; ··· 127 127 try { 128 128 const issueNumber = await createIssue(env.GITHUB_TOKEN, parsed.owner, parsed.repo, { 129 129 title: `${svc.name} is ${result.status}`, 130 - body: `Automated incident detected by [infra.dunkirk.sh](https://infra.dunkirk.sh)\n\n**Service:** ${svc.name}\n**Health URL:** ${svc.health_url}\n**Status:** ${result.status}\n**Detected at:** ${new Date().toISOString()}\n\n---\n*Comments on this issue will appear on the status page. Close the issue to resolve the incident.*`, 130 + body: `Automated incident detected by [infra.dunkirk.sh](https://infra.dunkirk.sh)\n\n**Service:** ${svc.name}\n**Health URL:** ${svc.health_url}\n**Status:** ${result.status}${result.status_code ? ` (HTTP ${result.status_code})` : ""}${result.error ? ` — ${result.error}` : ""}\n**Latency:** ${result.latency_ms}ms\n**Detected at:** ${new Date().toISOString()}\n\n---\n*Comments on this issue will appear on the status page. Close the issue to resolve the incident.*`, 131 131 assignees: env.GITHUB_ASSIGNEE ? [env.GITHUB_ASSIGNEE] : [], 132 132 labels: ["incident"], 133 133 }); ··· 188 188 // Sync GitHub issue comments/state back to incidents 189 189 if (env.GITHUB_TOKEN) { 190 190 const active = await getActiveIncidents(env.DB); 191 - await syncGitHubIncidents(env.DB, env.KV, env.GITHUB_TOKEN, active); 191 + const recentlyResolved = await getRecentlyResolvedIncidents(env.DB, 86400 * 7); 192 + const toSync = [...active, ...recentlyResolved]; 193 + await syncGitHubIncidents(env.DB, env.KV, env.GITHUB_TOKEN, toSync); 192 194 } 193 195 }, 194 196 } satisfies ExportedHandler<Env>;
+4 -5
src/routes/incidents.ts
··· 7 7 updateIncident, 8 8 addIncidentUpdate, 9 9 } from "../db"; 10 - import { commentOnIssue, editIssueBody, parseRepo } from "../github"; 10 + import { commentOnIssue, parseRepo } from "../github"; 11 11 12 12 function authCheck(request: Request, env: Env): boolean { 13 13 const auth = request.headers.get("Authorization"); ··· 55 55 return Response.json({ error: "unauthorized" }, { status: 401 }); 56 56 } 57 57 const id = parseInt(singleMatch[1]); 58 - const body = await request.json<{ status?: string; triage_report?: string }>(); 58 + const body = await request.json<{ status?: string; triage_report?: string; summary?: string }>(); 59 59 const updateData: { status?: string; triage_report?: string; resolved_at?: number } = {}; 60 60 if (body.status) updateData.status = body.status; 61 61 if (body.triage_report !== undefined) updateData.triage_report = body.triage_report; 62 62 if (body.status === "resolved") updateData.resolved_at = Math.floor(Date.now() / 1000); 63 63 await updateIncident(env.DB, id, updateData); 64 64 if (body.status) { 65 - await addIncidentUpdate(env.DB, id, body.status, body.triage_report ?? `Status changed to ${body.status}`); 65 + const timelineMsg = body.summary ?? body.triage_report ?? `Status changed to ${body.status}`; 66 + await addIncidentUpdate(env.DB, id, body.status, timelineMsg); 66 67 } 67 68 68 69 // Sync triage report to GitHub issue ··· 71 72 const parsed = parseRepo(`https://github.com/${incident.github_repo}`); 72 73 if (parsed) { 73 74 if (body.triage_report) { 74 - // Post triage report as comment and update issue body 75 75 commentOnIssue(env.GITHUB_TOKEN, parsed.owner, parsed.repo, incident.github_issue_number, `## Triage Report\n\n${body.triage_report}`).catch(() => {}); 76 - editIssueBody(env.GITHUB_TOKEN, parsed.owner, parsed.repo, incident.github_issue_number, body.triage_report).catch(() => {}); 77 76 } else if (body.status) { 78 77 commentOnIssue(env.GITHUB_TOKEN, parsed.owner, parsed.repo, incident.github_issue_number, `Status changed to **${body.status}**`).catch(() => {}); 79 78 }
+37 -29
src/routes/index.ts
··· 1 1 import type { Env } from "../types"; 2 2 import { getManifest } from "../manifest"; 3 - import { getAllLatestPings, getAllUptime7d, getOverallUptimeDays, getLastCheckTime, getActiveIncidentsWithUpdates, getActiveIncidents, getRecentIncidents } from "../db"; 3 + import { getAllLatestPings, getAllUptime7d, getOverallUptimeDays, getLastCheckTime, getActiveIncidentsWithUpdates, getActiveIncidents, getRecentResolvedIncidentsWithUpdates } from "../db"; 4 4 import { getDeviceStatus } from "../tailscale"; 5 5 import { getOverallStatus } from "../overall"; 6 6 import { COMMIT_SHA } from "../version"; 7 7 8 8 export async function handleIndex(env: Env): Promise<Response> { 9 - const [manifest, latestPings, uptimes, lastCheck, uptimeDays, activeIncidentsWithUpdates, activeIncidentsList, recentIncidents] = await Promise.all([ 9 + const [manifest, latestPings, uptimes, lastCheck, uptimeDays, activeIncidentsWithUpdates, activeIncidentsList, resolvedIncidents] = await Promise.all([ 10 10 getManifest(env), 11 11 getAllLatestPings(env.DB), 12 12 getAllUptime7d(env.DB), ··· 14 14 getOverallUptimeDays(env.DB, 90), 15 15 getActiveIncidentsWithUpdates(env.DB), 16 16 getActiveIncidents(env.DB), 17 - getRecentIncidents(env.DB, 7), 17 + getRecentResolvedIncidentsWithUpdates(env.DB, 7), 18 18 ]); 19 19 20 20 const machineOnline = new Map<string, boolean>(); ··· 46 46 manifest, latestPings, activeIncidents: activeIncidentsList, machineOnline, 47 47 }); 48 48 const activeIncidents = activeIncidentsWithUpdates; 49 - const resolvedIncidents = recentIncidents.filter((i) => i.status === "resolved"); 50 49 51 50 const html = `<!DOCTYPE html> 52 51 <html lang="en"> ··· 105 104 .uptime-bar .day.down { background: #e74c3c; } 106 105 .uptime-bar .day.none { background: #21262d; } 107 106 .incidents { margin-bottom: 1.5rem; } 108 - .incident-banner { background: #2d1b1b; border: 1px solid #e74c3c; border-radius: 6px; padding: 0.75rem; margin-bottom: 0.5rem; } 109 - .incident-banner.major { border-color: #f39c12; background: #2d2517; } 110 - .incident-banner.minor { border-color: #8b949e; background: #21262d; } 111 - .incident-title { font-size: 0.85rem; font-weight: 500; margin-bottom: 0.25rem; } 112 - .incident-meta { font-size: 0.7rem; color: #8b949e; display: flex; gap: 0.75rem; } 113 - .incident-status { text-transform: uppercase; letter-spacing: 0.05em; } 107 + .incident-banner { padding: 0.5rem 0; border-bottom: 1px solid #21262d; } 108 + .incident-banner:last-child { border-bottom: none; } 109 + .incident-header { display: flex; align-items: baseline; gap: 0.5rem; flex-wrap: wrap; font-size: 0.8rem; } 110 + .incident-status { text-transform: uppercase; letter-spacing: 0.05em; font-weight: 600; } 114 111 .incident-status.investigating { color: #e74c3c; } 115 112 .incident-status.identified { color: #f39c12; } 116 - .incident-status.monitoring { color: #3498db; } 117 - .incident-triage { margin-top: 0.5rem; } 118 - .incident-triage summary { font-size: 0.75rem; color: #8b949e; cursor: pointer; } 119 - .incident-triage pre { font-size: 0.7rem; color: #c9d1d9; background: #161b22; padding: 0.5rem; border-radius: 4px; margin-top: 0.25rem; white-space: pre-wrap; word-break: break-word; max-height: 300px; overflow-y: auto; } 120 - .incident-timeline { margin-top: 0.5rem; padding-left: 0.75rem; border-left: 2px solid #21262d; } 113 + .incident-title { font-weight: 500; } 114 + .incident-time { color: #8b949e; } 115 + .incident-timeline { margin-top: 0.25rem; padding-left: 0.75rem; } 121 116 .timeline-entry { padding: 0.25rem 0 0.25rem 0.5rem; font-size: 0.7rem; position: relative; } 122 - .timeline-entry::before { content: ''; position: absolute; left: -0.75rem; top: 0.55rem; width: 6px; height: 6px; border-radius: 50%; background: #30363d; transform: translateX(-2px); } 117 + .timeline-entry::before { content: ''; position: absolute; left: -0.75rem; top: 50%; width: 6px; height: 6px; border-radius: 50%; background: #30363d; transform: translate(-2px, -50%); z-index: 1; } 118 + .timeline-entry::after { content: ''; position: absolute; left: calc(-0.75rem - 1px); top: 0; bottom: 0; width: 2px; background: #21262d; } 119 + .timeline-entry:first-child::after { top: 50%; } 120 + .timeline-entry:last-child::after { bottom: 50%; } 121 + .timeline-entry:only-child::after { display: none; } 123 122 .timeline-entry.investigating::before { background: #e74c3c; } 124 123 .timeline-entry.identified::before { background: #f39c12; } 125 - .timeline-entry.monitoring::before { background: #3498db; } 126 124 .timeline-entry.resolved::before { background: #2ecc71; } 127 125 .timeline-status { text-transform: uppercase; letter-spacing: 0.04em; font-size: 0.6rem; font-weight: 600; } 128 126 .timeline-status.investigating { color: #e74c3c; } 129 127 .timeline-status.identified { color: #f39c12; } 130 - .timeline-status.monitoring { color: #3498db; } 131 128 .timeline-status.resolved { color: #2ecc71; } 132 129 .timeline-time { color: #484f58; margin-right: 0.5rem; } 133 130 .timeline-msg { color: #8b949e; } 131 + .timeline-msg a { color: #8b949e; text-decoration: underline; } 132 + .timeline-msg code { font-size: 0.65rem; background: #161b22; padding: 0.1rem 0.25rem; border-radius: 3px; } 134 133 .resolved-incidents { margin-top: 0.75rem; } 135 - .resolved-header { font-size: 0.7rem; text-transform: uppercase; letter-spacing: 0.08em; color: #8b949e; margin-bottom: 0.25rem; } 136 - .resolved-item { font-size: 0.75rem; color: #8b949e; padding: 0.25rem 0; border-bottom: 1px solid #21262d; } 134 + .resolved-header { font-size: 0.7rem; text-transform: uppercase; letter-spacing: 0.08em; color: #8b949e; margin-bottom: 0.5rem; } 135 + .resolved-item { font-size: 0.75rem; color: #8b949e; padding: 0.4rem 0; border-bottom: 1px solid #21262d; } 137 136 .resolved-item:last-child { border-bottom: none; } 137 + .resolved-item-header { margin-bottom: 0.25rem; } 138 + .resolved-item .incident-timeline { margin-top: 0.25rem; } 138 139 footer { margin-top: auto; padding-top: 1rem; border-top: 1px solid #21262d; font-size: 0.7rem; color: #8b949e; } 139 140 .footer-meta { display: flex; justify-content: space-between; } 140 141 footer a { color: #8b949e; text-decoration: none; } ··· 146 147 <h1>infra.dunkirk.sh</h1> 147 148 <p class="overall"><span class="dot ${overallClass}" id="overall-dot" title="${overallClass}"></span><span id="overall-text">${overallText}</span></p> 148 149 ${activeIncidents.length > 0 ? `<div class="incidents"> 149 - ${activeIncidents.map((i) => `<div class="incident-banner ${i.severity}"> 150 - <div class="incident-title">${esc(i.title)}</div> 151 - <div class="incident-meta"> 150 + ${activeIncidents.map((i) => `<div class="incident-banner"> 151 + <div class="incident-header"> 152 152 <span class="incident-status ${i.status}">${i.status}</span> 153 - <span>${esc(i.service_id)}</span> 154 - <span>started <relative-time datetime="${new Date(i.started_at * 1000).toISOString()}">loading</relative-time></span> 153 + <span class="incident-title">${esc(i.title)}</span> 154 + <span class="incident-time">started <relative-time datetime="${new Date(i.started_at * 1000).toISOString()}">loading</relative-time></span> 155 155 </div> 156 - ${i.triage_report ? `<details class="incident-triage"><summary>triage report</summary><pre>${esc(i.triage_report)}</pre></details>` : ""} 157 156 ${i.updates.length > 0 ? `<div class="incident-timeline"> 158 157 ${i.updates.map((u) => `<div class="timeline-entry ${u.status}"> 159 158 <span class="timeline-time">${new Date(u.created_at * 1000).toLocaleTimeString("en-US", { hour: "2-digit", minute: "2-digit", hour12: false, timeZone: "America/New_York" })}</span> ··· 191 190 </div>` : ""} 192 191 ${resolvedIncidents.length > 0 ? `<div class="resolved-incidents"> 193 192 <div class="resolved-header">recent incidents</div> 194 - ${resolvedIncidents.map((i) => `<div class="resolved-item">${esc(i.title)} — resolved <relative-time datetime="${new Date((i.resolved_at ?? i.updated_at) * 1000).toISOString()}">loading</relative-time></div>`).join("\n")} 193 + ${resolvedIncidents.map((i) => `<div class="resolved-item"> 194 + <div class="resolved-item-header">${esc(i.title)} — resolved <relative-time datetime="${new Date((i.resolved_at ?? i.updated_at) * 1000).toISOString()}">loading</relative-time></div> 195 + ${i.updates.length > 0 ? `<div class="incident-timeline"> 196 + ${i.updates.map((u) => `<div class="timeline-entry ${u.status}"> 197 + <span class="timeline-time">${new Date(u.created_at * 1000).toLocaleTimeString("en-US", { hour: "2-digit", minute: "2-digit", hour12: false, timeZone: "America/New_York" })}</span> 198 + <span class="timeline-status ${u.status}">${esc(u.status)}</span> 199 + <span class="timeline-msg">${esc(u.message)}</span> 200 + </div>`).join("\n")} 201 + </div>` : ""} 202 + </div>`).join("\n")} 195 203 </div>` : ""} 196 204 <footer> 197 - <div class="footer-meta"><span>${lastCheckISO ? `updated <relative-time datetime="${lastCheckISO}" prefix="">loading</relative-time>` : "no checks yet"}</span><a href="https://github.com/taciturnaxolotl/status/commit/${COMMIT_SHA}">${COMMIT_SHA}</a></div> 205 + <div class="footer-meta"><span>${lastCheckISO ? `updated <relative-time id="last-check" datetime="${lastCheckISO}" prefix="">loading</relative-time>` : "no checks yet"}</span><a href="https://github.com/taciturnaxolotl/status/commit/${COMMIT_SHA}">${COMMIT_SHA}</a></div> 198 206 </footer> 199 207 <script> 200 208 class RelativeTimeElement extends HTMLElement { ··· 264 272 } 265 273 266 274 if (data.last_check) { 267 - const rt = document.querySelector('relative-time'); 275 + const rt = document.getElementById('last-check'); 268 276 if (rt) rt.setAttribute('datetime', new Date(data.last_check * 1000).toISOString()); 269 277 } 270 278 }
+1 -1
src/types.ts
··· 36 36 id: number; 37 37 service_id: string; 38 38 title: string; 39 - status: "investigating" | "identified" | "monitoring" | "resolved"; 39 + status: "investigating" | "identified" | "resolved"; 40 40 severity: "critical" | "major" | "minor"; 41 41 triage_report: string | null; 42 42 github_repo: string | null;