The code used for the cdn.blueat.net Cloudflare Worker.
2
worker.js edited
305 lines 12 kB view raw
/**
 * BlueAT Network CDN — Cloudflare Worker Set Up
 * Version: 2.4.0
 *
 * A 1:1 of cdn.bsky.app with D1-backed DID caching
 *
 * ─────────────────────────────────────────────────────────────────────────────
 * SETUP
 * ─────────────────────────────────────────────────────────────────────────────
 *
 * 1. D1 DATABASE
 *    ──────────────
 *    Create a D1 database in the Cloudflare dashboard (or via Wrangler):
 *      wrangler d1 create blueat-cdn-db
 *
 *    Then run the following SQL to initialise the schema:
 *
 *      CREATE TABLE IF NOT EXISTS did_cache (
 *        did        TEXT PRIMARY KEY,
 *        pds_url    TEXT NOT NULL,
 *        cached_at  INTEGER NOT NULL
 *      );
 *
 *      CREATE TABLE IF NOT EXISTS meta (
 *        key    TEXT PRIMARY KEY,
 *        value  TEXT NOT NULL
 *      );
 *
 *    Apply it with:
 *      wrangler d1 execute blueat-cdn-db --file=schema.sql
 *
 * ─────────────────────────────────────────────────────────────────────────────
 *
 * 2. WRANGLER CONFIG (wrangler.toml)
 *    ───────────────────────────────────
 *    Add the D1 binding and cron trigger to your wrangler.toml:
 *
 *      name = "blueat-cdn"
 *      main = "worker.js"
 *      compatibility_date = "2024-09-23"
 *
 *      [[d1_databases]]
 *      binding = "DB"
 *      database_name = "blueat-cdn-db"
 *      database_id = "<your-d1-database-id>"
 *
 *      [triggers]
 *      crons = ["0 * * * *"]   # every hour — prunes stale DID cache entries
 *
 * ─────────────────────────────────────────────────────────────────────────────
 *
 * 3. ENVIRONMENT BINDINGS (set in Cloudflare Dashboard)
 *    ──────────────────────────────────────────────────────
 *    No secret environment variables are required for core operation.
 *    The DB binding above is the only required binding.
 *
 * ─────────────────────────────────────────────────────────────────────────────
 *
 * 4. ROUTES
 *    ─────────
 *    The worker handles the following request paths:
 *
 *    GET /xrpc/_health
 *      Returns JSON with service status and D1 cache statistics.
 *      Not cached — always live.
 *
 *    GET /clear
 *      ⚠️  Clears all DID cache entries from D1 and records the time of the
 *      clear in the meta table (reported by /xrpc/_health).
 *      Consider restricting this route in production (e.g. via a Cloudflare
 *      WAF rule or by adding token-based auth) as it is currently open.
 *
 *    GET /img/:type/plain/:did/:cid
 *      Main blob proxy endpoint. Resolves the DID to its PDS, fetches the
 *      blob via com.atproto.sync.getBlob, and caches the response in the
 *      Cloudflare Cache API for 7 days (immutable).
 *
 *      :type  — e.g. "avatar", "banner", "feed_thumbnail"
 *      :did   — a did:plc or did:web identifier
 *      :cid   — the blob CID
 *
 *      For avatar and banner types, if the blob CID has rotated (e.g. after
 *      a profile update), the worker falls back to re-resolving the current
 *      CID from com.atproto.repo.getRecord on the PDS.
 *
 * ─────────────────────────────────────────────────────────────────────────────
 *
 * 5. CACHING BEHAVIOUR
 *    ─────────────────────
 *    - DID → PDS resolution is cached in D1 for 24 hours (DID_TTL_MS).
 *    - Blob responses are cached in the Cloudflare Cache API for 7 days.
 *    - Blob responses are stored under a canonical GET key
 *      (/img/:type/plain/:did/:cid), so HEAD requests, query strings and
 *      "@format" suffixes all share a single cache entry.
 *    - Stale cache entries use stale-while-revalidate: the stale response is
 *      returned immediately and a background revalidation is queued via
 *      ctx.waitUntil().
 *    - The hourly cron only prunes D1 rows older than 24 hours — it does NOT
 *      bulk-delete the entire table.
 *
 * ─────────────────────────────────────────────────────────────────────────────
 *
 * 6. DID RESOLUTION
 *    ──────────────────
 *    - did:plc  — resolved via https://plc.directory/<did>
 *    - did:web  — resolved via https://<host>/.well-known/did.json
 *                 (or /<path>/did.json for path-based did:web identifiers)
 *    - Any other DID method is rejected without a network request.
 *    - PLC/did:web responses are edge-cached by Cloudflare for 1 hour
 *      (cf: { cacheTtl: 3600, cacheEverything: true }).
 *
 * ─────────────────────────────────────────────────────────────────────────────
 */

const VERSION = "2.4.0";
const ADMIN_DID = "did:plc:l37td5yhxl2irrzrgvei4qay";
const CONTACT_EMAIL = "danielmorrisey@pm.me";
const SERVICE_NAME = "BlueAT Network CDN";
const PLC_DIRECTORY = "https://plc.directory";
const PATH_RE = /^\/img\/([^/]+)\/plain\/(did:[^/]+)\/([^@/]+)/;
const DID_TTL_MS = 86_400_000; // 24 hours
const REVALIDATE_AFTER_MS = 86_400_000; // cached blobs older than 24 hours are refreshed in the background
const DID_WEB_PREFIX = "did:web:";
const DID_PLC_RE = /^did:plc:[a-z0-9]+$/;
// Hostname with an optional port; rejects "/", "?", "#", "@" and anything
// else that could redirect the DID-document fetch to an unintended URL.
const DID_WEB_HOST_RE = /^[a-z0-9](?:[a-z0-9.-]*[a-z0-9])?(?::\d{1,5})?$/i;

const worker = {
  /**
   * HTTP entry point: serves the health route, the cache-clear route and the
   * blob proxy.
   *
   * @param {Request} request - Incoming request; only GET and HEAD are served.
   * @param {object} env - Worker bindings; `env.DB` is the D1 database.
   * @param {object} ctx - Execution context providing `waitUntil`.
   * @returns {Promise<Response>}
   */
  async fetch(request, env, ctx) {
    if (!["GET", "HEAD"].includes(request.method)) {
      return new Response("Method Not Allowed", { status: 405 });
    }

    const url = new URL(request.url);

    // ── Health ───────────────────────────────────────────────────────────────
    if (url.pathname === "/xrpc/_health") {
      const [stats, lastClear] = await Promise.all([
        env.DB.prepare(`
          SELECT
            COUNT(*)           AS total,
            SUM(cached_at < ?) AS stale,
            MIN(cached_at)     AS oldest,
            MAX(cached_at)     AS newest
          FROM did_cache
        `).bind(Date.now() - DID_TTL_MS).first(),
        env.DB.prepare(
          "SELECT value FROM meta WHERE key = 'last_cache_clear'"
        ).first(),
      ]);

      return Response.json(
        {
          status: "ok",
          version: VERSION,
          admin_did: ADMIN_DID,
          admin_contact_email: CONTACT_EMAIL,
          service_name: SERVICE_NAME,
          db: {
            cached_dids: stats?.total ?? 0,
            stale_dids: stats?.stale ?? 0,
            oldest_entry: stats?.oldest ? new Date(stats.oldest).toISOString() : null,
            newest_entry: stats?.newest ? new Date(stats.newest).toISOString() : null,
            last_cache_clear: lastClear?.value ?? null,
          },
        },
        { headers: { "Cache-Control": "no-store" } }
      );
    }

    // ── Clear DID cache ──────────────────────────────────────────────────────
    if (url.pathname === "/clear") {
      const { meta } = await env.DB.prepare("DELETE FROM did_cache").run();
      // Keep the `last_cache_clear` value reported by the health route accurate.
      await recordCacheClear(env);
      return Response.json(
        { cleared: meta.changes },
        { headers: { "Cache-Control": "no-store" } }
      );
    }

    const match = url.pathname.match(PATH_RE);
    if (!match) return new Response("Not Found", { status: 404 });

    const [, type, did, cid] = match;

    // cache.put() rejects non-GET requests, and keying on the raw request lets
    // any query string or "@format" suffix create a separate cache entry. Use
    // one canonical GET key per (type, did, cid) instead.
    const cacheKey = new Request(
      new URL(`/img/${type}/plain/${did}/${cid}`, url.origin).href,
      { method: "GET" }
    );

    const cache = caches.default;
    const cachedResponse = await cache.match(cacheKey);
    if (cachedResponse) {
      const cachedAt = Date.parse(cachedResponse.headers.get("X-Cached-At") ?? "");
      // A missing or unparseable timestamp is treated as stale so that the
      // entry gets refreshed rather than living on unchecked.
      const isStale = !Number.isFinite(cachedAt) || Date.now() - cachedAt > REVALIDATE_AFTER_MS;
      if (isStale) ctx.waitUntil(revalidate(cacheKey, match, cache, env));
      return cachedResponse;
    }

    try {
      const pdsUrl = await resolvePds(did, env);
      if (!pdsUrl) return new Response("PDS Not Found", { status: 404 });

      const blobRes = await fetchBlob(pdsUrl, type, did, cid);
      if (!blobRes?.ok) return new Response("Asset Not Found", { status: 404 });

      const finalRes = buildResponse(blobRes, "pds-direct");
      ctx.waitUntil(
        cache.put(cacheKey, finalRes.clone()).catch((err) => {
          console.error(`[cache] failed to store ${cacheKey.url}:`, err);
        })
      );
      return finalRes;
    } catch (err) {
      console.error(`[fetch] failed to proxy ${did}/${cid}:`, err);
      return new Response("Bad Gateway", { status: 502 });
    }
  },

  // ── Cron: runs every 60 min — only PURGES stale entries, never re-resolves ─
  /**
   * Deletes DID cache rows older than DID_TTL_MS and records the prune time.
   *
   * @param {object} event - Scheduled event (unused).
   * @param {object} env - Worker bindings; `env.DB` is the D1 database.
   * @param {object} ctx - Execution context (unused).
   * @returns {Promise<void>}
   */
  async scheduled(event, env, ctx) {
    const staleThreshold = Date.now() - DID_TTL_MS;

    const { meta } = await env.DB.prepare(
      "DELETE FROM did_cache WHERE cached_at < ?"
    ).bind(staleThreshold).run();

    await recordCacheClear(env);

    console.log(`[cron] pruned ${meta.changes} stale DID entries`);
  },
};

export default worker;

// ── Helpers ───────────────────────────────────────────────────────────────────

/**
 * Upserts the current time into the meta table under `last_cache_clear`.
 *
 * @param {object} env - Worker bindings; `env.DB` is the D1 database.
 * @returns {Promise<void>}
 */
async function recordCacheClear(env) {
  await env.DB.prepare(
    `INSERT INTO meta (key, value) VALUES ('last_cache_clear', ?)
     ON CONFLICT(key) DO UPDATE SET value = excluded.value`
  ).bind(new Date().toISOString()).run();
}

/**
 * Builds an XRPC URL on a PDS with properly encoded query parameters.
 *
 * The DID and CID come straight from the request path, so they must never be
 * concatenated into the query string unescaped.
 *
 * @param {string} pdsUrl - PDS base URL; trailing slashes are ignored.
 * @param {string} method - XRPC method NSID, e.g. "com.atproto.sync.getBlob".
 * @param {Record<string, string>} params - Query parameters.
 * @returns {string} The absolute URL.
 * @throws {TypeError} If `pdsUrl` is not a valid absolute URL.
 */
function xrpcUrl(pdsUrl, method, params) {
  let base = String(pdsUrl);
  while (base.endsWith("/")) base = base.slice(0, -1);

  const target = new URL(`${base}/xrpc/${method}`);
  for (const [key, value] of Object.entries(params)) {
    target.searchParams.set(key, value);
  }
  return target.href;
}

/**
 * Copies an upstream blob response and applies the CDN's response headers.
 *
 * @param {Response} source - Upstream response from the PDS.
 * @param {string} proxySource - Value for the X-Proxy-Source header.
 * @returns {Response}
 */
function buildResponse(source, proxySource) {
  const res = new Response(source.body, source);
  // Cloudflare's Cache API never stores responses carrying Set-Cookie, and a
  // PDS has no business setting cookies on the CDN origin anyway.
  res.headers.delete("Set-Cookie");
  res.headers.set("Cache-Control", "public, max-age=604800, immutable");
  res.headers.set("X-Proxy-Source", proxySource);
  res.headers.set("X-Cached-At", new Date().toUTCString());
  res.headers.set("Content-Disposition", "inline");
  // The body and Content-Type are controlled by a third-party PDS: forbid MIME
  // sniffing and sandbox the document in case it is opened directly.
  res.headers.set("X-Content-Type-Options", "nosniff");
  res.headers.set("Content-Security-Policy", "default-src 'none'; sandbox");
  return res;
}

/**
 * Fetches a blob from the PDS, falling back to the profile record's current
 * CID for avatar/banner requests whose CID is no longer found (404).
 *
 * @param {string} pdsUrl - PDS base URL.
 * @param {string} type - Image type from the request path.
 * @param {string} did - Repository DID.
 * @param {string} cid - Requested blob CID.
 * @returns {Promise<Response|null>} Upstream response, or null when the
 *   fallback could not find a different CID.
 */
async function fetchBlob(pdsUrl, type, did, cid) {
  const blobRes = await fetch(xrpcUrl(pdsUrl, "com.atproto.sync.getBlob", { did, cid }));
  if (blobRes.status === 404 && (type === "avatar" || type === "banner")) {
    return resolveProfileFallback(pdsUrl, did, type, cid);
  }
  return blobRes;
}

/**
 * Reads the account's app.bsky.actor.profile record and fetches the blob it
 * currently references for `type`.
 *
 * @param {string} pdsUrl - PDS base URL.
 * @param {string} did - Repository DID.
 * @param {string} type - Profile field to read ("avatar" or "banner").
 * @param {string} cid - CID that was originally requested.
 * @returns {Promise<Response|null>} Blob response, or null when the profile is
 *   unavailable or still references the same CID.
 */
async function resolveProfileFallback(pdsUrl, did, type, cid) {
  const profileRes = await fetch(
    xrpcUrl(pdsUrl, "com.atproto.repo.getRecord", {
      repo: did,
      collection: "app.bsky.actor.profile",
      rkey: "self",
    })
  );
  if (!profileRes.ok) return null;

  const profileData = await profileRes.json();
  const originalCid = profileData?.value?.[type]?.ref?.$link;
  if (!originalCid || originalCid === cid) return null;

  return fetch(xrpcUrl(pdsUrl, "com.atproto.sync.getBlob", { did, cid: originalCid }));
}

/**
 * Background refresh of a cached blob. Best-effort: failures are logged and
 * the existing cache entry is left in place.
 *
 * @param {Request} request - Cache key to overwrite.
 * @param {RegExpMatchArray} match - PATH_RE match ([, type, did, cid]).
 * @param {Cache} cache - Cache to write to.
 * @param {object} env - Worker bindings; `env.DB` is the D1 database.
 * @returns {Promise<void>}
 */
async function revalidate(request, match, cache, env) {
  const [, type, did, cid] = match;
  try {
    const pdsUrl = await resolvePds(did, env);
    if (!pdsUrl) return;

    // Same lookup as the main path, so entries that were served through the
    // avatar/banner fallback can be refreshed too.
    const blobRes = await fetchBlob(pdsUrl, type, did, cid);
    if (blobRes?.ok) {
      await cache.put(request, buildResponse(blobRes, "pds-direct-revalidate"));
    }
  } catch (err) {
    console.error(`[revalidate] failed for ${did}/${cid}:`, err);
  }
}

/**
 * Maps a DID to the URL of its DID document.
 *
 * @param {string} did - A did:plc or did:web identifier.
 * @returns {string|null} Document URL, or null when the DID method is
 *   unsupported or the identifier is malformed.
 */
function didDocumentUrl(did) {
  if (did.startsWith(DID_WEB_PREFIX)) {
    let segments;
    try {
      segments = did.slice(DID_WEB_PREFIX.length).split(":").map(decodeURIComponent);
    } catch {
      // Malformed percent-encoding: the identifier is simply invalid.
      return null;
    }

    const [host, ...pathSegments] = segments;
    if (!DID_WEB_HOST_RE.test(host)) return null;
    if (pathSegments.some((seg) => seg === "" || seg === "." || seg === "..")) return null;

    const path = pathSegments.length === 0
      ? "/.well-known/did.json"
      : `/${pathSegments.map(encodeURIComponent).join("/")}/did.json`;
    return `https://${host}${path}`;
  }

  if (DID_PLC_RE.test(did)) return `${PLC_DIRECTORY}/${did}`;

  return null;
}

/**
 * Extracts the PDS endpoint from a DID document.
 *
 * @param {unknown} doc - Parsed DID document.
 * @returns {string|null} The first PDS service endpoint that is an http(s)
 *   URL string, or null when there is none.
 */
function extractPdsEndpoint(doc) {
  if (!Array.isArray(doc?.service)) return null;

  for (const svc of doc.service) {
    const hasPdsId = typeof svc?.id === "string" && svc.id.endsWith("#atproto_pds");
    if (!hasPdsId && svc?.type !== "AtprotoPersonalDataServer") continue;

    const endpoint = svc.serviceEndpoint;
    if (typeof endpoint !== "string") continue;

    try {
      const { protocol } = new URL(endpoint);
      if (protocol === "https:" || protocol === "http:") return endpoint;
    } catch {
      // Not a parseable absolute URL — keep looking at the remaining services.
    }
  }

  return null;
}

/**
 * Resolves a DID to its PDS base URL, using D1 as a 24-hour cache.
 *
 * @param {string} did - A did:plc or did:web identifier.
 * @param {object} env - Worker bindings; `env.DB` is the D1 database.
 * @returns {Promise<string|null>} PDS URL, or null when it cannot be resolved.
 */
async function resolvePds(did, env) {
  // 1. Check D1 first
  const row = await env.DB.prepare(
    "SELECT pds_url, cached_at FROM did_cache WHERE did = ?"
  ).bind(did).first();

  if (row && Date.now() - row.cached_at < DID_TTL_MS) {
    return row.pds_url;
  }

  // 2. Fetch from PLC / did:web
  const reqUrl = didDocumentUrl(did);
  if (!reqUrl) return null;

  const res = await fetch(reqUrl, { cf: { cacheTtl: 3600, cacheEverything: true } });
  if (!res.ok) return null;

  const pdsUrl = extractPdsEndpoint(await res.json());

  // 3. Upsert into D1
  if (pdsUrl) {
    await env.DB.prepare(
      `INSERT INTO did_cache (did, pds_url, cached_at) VALUES (?, ?, ?)
       ON CONFLICT(did) DO UPDATE SET pds_url = excluded.pds_url, cached_at = excluded.cached_at`
    ).bind(did, pdsUrl, Date.now()).run();
  }

  return pdsUrl;
}