GET /xrpc/app.bsky.actor.searchActorsTypeahead typeahead.waow.tech
16
fork

Configure Feed

Select the types of activity you want to include in your feed.

add moderation filtering and store avatar CIDs instead of full URLs

moderation: hide actors with bluesky moderation labels (!hide, !takedown,
spam) and self-applied !no-unauthenticated from search results. ingester
detects self-labels on ingest; hourly cron refreshes labels by walking the
index. request-indexing endpoint returns JSON instead of HTML.

avatar CIDs: store ~59-byte CIDs instead of ~130-byte full CDN URLs in
avatar_url column. reconstruct full URLs at query time via avatarUrl().
helper handles both formats for safe deploy-then-migrate ordering.
saves ~70 bytes/row (~2.8GB at 40M actors vs the 10GB D1 ceiling).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

+495 -49
+12 -3
ingester/src/main.zig
··· 29 29 handle: ?[]const u8 = null, 30 30 display_name: ?[]const u8 = null, 31 31 avatar_cid: ?[]const u8 = null, 32 + hidden: ?bool = null, 32 33 }; 33 34 34 35 const IngestHandler = struct { ··· 128 129 } 129 130 } 130 131 131 - if (event.display_name != null or event.avatar_cid != null) { 132 - self.buffer.append(self.allocator, event) catch return; 133 - } 132 + // check self-labels for !no-unauthenticated (user opts out of unauthenticated visibility) 133 + event.hidden = blk: { 134 + const values = zat.json.getArray(record, "labels.values") orelse break :blk false; 135 + for (values) |item| { 136 + const val = zat.json.getString(item, "val") orelse continue; 137 + if (mem.eql(u8, val, "!no-unauthenticated")) break :blk true; 138 + } 139 + break :blk false; 140 + }; 141 + 142 + self.buffer.append(self.allocator, event) catch return; 134 143 } 135 144 136 145 fn handleIdentity(self: *IngestHandler, id: zat.jetstream.IdentityEvent) void {
+3 -2
schema.sql
··· 2 2 did TEXT PRIMARY KEY, 3 3 handle TEXT NOT NULL DEFAULT '', 4 4 display_name TEXT DEFAULT '', 5 - avatar_url TEXT DEFAULT '', 6 - updated_at INTEGER NOT NULL DEFAULT (unixepoch()) 5 + avatar_url TEXT DEFAULT '', -- stores CID only (e.g. bafkrei...); reconstruct URL at query time 6 + updated_at INTEGER NOT NULL DEFAULT (unixepoch()), 7 + hidden INTEGER NOT NULL DEFAULT 0 7 8 ); 8 9 9 10 CREATE INDEX IF NOT EXISTS idx_actors_handle ON actors(handle COLLATE NOCASE);
+153
scripts/backfill-moderation.py
··· 1 + #!/usr/bin/env -S PYTHONUNBUFFERED=1 uv run --script --quiet 2 + # /// script 3 + # requires-python = ">=3.12" 4 + # dependencies = [] 5 + # /// 6 + """ 7 + one-shot: sweep entire actors table and set hidden flags from bsky moderation labels. 8 + 9 + usage: 10 + ./scripts/backfill-moderation.py 11 + """ 12 + 13 + import json 14 + import subprocess 15 + import sys 16 + import time 17 + import urllib.request 18 + 19 + BSKY_MOD_DID = "did:plc:ar7c4by46qjdydhdevvrndac" 20 + MOD_HIDE_VALS = {"!hide", "!takedown", "spam"} 21 + ANY_SRC_HIDE_VALS = {"!no-unauthenticated"} 22 + GET_PROFILES_URL = "https://public.api.bsky.app/xrpc/app.bsky.actor.getProfiles" 23 + BATCH_SIZE = 25 # getProfiles limit 24 + PAGE_SIZE = 500 # actors per D1 query 25 + DELAY = 0.3 # seconds between API calls 26 + 27 + 28 + def should_hide(labels: list | None) -> bool: 29 + if not labels: 30 + return False 31 + now = time.time() * 1000 32 + for l in labels: 33 + if l.get("neg"): 34 + continue 35 + if l.get("exp") and _parse_ts(l["exp"]) <= now: 36 + continue 37 + if l.get("src") == BSKY_MOD_DID and l.get("val") in MOD_HIDE_VALS: 38 + return True 39 + if l.get("val") in ANY_SRC_HIDE_VALS: 40 + return True 41 + return False 42 + 43 + 44 + def _parse_ts(s: str) -> float: 45 + """rough ISO8601 parse — good enough for expiry comparison.""" 46 + from datetime import datetime, timezone 47 + try: 48 + return datetime.fromisoformat(s.replace("Z", "+00:00")).timestamp() * 1000 49 + except Exception: 50 + return 0 51 + 52 + 53 + def d1_query(sql: str) -> list[dict]: 54 + result = subprocess.run( 55 + ["npx", "wrangler", "d1", "execute", "typeahead-db", "--remote", "--command", sql, "--json"], 56 + capture_output=True, text=True, cwd="." 57 + ) 58 + # --json still emits warnings to stdout; extract the JSON array 59 + stdout = result.stdout 60 + bracket = stdout.find("[") 61 + if bracket == -1: 62 + if result.returncode != 0: 63 + print(f" D1 error: {result.stderr[:200]}", file=sys.stderr) 64 + return [] 65 + try: 66 + data = json.loads(stdout[bracket:]) 67 + return data[0]["results"] if data else [] 68 + except (json.JSONDecodeError, IndexError, KeyError) as e: 69 + print(f" D1 parse error: {e}", file=sys.stderr) 70 + return [] 71 + 72 + 73 + def fetch_profiles(dids: list[str]) -> list[dict]: 74 + params = "&".join(f"actors={d}" for d in dids) 75 + url = f"{GET_PROFILES_URL}?{params}" 76 + req = urllib.request.Request(url, headers={"User-Agent": "typeahead-mod-backfill/1.0"}) 77 + try: 78 + with urllib.request.urlopen(req, timeout=15) as resp: 79 + data = json.loads(resp.read()) 80 + return data.get("profiles", []) 81 + except urllib.error.HTTPError as e: 82 + if e.code == 429: 83 + print(" rate limited — pausing 60s") 84 + time.sleep(60) 85 + return fetch_profiles(dids) # retry once 86 + print(f" HTTP {e.code}") 87 + return [] 88 + except Exception as e: 89 + print(f" error: {e}") 90 + return [] 91 + 92 + 93 + def main(): 94 + cursor = 0 95 + total_checked = 0 96 + total_hidden = 0 97 + total_unhidden = 0 98 + 99 + while True: 100 + rows = d1_query( 101 + f"SELECT rowid, did FROM actors WHERE rowid > {cursor} ORDER BY rowid ASC LIMIT {PAGE_SIZE}" 102 + ) 103 + if not rows: 104 + break 105 + 106 + page_hidden = 0 107 + page_unhidden = 0 108 + 109 + for i in range(0, len(rows), BATCH_SIZE): 110 + batch = rows[i : i + BATCH_SIZE] 111 + dids = [r["did"] for r in batch] 112 + 113 + if i > 0: 114 + time.sleep(DELAY) 115 + 116 + profiles = fetch_profiles(dids) 117 + total_checked += len(profiles) 118 + 119 + hide_dids = [] 120 + unhide_dids = [] 121 + for p in profiles: 122 + if should_hide(p.get("labels")): 123 + hide_dids.append(p["did"]) 124 + else: 125 + unhide_dids.append(p["did"]) 126 + 127 + if hide_dids: 128 + did_list = ", ".join(f"'{d}'" for d in hide_dids) 129 + changed = d1_query( 130 + f"UPDATE actors SET hidden = 1 WHERE did IN ({did_list}) AND hidden = 0" 131 + ) 132 + page_hidden += len(hide_dids) 133 + 134 + if unhide_dids: 135 + did_list = ", ".join(f"'{d}'" for d in unhide_dids) 136 + d1_query( 137 + f"UPDATE actors SET hidden = 0 WHERE did IN ({did_list}) AND hidden = 1" 138 + ) 139 + 140 + cursor = rows[-1]["rowid"] 141 + total_hidden += page_hidden 142 + total_unhidden += page_unhidden 143 + 144 + print( 145 + f" cursor={cursor} checked={total_checked} hidden={total_hidden} " 146 + f"(page: {len(rows)} actors, {page_hidden} newly hidden)" 147 + ) 148 + 149 + print(f"\ndone. checked={total_checked}, hidden={total_hidden}, unhidden={total_unhidden}") 150 + 151 + 152 + if __name__ == "__main__": 153 + main()
+12
scripts/migrate-avatar-cid.sql
··· 1 + -- one-shot migration: convert full avatar URLs to CIDs 2 + -- strips the URL prefix (including DID) and @jpeg suffix, leaving just the CID 3 + -- 4 + -- run with: npx wrangler d1 execute typeahead --remote --file scripts/migrate-avatar-cid.sql 5 + 6 + UPDATE actors 7 + SET avatar_url = REPLACE( 8 + REPLACE(avatar_url, 'https://cdn.bsky.app/img/avatar/plain/' || did || '/', ''), 9 + '@jpeg', 10 + '' 11 + ) 12 + WHERE avatar_url LIKE 'https://cdn.bsky.app/%';
+101 -2
scripts/smoke.py
··· 43 43 return ok 44 44 45 45 46 - def fetch(url: str, timeout: int = 15) -> tuple[dict | None, dict]: 46 + def fetch(url: str, timeout: int = 15, method: str = "GET") -> tuple[dict | None, dict]: 47 47 """fetch JSON + response headers. returns (body, headers).""" 48 48 try: 49 - req = urllib.request.Request(url, headers={"User-Agent": "typeahead-smoke/1.0"}) 49 + req = urllib.request.Request(url, headers={"User-Agent": "typeahead-smoke/1.0"}, method=method) 50 50 with urllib.request.urlopen(req, timeout=timeout) as resp: 51 51 headers = {k.lower(): v for k, v in resp.headers.items()} 52 52 return json.loads(resp.read()), headers ··· 155 155 check("stats contains actors indexed", "actors indexed" in body) 156 156 check("stats contains sparkline heading", "searches / hour" in body) 157 157 check("stats has home link", 'href="/"' in body) 158 + check("stats shows hidden count", "hidden by moderation" in body) 158 159 except urllib.error.HTTPError as e: 159 160 check("stats returns 200", False, f"got {e.code}") 160 161 except Exception as e: 161 162 check("stats fetch succeeded", False, str(e)) 162 163 163 164 165 + def test_request_indexing(base_url: str): 166 + print("\n--- request indexing ---") 167 + # GET should redirect to homepage (302) 168 + try: 169 + req = urllib.request.Request( 170 + f"{base_url}/request-indexing?handle=test", 171 + headers={"User-Agent": "typeahead-smoke/1.0"}, 172 + ) 173 + opener = urllib.request.build_opener(urllib.request.HTTPHandler) 174 + # don't follow redirects 175 + class NoRedirect(urllib.request.HTTPRedirectHandler): 176 + def redirect_request(self, req, fp, code, msg, headers, newurl): 177 + raise urllib.error.HTTPError(newurl, code, msg, headers, fp) 178 + opener = urllib.request.build_opener(NoRedirect) 179 + opener.open(req, timeout=15) 180 + check("GET redirects (302)", False, "no redirect") 181 + except urllib.error.HTTPError as e: 182 + check("GET redirects (302)", e.code == 302, f"got {e.code}") 183 + 184 + # POST with empty handle should return 400 185 + data, _ = fetch(f"{base_url}/request-indexing", method="POST") 186 + check("POST empty returns 400", data is not None and data.get("_http_error") == 400, f"got {data}") 187 + 188 + # POST with valid handle should return JSON with handle/did/hidden 189 + data, _ = fetch(f"{base_url}/request-indexing?handle=zzstoatzz.io", method="POST") 190 + if not data or "_error" in data or "_http_error" in data: 191 + check("POST valid handle", False, f"got {data}") 192 + else: 193 + check("POST returns handle", data.get("handle") == "zzstoatzz.io", f"got {data.get('handle')}") 194 + check("POST returns did", data.get("did", "").startswith("did:"), f"got {data.get('did')}") 195 + check("POST returns hidden field", "hidden" in data, f"keys: {list(data.keys())}") 196 + 197 + 198 + def test_moderation_filtering(base_url: str): 199 + print("\n--- moderation filtering ---") 200 + data, _ = fetch(f"{base_url}{XRPC_PATH}?q=test&limit=10") 201 + if not data or "_error" in data or "_http_error" in data: 202 + check("fetch succeeded", False) 203 + return 204 + 205 + actors = data.get("actors", []) 206 + has_labels = any("labels" in a for a in actors) 207 + check("no labels field leaked in results", not has_labels) 208 + 209 + allowed_keys = {"did", "handle", "displayName", "avatar"} 210 + extra_keys: set[str] = set() 211 + for a in actors: 212 + extra_keys |= set(a.keys()) - allowed_keys 213 + check("actor objects have clean shape", len(extra_keys) == 0, f"extra keys: {extra_keys}" if extra_keys else "") 214 + 215 + # verify hidden actors are actually excluded by finding one with !no-unauthenticated 216 + # via bsky API and checking it doesn't appear in our results 217 + print("\n--- hidden actor exclusion ---") 218 + found_hidden = find_hidden_actor(base_url) 219 + if not found_hidden: 220 + check("found a hidden actor to verify", False, "couldn't find one — skipping exclusion check") 221 + 222 + 223 + def find_hidden_actor(base_url: str) -> bool: 224 + """find an actor we've indexed that has !no-unauthenticated, verify they're excluded.""" 225 + # search our index for common names and cross-check labels via bsky API 226 + for q in ["alex", "sam", "chris", "jordan"]: 227 + # get actors from bsky that have !no-unauthenticated 228 + bsky_data, _ = fetch(f"{BSKY_PUBLIC}/xrpc/app.bsky.actor.searchActors?q={q}&limit=25") 229 + if not bsky_data or "_error" in bsky_data: 230 + continue 231 + 232 + for actor in bsky_data.get("actors", []): 233 + labels = actor.get("labels", []) 234 + has_noauth = any( 235 + l.get("val") == "!no-unauthenticated" and not l.get("neg") 236 + for l in labels 237 + ) 238 + if not has_noauth: 239 + continue 240 + 241 + handle = actor.get("handle", "") 242 + if not handle: 243 + continue 244 + 245 + # this actor has !no-unauthenticated — check they're NOT in our results 246 + our_data, _ = fetch(f"{base_url}{XRPC_PATH}?q={handle}&limit=10") 247 + if not our_data or "_error" in our_data: 248 + continue 249 + 250 + our_handles = {a.get("handle") for a in our_data.get("actors", [])} 251 + if handle in our_handles: 252 + check(f"hidden actor @{handle} excluded from search", False, "appeared in results") 253 + return True 254 + else: 255 + check(f"hidden actor @{handle} excluded from search", True) 256 + return True 257 + 258 + return False 259 + 260 + 164 261 def test_comparison(base_url: str, queries: list[str]): 165 262 print("\n--- comparison vs public.api.bsky.app ---") 166 263 ··· 209 306 test_empty_query(args.url) 210 307 test_limit_over_max(args.url) 211 308 test_stats_page(args.url) 309 + test_request_indexing(args.url) 310 + test_moderation_filtering(args.url) 212 311 213 312 if args.compare: 214 313 test_comparison(args.url, args.queries)
+214 -42
src/index.ts
··· 29 29 return Response.json(data, { status, headers: CORS_HEADERS }); 30 30 } 31 31 32 + function avatarUrl(did: string, cidOrUrl: string): string { 33 + if (cidOrUrl.startsWith("https://")) return cidOrUrl; 34 + return `https://cdn.bsky.app/img/avatar/plain/${did}/${cidOrUrl}@jpeg`; 35 + } 36 + 37 + function extractAvatarCid(url: string): string { 38 + const match = url.match(/\/([^/]+)@jpeg$/); 39 + return match?.[1] ?? ''; 40 + } 41 + 32 42 /** strip anything that could break FTS5 syntax, preserving unicode letters/digits */ 33 43 function sanitize(q: string): string { 34 44 return q.replace(/[^\p{L}\p{N}\s.-]/gu, "").trim(); ··· 46 56 handle?: string; 47 57 display_name?: string; 48 58 avatar_cid?: string; 59 + hidden?: boolean; 49 60 } 50 61 51 62 interface SlingshotResponse { ··· 57 68 const BSKY_TYPEAHEAD_URL = 58 69 "https://public.api.bsky.app/xrpc/app.bsky.actor.searchActorsTypeahead"; 59 70 71 + const BSKY_MOD_DID = "did:plc:ar7c4by46qjdydhdevvrndac"; 72 + /** labels from bluesky's moderation service that hide an actor */ 73 + const MOD_HIDE_VALS = new Set(["!hide", "!takedown", "spam"]); 74 + /** labels that hide regardless of issuer (protocol-level, self-labeling respected) */ 75 + const ANY_SRC_HIDE_VALS = new Set(["!no-unauthenticated"]); 76 + 77 + /** 78 + * true if actor should be hidden from our unauthenticated search. 79 + * 80 + * two paths: 81 + * 1. bluesky moderation issued !hide or spam → always hide 82 + * 2. anyone (including the actor themselves) issued !no-unauthenticated → hide, 83 + * because our service is unauthenticated and we should respect the user's intent 84 + */ 85 + function shouldHide(labels?: any[]): boolean { 86 + if (!labels) return false; 87 + const now = Date.now(); 88 + return labels.some((l: any) => { 89 + if (l.neg) return false; 90 + if (l.exp && new Date(l.exp).getTime() <= now) return false; 91 + if (l.src === BSKY_MOD_DID && MOD_HIDE_VALS.has(l.val)) return true; 92 + if (ANY_SRC_HIDE_VALS.has(l.val)) return true; 93 + return false; 94 + }); 95 + } 96 + 60 97 // --- backfill: remove this block once at parity with Bluesky --- 61 98 62 99 async function backfillFromBsky( ··· 78 115 // (e.g. actors ingested via Jetstream that lack avatar/displayName) 79 116 const stmts = actors.map((a) => 80 117 env.DB.prepare( 81 - `INSERT INTO actors (did, handle, display_name, avatar_url, updated_at) 82 - VALUES (?1, ?2, ?3, ?4, unixepoch()) 118 + `INSERT INTO actors (did, handle, display_name, avatar_url, hidden, updated_at) 119 + VALUES (?1, ?2, ?3, ?4, ?5, unixepoch()) 83 120 ON CONFLICT(did) DO UPDATE SET 84 121 handle = COALESCE(NULLIF(?2, ''), actors.handle), 85 122 display_name = COALESCE(NULLIF(?3, ''), actors.display_name), 86 123 avatar_url = COALESCE(NULLIF(?4, ''), actors.avatar_url), 124 + hidden = ?5, 87 125 updated_at = unixepoch()` 88 126 ).bind( 89 127 a.did, 90 128 a.handle || '', 91 129 a.displayName || '', 92 - a.avatar || '' 130 + extractAvatarCid(a.avatar || ''), 131 + shouldHide(a.labels) ? 1 : 0 93 132 ) 94 133 ); 95 134 ··· 124 163 `SELECT COUNT(*) AS total, 125 164 SUM(CASE WHEN handle != '' THEN 1 ELSE 0 END) AS with_handles, 126 165 SUM(CASE WHEN avatar_url != '' THEN 1 ELSE 0 END) AS with_avatars 127 - FROM actors` 166 + FROM actors WHERE hidden = 0` 128 167 ).first<{ total: number; with_handles: number; with_avatars: number }>(); 129 168 if (row) { 130 169 await env.DB.prepare( ··· 166 205 } 167 206 } 168 207 208 + const BSKY_GET_PROFILES_URL = 209 + "https://public.api.bsky.app/xrpc/app.bsky.actor.getProfiles"; 210 + 211 + /** refresh moderation labels, walking the full index over multiple cron runs */ 212 + async function refreshModeration(env: Env): Promise<void> { 213 + // resume where we left off (rowid cursor persisted in KV) 214 + const cursorStr = await env.KV.get("mod_cursor"); 215 + const cursor = cursorStr ? Number(cursorStr) : 0; 216 + 217 + const { results } = await env.DB.prepare( 218 + "SELECT rowid, did FROM actors WHERE rowid > ?1 ORDER BY rowid ASC LIMIT 1000" 219 + ).bind(cursor).all<{ rowid: number; did: string }>(); 220 + 221 + if (!results || results.length === 0) { 222 + // wrapped around — reset cursor for next run 223 + await env.KV.put("mod_cursor", "0"); 224 + console.log(JSON.stringify({ event: "moderation_refresh", status: "wrapped", cursor })); 225 + return; 226 + } 227 + 228 + let checked = 0; 229 + let changed = 0; 230 + 231 + // batch into groups of 25 (getProfiles limit), ~200ms pause between calls 232 + for (let i = 0; i < results.length; i += 25) { 233 + const batch = results.slice(i, i + 25); 234 + const params = batch.map((r) => `actors=${encodeURIComponent(r.did)}`).join("&"); 235 + try { 236 + if (i > 0) await new Promise((r) => setTimeout(r, 200)); 237 + 238 + const res = await fetch(`${BSKY_GET_PROFILES_URL}?${params}`); 239 + if (res.status === 429) { 240 + // save progress and bail — pick up here next run 241 + const lastRowid = results[Math.max(0, i - 1)].rowid; 242 + await env.KV.put("mod_cursor", String(lastRowid)); 243 + console.log(JSON.stringify({ event: "moderation_refresh", status: "rate_limited", checked, changed, cursor: lastRowid })); 244 + return; 245 + } 246 + if (!res.ok) continue; 247 + 248 + const data: any = await res.json(); 249 + const profiles: any[] = data.profiles || []; 250 + checked += profiles.length; 251 + 252 + const stmts: D1PreparedStatement[] = []; 253 + for (const p of profiles) { 254 + const hide = shouldHide(p.labels) ? 1 : 0; 255 + stmts.push( 256 + env.DB.prepare( 257 + "UPDATE actors SET hidden = ?1 WHERE did = ?2 AND hidden != ?1" 258 + ).bind(hide, p.did) 259 + ); 260 + } 261 + if (stmts.length > 0) { 262 + const batchResults = await env.DB.batch(stmts); 263 + changed += batchResults.filter((r) => r.meta.changes > 0).length; 264 + } 265 + } catch { 266 + // best-effort — skip failures 267 + } 268 + } 269 + 270 + // save cursor at end of this page 271 + const lastRowid = results[results.length - 1].rowid; 272 + await env.KV.put("mod_cursor", String(lastRowid)); 273 + console.log(JSON.stringify({ event: "moderation_refresh", checked, changed, cursor: lastRowid })); 274 + } 275 + 169 276 /** fire-and-forget: increment hourly search count + accumulate response time */ 170 277 async function recordMetric(env: Env, ms: number): Promise<void> { 171 278 const hour = Math.floor(Date.now() / 3_600_000); ··· 221 328 `SELECT a.did, a.handle, a.display_name, a.avatar_url 222 329 FROM actors_fts 223 330 JOIN actors a ON a.rowid = actors_fts.rowid 224 - WHERE actors_fts MATCH ?1 AND a.handle != '' 331 + WHERE actors_fts MATCH ?1 AND a.handle != '' AND a.hidden = 0 225 332 ORDER BY rank 226 333 LIMIT ?2` 227 334 ) ··· 233 340 did: r.did, 234 341 handle: r.handle, 235 342 ...(r.display_name ? { displayName: r.display_name } : {}), 236 - ...(r.avatar_url ? { avatar: r.avatar_url } : {}), 343 + ...(r.avatar_url ? { avatar: avatarUrl(r.did, r.avatar_url) } : {}), 237 344 })); 238 345 239 346 // --- backfill: remove this block once at parity with Bluesky --- ··· 284 391 285 392 // batch upsert — use COALESCE to preserve existing fields on partial updates 286 393 const stmts = events.map((e) => { 287 - const avatarUrl = e.avatar_cid 288 - ? `https://cdn.bsky.app/img/avatar/plain/${e.did}/${e.avatar_cid}@jpeg` 289 - : null; 394 + const avatarCid = e.avatar_cid || null; 395 + const hidden = e.hidden !== undefined ? (e.hidden ? 1 : 0) : null; 290 396 return env.DB.prepare( 291 - `INSERT INTO actors (did, handle, display_name, avatar_url, updated_at) 292 - VALUES (?1, ?2, ?3, ?4, unixepoch()) 397 + `INSERT INTO actors (did, handle, display_name, avatar_url, hidden, updated_at) 398 + VALUES (?1, ?2, ?3, ?4, COALESCE(?5, 0), unixepoch()) 293 399 ON CONFLICT(did) DO UPDATE SET 294 400 handle = COALESCE(NULLIF(?2, ''), actors.handle), 295 401 display_name = COALESCE(NULLIF(?3, ''), actors.display_name), 296 402 avatar_url = COALESCE(NULLIF(?4, ''), actors.avatar_url), 403 + hidden = COALESCE(?5, actors.hidden), 297 404 updated_at = unixepoch()` 298 405 ).bind( 299 406 e.did, 300 407 e.handle || '', 301 408 e.display_name || '', 302 - avatarUrl || '' 409 + avatarCid || '', 410 + hidden 303 411 ); 304 412 }); 305 413 ··· 384 492 ""; 385 493 386 494 if (!identifier) { 387 - return html(indexPage("enter a handle or DID to request indexing.")); 495 + return json({ error: "enter a handle or DID to request indexing." }, 400); 388 496 } 389 497 390 498 // resolve via slingshot ··· 392 500 `${SLINGSHOT_URL}?identifier=${encodeURIComponent(identifier)}` 393 501 ); 394 502 if (!res.ok) { 395 - return html(indexPage(`could not resolve "${escHtml(identifier)}". check that it's a valid handle or DID.`)); 503 + return json({ error: `could not resolve "${identifier}". check that it's a valid handle or DID.` }, 404); 396 504 } 397 505 398 506 const identity: SlingshotResponse = await res.json(); 399 507 400 - // fetch profile from public API for display name + avatar 508 + // fetch profile from public API for display name + avatar + labels 401 509 let displayName = ""; 402 - let avatarUrl = ""; 510 + let avatarCid = ""; 511 + let hidden = false; 403 512 try { 404 513 const profileRes = await fetch( 405 514 `https://public.api.bsky.app/xrpc/app.bsky.actor.getProfile?actor=${encodeURIComponent(identity.did)}` ··· 407 516 if (profileRes.ok) { 408 517 const profile: any = await profileRes.json(); 409 518 displayName = profile.displayName || ""; 410 - avatarUrl = profile.avatar || ""; 519 + avatarCid = extractAvatarCid(profile.avatar || ""); 520 + hidden = shouldHide(profile.labels); 411 521 } 412 522 } catch { 413 523 // profile enrichment is best-effort 414 524 } 415 525 416 526 await env.DB.prepare( 417 - `INSERT INTO actors (did, handle, display_name, avatar_url, updated_at) 418 - VALUES (?1, ?2, ?3, ?4, unixepoch()) 527 + `INSERT INTO actors (did, handle, display_name, avatar_url, hidden, updated_at) 528 + VALUES (?1, ?2, ?3, ?4, ?5, unixepoch()) 419 529 ON CONFLICT(did) DO UPDATE SET 420 530 handle = ?2, 421 531 display_name = COALESCE(NULLIF(?3, ''), actors.display_name), 422 532 avatar_url = COALESCE(NULLIF(?4, ''), actors.avatar_url), 533 + hidden = ?5, 423 534 updated_at = unixepoch()` 424 535 ) 425 - .bind(identity.did, identity.handle, displayName, avatarUrl) 536 + .bind(identity.did, identity.handle, displayName, avatarCid, hidden ? 1 : 0) 426 537 .run(); 427 538 428 - return html( 429 - indexPage(`indexed <strong>@${escHtml(identity.handle)}</strong> (${escHtml(identity.did)})`) 430 - ); 539 + return json({ handle: identity.handle, did: identity.did, hidden }); 431 540 } 432 541 433 542 async function handleStats(env: Env): Promise<Response> { 434 - const [totalRes, handlesRes, avatarsRes, metricsRes, snapshotRes] = 543 + const [totalRes, handlesRes, avatarsRes, hiddenRes, metricsRes, snapshotRes] = 435 544 await env.DB.batch([ 436 - env.DB.prepare("SELECT COUNT(*) AS cnt FROM actors"), 437 - env.DB.prepare("SELECT COUNT(*) AS cnt FROM actors WHERE handle != ''"), 438 - env.DB.prepare("SELECT COUNT(*) AS cnt FROM actors WHERE avatar_url != ''"), 545 + env.DB.prepare("SELECT COUNT(*) AS cnt FROM actors WHERE hidden = 0"), 546 + env.DB.prepare("SELECT COUNT(*) AS cnt FROM actors WHERE handle != '' AND hidden = 0"), 547 + env.DB.prepare("SELECT COUNT(*) AS cnt FROM actors WHERE avatar_url != '' AND hidden = 0"), 548 + env.DB.prepare("SELECT COUNT(*) AS cnt FROM actors WHERE hidden = 1"), 439 549 env.DB.prepare( 440 550 "SELECT hour, searches, total_ms FROM metrics ORDER BY hour DESC LIMIT 168" 441 551 ), ··· 447 557 const total = (totalRes.results[0] as any)?.cnt ?? 0; 448 558 const withHandles = (handlesRes.results[0] as any)?.cnt ?? 0; 449 559 const withAvatars = (avatarsRes.results[0] as any)?.cnt ?? 0; 560 + const hiddenCount = (hiddenRes.results[0] as any)?.cnt ?? 0; 450 561 const rows = (metricsRes.results ?? []) as { 451 562 hour: number; 452 563 searches: number; ··· 466 577 const handlePct = total > 0 ? ((withHandles / total) * 100).toFixed(1) : "0"; 467 578 const avatarPct = total > 0 ? ((withAvatars / total) * 100).toFixed(1) : "0"; 468 579 469 - return html(statsPage({ total, rows, totalSearches, avgLatency, handlePct, avatarPct, snapshots })); 580 + return html(statsPage({ total, hiddenCount, rows, totalSearches, avgLatency, handlePct, avatarPct, snapshots })); 470 581 } 471 582 472 583 interface SnapshotPoint { ··· 478 589 479 590 interface StatsData { 480 591 total: number; 592 + hiddenCount: number; 481 593 rows: { hour: number; searches: number; total_ms: number }[]; 482 594 totalSearches: number; 483 595 avgLatency: number; ··· 617 729 <div class="label" data-tip="% of indexed actors with a profile image">avatar coverage</div> 618 730 <div class="value">${d.avatarPct}%</div> 619 731 </div> 732 + <div class="metric"> 733 + <div class="label" data-tip="actors hidden from search by bluesky moderation labels">hidden by moderation</div> 734 + <div class="value">${d.hiddenCount.toLocaleString()}</div> 735 + </div> 620 736 </div> 621 737 622 738 <footer> ··· 816 932 </html>`; 817 933 } 818 934 819 - function indexPage(message?: string): string { 935 + function indexPage(): string { 820 936 return `<!doctype html> 821 937 <html> 822 938 <head> ··· 870 986 button { padding: 0.6rem 1rem; background: #2a2a2a; border: 1px solid #333; 871 987 border-radius: 6px; color: #e0e0e0; font-size: 0.9rem; cursor: pointer; } 872 988 button:hover { background: #333; } 873 - .msg { margin-top: 1rem; padding: 0.8rem; background: #1a1a1a; border-radius: 6px; 874 - font-size: 0.85rem; line-height: 1.4; } 989 + button:disabled { opacity: 0.5; cursor: default; } 990 + .msg { margin-top: 0.75rem; padding: 0.6rem 0.8rem; background: #1a1a1a; border-radius: 6px; 991 + font-size: 0.85rem; line-height: 1.4; display: flex; justify-content: space-between; 992 + align-items: start; gap: 0.5rem; } 993 + .msg.error { border-left: 2px solid #a55; } 994 + .msg .dismiss { color: #555; cursor: pointer; font-size: 0.75rem; flex-shrink: 0; 995 + background: none; border: none; padding: 0; } 996 + .msg .dismiss:hover { color: #888; } 875 997 .api { margin-bottom: 1.5rem; background: #111; border: 1px solid #222; border-radius: 6px; 876 998 padding: 0.5rem 0.7rem; display: flex; align-items: center; gap: 0.5rem; 877 999 overflow-x: auto; white-space: nowrap; } ··· 902 1024 903 1025 <section> 904 1026 <label>request indexing</label> 905 - <form class="index-form" method="get" action="/request-indexing"> 906 - <input name="handle" placeholder="handle or DID" autocomplete="off"> 907 - <button type="submit">index</button> 1027 + <form class="index-form" id="index-form"> 1028 + <input id="handle-input" placeholder="handle or DID" autocomplete="off"> 1029 + <button type="submit" id="index-btn">index</button> 908 1030 </form> 909 - ${message ? `<div class="msg">${message}</div>` : ""} 1031 + <div id="index-msg"></div> 910 1032 </section> 911 1033 912 1034 <div class="api"> ··· 949 1071 document.addEventListener('click', e => { if (!e.target.closest('.search-wrap')) results.classList.remove('show'); }); 950 1072 q.addEventListener('focus', () => { if (results.innerHTML) results.classList.add('show'); }); 951 1073 function esc(s) { const d = document.createElement('div'); d.textContent = s; return d.innerHTML; } 1074 + 1075 + // request indexing form 1076 + const indexForm = document.getElementById('index-form'); 1077 + const handleInput = document.getElementById('handle-input'); 1078 + const indexBtn = document.getElementById('index-btn'); 1079 + const indexMsg = document.getElementById('index-msg'); 1080 + 1081 + function showMsg(text, isError) { 1082 + indexMsg.innerHTML = '<div class="msg' + (isError ? ' error' : '') + '">' + 1083 + '<span>' + text + '</span>' + 1084 + '<button class="dismiss" onclick="dismissMsg()">\\u00d7</button></div>'; 1085 + } 1086 + 1087 + function dismissMsg() { 1088 + indexMsg.innerHTML = ''; 1089 + } 1090 + 1091 + indexForm.addEventListener('submit', async (e) => { 1092 + e.preventDefault(); 1093 + const val = handleInput.value.trim(); 1094 + if (!val) { showMsg('enter a handle or DID.', true); return; } 1095 + 1096 + indexBtn.disabled = true; 1097 + indexBtn.textContent = '...'; 1098 + dismissMsg(); 1099 + 1100 + try { 1101 + const r = await fetch('/request-indexing?handle=' + encodeURIComponent(val), { method: 'POST' }); 1102 + const data = await r.json(); 1103 + if (data.error) { 1104 + showMsg(esc(data.error), true); 1105 + } else { 1106 + const hidden = data.hidden ? ' <em style="color:#886">(hidden by moderation)</em>' : ''; 1107 + showMsg('indexed <strong>@' + esc(data.handle) + '</strong>' + hidden, false); 1108 + handleInput.value = ''; 1109 + } 1110 + } catch { 1111 + showMsg('something went wrong — try again.', true); 1112 + } finally { 1113 + indexBtn.disabled = false; 1114 + indexBtn.textContent = 'index'; 1115 + } 1116 + }); 952 1117 </script> 953 1118 </body> 954 1119 </html>`; ··· 964 1129 export default { 965 1130 async scheduled(_event: ScheduledEvent, env: Env, _ctx: ExecutionContext): Promise<void> { 966 1131 await recordSnapshot(env); 1132 + await refreshModeration(env); 967 1133 await resolveHandles(env); 968 1134 }, 969 1135 ··· 982 1148 return handleStats(env); 983 1149 } 984 1150 985 - if (pathname === "/request-indexing" && request.method === "GET") { 986 - const ip = clientIP(request); 987 - const { success } = await env.RATE_LIMITER.limit({ key: `index:${ip}` }); 988 - if (!success) { 989 - console.log(JSON.stringify({ event: "rate_limited", endpoint: "/request-indexing", ip })); 990 - return html(indexPage("slow down — try again in a minute."), 429); 1151 + if (pathname === "/request-indexing") { 1152 + if (request.method === "GET") { 1153 + // old bookmarks / form fallback — redirect to homepage 1154 + return new Response(null, { status: 302, headers: { Location: "/" } }); 991 1155 } 992 - return handleRequestIndexing(request, env); 1156 + if (request.method === "POST") { 1157 + const ip = clientIP(request); 1158 + const { success } = await env.RATE_LIMITER.limit({ key: `index:${ip}` }); 1159 + if (!success) { 1160 + console.log(JSON.stringify({ event: "rate_limited", endpoint: "/request-indexing", ip })); 1161 + return json({ error: "slow down — try again in a minute." }, 429); 1162 + } 1163 + return handleRequestIndexing(request, env); 1164 + } 993 1165 } 994 1166 995 1167 if (