GET /xrpc/app.bsky.actor.searchActorsTypeahead typeahead.waow.tech
16
fork

Configure Feed

Select the types of activity you want to include in your feed.

remove D1, simplify to Turso-only storage

the D1 abstraction layer (d1Db, dualWriteDb, getBackend, createDb,
StorageBackend) was migration scaffolding. Turso now serves all
traffic, so the layer is removed. also adds the @libsql/client
dependency, gitignores .dev.vars, updates the smoke tests for the
!no-unauthenticated inclusion policy, and includes the one-shot
D1-to-Turso migration script for historical reference.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

+891 -112
+1
.gitignore
··· 3 3 .zig-cache/ 4 4 zig-out/ 5 5 zig-cache/ 6 + .dev.vars
+2 -2
README.md
··· 13 13 ``` 14 14 jetstream → ingester (zig, fly.io) → worker (cloudflare) 15 15 16 - D1 (sqlite/FTS5) 16 + Turso (libSQL/FTS5) 17 17 ``` 18 18 19 19 - **ingester**: [zig](https://ziglang.org) on [fly.io](https://fly.io) — streams identity + profile events via [jetstream](https://docs.bsky.app/blog/jetstream), batches to worker 20 - - **worker**: [cloudflare worker](https://workers.cloudflare.com) + D1 + KV + cache API — FTS5 prefix search, edge-cached (60s), rate-limited 20 + - **worker**: [cloudflare worker](https://workers.cloudflare.com) + [Turso](https://turso.tech) + KV + cache API — FTS5 prefix search, edge-cached (60s), rate-limited 21 21 - **identity**: [slingshot](https://microcosm.blue) for on-demand handle resolution 22 22 23 23 ## dev
+8 -11
docs/architecture.md
··· 14 14 v 15 15 worker (cloudflare) 16 16 | 17 - +---> D1 (actors table + FTS5 index) 18 - +---> KV (cursor, config flags, mod_cursor) 17 + +---> Turso (actors table + FTS5 index) 18 + +---> KV (cursor, mod_cursor) 19 19 +---> cache API (60s edge cache for search) 20 20 ``` 21 21 ··· 57 57 58 58 plus FTS5 index overhead. roughly ~280 bytes/row total. 59 59 60 - D1 has a 10GB hard limit (non-negotiable). at current row size that's ~35M 61 - actors. D1 is designed for per-tenant databases, not single large datasets — 62 - sharding a global search index across multiple D1s degrades FTS5 ranking 63 - (scores aren't comparable across shards) and fans out every query. 64 - 65 - when we outgrow D1, the natural move is **Turso** (hosted libSQL). our sibling 66 - project [leaflet-search](https://tangled.org/zzstoatzz.io/leaflet-search) 67 - already runs Turso + local SQLite read replica in production for FTS5 search. 68 - the schema and queries would port with minimal changes. 60 + storage is [Turso](https://turso.tech) (hosted libSQL). previously used 61 + Cloudflare D1 but migrated to Turso to avoid D1's 10GB hard limit and 62 + per-tenant design constraints that made sharding a global FTS5 index 63 + impractical. 69 64 70 65 ## moderation 71 66 ··· 86 81 existing actors (run once after adding moderation support) 87 82 - `scripts/migrate-avatar-cid.sql` — one-shot migration from full avatar 88 83 URLs to bare CIDs (already applied to production) 84 + - `scripts/migrate-to-turso.py` — one-shot D1-to-Turso data migration 85 + (already applied to production)
+389 -1
package-lock.json
··· 5 5 "packages": { 6 6 "": { 7 7 "name": "typeahead", 8 + "dependencies": { 9 + "@libsql/client": "^0.17.0" 10 + }, 8 11 "devDependencies": { 9 12 "wrangler": "^4" 10 13 } ··· 1104 1107 "@jridgewell/sourcemap-codec": "^1.4.10" 1105 1108 } 1106 1109 }, 1110 + "node_modules/@libsql/client": { 1111 + "version": "0.17.0", 1112 + "resolved": "https://registry.npmjs.org/@libsql/client/-/client-0.17.0.tgz", 1113 + "integrity": "sha512-TLjSU9Otdpq0SpKHl1tD1Nc9MKhrsZbCFGot3EbCxRa8m1E5R1mMwoOjKMMM31IyF7fr+hPNHLpYfwbMKNusmg==", 1114 + "license": "MIT", 1115 + "dependencies": { 1116 + "@libsql/core": "^0.17.0", 1117 + "@libsql/hrana-client": "^0.9.0", 1118 + "js-base64": "^3.7.5", 1119 + "libsql": "^0.5.22", 1120 + "promise-limit": "^2.7.0" 1121 + } 1122 + }, 1123 + "node_modules/@libsql/core": { 1124 + "version": "0.17.0", 1125 + "resolved": "https://registry.npmjs.org/@libsql/core/-/core-0.17.0.tgz", 1126 + "integrity": "sha512-hnZRnJHiS+nrhHKLGYPoJbc78FE903MSDrFJTbftxo+e52X+E0Y0fHOCVYsKWcg6XgB7BbJYUrz/xEkVTSaipw==", 1127 + "license": "MIT", 1128 + "dependencies": { 1129 + "js-base64": "^3.7.5" 1130 + } 1131 + }, 1132 + "node_modules/@libsql/darwin-arm64": { 1133 + "version": "0.5.22", 1134 + "resolved": "https://registry.npmjs.org/@libsql/darwin-arm64/-/darwin-arm64-0.5.22.tgz", 1135 + "integrity": "sha512-4B8ZlX3nIDPndfct7GNe0nI3Yw6ibocEicWdC4fvQbSs/jdq/RC2oCsoJxJ4NzXkvktX70C1J4FcmmoBy069UA==", 1136 + "cpu": [ 1137 + "arm64" 1138 + ], 1139 + "license": "MIT", 1140 + "optional": true, 1141 + "os": [ 1142 + "darwin" 1143 + ] 1144 + }, 1145 + "node_modules/@libsql/darwin-x64": { 1146 + "version": "0.5.22", 1147 + "resolved": "https://registry.npmjs.org/@libsql/darwin-x64/-/darwin-x64-0.5.22.tgz", 1148 + "integrity": "sha512-ny2HYWt6lFSIdNFzUFIJ04uiW6finXfMNJ7wypkAD8Pqdm6nAByO+Fdqu8t7sD0sqJGeUCiOg480icjyQ2/8VA==", 1149 + "cpu": [ 1150 + "x64" 1151 + ], 1152 + "license": "MIT", 1153 + "optional": true, 1154 + "os": [ 1155 + "darwin" 1156 + ] 1157 + }, 1158 + 
"node_modules/@libsql/hrana-client": { 1159 + "version": "0.9.0", 1160 + "resolved": "https://registry.npmjs.org/@libsql/hrana-client/-/hrana-client-0.9.0.tgz", 1161 + "integrity": "sha512-pxQ1986AuWfPX4oXzBvLwBnfgKDE5OMhAdR/5cZmRaB4Ygz5MecQybvwZupnRz341r2CtFmbk/BhSu7k2Lm+Jw==", 1162 + "license": "MIT", 1163 + "dependencies": { 1164 + "@libsql/isomorphic-ws": "^0.1.5", 1165 + "cross-fetch": "^4.0.0", 1166 + "js-base64": "^3.7.5", 1167 + "node-fetch": "^3.3.2" 1168 + } 1169 + }, 1170 + "node_modules/@libsql/isomorphic-ws": { 1171 + "version": "0.1.5", 1172 + "resolved": "https://registry.npmjs.org/@libsql/isomorphic-ws/-/isomorphic-ws-0.1.5.tgz", 1173 + "integrity": "sha512-DtLWIH29onUYR00i0GlQ3UdcTRC6EP4u9w/h9LxpUZJWRMARk6dQwZ6Jkd+QdwVpuAOrdxt18v0K2uIYR3fwFg==", 1174 + "license": "MIT", 1175 + "dependencies": { 1176 + "@types/ws": "^8.5.4", 1177 + "ws": "^8.13.0" 1178 + } 1179 + }, 1180 + "node_modules/@libsql/linux-arm-gnueabihf": { 1181 + "version": "0.5.22", 1182 + "resolved": "https://registry.npmjs.org/@libsql/linux-arm-gnueabihf/-/linux-arm-gnueabihf-0.5.22.tgz", 1183 + "integrity": "sha512-3Uo3SoDPJe/zBnyZKosziRGtszXaEtv57raWrZIahtQDsjxBVjuzYQinCm9LRCJCUT5t2r5Z5nLDPJi2CwZVoA==", 1184 + "cpu": [ 1185 + "arm" 1186 + ], 1187 + "license": "MIT", 1188 + "optional": true, 1189 + "os": [ 1190 + "linux" 1191 + ] 1192 + }, 1193 + "node_modules/@libsql/linux-arm-musleabihf": { 1194 + "version": "0.5.22", 1195 + "resolved": "https://registry.npmjs.org/@libsql/linux-arm-musleabihf/-/linux-arm-musleabihf-0.5.22.tgz", 1196 + "integrity": "sha512-LCsXh07jvSojTNJptT9CowOzwITznD+YFGGW+1XxUr7fS+7/ydUrpDfsMX7UqTqjm7xG17eq86VkWJgHJfvpNg==", 1197 + "cpu": [ 1198 + "arm" 1199 + ], 1200 + "license": "MIT", 1201 + "optional": true, 1202 + "os": [ 1203 + "linux" 1204 + ] 1205 + }, 1206 + "node_modules/@libsql/linux-arm64-gnu": { 1207 + "version": "0.5.22", 1208 + "resolved": "https://registry.npmjs.org/@libsql/linux-arm64-gnu/-/linux-arm64-gnu-0.5.22.tgz", 1209 + "integrity": 
"sha512-KSdnOMy88c9mpOFKUEzPskSaF3VLflfSUCBwas/pn1/sV3pEhtMF6H8VUCd2rsedwoukeeCSEONqX7LLnQwRMA==", 1210 + "cpu": [ 1211 + "arm64" 1212 + ], 1213 + "license": "MIT", 1214 + "optional": true, 1215 + "os": [ 1216 + "linux" 1217 + ] 1218 + }, 1219 + "node_modules/@libsql/linux-arm64-musl": { 1220 + "version": "0.5.22", 1221 + "resolved": "https://registry.npmjs.org/@libsql/linux-arm64-musl/-/linux-arm64-musl-0.5.22.tgz", 1222 + "integrity": "sha512-mCHSMAsDTLK5YH//lcV3eFEgiR23Ym0U9oEvgZA0667gqRZg/2px+7LshDvErEKv2XZ8ixzw3p1IrBzLQHGSsw==", 1223 + "cpu": [ 1224 + "arm64" 1225 + ], 1226 + "license": "MIT", 1227 + "optional": true, 1228 + "os": [ 1229 + "linux" 1230 + ] 1231 + }, 1232 + "node_modules/@libsql/linux-x64-gnu": { 1233 + "version": "0.5.22", 1234 + "resolved": "https://registry.npmjs.org/@libsql/linux-x64-gnu/-/linux-x64-gnu-0.5.22.tgz", 1235 + "integrity": "sha512-kNBHaIkSg78Y4BqAdgjcR2mBilZXs4HYkAmi58J+4GRwDQZh5fIUWbnQvB9f95DkWUIGVeenqLRFY2pcTmlsew==", 1236 + "cpu": [ 1237 + "x64" 1238 + ], 1239 + "license": "MIT", 1240 + "optional": true, 1241 + "os": [ 1242 + "linux" 1243 + ] 1244 + }, 1245 + "node_modules/@libsql/linux-x64-musl": { 1246 + "version": "0.5.22", 1247 + "resolved": "https://registry.npmjs.org/@libsql/linux-x64-musl/-/linux-x64-musl-0.5.22.tgz", 1248 + "integrity": "sha512-UZ4Xdxm4pu3pQXjvfJiyCzZop/9j/eA2JjmhMaAhe3EVLH2g11Fy4fwyUp9sT1QJYR1kpc2JLuybPM0kuXv/Tg==", 1249 + "cpu": [ 1250 + "x64" 1251 + ], 1252 + "license": "MIT", 1253 + "optional": true, 1254 + "os": [ 1255 + "linux" 1256 + ] 1257 + }, 1258 + "node_modules/@libsql/win32-x64-msvc": { 1259 + "version": "0.5.22", 1260 + "resolved": "https://registry.npmjs.org/@libsql/win32-x64-msvc/-/win32-x64-msvc-0.5.22.tgz", 1261 + "integrity": "sha512-Fj0j8RnBpo43tVZUVoNK6BV/9AtDUM5S7DF3LB4qTYg1LMSZqi3yeCneUTLJD6XomQJlZzbI4mst89yspVSAnA==", 1262 + "cpu": [ 1263 + "x64" 1264 + ], 1265 + "license": "MIT", 1266 + "optional": true, 1267 + "os": [ 1268 + "win32" 1269 + ] 1270 + }, 1271 + 
"node_modules/@neon-rs/load": { 1272 + "version": "0.0.4", 1273 + "resolved": "https://registry.npmjs.org/@neon-rs/load/-/load-0.0.4.tgz", 1274 + "integrity": "sha512-kTPhdZyTQxB+2wpiRcFWrDcejc4JI6tkPuS7UZCG4l6Zvc5kU/gGQ/ozvHTh1XR5tS+UlfAfGuPajjzQjCiHCw==", 1275 + "license": "MIT" 1276 + }, 1107 1277 "node_modules/@poppinss/colors": { 1108 1278 "version": "4.1.6", 1109 1279 "resolved": "https://registry.npmjs.org/@poppinss/colors/-/colors-4.1.6.tgz", ··· 1153 1323 "dev": true, 1154 1324 "license": "CC0-1.0" 1155 1325 }, 1326 + "node_modules/@types/node": { 1327 + "version": "25.5.0", 1328 + "resolved": "https://registry.npmjs.org/@types/node/-/node-25.5.0.tgz", 1329 + "integrity": "sha512-jp2P3tQMSxWugkCUKLRPVUpGaL5MVFwF8RDuSRztfwgN1wmqJeMSbKlnEtQqU8UrhTmzEmZdu2I6v2dpp7XIxw==", 1330 + "license": "MIT", 1331 + "dependencies": { 1332 + "undici-types": "~7.18.0" 1333 + } 1334 + }, 1335 + "node_modules/@types/ws": { 1336 + "version": "8.18.1", 1337 + "resolved": "https://registry.npmjs.org/@types/ws/-/ws-8.18.1.tgz", 1338 + "integrity": "sha512-ThVF6DCVhA8kUGy+aazFQ4kXQ7E1Ty7A3ypFOe0IcJV8O/M511G99AW24irKrW56Wt44yG9+ij8FaqoBGkuBXg==", 1339 + "license": "MIT", 1340 + "dependencies": { 1341 + "@types/node": "*" 1342 + } 1343 + }, 1156 1344 "node_modules/blake3-wasm": { 1157 1345 "version": "2.1.5", 1158 1346 "resolved": "https://registry.npmjs.org/blake3-wasm/-/blake3-wasm-2.1.5.tgz", ··· 1174 1362 "url": "https://opencollective.com/express" 1175 1363 } 1176 1364 }, 1365 + "node_modules/cross-fetch": { 1366 + "version": "4.1.0", 1367 + "resolved": "https://registry.npmjs.org/cross-fetch/-/cross-fetch-4.1.0.tgz", 1368 + "integrity": "sha512-uKm5PU+MHTootlWEY+mZ4vvXoCn4fLQxT9dSc1sXVMSFkINTJVN8cAQROpwcKm8bJ/c7rgZVIBWzH5T78sNZZw==", 1369 + "license": "MIT", 1370 + "dependencies": { 1371 + "node-fetch": "^2.7.0" 1372 + } 1373 + }, 1374 + "node_modules/cross-fetch/node_modules/node-fetch": { 1375 + "version": "2.7.0", 1376 + "resolved": 
"https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", 1377 + "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", 1378 + "license": "MIT", 1379 + "dependencies": { 1380 + "whatwg-url": "^5.0.0" 1381 + }, 1382 + "engines": { 1383 + "node": "4.x || >=6.0.0" 1384 + }, 1385 + "peerDependencies": { 1386 + "encoding": "^0.1.0" 1387 + }, 1388 + "peerDependenciesMeta": { 1389 + "encoding": { 1390 + "optional": true 1391 + } 1392 + } 1393 + }, 1394 + "node_modules/data-uri-to-buffer": { 1395 + "version": "4.0.1", 1396 + "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-4.0.1.tgz", 1397 + "integrity": "sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A==", 1398 + "license": "MIT", 1399 + "engines": { 1400 + "node": ">= 12" 1401 + } 1402 + }, 1177 1403 "node_modules/detect-libc": { 1178 1404 "version": "2.1.2", 1179 1405 "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.1.2.tgz", ··· 1236 1462 "@esbuild/win32-x64": "0.27.3" 1237 1463 } 1238 1464 }, 1465 + "node_modules/fetch-blob": { 1466 + "version": "3.2.0", 1467 + "resolved": "https://registry.npmjs.org/fetch-blob/-/fetch-blob-3.2.0.tgz", 1468 + "integrity": "sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ==", 1469 + "funding": [ 1470 + { 1471 + "type": "github", 1472 + "url": "https://github.com/sponsors/jimmywarting" 1473 + }, 1474 + { 1475 + "type": "paypal", 1476 + "url": "https://paypal.me/jimmywarting" 1477 + } 1478 + ], 1479 + "license": "MIT", 1480 + "dependencies": { 1481 + "node-domexception": "^1.0.0", 1482 + "web-streams-polyfill": "^3.0.3" 1483 + }, 1484 + "engines": { 1485 + "node": "^12.20 || >= 14.13" 1486 + } 1487 + }, 1488 + "node_modules/formdata-polyfill": { 1489 + "version": "4.0.10", 1490 + "resolved": "https://registry.npmjs.org/formdata-polyfill/-/formdata-polyfill-4.0.10.tgz", 1491 + "integrity": 
"sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==", 1492 + "license": "MIT", 1493 + "dependencies": { 1494 + "fetch-blob": "^3.1.2" 1495 + }, 1496 + "engines": { 1497 + "node": ">=12.20.0" 1498 + } 1499 + }, 1239 1500 "node_modules/fsevents": { 1240 1501 "version": "2.3.3", 1241 1502 "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", ··· 1251 1512 "node": "^8.16.0 || ^10.6.0 || >=11.0.0" 1252 1513 } 1253 1514 }, 1515 + "node_modules/js-base64": { 1516 + "version": "3.7.8", 1517 + "resolved": "https://registry.npmjs.org/js-base64/-/js-base64-3.7.8.tgz", 1518 + "integrity": "sha512-hNngCeKxIUQiEUN3GPJOkz4wF/YvdUdbNL9hsBcMQTkKzboD7T/q3OYOuuPZLUE6dBxSGpwhk5mwuDud7JVAow==", 1519 + "license": "BSD-3-Clause" 1520 + }, 1254 1521 "node_modules/kleur": { 1255 1522 "version": "4.1.5", 1256 1523 "resolved": "https://registry.npmjs.org/kleur/-/kleur-4.1.5.tgz", ··· 1261 1528 "node": ">=6" 1262 1529 } 1263 1530 }, 1531 + "node_modules/libsql": { 1532 + "version": "0.5.22", 1533 + "resolved": "https://registry.npmjs.org/libsql/-/libsql-0.5.22.tgz", 1534 + "integrity": "sha512-NscWthMQt7fpU8lqd7LXMvT9pi+KhhmTHAJWUB/Lj6MWa0MKFv0F2V4C6WKKpjCVZl0VwcDz4nOI3CyaT1DDiA==", 1535 + "cpu": [ 1536 + "x64", 1537 + "arm64", 1538 + "wasm32", 1539 + "arm" 1540 + ], 1541 + "license": "MIT", 1542 + "os": [ 1543 + "darwin", 1544 + "linux", 1545 + "win32" 1546 + ], 1547 + "dependencies": { 1548 + "@neon-rs/load": "^0.0.4", 1549 + "detect-libc": "2.0.2" 1550 + }, 1551 + "optionalDependencies": { 1552 + "@libsql/darwin-arm64": "0.5.22", 1553 + "@libsql/darwin-x64": "0.5.22", 1554 + "@libsql/linux-arm-gnueabihf": "0.5.22", 1555 + "@libsql/linux-arm-musleabihf": "0.5.22", 1556 + "@libsql/linux-arm64-gnu": "0.5.22", 1557 + "@libsql/linux-arm64-musl": "0.5.22", 1558 + "@libsql/linux-x64-gnu": "0.5.22", 1559 + "@libsql/linux-x64-musl": "0.5.22", 1560 + "@libsql/win32-x64-msvc": "0.5.22" 1561 + } 1562 + }, 1563 + 
"node_modules/libsql/node_modules/detect-libc": { 1564 + "version": "2.0.2", 1565 + "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.0.2.tgz", 1566 + "integrity": "sha512-UX6sGumvvqSaXgdKGUsgZWqcUyIXZ/vZTrlRT/iobiKhGL0zL4d3osHj3uqllWJK+i+sixDS/3COVEOFbupFyw==", 1567 + "license": "Apache-2.0", 1568 + "engines": { 1569 + "node": ">=8" 1570 + } 1571 + }, 1264 1572 "node_modules/miniflare": { 1265 1573 "version": "4.20260312.1", 1266 1574 "resolved": "https://registry.npmjs.org/miniflare/-/miniflare-4.20260312.1.tgz", ··· 1282 1590 "node": ">=18.0.0" 1283 1591 } 1284 1592 }, 1593 + "node_modules/node-domexception": { 1594 + "version": "1.0.0", 1595 + "resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz", 1596 + "integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==", 1597 + "deprecated": "Use your platform's native DOMException instead", 1598 + "funding": [ 1599 + { 1600 + "type": "github", 1601 + "url": "https://github.com/sponsors/jimmywarting" 1602 + }, 1603 + { 1604 + "type": "github", 1605 + "url": "https://paypal.me/jimmywarting" 1606 + } 1607 + ], 1608 + "license": "MIT", 1609 + "engines": { 1610 + "node": ">=10.5.0" 1611 + } 1612 + }, 1613 + "node_modules/node-fetch": { 1614 + "version": "3.3.2", 1615 + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-3.3.2.tgz", 1616 + "integrity": "sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA==", 1617 + "license": "MIT", 1618 + "dependencies": { 1619 + "data-uri-to-buffer": "^4.0.0", 1620 + "fetch-blob": "^3.1.4", 1621 + "formdata-polyfill": "^4.0.10" 1622 + }, 1623 + "engines": { 1624 + "node": "^12.20.0 || ^14.13.1 || >=16.0.0" 1625 + }, 1626 + "funding": { 1627 + "type": "opencollective", 1628 + "url": "https://opencollective.com/node-fetch" 1629 + } 1630 + }, 1285 1631 "node_modules/path-to-regexp": { 1286 1632 "version": "6.3.0", 1287 1633 "resolved": 
"https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-6.3.0.tgz", ··· 1295 1641 "integrity": "sha512-WUjGcAqP1gQacoQe+OBJsFA7Ld4DyXuUIjZ5cc75cLHvJ7dtNsTugphxIADwspS+AraAUePCKrSVtPLFj/F88w==", 1296 1642 "dev": true, 1297 1643 "license": "MIT" 1644 + }, 1645 + "node_modules/promise-limit": { 1646 + "version": "2.7.0", 1647 + "resolved": "https://registry.npmjs.org/promise-limit/-/promise-limit-2.7.0.tgz", 1648 + "integrity": "sha512-7nJ6v5lnJsXwGprnGXga4wx6d1POjvi5Qmf1ivTRxTjH4Z/9Czja/UCMLVmB9N93GeWOU93XaFaEt6jbuoagNw==", 1649 + "license": "ISC" 1298 1650 }, 1299 1651 "node_modules/semver": { 1300 1652 "version": "7.7.4", ··· 1367 1719 "url": "https://github.com/chalk/supports-color?sponsor=1" 1368 1720 } 1369 1721 }, 1722 + "node_modules/tr46": { 1723 + "version": "0.0.3", 1724 + "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", 1725 + "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==", 1726 + "license": "MIT" 1727 + }, 1370 1728 "node_modules/tslib": { 1371 1729 "version": "2.8.1", 1372 1730 "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", ··· 1385 1743 "node": ">=20.18.1" 1386 1744 } 1387 1745 }, 1746 + "node_modules/undici-types": { 1747 + "version": "7.18.2", 1748 + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.18.2.tgz", 1749 + "integrity": "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w==", 1750 + "license": "MIT" 1751 + }, 1388 1752 "node_modules/unenv": { 1389 1753 "version": "2.0.0-rc.24", 1390 1754 "resolved": "https://registry.npmjs.org/unenv/-/unenv-2.0.0-rc.24.tgz", ··· 1395 1759 "pathe": "^2.0.3" 1396 1760 } 1397 1761 }, 1762 + "node_modules/web-streams-polyfill": { 1763 + "version": "3.3.3", 1764 + "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.3.3.tgz", 1765 + "integrity": 
"sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==", 1766 + "license": "MIT", 1767 + "engines": { 1768 + "node": ">= 8" 1769 + } 1770 + }, 1771 + "node_modules/webidl-conversions": { 1772 + "version": "3.0.1", 1773 + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", 1774 + "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==", 1775 + "license": "BSD-2-Clause" 1776 + }, 1777 + "node_modules/whatwg-url": { 1778 + "version": "5.0.0", 1779 + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", 1780 + "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", 1781 + "license": "MIT", 1782 + "dependencies": { 1783 + "tr46": "~0.0.3", 1784 + "webidl-conversions": "^3.0.0" 1785 + } 1786 + }, 1398 1787 "node_modules/workerd": { 1399 1788 "version": "1.20260312.1", 1400 1789 "resolved": "https://registry.npmjs.org/workerd/-/workerd-1.20260312.1.tgz", ··· 1455 1844 "version": "8.18.0", 1456 1845 "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.0.tgz", 1457 1846 "integrity": "sha512-8VbfWfHLbbwu3+N6OKsOMpBdT4kXPDDB9cJk2bJ6mh9ucxdlnNvH1e+roYkKmN9Nxw2yjz7VzeO9oOz2zJ04Pw==", 1458 - "dev": true, 1459 1847 "license": "MIT", 1460 1848 "engines": { 1461 1849 "node": ">=10.0.0"
+3
package.json
··· 7 7 }, 8 8 "devDependencies": { 9 9 "wrangler": "^4" 10 + }, 11 + "dependencies": { 12 + "@libsql/client": "^0.17.0" 10 13 } 11 14 }
+347
scripts/migrate-to-turso.py
··· 1 + #!/usr/bin/env -S PYTHONUNBUFFERED=1 uv run --script --quiet 2 + # /// script 3 + # requires-python = ">=3.12" 4 + # dependencies = [] 5 + # /// 6 + """ 7 + one-shot: migrate D1 data to Turso. 8 + 9 + reads D1 via Cloudflare REST API (fast), writes to Turso via pipeline API. 10 + uses ON CONFLICT upserts so re-running is safe (idempotent). 11 + 12 + prerequisites: 13 + turso db create typeahead 14 + turso db shell typeahead < schema.sql 15 + 16 + usage: 17 + TURSO_URL=libsql://... TURSO_AUTH_TOKEN=... ./scripts/migrate-to-turso.py 18 + TURSO_URL=libsql://... TURSO_AUTH_TOKEN=... ./scripts/migrate-to-turso.py --verify-only 19 + """ 20 + 21 + import argparse 22 + import json 23 + import os 24 + import re 25 + import subprocess 26 + import sys 27 + import urllib.request 28 + 29 + PAGE_SIZE = 1000 30 + TURSO_BATCH_SIZE = 200 # rows per Turso pipeline request 31 + 32 + PASS = "\033[32m✓\033[0m" 33 + FAIL = "\033[31m✗\033[0m" 34 + DIM = "\033[2m" 35 + RESET = "\033[0m" 36 + 37 + # D1 config (from wrangler.jsonc) 38 + CF_ACCOUNT_ID = "8feb33b5fb57ce2bc093bc6f4141f40a" 39 + CF_D1_DB_ID = "7e289d5d-dc50-46d1-8084-49aeec2679e5" 40 + D1_API = f"https://api.cloudflare.com/client/v4/accounts/{CF_ACCOUNT_ID}/d1/database/{CF_D1_DB_ID}/query" 41 + 42 + _ANSI_RE = re.compile(r"\x1b\[[0-9;]*m") 43 + 44 + 45 + def get_cf_token() -> str: 46 + """read wrangler's OAuth token from its config file.""" 47 + config_path = os.path.expanduser( 48 + "~/Library/Preferences/.wrangler/config/default.toml" 49 + ) 50 + try: 51 + with open(config_path) as f: 52 + for line in f: 53 + if line.startswith("oauth_token"): 54 + return line.split("=", 1)[1].strip().strip('"') 55 + except FileNotFoundError: 56 + pass 57 + # fallback: try CLOUDFLARE_API_TOKEN env var 58 + token = os.environ.get("CLOUDFLARE_API_TOKEN", "") 59 + if token: 60 + return token 61 + print("error: no Cloudflare API token found", file=sys.stderr) 62 + print(" run `wrangler login` or set CLOUDFLARE_API_TOKEN", file=sys.stderr) 
63 + sys.exit(1) 64 + 65 + 66 + def get_turso_url() -> str: 67 + url = os.environ.get("TURSO_URL", "") 68 + if not url: 69 + print("error: TURSO_URL not set", file=sys.stderr) 70 + sys.exit(1) 71 + return url.replace("libsql://", "https://") 72 + 73 + 74 + def get_turso_token() -> str: 75 + token = os.environ.get("TURSO_AUTH_TOKEN", "") 76 + if not token: 77 + print("error: TURSO_AUTH_TOKEN not set", file=sys.stderr) 78 + sys.exit(1) 79 + return token 80 + 81 + 82 + def d1_query(sql: str, cf_token: str, params: list | None = None) -> list[dict]: 83 + """query D1 via Cloudflare REST API.""" 84 + payload: dict = {"sql": sql} 85 + if params: 86 + payload["params"] = params 87 + body = json.dumps(payload).encode() 88 + req = urllib.request.Request( 89 + D1_API, 90 + data=body, 91 + headers={ 92 + "Authorization": f"Bearer {cf_token}", 93 + "Content-Type": "application/json", 94 + }, 95 + ) 96 + try: 97 + with urllib.request.urlopen(req, timeout=30) as resp: 98 + data = json.loads(resp.read()) 99 + if data.get("success"): 100 + return data["result"][0]["results"] 101 + print(f" D1 API error: {data.get('errors')}", file=sys.stderr) 102 + return [] 103 + except urllib.error.HTTPError as e: 104 + body_text = e.read().decode()[:300] 105 + print(f" D1 HTTP {e.code}: {body_text}", file=sys.stderr) 106 + return [] 107 + except Exception as e: 108 + print(f" D1 request failed: {e}", file=sys.stderr) 109 + return [] 110 + 111 + 112 + def d1_query_wrangler(sql: str) -> list[dict]: 113 + """fallback: query D1 via wrangler CLI.""" 114 + result = subprocess.run( 115 + [ 116 + "npx", "wrangler", "d1", "execute", "typeahead-db", 117 + "--remote", "--command", sql, "--json", 118 + ], 119 + capture_output=True, text=True, cwd=".", 120 + ) 121 + stdout = _ANSI_RE.sub("", result.stdout) 122 + bracket = stdout.find("[") 123 + if bracket == -1: 124 + return [] 125 + try: 126 + data = json.loads(stdout[bracket:]) 127 + return data[0]["results"] if data else [] 128 + except 
(json.JSONDecodeError, IndexError, KeyError): 129 + return [] 130 + 131 + 132 + def turso_batch(stmts: list[dict], turso_url: str, turso_token: str) -> bool: 133 + """execute a batch of statements against Turso via HTTP pipeline API.""" 134 + requests = [{"type": "execute", "stmt": s} for s in stmts] 135 + requests.append({"type": "close"}) 136 + 137 + body = json.dumps({"requests": requests}).encode() 138 + req = urllib.request.Request( 139 + f"{turso_url}/v3/pipeline", 140 + data=body, 141 + headers={ 142 + "Authorization": f"Bearer {turso_token}", 143 + "Content-Type": "application/json", 144 + }, 145 + ) 146 + try: 147 + with urllib.request.urlopen(req, timeout=60) as resp: 148 + result = json.loads(resp.read()) 149 + for r in result.get("results", []): 150 + if r.get("type") == "error": 151 + print(f" Turso error: {r.get('error', {}).get('message', 'unknown')}") 152 + return False 153 + return True 154 + except urllib.error.HTTPError as e: 155 + err_body = e.read().decode()[:300] 156 + print(f" Turso HTTP {e.code}: {err_body}", file=sys.stderr) 157 + return False 158 + except Exception as e: 159 + print(f" Turso request failed: {e}", file=sys.stderr) 160 + return False 161 + 162 + 163 + def turso_count(table: str, turso_url: str, turso_token: str) -> int | str: 164 + """get row count from Turso.""" 165 + body = json.dumps({ 166 + "requests": [ 167 + {"type": "execute", "stmt": {"sql": f"SELECT COUNT(*) AS cnt FROM {table}", "args": []}}, 168 + {"type": "close"}, 169 + ] 170 + }).encode() 171 + req = urllib.request.Request( 172 + f"{turso_url}/v3/pipeline", 173 + data=body, 174 + headers={ 175 + "Authorization": f"Bearer {turso_token}", 176 + "Content-Type": "application/json", 177 + }, 178 + ) 179 + try: 180 + with urllib.request.urlopen(req, timeout=15) as resp: 181 + result = json.loads(resp.read()) 182 + return int(result["results"][0]["response"]["result"]["rows"][0][0]["value"]) 183 + except Exception as e: 184 + return f"error: {e}" 185 + 186 + 187 + def 
progress(msg: str): 188 + """overwrite current line with progress.""" 189 + sys.stdout.write(f"\r {msg}") 190 + sys.stdout.flush() 191 + 192 + 193 + def migrate_actors(turso_url: str, turso_token: str, cf_token: str) -> int: 194 + print("\n--- actors ---") 195 + 196 + # get total for progress reporting 197 + count_rows = d1_query("SELECT COUNT(*) AS cnt FROM actors", cf_token) 198 + d1_total = count_rows[0]["cnt"] if count_rows else "?" 199 + print(f" D1 has {d1_total} actors") 200 + 201 + cursor = 0 202 + total = 0 203 + 204 + while True: 205 + rows = d1_query( 206 + "SELECT rowid, did, handle, display_name, avatar_url, updated_at, hidden " 207 + f"FROM actors WHERE rowid > {cursor} ORDER BY rowid ASC LIMIT {PAGE_SIZE}", 208 + cf_token, 209 + ) 210 + if not rows: 211 + break 212 + 213 + for i in range(0, len(rows), TURSO_BATCH_SIZE): 214 + batch = rows[i : i + TURSO_BATCH_SIZE] 215 + stmts = [] 216 + for r in batch: 217 + stmts.append({ 218 + "sql": ( 219 + "INSERT INTO actors (did, handle, display_name, avatar_url, updated_at, hidden) " 220 + "VALUES (?, ?, ?, ?, ?, ?) 
" 221 + "ON CONFLICT(did) DO UPDATE SET " 222 + "handle = COALESCE(NULLIF(excluded.handle, ''), actors.handle), " 223 + "display_name = COALESCE(NULLIF(excluded.display_name, ''), actors.display_name), " 224 + "avatar_url = COALESCE(NULLIF(excluded.avatar_url, ''), actors.avatar_url), " 225 + "updated_at = excluded.updated_at, hidden = excluded.hidden" 226 + ), 227 + "args": [ 228 + {"type": "text", "value": r["did"]}, 229 + {"type": "text", "value": r.get("handle") or ""}, 230 + {"type": "text", "value": r.get("display_name") or ""}, 231 + {"type": "text", "value": r.get("avatar_url") or ""}, 232 + {"type": "integer", "value": str(r.get("updated_at") or 0)}, 233 + {"type": "integer", "value": str(r.get("hidden") or 0)}, 234 + ], 235 + }) 236 + 237 + if not turso_batch(stmts, turso_url, turso_token): 238 + print(f"\n batch failed at cursor={cursor}") 239 + return total 240 + total += len(batch) 241 + 242 + cursor = rows[-1]["rowid"] 243 + pct = f" ({total * 100 // d1_total}%)" if isinstance(d1_total, int) else "" 244 + progress(f"{total}/{d1_total} actors{pct} {DIM}cursor={cursor}{RESET}") 245 + 246 + print(f"\r {PASS} actors: {total} rows" + " " * 30) 247 + return total 248 + 249 + 250 + def migrate_table( 251 + table: str, 252 + columns: list[str], 253 + col_types: list[str], 254 + turso_url: str, 255 + turso_token: str, 256 + cf_token: str, 257 + ) -> int: 258 + print(f"\n--- {table} ---") 259 + col_list = ", ".join(columns) 260 + rows = d1_query(f"SELECT {col_list} FROM {table}", cf_token) 261 + 262 + if not rows: 263 + print(f" {PASS} {table}: 0 rows (empty)") 264 + return 0 265 + 266 + total = 0 267 + for i in range(0, len(rows), TURSO_BATCH_SIZE): 268 + batch = rows[i : i + TURSO_BATCH_SIZE] 269 + stmts = [] 270 + placeholders = ", ".join("?" 
for _ in columns) 271 + sql = f"INSERT OR REPLACE INTO {table} ({col_list}) VALUES ({placeholders})" 272 + 273 + for r in batch: 274 + args = [] 275 + for col, ctype in zip(columns, col_types): 276 + val = r.get(col) or 0 277 + if ctype == "text": 278 + args.append({"type": "text", "value": str(val)}) 279 + elif ctype == "float": 280 + args.append({"type": "float", "value": float(val)}) 281 + else: 282 + args.append({"type": "integer", "value": str(int(val))}) 283 + stmts.append({"sql": sql, "args": args}) 284 + 285 + if not turso_batch(stmts, turso_url, turso_token): 286 + print(f" batch failed") 287 + return total 288 + total += len(batch) 289 + progress(f"{total}/{len(rows)} {table}") 290 + 291 + print(f"\r {PASS} {table}: {total} rows" + " " * 20) 292 + return total 293 + 294 + 295 + def verify_counts(turso_url: str, turso_token: str, cf_token: str): 296 + print("\n--- verification ---") 297 + tables = ["actors", "metrics", "snapshots"] 298 + for table in tables: 299 + d1_rows = d1_query(f"SELECT COUNT(*) AS cnt FROM {table}", cf_token) 300 + d1_count = d1_rows[0]["cnt"] if d1_rows else "?" 
301 + turso_cnt = turso_count(table, turso_url, turso_token) 302 + match = str(d1_count) == str(turso_cnt) 303 + tag = PASS if match else FAIL 304 + print(f" [{tag}] {table}: D1={d1_count}, Turso={turso_cnt}") 305 + 306 + 307 + def main(): 308 + parser = argparse.ArgumentParser(description="migrate D1 → Turso") 309 + parser.add_argument("--verify-only", action="store_true", help="only compare row counts") 310 + args = parser.parse_args() 311 + 312 + cf_token = get_cf_token() 313 + turso_url = get_turso_url() 314 + turso_token = get_turso_token() 315 + 316 + # quick API check 317 + test = d1_query("SELECT 1 AS ok", cf_token) 318 + if not test: 319 + print("error: D1 API connection failed", file=sys.stderr) 320 + sys.exit(1) 321 + 322 + if args.verify_only: 323 + verify_counts(turso_url, turso_token, cf_token) 324 + return 325 + 326 + print("migrating D1 → Turso") 327 + 328 + migrate_actors(turso_url, turso_token, cf_token) 329 + migrate_table( 330 + "metrics", 331 + ["hour", "searches", "total_ms"], 332 + ["integer", "integer", "float"], 333 + turso_url, turso_token, cf_token, 334 + ) 335 + migrate_table( 336 + "snapshots", 337 + ["hour", "total", "with_handles", "with_avatars"], 338 + ["integer", "integer", "integer", "integer"], 339 + turso_url, turso_token, cf_token, 340 + ) 341 + 342 + verify_counts(turso_url, turso_token, cf_token) 343 + print("\ndone.") 344 + 345 + 346 + if __name__ == "__main__": 347 + main()
+10 -13
scripts/smoke.py
··· 212 212 extra_keys |= set(a.keys()) - allowed_keys 213 213 check("actor objects have clean shape", len(extra_keys) == 0, f"extra keys: {extra_keys}" if extra_keys else "") 214 214 215 - # verify hidden actors are actually excluded by finding one with !no-unauthenticated 216 - # via bsky API and checking it doesn't appear in our results 217 - print("\n--- hidden actor exclusion ---") 218 - found_hidden = find_hidden_actor(base_url) 219 - if not found_hidden: 220 - check("found a hidden actor to verify", False, "couldn't find one — skipping exclusion check") 215 + # !no-unauthenticated actors should be VISIBLE (it's about content, not identity) 216 + print("\n--- !no-unauthenticated inclusion ---") 217 + found = find_noauth_actor(base_url) 218 + if not found: 219 + check("found a !no-unauthenticated actor to verify", False, "couldn't find one — skipping") 221 220 222 221 223 - def find_hidden_actor(base_url: str) -> bool: 224 - """find an actor we've indexed that has !no-unauthenticated, verify they're excluded.""" 225 - # search our index for common names and cross-check labels via bsky API 222 + def find_noauth_actor(base_url: str) -> bool: 223 + """find an actor with !no-unauthenticated and verify they ARE included in our results.""" 226 224 for q in ["alex", "sam", "chris", "jordan"]: 227 - # get actors from bsky that have !no-unauthenticated 228 225 bsky_data, _ = fetch(f"{BSKY_PUBLIC}/xrpc/app.bsky.actor.searchActors?q={q}&limit=25") 229 226 if not bsky_data or "_error" in bsky_data: 230 227 continue ··· 242 239 if not handle: 243 240 continue 244 241 245 - # this actor has !no-unauthenticated — check they're NOT in our results 242 + # this actor has !no-unauthenticated — check they ARE in our results 246 243 our_data, _ = fetch(f"{base_url}{XRPC_PATH}?q={handle}&limit=10") 247 244 if not our_data or "_error" in our_data: 248 245 continue 249 246 250 247 our_handles = {a.get("handle") for a in our_data.get("actors", [])} 251 248 if handle in our_handles: 
252 - check(f"hidden actor @{handle} excluded from search", False, "appeared in results") 249 + check(f"!no-unauthenticated actor @{handle} visible in search", True) 253 250 return True 254 251 else: 255 - check(f"hidden actor @{handle} excluded from search", True) 252 + check(f"!no-unauthenticated actor @{handle} visible in search", False, "not found in results") 256 253 return True 257 254 258 255 return False
+131 -78
src/index.ts
··· 1 + import { createClient, type Client } from "@libsql/client/web"; 2 + 1 3 interface Env { 2 - DB: D1Database; 3 4 KV: KVNamespace; 4 5 ADMIN_SECRET: string; 5 6 RATE_LIMITER: RateLimit; 6 7 RATE_LIMITER_STRICT: RateLimit; 8 + TURSO_URL: string; 9 + TURSO_AUTH_TOKEN: string; 10 + } 11 + 12 + interface Stmt { 13 + bind(...args: unknown[]): Stmt; 14 + all<T = Record<string, unknown>>(): Promise<{ results: T[] }>; 15 + first<T = Record<string, unknown>>(): Promise<T | null>; 16 + run(): Promise<{ meta: { changes: number } }>; 17 + } 18 + 19 + interface TursoDB { 20 + prepare(sql: string): Stmt; 21 + batch(stmts: Stmt[]): Promise<{ results: unknown[]; meta: { changes: number } }[]>; 22 + } 23 + 24 + function tursoDb(client: Client): TursoDB { 25 + return { 26 + prepare(sql) { 27 + let args: unknown[] = []; 28 + const s: Stmt & { _sql: string; _args: () => unknown[] } = { 29 + _sql: sql, 30 + _args: () => args, 31 + bind(...a) { args = a; return s; }, 32 + async all<T>() { 33 + const r = await client.execute({ sql, args: args as any }); 34 + return { results: r.rows as unknown as T[] }; 35 + }, 36 + async first<T>() { 37 + const r = await client.execute({ sql, args: args as any }); 38 + return (r.rows[0] as unknown as T) ?? 
null; 39 + }, 40 + async run() { 41 + const r = await client.execute({ sql, args: args as any }); 42 + return { meta: { changes: r.rowsAffected } }; 43 + }, 44 + }; 45 + return s; 46 + }, 47 + async batch(stmts) { 48 + const results = await client.batch( 49 + stmts.map((s) => ({ sql: (s as any)._sql as string, args: (s as any)._args() as any[] })), 50 + "write", 51 + ); 52 + return results.map((r) => ({ 53 + results: r.rows as unknown[], 54 + meta: { changes: r.rowsAffected }, 55 + })); 56 + }, 57 + }; 7 58 } 8 59 9 60 const CORS_HEADERS = { ··· 69 120 "https://public.api.bsky.app/xrpc/app.bsky.actor.searchActorsTypeahead"; 70 121 71 122 const BSKY_MOD_DID = "did:plc:ar7c4by46qjdydhdevvrndac"; 72 - /** labels from bluesky's moderation service that hide an actor */ 123 + /** labels from bluesky's moderation service that hide an actor from search */ 73 124 const MOD_HIDE_VALS = new Set(["!hide", "!takedown", "spam"]); 74 - /** labels that hide regardless of issuer (protocol-level, self-labeling respected) */ 75 - const ANY_SRC_HIDE_VALS = new Set(["!no-unauthenticated"]); 76 125 77 126 /** 78 - * true if actor should be hidden from our unauthenticated search. 127 + * returns whether an actor should be hidden from search. 79 128 * 80 - * two paths: 81 - * 1. bluesky moderation issued !hide or spam → always hide 82 - * 2. anyone (including the actor themselves) issued !no-unauthenticated → hide, 83 - * because our service is unauthenticated and we should respect the user's intent 129 + * only hides actors flagged by bluesky's moderation service (!hide, !takedown, spam). 130 + * !no-unauthenticated is intentionally NOT filtered — it applies to content, not identity. 131 + * bluesky's own public typeahead API returns !no-unauthenticated accounts, and so do we. 
84 132 */ 85 133 function shouldHide(labels?: any[]): boolean { 86 134 if (!labels) return false; ··· 88 136 return labels.some((l: any) => { 89 137 if (l.neg) return false; 90 138 if (l.exp && new Date(l.exp).getTime() <= now) return false; 91 - if (l.src === BSKY_MOD_DID && MOD_HIDE_VALS.has(l.val)) return true; 92 - if (ANY_SRC_HIDE_VALS.has(l.val)) return true; 93 - return false; 139 + return l.src === BSKY_MOD_DID && MOD_HIDE_VALS.has(l.val); 94 140 }); 95 141 } 96 142 ··· 99 145 async function backfillFromBsky( 100 146 term: string, 101 147 limit: number, 102 - env: Env 148 + db: TursoDB, 103 149 ): Promise<void> { 104 150 try { 105 151 const res = await fetch( ··· 114 160 // upsert all — fills in missing actors AND enriches existing ones 115 161 // (e.g. actors ingested via Jetstream that lack avatar/displayName) 116 162 const stmts = actors.map((a) => 117 - env.DB.prepare( 163 + db.prepare( 118 164 `INSERT INTO actors (did, handle, display_name, avatar_url, hidden, updated_at) 119 165 VALUES (?1, ?2, ?3, ?4, ?5, unixepoch()) 120 166 ON CONFLICT(did) DO UPDATE SET ··· 132 178 ) 133 179 ); 134 180 135 - await env.DB.batch(stmts); 181 + await db.batch(stmts); 136 182 console.log(JSON.stringify({ event: "backfill", term, upserted: actors.length })); 137 183 } catch { 138 184 // best-effort — don't let backfill errors affect anything 139 185 } 140 186 } 141 187 142 - async function throttledBackfill(term: string, limit: number, env: Env): Promise<void> { 188 + async function throttledBackfill(term: string, limit: number, db: TursoDB, env: Env): Promise<void> { 143 189 // kill switch — set KV key "backfill" to "off" to disable without redeploying 144 190 const flag = await env.KV.get("backfill"); 145 191 if (flag === "off") return; ··· 151 197 return; 152 198 } 153 199 154 - return backfillFromBsky(term, limit, env); 200 + return backfillFromBsky(term, limit, db); 155 201 } 156 202 157 203 // --- end backfill --- 158 204 159 205 /** record an actor-count snapshot 
for the current hour (idempotent) */ 160 - async function recordSnapshot(env: Env): Promise<void> { 206 + async function recordSnapshot(db: TursoDB): Promise<void> { 161 207 const hour = Math.floor(Date.now() / 3_600_000); 162 - const row = await env.DB.prepare( 208 + const row = await db.prepare( 163 209 `SELECT COUNT(*) AS total, 164 210 SUM(CASE WHEN handle != '' THEN 1 ELSE 0 END) AS with_handles, 165 211 SUM(CASE WHEN avatar_url != '' THEN 1 ELSE 0 END) AS with_avatars 166 212 FROM actors WHERE hidden = 0` 167 213 ).first<{ total: number; with_handles: number; with_avatars: number }>(); 168 214 if (row) { 169 - await env.DB.prepare( 215 + await db.prepare( 170 216 `INSERT OR REPLACE INTO snapshots (hour, total, with_handles, with_avatars) 171 217 VALUES (?1, ?2, ?3, ?4)` 172 218 ) ··· 176 222 } 177 223 178 224 /** resolve handles for actors missing them via slingshot */ 179 - async function resolveHandles(env: Env): Promise<void> { 180 - const { results } = await env.DB.prepare( 225 + async function resolveHandles(db: TursoDB): Promise<void> { 226 + const { results } = await db.prepare( 181 227 "SELECT did FROM actors WHERE handle = '' ORDER BY updated_at DESC LIMIT 1000" 182 228 ).all<{ did: string }>(); 183 229 if (!results || results.length === 0) return; ··· 191 237 if (!res.ok) continue; 192 238 const identity: SlingshotResponse = await res.json(); 193 239 if (identity.handle) { 194 - await env.DB.prepare( 240 + await db.prepare( 195 241 "UPDATE actors SET handle = ?1 WHERE did = ?2 AND handle = ''" 196 242 ).bind(identity.handle, did).run(); 197 243 resolved++; ··· 209 255 "https://public.api.bsky.app/xrpc/app.bsky.actor.getProfiles"; 210 256 211 257 /** refresh moderation labels, walking the full index over multiple cron runs */ 212 - async function refreshModeration(env: Env): Promise<void> { 258 + async function refreshModeration(db: TursoDB, env: Env): Promise<void> { 213 259 // resume where we left off (rowid cursor persisted in KV) 214 260 const 
cursorStr = await env.KV.get("mod_cursor"); 215 261 const cursor = cursorStr ? Number(cursorStr) : 0; 216 262 217 - const { results } = await env.DB.prepare( 263 + const { results } = await db.prepare( 218 264 "SELECT rowid, did FROM actors WHERE rowid > ?1 ORDER BY rowid ASC LIMIT 1000" 219 265 ).bind(cursor).all<{ rowid: number; did: string }>(); 220 266 ··· 249 295 const profiles: any[] = data.profiles || []; 250 296 checked += profiles.length; 251 297 252 - const stmts: D1PreparedStatement[] = []; 298 + const stmts: Stmt[] = []; 253 299 for (const p of profiles) { 254 300 const hide = shouldHide(p.labels) ? 1 : 0; 255 301 stmts.push( 256 - env.DB.prepare( 302 + db.prepare( 257 303 "UPDATE actors SET hidden = ?1 WHERE did = ?2 AND hidden != ?1" 258 304 ).bind(hide, p.did) 259 305 ); 260 306 } 261 307 if (stmts.length > 0) { 262 - const batchResults = await env.DB.batch(stmts); 308 + const batchResults = await db.batch(stmts); 263 309 changed += batchResults.filter((r) => r.meta.changes > 0).length; 264 310 } 265 311 } catch { ··· 274 320 } 275 321 276 322 /** fire-and-forget: increment hourly search count + accumulate response time */ 277 - async function recordMetric(env: Env, ms: number): Promise<void> { 323 + async function recordMetric(db: TursoDB, ms: number): Promise<void> { 278 324 const hour = Math.floor(Date.now() / 3_600_000); 279 - await env.DB.prepare( 325 + await db.prepare( 280 326 `INSERT INTO metrics (hour, searches, total_ms) 281 327 VALUES (?1, 1, ?2) 282 328 ON CONFLICT(hour) DO UPDATE SET ··· 289 335 290 336 async function handleSearch( 291 337 request: Request, 338 + db: TursoDB, 292 339 env: Env, 293 - ctx: ExecutionContext 340 + ctx: ExecutionContext, 294 341 ): Promise<Response> { 295 342 const url = new URL(request.url); 296 343 const q = url.searchParams.get("q") || url.searchParams.get("term") || ""; ··· 324 371 const t0 = Date.now(); 325 372 326 373 const ftsQuery = `"${term}"*`; 327 - const { results } = await env.DB.prepare( 374 + 
const { results } = await db.prepare( 328 375 `SELECT a.did, a.handle, a.display_name, a.avatar_url 329 376 FROM actors_fts 330 377 JOIN actors a ON a.rowid = actors_fts.rowid ··· 346 393 // --- backfill: remove this block once at parity with Bluesky --- 347 394 const hasGaps = actors.length < limit || actors.some((a) => !a.avatar); 348 395 if (hasGaps) { 349 - ctx.waitUntil(throttledBackfill(term, limit, env)); 396 + ctx.waitUntil(throttledBackfill(term, limit, db, env)); 350 397 } 351 398 // --- end backfill --- 352 399 353 - ctx.waitUntil(recordMetric(env, Date.now() - t0)); 400 + ctx.waitUntil(recordMetric(db, Date.now() - t0)); 354 401 355 402 const response = json({ actors }); 356 403 ··· 364 411 365 412 async function handleIngest( 366 413 request: Request, 367 - env: Env 414 + db: TursoDB, 415 + env: Env, 368 416 ): Promise<Response> { 369 417 const auth = request.headers.get("Authorization"); 370 418 if (auth !== `Bearer ${env.ADMIN_SECRET}`) { ··· 393 441 const stmts = events.map((e) => { 394 442 const avatarCid = e.avatar_cid || null; 395 443 const hidden = e.hidden !== undefined ? (e.hidden ? 
1 : 0) : null; 396 - return env.DB.prepare( 444 + return db.prepare( 397 445 `INSERT INTO actors (did, handle, display_name, avatar_url, hidden, updated_at) 398 446 VALUES (?1, ?2, ?3, ?4, COALESCE(?5, 0), unixepoch()) 399 447 ON CONFLICT(did) DO UPDATE SET ··· 412 460 }); 413 461 414 462 try { 415 - await env.DB.batch(stmts); 463 + await db.batch(stmts); 416 464 } catch (e: any) { 417 465 console.log(JSON.stringify({ event: "ingest_error", error: e?.message, count: events.length })); 418 466 return json({ error: e?.message || "db batch failed" }, 500); ··· 431 479 432 480 async function handleDelete( 433 481 request: Request, 434 - env: Env 482 + db: TursoDB, 483 + env: Env, 435 484 ): Promise<Response> { 436 485 const auth = request.headers.get("Authorization"); 437 486 if (auth !== `Bearer ${env.ADMIN_SECRET}`) { ··· 457 506 } 458 507 459 508 const stmts = dids.map((did) => 460 - env.DB.prepare("DELETE FROM actors WHERE did = ?1").bind(did) 509 + db.prepare("DELETE FROM actors WHERE did = ?1").bind(did) 461 510 ); 462 - await env.DB.batch(stmts); 511 + await db.batch(stmts); 463 512 464 513 return json({ ok: true, deleted: dids.length }); 465 514 } ··· 480 529 return json({ cursor: cursor ? 
Number(cursor) : null }); 481 530 } 482 531 483 - /** resolve a handle or DID via slingshot, then upsert into D1 */ 532 + /** resolve a handle or DID via slingshot, then upsert into the active DB */ 484 533 async function handleRequestIndexing( 485 534 request: Request, 486 - env: Env 535 + db: TursoDB, 536 + env: Env, 487 537 ): Promise<Response> { 488 538 const url = new URL(request.url); 489 539 const identifier = ··· 523 573 // profile enrichment is best-effort 524 574 } 525 575 526 - await env.DB.prepare( 576 + await db.prepare( 527 577 `INSERT INTO actors (did, handle, display_name, avatar_url, hidden, updated_at) 528 578 VALUES (?1, ?2, ?3, ?4, ?5, unixepoch()) 529 579 ON CONFLICT(did) DO UPDATE SET ··· 536 586 .bind(identity.did, identity.handle, displayName, avatarCid, hidden ? 1 : 0) 537 587 .run(); 538 588 539 - return json({ handle: identity.handle, did: identity.did, hidden }); 589 + return json({ 590 + handle: identity.handle, 591 + did: identity.did, 592 + ...(hidden ? { hidden: true, reason: "hidden by moderation" } : { hidden: false }), 593 + }); 540 594 } 541 595 542 - async function handleStats(env: Env): Promise<Response> { 596 + async function handleStats(db: TursoDB): Promise<Response> { 543 597 const [totalRes, handlesRes, avatarsRes, hiddenRes, metricsRes, snapshotRes] = 544 - await env.DB.batch([ 545 - env.DB.prepare("SELECT COUNT(*) AS cnt FROM actors WHERE hidden = 0"), 546 - env.DB.prepare("SELECT COUNT(*) AS cnt FROM actors WHERE handle != '' AND hidden = 0"), 547 - env.DB.prepare("SELECT COUNT(*) AS cnt FROM actors WHERE avatar_url != '' AND hidden = 0"), 548 - env.DB.prepare("SELECT COUNT(*) AS cnt FROM actors WHERE hidden = 1"), 549 - env.DB.prepare( 598 + await db.batch([ 599 + db.prepare("SELECT COUNT(*) AS cnt FROM actors WHERE hidden = 0"), 600 + db.prepare("SELECT COUNT(*) AS cnt FROM actors WHERE handle != '' AND hidden = 0"), 601 + db.prepare("SELECT COUNT(*) AS cnt FROM actors WHERE avatar_url != '' AND hidden = 0"), 602 + 
db.prepare("SELECT COUNT(*) AS cnt FROM actors WHERE hidden != 0"), 603 + db.prepare( 550 604 "SELECT hour, searches, total_ms FROM metrics ORDER BY hour DESC LIMIT 168" 551 605 ), 552 - env.DB.prepare( 606 + db.prepare( 553 607 "SELECT hour, total, with_handles, with_avatars FROM snapshots ORDER BY hour ASC LIMIT 2000" 554 608 ), 555 609 ]); ··· 730 784 <div class="value">${d.avatarPct}%</div> 731 785 </div> 732 786 <div class="metric"> 733 - <div class="label" data-tip="actors hidden from search by bluesky moderation labels">hidden by moderation</div> 787 + <div class="label" data-tip="actors hidden by bluesky moderation (!hide, !takedown, spam)">hidden by moderation</div> 734 788 <div class="value">${d.hiddenCount.toLocaleString()}</div> 735 789 </div> 736 790 </div> ··· 1103 1157 if (data.error) { 1104 1158 showMsg(esc(data.error), true); 1105 1159 } else { 1106 - const hidden = data.hidden ? ' <em style="color:#886">(hidden by moderation)</em>' : ''; 1160 + const hidden = data.hidden ? 
' <em style="color:#886">(' + esc(data.reason || 'hidden') + ')</em>' : ''; 1107 1161 showMsg('indexed <strong>@' + esc(data.handle) + '</strong>' + hidden, false); 1108 1162 handleInput.value = ''; 1109 1163 } ··· 1128 1182 1129 1183 export default { 1130 1184 async scheduled(_event: ScheduledEvent, env: Env, _ctx: ExecutionContext): Promise<void> { 1131 - await recordSnapshot(env); 1132 - await refreshModeration(env); 1133 - await resolveHandles(env); 1185 + const db = tursoDb(createClient({ url: env.TURSO_URL, authToken: env.TURSO_AUTH_TOKEN })); 1186 + await recordSnapshot(db); 1187 + await refreshModeration(db, env); 1188 + await resolveHandles(db); 1134 1189 }, 1135 1190 1136 1191 async fetch(request: Request, env: Env, ctx: ExecutionContext): Promise<Response> { ··· 1143 1198 if (pathname === "/" && request.method === "GET") { 1144 1199 return html(indexPage()); 1145 1200 } 1201 + if (pathname === "/admin/cursor" && request.method === "GET") { 1202 + return handleCursor(request, env); 1203 + } 1204 + if (pathname === "/request-indexing" && request.method === "GET") { 1205 + return new Response(null, { status: 302, headers: { Location: "/" } }); 1206 + } 1207 + 1208 + const db = tursoDb(createClient({ url: env.TURSO_URL, authToken: env.TURSO_AUTH_TOKEN })); 1146 1209 1147 1210 if (pathname === "/stats" && request.method === "GET") { 1148 - return handleStats(env); 1211 + return handleStats(db); 1149 1212 } 1150 1213 1151 - if (pathname === "/request-indexing") { 1152 - if (request.method === "GET") { 1153 - // old bookmarks / form fallback — redirect to homepage 1154 - return new Response(null, { status: 302, headers: { Location: "/" } }); 1214 + if (pathname === "/request-indexing" && request.method === "POST") { 1215 + const ip = clientIP(request); 1216 + const { success } = await env.RATE_LIMITER.limit({ key: `index:${ip}` }); 1217 + if (!success) { 1218 + console.log(JSON.stringify({ event: "rate_limited", endpoint: "/request-indexing", ip })); 1219 + 
return json({ error: "slow down — try again in a minute." }, 429); 1155 1220 } 1156 - if (request.method === "POST") { 1157 - const ip = clientIP(request); 1158 - const { success } = await env.RATE_LIMITER.limit({ key: `index:${ip}` }); 1159 - if (!success) { 1160 - console.log(JSON.stringify({ event: "rate_limited", endpoint: "/request-indexing", ip })); 1161 - return json({ error: "slow down — try again in a minute." }, 429); 1162 - } 1163 - return handleRequestIndexing(request, env); 1164 - } 1221 + return handleRequestIndexing(request, db, env); 1165 1222 } 1166 1223 1167 1224 if ( ··· 1174 1231 console.log(JSON.stringify({ event: "rate_limited", endpoint: "/search", ip })); 1175 1232 return json({ error: "rate limited" }, 429); 1176 1233 } 1177 - return handleSearch(request, env, ctx); 1234 + return handleSearch(request, db, env, ctx); 1178 1235 } 1179 1236 1180 1237 if (pathname === "/admin/ingest" && request.method === "POST") { 1181 - return handleIngest(request, env); 1238 + return handleIngest(request, db, env); 1182 1239 } 1183 1240 1184 1241 if (pathname === "/admin/delete" && request.method === "POST") { 1185 - return handleDelete(request, env); 1186 - } 1187 - 1188 - if (pathname === "/admin/cursor" && request.method === "GET") { 1189 - return handleCursor(request, env); 1242 + return handleDelete(request, db, env); 1190 1243 } 1191 1244 1192 1245 return json({ error: "not found" }, 404);
-7
wrangler.jsonc
··· 4 4 "compatibility_date": "2024-12-01", 5 5 "compatibility_flags": ["nodejs_compat"], 6 6 "triggers": { "crons": ["0 * * * *"] }, 7 - "d1_databases": [ 8 - { 9 - "binding": "DB", 10 - "database_name": "typeahead-db", 11 - "database_id": "7e289d5d-dc50-46d1-8084-49aeec2679e5" 12 - } 13 - ], 14 7 "kv_namespaces": [ 15 8 { 16 9 "binding": "KV",