···5555 view_url TEXT, -- Constructed canonical URL (pub_url + path)
5656 pds_endpoint TEXT, -- Cached PDS endpoint for this DID
5757 resolved_at TEXT DEFAULT (datetime('now')),
5858- stale_at TEXT -- When this record should be re-resolved
5858+ stale_at TEXT, -- When this record should be re-resolved
5959+ verified INTEGER DEFAULT 0 -- Whether the record has been verified via .well-known or link tag
5960);
60616162CREATE INDEX IF NOT EXISTS idx_resolved_documents_rkey ON resolved_documents(rkey DESC);
6263CREATE INDEX IF NOT EXISTS idx_resolved_documents_stale ON resolved_documents(stale_at);
6364CREATE INDEX IF NOT EXISTS idx_resolved_documents_pub_url ON resolved_documents(pub_url);
6565+CREATE INDEX IF NOT EXISTS idx_resolved_documents_verified ON resolved_documents(verified);
+2-2
packages/server/src/index.ts
···11import { Hono } from "hono";
22import { cors } from "hono/cors";
33import type { Bindings } from "./types";
44-import { health, webhook, feed, stats, records } from "./routes";
44+import { health, webhook, feed, stats, records, admin } from "./routes";
55import { processDocument } from "./utils";
6677const app = new Hono<{ Bindings: Bindings }>();
···1515app.route("/feed", feed);
1616app.route("/stats", stats);
1717app.route("/records", records);
1818-//app.route("/admin", admin);
1818+app.route("/admin", admin);
19192020// Legacy alias: /feed-raw -> /feed/raw
2121app.get("/feed-raw", async (c) => {
+55-55
packages/server/src/routes/admin.ts
// POST /resolve-all
// Reads every `site.standard.document` row from `repo_records` and pushes one
// queue message per row onto RESOLUTION_QUEUE, in chunks of at most 100.
// Responds with the number of messages queued, or a 500 carrying the
// stringified error if the query or any send fails.
// NOTE(review): no authentication check is visible in this handler — confirm
// the /admin routes are protected before exposing them.
admin.post("/resolve-all", async (c) => {
  // Largest chunk handed to a single sendBatch call (Cloudflare Queue limit).
  const chunkSize = 100;

  try {
    const { DB: database, RESOLUTION_QUEUE: resolutionQueue } = c.env;

    const lookup = database.prepare(
      `SELECT did, rkey FROM repo_records
         WHERE collection = 'site.standard.document'`,
    );
    const { results: rows } = await lookup.all<{ did: string; rkey: string }>();

    // Nothing stored yet: answer early without touching the queue.
    if (!rows || rows.length === 0) {
      return c.json({ message: "No documents to process", queued: 0 });
    }

    // `queued` doubles as the read offset into `rows`: it only ever advances
    // by the size of the chunk that was just sent.
    let queued = 0;
    while (queued < rows.length) {
      const chunk = rows.slice(queued, queued + chunkSize);
      await resolutionQueue.sendBatch(
        chunk.map(({ did, rkey }) => ({
          body: { did, collection: "site.standard.document", rkey },
        })),
      );
      queued += chunk.length;
    }

    return c.json({ message: "Documents queued for re-processing", queued });
  } catch (error) {
    const failure = {
      error: "Failed to queue documents",
      details: String(error),
    };
    return c.json(failure, 500);
  }
});
// POST /mark-stale
// Alternative to /resolve-all: instead of queueing anything, back-dates
// `stale_at` on every row of `resolved_documents` to one hour ago and reports
// how many rows were changed. The original note says the cron job is what
// picks these rows up afterwards; that job is not part of this handler.
// NOTE(review): no authentication check is visible in this handler — confirm
// the /admin routes are protected before exposing them.
admin.post("/mark-stale", async (c) => {
  try {
    const statement = c.env.DB.prepare(
      `UPDATE resolved_documents SET stale_at = datetime('now', '-1 hour')`,
    );
    const { meta } = await statement.run();

    return c.json({
      message: "All documents marked as stale",
      affected: meta.changes,
    });
  } catch (error) {
    const failure = {
      error: "Failed to mark documents as stale",
      details: String(error),
    };
    return c.json(failure, 500);
  }
});
76767777export default admin;
+4-3
packages/server/src/routes/feed.ts
···8383 .prepare(
8484 `SELECT did, rkey FROM repo_records
8585 WHERE collection = 'site.standard.document'
8686- ORDER BY rkey DESC
8686+ ORDER BY published_at DESC
8787 LIMIT ? OFFSET ?`
8888 )
8989 .bind(limit, offset)
···116116 cover_image_cid, cover_image_url, bsky_post_ref, tags,
117117 published_at, updated_at, pub_url, pub_name, pub_description,
118118 pub_icon_cid, pub_icon_url, view_url, pds_endpoint,
119119- resolved_at, stale_at
119119+ resolved_at, stale_at, verified
120120 FROM resolved_documents
121121- ORDER BY rkey DESC
121121+ WHERE verified = 1
122122+ ORDER BY published_at DESC
122123 LIMIT ? OFFSET ?`
123124 )
124125 .bind(limit, offset)
+1-1
packages/server/src/routes/index.ts
···33export { default as feed } from "./feed";
44export { default as stats } from "./stats";
55export { default as records } from "./records";
66-//export { default as admin } from "./admin";
66+export { default as admin } from "./admin";
···22export { resolvePds } from "./resolver";
33export { resolveViewUrl, processDocument } from "./document";
44export { buildBlobUrl, extractBlobCid } from "./blob";
55+export { verifyPublication, verifyDocument, verifyDocumentRecord } from "./verification";
+99
packages/server/src/utils/verification.ts
···11+/**
22+ * Verification utilities for standard.site records.
33+ *
44+ * Publications are verified via /.well-known/site.standard.publication
55+ * Documents are verified via <link rel="site.standard.document"> in HTML
66+ */
/**
 * Verifies a publication by fetching `/.well-known/site.standard.publication`
 * from its base URL and comparing the response body with the expected AT-URI.
 *
 * The base URL is normalised before the request:
 * - surrounding whitespace is dropped;
 * - `https://` is prepended unless the value already starts with an explicit
 *   `http://` or `https://` scheme (case-insensitive), so bare hosts such as
 *   `httpbin.org` are handled correctly;
 * - every trailing slash is removed so the well-known path is joined with
 *   exactly one `/`.
 *
 * This is a best-effort check: network errors, invalid URLs and non-2xx
 * responses all yield `false` rather than throwing.
 *
 * @param pubUrl The publication's base URL (e.g., "https://example.com")
 * @param siteUri The expected AT-URI of the publication (e.g., "at://did:plc:abc/site.standard.publication/rkey")
 * @returns true only if the endpoint responds successfully and its trimmed
 *   body equals the trimmed, non-empty `siteUri`
 */
export async function verifyPublication(
  pubUrl: string,
  siteUri: string
): Promise<boolean> {
  const origin = pubUrl.trim();
  const expectedUri = siteUri.trim();

  // An empty expectation must never verify: otherwise an empty response body
  // would compare equal to it.
  if (origin === "" || expectedUri === "") return false;

  try {
    // Require a real scheme; a plain "http" prefix also matches host names.
    const baseUrl = /^https?:\/\//i.test(origin) ? origin : `https://${origin}`;
    const wellKnownUrl = `${baseUrl.replace(/\/+$/, "")}/.well-known/site.standard.publication`;

    const response = await fetch(wellKnownUrl, {
      headers: { Accept: "text/plain" },
    });

    if (!response.ok) return false;

    const body = await response.text();
    return body.trim() === expectedUri;
  } catch {
    // Deliberate best-effort: any fetch/URL failure means "not verified".
    return false;
  }
}
/**
 * Verifies a document by downloading its page and looking for a
 * `<link rel="site.standard.document" href="...">` tag that points at the
 * expected AT-URI.
 *
 * Two regular expressions are tried in order — `rel` before `href`, then
 * `href` before `rel` — and only the first tag found is compared. Fetch
 * failures and non-2xx responses yield `false` instead of throwing.
 *
 * @param viewUrl The document's canonical URL (e.g., "https://example.com/blog/post")
 * @param documentUri The expected AT-URI of the document (e.g., "at://did:plc:abc/site.standard.document/rkey")
 * @returns true if the first matching link tag's trimmed href equals the
 *   trimmed `documentUri`
 */
export async function verifyDocument(
  viewUrl: string,
  documentUri: string
): Promise<boolean> {
  // Regex matching keeps the worker free of an HTML parser dependency.
  const relThenHref =
    /<link[^>]+rel=["']site\.standard\.document["'][^>]+href=["']([^"']+)["'][^>]*>/i;
  const hrefThenRel =
    /<link[^>]+href=["']([^"']+)["'][^>]+rel=["']site\.standard\.document["'][^>]*>/i;

  try {
    const page = await fetch(viewUrl, { headers: { Accept: "text/html" } });

    if (page.ok) {
      const markup = await page.text();
      const found = relThenHref.exec(markup) || hrefThenRel.exec(markup);

      if (found) {
        const declaredUri = found[1].trim();
        return declaredUri === documentUri.trim();
      }
    }

    return false;
  } catch {
    return false;
  }
}
/**
 * Combined verification for a document record.
 *
 * The publication-level check runs first, but only when a publication URL is
 * present and the site reference is an `at://` URI. If that check is skipped
 * or fails, the document-level check runs against the view URL, when one is
 * available. The two checks run sequentially, never in parallel.
 *
 * @param pubUrl The publication's base URL
 * @param siteUri The AT-URI of the publication (from document's site field)
 * @param viewUrl The document's canonical URL
 * @param documentUri The AT-URI of the document
 * @returns true if either publication or document verification passes
 */
export async function verifyDocumentRecord(
  pubUrl: string | null,
  siteUri: string | null,
  viewUrl: string | null,
  documentUri: string
): Promise<boolean> {
  // Publication-level proof is tried first when its inputs are usable.
  if (pubUrl && siteUri?.startsWith("at://")) {
    if (await verifyPublication(pubUrl, siteUri)) {
      return true;
    }
  }

  // Without a view URL there is no page to inspect for a link tag.
  if (!viewUrl) {
    return false;
  }

  return await verifyDocument(viewUrl, documentUri);
}
-51
packages/server/tables.csv
···11-name,sql
22-_cf_KV,"CREATE TABLE _cf_KV (
33- key TEXT PRIMARY KEY,
44- value BLOB
55- ) WITHOUT ROWID"
66-repo_records,"CREATE TABLE repo_records (
77- id INTEGER PRIMARY KEY AUTOINCREMENT,
88- did TEXT NOT NULL,
99- rkey TEXT NOT NULL,
1010- collection TEXT NOT NULL,
1111- cid TEXT,
1212- synced_at TEXT DEFAULT (datetime('now')),
1313- UNIQUE(did, collection, rkey)
1414-)"
1515-pds_cache,"CREATE TABLE pds_cache (
1616- did TEXT PRIMARY KEY,
1717- pds_endpoint TEXT NOT NULL,
1818- cached_at TEXT DEFAULT (datetime('now'))
1919-)"
2020-record_cache,"CREATE TABLE record_cache (
2121- uri TEXT PRIMARY KEY,
2222- did TEXT NOT NULL,
2323- collection TEXT NOT NULL,
2424- rkey TEXT NOT NULL,
2525- record_data TEXT NOT NULL, -- JSON blob
2626- cached_at TEXT DEFAULT (datetime('now'))
2727-)"
2828-publication_cache,"CREATE TABLE publication_cache (
2929- at_uri TEXT PRIMARY KEY,
3030- base_url TEXT NOT NULL,
3131- cached_at TEXT DEFAULT (datetime('now'))
3232-)"
3333-sync_metadata,"CREATE TABLE sync_metadata (
3434- key TEXT PRIMARY KEY,
3535- value TEXT NOT NULL,
3636- updated_at TEXT DEFAULT (datetime('now'))
3737-)"
3838-resolved_documents,"CREATE TABLE resolved_documents (
3939- uri TEXT PRIMARY KEY,
4040- did TEXT NOT NULL,
4141- rkey TEXT NOT NULL,
4242- title TEXT,
4343- path TEXT,
4444- site TEXT,
4545- content TEXT, -- JSON blob
4646- text_content TEXT,
4747- published_at TEXT,
4848- view_url TEXT,
4949- resolved_at TEXT DEFAULT (datetime('now')),
5050- stale_at TEXT -- When this record should be re-resolved
5151-)"