Webhooks for the AT Protocol airglow.run
atproto atprotocol automation webhook
12
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: add semble save action

Hugo fcfd9447 f51125e4

+1061 -9
+2
app/icons.ts
··· 29 29 30 30 import ActivityData from "lucide/icons/activity"; 31 31 import BookmarkData from "lucide/icons/bookmark"; 32 + import BookmarkPlusData from "lucide/icons/bookmark-plus"; 32 33 import CopyData from "lucide/icons/copy"; 33 34 import ChevronDownData from "lucide/icons/chevron-down"; 34 35 import ChevronRightData from "lucide/icons/chevron-right"; ··· 65 66 export const ArrowLeft = icon(ArrowLeftData); 66 67 export const ArrowRight = icon(ArrowRightData); 67 68 export const Bookmark = icon(BookmarkData); 69 + export const BookmarkPlus = icon(BookmarkPlusData); 68 70 export const Copy = icon(CopyData); 69 71 export const ChevronDown = icon(ChevronDownData); 70 72 export const ChevronRight = icon(ChevronRightData);
+69 -1
app/islands/AutomationForm.tsx
··· 106 106 comment: string; 107 107 forEach?: ForEachDraft; 108 108 }; 109 + type SembleSaveDraft = { 110 + type: "semble-save"; 111 + url: string; 112 + comment: string; 113 + forEach?: ForEachDraft; 114 + }; 109 115 type ActionDraft = 110 116 | WebhookDraft 111 117 | RecordDraft 112 118 | BskyPostDraft 113 119 | PatchRecordDraft 114 120 | BookmarkDraft 115 - | FollowDraft; 121 + | FollowDraft 122 + | SembleSaveDraft; 116 123 117 124 export type AutomationInitial = { 118 125 rkey?: string; ··· 892 899 } 893 900 894 901 // --------------------------------------------------------------------------- 902 + // Semble save action editor 903 + // --------------------------------------------------------------------------- 904 + 905 + function SembleSaveActionEditor({ 906 + action, 907 + index, 908 + onChange, 909 + }: { 910 + action: SembleSaveDraft; 911 + index: number; 912 + onChange: (a: SembleSaveDraft) => void; 913 + }) { 914 + const urlId = `action-${index}-semble-url`; 915 + return ( 916 + <div class={s.fieldGroup}> 917 + <label class={s.label} for={urlId}> 918 + Page URL 919 + </label> 920 + <input 921 + id={urlId} 922 + class={s.input} 923 + type="text" 924 + placeholder="e.g. https://example.com or {{event.commit.record.subject.uri}}" 925 + value={action.url} 926 + onInput={(e: Event) => onChange({ ...action, url: (e.target as HTMLInputElement).value })} 927 + required 928 + autocomplete="off" 929 + /> 930 + <span class={s.hint}> 931 + URL of the page to save. Metadata (title, description, image) will be fetched automatically. 932 + Supports {"{{placeholders}}"}. 933 + </span> 934 + </div> 935 + ); 936 + } 937 + 938 + // --------------------------------------------------------------------------- 895 939 // Follow (social graph) action editor, shared across bluesky / tangled / sifa 896 940 // --------------------------------------------------------------------------- 897 941 ··· 1250 1294 targetTitle: a.targetTitle ?? "", 1251 1295 bodyValue: a.bodyValue ?? 
"", 1252 1296 tagsText: (a.tags ?? []).join(", "), 1297 + comment: a.comment ?? "", 1298 + ...forEachField, 1299 + }; 1300 + } 1301 + if (a.$type === "semble-save") { 1302 + return { 1303 + type: "semble-save", 1304 + url: a.url, 1253 1305 comment: a.comment ?? "", 1254 1306 ...forEachField, 1255 1307 }; ··· 1813 1865 comment: "", 1814 1866 }, 1815 1867 ]); 1868 + } else if (type === "semble-save") { 1869 + setActions((prev) => [...prev, { type: "semble-save", url: "", comment: "" }]); 1816 1870 } else if (type.startsWith("follow-")) { 1817 1871 const target = type.slice("follow-".length) as FollowTarget; 1818 1872 setActions((prev) => [ ··· 1906 1960 ...(targetTitle ? { targetTitle } : {}), 1907 1961 ...(bodyValue ? { bodyValue } : {}), 1908 1962 ...(tags.length > 0 ? { tags } : {}), 1963 + ...forEachField, 1964 + ...comment, 1965 + }; 1966 + } 1967 + if (a.type === "semble-save") { 1968 + return { 1969 + type: "semble-save", 1970 + url: a.url, 1909 1971 ...forEachField, 1910 1972 ...comment, 1911 1973 }; ··· 2986 3048 /> 2987 3049 ) : action.type === "bookmark" ? ( 2988 3050 <BookmarkActionEditor 3051 + action={action} 3052 + index={i} 3053 + onChange={(a) => updateAction(i, a)} 3054 + /> 3055 + ) : action.type === "semble-save" ? ( 3056 + <SembleSaveActionEditor 2989 3057 action={action} 2990 3058 index={i} 2991 3059 onChange={(a) => updateAction(i, a)}
+26
app/routes/api/automations/[rkey].ts
··· 12 12 type PatchRecordAction, 13 13 type BookmarkAction, 14 14 type FollowAction, 15 + type SembleSaveAction, 15 16 } from "@/db/schema.js"; 16 17 import { config } from "@/config.js"; 17 18 import { isValidNsid } from "@/lexicons/resolver.js"; ··· 32 33 validateWebhookHeaders, 33 34 validateBookmarkInput, 34 35 validateFollowInput, 36 + validateSembleSaveInput, 35 37 validateForEachInput, 36 38 resolveWantedDids, 37 39 } from "@/actions/validation.js"; ··· 473 475 $type: "run.airglow.automation#followAction", 474 476 target: input.target, 475 477 subject: input.subject, 478 + ...forEachField, 479 + ...(input.comment ? { comment: input.comment } : {}), 480 + }); 481 + actionResultNames.push(`action${actionIndex + 1}`); 482 + } else if (input.type === "semble-save") { 483 + const sembleSaveValidation = validateSembleSaveInput( 484 + input, 485 + fetchNames, 486 + actionResultNames, 487 + hasItem, 488 + ); 489 + if (!sembleSaveValidation.valid) { 490 + return c.json({ error: sembleSaveValidation.error }, 400); 491 + } 492 + 493 + newLocalActions.push({ 494 + $type: "semble-save", 495 + url: input.url, 496 + ...forEachField, 497 + ...(input.comment ? { comment: input.comment } : {}), 498 + } satisfies SembleSaveAction); 499 + newPdsActions.push({ 500 + $type: "run.airglow.automation#sembleSaveAction", 501 + url: input.url, 476 502 ...forEachField, 477 503 ...(input.comment ? { comment: input.comment } : {}), 478 504 });
+26
app/routes/api/automations/index.ts
··· 11 11 type PatchRecordAction, 12 12 type BookmarkAction, 13 13 type FollowAction, 14 + type SembleSaveAction, 14 15 } from "@/db/schema.js"; 15 16 import { config } from "@/config.js"; 16 17 import { isValidNsid, isNsidAllowed } from "@/lexicons/resolver.js"; ··· 30 31 validateWebhookHeaders, 31 32 validateBookmarkInput, 32 33 validateFollowInput, 34 + validateSembleSaveInput, 33 35 validateForEachInput, 34 36 resolveWantedDids, 35 37 } from "@/actions/validation.js"; ··· 390 392 $type: "run.airglow.automation#followAction", 391 393 target: input.target, 392 394 subject: input.subject, 395 + ...forEachField, 396 + ...(input.comment ? { comment: input.comment } : {}), 397 + }); 398 + actionResultNames.push(`action${actionIndex + 1}`); 399 + } else if (input.type === "semble-save") { 400 + const sembleSaveValidation = validateSembleSaveInput( 401 + input, 402 + fetchNames, 403 + actionResultNames, 404 + hasItem, 405 + ); 406 + if (!sembleSaveValidation.valid) { 407 + return c.json({ error: sembleSaveValidation.error }, 400); 408 + } 409 + 410 + localActions.push({ 411 + $type: "semble-save", 412 + url: input.url, 413 + ...forEachField, 414 + ...(input.comment ? { comment: input.comment } : {}), 415 + } satisfies SembleSaveAction); 416 + pdsActions.push({ 417 + $type: "run.airglow.automation#sembleSaveAction", 418 + url: input.url, 393 419 ...forEachField, 394 420 ...(input.comment ? { comment: input.comment } : {}), 395 421 });
+7
app/routes/dashboard/automations/[rkey].tsx
··· 331 331 </> 332 332 )} 333 333 </> 334 + ) : action.$type === "semble-save" ? ( 335 + <> 336 + <dt>Page URL</dt> 337 + <dd> 338 + <InlineCode>{action.url}</InlineCode> 339 + </dd> 340 + </> 334 341 ) : ( 335 342 <> 336 343 <dt>Target Collection</dt>
+7
app/routes/u/[handle]/[rkey].tsx
··· 308 308 </> 309 309 )} 310 310 </> 311 + ) : action.$type === "semble-save" ? ( 312 + <> 313 + <dt>Page URL</dt> 314 + <dd> 315 + <InlineCode>{action.url}</InlineCode> 316 + </dd> 317 + </> 311 318 ) : ( 312 319 <> 313 320 <dt>Target Collection</dt>
+23 -1
lexicons/run/airglow/automation.json
··· 49 49 "#bskyPostAction", 50 50 "#patchRecordAction", 51 51 "#bookmarkAction", 52 - "#followAction" 52 + "#followAction", 53 + "#sembleSaveAction" 53 54 ] 54 55 } 55 56 }, ··· 384 385 "type": "string", 385 386 "description": "DID of the account to follow. Supports {{placeholders}}; the runtime enforces the rendered value matches the DID format before writing the record.", 386 387 "maxLength": 512 388 + }, 389 + "forEach": { 390 + "type": "ref", 391 + "ref": "#forEachConfig" 392 + }, 393 + "comment": { 394 + "type": "string", 395 + "description": "Optional user note about this action.", 396 + "maxLength": 512 397 + } 398 + } 399 + }, 400 + "sembleSaveAction": { 401 + "type": "object", 402 + "description": "Save a URL to Semble (network.cosmik.card). The URL's metadata is fetched at execution time to populate the card automatically.", 403 + "required": ["url"], 404 + "properties": { 405 + "url": { 406 + "type": "string", 407 + "description": "URL to save. Supports {{placeholders}} resolved from event data.", 408 + "maxLength": 2048 387 409 }, 388 410 "forEach": { 389 411 "type": "ref",
+211
lib/actions/semble-save.test.ts
// Tests for the semble-save action executor (`executeSembleSave`).
//
// The database, the PDS record writer, the URL metadata fetcher and the
// DID-to-handle resolver are all replaced with mocks below, so these tests
// only observe what the executor passes to those collaborators and what ends
// up in the delivery_logs table of the test database.
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";

// Swap the real database module for the test database from ../test/db.js.
vi.mock("@/db/index.js", async () => {
  const { createTestDb } = await import("../test/db.js");
  return { db: createTestDb() };
});

// PDS writes are stubbed; each test decides whether the write resolves or rejects.
vi.mock("@/automations/pds.js", () => ({
  createArbitraryRecord: vi.fn(),
}));

// Metadata fetching is stubbed so no test performs a real HTTP request.
vi.mock("../url-metadata.js", () => ({
  fetchURLMetadata: vi.fn(),
}));

// Handle resolution returns a synthetic handle built from the last 4 chars of the DID.
vi.mock("../auth/client.js", () => ({
  resolveDidToHandle: vi.fn(async (did: string) => `handle-for-${did.slice(-4)}`),
}));

import { executeSembleSave } from "./semble-save.js";
import { createArbitraryRecord } from "../automations/pds.js";
import { fetchURLMetadata } from "../url-metadata.js";
import { db } from "../db/index.js";
import { automations, deliveryLogs } from "../db/schema.js";
import { makeMatch, makeSembleSaveAction, makeAutomation } from "../test/fixtures.js";

// Typed handles on the mocked collaborators.
const mockCreateRecord = vi.mocked(createArbitraryRecord);
const mockFetchMeta = vi.mocked(fetchURLMetadata);

describe("executeSembleSave", () => {
  beforeEach(async () => {
    // Freeze the clock: the first test asserts an exact `createdAt`, and the
    // retry tests advance timers manually.
    vi.useFakeTimers();
    vi.setSystemTime(new Date("2024-06-15T12:00:00.000Z"));
    mockCreateRecord.mockReset();
    mockFetchMeta.mockReset();

    // Start every test from empty log/automation tables plus one fixture automation.
    await db.delete(deliveryLogs);
    await db.delete(automations);
    await db.insert(automations).values(makeAutomation());
  });

  afterEach(() => {
    vi.useRealTimers();
  });

  it("fetches metadata and creates a card record on PDS", async () => {
    mockFetchMeta.mockResolvedValueOnce({
      url: "https://example.com/3k2la7bx",
      title: "Example Page",
      description: "A great page",
      imageUrl: "https://example.com/img.jpg",
      siteName: "Example",
      author: "Alice",
      type: "article",
    });
    mockCreateRecord.mockResolvedValueOnce({
      uri: "at://x/network.cosmik.card/rk",
      cid: "c",
    });

    const action = makeSembleSaveAction({
      url: "https://example.com/{{event.commit.rkey}}",
    });
    const match = makeMatch({ automation: { actions: [action] } });
    await executeSembleSave(match, 0);

    // The placeholder must be rendered before the metadata fetch. The expected
    // value implies the default fixture event carries rkey "3k2la7bx"
    // (NOTE(review): confirm against ../test/fixtures.js).
    expect(mockFetchMeta).toHaveBeenCalledWith("https://example.com/3k2la7bx");
    expect(mockCreateRecord).toHaveBeenCalledTimes(1);

    const [did, collection, record] = mockCreateRecord.mock.calls[0]!;
    expect(did).toBe(match.automation.did);
    expect(collection).toBe("network.cosmik.card");
    expect(record).toMatchObject({
      type: "URL",
      createdAt: "2024-06-15T12:00:00.000Z",
      content: {
        $type: "network.cosmik.card#urlContent",
        url: "https://example.com/3k2la7bx",
        metadata: {
          $type: "network.cosmik.card#urlMetadata",
          type: "article",
          title: "Example Page",
          description: "A great page",
          imageUrl: "https://example.com/img.jpg",
          siteName: "Example",
          author: "Alice",
        },
      },
    });
  });

  it("omits optional metadata fields when absent", async () => {
    mockFetchMeta.mockResolvedValueOnce({
      url: "https://example.com/x",
      title: "Only Title",
      type: "link",
    });
    mockCreateRecord.mockResolvedValueOnce({
      uri: "at://x/network.cosmik.card/rk",
      cid: "c",
    });

    const action = makeSembleSaveAction({ url: "https://example.com/x" });
    const match = makeMatch({ automation: { actions: [action] } });
    await executeSembleSave(match, 0);

    // Missing fields must be absent from the record, not present as undefined.
    const record = mockCreateRecord.mock.calls[0]![2]!;
    const meta = (record.content as Record<string, unknown>).metadata as Record<string, unknown>;
    expect(meta.title).toBe("Only Title");
    expect(meta).not.toHaveProperty("description");
    expect(meta).not.toHaveProperty("imageUrl");
    expect(meta).not.toHaveProperty("siteName");
    expect(meta).not.toHaveProperty("author");
  });

  it("falls back to minimal metadata when fetchURLMetadata throws", async () => {
    mockFetchMeta.mockRejectedValueOnce(new Error("network error"));
    mockCreateRecord.mockResolvedValueOnce({
      uri: "at://x/network.cosmik.card/rk",
      cid: "c",
    });

    const action = makeSembleSaveAction({ url: "https://example.com/x" });
    const match = makeMatch({ automation: { actions: [action] } });
    await executeSembleSave(match, 0);

    // A metadata failure is best-effort: the card is still written, with the
    // rendered URL and the default "link" type.
    expect(mockCreateRecord).toHaveBeenCalledTimes(1);
    const record = mockCreateRecord.mock.calls[0]![2]!;
    expect(record).toMatchObject({
      type: "URL",
      content: {
        url: "https://example.com/x",
        metadata: {
          $type: "network.cosmik.card#urlMetadata",
          type: "link",
        },
      },
    });
  });

  it("fails with template error when URL renders to empty string", async () => {
    const action = makeSembleSaveAction({ url: "{{event.missing}}" });
    const match = makeMatch({ automation: { actions: [action] } });
    await executeSembleSave(match, 0);

    // Nothing is written to the PDS; the failure is logged with status code 0.
    expect(mockCreateRecord).not.toHaveBeenCalled();
    const logs = await db.query.deliveryLogs.findMany();
    expect(logs[0]!.statusCode).toBe(0);
    expect(logs[0]!.error).toContain("Template error");
  });

  it("rejects rendered URL with non-http(s) scheme", async () => {
    const action = makeSembleSaveAction({ url: "{{event.url}}" });
    const match = makeMatch({
      automation: { actions: [action] },
      event: { url: "javascript:alert(1)" } as unknown as Record<string, unknown>,
    });
    await executeSembleSave(match, 0);

    // The scheme check must run before both the metadata fetch and the PDS write.
    expect(mockFetchMeta).not.toHaveBeenCalled();
    expect(mockCreateRecord).not.toHaveBeenCalled();
    const logs = await db.query.deliveryLogs.findMany();
    expect(logs[0]!.error).toContain("http://");
  });

  it("extracts status code from PDS error message", async () => {
    mockFetchMeta.mockResolvedValueOnce({ url: "https://example.com/x", type: "link" });
    mockCreateRecord.mockRejectedValueOnce(
      new Error("PDS com.atproto.repo.createRecord failed (400): bad request"),
    );

    const action = makeSembleSaveAction();
    const match = makeMatch({ automation: { actions: [action] } });
    await executeSembleSave(match, 0);

    // The "(400)" embedded in the error message becomes the logged status code.
    const logs = await db.query.deliveryLogs.findMany();
    expect(logs).toHaveLength(1);
    expect(logs[0]!.statusCode).toBe(400);
  });

  it("retries on 5xx PDS errors", async () => {
    // Not `...Once`: the metadata fetch runs again on the retry attempt.
    mockFetchMeta.mockResolvedValue({ url: "https://example.com/x", type: "link" });
    mockCreateRecord
      .mockRejectedValueOnce(new Error("PDS failed (500): internal"))
      .mockResolvedValueOnce({ uri: "at://x/network.cosmik.card/rk", cid: "c" });

    const action = makeSembleSaveAction();
    const match = makeMatch({ automation: { actions: [action] } });
    await executeSembleSave(match, 0);

    expect(mockCreateRecord).toHaveBeenCalledTimes(1);

    // Advancing the fake clock by 5 s is expected to trigger the first retry.
    await vi.advanceTimersByTimeAsync(5_000);
    expect(mockCreateRecord).toHaveBeenCalledTimes(2);

    // One log row per attempt.
    const logs = await db.query.deliveryLogs.findMany();
    expect(logs).toHaveLength(2);
  });

  it("does not retry on 4xx PDS errors", async () => {
    mockFetchMeta.mockResolvedValue({ url: "https://example.com/x", type: "link" });
    mockCreateRecord.mockRejectedValueOnce(new Error("PDS failed (400): bad request"));

    const action = makeSembleSaveAction();
    const match = makeMatch({ automation: { actions: [action] } });
    await executeSembleSave(match, 0);

    // Even after a full minute of fake time, no second attempt is made.
    await vi.advanceTimersByTimeAsync(60_000);
    expect(mockCreateRecord).toHaveBeenCalledTimes(1);
  });
});
+87
lib/actions/semble-save.ts
import { type SembleSaveAction } from "../db/schema.js";
import { createArbitraryRecord } from "../automations/pds.js";
import { renderTextTemplate, type FetchContext } from "./template.js";
import { parsePdsError, wrapWithDelivery, type ActionResult } from "./delivery.js";
import type { MatchedEvent } from "../jetstream/consumer.js";
import { fetchURLMetadata, type UrlMetadata } from "../url-metadata.js";

/** Collection that Semble card records are written to. */
const TARGET_COLLECTION = "network.cosmik.card";

/** A rendered URL must carry a literal `http://` or `https://` prefix. */
const HTTP_URL_PREFIX_RE = /^https?:\/\//i;

/**
 * Validates a rendered URL before anything is fetched or persisted.
 *
 * @param url - The rendered, trimmed URL.
 * @throws Error when the URL is empty, does not start with http(s)://, or
 *   cannot be parsed as a URL at all.
 */
function assertSavableUrl(url: string): void {
  if (!url) {
    throw new Error("url rendered to an empty string");
  }
  // Reject non-http(s) schemes before the metadata fetch. Without this, a
  // `javascript:` or `file:` URL would fail inside fetchURLMetadata, hit the
  // best-effort catch in buildRecord, and still get saved to the user's PDS
  // with the bad URL.
  if (!HTTP_URL_PREFIX_RE.test(url)) {
    throw new Error("url must start with http:// or https://");
  }
  // The prefix check alone lets malformed values such as "https://" through.
  // Those would also be swallowed by the metadata catch and persisted, so
  // require the value to parse as a URL as well.
  try {
    void new URL(url);
  } catch {
    throw new Error("url is not a valid http:// or https:// URL");
  }
}

/**
 * Renders the action's URL template, fetches page metadata for it, and builds
 * the card record body to write to {@link TARGET_COLLECTION}.
 *
 * Metadata fetching is best-effort: if it throws, the record is still built
 * with only the rendered URL and the default `"link"` type. Optional metadata
 * fields are only included when they have a truthy value.
 *
 * @param match - The matched event and the automation it triggered.
 * @param action - The semble-save action being executed.
 * @param fetchContext - Optional context forwarded to the template renderer.
 * @param item - Optional item forwarded to the template renderer.
 * @returns The record body (without `$type`; the writer is expected to add it).
 * @throws Error when the URL renders empty, has a non-http(s) scheme, or is
 *   not parseable; also propagates any error from the template renderer.
 */
async function buildRecord(
  match: MatchedEvent,
  action: SembleSaveAction,
  fetchContext?: FetchContext,
  item?: unknown,
): Promise<Record<string, unknown>> {
  const { automation, event } = match;

  const url = (await renderTextTemplate(action.url, event, fetchContext, automation, item)).trim();
  assertSavableUrl(url);

  let metadata: UrlMetadata;
  try {
    metadata = await fetchURLMetadata(url);
  } catch {
    // Deliberate best-effort: a failed fetch must not prevent the save.
    metadata = { url };
  }

  const urlMetadata: Record<string, unknown> = {
    $type: "network.cosmik.card#urlMetadata",
    type: metadata.type ?? "link",
  };
  if (metadata.title) urlMetadata.title = metadata.title;
  if (metadata.description) urlMetadata.description = metadata.description;
  if (metadata.imageUrl) urlMetadata.imageUrl = metadata.imageUrl;
  if (metadata.siteName) urlMetadata.siteName = metadata.siteName;
  if (metadata.author) urlMetadata.author = metadata.author;

  return {
    type: "URL",
    content: {
      $type: "network.cosmik.card#urlContent",
      // Prefer the URL reported by the metadata fetch; fall back to the
      // rendered URL so a card is never written with an empty `url`.
      url: metadata.url || url,
      metadata: urlMetadata,
    },
    createdAt: new Date().toISOString(),
  };
}

/**
 * Runs one semble-save action: builds the card record and writes it to the
 * automation owner's repo.
 *
 * @param match - The matched event and the automation it triggered.
 * @param action - The semble-save action being executed.
 * @param fetchContext - Optional context forwarded to the template renderer.
 * @param item - Optional item forwarded to the template renderer.
 * @returns `statusCode: 200` with the created record's `uri` and `cid` on
 *   success; `statusCode: 0` with a `"Template error: …"` message when the
 *   record could not be built; otherwise whatever `parsePdsError` derives
 *   from the write failure.
 */
async function execute(
  match: MatchedEvent,
  action: SembleSaveAction,
  fetchContext?: FetchContext,
  item?: unknown,
): Promise<ActionResult> {
  const { automation } = match;

  let record: Record<string, unknown>;
  try {
    record = await buildRecord(match, action, fetchContext, item);
  } catch (err) {
    // Build failures (rendering or URL validation) never reach the PDS.
    return {
      statusCode: 0,
      error: `Template error: ${err instanceof Error ? err.message : String(err)}`,
    };
  }

  try {
    const created = await createArbitraryRecord(automation.did, TARGET_COLLECTION, record);
    return { statusCode: 200, uri: created.uri, cid: created.cid };
  } catch (err) {
    return parsePdsError(err);
  }
}

/**
 * Public executor for semble-save actions, produced by `wrapWithDelivery`
 * from an action selector, the executor above, and a serializer for the
 * unrendered action URL.
 * NOTE(review): delivery logging and retry behaviour live in
 * `wrapWithDelivery` (./delivery.js), which is not visible here.
 */
export const executeSembleSave = wrapWithDelivery(
  (match, i) => match.automation.actions[i] as SembleSaveAction,
  execute,
  (action) => JSON.stringify({ url: action.url }),
);
+44 -1
lib/actions/validation.ts
··· 1 1 import { SECRET_NAME_RE, SECRET_REF_RE } from "../secrets/store.js"; 2 - import { AUTOMATION_LIMITS, BOOKMARK_LIMITS } from "../automations/limits.js"; 2 + import { AUTOMATION_LIMITS, BOOKMARK_LIMITS, SEMBLE_SAVE_LIMITS } from "../automations/limits.js"; 3 3 import { nsidRequiresWantedDids } from "../lexicons/match.js"; 4 4 import { isValidNsid } from "../lexicons/resolver.js"; 5 5 import { PLACEHOLDER_RE, validateTextTemplate } from "./template.js"; ··· 58 58 type: "follow"; 59 59 target: FollowTarget; 60 60 subject: string; 61 + comment?: string; 62 + }) 63 + | (ActionBase & { 64 + type: "semble-save"; 65 + url: string; 61 66 comment?: string; 62 67 }); 63 68 ··· 565 570 } 566 571 567 572 return { valid: true, tags }; 573 + } 574 + 575 + type SembleSaveInput = { 576 + url: string; 577 + }; 578 + 579 + // Allow either a literal http(s):// prefix or a leading {{...}} placeholder 580 + // (so the entire URL can come from event/action data). A literal non-http 581 + // scheme like `javascript:` is rejected here so it can't be persisted to the 582 + // user's PDS; the runtime SSRF guard remains the real security boundary. 
583 + const SEMBLE_SAVE_URL_OK_RE = /^(https?:\/\/|\{\{)/i; 584 + 585 + export function validateSembleSaveInput( 586 + input: SembleSaveInput, 587 + fetchNames: string[], 588 + actionNames: string[], 589 + hasItem?: boolean, 590 + ): { valid: true } | { valid: false; error: string } { 591 + if (!input.url || typeof input.url !== "string" || !input.url.trim()) { 592 + return { valid: false, error: "url is required for semble-save actions" }; 593 + } 594 + if (input.url.length > SEMBLE_SAVE_LIMITS.url) { 595 + return { 596 + valid: false, 597 + error: `url must be ${SEMBLE_SAVE_LIMITS.url} characters or less`, 598 + }; 599 + } 600 + if (!SEMBLE_SAVE_URL_OK_RE.test(input.url)) { 601 + return { 602 + valid: false, 603 + error: "url must start with http://, https://, or a {{placeholder}}", 604 + }; 605 + } 606 + const urlValidation = validateTextTemplate(input.url, fetchNames, actionNames, hasItem); 607 + if (!urlValidation.valid) { 608 + return { valid: false, error: `url: ${urlValidation.error}` }; 609 + } 610 + return { valid: true }; 568 611 } 569 612 570 613 const FOR_EACH_PATH_RE = /^[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z_][a-zA-Z0-9_-]*|\[\])+$/;
+2 -1
lib/auth/client.ts
··· 37 37 a.$type === "record" || 38 38 a.$type === "patch-record" || 39 39 a.$type === "bookmark" || 40 - a.$type === "follow", 40 + a.$type === "follow" || 41 + a.$type === "semble-save", 41 42 ); 42 43 } 43 44
+7 -1
lib/automations/action-catalogue.test.ts
··· 12 12 expect(bsky.actions.map((a) => a.id)).toEqual(["bsky-post", "follow-bluesky", "bsky-like"]); 13 13 }); 14 14 15 - it("orders Apps tiles: Bookmark → Follow Semble → Follow Sifa → Follow Tangled", () => { 15 + it("orders Apps tiles: Bookmark → Save Semble → Follow Semble → Follow Sifa → Follow Tangled", () => { 16 16 const apps = ACTION_CATALOGUE.find((c) => c.id === "apps")!; 17 17 expect(apps.actions.map((a) => a.id)).toEqual([ 18 18 "bookmark", 19 + "semble-save", 19 20 "follow-cosmik", 20 21 "follow-sifa", 21 22 "follow-tangled", ··· 40 41 expect(ACTION_INFO_BY_TYPE["follow-cosmik"]!.colorKey).toBe("cosmik"); 41 42 }); 42 43 44 + it("propagates colorKey for semble-save tile", () => { 45 + expect(ACTION_INFO_BY_TYPE["semble-save"]!.colorKey).toBe("cosmik"); 46 + }); 47 + 43 48 it("leaves colorKey undefined for tiles that use their category's color", () => { 44 49 expect(ACTION_INFO_BY_TYPE["bookmark"]!.colorKey).toBeUndefined(); 45 50 expect(ACTION_INFO_BY_TYPE["bsky-post"]!.colorKey).toBeUndefined(); ··· 52 57 expect(ACTION_INFO_BY_TYPE["follow-tangled"]!.faviconDomain).toBe("tangled.sh"); 53 58 expect(ACTION_INFO_BY_TYPE["follow-cosmik"]!.faviconDomain).toBe("semble.so"); 54 59 expect(ACTION_INFO_BY_TYPE["bookmark"]!.faviconDomain).toBe("margin.at"); 60 + expect(ACTION_INFO_BY_TYPE["semble-save"]!.faviconDomain).toBe("semble.so"); 55 61 }); 56 62 }); 57 63
+11
lib/automations/action-catalogue.ts
··· 1 1 import { 2 2 Bookmark, 3 + BookmarkPlus, 3 4 FilePlus2, 4 5 Heart, 5 6 MessageSquare, ··· 16 17 | "record" 17 18 | "patch-record" 18 19 | "bookmark" 20 + | "semble-save" 19 21 | `follow-${FollowTarget}`; 20 22 21 23 type ActionInfo = { ··· 102 104 icon: Bookmark, 103 105 available: true, 104 106 faviconDomain: "margin.at", 107 + }, 108 + { 109 + id: "semble-save", 110 + label: "Save on Semble", 111 + description: "Save a URL as a card on Semble", 112 + icon: BookmarkPlus, 113 + available: true, 114 + colorKey: "cosmik" as ColorKey, 115 + faviconDomain: "semble.so", 105 116 }, 106 117 ...followTilesByCat.apps, 107 118 ],
+4
lib/automations/limits.ts
··· 18 18 tag: 64, 19 19 maxTags: 10, 20 20 } as const; 21 + 22 + export const SEMBLE_SAVE_LIMITS = { 23 + url: 2048, 24 + } as const;
+8
lib/automations/pds-serialize.ts
··· 63 63 ...(a.comment ? { comment: a.comment } : {}), 64 64 }; 65 65 } 66 + if (a.$type === "semble-save") { 67 + return { 68 + $type: "run.airglow.automation#sembleSaveAction", 69 + url: a.url, 70 + ...forEachField, 71 + ...(a.comment ? { comment: a.comment } : {}), 72 + }; 73 + } 66 74 return { 67 75 $type: "run.airglow.automation#recordAction", 68 76 targetCollection: a.targetCollection,
+11 -3
lib/automations/pds.ts
··· 95 95 comment?: string; 96 96 }; 97 97 98 + type PdsSembleSaveAction = { 99 + $type: "run.airglow.automation#sembleSaveAction"; 100 + url: string; 101 + forEach?: PdsForEachConfig; 102 + comment?: string; 103 + }; 104 + 98 105 export type PdsAction = 99 106 | PdsWebhookAction 100 107 | PdsRecordAction 101 108 | PdsBskyPostAction 102 109 | PdsPatchRecordAction 103 110 | PdsBookmarkAction 104 - | PdsFollowAction; 111 + | PdsFollowAction 112 + | PdsSembleSaveAction; 105 113 106 114 export type PdsFetchStepRecord = { 107 115 $type: "run.airglow.automation#fetchStep"; ··· 237 245 const data = await pdsCall(did, "com.atproto.repo.createRecord", { 238 246 repo: did, 239 247 collection, 240 - record: { $type: collection, ...record }, 248 + record: { ...record, $type: collection }, 241 249 }); 242 250 return { uri: data.uri as string, cid: data.cid as string }; 243 251 } ··· 253 261 repo: did, 254 262 collection, 255 263 rkey, 256 - record: { $type: collection, ...record }, 264 + record: { ...record, $type: collection }, 257 265 }); 258 266 return { uri: data.uri as string, cid: data.cid as string }; 259 267 }
+10 -1
lib/db/schema.ts
··· 74 74 forEach?: ForEachConfig; 75 75 }; 76 76 77 + export type SembleSaveAction = { 78 + $type: "semble-save"; 79 + url: string; 80 + comment?: string; 81 + forEach?: ForEachConfig; 82 + }; 83 + 77 84 export type Action = 78 85 | WebhookAction 79 86 | RecordAction 80 87 | BskyPostAction 81 88 | PatchRecordAction 82 89 | BookmarkAction 83 - | FollowAction; 90 + | FollowAction 91 + | SembleSaveAction; 84 92 85 93 /** Action types that produce a record result (uri, cid, rkey) for chaining. */ 86 94 const RECORD_PRODUCING_TYPES = new Set([ ··· 89 97 "patch-record", 90 98 "bookmark", 91 99 "follow", 100 + "semble-save", 92 101 ]); 93 102 export function isRecordProducingAction(type: string): boolean { 94 103 return RECORD_PRODUCING_TYPES.has(type);
+30
lib/jetstream/handler.ts
··· 6 6 import { executePatchRecord } from "../actions/patch-record.js"; 7 7 import { executeBookmark } from "../actions/bookmark.js"; 8 8 import { executeFollow } from "../actions/follow.js"; 9 + import { executeSembleSave } from "../actions/semble-save.js"; 9 10 import { FOLLOW_TARGETS } from "../automations/follow-targets.js"; 10 11 import { resolveFetches } from "../actions/fetcher.js"; 11 12 import { isSuccess } from "../actions/delivery.js"; ··· 34 35 return executeBookmark; 35 36 case "follow": 36 37 return executeFollow; 38 + case "semble-save": 39 + return executeSembleSave; 37 40 default: 38 41 return dispatch; 39 42 } ··· 368 371 } catch (err) { 369 372 error = `Template error: ${err instanceof Error ? err.message : String(err)}`; 370 373 } 374 + } else if (action.$type === "semble-save") { 375 + try { 376 + const url = ( 377 + await renderTextTemplate(action.url, match.event, fetchContext, match.automation, item) 378 + ).trim(); 379 + // Redact credentials in userinfo before persisting to delivery_logs: 380 + // event-derived URLs may legitimately carry tokens we shouldn't store. 381 + const safeUrl = redactUserinfo(url); 382 + message = `Would save ${safeUrl || "(empty)"} to Semble${itemSuffix}`; 383 + payload = JSON.stringify({ url: safeUrl, item }); 384 + } catch (err) { 385 + error = `Template error: ${err instanceof Error ? err.message : String(err)}`; 386 + } 371 387 } else { 372 388 try { 373 389 const rendered = await renderTemplate( ··· 403 419 404 420 function truncateForLog(s: string, max = 120): string { 405 421 return s.length <= max ? s : s.slice(0, max) + "..."; 422 + } 423 + 424 + function redactUserinfo(url: string): string { 425 + try { 426 + const u = new URL(url); 427 + if (u.username || u.password) { 428 + u.username = ""; 429 + u.password = ""; 430 + return u.toString(); 431 + } 432 + return url; 433 + } catch { 434 + return url; 435 + } 406 436 } 407 437 408 438 /**
+9
lib/test/fixtures.ts
··· 7 7 PatchRecordAction, 8 8 BookmarkAction, 9 9 FollowAction, 10 + SembleSaveAction, 10 11 FetchStep, 11 12 } from "../db/schema.js"; 12 13 import type { MatchedEvent } from "../jetstream/consumer.js"; ··· 90 91 $type: "follow", 91 92 target: "bluesky", 92 93 subject: "{{event.did}}", 94 + ...overrides, 95 + }; 96 + } 97 + 98 + export function makeSembleSaveAction(overrides?: Partial<SembleSaveAction>): SembleSaveAction { 99 + return { 100 + $type: "semble-save", 101 + url: "https://example.com/{{event.commit.rkey}}", 93 102 ...overrides, 94 103 }; 95 104 }
+259
lib/url-metadata.test.ts
··· 1 + import { describe, it, expect, vi, beforeEach } from "vitest"; 2 + 3 + vi.mock("./url-guard.js", () => ({ 4 + assertPublicUrl: vi.fn(), 5 + })); 6 + 7 + vi.mock("./pds/fetch-with-retry.js", () => ({ 8 + fetchWithRetry: vi.fn(), 9 + })); 10 + 11 + import { fetchURLMetadata } from "./url-metadata.js"; 12 + import { fetchWithRetry } from "./pds/fetch-with-retry.js"; 13 + import { assertPublicUrl } from "./url-guard.js"; 14 + 15 + const mockFetch = vi.mocked(fetchWithRetry); 16 + const mockAssert = vi.mocked(assertPublicUrl); 17 + 18 + function html(head: string): string { 19 + return `<!doctype html><html><head>${head}</head><body></body></html>`; 20 + } 21 + 22 + function mockResponse( 23 + body: string, 24 + url = "https://example.com", 25 + contentType = "text/html; charset=utf-8", 26 + ) { 27 + const bytes = new TextEncoder().encode(body); 28 + let sent = false; 29 + const reader = { 30 + read: () => 31 + sent 32 + ? Promise.resolve({ done: true, value: undefined }) 33 + : ((sent = true), Promise.resolve({ done: false, value: bytes })), 34 + cancel: () => Promise.resolve(), 35 + }; 36 + const headers = new Headers({ "content-type": contentType }); 37 + mockFetch.mockResolvedValueOnce({ 38 + status: 200, 39 + body: { getReader: () => reader, cancel: () => Promise.resolve() }, 40 + headers, 41 + url, 42 + } as unknown as Response); 43 + } 44 + 45 + function mockRedirect(location: string, status = 302) { 46 + const headers = new Headers({ location }); 47 + mockFetch.mockResolvedValueOnce({ 48 + status, 49 + body: { 50 + getReader: () => ({ 51 + read: () => Promise.resolve({ done: true, value: undefined }), 52 + cancel: () => Promise.resolve(), 53 + }), 54 + cancel: () => Promise.resolve(), 55 + }, 56 + headers, 57 + url: "", 58 + } as unknown as Response); 59 + } 60 + 61 + beforeEach(() => { 62 + mockFetch.mockReset(); 63 + mockAssert.mockReset(); 64 + mockAssert.mockResolvedValue(new URL("https://example.com")); 65 + }); 66 + 67 + 
// Behavioural tests for fetchURLMetadata. The helpers used below (html,
// mockResponse, mockRedirect, mockAssert, mockFetch) are defined earlier in
// this file; presumably they stub the network layer and the SSRF guard.
describe("fetchURLMetadata", () => {
  it("extracts title from <title> tag", async () => {
    mockResponse(html("<title>Hello World</title>"));
    const m = await fetchURLMetadata("https://example.com");
    expect(m.title).toBe("Hello World");
  });

  it("prefers og:title over <title>", async () => {
    mockResponse(html('<title>Fallback</title><meta property="og:title" content="OG Title">'));
    const m = await fetchURLMetadata("https://example.com");
    expect(m.title).toBe("OG Title");
  });

  it("falls back to twitter:title when og:title is absent", async () => {
    mockResponse(html('<meta name="twitter:title" content="Twitter Title">'));
    const m = await fetchURLMetadata("https://example.com");
    expect(m.title).toBe("Twitter Title");
  });

  it("extracts all og fields", async () => {
    mockResponse(
      html(
        [
          '<meta property="og:title" content="Title">',
          '<meta property="og:description" content="Desc">',
          '<meta property="og:image" content="https://img.example.com/a.jpg">',
          '<meta property="og:site_name" content="Example">',
          '<meta property="og:type" content="article">',
        ].join(""),
      ),
    );
    const m = await fetchURLMetadata("https://example.com");
    expect(m.title).toBe("Title");
    expect(m.description).toBe("Desc");
    expect(m.imageUrl).toBe("https://img.example.com/a.jpg");
    expect(m.siteName).toBe("Example");
    expect(m.type).toBe("article");
  });

  it("extracts article:author", async () => {
    mockResponse(html('<meta property="article:author" content="Jane Doe">'));
    const m = await fetchURLMetadata("https://example.com");
    expect(m.author).toBe("Jane Doe");
  });

  it('falls back to <meta name="author"> when article:author is absent', async () => {
    mockResponse(html('<meta name="author" content="John Smith">'));
    const m = await fetchURLMetadata("https://example.com");
    expect(m.author).toBe("John Smith");
  });

  it("decodes HTML entities in extracted values", async () => {
    mockResponse(html("<title>Tom &amp; Jerry &lt;3&gt;</title>"));
    const m = await fetchURLMetadata("https://example.com");
    expect(m.title).toBe("Tom & Jerry <3>");
  });

  it("decodes numeric and hex HTML entities", async () => {
    mockResponse(html("<title>A &#65; B &#x42; C</title>"));
    const m = await fetchURLMetadata("https://example.com");
    expect(m.title).toBe("A A B B C");
  });

  it("returns partial metadata when some tags are missing", async () => {
    mockResponse(html("<title>Only Title</title>"));
    const m = await fetchURLMetadata("https://example.com");
    expect(m.title).toBe("Only Title");
    expect(m.description).toBeUndefined();
    expect(m.imageUrl).toBeUndefined();
    expect(m.siteName).toBeUndefined();
    expect(m.author).toBeUndefined();
  });

  it('defaults type to "link" when og:type is absent', async () => {
    mockResponse(html("<title>T</title>"));
    const m = await fetchURLMetadata("https://example.com");
    expect(m.type).toBe("link");
  });

  it("captures final URL after redirects", async () => {
    mockResponse(html("<title>R</title>"), "https://example.com/final");
    const m = await fetchURLMetadata("https://example.com/start");
    expect(m.url).toBe("https://example.com/final");
  });

  it("throws when assertPublicUrl rejects the URL", async () => {
    mockAssert.mockRejectedValueOnce(new Error("private network"));
    await expect(fetchURLMetadata("http://127.0.0.1")).rejects.toThrow("private network");
  });

  it("handles single-quoted meta attributes", async () => {
    mockResponse(html("<meta property='og:title' content='Single Quoted'>"));
    const m = await fetchURLMetadata("https://example.com");
    expect(m.title).toBe("Single Quoted");
  });

  it("works when </head> is missing (uses full buffer)", async () => {
    mockResponse("<html><head><title>No Close</title><body></body></html>");
    const m = await fetchURLMetadata("https://example.com");
    expect(m.title).toBe("No Close");
  });

  it("returns minimal metadata for non-HTML content types", async () => {
    mockResponse("not-html-bytes", "https://example.com/file.pdf", "application/pdf");
    const m = await fetchURLMetadata("https://example.com/file.pdf");
    expect(m).toEqual({ url: "https://example.com/file.pdf", type: "link" });
  });

  it("re-validates each redirect hop with assertPublicUrl", async () => {
    mockRedirect("https://intermediate.example.com/next");
    mockRedirect("https://final.example.com/page");
    mockResponse(html("<title>Final</title>"), "https://final.example.com/page");

    const m = await fetchURLMetadata("https://example.com/start");
    expect(m.title).toBe("Final");
    expect(mockAssert).toHaveBeenCalledTimes(3);
    expect(mockAssert).toHaveBeenNthCalledWith(1, "https://example.com/start");
    expect(mockAssert).toHaveBeenNthCalledWith(2, "https://intermediate.example.com/next");
    expect(mockAssert).toHaveBeenNthCalledWith(3, "https://final.example.com/page");
  });

  it("rejects redirect to a private network", async () => {
    mockRedirect("http://169.254.169.254/latest/meta-data/");
    mockAssert.mockResolvedValueOnce(new URL("https://example.com")); // first hop OK
    mockAssert.mockRejectedValueOnce(new Error("private network")); // redirect target rejected

    await expect(fetchURLMetadata("https://example.com/start")).rejects.toThrow("private network");
  });

  it("throws on too many redirects", async () => {
    // Queue more redirects than the implementation is willing to follow.
    for (let i = 0; i < 10; i++) {
      mockRedirect(`https://example.com/hop${i}`);
    }
    await expect(fetchURLMetadata("https://example.com/start")).rejects.toThrow(
      "Too many redirects",
    );
  });

  it("sends Accept-Encoding: identity to disable transparent decompression", async () => {
    mockResponse(html("<title>X</title>"));
    await fetchURLMetadata("https://example.com");
    const init = mockFetch.mock.calls[0]![1]!;
    expect((init.headers as Record<string, string>)["Accept-Encoding"]).toBe("identity");
  });

  it("sends redirect: manual so SSRF guard runs per hop", async () => {
    mockResponse(html("<title>X</title>"));
    await fetchURLMetadata("https://example.com");
    const init = mockFetch.mock.calls[0]![1]!;
    expect(init.redirect).toBe("manual");
  });

  it("caps title length", async () => {
    const long = "a".repeat(500);
    mockResponse(html(`<title>${long}</title>`));
    const m = await fetchURLMetadata("https://example.com");
    expect(m.title!.length).toBe(300);
  });

  it("caps description length", async () => {
    const long = "b".repeat(2000);
    mockResponse(html(`<meta property="og:description" content="${long}">`));
    const m = await fetchURLMetadata("https://example.com");
    expect(m.description!.length).toBe(1000);
  });

  it("rejects javascript: scheme in og:image", async () => {
    mockResponse(html(`<meta property="og:image" content="javascript:alert(1)">`));
    const m = await fetchURLMetadata("https://example.com");
    expect(m.imageUrl).toBeUndefined();
  });

  it("rejects data: scheme in og:image", async () => {
    mockResponse(html(`<meta property="og:image" content="data:image/png;base64,AAAA">`));
    const m = await fetchURLMetadata("https://example.com");
    expect(m.imageUrl).toBeUndefined();
  });

  it("resolves relative og:image against the page URL", async () => {
    mockResponse(
      html(`<meta property="og:image" content="/img/a.jpg">`),
      "https://example.com/page",
    );
    const m = await fetchURLMetadata("https://example.com/page");
    expect(m.imageUrl).toBe("https://example.com/img/a.jpg");
  });

  it("strips control characters from extracted fields", async () => {
    // The control character is written as an explicit escape so it survives
    // editors, diff viewers and copy/paste that mangle raw control bytes.
    mockResponse(html("<title>Hi\u0000there</title>"));
    const m = await fetchURLMetadata("https://example.com");
    expect(m.title).toBe("Hithere");
  });
});
+208
lib/url-metadata.ts
import { assertPublicUrl } from "./url-guard.js";
import { fetchWithRetry } from "./pds/fetch-with-retry.js";
import { config } from "./config.js";

/** Metadata extracted from a page's `<head>`. Only `url` is always present. */
export type UrlMetadata = {
  url: string;
  title?: string;
  description?: string;
  imageUrl?: string;
  siteName?: string;
  author?: string;
  type?: string;
};

/** Maximum number of response-body bytes that are read and decoded. */
const MAX_HEAD_BYTES = 32_768;
/** Per-request timeout handed to `fetchWithRetry`. */
const TIMEOUT_MS = 5_000;
/** Number of redirects followed after the initial request before giving up. */
const MAX_REDIRECTS = 5;

/** Caps on extracted metadata strings before they are written to the user's
 * PDS. The remote site is untrusted; without caps a hostile page can stuff
 * tens of KB into a single field, and downstream consumers may render
 * control characters or injected markup verbatim. */
const FIELD_CAPS = {
  title: 300,
  description: 1_000,
  siteName: 200,
  author: 200,
  type: 64,
  imageUrl: 2_048,
} as const;

const TITLE_RE = /<title[^>]*>([\s\S]*?)<\/title>/i;
const META_RE = /<meta\s[^>]*>/gi;
// The lookbehind (instead of a plain `\b`) keeps `data-content="…"` or
// `data-name="…"` from being mistaken for the real `content` / `name`.
const ATTR_RE = /(?<![\w-])(name|property|content)\s*=\s*(?:"([^"]*)"|'([^']*)')/gi;

// A Map rather than an object literal: with a plain object, `&constructor;`
// would resolve to the inherited `Object` function and be substituted as
// "function Object() { [native code] }".
const NAMED_ENTITIES: ReadonlyMap<string, string> = new Map([
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["apos", "'"],
  ["nbsp", " "],
]);
const ENTITY_RE = /&(?:#x([0-9a-f]+)|#([0-9]+)|([a-z]+));/gi;

/**
 * Decode the small set of named entities in `NAMED_ENTITIES` plus decimal and
 * hexadecimal numeric character references. Unknown names and out-of-range
 * code points are left untouched.
 */
function decodeEntities(s: string): string {
  return s.replace(
    ENTITY_RE,
    (match: string, hex: string | undefined, dec: string | undefined, name: string | undefined) => {
      if (name) return NAMED_ENTITIES.get(name.toLowerCase()) ?? match;

      const cp = hex ? parseInt(hex, 16) : dec ? parseInt(dec, 10) : undefined;
      if (cp === undefined || !Number.isFinite(cp) || cp < 0 || cp > 0x10ffff) return match;
      // A surrogate code point would yield a lone surrogate, which is not
      // valid Unicode text; the HTML spec maps these references to U+FFFD.
      if (cp >= 0xd800 && cp <= 0xdfff) return "\ufffd";
      return String.fromCodePoint(cp);
    },
  );
}

/** Strip control characters (except tab/newline/CR) and trim surrounding whitespace. */
function sanitize(s: string): string {
  // eslint-disable-next-line no-control-regex
  return s.replace(/[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]/g, "").trim();
}

/**
 * Decode, sanitize and length-cap one raw field value.
 *
 * @returns the cleaned string, or `undefined` when the input is missing or
 *   empty after cleaning.
 */
function clean(raw: string | undefined, max: number): string | undefined {
  if (raw === undefined) return undefined;
  const decoded = sanitize(decodeEntities(raw));
  if (!decoded) return undefined;
  if (decoded.length <= max) return decoded;
  let sliced = decoded.slice(0, max);
  // Avoid leaving a lone high surrogate from a split astral codepoint.
  const last = sliced.charCodeAt(sliced.length - 1);
  if (last >= 0xd800 && last <= 0xdbff) sliced = sliced.slice(0, -1);
  return sliced;
}

/** Return the first candidate that is still non-empty after `clean`, so a
 * blank preferred tag does not hide a usable fallback. */
function firstClean(max: number, ...candidates: (string | undefined)[]): string | undefined {
  for (const candidate of candidates) {
    const value = clean(candidate, max);
    if (value !== undefined) return value;
  }
  return undefined;
}

/**
 * Collect `<meta>` tags into a map keyed by the lower-cased `name`/`property`
 * value. The first occurrence of a key wins; tags without both a key and a
 * non-empty `content` are ignored.
 */
function parseMeta(html: string): Map<string, string> {
  const map = new Map<string, string>();
  for (const tag of html.matchAll(META_RE)) {
    let nameOrProp = "";
    let content = "";
    for (const attr of tag[0].matchAll(ATTR_RE)) {
      const key = attr[1]!.toLowerCase();
      const val = attr[2] ?? attr[3] ?? "";
      if (key === "content") content = val;
      else nameOrProp = val.toLowerCase();
    }
    if (nameOrProp && content && !map.has(nameOrProp)) {
      map.set(nameOrProp, content);
    }
  }
  return map;
}

/**
 * Read at most `maxBytes` bytes of the response body and decode them as
 * UTF-8 (invalid sequences become U+FFFD). The reader is always cancelled
 * afterwards so the rest of the body is not downloaded.
 */
async function readCapped(res: Response, maxBytes: number): Promise<string> {
  if (!res.body) return "";
  const reader = res.body.getReader();
  const decoder = new TextDecoder("utf-8", { fatal: false });
  let remaining = maxBytes;
  let out = "";
  try {
    while (remaining > 0) {
      const { done, value } = await reader.read();
      if (done) break;
      // Truncate the final chunk so the cap holds exactly instead of
      // overshooting by up to one whole chunk.
      const chunk = value.byteLength > remaining ? value.subarray(0, remaining) : value;
      remaining -= chunk.byteLength;
      out += decoder.decode(chunk, { stream: true });
    }
    out += decoder.decode();
  } finally {
    await reader.cancel().catch(() => {});
  }
  return out;
}

/** Follow redirects manually so SSRF guard runs on every hop. Native
 * `fetch(redirect: "follow")` only resolves the initial host, letting a
 * public site 302 to 169.254.169.254 (cloud metadata) or 127.0.0.1.
 *
 * @throws whatever `assertPublicUrl` throws for a rejected hop, "Invalid
 *   redirect target" for an unparsable `Location`, and "Too many redirects"
 *   once the hop budget is exhausted.
 */
async function fetchFollowingRedirects(rawUrl: string): Promise<Response> {
  let current = rawUrl;
  for (let hop = 0; hop <= MAX_REDIRECTS; hop++) {
    await assertPublicUrl(current);
    const res = await fetchWithRetry(current, {
      timeoutMs: TIMEOUT_MS,
      redirect: "manual",
      headers: {
        "User-Agent": `Airglow (+${config.publicUrl})`,
        Accept: "text/html,*/*;q=0.1",
        // Disable transparent decompression: a tiny gzip payload can decode to
        // gigabytes before our 32KB cap kicks in on decoded bytes.
        "Accept-Encoding": "identity",
      },
    });
    if (res.status >= 300 && res.status < 400) {
      const location = res.headers.get("location");
      await res.body?.cancel().catch(() => {});
      // A 3xx without Location cannot be followed; hand it back as-is.
      if (!location) return res;
      try {
        current = new URL(location, current).toString();
      } catch {
        throw new Error("Invalid redirect target");
      }
      continue;
    }
    return res;
  }
  throw new Error("Too many redirects");
}

/**
 * Fetch `rawUrl` (following redirects with per-hop SSRF checks) and extract
 * Open Graph / Twitter / plain HTML metadata from the document head.
 *
 * Non-HTML responses yield only `{ url, type: "link" }`. For HTML, every
 * field is entity-decoded, stripped of control characters and length-capped;
 * `type` defaults to `"link"`.
 *
 * @param rawUrl the URL to inspect.
 * @returns the extracted metadata; `url` is the final response URL when the
 *   response reports one, otherwise `rawUrl`.
 * @throws any error raised by `fetchFollowingRedirects`.
 */
export async function fetchURLMetadata(rawUrl: string): Promise<UrlMetadata> {
  const res = await fetchFollowingRedirects(rawUrl);

  const finalUrl = res.url || rawUrl;
  const contentType = res.headers.get("content-type")?.toLowerCase() ?? "";

  if (contentType && !contentType.includes("html")) {
    await res.body?.cancel().catch(() => {});
    return { url: finalUrl, type: "link" };
  }

  const html = await readCapped(res, MAX_HEAD_BYTES);

  // Restrict parsing to <head> when it is properly closed; otherwise fall
  // back to everything that was read.
  const headMatch = html.match(/<head[\s>][\s\S]*?<\/head>/i);
  const head = headMatch ? headMatch[0] : html;

  const meta = parseMeta(head);
  const rawTitle = head.match(TITLE_RE)?.[1];

  const result: UrlMetadata = { url: finalUrl };

  result.title = firstClean(
    FIELD_CAPS.title,
    meta.get("og:title"),
    meta.get("twitter:title"),
    rawTitle,
  );
  result.description = firstClean(
    FIELD_CAPS.description,
    meta.get("og:description"),
    meta.get("twitter:description"),
    meta.get("description"),
  );
  // Take the first image candidate that survives both cleaning and scheme
  // validation, so an unusable og:image does not mask a valid twitter:image.
  result.imageUrl = [meta.get("og:image"), meta.get("twitter:image")]
    .map((raw) => sanitizeImageUrl(clean(raw, FIELD_CAPS.imageUrl), finalUrl))
    .find((url) => url !== undefined);
  result.siteName = clean(meta.get("og:site_name"), FIELD_CAPS.siteName);
  result.author = firstClean(FIELD_CAPS.author, meta.get("article:author"), meta.get("author"));
  result.type = clean(meta.get("og:type"), FIELD_CAPS.type) ?? "link";

  return result;
}

/** Only accept http(s) image URLs. Rejects javascript:/data:/file: schemes
 * and anything that fails URL parsing. Resolves relative URLs against the
 * page URL. */
function sanitizeImageUrl(raw: string | undefined, base: string): string | undefined {
  if (!raw) return undefined;
  let parsed: URL;
  try {
    parsed = new URL(raw, base);
  } catch {
    return undefined;
  }
  if (parsed.protocol !== "http:" && parsed.protocol !== "https:") return undefined;
  return parsed.toString();
}