Monorepo for wisp.place, a static site hosting service built on top of the AT Protocol. wisp.place
86
fork

Configure Feed

Select the types of activity you want to include in your feed.

Fix cache invalidation token parsing and stabilize HTML rewriting to be byte-based

+255 -54
+2 -1
apps/firehose-service/src/lib/cache-invalidation.ts
··· 48 48 did: string, 49 49 rkey: string, 50 50 action: 'updating' | 'update' | 'delete' | 'settings', 51 + token?: string, 51 52 ): Promise<void> { 52 53 const redis = getPublisher() 53 54 if (!redis) return 54 55 55 56 try { 56 - const message = JSON.stringify({ did, rkey, action }) 57 + const message = JSON.stringify({ did, rkey, action, token }) 57 58 logger.debug(`[CacheInvalidation] Publishing ${action} for ${did}/${rkey} to ${CHANNEL}`) 58 59 await redis.publish(CHANNEL, message) 59 60 } catch (err) {
+27 -18
apps/firehose-service/src/lib/cache-writer.ts
··· 563 563 564 564 // If HTML, also write rewritten version 565 565 if (isHtmlContent(file.path)) { 566 - const basePath = `/${did}/${rkey}/` 567 - let rewriteSource = content 568 - if (encoding === 'gzip' && content.length >= 2 && content[0] === 0x1f && content[1] === 0x8b) { 569 - try { 570 - rewriteSource = gunzipSync(content) 571 - } catch (error) { 572 - logger.error(`Failed to decompress ${file.path} for rewrite, using raw content`, error) 566 + try { 567 + const basePath = `/${did}/${rkey}/` 568 + let rewriteSource = content 569 + if (encoding === 'gzip' && content.length >= 2 && content[0] === 0x1f && content[1] === 0x8b) { 570 + try { 571 + rewriteSource = gunzipSync(content) 572 + } catch (error) { 573 + logger.error(`Failed to decompress ${file.path} for rewrite, using raw content`, error) 574 + } 573 575 } 574 - } 575 576 576 - const htmlString = new TextDecoder().decode(rewriteSource) 577 - const rewritten = await rewriteHtmlPaths(htmlString, basePath) 578 - const rewrittenContent = new TextEncoder().encode(rewritten) 577 + const htmlString = new TextDecoder().decode(rewriteSource) 578 + const rewritten = await rewriteHtmlPaths(htmlString, basePath) 579 + const rewrittenContent = new TextEncoder().encode(rewritten) 579 580 580 - const rewrittenKey = `${did}/${rkey}/.rewritten/${file.path}` 581 - await writeFile(rewrittenKey, rewrittenContent, { mimeType: 'text/html' }) 582 - logger.debug(`Wrote rewritten HTML: ${rewrittenKey}`) 581 + const rewrittenKey = `${did}/${rkey}/.rewritten/${file.path}` 582 + await writeFile(rewrittenKey, rewrittenContent, { mimeType: 'text/html' }) 583 + logger.debug(`Wrote rewritten HTML: ${rewrittenKey}`) 584 + } catch (error) { 585 + logger.error(`Failed to cache rewritten HTML for ${file.path}; continuing with original`, error, { 586 + did, 587 + rkey, 588 + path: file.path, 589 + }) 590 + } 583 591 } 584 592 585 593 logger.debug(`Stored ${file.path} (${content.length} bytes)`) ··· 655 663 656 664 // Notify 
hosting-service that this site is about to be updated so it can 657 665 // show the "updating" page instead of serving stale or partially-updated files. 666 + const invalidationToken = !options?.skipInvalidation ? crypto.randomUUID() : undefined 658 667 if (!options?.skipInvalidation) { 659 - await publishCacheInvalidation(did, rkey, 'updating') 668 + await publishCacheInvalidation(did, rkey, 'updating', invalidationToken) 660 669 } 661 670 662 671 // Compare CIDs to determine what to download/delete ··· 783 792 }) 784 793 785 794 if (!options?.skipInvalidation) { 786 - await publishCacheInvalidation(did, rkey, 'update').catch(() => undefined) 795 + await publishCacheInvalidation(did, rkey, 'update', invalidationToken).catch(() => undefined) 787 796 } 788 797 789 798 if (allRetryBackoffed && retryBackoffUntil) { ··· 807 816 })), 808 817 }) 809 818 if (!options?.skipInvalidation) { 810 - await publishCacheInvalidation(did, rkey, 'update').catch(() => undefined) 819 + await publishCacheInvalidation(did, rkey, 'update', invalidationToken).catch(() => undefined) 811 820 } 812 821 throw new Error(`Failed to delete files for ${did}/${rkey}`) 813 822 } ··· 833 842 // Notify hosting-service to invalidate its local caches (including negative 404 cache) 834 843 // (skip for backfill since it runs before the hosting-service serves traffic) 835 844 if (!options?.skipInvalidation) { 836 - await publishCacheInvalidation(did, rkey, 'update') 845 + await publishCacheInvalidation(did, rkey, 'update', invalidationToken) 837 846 } 838 847 839 848 logger.info(`Successfully cached site ${did}/${rkey}`)
+16
apps/firehose-service/src/lib/revalidate-worker.test.ts
··· 1 + import { describe, expect, test } from 'bun:test' 2 + import { shouldSkipInvalidationForReason } from './revalidate-worker' 3 + 4 + describe('shouldSkipInvalidationForReason', () => { 5 + test('skips invalidation for rewrite repair jobs', () => { 6 + expect(shouldSkipInvalidationForReason('rewrite-miss:docs/w/~/index.html')).toBe(true) 7 + }) 8 + 9 + test('does not skip invalidation for storage misses', () => { 10 + expect(shouldSkipInvalidationForReason('storage-miss:docs/raw/README.md')).toBe(false) 11 + }) 12 + 13 + test('does not skip invalidation for other revalidate reasons', () => { 14 + expect(shouldSkipInvalidationForReason('manual')).toBe(false) 15 + }) 16 + })
+8 -1
apps/firehose-service/src/lib/revalidate-worker.ts
··· 37 37 return fields 38 38 } 39 39 40 + export function shouldSkipInvalidationForReason(reason: string): boolean { 41 + // Rewrite repairs only repopulate `.rewritten/*` HTML variants. They should not 42 + // flip the whole site into "updating" while the original files remain serveable. 43 + return reason.startsWith('rewrite-miss') 44 + } 45 + 40 46 async function processMessage(id: string, rawFields: string[]): Promise<void> { 41 47 if (!redis) return 42 48 ··· 71 77 // For storage-miss events, force re-download all files since storage is empty 72 78 const forceDownload = reason.startsWith('storage-miss') 73 79 const forceRewriteHtml = reason.startsWith('rewrite-miss') 80 + const skipInvalidation = shouldSkipInvalidationForReason(reason) 74 81 75 82 try { 76 83 await handleSiteCreateOrUpdate(did, rkey, record.record, record.cid, { 77 - skipInvalidation: false, 84 + skipInvalidation, 78 85 forceDownload, 79 86 forceRewriteHtml, 80 87 })
+50
apps/hosting-service/src/lib/cache-invalidation.test.ts
··· 1 + import { beforeEach, describe, expect, test } from 'bun:test' 2 + import { 3 + clearSiteUpdating, 4 + isSiteUpdating, 5 + markSiteUpdating, 6 + parseCacheInvalidationMessage, 7 + resetUpdatingSitesForTests, 8 + } from './cache-invalidation' 9 + 10 + const DID = 'did:plc:test' 11 + const RKEY = 'site' 12 + 13 + describe('cache invalidation updating state', () => { 14 + beforeEach(() => { 15 + resetUpdatingSitesForTests() 16 + }) 17 + 18 + test('stale token cannot clear a newer update', () => { 19 + markSiteUpdating(DID, RKEY, 'token-a') 20 + markSiteUpdating(DID, RKEY, 'token-b') 21 + 22 + expect(clearSiteUpdating(DID, RKEY, 'token-a')).toBe(false) 23 + expect(isSiteUpdating(DID, RKEY)).toBe(true) 24 + }) 25 + 26 + test('matching token clears the active update', () => { 27 + markSiteUpdating(DID, RKEY, 'token-a') 28 + 29 + expect(clearSiteUpdating(DID, RKEY, 'token-a')).toBe(true) 30 + expect(isSiteUpdating(DID, RKEY)).toBe(false) 31 + }) 32 + 33 + test('unversioned clear remains backward compatible', () => { 34 + markSiteUpdating(DID, RKEY, 'token-a') 35 + 36 + expect(clearSiteUpdating(DID, RKEY)).toBe(true) 37 + expect(isSiteUpdating(DID, RKEY)).toBe(false) 38 + }) 39 + 40 + test('message parsing preserves token', () => { 41 + expect( 42 + parseCacheInvalidationMessage(JSON.stringify({ did: DID, rkey: RKEY, action: 'update', token: 'token-a' })), 43 + ).toEqual({ 44 + did: DID, 45 + rkey: RKEY, 46 + action: 'update', 47 + token: 'token-a', 48 + }) 49 + }) 50 + })
+69 -14
apps/hosting-service/src/lib/cache-invalidation.ts
··· 16 16 17 17 const CHANNEL = 'wisp:cache-invalidate' 18 18 19 + type CacheInvalidationAction = 'updating' | 'update' | 'delete' | 'settings' 20 + 21 + export interface CacheInvalidationMessage { 22 + did: string 23 + rkey: string 24 + action: CacheInvalidationAction 25 + token?: string 26 + } 27 + 19 28 // Sites currently being downloaded by the firehose-service. 20 - // Maps `${did}/${rkey}` → timestamp when the update started. 29 + // Maps `${did}/${rkey}` → current update token and timestamp. 21 30 // Used to show an "updating" page instead of serving stale files. 22 31 const UPDATING_TTL_MS = 10 * 60 * 1000 // 10 minutes safety timeout 23 - const updatingSites = new Map<string, number>() 32 + const updatingSites = new Map<string, { since: number; token?: string }>() 24 33 25 34 export function isSiteUpdating(did: string, rkey: string): boolean { 26 35 const key = `${did}/${rkey}` 27 - const since = updatingSites.get(key) 28 - if (since === undefined) return false 29 - if (Date.now() - since > UPDATING_TTL_MS) { 36 + const state = updatingSites.get(key) 37 + if (state === undefined) return false 38 + if (Date.now() - state.since > UPDATING_TTL_MS) { 30 39 // Firehose must have crashed; remove the stale entry 31 40 updatingSites.delete(key) 32 41 return false 33 42 } 34 43 return true 44 + } 45 + 46 + export function markSiteUpdating(did: string, rkey: string, token?: string): void { 47 + updatingSites.set(`${did}/${rkey}`, { since: Date.now(), token }) 48 + } 49 + 50 + export function clearSiteUpdating(did: string, rkey: string, token?: string): boolean { 51 + const key = `${did}/${rkey}` 52 + const state = updatingSites.get(key) 53 + if (!state) return false 54 + 55 + // Unversioned clears are treated as unconditional for compatibility. 56 + // Versioned clears only succeed if they match the active update token. 
57 + if (token && state.token && state.token !== token) { 58 + return false 59 + } 60 + 61 + updatingSites.delete(key) 62 + return true 63 + } 64 + 65 + export function resetUpdatingSitesForTests(): void { 66 + updatingSites.clear() 35 67 } 36 68 37 69 let subscriber: Redis | null = null 38 70 71 + export function parseCacheInvalidationMessage(message: string): CacheInvalidationMessage | null { 72 + const parsed = JSON.parse(message) as Partial<CacheInvalidationMessage> 73 + 74 + if ( 75 + typeof parsed.did !== 'string' || 76 + typeof parsed.rkey !== 'string' || 77 + (parsed.action !== 'updating' && 78 + parsed.action !== 'update' && 79 + parsed.action !== 'delete' && 80 + parsed.action !== 'settings') 81 + ) { 82 + return null 83 + } 84 + 85 + return { 86 + did: parsed.did, 87 + rkey: parsed.rkey, 88 + action: parsed.action, 89 + token: typeof parsed.token === 'string' ? parsed.token : undefined, 90 + } 91 + } 92 + 39 93 /** 40 94 * Directly invalidate a tier by listing and deleting all keys with the given prefix. 41 95 * Each tier is invalidated independently so a failure in one doesn't block the others. 
··· 87 141 88 142 subscriber.on('message', async (_channel: string, message: string) => { 89 143 try { 90 - const { did, rkey, action } = JSON.parse(message) as { 91 - did: string 92 - rkey: string 93 - action: 'updating' | 'update' | 'delete' | 'settings' 94 - } 95 - 96 - if (!did || !rkey) { 144 + const parsed = parseCacheInvalidationMessage(message) 145 + if (!parsed) { 97 146 console.warn('[CacheInvalidation] Invalid message:', message) 98 147 return 99 148 } 100 149 150 + const { did, rkey, action, token } = parsed 151 + 101 152 console.log(`[CacheInvalidation] Received ${action} for ${did}/${rkey}`) 102 153 103 154 if (action === 'updating') { 104 155 // Firehose is about to download new files — mark site as updating 105 - updatingSites.set(`${did}/${rkey}`, Date.now()) 156 + markSiteUpdating(did, rkey, token) 106 157 console.log(`[CacheInvalidation] Marked ${did}/${rkey} as updating`) 107 158 return 108 159 } 109 160 110 161 // For update/delete/settings: clear the updating flag and invalidate caches 111 - updatingSites.delete(`${did}/${rkey}`) 162 + const cleared = clearSiteUpdating(did, rkey, token) 163 + if (!cleared && action === 'update' && token) { 164 + console.log(`[CacheInvalidation] Ignored stale update clear for ${did}/${rkey}`) 165 + return 166 + } 112 167 113 168 const prefix = `${did}/${rkey}/` 114 169
+1 -1
cli/package.json
··· 1 1 { 2 2 "name": "wispctl", 3 - "version": "1.1.1", 3 + "version": "1.1.2", 4 4 "main": "./dist/index.js", 5 5 "devDependencies": { 6 6 "@atproto/api": "^0.18.17",
+82 -19
packages/@wispplace/fs-utils/src/html-rewriter.ts
··· 2 2 * Rewrites root-relative URL attributes in an HTML document so it serves correctly 3 3 * from a `basePath` (e.g. `/did/rkey/`) instead of the site root. 4 4 * 5 - * Uses Bun's streaming `HTMLRewriter`: only the attribute bytes we target are replaced; 6 - * everything else (text, inline `<script>`/`<style>`, custom elements like `<md-block>`, 7 - * unbalanced markup, HTML-looking content inside Markdown code spans) passes through 8 - * byte-for-byte. No DOM construction, no re-serialisation. 5 + * Uses a byte-oriented tag scanner instead of a DOM parser so we can rewrite the 6 + * attributes we care about without reserializing the entire document. Raw-text 7 + * elements like `<script>` and `<style>` are copied through unchanged. 9 8 */ 10 9 11 10 const REWRITABLE_ATTRS: Record<string, 'url' | 'srcset'> = { ··· 16 15 poster: 'url', 17 16 srcset: 'srcset', 18 17 } 18 + const RAW_TEXT_TAGS = new Set(['script', 'style']) 19 19 20 20 function isRootRelative(url: string): boolean { 21 21 if (!url || !url.startsWith('/')) return false ··· 45 45 .join(', ') 46 46 } 47 47 48 + function escapeRegExp(value: string): string { 49 + return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') 50 + } 51 + 52 + function rewriteTagAttributes(tagSource: string, basePath: string): string { 53 + return tagSource.replace( 54 + /\b(src|href|action|data|poster|srcset)\s*=\s*("([^"]*)"|'([^']*)'|([^\s"'=<>`]+))/gi, 55 + (match, attr: string, _rawValue: string, doubleQuoted?: string, singleQuoted?: string, bare?: string) => { 56 + const value = doubleQuoted ?? singleQuoted ?? bare ?? '' 57 + const rewritten = 58 + REWRITABLE_ATTRS[attr.toLowerCase()] === 'srcset' ? 
rewriteSrcset(value, basePath) : rewriteUrl(value, basePath) 59 + if (rewritten === value) return match 60 + if (doubleQuoted !== undefined) return `${attr}="${rewritten}"` 61 + if (singleQuoted !== undefined) return `${attr}='${rewritten}'` 62 + return `${attr}=${rewritten}` 63 + }, 64 + ) 65 + } 66 + 67 + function rewriteHtmlPathsFallback(html: string, basePath: string): string { 68 + let output = '' 69 + let cursor = 0 70 + 71 + while (cursor < html.length) { 72 + const tagStart = html.indexOf('<', cursor) 73 + if (tagStart === -1) { 74 + output += html.slice(cursor) 75 + break 76 + } 77 + 78 + output += html.slice(cursor, tagStart) 79 + 80 + if (html.startsWith('<!--', tagStart)) { 81 + const commentEnd = html.indexOf('-->', tagStart + 4) 82 + if (commentEnd === -1) { 83 + output += html.slice(tagStart) 84 + break 85 + } 86 + output += html.slice(tagStart, commentEnd + 3) 87 + cursor = commentEnd + 3 88 + continue 89 + } 90 + 91 + const tagEnd = html.indexOf('>', tagStart + 1) 92 + if (tagEnd === -1) { 93 + output += html.slice(tagStart) 94 + break 95 + } 96 + 97 + const tagSource = html.slice(tagStart, tagEnd + 1) 98 + output += rewriteTagAttributes(tagSource, basePath) 99 + cursor = tagEnd + 1 100 + 101 + const tagNameMatch = /^<\s*([a-zA-Z][\w:-]*)/.exec(tagSource) 102 + const tagName = tagNameMatch?.[1]?.toLowerCase() 103 + const isSelfClosing = /\/\s*>$/.test(tagSource) 104 + if (!tagName || isSelfClosing || !RAW_TEXT_TAGS.has(tagName)) { 105 + continue 106 + } 107 + 108 + const closeTagPattern = new RegExp(`</\\s*${escapeRegExp(tagName)}\\s*>`, 'i') 109 + const remaining = html.slice(cursor) 110 + const closeMatch = closeTagPattern.exec(remaining) 111 + if (!closeMatch || closeMatch.index === undefined) { 112 + output += remaining 113 + break 114 + } 115 + 116 + const closeStart = cursor + closeMatch.index 117 + output += html.slice(cursor, closeStart) 118 + output += closeMatch[0] 119 + cursor = closeStart + closeMatch[0].length 120 + } 121 + 122 + 
return output 123 + } 124 + 48 125 /** 49 126 * Rewrite root-relative paths in an HTML document so it serves correctly from `basePath`. 50 127 * Relative paths (`./foo`, `../foo`, bare filenames) are left alone — browsers resolve ··· 52 129 */ 53 130 export async function rewriteHtmlPaths(html: string, basePath: string): Promise<string> { 54 131 const normalizedBase = basePath.endsWith('/') ? basePath : `${basePath}/` 55 - 56 - const rewriter = new HTMLRewriter().on('*', { 57 - element(el) { 58 - for (const [attr, type] of Object.entries(REWRITABLE_ATTRS)) { 59 - const value = el.getAttribute(attr) 60 - if (value == null) continue 61 - el.setAttribute( 62 - attr, 63 - type === 'srcset' ? rewriteSrcset(value, normalizedBase) : rewriteUrl(value, normalizedBase), 64 - ) 65 - } 66 - }, 67 - }) 68 - 69 - return await rewriter.transform(new Response(html)).text() 132 + return rewriteHtmlPathsFallback(html, normalizedBase) 70 133 } 71 134 72 135 /** Returns true if the file looks like HTML by content-type or extension. */