Monorepo for wisp.place, a static site hosting service built on top of the AT Protocol.
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

fix cache invalidation token parsing and stabilize HTML rewriting to be byte-based

+255 -54
+2 -1
apps/firehose-service/src/lib/cache-invalidation.ts
··· 48 48 did: string, 49 49 rkey: string, 50 50 action: 'updating' | 'update' | 'delete' | 'settings', 51 + token?: string, 51 52 ): Promise<void> { 52 53 const redis = getPublisher() 53 54 if (!redis) return 54 55 55 56 try { 56 - const message = JSON.stringify({ did, rkey, action }) 57 + const message = JSON.stringify({ did, rkey, action, token }) 57 58 logger.debug(`[CacheInvalidation] Publishing ${action} for ${did}/${rkey} to ${CHANNEL}`) 58 59 await redis.publish(CHANNEL, message) 59 60 } catch (err) {
+27 -18
apps/firehose-service/src/lib/cache-writer.ts
··· 563 563 564 564 // If HTML, also write rewritten version 565 565 if (isHtmlContent(file.path)) { 566 - const basePath = `/${did}/${rkey}/` 567 - let rewriteSource = content 568 - if (encoding === 'gzip' && content.length >= 2 && content[0] === 0x1f && content[1] === 0x8b) { 569 - try { 570 - rewriteSource = gunzipSync(content) 571 - } catch (error) { 572 - logger.error(`Failed to decompress ${file.path} for rewrite, using raw content`, error) 566 + try { 567 + const basePath = `/${did}/${rkey}/` 568 + let rewriteSource = content 569 + if (encoding === 'gzip' && content.length >= 2 && content[0] === 0x1f && content[1] === 0x8b) { 570 + try { 571 + rewriteSource = gunzipSync(content) 572 + } catch (error) { 573 + logger.error(`Failed to decompress ${file.path} for rewrite, using raw content`, error) 574 + } 573 575 } 574 - } 575 576 576 - const htmlString = new TextDecoder().decode(rewriteSource) 577 - const rewritten = await rewriteHtmlPaths(htmlString, basePath) 578 - const rewrittenContent = new TextEncoder().encode(rewritten) 577 + const htmlString = new TextDecoder().decode(rewriteSource) 578 + const rewritten = await rewriteHtmlPaths(htmlString, basePath) 579 + const rewrittenContent = new TextEncoder().encode(rewritten) 579 580 580 - const rewrittenKey = `${did}/${rkey}/.rewritten/${file.path}` 581 - await writeFile(rewrittenKey, rewrittenContent, { mimeType: 'text/html' }) 582 - logger.debug(`Wrote rewritten HTML: ${rewrittenKey}`) 581 + const rewrittenKey = `${did}/${rkey}/.rewritten/${file.path}` 582 + await writeFile(rewrittenKey, rewrittenContent, { mimeType: 'text/html' }) 583 + logger.debug(`Wrote rewritten HTML: ${rewrittenKey}`) 584 + } catch (error) { 585 + logger.error(`Failed to cache rewritten HTML for ${file.path}; continuing with original`, error, { 586 + did, 587 + rkey, 588 + path: file.path, 589 + }) 590 + } 583 591 } 584 592 585 593 logger.debug(`Stored ${file.path} (${content.length} bytes)`) ··· 655 663 656 664 // Notify 
hosting-service that this site is about to be updated so it can 657 665 // show the "updating" page instead of serving stale or partially-updated files. 666 + const invalidationToken = !options?.skipInvalidation ? crypto.randomUUID() : undefined 658 667 if (!options?.skipInvalidation) { 659 - await publishCacheInvalidation(did, rkey, 'updating') 668 + await publishCacheInvalidation(did, rkey, 'updating', invalidationToken) 660 669 } 661 670 662 671 // Compare CIDs to determine what to download/delete ··· 783 792 }) 784 793 785 794 if (!options?.skipInvalidation) { 786 - await publishCacheInvalidation(did, rkey, 'update').catch(() => undefined) 795 + await publishCacheInvalidation(did, rkey, 'update', invalidationToken).catch(() => undefined) 787 796 } 788 797 789 798 if (allRetryBackoffed && retryBackoffUntil) { ··· 807 816 })), 808 817 }) 809 818 if (!options?.skipInvalidation) { 810 - await publishCacheInvalidation(did, rkey, 'update').catch(() => undefined) 819 + await publishCacheInvalidation(did, rkey, 'update', invalidationToken).catch(() => undefined) 811 820 } 812 821 throw new Error(`Failed to delete files for ${did}/${rkey}`) 813 822 } ··· 833 842 // Notify hosting-service to invalidate its local caches (including negative 404 cache) 834 843 // (skip for backfill since it runs before the hosting-service serves traffic) 835 844 if (!options?.skipInvalidation) { 836 - await publishCacheInvalidation(did, rkey, 'update') 845 + await publishCacheInvalidation(did, rkey, 'update', invalidationToken) 837 846 } 838 847 839 848 logger.info(`Successfully cached site ${did}/${rkey}`)
+16
apps/firehose-service/src/lib/revalidate-worker.test.ts
··· 1 + import { describe, expect, test } from 'bun:test' 2 + import { shouldSkipInvalidationForReason } from './revalidate-worker' 3 + 4 + describe('shouldSkipInvalidationForReason', () => { 5 + test('skips invalidation for rewrite repair jobs', () => { 6 + expect(shouldSkipInvalidationForReason('rewrite-miss:docs/w/~/index.html')).toBe(true) 7 + }) 8 + 9 + test('does not skip invalidation for storage misses', () => { 10 + expect(shouldSkipInvalidationForReason('storage-miss:docs/raw/README.md')).toBe(false) 11 + }) 12 + 13 + test('does not skip invalidation for other revalidate reasons', () => { 14 + expect(shouldSkipInvalidationForReason('manual')).toBe(false) 15 + }) 16 + })
+8 -1
apps/firehose-service/src/lib/revalidate-worker.ts
··· 37 37 return fields 38 38 } 39 39 40 + export function shouldSkipInvalidationForReason(reason: string): boolean { 41 + // Rewrite repairs only repopulate `.rewritten/*` HTML variants. They should not 42 + // flip the whole site into "updating" while the original files remain serveable. 43 + return reason.startsWith('rewrite-miss') 44 + } 45 + 40 46 async function processMessage(id: string, rawFields: string[]): Promise<void> { 41 47 if (!redis) return 42 48 ··· 71 77 // For storage-miss events, force re-download all files since storage is empty 72 78 const forceDownload = reason.startsWith('storage-miss') 73 79 const forceRewriteHtml = reason.startsWith('rewrite-miss') 80 + const skipInvalidation = shouldSkipInvalidationForReason(reason) 74 81 75 82 try { 76 83 await handleSiteCreateOrUpdate(did, rkey, record.record, record.cid, { 77 - skipInvalidation: false, 84 + skipInvalidation, 78 85 forceDownload, 79 86 forceRewriteHtml, 80 87 })
+50
apps/hosting-service/src/lib/cache-invalidation.test.ts
··· 1 + import { beforeEach, describe, expect, test } from 'bun:test' 2 + import { 3 + clearSiteUpdating, 4 + isSiteUpdating, 5 + markSiteUpdating, 6 + parseCacheInvalidationMessage, 7 + resetUpdatingSitesForTests, 8 + } from './cache-invalidation' 9 + 10 + const DID = 'did:plc:test' 11 + const RKEY = 'site' 12 + 13 + describe('cache invalidation updating state', () => { 14 + beforeEach(() => { 15 + resetUpdatingSitesForTests() 16 + }) 17 + 18 + test('stale token cannot clear a newer update', () => { 19 + markSiteUpdating(DID, RKEY, 'token-a') 20 + markSiteUpdating(DID, RKEY, 'token-b') 21 + 22 + expect(clearSiteUpdating(DID, RKEY, 'token-a')).toBe(false) 23 + expect(isSiteUpdating(DID, RKEY)).toBe(true) 24 + }) 25 + 26 + test('matching token clears the active update', () => { 27 + markSiteUpdating(DID, RKEY, 'token-a') 28 + 29 + expect(clearSiteUpdating(DID, RKEY, 'token-a')).toBe(true) 30 + expect(isSiteUpdating(DID, RKEY)).toBe(false) 31 + }) 32 + 33 + test('unversioned clear remains backward compatible', () => { 34 + markSiteUpdating(DID, RKEY, 'token-a') 35 + 36 + expect(clearSiteUpdating(DID, RKEY)).toBe(true) 37 + expect(isSiteUpdating(DID, RKEY)).toBe(false) 38 + }) 39 + 40 + test('message parsing preserves token', () => { 41 + expect( 42 + parseCacheInvalidationMessage(JSON.stringify({ did: DID, rkey: RKEY, action: 'update', token: 'token-a' })), 43 + ).toEqual({ 44 + did: DID, 45 + rkey: RKEY, 46 + action: 'update', 47 + token: 'token-a', 48 + }) 49 + }) 50 + })
+69 -14
apps/hosting-service/src/lib/cache-invalidation.ts
··· 16 16 17 17 const CHANNEL = 'wisp:cache-invalidate' 18 18 19 + type CacheInvalidationAction = 'updating' | 'update' | 'delete' | 'settings' 20 + 21 + export interface CacheInvalidationMessage { 22 + did: string 23 + rkey: string 24 + action: CacheInvalidationAction 25 + token?: string 26 + } 27 + 19 28 // Sites currently being downloaded by the firehose-service. 20 - // Maps `${did}/${rkey}` → timestamp when the update started. 29 + // Maps `${did}/${rkey}` → current update token and timestamp. 21 30 // Used to show an "updating" page instead of serving stale files. 22 31 const UPDATING_TTL_MS = 10 * 60 * 1000 // 10 minutes safety timeout 23 - const updatingSites = new Map<string, number>() 32 + const updatingSites = new Map<string, { since: number; token?: string }>() 24 33 25 34 export function isSiteUpdating(did: string, rkey: string): boolean { 26 35 const key = `${did}/${rkey}` 27 - const since = updatingSites.get(key) 28 - if (since === undefined) return false 29 - if (Date.now() - since > UPDATING_TTL_MS) { 36 + const state = updatingSites.get(key) 37 + if (state === undefined) return false 38 + if (Date.now() - state.since > UPDATING_TTL_MS) { 30 39 // Firehose must have crashed; remove the stale entry 31 40 updatingSites.delete(key) 32 41 return false 33 42 } 34 43 return true 44 + } 45 + 46 + export function markSiteUpdating(did: string, rkey: string, token?: string): void { 47 + updatingSites.set(`${did}/${rkey}`, { since: Date.now(), token }) 48 + } 49 + 50 + export function clearSiteUpdating(did: string, rkey: string, token?: string): boolean { 51 + const key = `${did}/${rkey}` 52 + const state = updatingSites.get(key) 53 + if (!state) return false 54 + 55 + // Unversioned clears are treated as unconditional for compatibility. 56 + // Versioned clears only succeed if they match the active update token. 
57 + if (token && state.token && state.token !== token) { 58 + return false 59 + } 60 + 61 + updatingSites.delete(key) 62 + return true 63 + } 64 + 65 + export function resetUpdatingSitesForTests(): void { 66 + updatingSites.clear() 35 67 } 36 68 37 69 let subscriber: Redis | null = null 38 70 71 + export function parseCacheInvalidationMessage(message: string): CacheInvalidationMessage | null { 72 + const parsed = JSON.parse(message) as Partial<CacheInvalidationMessage> 73 + 74 + if ( 75 + typeof parsed.did !== 'string' || 76 + typeof parsed.rkey !== 'string' || 77 + (parsed.action !== 'updating' && 78 + parsed.action !== 'update' && 79 + parsed.action !== 'delete' && 80 + parsed.action !== 'settings') 81 + ) { 82 + return null 83 + } 84 + 85 + return { 86 + did: parsed.did, 87 + rkey: parsed.rkey, 88 + action: parsed.action, 89 + token: typeof parsed.token === 'string' ? parsed.token : undefined, 90 + } 91 + } 92 + 39 93 /** 40 94 * Directly invalidate a tier by listing and deleting all keys with the given prefix. 41 95 * Each tier is invalidated independently so a failure in one doesn't block the others. 
··· 87 141 88 142 subscriber.on('message', async (_channel: string, message: string) => { 89 143 try { 90 - const { did, rkey, action } = JSON.parse(message) as { 91 - did: string 92 - rkey: string 93 - action: 'updating' | 'update' | 'delete' | 'settings' 94 - } 95 - 96 - if (!did || !rkey) { 144 + const parsed = parseCacheInvalidationMessage(message) 145 + if (!parsed) { 97 146 console.warn('[CacheInvalidation] Invalid message:', message) 98 147 return 99 148 } 100 149 150 + const { did, rkey, action, token } = parsed 151 + 101 152 console.log(`[CacheInvalidation] Received ${action} for ${did}/${rkey}`) 102 153 103 154 if (action === 'updating') { 104 155 // Firehose is about to download new files — mark site as updating 105 - updatingSites.set(`${did}/${rkey}`, Date.now()) 156 + markSiteUpdating(did, rkey, token) 106 157 console.log(`[CacheInvalidation] Marked ${did}/${rkey} as updating`) 107 158 return 108 159 } 109 160 110 161 // For update/delete/settings: clear the updating flag and invalidate caches 111 - updatingSites.delete(`${did}/${rkey}`) 162 + const cleared = clearSiteUpdating(did, rkey, token) 163 + if (!cleared && action === 'update' && token) { 164 + console.log(`[CacheInvalidation] Ignored stale update clear for ${did}/${rkey}`) 165 + return 166 + } 112 167 113 168 const prefix = `${did}/${rkey}/` 114 169
+1 -1
cli/package.json
··· 1 1 { 2 2 "name": "wispctl", 3 - "version": "1.1.1", 3 + "version": "1.1.2", 4 4 "main": "./dist/index.js", 5 5 "devDependencies": { 6 6 "@atproto/api": "^0.18.17",
+82 -19
packages/@wispplace/fs-utils/src/html-rewriter.ts
··· 2 2 * Rewrites root-relative URL attributes in an HTML document so it serves correctly 3 3 * from a `basePath` (e.g. `/did/rkey/`) instead of the site root. 4 4 * 5 - * Uses Bun's streaming `HTMLRewriter`: only the attribute bytes we target are replaced; 6 - * everything else (text, inline `<script>`/`<style>`, custom elements like `<md-block>`, 7 - * unbalanced markup, HTML-looking content inside Markdown code spans) passes through 8 - * byte-for-byte. No DOM construction, no re-serialisation. 5 + * Uses a byte-oriented tag scanner instead of a DOM parser so we can rewrite the 6 + * attributes we care about without reserializing the entire document. Raw-text 7 + * elements like `<script>` and `<style>` are copied through unchanged. 9 8 */ 10 9 11 10 const REWRITABLE_ATTRS: Record<string, 'url' | 'srcset'> = { ··· 16 15 poster: 'url', 17 16 srcset: 'srcset', 18 17 } 18 + const RAW_TEXT_TAGS = new Set(['script', 'style']) 19 19 20 20 function isRootRelative(url: string): boolean { 21 21 if (!url || !url.startsWith('/')) return false ··· 45 45 .join(', ') 46 46 } 47 47 48 + function escapeRegExp(value: string): string { 49 + return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') 50 + } 51 + 52 + function rewriteTagAttributes(tagSource: string, basePath: string): string { 53 + return tagSource.replace( 54 + /\b(src|href|action|data|poster|srcset)\s*=\s*("([^"]*)"|'([^']*)'|([^\s"'=<>`]+))/gi, 55 + (match, attr: string, _rawValue: string, doubleQuoted?: string, singleQuoted?: string, bare?: string) => { 56 + const value = doubleQuoted ?? singleQuoted ?? bare ?? '' 57 + const rewritten = 58 + REWRITABLE_ATTRS[attr.toLowerCase()] === 'srcset' ? 
rewriteSrcset(value, basePath) : rewriteUrl(value, basePath) 59 + if (rewritten === value) return match 60 + if (doubleQuoted !== undefined) return `${attr}="${rewritten}"` 61 + if (singleQuoted !== undefined) return `${attr}='${rewritten}'` 62 + return `${attr}=${rewritten}` 63 + }, 64 + ) 65 + } 66 + 67 + function rewriteHtmlPathsFallback(html: string, basePath: string): string { 68 + let output = '' 69 + let cursor = 0 70 + 71 + while (cursor < html.length) { 72 + const tagStart = html.indexOf('<', cursor) 73 + if (tagStart === -1) { 74 + output += html.slice(cursor) 75 + break 76 + } 77 + 78 + output += html.slice(cursor, tagStart) 79 + 80 + if (html.startsWith('<!--', tagStart)) { 81 + const commentEnd = html.indexOf('-->', tagStart + 4) 82 + if (commentEnd === -1) { 83 + output += html.slice(tagStart) 84 + break 85 + } 86 + output += html.slice(tagStart, commentEnd + 3) 87 + cursor = commentEnd + 3 88 + continue 89 + } 90 + 91 + const tagEnd = html.indexOf('>', tagStart + 1) 92 + if (tagEnd === -1) { 93 + output += html.slice(tagStart) 94 + break 95 + } 96 + 97 + const tagSource = html.slice(tagStart, tagEnd + 1) 98 + output += rewriteTagAttributes(tagSource, basePath) 99 + cursor = tagEnd + 1 100 + 101 + const tagNameMatch = /^<\s*([a-zA-Z][\w:-]*)/.exec(tagSource) 102 + const tagName = tagNameMatch?.[1]?.toLowerCase() 103 + const isSelfClosing = /\/\s*>$/.test(tagSource) 104 + if (!tagName || isSelfClosing || !RAW_TEXT_TAGS.has(tagName)) { 105 + continue 106 + } 107 + 108 + const closeTagPattern = new RegExp(`</\\s*${escapeRegExp(tagName)}\\s*>`, 'i') 109 + const remaining = html.slice(cursor) 110 + const closeMatch = closeTagPattern.exec(remaining) 111 + if (!closeMatch || closeMatch.index === undefined) { 112 + output += remaining 113 + break 114 + } 115 + 116 + const closeStart = cursor + closeMatch.index 117 + output += html.slice(cursor, closeStart) 118 + output += closeMatch[0] 119 + cursor = closeStart + closeMatch[0].length 120 + } 121 + 122 + 
return output 123 + } 124 + 48 125 /** 49 126 * Rewrite root-relative paths in an HTML document so it serves correctly from `basePath`. 50 127 * Relative paths (`./foo`, `../foo`, bare filenames) are left alone — browsers resolve ··· 52 129 */ 53 130 export async function rewriteHtmlPaths(html: string, basePath: string): Promise<string> { 54 131 const normalizedBase = basePath.endsWith('/') ? basePath : `${basePath}/` 55 - 56 - const rewriter = new HTMLRewriter().on('*', { 57 - element(el) { 58 - for (const [attr, type] of Object.entries(REWRITABLE_ATTRS)) { 59 - const value = el.getAttribute(attr) 60 - if (value == null) continue 61 - el.setAttribute( 62 - attr, 63 - type === 'srcset' ? rewriteSrcset(value, normalizedBase) : rewriteUrl(value, normalizedBase), 64 - ) 65 - } 66 - }, 67 - }) 68 - 69 - return await rewriter.transform(new Response(html)).text() 132 + return rewriteHtmlPathsFallback(html, normalizedBase) 70 133 } 71 134 72 135 /** Returns true if the file looks like HTML by content-type or extension. */