Monorepo for wisp.place. A static site hosting service built on top of the AT Protocol.
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

html rewriter fixes

+576 -976
+143
apps/firehose-service/scripts/invalidate-rewritten-cache.ts
#!/usr/bin/env bun
/**
 * Per-site rewrite-cache reset: invalidate `<did>/<rkey>/.rewritten/*` in S3
 * and enqueue a `rewrite-miss` revalidate event so the worker regenerates
 * the pre-rewritten HTML from source.
 *
 * Why: earlier versions of the HTML rewriter used `node-html-parser`, which
 * parsed + re-serialised each cached `index.html` and corrupted content with
 * unbalanced or HTML-looking text (Markdown-in-HTML, custom elements, etc).
 * The current rewriter uses Bun's streaming `HTMLRewriter` and is byte-safe —
 * but the old corrupted copies remain cached under `<did>/<rkey>/.rewritten/…`.
 *
 * Usage:
 *   bun apps/firehose-service/scripts/invalidate-rewritten-cache.ts <did> <rkey>
 *
 *   # Batch from stdin (one `did/rkey` pair per line):
 *   cat sites.txt | bun apps/firehose-service/scripts/invalidate-rewritten-cache.ts --stdin
 *
 * Output contract: progress and warnings go to stderr (prefixed `[invalidate]`);
 * stdout receives exactly one `did/rkey` line per site that was reset, so the
 * output can be piped or diffed against the input list.
 */

import { S3StorageTier } from '@wispplace/tiered-storage'
import Redis from 'ioredis'
import { config } from '../src/config'

/** `reason` field written on every revalidate event this script enqueues. */
const REASON = 'rewrite-miss:manual'
/** Maximum number of keys handed to a single `deleteMany` call. */
const BATCH_SIZE = 1000

const args = process.argv.slice(2)
const flags = new Set(args.filter((a) => a.startsWith('--')))
const positional = args.filter((a) => !a.startsWith('--'))
const fromStdin = flags.has('--stdin')

/** Writes a prefixed progress line to stderr, keeping stdout machine-readable. */
const log = (msg: string) => process.stderr.write(`[invalidate] ${msg}\n`)

if (!config.s3Bucket) {
  console.error('[invalidate] S3_BUCKET not set')
  process.exit(1)
}
if (!config.redisUrl) {
  console.error('[invalidate] REDIS_URL not set')
  process.exit(1)
}

/**
 * Reads the `<did> <rkey>` pair from the positional arguments, or prints the
 * usage text and exits with status 1 when either one is missing or empty.
 */
function requireCliTarget(): [string, string] {
  const [did, rkey] = positional
  if (!did || !rkey) {
    console.error('Usage: invalidate-rewritten-cache.ts <did> <rkey>')
    console.error(' or: ... --stdin (read `did/rkey` pairs from stdin)')
    process.exit(1)
  }
  return [did, rkey]
}

// Validate the invocation BEFORE opening any connection: a usage error should
// not construct a Redis client first. `null` means "read pairs from stdin".
const cliTarget = fromStdin ? null : requireCliTarget()

const tier = new S3StorageTier({
  bucket: config.s3Bucket,
  region: config.s3Region,
  endpoint: config.s3Endpoint,
  credentials:
    config.awsAccessKeyId && config.awsSecretAccessKey
      ? { accessKeyId: config.awsAccessKeyId, secretAccessKey: config.awsSecretAccessKey }
      : undefined,
  prefix: config.s3Prefix,
  forcePathStyle: config.s3ForcePathStyle,
})

const redis = new Redis(config.redisUrl, { maxRetriesPerRequest: 3 })
redis.on('error', (err) => console.error('[invalidate] redis error:', err.message))

/**
 * Deletes every object under `<did>/<rkey>/.rewritten/` and enqueues a
 * revalidate event for the site on `config.revalidateStream`.
 *
 * Keys are deleted in batches of at most `BATCH_SIZE`. Any storage or Redis
 * failure propagates to the caller (and therefore aborts the run).
 *
 * @param did  Site owner DID (first path segment of the storage key).
 * @param rkey Site record key (second path segment of the storage key).
 */
async function resetSite(did: string, rkey: string): Promise<void> {
  const prefix = `${did}/${rkey}/.rewritten/`
  let pending: string[] = []
  let deleted = 0

  // Deletes whatever is currently buffered; a no-op on an empty buffer.
  const flushPending = async (): Promise<void> => {
    if (pending.length === 0) return
    await tier.deleteMany(pending)
    deleted += pending.length
    pending = []
  }

  for await (const key of tier.listKeys(prefix)) {
    pending.push(key)
    if (pending.length >= BATCH_SIZE) await flushPending()
  }
  await flushPending()

  // NOTE(review): presumably this key is the worker's dedupe/throttle marker
  // for rewrite-miss events — confirm against the revalidate consumer. It is
  // cleared first so the event enqueued below is not dropped as a duplicate.
  await redis.del(`revalidate:site:rewrite-miss:${did}:${rkey}`)
  const id = await redis.xadd(
    config.revalidateStream,
    '*',
    'did',
    did,
    'rkey',
    rkey,
    'reason',
    REASON,
    'ts',
    Date.now().toString(),
  )

  log(`${did}/${rkey}: deleted ${deleted} objects, enqueued ${id}`)
  process.stdout.write(`${did}/${rkey}\n`)
}

/**
 * Parses one `did/rkey` input line.
 *
 * The line is trimmed and split at the FIRST `/`; everything after it is the
 * rkey. Returns `null` for blank lines, lines without a `/`, and lines whose
 * did or rkey half is empty.
 */
function parsePair(input: string): [string, string] | null {
  const trimmed = input.trim()
  if (!trimmed) return null
  const slash = trimmed.indexOf('/')
  if (slash === -1) return null
  const did = trimmed.slice(0, slash)
  const rkey = trimmed.slice(slash + 1)
  if (!did || !rkey) return null
  return [did, rkey]
}

/**
 * Handles one stdin line: resets the site it names and returns `true`, or
 * returns `false` when the line names no site. Blank lines are skipped
 * quietly; non-blank lines that do not parse are reported on stderr so a typo
 * in the input list cannot pass unnoticed.
 */
async function processLine(line: string): Promise<boolean> {
  const pair = parsePair(line)
  if (!pair) {
    const text = line.trim()
    if (text) log(`skipping malformed line (expected \`did/rkey\`): ${JSON.stringify(text)}`)
    return false
  }
  await resetSite(pair[0], pair[1])
  return true
}

log(`Bucket: ${config.s3Bucket} stream: ${config.revalidateStream} reason: ${REASON}`)

let sites = 0
try {
  if (cliTarget) {
    await resetSite(cliTarget[0], cliTarget[1])
    sites++
  } else {
    const reader = Bun.stdin.stream().getReader()
    const decoder = new TextDecoder()
    let buffer = ''
    for (;;) {
      const { done, value } = await reader.read()
      if (done) break
      // `stream: true` keeps a multi-byte character that straddles two chunks intact.
      buffer += decoder.decode(value, { stream: true })
      // Consume every complete line; an unterminated tail stays buffered.
      let nl = buffer.indexOf('\n')
      while (nl !== -1) {
        const line = buffer.slice(0, nl)
        buffer = buffer.slice(nl + 1)
        if (await processLine(line)) sites++
        nl = buffer.indexOf('\n')
      }
    }
    // Final line when the input does not end with a newline.
    if (await processLine(buffer)) sites++
  }
  log(`Done. Reset ${sites} site(s).`)
} finally {
  // Always close the Redis connection so the process can exit, even on failure.
  await redis.quit()
}
+5 -5
apps/firehose-service/src/lib/cache-writer.ts
··· 8 8 import { shouldCompressMimeType } from '@wispplace/atproto-utils/compression' 9 9 import { MAX_BLOB_SIZE, MAX_FILE_COUNT, MAX_SITE_SIZE, MAX_SITE_SIZE_SUPPORTER } from '@wispplace/constants' 10 10 import { collectFileCidsFromEntries, countFilesInDirectory, normalizeFileCids } from '@wispplace/fs-utils' 11 + import { isHtmlContent, rewriteHtmlPaths } from '@wispplace/fs-utils/html-rewriter' 11 12 import type { Directory, Entry, File, Record as WispFsRecord } from '@wispplace/lexicons/types/place/wisp/fs' 12 13 import type { Record as WispSettings } from '@wispplace/lexicons/types/place/wisp/settings' 13 14 import type { Record as SubfsRecord } from '@wispplace/lexicons/types/place/wisp/subfs' ··· 24 25 upsertSiteCache, 25 26 upsertSiteSettingsCache, 26 27 } from './db' 27 - import { isHtmlFile, rewriteHtmlPaths } from './html-rewriter' 28 28 import { deleteFile, listFiles, writeFile } from './storage' 29 29 30 30 const logger = createLogger('firehose-service') ··· 562 562 await writeFile(key, content, metadata) 563 563 564 564 // If HTML, also write rewritten version 565 - if (isHtmlFile(file.path)) { 565 + if (isHtmlContent(file.path)) { 566 566 const basePath = `/${did}/${rkey}/` 567 567 let rewriteSource = content 568 568 if (encoding === 'gzip' && content.length >= 2 && content[0] === 0x1f && content[1] === 0x8b) { ··· 574 574 } 575 575 576 576 const htmlString = new TextDecoder().decode(rewriteSource) 577 - const rewritten = rewriteHtmlPaths(htmlString, basePath, file.path) 577 + const rewritten = await rewriteHtmlPaths(htmlString, basePath) 578 578 const rewrittenContent = new TextEncoder().encode(rewritten) 579 579 580 580 const rewrittenKey = `${did}/${rkey}/.rewritten/${file.path}` ··· 666 666 667 667 // Find new or changed files 668 668 for (const file of newFiles) { 669 - const shouldForceRewrite = forceRewriteHtml && isHtmlFile(file.path) 669 + const shouldForceRewrite = forceRewriteHtml && isHtmlContent(file.path) 670 670 if (forceDownload || 
oldFileCids[file.path] !== file.cid || shouldForceRewrite) { 671 671 filesToDownload.push(file) 672 672 } ··· 721 721 const keysToDelete: string[] = [] 722 722 for (const path of pathsToDelete) { 723 723 keysToDelete.push(`${did}/${rkey}/${path}`) 724 - if (isHtmlFile(path)) { 724 + if (isHtmlContent(path)) { 725 725 keysToDelete.push(`${did}/${rkey}/.rewritten/${path}`) 726 726 } 727 727 }
-308
apps/firehose-service/src/lib/html-rewriter.test.ts
··· 1 - import { describe, expect, test } from 'bun:test' 2 - import { isHtmlFile, rewriteHtmlPaths } from './html-rewriter' 3 - 4 - const BASE = '/did:plc:abc123/mysite/' 5 - const ROOT_DOC = 'index.html' 6 - const NESTED_DOC = 'blog/posts/index.html' 7 - 8 - function rewrite(html: string, doc = ROOT_DOC) { 9 - return rewriteHtmlPaths(html, BASE, doc) 10 - } 11 - 12 - describe('rewritten attributes', () => { 13 - test('src', () => { 14 - expect(rewrite('<img src="/photo.jpg">')).toBe('<img src="/did:plc:abc123/mysite/photo.jpg">') 15 - }) 16 - 17 - test('href', () => { 18 - expect(rewrite('<a href="/about">About</a>')).toBe('<a href="/did:plc:abc123/mysite/about">About</a>') 19 - }) 20 - 21 - test('action', () => { 22 - expect(rewrite('<form action="/submit"></form>')).toBe('<form action="/did:plc:abc123/mysite/submit"></form>') 23 - }) 24 - 25 - test('data (object)', () => { 26 - expect(rewrite('<object data="/file.pdf"></object>')).toBe( 27 - '<object data="/did:plc:abc123/mysite/file.pdf"></object>', 28 - ) 29 - }) 30 - 31 - test('poster', () => { 32 - expect(rewrite('<video poster="/thumb.jpg"></video>')).toBe( 33 - '<video poster="/did:plc:abc123/mysite/thumb.jpg"></video>', 34 - ) 35 - }) 36 - 37 - test('link href', () => { 38 - expect(rewrite('<link rel="stylesheet" href="/style.css">')).toBe( 39 - '<link rel="stylesheet" href="/did:plc:abc123/mysite/style.css">', 40 - ) 41 - }) 42 - 43 - test('script src', () => { 44 - expect(rewrite('<script src="/app.js"></script>')).toBe('<script src="/did:plc:abc123/mysite/app.js"></script>') 45 - }) 46 - 47 - test('source src', () => { 48 - expect(rewrite('<video><source src="/clip.mp4"></video>')).toBe( 49 - '<video><source src="/did:plc:abc123/mysite/clip.mp4"></video>', 50 - ) 51 - }) 52 - }) 53 - 54 - describe('srcset', () => { 55 - test('single entry no descriptor', () => { 56 - expect(rewrite('<img srcset="/img.jpg">')).toBe('<img srcset="/did:plc:abc123/mysite/img.jpg">') 57 - }) 58 - 59 - test('single entry 
with pixel density descriptor', () => { 60 - expect(rewrite('<img srcset="/img.jpg 2x">')).toBe('<img srcset="/did:plc:abc123/mysite/img.jpg 2x">') 61 - }) 62 - 63 - test('multiple entries with pixel density descriptors', () => { 64 - expect(rewrite('<img srcset="/img.jpg 1x, /img@2x.jpg 2x">')).toBe( 65 - '<img srcset="/did:plc:abc123/mysite/img.jpg 1x, /did:plc:abc123/mysite/img@2x.jpg 2x">', 66 - ) 67 - }) 68 - 69 - test('multiple entries with width descriptors', () => { 70 - expect(rewrite('<img srcset="/small.jpg 320w, /large.jpg 1024w">')).toBe( 71 - '<img srcset="/did:plc:abc123/mysite/small.jpg 320w, /did:plc:abc123/mysite/large.jpg 1024w">', 72 - ) 73 - }) 74 - 75 - test('relative entries are left alone', () => { 76 - const html = '<img srcset="../img.jpg 1x, ./img@2x.jpg 2x">' 77 - expect(rewrite(html, NESTED_DOC)).toBe(html) 78 - }) 79 - 80 - test('mixed: absolute entries rewritten, relative left alone', () => { 81 - expect(rewrite('<img srcset="/abs.jpg 1x, ./rel.jpg 2x">')).toBe( 82 - '<img srcset="/did:plc:abc123/mysite/abs.jpg 1x, ./rel.jpg 2x">', 83 - ) 84 - }) 85 - }) 86 - 87 - describe('absolute (root-relative) paths', () => { 88 - test('root file', () => { 89 - expect(rewrite('<img src="/image.png">')).toBe('<img src="/did:plc:abc123/mysite/image.png">') 90 - }) 91 - 92 - test('nested file', () => { 93 - expect(rewrite('<img src="/assets/photo.jpg">')).toBe('<img src="/did:plc:abc123/mysite/assets/photo.jpg">') 94 - }) 95 - 96 - test('deeply nested file', () => { 97 - expect(rewrite('<link href="/a/b/c/style.css">')).toBe('<link href="/did:plc:abc123/mysite/a/b/c/style.css">') 98 - }) 99 - 100 - test('same result regardless of which document it appears in', () => { 101 - const html = '<img src="/image.png">' 102 - const expected = '<img src="/did:plc:abc123/mysite/image.png">' 103 - expect(rewrite(html, ROOT_DOC)).toBe(expected) 104 - expect(rewrite(html, NESTED_DOC)).toBe(expected) 105 - }) 106 - }) 107 - 108 - describe('relative paths are not 
rewritten', () => { 109 - test('./ prefix', () => { 110 - const html = '<img src="./image.png">' 111 - expect(rewrite(html)).toBe(html) 112 - }) 113 - 114 - test('bare filename', () => { 115 - const html = '<img src="image.png">' 116 - expect(rewrite(html)).toBe(html) 117 - }) 118 - 119 - test('../ up one level', () => { 120 - const html = '<img src="../image.png">' 121 - expect(rewrite(html, NESTED_DOC)).toBe(html) 122 - }) 123 - 124 - test('../../ up two levels', () => { 125 - const html = '<link href="../../style.css">' 126 - expect(rewrite(html, NESTED_DOC)).toBe(html) 127 - }) 128 - 129 - test('../sibling/path', () => { 130 - const html = '<script src="../assets/app.js"></script>' 131 - expect(rewrite(html, NESTED_DOC)).toBe(html) 132 - }) 133 - }) 134 - 135 - describe('not rewritten', () => { 136 - describe('external / protocol-relative', () => { 137 - test('https', () => { 138 - const html = '<img src="https://cdn.example.com/img.png">' 139 - expect(rewrite(html)).toBe(html) 140 - }) 141 - 142 - test('http', () => { 143 - const html = '<link href="http://cdn.example.com/style.css">' 144 - expect(rewrite(html)).toBe(html) 145 - }) 146 - 147 - test('protocol-relative //', () => { 148 - const html = '<script src="//cdn.example.com/lib.js"></script>' 149 - expect(rewrite(html)).toBe(html) 150 - }) 151 - }) 152 - 153 - describe('URI schemes', () => { 154 - test('data:', () => { 155 - const html = '<img src="data:image/png;base64,abc123">' 156 - expect(rewrite(html)).toBe(html) 157 - }) 158 - 159 - test('mailto:', () => { 160 - const html = '<a href="mailto:hi@example.com">Email</a>' 161 - expect(rewrite(html)).toBe(html) 162 - }) 163 - 164 - test('tel:', () => { 165 - const html = '<a href="tel:+1234567890">Call</a>' 166 - expect(rewrite(html)).toBe(html) 167 - }) 168 - 169 - test('javascript:', () => { 170 - const html = '<a href="javascript:void(0)">JS</a>' 171 - expect(rewrite(html)).toBe(html) 172 - }) 173 - 174 - test('blob:', () => { 175 - const html = '<a 
href="blob:https://example.com/abc">Blob</a>' 176 - expect(rewrite(html)).toBe(html) 177 - }) 178 - }) 179 - 180 - describe('fragment-only', () => { 181 - test('#anchor', () => { 182 - const html = '<a href="#section">Jump</a>' 183 - expect(rewrite(html)).toBe(html) 184 - }) 185 - }) 186 - 187 - describe('already prefixed (Vite base output)', () => { 188 - test('path already starting with basePath is not double-rewritten', () => { 189 - const html = '<script src="/did:plc:abc123/mysite/assets/app.js"></script>' 190 - expect(rewrite(html)).toBe(html) 191 - }) 192 - }) 193 - 194 - describe('inline script and style content', () => { 195 - test('paths inside <script> text are not rewritten', () => { 196 - const html = '<script>\nvar path = "/api/data"\nfetch("/api/endpoint")\n</script>' 197 - expect(rewrite(html)).toBe(html) 198 - }) 199 - 200 - test('url() inside <style> text is not rewritten', () => { 201 - const html = "<style>.hero { background: url('/images/hero.jpg') }</style>" 202 - expect(rewrite(html)).toBe(html) 203 - }) 204 - }) 205 - }) 206 - 207 - describe('<base> tag', () => { 208 - test('root-relative base href is rewritten', () => { 209 - const result = rewrite('<head><base href="/"></head>') 210 - expect(result).toContain('href="/did:plc:abc123/mysite/"') 211 - }) 212 - 213 - test('subdirectory base href is rewritten', () => { 214 - const result = rewrite('<head><base href="/app/"></head>') 215 - expect(result).toContain('href="/did:plc:abc123/mysite/app/"') 216 - }) 217 - 218 - test('external base href is left untouched', () => { 219 - const html = '<head><base href="https://example.com/"></head>' 220 - expect(rewrite(html)).toBe(html) 221 - }) 222 - 223 - test('relative base href is left untouched', () => { 224 - const html = '<head><base href="./subdir/"></head>' 225 - expect(rewrite(html)).toBe(html) 226 - }) 227 - }) 228 - 229 - describe('URL features preserved', () => { 230 - test('query string', () => { 231 - expect(rewrite('<img 
src="/img.png?v=3">')).toBe('<img src="/did:plc:abc123/mysite/img.png?v=3">') 232 - }) 233 - 234 - test('hash fragment on a path URL', () => { 235 - expect(rewrite('<a href="/page#section">Link</a>')).toBe('<a href="/did:plc:abc123/mysite/page#section">Link</a>') 236 - }) 237 - 238 - test('query string and hash fragment together', () => { 239 - expect(rewrite('<a href="/page?q=1#section">Link</a>')).toBe( 240 - '<a href="/did:plc:abc123/mysite/page?q=1#section">Link</a>', 241 - ) 242 - }) 243 - }) 244 - 245 - describe('basePath normalisation', () => { 246 - test('basePath without trailing slash is normalised', () => { 247 - const result = rewriteHtmlPaths('<img src="/img.png">', '/did:plc:abc123/mysite', ROOT_DOC) 248 - expect(result).toBe('<img src="/did:plc:abc123/mysite/img.png">') 249 - }) 250 - 251 - test('basePath with trailing slash is unchanged', () => { 252 - const result = rewriteHtmlPaths('<img src="/img.png">', '/did:plc:abc123/mysite/', ROOT_DOC) 253 - expect(result).toBe('<img src="/did:plc:abc123/mysite/img.png">') 254 - }) 255 - }) 256 - 257 - describe('real-world scenarios', () => { 258 - test('Vite SPA with already-prefixed paths not double-rewritten', () => { 259 - const html = [ 260 - '<link rel="stylesheet" href="/did:plc:abc123/mysite/assets/index.css">', 261 - '<script src="/did:plc:abc123/mysite/assets/index.js"></script>', 262 - ].join('\n') 263 - expect(rewrite(html)).toBe(html) 264 - }) 265 - 266 - test('static site: absolute paths rewritten, relative paths left alone', () => { 267 - const html = ` 268 - <link href="/css/style.css" rel="stylesheet"> 269 - <script src="/js/main.js"></script> 270 - <img src="/images/logo.png"> 271 - <img src="./post-image.jpg"> 272 - <a href="../index.html">Blog</a> 273 - <a href="/index.html">Home</a>`.trim() 274 - 275 - const result = rewrite(html, NESTED_DOC) 276 - expect(result).toContain('href="/did:plc:abc123/mysite/css/style.css"') 277 - 
expect(result).toContain('src="/did:plc:abc123/mysite/js/main.js"') 278 - expect(result).toContain('src="/did:plc:abc123/mysite/images/logo.png"') 279 - expect(result).toContain('src="./post-image.jpg"') 280 - expect(result).toContain('href="../index.html"') 281 - expect(result).toContain('href="/did:plc:abc123/mysite/index.html"') 282 - }) 283 - 284 - test('inline script alongside rewritable elements', () => { 285 - const html = ` 286 - <link href="/style.css" rel="stylesheet"> 287 - <script> 288 - var API = '/api/v1' 289 - fetch('/api/data').then(r => r.json()) 290 - </script> 291 - <img src="/hero.jpg">`.trim() 292 - 293 - const result = rewrite(html) 294 - expect(result).toContain('href="/did:plc:abc123/mysite/style.css"') 295 - expect(result).toContain('src="/did:plc:abc123/mysite/hero.jpg"') 296 - expect(result).toContain("var API = '/api/v1'") 297 - expect(result).toContain("fetch('/api/data')") 298 - }) 299 - }) 300 - 301 - describe('isHtmlFile', () => { 302 - test('.html returns true', () => expect(isHtmlFile('index.html')).toBe(true)) 303 - test('.htm returns true', () => expect(isHtmlFile('page.htm')).toBe(true)) 304 - test('uppercase .HTML returns true', () => expect(isHtmlFile('INDEX.HTML')).toBe(true)) 305 - test('nested path', () => expect(isHtmlFile('blog/posts/index.html')).toBe(true)) 306 - test('.js returns false', () => expect(isHtmlFile('app.js')).toBe(false)) 307 - test('no extension returns false', () => expect(isHtmlFile('README')).toBe(false)) 308 - })
-94
apps/firehose-service/src/lib/html-rewriter.ts
··· 1 - import { parse } from 'node-html-parser' 2 - 3 - /** 4 - * Attributes whose values are rewritten. 5 - * - `'url'` — a single URL string 6 - * - `'srcset'` — a comma-separated list of `<url> [descriptor]` entries 7 - */ 8 - const REWRITABLE_ATTRS: Record<string, 'url' | 'srcset'> = { 9 - src: 'url', 10 - href: 'url', 11 - action: 'url', 12 - data: 'url', 13 - poster: 'url', 14 - srcset: 'srcset', 15 - } 16 - 17 - /** Returns true if the URL is a root-relative path that needs prefixing (e.g. `/style.css`). */ 18 - function isRootRelative(url: string): boolean { 19 - if (!url || !url.startsWith('/')) return false 20 - // Protocol-relative (//cdn.example.com) — not a local path 21 - if (url.startsWith('//')) return false 22 - return true 23 - } 24 - 25 - /** 26 - * Prepend `basePath` to a root-relative URL, preserving query string and hash. 27 - */ 28 - function rewriteUrl(url: string, basePath: string): string { 29 - if (!isRootRelative(url)) return url 30 - if (url.startsWith(basePath)) return url 31 - const resolved = new URL(url, 'http://x') 32 - return basePath + resolved.pathname.slice(1) + resolved.search + resolved.hash 33 - } 34 - 35 - /** Rewrite each root-relative URL in a `srcset` value (comma-separated `<url> [descriptor]` list). */ 36 - function rewriteSrcset(srcset: string, basePath: string): string { 37 - return srcset 38 - .split(',') 39 - .map((entry) => { 40 - const trimmed = entry.trim() 41 - const spaceIdx = trimmed.search(/\s/) 42 - if (spaceIdx === -1) return rewriteUrl(trimmed, basePath) 43 - const url = trimmed.slice(0, spaceIdx) 44 - const descriptor = trimmed.slice(spaceIdx) // keeps leading whitespace + e.g. "2x" 45 - return rewriteUrl(url, basePath) + descriptor 46 - }) 47 - .join(', ') 48 - } 49 - 50 - /** 51 - * Rewrite root-relative paths in an HTML document so it serves correctly from `basePath`. 52 - * 53 - * @param html Raw HTML string. 54 - * @param basePath Wisp serving prefix, e.g. `/did/rkey/`. 
55 - * @param documentPath Storage path of this file — unused for path resolution but 56 - * kept in the signature for potential future use (e.g. logging). 57 - */ 58 - export function rewriteHtmlPaths(html: string, basePath: string, _documentPath: string): string { 59 - const normalizedBase = basePath.endsWith('/') ? basePath : `${basePath}/` 60 - 61 - const root = parse(html, { 62 - comment: true, 63 - blockTextElements: { script: true, style: true, pre: true, code: true }, 64 - }) 65 - 66 - // Rewrite <base href> so the browser uses the correct base at runtime for 67 - // JS fetch, form submits, dynamic navigation, etc. 68 - const baseEl = root.querySelector('base') 69 - if (baseEl) { 70 - const baseHref = baseEl.getAttribute('href') 71 - if (baseHref) { 72 - baseEl.setAttribute('href', rewriteUrl(baseHref, normalizedBase)) 73 - } 74 - } 75 - 76 - for (const el of root.querySelectorAll('*')) { 77 - for (const [attr, type] of Object.entries(REWRITABLE_ATTRS)) { 78 - const value = el.getAttribute(attr) 79 - if (value == null) continue 80 - el.setAttribute( 81 - attr, 82 - type === 'srcset' ? rewriteSrcset(value, normalizedBase) : rewriteUrl(value, normalizedBase), 83 - ) 84 - } 85 - } 86 - 87 - return root.toString() 88 - } 89 - 90 - /** Returns true for `.html` and `.htm` files. */ 91 - export function isHtmlFile(filepath: string): boolean { 92 - const ext = filepath.toLowerCase().split('.').pop() 93 - return ext === 'html' || ext === 'htm' 94 - }
+22 -16
apps/hosting-service/src/lib/file-serving.ts
··· 6 6 import { gunzipSync, gzipSync } from 'node:zlib' 7 7 import { shouldCompressMimeType } from '@wispplace/atproto-utils/compression' 8 8 import { normalizeFileCids } from '@wispplace/fs-utils' 9 + import { isHtmlContent, rewriteHtmlPaths } from '@wispplace/fs-utils/html-rewriter' 9 10 import type { Record as WispSettings } from '@wispplace/lexicons/types/place/wisp/settings' 10 11 import { createLogger } from '@wispplace/observability' 11 12 import type { StorageResult } from '@wispplace/tiered-storage' ··· 13 14 import { isSiteUpdating } from './cache-invalidation' 14 15 import { cache } from './cache-manager' 15 16 import { getSiteCache } from './db' 16 - import { isHtmlContent, rewriteHtmlPaths } from './html-rewriter' 17 17 import { fetchAndCacheSite } from './on-demand-cache' 18 18 import { generate404Page, generateDirectoryListing, siteUpdatingResponse } from './page-generators' 19 19 import { loadRedirectRules, matchRedirectRule, parseCookies, parseQueryString } from './redirects' ··· 260 260 return new Response(content, { headers }) 261 261 } 262 262 263 - function buildRewrittenHtmlResponse( 263 + async function buildRewrittenHtmlResponse( 264 264 result: FileStorageResult, 265 265 filePath: string, 266 266 basePath: string, 267 267 settings: WispSettings | null, 268 268 requestHeaders?: Record<string, string>, 269 - ): Response { 269 + ): Promise<Response> { 270 270 try { 271 271 const content = Buffer.from(result.data) 272 272 const meta = result.metadata.customMetadata as { encoding?: string; mimeType?: string } | undefined ··· 295 295 } 296 296 297 297 const htmlString = new TextDecoder().decode(decoded) 298 - const rewritten = rewriteHtmlPaths(htmlString, basePath, filePath) 298 + const rewritten = await rewriteHtmlPaths(htmlString, basePath) 299 299 let output = new TextEncoder().encode(rewritten) 300 300 301 301 const shouldServeCompressed = shouldCompressMimeType(mimeType) ··· 787 787 788 788 const indexFiles = getIndexFiles(settings) 789 789 
const isDirectoryPathRequest = filePath.endsWith('/') && filePath.length > 0 790 - const buildResponse = (fileResult: FileForRequestResult): Response => { 790 + const buildResponse = async (fileResult: FileForRequestResult): Promise<Response> => { 791 791 const meta = fileResult.result.metadata.customMetadata as { encoding?: string; mimeType?: string } | undefined 792 792 const mimeType = meta?.mimeType || lookup(fileResult.filePath) || 'application/octet-stream' 793 793 const needsRewrite = !fileResult.wasRewritten && isHtmlContent(fileResult.filePath, mimeType) 794 794 795 795 if (needsRewrite) { 796 796 void enqueueRevalidate(did, rkey, `rewrite-miss:${fileResult.filePath}`) 797 - return buildRewrittenHtmlResponse(fileResult.result, fileResult.filePath, basePath, settings, requestHeaders) 797 + return await buildRewrittenHtmlResponse( 798 + fileResult.result, 799 + fileResult.filePath, 800 + basePath, 801 + settings, 802 + requestHeaders, 803 + ) 798 804 } 799 805 800 806 return buildResponseFromStorageResult(fileResult.result, fileResult.filePath, settings, requestHeaders) ··· 814 820 getFileForRequest(did, rkey, requestPath, true), 815 821 ) 816 822 if (directResult) { 817 - return buildResponse(directResult) 823 + return await buildResponse(directResult) 818 824 } 819 825 await markExpectedMiss(requestPath) 820 826 } ··· 823 829 const indexPath = requestPath ? 
`${requestPath}/${indexFile}` : indexFile 824 830 const fileResult = await span(trace, `storage:${indexPath}`, () => getFileForRequest(did, rkey, indexPath, true)) 825 831 if (fileResult) { 826 - return buildResponse(fileResult) 832 + return await buildResponse(fileResult) 827 833 } 828 834 await markExpectedMiss(indexPath) 829 835 } ··· 859 865 getFileForRequest(did, rkey, fileRequestPath, true), 860 866 ) 861 867 if (fileResult) { 862 - return buildResponse(fileResult) 868 + return await buildResponse(fileResult) 863 869 } 864 870 await markExpectedMiss(fileRequestPath) 865 871 ··· 869 875 const indexPath = fileRequestPath ? `${fileRequestPath}/${indexFileName}` : indexFileName 870 876 const indexResult = await span(trace, `storage:${indexPath}`, () => getFileForRequest(did, rkey, indexPath, true)) 871 877 if (indexResult) { 872 - return buildResponse(indexResult) 878 + return await buildResponse(indexResult) 873 879 } 874 880 await markExpectedMiss(indexPath) 875 881 } ··· 880 886 const htmlPath = `${fileRequestPath}.html` 881 887 const htmlResult = await span(trace, `storage:${htmlPath}`, () => getFileForRequest(did, rkey, htmlPath, true)) 882 888 if (htmlResult) { 883 - return buildResponse(htmlResult) 889 + return await buildResponse(htmlResult) 884 890 } 885 891 await markExpectedMiss(htmlPath) 886 892 ··· 889 895 const indexPath = fileRequestPath ? 
`${fileRequestPath}/${indexFileName}` : indexFileName 890 896 const indexResult = await span(trace, `storage:${indexPath}`, () => getFileForRequest(did, rkey, indexPath, true)) 891 897 if (indexResult) { 892 - return buildResponse(indexResult) 898 + return await buildResponse(indexResult) 893 899 } 894 900 await markExpectedMiss(indexPath) 895 901 } ··· 900 906 const spaFile = settings.spaMode 901 907 const spaResult = await getFallbackFileForRequest(did, rkey, spaFile, trace) 902 908 if (spaResult) { 903 - return buildResponse(spaResult) 909 + return await buildResponse(spaResult) 904 910 } 905 911 await markExpectedMiss(spaFile) 906 912 } ··· 910 916 const custom404File = settings.custom404 911 917 const custom404Result = await getFallbackFileForRequest(did, rkey, custom404File, trace) 912 918 if (custom404Result) { 913 - const response = buildResponse(custom404Result) 919 + const response = await buildResponse(custom404Result) 914 920 return new Response(response.body, { status: 404, headers: response.headers }) 915 921 } 916 922 await markExpectedMiss(custom404File) ··· 920 926 for (const auto404Page of ['404.html', 'not_found.html']) { 921 927 const auto404Result = await getFallbackFileForRequest(did, rkey, auto404Page, trace) 922 928 if (auto404Result) { 923 - const response = buildResponse(auto404Result) 929 + const response = await buildResponse(auto404Result) 924 930 return new Response(response.body, { status: 404, headers: response.headers }) 925 931 } 926 932 await markExpectedMiss(auto404Page) ··· 957 963 const retryPath = filePath || indexFiles[0] || 'index.html' 958 964 const retryResult = await span(trace, `storage:${retryPath}`, () => getFileForRequest(did, rkey, retryPath, true)) 959 965 if (retryResult) { 960 - return buildResponse(retryResult) 966 + return await buildResponse(retryResult) 961 967 } 962 968 } 963 969 }
-366
apps/hosting-service/src/lib/html-rewriter.test.ts
··· 1 - import { describe, expect, test } from 'bun:test' 2 - import { isHtmlContent, rewriteHtmlPaths } from './html-rewriter' 3 - 4 - describe('rewriteHtmlPaths', () => { 5 - const basePath = '/identifier/site/' 6 - 7 - describe('absolute paths', () => { 8 - test('rewrites absolute paths with leading slash', () => { 9 - const html = '<img src="/image.png">' 10 - const result = rewriteHtmlPaths(html, basePath, 'index.html') 11 - expect(result).toBe('<img src="/identifier/site/image.png">') 12 - }) 13 - 14 - test('rewrites nested absolute paths', () => { 15 - const html = '<link href="/css/style.css">' 16 - const result = rewriteHtmlPaths(html, basePath, 'index.html') 17 - expect(result).toBe('<link href="/identifier/site/css/style.css">') 18 - }) 19 - }) 20 - 21 - describe('relative paths from root document', () => { 22 - test('rewrites relative paths with ./ prefix', () => { 23 - const html = '<img src="./image.png">' 24 - const result = rewriteHtmlPaths(html, basePath, 'index.html') 25 - expect(result).toBe('<img src="/identifier/site/image.png">') 26 - }) 27 - 28 - test('rewrites relative paths without prefix', () => { 29 - const html = '<img src="image.png">' 30 - const result = rewriteHtmlPaths(html, basePath, 'index.html') 31 - expect(result).toBe('<img src="/identifier/site/image.png">') 32 - }) 33 - 34 - test('rewrites relative paths with ../ (should stay at root)', () => { 35 - const html = '<img src="../image.png">' 36 - const result = rewriteHtmlPaths(html, basePath, 'index.html') 37 - expect(result).toBe('<img src="/identifier/site/image.png">') 38 - }) 39 - }) 40 - 41 - describe('relative paths from nested documents', () => { 42 - test('rewrites relative path from nested document', () => { 43 - const html = '<img src="./photo.jpg">' 44 - const result = rewriteHtmlPaths(html, basePath, 'folder1/folder2/index.html') 45 - expect(result).toBe('<img src="/identifier/site/folder1/folder2/photo.jpg">') 46 - }) 47 - 48 - test('rewrites plain filename from 
nested document', () => { 49 - const html = '<script src="app.js"></script>' 50 - const result = rewriteHtmlPaths(html, basePath, 'folder1/folder2/index.html') 51 - expect(result).toBe('<script src="/identifier/site/folder1/folder2/app.js"></script>') 52 - }) 53 - 54 - test('rewrites ../ to go up one level', () => { 55 - const html = '<img src="../image.png">' 56 - const result = rewriteHtmlPaths(html, basePath, 'folder1/folder2/folder3/index.html') 57 - expect(result).toBe('<img src="/identifier/site/folder1/folder2/image.png">') 58 - }) 59 - 60 - test('rewrites multiple ../ to go up multiple levels', () => { 61 - const html = '<link href="../../css/style.css">' 62 - const result = rewriteHtmlPaths(html, basePath, 'folder1/folder2/folder3/index.html') 63 - expect(result).toBe('<link href="/identifier/site/folder1/css/style.css">') 64 - }) 65 - 66 - test('rewrites ../ with additional path segments', () => { 67 - const html = '<img src="../assets/logo.png">' 68 - const result = rewriteHtmlPaths(html, basePath, 'pages/about/index.html') 69 - expect(result).toBe('<img src="/identifier/site/pages/assets/logo.png">') 70 - }) 71 - 72 - test('handles complex nested relative paths', () => { 73 - const html = '<script src="../../lib/vendor/jquery.js"></script>' 74 - const result = rewriteHtmlPaths(html, basePath, 'pages/blog/post/index.html') 75 - expect(result).toBe('<script src="/identifier/site/pages/lib/vendor/jquery.js"></script>') 76 - }) 77 - 78 - test('handles ../ going past root (stays at root)', () => { 79 - const html = '<img src="../../../image.png">' 80 - const result = rewriteHtmlPaths(html, basePath, 'folder1/index.html') 81 - expect(result).toBe('<img src="/identifier/site/image.png">') 82 - }) 83 - }) 84 - 85 - describe('external URLs and special schemes', () => { 86 - test('does not rewrite http URLs', () => { 87 - const html = '<img src="http://example.com/image.png">' 88 - const result = rewriteHtmlPaths(html, basePath, 'index.html') 89 - 
expect(result).toBe('<img src="http://example.com/image.png">') 90 - }) 91 - 92 - test('does not rewrite https URLs', () => { 93 - const html = '<link href="https://cdn.example.com/style.css">' 94 - const result = rewriteHtmlPaths(html, basePath, 'index.html') 95 - expect(result).toBe('<link href="https://cdn.example.com/style.css">') 96 - }) 97 - 98 - test('does not rewrite protocol-relative URLs', () => { 99 - const html = '<script src="//cdn.example.com/script.js"></script>' 100 - const result = rewriteHtmlPaths(html, basePath, 'index.html') 101 - expect(result).toBe('<script src="//cdn.example.com/script.js"></script>') 102 - }) 103 - 104 - test('does not rewrite data URIs', () => { 105 - const html = '<img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA">' 106 - const result = rewriteHtmlPaths(html, basePath, 'index.html') 107 - expect(result).toBe('<img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA">') 108 - }) 109 - 110 - test('does not rewrite mailto links', () => { 111 - const html = '<a href="mailto:test@example.com">Email</a>' 112 - const result = rewriteHtmlPaths(html, basePath, 'index.html') 113 - expect(result).toBe('<a href="mailto:test@example.com">Email</a>') 114 - }) 115 - 116 - test('does not rewrite tel links', () => { 117 - const html = '<a href="tel:+1234567890">Call</a>' 118 - const result = rewriteHtmlPaths(html, basePath, 'index.html') 119 - expect(result).toBe('<a href="tel:+1234567890">Call</a>') 120 - }) 121 - }) 122 - 123 - describe('different HTML attributes', () => { 124 - test('rewrites src attribute', () => { 125 - const html = '<img src="/image.png">' 126 - const result = rewriteHtmlPaths(html, basePath, 'index.html') 127 - expect(result).toBe('<img src="/identifier/site/image.png">') 128 - }) 129 - 130 - test('rewrites href attribute', () => { 131 - const html = '<a href="/page.html">Link</a>' 132 - const result = rewriteHtmlPaths(html, basePath, 'index.html') 133 - expect(result).toBe('<a 
href="/identifier/site/page.html">Link</a>') 134 - }) 135 - 136 - test('rewrites action attribute', () => { 137 - const html = '<form action="/submit"></form>' 138 - const result = rewriteHtmlPaths(html, basePath, 'index.html') 139 - expect(result).toBe('<form action="/identifier/site/submit"></form>') 140 - }) 141 - 142 - test('rewrites data attribute', () => { 143 - const html = '<object data="/document.pdf"></object>' 144 - const result = rewriteHtmlPaths(html, basePath, 'index.html') 145 - expect(result).toBe('<object data="/identifier/site/document.pdf"></object>') 146 - }) 147 - 148 - test('rewrites poster attribute', () => { 149 - const html = '<video poster="/thumbnail.jpg"></video>' 150 - const result = rewriteHtmlPaths(html, basePath, 'index.html') 151 - expect(result).toBe('<video poster="/identifier/site/thumbnail.jpg"></video>') 152 - }) 153 - 154 - test('rewrites srcset attribute with single URL', () => { 155 - const html = '<img srcset="/image.png 1x">' 156 - const result = rewriteHtmlPaths(html, basePath, 'index.html') 157 - expect(result).toBe('<img srcset="/identifier/site/image.png 1x">') 158 - }) 159 - 160 - test('rewrites srcset attribute with multiple URLs', () => { 161 - const html = '<img srcset="/image-1x.png 1x, /image-2x.png 2x">' 162 - const result = rewriteHtmlPaths(html, basePath, 'index.html') 163 - expect(result).toBe('<img srcset="/identifier/site/image-1x.png 1x, /identifier/site/image-2x.png 2x">') 164 - }) 165 - 166 - test('rewrites srcset with width descriptors', () => { 167 - const html = '<img srcset="/small.jpg 320w, /large.jpg 1024w">' 168 - const result = rewriteHtmlPaths(html, basePath, 'index.html') 169 - expect(result).toBe('<img srcset="/identifier/site/small.jpg 320w, /identifier/site/large.jpg 1024w">') 170 - }) 171 - 172 - test('rewrites srcset with relative paths from nested document', () => { 173 - const html = '<img srcset="../img1.png 1x, ../img2.png 2x">' 174 - const result = rewriteHtmlPaths(html, basePath, 
'folder1/folder2/index.html') 175 - expect(result).toBe('<img srcset="/identifier/site/folder1/img1.png 1x, /identifier/site/folder1/img2.png 2x">') 176 - }) 177 - }) 178 - 179 - describe('quote handling', () => { 180 - test('handles double quotes', () => { 181 - const html = '<img src="/image.png">' 182 - const result = rewriteHtmlPaths(html, basePath, 'index.html') 183 - expect(result).toBe('<img src="/identifier/site/image.png">') 184 - }) 185 - 186 - test('handles single quotes', () => { 187 - const html = "<img src='/image.png'>" 188 - const result = rewriteHtmlPaths(html, basePath, 'index.html') 189 - expect(result).toBe("<img src='/identifier/site/image.png'>") 190 - }) 191 - 192 - test('handles mixed quotes in same document', () => { 193 - const html = '<img src="/img1.png"><link href=\'/style.css\'>' 194 - const result = rewriteHtmlPaths(html, basePath, 'index.html') 195 - expect(result).toBe('<img src="/identifier/site/img1.png"><link href=\'/identifier/site/style.css\'>') 196 - }) 197 - }) 198 - 199 - describe('multiple rewrites in same document', () => { 200 - test('rewrites multiple attributes in complex HTML', () => { 201 - const html = ` 202 - <!DOCTYPE html> 203 - <html> 204 - <head> 205 - <link href="/css/style.css" rel="stylesheet"> 206 - <script src="/js/app.js"></script> 207 - </head> 208 - <body> 209 - <img src="/images/logo.png" alt="Logo"> 210 - <a href="/about.html">About</a> 211 - <form action="/submit"> 212 - <button type="submit">Submit</button> 213 - </form> 214 - </body> 215 - </html> 216 - ` 217 - const result = rewriteHtmlPaths(html, basePath, 'index.html') 218 - expect(result).toContain('href="/identifier/site/css/style.css"') 219 - expect(result).toContain('src="/identifier/site/js/app.js"') 220 - expect(result).toContain('src="/identifier/site/images/logo.png"') 221 - expect(result).toContain('href="/identifier/site/about.html"') 222 - expect(result).toContain('action="/identifier/site/submit"') 223 - }) 224 - 225 - test('handles 
mix of relative and absolute paths', () => { 226 - const html = ` 227 - <img src="/abs/image.png"> 228 - <img src="./rel/image.png"> 229 - <img src="../parent/image.png"> 230 - <img src="https://external.com/image.png"> 231 - ` 232 - const result = rewriteHtmlPaths(html, basePath, 'folder1/folder2/page.html') 233 - expect(result).toContain('src="/identifier/site/abs/image.png"') 234 - expect(result).toContain('src="/identifier/site/folder1/folder2/rel/image.png"') 235 - expect(result).toContain('src="/identifier/site/folder1/parent/image.png"') 236 - expect(result).toContain('src="https://external.com/image.png"') 237 - }) 238 - }) 239 - 240 - describe('edge cases', () => { 241 - test('handles empty src attribute', () => { 242 - const html = '<img src="">' 243 - const result = rewriteHtmlPaths(html, basePath, 'index.html') 244 - expect(result).toBe('<img src="">') 245 - }) 246 - 247 - test('handles basePath without trailing slash', () => { 248 - const html = '<img src="/image.png">' 249 - const result = rewriteHtmlPaths(html, '/identifier/site', 'index.html') 250 - expect(result).toBe('<img src="/identifier/site/image.png">') 251 - }) 252 - 253 - test('handles basePath with trailing slash', () => { 254 - const html = '<img src="/image.png">' 255 - const result = rewriteHtmlPaths(html, '/identifier/site/', 'index.html') 256 - expect(result).toBe('<img src="/identifier/site/image.png">') 257 - }) 258 - 259 - test('handles whitespace around equals sign', () => { 260 - const html = '<img src = "/image.png">' 261 - const result = rewriteHtmlPaths(html, basePath, 'index.html') 262 - expect(result).toBe('<img src="/identifier/site/image.png">') 263 - }) 264 - 265 - test('preserves query strings in URLs', () => { 266 - const html = '<img src="/image.png?v=123">' 267 - const result = rewriteHtmlPaths(html, basePath, 'index.html') 268 - expect(result).toBe('<img src="/identifier/site/image.png?v=123">') 269 - }) 270 - 271 - test('preserves hash fragments in URLs', () => { 
272 - const html = '<a href="/page.html#section">Link</a>' 273 - const result = rewriteHtmlPaths(html, basePath, 'index.html') 274 - expect(result).toBe('<a href="/identifier/site/page.html#section">Link</a>') 275 - }) 276 - 277 - test('handles paths with special characters', () => { 278 - const html = '<img src="/folder-name/file_name.png">' 279 - const result = rewriteHtmlPaths(html, basePath, 'index.html') 280 - expect(result).toBe('<img src="/identifier/site/folder-name/file_name.png">') 281 - }) 282 - }) 283 - 284 - describe('real-world scenario', () => { 285 - test('handles the example from the bug report', () => { 286 - // HTML file at: /folder1/folder2/folder3/index.html 287 - // Image at: /folder1/folder2/img.png 288 - // Reference: src="../img.png" 289 - const html = '<img src="../img.png">' 290 - const result = rewriteHtmlPaths(html, basePath, 'folder1/folder2/folder3/index.html') 291 - expect(result).toBe('<img src="/identifier/site/folder1/folder2/img.png">') 292 - }) 293 - 294 - test('handles deeply nested static site structure', () => { 295 - // A typical static site with nested pages and shared assets 296 - const html = ` 297 - <!DOCTYPE html> 298 - <html> 299 - <head> 300 - <link href="../../css/style.css" rel="stylesheet"> 301 - <link href="../../css/theme.css" rel="stylesheet"> 302 - <script src="../../js/main.js"></script> 303 - </head> 304 - <body> 305 - <img src="../../images/logo.png" alt="Logo"> 306 - <img src="./post-image.jpg" alt="Post"> 307 - <a href="../index.html">Back to Blog</a> 308 - <a href="../../index.html">Home</a> 309 - </body> 310 - </html> 311 - ` 312 - const result = rewriteHtmlPaths(html, basePath, 'blog/posts/my-post.html') 313 - 314 - // Assets two levels up 315 - expect(result).toContain('href="/identifier/site/css/style.css"') 316 - expect(result).toContain('href="/identifier/site/css/theme.css"') 317 - expect(result).toContain('src="/identifier/site/js/main.js"') 318 - 
expect(result).toContain('src="/identifier/site/images/logo.png"') 319 - 320 - // Same directory 321 - expect(result).toContain('src="/identifier/site/blog/posts/post-image.jpg"') 322 - 323 - // One level up 324 - expect(result).toContain('href="/identifier/site/blog/index.html"') 325 - 326 - // Two levels up 327 - expect(result).toContain('href="/identifier/site/index.html"') 328 - }) 329 - }) 330 - }) 331 - 332 - describe('isHtmlContent', () => { 333 - test('identifies HTML by content type', () => { 334 - expect(isHtmlContent('file.txt', 'text/html')).toBe(true) 335 - expect(isHtmlContent('file.txt', 'text/html; charset=utf-8')).toBe(true) 336 - }) 337 - 338 - test('identifies HTML by .html extension', () => { 339 - expect(isHtmlContent('index.html')).toBe(true) 340 - expect(isHtmlContent('page.html', undefined)).toBe(true) 341 - expect(isHtmlContent('/path/to/file.html')).toBe(true) 342 - }) 343 - 344 - test('identifies HTML by .htm extension', () => { 345 - expect(isHtmlContent('index.htm')).toBe(true) 346 - expect(isHtmlContent('page.htm', undefined)).toBe(true) 347 - }) 348 - 349 - test('handles case-insensitive extensions', () => { 350 - expect(isHtmlContent('INDEX.HTML')).toBe(true) 351 - expect(isHtmlContent('page.HTM')).toBe(true) 352 - expect(isHtmlContent('File.HtMl')).toBe(true) 353 - }) 354 - 355 - test('returns false for non-HTML files', () => { 356 - expect(isHtmlContent('script.js')).toBe(false) 357 - expect(isHtmlContent('style.css')).toBe(false) 358 - expect(isHtmlContent('image.png')).toBe(false) 359 - expect(isHtmlContent('data.json')).toBe(false) 360 - }) 361 - 362 - test('returns false for files with no extension', () => { 363 - expect(isHtmlContent('README')).toBe(false) 364 - expect(isHtmlContent('Makefile')).toBe(false) 365 - }) 366 - })
-187
apps/hosting-service/src/lib/html-rewriter.ts
··· 1 - /** 2 - * Safely rewrites absolute paths in HTML to be relative to a base path 3 - * Only processes common HTML attributes and preserves external URLs, data URIs, etc. 4 - */ 5 - 6 - const REWRITABLE_ATTRIBUTES = ['src', 'href', 'action', 'data', 'poster', 'srcset'] as const 7 - 8 - /** 9 - * Check if a path should be rewritten 10 - */ 11 - function shouldRewritePath(path: string): boolean { 12 - // Don't rewrite empty paths 13 - if (!path) return false 14 - 15 - // Don't rewrite external URLs (http://, https://, //) 16 - if (path.startsWith('http://') || path.startsWith('https://') || path.startsWith('//')) { 17 - return false 18 - } 19 - 20 - // Don't rewrite data URIs or other schemes (except file paths) 21 - if (path.includes(':') && !path.startsWith('./') && !path.startsWith('../')) { 22 - return false 23 - } 24 - 25 - // Rewrite absolute paths (/) and relative paths (./ or ../ or plain filenames) 26 - return true 27 - } 28 - 29 - /** 30 - * Normalize a path by resolving . and .. segments 31 - */ 32 - function normalizePath(path: string): string { 33 - const parts = path.split('/') 34 - const result: string[] = [] 35 - 36 - for (const part of parts) { 37 - if (part === '.' 
|| part === '') { 38 - // Skip current directory and empty parts (but keep leading empty for absolute paths) 39 - if (part === '' && result.length === 0) { 40 - result.push(part) 41 - } 42 - continue 43 - } 44 - if (part === '..') { 45 - // Go up one directory (but not past root) 46 - if (result.length > 0 && result[result.length - 1] !== '..') { 47 - result.pop() 48 - } 49 - continue 50 - } 51 - result.push(part) 52 - } 53 - 54 - return result.join('/') 55 - } 56 - 57 - /** 58 - * Get the directory path from a file path 59 - * e.g., "folder1/folder2/file.html" -> "folder1/folder2/" 60 - */ 61 - function getDirectory(filepath: string): string { 62 - const lastSlash = filepath.lastIndexOf('/') 63 - if (lastSlash === -1) { 64 - return '' 65 - } 66 - return filepath.substring(0, lastSlash + 1) 67 - } 68 - 69 - /** 70 - * Rewrite a single path 71 - */ 72 - function rewritePath(path: string, basePath: string, documentPath: string): string { 73 - if (!shouldRewritePath(path)) { 74 - return path 75 - } 76 - 77 - // Handle absolute paths: /file.js -> /base/file.js 78 - if (path.startsWith('/')) { 79 - return basePath + path.slice(1) 80 - } 81 - 82 - // Handle relative paths by resolving against document directory 83 - const documentDir = getDirectory(documentPath) 84 - let resolvedPath: string 85 - 86 - if (path.startsWith('./')) { 87 - // ./file.js relative to current directory 88 - resolvedPath = documentDir + path.slice(2) 89 - } else if (path.startsWith('../')) { 90 - // ../file.js relative to parent directory 91 - resolvedPath = documentDir + path 92 - } else { 93 - // file.js (no prefix) - treat as relative to current directory 94 - resolvedPath = documentDir + path 95 - } 96 - 97 - // Normalize the path to resolve .. and . 
98 - resolvedPath = normalizePath(resolvedPath) 99 - 100 - return basePath + resolvedPath 101 - } 102 - 103 - /** 104 - * Rewrite srcset attribute (can contain multiple URLs) 105 - * Format: "url1 1x, url2 2x" or "url1 100w, url2 200w" 106 - */ 107 - function rewriteSrcset(srcset: string, basePath: string, documentPath: string): string { 108 - return srcset 109 - .split(',') 110 - .map((part) => { 111 - const trimmed = part.trim() 112 - const spaceIndex = trimmed.indexOf(' ') 113 - 114 - if (spaceIndex === -1) { 115 - // No descriptor, just URL 116 - return rewritePath(trimmed, basePath, documentPath) 117 - } 118 - 119 - const url = trimmed.substring(0, spaceIndex) 120 - const descriptor = trimmed.substring(spaceIndex) 121 - return rewritePath(url, basePath, documentPath) + descriptor 122 - }) 123 - .join(', ') 124 - } 125 - 126 - /** 127 - * Rewrite absolute and relative paths in HTML content 128 - * Uses simple regex matching for safety (no full HTML parsing) 129 - */ 130 - export function rewriteHtmlPaths(html: string, basePath: string, documentPath: string): string { 131 - // Ensure base path ends with / 132 - const normalizedBase = basePath.endsWith('/') ? 
basePath : `${basePath}/` 133 - 134 - let rewritten = html 135 - 136 - // Rewrite each attribute type 137 - // Use more specific patterns to prevent ReDoS attacks 138 - for (const attr of REWRITABLE_ATTRIBUTES) { 139 - if (attr === 'srcset') { 140 - // Special handling for srcset - use possessive quantifiers via atomic grouping simulation 141 - // Limit whitespace to reasonable amount (max 5 spaces) to prevent ReDoS 142 - const srcsetRegex = new RegExp(`\\b${attr}[ \\t]{0,5}=[ \\t]{0,5}"([^"]*)"`, 'gi') 143 - rewritten = rewritten.replace(srcsetRegex, (_match, value) => { 144 - const rewrittenValue = rewriteSrcset(value, normalizedBase, documentPath) 145 - return `${attr}="${rewrittenValue}"` 146 - }) 147 - } else { 148 - // Regular attributes with quoted values 149 - // Limit whitespace to prevent catastrophic backtracking 150 - const doubleQuoteRegex = new RegExp(`\\b${attr}[ \\t]{0,5}=[ \\t]{0,5}"([^"]*)"`, 'gi') 151 - const singleQuoteRegex = new RegExp(`\\b${attr}[ \\t]{0,5}=[ \\t]{0,5}'([^']*)'`, 'gi') 152 - // Unquoted attributes (valid in HTML5 for values without spaces/special chars) 153 - // Match: attr=value where value starts immediately (no quotes) and continues until space or > 154 - // Use negative lookahead to ensure we don't match quoted attributes 155 - const unquotedRegex = new RegExp(`\\b${attr}[ \\t]{0,5}=[ \\t]{0,5}(?!["'])([^\\s>]+)`, 'gi') 156 - 157 - rewritten = rewritten.replace(doubleQuoteRegex, (_match, value) => { 158 - const rewrittenValue = rewritePath(value, normalizedBase, documentPath) 159 - return `${attr}="${rewrittenValue}"` 160 - }) 161 - 162 - rewritten = rewritten.replace(singleQuoteRegex, (_match, value) => { 163 - const rewrittenValue = rewritePath(value, normalizedBase, documentPath) 164 - return `${attr}='${rewrittenValue}'` 165 - }) 166 - 167 - rewritten = rewritten.replace(unquotedRegex, (_match, value) => { 168 - const rewrittenValue = rewritePath(value, normalizedBase, documentPath) 169 - return 
`${attr}=${rewrittenValue}` 170 - }) 171 - } 172 - } 173 - 174 - return rewritten 175 - } 176 - 177 - /** 178 - * Check if content is HTML based on content or filename 179 - */ 180 - export function isHtmlContent(filepath: string, contentType?: string): boolean { 181 - if (contentType?.includes('text/html')) { 182 - return true 183 - } 184 - 185 - const ext = filepath.toLowerCase().split('.').pop() 186 - return ext === 'html' || ext === 'htm' 187 - }
+4
packages/@wispplace/fs-utils/package.json
··· 29 29 "./redirects": { 30 30 "types": "./src/redirects.ts", 31 31 "default": "./src/redirects.ts" 32 + }, 33 + "./html-rewriter": { 34 + "types": "./src/html-rewriter.ts", 35 + "default": "./src/html-rewriter.ts" 32 36 } 33 37 }, 34 38 "dependencies": {
+323
packages/@wispplace/fs-utils/src/html-rewriter.test.ts
··· 1 + import { describe, expect, test } from 'bun:test' 2 + import { isHtmlContent, rewriteHtmlPaths } from './html-rewriter' 3 + 4 + const BASE = '/did:plc:abc123/mysite/' 5 + 6 + function rewrite(html: string): Promise<string> { 7 + return rewriteHtmlPaths(html, BASE) 8 + } 9 + 10 + describe('rewritten attributes', () => { 11 + test('src', async () => { 12 + expect(await rewrite('<img src="/photo.jpg">')).toBe('<img src="/did:plc:abc123/mysite/photo.jpg">') 13 + }) 14 + 15 + test('href', async () => { 16 + expect(await rewrite('<a href="/about">About</a>')).toBe('<a href="/did:plc:abc123/mysite/about">About</a>') 17 + }) 18 + 19 + test('action', async () => { 20 + expect(await rewrite('<form action="/submit"></form>')).toBe('<form action="/did:plc:abc123/mysite/submit"></form>') 21 + }) 22 + 23 + test('data (object)', async () => { 24 + expect(await rewrite('<object data="/file.pdf"></object>')).toBe( 25 + '<object data="/did:plc:abc123/mysite/file.pdf"></object>', 26 + ) 27 + }) 28 + 29 + test('poster', async () => { 30 + expect(await rewrite('<video poster="/thumb.jpg"></video>')).toBe( 31 + '<video poster="/did:plc:abc123/mysite/thumb.jpg"></video>', 32 + ) 33 + }) 34 + 35 + test('link href', async () => { 36 + expect(await rewrite('<link rel="stylesheet" href="/style.css">')).toBe( 37 + '<link rel="stylesheet" href="/did:plc:abc123/mysite/style.css">', 38 + ) 39 + }) 40 + 41 + test('script src', async () => { 42 + expect(await rewrite('<script src="/app.js"></script>')).toBe( 43 + '<script src="/did:plc:abc123/mysite/app.js"></script>', 44 + ) 45 + }) 46 + 47 + test('source src', async () => { 48 + expect(await rewrite('<video><source src="/clip.mp4"></video>')).toBe( 49 + '<video><source src="/did:plc:abc123/mysite/clip.mp4"></video>', 50 + ) 51 + }) 52 + }) 53 + 54 + describe('srcset', () => { 55 + test('single entry no descriptor', async () => { 56 + expect(await rewrite('<img srcset="/img.jpg">')).toBe('<img srcset="/did:plc:abc123/mysite/img.jpg">') 57 
+ }) 58 + 59 + test('single entry with pixel density descriptor', async () => { 60 + expect(await rewrite('<img srcset="/img.jpg 2x">')).toBe('<img srcset="/did:plc:abc123/mysite/img.jpg 2x">') 61 + }) 62 + 63 + test('multiple entries with pixel density descriptors', async () => { 64 + expect(await rewrite('<img srcset="/img.jpg 1x, /img@2x.jpg 2x">')).toBe( 65 + '<img srcset="/did:plc:abc123/mysite/img.jpg 1x, /did:plc:abc123/mysite/img@2x.jpg 2x">', 66 + ) 67 + }) 68 + 69 + test('multiple entries with width descriptors', async () => { 70 + expect(await rewrite('<img srcset="/small.jpg 320w, /large.jpg 1024w">')).toBe( 71 + '<img srcset="/did:plc:abc123/mysite/small.jpg 320w, /did:plc:abc123/mysite/large.jpg 1024w">', 72 + ) 73 + }) 74 + 75 + test('relative entries are left alone', async () => { 76 + const html = '<img srcset="../img.jpg 1x, ./img@2x.jpg 2x">' 77 + expect(await rewrite(html)).toBe(html) 78 + }) 79 + 80 + test('mixed: absolute entries rewritten, relative left alone', async () => { 81 + expect(await rewrite('<img srcset="/abs.jpg 1x, ./rel.jpg 2x">')).toBe( 82 + '<img srcset="/did:plc:abc123/mysite/abs.jpg 1x, ./rel.jpg 2x">', 83 + ) 84 + }) 85 + }) 86 + 87 + describe('absolute (root-relative) paths', () => { 88 + test('root file', async () => { 89 + expect(await rewrite('<img src="/image.png">')).toBe('<img src="/did:plc:abc123/mysite/image.png">') 90 + }) 91 + 92 + test('nested file', async () => { 93 + expect(await rewrite('<img src="/assets/photo.jpg">')).toBe('<img src="/did:plc:abc123/mysite/assets/photo.jpg">') 94 + }) 95 + 96 + test('deeply nested file', async () => { 97 + expect(await rewrite('<link href="/a/b/c/style.css">')).toBe('<link href="/did:plc:abc123/mysite/a/b/c/style.css">') 98 + }) 99 + }) 100 + 101 + describe('relative paths are not rewritten (browser resolves them against doc URL)', () => { 102 + test('./ prefix', async () => { 103 + const html = '<img src="./image.png">' 104 + expect(await rewrite(html)).toBe(html) 105 + }) 
106 + 107 + test('bare filename', async () => { 108 + const html = '<img src="image.png">' 109 + expect(await rewrite(html)).toBe(html) 110 + }) 111 + 112 + test('../ up one level', async () => { 113 + const html = '<img src="../image.png">' 114 + expect(await rewrite(html)).toBe(html) 115 + }) 116 + }) 117 + 118 + describe('not rewritten', () => { 119 + describe('external / protocol-relative', () => { 120 + test('https', async () => { 121 + const html = '<img src="https://cdn.example.com/img.png">' 122 + expect(await rewrite(html)).toBe(html) 123 + }) 124 + 125 + test('http', async () => { 126 + const html = '<link href="http://cdn.example.com/style.css">' 127 + expect(await rewrite(html)).toBe(html) 128 + }) 129 + 130 + test('protocol-relative //', async () => { 131 + const html = '<script src="//cdn.example.com/lib.js"></script>' 132 + expect(await rewrite(html)).toBe(html) 133 + }) 134 + }) 135 + 136 + describe('URI schemes', () => { 137 + test('data:', async () => { 138 + const html = '<img src="data:image/png;base64,abc123">' 139 + expect(await rewrite(html)).toBe(html) 140 + }) 141 + 142 + test('mailto:', async () => { 143 + const html = '<a href="mailto:hi@example.com">Email</a>' 144 + expect(await rewrite(html)).toBe(html) 145 + }) 146 + 147 + test('tel:', async () => { 148 + const html = '<a href="tel:+1234567890">Call</a>' 149 + expect(await rewrite(html)).toBe(html) 150 + }) 151 + 152 + test('javascript:', async () => { 153 + const html = '<a href="javascript:void(0)">JS</a>' 154 + expect(await rewrite(html)).toBe(html) 155 + }) 156 + 157 + test('blob:', async () => { 158 + const html = '<a href="blob:https://example.com/abc">Blob</a>' 159 + expect(await rewrite(html)).toBe(html) 160 + }) 161 + }) 162 + 163 + describe('fragment-only', () => { 164 + test('#anchor', async () => { 165 + const html = '<a href="#section">Jump</a>' 166 + expect(await rewrite(html)).toBe(html) 167 + }) 168 + }) 169 + 170 + describe('already prefixed (Vite base output)', () => 
{ 171 + test('path already starting with basePath is not double-rewritten', async () => { 172 + const html = '<script src="/did:plc:abc123/mysite/assets/app.js"></script>' 173 + expect(await rewrite(html)).toBe(html) 174 + }) 175 + }) 176 + 177 + describe('inline script and style content', () => { 178 + test('paths inside <script> text are not rewritten', async () => { 179 + const html = '<script>\nvar path = "/api/data"\nfetch("/api/endpoint")\n</script>' 180 + expect(await rewrite(html)).toBe(html) 181 + }) 182 + 183 + test('url() inside <style> text is not rewritten', async () => { 184 + const html = "<style>.hero { background: url('/images/hero.jpg') }</style>" 185 + expect(await rewrite(html)).toBe(html) 186 + }) 187 + }) 188 + 189 + describe('custom elements and HTML-in-text', () => { 190 + test('custom element wrappers pass through unchanged', async () => { 191 + const html = '<md-block># Heading\n\nSome *markdown* with `<section>` and `<div>` in code spans.</md-block>' 192 + expect(await rewrite(html)).toBe(html) 193 + }) 194 + }) 195 + }) 196 + 197 + describe('<base> tag', () => { 198 + test('root-relative base href is rewritten', async () => { 199 + const result = await rewrite('<head><base href="/"></head>') 200 + expect(result).toContain('href="/did:plc:abc123/mysite/"') 201 + }) 202 + 203 + test('subdirectory base href is rewritten', async () => { 204 + const result = await rewrite('<head><base href="/app/"></head>') 205 + expect(result).toContain('href="/did:plc:abc123/mysite/app/"') 206 + }) 207 + 208 + test('external base href is left untouched', async () => { 209 + const html = '<head><base href="https://example.com/"></head>' 210 + expect(await rewrite(html)).toBe(html) 211 + }) 212 + 213 + test('relative base href is left untouched', async () => { 214 + const html = '<head><base href="./subdir/"></head>' 215 + expect(await rewrite(html)).toBe(html) 216 + }) 217 + }) 218 + 219 + describe('URL features preserved', () => { 220 + test('query string', 
async () => { 221 + expect(await rewrite('<img src="/img.png?v=3">')).toBe('<img src="/did:plc:abc123/mysite/img.png?v=3">') 222 + }) 223 + 224 + test('hash fragment on a path URL', async () => { 225 + expect(await rewrite('<a href="/page#section">Link</a>')).toBe( 226 + '<a href="/did:plc:abc123/mysite/page#section">Link</a>', 227 + ) 228 + }) 229 + 230 + test('query string and hash fragment together', async () => { 231 + expect(await rewrite('<a href="/page?q=1#section">Link</a>')).toBe( 232 + '<a href="/did:plc:abc123/mysite/page?q=1#section">Link</a>', 233 + ) 234 + }) 235 + }) 236 + 237 + describe('basePath normalisation', () => { 238 + test('basePath without trailing slash is normalised', async () => { 239 + const result = await rewriteHtmlPaths('<img src="/img.png">', '/did:plc:abc123/mysite') 240 + expect(result).toBe('<img src="/did:plc:abc123/mysite/img.png">') 241 + }) 242 + 243 + test('basePath with trailing slash is unchanged', async () => { 244 + const result = await rewriteHtmlPaths('<img src="/img.png">', '/did:plc:abc123/mysite/') 245 + expect(result).toBe('<img src="/did:plc:abc123/mysite/img.png">') 246 + }) 247 + }) 248 + 249 + describe('real-world scenarios', () => { 250 + test('Vite SPA with already-prefixed paths not double-rewritten', async () => { 251 + const html = [ 252 + '<link rel="stylesheet" href="/did:plc:abc123/mysite/assets/index.css">', 253 + '<script src="/did:plc:abc123/mysite/assets/index.js"></script>', 254 + ].join('\n') 255 + expect(await rewrite(html)).toBe(html) 256 + }) 257 + 258 + test('static site: absolute paths rewritten, relative paths left alone', async () => { 259 + const html = ` 260 + <link href="/css/style.css" rel="stylesheet"> 261 + <script src="/js/main.js"></script> 262 + <img src="/images/logo.png"> 263 + <img src="./post-image.jpg"> 264 + <a href="../index.html">Blog</a> 265 + <a href="/index.html">Home</a>`.trim() 266 + 267 + const result = await rewrite(html) 268 + 
expect(result).toContain('href="/did:plc:abc123/mysite/css/style.css"') 269 + expect(result).toContain('src="/did:plc:abc123/mysite/js/main.js"') 270 + expect(result).toContain('src="/did:plc:abc123/mysite/images/logo.png"') 271 + expect(result).toContain('src="./post-image.jpg"') 272 + expect(result).toContain('href="../index.html"') 273 + expect(result).toContain('href="/did:plc:abc123/mysite/index.html"') 274 + }) 275 + 276 + test('inline script alongside rewritable elements', async () => { 277 + const html = ` 278 + <link href="/style.css" rel="stylesheet"> 279 + <script> 280 + var API = '/api/v1' 281 + fetch('/api/data').then(r => r.json()) 282 + </script> 283 + <img src="/hero.jpg">`.trim() 284 + 285 + const result = await rewrite(html) 286 + expect(result).toContain('href="/did:plc:abc123/mysite/style.css"') 287 + expect(result).toContain('src="/did:plc:abc123/mysite/hero.jpg"') 288 + expect(result).toContain("var API = '/api/v1'") 289 + expect(result).toContain("fetch('/api/data')") 290 + }) 291 + }) 292 + 293 + describe('isHtmlContent', () => { 294 + test('identifies HTML by content type', () => { 295 + expect(isHtmlContent('file.txt', 'text/html')).toBe(true) 296 + expect(isHtmlContent('file.txt', 'text/html; charset=utf-8')).toBe(true) 297 + }) 298 + 299 + test('.html extension', () => { 300 + expect(isHtmlContent('index.html')).toBe(true) 301 + expect(isHtmlContent('/path/to/file.html')).toBe(true) 302 + }) 303 + 304 + test('.htm extension', () => { 305 + expect(isHtmlContent('page.htm')).toBe(true) 306 + }) 307 + 308 + test('case-insensitive', () => { 309 + expect(isHtmlContent('INDEX.HTML')).toBe(true) 310 + expect(isHtmlContent('page.HTM')).toBe(true) 311 + }) 312 + 313 + test('non-HTML', () => { 314 + expect(isHtmlContent('script.js')).toBe(false) 315 + expect(isHtmlContent('style.css')).toBe(false) 316 + expect(isHtmlContent('image.png')).toBe(false) 317 + }) 318 + 319 + test('no extension', () => { 320 + expect(isHtmlContent('README')).toBe(false) 
321 + expect(isHtmlContent('Makefile')).toBe(false) 322 + }) 323 + })
+77
packages/@wispplace/fs-utils/src/html-rewriter.ts
/**
 * Rewrites root-relative URL attributes in an HTML document so it serves correctly
 * from a `basePath` (e.g. `/did/rkey/`) instead of the site root.
 *
 * Uses Bun's streaming `HTMLRewriter`: only the attributes we target are replaced;
 * everything else (text, inline `<script>`/`<style>`, custom elements like `<md-block>`,
 * unbalanced markup, HTML-looking content inside Markdown code spans) passes through
 * untouched. No DOM construction, no re-serialisation of the document.
 */

/** Attributes that may carry a URL, and how their value is parsed. */
const REWRITABLE_ATTRS: Record<string, 'url' | 'srcset'> = {
  src: 'url',
  href: 'url',
  action: 'url',
  data: 'url',
  poster: 'url',
  srcset: 'srcset',
}

/** Entry list computed once instead of once per visited element. */
const REWRITABLE_ATTR_ENTRIES = Object.entries(REWRITABLE_ATTRS)

/** Placeholder origin used only to let `URL` split a path into pathname/search/hash. */
const PLACEHOLDER_ORIGIN = 'http://x'

/** Characters treated as separators between a srcset URL and its descriptors. */
const SRCSET_WHITESPACE = ' \t\n\f\r'

/** Matches a trailing `.html` / `.htm` extension, case-insensitively. */
const HTML_EXTENSION = /\.html?$/i

/**
 * Returns true when `url` starts with a single `/`, i.e. it is a path on the
 * current origin rather than a relative, absolute or protocol-relative URL.
 */
function isRootRelative(url: string): boolean {
  if (!url.startsWith('/')) return false
  // Protocol-relative (//cdn.example.com) — not a local path
  return !url.startsWith('//')
}

/**
 * Prefixes a root-relative `url` with `basePath` (which must end in `/`).
 *
 * Returned unchanged: anything that is not root-relative, anything already under
 * `basePath`, and anything the URL parser resolves to a different host (for example
 * `/\host/x`, which URL parsers treat like `//host/x`) or cannot parse at all.
 */
function rewriteUrl(url: string, basePath: string): string {
  if (!isRootRelative(url)) return url
  if (url.startsWith(basePath)) return url

  let resolved: URL
  try {
    resolved = new URL(url, PLACEHOLDER_ORIGIN)
  } catch {
    // Unparseable value: leave the author's text alone rather than abort the rewrite.
    return url
  }
  // The value escaped the placeholder origin, so it is not a local path after all.
  if (resolved.origin !== PLACEHOLDER_ORIGIN) return url

  return basePath + resolved.pathname.slice(1) + resolved.search + resolved.hash
}

/**
 * Rewrites every candidate URL in a `srcset` value.
 *
 * Candidates are tokenised the way the HTML srcset grammar describes them: a URL is a
 * run of non-whitespace characters, so commas inside a URL (`data:` URIs, CDN transform
 * paths) stay part of that URL; only a comma that ends the URL token or follows the
 * descriptors separates candidates. Splitting on every comma would tear such URLs apart.
 *
 * When no URL needs rewriting the original string is returned as-is; otherwise the
 * candidates are re-joined with `, `.
 */
function rewriteSrcset(srcset: string, basePath: string): string {
  const candidates: string[] = []
  const length = srcset.length
  let changed = false
  let i = 0

  while (i < length) {
    // Skip separators (whitespace and stray commas) before the next candidate.
    while (i < length && (SRCSET_WHITESPACE.includes(srcset.charAt(i)) || srcset.charAt(i) === ',')) i++
    if (i >= length) break

    // The URL runs up to the next whitespace character.
    const urlStart = i
    while (i < length && !SRCSET_WHITESPACE.includes(srcset.charAt(i))) i++
    let url = srcset.slice(urlStart, i)
    let descriptor = ''

    if (url.endsWith(',')) {
      // Trailing comma(s) terminate a descriptor-less candidate.
      url = url.replace(/,+$/, '')
    } else {
      // Descriptors run up to the next comma that is not inside parentheses.
      const descriptorStart = i
      let depth = 0
      while (i < length) {
        const ch = srcset.charAt(i)
        if (ch === '(') depth++
        else if (ch === ')' && depth > 0) depth--
        else if (ch === ',' && depth === 0) break
        i++
      }
      descriptor = srcset.slice(descriptorStart, i).trimEnd()
      i++ // step over the separating comma (or past the end)
    }

    const rewritten = rewriteUrl(url, basePath)
    if (rewritten !== url) changed = true
    candidates.push(rewritten + descriptor)
  }

  return changed ? candidates.join(', ') : srcset
}

/**
 * Rewrite root-relative paths in an HTML document so it serves correctly from `basePath`.
 * Relative paths (`./foo`, `../foo`, bare filenames) are left alone — browsers resolve
 * them against the document URL, which already lives under `basePath`.
 *
 * @param html - The HTML document text.
 * @param basePath - Prefix to serve from; a trailing `/` is added when missing.
 * @returns The document with targeted attributes rewritten.
 */
export async function rewriteHtmlPaths(html: string, basePath: string): Promise<string> {
  const normalizedBase = basePath.endsWith('/') ? basePath : `${basePath}/`

  const rewriter = new HTMLRewriter().on('*', {
    element(el) {
      for (const [attr, type] of REWRITABLE_ATTR_ENTRIES) {
        const value = el.getAttribute(attr)
        if (value == null) continue
        const rewritten =
          type === 'srcset' ? rewriteSrcset(value, normalizedBase) : rewriteUrl(value, normalizedBase)
        // Only touch the element when something actually changed, so tags whose
        // attributes need no rewrite are never modified by us.
        if (rewritten !== value) el.setAttribute(attr, rewritten)
      }
    },
  })

  return await rewriter.transform(new Response(html)).text()
}

/**
 * Returns true if the file looks like HTML by content-type or extension.
 *
 * The content type is matched case-insensitively. The extension check requires an
 * actual `.html`/`.htm` suffix, so an extensionless file that happens to be named
 * `html` (or lives under a dotted directory) is not misclassified.
 */
export function isHtmlContent(filepath: string, contentType?: string): boolean {
  if (contentType?.toLowerCase().includes('text/html')) return true
  return HTML_EXTENSION.test(filepath)
}
+2
packages/@wispplace/fs-utils/src/index.ts
··· 3 3 // File CID normalization 4 4 export type { FileCidsNormalization, FileCidsNormalizationSource } from './file-cids' 5 5 export { normalizeFileCids } from './file-cids' 6 + // HTML rewriting for wisp basePath-scoped serving 7 + export { isHtmlContent, rewriteHtmlPaths } from './html-rewriter' 6 8 // Manifest creation 7 9 export { createManifest } from './manifest' 8 10 export { normalizePath, sanitizePath } from './path'