Bluesky app fork with some witchin' additions 💫
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

at cope-settings-sync 227 lines 7.5 kB view raw
/**
 * Witchsky Storage Manifest — codec
 *
 * Encodes arbitrary JSON as a thread of draft-post text segments.
 * The first segment is a plaintext manifest header; subsequent segments
 * contain the payload encoded as gzip+u15: every 15 bits of the gzip stream
 * become one BMP character in U+3400–U+B3FF (BASE + 0 … BASE + 0x7FFF), so
 * most of the data looks like CJK ideographs. The range ends below the
 * surrogate block (U+D800), so each character is exactly one UTF-16 code unit.
 *
 * Manifest format (one field per line):
 *   witchsky:storage:v1
 *   Do not change me! These are your Witchsky settings.
 *   updatedAt=<ISO8601>
 *   codec=gzip+u15
 *   overflowSegments=<N>
 *   bytes=<N>
 *   sha256=<hex>
 *   manifestHash=<hex>
 *
 * manifestHash is sha256 of all lines above it joined by '\n', so the
 * manifest is self-authenticating.
 */

import {gzip, inflate} from 'pako'
import {sha256} from '@noble/hashes/sha256'

// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------

/** Code point that represents the 15-bit value 0. */
const BASE = 0x3400
/** Number of payload bits carried by each encoded character. */
const BITS_PER_CHAR = 15
/** Largest value a single encoded character can carry (15 set bits). */
const MAX_CHAR_VALUE = (1 << BITS_PER_CHAR) - 1
/** Maximum number of characters placed in one data segment. */
const SEGMENT_MAX_GRAPHEMES = 1000

const MANIFEST_HEADER = 'witchsky:storage:v1'
const MANIFEST_COMMENT = 'Do not change me! These are your Witchsky settings.'
const CODEC_NAME = 'gzip+u15'

// Hoisted so the patterns are compiled once rather than on every decode().
const MANIFEST_HASH_RE = /^manifestHash=([0-9a-f]+)$/
const DECIMAL_RE = /^\d+$/

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/** Render bytes as a lowercase, zero-padded hexadecimal string. */
function toHex(bytes: Uint8Array): string {
  return Array.from(bytes)
    .map(b => b.toString(16).padStart(2, '0'))
    .join('')
}

/**
 * Read a manifest field that must be a non-negative decimal integer.
 *
 * @throws Error if the field is absent or is not made of decimal digits only.
 */
function parseCountField(fields: Map<string, string>, name: string): number {
  const raw = fields.get(name)
  if (raw === undefined || !DECIMAL_RE.test(raw)) {
    throw new Error(`storage-manifest: missing or invalid ${name} field`)
  }
  return Number.parseInt(raw, 10)
}

// ---------------------------------------------------------------------------
// u15 codec
// ---------------------------------------------------------------------------

/**
 * Pack bytes into 15-bit characters, most significant bit first.
 * The final character is right-padded with zero bits when the input length
 * in bits is not a multiple of 15.
 */
function u15Encode(data: Uint8Array): string {
  const chars: string[] = []
  // `acc` holds the bits not yet emitted; it never exceeds 14 + 8 = 22 bits,
  // so 32-bit integer arithmetic is safe.
  let acc = 0
  let accBits = 0
  for (const byte of data) {
    acc = (acc << 8) | byte
    accBits += 8
    if (accBits >= BITS_PER_CHAR) {
      accBits -= BITS_PER_CHAR
      chars.push(String.fromCodePoint(((acc >> accBits) & MAX_CHAR_VALUE) + BASE))
      acc &= (1 << accBits) - 1
    }
  }
  if (accBits > 0) {
    // Flush the remaining bits, zero-padded on the right.
    const value = (acc << (BITS_PER_CHAR - accBits)) & MAX_CHAR_VALUE
    chars.push(String.fromCodePoint(value + BASE))
  }
  return chars.join('')
}

/**
 * Unpack 15-bit characters back into bytes. Trailing bits that do not fill a
 * whole byte are dropped, so the result may carry one padding byte more than
 * the original input; the caller trims using the manifest's `bytes` field.
 *
 * @throws Error if any UTF-16 code unit lies outside BASE … BASE + 0x7FFF
 *   (this includes surrogates, whitespace and any other foreign character).
 */
function u15Decode(encoded: string): Uint8Array {
  const out = new Uint8Array(Math.floor((encoded.length * BITS_PER_CHAR) / 8))
  // `acc` never exceeds 7 + 15 = 22 bits.
  let acc = 0
  let accBits = 0
  let pos = 0
  for (let i = 0; i < encoded.length; i++) {
    const unit = encoded.charCodeAt(i)
    const value = unit - BASE
    if (value < 0 || value > MAX_CHAR_VALUE) {
      const hex = unit.toString(16).toUpperCase().padStart(4, '0')
      throw new Error(
        `storage-manifest: invalid u15 character U+${hex} at index ${i}`,
      )
    }
    acc = (acc << BITS_PER_CHAR) | value
    accBits += BITS_PER_CHAR
    while (accBits >= 8) {
      accBits -= 8
      out[pos++] = (acc >> accBits) & 0xff
    }
    acc &= (1 << accBits) - 1
  }
  return out
}

// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------

/**
 * Encode an arbitrary value to an array of draft-post text segments.
 * segments[0] is the manifest; segments[1..] are u15-encoded data chunks,
 * each at most SEGMENT_MAX_GRAPHEMES characters.
 *
 * @param data Any JSON-serializable value.
 * @returns The manifest followed by the data segments.
 * @throws Error if `data` has no JSON representation (e.g. `undefined` or a
 *   function); JSON.stringify's own errors (such as for circular structures)
 *   propagate unchanged.
 */
export function encode(data: unknown): string[] {
  // JSON.stringify returns undefined at runtime for values with no JSON form,
  // even though its declared return type is string. Encoding that would yield
  // a manifest that decode() can never parse, so reject it up front.
  const json = JSON.stringify(data) as string | undefined
  if (json === undefined) {
    throw new Error('storage-manifest: value is not JSON-serializable')
  }
  const compressed = gzip(new TextEncoder().encode(json))
  const compressedHash = toHex(sha256(compressed))

  const encoded = u15Encode(compressed)

  // Every character is a single UTF-16 code unit (U+3400–U+B3FF, below the
  // surrogate range), so slicing by index never splits a character. A string's
  // grapheme count cannot exceed its code point count, so each slice stays
  // within SEGMENT_MAX_GRAPHEMES graphemes.
  const dataSegments: string[] = []
  for (let i = 0; i < encoded.length; i += SEGMENT_MAX_GRAPHEMES) {
    dataSegments.push(encoded.slice(i, i + SEGMENT_MAX_GRAPHEMES))
  }

  // Build manifest without manifestHash, then hash it.
  // Each field is on its own line; line 2 is a human-readable comment.
  const partial = [
    MANIFEST_HEADER,
    MANIFEST_COMMENT,
    `updatedAt=${new Date().toISOString()}`,
    `codec=${CODEC_NAME}`,
    `overflowSegments=${dataSegments.length}`,
    `bytes=${compressed.length}`,
    `sha256=${compressedHash}`,
  ].join('\n')
  const manifestHash = toHex(sha256(new TextEncoder().encode(partial)))
  const manifest = `${partial}\nmanifestHash=${manifestHash}`

  return [manifest, ...dataSegments]
}

/**
 * Decode an array of draft-post text segments back to the original value.
 * Throws a descriptive Error for any validation failure.
 * Validation order: manifest prefix → manifestHash → codec → numeric fields →
 * segment count → u15 alphabet → bytes → sha256 → decompress → parse
 *
 * @param segments The manifest followed by the data segments, in order.
 * @returns The parsed JSON value (unvalidated; callers must narrow it).
 * @throws Error describing the first validation step that failed.
 */
export function decode(segments: string[]): unknown {
  const [manifestText, ...dataSegments] = segments
  if (manifestText === undefined) {
    throw new Error('storage-manifest: no segments')
  }

  const lines = manifestText.split('\n')

  if (lines[0] !== MANIFEST_HEADER) {
    throw new Error('storage-manifest: invalid manifest prefix')
  }

  // Last line must be the manifestHash
  const lastLine = lines[lines.length - 1] ?? ''
  const manifestHashField = MANIFEST_HASH_RE.exec(lastLine)?.[1]
  if (manifestHashField === undefined) {
    throw new Error('storage-manifest: missing manifestHash field')
  }

  // partial = everything except the last line
  const partial = lines.slice(0, -1).join('\n')

  // 1. Verify manifestHash
  const expectedManifestHash = toHex(sha256(new TextEncoder().encode(partial)))
  if (expectedManifestHash !== manifestHashField) {
    throw new Error('storage-manifest: manifestHash mismatch')
  }

  // Parse key=value fields from lines 2.. (line 0 = header, line 1 = comment).
  // Lines without '=' are ignored; a repeated key keeps its last value.
  const fields = new Map<string, string>()
  for (const line of lines.slice(2, -1)) {
    const eq = line.indexOf('=')
    if (eq !== -1) fields.set(line.slice(0, eq), line.slice(eq + 1))
  }

  // 2. Codec check
  const codec = fields.get('codec')
  if (codec !== CODEC_NAME) {
    throw new Error(`storage-manifest: unknown codec "${codec}"`)
  }

  // 3. Numeric fields must be present and well-formed; otherwise NaN would
  //    leak into the comparisons below and produce misleading errors.
  const overflowSegments = parseCountField(fields, 'overflowSegments')
  const bytes = parseCountField(fields, 'bytes')
  const sha256Hex = fields.get('sha256')

  // 4. Segment count
  if (dataSegments.length !== overflowSegments) {
    throw new Error(
      `storage-manifest: expected ${overflowSegments} data segments, got ${dataSegments.length}`,
    )
  }

  // 5. Decode u15 → compressed bytes (throws on characters outside the alphabet)
  const decoded = u15Decode(dataSegments.join(''))

  // 6. bytes length check
  if (decoded.length < bytes) {
    throw new Error(
      `storage-manifest: decoded length ${decoded.length} is less than declared bytes ${bytes}`,
    )
  }

  // Trim any padding byte that u15 decoding may have appended
  const compressed =
    decoded.length === bytes ? decoded : decoded.subarray(0, bytes)

  // 7. sha256 check
  const actualHash = toHex(sha256(compressed))
  if (actualHash !== sha256Hex) {
    throw new Error('storage-manifest: sha256 mismatch')
  }

  // 8. Decompress
  let jsonBytes: Uint8Array
  try {
    jsonBytes = inflate(compressed)
  } catch (e) {
    throw new Error(`storage-manifest: decompression failed: ${String(e)}`)
  }

  // 9. Parse
  try {
    return JSON.parse(new TextDecoder().decode(jsonBytes))
  } catch (e) {
    throw new Error(`storage-manifest: JSON parse failed: ${String(e)}`)
  }
}

/**
 * Return true if the given text looks like a witchsky storage manifest header.
 * Used to identify the storage draft among all of a user's drafts.
 */
export function isManifestSegment(text: string): boolean {
  return text.startsWith(`${MANIFEST_HEADER}\n`)
}