// Bluesky app fork with some witchin' additions 💫
/**
 * Witchsky Storage Manifest — codec
 *
 * Encodes arbitrary JSON as a thread of draft-post text segments.
 * The first segment is a plaintext manifest header; subsequent segments
 * contain the payload encoded as gzip+u15: the gzip bytes are packed 15 bits
 * per character into Unicode code points starting at U+3400 (so the output
 * spans U+3400–U+B3FF), making the data look like CJK Unified Ideographs.
 *
 * Manifest format (one field per line):
 *   witchsky:storage:v1
 *   Do not change me! These are your Witchsky settings.
 *   updatedAt=<ISO8601>
 *   codec=gzip+u15
 *   overflowSegments=<N>
 *   bytes=<N>
 *   sha256=<hex>
 *   manifestHash=<hex>
 *
 * manifestHash is the sha256 of all lines above it joined by '\n', so the
 * manifest is self-checking: accidental edits or truncation are detected.
 * The hash is unkeyed, so this is an integrity check, not a security measure.
 */

import {gzip, inflate} from 'pako'
import {sha256} from '@noble/hashes/sha256'

// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------

/** First code point of the u15 alphabet: a 15-bit value v is written as U+(BASE + v). */
const BASE = 0x3400
/** Number of payload bits carried by each encoded character. */
const BITS_PER_CHAR = 15
/** Maximum number of characters placed in a single data segment. */
const SEGMENT_MAX_GRAPHEMES = 1000

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/**
 * Format bytes as a lowercase hexadecimal string, two digits per byte.
 *
 * @param bytes Bytes to format.
 * @returns A string of length `2 * bytes.length`; empty for empty input.
 */
function toHex(bytes: Uint8Array): string {
  let hex = ''
  for (const b of bytes) {
    // padStart keeps the leading zero for values below 0x10.
    hex += b.toString(16).padStart(2, '0')
  }
  return hex
}

// ---------------------------------------------------------------------------
// u15 codec (output-identical to the original per-bit reference code)
// ---------------------------------------------------------------------------

/**
 * Pack bytes into a string carrying BITS_PER_CHAR bits per character.
 *
 * The input is read as one big-endian bit stream. Every group of
 * BITS_PER_CHAR bits becomes the character U+(BASE + value); the final group
 * is right-padded with zero bits.
 *
 * @param data Bytes to encode.
 * @returns The encoded string; empty for empty input.
 */
function u15Encode(data: Uint8Array): string {
  const mask = (1 << BITS_PER_CHAR) - 1
  // Stream through a small accumulator instead of materialising one array
  // slot per bit. `pending` never holds more than BITS_PER_CHAR - 1 + 8 bits,
  // so it stays well inside 32-bit integer range.
  let pending = 0
  let pendingBits = 0
  let result = ''
  for (const byte of data) {
    pending = (pending << 8) | byte
    pendingBits += 8
    while (pendingBits >= BITS_PER_CHAR) {
      pendingBits -= BITS_PER_CHAR
      result += String.fromCodePoint(BASE + ((pending >>> pendingBits) & mask))
    }
    // Drop the bits that were just emitted.
    pending &= (1 << pendingBits) - 1
  }
  if (pendingBits > 0) {
    // Left-align the leftover bits; the low bits are the zero padding.
    const padded = (pending << (BITS_PER_CHAR - pendingBits)) & mask
    result += String.fromCodePoint(BASE + padded)
  }
  return result
}

/**
 * Unpack a u15 string back into bytes.
 *
 * Each character contributes the low BITS_PER_CHAR bits of
 * (code point - BASE) to a big-endian bit stream, which is cut into whole
 * bytes; leftover bits that do not fill a byte are discarded. Because the
 * encoder pads its last character, the result may carry one trailing byte
 * more than the original data.
 *
 * Characters outside the u15 alphabet are not rejected: only their low
 * BITS_PER_CHAR bits are used, so callers must verify the result themselves.
 *
 * @param encoded String to decode.
 * @returns floor(characters * BITS_PER_CHAR / 8) bytes.
 */
function u15Decode(encoded: string): Uint8Array {
  const mask = (1 << BITS_PER_CHAR) - 1
  const out: number[] = []
  // `pending` holds at most 7 + BITS_PER_CHAR bits, so 32-bit ops are safe.
  let pending = 0
  let pendingBits = 0
  // for…of iterates by code point, so surrogate pairs are never split.
  for (const char of encoded) {
    const codePoint = char.codePointAt(0)
    if (codePoint === undefined) continue // cannot happen: char is non-empty
    pending = (pending << BITS_PER_CHAR) | ((codePoint - BASE) & mask)
    pendingBits += BITS_PER_CHAR
    while (pendingBits >= 8) {
      pendingBits -= 8
      out.push((pending >>> pendingBits) & 0xff)
    }
    // Drop the bits that were just emitted.
    pending &= (1 << pendingBits) - 1
  }
  return Uint8Array.from(out)
}

// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------

/**
 * Human-readable warning written as the second line of every manifest.
 * It is covered by manifestHash like every other manifest line.
 */
const MANIFEST_COMMENT = 'Do not change me! These are your Witchsky settings.'

/**
 * Encode a JSON-serializable value as an array of draft-post text segments.
 *
 * segments[0] is the plaintext manifest; segments[1..] are u15-encoded chunks
 * of the gzip-compressed JSON, each at most SEGMENT_MAX_GRAPHEMES characters.
 *
 * @param data Value to store. It must have a JSON representation.
 * @returns The manifest followed by the data segments, in order.
 * @throws Error if `data` has no JSON representation, i.e. `JSON.stringify`
 *   returns `undefined` (for `undefined`, a function or a symbol). Errors
 *   thrown by `JSON.stringify` itself (circular structure, BigInt) propagate
 *   unchanged.
 */
export function encode(data: unknown): string[] {
  // JSON.stringify is typed as returning string but yields undefined for
  // values with no JSON form. Encoding that would store an empty payload that
  // decode() can never parse, so fail here instead of writing bad data.
  const json: string | undefined = JSON.stringify(data)
  if (json === undefined) {
    throw new Error('storage-manifest: value is not JSON-serializable')
  }

  const compressed = gzip(new TextEncoder().encode(json))
  const compressedHash = toHex(sha256(compressed))
  const encoded = u15Encode(compressed)

  // u15 output only uses code points U+3400–U+B3FF: all in the BMP and below
  // the surrogate range, so every character is exactly one UTF-16 code unit
  // and slicing by index never splits a character. That range includes some
  // combining marks, which can only merge neighbours into larger grapheme
  // clusters, so a segment's grapheme count may be lower than its length but
  // never higher: slicing by length keeps each segment within the limit.
  // An empty `encoded` string yields no segments (overflowSegments=0).
  const dataSegments: string[] = []
  for (let i = 0; i < encoded.length; i += SEGMENT_MAX_GRAPHEMES) {
    dataSegments.push(encoded.slice(i, i + SEGMENT_MAX_GRAPHEMES))
  }

  // Build the manifest without manifestHash, then hash it.
  // Each field is on its own line; line 2 is a human-readable comment.
  const partial = [
    'witchsky:storage:v1',
    MANIFEST_COMMENT,
    `updatedAt=${new Date().toISOString()}`,
    `codec=gzip+u15`,
    `overflowSegments=${dataSegments.length}`,
    `bytes=${compressed.length}`,
    `sha256=${compressedHash}`,
  ].join('\n')
  const manifestHash = toHex(sha256(new TextEncoder().encode(partial)))
  const manifest = `${partial}\nmanifestHash=${manifestHash}`

  return [manifest, ...dataSegments]
}

/**
 * Read a manifest field that must be a non-negative decimal integer.
 * Throws when the field is absent or contains anything but ASCII digits.
 */
function parseCountField(fields: Map<string, string>, name: string): number {
  const raw = fields.get(name)
  if (raw === undefined || !/^[0-9]+$/.test(raw)) {
    throw new Error(`storage-manifest: missing or invalid ${name} field`)
  }
  return parseInt(raw, 10)
}

/**
 * Decode an array of draft-post text segments back to the original value.
 *
 * Validation order: manifest prefix → manifestHash → codec → numeric fields
 * → segment count → bytes → sha256 → decompress → parse.
 *
 * @param segments segments[0] is the manifest, segments[1..] the data chunks.
 * @returns The parsed JSON value. It is not validated against any schema.
 * @throws Error with a `storage-manifest:` message for any validation failure.
 */
export function decode(segments: string[]): unknown {
  const manifestText = segments[0]
  if (segments.length === 0 || manifestText === undefined) {
    throw new Error('storage-manifest: no segments')
  }

  const lines = manifestText.split('\n')
  if (lines[0] !== 'witchsky:storage:v1') {
    throw new Error('storage-manifest: invalid manifest prefix')
  }

  // The last line must be the manifestHash field.
  const hashLineMatch = /^manifestHash=([0-9a-f]+)$/.exec(
    lines[lines.length - 1] ?? '',
  )
  const manifestHashField = hashLineMatch?.[1]
  if (manifestHashField === undefined) {
    throw new Error('storage-manifest: missing manifestHash field')
  }

  // 1. Verify manifestHash over everything above the hash line.
  const partial = lines.slice(0, -1).join('\n')
  const expectedManifestHash = toHex(
    sha256(new TextEncoder().encode(partial)),
  )
  if (expectedManifestHash !== manifestHashField) {
    throw new Error('storage-manifest: manifestHash mismatch')
  }

  // Collect key=value fields from lines 2.. (line 0 = header, line 1 =
  // comment). A Map keeps manifest-supplied keys away from Object.prototype.
  const fields = new Map<string, string>()
  for (const line of lines.slice(2, -1)) {
    const eq = line.indexOf('=')
    if (eq !== -1) fields.set(line.slice(0, eq), line.slice(eq + 1))
  }

  // 2. Codec check
  const codec = fields.get('codec')
  if (codec !== 'gzip+u15') {
    throw new Error(`storage-manifest: unknown codec "${codec}"`)
  }

  // Reject missing or malformed counts up front rather than letting NaN or a
  // partially parsed number flow into the comparisons below.
  const overflowSegments = parseCountField(fields, 'overflowSegments')
  const bytes = parseCountField(fields, 'bytes')
  const sha256Hex = fields.get('sha256')

  // 3. Segment count
  if (segments.length - 1 !== overflowSegments) {
    throw new Error(
      `storage-manifest: expected ${overflowSegments} data segments, got ${segments.length - 1}`,
    )
  }

  // 4. Decode u15 → compressed bytes
  const encoded = segments.slice(1).join('')
  const decoded = u15Decode(encoded)

  // 5. bytes length check
  if (decoded.length < bytes) {
    throw new Error(
      `storage-manifest: decoded length ${decoded.length} is less than declared bytes ${bytes}`,
    )
  }

  // u15 pads its final character, so the decoded buffer may be longer than
  // the payload; keep only the declared number of bytes before hashing.
  const compressed =
    decoded.length === bytes ? decoded : decoded.subarray(0, bytes)

  // 6. sha256 check (a missing sha256 field also fails here)
  const actualHash = toHex(sha256(compressed))
  if (actualHash !== sha256Hex) {
    throw new Error('storage-manifest: sha256 mismatch')
  }

  // 7. Decompress
  let jsonBytes: Uint8Array
  try {
    jsonBytes = inflate(compressed)
  } catch (e) {
    throw new Error(`storage-manifest: decompression failed: ${e}`)
  }

  // 8. Parse
  try {
    return JSON.parse(new TextDecoder().decode(jsonBytes))
  } catch (e) {
    throw new Error(`storage-manifest: JSON parse failed: ${e}`)
  }
}

/**
 * Return true if the given text looks like a witchsky storage manifest header.
 * Used to identify the storage draft among all of a user's drafts.
 *
 * Only the leading version line (including its newline) is checked; the rest
 * of the manifest is not validated here.
 *
 * @param text Text of a draft segment.
 * @returns Whether `text` starts with the `witchsky:storage:v1` line.
 */
export function isManifestSegment(text: string): boolean {
  return text.startsWith('witchsky:storage:v1\n')
}