atproto utils for zig
zat.dev
atproto
sdk
zig
//! DAG-CBOR codec
//!
//! encode and decode the DAG-CBOR subset used by AT Protocol.
//! handles: integers, byte/text strings, arrays, maps, tag 42 (CID links),
//! booleans, null. no floats, no indefinite lengths.
//!
//! encoding follows DAG-CBOR deterministic rules:
//! - integers use shortest encoding
//! - map keys sorted by byte length, then lexicographically
//! - CIDs encoded as tag 42 with 0x00 identity multibase prefix
//!
//! see: https://ipld.io/specs/codecs/dag-cbor/spec/
13
14const std = @import("std");
15const Allocator = std.mem.Allocator;
16
/// CBOR major type: the top three bits of an item's initial byte.
const MajorType = enum(u3) {
    /// non-negative integer
    unsigned = 0,
    /// negative integer, encoded as -1 - argument
    negative = 1,
    /// raw byte string
    byte_string = 2,
    /// UTF-8 text string
    text_string = 3,
    /// array of items
    array = 4,
    /// map of key/value pairs
    map = 5,
    /// tagged item
    tag = 6,
    /// simple values (booleans, null) and floats
    simple = 7,
};
28
/// decoded CBOR value.
/// slices (`bytes`, `text`, `array`, `map`, map keys, CID bytes) are borrowed:
/// the accessors below hand them out without copying.
pub const Value = union(enum) {
    unsigned: u64,
    negative: i64, // the actual negative number (-1 - raw already applied), so -1 is stored as -1
    bytes: []const u8,
    text: []const u8,
    array: []const Value,
    map: []const MapEntry,
    boolean: bool,
    null,
    cid: Cid,

    pub const MapEntry = struct {
        key: []const u8, // DAG-CBOR: keys are always text strings
        value: Value,
    };

    /// look up `key` in a map value.
    /// linear scan; returns the first matching entry's value, or null when
    /// `self` is not a map or the key is absent.
    pub fn get(self: Value, key: []const u8) ?Value {
        return switch (self) {
            .map => |entries| {
                for (entries) |entry| {
                    if (std.mem.eql(u8, entry.key, key)) return entry.value;
                }
                return null;
            },
            else => null,
        };
    }

    /// get a text string from a map by key (null if absent or not text)
    pub fn getString(self: Value, key: []const u8) ?[]const u8 {
        const v = self.get(key) orelse return null;
        return switch (v) {
            .text => |s| s,
            else => null,
        };
    }

    /// get an integer from a map by key.
    /// null if absent, not an integer, or an unsigned value above maxInt(i64).
    pub fn getInt(self: Value, key: []const u8) ?i64 {
        const v = self.get(key) orelse return null;
        return switch (v) {
            .unsigned => |u| std.math.cast(i64, u),
            .negative => |n| n,
            else => null,
        };
    }

    /// get an unsigned integer from a map by key.
    /// null if absent, not an integer, or the stored value is below zero.
    pub fn getUint(self: Value, key: []const u8) ?u64 {
        const v = self.get(key) orelse return null;
        return switch (v) {
            .unsigned => |u| u,
            // checked cast: any value below zero yields null
            .negative => |n| std.math.cast(u64, n),
            else => null,
        };
    }

    /// get a bool from a map by key (null if absent or not a boolean)
    pub fn getBool(self: Value, key: []const u8) ?bool {
        const v = self.get(key) orelse return null;
        return switch (v) {
            .boolean => |b| b,
            else => null,
        };
    }

    /// get a byte string from a map by key (null if absent or not bytes)
    pub fn getBytes(self: Value, key: []const u8) ?[]const u8 {
        const v = self.get(key) orelse return null;
        return switch (v) {
            .bytes => |b| b,
            else => null,
        };
    }

    /// get an array from a map by key (null if absent or not an array)
    pub fn getArray(self: Value, key: []const u8) ?[]const Value {
        const v = self.get(key) orelse return null;
        return switch (v) {
            .array => |a| a,
            else => null,
        };
    }

    /// get a CID from a map by key (null if absent or not a CID link)
    pub fn getCid(self: Value, key: []const u8) ?Cid {
        const v = self.get(key) orelse return null;
        return switch (v) {
            .cid => |c| c,
            else => null,
        };
    }

    // verify the Value union stays slim after the Cid optimization (was ~64 bytes).
    // these are upper bounds rather than exact sizes: with 4-byte pointers the
    // types are smaller, and an exact-equality check would refuse to compile there.
    comptime {
        std.debug.assert(@sizeOf(Value) <= 24);
        std.debug.assert(@sizeOf(MapEntry) <= 40);
    }
};
130
/// multicodec content-type codes that appear in CIDs handled by this module
pub const Codec = struct {
    /// raw binary
    pub const raw: u64 = 0x55;
    /// DAG-PB (the implicit codec of CIDv0)
    pub const dag_pb: u64 = 0x70;
    /// DAG-CBOR
    pub const dag_cbor: u64 = 0x71;
};
137
/// multihash function codes that appear in CIDs handled by this module
pub const HashFn = struct {
    /// identity "hash"
    pub const identity: u64 = 0x00;
    /// SHA2-256
    pub const sha2_256: u64 = 0x12;
};
143
/// CID (Content Identifier) parsed from tag 42.
/// stores only the raw bytes — version/codec/hash_fn/digest are parsed lazily on demand.
/// this keeps the struct to a single slice, which in turn keeps the Value union small.
pub const Cid = struct {
    raw: []const u8,

    /// true when the bytes start with 0x12 0x20 (sha2-256 multihash header),
    /// which this module treats as a CIDv0. caller must ensure raw.len >= 2.
    fn hasV0Prefix(self: Cid) bool {
        return self.raw[0] == 0x12 and self.raw[1] == 0x20;
    }

    /// parse CID version from raw bytes (0 for CIDv0, otherwise the leading varint).
    /// returns null when fewer than 2 bytes are present or the varint is malformed.
    pub fn version(self: Cid) ?u64 {
        if (self.raw.len < 2) return null;
        if (self.hasV0Prefix()) return 0;
        var pos: usize = 0;
        return readUvarint(self.raw, &pos);
    }

    /// parse codec from raw bytes (implicit dag-pb for CIDv0).
    /// returns null when the bytes are too short or a varint is malformed.
    pub fn codec(self: Cid) ?u64 {
        if (self.raw.len < 2) return null;
        if (self.hasV0Prefix()) return 0x70; // dag-pb
        var pos: usize = 0;
        _ = readUvarint(self.raw, &pos) orelse return null; // version
        return readUvarint(self.raw, &pos);
    }

    /// parse hash function code from raw bytes (sha2-256 for CIDv0).
    /// returns null when the bytes are too short or a varint is malformed.
    pub fn hashFn(self: Cid) ?u64 {
        if (self.raw.len < 2) return null;
        if (self.hasV0Prefix()) return 0x12; // sha2-256
        var pos: usize = 0;
        _ = readUvarint(self.raw, &pos) orelse return null; // version
        _ = readUvarint(self.raw, &pos) orelse return null; // codec
        return readUvarint(self.raw, &pos);
    }

    /// parse digest bytes from raw CID (zero-copy slice into `raw`).
    /// returns null when the bytes are too short, a varint is malformed, or the
    /// declared digest length runs past the end of `raw`.
    pub fn digest(self: Cid) ?[]const u8 {
        if (self.raw.len < 2) return null;
        if (self.hasV0Prefix()) {
            if (self.raw.len < 34) return null;
            return self.raw[2..34];
        }
        var pos: usize = 0;
        _ = readUvarint(self.raw, &pos) orelse return null; // version
        _ = readUvarint(self.raw, &pos) orelse return null; // codec
        _ = readUvarint(self.raw, &pos) orelse return null; // hash_fn
        const digest_len = readUvarint(self.raw, &pos) orelse return null;
        // digest_len comes straight from the input and may be as large as 2^64-1,
        // so compare against the remaining length instead of computing
        // pos + digest_len (which could overflow). readUvarint never advances
        // pos past raw.len, so the subtraction cannot underflow.
        if (digest_len > self.raw.len - pos) return null;
        const len: usize = @intCast(digest_len); // bounded by raw.len above
        return self.raw[pos..][0..len];
    }

    /// create a CIDv1 by hashing DAG-CBOR encoded data with SHA-256.
    /// the returned Cid's raw slice is owned by the allocator.
    pub fn forDagCbor(allocator: Allocator, data: []const u8) !Cid {
        return create(allocator, 1, Codec.dag_cbor, HashFn.sha2_256, data);
    }

    /// build a CID with the given version, codec and hash function code.
    /// NOTE: `data` is always hashed with SHA-256; `hash_fn_code` is only written
    /// into the CID header, so pass `HashFn.sha2_256` for a self-consistent CID.
    /// the returned Cid's raw slice is allocated with `allocator`.
    pub fn create(allocator: Allocator, ver: u64, cod: u64, hash_fn_code: u64, data: []const u8) !Cid {
        const Sha256 = std.crypto.hash.sha2.Sha256;
        var hash: [Sha256.digest_length]u8 = undefined;
        Sha256.hash(data, &hash, .{});

        // build CID on the stack then copy to allocator — avoids dynamic writer
        // overhead. max varint size is 10 bytes × 4 fields + 32 byte hash = 72 bytes,
        // so none of the fixed-buffer writes below can fail.
        var buf: [72]u8 = undefined;
        var w: std.Io.Writer = .fixed(&buf);
        writeUvarint(&w, ver) catch unreachable;
        writeUvarint(&w, cod) catch unreachable;
        writeUvarint(&w, hash_fn_code) catch unreachable;
        writeUvarint(&w, Sha256.digest_length) catch unreachable;
        w.writeAll(&hash) catch unreachable;

        const raw = try allocator.dupe(u8, w.buffered());
        return .{ .raw = raw };
    }

    /// copy this CID's raw bytes (version varint + codec varint + multihash)
    /// into a fresh allocation owned by the caller.
    pub fn toBytes(self: Cid, allocator: Allocator) ![]u8 {
        return try allocator.dupe(u8, self.raw);
    }
};
226
/// every error the decoding and reading functions in this file can return
pub const DecodeError = error{
    // input ended early or has bytes left over
    UnexpectedEof,
    TrailingBytes,

    // well-formed CBOR that the DAG-CBOR subset rejects
    IndefiniteLength,
    UnsupportedSimpleValue,
    UnsupportedFloat,
    UnsupportedTag,
    ReservedAdditionalInfo,
    NonMinimalEncoding,

    // map, text and CID validation
    InvalidMapKey,
    UnsortedMapKeys,
    DuplicateMapKey,
    InvalidUtf8,
    InvalidCid,

    // limits and resources
    Overflow,
    MaxDepthExceeded,
    OutOfMemory,

    // a typed reader found a different kind of item
    WrongType,
};
246
/// upper bound on array/map nesting accepted by the decoder; guards against
/// stack overflow from deeply nested input
pub const max_depth: usize = 128;
249
/// decode one CBOR value starting at the first byte of `data`.
/// returns the decoded value together with how many bytes it occupied;
/// anything after that is left untouched.
pub fn decode(allocator: Allocator, data: []const u8) DecodeError!struct { value: Value, consumed: usize } {
    var cursor: usize = 0;
    const decoded = try decodeAt(allocator, data, &cursor, 0);
    return .{ .value = decoded, .consumed = cursor };
}
257
/// decode `data` as exactly one CBOR value.
/// fails with error.TrailingBytes when bytes remain after that value.
pub fn decodeAll(allocator: Allocator, data: []const u8) DecodeError!Value {
    var cursor: usize = 0;
    const decoded = try decodeAt(allocator, data, &cursor, 0);
    return if (cursor == data.len) decoded else error.TrailingBytes;
}
265
/// decode the CBOR value that starts at `pos.*`, advancing `pos.*` past it.
///
/// `depth` is the current array/map nesting level; containers at
/// `depth >= max_depth` are rejected with error.MaxDepthExceeded.
/// byte strings, text strings, map keys and CID bytes are returned as slices
/// into `data` (no copy). arrays and maps allocate their element storage
/// from `allocator`.
///
/// on failure only the container slice being filled at this level is freed;
/// storage already allocated for nested children is not, so callers that need
/// a leak-free error path should pass an arena allocator.
fn decodeAt(allocator: Allocator, data: []const u8, pos: *usize, depth: usize) DecodeError!Value {
    if (pos.* >= data.len) return error.UnexpectedEof;
    const initial = data[pos.*];
    const major: u3 = @truncate(initial >> 5);
    const additional: u5 = @truncate(initial);

    // simple values (major 7) are handled without readArg since floats
    // use additional 25/26/27 to mean float16/32/64, not integer arguments
    if (major == 7) {
        pos.* += 1;
        return switch (additional) {
            20 => .{ .boolean = false },
            21 => .{ .boolean = true },
            22 => .null,
            25, 26, 27 => error.UnsupportedFloat, // DAG-CBOR forbids floats in AT Protocol
            31 => error.IndefiniteLength, // break code — DAG-CBOR forbids indefinite lengths
            else => error.UnsupportedSimpleValue,
        };
    }

    const arg = try readArg(data, pos.*);
    pos.* = arg.end;

    return switch (@as(MajorType, @enumFromInt(major))) {
        .unsigned => .{ .unsigned = arg.val },
        .negative => blk: {
            // negative CBOR: value is -1 - val; anything below minInt(i64) is rejected
            if (arg.val > std.math.maxInt(i64)) return error.Overflow;
            break :blk .{ .negative = -1 - @as(i64, @intCast(arg.val)) };
        },
        .byte_string => blk: {
            const len = std.math.cast(usize, arg.val) orelse return error.UnexpectedEof;
            const end = std.math.add(usize, pos.*, len) catch return error.UnexpectedEof;
            if (end > data.len) return error.UnexpectedEof;
            const bytes = data[pos.*..end];
            pos.* = end;
            break :blk .{ .bytes = bytes };
        },
        .text_string => blk: {
            const len = std.math.cast(usize, arg.val) orelse return error.UnexpectedEof;
            const end = std.math.add(usize, pos.*, len) catch return error.UnexpectedEof;
            if (end > data.len) return error.UnexpectedEof;
            const text = data[pos.*..end];
            if (!std.unicode.utf8ValidateSlice(text)) return error.InvalidUtf8;
            pos.* = end;
            break :blk .{ .text = text };
        },
        .array => blk: {
            if (depth >= max_depth) return error.MaxDepthExceeded;
            // sanity check: each element is at least 1 byte, so a count larger
            // than the remaining input cannot be satisfied (also bounds the alloc)
            if (arg.val > data.len - pos.*) return error.UnexpectedEof;
            const items = try allocator.alloc(Value, @intCast(arg.val));
            errdefer allocator.free(items);
            for (items) |*item| {
                item.* = try decodeAt(allocator, data, pos, depth + 1);
            }
            break :blk .{ .array = items };
        },
        .map => blk: {
            if (depth >= max_depth) return error.MaxDepthExceeded;
            // sanity check: each entry is at least 2 bytes (key + value)
            if (arg.val > (data.len - pos.*) / 2) return error.UnexpectedEof;
            const entries = try allocator.alloc(Value.MapEntry, @intCast(arg.val));
            errdefer allocator.free(entries);
            for (entries, 0..) |*entry, i| {
                // DAG-CBOR: map keys must be text strings — inline read to avoid
                // a full decodeAt + Value union construction per key
                const key_arg = try readArg(data, pos.*);
                pos.* = key_arg.end;
                if (key_arg.major != 3) return error.InvalidMapKey;
                const key_len = std.math.cast(usize, key_arg.val) orelse return error.UnexpectedEof;
                const key_end = std.math.add(usize, pos.*, key_len) catch return error.UnexpectedEof;
                if (key_end > data.len) return error.UnexpectedEof;
                entry.key = data[pos.*..key_end];
                if (!std.unicode.utf8ValidateSlice(entry.key)) return error.InvalidUtf8;
                pos.* = key_end;

                // DAG-CBOR: keys must be sorted (shorter first, then lex) and unique
                if (i > 0) {
                    const prev = entries[i - 1].key;
                    if (prev.len < entry.key.len) {
                        // ok — shorter key first
                    } else if (prev.len == entry.key.len) {
                        switch (std.mem.order(u8, prev, entry.key)) {
                            .lt => {}, // ok — lex order
                            .eq => return error.DuplicateMapKey,
                            .gt => return error.UnsortedMapKeys,
                        }
                    } else {
                        return error.UnsortedMapKeys;
                    }
                }

                entry.value = try decodeAt(allocator, data, pos, depth + 1);
            }
            break :blk .{ .map = entries };
        },
        .tag => blk: {
            if (arg.val != 42) return error.UnsupportedTag; // DAG-CBOR only allows tag 42 (CID)
            // CID link — content must be a byte string with 0x00 prefix.
            // check the major type BEFORE recursing: the recursive call below does
            // not increase `depth`, so without this guard a run of nested tag-42
            // headers would recurse once per header with no limit. requiring a
            // byte string here bounds the recursion to a single level.
            if (pos.* >= data.len) return error.UnexpectedEof;
            if ((data[pos.*] >> 5) != 2) return error.InvalidCid;
            const content = try decodeAt(allocator, data, pos, depth);
            const cid_bytes = switch (content) {
                .bytes => |b| b,
                else => return error.InvalidCid,
            };
            // CID byte string must have 0x00 identity multibase prefix + at least
            // version byte + codec byte (minimum 3 bytes total)
            if (cid_bytes.len < 3 or cid_bytes[0] != 0x00) return error.InvalidCid;
            break :blk .{ .cid = .{ .raw = cid_bytes[1..] } }; // zero-cost: just reference the bytes
        },
        .simple => unreachable, // handled above
    };
}
379
/// view `raw` as a Cid. `raw` must already have the 0x00 multibase prefix
/// stripped. nothing is validated or copied here — the Cid accessors
/// (version()/codec()/digest()) parse the bytes when called.
pub fn parseCid(raw: []const u8) Cid {
    const wrapped: Cid = .{ .raw = raw };
    return wrapped;
}
385
/// read an unsigned LEB128 varint from `data` at `pos.*`, advancing `pos.*`
/// past every byte that was examined.
/// returns null when the input ends mid-varint or when the value would not
/// fit in a u64 (more than 10 bytes, or a 10th byte greater than 1).
pub fn readUvarint(data: []const u8, pos: *usize) ?u64 {
    var acc: u64 = 0;
    var index: usize = 0;
    while (index < 10) : (index += 1) {
        if (pos.* >= data.len) return null;
        const b = data[pos.*];
        pos.* += 1;
        // the 10th byte lands at bit 63, so only its lowest bit fits
        if (index == 9 and b > 1) return null;
        const shift: u6 = @intCast(index * 7);
        acc |= @as(u64, b & 0x7f) << shift;
        if (b & 0x80 == 0) return acc;
    }
    return null; // varint too long
}
403
404// === encoder ===
405
/// errors the encoder itself can produce
pub const EncodeError = error{OutOfMemory};
409
/// emit a CBOR header (initial byte plus big-endian argument) for `major`/`val`
/// in the shortest form DAG-CBOR allows: inline for values below 24, otherwise
/// 1, 2, 4 or 8 argument bytes. the whole header goes out in one writeAll call.
fn writeArgument(writer: anytype, major: u3, val: u64) !void {
    const lead: u8 = @as(u8, major) << 5;

    // tiny values live in the initial byte itself
    if (val < 24) return writer.writeAll(&.{lead | @as(u8, @intCast(val))});
    if (val <= 0xff) return writer.writeAll(&.{ lead | 24, @as(u8, @intCast(val)) });

    // 2/4/8-byte arguments: take the low `width` bytes of the big-endian u64
    const width: usize = if (val <= 0xffff) 2 else if (val <= 0xffff_ffff) 4 else 8;
    const info: u8 = if (val <= 0xffff) 25 else if (val <= 0xffff_ffff) 26 else 27;

    var big_endian: [8]u8 = undefined;
    std.mem.writeInt(u64, &big_endian, val, .big);

    var header: [9]u8 = undefined;
    header[0] = lead | info;
    @memcpy(header[1..][0..width], big_endian[8 - width ..]);
    try writer.writeAll(header[0 .. 1 + width]);
}
444
/// report whether `entries` are in strictly ascending DAG-CBOR key order
/// (shorter keys first, ties broken bytewise). equal adjacent keys count as
/// not sorted. empty and single-entry maps are trivially sorted.
fn keysAlreadySorted(entries: []const Value.MapEntry) bool {
    var i: usize = 1;
    while (i < entries.len) : (i += 1) {
        const before = entries[i - 1].key;
        const after = entries[i].key;
        if (before.len > after.len) return false;
        if (before.len == after.len and std.mem.order(u8, before, after) != .lt) return false;
    }
    return true;
}
456
/// sort predicate for map entries: compare key lengths first and fall back to
/// a bytewise comparison only when the lengths match
fn dagCborKeyLessThan(_: void, a: Value.MapEntry, b: Value.MapEntry) bool {
    return switch (std.math.order(a.key.len, b.key.len)) {
        .lt => true,
        .gt => false,
        .eq => std.mem.order(u8, a.key, b.key) == .lt,
    };
}
462
/// write a CBOR text string. strings under 24 bytes — the usual case for map
/// keys — have their one-byte header and payload copied into a stack buffer
/// and sent with a single writeAll; longer strings are written as a header
/// followed by the payload.
fn writeShortText(writer: anytype, text: []const u8) !void {
    if (text.len >= 24) {
        try writeArgument(writer, 3, text.len);
        return writer.writeAll(text);
    }

    // 0x60 is major type 3 with the length stored in the low five bits
    var fused: [24]u8 = undefined;
    const total = 1 + text.len;
    fused[0] = 0x60 | @as(u8, @intCast(text.len));
    @memcpy(fused[1..total], text);
    try writer.writeAll(fused[0..total]);
}
478
/// serialize `value` to `writer` as DAG-CBOR.
/// `allocator` is touched only when a map with more than 16 entries arrives
/// out of key order and a scratch copy is needed for sorting; that copy is
/// released before returning.
pub fn encode(allocator: Allocator, writer: anytype, value: Value) !void {
    switch (value) {
        .unsigned => |n| try writeArgument(writer, 0, n),
        // CBOR negative: -1 - n encoded in major type 1
        .negative => |n| try writeArgument(writer, 1, @as(u64, @intCast(-1 - n))),
        .bytes => |payload| {
            try writeArgument(writer, 2, payload.len);
            try writer.writeAll(payload);
        },
        .text => |payload| try writeShortText(writer, payload),
        .array => |elements| {
            try writeArgument(writer, 4, elements.len);
            for (elements) |element| try encode(allocator, writer, element);
        },
        .map => |entries| {
            try writeArgument(writer, 5, entries.len);

            // DAG-CBOR: keys sorted by byte length, then lexicographically.
            // fast path: input already in order (common for decoded data)
            if (keysAlreadySorted(entries)) {
                for (entries) |entry| {
                    try writeShortText(writer, entry.key);
                    try encode(allocator, writer, entry.value);
                }
                return;
            }

            // otherwise sort a scratch copy: on the stack for small maps
            // (≤16 entries), on the heap for larger ones
            var stack_copy: [16]Value.MapEntry = undefined;
            const on_heap = entries.len > stack_copy.len;
            const scratch: []Value.MapEntry = if (on_heap)
                try allocator.alloc(Value.MapEntry, entries.len)
            else
                stack_copy[0..entries.len];
            defer if (on_heap) allocator.free(scratch);

            @memcpy(scratch, entries);
            std.mem.sort(Value.MapEntry, scratch, {}, dagCborKeyLessThan);
            for (scratch) |entry| {
                try writeShortText(writer, entry.key);
                try encode(allocator, writer, entry.value);
            }
        },
        .boolean => |flag| {
            const byte: u8 = if (flag) 0xf5 else 0xf4;
            try writer.writeByte(byte);
        },
        .null => try writer.writeByte(0xf6),
        .cid => |link| {
            // tag 42 + byte string with 0x00 identity multibase prefix + raw CID bytes
            try writeArgument(writer, 6, 42);
            try writeArgument(writer, 2, 1 + link.raw.len);
            try writer.writeByte(0x00);
            try writer.writeAll(link.raw);
        },
    }
}
541
/// encode `value` as DAG-CBOR into a new buffer allocated from `allocator`.
/// the caller owns the returned slice.
pub fn encodeAlloc(allocator: Allocator, value: Value) ![]u8 {
    var sink: std.Io.Writer.Allocating = .init(allocator);
    errdefer sink.deinit(); // release the partial buffer if encoding fails
    try encode(allocator, &sink.writer, value);
    return sink.toOwnedSlice();
}
549
/// write `val` as an unsigned LEB128 varint: seven payload bits per byte,
/// least-significant group first, high bit set on every byte except the last.
/// used for CID and CAR serialization.
pub fn writeUvarint(writer: anytype, val: u64) !void {
    var rest = val;
    while (true) {
        const low: u8 = @as(u8, @truncate(rest)) & 0x7f;
        rest >>= 7;
        // nothing left above these seven bits: this is the final byte
        if (rest == 0) return writer.writeByte(low);
        try writer.writeByte(low | 0x80);
    }
}
559
/// A parsed CBOR item header.
pub const Arg = struct {
    /// major type: the high 3 bits of the initial byte
    major: u3,
    /// argument value taken from the additional info or the bytes following it
    val: u64,
    /// position of the first byte after the header
    end: usize,
};
566
/// Parse the CBOR item header at `pos`: the initial byte plus any argument
/// bytes that follow it. Returns the major type, the argument value, and the
/// position just past the header.
///
/// Enforces shortest-form encoding (DAG-CBOR requirement): an argument stored
/// in more bytes than it needs yields error.NonMinimalEncoding. Additional
/// info 28..30 yields error.ReservedAdditionalInfo and 31 yields
/// error.IndefiniteLength. Running out of input yields error.UnexpectedEof.
pub fn readArg(data: []const u8, pos: usize) DecodeError!Arg {
    if (pos >= data.len) return error.UnexpectedEof;
    const initial = data[pos];
    const major: u3 = @truncate(initial >> 5);
    const info: u5 = @truncate(initial);
    const start = pos + 1;

    // number of big-endian argument bytes that follow the initial byte
    const width: usize = switch (info) {
        0...23 => return .{ .major = major, .val = info, .end = start },
        24 => 1,
        25 => 2,
        26 => 4,
        27 => 8,
        28, 29, 30 => return error.ReservedAdditionalInfo,
        31 => return error.IndefiniteLength,
    };
    // start <= data.len here, so the subtraction cannot underflow
    if (width > data.len - start) return error.UnexpectedEof;

    var val: u64 = 0;
    for (data[start..][0..width]) |byte| val = (val << 8) | byte;

    // smallest value that actually requires this many argument bytes
    const smallest_allowed: u64 = switch (width) {
        1 => 24,
        2 => 0x100,
        4 => 0x1_0000,
        else => 0x1_0000_0000,
    };
    if (val < smallest_allowed) return error.NonMinimalEncoding;

    return .{ .major = major, .val = val, .end = start + width };
}
614
615// ---------------------------------------------------------------------------
616// Type-specific readers — zero-copy, no allocator needed
617// ---------------------------------------------------------------------------
618
/// A zero-copy slice read from the input, plus the position just after it.
pub const SliceResult = struct {
    val: []const u8,
    end: usize,
};

/// An unsigned integer (or element count), plus the position just after it.
pub const U64Result = struct {
    val: u64,
    end: usize,
};

/// A signed integer, plus the position just after it.
pub const I64Result = struct {
    val: i64,
    end: usize,
};

/// A boolean, plus the position just after it.
pub const BoolResult = struct {
    val: bool,
    end: usize,
};
623
/// Read the CBOR text string (major type 3) that starts at `pos`.
/// The payload is checked for valid UTF-8 and returned as a slice into `data`.
/// Errors: WrongType for any other major type, UnexpectedEof when the declared
/// length runs past the input, InvalidUtf8 for malformed text.
pub fn readText(data: []const u8, pos: usize) DecodeError!SliceResult {
    const header = try readArg(data, pos);
    if (header.major != 3) return error.WrongType;

    const len = std.math.cast(usize, header.val) orelse return error.UnexpectedEof;
    // readArg never reports an end beyond data.len, so this cannot underflow
    if (len > data.len - header.end) return error.UnexpectedEof;

    const payload = data[header.end..][0..len];
    if (!std.unicode.utf8ValidateSlice(payload)) return error.InvalidUtf8;
    return .{ .val = payload, .end = header.end + len };
}
636
/// Read the CBOR byte string (major type 2) that starts at `pos`.
/// The payload is returned as a slice into `data`, not copied.
/// Errors: WrongType for any other major type, UnexpectedEof when the declared
/// length runs past the input.
pub fn readBytes(data: []const u8, pos: usize) DecodeError!SliceResult {
    const header = try readArg(data, pos);
    if (header.major != 2) return error.WrongType;

    const len = std.math.cast(usize, header.val) orelse return error.UnexpectedEof;
    // readArg never reports an end beyond data.len, so this cannot underflow
    if (len > data.len - header.end) return error.UnexpectedEof;

    return .{ .val = data[header.end..][0..len], .end = header.end + len };
}
647
/// Read the CBOR unsigned integer (major type 0) that starts at `pos`.
/// Any other major type is error.WrongType.
pub fn readUint(data: []const u8, pos: usize) DecodeError!U64Result {
    const header = try readArg(data, pos);
    return if (header.major == 0)
        .{ .val = header.val, .end = header.end }
    else
        error.WrongType;
}
654
/// Read the CBOR integer (major type 0 or 1) that starts at `pos`.
/// Major 0 yields the argument itself; major 1 yields -1 - argument.
/// Returns error.Overflow when the result does not fit in an i64 (argument
/// above maxInt(i64) for either sign) and error.WrongType for other majors.
pub fn readInt(data: []const u8, pos: usize) DecodeError!I64Result {
    const header = try readArg(data, pos);
    if (header.major > 1) return error.WrongType;

    // both signs share the same limit: -1 - (2^63 - 1) is exactly minInt(i64)
    const magnitude = std.math.cast(i64, header.val) orelse return error.Overflow;
    const result: i64 = if (header.major == 0) magnitude else -1 - magnitude;
    return .{ .val = result, .end = header.end };
}
675
/// Read the CBOR boolean at `pos` (0xf4 = false, 0xf5 = true).
/// Any other byte is error.WrongType; reading at or past the end is
/// error.UnexpectedEof.
pub fn readBool(data: []const u8, pos: usize) DecodeError!BoolResult {
    if (pos >= data.len) return error.UnexpectedEof;
    const byte = data[pos];
    if (byte != 0xf4 and byte != 0xf5) return error.WrongType;
    return .{ .val = byte == 0xf5, .end = pos + 1 };
}
686
/// Read the CBOR null (0xf6) at `pos` and return the position after it.
/// Any other byte is error.WrongType; reading at or past the end is
/// error.UnexpectedEof.
pub fn readNull(data: []const u8, pos: usize) DecodeError!usize {
    if (pos >= data.len) return error.UnexpectedEof;
    return if (data[pos] == 0xf6) pos + 1 else error.WrongType;
}
694
/// Read the CBOR map header (major type 5) that starts at `pos`.
/// `val` is the number of key/value entries; `end` is where the first key begins.
/// Any other major type is error.WrongType.
pub fn readMapHeader(data: []const u8, pos: usize) DecodeError!U64Result {
    const header = try readArg(data, pos);
    return if (header.major == 5)
        .{ .val = header.val, .end = header.end }
    else
        error.WrongType;
}
702
/// Read the CBOR array header (major type 4) that starts at `pos`.
/// `val` is the number of elements; `end` is where the first element begins.
/// Any other major type is error.WrongType.
pub fn readArrayHeader(data: []const u8, pos: usize) DecodeError!U64Result {
    const header = try readArg(data, pos);
    return if (header.major == 4)
        .{ .val = header.val, .end = header.end }
    else
        error.WrongType;
}
710
/// Read the DAG-CBOR CID link that starts at `pos`: tag 42 wrapping a byte
/// string whose first byte is the 0x00 identity multibase prefix.
/// Returns the CID bytes after that prefix as a slice into `data`.
/// Errors: WrongType when the item is not tag 42 or its content is not a byte
/// string; InvalidCid when the payload is shorter than 3 bytes or lacks the
/// 0x00 prefix.
pub fn readCidLink(data: []const u8, pos: usize) DecodeError!SliceResult {
    const tag = try readArg(data, pos);
    const is_cid_tag = tag.major == 6 and tag.val == 42;
    if (!is_cid_tag) return error.WrongType;

    const inner = try readBytes(data, tag.end);
    // need the 0x00 prefix plus at least a version byte and a codec byte
    if (inner.val.len < 3) return error.InvalidCid;
    if (inner.val[0] != 0x00) return error.InvalidCid;

    return .{ .val = inner.val[1..], .end = inner.end };
}
725
726// ---------------------------------------------------------------------------
727// Streaming helpers — skip / peek without full decode
728// ---------------------------------------------------------------------------
729
/// Skip one CBOR value at `pos` without decoding it. Returns the position
/// after the skipped value. Iterative (not recursive): nested containers are
/// tracked on a fixed-size stack of remaining-item counts. Zero allocation.
///
/// The stack holds `max_depth` levels, the same nesting limit the decoder
/// applies, so input the decoder accepts is not rejected here for being too
/// deep. Deeper nesting yields error.MaxDepthExceeded.
///
/// Headers are parsed with `readArg`, so non-minimal, reserved and
/// indefinite-length headers are rejected with the corresponding errors. Tag
/// numbers are not checked; the tagged item is simply skipped.
pub fn skipValue(data: []const u8, pos: usize) DecodeError!usize {
    const max_stack = max_depth;
    // stack[i] = number of items still to be consumed in the i-th open container
    var stack: [max_stack]u64 = undefined;
    var depth: usize = 0;
    var cur = pos;

    while (true) {
        const arg = try readArg(data, cur);
        cur = arg.end;

        switch (arg.major) {
            0, 1 => {
                // integers: header only, nothing to skip after readArg
            },
            2, 3 => {
                // byte string / text string: skip `val` bytes of payload
                const len = std.math.cast(usize, arg.val) orelse return error.UnexpectedEof;
                cur = std.math.add(usize, cur, len) catch return error.UnexpectedEof;
                if (cur > data.len) return error.UnexpectedEof;
            },
            4 => {
                // array: push element count
                if (arg.val > 0) {
                    if (depth >= max_stack) return error.MaxDepthExceeded;
                    stack[depth] = arg.val;
                    depth += 1;
                    continue; // don't decrement — we haven't consumed an element yet
                }
            },
            5 => {
                // map: push key+value count (2 per entry)
                if (arg.val > 0) {
                    if (depth >= max_stack) return error.MaxDepthExceeded;
                    stack[depth] = std.math.mul(u64, arg.val, 2) catch return error.Overflow;
                    depth += 1;
                    continue;
                }
            },
            6 => {
                // tag: the tagged value follows immediately — loop to read it
                // don't push anything, don't decrement
                continue;
            },
            7 => {
                // simple/float: header only
            },
        }

        // After consuming a value, unwind the stack: one item of the innermost
        // container is done; a container that reaches zero is itself a finished
        // item of its parent.
        while (depth > 0) {
            stack[depth - 1] -= 1;
            if (stack[depth - 1] > 0) break;
            depth -= 1;
        }

        if (depth == 0) return cur;
    }
}
791
/// Find the "$type" entry of the DAG-CBOR map at the start of `data` without
/// decoding the whole value. Returns the type string as a slice into `data`,
/// or null when none is found.
pub fn peekType(data: []const u8) DecodeError!?[]const u8 {
    const start: usize = 0;
    return peekTypeAt(data, start);
}
797
/// Find the "$type" entry of the DAG-CBOR map that starts at `pos`.
/// Returns the type string as a slice into `data`.
///
/// Returns null when the item is not a map, when a key cannot be read as a
/// text string, when the "$type" value is not a readable text string, or when
/// no such key exists. Errors from parsing the map header or from skipping a
/// non-matching value are propagated.
pub fn peekTypeAt(data: []const u8, pos: usize) DecodeError!?[]const u8 {
    const header = try readArg(data, pos);
    if (header.major != 5) return null;

    var remaining = std.math.cast(usize, header.val) orelse return null;
    var cursor = header.end;
    while (remaining > 0) : (remaining -= 1) {
        // DAG-CBOR keys are always text strings
        const key = readText(data, cursor) catch return null;

        if (std.mem.eql(u8, key.val, "$type")) {
            const type_name = readText(data, key.end) catch return null;
            return type_name.val;
        }

        // not the key we want: step over its value
        cursor = try skipValue(data, key.end);
    }

    return null;
}
824
825// === low-level write API ===
826
/// Write a CBOR header (initial byte plus big-endian argument) for
/// `major`/`val` into `buf` at `pos`, using the shortest encoding: inline for
/// values below 24, otherwise 1, 2, 4 or 8 argument bytes.
/// Returns the position after the written bytes. The caller must ensure `buf`
/// has room for up to 9 bytes at `pos`.
pub fn writeArg(buf: []u8, pos: usize, major: u3, val: u64) usize {
    const lead: u8 = @as(u8, major) << 5;

    // small values are stored directly in the initial byte
    if (val < 24) {
        buf[pos] = lead | @as(u8, @intCast(val));
        return pos + 1;
    }

    const info: u8 = if (val <= 0xff) 24 else if (val <= 0xffff) 25 else if (val <= 0xffff_ffff) 26 else 27;
    const width: usize = switch (info) {
        24 => 1,
        25 => 2,
        26 => 4,
        else => 8,
    };

    buf[pos] = lead | info;
    // the argument is the low `width` bytes of the big-endian u64
    var big_endian: [8]u8 = undefined;
    std.mem.writeInt(u64, &big_endian, val, .big);
    @memcpy(buf[pos + 1 ..][0..width], big_endian[8 - width ..]);
    return pos + 1 + width;
}
865
/// Write a CBOR text string (major type 3 header followed by the payload)
/// into `buf` at `pos`. Returns the position after the written bytes.
/// The caller must ensure `buf` is large enough.
pub fn writeText(buf: []u8, pos: usize, text: []const u8) usize {
    const body_start = writeArg(buf, pos, 3, text.len);
    const body_end = body_start + text.len;
    @memcpy(buf[body_start..body_end], text);
    return body_end;
}
872
/// Write a CBOR byte string (major type 2 header followed by the payload)
/// into `buf` at `pos`. Returns the position after the written bytes.
/// The caller must ensure `buf` is large enough.
pub fn writeBytes(buf: []u8, pos: usize, bytes: []const u8) usize {
    const body_start = writeArg(buf, pos, 2, bytes.len);
    const body_end = body_start + bytes.len;
    @memcpy(buf[body_start..body_end], bytes);
    return body_end;
}
879
/// Write `val` as a CBOR unsigned integer (major type 0) into `buf` at `pos`.
/// Returns the position after the written bytes.
pub fn writeUint(buf: []u8, pos: usize, val: u64) usize {
    const unsigned_major: u3 = 0;
    return writeArg(buf, pos, unsigned_major, val);
}
884
/// Write `val` as a CBOR integer into `buf` at `pos`.
/// Values >= 0 use major type 0 with the value itself; values below zero use
/// major type 1 with the argument -1 - val.
/// Returns the position after the written bytes.
pub fn writeInt(buf: []u8, pos: usize, val: i64) usize {
    const is_negative = val < 0;
    const argument: u64 = @intCast(if (is_negative) -1 - val else val);
    return writeArg(buf, pos, @intFromBool(is_negative), argument);
}
894
/// Write a CBOR map header (major type 5) announcing `count` key/value
/// entries into `buf` at `pos`. Returns the position after the written bytes.
pub fn writeMapHeader(buf: []u8, pos: usize, count: usize) usize {
    const map_major: u3 = 5;
    return writeArg(buf, pos, map_major, count);
}
899
/// Write a CBOR array header (major type 4) announcing `count` elements
/// into `buf` at `pos`. Returns the position after the written bytes.
pub fn writeArrayHeader(buf: []u8, pos: usize, count: usize) usize {
    const array_major: u3 = 4;
    return writeArg(buf, pos, array_major, count);
}
904
/// Write a CBOR boolean into `buf` at `pos`: 0xf5 for true, 0xf4 for false.
/// Returns the position after the written byte.
pub fn writeBool(buf: []u8, pos: usize, val: bool) usize {
    // false is 0xf4 and true is the next code point
    buf[pos] = @as(u8, 0xf4) + @intFromBool(val);
    return pos + 1;
}
910
/// Emit null as the single byte 0xf6 at `pos`.
/// Returns `pos + 1`.
pub fn writeNull(buf: []u8, pos: usize) usize {
    const null_byte: u8 = 0xf6;
    buf[pos] = null_byte;
    return pos + 1;
}
916
/// Emit a CID link at `pos`: tag 42, then a byte string whose payload is a
/// single 0x00 prefix byte followed by `cid_raw`.
/// Returns the offset immediately after the last CID byte.
/// No capacity check happens here; `buf` must already be large enough.
pub fn writeCidLink(buf: []u8, pos: usize, cid_raw: []const u8) usize {
    const after_tag = writeArg(buf, pos, 6, 42);
    // the byte string length counts the 0x00 prefix as well as the CID bytes
    const prefix_at = writeArg(buf, after_tag, 2, 1 + cid_raw.len);
    buf[prefix_at] = 0x00;
    const cid_at = prefix_at + 1;
    @memcpy(buf[cid_at..][0..cid_raw.len], cid_raw);
    return cid_at + cid_raw.len;
}
926
927// === tests ===
928
test "decode unsigned integers" {
    const testing = std.testing;
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    // each case pairs an encoded integer with the value it must decode to
    const cases = [_]struct { input: []const u8, want: u64 }{
        .{ .input = &.{0x00}, .want = 0 },
        .{ .input = &.{0x01}, .want = 1 },
        .{ .input = &.{0x17}, .want = 23 },
        .{ .input = &.{ 0x18, 24 }, .want = 24 }, // 1-byte argument follows
        .{ .input = &.{ 0x19, 0x03, 0xe8 }, .want = 1000 }, // 2-byte argument follows
    };
    for (cases) |case| {
        const decoded = try decode(alloc, case.input);
        try testing.expectEqual(case.want, decoded.value.unsigned);
    }
}
945
test "decode negative integers" {
    const testing = std.testing;
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    // 0x20 is major 1 with additional value 0, i.e. -1
    const minus_one = try decode(alloc, &.{0x20});
    try testing.expectEqual(@as(i64, -1), minus_one.value.negative);

    // 0x29 is major 1 with additional value 9, i.e. -10
    const minus_ten = try decode(alloc, &.{0x29});
    try testing.expectEqual(@as(i64, -10), minus_ten.value.negative);
}
956
test "decode text strings" {
    const testing = std.testing;
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    // encoded text string paired with the text it must decode to
    const cases = [_]struct { input: []const u8, want: []const u8 }{
        .{ .input = &.{0x60}, .want = "" },
        .{ .input = &.{ 0x61, 'a' }, .want = "a" },
        .{ .input = &.{ 0x65, 'h', 'e', 'l', 'l', 'o' }, .want = "hello" },
    };
    for (cases) |case| {
        const decoded = try decode(alloc, case.input);
        try testing.expectEqualStrings(case.want, decoded.value.text);
    }
}
969
test "decode byte strings" {
    const testing = std.testing;
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    // zero-length byte string
    const empty = try decode(alloc, &.{0x40});
    try testing.expectEqualSlices(u8, &.{}, empty.value.bytes);

    // byte string carrying three bytes
    const three = try decode(alloc, &.{ 0x43, 1, 2, 3 });
    try testing.expectEqualSlices(u8, &.{ 1, 2, 3 }, three.value.bytes);
}
980
test "decode booleans and null" {
    const testing = std.testing;
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    // 0xf4 → false
    const decoded_false = (try decode(alloc, &.{0xf4})).value;
    try testing.expectEqual(false, decoded_false.boolean);

    // 0xf5 → true
    const decoded_true = (try decode(alloc, &.{0xf5})).value;
    try testing.expectEqual(true, decoded_true.boolean);

    // 0xf6 → null
    const decoded_null = (try decode(alloc, &.{0xf6})).value;
    try testing.expectEqual(Value.null, decoded_null);
}
990
test "decode array" {
    const testing = std.testing;
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    // array(3) holding 1, 2, 3
    const decoded = try decode(alloc, &.{ 0x83, 0x01, 0x02, 0x03 });
    const elems = decoded.value.array;
    try testing.expectEqual(@as(usize, 3), elems.len);

    // element i must hold i + 1
    for (elems, 1..) |elem, want| {
        try testing.expectEqual(@as(u64, want), elem.unsigned);
    }
}
1004
test "decode map" {
    const testing = std.testing;
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    // {"a": 1, "b": 2}
    const input = [_]u8{
        0xa2, // map(2)
        0x61, 'a', 0x01, // "a": 1
        0x61, 'b', 0x02, // "b": 2
    };
    const root = (try decode(alloc, &input)).value;

    try testing.expectEqual(@as(u64, 1), root.get("a").?.unsigned);
    try testing.expectEqual(@as(u64, 2), root.get("b").?.unsigned);
}
1020
test "decode nested map" {
    const testing = std.testing;
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    // {"t": "#commit", "op": 1}; the 1-byte key precedes the 2-byte key
    const input = [_]u8{
        0xa2, // map(2)
        0x61, 't', 0x67, '#', 'c', 'o', 'm', 'm', 'i', 't', // "t": "#commit"
        0x62, 'o', 'p', 0x01, // "op": 1
    };
    const root = (try decode(alloc, &input)).value;

    try testing.expectEqual(@as(u64, 1), root.get("op").?.unsigned);
    try testing.expectEqualStrings("#commit", root.getString("t").?);
}
1036
test "consumed bytes tracking" {
    const testing = std.testing;
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    // a stream of two back-to-back CBOR items: 1 followed by 2
    const stream = [_]u8{ 0x01, 0x02 };

    const first = try decode(alloc, &stream);
    try testing.expectEqual(@as(u64, 1), first.value.unsigned);
    try testing.expectEqual(@as(usize, 1), first.consumed);

    // resume decoding exactly where the first item ended
    const rest = stream[first.consumed..];
    const second = try decode(alloc, rest);
    try testing.expectEqual(@as(u64, 2), second.value.unsigned);
}
1051
test "reject floats" {
    const testing = std.testing;
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    // 0xf9 introduces a half-precision float (f16), which must be refused
    const half_float = [_]u8{ 0xf9, 0x00, 0x00 };
    try testing.expectError(error.UnsupportedFloat, decode(alloc, &half_float));
}
1060
test "Value helper methods" {
    const testing = std.testing;
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    // {"age": 30, "name": "alice", "active": true}, keys ordered by length
    const input = [_]u8{
        0xa3, // map(3)
        0x63, 'a', 'g', 'e', 0x18, 30, // "age": 30 (3-byte key)
        0x64, 'n', 'a', 'm', 'e', 0x65, 'a', 'l', 'i', 'c', 'e', // "name": "alice" (4-byte key)
        0x66, 'a', 'c', 't', 'i', 'v', 'e', 0xf5, // "active": true (6-byte key)
    };
    const root = (try decode(alloc, &input)).value;

    try testing.expectEqualStrings("alice", root.getString("name").?);
    try testing.expectEqual(@as(i64, 30), root.getInt("age").?);
    try testing.expectEqual(true, root.getBool("active").?);
    // a key that is not in the map yields null rather than an error
    try testing.expect(root.getString("missing") == null);
}
1078
1079// === encoder tests ===
1080
test "encode unsigned integers" {
    const testing = std.testing;
    var buf: [16]u8 = undefined;
    var w: std.Io.Writer = .fixed(&buf);
    const alloc = testing.allocator;

    // value paired with its expected (shortest) encoding
    const cases = [_]struct { value: u64, want: []const u8 }{
        .{ .value = 0, .want = &.{0x00} }, // single byte
        .{ .value = 23, .want = &.{0x17} },
        .{ .value = 24, .want = &.{ 0x18, 24 } }, // 2 bytes
        .{ .value = 1000, .want = &.{ 0x19, 0x03, 0xe8 } }, // 3 bytes
    };
    for (cases) |case| {
        w.end = 0; // rewind so every case starts from an empty buffer
        try encode(alloc, &w, .{ .unsigned = case.value });
        try testing.expectEqualSlices(u8, case.want, w.buffered());
    }
}
1104
test "encode negative integers" {
    const testing = std.testing;
    var buf: [16]u8 = undefined;
    var w: std.Io.Writer = .fixed(&buf);
    const alloc = testing.allocator;

    // -1 encodes as major 1 with additional value 0
    const minus_one: Value = .{ .negative = -1 };
    try encode(alloc, &w, minus_one);
    try testing.expectEqualSlices(u8, &.{0x20}, w.buffered());

    // rewind the writer, then -10 encodes as major 1 with additional value 9
    w.end = 0;
    const minus_ten: Value = .{ .negative = -10 };
    try encode(alloc, &w, minus_ten);
    try testing.expectEqualSlices(u8, &.{0x29}, w.buffered());
}
1118
test "encode text strings" {
    const testing = std.testing;
    var buf: [64]u8 = undefined;
    var w: std.Io.Writer = .fixed(&buf);
    const alloc = testing.allocator;

    // text paired with its expected encoding (header byte + raw bytes)
    const cases = [_]struct { text: []const u8, want: []const u8 }{
        .{ .text = "", .want = &.{0x60} },
        .{ .text = "hello", .want = &.{ 0x65, 'h', 'e', 'l', 'l', 'o' } },
    };
    for (cases) |case| {
        w.end = 0; // rewind so every case starts from an empty buffer
        try encode(alloc, &w, .{ .text = case.text });
        try testing.expectEqualSlices(u8, case.want, w.buffered());
    }
}
1131
test "encode byte strings" {
    const testing = std.testing;
    var buf: [64]u8 = undefined;
    var w: std.Io.Writer = .fixed(&buf);
    const alloc = testing.allocator;

    // an empty byte string is just the header byte
    const no_bytes: Value = .{ .bytes = &.{} };
    try encode(alloc, &w, no_bytes);
    try testing.expectEqualSlices(u8, &.{0x40}, w.buffered());

    // rewind, then three bytes: header 0x43 followed by the payload
    w.end = 0;
    const three_bytes: Value = .{ .bytes = &.{ 1, 2, 3 } };
    try encode(alloc, &w, three_bytes);
    try testing.expectEqualSlices(u8, &.{ 0x43, 1, 2, 3 }, w.buffered());
}
1144
test "encode booleans and null" {
    const testing = std.testing;
    var buf: [4]u8 = undefined;
    var w: std.Io.Writer = .fixed(&buf);
    const alloc = testing.allocator;

    // each simple value must encode to exactly one byte
    const cases = [_]struct { value: Value, want: []const u8 }{
        .{ .value = .{ .boolean = false }, .want = &.{0xf4} },
        .{ .value = .{ .boolean = true }, .want = &.{0xf5} },
        .{ .value = .null, .want = &.{0xf6} },
    };
    for (cases) |case| {
        w.end = 0; // rewind so every case starts from an empty buffer
        try encode(alloc, &w, case.value);
        try testing.expectEqualSlices(u8, case.want, w.buffered());
    }
}
1161
test "encode array" {
    const testing = std.testing;
    var buf: [64]u8 = undefined;
    var w: std.Io.Writer = .fixed(&buf);
    const alloc = testing.allocator;

    // the array [1, 2, 3]
    const one_two_three: Value = .{ .array = &.{
        .{ .unsigned = 1 },
        .{ .unsigned = 2 },
        .{ .unsigned = 3 },
    } };
    try encode(alloc, &w, one_two_three);

    // array(3) header followed by the three single-byte integers
    const want = [_]u8{ 0x83, 0x01, 0x02, 0x03 };
    try testing.expectEqualSlices(u8, &want, w.buffered());
}
1175
test "encode map with DAG-CBOR key sorting" {
    const testing = std.testing;
    var buf: [128]u8 = undefined;
    var w: std.Io.Writer = .fixed(&buf);
    const alloc = testing.allocator;

    // entries are deliberately out of order: "bb", "a", "cc".
    // the encoder must emit them shortest-first, then lexicographically.
    const unsorted: Value = .{ .map = &.{
        .{ .key = "bb", .value = .{ .unsigned = 2 } },
        .{ .key = "a", .value = .{ .unsigned = 1 } },
        .{ .key = "cc", .value = .{ .unsigned = 3 } },
    } };
    try encode(alloc, &w, unsorted);

    const want = [_]u8{
        0xa3, // map(3)
        0x61, 'a', 0x01, // "a": 1 (shortest key first)
        0x62, 'b', 'b', 0x02, // "bb": 2 (same length, lex order)
        0x62, 'c', 'c', 0x03, // "cc": 3
    };
    try testing.expectEqualSlices(u8, &want, w.buffered());
}
1197
test "round-trip encode → decode" {
    const testing = std.testing;
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    // a map mixing text, boolean and integer values
    const original: Value = .{ .map = &.{
        .{ .key = "name", .value = .{ .text = "alice" } },
        .{ .key = "active", .value = .{ .boolean = true } },
        .{ .key = "seq", .value = .{ .unsigned = 42 } },
    } };

    // encode to bytes, then decode those bytes back into a Value
    const bytes = try encodeAlloc(alloc, original);
    const restored = try decodeAll(alloc, bytes);

    // every field must survive the trip unchanged
    try testing.expectEqualStrings("alice", restored.getString("name").?);
    try testing.expectEqual(true, restored.getBool("active").?);
    try testing.expectEqual(@as(i64, 42), restored.getInt("seq").?);
}
1217
test "round-trip nested structures" {
    const testing = std.testing;
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    // map → array → map: {"ops": [{"action": "create"}], "seq": 1}
    const original: Value = .{ .map = &.{
        .{ .key = "ops", .value = .{ .array = &.{
            .{ .map = &.{
                .{ .key = "action", .value = .{ .text = "create" } },
            } },
        } } },
        .{ .key = "seq", .value = .{ .unsigned = 1 } },
    } };

    const bytes = try encodeAlloc(alloc, original);
    const restored = try decodeAll(alloc, bytes);

    // the inner array and its single map entry must both come back intact
    const restored_ops = restored.getArray("ops").?;
    try testing.expectEqual(@as(usize, 1), restored_ops.len);
    try testing.expectEqualStrings("create", restored_ops[0].getString("action").?);
    try testing.expectEqual(@as(i64, 1), restored.getInt("seq").?);
}
1241
test "encode CID via tag 42" {
    const testing = std.testing;
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    // hand-built CIDv1: 4 header bytes followed by a 32-byte digest of 0xaa
    const header = [_]u8{
        0x01, // version
        0x71, // dag-cbor
        0x12, // sha2-256
        0x20, // 32-byte digest
    };
    const digest = [_]u8{0xaa} ** 32;
    const raw_cid = header ++ digest;

    const original: Value = .{ .cid = .{ .raw = &raw_cid } };

    const bytes = try encodeAlloc(alloc, original);
    const restored = try decodeAll(alloc, bytes);

    // the decoded value must be a CID carrying the very same raw bytes
    const link = restored.cid;
    try testing.expectEqual(@as(u64, 1), link.version().?);
    try testing.expectEqual(@as(u64, 0x71), link.codec().?);
    try testing.expectEqual(@as(u64, 0x12), link.hashFn().?);
    try testing.expectEqualSlices(u8, &raw_cid, link.raw);
}
1269
test "writeUvarint round-trip" {
    const testing = std.testing;
    var buf: [16]u8 = undefined;
    var w: std.Io.Writer = .fixed(&buf);

    const samples = [_]u64{ 0, 1, 127, 128, 255, 256, 16384, 0xffffffff };
    for (samples) |sample| {
        // rewind the writer and encode this sample on its own
        w.end = 0;
        try writeUvarint(&w, sample);
        const encoded = w.buffered();

        // reading it back must yield the sample and consume every written byte
        var cursor: usize = 0;
        const round_tripped = readUvarint(encoded, &cursor).?;
        try testing.expectEqual(sample, round_tripped);
        try testing.expectEqual(encoded.len, cursor);
    }
}
1286
test "DAG-CBOR key sort is stable" {
    const testing = std.testing;
    const alloc = testing.allocator;
    var buf: [128]u8 = undefined;
    var w: std.Io.Writer = .fixed(&buf);

    // two keys of equal length, supplied in reverse lexicographic order
    const unsorted: Value = .{ .map = &.{
        .{ .key = "op", .value = .{ .unsigned = 1 } },
        .{ .key = "ab", .value = .{ .unsigned = 2 } },
    } };
    try encode(alloc, &w, unsorted);

    var arena = std.heap.ArenaAllocator.init(alloc);
    defer arena.deinit();
    const restored = try decodeAll(arena.allocator(), w.buffered());

    // with lengths tied, "ab" must be emitted ahead of "op"
    const pairs = restored.map;
    try testing.expectEqualStrings("ab", pairs[0].key);
    try testing.expectEqualStrings("op", pairs[1].key);
}
1307
1308// === CID creation tests ===
1309
test "Cid.forDagCbor creates valid CIDv1" {
    const testing = std.testing;
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    // encode a small map, then derive a CID from the encoded bytes
    const block: Value = .{ .map = &.{
        .{ .key = "text", .value = .{ .text = "hello" } },
    } };
    const block_bytes = try encodeAlloc(alloc, block);
    const link = try Cid.forDagCbor(alloc, block_bytes);

    try testing.expectEqual(@as(u64, 1), link.version().?);
    try testing.expectEqual(Codec.dag_cbor, link.codec().?);
    try testing.expectEqual(HashFn.sha2_256, link.hashFn().?);
    try testing.expectEqual(@as(usize, 32), link.digest().?.len);
    // raw layout: version(1) + codec(0x71) + hash_fn(0x12) + digest_len(0x20) + 32 digest bytes
    try testing.expectEqual(@as(usize, 36), link.raw.len);
}
1329
test "Cid.forDagCbor is deterministic" {
    const testing = std.testing;
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    // hashing the same input twice must give the same CID both times
    const payload = "identical input";
    const first = try Cid.forDagCbor(alloc, payload);
    const second = try Cid.forDagCbor(alloc, payload);

    try testing.expectEqualSlices(u8, first.raw, second.raw);
    try testing.expectEqualSlices(u8, first.digest().?, second.digest().?);
}
1342
test "Cid.forDagCbor different data → different CIDs" {
    const testing = std.testing;
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    const first = try Cid.forDagCbor(alloc, "data A");
    const second = try Cid.forDagCbor(alloc, "data B");

    // inputs differing in one character must not share a digest
    const same_digest = std.mem.eql(u8, first.digest().?, second.digest().?);
    try testing.expect(!same_digest);
}
1353
test "Cid.toBytes round-trips through parseCid" {
    const testing = std.testing;
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    // serialize a freshly created CID, then parse the serialized form back
    const original = try Cid.forDagCbor(alloc, "test content");
    const serialized = try original.toBytes(alloc);
    const reparsed = parseCid(serialized);

    // every component must match between the original and the reparsed CID
    try testing.expectEqual(original.version().?, reparsed.version().?);
    try testing.expectEqual(original.codec().?, reparsed.codec().?);
    try testing.expectEqual(original.hashFn().?, reparsed.hashFn().?);
    try testing.expectEqualSlices(u8, original.digest().?, reparsed.digest().?);
}
1368
test "CID round-trip through CBOR encode/decode" {
    const testing = std.testing;
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    // derive a CID from some content
    const link = try Cid.forDagCbor(alloc, "block data");

    // place it under a map key, then encode and decode the whole map
    const original: Value = .{ .map = &.{
        .{ .key = "link", .value = .{ .cid = link } },
    } };
    const bytes = try encodeAlloc(alloc, original);
    const restored = try decodeAll(alloc, bytes);

    // the CID read back from the map must match the one put in
    const restored_link = restored.get("link").?.cid;
    try testing.expectEqual(link.version().?, restored_link.version().?);
    try testing.expectEqual(link.codec().?, restored_link.codec().?);
    try testing.expectEqualSlices(u8, link.digest().?, restored_link.digest().?);
}
1389
1390// === verify CIDs against real AT Protocol records ===
1391
test "real record: pfrazee 'First!' post CID matches network" {
    // record: at://did:plc:ragtjsm2j2vknwkz3zp4oxrd/app.bsky.feed.post/3jhnzcfawac27
    // its CID: bafyreiaqnrahsbvcssf2xe4iqhn2fnjw7utmvrbif2v36tqe3r5iqill7i
    const testing = std.testing;
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    // digest of the record as known from production
    const want_digest = [_]u8{
        0x10, 0x6c, 0x40, 0x79, 0x06, 0xa2, 0x94, 0x8b,
        0xab, 0x93, 0x88, 0x81, 0xdb, 0xa2, 0xb5, 0x36,
        0xfd, 0x26, 0xca, 0xc4, 0x28, 0x2e, 0xab, 0xbf,
        0x4e, 0x04, 0xdc, 0x7a, 0x88, 0x21, 0x6b, 0xfa,
    };

    // rebuild the record locally, encode it, and hash the encoded bytes
    const post: Value = .{ .map = &.{
        .{ .key = "$type", .value = .{ .text = "app.bsky.feed.post" } },
        .{ .key = "createdAt", .value = .{ .text = "2022-11-17T00:39:00.477Z" } },
        .{ .key = "text", .value = .{ .text = "First!" } },
    } };
    const post_bytes = try encodeAlloc(alloc, post);
    const link = try Cid.forDagCbor(alloc, post_bytes);

    try testing.expectEqualSlices(u8, &want_digest, link.digest().?);
    try testing.expectEqual(@as(u64, 1), link.version().?);
    try testing.expectEqual(Codec.dag_cbor, link.codec().?);
    try testing.expectEqual(HashFn.sha2_256, link.hashFn().?);
}
1421
test "real record: firehose post with emoji/langs/reply is byte-identical after re-encode" {
    // an app.bsky.feed.post captured from the live firehose; it carries an
    // emoji in its text plus langs and reply fields
    const testing = std.testing;
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    const firehose_bytes = [_]u8{
        0xa5, 0x64, 0x74, 0x65, 0x78, 0x74, 0x6b, 0xf0, 0x9f, 0xa5, 0xb5, 0x20, 0x6d, 0x65, 0x20, 0x74,
        0x6f, 0x6f, 0x65, 0x24, 0x74, 0x79, 0x70, 0x65, 0x72, 0x61, 0x70, 0x70, 0x2e, 0x62, 0x73, 0x6b,
        0x79, 0x2e, 0x66, 0x65, 0x65, 0x64, 0x2e, 0x70, 0x6f, 0x73, 0x74, 0x65, 0x6c, 0x61, 0x6e, 0x67,
        0x73, 0x81, 0x62, 0x65, 0x6e, 0x65, 0x72, 0x65, 0x70, 0x6c, 0x79, 0xa2, 0x64, 0x72, 0x6f, 0x6f,
        0x74, 0xa2, 0x63, 0x63, 0x69, 0x64, 0x78, 0x3b, 0x62, 0x61, 0x66, 0x79, 0x72, 0x65, 0x69, 0x62,
        0x33, 0x70, 0x77, 0x72, 0x66, 0x66, 0x32, 0x79, 0x61, 0x64, 0x7a, 0x6e, 0x6f, 0x70, 0x68, 0x7a,
        0x66, 0x34, 0x68, 0x63, 0x76, 0x74, 0x79, 0x6f, 0x63, 0x74, 0x77, 0x7a, 0x63, 0x75, 0x6a, 0x76,
        0x7a, 0x37, 0x78, 0x34, 0x70, 0x6e, 0x67, 0x6b, 0x32, 0x69, 0x73, 0x69, 0x63, 0x7a, 0x37, 0x79,
        0x73, 0x7a, 0x71, 0x63, 0x75, 0x72, 0x69, 0x78, 0x46, 0x61, 0x74, 0x3a, 0x2f, 0x2f, 0x64, 0x69,
        0x64, 0x3a, 0x70, 0x6c, 0x63, 0x3a, 0x34, 0x6e, 0x65, 0x6e, 0x64, 0x77, 0x71, 0x72, 0x73, 0x37,
        0x35, 0x34, 0x67, 0x74, 0x36, 0x71, 0x76, 0x67, 0x72, 0x35, 0x36, 0x6a, 0x6d, 0x6e, 0x2f, 0x61,
        0x70, 0x70, 0x2e, 0x62, 0x73, 0x6b, 0x79, 0x2e, 0x66, 0x65, 0x65, 0x64, 0x2e, 0x70, 0x6f, 0x73,
        0x74, 0x2f, 0x33, 0x6d, 0x65, 0x64, 0x67, 0x32, 0x71, 0x76, 0x63, 0x75, 0x63, 0x32, 0x63, 0x66,
        0x70, 0x61, 0x72, 0x65, 0x6e, 0x74, 0xa2, 0x63, 0x63, 0x69, 0x64, 0x78, 0x3b, 0x62, 0x61, 0x66,
        0x79, 0x72, 0x65, 0x69, 0x62, 0x33, 0x70, 0x77, 0x72, 0x66, 0x66, 0x32, 0x79, 0x61, 0x64, 0x7a,
        0x6e, 0x6f, 0x70, 0x68, 0x7a, 0x66, 0x34, 0x68, 0x63, 0x76, 0x74, 0x79, 0x6f, 0x63, 0x74, 0x77,
        0x7a, 0x63, 0x75, 0x6a, 0x76, 0x7a, 0x37, 0x78, 0x34, 0x70, 0x6e, 0x67, 0x6b, 0x32, 0x69, 0x73,
        0x69, 0x63, 0x7a, 0x37, 0x79, 0x73, 0x7a, 0x71, 0x63, 0x75, 0x72, 0x69, 0x78, 0x46, 0x61, 0x74,
        0x3a, 0x2f, 0x2f, 0x64, 0x69, 0x64, 0x3a, 0x70, 0x6c, 0x63, 0x3a, 0x34, 0x6e, 0x65, 0x6e, 0x64,
        0x77, 0x71, 0x72, 0x73, 0x37, 0x35, 0x34, 0x67, 0x74, 0x36, 0x71, 0x76, 0x67, 0x72, 0x35, 0x36,
        0x6a, 0x6d, 0x6e, 0x2f, 0x61, 0x70, 0x70, 0x2e, 0x62, 0x73, 0x6b, 0x79, 0x2e, 0x66, 0x65, 0x65,
        0x64, 0x2e, 0x70, 0x6f, 0x73, 0x74, 0x2f, 0x33, 0x6d, 0x65, 0x64, 0x67, 0x32, 0x71, 0x76, 0x63,
        0x75, 0x63, 0x32, 0x63, 0x69, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x41, 0x74, 0x78, 0x18,
        0x32, 0x30, 0x32, 0x36, 0x2d, 0x30, 0x32, 0x2d, 0x30, 0x38, 0x54, 0x30, 0x37, 0x3a, 0x34, 0x39,
        0x3a, 0x32, 0x30, 0x2e, 0x37, 0x37, 0x32, 0x5a,
    };

    // digest of the CID that accompanied this record in the firehose frame
    const want_digest = [_]u8{
        0x80, 0x01, 0x66, 0x46, 0x81, 0x57, 0x18, 0xaf, 0xc9, 0x34, 0xcf, 0xbf,
        0x3b, 0x3e, 0x57, 0x04, 0x24, 0x17, 0x90, 0x29, 0x2f, 0x7b, 0xc4, 0xe0,
        0xf4, 0xcf, 0xe6, 0xe6, 0xb5, 0xad, 0x11, 0x28,
    };

    // decoding and then re-encoding must reproduce the captured bytes exactly
    const parsed = try decodeAll(alloc, &firehose_bytes);
    const round_tripped = try encodeAlloc(alloc, parsed);
    try testing.expectEqualSlices(u8, &firehose_bytes, round_tripped);

    // and hashing the re-encoded bytes must land on the production digest
    const link = try Cid.forDagCbor(alloc, round_tripped);
    try testing.expectEqualSlices(u8, &want_digest, link.digest().?);
}