atproto utils for zig zat.dev
atproto sdk zig
26
fork

Configure Feed

Select the types of activity you want to include in your feed.

at codex/xrpc-errors-retry 1471 lines 56 kB view raw
//! DAG-CBOR codec
//!
//! encode and decode the DAG-CBOR subset used by AT Protocol.
//! handles: integers, byte/text strings, arrays, maps, tag 42 (CID links),
//! booleans, null. no floats, no indefinite lengths.
//!
//! encoding follows DAG-CBOR deterministic rules:
//! - integers use shortest encoding
//! - map keys sorted by byte length, then lexicographically
//! - CIDs encoded as tag 42 with 0x00 identity multibase prefix
//!
//! see: https://ipld.io/specs/codecs/dag-cbor/spec/

const std = @import("std");
const Allocator = std.mem.Allocator;

/// CBOR major types (high 3 bits of initial byte)
const MajorType = enum(u3) {
    unsigned = 0,
    negative = 1,
    byte_string = 2,
    text_string = 3,
    array = 4,
    map = 5,
    tag = 6,
    simple = 7,
};

/// decoded CBOR value
pub const Value = union(enum) {
    unsigned: u64,
    /// the actual negative number (the decoder stores `-1 - raw argument`)
    negative: i64,
    bytes: []const u8,
    text: []const u8,
    array: []const Value,
    map: []const MapEntry,
    boolean: bool,
    null,
    cid: Cid,

    pub const MapEntry = struct {
        key: []const u8, // DAG-CBOR: keys are always text strings
        value: Value,
    };

    /// linear search for `key` in a map value.
    /// returns null when `self` is not a map or the key is absent.
    pub fn get(self: Value, key: []const u8) ?Value {
        if (self != .map) return null;
        for (self.map) |candidate| {
            if (std.mem.eql(u8, candidate.key, key)) return candidate.value;
        }
        return null;
    }

    /// map lookup that only succeeds when the entry is a text string
    pub fn getString(self: Value, key: []const u8) ?[]const u8 {
        const found = self.get(key) orelse return null;
        return if (found == .text) found.text else null;
    }

    /// map lookup for an integer entry, as i64.
    /// an unsigned entry too large for i64 yields null.
    pub fn getInt(self: Value, key: []const u8) ?i64 {
        const found = self.get(key) orelse return null;
        return switch (found) {
            .negative => |n| n,
            .unsigned => |u| std.math.cast(i64, u),
            else => null,
        };
    }

    /// map lookup for an integer entry, as u64.
    /// a `.negative` entry only succeeds if its payload fits u64.
    pub fn getUint(self: Value, key: []const u8) ?u64 {
        const found = self.get(key) orelse return null;
        return switch (found) {
            .unsigned => |u| u,
            .negative => |n| std.math.cast(u64, n),
            else => null,
        };
    }

    /// map lookup that only succeeds when the entry is a boolean
    pub fn getBool(self: Value, key: []const u8) ?bool {
        const found = self.get(key) orelse return null;
        return if (found == .boolean) found.boolean else null;
    }

    /// map lookup that only succeeds when the entry is a byte string
    pub fn getBytes(self: Value, key: []const u8) ?[]const u8 {
        const found = self.get(key) orelse return null;
        return if (found == .bytes) found.bytes else null;
    }

    /// map lookup that only succeeds when the entry is an array
    pub fn getArray(self: Value, key: []const u8) ?[]const Value {
        const found = self.get(key) orelse return null;
        return if (found == .array) found.array else null;
    }

    /// map lookup that only succeeds when the entry is a CID link
    pub fn getCid(self: Value, key: []const u8) ?Cid {
        const found = self.get(key) orelse return null;
        return if (found == .cid) found.cid else null;
    }

    // verify the Value union stayed slim after Cid optimization (was ~64, now 24)
    comptime {
        std.debug.assert(@sizeOf(Value) == 24);
        std.debug.assert(@sizeOf(MapEntry) == 40);
    }
};

/// well-known multicodec values
pub const Codec = struct {
    pub const dag_cbor: u64 = 0x71;
    pub const dag_pb: u64 = 0x70;
    pub const raw: u64 = 0x55;
};

/// well-known multihash function codes
pub const HashFn = struct {
    pub const sha2_256: u64 = 0x12;
    pub const identity: u64 = 0x00;
};

/// CID (Content Identifier) parsed from tag 42.
/// stores only the raw bytes — version/codec/hash_fn/digest are parsed lazily on demand.
/// this keeps the struct at 16 bytes (1 slice) instead of 56 bytes, which shrinks
/// the Value union from ~64 to ~24 bytes.
pub const Cid = struct {
    raw: []const u8,

    /// true when `raw` begins with 0x12 0x20 (sha2-256 multihash header),
    /// which this codec treats as the CIDv0 form.
    fn looksLikeV0(self: Cid) bool {
        return self.raw.len >= 2 and self.raw[0] == 0x12 and self.raw[1] == 0x20;
    }

    /// parse CID version from raw bytes (0 for CIDv0, 1+ for CIDv1).
    /// returns null when fewer than 2 bytes are present or the varint is malformed.
    pub fn version(self: Cid) ?u64 {
        if (self.raw.len < 2) return null;
        if (self.looksLikeV0()) return 0;
        var pos: usize = 0;
        return readUvarint(self.raw, &pos);
    }

    /// parse codec from raw bytes (implicit dag-pb for CIDv0).
    /// returns null when the bytes are too short or a varint is malformed.
    pub fn codec(self: Cid) ?u64 {
        if (self.raw.len < 2) return null;
        if (self.looksLikeV0()) return 0x70; // dag-pb
        var pos: usize = 0;
        _ = readUvarint(self.raw, &pos) orelse return null; // version
        return readUvarint(self.raw, &pos);
    }

    /// parse hash function code from raw bytes (sha2-256 for CIDv0).
    /// returns null when the bytes are too short or a varint is malformed.
    pub fn hashFn(self: Cid) ?u64 {
        if (self.raw.len < 2) return null;
        if (self.looksLikeV0()) return 0x12; // sha2-256
        var pos: usize = 0;
        _ = readUvarint(self.raw, &pos) orelse return null; // version
        _ = readUvarint(self.raw, &pos) orelse return null; // codec
        return readUvarint(self.raw, &pos);
    }

    /// parse digest bytes from raw CID (zero-copy slice into `raw`).
    /// returns null when the header is malformed or the declared digest
    /// length runs past the end of `raw`.
    pub fn digest(self: Cid) ?[]const u8 {
        if (self.raw.len < 2) return null;
        if (self.looksLikeV0()) {
            if (self.raw.len < 34) return null;
            return self.raw[2..34];
        }
        var pos: usize = 0;
        _ = readUvarint(self.raw, &pos) orelse return null; // version
        _ = readUvarint(self.raw, &pos) orelse return null; // codec
        _ = readUvarint(self.raw, &pos) orelse return null; // hash_fn
        const declared_len = readUvarint(self.raw, &pos) orelse return null;
        // the length is an untrusted u64: compare it with the bytes that remain
        // rather than computing `pos + len`, which can overflow for huge values.
        const len = std.math.cast(usize, declared_len) orelse return null;
        if (len > self.raw.len - pos) return null;
        return self.raw[pos..][0..len];
    }

    /// create a CIDv1 by hashing DAG-CBOR encoded data with SHA-256.
    /// the returned Cid's raw slice is owned by the allocator.
    pub fn forDagCbor(allocator: Allocator, data: []const u8) !Cid {
        return create(allocator, 1, Codec.dag_cbor, HashFn.sha2_256, data);
    }

    /// create a CID with the given version, codec and hash-function code.
    /// the digest is always the SHA-256 of `data`; `hash_fn_code` is only
    /// written into the header, it does not select the hash algorithm.
    /// the returned Cid's raw slice is allocated with `allocator`.
    pub fn create(allocator: Allocator, ver: u64, cod: u64, hash_fn_code: u64, data: []const u8) !Cid {
        const Sha256 = std.crypto.hash.sha2.Sha256;
        var hash: [Sha256.digest_length]u8 = undefined;
        Sha256.hash(data, &hash, .{});

        // build CID on the stack then copy to allocator — avoids dynamic writer
        // overhead. max varint size is 10 bytes × 4 fields + 32 byte hash = 72 bytes,
        // so the fixed writer cannot run out of space.
        var buf: [72]u8 = undefined;
        var w: std.Io.Writer = .fixed(&buf);
        writeUvarint(&w, ver) catch unreachable;
        writeUvarint(&w, cod) catch unreachable;
        writeUvarint(&w, hash_fn_code) catch unreachable;
        writeUvarint(&w, Sha256.digest_length) catch unreachable;
        w.writeAll(&hash) catch unreachable;

        const raw = try allocator.dupe(u8, w.buffered());
        return .{ .raw = raw };
    }

    /// copy this CID's raw bytes (version varint + codec varint + multihash)
    /// into a new slice allocated with `allocator`.
    pub fn toBytes(self: Cid, allocator: Allocator) ![]u8 {
        return try allocator.dupe(u8, self.raw);
    }
};

pub const DecodeError = error{
    UnexpectedEof,
    IndefiniteLength,
    UnsupportedSimpleValue,
    UnsupportedFloat,
    InvalidMapKey,
    InvalidCid,
    ReservedAdditionalInfo,
    Overflow,
    OutOfMemory,
    NonMinimalEncoding,
    TrailingBytes,
    UnsupportedTag,
    UnsortedMapKeys,
    DuplicateMapKey,
    InvalidUtf8,
    MaxDepthExceeded,
    WrongType,
};

/// maximum nesting depth for arrays/maps to prevent stack overflow
pub const max_depth: usize = 128;

/// decode a single CBOR value from the front of `data`.
/// returns the value and the number of bytes consumed.
pub fn decode(allocator: Allocator, data: []const u8) DecodeError!struct { value: Value, consumed: usize } {
    var pos: usize = 0;
    const value = try decodeAt(allocator, data, &pos, 0);
    return .{ .value = value, .consumed = pos };
}

/// decode all bytes as a single CBOR value, rejecting trailing bytes
pub fn decodeAll(allocator: Allocator, data: []const u8) DecodeError!Value {
    var pos: usize = 0;
    const value = try decodeAt(allocator, data, &pos, 0);
    if (pos != data.len) return error.TrailingBytes;
    return value;
}

/// slice `count` payload bytes starting at `pos.*` and advance `pos` past them.
/// returns error.UnexpectedEof when the payload would run past `data`
/// (including when `count` does not fit in usize).
fn takePayload(data: []const u8, pos: *usize, count: u64) DecodeError![]const u8 {
    const len = std.math.cast(usize, count) orelse return error.UnexpectedEof;
    const end = std.math.add(usize, pos.*, len) catch return error.UnexpectedEof;
    if (end > data.len) return error.UnexpectedEof;
    const payload = data[pos.*..end];
    pos.* = end;
    return payload;
}

/// decode one value starting at `pos.*`, advancing `pos` past it.
/// `depth` counts enclosing arrays/maps and is capped at `max_depth`.
/// byte strings, text strings, map keys and CIDs are zero-copy slices into
/// `data`; array and map storage is allocated with `allocator`.
fn decodeAt(allocator: Allocator, data: []const u8, pos: *usize, depth: usize) DecodeError!Value {
    if (pos.* >= data.len) return error.UnexpectedEof;
    const initial = data[pos.*];
    const major: u3 = @truncate(initial >> 5);
    const additional: u5 = @truncate(initial);

    // simple values (major 7) are handled without readArg since floats
    // use additional 25/26/27 to mean float16/32/64, not integer arguments
    if (major == 7) {
        pos.* += 1;
        return switch (additional) {
            20 => .{ .boolean = false },
            21 => .{ .boolean = true },
            22 => .null,
            25, 26, 27 => error.UnsupportedFloat, // DAG-CBOR forbids floats in AT Protocol
            31 => error.IndefiniteLength, // break code — DAG-CBOR forbids indefinite lengths
            else => error.UnsupportedSimpleValue,
        };
    }

    const arg = try readArg(data, pos.*);
    pos.* = arg.end;

    return switch (@as(MajorType, @enumFromInt(major))) {
        .unsigned => .{ .unsigned = arg.val },
        .negative => blk: {
            // negative CBOR: value is -1 - val; val must fit in i64 for that to be representable
            const magnitude = std.math.cast(i64, arg.val) orelse return error.Overflow;
            break :blk .{ .negative = -1 - magnitude };
        },
        .byte_string => .{ .bytes = try takePayload(data, pos, arg.val) },
        .text_string => blk: {
            const text = try takePayload(data, pos, arg.val);
            if (!std.unicode.utf8ValidateSlice(text)) return error.InvalidUtf8;
            break :blk .{ .text = text };
        },
        .array => blk: {
            if (depth >= max_depth) return error.MaxDepthExceeded;
            // sanity check: each element is at least 1 byte, so the count
            // cannot exceed the remaining input (also bounds the allocation)
            if (arg.val > data.len - pos.*) return error.UnexpectedEof;
            const items = try allocator.alloc(Value, @intCast(arg.val));
            errdefer allocator.free(items);
            for (items) |*item| {
                item.* = try decodeAt(allocator, data, pos, depth + 1);
            }
            break :blk .{ .array = items };
        },
        .map => blk: {
            if (depth >= max_depth) return error.MaxDepthExceeded;
            // sanity check: each entry is at least 2 bytes (key + value)
            if (arg.val > (data.len - pos.*) / 2) return error.UnexpectedEof;
            const entries = try allocator.alloc(Value.MapEntry, @intCast(arg.val));
            errdefer allocator.free(entries);
            for (entries, 0..) |*entry, i| {
                // DAG-CBOR: map keys must be text strings — inline read to avoid
                // a full decodeAt + Value union construction per key
                const key_arg = try readArg(data, pos.*);
                pos.* = key_arg.end;
                if (key_arg.major != 3) return error.InvalidMapKey;
                entry.key = try takePayload(data, pos, key_arg.val);
                if (!std.unicode.utf8ValidateSlice(entry.key)) return error.InvalidUtf8;

                // DAG-CBOR: keys must be sorted (shorter first, then lex) and unique
                if (i > 0) {
                    const prev = entries[i - 1].key;
                    if (prev.len > entry.key.len) return error.UnsortedMapKeys;
                    if (prev.len == entry.key.len) {
                        switch (std.mem.order(u8, prev, entry.key)) {
                            .lt => {}, // ok — lex order
                            .eq => return error.DuplicateMapKey,
                            .gt => return error.UnsortedMapKeys,
                        }
                    }
                }

                entry.value = try decodeAt(allocator, data, pos, depth + 1);
            }
            break :blk .{ .map = entries };
        },
        .tag => blk: {
            if (arg.val != 42) return error.UnsupportedTag; // DAG-CBOR only allows tag 42 (CID)
            // CID link — content must be a byte string with 0x00 prefix.
            // read the byte-string header inline instead of recursing into
            // decodeAt: recursion here did not increase `depth`, so a long run
            // of nested tag-42 headers could recurse without bound.
            const inner = try readArg(data, pos.*);
            if (inner.major != 2) return error.InvalidCid;
            pos.* = inner.end;
            const cid_bytes = try takePayload(data, pos, inner.val);
            // CID byte string must have 0x00 identity multibase prefix + at least
            // version byte + codec byte (minimum 3 bytes total)
            if (cid_bytes.len < 3 or cid_bytes[0] != 0x00) return error.InvalidCid;
            break :blk .{ .cid = .{ .raw = cid_bytes[1..] } }; // zero-copy: just reference the bytes
        },
        .simple => unreachable, // handled above
    };
}

/// wrap raw CID bytes (after removing the 0x00 multibase prefix) into a Cid.
/// does not validate the CID structure — call version()/codec()/digest() to parse lazily.
pub fn parseCid(raw: []const u8) Cid {
    return .{ .raw = raw };
}

/// read an unsigned varint (LEB128) at `pos.*`, advancing `pos` past the bytes read.
/// returns null on truncated input, on varints longer than 10 bytes,
/// and on overflow (10th byte must have value <= 1).
pub fn readUvarint(data: []const u8, pos: *usize) ?u64 {
    var result: u64 = 0;
    var shift: u7 = 0;
    for (0..10) |i| {
        if (pos.* >= data.len) return null;
        const byte = data[pos.*];
        pos.* += 1;
        // 10th byte (i=9, shift=63): only bit 0 can fit in u64
        if (i == 9 and byte > 1) return null;
        result |= @as(u64, byte & 0x7f) << @as(u6, @intCast(shift));
        if (byte & 0x80 == 0) return result;
        shift += 7;
    }
    return null; // varint too long
}

// === encoder ===

pub const EncodeError = error{
    OutOfMemory,
};

/// write the CBOR initial byte + argument using shortest encoding (DAG-CBOR requirement).
/// batches all bytes into a single writeAll call to minimize writer dispatch overhead.
fn writeArgument(writer: anytype, major: u3, val: u64) !void {
    const head: u8 = @as(u8, major) << 5;
    // 1 initial byte + at most 8 argument bytes
    var scratch: [9]u8 = undefined;
    const used: usize = if (val < 24) direct: {
        scratch[0] = head | @as(u8, @intCast(val));
        break :direct 1;
    } else if (val <= 0xff) one: {
        scratch[0] = head | 24;
        scratch[1] = @intCast(val);
        break :one 2;
    } else if (val <= 0xffff) two: {
        scratch[0] = head | 25;
        std.mem.writeInt(u16, scratch[1..3], @as(u16, @intCast(val)), .big);
        break :two 3;
    } else if (val <= 0xffffffff) four: {
        scratch[0] = head | 26;
        std.mem.writeInt(u32, scratch[1..5], @as(u32, @intCast(val)), .big);
        break :four 5;
    } else eight: {
        scratch[0] = head | 27;
        std.mem.writeInt(u64, scratch[1..9], val, .big);
        break :eight 9;
    };
    try writer.writeAll(scratch[0..used]);
}

/// report whether every adjacent pair of entries is strictly increasing
/// in DAG-CBOR key order (so duplicates count as "not sorted")
fn keysAlreadySorted(entries: []const Value.MapEntry) bool {
    var i: usize = 1;
    while (i < entries.len) : (i += 1) {
        if (!dagCborKeyLessThan({}, entries[i - 1], entries[i])) return false;
    }
    return true;
}

/// strict "less than" for DAG-CBOR map keys: compare byte length first,
/// fall back to bytewise lexicographic order when lengths match
fn dagCborKeyLessThan(_: void, a: Value.MapEntry, b: Value.MapEntry) bool {
    return switch (std.math.order(a.key.len, b.key.len)) {
        .lt => true,
        .gt => false,
        .eq => std.mem.lessThan(u8, a.key, b.key),
    };
}

/// write a short text string (< 24 bytes) as a single fused write.
/// this is the hot path for map keys in AT Protocol records, where keys
/// are always short ASCII strings. fusing header+payload into one writeAll
/// halves the writer dispatch count.
fn writeShortText(writer: anytype, text: []const u8) !void {
    // long strings: regular header followed by a separate payload write
    if (text.len >= 24) {
        try writeArgument(writer, 3, text.len);
        try writer.writeAll(text);
        return;
    }
    // short strings: the length fits in the initial byte (0x60 | len), so
    // header and payload go out together
    var fused: [24]u8 = undefined;
    fused[0] = 0x60 | @as(u8, @intCast(text.len));
    @memcpy(fused[1 .. 1 + text.len], text);
    try writer.writeAll(fused[0 .. 1 + text.len]);
}

/// encode a Value to the given writer in DAG-CBOR format.
/// `allocator` is only used for a temporary sorted copy of map entries when a
/// map has more than 16 entries and is not already in key order.
pub fn encode(allocator: Allocator, writer: anytype, value: Value) !void {
    switch (value) {
        .unsigned => |n| try writeArgument(writer, 0, n),
        .negative => |n| {
            // CBOR negative: the wire argument is -1 - n, under major type 1
            const wire_arg: u64 = @intCast(-1 - n);
            try writeArgument(writer, 1, wire_arg);
        },
        .bytes => |payload| {
            try writeArgument(writer, 2, payload.len);
            try writer.writeAll(payload);
        },
        .text => |s| try writeShortText(writer, s),
        .array => |elements| {
            try writeArgument(writer, 4, elements.len);
            for (elements) |element| try encode(allocator, writer, element);
        },
        .map => |entries| {
            try writeArgument(writer, 5, entries.len);

            // DAG-CBOR: keys sorted by byte length, then lexicographically.
            // pick the entry order first, then emit with a single loop:
            //   - already sorted: use the input as-is
            //   - up to 16 entries: sort a copy held on the stack
            //   - otherwise: sort a heap copy, released when this prong exits
            var stack_copy: [16]Value.MapEntry = undefined;
            var heap_copy: ?[]Value.MapEntry = null;
            defer if (heap_copy) |owned| allocator.free(owned);

            const ordered: []const Value.MapEntry = order: {
                if (keysAlreadySorted(entries)) break :order entries;
                const scratch: []Value.MapEntry = if (entries.len <= 16)
                    stack_copy[0..entries.len]
                else heap: {
                    const owned = try allocator.alloc(Value.MapEntry, entries.len);
                    heap_copy = owned;
                    break :heap owned;
                };
                @memcpy(scratch, entries);
                std.mem.sort(Value.MapEntry, scratch, {}, dagCborKeyLessThan);
                break :order scratch;
            };

            for (ordered) |entry| {
                try writeShortText(writer, entry.key);
                try encode(allocator, writer, entry.value);
            }
        },
        .boolean => |flag| {
            const byte: u8 = if (flag) 0xf5 else 0xf4;
            try writer.writeByte(byte);
        },
        .null => try writer.writeByte(0xf6),
        .cid => |link| {
            // tag 42 + byte string with 0x00 identity multibase prefix + raw CID bytes
            try writeArgument(writer, 6, 42);
            try writeArgument(writer, 2, 1 + link.raw.len);
            try writer.writeByte(0x00);
            try writer.writeAll(link.raw);
        },
    }
}

/// encode a Value into a new byte slice allocated with `allocator`
pub fn encodeAlloc(allocator: Allocator, value: Value) ![]u8 {
    var sink: std.Io.Writer.Allocating = .init(allocator);
    errdefer sink.deinit();
    try encode(allocator, &sink.writer, value);
    return try sink.toOwnedSlice();
}

/// write an unsigned varint (LEB128) — used for CID and CAR serialization.
/// emits 7 bits per byte, low bits first, with 0x80 set on every byte but the last.
pub fn writeUvarint(writer: anytype, val: u64) !void {
    var rest = val;
    while (true) {
        const low: u8 = @truncate(rest);
        if (rest < 0x80) return writer.writeByte(low);
        try writer.writeByte(low | 0x80);
        rest >>= 7;
    }
}

/// Result of reading a CBOR initial byte and its argument.
pub const Arg = struct {
    major: u3,
    val: u64,
    end: usize,
};

/// Read the CBOR initial byte at `pos` plus any following argument bytes.
/// Returns the major type (high 3 bits), the argument value, and the
/// position just past the header.
///
/// Rejects arguments that are not in shortest form (DAG-CBOR requirement),
/// reserved additional-info values 28..30, and the indefinite-length
/// marker 31.
pub fn readArg(data: []const u8, pos: usize) DecodeError!Arg {
    if (pos >= data.len) return error.UnexpectedEof;
    const head = data[pos];
    const major: u3 = @truncate(head >> 5);
    const info: u5 = @truncate(head);
    // bytes following the initial byte; the argument, if any, starts here
    const tail = data[pos + 1 ..];
    var width: usize = 0;
    const val: u64 = switch (info) {
        0...23 => info,
        24 => one: { // 1-byte
            if (tail.len < 1) return error.UnexpectedEof;
            if (tail[0] < 24) return error.NonMinimalEncoding;
            width = 1;
            break :one tail[0];
        },
        25 => two: { // 2-byte big-endian
            if (tail.len < 2) return error.UnexpectedEof;
            const v = std.mem.readInt(u16, tail[0..2], .big);
            if (v <= 0xff) return error.NonMinimalEncoding;
            width = 2;
            break :two v;
        },
        26 => four: { // 4-byte big-endian
            if (tail.len < 4) return error.UnexpectedEof;
            const v = std.mem.readInt(u32, tail[0..4], .big);
            if (v <= 0xffff) return error.NonMinimalEncoding;
            width = 4;
            break :four v;
        },
        27 => eight: { // 8-byte big-endian
            if (tail.len < 8) return error.UnexpectedEof;
            const v = std.mem.readInt(u64, tail[0..8], .big);
            if (v <= 0xffffffff) return error.NonMinimalEncoding;
            width = 8;
            break :eight v;
        },
        28, 29, 30 => return error.ReservedAdditionalInfo,
        31 => return error.IndefiniteLength,
    };
    return .{ .major = major, .val = val, .end = pos + 1 + width };
}

// ---------------------------------------------------------------------------
// Type-specific readers — zero-copy, no allocator needed
// ---------------------------------------------------------------------------

pub const SliceResult = struct { val: []const u8, end: usize };
pub const U64Result = struct { val: u64, end: usize };
pub const I64Result = struct { val: i64, end: usize };
pub const BoolResult = struct { val: bool, end: usize };

/// shared body of readText/readBytes: read a string header of the expected
/// major type and return its payload as a slice into `data`
fn readStringPayload(data: []const u8, pos: usize, expected_major: u3) DecodeError!SliceResult {
    const header = try readArg(data, pos);
    if (header.major != expected_major) return error.WrongType;
    const len = std.math.cast(usize, header.val) orelse return error.UnexpectedEof;
    const stop = std.math.add(usize, header.end, len) catch return error.UnexpectedEof;
    if (stop > data.len) return error.UnexpectedEof;
    return .{ .val = data[header.end..stop], .end = stop };
}

/// Read a CBOR text string (major type 3) at `pos`.
/// Returns a zero-copy slice into `data`; error.InvalidUtf8 if the payload
/// is not valid UTF-8, error.WrongType if the item is not a text string.
pub fn readText(data: []const u8, pos: usize) DecodeError!SliceResult {
    const result = try readStringPayload(data, pos, 3);
    if (!std.unicode.utf8ValidateSlice(result.val)) return error.InvalidUtf8;
    return result;
}

/// Read a CBOR byte string (major type 2) at `pos`.
/// Returns a zero-copy slice into `data`; error.WrongType if the item is
/// not a byte string.
pub fn readBytes(data: []const u8, pos: usize) DecodeError!SliceResult {
    return readStringPayload(data, pos, 2);
}

/// Read a CBOR unsigned integer (major type 0) at `pos`.
pub fn readUint(data: []const u8, pos: usize) DecodeError!U64Result {
    const header = try readArg(data, pos);
    return if (header.major == 0)
        .{ .val = header.val, .end = header.end }
    else
        error.WrongType;
}

/// Read a CBOR integer (major type 0 or 1) at `pos` as an i64.
/// Major 0 yields the argument itself, major 1 yields `-1 - argument`.
/// Returns error.Overflow when the argument exceeds maxInt(i64) (for either
/// sign) and error.WrongType for any other major type.
pub fn readInt(data: []const u8, pos: usize) DecodeError!I64Result {
    const header = try readArg(data, pos);
    if (header.major > 1) return error.WrongType;
    // both signs share the same bound: i64 reaches down to -2^63, which is
    // -1 - (2^63 - 1), so the raw argument must fit in i64 either way
    const magnitude = std.math.cast(i64, header.val) orelse return error.Overflow;
    const signed: i64 = if (header.major == 0) magnitude else -1 - magnitude;
    return .{ .val = signed, .end = header.end };
}

/// Read a CBOR boolean at `pos`.
/// 0xf4 = false, 0xf5 = true; any other byte is error.WrongType.
pub fn readBool(data: []const u8, pos: usize) DecodeError!BoolResult {
    if (pos >= data.len) return error.UnexpectedEof;
    const byte = data[pos];
    if (byte != 0xf4 and byte != 0xf5) return error.WrongType;
    return .{ .val = byte == 0xf5, .end = pos + 1 };
}

/// Read a CBOR null (0xf6) at `pos`.
/// Returns the position after the null byte; error.WrongType for any other byte.
pub fn readNull(data: []const u8, pos: usize) DecodeError!usize {
    if (pos >= data.len) return error.UnexpectedEof;
    return if (data[pos] == 0xf6) pos + 1 else error.WrongType;
}

/// Read a CBOR map header (major type 5) at `pos`.
/// Returns the entry count.
pub fn readMapHeader(data: []const u8, pos: usize) DecodeError!U64Result {
    const header = try readArg(data, pos);
    return if (header.major == 5)
        .{ .val = header.val, .end = header.end }
    else
        error.WrongType;
}

/// Read a CBOR array header (major type 4) at `pos`.
/// Returns the element count.
pub fn readArrayHeader(data: []const u8, pos: usize) DecodeError!U64Result {
    const header = try readArg(data, pos);
    return if (header.major == 4)
        .{ .val = header.val, .end = header.end }
    else
        error.WrongType;
}

/// Read a DAG-CBOR CID link at `pos`.
/// Expects tag(42) followed by a byte string with a 0x00 identity multibase prefix.
/// Returns the raw CID bytes (after the 0x00 prefix) as a zero-copy slice.
/// error.WrongType if the item is not tag 42 wrapping a byte string;
/// error.InvalidCid if the byte string is shorter than 3 bytes or lacks the prefix.
pub fn readCidLink(data: []const u8, pos: usize) DecodeError!SliceResult {
    const tag = try readArg(data, pos);
    const is_cid_tag = tag.major == 6 and tag.val == 42;
    if (!is_cid_tag) return error.WrongType;

    const inner = try readBytes(data, tag.end);
    // need the 0x00 prefix plus at least a version byte and a codec byte
    const well_formed = inner.val.len >= 3 and inner.val[0] == 0x00;
    if (!well_formed) return error.InvalidCid;
    return .{ .val = inner.val[1..], .end = inner.end };
}

// ---------------------------------------------------------------------------
// Streaming helpers — skip / peek without full decode
// ---------------------------------------------------------------------------

/// Skip one CBOR value at `pos` without decoding it. Returns the position
/// after the skipped value. Iterative (not recursive) using a small stack
/// for nested containers. Zero allocation.
pub fn skipValue(data: []const u8, pos: usize) DecodeError!usize {
    const stack_capacity = 32;
    // pending[k] = number of items still to be consumed in the k-th open container
    var pending: [stack_capacity]u64 = undefined;
    var level: usize = 0;
    var cursor = pos;

    while (true) {
        const head = try readArg(data, cursor);
        cursor = head.end;

        switch (head.major) {
            // integers and simple values: the header is the whole item
            0, 1, 7 => {},
            // byte/text strings: jump over `val` payload bytes
            2, 3 => {
                const len = std.math.cast(usize, head.val) orelse return error.UnexpectedEof;
                cursor = std.math.add(usize, cursor, len) catch return error.UnexpectedEof;
                if (cursor > data.len) return error.UnexpectedEof;
            },
            // non-empty array/map: open a new level and go read its first item.
            // an empty container falls through and counts as one finished item.
            4, 5 => if (head.val > 0) {
                if (level >= stack_capacity) return error.MaxDepthExceeded;
                pending[level] = switch (head.major) {
                    4 => head.val, // one item per element
                    else => std.math.mul(u64, head.val, 2) catch return error.Overflow, // key + value per entry
                };
                level += 1;
                continue;
            },
            // tag: the tagged item follows and is what counts as the value
            6 => continue,
        }

        // one item finished: charge it to the innermost open container and
        // close every container that has just become complete
        while (level > 0) {
            const top = &pending[level - 1];
            top.* -= 1;
            if (top.* != 0) break;
            level -= 1;
        }

        if (level == 0) return cursor;
    }
}

/// Peek at the "$type" field of the DAG-CBOR map at the start of `data`
/// without a full decode. Returns the type string (zero-copy slice) or
/// null if not found.
pub fn peekType(data: []const u8) DecodeError!?[]const u8 {
    return peekTypeAt(data, 0);
}

/// Peek at the "$type" field starting from a given position.
pub fn peekTypeAt(data: []const u8, pos: usize) DecodeError!?[]const u8 {
    const header = try readArg(data, pos);
    if (header.major != 5) return null;

    var remaining = std.math.cast(usize, header.val) orelse return null;
    var cursor = header.end;
    while (remaining > 0) : (remaining -= 1) {
        // keys are expected to be text strings; anything else ends the search
        const key = readText(data, cursor) catch return null;

        if (std.mem.eql(u8, key.val, "$type")) {
            // a "$type" whose value is not readable text also yields null
            const type_text = readText(data, key.end) catch return null;
            return type_text.val;
        }

        // not the key we want: step over its value (errors here propagate)
        cursor = try skipValue(data, key.end);
    }

    return null;
}

// === low-level write API ===

/// Write CBOR initial byte + argument into `buf` at `pos` using the shortest encoding.
/// Returns new position after written bytes. Caller must ensure buf is large enough.
pub fn writeArg(buf: []u8, pos: usize, major: u3, val: u64) usize {
    const head: u8 = @as(u8, major) << 5;
    if (val < 24) {
        // argument fits in the low 5 bits of the initial byte
        buf[pos] = head | @as(u8, @intCast(val));
        return pos + 1;
    }
    if (val <= 0xff) {
        buf[pos] = head | 24;
        buf[pos + 1] = @intCast(val);
        return pos + 2;
    }
    if (val <= 0xffff) {
        buf[pos] = head | 25;
        std.mem.writeInt(u16, buf[pos + 1 ..][0..2], @as(u16, @intCast(val)), .big);
        return pos + 3;
    }
    if (val <= 0xffffffff) {
        buf[pos] = head | 26;
        std.mem.writeInt(u32, buf[pos + 1 ..][0..4], @as(u32, @intCast(val)), .big);
        return pos + 5;
    }
    buf[pos] = head | 27;
    std.mem.writeInt(u64, buf[pos + 1 ..][0..8], val, .big);
    return pos + 9;
}

/// Write CBOR text string header + payload. Returns the position after the payload.
pub fn writeText(buf: []u8, pos: usize, text: []const u8) usize {
    const start = writeArg(buf, pos, 3, text.len);
    const stop = start + text.len;
    @memcpy(buf[start..stop], text);
    return stop;
}

/// Write CBOR byte string header + payload. Returns the position after the payload.
pub fn writeBytes(buf: []u8, pos: usize, bytes: []const u8) usize {
    const start = writeArg(buf, pos, 2, bytes.len);
    const stop = start + bytes.len;
    @memcpy(buf[start..stop], bytes);
    return stop;
}

/// Write unsigned integer (major 0).
pub fn writeUint(buf: []u8, pos: usize, val: u64) usize {
    return writeArg(buf, pos, 0, val);
}

/// Write signed integer. Non-negative values use major 0; negative values
/// use major 1 with argument `-1 - val`.
pub fn writeInt(buf: []u8, pos: usize, val: i64) usize {
    if (val < 0) {
        // -(val + 1) == -1 - val, and val + 1 cannot overflow because val < 0
        const wire_arg: u64 = @intCast(-(val + 1));
        return writeArg(buf, pos, 1, wire_arg);
    }
    return writeArg(buf, pos, 0, @intCast(val));
}

/// Write map header (major 5) for `count` entries.
pub fn writeMapHeader(buf: []u8, pos: usize, count: usize) usize {
    return writeArg(buf, pos, 5, count);
}

/// Write array header (major 4) for `count` elements.
pub fn writeArrayHeader(buf: []u8, pos: usize, count: usize) usize {
    return writeArg(buf, pos, 4, count);
}

/// Write boolean: 0xf5 (true) or 0xf4 (false).
pub fn writeBool(buf: []u8, pos: usize, val: bool) usize {
    const byte: u8 = if (val) 0xf5 else 0xf4;
    buf[pos] = byte;
    return pos + 1;
}

/// Write null: 0xf6.
pub fn writeNull(buf: []u8, pos: usize) usize {
    buf[pos] = 0xf6;
    return pos + 1;
}

/// Write tag(42) + byte string with 0x00 prefix + CID raw bytes.
pub fn writeCidLink(buf: []u8, pos: usize, cid_raw: []const u8) usize {
    // tag 42, then a byte-string head whose length counts the 0x00 prefix
    const after_tag = writeArg(buf, pos, 6, 42);
    const prefix_at = writeArg(buf, after_tag, 2, 1 + cid_raw.len);
    buf[prefix_at] = 0x00;
    const cid_at = prefix_at + 1;
    @memcpy(buf[cid_at..][0..cid_raw.len], cid_raw);
    return cid_at + cid_raw.len;
}

// === tests ===

test "decode unsigned integers" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const scratch = arena_state.allocator();

    // inline values (0, 1, 23), then the 1-byte and 2-byte argument forms
    const cases = [_]struct { bytes: []const u8, want: u64 }{
        .{ .bytes = &.{0x00}, .want = 0 },
        .{ .bytes = &.{0x01}, .want = 1 },
        .{ .bytes = &.{0x17}, .want = 23 },
        .{ .bytes = &.{ 0x18, 24 }, .want = 24 },
        .{ .bytes = &.{ 0x19, 0x03, 0xe8 }, .want = 1000 },
    };
    for (cases) |tc| {
        const out = try decode(scratch, tc.bytes);
        try std.testing.expectEqual(tc.want, out.value.unsigned);
    }
}

test "decode negative integers" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const scratch = arena_state.allocator();

    // major 1 with additional 0 is -1; additional 9 is -10
    const cases = [_]struct { bytes: []const u8, want: i64 }{
        .{ .bytes = &.{0x20}, .want = -1 },
        .{ .bytes = &.{0x29}, .want = -10 },
    };
    for (cases) |tc| {
        const out = try decode(scratch, tc.bytes);
        try std.testing.expectEqual(tc.want, out.value.negative);
    }
}

test "decode text strings" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const scratch = arena_state.allocator();

    // empty string, one character, five characters
    const cases = [_]struct { bytes: []const u8, want: []const u8 }{
        .{ .bytes = &.{0x60}, .want = "" },
        .{ .bytes = &.{ 0x61, 'a' }, .want = "a" },
        .{ .bytes = &.{ 0x65, 'h', 'e', 'l', 'l', 'o' }, .want = "hello" },
    };
    for (cases) |tc| {
        const out = try decode(scratch, tc.bytes);
        try std.testing.expectEqualStrings(tc.want, out.value.text);
    }
}

test "decode byte strings" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const scratch = arena_state.allocator();

    // zero-length payload, then a 3-byte payload
    const cases = [_]struct { bytes: []const u8, want: []const u8 }{
        .{ .bytes = &.{0x40}, .want = &.{} },
        .{ .bytes = &.{ 0x43, 1, 2, 3 }, .want = &.{ 1, 2, 3 } },
    };
    for (cases) |tc| {
        const out = try decode(scratch, tc.bytes);
        try std.testing.expectEqualSlices(u8, tc.want, out.value.bytes);
    }
}

test "decode booleans and null" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const scratch = arena_state.allocator();

    const as_false = try decode(scratch, &.{0xf4});
    try std.testing.expectEqual(false, as_false.value.boolean);

    const as_true = try decode(scratch, &.{0xf5});
    try std.testing.expectEqual(true, as_true.value.boolean);

    const as_null = try decode(scratch, &.{0xf6});
    try std.testing.expectEqual(Value.null, as_null.value);
}

test "decode array" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const scratch = arena_state.allocator();

    // 0x83 = array(3), followed by the elements 1, 2, 3
    const items = (try decode(scratch, &.{ 0x83, 0x01, 0x02, 0x03 })).value.array;
    try std.testing.expectEqual(@as(usize, 3), items.len);
    for ([_]u64{ 1, 2, 3 }, items) |want, item| {
        try std.testing.expectEqual(want, item.unsigned);
    }
}

test "decode map" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const scratch = arena_state.allocator();

    // {"a": 1, "b": 2}
    const wire = [_]u8{
        0xa2, // map(2)
        0x61, 'a', 0x01, // "a": 1
        0x61, 'b', 0x02, // "b": 2
    };
    const root = (try decode(scratch, &wire)).value;
    try std.testing.expectEqual(@as(u64, 1), root.get("a").?.unsigned);
    try std.testing.expectEqual(@as(u64, 2), root.get("b").?.unsigned);
}

test "decode nested map" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const scratch = arena_state.allocator();

    // {"t": "#commit", "op": 1} — shorter key ("t") precedes the longer ("op")
    const wire = [_]u8{
        0xa2, // map(2)
        0x61, 't', 0x67, '#', 'c', 'o', 'm', 'm', 'i', 't', // "t": "#commit"
        0x62, 'o', 'p', 0x01, // "op": 1
    };
    const root = (try decode(scratch, &wire)).value;
    try std.testing.expectEqual(@as(u64, 1), root.get("op").?.unsigned);
    try std.testing.expectEqualStrings("#commit", root.getString("t").?);
}

test "consumed bytes tracking" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const scratch = arena_state.allocator();

    // two back-to-back items (1 then 2): decoding the first must report how
    // many bytes it used so the second can be decoded from the remainder
    const stream = &[_]u8{ 0x01, 0x02 };

    const head_item = try decode(scratch, stream);
    try std.testing.expectEqual(@as(u64, 1), head_item.value.unsigned);
    try std.testing.expectEqual(@as(usize, 1), head_item.consumed);

    const tail_item = try decode(scratch, stream[head_item.consumed..]);
    try std.testing.expectEqual(@as(u64, 2), tail_item.value.unsigned);
}

test "reject floats" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const scratch = arena_state.allocator();

    // 0xf9 introduces a half-precision float (f16)
    const half_float = [_]u8{ 0xf9, 0x00, 0x00 };
    try std.testing.expectError(error.UnsupportedFloat, decode(scratch, &half_float));
}

test "Value helper methods" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const scratch = arena_state.allocator();

    const wire = [_]u8{
        0xa3, // map(3)
        0x63, 'a', 'g', 'e', 0x18, 30, // "age": 30 (3-byte key)
        0x64, 'n', 'a', 'm', 'e', 0x65, 'a', 'l', 'i', 'c', 'e', // "name": "alice" (4-byte key)
        0x66, 'a', 'c', 't', 'i', 'v', 'e', 0xf5, // "active": true (6-byte key)
    };
    const root = (try decode(scratch, &wire)).value;

    try std.testing.expectEqualStrings("alice", root.getString("name").?);
    try std.testing.expectEqual(@as(i64, 30), root.getInt("age").?);
    try std.testing.expectEqual(true, root.getBool("active").?);
    try std.testing.expect(root.getString("missing") == null);
}

// === encoder tests ===

test "encode unsigned integers" {
    var buf: [16]u8 = undefined;
    var w: std.Io.Writer = .fixed(&buf);

    // 0 and 23 fit in the initial byte; 24 needs one extra byte; 1000 needs two
    const cases = [_]struct { input: u64, want: []const u8 }{
        .{ .input = 0, .want = &.{0x00} },
        .{ .input = 23, .want = &.{0x17} },
        .{ .input = 24, .want = &.{ 0x18, 24 } },
        .{ .input = 1000, .want = &.{ 0x19, 0x03, 0xe8 } },
    };
    for (cases) |tc| {
        w.end = 0; // rewind the fixed writer before each case
        try encode(std.testing.allocator, &w, .{ .unsigned = tc.input });
        try std.testing.expectEqualSlices(u8, tc.want, w.buffered());
    }
}

test "encode negative integers" {
    var buf: [16]u8 = undefined;
    var w: std.Io.Writer = .fixed(&buf);

    // -1 is major 1 / additional 0; -10 is major 1 / additional 9
    const cases = [_]struct { input: i64, want: []const u8 }{
        .{ .input = -1, .want = &.{0x20} },
        .{ .input = -10, .want = &.{0x29} },
    };
    for (cases) |tc| {
        w.end = 0;
        try encode(std.testing.allocator, &w, .{ .negative = tc.input });
        try std.testing.expectEqualSlices(u8, tc.want, w.buffered());
    }
}

test "encode text strings" {
    var buf: [64]u8 = undefined;
    var w: std.Io.Writer = .fixed(&buf);

    const cases = [_]struct { input: []const u8, want: []const u8 }{
        .{ .input = "", .want = &.{0x60} },
        .{ .input = "hello", .want = &.{ 0x65, 'h', 'e', 'l', 'l', 'o' } },
    };
    for (cases) |tc| {
        w.end = 0;
        try encode(std.testing.allocator, &w, .{ .text = tc.input });
        try std.testing.expectEqualSlices(u8, tc.want, w.buffered());
    }
}

test "encode byte strings" {
    var buf: [64]u8 = undefined;
    var w: std.Io.Writer = .fixed(&buf);

    const cases = [_]struct { input: []const u8, want: []const u8 }{
        .{ .input = &.{}, .want = &.{0x40} },
        .{ .input = &.{ 1, 2, 3 }, .want = &.{ 0x43, 1, 2, 3 } },
    };
    for (cases) |tc| {
        w.end = 0;
        try encode(std.testing.allocator, &w, .{ .bytes = tc.input });
        try std.testing.expectEqualSlices(u8, tc.want, w.buffered());
    }
}

test "encode booleans and null" {
    var buf: [4]u8 = undefined;
    var w: std.Io.Writer = .fixed(&buf);

    // each of these encodes to exactly one byte
    const inputs = [_]Value{ .{ .boolean = false }, .{ .boolean = true }, .null };
    const wants = [_]u8{ 0xf4, 0xf5, 0xf6 };
    for (inputs, wants) |input, want| {
        w.end = 0;
        try encode(std.testing.allocator, &w, input);
        try std.testing.expectEqualSlices(u8, &[_]u8{want}, w.buffered());
    }
}

test "encode array" {
    var buf: [64]u8 = undefined;
    var w: std.Io.Writer = .fixed(&buf);

    // [1, 2, 3]
    const input: Value = .{ .array = &.{
        .{ .unsigned = 1 },
        .{ .unsigned = 2 },
        .{ .unsigned = 3 },
    } };
    try encode(std.testing.allocator, &w, input);

    const want = [_]u8{ 0x83, 0x01, 0x02, 0x03 };
    try std.testing.expectEqualSlices(u8, &want, w.buffered());
}

test "encode map with DAG-CBOR key sorting" {
    var buf: [128]u8 = undefined;
    var w: std.Io.Writer = .fixed(&buf);

    // entries are handed over out of order; the encoder has to emit them by
    // key length first and lexicographically second: "a", "bb", "cc"
    const input: Value = .{ .map = &.{
        .{ .key = "bb", .value = .{ .unsigned = 2 } },
        .{ .key = "a", .value = .{ .unsigned = 1 } },
        .{ .key = "cc", .value = .{ .unsigned = 3 } },
    } };
    try encode(std.testing.allocator, &w, input);

    const want = [_]u8{
        0xa3, // map(3)
        0x61, 'a', 0x01, // "a": 1 (shortest key first)
        0x62, 'b', 'b', 0x02, // "bb": 2 (same length, lex order)
        0x62, 'c', 'c', 0x03, // "cc": 3
    };
    try std.testing.expectEqualSlices(u8, &want, w.buffered());
}

test "round-trip encode → decode" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const scratch = arena_state.allocator();

    // a map mixing text, boolean and integer values
    const source: Value = .{ .map = &.{
        .{ .key = "name", .value = .{ .text = "alice" } },
        .{ .key = "active", .value = .{ .boolean = true } },
        .{ .key = "seq", .value = .{ .unsigned = 42 } },
    } };

    const wire = try encodeAlloc(scratch, source);
    const back = try decodeAll(scratch, wire);

    try std.testing.expectEqualStrings("alice", back.getString("name").?);
    try std.testing.expectEqual(true, back.getBool("active").?);
    try std.testing.expectEqual(@as(i64, 42), back.getInt("seq").?);
}

test "round-trip nested structures" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const scratch = arena_state.allocator();

    // {"ops": [{"action": "create"}], "seq": 1}
    const source: Value = .{ .map = &.{
        .{ .key = "ops", .value = .{ .array = &.{
            .{ .map = &.{
                .{ .key = "action", .value = .{ .text = "create" } },
            } },
        } } },
        .{ .key = "seq", .value = .{ .unsigned = 1 } },
    } };

    const wire = try encodeAlloc(scratch, source);
    const back = try decodeAll(scratch, wire);

    const op_list = back.getArray("ops").?;
    try std.testing.expectEqual(@as(usize, 1), op_list.len);
    try std.testing.expectEqualStrings("create", op_list[0].getString("action").?);
    try std.testing.expectEqual(@as(i64, 1), back.getInt("seq").?);
}

test "encode CID via tag 42" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const scratch = arena_state.allocator();

    // hand-built CIDv1: 4 header bytes followed by a 32-byte digest of 0xaa
    const cid_header = [_]u8{
        0x01, // version
        0x71, // dag-cbor
        0x12, // sha2-256
        0x20, // 32-byte digest
    };
    const cid_bytes = cid_header ++ [_]u8{0xaa} ** 32;

    const source: Value = .{ .cid = .{
        .raw = &cid_bytes,
    } };

    const wire = try encodeAlloc(scratch, source);
    const back = try decodeAll(scratch, wire);

    // the decoded value must be a CID carrying the very same raw bytes
    const link = back.cid;
    try std.testing.expectEqual(@as(u64, 1), link.version().?);
    try std.testing.expectEqual(@as(u64, 0x71), link.codec().?);
    try std.testing.expectEqual(@as(u64, 0x12), link.hashFn().?);
    try std.testing.expectEqualSlices(u8, &cid_bytes, link.raw);
}

test "writeUvarint round-trip" {
    var buf: [16]u8 = undefined;
    var w: std.Io.Writer = .fixed(&buf);

    const samples = [_]u64{ 0, 1, 127, 128, 255, 256, 16384, 0xffffffff };
    for (samples) |sample| {
        w.end = 0;
        try writeUvarint(&w, sample);
        const varint_bytes = w.buffered();

        // reading back must yield the same number and use up every byte
        var cursor: usize = 0;
        const read_back = readUvarint(varint_bytes, &cursor).?;
        try std.testing.expectEqual(sample, read_back);
        try std.testing.expectEqual(varint_bytes.len, cursor);
    }
}

test "DAG-CBOR key sort is stable" {
    // two keys of equal length, supplied in reverse lexicographic order
    var buf: [128]u8 = undefined;
    var w: std.Io.Writer = .fixed(&buf);

    const input: Value = .{ .map = &.{
        .{ .key = "op", .value = .{ .unsigned = 1 } },
        .{ .key = "ab", .value = .{ .unsigned = 2 } },
    } };
    try encode(std.testing.allocator, &w, input);

    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const back = try decodeAll(arena_state.allocator(), w.buffered());

    // on the wire "ab" has to precede "op"
    const wire_order = back.map;
    try std.testing.expectEqualStrings("ab", wire_order[0].key);
    try std.testing.expectEqualStrings("op", wire_order[1].key);
}

// === CID creation tests ===

test "Cid.forDagCbor creates valid CIDv1" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const scratch = arena_state.allocator();

    // encode a small map, then derive a CID from the encoded bytes
    const source: Value = .{ .map = &.{
        .{ .key = "text", .value = .{ .text = "hello" } },
    } };
    const wire = try encodeAlloc(scratch, source);
    const link = try Cid.forDagCbor(scratch, wire);

    try std.testing.expectEqual(@as(u64, 1), link.version().?);
    try std.testing.expectEqual(Codec.dag_cbor, link.codec().?);
    try std.testing.expectEqual(HashFn.sha2_256, link.hashFn().?);
    try std.testing.expectEqual(@as(usize, 32), link.digest().?.len);
    // 36 = version(1) + codec(0x71) + hash_fn(0x12) + digest_len(0x20) + 32 digest bytes
    try std.testing.expectEqual(@as(usize, 36), link.raw.len);
}

test "Cid.forDagCbor is deterministic" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const scratch = arena_state.allocator();

    const payload = "identical input";
    const first_cid = try Cid.forDagCbor(scratch, payload);
    const second_cid = try Cid.forDagCbor(scratch, payload);

    try std.testing.expectEqualSlices(u8, first_cid.raw, second_cid.raw);
    try std.testing.expectEqualSlices(u8, first_cid.digest().?, second_cid.digest().?);
}

test "Cid.forDagCbor different data → different CIDs" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const scratch = arena_state.allocator();

    const cid_a = try Cid.forDagCbor(scratch, "data A");
    const cid_b = try Cid.forDagCbor(scratch, "data B");

    const same_digest = std.mem.eql(u8, cid_a.digest().?, cid_b.digest().?);
    try std.testing.expect(!same_digest);
}

test "Cid.toBytes round-trips through parseCid" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const scratch = arena_state.allocator();

    const source_cid = try Cid.forDagCbor(scratch, "test content");
    const serialized = try source_cid.toBytes(scratch);
    const reparsed = parseCid(serialized);

    try std.testing.expectEqual(source_cid.version().?, reparsed.version().?);
    try std.testing.expectEqual(source_cid.codec().?, reparsed.codec().?);
    try std.testing.expectEqual(source_cid.hashFn().?, reparsed.hashFn().?);
    try std.testing.expectEqualSlices(u8, source_cid.digest().?, reparsed.digest().?);
}

test "CID round-trip through CBOR encode/decode" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const scratch = arena_state.allocator();

    // derive a CID for some content
    const source_cid = try Cid.forDagCbor(scratch, "block data");

    // place it under a map key, then encode and decode the whole map
    const source: Value = .{ .map = &.{
        .{ .key = "link", .value = .{ .cid = source_cid } },
    } };
    const wire = try encodeAlloc(scratch, source);
    const back = try decodeAll(scratch, wire);

    const link = back.get("link").?.cid;
    try std.testing.expectEqual(source_cid.version().?, link.version().?);
    try std.testing.expectEqual(source_cid.codec().?, link.codec().?);
    try std.testing.expectEqualSlices(u8, source_cid.digest().?, link.digest().?);
}

// === verify CIDs against real AT Protocol records ===

test "real record: pfrazee 'First!' post CID matches network" {
    // at://did:plc:ragtjsm2j2vknwkz3zp4oxrd/app.bsky.feed.post/3jhnzcfawac27
    // CID: bafyreiaqnrahsbvcssf2xe4iqhn2fnjw7utmvrbif2v36tqe3r5iqill7i
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const scratch = arena_state.allocator();

    const post: Value = .{ .map = &.{
        .{ .key = "$type", .value = .{ .text = "app.bsky.feed.post" } },
        .{ .key = "createdAt", .value = .{ .text = "2022-11-17T00:39:00.477Z" } },
        .{ .key = "text", .value = .{ .text = "First!" } },
    } };

    const wire = try encodeAlloc(scratch, post);
    const link = try Cid.forDagCbor(scratch, wire);

    // digest of the record as known from production
    const want_digest = [_]u8{
        0x10, 0x6c, 0x40, 0x79, 0x06, 0xa2, 0x94, 0x8b,
        0xab, 0x93, 0x88, 0x81, 0xdb, 0xa2, 0xb5, 0x36,
        0xfd, 0x26, 0xca, 0xc4, 0x28, 0x2e, 0xab, 0xbf,
        0x4e, 0x04, 0xdc, 0x7a, 0x88, 0x21, 0x6b, 0xfa,
    };

    try std.testing.expectEqualSlices(u8, &want_digest, link.digest().?);
    try std.testing.expectEqual(@as(u64, 1), link.version().?);
    try std.testing.expectEqual(Codec.dag_cbor, link.codec().?);
    try std.testing.expectEqual(HashFn.sha2_256, link.hashFn().?);
}

test "real record: firehose post with emoji/langs/reply is byte-identical after re-encode" {
    // app.bsky.feed.post captured from the live firehose; it contains an
    // emoji, a langs array and a reply structure
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const scratch = arena_state.allocator();

    const captured = &[_]u8{
        0xa5, 0x64, 0x74, 0x65, 0x78, 0x74, 0x6b, 0xf0, 0x9f, 0xa5, 0xb5, 0x20, 0x6d, 0x65, 0x20, 0x74,
        0x6f, 0x6f, 0x65, 0x24, 0x74, 0x79, 0x70, 0x65, 0x72, 0x61, 0x70, 0x70, 0x2e, 0x62, 0x73, 0x6b,
        0x79, 0x2e, 0x66, 0x65, 0x65, 0x64, 0x2e, 0x70, 0x6f, 0x73, 0x74, 0x65, 0x6c, 0x61, 0x6e, 0x67,
        0x73, 0x81, 0x62, 0x65, 0x6e, 0x65, 0x72, 0x65, 0x70, 0x6c, 0x79, 0xa2, 0x64, 0x72, 0x6f, 0x6f,
        0x74, 0xa2, 0x63, 0x63, 0x69, 0x64, 0x78, 0x3b, 0x62, 0x61, 0x66, 0x79, 0x72, 0x65, 0x69, 0x62,
        0x33, 0x70, 0x77, 0x72, 0x66, 0x66, 0x32, 0x79, 0x61, 0x64, 0x7a, 0x6e, 0x6f, 0x70, 0x68, 0x7a,
        0x66, 0x34, 0x68, 0x63, 0x76, 0x74, 0x79, 0x6f, 0x63, 0x74, 0x77, 0x7a, 0x63, 0x75, 0x6a, 0x76,
        0x7a, 0x37, 0x78, 0x34, 0x70, 0x6e, 0x67, 0x6b, 0x32, 0x69, 0x73, 0x69, 0x63, 0x7a, 0x37, 0x79,
        0x73, 0x7a, 0x71, 0x63, 0x75, 0x72, 0x69, 0x78, 0x46, 0x61, 0x74, 0x3a, 0x2f, 0x2f, 0x64, 0x69,
        0x64, 0x3a, 0x70, 0x6c, 0x63, 0x3a, 0x34, 0x6e, 0x65, 0x6e, 0x64, 0x77, 0x71, 0x72, 0x73, 0x37,
        0x35, 0x34, 0x67, 0x74, 0x36, 0x71, 0x76, 0x67, 0x72, 0x35, 0x36, 0x6a, 0x6d, 0x6e, 0x2f, 0x61,
        0x70, 0x70, 0x2e, 0x62, 0x73, 0x6b, 0x79, 0x2e, 0x66, 0x65, 0x65, 0x64, 0x2e, 0x70, 0x6f, 0x73,
        0x74, 0x2f, 0x33, 0x6d, 0x65, 0x64, 0x67, 0x32, 0x71, 0x76, 0x63, 0x75, 0x63, 0x32, 0x63, 0x66,
        0x70, 0x61, 0x72, 0x65, 0x6e, 0x74, 0xa2, 0x63, 0x63, 0x69, 0x64, 0x78, 0x3b, 0x62, 0x61, 0x66,
        0x79, 0x72, 0x65, 0x69, 0x62, 0x33, 0x70, 0x77, 0x72, 0x66, 0x66, 0x32, 0x79, 0x61, 0x64, 0x7a,
        0x6e, 0x6f, 0x70, 0x68, 0x7a, 0x66, 0x34, 0x68, 0x63, 0x76, 0x74, 0x79, 0x6f, 0x63, 0x74, 0x77,
        0x7a, 0x63, 0x75, 0x6a, 0x76, 0x7a, 0x37, 0x78, 0x34, 0x70, 0x6e, 0x67, 0x6b, 0x32, 0x69, 0x73,
        0x69, 0x63, 0x7a, 0x37, 0x79, 0x73, 0x7a, 0x71, 0x63, 0x75, 0x72, 0x69, 0x78, 0x46, 0x61, 0x74,
        0x3a, 0x2f, 0x2f, 0x64, 0x69, 0x64, 0x3a, 0x70, 0x6c, 0x63, 0x3a, 0x34, 0x6e, 0x65, 0x6e, 0x64,
        0x77, 0x71, 0x72, 0x73, 0x37, 0x35, 0x34, 0x67, 0x74, 0x36, 0x71, 0x76, 0x67, 0x72, 0x35, 0x36,
        0x6a, 0x6d, 0x6e, 0x2f, 0x61, 0x70, 0x70, 0x2e, 0x62, 0x73, 0x6b, 0x79, 0x2e, 0x66, 0x65, 0x65,
        0x64, 0x2e, 0x70, 0x6f, 0x73, 0x74, 0x2f, 0x33, 0x6d, 0x65, 0x64, 0x67, 0x32, 0x71, 0x76, 0x63,
        0x75, 0x63, 0x32, 0x63, 0x69, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x41, 0x74, 0x78, 0x18,
        0x32, 0x30, 0x32, 0x36, 0x2d, 0x30, 0x32, 0x2d, 0x30, 0x38, 0x54, 0x30, 0x37, 0x3a, 0x34, 0x39,
        0x3a, 0x32, 0x30, 0x2e, 0x37, 0x37, 0x32, 0x5a,
    };

    // CID digest taken from the same firehose frame
    const want_digest = [_]u8{
        0x80, 0x01, 0x66, 0x46, 0x81, 0x57, 0x18, 0xaf, 0xc9, 0x34, 0xcf, 0xbf,
        0x3b, 0x3e, 0x57, 0x04, 0x24, 0x17, 0x90, 0x29, 0x2f, 0x7b, 0xc4, 0xe0,
        0xf4, 0xcf, 0xe6, 0xe6, 0xb5, 0xad, 0x11, 0x28,
    };

    // decoding and re-encoding must reproduce the captured bytes exactly
    const back = try decodeAll(scratch, captured);
    const rewritten = try encodeAlloc(scratch, back);
    try std.testing.expectEqualSlices(u8, captured, rewritten);

    // and hashing those bytes must give the production digest
    const link = try Cid.forDagCbor(scratch, rewritten);
    try std.testing.expectEqualSlices(u8, &want_digest, link.digest().?);
}