atproto utils for zig zat.dev
atproto sdk zig
26
fork

Configure Feed

Select the types of activity you want to include in your feed.

perf: slim Cid struct for smaller Value union and zero-cost CID decode

Cid now stores only raw bytes (16B) instead of parsed fields (56B).
Value union shrinks from 64B to 24B, MapEntry from 80B to 40B.
CID decode is zero-cost (it stores a byte-slice reference); map keys are read
inline, without a full decodeAt dispatch.

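Breaking: Cid fields are now accessor methods that return optionals — e.g. cid.version becomes cid.version().? and cid.hash_fn becomes cid.hashFn().? — and parseCid now returns Cid instead of DecodeError!Cid.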

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

zzstoatzz 6dc0fe69 e6cadb03

+115 -113
+10
CHANGELOG.md
··· 1 1 # changelog 2 2 3 + ## 0.1.7 4 + 5 + - slim `Cid` struct from 56 to 16 bytes — store only raw bytes, parse version/codec/hash_fn/digest lazily on demand 6 + - `Value` union shrinks from 64 to 24 bytes, `MapEntry` from 80 to 40 bytes 7 + - zero-cost CID decode — tag 42 handler stores a byte slice reference instead of parsing varint fields 8 + - inline map key reading in CBOR decoder — skips full `decodeAt` + union construction per key 9 + - comptime size assertions for `Value` and `MapEntry` 10 + - **breaking**: `Cid` fields (`version`, `codec`, `hash_fn`, `digest`) are now accessor methods (`version()`, `codec()`, `hashFn()`, `digest()`) returning optionals — e.g. `cid.version` → `cid.version().?` 11 + - **breaking**: `parseCid` simplified to a trivial raw-bytes wrapper — it no longer validates its input and returns `Cid` instead of `DecodeError!Cid` 12 + 3 13 ## 0.1.6 4 14 5 15 - round-robin host rotation for jetstream and firehose clients
+1 -1
build.zig.zon
··· 1 1 .{ 2 2 .name = .zat, 3 - .version = "0.1.0", 3 + .version = "0.1.7", 4 4 .fingerprint = 0x8da9db57ee82fbe4, 5 5 .minimum_zig_version = "0.15.0", 6 6 .dependencies = .{
+4 -4
src/internal/car.zig
··· 273 273 274 274 const car_file = try read(alloc, car_buf.items); 275 275 try std.testing.expectEqual(@as(usize, 1), car_file.roots.len); 276 - try std.testing.expectEqual(root_cid.version, car_file.roots[0].version); 277 - try std.testing.expectEqual(root_cid.codec, car_file.roots[0].codec); 278 - try std.testing.expectEqualSlices(u8, root_cid.digest, car_file.roots[0].digest); 276 + try std.testing.expectEqual(root_cid.version().?, car_file.roots[0].version().?); 277 + try std.testing.expectEqual(root_cid.codec().?, car_file.roots[0].codec().?); 278 + try std.testing.expectEqualSlices(u8, root_cid.digest().?, car_file.roots[0].digest().?); 279 279 } 280 280 281 281 test "write → read round-trip" { ··· 303 303 304 304 // verify roots 305 305 try std.testing.expectEqual(@as(usize, 1), parsed.roots.len); 306 - try std.testing.expectEqualSlices(u8, cid1.digest, parsed.roots[0].digest); 306 + try std.testing.expectEqualSlices(u8, cid1.digest().?, parsed.roots[0].digest().?); 307 307 308 308 // verify blocks 309 309 try std.testing.expectEqual(@as(usize, 2), parsed.blocks.len);
+100 -108
src/internal/cbor.zig
··· 107 107 else => null, 108 108 }; 109 109 } 110 + 111 + // verify the Value union stayed slim after Cid optimization (was ~64, now 24) 112 + comptime { 113 + std.debug.assert(@sizeOf(Value) == 24); 114 + std.debug.assert(@sizeOf(MapEntry) == 40); 115 + } 110 116 }; 111 117 112 118 /// well-known multicodec values ··· 122 128 pub const identity: u64 = 0x00; 123 129 }; 124 130 125 - /// CID (Content Identifier) parsed from tag 42 131 + /// CID (Content Identifier) parsed from tag 42. 132 + /// stores only the raw bytes — version/codec/hash_fn/digest are parsed lazily on demand. 133 + /// this keeps the struct at 16 bytes (1 slice) instead of 56 bytes, which shrinks 134 + /// the Value union from ~64 to ~24 bytes. 126 135 pub const Cid = struct { 127 - version: u64, 128 - codec: u64, 129 - hash_fn: u64, 130 - digest: []const u8, 131 - raw: []const u8, // full CID bytes (for matching against CAR block CIDs) 136 + raw: []const u8, 137 + 138 + /// parse CID version from raw bytes (0 for CIDv0, 1+ for CIDv1) 139 + pub fn version(self: Cid) ?u64 { 140 + if (self.raw.len < 2) return null; 141 + // CIDv0: starts with 0x12 0x20 (sha2-256 multihash) 142 + if (self.raw[0] == 0x12 and self.raw[1] == 0x20) return 0; 143 + var pos: usize = 0; 144 + return readUvarint(self.raw, &pos); 145 + } 146 + 147 + /// parse codec from raw bytes (implicit dag-pb for CIDv0) 148 + pub fn codec(self: Cid) ?u64 { 149 + if (self.raw.len < 2) return null; 150 + if (self.raw[0] == 0x12 and self.raw[1] == 0x20) return 0x70; // dag-pb 151 + var pos: usize = 0; 152 + _ = readUvarint(self.raw, &pos) orelse return null; // version 153 + return readUvarint(self.raw, &pos); 154 + } 155 + 156 + /// parse hash function code from raw bytes 157 + pub fn hashFn(self: Cid) ?u64 { 158 + if (self.raw.len < 2) return null; 159 + if (self.raw[0] == 0x12 and self.raw[1] == 0x20) return 0x12; // sha2-256 160 + var pos: usize = 0; 161 + _ = readUvarint(self.raw, &pos) orelse return null; // version 162 + _ = 
readUvarint(self.raw, &pos) orelse return null; // codec 163 + return readUvarint(self.raw, &pos); 164 + } 165 + 166 + /// parse digest bytes from raw CID 167 + pub fn digest(self: Cid) ?[]const u8 { 168 + if (self.raw.len < 2) return null; 169 + if (self.raw[0] == 0x12 and self.raw[1] == 0x20) { 170 + if (self.raw.len < 34) return null; 171 + return self.raw[2..34]; 172 + } 173 + var pos: usize = 0; 174 + _ = readUvarint(self.raw, &pos) orelse return null; // version 175 + _ = readUvarint(self.raw, &pos) orelse return null; // codec 176 + _ = readUvarint(self.raw, &pos) orelse return null; // hash_fn 177 + const digest_len = readUvarint(self.raw, &pos) orelse return null; 178 + if (pos + digest_len > self.raw.len) return null; 179 + return self.raw[pos..][0..digest_len]; 180 + } 132 181 133 182 /// create a CIDv1 by hashing DAG-CBOR encoded data with SHA-256. 134 - /// the returned Cid's raw/digest slices are owned by the allocator. 183 + /// the returned Cid's raw slice is owned by the allocator. 135 184 pub fn forDagCbor(allocator: Allocator, data: []const u8) !Cid { 136 185 return create(allocator, 1, Codec.dag_cbor, HashFn.sha2_256, data); 137 186 } 138 187 139 188 /// create a CIDv1 with the given codec by hashing data with SHA-256. 
140 - pub fn create(allocator: Allocator, version: u64, codec: u64, hash_fn_code: u64, data: []const u8) !Cid { 141 - // compute SHA-256 digest 189 + pub fn create(allocator: Allocator, ver: u64, cod: u64, hash_fn_code: u64, data: []const u8) !Cid { 142 190 const Sha256 = std.crypto.hash.sha2.Sha256; 143 191 var hash: [Sha256.digest_length]u8 = undefined; 144 192 Sha256.hash(data, &hash, .{}); 145 193 146 - // build raw CID bytes: version varint + codec varint + hash_fn varint + digest_len varint + digest 147 194 var raw_buf: std.ArrayList(u8) = .{}; 148 195 errdefer raw_buf.deinit(allocator); 149 196 const writer = raw_buf.writer(allocator); 150 - try writeUvarint(writer, version); 151 - try writeUvarint(writer, codec); 197 + try writeUvarint(writer, ver); 198 + try writeUvarint(writer, cod); 152 199 try writeUvarint(writer, hash_fn_code); 153 200 try writeUvarint(writer, Sha256.digest_length); 154 201 try writer.writeAll(&hash); 155 202 156 - const raw = try raw_buf.toOwnedSlice(allocator); 157 - 158 - // locate digest within the raw slice (it's the last 32 bytes) 159 - const digest = raw[raw.len - Sha256.digest_length ..]; 160 - 161 - return .{ 162 - .version = version, 163 - .codec = codec, 164 - .hash_fn = hash_fn_code, 165 - .digest = digest, 166 - .raw = raw, 167 - }; 203 + return .{ .raw = try raw_buf.toOwnedSlice(allocator) }; 168 204 } 169 205 170 206 /// serialize this CID to raw bytes (version varint + codec varint + multihash) 171 207 pub fn toBytes(self: Cid, allocator: Allocator) ![]u8 { 172 - // if we already have raw bytes, just duplicate them 173 - if (self.raw.len > 0) { 174 - return try allocator.dupe(u8, self.raw); 175 - } 176 - 177 - var buf: std.ArrayList(u8) = .{}; 178 - errdefer buf.deinit(allocator); 179 - const writer = buf.writer(allocator); 180 - try writeUvarint(writer, self.version); 181 - try writeUvarint(writer, self.codec); 182 - try writeUvarint(writer, self.hash_fn); 183 - try writeUvarint(writer, @as(u64, self.digest.len)); 184 
- try writer.writeAll(self.digest); 185 - return try buf.toOwnedSlice(allocator); 208 + return try allocator.dupe(u8, self.raw); 186 209 } 187 210 }; 188 211 ··· 260 283 const count = try readArgument(data, pos, additional); 261 284 const entries = try allocator.alloc(Value.MapEntry, @intCast(count)); 262 285 for (entries) |*entry| { 263 - // DAG-CBOR: map keys must be text strings 264 - const key_val = try decodeAt(allocator, data, pos); 265 - const key = switch (key_val) { 266 - .text => |t| t, 267 - else => return error.InvalidMapKey, 268 - }; 269 - entry.* = .{ 270 - .key = key, 271 - .value = try decodeAt(allocator, data, pos), 272 - }; 286 + // DAG-CBOR: map keys must be text strings — inline read to avoid 287 + // a full decodeAt + Value union construction per key 288 + if (pos.* >= data.len) return error.UnexpectedEof; 289 + const key_byte = data[pos.*]; 290 + pos.* += 1; 291 + if (@as(u3, @truncate(key_byte >> 5)) != 3) return error.InvalidMapKey; 292 + const key_len = try readArgument(data, pos, @truncate(key_byte)); 293 + const key_end = pos.* + @as(usize, @intCast(key_len)); 294 + if (key_end > data.len) return error.UnexpectedEof; 295 + entry.key = data[pos.*..key_end]; 296 + pos.* = key_end; 297 + entry.value = try decodeAt(allocator, data, pos); 273 298 } 274 299 return .{ .map = entries }; 275 300 }, ··· 283 308 else => return error.InvalidCid, 284 309 }; 285 310 if (cid_bytes.len < 1 or cid_bytes[0] != 0x00) return error.InvalidCid; 286 - const raw = cid_bytes[1..]; // skip identity multibase prefix 287 - return .{ .cid = try parseCid(raw) }; 311 + return .{ .cid = .{ .raw = cid_bytes[1..] 
} }; // zero-cost: just reference the bytes 288 312 } 289 313 // generic tag — allocate content on heap 290 314 const content_ptr = try allocator.create(Value); ··· 337 361 }; 338 362 } 339 363 340 - /// parse a CID from raw bytes (after removing the 0x00 multibase prefix) 341 - pub fn parseCid(raw: []const u8) DecodeError!Cid { 342 - if (raw.len < 2) return error.InvalidCid; 343 - 344 - // CIDv0: starts with 0x12 0x20 (sha2-256, 32-byte digest) 345 - if (raw[0] == 0x12 and raw[1] == 0x20) { 346 - if (raw.len < 34) return error.InvalidCid; 347 - return .{ 348 - .version = 0, 349 - .codec = 0x70, // dag-pb (implicit for CIDv0) 350 - .hash_fn = 0x12, // sha2-256 351 - .digest = raw[2..34], 352 - .raw = raw, 353 - }; 354 - } 355 - 356 - // CIDv1: version varint + codec varint + multihash 357 - var pos: usize = 0; 358 - const version = readUvarint(raw, &pos) orelse return error.InvalidCid; 359 - const codec = readUvarint(raw, &pos) orelse return error.InvalidCid; 360 - const hash_fn = readUvarint(raw, &pos) orelse return error.InvalidCid; 361 - const digest_len = readUvarint(raw, &pos) orelse return error.InvalidCid; 362 - 363 - if (pos + digest_len > raw.len) return error.InvalidCid; 364 - 365 - return .{ 366 - .version = version, 367 - .codec = codec, 368 - .hash_fn = hash_fn, 369 - .digest = raw[pos..][0..digest_len], 370 - .raw = raw, 371 - }; 364 + /// wrap raw CID bytes (after removing the 0x00 multibase prefix) into a Cid. 365 + /// performs no validation and stores only the raw bytes; the accessor methods return null when the bytes cannot be parsed. 
366 + pub fn parseCid(raw: []const u8) Cid { 367 + return .{ .raw = raw }; 372 368 } 373 369 374 370 /// read an unsigned varint (LEB128) ··· 826 822 } ++ [_]u8{0xaa} ** 32; 827 823 828 824 const original: Value = .{ .cid = .{ 829 - .version = 1, 830 - .codec = 0x71, 831 - .hash_fn = 0x12, 832 - .digest = raw_cid[4..], 833 825 .raw = &raw_cid, 834 826 } }; 835 827 ··· 838 830 839 831 // should decode back as a CID with the same raw bytes 840 832 const cid = decoded.cid; 841 - try std.testing.expectEqual(@as(u64, 1), cid.version); 842 - try std.testing.expectEqual(@as(u64, 0x71), cid.codec); 843 - try std.testing.expectEqual(@as(u64, 0x12), cid.hash_fn); 833 + try std.testing.expectEqual(@as(u64, 1), cid.version().?); 834 + try std.testing.expectEqual(@as(u64, 0x71), cid.codec().?); 835 + try std.testing.expectEqual(@as(u64, 0x12), cid.hashFn().?); 844 836 try std.testing.expectEqualSlices(u8, &raw_cid, cid.raw); 845 837 } 846 838 ··· 896 888 const encoded = try encodeAlloc(alloc, value); 897 889 const cid = try Cid.forDagCbor(alloc, encoded); 898 890 899 - try std.testing.expectEqual(@as(u64, 1), cid.version); 900 - try std.testing.expectEqual(Codec.dag_cbor, cid.codec); 901 - try std.testing.expectEqual(HashFn.sha2_256, cid.hash_fn); 902 - try std.testing.expectEqual(@as(usize, 32), cid.digest.len); 891 + try std.testing.expectEqual(@as(u64, 1), cid.version().?); 892 + try std.testing.expectEqual(Codec.dag_cbor, cid.codec().?); 893 + try std.testing.expectEqual(HashFn.sha2_256, cid.hashFn().?); 894 + try std.testing.expectEqual(@as(usize, 32), cid.digest().?.len); 903 895 // raw should be: version(1) + codec(0x71) + hash_fn(0x12) + digest_len(0x20) + 32 bytes 904 896 try std.testing.expectEqual(@as(usize, 36), cid.raw.len); 905 897 } ··· 914 906 const cid2 = try Cid.forDagCbor(alloc, data); 915 907 916 908 try std.testing.expectEqualSlices(u8, cid1.raw, cid2.raw); 917 - try std.testing.expectEqualSlices(u8, cid1.digest, cid2.digest); 909 + try 
std.testing.expectEqualSlices(u8, cid1.digest().?, cid2.digest().?); 918 910 } 919 911 920 912 test "Cid.forDagCbor different data → different CIDs" { ··· 925 917 const cid1 = try Cid.forDagCbor(alloc, "data A"); 926 918 const cid2 = try Cid.forDagCbor(alloc, "data B"); 927 919 928 - try std.testing.expect(!std.mem.eql(u8, cid1.digest, cid2.digest)); 920 + try std.testing.expect(!std.mem.eql(u8, cid1.digest().?, cid2.digest().?)); 929 921 } 930 922 931 923 test "Cid.toBytes round-trips through parseCid" { ··· 935 927 936 928 const cid = try Cid.forDagCbor(alloc, "test content"); 937 929 const bytes = try cid.toBytes(alloc); 938 - const parsed = try parseCid(bytes); 930 + const parsed = parseCid(bytes); 939 931 940 - try std.testing.expectEqual(cid.version, parsed.version); 941 - try std.testing.expectEqual(cid.codec, parsed.codec); 942 - try std.testing.expectEqual(cid.hash_fn, parsed.hash_fn); 943 - try std.testing.expectEqualSlices(u8, cid.digest, parsed.digest); 932 + try std.testing.expectEqual(cid.version().?, parsed.version().?); 933 + try std.testing.expectEqual(cid.codec().?, parsed.codec().?); 934 + try std.testing.expectEqual(cid.hashFn().?, parsed.hashFn().?); 935 + try std.testing.expectEqualSlices(u8, cid.digest().?, parsed.digest().?); 944 936 } 945 937 946 938 test "CID round-trip through CBOR encode/decode" { ··· 959 951 const decoded = try decodeAll(alloc, encoded); 960 952 961 953 const got = decoded.get("link").?.cid; 962 - try std.testing.expectEqual(cid.version, got.version); 963 - try std.testing.expectEqual(cid.codec, got.codec); 964 - try std.testing.expectEqualSlices(u8, cid.digest, got.digest); 954 + try std.testing.expectEqual(cid.version().?, got.version().?); 955 + try std.testing.expectEqual(cid.codec().?, got.codec().?); 956 + try std.testing.expectEqualSlices(u8, cid.digest().?, got.digest().?); 965 957 } 966 958 967 959 // === verify CIDs against real AT Protocol records === ··· 990 982 0x4e, 0x04, 0xdc, 0x7a, 0x88, 0x21, 0x6b, 
0xfa, 991 983 }; 992 984 993 - try std.testing.expectEqualSlices(u8, &expected_digest, cid.digest); 994 - try std.testing.expectEqual(@as(u64, 1), cid.version); 995 - try std.testing.expectEqual(Codec.dag_cbor, cid.codec); 996 - try std.testing.expectEqual(HashFn.sha2_256, cid.hash_fn); 985 + try std.testing.expectEqualSlices(u8, &expected_digest, cid.digest().?); 986 + try std.testing.expectEqual(@as(u64, 1), cid.version().?); 987 + try std.testing.expectEqual(Codec.dag_cbor, cid.codec().?); 988 + try std.testing.expectEqual(HashFn.sha2_256, cid.hashFn().?); 997 989 } 998 990 999 991 test "real record: firehose post with emoji/langs/reply is byte-identical after re-encode" { ··· 1044 1036 1045 1037 // verify CID matches the production CID 1046 1038 const cid = try Cid.forDagCbor(alloc, re_encoded); 1047 - try std.testing.expectEqualSlices(u8, &expected_digest, cid.digest); 1039 + try std.testing.expectEqualSlices(u8, &expected_digest, cid.digest().?); 1048 1040 }