A loose federation of distributed, typed datasets
1"""Tests for CID generation utilities."""
2
3import pytest
4import libipld
5
6from atdata._cid import (
7 generate_cid,
8 generate_cid_from_bytes,
9 verify_cid,
10 parse_cid,
11)
12
13
class TestGenerateCid:
    """Exercise ``generate_cid`` with a range of input payloads."""

    def test_generates_valid_cid_from_dict(self):
        """A flat dictionary yields a CID string of plausible shape."""
        cid = generate_cid({"name": "TestSample", "version": "1.0.0"})

        # The suite expects the base32 CIDv1 form, which begins with 'bafy'.
        assert cid.startswith("bafy")
        # Loose lower bound only; such CIDs are typically 59 characters long.
        assert len(cid) > 40

    def test_deterministic_output(self):
        """Encoding the same payload twice gives the same CID."""
        payload = {"name": "TestSample", "version": "1.0.0", "fields": []}

        first = generate_cid(payload)
        second = generate_cid(payload)

        assert first == second

    def test_different_data_different_cid(self):
        """Payloads that differ in one value map to distinct CIDs."""
        first = generate_cid({"name": "Sample1", "version": "1.0.0"})
        second = generate_cid({"name": "Sample2", "version": "1.0.0"})

        assert first != second

    def test_key_order_matters_in_dag_cbor(self):
        """Insertion order of dict keys is expected NOT to affect the CID.

        Despite the method name, the assertion is that both orderings
        produce the same CID, relying on DAG-CBOR's canonical key ordering.
        """
        forward = {"a": 1, "b": 2}
        backward = {"b": 2, "a": 1}

        # Same mapping, different insertion order -> same CID expected.
        assert generate_cid(forward) == generate_cid(backward)

    def test_handles_nested_structures(self):
        """Lists of dicts and nested dicts are accepted as input."""
        fields = [
            {"name": "field1", "type": "str"},
            {"name": "field2", "type": "int"},
        ]
        metadata = {"author": "test", "tags": ["a", "b", "c"]}
        payload = {
            "name": "NestedSample",
            "fields": fields,
            "metadata": metadata,
        }

        assert generate_cid(payload).startswith("bafy")

    def test_handles_various_types(self):
        """A payload mixing str, int, float, bool, None, bytes and list is accepted."""
        payload = {
            "string": "hello",
            "integer": 42,
            "float": 3.14,
            "boolean": True,
            "null": None,
            "bytes": b"binary data",
            "list": [1, 2, 3],
        }

        assert generate_cid(payload).startswith("bafy")

    def test_invalid_data_raises_error(self):
        """A value that cannot be CBOR-encoded surfaces as ``ValueError``."""
        # A function object has no CBOR representation.
        unencodable = {"func": lambda x: x}

        with pytest.raises(ValueError, match="Failed to encode"):
            generate_cid(unencodable)
93
94
class TestGenerateCidFromBytes:
    """Exercise ``generate_cid_from_bytes`` on raw and pre-encoded input."""

    def test_generates_cid_from_bytes(self):
        """Arbitrary raw bytes yield a CID with the expected prefix."""
        cid = generate_cid_from_bytes(b"some raw bytes")

        assert cid.startswith("bafy")

    def test_matches_manual_encoding(self):
        """Hashing pre-encoded DAG-CBOR bytes agrees with ``generate_cid``."""
        payload = {"key": "value"}
        # Encode by hand with libipld, then compare both code paths.
        encoded = libipld.encode_dag_cbor(payload)

        via_data = generate_cid(payload)
        via_bytes = generate_cid_from_bytes(encoded)

        assert via_data == via_bytes
114
115
class TestVerifyCid:
    """Exercise ``verify_cid`` against matching and mismatching payloads."""

    def test_verify_matching_data(self):
        """The payload a CID was generated from verifies as ``True``."""
        payload = {"name": "test", "value": 123}

        assert verify_cid(generate_cid(payload), payload) is True

    def test_verify_non_matching_data(self):
        """A payload differing in one value verifies as ``False``."""
        cid = generate_cid({"name": "test", "value": 123})
        altered = {"name": "test", "value": 456}

        assert verify_cid(cid, altered) is False

    def test_verify_with_complex_data(self):
        """Verification also succeeds for a nested schema-like payload."""
        fields = [
            {"name": "image", "type": "ndarray"},
            {"name": "label", "type": "str"},
        ]
        payload = {
            "schema": {
                "name": "ImageSample",
                "version": "1.0.0",
                "fields": fields,
            }
        }

        assert verify_cid(generate_cid(payload), payload) is True
149
150
class TestParseCid:
    """Exercise ``parse_cid`` on generated and malformed CID strings."""

    def test_parse_cid_components(self):
        """Version, codec and multihash header fields are decoded."""
        parsed = parse_cid(generate_cid({"test": "data"}))

        assert parsed["version"] == 1
        assert parsed["codec"] == 0x71  # dag-cbor
        multihash = parsed["hash"]
        assert multihash["code"] == 0x12  # sha256
        assert multihash["size"] == 32

    def test_parse_cid_digest_matches(self):
        """The decoded digest equals SHA-256 over the DAG-CBOR encoding."""
        import hashlib

        payload = {"test": "data"}
        cid = generate_cid(payload)

        # Recompute the digest independently of the code under test.
        encoded = libipld.encode_dag_cbor(payload)
        expected = hashlib.sha256(encoded).digest()

        assert parse_cid(cid)["hash"]["digest"] == expected

    @pytest.mark.parametrize(
        "malformed_cid",
        [
            "",  # empty string
            "invalid",  # not a CID at all
            "bafy123",  # CIDv1-style prefix, truncated
            "Qm123",  # CIDv0-style prefix, invalid remainder
        ],
    )
    def test_parse_cid_malformed_raises_valueerror(self, malformed_cid):
        """Each malformed input is rejected with ``ValueError``."""
        with pytest.raises(ValueError, match="Failed to decode CID"):
            parse_cid(malformed_cid)
192
193
class TestAtprotoCompatibility:
    """Check that generated CIDs round-trip through the atproto SDK decoder."""

    def test_cid_decodable_by_atproto(self):
        """``CID.decode`` accepts a generated CID and reports v1 / dag-cbor."""
        from atproto_core.cid.cid import CID

        cid_text = generate_cid({"name": "TestSchema", "version": "1.0.0"})

        # Decoding must succeed; any exception here fails the test.
        decoded = CID.decode(cid_text)

        assert decoded.version == 1
        assert decoded.codec == 0x71

    def test_hash_matches_atproto_decode(self):
        """The digest seen by atproto equals SHA-256 over the DAG-CBOR bytes."""
        import hashlib
        from atproto_core.cid.cid import CID

        payload = {"name": "TestSchema", "version": "1.0.0"}
        cid_text = generate_cid(payload)

        # Recompute the digest independently of the code under test.
        encoded = libipld.encode_dag_cbor(payload)
        expected = hashlib.sha256(encoded).digest()

        decoded = CID.decode(cid_text)
        assert decoded.hash.digest == expected