A loose federation of distributed, typed datasets
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at main 222 lines 6.7 kB view raw
"""Tests for CID generation utilities."""

import hashlib

import pytest
import libipld

from atdata._cid import (
    generate_cid,
    generate_cid_from_bytes,
    verify_cid,
    parse_cid,
)


class TestGenerateCid:
    """Tests for generate_cid function."""

    def test_generates_valid_cid_from_dict(self):
        """CID is generated from a dictionary."""
        data = {"name": "TestSample", "version": "1.0.0"}
        cid = generate_cid(data)

        # CIDv1 base32 starts with 'bafy'
        assert cid.startswith("bafy")
        assert len(cid) > 40  # CIDs are typically 59 chars

    def test_deterministic_output(self):
        """Same data always produces same CID."""
        data = {"name": "TestSample", "version": "1.0.0", "fields": []}

        cid1 = generate_cid(data)
        cid2 = generate_cid(data)

        assert cid1 == cid2

    def test_different_data_different_cid(self):
        """Different data produces different CIDs."""
        data1 = {"name": "Sample1", "version": "1.0.0"}
        data2 = {"name": "Sample2", "version": "1.0.0"}

        cid1 = generate_cid(data1)
        cid2 = generate_cid(data2)

        assert cid1 != cid2

    def test_key_order_matters_in_dag_cbor(self):
        """Key order in the input dict does not affect the CID.

        Despite the method name, this test asserts that key order is
        irrelevant: DAG-CBOR has deterministic key ordering, so two dicts
        that differ only in insertion order must yield the same CID.
        """
        # DAG-CBOR sorts keys, so these should produce the same CID
        data1 = {"a": 1, "b": 2}
        data2 = {"b": 2, "a": 1}

        cid1 = generate_cid(data1)
        cid2 = generate_cid(data2)

        # DAG-CBOR canonicalizes key order
        assert cid1 == cid2

    def test_handles_nested_structures(self):
        """CID can be generated from nested data structures."""
        data = {
            "name": "NestedSample",
            "fields": [
                {"name": "field1", "type": "str"},
                {"name": "field2", "type": "int"},
            ],
            "metadata": {"author": "test", "tags": ["a", "b", "c"]},
        }

        cid = generate_cid(data)
        assert cid.startswith("bafy")

    def test_handles_various_types(self):
        """CID handles various Python types."""
        data = {
            "string": "hello",
            "integer": 42,
            "float": 3.14,
            "boolean": True,
            "null": None,
            "bytes": b"binary data",
            "list": [1, 2, 3],
        }

        cid = generate_cid(data)
        assert cid.startswith("bafy")

    def test_invalid_data_raises_error(self):
        """Non-CBOR-serializable data raises ValueError."""
        # Functions can't be serialized to CBOR
        data = {"func": lambda x: x}

        with pytest.raises(ValueError, match="Failed to encode"):
            generate_cid(data)


class TestGenerateCidFromBytes:
    """Tests for generate_cid_from_bytes function."""

    def test_generates_cid_from_bytes(self):
        """CID is generated from raw bytes."""
        data_bytes = b"some raw bytes"
        cid = generate_cid_from_bytes(data_bytes)

        assert cid.startswith("bafy")

    def test_matches_manual_encoding(self):
        """CID from bytes matches CID from pre-encoded data."""
        data = {"key": "value"}
        # Encode by hand so both code paths hash the same DAG-CBOR bytes.
        cbor_bytes = libipld.encode_dag_cbor(data)

        cid_from_data = generate_cid(data)
        cid_from_bytes = generate_cid_from_bytes(cbor_bytes)

        assert cid_from_data == cid_from_bytes


class TestVerifyCid:
    """Tests for verify_cid function."""

    def test_verify_matching_data(self):
        """verify_cid returns True for matching data."""
        data = {"name": "test", "value": 123}
        cid = generate_cid(data)

        assert verify_cid(cid, data) is True

    def test_verify_non_matching_data(self):
        """verify_cid returns False for non-matching data."""
        data = {"name": "test", "value": 123}
        cid = generate_cid(data)

        different_data = {"name": "test", "value": 456}
        assert verify_cid(cid, different_data) is False

    def test_verify_with_complex_data(self):
        """verify_cid works with complex nested structures."""
        data = {
            "schema": {
                "name": "ImageSample",
                "version": "1.0.0",
                "fields": [
                    {"name": "image", "type": "ndarray"},
                    {"name": "label", "type": "str"},
                ],
            }
        }
        cid = generate_cid(data)

        assert verify_cid(cid, data) is True


class TestParseCid:
    """Tests for parse_cid function."""

    def test_parse_cid_components(self):
        """parse_cid extracts CID components."""
        data = {"test": "data"}
        cid = generate_cid(data)

        parsed = parse_cid(cid)

        assert parsed["version"] == 1
        assert parsed["codec"] == 0x71  # dag-cbor
        assert parsed["hash"]["code"] == 0x12  # sha256
        assert parsed["hash"]["size"] == 32

    def test_parse_cid_digest_matches(self):
        """Parsed digest matches the SHA-256 of the data."""
        data = {"test": "data"}
        cid = generate_cid(data)

        # Recompute the digest independently from the DAG-CBOR encoding.
        cbor_bytes = libipld.encode_dag_cbor(data)
        expected_digest = hashlib.sha256(cbor_bytes).digest()

        parsed = parse_cid(cid)
        assert parsed["hash"]["digest"] == expected_digest

    @pytest.mark.parametrize(
        "malformed_cid",
        [
            "",  # empty
            "invalid",  # not a CID
            "bafy123",  # truncated CID
            "Qm123",  # v0 prefix but invalid
        ],
    )
    def test_parse_cid_malformed_raises_valueerror(self, malformed_cid):
        """Malformed CID strings raise ValueError."""
        with pytest.raises(ValueError, match="Failed to decode CID"):
            parse_cid(malformed_cid)


class TestAtprotoCompatibility:
    """Tests verifying ATProto SDK compatibility."""

    def test_cid_decodable_by_atproto(self):
        """Generated CIDs can be decoded by atproto SDK."""
        # Imported locally so only these tests depend on atproto_core.
        from atproto_core.cid.cid import CID

        data = {"name": "TestSchema", "version": "1.0.0"}
        cid_str = generate_cid(data)

        # Should not raise
        cid_obj = CID.decode(cid_str)

        assert cid_obj.version == 1
        assert cid_obj.codec == 0x71

    def test_hash_matches_atproto_decode(self):
        """Hash in generated CID matches when decoded by atproto."""
        # Imported locally so only these tests depend on atproto_core.
        from atproto_core.cid.cid import CID

        data = {"name": "TestSchema", "version": "1.0.0"}
        cid_str = generate_cid(data)

        # Recompute the digest independently from the DAG-CBOR encoding.
        cbor_bytes = libipld.encode_dag_cbor(data)
        expected_hash = hashlib.sha256(cbor_bytes).digest()

        cid_obj = CID.decode(cid_str)
        assert cid_obj.hash.digest == expected_hash