An Elixir implementation of AT Protocol-flavoured Merkle Search Trees (MST)
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

chore: add mst-test-suite

+307 -5
+4
.gitignore
··· 24 24 25 25 .direnv 26 26 .envrc 27 + .venv 28 + 29 + /test/fixtures/mst-test-suite/cars/**/*.car 30 + /test/fixtures/mst-test-suite/tests/**/*.json
+4 -4
README.md
··· 15 15 end 16 16 ``` 17 17 18 - Documentation can be generated with [ExDoc](https://github.com/elixir-lang/ex_doc) 19 - and published on [HexDocs](https://hexdocs.pm). Once published, the docs can 20 - be found at <https://hexdocs.pm/mst>. 21 - 18 + Documentation can be generated with 19 + [ExDoc](https://github.com/elixir-lang/ex_doc) and published on 20 + [HexDocs](https://hexdocs.pm). Once published, the docs can be found at 21 + <https://hexdocs.pm/mst>.
+1 -1
flake.nix
··· 15 15 in { 16 16 devShells = defaultForSystems (pkgs: 17 17 pkgs.mkShell { 18 - nativeBuildInputs = with pkgs; [elixir erlang]; 18 + nativeBuildInputs = with pkgs; [elixir erlang uv]; 19 19 }); 20 20 }; 21 21 }
+21
test/fixtures/mst-test-suite/LICENSE
··· 1 + MIT License 2 + 3 + Copyright (c) 2024 David Buchanan 4 + 5 + Permission is hereby granted, free of charge, to any person obtaining a copy 6 + of this software and associated documentation files (the "Software"), to deal 7 + in the Software without restriction, including without limitation the rights 8 + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 + copies of the Software, and to permit persons to whom the Software is 10 + furnished to do so, subject to the following conditions: 11 + 12 + The above copyright notice and this permission notice shall be included in all 13 + copies or substantial portions of the Software. 14 + 15 + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 + SOFTWARE.
+14
test/fixtures/mst-test-suite/README.md
··· 1 + # mst-test-suite 2 + 3 + Test suite for [atproto-flavour](https://atproto.com/specs/repository) merkle 4 + search tree ops. 5 + 6 + Install [`uv`](https://docs.astral.sh/uv/), run `uv sync`, and then run 7 + `uv run ./scripts/generate_exhaustive_cars.py` to generate the fixtures for the 8 + test suite. 9 + 10 + Copied from 11 + [DavidBuchanan314/mst-test-suite](https://github.com/DavidBuchanan314/mst-test-suite) 12 + in order to comprehensively test MST functionality. 13 + 14 + mst-test-suite is licensed under the [MIT License](./LICENSE).
test/fixtures/mst-test-suite/cars/exhaustive/.gitkeep

This is a binary file and will not be displayed.

+8
test/fixtures/mst-test-suite/pyproject.toml
··· 1 + [project] 2 + name = "mst-test-suite" 3 + version = "0.1.0" 4 + requires-python = ">=3.15" 5 + dependencies = [ 6 + "atmst>=0.0.6", 7 + "cbrrr>=1.1.0", 8 + ]
+202
test/fixtures/mst-test-suite/scripts/generate_exhaustive_cars.py
"""Generate the exhaustive MST fixtures: CAR files and diff test cases.

A set of seven record keys is chosen so that inserting all of them yields a
perfect binary tree.  For every subset of those keys (2**7 trees) the MST is
built and written to ``./cars/exhaustive/exhaustive_NNN.car``.  Then, for
every ordered pair of trees (A, B), a JSON test case describing the diff from
A to B is written to ``./tests/diff/exhaustive/exhaustive_AAA_BBB.json``.

All output paths are relative to the current working directory.
"""

from typing import BinaryIO, Optional
import json

from atmst.mst.node import MSTNode
from atmst.mst.node_store import NodeStore
from atmst.mst.node_wrangler import NodeWrangler
from atmst.mst.node_walker import NodeWalker
from atmst.mst.diff import very_slow_mst_diff, record_diff
from atmst.blockstore import MemoryBlockStore, OverlayBlockStore, BlockStore
from atmst.blockstore.car_file import encode_varint
from atmst.mst import proof
import cbrrr
from cbrrr import CID


class LoggingBlockStoreWrapper(BlockStore):
    """Delegate to another block store while recording every key that is read.

    ``gets`` accumulates each key passed to ``get_block``; puts and deletes
    are forwarded to the wrapped store without being logged.
    """

    def __init__(self, bs: BlockStore):
        self.bs = bs
        self.gets = set()  # keys requested through get_block so far

    def put_block(self, key: bytes, value: bytes) -> None:
        self.bs.put_block(key, value)

    def get_block(self, key: bytes) -> bytes:
        # record the key before delegating, so failed lookups are logged too
        self.gets.add(key)
        return self.bs.get_block(key)

    def del_block(self, key: bytes) -> None:
        self.bs.del_block(key)


# Disabled alternative that logged at the node-store level instead of the
# block-store level.  Kept for reference only; it is an inert string literal.
"""
class LoggingNodeStore(NodeStore):
	def __init__(self, bs):
		self.read_cids = set()
		self.stored_cids = set()
		super().__init__(bs)

	def get_node(self, cid: Optional[CID]) -> MSTNode:
		if cid is None:
			self.read_cids.add(MSTNode.empty_root().cid)
		else:
			self.read_cids.add(cid)
		return super().get_node(cid)

	def stored_node(self, node: MSTNode) -> MSTNode:
		self.stored_cids.add(node.cid)
		return super().stored_node(node)
"""


class CarWriter:
    """Write a CAR stream: a header naming one root, followed by blocks.

    The header (``{"version": 1, "roots": [root]}`` encoded as DAG-CBOR) is
    written immediately on construction, prefixed by its varint length.
    """

    def __init__(self, stream: BinaryIO, root: cbrrr.CID) -> None:
        self.stream = stream
        header_bytes = cbrrr.encode_dag_cbor(
            {"version": 1, "roots": [root]}
        )
        stream.write(encode_varint(len(header_bytes)))
        stream.write(header_bytes)

    def write_block(self, cid: cbrrr.CID, value: bytes):
        """Append one block: varint(len(cid) + len(value)), cid bytes, value."""
        cid_bytes = bytes(cid)
        self.stream.write(encode_varint(len(cid_bytes) + len(value)))
        self.stream.write(cid_bytes)
        self.stream.write(value)


keys = []
key_heights = [0, 1, 0, 2, 0, 1, 0]  # if all these keys are added to a MST, it'll form a perfect binary tree.
i = 0
for height in key_heights:
    # scan successive "k/NN" candidates until one has the wanted height;
    # `i` keeps advancing across heights, so the chosen keys are in sorted order
    while True:
        key = f"k/{i:02d}"
        i += 1
        if MSTNode.key_height(key) == height:
            keys.append(key)
            break

vals = [CID.cidv1_dag_cbor_sha256_32_from(cbrrr.encode_dag_cbor({"$type": "mst-test-data", "value_for": k})) for k in keys]

val_for_key = dict(zip(keys, vals))

print(keys)
print(vals)

# we can reuse these
bs = MemoryBlockStore()
ns = NodeStore(bs)
wrangler = NodeWrangler(ns)

roots = []

# bit j of i selects whether keys[j] is present in tree number i
for i in range(2**len(keys)):
    filename = f"./cars/exhaustive/exhaustive_{i:03d}.car"
    root = ns.get_node(None).cid
    for j, (rkey, rval) in enumerate(zip(keys, vals)):
        if (i >> j) & 1:
            root = wrangler.put_record(root, rkey, rval)
    print(i, filename)

    car_blocks = [
        (node.cid, node.serialised)
        for node in NodeWalker(ns, root).iter_nodes()
    ]

    assert len({cid for cid, _ in car_blocks}) == len(car_blocks), "duplicate blocks in CAR"  # no dupes

    with open(filename, "wb") as carfile:
        car = CarWriter(carfile, root)
        # blocks are written in CID byte order so the output is deterministic
        for cid, val in sorted(car_blocks, key=lambda x: bytes(x[0])):
            car.write_block(cid, val)

    roots.append(root)

# collecting these stats just for the sake of curiosity
#identical_proof_and_creation_count = 0
#proof_superset_of_creation_count = 0
#creation_superset_of_proof_count = 0
inversion_needs_extra_blocks = 0
clusion_proof_nodes_not_in_inversion_proof = 0

# generate exhaustive test cases
for ai, root_a in enumerate(roots):
    for bi, root_b in enumerate(roots):
        filename = f"./tests/diff/exhaustive/exhaustive_{ai:03d}_{bi:03d}.json"
        print(filename)
        car_a = f"./cars/exhaustive/exhaustive_{ai:03d}.car"
        car_b = f"./cars/exhaustive/exhaustive_{bi:03d}.car"
        created_nodes, deleted_nodes = very_slow_mst_diff(ns, root_a, root_b)
        record_ops = []
        proof_nodes = set()
        no_deletions = True
        for delta in record_diff(ns, created_nodes, deleted_nodes):
            record_ops.append({
                "rpath": delta.path,
                "old_value": None if delta.prior_value is None else delta.prior_value.encode(),
                "new_value": None if delta.later_value is None else delta.later_value.encode()
            })
            if delta.later_value is None:  # deletion
                proof_nodes.update(proof.build_exclusion_proof(ns, root_b, delta.path))
                no_deletions = False
            else:  # update or create
                proof_nodes.update(proof.build_inclusion_proof(ns, root_b, delta.path))

        if no_deletions:  # commits with no deletions are more well-behaved
            assert proof_nodes.issubset(created_nodes), "proof nodes outside created nodes"

        # my inductive-proof-generation logic is ops order sensitive, so we do the sort beforehand
        # TODO: maybe "deletes first" or similar produces smaller proofs on average?
        record_ops.sort(key=lambda x: x["rpath"])

        # figure out which blocks are required for inductive proofs.
        # the idea here is that we use an overlay blockstore and log every "get" that has to fall thru to the lower layer.
        # those gets are therefore the blocks required for a stateless consumer to verify the proof.
        upper = MemoryBlockStore()
        lbs = LoggingBlockStoreWrapper(bs)
        lns = NodeStore(OverlayBlockStore(upper, lbs))
        lnw = NodeWrangler(lns)
        proof_root = root_b
        # invert each op, last first, to walk tree B back to tree A.
        # while the order does not affect the final root CID, it does affect the set of CIDs that fall thru
        for op in reversed(record_ops):
            if op["old_value"] is None:
                # the op created the record, so undoing it means deleting it
                proof_root = lnw.del_record(proof_root, op["rpath"])
            else:
                # the op deleted the record, so undoing it restores the old value
                proof_root = lnw.put_record(proof_root, op["rpath"], val_for_key[op["rpath"]])
        assert proof_root == root_a, "inverting the ops did not reproduce root A"  # we're back to where we started
        inductive_proof_nodes = {CID(cid) for cid in lbs.gets}

        if inductive_proof_nodes - (created_nodes | proof_nodes):
            #print(delta)
            inversion_needs_extra_blocks += 1

        if proof_nodes - inductive_proof_nodes:
            clusion_proof_nodes_not_in_inversion_proof += 1

        #if proof_nodes == created_nodes:
        #    identical_proof_and_creation_count += 1
        #if proof_nodes.issuperset(created_nodes):
        #    proof_superset_of_creation_count += 1
        #if created_nodes.issuperset(proof_nodes):
        #    creation_superset_of_proof_count += 1

        testcase = {
            "$type": "mst-diff",
            "description": f'procedurally generated MST diff test case between MST {ai} and {bi}',
            "inputs": {
                "mst_a": car_a,
                "mst_b": car_b
            },
            "results": {
                "created_nodes": sorted(cid.encode() for cid in created_nodes),
                "deleted_nodes": sorted(cid.encode() for cid in deleted_nodes),
                "record_ops": record_ops,  # these were sorted earlier
                "proof_nodes": sorted(cid.encode() for cid in proof_nodes),
                "inductive_proof_nodes": sorted(cid.encode() for cid in inductive_proof_nodes),
                "firehose_cids": "TODO"
            }
        }
        with open(filename, "w") as jsonfile:
            json.dump(testcase, jsonfile, indent="\t")

#print("identical_proof_and_creation_count", identical_proof_and_creation_count / (len(roots)**2)) # 0.75
#print("proof_superset_of_creation_count", proof_superset_of_creation_count / (len(roots)**2)) # 0.84
#print("creation_superset_of_proof_count", creation_superset_of_proof_count / (len(roots)**2)) # 0.91
print("inversion_needs_extra_blocks", inversion_needs_extra_blocks / (len(roots)**2))  # 0.04
print(clusion_proof_nodes_not_in_inversion_proof)
test/fixtures/mst-test-suite/tests/diff/exhaustive/.gitkeep

This is a binary file and will not be displayed.

+53
test/fixtures/mst-test-suite/uv.lock
··· 1 + version = 1 2 + revision = 3 3 + requires-python = ">=3.15" 4 + 5 + [[package]] 6 + name = "atmst" 7 + version = "0.0.6" 8 + source = { registry = "https://pypi.org/simple" } 9 + dependencies = [ 10 + { name = "cbrrr" }, 11 + { name = "lru-dict" }, 12 + { name = "more-itertools" }, 13 + ] 14 + sdist = { url = "https://files.pythonhosted.org/packages/47/7a/2cca04368b664d372473504615e37150466ecc796dff018504d8daf5de6d/atmst-0.0.6.tar.gz", hash = "sha256:bdc3ada3f234e28dada73f50cd40359534f99208436e831fe035f6fc7c7b188e", size = 18577, upload-time = "2024-12-21T11:37:20.15Z" } 15 + wheels = [ 16 + { url = "https://files.pythonhosted.org/packages/f1/ec/d743d809cadfaae230ebed08c00f63a68f1bfe82042f74ea98c51965ed8f/atmst-0.0.6-py3-none-any.whl", hash = "sha256:e63801225f31b602a3aacfe73360561fa5f488adb1a56d64e0746d462981ecff", size = 19744, upload-time = "2024-12-21T11:37:17.804Z" }, 17 + ] 18 + 19 + [[package]] 20 + name = "cbrrr" 21 + version = "1.1.0" 22 + source = { registry = "https://pypi.org/simple" } 23 + sdist = { url = "https://files.pythonhosted.org/packages/8f/e9/ccc7a90618e5d67da4bd7b98d9602d1ff791ee65792918659d161bac93a5/cbrrr-1.1.0.tar.gz", hash = "sha256:673e5bc27d213a549946886c22a4de8bbc5a091ebb8f0dee4317694a655f23f5", size = 18364, upload-time = "2026-03-28T22:56:15.309Z" } 24 + 25 + [[package]] 26 + name = "lru-dict" 27 + version = "1.4.1" 28 + source = { registry = "https://pypi.org/simple" } 29 + sdist = { url = "https://files.pythonhosted.org/packages/06/0a/dec86efe38b350314c49a8d39ef01ba7cf8bbbef1d177646320eedea7159/lru_dict-1.4.1.tar.gz", hash = "sha256:cc518ff2d38cc7a8ab56f9a6ae557f91e2e1524b57ed8e598e97f45a2bd708fc", size = 13439, upload-time = "2025-11-02T10:02:13.548Z" } 30 + 31 + [[package]] 32 + name = "more-itertools" 33 + version = "11.0.1" 34 + source = { registry = "https://pypi.org/simple" } 35 + sdist = { url = 
"https://files.pythonhosted.org/packages/24/24/e0acc4bf54cba50c1d432c70a72a3df96db4a321b2c4c68432a60759044f/more_itertools-11.0.1.tar.gz", hash = "sha256:fefaf25b7ab08f0b45fa9f1892cae93b9fc0089ef034d39213bce15f1cc9e199", size = 144739, upload-time = "2026-04-02T16:17:45.061Z" } 36 + wheels = [ 37 + { url = "https://files.pythonhosted.org/packages/d8/f4/5e52c7319b8087acef603ed6e50dc325c02eaa999355414830468611f13c/more_itertools-11.0.1-py3-none-any.whl", hash = "sha256:eaf287826069452a8f61026c597eae2428b2d1ba2859083abbf240b46842ce6d", size = 72182, upload-time = "2026-04-02T16:17:43.724Z" }, 38 + ] 39 + 40 + [[package]] 41 + name = "mst-test-suite" 42 + version = "0.1.0" 43 + source = { virtual = "." } 44 + dependencies = [ 45 + { name = "atmst" }, 46 + { name = "cbrrr" }, 47 + ] 48 + 49 + [package.metadata] 50 + requires-dist = [ 51 + { name = "atmst", specifier = ">=0.0.6" }, 52 + { name = "cbrrr", specifier = ">=1.1.0" }, 53 + ]