An Elixir implementation of AT Protocol-flavoured Merkle Search Trees (MST)
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: more tests & edge cases

+858 -22
+21 -22
lib/mst/tree.ex
··· 204 204 def to_list(%__MODULE__{root: nil}), do: {:ok, []} 205 205 206 206 def to_list(%__MODULE__{root: root, store: store}) do 207 - walk(store, root, []) 207 + with {:ok, reversed} <- walk(store, root, []) do 208 + {:ok, Enum.reverse(reversed)} 209 + end 208 210 end 209 211 210 212 @doc """ ··· 228 230 229 231 def stream(%__MODULE__{root: root, store: store}) do 230 232 Stream.resource( 231 - fn -> [root] end, 233 + fn -> [{:visit, root}] end, 232 234 fn 233 235 [] -> 234 236 {:halt, []} 235 237 236 - [cid | rest] -> 238 + [{:yield, k, v} | rest] -> 239 + {[{k, v}], rest} 240 + 241 + [{:visit, cid} | rest] -> 237 242 node = fetch_node!(store, cid) 238 243 full_keys = Node.keys(node) 239 - {yields, next_stack} = node_to_stream_items(node, full_keys, rest) 240 - {yields, next_stack} 244 + {[], in_order_items(node, full_keys) ++ rest} 241 245 end, 242 246 fn _ -> :ok end 243 247 ) ··· 573 577 574 578 @spec walk_entries(store(), [Entry.t()], [binary()], [{binary(), CID.t()}]) :: 575 579 {:ok, [{binary(), CID.t()}]} | tree_error() 576 - defp walk_entries(_store, [], [], acc), do: {:ok, Enum.reverse(acc)} 580 + # Accumulates pairs in reverse order; to_list/1 reverses once at the top. 581 + defp walk_entries(_store, [], [], acc), do: {:ok, acc} 577 582 578 583 defp walk_entries(store, [entry | rest_e], [key | rest_k], acc) do 579 584 acc = [{key, entry.value} | acc] ··· 587 592 # Private — stream helpers 588 593 # --------------------------------------------------------------------------- 589 594 590 - # Turn a node into a stream item list (yields) and the updated DFS stack. 591 - # We push right subtrees and yield leaf entries in left-to-right order. 
592 - @spec node_to_stream_items(Node.t(), [binary()], list()) :: {[{binary(), CID.t()}], list()} 593 - defp node_to_stream_items(node, full_keys, rest_stack) do 594 - # Build a plan: [{:visit, cid} | {:yield, key, val}] in order 595 - left_visits = if node.left, do: [{:visit, node.left}], else: [] 595 + # Expand a node into an in-order sequence of {:visit, cid} and {:yield, k, v} 596 + # items. When prepended to a DFS stack, this produces correct left-to-right 597 + # traversal: left subtree is visited first, then each entry interleaved with 598 + # its right subtree. 599 + @spec in_order_items(Node.t(), [binary()]) :: list() 600 + defp in_order_items(node, full_keys) do 601 + left_items = if node.left, do: [{:visit, node.left}], else: [] 596 602 597 603 entry_items = 598 604 Enum.zip(node.entries, full_keys) 599 605 |> Enum.flat_map(fn {e, k} -> 600 - right_visits = if e.right, do: [{:visit, e.right}], else: [] 601 - [{:yield, k, e.value} | right_visits] 606 + right_items = if e.right, do: [{:visit, e.right}], else: [] 607 + [{:yield, k, e.value} | right_items] 602 608 end) 603 609 604 - plan = left_visits ++ entry_items 605 - 606 - # Collect yields and build the new stack (visits go onto the front). 607 - # We want left-to-right order: process plan in reverse to prepend onto stack. 608 - Enum.reduce(Enum.reverse(plan), {[], rest_stack}, fn 609 - {:yield, k, v}, {yields, stack} -> {[{k, v} | yields], stack} 610 - {:visit, cid}, {yields, stack} -> {yields, [cid | stack]} 611 - end) 610 + left_items ++ entry_items 612 611 end 613 612 614 613 # ---------------------------------------------------------------------------
+96
test/fixtures/interop/commit_proof.json
··· 1 + [ 2 + { 3 + "comment": "two deep split", 4 + "leafValue": "bafyreie5cvv4h45feadgeuwhbcutmh6t2ceseocckahdoe6uat64zmz454", 5 + "keys": [ 6 + "A0/501344", 7 + "B1/293486", 8 + "C0/535043", 9 + "E0/922708", 10 + "F1/415452", 11 + "G0/714257" 12 + ], 13 + "adds": ["D2/915466"], 14 + "dels": [], 15 + "rootBeforeCommit": "bafyreibthlzzn3rwvmomwf4dz6utt7yeh5eyn6qwbumvjfv35gwanh7ovq", 16 + "rootAfterCommit": "bafyreidb6bxxylhmlzs4a6ruhcunv3fd32o6i5phlzkmjk6arletj2ua6a", 17 + "blocksInProof": [ 18 + "bafyreidb6bxxylhmlzs4a6ruhcunv3fd32o6i5phlzkmjk6arletj2ua6a", 19 + "bafyreifjsxnultnc3tbvnrawqpmrk6d76ymcstwcr5e3hn6u472nasb2xq", 20 + "bafyreibzch5k5j5xkg6dcwmur2p6jqwavyjhdtvifr6g2gnccwhixibzsi", 21 + "bafyreiamcu5ud3j4ovclrgq2sdyev5oajsmpnl2fdu5ffgpfint64n2jme", 22 + "bafyreidxvw3sbdg4t5b2mbtozitnyu7kjien2zcrtgdj4ssgmjb72mzawe" 23 + ] 24 + }, 25 + { 26 + "comment": "two deep leafless split", 27 + "leafValue": "bafyreie5cvv4h45feadgeuwhbcutmh6t2ceseocckahdoe6uat64zmz454", 28 + "keys": ["A0/501344", "B0/436099", "D0/360671", "E0/922708"], 29 + "adds": ["C2/953910"], 30 + "dels": [], 31 + "rootBeforeCommit": "bafyreid7jnvjg7mr4akmyf7rtaz47duex2l47rz36nvs4i7yjnpuhfmehe", 32 + "rootAfterCommit": "bafyreih2ry5gae5r4m47unhhuw4w2qjdhe6oprw3w2uico2tzbflwi74eu", 33 + "blocksInProof": [ 34 + "bafyreih2ry5gae5r4m47unhhuw4w2qjdhe6oprw3w2uico2tzbflwi74eu", 35 + "bafyreiag5ata5gtynbpef26l4kus2uz4nshuo526h275oljwlm5dwsvhqm", 36 + "bafyreiaybgpm7ahyiy7fko7c4czjokhzajvimot6lfi6mxqzw2bzwoddn4", 37 + "bafyreiheqxxydll4b4zlyemmegb7q3chs7aacczuotpxkqils6bufnsyse", 38 + "bafyreigkijiuasyl5x4f2j3kxzou2vsdyc3vockx63r6bvgoip4ybhj2sa" 39 + ] 40 + }, 41 + { 42 + "comment": "add on edge with neighbor two layers down", 43 + "leafValue": "bafyreie5cvv4h45feadgeuwhbcutmh6t2ceseocckahdoe6uat64zmz454", 44 + "keys": ["A0/501344", "B2/303249", "C0/535043"], 45 + "adds": ["D2/915466"], 46 + "dels": [], 47 + "rootBeforeCommit": "bafyreifoy7ierkqljk37wozudqhqjuuahjnubqvd3qprx5ocwcfrx5v3hm", 48 + 
"rootAfterCommit": "bafyreid2i3nxmsvv3ifb53nlkjh3qaymygrrxuno6z22gctzdme5lbptky", 49 + "blocksInProof": [ 50 + "bafyreid2i3nxmsvv3ifb53nlkjh3qaymygrrxuno6z22gctzdme5lbptky", 51 + "bafyreiagiwrefvm27hvgryirykp7reqcpz56v6txzksgbargjlibtpsqwu", 52 + "bafyreiewdvzcopoza6bdntvhmvdfqeolql6sckkiu75jpvfnwwnfi57jia" 53 + ] 54 + }, 55 + { 56 + "comment": "merge and split in multi-op commit", 57 + "leafValue": "bafyreie5cvv4h45feadgeuwhbcutmh6t2ceseocckahdoe6uat64zmz454", 58 + "keys": ["A0/501344", "B2/303249", "D2/915466", "E0/922708"], 59 + "adds": ["C2/953910"], 60 + "dels": ["B2/303249", "D2/915466"], 61 + "rootBeforeCommit": "bafyreielnllkafudlseizljjx32rkkivlgxziqayhctgbxncw2srrox7ny", 62 + "rootAfterCommit": "bafyreih6464tr7ue67qgllhiekgfmwiz45zuthrv72gwi2tjpuu5dbxt3a", 63 + "blocksInProof": [ 64 + "bafyreih6464tr7ue67qgllhiekgfmwiz45zuthrv72gwi2tjpuu5dbxt3a", 65 + "bafyreihexby6fnhajsjzzqkmegqpqt2lrr3rpesyl6kt3t3xppid7tuvfy", 66 + "bafyreiciix65xuk62hu6ew6jdy3m2swqstvnuhuwcwffidk3nduf7eaoh4", 67 + "bafyreieneexkszoung4zc5jzkjukjbbxm74ukz6mylydj7q2v42zqp6vmy", 68 + "bafyreidxvw3sbdg4t5b2mbtozitnyu7kjien2zcrtgdj4ssgmjb72mzawe" 69 + ] 70 + }, 71 + { 72 + "comment": "complex multi-op commit", 73 + "leafValue": "bafyreie5cvv4h45feadgeuwhbcutmh6t2ceseocckahdoe6uat64zmz454", 74 + "keys": [ 75 + "B0/436099", 76 + "C2/953910", 77 + "D0/360671", 78 + "E2/413113", 79 + "F0/606463", 80 + "H0/740256" 81 + ], 82 + "adds": ["A2/239654", "G2/536869"], 83 + "dels": ["C2/953910"], 84 + "rootBeforeCommit": "bafyreiej4jqggfhidabjfrjgogdwed5eglhnboepxscbwfrss4uclnrrmi", 85 + "rootAfterCommit": "bafyreifykpu67c4w4ynkx4lvjfjwxdofax6gx7j2wxrl6ewt3yslezcb6i", 86 + "blocksInProof": [ 87 + "bafyreifykpu67c4w4ynkx4lvjfjwxdofax6gx7j2wxrl6ewt3yslezcb6i", 88 + "bafyreig5pe2hdnhfbqleo6yyipkw3tdiju7tlm4sqp7btsiicxe4tex5de", 89 + "bafyreievjgro75jk6ma3xwuqvalsydtzgvbbaduhazbvajvslaf3l6kcxu", 90 + "bafyreieax6243224jnbout6ynursux2dvt6fabonofdu47dxupkxvmflvu", 91 + 
"bafyreie44qmlnwlyeh6ubb2eocfko6st7gmbarplmcci6c7ilx24vh4iym", 92 + "bafyreihlhqn4quwcgbum5g4wzkini2c42j7zi5dsjdgkzm55jxyvebndue", 93 + "bafyreiggcbzkb2wgenvyfhkh2nggf7pohb7uzjm6bs7hixhjxw2xpmnq6u" 94 + ] 95 + } 96 + ]
+15
test/fixtures/interop/common_prefix.json
··· 1 + [ 2 + {"left": "", "right": "", "len": 0}, 3 + {"left": "abc", "right": "abc", "len": 3}, 4 + {"left": "", "right": "abc", "len": 0}, 5 + {"left": "abc", "right": "", "len": 0}, 6 + {"left": "ab", "right": "abc", "len": 2}, 7 + {"left": "abc", "right": "ab", "len": 2}, 8 + {"left": "abcde", "right": "abc", "len": 3}, 9 + {"left": "abc", "right": "abcde", "len": 3}, 10 + {"left": "abcde", "right": "abc1", "len": 3}, 11 + {"left": "abcde", "right": "abb", "len": 2}, 12 + {"left": "abcde", "right": "qbb", "len": 0}, 13 + {"left": "abc", "right": "abc\u0000", "len": 3}, 14 + {"left": "abc\u0000", "right": "abc", "len": 3} 15 + ]
+81
test/fixtures/interop/example_keys.txt
··· 1 + A0/501344 2 + A1/700567 3 + A2/239654 4 + A3/570745 5 + A4/231700 6 + A5/343219 7 + B0/436099 8 + B1/293486 9 + B2/303249 10 + B3/690557 11 + B4/522003 12 + B5/528640 13 + C0/535043 14 + C1/970596 15 + C2/953910 16 + C3/016643 17 + C4/687126 18 + C5/136391 19 + D0/360671 20 + D1/637976 21 + D2/915466 22 + D3/722333 23 + D4/816246 24 + D5/611412 25 + E0/922708 26 + E1/710014 27 + E2/413113 28 + E3/226890 29 + E4/349347 30 + E5/574759 31 + F0/606463 32 + F1/415452 33 + F2/410478 34 + F3/000172 35 + F4/438093 36 + F5/131765 37 + G0/714257 38 + G1/254594 39 + G2/536869 40 + G3/188348 41 + G4/627086 42 + G5/436727 43 + H0/740256 44 + H1/113887 45 + H2/783135 46 + H3/911996 47 + H4/413212 48 + H5/205035 49 + I0/123247 50 + I1/186251 51 + I2/455864 52 + I3/874267 53 + I4/700662 54 + I5/355687 55 + J0/651505 56 + J1/747356 57 + J2/880562 58 + J3/337247 59 + J4/333302 60 + J5/802321 61 + K0/513509 62 + K1/512199 63 + K2/998695 64 + K3/030175 65 + K4/843537 66 + K5/621841 67 + L0/110539 68 + L1/902119 69 + L2/433601 70 + L3/578589 71 + L4/179159 72 + L5/411430 73 + M0/233209 74 + M1/807305 75 + M2/593452 76 + M3/412948 77 + M4/230935 78 + M5/340624 79 + N0/719700 80 + N1/322330 81 + N2/554
+11
test/fixtures/interop/key_heights.json
··· 1 + [ 2 + {"key": "", "height": 0}, 3 + {"key": "asdf", "height": 0}, 4 + {"key": "blue", "height": 1}, 5 + {"key": "2653ae71", "height": 0}, 6 + {"key": "88bfafc7", "height": 2}, 7 + {"key": "2a92d355", "height": 4}, 8 + {"key": "884976f5", "height": 6}, 9 + {"key": "app.bsky.feed.post/454397e440ec", "height": 4}, 10 + {"key": "app.bsky.feed.post/9adeb165882c", "height": 8} 11 + ]
+13
test/mst/height_test.exs
··· 3 3 4 4 doctest MST.Height 5 5 6 + @interop_fixtures Path.join([__DIR__, "..", "fixtures", "interop", "key_heights.json"]) 7 + |> File.read!() 8 + |> Jason.decode!() 9 + 6 10 describe "for_key/1" do 7 11 # Spec examples from https://atproto.com/specs/repository#mst-structure 8 12 test "spec example: depth 0" do ··· 48 52 d0 = MST.Height.for_key("2653ae71") 49 53 d1 = MST.Height.for_key("blue") 50 54 assert d0 != d1 55 + end 56 + end 57 + 58 + describe "key_heights.json interop" do 59 + test "all fixture entries match expected height" do 60 + for %{"key" => key, "height" => expected} <- @interop_fixtures do 61 + assert MST.Height.for_key(key) == expected, 62 + "for_key(#{inspect(key)}) expected #{expected}, got #{MST.Height.for_key(key)}" 63 + end 51 64 end 52 65 end 53 66 end
+315
test/mst/interop_test.exs
··· 1 + defmodule MST.InteropTest do 2 + @moduledoc """ 3 + Interoperability tests using fixtures from atproto-interop-tests and 4 + jacquard's additional edge-case vectors. 5 + 6 + Fixture sources: 7 + - https://github.com/bluesky-social/atproto-interop-tests/tree/main/mst 8 + - https://github.com/orual/jacquard (tests/fixtures/) 9 + 10 + Covers: 11 + - 81 real-world-shaped keys: full insert, selective delete, insertion-order 12 + determinism. 13 + - 5 commit-proof CID-exact scenarios stressing height-gap splits, leafless 14 + splits, edge inserts, and merge-then-split sequences. 15 + - The "rsky" 2-key tree regression from rsky's 16 + `handle_new_layers_that_are_two_higher_than_existing` test. 17 + - Trees spanning heights 0–8 to exercise multi-level intermediate empty 18 + nodes. 19 + """ 20 + 21 + use ExUnit.Case, async: true 22 + 23 + alias DASL.CID 24 + alias MST.Tree 25 + 26 + @fixture_dir Path.join([__DIR__, "..", "fixtures", "interop"]) 27 + 28 + # 81 keys of the form "X{level}/{number}" where each key's MST height 29 + # matches the digit in the name (generated by atproto-interop-tests/mst/gen_keys.py). 30 + @example_keys Path.join(@fixture_dir, "example_keys.txt") 31 + |> File.read!() 32 + |> String.split("\n", trim: true) 33 + 34 + # Stable key→CID mapping reused across example-key tests so that insertion 35 + # order doesn't affect which value a key maps to. 36 + @example_kv Map.new(Enum.with_index(@example_keys), fn {k, i} -> 37 + {k, CID.compute(<<i::8>>, :raw)} 38 + end) 39 + 40 + # 5 commit-proof scenarios; each specifies an initial key set, a batch of 41 + # adds/deletes, and the expected root CIDs before and after. 
42 + @commit_proof Path.join(@fixture_dir, "commit_proof.json") 43 + |> File.read!() 44 + |> Jason.decode!() 45 + 46 + defp new_tree, do: Tree.new(MST.Store.Memory.new()) 47 + 48 + # --------------------------------------------------------------------------- 49 + # Example keys — insert / delete / determinism 50 + # --------------------------------------------------------------------------- 51 + 52 + describe "example keys" do 53 + @tag :slow 54 + test "insert all keys and retrieve each" do 55 + tree = 56 + Enum.reduce(@example_keys, new_tree(), fn key, acc -> 57 + {:ok, t} = Tree.put(acc, key, @example_kv[key]) 58 + t 59 + end) 60 + 61 + for key <- @example_keys do 62 + assert {:ok, @example_kv[key]} == Tree.get(tree, key), 63 + "key not found after insert: #{key}" 64 + end 65 + 66 + assert {:ok, count} = Tree.length(tree) 67 + assert count == length(@example_keys) 68 + end 69 + 70 + @tag :slow 71 + test "delete every other key; correct half remains" do 72 + tree = 73 + Enum.reduce(@example_keys, new_tree(), fn key, acc -> 74 + {:ok, t} = Tree.put(acc, key, @example_kv[key]) 75 + t 76 + end) 77 + 78 + indexed = Enum.with_index(@example_keys) 79 + {evens, odds} = Enum.split_with(indexed, fn {_, i} -> rem(i, 2) == 0 end) 80 + 81 + tree = 82 + Enum.reduce(evens, tree, fn {key, _}, acc -> 83 + {:ok, t} = Tree.delete(acc, key) 84 + t 85 + end) 86 + 87 + for {key, _} <- evens do 88 + assert {:error, :not_found} == Tree.get(tree, key), 89 + "deleted key still present: #{key}" 90 + end 91 + 92 + for {key, _} <- odds do 93 + assert {:ok, @example_kv[key]} == Tree.get(tree, key), 94 + "surviving key not found: #{key}" 95 + end 96 + 97 + assert {:ok, remaining} = Tree.length(tree) 98 + assert remaining == length(odds) 99 + end 100 + 101 + @tag :slow 102 + test "root CID is identical regardless of insertion order" do 103 + forward = 104 + Enum.reduce(@example_keys, new_tree(), fn key, acc -> 105 + {:ok, t} = Tree.put(acc, key, @example_kv[key]) 106 + t 107 + end) 108 + 109 + 
reverse = 110 + Enum.reduce(Enum.reverse(@example_keys), new_tree(), fn key, acc -> 111 + {:ok, t} = Tree.put(acc, key, @example_kv[key]) 112 + t 113 + end) 114 + 115 + assert forward.root == reverse.root, 116 + "insertion order changed root CID" 117 + end 118 + 119 + @tag :slow 120 + test "delete all keys produces empty tree" do 121 + tree = 122 + Enum.reduce(@example_keys, new_tree(), fn key, acc -> 123 + {:ok, t} = Tree.put(acc, key, @example_kv[key]) 124 + t 125 + end) 126 + 127 + empty = 128 + Enum.reduce(@example_keys, tree, fn key, acc -> 129 + {:ok, t} = Tree.delete(acc, key) 130 + t 131 + end) 132 + 133 + assert {:ok, []} = Tree.to_list(empty) 134 + assert empty.root == nil 135 + end 136 + end 137 + 138 + # --------------------------------------------------------------------------- 139 + # Commit-proof fixtures — CID-exact spec vectors 140 + # --------------------------------------------------------------------------- 141 + 142 + describe "commit proof fixtures" do 143 + # Each fixture drives a scenario that would catch specific structural bugs: 144 + # 145 + # "two deep split" — height-2 insert between height-1 nodes, 146 + # requires two levels of intermediate empties. 147 + # "two deep leafless split" — height-2 insert with no height-1 nodes 148 + # anywhere near the split point. 149 + # "add on edge with neighbor two layers down" 150 + # — new height-2 key adjacent to a subtree 151 + # whose highest key is 2 levels lower. 152 + # "merge and split in multi-op" — simultaneous adds and deletes; the 153 + # merge path (delete) and split path 154 + # (insert) both execute. 155 + # "complex multi-op commit" — larger batch with both creates and 156 + # deletes across multiple height levels. 
157 + 158 + for fixture <- @commit_proof do 159 + @fixture fixture 160 + test @fixture["comment"] do 161 + run_commit_proof_fixture(@fixture) 162 + end 163 + end 164 + end 165 + 166 + # --------------------------------------------------------------------------- 167 + # rsky edge case — 2-key tree with a known root CID 168 + # --------------------------------------------------------------------------- 169 + 170 + describe "rsky simple case" do 171 + # Regression from rsky's `handle_new_layers_that_are_two_higher_than_existing`. 172 + # Two height-0 keys (same collection prefix) that differ only in their last 173 + # few chars. The expected root CID is taken from the reference TypeScript 174 + # implementation. 175 + test "two height-0 keys produce the known root CID" do 176 + {:ok, leaf} = CID.new("bafyreie5cvv4h45feadgeuwhbcutmh6t2ceseocckahdoe6uat64zmz454") 177 + 178 + {:ok, tree} = Tree.put(new_tree(), "com.example.record/3jqfcqzm3ft2j", leaf) 179 + {:ok, tree} = Tree.put(tree, "com.example.record/3jqfcqzm3fz2j", leaf) 180 + 181 + assert CID.encode(tree.root) == 182 + "bafyreidfcktqnfmykz2ps3dbul35pepleq7kvv526g47xahuz3rqtptmky" 183 + end 184 + end 185 + 186 + # --------------------------------------------------------------------------- 187 + # Multi-height trees — spec keys spanning heights 0–8 188 + # --------------------------------------------------------------------------- 189 + 190 + describe "keys spanning multiple heights" do 191 + # Keys taken directly from the atproto spec and key_heights.json; each has 192 + # a well-known height. Together they force intermediate empty nodes at every 193 + # level from 0 up to the maximum height. 
194 + @spec_keys [ 195 + {"2653ae71", 0}, 196 + {"blue", 1}, 197 + {"88bfafc7", 2}, 198 + {"2a92d355", 4}, 199 + {"884976f5", 6}, 200 + {"app.bsky.feed.post/9adeb165882c", 8} 201 + ] 202 + 203 + test "insert keys at heights 0/1/2/4/6/8 — all retrievable" do 204 + val = CID.compute("v", :raw) 205 + 206 + tree = 207 + Enum.reduce(@spec_keys, new_tree(), fn {key, _}, acc -> 208 + {:ok, t} = Tree.put(acc, key, val) 209 + t 210 + end) 211 + 212 + for {key, _} <- @spec_keys do 213 + assert {:ok, ^val} = Tree.get(tree, key), "not found: #{key}" 214 + end 215 + 216 + assert {:ok, n} = Tree.length(tree) 217 + assert n == length(@spec_keys) 218 + end 219 + 220 + test "delete the height-4 key; other heights unaffected" do 221 + val = CID.compute("v", :raw) 222 + 223 + tree = 224 + Enum.reduce(@spec_keys, new_tree(), fn {key, _}, acc -> 225 + {:ok, t} = Tree.put(acc, key, val) 226 + t 227 + end) 228 + 229 + {del_key, _} = Enum.find(@spec_keys, fn {_, h} -> h == 4 end) 230 + {:ok, tree} = Tree.delete(tree, del_key) 231 + 232 + assert {:error, :not_found} = Tree.get(tree, del_key) 233 + 234 + for {key, _} <- @spec_keys, key != del_key do 235 + assert {:ok, ^val} = Tree.get(tree, key), "not found after delete: #{key}" 236 + end 237 + end 238 + 239 + test "root CID is stable regardless of insertion order" do 240 + val = CID.compute("v", :raw) 241 + keys = Enum.map(@spec_keys, &elem(&1, 0)) 242 + 243 + forward = 244 + Enum.reduce(keys, new_tree(), fn key, acc -> 245 + {:ok, t} = Tree.put(acc, key, val) 246 + t 247 + end) 248 + 249 + reverse = 250 + Enum.reduce(Enum.reverse(keys), new_tree(), fn key, acc -> 251 + {:ok, t} = Tree.put(acc, key, val) 252 + t 253 + end) 254 + 255 + assert forward.root == reverse.root 256 + end 257 + 258 + test "full delete cycle returns to empty" do 259 + val = CID.compute("v", :raw) 260 + keys = Enum.map(@spec_keys, &elem(&1, 0)) 261 + 262 + tree = 263 + Enum.reduce(keys, new_tree(), fn key, acc -> 264 + {:ok, t} = Tree.put(acc, key, val) 265 + t 266 + 
end) 267 + 268 + empty = 269 + Enum.reduce(keys, tree, fn key, acc -> 270 + {:ok, t} = Tree.delete(acc, key) 271 + t 272 + end) 273 + 274 + assert {:ok, []} = Tree.to_list(empty) 275 + assert empty.root == nil 276 + end 277 + end 278 + 279 + # --------------------------------------------------------------------------- 280 + # Helpers 281 + # --------------------------------------------------------------------------- 282 + 283 + defp run_commit_proof_fixture(fixture) do 284 + {:ok, leaf} = CID.new(fixture["leafValue"]) 285 + 286 + before_tree = 287 + Enum.reduce(fixture["keys"], new_tree(), fn key, acc -> 288 + {:ok, t} = Tree.put(acc, key, leaf) 289 + t 290 + end) 291 + 292 + assert CID.encode(before_tree.root) == fixture["rootBeforeCommit"], 293 + ~s(root before commit: expected #{fixture["rootBeforeCommit"]}, ) <> 294 + ~s(got #{CID.encode(before_tree.root)}) 295 + 296 + after_tree = 297 + before_tree 298 + |> then(fn t -> 299 + Enum.reduce(fixture["adds"], t, fn key, acc -> 300 + {:ok, t2} = Tree.put(acc, key, leaf) 301 + t2 302 + end) 303 + end) 304 + |> then(fn t -> 305 + Enum.reduce(fixture["dels"], t, fn key, acc -> 306 + {:ok, t2} = Tree.delete(acc, key) 307 + t2 308 + end) 309 + end) 310 + 311 + assert CID.encode(after_tree.root) == fixture["rootAfterCommit"], 312 + ~s(root after commit: expected #{fixture["rootAfterCommit"]}, ) <> 313 + ~s(got #{CID.encode(after_tree.root)}) 314 + end 315 + end
+21
test/mst/node_test.exs
··· 12 12 @cid_b CID.compute("value_b", :raw) 13 13 @cid_c CID.compute("value_c", :raw) 14 14 15 + @prefix_interop Path.join([__DIR__, "..", "fixtures", "interop", "common_prefix.json"]) 16 + |> File.read!() 17 + |> Jason.decode!() 18 + 15 19 describe "empty/0" do 16 20 test "returns an empty node" do 17 21 assert %Node{left: nil, entries: []} = Node.empty() ··· 177 181 entries = Node.compress_entries(triples) 178 182 node = %Node{left: nil, entries: entries} 179 183 assert Node.keys(node) == keys 184 + end 185 + end 186 + 187 + describe "common_prefix.json interop" do 188 + # The common prefix length between two keys is what determines prefix_len 189 + # in the second of any two adjacent compress_entries inputs. We test via 190 + # compress_entries since common_prefix_length/2 is private. 191 + test "all fixture pairs produce correct prefix_len" do 192 + cid = CID.compute("test", :raw) 193 + 194 + for %{"left" => left, "right" => right, "len" => expected} <- @prefix_interop do 195 + [_first, second] = Node.compress_entries([{left, cid, nil}, {right, cid, nil}]) 196 + 197 + assert second.prefix_len == expected, 198 + "common_prefix(#{inspect(left)}, #{inspect(right)}) " <> 199 + "expected #{expected}, got #{second.prefix_len}" 200 + end 180 201 end 181 202 end 182 203
+285
test/mst/stress_test.exs
··· 1 + defmodule MST.StressTest do 2 + @moduledoc """ 3 + Stress tests for MST structural correctness under long random mutation sequences. 4 + 5 + Inspired by jacquard's `large_proof_tests.rs`, which applies hundreds of 6 + random create/update/delete operations and validates commit proofs after each 7 + batch. We can't validate firehose commits (that layer doesn't exist here), but 8 + we capture the same invariant: after every mutation the tree's contents must 9 + exactly match a shadow map maintained in parallel. 10 + 11 + Three random-op test dimensions (plus a delete-all/re-insert lifecycle test): 12 + - **small**: 100 ops verified after every single operation — catches bugs in 13 + individual transitions. 14 + - **large**: 300 ops verified after every batch of 10 — catches accumulated 15 + drift. 16 + - **high-height**: 50 ops over a pool that includes height 8 keys — stresses 17 + intermediate empty-node creation and destruction. 18 + 19 + Each random-op test ends with a determinism check: rebuild the same key-value set into 20 + a fresh tree and assert the root CID matches the evolved tree. 21 + """ 22 + 23 + use ExUnit.Case, async: true 24 + 25 + alias DASL.CID 26 + alias MST.Tree 27 + 28 + @fixture_dir Path.join([__DIR__, "..", "fixtures", "interop"]) 29 + 30 + # 81 real-world-shaped keys at heights 0–5 from the interop fixture. 31 + @key_pool @fixture_dir 32 + |> Path.join("example_keys.txt") 33 + |> File.read!() 34 + |> String.split("\n", trim: true) 35 + 36 + # Additional keys at heights 2, 4, 6, 8 — not in the example_keys file — 37 + # to ensure multi-level intermediate empty nodes are exercised. 38 + @high_height_keys [ 39 + "88bfafc7", 40 + "2a92d355", 41 + "884976f5", 42 + "app.bsky.feed.post/9adeb165882c" 43 + ] 44 + 45 + @full_pool @key_pool ++ @high_height_keys 46 + 47 + # Keys used for the focused high-height test. Heights: 0, 0, 1, 2, 4, 4, 6, 8. 
48 + @height_spanning_keys [ 49 + "2653ae71", 50 + "asdf", 51 + "blue", 52 + "88bfafc7", 53 + "2a92d355", 54 + "app.bsky.feed.post/454397e440ec", 55 + "884976f5", 56 + "app.bsky.feed.post/9adeb165882c" 57 + ] 58 + 59 + defp new_tree, do: Tree.new(MST.Store.Memory.new()) 60 + 61 + # --------------------------------------------------------------------------- 62 + # PRNG helpers — explicit state threading for reproducibility 63 + # --------------------------------------------------------------------------- 64 + 65 + # Returns {{:put, key, val} | {:delete, key}, new_rng_state}. 66 + # Weights: 50% create-or-overwrite, 30% update-existing, 20% delete-existing. 67 + # Falls back to create when the shadow is empty. 68 + @spec rand_op(:rand.state(), map(), [binary()]) :: 69 + {{:put, binary(), CID.t()} | {:delete, binary()}, :rand.state()} 70 + defp rand_op(rng, shadow, pool) do 71 + {dice, rng} = :rand.uniform_s(100, rng) 72 + existing = Map.keys(shadow) 73 + n = length(existing) 74 + 75 + cond do 76 + dice <= 50 or n == 0 -> 77 + {idx, rng} = :rand.uniform_s(length(pool), rng) 78 + {seed, rng} = :rand.uniform_s(1_000_000_000, rng) 79 + key = Enum.at(pool, idx - 1) 80 + val = CID.compute("#{key}:#{seed}", :raw) 81 + {{:put, key, val}, rng} 82 + 83 + dice <= 80 -> 84 + {idx, rng} = :rand.uniform_s(n, rng) 85 + {seed, rng} = :rand.uniform_s(1_000_000_000, rng) 86 + key = Enum.at(existing, idx - 1) 87 + val = CID.compute("#{key}:#{seed}", :raw) 88 + {{:put, key, val}, rng} 89 + 90 + true -> 91 + {idx, rng} = :rand.uniform_s(n, rng) 92 + key = Enum.at(existing, idx - 1) 93 + {{:delete, key}, rng} 94 + end 95 + end 96 + 97 + # Apply a single op to both the tree and shadow map. 
98 + @spec apply_op(Tree.t(), map(), {:put, binary(), CID.t()} | {:delete, binary()}) :: 99 + {Tree.t(), map()} 100 + defp apply_op(tree, shadow, {:put, key, val}) do 101 + {:ok, tree} = Tree.put(tree, key, val) 102 + {tree, Map.put(shadow, key, val)} 103 + end 104 + 105 + defp apply_op(tree, shadow, {:delete, key}) do 106 + {:ok, tree} = Tree.delete(tree, key) 107 + {tree, Map.delete(shadow, key)} 108 + end 109 + 110 + # Assert every key in `shadow` is present in `tree` with the correct value, 111 + # that the sizes match, and that `to_list/1` returns keys in sorted order. 112 + @spec assert_matches_shadow(Tree.t(), map()) :: :ok 113 + defp assert_matches_shadow(tree, shadow) do 114 + {:ok, pairs} = Tree.to_list(tree) 115 + 116 + assert length(pairs) == map_size(shadow), 117 + "size mismatch: tree has #{length(pairs)} keys, shadow has #{map_size(shadow)}" 118 + 119 + keys = Enum.map(pairs, &elem(&1, 0)) 120 + assert keys == Enum.sort(keys), "to_list/1 returned keys out of order" 121 + 122 + for {key, expected_val} <- shadow do 123 + assert {:ok, ^expected_val} = Tree.get(tree, key), 124 + "wrong value for #{inspect(key)}" 125 + end 126 + 127 + :ok 128 + end 129 + 130 + # Build a fresh tree from a shadow map (sorted insertion order) and return 131 + # the root CID. Used to verify determinism: the evolved tree and the 132 + # freshly-built tree must share the same root. 
133 + @spec root_from_shadow(map()) :: CID.t() | nil 134 + defp root_from_shadow(shadow) do 135 + shadow 136 + |> Enum.sort_by(&elem(&1, 0)) 137 + |> Enum.reduce(new_tree(), fn {key, val}, acc -> 138 + {:ok, t} = Tree.put(acc, key, val) 139 + t 140 + end) 141 + |> Map.fetch!(:root) 142 + end 143 + 144 + # --------------------------------------------------------------------------- 145 + # Tests 146 + # --------------------------------------------------------------------------- 147 + 148 + describe "stress" do 149 + # ------------------------------------------------------------------------- 150 + # Small: 100 ops, verified after every single mutation 151 + # ------------------------------------------------------------------------- 152 + 153 + @tag :slow 154 + test "100 random ops over 20-key seed, verified after each op" do 155 + rng = :rand.seed_s(:exsss, {42, 1337, 99}) 156 + 157 + seed_keys = Enum.take(@full_pool, 20) 158 + 159 + {tree, shadow} = 160 + Enum.reduce(seed_keys, {new_tree(), %{}}, fn key, {t, s} -> 161 + val = CID.compute("seed:#{key}", :raw) 162 + {:ok, t} = Tree.put(t, key, val) 163 + {t, Map.put(s, key, val)} 164 + end) 165 + 166 + assert_matches_shadow(tree, shadow) 167 + 168 + {tree, shadow, _rng} = 169 + Enum.reduce(1..100, {tree, shadow, rng}, fn _i, {t, s, rng} -> 170 + {op, rng} = rand_op(rng, s, @full_pool) 171 + {t, s} = apply_op(t, s, op) 172 + assert_matches_shadow(t, s) 173 + {t, s, rng} 174 + end) 175 + 176 + assert tree.root == root_from_shadow(shadow), 177 + "evolved tree root differs from scratch-rebuilt root — history-dependence bug" 178 + end 179 + 180 + # ------------------------------------------------------------------------- 181 + # Large: 300 ops in batches of 10, verified after each batch 182 + # ------------------------------------------------------------------------- 183 + 184 + @tag :slow 185 + test "300 random ops over 50-key seed, verified per batch of 10" do 186 + rng = :rand.seed_s(:exsss, {7, 13, 21}) 187 + 188 + 
seed_keys = Enum.take(@full_pool, 50) 189 + 190 + {tree, shadow} = 191 + Enum.reduce(seed_keys, {new_tree(), %{}}, fn key, {t, s} -> 192 + val = CID.compute("seed:#{key}", :raw) 193 + {:ok, t} = Tree.put(t, key, val) 194 + {t, Map.put(s, key, val)} 195 + end) 196 + 197 + {tree, shadow, _rng} = 198 + Enum.reduce(1..30, {tree, shadow, rng}, fn _batch, {t, s, rng} -> 199 + {t, s, rng} = 200 + Enum.reduce(1..10, {t, s, rng}, fn _i, {t, s, rng} -> 201 + {op, rng} = rand_op(rng, s, @full_pool) 202 + {t, s} = apply_op(t, s, op) 203 + {t, s, rng} 204 + end) 205 + 206 + assert_matches_shadow(t, s) 207 + {t, s, rng} 208 + end) 209 + 210 + assert tree.root == root_from_shadow(shadow), 211 + "evolved tree root differs from scratch-rebuilt root — history-dependence bug" 212 + end 213 + 214 + # ------------------------------------------------------------------------- 215 + # High-height: 50 ops over a pool spanning heights 0–8, per-op verified 216 + # ------------------------------------------------------------------------- 217 + 218 + @tag :slow 219 + test "50 random ops over height-spanning pool (h0–h8), verified after each op" do 220 + # This specifically targets the intermediate empty-node paths: keys at 221 + # heights 6 and 8 force multiple levels of empty wrappers. Deleting them 222 + # exercises trim_top and the recursive merge across those levels. 
223 + rng = :rand.seed_s(:exsss, {100, 200, 300}) 224 + 225 + {tree, shadow} = 226 + Enum.reduce(@height_spanning_keys, {new_tree(), %{}}, fn key, {t, s} -> 227 + val = CID.compute("seed:#{key}", :raw) 228 + {:ok, t} = Tree.put(t, key, val) 229 + {t, Map.put(s, key, val)} 230 + end) 231 + 232 + assert_matches_shadow(tree, shadow) 233 + 234 + {tree, shadow, _rng} = 235 + Enum.reduce(1..50, {tree, shadow, rng}, fn _i, {t, s, rng} -> 236 + {op, rng} = rand_op(rng, s, @height_spanning_keys) 237 + {t, s} = apply_op(t, s, op) 238 + assert_matches_shadow(t, s) 239 + {t, s, rng} 240 + end) 241 + 242 + assert tree.root == root_from_shadow(shadow), 243 + "evolved tree root differs from scratch-rebuilt root — history-dependence bug" 244 + end 245 + 246 + # ------------------------------------------------------------------------- 247 + # Delete-all then re-insert: evolved root must match original 248 + # ------------------------------------------------------------------------- 249 + 250 + @tag :slow 251 + test "delete all keys then re-insert in reverse order produces original root" do 252 + # Covers the full tree lifecycle: grow → shrink to nil → regrow. 253 + # Uses height-spanning keys so intermediate empty nodes are created and 254 + # destroyed at every level. 
255 + keys = @height_spanning_keys ++ Enum.take(@key_pool, 22) 256 + 257 + {original_tree, shadow} = 258 + Enum.reduce(keys, {new_tree(), %{}}, fn key, {t, s} -> 259 + val = CID.compute("v:#{key}", :raw) 260 + {:ok, t} = Tree.put(t, key, val) 261 + {t, Map.put(s, key, val)} 262 + end) 263 + 264 + assert_matches_shadow(original_tree, shadow) 265 + 266 + empty = 267 + Enum.reduce(keys, original_tree, fn key, t -> 268 + {:ok, t} = Tree.delete(t, key) 269 + t 270 + end) 271 + 272 + assert empty.root == nil 273 + assert {:ok, []} = Tree.to_list(empty) 274 + 275 + rebuilt = 276 + Enum.reduce(Enum.reverse(keys), new_tree(), fn key, t -> 277 + {:ok, t} = Tree.put(t, key, shadow[key]) 278 + t 279 + end) 280 + 281 + assert rebuilt.root == original_tree.root, 282 + "re-insert in reverse order produced a different root — determinism bug" 283 + end 284 + end 285 + end