An Elixir implementation of AT Protocol-flavoured Merkle Search Trees (MST)
1defmodule MST.NodeTest do
2 use ExUnit.Case, async: true
3
4 doctest MST.Node
5
6 alias DASL.CID
7 alias MST.Node
8 alias MST.Node.Entry
9
10 # Shared fixtures
11 @cid_a CID.compute("value_a", :raw)
12 @cid_b CID.compute("value_b", :raw)
13 @cid_c CID.compute("value_c", :raw)
14
15 @prefix_interop Path.join([__DIR__, "..", "fixtures", "interop", "common_prefix.json"])
16 |> File.read!()
17 |> Jason.decode!()
18
19 describe "empty/0" do
20 test "returns an empty node" do
21 assert %Node{left: nil, entries: []} = Node.empty()
22 end
23 end
24
25 describe "encode/1 and decode/1 round-trip" do
26 test "empty node" do
27 node = Node.empty()
28 assert {:ok, bytes} = Node.encode(node)
29 assert {:ok, ^node} = Node.decode(bytes)
30 end
31
32 test "node with single entry, no subtrees" do
33 entry = %Entry{prefix_len: 0, key_suffix: "col/key", value: @cid_a, right: nil}
34 node = %Node{left: nil, entries: [entry]}
35
36 assert {:ok, bytes} = Node.encode(node)
37 assert {:ok, decoded} = Node.decode(bytes)
38 assert decoded.left == nil
39 assert length(decoded.entries) == 1
40 assert hd(decoded.entries).key_suffix == "col/key"
41 assert hd(decoded.entries).value == @cid_a
42 assert hd(decoded.entries).right == nil
43 end
44
45 test "node with left subtree pointer" do
46 entry = %Entry{prefix_len: 0, key_suffix: "col/key", value: @cid_a, right: nil}
47 node = %Node{left: @cid_b, entries: [entry]}
48
49 assert {:ok, bytes} = Node.encode(node)
50 assert {:ok, decoded} = Node.decode(bytes)
51 assert decoded.left == @cid_b
52 end
53
54 test "node with right subtree pointer" do
55 entry = %Entry{prefix_len: 0, key_suffix: "col/key", value: @cid_a, right: @cid_b}
56 node = %Node{left: nil, entries: [entry]}
57
58 assert {:ok, bytes} = Node.encode(node)
59 assert {:ok, decoded} = Node.decode(bytes)
60 assert hd(decoded.entries).right == @cid_b
61 end
62
63 test "node with multiple entries and prefix compression" do
64 # "app.bsky.feed.post/" is 19 bytes, so prefix_len for bbb/ccc is 19
65 entries = [
66 %Entry{prefix_len: 0, key_suffix: "app.bsky.feed.post/aaa", value: @cid_a, right: nil},
67 %Entry{prefix_len: 19, key_suffix: "bbb", value: @cid_b, right: nil},
68 %Entry{prefix_len: 19, key_suffix: "ccc", value: @cid_c, right: nil}
69 ]
70
71 node = %Node{left: nil, entries: entries}
72
73 assert {:ok, bytes} = Node.encode(node)
74 assert {:ok, decoded} = Node.decode(bytes)
75
76 assert Node.keys(decoded) == [
77 "app.bsky.feed.post/aaa",
78 "app.bsky.feed.post/bbb",
79 "app.bsky.feed.post/ccc"
80 ]
81 end
82
83 test "CID is stable across encode → decode → re-encode" do
84 entry = %Entry{prefix_len: 0, key_suffix: "col/key", value: @cid_a, right: nil}
85 node = %Node{left: nil, entries: [entry]}
86
87 assert {:ok, bytes1} = Node.encode(node)
88 assert {:ok, decoded} = Node.decode(bytes1)
89 assert {:ok, bytes2} = Node.encode(decoded)
90 assert bytes1 == bytes2
91 end
92
93 test "explicit null for nil left is required for determinism" do
94 # Two encodings of a node with left=nil must produce the same bytes
95 node1 = Node.empty()
96 node2 = Node.empty()
97 assert {:ok, bytes1} = Node.encode(node1)
98 assert {:ok, bytes2} = Node.encode(node2)
99 assert bytes1 == bytes2
100 end
101 end
102
103 describe "cid/1" do
104 test "returns a :drisl codec CID" do
105 assert {:ok, cid} = Node.cid(Node.empty())
106 assert cid.codec == :drisl
107 end
108
109 test "same node always produces the same CID" do
110 node = Node.empty()
111 assert {:ok, cid1} = Node.cid(node)
112 assert {:ok, cid2} = Node.cid(node)
113 assert cid1 == cid2
114 end
115
116 test "different nodes produce different CIDs" do
117 node_a = Node.empty()
118
119 entry = %Entry{prefix_len: 0, key_suffix: "col/key", value: @cid_a, right: nil}
120 node_b = %Node{left: nil, entries: [entry]}
121
122 assert {:ok, cid_a} = Node.cid(node_a)
123 assert {:ok, cid_b} = Node.cid(node_b)
124 assert cid_a != cid_b
125 end
126 end
127
128 describe "keys/1" do
129 test "empty node returns empty list" do
130 assert Node.keys(Node.empty()) == []
131 end
132
133 test "reconstructs full keys from prefix-compressed entries" do
134 entries = [
135 %Entry{prefix_len: 0, key_suffix: "foo/aaa", value: @cid_a, right: nil},
136 %Entry{prefix_len: 4, key_suffix: "bbb", value: @cid_b, right: nil},
137 %Entry{prefix_len: 4, key_suffix: "ccc", value: @cid_c, right: nil}
138 ]
139
140 node = %Node{left: nil, entries: entries}
141 assert Node.keys(node) == ["foo/aaa", "foo/bbb", "foo/ccc"]
142 end
143
144 test "first entry always has prefix_len 0" do
145 entry = %Entry{prefix_len: 0, key_suffix: "full/key", value: @cid_a, right: nil}
146 node = %Node{left: nil, entries: [entry]}
147 assert Node.keys(node) == ["full/key"]
148 end
149 end
150
151 describe "compress_entries/1" do
152 test "single entry has prefix_len 0" do
153 entries = Node.compress_entries([{"col/key", @cid_a, nil}])
154 assert hd(entries).prefix_len == 0
155 assert hd(entries).key_suffix == "col/key"
156 end
157
158 test "adjacent entries with common prefix are compressed" do
159 # "app.bsky.feed.post/" = 19 bytes shared; then 'a' vs 'b' diverge
160 entries =
161 Node.compress_entries([
162 {"app.bsky.feed.post/aaa", @cid_a, nil},
163 {"app.bsky.feed.post/bbb", @cid_b, nil}
164 ])
165
166 [e1, e2] = entries
167 assert e1.prefix_len == 0
168 assert e1.key_suffix == "app.bsky.feed.post/aaa"
169 assert e2.prefix_len == 19
170 assert e2.key_suffix == "bbb"
171 end
172
173 test "no shared prefix means prefix_len stays 0" do
174 entries = Node.compress_entries([{"aaa/x", @cid_a, nil}, {"zzz/y", @cid_b, nil}])
175 assert Enum.at(entries, 1).prefix_len == 0
176 end
177
178 test "compress then expand is identity" do
179 keys = ["col/aaa", "col/bbb", "col/ccc"]
180 triples = Enum.map(keys, fn k -> {k, @cid_a, nil} end)
181 entries = Node.compress_entries(triples)
182 node = %Node{left: nil, entries: entries}
183 assert Node.keys(node) == keys
184 end
185 end
186
187 describe "common_prefix.json interop" do
188 # The common prefix length between two keys is what determines prefix_len
189 # in the second of any two adjacent compress_entries inputs. We test via
190 # compress_entries since common_prefix_length/2 is private.
191 test "all fixture pairs produce correct prefix_len" do
192 cid = CID.compute("test", :raw)
193
194 for %{"left" => left, "right" => right, "len" => expected} <- @prefix_interop do
195 [_first, second] = Node.compress_entries([{left, cid, nil}, {right, cid, nil}])
196
197 assert second.prefix_len == expected,
198 "common_prefix(#{inspect(left)}, #{inspect(right)}) " <>
199 "expected #{expected}, got #{second.prefix_len}"
200 end
201 end
202 end
203
204 describe "decode/1 error cases" do
205 test "returns error for non-CBOR bytes" do
206 assert {:error, :decode, _} = Node.decode(<<0xFF, 0xFF, 0xFF>>)
207 end
208
209 test "returns error for trailing bytes" do
210 {:ok, bytes} = Node.encode(Node.empty())
211 assert {:error, :decode, :trailing_bytes} = Node.decode(bytes <> <<0x00>>)
212 end
213
214 test "returns error for invalid structure (not a map)" do
215 # CBOR-encode a plain integer
216 {:ok, not_a_map} = DASL.DRISL.encode(42)
217 assert {:error, :decode, _} = Node.decode(not_a_map)
218 end
219 end
220end