An Elixir implementation of AT Protocol-flavoured Merkle Search Trees (MST)
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: implementation and tests

+3269 -12
+228 -7
lib/mst.ex
defmodule MST do
  @moduledoc """
  AT Protocol-flavoured Merkle Search Tree (MST) for Elixir.

  An MST is a content-addressed, deterministic key/value tree where keys are
  byte arrays and values are `DASL.CID` links. The tree structure is fully
  determined by the current set of key/value pairs — equal content always
  produces the same root CID, making it suitable for Merkle proofs and
  efficient diffs.

  This library implements the AT Protocol MST specification but is designed to
  be generic: it makes no assumptions about repository structure, commit
  objects, or AT-URI paths.

  This module is a thin facade: every function delegates to `MST.Tree`,
  `MST.CAR`, `MST.Diff`, or `MST.Height`.

  ## Quick start

      store = MST.Store.Memory.new()
      tree = MST.new(store)

      val = DASL.CID.compute("my record")
      {:ok, tree} = MST.put(tree, "collection/key", val)
      {:ok, ^val} = MST.get(tree, "collection/key")

      {:ok, tree} = MST.delete(tree, "collection/key")

  ## Loading from a CAR file

      {:ok, tree} = MST.from_car(File.read!("repo.car"))
      {:ok, binary} = MST.to_car(tree)

  ## Diffing two trees

      {:ok, diff} = MST.diff(tree_a, tree_b)
      # diff.record_ops — sorted list of MST.Diff.Op structs

  ## Key depth

  The MST height of a key is derived by SHA-256 hashing it and counting
  leading zero bits divided by 2 (floor), giving a fanout of 4.

      0 = MST.key_height("2653ae71")
      1 = MST.key_height("blue")

  Spec: https://atproto.com/specs/repository#mst-structure
  """

  alias MST.{CAR, Diff, Store, Tree}
  alias DASL.CID

  # ---------------------------------------------------------------------------
  # Construction
  # ---------------------------------------------------------------------------

  @doc """
  Returns a new empty tree backed by an `MST.Store.Memory`.

  Pass an explicit store to use a different backend:

      tree = MST.new(MST.Store.Memory.new())

  ## Examples

      iex> tree = MST.new()
      iex> tree.root
      nil

  """
  @spec new() :: Tree.t()
  def new, do: Tree.new(Store.Memory.new())

  @doc """
  Returns a new empty tree backed by the given store.

  ## Examples

      iex> tree = MST.new(MST.Store.Memory.new())
      iex> tree.root
      nil

  """
  @spec new(Store.t()) :: Tree.t()
  def new(store), do: Tree.new(store)

  # ---------------------------------------------------------------------------
  # Lookup / mutation
  # ---------------------------------------------------------------------------

  @doc """
  Looks up `key` in the tree.

  Returns `{:ok, value_cid}` when the key is present and
  `{:error, :not_found}` when it is not.

  ## Examples

      iex> tree = MST.new()
      iex> MST.get(tree, "col/k")
      {:error, :not_found}

  """
  @spec get(Tree.t(), binary()) :: {:ok, CID.t()} | {:error, atom()}
  defdelegate get(tree, key), to: Tree

  @doc """
  Inserts or updates `key` → `value`. Returns `{:ok, new_tree}`.

  ## Examples

      iex> tree = MST.new()
      iex> val = DASL.CID.compute("data")
      iex> {:ok, tree} = MST.put(tree, "col/k", val)
      iex> MST.get(tree, "col/k")
      {:ok, val}

  """
  @spec put(Tree.t(), binary(), CID.t()) :: {:ok, Tree.t()} | {:error, atom()}
  defdelegate put(tree, key, value), to: Tree

  @doc """
  Removes `key` from the tree. Returns `{:ok, new_tree}` or
  `{:error, :not_found}`.

  ## Examples

      iex> tree = MST.new()
      iex> val = DASL.CID.compute("data")
      iex> {:ok, tree} = MST.put(tree, "col/k", val)
      iex> {:ok, tree} = MST.delete(tree, "col/k")
      iex> MST.get(tree, "col/k")
      {:error, :not_found}

  """
  @spec delete(Tree.t(), binary()) :: {:ok, Tree.t()} | {:error, atom()}
  defdelegate delete(tree, key), to: Tree

  @doc """
  Returns all key-value pairs in sorted order.
  """
  @spec to_list(Tree.t()) :: {:ok, [{binary(), CID.t()}]} | {:error, atom()}
  defdelegate to_list(tree), to: Tree

  @doc """
  Returns a lazy stream of `{key, value_cid}` pairs in sorted order.
  """
  @spec stream(Tree.t()) :: Enumerable.t()
  defdelegate stream(tree), to: Tree

  @doc """
  Returns the number of key-value pairs in the tree.
  """
  @spec length(Tree.t()) :: {:ok, non_neg_integer()} | {:error, atom()}
  defdelegate length(tree), to: Tree

  # ---------------------------------------------------------------------------
  # CAR I/O
  # ---------------------------------------------------------------------------

  @doc """
  Loads an MST from a CAR-encoded binary or an already-decoded `DASL.CAR` struct.

  When given a binary, it is decoded via `DASL.CAR.decode/2` first. When given
  a `%DASL.CAR{}` struct the decoding step is skipped entirely, which avoids a
  redundant encode/decode cycle when you already hold the struct in memory.

  Accepts the same options as `DASL.CAR.decode/2` (`verify: boolean`) when
  called with a binary; options are ignored for the struct variant.

  Errors are returned in the shapes described by `t:MST.CAR.car_error/0`,
  which includes three-element tuples such as `{:error, :header, reason}`.

  ## Examples

      iex> tree = MST.new()
      iex> val = DASL.CID.compute("x")
      iex> {:ok, tree} = MST.put(tree, "col/a", val)
      iex> {:ok, bin} = MST.to_car(tree)
      iex> {:ok, tree2} = MST.from_car(bin)
      iex> MST.get(tree2, "col/a")
      {:ok, val}

      iex> tree = MST.new()
      iex> val = DASL.CID.compute("x")
      iex> {:ok, tree} = MST.put(tree, "col/a", val)
      iex> {:ok, bin} = MST.to_car(tree)
      iex> {:ok, car} = DASL.CAR.decode(bin)
      iex> {:ok, tree2} = MST.from_car(car)
      iex> MST.get(tree2, "col/a")
      {:ok, val}

  """
  @spec from_car(binary() | DASL.CAR.t(), keyword()) :: {:ok, Tree.t()} | CAR.car_error()
  def from_car(input, opts \\ [])
  def from_car(%DASL.CAR{} = car, _opts), do: CAR.from_car(car)
  def from_car(binary, opts) when is_binary(binary), do: CAR.from_binary(binary, opts)

  @doc """
  Serialises an `MST.Tree` to a CAR-encoded binary.

  Delegates to `MST.CAR.to_binary/2`; `opts` are passed through unchanged.
  """
  @spec to_car(Tree.t(), keyword()) :: {:ok, binary()} | CAR.car_error()
  defdelegate to_car(tree, opts \\ []), to: CAR, as: :to_binary

  # ---------------------------------------------------------------------------
  # Diff
  # ---------------------------------------------------------------------------

  @doc """
  Computes the diff from `tree_a` to `tree_b`.

  Returns an `MST.Diff` with `created_nodes`, `deleted_nodes`, and
  `record_ops` sorted by key.

  ## Examples

      iex> tree_a = MST.new()
      iex> val = DASL.CID.compute("v")
      iex> {:ok, tree_b} = MST.put(tree_a, "col/a", val)
      iex> {:ok, diff} = MST.diff(tree_a, tree_b)
      iex> length(diff.record_ops)
      1

  """
  @spec diff(Tree.t(), Tree.t()) :: {:ok, Diff.t()} | {:error, atom()}
  defdelegate diff(tree_a, tree_b), to: Diff, as: :compute

  # ---------------------------------------------------------------------------
  # Utilities
  # ---------------------------------------------------------------------------

  @doc """
  Returns the MST depth for a key.

  SHA-256 hashes `key` and counts leading zero bits divided by 2 (floor).

  ## Examples

      iex> MST.key_height("2653ae71")
      0

      iex> MST.key_height("blue")
      1

  """
  @spec key_height(binary()) :: non_neg_integer()
  defdelegate key_height(key), to: MST.Height, as: :for_key
end
+290
lib/mst/car.ex
defmodule MST.CAR do
  @moduledoc """
  Bridges `MST.Tree` with the DASL CAR file format.

  Provides functions to load an MST from a CAR binary or stream, and to export
  an MST back to CAR format. The CAR header's first root CID is treated as the
  MST root; any additional roots are ignored.

  MST node blocks (DAG-CBOR codec, `:drisl`) are decoded into `MST.Node`
  structs and stored in an `MST.Store.Memory`. Blocks with the `:raw` codec
  (e.g. record data) are ignored during import — the store only holds MST
  structural nodes.

  Note that *every* `:drisl` block is passed to `MST.Node.decode/1`. If that
  function rejects a block, the whole import stops and the decode error is
  returned.

  ## Example

      {:ok, tree} = MST.CAR.from_binary(File.read!("repo.car"))
      {:ok, pairs} = MST.Tree.to_list(tree)

  """

  alias DASL.{CAR, CID}
  alias MST.{Node, Store, Tree}

  # The tuple-shaped reasons are produced by the `rescue` clauses of
  # `from_binary/2` and `from_stream/2`, which carry the exception message.
  @type car_error() ::
          {:error, :header, atom() | {:invalid_binary, String.t()}}
          | {:error, :block, atom()}
          | {:error, atom() | {:stream_decode, String.t()}}

  # ---------------------------------------------------------------------------
  # Import
  # ---------------------------------------------------------------------------

  @doc """
  Loads an MST from an already-decoded `DASL.CAR` struct.

  Populates an `MST.Store.Memory` from the struct's blocks map and returns an
  `MST.Tree` rooted at the CAR's first root CID. Use this when you already hold
  a `%DASL.CAR{}` in memory and want to avoid a redundant encode/decode cycle.

  A CAR with no roots yields a new empty tree.

  ## Examples

      iex> store = MST.Store.Memory.new()
      iex> tree = MST.Tree.new(store)
      iex> val = DASL.CID.compute("data")
      iex> {:ok, tree} = MST.Tree.put(tree, "col/key", val)
      iex> {:ok, binary} = MST.CAR.to_binary(tree)
      iex> {:ok, car} = DASL.CAR.decode(binary)
      iex> {:ok, tree2} = MST.CAR.from_car(car)
      iex> MST.Tree.get(tree2, "col/key")
      {:ok, val}

  """
  @spec from_car(CAR.t()) :: {:ok, Tree.t()} | car_error()
  def from_car(%CAR{roots: roots, blocks: blocks}), do: build_tree(roots, blocks)

  @doc """
  Loads an MST from a CAR-encoded binary.

  Decodes all blocks, populates an `MST.Store.Memory` with MST nodes (DAG-CBOR
  codec), and returns an `MST.Tree` rooted at the CAR's first root CID.

  Accepts the same options as `DASL.CAR.decode/2` (`verify: boolean`).

  An `ArgumentError` raised while decoding is converted to
  `{:error, :header, {:invalid_binary, message}}`.

  ## Examples

      iex> store = MST.Store.Memory.new()
      iex> tree = MST.Tree.new(store)
      iex> val = DASL.CID.compute("data")
      iex> {:ok, tree} = MST.Tree.put(tree, "col/key", val)
      iex> {:ok, binary} = MST.CAR.to_binary(tree)
      iex> {:ok, tree2} = MST.CAR.from_binary(binary)
      iex> MST.Tree.get(tree2, "col/key")
      {:ok, val}

  """
  @spec from_binary(binary(), keyword()) :: {:ok, Tree.t()} | car_error()
  def from_binary(binary, opts \\ []) when is_binary(binary) do
    with {:ok, car} <- CAR.decode(binary, opts) do
      build_tree(car.roots, car.blocks)
    end
  rescue
    e in ArgumentError -> {:error, :header, {:invalid_binary, e.message}}
  end

  @doc """
  Loads an MST from a CAR stream (an `Enumerable` of binary chunks).

  Streams blocks through `DASL.CAR.stream_decode/2`, collecting them into a
  map before building the tree. A `RuntimeError` raised while consuming the
  stream is converted to `{:error, {:stream_decode, message}}`.

  If the stream never yields a header, the result is a new empty tree.

  ## Options

  Options are forwarded unchanged to `DASL.CAR.stream_decode/2`:

    - `:verify` — verify CID digests of incoming blocks (default: `true`)

  ## Examples

      iex> store = MST.Store.Memory.new()
      iex> tree = MST.Tree.new(store)
      iex> val = DASL.CID.compute("data")
      iex> {:ok, tree} = MST.Tree.put(tree, "col/key", val)
      iex> {:ok, binary} = MST.CAR.to_binary(tree)
      iex> chunk_stream = [binary]
      iex> {:ok, tree2} = MST.CAR.from_stream(chunk_stream)
      iex> MST.Tree.get(tree2, "col/key")
      {:ok, val}

  """
  @spec from_stream(Enumerable.t(), keyword()) :: {:ok, Tree.t()} | car_error()
  def from_stream(stream, opts \\ []) do
    {roots, blocks} =
      stream
      |> CAR.stream_decode(opts)
      |> Enum.reduce({nil, %{}}, fn
        {:header, _version, roots}, {_roots, blocks} ->
          {roots, blocks}

        {:block, cid, data}, {roots, blocks} ->
          {roots, Map.put(blocks, cid, data)}
      end)

    # `roots` is still nil when no header item was seen.
    build_tree(roots || [], blocks)
  rescue
    e in RuntimeError -> {:error, {:stream_decode, e.message}}
  end

  # ---------------------------------------------------------------------------
  # Export
  # ---------------------------------------------------------------------------

  @doc """
  Serialises an `MST.Tree` to a CAR-encoded binary.

  Collects all reachable MST node blocks and wraps them in a CARv1 file with
  the tree root as the sole header root. An empty tree (`root: nil`) is
  written as a CAR whose only block is the encoded empty node.

  ## Examples

      iex> store = MST.Store.Memory.new()
      iex> tree = MST.Tree.new(store)
      iex> val = DASL.CID.compute("data")
      iex> {:ok, tree} = MST.Tree.put(tree, "col/key", val)
      iex> {:ok, binary} = MST.CAR.to_binary(tree)
      iex> is_binary(binary)
      true

  """
  @spec to_binary(Tree.t()) :: {:ok, binary()} | car_error()
  def to_binary(tree), do: to_binary(tree, [])

  @doc false
  @spec to_binary(Tree.t(), keyword()) :: {:ok, binary()} | car_error()
  def to_binary(%Tree{root: nil}, opts) do
    # Empty tree — emit a CAR with an empty node as root.
    with {:ok, bytes} <- Node.encode(Node.empty()) do
      cid = CID.compute(bytes, :drisl)

      car = %CAR{
        version: 1,
        roots: [cid],
        blocks: %{cid => bytes}
      }

      # Forward `opts` exactly as the non-empty clause does, so encoder options
      # apply regardless of whether the tree has any entries.
      CAR.encode(car, opts)
    else
      {:error, :encode, reason} -> {:error, reason}
    end
  end

  def to_binary(%Tree{root: root} = tree, opts) do
    with {:ok, blocks} <- Tree.collect_blocks(tree) do
      car = %CAR{
        version: 1,
        roots: [root],
        blocks: blocks
      }

      CAR.encode(car, opts)
    end
  end

  @doc """
  Returns a stream of `DASL.CAR` stream items for the tree in pre-order
  (root first, then depth-first left-to-right).

  Emits `{:header, 1, [root_cid]}` followed by `{:block, cid, bytes}` for
  each reachable MST node.

  This stream can be piped into a custom CAR writer. It does **not** produce
  a fully-encoded CAR binary — use `to_binary/2` for that.

  Consuming the stream raises a `RuntimeError` if a referenced node is missing
  from the store.

  """
  @spec to_stream(Tree.t()) :: Enumerable.t()
  def to_stream(%Tree{root: nil}) do
    empty_node = Node.empty()
    {:ok, bytes} = Node.encode(empty_node)
    cid = CID.compute(bytes, :drisl)

    [
      {:header, 1, [cid]},
      {:block, cid, bytes}
    ]
  end

  def to_stream(%Tree{root: root, store: store}) do
    header = [{:header, 1, [root]}]
    blocks = preorder_stream(store, root)
    Stream.concat(header, blocks)
  end

  # ---------------------------------------------------------------------------
  # Private — tree construction from decoded blocks
  # ---------------------------------------------------------------------------

  # Builds a tree rooted at the first root CID; with no roots the result is a
  # new empty tree and `blocks` is not inspected.
  @spec build_tree([CID.t()], %{CID.t() => binary()}) :: {:ok, Tree.t()} | car_error()
  defp build_tree([], _blocks), do: {:ok, Tree.new(Store.Memory.new())}

  defp build_tree([root | _], blocks) do
    # Decode all DAG-CBOR blocks into MST nodes; ignore raw-codec blocks.
    # The first block that fails to decode halts the reduction with its error.
    result =
      Enum.reduce_while(blocks, {:ok, Store.Memory.new()}, fn {cid, data}, {:ok, store} ->
        case decode_block(cid, data) do
          {:ok, node} ->
            {:cont, {:ok, Store.put(store, cid, node)}}

          :skip ->
            {:cont, {:ok, store}}

          {:error, _} = err ->
            {:halt, err}
        end
      end)

    case result do
      {:ok, store} -> {:ok, Tree.from_root(root, store)}
      err -> err
    end
  end

  # Dispatches on the CID codec: `:raw` blocks are skipped, `:drisl` blocks are
  # decoded as MST nodes (flattening the three-element decode error).
  @spec decode_block(CID.t(), binary()) :: {:ok, Node.t()} | :skip | {:error, atom()}
  defp decode_block(%CID{codec: :raw}, _data), do: :skip

  defp decode_block(%CID{codec: :drisl}, data) do
    case Node.decode(data) do
      {:ok, node} -> {:ok, node}
      {:error, :decode, reason} -> {:error, reason}
    end
  end

  # ---------------------------------------------------------------------------
  # Private — pre-order DFS stream
  # ---------------------------------------------------------------------------

  # Lazily walks the tree using an explicit stack of pending CIDs. A node's
  # children are pushed in front of the remaining stack, which yields pre-order.
  @spec preorder_stream(Store.t(), CID.t()) :: Enumerable.t()
  defp preorder_stream(store, root) do
    Stream.resource(
      fn -> [root] end,
      fn
        [] ->
          {:halt, []}

        [cid | rest] ->
          case Store.get(store, cid) do
            {:error, :not_found} ->
              raise "MST.CAR.to_stream/1: node not found: #{CID.encode(cid)}"

            {:ok, node} ->
              {:ok, bytes} = Node.encode(node)
              children = subtree_cids(node)
              {[{:block, cid, bytes}], children ++ rest}
          end
      end,
      fn _ -> :ok end
    )
  end

  # Child links of a node in left-to-right order: the left subtree (if any)
  # followed by each entry's right subtree (if any).
  @spec subtree_cids(Node.t()) :: [CID.t()]
  defp subtree_cids(node) do
    left = if node.left, do: [node.left], else: []
    rights = Enum.flat_map(node.entries, fn e -> if e.right, do: [e.right], else: [] end)
    left ++ rights
  end
end
+214
lib/mst/diff.ex
defmodule MST.Diff do
  @moduledoc """
  Computes the diff between two `MST.Tree` instances.

  A diff captures:

    - Which MST nodes were **created** (reachable from `b` but not from `a`)
    - Which MST nodes were **deleted** (reachable from `a` but not from `b`)
    - The per-key **record operations** (creates, updates, and deletes)

  ## Algorithm

  If both trees have the same root CID (including both being empty) they are
  identical by construction, and an empty diff is returned without touching
  either store.

  Otherwise, node sets (`created_nodes` / `deleted_nodes`) are computed by
  collecting all reachable node CIDs from each tree root with a DFS, then
  taking set differences. Each tree is walked in full: a CID already visited
  during the same walk is not descended into again, but subtrees that the two
  trees share are *not* skipped.

  Record ops are computed by fully materialising both trees as sorted key/value
  lists and performing a linear sorted-merge comparison. This is straightforward
  and correct at the cost of O(n) memory; it is the right tradeoff given that
  the diff is typically used to inspect the full changeset anyway.

  ## Example

      {:ok, diff} = MST.Diff.compute(tree_a, tree_b)
      # diff.record_ops is a sorted list of MST.Diff.Op structs

  """

  use TypedStruct

  alias DASL.CID
  alias MST.{Node, Store, Tree}

  @type diff_error() :: {:error, atom()}

  typedstruct enforce: true do
    field :created_nodes, MapSet.t(CID.t()), default: MapSet.new()
    field :deleted_nodes, MapSet.t(CID.t()), default: MapSet.new()
    field :record_ops, [MST.Diff.Op.t()], default: []
  end

  # ---------------------------------------------------------------------------
  # Public API
  # ---------------------------------------------------------------------------

  @doc """
  Computes the diff from `tree_a` to `tree_b`.

  Both trees must use stores that have their nodes populated (e.g. loaded from
  CAR files). Returns `{:ok, diff}`, or `{:error, :missing_node}` if a node
  referenced by either tree cannot be fetched from its store.

  When both roots are equal the stores are not consulted at all, so missing
  nodes are not detected in that case.

  ## Examples

      iex> store = MST.Store.Memory.new()
      iex> ta = MST.Tree.new(store)
      iex> val = DASL.CID.compute("v")
      iex> {:ok, tb} = MST.Tree.put(ta, "col/a", val)
      iex> {:ok, diff} = MST.Diff.compute(ta, tb)
      iex> length(diff.record_ops)
      1
      iex> hd(diff.record_ops).key
      "col/a"

  """
  @spec compute(Tree.t(), Tree.t()) :: {:ok, t()} | diff_error()
  def compute(%Tree{root: root}, %Tree{root: root}) do
    # Identical root CIDs (or two empty trees) mean identical content.
    {:ok,
     %__MODULE__{
       created_nodes: MapSet.new(),
       deleted_nodes: MapSet.new(),
       record_ops: []
     }}
  end

  def compute(%Tree{root: root_a, store: store_a}, %Tree{root: root_b, store: store_b}) do
    with {:ok, nodes_a} <- reachable_nodes(store_a, root_a),
         {:ok, nodes_b} <- reachable_nodes(store_b, root_b),
         {:ok, leaves_a} <- collect_leaves(store_a, root_a),
         {:ok, leaves_b} <- collect_leaves(store_b, root_b) do
      ops = merge_ops(leaves_a, leaves_b, [])

      {:ok,
       %__MODULE__{
         created_nodes: MapSet.difference(nodes_b, nodes_a),
         deleted_nodes: MapSet.difference(nodes_a, nodes_b),
         record_ops: ops
       }}
    end
  end

  # ---------------------------------------------------------------------------
  # Private — reachable node collection
  # ---------------------------------------------------------------------------

  # Returns the set of node CIDs reachable from `root`; an empty tree has none.
  @spec reachable_nodes(Store.t(), CID.t() | nil) :: {:ok, MapSet.t(CID.t())} | diff_error()
  defp reachable_nodes(_store, nil), do: {:ok, MapSet.new()}
  defp reachable_nodes(store, root), do: collect_nodes(store, root, MapSet.new())

  # Depth-first walk that threads the visited set through every child and
  # stops at the first node that cannot be fetched.
  @spec collect_nodes(Store.t(), CID.t(), MapSet.t(CID.t())) ::
          {:ok, MapSet.t(CID.t())} | diff_error()
  defp collect_nodes(store, cid, visited) do
    if MapSet.member?(visited, cid) do
      {:ok, visited}
    else
      with {:ok, node} <- fetch(store, cid) do
        visited = MapSet.put(visited, cid)

        Enum.reduce_while(subtree_cids(node), {:ok, visited}, fn sub, {:ok, v} ->
          case collect_nodes(store, sub, v) do
            {:ok, v} -> {:cont, {:ok, v}}
            err -> {:halt, err}
          end
        end)
      end
    end
  end

  # Child links of a node: the left subtree (if any) followed by each entry's
  # right subtree (if any).
  @spec subtree_cids(Node.t()) :: [CID.t()]
  defp subtree_cids(node) do
    left = if node.left, do: [node.left], else: []
    rights = Enum.flat_map(node.entries, fn e -> if e.right, do: [e.right], else: [] end)
    left ++ rights
  end

  # ---------------------------------------------------------------------------
  # Private — leaf collection (in sorted order)
  # ---------------------------------------------------------------------------

  # Returns every `{key, value_cid}` pair below `root` in traversal order
  # (left subtree, then each entry followed by its right subtree).
  @spec collect_leaves(Store.t(), CID.t() | nil) ::
          {:ok, [{binary(), CID.t()}]} | diff_error()
  defp collect_leaves(_store, nil), do: {:ok, []}

  defp collect_leaves(store, root) do
    with {:ok, pairs} <- do_walk(store, root, []) do
      {:ok, Enum.reverse(pairs)}
    end
  end

  # Accumulates pairs in reverse order (prepend for efficiency, reverse at end).
  @spec do_walk(Store.t(), CID.t(), [{binary(), CID.t()}]) ::
          {:ok, [{binary(), CID.t()}]} | diff_error()
  defp do_walk(store, cid, acc) do
    with {:ok, node} <- fetch(store, cid),
         # Expand the prefix-compressed keys once per node, before descending.
         full_keys = Node.keys(node),
         {:ok, acc} <- maybe_do_walk(store, node.left, acc) do
      do_walk_entries(store, node.entries, full_keys, acc)
    end
  end

  # A nil link means "no subtree here"; the accumulator passes through untouched.
  defp maybe_do_walk(_store, nil, acc), do: {:ok, acc}
  defp maybe_do_walk(store, cid, acc), do: do_walk(store, cid, acc)

  # Walks entries and their expanded keys in lockstep; the two lists come from
  # the same node and therefore have the same length.
  defp do_walk_entries(_store, [], [], acc), do: {:ok, acc}

  defp do_walk_entries(store, [entry | rest_e], [key | rest_k], acc) do
    acc = [{key, entry.value} | acc]

    with {:ok, acc} <- maybe_do_walk(store, entry.right, acc) do
      do_walk_entries(store, rest_e, rest_k, acc)
    end
  end

  # ---------------------------------------------------------------------------
  # Private — sorted-merge diff
  # ---------------------------------------------------------------------------

  # Merges two key-sorted pair lists into a key-sorted list of ops:
  # keys only in `b` are creates, keys only in `a` are deletes, and keys in
  # both produce an update only when the values differ.
  @spec merge_ops(
          [{binary(), CID.t()}],
          [{binary(), CID.t()}],
          [MST.Diff.Op.t()]
        ) :: [MST.Diff.Op.t()]
  defp merge_ops([], [], ops), do: Enum.reverse(ops)

  defp merge_ops([], [{kb, vb} | rest_b], ops) do
    op = %MST.Diff.Op{key: kb, old_value: nil, new_value: vb}
    merge_ops([], rest_b, [op | ops])
  end

  defp merge_ops([{ka, va} | rest_a], [], ops) do
    op = %MST.Diff.Op{key: ka, old_value: va, new_value: nil}
    merge_ops(rest_a, [], [op | ops])
  end

  defp merge_ops([{ka, va} | rest_a], [{kb, vb} | rest_b], ops) do
    cond do
      ka == kb ->
        new_ops =
          if va == vb,
            do: ops,
            else: [%MST.Diff.Op{key: ka, old_value: va, new_value: vb} | ops]

        merge_ops(rest_a, rest_b, new_ops)

      ka < kb ->
        op = %MST.Diff.Op{key: ka, old_value: va, new_value: nil}
        merge_ops(rest_a, [{kb, vb} | rest_b], [op | ops])

      true ->
        op = %MST.Diff.Op{key: kb, old_value: nil, new_value: vb}
        merge_ops([{ka, va} | rest_a], rest_b, [op | ops])
    end
  end

  # ---------------------------------------------------------------------------
  # Private — store access
  # ---------------------------------------------------------------------------

  # Fetches a node, renaming the store's `:not_found` to `:missing_node`.
  @spec fetch(Store.t(), CID.t()) :: {:ok, Node.t()} | diff_error()
  defp fetch(store, cid) do
    case Store.get(store, cid) do
      {:ok, node} -> {:ok, node}
      {:error, :not_found} -> {:error, :missing_node}
    end
  end
end
+19
lib/mst/diff/op.ex
defmodule MST.Diff.Op do
  @moduledoc """
  One key-level change reported by `MST.Diff.compute/2`.

  Which of the two value fields is `nil` tells you the kind of change:

    - create — `old_value: nil, new_value: cid`
    - update — `old_value: cid, new_value: cid`
    - delete — `old_value: cid, new_value: nil`

  All three fields must be supplied when building the struct.
  """

  use TypedStruct

  typedstruct enforce: true do
    field :key, binary()
    field :old_value, DASL.CID.t() | nil
    field :new_value, DASL.CID.t() | nil
  end
end
+71
lib/mst/height.ex
defmodule MST.Height do
  @moduledoc """
  Key-depth computation for the AT Protocol Merkle Search Tree.

  A key's depth (also called its "layer" or "height") comes from the SHA-256
  digest of the key: count the leading zero bits of the digest and halve that
  count, rounding down. Two bits per level gives a theoretical fanout of 4 —
  each additional level is four times rarer than the one below it.

  Spec: https://atproto.com/specs/repository#mst-structure
  """

  @doc """
  Computes the MST depth for a given key.

  Hashes `key` with SHA-256, counts the zero bits at the front of the digest,
  and returns that count divided by 2 (floor). The result is a non-negative
  integer; depth 0 is the most common (probability ~75% per key) and every
  higher depth is four times rarer.

  ## Examples

      iex> MST.Height.for_key("2653ae71")
      0

      iex> MST.Height.for_key("blue")
      1

      iex> MST.Height.for_key("app.bsky.feed.post/454397e440ec")
      4

      iex> MST.Height.for_key("app.bsky.feed.post/9adeb165882c")
      8

  """
  @spec for_key(binary()) :: non_neg_integer()
  def for_key(key) when is_binary(key) do
    digest = :crypto.hash(:sha256, key)
    div(count_leading_zeros(digest, 0), 2)
  end

  # ---------------------------------------------------------------------------
  # Private helpers
  # ---------------------------------------------------------------------------

  # Consumes the digest one bit at a time, bumping the tally for every zero
  # bit and stopping at the first one bit (or when the input is exhausted).
  @spec count_leading_zeros(bitstring(), non_neg_integer()) :: non_neg_integer()
  defp count_leading_zeros(<<0::1, tail::bitstring>>, tally) do
    count_leading_zeros(tail, tally + 1)
  end

  defp count_leading_zeros(_bits, tally), do: tally
end
+282
lib/mst/node.ex
defmodule MST.Node do
  @moduledoc """
  Wire-format representation of a single MST node, plus encode/decode.

  An MST node holds an optional left subtree CID (`left`) and an ordered list
  of `MST.Node.Entry` values, each carrying a key suffix, a value CID, and an
  optional right subtree CID. This maps exactly to the AT Protocol node schema:

      { l: CID | null, e: [ { p, k, v, t } ] }

  Keys inside a node are prefix-compressed: each entry's `key_suffix` is the
  portion of the full key that follows the bytes it shares with the previous
  entry's full key. The first entry always has `prefix_len: 0` and carries its
  full key in `key_suffix`. Prefix compression is mandatory — the serialised
  form must be deterministic across implementations.

  Spec: https://atproto.com/specs/repository#mst-structure
  """

  use TypedStruct

  alias DASL.{CID, DRISL}
  alias MST.Node.Entry

  @type encode_error() :: {:error, :encode, atom()}
  @type decode_error() :: {:error, :decode, atom()}

  typedstruct enforce: true do
    field :left, CID.t() | nil
    field :entries, [Entry.t()], default: []
  end

  # ---------------------------------------------------------------------------
  # Construction helpers
  # ---------------------------------------------------------------------------

  @doc """
  Returns an empty MST node — the only valid representation of an empty tree.

  ## Examples

      iex> MST.Node.empty()
      %MST.Node{left: nil, entries: []}

  """
  @spec empty() :: t()
  def empty, do: %__MODULE__{left: nil, entries: []}

  # ---------------------------------------------------------------------------
  # Key expansion
  # ---------------------------------------------------------------------------

  @doc """
  Reconstructs the full keys for all entries in the node.

  Each entry stores only the suffix of its key relative to the previous entry.
  This function walks the entry list and accumulates the full key for each.

  Raises `ArgumentError` (from `binary_part/3`) when an entry's `prefix_len`
  is larger than the byte size of the previous full key. Nodes returned by
  `decode/1` and entries built by `compress_entries/1` never trigger this.

  ## Examples

      iex> cid = DASL.CID.compute("a")
      iex> entries = [
      ...>   %MST.Node.Entry{prefix_len: 0, key_suffix: "foo/bar", value: cid, right: nil},
      ...>   %MST.Node.Entry{prefix_len: 4, key_suffix: "baz", value: cid, right: nil},
      ...> ]
      iex> MST.Node.keys(%MST.Node{left: nil, entries: entries})
      ["foo/bar", "foo/baz"]

  """
  @spec keys(t()) :: [binary()]
  def keys(%__MODULE__{entries: entries}), do: expand_keys(entries, "", [])

  # ---------------------------------------------------------------------------
  # CID computation
  # ---------------------------------------------------------------------------

  @doc """
  Computes the `:drisl`-codec CID for this node.

  Encodes the node to DRISL CBOR bytes and hashes them. Returns an error tuple
  if encoding fails.

  ## Examples

      iex> {:ok, cid} = MST.Node.cid(MST.Node.empty())
      iex> cid.codec
      :drisl

  """
  @spec cid(t()) :: {:ok, CID.t()} | encode_error()
  def cid(node) do
    with {:ok, bytes} <- encode(node) do
      {:ok, CID.compute(bytes, :drisl)}
    end
  end

  # ---------------------------------------------------------------------------
  # Encoding
  # ---------------------------------------------------------------------------

  @doc """
  Encodes an `MST.Node` to DRISL CBOR bytes.

  `nil` subtree links are serialised as explicit CBOR `null` — this is
  mandatory for cross-implementation CID compatibility: skipping a key vs.
  writing `null` produces different bytes and therefore a different CID.

  Returns `{:ok, bytes}` or `{:error, :encode, reason}`.

  ## Examples

      iex> {:ok, bytes} = MST.Node.encode(MST.Node.empty())
      iex> is_binary(bytes)
      true

  """
  @spec encode(t()) :: {:ok, binary()} | encode_error()
  def encode(%__MODULE__{left: left, entries: entries}) do
    # Building the per-entry maps cannot fail, so a plain map suffices; only
    # the DRISL encoder itself can report an error.
    payload = %{"e" => Enum.map(entries, &encode_entry/1), "l" => left}

    with {:ok, bytes} <- DRISL.encode(payload) do
      {:ok, bytes}
    else
      {:error, reason} when is_atom(reason) -> {:error, :encode, reason}
      {:error, :encode, _} = err -> err
    end
  end

  # ---------------------------------------------------------------------------
  # Decoding
  # ---------------------------------------------------------------------------

  @doc """
  Decodes DRISL CBOR bytes into an `MST.Node`.

  Besides checking the structural shape of the node, decoding verifies that
  every entry's `prefix_len` fits inside the previous entry's full key (and is
  therefore `0` for the first entry). Without this check a malformed block
  would decode successfully and only blow up later inside `keys/1`.

  Returns `{:ok, node}` or `{:error, :decode, reason}`, where `reason` is one
  of `:trailing_bytes`, `:invalid_structure`, `:invalid_entry`,
  `:invalid_cid_link`, `:invalid_prefix_len`, or an atom reported by the DRISL
  decoder.

  ## Examples

      iex> {:ok, bytes} = MST.Node.encode(MST.Node.empty())
      iex> {:ok, node} = MST.Node.decode(bytes)
      iex> node.entries
      []
      iex> node.left
      nil

  """
  @spec decode(binary()) :: {:ok, t()} | decode_error()
  def decode(bytes) when is_binary(bytes) do
    with {:ok, term, <<>>} <- DRISL.decode(bytes),
         {:ok, node} <- decode_term(term) do
      {:ok, node}
    else
      {:ok, _, _leftover} -> {:error, :decode, :trailing_bytes}
      {:error, reason} when is_atom(reason) -> {:error, :decode, reason}
      {:error, :decode, _} = err -> err
    end
  end

  # ---------------------------------------------------------------------------
  # Compression helpers (used by MST.Tree)
  # ---------------------------------------------------------------------------

  @doc """
  Compresses a list of `{full_key, value_cid, right_cid | nil}` tuples into a
  list of `MST.Node.Entry` structs using the key prefix-compression scheme.

  The first entry always has `prefix_len: 0`. Each subsequent entry computes
  how many leading bytes it shares with the previous full key.

  ## Examples

      iex> cid = DASL.CID.compute("x")
      iex> entries = MST.Node.compress_entries([{"abc/def", cid, nil}, {"abc/ghi", cid, nil}])
      iex> hd(tl(entries)).prefix_len
      4

  """
  @spec compress_entries([{binary(), CID.t(), CID.t() | nil}]) :: [Entry.t()]
  def compress_entries(triples), do: do_compress(triples, "", [])

  # ---------------------------------------------------------------------------
  # Private helpers
  # ---------------------------------------------------------------------------

  # Walks the entries left to right, rebuilding each full key from the previous
  # full key's prefix plus the entry's own suffix.
  @spec expand_keys([Entry.t()], binary(), [binary()]) :: [binary()]
  defp expand_keys([], _prev, acc), do: Enum.reverse(acc)

  defp expand_keys([entry | rest], prev, acc) do
    full_key = binary_part(prev, 0, entry.prefix_len) <> entry.key_suffix
    expand_keys(rest, full_key, [full_key | acc])
  end

  # Non-raising single-step variant of expand_keys/3, used while decoding
  # untrusted input.
  @spec expand_key(binary(), Entry.t()) :: {:ok, binary()} | decode_error()
  defp expand_key(prev, %Entry{prefix_len: plen, key_suffix: suffix})
       when plen <= byte_size(prev) do
    {:ok, binary_part(prev, 0, plen) <> suffix}
  end

  defp expand_key(_prev, _entry), do: {:error, :decode, :invalid_prefix_len}

  @spec do_compress([{binary(), CID.t(), CID.t() | nil}], binary(), [Entry.t()]) :: [Entry.t()]
  defp do_compress([], _prev, acc), do: Enum.reverse(acc)

  defp do_compress([{key, value, right} | rest], prev, acc) do
    plen = common_prefix_length(prev, key)
    suffix = binary_part(key, plen, byte_size(key) - plen)

    entry = %Entry{
      prefix_len: plen,
      key_suffix: suffix,
      value: value,
      right: right
    }

    do_compress(rest, key, [entry | acc])
  end

  # Number of leading bytes shared by the two binaries.
  @spec common_prefix_length(binary(), binary()) :: non_neg_integer()
  defp common_prefix_length(a, b), do: cpl(a, b, 0)

  defp cpl(<<c, ra::binary>>, <<c, rb::binary>>, n), do: cpl(ra, rb, n + 1)
  defp cpl(_, _, n), do: n

  # Builds the wire map for one entry. The key suffix is wrapped in a bytes tag
  # so it is emitted as a CBOR byte string rather than a text string.
  @spec encode_entry(Entry.t()) :: map()
  defp encode_entry(%Entry{prefix_len: p, key_suffix: k, value: v, right: t}) do
    %{
      "k" => %CBOR.Tag{tag: :bytes, value: k},
      "p" => p,
      "t" => t,
      "v" => v
    }
  end

  @spec decode_term(any()) :: {:ok, t()} | decode_error()
  defp decode_term(%{"e" => entries_raw, "l" => left_raw}) when is_list(entries_raw) do
    with {:ok, left} <- decode_cid_or_null(left_raw),
         {:ok, entries} <- decode_entries(entries_raw) do
      {:ok, %__MODULE__{left: left, entries: entries}}
    end
  end

  defp decode_term(_), do: {:error, :decode, :invalid_structure}

  # Decodes the entries in order, threading the previous full key through so
  # each entry's prefix_len can be validated against it. Stops at the first
  # invalid entry.
  @spec decode_entries(list()) :: {:ok, [Entry.t()]} | decode_error()
  defp decode_entries(entries_raw) do
    result =
      Enum.reduce_while(entries_raw, {:ok, [], ""}, fn raw, {:ok, acc, prev_key} ->
        case decode_entry_after(raw, prev_key) do
          {:ok, entry, full_key} -> {:cont, {:ok, [entry | acc], full_key}}
          {:error, :decode, _} = err -> {:halt, err}
        end
      end)

    case result do
      {:ok, reversed, _last_key} -> {:ok, Enum.reverse(reversed)}
      err -> err
    end
  end

  # Decodes one raw entry and expands its full key relative to `prev_key`.
  @spec decode_entry_after(any(), binary()) :: {:ok, Entry.t(), binary()} | decode_error()
  defp decode_entry_after(raw, prev_key) do
    with {:ok, entry} <- decode_entry(raw),
         {:ok, full_key} <- expand_key(prev_key, entry) do
      {:ok, entry, full_key}
    end
  end

  @spec decode_entry(any()) :: {:ok, Entry.t()} | decode_error()
  defp decode_entry(%{
         "k" => %CBOR.Tag{tag: :bytes, value: k},
         "p" => p,
         "t" => t_raw,
         "v" => %CID{} = v
       })
       when is_integer(p) and p >= 0 and is_binary(k) do
    with {:ok, right} <- decode_cid_or_null(t_raw) do
      {:ok, %Entry{prefix_len: p, key_suffix: k, value: v, right: right}}
    end
  end

  defp decode_entry(_), do: {:error, :decode, :invalid_entry}

  @spec decode_cid_or_null(any()) :: {:ok, CID.t() | nil} | decode_error()
  defp decode_cid_or_null(nil), do: {:ok, nil}
  defp decode_cid_or_null(%CID{} = cid), do: {:ok, cid}
  defp decode_cid_or_null(_), do: {:error, :decode, :invalid_cid_link}
end
+20
lib/mst/node/entry.ex
defmodule MST.Node.Entry do
  @moduledoc """
  One key/value slot of an `MST.Node`.

  The key is stored prefix-compressed: `prefix_len` is the number of leading
  bytes shared with the previous entry's full key, and `key_suffix` holds the
  remaining bytes. `value` is the CID of the value record, and `right` is an
  optional CID pointing to a right subtree.
  """

  use TypedStruct

  alias DASL.CID

  typedstruct enforce: true do
    # Count of leading bytes shared with the previous entry's full key.
    field :prefix_len, non_neg_integer()
    # Bytes of the key that follow the shared prefix.
    field :key_suffix, binary()
    # CID of the value record.
    field :value, CID.t()
    # CID of the right subtree, or nil when there is none.
    field :right, CID.t() | nil
  end
end
+94
lib/mst/store.ex
defmodule MST.Store do
  @moduledoc """
  Behaviour for MST node block stores.

  A store maps `DASL.CID` values to decoded `MST.Node` structs. Every
  implementation must provide the callbacks below; the store state itself is
  an opaque term owned by the implementing module.

  The primary built-in implementation is `MST.Store.Memory`, a simple
  map-backed store suitable for in-memory use and tests.

  ## Usage

  An `MST.Tree` holds its store as a `{module, state}` pair. You interact with
  the store through the tree API; direct store access is only needed when
  building trees from external data (e.g., importing a CAR file).

      store = MST.Store.Memory.new()
      tree = MST.Tree.new(store)

  """

  alias DASL.CID
  alias MST.Node

  @type t() :: {module(), any()}

  @doc """
  Retrieves a node by CID. Returns `{:ok, node}` or `{:error, :not_found}`.
  """
  @callback get(state :: any(), CID.t()) :: {:ok, Node.t()} | {:error, :not_found}

  @doc """
  Stores a node under its CID. Returns updated state.
  """
  @callback put(state :: any(), CID.t(), Node.t()) :: any()

  @doc """
  Returns `true` if the store contains a node for the given CID.
  """
  @callback has?(state :: any(), CID.t()) :: boolean()

  @doc """
  Returns all CIDs present in the store.
  """
  @callback cids(state :: any()) :: [CID.t()]

  # ---------------------------------------------------------------------------
  # Dispatch helpers
  # ---------------------------------------------------------------------------
  # Each helper unpacks the `{module, state}` pair and forwards to the callback
  # of the same name on the implementation module.

  @doc """
  Looks up the node stored under `cid`.

  ## Examples

      iex> store = MST.Store.Memory.new()
      iex> cid = DASL.CID.compute("test", :drisl)
      iex> MST.Store.get(store, cid)
      {:error, :not_found}

  """
  @spec get(t(), CID.t()) :: {:ok, Node.t()} | {:error, :not_found}
  def get({impl, impl_state}, cid) do
    impl.get(impl_state, cid)
  end

  @doc """
  Writes `node` under `cid` and returns the updated `{module, state}` pair.

  ## Examples

      iex> store = MST.Store.Memory.new()
      iex> node = MST.Node.empty()
      iex> {:ok, cid} = MST.Node.cid(node)
      iex> store2 = MST.Store.put(store, cid, node)
      iex> {:ok, _} = MST.Store.get(store2, cid)
      iex> :ok
      :ok

  """
  @spec put(t(), CID.t(), Node.t()) :: t()
  def put({impl, impl_state}, cid, node) do
    updated = impl.put(impl_state, cid, node)
    {impl, updated}
  end

  @doc """
  Reports whether a node is stored under the given CID.
  """
  @spec has?(t(), CID.t()) :: boolean()
  def has?({impl, impl_state}, cid) do
    impl.has?(impl_state, cid)
  end

  @doc """
  Lists every CID the store currently holds.
  """
  @spec cids(t()) :: [CID.t()]
  def cids({impl, impl_state}) do
    impl.cids(impl_state)
  end
end
+61
lib/mst/store/memory.ex
defmodule MST.Store.Memory do
  @moduledoc """
  In-memory MST node store backed by a plain map.

  Intended for tests and for any workload where the whole tree fits in memory.
  The store state is a `%{}` map keyed by `DASL.CID` with `MST.Node` values.

  ## Usage

      store = MST.Store.Memory.new()
      tree = MST.Tree.new(store)

  """

  @behaviour MST.Store

  alias DASL.CID
  alias MST.Node

  @type state() :: %{CID.t() => Node.t()}

  @doc """
  Builds a fresh, empty memory store as a `{MST.Store.Memory, %{}}` pair.

  ## Examples

      iex> {mod, state} = MST.Store.Memory.new()
      iex> mod
      MST.Store.Memory
      iex> state
      %{}

  """
  @spec new() :: MST.Store.t()
  def new do
    {__MODULE__, %{}}
  end

  # ---------------------------------------------------------------------------
  # MST.Store callbacks
  # ---------------------------------------------------------------------------

  @impl MST.Store
  @spec get(state(), CID.t()) :: {:ok, Node.t()} | {:error, :not_found}
  def get(state, cid) do
    # Map.fetch/2 already yields {:ok, node}; only the miss needs translating.
    with :error <- Map.fetch(state, cid) do
      {:error, :not_found}
    end
  end

  @impl MST.Store
  @spec put(state(), CID.t(), Node.t()) :: state()
  def put(state, cid, node) do
    Map.put(state, cid, node)
  end

  @impl MST.Store
  @spec has?(state(), CID.t()) :: boolean()
  def has?(state, cid) do
    Map.has_key?(state, cid)
  end

  @impl MST.Store
  @spec cids(state()) :: [CID.t()]
  def cids(state) do
    Map.keys(state)
  end
end
+809
lib/mst/tree.ex
defmodule MST.Tree do
  @moduledoc """
  An in-memory Merkle Search Tree.

  `MST.Tree` is the primary interface for building and querying MSTs. It pairs
  a root CID (or `nil` for an empty tree) with a block store that maps CIDs to
  decoded `MST.Node` structs.

  All mutation operations (`put/3`, `delete/2`) return a new `MST.Tree` —
  the data structure is persistent/immutable. The underlying store accumulates
  all written nodes across mutations; stale nodes are not removed (no GC).

  ## Example

      store = MST.Store.Memory.new()
      tree = MST.Tree.new(store)

      val = DASL.CID.compute("my record data")
      {:ok, tree} = MST.Tree.put(tree, "collection/record-key", val)
      {:ok, ^val} = MST.Tree.get(tree, "collection/record-key")

      {:ok, tree} = MST.Tree.delete(tree, "collection/record-key")
      {:error, :not_found} = MST.Tree.get(tree, "collection/record-key")

  """

  use TypedStruct
  import Kernel, except: [length: 1]

  alias DASL.CID
  alias MST.{Height, Node, Node.Entry, Store}

  @type store() :: Store.t()
  @type tree_error() :: {:error, atom()}

  typedstruct enforce: true do
    field :root, CID.t() | nil
    field :store, store()
  end

  # ---------------------------------------------------------------------------
  # Construction
  # ---------------------------------------------------------------------------

  @doc """
  Returns a new, empty tree backed by the given store.

  ## Examples

      iex> tree = MST.Tree.new(MST.Store.Memory.new())
      iex> tree.root
      nil

  """
  @spec new(store()) :: t()
  def new(store), do: %__MODULE__{root: nil, store: store}

  @doc """
  Returns a tree referencing an existing root node CID in the given store.

  Use this to wrap an already-populated store (e.g. after loading from a CAR
  file).

  ## Examples

      iex> store = MST.Store.Memory.new()
      iex> node = MST.Node.empty()
      iex> {:ok, cid} = MST.Node.cid(node)
      iex> store = MST.Store.put(store, cid, node)
      iex> tree = MST.Tree.from_root(cid, store)
      iex> tree.root == cid
      true

  """
  @spec from_root(CID.t() | nil, store()) :: t()
  def from_root(root, store), do: %__MODULE__{root: root, store: store}

  # ---------------------------------------------------------------------------
  # Lookup
  # ---------------------------------------------------------------------------

  @doc """
  Looks up `key` in the tree.

  Returns `{:ok, value_cid}` if found, `{:error, :not_found}` otherwise.
  A node that is referenced but absent from the store yields
  `{:error, :missing_node}`.

  ## Examples

      iex> store = MST.Store.Memory.new()
      iex> tree = MST.Tree.new(store)
      iex> MST.Tree.get(tree, "col/key")
      {:error, :not_found}

  """
  @spec get(t(), binary()) :: {:ok, CID.t()} | {:error, :not_found} | tree_error()
  def get(%__MODULE__{root: nil}, _key), do: {:error, :not_found}

  def get(%__MODULE__{root: root, store: store}, key) do
    search(store, root, key)
  end

  # ---------------------------------------------------------------------------
  # Mutation
  # ---------------------------------------------------------------------------

  @doc """
  Inserts or updates `key` → `value` in the tree.

  Returns `{:ok, new_tree}` on success. The new tree shares the store with the
  old tree, but both may be used independently — nodes are append-only.

  ## Examples

      iex> store = MST.Store.Memory.new()
      iex> tree = MST.Tree.new(store)
      iex> val = DASL.CID.compute("data")
      iex> {:ok, tree2} = MST.Tree.put(tree, "col/key", val)
      iex> MST.Tree.get(tree2, "col/key")
      {:ok, val}

  """
  @spec put(t(), binary(), CID.t()) :: {:ok, t()} | tree_error()
  def put(%__MODULE__{root: nil, store: store}, key, value) do
    # Empty tree — create a leaf node. No intermediate layers needed for a
    # single-key tree (the spec says empty nodes at the top must be pruned).
    node = leaf_node(key, value)

    with {:ok, root, store} <- write_node(store, node) do
      {:ok, %__MODULE__{root: root, store: store}}
    end
  end

  def put(%__MODULE__{root: root, store: store}, key, value) do
    with {:ok, root_node} <- fetch_node(store, root) do
      if root_node.entries == [] and root_node.left == nil do
        # Empty root from CAR import — treat as fresh empty tree.
        put(%__MODULE__{root: nil, store: store}, key, value)
      else
        key_height = Height.for_key(key)
        tree_height = require_height(store, root_node)

        with {:ok, new_root, store} <-
               do_insert(store, root, key, value, key_height, tree_height) do
          {:ok, %__MODULE__{root: new_root, store: store}}
        end
      end
    end
  end

  @doc """
  Removes `key` from the tree.

  Returns `{:ok, new_tree}` on success, `{:error, :not_found}` if the key
  does not exist. Store failures encountered while removing the key or while
  trimming empty wrapper nodes from the top are returned as error tuples.

  ## Examples

      iex> store = MST.Store.Memory.new()
      iex> tree = MST.Tree.new(store)
      iex> val = DASL.CID.compute("data")
      iex> {:ok, tree2} = MST.Tree.put(tree, "col/key", val)
      iex> {:ok, tree3} = MST.Tree.delete(tree2, "col/key")
      iex> MST.Tree.get(tree3, "col/key")
      {:error, :not_found}

  """
  @spec delete(t(), binary()) :: {:ok, t()} | {:error, :not_found} | tree_error()
  def delete(%__MODULE__{root: nil}, _key), do: {:error, :not_found}

  def delete(%__MODULE__{root: root, store: store}, key) do
    with {:ok, root_node} <- fetch_node(store, root),
         key_height = Height.for_key(key),
         tree_height = require_height(store, root_node),
         {:ok, new_root, store} <- do_remove(store, root, key, key_height, tree_height),
         # Trim empty wrappers from the top after deletion. This reads from the
         # store, so its failure must flow out as an error tuple, not a crash.
         {:ok, new_root, store} <- trim_top(store, new_root) do
      {:ok, %__MODULE__{root: new_root, store: store}}
    end
  end

  # ---------------------------------------------------------------------------
  # Traversal
  # ---------------------------------------------------------------------------

  @doc """
  Returns all key-value pairs in the tree as a sorted list.

  ## Examples

      iex> store = MST.Store.Memory.new()
      iex> tree = MST.Tree.new(store)
      iex> val = DASL.CID.compute("data")
      iex> {:ok, tree} = MST.Tree.put(tree, "col/b", val)
      iex> {:ok, tree} = MST.Tree.put(tree, "col/a", val)
      iex> {:ok, pairs} = MST.Tree.to_list(tree)
      iex> Enum.map(pairs, &elem(&1, 0))
      ["col/a", "col/b"]

  """
  @spec to_list(t()) :: {:ok, [{binary(), CID.t()}]} | tree_error()
  def to_list(%__MODULE__{root: nil}), do: {:ok, []}

  def to_list(%__MODULE__{root: root, store: store}) do
    # The walk accumulates pairs newest-first across the whole tree; reverse
    # exactly once here, never inside the recursion.
    with {:ok, reversed} <- walk(store, root, []) do
      {:ok, Enum.reverse(reversed)}
    end
  end

  @doc """
  Returns a lazy stream of `{key, value_cid}` pairs in sorted order.

  The stream reads nodes from the store on demand. Raises on missing nodes
  (consistent with lazy stream semantics).

  ## Examples

      iex> store = MST.Store.Memory.new()
      iex> tree = MST.Tree.new(store)
      iex> val = DASL.CID.compute("x")
      iex> {:ok, tree} = MST.Tree.put(tree, "col/a", val)
      iex> tree |> MST.Tree.stream() |> Enum.to_list()
      [{"col/a", val}]

  """
  @spec stream(t()) :: Enumerable.t()
  def stream(%__MODULE__{root: nil}), do: []

  def stream(%__MODULE__{root: root, store: store}) do
    # The stack is an in-order work list of `{:visit, cid}` (subtree still to
    # be expanded) and `{:yield, key, value}` (pair ready to emit) items.
    Stream.resource(
      fn -> [{:visit, root}] end,
      fn
        [] ->
          {:halt, []}

        [{:yield, key, value} | rest] ->
          {[{key, value}], rest}

        [{:visit, cid} | rest] ->
          node = fetch_node!(store, cid)
          node_to_stream_items(node, Node.keys(node), rest)
      end,
      fn _ -> :ok end
    )
  end

  @doc """
  Returns the number of key-value pairs in the tree.

  ## Examples

      iex> store = MST.Store.Memory.new()
      iex> tree = MST.Tree.new(store)
      iex> {:ok, 0} = MST.Tree.length(tree)
      iex> val = DASL.CID.compute("x")
      iex> {:ok, tree} = MST.Tree.put(tree, "col/a", val)
      iex> MST.Tree.length(tree)
      {:ok, 1}

  """
  @spec length(t()) :: {:ok, non_neg_integer()} | tree_error()
  def length(tree) do
    with {:ok, pairs} <- to_list(tree) do
      {:ok, Kernel.length(pairs)}
    end
  end

  # ---------------------------------------------------------------------------
  # Block collection
  # ---------------------------------------------------------------------------

  @doc """
  Collects all MST nodes reachable from the root into a map of CID → encoded bytes.

  Useful for serialising the tree to a CAR file. Returns
  `{:error, :missing_node}` when a referenced node is absent from the store.

  ## Examples

      iex> store = MST.Store.Memory.new()
      iex> tree = MST.Tree.new(store)
      iex> val = DASL.CID.compute("x")
      iex> {:ok, tree} = MST.Tree.put(tree, "col/a", val)
      iex> {:ok, blocks} = MST.Tree.collect_blocks(tree)
      iex> map_size(blocks) >= 1
      true

  """
  @spec collect_blocks(t()) :: {:ok, %{CID.t() => binary()}} | tree_error()
  def collect_blocks(%__MODULE__{root: nil}), do: {:ok, %{}}

  def collect_blocks(%__MODULE__{root: root, store: store}) do
    collect_reachable(store, root, %{})
  end

  # ---------------------------------------------------------------------------
  # Private — search
  # ---------------------------------------------------------------------------

  @spec search(store(), CID.t(), binary()) ::
          {:ok, CID.t()} | {:error, :not_found} | tree_error()
  defp search(store, cid, key) do
    with {:ok, node} <- fetch_node(store, cid) do
      full_keys = Node.keys(node)
      search_node(store, node, full_keys, key)
    end
  end

  @spec search_node(store(), Node.t(), [binary()], binary()) ::
          {:ok, CID.t()} | {:error, :not_found} | tree_error()
  defp search_node(store, node, full_keys, key) do
    case locate(full_keys, key) do
      {:found, idx} ->
        {:ok, Enum.at(node.entries, idx).value}

      {:left} ->
        descend(store, node.left, key)

      {:right, idx} ->
        descend(store, Enum.at(node.entries, idx).right, key)
    end
  end

  @spec descend(store(), CID.t() | nil, binary()) ::
          {:ok, CID.t()} | {:error, :not_found} | tree_error()
  defp descend(_store, nil, _key), do: {:error, :not_found}
  defp descend(store, cid, key), do: search(store, cid, key)

  # ---------------------------------------------------------------------------
  # Private — insert
  # ---------------------------------------------------------------------------

  # Recursive insert into the subtree rooted at `cid`.
  # `tree_height` is the known height of the node (threaded from the parent).
  @spec do_insert(store(), CID.t(), binary(), CID.t(), non_neg_integer(), non_neg_integer()) ::
          {:ok, CID.t(), store()} | tree_error()
  defp do_insert(store, cid, key, value, key_height, tree_height) do
    with {:ok, node} <- fetch_node(store, cid) do
      cond do
        key_height > tree_height ->
          # Key belongs at a higher layer. Wrap the current node in an empty
          # parent and recurse at tree_height + 1.
          wrapper = %Node{left: cid, entries: []}

          with {:ok, wrapper_cid, store} <- write_node(store, wrapper) do
            do_insert(store, wrapper_cid, key, value, key_height, tree_height + 1)
          end

        key_height < tree_height ->
          # Descend into the appropriate subtree.
          {kv_pairs, subtrees} = node_to_arrays(node)
          keys = Enum.map(kv_pairs, &elem(&1, 0))
          idx = lower_bound(keys, key)
          subtree_cid = Enum.at(subtrees, idx)

          with {:ok, new_sub, store} <-
                 insert_into_subtree(
                   store,
                   subtree_cid,
                   key,
                   value,
                   key_height,
                   tree_height - 1
                 ) do
            new_subtrees = List.replace_at(subtrees, idx, new_sub)
            write_node(store, arrays_to_node(kv_pairs, new_subtrees))
          end

        true ->
          # key_height == tree_height — insert at this level.
          put_here(store, node, key, value)
      end
    end
  end

  # Insert into a subtree that may be nil. When nil, creates a new leaf and
  # wraps it in as many empty intermediate layers as needed.
  @spec insert_into_subtree(
          store(),
          CID.t() | nil,
          binary(),
          CID.t(),
          non_neg_integer(),
          non_neg_integer()
        ) :: {:ok, CID.t(), store()} | tree_error()
  defp insert_into_subtree(store, nil, key, value, key_height, expected_height) do
    leaf = leaf_node(key, value)

    with {:ok, leaf_cid, store} <- write_node(store, leaf) do
      wrap_with_empty_layers(store, leaf_cid, expected_height - key_height)
    end
  end

  defp insert_into_subtree(store, cid, key, value, key_height, expected_height) do
    do_insert(store, cid, key, value, key_height, expected_height)
  end

  # Insert a key at the current level (key_height == tree_height).
  # Splits the subtree at the insertion point recursively.
  @spec put_here(store(), Node.t(), binary(), CID.t()) ::
          {:ok, CID.t(), store()} | tree_error()
  defp put_here(store, node, key, value) do
    {kv_pairs, subtrees} = node_to_arrays(node)
    keys = Enum.map(kv_pairs, &elem(&1, 0))
    idx = lower_bound(keys, key)

    if idx < Kernel.length(keys) and Enum.at(keys, idx) == key do
      # Overwrite existing key.
      new_kv = List.replace_at(kv_pairs, idx, {key, value})
      write_node(store, arrays_to_node(new_kv, subtrees))
    else
      # Split the subtree at the insertion point recursively.
      with {:ok, lsub, rsub, store} <- split_on_key(store, Enum.at(subtrees, idx), key) do
        new_kv = List.insert_at(kv_pairs, idx, {key, value})

        new_subtrees =
          List.replace_at(subtrees, idx, lsub) |> List.insert_at(idx + 1, rsub)

        write_node(store, arrays_to_node(new_kv, new_subtrees))
      end
    end
  end

  # Recursively splits the subtree at `key`. Returns {left_cid, right_cid}
  # where left contains all keys < `key` and right contains all keys >= `key`.
  # Either side may be nil if empty.
  @spec split_on_key(store(), CID.t() | nil, binary()) ::
          {:ok, CID.t() | nil, CID.t() | nil, store()} | tree_error()
  defp split_on_key(store, nil, _key), do: {:ok, nil, nil, store}

  defp split_on_key(store, cid, key) do
    with {:ok, node} <- fetch_node(store, cid) do
      {kv_pairs, subtrees} = node_to_arrays(node)
      keys = Enum.map(kv_pairs, &elem(&1, 0))
      idx = lower_bound(keys, key)

      # Recursively split the subtree at the boundary position.
      with {:ok, inner_l, inner_r, store} <-
             split_on_key(store, Enum.at(subtrees, idx), key) do
        left_kv = Enum.take(kv_pairs, idx)
        left_subs = Enum.take(subtrees, idx) ++ [inner_l]

        right_kv = Enum.drop(kv_pairs, idx)
        right_subs = [inner_r | Enum.drop(subtrees, idx + 1)]

        with {:ok, left_cid, store} <-
               write_node_to_nullable(store, arrays_to_node(left_kv, left_subs)),
             {:ok, right_cid, store} <-
               write_node_to_nullable(store, arrays_to_node(right_kv, right_subs)) do
          {:ok, left_cid, right_cid, store}
        end
      end
    end
  end

  # ---------------------------------------------------------------------------
  # Private — delete
  # ---------------------------------------------------------------------------

  # Recursive delete. `tree_height` is the known height of the node at `cid`.
  @spec do_remove(store(), CID.t(), binary(), non_neg_integer(), non_neg_integer()) ::
          {:ok, CID.t() | nil, store()} | {:error, :not_found} | tree_error()
  defp do_remove(store, cid, key, key_height, tree_height) do
    with {:ok, node} <- fetch_node(store, cid) do
      cond do
        key_height > tree_height ->
          {:error, :not_found}

        key_height < tree_height ->
          {kv_pairs, subtrees} = node_to_arrays(node)
          keys = Enum.map(kv_pairs, &elem(&1, 0))
          idx = lower_bound(keys, key)

          case Enum.at(subtrees, idx) do
            nil ->
              {:error, :not_found}

            sub_cid ->
              with {:ok, new_sub, store} <-
                     do_remove(store, sub_cid, key, key_height, tree_height - 1) do
                new_subtrees = List.replace_at(subtrees, idx, new_sub)
                write_node_to_nullable(store, arrays_to_node(kv_pairs, new_subtrees))
              end
          end

        true ->
          # key_height == tree_height — key must be at this level if it exists.
          {kv_pairs, subtrees} = node_to_arrays(node)
          keys = Enum.map(kv_pairs, &elem(&1, 0))
          idx = lower_bound(keys, key)

          if idx < Kernel.length(keys) and Enum.at(keys, idx) == key do
            # Found! Merge the adjacent subtrees that flanked the deleted key.
            with {:ok, merged_sub, store} <-
                   do_merge(store, Enum.at(subtrees, idx), Enum.at(subtrees, idx + 1)) do
              new_kv = List.delete_at(kv_pairs, idx)

              new_subtrees =
                Enum.take(subtrees, idx) ++ [merged_sub | Enum.drop(subtrees, idx + 2)]

              write_node_to_nullable(store, arrays_to_node(new_kv, new_subtrees))
            end
          else
            {:error, :not_found}
          end
      end
    end
  end

  # Recursively merges two adjacent subtree pointers. The boundary subtrees
  # (rightmost of left, leftmost of right) are merged recursively.
  @spec do_merge(store(), CID.t() | nil, CID.t() | nil) ::
          {:ok, CID.t() | nil, store()} | tree_error()
  defp do_merge(store, nil, right_cid), do: {:ok, right_cid, store}
  defp do_merge(store, left_cid, nil), do: {:ok, left_cid, store}

  defp do_merge(store, left_cid, right_cid) do
    with {:ok, left_node} <- fetch_node(store, left_cid),
         {:ok, right_node} <- fetch_node(store, right_cid) do
      {left_kv, left_subs} = node_to_arrays(left_node)
      {right_kv, right_subs} = node_to_arrays(right_node)

      with {:ok, merged_boundary, store} <-
             do_merge(store, List.last(left_subs), hd(right_subs)) do
        new_kv = left_kv ++ right_kv
        new_subs = Enum.slice(left_subs, 0..-2//1) ++ [merged_boundary | tl(right_subs)]
        write_node_to_nullable(store, arrays_to_node(new_kv, new_subs))
      end
    end
  end

  # Strips empty wrapper nodes from the top of the tree. Only called after
  # a top-level delete — intermediate empty nodes are preserved during
  # recursive descent.
  @spec trim_top(store(), CID.t() | nil) :: {:ok, CID.t() | nil, store()} | tree_error()
  defp trim_top(store, nil), do: {:ok, nil, store}

  defp trim_top(store, cid) do
    with {:ok, node} <- fetch_node(store, cid) do
      cond do
        node.entries != [] -> {:ok, cid, store}
        node.left == nil -> {:ok, nil, store}
        true -> trim_top(store, node.left)
      end
    end
  end

  # ---------------------------------------------------------------------------
  # Private — in-order traversal (to_list)
  # ---------------------------------------------------------------------------
  #
  # Every walk_* helper threads a single accumulator holding the pairs seen so
  # far in REVERSE order (newest first) and returns it still reversed. The one
  # and only reversal happens in to_list/1.

  @spec walk(store(), CID.t(), [{binary(), CID.t()}]) ::
          {:ok, [{binary(), CID.t()}]} | tree_error()
  defp walk(store, cid, acc) do
    with {:ok, node} <- fetch_node(store, cid) do
      full_keys = Node.keys(node)
      walk_node(store, node, full_keys, acc)
    end
  end

  @spec walk_node(store(), Node.t(), [binary()], [{binary(), CID.t()}]) ::
          {:ok, [{binary(), CID.t()}]} | tree_error()
  defp walk_node(store, node, full_keys, acc) do
    # Walk in-order: left subtree, then entries interleaved with right subtrees.
    with {:ok, acc} <- walk_subtree(store, node.left, acc) do
      walk_entries(store, node.entries, full_keys, acc)
    end
  end

  @spec walk_subtree(store(), CID.t() | nil, [{binary(), CID.t()}]) ::
          {:ok, [{binary(), CID.t()}]} | tree_error()
  defp walk_subtree(_store, nil, acc), do: {:ok, acc}
  defp walk_subtree(store, cid, acc), do: walk(store, cid, acc)

  @spec walk_entries(store(), [Entry.t()], [binary()], [{binary(), CID.t()}]) ::
          {:ok, [{binary(), CID.t()}]} | tree_error()
  # Do not reverse here: this clause is reached once per node, and the
  # accumulator also carries pairs collected by ancestors and earlier siblings.
  defp walk_entries(_store, [], [], acc), do: {:ok, acc}

  defp walk_entries(store, [entry | rest_e], [key | rest_k], acc) do
    acc = [{key, entry.value} | acc]

    with {:ok, acc} <- walk_subtree(store, entry.right, acc) do
      walk_entries(store, rest_e, rest_k, acc)
    end
  end

  # ---------------------------------------------------------------------------
  # Private — stream helpers
  # ---------------------------------------------------------------------------

  # Expands a node into its in-order plan and merges it with the DFS stack.
  #
  # Returns `{yields, stack}` where `yields` are the `{key, value}` pairs that
  # may be emitted right away (those preceding the first subtree visit) and
  # `stack` is the remaining plan — tagged `{:visit, cid}` / `{:yield, k, v}`
  # items in key order — followed by `rest_stack`.
  @spec node_to_stream_items(Node.t(), [binary()], list()) :: {[{binary(), CID.t()}], list()}
  defp node_to_stream_items(node, full_keys, rest_stack) do
    # Build a plan: [{:visit, cid} | {:yield, key, val}] in order
    left_visits = if node.left, do: [{:visit, node.left}], else: []

    entry_items =
      node.entries
      |> Enum.zip(full_keys)
      |> Enum.flat_map(fn {e, k} ->
        right_visits = if e.right, do: [{:visit, e.right}], else: []
        [{:yield, k, e.value} | right_visits]
      end)

    plan = left_visits ++ entry_items

    # Only yields that come before any pending subtree are safe to emit now;
    # everything after the first visit must wait until that subtree is drained.
    {ready, pending} = Enum.split_while(plan, &match?({:yield, _, _}, &1))
    yields = Enum.map(ready, fn {:yield, k, v} -> {k, v} end)

    {yields, pending ++ rest_stack}
  end

  # ---------------------------------------------------------------------------
  # Private — block collection
  # ---------------------------------------------------------------------------

  @spec collect_reachable(store(), CID.t(), %{CID.t() => binary()}) ::
          {:ok, %{CID.t() => binary()}} | tree_error()
  defp collect_reachable(store, cid, acc) do
    if Map.has_key?(acc, cid) do
      {:ok, acc}
    else
      with {:ok, node} <- fetch_node(store, cid),
           {:ok, bytes} <- Node.encode(node) do
        acc = Map.put(acc, cid, bytes)
        collect_children(store, node, acc)
      else
        # Flatten the encoder's three-element error into a tree_error().
        {:error, :encode, reason} -> {:error, reason}
        # fetch_node/2 already reports {:error, :missing_node}; pass it on.
        {:error, _reason} = err -> err
      end
    end
  end

  @spec collect_children(store(), Node.t(), %{CID.t() => binary()}) ::
          {:ok, %{CID.t() => binary()}} | tree_error()
  defp collect_children(store, node, acc) do
    subtrees =
      if(node.left, do: [node.left], else: []) ++
        Enum.flat_map(node.entries, fn e -> if e.right, do: [e.right], else: [] end)

    Enum.reduce_while(subtrees, {:ok, acc}, fn cid, {:ok, acc} ->
      case collect_reachable(store, cid, acc) do
        {:ok, acc} -> {:cont, {:ok, acc}}
        err -> {:halt, err}
      end
    end)
  end

  # ---------------------------------------------------------------------------
  # Private — node I/O
  # ---------------------------------------------------------------------------

  # Reads a node from the store, renaming the store's `:not_found` to
  # `:missing_node` so it cannot be confused with a key-level `:not_found`.
  @spec fetch_node(store(), CID.t()) :: {:ok, Node.t()} | tree_error()
  defp fetch_node(store, cid) do
    case Store.get(store, cid) do
      {:ok, node} -> {:ok, node}
      {:error, :not_found} -> {:error, :missing_node}
    end
  end
+ 662 + @spec fetch_node!(store(), CID.t()) :: Node.t() 663 + defp fetch_node!(store, cid) do 664 + case Store.get(store, cid) do 665 + {:ok, node} -> node 666 + {:error, :not_found} -> raise "MST node not found: #{CID.encode(cid)}" 667 + end 668 + end 669 + 670 + @spec write_node(store(), Node.t()) :: {:ok, CID.t(), store()} | tree_error() 671 + defp write_node(store, node) do 672 + case Node.cid(node) do 673 + {:ok, cid} -> {:ok, cid, Store.put(store, cid, node)} 674 + {:error, :encode, reason} -> {:error, reason} 675 + end 676 + end 677 + 678 + # Write a node unless it is truly empty (no entries, no left). Returns nil 679 + # for empty leaf-level nodes; preserves empty intermediate nodes that have 680 + # a left subtree pointer. 681 + @spec write_node_to_nullable(store(), Node.t()) :: 682 + {:ok, CID.t() | nil, store()} | tree_error() 683 + defp write_node_to_nullable(store, %Node{left: nil, entries: []}), do: {:ok, nil, store} 684 + defp write_node_to_nullable(store, node), do: write_node(store, node) 685 + 686 + # Wraps a CID in `n` empty intermediate nodes (left-pointer only). 
687 + @spec wrap_with_empty_layers(store(), CID.t(), non_neg_integer()) :: 688 + {:ok, CID.t(), store()} | tree_error() 689 + defp wrap_with_empty_layers(store, cid, 0), do: {:ok, cid, store} 690 + 691 + defp wrap_with_empty_layers(store, cid, n) when n > 0 do 692 + wrapper = %Node{left: cid, entries: []} 693 + 694 + with {:ok, wrapper_cid, store} <- write_node(store, wrapper) do 695 + wrap_with_empty_layers(store, wrapper_cid, n - 1) 696 + end 697 + end 698 + 699 + # --------------------------------------------------------------------------- 700 + # Private — key position helpers 701 + # --------------------------------------------------------------------------- 702 + 703 + # Returns the position of `key` in the sorted `full_keys` list: 704 + # {:found, idx} — key is at index idx 705 + # {:left} — key < all keys (belongs in left subtree) 706 + # {:right, idx} — key > keys[idx] (belongs in right subtree of entry idx) 707 + @spec locate([binary()], binary()) :: 708 + {:found, non_neg_integer()} | {:left} | {:right, non_neg_integer()} 709 + defp locate([], _key), do: {:left} 710 + 711 + defp locate(keys, key) do 712 + n = Kernel.length(keys) 713 + bin_locate(keys, key, 0, n - 1, n) 714 + end 715 + 716 + @spec bin_locate([binary()], binary(), integer(), integer(), non_neg_integer()) :: 717 + {:found, non_neg_integer()} | {:left} | {:right, non_neg_integer()} 718 + defp bin_locate(_keys, _key, lo, hi, _n) when lo > hi do 719 + if lo == 0, do: {:left}, else: {:right, lo - 1} 720 + end 721 + 722 + defp bin_locate(keys, key, lo, hi, n) do 723 + mid = div(lo + hi, 2) 724 + mid_key = Enum.at(keys, mid) 725 + 726 + cond do 727 + mid_key == key -> {:found, mid} 728 + mid_key < key -> bin_locate(keys, key, mid + 1, hi, n) 729 + true -> bin_locate(keys, key, lo, mid - 1, n) 730 + end 731 + end 732 + 733 + # Returns the index of the first key >= `target`, or `length(keys)` if none. 
734 + @spec lower_bound([binary()], binary()) :: non_neg_integer() 735 + defp lower_bound(keys, target) do 736 + Enum.find_index(keys, fn k -> k >= target end) || Kernel.length(keys) 737 + end 738 + 739 + # --------------------------------------------------------------------------- 740 + # Private — layer inference 741 + # --------------------------------------------------------------------------- 742 + 743 + # Infer the MST layer of a non-empty node from its first entry's key. 744 + @spec node_layer(Node.t()) :: non_neg_integer() | nil 745 + defp node_layer(%Node{entries: []}), do: nil 746 + 747 + defp node_layer(%Node{entries: [first | _]}) do 748 + Height.for_key(first.key_suffix) 749 + end 750 + 751 + # Compute the height of a node, walking into children if the node has no 752 + # entries (empty intermediate nodes). 753 + @spec require_height(store(), Node.t()) :: non_neg_integer() 754 + defp require_height(store, node) do 755 + case node_layer(node) do 756 + nil -> 757 + if node.left do 758 + {:ok, child} = fetch_node(store, node.left) 759 + require_height(store, child) + 1 760 + else 761 + 0 762 + end 763 + 764 + h -> 765 + h 766 + end 767 + end 768 + 769 + # --------------------------------------------------------------------------- 770 + # Private — construction helpers 771 + # --------------------------------------------------------------------------- 772 + 773 + @spec leaf_node(binary(), CID.t()) :: Node.t() 774 + defp leaf_node(key, value) do 775 + %Node{ 776 + left: nil, 777 + entries: [%Entry{prefix_len: 0, key_suffix: key, value: value, right: nil}] 778 + } 779 + end 780 + 781 + # --------------------------------------------------------------------------- 782 + # Private — node array conversions 783 + # --------------------------------------------------------------------------- 784 + 785 + # Converts a node into a parallel-array representation: 786 + # {[{key, value}], [subtree_cid | nil]} 787 + # where subtrees has length(kv_pairs) + 1. 
788 + # subtrees[0] = node.left, subtrees[i+1] = entries[i].right. 789 + @spec node_to_arrays(Node.t()) :: {[{binary(), CID.t()}], [CID.t() | nil]} 790 + defp node_to_arrays(node) do 791 + full_keys = Node.keys(node) 792 + kv_pairs = Enum.zip(full_keys, Enum.map(node.entries, & &1.value)) 793 + subtrees = [node.left | Enum.map(node.entries, & &1.right)] 794 + {kv_pairs, subtrees} 795 + end 796 + 797 + # Converts the parallel-array representation back to a `Node`. 798 + @spec arrays_to_node([{binary(), CID.t()}], [CID.t() | nil]) :: Node.t() 799 + defp arrays_to_node(kv_pairs, subtrees) do 800 + [left | right_ptrs] = subtrees 801 + 802 + triples = 803 + Enum.zip(kv_pairs, right_ptrs) 804 + |> Enum.map(fn {{k, v}, r} -> {k, v, r} end) 805 + 806 + entries = Node.compress_entries(triples) 807 + %Node{left: left, entries: entries} 808 + end 809 + end
+193
test/mst/car_test.exs
defmodule MST.CARTest do
  use ExUnit.Case, async: true

  doctest MST.CAR

  alias DASL.CID
  alias MST.{CAR, Tree}

  # Fresh empty tree on an in-memory store.
  defp new_tree, do: MST.Store.Memory.new() |> Tree.new()

  # Raw-codec CID of `s`, used as a record value.
  defp val(s), do: CID.compute(s, :raw)

  # Inserts every `{key, value}` pair, in list order, into a fresh tree.
  defp tree_from(pairs) do
    for {key, value} <- pairs, reduce: new_tree() do
      tree ->
        {:ok, tree} = Tree.put(tree, key, value)
        tree
    end
  end

  # Encodes `tree` to a CAR binary and decodes it straight back.
  defp via_binary(tree) do
    assert {:ok, binary} = CAR.to_binary(tree)
    assert {:ok, decoded} = CAR.from_binary(binary)
    decoded
  end

  # Encodes `tree`, decodes the bytes into a `DASL.CAR` struct, then loads
  # the tree from that struct.
  defp via_struct(tree) do
    assert {:ok, binary} = CAR.to_binary(tree)
    assert {:ok, car} = DASL.CAR.decode(binary)
    assert {:ok, decoded} = CAR.from_car(car)
    decoded
  end

  # The stream items produced for a one-key tree, together with that tree.
  defp streamed_single_key_tree do
    tree = tree_from([{"col/a", val("v")}])
    {tree, tree |> CAR.to_stream() |> Enum.to_list()}
  end

  describe "to_binary/2 and from_binary/2" do
    test "empty tree round-trips" do
      assert {:ok, binary} = CAR.to_binary(new_tree())
      assert is_binary(binary)
      assert {:ok, decoded} = CAR.from_binary(binary)
      assert {:ok, []} = Tree.to_list(decoded)
    end

    test "single-key tree round-trips" do
      value = val("data")
      decoded = via_binary(tree_from([{"col/key", value}]))
      assert {:ok, ^value} = Tree.get(decoded, "col/key")
    end

    test "multi-key tree round-trips with all keys intact" do
      pairs = Enum.map(1..5, fn i -> {"col/key#{i}", val("v#{i}")} end)
      decoded = via_binary(tree_from(pairs))

      Enum.each(pairs, fn {key, value} ->
        assert {:ok, ^value} = Tree.get(decoded, key)
      end)
    end

    test "round-trip preserves root CID" do
      original = tree_from([{"col/key", val("data")}])
      decoded = via_binary(original)
      assert original.root == decoded.root
    end

    test "round-trip preserves sorted order" do
      value = val("v")
      keys = ["col/z", "col/a", "col/m", "col/b"]

      decoded = via_binary(tree_from(Enum.map(keys, &{&1, value})))

      assert {:ok, pairs} = Tree.to_list(decoded)
      assert Enum.map(pairs, fn {key, _value} -> key end) == Enum.sort(keys)
    end
  end

  describe "from_binary/2 error handling" do
    test "returns error for invalid binary" do
      assert {:error, _, _} = CAR.from_binary(<<0xFF, 0xFF, 0xFF>>)
    end

    test "returns error for empty binary" do
      assert {:error, _, _} = CAR.from_binary(<<>>)
    end
  end

  describe "from_car/1" do
    test "empty tree round-trips via struct" do
      decoded = via_struct(new_tree())
      assert {:ok, []} = Tree.to_list(decoded)
    end

    test "single-key tree round-trips via struct" do
      value = val("data")
      decoded = via_struct(tree_from([{"col/key", value}]))
      assert {:ok, ^value} = Tree.get(decoded, "col/key")
    end

    test "multi-key tree round-trips via struct with all keys intact" do
      pairs = Enum.map(1..5, fn i -> {"col/key#{i}", val("v#{i}")} end)
      decoded = via_struct(tree_from(pairs))

      Enum.each(pairs, fn {key, value} ->
        assert {:ok, ^value} = Tree.get(decoded, key)
      end)
    end

    test "preserves root CID" do
      original = tree_from([{"col/key", val("data")}])
      decoded = via_struct(original)
      assert original.root == decoded.root
    end

    test "struct with no roots returns empty tree" do
      rootless = %DASL.CAR{version: 1, roots: [], blocks: %{}}
      assert {:ok, tree} = CAR.from_car(rootless)
      assert {:ok, []} = Tree.to_list(tree)
    end
  end

  describe "from_stream/2" do
    test "stream round-trip matches binary round-trip" do
      tree = tree_from([{"col/a", val("v")}])
      {:ok, binary} = CAR.to_binary(tree)

      # Emit the whole binary as one chunk, then end the stream.
      single_chunk =
        Stream.unfold(binary, fn
          <<>> -> nil
          remaining -> {remaining, <<>>}
        end)

      assert {:ok, streamed} = CAR.from_stream(single_chunk)
      assert tree.root == streamed.root
    end

    test "handles multi-chunk stream" do
      tree = tree_from([{"col/a", val("v")}])
      {:ok, binary} = CAR.to_binary(tree)

      # Feed the decoder four bytes at a time.
      four_byte_chunks =
        binary
        |> :binary.bin_to_list()
        |> Enum.chunk_every(4)
        |> Enum.map(&:binary.list_to_bin/1)

      assert {:ok, streamed} = CAR.from_stream(four_byte_chunks)
      assert tree.root == streamed.root
    end
  end

  describe "to_stream/1" do
    test "first item is the header" do
      {tree, items} = streamed_single_key_tree()
      assert [{:header, 1, [root]}] = Enum.take(items, 1)
      assert root == tree.root
    end

    test "subsequent items are blocks" do
      {_tree, items} = streamed_single_key_tree()
      after_header = Enum.drop(items, 1)
      assert Enum.all?(after_header, &match?({:block, _, _}, &1))
    end

    test "stream contains root block" do
      {tree, items} = streamed_single_key_tree()
      block_cids = for {:block, cid, _bytes} <- items, do: cid
      assert tree.root in block_cids
    end
  end
end
+115
test/mst/diff_test.exs
defmodule MST.DiffTest do
  use ExUnit.Case, async: true

  doctest MST.Diff

  alias DASL.CID
  alias MST.{Diff, Tree}

  # Fresh empty tree on an in-memory store.
  defp new_tree, do: MST.Store.Memory.new() |> Tree.new()

  # Raw-codec CID of `s`, used as a record value.
  defp val(s), do: CID.compute(s, :raw)

  # Inserts every `{key, value}` pair, in list order, into `base`.
  defp put_all(base, pairs) do
    for {key, value} <- pairs, reduce: base do
      tree ->
        {:ok, tree} = Tree.put(tree, key, value)
        tree
    end
  end

  # Keys touched by a diff, in the order the diff reports them.
  defp op_keys(diff), do: Enum.map(diff.record_ops, fn op -> op.key end)

  describe "compute/2" do
    test "two empty trees produce empty diff" do
      assert {:ok, diff} = Diff.compute(new_tree(), new_tree())
      assert MapSet.size(diff.created_nodes) == 0
      assert MapSet.size(diff.deleted_nodes) == 0
      assert diff.record_ops == []
    end

    test "empty → non-empty: all keys are creates" do
      value = val("v")
      populated = put_all(new_tree(), [{"col/a", value}])

      assert {:ok, diff} = Diff.compute(new_tree(), populated)
      assert length(diff.record_ops) == 1

      op = hd(diff.record_ops)
      assert op.key == "col/a"
      assert op.old_value == nil
      assert op.new_value == value
    end

    test "non-empty → empty: all keys are deletes" do
      value = val("v")
      populated = put_all(new_tree(), [{"col/a", value}])

      assert {:ok, diff} = Diff.compute(populated, new_tree())
      assert length(diff.record_ops) == 1

      op = hd(diff.record_ops)
      assert op.key == "col/a"
      assert op.old_value == value
      assert op.new_value == nil
    end

    test "identical trees produce empty diff" do
      tree = put_all(new_tree(), [{"col/a", val("v")}])

      assert {:ok, diff} = Diff.compute(tree, tree)
      assert diff.record_ops == []
      assert MapSet.size(diff.created_nodes) == 0
      assert MapSet.size(diff.deleted_nodes) == 0
    end

    test "update: same key, different value" do
      before_value = val("v1")
      after_value = val("v2")
      tree_a = put_all(new_tree(), [{"col/a", before_value}])
      tree_b = put_all(new_tree(), [{"col/a", after_value}])

      assert {:ok, diff} = Diff.compute(tree_a, tree_b)
      assert length(diff.record_ops) == 1

      op = hd(diff.record_ops)
      assert op.old_value == before_value
      assert op.new_value == after_value
    end

    test "no-op: same key, same value, different surrounding context" do
      shared = val("v")
      other = val("v2")
      tree_a = put_all(new_tree(), [{"col/a", shared}, {"col/b", other}])
      tree_b = put_all(new_tree(), [{"col/a", shared}, {"col/c", other}])

      assert {:ok, diff} = Diff.compute(tree_a, tree_b)

      keys = op_keys(diff)
      refute "col/a" in keys
      assert "col/b" in keys
      assert "col/c" in keys
    end

    test "record_ops are sorted by key" do
      value = val("v")
      tree_b = put_all(new_tree(), Enum.map(["col/z", "col/a", "col/m"], &{&1, value}))

      assert {:ok, diff} = Diff.compute(new_tree(), tree_b)

      keys = op_keys(diff)
      assert keys == Enum.sort(keys)
    end

    test "created_nodes and deleted_nodes are non-overlapping for insert" do
      tree_b = put_all(new_tree(), [{"col/a", val("v")}])

      assert {:ok, diff} = Diff.compute(new_tree(), tree_b)
      assert MapSet.disjoint?(diff.created_nodes, diff.deleted_nodes)
    end

    test "multi-key add and remove" do
      value = val("v")
      added_value = val("va")

      # Both sides share "col/keep"; each adds one key of its own.
      base = put_all(new_tree(), [{"col/keep", value}])
      tree_a = put_all(base, [{"col/remove", value}])
      tree_b = put_all(base, [{"col/add", added_value}])

      assert {:ok, diff} = Diff.compute(tree_a, tree_b)

      touched = diff |> op_keys() |> MapSet.new()
      assert MapSet.member?(touched, "col/remove")
      assert MapSet.member?(touched, "col/add")
      refute MapSet.member?(touched, "col/keep")
    end
  end
end
+173
test/mst/fixtures_test.exs
defmodule MST.FixturesTest do
  @moduledoc """
  Runs the exhaustive mst-test-suite fixtures against the MST implementation.

  The 128 CAR files (MSTs 0–127) are decoded once, at compile time. Each of
  the 16,384 JSON diff vectors (every ordered pair of trees) then checks that:

  - `MST.Diff.compute/2` reports the expected `created_nodes` and
    `deleted_nodes` sets
  - the reported record operations (create / update / delete) equal the
    expected `record_ops`

  Tag: `:slow` — run with `mix test` (included by default).
  To exclude: `mix test --exclude slow`
  """

  use ExUnit.Case, async: true

  @fixture_root Path.join([__DIR__, "..", "fixtures", "mst-test-suite"])
  @cars_dir Path.join(@fixture_root, "cars/exhaustive")
  @diff_dir Path.join(@fixture_root, "tests/diff/exhaustive")

  # ---------------------------------------------------------------------------
  # Compile-time load of all 128 CAR files
  # ---------------------------------------------------------------------------

  # %{0 => %MST.Tree{}, 1 => %MST.Tree{}, ...}
  @trees Map.new(0..127, fn index ->
           padded = index |> Integer.to_string() |> String.pad_leading(3, "0")
           car_path = Path.join(@cars_dir, "exhaustive_#{padded}.car")
           {:ok, tree} = car_path |> File.read!() |> MST.CAR.from_binary()
           {index, tree}
         end)

  # ---------------------------------------------------------------------------
  # CAR loading sanity checks
  # ---------------------------------------------------------------------------

  describe "CAR loading" do
    test "all 128 CAR files load successfully" do
      assert map_size(@trees) == 128
    end

    test "MST 0 (empty) loads as an empty tree (no leaf keys)" do
      assert {:ok, []} = MST.Tree.to_list(@trees[0])
    end

    test "MST 127 (all 7 keys) loads with 7 leaf entries" do
      assert {:ok, pairs} = MST.Tree.to_list(@trees[127])
      assert length(pairs) == 7
    end

    test "MST root CIDs are stable (decode twice, same root)" do
      # Spot-check a handful of indices rather than all 128.
      Enum.each([1, 63, 64, 127], fn i ->
        file = "exhaustive_#{String.pad_leading("#{i}", 3, "0")}.car"
        binary = @cars_dir |> Path.join(file) |> File.read!()

        {:ok, first} = MST.CAR.from_binary(binary)
        {:ok, second} = MST.CAR.from_binary(binary)

        assert first.root == second.root, "Root mismatch for MST #{i}"
      end)
    end
  end

  # ---------------------------------------------------------------------------
  # Diff fixtures (16,384 test vectors)
  # ---------------------------------------------------------------------------

  describe "diff fixtures" do
    # One generated test per JSON fixture file, named after the file.
    for fixture_path <- Path.wildcard(Path.join(@diff_dir, "*.json")) do
      @fixture_path fixture_path

      @tag :slow
      test Path.basename(@fixture_path, ".json") do
        run_diff_fixture(@fixture_path)
      end
    end
  end

  # ---------------------------------------------------------------------------
  # Fixture runner
  # ---------------------------------------------------------------------------

  # Loads one JSON vector, diffs the two referenced trees and compares the
  # node sets and record operations with the expected results.
  defp run_diff_fixture(path) do
    fixture = JSON.decode!(File.read!(path))

    # Inputs look like "./cars/exhaustive/exhaustive_042.car".
    tree_a = @trees[parse_car_index(get_in(fixture, ["inputs", "mst_a"]))]
    tree_b = @trees[parse_car_index(get_in(fixture, ["inputs", "mst_b"]))]

    expected = fixture["results"]

    assert {:ok, diff} = MST.Diff.compute(tree_a, tree_b)

    assert_cid_sets_equal("created_nodes", expected["created_nodes"], diff.created_nodes, path)
    assert_cid_sets_equal("deleted_nodes", expected["deleted_nodes"], diff.deleted_nodes, path)

    wanted_ops = parse_record_ops(expected["record_ops"])
    got_ops = format_record_ops(diff.record_ops)

    assert got_ops == wanted_ops,
           "record_ops mismatch for #{Path.basename(path)}\n" <>
             " expected: #{inspect(wanted_ops)}\n" <>
             " got: #{inspect(got_ops)}"
  end

  # Compares the expected CID strings with the string-encoded actual CIDs.
  defp assert_cid_sets_equal(label, expected_list, actual_cids, path) do
    wanted = parse_cid_list(expected_list)
    got = MapSet.new(actual_cids, &DASL.CID.encode/1)

    assert got == wanted,
           "#{label} mismatch for #{Path.basename(path)}\n" <>
             " expected: #{inspect(MapSet.to_list(wanted))}\n" <>
             " got: #{inspect(MapSet.to_list(got))}"
  end

  # ---------------------------------------------------------------------------
  # Parsing helpers
  # ---------------------------------------------------------------------------

  # "./cars/exhaustive/exhaustive_042.car" → 42
  defp parse_car_index(path_str) do
    stem = Path.basename(path_str, ".car")
    String.to_integer(String.replace_prefix(stem, "exhaustive_", ""))
  end

  defp parse_cid_list(nil), do: MapSet.new()
  defp parse_cid_list(list) when is_list(list), do: MapSet.new(list)

  # Normalises expected ops into `%{key, old_value, new_value}` maps sorted
  # by key.
  defp parse_record_ops(nil), do: []

  defp parse_record_ops(ops) when is_list(ops) do
    normalised =
      for op <- ops do
        %{key: op["rpath"], old_value: op["old_value"], new_value: op["new_value"]}
      end

    Enum.sort_by(normalised, fn op -> op.key end)
  end

  # Same shape as `parse_record_ops/1`, with CIDs rendered as strings so the
  # two sides can be compared with `==`.
  defp format_record_ops(ops) do
    rendered =
      for op <- ops do
        %{
          key: op.key,
          old_value: encode_optional(op.old_value),
          new_value: encode_optional(op.new_value)
        }
      end

    Enum.sort_by(rendered, fn op -> op.key end)
  end

  # Encodes a CID, leaving a missing (falsy) value as `nil`.
  defp encode_optional(cid) do
    if cid, do: DASL.CID.encode(cid), else: nil
  end
end
+53
test/mst/height_test.exs
defmodule MST.HeightTest do
  use ExUnit.Case, async: true

  doctest MST.Height

  alias MST.Height

  describe "for_key/1" do
    # Examples taken from https://atproto.com/specs/repository#mst-structure
    test "spec example: depth 0" do
      assert Height.for_key("2653ae71") == 0
    end

    test "spec example: depth 1" do
      assert Height.for_key("blue") == 1
    end

    test "spec example: depth 4" do
      assert Height.for_key("app.bsky.feed.post/454397e440ec") == 4
    end

    test "spec example: depth 8" do
      assert Height.for_key("app.bsky.feed.post/9adeb165882c") == 8
    end

    test "returns non-negative integer" do
      assert Height.for_key("anything") >= 0
    end

    test "empty binary returns non-negative integer" do
      # Only checks that hashing an empty key does not crash.
      assert Height.for_key("") >= 0
    end

    test "depth 0 is the most common result" do
      # Roughly three quarters of keys should land on depth 0; the bound
      # below is deliberately loose.
      zero_depth_count =
        Enum.count(1..100, fn i -> Height.for_key("test/key#{i}") == 0 end)

      assert zero_depth_count > 50
    end

    test "depth is consistent for the same key" do
      key = "some/key"
      assert Height.for_key(key) == Height.for_key(key)
    end

    test "different keys generally produce different depths" do
      shallow = Height.for_key("2653ae71")
      deeper = Height.for_key("blue")
      assert shallow != deeper
    end
  end
end
+199
test/mst/node_test.exs
defmodule MST.NodeTest do
  use ExUnit.Case, async: true

  doctest MST.Node

  alias DASL.CID
  alias MST.Node
  alias MST.Node.Entry

  # Shared fixtures
  @cid_a CID.compute("value_a", :raw)
  @cid_b CID.compute("value_b", :raw)
  @cid_c CID.compute("value_c", :raw)

  # Builds an entry; `prefix_len` defaults to 0 and `right` to nil.
  defp entry(fields) do
    struct!(Entry, Keyword.merge([prefix_len: 0, right: nil], fields))
  end

  # Node with the given entries and an optional left pointer.
  defp node_with(entries, left \\ nil), do: %Node{left: left, entries: entries}

  # Encodes `node` and decodes the resulting bytes.
  defp reencode(node) do
    assert {:ok, bytes} = Node.encode(node)
    assert {:ok, decoded} = Node.decode(bytes)
    decoded
  end

  describe "empty/0" do
    test "returns an empty node" do
      assert %Node{left: nil, entries: []} = Node.empty()
    end
  end

  describe "encode/1 and decode/1 round-trip" do
    test "empty node" do
      node = Node.empty()
      assert {:ok, bytes} = Node.encode(node)
      assert {:ok, ^node} = Node.decode(bytes)
    end

    test "node with single entry, no subtrees" do
      decoded = reencode(node_with([entry(key_suffix: "col/key", value: @cid_a)]))

      assert decoded.left == nil
      assert [only] = decoded.entries
      assert only.key_suffix == "col/key"
      assert only.value == @cid_a
      assert only.right == nil
    end

    test "node with left subtree pointer" do
      decoded = reencode(node_with([entry(key_suffix: "col/key", value: @cid_a)], @cid_b))
      assert decoded.left == @cid_b
    end

    test "node with right subtree pointer" do
      decoded = reencode(node_with([entry(key_suffix: "col/key", value: @cid_a, right: @cid_b)]))
      assert hd(decoded.entries).right == @cid_b
    end

    test "node with multiple entries and prefix compression" do
      # "app.bsky.feed.post/" is 19 bytes, so bbb/ccc share a 19-byte prefix.
      decoded =
        reencode(
          node_with([
            entry(key_suffix: "app.bsky.feed.post/aaa", value: @cid_a),
            entry(prefix_len: 19, key_suffix: "bbb", value: @cid_b),
            entry(prefix_len: 19, key_suffix: "ccc", value: @cid_c)
          ])
        )

      assert Node.keys(decoded) == [
               "app.bsky.feed.post/aaa",
               "app.bsky.feed.post/bbb",
               "app.bsky.feed.post/ccc"
             ]
    end

    test "CID is stable across encode → decode → re-encode" do
      node = node_with([entry(key_suffix: "col/key", value: @cid_a)])

      assert {:ok, first_bytes} = Node.encode(node)
      assert {:ok, decoded} = Node.decode(first_bytes)
      assert {:ok, second_bytes} = Node.encode(decoded)
      assert first_bytes == second_bytes
    end

    test "explicit null for nil left is required for determinism" do
      # Encoding two separately built empty nodes must give identical bytes.
      assert {:ok, first_bytes} = Node.encode(Node.empty())
      assert {:ok, second_bytes} = Node.encode(Node.empty())
      assert first_bytes == second_bytes
    end
  end

  describe "cid/1" do
    test "returns a :drisl codec CID" do
      assert {:ok, cid} = Node.cid(Node.empty())
      assert cid.codec == :drisl
    end

    test "same node always produces the same CID" do
      node = Node.empty()
      assert {:ok, first} = Node.cid(node)
      assert {:ok, second} = Node.cid(node)
      assert first == second
    end

    test "different nodes produce different CIDs" do
      populated = node_with([entry(key_suffix: "col/key", value: @cid_a)])

      assert {:ok, empty_cid} = Node.cid(Node.empty())
      assert {:ok, populated_cid} = Node.cid(populated)
      assert empty_cid != populated_cid
    end
  end

  describe "keys/1" do
    test "empty node returns empty list" do
      assert Node.keys(Node.empty()) == []
    end

    test "reconstructs full keys from prefix-compressed entries" do
      node =
        node_with([
          entry(key_suffix: "foo/aaa", value: @cid_a),
          entry(prefix_len: 4, key_suffix: "bbb", value: @cid_b),
          entry(prefix_len: 4, key_suffix: "ccc", value: @cid_c)
        ])

      assert Node.keys(node) == ["foo/aaa", "foo/bbb", "foo/ccc"]
    end

    test "first entry always has prefix_len 0" do
      node = node_with([entry(key_suffix: "full/key", value: @cid_a)])
      assert Node.keys(node) == ["full/key"]
    end
  end

  describe "compress_entries/1" do
    test "single entry has prefix_len 0" do
      [only | _] = Node.compress_entries([{"col/key", @cid_a, nil}])
      assert only.prefix_len == 0
      assert only.key_suffix == "col/key"
    end

    test "adjacent entries with common prefix are compressed" do
      # The first 19 bytes ("app.bsky.feed.post/") are shared; the keys
      # diverge at 'a' vs 'b'.
      compressed =
        Node.compress_entries([
          {"app.bsky.feed.post/aaa", @cid_a, nil},
          {"app.bsky.feed.post/bbb", @cid_b, nil}
        ])

      [first, second] = compressed
      assert first.prefix_len == 0
      assert first.key_suffix == "app.bsky.feed.post/aaa"
      assert second.prefix_len == 19
      assert second.key_suffix == "bbb"
    end

    test "no shared prefix means prefix_len stays 0" do
      compressed = Node.compress_entries([{"aaa/x", @cid_a, nil}, {"zzz/y", @cid_b, nil}])
      assert Enum.at(compressed, 1).prefix_len == 0
    end

    test "compress then expand is identity" do
      keys = ["col/aaa", "col/bbb", "col/ccc"]

      compressed =
        keys
        |> Enum.map(&{&1, @cid_a, nil})
        |> Node.compress_entries()

      assert Node.keys(node_with(compressed)) == keys
    end
  end

  describe "decode/1 error cases" do
    test "returns error for non-CBOR bytes" do
      assert {:error, :decode, _} = Node.decode(<<0xFF, 0xFF, 0xFF>>)
    end

    test "returns error for trailing bytes" do
      {:ok, bytes} = Node.encode(Node.empty())
      assert {:error, :decode, :trailing_bytes} = Node.decode(bytes <> <<0x00>>)
    end

    test "returns error for invalid structure (not a map)" do
      # A bare integer is valid CBOR but not a node map.
      {:ok, encoded_integer} = DASL.DRISL.encode(42)
      assert {:error, :decode, _} = Node.decode(encoded_integer)
    end
  end
end
+88
test/mst/store/memory_test.exs
defmodule MST.Store.MemoryTest do
  use ExUnit.Case, async: true

  doctest MST.Store.Memory

  alias DASL.CID
  alias MST.{Node, Store}
  alias MST.Store.Memory

  # Shared fixture: the empty node and its CID, both evaluated at compile time.
  # The match on `{:ok, cid}` is deliberately assertive: if `Node.cid/1` ever
  # fails, compilation aborts with a MatchError instead of silently storing an
  # error tuple in `@cid` and producing confusing failures further down.
  @node Node.empty()
  {:ok, cid} = Node.cid(@node)
  @cid cid

  describe "new/0" do
    test "returns a {module, state} pair" do
      assert {Memory, %{}} = Memory.new()
    end
  end

  describe "get/2" do
    test "returns :not_found for missing CID" do
      store = Memory.new()
      assert {:error, :not_found} = Store.get(store, @cid)
    end

    test "returns node after put" do
      store = Store.put(Memory.new(), @cid, @node)
      assert Store.get(store, @cid) == {:ok, @node}
    end
  end

  describe "put/3" do
    test "returns updated store" do
      store = Memory.new()
      store2 = Store.put(store, @cid, @node)
      assert Store.has?(store2, @cid)
    end

    test "original store is unaffected (immutable)" do
      store = Memory.new()
      _store2 = Store.put(store, @cid, @node)
      refute Store.has?(store, @cid)
    end
  end

  describe "has?/2" do
    test "false for missing CID" do
      store = Memory.new()
      refute Store.has?(store, @cid)
    end

    test "true after put" do
      store = Store.put(Memory.new(), @cid, @node)
      assert Store.has?(store, @cid)
    end
  end

  describe "cids/1" do
    test "empty store returns empty list" do
      assert Store.cids(Memory.new()) == []
    end

    test "returns all inserted CIDs" do
      # A second, non-empty node so the store holds two distinct CIDs.
      other_node = %Node{
        left: nil,
        entries: [
          %Node.Entry{
            prefix_len: 0,
            key_suffix: "x",
            value: CID.compute("v"),
            right: nil
          }
        ]
      }

      {:ok, other_cid} = Node.cid(other_node)

      store =
        Memory.new()
        |> Store.put(@cid, @node)
        |> Store.put(other_cid, other_node)

      cids = Store.cids(store)
      assert @cid in cids
      assert other_cid in cids
      assert length(cids) == 2
    end
  end
end
+358
test/mst/tree_test.exs
defmodule MST.TreeTest do
  use ExUnit.Case, async: true

  doctest MST.Tree

  alias DASL.CID
  alias MST.Tree

  # ---------------------------------------------------------------------------
  # Spec-compliance fixtures (all evaluated once, at compile time)
  # ---------------------------------------------------------------------------

  # The 7 fixture keys with known heights: 0, 1, 0, 2, 0, 1, 0.
  @fixture_keys ["k/00", "k/02", "k/04", "k/39", "k/40", "k/48", "k/49"]

  # Value CID for each fixture key, derived from a small DRISL-encoded record.
  @fixture_values Map.new(@fixture_keys, fn key ->
                    term = %{"$type" => "mst-test-data", "value_for" => key}
                    {:ok, bytes} = DASL.DRISL.encode(term)
                    {key, CID.compute(bytes, :drisl)}
                  end)

  @cars_dir Path.join([__DIR__, "..", "fixtures", "mst-test-suite", "cars", "exhaustive"])

  # Root CID of each of the 128 fixture CAR files, keyed by fixture index.
  @fixture_roots Map.new(0..127, fn i ->
                   name = "exhaustive_#{String.pad_leading("#{i}", 3, "0")}.car"
                   {:ok, tree} = MST.CAR.from_binary(File.read!(Path.join(@cars_dir, name)))
                   {i, tree.root}
                 end)

  # `{index, keys}` for every non-empty subset of the fixture keys. Bit `j` of
  # the index selects the `j`-th entry of `@fixture_keys`.
  @fixture_subsets Enum.map(1..127, fn i ->
                     keys =
                       for {key, bit} <- Enum.with_index(@fixture_keys),
                           Bitwise.band(i, Bitwise.bsl(1, bit)) != 0,
                           do: key

                     {i, keys}
                   end)

  # ---------------------------------------------------------------------------
  # Helpers
  # ---------------------------------------------------------------------------

  defp new_tree, do: Tree.new(MST.Store.Memory.new())
  defp val(s), do: CID.compute(s, :raw)

  # Inserts every `{key, value}` pair into `tree`, in the order given, and
  # returns the resulting tree. Crashes (MatchError) if any put fails.
  defp put_all(tree, pairs) do
    Enum.reduce(pairs, tree, fn {key, value}, acc ->
      {:ok, next} = Tree.put(acc, key, value)
      next
    end)
  end

  # Builds a fresh tree from fixture `keys` (inserted in the order given),
  # pairing each key with its value from `@fixture_values`.
  defp fixture_tree(keys) do
    pairs = for key <- keys, do: {key, Map.fetch!(@fixture_values, key)}
    put_all(new_tree(), pairs)
  end

  # Generate all permutations of a list.
  defp permutations([]), do: [[]]

  defp permutations(list) do
    for elem <- list, rest <- permutations(list -- [elem]), do: [elem | rest]
  end

  # ---------------------------------------------------------------------------
  # Tests
  # ---------------------------------------------------------------------------

  describe "new/1" do
    test "creates an empty tree" do
      tree = new_tree()
      assert tree.root == nil
      assert {:ok, []} = Tree.to_list(tree)
    end
  end

  describe "put/3 and get/2" do
    test "insert and retrieve a single key" do
      v = val("data")
      assert {:ok, tree} = Tree.put(new_tree(), "col/key", v)
      assert {:ok, ^v} = Tree.get(tree, "col/key")
    end

    test "get returns :not_found for missing key" do
      tree = new_tree()
      assert {:error, :not_found} = Tree.get(tree, "col/missing")
    end

    test "insert multiple keys and retrieve each" do
      pairs = for i <- 1..10, do: {"col/k#{String.pad_leading("#{i}", 3, "0")}", val("v#{i}")}

      tree = put_all(new_tree(), pairs)

      for {k, v} <- pairs do
        assert {:ok, ^v} = Tree.get(tree, k)
      end
    end

    test "overwrite existing key updates value" do
      v1 = val("first")
      v2 = val("second")
      {:ok, tree} = Tree.put(new_tree(), "col/key", v1)
      {:ok, tree} = Tree.put(tree, "col/key", v2)
      assert {:ok, ^v2} = Tree.get(tree, "col/key")
    end

    test "insert is immutable (old tree unaffected)" do
      v = val("data")
      tree0 = new_tree()
      {:ok, tree1} = Tree.put(tree0, "col/key", v)
      assert {:error, :not_found} = Tree.get(tree0, "col/key")
      assert {:ok, ^v} = Tree.get(tree1, "col/key")
    end

    test "keys at different heights coexist correctly" do
      # "blue" is depth 1, "2653ae71" is depth 0
      v1 = val("v1")
      v2 = val("v2")
      {:ok, tree} = Tree.put(new_tree(), "blue", v1)
      {:ok, tree} = Tree.put(tree, "2653ae71", v2)
      assert {:ok, ^v1} = Tree.get(tree, "blue")
      assert {:ok, ^v2} = Tree.get(tree, "2653ae71")
    end
  end

  describe "delete/2" do
    test "delete returns :not_found for missing key" do
      assert {:error, :not_found} = Tree.delete(new_tree(), "col/missing")
    end

    test "delete removes a key" do
      v = val("data")
      {:ok, tree} = Tree.put(new_tree(), "col/key", v)
      {:ok, tree} = Tree.delete(tree, "col/key")
      assert {:error, :not_found} = Tree.get(tree, "col/key")
    end

    test "delete last key empties the tree" do
      v = val("data")
      {:ok, tree} = Tree.put(new_tree(), "col/key", v)
      {:ok, tree} = Tree.delete(tree, "col/key")
      assert {:ok, []} = Tree.to_list(tree)
    end

    test "delete one of several keys" do
      v = val("v")
      tree = put_all(new_tree(), [{"col/a", v}, {"col/b", v}, {"col/c", v}])
      {:ok, tree} = Tree.delete(tree, "col/b")
      assert {:error, :not_found} = Tree.get(tree, "col/b")
      assert {:ok, ^v} = Tree.get(tree, "col/a")
      assert {:ok, ^v} = Tree.get(tree, "col/c")
    end

    test "delete is immutable (old tree unaffected)" do
      v = val("data")
      {:ok, tree1} = Tree.put(new_tree(), "col/key", v)
      {:ok, _tree2} = Tree.delete(tree1, "col/key")
      assert {:ok, ^v} = Tree.get(tree1, "col/key")
    end
  end

  describe "to_list/1" do
    test "empty tree returns empty list" do
      assert {:ok, []} = Tree.to_list(new_tree())
    end

    test "returns keys in sorted order" do
      v = val("v")
      inserted = ["col/z", "col/a", "col/m", "col/b"]

      tree = put_all(new_tree(), Enum.map(inserted, &{&1, v}))

      assert {:ok, pairs} = Tree.to_list(tree)
      keys = Enum.map(pairs, &elem(&1, 0))
      assert keys == Enum.sort(keys)
      assert keys == ["col/a", "col/b", "col/m", "col/z"]
    end

    test "values are correct for each key" do
      pairs = [{"col/a", val("va")}, {"col/b", val("vb")}, {"col/c", val("vc")}]

      tree = put_all(new_tree(), pairs)

      assert {:ok, result} = Tree.to_list(tree)
      assert result == Enum.sort_by(pairs, &elem(&1, 0))
    end
  end

  describe "stream/1" do
    test "empty tree streams nothing" do
      assert [] = Tree.stream(new_tree()) |> Enum.to_list()
    end

    test "stream yields same pairs as to_list" do
      v = val("v")

      tree = put_all(new_tree(), Enum.map(["col/c", "col/a", "col/b"], &{&1, v}))

      assert {:ok, list_pairs} = Tree.to_list(tree)
      assert Tree.stream(tree) |> Enum.to_list() == list_pairs
    end
  end

  describe "length/1" do
    test "empty tree has length 0" do
      assert {:ok, 0} = Tree.length(new_tree())
    end

    test "size tracks insertions" do
      v = val("v")
      {:ok, t1} = Tree.put(new_tree(), "col/a", v)
      {:ok, t2} = Tree.put(t1, "col/b", v)
      assert {:ok, 1} = Tree.length(t1)
      assert {:ok, 2} = Tree.length(t2)
    end

    test "overwrite does not change size" do
      v = val("v")
      {:ok, tree} = Tree.put(new_tree(), "col/a", v)
      {:ok, tree} = Tree.put(tree, "col/a", val("v2"))
      assert {:ok, 1} = Tree.length(tree)
    end

    test "delete reduces size" do
      v = val("v")
      tree = put_all(new_tree(), [{"col/a", v}, {"col/b", v}])
      {:ok, tree} = Tree.delete(tree, "col/a")
      assert {:ok, 1} = Tree.length(tree)
    end
  end

  describe "collect_blocks/1" do
    test "empty tree returns empty map" do
      assert {:ok, %{}} = Tree.collect_blocks(new_tree())
    end

    test "non-empty tree returns at least one block" do
      v = val("v")
      {:ok, tree} = Tree.put(new_tree(), "col/a", v)
      assert {:ok, blocks} = Tree.collect_blocks(tree)
      assert map_size(blocks) >= 1
      assert Map.has_key?(blocks, tree.root)
    end

    test "all returned CIDs are :drisl codec" do
      v = val("v")
      {:ok, tree} = Tree.put(new_tree(), "col/a", v)
      assert {:ok, blocks} = Tree.collect_blocks(tree)

      for {cid, _bytes} <- blocks do
        assert cid.codec == :drisl
      end
    end
  end

  describe "determinism" do
    test "same keys/values in different insertion order produce the same root CID" do
      v = val("v")
      pairs = [{"col/a", v}, {"col/b", v}, {"col/c", v}]

      tree_forward = put_all(new_tree(), pairs)
      tree_reverse = put_all(new_tree(), Enum.reverse(pairs))

      assert tree_forward.root == tree_reverse.root
    end

    test "delete then re-insert produces the same root CID as never deleting" do
      v = val("v")
      tree_ab = put_all(new_tree(), [{"col/a", v}, {"col/b", v}])

      tree_a_back = put_all(new_tree(), [{"col/a", v}, {"col/b", v}])
      {:ok, tree_a_back} = Tree.delete(tree_a_back, "col/b")
      {:ok, tree_a_back} = Tree.put(tree_a_back, "col/b", v)

      assert tree_ab.root == tree_a_back.root
    end
  end

  describe "spec compliance (fixture CID matching)" do
    for {i, keys} <- @fixture_subsets do
      @tag :slow
      test "MST #{i} built from scratch matches fixture root CID" do
        i = unquote(i)
        active_keys = unquote(keys)

        tree = fixture_tree(active_keys)

        assert tree.root == Map.fetch!(@fixture_roots, i),
               "MST #{i} (keys: #{inspect(active_keys)}) root CID mismatch"
      end
    end

    for {i, keys} <- @fixture_subsets do
      @tag :slow
      test "MST #{i} survives put-all then delete-all cycle" do
        active_keys = unquote(keys)

        empty =
          Enum.reduce(active_keys, fixture_tree(active_keys), fn k, acc ->
            {:ok, t} = Tree.delete(acc, k)
            t
          end)

        assert {:ok, []} = Tree.to_list(empty)
        assert empty.root == nil
      end
    end

    # Only small key sets (up to 4 keys, i.e. at most 24 permutations) are
    # exercised to keep the suite fast. Larger sets are filtered out here, at
    # compile time, so that no test is generated that would pass without
    # asserting anything.
    for {i, keys} <- @fixture_subsets, length(keys) <= 4 do
      @tag :slow
      test "MST #{i} put in every insertion order produces same CID" do
        i = unquote(i)
        active_keys = unquote(keys)

        roots =
          active_keys
          |> permutations()
          |> Enum.map(&fixture_tree(&1).root)
          |> Enum.uniq()

        assert length(roots) == 1,
               "MST #{i}: different insertion orders produce different roots"
      end
    end
  end
end
+2 -5
test/mst_test.exs
defmodule MSTTest do
  use ExUnit.Case, async: true

  # The only checks in this module are the `iex>` examples embedded in the
  # `MST` module documentation, run here as doctests.
  doctest MST
end