An Elixir implementation of AT Protocol-flavoured Merkle Search Trees (MST)
1defmodule MST do
2 @moduledoc """
3 AT Protocol-flavoured Merkle Search Tree (MST) for Elixir.
4
5 An MST is a content-addressed, deterministic key/value tree where keys are
6 byte arrays and values are `DASL.CID` links. The tree structure is fully
7 determined by the current set of key/value pairs — equal content always
8 produces the same root CID, making it suitable for Merkle proofs and
9 efficient diffs.
10
11 This library implements the AT Protocol MST specification but is designed to
12 be generic: it makes no assumptions about repository structure, commit
13 objects, or AT-URI paths.
14
15 ## Quick start
16
17 store = MST.Store.Memory.new()
18 tree = MST.new(store)
19
20 val = DASL.CID.compute("my record")
21 {:ok, tree} = MST.put(tree, "collection/key", val)
22 {:ok, ^val} = MST.get(tree, "collection/key")
23
24 {:ok, tree} = MST.delete(tree, "collection/key")
25
26 ## Loading from a CAR file
27
28 {:ok, tree} = MST.from_car(File.read!("repo.car"))
29 {:ok, binary} = MST.to_car(tree)
30
31 ## Diffing two trees
32
33 {:ok, diff} = MST.diff(tree_a, tree_b)
34 # diff.record_ops — sorted list of MST.Diff.Op structs
35
36 ## Key depth
37
38 The MST height of a key is derived by SHA-256 hashing it and counting
39 leading zero bits divided by 2 (floor), giving a fanout of 4.
40
41 0 = MST.key_height("2653ae71")
42 1 = MST.key_height("blue")
43
44 Spec: https://atproto.com/specs/repository#mst-structure
45 """
46
47 alias MST.{CAR, Diff, Store, Tree}
48 alias DASL.CID
49
50 # ---------------------------------------------------------------------------
51 # Construction
52 # ---------------------------------------------------------------------------
53
54 @doc """
55 Returns a new empty tree backed by an `MST.Store.Memory`.
56
57 Pass an explicit store to use a different backend:
58
59 tree = MST.new(MST.Store.Memory.new())
60
61 ## Examples
62
63 iex> tree = MST.new()
64 iex> tree.root
65 nil
66
67 """
68 @spec new() :: Tree.t()
69 def new, do: Tree.new(Store.Memory.new())
70
71 @doc """
72 Returns a new empty tree backed by the given store.
73
74 ## Examples
75
76 iex> tree = MST.new(MST.Store.Memory.new())
77 iex> tree.root
78 nil
79
80 """
81 @spec new(Store.t()) :: Tree.t()
82 def new(store), do: Tree.new(store)
83
84 # ---------------------------------------------------------------------------
85 # Lookup / mutation
86 # ---------------------------------------------------------------------------
87
88 @doc """
89 Looks up `key` in the tree.
90
91 ## Examples
92
93 iex> tree = MST.new()
94 iex> MST.get(tree, "col/k")
95 {:error, :not_found}
96
97 """
98 @spec get(Tree.t(), binary()) :: {:ok, CID.t()} | {:error, :not_found} | {:error, atom()}
99 defdelegate get(tree, key), to: Tree
100
101 @doc """
102 Inserts or updates `key` → `value`. Returns `{:ok, new_tree}`.
103
104 ## Examples
105
106 iex> tree = MST.new()
107 iex> val = DASL.CID.compute("data")
108 iex> {:ok, tree} = MST.put(tree, "col/k", val)
109 iex> MST.get(tree, "col/k")
110 {:ok, val}
111
112 """
113 @spec put(Tree.t(), binary(), CID.t()) :: {:ok, Tree.t()} | {:error, atom()}
114 defdelegate put(tree, key, value), to: Tree
115
116 @doc """
117 Removes `key` from the tree. Returns `{:ok, new_tree}` or
118 `{:error, :not_found}`.
119
120 ## Examples
121
122 iex> tree = MST.new()
123 iex> val = DASL.CID.compute("data")
124 iex> {:ok, tree} = MST.put(tree, "col/k", val)
125 iex> {:ok, tree} = MST.delete(tree, "col/k")
126 iex> MST.get(tree, "col/k")
127 {:error, :not_found}
128
129 """
130 @spec delete(Tree.t(), binary()) :: {:ok, Tree.t()} | {:error, :not_found | atom()}
131 defdelegate delete(tree, key), to: Tree
132
133 @doc """
134 Returns all key-value pairs in sorted order.
135 """
136 @spec to_list(Tree.t()) :: {:ok, [{binary(), CID.t()}]} | {:error, atom()}
137 defdelegate to_list(tree), to: Tree
138
139 @doc """
140 Returns a lazy stream of `{key, value_cid}` pairs in sorted order.
141 """
142 @spec stream(Tree.t()) :: Enumerable.t()
143 defdelegate stream(tree), to: Tree
144
145 @doc """
146 Returns the number of key-value pairs in the tree.
147 """
148 @spec length(Tree.t()) :: {:ok, non_neg_integer()} | {:error, atom()}
149 defdelegate length(tree), to: Tree
150
151 # ---------------------------------------------------------------------------
152 # CAR I/O
153 # ---------------------------------------------------------------------------
154
155 @doc """
156 Loads an MST from a CAR-encoded binary or an already-decoded `DASL.CAR` struct.
157
158 When given a binary, it is decoded via `DASL.CAR.decode/2` first. When given
159 a `%DASL.CAR{}` struct the decoding step is skipped entirely, which avoids a
160 redundant encode/decode cycle when you already hold the struct in memory.
161
162 Accepts the same options as `DASL.CAR.decode/2` (`verify: boolean`) when
163 called with a binary; options are ignored for the struct variant.
164
165 ## Examples
166
167 iex> tree = MST.new()
168 iex> val = DASL.CID.compute("x")
169 iex> {:ok, tree} = MST.put(tree, "col/a", val)
170 iex> {:ok, bin} = MST.to_car(tree)
171 iex> {:ok, tree2} = MST.from_car(bin)
172 iex> MST.get(tree2, "col/a")
173 {:ok, val}
174
175 iex> tree = MST.new()
176 iex> val = DASL.CID.compute("x")
177 iex> {:ok, tree} = MST.put(tree, "col/a", val)
178 iex> {:ok, bin} = MST.to_car(tree)
179 iex> {:ok, car} = DASL.CAR.decode(bin)
180 iex> {:ok, tree2} = MST.from_car(car)
181 iex> MST.get(tree2, "col/a")
182 {:ok, val}
183
184 """
185 @spec from_car(binary() | DASL.CAR.t(), keyword()) :: {:ok, Tree.t()} | {:error, atom()}
186 def from_car(input, opts \\ [])
187 def from_car(%DASL.CAR{} = car, _opts), do: CAR.from_car(car)
188 def from_car(binary, opts) when is_binary(binary), do: CAR.from_binary(binary, opts)
189
190 @doc """
191 Serialises an `MST.Tree` to a CAR-encoded binary.
192 """
193 @spec to_car(Tree.t(), keyword()) :: {:ok, binary()} | {:error, atom()}
194 defdelegate to_car(tree, opts \\ []), to: CAR, as: :to_binary
195
196 # ---------------------------------------------------------------------------
197 # Diff
198 # ---------------------------------------------------------------------------
199
200 @doc """
201 Computes the diff from `tree_a` to `tree_b`.
202
203 Returns an `MST.Diff` with `created_nodes`, `deleted_nodes`, and
204 `record_ops` sorted by key.
205
206 ## Examples
207
208 iex> tree_a = MST.new()
209 iex> val = DASL.CID.compute("v")
210 iex> {:ok, tree_b} = MST.put(tree_a, "col/a", val)
211 iex> {:ok, diff} = MST.diff(tree_a, tree_b)
212 iex> length(diff.record_ops)
213 1
214
215 """
216 @spec diff(Tree.t(), Tree.t()) :: {:ok, Diff.t()} | {:error, atom()}
217 defdelegate diff(tree_a, tree_b), to: Diff, as: :compute
218
219 # ---------------------------------------------------------------------------
220 # Utilities
221 # ---------------------------------------------------------------------------
222
223 @doc """
224 Returns the MST depth for a key.
225
226 SHA-256 hashes `key` and counts leading zero bits divided by 2 (floor).
227
228 ## Examples
229
230 iex> MST.key_height("2653ae71")
231 0
232
233 iex> MST.key_height("blue")
234 1
235
236 """
237 @spec key_height(binary()) :: non_neg_integer()
238 defdelegate key_height(key), to: MST.Height, as: :for_key
239end