defmodule MST.CAR do
  @moduledoc """
  Bridges `MST.Tree` with the DASL CAR file format.

  Provides functions to load an MST from a CAR binary or stream, and to export
  an MST back to CAR format. The CAR header's first root CID is treated as the
  MST root; any additional roots are ignored.

  MST node blocks (DAG-CBOR codec, `:drisl`) are decoded into `MST.Node` structs
  and stored in an `MST.Store.Memory`. Non-MST blocks (e.g. record data with the
  `:raw` codec) are ignored during import — the store only holds MST structural
  nodes.

  ## Example

      {:ok, tree} = MST.CAR.from_binary(File.read!("repo.car"))
      {:ok, pairs} = MST.Tree.to_list(tree)

  """

  alias DASL.{CAR, CID}
  alias MST.{Node, Store, Tree}

  # Covers every error shape this module returns, including the tuples produced
  # when a raised exception is converted in `from_binary/2` and `from_stream/2`.
  @type car_error() ::
          {:error, :header, atom() | {:invalid_binary, String.t()}}
          | {:error, :block, atom()}
          | {:error, atom() | {:stream_decode, String.t()}}

  # ---------------------------------------------------------------------------
  # Import
  # ---------------------------------------------------------------------------

  @doc """
  Loads an MST from an already-decoded `DASL.CAR` struct.

  Populates an `MST.Store.Memory` from the struct's blocks map and returns an
  `MST.Tree` rooted at the CAR's first root CID. Use this when you already hold
  a `%DASL.CAR{}` in memory and want to avoid a redundant encode/decode cycle.

  A CAR with no roots yields an empty tree backed by a fresh store.

  ## Examples

      iex> store = MST.Store.Memory.new()
      iex> tree = MST.Tree.new(store)
      iex> val = DASL.CID.compute("data")
      iex> {:ok, tree} = MST.Tree.put(tree, "col/key", val)
      iex> {:ok, binary} = MST.CAR.to_binary(tree)
      iex> {:ok, car} = DASL.CAR.decode(binary)
      iex> {:ok, tree2} = MST.CAR.from_car(car)
      iex> MST.Tree.get(tree2, "col/key")
      {:ok, val}

  """
  @spec from_car(CAR.t()) :: {:ok, Tree.t()} | car_error()
  def from_car(%CAR{roots: roots, blocks: blocks}), do: build_tree(roots, blocks)

  @doc """
  Loads an MST from a CAR-encoded binary.

  Decodes all blocks, populates an `MST.Store.Memory` with MST nodes (DAG-CBOR
  codec), and returns an `MST.Tree` rooted at the CAR's first root CID.

  Accepts the same options as `DASL.CAR.decode/2` (`verify: boolean`).

  An `ArgumentError` raised while decoding is converted to
  `{:error, :header, {:invalid_binary, message}}`; other errors returned by
  `DASL.CAR.decode/2` are passed through unchanged.

  ## Examples

      iex> store = MST.Store.Memory.new()
      iex> tree = MST.Tree.new(store)
      iex> val = DASL.CID.compute("data")
      iex> {:ok, tree} = MST.Tree.put(tree, "col/key", val)
      iex> {:ok, binary} = MST.CAR.to_binary(tree)
      iex> {:ok, tree2} = MST.CAR.from_binary(binary)
      iex> MST.Tree.get(tree2, "col/key")
      {:ok, val}

  """
  @spec from_binary(binary(), keyword()) :: {:ok, Tree.t()} | car_error()
  def from_binary(binary, opts \\ []) when is_binary(binary) do
    with {:ok, car} <- CAR.decode(binary, opts) do
      build_tree(car.roots, car.blocks)
    end
  rescue
    e in ArgumentError ->
      {:error, :header, {:invalid_binary, Exception.message(e)}}
  end

  @doc """
  Loads an MST from a CAR stream (an `Enumerable` of binary chunks).

  Streams the input through `DASL.CAR.stream_decode/2`, so the encoded CAR
  never has to be held as one binary. Decoded blocks are accumulated while the
  stream is consumed, and the `MST.Store.Memory` is built once the stream is
  exhausted.

  A `RuntimeError` raised while consuming the stream is converted to
  `{:error, {:stream_decode, message}}`. If the stream yields no header, the
  result is an empty tree.

  ## Options

  Options are forwarded to `DASL.CAR.stream_decode/2`.

    - `:verify` — verify CID digests of incoming blocks (default: `true`)

  ## Examples

      iex> store = MST.Store.Memory.new()
      iex> tree = MST.Tree.new(store)
      iex> val = DASL.CID.compute("data")
      iex> {:ok, tree} = MST.Tree.put(tree, "col/key", val)
      iex> {:ok, binary} = MST.CAR.to_binary(tree)
      iex> chunk_stream = [binary]
      iex> {:ok, tree2} = MST.CAR.from_stream(chunk_stream)
      iex> MST.Tree.get(tree2, "col/key")
      {:ok, val}

  """
  @spec from_stream(Enumerable.t(), keyword()) :: {:ok, Tree.t()} | car_error()
  def from_stream(stream, opts \\ []) do
    {roots, blocks} =
      stream
      |> CAR.stream_decode(opts)
      |> Enum.reduce({nil, %{}}, fn
        {:header, _version, header_roots}, {_previous, acc} ->
          {header_roots, acc}

        {:block, cid, data}, {current_roots, acc} ->
          {current_roots, Map.put(acc, cid, data)}
      end)

    # `roots` stays `nil` when no header item was seen; treat that as "no roots".
    build_tree(roots || [], blocks)
  rescue
    e in RuntimeError ->
      {:error, {:stream_decode, Exception.message(e)}}
  end

  # ---------------------------------------------------------------------------
  # Export
  # ---------------------------------------------------------------------------

  @doc """
  Serialises an `MST.Tree` to a CAR-encoded binary.

  Collects all reachable MST node blocks and wraps them in a CARv1 file with the
  tree root as the sole header root. An empty tree (`root: nil`) is exported as
  a CAR whose single block — and root — is the encoded empty node.

  `opts` is forwarded to `DASL.CAR.encode/2` for both empty and non-empty trees.

  ## Examples

      iex> store = MST.Store.Memory.new()
      iex> tree = MST.Tree.new(store)
      iex> val = DASL.CID.compute("data")
      iex> {:ok, tree} = MST.Tree.put(tree, "col/key", val)
      iex> {:ok, binary} = MST.CAR.to_binary(tree)
      iex> is_binary(binary)
      true

  """
  @spec to_binary(Tree.t(), keyword()) :: {:ok, binary()} | car_error()
  def to_binary(tree, opts \\ [])

  def to_binary(%Tree{root: nil}, opts) do
    case encode_empty_root() do
      {:ok, cid, bytes} ->
        # Forward `opts` here as well so empty and non-empty trees are encoded
        # with the same settings.
        CAR.encode(%CAR{version: 1, roots: [cid], blocks: %{cid => bytes}}, opts)

      {:error, :encode, reason} ->
        {:error, reason}
    end
  end

  def to_binary(%Tree{root: root} = tree, opts) do
    with {:ok, blocks} <- Tree.collect_blocks(tree) do
      CAR.encode(%CAR{version: 1, roots: [root], blocks: blocks}, opts)
    end
  end

  @doc """
  Returns a stream of `DASL.CAR` stream items for the tree in pre-order (root
  first, then depth-first left-to-right).

  Emits `{:header, 1, [root_cid]}` followed by `{:block, cid, bytes}` for each
  reachable MST node. This stream can be piped into a custom CAR writer. It does
  **not** produce a fully-encoded CAR binary — use `to_binary/2` for that.

  For an empty tree the result is a two-element list: the header and the encoded
  empty node. Enumerating the stream raises if a referenced node is missing from
  the tree's store.
  """
  @spec to_stream(Tree.t()) :: Enumerable.t()
  def to_stream(%Tree{root: nil}) do
    {:ok, cid, bytes} = encode_empty_root()

    [
      {:header, 1, [cid]},
      {:block, cid, bytes}
    ]
  end

  def to_stream(%Tree{root: root, store: store}) do
    Stream.concat([{:header, 1, [root]}], preorder_stream(store, root))
  end

  # ---------------------------------------------------------------------------
  # Private — empty-tree root block
  # ---------------------------------------------------------------------------

  # Encodes the empty MST node and computes its CID; shared by both exporters so
  # they agree on what an empty tree looks like on the wire.
  @spec encode_empty_root() :: {:ok, CID.t(), binary()} | {:error, :encode, term()}
  defp encode_empty_root do
    with {:ok, bytes} <- Node.encode(Node.empty()) do
      {:ok, CID.compute(bytes, :drisl), bytes}
    end
  end

  # ---------------------------------------------------------------------------
  # Private — tree construction from decoded blocks
  # ---------------------------------------------------------------------------

  # Builds a tree rooted at the first CID in `roots`. With no roots the blocks
  # are not inspected at all and an empty tree is returned.
  @spec build_tree([CID.t()], %{CID.t() => binary()}) :: {:ok, Tree.t()} | car_error()
  defp build_tree([], _blocks), do: {:ok, Tree.new(Store.Memory.new())}

  defp build_tree([root | _], blocks) do
    # Decode all DAG-CBOR blocks into MST nodes; ignore raw-codec blocks.
    # Stops at the first block that fails to decode and returns that error.
    populated =
      Enum.reduce_while(blocks, {:ok, Store.Memory.new()}, fn {cid, data}, {:ok, store} ->
        case decode_block(cid, data) do
          {:ok, node} -> {:cont, {:ok, Store.put(store, cid, node)}}
          :skip -> {:cont, {:ok, store}}
          {:error, _} = err -> {:halt, err}
        end
      end)

    with {:ok, store} <- populated do
      {:ok, Tree.from_root(root, store)}
    end
  end

  # Decodes one block according to its CID codec: `:raw` blocks are skipped,
  # `:drisl` blocks are decoded as MST nodes.
  @spec decode_block(CID.t(), binary()) :: {:ok, Node.t()} | :skip | {:error, atom()}
  defp decode_block(%CID{codec: :raw}, _data), do: :skip

  defp decode_block(%CID{codec: :drisl}, data) do
    case Node.decode(data) do
      {:ok, node} -> {:ok, node}
      {:error, :decode, reason} -> {:error, reason}
    end
  end

  # ---------------------------------------------------------------------------
  # Private — pre-order DFS stream
  # ---------------------------------------------------------------------------

  # Lazily walks the tree from `root`, emitting one `{:block, cid, bytes}` per
  # node. The accumulator is a stack of CIDs still to visit; a node's children
  # are pushed in front of its pending siblings, which gives pre-order.
  @spec preorder_stream(Store.t(), CID.t()) :: Enumerable.t()
  defp preorder_stream(store, root) do
    Stream.unfold([root], fn
      [] ->
        nil

      [cid | pending] ->
        case Store.get(store, cid) do
          {:error, :not_found} ->
            raise "MST.CAR.to_stream/1: node not found: #{CID.encode(cid)}"

          {:ok, node} ->
            {:ok, bytes} = Node.encode(node)
            {{:block, cid, bytes}, subtree_cids(node) ++ pending}
        end
    end)
  end

  # Child CIDs of `node` in left-to-right order: the left subtree first, then
  # each entry's right subtree. Absent links are dropped.
  @spec subtree_cids(Node.t()) :: [CID.t()]
  defp subtree_cids(node) do
    Enum.filter([node.left | Enum.map(node.entries, & &1.right)], & &1)
  end
end