defmodule MST.Diff do @moduledoc """ Computes the diff between two `MST.Tree` instances. A diff captures: - Which MST nodes were **created** (present in `b` but not `a`) - Which MST nodes were **deleted** (present in `a` but not `b`) - The per-key **record operations** (creates, updates, and deletes) ## Algorithm Node sets (`created_nodes` / `deleted_nodes`) are computed by collecting all reachable node CIDs from each tree root with a DFS, then taking set differences. Equal CIDs short-circuit entire subtrees (no need to recurse into subtrees both trees share). Record ops are computed by fully materialising both trees as sorted key/value lists and performing a linear sorted-merge comparison. This is straightforward and correct at the cost of O(n) memory; it is the right tradeoff given that the diff is typically used to inspect the full changeset anyway. ## Example {:ok, diff} = MST.Diff.compute(tree_a, tree_b) # diff.record_ops is a sorted list of MST.Diff.Op structs """ use TypedStruct alias DASL.CID alias MST.{Node, Store, Tree} @type diff_error() :: {:error, atom()} typedstruct enforce: true do field :created_nodes, MapSet.t(CID.t()), default: MapSet.new() field :deleted_nodes, MapSet.t(CID.t()), default: MapSet.new() field :record_ops, [MST.Diff.Op.t()], default: [] end # --------------------------------------------------------------------------- # Public API # --------------------------------------------------------------------------- @doc """ Computes the diff from `tree_a` to `tree_b`. Both trees must use stores that have their nodes populated (e.g. loaded from CAR files). Returns `{:ok, diff}` or an error if a node is missing. ## Examples iex> store = MST.Store.Memory.new() iex> ta = MST.Tree.new(store) iex> val = DASL.CID.compute("v") iex> {:ok, tb} = MST.Tree.put(ta, "col/a", val) iex> {:ok, diff} = MST.Diff.compute(ta, tb) iex> length(diff.record_ops) 1 iex> hd(diff.record_ops).key "col/a" """ @spec compute(Tree.t(), Tree.t()) :: {:ok, t()} | diff_error() def compute(%Tree{root: root_a, store: store_a}, %Tree{root: root_b, store: store_b}) do with {:ok, nodes_a} <- reachable_nodes(store_a, root_a), {:ok, nodes_b} <- reachable_nodes(store_b, root_b), {:ok, leaves_a} <- collect_leaves(store_a, root_a), {:ok, leaves_b} <- collect_leaves(store_b, root_b) do ops = merge_ops(leaves_a, leaves_b, []) {:ok, %__MODULE__{ created_nodes: MapSet.difference(nodes_b, nodes_a), deleted_nodes: MapSet.difference(nodes_a, nodes_b), record_ops: ops }} end end # --------------------------------------------------------------------------- # Private — reachable node collection # --------------------------------------------------------------------------- @spec reachable_nodes(Store.t(), CID.t() | nil) :: {:ok, MapSet.t(CID.t())} | diff_error() defp reachable_nodes(_store, nil), do: {:ok, MapSet.new()} defp reachable_nodes(store, root), do: collect_nodes(store, root, MapSet.new()) @spec collect_nodes(Store.t(), CID.t(), MapSet.t(CID.t())) :: {:ok, MapSet.t(CID.t())} | diff_error() defp collect_nodes(store, cid, visited) do if MapSet.member?(visited, cid) do {:ok, visited} else with {:ok, node} <- fetch(store, cid) do visited = MapSet.put(visited, cid) Enum.reduce_while(subtree_cids(node), {:ok, visited}, fn sub, {:ok, v} -> case collect_nodes(store, sub, v) do {:ok, v} -> {:cont, {:ok, v}} err -> {:halt, err} end end) end end end @spec subtree_cids(Node.t()) :: [CID.t()] defp subtree_cids(node) do left = if node.left, do: [node.left], else: [] rights = Enum.flat_map(node.entries, fn e -> if e.right, do: [e.right], else: [] end) left ++ rights end # --------------------------------------------------------------------------- # Private — leaf collection (in sorted order) # --------------------------------------------------------------------------- @spec collect_leaves(Store.t(), CID.t() | nil) :: {:ok, [{binary(), CID.t()}]} | diff_error() defp collect_leaves(_store, nil), do: {:ok, []} defp collect_leaves(store, root) do with {:ok, pairs} <- do_walk(store, root, []) do {:ok, Enum.reverse(pairs)} end end # Accumulates pairs in reverse order (prepend for efficiency, reverse at end). @spec do_walk(Store.t(), CID.t(), [{binary(), CID.t()}]) :: {:ok, [{binary(), CID.t()}]} | diff_error() defp do_walk(store, cid, acc) do with {:ok, node} <- fetch(store, cid) do full_keys = Node.keys(node) do_walk_left(store, node, full_keys, acc) end end @spec do_walk_left(Store.t(), Node.t(), [binary()], [{binary(), CID.t()}]) :: {:ok, [{binary(), CID.t()}]} | diff_error() defp do_walk_left(store, node, full_keys, acc) do with {:ok, acc} <- maybe_do_walk(store, node.left, acc) do do_walk_entries(store, node.entries, full_keys, acc) end end defp maybe_do_walk(_store, nil, acc), do: {:ok, acc} defp maybe_do_walk(store, cid, acc), do: do_walk(store, cid, acc) defp do_walk_entries(_store, [], [], acc), do: {:ok, acc} defp do_walk_entries(store, [entry | rest_e], [key | rest_k], acc) do acc = [{key, entry.value} | acc] with {:ok, acc} <- maybe_do_walk(store, entry.right, acc) do do_walk_entries(store, rest_e, rest_k, acc) end end # --------------------------------------------------------------------------- # Private — sorted-merge diff # --------------------------------------------------------------------------- @spec merge_ops( [{binary(), CID.t()}], [{binary(), CID.t()}], [MST.Diff.Op.t()] ) :: [MST.Diff.Op.t()] defp merge_ops([], [], ops), do: Enum.reverse(ops) defp merge_ops([], [{kb, vb} | rest_b], ops) do op = %MST.Diff.Op{key: kb, old_value: nil, new_value: vb} merge_ops([], rest_b, [op | ops]) end defp merge_ops([{ka, va} | rest_a], [], ops) do op = %MST.Diff.Op{key: ka, old_value: va, new_value: nil} merge_ops(rest_a, [], [op | ops]) end defp merge_ops([{ka, va} | rest_a], [{kb, vb} | rest_b], ops) do cond do ka == kb -> new_ops = if va == vb, do: ops, else: [%MST.Diff.Op{key: ka, old_value: va, new_value: vb} | ops] merge_ops(rest_a, rest_b, new_ops) ka < kb -> op = %MST.Diff.Op{key: ka, old_value: va, new_value: nil} merge_ops(rest_a, [{kb, vb} | rest_b], [op | ops]) true -> op = %MST.Diff.Op{key: kb, old_value: nil, new_value: vb} merge_ops([{ka, va} | rest_a], rest_b, [op | ops]) end end # --------------------------------------------------------------------------- # Private — store access # --------------------------------------------------------------------------- @spec fetch(Store.t(), CID.t()) :: {:ok, Node.t()} | diff_error() defp fetch(store, cid) do case Store.get(store, cid) do {:ok, node} -> {:ok, node} {:error, :not_found} -> {:error, :missing_node} end end end