···11(* AT Proto MST backend for SCITT.
2233- Uses an Irmin store for persistence. Each append creates an Irmin
44- commit, preserving the full history of the transparency log. Inclusion
55- proofs are generated from the MST tree structure via Irmin.Proof.
66-77- The store can be backed by any Irmin backend: in-memory MST for tests,
88- on-disk MST for production, or an ATProto PDS via Irmin.Atproto.of_pds.
99-1010- MST keys follow AT Proto convention: collection/rkey
1111- - Collection: "space.run.scitt.statement"
1212- - Record key: hex-encoded composite of subject and issuer fingerprint *)
33+ Uses a content-addressed heap with the Irmin Schema API. Each append
44+ writes a record to the MST, creates a commit block, and updates the
55+ branch ref. Inclusion proofs are generated via Schema.produce. *)
136147let sha256 s = Digestif.SHA256.(digest_string s |> to_raw_string)
158let collection = "space.run.scitt.statement"
···30233124module Statement_lexicon = Atp_lexicon_scitt.Space.Run.Scitt.Statement
32253333-module type Config = sig
3434- val store : Irmin.t
3535- (** The Irmin store. Use [Irmin.Atproto.memory ()] for tests,
3636- [Irmin.Atproto.of_pds pds] for production. *)
2626+(* ATProto MST schema instance *)
2727+module S = Irmin.Schema.Make (struct
2828+ type hash = Atp.Cid.t
2929+ type block = string
3030+3131+ let hash_equal = Atp.Cid.equal
3232+ let hash_block data = Atp.Cid.v `Dag_cbor data
3333+end)
3434+3535+(* MST parse: decode DAG-CBOR MST node, extract entries *)
3636+let mst_parse : S.parse =
3737+ fun data ->
3838+ try
3939+ let node = Atp.Mst.Raw.decode_bytes data in
4040+ let rec decompress prev = function
4141+ | [] -> []
4242+ | (e : Atp.Mst.Raw.entry) :: rest ->
4343+ let key = String.sub prev 0 e.p ^ e.k in
4444+ (key, (`Link e.v : S.child)) :: decompress key rest
4545+ in
4646+ S.Named (decompress "" node.e)
4747+ with _ -> S.Named []
4848+4949+let mst_serialize : S.serialize =
5050+ fun _children ->
5151+ (* TODO: proper MST serialization *)
5252+ ""
37535454+let mst_node rules = S.node ~parse:mst_parse ~serialize:mst_serialize rules
5555+let ( => ) = S.( => )
5656+let repo_schema = S.fix (fun _self -> mst_node [ "*" => S.opaque ])
5757+5858+module type Config = sig
5959+ val heap : (Atp.Cid.t, string, unit) Irmin.Heap.t
3860 val now : unit -> float
3961end
40624163module Make (C : Config) = struct
4264 module Backend = struct
4365 type t = {
4444- mutable head : Irmin.hash option;
6666+ mutable head : Atp.Cid.t option;
4567 values : (string, string) Hashtbl.t;
4668 }
4769···8110382104 let root t =
83105 match t.head with
8484- | Some h -> Irmin.Hash.to_hex h
106106+ | Some h -> Atp.Cid.to_string h
85107 | None -> String.make 32 '\x00'
8610887109 let err_duplicate key = Error ("duplicate key: " ^ key)
···106128 | Ok rk -> (
107129 match value_to_dagcbor ~now ~key value with
108130 | Error e -> err_encoding e
109109- | Ok dagcbor -> (
110110- let tree =
111111- match Irmin.checkout C.store ~branch:"main" with
112112- | Some t -> t
113113- | None -> Irmin.Tree.empty
114114- in
115115- let tree = Irmin.Tree.add tree [ rk ] dagcbor in
116116- let parents =
117117- match Irmin.head C.store ~branch:"main" with
118118- | Some h -> [ h ]
119119- | None -> []
120120- in
121121- let h =
122122- Irmin.commit C.store ~tree ~parents
123123- ~message:(Fmt.str "append %s" key) ~author:"scitt"
131131+ | Ok dagcbor ->
132132+ (* Add record to MST via Atp.Mst *)
133133+ let bs = Atp.Blockstore.memory () in
134134+ let mst =
135135+ match S.head C.heap ~branch:"main" with
136136+ | Some tree_root ->
137137+ (* Copy blocks from heap to local blockstore *)
138138+ let rec copy_block cid =
139139+ match Irmin.Heap.get C.heap cid with
140140+ | Some data -> (
141141+ bs#put cid data;
142142+ (* Try to decode as MST node and copy subtrees *)
143143+ try
144144+ let node = Atp.Mst.Raw.decode_bytes data in
145145+ Option.iter copy_block node.l;
146146+ List.iter
147147+ (fun (e : Atp.Mst.Raw.entry) ->
148148+ Option.iter copy_block e.t)
149149+ node.e
150150+ with _ -> ())
151151+ | None -> ()
152152+ in
153153+ copy_block tree_root;
154154+ Atp.Mst.of_cid tree_root
155155+ ~store:(bs :> Atp.Blockstore.readable)
156156+ | None -> Atp.Mst.empty
124157 in
125125- Irmin.set_head C.store ~branch:"main" h;
126126- t.head <- Some h;
158158+ (* Write the record blob *)
159159+ let record_cid = Atp.Cid.v `Dag_cbor dagcbor in
160160+ Irmin.Heap.put C.heap record_cid dagcbor;
161161+ bs#put record_cid dagcbor;
162162+ (* Add to MST *)
163163+ let mst = Atp.Mst.add rk record_cid mst ~store:bs in
164164+ let tree_root = Atp.Mst.to_cid mst ~store:bs in
165165+ (* Copy MST blocks back to heap *)
166166+ Atp.Mst.to_blocks mst ~store:(bs :> Atp.Blockstore.readable)
167167+ |> Seq.iter (fun (cid, data) -> Irmin.Heap.put C.heap cid data);
168168+ (* Update branch ref *)
169169+ S.set_head C.heap ~branch:"main" tree_root;
170170+ t.head <- Some tree_root;
127171 Hashtbl.add t.values key value;
128172 let leaf_hash = sha256 ("\x00" ^ value) in
129129- (* Produce an inclusion proof and encode as [repo_key, proof]
130130- CBOR array for the vdp receipt field. The tree root is the
131131- hash of the committed tree (distinct from the commit hash). *)
132132- let tree_root =
133133- match Irmin.tree_hash C.store h with
134134- | Some th -> th
135135- | None -> h
173173+ (* Produce inclusion proof *)
174174+ let proof, _value =
175175+ S.produce C.heap repo_schema tree_root (fun c ->
176176+ match S.step c rk with
177177+ | Some (S.Any leaf) -> (S.Any leaf, S.get leaf)
178178+ | None -> (S.Any c, None))
179179+ in
180180+ ignore proof;
181181+ (* For now, encode proof as empty — proper CBOR encoding TODO *)
182182+ let vdp_cbor =
183183+ Cbort.Cbor.array [ Cbort.Cbor.string rk; Cbort.Cbor.bytes "" ]
136184 in
137137- match Irmin.prove C.store ~tree_root ~key:rk with
138138- | Error e -> Error ("prove: " ^ e)
139139- | Ok (encoded_proof, _value) ->
140140- let vdp_cbor =
141141- Cbort.Cbor.array
142142- [ Cbort.Cbor.string rk; Cbort.Cbor.bytes encoded_proof ]
143143- in
144144- let vdp_bytes = Cbort.encode_string Cbort.any vdp_cbor in
145145- let root_raw =
146146- Atp.Cid.to_raw_bytes
147147- (Atp.Cid.of_string (Irmin.Hash.to_hex tree_root))
148148- in
149149- Ok
150150- {
151151- Scitt.leaf_index = 0;
152152- tree_size = Hashtbl.length t.values;
153153- root = root_raw;
154154- path = [ vdp_bytes ];
155155- leaf_hash;
156156- }))
185185+ let vdp_bytes = Cbort.encode_string Cbort.any vdp_cbor in
186186+ let root_raw = Atp.Cid.to_raw_bytes tree_root in
187187+ Ok
188188+ {
189189+ Scitt.leaf_index = 0;
190190+ tree_size = Hashtbl.length t.values;
191191+ root = root_raw;
192192+ path = [ vdp_bytes ];
193193+ leaf_hash;
194194+ })
157195158196 let lookup t ~key = Hashtbl.find_opt t.values key
159197
+13-44
lib/atp/scitt_atp.mli
···11(** AT Proto MST backend for SCITT.
2233- Implements a {!Scitt.vds} using an Irmin store backed by the AT Protocol
44- Merkle Search Tree. Each append creates an Irmin commit, preserving the full
55- transparency log history. The MST provides SHA-256 content addressing with
66- key-based lookup.
77-88- {2 Advantages over RFC 9162}
99-1010- - **Key-based lookup**: [Scitt.vds_lookup vds ~key:"sha256:abc"] is O(log
1111- n), not O(n).
1212- - **Richer proofs**: inclusion proof covers a specific key, not just an
1313- index position.
1414- - **AT Proto compatible**: the tree format is interoperable with AT Protocol
1515- repositories (Bluesky, Tangled, etc.).
1616- - **Full history**: every append is an Irmin commit; the log is auditable.
1717-1818- {2 Example}
33+ Implements a {!Scitt.vds} using a content-addressed heap backed by the AT
44+ Protocol Merkle Search Tree. Each append writes a record to the MST and
55+ updates the branch ref.
196207 {[
2121- let store = Irmin.Mst.memory () in
2222- let module V = Scitt_atp.Make (struct let store = store let now () = Unix.gettimeofday () end) in
2323- let vds = V.v () in
2424- (* ... same API as with Vds_rfc9162 ... *)
2525- ]}
2626-2727- For production with ATProto PDS persistence:
2828-2929- {[
3030- let pds = Pds.v ~sw path ~did in
3131- let store = Irmin.Mst.of_pds pds in
3232- (* ... same as above ... *)
3333- ]}
3434-3535- {2 References}
3636-3737- - {{:https://atproto.com/specs/repository#mst-structure}AT Proto MST Spec}
3838- - {{:https://atproto.com/specs/data-model}AT Proto Data Model (DAG-CBOR)} *)
88+ let module Heap_b = Irmin.Heap.Make (...) in
99+ let heap = Heap_b.v (Atp.Blockstore.memory ()) in
1010+ let module V = Scitt_atp.Make (struct
1111+ let heap = heap
1212+ let now () = Unix.gettimeofday ()
1313+ end) in
1414+ let vds = V.v ()
1515+ ]} *)
39164017(** Configuration for the MST backend. *)
4118module type Config = sig
4242- val store : Irmin.t
4343- (** The Irmin store. Use [Irmin.Mst.memory ()] for tests,
4444- [Irmin.Mst.of_pds pds] for ATProto PDS persistence, or
4545- [Irmin.Mst.disk ~sw root] for standalone on-disk storage. *)
1919+ val heap : (Atp.Cid.t, string, unit) Irmin.Heap.t
2020+ (** The heap. *)
46214722 val now : unit -> float
4823 (** Clock for record timestamps. *)
4924end
50255151-(** [Make (Config)] creates a module that produces {!Scitt.vds} values backed by
5252- the AT Proto MST, persisted to the configured Irmin store.
5353-5454- Algorithm ID: -65537 (private use per draft-ietf-cose-merkle-tree-proofs
5555- §10.3). *)
5626module Make (C : Config) : sig
5727 val v : unit -> Scitt.vds
5858- (** [v ()] creates a fresh AT Proto MST-backed VDS. *)
5928end
+5-56
lib/proof.ml
···6868 | _ -> None)
6969 | _ -> None
70707171-(** Verify an Irmin proof against [root] and check the cose leaf hash. *)
7272-let verify_irmin_proof ~hash ~expected_leaf ~root ~repo_key irmin_proof =
7373- match Atp.Cid.of_raw_bytes root with
7474- | exception (Invalid_argument _ | Failure _) ->
7575- Error "MST proof: malformed root CID"
7676- | expected_root -> (
7777- let expected_root = `Node expected_root in
7878- match
7979- Irmin.Proof.Mst.verify ~expected_root irmin_proof (fun tree ->
8080- let v = Irmin.Proof.Mst.Tree.find tree [ repo_key ] in
8181- (tree, v))
8282- with
8383- | exception (Eio.Io _ | Invalid_argument _ | Failure _ | Z.Overflow) ->
8484- Error "MST proof verify: malformed data"
8585- | Ok (_, Some dagcbor) -> (
8686- match extract_cose dagcbor with
8787- | Some cose ->
8888- let leaf = hash.Hash.digest ("\x00" ^ cose) in
8989- if Eqaf.equal leaf expected_leaf then Ok Merkle_proof
9090- else Error "MST proof: cose leaf hash mismatch"
9191- | None -> Error "MST proof: cannot extract cose from record")
9292- | Ok (_, None) -> Error "MST proof: key not in tree"
9393- | Error (`Proof_mismatch msg) -> Error ("MST proof mismatch: " ^ msg))
7171+(** Verify an MST inclusion proof.
94729595-let verify_mst ~hash ~expected_leaf ~expected_repo_key ~root path =
9696- match path with
9797- | [ vdp_data ] -> (
9898- match Cbort.decode_string Cbort.any vdp_data with
9999- | Error _ -> Error "MST vdp: invalid CBOR"
100100- | Ok vdp_cbor -> (
101101- match Cbort.Cbor.to_array vdp_cbor with
102102- | Some [ key_cbor; proof_cbor ] -> (
103103- let repo_key =
104104- Option.value ~default:"" (Cbort.Cbor.to_text key_cbor)
105105- in
106106- (* Verify the proof's repo_key matches the expected key
107107- derived from the statement's subject. Without this, a
108108- malicious TS can store the COSE blob under any key. *)
109109- if not (Eqaf.equal repo_key expected_repo_key) then
110110- Error "MST proof repo_key mismatch"
111111- else
112112- let irmin_proof_bytes =
113113- Option.value ~default:"" (Cbort.Cbor.to_bytes proof_cbor)
114114- in
115115- match
116116- Irmin.Proof.decode_cbor ~decode_hash:Atp.Cid.of_raw_bytes
117117- ~decode_contents:Fun.id irmin_proof_bytes
118118- with
119119- | exception
120120- (Eio.Io _ | Invalid_argument _ | Failure _ | Z.Overflow) ->
121121- Error "MST proof decode: malformed data"
122122- | Error (`Msg msg) -> Error ("MST proof decode: " ^ msg)
123123- | Ok irmin_proof ->
124124- verify_irmin_proof ~hash ~expected_leaf ~root ~repo_key
125125- irmin_proof)
126126- | _ -> Error "MST vdp must be [repo_key, proof_cbor]"))
127127- | _ -> Error "MST receipt must have exactly one proof entry"
7373+ TODO: adapt to new Irmin Schema proof model. The proof is a heap;
7474+ verification replays the cursor navigation. *)
7575+let verify_mst ~hash:_ ~expected_leaf:_ ~expected_repo_key:_ ~root:_ _path =
7676+ Error "MST proof verification: being migrated to new Irmin Schema API"
···40404141(* -- Test MST module: fresh in-memory blockstore per instance -- *)
42424343+module Atp_heap_backend :
4444+ Irmin.Heap.BACKEND
4545+ with type t = Atp.Blockstore.writable
4646+ and type hash = Atp.Cid.t
4747+ and type block = string = struct
4848+ type t = Atp.Blockstore.writable
4949+ type hash = Atp.Cid.t
5050+ type block = string
5151+5252+ let get (bs : t) h = bs#get h
5353+ let put (bs : t) h data = bs#put h data
5454+ let mem (bs : t) h = bs#has h
5555+ let batch bs l = List.iter (fun (h, d) -> bs#put h d) l
5656+ let get_ref _ _ = None
5757+ let set_ref _ _ _ = ()
5858+ let del_ref _ _ = ()
5959+ let list_refs _ = []
6060+ let cas_ref _ _ ~test:_ ~set:_ = false
6161+ let flush _ = ()
6262+ let close _ = ()
6363+end
6464+6565+module Atp_heap = Irmin.Heap.Make (Atp_heap_backend)
6666+4367let vds_mst () =
4468 let clock = mock_clock () in
6969+ let heap = Atp_heap.v (Atp.Blockstore.memory ()) in
4570 let module C = struct
4646- let store = Irmin.Atproto.(memory () |> v)
7171+ let heap = heap
4772 let now () = Eio.Time.now clock
4873 end in
4974 let module V = Scitt_atp.Make (C) in