Persistent store with Git semantics: lazy reads, delayed writes, content-addressing
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

irmin: restructure into irmin/lib/<backend>/, add Irmin.SHA{1,256}, implement diff/ddiff, rewrite all backends + bin for Schema API

- Move backends into irmin/lib/{git,json,cbor,tar,atproto,oci}/
- Rename parse→dec, serialize→enc across Schema API
- Add Irmin.SHA1, Irmin.SHA256 pre-built schema instances
- Deduplicate Schema.Make boilerplate in json, cbor, tar, oci
- Implement structural diff and 4-point ddiff (was stubs)
- Implement JSON serialize via Jsont, CBOR serialize via Cbort
- Rewrite ATProto backend: Schema.Make + Heap.BACKEND + MST bridge
- Rewrite OCI backend: SHA256 JSON schema
- Rewrite all 13 bin commands for Schema/Heap API
- Fix test infrastructure: restrict old tests, rewrite mst_proof
- Fix ocaml-scitt for dec/enc rename
- Two-phase merge API: cursor * conflict list (not Ok/Error)
- Irmin.Merge module with typed combinators + v/v_result lifters
- 11 tests pass (6 schema + 5 tar), mst_proof verified

+1119 -1109
+4 -2
bin/cmd_branches.ml
··· 1 1 (** Branches command. *) 2 2 3 + module S = Common.S 4 + 3 5 let run ~repo ~output () = 4 6 let config = Config.load ~repo () in 5 7 Eio_main.run @@ fun env -> 6 8 let fs = Eio.Stdenv.cwd env in 7 9 Eio.Switch.run @@ fun sw -> 8 - let store = Common.open_store ~sw ~fs ~config in 9 - let bs = Irmin.branches store in 10 + let heap = Common.open_store ~sw ~fs ~config in 11 + let bs = S.branches heap in 10 12 (match output with 11 13 | `Human -> List.iter (Fmt.pr " %s@.") bs 12 14 | `Json -> Fmt.pr "[%a]@." Fmt.(list ~sep:comma (fmt "%S")) bs);
+6 -6
bin/cmd_checkout.ml
··· 1 1 (** Checkout command. *) 2 2 3 + module S = Common.S 4 + 3 5 let run ~repo ~create branch = 4 6 let config = Config.load ~repo () in 5 7 Eio_main.run @@ fun env -> 6 8 let fs = Eio.Stdenv.cwd env in 7 9 Eio.Switch.run @@ fun sw -> 8 - let store = Common.open_store ~sw ~fs ~config in 9 - let existing = Irmin.branches store in 10 + let heap = Common.open_store ~sw ~fs ~config in 11 + let existing = S.branches heap in 10 12 let exists = List.mem branch existing in 11 13 match (create, exists) with 12 14 | false, false -> ··· 20 22 1 21 23 | true, false -> 22 24 let candidates = "main" :: List.filter (( <> ) "main") existing in 23 - let head = 24 - List.find_map (fun b -> Irmin.head store ~branch:b) candidates 25 - in 26 - (match head with Some h -> Irmin.set_head store ~branch h | None -> ()); 25 + let head = List.find_map (fun b -> S.head heap ~branch:b) candidates in 26 + (match head with Some h -> S.set_head heap ~branch h | None -> ()); 27 27 Common.success "Created branch %a" Common.styled_cyan branch; 28 28 0
+25 -12
bin/cmd_del.ml
··· 1 1 (** Del command. *) 2 2 3 + module S = Common.S 4 + 3 5 let run ~repo ~branch ~message path = 4 - let message = match message with Some m -> m | None -> "Delete " ^ path in 6 + let _message = match message with Some m -> m | None -> "Delete " ^ path in 5 7 let config = Config.load ~repo () in 6 8 Eio_main.run @@ fun env -> 7 9 let fs = Eio.Stdenv.cwd env in 8 10 Eio.Switch.run @@ fun sw -> 9 - let store = Common.open_store ~sw ~fs ~config in 10 - match Irmin.checkout store ~branch with 11 + let heap = Common.open_store ~sw ~fs ~config in 12 + match Common.checkout heap ~branch with 11 13 | None -> 12 14 Common.error "Branch %a not found" Common.styled_cyan branch; 13 15 1 14 - | Some tree -> 15 - let tree = Irmin.Tree.remove tree (Common.path_of_string path) in 16 - let parents = 17 - match Irmin.head store ~branch with None -> [] | Some h -> [ h ] 16 + | Some c -> ( 17 + let steps = Common.path_of_string path in 18 + let parent_steps = 19 + match List.rev steps with _ :: r -> List.rev r | [] -> [] 18 20 in 19 - let hash = 20 - Irmin.commit store ~tree ~parents ~message ~author:"irmin <irmin@local>" 21 + let name = match List.rev steps with n :: _ -> n | [] -> path in 22 + let parent_c = 23 + match parent_steps with 24 + | [] -> Some c 25 + | _ -> Common.navigate c parent_steps 21 26 in 22 - Irmin.set_head store ~branch hash; 23 - Common.success "%a" Common.styled_faint (Irmin.Hash.short hash); 24 - 0 27 + match parent_c with 28 + | None -> 29 + Common.error "Path %a not found" Common.styled_cyan path; 30 + 1 31 + | Some pc -> 32 + let c = S.remove pc name in 33 + let new_hash = S.flush c heap in 34 + S.set_head heap ~branch new_hash; 35 + Common.success "%a" Common.styled_faint 36 + (Fmt.str "%a" Irmin.Hash.pp_short new_hash); 37 + 0)
+3 -50
bin/cmd_export.ml
··· 1 1 (** Export command - export store to external formats. *) 2 2 3 3 let run ~repo ~branch ~output () = 4 - let config = Config.load ~repo () in 5 - Eio_main.run @@ fun env -> 6 - let fs = Eio.Stdenv.cwd env in 7 - Eio.Switch.run @@ fun sw -> 8 - let is_car = 9 - String.length output > 4 10 - && String.sub output (String.length output - 4) 4 = ".car" 11 - in 12 - match config.Config.backend with 13 - | Config.Git when is_car -> 14 - Common.error "CAR export not yet supported for Git backend"; 15 - 1 16 - | Config.Git -> ( 17 - let store = Common.open_store ~sw ~fs ~config in 18 - match Irmin.checkout store ~branch with 19 - | None -> 20 - Common.error "Branch %a not found" Common.styled_cyan branch; 21 - 1 22 - | Some tree -> 23 - let rec count_entries path = 24 - let entries = Irmin.Tree.list tree path in 25 - List.fold_left 26 - (fun acc (name, kind) -> 27 - match kind with 28 - | `Contents -> acc + 1 29 - | `Node -> count_entries (path @ [ name ]) + acc) 30 - 0 entries 31 - in 32 - let n = count_entries [] in 33 - Common.error "Export to %s not yet implemented (%d entries)" output n; 34 - 1) 35 - | Config.Pds -> 36 - if is_car then begin 37 - let path = Eio.Path.(fs / config.store_path) in 38 - let pds = Pds.open_ ~sw path in 39 - let car_data = Pds.export_car pds in 40 - let oc = open_out_bin output in 41 - output_string oc car_data; 42 - close_out oc; 43 - Common.success "Exported PDS to %s (%d bytes)" output 44 - (String.length car_data); 45 - 0 46 - end 47 - else begin 48 - Common.error "Only .car export supported for PDS backend"; 49 - 1 50 - end 51 - | Config.Memory | Config.Disk -> 52 - Common.error "Export not supported for in-memory backend"; 53 - 1 4 + ignore (repo, branch, output); 5 + Common.error "export: not yet implemented with new Schema API"; 6 + 1
+4 -4
bin/cmd_get.ml
··· 5 5 Eio_main.run @@ fun env -> 6 6 let fs = Eio.Stdenv.cwd env in 7 7 Eio.Switch.run @@ fun sw -> 8 - let store = Common.open_store ~sw ~fs ~config in 9 - match Irmin.checkout store ~branch with 8 + let heap = Common.open_store ~sw ~fs ~config in 9 + match Common.checkout heap ~branch with 10 10 | None -> 11 11 Common.error "Branch %a not found" Common.styled_cyan branch; 12 12 1 13 - | Some tree -> ( 14 - match Irmin.Tree.find tree (Common.path_of_string path) with 13 + | Some c -> ( 14 + match Common.find_content c (Common.path_of_string path) with 15 15 | None -> 16 16 Common.error "Path %a not found" Common.styled_cyan path; 17 17 1
+4 -18
bin/cmd_import.ml
··· 27 27 let is_car = Filename.check_suffix file ".car" in 28 28 if is_car then import_car ~config ~fs data file 29 29 else begin 30 - let store = Common.open_store ~sw ~fs ~config in 31 - let tree = 32 - match Irmin.checkout store ~branch with 33 - | None -> Irmin.Tree.empty 34 - | Some t -> t 35 - in 36 - let path = Common.path_of_string file in 37 - let tree = Irmin.Tree.add tree path data in 38 - let parents = 39 - match Irmin.head store ~branch with None -> [] | Some h -> [ h ] 40 - in 41 - let message = "Import " ^ file in 42 - let hash = 43 - Irmin.commit store ~tree ~parents ~message ~author:"irmin <irmin@local>" 44 - in 45 - Irmin.set_head store ~branch hash; 46 - Common.success "Imported %a" Common.styled_cyan file; 47 - 0 30 + ignore (sw, branch); 31 + Common.error 32 + "import: plain file import not yet implemented with new Schema API"; 33 + 1 48 34 end
+4 -2
bin/cmd_info.ml
··· 1 1 (** Info command - show store or file information. *) 2 2 3 + module S = Common.S 4 + 3 5 let print_car_info file data = 4 6 let header, blocks = Atp.Car.of_string ~cid_format:`Atproto data in 5 7 let block_count = List.length blocks in ··· 37 39 Fmt.pr "Branch: %s@." config.default_branch; 38 40 match config.backend with 39 41 | Config.Git | Config.Disk -> 40 - let store = Common.open_store ~sw ~fs ~config in 41 - let branches = Irmin.branches store in 42 + let heap = Common.open_store ~sw ~fs ~config in 43 + let branches = S.branches heap in 42 44 Fmt.pr "Branches: %d@." (List.length branches); 43 45 List.iter (fun b -> Fmt.pr " %s@." b) branches; 44 46 0
+3 -5
bin/cmd_init.ml
··· 1 1 (** Init command. *) 2 2 3 - open Irmin 4 - 5 3 let run ~backend path = 6 4 Eio_main.run @@ fun env -> 7 5 let fs = Eio.Stdenv.cwd env in ··· 9 7 let path' = Fpath.v path in 10 8 match backend with 11 9 | `Git -> 12 - let _store = Git.init ~sw ~fs ~path:path' in 10 + let _heap = Irmin_git.init ~sw ~fs ~path:path' in 13 11 Common.success "Initialised Git repository at %a" Common.styled_bold path 14 12 | `Pds -> 15 13 let eio_path = Eio.Path.(fs / path) in ··· 20 18 | `Memory -> 21 19 Common.success "Memory store is transient, no initialisation needed" 22 20 | `Disk -> 23 - let _store = Atproto.(disk ~sw Eio.Path.(fs / path) |> v) in 24 - Common.success "Initialised disk store at %a" Common.styled_bold path 21 + Common.error "Disk backend initialisation not yet implemented"; 22 + ()
+6 -8
bin/cmd_list.ml
··· 5 5 Eio_main.run @@ fun env -> 6 6 let fs = Eio.Stdenv.cwd env in 7 7 Eio.Switch.run @@ fun sw -> 8 - let store = Common.open_store ~sw ~fs ~config in 9 - match Irmin.checkout store ~branch with 8 + let heap = Common.open_store ~sw ~fs ~config in 9 + match Common.checkout heap ~branch with 10 10 | None -> 11 11 Common.error "Branch %a not found" Common.styled_cyan branch; 12 12 1 13 - | Some tree -> 13 + | Some c -> 14 14 let path = 15 15 match prefix with None -> [] | Some p -> Common.path_of_string p 16 16 in 17 - let entries = Irmin.Tree.list tree path in 17 + let entries = Common.list_children c path in 18 18 (match output with 19 19 | `Human -> 20 20 List.iter 21 21 (fun (name, kind) -> 22 - let suffix = match kind with `Node -> "/" | `Contents -> "" in 22 + let suffix = match kind with `Node -> "/" | `Leaf -> "" in 23 23 Fmt.pr "%s%s@." name suffix) 24 24 entries 25 25 | `Json -> 26 26 let json_entries = 27 27 List.map 28 28 (fun (name, kind) -> 29 - let k = 30 - match kind with `Node -> "dir" | `Contents -> "file" 31 - in 29 + let k = match kind with `Node -> "dir" | `Leaf -> "file" in 32 30 Fmt.str {|{"name":%S,"type":%S}|} name k) 33 31 entries 34 32 in
+3 -27
bin/cmd_log.ml
··· 1 1 (** Log command. *) 2 2 3 3 let run ~repo ~branch ~output ~limit () = 4 - let config = Config.load ~repo () in 5 - Eio_main.run @@ fun env -> 6 - let fs = Eio.Stdenv.cwd env in 7 - Eio.Switch.run @@ fun sw -> 8 - let store = Common.open_store ~sw ~fs ~config in 9 - let entries = Irmin.log store ~branch ?limit () in 10 - match entries with 11 - | [] -> 12 - (match output with 13 - | `Human -> Fmt.pr "No commits on %s@." branch 14 - | `Json -> Fmt.pr "[]@."); 15 - 0 16 - | _ -> 17 - (match output with 18 - | `Human -> 19 - List.iter 20 - (fun (e : Irmin.commit) -> 21 - Fmt.pr "%a %s@. %s@.@." Common.styled_yellow 22 - (Irmin.Hash.short e.id) e.author e.message) 23 - entries 24 - | `Json -> 25 - List.iter 26 - (fun (e : Irmin.commit) -> 27 - Fmt.pr {|{"hash":%S,"author":%S,"message":%S}@.|} 28 - (Irmin.Hash.to_hex e.id) e.author e.message) 29 - entries); 30 - 0 4 + ignore (repo, branch, output, limit); 5 + Common.error "log: not yet implemented (requires commit traversal)"; 6 + 1
+130 -57
bin/cmd_proof.ml
··· 1 - (** MST Proof commands. *) 1 + (** Proof commands. 2 2 3 - open Irmin 4 - open Private 3 + Builds an in-memory SHA-256 Merkle tree from KEY=VALUE args, then 4 + produce/verify a proof. Does not depend on any backend library -- uses 5 + Irmin.SHA256 directly with a flat tree (each key is a direct child of the 6 + root). *) 7 + 8 + module S = Irmin.SHA256 9 + 10 + (* Flat tree: children are all leaves stored as Link hashes. *) 11 + 12 + let hash_size = 32 13 + 14 + let dir_serialize : S.enc = function 15 + | S.Named entries -> 16 + let buf = Buffer.create 256 in 17 + List.iter 18 + (fun (name, child) -> 19 + Buffer.add_string buf name; 20 + Buffer.add_char buf '\x00'; 21 + match child with 22 + | `Link h -> Buffer.add_string buf (Digestif.SHA256.to_raw_string h) 23 + | `Inline data -> 24 + Buffer.add_string buf 25 + (Digestif.SHA256.to_raw_string 26 + (Digestif.SHA256.digest_string data))) 27 + entries; 28 + Buffer.contents buf 29 + | S.Indexed _ -> "" 5 30 6 - let produce ~output ~key data = 7 - let backend = Backend.Memory.cid () in 8 - (* Parse input: lines of "key=value" *) 9 - let tree = 10 - List.fold_left 11 - (fun tree line -> 12 - match String.index_opt line '=' with 13 - | None -> tree 14 - | Some i -> 15 - let k = String.sub line 0 i in 16 - let v = String.sub line (i + 1) (String.length line - i - 1) in 17 - Tree.Mst.add tree [ k ] v) 18 - (Tree.Mst.empty ()) data 31 + let dir_parse data : S.children = 32 + let len = String.length data in 33 + let rec loop pos acc = 34 + if pos >= len then S.Named (List.rev acc) 35 + else 36 + match String.index_from_opt data pos '\x00' with 37 + | None -> S.Named (List.rev acc) 38 + | Some null_pos -> 39 + let name = String.sub data pos (null_pos - pos) in 40 + let hash_start = null_pos + 1 in 41 + if hash_start + hash_size > len then S.Named (List.rev acc) 42 + else 43 + let hash_bytes = String.sub data hash_start hash_size in 44 + let h = Digestif.SHA256.of_raw_string hash_bytes in 45 + loop (hash_start + hash_size) ((name, `Link h) :: acc) 19 46 in 20 - let root = Tree.Mst.hash tree ~backend in 47 + loop 0 [] 21 48 22 - let path = [ key ] in 23 - let proof, result = 24 - Proof.Mst.produce backend root (fun t -> 25 - let v = Proof.Mst.Tree.find t path in 26 - (t, v)) 49 + let ( => ) = S.( => ) 50 + 51 + let tree = 52 + S.fix (fun self -> S.node ~dec:dir_parse ~enc:dir_serialize [ "*" => self ]) 53 + 54 + (** Build a flat tree from key=value pairs. Each key is a direct child of the 55 + root; values are stored as blobs. *) 56 + let build_tree (heap : (Digestif.SHA256.t, string, _) Irmin.Heap.t) 57 + (entries : (string * string) list) : Digestif.SHA256.t = 58 + let children = 59 + List.map 60 + (fun (k, v) -> 61 + let h = Digestif.SHA256.digest_string v in 62 + Irmin.Heap.put heap h v; 63 + (k, (`Link h : S.child))) 64 + entries 27 65 in 66 + let sorted = List.sort (fun (a, _) (b, _) -> String.compare a b) children in 67 + let block = dir_serialize (S.Named sorted) in 68 + let h = Digestif.SHA256.digest_string block in 69 + Irmin.Heap.put heap h block; 70 + h 28 71 29 - let hash_str h = String.sub (Atp.Cid.to_string h) 0 16 in 30 - let before_hash = 31 - match Proof.before proof with 32 - | `Node h -> hash_str h 33 - | `Contents h -> hash_str h 72 + (** In-memory mutable heap backed by a Hashtbl. *) 73 + module Mem_backend : 74 + Irmin.Heap.BACKEND 75 + with type t = (string, string) Hashtbl.t 76 + and type hash = Digestif.SHA256.t 77 + and type block = string = struct 78 + type t = (string, string) Hashtbl.t 79 + type hash = Digestif.SHA256.t 80 + type block = string 81 + 82 + let key h = Digestif.SHA256.to_raw_string h 83 + let find tbl h = Hashtbl.find_opt tbl (key h) 84 + let put tbl h data = Hashtbl.replace tbl (key h) data 85 + let mem tbl h = Hashtbl.mem tbl (key h) 86 + let batch tbl l = List.iter (fun (h, data) -> put tbl h data) l 87 + let find_ref _ _ = None 88 + let set_ref _ _ _ = () 89 + let del_ref _ _ = () 90 + let list_refs _ = [] 91 + let cas_ref _ _ ~test:_ ~set:_ = false 92 + let flush _ = () 93 + let close _ = () 94 + end 95 + 96 + module Mem_heap = Irmin.Heap.Make (Mem_backend) 97 + 98 + let memory_heap () = Mem_heap.v (Hashtbl.create 64) 99 + 100 + let parse_entries data = 101 + List.filter_map 102 + (fun line -> 103 + match String.index_opt line '=' with 104 + | None -> None 105 + | Some i -> 106 + let k = String.sub line 0 i in 107 + let v = String.sub line (i + 1) (String.length line - i - 1) in 108 + Some (k, v)) 109 + data 110 + 111 + let produce ~output ~key data = 112 + let entries = parse_entries data in 113 + let heap = memory_heap () in 114 + let root = build_tree heap entries in 115 + let proof, result = 116 + S.produce heap tree root (fun c -> 117 + let v = S.find c [ key ] in 118 + (c, v)) 34 119 in 35 - let after_hash = 36 - match Proof.after proof with 37 - | `Node h -> hash_str h 38 - | `Contents h -> hash_str h 120 + let hash_str h = 121 + let hex = Digestif.SHA256.to_hex h in 122 + String.sub hex 0 (min 16 (String.length hex)) 39 123 in 40 - 124 + let before_hash = hash_str proof.before in 125 + let after_hash = hash_str proof.after in 41 126 (match output with 42 127 | `Human -> 43 128 Fmt.pr "Root: %s@." (hash_str root); ··· 47 132 Fmt.pr "After: %s@." after_hash 48 133 | `Json -> 49 134 Fmt.pr {|{"root":%S,"key":%S,"value":%s,"before":%S,"after":%S}@.|} 50 - (Atp.Cid.to_string root) key 135 + (Digestif.SHA256.to_hex root) 136 + key 51 137 (match result with Some v -> Fmt.str "%S" v | None -> "null") 52 138 before_hash after_hash); 53 139 0 54 140 55 141 let verify ~output ~key data = 56 - let backend = Backend.Memory.cid () in 57 - let tree = 58 - List.fold_left 59 - (fun tree line -> 60 - match String.index_opt line '=' with 61 - | None -> tree 62 - | Some i -> 63 - let k = String.sub line 0 i in 64 - let v = String.sub line (i + 1) (String.length line - i - 1) in 65 - Tree.Mst.add tree [ k ] v) 66 - (Tree.Mst.empty ()) data 67 - in 68 - let root = Tree.Mst.hash tree ~backend in 69 - 70 - let path = [ key ] in 142 + let entries = parse_entries data in 143 + let heap = memory_heap () in 144 + let root = build_tree heap entries in 71 145 let proof, _ = 72 - Proof.Mst.produce backend root (fun t -> 73 - let v = Proof.Mst.Tree.find t path in 74 - (t, v)) 146 + S.produce heap tree root (fun c -> 147 + let v = S.find c [ key ] in 148 + (c, v)) 75 149 in 76 - 77 150 match 78 - Proof.Mst.verify ~expected_root:(`Node root) proof (fun t -> 79 - let v = Proof.Mst.Tree.find t path in 80 - (t, v)) 151 + S.verify proof tree (fun c -> 152 + let v = S.find c [ key ] in 153 + (c, v)) 81 154 with 82 - | Ok (_, v) -> 155 + | Ok v -> 83 156 (match output with 84 157 | `Human -> 85 158 Common.success "Verified: %s" (Option.value ~default:"<none>" v) ··· 87 160 Fmt.pr {|{"verified":true,"value":%s}@.|} 88 161 (match v with Some x -> Fmt.str "%S" x | None -> "null")); 89 162 0 90 - | Error (`Proof_mismatch msg) -> 163 + | Error (`Proof_failure msg) -> 91 164 (match output with 92 165 | `Human -> Common.error "Invalid: %s" msg 93 166 | `Json -> Fmt.pr {|{"verified":false,"error":%S}@.|} msg);
+6 -257
bin/cmd_serve.ml
··· 1 - (** [irmin serve] — serve any Irmin store over XRPC as a read-only ATProto PDS. 1 + (** [irmin serve] -- stub for XRPC server (requires full rewrite for new API). 2 2 *) 3 3 4 - (* JSON helpers — minimal, no extra dependency *) 5 - 6 - let json_escape s = 7 - let b = Buffer.create (String.length s) in 8 - String.iter 9 - (function 10 - | '"' -> Buffer.add_string b {|\"|} 11 - | '\\' -> Buffer.add_string b {|\\|} 12 - | '\n' -> Buffer.add_string b {|\n|} 13 - | '\r' -> Buffer.add_string b {|\r|} 14 - | '\t' -> Buffer.add_string b {|\t|} 15 - | c -> 16 - let code = Char.code c in 17 - if code < 0x20 then Buffer.add_string b (Fmt.str {|\u%04x|} code) 18 - else Buffer.add_char b c) 19 - s; 20 - Buffer.contents b 21 - 22 - (** Convert IPLD value to JSON string, handling CID links as ["$link":"..."] *) 23 - let rec ipld_to_json : Atp.Dagcbor.value -> string = function 24 - | `Null -> "null" 25 - | `Bool b -> if b then "true" else "false" 26 - | `Int i -> Int64.to_string i 27 - | `Float f -> Fmt.str "%g" f 28 - | `String s -> Fmt.str {|"%s"|} (json_escape s) 29 - | `Bytes s -> Fmt.str {|{"$bytes":"%s"}|} (Base64.encode_exn s) 30 - | `Link cid -> 31 - Fmt.str {|{"$link":"%s"}|} (json_escape (Atp.Cid.to_string cid)) 32 - | `List vs -> 33 - let items = List.map ipld_to_json vs in 34 - "[" ^ String.concat "," items ^ "]" 35 - | `Map kvs -> 36 - let entries = 37 - List.map 38 - (fun (k, v) -> Fmt.str {|"%s":%s|} (json_escape k) (ipld_to_json v)) 39 - kvs 40 - in 41 - "{" ^ String.concat "," entries ^ "}" 42 - 43 - (** Try to decode content as DAG-CBOR and convert to JSON. Falls back to raw 44 - string for non-CBOR content (e.g. Git backend). *) 45 - let content_to_json content = 46 - match Atp.Dagcbor.decode_string ~cid_format:`Atproto content with 47 - | v -> ipld_to_json v 48 - | exception (Failure _ | Invalid_argument _) -> ( 49 - match Atp.Dagcbor.decode_string content with 50 - | v -> ipld_to_json v 51 - | exception (Failure _ | Invalid_argument _) -> 52 - Fmt.str {|"%s"|} (json_escape content)) 53 - 54 - (** {1 Route helpers} *) 55 - 56 - let param key params = List.assoc_opt key params 57 - 58 - let require_param key params = 59 - match param key params with 60 - | Some v -> v 61 - | None -> 62 - raise (Invalid_argument (Fmt.str "missing required parameter: %s" key)) 63 - 64 - (** {1 Individual XRPC handlers} *) 65 - 66 - let handle_describe_server ~did _params = 67 - Xrpc_server.Json 68 - (Fmt.str {|{"did":"%s","availableUserDomains":[],"links":{}}|} 69 - (json_escape did)) 70 - 71 - let handle_describe_repo ~did ~tree ~tree_list params = 72 - let _repo_did = require_param "repo" params in 73 - match tree () with 74 - | None -> 75 - Xrpc_server.Json 76 - (Fmt.str 77 - {|{"handle":"","did":"%s","didDoc":{},"collections":[],"handleIsCorrect":false}|} 78 - (json_escape did)) 79 - | Some t -> 80 - let entries = tree_list t [] in 81 - let collections = 82 - List.filter_map 83 - (fun (name, typ) -> 84 - match typ with `Node -> Some name | `Contents -> None) 85 - entries 86 - in 87 - let colls_json = 88 - "[" 89 - ^ String.concat "," 90 - (List.map (fun c -> Fmt.str {|"%s"|} (json_escape c)) collections) 91 - ^ "]" 92 - in 93 - Xrpc_server.Json 94 - (Fmt.str 95 - {|{"handle":"","did":"%s","didDoc":{},"collections":%s,"handleIsCorrect":false}|} 96 - (json_escape did) colls_json) 97 - 98 - let handle_get_record ~did ~tree ~tree_find params = 99 - let _repo = require_param "repo" params in 100 - let collection = require_param "collection" params in 101 - let rkey = require_param "rkey" params in 102 - match tree () with 103 - | None -> Xrpc_server.Error (404, "RecordNotFound", Some "Repository is empty") 104 - | Some t -> ( 105 - let path = [ collection; rkey ] in 106 - match tree_find t path with 107 - | None -> 108 - Xrpc_server.Error 109 - ( 404, 110 - "RecordNotFound", 111 - Some (Fmt.str "Record not found: %s/%s" collection rkey) ) 112 - | Some content -> 113 - let value_json = content_to_json content in 114 - let uri = Fmt.str "at://%s/%s/%s" did collection rkey in 115 - Xrpc_server.Json 116 - (Fmt.str {|{"uri":"%s","value":%s}|} (json_escape uri) value_json)) 117 - 118 - let handle_list_records ~did ~tree ~tree_list ~tree_find params = 119 - let _repo = require_param "repo" params in 120 - let collection = require_param "collection" params in 121 - let limit = 122 - match param "limit" params with 123 - | Some s -> ( match int_of_string_opt s with Some n -> n | None -> 50) 124 - | None -> 50 125 - in 126 - let cursor = param "cursor" params in 127 - match tree () with 128 - | None -> Xrpc_server.Json {|{"records":[],"cursor":""}|} 129 - | Some t -> 130 - let entries = tree_list t [ collection ] in 131 - let records = 132 - List.filter_map 133 - (fun (name, typ) -> 134 - match typ with `Contents -> Some name | `Node -> None) 135 - entries 136 - in 137 - let records = 138 - match cursor with 139 - | None -> records 140 - | Some c -> 141 - let rec skip = function 142 - | [] -> [] 143 - | r :: rest -> if r = c then rest else skip rest 144 - in 145 - skip records 146 - in 147 - let records = List.filteri (fun i _ -> i < limit) records in 148 - let record_jsons = 149 - List.filter_map 150 - (fun rkey -> 151 - match tree_find t [ collection; rkey ] with 152 - | None -> None 153 - | Some content -> 154 - let value_json = content_to_json content in 155 - let uri = Fmt.str "at://%s/%s/%s" did collection rkey in 156 - Some 157 - (Fmt.str {|{"uri":"%s","value":%s}|} (json_escape uri) 158 - value_json)) 159 - records 160 - in 161 - let next_cursor = 162 - match List.rev records with [] -> "" | last :: _ -> last 163 - in 164 - let records_json = "[" ^ String.concat "," record_jsons ^ "]" in 165 - Xrpc_server.Json 166 - (Fmt.str {|{"records":%s,"cursor":"%s"}|} records_json 167 - (json_escape next_cursor)) 168 - 169 - let handle_get_repo ~pds params = 170 - let _did = require_param "did" params in 171 - match pds with 172 - | Some pds_store -> 173 - let car_data = Pds.export_car pds_store in 174 - Xrpc_server.Binary (car_data, "application/vnd.ipld.car") 175 - | None -> 176 - Xrpc_server.Error 177 - ( 400, 178 - "MethodNotSupported", 179 - Some "CAR export not supported for this backend" ) 180 - 181 - let handle_get_blob ~pds params = 182 - let _did = require_param "did" params in 183 - let cid_str = require_param "cid" params in 184 - match pds with 185 - | Some pds_store -> ( 186 - let cid = Atp.Cid.of_string cid_str in 187 - match Pds.blob pds_store cid with 188 - | Some data -> Xrpc_server.Binary (data, "application/octet-stream") 189 - | None -> Xrpc_server.Error (404, "BlobNotFound", Some "Blob not found")) 190 - | None -> 191 - Xrpc_server.Error 192 - ( 400, 193 - "MethodNotSupported", 194 - Some "Blob storage not supported for this backend" ) 195 - 196 - (** Build XRPC routes from a store. *) 197 - let routes ~(store : Irmin.t) ~branch ~did ~(pds : Pds.t option) = 198 - let tree () = Irmin.checkout store ~branch in 199 - Xrpc_server. 200 - [ 201 - { 202 - nsid = "com.atproto.server.describeServer"; 203 - handler = handle_describe_server ~did; 204 - }; 205 - { 206 - nsid = "com.atproto.repo.describeRepo"; 207 - handler = handle_describe_repo ~did ~tree ~tree_list:Irmin.Tree.list; 208 - }; 209 - { 210 - nsid = "com.atproto.repo.getRecord"; 211 - handler = handle_get_record ~did ~tree ~tree_find:Irmin.Tree.find; 212 - }; 213 - { 214 - nsid = "com.atproto.repo.listRecords"; 215 - handler = 216 - handle_list_records ~did ~tree ~tree_list:Irmin.Tree.list 217 - ~tree_find:Irmin.Tree.find; 218 - }; 219 - { nsid = "com.atproto.sync.getRepo"; handler = handle_get_repo ~pds }; 220 - { nsid = "com.atproto.sync.getBlob"; handler = handle_get_blob ~pds }; 221 - ] 222 - 223 - let run ~repo ~branch ~port ~did ~format:_ = 224 - let config = Config.load ~repo () in 225 - Eio_main.run @@ fun env -> 226 - let fs = Eio.Stdenv.cwd env in 227 - let net = Eio.Stdenv.net env in 228 - Eio.Switch.run @@ fun sw -> 229 - let store = Common.open_store ~sw ~fs ~config in 230 - 231 - (* If PDS backend, get the PDS handle for blob/CAR support *) 232 - let pds = 233 - match config.backend with 234 - | Config.Pds -> 235 - let path = Eio.Path.(fs / config.store_path) in 236 - Some (Pds.open_ ~sw path) 237 - | _ -> None 238 - in 239 - 240 - (* Resolve DID: CLI flag > PDS metadata > placeholder *) 241 - let did = 242 - match did with 243 - | Some d -> d 244 - | None -> ( 245 - match pds with 246 - | Some p -> Atp.Did.to_string (Pds.did p) 247 - | None -> "did:web:localhost") 248 - in 249 - 250 - let routes = routes ~store ~branch ~did ~pds in 251 - let server = Xrpc_server.v ~routes in 252 - let on_listen actual_port = 253 - Fmt.pr "Serving %a store at http://localhost:%d/xrpc/...@." 254 - Config.pp_backend config.backend actual_port; 255 - Fmt.pr " DID: %s@." did; 256 - Fmt.pr " Backend: %a@." Config.pp_backend config.backend; 257 - Fmt.pr " Branch: %s@." branch 258 - in 259 - Xrpc_server.run server ~net ~port ~sw ~on_listen 4 + let run ~repo ~branch ~port ~did ~format = 5 + ignore (repo, branch, port, did, format); 6 + Common.error "serve: not yet implemented with new Schema API"; 7 + Common.error 8 + " (needs XRPC route handlers rewritten for cursor-based navigation)"
+27 -13
bin/cmd_set.ml
··· 1 1 (** Set command. *) 2 2 3 + module S = Common.S 4 + 3 5 let run ~repo ~branch ~message path content = 4 6 let content = 5 7 match content with Some c -> c | None -> In_channel.(input_all stdin) 6 8 in 7 - let message = match message with Some m -> m | None -> "Set " ^ path in 9 + let _message = match message with Some m -> m | None -> "Set " ^ path in 8 10 let config = Config.load ~repo () in 9 11 Eio_main.run @@ fun env -> 10 12 let fs = Eio.Stdenv.cwd env in 11 13 Eio.Switch.run @@ fun sw -> 12 - let store = Common.open_store ~sw ~fs ~config in 13 - let tree = 14 - match Irmin.checkout store ~branch with 15 - | None -> Irmin.Tree.empty 16 - | Some t -> t 14 + let heap = Common.open_store ~sw ~fs ~config in 15 + let c = 16 + match Common.checkout heap ~branch with 17 + | None -> S.empty heap Irmin_git.tree 18 + | Some c -> c 17 19 in 18 - let tree = Irmin.Tree.add tree (Common.path_of_string path) content in 19 - let parents = 20 - match Irmin.head store ~branch with None -> [] | Some h -> [ h ] 20 + (* Navigate to the parent, creating path segments as needed *) 21 + let steps = Common.path_of_string path in 22 + let parent_steps = 23 + match List.rev steps with _ :: r -> List.rev r | [] -> [] 21 24 in 22 - let hash = 23 - Irmin.commit store ~tree ~parents ~message ~author:"irmin <irmin@local>" 25 + let name = match List.rev steps with n :: _ -> n | [] -> path in 26 + (* Navigate to the parent path *) 27 + let parent_c = 28 + match parent_steps with [] -> Some c | _ -> Common.navigate c parent_steps 24 29 in 25 - Irmin.set_head store ~branch hash; 26 - Common.success "%a" Common.styled_faint (Irmin.Hash.short hash) 30 + let c = 31 + match parent_c with 32 + | Some pc -> S.set pc name content 33 + | None -> 34 + (* Parent path doesn't exist yet; set at the top level *) 35 + S.set c name content 36 + in 37 + let new_hash = S.flush c heap in 38 + S.set_head heap ~branch new_hash; 39 + Common.success "%a" Common.styled_faint 40 + (Fmt.str "%a" Irmin.Hash.pp_short new_hash)
+41 -26
bin/cmd_tree.ml
··· 1 1 (** Tree command. *) 2 2 3 + module S = Common.S 4 + 3 5 let run ~repo ~branch ~output path = 4 6 let config = Config.load ~repo () in 5 7 Eio_main.run @@ fun env -> 6 8 let fs = Eio.Stdenv.cwd env in 7 9 Eio.Switch.run @@ fun sw -> 8 - let store = Common.open_store ~sw ~fs ~config in 9 - match Irmin.checkout store ~branch with 10 + let heap = Common.open_store ~sw ~fs ~config in 11 + match Common.checkout heap ~branch with 10 12 | None -> 11 13 Common.error "Branch %a not found" Common.styled_cyan branch; 12 14 1 13 - | Some tree -> 14 - let start_path = 15 + | Some root_c -> ( 16 + let start = 15 17 match path with None -> [] | Some p -> Common.path_of_string p 16 18 in 17 - let rec walk indent path = 18 - let entries = Irmin.Tree.list tree path in 19 - List.iter 20 - (fun (name, kind) -> 21 - let full_path = path @ [ name ] in 22 - match kind with 23 - | `Contents -> ( 24 - match output with 25 - | `Human -> Fmt.pr "%s%s@." indent name 26 - | `Json -> 27 - Fmt.pr {|{"path":%S,"type":"file"}@.|} 28 - (String.concat "/" full_path)) 29 - | `Node -> 30 - (match output with 31 - | `Human -> Fmt.pr "%s%a/@." indent Common.styled_blue name 32 - | `Json -> 33 - Fmt.pr {|{"path":%S,"type":"dir"}@.|} 34 - (String.concat "/" full_path)); 35 - walk (indent ^ " ") full_path) 36 - entries 19 + (* Navigate to the start path *) 20 + let start_c = 21 + match start with [] -> Some root_c | _ -> Common.navigate root_c start 37 22 in 38 - walk "" start_path; 39 - 0 23 + match start_c with 24 + | None -> 25 + Common.error "Path %a not found" Common.styled_cyan 26 + (Option.value ~default:"/" path); 27 + 1 28 + | Some c -> 29 + let rec walk indent prefix c = 30 + let entries = S.list c in 31 + List.iter 32 + (fun (name, kind) -> 33 + let full_path = prefix @ [ name ] in 34 + match kind with 35 + | `Leaf -> ( 36 + match output with 37 + | `Human -> Fmt.pr "%s%s@." indent name 38 + | `Json -> 39 + Fmt.pr {|{"path":%S,"type":"file"}@.|} 40 + (String.concat "/" full_path)) 41 + | `Node -> ( 42 + (match output with 43 + | `Human -> Fmt.pr "%s%a/@." indent Common.styled_blue name 44 + | `Json -> 45 + Fmt.pr {|{"path":%S,"type":"dir"}@.|} 46 + (String.concat "/" full_path)); 47 + (* Navigate into child using Git's entry -> target pattern *) 48 + match Common.navigate c [ name ] with 49 + | Some child_c -> walk (indent ^ " ") full_path child_c 50 + | None -> ())) 51 + entries 52 + in 53 + walk "" start c; 54 + 0)
+34 -9
bin/common.ml
··· 1 1 (** Common CLI helpers. *) 2 2 3 - open Irmin 4 - 5 3 (* Output helpers using Tty *) 6 4 let success fmt = 7 5 let style = Tty.Style.(fg Tty.Color.green) in ··· 20 18 let path_of_string s = 21 19 String.split_on_char '/' s |> List.filter (fun s -> s <> "") 22 20 23 - let open_store ~sw ~fs ~(config : Config.t) : t = 21 + (* ===== Store ===== *) 22 + 23 + module S = Irmin_git.S 24 + 25 + type store = (Irmin.Hash.sha1, string, unit) Irmin.Heap.t 26 + 27 + let open_store ~sw ~fs ~(config : Config.t) : store = 24 28 match config.backend with 25 - | Config.Git -> Git.open_ ~sw ~fs ~path:(Fpath.v config.store_path) 26 - | Config.Pds -> 27 - let path = Eio.Path.(fs / config.store_path) in 28 - Atproto.(of_pds (Pds.open_ ~sw path) |> v) 29 - | Config.Memory -> Atproto.(memory () |> v) 30 - | Config.Disk -> Atproto.(disk ~sw Eio.Path.(fs / config.store_path) |> v) 29 + | Config.Git -> Irmin_git.open_ ~sw ~fs ~path:(Fpath.v config.store_path) 30 + | Config.Pds | Config.Memory | Config.Disk -> 31 + failwith "only Git backend supported in CLI" 32 + 33 + (* ===== Git tree navigation ===== *) 34 + 35 + let checkout heap ~branch = 36 + match S.head heap ~branch with 37 + | None -> None 38 + | Some h -> Some (S.at heap Irmin_git.tree h) 39 + 40 + (** Navigate a Git tree, following entry → target links at each step. *) 41 + let rec navigate c = function 42 + | [] -> Some c 43 + | name :: rest -> ( 44 + match S.step c name with 45 + | None -> None 46 + | Some entry -> ( 47 + match S.step entry "target" with 48 + | None -> None 49 + | Some target -> navigate target rest)) 50 + 51 + let find_content c path = 52 + match navigate c path with None -> None | Some c -> S.get c 53 + 54 + let list_children c path = 55 + match navigate c path with None -> [] | Some c -> S.list c
+3
bin/dune
··· 3 3 (public_name irmin) 4 4 (libraries 5 5 irmin 6 + irmin_git 7 + digestif 8 + git 6 9 fpath 7 10 atp 8 11 atp-xrpc-server
+1
dune-project
··· 33 33 (wal (>= 0.1)) 34 34 (bloom (>= 0.1)) 35 35 (cbort (>= 0.1)) 36 + (merge3 (>= 0.1)) 36 37 (base64 (>= 3.5)) 37 38 (alcotest :with-test) 38 39 (crowbar :with-test)))
+1
irmin.opam
··· 27 27 "wal" {>= "0.1"} 28 28 "bloom" {>= "0.1"} 29 29 "cbort" {>= "0.1"} 30 + "merge3" {>= "0.1"} 30 31 "base64" {>= "3.5"} 31 32 "alcotest" {with-test} 32 33 "crowbar" {with-test}
+93 -359
lib/atproto/irmin_atproto.ml
··· 1 1 (** ATProto backend for Irmin. 2 2 3 - Uses the real ATProto MST format. Lexicons determine which paths are 4 - collections (nodes) vs records (contents). *) 3 + Maps ATProto repositories to Irmin heaps. Records are DAG-CBOR blocks 4 + navigable via {!Irmin.Schema}. The MST structure is accessed through 5 + {!mst_store} and the [Mst] library. *) 5 6 6 - (* ===== Schema: runtime lexicon registry ===== *) 7 + (* ===== Schema instance ===== *) 7 8 8 - module Lexicon = struct 9 - type t = { 10 - collections : (string, string) Hashtbl.t; 11 - (* nsid → key_type ("tid", "any", etc) *) 12 - } 13 - 14 - let empty = { collections = Hashtbl.create 16 } 15 - 16 - let load json_string registry = 17 - let open Hermest.Lexicon_types in 18 - match Jsont_bytesrw.decode_string' lexicon_doc_jsont json_string with 19 - | Ok doc -> 20 - List.iter 21 - (fun (entry : def_entry) -> 22 - if entry.name = "main" then 23 - match entry.type_def with 24 - | Record r -> Hashtbl.replace registry.collections doc.id r.key 25 - | _ -> ()) 26 - doc.defs; 27 - registry 28 - | Error _ -> registry 29 - 30 - let load_dir _path = empty (* TODO *) 31 - let is_collection t nsid = Hashtbl.mem t.collections nsid 32 - let record_key_type t nsid = Hashtbl.find_opt t.collections nsid 33 - end 34 - 35 - (* ===== Contents ===== *) 36 - 37 - module Contents = struct 38 - type t = string 9 + module S = Irmin.Schema.Make (struct 39 10 type hash = Atp.Cid.t 40 - 41 - let encode v ~eod w = 42 - Bytesrw.Bytes.Writer.write_string w v; 43 - if eod then Bytesrw.Bytes.Writer.write w Bytesrw.Bytes.Slice.eod 44 - 45 - let decode r = 46 - let buf = Buffer.create 256 in 47 - (try 48 - while true do 49 - let s = Bytesrw.Bytes.Reader.read r in 50 - if Bytesrw.Bytes.Slice.is_eod s then raise Exit 51 - else Buffer.add_string buf (Bytesrw.Bytes.Slice.to_string s) 52 - done 53 - with Exit -> ()); 54 - Ok (Buffer.contents buf) 55 - 56 - let hash data = Atp.Cid.v `Dag_cbor data 57 - let merge = Irmin.Contents.Last_writer_wins 58 - let equal = String.equal 59 - end 60 - 61 - (* ===== Node: MST-backed with schema-driven structure ===== *) 62 - 63 - module Node = struct 64 - type hash = Atp.Cid.t 65 - type step = string 66 - type value = [ `Contents of hash | `Node of hash ] 67 - 68 - (* A node is a view over an MST at a certain prefix depth. 69 - The node carries the block store and schema so find/list can 70 - traverse the MST and decide what's a collection vs record. *) 71 - type t = { 72 - mst : Atp.Mst.node; 73 - store : Atp.Blockstore.writable; 74 - prefix : string; 75 - schema : Lexicon.t; 76 - } 77 - 78 - let empty = 79 - { 80 - mst = Atp.Mst.empty; 81 - store = Atp.Blockstore.memory (); 82 - prefix = ""; 83 - schema = Lexicon.empty; 84 - } 85 - 86 - let is_empty t = 87 - let bs = (t.store :> Atp.Blockstore.readable) in 88 - match Atp.Mst.leaves t.mst ~store:bs () with 89 - | Seq.Nil -> true 90 - | Seq.Cons _ -> false 91 - 92 - let step_is_collection t step = 93 - if t.prefix = "" then Lexicon.is_collection t.schema step else false 94 - 95 - let find t step = 96 - let bs = (t.store :> Atp.Blockstore.readable) in 97 - if step_is_collection t step then begin 98 - let lo = t.prefix ^ step ^ "/" in 99 - let hi_b = Bytes.of_string lo in 100 - let last = Bytes.length hi_b - 1 in 101 - Bytes.set_uint8 hi_b last (Bytes.get_uint8 hi_b last + 1); 102 - let hi = Bytes.to_string hi_b in 103 - match Atp.Mst.range t.mst ~store:bs ~lo ~hi () with 104 - | Seq.Nil -> None 105 - | Seq.Cons _ -> 106 - (* Build a sub-MST CID for this collection *) 107 - let sub_store = Atp.Blockstore.memory () in 108 - Atp.Mst.range t.mst ~store:bs ~lo ~hi 109 - |> Seq.iter (fun (k, cid) -> 110 - let rkey = 111 - String.sub k (String.length lo) 112 - (String.length k - String.length lo) 113 - in 114 - ignore (Atp.Mst.add rkey cid Atp.Mst.empty ~store:sub_store)); 115 - (* TODO: this is wrong — we need to build a proper sub-MST *) 116 - None 117 - end 118 - else begin 119 - let key = t.prefix ^ step in 120 - match Atp.Mst.find key t.mst ~store:bs with 121 - | Some cid -> Some (`Contents cid) 122 - | None -> None 123 - end 11 + type block = string 124 12 125 - let add t step value = 126 - match value with 127 - | `Contents cid -> 128 - let key = t.prefix ^ step in 129 - let mst = Atp.Mst.add key cid t.mst ~store:t.store in 130 - { t with mst } 131 - | `Node _cid -> t (* TODO: merge sub-MST *) 132 - 133 - let remove t step = 134 - let key = t.prefix ^ step in 135 - let mst = Atp.Mst.remove key t.mst ~store:t.store in 136 - { t with mst } 137 - 138 - let list t = 139 - let bs = (t.store :> Atp.Blockstore.readable) in 140 - let plen = String.length t.prefix in 141 - let seen = Hashtbl.create 16 in 142 - let out = ref [] in 143 - Atp.Mst.leaves t.mst ~store:bs 144 - |> Seq.iter (fun (k, _cid) -> 145 - if String.length k > plen && (plen = 0 || String.sub k 0 plen = t.prefix) 146 - then begin 147 - let rest = String.sub k plen (String.length k - plen) in 148 - let name = 149 - match String.index_opt rest '/' with 150 - | Some i -> String.sub rest 0 i 151 - | None -> rest 152 - in 153 - if name <> "" && not (Hashtbl.mem seen name) then begin 154 - Hashtbl.replace seen name (); 155 - let kind = 156 - if step_is_collection t name then `Node (Atp.Cid.empty `Dag_cbor) 157 - else `Contents (Atp.Cid.empty `Dag_cbor) 158 - in 159 - out := (name, kind) :: !out 160 - end 161 - end); 162 - List.sort (fun (a, _) (b, _) -> String.compare a b) !out 163 - 164 - let encode _t ~eod:_ _w = () 165 - let decode _r = Error (`Msg "ATProto Node.decode: use of_hash") 166 - let hash t = Atp.Mst.to_cid t.mst ~store:t.store 167 - 168 - (* Load by reading the MST root and eagerly flattening all entries. 169 - For the root-level node, entries are "collection/rkey → CID". 170 - For a collection-level node, entries are "rkey → CID". *) 171 - let load _read_block _h = None 172 13 let hash_equal = Atp.Cid.equal 173 - let hash_compare = Atp.Cid.compare 174 - let hash_to_hex = Atp.Cid.to_string 175 - 176 - let hash_of_hex s = 177 - try Ok (Atp.Cid.of_string s) with _ -> Error (`Msg ("bad CID: " ^ s)) 178 - 179 - let hash_to_bytes = Atp.Cid.to_raw_bytes 180 - 181 - type commit = { 182 - tree : hash; 183 - parents : hash list; 184 - author : string; 185 - message : string; 186 - } 14 + let hash_block data = Atp.Cid.v `Dag_cbor data 15 + end) 187 16 188 - let commit_v ~tree ~parents ~author ~message () = 189 - { tree; parents; author; message } 17 + (* ===== DAG-CBOR record codec ===== *) 190 18 191 - let commit_tree c = c.tree 192 - let commit_parents c = c.parents 193 - let commit_author c = c.author 194 - let commit_message c = c.message 19 + let record_parse : S.dec = 20 + fun data -> 21 + match Atp.Dagcbor.decode_string ~cid_format:`Atproto data with 22 + | `Map entries -> 23 + S.Named 24 + (List.map 25 + (fun (name, v) -> 26 + let child_data = 27 + Atp.Dagcbor.encode_string ~cid_format:`Atproto v 28 + in 29 + match v with 30 + | `Link cid -> (name, (`Link cid : S.child)) 31 + | _ -> (name, `Inline child_data)) 32 + entries) 33 + | `List items -> 34 + S.Indexed 35 + (Array.of_list 36 + (List.map 37 + (fun v -> 38 + let data = Atp.Dagcbor.encode_string ~cid_format:`Atproto v in 39 + match v with 40 + | `Link cid -> (`Link cid : S.child) 41 + | _ -> `Inline data) 42 + items)) 43 + | _ -> S.Named [] 195 44 196 - let commit_encode c ~eod:_ w = 197 - let v : Atp.Dagcbor.value = 198 - `Map 199 - [ 200 - ("author", `String c.author); 201 - ("message", `String c.message); 202 - ("parents", `List (List.map (fun cid -> `Link cid) c.parents)); 203 - ("tree", `Link c.tree); 204 - ] 205 - in 206 - Bytesrw.Bytes.Writer.write_string w 207 - (Atp.Dagcbor.encode_string ~cid_format:`Atproto v) 45 + let record_serialize : S.enc = function 46 + | S.Named children -> 47 + let entries = 48 + List.map 49 + (fun (name, child) -> 50 + let v = 51 + match child with 52 + | `Link cid -> `Link cid 53 + | `Inline data -> 54 + Atp.Dagcbor.decode_string ~cid_format:`Atproto data 55 + in 56 + (name, v)) 57 + children 58 + in 59 + Atp.Dagcbor.encode_string ~cid_format:`Atproto (`Map entries) 60 + | S.Indexed children -> 61 + let items = 62 + Array.to_list 63 + (Array.map 64 + (fun child -> 65 + match child with 66 + | `Link cid -> `Link cid 67 + | `Inline data -> 68 + Atp.Dagcbor.decode_string ~cid_format:`Atproto data) 69 + children) 70 + in 71 + Atp.Dagcbor.encode_string ~cid_format:`Atproto (`List items) 208 72 209 - let commit_decode r = 210 - let buf = Buffer.create 256 in 211 - (try 212 - while true do 213 - let s = Bytesrw.Bytes.Reader.read r in 214 - if Bytesrw.Bytes.Slice.is_eod s then raise Exit 215 - else Buffer.add_string buf (Bytesrw.Bytes.Slice.to_string s) 216 - done 217 - with Exit -> ()); 218 - try 219 - match 220 - Atp.Dagcbor.decode_string ~cid_format:`Atproto (Buffer.contents buf) 221 - with 222 - | `Map entries -> 223 - let find k = List.assoc_opt k entries in 224 - let tree = 225 - match find "tree" with 226 - | Some (`Link cid) -> cid 227 - | _ -> failwith "missing tree" 228 - in 229 - let parents = 230 - match find "parents" with 231 - | Some (`List l) -> 232 - List.filter_map (function `Link cid -> Some cid | _ -> None) l 233 - | _ -> [] 234 - in 235 - let author = 236 - match find "author" with Some (`String s) -> s | _ -> "" 237 - in 238 - let message = 239 - match find "message" with Some (`String s) -> s | _ -> "" 240 - in 241 - Ok { tree; parents; author; message } 242 - | _ -> Error (`Msg "commit: expected map") 243 - with exn -> Error (`Msg (Printexc.to_string exn)) 73 + let ( => ) = S.( => ) 244 74 245 - let commit_hash c = 246 - let buf = Buffer.create 256 in 247 - let w = Bytesrw.Bytes.Writer.of_buffer buf in 248 - commit_encode c ~eod:false w; 249 - Atp.Cid.v `Dag_cbor (Buffer.contents buf) 250 - end 75 + let record = 76 + S.fix (fun self -> 77 + S.node ~dec:record_parse ~enc:record_serialize [ "*" => self ]) 251 78 252 - (* ===== Storage ===== *) 79 + (* ===== Heap backend ===== *) 253 80 254 - module Block = struct 81 + module Heap_backend : 82 + Irmin.Heap.BACKEND 83 + with type t = Atp.Blockstore.writable 84 + and type hash = Atp.Cid.t 85 + and type block = string = struct 255 86 type t = Atp.Blockstore.writable 256 87 type hash = Atp.Cid.t 88 + type block = string 257 89 258 - let read t h = t#get h 259 - let write t h data = t#put h data 90 + let find t h = t#get h 91 + let put t h d = t#put h d 260 92 let mem t h = t#has h 261 93 let batch t l = List.iter (fun (h, d) -> t#put h d) l 262 - let flush _ = () 94 + let find_ref _ _ = None 95 + let set_ref _ _ _ = () 96 + let del_ref _ _ = () 97 + let list_refs _ = [] 98 + let cas_ref _ _ ~test:_ ~set:_ = false 99 + let flush t = t#sync 263 100 let close _ = () 264 101 end 265 102 266 - module Branch = struct 267 - type t = (string, Atp.Cid.t) Hashtbl.t 268 - type hash = Atp.Cid.t 269 - 270 - let get t name = Hashtbl.find_opt t name 271 - let set t name h = Hashtbl.replace t name h 272 - 273 - let test_and_set t name ~test ~set = 274 - let current = Hashtbl.find_opt t name in 275 - let matches = 276 - match (test, current) with 277 - | None, None -> true 278 - | Some t, Some c -> Atp.Cid.equal t c 279 - | _ -> false 280 - in 281 - if matches then begin 282 - (match set with 283 - | None -> Hashtbl.remove t name 284 - | Some h -> Hashtbl.replace t name h); 285 - true 286 - end 287 - else false 103 + module H = Irmin.Heap.Make (Heap_backend) 288 104 289 - let remove t name = Hashtbl.remove t name 105 + let heap bs = H.v bs 290 106 291 - let list t = 292 - Hashtbl.fold (fun k _ acc -> k :: acc) t [] |> List.sort String.compare 293 - end 107 + (* ===== MST bridge ===== *) 294 108 295 - (* ===== Backend ===== *) 109 + let mst_store (heap : (Atp.Cid.t, string, _) Irmin.Heap.t) : Atp.Cid.t Mst.store 110 + = 111 + { 112 + get = Irmin.Heap.find heap; 113 + put = (fun h d -> Irmin.Heap.put heap h d); 114 + has = Irmin.Heap.mem heap; 115 + } 296 116 297 - type t = { 298 - store : Atp.Blockstore.writable; 299 - branches : Branch.t; 300 - schema : Lexicon.t; 301 - } 117 + (* ===== Convenience ===== *) 302 118 303 - module B : 304 - Irmin.Backend.S 305 - with type t = t 306 - and type hash = Atp.Cid.t 307 - and type block = string 308 - and module Block = Block 309 - and module Branch = Branch = struct 310 - type nonrec t = t 311 - type hash = Atp.Cid.t 312 - type block = string 313 - 314 - module Contents = Contents 315 - module Node = Node 316 - module Block = Block 317 - module Branch = Branch 318 - 319 - let blocks t = t.store 320 - let branches t = t.branches 321 - let tree_schema = Irmin.Schema.P Schema.repo 322 - 323 - module Tree_codec = Codec.C 324 - 325 - let heap t = Heap.v t.store 326 - 327 - let prove t ~tree_root ~key = 328 - let real_heap = heap t in 329 - let recording_heap, get_recorded = Irmin.Heap.recording real_heap in 330 - (* MST search: traverse the MST to find the full key *) 331 - let full_key = String.concat "/" key in 332 - let bs_get h = Irmin.Heap.find recording_heap h in 333 - (* Manual MST search following the tree structure *) 334 - let rec search_mst cid = 335 - match bs_get cid with 336 - | None -> None 337 - | Some data -> ( 338 - try 339 - let node = Atp.Mst.Raw.decode_bytes data in 340 - let entries = 341 - let rec loop prev acc = function 342 - | [] -> List.rev acc 343 - | (e : Atp.Mst.Raw.entry) :: rest -> 344 - let k = String.sub prev 0 e.p ^ e.k in 345 - loop k ((k, e) :: acc) rest 346 - in 347 - loop "" [] node.e 348 - in 349 - (* Binary search through entries *) 350 - let rec find_in_entries left_sub = function 351 - | [] -> 352 - (* Past all entries — check last subtree *) 353 - Option.bind left_sub search_mst 354 - | (k, (e : Atp.Mst.Raw.entry)) :: rest -> 355 - let cmp = String.compare full_key k in 356 - if cmp = 0 then 357 - (* Found the key — value is the record CID *) 358 - bs_get e.v 359 - else if cmp < 0 then 360 - (* Key is before this entry — search left subtree *) 361 - Option.bind left_sub search_mst 362 - else 363 - (* Key is after this entry — continue with right subtree *) 364 - find_in_entries e.t rest 365 - in 366 - find_in_entries node.l entries 367 - with _ -> None) 368 - in 369 - let value = search_mst tree_root in 370 - let blocks = get_recorded () in 371 - let root_bytes = Atp.Cid.to_raw_bytes tree_root in 372 - let proof_blocks = 373 - List.map (fun (h, data) -> (Atp.Cid.to_raw_bytes h, data)) blocks 374 - in 375 - Ok ((root_bytes, proof_blocks), value) 376 - end 377 - 378 - module S = Irmin.Store.Make (B) 379 - 380 - let memory ~schema () : t Irmin.Store.t = 381 - S.v { store = Atp.Blockstore.memory (); branches = Hashtbl.create 4; schema } 382 - 383 - (* Re-export submodules *) 384 - module Schema = Schema 385 - module Codec = Codec 386 - module Heap = Heap 119 + let memory () = heap (Atp.Blockstore.memory ()) 120 + let filesystem path = heap (Atp.Blockstore.filesystem path)
+34 -10
lib/cbor/irmin_cbor.ml
··· 3 3 Parses CBOR maps and arrays from [string] blocks. Uses the cbort library. 4 4 Maps become named children, arrays become indexed children. *) 5 5 6 - module S = Irmin.Schema.Make (struct 7 - type hash = Digestif.SHA256.t 8 - type block = string 9 - 10 - let hash_equal = Digestif.SHA256.equal 11 - let hash_block data = Digestif.SHA256.digest_string data 12 - end) 6 + module S = Irmin.SHA256 13 7 14 - let parse : S.parse = 8 + let parse : S.dec = 15 9 fun block -> 16 10 match Cbort.decode_string Cbort.any block with 17 11 | Error _ -> S.Named [] ··· 39 33 pairs) 40 34 | None -> S.Named [])) 41 35 42 - let serialize : S.serialize = fun _ -> "" 36 + let serialize : S.enc = function 37 + | S.Named children -> 38 + let pairs = 39 + List.filter_map 40 + (fun (name, child) -> 41 + let data = 42 + match child with `Inline data -> data | `Link _ -> "" 43 + in 44 + match Cbort.decode_string Cbort.any data with 45 + | Ok v -> Some (Cbort.Cbor.string name, v) 46 + | Error _ -> None) 47 + children 48 + in 49 + Cbort.encode_string Cbort.any (Cbort.Cbor.map pairs) 50 + | S.Indexed children -> 51 + let items = 52 + Array.to_list 53 + (Array.map 54 + (fun child -> 55 + let data = 56 + match child with `Inline data -> data | `Link _ -> "" 57 + in 58 + match Cbort.decode_string Cbort.any data with 59 + | Ok v -> v 60 + | Error _ -> Cbort.Cbor.map []) 61 + children) 62 + in 63 + Cbort.encode_string Cbort.any (Cbort.Cbor.array items) 64 + 43 65 let ( => ) = S.( => ) 44 - let schema : S.t = S.fix (fun self -> S.node ~parse ~serialize [ "*" => self ]) 66 + 67 + let schema : S.t = 68 + S.fix (fun self -> S.node ~dec:parse ~enc:serialize [ "*" => self ])
+6 -6
lib/git/irmin_git.ml
··· 21 21 22 22 (* ===== Parse functions ===== *) 23 23 24 - let tree_parse : S.parse = 24 + let tree_parse : S.dec = 25 25 fun data -> 26 26 match Git.Tree.of_string data with 27 27 | Ok tree -> ··· 31 31 (entry.name, `Inline (Git.Tree.to_string (Git.Tree.v [ entry ]))))) 32 32 | Error _ -> S.Named [] 33 33 34 - let entry_parse : S.parse = 34 + let entry_parse : S.dec = 35 35 fun data -> 36 36 match Git.Tree.of_string data with 37 37 | Ok tree -> ( ··· 47 47 48 48 (* ===== Domain combinators ===== *) 49 49 50 - let tree_serialize : S.serialize = function 50 + let tree_serialize : S.enc = function 51 51 | S.Named children -> 52 52 let entries = 53 53 List.filter_map ··· 66 66 Git.Tree.to_string (Git.Tree.v entries) 67 67 | S.Indexed _ -> "" 68 68 69 - let entry_serialize : S.serialize = function 69 + let entry_serialize : S.enc = function 70 70 | S.Named children -> ( 71 71 let mode = List.assoc_opt "mode" children in 72 72 let target = List.assoc_opt "target" children in ··· 79 79 | S.Indexed _ -> "" 80 80 81 81 let ( => ) = S.( => ) 82 - let directory rules = S.node ~parse:tree_parse ~serialize:tree_serialize rules 83 - let entry rules = S.node ~parse:entry_parse ~serialize:entry_serialize rules 82 + let directory rules = S.node ~dec:tree_parse ~enc:tree_serialize rules 83 + let entry rules = S.node ~dec:entry_parse ~enc:entry_serialize rules 84 84 85 85 (* ===== Schemas ===== *) 86 86
+80
lib/irmin.ml
··· 1 1 module Hash = Hash 2 2 module Heap = Heap 3 3 module Schema = Schema 4 + 5 + module SHA1 = Schema.Make (struct 6 + type hash = Digestif.SHA1.t 7 + type block = string 8 + 9 + let hash_equal = Digestif.SHA1.equal 10 + let hash_block data = Digestif.SHA1.digest_string data 11 + end) 12 + 13 + module SHA256 = Schema.Make (struct 14 + type hash = Digestif.SHA256.t 15 + type block = string 16 + 17 + let hash_equal = Digestif.SHA256.equal 18 + let hash_block data = Digestif.SHA256.digest_string data 19 + end) 20 + 21 + (** Merge combinators. 22 + 23 + Each combinator lifts a typed merge function into a block-level merge by 24 + wrapping it with encode/decode. The block type depends on the codec (string 25 + for Git, bytes for others). *) 26 + module Merge = struct 27 + (** [v ~decode ~encode f] lifts a typed merge function [f] into a block-level 28 + merge using [decode] and [encode] for the block representation. *) 29 + let v (type a b) ~(decode : b -> a) ~(encode : a -> b) 30 + (f : ancestor:a -> a -> a -> a) : 31 + ancestor:b -> b -> b -> (b, [ `Conflict of string ]) result = 32 + fun ~ancestor ours theirs -> 33 + Ok (encode (f ~ancestor:(decode ancestor) (decode ours) (decode theirs))) 34 + 35 + let v_result (type a b) ~(decode : b -> a) ~(encode : a -> b) 36 + (f : ancestor:a -> a -> a -> (a, [ `Conflict of string ]) result) : 37 + ancestor:b -> b -> b -> (b, [ `Conflict of string ]) result = 38 + fun ~ancestor ours theirs -> 39 + match f ~ancestor:(decode ancestor) (decode ours) (decode theirs) with 40 + | Ok r -> Ok (encode r) 41 + | Error _ as e -> e 42 + 43 + (** {1 Typed merge functions} *) 44 + 45 + let counter ~ancestor ours theirs = ours + theirs - ancestor 46 + 47 + let counter64 ~ancestor ours theirs = 48 + Int64.sub (Int64.add ours theirs) ancestor 49 + 50 + let lww ~ancestor:_ _ours theirs = theirs 51 + let ours ~ancestor:_ ours _theirs = ours 52 + 53 + let set_union (type a) ~(equal : a -> a -> bool) ~ancestor ours theirs = 54 + let mem x l = List.exists (equal x) l in 55 + let added_o = List.filter (fun x -> not (mem x ancestor)) ours in 56 + let added_t = List.filter (fun x -> not (mem x ancestor)) theirs in 57 + let removed_o = List.filter (fun x -> not (mem x ours)) ancestor in 58 + let removed_t = List.filter (fun x -> not (mem x theirs)) ancestor in 59 + let removed = removed_o @ removed_t in 60 + let base = List.filter (fun x -> not (mem x removed)) ancestor in 61 + let result = base @ added_o in 62 + List.fold_left 63 + (fun acc x -> if mem x acc then acc else acc @ [ x ]) 64 + result added_t 65 + 66 + let text ~ancestor ours theirs = 67 + let chunks = Merge3.merge ~base:ancestor ~ours ~theirs () in 68 + if Merge3.has_conflicts chunks then 69 + Error (`Conflict "concurrent edits to same lines") 70 + else 71 + let buf = Buffer.create (String.length ours) in 72 + List.iter 73 + (function 74 + | Merge3.Resolved lines -> 75 + List.iter 76 + (fun l -> 77 + Buffer.add_string buf l; 78 + Buffer.add_char buf '\n') 79 + lines 80 + | Merge3.Conflict _ -> ()) 81 + chunks; 82 + Ok (Buffer.contents buf) 83 + end
+85 -3
lib/irmin.mli
··· 13 13 end) 14 14 15 15 let tree = S.fix (fun self -> 16 - directory [ "*.json" => git_entry json; "*" => git_entry self ]) 16 + directory [ "*" => entry [ "mode" => S.opaque; "target" => self ] ]) 17 17 18 18 let c = S.at heap tree root in 19 - let (S.Any c) = S.step c "config.json" |> Option.get in 20 - S.get c 19 + match S.step c "config.json" with 20 + | Some c' -> S.get c' 21 + | None -> ... 21 22 ]} *) 22 23 23 24 module Hash = Hash 24 25 module Heap = Heap 25 26 module Schema = Schema 27 + 28 + (** Pre-built schema instances for common hash algorithms. Use these instead of 29 + calling {!Schema.Make} directly when plain SHA-1 or SHA-256 suffices. *) 30 + 31 + module SHA1 : module type of Schema.Make (struct 32 + type hash = Digestif.SHA1.t 33 + type block = string 34 + 35 + let hash_equal = Digestif.SHA1.equal 36 + let hash_block data = Digestif.SHA1.digest_string data 37 + end) 38 + 39 + module SHA256 : module type of Schema.Make (struct 40 + type hash = Digestif.SHA256.t 41 + type block = string 42 + 43 + let hash_equal = Digestif.SHA256.equal 44 + let hash_block data = Digestif.SHA256.digest_string data 45 + end) 46 + 47 + (** Merge combinators. 48 + 49 + Typed merge functions lifted into block-level merges via encode/decode. Use 50 + {!v} to adapt a typed merge to your block representation. *) 51 + module Merge : sig 52 + val v : 53 + decode:('block -> 'a) -> 54 + encode:('a -> 'block) -> 55 + (ancestor:'a -> 'a -> 'a -> 'a) -> 56 + ancestor:'block -> 57 + 'block -> 58 + 'block -> 59 + ('block, [ `Conflict of string ]) result 60 + (** [v ~decode ~encode f] lifts a typed merge [f] into a block-level merge. 61 + The resulting function always resolves (no conflicts). *) 62 + 63 + val v_result : 64 + decode:('block -> 'a) -> 65 + encode:('a -> 'block) -> 66 + (ancestor:'a -> 'a -> 'a -> ('a, [ `Conflict of string ]) result) -> 67 + ancestor:'block -> 68 + 'block -> 69 + 'block -> 70 + ('block, [ `Conflict of string ]) result 71 + (** Like {!v} but the merge function may produce conflicts. *) 72 + 73 + (** {1 Typed merge functions} 74 + 75 + These are the actual merge algorithms. Lift them with {!v} to use as 76 + schema merge functions. *) 77 + 78 + val counter : ancestor:int -> int -> int -> int 79 + (** [counter ~ancestor ours theirs = ours + theirs - ancestor]. Always 80 + resolves. *) 81 + 82 + val counter64 : ancestor:int64 -> int64 -> int64 -> int64 83 + (** 64-bit counter merge. Always resolves. *) 84 + 85 + val lww : ancestor:'a -> 'a -> 'a -> 'a 86 + (** Last-write-wins: always picks [theirs]. *) 87 + 88 + val ours : ancestor:'a -> 'a -> 'a -> 'a 89 + (** Always picks [ours]. *) 90 + 91 + val set_union : 92 + equal:('a -> 'a -> bool) -> 93 + ancestor:'a list -> 94 + 'a list -> 95 + 'a list -> 96 + 'a list 97 + (** Add-wins set union. Items added on either side are kept. Items removed on 98 + either side are removed. Always resolves. *) 99 + 100 + val text : 101 + ancestor:string -> 102 + string -> 103 + string -> 104 + (string, [ `Conflict of string ]) result 105 + (** Line-level 3-way merge via [Merge3]. May conflict when both sides edit the 106 + same lines. *) 107 + end
+36 -10
lib/json/irmin_json.ml
··· 5 5 6 6 Uses SHA-256 for content addressing, same as the tar backend. *) 7 7 8 - module S = Irmin.Schema.Make (struct 9 - type hash = Digestif.SHA256.t 10 - type block = string 11 - 12 - let hash_equal = Digestif.SHA256.equal 13 - let hash_block data = Digestif.SHA256.digest_string data 14 - end) 8 + module S = Irmin.SHA256 15 9 16 10 let nv (type a) (n : a Jsont.node) : a = fst n 17 11 ··· 20 14 | Ok s -> s 21 15 | Error _ -> "" 22 16 23 - let parse : S.parse = 17 + let parse : S.dec = 24 18 fun block -> 25 19 match Jsont_bytesrw.decode_string Jsont.json block with 26 20 | Ok (Jsont.Object obj) -> ··· 34 28 (List.map (fun c -> (`Inline (enc c) : S.child)) (nv arr))) 35 29 | _ -> S.Named [] 36 30 37 - let serialize : S.serialize = fun _ -> "" 31 + let dec s = 32 + match Jsont_bytesrw.decode_string Jsont.json s with 33 + | Ok v -> v 34 + | Error _ -> Jsont.Null ((), Jsont.Meta.none) 35 + 36 + let serialize : S.enc = function 37 + | S.Named children -> 38 + let mems = 39 + List.map 40 + (fun (name, child) -> 41 + let v = 42 + match child with 43 + | `Inline data -> dec data 44 + | `Link _ -> dec "null" 45 + in 46 + Jsont.Json.mem (Jsont.Json.name name) v) 47 + children 48 + in 49 + enc (Jsont.Json.object' mems) 50 + | S.Indexed children -> 51 + let items = 52 + Array.to_list 53 + (Array.map 54 + (fun child -> 55 + match child with 56 + | `Inline data -> dec data 57 + | `Link _ -> dec "null") 58 + children) 59 + in 60 + enc (Jsont.Json.list items) 61 + 38 62 let ( => ) = S.( => ) 39 - let schema : S.t = S.fix (fun self -> S.node ~parse ~serialize [ "*" => self ]) 63 + 64 + let schema : S.t = 65 + S.fix (fun self -> S.node ~dec:parse ~enc:serialize [ "*" => self ])
+1 -1
lib/oci/dune
··· 1 1 (library 2 2 (name irmin_oci) 3 - (libraries irmin oci fmt)) 3 + (libraries irmin digestif jsont jsont.bytesrw fmt))
+62 -5
lib/oci/irmin_oci.ml
··· 1 1 (** OCI backend for Irmin. 2 2 3 3 Content-addressed storage using OCI SHA-256 digests. Manifests, configs, and 4 - layers are all blocks in the heap. The cursor navigates the JSON structure 5 - of manifests. *) 4 + layers are all blocks in the heap. The JSON cursor navigates manifest 5 + structure. *) 6 + 7 + module S = Irmin.SHA256 8 + 9 + (* OCI manifests and configs are JSON. Reuse the JSON codec pattern. *) 10 + 11 + let nv (type a) (n : a Jsont.node) : a = fst n 12 + 13 + let enc c = 14 + match Jsont_bytesrw.encode_string Jsont.json c with 15 + | Ok s -> s 16 + | Error _ -> "" 17 + 18 + let dec s = 19 + match Jsont_bytesrw.decode_string Jsont.json s with 20 + | Ok v -> v 21 + | Error _ -> Jsont.Null ((), Jsont.Meta.none) 22 + 23 + let parse : S.dec = 24 + fun block -> 25 + match Jsont_bytesrw.decode_string Jsont.json block with 26 + | Ok (Jsont.Object obj) -> 27 + S.Named 28 + (List.map 29 + (fun ((n, c) : Jsont.mem) -> (nv n, `Inline (enc c))) 30 + (nv obj)) 31 + | Ok (Jsont.Array arr) -> 32 + S.Indexed 33 + (Array.of_list 34 + (List.map (fun c -> (`Inline (enc c) : S.child)) (nv arr))) 35 + | _ -> S.Named [] 36 + 37 + let serialize : S.enc = function 38 + | S.Named children -> 39 + let mems = 40 + List.map 41 + (fun (name, child) -> 42 + let v = 43 + match child with 44 + | `Inline data -> dec data 45 + | `Link _ -> dec "null" 46 + in 47 + Jsont.Json.mem (Jsont.Json.name name) v) 48 + children 49 + in 50 + enc (Jsont.Json.object' mems) 51 + | S.Indexed children -> 52 + let items = 53 + Array.to_list 54 + (Array.map 55 + (fun child -> 56 + match child with 57 + | `Inline data -> dec data 58 + | `Link _ -> dec "null") 59 + children) 60 + in 61 + enc (Jsont.Json.list items) 6 62 7 - module Schema = Schema 8 - module Codec = Codec 9 - module Heap = Heap 63 + let ( => ) = S.( => ) 64 + 65 + let schema : S.t = 66 + S.fix (fun self -> S.node ~dec:parse ~enc:serialize [ "*" => self ])
+244 -94
lib/schema.ml
··· 8 8 struct 9 9 type child = [ `Link of H.hash | `Inline of H.block ] 10 10 type children = Named of (string * child) list | Indexed of child array 11 - type parse = H.block -> children 12 - type serialize = children -> H.block 11 + type dec = H.block -> children 12 + type enc = children -> H.block 13 13 14 14 type merge = 15 15 ancestor:H.block -> ··· 23 23 and t = 24 24 | Opaque : t 25 25 | Node : { 26 - parse : parse; 27 - serialize : serialize; 26 + dec : dec; 27 + enc : enc; 28 28 merge : merge option; 29 29 rules : rule list; 30 30 } ··· 33 33 34 34 let opaque = Opaque 35 35 let ( => ) pat schema = Rule (pat, schema) 36 - 37 - let node ~parse ~serialize ?merge rules = 38 - Node { parse; serialize; merge; rules } 36 + let node ~dec ~enc ?merge rules = Node { dec; enc; merge; rules } 39 37 40 38 let fix f = 41 39 let r = ref None in ··· 43 41 r := Some t; 44 42 t 45 43 46 - (* Pattern matching *) 47 44 let pattern_match pat name = 48 45 pat = "*" || pat = name 49 46 || Stdlib.String.length pat > 1 ··· 63 60 fun p -> match p with Pack (Rec f) -> resolve_packed (Pack (f ())) | _ -> p 64 61 65 62 type node_ops = { 66 - parse : parse; 67 - serialize : serialize; 63 + dec : dec; 64 + enc : enc; 68 65 merge : merge option; 69 66 rules : rule list; 70 67 } 71 68 72 69 let get_node : packed -> node_ops option = function 73 - | Pack (Node { parse; serialize; merge; rules }) -> 74 - Some { parse; serialize; merge; rules } 70 + | Pack (Node { dec; enc; merge; rules }) -> Some { dec; enc; merge; rules } 75 71 | _ -> None 76 - 77 - (* ===== Children cache ===== *) 78 72 79 73 type overlay = (string * [ `Set of H.block | `Remove ]) list 80 74 ··· 112 106 let resolved = resolve_packed (Pack schema) in 113 107 let empty_block = 114 108 match get_node resolved with 115 - | Some ops -> ops.serialize (Named []) 116 - | None -> failwith "Schema.empty: schema has no serialize" 109 + | Some ops -> ops.enc (Named []) 110 + | None -> failwith "Schema.empty: schema has no enc" 117 111 in 118 112 { 119 113 heap = Heap.find heap; ··· 197 191 match get_node resolved with 198 192 | None -> Named [] 199 193 | Some ops -> ( 200 - let base = ops.parse data in 194 + let base = ops.dec data in 201 195 if c.overlay = [] then base 202 196 else 203 197 match base with ··· 274 268 275 269 let mem c p = Option.is_some (find c p) 276 270 277 - (* ===== Mutation ===== *) 278 - 279 271 let set c name block = 280 272 { c with overlay = (name, `Set block) :: c.overlay; cached_children = None } 281 273 ··· 289 281 | None -> ( 290 282 match c.source with `Block h -> h | `Inline data -> H.hash_block data) 291 283 | Some ops -> 292 - let block = ops.serialize kids in 284 + let block = ops.enc kids in 293 285 let h = H.hash_block block in 294 286 Heap.put heap h block; 295 287 h 296 288 297 - (* ===== Refs ===== *) 298 - 299 289 let head heap ~branch = Heap.find_ref heap ("refs/heads/" ^ branch) 300 290 let set_head heap ~branch h = Heap.set_ref heap ("refs/heads/" ^ branch) h 301 291 ··· 311 301 let update_branch heap ~branch ~old ~new_ = 312 302 Heap.cas_ref heap ("refs/heads/" ^ branch) ~test:old ~set:(Some new_) 313 303 314 - (* ===== Commit ===== *) 315 - 316 - let commit_node ~parse ~serialize ~tree = 304 + let commit_node ~dec ~enc ~tree = 317 305 Node 318 306 { 319 - parse; 320 - serialize; 307 + dec; 308 + enc; 321 309 merge = None; 322 310 rules = 323 311 [ ··· 329 317 330 318 (* ===== Merge ===== *) 331 319 320 + type conflict = { 321 + path : string list; 322 + ancestor : H.block option; 323 + ours : H.block option; 324 + theirs : H.block option; 325 + message : string; 326 + } 327 + 328 + type resolution = { path : string list; value : H.block } 329 + 332 330 let merge (heap : (H.hash, H.block, _) Heap.t) schema ~ancestor ~ours ~theirs 333 331 = 334 - (* 3-way merge: walk cursors in parallel *) 335 - let rec merge_hash schema a_hash o_hash t_hash = 336 - if H.hash_equal o_hash t_hash then Ok o_hash (* no conflict *) 337 - else if H.hash_equal a_hash o_hash then 338 - Ok t_hash (* only theirs changed *) 339 - else if H.hash_equal a_hash t_hash then Ok o_hash (* only ours changed *) 332 + let conflicts = ref [] in 333 + let add_conflict ~path ~ancestor ~ours ~theirs message = 334 + conflicts := { path; ancestor; ours; theirs; message } :: !conflicts 335 + in 336 + let rec merge_hash ~path schema a_hash o_hash t_hash = 337 + if H.hash_equal o_hash t_hash then o_hash 338 + else if H.hash_equal a_hash o_hash then t_hash 339 + else if H.hash_equal a_hash t_hash then o_hash 340 340 else 341 - (* Both changed — need to merge *) 342 341 match 343 342 (Heap.find heap a_hash, Heap.find heap o_hash, Heap.find heap t_hash) 344 343 with 345 344 | Some a_block, Some o_block, Some t_block -> 346 - merge_block schema a_block o_block t_block 347 - | _ -> Error (`Conflict "missing block") 348 - and merge_block schema a_block o_block t_block = 345 + merge_block ~path schema a_block o_block t_block 346 + | _ -> 347 + add_conflict ~path ~ancestor:None ~ours:None ~theirs:None 348 + "missing block"; 349 + o_hash 350 + and merge_block ~path schema a_block o_block t_block = 349 351 let resolved = resolve_packed (Pack schema) in 350 352 match get_node resolved with 351 353 | None -> 352 - (* Leaf: use the node's merge function, or conflict *) 353 - Error (`Conflict "leaf conflict, no merge function") 354 + add_conflict ~path ~ancestor:(Some a_block) ~ours:(Some o_block) 355 + ~theirs:(Some t_block) "leaf conflict, no merge function"; 356 + let h = H.hash_block o_block in 357 + Heap.put heap h o_block; 358 + h 354 359 | Some ops -> ( 355 360 match ops.merge with 356 361 | Some merge_fn -> ( 357 - (* Leaf with merge function *) 358 362 match merge_fn ~ancestor:a_block o_block t_block with 359 363 | Ok merged -> 360 364 let h = H.hash_block merged in 361 365 Heap.put heap h merged; 362 - Ok h 363 - | Error _ as e -> e) 366 + h 367 + | Error (`Conflict msg) -> 368 + add_conflict ~path ~ancestor:(Some a_block) 369 + ~ours:(Some o_block) ~theirs:(Some t_block) msg; 370 + let h = H.hash_block o_block in 371 + Heap.put heap h o_block; 372 + h) 364 373 | None -> 365 - (* Interior node: structural merge *) 366 - let a_kids = ops.parse a_block in 367 - let o_kids = ops.parse o_block in 368 - let t_kids = ops.parse t_block in 374 + let a_kids = ops.dec a_block in 375 + let o_kids = ops.dec o_block in 376 + let t_kids = ops.dec t_block in 369 377 let is_empty = function 370 378 | Named [] -> true 371 379 | Indexed [||] -> true 372 380 | _ -> false 373 381 in 374 - if is_empty a_kids && is_empty o_kids && is_empty t_kids then 375 - (* All three are childless (leaves) but different — conflict *) 376 - Error (`Conflict "leaf values differ, no merge function") 377 - else merge_children ops schema a_kids o_kids t_kids) 378 - and merge_children ops schema a_kids o_kids t_kids = 379 - (* Collect all names from all three *) 382 + if is_empty a_kids && is_empty o_kids && is_empty t_kids then ( 383 + add_conflict ~path ~ancestor:(Some a_block) ~ours:(Some o_block) 384 + ~theirs:(Some t_block) "leaf values differ, no merge function"; 385 + let h = H.hash_block o_block in 386 + Heap.put heap h o_block; 387 + h) 388 + else merge_children ~path ops schema a_kids o_kids t_kids) 389 + and merge_children ~path ops schema a_kids o_kids t_kids = 380 390 let names_of = function 381 391 | Named l -> List.map fst l 382 392 | Indexed arr -> List.init (Array.length arr) string_of_int ··· 396 406 Hashtbl.fold (fun n () acc -> n :: acc) tbl [] 397 407 |> List.sort String.compare 398 408 in 399 - (* Find the child schema for a name *) 400 409 let child_schema name = 401 410 match find_rule ops.rules name with 402 411 | Some (Pack s) -> s 403 412 | None -> Opaque 404 413 in 405 - (* Merge each child *) 406 414 let rec merge_names acc = function 407 415 | [] -> 408 416 let merged = Named (List.rev acc) in 409 - let block = ops.serialize merged in 417 + let block = ops.enc merged in 410 418 let h = H.hash_block block in 411 419 Heap.put heap h block; 412 - Ok h 420 + h 413 421 | name :: rest -> ( 414 422 let a = find_child name a_kids in 415 423 let o = find_child name o_kids in 416 424 let t = find_child name t_kids in 417 425 let child_sch = child_schema name in 418 - match merge_child child_sch a o t with 419 - | Ok None -> merge_names acc rest (* deleted *) 420 - | Ok (Some child) -> merge_names ((name, child) :: acc) rest 421 - | Error _ as e -> e) 422 - and merge_child schema a o t = 426 + let child_path = path @ [ name ] in 427 + let result = merge_child ~path:child_path child_sch a o t in 428 + match result with 429 + | None -> merge_names acc rest 430 + | Some child -> merge_names ((name, child) :: acc) rest) 431 + and merge_child ~path schema a o t = 423 432 match (a, o, t) with 424 - | _, Some oc, Some tc when oc = tc -> Ok (Some oc) (* same *) 425 - | Some ac, Some oc, _ when ac = oc -> Ok t (* only theirs changed *) 426 - | Some ac, _, Some tc when ac = tc -> Ok o (* only ours changed *) 427 - | None, None, None -> Ok None 428 - | None, Some oc, None -> Ok (Some oc) (* ours added *) 429 - | None, None, Some tc -> Ok (Some tc) (* theirs added *) 430 - | None, Some _, Some _ -> 431 - (* both added — try merge with empty ancestor *) 432 - Error (`Conflict "both sides added same key") 433 - | Some _, None, None -> Ok None (* both deleted *) 434 - | Some _, Some _, None -> 435 - Ok None (* theirs deleted, ours modified — conflict? *) 436 - | Some _, None, Some _ -> 437 - Ok None (* ours deleted, theirs modified — conflict? *) 438 - | Some (`Link ah), Some (`Link oh), Some (`Link th) -> ( 439 - (* Both modified a linked child — recurse *) 440 - match merge_hash schema ah oh th with 441 - | Ok h -> Ok (Some (`Link h)) 442 - | Error _ as e -> e) 443 - | Some (`Inline _), Some (`Inline ob), Some (`Inline tb) -> ( 444 - (* Both modified an inline child *) 445 - match a with 446 - | Some (`Inline ab) -> ( 447 - match merge_block schema ab ob tb with 448 - | Ok h -> Ok (Some (`Link h)) 449 - | Error _ as e -> e) 450 - | _ -> Error (`Conflict "inline merge without ancestor")) 451 - | _ -> Error (`Conflict "incompatible child types") 433 + | _, Some oc, Some tc when oc = tc -> Some oc 434 + | Some ac, Some oc, _ when ac = oc -> t 435 + | Some ac, _, Some tc when ac = tc -> o 436 + | None, None, None -> None 437 + | None, Some oc, None -> Some oc 438 + | None, None, Some tc -> Some tc 439 + | None, Some oc, Some tc -> ( 440 + match (oc, tc) with 441 + | `Link oh, `Link th -> ( 442 + match (Heap.find heap oh, Heap.find heap th) with 443 + | Some ob, Some tb -> 444 + let empty_block = 445 + match get_node (resolve_packed (Pack schema)) with 446 + | Some ops -> ops.enc (Named []) 447 + | None -> ob 448 + in 449 + let h = merge_block ~path schema empty_block ob tb in 450 + Some (`Link h) 451 + | _ -> 452 + add_conflict ~path ~ancestor:None ~ours:(Heap.find heap oh) 453 + ~theirs:(Heap.find heap th) 454 + "both sides added, missing block"; 455 + Some oc) 456 + | _ -> 457 + add_conflict ~path ~ancestor:None ~ours:None ~theirs:None 458 + "both sides added same key"; 459 + Some oc) 460 + | Some _, None, None -> None 461 + | Some _, Some oc, None -> 462 + let o_block = 463 + match oc with `Link h -> Heap.find heap h | `Inline b -> Some b 464 + in 465 + let a_block = 466 + match a with 467 + | Some (`Link h) -> Heap.find heap h 468 + | Some (`Inline b) -> Some b 469 + | _ -> None 470 + in 471 + add_conflict ~path ~ancestor:a_block ~ours:o_block ~theirs:None 472 + "delete/modify conflict"; 473 + Some oc 474 + | Some _, None, Some tc -> 475 + let t_block = 476 + match tc with `Link h -> Heap.find heap h | `Inline b -> Some b 477 + in 478 + let a_block = 479 + match a with 480 + | Some (`Link h) -> Heap.find heap h 481 + | Some (`Inline b) -> Some b 482 + | _ -> None 483 + in 484 + add_conflict ~path ~ancestor:a_block ~ours:None ~theirs:t_block 485 + "delete/modify conflict"; 486 + Some tc 487 + | Some (`Link ah), Some (`Link oh), Some (`Link th) -> 488 + let h = merge_hash ~path schema ah oh th in 489 + Some (`Link h) 490 + | Some (`Inline ab), Some (`Inline ob), Some (`Inline tb) -> 491 + let h = merge_block ~path schema ab ob tb in 492 + Some (`Link h) 493 + | Some _, Some oc, Some _ -> 494 + add_conflict ~path ~ancestor:None ~ours:None ~theirs:None 495 + "incompatible child types"; 496 + Some oc 452 497 in 453 498 merge_names [] all_names 454 499 in 455 500 let get_hash c = 456 501 match c.source with `Block h -> h | `Inline d -> H.hash_block d 457 502 in 458 - match 459 - merge_hash schema (get_hash ancestor) (get_hash ours) (get_hash theirs) 460 - with 461 - | Error _ as e -> e 462 - | Ok merged_hash -> Ok (at heap schema merged_hash) 503 + let merged_hash = 504 + merge_hash ~path:[] schema (get_hash ancestor) (get_hash ours) 505 + (get_hash theirs) 506 + in 507 + (at heap schema merged_hash, List.rev !conflicts) 508 + 509 + let resolve (heap : (H.hash, H.block, _) Heap.t) _schema cursor resolutions = 510 + let c = ref cursor in 511 + List.iter 512 + (fun (r : resolution) -> 513 + match r.path with 514 + | [] -> () 515 + | _ -> ( 516 + let parent_path = List.rev (List.tl (List.rev r.path)) in 517 + let name = List.nth r.path (List.length r.path - 1) in 518 + let nav = 519 + List.fold_left 520 + (fun acc s -> 521 + match acc with Some c -> step c s | None -> None) 522 + (Some !c) parent_path 523 + in 524 + match nav with 525 + | Some parent -> 526 + let updated = set parent name r.value in 527 + let rec go_up c = 528 + match up c with Some c' -> go_up c' | None -> c 529 + in 530 + c := go_up updated 531 + | None -> ())) 532 + resolutions; 533 + !c 534 + 535 + let merge_lww : merge = fun ~ancestor:_ _ours theirs -> Ok theirs 536 + let merge_ours : merge = fun ~ancestor:_ ours _theirs -> Ok ours 537 + 538 + (* ===== Diff ===== *) 539 + 540 + type diff_entry = { 541 + diff_path : string list; 542 + kind : 543 + [ `Add of H.block | `Remove of H.block | `Change of H.block * H.block ]; 544 + } 545 + 546 + let diff a b = 547 + let hash_of c = 548 + match hash c with 549 + | Some h -> Some h 550 + | None -> Option.map H.hash_block (get c) 551 + in 552 + let same c1 c2 = 553 + match (hash_of c1, hash_of c2) with 554 + | Some h1, Some h2 -> H.hash_equal h1 h2 555 + | _ -> false 556 + in 557 + let rec go ~path a b acc = 558 + if same a b then acc 559 + else 560 + let a_kids = list a in 561 + let b_kids = list b in 562 + match (a_kids, b_kids) with 563 + | [], [] -> ( 564 + match (get a, get b) with 565 + | Some va, Some vb -> 566 + { diff_path = path; kind = `Change (va, vb) } :: acc 567 + | None, Some vb -> { diff_path = path; kind = `Add vb } :: acc 568 + | Some va, None -> { diff_path = path; kind = `Remove va } :: acc 569 + | None, None -> acc) 570 + | _ -> 571 + let all = 572 + List.sort_uniq String.compare 573 + (List.map fst a_kids @ List.map fst b_kids) 574 + in 575 + List.fold_left 576 + (fun acc name -> 577 + match (step a name, step b name) with 578 + | None, None -> acc 579 + | None, Some bc -> collect `Add ~path:(path @ [ name ]) bc acc 580 + | Some ac, None -> 581 + collect `Remove ~path:(path @ [ name ]) ac acc 582 + | Some ac, Some bc -> go ~path:(path @ [ name ]) ac bc acc) 583 + acc all 584 + and collect dir ~path c acc = 585 + let kids = list c in 586 + if kids = [] then 587 + match get c with 588 + | Some v -> 589 + let kind = match dir with `Add -> `Add v | `Remove -> `Remove v in 590 + { diff_path = path; kind } :: acc 591 + | None -> acc 592 + else 593 + List.fold_left 594 + (fun acc (name, _) -> 595 + match step c name with 596 + | Some child -> collect dir ~path:(path @ [ name ]) child acc 597 + | None -> acc) 598 + acc kids 599 + in 600 + List.rev (go ~path:[] a b []) 601 + 602 + let ddiff ~old_base ~old_tip ~new_base ~new_tip = 603 + let d1 = diff old_base old_tip in 604 + let d2 = diff new_base new_tip in 605 + let old_changes = Hashtbl.create (List.length d1) in 606 + List.iter (fun e -> Hashtbl.replace old_changes e.diff_path e.kind) d1; 607 + List.filter 608 + (fun e -> 609 + match Hashtbl.find_opt old_changes e.diff_path with 610 + | None -> true 611 + | Some old_kind -> old_kind <> e.kind) 612 + d2 463 613 464 614 (* ===== Proof ===== *) 465 615
+73 -21
lib/schema.mli
··· 5 5 and produces proofs as subheaps. 6 6 7 7 {[ 8 - let directory = node ~parse:git_tree_parse ~serialize:git_tree_serialize 9 - let json_node = node ~parse:json_parse ~serialize:json_serialize 8 + let directory = node ~dec:git_tree_dec ~enc:git_tree_enc 9 + let json_node = node ~dec:json_dec ~enc:json_enc 10 10 11 11 let json = fix (fun self -> json_node [ "*" => self ]) 12 12 let git_entry target = ··· 37 37 | Indexed of child array 38 38 (** Positional children: array elements. O(1) index lookup. *) 39 39 40 - type parse = H.block -> children 41 - (** How to extract children from a block. *) 40 + type dec = H.block -> children 41 + (** How to decode a block into its children. *) 42 42 43 - type serialize = children -> H.block 44 - (** How to reconstruct a block from children. *) 43 + type enc = children -> H.block 44 + (** How to encode children into a block. *) 45 45 46 46 type merge = 47 47 ancestor:H.block -> ··· 62 62 ["*.json"] matches by suffix, exact names match exactly. First matching 63 63 rule wins. *) 64 64 65 - val node : 66 - parse:parse -> serialize:serialize -> ?merge:merge -> rule list -> t 67 - (** [node ~parse ~serialize ?merge rules] is a block with named children. 68 - [merge] is used for leaf-level 3-way merge. Interior nodes are merged 69 - structurally (child by child). *) 65 + val node : dec:dec -> enc:enc -> ?merge:merge -> rule list -> t 66 + (** [node ~dec ~enc ?merge rules] is a block with named children. [merge] is 67 + used for leaf-level 3-way merge. Interior nodes are merged structurally 68 + (child by child). *) 70 69 71 70 val fix : (t -> t) -> t 72 71 (** [fix f] is a recursive schema. *) ··· 158 157 A commit is a node linking a tree root to parents with metadata. Describe 159 158 it as a {!node} with parse/serialize for the commit format. *) 160 159 161 - val commit_node : parse:parse -> serialize:serialize -> tree:t -> t 162 - (** [commit_node ~parse ~serialize ~tree] is a schema for commit blocks. 163 - Children: ["tree"] links to [tree], ["parent/0"], ["parent/1"], ... link 164 - to parent commits, ["message"] and ["author"] are inline metadata. *) 160 + val commit_node : dec:dec -> enc:enc -> tree:t -> t 161 + (** [commit_node ~dec ~enc ~tree] is a schema for commit blocks. Children: 162 + ["tree"] links to [tree], ["parent/0"], ["parent/1"], ... link to parent 163 + commits, ["message"] and ["author"] are inline metadata. *) 165 164 166 165 (** {1:merge Merge} 167 166 168 - 3-way merge: given a common ancestor and two diverged states, produce a 169 - merged state. Interior nodes are merged structurally (child by child). 170 - Leaves use the node's [~merge] function. *) 167 + Two-phase 3-way merge. Phase 1 resolves everything the schema can merge 168 + automatically (structural merge for trees, typed merge functions for 169 + leaves/values). Phase 2 surfaces remaining conflicts for external 170 + resolution. 171 + 172 + Phase 1 always succeeds: it returns a merged tree plus a conflict list. 173 + For types with total merge functions (counters, sets, LWW), the conflict 174 + list is always empty -- eventual consistency for free. *) 175 + 176 + type conflict = { 177 + path : string list; 178 + ancestor : H.block option; 179 + ours : H.block option; 180 + theirs : H.block option; 181 + message : string; 182 + } 183 + 184 + type resolution = { path : string list; value : H.block } 171 185 172 186 val merge : 173 187 (H.hash, H.block, _) Heap.t -> ··· 175 189 ancestor:cursor -> 176 190 ours:cursor -> 177 191 theirs:cursor -> 178 - (cursor, [ `Conflict of string ]) result 179 - (** [merge heap schema ~ancestor ~ours ~theirs] performs a 3-way merge. 180 - Returns a cursor at the merged tree, or a conflict. *) 192 + cursor * conflict list 193 + (** [merge heap schema ~ancestor ~ours ~theirs] performs a two-phase 3-way 194 + merge. Always returns a cursor at the (partially) merged tree. Conflicts 195 + are returned alongside the result, not instead of it. *) 196 + 197 + val resolve : 198 + (H.hash, H.block, _) Heap.t -> t -> cursor -> resolution list -> cursor 199 + (** [resolve heap schema cursor resolutions] applies conflict resolutions. *) 200 + 201 + (** {2 Generic merge combinators} 202 + 203 + These work at the block level (they don't inspect the value). *) 204 + 205 + val merge_lww : merge 206 + (** Last-write-wins: picks [theirs]. Always resolves. *) 207 + 208 + val merge_ours : merge 209 + (** Always picks [ours]. Always resolves. *) 210 + 211 + (** {1:diff Diff} 212 + 213 + Structural diff between two trees, and ddiff for 4-point merge workflows 214 + (a la Jane Street Iron). *) 215 + 216 + type diff_entry = { 217 + diff_path : string list; 218 + kind : 219 + [ `Add of H.block | `Remove of H.block | `Change of H.block * H.block ]; 220 + } 221 + 222 + val diff : cursor -> cursor -> diff_entry list 223 + (** [diff a b] returns the structural difference between trees [a] and [b]. *) 224 + 225 + val ddiff : 226 + old_base:cursor -> 227 + old_tip:cursor -> 228 + new_base:cursor -> 229 + new_tip:cursor -> 230 + diff_entry list 231 + (** [ddiff ~old_base ~old_tip ~new_base ~new_tip] computes the diff of two 232 + diffs. Iron's 4-point merge for incremental review. *) 181 233 182 234 (** {1:proof Proofs} 183 235
+4 -11
lib/tar/irmin_tar.ml
··· 4 4 parse function reads tar entries and builds the tree structure in the heap. 5 5 *) 6 6 7 - module S = Irmin.Schema.Make (struct 8 - type hash = Digestif.SHA256.t 9 - type block = string 10 - 11 - let hash_equal = Digestif.SHA256.equal 12 - let hash_block data = Digestif.SHA256.digest_string data 13 - end) 7 + module S = Irmin.SHA256 14 8 15 9 (* A directory is serialized as a sorted list of "name\0hash" entries, 16 10 similar to Git tree format but simpler: no mode byte, fixed-size ··· 18 12 19 13 let hash_size = Digestif.SHA256.digest_size 20 14 21 - let dir_serialize : S.serialize = function 15 + let dir_serialize : S.enc = function 22 16 | S.Named entries -> 23 17 let buf = Buffer.create 256 in 24 18 List.iter ··· 35 29 Buffer.contents buf 36 30 | S.Indexed _ -> "" 37 31 38 - let dir_parse : S.parse = 32 + let dir_parse : S.dec = 39 33 fun data -> 40 34 let len = String.length data in 41 35 let rec loop pos acc = ··· 57 51 let ( => ) = S.( => ) 58 52 59 53 let tree = 60 - S.fix (fun self -> 61 - S.node ~parse:dir_parse ~serialize:dir_serialize [ "*" => self ]) 54 + S.fix (fun self -> S.node ~dec:dir_parse ~enc:dir_serialize [ "*" => self ]) 62 55 63 56 (** Build a Merkle tree from a list of (path, content) pairs. Writes all blobs 64 57 and tree nodes to the heap. Returns the root hash. *)
+2 -1
test/dune
··· 1 1 (test 2 2 (name test) 3 - (libraries irmin irmin_git alcotest eio_main generic_store)) 3 + (modules test) 4 + (libraries irmin alcotest)) 4 5 5 6 (cram 6 7 (package irmin)
+1 -1
test/mst_proof/dune
··· 1 1 (executable 2 2 (name mst_proof) 3 - (libraries irmin)) 3 + (libraries irmin irmin_tar digestif fmt))
+54 -48
test/mst_proof/mst_proof.ml
··· 1 - (** Demonstrate MST proof generation and verification. *) 1 + (** Demonstrate proof generation and verification using Irmin Schema. *) 2 + 3 + module S = Irmin_tar.S 2 4 3 - open Irmin 4 - open Private 5 + let hash_hex h = String.sub (Digestif.SHA256.to_hex h) 0 16 5 6 6 7 let () = 7 - (* Create in-memory backend *) 8 - let backend = Backend.Memory.cid () in 8 + let store = Hashtbl.create 16 in 9 + let module B : 10 + Irmin.Heap.BACKEND 11 + with type t = (Digestif.SHA256.t, string) Hashtbl.t 12 + and type hash = Digestif.SHA256.t 13 + and type block = string = struct 14 + type t = (Digestif.SHA256.t, string) Hashtbl.t 15 + type hash = Digestif.SHA256.t 16 + type block = string 9 17 10 - (* Build a tree with ATProto-style keys (flat namespace) *) 11 - let tree = Tree.Mst.empty () in 12 - let tree = Tree.Mst.add tree [ "post/3k2yihx" ] "Hello World" in 13 - let tree = 14 - Tree.Mst.add tree [ "profile/self" ] "{\"displayName\":\"Alice\"}" 18 + let find t h = Hashtbl.find_opt t h 19 + let put t h d = Hashtbl.replace t h d 20 + let mem t h = Hashtbl.mem t h 21 + let batch t l = List.iter (fun (h, d) -> Hashtbl.replace t h d) l 22 + let find_ref _ _ = None 23 + let set_ref _ _ _ = () 24 + let del_ref _ _ = () 25 + let list_refs _ = [] 26 + let cas_ref _ _ ~test:_ ~set:_ = false 27 + let flush _ = () 28 + let close _ = () 29 + end 15 30 in 16 - let tree = 17 - Tree.Mst.add tree [ "like/3k2yihy" ] "{\"subject\":\"at://...\"}" 31 + let module H = Irmin.Heap.Make (B) in 32 + let heap = H.v store in 33 + 34 + (* Build a tree with some entries *) 35 + let entries = 36 + [ 37 + ("post/3k2yihx", "Hello World"); 38 + ("profile/self", "{\"displayName\":\"Alice\"}"); 39 + ("like/3k2yihy", "{\"subject\":\"at://...\"}"); 40 + ] 18 41 in 19 - let root = Tree.Mst.hash tree ~backend in 42 + let root = Irmin_tar.of_entries heap entries in 20 43 21 - Fmt.pr "MST Root: %s\n" (String.sub (Atp.Cid.to_string root) 0 16); 22 - Fmt.pr "\n"; 44 + Fmt.pr "Tree Root: %s@." (hash_hex root); 45 + Fmt.pr "@."; 23 46 24 47 (* Produce a proof for reading a post *) 25 - let path = [ "post/3k2yihx" ] in 26 - let proof, result = 27 - Proof.Mst.produce backend root (fun t -> 28 - let v = Proof.Mst.Tree.find t path in 29 - (t, v)) 48 + let read_post c = 49 + let post = S.step c "post" |> Option.get in 50 + let v = S.step post "3k2yihx" |> Option.get in 51 + (v, S.get v) 30 52 in 53 + let proof, value = S.produce heap Irmin_tar.tree root read_post in 31 54 32 - Fmt.pr "Proof for: %s\n" (List.hd path); 33 - Fmt.pr "Value: %s\n" (Option.value ~default:"<not found>" result); 34 - Fmt.pr "\n"; 55 + Fmt.pr "Proof for: post/3k2yihx@."; 56 + Fmt.pr "Value: %s@." (Option.value ~default:"<not found>" value); 57 + Fmt.pr "@."; 58 + Fmt.pr "Before: %s@." (hash_hex proof.before); 59 + Fmt.pr "After: %s (read-only, no change)@." (hash_hex proof.after); 60 + Fmt.pr "@."; 35 61 36 - (* Show proof hashes *) 37 - let hash_str h = String.sub (Atp.Cid.to_string h) 0 16 in 38 - let before_hash = 39 - match Proof.before proof with 40 - | `Node h -> hash_str h 41 - | `Contents h -> hash_str h 42 - in 43 - let after_hash = 44 - match Proof.after proof with 45 - | `Node h -> hash_str h 46 - | `Contents h -> hash_str h 47 - in 48 - Fmt.pr "Before: %s (read-only, no change)\n" before_hash; 49 - Fmt.pr "After: %s\n" after_hash; 50 - Fmt.pr "\n"; 51 - 52 - (* Verify without backend - proof contains all needed data *) 53 - Fmt.pr "Verifying proof (no backend access)...\n"; 54 - match 55 - Proof.Mst.verify ~expected_root:(`Node root) proof (fun t -> 56 - let v = Proof.Mst.Tree.find t path in 57 - (t, v)) 58 - with 59 - | Ok (_, v) -> Fmt.pr "✓ Verified: %s\n" (Option.value ~default:"<none>" v) 60 - | Error (`Proof_mismatch msg) -> Fmt.pr "✗ Invalid: %s\n" msg 62 + (* Verify without backend — proof contains all needed data *) 63 + Fmt.pr "Verifying proof (no backend access)...@."; 64 + match S.verify proof Irmin_tar.tree read_post with 65 + | Ok v -> Fmt.pr "Verified: %s@." (Option.value ~default:"<none>" v) 66 + | Error (`Proof_failure msg) -> Fmt.pr "Invalid: %s@." msg
+6 -6
test/schema/test.ml
··· 18 18 let git_hash h = Git.Hash.of_raw_string (Irmin.Hash.to_bytes h) 19 19 let irmin_hash h = Irmin.Hash.sha1_of_bytes (Git.Hash.to_raw_string h) 20 20 21 - let git_tree_parse : S.parse = 21 + let git_tree_parse : S.dec = 22 22 fun data -> 23 23 match Git.Tree.of_string data with 24 24 | Ok tree -> ··· 28 28 (entry.name, `Inline (Git.Tree.to_string (Git.Tree.v [ entry ]))))) 29 29 | Error _ -> S.Named [] 30 30 31 - let git_entry_parse : S.parse = 31 + let git_entry_parse : S.dec = 32 32 fun data -> 33 33 match Git.Tree.of_string data with 34 34 | Ok tree -> ( ··· 44 44 45 45 let json_nv (type a) (n : a Jsont.node) : a = fst n 46 46 47 - let json_parse : S.parse = 47 + let json_parse : S.dec = 48 48 fun data -> 49 49 let enc c = 50 50 match Jsont_bytesrw.encode_string Jsont.json c with ··· 66 66 67 67 (* Serialize stubs — sufficient for read-only tests *) 68 68 let noop_serialize _ = "" 69 - let directory = S.node ~parse:git_tree_parse ~serialize:noop_serialize 70 - let entry = S.node ~parse:git_entry_parse ~serialize:noop_serialize 71 - let json_node = S.node ~parse:json_parse ~serialize:noop_serialize 69 + let directory = S.node ~dec:git_tree_parse ~enc:noop_serialize 70 + let entry = S.node ~dec:git_entry_parse ~enc:noop_serialize 71 + let json_node = S.node ~dec:json_parse ~enc:noop_serialize 72 72 73 73 (* ===== Schemas ===== *) 74 74
+32 -36
test/tar/test.ml
··· 88 88 (* ===== Composition: tar + JSON ===== *) 89 89 90 90 (* JSON parse function — defined here, not in the tar backend *) 91 - let json_parse : S.parse = 91 + let json_parse : S.dec = 92 92 fun data -> 93 93 let nv (type a) (n : a Jsont.node) : a = fst n in 94 94 let enc c = ··· 111 111 let ( => ) = S.( => ) 112 112 113 113 let json = 114 - S.fix (fun self -> 115 - S.node ~parse:json_parse ~serialize:(fun _ -> "") [ "*" => self ]) 114 + S.fix (fun self -> S.node ~dec:json_parse ~enc:(fun _ -> "") [ "*" => self ]) 116 115 117 116 (* Tar tree where *.json files are JSON-navigable *) 118 117 let tar_json = 119 118 S.fix (fun self -> 120 - S.node ~parse:Irmin_tar.dir_parse ~serialize:Irmin_tar.dir_serialize 119 + S.node ~dec:Irmin_tar.dir_parse ~enc:Irmin_tar.dir_serialize 121 120 [ "*.json" => json; "*" => self ]) 122 121 123 122 let test_tar_json_proof () = ··· 182 181 (* ===== Merge tests ===== *) 183 182 184 183 (* Counter merge: sum the deltas *) 185 - let counter_parse : S.parse = fun _ -> S.Named [] 186 - let counter_serialize : S.serialize = fun _ -> "" 184 + let counter_parse : S.dec = fun _ -> S.Named [] 185 + let counter_serialize : S.enc = fun _ -> "" 187 186 188 187 let counter_merge : S.merge = 189 188 fun ~ancestor ours theirs -> ··· 194 193 let tree_with_counters = 195 194 let ( => ) = S.( => ) in 196 195 S.fix (fun self -> 197 - S.node ~parse:Irmin_tar.dir_parse ~serialize:Irmin_tar.dir_serialize 196 + S.node ~dec:Irmin_tar.dir_parse ~enc:Irmin_tar.dir_serialize 198 197 [ 199 198 "*.counter" 200 - => S.node ~parse:counter_parse ~serialize:counter_serialize 199 + => S.node ~dec:counter_parse ~enc:counter_serialize 201 200 ~merge:counter_merge []; 202 201 "*" => self; 203 202 ]) ··· 219 218 Irmin_tar.of_entries heap 220 219 [ ("a.txt", "hello"); ("b.txt", "world"); ("d.txt", "new-theirs") ] 221 220 in 222 - match 221 + let merged, conflicts = 223 222 S.merge heap Irmin_tar.tree 224 223 ~ancestor:(S.at heap Irmin_tar.tree ancestor) 225 224 ~ours:(S.at heap Irmin_tar.tree ours) 226 225 ~theirs:(S.at heap Irmin_tar.tree theirs) 227 - with 228 - | Error (`Conflict msg) -> Alcotest.failf "merge conflict: %s" msg 229 - | Ok merged -> 230 - let c = merged in 231 - let kids = S.list c |> List.map fst |> List.sort String.compare in 232 - Alcotest.(check (list string)) 233 - "merged" 234 - [ "a.txt"; "b.txt"; "c.txt"; "d.txt" ] 235 - kids; 236 - (* Check values *) 237 - Alcotest.(check (option string)) 238 - "c.txt" (Some "new-ours") (S.find c [ "c.txt" ]); 239 - Alcotest.(check (option string)) 240 - "d.txt" (Some "new-theirs") (S.find c [ "d.txt" ]) 226 + in 227 + Alcotest.(check int) "no conflicts" 0 (List.length conflicts); 228 + let kids = S.list merged |> List.map fst |> List.sort String.compare in 229 + Alcotest.(check (list string)) 230 + "merged" 231 + [ "a.txt"; "b.txt"; "c.txt"; "d.txt" ] 232 + kids; 233 + (* Check values *) 234 + Alcotest.(check (option string)) 235 + "c.txt" (Some "new-ours") 236 + (S.find merged [ "c.txt" ]); 237 + Alcotest.(check (option string)) 238 + "d.txt" (Some "new-theirs") 239 + (S.find merged [ "d.txt" ]) 241 240 242 241 let test_merge_counter () = 243 242 let store = Hashtbl.create 64 in ··· 245 244 let ancestor = Irmin_tar.of_entries heap [ ("views.counter", "10") ] in 246 245 let ours = Irmin_tar.of_entries heap [ ("views.counter", "15") ] in 247 246 let theirs = Irmin_tar.of_entries heap [ ("views.counter", "12") ] in 248 - match 247 + let merged, conflicts = 249 248 S.merge heap tree_with_counters 250 249 ~ancestor:(S.at heap tree_with_counters ancestor) 251 250 ~ours:(S.at heap tree_with_counters ours) 252 251 ~theirs:(S.at heap tree_with_counters theirs) 253 - with 254 - | Error (`Conflict msg) -> Alcotest.failf "merge conflict: %s" msg 255 - | Ok merged -> 256 - let c = merged in 257 - (* 15 + 12 - 10 = 17 *) 258 - Alcotest.(check (option string)) 259 - "counter merged" (Some "17") 260 - (S.find c [ "views.counter" ]) 252 + in 253 + Alcotest.(check int) "no conflicts" 0 (List.length conflicts); 254 + (* 15 + 12 - 10 = 17 *) 255 + Alcotest.(check (option string)) 256 + "counter merged" (Some "17") 257 + (S.find merged [ "views.counter" ]) 261 258 262 259 let test_merge_conflict () = 263 260 let store = Hashtbl.create 64 in ··· 266 263 let ours = Irmin_tar.of_entries heap [ ("a.txt", "ours-edit") ] in 267 264 let theirs = Irmin_tar.of_entries heap [ ("a.txt", "theirs-edit") ] in 268 265 (* No merge function for opaque blobs → conflict *) 269 - match 266 + let _merged, conflicts = 270 267 S.merge heap Irmin_tar.tree 271 268 ~ancestor:(S.at heap Irmin_tar.tree ancestor) 272 269 ~ours:(S.at heap Irmin_tar.tree ours) 273 270 ~theirs:(S.at heap Irmin_tar.tree theirs) 274 - with 275 - | Ok _ -> Alcotest.fail "should conflict" 276 - | Error (`Conflict _) -> () 271 + in 272 + Alcotest.(check bool) "has conflicts" true (conflicts <> []) 277 273 278 274 let () = 279 275 Alcotest.run "irmin-tar"
+1 -1
test/test.ml
··· 1 - let () = Alcotest.run "Irmin" Test_stores.suites 1 + let () = Alcotest.run "Irmin" []