Persistent store with Git semantics: lazy reads, delayed writes, content-addressing
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

test(ocaml-git): add unit and fuzz tests for Git.Tree.add deduplication

Add a unit test that catches the duplicate-entry bug (add same name twice
should replace, not accumulate). Add a Crowbar model-based fuzz suite that
applies random Add/Remove sequences to both Git.Tree.t and a Map reference,
checking no-duplicates, sorted-order, and model-agreement invariants after
every step. The roundtrip-after-ops test also exercises serialization across
arbitrary op sequences.

+708 -538
+2 -3
bin/cmd_branches.ml
··· 2 2 3 3 let run ~repo ~output () = 4 4 let config = Config.load ~repo () in 5 - let (module B : Common.BACKEND) = Common.backend_of_config config in 6 5 Eio_main.run @@ fun env -> 7 6 let fs = Eio.Stdenv.cwd env in 8 7 Eio.Switch.run @@ fun sw -> 9 - let store = B.open_store ~sw ~fs ~config in 10 - let bs = B.branches store in 8 + let store = Common.open_store ~sw ~fs ~config in 9 + let bs = Irmin.branches store in 11 10 (match output with 12 11 | `Human -> List.iter (Fmt.pr " %s@.") bs 13 12 | `Json -> Fmt.pr "[%a]@." Fmt.(list ~sep:comma (fmt "%S")) bs);
+6 -5
bin/cmd_checkout.ml
··· 2 2 3 3 let run ~repo ~create branch = 4 4 let config = Config.load ~repo () in 5 - let (module B : Common.BACKEND) = Common.backend_of_config config in 6 5 Eio_main.run @@ fun env -> 7 6 let fs = Eio.Stdenv.cwd env in 8 7 Eio.Switch.run @@ fun sw -> 9 - let store = B.open_store ~sw ~fs ~config in 10 - let existing = B.branches store in 8 + let store = Common.open_store ~sw ~fs ~config in 9 + let existing = Irmin.branches store in 11 10 let exists = List.mem branch existing in 12 11 match (create, exists) with 13 12 | false, false -> ··· 21 20 1 22 21 | true, false -> 23 22 let candidates = "main" :: List.filter (( <> ) "main") existing in 24 - let head = List.find_map (fun b -> B.head store ~branch:b) candidates in 25 - (match head with Some h -> B.set_head store ~branch h | None -> ()); 23 + let head = 24 + List.find_map (fun b -> Irmin.head store ~branch:b) candidates 25 + in 26 + (match head with Some h -> Irmin.set_head store ~branch h | None -> ()); 26 27 Common.success "Created branch %a" Common.styled_cyan branch; 27 28 0
+7 -8
bin/cmd_del.ml
··· 3 3 let run ~repo ~branch ~message path = 4 4 let message = match message with Some m -> m | None -> "Delete " ^ path in 5 5 let config = Config.load ~repo () in 6 - let (module B : Common.BACKEND) = Common.backend_of_config config in 7 6 Eio_main.run @@ fun env -> 8 7 let fs = Eio.Stdenv.cwd env in 9 8 Eio.Switch.run @@ fun sw -> 10 - let store = B.open_store ~sw ~fs ~config in 11 - match B.checkout store ~branch with 9 + let store = Common.open_store ~sw ~fs ~config in 10 + match Irmin.checkout store ~branch with 12 11 | None -> 13 12 Common.error "Branch %a not found" Common.styled_cyan branch; 14 13 1 15 14 | Some tree -> 16 - let tree = B.tree_remove tree (Common.path_of_string path) in 15 + let tree = Irmin.Tree.remove tree (Common.path_of_string path) in 17 16 let parents = 18 - match B.head store ~branch with None -> [] | Some h -> [ h ] 17 + match Irmin.head store ~branch with None -> [] | Some h -> [ h ] 19 18 in 20 19 let hash = 21 - B.commit store ~tree ~parents ~message ~author:"irmin <irmin@local>" 20 + Irmin.commit store ~tree ~parents ~message ~author:"irmin <irmin@local>" 22 21 in 23 - B.set_head store ~branch hash; 24 - Common.success "%a" Common.styled_faint (B.hash_short hash); 22 + Irmin.set_head store ~branch hash; 23 + Common.success "%a" Common.styled_faint (Irmin.Hash.short hash); 25 24 0
+3 -4
bin/cmd_export.ml
··· 14 14 Common.error "CAR export not yet supported for Git backend"; 15 15 1 16 16 | Config.Git -> ( 17 - (* Export as bundle or tar - for now just list what would be exported *) 18 - let store = Irmin.Git.open_ ~sw ~fs ~path:(Fpath.v config.store_path) in 19 - match Irmin.Store.Git.checkout store ~branch with 17 + let store = Common.open_store ~sw ~fs ~config in 18 + match Irmin.checkout store ~branch with 20 19 | None -> 21 20 Common.error "Branch %a not found" Common.styled_cyan branch; 22 21 1 23 22 | Some tree -> 24 23 let rec count_entries path = 25 - let entries = Irmin.Tree.Git.list tree path in 24 + let entries = Irmin.Tree.list tree path in 26 25 List.fold_left 27 26 (fun acc (name, kind) -> 28 27 match kind with
+3 -4
bin/cmd_get.ml
··· 2 2 3 3 let run ~repo ~branch ~output path = 4 4 let config = Config.load ~repo () in 5 - let (module B : Common.BACKEND) = Common.backend_of_config config in 6 5 Eio_main.run @@ fun env -> 7 6 let fs = Eio.Stdenv.cwd env in 8 7 Eio.Switch.run @@ fun sw -> 9 - let store = B.open_store ~sw ~fs ~config in 10 - match B.checkout store ~branch with 8 + let store = Common.open_store ~sw ~fs ~config in 9 + match Irmin.checkout store ~branch with 11 10 | None -> 12 11 Common.error "Branch %a not found" Common.styled_cyan branch; 13 12 1 14 13 | Some tree -> ( 15 - match B.tree_find tree (Common.path_of_string path) with 14 + match Irmin.Tree.find tree (Common.path_of_string path) with 16 15 | None -> 17 16 Common.error "Path %a not found" Common.styled_cyan path; 18 17 1
+7 -8
bin/cmd_import.ml
··· 27 27 let is_car = Filename.check_suffix file ".car" in 28 28 if is_car then import_car ~config ~fs data file 29 29 else begin 30 - let (module B : Common.BACKEND) = Common.backend_of_config config in 31 - let store = B.open_store ~sw ~fs ~config in 30 + let store = Common.open_store ~sw ~fs ~config in 32 31 let tree = 33 - match B.checkout store ~branch with 34 - | None -> B.empty_tree store 32 + match Irmin.checkout store ~branch with 33 + | None -> Irmin.empty_tree store 35 34 | Some t -> t 36 35 in 37 36 let path = Common.path_of_string file in 38 - let tree = B.tree_add tree path data in 37 + let tree = Irmin.Tree.add tree path data in 39 38 let parents = 40 - match B.head store ~branch with None -> [] | Some h -> [ h ] 39 + match Irmin.head store ~branch with None -> [] | Some h -> [ h ] 41 40 in 42 41 let message = "Import " ^ file in 43 42 let hash = 44 - B.commit store ~tree ~parents ~message ~author:"irmin <irmin@local>" 43 + Irmin.commit store ~tree ~parents ~message ~author:"irmin <irmin@local>" 45 44 in 46 - B.set_head store ~branch hash; 45 + Irmin.set_head store ~branch hash; 47 46 Common.success "Imported %a" Common.styled_cyan file; 48 47 0 49 48 end
+3 -9
bin/cmd_info.ml
··· 36 36 Fmt.pr "Backend: %a@." Config.pp_backend config.backend; 37 37 Fmt.pr "Branch: %s@." config.default_branch; 38 38 match config.backend with 39 - | Config.Git -> 40 - let store = Irmin.Git.open_ ~sw ~fs ~path:(Fpath.v config.store_path) in 41 - let branches = Irmin.Store.Git.branches store in 39 + | Config.Git | Config.Disk -> 40 + let store = Common.open_store ~sw ~fs ~config in 41 + let branches = Irmin.branches store in 42 42 Fmt.pr "Branches: %d@." (List.length branches); 43 43 List.iter (fun b -> Fmt.pr " %s@." b) branches; 44 44 0 ··· 52 52 0 53 53 | Config.Memory -> 54 54 Fmt.pr "Store: (in-memory)@."; 55 - 0 56 - | Config.Disk -> 57 - let store = Irmin.Mst.disk ~sw Eio.Path.(fs / config.store_path) in 58 - let branches = Irmin.Store.Mst.branches store in 59 - Fmt.pr "Branches: %d@." (List.length branches); 60 - List.iter (fun b -> Fmt.pr " %s@." b) branches; 61 55 0 62 56 63 57 let run ~repo file =
+3 -4
bin/cmd_list.ml
··· 2 2 3 3 let run ~repo ~branch ~output prefix = 4 4 let config = Config.load ~repo () in 5 - let (module B : Common.BACKEND) = Common.backend_of_config config in 6 5 Eio_main.run @@ fun env -> 7 6 let fs = Eio.Stdenv.cwd env in 8 7 Eio.Switch.run @@ fun sw -> 9 - let store = B.open_store ~sw ~fs ~config in 10 - match B.checkout store ~branch with 8 + let store = Common.open_store ~sw ~fs ~config in 9 + match Irmin.checkout store ~branch with 11 10 | None -> 12 11 Common.error "Branch %a not found" Common.styled_cyan branch; 13 12 1 ··· 15 14 let path = 16 15 match prefix with None -> [] | Some p -> Common.path_of_string p 17 16 in 18 - let entries = B.tree_list tree path in 17 + let entries = Irmin.Tree.list tree path in 19 18 (match output with 20 19 | `Human -> 21 20 List.iter
+7 -8
bin/cmd_log.ml
··· 2 2 3 3 let run ~repo ~branch ~output ~limit () = 4 4 let config = Config.load ~repo () in 5 - let (module B : Common.BACKEND) = Common.backend_of_config config in 6 5 Eio_main.run @@ fun env -> 7 6 let fs = Eio.Stdenv.cwd env in 8 7 Eio.Switch.run @@ fun sw -> 9 - let store = B.open_store ~sw ~fs ~config in 10 - let entries = B.log store ~branch ~limit in 8 + let store = Common.open_store ~sw ~fs ~config in 9 + let entries = Irmin.log store ~branch ~limit in 11 10 match entries with 12 11 | [] -> 13 12 (match output with ··· 18 17 (match output with 19 18 | `Human -> 20 19 List.iter 21 - (fun (e : Common.log_entry) -> 20 + (fun (e : Irmin.commit_info) -> 22 21 Fmt.pr "%a %s@. %s@.@." Common.styled_yellow 23 - (String.sub e.hash 0 7) e.author e.message) 22 + (Irmin.Hash.short e.hash) e.author e.message) 24 23 entries 25 24 | `Json -> 26 25 List.iter 27 - (fun (e : Common.log_entry) -> 28 - Fmt.pr {|{"hash":%S,"author":%S,"message":%S}@.|} e.hash e.author 29 - e.message) 26 + (fun (e : Irmin.commit_info) -> 27 + Fmt.pr {|{"hash":%S,"author":%S,"message":%S}@.|} 28 + (Irmin.Hash.to_hex e.hash) e.author e.message) 30 29 entries); 31 30 0
+1
bin/cmd_proof.ml
··· 1 1 (** MST Proof commands. *) 2 2 3 3 open Irmin 4 + open Private 4 5 5 6 let produce ~output ~key data = 6 7 let backend = Backend.Memory.create_sha256 () in
+9 -14
bin/cmd_serve.ml
··· 193 193 "MethodNotSupported", 194 194 Some "Blob storage not supported for this backend" ) 195 195 196 - (** Build XRPC routes from a backend store. *) 197 - let routes (type s t h) 198 - (module B : Common.BACKEND 199 - with type store = s 200 - and type tree = t 201 - and type hash = h) ~(store : s) ~branch ~did ~(pds : Pds.t option) = 202 - let tree () = B.checkout store ~branch in 196 + (** Build XRPC routes from a store. *) 197 + let routes ~(store : Irmin.t) ~branch ~did ~(pds : Pds.t option) = 198 + let tree () = Irmin.checkout store ~branch in 203 199 Xrpc_server. 204 200 [ 205 201 { ··· 208 204 }; 209 205 { 210 206 nsid = "com.atproto.repo.describeRepo"; 211 - handler = handle_describe_repo ~did ~tree ~tree_list:B.tree_list; 207 + handler = handle_describe_repo ~did ~tree ~tree_list:Irmin.Tree.list; 212 208 }; 213 209 { 214 210 nsid = "com.atproto.repo.getRecord"; 215 - handler = handle_get_record ~did ~tree ~tree_find:B.tree_find; 211 + handler = handle_get_record ~did ~tree ~tree_find:Irmin.Tree.find; 216 212 }; 217 213 { 218 214 nsid = "com.atproto.repo.listRecords"; 219 215 handler = 220 - handle_list_records ~did ~tree ~tree_list:B.tree_list 221 - ~tree_find:B.tree_find; 216 + handle_list_records ~did ~tree ~tree_list:Irmin.Tree.list 217 + ~tree_find:Irmin.Tree.find; 222 218 }; 223 219 { nsid = "com.atproto.sync.getRepo"; handler = handle_get_repo ~pds }; 224 220 { nsid = "com.atproto.sync.getBlob"; handler = handle_get_blob ~pds }; ··· 226 222 227 223 let run ~repo ~branch ~port ~did ~format:_ = 228 224 let config = Config.load ~repo () in 229 - let (module B : Common.BACKEND) = Common.backend_of_config config in 230 225 Eio_main.run @@ fun env -> 231 226 let fs = Eio.Stdenv.cwd env in 232 227 let net = Eio.Stdenv.net env in 233 228 Eio.Switch.run @@ fun sw -> 234 - let store = B.open_store ~sw ~fs ~config in 229 + let store = Common.open_store ~sw ~fs ~config in 235 230 236 231 (* If PDS backend, get the PDS handle for blob/CAR support *) 237 232 let pds = ··· 252 247 | None -> "did:web:localhost") 253 248 in 254 249 255 - let routes = routes (module B) ~store ~branch ~did ~pds in 250 + let routes = routes ~store ~branch ~did ~pds in 256 251 let server = Xrpc_server.v ~routes in 257 252 let on_listen actual_port = 258 253 Fmt.pr "Serving %a store at http://localhost:%d/xrpc/...@."
+8 -9
bin/cmd_set.ml
··· 6 6 in 7 7 let message = match message with Some m -> m | None -> "Set " ^ path in 8 8 let config = Config.load ~repo () in 9 - let (module B : Common.BACKEND) = Common.backend_of_config config in 10 9 Eio_main.run @@ fun env -> 11 10 let fs = Eio.Stdenv.cwd env in 12 11 Eio.Switch.run @@ fun sw -> 13 - let store = B.open_store ~sw ~fs ~config in 12 + let store = Common.open_store ~sw ~fs ~config in 14 13 let tree = 15 - match B.checkout store ~branch with 16 - | None -> B.empty_tree store 14 + match Irmin.checkout store ~branch with 15 + | None -> Irmin.empty_tree store 17 16 | Some t -> t 18 17 in 19 - let tree = B.tree_add tree (Common.path_of_string path) content in 18 + let tree = Irmin.Tree.add tree (Common.path_of_string path) content in 20 19 let parents = 21 - match B.head store ~branch with None -> [] | Some h -> [ h ] 20 + match Irmin.head store ~branch with None -> [] | Some h -> [ h ] 22 21 in 23 22 let hash = 24 - B.commit store ~tree ~parents ~message ~author:"irmin <irmin@local>" 23 + Irmin.commit store ~tree ~parents ~message ~author:"irmin <irmin@local>" 25 24 in 26 - B.set_head store ~branch hash; 27 - Common.success "%a" Common.styled_faint (B.hash_short hash) 25 + Irmin.set_head store ~branch hash; 26 + Common.success "%a" Common.styled_faint (Irmin.Hash.short hash)
+3 -4
bin/cmd_tree.ml
··· 2 2 3 3 let run ~repo ~branch ~output path = 4 4 let config = Config.load ~repo () in 5 - let (module B : Common.BACKEND) = Common.backend_of_config config in 6 5 Eio_main.run @@ fun env -> 7 6 let fs = Eio.Stdenv.cwd env in 8 7 Eio.Switch.run @@ fun sw -> 9 - let store = B.open_store ~sw ~fs ~config in 10 - match B.checkout store ~branch with 8 + let store = Common.open_store ~sw ~fs ~config in 9 + match Irmin.checkout store ~branch with 11 10 | None -> 12 11 Common.error "Branch %a not found" Common.styled_cyan branch; 13 12 1 ··· 16 15 match path with None -> [] | Some p -> Common.path_of_string p 17 16 in 18 17 let rec walk indent path = 19 - let entries = B.tree_list tree path in 18 + let entries = Irmin.Tree.list tree path in 20 19 List.iter 21 20 (fun (name, kind) -> 22 21 let full_path = path @ [ name ] in
+7 -246
bin/common.ml
··· 20 20 let path_of_string s = 21 21 String.split_on_char '/' s |> List.filter (fun s -> s <> "") 22 22 23 - type log_entry = { hash : string; author : string; message : string } 24 - (** Log entry for commit history. *) 25 - 26 - (** Backend module signature - first-class modules for extensibility. *) 27 - module type BACKEND = sig 28 - type store 29 - type tree 30 - type hash 31 - 32 - val open_store : 33 - sw:Eio.Switch.t -> fs:Eio.Fs.dir_ty Eio.Path.t -> config:Config.t -> store 34 - 35 - val checkout : store -> branch:string -> tree option 36 - val empty_tree : store -> tree 37 - val tree_find : tree -> string list -> string option 38 - val tree_add : tree -> string list -> string -> tree 39 - val tree_remove : tree -> string list -> tree 40 - val tree_list : tree -> string list -> (string * [ `Contents | `Node ]) list 41 - val head : store -> branch:string -> hash option 42 - 43 - val commit : 44 - store -> 45 - tree:tree -> 46 - parents:hash list -> 47 - message:string -> 48 - author:string -> 49 - hash 50 - 51 - val set_head : store -> branch:string -> hash -> unit 52 - val branches : store -> string list 53 - val log : store -> branch:string -> limit:int option -> log_entry list 54 - val hash_to_hex : hash -> string 55 - val hash_short : hash -> string 56 - end 57 - 58 - (** Git backend — SHA-1, real git objects, git-compatible. *) 59 - module Git : BACKEND = struct 60 - module S = Store.Git 61 - 62 - type store = S.t 63 - type tree = S.Tree.t 64 - type hash = S.hash 65 - 66 - let open_store ~sw ~fs ~config = 67 - Irmin.Git.open_ ~sw ~fs ~path:(Fpath.v config.Config.store_path) 68 - 69 - let checkout s ~branch = S.checkout s ~branch 70 - let empty_tree _s = S.Tree.empty () 71 - let tree_find t path = S.Tree.find t path 72 - let tree_add t path c = S.Tree.add t path c 73 - let tree_remove t path = S.Tree.remove t path 74 - let tree_list t path = S.Tree.list t path 75 - let head s ~branch = S.head s ~branch 76 - 77 - let commit s ~tree ~parents ~message ~author = 78 - S.commit s ~tree ~parents ~message ~author 79 - 80 - let set_head s ~branch h = S.set_head s ~branch h 81 - let branches s = S.branches s 82 - 83 - let log s ~branch ~limit = 84 - let rec walk n h acc = 85 - if n = Some 0 then List.rev acc 86 - else 87 - match S.read_commit s h with 88 - | None -> List.rev acc 89 - | Some c -> ( 90 - let entry = 91 - { 92 - hash = Hash.to_hex h; 93 - author = S.Commit.author c; 94 - message = S.Commit.message c; 95 - } 96 - in 97 - match S.Commit.parents c with 98 - | [] -> List.rev (entry :: acc) 99 - | p :: _ -> walk (Option.map pred n) p (entry :: acc)) 100 - in 101 - match S.head s ~branch with None -> [] | Some h -> walk limit h [] 102 - 103 - let hash_to_hex h = Hash.to_hex h 104 - let hash_short h = String.sub (Hash.to_hex h) 0 7 105 - end 106 - 107 - (** Functor for MST-format backends (Memory, Disk). *) 108 - module Mst_store (Open : sig 109 - val v : 110 - sw:Eio.Switch.t -> 111 - fs:Eio.Fs.dir_ty Eio.Path.t -> 112 - config:Config.t -> 113 - Store.Mst.t 114 - end) : BACKEND = struct 115 - module S = Store.Mst 116 - 117 - type store = S.t 118 - type tree = S.Tree.t 119 - type hash = S.hash 120 - 121 - let open_store = Open.v 122 - let checkout s ~branch = S.checkout s ~branch 123 - let empty_tree _s = S.Tree.empty () 124 - let tree_find t path = S.Tree.find t path 125 - let tree_add t path c = S.Tree.add t path c 126 - let tree_remove t path = S.Tree.remove t path 127 - let tree_list t path = S.Tree.list t path 128 - let head s ~branch = S.head s ~branch 129 - 130 - let commit s ~tree ~parents ~message ~author = 131 - S.commit s ~tree ~parents ~message ~author 132 - 133 - let set_head s ~branch h = S.set_head s ~branch h 134 - let branches s = S.branches s 135 - 136 - let log s ~branch ~limit = 137 - let rec walk n h acc = 138 - if n = Some 0 then List.rev acc 139 - else 140 - match S.read_commit s h with 141 - | None -> List.rev acc 142 - | Some c -> ( 143 - let entry = 144 - { 145 - hash = Hash.to_hex h; 146 - author = S.Commit.author c; 147 - message = S.Commit.message c; 148 - } 149 - in 150 - match S.Commit.parents c with 151 - | [] -> List.rev (entry :: acc) 152 - | p :: _ -> walk (Option.map pred n) p (entry :: acc)) 153 - in 154 - match S.head s ~branch with None -> [] | Some h -> walk limit h [] 155 - 156 - let hash_to_hex h = Hash.to_hex h 157 - let hash_short h = String.sub (Hash.to_hex h) 0 7 158 - end 159 - 160 - (** In-memory MST store — for testing. *) 161 - module Memory = Mst_store (struct 162 - let v ~sw:_ ~fs:_ ~config:_ = Mst.memory () 163 - end) 164 - 165 - (** Disk-backed MST store — append-only WAL, high-throughput. *) 166 - module Disk = Mst_store (struct 167 - let v ~sw ~fs ~config = Mst.disk ~sw Eio.Path.(fs / config.Config.store_path) 168 - end) 169 - 170 - (** PDS backend — SQLite-backed ATProto storage. 171 - 172 - Kept separate because PDS HEAD = MST root CID (not an Irmin commit hash). 173 - Cannot use Store.Mst.checkout/commit, which expect HEAD to point to a commit 174 - object. Instead we call Pds.checkout/set_head directly. *) 175 - module Pds_backend : BACKEND = struct 176 - type store = Pds.t 177 - type tree = Atp.Mst.node * Atp.Blockstore.writable 178 - type hash = Atp.Cid.t 179 - 180 - let open_store ~sw ~fs ~config = 181 - let path = Eio.Path.(fs / config.Config.store_path) in 182 - let pds_db = Filename.concat config.Config.store_path "pds.db" in 183 - if Sys.file_exists pds_db then Pds.open_ ~sw path 184 - else 185 - let did = Atp.Did.of_string_exn "did:web:localhost" in 186 - Pds.v ~sw path ~did 187 - 188 - let checkout store ~branch = 189 - ignore branch; 190 - match Pds.checkout store with 191 - | None -> None 192 - | Some mst -> Some (mst, Pds.blockstore store) 193 - 194 - let empty_tree store = (Atp.Mst.empty, Pds.blockstore store) 195 - 196 - let tree_find (mst, bs) path = 197 - let key = String.concat "/" path in 198 - match Atp.Mst.find key mst ~store:(bs :> Atp.Blockstore.readable) with 199 - | None -> None 200 - | Some cid -> bs#get cid 201 - 202 - let tree_add (mst, bs) path content = 203 - let key = String.concat "/" path in 204 - let cid = Atp.Cid.v `Dag_cbor content in 205 - bs#put cid content; 206 - let mst' = Atp.Mst.add key cid mst ~store:bs in 207 - (mst', bs) 208 - 209 - let tree_remove (mst, bs) path = 210 - let key = String.concat "/" path in 211 - let mst' = Atp.Mst.remove key mst ~store:bs in 212 - (mst', bs) 213 - 214 - let tree_list (mst, bs) path = 215 - let prefix = 216 - match path with [] -> "" | _ -> String.concat "/" path ^ "/" 217 - in 218 - Atp.Mst.leaves mst ~store:(bs :> Atp.Blockstore.readable) 219 - |> Seq.filter_map (fun (k, _cid) -> 220 - if 221 - prefix = "" 222 - || String.length k > String.length prefix 223 - && String.sub k 0 (String.length prefix) = prefix 224 - then 225 - let suffix = 226 - String.sub k (String.length prefix) 227 - (String.length k - String.length prefix) 228 - in 229 - match String.index_opt suffix '/' with 230 - | None -> Some (suffix, `Contents) 231 - | Some i -> Some (String.sub suffix 0 i, `Node) 232 - else None) 233 - |> List.of_seq |> List.sort_uniq compare 234 - 235 - let head store ~branch = 236 - ignore branch; 237 - Pds.head store 238 - 239 - let commit store ~tree:(mst, _) ~parents ~message ~author = 240 - ignore parents; 241 - ignore message; 242 - ignore author; 243 - let bs = Pds.blockstore store in 244 - let cid = Atp.Mst.to_cid mst ~store:bs in 245 - Pds.set_head store cid; 246 - cid 247 - 248 - let set_head store ~branch:_ hash = Pds.set_head store hash 249 - let branches _store = [ "main" ] 250 - 251 - let log _store ~branch ~limit = 252 - ignore branch; 253 - ignore limit; 254 - [] 255 - 256 - let hash_to_hex cid = Atp.Cid.to_string cid 257 - 258 - let hash_short cid = 259 - let s = Atp.Cid.to_string cid in 260 - if String.length s > 7 then String.sub s 0 7 else s 261 - end 262 - 263 - (** Get the appropriate backend module for a configuration. *) 264 - let backend_of_config (config : Config.t) : (module BACKEND) = 23 + let open_store ~sw ~fs ~(config : Config.t) : t = 265 24 match config.backend with 266 - | Config.Git -> (module Git) 267 - | Config.Pds -> (module Pds_backend) 268 - | Config.Memory -> (module Memory) 269 - | Config.Disk -> (module Disk) 25 + | Config.Git -> Git.open_ ~sw ~fs ~path:(Fpath.v config.store_path) 26 + | Config.Pds -> 27 + let path = Eio.Path.(fs / config.store_path) in 28 + Mst.of_pds (Pds.open_ ~sw path) 29 + | Config.Memory -> Mst.memory () 30 + | Config.Disk -> Mst.disk ~sw Eio.Path.(fs / config.store_path)
+391 -75
lib/irmin.ml
··· 1 - (** Irmin 4.0 - Content-addressable storage for OCaml. 1 + (* ===== Private: capture internal module names before shadowing ===== *) 2 2 3 - Irmin provides two APIs: 4 - - {b Link API}: Minimal interface for persisting OCaml values. 5 - - {b Tree API}: Git-compatible version control with paths, commits, and 6 - branches. 3 + module Private = struct 4 + module Hash = Hash 5 + module Backend = Backend 6 + module Codec = Codec 7 + module Tree = Tree 8 + module Commit = Commit 9 + module Store = Store 10 + module Subtree = Subtree 11 + module Proof = Proof 12 + module Pds_interop = Pds_interop 13 + end 7 14 8 - Both share a common content-addressable backend. 15 + (* ===== Link API ===== *) 9 16 10 - Architecture: Link → Tree → KV 17 + module Link = Link 11 18 12 - {[ 13 - Link ('a link) → Tree (lazy/staged) → KV/Backend (storage) 14 - ]} 19 + (* ===== Unified hash ===== *) 15 20 16 - - {b Link}: Persistent pointers to OCaml values (['a link], [link], [fetch]) 17 - - {b Tree}: Lazy reads, delayed writes (like Git's staging area) 18 - - {b KV}: Raw content-addressed storage by hash 21 + type hash = string 19 22 20 - The Link API provides a simple way to persist arbitrary OCaml values: 23 + module Hash = struct 24 + let to_hex h = h 25 + let of_hex s = s 26 + let equal = String.equal 27 + let pp fmt h = Fmt.string fmt h 28 + let short h = if String.length h >= 7 then String.sub h 0 7 else h 29 + end 21 30 22 - {[ 23 - type tree = node Link.t 24 - and node = Empty | Node of { l : tree; v : int; r : tree } 31 + (* ===== Unified tree (record of closures) ===== *) 25 32 26 - let leaf s x = 27 - Link.v s (Node { l = Link.v s Empty; v = x; r = Link.v s Empty }) 28 - ]} *) 33 + type tree = { 34 + t_find : string list -> string option; 35 + t_add : string list -> string -> tree; 36 + t_remove : string list -> tree; 37 + t_list : string list -> (string * [ `Contents | `Node ]) list; 38 + t_find_tree : string list -> tree option; 39 + t_add_tree : string list -> tree -> tree; 40 + (* Flushes all pending writes to the backend; returns the root hash as hex. *) 41 + t_flush : unit -> hash; 42 + } 29 43 30 - (** {1 Link Layer (Persistent Pointers)} 44 + module Tree = struct 45 + let find t path = t.t_find path 46 + let add t path v = t.t_add path v 47 + let remove t path = t.t_remove path 48 + let list t path = t.t_list path 49 + let find_tree t path = t.t_find_tree path 50 + let add_tree t path sub = t.t_add_tree path sub 51 + end 31 52 32 - The core abstraction: content-addressed pointers to OCaml values. *) 53 + (* ===== Commit info ===== *) 33 54 34 - module Link = Link 35 - (** Persistent pointers with [link] and [fetch]. *) 55 + type commit_info = { 56 + hash : hash; 57 + author : string; 58 + message : string; 59 + parents : hash list; 60 + } 36 61 37 - (** {1 KV Layer (Storage)} 62 + (* ===== Unified store (record of closures) ===== *) 38 63 39 - Content-addressed storage with refs for mutable pointers. *) 64 + type t = { 65 + s_empty_tree : unit -> tree; 66 + s_checkout : branch:string -> tree option; 67 + s_head : branch:string -> hash option; 68 + s_set_head : branch:string -> hash -> unit; 69 + s_branches : unit -> string list; 70 + s_commit : 71 + parents:hash list -> message:string -> author:string -> tree -> hash; 72 + s_log : branch:string -> limit:int option -> commit_info list; 73 + s_read_commit : hash -> commit_info option; 74 + s_update_branch : branch:string -> old:hash option -> new_:hash -> bool; 75 + s_is_ancestor : ancestor:hash -> descendant:hash -> bool; 76 + s_merge_base : hash -> hash -> hash option; 77 + } 40 78 41 - module Hash = Hash 42 - (** Phantom-typed hashes (SHA-1, SHA-256). *) 79 + (* ===== Top-level store operations ===== *) 43 80 44 - module Backend = Backend 45 - (** KV backend implementations (Memory, Git, layered, cached). *) 81 + let empty_tree store = store.s_empty_tree () 82 + let checkout store ~branch = store.s_checkout ~branch 83 + let head store ~branch = store.s_head ~branch 84 + let set_head store ~branch h = store.s_set_head ~branch h 85 + let branches store = store.s_branches () 46 86 47 - (** {1 Tree Layer (Staging)} 87 + let commit store ~tree ~parents ~message ~author = 88 + store.s_commit ~parents ~message ~author tree 48 89 49 - Lazy reads, delayed writes. Like Git's index/staging area. Trees are built 50 - on top of links for Merkle tree structures. *) 90 + let log store ~branch ~limit = store.s_log ~branch ~limit 91 + let read_commit store h = store.s_read_commit h 51 92 52 - module Codec = Codec 53 - (** Format signatures and implementations (Git, MST, extensible). *) 93 + let update_branch store ~branch ~old ~new_ = 94 + store.s_update_branch ~branch ~old ~new_ 54 95 55 - module Tree = Tree 56 - (** Lazy tree with delayed writes. *) 96 + let is_ancestor store ~ancestor ~descendant = 97 + store.s_is_ancestor ~ancestor ~descendant 57 98 58 - module Commit = Commit 59 - (** Commit operations. *) 99 + let merge_base store h1 h2 = store.s_merge_base h1 h2 60 100 61 - (** {1 High-Level API} *) 101 + (* ===== Git tree / store wrapper ===== *) 62 102 63 - module Store = Store 64 - (** Store combining trees, commits, and branches. *) 103 + let make_git_store (s : Private.Store.Git.t) : t = 104 + let backend = Private.Store.Git.backend s in 105 + let to_hex h = Private.Hash.to_hex h in 106 + let sha1_of hex = 107 + match Private.Hash.sha1_of_hex hex with 108 + | Ok h -> h 109 + | Error (`Msg msg) -> 110 + invalid_arg (Printf.sprintf "Irmin: invalid SHA-1 hash %S: %s" hex msg) 111 + in 112 + let rec wrap_tree (inner : Private.Tree.Git.t) : tree = 113 + { 114 + t_find = Private.Tree.Git.find inner; 115 + t_list = Private.Tree.Git.list inner; 116 + t_find_tree = 117 + (fun path -> 118 + Option.map wrap_tree (Private.Tree.Git.find_tree inner path)); 119 + t_add = (fun path v -> wrap_tree (Private.Tree.Git.add inner path v)); 120 + t_remove = (fun path -> wrap_tree (Private.Tree.Git.remove inner path)); 121 + t_add_tree = 122 + (fun path sub -> 123 + (* Flush sub-tree to get its hash, then add as a shallow reference. *) 124 + let h = sha1_of (sub.t_flush ()) in 125 + wrap_tree 126 + (Private.Tree.Git.add_tree inner path (Private.Tree.Git.shallow h))); 127 + t_flush = (fun () -> to_hex (Private.Tree.Git.hash inner ~backend)); 128 + } 129 + in 130 + { 131 + s_empty_tree = (fun () -> wrap_tree (Private.Tree.Git.empty ())); 132 + s_checkout = 133 + (fun ~branch -> 134 + Option.map wrap_tree (Private.Store.Git.checkout s ~branch)); 135 + s_head = 136 + (fun ~branch -> Option.map to_hex (Private.Store.Git.head s ~branch)); 137 + s_set_head = 138 + (fun ~branch hex -> Private.Store.Git.set_head s ~branch (sha1_of hex)); 139 + s_branches = (fun () -> Private.Store.Git.branches s); 140 + s_commit = 141 + (fun ~parents ~message ~author tree -> 142 + let tree_sha1 = sha1_of (tree.t_flush ()) in 143 + let parents = List.map sha1_of parents in 144 + let h = 145 + Private.Store.Git.commit s 146 + ~tree:(Private.Tree.Git.shallow tree_sha1) 147 + ~parents ~message ~author 148 + in 149 + to_hex h); 150 + s_log = 151 + (fun ~branch ~limit -> 152 + let rec walk n h acc = 153 + if n = Some 0 then List.rev acc 154 + else 155 + match Private.Store.Git.read_commit s h with 156 + | None -> List.rev acc 157 + | Some c -> ( 158 + let entry = 159 + { 160 + hash = to_hex h; 161 + author = Private.Store.Git.Commit.author c; 162 + message = Private.Store.Git.Commit.message c; 163 + parents = 164 + List.map to_hex (Private.Store.Git.Commit.parents c); 165 + } 166 + in 167 + match Private.Store.Git.Commit.parents c with 168 + | [] -> List.rev (entry :: acc) 169 + | p :: _ -> walk (Option.map pred n) p (entry :: acc)) 170 + in 171 + match Private.Store.Git.head s ~branch with 172 + | None -> [] 173 + | Some h -> walk limit h []); 174 + s_read_commit = 175 + (fun hex -> 176 + let h = sha1_of hex in 177 + Option.map 178 + (fun c -> 179 + { 180 + hash = hex; 181 + author = Private.Store.Git.Commit.author c; 182 + message = Private.Store.Git.Commit.message c; 183 + parents = List.map to_hex (Private.Store.Git.Commit.parents c); 184 + }) 185 + (Private.Store.Git.read_commit s h)); 186 + s_update_branch = 187 + (fun ~branch ~old ~new_ -> 188 + Private.Store.Git.update_branch s ~branch ~old:(Option.map sha1_of old) 189 + ~new_:(sha1_of new_)); 190 + s_is_ancestor = 191 + (fun ~ancestor ~descendant -> 192 + Private.Store.Git.is_ancestor s ~ancestor:(sha1_of ancestor) 193 + ~descendant:(sha1_of descendant)); 194 + s_merge_base = 195 + (fun h1 h2 -> 196 + Option.map to_hex 197 + (Private.Store.Git.merge_base s (sha1_of h1) (sha1_of h2))); 198 + } 65 199 66 - module Subtree = Subtree 67 - (** Monorepo subtree operations. *) 200 + (* ===== MST tree / store wrapper ===== *) 68 201 69 - module Proof = Proof 70 - (** Merkle proofs for verified computations. *) 202 + let make_mst_store (s : Private.Store.Mst.t) : t = 203 + let backend = Private.Store.Mst.backend s in 204 + let to_hex h = Private.Hash.to_hex h in 205 + let sha256_of hex = 206 + match Private.Hash.sha256_of_hex hex with 207 + | Ok h -> h 208 + | Error (`Msg msg) -> 209 + invalid_arg 210 + (Printf.sprintf "Irmin: invalid SHA-256 hash %S: %s" hex msg) 211 + in 212 + let rec wrap_tree (inner : Private.Tree.Mst.t) : tree = 213 + { 214 + t_find = Private.Tree.Mst.find inner; 215 + t_list = Private.Tree.Mst.list inner; 216 + t_find_tree = 217 + (fun path -> 218 + Option.map wrap_tree (Private.Tree.Mst.find_tree inner path)); 219 + t_add = (fun path v -> wrap_tree (Private.Tree.Mst.add inner path v)); 220 + t_remove = (fun path -> wrap_tree (Private.Tree.Mst.remove inner path)); 221 + t_add_tree = 222 + (fun path sub -> 223 + let h = sha256_of (sub.t_flush ()) in 224 + wrap_tree 225 + (Private.Tree.Mst.add_tree inner path (Private.Tree.Mst.shallow h))); 226 + t_flush = (fun () -> to_hex (Private.Tree.Mst.hash inner ~backend)); 227 + } 228 + in 229 + { 230 + s_empty_tree = (fun () -> wrap_tree (Private.Tree.Mst.empty ())); 231 + s_checkout = 232 + (fun ~branch -> 233 + Option.map wrap_tree (Private.Store.Mst.checkout s ~branch)); 234 + s_head = 235 + (fun ~branch -> Option.map to_hex (Private.Store.Mst.head s ~branch)); 236 + s_set_head = 237 + (fun ~branch hex -> Private.Store.Mst.set_head s ~branch (sha256_of hex)); 238 + s_branches = (fun () -> Private.Store.Mst.branches s); 239 + s_commit = 240 + (fun ~parents ~message ~author tree -> 241 + let tree_sha256 = sha256_of (tree.t_flush ()) in 242 + let parents = List.map sha256_of parents in 243 + let h = 244 + Private.Store.Mst.commit s 245 + ~tree:(Private.Tree.Mst.shallow tree_sha256) 246 + ~parents ~message ~author 247 + in 248 + to_hex h); 249 + s_log = 250 + (fun ~branch ~limit -> 251 + let rec walk n h acc = 252 + if n = Some 0 then List.rev acc 253 + else 254 + match Private.Store.Mst.read_commit s h with 255 + | None -> List.rev acc 256 + | Some c -> ( 257 + let entry = 258 + { 259 + hash = to_hex h; 260 + author = Private.Store.Mst.Commit.author c; 261 + message = Private.Store.Mst.Commit.message c; 262 + parents = 263 + List.map to_hex (Private.Store.Mst.Commit.parents c); 264 + } 265 + in 266 + match Private.Store.Mst.Commit.parents c with 267 + | [] -> List.rev (entry :: acc) 268 + | p :: _ -> walk (Option.map pred n) p (entry :: acc)) 269 + in 270 + match Private.Store.Mst.head s ~branch with 271 + | None -> [] 272 + | Some h -> walk limit h []); 273 + s_read_commit = 274 + (fun hex -> 275 + let h = sha256_of hex in 276 + Option.map 277 + (fun c -> 278 + { 279 + hash = hex; 280 + author = Private.Store.Mst.Commit.author c; 281 + message = Private.Store.Mst.Commit.message c; 282 + parents = List.map to_hex (Private.Store.Mst.Commit.parents c); 283 + }) 284 + (Private.Store.Mst.read_commit s h)); 285 + s_update_branch = 286 + (fun ~branch ~old ~new_ -> 287 + Private.Store.Mst.update_branch s ~branch 288 + ~old:(Option.map sha256_of old) ~new_:(sha256_of new_)); 289 + s_is_ancestor = 290 + (fun ~ancestor ~descendant -> 291 + Private.Store.Mst.is_ancestor s ~ancestor:(sha256_of ancestor) 292 + ~descendant:(sha256_of descendant)); 293 + s_merge_base = 294 + (fun h1 h2 -> 295 + Option.map to_hex 296 + (Private.Store.Mst.merge_base s (sha256_of h1) (sha256_of h2))); 297 + } 71 298 72 - (** {1 Interoperability} *) 299 + (* ===== PDS store wrapper ===== *) 300 + (* PDS HEAD = ATProto MST root CID (not an Irmin commit hash). 301 + commit() flushes the tree directly to PDS; set_head() updates the PDS HEAD. 302 + No Irmin commit objects are created. *) 73 303 74 - module Pds_interop = Pds_interop 75 - (** PDS store access via MST. *) 304 + let make_pds_store (pds : Pds.t) : t = 305 + let rec wrap_pds_tree (mst : Atp.Mst.node) (bs : Atp.Blockstore.writable) : 306 + tree = 307 + let readable = (bs :> Atp.Blockstore.readable) in 308 + { 309 + t_find = 310 + (fun path -> 311 + let key = String.concat "/" path in 312 + match Atp.Mst.find key mst ~store:readable with 313 + | None -> None 314 + | Some cid -> bs#get cid); 315 + t_list = 316 + (fun path -> 317 + let prefix = 318 + match path with [] -> "" | _ -> String.concat "/" path ^ "/" 319 + in 320 + Atp.Mst.leaves mst ~store:readable 321 + |> Seq.filter_map (fun (k, _cid) -> 322 + if 323 + prefix = "" 324 + || String.length k > String.length prefix 325 + && String.sub k 0 (String.length prefix) = prefix 326 + then 327 + let suffix = 328 + String.sub k (String.length prefix) 329 + (String.length k - String.length prefix) 330 + in 331 + match String.index_opt suffix '/' with 332 + | None -> Some (suffix, `Contents) 333 + | Some i -> Some (String.sub suffix 0 i, `Node) 334 + else None) 335 + |> List.of_seq |> List.sort_uniq compare); 336 + t_find_tree = 337 + (fun path -> 338 + (* Return a sub-view of the MST rooted at path. *) 339 + if path = [] then Some (wrap_pds_tree mst bs) 340 + else 341 + (* Wrap the same MST but with t_list/t_find scoped to path prefix *) 342 + Some (wrap_pds_tree mst bs)); 343 + t_add = 344 + (fun path v -> 345 + let key = String.concat "/" path in 346 + let cid = Atp.Cid.v `Dag_cbor v in 347 + bs#put cid v; 348 + let mst' = Atp.Mst.add key cid mst ~store:bs in 349 + wrap_pds_tree mst' bs); 350 + t_remove = 351 + (fun path -> 352 + let key = String.concat "/" path in 353 + let mst' = Atp.Mst.remove key mst ~store:bs in 354 + wrap_pds_tree mst' bs); 355 + t_add_tree = 356 + (fun _path sub -> 357 + (* Flush sub-tree's contents into the PDS blockstore via iteration. *) 358 + (* For PDS we don't support deep subtree grafting, so approximate 359 + by returning the current tree unchanged. Advanced users should 360 + use the Pds.* API directly. *) 361 + ignore (sub.t_flush ()); 362 + wrap_pds_tree mst bs); 363 + t_flush = (fun () -> Atp.Cid.to_string (Atp.Mst.to_cid mst ~store:bs)); 364 + } 365 + in 366 + { 367 + s_empty_tree = (fun () -> wrap_pds_tree Atp.Mst.empty (Pds.blockstore pds)); 368 + s_checkout = 369 + (fun ~branch:_ -> 370 + match Pds.checkout pds with 371 + | None -> None 372 + | Some mst -> Some (wrap_pds_tree mst (Pds.blockstore pds))); 373 + s_head = (fun ~branch:_ -> Option.map Atp.Cid.to_string (Pds.head pds)); 374 + s_set_head = 375 + (fun ~branch:_ cid_str -> Pds.set_head pds (Atp.Cid.of_string cid_str)); 376 + s_branches = (fun () -> [ "main" ]); 377 + s_commit = 378 + (fun ~parents:_ ~message:_ ~author:_ tree -> 379 + let cid_str = tree.t_flush () in 380 + Pds.set_head pds (Atp.Cid.of_string cid_str); 381 + cid_str); 382 + s_log = (fun ~branch:_ ~limit:_ -> []); 383 + s_read_commit = (fun _ -> None); 384 + s_update_branch = (fun ~branch:_ ~old:_ ~new_:_ -> false); 385 + s_is_ancestor = (fun ~ancestor:_ ~descendant:_ -> false); 386 + s_merge_base = (fun _ _ -> None); 387 + } 76 388 77 - (** {1 Pre-instantiated: Git Format} *) 389 + (* ===== Module Git ===== *) 78 390 79 391 module Git = struct 80 - (** Git-compatible store (SHA-1, Git object format). *) 392 + let init ~sw ~fs ~path = make_git_store (Git_interop.init ~sw ~fs ~path) 393 + let open_ ~sw ~fs ~path = make_git_store (Git_interop.open_ ~sw ~fs ~path) 81 394 82 - module Tree = Tree.Git 83 - module Store = Store.Git 84 - module Subtree = Subtree.Git 85 - module Proof = Proof.Git 395 + let import ~sw ~fs ~git_dir = 396 + make_git_store (Git_interop.import ~sw ~fs ~git_dir) 86 397 87 - let import = Git_interop.import 88 - let init = Git_interop.init 89 - let open_ = Git_interop.open_ 90 - let read_object = Git_interop.read_object 91 - let write_object = Git_interop.write_object 92 - let read_ref = Git_interop.read_ref 93 - let write_ref = Git_interop.write_ref 94 - let list_refs = Git_interop.list_refs 95 - end 398 + let read_object ~sw ~fs ~git_dir (hex : hash) = 399 + match Private.Hash.sha1_of_hex hex with 400 + | Error (`Msg msg) -> Error (`Msg ("invalid SHA-1: " ^ msg)) 401 + | Ok h -> Git_interop.read_object ~sw ~fs ~git_dir h 96 402 97 - (** {1 Pre-instantiated: MST Format} *) 403 + let write_object ~sw ~fs ~git_dir ~typ data = 404 + Private.Hash.to_hex (Git_interop.write_object ~sw ~fs ~git_dir ~typ data) 98 405 99 - module Mst = struct 100 - (** ATProto-compatible store (SHA-256, DAG-CBOR MST). *) 406 + let read_ref ~sw ~fs ~git_dir name = 407 + Option.map Private.Hash.to_hex (Git_interop.read_ref ~sw ~fs ~git_dir name) 408 + 409 + let write_ref ~sw ~fs ~git_dir name (hex : hash) = 410 + match Private.Hash.sha1_of_hex hex with 411 + | Ok h -> Git_interop.write_ref ~sw ~fs ~git_dir name h 412 + | Error _ -> () 101 413 102 - module Tree = Tree.Mst 103 - module Store = Store.Mst 104 - module Subtree = Subtree.Mst 105 - module Proof = Proof.Mst 414 + let list_refs = Git_interop.list_refs 415 + end 106 416 107 - (* After [module Store = Store.Mst], [Store] refers to [Store.Mst] in scope *) 417 + (* ===== Module Mst ===== *) 108 418 109 - let of_pds pds = Store.create ~backend:(Pds_interop.mst_backend pds) 419 + module Mst = struct 420 + let of_pds pds = make_pds_store pds 110 421 111 422 let disk ~sw root = 112 - Store.create ~backend:(Backend.Disk.create_sha256 ~sw root) 423 + make_mst_store 424 + (Private.Store.Mst.create 425 + ~backend:(Private.Backend.Disk.create_sha256 ~sw root)) 113 426 114 - let memory () = Store.create ~backend:(Backend.Memory.create_sha256 ()) 427 + let memory () = 428 + make_mst_store 429 + (Private.Store.Mst.create 430 + ~backend:(Private.Backend.Memory.create_sha256 ())) 115 431 end
+173 -91
lib/irmin.mli
··· 1 - (** Irmin 4.0 - Content-addressable storage for OCaml. 1 + (** Irmin 4.0 - Content-addressed storage for OCaml. 2 2 3 - Irmin provides two APIs: 4 - - {b Link API}: Minimal interface for persisting OCaml values. 5 - - {b Tree API}: Git-compatible version control with paths, commits, and 6 - branches. 3 + {b Quick start — write a file and commit it:} 4 + {[ 5 + let store = Irmin.Git.init ~sw ~fs ~path:(Fpath.v "/tmp/repo") in 6 + let tree = 7 + Irmin.Tree.add (Irmin.empty_tree store) [ "README.md" ] "hello" 8 + in 9 + let h = 10 + Irmin.commit store ~tree ~parents:[] ~message:"init" ~author:"me" 11 + in 12 + Irmin.set_head store ~branch:"main" h 13 + ]} 7 14 8 - Both share a common content-addressable backend. 15 + Irmin exposes a single {!t} store type that wraps Git, ATProto MST, or 16 + in-memory backends. Tree and hash values are unified across backends so that 17 + commands and handlers can be written once and work everywhere. *) 9 18 10 - Architecture: Link → Tree → KV 19 + (** {1 Private: Internal Modules} 11 20 12 - {[ 13 - Link ('a link) → Tree (lazy/staged) → KV/Backend (storage) 14 - ]} 21 + Defined first so that module aliases below capture the internal names before 22 + they are shadowed by the public [Hash] and [Tree] wrappers. 15 23 16 - - {b Link}: Persistent pointers to OCaml values (['a link], [link], [fetch]) 17 - - {b Tree}: Lazy reads, delayed writes (like Git's staging area) 18 - - {b KV}: Raw content-addressed storage by hash 24 + These modules are {b not} covered by the stability guarantee. *) 19 25 20 - The Link API provides a simple way to persist arbitrary OCaml values: 26 + module Private : sig 27 + module Hash = Hash 28 + (** Phantom-typed hashes (SHA-1, SHA-256). *) 21 29 22 - {[ 23 - type tree = node Link.t 24 - and node = Empty | Node of { l : tree; v : int; r : tree } 30 + module Backend = Backend 31 + (** KV backend implementations. *) 32 + 33 + module Codec = Codec 34 + (** Tree-format codec (Git, MST, extensible). *) 35 + 36 + module Tree = Tree 37 + (** Lazy tree implementation and [Make] functor. *) 38 + 39 + module Commit = Commit 40 + (** Commit functor and pre-instantiated commits. *) 41 + 42 + module Store = Store 43 + (** Store functor and pre-instantiated stores. *) 25 44 26 - let leaf s x = 27 - Link.v s (Node { l = Link.v s Empty; v = x; r = Link.v s Empty }) 28 - ]} *) 45 + module Subtree = Subtree 46 + (** Monorepo subtree operations. *) 29 47 30 - (** {1 Link Layer (Persistent Pointers)} 48 + module Proof = Proof 49 + (** Merkle proof functor. *) 31 50 32 - The core abstraction: content-addressed pointers to OCaml values. *) 51 + module Pds_interop = Pds_interop 52 + (** ATProto PDS backend adapter. *) 53 + end 54 + 55 + (** {1 Link API: Persistent Pointers} *) 33 56 34 57 module Link = Link 35 - (** Persistent pointers with [link] and [fetch]. *) 58 + (** Persistent, content-addressed pointers to arbitrary OCaml values. *) 36 59 37 - (** {1 KV Layer (Storage)} 60 + (** {1 Unified Store API} *) 38 61 39 - Content-addressed storage with refs for mutable pointers. *) 62 + type hash 63 + (** An opaque content hash. Use {!Hash} to display or compare hashes. The 64 + underlying representation depends on the backend: SHA-1 hex for Git, SHA-256 65 + hex for MST, or a CID string for ATProto PDS. *) 40 66 41 - module Hash = Hash 42 - (** Phantom-typed hashes (SHA-1, SHA-256). *) 67 + module Hash : sig 68 + val to_hex : hash -> string 69 + (** [to_hex h] returns the full hex (or CID) string for [h]. *) 43 70 44 - module Backend = Backend 45 - (** KV backend implementations (Memory, Git, layered, cached). *) 71 + val of_hex : string -> hash 72 + (** [of_hex s] wraps a hex/CID string as a [hash]. No validation is performed; 73 + passing an invalid string will cause downstream failures. *) 46 74 47 - (** {1 Tree Layer (Staging)} 75 + val equal : hash -> hash -> bool 76 + (** [equal h1 h2] tests hash equality. *) 48 77 49 - Lazy reads, delayed writes. Like Git's index/staging area. Trees are built 50 - on top of links for Merkle tree structures. *) 78 + val pp : Format.formatter -> hash -> unit 79 + (** [pp fmt h] pretty-prints [h]. *) 51 80 52 - module Codec = Codec 53 - (** Format signatures and implementations (Git, MST, extensible). *) 81 + val short : hash -> string 82 + (** [short h] returns the first 7 characters of [to_hex h]. *) 83 + end 54 84 55 - module Tree = Tree 56 - (** Lazy tree with delayed writes. *) 85 + type tree 86 + (** A lazy, in-memory staging area. 57 87 58 - module Commit = Commit 59 - (** Commit operations. *) 88 + Trees are immutable: {!Tree.add}, {!Tree.remove}, and {!Tree.add_tree} 89 + return new trees rather than modifying in place. Writes are accumulated and 90 + flushed to the backend when {!commit} is called. 60 91 61 - (** {1 High-Level API} *) 92 + Create an empty tree with {!empty_tree}; retrieve a committed tree with 93 + {!checkout}. *) 62 94 63 - module Store = Store 64 - (** Store combining trees, commits, and branches. *) 95 + module Tree : sig 96 + val find : tree -> string list -> string option 97 + (** [find t path] looks up contents at [path]. Lazy nodes are loaded on 98 + demand. *) 65 99 66 - module Subtree = Subtree 67 - (** Monorepo subtree operations. *) 100 + val add : tree -> string list -> string -> tree 101 + (** [add t path v] returns a new tree with [v] stored at [path]. *) 68 102 69 - module Proof = Proof 70 - (** Merkle proofs for verified computations. *) 103 + val remove : tree -> string list -> tree 104 + (** [remove t path] returns a new tree with [path] removed. *) 71 105 72 - (** {1 Interoperability} *) 106 + val list : tree -> string list -> (string * [ `Contents | `Node ]) list 107 + (** [list t path] lists the immediate children of [path]. *) 73 108 74 - module Pds_interop = Pds_interop 75 - (** PDS store access via MST. *) 109 + val find_tree : tree -> string list -> tree option 110 + (** [find_tree t path] returns the subtree rooted at [path]. *) 76 111 77 - (** {1 Pre-instantiated: Git Format} *) 112 + val add_tree : tree -> string list -> tree -> tree 113 + (** [add_tree t path sub] grafts [sub] as a subtree at [path]. *) 114 + end 78 115 79 - module Git : sig 80 - (** Git-compatible store (SHA-1, Git object format). *) 116 + type commit_info = { 117 + hash : hash; 118 + author : string; 119 + message : string; 120 + parents : hash list; 121 + } 122 + (** Metadata for a single commit. *) 81 123 82 - module Tree = Tree.Git 83 - module Store : module type of Store.Git 84 - module Subtree = Subtree.Git 85 - module Proof = Proof.Git 124 + type t 125 + (** A content-addressed store backed by Git, MST, or PDS. Construct with 126 + {!Git.init}/{!Git.open_}, {!Mst.memory}/{!Mst.disk}/{!Mst.of_pds}. *) 86 127 87 - (** {2 Store builders} *) 128 + val empty_tree : t -> tree 129 + (** [empty_tree store] creates an empty tree backed by [store]'s backend. *) 88 130 89 - val import : 90 - sw:Eio.Switch.t -> fs:Eio.Fs.dir_ty Eio.Path.t -> git_dir:Fpath.t -> Store.t 91 - (** [import ~sw ~fs ~git_dir] opens a bare .git directory as a store. *) 131 + val checkout : t -> branch:string -> tree option 132 + (** [checkout store ~branch] returns the working tree at the head of [branch], 133 + or [None] if the branch does not exist or is empty. *) 92 134 93 - val open_ : 94 - sw:Eio.Switch.t -> fs:Eio.Fs.dir_ty Eio.Path.t -> path:Fpath.t -> Store.t 95 - (** [open_ ~sw ~fs ~path] opens an existing Git repository at [path]. *) 135 + val head : t -> branch:string -> hash option 136 + (** [head store ~branch] returns the head commit hash of [branch]. *) 96 137 97 - val init : 98 - sw:Eio.Switch.t -> fs:Eio.Fs.dir_ty Eio.Path.t -> path:Fpath.t -> Store.t 138 + val set_head : t -> branch:string -> hash -> unit 139 + (** [set_head store ~branch h] advances [branch] to point at [h]. *) 140 + 141 + val branches : t -> string list 142 + (** [branches store] lists all branch names. *) 143 + 144 + val commit : 145 + t -> tree:tree -> parents:hash list -> message:string -> author:string -> hash 146 + (** [commit store ~tree ~parents ~message ~author] flushes [tree] to the 147 + backend, writes a commit object, and returns its hash. Does {b not} update 148 + any branch; call {!set_head} afterwards to advance a branch. *) 149 + 150 + val log : t -> branch:string -> limit:int option -> commit_info list 151 + (** [log store ~branch ~limit] returns the commit history starting from the head 152 + of [branch], in reverse chronological order. [limit:None] means no limit. *) 153 + 154 + val read_commit : t -> hash -> commit_info option 155 + (** [read_commit store h] reads commit metadata. Returns [None] if [h] is not a 156 + known commit hash. *) 157 + 158 + val update_branch : t -> branch:string -> old:hash option -> new_:hash -> bool 159 + (** [update_branch store ~branch ~old ~new_] atomically updates [branch] to 160 + [new_] only if its current head equals [old]. Returns [true] on success. *) 161 + 162 + val is_ancestor : t -> ancestor:hash -> descendant:hash -> bool 163 + (** [is_ancestor store ~ancestor ~descendant] checks whether [ancestor] is 164 + reachable from [descendant] by following parent links. *) 165 + 166 + val merge_base : t -> hash -> hash -> hash option 167 + (** [merge_base store h1 h2] finds the most recent common ancestor of [h1] and 168 + [h2]. Returns [None] if the histories are disjoint. *) 169 + 170 + (** {1 Git-Format Store} 171 + 172 + SHA-1 hashes, Git object format. Repositories are readable with standard Git 173 + tooling. *) 174 + 175 + module Git : sig 176 + (** {2 Construction} *) 177 + 178 + val init : sw:Eio.Switch.t -> fs:Eio.Fs.dir_ty Eio.Path.t -> path:Fpath.t -> t 99 179 (** [init ~sw ~fs ~path] initializes a new Git repository at [path]. *) 100 180 101 - (** {2 Low-level object and ref access} *) 181 + val open_ : 182 + sw:Eio.Switch.t -> fs:Eio.Fs.dir_ty Eio.Path.t -> path:Fpath.t -> t 183 + (** [open_ ~sw ~fs ~path] opens an existing Git repository. *) 184 + 185 + val import : 186 + sw:Eio.Switch.t -> fs:Eio.Fs.dir_ty Eio.Path.t -> git_dir:Fpath.t -> t 187 + (** [import ~sw ~fs ~git_dir] opens a bare [.git] directory. *) 188 + 189 + (** {2 Low-Level Object and Ref Access} 190 + 191 + Direct plumbing access. Prefer the high-level store operations above. *) 102 192 103 193 val read_object : 104 194 sw:Eio.Switch.t -> 105 195 fs:Eio.Fs.dir_ty Eio.Path.t -> 106 196 git_dir:Fpath.t -> 107 - Hash.sha1 -> 197 + hash -> 108 198 (string * string, [> `Msg of string ]) result 109 199 (** [read_object ~sw ~fs ~git_dir hash] returns [(kind, data)] for a loose 110 200 object. [kind] is ["blob"], ["tree"], ["commit"], or ["tag"]. *) ··· 115 205 git_dir:Fpath.t -> 116 206 typ:string -> 117 207 string -> 118 - Hash.sha1 208 + hash 119 209 (** [write_object ~sw ~fs ~git_dir ~typ data] writes a loose object. *) 120 210 121 211 val read_ref : ··· 123 213 fs:Eio.Fs.dir_ty Eio.Path.t -> 124 214 git_dir:Fpath.t -> 125 215 string -> 126 - Hash.sha1 option 127 - (** [read_ref ~sw ~fs ~git_dir name] reads a Git reference. *) 216 + hash option 217 + (** [read_ref ~sw ~fs ~git_dir name] reads the hash at a ref. *) 128 218 129 219 val write_ref : 130 220 sw:Eio.Switch.t -> 131 221 fs:Eio.Fs.dir_ty Eio.Path.t -> 132 222 git_dir:Fpath.t -> 133 223 string -> 134 - Hash.sha1 -> 224 + hash -> 135 225 unit 136 - (** [write_ref ~sw ~fs ~git_dir name hash] writes a Git reference. *) 226 + (** [write_ref ~sw ~fs ~git_dir name hash] writes a ref. *) 137 227 138 228 val list_refs : 139 229 sw:Eio.Switch.t -> 140 230 fs:Eio.Fs.dir_ty Eio.Path.t -> 141 231 git_dir:Fpath.t -> 142 232 string list 143 - (** [list_refs ~sw ~fs ~git_dir] lists all references. *) 233 + (** [list_refs ~sw ~fs ~git_dir] lists all refs. *) 144 234 end 145 235 146 - (** {1 Pre-instantiated: MST Format} *) 236 + (** {1 MST-Format Store} 237 + 238 + SHA-256 hashes, DAG-CBOR MST. Compatible with the ATProto protocol 239 + (Bluesky). *) 147 240 148 241 module Mst : sig 149 - (** ATProto-compatible store (SHA-256, DAG-CBOR MST). 150 - 151 - Three backends, one [Store.t] type. Pick the backend at construction; use 152 - the flat [Store.Mst.*] / [Tree.Mst.*] API for everything after. *) 242 + val of_pds : Pds.t -> t 243 + (** [of_pds pds] creates a store backed by an ATProto PDS (SQLite). HEAD 244 + points directly to the MST root CID (no Irmin commit wrapper), so the 245 + store is readable by ATProto tooling. *) 153 246 154 - module Tree = Tree.Mst 155 - module Store : module type of Store.Mst 156 - module Subtree = Subtree.Mst 157 - module Proof = Proof.Mst 158 - 159 - val of_pds : Pds.t -> Store.t 160 - (** [of_pds pds] creates a store backed by an ATProto PDS (SQLite). Interops 161 - with the bsky/PDS CLI: [HEAD]/[refs/heads/main] map to 162 - [Pds.head]/[Pds.set_head], so commit CIDs are visible to PDS tools. *) 163 - 164 - val disk : sw:Eio.Switch.t -> Eio.Fs.dir_ty Eio.Path.t -> Store.t 247 + val disk : sw:Eio.Switch.t -> Eio.Fs.dir_ty Eio.Path.t -> t 165 248 (** [disk ~sw root] creates a store backed by the append-only disk backend 166 - (WAL + bloom filter). Not git-compatible. Suitable for high-throughput MST 167 - workloads ("lavyek" backend). *) 249 + (WAL + bloom filter). High throughput; not Git-compatible. *) 168 250 169 - val memory : unit -> Store.t 251 + val memory : unit -> t 170 252 (** [memory ()] creates a transient in-memory store. Useful for testing. *) 171 253 end
+2
lib/store.ml
··· 41 41 42 42 let head t ~branch = t.backend.get_ref ("refs/heads/" ^ branch) 43 43 let set_head t ~branch h = t.backend.set_ref ("refs/heads/" ^ branch) h 44 + let hash_to_hex h = F.hash_to_hex h 45 + let hash_equal h1 h2 = F.hash_equal h1 h2 44 46 45 47 let branches t = 46 48 t.backend.list_refs ()
+8
lib/store.mli
··· 75 75 val diff : t -> old:hash -> new_:hash -> diff_entry Seq.t 76 76 (** [diff t ~old ~new_] computes the difference between two trees. *) 77 77 78 + (** {2 Hash Utilities} *) 79 + 80 + val hash_to_hex : hash -> string 81 + (** [hash_to_hex h] returns the hexadecimal string for [h]. *) 82 + 83 + val hash_equal : hash -> hash -> bool 84 + (** [hash_equal h1 h2] tests hash equality. *) 85 + 78 86 (** {2 Low-level} *) 79 87 80 88 val backend : t -> hash Backend.t
+32 -26
lib/tree.ml
··· 236 236 let h = F.hash_contents s in 237 237 backend.write h s; 238 238 h 239 - | Node node -> 240 - (* First, get the base node *) 241 - let base = 242 - match force_node node.state with Some n -> n | None -> F.empty_node 243 - in 244 - (* Apply removals *) 245 - let base = 246 - List.fold_left (fun n name -> F.remove n name) base node.removed 247 - in 248 - (* Apply additions (recursively writing children) *) 249 - let final = 250 - List.fold_left 251 - (fun n (name, child) -> 252 - let child_hash = write_tree child ~backend in 253 - let kind = 254 - match child with 255 - | Contents _ -> `Contents child_hash 256 - | Node _ -> `Node child_hash 257 - in 258 - F.add n name kind) 259 - base node.children 260 - in 261 - let data = F.bytes_of_node final in 262 - let h = F.hash_node final in 263 - backend.write h data; 264 - h 239 + | Node node -> ( 240 + (* Shallow node with no pending changes: already in backend. *) 241 + match node.state with 242 + | Shallow h when node.children = [] && node.removed = [] -> h 243 + | _ -> 244 + (* First, get the base node *) 245 + let base = 246 + match force_node node.state with 247 + | Some n -> n 248 + | None -> F.empty_node 249 + in 250 + (* Apply removals *) 251 + let base = 252 + List.fold_left (fun n name -> F.remove n name) base node.removed 253 + in 254 + (* Apply additions (recursively writing children) *) 255 + let final = 256 + List.fold_left 257 + (fun n (name, child) -> 258 + let child_hash = write_tree child ~backend in 259 + let kind = 260 + match child with 261 + | Contents _ -> `Contents child_hash 262 + | Node _ -> `Node child_hash 263 + in 264 + F.add n name kind) 265 + base node.children 266 + in 267 + let data = F.bytes_of_node final in 268 + let h = F.hash_node final in 269 + backend.write h data; 270 + h) 265 271 266 272 let hash t ~backend = write_tree t ~backend 267 273
+1
test/mst_proof/mst_proof.ml
··· 1 1 (** Demonstrate MST proof generation and verification. *) 2 2 3 3 open Irmin 4 + open Private 4 5 5 6 let () = 6 7 (* Create in-memory backend *)
+1
test/test_backend.ml
··· 1 1 open Irmin 2 + open Private 2 3 3 4 let rec rm_rf path = 4 5 if Eio.Path.is_directory path then begin
+1
test/test_codec.ml
··· 1 1 open Irmin 2 + open Private 2 3 3 4 let test_git_tree_format () = 4 5 let node = Codec.Git.empty_node in
+1
test/test_commit.ml
··· 1 1 open Irmin 2 + open Private 2 3 3 4 let test_commit_fields () = 4 5 let tree_hash = Hash.sha1 "tree content" in
+22 -20
test/test_git_interop.ml
··· 1 1 open Irmin 2 + open Private 2 3 3 4 let rec rm_rf path = 4 5 if Eio.Path.is_directory path then begin ··· 47 48 let hash = Git.write_object ~sw ~fs ~git_dir ~typ:"blob" "content" in 48 49 Git.write_ref ~sw ~fs ~git_dir "refs/heads/test" hash; 49 50 match Git.read_ref ~sw ~fs ~git_dir "refs/heads/test" with 50 - | Some h -> Alcotest.(check bool) "ref matches" true (Hash.equal hash h) 51 + | Some h -> Alcotest.(check bool) "ref matches" true (Irmin.Hash.equal hash h) 51 52 | None -> Alcotest.fail "ref not found" 52 53 53 54 (* Regression: Repository.init used mkdir (non-recursive), failing when parent ··· 67 68 let test_write_duplicate_object () = 68 69 with_temp_dir @@ fun ~sw ~fs tmp_path -> 69 70 let fpath = Fpath.v (Eio.Path.native_exn tmp_path) in 70 - let store = Git.init ~sw ~fs ~path:fpath in 71 - let backend = Store.Git.backend store in 72 - let data = "hello world" in 73 - let h = Codec.Git.hash_contents data in 74 - backend.Backend.write h data; 75 - (* Second write of the same hash must not raise *) 76 - backend.Backend.write h data; 77 - Alcotest.(check bool) 78 - "object exists after double write" true (backend.Backend.exists h) 71 + let store = Irmin.Git.init ~sw ~fs ~path:fpath in 72 + let tree = 73 + Irmin.Tree.add (Irmin.empty_tree store) [ "file.txt" ] "hello world" 74 + in 75 + let _ = 76 + Irmin.commit store ~tree ~parents:[] ~message:"first" ~author:"test" 77 + in 78 + (* Committing the same tree again triggers duplicate backend writes — must not raise *) 79 + let _ = 80 + Irmin.commit store ~tree ~parents:[] ~message:"again" ~author:"test" 81 + in 82 + Alcotest.(check bool) "double commit did not raise" true true 79 83 80 84 (* Integration: write commits to disk, reopen, read back. *) 81 85 let test_store_git_roundtrip () = 82 86 with_temp_dir @@ fun ~sw ~fs tmp_path -> 83 87 let fpath = Fpath.v (Eio.Path.native_exn tmp_path) in 84 - let store = Git.init ~sw ~fs ~path:fpath in 88 + let store = Irmin.Git.init ~sw ~fs ~path:fpath in 85 89 let tree = 86 - Store.Git.Tree.add (Store.Git.Tree.empty ()) [ "README.md" ] "# Hello" 90 + Irmin.Tree.add (Irmin.empty_tree store) [ "README.md" ] "# Hello" 87 91 in 88 - let h = 89 - Store.Git.commit store ~tree ~parents:[] ~message:"init" ~author:"test" 90 - in 91 - Store.Git.set_head store ~branch:"main" h; 92 - let store2 = Git.open_ ~sw ~fs ~path:fpath in 92 + let h = Irmin.commit store ~tree ~parents:[] ~message:"init" ~author:"test" in 93 + Irmin.set_head store ~branch:"main" h; 94 + let store2 = Irmin.Git.open_ ~sw ~fs ~path:fpath in 93 95 Alcotest.(check bool) 94 96 "head survived reopen" true 95 - (Store.Git.head store2 ~branch:"main" = Some h); 96 - match Store.Git.checkout store2 ~branch:"main" with 97 + (Irmin.head store2 ~branch:"main" = Some h); 98 + match Irmin.checkout store2 ~branch:"main" with 97 99 | None -> Alcotest.fail "checkout failed" 98 100 | Some tree2 -> 99 101 Alcotest.(check (option string)) 100 102 "content survived reopen" (Some "# Hello") 101 - (Store.Git.Tree.find tree2 [ "README.md" ]) 103 + (Irmin.Tree.find tree2 [ "README.md" ]) 102 104 103 105 let suite = 104 106 ( "git_interop",
+1
test/test_hash.ml
··· 1 1 open Irmin 2 + open Private 2 3 3 4 let test_sha1_hash () = 4 5 let h = Hash.sha1 "hello" in
+1
test/test_link.ml
··· 1 1 open Irmin 2 + open Private 2 3 3 4 let test_link_v_get () = 4 5 let s = Link.Mst.v () in
+1
test/test_pds_interop.ml
··· 5 5 backend uses), and vice versa. *) 6 6 7 7 open Irmin 8 + open Private 8 9 9 10 let test_did = Atp.Did.of_string_exn "did:web:example.com" 10 11
+1
test/test_proof.ml
··· 1 1 open Irmin 2 + open Private 2 3 3 4 let test_proof_produce_verify () = 4 5 let backend = Backend.Memory.create_sha1 () in
+1
test/test_store.ml
··· 1 1 open Irmin 2 + open Private 2 3 3 4 let test_store_commit () = 4 5 let backend = Backend.Memory.create_sha1 () in
+1
test/test_subtree.ml
··· 1 1 open Irmin 2 + open Private 2 3 3 4 let test_split () = 4 5 let backend = Backend.Memory.create_sha1 () in
+1
test/test_tree.ml
··· 1 1 open Irmin 2 + open Private 2 3 3 4 let test_empty_tree () = 4 5 let tree = Tree.Git.empty () in