···11+(lang dune 3.17)
22+(name irmin)
33+(version 2.0.0~dev)
44+55+(generate_opam_files true)
66+77+(license ISC)
88+(authors "Thomas Gazagnaire <thomas@gazagnaire.org>")
99+(maintainers "Thomas Gazagnaire <thomas@gazagnaire.org>")
1010+(source
1111+ (uri https://tangled.org/gazagnaire.org/irmin))
1212+1313+(package
1414+ (name irmin)
1515+ (synopsis "Content-addressable store with Git and ATProto MST support")
1616+ (description
1717+ "Irmin is a library for building content-addressable stores with lazy reads,
1818+delayed writes, and multiple tree formats (Git, ATProto MST). It provides
1919+bidirectional Git compatibility and first-class subtree operations.")
2020+ (depends
2121+ (ocaml (>= 5.2))
2222+ (dune (>= 3.17))
2323+ (eio (>= 1.2))
2424+ (digestif (>= 1.2))
2525+ (fmt (>= 0.9))
2626+ (logs (>= 0.7))
2727+ (git (>= 0.1))
2828+ (atp (>= 0.1))
2929+ (alcotest :with-test)
3030+ (crowbar :with-test))))
+37
irmin.opam
···11+# This file is generated by dune, edit dune-project instead
22+opam-version: "2.0"
33+version: "2.0.0~dev"
44+synopsis: "Content-addressable store with Git and ATProto MST support"
55+description: """
66+Irmin is a library for building content-addressable stores with lazy reads,
77+delayed writes, and multiple tree formats (Git, ATProto MST). It provides
88+bidirectional Git compatibility and first-class subtree operations."""
99+maintainer: ["Thomas Gazagnaire <thomas@gazagnaire.org>"]
1010+authors: ["Thomas Gazagnaire <thomas@gazagnaire.org>"]
1111+license: "ISC"
1212+depends: [
1313+ "ocaml" {>= "5.2"}
1414+ "dune" {>= "3.17" & >= "3.17"}
1515+ "eio" {>= "1.2"}
1616+ "digestif" {>= "1.2"}
1717+ "fmt" {>= "0.9"}
1818+ "logs" {>= "0.7"}
1919+ "alcotest" {with-test}
2020+ "crowbar" {with-test}
2121+ "odoc" {with-doc}
2222+]
2323+build: [
2424+ ["dune" "subst"] {dev}
2525+ [
2626+ "dune"
2727+ "build"
2828+ "-p"
2929+ name
3030+ "-j"
3131+ jobs
3232+ "@install"
3333+ "@runtest" {with-test}
3434+ "@doc" {with-doc}
3535+ ]
3636+]
3737+dev-repo: "https://tangled.org/gazagnaire.org/irmin"
+171
lib/backend.ml
···11+type 'hash t = {
22+ read : 'hash -> string option;
33+ write : string -> 'hash;
44+ exists : 'hash -> bool;
55+ get_ref : string -> 'hash option;
66+ set_ref : string -> 'hash -> unit;
77+ test_and_set_ref : string -> test:'hash option -> set:'hash option -> bool;
88+ list_refs : unit -> string list;
99+ write_batch : string list -> 'hash list;
1010+ flush : unit -> unit;
1111+ close : unit -> unit;
1212+}
1313+1414+type stats = {
1515+ reads : int;
1616+ writes : int;
1717+ cache_hits : int;
1818+ cache_misses : int;
1919+}
2020+2121+module Memory = struct
2222+ module StringMap = Map.Make (String)
2323+2424+ type 'hash state = {
2525+ mutable objects : string StringMap.t;
2626+ mutable refs : 'hash StringMap.t;
2727+ hash_fn : string -> 'hash;
2828+ to_hex : 'hash -> string;
2929+ equal : 'hash -> 'hash -> bool;
3030+ }
3131+3232+ let create_with_hash (type h) (hash_fn : string -> h) (to_hex : h -> string)
3333+ (equal : h -> h -> bool) : h t =
3434+ let state =
3535+ { objects = StringMap.empty; refs = StringMap.empty; hash_fn; to_hex; equal }
3636+ in
3737+ {
3838+ read =
3939+ (fun h ->
4040+ let key = state.to_hex h in
4141+ StringMap.find_opt key state.objects);
4242+ write =
4343+ (fun data ->
4444+ let h = state.hash_fn data in
4545+ let key = state.to_hex h in
4646+ state.objects <- StringMap.add key data state.objects;
4747+ h);
4848+ exists =
4949+ (fun h ->
5050+ let key = state.to_hex h in
5151+ StringMap.mem key state.objects);
5252+ get_ref = (fun name -> StringMap.find_opt name state.refs);
5353+ set_ref =
5454+ (fun name hash -> state.refs <- StringMap.add name hash state.refs);
5555+ test_and_set_ref =
5656+ (fun name ~test ~set ->
5757+ let current = StringMap.find_opt name state.refs in
5858+ let matches =
5959+ match (test, current) with
6060+ | None, None -> true
6161+ | Some t, Some c -> state.equal t c
6262+ | _ -> false
6363+ in
6464+ if matches then (
6565+ (match set with
6666+ | None -> state.refs <- StringMap.remove name state.refs
6767+ | Some h -> state.refs <- StringMap.add name h state.refs);
6868+ true)
6969+ else false);
7070+ list_refs = (fun () -> StringMap.bindings state.refs |> List.map fst);
7171+ write_batch =
7272+ (fun objects ->
7373+ List.map
7474+ (fun data ->
7575+ let h = state.hash_fn data in
7676+ let key = state.to_hex h in
7777+ state.objects <- StringMap.add key data state.objects;
7878+ h)
7979+ objects);
8080+ flush = (fun () -> ());
8181+ close = (fun () -> ());
8282+ }
8383+8484+ let create_sha1 () = create_with_hash Hash.sha1 Hash.to_hex Hash.equal
8585+ let create_sha256 () = create_with_hash Hash.sha256 Hash.to_hex Hash.equal
8686+end
8787+8888+(* Simple LRU cache *)
8989+module Lru = struct
9090+ type ('k, 'v) t = {
9191+ capacity : int;
9292+ mutable items : ('k * 'v) list;
9393+ }
9494+9595+ let create capacity = { capacity; items = [] }
9696+9797+ let find t key =
9898+ match List.assoc_opt key t.items with
9999+ | Some v ->
100100+ (* Move to front *)
101101+ t.items <- (key, v) :: List.remove_assoc key t.items;
102102+ Some v
103103+ | None -> None
104104+105105+ let add t key value =
106106+ t.items <- (key, value) :: List.remove_assoc key t.items;
107107+ if List.length t.items > t.capacity then
108108+ t.items <- List.rev (List.tl (List.rev t.items))
109109+end
110110+111111+let cached (type h) (backend : h t) : h t =
112112+ let module H = struct
113113+ type t = h
114114+115115+ let hash = Hashtbl.hash
116116+ let equal a b = backend.read a = backend.read b
117117+ end in
118118+ let cache : (h, string) Lru.t = Lru.create 1000 in
119119+ {
120120+ backend with
121121+ read =
122122+ (fun h ->
123123+ match Lru.find cache h with
124124+ | Some v -> Some v
125125+ | None ->
126126+ let result = backend.read h in
127127+ Option.iter (fun v -> Lru.add cache h v) result;
128128+ result);
129129+ }
130130+131131+let readonly (backend : 'h t) : 'h t =
132132+ let fail () = invalid_arg "Backend is read-only" in
133133+ {
134134+ backend with
135135+ write = (fun _ -> fail ());
136136+ set_ref = (fun _ _ -> fail ());
137137+ test_and_set_ref = (fun _ ~test:_ ~set:_ -> fail ());
138138+ write_batch = (fun _ -> fail ());
139139+ }
140140+141141+let layered ~(upper : 'h t) ~(lower : 'h t) : 'h t =
142142+ {
143143+ read =
144144+ (fun h ->
145145+ match upper.read h with Some v -> Some v | None -> lower.read h);
146146+ write = upper.write;
147147+ exists = (fun h -> upper.exists h || lower.exists h);
148148+ get_ref =
149149+ (fun name ->
150150+ match upper.get_ref name with
151151+ | Some v -> Some v
152152+ | None -> lower.get_ref name);
153153+ set_ref = upper.set_ref;
154154+ test_and_set_ref = upper.test_and_set_ref;
155155+ list_refs =
156156+ (fun () ->
157157+ let upper_refs = upper.list_refs () in
158158+ let lower_refs = lower.list_refs () in
159159+ List.sort_uniq String.compare (upper_refs @ lower_refs));
160160+ write_batch = upper.write_batch;
161161+ flush =
162162+ (fun () ->
163163+ upper.flush ();
164164+ lower.flush ());
165165+ close =
166166+ (fun () ->
167167+ upper.close ();
168168+ lower.close ());
169169+ }
170170+171171+let stats _ = None
+68
lib/backend.mli
···11+(** Storage backends for Irmin.
22+33+ Backends are records of functions, NOT functors. This makes them
44+ composable and easy to create without functor application. *)
55+66+(** {1 Backend Interface} *)
77+88+type 'hash t = {
99+ read : 'hash -> string option;
1010+ (** [read hash] retrieves the object with the given hash. *)
1111+ write : string -> 'hash;
1212+ (** [write data] stores data and returns its hash. *)
1313+ exists : 'hash -> bool;
1414+ (** [exists hash] checks if an object exists. *)
1515+ get_ref : string -> 'hash option;
1616+ (** [get_ref name] reads a reference (branch/tag). *)
1717+ set_ref : string -> 'hash -> unit;
1818+ (** [set_ref name hash] sets a reference. *)
1919+ test_and_set_ref : string -> test:'hash option -> set:'hash option -> bool;
2020+ (** [test_and_set_ref name ~test ~set] atomically updates a reference
2121+ if its current value matches [test]. *)
2222+ list_refs : unit -> string list;
2323+ (** [list_refs ()] returns all reference names. *)
2424+ write_batch : string list -> 'hash list;
2525+ (** [write_batch objects] writes multiple objects efficiently. *)
2626+ flush : unit -> unit;
2727+ (** [flush ()] ensures all writes are persisted. *)
2828+ close : unit -> unit;
2929+ (** [close ()] releases resources. *)
3030+}
3131+3232+(** {1 Memory Backend} *)
3333+3434+module Memory : sig
3535+ val create_sha1 : unit -> Hash.sha1 t
3636+ (** Create an in-memory SHA-1 backend. *)
3737+3838+ val create_sha256 : unit -> Hash.sha256 t
3939+ (** Create an in-memory SHA-256 backend. *)
4040+end
4141+4242+(** {1 Backend Combinators} *)
4343+4444+val cached : 'h t -> 'h t
4545+(** [cached backend] wraps a backend with an LRU cache.
4646+ Reads are served from cache when possible. *)
4747+4848+val readonly : 'h t -> 'h t
4949+(** [readonly backend] makes a backend read-only.
5050+ Write operations raise [Invalid_argument]. *)
5151+5252+val layered : upper:'h t -> lower:'h t -> 'h t
5353+(** [layered ~upper ~lower] creates a layered backend.
5454+ Reads check upper first, then lower.
5555+ Writes go to upper only.
5656+ Used for garbage collection (upper=live, lower=frozen). *)
5757+5858+(** {1 Statistics} *)
5959+6060+type stats = {
6161+ reads : int;
6262+ writes : int;
6363+ cache_hits : int;
6464+ cache_misses : int;
6565+}
6666+6767+val stats : _ t -> stats option
6868+(** [stats backend] returns statistics if the backend tracks them. *)
+111
lib/commit.ml
···11+module Make (F : Tree_format.S) = struct
22+ type hash = F.hash
33+44+ type t = {
55+ tree : hash;
66+ parents : hash list;
77+ author : string;
88+ committer : string;
99+ message : string;
1010+ timestamp : int64;
1111+ }
1212+1313+ let tree c = c.tree
1414+ let parents c = c.parents
1515+ let author c = c.author
1616+ let committer c = c.committer
1717+ let message c = c.message
1818+ let timestamp c = c.timestamp
1919+2020+ let v ~tree ~parents ~author ?(committer = author)
2121+ ?(timestamp = Int64.of_float (Unix.gettimeofday ())) ~message () =
2222+ { tree; parents; author; committer; message; timestamp }
2323+2424+ (* Git commit format:
2525+ tree <sha1>
2626+ parent <sha1> (zero or more)
2727+ author <name> <email> <timestamp> <tz>
2828+ committer <name> <email> <timestamp> <tz>
2929+3030+ <message> *)
3131+ let to_bytes c =
3232+ let buf = Buffer.create 256 in
3333+ Buffer.add_string buf "tree ";
3434+ Buffer.add_string buf (Hash.to_hex c.tree);
3535+ Buffer.add_char buf '\n';
3636+ List.iter
3737+ (fun parent ->
3838+ Buffer.add_string buf "parent ";
3939+ Buffer.add_string buf (Hash.to_hex parent);
4040+ Buffer.add_char buf '\n')
4141+ c.parents;
4242+ Buffer.add_string buf "author ";
4343+ Buffer.add_string buf c.author;
4444+ Buffer.add_string buf " ";
4545+ Buffer.add_string buf (Int64.to_string c.timestamp);
4646+ Buffer.add_string buf " +0000\n";
4747+ Buffer.add_string buf "committer ";
4848+ Buffer.add_string buf c.committer;
4949+ Buffer.add_string buf " ";
5050+ Buffer.add_string buf (Int64.to_string c.timestamp);
5151+ Buffer.add_string buf " +0000\n";
5252+ Buffer.add_char buf '\n';
5353+ Buffer.add_string buf c.message;
5454+ Buffer.contents buf
5555+5656+ let hash c =
5757+ let data = to_bytes c in
5858+ let header = Printf.sprintf "commit %d\x00" (String.length data) in
5959+ F.hash_contents (header ^ data)
6060+6161+ let parse_hex_hash s =
6262+ (* This is simplified - real implementation would use proper parsing *)
6363+ match Hash.sha1_of_hex s with
6464+ | Ok h -> Ok (Obj.magic h : hash)
6565+ | Error e -> Error e
6666+6767+ let of_bytes data =
6868+ (* Simplified parser - real implementation would be more robust *)
6969+ let lines = String.split_on_char '\n' data in
7070+ let rec parse_headers lines tree parents author committer =
7171+ match lines with
7272+ | [] -> Error (`Msg "unexpected end of commit")
7373+ | "" :: rest ->
7474+ (* Empty line marks start of message *)
7575+ let message = String.concat "\n" rest in
7676+ (match tree with
7777+ | None -> Error (`Msg "missing tree")
7878+ | Some tree ->
7979+ Ok
8080+ {
8181+ tree;
8282+ parents = List.rev parents;
8383+ author = Option.value author ~default:"unknown";
8484+ committer = Option.value committer ~default:"unknown";
8585+ message;
8686+ timestamp = 0L;
8787+ })
8888+ | line :: rest ->
8989+ if String.length line >= 5 && String.sub line 0 5 = "tree " then
9090+ let hex = String.sub line 5 (String.length line - 5) in
9191+ match parse_hex_hash hex with
9292+ | Ok h -> parse_headers rest (Some h) parents author committer
9393+ | Error _ as e -> e
9494+ else if String.length line >= 7 && String.sub line 0 7 = "parent " then
9595+ let hex = String.sub line 7 (String.length line - 7) in
9696+ match parse_hex_hash hex with
9797+ | Ok h -> parse_headers rest tree (h :: parents) author committer
9898+ | Error _ as e -> e
9999+ else if String.length line >= 7 && String.sub line 0 7 = "author " then
100100+ let author_str = String.sub line 7 (String.length line - 7) in
101101+ parse_headers rest tree parents (Some author_str) committer
102102+ else if String.length line >= 10 && String.sub line 0 10 = "committer " then
103103+ let committer_str = String.sub line 10 (String.length line - 10) in
104104+ parse_headers rest tree parents author (Some committer_str)
105105+ else parse_headers rest tree parents author committer
106106+ in
107107+ parse_headers lines None [] None None
108108+end
109109+110110+module Git = Make (Tree_format.Git)
111111+module Mst = Make (Tree_format.Mst)
+65
lib/commit.mli
···11+(** Commit objects.
22+33+ Commits point to a tree and have metadata like author, message, timestamp. *)
44+55+(** {1 Commit Functor} *)
66+77+module Make (F : Tree_format.S) : sig
88+ type t
99+ (** A commit object. *)
1010+1111+ type hash = F.hash
1212+1313+ (** {2 Commit Fields} *)
1414+1515+ val tree : t -> hash
1616+ (** [tree c] returns the root tree hash. *)
1717+1818+ val parents : t -> hash list
1919+ (** [parents c] returns parent commit hashes. *)
2020+2121+ val author : t -> string
2222+ (** [author c] returns the author string. *)
2323+2424+ val committer : t -> string
2525+ (** [committer c] returns the committer string. *)
2626+2727+ val message : t -> string
2828+ (** [message c] returns the commit message. *)
2929+3030+ val timestamp : t -> int64
3131+ (** [timestamp c] returns the commit timestamp (Unix epoch). *)
3232+3333+ (** {2 Construction} *)
3434+3535+ val v :
3636+ tree:hash ->
3737+ parents:hash list ->
3838+ author:string ->
3939+ ?committer:string ->
4040+ ?timestamp:int64 ->
4141+ message:string ->
4242+ unit ->
4343+ t
4444+ (** [v ~tree ~parents ~author ?committer ?timestamp ~message ()]
4545+ creates a new commit. *)
4646+4747+ (** {2 Serialization} *)
4848+4949+ val hash : t -> hash
5050+ (** [hash c] computes the commit hash. *)
5151+5252+ val of_bytes : string -> (t, [> `Msg of string ]) result
5353+ (** [of_bytes data] deserializes a commit. *)
5454+5555+ val to_bytes : t -> string
5656+ (** [to_bytes c] serializes a commit. *)
5757+end
5858+5959+(** {1 Pre-instantiated Commits} *)
6060+6161+module Git : module type of Make (Tree_format.Git)
6262+(** Git-format commits with SHA-1 hashes. *)
6363+6464+module Mst : module type of Make (Tree_format.Mst)
6565+(** MST-format commits with SHA-256 hashes. *)
···11+(** Git interoperability using ocaml-git.
22+33+ Provides bidirectional support for reading and writing Git repositories. *)
44+55+(* Convert between irmin Hash.sha1 and Git.Hash.t *)
66+let git_hash_of_sha1 (h : Hash.sha1) : Git.Hash.t =
77+ Git.Hash.of_raw_string (Hash.to_bytes h)
88+99+let sha1_of_git_hash (h : Git.Hash.t) : Hash.sha1 =
1010+ Hash.sha1_of_bytes (Git.Hash.to_raw_string h)
1111+1212+(* Loose object path: .git/objects/ab/cdef... *)
1313+let loose_object_path git_dir hash =
1414+ let hex = Git.Hash.to_hex hash in
1515+ let dir = String.sub hex 0 2 in
1616+ let file = String.sub hex 2 (String.length hex - 2) in
1717+ Filename.concat git_dir (Filename.concat "objects" (Filename.concat dir file))
1818+1919+(* Read loose object - returns raw content without header *)
2020+let read_loose_object ~fs git_dir hash =
2121+ let path = loose_object_path git_dir hash in
2222+ let full_path = Eio.Path.(fs / path) in
2323+ try
2424+ let data = Eio.Path.load full_path in
2525+ (* TODO: Add zlib decompression *)
2626+ Git.Value.of_string_with_header data
2727+ with _ -> Error (`Msg "object not found")
2828+2929+(* Write loose object *)
3030+let write_loose_object ~fs git_dir (value : Git.Value.t) =
3131+ let hash = Git.Value.digest value in
3232+ let path = loose_object_path git_dir hash in
3333+ let full_path = Eio.Path.(fs / path) in
3434+3535+ (* Create directory if needed *)
3636+ let dir = Filename.dirname path in
3737+ let dir_path = Eio.Path.(fs / dir) in
3838+ (try Eio.Path.mkdir ~perm:0o755 dir_path with _ -> ());
3939+4040+ (* Write object with header *)
4141+ let data = Git.Value.to_string value in
4242+ (* TODO: Add zlib compression *)
4343+ Eio.Path.save ~create:(`Or_truncate 0o444) full_path data;
4444+ hash
4545+4646+(* Read reference *)
4747+let read_ref ~fs ~git_dir name =
4848+ let path = Filename.concat git_dir name in
4949+ let full_path = Eio.Path.(fs / path) in
5050+ try
5151+ let content = Eio.Path.load full_path in
5252+ let content = String.trim content in
5353+ (* Check for symbolic ref *)
5454+ if String.length content > 5 && String.sub content 0 5 = "ref: " then
5555+ let target = String.sub content 5 (String.length content - 5) in
5656+ let target_path = Eio.Path.(fs / Filename.concat git_dir target) in
5757+ try
5858+ let target_content = Eio.Path.load target_path in
5959+ Some (Git.Hash.of_hex (String.trim target_content))
6060+ with _ -> None
6161+ else Some (Git.Hash.of_hex content)
6262+ with _ -> None
6363+6464+(* Write reference *)
6565+let write_ref ~fs ~git_dir name hash =
6666+ let path = Filename.concat git_dir name in
6767+ let full_path = Eio.Path.(fs / path) in
6868+ let dir = Filename.dirname path in
6969+ let dir_path = Eio.Path.(fs / dir) in
7070+ (try Eio.Path.mkdir ~perm:0o755 dir_path with _ -> ());
7171+ let content = Git.Hash.to_hex hash ^ "\n" in
7272+ Eio.Path.save ~create:(`Or_truncate 0o644) full_path content
7373+7474+(* List references *)
7575+let list_refs ~fs ~git_dir =
7676+ let refs_dir = Filename.concat git_dir "refs" in
7777+ let refs_path = Eio.Path.(fs / refs_dir) in
7878+ let rec collect_refs path prefix acc =
7979+ try
8080+ let entries = Eio.Path.read_dir path in
8181+ List.fold_left
8282+ (fun acc entry ->
8383+ let entry_path = Eio.Path.(path / entry) in
8484+ let ref_name = if prefix = "" then entry else prefix ^ "/" ^ entry in
8585+ if Eio.Path.is_directory entry_path then
8686+ collect_refs entry_path ref_name acc
8787+ else ("refs/" ^ ref_name) :: acc)
8888+ acc entries
8989+ with _ -> acc
9090+ in
9191+ collect_refs refs_path "" []
9292+9393+(* Create Git backend using ocaml-git types *)
9494+let git_backend ~fs ~git_dir : Hash.sha1 Backend.t =
9595+ {
9696+ read =
9797+ (fun hash ->
9898+ let git_hash = git_hash_of_sha1 hash in
9999+ match read_loose_object ~fs git_dir git_hash with
100100+ | Ok value -> Some (Git.Value.to_string_without_header value)
101101+ | Error _ -> None);
102102+ write =
103103+ (fun data ->
104104+ (* Default to blob for raw data writes *)
105105+ let blob = Git.Blob.of_string data in
106106+ let value = Git.Value.blob blob in
107107+ let git_hash = write_loose_object ~fs git_dir value in
108108+ sha1_of_git_hash git_hash);
109109+ exists =
110110+ (fun hash ->
111111+ let git_hash = git_hash_of_sha1 hash in
112112+ let path = loose_object_path git_dir git_hash in
113113+ let full_path = Eio.Path.(fs / path) in
114114+ Eio.Path.is_file full_path);
115115+ get_ref =
116116+ (fun name ->
117117+ Option.map sha1_of_git_hash (read_ref ~fs ~git_dir name));
118118+ set_ref =
119119+ (fun name hash ->
120120+ write_ref ~fs ~git_dir name (git_hash_of_sha1 hash));
121121+ test_and_set_ref =
122122+ (fun name ~test ~set ->
123123+ let current = read_ref ~fs ~git_dir name in
124124+ let matches =
125125+ match (test, current) with
126126+ | None, None -> true
127127+ | Some t, Some c -> Git.Hash.equal (git_hash_of_sha1 t) c
128128+ | _ -> false
129129+ in
130130+ if matches then (
131131+ (match set with
132132+ | None ->
133133+ let path = Filename.concat git_dir name in
134134+ let full_path = Eio.Path.(fs / path) in
135135+ (try Eio.Path.unlink full_path with _ -> ())
136136+ | Some h -> write_ref ~fs ~git_dir name (git_hash_of_sha1 h));
137137+ true)
138138+ else false);
139139+ list_refs = (fun () -> list_refs ~fs ~git_dir);
140140+ write_batch =
141141+ (fun objects ->
142142+ List.map
143143+ (fun data ->
144144+ let blob = Git.Blob.of_string data in
145145+ let value = Git.Value.blob blob in
146146+ let git_hash = write_loose_object ~fs git_dir value in
147147+ sha1_of_git_hash git_hash)
148148+ objects);
149149+ flush = (fun () -> ());
150150+ close = (fun () -> ());
151151+ }
152152+153153+(** Write a tree value to the git store *)
154154+let write_tree ~fs ~git_dir (tree : Git.Tree.t) =
155155+ let value = Git.Value.tree tree in
156156+ write_loose_object ~fs git_dir value
157157+158158+(** Write a commit value to the git store *)
159159+let write_commit ~fs ~git_dir (commit : Git.Commit.t) =
160160+ let value = Git.Value.commit commit in
161161+ write_loose_object ~fs git_dir value
162162+163163+(** Read a tree from the git store *)
164164+let read_tree ~fs ~git_dir hash =
165165+ match read_loose_object ~fs git_dir hash with
166166+ | Ok (Git.Value.Tree t) -> Some t
167167+ | _ -> None
168168+169169+(** Read a commit from the git store *)
170170+let read_commit ~fs ~git_dir hash =
171171+ match read_loose_object ~fs git_dir hash with
172172+ | Ok (Git.Value.Commit c) -> Some c
173173+ | _ -> None
174174+175175+(* Public API *)
176176+177177+let import_git ~sw:_ ~fs ~git_dir =
178178+ let backend = git_backend ~fs ~git_dir in
179179+ Store.Git.create ~backend
180180+181181+let init_git ~sw:_ ~fs ~path =
182182+ let git_dir = Filename.concat path ".git" in
183183+ let git_path = Eio.Path.(fs / git_dir) in
184184+185185+ (* Create .git structure *)
186186+ Eio.Path.mkdir ~perm:0o755 git_path;
187187+ Eio.Path.mkdir ~perm:0o755 Eio.Path.(git_path / "objects");
188188+ Eio.Path.mkdir ~perm:0o755 Eio.Path.(git_path / "refs");
189189+ Eio.Path.mkdir ~perm:0o755 Eio.Path.(git_path / "refs" / "heads");
190190+191191+ (* Write HEAD *)
192192+ Eio.Path.save ~create:(`Or_truncate 0o644)
193193+ Eio.Path.(git_path / "HEAD")
194194+ "ref: refs/heads/main\n";
195195+196196+ import_git ~sw:() ~fs ~git_dir
197197+198198+let read_object ~sw:_ ~fs ~git_dir hash =
199199+ let git_hash = git_hash_of_sha1 hash in
200200+ match read_loose_object ~fs git_dir git_hash with
201201+ | Ok value ->
202202+ let kind =
203203+ match Git.Value.kind value with
204204+ | `Blob -> "blob"
205205+ | `Tree -> "tree"
206206+ | `Commit -> "commit"
207207+ | `Tag -> "tag"
208208+ in
209209+ Ok (kind, Git.Value.to_string_without_header value)
210210+ | Error _ as e -> e
211211+212212+let write_object ~sw:_ ~fs ~git_dir ~typ data =
213213+ let value =
214214+ match typ with
215215+ | "blob" -> Git.Value.blob (Git.Blob.of_string data)
216216+ | "tree" -> Git.Value.tree (Git.Tree.of_string_exn data)
217217+ | "commit" -> Git.Value.commit (Git.Commit.of_string_exn data)
218218+ | "tag" -> Git.Value.tag (Git.Tag.of_string_exn data)
219219+ | _ -> invalid_arg ("unknown object type: " ^ typ)
220220+ in
221221+ let git_hash = write_loose_object ~fs git_dir value in
222222+ sha1_of_git_hash git_hash
223223+224224+let read_ref ~sw:_ ~fs ~git_dir name =
225225+ Option.map sha1_of_git_hash (read_ref ~fs ~git_dir name)
226226+227227+let write_ref ~sw:_ ~fs ~git_dir name hash =
228228+ write_ref ~fs ~git_dir name (git_hash_of_sha1 hash)
229229+230230+let list_refs ~sw:_ ~fs ~git_dir = list_refs ~fs ~git_dir
231231+232232+let read_pack_index ~sw:_ ~fs:_ ~path:_ =
233233+ (* TODO: Implement pack index reading *)
234234+ []
235235+236236+let read_from_pack ~sw:_ ~fs:_ ~pack:_ ~offset:_ =
237237+ (* TODO: Implement pack file reading *)
238238+ Error (`Msg "pack file reading not yet implemented")
+89
lib/git_interop.mli
···11+(** Git interoperability.
22+33+ Bidirectional support for reading and writing Git repositories.
44+ This allows Irmin to work with existing .git directories and
55+ interoperate with the Git ecosystem. *)
66+77+(** {1 Git Repository Operations} *)
88+99+val import_git :
1010+ sw:Eio.Switch.t ->
1111+ fs:_ Eio.Path.t ->
1212+ git_dir:string ->
1313+ Store.Git.t
1414+(** [import_git ~sw ~fs ~git_dir] opens a .git directory as an Irmin store.
1515+ The store supports both reads and writes - changes are written back
1616+ in Git-compatible format. *)
1717+1818+val init_git :
1919+ sw:Eio.Switch.t ->
2020+ fs:_ Eio.Path.t ->
2121+ path:string ->
2222+ Store.Git.t
2323+(** [init_git ~sw ~fs ~path] initializes a new Git repository at [path]
2424+ and returns an Irmin store for it. *)
2525+2626+(** {1 Object Operations} *)
2727+2828+val read_object :
2929+ sw:Eio.Switch.t ->
3030+ fs:_ Eio.Path.t ->
3131+ git_dir:string ->
3232+ Hash.sha1 ->
3333+ (string * string, [> `Msg of string ]) result
3434+(** [read_object ~sw ~fs ~git_dir hash] reads a Git object, returning
3535+ [(type, data)] where type is "blob", "tree", "commit", or "tag". *)
3636+3737+val write_object :
3838+ sw:Eio.Switch.t ->
3939+ fs:_ Eio.Path.t ->
4040+ git_dir:string ->
4141+ typ:string ->
4242+ string ->
4343+ Hash.sha1
4444+(** [write_object ~sw ~fs ~git_dir ~typ data] writes a Git object. *)
4545+4646+(** {1 Reference Operations} *)
4747+4848+val read_ref :
4949+ sw:Eio.Switch.t ->
5050+ fs:_ Eio.Path.t ->
5151+ git_dir:string ->
5252+ string ->
5353+ Hash.sha1 option
5454+(** [read_ref ~sw ~fs ~git_dir name] reads a Git reference. *)
5555+5656+val write_ref :
5757+ sw:Eio.Switch.t ->
5858+ fs:_ Eio.Path.t ->
5959+ git_dir:string ->
6060+ string ->
6161+ Hash.sha1 ->
6262+ unit
6363+(** [write_ref ~sw ~fs ~git_dir name hash] writes a Git reference. *)
6464+6565+val list_refs :
6666+ sw:Eio.Switch.t ->
6767+ fs:_ Eio.Path.t ->
6868+ git_dir:string ->
6969+ string list
7070+(** [list_refs ~sw ~fs ~git_dir] lists all references. *)
7171+7272+(** {1 Pack File Operations} *)
7373+7474+val read_pack_index :
7575+ sw:Eio.Switch.t ->
7676+ fs:_ Eio.Path.t ->
7777+ path:string ->
7878+ (Hash.sha1 * int64) list
7979+(** [read_pack_index ~sw ~fs ~path] reads a .idx file, returning
8080+ [(hash, offset)] pairs. *)
8181+8282+val read_from_pack :
8383+ sw:Eio.Switch.t ->
8484+ fs:_ Eio.Path.t ->
8585+ pack:string ->
8686+ offset:int64 ->
8787+ (string * string, [> `Msg of string ]) result
8888+(** [read_from_pack ~sw ~fs ~pack ~offset] reads an object from a pack file
8989+ at the given offset. *)
+129
lib/hash.ml
···11+type algorithm = Sha1 | Sha256
22+33+type _ t =
44+ | Sha1_hash : string -> [ `Sha1 ] t
55+ | Sha256_hash : string -> [ `Sha256 ] t
66+77+type sha1 = [ `Sha1 ] t
88+type sha256 = [ `Sha256 ] t
99+1010+let sha1 data =
1111+ Sha1_hash (Digestif.SHA1.(to_raw_string (digest_string data)))
1212+1313+let sha256 data =
1414+ Sha256_hash (Digestif.SHA256.(to_raw_string (digest_string data)))
1515+1616+let sha1_of_bytes raw =
1717+ if String.length raw <> 20 then
1818+ invalid_arg "Hash.sha1_of_bytes: expected 20 bytes";
1919+ Sha1_hash raw
2020+2121+let sha256_of_bytes raw =
2222+ if String.length raw <> 32 then
2323+ invalid_arg "Hash.sha256_of_bytes: expected 32 bytes";
2424+ Sha256_hash raw
2525+2626+let to_bytes : type a. a t -> string = function
2727+ | Sha1_hash s -> s
2828+ | Sha256_hash s -> s
2929+3030+let to_hex h =
3131+ let bytes = to_bytes h in
3232+ let buf = Buffer.create (String.length bytes * 2) in
3333+ String.iter
3434+ (fun c ->
3535+ Buffer.add_string buf (Printf.sprintf "%02x" (Char.code c)))
3636+ bytes;
3737+ Buffer.contents buf
3838+3939+let hex_to_bytes hex =
4040+ let len = String.length hex in
4141+ if len mod 2 <> 0 then Error (`Msg "hex string has odd length")
4242+ else
4343+ let bytes = Bytes.create (len / 2) in
4444+ let rec loop i =
4545+ if i >= len then Ok (Bytes.to_string bytes)
4646+ else
4747+ let hi = hex.[i] and lo = hex.[i + 1] in
4848+ let decode c =
4949+ match c with
5050+ | '0' .. '9' -> Some (Char.code c - Char.code '0')
5151+ | 'a' .. 'f' -> Some (Char.code c - Char.code 'a' + 10)
5252+ | 'A' .. 'F' -> Some (Char.code c - Char.code 'A' + 10)
5353+ | _ -> None
5454+ in
5555+ match (decode hi, decode lo) with
5656+ | Some h, Some l ->
5757+ Bytes.set bytes (i / 2) (Char.chr ((h lsl 4) lor l));
5858+ loop (i + 2)
5959+ | _ -> Error (`Msg (Printf.sprintf "invalid hex character at position %d" i))
6060+ in
6161+ loop 0
6262+6363+let sha1_of_hex hex =
6464+ match hex_to_bytes hex with
6565+ | Error _ as e -> e
6666+ | Ok bytes ->
6767+ if String.length bytes <> 20 then
6868+ Error (`Msg "SHA-1 hex must be 40 characters")
6969+ else Ok (Sha1_hash bytes)
7070+7171+let sha256_of_hex hex =
7272+ match hex_to_bytes hex with
7373+ | Error _ as e -> e
7474+ | Ok bytes ->
7575+ if String.length bytes <> 32 then
7676+ Error (`Msg "SHA-256 hex must be 64 characters")
7777+ else Ok (Sha256_hash bytes)
7878+7979+type existential = Ex : _ t -> existential
8080+8181+let of_hex algo hex : (existential, _) result =
8282+ match algo with
8383+ | Sha1 -> Result.map (fun h -> Ex h) (sha1_of_hex hex)
8484+ | Sha256 -> Result.map (fun h -> Ex h) (sha256_of_hex hex)
8585+8686+let equal : type a. a t -> a t -> bool =
8787+ fun h1 h2 -> String.equal (to_bytes h1) (to_bytes h2)
8888+8989+let compare : type a. a t -> a t -> int =
9090+ fun h1 h2 -> String.compare (to_bytes h1) (to_bytes h2)
9191+9292+let length : type a. a t -> int = function
9393+ | Sha1_hash _ -> 20
9494+ | Sha256_hash _ -> 32
9595+9696+let algorithm_of : type a. a t -> algorithm = function
9797+ | Sha1_hash _ -> Sha1
9898+ | Sha256_hash _ -> Sha256
9999+100100+let algorithm_length = function Sha1 -> 20 | Sha256 -> 32
101101+102102+let mst_depth (Sha256_hash bytes) =
103103+ let rec count_zeros i acc =
104104+ if i >= 32 then acc
105105+ else
106106+ let byte = Char.code bytes.[i] in
107107+ let hi = (byte lsr 6) land 0x3 in
108108+ let mid_hi = (byte lsr 4) land 0x3 in
109109+ let mid_lo = (byte lsr 2) land 0x3 in
110110+ let lo = byte land 0x3 in
111111+ if hi <> 0 then acc
112112+ else if mid_hi <> 0 then acc + 1
113113+ else if mid_lo <> 0 then acc + 2
114114+ else if lo <> 0 then acc + 3
115115+ else count_zeros (i + 1) (acc + 4)
116116+ in
117117+ count_zeros 0 0
118118+119119+type any = Any : _ t -> any
120120+121121+let any_algorithm (Any h) = algorithm_of h
122122+let any_to_bytes (Any h) = to_bytes h
123123+let any_to_hex (Any h) = to_hex h
124124+125125+let pp fmt h = Format.fprintf fmt "%s" (to_hex h)
126126+127127+let pp_short fmt h =
128128+ let hex = to_hex h in
129129+ Format.fprintf fmt "%s" (String.sub hex 0 (min 7 (String.length hex)))
+100
lib/hash.mli
···11+(** Content-addressed hashes with phantom types for algorithm safety.
22+33+ This module provides SHA-1 and SHA-256 hash types that are distinguished
44+ at the type level, preventing accidental mixing of different hash
55+ algorithms. *)
66+77+(** {1 Hash Algorithms} *)
88+99+type algorithm =
1010+ | Sha1 (** SHA-1: 20 bytes, Git compatible *)
1111+ | Sha256 (** SHA-256: 32 bytes, ATProto compatible *)
1212+1313+(** {1 Phantom-Typed Hashes} *)
1414+1515+type 'a t
1616+(** ['a t] is a hash where ['a] is a phantom type indicating the algorithm. *)
1717+1818+type sha1 = [ `Sha1 ] t
1919+(** SHA-1 hash: 20 bytes, used by Git. *)
2020+2121+type sha256 = [ `Sha256 ] t
2222+(** SHA-256 hash: 32 bytes, used by ATProto MST. *)
2323+2424+(** {1 Hash Construction} *)
2525+2626+val sha1 : string -> sha1
2727+(** [sha1 data] computes the SHA-1 hash of [data]. *)
2828+2929+val sha256 : string -> sha256
3030+(** [sha256 data] computes the SHA-256 hash of [data]. *)
3131+3232+val sha1_of_bytes : string -> sha1
3333+(** [sha1_of_bytes raw] creates a SHA-1 hash from raw 20-byte digest. *)
3434+3535+val sha256_of_bytes : string -> sha256
3636+(** [sha256_of_bytes raw] creates a SHA-256 hash from raw 32-byte digest. *)
3737+3838+(** {1 Conversions} *)
3939+4040+val to_bytes : _ t -> string
4141+(** [to_bytes h] returns the raw bytes of the hash digest. *)
4242+4343+val to_hex : _ t -> string
4444+(** [to_hex h] returns the hexadecimal representation of the hash. *)
4545+4646+val of_hex : algorithm -> string -> (_, [> `Msg of string ]) result
4747+(** [of_hex algo hex] parses a hexadecimal hash string. *)
4848+4949+val sha1_of_hex : string -> (sha1, [> `Msg of string ]) result
5050+(** [sha1_of_hex hex] parses a SHA-1 hex string. *)
5151+5252+val sha256_of_hex : string -> (sha256, [> `Msg of string ]) result
5353+(** [sha256_of_hex hex] parses a SHA-256 hex string. *)
5454+5555+(** {1 Comparison} *)
5656+5757+val equal : 'a t -> 'a t -> bool
5858+(** [equal h1 h2] is [true] if [h1] and [h2] are the same hash. *)
5959+6060+val compare : 'a t -> 'a t -> int
6161+(** [compare h1 h2] is a total ordering on hashes. *)
6262+6363+(** {1 Algorithm Info} *)
6464+6565+val length : _ t -> int
6666+(** [length h] returns the byte length of the hash (20 for SHA-1, 32 for SHA-256). *)
6767+6868+val algorithm_of : _ t -> algorithm
6969+(** [algorithm_of h] returns the algorithm used for [h]. *)
7070+7171+val algorithm_length : algorithm -> int
7272+(** [algorithm_length algo] returns the byte length for [algo]. *)
7373+7474+(** {1 MST Support} *)
7575+7676+val mst_depth : sha256 -> int
7777+(** [mst_depth h] counts leading zeros in 2-bit chunks for ATProto MST.
7878+ This determines the tree depth for a given key hash. *)
7979+8080+(** {1 Type-Erased Hashes} *)
8181+8282+type any = Any : _ t -> any
8383+(** Type-erased hash for mixed-hash stores. *)
8484+8585+val any_algorithm : any -> algorithm
8686+(** [any_algorithm (Any h)] returns the algorithm of the erased hash. *)
8787+8888+val any_to_bytes : any -> string
8989+(** [any_to_bytes (Any h)] returns the raw bytes. *)
9090+9191+val any_to_hex : any -> string
9292+(** [any_to_hex (Any h)] returns the hex representation. *)
9393+9494+(** {1 Pretty Printing} *)
9595+9696+val pp : Format.formatter -> _ t -> unit
9797+(** [pp fmt h] pretty-prints [h] as hex. *)
9898+9999+val pp_short : Format.formatter -> _ t -> unit
100100+(** [pp_short fmt h] pretty-prints the first 7 characters of [h] (Git-style). *)
+43
lib/irmin.ml
···11+(** Irmin - Content-addressable store with Git and ATProto MST support.
22+33+ Irmin provides lazy reads, delayed writes, and multiple tree formats
44+ with bidirectional Git compatibility and first-class subtree operations. *)
55+66+(** {1 Core Types} *)
77+88+module Hash = Hash
99+module Backend = Backend
1010+1111+(** {1 Tree Formats} *)
1212+1313+module Tree_format = Tree_format
1414+1515+(** {1 Trees and Commits} *)
1616+1717+module Tree = Tree
1818+module Commit = Commit
1919+2020+(** {1 Stores} *)
2121+2222+module Store = Store
2323+2424+(** {1 Subtree Operations} *)
2525+2626+module Subtree = Subtree
2727+2828+(** {1 Git Interoperability} *)
2929+3030+module Git_interop = Git_interop
3131+3232+(** {1 Pre-instantiated Git Store} *)
3333+3434+module Git = struct
3535+ include Store.Git
3636+3737+ let import = Git_interop.import_git
3838+ let init = Git_interop.init_git
3939+end
4040+4141+(** {1 Pre-instantiated MST Store} *)
4242+4343+module Mst = Store.Mst
+119
lib/store.ml
···11+module Make (F : Tree_format.S) = struct
22+ type hash = F.hash
33+44+ module Tree = Tree.Make (F)
55+ module Commit = Commit.Make (F)
66+77+ type t = { backend : hash Backend.t }
88+99+ let create ~backend = { backend }
1010+ let backend t = t.backend
1111+1212+ let tree t ?at () =
1313+ match at with
1414+ | None -> Tree.empty ()
1515+ | Some h -> Tree.of_hash ~backend:t.backend h
1616+1717+ let read_commit t h =
1818+ match t.backend.read h with
1919+ | None -> None
2020+ | Some data -> (
2121+ match Commit.of_bytes data with Ok c -> Some c | Error _ -> None)
2222+2323+ let read_tree t h = Tree.of_hash ~backend:t.backend h
2424+2525+ let checkout t ~branch =
2626+ match t.backend.get_ref ("refs/heads/" ^ branch) with
2727+ | None -> None
2828+ | Some commit_hash -> (
2929+ match read_commit t commit_hash with
3030+ | None -> None
3131+ | Some commit -> Some (read_tree t (Commit.tree commit)))
3232+3333+ let commit t ~tree ~parents ~message ~author =
3434+ (* This is where delayed writes happen *)
3535+ let tree_hash = Tree.hash tree ~backend:t.backend in
3636+ let c = Commit.v ~tree:tree_hash ~parents ~author ~message () in
3737+ let data = Commit.to_bytes c in
3838+ let _ = t.backend.write data in
3939+ Commit.hash c
4040+4141+ let head t ~branch = t.backend.get_ref ("refs/heads/" ^ branch)
4242+4343+ let set_head t ~branch h = t.backend.set_ref ("refs/heads/" ^ branch) h
4444+4545+ let branches t =
4646+ t.backend.list_refs ()
4747+ |> List.filter_map (fun r ->
4848+ if String.length r > 11 && String.sub r 0 11 = "refs/heads/" then
4949+ Some (String.sub r 11 (String.length r - 11))
5050+ else None)
5151+5252+ let update_branch t ~branch ~old ~new_ =
5353+ t.backend.test_and_set_ref ("refs/heads/" ^ branch) ~test:old ~set:(Some new_)
5454+5555+ (* Simple ancestry check - walks parent chain *)
5656+ let is_ancestor t ~ancestor ~descendant =
5757+ let rec walk visited h =
5858+ if Hash.equal h ancestor then true
5959+ else if List.exists (Hash.equal h) visited then false
6060+ else
6161+ match read_commit t h with
6262+ | None -> false
6363+ | Some c ->
6464+ let visited = h :: visited in
6565+ List.exists (walk visited) (Commit.parents c)
6666+ in
6767+ Hash.equal ancestor descendant || walk [] descendant
6868+6969+ (* Find merge base using simple BFS *)
7070+ let merge_base t h1 h2 =
7171+ let rec ancestors_of h visited =
7272+ if List.exists (Hash.equal h) visited then visited
7373+ else
7474+ match read_commit t h with
7575+ | None -> h :: visited
7676+ | Some c ->
7777+ let visited = h :: visited in
7878+ List.fold_left
7979+ (fun acc p -> ancestors_of p acc)
8080+ visited (Commit.parents c)
8181+ in
8282+ let ancestors1 = ancestors_of h1 [] in
8383+ let rec find_common h =
8484+ if List.exists (Hash.equal h) ancestors1 then Some h
8585+ else
8686+ match read_commit t h with
8787+ | None -> None
8888+ | Some c -> (
8989+ match Commit.parents c with
9090+ | [] -> None
9191+ | p :: _ -> find_common p)
9292+ in
9393+ find_common h2
9494+9595+ let commits_between t ~base ~head =
9696+ let rec count h n =
9797+ if Hash.equal h base then n
9898+ else
9999+ match read_commit t h with
100100+ | None -> n
101101+ | Some c -> (
102102+ match Commit.parents c with
103103+ | [] -> n
104104+ | p :: _ -> count p (n + 1))
105105+ in
106106+ count head 0
107107+108108+ type diff_entry =
109109+ [ `Add of Tree.path * hash
110110+ | `Remove of Tree.path
111111+ | `Change of Tree.path * hash * hash ]
112112+113113+ let diff _t ~old:_ ~new_:_ =
114114+ (* TODO: Implement tree diff *)
115115+ Seq.empty
116116+end
117117+118118+module Git = Make (Tree_format.Git)
119119+module Mst = Make (Tree_format.Mst)
+97
lib/store.mli
···11+(** High-level store interface.
22+33+ Combines backend, tree, and commit operations into a unified API. *)
44+55+(** {1 Store Functor} *)
66+77+module Make (F : Tree_format.S) : sig
88+ type t
99+ (** A store instance. *)
1010+1111+ type hash = F.hash
1212+1313+ module Tree : module type of Tree.Make (F)
1414+ module Commit : module type of Commit.Make (F)
1515+1616+ (** {2 Construction} *)
1717+1818+ val create : backend:hash Backend.t -> t
1919+ (** [create ~backend] creates a store backed by [backend]. *)
2020+2121+ (** {2 Tree Operations} *)
2222+2323+ val tree : t -> ?at:hash -> unit -> Tree.t
2424+ (** [tree t ?at ()] returns a tree.
2525+ If [at] is given, returns the tree at that commit.
2626+ Otherwise returns an empty tree. *)
2727+2828+ val checkout : t -> branch:string -> Tree.t option
2929+ (** [checkout t ~branch] returns the tree at the head of [branch]. *)
3030+3131+ (** {2 Commit Operations} *)
3232+3333+ val commit :
3434+ t ->
3535+ tree:Tree.t ->
3636+ parents:hash list ->
3737+ message:string ->
3838+ author:string ->
3939+ hash
4040+ (** [commit t ~tree ~parents ~message ~author] creates a commit.
4141+ This is when delayed tree writes actually happen. *)
4242+4343+ (** {2 Branch Operations} *)
4444+4545+ val head : t -> branch:string -> hash option
4646+ (** [head t ~branch] returns the head commit of [branch]. *)
4747+4848+ val set_head : t -> branch:string -> hash -> unit
4949+ (** [set_head t ~branch h] sets the head of [branch] to [h]. *)
5050+5151+ val branches : t -> string list
5252+ (** [branches t] returns all branch names. *)
5353+5454+ val update_branch : t -> branch:string -> old:hash option -> new_:hash -> bool
5555+ (** [update_branch t ~branch ~old ~new_] atomically updates [branch]
5656+ if its current head matches [old]. *)
5757+5858+ (** {2 Ancestry Queries} *)
5959+6060+ val is_ancestor : t -> ancestor:hash -> descendant:hash -> bool
6161+ (** [is_ancestor t ~ancestor ~descendant] checks commit ancestry. *)
6262+6363+ val merge_base : t -> hash -> hash -> hash option
6464+ (** [merge_base t h1 h2] finds the common ancestor of two commits. *)
6565+6666+ val commits_between : t -> base:hash -> head:hash -> int
6767+ (** [commits_between t ~base ~head] counts commits from [base] to [head]. *)
6868+6969+ (** {2 Diff} *)
7070+7171+ type diff_entry =
7272+ [ `Add of Tree.path * hash
7373+ | `Remove of Tree.path
7474+ | `Change of Tree.path * hash * hash ]
7575+7676+ val diff : t -> old:hash -> new_:hash -> diff_entry Seq.t
7777+ (** [diff t ~old ~new_] computes the difference between two trees. *)
7878+7979+ (** {2 Low-level} *)
8080+8181+ val backend : t -> hash Backend.t
8282+ (** [backend t] returns the underlying backend. *)
8383+8484+ val read_commit : t -> hash -> Commit.t option
8585+ (** [read_commit t h] reads a commit by hash. *)
8686+8787+ val read_tree : t -> hash -> Tree.t
8888+ (** [read_tree t h] returns a lazy tree at [h]. *)
8989+end
9090+9191+(** {1 Pre-instantiated Stores} *)
9292+9393+module Git : module type of Make (Tree_format.Git)
9494+(** Git-format store with SHA-1 hashes. *)
9595+9696+module Mst : module type of Make (Tree_format.Mst)
9797+(** MST-format store with SHA-256 hashes. *)
+231
lib/subtree.ml
···11+module Make (F : Tree_format.S) = struct
22+ type hash = F.hash
33+44+ module Store = Store.Make (F)
55+ module Tree = Store.Tree
66+ module Commit = Store.Commit
77+88+ type status =
99+ [ `In_sync
1010+ | `Local_ahead of int
1111+ | `Remote_ahead of int
1212+ | `Diverged of int * int (* local, remote *)
1313+ | `Trees_differ ]
1414+1515+ (* Extract subtree at prefix from a tree *)
1616+ let extract_subtree tree prefix =
1717+ Tree.find_tree tree prefix
1818+1919+ (* Check if a commit touches the given prefix *)
2020+ let commit_touches_prefix store commit prefix =
2121+ let tree = Store.read_tree store (Commit.tree commit) in
2222+ Option.is_some (Tree.find_tree tree prefix)
2323+2424+ (* Split: Extract subtree history into a new store *)
2525+ let split store ~prefix =
2626+ let backend = Backend.Memory.create_sha1 () in
2727+ let new_store = Store.create ~backend:(Obj.magic backend) in
2828+2929+ (* Walk commits and rewrite those touching prefix *)
3030+ let rec rewrite_commit old_hash rewritten =
3131+ if List.mem_assoc old_hash rewritten then
3232+ (List.assoc old_hash rewritten, rewritten)
3333+ else
3434+ match Store.read_commit store old_hash with
3535+ | None -> (old_hash, rewritten)
3636+ | Some commit ->
3737+ if not (commit_touches_prefix store commit prefix) then
3838+ (* Skip commits not touching prefix *)
3939+ match Commit.parents commit with
4040+ | [] -> (old_hash, rewritten)
4141+ | p :: _ -> rewrite_commit p rewritten
4242+ else
4343+ (* Rewrite parents first *)
4444+ let parents, rewritten =
4545+ List.fold_left
4646+ (fun (parents, rw) p ->
4747+ let new_p, rw = rewrite_commit p rw in
4848+ (new_p :: parents, rw))
4949+ ([], rewritten)
5050+ (Commit.parents commit)
5151+ in
5252+ let parents = List.rev parents in
5353+5454+ (* Extract subtree *)
5555+ let tree = Store.read_tree store (Commit.tree commit) in
5656+ match extract_subtree tree prefix with
5757+ | None -> (old_hash, rewritten)
5858+ | Some subtree ->
5959+ let new_hash =
6060+ Store.commit new_store ~tree:subtree ~parents
6161+ ~message:(Commit.message commit)
6262+ ~author:(Commit.author commit)
6363+ in
6464+ (new_hash, (old_hash, new_hash) :: rewritten)
6565+ in
6666+6767+ (* Start from main branch head *)
6868+ (match Store.head store ~branch:"main" with
6969+ | Some head ->
7070+ let new_head, _ = rewrite_commit head [] in
7171+ Store.set_head new_store ~branch:"main" new_head
7272+ | None -> ());
7373+7474+ new_store
7575+7676+ (* Add: Add external repo as subtree *)
7777+ let add store ~prefix ~source =
7878+ match Store.head source ~branch:"main" with
7979+ | None -> failwith "Source has no main branch"
8080+ | Some source_head ->
8181+ match Store.read_commit source source_head with
8282+ | None -> failwith "Cannot read source commit"
8383+ | Some source_commit ->
8484+ let source_tree = Store.read_tree source (Commit.tree source_commit) in
8585+8686+ (* Get current tree or empty *)
8787+ let current_tree =
8888+ match Store.head store ~branch:"main" with
8989+ | None -> Tree.empty ()
9090+ | Some h ->
9191+ match Store.read_commit store h with
9292+ | None -> Tree.empty ()
9393+ | Some c -> Store.read_tree store (Commit.tree c)
9494+ in
9595+9696+ (* Add source tree at prefix *)
9797+ let new_tree = Tree.add_tree current_tree prefix source_tree in
9898+9999+ let parents =
100100+ match Store.head store ~branch:"main" with
101101+ | None -> []
102102+ | Some h -> [ h ]
103103+ in
104104+105105+ let message =
106106+ Printf.sprintf "Add '%s' from external source"
107107+ (String.concat "/" prefix)
108108+ in
109109+110110+ let new_head =
111111+ Store.commit store ~tree:new_tree ~parents ~message
112112+ ~author:"irmin-subtree"
113113+ in
114114+ Store.set_head store ~branch:"main" new_head;
115115+ new_head
116116+117117+ (* Pull: Update subtree from external source *)
118118+ let pull store ~prefix ~source =
119119+ match Store.head source ~branch:"main" with
120120+ | None -> Error (`Conflict [])
121121+ | Some source_head ->
122122+ match Store.read_commit source source_head with
123123+ | None -> Error (`Conflict [])
124124+ | Some source_commit ->
125125+ let source_tree = Store.read_tree source (Commit.tree source_commit) in
126126+127127+ let current_tree =
128128+ match Store.head store ~branch:"main" with
129129+ | None -> Tree.empty ()
130130+ | Some h ->
131131+ match Store.read_commit store h with
132132+ | None -> Tree.empty ()
133133+ | Some c -> Store.read_tree store (Commit.tree c)
134134+ in
135135+136136+ (* Replace subtree at prefix *)
137137+ let new_tree =
138138+ let without = Tree.remove current_tree prefix in
139139+ Tree.add_tree without prefix source_tree
140140+ in
141141+142142+ let parents =
143143+ match Store.head store ~branch:"main" with
144144+ | None -> []
145145+ | Some h -> [ h ]
146146+ in
147147+148148+ let message =
149149+ Printf.sprintf "Pull updates into '%s'" (String.concat "/" prefix)
150150+ in
151151+152152+ let new_head =
153153+ Store.commit store ~tree:new_tree ~parents ~message
154154+ ~author:"irmin-subtree"
155155+ in
156156+ Store.set_head store ~branch:"main" new_head;
157157+ Ok new_head
158158+159159+ (* Push: Push subtree changes to external repo *)
160160+ let push store ~prefix ~target =
161161+ match Store.head store ~branch:"main" with
162162+ | None -> failwith "Store has no main branch"
163163+ | Some head ->
164164+ match Store.read_commit store head with
165165+ | None -> failwith "Cannot read store commit"
166166+ | Some commit ->
167167+ let tree = Store.read_tree store (Commit.tree commit) in
168168+ match extract_subtree tree prefix with
169169+ | None -> failwith "No subtree at prefix"
170170+ | Some subtree ->
171171+ let parents =
172172+ match Store.head target ~branch:"main" with
173173+ | None -> []
174174+ | Some h -> [ h ]
175175+ in
176176+177177+ let message =
178178+ Printf.sprintf "Push from '%s'" (String.concat "/" prefix)
179179+ in
180180+181181+ let new_head =
182182+ Store.commit target ~tree:subtree ~parents ~message
183183+ ~author:"irmin-subtree"
184184+ in
185185+ Store.set_head target ~branch:"main" new_head;
186186+ new_head
187187+188188+ (* Status: Compare subtree with external repo *)
189189+ let status store ~prefix ~external_ =
190190+ let local_head = Store.head store ~branch:"main" in
191191+ let remote_head = Store.head external_ ~branch:"main" in
192192+193193+ match (local_head, remote_head) with
194194+ | None, None -> `In_sync
195195+ | None, Some _ -> `Remote_ahead 1
196196+ | Some _, None -> `Local_ahead 1
197197+ | Some lh, Some rh ->
198198+ (* Get subtree hash from local *)
199199+ let local_tree_hash =
200200+ match Store.read_commit store lh with
201201+ | None -> None
202202+ | Some c ->
203203+ let tree = Store.read_tree store (Commit.tree c) in
204204+ match Tree.find_tree tree prefix with
205205+ | None -> None
206206+ | Some t -> Some (Tree.hash t ~backend:(Store.backend store))
207207+ in
208208+209209+ (* Get tree hash from remote *)
210210+ let remote_tree_hash =
211211+ match Store.read_commit external_ rh with
212212+ | None -> None
213213+ | Some c -> Some (Commit.tree c)
214214+ in
215215+216216+ match (local_tree_hash, remote_tree_hash) with
217217+ | None, None -> `In_sync
218218+ | None, Some _ -> `Remote_ahead 1
219219+ | Some _, None -> `Local_ahead 1
220220+ | Some lt, Some rt ->
221221+ if Hash.equal lt rt then `In_sync
222222+ else
223223+ (* Check ancestry *)
224224+ if Store.is_ancestor external_ ~ancestor:rt ~descendant:lt then
225225+ `Local_ahead (Store.commits_between external_ ~base:rt ~head:lt)
226226+ else if Store.is_ancestor external_ ~ancestor:lt ~descendant:rt then
227227+ `Remote_ahead (Store.commits_between external_ ~base:lt ~head:rt)
228228+ else `Trees_differ
229229+end
230230+231231+module Git = Make (Tree_format.Git)
+74
lib/subtree.mli
···11+(** Subtree operations for monorepo management.
22+33+ This module provides first-class subtree operations that replace
44+ the need to shell out to [git subtree] commands. *)
55+66+(** {1 Subtree Functor} *)
77+88+module Make (F : Tree_format.S) : sig
99+ type hash = F.hash
1010+1111+ module Store : module type of Store.Make (F)
1212+1313+ (** {2 Subtree Split} *)
1414+1515+ val split : Store.t -> prefix:Store.Tree.path -> Store.t
1616+ (** [split store ~prefix] extracts the subtree at [prefix] into a
1717+ new store with rewritten history containing only commits that
1818+ touch that prefix.
1919+2020+ Like [git subtree split --prefix]. *)
2121+2222+ (** {2 Subtree Add} *)
2323+2424+ val add : Store.t -> prefix:Store.Tree.path -> source:Store.t -> hash
2525+ (** [add store ~prefix ~source] adds the contents of [source] as a
2626+ subtree at [prefix], creating a merge commit.
2727+2828+ Like [git subtree add --prefix --squash]. *)
2929+3030+ (** {2 Subtree Pull} *)
3131+3232+ val pull :
3333+ Store.t ->
3434+ prefix:Store.Tree.path ->
3535+ source:Store.t ->
3636+ (hash, [> `Conflict of Store.Tree.path list ]) result
3737+ (** [pull store ~prefix ~source] pulls updates from [source] into
3838+ the subtree at [prefix].
3939+4040+ Like [git subtree pull --prefix --squash]. *)
4141+4242+ (** {2 Subtree Push} *)
4343+4444+ val push : Store.t -> prefix:Store.Tree.path -> target:Store.t -> hash
4545+ (** [push store ~prefix ~target] pushes changes from the subtree at
4646+ [prefix] to [target].
4747+4848+ Like [git subtree push --prefix]. *)
4949+5050+ (** {2 Status} *)
5151+5252+ type status =
5353+ [ `In_sync
5454+ | `Local_ahead of int
5555+ | `Remote_ahead of int
5656+ | `Diverged of int * int (** local, remote *)
5757+ | `Trees_differ ]
5858+5959+ val status :
6060+ Store.t -> prefix:Store.Tree.path -> external_:Store.t -> status
6161+ (** [status store ~prefix ~external_] compares the subtree at [prefix]
6262+ with the external store.
6363+6464+ - [`In_sync]: Trees are identical
6565+ - [`Local_ahead n]: Local has n commits not in external
6666+ - [`Remote_ahead n]: External has n commits not in local
6767+ - [`Diverged]: Both have independent commits
6868+ - [`Trees_differ]: Trees differ but no history relationship *)
6969+end
7070+7171+(** {1 Pre-instantiated Subtree} *)
7272+7373+module Git : module type of Make (Tree_format.Git)
7474+(** Git-format subtree operations. *)
+294
lib/tree.ml
···11+module Make (F : Tree_format.S) = struct
22+ type hash = F.hash
33+ type path = string list
44+55+ type concrete =
66+ [ `Contents of string | `Tree of (string * concrete) list ]
77+88+ (* Internal tree representation with lazy loading *)
99+ type node_state =
1010+ | Loaded of F.node
1111+ | Lazy of { backend : hash Backend.t; hash : hash }
1212+ | Shallow of hash
1313+ | Pruned of hash
1414+1515+ type tree_node =
1616+ | Contents of string
1717+ | Node of {
1818+ mutable state : node_state;
1919+ mutable children : (string * tree_node) list; (* modifications *)
2020+ mutable removed : string list;
2121+ }
2222+2323+ type t = tree_node
2424+2525+ let empty () = Node { state = Loaded F.empty_node; children = []; removed = [] }
2626+2727+ let of_hash ~backend hash =
2828+ Node { state = Lazy { backend; hash }; children = []; removed = [] }
2929+3030+ let shallow hash = Node { state = Shallow hash; children = []; removed = [] }
3131+ let pruned hash = Node { state = Pruned hash; children = []; removed = [] }
3232+3333+ let rec of_concrete : concrete -> t = function
3434+ | `Contents s -> Contents s
3535+ | `Tree entries ->
3636+ let children = List.map (fun (name, c) -> (name, of_concrete c)) entries in
3737+ Node { state = Loaded F.empty_node; children; removed = [] }
3838+3939+ (* Force loading of a lazy node *)
4040+ let force_node state =
4141+ match state with
4242+ | Loaded n -> Some n
4343+ | Lazy { backend; hash } -> (
4444+ match backend.read hash with
4545+ | Some data -> (
4646+ match F.node_of_bytes data with
4747+ | Ok n -> Some n
4848+ | Error _ -> None)
4949+ | None -> None)
5050+ | Shallow _ -> None
5151+ | Pruned _ -> None
5252+5353+ (* Navigate to a path, returning the node and remaining path *)
5454+ let rec navigate t path =
5555+ match (t, path) with
5656+ | _, [] -> Some (t, [])
5757+ | Contents _, _ :: _ -> None
5858+ | Node node, name :: rest -> (
5959+ (* Check modifications first *)
6060+ match List.assoc_opt name node.children with
6161+ | Some child -> navigate child rest
6262+ | None ->
6363+ if List.mem name node.removed then None
6464+ else
6565+ (* Try to load from underlying node *)
6666+ match force_node node.state with
6767+ | None -> None
6868+ | Some loaded -> (
6969+ match F.find loaded name with
7070+ | None -> None
7171+ | Some (`Contents _hash) ->
7272+ (* Would need to load contents - for now return None *)
7373+ None
7474+ | Some (`Node hash) -> (
7575+ match node.state with
7676+ | Lazy { backend; _ } ->
7777+ let child = of_hash ~backend hash in
7878+ navigate child rest
7979+ | _ -> None)))
8080+8181+ let find t path =
8282+ match navigate t path with
8383+ | Some (Contents s, []) -> Some s
8484+ | _ -> None
8585+8686+ let find_tree t path =
8787+ match navigate t path with
8888+ | Some ((Node _ as n), []) -> Some n
8989+ | _ -> None
9090+9191+ let mem t path = Option.is_some (navigate t path)
9292+9393+ let mem_tree t path =
9494+ match navigate t path with
9595+ | Some (Node _, []) -> true
9696+ | _ -> false
9797+9898+ let list t path =
9999+ match navigate t path with
100100+ | Some (Node node, []) -> (
101101+ match force_node node.state with
102102+ | None -> []
103103+ | Some loaded ->
104104+ let base_entries =
105105+ F.list loaded
106106+ |> List.filter (fun (name, _) ->
107107+ (not (List.mem name node.removed))
108108+ && not (List.mem_assoc name node.children))
109109+ |> List.map (fun (name, kind) ->
110110+ let k = match kind with `Node _ -> `Node | `Contents _ -> `Contents in
111111+ (name, k))
112112+ in
113113+ let child_entries =
114114+ List.map
115115+ (fun (name, child) ->
116116+ let k = match child with Node _ -> `Node | Contents _ -> `Contents in
117117+ (name, k))
118118+ node.children
119119+ in
120120+ List.sort (fun (a, _) (b, _) -> String.compare a b) (base_entries @ child_entries))
121121+ | _ -> []
122122+123123+ (* Add contents at path, creating intermediate nodes as needed *)
124124+ let rec add_at t path value =
125125+ match (t, path) with
126126+ | _, [] -> value
127127+ | Contents _, _ :: _ ->
128128+ (* Replace contents with a tree *)
129129+ add_at (empty ()) path value
130130+ | Node node, [ name ] ->
131131+ let children =
132132+ (name, value) :: List.filter (fun (n, _) -> n <> name) node.children
133133+ in
134134+ let removed = List.filter (( <> ) name) node.removed in
135135+ Node { node with children; removed }
136136+ | Node node, name :: rest ->
137137+ let child =
138138+ match List.assoc_opt name node.children with
139139+ | Some c -> c
140140+ | None -> (
141141+ if List.mem name node.removed then empty ()
142142+ else
143143+ match force_node node.state with
144144+ | None -> empty ()
145145+ | Some loaded -> (
146146+ match F.find loaded name with
147147+ | Some (`Node hash) -> (
148148+ match node.state with
149149+ | Lazy { backend; _ } -> of_hash ~backend hash
150150+ | _ -> empty ())
151151+ | _ -> empty ()))
152152+ in
153153+ let new_child = add_at child rest value in
154154+ let children =
155155+ (name, new_child) :: List.filter (fun (n, _) -> n <> name) node.children
156156+ in
157157+ Node { node with children }
158158+159159+ let add t path contents = add_at t path (Contents contents)
160160+ let add_tree t path subtree = add_at t path subtree
161161+162162+ let rec remove t path =
163163+ match (t, path) with
164164+ | _, [] -> empty ()
165165+ | Contents _, _ :: _ -> t
166166+ | Node node, [ name ] ->
167167+ let children = List.filter (fun (n, _) -> n <> name) node.children in
168168+ let removed =
169169+ if List.mem name node.removed then node.removed else name :: node.removed
170170+ in
171171+ Node { node with children; removed }
172172+ | Node node, name :: rest ->
173173+ let child =
174174+ match List.assoc_opt name node.children with
175175+ | Some c -> c
176176+ | None -> (
177177+ if List.mem name node.removed then empty ()
178178+ else
179179+ match force_node node.state with
180180+ | None -> empty ()
181181+ | Some loaded -> (
182182+ match F.find loaded name with
183183+ | Some (`Node hash) -> (
184184+ match node.state with
185185+ | Lazy { backend; _ } -> of_hash ~backend hash
186186+ | _ -> empty ())
187187+ | _ -> empty ()))
188188+ in
189189+ let new_child = remove child rest in
190190+ let children =
191191+ (name, new_child) :: List.filter (fun (n, _) -> n <> name) node.children
192192+ in
193193+ Node { node with children }
194194+195195+ let rec to_concrete t =
196196+ match t with
197197+ | Contents s -> `Contents s
198198+ | Node node ->
199199+ let entries =
200200+ match force_node node.state with
201201+ | None -> []
202202+ | Some loaded ->
203203+ F.list loaded
204204+ |> List.filter_map (fun (name, _kind) ->
205205+ if List.mem name node.removed then None
206206+ else if List.mem_assoc name node.children then None
207207+ else
208208+ (* Would need to recursively load - simplified here *)
209209+ None)
210210+ in
211211+ let child_entries =
212212+ List.map (fun (name, child) -> (name, to_concrete child)) node.children
213213+ in
214214+ let all = List.sort (fun (a, _) (b, _) -> String.compare a b) (entries @ child_entries) in
215215+ `Tree all
216216+217217+ (* Write tree to backend and return hash *)
218218+ let rec write_tree t ~(backend : hash Backend.t) : hash =
219219+ match t with
220220+ | Contents s ->
221221+ let h = F.hash_contents s in
222222+ let _ = backend.write s in
223223+ h
224224+ | Node node ->
225225+ (* First, get the base node *)
226226+ let base =
227227+ match force_node node.state with Some n -> n | None -> F.empty_node
228228+ in
229229+ (* Apply removals *)
230230+ let base = List.fold_left (fun n name -> F.remove n name) base node.removed in
231231+ (* Apply additions (recursively writing children) *)
232232+ let final =
233233+ List.fold_left
234234+ (fun n (name, child) ->
235235+ let child_hash = write_tree child ~backend in
236236+ let kind =
237237+ match child with
238238+ | Contents _ -> `Contents child_hash
239239+ | Node _ -> `Node child_hash
240240+ in
241241+ F.add n name kind)
242242+ base node.children
243243+ in
244244+ let data = F.bytes_of_node final in
245245+ let _ = backend.write data in
246246+ F.hash_node final
247247+248248+ let hash t ~backend = write_tree t ~backend
249249+250250+ type 'a force = [ `True | `False of hash -> 'a | `Shallow of hash -> 'a ]
251251+252252+ let fold ?(force = `True) t init f =
253253+ let rec go path t acc =
254254+ match t with
255255+ | Contents s -> f path (`Contents s) acc
256256+ | Node node ->
257257+ let acc = f path `Tree acc in
258258+ match force with
259259+ | `True -> (
260260+ match force_node node.state with
261261+ | None -> acc
262262+ | Some _loaded ->
263263+ (* Fold over children *)
264264+ List.fold_left
265265+ (fun acc (name, child) -> go (path @ [ name ]) child acc)
266266+ acc node.children)
267267+ | `False fn -> (
268268+ match node.state with
269269+ | Lazy { hash; _ } -> fn hash
270270+ | Shallow hash -> fn hash
271271+ | Pruned hash -> fn hash
272272+ | Loaded _ ->
273273+ List.fold_left
274274+ (fun acc (name, child) -> go (path @ [ name ]) child acc)
275275+ acc node.children)
276276+ | `Shallow fn -> (
277277+ match node.state with
278278+ | Shallow hash -> fn hash
279279+ | _ ->
280280+ List.fold_left
281281+ (fun acc (name, child) -> go (path @ [ name ]) child acc)
282282+ acc node.children)
283283+ in
284284+ go [] t init
285285+286286+ let clear ?depth:_ _t = ()
287287+288288+ let equal t1 t2 =
289289+ (* Simple structural equality - could be optimized with hash comparison *)
290290+ to_concrete t1 = to_concrete t2
291291+end
292292+293293+module Git = Make (Tree_format.Git)
294294+module Mst = Make (Tree_format.Mst)
+121
lib/tree.mli
···11+(** Lazy trees with delayed writes.
22+33+ Trees are like Git's staging area: immutable, temporary, non-persistent
44+ areas held in memory. Reads are done lazily and writes are accumulated
55+ until commit - if you modify a key twice, only the last change is written. *)
66+77+(** {1 Tree Functor} *)
88+99+module Make (F : Tree_format.S) : sig
1010+ type t
1111+ (** Immutable in-memory tree with lazy reads and delayed writes. *)
1212+1313+ type hash = F.hash
1414+1515+ (** {2 Path Type} *)
1616+1717+ type path = string list
1818+ (** A path is a list of path segments. *)
1919+2020+ (** {2 Concrete Trees} *)
2121+2222+ type concrete =
2323+ [ `Contents of string | `Tree of (string * concrete) list ]
2424+ (** Fully materialized tree for import/export. *)
2525+2626+ (** {2 Construction} *)
2727+2828+ val empty : unit -> t
2929+ (** [empty ()] creates an empty tree. *)
3030+3131+ val of_hash : backend:hash Backend.t -> hash -> t
3232+ (** [of_hash ~backend h] creates a tree backed by the store.
3333+ Nothing is loaded until accessed (lazy reads). *)
3434+3535+ val of_concrete : concrete -> t
3636+ (** [of_concrete c] creates a tree from a fully materialized tree. *)
3737+3838+ val shallow : hash -> t
3939+ (** [shallow h] creates a tree with only a hash reference.
4040+ Accessing contents raises an error. *)
4141+4242+ val pruned : hash -> t
4343+ (** [pruned h] creates a pruned tree that raises on dereference.
4444+ Used for GC and export operations. *)
4545+4646+ (** {2 Reads (Lazy)} *)
4747+4848+ val find : t -> path -> string option
4949+ (** [find t path] looks up contents at [path].
5050+ Loads nodes lazily as needed. *)
5151+5252+ val find_tree : t -> path -> t option
5353+ (** [find_tree t path] looks up a subtree at [path]. *)
5454+5555+ val list : t -> path -> (string * [ `Node | `Contents ]) list
5656+ (** [list t path] lists entries at [path]. *)
5757+5858+ val mem : t -> path -> bool
5959+ (** [mem t path] checks if [path] exists. *)
6060+6161+ val mem_tree : t -> path -> bool
6262+ (** [mem_tree t path] checks if a subtree exists at [path]. *)
6363+6464+ (** {2 Writes (Delayed)} *)
6565+6666+ val add : t -> path -> string -> t
6767+ (** [add t path contents] adds contents at [path].
6868+ The write is accumulated, not performed immediately. *)
6969+7070+ val add_tree : t -> path -> t -> t
7171+ (** [add_tree t path subtree] adds a subtree at [path]. *)
7272+7373+ val remove : t -> path -> t
7474+ (** [remove t path] removes the entry at [path]. *)
7575+7676+ (** {2 Materialization} *)
7777+7878+ val to_concrete : t -> concrete
7979+ (** [to_concrete t] fully materializes the tree.
8080+ Forces all lazy nodes to be loaded. *)
8181+8282+ val hash : t -> backend:hash Backend.t -> hash
8383+ (** [hash t ~backend] computes the tree hash.
8484+ Writes all accumulated changes to the backend. *)
8585+8686+ (** {2 Force Control} *)
8787+8888+ type 'a force = [ `True | `False of hash -> 'a | `Shallow of hash -> 'a ]
8989+ (** Control how lazy nodes are handled during traversal:
9090+ - [`True]: force loading of all nodes
9191+ - [`False f]: call [f] on unloaded hashes instead
9292+ - [`Shallow f]: like [`False] but for shallow trees *)
9393+9494+ val fold :
9595+ ?force:'a force ->
9696+ t ->
9797+ 'a ->
9898+ (path -> [ `Contents of string | `Tree ] -> 'a -> 'a) ->
9999+ 'a
100100+ (** [fold ~force t init f] traverses the tree.
101101+ The [force] parameter controls lazy loading behavior. *)
102102+103103+ (** {2 Cache Management} *)
104104+105105+ val clear : ?depth:int -> t -> unit
106106+ (** [clear ?depth t] purges cached data.
107107+ If [depth] is given, only clears nodes at that depth or deeper. *)
108108+109109+ (** {2 Comparison} *)
110110+111111+ val equal : t -> t -> bool
112112+ (** [equal t1 t2] compares trees structurally. *)
113113+end
114114+115115+(** {1 Pre-instantiated Trees} *)
116116+117117+module Git : module type of Make (Tree_format.Git)
118118+(** Git-format trees with SHA-1 hashes. *)
119119+120120+module Mst : module type of Make (Tree_format.Mst)
121121+(** MST-format trees with SHA-256 hashes. *)
+166
lib/tree_format.ml
···11+module type S = sig
22+ type node
33+ type hash
44+55+ val hash_node : node -> hash
66+ val hash_contents : string -> hash
77+ val node_of_bytes : string -> (node, [> `Msg of string ]) result
88+ val bytes_of_node : node -> string
99+ val empty_node : node
1010+ val find : node -> string -> [ `Node of hash | `Contents of hash ] option
1111+ val add : node -> string -> [ `Node of hash | `Contents of hash ] -> node
1212+ val remove : node -> string -> node
1313+ val list : node -> (string * [ `Node of hash | `Contents of hash ]) list
1414+ val is_empty : node -> bool
1515+end
1616+1717+module type SHA1 = S with type hash = Hash.sha1
1818+module type SHA256 = S with type hash = Hash.sha256
1919+2020+(** Git tree object format using ocaml-git. *)
2121+module Git : SHA1 = struct
2222+ type hash = Hash.sha1
2323+ type node = Git.Tree.t
2424+2525+ (* Convert between irmin Hash.sha1 and Git.Hash.t *)
2626+ let git_hash_of_sha1 (h : hash) : Git.Hash.t =
2727+ Git.Hash.of_raw_string (Hash.to_bytes h)
2828+2929+ let sha1_of_git_hash (h : Git.Hash.t) : hash =
3030+ Hash.sha1_of_bytes (Git.Hash.to_raw_string h)
3131+3232+ let empty_node = Git.Tree.empty
3333+ let is_empty = Git.Tree.is_empty
3434+3535+ let find node name =
3636+ match Git.Tree.find ~name node with
3737+ | None -> None
3838+ | Some entry ->
3939+ let h = sha1_of_git_hash entry.hash in
4040+ (match entry.perm with
4141+ | `Dir -> Some (`Node h)
4242+ | _ -> Some (`Contents h))
4343+4444+ let add node name kind =
4545+ let perm, hash =
4646+ match kind with
4747+ | `Node h -> (`Dir, git_hash_of_sha1 h)
4848+ | `Contents h -> (`Normal, git_hash_of_sha1 h)
4949+ in
5050+ let entry = Git.Tree.entry ~perm ~name hash in
5151+ Git.Tree.add entry node
5252+5353+ let remove node name = Git.Tree.remove ~name node
5454+5555+ let list node =
5656+ Git.Tree.to_list node
5757+ |> List.map (fun (entry : Git.Tree.entry) ->
5858+ let h = sha1_of_git_hash entry.hash in
5959+ let kind =
6060+ match entry.perm with `Dir -> `Node h | _ -> `Contents h
6161+ in
6262+ (entry.name, kind))
6363+6464+ let bytes_of_node = Git.Tree.to_string
6565+ let node_of_bytes = Git.Tree.of_string
6666+ let hash_node node = sha1_of_git_hash (Git.Tree.digest node)
6767+6868+ let hash_contents data =
6969+ sha1_of_git_hash (Git.Hash.digest_string ~kind:`Blob data)
7070+end
7171+7272+(** ATProto Merkle Search Tree format using ocaml-atp.
7373+7474+ MST uses SHA-256 with 2-bit prefix counting for tree depth.
7575+ Keys are stored sorted with common prefix compression.
7676+ Encoded as DAG-CBOR. *)
7777+module Mst : SHA256 = struct
7878+ type hash = Hash.sha256
7979+8080+ (* Convert between irmin Hash.sha256 and Atp.Cid.t *)
8181+ let cid_of_sha256 (h : hash) : Atp.Cid.t =
8282+ Atp.Cid.of_digest `Dag_cbor (Hash.to_bytes h)
8383+8484+ let sha256_of_cid (cid : Atp.Cid.t) : hash =
8585+ Hash.sha256_of_bytes (Atp.Cid.digest cid)
8686+8787+ (* Our node wraps Atp.Mst.Raw.node for serialization *)
8888+ type node = Atp.Mst.Raw.node
8989+9090+ let empty_node : node = { l = None; e = [] }
9191+9292+ let is_empty (node : node) = node.l = None && node.e = []
9393+9494+ (* Decompress key from entry list *)
9595+ let decompress_keys (entries : Atp.Mst.Raw.entry list) : (string * Atp.Mst.Raw.entry) list =
9696+ let rec loop prev_key acc = function
9797+ | [] -> List.rev acc
9898+ | (e : Atp.Mst.Raw.entry) :: rest ->
9999+ let key = String.sub prev_key 0 e.p ^ e.k in
100100+ loop key ((key, e) :: acc) rest
101101+ in
102102+ loop "" [] entries
103103+104104+ let find (node : node) name =
105105+ let entries = decompress_keys node.e in
106106+ match List.find_opt (fun (k, _) -> k = name) entries with
107107+ | None -> None
108108+ | Some (_, e) ->
109109+ (* In MST, all values are content CIDs, subtrees are in 't' field *)
110110+ Some (`Contents (sha256_of_cid e.v))
111111+112112+ (* Compress keys for serialization *)
113113+ let compress_keys entries =
114114+ let sorted = List.sort (fun (k1, _) (k2, _) -> String.compare k1 k2) entries in
115115+ let rec loop prev_key acc = function
116116+ | [] -> List.rev acc
117117+ | (key, (v, t)) :: rest ->
118118+ let p =
119119+ let rec shared i =
120120+ if i >= String.length prev_key || i >= String.length key then i
121121+ else if prev_key.[i] = key.[i] then shared (i + 1)
122122+ else i
123123+ in
124124+ shared 0
125125+ in
126126+ let k = String.sub key p (String.length key - p) in
127127+ let entry : Atp.Mst.Raw.entry = { p; k; v; t } in
128128+ loop key (entry :: acc) rest
129129+ in
130130+ loop "" [] sorted
131131+132132+ let add (node : node) name kind =
133133+ let entries = decompress_keys node.e in
134134+ let v, t =
135135+ match kind with
136136+ | `Contents h -> (cid_of_sha256 h, None)
137137+ | `Node h -> (cid_of_sha256 h, None) (* TODO: Handle subtree pointers *)
138138+ in
139139+ let entries = List.filter (fun (k, _) -> k <> name) entries in
140140+ let entries = (name, (v, None)) :: List.map (fun (k, e) -> (k, (e.v, e.t))) entries in
141141+ let compressed = compress_keys entries in
142142+ { node with e = compressed }
143143+144144+ let remove (node : node) name =
145145+ let entries = decompress_keys node.e in
146146+ let entries = List.filter (fun (k, _) -> k <> name) entries in
147147+ let entries = List.map (fun (k, e) -> (k, (e.v, e.t))) entries in
148148+ let compressed = compress_keys entries in
149149+ { node with e = compressed }
150150+151151+ let list (node : node) =
152152+ let entries = decompress_keys node.e in
153153+ List.map (fun (key, e) -> (key, `Contents (sha256_of_cid e.v))) entries
154154+155155+ let bytes_of_node node = Atp.Mst.Raw.encode_bytes node
156156+157157+ let node_of_bytes data =
158158+ try Ok (Atp.Mst.Raw.decode_bytes data)
159159+ with _ -> Error (`Msg "failed to decode MST node")
160160+161161+ let hash_node node =
162162+ let data = bytes_of_node node in
163163+ Hash.sha256 data
164164+165165+ let hash_contents data = Hash.sha256 data
166166+end
+63
lib/tree_format.mli
···11+(** Tree format abstraction - the ONE functor in Irmin.
22+33+ This module type defines how tree nodes are encoded and decoded.
44+ Different formats (Git trees, ATProto MST) implement this interface.
55+ This is the only functor-based abstraction point in the library. *)
66+77+(** {1 Module Type} *)
88+99+module type S = sig
1010+ (** The tree format signature. *)
1111+1212+ type node
1313+ (** The internal node representation. *)
1414+1515+ type hash
1616+ (** The hash type used by this format. *)
1717+1818+ val hash_node : node -> hash
1919+ (** [hash_node n] computes the hash of [n]. *)
2020+2121+ val hash_contents : string -> hash
2222+ (** [hash_contents data] computes the hash of content [data]. *)
2323+2424+ val node_of_bytes : string -> (node, [> `Msg of string ]) result
2525+ (** [node_of_bytes data] deserializes a node from bytes. *)
2626+2727+ val bytes_of_node : node -> string
2828+ (** [bytes_of_node n] serializes a node to bytes. *)
2929+3030+ val empty_node : node
3131+ (** The empty node with no entries. *)
3232+3333+ val find : node -> string -> [ `Node of hash | `Contents of hash ] option
3434+ (** [find node name] looks up an entry by name. *)
3535+3636+ val add : node -> string -> [ `Node of hash | `Contents of hash ] -> node
3737+ (** [add node name entry] adds or replaces an entry. *)
3838+3939+ val remove : node -> string -> node
4040+ (** [remove node name] removes an entry. *)
4141+4242+ val list : node -> (string * [ `Node of hash | `Contents of hash ]) list
4343+ (** [list node] returns all entries sorted by name. *)
4444+4545+ val is_empty : node -> bool
4646+ (** [is_empty node] returns true if the node has no entries. *)
4747+end
4848+4949+(** {1 Hash-Specific Signatures} *)
5050+5151+module type SHA1 = S with type hash = Hash.sha1
5252+(** Tree format using SHA-1 (Git compatible). *)
5353+5454+module type SHA256 = S with type hash = Hash.sha256
5555+(** Tree format using SHA-256 (ATProto compatible). *)
5656+5757+(** {1 Built-in Formats} *)
5858+5959+module Git : SHA1
6060+(** Git tree object format. Bidirectional compatibility with Git. *)
6161+6262+module Mst : SHA256
6363+(** ATProto Merkle Search Tree format. *)