Persistent store with Git semantics: lazy reads, delayed writes, content-addressing
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge commit '0e37eb7e5abc5de978e806bbf8bbb6493cd1300b' as 'irmin'

+2429
+3
.gitignore
··· 1 + _build/ 2 + *.install 3 + .merlin
+30
dune-project
··· 1 + (lang dune 3.17) 2 + (name irmin) 3 + (version 2.0.0~dev) 4 + 5 + (generate_opam_files true) 6 + 7 + (license ISC) 8 + (authors "Thomas Gazagnaire <thomas@gazagnaire.org>") 9 + (maintainers "Thomas Gazagnaire <thomas@gazagnaire.org>") 10 + (source 11 + (uri https://tangled.org/gazagnaire.org/irmin)) 12 + 13 + (package 14 + (name irmin) 15 + (synopsis "Content-addressable store with Git and ATProto MST support") 16 + (description 17 + "Irmin is a library for building content-addressable stores with lazy reads, 18 + delayed writes, and multiple tree formats (Git, ATProto MST). It provides 19 + bidirectional Git compatibility and first-class subtree operations.") 20 + (depends 21 + (ocaml (>= 5.2)) 22 + (dune (>= 3.17)) 23 + (eio (>= 1.2)) 24 + (digestif (>= 1.2)) 25 + (fmt (>= 0.9)) 26 + (logs (>= 0.7)) 27 + (git (>= 0.1)) 28 + (atp (>= 0.1)) 29 + (alcotest :with-test) 30 + (crowbar :with-test))))
+37
irmin.opam
··· 1 + # This file is generated by dune, edit dune-project instead 2 + opam-version: "2.0" 3 + version: "2.0.0~dev" 4 + synopsis: "Content-addressable store with Git and ATProto MST support" 5 + description: """ 6 + Irmin is a library for building content-addressable stores with lazy reads, 7 + delayed writes, and multiple tree formats (Git, ATProto MST). It provides 8 + bidirectional Git compatibility and first-class subtree operations.""" 9 + maintainer: ["Thomas Gazagnaire <thomas@gazagnaire.org>"] 10 + authors: ["Thomas Gazagnaire <thomas@gazagnaire.org>"] 11 + license: "ISC" 12 + depends: [ 13 + "ocaml" {>= "5.2"} 14 + "dune" {>= "3.17" & >= "3.17"} 15 + "eio" {>= "1.2"} 16 + "digestif" {>= "1.2"} 17 + "fmt" {>= "0.9"} 18 + "logs" {>= "0.7"} 19 + "alcotest" {with-test} 20 + "crowbar" {with-test} 21 + "odoc" {with-doc} 22 + ] 23 + build: [ 24 + ["dune" "subst"] {dev} 25 + [ 26 + "dune" 27 + "build" 28 + "-p" 29 + name 30 + "-j" 31 + jobs 32 + "@install" 33 + "@runtest" {with-test} 34 + "@doc" {with-doc} 35 + ] 36 + ] 37 + dev-repo: "https://tangled.org/gazagnaire.org/irmin"
+171
lib/backend.ml
··· 1 + type 'hash t = { 2 + read : 'hash -> string option; 3 + write : string -> 'hash; 4 + exists : 'hash -> bool; 5 + get_ref : string -> 'hash option; 6 + set_ref : string -> 'hash -> unit; 7 + test_and_set_ref : string -> test:'hash option -> set:'hash option -> bool; 8 + list_refs : unit -> string list; 9 + write_batch : string list -> 'hash list; 10 + flush : unit -> unit; 11 + close : unit -> unit; 12 + } 13 + 14 + type stats = { 15 + reads : int; 16 + writes : int; 17 + cache_hits : int; 18 + cache_misses : int; 19 + } 20 + 21 + module Memory = struct 22 + module StringMap = Map.Make (String) 23 + 24 + type 'hash state = { 25 + mutable objects : string StringMap.t; 26 + mutable refs : 'hash StringMap.t; 27 + hash_fn : string -> 'hash; 28 + to_hex : 'hash -> string; 29 + equal : 'hash -> 'hash -> bool; 30 + } 31 + 32 + let create_with_hash (type h) (hash_fn : string -> h) (to_hex : h -> string) 33 + (equal : h -> h -> bool) : h t = 34 + let state = 35 + { objects = StringMap.empty; refs = StringMap.empty; hash_fn; to_hex; equal } 36 + in 37 + { 38 + read = 39 + (fun h -> 40 + let key = state.to_hex h in 41 + StringMap.find_opt key state.objects); 42 + write = 43 + (fun data -> 44 + let h = state.hash_fn data in 45 + let key = state.to_hex h in 46 + state.objects <- StringMap.add key data state.objects; 47 + h); 48 + exists = 49 + (fun h -> 50 + let key = state.to_hex h in 51 + StringMap.mem key state.objects); 52 + get_ref = (fun name -> StringMap.find_opt name state.refs); 53 + set_ref = 54 + (fun name hash -> state.refs <- StringMap.add name hash state.refs); 55 + test_and_set_ref = 56 + (fun name ~test ~set -> 57 + let current = StringMap.find_opt name state.refs in 58 + let matches = 59 + match (test, current) with 60 + | None, None -> true 61 + | Some t, Some c -> state.equal t c 62 + | _ -> false 63 + in 64 + if matches then ( 65 + (match set with 66 + | None -> state.refs <- StringMap.remove name state.refs 67 + | Some h -> state.refs <- StringMap.add name h state.refs); 68 + true) 69 + else false); 70 + list_refs = (fun () -> StringMap.bindings state.refs |> List.map fst); 71 + write_batch = 72 + (fun objects -> 73 + List.map 74 + (fun data -> 75 + let h = state.hash_fn data in 76 + let key = state.to_hex h in 77 + state.objects <- StringMap.add key data state.objects; 78 + h) 79 + objects); 80 + flush = (fun () -> ()); 81 + close = (fun () -> ()); 82 + } 83 + 84 + let create_sha1 () = create_with_hash Hash.sha1 Hash.to_hex Hash.equal 85 + let create_sha256 () = create_with_hash Hash.sha256 Hash.to_hex Hash.equal 86 + end 87 + 88 + (* Simple LRU cache *) 89 + module Lru = struct 90 + type ('k, 'v) t = { 91 + capacity : int; 92 + mutable items : ('k * 'v) list; 93 + } 94 + 95 + let create capacity = { capacity; items = [] } 96 + 97 + let find t key = 98 + match List.assoc_opt key t.items with 99 + | Some v -> 100 + (* Move to front *) 101 + t.items <- (key, v) :: List.remove_assoc key t.items; 102 + Some v 103 + | None -> None 104 + 105 + let add t key value = 106 + t.items <- (key, value) :: List.remove_assoc key t.items; 107 + if List.length t.items > t.capacity then 108 + t.items <- List.rev (List.tl (List.rev t.items)) 109 + end 110 + 111 + let cached (type h) (backend : h t) : h t = 112 + let module H = struct 113 + type t = h 114 + 115 + let hash = Hashtbl.hash 116 + let equal a b = backend.read a = backend.read b 117 + end in 118 + let cache : (h, string) Lru.t = Lru.create 1000 in 119 + { 120 + backend with 121 + read = 122 + (fun h -> 123 + match Lru.find cache h with 124 + | Some v -> Some v 125 + | None -> 126 + let result = backend.read h in 127 + Option.iter (fun v -> Lru.add cache h v) result; 128 + result); 129 + } 130 + 131 + let readonly (backend : 'h t) : 'h t = 132 + let fail () = invalid_arg "Backend is read-only" in 133 + { 134 + backend with 135 + write = (fun _ -> fail ()); 136 + set_ref = (fun _ _ -> fail ()); 137 + test_and_set_ref = (fun _ ~test:_ ~set:_ -> fail ()); 138 + write_batch = (fun _ -> fail ()); 139 + } 140 + 141 + let layered ~(upper : 'h t) ~(lower : 'h t) : 'h t = 142 + { 143 + read = 144 + (fun h -> 145 + match upper.read h with Some v -> Some v | None -> lower.read h); 146 + write = upper.write; 147 + exists = (fun h -> upper.exists h || lower.exists h); 148 + get_ref = 149 + (fun name -> 150 + match upper.get_ref name with 151 + | Some v -> Some v 152 + | None -> lower.get_ref name); 153 + set_ref = upper.set_ref; 154 + test_and_set_ref = upper.test_and_set_ref; 155 + list_refs = 156 + (fun () -> 157 + let upper_refs = upper.list_refs () in 158 + let lower_refs = lower.list_refs () in 159 + List.sort_uniq String.compare (upper_refs @ lower_refs)); 160 + write_batch = upper.write_batch; 161 + flush = 162 + (fun () -> 163 + upper.flush (); 164 + lower.flush ()); 165 + close = 166 + (fun () -> 167 + upper.close (); 168 + lower.close ()); 169 + } 170 + 171 + let stats _ = None
+68
lib/backend.mli
··· 1 + (** Storage backends for Irmin. 2 + 3 + Backends are records of functions, NOT functors. This makes them 4 + composable and easy to create without functor application. *) 5 + 6 + (** {1 Backend Interface} *) 7 + 8 + type 'hash t = { 9 + read : 'hash -> string option; 10 + (** [read hash] retrieves the object with the given hash. *) 11 + write : string -> 'hash; 12 + (** [write data] stores data and returns its hash. *) 13 + exists : 'hash -> bool; 14 + (** [exists hash] checks if an object exists. *) 15 + get_ref : string -> 'hash option; 16 + (** [get_ref name] reads a reference (branch/tag). *) 17 + set_ref : string -> 'hash -> unit; 18 + (** [set_ref name hash] sets a reference. *) 19 + test_and_set_ref : string -> test:'hash option -> set:'hash option -> bool; 20 + (** [test_and_set_ref name ~test ~set] atomically updates a reference 21 + if its current value matches [test]. *) 22 + list_refs : unit -> string list; 23 + (** [list_refs ()] returns all reference names. *) 24 + write_batch : string list -> 'hash list; 25 + (** [write_batch objects] writes multiple objects efficiently. *) 26 + flush : unit -> unit; 27 + (** [flush ()] ensures all writes are persisted. *) 28 + close : unit -> unit; 29 + (** [close ()] releases resources. *) 30 + } 31 + 32 + (** {1 Memory Backend} *) 33 + 34 + module Memory : sig 35 + val create_sha1 : unit -> Hash.sha1 t 36 + (** Create an in-memory SHA-1 backend. *) 37 + 38 + val create_sha256 : unit -> Hash.sha256 t 39 + (** Create an in-memory SHA-256 backend. *) 40 + end 41 + 42 + (** {1 Backend Combinators} *) 43 + 44 + val cached : 'h t -> 'h t 45 + (** [cached backend] wraps a backend with an LRU cache. 46 + Reads are served from cache when possible. *) 47 + 48 + val readonly : 'h t -> 'h t 49 + (** [readonly backend] makes a backend read-only. 50 + Write operations raise [Invalid_argument]. *) 51 + 52 + val layered : upper:'h t -> lower:'h t -> 'h t 53 + (** [layered ~upper ~lower] creates a layered backend. 54 + Reads check upper first, then lower. 55 + Writes go to upper only. 56 + Used for garbage collection (upper=live, lower=frozen). *) 57 + 58 + (** {1 Statistics} *) 59 + 60 + type stats = { 61 + reads : int; 62 + writes : int; 63 + cache_hits : int; 64 + cache_misses : int; 65 + } 66 + 67 + val stats : _ t -> stats option 68 + (** [stats backend] returns statistics if the backend tracks them. *)
+111
lib/commit.ml
··· 1 + module Make (F : Tree_format.S) = struct 2 + type hash = F.hash 3 + 4 + type t = { 5 + tree : hash; 6 + parents : hash list; 7 + author : string; 8 + committer : string; 9 + message : string; 10 + timestamp : int64; 11 + } 12 + 13 + let tree c = c.tree 14 + let parents c = c.parents 15 + let author c = c.author 16 + let committer c = c.committer 17 + let message c = c.message 18 + let timestamp c = c.timestamp 19 + 20 + let v ~tree ~parents ~author ?(committer = author) 21 + ?(timestamp = Int64.of_float (Unix.gettimeofday ())) ~message () = 22 + { tree; parents; author; committer; message; timestamp } 23 + 24 + (* Git commit format: 25 + tree <sha1> 26 + parent <sha1> (zero or more) 27 + author <name> <email> <timestamp> <tz> 28 + committer <name> <email> <timestamp> <tz> 29 + 30 + <message> *) 31 + let to_bytes c = 32 + let buf = Buffer.create 256 in 33 + Buffer.add_string buf "tree "; 34 + Buffer.add_string buf (Hash.to_hex c.tree); 35 + Buffer.add_char buf '\n'; 36 + List.iter 37 + (fun parent -> 38 + Buffer.add_string buf "parent "; 39 + Buffer.add_string buf (Hash.to_hex parent); 40 + Buffer.add_char buf '\n') 41 + c.parents; 42 + Buffer.add_string buf "author "; 43 + Buffer.add_string buf c.author; 44 + Buffer.add_string buf " "; 45 + Buffer.add_string buf (Int64.to_string c.timestamp); 46 + Buffer.add_string buf " +0000\n"; 47 + Buffer.add_string buf "committer "; 48 + Buffer.add_string buf c.committer; 49 + Buffer.add_string buf " "; 50 + Buffer.add_string buf (Int64.to_string c.timestamp); 51 + Buffer.add_string buf " +0000\n"; 52 + Buffer.add_char buf '\n'; 53 + Buffer.add_string buf c.message; 54 + Buffer.contents buf 55 + 56 + let hash c = 57 + let data = to_bytes c in 58 + let header = Printf.sprintf "commit %d\x00" (String.length data) in 59 + F.hash_contents (header ^ data) 60 + 61 + let parse_hex_hash s = 62 + (* This is simplified - real implementation would use proper parsing *) 63 + match Hash.sha1_of_hex s with 64 + | Ok h -> Ok (Obj.magic h : hash) 65 + | Error e -> Error e 66 + 67 + let of_bytes data = 68 + (* Simplified parser - real implementation would be more robust *) 69 + let lines = String.split_on_char '\n' data in 70 + let rec parse_headers lines tree parents author committer = 71 + match lines with 72 + | [] -> Error (`Msg "unexpected end of commit") 73 + | "" :: rest -> 74 + (* Empty line marks start of message *) 75 + let message = String.concat "\n" rest in 76 + (match tree with 77 + | None -> Error (`Msg "missing tree") 78 + | Some tree -> 79 + Ok 80 + { 81 + tree; 82 + parents = List.rev parents; 83 + author = Option.value author ~default:"unknown"; 84 + committer = Option.value committer ~default:"unknown"; 85 + message; 86 + timestamp = 0L; 87 + }) 88 + | line :: rest -> 89 + if String.length line >= 5 && String.sub line 0 5 = "tree " then 90 + let hex = String.sub line 5 (String.length line - 5) in 91 + match parse_hex_hash hex with 92 + | Ok h -> parse_headers rest (Some h) parents author committer 93 + | Error _ as e -> e 94 + else if String.length line >= 7 && String.sub line 0 7 = "parent " then 95 + let hex = String.sub line 7 (String.length line - 7) in 96 + match parse_hex_hash hex with 97 + | Ok h -> parse_headers rest tree (h :: parents) author committer 98 + | Error _ as e -> e 99 + else if String.length line >= 7 && String.sub line 0 7 = "author " then 100 + let author_str = String.sub line 7 (String.length line - 7) in 101 + parse_headers rest tree parents (Some author_str) committer 102 + else if String.length line >= 10 && String.sub line 0 10 = "committer " then 103 + let committer_str = String.sub line 10 (String.length line - 10) in 104 + parse_headers rest tree parents author (Some committer_str) 105 + else parse_headers rest tree parents author committer 106 + in 107 + parse_headers lines None [] None None 108 + end 109 + 110 + module Git = Make (Tree_format.Git) 111 + module Mst = Make (Tree_format.Mst)
+65
lib/commit.mli
··· 1 + (** Commit objects. 2 + 3 + Commits point to a tree and have metadata like author, message, timestamp. *) 4 + 5 + (** {1 Commit Functor} *) 6 + 7 + module Make (F : Tree_format.S) : sig 8 + type t 9 + (** A commit object. *) 10 + 11 + type hash = F.hash 12 + 13 + (** {2 Commit Fields} *) 14 + 15 + val tree : t -> hash 16 + (** [tree c] returns the root tree hash. *) 17 + 18 + val parents : t -> hash list 19 + (** [parents c] returns parent commit hashes. *) 20 + 21 + val author : t -> string 22 + (** [author c] returns the author string. *) 23 + 24 + val committer : t -> string 25 + (** [committer c] returns the committer string. *) 26 + 27 + val message : t -> string 28 + (** [message c] returns the commit message. *) 29 + 30 + val timestamp : t -> int64 31 + (** [timestamp c] returns the commit timestamp (Unix epoch). *) 32 + 33 + (** {2 Construction} *) 34 + 35 + val v : 36 + tree:hash -> 37 + parents:hash list -> 38 + author:string -> 39 + ?committer:string -> 40 + ?timestamp:int64 -> 41 + message:string -> 42 + unit -> 43 + t 44 + (** [v ~tree ~parents ~author ?committer ?timestamp ~message ()] 45 + creates a new commit. *) 46 + 47 + (** {2 Serialization} *) 48 + 49 + val hash : t -> hash 50 + (** [hash c] computes the commit hash. *) 51 + 52 + val of_bytes : string -> (t, [> `Msg of string ]) result 53 + (** [of_bytes data] deserializes a commit. *) 54 + 55 + val to_bytes : t -> string 56 + (** [to_bytes c] serializes a commit. *) 57 + end 58 + 59 + (** {1 Pre-instantiated Commits} *) 60 + 61 + module Git : module type of Make (Tree_format.Git) 62 + (** Git-format commits with SHA-1 hashes. *) 63 + 64 + module Mst : module type of Make (Tree_format.Mst) 65 + (** MST-format commits with SHA-256 hashes. *)
+4
lib/dune
··· 1 + (library 2 + (name irmin) 3 + (public_name irmin) 4 + (libraries eio digestif fmt logs git atp))
+238
lib/git_interop.ml
··· 1 + (** Git interoperability using ocaml-git. 2 + 3 + Provides bidirectional support for reading and writing Git repositories. *) 4 + 5 + (* Convert between irmin Hash.sha1 and Git.Hash.t *) 6 + let git_hash_of_sha1 (h : Hash.sha1) : Git.Hash.t = 7 + Git.Hash.of_raw_string (Hash.to_bytes h) 8 + 9 + let sha1_of_git_hash (h : Git.Hash.t) : Hash.sha1 = 10 + Hash.sha1_of_bytes (Git.Hash.to_raw_string h) 11 + 12 + (* Loose object path: .git/objects/ab/cdef... *) 13 + let loose_object_path git_dir hash = 14 + let hex = Git.Hash.to_hex hash in 15 + let dir = String.sub hex 0 2 in 16 + let file = String.sub hex 2 (String.length hex - 2) in 17 + Filename.concat git_dir (Filename.concat "objects" (Filename.concat dir file)) 18 + 19 + (* Read loose object - returns raw content without header *) 20 + let read_loose_object ~fs git_dir hash = 21 + let path = loose_object_path git_dir hash in 22 + let full_path = Eio.Path.(fs / path) in 23 + try 24 + let data = Eio.Path.load full_path in 25 + (* TODO: Add zlib decompression *) 26 + Git.Value.of_string_with_header data 27 + with _ -> Error (`Msg "object not found") 28 + 29 + (* Write loose object *) 30 + let write_loose_object ~fs git_dir (value : Git.Value.t) = 31 + let hash = Git.Value.digest value in 32 + let path = loose_object_path git_dir hash in 33 + let full_path = Eio.Path.(fs / path) in 34 + 35 + (* Create directory if needed *) 36 + let dir = Filename.dirname path in 37 + let dir_path = Eio.Path.(fs / dir) in 38 + (try Eio.Path.mkdir ~perm:0o755 dir_path with _ -> ()); 39 + 40 + (* Write object with header *) 41 + let data = Git.Value.to_string value in 42 + (* TODO: Add zlib compression *) 43 + Eio.Path.save ~create:(`Or_truncate 0o444) full_path data; 44 + hash 45 + 46 + (* Read reference *) 47 + let read_ref ~fs ~git_dir name = 48 + let path = Filename.concat git_dir name in 49 + let full_path = Eio.Path.(fs / path) in 50 + try 51 + let content = Eio.Path.load full_path in 52 + let content = String.trim content in 53 + (* Check for symbolic ref *) 54 + if String.length content > 5 && String.sub content 0 5 = "ref: " then 55 + let target = String.sub content 5 (String.length content - 5) in 56 + let target_path = Eio.Path.(fs / Filename.concat git_dir target) in 57 + try 58 + let target_content = Eio.Path.load target_path in 59 + Some (Git.Hash.of_hex (String.trim target_content)) 60 + with _ -> None 61 + else Some (Git.Hash.of_hex content) 62 + with _ -> None 63 + 64 + (* Write reference *) 65 + let write_ref ~fs ~git_dir name hash = 66 + let path = Filename.concat git_dir name in 67 + let full_path = Eio.Path.(fs / path) in 68 + let dir = Filename.dirname path in 69 + let dir_path = Eio.Path.(fs / dir) in 70 + (try Eio.Path.mkdir ~perm:0o755 dir_path with _ -> ()); 71 + let content = Git.Hash.to_hex hash ^ "\n" in 72 + Eio.Path.save ~create:(`Or_truncate 0o644) full_path content 73 + 74 + (* List references *) 75 + let list_refs ~fs ~git_dir = 76 + let refs_dir = Filename.concat git_dir "refs" in 77 + let refs_path = Eio.Path.(fs / refs_dir) in 78 + let rec collect_refs path prefix acc = 79 + try 80 + let entries = Eio.Path.read_dir path in 81 + List.fold_left 82 + (fun acc entry -> 83 + let entry_path = Eio.Path.(path / entry) in 84 + let ref_name = if prefix = "" then entry else prefix ^ "/" ^ entry in 85 + if Eio.Path.is_directory entry_path then 86 + collect_refs entry_path ref_name acc 87 + else ("refs/" ^ ref_name) :: acc) 88 + acc entries 89 + with _ -> acc 90 + in 91 + collect_refs refs_path "" [] 92 + 93 + (* Create Git backend using ocaml-git types *) 94 + let git_backend ~fs ~git_dir : Hash.sha1 Backend.t = 95 + { 96 + read = 97 + (fun hash -> 98 + let git_hash = git_hash_of_sha1 hash in 99 + match read_loose_object ~fs git_dir git_hash with 100 + | Ok value -> Some (Git.Value.to_string_without_header value) 101 + | Error _ -> None); 102 + write = 103 + (fun data -> 104 + (* Default to blob for raw data writes *) 105 + let blob = Git.Blob.of_string data in 106 + let value = Git.Value.blob blob in 107 + let git_hash = write_loose_object ~fs git_dir value in 108 + sha1_of_git_hash git_hash); 109 + exists = 110 + (fun hash -> 111 + let git_hash = git_hash_of_sha1 hash in 112 + let path = loose_object_path git_dir git_hash in 113 + let full_path = Eio.Path.(fs / path) in 114 + Eio.Path.is_file full_path); 115 + get_ref = 116 + (fun name -> 117 + Option.map sha1_of_git_hash (read_ref ~fs ~git_dir name)); 118 + set_ref = 119 + (fun name hash -> 120 + write_ref ~fs ~git_dir name (git_hash_of_sha1 hash)); 121 + test_and_set_ref = 122 + (fun name ~test ~set -> 123 + let current = read_ref ~fs ~git_dir name in 124 + let matches = 125 + match (test, current) with 126 + | None, None -> true 127 + | Some t, Some c -> Git.Hash.equal (git_hash_of_sha1 t) c 128 + | _ -> false 129 + in 130 + if matches then ( 131 + (match set with 132 + | None -> 133 + let path = Filename.concat git_dir name in 134 + let full_path = Eio.Path.(fs / path) in 135 + (try Eio.Path.unlink full_path with _ -> ()) 136 + | Some h -> write_ref ~fs ~git_dir name (git_hash_of_sha1 h)); 137 + true) 138 + else false); 139 + list_refs = (fun () -> list_refs ~fs ~git_dir); 140 + write_batch = 141 + (fun objects -> 142 + List.map 143 + (fun data -> 144 + let blob = Git.Blob.of_string data in 145 + let value = Git.Value.blob blob in 146 + let git_hash = write_loose_object ~fs git_dir value in 147 + sha1_of_git_hash git_hash) 148 + objects); 149 + flush = (fun () -> ()); 150 + close = (fun () -> ()); 151 + } 152 + 153 + (** Write a tree value to the git store *) 154 + let write_tree ~fs ~git_dir (tree : Git.Tree.t) = 155 + let value = Git.Value.tree tree in 156 + write_loose_object ~fs git_dir value 157 + 158 + (** Write a commit value to the git store *) 159 + let write_commit ~fs ~git_dir (commit : Git.Commit.t) = 160 + let value = Git.Value.commit commit in 161 + write_loose_object ~fs git_dir value 162 + 163 + (** Read a tree from the git store *) 164 + let read_tree ~fs ~git_dir hash = 165 + match read_loose_object ~fs git_dir hash with 166 + | Ok (Git.Value.Tree t) -> Some t 167 + | _ -> None 168 + 169 + (** Read a commit from the git store *) 170 + let read_commit ~fs ~git_dir hash = 171 + match read_loose_object ~fs git_dir hash with 172 + | Ok (Git.Value.Commit c) -> Some c 173 + | _ -> None 174 + 175 + (* Public API *) 176 + 177 + let import_git ~sw:_ ~fs ~git_dir = 178 + let backend = git_backend ~fs ~git_dir in 179 + Store.Git.create ~backend 180 + 181 + let init_git ~sw:_ ~fs ~path = 182 + let git_dir = Filename.concat path ".git" in 183 + let git_path = Eio.Path.(fs / git_dir) in 184 + 185 + (* Create .git structure *) 186 + Eio.Path.mkdir ~perm:0o755 git_path; 187 + Eio.Path.mkdir ~perm:0o755 Eio.Path.(git_path / "objects"); 188 + Eio.Path.mkdir ~perm:0o755 Eio.Path.(git_path / "refs"); 189 + Eio.Path.mkdir ~perm:0o755 Eio.Path.(git_path / "refs" / "heads"); 190 + 191 + (* Write HEAD *) 192 + Eio.Path.save ~create:(`Or_truncate 0o644) 193 + Eio.Path.(git_path / "HEAD") 194 + "ref: refs/heads/main\n"; 195 + 196 + import_git ~sw:() ~fs ~git_dir 197 + 198 + let read_object ~sw:_ ~fs ~git_dir hash = 199 + let git_hash = git_hash_of_sha1 hash in 200 + match read_loose_object ~fs git_dir git_hash with 201 + | Ok value -> 202 + let kind = 203 + match Git.Value.kind value with 204 + | `Blob -> "blob" 205 + | `Tree -> "tree" 206 + | `Commit -> "commit" 207 + | `Tag -> "tag" 208 + in 209 + Ok (kind, Git.Value.to_string_without_header value) 210 + | Error _ as e -> e 211 + 212 + let write_object ~sw:_ ~fs ~git_dir ~typ data = 213 + let value = 214 + match typ with 215 + | "blob" -> Git.Value.blob (Git.Blob.of_string data) 216 + | "tree" -> Git.Value.tree (Git.Tree.of_string_exn data) 217 + | "commit" -> Git.Value.commit (Git.Commit.of_string_exn data) 218 + | "tag" -> Git.Value.tag (Git.Tag.of_string_exn data) 219 + | _ -> invalid_arg ("unknown object type: " ^ typ) 220 + in 221 + let git_hash = write_loose_object ~fs git_dir value in 222 + sha1_of_git_hash git_hash 223 + 224 + let read_ref ~sw:_ ~fs ~git_dir name = 225 + Option.map sha1_of_git_hash (read_ref ~fs ~git_dir name) 226 + 227 + let write_ref ~sw:_ ~fs ~git_dir name hash = 228 + write_ref ~fs ~git_dir name (git_hash_of_sha1 hash) 229 + 230 + let list_refs ~sw:_ ~fs ~git_dir = list_refs ~fs ~git_dir 231 + 232 + let read_pack_index ~sw:_ ~fs:_ ~path:_ = 233 + (* TODO: Implement pack index reading *) 234 + [] 235 + 236 + let read_from_pack ~sw:_ ~fs:_ ~pack:_ ~offset:_ = 237 + (* TODO: Implement pack file reading *) 238 + Error (`Msg "pack file reading not yet implemented")
+89
lib/git_interop.mli
··· 1 + (** Git interoperability. 2 + 3 + Bidirectional support for reading and writing Git repositories. 4 + This allows Irmin to work with existing .git directories and 5 + interoperate with the Git ecosystem. *) 6 + 7 + (** {1 Git Repository Operations} *) 8 + 9 + val import_git : 10 + sw:Eio.Switch.t -> 11 + fs:_ Eio.Path.t -> 12 + git_dir:string -> 13 + Store.Git.t 14 + (** [import_git ~sw ~fs ~git_dir] opens a .git directory as an Irmin store. 15 + The store supports both reads and writes - changes are written back 16 + in Git-compatible format. *) 17 + 18 + val init_git : 19 + sw:Eio.Switch.t -> 20 + fs:_ Eio.Path.t -> 21 + path:string -> 22 + Store.Git.t 23 + (** [init_git ~sw ~fs ~path] initializes a new Git repository at [path] 24 + and returns an Irmin store for it. *) 25 + 26 + (** {1 Object Operations} *) 27 + 28 + val read_object : 29 + sw:Eio.Switch.t -> 30 + fs:_ Eio.Path.t -> 31 + git_dir:string -> 32 + Hash.sha1 -> 33 + (string * string, [> `Msg of string ]) result 34 + (** [read_object ~sw ~fs ~git_dir hash] reads a Git object, returning 35 + [(type, data)] where type is "blob", "tree", "commit", or "tag". *) 36 + 37 + val write_object : 38 + sw:Eio.Switch.t -> 39 + fs:_ Eio.Path.t -> 40 + git_dir:string -> 41 + typ:string -> 42 + string -> 43 + Hash.sha1 44 + (** [write_object ~sw ~fs ~git_dir ~typ data] writes a Git object. *) 45 + 46 + (** {1 Reference Operations} *) 47 + 48 + val read_ref : 49 + sw:Eio.Switch.t -> 50 + fs:_ Eio.Path.t -> 51 + git_dir:string -> 52 + string -> 53 + Hash.sha1 option 54 + (** [read_ref ~sw ~fs ~git_dir name] reads a Git reference. *) 55 + 56 + val write_ref : 57 + sw:Eio.Switch.t -> 58 + fs:_ Eio.Path.t -> 59 + git_dir:string -> 60 + string -> 61 + Hash.sha1 -> 62 + unit 63 + (** [write_ref ~sw ~fs ~git_dir name hash] writes a Git reference. *) 64 + 65 + val list_refs : 66 + sw:Eio.Switch.t -> 67 + fs:_ Eio.Path.t -> 68 + git_dir:string -> 69 + string list 70 + (** [list_refs ~sw ~fs ~git_dir] lists all references. *) 71 + 72 + (** {1 Pack File Operations} *) 73 + 74 + val read_pack_index : 75 + sw:Eio.Switch.t -> 76 + fs:_ Eio.Path.t -> 77 + path:string -> 78 + (Hash.sha1 * int64) list 79 + (** [read_pack_index ~sw ~fs ~path] reads a .idx file, returning 80 + [(hash, offset)] pairs. *) 81 + 82 + val read_from_pack : 83 + sw:Eio.Switch.t -> 84 + fs:_ Eio.Path.t -> 85 + pack:string -> 86 + offset:int64 -> 87 + (string * string, [> `Msg of string ]) result 88 + (** [read_from_pack ~sw ~fs ~pack ~offset] reads an object from a pack file 89 + at the given offset. *)
+129
lib/hash.ml
··· 1 + type algorithm = Sha1 | Sha256 2 + 3 + type _ t = 4 + | Sha1_hash : string -> [ `Sha1 ] t 5 + | Sha256_hash : string -> [ `Sha256 ] t 6 + 7 + type sha1 = [ `Sha1 ] t 8 + type sha256 = [ `Sha256 ] t 9 + 10 + let sha1 data = 11 + Sha1_hash (Digestif.SHA1.(to_raw_string (digest_string data))) 12 + 13 + let sha256 data = 14 + Sha256_hash (Digestif.SHA256.(to_raw_string (digest_string data))) 15 + 16 + let sha1_of_bytes raw = 17 + if String.length raw <> 20 then 18 + invalid_arg "Hash.sha1_of_bytes: expected 20 bytes"; 19 + Sha1_hash raw 20 + 21 + let sha256_of_bytes raw = 22 + if String.length raw <> 32 then 23 + invalid_arg "Hash.sha256_of_bytes: expected 32 bytes"; 24 + Sha256_hash raw 25 + 26 + let to_bytes : type a. a t -> string = function 27 + | Sha1_hash s -> s 28 + | Sha256_hash s -> s 29 + 30 + let to_hex h = 31 + let bytes = to_bytes h in 32 + let buf = Buffer.create (String.length bytes * 2) in 33 + String.iter 34 + (fun c -> 35 + Buffer.add_string buf (Printf.sprintf "%02x" (Char.code c))) 36 + bytes; 37 + Buffer.contents buf 38 + 39 + let hex_to_bytes hex = 40 + let len = String.length hex in 41 + if len mod 2 <> 0 then Error (`Msg "hex string has odd length") 42 + else 43 + let bytes = Bytes.create (len / 2) in 44 + let rec loop i = 45 + if i >= len then Ok (Bytes.to_string bytes) 46 + else 47 + let hi = hex.[i] and lo = hex.[i + 1] in 48 + let decode c = 49 + match c with 50 + | '0' .. '9' -> Some (Char.code c - Char.code '0') 51 + | 'a' .. 'f' -> Some (Char.code c - Char.code 'a' + 10) 52 + | 'A' .. 'F' -> Some (Char.code c - Char.code 'A' + 10) 53 + | _ -> None 54 + in 55 + match (decode hi, decode lo) with 56 + | Some h, Some l -> 57 + Bytes.set bytes (i / 2) (Char.chr ((h lsl 4) lor l)); 58 + loop (i + 2) 59 + | _ -> Error (`Msg (Printf.sprintf "invalid hex character at position %d" i)) 60 + in 61 + loop 0 62 + 63 + let sha1_of_hex hex = 64 + match hex_to_bytes hex with 65 + | Error _ as e -> e 66 + | Ok bytes -> 67 + if String.length bytes <> 20 then 68 + Error (`Msg "SHA-1 hex must be 40 characters") 69 + else Ok (Sha1_hash bytes) 70 + 71 + let sha256_of_hex hex = 72 + match hex_to_bytes hex with 73 + | Error _ as e -> e 74 + | Ok bytes -> 75 + if String.length bytes <> 32 then 76 + Error (`Msg "SHA-256 hex must be 64 characters") 77 + else Ok (Sha256_hash bytes) 78 + 79 + type existential = Ex : _ t -> existential 80 + 81 + let of_hex algo hex : (existential, _) result = 82 + match algo with 83 + | Sha1 -> Result.map (fun h -> Ex h) (sha1_of_hex hex) 84 + | Sha256 -> Result.map (fun h -> Ex h) (sha256_of_hex hex) 85 + 86 + let equal : type a. a t -> a t -> bool = 87 + fun h1 h2 -> String.equal (to_bytes h1) (to_bytes h2) 88 + 89 + let compare : type a. a t -> a t -> int = 90 + fun h1 h2 -> String.compare (to_bytes h1) (to_bytes h2) 91 + 92 + let length : type a. a t -> int = function 93 + | Sha1_hash _ -> 20 94 + | Sha256_hash _ -> 32 95 + 96 + let algorithm_of : type a. a t -> algorithm = function 97 + | Sha1_hash _ -> Sha1 98 + | Sha256_hash _ -> Sha256 99 + 100 + let algorithm_length = function Sha1 -> 20 | Sha256 -> 32 101 + 102 + let mst_depth (Sha256_hash bytes) = 103 + let rec count_zeros i acc = 104 + if i >= 32 then acc 105 + else 106 + let byte = Char.code bytes.[i] in 107 + let hi = (byte lsr 6) land 0x3 in 108 + let mid_hi = (byte lsr 4) land 0x3 in 109 + let mid_lo = (byte lsr 2) land 0x3 in 110 + let lo = byte land 0x3 in 111 + if hi <> 0 then acc 112 + else if mid_hi <> 0 then acc + 1 113 + else if mid_lo <> 0 then acc + 2 114 + else if lo <> 0 then acc + 3 115 + else count_zeros (i + 1) (acc + 4) 116 + in 117 + count_zeros 0 0 118 + 119 + type any = Any : _ t -> any 120 + 121 + let any_algorithm (Any h) = algorithm_of h 122 + let any_to_bytes (Any h) = to_bytes h 123 + let any_to_hex (Any h) = to_hex h 124 + 125 + let pp fmt h = Format.fprintf fmt "%s" (to_hex h) 126 + 127 + let pp_short fmt h = 128 + let hex = to_hex h in 129 + Format.fprintf fmt "%s" (String.sub hex 0 (min 7 (String.length hex)))
+100
lib/hash.mli
··· 1 + (** Content-addressed hashes with phantom types for algorithm safety. 2 + 3 + This module provides SHA-1 and SHA-256 hash types that are distinguished 4 + at the type level, preventing accidental mixing of different hash 5 + algorithms. *) 6 + 7 + (** {1 Hash Algorithms} *) 8 + 9 + type algorithm = 10 + | Sha1 (** SHA-1: 20 bytes, Git compatible *) 11 + | Sha256 (** SHA-256: 32 bytes, ATProto compatible *) 12 + 13 + (** {1 Phantom-Typed Hashes} *) 14 + 15 + type 'a t 16 + (** ['a t] is a hash where ['a] is a phantom type indicating the algorithm. *) 17 + 18 + type sha1 = [ `Sha1 ] t 19 + (** SHA-1 hash: 20 bytes, used by Git. *) 20 + 21 + type sha256 = [ `Sha256 ] t 22 + (** SHA-256 hash: 32 bytes, used by ATProto MST. *) 23 + 24 + (** {1 Hash Construction} *) 25 + 26 + val sha1 : string -> sha1 27 + (** [sha1 data] computes the SHA-1 hash of [data]. *) 28 + 29 + val sha256 : string -> sha256 30 + (** [sha256 data] computes the SHA-256 hash of [data]. *) 31 + 32 + val sha1_of_bytes : string -> sha1 33 + (** [sha1_of_bytes raw] creates a SHA-1 hash from raw 20-byte digest. *) 34 + 35 + val sha256_of_bytes : string -> sha256 36 + (** [sha256_of_bytes raw] creates a SHA-256 hash from raw 32-byte digest. *) 37 + 38 + (** {1 Conversions} *) 39 + 40 + val to_bytes : _ t -> string 41 + (** [to_bytes h] returns the raw bytes of the hash digest. *) 42 + 43 + val to_hex : _ t -> string 44 + (** [to_hex h] returns the hexadecimal representation of the hash. *) 45 + 46 + val of_hex : algorithm -> string -> (_, [> `Msg of string ]) result 47 + (** [of_hex algo hex] parses a hexadecimal hash string. *) 48 + 49 + val sha1_of_hex : string -> (sha1, [> `Msg of string ]) result 50 + (** [sha1_of_hex hex] parses a SHA-1 hex string. *) 51 + 52 + val sha256_of_hex : string -> (sha256, [> `Msg of string ]) result 53 + (** [sha256_of_hex hex] parses a SHA-256 hex string. *) 54 + 55 + (** {1 Comparison} *) 56 + 57 + val equal : 'a t -> 'a t -> bool 58 + (** [equal h1 h2] is [true] if [h1] and [h2] are the same hash. *) 59 + 60 + val compare : 'a t -> 'a t -> int 61 + (** [compare h1 h2] is a total ordering on hashes. *) 62 + 63 + (** {1 Algorithm Info} *) 64 + 65 + val length : _ t -> int 66 + (** [length h] returns the byte length of the hash (20 for SHA-1, 32 for SHA-256). *) 67 + 68 + val algorithm_of : _ t -> algorithm 69 + (** [algorithm_of h] returns the algorithm used for [h]. *) 70 + 71 + val algorithm_length : algorithm -> int 72 + (** [algorithm_length algo] returns the byte length for [algo]. *) 73 + 74 + (** {1 MST Support} *) 75 + 76 + val mst_depth : sha256 -> int 77 + (** [mst_depth h] counts leading zeros in 2-bit chunks for ATProto MST. 78 + This determines the tree depth for a given key hash. *) 79 + 80 + (** {1 Type-Erased Hashes} *) 81 + 82 + type any = Any : _ t -> any 83 + (** Type-erased hash for mixed-hash stores. *) 84 + 85 + val any_algorithm : any -> algorithm 86 + (** [any_algorithm (Any h)] returns the algorithm of the erased hash. *) 87 + 88 + val any_to_bytes : any -> string 89 + (** [any_to_bytes (Any h)] returns the raw bytes. *) 90 + 91 + val any_to_hex : any -> string 92 + (** [any_to_hex (Any h)] returns the hex representation. *) 93 + 94 + (** {1 Pretty Printing} *) 95 + 96 + val pp : Format.formatter -> _ t -> unit 97 + (** [pp fmt h] pretty-prints [h] as hex. *) 98 + 99 + val pp_short : Format.formatter -> _ t -> unit 100 + (** [pp_short fmt h] pretty-prints the first 7 characters of [h] (Git-style). *)
+43
lib/irmin.ml
··· 1 + (** Irmin - Content-addressable store with Git and ATProto MST support. 2 + 3 + Irmin provides lazy reads, delayed writes, and multiple tree formats 4 + with bidirectional Git compatibility and first-class subtree operations. *) 5 + 6 + (** {1 Core Types} *) 7 + 8 + module Hash = Hash 9 + module Backend = Backend 10 + 11 + (** {1 Tree Formats} *) 12 + 13 + module Tree_format = Tree_format 14 + 15 + (** {1 Trees and Commits} *) 16 + 17 + module Tree = Tree 18 + module Commit = Commit 19 + 20 + (** {1 Stores} *) 21 + 22 + module Store = Store 23 + 24 + (** {1 Subtree Operations} *) 25 + 26 + module Subtree = Subtree 27 + 28 + (** {1 Git Interoperability} *) 29 + 30 + module Git_interop = Git_interop 31 + 32 + (** {1 Pre-instantiated Git Store} *) 33 + 34 + module Git = struct 35 + include Store.Git 36 + 37 + let import = Git_interop.import_git 38 + let init = Git_interop.init_git 39 + end 40 + 41 + (** {1 Pre-instantiated MST Store} *) 42 + 43 + module Mst = Store.Mst
+119
lib/store.ml
··· 1 + module Make (F : Tree_format.S) = struct 2 + type hash = F.hash 3 + 4 + module Tree = Tree.Make (F) 5 + module Commit = Commit.Make (F) 6 + 7 + type t = { backend : hash Backend.t } 8 + 9 + let create ~backend = { backend } 10 + let backend t = t.backend 11 + 12 + let tree t ?at () = 13 + match at with 14 + | None -> Tree.empty () 15 + | Some h -> Tree.of_hash ~backend:t.backend h 16 + 17 + let read_commit t h = 18 + match t.backend.read h with 19 + | None -> None 20 + | Some data -> ( 21 + match Commit.of_bytes data with Ok c -> Some c | Error _ -> None) 22 + 23 + let read_tree t h = Tree.of_hash ~backend:t.backend h 24 + 25 + let checkout t ~branch = 26 + match t.backend.get_ref ("refs/heads/" ^ branch) with 27 + | None -> None 28 + | Some commit_hash -> ( 29 + match read_commit t commit_hash with 30 + | None -> None 31 + | Some commit -> Some (read_tree t (Commit.tree commit))) 32 + 33 + let commit t ~tree ~parents ~message ~author = 34 + (* This is where delayed writes happen *) 35 + let tree_hash = Tree.hash tree ~backend:t.backend in 36 + let c = Commit.v ~tree:tree_hash ~parents ~author ~message () in 37 + let data = Commit.to_bytes c in 38 + let _ = t.backend.write data in 39 + Commit.hash c 40 + 41 + let head t ~branch = t.backend.get_ref ("refs/heads/" ^ branch) 42 + 43 + let set_head t ~branch h = t.backend.set_ref ("refs/heads/" ^ branch) h 44 + 45 + let branches t = 46 + t.backend.list_refs () 47 + |> List.filter_map (fun r -> 48 + if String.length r > 11 && String.sub r 0 11 = "refs/heads/" then 49 + Some (String.sub r 11 (String.length r - 11)) 50 + else None) 51 + 52 + let update_branch t ~branch ~old ~new_ = 53 + t.backend.test_and_set_ref ("refs/heads/" ^ branch) ~test:old ~set:(Some new_) 54 + 55 + (* Simple ancestry check - walks parent chain *) 56 + let is_ancestor t ~ancestor ~descendant = 57 + let rec walk visited h = 58 + if Hash.equal h ancestor then true 59 + else if List.exists (Hash.equal h) visited then false 60 + else 61 + match read_commit t h with 62 + | None -> false 63 + | Some c -> 64 + let visited = h :: visited in 65 + List.exists (walk visited) (Commit.parents c) 66 + in 67 + Hash.equal ancestor descendant || walk [] descendant 68 + 69 + (* Find merge base using simple BFS *) 70 + let merge_base t h1 h2 = 71 + let rec ancestors_of h visited = 72 + if List.exists (Hash.equal h) visited then visited 73 + else 74 + match read_commit t h with 75 + | None -> h :: visited 76 + | Some c -> 77 + let visited = h :: visited in 78 + List.fold_left 79 + (fun acc p -> ancestors_of p acc) 80 + visited (Commit.parents c) 81 + in 82 + let ancestors1 = ancestors_of h1 [] in 83 + let rec find_common h = 84 + if List.exists (Hash.equal h) ancestors1 then Some h 85 + else 86 + match read_commit t h with 87 + | None -> None 88 + | Some c -> ( 89 + match Commit.parents c with 90 + | [] -> None 91 + | p :: _ -> find_common p) 92 + in 93 + find_common h2 94 + 95 + let commits_between t ~base ~head = 96 + let rec count h n = 97 + if Hash.equal h base then n 98 + else 99 + match read_commit t h with 100 + | None -> n 101 + | Some c -> ( 102 + match Commit.parents c with 103 + | [] -> n 104 + | p :: _ -> count p (n + 1)) 105 + in 106 + count head 0 107 + 108 + type diff_entry = 109 + [ `Add of Tree.path * hash 110 + | `Remove of Tree.path 111 + | `Change of Tree.path * hash * hash ] 112 + 113 + let diff _t ~old:_ ~new_:_ = 114 + (* TODO: Implement tree diff *) 115 + Seq.empty 116 + end 117 + 118 + module Git = Make (Tree_format.Git) 119 + module Mst = Make (Tree_format.Mst)
+97
lib/store.mli
··· 1 + (** High-level store interface. 2 + 3 + Combines backend, tree, and commit operations into a unified API. *) 4 + 5 + (** {1 Store Functor} *) 6 + 7 + module Make (F : Tree_format.S) : sig 8 + type t 9 + (** A store instance. *) 10 + 11 + type hash = F.hash 12 + 13 + module Tree : module type of Tree.Make (F) 14 + module Commit : module type of Commit.Make (F) 15 + 16 + (** {2 Construction} *) 17 + 18 + val create : backend:hash Backend.t -> t 19 + (** [create ~backend] creates a store backed by [backend]. *) 20 + 21 + (** {2 Tree Operations} *) 22 + 23 + val tree : t -> ?at:hash -> unit -> Tree.t 24 + (** [tree t ?at ()] returns a tree. 25 + If [at] is given, returns the tree at that commit. 26 + Otherwise returns an empty tree. *) 27 + 28 + val checkout : t -> branch:string -> Tree.t option 29 + (** [checkout t ~branch] returns the tree at the head of [branch]. *) 30 + 31 + (** {2 Commit Operations} *) 32 + 33 + val commit : 34 + t -> 35 + tree:Tree.t -> 36 + parents:hash list -> 37 + message:string -> 38 + author:string -> 39 + hash 40 + (** [commit t ~tree ~parents ~message ~author] creates a commit. 41 + This is when delayed tree writes actually happen. *) 42 + 43 + (** {2 Branch Operations} *) 44 + 45 + val head : t -> branch:string -> hash option 46 + (** [head t ~branch] returns the head commit of [branch]. *) 47 + 48 + val set_head : t -> branch:string -> hash -> unit 49 + (** [set_head t ~branch h] sets the head of [branch] to [h]. *) 50 + 51 + val branches : t -> string list 52 + (** [branches t] returns all branch names. *) 53 + 54 + val update_branch : t -> branch:string -> old:hash option -> new_:hash -> bool 55 + (** [update_branch t ~branch ~old ~new_] atomically updates [branch] 56 + if its current head matches [old]. *) 57 + 58 + (** {2 Ancestry Queries} *) 59 + 60 + val is_ancestor : t -> ancestor:hash -> descendant:hash -> bool 61 + (** [is_ancestor t ~ancestor ~descendant] checks commit ancestry. *) 62 + 63 + val merge_base : t -> hash -> hash -> hash option 64 + (** [merge_base t h1 h2] finds the common ancestor of two commits. *) 65 + 66 + val commits_between : t -> base:hash -> head:hash -> int 67 + (** [commits_between t ~base ~head] counts commits from [base] to [head]. *) 68 + 69 + (** {2 Diff} *) 70 + 71 + type diff_entry = 72 + [ `Add of Tree.path * hash 73 + | `Remove of Tree.path 74 + | `Change of Tree.path * hash * hash ] 75 + 76 + val diff : t -> old:hash -> new_:hash -> diff_entry Seq.t 77 + (** [diff t ~old ~new_] computes the difference between two trees. *) 78 + 79 + (** {2 Low-level} *) 80 + 81 + val backend : t -> hash Backend.t 82 + (** [backend t] returns the underlying backend. *) 83 + 84 + val read_commit : t -> hash -> Commit.t option 85 + (** [read_commit t h] reads a commit by hash. *) 86 + 87 + val read_tree : t -> hash -> Tree.t 88 + (** [read_tree t h] returns a lazy tree at [h]. *) 89 + end 90 + 91 + (** {1 Pre-instantiated Stores} *) 92 + 93 + module Git : module type of Make (Tree_format.Git) 94 + (** Git-format store with SHA-1 hashes. *) 95 + 96 + module Mst : module type of Make (Tree_format.Mst) 97 + (** MST-format store with SHA-256 hashes. *)
+231
lib/subtree.ml
··· 1 + module Make (F : Tree_format.S) = struct 2 + type hash = F.hash 3 + 4 + module Store = Store.Make (F) 5 + module Tree = Store.Tree 6 + module Commit = Store.Commit 7 + 8 + type status = 9 + [ `In_sync 10 + | `Local_ahead of int 11 + | `Remote_ahead of int 12 + | `Diverged of int * int (* local, remote *) 13 + | `Trees_differ ] 14 + 15 + (* Extract subtree at prefix from a tree *) 16 + let extract_subtree tree prefix = 17 + Tree.find_tree tree prefix 18 + 19 + (* Check if a commit touches the given prefix *) 20 + let commit_touches_prefix store commit prefix = 21 + let tree = Store.read_tree store (Commit.tree commit) in 22 + Option.is_some (Tree.find_tree tree prefix) 23 + 24 + (* Split: Extract subtree history into a new store *) 25 + let split store ~prefix = 26 + let backend = Backend.Memory.create_sha1 () in 27 + let new_store = Store.create ~backend:(Obj.magic backend) in 28 + 29 + (* Walk commits and rewrite those touching prefix *) 30 + let rec rewrite_commit old_hash rewritten = 31 + if List.mem_assoc old_hash rewritten then 32 + (List.assoc old_hash rewritten, rewritten) 33 + else 34 + match Store.read_commit store old_hash with 35 + | None -> (old_hash, rewritten) 36 + | Some commit -> 37 + if not (commit_touches_prefix store commit prefix) then 38 + (* Skip commits not touching prefix *) 39 + match Commit.parents commit with 40 + | [] -> (old_hash, rewritten) 41 + | p :: _ -> rewrite_commit p rewritten 42 + else 43 + (* Rewrite parents first *) 44 + let parents, rewritten = 45 + List.fold_left 46 + (fun (parents, rw) p -> 47 + let new_p, rw = rewrite_commit p rw in 48 + (new_p :: parents, rw)) 49 + ([], rewritten) 50 + (Commit.parents commit) 51 + in 52 + let parents = List.rev parents in 53 + 54 + (* Extract subtree *) 55 + let tree = Store.read_tree store (Commit.tree commit) in 56 + match extract_subtree tree prefix with 57 + | None -> (old_hash, rewritten) 58 + | Some subtree -> 59 + let new_hash = 60 + Store.commit new_store ~tree:subtree ~parents 61 + ~message:(Commit.message commit) 62 + ~author:(Commit.author commit) 63 + in 64 + (new_hash, (old_hash, new_hash) :: rewritten) 65 + in 66 + 67 + (* Start from main branch head *) 68 + (match Store.head store ~branch:"main" with 69 + | Some head -> 70 + let new_head, _ = rewrite_commit head [] in 71 + Store.set_head new_store ~branch:"main" new_head 72 + | None -> ()); 73 + 74 + new_store 75 + 76 + (* Add: Add external repo as subtree *) 77 + let add store ~prefix ~source = 78 + match Store.head source ~branch:"main" with 79 + | None -> failwith "Source has no main branch" 80 + | Some source_head -> 81 + match Store.read_commit source source_head with 82 + | None -> failwith "Cannot read source commit" 83 + | Some source_commit -> 84 + let source_tree = Store.read_tree source (Commit.tree source_commit) in 85 + 86 + (* Get current tree or empty *) 87 + let current_tree = 88 + match Store.head store ~branch:"main" with 89 + | None -> Tree.empty () 90 + | Some h -> 91 + match Store.read_commit store h with 92 + | None -> Tree.empty () 93 + | Some c -> Store.read_tree store (Commit.tree c) 94 + in 95 + 96 + (* Add source tree at prefix *) 97 + let new_tree = Tree.add_tree current_tree prefix source_tree in 98 + 99 + let parents = 100 + match Store.head store ~branch:"main" with 101 + | None -> [] 102 + | Some h -> [ h ] 103 + in 104 + 105 + let message = 106 + Printf.sprintf "Add '%s' from external source" 107 + (String.concat "/" prefix) 108 + in 109 + 110 + let new_head = 111 + Store.commit store ~tree:new_tree ~parents ~message 112 + ~author:"irmin-subtree" 113 + in 114 + Store.set_head store ~branch:"main" new_head; 115 + new_head 116 + 117 + (* Pull: Update subtree from external source *) 118 + let pull store ~prefix ~source = 119 + match Store.head source ~branch:"main" with 120 + | None -> Error (`Conflict []) 121 + | Some source_head -> 122 + match Store.read_commit source source_head with 123 + | None -> Error (`Conflict []) 124 + | Some source_commit -> 125 + let source_tree = Store.read_tree source (Commit.tree source_commit) in 126 + 127 + let current_tree = 128 + match Store.head store ~branch:"main" with 129 + | None -> Tree.empty () 130 + | Some h -> 131 + match Store.read_commit store h with 132 + | None -> Tree.empty () 133 + | Some c -> Store.read_tree store (Commit.tree c) 134 + in 135 + 136 + (* Replace subtree at prefix *) 137 + let new_tree = 138 + let without = Tree.remove current_tree prefix in 139 + Tree.add_tree without prefix source_tree 140 + in 141 + 142 + let parents = 143 + match Store.head store ~branch:"main" with 144 + | None -> [] 145 + | Some h -> [ h ] 146 + in 147 + 148 + let message = 149 + Printf.sprintf "Pull updates into '%s'" (String.concat "/" prefix) 150 + in 151 + 152 + let new_head = 153 + Store.commit store ~tree:new_tree ~parents ~message 154 + ~author:"irmin-subtree" 155 + in 156 + Store.set_head store ~branch:"main" new_head; 157 + Ok new_head 158 + 159 + (* Push: Push subtree changes to external repo *) 160 + let push store ~prefix ~target = 161 + match Store.head store ~branch:"main" with 162 + | None -> failwith "Store has no main branch" 163 + | Some head -> 164 + match Store.read_commit store head with 165 + | None -> failwith "Cannot read store commit" 166 + | Some commit -> 167 + let tree = Store.read_tree store (Commit.tree commit) in 168 + match extract_subtree tree prefix with 169 + | None -> failwith "No subtree at prefix" 170 + | Some subtree -> 171 + let parents = 172 + match Store.head target ~branch:"main" with 173 + | None -> [] 174 + | Some h -> [ h ] 175 + in 176 + 177 + let message = 178 + Printf.sprintf "Push from '%s'" (String.concat "/" prefix) 179 + in 180 + 181 + let new_head = 182 + Store.commit target ~tree:subtree ~parents ~message 183 + ~author:"irmin-subtree" 184 + in 185 + Store.set_head target ~branch:"main" new_head; 186 + new_head 187 + 188 + (* Status: Compare subtree with external repo *) 189 + let status store ~prefix ~external_ = 190 + let local_head = Store.head store ~branch:"main" in 191 + let remote_head = Store.head external_ ~branch:"main" in 192 + 193 + match (local_head, remote_head) with 194 + | None, None -> `In_sync 195 + | None, Some _ -> `Remote_ahead 1 196 + | Some _, None -> `Local_ahead 1 197 + | Some lh, Some rh -> 198 + (* Get subtree hash from local *) 199 + let local_tree_hash = 200 + match Store.read_commit store lh with 201 + | None -> None 202 + | Some c -> 203 + let tree = Store.read_tree store (Commit.tree c) in 204 + match Tree.find_tree tree prefix with 205 + | None -> None 206 + | Some t -> Some (Tree.hash t ~backend:(Store.backend store)) 207 + in 208 + 209 + (* Get tree hash from remote *) 210 + let remote_tree_hash = 211 + match Store.read_commit external_ rh with 212 + | None -> None 213 + | Some c -> Some (Commit.tree c) 214 + in 215 + 216 + match (local_tree_hash, remote_tree_hash) with 217 + | None, None -> `In_sync 218 + | None, Some _ -> `Remote_ahead 1 219 + | Some _, None -> `Local_ahead 1 220 + | Some lt, Some rt -> 221 + if Hash.equal lt rt then `In_sync 222 + else 223 + (* Check ancestry *) 224 + if Store.is_ancestor external_ ~ancestor:rt ~descendant:lt then 225 + `Local_ahead (Store.commits_between external_ ~base:rt ~head:lt) 226 + else if Store.is_ancestor external_ ~ancestor:lt ~descendant:rt then 227 + `Remote_ahead (Store.commits_between external_ ~base:lt ~head:rt) 228 + else `Trees_differ 229 + end 230 + 231 + module Git = Make (Tree_format.Git)
+74
lib/subtree.mli
··· 1 + (** Subtree operations for monorepo management. 2 + 3 + This module provides first-class subtree operations that replace 4 + the need to shell out to [git subtree] commands. *) 5 + 6 + (** {1 Subtree Functor} *) 7 + 8 + module Make (F : Tree_format.S) : sig 9 + type hash = F.hash 10 + 11 + module Store : module type of Store.Make (F) 12 + 13 + (** {2 Subtree Split} *) 14 + 15 + val split : Store.t -> prefix:Store.Tree.path -> Store.t 16 + (** [split store ~prefix] extracts the subtree at [prefix] into a 17 + new store with rewritten history containing only commits that 18 + touch that prefix. 19 + 20 + Like [git subtree split --prefix]. *) 21 + 22 + (** {2 Subtree Add} *) 23 + 24 + val add : Store.t -> prefix:Store.Tree.path -> source:Store.t -> hash 25 + (** [add store ~prefix ~source] adds the contents of [source] as a 26 + subtree at [prefix], creating a merge commit. 27 + 28 + Like [git subtree add --prefix --squash]. *) 29 + 30 + (** {2 Subtree Pull} *) 31 + 32 + val pull : 33 + Store.t -> 34 + prefix:Store.Tree.path -> 35 + source:Store.t -> 36 + (hash, [> `Conflict of Store.Tree.path list ]) result 37 + (** [pull store ~prefix ~source] pulls updates from [source] into 38 + the subtree at [prefix]. 39 + 40 + Like [git subtree pull --prefix --squash]. *) 41 + 42 + (** {2 Subtree Push} *) 43 + 44 + val push : Store.t -> prefix:Store.Tree.path -> target:Store.t -> hash 45 + (** [push store ~prefix ~target] pushes changes from the subtree at 46 + [prefix] to [target]. 47 + 48 + Like [git subtree push --prefix]. *) 49 + 50 + (** {2 Status} *) 51 + 52 + type status = 53 + [ `In_sync 54 + | `Local_ahead of int 55 + | `Remote_ahead of int 56 + | `Diverged of int * int (** local, remote *) 57 + | `Trees_differ ] 58 + 59 + val status : 60 + Store.t -> prefix:Store.Tree.path -> external_:Store.t -> status 61 + (** [status store ~prefix ~external_] compares the subtree at [prefix] 62 + with the external store. 63 + 64 + - [`In_sync]: Trees are identical 65 + - [`Local_ahead n]: Local has n commits not in external 66 + - [`Remote_ahead n]: External has n commits not in local 67 + - [`Diverged]: Both have independent commits 68 + - [`Trees_differ]: Trees differ but no history relationship *) 69 + end 70 + 71 + (** {1 Pre-instantiated Subtree} *) 72 + 73 + module Git : module type of Make (Tree_format.Git) 74 + (** Git-format subtree operations. *)
+294
lib/tree.ml
··· 1 + module Make (F : Tree_format.S) = struct 2 + type hash = F.hash 3 + type path = string list 4 + 5 + type concrete = 6 + [ `Contents of string | `Tree of (string * concrete) list ] 7 + 8 + (* Internal tree representation with lazy loading *) 9 + type node_state = 10 + | Loaded of F.node 11 + | Lazy of { backend : hash Backend.t; hash : hash } 12 + | Shallow of hash 13 + | Pruned of hash 14 + 15 + type tree_node = 16 + | Contents of string 17 + | Node of { 18 + mutable state : node_state; 19 + mutable children : (string * tree_node) list; (* modifications *) 20 + mutable removed : string list; 21 + } 22 + 23 + type t = tree_node 24 + 25 + let empty () = Node { state = Loaded F.empty_node; children = []; removed = [] } 26 + 27 + let of_hash ~backend hash = 28 + Node { state = Lazy { backend; hash }; children = []; removed = [] } 29 + 30 + let shallow hash = Node { state = Shallow hash; children = []; removed = [] } 31 + let pruned hash = Node { state = Pruned hash; children = []; removed = [] } 32 + 33 + let rec of_concrete : concrete -> t = function 34 + | `Contents s -> Contents s 35 + | `Tree entries -> 36 + let children = List.map (fun (name, c) -> (name, of_concrete c)) entries in 37 + Node { state = Loaded F.empty_node; children; removed = [] } 38 + 39 + (* Force loading of a lazy node *) 40 + let force_node state = 41 + match state with 42 + | Loaded n -> Some n 43 + | Lazy { backend; hash } -> ( 44 + match backend.read hash with 45 + | Some data -> ( 46 + match F.node_of_bytes data with 47 + | Ok n -> Some n 48 + | Error _ -> None) 49 + | None -> None) 50 + | Shallow _ -> None 51 + | Pruned _ -> None 52 + 53 + (* Navigate to a path, returning the node and remaining path *) 54 + let rec navigate t path = 55 + match (t, path) with 56 + | _, [] -> Some (t, []) 57 + | Contents _, _ :: _ -> None 58 + | Node node, name :: rest -> ( 59 + (* Check modifications first *) 60 + match List.assoc_opt name node.children with 61 + | Some child -> navigate child rest 62 + | None -> 63 + if List.mem name node.removed then None 64 + else 65 + (* Try to load from underlying node *) 66 + match force_node node.state with 67 + | None -> None 68 + | Some loaded -> ( 69 + match F.find loaded name with 70 + | None -> None 71 + | Some (`Contents _hash) -> 72 + (* Would need to load contents - for now return None *) 73 + None 74 + | Some (`Node hash) -> ( 75 + match node.state with 76 + | Lazy { backend; _ } -> 77 + let child = of_hash ~backend hash in 78 + navigate child rest 79 + | _ -> None))) 80 + 81 + let find t path = 82 + match navigate t path with 83 + | Some (Contents s, []) -> Some s 84 + | _ -> None 85 + 86 + let find_tree t path = 87 + match navigate t path with 88 + | Some ((Node _ as n), []) -> Some n 89 + | _ -> None 90 + 91 + let mem t path = Option.is_some (navigate t path) 92 + 93 + let mem_tree t path = 94 + match navigate t path with 95 + | Some (Node _, []) -> true 96 + | _ -> false 97 + 98 + let list t path = 99 + match navigate t path with 100 + | Some (Node node, []) -> ( 101 + match force_node node.state with 102 + | None -> [] 103 + | Some loaded -> 104 + let base_entries = 105 + F.list loaded 106 + |> List.filter (fun (name, _) -> 107 + (not (List.mem name node.removed)) 108 + && not (List.mem_assoc name node.children)) 109 + |> List.map (fun (name, kind) -> 110 + let k = match kind with `Node _ -> `Node | `Contents _ -> `Contents in 111 + (name, k)) 112 + in 113 + let child_entries = 114 + List.map 115 + (fun (name, child) -> 116 + let k = match child with Node _ -> `Node | Contents _ -> `Contents in 117 + (name, k)) 118 + node.children 119 + in 120 + List.sort (fun (a, _) (b, _) -> String.compare a b) (base_entries @ child_entries)) 121 + | _ -> [] 122 + 123 + (* Add contents at path, creating intermediate nodes as needed *) 124 + let rec add_at t path value = 125 + match (t, path) with 126 + | _, [] -> value 127 + | Contents _, _ :: _ -> 128 + (* Replace contents with a tree *) 129 + add_at (empty ()) path value 130 + | Node node, [ name ] -> 131 + let children = 132 + (name, value) :: List.filter (fun (n, _) -> n <> name) node.children 133 + in 134 + let removed = List.filter (( <> ) name) node.removed in 135 + Node { node with children; removed } 136 + | Node node, name :: rest -> 137 + let child = 138 + match List.assoc_opt name node.children with 139 + | Some c -> c 140 + | None -> ( 141 + if List.mem name node.removed then empty () 142 + else 143 + match force_node node.state with 144 + | None -> empty () 145 + | Some loaded -> ( 146 + match F.find loaded name with 147 + | Some (`Node hash) -> ( 148 + match node.state with 149 + | Lazy { backend; _ } -> of_hash ~backend hash 150 + | _ -> empty ()) 151 + | _ -> empty ())) 152 + in 153 + let new_child = add_at child rest value in 154 + let children = 155 + (name, new_child) :: List.filter (fun (n, _) -> n <> name) node.children 156 + in 157 + Node { node with children } 158 + 159 + let add t path contents = add_at t path (Contents contents) 160 + let add_tree t path subtree = add_at t path subtree 161 + 162 + let rec remove t path = 163 + match (t, path) with 164 + | _, [] -> empty () 165 + | Contents _, _ :: _ -> t 166 + | Node node, [ name ] -> 167 + let children = List.filter (fun (n, _) -> n <> name) node.children in 168 + let removed = 169 + if List.mem name node.removed then node.removed else name :: node.removed 170 + in 171 + Node { node with children; removed } 172 + | Node node, name :: rest -> 173 + let child = 174 + match List.assoc_opt name node.children with 175 + | Some c -> c 176 + | None -> ( 177 + if List.mem name node.removed then empty () 178 + else 179 + match force_node node.state with 180 + | None -> empty () 181 + | Some loaded -> ( 182 + match F.find loaded name with 183 + | Some (`Node hash) -> ( 184 + match node.state with 185 + | Lazy { backend; _ } -> of_hash ~backend hash 186 + | _ -> empty ()) 187 + | _ -> empty ())) 188 + in 189 + let new_child = remove child rest in 190 + let children = 191 + (name, new_child) :: List.filter (fun (n, _) -> n <> name) node.children 192 + in 193 + Node { node with children } 194 + 195 + let rec to_concrete t = 196 + match t with 197 + | Contents s -> `Contents s 198 + | Node node -> 199 + let entries = 200 + match force_node node.state with 201 + | None -> [] 202 + | Some loaded -> 203 + F.list loaded 204 + |> List.filter_map (fun (name, _kind) -> 205 + if List.mem name node.removed then None 206 + else if List.mem_assoc name node.children then None 207 + else 208 + (* Would need to recursively load - simplified here *) 209 + None) 210 + in 211 + let child_entries = 212 + List.map (fun (name, child) -> (name, to_concrete child)) node.children 213 + in 214 + let all = List.sort (fun (a, _) (b, _) -> String.compare a b) (entries @ child_entries) in 215 + `Tree all 216 + 217 + (* Write tree to backend and return hash *) 218 + let rec write_tree t ~(backend : hash Backend.t) : hash = 219 + match t with 220 + | Contents s -> 221 + let h = F.hash_contents s in 222 + let _ = backend.write s in 223 + h 224 + | Node node -> 225 + (* First, get the base node *) 226 + let base = 227 + match force_node node.state with Some n -> n | None -> F.empty_node 228 + in 229 + (* Apply removals *) 230 + let base = List.fold_left (fun n name -> F.remove n name) base node.removed in 231 + (* Apply additions (recursively writing children) *) 232 + let final = 233 + List.fold_left 234 + (fun n (name, child) -> 235 + let child_hash = write_tree child ~backend in 236 + let kind = 237 + match child with 238 + | Contents _ -> `Contents child_hash 239 + | Node _ -> `Node child_hash 240 + in 241 + F.add n name kind) 242 + base node.children 243 + in 244 + let data = F.bytes_of_node final in 245 + let _ = backend.write data in 246 + F.hash_node final 247 + 248 + let hash t ~backend = write_tree t ~backend 249 + 250 + type 'a force = [ `True | `False of hash -> 'a | `Shallow of hash -> 'a ] 251 + 252 + let fold ?(force = `True) t init f = 253 + let rec go path t acc = 254 + match t with 255 + | Contents s -> f path (`Contents s) acc 256 + | Node node -> 257 + let acc = f path `Tree acc in 258 + match force with 259 + | `True -> ( 260 + match force_node node.state with 261 + | None -> acc 262 + | Some _loaded -> 263 + (* Fold over children *) 264 + List.fold_left 265 + (fun acc (name, child) -> go (path @ [ name ]) child acc) 266 + acc node.children) 267 + | `False fn -> ( 268 + match node.state with 269 + | Lazy { hash; _ } -> fn hash 270 + | Shallow hash -> fn hash 271 + | Pruned hash -> fn hash 272 + | Loaded _ -> 273 + List.fold_left 274 + (fun acc (name, child) -> go (path @ [ name ]) child acc) 275 + acc node.children) 276 + | `Shallow fn -> ( 277 + match node.state with 278 + | Shallow hash -> fn hash 279 + | _ -> 280 + List.fold_left 281 + (fun acc (name, child) -> go (path @ [ name ]) child acc) 282 + acc node.children) 283 + in 284 + go [] t init 285 + 286 + let clear ?depth:_ _t = () 287 + 288 + let equal t1 t2 = 289 + (* Simple structural equality - could be optimized with hash comparison *) 290 + to_concrete t1 = to_concrete t2 291 + end 292 + 293 + module Git = Make (Tree_format.Git) 294 + module Mst = Make (Tree_format.Mst)
+121
lib/tree.mli
··· 1 + (** Lazy trees with delayed writes. 2 + 3 + Trees are like Git's staging area: immutable, temporary, non-persistent 4 + areas held in memory. Reads are done lazily and writes are accumulated 5 + until commit - if you modify a key twice, only the last change is written. *) 6 + 7 + (** {1 Tree Functor} *) 8 + 9 + module Make (F : Tree_format.S) : sig 10 + type t 11 + (** Immutable in-memory tree with lazy reads and delayed writes. *) 12 + 13 + type hash = F.hash 14 + 15 + (** {2 Path Type} *) 16 + 17 + type path = string list 18 + (** A path is a list of path segments. *) 19 + 20 + (** {2 Concrete Trees} *) 21 + 22 + type concrete = 23 + [ `Contents of string | `Tree of (string * concrete) list ] 24 + (** Fully materialized tree for import/export. *) 25 + 26 + (** {2 Construction} *) 27 + 28 + val empty : unit -> t 29 + (** [empty ()] creates an empty tree. *) 30 + 31 + val of_hash : backend:hash Backend.t -> hash -> t 32 + (** [of_hash ~backend h] creates a tree backed by the store. 33 + Nothing is loaded until accessed (lazy reads). *) 34 + 35 + val of_concrete : concrete -> t 36 + (** [of_concrete c] creates a tree from a fully materialized tree. *) 37 + 38 + val shallow : hash -> t 39 + (** [shallow h] creates a tree with only a hash reference. 40 + Accessing contents raises an error. *) 41 + 42 + val pruned : hash -> t 43 + (** [pruned h] creates a pruned tree that raises on dereference. 44 + Used for GC and export operations. *) 45 + 46 + (** {2 Reads (Lazy)} *) 47 + 48 + val find : t -> path -> string option 49 + (** [find t path] looks up contents at [path]. 50 + Loads nodes lazily as needed. *) 51 + 52 + val find_tree : t -> path -> t option 53 + (** [find_tree t path] looks up a subtree at [path]. *) 54 + 55 + val list : t -> path -> (string * [ `Node | `Contents ]) list 56 + (** [list t path] lists entries at [path]. *) 57 + 58 + val mem : t -> path -> bool 59 + (** [mem t path] checks if [path] exists. *) 60 + 61 + val mem_tree : t -> path -> bool 62 + (** [mem_tree t path] checks if a subtree exists at [path]. *) 63 + 64 + (** {2 Writes (Delayed)} *) 65 + 66 + val add : t -> path -> string -> t 67 + (** [add t path contents] adds contents at [path]. 68 + The write is accumulated, not performed immediately. *) 69 + 70 + val add_tree : t -> path -> t -> t 71 + (** [add_tree t path subtree] adds a subtree at [path]. *) 72 + 73 + val remove : t -> path -> t 74 + (** [remove t path] removes the entry at [path]. *) 75 + 76 + (** {2 Materialization} *) 77 + 78 + val to_concrete : t -> concrete 79 + (** [to_concrete t] fully materializes the tree. 80 + Forces all lazy nodes to be loaded. *) 81 + 82 + val hash : t -> backend:hash Backend.t -> hash 83 + (** [hash t ~backend] computes the tree hash. 84 + Writes all accumulated changes to the backend. *) 85 + 86 + (** {2 Force Control} *) 87 + 88 + type 'a force = [ `True | `False of hash -> 'a | `Shallow of hash -> 'a ] 89 + (** Control how lazy nodes are handled during traversal: 90 + - [`True]: force loading of all nodes 91 + - [`False f]: call [f] on unloaded hashes instead 92 + - [`Shallow f]: like [`False] but for shallow trees *) 93 + 94 + val fold : 95 + ?force:'a force -> 96 + t -> 97 + 'a -> 98 + (path -> [ `Contents of string | `Tree ] -> 'a -> 'a) -> 99 + 'a 100 + (** [fold ~force t init f] traverses the tree. 101 + The [force] parameter controls lazy loading behavior. *) 102 + 103 + (** {2 Cache Management} *) 104 + 105 + val clear : ?depth:int -> t -> unit 106 + (** [clear ?depth t] purges cached data. 107 + If [depth] is given, only clears nodes at that depth or deeper. *) 108 + 109 + (** {2 Comparison} *) 110 + 111 + val equal : t -> t -> bool 112 + (** [equal t1 t2] compares trees structurally. *) 113 + end 114 + 115 + (** {1 Pre-instantiated Trees} *) 116 + 117 + module Git : module type of Make (Tree_format.Git) 118 + (** Git-format trees with SHA-1 hashes. *) 119 + 120 + module Mst : module type of Make (Tree_format.Mst) 121 + (** MST-format trees with SHA-256 hashes. *)
+166
lib/tree_format.ml
··· 1 + module type S = sig 2 + type node 3 + type hash 4 + 5 + val hash_node : node -> hash 6 + val hash_contents : string -> hash 7 + val node_of_bytes : string -> (node, [> `Msg of string ]) result 8 + val bytes_of_node : node -> string 9 + val empty_node : node 10 + val find : node -> string -> [ `Node of hash | `Contents of hash ] option 11 + val add : node -> string -> [ `Node of hash | `Contents of hash ] -> node 12 + val remove : node -> string -> node 13 + val list : node -> (string * [ `Node of hash | `Contents of hash ]) list 14 + val is_empty : node -> bool 15 + end 16 + 17 + module type SHA1 = S with type hash = Hash.sha1 18 + module type SHA256 = S with type hash = Hash.sha256 19 + 20 + (** Git tree object format using ocaml-git. *) 21 + module Git : SHA1 = struct 22 + type hash = Hash.sha1 23 + type node = Git.Tree.t 24 + 25 + (* Convert between irmin Hash.sha1 and Git.Hash.t *) 26 + let git_hash_of_sha1 (h : hash) : Git.Hash.t = 27 + Git.Hash.of_raw_string (Hash.to_bytes h) 28 + 29 + let sha1_of_git_hash (h : Git.Hash.t) : hash = 30 + Hash.sha1_of_bytes (Git.Hash.to_raw_string h) 31 + 32 + let empty_node = Git.Tree.empty 33 + let is_empty = Git.Tree.is_empty 34 + 35 + let find node name = 36 + match Git.Tree.find ~name node with 37 + | None -> None 38 + | Some entry -> 39 + let h = sha1_of_git_hash entry.hash in 40 + (match entry.perm with 41 + | `Dir -> Some (`Node h) 42 + | _ -> Some (`Contents h)) 43 + 44 + let add node name kind = 45 + let perm, hash = 46 + match kind with 47 + | `Node h -> (`Dir, git_hash_of_sha1 h) 48 + | `Contents h -> (`Normal, git_hash_of_sha1 h) 49 + in 50 + let entry = Git.Tree.entry ~perm ~name hash in 51 + Git.Tree.add entry node 52 + 53 + let remove node name = Git.Tree.remove ~name node 54 + 55 + let list node = 56 + Git.Tree.to_list node 57 + |> List.map (fun (entry : Git.Tree.entry) -> 58 + let h = sha1_of_git_hash entry.hash in 59 + let kind = 60 + match entry.perm with `Dir -> `Node h | _ -> `Contents h 61 + in 62 + (entry.name, kind)) 63 + 64 + let bytes_of_node = Git.Tree.to_string 65 + let node_of_bytes = Git.Tree.of_string 66 + let hash_node node = sha1_of_git_hash (Git.Tree.digest node) 67 + 68 + let hash_contents data = 69 + sha1_of_git_hash (Git.Hash.digest_string ~kind:`Blob data) 70 + end 71 + 72 + (** ATProto Merkle Search Tree format using ocaml-atp. 73 + 74 + MST uses SHA-256 with 2-bit prefix counting for tree depth. 75 + Keys are stored sorted with common prefix compression. 76 + Encoded as DAG-CBOR. *) 77 + module Mst : SHA256 = struct 78 + type hash = Hash.sha256 79 + 80 + (* Convert between irmin Hash.sha256 and Atp.Cid.t *) 81 + let cid_of_sha256 (h : hash) : Atp.Cid.t = 82 + Atp.Cid.of_digest `Dag_cbor (Hash.to_bytes h) 83 + 84 + let sha256_of_cid (cid : Atp.Cid.t) : hash = 85 + Hash.sha256_of_bytes (Atp.Cid.digest cid) 86 + 87 + (* Our node wraps Atp.Mst.Raw.node for serialization *) 88 + type node = Atp.Mst.Raw.node 89 + 90 + let empty_node : node = { l = None; e = [] } 91 + 92 + let is_empty (node : node) = node.l = None && node.e = [] 93 + 94 + (* Decompress key from entry list *) 95 + let decompress_keys (entries : Atp.Mst.Raw.entry list) : (string * Atp.Mst.Raw.entry) list = 96 + let rec loop prev_key acc = function 97 + | [] -> List.rev acc 98 + | (e : Atp.Mst.Raw.entry) :: rest -> 99 + let key = String.sub prev_key 0 e.p ^ e.k in 100 + loop key ((key, e) :: acc) rest 101 + in 102 + loop "" [] entries 103 + 104 + let find (node : node) name = 105 + let entries = decompress_keys node.e in 106 + match List.find_opt (fun (k, _) -> k = name) entries with 107 + | None -> None 108 + | Some (_, e) -> 109 + (* In MST, all values are content CIDs, subtrees are in 't' field *) 110 + Some (`Contents (sha256_of_cid e.v)) 111 + 112 + (* Compress keys for serialization *) 113 + let compress_keys entries = 114 + let sorted = List.sort (fun (k1, _) (k2, _) -> String.compare k1 k2) entries in 115 + let rec loop prev_key acc = function 116 + | [] -> List.rev acc 117 + | (key, (v, t)) :: rest -> 118 + let p = 119 + let rec shared i = 120 + if i >= String.length prev_key || i >= String.length key then i 121 + else if prev_key.[i] = key.[i] then shared (i + 1) 122 + else i 123 + in 124 + shared 0 125 + in 126 + let k = String.sub key p (String.length key - p) in 127 + let entry : Atp.Mst.Raw.entry = { p; k; v; t } in 128 + loop key (entry :: acc) rest 129 + in 130 + loop "" [] sorted 131 + 132 + let add (node : node) name kind = 133 + let entries = decompress_keys node.e in 134 + let v, t = 135 + match kind with 136 + | `Contents h -> (cid_of_sha256 h, None) 137 + | `Node h -> (cid_of_sha256 h, None) (* TODO: Handle subtree pointers *) 138 + in 139 + let entries = List.filter (fun (k, _) -> k <> name) entries in 140 + let entries = (name, (v, None)) :: List.map (fun (k, e) -> (k, (e.v, e.t))) entries in 141 + let compressed = compress_keys entries in 142 + { node with e = compressed } 143 + 144 + let remove (node : node) name = 145 + let entries = decompress_keys node.e in 146 + let entries = List.filter (fun (k, _) -> k <> name) entries in 147 + let entries = List.map (fun (k, e) -> (k, (e.v, e.t))) entries in 148 + let compressed = compress_keys entries in 149 + { node with e = compressed } 150 + 151 + let list (node : node) = 152 + let entries = decompress_keys node.e in 153 + List.map (fun (key, e) -> (key, `Contents (sha256_of_cid e.v))) entries 154 + 155 + let bytes_of_node node = Atp.Mst.Raw.encode_bytes node 156 + 157 + let node_of_bytes data = 158 + try Ok (Atp.Mst.Raw.decode_bytes data) 159 + with _ -> Error (`Msg "failed to decode MST node") 160 + 161 + let hash_node node = 162 + let data = bytes_of_node node in 163 + Hash.sha256 data 164 + 165 + let hash_contents data = Hash.sha256 data 166 + end
+63
lib/tree_format.mli
··· 1 + (** Tree format abstraction - the ONE functor in Irmin. 2 + 3 + This module type defines how tree nodes are encoded and decoded. 4 + Different formats (Git trees, ATProto MST) implement this interface. 5 + This is the only functor-based abstraction point in the library. *) 6 + 7 + (** {1 Module Type} *) 8 + 9 + module type S = sig 10 + (** The tree format signature. *) 11 + 12 + type node 13 + (** The internal node representation. *) 14 + 15 + type hash 16 + (** The hash type used by this format. *) 17 + 18 + val hash_node : node -> hash 19 + (** [hash_node n] computes the hash of [n]. *) 20 + 21 + val hash_contents : string -> hash 22 + (** [hash_contents data] computes the hash of content [data]. *) 23 + 24 + val node_of_bytes : string -> (node, [> `Msg of string ]) result 25 + (** [node_of_bytes data] deserializes a node from bytes. *) 26 + 27 + val bytes_of_node : node -> string 28 + (** [bytes_of_node n] serializes a node to bytes. *) 29 + 30 + val empty_node : node 31 + (** The empty node with no entries. *) 32 + 33 + val find : node -> string -> [ `Node of hash | `Contents of hash ] option 34 + (** [find node name] looks up an entry by name. *) 35 + 36 + val add : node -> string -> [ `Node of hash | `Contents of hash ] -> node 37 + (** [add node name entry] adds or replaces an entry. *) 38 + 39 + val remove : node -> string -> node 40 + (** [remove node name] removes an entry. *) 41 + 42 + val list : node -> (string * [ `Node of hash | `Contents of hash ]) list 43 + (** [list node] returns all entries sorted by name. *) 44 + 45 + val is_empty : node -> bool 46 + (** [is_empty node] returns true if the node has no entries. *) 47 + end 48 + 49 + (** {1 Hash-Specific Signatures} *) 50 + 51 + module type SHA1 = S with type hash = Hash.sha1 52 + (** Tree format using SHA-1 (Git compatible). *) 53 + 54 + module type SHA256 = S with type hash = Hash.sha256 55 + (** Tree format using SHA-256 (ATProto compatible). *) 56 + 57 + (** {1 Built-in Formats} *) 58 + 59 + module Git : SHA1 60 + (** Git tree object format. Bidirectional compatibility with Git. *) 61 + 62 + module Mst : SHA256 63 + (** ATProto Merkle Search Tree format. *)
+3
test/dune
··· 1 + (test 2 + (name test_irmin) 3 + (libraries irmin alcotest eio_main))
+173
test/test_irmin.ml
··· 1 + open Irmin 2 + 3 + (* Hash tests *) 4 + let test_sha1_hash () = 5 + let h = Hash.sha1 "hello" in 6 + let hex = Hash.to_hex h in 7 + Alcotest.(check string) "sha1 hex length" (String.make 40 '0') (String.make (String.length hex) '0'); 8 + Alcotest.(check int) "sha1 bytes length" 20 (String.length (Hash.to_bytes h)) 9 + 10 + let test_sha256_hash () = 11 + let h = Hash.sha256 "hello" in 12 + let hex = Hash.to_hex h in 13 + Alcotest.(check string) "sha256 hex length" (String.make 64 '0') (String.make (String.length hex) '0'); 14 + Alcotest.(check int) "sha256 bytes length" 32 (String.length (Hash.to_bytes h)) 15 + 16 + let test_hash_roundtrip () = 17 + let h1 = Hash.sha1 "test data" in 18 + let hex = Hash.to_hex h1 in 19 + match Hash.sha1_of_hex hex with 20 + | Ok h2 -> Alcotest.(check bool) "roundtrip" true (Hash.equal h1 h2) 21 + | Error (`Msg msg) -> Alcotest.fail msg 22 + 23 + let test_mst_depth () = 24 + (* Test MST depth calculation *) 25 + let h = Hash.sha256 "test" in 26 + let depth = Hash.mst_depth h in 27 + Alcotest.(check bool) "depth >= 0" true (depth >= 0) 28 + 29 + (* Tree tests *) 30 + let test_empty_tree () = 31 + let tree = Tree.Git.empty () in 32 + Alcotest.(check (option string)) "find empty" None (Tree.Git.find tree ["foo"]) 33 + 34 + let test_tree_add_find () = 35 + let tree = Tree.Git.empty () in 36 + let tree = Tree.Git.add tree ["foo"; "bar"] "content" in 37 + Alcotest.(check (option string)) "find added" (Some "content") (Tree.Git.find tree ["foo"; "bar"]) 38 + 39 + let test_tree_remove () = 40 + let tree = Tree.Git.empty () in 41 + let tree = Tree.Git.add tree ["foo"] "content" in 42 + let tree = Tree.Git.remove tree ["foo"] in 43 + Alcotest.(check (option string)) "find removed" None (Tree.Git.find tree ["foo"]) 44 + 45 + let test_tree_overwrite () = 46 + let tree = Tree.Git.empty () in 47 + let tree = Tree.Git.add tree ["key"] "value1" in 48 + let tree = Tree.Git.add tree ["key"] "value2" in 49 + Alcotest.(check (option string)) "find overwritten" (Some "value2") (Tree.Git.find tree ["key"]) 50 + 51 + let test_tree_nested () = 52 + let tree = Tree.Git.empty () in 53 + let tree = Tree.Git.add tree ["a"; "b"; "c"] "deep" in 54 + let tree = Tree.Git.add tree ["a"; "x"] "shallow" in 55 + Alcotest.(check (option string)) "find deep" (Some "deep") (Tree.Git.find tree ["a"; "b"; "c"]); 56 + Alcotest.(check (option string)) "find shallow" (Some "shallow") (Tree.Git.find tree ["a"; "x"]) 57 + 58 + (* Backend tests *) 59 + let test_memory_backend () = 60 + let backend = Backend.Memory.create_sha1 () in 61 + let data = "test content" in 62 + let hash = backend.write data in 63 + Alcotest.(check (option string)) "read back" (Some data) (backend.read hash) 64 + 65 + let test_backend_refs () = 66 + let backend = Backend.Memory.create_sha1 () in 67 + let hash = backend.write "content" in 68 + backend.set_ref "refs/heads/main" hash; 69 + Alcotest.(check bool) "ref exists" true (Option.is_some (backend.get_ref "refs/heads/main")); 70 + match backend.get_ref "refs/heads/main" with 71 + | Some h -> Alcotest.(check bool) "ref matches" true (Hash.equal hash h) 72 + | None -> Alcotest.fail "ref not found" 73 + 74 + let test_backend_test_and_set () = 75 + let backend = Backend.Memory.create_sha1 () in 76 + let h1 = backend.write "content1" in 77 + let h2 = backend.write "content2" in 78 + backend.set_ref "ref" h1; 79 + 80 + (* Should fail with wrong test value *) 81 + let result = backend.test_and_set_ref "ref" ~test:(Some h2) ~set:(Some h2) in 82 + Alcotest.(check bool) "wrong test fails" false result; 83 + 84 + (* Should succeed with correct test value *) 85 + let result = backend.test_and_set_ref "ref" ~test:(Some h1) ~set:(Some h2) in 86 + Alcotest.(check bool) "correct test succeeds" true result 87 + 88 + (* Store tests *) 89 + let test_store_commit () = 90 + let backend = Backend.Memory.create_sha1 () in 91 + let store = Store.Git.create ~backend in 92 + let tree = Tree.Git.empty () in 93 + let tree = Tree.Git.add tree ["README.md"] "# Hello" in 94 + let hash = Store.Git.commit store ~tree ~parents:[] ~message:"Initial commit" ~author:"test" in 95 + Alcotest.(check bool) "commit hash exists" true (backend.exists hash) 96 + 97 + let test_store_branches () = 98 + let backend = Backend.Memory.create_sha1 () in 99 + let store = Store.Git.create ~backend in 100 + let tree = Tree.Git.empty () in 101 + let hash = Store.Git.commit store ~tree ~parents:[] ~message:"test" ~author:"test" in 102 + Store.Git.set_head store ~branch:"main" hash; 103 + let branches = Store.Git.branches store in 104 + Alcotest.(check (list string)) "branches" ["main"] branches 105 + 106 + (* Tree format tests *) 107 + let test_git_tree_format () = 108 + let node = Tree_format.Git.empty_node in 109 + Alcotest.(check bool) "empty is empty" true (Tree_format.Git.is_empty node); 110 + let h = Hash.sha1 "content" in 111 + let node = Tree_format.Git.add node "file.txt" (`Contents h) in 112 + Alcotest.(check bool) "not empty after add" false (Tree_format.Git.is_empty node); 113 + match Tree_format.Git.find node "file.txt" with 114 + | Some (`Contents h') -> Alcotest.(check bool) "find matches" true (Hash.equal h h') 115 + | _ -> Alcotest.fail "entry not found" 116 + 117 + let test_git_tree_serialization () = 118 + let h = Hash.sha1 "content" in 119 + let node = Tree_format.Git.empty_node in 120 + let node = Tree_format.Git.add node "file.txt" (`Contents h) in 121 + let bytes = Tree_format.Git.bytes_of_node node in 122 + match Tree_format.Git.node_of_bytes bytes with 123 + | Ok node' -> 124 + let entries = Tree_format.Git.list node' in 125 + Alcotest.(check int) "one entry" 1 (List.length entries) 126 + | Error (`Msg msg) -> Alcotest.fail msg 127 + 128 + (* Test suites *) 129 + let hash_tests = 130 + [ 131 + Alcotest.test_case "sha1 hash" `Quick test_sha1_hash; 132 + Alcotest.test_case "sha256 hash" `Quick test_sha256_hash; 133 + Alcotest.test_case "hash roundtrip" `Quick test_hash_roundtrip; 134 + Alcotest.test_case "mst depth" `Quick test_mst_depth; 135 + ] 136 + 137 + let tree_tests = 138 + [ 139 + Alcotest.test_case "empty tree" `Quick test_empty_tree; 140 + Alcotest.test_case "tree add/find" `Quick test_tree_add_find; 141 + Alcotest.test_case "tree remove" `Quick test_tree_remove; 142 + Alcotest.test_case "tree overwrite" `Quick test_tree_overwrite; 143 + Alcotest.test_case "tree nested" `Quick test_tree_nested; 144 + ] 145 + 146 + let backend_tests = 147 + [ 148 + Alcotest.test_case "memory backend" `Quick test_memory_backend; 149 + Alcotest.test_case "backend refs" `Quick test_backend_refs; 150 + Alcotest.test_case "backend test_and_set" `Quick test_backend_test_and_set; 151 + ] 152 + 153 + let store_tests = 154 + [ 155 + Alcotest.test_case "store commit" `Quick test_store_commit; 156 + Alcotest.test_case "store branches" `Quick test_store_branches; 157 + ] 158 + 159 + let tree_format_tests = 160 + [ 161 + Alcotest.test_case "git tree format" `Quick test_git_tree_format; 162 + Alcotest.test_case "git tree serialization" `Quick test_git_tree_serialization; 163 + ] 164 + 165 + let () = 166 + Alcotest.run "Irmin" 167 + [ 168 + ("Hash", hash_tests); 169 + ("Tree", tree_tests); 170 + ("Backend", backend_tests); 171 + ("Store", store_tests); 172 + ("Tree_format", tree_format_tests); 173 + ]