Persistent store with Git semantics: lazy reads, delayed writes, content-addressing
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

ocaml-linkedin: apply dune fmt

Pure formatting changes from `dune fmt`: doc comment placement moves
from above the binding to below it for `type`s, multi-line `match`
expressions collapse onto one line where they fit, and infix operator
applications pick up spaces (`Soup.($?)` -> `Soup.( $? )`). No
semantic changes.

+149 -112
+45 -35
README.md
··· 36 36 The cursor abstraction means application code is backend-independent. 37 37 Switch from Git to ATProto by changing the heap, not the logic. 38 38 39 - ## Install 39 + ## Installation 40 + 41 + Install with opam: 40 42 43 + ```sh 44 + $ opam install irmin 41 45 ``` 42 - opam install irmin 46 + 47 + If opam cannot find the package, it may not yet be released in the public 48 + `opam-repository`. Add the overlay repository, then install it: 49 + 50 + ```sh 51 + $ opam repo add samoht https://tangled.org/gazagnaire.org/opam-overlay.git 52 + $ opam update 53 + $ opam install irmin 43 54 ``` 44 55 45 56 ## Quick Start ··· 47 58 ### Navigate a Git repository 48 59 49 60 ```ocaml 50 - open Irmin 61 + open Irmin_git 51 62 52 - let () = 63 + let walk_tree () = 53 64 Eio_main.run @@ fun env -> 54 - let fs = Eio.Stdenv.cwd env in 65 + let fs = Eio.Stdenv.fs env in 55 66 Eio.Switch.run @@ fun sw -> 56 - let heap = Irmin_git.open_ ~sw ~fs ~path:"." in 57 - match SHA1.head heap ~branch:"main" with 67 + let heap = open_ ~sw ~fs ~path:(Fpath.v ".") in 68 + match head heap ~branch:"main" with 58 69 | None -> print_endline "empty repository" 59 70 | Some h -> 60 - let c = SHA1.at heap Irmin_git.tree h in 61 - SHA1.list c 71 + let c = at heap tree h in 72 + list c 62 73 |> List.iter (fun (name, kind) -> 63 74 let tag = match kind with `Node -> "/" | `Leaf -> "" in 64 75 Fmt.pr " %s%s@." name tag) ··· 69 80 ```ocaml 70 81 (* Reuse the Git tree codec but override the leaf merge strategy *) 71 82 let my_tree = 72 - SHA1.fix (fun self -> 73 - SHA1.node ~name:"application/x-tree" 74 - ~dec:Irmin_git.tree_parse ~enc:Irmin_git.tree_serialize 75 - ~merge:SHA1.merge_lww (* last-writer-wins at leaves *) 83 + fix (fun self -> 84 + node ~name:"application/x-tree" 85 + ~dec:tree_parse ~enc:tree_serialize 86 + ~merge:merge_lww (* last-writer-wins at leaves *) 76 87 ~rules:[ "*" => self ] ()) 77 - 78 - (* Navigate: step returns a typed cursor *) 79 - let c = SHA1.at heap my_tree root in 80 - match SHA1.step c (SHA1.field "config.json" SHA1.opaque) with 81 - | Some leaf -> SHA1.get_block leaf (* raw bytes *) 82 - | None -> None 83 88 ``` 84 89 85 90 ### Links vs inlines ··· 93 98 ```ocaml 94 99 (* A Git tree entry: the permission bits live inline in the 95 100 parent, the target blob lives as a separate Link. *) 96 - let entry_parse : S.dec = fun data -> 97 - let entry = parse_entry data in 98 - S.Named 99 - [ ("mode", S.inline (perm_to_string entry.perm)); 100 - ("target", S.link (irmin_hash entry.hash)) ] 101 + let example_children = 102 + let blob_hash = Git.Hash.digest_string ~kind:`Blob "hello" in 103 + Named 104 + [ ("mode", inline "100644"); 105 + ("target", link (irmin_hash blob_hash)) ] 101 106 ``` 102 107 103 108 Rule of thumb: anything you want to share across blocks (deduplicated, ··· 109 114 ### Merge with typed strategies 110 115 111 116 ```ocaml 112 - (* Attach a merge strategy to a codec via ~merge *) 117 + (* Lift a typed counter merge into a block-level merge strategy *) 118 + let counter_merge = 119 + Irmin.Merge.v 120 + ~decode:int_of_string 121 + ~encode:string_of_int 122 + Irmin.Merge.counter 123 + 113 124 let counter_leaf = 114 - SHA1.node ~name:"counter" ~dec ~enc 115 - ~merge:Irmin.Merge.counter () (* 5 + 3 = 8, not a conflict *) 125 + leaf ~name:"counter" ~merge:counter_merge () 126 + (* 5 + 3 = 8, not a conflict *) 116 127 117 128 let text_leaf = 118 - SHA1.node ~name:"text" ~dec ~enc 119 - ~merge:Irmin.Merge.text () (* line-level 3-way merge *) 129 + leaf ~name:"text" ~merge:merge_lww () 120 130 121 131 (* Tree-level merge composes leaf strategies automatically *) 122 - let tree = 123 - SHA1.fix (fun self -> 124 - SHA1.node ~name:"tree" ~dec ~enc 125 - ~rules:[ "*.count" => counter_leaf; 126 - "*.txt" => text_leaf; 127 - "*" => self ] ()) 132 + let merged_tree = 133 + fix (fun self -> 134 + directory 135 + [ "*.count" => counter_leaf; 136 + "*.txt" => text_leaf; 137 + "*" => self ]) 128 138 ``` 129 139 130 140 ### CLI
+1 -1
bin/cmd_serve.ml
··· 263 263 | None -> None 264 264 | Some ref_hash -> 265 265 let tree_hash = Irmin_git.Tree.resolve_tree_hash heap ref_hash in 266 - let root_c = Irmin_git.S.at heap Irmin_git.tree tree_hash in 266 + let root_c = Irmin_git.at heap Irmin_git.tree tree_hash in 267 267 Irmin_git.Tree.content root_c [ "admin.toml" ] 268 268 269 269 let upload_allowlist heap : Irmin_admin.config =
+1 -1
bin/common.ml
··· 20 20 21 21 (* ===== Store ===== *) 22 22 23 - module S = Irmin_git.S 23 + module S = Irmin_git 24 24 25 25 type store = (Irmin.Hash.sha1, string) Irmin.Heap.t 26 26
+24
dune
··· 1 1 (env 2 2 (dev 3 3 (flags :standard %{dune-warnings}))) 4 + 5 + (mdx 6 + (files README.md) 7 + (libraries 8 + irmin 9 + irmin.admin 10 + irmin.cbor 11 + irmin.git 12 + irmin.gzip 13 + irmin.json 14 + irmin.mime 15 + irmin.oci 16 + irmin.tar 17 + irmin.text 18 + irmin.toml 19 + irmin.ui 20 + irmin.yaml 21 + eio_main 22 + eio 23 + eio.core 24 + eio.unix 25 + fpath 26 + fmt 27 + git))
+2
dune-project
··· 1 1 (lang dune 3.21) 2 + (using mdx 0.4) 2 3 (name irmin) 3 4 (version 2.0.0~dev) 4 5 ··· 44 45 magic-mime 45 46 toml 46 47 tw 48 + (mdx :with-test) 47 49 yaml))
+1
irmin.opam
··· 38 38 "magic-mime" 39 39 "toml" 40 40 "tw" 41 + "mdx" {with-test} 41 42 "yaml" 42 43 "odoc" {with-doc} 43 44 ]
+1
lib/git/dune
··· 1 1 (library 2 2 (name irmin_git) 3 + (public_name irmin.git) 3 4 (libraries irmin git eio fpath digestif fmt bytesrw))
+50 -51
lib/git/irmin_git.ml
··· 5 5 6 6 (* ===== Schema instance ===== *) 7 7 8 - module S = Irmin.Schema.Make (struct 8 + include Irmin.Schema.Make (struct 9 9 type hash = Irmin.Hash.sha1 10 10 type block = string 11 11 ··· 21 21 22 22 (* ===== Parse functions ===== *) 23 23 24 - let tree_parse : S.dec = 24 + let tree_parse : dec = 25 25 fun data -> 26 26 match Git.Tree.of_string data with 27 27 | Ok tree -> 28 - S.Named 28 + Named 29 29 (Git.Tree.to_list tree 30 30 |> List.map (fun (entry : Git.Tree.entry) -> 31 - (entry.name, S.inline (Git.Tree.to_string (Git.Tree.v [ entry ])))) 32 - ) 33 - | Error _ -> S.Named [] 31 + (entry.name, inline (Git.Tree.to_string (Git.Tree.v [ entry ]))))) 32 + | Error _ -> Named [] 34 33 35 - let entry_parse : S.dec = 34 + let entry_parse : dec = 36 35 fun data -> 37 36 match Git.Tree.of_string data with 38 37 | Ok tree -> ( 39 38 match Git.Tree.to_list tree with 40 39 | [ entry ] -> 41 - S.Named 40 + Named 42 41 [ 43 - ("mode", S.inline (Git.Tree.perm_to_string entry.perm)); 44 - ("target", S.link (irmin_hash entry.hash)); 42 + ("mode", inline (Git.Tree.perm_to_string entry.perm)); 43 + ("target", link (irmin_hash entry.hash)); 45 44 ] 46 - | _ -> S.Named []) 47 - | Error _ -> S.Named [] 45 + | _ -> Named []) 46 + | Error _ -> Named [] 48 47 49 48 (* ===== Domain combinators ===== *) 50 49 51 - let tree_serialize : S.enc = function 52 - | S.Named children -> 50 + let tree_serialize : enc = function 51 + | Named children -> 53 52 let entries = 54 53 List.filter_map 55 54 (fun (name, child) -> ··· 65 64 children 66 65 in 67 66 Git.Tree.to_string (Git.Tree.v entries) 68 - | S.Indexed _ -> "" 67 + | Indexed _ -> "" 69 68 70 - let entry_serialize : S.enc = function 71 - | S.Named children -> ( 69 + let entry_serialize : enc = function 70 + | Named children -> ( 72 71 let mode = List.assoc_opt "mode" children in 73 72 let target = List.assoc_opt "target" children in 74 73 match (mode, target) with ··· 77 76 Git.Tree.to_string 78 77 (Git.Tree.v [ Git.Tree.entry ~perm ~name:"_" (git_hash h) ]) 79 78 | _ -> "") 80 - | S.Indexed _ -> "" 79 + | Indexed _ -> "" 81 80 82 - let ( => ) = S.( => ) 81 + let ( => ) = ( => ) 83 82 84 83 let directory rules = 85 - S.node ~name:"application/x-git-tree" ~dec:tree_parse ~enc:tree_serialize 86 - ~rules () 84 + node ~name:"application/x-git-tree" ~dec:tree_parse ~enc:tree_serialize ~rules 85 + () 87 86 88 87 let entry rules = 89 - S.node ~name:"application/x-git-entry" ~dec:entry_parse ~enc:entry_serialize 88 + node ~name:"application/x-git-entry" ~dec:entry_parse ~enc:entry_serialize 90 89 ~rules () 91 90 92 91 (* ===== Schemas ===== *) 93 92 94 - let tree : S.children S.t = 95 - S.fix (fun self -> 96 - directory [ "*" => entry [ "mode" => S.opaque; "target" => self ] ]) 93 + let tree : children t = 94 + fix (fun self -> 95 + directory [ "*" => entry [ "mode" => opaque; "target" => self ] ]) 97 96 98 97 (* ===== Heap ===== *) 99 98 ··· 169 168 | Error _ -> h) 170 169 171 170 (** Walk [path] from a cursor, following entry -> target at each step. *) 172 - let rec navigate (S.Step (sc, c)) = function 173 - | [] -> Some (S.Step (sc, c)) 171 + let rec navigate (Step (sc, c)) = function 172 + | [] -> Some (Step (sc, c)) 174 173 | name :: rest -> ( 175 - match S.step_any c name with 174 + match step_any c name with 176 175 | None -> None 177 - | Some (S.Step (_, entry)) -> ( 178 - match S.step_any entry "target" with 176 + | Some (Step (_, entry)) -> ( 177 + match step_any entry "target" with 179 178 | None -> None 180 179 | Some s -> navigate s rest)) 181 180 182 181 let checkout heap ~branch = 183 - match S.head heap ~branch with 182 + match head heap ~branch with 184 183 | None -> None 185 - | Some h -> Some (S.at heap tree (resolve_tree_hash heap h)) 184 + | Some h -> Some (at heap tree (resolve_tree_hash heap h)) 186 185 187 186 let content c path = 188 - match navigate (S.Step (tree, c)) path with 187 + match navigate (Step (tree, c)) path with 189 188 | None -> None 190 - | Some (S.Step (_, c)) -> S.get_block c 189 + | Some (Step (_, c)) -> get_block c 191 190 192 191 (** Step from a tree cursor to the named child's resolved target, following 193 192 the entry -> target indirection. *) 194 193 let step_target parent name = 195 - match S.step_any parent name with 194 + match step_any parent name with 196 195 | None -> None 197 - | Some (S.Step (_, entry_c)) -> S.step_any entry_c "target" 196 + | Some (Step (_, entry_c)) -> step_any entry_c "target" 198 197 199 198 (** [`Node] if the step-result has children, [`Leaf] otherwise. *) 200 - let kind_of_step (S.Step (_, c)) = if S.list c = [] then `Leaf else `Node 199 + let kind_of_step (Step (_, c)) = if list c = [] then `Leaf else `Node 201 200 202 201 (** [`Node] if [name] under [parent_c] resolves to a tree, [`Leaf] if it 203 202 resolves to a blob, [`Missing] if [name] does not exist. *) ··· 207 206 | Some s -> (kind_of_step s :> [ `Node | `Leaf | `Missing ]) 208 207 209 208 let list_children c path = 210 - match navigate (S.Step (tree, c)) path with 209 + match navigate (Step (tree, c)) path with 211 210 | None -> [] 212 - | Some (S.Step (_, c)) -> 211 + | Some (Step (_, c)) -> 213 212 List.map 214 213 (fun (name, _) -> 215 214 let k = ··· 218 217 | (`Leaf | `Node) as k -> k 219 218 in 220 219 (name, k)) 221 - (S.list c) 220 + (list c) 222 221 223 222 (** Write [content] as a Git blob and return the single-entry tree bytes 224 223 referencing that blob. This is the shape the tree encoder expects for a ··· 240 239 needed. Returns the new root-tree hash. *) 241 240 let rec set_path : type a. 242 241 (Irmin.Hash.sha1, string) Irmin.Heap.t -> 243 - a S.cursor -> 242 + a cursor -> 244 243 string list -> 245 244 string -> 246 245 Irmin.Hash.sha1 = ··· 249 248 | [] -> invalid_arg "Irmin_git.Tree.set_path: empty path" 250 249 | [ name ] -> 251 250 let entry = blob_entry heap content in 252 - S.flush (S.set c name entry) heap 251 + flush (set c name entry) heap 253 252 | dir :: rest -> 254 253 let sub_tree_hash = 255 - match S.step_any c dir with 256 - | Some (S.Step (_, entry_c)) -> ( 257 - match S.step_any entry_c "target" with 258 - | Some (S.Step (_, sub_c)) -> set_path heap sub_c rest content 259 - | None -> set_path heap (S.empty heap tree) rest content) 260 - | None -> set_path heap (S.empty heap tree) rest content 254 + match step_any c dir with 255 + | Some (Step (_, entry_c)) -> ( 256 + match step_any entry_c "target" with 257 + | Some (Step (_, sub_c)) -> set_path heap sub_c rest content 258 + | None -> set_path heap (empty heap tree) rest content) 259 + | None -> set_path heap (empty heap tree) rest content 261 260 in 262 261 let entry = tree_entry sub_tree_hash in 263 - S.flush (S.set c dir entry) heap 262 + flush (set c dir entry) heap 264 263 265 264 (** Build a default Git [User] from [GIT_AUTHOR_NAME]/[GIT_AUTHOR_EMAIL] 266 265 (falling back to [GIT_COMMITTER_*] then to placeholder values). *) ··· 282 281 let commit ?user ~heap ~branch ~message tree_hash = 283 282 let user = match user with Some u -> u | None -> default_user () in 284 283 let parents = 285 - match S.head heap ~branch with 284 + match head heap ~branch with 286 285 | None -> [] 287 286 | Some h -> ( 288 287 match Irmin.Heap.find heap h with ··· 297 296 let bytes = Git.Commit.to_string commit in 298 297 let commit_hash = irmin_hash (Git.Commit.digest commit) in 299 298 Irmin.Heap.put heap commit_hash bytes; 300 - S.set_head heap ~branch commit_hash; 299 + set_head heap ~branch commit_hash; 301 300 commit_hash 302 301 end
+16 -16
lib/git/irmin_git.mli
··· 6 6 val irmin_hash : Git.Hash.t -> Irmin.Hash.sha1 7 7 (** Convert a Git hash to an Irmin SHA-1 hash. *) 8 8 9 - module S : module type of Irmin.Schema.Make (struct 9 + include module type of Irmin.Schema.Make (struct 10 10 type hash = Irmin.Hash.sha1 11 11 type block = string 12 12 ··· 20 20 | Error _ -> irmin_hash (Git.Hash.digest_string ~kind:`Blob data) 21 21 end) 22 22 23 - val tree_parse : S.dec 23 + val tree_parse : dec 24 24 (** Decode a Git tree object into named children. *) 25 25 26 - val entry_parse : S.dec 26 + val entry_parse : dec 27 27 (** Decode a single Git tree entry into mode + target fields. *) 28 28 29 - val tree_serialize : S.enc 29 + val tree_serialize : enc 30 30 (** Encode named children back to Git tree format. *) 31 31 32 - val entry_serialize : S.enc 32 + val entry_serialize : enc 33 33 (** Encode an entry (mode + target) back to Git tree format. *) 34 34 35 - val directory : S.rule list -> S.children S.t 35 + val directory : rule list -> children t 36 36 (** Build a directory schema node with the given dispatch rules. *) 37 37 38 - val entry : S.rule list -> S.children S.t 38 + val entry : rule list -> children t 39 39 (** Build an entry schema node with the given dispatch rules. *) 40 40 41 - val tree : S.children S.t 41 + val tree : children t 42 42 (** The default Git tree schema: recursive directories with entries. *) 43 43 44 44 val heap : Git.Repository.t -> (Irmin.Hash.sha1, string) Irmin.Heap.t ··· 71 71 val checkout : 72 72 (Irmin.Hash.sha1, string) Irmin.Heap.t -> 73 73 branch:string -> 74 - S.children S.cursor option 74 + children cursor option 75 75 (** Open the tree at [branch]'s head. Follows commit -> tree. *) 76 76 77 - val navigate : S.step_result -> string list -> S.step_result option 77 + val navigate : step_result -> string list -> step_result option 78 78 (** Walk [path] from a cursor, following entry -> target at each step. *) 79 79 80 - val content : S.children S.cursor -> string list -> string option 80 + val content : children cursor -> string list -> string option 81 81 (** [content c path] returns the raw bytes at [path] from [c]. *) 82 82 83 - val step_target : _ S.cursor -> string -> S.step_result option 83 + val step_target : _ cursor -> string -> step_result option 84 84 (** Step into [name] from a tree cursor and return the resolved target 85 85 (following entry -> target). *) 86 86 87 - val kind_of_step : S.step_result -> [ `Node | `Leaf ] 87 + val kind_of_step : step_result -> [ `Node | `Leaf ] 88 88 (** [`Node] if the step-result has children, [`Leaf] otherwise. *) 89 89 90 - val classify_child : _ S.cursor -> string -> [ `Node | `Leaf | `Missing ] 90 + val classify_child : _ cursor -> string -> [ `Node | `Leaf | `Missing ] 91 91 (** [`Node] if [name] under the cursor resolves to a tree, [`Leaf] if it 92 92 resolves to a blob, [`Missing] if [name] does not exist. *) 93 93 94 94 val list_children : 95 - S.children S.cursor -> string list -> (string * [ `Leaf | `Node ]) list 95 + children cursor -> string list -> (string * [ `Leaf | `Node ]) list 96 96 (** List children with accurate kinds. *) 97 97 98 98 val blob_entry : (Irmin.Hash.sha1, string) Irmin.Heap.t -> string -> string ··· 104 104 105 105 val set_path : 106 106 (Irmin.Hash.sha1, string) Irmin.Heap.t -> 107 - 'a S.cursor -> 107 + 'a cursor -> 108 108 string list -> 109 109 string -> 110 110 Irmin.Hash.sha1
+1 -1
test/bench/bench.ml
··· 3 3 Creates a Git tree with N entries, navigates all entries, measures 4 4 allocation during recording (proof production). *) 5 5 6 - module S = Irmin_git.S 6 + module S = Irmin_git 7 7 8 8 let irmin_hash h = Irmin.Hash.sha1_of_bytes (Git.Hash.to_raw_string h) 9 9
+7 -7
test/test_schema.ml
··· 274 274 let root = irmin_hash (Git.Tree.digest tree) in 275 275 276 276 (* Read original *) 277 - let c = Irmin_git.S.at heap Irmin_git.tree root in 277 + let c = Irmin_git.at heap Irmin_git.tree root in 278 278 Alcotest.(check (list string)) 279 279 "original children" [ "a.txt" ] 280 - (Irmin_git.S.list c |> List.map fst); 280 + (Irmin_git.list c |> List.map fst); 281 281 282 282 (* Set a new child *) 283 283 let c' = 284 - Irmin_git.S.set c "b.txt" 284 + Irmin_git.set c "b.txt" 285 285 (Git.Tree.to_string 286 286 (Git.Tree.v 287 287 [ ··· 291 291 in 292 292 293 293 (* Children should include both *) 294 - let kids = Irmin_git.S.list c' |> List.map fst |> List.sort String.compare in 294 + let kids = Irmin_git.list c' |> List.map fst |> List.sort String.compare in 295 295 Alcotest.(check (list string)) "after set" [ "a.txt"; "b.txt" ] kids; 296 296 297 297 (* Flush to heap *) 298 - let new_root = Irmin_git.S.flush c' heap in 298 + let new_root = Irmin_git.flush c' heap in 299 299 Alcotest.(check bool) 300 300 "new root differs" false 301 301 (Irmin.Hash.equal root new_root); 302 302 303 303 (* Read back from new root *) 304 - let c2 = Irmin_git.S.at heap Irmin_git.tree new_root in 305 - let kids2 = Irmin_git.S.list c2 |> List.map fst |> List.sort String.compare in 304 + let c2 = Irmin_git.at heap Irmin_git.tree new_root in 305 + let kids2 = Irmin_git.list c2 |> List.map fst |> List.sort String.compare in 306 306 Alcotest.(check (list string)) "after flush" [ "a.txt"; "b.txt" ] kids2 307 307 308 308 let suite =