Persistent store with Git semantics: lazy reads, delayed writes, content-addressing
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

irmin: codec-based schema with MIME names, Type.Id, and typed fields

Replace the GADT-shaped schema with a record-based codec carrying a
MIME-style name and a Type.Id witness. This unifies "schema" and
"content type" into one concept, modelled after IPLD codecs and HTTP
content negotiation.

API changes:
- 'a t now means "codec for blocks of value type 'a"
- node ~name ~dec ~enc ?merge ?rules () : children t
- opaque : H.block t (leaf, no navigation)
- map ~dec ~enc inner : transforms value type, keeps navigation
- fix : ('a t -> 'a t) -> 'a t (recursion)
- name, id (Type.Id witness) for runtime introspection
- ('a, 'b) field: typed accessor (path + expected codec)
- step (typed): cursor -> field -> 'b cursor option
- step_any (untyped): returns Step (codec, cursor) existential
- cast: recover typed cursor via Type.Id equality
- Step constructor now carries (codec, cursor) for type recovery
- step_result type renamed (was step) to avoid clash with val step

All backends declare a MIME name like "application/json",
"application/x-git-tree", "application/dag-cbor".

11 tests pass (6 schema + 5 tar), mst_proof verified.

+557 -393
+3 -3
bin/cmd_del.ml
··· 21 21 let name = match List.rev steps with n :: _ -> n | [] -> path in 22 22 let parent_c = 23 23 match parent_steps with 24 - | [] -> Some (S.Step c) 25 - | _ -> Common.navigate (S.Step c) parent_steps 24 + | [] -> Some (S.Step (Irmin_git.tree, c)) 25 + | _ -> Common.navigate (S.Step (Irmin_git.tree, c)) parent_steps 26 26 in 27 27 match parent_c with 28 28 | None -> 29 29 Common.error "Path %a not found" Common.styled_cyan path; 30 30 1 31 - | Some (S.Step pc) -> 31 + | Some (S.Step (_, pc)) -> 32 32 let pc = S.remove pc name in 33 33 let new_hash = S.flush pc heap in 34 34 S.set_head heap ~branch new_hash;
+7 -4
bin/cmd_proof.ml
··· 49 49 let ( => ) = S.( => ) 50 50 51 51 let tree = 52 - S.fix (fun self -> S.node ~dec:dir_parse ~enc:dir_serialize [ "*" => self ]) 52 + S.fix (fun self -> 53 + S.node ~name:"application/x-flat-tree" ~dec:dir_parse ~enc:dir_serialize 54 + ~rules:[ "*" => self ] 55 + ()) 53 56 54 57 (** Build a flat tree from key=value pairs. Each key is a direct child of the 55 58 root; values are stored as blobs. *) ··· 115 118 let proof, result = 116 119 S.produce heap tree root (fun c -> 117 120 let v = S.find c [ key ] in 118 - (S.Step c, v)) 121 + (S.Step (tree, c), v)) 119 122 in 120 123 let hash_str h = 121 124 let hex = Digestif.SHA256.to_hex h in ··· 145 148 let proof, _ = 146 149 S.produce heap tree root (fun c -> 147 150 let v = S.find c [ key ] in 148 - (S.Step c, v)) 151 + (S.Step (tree, c), v)) 149 152 in 150 153 match 151 154 S.verify proof tree (fun c -> 152 155 let v = S.find c [ key ] in 153 - (S.Step c, v)) 156 + (S.Step (tree, c), v)) 154 157 with 155 158 | Ok v -> 156 159 (match output with
+3 -3
bin/cmd_set.ml
··· 26 26 (* Navigate to the parent path *) 27 27 let parent_c = 28 28 match parent_steps with 29 - | [] -> Some (S.Step c) 30 - | _ -> Common.navigate (S.Step c) parent_steps 29 + | [] -> Some (S.Step (Irmin_git.tree, c)) 30 + | _ -> Common.navigate (S.Step (Irmin_git.tree, c)) parent_steps 31 31 in 32 32 let new_hash = 33 33 match parent_c with 34 - | Some (S.Step pc) -> S.flush (S.set pc name content) heap 34 + | Some (S.Step (_, pc)) -> S.flush (S.set pc name content) heap 35 35 | None -> 36 36 (* Parent path doesn't exist yet; set at the top level *) 37 37 S.flush (S.set c name content) heap
+4 -4
bin/cmd_tree.ml
··· 19 19 (* Navigate to the start path *) 20 20 let start_c = 21 21 match start with 22 - | [] -> Some (S.Step root_c) 23 - | _ -> Common.navigate (S.Step root_c) start 22 + | [] -> Some (S.Step (Irmin_git.tree, root_c)) 23 + | _ -> Common.navigate (S.Step (Irmin_git.tree, root_c)) start 24 24 in 25 25 match start_c with 26 26 | None -> ··· 28 28 (Option.value ~default:"/" path); 29 29 1 30 30 | Some s -> 31 - let rec walk indent prefix (S.Step c) = 31 + let rec walk indent prefix (S.Step (sc, c)) = 32 32 let entries = S.list c in 33 33 List.iter 34 34 (fun (name, kind) -> ··· 47 47 Fmt.pr {|{"path":%S,"type":"dir"}@.|} 48 48 (String.concat "/" full_path)); 49 49 (* Navigate into child using Git's entry -> target pattern *) 50 - match Common.navigate (S.Step c) [ name ] with 50 + match Common.navigate (S.Step (sc, c)) [ name ] with 51 51 | Some child -> walk (indent ^ " ") full_path child 52 52 | None -> ())) 53 53 entries
+10 -8
bin/common.ml
··· 38 38 | Some h -> Some (S.at heap Irmin_git.tree h) 39 39 40 40 (** Navigate a Git tree, following entry → target links at each step. *) 41 - let rec navigate (S.Step c) = function 42 - | [] -> Some (S.Step c) 41 + let rec navigate (S.Step (sc, c)) = function 42 + | [] -> Some (S.Step (sc, c)) 43 43 | name :: rest -> ( 44 - match S.step c name with 44 + match S.step_any c name with 45 45 | None -> None 46 - | Some (S.Step entry) -> ( 47 - match S.step entry "target" with 46 + | Some (S.Step (_, entry)) -> ( 47 + match S.step_any entry "target" with 48 48 | None -> None 49 49 | Some s -> navigate s rest)) 50 50 51 51 let find_content c path = 52 - match navigate (S.Step c) path with 52 + match navigate (S.Step (Irmin_git.tree, c)) path with 53 53 | None -> None 54 - | Some (S.Step c) -> S.get_block c 54 + | Some (S.Step (_, c)) -> S.get_block c 55 55 56 56 let list_children c path = 57 - match navigate (S.Step c) path with None -> [] | Some (S.Step c) -> S.list c 57 + match navigate (S.Step (Irmin_git.tree, c)) path with 58 + | None -> [] 59 + | Some (S.Step (_, c)) -> S.list c
+4 -1
lib/atproto/irmin_atproto.ml
··· 74 74 75 75 let record = 76 76 S.fix (fun self -> 77 - S.node ~dec:record_parse ~enc:record_serialize [ "*" => self ]) 77 + S.node ~name:"application/dag-cbor" ~dec:record_parse 78 + ~enc:record_serialize 79 + ~rules:[ "*" => self ] 80 + ()) 78 81 79 82 (* ===== Heap backend ===== *) 80 83
+4 -1
lib/cbor/irmin_cbor.ml
··· 65 65 let ( => ) = S.( => ) 66 66 67 67 let schema : S.children S.t = 68 - S.fix (fun self -> S.node ~dec:parse ~enc:serialize [ "*" => self ]) 68 + S.fix (fun self -> 69 + S.node ~name:"application/cbor" ~dec:parse ~enc:serialize 70 + ~rules:[ "*" => self ] 71 + ())
+8 -2
lib/git/irmin_git.ml
··· 79 79 | S.Indexed _ -> "" 80 80 81 81 let ( => ) = S.( => ) 82 - let directory rules = S.node ~dec:tree_parse ~enc:tree_serialize rules 83 - let entry rules = S.node ~dec:entry_parse ~enc:entry_serialize rules 82 + 83 + let directory rules = 84 + S.node ~name:"application/x-git-tree" ~dec:tree_parse ~enc:tree_serialize 85 + ~rules () 86 + 87 + let entry rules = 88 + S.node ~name:"application/x-git-entry" ~dec:entry_parse ~enc:entry_serialize 89 + ~rules () 84 90 85 91 (* ===== Schemas ===== *) 86 92
+4 -1
lib/json/irmin_json.ml
··· 62 62 let ( => ) = S.( => ) 63 63 64 64 let schema : S.children S.t = 65 - S.fix (fun self -> S.node ~dec:parse ~enc:serialize [ "*" => self ]) 65 + S.fix (fun self -> 66 + S.node ~name:"application/json" ~dec:parse ~enc:serialize 67 + ~rules:[ "*" => self ] 68 + ())
+5 -1
lib/oci/irmin_oci.ml
··· 63 63 let ( => ) = S.( => ) 64 64 65 65 let schema : S.children S.t = 66 - S.fix (fun self -> S.node ~dec:parse ~enc:serialize [ "*" => self ]) 66 + S.fix (fun self -> 67 + S.node ~name:"application/vnd.oci.image.manifest.v1+json" ~dec:parse 68 + ~enc:serialize 69 + ~rules:[ "*" => self ] 70 + ())
+262 -234
lib/schema.ml
··· 17 17 H.block -> 18 18 (H.block, [ `Conflict of string ]) result 19 19 20 - type packed = Pack : _ t -> packed 20 + (* ===== Codec ===== 21 + 22 + A codec carries: 23 + - A MIME-style [name] (e.g. "application/json") 24 + - A [Type.Id.t] witness for type-safe recovery 25 + - A [decode]/[encode] pair from blocks to its value type 26 + - A [shape]: Leaf (opaque) or Tree (navigable, with children + rules) 27 + - An optional [merge] for 3-way merge 28 + *) 29 + 30 + type 'a info = { 31 + name : string; 32 + id : 'a Type.Id.t; 33 + decode : H.block -> 'a; 34 + encode : 'a -> H.block; 35 + shape : shape; 36 + merge : merge option; 37 + } 38 + 39 + and shape = Leaf | Tree of tree_ops 40 + and tree_ops = { dec_children : dec; enc_children : enc; rules : rule list } 41 + and 'a t = T of 'a info | Rec of (unit -> 'a t) 21 42 and rule = Rule : string * _ t -> rule 22 43 23 - and _ t = 24 - | Opaque : H.block t 25 - | Node : { 26 - dec : H.block -> children; 27 - enc : children -> H.block; 28 - merge : merge option; 29 - rules : rule list; 44 + type ('a, 'b) field = { fpath : string; fcodec : 'b t } 45 + 46 + let rec resolve : type a. a t -> a info = function 47 + | T i -> i 48 + | Rec f -> resolve (f ()) 49 + 50 + let node ~name ~dec ~enc ?merge ?(rules = []) () = 51 + T 52 + { 53 + name; 54 + id = Type.Id.make (); 55 + decode = dec; 56 + encode = enc; 57 + shape = Tree { dec_children = dec; enc_children = enc; rules }; 58 + merge; 30 59 } 31 - -> children t 32 - | Rec : (unit -> 'a t) -> 'a t 33 - | Map : { schema : 'b t; dec : 'b -> 'a; enc : 'a -> 'b } -> 'a t 34 60 35 - let opaque = Opaque 36 - let ( => ) pat schema = Rule (pat, schema) 37 - let node ~dec ~enc ?merge rules = Node { dec; enc; merge; rules } 61 + let opaque : H.block t = 62 + T 63 + { 64 + name = "opaque"; 65 + id = Type.Id.make (); 66 + decode = Fun.id; 67 + encode = Fun.id; 68 + shape = Leaf; 69 + merge = None; 70 + } 71 + 72 + let map ~dec ~enc inner = 73 + let i = resolve inner in 74 + T 75 + { 76 + name = i.name; 77 + id = Type.Id.make (); 78 + decode = (fun b -> dec (i.decode b)); 79 + encode = (fun a -> i.encode (enc a)); 80 + shape = i.shape; 81 + merge = i.merge; 82 + } 38 83 39 84 let fix f = 40 85 let r = ref None in ··· 42 87 r := Some t; 43 88 t 44 89 45 - let map ~dec ~enc schema = Map { schema; dec; enc } 90 + let ( => ) pat schema = Rule (pat, schema) 91 + let field fpath fcodec = { fpath; fcodec } 92 + let name t = (resolve t).name 93 + let id t = (resolve t).id 94 + 95 + (* ===== Internal helpers ===== *) 46 96 47 - (* Pattern matching *) 48 97 let pattern_match pat name = 49 98 pat = "*" || pat = name 50 99 || Stdlib.String.length pat > 1 ··· 52 101 && Filename.check_suffix name 53 102 (Stdlib.String.sub pat 1 (Stdlib.String.length pat - 1)) 54 103 55 - let find_rule rules name = 104 + type any_codec = Any : _ t -> any_codec 105 + 106 + let find_rule rules name : any_codec option = 56 107 let rec loop = function 57 108 | [] -> None 58 109 | Rule (pat, schema) :: rest -> 59 - if pattern_match pat name then Some (Pack schema) else loop rest 110 + if pattern_match pat name then Some (Any schema) else loop rest 60 111 in 61 112 loop rules 62 113 63 - (* GADT walkers *) 64 - 65 - type node_ops = { 66 - nav : H.block -> children; 67 - of_nav : children -> H.block; 68 - merge : merge option; 69 - rules : rule list; 70 - } 71 - 72 - let rec get_node_ops : type a. a t -> node_ops option = function 73 - | Opaque -> None 74 - | Node { dec; enc; merge; rules } -> 75 - Some { nav = dec; of_nav = enc; merge; rules } 76 - | Rec f -> get_node_ops (f ()) 77 - | Map { schema; _ } -> get_node_ops schema 114 + let kind_of_child = function `Link _ -> `Node | `Inline _ -> `Leaf 78 115 79 - let get_node_packed (Pack s) = get_node_ops s 80 - 81 - let rec decode : type a. a t -> H.block -> a = function 82 - | Opaque -> Fun.id 83 - | Node { dec; _ } -> dec 84 - | Rec f -> decode (f ()) 85 - | Map { schema; dec; _ } -> fun block -> dec (decode schema block) 86 - 87 - let rec encode : type a. a t -> a -> H.block = function 88 - | Opaque -> Fun.id 89 - | Node { enc; _ } -> enc 90 - | Rec f -> encode (f ()) 91 - | Map { schema; enc; _ } -> fun a -> encode schema (enc a) 92 - 93 - (* ===== Children cache ===== *) 116 + (* ===== Cursor ===== *) 94 117 95 118 type overlay = (string * [ `Set of H.block | `Remove ]) list 96 119 ··· 116 139 mutable cached_children : cache option; 117 140 } 118 141 119 - type step = Step : 'a cursor -> step 142 + type step_result = Step : 'a t * 'a cursor -> step_result 120 143 121 144 let at (heap : (H.hash, H.block, _) Heap.t) schema (h : H.hash) = 122 145 { ··· 129 152 } 130 153 131 154 let empty (heap : (H.hash, H.block, _) Heap.t) schema = 155 + let i = resolve schema in 132 156 let empty_block = 133 - match get_node_ops schema with 134 - | Some ops -> ops.of_nav (Named []) 135 - | None -> failwith "Schema.empty: schema has no enc" 157 + match i.shape with 158 + | Tree ops -> ops.enc_children (Named []) 159 + | Leaf -> failwith "Schema.empty: leaf codec has no empty block" 136 160 in 137 161 { 138 162 heap = Heap.find heap; ··· 146 170 let get_data c = 147 171 match c.source with `Block h -> c.heap h | `Inline v -> Some v 148 172 149 - let kind_of_child = function `Link _ -> `Node | `Inline _ -> `Leaf 150 - 151 - let cache (c : children) : cache = 173 + let cache_of_children (c : children) : cache = 152 174 match c with 153 175 | Indexed arr -> By_index arr 154 176 | Named l -> ··· 197 219 let base = List.filter (fun (n, _) -> not (Hashtbl.mem removed n)) base in 198 220 Named (base @ List.rev !added) 199 221 200 - let effective_children (type a) (c : a cursor) = 222 + let effective_children (type a) (c : a cursor) : cache = 201 223 match c.cached_children with 202 224 | Some m -> m 203 225 | None -> 226 + let i = resolve c.schema in 204 227 let kids = 205 - match get_data c with 206 - | None -> 228 + match (i.shape, get_data c) with 229 + | Leaf, _ -> Named [] 230 + | Tree _, None -> 207 231 Named 208 232 (List.filter_map 209 233 (fun (name, op) -> ··· 211 235 | `Set block -> Some (name, (`Inline block : child)) 212 236 | `Remove -> None) 213 237 c.overlay) 214 - | Some data -> ( 215 - match get_node_ops c.schema with 216 - | None -> Named [] 217 - | Some ops -> ( 218 - let base = ops.nav data in 219 - if c.overlay = [] then base 220 - else 221 - match base with 222 - | Named l -> apply_overlay_named l c.overlay 223 - | Indexed arr -> 224 - let l = 225 - Array.to_list 226 - (Array.mapi (fun i v -> (string_of_int i, v)) arr) 227 - in 228 - apply_overlay_named l c.overlay)) 238 + | Tree ops, Some data -> 239 + let base = ops.dec_children data in 240 + if c.overlay = [] then base 241 + else 242 + let base_named = 243 + match base with 244 + | Named l -> l 245 + | Indexed arr -> 246 + Array.to_list 247 + (Array.mapi (fun i v -> (string_of_int i, v)) arr) 248 + in 249 + apply_overlay_named base_named c.overlay 229 250 in 230 - let m = cache kids in 251 + let m = cache_of_children kids in 231 252 c.cached_children <- Some m; 232 253 m 233 254 234 - let step (type a) (c : a cursor) name = 235 - let m = effective_children c in 236 - match cache_find name m with 237 - | None -> None 238 - | Some child_source -> ( 239 - match get_node_ops c.schema with 255 + (* ===== Navigation ===== *) 256 + 257 + let step_any (type a) (c : a cursor) name : step_result option = 258 + let i = resolve c.schema in 259 + match i.shape with 260 + | Leaf -> None 261 + | Tree ops -> ( 262 + let m = effective_children c in 263 + match cache_find name m with 240 264 | None -> None 241 - | Some ops -> ( 265 + | Some child_source -> ( 242 266 match find_rule ops.rules name with 243 267 | None -> None 244 - | Some (Pack child_schema) -> 268 + | Some (Any child_schema) -> 245 269 let frame = 246 270 Frame 247 271 { ··· 258 282 in 259 283 Some 260 284 (Step 261 - { 262 - heap = c.heap; 263 - schema = child_schema; 264 - source; 265 - overlay = []; 266 - parents = frame :: c.parents; 267 - cached_children = None; 268 - }))) 285 + ( child_schema, 286 + { 287 + heap = c.heap; 288 + schema = child_schema; 289 + source; 290 + overlay = []; 291 + parents = frame :: c.parents; 292 + cached_children = None; 293 + } )))) 294 + 295 + let step (type a b) (c : a cursor) (f : (a, b) field) : b cursor option = 296 + match step_any c f.fpath with 297 + | None -> None 298 + | Some (Step (got, c')) -> ( 299 + match Type.Id.provably_equal (id f.fcodec) (id got) with 300 + | Some Type.Equal -> Some c' 301 + | None -> None) 269 302 270 - let up (type a) (c : a cursor) = 303 + let up (type a) (c : a cursor) : step_result option = 271 304 match c.parents with 272 305 | [] -> None 273 306 | Frame f :: rest -> 274 307 Some 275 308 (Step 276 - { 277 - heap = c.heap; 278 - schema = f.schema; 279 - source = f.source; 280 - overlay = f.overlay; 281 - parents = rest; 282 - cached_children = None; 283 - }) 309 + ( f.schema, 310 + { 311 + heap = c.heap; 312 + schema = f.schema; 313 + source = f.source; 314 + overlay = f.overlay; 315 + parents = rest; 316 + cached_children = None; 317 + } )) 318 + 319 + let cast (type a) (codec : a t) (Step (got, c)) : a cursor option = 320 + match Type.Id.provably_equal (id codec) (id got) with 321 + | Some Type.Equal -> Some c 322 + | None -> None 323 + 324 + (* ===== Read ===== *) 284 325 285 326 let get (type a) (c : a cursor) : a option = 286 - Option.map (decode c.schema) (get_data c) 327 + Option.map (resolve c.schema).decode (get_data c) 287 328 288 329 let get_block c = get_data c 289 - 290 - let put (type a) (c : a cursor) (v : a) : a cursor = 291 - let block = encode c.schema v in 292 - { c with source = `Inline block; overlay = []; cached_children = None } 293 - 294 330 let list c = cache_list (effective_children c) 295 331 let path c = List.rev_map (fun (Frame f) -> f.step_name) c.parents 296 332 let hash c = match c.source with `Block h -> Some h | `Inline _ -> None ··· 299 335 fun c -> function 300 336 | [] -> get_data c 301 337 | name :: rest -> ( 302 - match step c name with 338 + match step_any c name with 303 339 | None -> None 304 - | Some (Step child) -> find child rest) 340 + | Some (Step (_, child)) -> find child rest) 305 341 306 342 let mem c p = Option.is_some (find c p) 307 343 308 - (* ===== Mutation ===== *) 344 + (* ===== Write ===== *) 345 + 346 + let put (type a) (c : a cursor) (v : a) : a cursor = 347 + let block = (resolve c.schema).encode v in 348 + { c with source = `Inline block; overlay = []; cached_children = None } 309 349 310 350 let set c name block = 311 351 { c with overlay = (name, `Set block) :: c.overlay; cached_children = None } ··· 314 354 { c with overlay = (name, `Remove) :: c.overlay; cached_children = None } 315 355 316 356 let flush (type a) (c : a cursor) (heap : (H.hash, H.block, _) Heap.t) = 317 - let kids = cache_to_children (effective_children c) in 318 - match get_node_ops c.schema with 319 - | None -> ( 357 + let i = resolve c.schema in 358 + match i.shape with 359 + | Leaf -> ( 320 360 match c.source with `Block h -> h | `Inline data -> H.hash_block data) 321 - | Some ops -> 322 - let block = ops.of_nav kids in 361 + | Tree ops -> 362 + let kids = cache_to_children (effective_children c) in 363 + let block = ops.enc_children kids in 323 364 let h = H.hash_block block in 324 365 Heap.put heap h block; 325 366 h ··· 343 384 344 385 (* ===== Commit ===== *) 345 386 346 - let commit_node ~dec ~enc ~tree = 347 - Node 387 + let commit_node ~name ~dec ~enc ~tree = 388 + T 348 389 { 349 - dec; 350 - enc; 390 + name; 391 + id = Type.Id.make (); 392 + decode = dec; 393 + encode = enc; 394 + shape = 395 + Tree 396 + { 397 + dec_children = dec; 398 + enc_children = enc; 399 + rules = 400 + [ 401 + Rule ("tree", tree); 402 + Rule ("parent/*", Rec (fun () -> opaque)); 403 + Rule ("*", opaque); 404 + ]; 405 + }; 351 406 merge = None; 352 - rules = 353 - [ 354 - Rule ("tree", tree); 355 - Rule ("parent/*", Rec (fun () -> Opaque)); 356 - Rule ("*", Opaque); 357 - ]; 358 407 } 359 408 360 409 (* ===== Merge ===== *) ··· 369 418 370 419 type resolution = { path : string list; value : H.block } 371 420 372 - (* Merge works internally on packed schemas at the block level *) 373 421 let merge (type a) (heap : (H.hash, H.block, _) Heap.t) (schema : a t) 374 422 ~(ancestor : a cursor) ~(ours : a cursor) ~(theirs : a cursor) = 375 423 let conflicts = ref [] in 376 424 let add_conflict ~path ~ancestor ~ours ~theirs message = 377 425 conflicts := { path; ancestor; ours; theirs; message } :: !conflicts 378 426 in 379 - let rec merge_hash ~path (Pack schema) a_hash o_hash t_hash = 427 + let rec merge_hash : type b. path:string list -> b t -> _ = 428 + fun ~path schema a_hash o_hash t_hash -> 380 429 if H.hash_equal o_hash t_hash then o_hash 381 430 else if H.hash_equal a_hash o_hash then t_hash 382 431 else if H.hash_equal a_hash t_hash then o_hash ··· 385 434 (Heap.find heap a_hash, Heap.find heap o_hash, Heap.find heap t_hash) 386 435 with 387 436 | Some a_block, Some o_block, Some t_block -> 388 - merge_block ~path (Pack schema) a_block o_block t_block 437 + merge_block ~path schema a_block o_block t_block 389 438 | _ -> 390 439 add_conflict ~path ~ancestor:None ~ours:None ~theirs:None 391 440 "missing block"; 392 441 o_hash 393 - and merge_block ~path packed a_block o_block t_block = 394 - match get_node_packed packed with 395 - | None -> 396 - add_conflict ~path ~ancestor:(Some a_block) ~ours:(Some o_block) 397 - ~theirs:(Some t_block) "leaf conflict, no merge function"; 398 - let h = H.hash_block o_block in 399 - Heap.put heap h o_block; 400 - h 401 - | Some ops -> ( 402 - match ops.merge with 442 + and merge_block : type b. path:string list -> b t -> _ = 443 + fun ~path schema a_block o_block t_block -> 444 + let i = resolve schema in 445 + match i.shape with 446 + | Leaf -> ( 447 + match i.merge with 448 + | Some merge_fn -> ( 449 + match merge_fn ~ancestor:a_block o_block t_block with 450 + | Ok merged -> 451 + let h = H.hash_block merged in 452 + Heap.put heap h merged; 453 + h 454 + | Error (`Conflict msg) -> 455 + add_conflict ~path ~ancestor:(Some a_block) 456 + ~ours:(Some o_block) ~theirs:(Some t_block) msg; 457 + let h = H.hash_block o_block in 458 + Heap.put heap h o_block; 459 + h) 460 + | None -> 461 + add_conflict ~path ~ancestor:(Some a_block) ~ours:(Some o_block) 462 + ~theirs:(Some t_block) "leaf conflict, no merge function"; 463 + let h = H.hash_block o_block in 464 + Heap.put heap h o_block; 465 + h) 466 + | Tree ops -> ( 467 + match i.merge with 403 468 | Some merge_fn -> ( 404 469 match merge_fn ~ancestor:a_block o_block t_block with 405 470 | Ok merged -> ··· 413 478 Heap.put heap h o_block; 414 479 h) 415 480 | None -> 416 - let a_kids = ops.nav a_block in 417 - let o_kids = ops.nav o_block in 418 - let t_kids = ops.nav t_block in 481 + let a_kids = ops.dec_children a_block in 482 + let o_kids = ops.dec_children o_block in 483 + let t_kids = ops.dec_children t_block in 419 484 let is_empty = function 420 485 | Named [] -> true 421 486 | Indexed [||] -> true ··· 427 492 let h = H.hash_block o_block in 428 493 Heap.put heap h o_block; 429 494 h) 430 - else merge_children ~path ops packed a_kids o_kids t_kids) 431 - and merge_children ~path ops packed a_kids o_kids t_kids = 495 + else merge_children ~path ops a_kids o_kids t_kids) 496 + and merge_children : path:string list -> tree_ops -> _ = 497 + fun ~path ops a_kids o_kids t_kids -> 432 498 let names_of = function 433 499 | Named l -> List.map fst l 434 500 | Indexed arr -> List.init (Array.length arr) string_of_int ··· 449 515 |> List.sort String.compare 450 516 in 451 517 let child_schema name = 452 - match find_rule ops.rules name with Some p -> p | None -> Pack Opaque 518 + match find_rule ops.rules name with 519 + | Some (Any s) -> Any s 520 + | None -> Any opaque 453 521 in 454 522 let rec merge_names acc = function 455 523 | [] -> 456 524 let merged = Named (List.rev acc) in 457 - let block = ops.of_nav merged in 525 + let block = ops.enc_children merged in 458 526 let h = H.hash_block block in 459 527 Heap.put heap h block; 460 528 h ··· 462 530 let a = find_child name a_kids in 463 531 let o = find_child name o_kids in 464 532 let t = find_child name t_kids in 465 - let child_p = child_schema name in 533 + let (Any child_sch) = child_schema name in 466 534 let child_path = path @ [ name ] in 467 - let result = merge_child ~path:child_path child_p a o t in 535 + let result = merge_child ~path:child_path child_sch a o t in 468 536 match result with 469 537 | None -> merge_names acc rest 470 538 | Some child -> merge_names ((name, child) :: acc) rest) 471 - and merge_child ~path packed a o t = 539 + and merge_child : type b. path:string list -> b t -> _ = 540 + fun ~path schema a o t -> 472 541 match (a, o, t) with 473 542 | _, Some oc, Some tc when oc = tc -> Some oc 474 543 | Some ac, Some oc, _ when ac = oc -> t ··· 481 550 | `Link oh, `Link th -> ( 482 551 match (Heap.find heap oh, Heap.find heap th) with 483 552 | Some ob, Some tb -> 553 + let i = resolve schema in 484 554 let empty_block = 485 - match get_node_packed packed with 486 - | Some ops -> ops.of_nav (Named []) 487 - | None -> ob 555 + match i.shape with 556 + | Tree ops -> ops.enc_children (Named []) 557 + | Leaf -> ob 488 558 in 489 - let h = merge_block ~path packed empty_block ob tb in 559 + let h = merge_block ~path schema empty_block ob tb in 490 560 Some (`Link h) 491 561 | _ -> 492 562 add_conflict ~path ~ancestor:None ~ours:(Heap.find heap oh) ··· 525 595 "delete/modify conflict"; 526 596 Some tc 527 597 | Some (`Link ah), Some (`Link oh), Some (`Link th) -> 528 - let h = merge_hash ~path packed ah oh th in 598 + let h = merge_hash ~path schema ah oh th in 529 599 Some (`Link h) 530 600 | Some (`Inline ab), Some (`Inline ob), Some (`Inline tb) -> 531 - let h = merge_block ~path packed ab ob tb in 601 + let h = merge_block ~path schema ab ob tb in 532 602 Some (`Link h) 533 603 | Some _, Some oc, Some _ -> 534 604 add_conflict ~path ~ancestor:None ~ours:None ~theirs:None ··· 537 607 in 538 608 merge_names [] all_names 539 609 in 540 - let get_hash (type b) (c : b cursor) = 541 - match c.source with `Block h -> h | `Inline d -> H.hash_block d 610 + let get_hash : type b. b cursor -> H.hash = 611 + fun c -> match c.source with `Block h -> h | `Inline d -> H.hash_block d 542 612 in 543 613 let merged_hash = 544 - merge_hash ~path:[] (Pack schema) (get_hash ancestor) (get_hash ours) 614 + merge_hash ~path:[] schema (get_hash ancestor) (get_hash ours) 545 615 (get_hash theirs) 546 616 in 547 617 (at heap schema merged_hash, List.rev !conflicts) 548 618 549 - let resolve (type a) (heap : (H.hash, H.block, _) Heap.t) (schema : a t) 550 - (cursor : a cursor) resolutions = 551 - (* Apply resolutions, flush, re-at with original schema *) 552 - let step_s (Step c) name = 553 - match step c name with None -> None | Some s -> Some s 554 - in 555 - let up_s (Step c) = match up c with None -> None | Some s -> Some s in 556 - let s = ref (Step cursor) in 619 + let resolve_conflicts (type a) (heap : (H.hash, H.block, _) Heap.t) 620 + (schema : a t) (cursor : a cursor) (resolutions : resolution list) : 621 + a cursor = 622 + let s = ref (Step (schema, cursor)) in 557 623 List.iter 558 624 (fun (r : resolution) -> 559 625 match r.path with ··· 563 629 let name = List.nth r.path (List.length r.path - 1) in 564 630 let nav = 565 631 List.fold_left 566 - (fun acc n -> 567 - match acc with Some s -> step_s s n | None -> None) 632 + (fun acc step_name -> 633 + match acc with 634 + | Some (Step (_, c)) -> step_any c step_name 635 + | None -> None) 568 636 (Some !s) parent_path 569 637 in 570 638 match nav with 571 - | Some (Step parent) -> 572 - let updated = Step (set parent name r.value) in 639 + | Some (Step (sc, parent)) -> 640 + let updated = set parent name r.value in 573 641 let rec go_up s = 574 - match up_s s with Some s' -> go_up s' | None -> s 642 + let (Step (_, c)) = s in 643 + match up c with Some s' -> go_up s' | None -> s 575 644 in 576 - s := go_up updated 645 + s := go_up (Step (sc, updated)) 577 646 | None -> ())) 578 647 resolutions; 579 - (* Flush and reconstruct at the original schema type *) 580 - let (Step result) = !s in 648 + let (Step (_, result)) = !s in 581 649 let h = flush result heap in 582 650 at heap schema h 583 651 584 - (* ===== Merge combinators ===== *) 585 - 652 + let resolve = resolve_conflicts 586 653 let merge_lww : merge = fun ~ancestor:_ _ours theirs -> Ok theirs 587 654 let merge_ours : merge = fun ~ancestor:_ ours _theirs -> Ok ours 588 655 ··· 600 667 | Some h -> Some h 601 668 | None -> Option.map H.hash_block (get_data c) 602 669 in 603 - let same (type b c) (c1 : b cursor) (c2 : c cursor) = 670 + let same_step (Step (_, c1)) (Step (_, c2)) = 604 671 match (hash_of c1, hash_of c2) with 605 672 | Some h1, Some h2 -> H.hash_equal h1 h2 606 673 | _ -> false 607 674 in 608 - let rec go : type b. path:string list -> b cursor -> b cursor -> _ = 609 - fun ~path a b acc -> 610 - if same a b then acc 611 - else 612 - let a_kids = list a in 613 - let b_kids = list b in 614 - match (a_kids, b_kids) with 615 - | [], [] -> ( 616 - match (get_data a, get_data b) with 617 - | Some va, Some vb -> 618 - { diff_path = path; kind = `Change (va, vb) } :: acc 619 - | None, Some vb -> { diff_path = path; kind = `Add vb } :: acc 620 - | Some va, None -> { diff_path = path; kind = `Remove va } :: acc 621 - | None, None -> acc) 622 - | _ -> 623 - let all = 624 - List.sort_uniq String.compare 625 - (List.map fst a_kids @ List.map fst b_kids) 626 - in 627 - List.fold_left 628 - (fun acc name -> 629 - match (step a name, step b name) with 630 - | None, None -> acc 631 - | None, Some (Step bc) -> 632 - collect `Add ~path:(path @ [ name ]) (Step bc) acc 633 - | Some (Step ac), None -> 634 - collect `Remove ~path:(path @ [ name ]) (Step ac) acc 635 - | Some (Step ac), Some (Step bc) -> 636 - go_any ~path:(path @ [ name ]) (Step ac) (Step bc) acc) 637 - acc all 638 - and go_any ~path (Step a) (Step b) acc = 639 - (* Both are at the same structural position but possibly different types. 640 - Compare at block level. *) 641 - let hash_of (type b) (c : b cursor) = 642 - match hash c with 643 - | Some h -> Some h 644 - | None -> Option.map H.hash_block (get_data c) 645 - in 646 - let same = 647 - match (hash_of a, hash_of b) with 648 - | Some h1, Some h2 -> H.hash_equal h1 h2 649 - | _ -> false 650 - in 651 - if same then acc 675 + let rec go ~path s1 s2 acc = 676 + if same_step s1 s2 then acc 652 677 else 653 - let a_kids = list a and b_kids = list b in 678 + let (Step (_, ca)) = s1 in 679 + let (Step (_, cb)) = s2 in 680 + let a_kids = list ca in 681 + let b_kids = list cb in 654 682 match (a_kids, b_kids) with 655 683 | [], [] -> ( 656 - match (get_data a, get_data b) with 684 + match (get_data ca, get_data cb) with 657 685 | Some va, Some vb -> 658 686 { diff_path = path; kind = `Change (va, vb) } :: acc 659 687 | None, Some vb -> { diff_path = path; kind = `Add vb } :: acc ··· 666 694 in 667 695 List.fold_left 668 696 (fun acc name -> 669 - match (step a name, step b name) with 697 + match (step_any ca name, step_any cb name) with 670 698 | None, None -> acc 671 - | None, Some (Step bc) -> 672 - collect `Add ~path:(path @ [ name ]) (Step bc) acc 673 - | Some (Step ac), None -> 674 - collect `Remove ~path:(path @ [ name ]) (Step ac) acc 675 - | Some (Step ac), Some (Step bc) -> 676 - go_any ~path:(path @ [ name ]) (Step ac) (Step bc) acc) 699 + | None, Some sb -> collect `Add ~path:(path @ [ name ]) sb acc 700 + | Some sa, None -> 701 + collect `Remove ~path:(path @ [ name ]) sa acc 702 + | Some sa, Some sb -> go ~path:(path @ [ name ]) sa sb acc) 677 703 acc all 678 - and collect dir ~path (Step c) acc = 704 + and collect dir ~path (Step (_, c)) acc = 679 705 let kids = list c in 680 706 if kids = [] then 681 707 match get_data c with ··· 686 712 else 687 713 List.fold_left 688 714 (fun acc (name, _) -> 689 - match step c name with 715 + match step_any c name with 690 716 | Some s -> collect dir ~path:(path @ [ name ]) s acc 691 717 | None -> acc) 692 718 acc kids 693 719 in 694 - List.rev (go ~path:[] a b []) 720 + let s_a = Step (a.schema, a) in 721 + let s_b = Step (b.schema, b) in 722 + List.rev (go ~path:[] s_a s_b []) 695 723 696 724 let ddiff ~old_base ~old_tip ~new_base ~new_tip = 697 725 let d1 = diff old_base old_tip in ··· 714 742 } 715 743 716 744 let produce (type a) (heap : (H.hash, H.block, _) Heap.t) (schema : a t) 717 - (root : H.hash) (f : a cursor -> step * 'b) : proof * 'b = 745 + (root : H.hash) (f : a cursor -> step_result * 'b) : proof * 'b = 718 746 let recording_heap, get_recorded = Heap.recording heap in 719 747 let c = at recording_heap schema root in 720 - let Step result_c, result = f c in 748 + let Step (_, result_c), result = f c in 721 749 let after = 722 750 match result_c.source with 723 751 | `Block h -> h ··· 726 754 let heap = Heap.of_list ~equal:H.hash_equal (get_recorded ()) in 727 755 ({ before = root; after; heap }, result) 728 756 729 - let verify (type a) proof (schema : a t) (f : a cursor -> step * 'b) : 757 + let verify (type a) proof (schema : a t) (f : a cursor -> step_result * 'b) : 730 758 ('b, [ `Proof_failure of string ]) result = 731 759 let c = at proof.heap schema proof.before in 732 - let Step result_c, result = f c in 760 + let Step (_, result_c), result = f c in 733 761 let computed = 734 762 match result_c.source with 735 763 | `Block h -> h
+160 -68
lib/schema.mli
··· 1 - (** Content-addressed DAG navigation with typed cursors. 1 + (** Content-addressed DAG navigation with typed cursors and codecs. 2 2 3 - A schema describes the structure of a content-addressed DAG and how to 4 - decode each block into a typed value. The cursor navigates lazily, reads 5 - return typed values, and writes are delayed until {!flush}. 3 + A {b codec} ({!t}) is a typed view of a content-addressed block. Each codec 4 + carries a MIME-style {!val-name} and a runtime type witness ({!val-id}) 5 + based on {!Stdlib.Type.Id.t}. The schema author defines codecs and their 6 + nested {!rule}s; cursors then navigate the DAG via path patterns 7 + ({!step_any}) or typed {!field}s ({!val-step}). 6 8 7 9 {[ 8 - let directory = node ~dec:git_tree_dec ~enc:git_tree_enc 9 - let json_node = node ~dec:json_dec ~enc:json_enc 10 + let json = node ~name:"application/json" ~dec ~enc () 10 11 11 - let json = fix (fun self -> json_node [ "*" => self ]) 12 - let git_entry target = 13 - entry [ "mode" => opaque; "target" => target ] 14 - let git_tree = fix (fun self -> 15 - directory [ "*.json" => git_entry json; "*" => git_entry self ]) 12 + let directory = 13 + fix (fun self -> 14 + node ~name:"application/x-tree" ~dec ~enc 15 + ~rules:[ "*.json" => json; "*" => self ] ()) 16 16 17 - let c = at heap git_tree root in 18 - match step c "config.json" with 19 - | Some (Step entry) -> 20 - match get entry with Some children -> ... | None -> ... 21 - | None -> ... 17 + let c = at heap directory root in 18 + match step c (field "config.json" json) with 19 + | Some json_c -> get json_c (* : json_value option *) 20 + | None -> ... (* missing path or wrong codec *) 22 21 ]} *) 23 22 24 23 module Make (H : sig ··· 28 27 val hash_equal : hash -> hash -> bool 29 28 val hash_block : block -> hash 30 29 end) : sig 31 - (** {1:schema Schema} *) 30 + (** {1:children Children} 31 + 32 + A block's structural view: a list of named or indexed entries, each either 33 + an inline blob or a link to another block. *) 32 34 33 35 type child = [ `Link of H.hash | `Inline of H.block ] 34 - (** A child is either a link to another block or an inline value. *) 36 + (** A child entry: a hash link to another block, or an inline value. *) 35 37 36 38 type children = 37 39 | Named of (string * child) list ··· 50 52 H.block -> 51 53 H.block -> 52 54 (H.block, [ `Conflict of string ]) result 53 - (** Block-level 3-way merge. *) 55 + (** Block-level 3-way merge. Used at leaves where a value-aware merge strategy 56 + applies (counters, sets, text, etc.). *) 57 + 58 + (** {1:codec Codec} 59 + 60 + A codec describes a typed block at a single level of the DAG. It carries a 61 + MIME-style name, a {!Type.Id.t} witness, encoder/decoder, optional 62 + {!merge}, and any {!rule}s for sub-paths. *) 54 63 55 64 type 'a t 56 - (** A schema node that decodes blocks into values of type ['a]. *) 65 + (** A codec for blocks whose value type is ['a]. *) 57 66 58 67 type rule 59 - (** A pattern-schema pair. See {!(=>)}. *) 68 + (** A pattern–codec pair. See {!(=>)}. *) 60 69 61 70 val ( => ) : string -> _ t -> rule 62 - (** [pattern => schema] matches children by name. ["*"] matches anything, 71 + (** [pattern => codec] matches children by name. ["*"] matches anything, 63 72 ["*.json"] matches by suffix, exact names match exactly. First matching 64 73 rule wins. *) 65 74 66 75 val node : 67 - dec:(H.block -> children) -> 68 - enc:(children -> H.block) -> 76 + name:string -> 77 + dec:dec -> 78 + enc:enc -> 69 79 ?merge:merge -> 70 - rule list -> 80 + ?rules:rule list -> 81 + unit -> 71 82 children t 72 - (** [node ~dec ~enc ?merge rules] is a schema for blocks with named children. 73 - The value type is {!children}. Use {!map} to lift to a domain type. *) 83 + (** [node ~name ~dec ~enc ?merge ?rules ()] is a navigable codec whose value 84 + type is the structural {!children}. The MIME-style [name] (e.g. 85 + ["application/json"]) identifies the format. *) 74 86 75 87 val opaque : H.block t 76 - (** No children. The value is the raw block. *) 88 + (** Leaf codec: value is the raw block, no children, no navigation. [name] is 89 + ["opaque"]. *) 90 + 91 + val map : dec:('b -> 'a) -> enc:('a -> 'b) -> 'b t -> 'a t 92 + (** [map ~dec ~enc inner] wraps [inner] with a new value type ['a]. Navigation 93 + (children extraction, rules) is inherited from [inner]. *) 77 94 78 95 val fix : ('a t -> 'a t) -> 'a t 79 - (** [fix f] is a recursive schema. *) 96 + (** [fix f] is a recursive codec: [f] receives a self-reference. *) 97 + 98 + val name : _ t -> string 99 + (** [name c] is the MIME-style identifier (e.g. ["application/json"]). *) 100 + 101 + val id : 'a t -> 'a Type.Id.t 102 + (** [id c] is the runtime type witness for [c]'s value type. *) 103 + 104 + (** {1:fields Typed fields} 105 + 106 + A {!field} pairs a path string with the codec expected at that path. 107 + {!val-step} uses a field for type-safe navigation: if the schema's rule at 108 + that path uses a different codec, [step] returns [None] rather than 109 + yielding a wrongly-typed cursor. *) 80 110 81 - val map : dec:('b -> 'a) -> enc:('a -> 'b) -> 'b t -> 'a t 82 - (** [map ~dec ~enc schema] transforms the value type. Navigation still uses 83 - the underlying structural schema; [dec] and [enc] wrap {!get}/{!put}. *) 111 + type ('a, 'b) field 112 + (** [(\'a, \'b) field] navigates from an ['a]-typed parent cursor to a 113 + ['b]-typed child cursor. *) 114 + 115 + val field : string -> 'b t -> ('a, 'b) field 116 + (** [field path codec] is the typed accessor for [path] expecting [codec]. *) 84 117 85 - (** {1:cursor Typed Cursor} 118 + (** {1:cursor Cursor} 86 119 87 120 A cursor is a position in the DAG with a known value type. Reads are lazy: 88 121 blocks are fetched on demand. Writes are delayed until {!flush}. *) 89 122 90 123 type 'a cursor 91 - (** A position in the DAG where values have type ['a]. *) 124 + (** A position in the DAG where the focused block has value type ['a]. *) 92 125 93 - type step = 94 - | Step : 'a cursor -> step 95 - (** Existential wrapper returned by {!val-step} and {!up}, since the 96 - child's type depends on which rule matched. *) 126 + type step_result = 127 + | Step : 'a t * 'a cursor -> step_result 128 + (** Existential wrapper returned by untyped navigation ({!step_any}, 129 + {!up}). The wrapped codec carries the runtime type witness, so 130 + {!cast} or pattern matching against a known codec recovers the 131 + static type. *) 97 132 98 133 (** {2 Create} *) 99 134 100 135 val at : (H.hash, H.block, _) Heap.t -> 'a t -> H.hash -> 'a cursor 101 - (** [at heap schema root] is a cursor at [root]. *) 136 + (** [at heap codec root] is a cursor at [root] in [heap]. *) 102 137 103 138 val empty : (H.hash, H.block, _) Heap.t -> 'a t -> 'a cursor 104 - (** [empty heap schema] is a cursor at an empty node. *) 139 + (** [empty heap codec] is a cursor at an empty node (must be a Tree codec). *) 105 140 106 141 (** {2 Navigate} *) 107 142 108 - val step : _ cursor -> string -> step option 109 - (** [step c name] moves to child [name]. *) 143 + val step : 'a cursor -> ('a, 'b) field -> 'b cursor option 144 + (** [step c field] navigates [field]'s path and recovers a typed cursor. 145 + Returns [None] if the path is missing or the schema's rule uses a 146 + different codec than [field]'s expected codec. *) 147 + 148 + val step_any : _ cursor -> string -> step_result option 149 + (** [step_any c name] navigates by name only. The result is existential — use 150 + {!cast} or pattern matching on the codec to recover the type. *) 110 151 111 - val up : _ cursor -> step option 152 + val up : _ cursor -> step_result option 112 153 (** [up c] moves to the parent. [None] at root. *) 113 154 155 + val cast : 'a t -> step_result -> 'a cursor option 156 + (** [cast codec s] recovers a typed cursor from an untyped step result iff 157 + [s]'s codec id matches [codec]'s id. *) 158 + 114 159 (** {2 Read} *) 115 160 116 161 val get : 'a cursor -> 'a option 117 162 (** [get c] decodes and returns the value at this position. *) 118 163 119 164 val get_block : _ cursor -> H.block option 120 - (** [get_block c] returns the raw block, bypassing the schema's decoder. 121 - Useful when the cursor type is existentially bound (e.g., after {!step}). 122 - *) 165 + (** [get_block c] returns the raw block, bypassing the codec's decoder. Useful 166 + when the cursor type is existentially bound (e.g., after {!step_any}). *) 123 167 124 168 val list : _ cursor -> (string * [ `Node | `Leaf ]) list 125 169 (** [list c] lists children with their kind. *) ··· 139 183 (** {2 Write} *) 140 184 141 185 val put : 'a cursor -> 'a -> 'a cursor 142 - (** [put c v] replaces the value at this position. *) 186 + (** [put c v] replaces the value at this position with the encoded form of 187 + [v]. *) 143 188 144 189 val set : 'a cursor -> string -> H.block -> 'a cursor 145 190 (** [set c name block] sets child [name] to a raw block. *) ··· 151 196 (** [flush c heap] writes pending mutations to [heap] and returns the new root 152 197 hash. *) 153 198 154 - (** {1:refs Refs} *) 199 + (** {1:refs Refs} 200 + 201 + Named mutable pointers into the heap. *) 155 202 156 203 val head : (H.hash, _, _) Heap.t -> branch:string -> H.hash option 204 + (** [head heap ~branch] is the hash at the tip of [branch]. *) 205 + 157 206 val set_head : (H.hash, _, _) Heap.t -> branch:string -> H.hash -> unit 207 + (** [set_head heap ~branch h] advances [branch] to [h]. *) 208 + 158 209 val branches : (_, _, _) Heap.t -> string list 210 + (** [branches heap] lists all branches. *) 159 211 160 212 val update_branch : 161 213 (H.hash, _, _) Heap.t -> ··· 163 215 old:H.hash option -> 164 216 new_:H.hash -> 165 217 bool 218 + (** [update_branch heap ~branch ~old ~new_] is a compare-and-set. *) 166 219 167 - (** {1:commit Commits} *) 220 + (** {1:commit Commits} 168 221 169 - val commit_node : 170 - dec:(H.block -> children) -> 171 - enc:(children -> H.block) -> 172 - tree:_ t -> 173 - children t 222 + A commit is a node linking a tree root to parents with metadata. *) 174 223 175 - (** {1:merge Merge} *) 224 + val commit_node : name:string -> dec:dec -> enc:enc -> tree:_ t -> children t 225 + (** [commit_node ~name ~dec ~enc ~tree] is a codec for commit blocks. 226 + Children: ["tree"] links to [tree], ["parent/0"], ["parent/1"], ... link 227 + to parent commits, ["message"] and ["author"] are inline metadata. *) 228 + 229 + (** {1:merge Merge} 230 + 231 + Two-phase 3-way merge. Phase 1 resolves everything the schema can merge 232 + automatically (structural for trees, typed merge functions for leaves). 233 + Phase 2 surfaces remaining conflicts for external resolution. 234 + 235 + Phase 1 always succeeds: it returns a merged tree plus a conflict list. 236 + For types with total merge functions (counters, sets, LWW), the conflict 237 + list is always empty — eventual consistency for free. *) 176 238 177 239 type conflict = { 178 - path : string list; 179 - ancestor : H.block option; 180 - ours : H.block option; 181 - theirs : H.block option; 182 - message : string; 240 + path : string list; (** Path from root to the conflicting node. *) 241 + ancestor : H.block option; (** Common ancestor value. *) 242 + ours : H.block option; (** Our value. *) 243 + theirs : H.block option; (** Their value. *) 244 + message : string; (** Description of the conflict. *) 183 245 } 184 246 185 - type resolution = { path : string list; value : H.block } 247 + type resolution = { 248 + path : string list; (** Path of the conflict to resolve. *) 249 + value : H.block; (** The resolved value. *) 250 + } 186 251 187 252 val merge : 188 253 (H.hash, H.block, _) Heap.t -> ··· 191 256 ours:'a cursor -> 192 257 theirs:'a cursor -> 193 258 'a cursor * conflict list 259 + (** [merge heap codec ~ancestor ~ours ~theirs] performs a two-phase 3-way 260 + merge. Always returns a cursor at the (partially) merged tree alongside 261 + any unresolved conflicts. *) 194 262 195 263 val resolve : 196 264 (H.hash, H.block, _) Heap.t -> ··· 198 266 'a cursor -> 199 267 resolution list -> 200 268 'a cursor 269 + (** [resolve heap codec cursor resolutions] applies conflict resolutions to 270 + the merged tree. *) 271 + 272 + (** {2 Generic merge combinators} 273 + 274 + These work at the block level (they don't inspect the value). For typed 275 + combinators (counters, sets, text), see {!Irmin.Merge}. *) 201 276 202 277 val merge_lww : merge 278 + (** Last-write-wins: picks [theirs]. Always resolves. *) 279 + 203 280 val merge_ours : merge 281 + (** Always picks [ours]. Always resolves. *) 204 282 205 - (** {1:diff Diff} *) 283 + (** {1:diff Diff} 284 + 285 + Structural diff between two trees, and ddiff for 4-point merge workflows 286 + (à la Jane Street Iron). *) 206 287 207 288 type diff_entry = { 208 289 diff_path : string list; ··· 211 292 } 212 293 213 294 val diff : 'a cursor -> 'a cursor -> diff_entry list 295 + (** [diff a b] returns the structural difference between trees [a] and [b]. *) 214 296 215 297 val ddiff : 216 298 old_base:'a cursor -> ··· 218 300 new_base:'a cursor -> 219 301 new_tip:'a cursor -> 220 302 diff_entry list 303 + (** [ddiff ~old_base ~old_tip ~new_base ~new_tip] computes the diff of two 304 + diffs: what changed between [diff(old_base, old_tip)] and 305 + [diff(new_base, new_tip)]. Iron's 4-point merge for incremental review. *) 306 + 307 + (** {1:proof Proofs} 221 308 222 - (** {1:proof Proofs} *) 309 + A proof is a subheap: a {!Heap.t} containing only the blocks needed to 310 + replay a computation. *) 223 311 224 312 type proof = { 225 - before : H.hash; 226 - after : H.hash; 227 - heap : (H.hash, H.block, unit) Heap.t; 313 + before : H.hash; (** Root hash before the computation. *) 314 + after : H.hash; (** Root hash after the computation. *) 315 + heap : (H.hash, H.block, unit) Heap.t; (** The subheap. *) 228 316 } 229 317 230 318 val produce : 231 319 (H.hash, H.block, _) Heap.t -> 232 320 'a t -> 233 321 H.hash -> 234 - ('a cursor -> step * 'b) -> 322 + ('a cursor -> step_result * 'b) -> 235 323 proof * 'b 324 + (** [produce heap codec root f] runs [f] on [heap] and returns the subheap 325 + that [f] read, together with [f]'s result. *) 236 326 237 327 val verify : 238 328 proof -> 239 329 'a t -> 240 - ('a cursor -> step * 'b) -> 330 + ('a cursor -> step_result * 'b) -> 241 331 ('b, [ `Proof_failure of string ]) result 332 + (** [verify proof codec f] replays [f] on [proof.heap]. Returns [f]'s result 333 + iff the final hash matches [proof.after]. *) 242 334 end
+4 -1
lib/tar/irmin_tar.ml
··· 51 51 let ( => ) = S.( => ) 52 52 53 53 let tree = 54 - S.fix (fun self -> S.node ~dec:dir_parse ~enc:dir_serialize [ "*" => self ]) 54 + S.fix (fun self -> 55 + S.node ~name:"application/x-tar-tree" ~dec:dir_parse ~enc:dir_serialize 56 + ~rules:[ "*" => self ] 57 + ()) 55 58 56 59 (** Build a Merkle tree from a list of (path, content) pairs. Writes all blobs 57 60 and tree nodes to the heap. Returns the root hash. *)
+10 -10
test/bench/bench.ml
··· 33 33 (* Step into each entry and read target *) 34 34 List.iter 35 35 (fun (name, _) -> 36 - match S.step c name with 37 - | Some (S.Step entry) -> ( 38 - match S.step entry "target" with 39 - | Some (S.Step blob) -> ignore (S.get_block blob) 36 + match S.step_any c name with 37 + | Some (S.Step (_, entry)) -> ( 38 + match S.step_any entry "target" with 39 + | Some (S.Step (_, blob)) -> ignore (S.get_block blob) 40 40 | None -> ()) 41 41 | None -> ()) 42 42 kids ··· 47 47 let kids = S.list c in 48 48 assert (List.length kids = n); 49 49 (* Navigate to first entry *) 50 - match S.step c (Fmt.str "key-%04d" 0) with 51 - | Some (S.Step entry) -> ( 52 - match S.step entry "target" with 53 - | Some (S.Step blob) -> (S.Step blob, S.get_block blob) 54 - | None -> (S.Step c, None)) 55 - | None -> (S.Step c, None)) 50 + match S.step_any c (Fmt.str "key-%04d" 0) with 51 + | Some (S.Step (_, entry)) -> ( 52 + match S.step_any entry "target" with 53 + | Some (S.Step (sc, blob)) -> (S.Step (sc, blob), S.get_block blob) 54 + | None -> (S.Step (Irmin_git.tree, c), None)) 55 + | None -> (S.Step (Irmin_git.tree, c), None)) 56 56 in 57 57 () 58 58
+3 -3
test/mst_proof/mst_proof.ml
··· 46 46 47 47 (* Produce a proof for reading a post *) 48 48 let read_post c = 49 - let (S.Step post) = S.step c "post" |> Option.get in 50 - let (S.Step v) = S.step post "3k2yihx" |> Option.get in 51 - (S.Step v, S.get_block v) 49 + let (S.Step (_, post)) = S.step_any c "post" |> Option.get in 50 + let (S.Step (sc, v)) = S.step_any post "3k2yihx" |> Option.get in 51 + (S.Step (sc, v), S.get_block v) 52 52 in 53 53 let proof, value = S.produce heap Irmin_tar.tree root read_post in 54 54
+29 -21
test/schema/test.ml
··· 66 66 67 67 (* Serialize stubs — sufficient for read-only tests *) 68 68 let noop_serialize _ = "" 69 - let directory = S.node ~dec:git_tree_parse ~enc:noop_serialize 70 - let entry = S.node ~dec:git_entry_parse ~enc:noop_serialize 71 - let json_node = S.node ~dec:json_parse ~enc:noop_serialize 69 + 70 + let directory rules = 71 + S.node ~name:"application/x-git-tree" ~dec:git_tree_parse ~enc:noop_serialize 72 + ~rules () 73 + 74 + let entry rules = 75 + S.node ~name:"application/x-git-entry" ~dec:git_entry_parse 76 + ~enc:noop_serialize ~rules () 77 + 78 + let json_node rules = 79 + S.node ~name:"application/json" ~dec:json_parse ~enc:noop_serialize ~rules () 72 80 73 81 (* ===== Schemas ===== *) 74 82 ··· 122 130 Alcotest.(check (list string)) "root" [ "config.json"; "main.ml" ] kids; 123 131 124 132 (* Step into config.json entry *) 125 - let (S.Step c) = S.step c "config.json" |> Option.get in 133 + let (S.Step (_, c)) = S.step_any c "config.json" |> Option.get in 126 134 let entry_kids = S.list c |> List.map fst |> List.sort String.compare in 127 135 Alcotest.(check (list string)) 128 136 "entry fields" [ "mode"; "target" ] entry_kids; 129 137 130 138 (* Mode *) 131 - let (S.Step c_mode) = S.step c "mode" |> Option.get in 139 + let (S.Step (_, c_mode)) = S.step_any c "mode" |> Option.get in 132 140 Alcotest.(check (option string)) 133 141 "mode" (Some "100644") (S.get_block c_mode); 134 142 135 143 (* Follow target link → JSON blob *) 136 - let (S.Step c_blob) = S.step c "target" |> Option.get in 144 + let (S.Step (_, c_blob)) = S.step_any c "target" |> Option.get in 137 145 138 146 (* JSON navigation: object keys *) 139 147 let json_kids = ··· 142 150 Alcotest.(check (list string)) "json keys" [ "name"; "version" ] json_kids; 143 151 144 152 (* Step into JSON "name" *) 145 - let (S.Step c_name) = S.step c_blob "name" |> Option.get in 153 + let (S.Step (_, c_name)) = S.step_any c_blob "name" |> Option.get in 146 154 Alcotest.(check (option string)) 147 155 "json name" (Some {|"irmin"|}) (S.get_block c_name); 148 156 149 157 (* Go up: name → blob → entry → tree *) 150 - let (S.Step c_up) = S.up c_name |> Option.get in 151 - let (S.Step c_up) = S.up c_up |> Option.get in 152 - let (S.Step c_up) = S.up c_up |> Option.get in 158 + let (S.Step (_, c_up)) = S.up c_name |> Option.get in 159 + let (S.Step (_, c_up)) = S.up c_up |> Option.get in 160 + let (S.Step (_, c_up)) = S.up c_up |> Option.get in 153 161 let root_kids = S.list c_up |> List.map fst |> List.sort String.compare in 154 162 Alcotest.(check (list string)) 155 163 "back to root" ··· 186 194 187 195 (* The computation: navigate to data.json → target → key *) 188 196 let read_key c = 189 - let (S.Step c) = S.step c "data.json" |> Option.get in 190 - let (S.Step c) = S.step c "target" |> Option.get in 191 - let (S.Step c) = S.step c "key" |> Option.get in 192 - (S.Step c, S.get_block c) 197 + let (S.Step (_, c)) = S.step_any c "data.json" |> Option.get in 198 + let (S.Step (_, c)) = S.step_any c "target" |> Option.get in 199 + let (S.Step (sc, c)) = S.step_any c "key" |> Option.get in 200 + (S.Step (sc, c), S.get_block c) 193 201 in 194 202 195 203 (* Produce *) ··· 258 266 (* Step to nonexistent child *) 259 267 Alcotest.(check bool) 260 268 "nonexistent = None" true 261 - (Option.is_none (S.step c "does_not_exist")); 269 + (Option.is_none (S.step_any c "does_not_exist")); 262 270 263 271 (* Step to existing child, then nonexistent subchild *) 264 - let (S.Step c) = S.step c "a.ml" |> Option.get in 272 + let (S.Step (_, c)) = S.step_any c "a.ml" |> Option.get in 265 273 Alcotest.(check bool) 266 274 "no such field" true 267 - (Option.is_none (S.step c "nonexistent_field"))) 275 + (Option.is_none (S.step_any c "nonexistent_field"))) 268 276 269 277 let test_proof_tamper () = 270 278 Eio_main.run @@ fun env -> ··· 287 295 let heap = Irmin_git.heap repo in 288 296 289 297 let read c = 290 - let (S.Step c) = S.step c "f.json" |> Option.get in 291 - let (S.Step c) = S.step c "target" |> Option.get in 292 - let (S.Step c) = S.step c "k" |> Option.get in 293 - (S.Step c, S.get_block c) 298 + let (S.Step (_, c)) = S.step_any c "f.json" |> Option.get in 299 + let (S.Step (_, c)) = S.step_any c "target" |> Option.get in 300 + let (S.Step (sc, c)) = S.step_any c "k" |> Option.get in 301 + (S.Step (sc, c), S.get_block c) 294 302 in 295 303 296 304 (* Produce a valid proof *)
+37 -28
test/tar/test.ml
··· 46 46 Alcotest.(check (list string)) "root dirs" [ "README.md"; "doc"; "src" ] kids; 47 47 48 48 (* Step into src *) 49 - let (S.Step src) = S.step c "src" |> Option.get in 49 + let (S.Step (_, src)) = S.step_any c "src" |> Option.get in 50 50 let src_kids = S.list src |> List.map fst |> List.sort String.compare in 51 51 Alcotest.(check (list string)) "src files" [ "main.ml"; "util.ml" ] src_kids; 52 52 53 53 (* Read a file *) 54 - let (S.Step main) = S.step src "main.ml" |> Option.get in 54 + let (S.Step (_, main)) = S.step_any src "main.ml" |> Option.get in 55 55 Alcotest.(check (option string)) 56 56 "main.ml" (Some "let () = ()") (S.get_block main); 57 57 58 58 (* Navigate doc *) 59 - let (S.Step doc) = S.step c "doc" |> Option.get in 60 - let (S.Step guide) = S.step doc "guide.md" |> Option.get in 59 + let (S.Step (_, doc)) = S.step_any c "doc" |> Option.get in 60 + let (S.Step (_, guide)) = S.step_any doc "guide.md" |> Option.get in 61 61 Alcotest.(check (option string)) 62 62 "guide" (Some "Usage guide") (S.get_block guide); 63 63 ··· 66 66 "guide path" [ "doc"; "guide.md" ] (S.path guide); 67 67 68 68 (* Up *) 69 - let (S.Step doc') = S.up guide |> Option.get in 69 + let (S.Step (_, doc')) = S.up guide |> Option.get in 70 70 let doc_kids = S.list doc' |> List.map fst in 71 71 Alcotest.(check (list string)) "back to doc" [ "guide.md" ] doc_kids; 72 72 73 73 (* Proof *) 74 74 let proof, value = 75 75 S.produce heap Irmin_tar.tree root (fun c -> 76 - let (S.Step src) = S.step c "src" |> Option.get in 77 - let (S.Step main) = S.step src "main.ml" |> Option.get in 78 - (S.Step main, S.get_block main)) 76 + let (S.Step (_, src)) = S.step_any c "src" |> Option.get in 77 + let (S.Step (sc, main)) = S.step_any src "main.ml" |> Option.get in 78 + (S.Step (sc, main), S.get_block main)) 79 79 in 80 80 Alcotest.(check (option string)) "proved" (Some "let () = ()") value; 81 81 match 82 82 S.verify proof Irmin_tar.tree (fun c -> 83 - let (S.Step src) = S.step c "src" |> Option.get in 84 - let (S.Step main) = S.step src "main.ml" |> Option.get in 85 - (S.Step main, S.get_block main)) 83 + let (S.Step (_, src)) = S.step_any c "src" |> Option.get in 84 + let (S.Step (sc, main)) = S.step_any src "main.ml" |> Option.get in 85 + (S.Step (sc, main), S.get_block main)) 86 86 with 87 87 | Ok v -> Alcotest.(check (option string)) "verified" (Some "let () = ()") v 88 88 | Error (`Proof_failure msg) -> Alcotest.failf "verify: %s" msg ··· 113 113 let ( => ) = S.( => ) 114 114 115 115 let json = 116 - S.fix (fun self -> S.node ~dec:json_parse ~enc:(fun _ -> "") [ "*" => self ]) 116 + S.fix (fun self -> 117 + S.node ~name:"application/json" ~dec:json_parse 118 + ~enc:(fun _ -> "") 119 + ~rules:[ "*" => self ] 120 + ()) 117 121 118 122 (* Tar tree where *.json files are JSON-navigable *) 119 123 let tar_json = 120 124 S.fix (fun self -> 121 - S.node ~dec:Irmin_tar.dir_parse ~enc:Irmin_tar.dir_serialize 122 - [ "*.json" => json; "*" => self ]) 125 + S.node ~name:"application/x-tar-tree+json" ~dec:Irmin_tar.dir_parse 126 + ~enc:Irmin_tar.dir_serialize 127 + ~rules:[ "*.json" => json; "*" => self ] 128 + ()) 123 129 124 130 let test_tar_json_proof () = 125 131 let store = Hashtbl.create 16 in ··· 147 153 kids; 148 154 149 155 (* Step into package.json — dispatches to JSON codec *) 150 - let (S.Step pkg) = S.step c "package.json" |> Option.get in 156 + let (S.Step (_, pkg)) = S.step_any c "package.json" |> Option.get in 151 157 let pkg_kids = S.list pkg |> List.map fst |> List.sort String.compare in 152 158 Alcotest.(check (list string)) 153 159 "package.json keys" [ "dependencies"; "name" ] pkg_kids; 154 160 155 161 (* Navigate into dependencies.fmt *) 156 - let (S.Step deps) = S.step pkg "dependencies" |> Option.get in 157 - let (S.Step fmt_val) = S.step deps "fmt" |> Option.get in 162 + let (S.Step (_, deps)) = S.step_any pkg "dependencies" |> Option.get in 163 + let (S.Step (_, fmt_val)) = S.step_any deps "fmt" |> Option.get in 158 164 Alcotest.(check (option string)) 159 165 "fmt version" (Some {|"1.0"|}) (S.get_block fmt_val); 160 166 ··· 166 172 167 173 (* Prove: "package.json has dependencies.digestif = '2.0'" *) 168 174 let read_digestif_version c = 169 - let (S.Step pkg) = S.step c "package.json" |> Option.get in 170 - let (S.Step deps) = S.step pkg "dependencies" |> Option.get in 171 - let (S.Step v) = S.step deps "digestif" |> Option.get in 172 - (S.Step v, S.get_block v) 175 + let (S.Step (_, pkg)) = S.step_any c "package.json" |> Option.get in 176 + let (S.Step (_, deps)) = S.step_any pkg "dependencies" |> Option.get in 177 + let (S.Step (sc, v)) = S.step_any deps "digestif" |> Option.get in 178 + (S.Step (sc, v), S.get_block v) 173 179 in 174 180 let proof, value = S.produce heap tar_json root read_digestif_version in 175 181 Alcotest.(check (option string)) "proved digestif" (Some {|"2.0"|}) value; ··· 195 201 let tree_with_counters = 196 202 let ( => ) = S.( => ) in 197 203 S.fix (fun self -> 198 - S.node ~dec:Irmin_tar.dir_parse ~enc:Irmin_tar.dir_serialize 199 - [ 200 - "*.counter" 201 - => S.node ~dec:counter_parse ~enc:counter_serialize 202 - ~merge:counter_merge []; 203 - "*" => self; 204 - ]) 204 + S.node ~name:"application/x-tar-tree+counter" ~dec:Irmin_tar.dir_parse 205 + ~enc:Irmin_tar.dir_serialize 206 + ~rules: 207 + [ 208 + "*.counter" 209 + => S.node ~name:"application/x-counter" ~dec:counter_parse 210 + ~enc:counter_serialize ~merge:counter_merge (); 211 + "*" => self; 212 + ] 213 + ()) 205 214 206 215 let test_merge_structural () = 207 216 let store = Hashtbl.create 64 in