Persistent store with Git semantics: lazy reads, delayed writes, content-addressing
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat(irmin): rename commit_info to commit, improve API and docs

- Rename commit_info type to commit, hash field to id
- Change the signature of `log` to take an optional `?limit` argument followed by a unit argument
- Return `(unit, [ `Conflict ]) result` from update_branch instead of bool
- Add Tree.pp and Hash.compare
- Rewrite mli documentation with richer descriptions

+254 -120
+5 -5
bin/cmd_log.ml
··· 6 6 let fs = Eio.Stdenv.cwd env in 7 7 Eio.Switch.run @@ fun sw -> 8 8 let store = Common.open_store ~sw ~fs ~config in 9 - let entries = Irmin.log store ~branch ~limit in 9 + let entries = Irmin.log store ~branch ?limit () in 10 10 match entries with 11 11 | [] -> 12 12 (match output with ··· 17 17 (match output with 18 18 | `Human -> 19 19 List.iter 20 - (fun (e : Irmin.commit_info) -> 20 + (fun (e : Irmin.commit) -> 21 21 Fmt.pr "%a %s@. %s@.@." Common.styled_yellow 22 - (Irmin.Hash.short e.hash) e.author e.message) 22 + (Irmin.Hash.short e.id) e.author e.message) 23 23 entries 24 24 | `Json -> 25 25 List.iter 26 - (fun (e : Irmin.commit_info) -> 26 + (fun (e : Irmin.commit) -> 27 27 Fmt.pr {|{"hash":%S,"author":%S,"message":%S}@.|} 28 - (Irmin.Hash.to_hex e.hash) e.author e.message) 28 + (Irmin.Hash.to_hex e.id) e.author e.message) 29 29 entries); 30 30 0
+27 -11
lib/irmin.ml
··· 24 24 let to_hex h = h 25 25 let of_hex s = s 26 26 let equal = String.equal 27 + let compare = String.compare 27 28 let pp fmt h = Fmt.string fmt h 28 29 let short h = if String.length h >= 7 then String.sub h 0 7 else h 29 30 end ··· 48 49 let list t path = t.t_list path 49 50 let find_tree t path = t.t_find_tree path 50 51 let add_tree t path sub = t.t_add_tree path sub 52 + 53 + let pp fmt t = 54 + let rec go t path_rev = 55 + List.iter 56 + (fun (name, kind) -> 57 + let here = name :: path_rev in 58 + match kind with 59 + | `Contents -> Fmt.pf fmt "%s@." (String.concat "/" (List.rev here)) 60 + | `Node -> ( 61 + match t.t_find_tree [ name ] with 62 + | Some sub -> go sub here 63 + | None -> ())) 64 + (t.t_list []) 65 + in 66 + go t [] 51 67 end 52 68 53 - (* ===== Commit info ===== *) 69 + (* ===== Commit ===== *) 54 70 55 - type commit_info = { 56 - hash : hash; 71 + type commit = { 72 + id : hash; 57 73 author : string; 58 74 message : string; 59 75 parents : hash list; ··· 69 85 s_branches : unit -> string list; 70 86 s_commit : 71 87 parents:hash list -> message:string -> author:string -> tree -> hash; 72 - s_log : branch:string -> limit:int option -> commit_info list; 73 - s_read_commit : hash -> commit_info option; 88 + s_log : branch:string -> limit:int option -> commit list; 89 + s_read_commit : hash -> commit option; 74 90 s_update_branch : branch:string -> old:hash option -> new_:hash -> bool; 75 91 s_is_ancestor : ancestor:hash -> descendant:hash -> bool; 76 92 s_merge_base : hash -> hash -> hash option; ··· 87 103 let commit store ~tree ~parents ~message ~author = 88 104 store.s_commit ~parents ~message ~author tree 89 105 90 - let log store ~branch ~limit = store.s_log ~branch ~limit 106 + let log store ~branch ?limit () = store.s_log ~branch ~limit 91 107 let read_commit store h = store.s_read_commit h 92 108 93 109 let update_branch store ~branch ~old ~new_ = 94 - store.s_update_branch ~branch ~old ~new_ 110 + if store.s_update_branch 
~branch ~old ~new_ then Ok () else Error `Conflict 95 111 96 112 let is_ancestor store ~ancestor ~descendant = 97 113 store.s_is_ancestor ~ancestor ~descendant ··· 157 173 | Some c -> ( 158 174 let entry = 159 175 { 160 - hash = to_hex h; 176 + id = to_hex h; 161 177 author = Private.Store.Git.Commit.author c; 162 178 message = Private.Store.Git.Commit.message c; 163 179 parents = ··· 177 193 Option.map 178 194 (fun c -> 179 195 { 180 - hash = hex; 196 + id = hex; 181 197 author = Private.Store.Git.Commit.author c; 182 198 message = Private.Store.Git.Commit.message c; 183 199 parents = List.map to_hex (Private.Store.Git.Commit.parents c); ··· 256 272 | Some c -> ( 257 273 let entry = 258 274 { 259 - hash = to_hex h; 275 + id = to_hex h; 260 276 author = Private.Store.Mst.Commit.author c; 261 277 message = Private.Store.Mst.Commit.message c; 262 278 parents = ··· 276 292 Option.map 277 293 (fun c -> 278 294 { 279 - hash = hex; 295 + id = hex; 280 296 author = Private.Store.Mst.Commit.author c; 281 297 message = Private.Store.Mst.Commit.message c; 282 298 parents = List.map to_hex (Private.Store.Mst.Commit.parents c);
+222 -104
lib/irmin.mli
··· 1 - (** Irmin 4.0 - Content-addressed storage for OCaml. 1 + (** Content-addressed version control for OCaml. 2 + 3 + {b What is content-addressing?} 4 + 5 + In a content-addressed store every object is named by a hash of its own 6 + data. The same bytes always hash to the same address, so objects are 7 + automatically deduplicated and every reference is a tamper-evident 8 + fingerprint. Irmin builds a version-control layer on top of that idea: a 9 + {!tree} of named values is assembled in memory, frozen into a {!commit} 10 + snapshot, and referenced by a named branch pointer — the same mental model 11 + as Git, generalised to multiple hash schemes and wire protocols. 12 + 13 + {b Three backends share one API:} 14 + 15 + - {!Git} stores use SHA-1 hashes and the Git object format. A repository 16 + created with {!Git.init} is a valid Git repository, readable and writable 17 + with the standard [git] command-line tool. 18 + - {!Mst} stores use SHA-256 hashes and the 19 + {{:https://atproto.com/specs/repository} ATProto Merkle Search Tree} 20 + format, with objects encoded in 21 + {{:https://ipld.io/docs/codecs/dag-cbor/} DAG-CBOR}. Use this backend when 22 + building on or interoperating with {{:https://atproto.com} ATProto} 23 + infrastructure (e.g. Bluesky). 24 + - {!Mst.of_pds} wraps a live ATProto Personal Data Server (PDS) SQLite 25 + database, treating it as an Mst store whose HEAD tracks the MST root 26 + directly. External ATProto tooling can read the data without an Irmin 27 + commit wrapper. 2 28 3 - {b Quick start — write a file and commit it:} 29 + {b Quick start:} 30 + 4 31 {[ 5 32 let store = Irmin.Git.init ~sw ~fs ~path:(Fpath.v "/tmp/repo") in 6 33 let tree = ··· 12 39 Irmin.set_head store ~branch:"main" h 13 40 ]} 14 41 15 - Irmin exposes a single {!t} store type that wraps Git, ATProto MST, or 16 - in-memory backends. Tree and hash values are unified across backends so that 17 - commands and handlers can be written once and work everywhere. 
*) 42 + {b For the public API see {!section:hashes}.} The section below exposes 43 + internal modules for testing and low-level tooling only; skip it for 44 + application code. *) 18 45 19 - (** {1 Private: Internal Modules} 46 + (** {1:private Internal Modules} 20 47 21 - Defined first so that module aliases below capture the internal names before 22 - they are shadowed by the public [Hash] and [Tree] wrappers. 48 + These modules expose implementation internals. They are {b not} subject to 49 + the library's stability guarantee and may change between any two releases. 50 + They must appear here — before the public {!Hash} and {!Tree} modules — so 51 + that the module aliases capture the internal implementations rather than the 52 + public wrappers. 23 53 24 - These modules are {b not} covered by the stability guarantee. *) 54 + {b Prefer the public API for all application code.} *) 25 55 26 56 module Private : sig 27 57 module Hash = Hash 28 - (** Phantom-typed hashes (SHA-1, SHA-256). *) 58 + (** Phantom-typed hashes — [Hash.sha1 "data"], [Hash.sha256 "data"], 59 + [Hash.to_hex], [Hash.sha1_of_hex]. *) 29 60 30 61 module Backend = Backend 31 - (** KV backend implementations. *) 62 + (** KV storage backends: {!Backend.Memory}, {!Backend.Disk}, combinators. *) 32 63 33 64 module Codec = Codec 34 - (** Tree-format codec (Git, MST, extensible). *) 65 + (** Tree codec functor — plugs a hash algorithm and serialisation format into 66 + the tree layer. *) 35 67 36 68 module Tree = Tree 37 - (** Lazy tree implementation and [Make] functor. *) 69 + (** Lazy tree functor with pre-instantiated {!Tree.Git} and {!Tree.Mst}. *) 38 70 39 71 module Commit = Commit 40 - (** Commit functor and pre-instantiated commits. *) 72 + (** Commit object functor. *) 41 73 42 74 module Store = Store 43 - (** Store functor and pre-instantiated stores. *) 75 + (** Store functor. *) 44 76 45 77 module Subtree = Subtree 46 - (** Monorepo subtree operations. 
*) 78 + (** Subtree (monorepo-style sub-path) operations. *) 47 79 48 80 module Proof = Proof 49 - (** Merkle proof functor. *) 81 + (** Merkle proof generation and verification. *) 50 82 51 83 module Pds_interop = Pds_interop 52 84 (** ATProto PDS backend adapter. *) 53 85 end 54 86 55 - (** {1 Link API: Persistent Pointers} *) 87 + (** {1:hashes Hashes} *) 56 88 57 - module Link = Link 58 - (** Persistent, content-addressed pointers to arbitrary OCaml values. *) 89 + type hash 90 + (** An opaque content hash. Every object in the store is identified by the hash 91 + of its own data; the same content always produces the same hash. 59 92 60 - (** {1 Unified Store API} *) 93 + The hash algorithm is fixed per backend: SHA-1 for {!Git}, SHA-256 for 94 + {!Mst}. Hashes are encoded as lowercase hexadecimal strings when converted 95 + via {!Hash.to_hex}. *) 61 96 62 - type hash 63 - (** An opaque content hash. Use {!Hash} to display or compare hashes. The 64 - underlying representation depends on the backend: SHA-1 hex for Git, SHA-256 65 - hex for MST, or a CID string for ATProto PDS. *) 97 + module Hash : sig 98 + val of_hex : string -> hash 99 + (** [of_hex s] wraps the hex string [s] as a hash without validation. Only use 100 + this to round-trip values previously obtained from {!to_hex}; passing an 101 + arbitrary string will not fail immediately but will produce incorrect 102 + results when the hash is used in a store operation. *) 66 103 67 - module Hash : sig 68 104 val to_hex : hash -> string 69 - (** [to_hex h] returns the full hex (or CID) string for [h]. *) 105 + (** [to_hex h] returns the full lowercase hexadecimal representation. *) 70 106 71 - val of_hex : string -> hash 72 - (** [of_hex s] wraps a hex/CID string as a [hash]. No validation is performed; 73 - passing an invalid string will cause downstream failures. 
*) 107 + val short : hash -> string 108 + (** [short h] returns the first seven characters of [to_hex h], suitable for 109 + compact display in log output. *) 74 110 75 111 val equal : hash -> hash -> bool 76 - (** [equal h1 h2] tests hash equality. *) 112 + val compare : hash -> hash -> int 113 + val pp : hash Fmt.t 114 + end 77 115 78 - val pp : Format.formatter -> hash -> unit 79 - (** [pp fmt h] pretty-prints [h]. *) 116 + (** {1 Trees} *) 117 + 118 + (** A tree is an immutable hierarchical namespace — a directory of named 119 + contents, structurally equivalent to a filesystem subtree. 80 120 81 - val short : hash -> string 82 - (** [short h] returns the first 7 characters of [to_hex h]. *) 83 - end 121 + {b Paths} are [string list] values: [["src"; "main.ml"]] addresses the leaf 122 + at path [src/main.ml]. 84 123 85 - type tree 86 - (** A lazy, in-memory staging area. 124 + {b Immutable updates:} {!Tree.add}, {!Tree.remove}, and {!Tree.add_tree} 125 + always return a {e new} tree; they do not modify their argument. Data is 126 + only persisted to the backend when the tree is passed to {!commit}. 87 127 88 - Trees are immutable: {!Tree.add}, {!Tree.remove}, and {!Tree.add_tree} 89 - return new trees rather than modifying in place. Writes are accumulated and 90 - flushed to the backend when {!commit} is called. 128 + {b Lazy loading:} trees retrieved via {!checkout} are loaded on demand. Only 129 + the root node is fetched at checkout time; subtrees are read from the 130 + backend as they are traversed. *) 91 131 92 - Create an empty tree with {!empty_tree}; retrieve a committed tree with 93 - {!checkout}. *) 132 + type tree 133 + (** A tree value. *) 94 134 95 135 module Tree : sig 96 136 val find : tree -> string list -> string option 97 - (** [find t path] looks up contents at [path]. Lazy nodes are loaded on 98 - demand. 
*) 137 + (** [find t path] returns the leaf contents at [path], or [None] if [path] 138 + does not exist or addresses a subtree node rather than a leaf. *) 99 139 100 140 val add : tree -> string list -> string -> tree 101 - (** [add t path v] returns a new tree with [v] stored at [path]. *) 141 + (** [add t path v] returns a new tree with [v] stored at [path]. Any existing 142 + value at [path] is replaced. Intermediate nodes are created automatically. 143 + *) 102 144 103 145 val remove : tree -> string list -> tree 104 - (** [remove t path] returns a new tree with [path] removed. *) 146 + (** [remove t path] returns a new tree with [path] removed. It is not an error 147 + to remove a path that does not exist. *) 105 148 106 149 val list : tree -> string list -> (string * [ `Contents | `Node ]) list 107 - (** [list t path] lists the immediate children of [path]. *) 150 + (** [list t path] returns the immediate children of the node at [path] as 151 + [(name, kind)] pairs, sorted lexicographically by name. [`Contents] 152 + entries are leaf values; [`Node] entries are subtrees. Returns [[]] if 153 + [path] does not exist. *) 108 154 109 155 val find_tree : tree -> string list -> tree option 110 - (** [find_tree t path] returns the subtree rooted at [path]. *) 156 + (** [find_tree t path] returns the subtree rooted at [path], or [None] if 157 + [path] does not exist or addresses a leaf. *) 111 158 112 159 val add_tree : tree -> string list -> tree -> tree 113 - (** [add_tree t path sub] grafts [sub] as a subtree at [path]. *) 160 + (** [add_tree t path sub] grafts [sub] as a subtree at [path]. Any existing 161 + node at [path] is replaced. *) 162 + 163 + val pp : tree Fmt.t 164 + (** [pp fmt t] pretty-prints [t] as a sorted list of full paths, one per line. 165 + Lazy nodes are loaded on demand during traversal. 
*) 114 166 end 115 167 116 - type commit_info = { 117 - hash : hash; 168 + (** {1 Commits} *) 169 + 170 + type commit = { 171 + id : hash; 172 + (** The commit's own hash — its stable, content-derived identity. *) 118 173 author : string; 119 - message : string; 174 + (** Free-form author string (e.g. ["Alice <alice@example.com>"]). *) 175 + message : string; (** Commit message. *) 120 176 parents : hash list; 177 + (** Parent commit hashes. Empty for the initial commit; one parent for a 178 + linear history; two for a merge commit. *) 121 179 } 122 180 (** Metadata for a single commit. *) 181 + 182 + (** {1 Stores} *) 123 183 124 184 type t 125 - (** A content-addressed store backed by Git, MST, or PDS. Construct with 126 - {!Git.init}/{!Git.open_}, {!Mst.memory}/{!Mst.disk}/{!Mst.of_pds}. *) 185 + (** A content-addressed store. Construct one with {!Git.init}, {!Git.open_}, 186 + {!Mst.memory}, {!Mst.disk}, or {!Mst.of_pds}. *) 127 187 128 188 val empty_tree : t -> tree 129 - (** [empty_tree store] creates an empty tree backed by [store]'s backend. *) 189 + (** [empty_tree store] returns an empty tree bound to [store]. 190 + 191 + Trees carry a reference to their backing store so that lazy nodes can be 192 + fetched on demand and so that {!commit} knows where to flush pending writes. 193 + Pass the result to {!Tree.add} to begin assembling a working tree. *) 130 194 131 195 val checkout : t -> branch:string -> tree option 132 - (** [checkout store ~branch] returns the working tree at the head of [branch], 133 - or [None] if the branch does not exist or is empty. *) 196 + (** [checkout store ~branch] returns the working tree at the tip of [branch], or 197 + [None] if the branch does not exist. The tree is loaded lazily: only the 198 + root node is fetched; subtrees are read from the backend as they are 199 + accessed. *) 134 200 135 201 val head : t -> branch:string -> hash option 136 - (** [head store ~branch] returns the head commit hash of [branch]. 
*) 202 + (** [head store ~branch] returns the commit hash at the tip of [branch], or 203 + [None] if the branch does not exist. *) 137 204 138 205 val set_head : t -> branch:string -> hash -> unit 139 - (** [set_head store ~branch h] advances [branch] to point at [h]. *) 206 + (** [set_head store ~branch h] moves [branch] to point at commit [h]. This is an 207 + unconditional write; use {!update_branch} for a safe compare-and-set in 208 + concurrent or distributed settings. *) 140 209 141 210 val branches : t -> string list 142 - (** [branches store] lists all branch names. *) 211 + (** [branches store] returns all branch names, sorted lexicographically. *) 143 212 144 213 val commit : 145 214 t -> tree:tree -> parents:hash list -> message:string -> author:string -> hash 146 215 (** [commit store ~tree ~parents ~message ~author] flushes [tree] to the 147 - backend, writes a commit object, and returns its hash. Does {b not} update 148 - any branch; call {!set_head} afterwards to advance a branch. *) 216 + backend, writes a commit object that records the tree root and the given 217 + metadata, and returns the commit hash. 149 218 150 - val log : t -> branch:string -> limit:int option -> commit_info list 151 - (** [log store ~branch ~limit] returns the commit history starting from the head 152 - of [branch], in reverse chronological order. [limit:None] means no limit. *) 219 + This does {b not} advance any branch; call {!set_head} afterwards: 220 + {[ 221 + let h = Irmin.commit store ~tree ~parents ~message ~author in 222 + Irmin.set_head store ~branch:"main" h 223 + ]} *) 153 224 154 - val read_commit : t -> hash -> commit_info option 155 - (** [read_commit store h] reads commit metadata. Returns [None] if [h] is not a 156 - known commit hash. 
*) 225 + val log : t -> branch:string -> ?limit:int -> unit -> commit list 226 + (** [log store ~branch ?limit ()] returns the commit history reachable from the 227 + head of [branch], in reverse-chronological order (most recent first). 228 + [~limit:n] caps the result at [n] entries; omit it for the full history. 229 + Returns [[]] if the branch does not exist. *) 230 + 231 + val read_commit : t -> hash -> commit option 232 + (** [read_commit store h] returns the commit with hash [h], or [None] if [h] is 233 + not present in the store. *) 234 + 235 + val update_branch : 236 + t -> 237 + branch:string -> 238 + old:hash option -> 239 + new_:hash -> 240 + (unit, [ `Conflict ]) result 241 + (** [update_branch store ~branch ~old ~new_] atomically sets [branch] to [new_] 242 + only if its current head equals [old]: 243 + 244 + - [~old:None] — the branch must not exist yet. 245 + - [~old:(Some h)] — the branch must currently point at [h]. 157 246 158 - val update_branch : t -> branch:string -> old:hash option -> new_:hash -> bool 159 - (** [update_branch store ~branch ~old ~new_] atomically updates [branch] to 160 - [new_] only if its current head equals [old]. Returns [true] on success. *) 247 + Returns [Error `Conflict] if the precondition is not met. Use this instead 248 + of {!set_head} when concurrent writers may be present. *) 161 249 162 250 val is_ancestor : t -> ancestor:hash -> descendant:hash -> bool 163 - (** [is_ancestor store ~ancestor ~descendant] checks whether [ancestor] is 164 - reachable from [descendant] by following parent links. *) 251 + (** [is_ancestor store ~ancestor ~descendant] is [true] if [ancestor] is 252 + reachable from [descendant] by following parent links. A commit is 253 + considered its own ancestor. *) 165 254 166 255 val merge_base : t -> hash -> hash -> hash option 167 - (** [merge_base store h1 h2] finds the most recent common ancestor of [h1] and 168 - [h2]. Returns [None] if the histories are disjoint. 
*) 256 + (** [merge_base store h1 h2] returns the most recent common ancestor of [h1] and 257 + [h2], or [None] if the two histories share no common commit. *) 169 258 170 - (** {1 Git-Format Store} 259 + (** {1 Git Backend} 171 260 172 - SHA-1 hashes, Git object format. Repositories are readable with standard Git 173 - tooling. *) 261 + Stores use SHA-1 hashes and the Git 262 + {{:https://git-scm.com/book/en/v2/Git-Internals-Git-Objects} loose-object} 263 + and {{:https://git-scm.com/book/en/v2/Git-Internals-Git-References} ref} 264 + formats. A repository created with {!init} is a valid Git repository: 265 + existing tooling such as [git log], [git diff], and [git push] work on it 266 + without modification. *) 174 267 175 268 module Git : sig 176 - (** {2 Construction} *) 177 - 178 269 val init : sw:Eio.Switch.t -> fs:Eio.Fs.dir_ty Eio.Path.t -> path:Fpath.t -> t 179 - (** [init ~sw ~fs ~path] initializes a new Git repository at [path]. *) 270 + (** [init ~sw ~fs ~path] creates a new Git repository at [path], creating 271 + parent directories as needed. Equivalent to [git init]. *) 180 272 181 273 val open_ : 182 274 sw:Eio.Switch.t -> fs:Eio.Fs.dir_ty Eio.Path.t -> path:Fpath.t -> t 183 - (** [open_ ~sw ~fs ~path] opens an existing Git repository. *) 275 + (** [open_ ~sw ~fs ~path] opens an existing Git repository at [path]. *) 184 276 185 277 val import : 186 278 sw:Eio.Switch.t -> fs:Eio.Fs.dir_ty Eio.Path.t -> git_dir:Fpath.t -> t 187 - (** [import ~sw ~fs ~git_dir] opens a bare [.git] directory. *) 279 + (** [import ~sw ~fs ~git_dir] opens a bare [.git] directory. Use this to wrap 280 + an existing repository that has no working tree. *) 188 281 189 - (** {2 Low-Level Object and Ref Access} 282 + (** {2 Low-Level Plumbing} 190 283 191 - Direct plumbing access. Prefer the high-level store operations above. *) 284 + Direct read/write access to individual Git objects and refs. 
Prefer the 285 + unified API above for application code; these functions are intended for 286 + tooling that needs to inspect or manipulate the object database at the 287 + byte level. *) 192 288 193 289 val read_object : 194 290 sw:Eio.Switch.t -> 195 291 fs:Eio.Fs.dir_ty Eio.Path.t -> 196 292 git_dir:Fpath.t -> 197 293 hash -> 198 - (string * string, [> `Msg of string ]) result 199 - (** [read_object ~sw ~fs ~git_dir hash] returns [(kind, data)] for a loose 200 - object. [kind] is ["blob"], ["tree"], ["commit"], or ["tag"]. *) 294 + (string * string, [ `Msg of string ]) result 295 + (** [read_object ~sw ~fs ~git_dir h] reads the loose object at [h] and returns 296 + [(kind, data)]. [kind] is one of ["blob"], ["tree"], ["commit"], or 297 + ["tag"]. Returns [Error] if the object does not exist or cannot be parsed. 298 + *) 201 299 202 300 val write_object : 203 301 sw:Eio.Switch.t -> ··· 206 304 typ:string -> 207 305 string -> 208 306 hash 209 - (** [write_object ~sw ~fs ~git_dir ~typ data] writes a loose object. *) 307 + (** [write_object ~sw ~fs ~git_dir ~typ data] writes [data] as a loose object 308 + with type header [typ] and returns its hash. Duplicate writes of the same 309 + content are silently ignored. *) 210 310 211 311 val read_ref : 212 312 sw:Eio.Switch.t -> ··· 214 314 git_dir:Fpath.t -> 215 315 string -> 216 316 hash option 217 - (** [read_ref ~sw ~fs ~git_dir name] reads the hash at a ref. *) 317 + (** [read_ref ~sw ~fs ~git_dir name] returns the hash stored at ref [name] 318 + (e.g. ["refs/heads/main"]), or [None] if the ref does not exist. *) 218 319 219 320 val write_ref : 220 321 sw:Eio.Switch.t -> ··· 223 324 string -> 224 325 hash -> 225 326 unit 226 - (** [write_ref ~sw ~fs ~git_dir name hash] writes a ref. *) 327 + (** [write_ref ~sw ~fs ~git_dir name h] writes [h] to ref [name], creating or 328 + overwriting it. 
*) 227 329 228 330 val list_refs : 229 331 sw:Eio.Switch.t -> 230 332 fs:Eio.Fs.dir_ty Eio.Path.t -> 231 333 git_dir:Fpath.t -> 232 334 string list 233 - (** [list_refs ~sw ~fs ~git_dir] lists all refs. *) 335 + (** [list_refs ~sw ~fs ~git_dir] returns the names of all refs, sorted 336 + lexicographically. *) 234 337 end 235 338 236 - (** {1 MST-Format Store} 339 + (** {1 MST Backend} 237 340 238 - SHA-256 hashes, DAG-CBOR MST. Compatible with the ATProto protocol 239 - (Bluesky). *) 341 + Stores use SHA-256 hashes and the 342 + {{:https://atproto.com/specs/repository} ATProto repository} format: objects 343 + are encoded in {{:https://ipld.io/docs/codecs/dag-cbor/} DAG-CBOR} and 344 + organised in a Merkle Search Tree (MST) for efficient range queries and 345 + inclusion proofs. Use this backend when building on 346 + {{:https://atproto.com} ATProto} infrastructure. For standalone use, prefer 347 + {!Git}. *) 240 348 241 349 module Mst : sig 242 - val of_pds : Pds.t -> t 243 - (** [of_pds pds] creates a store backed by an ATProto PDS (SQLite). HEAD 244 - points directly to the MST root CID (no Irmin commit wrapper), so the 245 - store is readable by ATProto tooling. *) 350 + val memory : unit -> t 351 + (** [memory ()] creates a transient in-memory MST store. All data is lost when 352 + the value is collected. Useful for testing. *) 246 353 247 354 val disk : sw:Eio.Switch.t -> Eio.Fs.dir_ty Eio.Path.t -> t 248 - (** [disk ~sw root] creates a store backed by the append-only disk backend 249 - (WAL + bloom filter). High throughput; not Git-compatible. *) 355 + (** [disk ~sw root] creates a persistent MST store backed by an append-only 356 + on-disk log at [root]. Offers higher write throughput than {!Git}; not 357 + compatible with Git tooling. *) 250 358 251 - val memory : unit -> t 252 - (** [memory ()] creates a transient in-memory store. Useful for testing. 
*) 359 + val of_pds : Pds.t -> t 360 + (** [of_pds pds] wraps a live ATProto Personal Data Server (PDS) SQLite 361 + database. The store HEAD points directly at the MST root CID so that 362 + external ATProto tooling can read the data without an Irmin commit 363 + wrapper. Obtain a {!Pds.t} from {!Pds.open_} or {!Pds.v}. *) 253 364 end 365 + 366 + (** {1 Persistent Pointers} *) 367 + 368 + module Link = Link 369 + (** Content-addressed references to arbitrary OCaml values. A {!Link.t} behaves 370 + like a pointer that survives process restarts by persisting its target in 371 + the backing object store. *)