Persistent store with Git semantics: lazy reads, delayed writes, content-addressing
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

irmin: add clone, restore export, tighten import; consistent CAR UX

Four commands covering the {file, remote} x {in, out} matrix, plus a
one-shot clone shortcut:

  import FILE          file  -> store  (ingest)
  export FILE          store -> file   (archive)
  pull   REMOTE        store -> store  (sync, fetch + merge)
  push   REMOTE        store -> store  (sync, fast-forward)
  clone  SOURCE [DIR]  one-shot "init + import + set_head"

All take positional arguments (previously export used -o; now aligned
with import and the rest). CAR support targets the PDS backend today;
Git-backed stores get a clear "use git bundle" error, not a silent
stub.

cmd_import: dropped the half-implemented "plain file" path — use
[set] for single-file ingest. Import is now CAR-only and errors
clearly on other extensions.

cmd_export: new implementation on top of [Pds.export_car]. An empty
store or an unsupported backend is reported through a dedicated err_*
helper; there is no placeholder behaviour.

cmd_clone: new. Reads a CAR snapshot, initialises a PDS store at the
target directory (default: CAR basename minus [.car]), writes every
block, binds HEAD to the first root CID. Refuses to overwrite an
existing non-empty directory, matching git clone.

README: documents the 2x2 import/export/pull/push matrix and the
clone shortcut.

main.ml: Cmd.group now lists Cmd_clone.cmd and Cmd_export.cmd
alongside the rest.

+225 -36
+35
README.md
··· 143 143 main 144 144 ``` 145 145 146 + ### Moving data in and out 147 + 148 + Four commands, split by **source medium** (file on disk vs. another live 149 + store) and **direction** (ingest vs. emit): 150 + 151 + | | from a file | from another store | 152 + |-----------|---------------------|--------------------| 153 + | **in** | `irmin import FILE` | `irmin pull REMOTE` | 154 + | **out** | `irmin export FILE` | `irmin push REMOTE` | 155 + 156 + Plus the onboard shortcut: 157 + 158 + ``` 159 + $ irmin clone SOURCE [DIR] 160 + ``` 161 + 162 + which seeds a fresh store under `DIR` from a CAR archive (today) or a 163 + remote URL (later) — the one-shot of `init` + `import` + setting 164 + `HEAD`. With no `DIR`, the target folder is inferred from the source 165 + basename, matching `git clone`'s convention. 166 + 167 + The **archival** pair (`import` / `export`) and the **sync** pair 168 + (`pull` / `push`) are deliberately different workflows: 169 + 170 + - **Archive**: CAR file is a self-contained, hash-integral snapshot. No 171 + refs, no merge, no network. Hand it to someone, commit it to backup, 172 + re-hydrate with `import`. `clone` is the "get started" shortcut over 173 + this pair. 174 + - **Sync**: two *live* stores agreeing on a ref. `push` sends the 175 + delta, `pull` fetches + merges. Refs and merge strategy matter; the 176 + target must be reachable and writable for `push`. 177 + 178 + CAR support currently targets the PDS/ATProto backend; Git-backed 179 + stores export via `git bundle` on the underlying `.git`. 180 + 146 181 ## Backends 147 182 148 183 | Module | Type | Block format | Status |
+109
bin/cmd_clone.ml
··· 1 + (** [irmin clone] - create a new store seeded from a CAR file. 2 + 3 + Given a CAR snapshot (typically produced by [irmin export] or a 4 + PDS's [com.atproto.sync.getRepo] endpoint), initialise a fresh 5 + local store at [DIR] and import every block from the archive. 6 + The store's head is bound to the first root CID in the CAR. 7 + 8 + This is the one-shot counterpart to [init] + [import] + [set_head]. 9 + 10 + URL support (fetching the CAR over HTTP from a live PDS endpoint) 11 + is not implemented yet; for now [SOURCE] must be a path to a local 12 + CAR file. *) 13 + 14 + let err_not_car path = 15 + Common.error "clone: SOURCE must be a .car file; got %a" Common.styled_cyan 16 + path; 17 + 1 18 + 19 + let err_no_roots () = 20 + Common.error "clone: CAR file has no root CID; cannot bind HEAD"; 21 + 1 22 + 23 + let err_dir_exists dir = 24 + Common.error "clone: refusing to overwrite existing directory %a" 25 + Common.styled_cyan dir; 26 + 1 27 + 28 + let default_dir_for_source source = 29 + (* Match [git clone]: infer target dir from the source basename, minus 30 + the [.car] suffix. E.g. [backup.car] -> [backup]. *) 31 + let base = Filename.basename source in 32 + Filename.chop_suffix_opt ~suffix:".car" base |> Option.value ~default:base 33 + 34 + let do_clone ~sw ~fs ~source ~dir = 35 + let car_data = Eio.Path.load Eio.Path.(fs / source) in 36 + let header, blocks = Atp.Car.of_string ~cid_format:`Atproto car_data in 37 + match header.Atp.Car.roots with 38 + | [] -> err_no_roots () 39 + | head_cid :: _ -> 40 + let target = Eio.Path.(fs / dir) in 41 + (match Eio.Path.kind ~follow:false target with 42 + | `Not_found -> Eio.Path.mkdir ~perm:0o755 target 43 + | _ -> ()); 44 + (* Seed a PDS store with the blocks + head. The CAR's root is the 45 + new HEAD; [Pds.set_head] persists the ref. 
*) 46 + let did = Atp.Did.of_string_exn "did:web:localhost" in 47 + let pds = Pds.v ~sw target ~did in 48 + let blockstore = Pds.blockstore pds in 49 + let count = ref 0 in 50 + List.iter 51 + (fun (cid, data) -> 52 + blockstore#put cid data; 53 + incr count) 54 + blocks; 55 + blockstore#sync; 56 + Pds.set_head pds head_cid; 57 + Pds.close pds; 58 + Common.success "Cloned %d blocks into %a (HEAD %s)" !count 59 + Common.styled_cyan dir (Atp.Cid.to_string head_cid); 60 + 0 61 + 62 + let run ~source dir = 63 + if not (Filename.check_suffix source ".car") then err_not_car source 64 + else 65 + let dir = Option.value dir ~default:(default_dir_for_source source) in 66 + Eio_main.run @@ fun env -> 67 + let fs = Eio.Stdenv.cwd env in 68 + Eio.Switch.run @@ fun sw -> 69 + match Eio.Path.kind ~follow:false Eio.Path.(fs / dir) with 70 + | `Not_found | `Directory when not (Sys.file_exists dir) 71 + || Sys.readdir dir = [||] -> 72 + do_clone ~sw ~fs ~source ~dir 73 + | _ -> err_dir_exists dir 74 + 75 + open Cmdliner 76 + 77 + let clone_source = 78 + let doc = "Path to a CAR file to clone from." in 79 + Arg.(required & pos 0 (some string) None & info [] ~docv:"SOURCE" ~doc) 80 + 81 + let clone_dir = 82 + let doc = 83 + "Target directory for the new store. Defaults to the CAR file's basename \ 84 + without the [.car] suffix." 85 + in 86 + Arg.(value & pos 1 (some string) None & info [] ~docv:"DIR" ~doc) 87 + 88 + let cmd : unit Cmd.t = 89 + let doc = "Create a new store seeded from a CAR file." in 90 + let man = 91 + [ 92 + `S Manpage.s_description; 93 + `P 94 + "Initialise a fresh store at DIR and import every block from the CAR \ 95 + archive at SOURCE. The store's HEAD is bound to the first root CID \ 96 + in the archive."; 97 + `P 98 + "DIR must not exist, or must be empty. 
This mirrors the safety \ 99 + behaviour of [git clone]."; 100 + `S Manpage.s_examples; 101 + `Pre " irmin clone backup.car"; 102 + `Pre " irmin clone backup.car myrepo"; 103 + ] 104 + in 105 + Cmd.v 106 + (Cmd.info "clone" ~doc ~man) 107 + Term.( 108 + const (fun () source dir -> ignore (run ~source dir)) 109 + $ Terms.setup $ clone_source $ clone_dir)
+55 -14
bin/cmd_export.ml
··· 1 - (** Export command - export store to external formats. *) 1 + (** [irmin export] - snapshot a store as a CAR file. 2 + 3 + Writes every block reachable from the store's head to a CAR 4 + (Content-Addressable aRchive) file. The resulting file is a 5 + portable, hash-integral snapshot — it can be emailed, archived, 6 + or later [import]ed into any store that accepts CAR input. 7 + 8 + Today only the PDS/ATProto backend is implemented. Git-backed 9 + stores fail with a clear "backend not supported" message; Git 10 + export should use [git bundle] on the underlying [.git] 11 + directory. *) 12 + 13 + let err_unsupported_backend backend = 14 + Common.error 15 + "export: backend %a not supported; use [git bundle] for Git-backed stores" 16 + Config.pp_backend backend; 17 + 1 2 18 3 - let run ~repo ~branch ~output () = 4 - ignore (repo, branch, output); 5 - Common.error "export: not yet implemented with new Schema API"; 19 + let err_empty_store () = 20 + Common.error "export: store has no head commit (nothing to export)"; 6 21 1 7 22 23 + let export_pds ~sw ~fs ~config file = 24 + let path = Eio.Path.(fs / config.Config.store_path) in 25 + let pds = Pds.open_ ~sw path in 26 + match Pds.head pds with 27 + | None -> err_empty_store () 28 + | Some _ -> 29 + let data = Pds.export_car pds in 30 + let target = Eio.Path.(fs / file) in 31 + Eio.Path.save ~create:(`Or_truncate 0o644) target data; 32 + Common.success "Exported %d bytes to %a" (String.length data) 33 + Common.styled_cyan file; 34 + 0 35 + 36 + let run ~repo file = 37 + let config = Config.load ~repo () in 38 + Eio_main.run @@ fun env -> 39 + let fs = Eio.Stdenv.cwd env in 40 + Eio.Switch.run @@ fun sw -> 41 + match config.Config.backend with 42 + | Config.Pds -> export_pds ~sw ~fs ~config file 43 + | Config.Git | Config.Memory | Config.Disk as b -> 44 + err_unsupported_backend b 45 + 8 46 open Cmdliner 9 47 10 - let export_output = 11 - let doc = "Output file path." 
in 12 - Arg.( 13 - required & opt (some string) None & info [ "o"; "output" ] ~docv:"FILE" ~doc) 48 + let export_file = 49 + let doc = "Path to write the CAR file to." in 50 + Arg.(required & pos 0 (some string) None & info [] ~docv:"FILE" ~doc) 14 51 15 52 let cmd : unit Cmd.t = 16 - let doc = "Export store to file." in 53 + let doc = "Export the store as a CAR file." in 17 54 let man = 18 55 [ 19 56 `S Manpage.s_description; 20 - `P "Export store contents. Format determined by extension:"; 21 - `I ("$(b,.car)", "CAR file (ATProto format)"); 57 + `P 58 + "Write every block reachable from the store's head to a CAR \ 59 + (Content-Addressable aRchive) file. The resulting archive is a \ 60 + portable snapshot — transfer it out of band and re-ingest it with \ 61 + [irmin import]."; 22 62 `S Manpage.s_examples; 23 - `Pre " irmin export -o backup.car"; 63 + `Pre " irmin export backup.car"; 64 + `Pre " irmin --repo ./atproto-repo export snapshot.car"; 24 65 ] 25 66 in 26 67 Cmd.v 27 68 (Cmd.info "export" ~doc ~man) 28 69 Term.( 29 - const (fun () repo branch output -> ignore (run ~repo ~branch ~output ())) 30 - $ Terms.setup $ Terms.repo $ Terms.branch $ export_output) 70 + const (fun () repo file -> ignore (run ~repo file)) 71 + $ Terms.setup $ Terms.repo $ export_file)
+24 -21
bin/cmd_import.ml
··· 1 - (** Import command - import data from external formats. *) 1 + (** [irmin import] - import a CAR file into the store. 2 + 3 + Today only CAR (Content-Addressable aRchive) files are supported: 4 + every block in the CAR is written to the store's [.irmin/blocks] 5 + filesystem blockstore. Other formats fail fast with a clear error. *) 6 + 7 + let err_unsupported_format file = 8 + Common.error "import: only .car files are supported; got %a" 9 + Common.styled_cyan file; 10 + 1 2 11 3 12 let import_car ~config ~fs data file = 4 13 let header, blocks = Atp.Car.of_string ~cid_format:`Atproto data in ··· 17 26 ignore header; 18 27 0 19 28 20 - let run ~repo ~branch file = 29 + let run ~repo file = 21 30 let config = Config.load ~repo () in 22 31 Eio_main.run @@ fun env -> 23 32 let fs = Eio.Stdenv.cwd env in 24 - Eio.Switch.run @@ fun sw -> 25 - let file_path = Eio.Path.(fs / file) in 26 - let data = Eio.Path.load file_path in 27 - let is_car = Filename.check_suffix file ".car" in 28 - if is_car then import_car ~config ~fs data file 29 - else begin 30 - ignore (sw, branch); 31 - Common.error 32 - "import: plain file import not yet implemented with new Schema API"; 33 - 1 34 - end 33 + Eio.Switch.run @@ fun _sw -> 34 + if Filename.check_suffix file ".car" then 35 + let file_path = Eio.Path.(fs / file) in 36 + let data = Eio.Path.load file_path in 37 + import_car ~config ~fs data file 38 + else err_unsupported_format file 35 39 36 40 open Cmdliner 37 41 38 42 let import_file = 39 - let doc = "File to import (CAR or plain content)." in 43 + let doc = "CAR file to import." in 40 44 Arg.(required & pos 0 (some string) None & info [] ~docv:"FILE" ~doc) 41 45 42 46 let cmd : unit Cmd.t = 43 - let doc = "Import data from file." in 47 + let doc = "Import blocks from a CAR file." in 44 48 let man = 45 49 [ 46 50 `S Manpage.s_description; 47 - `P "Import data from external files. 
Format is auto-detected:"; 48 - `I ("$(b,.car)", "CAR file (ATProto blocks)"); 49 - `I ("$(b,other)", "Plain content added at path"); 51 + `P 52 + "Import every block of a CAR (Content-Addressable aRchive) file into \ 53 + the store's filesystem blockstore."; 50 54 `S Manpage.s_examples; 51 55 `Pre " irmin import repo.car"; 52 - `Pre " irmin import data.json"; 53 56 ] 54 57 in 55 58 Cmd.v 56 59 (Cmd.info "import" ~doc ~man) 57 60 Term.( 58 - const (fun () repo branch file -> ignore (run ~repo ~branch file)) 59 - $ Terms.setup $ Terms.repo $ Terms.branch $ import_file) 61 + const (fun () repo file -> ignore (run ~repo file)) 62 + $ Terms.setup $ Terms.repo $ import_file)
+2 -1
bin/main.ml
··· 28 28 Cmd_log.cmd; 29 29 Cmd_branches.cmd; 30 30 Cmd_checkout.cmd; 31 + Cmd_clone.cmd; 32 + Cmd_export.cmd; 31 33 Cmd_import.cmd; 32 - Cmd_export.cmd; 33 34 Cmd_info.cmd; 34 35 Cmd_proof.cmd; 35 36 Cmd_merge.cmd;