···11+open Cmdliner
22+33+let src = Logs.Src.create "unpac.main" ~doc:"Main CLI operations"
44+module Log = (val Logs.src_log src : Logs.LOG)
55+66+(* Logging setup *)
77+let setup_logging ?(verbose=false) () =
88+ Fmt_tty.setup_std_outputs ();
99+ let level = if verbose then Logs.Debug else Logs.Info in
1010+ Logs.set_level (Some level);
1111+ Logs.set_reporter (Logs_fmt.reporter ())
1212+1313+let logging_term =
1414+ Term.(const (setup_logging ~verbose:false) $ const ())
1515+1616+(* Helper to find project root *)
1717+let with_root f =
1818+ Eio_main.run @@ fun env ->
1919+ let fs = Eio.Stdenv.fs env in
2020+ let proc_mgr = Eio.Stdenv.process_mgr env in
2121+ let cwd = Sys.getcwd () in
2222+ match Unpac.Init.find_root ~fs ~cwd with
2323+ | None ->
2424+ Format.eprintf "Error: Not in an unpac project.@.";
2525+ exit 1
2626+ | Some root ->
2727+ f ~env ~fs ~proc_mgr ~root
2828+2929+(* Helper to wrap operations with audit logging *)
3030+let with_audit ~proc_mgr ~root ~operation_type ~args f =
3131+ let main_wt = Unpac.Worktree.path root Unpac.Worktree.Main in
3232+ let mgr = Unpac.Audit.create_manager ~proc_mgr ~main_wt in
3333+ let ctx = Unpac.Audit.begin_operation mgr ~operation_type ~args in
3434+ try
3535+ let result = f ctx in
3636+ ignore (Unpac.Audit.end_success mgr);
3737+ result
3838+ with exn ->
3939+ ignore (Unpac.Audit.end_failed mgr ~error:(Printexc.to_string exn));
4040+ raise exn
4141+4242+(* Helper to get config path *)
4343+let config_path root =
4444+ let main_path = Unpac.Worktree.path root Unpac.Worktree.Main in
4545+ Eio.Path.(main_path / "unpac.toml") |> snd
4646+4747+(* Helper to load config *)
4848+let load_config root =
4949+ let path = config_path root in
5050+ match Unpac.Config.load path with
5151+ | Ok cfg -> cfg
5252+ | Error _ -> Unpac.Config.empty
5353+5454+(* Helper to save config and commit *)
5555+let save_config ~proc_mgr root config msg =
5656+ let path = config_path root in
5757+ Unpac.Config.save_exn path config;
5858+ let main_wt = Unpac.Worktree.path root Unpac.Worktree.Main in
5959+ Unpac.Git.run_exn ~proc_mgr ~cwd:main_wt ["add"; "unpac.toml"] |> ignore;
6060+ Unpac.Git.run_exn ~proc_mgr ~cwd:main_wt ["commit"; "-m"; msg] |> ignore
6161+6262+(* Check if string looks like a URL or path (vs a package name) *)
6363+let is_url_or_path s =
6464+ String.starts_with ~prefix:"http://" s ||
6565+ String.starts_with ~prefix:"https://" s ||
6666+ String.starts_with ~prefix:"git@" s ||
6767+ String.starts_with ~prefix:"git://" s ||
6868+ String.starts_with ~prefix:"ssh://" s ||
6969+ String.starts_with ~prefix:"file://" s ||
7070+ String.starts_with ~prefix:"/" s || (* Absolute path *)
7171+ String.starts_with ~prefix:"./" s || (* Relative path *)
7272+ String.starts_with ~prefix:"../" s || (* Relative path *)
7373+ String.contains s ':' (* URL with scheme *)
7474+7575+(* Normalize a dev-repo URL for grouping comparison *)
7676+let normalize_dev_repo url =
7777+ let s = url in
7878+ (* Strip git+ prefix *)
7979+ let s = if String.starts_with ~prefix:"git+" s then
8080+ String.sub s 4 (String.length s - 4) else s in
8181+ (* Strip trailing .git *)
8282+ let s = if String.ends_with ~suffix:".git" s then
8383+ String.sub s 0 (String.length s - 4) else s in
8484+ (* Strip trailing slash *)
8585+ let s = if String.ends_with ~suffix:"/" s then
8686+ String.sub s 0 (String.length s - 1) else s in
8787+ (* Normalize github URLs: git@github.com:x/y -> https://github.com/x/y *)
8888+ let s = if String.starts_with ~prefix:"git@github.com:" s then
8989+ "https://github.com/" ^ String.sub s 15 (String.length s - 15) else s in
9090+ String.lowercase_ascii s
9191+9292+(* Group solved packages by their dev-repo *)
9393+type package_group = {
9494+ canonical_name : string; (* First package name, used as vendor name *)
9595+ dev_repo : string; (* Original dev-repo URL *)
9696+ packages : string list; (* All package names in this group *)
9797+}
9898+9999+let group_packages_by_dev_repo ~config (pkgs : OpamPackage.t list) : package_group list =
100100+ let repos = config.Unpac.Config.opam.repositories in
101101+ (* Build a map from normalized dev-repo to package info *)
102102+ let groups = Hashtbl.create 16 in
103103+ List.iter (fun pkg ->
104104+ let name = OpamPackage.Name.to_string (OpamPackage.name pkg) in
105105+ let version = OpamPackage.Version.to_string (OpamPackage.version pkg) in
106106+ match Unpac_opam.Repo.find_package ~repos ~name ~version () with
107107+ | None -> () (* Skip packages not found *)
108108+ | Some result ->
109109+ match result.metadata.dev_repo with
110110+ | None -> () (* Skip packages without dev-repo *)
111111+ | Some dev_repo ->
112112+ let key = normalize_dev_repo dev_repo in
113113+ match Hashtbl.find_opt groups key with
114114+ | None ->
115115+ Hashtbl.add groups key (dev_repo, [name])
116116+ | Some (orig_url, names) ->
117117+ Hashtbl.replace groups key (orig_url, name :: names)
118118+ ) pkgs;
119119+ (* Convert to list of groups *)
120120+ Hashtbl.fold (fun _key (dev_repo, names) acc ->
121121+ let names = List.rev names in (* Preserve order *)
122122+ let canonical_name = List.hd names in
123123+ { canonical_name; dev_repo; packages = names } :: acc
124124+ ) groups []
125125+ |> List.sort (fun a b -> String.compare a.canonical_name b.canonical_name)
126126+127127+(* Helper to resolve vendor cache *)
128128+let resolve_cache ~proc_mgr ~fs ~config ~cli_cache =
129129+ match Unpac.Config.resolve_vendor_cache ?cli_override:cli_cache config with
130130+ | None -> None
131131+ | Some path ->
132132+ Some (Unpac.Vendor_cache.init ~proc_mgr ~fs ~path ())
133133+134134+(* Init command *)
135135+let init_cmd =
136136+ let doc = "Initialize a new unpac workspace." in
137137+ let man = [
138138+ `S Manpage.s_description;
139139+ `P "Creates a new unpac workspace with the standard directory structure:";
140140+ `Pre " <path>/
141141+ git/ # Bare git repository (all branches stored here)
142142+ main/ # Main worktree (unpac.toml config lives here)
143143+ vendor/ # Vendor worktrees (created as needed)
144144+ opam/ # Opam package worktrees
145145+ git/ # Git repository worktrees
146146+ project/ # Project worktrees";
147147+ `P "The workspace uses git worktrees to maintain isolated views of \
148148+ vendored dependencies. Each vendored item has three branches:";
149149+ `I ("upstream/*", "Tracks original repository state");
150150+ `I ("vendor/*", "Clean snapshot for merging");
151151+ `I ("patches/*", "Local modifications");
152152+ `S Manpage.s_examples;
153153+ `P "Create a new workspace:";
154154+ `Pre " unpac init my-project
155155+ cd my-project";
156156+ `S "SEE ALSO";
157157+ `P "unpac-project(1), unpac-opam(1), unpac-git(1)";
158158+ ] in
159159+ let path_arg =
160160+ let doc = "Path for the new workspace. Will create the directory if it doesn't exist." in
161161+ Arg.(required & pos 0 (some string) None & info [] ~docv:"PATH" ~doc)
162162+ in
163163+ let run () path =
164164+ Eio_main.run @@ fun env ->
165165+ let fs = Eio.Stdenv.fs env in
166166+ let proc_mgr = Eio.Stdenv.process_mgr env in
167167+ let _root = Unpac.Init.init ~proc_mgr ~fs path in
168168+ Format.printf "Initialized unpac workspace at %s@." path;
169169+ Format.printf "@.Next steps:@.";
170170+ Format.printf " cd %s@." path;
171171+ Format.printf " unpac opam repo add <name> <path> # configure opam repository@.";
172172+ Format.printf " unpac project new <name> # create a project@."
173173+ in
174174+ let info = Cmd.info "init" ~doc ~man in
175175+ Cmd.v info Term.(const run $ logging_term $ path_arg)
176176+177177+(* Project new command *)
178178+let project_new_cmd =
179179+ let doc = "Create a new project branch." in
180180+ let name_arg =
181181+ let doc = "Name of the project." in
182182+ Arg.(required & pos 0 (some string) None & info [] ~docv:"NAME" ~doc)
183183+ in
184184+ let run () name =
185185+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root ->
186186+ with_audit ~proc_mgr ~root ~operation_type:Unpac.Audit.Project_new ~args:[name] @@ fun _ctx ->
187187+ let _path = Unpac.Init.create_project ~proc_mgr root name in
188188+ Format.printf "Created project %s@." name;
189189+ Format.printf "@.Next steps:@.";
190190+ Format.printf " unpac opam add <package> # vendor a package@.";
191191+ Format.printf " unpac opam merge <package> %s # merge package into project@." name
192192+ in
193193+ let info = Cmd.info "new" ~doc in
194194+ Cmd.v info Term.(const run $ logging_term $ name_arg)
195195+196196+(* Project list command *)
197197+let project_list_cmd =
198198+ let doc = "List projects." in
199199+ let run () =
200200+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root ->
201201+ let projects = Unpac.Worktree.list_projects ~proc_mgr root in
202202+ List.iter (Format.printf "%s@.") projects
203203+ in
204204+ let info = Cmd.info "list" ~doc in
205205+ Cmd.v info Term.(const run $ logging_term)
206206+207207+(* Project promote command *)
208208+let project_promote_cmd =
209209+ let doc = "Promote a local project to a vendored library." in
210210+ let man = [
211211+ `S Manpage.s_description;
212212+ `P "Converts a locally-developed project into the vendor branch structure \
213213+ used by unpac for dependency management. This allows the project to be:";
214214+ `I ("•", "Merged into other projects as a dependency");
215215+ `I ("•", "Pushed to an independent git repository");
216216+ `I ("•", "Updated independently of the workspace");
217217+ `S "FILTERING";
218218+ `P "The promotion process filters the project history to remove \
219219+ vendored dependencies (the vendor/ directory), producing a clean \
220220+ library that can be independently distributed.";
221221+ `P "Specifically, it:";
222222+ `I ("1.", "Extracts project/<name> branch history");
223223+ `I ("2.", "Filters out vendor/ directory (all backends' vendored code)");
224224+ `I ("3.", "Creates clean <backend>/upstream/<name> with filtered history");
225225+ `I ("4.", "Creates <backend>/vendor/<name> with path prefix applied");
226226+ `I ("5.", "Creates <backend>/patches/<name> for local modifications");
227227+ `P "The original project/<name> branch is preserved unchanged.";
228228+ `S "BACKENDS";
229229+ `P "The --backend flag determines how the library is structured:";
230230+ `I ("opam", "Use for OCaml libraries built with dune. \
231231+ Creates vendor/opam/<name>/ structure. \
232232+ Merge with: unpac opam merge <name> <project>");
233233+ `I ("git", "Use for reference code, C libraries, or non-OCaml sources. \
234234+ Creates vendor/git/<name>/ structure. \
235235+ Merge with: unpac git merge <name> <project>");
236236+ `S Manpage.s_examples;
237237+ `P "Promote a completed OCaml library:";
238238+ `Pre " unpac project promote brotli --backend opam";
239239+ `P "Promote with a different vendor name:";
240240+ `Pre " unpac project promote mybrotli --backend opam --name brotli";
241241+ `P "Promote a reference implementation:";
242242+ `Pre " unpac project promote zstd-reference --backend git";
243243+ `P "Full workflow from development to distribution:";
244244+ `Pre " unpac project new mybrotli
245245+ # ... develop the library ...
246246+ unpac project promote mybrotli --backend opam
247247+ unpac project set-remote mybrotli git@github.com:me/mybrotli.git
248248+ unpac opam merge mybrotli other-project";
249249+ `S "SEE ALSO";
250250+ `P "unpac-project-new(1), unpac-opam-merge(1), unpac-git-merge(1)";
251251+ ] in
252252+ let name_arg =
253253+ let doc = "Name of the project to promote." in
254254+ Arg.(required & pos 0 (some string) None & info [] ~docv:"PROJECT" ~doc)
255255+ in
256256+ let backend_arg =
257257+ let doc = "Vendor backend type: opam or git. \
258258+ Determines branch structure and merge semantics." in
259259+ Arg.(required & opt (some string) None & info ["backend"; "b"] ~docv:"BACKEND" ~doc)
260260+ in
261261+ let vendor_name_arg =
262262+ let doc = "Override the vendor library name (defaults to project name)." in
263263+ Arg.(value & opt (some string) None & info ["name"; "n"] ~docv:"NAME" ~doc)
264264+ in
265265+ let run () project backend_str vendor_name =
266266+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root ->
267267+ (* Parse backend *)
268268+ let backend = match Unpac.Promote.backend_of_string backend_str with
269269+ | Some b -> b
270270+ | None ->
271271+ Format.eprintf "Error: Unknown backend '%s'. Use 'opam' or 'git'.@." backend_str;
272272+ exit 1
273273+ in
274274+ with_audit ~proc_mgr ~root ~operation_type:Unpac.Audit.Project_promote
275275+ ~args:(
276276+ [project; "--backend"; backend_str] @
277277+ (match vendor_name with Some n -> ["--name"; n] | None -> [])
278278+ ) @@ fun _ctx ->
279279+ match Unpac.Promote.promote ~proc_mgr ~root ~project ~backend ~vendor_name with
280280+ | Unpac.Promote.Promoted { name; backend; original_commits; filtered_commits } ->
281281+ Format.printf "Promoted %s as %s vendor@." project (Unpac.Promote.backend_to_string backend);
282282+ Format.printf "@.Filtered history: %d → %d commits (removed vendor/ directory)@."
283283+ original_commits filtered_commits;
284284+ Format.printf "@.Created branches:@.";
285285+ Format.printf " %s@." (Unpac.Promote.upstream_branch backend name);
286286+ Format.printf " %s@." (Unpac.Promote.vendor_branch backend name);
287287+ Format.printf " %s@." (Unpac.Promote.patches_branch backend name);
288288+ Format.printf "@.%s can now be merged into other projects:@." name;
289289+ (match backend with
290290+ | Unpac.Promote.Opam ->
291291+ Format.printf " unpac opam merge %s <project>@." name
292292+ | Unpac.Promote.Git ->
293293+ Format.printf " unpac git merge %s <project>@." name)
294294+ | Unpac.Promote.Already_promoted name ->
295295+ Format.eprintf "Error: %s is already promoted.@." name;
296296+ exit 1
297297+ | Unpac.Promote.Project_not_found name ->
298298+ Format.eprintf "Error: Project '%s' not found.@." name;
299299+ exit 1
300300+ | Unpac.Promote.Failed { name; error } ->
301301+ Format.eprintf "Error promoting %s: %s@." name error;
302302+ exit 1
303303+ in
304304+ let info = Cmd.info "promote" ~doc ~man in
305305+ Cmd.v info Term.(const run $ logging_term $ name_arg $ backend_arg $ vendor_name_arg)
306306+307307+(* Project set-remote command *)
308308+let project_set_remote_cmd =
309309+ let doc = "Set the remote URL for a project." in
310310+ let man = [
311311+ `S Manpage.s_description;
312312+ `P "Configures a git remote for pushing a project to an independent repository. \
313313+ This allows projects developed in the workspace to be published separately.";
314314+ `P "The remote is named 'origin-<project>' and is stored in the bare git \
315315+ repository. Use 'unpac project push' to push to this remote.";
316316+ `S Manpage.s_examples;
317317+ `P "Set remote for a project:";
318318+ `Pre " unpac project set-remote brotli git@github.com:user/ocaml-brotli.git";
319319+ `P "Full workflow:";
320320+ `Pre " unpac project new mylib
321321+ # ... develop the library ...
322322+ unpac project promote mylib --backend opam
323323+ unpac project set-remote mylib git@github.com:me/mylib.git
324324+ unpac project push mylib";
325325+ `S "SEE ALSO";
326326+ `P "unpac-project-push(1), unpac-project-promote(1)";
327327+ ] in
328328+ let name_arg =
329329+ let doc = "Name of the project." in
330330+ Arg.(required & pos 0 (some string) None & info [] ~docv:"PROJECT" ~doc)
331331+ in
332332+ let url_arg =
333333+ let doc = "Remote URL (git SSH or HTTPS URL)." in
334334+ Arg.(required & pos 1 (some string) None & info [] ~docv:"URL" ~doc)
335335+ in
336336+ let run () project url =
337337+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root ->
338338+ with_audit ~proc_mgr ~root ~operation_type:Unpac.Audit.Project_set_remote
339339+ ~args:[project; url] @@ fun _ctx ->
340340+ match Unpac.Promote.set_remote ~proc_mgr ~root ~project ~url with
341341+ | Unpac.Promote.Remote_set { project; url; created } ->
342342+ if created then
343343+ Format.printf "Created remote for %s: %s@." project url
344344+ else
345345+ Format.printf "Updated remote for %s: %s@." project url;
346346+ Format.printf "@.Push with: unpac project push %s@." project
347347+ | Unpac.Promote.Project_not_found name ->
348348+ Format.eprintf "Error: Project '%s' not found.@." name;
349349+ exit 1
350350+ | Unpac.Promote.Set_remote_failed { project; error } ->
351351+ Format.eprintf "Error setting remote for %s: %s@." project error;
352352+ exit 1
353353+ in
354354+ let info = Cmd.info "set-remote" ~doc ~man in
355355+ Cmd.v info Term.(const run $ logging_term $ name_arg $ url_arg)
356356+357357+(* Project push command *)
358358+let project_push_cmd =
359359+ let doc = "Push a project to its configured remote." in
360360+ let man = [
361361+ `S Manpage.s_description;
362362+ `P "Pushes a project branch to the remote configured via 'set-remote'. \
363363+ This allows publishing projects developed in the workspace to \
364364+ independent repositories.";
365365+ `S Manpage.s_examples;
366366+ `P "Push a project:";
367367+ `Pre " unpac project push brotli";
368368+ `S "SEE ALSO";
369369+ `P "unpac-project-set-remote(1)";
370370+ ] in
371371+ let name_arg =
372372+ let doc = "Name of the project to push." in
373373+ Arg.(required & pos 0 (some string) None & info [] ~docv:"PROJECT" ~doc)
374374+ in
375375+ let run () project =
376376+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root ->
377377+ match Unpac.Promote.push ~proc_mgr ~root ~project with
378378+ | Unpac.Promote.Pushed { project; branch; remote } ->
379379+ Format.printf "Pushed %s (%s) to %s@." project branch remote
380380+ | Unpac.Promote.No_remote project ->
381381+ Format.eprintf "Error: No remote configured for %s.@." project;
382382+ Format.eprintf "Set one with: unpac project set-remote %s <url>@." project;
383383+ exit 1
384384+ | Unpac.Promote.Project_not_found name ->
385385+ Format.eprintf "Error: Project '%s' not found.@." name;
386386+ exit 1
387387+ | Unpac.Promote.Push_failed { project; error } ->
388388+ Format.eprintf "Error pushing %s: %s@." project error;
389389+ exit 1
390390+ in
391391+ let info = Cmd.info "push" ~doc ~man in
392392+ Cmd.v info Term.(const run $ logging_term $ name_arg)
393393+394394+(* Project info command *)
395395+let project_info_cmd =
396396+ let doc = "Show detailed information about a project." in
397397+ let man = [
398398+ `S Manpage.s_description;
399399+ `P "Displays information about a project including:";
400400+ `I ("Origin", "Whether the project was created locally or vendored");
401401+ `I ("Remote", "Configured push URL (if any)");
402402+ `I ("Promoted", "Whether promoted to vendor library and which backend");
403403+ `S Manpage.s_examples;
404404+ `Pre " unpac project info brotli";
405405+ ] in
406406+ let name_arg =
407407+ let doc = "Name of the project." in
408408+ Arg.(required & pos 0 (some string) None & info [] ~docv:"PROJECT" ~doc)
409409+ in
410410+ let run () project =
411411+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root ->
412412+ match Unpac.Promote.get_info ~proc_mgr ~root ~project with
413413+ | None ->
414414+ Format.eprintf "Error: Project '%s' not found.@." project;
415415+ exit 1
416416+ | Some info ->
417417+ Format.printf "Project: %s@." info.name;
418418+ Format.printf "Origin: %s@."
419419+ (match info.origin with `Local -> "local" | `Vendored -> "vendored");
420420+ Format.printf "Remote: %s@."
421421+ (match info.remote with Some url -> url | None -> "(none)");
422422+ Format.printf "Promoted: %s@."
423423+ (match info.promoted_as with
424424+ | Some (backend, name) ->
425425+ Printf.sprintf "%s vendor (%s)" (Unpac.Promote.backend_to_string backend) name
426426+ | None -> "no")
427427+ in
428428+ let info = Cmd.info "info" ~doc ~man in
429429+ Cmd.v info Term.(const run $ logging_term $ name_arg)
430430+431431+(* Export command - unvendor a package for upstream push *)
432432+let export_cmd =
433433+ let doc = "Export a vendored package for pushing to upstream." in
434434+ let man = [
435435+ `S Manpage.s_description;
436436+ `P "Creates an export branch from a vendored package with files moved \
437437+ from vendor/<backend>/<name>/ back to the repository root. This is \
438438+ the inverse of vendoring, producing a branch suitable for pushing \
439439+ to an upstream git repository.";
440440+ `P "Use --from-patches to include local modifications in the export. \
441441+ Without this flag, exports from the vendor/* branch (pristine upstream).";
442442+ `S "WORKFLOW";
443443+ `P "The typical export workflow is:";
444444+ `Pre " # Export with local patches
445445+ unpac export brotli --backend opam --from-patches
446446+447447+ # Set upstream remote
448448+ unpac export-set-remote brotli git@github.com:me/brotli.git
449449+450450+ # Push to upstream
451451+ unpac export-push brotli --backend opam";
452452+ `S Manpage.s_examples;
453453+ `P "Export an opam package (pristine upstream):";
454454+ `Pre " unpac export brotli --backend opam";
455455+ `P "Export with local patches included:";
456456+ `Pre " unpac export brotli --backend opam --from-patches";
457457+ `P "Export a git-vendored package:";
458458+ `Pre " unpac export zstd --backend git";
459459+ ] in
460460+ let name_arg =
461461+ let doc = "Name of the vendored package to export." in
462462+ Arg.(required & pos 0 (some string) None & info [] ~docv:"NAME" ~doc)
463463+ in
464464+ let backend_arg =
465465+ let doc = "Vendor backend type: opam or git." in
466466+ Arg.(required & opt (some string) None & info ["backend"; "b"] ~docv:"BACKEND" ~doc)
467467+ in
468468+ let from_patches_arg =
469469+ let doc = "Export from patches/* branch (includes local modifications) \
470470+ instead of vendor/* branch (pristine upstream)." in
471471+ Arg.(value & flag & info ["from-patches"; "p"] ~doc)
472472+ in
473473+ let run () name backend_str from_patches =
474474+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root ->
475475+ let backend = match Unpac.Promote.backend_of_string backend_str with
476476+ | Some b -> b
477477+ | None ->
478478+ Format.eprintf "Error: Unknown backend '%s'. Use 'opam' or 'git'.@." backend_str;
479479+ exit 1
480480+ in
481481+ match Unpac.Promote.export ~proc_mgr ~root ~name ~backend ~from_patches with
482482+ | Unpac.Promote.Exported { name; backend; source_branch; export_branch; commits } ->
483483+ Format.printf "Exported %s (%s backend)@." name (Unpac.Promote.backend_to_string backend);
484484+ Format.printf " Source: %s@." source_branch;
485485+ Format.printf " Export: %s (%d commits)@." export_branch commits;
486486+ Format.printf "@.Files moved from vendor/%s/%s/ to repository root.@."
487487+ (Unpac.Promote.backend_to_string backend) name;
488488+ Format.printf "@.Next steps:@.";
489489+ Format.printf " unpac export-set-remote %s <url>@." name;
490490+ Format.printf " unpac export-push %s --backend %s@." name backend_str
491491+ | Unpac.Promote.Not_vendored name ->
492492+ Format.eprintf "Error: No vendor branch found for '%s'.@." name;
493493+ Format.eprintf "Check available packages with: unpac opam list / unpac git list@.";
494494+ exit 1
495495+ | Unpac.Promote.Already_exported name ->
496496+ Format.eprintf "Error: Export branch already exists for '%s'.@." name;
497497+ Format.eprintf "Delete it first with: git branch -D %s/export/%s@."
498498+ backend_str name;
499499+ exit 1
500500+ | Unpac.Promote.Export_failed { name; error } ->
501501+ Format.eprintf "Error exporting %s: %s@." name error;
502502+ exit 1
503503+ in
504504+ let info = Cmd.info "export" ~doc ~man in
505505+ Cmd.v info Term.(const run $ logging_term $ name_arg $ backend_arg $ from_patches_arg)
506506+507507+(* Export set-remote command *)
508508+let export_set_remote_cmd =
509509+ let doc = "Set the remote URL for pushing exports." in
510510+ let man = [
511511+ `S Manpage.s_description;
512512+ `P "Configures a git remote for pushing exported packages to an upstream \
513513+ repository. The remote is named 'export-<name>'.";
514514+ `S Manpage.s_examples;
515515+ `Pre " unpac export-set-remote brotli git@github.com:me/brotli.git";
516516+ ] in
517517+ let name_arg =
518518+ let doc = "Name of the package." in
519519+ Arg.(required & pos 0 (some string) None & info [] ~docv:"NAME" ~doc)
520520+ in
521521+ let url_arg =
522522+ let doc = "Remote URL (git SSH or HTTPS URL)." in
523523+ Arg.(required & pos 1 (some string) None & info [] ~docv:"URL" ~doc)
524524+ in
525525+ let run () name url =
526526+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root ->
527527+ match Unpac.Promote.set_export_remote ~proc_mgr ~root ~name ~url with
528528+ | `Created ->
529529+ Format.printf "Created export remote for %s: %s@." name url;
530530+ Format.printf "@.Push with: unpac export-push %s --backend <backend>@." name
531531+ | `Updated ->
532532+ Format.printf "Updated export remote for %s: %s@." name url
533533+ | `Existed ->
534534+ Format.printf "Export remote already set for %s: %s@." name url
535535+ in
536536+ let info = Cmd.info "export-set-remote" ~doc ~man in
537537+ Cmd.v info Term.(const run $ logging_term $ name_arg $ url_arg)
538538+539539+(* Export push command *)
540540+let export_push_cmd =
541541+ let doc = "Push an export branch to its configured remote." in
542542+ let man = [
543543+ `S Manpage.s_description;
544544+ `P "Pushes an export branch to the remote configured via 'export-set-remote'. \
545545+ The export branch is pushed as 'main' on the remote repository.";
546546+ `S Manpage.s_examples;
547547+ `Pre " unpac export-push brotli --backend opam";
548548+ ] in
549549+ let name_arg =
550550+ let doc = "Name of the package to push." in
551551+ Arg.(required & pos 0 (some string) None & info [] ~docv:"NAME" ~doc)
552552+ in
553553+ let backend_arg =
554554+ let doc = "Vendor backend type: opam or git." in
555555+ Arg.(required & opt (some string) None & info ["backend"; "b"] ~docv:"BACKEND" ~doc)
556556+ in
557557+ let run () name backend_str =
558558+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root ->
559559+ let backend = match Unpac.Promote.backend_of_string backend_str with
560560+ | Some b -> b
561561+ | None ->
562562+ Format.eprintf "Error: Unknown backend '%s'. Use 'opam' or 'git'.@." backend_str;
563563+ exit 1
564564+ in
565565+ match Unpac.Promote.push_export ~proc_mgr ~root ~name ~backend with
566566+ | Unpac.Promote.Export_pushed { name = _; backend; remote; branch; commits } ->
567567+ Format.printf "Pushed %s (%d commits) to %s@." branch commits remote;
568568+ Format.printf "Backend: %s@." (Unpac.Promote.backend_to_string backend);
569569+ Format.printf "@.Export pushed as 'main' on remote.@."
570570+ | Unpac.Promote.Export_not_found name ->
571571+ Format.eprintf "Error: No export branch found for '%s'.@." name;
572572+ Format.eprintf "Export first with: unpac export %s --backend %s@." name backend_str;
573573+ exit 1
574574+ | Unpac.Promote.No_export_remote name ->
575575+ Format.eprintf "Error: No export remote configured for '%s'.@." name;
576576+ Format.eprintf "Set one with: unpac export-set-remote %s <url>@." name;
577577+ exit 1
578578+ | Unpac.Promote.Export_push_failed { name; error } ->
579579+ Format.eprintf "Error pushing export %s: %s@." name error;
580580+ exit 1
581581+ in
582582+ let info = Cmd.info "export-push" ~doc ~man in
583583+ Cmd.v info Term.(const run $ logging_term $ name_arg $ backend_arg)
584584+585585+(* Export list command *)
586586+let export_list_cmd =
587587+ let doc = "List all exported packages." in
588588+ let run () =
589589+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root ->
590590+ let exports = Unpac.Promote.list_exports ~proc_mgr ~root in
591591+ if exports = [] then
592592+ Format.printf "No exported packages.@."
593593+ else begin
594594+ Format.printf "Exported packages:@.";
595595+ List.iter (fun (backend, name) ->
596596+ let remote = Unpac.Promote.get_export_remote ~proc_mgr ~root ~name in
597597+ Format.printf " %s (%s)%s@." name
598598+ (Unpac.Promote.backend_to_string backend)
599599+ (match remote with Some url -> " → " ^ url | None -> "")
600600+ ) exports
601601+ end
602602+ in
603603+ let info = Cmd.info "export-list" ~doc in
604604+ Cmd.v info Term.(const run $ logging_term)
605605+606606+(* Project command group *)
607607+let project_cmd =
608608+ let doc = "Project management commands." in
609609+ let man = [
610610+ `S Manpage.s_description;
611611+ `P "Projects are isolated branches where you merge vendored dependencies \
612612+ and build your application. Each project is a git worktree at \
613613+ project/<name>/ with its own branch project/<name>.";
614614+ `P "Workflow:";
615615+ `Pre " 1. Create a project: unpac project new myapp
616616+ 2. Vendor dependencies: unpac opam add foo
617617+ 3. Merge into project: unpac opam merge foo myapp
618618+ 4. Build in project: cd project/myapp && dune build";
619619+ `P "Multiple projects can share the same vendored dependencies - \
620620+ each project merges the patches branch independently.";
621621+ `S "PROMOTING PROJECTS";
622622+ `P "Once a project is complete, you can promote it to a vendored library:";
623623+ `Pre " unpac project promote mylib --backend opam";
624624+ `P "This creates clean vendor branches (filtering out vendored deps) so \
625625+ the library can be merged into other projects.";
626626+ `S "PUBLISHING PROJECTS";
627627+ `P "Projects can be pushed to independent repositories:";
628628+ `Pre " unpac project set-remote mylib git@github.com:me/mylib.git
629629+ unpac project push mylib";
630630+ `S "EXPORTING AS STANDALONE LIBRARY";
631631+ `P "To publish a promoted project as a standalone opam library:";
632632+ `Pre " # 1. Promote project to opam vendor
633633+ unpac project promote mylib --backend opam
634634+635635+ # 2. Export with your patches (files moved to root)
636636+ unpac export mylib --backend opam --from-patches
637637+638638+ # 3. Configure remotes and push
639639+ unpac export-set-remote mylib git@github.com:me/mylib.git
640640+ unpac export-push mylib --backend opam
641641+642642+ # 4. Configure upstream for pulling updates
643643+ unpac opam set-upstream mylib git@github.com:me/mylib.git";
644644+ `P "The export branch has files at repository root (not in vendor/), \
645645+ suitable for a standalone git repository. The upstream remote \
646646+ allows 'unpac opam update' to fetch changes back.";
647647+ ] in
648648+ let info = Cmd.info "project" ~doc ~man in
649649+ Cmd.group info [
650650+ project_new_cmd;
651651+ project_list_cmd;
652652+ project_info_cmd;
653653+ project_promote_cmd;
654654+ project_set_remote_cmd;
655655+ project_push_cmd;
656656+ ]
657657+658658+(* Opam repo add command *)
659659+let opam_repo_add_cmd =
660660+ let doc = "Add an opam repository for package lookups." in
661661+ let name_arg =
662662+ let doc = "Name for the repository." in
663663+ Arg.(required & pos 0 (some string) None & info [] ~docv:"NAME" ~doc)
664664+ in
665665+ let path_arg =
666666+ let doc = "Path to the repository (local directory)." in
667667+ Arg.(required & pos 1 (some string) None & info [] ~docv:"PATH" ~doc)
668668+ in
669669+ let run () name path =
670670+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root ->
671671+ let config = load_config root in
672672+ (* Check if already exists *)
673673+ if Unpac.Config.find_repo config name <> None then begin
674674+ Format.eprintf "Repository '%s' already exists@." name;
675675+ exit 1
676676+ end;
677677+ (* Resolve to absolute path *)
678678+ let abs_path =
679679+ if Filename.is_relative path then
680680+ Filename.concat (Sys.getcwd ()) path
681681+ else path
682682+ in
683683+ (* Check path exists *)
684684+ if not (Sys.file_exists abs_path && Sys.is_directory abs_path) then begin
685685+ Format.eprintf "Error: '%s' is not a valid directory@." abs_path;
686686+ exit 1
687687+ end;
688688+ let repo : Unpac.Config.repo_config = {
689689+ repo_name = name;
690690+ source = Local abs_path;
691691+ } in
692692+ let config' = Unpac.Config.add_repo config repo in
693693+ save_config ~proc_mgr root config' (Printf.sprintf "Add repository %s" name);
694694+ Format.printf "Added repository %s at %s@." name abs_path;
695695+ Format.printf "@.Next: unpac opam add <package> # vendor a package by name@."
696696+ in
697697+ let info = Cmd.info "add" ~doc in
698698+ Cmd.v info Term.(const run $ logging_term $ name_arg $ path_arg)
699699+700700+(* Opam repo list command *)
701701+let opam_repo_list_cmd =
702702+ let doc = "List configured opam repositories." in
703703+ let run () =
704704+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr:_ ~root ->
705705+ let config = load_config root in
706706+ if config.opam.repositories = [] then begin
707707+ Format.printf "No repositories configured@.";
708708+ Format.printf "@.Hint: unpac opam repo add <name> <path>@."
709709+ end else
710710+ List.iter (fun (r : Unpac.Config.repo_config) ->
711711+ let path = match r.source with
712712+ | Local p -> p
713713+ | Remote u -> u
714714+ in
715715+ Format.printf "%s: %s@." r.repo_name path
716716+ ) config.opam.repositories
717717+ in
718718+ let info = Cmd.info "list" ~doc in
719719+ Cmd.v info Term.(const run $ logging_term)
720720+721721+(* Opam repo remove command *)
722722+let opam_repo_remove_cmd =
723723+ let doc = "Remove an opam repository." in
724724+ let name_arg =
725725+ let doc = "Name of the repository to remove." in
726726+ Arg.(required & pos 0 (some string) None & info [] ~docv:"NAME" ~doc)
727727+ in
728728+ let run () name =
729729+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root ->
730730+ let config = load_config root in
731731+ if Unpac.Config.find_repo config name = None then begin
732732+ Format.eprintf "Repository '%s' not found@." name;
733733+ exit 1
734734+ end;
735735+ let config' = Unpac.Config.remove_repo config name in
736736+ save_config ~proc_mgr root config' (Printf.sprintf "Remove repository %s" name);
737737+ Format.printf "Removed repository %s@." name
738738+ in
739739+ let info = Cmd.info "remove" ~doc in
740740+ Cmd.v info Term.(const run $ logging_term $ name_arg)
741741+742742+(* Opam repo command group *)
743743+let opam_repo_cmd =
744744+ let doc = "Manage opam repositories." in
745745+ let info = Cmd.info "repo" ~doc in
746746+ Cmd.group info [opam_repo_add_cmd; opam_repo_list_cmd; opam_repo_remove_cmd]
747747+748748+(* Opam config compiler command *)
749749+let opam_config_compiler_cmd =
750750+ let doc = "Set or show the OCaml compiler version for dependency solving." in
751751+ let version_arg =
752752+ let doc = "OCaml version to use (e.g., 5.2.0)." in
753753+ Arg.(value & pos 0 (some string) None & info [] ~docv:"VERSION" ~doc)
754754+ in
755755+ let run () version_opt =
756756+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root ->
757757+ let config = load_config root in
758758+ match version_opt with
759759+ | None ->
760760+ (* Show current compiler *)
761761+ (match Unpac.Config.get_compiler config with
762762+ | Some v -> Format.printf "Compiler: %s@." v
763763+ | None -> Format.printf "No compiler configured@.@.Hint: unpac opam config compiler 5.2.0@.")
764764+ | Some version ->
765765+ (* Set compiler *)
766766+ let config' = Unpac.Config.set_compiler config version in
767767+ save_config ~proc_mgr root config' (Printf.sprintf "Set compiler to %s" version);
768768+ Format.printf "Compiler set to %s@." version
769769+ in
770770+ let info = Cmd.info "compiler" ~doc in
771771+ Cmd.v info Term.(const run $ logging_term $ version_arg)
772772+773773+(* Opam config command group *)
774774+let opam_config_cmd =
775775+ let doc = "Configure opam settings." in
776776+ let info = Cmd.info "config" ~doc in
777777+ Cmd.group info [opam_config_compiler_cmd]
778778+779779+(* Opam add command - enhanced to support package names and dependency solving *)
780780+let opam_add_cmd =
781781+ let doc = "Vendor an opam package (by name or git URL)." in
782782+ let pkg_arg =
783783+ let doc = "Package name or git URL." in
784784+ Arg.(required & pos 0 (some string) None & info [] ~docv:"PACKAGE" ~doc)
785785+ in
786786+ let name_arg =
787787+ let doc = "Override package name." in
788788+ Arg.(value & opt (some string) None & info ["n"; "name"] ~docv:"NAME" ~doc)
789789+ in
790790+ let version_arg =
791791+ let doc = "Package version (when adding by name)." in
792792+ Arg.(value & opt (some string) None & info ["V"; "pkg-version"] ~docv:"VERSION" ~doc)
793793+ in
794794+ let branch_arg =
795795+ let doc = "Git branch to vendor (defaults to remote default)." in
796796+ Arg.(value & opt (some string) None & info ["b"; "branch"] ~docv:"BRANCH" ~doc)
797797+ in
798798+ let solve_arg =
799799+ let doc = "Solve dependencies and vendor all required packages." in
800800+ Arg.(value & flag & info ["solve"] ~doc)
801801+ in
802802+ let cache_arg =
803803+ let doc = "Path to vendor cache (overrides config and UNPAC_VENDOR_CACHE env var)." in
804804+ Arg.(value & opt (some string) None & info ["cache"] ~docv:"PATH" ~doc)
805805+ in
806806+ let run () pkg name_opt version_opt branch_opt solve cli_cache =
807807+ with_root @@ fun ~env:_ ~fs ~proc_mgr ~root ->
808808+ let config = load_config root in
809809+ let cache = resolve_cache ~proc_mgr ~fs ~config ~cli_cache in
810810+811811+ (* Wrap entire operation with audit logging *)
812812+ let args = [pkg] @ (match name_opt with Some n -> ["--name"; n] | None -> [])
813813+ @ (if solve then ["--solve"] else []) in
814814+ with_audit ~proc_mgr ~root ~operation_type:Unpac.Audit.Opam_add ~args @@ fun _ctx ->
815815+816816+ if solve then begin
817817+ (* Solve dependencies and add all packages *)
818818+ let repos = config.opam.repositories in
819819+ if repos = [] then begin
820820+ Format.eprintf "No repositories configured. Add one with: unpac opam repo add <name> <path>@.";
821821+ exit 1
822822+ end;
823823+ let ocaml_version = match Unpac.Config.get_compiler config with
824824+ | Some v -> v
825825+ | None ->
826826+ Format.eprintf "No compiler version configured.@.";
827827+ Format.eprintf "Set one with: unpac opam config compiler 5.2.0@.";
828828+ exit 1
829829+ in
830830+ (* Get repo paths *)
831831+ let repo_paths = List.map (fun (r : Unpac.Config.repo_config) ->
832832+ match r.source with
833833+ | Unpac.Config.Local p -> p
834834+ | Unpac.Config.Remote u -> u (* TODO: handle remote repos *)
835835+ ) repos in
836836+ Format.printf "Solving dependencies for %s...@." pkg;
837837+ match Unpac_opam.Solver.solve ~repos:repo_paths ~ocaml_version ~packages:[pkg] with
838838+ | Error msg ->
839839+ Format.eprintf "Dependency solving failed:@.%s@." msg;
840840+ exit 1
841841+ | Ok result ->
842842+ let pkgs = result.packages in
843843+ Format.printf "Solution found: %d packages@." (List.length pkgs);
844844+ List.iter (fun p ->
845845+ Format.printf " %s.%s@."
846846+ (OpamPackage.Name.to_string (OpamPackage.name p))
847847+ (OpamPackage.Version.to_string (OpamPackage.version p))
848848+ ) pkgs;
849849+850850+ (* Group packages by dev-repo to avoid duplicating sources *)
851851+ let groups = group_packages_by_dev_repo ~config pkgs in
852852+ Format.printf "@.Grouped into %d unique repositories:@." (List.length groups);
853853+ List.iter (fun (g : package_group) ->
854854+ if List.length g.packages > 1 then
855855+ Format.printf " %s (%d packages: %s)@."
856856+ g.canonical_name
857857+ (List.length g.packages)
858858+ (String.concat ", " g.packages)
859859+ else
860860+ Format.printf " %s@." g.canonical_name
861861+ ) groups;
862862+863863+ Format.printf "@.Vendoring repositories...@.";
864864+ let added = ref 0 in
865865+ let failed = ref 0 in
866866+ let config = ref config in
867867+ List.iter (fun (g : package_group) ->
868868+ (* Use canonical name as vendor name, dev-repo as URL *)
869869+ let url = if String.starts_with ~prefix:"git+" g.dev_repo then
870870+ String.sub g.dev_repo 4 (String.length g.dev_repo - 4)
871871+ else g.dev_repo in
872872+ let info : Unpac.Backend.package_info = {
873873+ name = g.canonical_name;
874874+ url;
875875+ branch = None;
876876+ } in
877877+ match Unpac_opam.Opam.add_package ~proc_mgr ~root ?cache info with
878878+ | Unpac.Backend.Added { name = pkg_name; sha } ->
879879+ (* Record in config for remote recreation *)
880880+ let vendored : Unpac.Config.vendored_package = {
881881+ pkg_name; pkg_url = url; pkg_branch = None
882882+ } in
883883+ config := Unpac.Config.add_vendored_package !config vendored;
884884+ Format.printf "Added %s (%s)@." pkg_name (String.sub sha 0 7);
885885+ if List.length g.packages > 1 then
886886+ Format.printf " Contains: %s@." (String.concat ", " g.packages);
887887+ incr added
888888+ | Unpac.Backend.Already_exists pkg_name ->
889889+ Format.printf "Package %s already vendored@." pkg_name
890890+ | Unpac.Backend.Failed { name = pkg_name; error } ->
891891+ Format.eprintf "Error adding %s: %s@." pkg_name error;
892892+ incr failed
893893+ ) groups;
894894+ (* Save config with all vendored packages *)
895895+ if !added > 0 then
896896+ save_config ~proc_mgr root !config "Record vendored packages in config";
897897+ Format.printf "@.Done: %d repositories added, %d failed@." !added !failed;
898898+ if !failed > 0 then exit 1
899899+ end else begin
900900+ (* Single package mode *)
901901+ let url, name =
902902+ if is_url_or_path pkg then begin
903903+ (* It's a URL *)
904904+ let n = match name_opt with
905905+ | Some n -> n
906906+ | None ->
907907+ let base = Filename.basename pkg in
908908+ if String.ends_with ~suffix:".git" base then
909909+ String.sub base 0 (String.length base - 4)
910910+ else base
911911+ in
912912+ (pkg, n)
913913+ end else begin
914914+ (* It's a package name - look up in repositories *)
915915+ let repos = config.opam.repositories in
916916+ if repos = [] then begin
917917+ Format.eprintf "No repositories configured. Add one with: unpac opam repo add <name> <path>@.";
918918+ exit 1
919919+ end;
920920+ match Unpac_opam.Repo.find_package ~repos ~name:pkg ?version:version_opt () with
921921+ | None ->
922922+ Format.eprintf "Package '%s' not found in configured repositories@." pkg;
923923+ exit 1
924924+ | Some result ->
925925+ match result.metadata.dev_repo with
926926+ | None ->
927927+ Format.eprintf "Package '%s' has no dev-repo field@." pkg;
928928+ exit 1
929929+ | Some dev_repo ->
930930+ (* Strip git+ prefix if present (opam dev-repo format) *)
931931+ let url = if String.starts_with ~prefix:"git+" dev_repo then
932932+ String.sub dev_repo 4 (String.length dev_repo - 4)
933933+ else dev_repo in
934934+ let n = match name_opt with Some n -> n | None -> pkg in
935935+ (url, n)
936936+ end
937937+ in
938938+939939+ let info : Unpac.Backend.package_info = {
940940+ name;
941941+ url;
942942+ branch = branch_opt;
943943+ } in
944944+ match Unpac_opam.Opam.add_package ~proc_mgr ~root ?cache info with
945945+ | Unpac.Backend.Added { name = pkg_name; sha } ->
946946+ (* Record in config for remote recreation on fresh clones *)
947947+ let vendored : Unpac.Config.vendored_package = {
948948+ pkg_name; pkg_url = url; pkg_branch = branch_opt
949949+ } in
950950+ let config = Unpac.Config.add_vendored_package config vendored in
951951+ save_config ~proc_mgr root config "Record vendored package in config";
952952+ Format.printf "Added %s (%s)@." pkg_name (String.sub sha 0 7);
953953+ Format.printf "@.Next steps:@.";
954954+ Format.printf " unpac opam edit %s # make local changes@." pkg_name;
955955+ Format.printf " unpac opam merge %s <project> # merge into a project@." pkg_name
956956+ | Unpac.Backend.Already_exists name ->
957957+ Format.printf "Package %s already vendored@." name
958958+ | Unpac.Backend.Failed { name; error } ->
959959+ Format.eprintf "Error adding %s: %s@." name error;
960960+ exit 1
961961+ end
962962+ in
963963+ let info = Cmd.info "add" ~doc in
964964+ Cmd.v info Term.(const run $ logging_term $ pkg_arg $ name_arg $ version_arg $ branch_arg $ solve_arg $ cache_arg)
965965+966966+(* Opam list command *)
967967+let opam_list_cmd =
968968+ let doc = "List vendored opam packages." in
969969+ let run () =
970970+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root ->
971971+ let packages = Unpac_opam.Opam.list_packages ~proc_mgr ~root in
972972+ if packages = [] then begin
973973+ Format.printf "No packages vendored@.";
974974+ Format.printf "@.Hint: unpac opam add <package>@."
975975+ end else
976976+ List.iter (Format.printf "%s@.") packages
977977+ in
978978+ let info = Cmd.info "list" ~doc in
979979+ Cmd.v info Term.(const run $ logging_term)
980980+981981+(* Opam edit command *)
982982+let opam_edit_cmd =
983983+ let doc = "Open a package's patches worktree for editing. \
984984+ Also creates a vendor worktree for reference." in
985985+ let pkg_arg =
986986+ let doc = "Package name to edit." in
987987+ Arg.(required & pos 0 (some string) None & info [] ~docv:"PACKAGE" ~doc)
988988+ in
989989+ let run () pkg =
990990+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root ->
991991+ (* Check package exists *)
992992+ let packages = Unpac_opam.Opam.list_packages ~proc_mgr ~root in
993993+ if not (List.mem pkg packages) then begin
994994+ Format.eprintf "Package '%s' is not vendored@." pkg;
995995+ exit 1
996996+ end;
997997+ (* Ensure both patches and vendor worktrees exist *)
998998+ Unpac.Worktree.ensure ~proc_mgr root (Unpac.Worktree.Opam_patches pkg);
999999+ Unpac.Worktree.ensure ~proc_mgr root (Unpac.Worktree.Opam_vendor pkg);
10001000+ let patches_path = snd (Unpac.Worktree.path root (Unpac.Worktree.Opam_patches pkg)) in
10011001+ let vendor_path = snd (Unpac.Worktree.path root (Unpac.Worktree.Opam_vendor pkg)) in
10021002+ Format.printf "Editing %s@." pkg;
10031003+ Format.printf "@.";
10041004+ Format.printf "Worktrees created:@.";
10051005+ Format.printf " patches: %s (make changes here)@." patches_path;
10061006+ Format.printf " vendor: %s (original for reference)@." vendor_path;
10071007+ Format.printf "@.";
10081008+ Format.printf "Make your changes in the patches worktree, then:@.";
10091009+ Format.printf " cd %s@." patches_path;
10101010+ Format.printf " git add -A && git commit -m 'your message'@.";
10111011+ Format.printf "@.";
10121012+ Format.printf "When done: unpac opam done %s@." pkg
10131013+ in
10141014+ let info = Cmd.info "edit" ~doc in
10151015+ Cmd.v info Term.(const run $ logging_term $ pkg_arg)
10161016+10171017+(* Opam done command *)
10181018+let opam_done_cmd =
10191019+ let doc = "Close a package's patches and vendor worktrees after editing." in
10201020+ let pkg_arg =
10211021+ let doc = "Package name." in
10221022+ Arg.(required & pos 0 (some string) None & info [] ~docv:"PACKAGE" ~doc)
10231023+ in
10241024+ let run () pkg =
10251025+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root ->
10261026+ let patches_kind = Unpac.Worktree.Opam_patches pkg in
10271027+ let vendor_kind = Unpac.Worktree.Opam_vendor pkg in
10281028+ if not (Unpac.Worktree.exists root patches_kind) then begin
10291029+ Format.eprintf "No editing session for '%s'@." pkg;
10301030+ exit 1
10311031+ end;
10321032+ (* Check for uncommitted changes in patches worktree *)
10331033+ let wt_path = Unpac.Worktree.path root patches_kind in
10341034+ let status = Unpac.Git.run_exn ~proc_mgr ~cwd:wt_path ["status"; "--porcelain"] in
10351035+ if String.trim status <> "" then begin
10361036+ Format.eprintf "Warning: uncommitted changes in %s@." pkg;
10371037+ Format.eprintf "Commit or discard them before closing.@.";
10381038+ exit 1
10391039+ end;
10401040+ (* Remove both worktrees *)
10411041+ Unpac.Worktree.remove ~proc_mgr root patches_kind;
10421042+ if Unpac.Worktree.exists root vendor_kind then
10431043+ Unpac.Worktree.remove ~proc_mgr root vendor_kind;
10441044+ Format.printf "Closed editing session for %s@." pkg;
10451045+ Format.printf "@.Next steps:@.";
10461046+ Format.printf " unpac opam diff %s # view your changes@." pkg;
10471047+ Format.printf " unpac opam merge %s <project> # merge into a project@." pkg
10481048+ in
10491049+ let info = Cmd.info "done" ~doc in
10501050+ Cmd.v info Term.(const run $ logging_term $ pkg_arg)
10511051+10521052+(* Opam set-upstream command *)
10531053+let opam_set_upstream_cmd =
10541054+ let doc = "Set the upstream URL for a vendored opam package." in
10551055+ let man = [
10561056+ `S Manpage.s_description;
10571057+ `P "Configures the upstream git URL for a vendored opam package. \
10581058+ This is used by 'unpac opam update' to fetch new changes from upstream.";
10591059+ `P "For packages added via 'unpac opam add', the upstream is automatically \
10601060+ configured from the opam source URL. This command is mainly useful for \
10611061+ promoted local projects that don't have an opam source.";
10621062+ `S Manpage.s_examples;
10631063+ `Pre " unpac opam set-upstream ocaml-zstd git@github.com:user/ocaml-zstd.git";
10641064+ `S "SEE ALSO";
10651065+ `P "unpac-opam-update(1), unpac-export-set-remote(1)";
10661066+ ] in
10671067+ let name_arg =
10681068+ let doc = "Name of the package." in
10691069+ Arg.(required & pos 0 (some string) None & info [] ~docv:"NAME" ~doc)
10701070+ in
10711071+ let url_arg =
10721072+ let doc = "Upstream URL (git SSH or HTTPS URL)." in
10731073+ Arg.(required & pos 1 (some string) None & info [] ~docv:"URL" ~doc)
10741074+ in
10751075+ let run () name url =
10761076+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root ->
10771077+ match Unpac.Promote.set_upstream_remote ~proc_mgr ~root ~name ~url with
10781078+ | `Created ->
10791079+ Format.printf "Set upstream for %s: %s@." name url;
10801080+ Format.printf "@.You can now run: unpac opam update %s@." name
10811081+ | `Updated ->
10821082+ Format.printf "Updated upstream for %s: %s@." name url
10831083+ | `Existed ->
10841084+ Format.printf "Upstream already set for %s: %s@." name url
10851085+ in
10861086+ let info = Cmd.info "set-upstream" ~doc ~man in
10871087+ Cmd.v info Term.(const run $ logging_term $ name_arg $ url_arg)
10881088+10891089+(* Opam update command *)
10901090+let opam_update_cmd =
10911091+ let doc = "Update a vendored opam package from upstream." in
10921092+ let name_arg =
10931093+ let doc = "Package name to update." in
10941094+ Arg.(required & pos 0 (some string) None & info [] ~docv:"NAME" ~doc)
10951095+ in
10961096+ let run () name =
10971097+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root ->
10981098+ with_audit ~proc_mgr ~root ~operation_type:Unpac.Audit.Opam_update ~args:[name] @@ fun _ctx ->
10991099+ match Unpac_opam.Opam.update_package ~proc_mgr ~root name with
11001100+ | Unpac.Backend.Updated { name = pkg_name; old_sha; new_sha } ->
11011101+ Format.printf "Updated %s: %s -> %s@." pkg_name
11021102+ (String.sub old_sha 0 7) (String.sub new_sha 0 7);
11031103+ Format.printf "@.Next steps:@.";
11041104+ Format.printf " unpac opam diff %s # view changes@." pkg_name;
11051105+ Format.printf " unpac opam merge %s <project> # merge into a project@." pkg_name
11061106+ | Unpac.Backend.No_changes name ->
11071107+ Format.printf "%s is up to date@." name
11081108+ | Unpac.Backend.Update_failed { name; error } ->
11091109+ Format.eprintf "Error updating %s: %s@." name error;
11101110+ exit 1
11111111+ in
11121112+ let info = Cmd.info "update" ~doc in
11131113+ Cmd.v info Term.(const run $ logging_term $ name_arg)
11141114+11151115+(* Opam merge command *)
11161116+let opam_merge_cmd =
11171117+ let doc = "Merge vendored opam packages into a project. \
11181118+ Use --solve to merge a package and its dependencies, \
11191119+ or --all to merge all vendored packages." in
11201120+ let args =
11211121+ let doc = "PACKAGE PROJECT (or just PROJECT with --all)." in
11221122+ Arg.(value & pos_all string [] & info [] ~docv:"ARGS" ~doc)
11231123+ in
11241124+ let all_flag =
11251125+ let doc = "Merge all vendored packages into the project." in
11261126+ Arg.(value & flag & info ["all"] ~doc)
11271127+ in
11281128+ let solve_flag =
11291129+ let doc = "Solve dependencies for PACKAGE and merge all solved packages into the project." in
11301130+ Arg.(value & flag & info ["solve"] ~doc)
11311131+ in
11321132+ let run () args all solve =
11331133+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root ->
11341134+ let config = load_config root in
11351135+ let audit_args = args @ (if all then ["--all"] else []) @ (if solve then ["--solve"] else []) in
11361136+ with_audit ~proc_mgr ~root ~operation_type:Unpac.Audit.Opam_merge ~args:audit_args @@ fun _ctx ->
11371137+11381138+ let merge_one ~project pkg =
11391139+ let patches_branch = Unpac_opam.Opam.patches_branch pkg in
11401140+ match Unpac.Backend.merge_to_project ~proc_mgr ~root ~project ~patches_branch with
11411141+ | Ok () ->
11421142+ Format.printf "Merged %s@." pkg;
11431143+ true
11441144+ | Error (`Conflict files) ->
11451145+ Format.eprintf "Merge conflict in %s:@." pkg;
11461146+ List.iter (Format.eprintf " %s@.") files;
11471147+ false
11481148+ in
11491149+11501150+ let merge_packages packages project =
11511151+ Format.printf "Merging %d packages into project %s...@." (List.length packages) project;
11521152+ let (successes, failures) = List.fold_left (fun (s, f) pkg ->
11531153+ if merge_one ~project pkg then (s + 1, f) else (s, f + 1)
11541154+ ) (0, 0) packages in
11551155+ Format.printf "@.Done: %d merged" successes;
11561156+ if failures > 0 then Format.printf ", %d had conflicts" failures;
11571157+ Format.printf "@.";
11581158+ if failures > 0 then begin
11591159+ Format.eprintf "Resolve conflicts in project/%s and commit.@." project;
11601160+ exit 1
11611161+ end else
11621162+ Format.printf "Next: Build your project in project/%s@." project
11631163+ in
11641164+11651165+ if solve then begin
11661166+ (* Solve dependencies and merge all solved packages that are vendored *)
11671167+ let pkg, project = match args with
11681168+ | [pkg; project] -> pkg, project
11691169+ | _ ->
11701170+ Format.eprintf "Usage: unpac opam merge --solve PACKAGE PROJECT@.";
11711171+ exit 1
11721172+ in
11731173+ let repos = config.opam.repositories in
11741174+ if repos = [] then begin
11751175+ Format.eprintf "No repositories configured. Add one with: unpac opam repo add <name> <path>@.";
11761176+ exit 1
11771177+ end;
11781178+ let ocaml_version = match Unpac.Config.get_compiler config with
11791179+ | Some v -> v
11801180+ | None ->
11811181+ Format.eprintf "No compiler version configured.@.";
11821182+ Format.eprintf "Set one with: unpac opam config compiler 5.2.0@.";
11831183+ exit 1
11841184+ in
11851185+ let repo_paths = List.map (fun (r : Unpac.Config.repo_config) ->
11861186+ match r.source with
11871187+ | Unpac.Config.Local p -> p
11881188+ | Unpac.Config.Remote u -> u
11891189+ ) repos in
11901190+ Format.printf "Solving dependencies for %s...@." pkg;
11911191+ match Unpac_opam.Solver.solve ~repos:repo_paths ~ocaml_version ~packages:[pkg] with
11921192+ | Error msg ->
11931193+ Format.eprintf "Dependency solving failed:@.%s@." msg;
11941194+ exit 1
11951195+ | Ok result ->
11961196+ (* Group by dev-repo to get canonical names *)
11971197+ let groups = group_packages_by_dev_repo ~config result.packages in
11981198+ let canonical_names = List.map (fun (g : package_group) -> g.canonical_name) groups in
11991199+ (* Filter to only vendored packages *)
12001200+ let vendored = Unpac_opam.Opam.list_packages ~proc_mgr ~root in
12011201+ let to_merge = List.filter (fun name -> List.mem name vendored) canonical_names in
12021202+ if to_merge = [] then begin
12031203+ Format.eprintf "No vendored packages match the solved dependencies.@.";
12041204+ Format.eprintf "Run 'unpac opam add %s --solve' first to vendor them.@." pkg;
12051205+ exit 1
12061206+ end;
12071207+ Format.printf "Found %d vendored packages to merge.@.@." (List.length to_merge);
12081208+ merge_packages to_merge project
12091209+ end else if all then begin
12101210+ (* Merge all vendored packages *)
12111211+ let project = match args with
12121212+ | [project] -> project
12131213+ | _ ->
12141214+ Format.eprintf "Usage: unpac opam merge --all PROJECT@.";
12151215+ exit 1
12161216+ in
12171217+ let packages = Unpac_opam.Opam.list_packages ~proc_mgr ~root in
12181218+ if packages = [] then begin
12191219+ Format.eprintf "No vendored packages to merge.@.";
12201220+ exit 1
12211221+ end;
12221222+ merge_packages packages project
12231223+ end else begin
12241224+ (* Single package mode *)
12251225+ let pkg, project = match args with
12261226+ | [pkg; project] -> pkg, project
12271227+ | _ ->
12281228+ Format.eprintf "Usage: unpac opam merge PACKAGE PROJECT@.";
12291229+ exit 1
12301230+ in
12311231+ if merge_one ~project pkg then
12321232+ Format.printf "@.Next: Build your project in project/%s@." project
12331233+ else begin
12341234+ Format.eprintf "Resolve conflicts in project/%s and commit.@." project;
12351235+ exit 1
12361236+ end
12371237+ end
12381238+ in
12391239+ let info = Cmd.info "merge" ~doc in
12401240+ Cmd.v info Term.(const run $ logging_term $ args $ all_flag $ solve_flag)
12411241+12421242+(* Opam info command *)
12431243+let opam_info_cmd =
12441244+ let doc = "Show information about a vendored package." in
12451245+ let pkg_arg =
12461246+ let doc = "Package name." in
12471247+ Arg.(required & pos 0 (some string) None & info [] ~docv:"PACKAGE" ~doc)
12481248+ in
12491249+ let run () pkg =
12501250+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root ->
12511251+ let git = Unpac.Worktree.git_dir root in
12521252+ (* Check package exists *)
12531253+ let packages = Unpac_opam.Opam.list_packages ~proc_mgr ~root in
12541254+ if not (List.mem pkg packages) then begin
12551255+ Format.eprintf "Package '%s' is not vendored@." pkg;
12561256+ exit 1
12571257+ end;
12581258+ (* Get remote URL *)
12591259+ let remote = "origin-" ^ pkg in
12601260+ let url = Unpac.Git.remote_url ~proc_mgr ~cwd:git remote in
12611261+ Format.printf "Package: %s@." pkg;
12621262+ (match url with
12631263+ | Some u -> Format.printf "URL: %s@." u
12641264+ | None -> ());
12651265+ (* Get branch SHAs *)
12661266+ let upstream = Unpac_opam.Opam.upstream_branch pkg in
12671267+ let vendor = Unpac_opam.Opam.vendor_branch pkg in
12681268+ let patches = Unpac_opam.Opam.patches_branch pkg in
12691269+ (match Unpac.Git.rev_parse ~proc_mgr ~cwd:git upstream with
12701270+ | Some sha -> Format.printf "Upstream: %s@." (String.sub sha 0 7)
12711271+ | None -> ());
12721272+ (match Unpac.Git.rev_parse ~proc_mgr ~cwd:git vendor with
12731273+ | Some sha -> Format.printf "Vendor: %s@." (String.sub sha 0 7)
12741274+ | None -> ());
12751275+ (match Unpac.Git.rev_parse ~proc_mgr ~cwd:git patches with
12761276+ | Some sha -> Format.printf "Patches: %s@." (String.sub sha 0 7)
12771277+ | None -> ());
12781278+ (* Count commits ahead *)
12791279+ let log_output = Unpac.Git.run_exn ~proc_mgr ~cwd:git
12801280+ ["log"; "--oneline"; vendor ^ ".." ^ patches] in
12811281+ let commits = List.length (String.split_on_char '\n' log_output |>
12821282+ List.filter (fun s -> String.trim s <> "")) in
12831283+ Format.printf "Local commits: %d@." commits;
12841284+ Format.printf "@.Commands:@.";
12851285+ Format.printf " unpac opam diff %s # view local changes@." pkg;
12861286+ Format.printf " unpac opam edit %s # edit package@." pkg;
12871287+ Format.printf " unpac opam update %s # fetch upstream@." pkg
12881288+ in
12891289+ let info = Cmd.info "info" ~doc in
12901290+ Cmd.v info Term.(const run $ logging_term $ pkg_arg)
12911291+12921292+(* Opam diff command *)
12931293+let opam_diff_cmd =
12941294+ let doc = "Show diff between vendor and patches branches." in
12951295+ let pkg_arg =
12961296+ let doc = "Package name." in
12971297+ Arg.(required & pos 0 (some string) None & info [] ~docv:"PACKAGE" ~doc)
12981298+ in
12991299+ let run () pkg =
13001300+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root ->
13011301+ let git = Unpac.Worktree.git_dir root in
13021302+ (* Check package exists *)
13031303+ let packages = Unpac_opam.Opam.list_packages ~proc_mgr ~root in
13041304+ if not (List.mem pkg packages) then begin
13051305+ Format.eprintf "Package '%s' is not vendored@." pkg;
13061306+ exit 1
13071307+ end;
13081308+ let vendor = Unpac_opam.Opam.vendor_branch pkg in
13091309+ let patches = Unpac_opam.Opam.patches_branch pkg in
13101310+ let diff = Unpac.Git.run_exn ~proc_mgr ~cwd:git
13111311+ ["diff"; vendor; patches] in
13121312+ if String.trim diff = "" then begin
13131313+ Format.printf "No local changes@.";
13141314+ Format.printf "@.Hint: unpac opam edit %s # to make changes@." pkg
13151315+ end else begin
13161316+ print_string diff;
13171317+ Format.printf "@.Next: unpac opam merge %s <project>@." pkg
13181318+ end
13191319+ in
13201320+ let info = Cmd.info "diff" ~doc in
13211321+ Cmd.v info Term.(const run $ logging_term $ pkg_arg)
13221322+13231323+(* Opam remove command *)
13241324+let opam_remove_cmd =
13251325+ let doc = "Remove a vendored package." in
13261326+ let pkg_arg =
13271327+ let doc = "Package name to remove." in
13281328+ Arg.(required & pos 0 (some string) None & info [] ~docv:"PACKAGE" ~doc)
13291329+ in
13301330+ let run () pkg =
13311331+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root ->
13321332+ let git = Unpac.Worktree.git_dir root in
13331333+ (* Check package exists *)
13341334+ let packages = Unpac_opam.Opam.list_packages ~proc_mgr ~root in
13351335+ if not (List.mem pkg packages) then begin
13361336+ Format.eprintf "Package '%s' is not vendored@." pkg;
13371337+ exit 1
13381338+ end;
13391339+ (* Remove worktrees if exist *)
13401340+ (try Unpac.Worktree.remove_force ~proc_mgr root (Unpac.Worktree.Opam_upstream pkg) with _ -> ());
13411341+ (try Unpac.Worktree.remove_force ~proc_mgr root (Unpac.Worktree.Opam_vendor pkg) with _ -> ());
13421342+ (try Unpac.Worktree.remove_force ~proc_mgr root (Unpac.Worktree.Opam_patches pkg) with _ -> ());
13431343+ (* Delete branches *)
13441344+ let upstream = Unpac_opam.Opam.upstream_branch pkg in
13451345+ let vendor = Unpac_opam.Opam.vendor_branch pkg in
13461346+ let patches = Unpac_opam.Opam.patches_branch pkg in
13471347+ (try Unpac.Git.run_exn ~proc_mgr ~cwd:git ["branch"; "-D"; upstream] |> ignore with _ -> ());
13481348+ (try Unpac.Git.run_exn ~proc_mgr ~cwd:git ["branch"; "-D"; vendor] |> ignore with _ -> ());
13491349+ (try Unpac.Git.run_exn ~proc_mgr ~cwd:git ["branch"; "-D"; patches] |> ignore with _ -> ());
13501350+ (* Remove remote *)
13511351+ let remote = "origin-" ^ pkg in
13521352+ (try Unpac.Git.run_exn ~proc_mgr ~cwd:git ["remote"; "remove"; remote] |> ignore with _ -> ());
13531353+ Format.printf "Removed %s@." pkg;
13541354+ Format.printf "@.Hint: unpac opam add <package> # to add another package@."
13551355+ in
13561356+ let info = Cmd.info "remove" ~doc in
13571357+ Cmd.v info Term.(const run $ logging_term $ pkg_arg)
13581358+13591359+(* Opam init command - create a new local opam package *)
13601360+let opam_init_cmd =
13611361+ let doc = "Create a new local opam package (no upstream repository)." in
13621362+ let man = [
13631363+ `S Manpage.s_description;
13641364+ `P "Creates a new opam package that originates locally rather than from \
13651365+ an external repository. This is useful for:";
13661366+ `I ("New libraries", "Starting a new OCaml library from scratch");
13671367+ `I ("Internal packages", "Creating packages that will never be published");
13681368+ `I ("Agent-created packages", "AI agents can create new dependencies on-demand");
13691369+ `P "The package is created with a minimal scaffold including dune-project \
13701370+ and a .opam file. It uses the standard three-tier branch model but \
13711371+ with no upstream branch (url='local' in config).";
13721372+ `S "PACKAGE STRUCTURE";
13731373+ `P "The created package will have:";
13741374+ `Pre " vendor/opam/<name>/
13751375+ dune-project # Dune project file
13761376+ <name>.opam # Opam package file
13771377+ lib/
13781378+ dune # Library build rules
13791379+ <name>.ml # Main module (empty)
13801380+ <name>.mli # Interface file (empty)";
13811381+ `S Manpage.s_examples;
13821382+ `P "Create a new local library:";
13831383+ `Pre " unpac opam init mylib
13841384+ unpac opam merge mylib myproject";
13851385+ `P "Create with description:";
13861386+ `Pre " unpac opam init mylib --synopsis 'My utility library'";
13871387+ `S "LIFECYCLE";
13881388+ `P "Local packages can later be published by:";
13891389+ `Pre " 1. Push the opam/patches/<name> branch to a git repository
13901390+ 2. Update config with: unpac opam set-upstream <name> <url>
13911391+ 3. Submit to opam-repository if desired";
13921392+ `S "SEE ALSO";
13931393+ `P "unpac-opam-promote(1) for graduating projects to dependencies.";
13941394+ ] in
13951395+ let name_arg =
13961396+ let doc = "Name for the new package. Should be a valid opam package name \
13971397+ (lowercase, alphanumeric, hyphens allowed)." in
13981398+ Arg.(required & pos 0 (some string) None & info [] ~docv:"NAME" ~doc)
13991399+ in
14001400+ let synopsis_arg =
14011401+ let doc = "One-line synopsis for the package." in
14021402+ Arg.(value & opt string "A local opam package" & info ["synopsis"; "s"] ~docv:"TEXT" ~doc)
14031403+ in
14041404+ let run () name synopsis =
14051405+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root ->
14061406+ with_audit ~proc_mgr ~root ~operation_type:Unpac.Audit.Opam_init ~args:[name] @@ fun _ctx ->
14071407+ let git = Unpac.Worktree.git_dir root in
14081408+ let config = load_config root in
14091409+14101410+ (* Validate package name *)
14111411+ if String.length name = 0 then begin
14121412+ Format.eprintf "Error: Package name cannot be empty@.";
14131413+ exit 1
14141414+ end;
14151415+ let valid_char c = (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c = '-' || c = '_' in
14161416+ if not (String.for_all valid_char name) then begin
14171417+ Format.eprintf "Error: Package name must be lowercase alphanumeric (hyphens/underscores allowed)@.";
14181418+ exit 1
14191419+ end;
14201420+14211421+ (* Check if already exists *)
14221422+ let packages = Unpac_opam.Opam.list_packages ~proc_mgr ~root in
14231423+ if List.mem name packages then begin
14241424+ Format.eprintf "Package '%s' already exists@." name;
14251425+ exit 1
14261426+ end;
14271427+14281428+ (* Create an orphan branch for vendor *)
14291429+ let vendor_branch = Unpac_opam.Opam.vendor_branch name in
14301430+ let patches_branch = Unpac_opam.Opam.patches_branch name in
14311431+ let vendor_path = "vendor/opam/" ^ name in
14321432+14331433+ (* Create orphan branch with initial content *)
14341434+ Unpac.Git.checkout_orphan ~proc_mgr ~cwd:git vendor_branch;
14351435+14361436+ (* Remove any existing index content *)
14371437+ Unpac.Git.rm_cached_rf ~proc_mgr ~cwd:git;
14381438+14391439+ (* Create scaffold files in a temporary worktree *)
14401440+ let wt_path = Unpac.Worktree.path root (Unpac.Worktree.Opam_vendor name) in
14411441+ Unpac.Worktree.ensure ~proc_mgr root (Unpac.Worktree.Opam_vendor name);
14421442+14431443+ (* Create directory structure *)
14441444+ let pkg_dir = Eio.Path.(wt_path / vendor_path) in
14451445+ let lib_dir = Eio.Path.(pkg_dir / "lib") in
14461446+ Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 lib_dir;
14471447+14481448+ (* Create dune-project *)
14491449+ let dune_project = Printf.sprintf {|(lang dune 3.0)
14501450+(name %s)
14511451+(generate_opam_files true)
14521452+(source (uri "local"))
14531453+(authors "Local")
14541454+(maintainers "Local")
14551455+(package
14561456+ (name %s)
14571457+ (synopsis "%s")
14581458+ (depends
14591459+ (ocaml (>= 4.14))))
14601460+|} name name synopsis in
14611461+ Eio.Path.save ~create:(`Or_truncate 0o644)
14621462+ Eio.Path.(pkg_dir / "dune-project") dune_project;
14631463+14641464+ (* Create lib/dune *)
14651465+ let lib_dune = Printf.sprintf {|(library
14661466+ (name %s)
14671467+ (public_name %s))
14681468+|} (String.map (fun c -> if c = '-' then '_' else c) name) name in
14691469+ Eio.Path.save ~create:(`Or_truncate 0o644)
14701470+ Eio.Path.(lib_dir / "dune") lib_dune;
14711471+14721472+ (* Create lib/<name>.ml *)
14731473+ let ml_file = Printf.sprintf {|(* %s - A local opam package *)
14741474+14751475+(** This module was created by [unpac opam init].
14761476+ Add your implementation here. *)
14771477+|} name in
14781478+ let ml_name = String.map (fun c -> if c = '-' then '_' else c) name in
14791479+ Eio.Path.save ~create:(`Or_truncate 0o644)
14801480+ Eio.Path.(lib_dir / (ml_name ^ ".ml")) ml_file;
14811481+14821482+ (* Create lib/<name>.mli *)
14831483+ let mli_file = Printf.sprintf {|(* %s - A local opam package *)
14841484+14851485+(** This module was created by [unpac opam init].
14861486+ Define your interface here. *)
14871487+|} name in
14881488+ Eio.Path.save ~create:(`Or_truncate 0o644)
14891489+ Eio.Path.(lib_dir / (ml_name ^ ".mli")) mli_file;
14901490+14911491+ (* Commit the scaffold *)
14921492+ Unpac.Git.add_all ~proc_mgr ~cwd:wt_path;
14931493+ Unpac.Git.commit ~proc_mgr ~cwd:wt_path
14941494+ ~message:(Printf.sprintf "Initialize local package %s" name);
14951495+14961496+ (* Get the commit SHA *)
14971497+ let sha = Unpac.Git.current_head ~proc_mgr ~cwd:wt_path in
14981498+14991499+ (* Create patches branch from vendor *)
15001500+ Unpac.Git.branch_create ~proc_mgr ~cwd:git
15011501+ ~name:patches_branch ~start_point:vendor_branch;
15021502+15031503+ (* Cleanup worktree *)
15041504+ Unpac.Worktree.remove ~proc_mgr root (Unpac.Worktree.Opam_vendor name);
15051505+15061506+ (* Switch back to main *)
15071507+ Unpac.Git.checkout ~proc_mgr ~cwd:git "main";
15081508+15091509+ (* Record in config with url = "local" *)
15101510+ let vendored : Unpac.Config.vendored_package = {
15111511+ pkg_name = name; pkg_url = "local"; pkg_branch = None
15121512+ } in
15131513+ let config = Unpac.Config.add_vendored_package config vendored in
15141514+ save_config ~proc_mgr root config (Printf.sprintf "Add local package %s" name);
15151515+15161516+ Format.printf "Created local package %s (%s)@." name (String.sub sha 0 7);
15171517+ Format.printf "@.Package structure:@.";
15181518+ Format.printf " %s/@." vendor_path;
15191519+ Format.printf " dune-project@.";
15201520+ Format.printf " lib/dune@.";
15211521+ Format.printf " lib/%s.ml@." ml_name;
15221522+ Format.printf " lib/%s.mli@." ml_name;
15231523+ Format.printf "@.Next steps:@.";
15241524+ Format.printf " unpac opam edit %s # add code to the package@." name;
15251525+ Format.printf " unpac opam merge %s <project> # use in a project@." name
15261526+ in
15271527+ let info = Cmd.info "init" ~doc ~man in
15281528+ Cmd.v info Term.(const run $ logging_term $ name_arg $ synopsis_arg)
15291529+15301530+(* Opam promote command - graduate a project to a vendored dependency *)
15311531+let opam_promote_cmd =
15321532+ let doc = "Promote a project to a vendored opam dependency." in
15331533+ let man = [
15341534+ `S Manpage.s_description;
15351535+ `P "Graduates a project branch to become a vendored opam dependency that \
15361536+ other projects can use. This is the lifecycle path for code that:";
15371537+ `I ("Started as a project", "Code developed in project/<name> that should \
15381538+ become a shared library");
15391539+ `I ("Needs reuse", "A project that other projects want to depend on");
15401540+ `I ("Agent refactoring", "AI agents can extract common code into libraries");
15411541+ `P "The project's content is copied to create opam/vendor/<name> and \
15421542+ opam/patches/<name> branches. The original project remains unchanged \
15431543+ and can be deleted if no longer needed.";
15441544+ `S "REQUIREMENTS";
15451545+ `P "The project directory should contain a valid dune-project file with \
15461546+ the package definition. If not present, a basic one will be created.";
15471547+ `S Manpage.s_examples;
15481548+ `P "Promote a project to a dependency:";
15491549+ `Pre " unpac opam promote my-utils
15501550+ unpac opam merge my-utils other-project";
15511551+ `P "Promote with a different name:";
15521552+ `Pre " unpac opam promote my-app --as my-lib";
15531553+ `S "LIFECYCLE";
15541554+ `P "After promotion:";
15551555+ `Pre " 1. The new package appears in 'unpac opam list'
15561556+ 2. Other projects can merge it with 'unpac opam merge'
15571557+ 3. Edit with 'unpac opam edit' (changes go to patches branch)
15581558+ 4. Original project can be deleted if desired";
15591559+ `S "SEE ALSO";
15601560+ `P "unpac-opam-init(1) for creating new packages from scratch.";
15611561+ ] in
15621562+ let project_arg =
15631563+ let doc = "Name of the project to promote." in
15641564+ Arg.(required & pos 0 (some string) None & info [] ~docv:"PROJECT" ~doc)
15651565+ in
15661566+ let as_arg =
15671567+ let doc = "Name for the opam package (defaults to project name)." in
15681568+ Arg.(value & opt (some string) None & info ["as"] ~docv:"NAME" ~doc)
15691569+ in
15701570+ let run () project pkg_name_opt =
15711571+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root ->
15721572+ let pkg_name = match pkg_name_opt with Some n -> n | None -> project in
15731573+ with_audit ~proc_mgr ~root ~operation_type:Unpac.Audit.Opam_promote ~args:[project; pkg_name] @@ fun _ctx ->
15741574+ let git = Unpac.Worktree.git_dir root in
15751575+ let config = load_config root in
15761576+15771577+ (* Check project exists *)
15781578+ let projects = Unpac.Worktree.list_projects ~proc_mgr root in
15791579+ if not (List.mem project projects) then begin
15801580+ Format.eprintf "Project '%s' does not exist@." project;
15811581+ Format.eprintf "@.Available projects:@.";
15821582+ List.iter (Format.eprintf " %s@.") projects;
15831583+ exit 1
15841584+ end;
15851585+15861586+ (* Check package doesn't already exist *)
15871587+ let packages = Unpac_opam.Opam.list_packages ~proc_mgr ~root in
15881588+ if List.mem pkg_name packages then begin
15891589+ Format.eprintf "Package '%s' already exists@." pkg_name;
15901590+ exit 1
15911591+ end;
15921592+15931593+ let vendor_branch = Unpac_opam.Opam.vendor_branch pkg_name in
15941594+ let patches_branch = Unpac_opam.Opam.patches_branch pkg_name in
15951595+ let vendor_path = "vendor/opam/" ^ pkg_name in
15961596+15971597+ (* Create orphan branch for vendor *)
15981598+ Unpac.Git.checkout_orphan ~proc_mgr ~cwd:git vendor_branch;
15991599+ Unpac.Git.rm_cached_rf ~proc_mgr ~cwd:git;
16001600+16011601+ (* Create vendor worktree *)
16021602+ Unpac.Worktree.ensure ~proc_mgr root (Unpac.Worktree.Opam_vendor pkg_name);
16031603+ let vendor_wt = Unpac.Worktree.path root (Unpac.Worktree.Opam_vendor pkg_name) in
16041604+16051605+ (* Get project worktree or create temporary one *)
16061606+ let project_wt = Unpac.Worktree.path root (Unpac.Worktree.Project project) in
16071607+ let created_project_wt = not (Sys.file_exists (snd project_wt)) in
16081608+ if created_project_wt then
16091609+ Unpac.Worktree.ensure ~proc_mgr root (Unpac.Worktree.Project project);
16101610+16111611+ (* Create target directory *)
16121612+ let pkg_dir = Eio.Path.(vendor_wt / vendor_path) in
16131613+ Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 pkg_dir;
16141614+16151615+ (* Copy project content to vendor path *)
16161616+ let rec copy_dir src dst =
16171617+ Eio.Path.read_dir src |> List.iter (fun name ->
16181618+ if name <> ".git" then begin
16191619+ let src_path = Eio.Path.(src / name) in
16201620+ let dst_path = Eio.Path.(dst / name) in
16211621+ if Eio.Path.is_directory src_path then begin
16221622+ Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 dst_path;
16231623+ copy_dir src_path dst_path
16241624+ end else begin
16251625+ let content = Eio.Path.load src_path in
16261626+ Eio.Path.save ~create:(`Or_truncate 0o644) dst_path content
16271627+ end
16281628+ end
16291629+ )
16301630+ in
16311631+ copy_dir project_wt pkg_dir;
16321632+16331633+ (* Commit *)
16341634+ Unpac.Git.add_all ~proc_mgr ~cwd:vendor_wt;
16351635+ Unpac.Git.commit ~proc_mgr ~cwd:vendor_wt
16361636+ ~message:(Printf.sprintf "Promote project %s to package %s" project pkg_name);
16371637+16381638+ (* Get SHA *)
16391639+ let sha = Unpac.Git.current_head ~proc_mgr ~cwd:vendor_wt in
16401640+16411641+ (* Create patches branch from vendor *)
16421642+ Unpac.Git.branch_create ~proc_mgr ~cwd:git
16431643+ ~name:patches_branch ~start_point:vendor_branch;
16441644+16451645+ (* Cleanup *)
16461646+ Unpac.Worktree.remove ~proc_mgr root (Unpac.Worktree.Opam_vendor pkg_name);
16471647+ if created_project_wt then
16481648+ Unpac.Worktree.remove ~proc_mgr root (Unpac.Worktree.Project project);
16491649+16501650+ (* Switch back to main *)
16511651+ Unpac.Git.checkout ~proc_mgr ~cwd:git "main";
16521652+16531653+ (* Record in config *)
16541654+ let vendored : Unpac.Config.vendored_package = {
16551655+ pkg_name; pkg_url = "local"; pkg_branch = None
16561656+ } in
16571657+ let config = Unpac.Config.add_vendored_package config vendored in
16581658+ save_config ~proc_mgr root config (Printf.sprintf "Promote project %s to package %s" project pkg_name);
16591659+16601660+ Format.printf "Promoted project %s to package %s (%s)@." project pkg_name (String.sub sha 0 7);
16611661+ Format.printf "@.The package is now available as a vendored dependency.@.";
16621662+ Format.printf "@.Next steps:@.";
16631663+ Format.printf " unpac opam merge %s <other-project> # use in another project@." pkg_name;
16641664+ Format.printf " unpac opam edit %s # make changes@." pkg_name;
16651665+ if project <> pkg_name then
16661666+ Format.printf " unpac project remove %s # remove original project (optional)@." project
16671667+ in
16681668+ let info = Cmd.info "promote" ~doc ~man in
16691669+ Cmd.v info Term.(const run $ logging_term $ project_arg $ as_arg)
16701670+16711671+(* Opam command group *)
16721672+let opam_cmd =
16731673+ let doc = "Opam package vendoring commands." in
16741674+ let man = [
16751675+ `S Manpage.s_description;
16761676+ `P "Vendor OCaml packages from opam repositories or create new local packages. \
16771677+ Uses a three-tier branch model for conflict-free vendoring:";
16781678+ `I ("opam/upstream/<pkg>", "Tracks the original repository state (empty for local packages)");
16791679+ `I ("opam/vendor/<pkg>", "Clean snapshot used as merge base");
16801680+ `I ("opam/patches/<pkg>", "Local modifications on top of vendor");
16811681+ `S "PACKAGE SOURCES";
16821682+ `P "Packages can come from three sources:";
16831683+ `I ("External (unpac opam add)", "Vendor from opam repository or git URL. \
16841684+ Has upstream tracking for updates.");
16851685+ `I ("Local (unpac opam init)", "Create a new package from scratch. \
16861686+ No upstream, recorded as url='local' in config.");
16871687+ `I ("Promoted (unpac opam promote)", "Graduate a project to a dependency. \
16881688+ Allows code reuse between projects.");
16891689+ `S "TYPICAL WORKFLOW - External Packages";
16901690+ `P "1. Configure an opam repository:";
16911691+ `Pre " unpac opam repo add default /path/to/opam-repository";
16921692+ `P "2. Set the OCaml compiler version for dependency solving:";
16931693+ `Pre " unpac opam config compiler 5.2.0";
16941694+ `P "3. Vendor a package with dependency solving:";
16951695+ `Pre " unpac opam add mypackage --solve";
16961696+ `P "4. Create a project and merge dependencies:";
16971697+ `Pre " unpac project new myapp
16981698+ unpac opam merge mypackage myapp --solve";
16991699+ `P "5. Build in the project directory:";
17001700+ `Pre " cd project/myapp && dune build";
17011701+ `S "TYPICAL WORKFLOW - Local Packages";
17021702+ `P "1. Create a new local package:";
17031703+ `Pre " unpac opam init mylib --synopsis 'My utility library'";
17041704+ `P "2. Add code to the package:";
17051705+ `Pre " unpac opam edit mylib
17061706+ # edit files in vendor/opam/mylib-patches/
17071707+ git add -A && git commit -m 'implement mylib'
17081708+ unpac opam done mylib";
17091709+ `P "3. Use in a project:";
17101710+ `Pre " unpac opam merge mylib myproject";
17111711+ `S "TYPICAL WORKFLOW - Promoting Projects";
17121712+ `P "When a project should become a shared library:";
17131713+ `Pre " unpac opam promote myproject --as mylib
17141714+ unpac opam merge mylib other-project";
17151715+ `S "MAKING LOCAL CHANGES";
17161716+ `P "1. Open package for editing (creates worktrees):";
17171717+ `Pre " unpac opam edit mypackage";
17181718+ `P "2. Make changes in the patches worktree:";
17191719+ `Pre " cd vendor/opam/mypackage-patches
17201720+ # edit files...
17211721+ git add -A && git commit -m 'my changes'";
17221722+ `P "3. Close the editing session:";
17231723+ `Pre " unpac opam done mypackage";
17241724+ `P "4. View your changes:";
17251725+ `Pre " unpac opam diff mypackage";
17261726+ `S "UPDATING FROM UPSTREAM";
17271727+ `P "For packages with external upstreams (added via 'opam add'):";
17281728+ `Pre " unpac opam update mypackage
17291729+ unpac opam merge mypackage myapp";
17301730+ `P "For promoted local packages, first configure the upstream URL:";
17311731+ `Pre " unpac opam set-upstream mylib git@github.com:me/mylib.git
17321732+ unpac opam update mylib";
17331733+ `S "FOR AI AGENTS";
17341734+ `P "When an agent needs to create a new dependency:";
17351735+ `Pre " # Option 1: Create from scratch
17361736+ unpac opam init new-lib --synopsis 'Agent-created library'
17371737+ unpac opam edit new-lib
17381738+ # ... add implementation ...
17391739+ unpac opam done new-lib
17401740+ unpac opam merge new-lib target-project";
17411741+ `Pre " # Option 2: Extract from existing project
17421742+ unpac opam promote existing-project --as new-lib
17431743+ unpac opam merge new-lib other-project";
17441744+ `P "Local packages have url='local' in unpac.toml and can be identified with:";
17451745+ `Pre " unpac opam info <package> # shows URL: local";
17461746+ `S "COMMANDS";
17471747+ ] in
17481748+ let info = Cmd.info "opam" ~doc ~man in
17491749+ Cmd.group info [
17501750+ opam_repo_cmd;
17511751+ opam_config_cmd;
17521752+ opam_add_cmd;
17531753+ opam_init_cmd;
17541754+ opam_promote_cmd;
17551755+ opam_list_cmd;
17561756+ opam_edit_cmd;
17571757+ opam_done_cmd;
17581758+ opam_set_upstream_cmd;
17591759+ opam_update_cmd;
17601760+ opam_merge_cmd;
17611761+ opam_info_cmd;
17621762+ opam_diff_cmd;
17631763+ opam_remove_cmd;
17641764+ ]
17651765+17661766+(* Git add command *)
17671767+let git_add_cmd =
17681768+ let doc = "Vendor a git repository." in
17691769+ let url_arg =
17701770+ let doc = "Git URL to clone from." in
17711771+ Arg.(required & pos 0 (some string) None & info [] ~docv:"URL" ~doc)
17721772+ in
17731773+ let name_arg =
17741774+ let doc = "Override repository name (default: derived from URL)." in
17751775+ Arg.(value & opt (some string) None & info ["n"; "name"] ~docv:"NAME" ~doc)
17761776+ in
17771777+ let branch_arg =
17781778+ let doc = "Git branch or tag to vendor (default: remote default)." in
17791779+ Arg.(value & opt (some string) None & info ["b"; "branch"] ~docv:"REF" ~doc)
17801780+ in
17811781+ let subdir_arg =
17821782+ let doc = "Extract only this subdirectory from the repository." in
17831783+ Arg.(value & opt (some string) None & info ["subdir"] ~docv:"PATH" ~doc)
17841784+ in
17851785+ let cache_arg =
17861786+ let doc = "Path to vendor cache." in
17871787+ Arg.(value & opt (some string) None & info ["cache"] ~docv:"PATH" ~doc)
17881788+ in
17891789+ let run () url name_opt branch_opt subdir_opt cli_cache =
17901790+ with_root @@ fun ~env:_ ~fs ~proc_mgr ~root ->
17911791+ let config = load_config root in
17921792+ let cache = resolve_cache ~proc_mgr ~fs ~config ~cli_cache in
17931793+ let audit_args = [url] @ (match name_opt with Some n -> ["--name"; n] | None -> []) in
17941794+ with_audit ~proc_mgr ~root ~operation_type:Unpac.Audit.Git_add ~args:audit_args @@ fun _ctx ->
17951795+17961796+ let name = match name_opt with
17971797+ | Some n -> n
17981798+ | None ->
17991799+ let base = Filename.basename url in
18001800+ if String.ends_with ~suffix:".git" base then
18011801+ String.sub base 0 (String.length base - 4)
18021802+ else base
18031803+ in
18041804+18051805+ let info : Unpac.Git_backend.repo_info = {
18061806+ name; url; branch = branch_opt; subdir = subdir_opt;
18071807+ } in
18081808+18091809+ match Unpac.Git_backend.add_repo ~proc_mgr ~root ?cache info with
18101810+ | Unpac.Backend.Added { name = repo_name; sha } ->
18111811+ Format.printf "Added %s (%s)@." repo_name (String.sub sha 0 7);
18121812+ let repo_config : Unpac.Config.git_repo_config = {
18131813+ git_name = name; git_url = url;
18141814+ git_branch = branch_opt; git_subdir = subdir_opt;
18151815+ } in
18161816+ let config' = Unpac.Config.add_git_repo config repo_config in
18171817+ save_config ~proc_mgr root config' (Printf.sprintf "Add git repo %s" name);
18181818+ Format.printf "@.Next steps:@.";
18191819+ Format.printf " unpac git edit %s # make local changes@." repo_name;
18201820+ Format.printf " unpac git merge %s <project> # merge into a project@." repo_name
18211821+ | Unpac.Backend.Already_exists name ->
18221822+ Format.printf "Repository %s already vendored@." name
18231823+ | Unpac.Backend.Failed { name; error } ->
18241824+ Format.eprintf "Error adding %s: %s@." name error;
18251825+ exit 1
18261826+ in
18271827+ let info = Cmd.info "add" ~doc in
18281828+ Cmd.v info Term.(const run $ logging_term $ url_arg $ name_arg $ branch_arg $ subdir_arg $ cache_arg)
18291829+18301830+(* Git list command *)
18311831+let git_list_cmd =
18321832+ let doc = "List vendored git repositories." in
18331833+ let run () =
18341834+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root ->
18351835+ let repos = Unpac.Git_backend.list_repos ~proc_mgr ~root in
18361836+ if repos = [] then begin
18371837+ Format.printf "No git repositories vendored@.";
18381838+ Format.printf "@.Hint: unpac git add <url>@."
18391839+ end else
18401840+ List.iter (Format.printf "%s@.") repos
18411841+ in
18421842+ let info = Cmd.info "list" ~doc in
18431843+ Cmd.v info Term.(const run $ logging_term)
18441844+18451845+(* Git update command *)
18461846+let git_update_cmd =
18471847+ let doc = "Update a vendored git repository from upstream." in
18481848+ let name_arg =
18491849+ let doc = "Repository name to update." in
18501850+ Arg.(required & pos 0 (some string) None & info [] ~docv:"NAME" ~doc)
18511851+ in
18521852+ let run () name =
18531853+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root ->
18541854+ with_audit ~proc_mgr ~root ~operation_type:Unpac.Audit.Git_update ~args:[name] @@ fun _ctx ->
18551855+ match Unpac.Git_backend.update_repo ~proc_mgr ~root name with
18561856+ | Unpac.Backend.Updated { name = repo_name; old_sha; new_sha } ->
18571857+ Format.printf "Updated %s: %s -> %s@." repo_name
18581858+ (String.sub old_sha 0 7) (String.sub new_sha 0 7)
18591859+ | Unpac.Backend.No_changes name ->
18601860+ Format.printf "%s is up to date@." name
18611861+ | Unpac.Backend.Update_failed { name; error } ->
18621862+ Format.eprintf "Error updating %s: %s@." name error;
18631863+ exit 1
18641864+ in
18651865+ let info = Cmd.info "update" ~doc in
18661866+ Cmd.v info Term.(const run $ logging_term $ name_arg)
18671867+18681868+(* Git merge command *)
18691869+let git_merge_cmd =
18701870+ let doc = "Merge a vendored git repository into a project." in
18711871+ let name_arg =
18721872+ let doc = "Repository name to merge." in
18731873+ Arg.(required & pos 0 (some string) None & info [] ~docv:"NAME" ~doc)
18741874+ in
18751875+ let project_arg =
18761876+ let doc = "Project to merge into." in
18771877+ Arg.(required & pos 1 (some string) None & info [] ~docv:"PROJECT" ~doc)
18781878+ in
18791879+ let run () name project =
18801880+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root ->
18811881+ with_audit ~proc_mgr ~root ~operation_type:Unpac.Audit.Git_merge ~args:[name; project] @@ fun _ctx ->
18821882+ let patches_branch = Unpac.Git_backend.patches_branch name in
18831883+ match Unpac.Backend.merge_to_project ~proc_mgr ~root ~project ~patches_branch with
18841884+ | Ok () ->
18851885+ Format.printf "Merged %s into %s@." name project;
18861886+ Format.printf "@.Next: Build your project in project/%s@." project
18871887+ | Error (`Conflict files) ->
18881888+ Format.eprintf "Merge conflict in %s:@." name;
18891889+ List.iter (Format.eprintf " %s@.") files;
18901890+ Format.eprintf "Resolve conflicts in project/%s and commit.@." project;
18911891+ exit 1
18921892+ in
18931893+ let info = Cmd.info "merge" ~doc in
18941894+ Cmd.v info Term.(const run $ logging_term $ name_arg $ project_arg)
18951895+18961896+(* Git info command *)
18971897+let git_info_cmd =
18981898+ let doc = "Show information about a vendored git repository." in
18991899+ let name_arg =
19001900+ let doc = "Repository name." in
19011901+ Arg.(required & pos 0 (some string) None & info [] ~docv:"NAME" ~doc)
19021902+ in
19031903+ let run () name =
19041904+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root ->
19051905+ let git = Unpac.Worktree.git_dir root in
19061906+ let repos = Unpac.Git_backend.list_repos ~proc_mgr ~root in
19071907+ if not (List.mem name repos) then begin
19081908+ Format.eprintf "Repository '%s' is not vendored@." name;
19091909+ exit 1
19101910+ end;
19111911+ let remote = "origin-" ^ name in
19121912+ let url = Unpac.Git.remote_url ~proc_mgr ~cwd:git remote in
19131913+ Format.printf "Repository: %s@." name;
19141914+ (match url with Some u -> Format.printf "URL: %s@." u | None -> ());
19151915+ let upstream = Unpac.Git_backend.upstream_branch name in
19161916+ let vendor = Unpac.Git_backend.vendor_branch name in
19171917+ let patches = Unpac.Git_backend.patches_branch name in
19181918+ (match Unpac.Git.rev_parse ~proc_mgr ~cwd:git upstream with
19191919+ | Some sha -> Format.printf "Upstream: %s@." (String.sub sha 0 7) | None -> ());
19201920+ (match Unpac.Git.rev_parse ~proc_mgr ~cwd:git vendor with
19211921+ | Some sha -> Format.printf "Vendor: %s@." (String.sub sha 0 7) | None -> ());
19221922+ (match Unpac.Git.rev_parse ~proc_mgr ~cwd:git patches with
19231923+ | Some sha -> Format.printf "Patches: %s@." (String.sub sha 0 7) | None -> ());
19241924+ let log_output = Unpac.Git.run_exn ~proc_mgr ~cwd:git
19251925+ ["log"; "--oneline"; vendor ^ ".." ^ patches] in
19261926+ let commits = List.length (String.split_on_char '\n' log_output |>
19271927+ List.filter (fun s -> String.trim s <> "")) in
19281928+ Format.printf "Local commits: %d@." commits
19291929+ in
19301930+ let info = Cmd.info "info" ~doc in
19311931+ Cmd.v info Term.(const run $ logging_term $ name_arg)
19321932+19331933+(* Git diff command *)
19341934+let git_diff_cmd =
19351935+ let doc = "Show diff between vendor and patches branches." in
19361936+ let name_arg =
19371937+ let doc = "Repository name." in
19381938+ Arg.(required & pos 0 (some string) None & info [] ~docv:"NAME" ~doc)
19391939+ in
19401940+ let run () name =
19411941+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root ->
19421942+ let git = Unpac.Worktree.git_dir root in
19431943+ let repos = Unpac.Git_backend.list_repos ~proc_mgr ~root in
19441944+ if not (List.mem name repos) then begin
19451945+ Format.eprintf "Repository '%s' is not vendored@." name;
19461946+ exit 1
19471947+ end;
19481948+ let vendor = Unpac.Git_backend.vendor_branch name in
19491949+ let patches = Unpac.Git_backend.patches_branch name in
19501950+ let diff = Unpac.Git.run_exn ~proc_mgr ~cwd:git ["diff"; vendor; patches] in
19511951+ if String.trim diff = "" then
19521952+ Format.printf "No local changes@."
19531953+ else
19541954+ print_string diff
19551955+ in
19561956+ let info = Cmd.info "diff" ~doc in
19571957+ Cmd.v info Term.(const run $ logging_term $ name_arg)
19581958+19591959+(* Git edit command *)
19601960+let git_edit_cmd =
19611961+ let doc = "Open a repository's patches worktree for editing." in
19621962+ let name_arg =
19631963+ let doc = "Repository name to edit." in
19641964+ Arg.(required & pos 0 (some string) None & info [] ~docv:"NAME" ~doc)
19651965+ in
19661966+ let run () name =
19671967+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root ->
19681968+ let repos = Unpac.Git_backend.list_repos ~proc_mgr ~root in
19691969+ if not (List.mem name repos) then begin
19701970+ Format.eprintf "Repository '%s' is not vendored@." name;
19711971+ exit 1
19721972+ end;
19731973+ Unpac.Worktree.ensure ~proc_mgr root (Unpac.Worktree.Git_patches name);
19741974+ Unpac.Worktree.ensure ~proc_mgr root (Unpac.Worktree.Git_vendor name);
19751975+ let patches_path = snd (Unpac.Worktree.path root (Unpac.Worktree.Git_patches name)) in
19761976+ let vendor_path = snd (Unpac.Worktree.path root (Unpac.Worktree.Git_vendor name)) in
19771977+ Format.printf "Editing %s@.@." name;
19781978+ Format.printf "Worktrees created:@.";
19791979+ Format.printf " patches: %s (make changes here)@." patches_path;
19801980+ Format.printf " vendor: %s (original for reference)@." vendor_path;
19811981+ Format.printf "@.When done: unpac git done %s@." name
19821982+ in
19831983+ let info = Cmd.info "edit" ~doc in
19841984+ Cmd.v info Term.(const run $ logging_term $ name_arg)
19851985+19861986+(* Git done command *)
19871987+let git_done_cmd =
19881988+ let doc = "Close a repository's patches and vendor worktrees." in
19891989+ let name_arg =
19901990+ let doc = "Repository name." in
19911991+ Arg.(required & pos 0 (some string) None & info [] ~docv:"NAME" ~doc)
19921992+ in
19931993+ let run () name =
19941994+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root ->
19951995+ let patches_kind = Unpac.Worktree.Git_patches name in
19961996+ let vendor_kind = Unpac.Worktree.Git_vendor name in
19971997+ if not (Unpac.Worktree.exists root patches_kind) then begin
19981998+ Format.eprintf "No editing session for '%s'@." name;
19991999+ exit 1
20002000+ end;
20012001+ let wt_path = Unpac.Worktree.path root patches_kind in
20022002+ let status = Unpac.Git.run_exn ~proc_mgr ~cwd:wt_path ["status"; "--porcelain"] in
20032003+ if String.trim status <> "" then begin
20042004+ Format.eprintf "Warning: uncommitted changes in %s@." name;
20052005+ Format.eprintf "Commit or discard them before closing.@.";
20062006+ exit 1
20072007+ end;
20082008+ Unpac.Worktree.remove ~proc_mgr root patches_kind;
20092009+ if Unpac.Worktree.exists root vendor_kind then
20102010+ Unpac.Worktree.remove ~proc_mgr root vendor_kind;
20112011+ Format.printf "Closed editing session for %s@." name
20122012+ in
20132013+ let info = Cmd.info "done" ~doc in
20142014+ Cmd.v info Term.(const run $ logging_term $ name_arg)
20152015+20162016+(* Git remove command *)
20172017+let git_remove_cmd =
20182018+ let doc = "Remove a vendored git repository." in
20192019+ let name_arg =
20202020+ let doc = "Repository name to remove." in
20212021+ Arg.(required & pos 0 (some string) None & info [] ~docv:"NAME" ~doc)
20222022+ in
20232023+ let run () name =
20242024+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root ->
20252025+ let repos = Unpac.Git_backend.list_repos ~proc_mgr ~root in
20262026+ if not (List.mem name repos) then begin
20272027+ Format.eprintf "Repository '%s' is not vendored@." name;
20282028+ exit 1
20292029+ end;
20302030+ Unpac.Git_backend.remove_repo ~proc_mgr ~root name;
20312031+ let config = load_config root in
20322032+ let config' = Unpac.Config.remove_git_repo config name in
20332033+ save_config ~proc_mgr root config' (Printf.sprintf "Remove git repo %s" name);
20342034+ Format.printf "Removed %s@." name
20352035+ in
20362036+ let info = Cmd.info "remove" ~doc in
20372037+ Cmd.v info Term.(const run $ logging_term $ name_arg)
20382038+20392039+(* Git command group *)
20402040+let git_cmd =
20412041+ let doc = "Git repository vendoring commands." in
20422042+ let man = [
20432043+ `S Manpage.s_description;
20442044+ `P "Vendor arbitrary git repositories with full history preservation. \
20452045+ Uses the three-tier branch model:";
20462046+ `I ("git/upstream/<name>", "Tracks the original repository state");
20472047+ `I ("git/vendor/<name>", "Clean snapshot used as merge base");
20482048+ `I ("git/patches/<name>", "Local modifications on top of vendor");
20492049+ `S "REQUIREMENTS";
20502050+ `P "git-filter-repo must be installed and in PATH. Install with:";
20512051+ `Pre " curl -o ~/.local/bin/git-filter-repo \\
20522052+ https://raw.githubusercontent.com/newren/git-filter-repo/refs/heads/main/git-filter-repo
20532053+ chmod +x ~/.local/bin/git-filter-repo";
20542054+ `S "TYPICAL WORKFLOW";
20552055+ `P "1. Vendor a git repository:";
20562056+ `Pre " unpac git add https://github.com/owner/repo.git";
20572057+ `P "2. Optionally extract only a subdirectory:";
20582058+ `Pre " unpac git add https://github.com/owner/monorepo.git --subdir lib/component";
20592059+ `P "3. Create a project and merge:";
20602060+ `Pre " unpac project new myapp
20612061+ unpac git merge repo myapp";
20622062+ `S "MAKING LOCAL CHANGES";
20632063+ `P "1. Open repository for editing:";
20642064+ `Pre " unpac git edit repo";
20652065+ `P "2. Make changes in vendor/git/repo-patches/";
20662066+ `P "3. Close the editing session:";
20672067+ `Pre " unpac git done repo";
20682068+ `S "COMMANDS";
20692069+ ] in
20702070+ let info = Cmd.info "git" ~doc ~man in
20712071+ Cmd.group info [
20722072+ git_add_cmd; git_list_cmd; git_update_cmd; git_merge_cmd;
20732073+ git_info_cmd; git_diff_cmd; git_edit_cmd; git_done_cmd; git_remove_cmd;
20742074+ ]
20752075+20762076+(* Log command *)
20772077+let log_cmd =
20782078+ let doc = "Show audit log of unpac operations." in
20792079+ let man = [
20802080+ `S Manpage.s_description;
20812081+ `P "Display the audit log of all unpac operations. The log contains \
20822082+ hierarchical records including nested git commands.";
20832083+ `S Manpage.s_examples;
20842084+ `P "View recent operations:";
20852085+ `Pre " unpac log -n 5";
20862086+ `P "Export as JSON:";
20872087+ `Pre " unpac log --json > ops.json";
20882088+ `P "Generate HTML report:";
20892089+ `Pre " unpac log --html -o report.html";
20902090+ ] in
20912091+ let json_flag =
20922092+ let doc = "Output raw JSON instead of text." in
20932093+ Arg.(value & flag & info ["json"] ~doc)
20942094+ in
20952095+ let html_flag =
20962096+ let doc = "Generate HTML report." in
20972097+ Arg.(value & flag & info ["html"] ~doc)
20982098+ in
20992099+ let output_file =
21002100+ let doc = "Output file (defaults to stdout)." in
21012101+ Arg.(value & opt (some string) None & info ["o"; "output"] ~docv:"FILE" ~doc)
21022102+ in
21032103+ let last_n =
21042104+ let doc = "Show only the last N operations." in
21052105+ Arg.(value & opt (some int) None & info ["n"; "last"] ~docv:"N" ~doc)
21062106+ in
21072107+ let run () json html output last_n_opt =
21082108+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr:_ ~root ->
21092109+ let log_path = Filename.concat (snd (Unpac.Worktree.path root Unpac.Worktree.Main))
21102110+ Unpac.Audit.default_log_file in
21112111+ match Unpac.Audit.load log_path with
21122112+ | Error msg ->
21132113+ Format.eprintf "Failed to load audit log: %s@." msg;
21142114+ exit 1
21152115+ | Ok log ->
21162116+ let log = match last_n_opt with
21172117+ | None -> log
21182118+ | Some n ->
21192119+ let entries = List.filteri (fun i _ -> i < n) log.entries in
21202120+ { log with entries }
21212121+ in
21222122+ if html then begin
21232123+ let html_content = Unpac.Audit.to_html log in
21242124+ match output with
21252125+ | Some path ->
21262126+ let oc = open_out path in
21272127+ output_string oc html_content;
21282128+ close_out oc;
21292129+ Format.printf "HTML report written to %s@." path
21302130+ | None -> print_string html_content
21312131+ end else if json then begin
21322132+ match Jsont_bytesrw.encode_string ~format:Jsont.Indent Unpac.Audit.log_jsont log with
21332133+ | Ok s -> print_string s; print_newline ()
21342134+ | Error e -> Format.eprintf "Failed to encode JSON: %s@." e; exit 1
21352135+ end else begin
21362136+ if log.entries = [] then
21372137+ Format.printf "No operations recorded.@."
21382138+ else
21392139+ Format.printf "%a" Unpac.Audit.pp_log log
21402140+ end
21412141+ in
21422142+ let info = Cmd.info "log" ~doc ~man in
21432143+ Cmd.v info Term.(const run $ logging_term $ json_flag $ html_flag $ output_file $ last_n)
21442144+21452145+(* Push command - push all unpac branches to a remote *)
21462146+let push_cmd =
21472147+ let doc = "Push all unpac branches to a remote." in
21482148+ let remote_arg =
21492149+ let doc = "Remote name (e.g., origin)." in
21502150+ Arg.(required & pos 0 (some string) None & info [] ~docv:"REMOTE" ~doc)
21512151+ in
21522152+ let force_arg =
21532153+ let doc = "Force push (use with caution)." in
21542154+ Arg.(value & flag & info ["f"; "force"] ~doc)
21552155+ in
21562156+ let dry_run_arg =
21572157+ let doc = "Show what would be pushed without actually pushing." in
21582158+ Arg.(value & flag & info ["n"; "dry-run"] ~doc)
21592159+ in
21602160+ let run () remote force dry_run =
21612161+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root ->
21622162+ let git = Unpac.Worktree.git_dir root in
21632163+21642164+ (* Check if remote exists *)
21652165+ (match Unpac.Git.remote_url ~proc_mgr ~cwd:git remote with
21662166+ | None ->
21672167+ Format.eprintf "Remote '%s' not configured.@." remote;
21682168+ Format.eprintf "Add it with: git -C %s remote add %s <url>@." (snd git) remote;
21692169+ exit 1
21702170+ | Some _ -> ());
21712171+21722172+ (* Get all branches *)
21732173+ let all_branches = Unpac.Git.run_lines ~proc_mgr ~cwd:git ["branch"; "--format=%(refname:short)"] in
21742174+21752175+ (* Filter to only unpac-managed branches *)
21762176+ let unpac_branches = List.filter (fun b ->
21772177+ b = "main" ||
21782178+ String.starts_with ~prefix:"opam/" b ||
21792179+ String.starts_with ~prefix:"project/" b
21802180+ ) all_branches in
21812181+21822182+ if unpac_branches = [] then begin
21832183+ Format.printf "No branches to push@.";
21842184+ exit 0
21852185+ end;
21862186+21872187+ Format.printf "Branches to push to %s:@." remote;
21882188+ List.iter (fun b -> Format.printf " %s@." b) unpac_branches;
21892189+ Format.printf "@.";
21902190+21912191+ if dry_run then begin
21922192+ Format.printf "(dry run - no changes made)@."
21932193+ end else begin
21942194+ (* Build push command *)
21952195+ let force_flag = if force then ["--force"] else [] in
21962196+ let push_args = ["push"] @ force_flag @ [remote; "--"] @ unpac_branches in
21972197+21982198+ Format.printf "Pushing %d branches...@." (List.length unpac_branches);
21992199+ try
22002200+ Unpac.Git.run_exn ~proc_mgr ~cwd:git push_args |> ignore;
22012201+ Format.printf "Done.@."
22022202+ with e ->
22032203+ Format.eprintf "Push failed: %s@." (Printexc.to_string e);
22042204+ exit 1
22052205+ end
22062206+ in
22072207+ let info = Cmd.info "push" ~doc in
22082208+ Cmd.v info Term.(const run $ logging_term $ remote_arg $ force_arg $ dry_run_arg)
22092209+22102210+(* Vendor status command *)
22112211+let vendor_status_cmd =
22122212+ let doc = "Show status of all vendored packages." in
22132213+ let run () =
22142214+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root ->
22152215+ let git = Unpac.Worktree.git_dir root in
22162216+22172217+ (* Get all vendored packages *)
22182218+ let packages = Unpac_opam.Opam.list_packages ~proc_mgr ~root in
22192219+ if packages = [] then begin
22202220+ Format.printf "No vendored packages.@.";
22212221+ exit 0
22222222+ end;
22232223+22242224+ (* Get all project branches *)
22252225+ let all_branches = Unpac.Git.run_lines ~proc_mgr ~cwd:git
22262226+ ["branch"; "--format=%(refname:short)"] in
22272227+ let project_branches = List.filter (fun b ->
22282228+ String.starts_with ~prefix:"project/" b
22292229+ ) all_branches in
22302230+ let project_names = List.map (fun b ->
22312231+ String.sub b 8 (String.length b - 8) (* Remove "project/" prefix *)
22322232+ ) project_branches in
22332233+22342234+ (* Print header *)
22352235+ Format.printf "%-25s %8s %s@." "Package" "Patches" "Merged into";
22362236+ Format.printf "%s@." (String.make 70 '-');
22372237+22382238+ (* For each package, get patch count and merge status *)
22392239+ List.iter (fun pkg ->
22402240+ let vendor_branch = Unpac_opam.Opam.vendor_branch pkg in
22412241+ let patches_branch = Unpac_opam.Opam.patches_branch pkg in
22422242+22432243+ (* Count commits on patches that aren't on vendor *)
22442244+ let patch_count =
22452245+ let output = Unpac.Git.run_exn ~proc_mgr ~cwd:git
22462246+ ["rev-list"; "--count"; vendor_branch ^ ".." ^ patches_branch] in
22472247+ int_of_string (String.trim output)
22482248+ in
22492249+22502250+ (* Check which projects contain this package's patches *)
22512251+ let merged_into = List.filter (fun proj_name ->
22522252+ let proj_branch = "project/" ^ proj_name in
22532253+ (* Check if patches branch is an ancestor of project branch *)
22542254+ match Unpac.Git.run ~proc_mgr ~cwd:git
22552255+ ["merge-base"; "--is-ancestor"; patches_branch; proj_branch] with
22562256+ | Ok _ -> true
22572257+ | Error _ -> false
22582258+ ) project_names in
22592259+22602260+ let merged_str = if merged_into = [] then "-"
22612261+ else String.concat ", " merged_into in
22622262+22632263+ Format.printf "%-25s %8d %s@." pkg patch_count merged_str
22642264+ ) packages;
22652265+22662266+ Format.printf "@.Total: %d packages@." (List.length packages)
22672267+ in
22682268+ let info = Cmd.info "status" ~doc in
22692269+ Cmd.v info Term.(const run $ logging_term)
22702270+22712271+(* Vendor command group *)
22722272+let vendor_cmd =
22732273+ let doc = "Vendor status and management commands." in
22742274+ let info = Cmd.info "vendor" ~doc in
22752275+ Cmd.group info [vendor_status_cmd]
22762276+22772277+(* Status command - comprehensive workspace status *)
22782278+let status_cmd =
22792279+ let doc = "Show comprehensive workspace status." in
22802280+ let man = [
22812281+ `S Manpage.s_description;
22822282+ `P "Shows the overall state of the unpac workspace including:";
22832283+ `I ("Projects", "All project branches and their merge status");
22842284+ `I ("Opam packages", "Vendored packages, patch counts, and merge status");
22852285+ `I ("Git repos", "Vendored git repositories and their status");
22862286+ `I ("Worktrees", "Any active worktrees with uncommitted changes");
22872287+ `P "Also updates README.md in the main branch with status in markdown format.";
22882288+ `S Manpage.s_examples;
22892289+ `Pre " unpac status # Full status
22902290+ unpac status --short # Compact summary";
22912291+ ] in
22922292+ let short_flag =
22932293+ let doc = "Show compact summary only." in
22942294+ Arg.(value & flag & info ["s"; "short"] ~doc)
22952295+ in
22962296+ let no_readme_flag =
22972297+ let doc = "Don't update README.md." in
22982298+ Arg.(value & flag & info ["no-readme"] ~doc)
22992299+ in
23002300+ let verbose_flag =
23012301+ let doc = "Enable verbose/debug logging to help diagnose issues." in
23022302+ Arg.(value & flag & info ["v"; "verbose"] ~doc)
23032303+ in
23042304+ let run () short no_readme verbose =
23052305+ setup_logging ~verbose ();
23062306+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root ->
23072307+ Log.debug (fun m -> m "Starting status command...");
23082308+ let git = Unpac.Worktree.git_dir root in
23092309+ let main_wt = Unpac.Worktree.path root Unpac.Worktree.Main in
23102310+ Log.debug (fun m -> m "Git dir: %s" (snd git));
23112311+ Log.debug (fun m -> m "Main worktree: %s" (snd main_wt));
23122312+23132313+ (* Get all branches *)
23142314+ Log.debug (fun m -> m "Listing all branches...");
23152315+ let all_branches = Unpac.Git.run_lines ~proc_mgr ~cwd:git
23162316+ ["branch"; "--format=%(refname:short)"] in
23172317+ Log.debug (fun m -> m "Found %d branches" (List.length all_branches));
23182318+23192319+ (* Categorize branches *)
23202320+ Log.debug (fun m -> m "Categorizing branches...");
23212321+ let project_branches = List.filter (fun b ->
23222322+ String.starts_with ~prefix:"project/" b
23232323+ ) all_branches in
23242324+ Log.debug (fun m -> m "Found %d project branches" (List.length project_branches));
23252325+23262326+ Log.debug (fun m -> m "Listing opam packages...");
23272327+ let opam_packages = Unpac_opam.Opam.list_packages ~proc_mgr ~root in
23282328+ Log.debug (fun m -> m "Found %d opam packages" (List.length opam_packages));
23292329+23302330+ Log.debug (fun m -> m "Listing git repos...");
23312331+ let git_repos = Unpac.Git_backend.list_repos ~proc_mgr ~root in
23322332+ Log.debug (fun m -> m "Found %d git repos" (List.length git_repos));
23332333+23342334+ (* Parallel map helper using Eio fibers *)
23352335+ let parallel_map f items =
23362336+ Log.debug (fun m -> m "Running %d operations in parallel..." (List.length items));
23372337+ Eio.Switch.run @@ fun sw ->
23382338+ let fibers = List.map (fun item ->
23392339+ Eio.Fiber.fork_promise ~sw (fun () -> f item)
23402340+ ) items in
23412341+ List.map Eio.Promise.await_exn fibers
23422342+ in
23432343+23442344+ (* Parallel commit count for all packages *)
23452345+ let commit_count_calls = ref 0 in
23462346+ let parallel_commit_counts pkgs vendor_fn patches_fn =
23472347+ Log.debug (fun m -> m "Counting commits for %d items in parallel..." (List.length pkgs));
23482348+ parallel_map (fun pkg ->
23492349+ let from_ref = vendor_fn pkg in
23502350+ let to_ref = patches_fn pkg in
23512351+ incr commit_count_calls;
23522352+ try
23532353+ let output = Unpac.Git.run_exn ~proc_mgr ~cwd:git
23542354+ ["rev-list"; "--count"; from_ref ^ ".." ^ to_ref] in
23552355+ (pkg, int_of_string (String.trim output))
23562356+ with _ -> (pkg, 0)
23572357+ ) pkgs
23582358+ in
23592359+23602360+ (* Helper to check if branch A is ancestor of B - single call *)
23612361+ let is_ancestor_calls = ref 0 in
23622362+ let is_ancestor a b =
23632363+ incr is_ancestor_calls;
23642364+ Log.debug (fun m -> m "is_ancestor #%d: %s in %s" !is_ancestor_calls a b);
23652365+ match Unpac.Git.run ~proc_mgr ~cwd:git
23662366+ ["merge-base"; "--is-ancestor"; a; b] with
23672367+ | Ok _ -> true
23682368+ | Error _ -> false
23692369+ in
23702370+23712371+ (* Parallel is_ancestor check for a list of (source, target) pairs *)
23722372+ let parallel_is_ancestor pairs =
23732373+ Log.debug (fun m -> m "Checking %d ancestry relations in parallel..." (List.length pairs));
23742374+ parallel_map (fun (a, b) ->
23752375+ incr is_ancestor_calls;
23762376+ match Unpac.Git.run ~proc_mgr ~cwd:git
23772377+ ["merge-base"; "--is-ancestor"; a; b] with
23782378+ | Ok _ -> (a, b, true)
23792379+ | Error _ -> (a, b, false)
23802380+ ) pairs
23812381+ in
23822382+23832383+ (* Check for uncommitted changes in a worktree *)
23842384+ let has_changes_calls = ref 0 in
23852385+ let has_changes wt_path =
23862386+ incr has_changes_calls;
23872387+ Log.debug (fun m -> m "has_changes #%d: %s" !has_changes_calls (snd wt_path));
23882388+ if Sys.file_exists (snd wt_path) then
23892389+ let status = Unpac.Git.run_exn ~proc_mgr ~cwd:wt_path ["status"; "--porcelain"] in
23902390+ String.trim status <> ""
23912391+ else false
23922392+ in
23932393+23942394+ (* Project names *)
23952395+ let project_names = List.map (fun b ->
23962396+ String.sub b 8 (String.length b - 8)
23972397+ ) project_branches in
23982398+ Log.debug (fun m -> m "Project names: %a" Fmt.(list ~sep:comma string) project_names);
23992399+24002400+ if short then begin
24012401+ (* Short summary *)
24022402+ Log.debug (fun m -> m "Generating short summary...");
24032403+ Format.printf "Workspace: %s@." (snd (Unpac.Worktree.git_dir root) |> Filename.dirname);
24042404+ Format.printf "Projects: %d | Opam: %d | Git: %d@."
24052405+ (List.length project_branches)
24062406+ (List.length opam_packages)
24072407+ (List.length git_repos);
24082408+24092409+ (* Count total patches - parallel *)
24102410+ Log.debug (fun m -> m "Counting opam patches (%d packages) in parallel..." (List.length opam_packages));
24112411+ let opam_patch_counts = parallel_commit_counts opam_packages
24122412+ Unpac_opam.Opam.vendor_branch Unpac_opam.Opam.patches_branch in
24132413+ let opam_patches = List.fold_left (fun acc (_, n) -> acc + n) 0 opam_patch_counts in
24142414+ Log.debug (fun m -> m "Counting git patches (%d repos) in parallel..." (List.length git_repos));
24152415+ let git_patch_counts = parallel_commit_counts git_repos
24162416+ Unpac.Git_backend.vendor_branch Unpac.Git_backend.patches_branch in
24172417+ let git_patches = List.fold_left (fun acc (_, n) -> acc + n) 0 git_patch_counts in
24182418+ if opam_patches + git_patches > 0 then
24192419+ Format.printf "Local patches: %d commits@." (opam_patches + git_patches);
24202420+24212421+ (* Check main for uncommitted *)
24222422+ Log.debug (fun m -> m "Checking main worktree for changes...");
24232423+ if has_changes main_wt then
24242424+ Format.printf "Warning: Uncommitted changes in main@.";
24252425+ Log.debug (fun m -> m "Short summary complete.")
24262426+ end else begin
24272427+ (* Full status *)
24282428+ Log.debug (fun m -> m "Generating full status...");
24292429+ Format.printf "=== Unpac Workspace Status ===@.@.";
24302430+24312431+ (* Main worktree status *)
24322432+ Log.debug (fun m -> m "Checking main worktree status...");
24332433+ Format.printf "Main worktree: %s@." (snd main_wt);
24342434+ if has_changes main_wt then
24352435+ Format.printf " @{<yellow>Warning: Uncommitted changes@}@."
24362436+ else
24372437+ Format.printf " Clean@.";
24382438+ Format.printf "@.";
24392439+24402440+ (* Projects - precompute all ancestry relationships in parallel *)
24412441+ Log.debug (fun m -> m "Processing %d projects..." (List.length project_names));
24422442+ Format.printf "=== Projects (%d) ===@." (List.length project_names);
24432443+ if project_names = [] then
24442444+ Format.printf " (none)@."
24452445+ else begin
24462446+ (* Build all ancestry pairs to check: (patches_branch, project_branch) *)
24472447+ let opam_pairs = List.concat_map (fun proj ->
24482448+ let proj_branch = "project/" ^ proj in
24492449+ List.map (fun pkg ->
24502450+ (Unpac_opam.Opam.patches_branch pkg, proj_branch)
24512451+ ) opam_packages
24522452+ ) project_names in
24532453+ let git_pairs = List.concat_map (fun proj ->
24542454+ let proj_branch = "project/" ^ proj in
24552455+ List.map (fun repo ->
24562456+ (Unpac.Git_backend.patches_branch repo, proj_branch)
24572457+ ) git_repos
24582458+ ) project_names in
24592459+24602460+ (* Check all ancestry in parallel *)
24612461+ Log.debug (fun m -> m "Checking %d ancestry relations in parallel..."
24622462+ (List.length opam_pairs + List.length git_pairs));
24632463+ let all_pairs = opam_pairs @ git_pairs in
24642464+ let ancestry_results = parallel_is_ancestor all_pairs in
24652465+24662466+ (* Build lookup table: (patches_branch, project_branch) -> is_ancestor *)
24672467+ let ancestry_table = Hashtbl.create (List.length ancestry_results) in
24682468+ List.iter (fun (a, b, result) ->
24692469+ Hashtbl.add ancestry_table (a, b) result
24702470+ ) ancestry_results;
24712471+24722472+ let is_ancestor_cached a b =
24732473+ try Hashtbl.find ancestry_table (a, b)
24742474+ with Not_found -> is_ancestor a b (* fallback *)
24752475+ in
24762476+24772477+ List.iteri (fun i proj ->
24782478+ Log.debug (fun m -> m " Project %d/%d: %s" (i+1) (List.length project_names) proj);
24792479+ let proj_branch = "project/" ^ proj in
24802480+ let proj_wt = Unpac.Worktree.path root (Unpac.Worktree.Project proj) in
24812481+ let wt_exists = Sys.file_exists (snd proj_wt) in
24822482+ let dirty = wt_exists && has_changes proj_wt in
24832483+24842484+ (* Count merged packages - use cached results *)
24852485+ let merged_opam = List.filter (fun pkg ->
24862486+ is_ancestor_cached (Unpac_opam.Opam.patches_branch pkg) proj_branch
24872487+ ) opam_packages in
24882488+ let merged_git = List.filter (fun repo ->
24892489+ is_ancestor_cached (Unpac.Git_backend.patches_branch repo) proj_branch
24902490+ ) git_repos in
24912491+24922492+ Format.printf " %s" proj;
24932493+ if wt_exists then Format.printf " [worktree]";
24942494+ if dirty then Format.printf " @{<yellow>*dirty*@}";
24952495+ Format.printf "@.";
24962496+ Format.printf " Merged: %d opam, %d git@."
24972497+ (List.length merged_opam) (List.length merged_git)
24982498+ ) project_names
24992499+ end;
25002500+ Format.printf "@.";
25012501+25022502+ (* Opam packages - parallel commit counts and cached ancestry *)
25032503+ Log.debug (fun m -> m "Processing %d opam packages..." (List.length opam_packages));
25042504+ Format.printf "=== Opam Packages (%d) ===@." (List.length opam_packages);
25052505+ if opam_packages = [] then
25062506+ Format.printf " (none)@."
25072507+ else begin
25082508+ (* Parallel commit counts *)
25092509+ let opam_counts = parallel_commit_counts opam_packages
25102510+ Unpac_opam.Opam.vendor_branch Unpac_opam.Opam.patches_branch in
25112511+ let opam_count_table = Hashtbl.create (List.length opam_counts) in
25122512+ List.iter (fun (pkg, count) -> Hashtbl.add opam_count_table pkg count) opam_counts;
25132513+25142514+ (* Build ancestry pairs for opam -> projects (reuse if possible) *)
25152515+ let opam_to_proj_pairs = List.concat_map (fun pkg ->
25162516+ let patches_branch = Unpac_opam.Opam.patches_branch pkg in
25172517+ List.map (fun proj -> (patches_branch, "project/" ^ proj)) project_names
25182518+ ) opam_packages in
25192519+ let opam_ancestry_results = parallel_is_ancestor opam_to_proj_pairs in
25202520+ let opam_ancestry_table = Hashtbl.create (List.length opam_ancestry_results) in
25212521+ List.iter (fun (a, b, result) ->
25222522+ Hashtbl.add opam_ancestry_table (a, b) result
25232523+ ) opam_ancestry_results;
25242524+25252525+ Format.printf " %-25s %8s %s@." "Package" "Patches" "Merged into";
25262526+ Format.printf " %s@." (String.make 60 '-');
25272527+ List.iteri (fun i pkg ->
25282528+ Log.debug (fun m -> m " Opam package %d/%d: %s" (i+1) (List.length opam_packages) pkg);
25292529+ let patches_branch = Unpac_opam.Opam.patches_branch pkg in
25302530+ let patch_count = try Hashtbl.find opam_count_table pkg with Not_found -> 0 in
25312531+25322532+ (* Check active worktrees *)
25332533+ let patches_wt = Unpac.Worktree.path root (Unpac.Worktree.Opam_patches pkg) in
25342534+ let has_wt = Sys.file_exists (snd patches_wt) in
25352535+ let dirty = has_wt && has_changes patches_wt in
25362536+25372537+ (* Check merged into which projects - use cached results *)
25382538+ let merged_into = List.filter (fun proj ->
25392539+ try Hashtbl.find opam_ancestry_table (patches_branch, "project/" ^ proj)
25402540+ with Not_found -> false
25412541+ ) project_names in
25422542+25432543+ let merged_str = if merged_into = [] then "-"
25442544+ else String.concat ", " merged_into in
25452545+25462546+ Format.printf " %-25s" pkg;
25472547+ if has_wt then Format.printf "*" else Format.printf " ";
25482548+ Format.printf "%7d %s" patch_count merged_str;
25492549+ if dirty then Format.printf " @{<yellow>(uncommitted)@}";
25502550+ Format.printf "@."
25512551+ ) opam_packages
25522552+ end;
25532553+ Format.printf "@.";
25542554+25552555+ (* Git repos - parallel commit counts and cached ancestry *)
25562556+ Log.debug (fun m -> m "Processing %d git repos..." (List.length git_repos));
25572557+ Format.printf "=== Git Repositories (%d) ===@." (List.length git_repos);
25582558+ if git_repos = [] then
25592559+ Format.printf " (none)@."
25602560+ else begin
25612561+ (* Parallel commit counts *)
25622562+ let git_counts = parallel_commit_counts git_repos
25632563+ Unpac.Git_backend.vendor_branch Unpac.Git_backend.patches_branch in
25642564+ let git_count_table = Hashtbl.create (List.length git_counts) in
25652565+ List.iter (fun (repo, count) -> Hashtbl.add git_count_table repo count) git_counts;
25662566+25672567+ (* Build ancestry pairs for git -> projects *)
25682568+ let git_to_proj_pairs = List.concat_map (fun repo ->
25692569+ let patches_branch = Unpac.Git_backend.patches_branch repo in
25702570+ List.map (fun proj -> (patches_branch, "project/" ^ proj)) project_names
25712571+ ) git_repos in
25722572+ let git_ancestry_results = parallel_is_ancestor git_to_proj_pairs in
25732573+ let git_ancestry_table = Hashtbl.create (List.length git_ancestry_results) in
25742574+ List.iter (fun (a, b, result) ->
25752575+ Hashtbl.add git_ancestry_table (a, b) result
25762576+ ) git_ancestry_results;
25772577+25782578+ Format.printf " %-25s %8s %s@." "Repository" "Patches" "Merged into";
25792579+ Format.printf " %s@." (String.make 60 '-');
25802580+ List.iteri (fun i repo ->
25812581+ Log.debug (fun m -> m " Git repo %d/%d: %s" (i+1) (List.length git_repos) repo);
25822582+ let patches_branch = Unpac.Git_backend.patches_branch repo in
25832583+ let patch_count = try Hashtbl.find git_count_table repo with Not_found -> 0 in
25842584+25852585+ let patches_wt = Unpac.Worktree.path root (Unpac.Worktree.Git_patches repo) in
25862586+ let has_wt = Sys.file_exists (snd patches_wt) in
25872587+ let dirty = has_wt && has_changes patches_wt in
25882588+25892589+ let merged_into = List.filter (fun proj ->
25902590+ try Hashtbl.find git_ancestry_table (patches_branch, "project/" ^ proj)
25912591+ with Not_found -> false
25922592+ ) project_names in
25932593+25942594+ let merged_str = if merged_into = [] then "-"
25952595+ else String.concat ", " merged_into in
25962596+25972597+ Format.printf " %-25s" repo;
25982598+ if has_wt then Format.printf "*" else Format.printf " ";
25992599+ Format.printf "%7d %s" patch_count merged_str;
26002600+ if dirty then Format.printf " @{<yellow>(uncommitted)@}";
26012601+ Format.printf "@."
26022602+ ) git_repos
26032603+ end;
26042604+ Format.printf "@.";
26052605+26062606+ (* Active worktrees summary *)
26072607+ let active_worktrees = ref [] in
26082608+ List.iter (fun pkg ->
26092609+ let wt = Unpac.Worktree.path root (Unpac.Worktree.Opam_patches pkg) in
26102610+ if Sys.file_exists (snd wt) then
26112611+ active_worktrees := ("opam/" ^ pkg ^ "-patches", has_changes wt) :: !active_worktrees
26122612+ ) opam_packages;
26132613+ List.iter (fun repo ->
26142614+ let wt = Unpac.Worktree.path root (Unpac.Worktree.Git_patches repo) in
26152615+ if Sys.file_exists (snd wt) then
26162616+ active_worktrees := ("git/" ^ repo ^ "-patches", has_changes wt) :: !active_worktrees
26172617+ ) git_repos;
26182618+26192619+ if !active_worktrees <> [] then begin
26202620+ Format.printf "=== Active Worktrees ===@.";
26212621+ List.iter (fun (name, dirty) ->
26222622+ Format.printf " %s" name;
26232623+ if dirty then Format.printf " @{<yellow>*uncommitted*@}";
26242624+ Format.printf "@."
26252625+ ) (List.rev !active_worktrees);
26262626+ Format.printf "@."
26272627+ end;
26282628+26292629+ (* Legend *)
26302630+ Format.printf "Legend: * = worktree active@.";
26312631+ Log.debug (fun m -> m "Full status output complete.")
26322632+ end;
26332633+26342634+ (* Generate README.md unless --no-readme *)
26352635+ if not no_readme then begin
26362636+ Log.debug (fun m -> m "Generating README.md...");
26372637+ let buf = Buffer.create 4096 in
26382638+ let add = Buffer.add_string buf in
26392639+ let addf fmt = Printf.ksprintf add fmt in
26402640+ let timestamp =
26412641+ let tm = Unix.localtime (Unix.time ()) in
26422642+ Printf.sprintf "%04d-%02d-%02d %02d:%02d:%02d"
26432643+ (tm.Unix.tm_year + 1900) (tm.Unix.tm_mon + 1) tm.Unix.tm_mday
26442644+ tm.Unix.tm_hour tm.Unix.tm_min tm.Unix.tm_sec
26452645+ in
26462646+26472647+ (* Get tangled.org base URL from origin remote *)
26482648+ Log.debug (fun m -> m "Getting origin remote URL...");
26492649+ let tangled_base =
26502650+ match Unpac.Git.remote_url ~proc_mgr ~cwd:git "origin" with
26512651+ | None ->
26522652+ Log.debug (fun m -> m "No origin remote found");
26532653+ None
26542654+ | Some url ->
26552655+ (* Parse git@git.recoil.org:user/repo or similar *)
26562656+ let url = String.trim url in
26572657+ (* Handle git@host:user/repo format *)
26582658+ if String.starts_with ~prefix:"git@" url then
26592659+ match String.index_opt url ':' with
26602660+ | None -> None
26612661+ | Some colon_pos ->
26622662+ let path = String.sub url (colon_pos + 1) (String.length url - colon_pos - 1) in
26632663+ (* Strip .git suffix if present *)
26642664+ let path = if String.ends_with ~suffix:".git" path then
26652665+ String.sub path 0 (String.length path - 4) else path in
26662666+ Some (Printf.sprintf "https://tangled.org/%s" path)
26672667+ else None
26682668+ in
26692669+26702670+ (* URL encode a branch name for tree URLs *)
26712671+ let url_encode s =
26722672+ let buf = Buffer.create (String.length s * 2) in
26732673+ String.iter (fun c ->
26742674+ match c with
26752675+ | '/' -> Buffer.add_string buf "%2F"
26762676+ | ' ' -> Buffer.add_string buf "%20"
26772677+ | c -> Buffer.add_char buf c
26782678+ ) s;
26792679+ Buffer.contents buf
26802680+ in
26812681+26822682+ (* Create a markdown link to a branch tree, or just the name if no base URL *)
26832683+ let branch_link name branch =
26842684+ match tangled_base with
26852685+ | None -> name
26862686+ | Some base -> Printf.sprintf "[%s](%s/tree/%s)" name base (url_encode branch)
26872687+ in
26882688+26892689+ add "# Unpac Workspace Status\n\n";
26902690+ addf "_Last updated: %s_\n\n" timestamp;
26912691+26922692+ (* Summary *)
26932693+ add "## Summary\n\n";
26942694+ addf "| Category | Count |\n";
26952695+ addf "|----------|-------|\n";
26962696+ addf "| Projects | %d |\n" (List.length project_names);
26972697+ addf "| Opam Packages | %d |\n" (List.length opam_packages);
26982698+ addf "| Git Repositories | %d |\n\n" (List.length git_repos);
26992699+27002700+ (* Projects section - parallel ancestry checks *)
27012701+ Log.debug (fun m -> m "README: Processing %d projects..." (List.length project_names));
27022702+ add "## Projects\n\n";
27032703+ if project_names = [] then
27042704+ add "_No projects created yet._\n\n"
27052705+ else begin
27062706+ (* Precompute all ancestry in parallel for README *)
27072707+ let readme_opam_pairs = List.concat_map (fun proj ->
27082708+ let proj_branch = "project/" ^ proj in
27092709+ List.map (fun pkg ->
27102710+ (Unpac_opam.Opam.patches_branch pkg, proj_branch)
27112711+ ) opam_packages
27122712+ ) project_names in
27132713+ let readme_git_pairs = List.concat_map (fun proj ->
27142714+ let proj_branch = "project/" ^ proj in
27152715+ List.map (fun repo ->
27162716+ (Unpac.Git_backend.patches_branch repo, proj_branch)
27172717+ ) git_repos
27182718+ ) project_names in
27192719+ Log.debug (fun m -> m "README: Checking %d ancestry relations in parallel..."
27202720+ (List.length readme_opam_pairs + List.length readme_git_pairs));
27212721+ let readme_ancestry_results = parallel_is_ancestor (readme_opam_pairs @ readme_git_pairs) in
27222722+ let readme_ancestry_table = Hashtbl.create (List.length readme_ancestry_results) in
27232723+ List.iter (fun (a, b, result) ->
27242724+ Hashtbl.add readme_ancestry_table (a, b) result
27252725+ ) readme_ancestry_results;
27262726+ let readme_is_ancestor a b =
27272727+ try Hashtbl.find readme_ancestry_table (a, b)
27282728+ with Not_found -> false
27292729+ in
27302730+27312731+ add "| Project | Opam Merged | Git Merged | Status |\n";
27322732+ add "|---------|-------------|------------|--------|\n";
27332733+ List.iteri (fun i proj ->
27342734+ Log.debug (fun m -> m "README: Project %d/%d: %s" (i+1) (List.length project_names) proj);
27352735+ let proj_branch = "project/" ^ proj in
27362736+ let proj_wt = Unpac.Worktree.path root (Unpac.Worktree.Project proj) in
27372737+ let wt_exists = Sys.file_exists (snd proj_wt) in
27382738+ let dirty = wt_exists && has_changes proj_wt in
27392739+27402740+ let merged_opam = List.filter (fun pkg ->
27412741+ readme_is_ancestor (Unpac_opam.Opam.patches_branch pkg) proj_branch
27422742+ ) opam_packages in
27432743+ let merged_git = List.filter (fun repo ->
27442744+ readme_is_ancestor (Unpac.Git_backend.patches_branch repo) proj_branch
27452745+ ) git_repos in
27462746+27472747+ let status =
27482748+ if dirty then "⚠️ uncommitted"
27492749+ else if wt_exists then "📂 worktree active"
27502750+ else "✓" in
27512751+ addf "| %s | %d | %d | %s |\n"
27522752+ (branch_link proj proj_branch) (List.length merged_opam) (List.length merged_git) status
27532753+ ) project_names;
27542754+ add "\n"
27552755+ end;
27562756+27572757+ (* Opam packages section - parallel *)
27582758+ Log.debug (fun m -> m "README: Processing %d opam packages..." (List.length opam_packages));
27592759+ add "## Opam Packages\n\n";
27602760+ if opam_packages = [] then
27612761+ add "_No opam packages vendored yet._\n\n"
27622762+ else begin
27632763+ (* Parallel commit counts *)
27642764+ let readme_opam_counts = parallel_commit_counts opam_packages
27652765+ Unpac_opam.Opam.vendor_branch Unpac_opam.Opam.patches_branch in
27662766+ let readme_opam_count_table = Hashtbl.create (List.length readme_opam_counts) in
27672767+ List.iter (fun (pkg, count) -> Hashtbl.add readme_opam_count_table pkg count) readme_opam_counts;
27682768+27692769+ (* Parallel ancestry for opam -> projects *)
27702770+ let readme_opam_to_proj = List.concat_map (fun pkg ->
27712771+ let patches_branch = Unpac_opam.Opam.patches_branch pkg in
27722772+ List.map (fun proj -> (patches_branch, "project/" ^ proj)) project_names
27732773+ ) opam_packages in
27742774+ let readme_opam_ancestry = parallel_is_ancestor readme_opam_to_proj in
27752775+ let readme_opam_anc_table = Hashtbl.create (List.length readme_opam_ancestry) in
27762776+ List.iter (fun (a, b, result) ->
27772777+ Hashtbl.add readme_opam_anc_table (a, b) result
27782778+ ) readme_opam_ancestry;
27792779+27802780+ add "| Package | Patches | Merged Into | Status |\n";
27812781+ add "|---------|---------|-------------|--------|\n";
27822782+ List.iteri (fun i pkg ->
27832783+ Log.debug (fun m -> m "README: Opam package %d/%d: %s" (i+1) (List.length opam_packages) pkg);
27842784+ let patches_branch = Unpac_opam.Opam.patches_branch pkg in
27852785+ let patch_count = try Hashtbl.find readme_opam_count_table pkg with Not_found -> 0 in
27862786+27872787+ let patches_wt = Unpac.Worktree.path root (Unpac.Worktree.Opam_patches pkg) in
27882788+ let has_wt = Sys.file_exists (snd patches_wt) in
27892789+ let dirty = has_wt && has_changes patches_wt in
27902790+27912791+ let merged_into = List.filter (fun proj ->
27922792+ try Hashtbl.find readme_opam_anc_table (patches_branch, "project/" ^ proj)
27932793+ with Not_found -> false
27942794+ ) project_names in
27952795+27962796+ let merged_str = if merged_into = [] then "-"
27972797+ else String.concat ", " (List.map (fun p -> branch_link p ("project/" ^ p)) merged_into) in
27982798+27992799+ let status =
28002800+ if dirty then "⚠️ uncommitted"
28012801+ else if has_wt then "📂 editing"
28022802+ else "✓" in
28032803+28042804+ addf "| %s | %d | %s | %s |\n" (branch_link pkg patches_branch) patch_count merged_str status
28052805+ ) opam_packages;
28062806+ add "\n"
28072807+ end;
28082808+28092809+ (* Git repositories section - parallel *)
28102810+ Log.debug (fun m -> m "README: Processing %d git repos..." (List.length git_repos));
28112811+ add "## Git Repositories\n\n";
28122812+ if git_repos = [] then
28132813+ add "_No git repositories vendored yet._\n\n"
28142814+ else begin
28152815+ (* Parallel commit counts *)
28162816+ let readme_git_counts = parallel_commit_counts git_repos
28172817+ Unpac.Git_backend.vendor_branch Unpac.Git_backend.patches_branch in
28182818+ let readme_git_count_table = Hashtbl.create (List.length readme_git_counts) in
28192819+ List.iter (fun (repo, count) -> Hashtbl.add readme_git_count_table repo count) readme_git_counts;
28202820+28212821+ (* Parallel ancestry for git -> projects *)
28222822+ let readme_git_to_proj = List.concat_map (fun repo ->
28232823+ let patches_branch = Unpac.Git_backend.patches_branch repo in
28242824+ List.map (fun proj -> (patches_branch, "project/" ^ proj)) project_names
28252825+ ) git_repos in
28262826+ let readme_git_ancestry = parallel_is_ancestor readme_git_to_proj in
28272827+ let readme_git_anc_table = Hashtbl.create (List.length readme_git_ancestry) in
28282828+ List.iter (fun (a, b, result) ->
28292829+ Hashtbl.add readme_git_anc_table (a, b) result
28302830+ ) readme_git_ancestry;
28312831+28322832+ add "| Repository | Patches | Merged Into | Status |\n";
28332833+ add "|------------|---------|-------------|--------|\n";
28342834+ List.iteri (fun i repo ->
28352835+ Log.debug (fun m -> m "README: Git repo %d/%d: %s" (i+1) (List.length git_repos) repo);
28362836+ let patches_branch = Unpac.Git_backend.patches_branch repo in
28372837+ let patch_count = try Hashtbl.find readme_git_count_table repo with Not_found -> 0 in
28382838+28392839+ let patches_wt = Unpac.Worktree.path root (Unpac.Worktree.Git_patches repo) in
28402840+ let has_wt = Sys.file_exists (snd patches_wt) in
28412841+ let dirty = has_wt && has_changes patches_wt in
28422842+28432843+ let merged_into = List.filter (fun proj ->
28442844+ try Hashtbl.find readme_git_anc_table (patches_branch, "project/" ^ proj)
28452845+ with Not_found -> false
28462846+ ) project_names in
28472847+28482848+ let merged_str = if merged_into = [] then "-"
28492849+ else String.concat ", " (List.map (fun p -> branch_link p ("project/" ^ p)) merged_into) in
28502850+28512851+ let status =
28522852+ if dirty then "⚠️ uncommitted"
28532853+ else if has_wt then "📂 editing"
28542854+ else "✓" in
28552855+28562856+ addf "| %s | %d | %s | %s |\n" (branch_link repo patches_branch) patch_count merged_str status
28572857+ ) git_repos;
28582858+ add "\n"
28592859+ end;
28602860+28612861+ (* Active worktrees *)
28622862+ let active_wts = ref [] in
28632863+ List.iter (fun pkg ->
28642864+ let wt = Unpac.Worktree.path root (Unpac.Worktree.Opam_patches pkg) in
28652865+ if Sys.file_exists (snd wt) then
28662866+ active_wts := (Printf.sprintf "vendor/opam/%s-patches" pkg, has_changes wt) :: !active_wts
28672867+ ) opam_packages;
28682868+ List.iter (fun repo ->
28692869+ let wt = Unpac.Worktree.path root (Unpac.Worktree.Git_patches repo) in
28702870+ if Sys.file_exists (snd wt) then
28712871+ active_wts := (Printf.sprintf "vendor/git/%s-patches" repo, has_changes wt) :: !active_wts
28722872+ ) git_repos;
28732873+28742874+ if !active_wts <> [] then begin
28752875+ add "## Active Worktrees\n\n";
28762876+ add "| Path | Status |\n";
28772877+ add "|------|--------|\n";
28782878+ List.iter (fun (name, dirty) ->
28792879+ let status = if dirty then "⚠️ uncommitted changes" else "✓ clean" in
28802880+ addf "| `%s` | %s |\n" name status
28812881+ ) (List.rev !active_wts);
28822882+ add "\n"
28832883+ end;
28842884+28852885+ (* Changes section from audit log *)
28862886+ Log.debug (fun m -> m "README: Generating Changes section from audit log...");
28872887+ let audit_path = Eio.Path.(main_wt / ".unpac-audit.json") |> snd in
28882888+ (match Unpac.Audit.load audit_path with
28892889+ | Error e ->
28902890+ Log.debug (fun m -> m "README: Could not load audit log: %s" e)
28912891+ | Ok audit_log ->
28922892+ (* Filter to significant events and take most recent *)
28932893+ let significant_ops = List.filter (fun (op : Unpac.Audit.operation) ->
28942894+ match op.operation_type with
28952895+ | Unpac.Audit.Project_new
28962896+ | Unpac.Audit.Project_promote
28972897+ | Unpac.Audit.Opam_add
28982898+ | Unpac.Audit.Git_add
28992899+ | Unpac.Audit.Init -> true
29002900+ | _ -> false
29012901+ ) audit_log.entries in
29022902+ (* Take most recent 20 events, reverse to show oldest first *)
29032903+ let recent_ops =
29042904+ significant_ops
29052905+ |> (fun l -> if List.length l > 20 then
29062906+ List.filteri (fun i _ -> i < 20) l
29072907+ else l)
29082908+ |> List.rev
29092909+ in
29102910+ if recent_ops <> [] then begin
29112911+ add "## Changes\n\n";
29122912+ add "| Date | Event | Details |\n";
29132913+ add "|------|-------|--------|\n";
29142914+ List.iter (fun (op : Unpac.Audit.operation) ->
29152915+ let tm = Unix.localtime op.timestamp in
29162916+ let date = Printf.sprintf "%04d-%02d-%02d"
29172917+ (tm.Unix.tm_year + 1900) (tm.Unix.tm_mon + 1) tm.Unix.tm_mday in
29182918+ let (event, details) = match op.operation_type, op.args with
29192919+ | Unpac.Audit.Init, _ ->
29202920+ ("Workspace initialized", "")
29212921+ | Unpac.Audit.Project_new, name :: _ ->
29222922+ ("Project created", Printf.sprintf "`%s`" name)
29232923+ | Unpac.Audit.Project_promote, name :: _ ->
29242924+ let backend = List.find_map (fun arg ->
29252925+ if String.starts_with ~prefix:"--backend" arg then None
29262926+ else match List.nth_opt op.args (1 + (List.length (List.filter ((=) arg) (List.filteri (fun i _ -> i = 0) op.args)))) with
29272927+ | _ -> None
29282928+ ) op.args in
29292929+ let backend_str = match backend with Some b -> b | None ->
29302930+ (* Try to find backend in args *)
29312931+ let rec find_backend = function
29322932+ | "--backend" :: b :: _ -> b
29332933+ | "-b" :: b :: _ -> b
29342934+ | _ :: rest -> find_backend rest
29352935+ | [] -> "opam"
29362936+ in find_backend op.args
29372937+ in
29382938+ ("Project promoted", Printf.sprintf "`%s` → %s vendor" name backend_str)
29392939+ | Unpac.Audit.Opam_add, pkgs ->
29402940+ let pkg_list = String.concat ", " (List.map (fun p -> Printf.sprintf "`%s`" p) pkgs) in
29412941+ ("Opam packages added", pkg_list)
29422942+ | Unpac.Audit.Git_add, name :: _ ->
29432943+ ("Git repo added", Printf.sprintf "`%s`" name)
29442944+ | _, args ->
29452945+ (Unpac.Audit.operation_type_to_string op.operation_type,
29462946+ String.concat " " args)
29472947+ in
29482948+ addf "| %s | %s | %s |\n" date event details
29492949+ ) recent_ops;
29502950+ add "\n"
29512951+ end);
29522952+29532953+ (* Footer *)
29542954+ add "---\n\n";
29552955+ add "_Generated by `unpac status`_\n";
29562956+29572957+ (* Write README.md *)
29582958+ Log.debug (fun m -> m "README: Checking if README.md needs update...");
29592959+ let readme_path = Filename.concat (snd main_wt) "README.md" in
29602960+ let content = Buffer.contents buf in
29612961+29622962+ (* Check if content changed *)
29632963+ let old_content =
29642964+ if Sys.file_exists readme_path then begin
29652965+ Log.debug (fun m -> m "README: Reading existing README.md...");
29662966+ let ic = open_in readme_path in
29672967+ let len = in_channel_length ic in
29682968+ let s = really_input_string ic len in
29692969+ close_in ic;
29702970+ Some s
29712971+ end else begin
29722972+ Log.debug (fun m -> m "README: No existing README.md");
29732973+ None
29742974+ end
29752975+ in
29762976+29772977+ (* Only write and commit if changed (ignoring timestamp line) *)
29782978+ let content_without_timestamp s =
29792979+ (* Remove the timestamp line for comparison *)
29802980+ Str.global_replace (Str.regexp "_Last updated:.*_") "" s
29812981+ in
29822982+ let changed = match old_content with
29832983+ | None -> true
29842984+ | Some old -> content_without_timestamp old <> content_without_timestamp content
29852985+ in
29862986+29872987+ if changed then begin
29882988+ Log.debug (fun m -> m "README: Writing updated README.md...");
29892989+ let oc = open_out readme_path in
29902990+ output_string oc content;
29912991+ close_out oc;
29922992+ Format.printf "@.README.md updated.@.";
29932993+ (* Git add and commit *)
29942994+ Log.debug (fun m -> m "README: Staging README.md...");
29952995+ Unpac.Git.run_exn ~proc_mgr ~cwd:main_wt ["add"; "README.md"] |> ignore;
29962996+ (try
29972997+ Log.debug (fun m -> m "README: Committing README.md...");
29982998+ Unpac.Git.run_exn ~proc_mgr ~cwd:main_wt
29992999+ ["commit"; "-m"; "Update workspace status in README.md"] |> ignore;
30003000+ Format.printf "Committed README.md changes.@."
30013001+ with _ ->
30023002+ (* Commit might fail if nothing staged (e.g., only timestamp changed) *)
30033003+ Log.debug (fun m -> m "README: Commit failed (likely nothing to commit)"))
30043004+ end else
30053005+ Log.debug (fun m -> m "README: No changes, skipping write");
30063006+ Log.debug (fun m -> m "Status command complete.")
30073007+ end
30083008+ in
30093009+ let info = Cmd.info "status" ~doc ~man in
30103010+ Cmd.v info Term.(const run $ const () $ short_flag $ no_readme_flag $ verbose_flag)
30113011+30123012+(* Monorepo export command *)
30133013+let monorepo_cmd =
30143014+ let doc = "Export a standalone buildable monorepo." in
30153015+ let man = [
30163016+ `S Manpage.s_description;
30173017+ `P "Creates a standalone directory containing all projects and their \
30183018+ vendored dependencies, suitable for building with dune. No git history \
30193019+ is included - only the current state of each branch.";
30203020+ `S "OUTPUT STRUCTURE";
30213021+ `Pre " output/
30223022+ ├── dune-project
30233023+ ├── dune
30243024+ ├── project1/
30253025+ │ ├── src/
30263026+ │ └── dune
30273027+ ├── project2/
30283028+ │ └── ...
30293029+ └── vendor/
30303030+ ├── opam/
30313031+ │ ├── pkg1/
30323032+ │ └── pkg2/
30333033+ └── git/
30343034+ └── repo1/";
30353035+ `S Manpage.s_examples;
30363036+ `P "Export all projects:";
30373037+ `Pre " unpac monorepo /path/to/output";
30383038+ `P "Export specific projects:";
30393039+ `Pre " unpac monorepo -p myapp -p mylib /path/to/output";
30403040+ `P "Export without opam packages:";
30413041+ `Pre " unpac monorepo --no-opam /path/to/output";
30423042+ ] in
30433043+ let output_arg =
30443044+ let doc = "Output directory for the monorepo." in
30453045+ Arg.(required & pos 0 (some string) None & info [] ~docv:"OUTPUT" ~doc)
30463046+ in
30473047+ let projects_arg =
30483048+ let doc = "Specific projects to include (can be repeated). Default: all projects." in
30493049+ Arg.(value & opt_all string [] & info ["p"; "project"] ~docv:"NAME" ~doc)
30503050+ in
30513051+ let no_opam_arg =
30523052+ let doc = "Exclude vendored opam packages." in
30533053+ Arg.(value & flag & info ["no-opam"] ~doc)
30543054+ in
30553055+ let no_git_arg =
30563056+ let doc = "Exclude vendored git repositories." in
30573057+ Arg.(value & flag & info ["no-git"] ~doc)
30583058+ in
30593059+ let run () output_dir projects no_opam no_git =
30603060+ with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root ->
30613061+ let config : Unpac.Monorepo.export_config = {
30623062+ output_dir;
30633063+ projects = if projects = [] then None else Some projects;
30643064+ include_opam = not no_opam;
30653065+ include_git = not no_git;
30663066+ } in
30673067+ let result = Unpac.Monorepo.export ~proc_mgr ~root ~config in
30683068+ Format.printf "@.Monorepo exported to %s@." result.output_path;
30693069+ Format.printf "@.Contents:@.";
30703070+ Format.printf " Projects: %s@." (String.concat ", " result.projects_exported);
30713071+ if result.opam_packages <> [] then
30723072+ Format.printf " Opam packages: %d@." (List.length result.opam_packages);
30733073+ if result.git_repos <> [] then
30743074+ Format.printf " Git repos: %d@." (List.length result.git_repos);
30753075+ Format.printf "@.Build with:@.";
30763076+ Format.printf " cd %s && dune build@." output_dir;
30773077+ Format.printf " cd %s && dune build @doc@." output_dir
30783078+ in
30793079+ let info = Cmd.info "monorepo" ~doc ~man in
30803080+ Cmd.v info Term.(const run $ logging_term $ output_arg $ projects_arg $ no_opam_arg $ no_git_arg)
30813081+30823082+(* Main command *)
30833083+let main_cmd =
30843084+ let doc = "Multi-backend vendoring tool using git worktrees." in
30853085+ let man = [
30863086+ `S Manpage.s_description;
30873087+ `P "Unpac is a vendoring tool that maintains third-party dependencies \
30883088+ as git branches with full history. It uses git worktrees to provide \
30893089+ isolated views for editing, and a three-tier branch model \
30903090+ (upstream/vendor/patches) for conflict-free updates.";
30913091+ `S "VENDORING MODES";
30923092+ `I ("unpac opam", "Vendor OCaml packages from opam repositories with \
30933093+ dependency solving.");
30943094+ `I ("unpac git", "Vendor arbitrary git repositories directly by URL.");
30953095+ `S "THREE-TIER BRANCH MODEL";
30963096+ `P "Each vendored item has three branches:";
30973097+ `I ("upstream/*", "Tracks the original repository");
30983098+ `I ("vendor/*", "Clean snapshot used as merge base");
30993099+ `I ("patches/*", "Your local modifications");
31003100+ `S "QUICK START";
31013101+ `Pre " unpac init myproject && cd myproject
31023102+ unpac opam repo add default /path/to/opam-repository
31033103+ unpac opam config compiler 5.2.0
31043104+ unpac project new main
31053105+ unpac opam add mylib --solve
31063106+ unpac opam merge --all main";
31073107+ `S "COMMANDS";
31083108+ ] in
31093109+ let info = Cmd.info "unpac" ~version:"0.1.0" ~doc ~man in
31103110+ Cmd.group info [init_cmd; status_cmd; project_cmd; opam_cmd; git_cmd; vendor_cmd; push_cmd; log_cmd;
31113111+ export_cmd; export_set_remote_cmd; export_push_cmd; export_list_cmd; monorepo_cmd]
31123112+31133113+let () = exit (Cmd.eval main_cmd)
···11+(** Unpac Claude agent - ralph-loop style autonomous coding for workspace projects. *)
22+33+open Cmdliner
44+55+let setup_logging verbose =
66+ Fmt_tty.setup_std_outputs ();
77+ (* Normal mode: Warning level (suppress Claude lib's JSON INFO logs)
88+ Verbose mode: Debug level (show everything) *)
99+ let level = if verbose then Logs.Debug else Logs.Warning in
1010+ Logs.set_level (Some level);
1111+ Logs.set_reporter (Logs_fmt.reporter ())
1212+1313+let run_agent verbose web_port project workspace_path =
1414+ setup_logging verbose;
1515+ Eio_main.run @@ fun env ->
1616+ let config : Unpac_claude.Agent.config = {
1717+ verbose;
1818+ web_port;
1919+ max_iterations = 20;
2020+ project;
2121+ } in
2222+ Unpac_claude.Agent.run ~env ~config ~workspace_path ()
2323+2424+(* CLI *)
2525+let verbose_arg =
2626+ let doc = "Enable verbose logging." in
2727+ Arg.(value & flag & info ["v"; "verbose"] ~doc)
2828+2929+let web_port_arg =
3030+ let doc = "Enable web UI on this port. Shows live streaming events." in
3131+ Arg.(value & opt (some int) None & info ["web"] ~docv:"PORT" ~doc)
3232+3333+let project_arg =
3434+ let doc = "Specific project to work on. If not specified, runs all \
3535+ projects sequentially in random order." in
3636+ Arg.(value & opt (some string) None & info ["p"; "project"] ~docv:"NAME" ~doc)
3737+3838+let workspace_arg =
3939+ let doc = "Path to the unpac workspace. Required." in
4040+ Arg.(required & pos 0 (some string) None & info [] ~docv:"WORKSPACE" ~doc)
4141+4242+let cmd =
4343+ let doc = "Ralph-loop style Claude agent for unpac workspace projects" in
4444+ let man = [
4545+ `S Manpage.s_description;
4646+ `P "Runs an autonomous Claude agent using the ralph-loop pattern: \
4747+ the same prompt is fed each iteration, with state persisting in \
4848+ files. The agent works on projects until either:";
4949+ `I ("Iterations", "20 iterations have completed");
5050+ `I ("Completion", "The agent outputs the completion promise");
5151+ `S "RALPH-LOOP PATTERN";
5252+ `P "Unlike traditional agentic loops that vary prompts based on \
5353+ previous responses, ralph-loop feeds the SAME prompt every \
5454+ iteration. Claude's progress persists in files (STATUS.md, \
5555+ source code, git commits) which it reads on each iteration.";
5656+ `P "This creates a self-referential improvement loop where Claude \
5757+ sees its own previous work and continues from there.";
5858+ `S "COMPLETION PROMISE";
5959+ `P (Printf.sprintf "When all significant work is complete, Claude \
6060+ outputs exactly: %s" Unpac_claude.Agent.completion_promise);
6161+ `P "This signals the loop to stop early before 20 iterations.";
6262+ `S "MODEL";
6363+ `P "Always uses Claude Opus 4.5 for maximum capability.";
6464+ `S "WEB UI";
6565+ `P "Use --web PORT to enable a live web dashboard showing:";
6666+ `I ("Events", "Real-time streaming from the agent");
6767+ `I ("Tool calls", "Each tool invocation with input/output");
6868+ `I ("Iterations", "Current iteration progress");
6969+ `S Manpage.s_examples;
7070+ `P "Run agent on all projects (random order):";
7171+ `Pre " unpac-claude /path/to/workspace";
7272+ `P "Run agent on a specific project:";
7373+ `Pre " unpac-claude -p mylib /path/to/workspace";
7474+ `P "With web UI on port 8080:";
7575+ `Pre " unpac-claude --web 8080 /path/to/workspace";
7676+ `P "Verbose logging:";
7777+ `Pre " unpac-claude -v /path/to/workspace";
7878+ `S "WORKING DIRECTORY";
7979+ `P "State is maintained in <workspace>/.unpac-claude/<project>/ \
8080+ with a .claude subdirectory for ralph-loop state.";
8181+ `S "EXIT STATUS";
8282+ `P "Exits with 0 when all projects complete (either by iteration \
8383+ limit or completion promise). Can be interrupted with Ctrl+C.";
8484+ ] in
8585+ let info = Cmd.info "unpac-claude" ~version:"0.5.0" ~doc ~man in
8686+ Cmd.v info Term.(const run_agent $ verbose_arg $ web_port_arg $
8787+ project_arg $ workspace_arg)
8888+8989+let () = exit (Cmd.eval cmd)
···11+#!/usr/bin/env python3
22+33+"""
44+git-filter-repo filters git repositories, similar to git filter-branch, BFG
55+repo cleaner, and others. The basic idea is that it works by running
66+ git fast-export <options> | filter | git fast-import <options>
77+where this program not only launches the whole pipeline but also serves as
88+the 'filter' in the middle. It does a few additional things on top as well
99+in order to make it into a well-rounded filtering tool.
1010+1111+git-filter-repo can also be used as a library for more involved filtering
1212+operations; however:
1313+ ***** API BACKWARD COMPATIBILITY CAVEAT *****
1414+ Programs using git-filter-repo as a library can reach pretty far into its
1515+ internals, but I am not prepared to guarantee backward compatibility of
1616+ all APIs. I suspect changes will be rare, but I reserve the right to
1717+ change any API. Since it is assumed that repository filtering is
1818+ something one would do very rarely, and in particular that it's a
1919+ one-shot operation, this should not be a problem in practice for anyone.
2020+ However, if you want to re-use a program you have written that uses
2121+ git-filter-repo as a library (or makes use of one of its --*-callback
2222+ arguments), you should either make sure you are using the same version of
2323+ git and git-filter-repo, or make sure to re-test it.
2424+2525+ If there are particular pieces of the API you are concerned about, and
2626+ there is not already a testcase for it in t9391-lib-usage.sh or
2727+ t9392-python-callback.sh, please contribute a testcase. That will not
2828+ prevent me from changing the API, but it will allow you to look at the
2929+ history of a testcase to see whether and how the API changed.
3030+ ***** END API BACKWARD COMPATIBILITY CAVEAT *****
3131+"""
3232+3333+import argparse
3434+import collections
3535+import fnmatch
3636+import gettext
3737+import io
3838+import os
3939+import platform
4040+import re
4141+import shutil
4242+import subprocess
4343+import sys
4444+import time
4545+import textwrap
4646+4747+from datetime import tzinfo, timedelta, datetime
4848+4949+__all__ = ["Blob", "Reset", "FileChange", "Commit", "Tag", "Progress",
5050+ "Checkpoint", "FastExportParser", "ProgressWriter",
5151+ "string_to_date", "date_to_string",
5252+ "record_id_rename", "GitUtils", "FilteringOptions", "RepoFilter"]
5353+5454+# The globals to make visible to callbacks. They will see all our imports for
5555+# free, as well as our public API.
5656+public_globals = ["__builtins__", "argparse", "collections", "fnmatch",
5757+ "gettext", "io", "os", "platform", "re", "shutil",
5858+ "subprocess", "sys", "time", "textwrap", "tzinfo",
5959+ "timedelta", "datetime"] + __all__
6060+6161+deleted_hash = b'0'*40
6262+write_marks = True
6363+date_format_permissive = True
6464+6565+def gettext_poison(msg):
6666+ if "GIT_TEST_GETTEXT_POISON" in os.environ: # pragma: no cover
6767+ return "# GETTEXT POISON #"
6868+ return gettext.gettext(msg)
6969+7070+_ = gettext_poison
7171+7272+def setup_gettext():
7373+ TEXTDOMAIN="git-filter-repo"
7474+ podir = os.environ.get("GIT_TEXTDOMAINDIR") or "@@LOCALEDIR@@"
7575+ if not os.path.isdir(podir): # pragma: no cover
7676+ podir = None # Python has its own fallback; use that
7777+7878+ ## This looks like the most straightforward translation of the relevant
7979+ ## code in git.git:gettext.c and git.git:perl/Git/I18n.pm:
8080+ #import locale
8181+ #locale.setlocale(locale.LC_MESSAGES, "");
8282+ #locale.setlocale(locale.LC_TIME, "");
8383+ #locale.textdomain(TEXTDOMAIN);
8484+ #locale.bindtextdomain(TEXTDOMAIN, podir);
8585+ ## but the python docs suggest using the gettext module (which doesn't
8686+ ## have setlocale()) instead, so:
8787+ gettext.textdomain(TEXTDOMAIN);
8888+ gettext.bindtextdomain(TEXTDOMAIN, podir);
8989+9090+def _timedelta_to_seconds(delta):
9191+ """
9292+ Converts timedelta to seconds
9393+ """
9494+ offset = delta.days*86400 + delta.seconds + (delta.microseconds+0.0)/1000000
9595+ return round(offset)
9696+9797+class FixedTimeZone(tzinfo):
9898+ """
9999+ Fixed offset in minutes east from UTC.
100100+ """
101101+102102+ tz_re = re.compile(br'^([-+]?)(\d\d)(\d\d)$')
103103+104104+ def __init__(self, offset_string):
105105+ tzinfo.__init__(self)
106106+ sign, hh, mm = FixedTimeZone.tz_re.match(offset_string).groups()
107107+ factor = -1 if (sign and sign == b'-') else 1
108108+ self._offset = timedelta(minutes = factor*(60*int(hh) + int(mm)))
109109+ self._offset_string = offset_string
110110+111111+ def utcoffset(self, dt):
112112+ return self._offset
113113+114114+ def tzname(self, dt):
115115+ return self._offset_string
116116+117117+ def dst(self, dt):
118118+ return timedelta(0)
119119+120120+def string_to_date(datestring):
121121+ (unix_timestamp, tz_offset) = datestring.split()
122122+ return datetime.fromtimestamp(int(unix_timestamp),
123123+ FixedTimeZone(tz_offset))
124124+125125+def date_to_string(dateobj):
126126+ epoch = datetime.fromtimestamp(0, dateobj.tzinfo)
127127+ return(b'%d %s' % (int(_timedelta_to_seconds(dateobj - epoch)),
128128+ dateobj.tzinfo.tzname(0)))
129129+130130+def decode(bytestr):
131131+ 'Try to convert bytestr to utf-8 for outputting as an error message.'
132132+ return bytestr.decode('utf-8', 'backslashreplace')
133133+134134+def glob_to_regex(glob_bytestr):
135135+ 'Translate glob_bytestr into a regex on bytestrings'
136136+137137+ # fnmatch.translate is idiotic and won't accept bytestrings
138138+ if (decode(glob_bytestr).encode() != glob_bytestr): # pragma: no cover
139139+ raise SystemExit(_("Error: Cannot handle glob %s").format(glob_bytestr))
140140+141141+ # Create regex operating on string
142142+ regex = fnmatch.translate(decode(glob_bytestr))
143143+144144+ # FIXME: This is an ugly hack...
145145+ # fnmatch.translate tries to do multi-line matching and wants the glob to
146146+ # match up to the end of the input, which isn't relevant for us, so we
147147+ # have to modify the regex. fnmatch.translate has used different regex
148148+ # constructs to achieve this with different python versions, so we have
149149+ # to check for each of them and then fix it up. It would be much better
150150+ # if fnmatch.translate could just take some flags to allow us to specify
151151+ # what we want rather than employing this hackery, but since it
152152+ # doesn't...
153153+ if regex.endswith(r'\Z(?ms)'): # pragma: no cover
154154+ regex = regex[0:-7]
155155+ elif regex.startswith(r'(?s:') and regex.endswith(r')\Z'): # pragma: no cover
156156+ regex = regex[4:-3]
157157+ elif regex.startswith(r'(?s:') and regex.endswith(r')\z'): # pragma: no cover
158158+ # Yaay, python3.14 for senselessly duplicating \Z as \z...
159159+ regex = regex[4:-3]
160160+161161+ # Finally, convert back to regex operating on bytestr
162162+ return regex.encode()
163163+164164+class PathQuoting:
165165+ _unescape = {b'a': b'\a',
166166+ b'b': b'\b',
167167+ b'f': b'\f',
168168+ b'n': b'\n',
169169+ b'r': b'\r',
170170+ b't': b'\t',
171171+ b'v': b'\v',
172172+ b'"': b'"',
173173+ b'\\':b'\\'}
174174+ _unescape_re = re.compile(br'\\([a-z"\\]|[0-9]{3})')
175175+ _escape = [bytes([x]) for x in range(127)]+[
176176+ b'\\'+bytes(ord(c) for c in oct(x)[2:]) for x in range(127,256)]
177177+ _reverse = dict(map(reversed, _unescape.items()))
178178+ for x in _reverse:
179179+ _escape[ord(x)] = b'\\'+_reverse[x]
180180+ _special_chars = [len(x) > 1 for x in _escape]
181181+182182+ @staticmethod
183183+ def unescape_sequence(orig):
184184+ seq = orig.group(1)
185185+ return PathQuoting._unescape[seq] if len(seq) == 1 else bytes([int(seq, 8)])
186186+187187+ @staticmethod
188188+ def dequote(quoted_string):
189189+ if quoted_string.startswith(b'"'):
190190+ assert quoted_string.endswith(b'"')
191191+ return PathQuoting._unescape_re.sub(PathQuoting.unescape_sequence,
192192+ quoted_string[1:-1])
193193+ return quoted_string
194194+195195+ @staticmethod
196196+ def enquote(unquoted_string):
197197+ # Option 1: Quoting when fast-export would:
198198+ # pqsc = PathQuoting._special_chars
199199+ # if any(pqsc[x] for x in set(unquoted_string)):
200200+ # Option 2, perf hack: do minimal amount of quoting required by fast-import
201201+ if unquoted_string.startswith(b'"') or b'\n' in unquoted_string:
202202+ pqe = PathQuoting._escape
203203+ return b'"' + b''.join(pqe[x] for x in unquoted_string) + b'"'
204204+ return unquoted_string
205205+206206+class AncestryGraph(object):
207207+ """
208208+ A class that maintains a direct acycle graph of commits for the purpose of
209209+ determining if one commit is the ancestor of another.
210210+211211+ A note about identifiers in Commit objects:
212212+ * Commit objects have 2 identifiers: commit.old_id and commit.id, because:
213213+ * The original fast-export stream identified commits by an identifier.
214214+ This is often an integer, but is sometimes a hash (particularly when
215215+ --reference-excluded-parents is provided)
216216+ * The new fast-import stream we use may not use the same identifiers.
217217+ If new blobs or commits are inserted (such as lint-history does), then
218218+ the integer (or hash) are no longer valid.
219219+220220+ A note about identifiers in AncestryGraph objects, of which there are three:
221221+ * A given AncestryGraph is based on either commit.old_id or commit.id, but
222222+ not both. These are the keys for self.value.
223223+ * Using full hashes (occasionally) for children in self.graph felt
224224+ wasteful, so we use our own internal integer within self.graph.
225225+ self.value maps from commit {old_}id to our internal integer id.
226226+ * When working with commit.old_id, it is also sometimes useful to be able
227227+ to map these to the original hash, i.e. commit.original_id. So, we
228228+ also have self.git_hash for mapping from commit.old_id to git's commit
229229+ hash.
230230+ """
231231+232232+ def __init__(self):
233233+ # The next internal identifier we will use; increments with every commit
234234+ # added to the AncestryGraph
235235+ self.cur_value = 0
236236+237237+ # A mapping from the external identifers given to us to the simple integers
238238+ # we use in self.graph
239239+ self.value = {}
240240+241241+ # A tuple of (depth, list-of-ancestors). Values and keys in this graph are
242242+ # all integers from the (values of the) self.value dict. The depth of a
243243+ # commit is one more than the max depth of any of its ancestors.
244244+ self.graph = {}
245245+246246+ # A mapping from external identifier (i.e. from the keys of self.value) to
247247+ # the hash of the given commit. Only populated for graphs based on
248248+ # commit.old_id, since we won't know until later what the git_hash for
249249+ # graphs based on commit.id (since we have to wait for fast-import to
250250+ # create the commit and notify us of its hash; see _pending_renames).
251251+ # elsewhere
252252+ self.git_hash = {}
253253+254254+ # Reverse maps; only populated if needed. Caller responsible to check
255255+ # and ensure they are populated
256256+ self._reverse_value = {}
257257+ self._hash_to_id = {}
258258+259259+ # Cached results from previous calls to is_ancestor().
260260+ self._cached_is_ancestor = {}
261261+262262+ def record_external_commits(self, external_commits):
263263+ """
264264+ Record in graph that each commit in external_commits exists, and is
265265+ treated as a root commit with no parents.
266266+ """
267267+ for c in external_commits:
268268+ if c not in self.value:
269269+ self.cur_value += 1
270270+ self.value[c] = self.cur_value
271271+ self.graph[self.cur_value] = (1, [])
272272+ self.git_hash[c] = c
273273+274274+ def add_commit_and_parents(self, commit, parents, githash = None):
275275+ """
276276+ Record in graph that commit has the given parents (all identified by
277277+ fast export stream identifiers, usually integers but sometimes hashes).
278278+ parents _MUST_ have been first recorded. commit _MUST_ not have been
279279+ recorded yet. Also, record the mapping between commit and githash, if
280280+ githash is given.
281281+ """
282282+ assert all(p in self.value for p in parents)
283283+ assert commit not in self.value
284284+285285+ # Get values for commit and parents
286286+ self.cur_value += 1
287287+ self.value[commit] = self.cur_value
288288+ if githash:
289289+ self.git_hash[commit] = githash
290290+ graph_parents = [self.value[x] for x in parents]
291291+292292+ # Determine depth for commit, then insert the info into the graph
293293+ depth = 1
294294+ if parents:
295295+ depth += max(self.graph[p][0] for p in graph_parents)
296296+ self.graph[self.cur_value] = (depth, graph_parents)
297297+298298+ def record_hash(self, commit_id, githash):
299299+ '''
300300+ If a githash was not recorded for commit_id, when add_commit_and_parents
301301+ was called, add it now.
302302+ '''
303303+ assert commit_id in self.value
304304+ assert commit_id not in self.git_hash
305305+ self.git_hash[commit_id] = githash
306306+307307+ def _ensure_reverse_maps_populated(self):
308308+ if not self._hash_to_id:
309309+ assert not self._reverse_value
310310+ self._hash_to_id = {v: k for k, v in self.git_hash.items()}
311311+ self._reverse_value = {v: k for k, v in self.value.items()}
312312+313313+ def get_parent_hashes(self, commit_hash):
314314+ '''
315315+ Given a commit_hash, return its parents hashes
316316+ '''
317317+ #
318318+ # We have to map:
319319+ # commit hash -> fast export stream id -> graph id
320320+ # then lookup
321321+ # parent graph ids for given graph id
322322+ # then we need to map
323323+ # parent graph ids -> parent fast export ids -> parent commit hashes
324324+ #
325325+ self._ensure_reverse_maps_populated()
326326+ commit_fast_export_id = self._hash_to_id[commit_hash]
327327+ commit_graph_id = self.value[commit_fast_export_id]
328328+ parent_graph_ids = self.graph[commit_graph_id][1]
329329+ parent_fast_export_ids = [self._reverse_value[x] for x in parent_graph_ids]
330330+ parent_hashes = [self.git_hash[x] for x in parent_fast_export_ids]
331331+ return parent_hashes
332332+333333+ def map_to_hash(self, commit_id):
334334+ '''
335335+ Given a commit (by fast export stream id), return its hash
336336+ '''
337337+ return self.git_hash.get(commit_id, None)
338338+339339+ def is_ancestor(self, possible_ancestor, check):
340340+ """
341341+ Return whether possible_ancestor is an ancestor of check
342342+ """
343343+ a, b = self.value[possible_ancestor], self.value[check]
344344+ original_pair = (a,b)
345345+ a_depth = self.graph[a][0]
346346+ ancestors = [b]
347347+ visited = set()
348348+ while ancestors:
349349+ ancestor = ancestors.pop()
350350+ prev_pair = (a, ancestor)
351351+ if prev_pair in self._cached_is_ancestor:
352352+ if not self._cached_is_ancestor[prev_pair]:
353353+ continue
354354+ self._cached_is_ancestor[original_pair] = True
355355+ return True
356356+ if ancestor in visited:
357357+ continue
358358+ visited.add(ancestor)
359359+ depth, more_ancestors = self.graph[ancestor]
360360+ if ancestor == a:
361361+ self._cached_is_ancestor[original_pair] = True
362362+ return True
363363+ elif depth <= a_depth:
364364+ continue
365365+ ancestors.extend(more_ancestors)
366366+ self._cached_is_ancestor[original_pair] = False
367367+ return False
368368+369369+class MailmapInfo(object):
370370+ def __init__(self, filename):
371371+ self.changes = {}
372372+ self._parse_file(filename)
373373+374374+ def _parse_file(self, filename):
375375+ name_and_email_re = re.compile(br'(.*?)\s*<([^>]*)>\s*')
376376+ comment_re = re.compile(br'\s*#.*')
377377+ if not os.access(filename, os.R_OK):
378378+ raise SystemExit(_("Cannot read %s") % decode(filename))
379379+ with open(filename, 'br') as f:
380380+ count = 0
381381+ for line in f:
382382+ count += 1
383383+ err = "Unparseable mailmap file: line #{} is bad: {}".format(count, line)
384384+ # Remove comments
385385+ line = comment_re.sub(b'', line)
386386+ # Remove leading and trailing whitespace
387387+ line = line.strip()
388388+ if not line:
389389+ continue
390390+391391+ m = name_and_email_re.match(line)
392392+ if not m:
393393+ raise SystemExit(err)
394394+ proper_name, proper_email = m.groups()
395395+ if len(line) == m.end():
396396+ self.changes[(None, proper_email)] = (proper_name, proper_email)
397397+ continue
398398+ rest = line[m.end():]
399399+ m = name_and_email_re.match(rest)
400400+ if m:
401401+ commit_name, commit_email = m.groups()
402402+ if len(rest) != m.end():
403403+ raise SystemExit(err)
404404+ else:
405405+ commit_name, commit_email = rest, None
406406+ self.changes[(commit_name, commit_email)] = (proper_name, proper_email)
407407+408408+ def translate(self, name, email):
409409+ ''' Given a name and email, return the expected new name and email from the
410410+ mailmap if there is a translation rule for it, otherwise just return
411411+ the given name and email.'''
412412+ for old, new in self.changes.items():
413413+ old_name, old_email = old
414414+ new_name, new_email = new
415415+ if (old_email is None or email.lower() == old_email.lower()) and (
416416+ name == old_name or not old_name):
417417+ return (new_name or name, new_email or email)
418418+ return (name, email)
419419+420420+class ProgressWriter(object):
421421+ def __init__(self):
422422+ self._last_progress_update = time.time()
423423+ self._last_message = None
424424+425425+ def show(self, msg):
426426+ self._last_message = msg
427427+ now = time.time()
428428+ if now - self._last_progress_update > .1:
429429+ self._last_progress_update = now
430430+ sys.stdout.write("\r{}".format(msg))
431431+ sys.stdout.flush()
432432+433433+ def finish(self):
434434+ self._last_progress_update = 0
435435+ if self._last_message:
436436+ self.show(self._last_message)
437437+ sys.stdout.write("\n")
438438+439439+class _IDs(object):
440440+ """
441441+ A class that maintains the 'name domain' of all the 'marks' (short int
442442+ id for a blob/commit git object). There are two reasons this mechanism
443443+ is necessary:
444444+ (1) the output text of fast-export may refer to an object using a different
445445+ mark than the mark that was assigned to that object using IDS.new().
446446+ (This class allows you to translate the fast-export marks, "old" to
447447+ the marks assigned from IDS.new(), "new").
448448+ (2) when we prune a commit, its "old" id becomes invalid. Any commits
449449+ which had that commit as a parent needs to use the nearest unpruned
450450+ ancestor as its parent instead.
451451+452452+ Note that for purpose (1) above, this typically comes about because the user
453453+ manually creates Blob or Commit objects (for insertion into the stream).
454454+ It could also come about if we attempt to read the data from two different
455455+ repositories and trying to combine the data (git fast-export will number ids
456456+ from 1...n, and having two 1's, two 2's, two 3's, causes issues; granted, we
457457+ this scheme doesn't handle the two streams perfectly either, but if the first
458458+ fast export stream is entirely processed and handled before the second stream
459459+ is started, this mechanism may be sufficient to handle it).
460460+ """
461461+462462+ def __init__(self):
463463+ """
464464+ Init
465465+ """
466466+ # The id for the next created blob/commit object
467467+ self._next_id = 1
468468+469469+ # A map of old-ids to new-ids (1:1 map)
470470+ self._translation = {}
471471+472472+ # A map of new-ids to every old-id that points to the new-id (1:N map)
473473+ self._reverse_translation = {}
474474+475475+ def has_renames(self):
476476+ """
477477+ Return whether there have been ids remapped to new values
478478+ """
479479+ return bool(self._translation)
480480+481481+ def new(self):
482482+ """
483483+ Should be called whenever a new blob or commit object is created. The
484484+ returned value should be used as the id/mark for that object.
485485+ """
486486+ rv = self._next_id
487487+ self._next_id += 1
488488+ return rv
489489+490490+ def record_rename(self, old_id, new_id, handle_transitivity = False):
491491+ """
492492+ Record that old_id is being renamed to new_id.
493493+ """
494494+ if old_id != new_id or old_id in self._translation:
495495+ # old_id -> new_id
496496+ self._translation[old_id] = new_id
497497+498498+ # Transitivity will be needed if new commits are being inserted mid-way
499499+ # through a branch.
500500+ if handle_transitivity:
501501+ # Anything that points to old_id should point to new_id
502502+ if old_id in self._reverse_translation:
503503+ for id_ in self._reverse_translation[old_id]:
504504+ self._translation[id_] = new_id
505505+506506+ # Record that new_id is pointed to by old_id
507507+ if new_id not in self._reverse_translation:
508508+ self._reverse_translation[new_id] = []
509509+ self._reverse_translation[new_id].append(old_id)
510510+511511+ def translate(self, old_id):
512512+ """
513513+ If old_id has been mapped to an alternate id, return the alternate id.
514514+ """
515515+ if old_id in self._translation:
516516+ return self._translation[old_id]
517517+ else:
518518+ return old_id
519519+520520+ def __str__(self):
521521+ """
522522+ Convert IDs to string; used for debugging
523523+ """
524524+ rv = "Current count: %d\nTranslation:\n" % self._next_id
525525+ for k in sorted(self._translation):
526526+ rv += " %d -> %s\n" % (k, self._translation[k])
527527+528528+ rv += "Reverse translation:\n"
529529+ reverse_keys = list(self._reverse_translation.keys())
530530+ if None in reverse_keys: # pragma: no cover
531531+ reverse_keys.remove(None)
532532+ reverse_keys = sorted(reverse_keys)
533533+ reverse_keys.append(None)
534534+ for k in reverse_keys:
535535+ rv += " " + str(k) + " -> " + str(self._reverse_translation[k]) + "\n"
536536+537537+ return rv
538538+539539+class _GitElement(object):
540540+ """
541541+ The base class for all git elements that we create.
542542+ """
543543+544544+ def __init__(self):
545545+ # A string that describes what type of Git element this is
546546+ self.type = None
547547+548548+ # A flag telling us if this Git element has been dumped
549549+ # (i.e. printed) or skipped. Typically elements that have been
550550+ # dumped or skipped will not be dumped again.
551551+ self.dumped = 0
552552+553553+ def dump(self, file_):
554554+ """
555555+ This version should never be called. Derived classes need to
556556+ override! We should note that subclasses should implement this
557557+ method such that the output would match the format produced by
558558+ fast-export.
559559+ """
560560+ raise SystemExit(_("Unimplemented function: %s") % type(self).__name__
561561+ +".dump()") # pragma: no cover
562562+563563+ def __bytes__(self):
564564+ """
565565+ Convert GitElement to bytestring; used for debugging
566566+ """
567567+ old_dumped = self.dumped
568568+ writeme = io.BytesIO()
569569+ self.dump(writeme)
570570+ output_lines = writeme.getvalue().splitlines()
571571+ writeme.close()
572572+ self.dumped = old_dumped
573573+ return b"%s:\n %s" % (type(self).__name__.encode(),
574574+ b"\n ".join(output_lines))
575575+576576+ def skip(self, new_id=None):
577577+ """
578578+ Ensures this element will not be written to output
579579+ """
580580+ self.dumped = 2
581581+582582+class _GitElementWithId(_GitElement):
583583+ """
584584+ The base class for Git elements that have IDs (commits and blobs)
585585+ """
586586+587587+ def __init__(self):
588588+ _GitElement.__init__(self)
589589+590590+ # The mark (short, portable id) for this element
591591+ self.id = _IDS.new()
592592+593593+ # The previous mark for this element
594594+ self.old_id = None
595595+596596+ def skip(self, new_id=None):
597597+ """
598598+ This element will no longer be automatically written to output. When a
599599+ commit gets skipped, it's ID will need to be translated to that of its
600600+ parent.
601601+ """
602602+ self.dumped = 2
603603+604604+ _IDS.record_rename(self.old_id or self.id, new_id)
605605+606606+class Blob(_GitElementWithId):
607607+ """
608608+ This class defines our representation of git blob elements (i.e. our
609609+ way of representing file contents).
610610+ """
611611+612612+ def __init__(self, data, original_id = None):
613613+ _GitElementWithId.__init__(self)
614614+615615+ # Denote that this is a blob
616616+ self.type = 'blob'
617617+618618+ # Record original id
619619+ self.original_id = original_id
620620+621621+ # Stores the blob's data
622622+ assert(type(data) == bytes)
623623+ self.data = data
624624+625625+ def dump(self, file_):
626626+ """
627627+ Write this blob element to a file.
628628+ """
629629+ self.dumped = 1
630630+ BLOB_HASH_TO_NEW_ID[self.original_id] = self.id
631631+ BLOB_NEW_ID_TO_HASH[self.id] = self.original_id
632632+633633+ file_.write(b'blob\n')
634634+ file_.write(b'mark :%d\n' % self.id)
635635+ file_.write(b'data %d\n%s' % (len(self.data), self.data))
636636+ file_.write(b'\n')
637637+638638+639639+class Reset(_GitElement):
640640+ """
641641+ This class defines our representation of git reset elements. A reset
642642+ event is the creation (or recreation) of a named branch, optionally
643643+ starting from a specific revision).
644644+ """
645645+646646+ def __init__(self, ref, from_ref = None):
647647+ _GitElement.__init__(self)
648648+649649+ # Denote that this is a reset
650650+ self.type = 'reset'
651651+652652+ # The name of the branch being (re)created
653653+ self.ref = ref
654654+655655+ # Some reference to the branch/commit we are resetting from
656656+ self.from_ref = from_ref
657657+658658+ def dump(self, file_):
659659+ """
660660+ Write this reset element to a file
661661+ """
662662+ self.dumped = 1
663663+664664+ file_.write(b'reset %s\n' % self.ref)
665665+ if self.from_ref:
666666+ if isinstance(self.from_ref, int):
667667+ file_.write(b'from :%d\n' % self.from_ref)
668668+ else:
669669+ file_.write(b'from %s\n' % self.from_ref)
670670+ file_.write(b'\n')
671671+672672+class FileChange(_GitElement):
673673+ """
674674+ This class defines our representation of file change elements. File change
675675+ elements are components within a Commit element.
676676+ """
677677+678678+ def __init__(self, type_, filename = None, id_ = None, mode = None):
679679+ _GitElement.__init__(self)
680680+681681+ # Denote the type of file-change (b'M' for modify, b'D' for delete, etc)
682682+ # We could
683683+ # assert(type(type_) == bytes)
684684+ # here but I don't just due to worries about performance overhead...
685685+ self.type = type_
686686+687687+ # Record the name of the file being changed
688688+ self.filename = filename
689689+690690+ # Record the mode (mode describes type of file entry (non-executable,
691691+ # executable, or symlink)).
692692+ self.mode = mode
693693+694694+ # blob_id is the id (mark) of the affected blob
695695+ self.blob_id = id_
696696+697697+ if type_ == b'DELETEALL':
698698+ assert filename is None and id_ is None and mode is None
699699+ self.filename = b'' # Just so PathQuoting.enquote doesn't die
700700+ else:
701701+ assert filename is not None
702702+703703+ if type_ == b'M':
704704+ assert id_ is not None and mode is not None
705705+ elif type_ == b'D':
706706+ assert id_ is None and mode is None
707707+ elif type_ == b'R': # pragma: no cover (now avoid fast-export renames)
708708+ assert mode is None
709709+ if id_ is None:
710710+ raise SystemExit(_("new name needed for rename of %s") % filename)
711711+ self.filename = (self.filename, id_)
712712+ self.blob_id = None
713713+714714+ def dump(self, file_):
715715+ """
716716+ Write this file-change element to a file
717717+ """
718718+ skipped_blob = (self.type == b'M' and self.blob_id is None)
719719+ if skipped_blob: return
720720+ self.dumped = 1
721721+722722+ quoted_filename = PathQuoting.enquote(self.filename)
723723+ if self.type == b'M' and isinstance(self.blob_id, int):
724724+ file_.write(b'M %s :%d %s\n' % (self.mode, self.blob_id, quoted_filename))
725725+ elif self.type == b'M':
726726+ file_.write(b'M %s %s %s\n' % (self.mode, self.blob_id, quoted_filename))
727727+ elif self.type == b'D':
728728+ file_.write(b'D %s\n' % quoted_filename)
729729+ elif self.type == b'DELETEALL':
730730+ file_.write(b'deleteall\n')
731731+ else:
732732+ raise SystemExit(_("Unhandled filechange type: %s") % self.type) # pragma: no cover
733733+734734+class Commit(_GitElementWithId):
735735+ """
736736+ This class defines our representation of commit elements. Commit elements
737737+ contain all the information associated with a commit.
738738+ """
739739+740740+ def __init__(self, branch,
741741+ author_name, author_email, author_date,
742742+ committer_name, committer_email, committer_date,
743743+ message,
744744+ file_changes,
745745+ parents,
746746+ original_id = None,
747747+ encoding = None, # encoding for message; None implies UTF-8
748748+ **kwargs):
749749+ _GitElementWithId.__init__(self)
750750+ self.old_id = self.id
751751+752752+ # Denote that this is a commit element
753753+ self.type = 'commit'
754754+755755+ # Record the affected branch
756756+ self.branch = branch
757757+758758+ # Record original id
759759+ self.original_id = original_id
760760+761761+ # Record author's name
762762+ self.author_name = author_name
763763+764764+ # Record author's email
765765+ self.author_email = author_email
766766+767767+ # Record date of authoring
768768+ self.author_date = author_date
769769+770770+ # Record committer's name
771771+ self.committer_name = committer_name
772772+773773+ # Record committer's email
774774+ self.committer_email = committer_email
775775+776776+ # Record date the commit was made
777777+ self.committer_date = committer_date
778778+779779+ # Record commit message and its encoding
780780+ self.encoding = encoding
781781+ self.message = message
782782+783783+ # List of file-changes associated with this commit. Note that file-changes
784784+ # are also represented as git elements
785785+ self.file_changes = file_changes
786786+787787+ self.parents = parents
788788+789789+ def dump(self, file_):
790790+ """
791791+ Write this commit element to a file.
792792+ """
793793+ self.dumped = 1
794794+795795+ # Make output to fast-import slightly easier for humans to read if the
796796+ # message has no trailing newline of its own; cosmetic, but a nice touch...
797797+ extra_newline = b'\n'
798798+ if self.message.endswith(b'\n') or not (self.parents or self.file_changes):
799799+ extra_newline = b''
800800+801801+ if not self.parents:
802802+ file_.write(b'reset %s\n' % self.branch)
803803+ file_.write((b'commit %s\n'
804804+ b'mark :%d\n'
805805+ b'author %s <%s> %s\n'
806806+ b'committer %s <%s> %s\n'
807807+ ) % (
808808+ self.branch, self.id,
809809+ self.author_name, self.author_email, self.author_date,
810810+ self.committer_name, self.committer_email, self.committer_date
811811+ ))
812812+ if self.encoding:
813813+ file_.write(b'encoding %s\n' % self.encoding)
814814+ file_.write(b'data %d\n%s%s' %
815815+ (len(self.message), self.message, extra_newline))
816816+ for i, parent in enumerate(self.parents):
817817+ file_.write(b'from ' if i==0 else b'merge ')
818818+ if isinstance(parent, int):
819819+ file_.write(b':%d\n' % parent)
820820+ else:
821821+ file_.write(b'%s\n' % parent)
822822+ for change in self.file_changes:
823823+ change.dump(file_)
824824+ if not self.parents and not self.file_changes:
825825+ # Workaround a bug in pre-git-2.22 versions of fast-import with
826826+ # the get-mark directive.
827827+ file_.write(b'\n')
828828+ file_.write(b'\n')
829829+830830+ def first_parent(self):
831831+ """
832832+ Return first parent commit
833833+ """
834834+ if self.parents:
835835+ return self.parents[0]
836836+ return None
837837+838838+ def skip(self, new_id=None):
839839+ _SKIPPED_COMMITS.add(self.old_id or self.id)
840840+ _GitElementWithId.skip(self, new_id)
841841+842842+class Tag(_GitElementWithId):
843843+ """
844844+ This class defines our representation of annotated tag elements.
845845+ """
846846+847847+ def __init__(self, ref, from_ref,
848848+ tagger_name, tagger_email, tagger_date, tag_msg,
849849+ original_id = None):
850850+ _GitElementWithId.__init__(self)
851851+ self.old_id = self.id
852852+853853+ # Denote that this is a tag element
854854+ self.type = 'tag'
855855+856856+ # Store the name of the tag
857857+ self.ref = ref
858858+859859+ # Store the entity being tagged (this should be a commit)
860860+ self.from_ref = from_ref
861861+862862+ # Record original id
863863+ self.original_id = original_id
864864+865865+ # Store the name of the tagger
866866+ self.tagger_name = tagger_name
867867+868868+ # Store the email of the tagger
869869+ self.tagger_email = tagger_email
870870+871871+ # Store the date
872872+ self.tagger_date = tagger_date
873873+874874+ # Store the tag message
875875+ self.message = tag_msg
876876+877877+ def dump(self, file_):
878878+ """
879879+ Write this tag element to a file
880880+ """
881881+882882+ self.dumped = 1
883883+884884+ file_.write(b'tag %s\n' % self.ref)
885885+ if (write_marks and self.id):
886886+ file_.write(b'mark :%d\n' % self.id)
887887+ markfmt = b'from :%d\n' if isinstance(self.from_ref, int) else b'from %s\n'
888888+ file_.write(markfmt % self.from_ref)
889889+ if self.tagger_name:
890890+ file_.write(b'tagger %s <%s> ' % (self.tagger_name, self.tagger_email))
891891+ file_.write(self.tagger_date)
892892+ file_.write(b'\n')
893893+ file_.write(b'data %d\n%s' % (len(self.message), self.message))
894894+ file_.write(b'\n')
895895+896896+class Progress(_GitElement):
897897+ """
898898+ This class defines our representation of progress elements. The progress
899899+ element only contains a progress message, which is printed by fast-import
900900+ when it processes the progress output.
901901+ """
902902+903903+ def __init__(self, message):
904904+ _GitElement.__init__(self)
905905+906906+ # Denote that this is a progress element
907907+ self.type = 'progress'
908908+909909+ # Store the progress message
910910+ self.message = message
911911+912912+ def dump(self, file_):
913913+ """
914914+ Write this progress element to a file
915915+ """
916916+ self.dumped = 1
917917+918918+ file_.write(b'progress %s\n' % self.message)
919919+ file_.write(b'\n')
920920+921921+class Checkpoint(_GitElement):
922922+ """
923923+ This class defines our representation of checkpoint elements. These
924924+ elements represent events which force fast-import to close the current
925925+ packfile, start a new one, and to save out all current branch refs, tags
926926+ and marks.
927927+ """
928928+929929+ def __init__(self):
930930+ _GitElement.__init__(self)
931931+932932+ # Denote that this is a checkpoint element
933933+ self.type = 'checkpoint'
934934+935935+ def dump(self, file_):
936936+ """
937937+ Write this checkpoint element to a file
938938+ """
939939+ self.dumped = 1
940940+941941+ file_.write(b'checkpoint\n')
942942+ file_.write(b'\n')
943943+944944+class LiteralCommand(_GitElement):
945945+ """
946946+ This class defines our representation of commands. The literal command
947947+ includes only a single line, and is not processed in any special way.
948948+ """
949949+950950+ def __init__(self, line):
951951+ _GitElement.__init__(self)
952952+953953+ # Denote that this is a literal element
954954+ self.type = 'literal'
955955+956956+ # Store the command
957957+ self.line = line
958958+959959+ def dump(self, file_):
960960+ """
961961+ Write this progress element to a file
962962+ """
963963+ self.dumped = 1
964964+965965+ file_.write(self.line)
966966+967967+class Alias(_GitElement):
968968+ """
969969+ This class defines our representation of fast-import alias elements. An
970970+ alias element is the setting of one mark to the same sha1sum as another,
971971+ usually because the newer mark corresponded to a pruned commit.
972972+ """
973973+974974+ def __init__(self, ref, to_ref):
975975+ _GitElement.__init__(self)
976976+ # Denote that this is a reset
977977+ self.type = 'alias'
978978+979979+ self.ref = ref
980980+ self.to_ref = to_ref
981981+982982+ def dump(self, file_):
983983+ """
984984+ Write this reset element to a file
985985+ """
986986+ self.dumped = 1
987987+988988+ file_.write(b'alias\nmark :%d\nto :%d\n\n' % (self.ref, self.to_ref))
989989+990990+class FastExportParser(object):
991991+ """
992992+ A class for parsing and handling the output from fast-export. This
993993+ class allows the user to register callbacks when various types of
994994+ data are encountered in the fast-export output. The basic idea is that,
995995+ FastExportParser takes fast-export output, creates the various objects
996996+ as it encounters them, the user gets to use/modify these objects via
997997+ callbacks, and finally FastExportParser outputs the modified objects
998998+ in fast-import format (presumably so they can be used to create a new
999999+ repo).
10001000+ """
10011001+10021002+ def __init__(self,
10031003+ tag_callback = None, commit_callback = None,
10041004+ blob_callback = None, progress_callback = None,
10051005+ reset_callback = None, checkpoint_callback = None,
10061006+ done_callback = None):
10071007+ # Members below simply store callback functions for the various git
10081008+ # elements
10091009+ self._tag_callback = tag_callback
10101010+ self._blob_callback = blob_callback
10111011+ self._reset_callback = reset_callback
10121012+ self._commit_callback = commit_callback
10131013+ self._progress_callback = progress_callback
10141014+ self._checkpoint_callback = checkpoint_callback
10151015+ self._done_callback = done_callback
10161016+10171017+ # Keep track of which refs appear from the export, and which make it to
10181018+ # the import (pruning of empty commits, renaming of refs, and creating
10191019+ # new manual objects and inserting them can cause these to differ).
10201020+ self._exported_refs = set()
10211021+ self._imported_refs = set()
10221022+10231023+ # A list of the branches we've seen, plus the last known commit they
10241024+ # pointed to. An entry in latest_*commit will be deleted if we get a
10251025+ # reset for that branch. These are used because of fast-import's weird
10261026+ # decision to allow having an implicit parent via naming the branch
10271027+ # instead of requiring branches to be specified via 'from' directives.
10281028+ self._latest_commit = {}
10291029+ self._latest_orig_commit = {}
10301030+10311031+ # A handle to the input source for the fast-export data
10321032+ self._input = None
10331033+10341034+ # A handle to the output file for the output we generate (we call dump
10351035+ # on many of the git elements we create).
10361036+ self._output = None
10371037+10381038+ # Stores the contents of the current line of input being parsed
10391039+ self._currentline = ''
10401040+10411041+ # Tracks LFS objects we have found
10421042+ self._lfs_object_tracker = None
10431043+10441044+ # Compile some regexes and cache those
10451045+ self._mark_re = re.compile(br'mark :(\d+)\n$')
10461046+ self._parent_regexes = {}
10471047+ parent_regex_rules = (br' :(\d+)\n$', br' ([0-9a-f]{40})\n')
10481048+ for parent_refname in (b'from', b'merge'):
10491049+ ans = [re.compile(parent_refname+x) for x in parent_regex_rules]
10501050+ self._parent_regexes[parent_refname] = ans
10511051+ self._quoted_string_re = re.compile(br'"(?:[^"\\]|\\.)*"')
10521052+ self._refline_regexes = {}
10531053+ for refline_name in (b'reset', b'commit', b'tag', b'progress'):
10541054+ self._refline_regexes[refline_name] = re.compile(refline_name+b' (.*)\n$')
10551055+ self._user_regexes = {}
10561056+ for user in (b'author', b'committer', b'tagger'):
10571057+ self._user_regexes[user] = re.compile(user + b' (.*?) <(.*?)> (.*)\n$')
10581058+10591059+ def _advance_currentline(self):
10601060+ """
10611061+ Grab the next line of input
10621062+ """
10631063+ self._currentline = self._input.readline()
10641064+10651065+ def _parse_optional_mark(self):
10661066+ """
10671067+ If the current line contains a mark, parse it and advance to the
10681068+ next line; return None otherwise
10691069+ """
10701070+ mark = None
10711071+ matches = self._mark_re.match(self._currentline)
10721072+ if matches:
10731073+ mark = int(matches.group(1))
10741074+ self._advance_currentline()
10751075+ return mark
10761076+10771077+ def _parse_optional_parent_ref(self, refname):
10781078+ """
10791079+ If the current line contains a reference to a parent commit, then
10801080+ parse it and advance the current line; otherwise return None. Note
10811081+ that the name of the reference ('from', 'merge') must match the
10821082+ refname arg.
10831083+ """
10841084+ orig_baseref, baseref = None, None
10851085+ rule, altrule = self._parent_regexes[refname]
10861086+ matches = rule.match(self._currentline)
10871087+ if matches:
10881088+ orig_baseref = int(matches.group(1))
10891089+ # We translate the parent commit mark to what it needs to be in
10901090+ # our mark namespace
10911091+ baseref = _IDS.translate(orig_baseref)
10921092+ self._advance_currentline()
10931093+ else:
10941094+ matches = altrule.match(self._currentline)
10951095+ if matches:
10961096+ orig_baseref = matches.group(1)
10971097+ baseref = orig_baseref
10981098+ self._advance_currentline()
10991099+ return orig_baseref, baseref
11001100+11011101+ def _parse_optional_filechange(self):
11021102+ """
11031103+ If the current line contains a file-change object, then parse it
11041104+ and advance the current line; otherwise return None. We only care
11051105+ about file changes of type b'M' and b'D' (these are the only types
11061106+ of file-changes that fast-export will provide).
11071107+ """
11081108+ filechange = None
11091109+ changetype = self._currentline[0:1]
11101110+ if changetype == b'M':
11111111+ (changetype, mode, idnum, path) = self._currentline.split(None, 3)
11121112+ if idnum[0:1] == b':':
11131113+ idnum = idnum[1:]
11141114+ path = path.rstrip(b'\n')
11151115+ # Check for LFS objects from sources before we might toss this filechange
11161116+ if mode != b'160000' and self._lfs_object_tracker:
11171117+ value = int(idnum) if len(idnum) != 40 else idnum
11181118+ self._lfs_object_tracker.check_file_change_data(value, True)
11191119+ # We translate the idnum to our id system
11201120+ if len(idnum) != 40:
11211121+ idnum = _IDS.translate( int(idnum) )
11221122+ if idnum is not None:
11231123+ if path.startswith(b'"'):
11241124+ path = PathQuoting.dequote(path)
11251125+ filechange = FileChange(b'M', path, idnum, mode)
11261126+ else:
11271127+ filechange = b'skipped'
11281128+ self._advance_currentline()
11291129+ elif changetype == b'D':
11301130+ (changetype, path) = self._currentline.split(None, 1)
11311131+ path = path.rstrip(b'\n')
11321132+ if path.startswith(b'"'):
11331133+ path = PathQuoting.dequote(path)
11341134+ filechange = FileChange(b'D', path)
11351135+ self._advance_currentline()
11361136+ elif changetype == b'R': # pragma: no cover (now avoid fast-export renames)
11371137+ rest = self._currentline[2:-1]
11381138+ if rest.startswith(b'"'):
11391139+ m = self._quoted_string_re.match(rest)
11401140+ if not m:
11411141+ raise SystemExit(_("Couldn't parse rename source"))
11421142+ orig = PathQuoting.dequote(m.group(0))
11431143+ new = rest[m.end()+1:]
11441144+ else:
11451145+ orig, new = rest.split(b' ', 1)
11461146+ if new.startswith(b'"'):
11471147+ new = PathQuoting.dequote(new)
11481148+ filechange = FileChange(b'R', orig, new)
11491149+ self._advance_currentline()
11501150+ return filechange
11511151+11521152+ def _parse_original_id(self):
11531153+ original_id = self._currentline[len(b'original-oid '):].rstrip()
11541154+ self._advance_currentline()
11551155+ return original_id
11561156+11571157+ def _parse_encoding(self):
11581158+ encoding = self._currentline[len(b'encoding '):].rstrip()
11591159+ self._advance_currentline()
11601160+ return encoding
11611161+11621162+ def _parse_ref_line(self, refname):
11631163+ """
11641164+ Parses string data (often a branch name) from current-line. The name of
11651165+ the string data must match the refname arg. The program will crash if
11661166+ current-line does not match, so current-line will always be advanced if
11671167+ this method returns.
11681168+ """
11691169+ matches = self._refline_regexes[refname].match(self._currentline)
11701170+ if not matches:
11711171+ raise SystemExit(_("Malformed %(refname)s line: '%(line)s'") %
11721172+ ({'refname': refname, 'line':self._currentline})
11731173+ ) # pragma: no cover
11741174+ ref = matches.group(1)
11751175+ self._advance_currentline()
11761176+ return ref
11771177+11781178+ def _parse_user(self, usertype):
11791179+ """
11801180+ Get user name, email, datestamp from current-line. Current-line will
11811181+ be advanced.
11821182+ """
11831183+ user_regex = self._user_regexes[usertype]
11841184+ (name, email, when) = user_regex.match(self._currentline).groups()
11851185+11861186+ self._advance_currentline()
11871187+ return (name, email, when)
11881188+11891189+ def _parse_data(self):
11901190+ """
11911191+ Reads data from _input. Current-line will be advanced until it is beyond
11921192+ the data.
11931193+ """
11941194+ fields = self._currentline.split()
11951195+ assert fields[0] == b'data'
11961196+ size = int(fields[1])
11971197+ data = self._input.read(size)
11981198+ self._advance_currentline()
11991199+ if self._currentline == b'\n':
12001200+ self._advance_currentline()
12011201+ return data
12021202+12031203+ def _parse_blob(self):
12041204+ """
12051205+ Parse input data into a Blob object. Once the Blob has been created, it
12061206+ will be handed off to the appropriate callbacks. Current-line will be
12071207+ advanced until it is beyond this blob's data. The Blob will be dumped
12081208+ to _output once everything else is done (unless it has been skipped by
12091209+ the callback).
12101210+ """
12111211+ # Parse the Blob
12121212+ self._advance_currentline()
12131213+ id_ = self._parse_optional_mark()
12141214+12151215+ original_id = None
12161216+ if self._currentline.startswith(b'original-oid'):
12171217+ original_id = self._parse_original_id();
12181218+12191219+ data = self._parse_data()
12201220+ if self._currentline == b'\n':
12211221+ self._advance_currentline()
12221222+12231223+ # Create the blob
12241224+ blob = Blob(data, original_id)
12251225+12261226+ # If fast-export text had a mark for this blob, need to make sure this
12271227+ # mark translates to the blob's true id.
12281228+ if id_:
12291229+ blob.old_id = id_
12301230+ _IDS.record_rename(id_, blob.id)
12311231+12321232+ # Check for LFS objects
12331233+ if self._lfs_object_tracker:
12341234+ self._lfs_object_tracker.check_blob_data(data, blob.old_id, True)
12351235+12361236+ # Call any user callback to allow them to use/modify the blob
12371237+ if self._blob_callback:
12381238+ self._blob_callback(blob)
12391239+12401240+ # Now print the resulting blob
12411241+ if not blob.dumped:
12421242+ blob.dump(self._output)
12431243+12441244+ def _parse_reset(self):
12451245+ """
12461246+ Parse input data into a Reset object. Once the Reset has been created,
12471247+ it will be handed off to the appropriate callbacks. Current-line will
12481248+ be advanced until it is beyond the reset data. The Reset will be dumped
12491249+ to _output once everything else is done (unless it has been skipped by
12501250+ the callback).
12511251+ """
12521252+ # Parse the Reset
12531253+ ref = self._parse_ref_line(b'reset')
12541254+ self._exported_refs.add(ref)
12551255+ ignoreme, from_ref = self._parse_optional_parent_ref(b'from')
12561256+ if self._currentline == b'\n':
12571257+ self._advance_currentline()
12581258+12591259+ # fast-export likes to print extraneous resets that serve no purpose.
12601260+ # While we could continue processing such resets, that is a waste of
12611261+ # resources. Also, we want to avoid recording that this ref was
12621262+ # seen in such cases, since this ref could be rewritten to nothing.
12631263+ if not from_ref:
12641264+ self._latest_commit.pop(ref, None)
12651265+ self._latest_orig_commit.pop(ref, None)
12661266+ return
12671267+12681268+ # Create the reset
12691269+ reset = Reset(ref, from_ref)
12701270+12711271+ # Call any user callback to allow them to modify the reset
12721272+ if self._reset_callback:
12731273+ self._reset_callback(reset)
12741274+12751275+ # Update metadata
12761276+ self._latest_commit[reset.ref] = reset.from_ref
12771277+ self._latest_orig_commit[reset.ref] = reset.from_ref
12781278+12791279+ # Now print the resulting reset
12801280+ if not reset.dumped:
12811281+ self._imported_refs.add(reset.ref)
12821282+ reset.dump(self._output)
12831283+12841284+ def _parse_commit(self):
12851285+ """
12861286+ Parse input data into a Commit object. Once the Commit has been created,
12871287+ it will be handed off to the appropriate callbacks. Current-line will
12881288+ be advanced until it is beyond the commit data. The Commit will be dumped
12891289+ to _output once everything else is done (unless it has been skipped by
12901290+ the callback OR the callback has removed all file-changes from the commit).
12911291+ """
12921292+ # Parse the Commit. This may look involved, but it's pretty simple; it only
12931293+ # looks bad because a commit object contains many pieces of data.
12941294+ branch = self._parse_ref_line(b'commit')
12951295+ self._exported_refs.add(branch)
12961296+ id_ = self._parse_optional_mark()
12971297+12981298+ original_id = None
12991299+ if self._currentline.startswith(b'original-oid'):
13001300+ original_id = self._parse_original_id();
13011301+13021302+ author_name = None
13031303+ author_email = None
13041304+ if self._currentline.startswith(b'author'):
13051305+ (author_name, author_email, author_date) = self._parse_user(b'author')
13061306+13071307+ (committer_name, committer_email, committer_date) = \
13081308+ self._parse_user(b'committer')
13091309+13101310+ if not author_name and not author_email:
13111311+ (author_name, author_email, author_date) = \
13121312+ (committer_name, committer_email, committer_date)
13131313+13141314+ encoding = None
13151315+ if self._currentline.startswith(b'encoding '):
13161316+ encoding = self._parse_encoding()
13171317+13181318+ commit_msg = self._parse_data()
13191319+13201320+ pinfo = [self._parse_optional_parent_ref(b'from')]
13211321+ # Due to empty pruning, we can have real 'from' and 'merge' lines that
13221322+ # due to commit rewriting map to a parent of None. We need to record
13231323+ # 'from' if its non-None, and we need to parse all 'merge' lines.
13241324+ while self._currentline.startswith(b'merge '):
13251325+ pinfo.append(self._parse_optional_parent_ref(b'merge'))
13261326+ orig_parents, parents = [list(tmp) for tmp in zip(*pinfo)]
13271327+13281328+ # No parents is oddly represented as [None] instead of [], due to the
13291329+ # special 'from' handling. Convert it here to a more canonical form.
13301330+ if parents == [None]:
13311331+ parents = []
13321332+ if orig_parents == [None]:
13331333+ orig_parents = []
13341334+13351335+ # fast-import format is kinda stupid in that it allows implicit parents
13361336+ # based on the branch name instead of requiring them to be specified by
13371337+ # 'from' directives. The only way to get no parent is by using a reset
13381338+ # directive first, which clears the latest_commit_for_this_branch tracking.
13391339+ if not orig_parents and self._latest_commit.get(branch):
13401340+ parents = [self._latest_commit[branch]]
13411341+ if not orig_parents and self._latest_orig_commit.get(branch):
13421342+ orig_parents = [self._latest_orig_commit[branch]]
13431343+13441344+ # Get the list of file changes
13451345+ file_changes = []
13461346+ file_change = self._parse_optional_filechange()
13471347+ had_file_changes = file_change is not None
13481348+ while file_change:
13491349+ if not (type(file_change) == bytes and file_change == b'skipped'):
13501350+ file_changes.append(file_change)
13511351+ file_change = self._parse_optional_filechange()
13521352+ if self._currentline == b'\n':
13531353+ self._advance_currentline()
13541354+13551355+ # Okay, now we can finally create the Commit object
13561356+ commit = Commit(branch,
13571357+ author_name, author_email, author_date,
13581358+ committer_name, committer_email, committer_date,
13591359+ commit_msg, file_changes, parents, original_id, encoding)
13601360+13611361+ # If fast-export text had a mark for this commit, need to make sure this
13621362+ # mark translates to the commit's true id.
13631363+ if id_:
13641364+ commit.old_id = id_
13651365+ _IDS.record_rename(id_, commit.id)
13661366+13671367+ # refs/notes/ put commit-message-related material in blobs, and name their
13681368+ # files according to the hash of other commits. That totally messes with
13691369+ # all normal callbacks; fast-export should really export these as different
13701370+ # kinds of objects. Until then, let's just pass these commits through as-is
13711371+ # and hope the blob callbacks don't mess things up.
13721372+ if commit.branch.startswith(b'refs/notes/'):
13731373+ self._imported_refs.add(commit.branch)
13741374+ commit.dump(self._output)
13751375+ return
13761376+13771377+ # Call any user callback to allow them to modify the commit
13781378+ aux_info = {'orig_parents': orig_parents,
13791379+ 'had_file_changes': had_file_changes}
13801380+ if self._commit_callback:
13811381+ self._commit_callback(commit, aux_info)
13821382+13831383+ # Now print the resulting commit, or if prunable skip it
13841384+ self._latest_orig_commit[branch] = commit.id
13851385+ if not (commit.old_id or commit.id) in _SKIPPED_COMMITS:
13861386+ self._latest_commit[branch] = commit.id
13871387+ if not commit.dumped:
13881388+ self._imported_refs.add(commit.branch)
13891389+ commit.dump(self._output)
13901390+13911391+ def _parse_tag(self):
13921392+ """
13931393+ Parse input data into a Tag object. Once the Tag has been created,
13941394+ it will be handed off to the appropriate callbacks. Current-line will
13951395+ be advanced until it is beyond the tag data. The Tag will be dumped
13961396+ to _output once everything else is done (unless it has been skipped by
13971397+ the callback).
13981398+ """
13991399+ # Parse the Tag
14001400+ tag = self._parse_ref_line(b'tag')
14011401+ self._exported_refs.add(b'refs/tags/'+tag)
14021402+ id_ = self._parse_optional_mark()
14031403+ ignoreme, from_ref = self._parse_optional_parent_ref(b'from')
14041404+14051405+ original_id = None
14061406+ if self._currentline.startswith(b'original-oid'):
14071407+ original_id = self._parse_original_id();
14081408+14091409+ tagger_name, tagger_email, tagger_date = None, None, None
14101410+ if self._currentline.startswith(b'tagger'):
14111411+ (tagger_name, tagger_email, tagger_date) = self._parse_user(b'tagger')
14121412+ tag_msg = self._parse_data()
14131413+ if self._currentline == b'\n':
14141414+ self._advance_currentline()
14151415+14161416+ # Create the tag
14171417+ tag = Tag(tag, from_ref,
14181418+ tagger_name, tagger_email, tagger_date, tag_msg,
14191419+ original_id)
14201420+14211421+ # If fast-export text had a mark for this tag, need to make sure this
14221422+ # mark translates to the tag's true id.
14231423+ if id_:
14241424+ tag.old_id = id_
14251425+ _IDS.record_rename(id_, tag.id)
14261426+14271427+ # Call any user callback to allow them to modify the tag
14281428+ if self._tag_callback:
14291429+ self._tag_callback(tag)
14301430+14311431+ # The tag might not point at anything that still exists (self.from_ref
14321432+ # will be None if the commit it pointed to and all its ancestors were
14331433+ # pruned due to being empty)
14341434+ if tag.from_ref:
14351435+ # Print out this tag's information
14361436+ if not tag.dumped:
14371437+ self._imported_refs.add(b'refs/tags/'+tag.ref)
14381438+ tag.dump(self._output)
14391439+ else:
14401440+ tag.skip()
14411441+14421442+ def _parse_progress(self):
14431443+ """
14441444+ Parse input data into a Progress object. Once the Progress has
14451445+ been created, it will be handed off to the appropriate
14461446+ callbacks. Current-line will be advanced until it is beyond the
14471447+ progress data. The Progress will be dumped to _output once
14481448+ everything else is done (unless it has been skipped by the callback).
14491449+ """
14501450+ # Parse the Progress
14511451+ message = self._parse_ref_line(b'progress')
14521452+ if self._currentline == b'\n':
14531453+ self._advance_currentline()
14541454+14551455+ # Create the progress message
14561456+ progress = Progress(message)
14571457+14581458+ # Call any user callback to allow them to modify the progress messsage
14591459+ if self._progress_callback:
14601460+ self._progress_callback(progress)
14611461+14621462+ # NOTE: By default, we do NOT print the progress message; git
14631463+ # fast-import would write it to fast_import_pipes which could mess with
14641464+ # our parsing of output from the 'ls' and 'get-mark' directives we send
14651465+ # to fast-import. If users want these messages, they need to process
14661466+ # and handle them in the appropriate callback above.
14671467+14681468+ def _parse_checkpoint(self):
14691469+ """
14701470+ Parse input data into a Checkpoint object. Once the Checkpoint has
14711471+ been created, it will be handed off to the appropriate
14721472+ callbacks. Current-line will be advanced until it is beyond the
14731473+ checkpoint data. The Checkpoint will be dumped to _output once
14741474+ everything else is done (unless it has been skipped by the callback).
14751475+ """
14761476+ # Parse the Checkpoint
14771477+ self._advance_currentline()
14781478+ if self._currentline == b'\n':
14791479+ self._advance_currentline()
14801480+14811481+ # Create the checkpoint
14821482+ checkpoint = Checkpoint()
14831483+14841484+ # Call any user callback to allow them to drop the checkpoint
14851485+ if self._checkpoint_callback:
14861486+ self._checkpoint_callback(checkpoint)
14871487+14881488+ # NOTE: By default, we do NOT print the checkpoint message; although it
14891489+ # we would only realistically get them with --stdin, the fact that we
14901490+ # are filtering makes me think the checkpointing is less likely to be
14911491+ # reasonable. In fact, I don't think it's necessary in general. If
14921492+ # users do want it, they should process it in the checkpoint_callback.
14931493+14941494+ def _parse_literal_command(self):
14951495+ """
14961496+ Parse literal command. Then just dump the line as is.
14971497+ """
14981498+ # Create the literal command object
14991499+ command = LiteralCommand(self._currentline)
15001500+ self._advance_currentline()
15011501+15021502+ # Now print the resulting literal command
15031503+ if not command.dumped:
15041504+ command.dump(self._output)
15051505+15061506+ def insert(self, obj):
15071507+ assert not obj.dumped
15081508+ obj.dump(self._output)
15091509+ if type(obj) == Commit:
15101510+ self._imported_refs.add(obj.branch)
15111511+ elif type(obj) in (Reset, Tag):
15121512+ self._imported_refs.add(obj.ref)
15131513+15141514+ def run(self, input, output):
15151515+ """
15161516+ This method filters fast export output.
15171517+ """
15181518+ # Set input. If no args provided, use stdin.
15191519+ self._input = input
15201520+ self._output = output
15211521+15221522+ # Run over the input and do the filtering
15231523+ self._advance_currentline()
15241524+ while self._currentline:
15251525+ if self._currentline.startswith(b'blob'):
15261526+ self._parse_blob()
15271527+ elif self._currentline.startswith(b'reset'):
15281528+ self._parse_reset()
15291529+ elif self._currentline.startswith(b'commit'):
15301530+ self._parse_commit()
15311531+ elif self._currentline.startswith(b'tag'):
15321532+ self._parse_tag()
15331533+ elif self._currentline.startswith(b'progress'):
15341534+ self._parse_progress()
15351535+ elif self._currentline.startswith(b'checkpoint'):
15361536+ self._parse_checkpoint()
15371537+ elif self._currentline.startswith(b'feature'):
15381538+ self._parse_literal_command()
15391539+ elif self._currentline.startswith(b'option'):
15401540+ self._parse_literal_command()
15411541+ elif self._currentline.startswith(b'done'):
15421542+ if self._done_callback:
15431543+ self._done_callback()
15441544+ self._parse_literal_command()
15451545+ # Prevent confusion from others writing additional stuff that'll just
15461546+ # be ignored
15471547+ self._output.close()
15481548+ elif self._currentline.startswith(b'#'):
15491549+ self._parse_literal_command()
15501550+ elif self._currentline.startswith(b'get-mark') or \
15511551+ self._currentline.startswith(b'cat-blob') or \
15521552+ self._currentline.startswith(b'ls'):
15531553+ raise SystemExit(_("Unsupported command: '%s'") % self._currentline)
15541554+ else:
15551555+ raise SystemExit(_("Could not parse line: '%s'") % self._currentline)
15561556+15571557+ def get_exported_and_imported_refs(self):
15581558+ return self._exported_refs, self._imported_refs
15591559+15601560+def record_id_rename(old_id, new_id):
15611561+ """
15621562+ Register a new translation
15631563+ """
15641564+ handle_transitivity = True
15651565+ _IDS.record_rename(old_id, new_id, handle_transitivity)
15661566+15671567+# Internal globals
15681568+_IDS = _IDs()
15691569+_SKIPPED_COMMITS = set()
15701570+BLOB_HASH_TO_NEW_ID = {}
15711571+BLOB_NEW_ID_TO_HASH = {}
15721572+sdr_next_steps = _("""
15731573+NEXT STEPS FOR YOUR SENSITIVE DATA REMOVAL:
15741574+ * If you are doing your rewrite in multiple steps, ignore these next steps
15751575+ until you have completed all your invocations of git-filter-repo.
15761576+ * See the "Sensitive Data Removal" subsection of the "DISCUSSION" section
15771577+ of the manual for more details about any of the steps below.
15781578+ * Inspect this repository and verify that the sensitive data is indeed
15791579+ completely removed from all commits.
15801580+ * Force push the rewritten history to the server:
15811581+ %s
15821582+ * Contact the server admins for additional steps they need to take; the
15831583+ First Changed Commit(s)%s may come in handy here.
15841584+ * Have other colleagues with a clone either discard their clone and reclone
15851585+ OR follow the detailed steps in the manual to repeatedly rebase and
15861586+ purge the sensitive data from their copy. Again, the First Changed
15871587+ Commit(s)%s may come in handy.
15881588+ * See the "Prevent repeats and avoid future sensitive data spills" section
15891589+ of the manual.
15901590+"""[1:])
15911591+15921592+class SubprocessWrapper(object):
15931593+ @staticmethod
15941594+ def decodify(args):
15951595+ if type(args) == str:
15961596+ return args
15971597+ else:
15981598+ assert type(args) == list
15991599+ return [decode(x) if type(x)==bytes else x for x in args]
16001600+16011601+ @staticmethod
16021602+ def call(*args, **kwargs):
16031603+ if 'cwd' in kwargs:
16041604+ kwargs['cwd'] = decode(kwargs['cwd'])
16051605+ return subprocess.call(SubprocessWrapper.decodify(*args), **kwargs)
16061606+16071607+ @staticmethod
16081608+ def check_output(*args, **kwargs):
16091609+ if 'cwd' in kwargs:
16101610+ kwargs['cwd'] = decode(kwargs['cwd'])
16111611+ return subprocess.check_output(SubprocessWrapper.decodify(*args), **kwargs)
16121612+16131613+ @staticmethod
16141614+ def check_call(*args, **kwargs): # pragma: no cover # used by filter-lamely
16151615+ if 'cwd' in kwargs:
16161616+ kwargs['cwd'] = decode(kwargs['cwd'])
16171617+ return subprocess.check_call(SubprocessWrapper.decodify(*args), **kwargs)
16181618+16191619+ @staticmethod
16201620+ def Popen(*args, **kwargs):
16211621+ if 'cwd' in kwargs:
16221622+ kwargs['cwd'] = decode(kwargs['cwd'])
16231623+ return subprocess.Popen(SubprocessWrapper.decodify(*args), **kwargs)
16241624+16251625+subproc = subprocess
16261626+if platform.system() == 'Windows' or 'PRETEND_UNICODE_ARGS' in os.environ:
16271627+ subproc = SubprocessWrapper
16281628+16291629+class GitUtils(object):
16301630+ @staticmethod
16311631+ def get_commit_count(repo, *args):
16321632+ """
16331633+ Return the number of commits that have been made on repo.
16341634+ """
16351635+ if not args:
16361636+ args = ['--all']
16371637+ if len(args) == 1 and isinstance(args[0], list):
16381638+ args = args[0]
16391639+ p = subproc.Popen(["git", "rev-list", "--count"] + args,
16401640+ stdout=subprocess.PIPE, stderr=subprocess.PIPE,
16411641+ cwd=repo)
16421642+ if p.wait() != 0:
16431643+ raise SystemExit(_("%s does not appear to be a valid git repository")
16441644+ % decode(repo))
16451645+ return int(p.stdout.read())
16461646+16471647+ @staticmethod
16481648+ def get_total_objects(repo):
16491649+ """
16501650+ Return the number of objects (both packed and unpacked)
16511651+ """
16521652+ p1 = subproc.Popen(["git", "count-objects", "-v"],
16531653+ stdout=subprocess.PIPE, cwd=repo)
16541654+ lines = p1.stdout.read().splitlines()
16551655+ # Return unpacked objects + packed-objects
16561656+ return int(lines[0].split()[1]) + int(lines[2].split()[1])
16571657+16581658+ @staticmethod
16591659+ def is_repository_bare(repo_working_dir):
16601660+ out = subproc.check_output('git rev-parse --is-bare-repository'.split(),
16611661+ cwd=repo_working_dir)
16621662+ return (out.strip() == b'true')
16631663+16641664+ @staticmethod
16651665+ def determine_git_dir(repo_working_dir):
16661666+ d = subproc.check_output('git rev-parse --git-dir'.split(),
16671667+ cwd=repo_working_dir).strip()
16681668+ if repo_working_dir==b'.' or d.startswith(b'/'):
16691669+ return d
16701670+ return os.path.join(repo_working_dir, d)
16711671+16721672+ @staticmethod
16731673+ def get_refs(repo_working_dir):
16741674+ try:
16751675+ output = subproc.check_output('git show-ref'.split(),
16761676+ cwd=repo_working_dir)
16771677+ except subprocess.CalledProcessError as e:
16781678+ # If error code is 1, there just aren't any refs; i.e. new repo.
16791679+ # If error code is other than 1, some other error (e.g. not a git repo)
16801680+ if e.returncode != 1:
16811681+ raise SystemExit('fatal: {}'.format(e))
16821682+ output = ''
16831683+ return dict(reversed(x.split()) for x in output.splitlines())
16841684+16851685+ @staticmethod
16861686+ def get_config_settings(repo_working_dir):
16871687+ output = ''
16881688+ try:
16891689+ output = subproc.check_output('git config --list --null'.split(),
16901690+ cwd=repo_working_dir)
16911691+ except subprocess.CalledProcessError as e: # pragma: no cover
16921692+ raise SystemExit('fatal: {}'.format(e))
16931693+16941694+ # FIXME: Ignores multi-valued keys, just let them overwrite for now
16951695+ return dict(item.split(b'\n', maxsplit=1)
16961696+ for item in output.strip().split(b"\0") if item)
16971697+16981698+ @staticmethod
16991699+ def get_blob_sizes(quiet = False):
17001700+ blob_size_progress = ProgressWriter()
17011701+ num_blobs = 0
17021702+ processed_blobs_msg = _("Processed %d blob sizes")
17031703+17041704+ # Get sizes of blobs by sha1
17051705+ cmd = '--batch-check=%(objectname) %(objecttype) ' + \
17061706+ '%(objectsize) %(objectsize:disk)'
17071707+ cf = subproc.Popen(['git', 'cat-file', '--batch-all-objects', cmd],
17081708+ bufsize = -1,
17091709+ stdout = subprocess.PIPE)
17101710+ unpacked_size = {}
17111711+ packed_size = {}
17121712+ for line in cf.stdout:
17131713+ try:
17141714+ sha, objtype, objsize, objdisksize = line.split()
17151715+ objsize, objdisksize = int(objsize), int(objdisksize)
17161716+ if objtype == b'blob':
17171717+ unpacked_size[sha] = objsize
17181718+ packed_size[sha] = objdisksize
17191719+ num_blobs += 1
17201720+ except ValueError: # pragma: no cover
17211721+ sys.stderr.write(_("Error: unexpected `git cat-file` output: \"%s\"\n") % line)
17221722+ if not quiet:
17231723+ blob_size_progress.show(processed_blobs_msg % num_blobs)
17241724+ cf.wait()
17251725+ if not quiet:
17261726+ blob_size_progress.finish()
17271727+ return unpacked_size, packed_size
17281728+17291729+ @staticmethod
17301730+ def get_file_changes(repo, parent_hash, commit_hash):
17311731+ """
17321732+ Return a FileChanges list with the differences between parent_hash
17331733+ and commit_hash
17341734+ """
17351735+ file_changes = []
17361736+17371737+ cmd = ["git", "diff-tree", "-r", parent_hash, commit_hash]
17381738+ output = subproc.check_output(cmd, cwd=repo)
17391739+ for line in output.splitlines():
17401740+ fileinfo, path = line.split(b'\t', 1)
17411741+ if path.startswith(b'"'):
17421742+ path = PathQuoting.dequote(path)
17431743+ oldmode, mode, oldhash, newhash, changetype = fileinfo.split()
17441744+ if changetype == b'D':
17451745+ file_changes.append(FileChange(b'D', path))
17461746+ elif changetype in (b'A', b'M', b'T'):
17471747+ identifier = BLOB_HASH_TO_NEW_ID.get(newhash, newhash)
17481748+ file_changes.append(FileChange(b'M', path, identifier, mode))
17491749+ else: # pragma: no cover
17501750+ raise SystemExit("Unknown change type for line {}".format(line))
17511751+17521752+ return file_changes
17531753+17541754+ @staticmethod
17551755+ def print_my_version():
17561756+ with open(__file__, 'br') as f:
17571757+ contents = f.read()
17581758+ # If people replaced @@LOCALEDIR@@ string to point at their local
17591759+ # directory, undo it so we can get original source version.
17601760+ contents = re.sub(br'\A#\!.*',
17611761+ br'#!/usr/bin/env python3', contents)
17621762+ contents = re.sub(br'(\("GIT_TEXTDOMAINDIR"\) or ").*"',
17631763+ br'\1@@LOCALEDIR@@"', contents)
17641764+17651765+ cmd = 'git hash-object --stdin'.split()
17661766+ version = subproc.check_output(cmd, input=contents).strip()
17671767+ print(decode(version[0:12]))
17681768+17691769+class FilteringOptions(object):
17701770+ default_replace_text = b'***REMOVED***'
17711771+ class AppendFilter(argparse.Action):
17721772+ def __call__(self, parser, namespace, values, option_string=None):
17731773+ user_path = values
17741774+ suffix = option_string[len('--path-'):] or 'match'
17751775+ if suffix.startswith('rename'):
17761776+ mod_type = 'rename'
17771777+ match_type = option_string[len('--path-rename-'):] or 'match'
17781778+ values = values.split(b':')
17791779+ if len(values) != 2:
17801780+ raise SystemExit(_("Error: --path-rename expects one colon in its"
17811781+ " argument: <old_name:new_name>."))
17821782+ if values[0] and values[1] and not (
17831783+ values[0].endswith(b'/') == values[1].endswith(b'/')):
17841784+ raise SystemExit(_("Error: With --path-rename, if OLD_NAME and "
17851785+ "NEW_NAME are both non-empty and either ends "
17861786+ "with a slash then both must."))
17871787+ if any(v.startswith(b'/') for v in values):
17881788+ raise SystemExit(_("Error: Pathnames cannot begin with a '/'"))
17891789+ components = values[0].split(b'/') + values[1].split(b'/')
17901790+ else:
17911791+ mod_type = 'filter'
17921792+ match_type = suffix
17931793+ components = values.split(b'/')
17941794+ if values.startswith(b'/'):
17951795+ raise SystemExit(_("Error: Pathnames cannot begin with a '/'"))
17961796+ for illegal_path in [b'.', b'..']:
17971797+ if illegal_path in components:
17981798+ raise SystemExit(_("Error: Invalid path component '%s' found in '%s'")
17991799+ % (decode(illegal_path), decode(user_path)))
18001800+ if match_type == 'regex':
18011801+ values = re.compile(values)
18021802+ items = getattr(namespace, self.dest, []) or []
18031803+ items.append((mod_type, match_type, values))
18041804+ if (match_type, mod_type) == ('glob', 'filter'):
18051805+ if not values.endswith(b'*'):
18061806+ extension = b'*' if values.endswith(b'/') else b'/*'
18071807+ items.append((mod_type, match_type, values+extension))
18081808+ setattr(namespace, self.dest, items)
18091809+18101810+ class HelperFilter(argparse.Action):
18111811+ def __call__(self, parser, namespace, values, option_string=None):
18121812+ af = FilteringOptions.AppendFilter(dest='path_changes',
18131813+ option_strings=None)
18141814+ dirname = values if values[-1:] == b'/' else values+b'/'
18151815+ if option_string == '--subdirectory-filter':
18161816+ af(parser, namespace, dirname, '--path-match')
18171817+ af(parser, namespace, dirname+b':', '--path-rename')
18181818+ elif option_string == '--to-subdirectory-filter':
18191819+ af(parser, namespace, b':'+dirname, '--path-rename')
18201820+ else:
18211821+ raise SystemExit(_("Error: HelperFilter given invalid option_string: %s")
18221822+ % option_string) # pragma: no cover
18231823+18241824+ class FileWithPathsFilter(argparse.Action):
18251825+ def __call__(self, parser, namespace, values, option_string=None):
18261826+ if not namespace.path_changes:
18271827+ namespace.path_changes = []
18281828+ namespace.path_changes += FilteringOptions.get_paths_from_file(values)
18291829+18301830+ @staticmethod
18311831+ def create_arg_parser():
18321832+ # Include usage in the summary, so we can put the description first
18331833+ summary = _('''Rewrite (or analyze) repository history
18341834+18351835+ git-filter-repo destructively rewrites history (unless --analyze or
18361836+ --dry-run are given) according to specified rules. It refuses to do any
18371837+ rewriting unless either run from a clean fresh clone, or --force was
18381838+ given.
18391839+18401840+ Basic Usage:
18411841+ git-filter-repo --analyze
18421842+ git-filter-repo [FILTER/RENAME/CONTROL OPTIONS]
18431843+18441844+ See EXAMPLES section for details.
18451845+ ''').rstrip()
18461846+18471847+ # Provide a long helpful examples section
18481848+ example_text = _('''CALLBACKS
18491849+18501850+ Most callback functions are of the same general format. For a command line
18511851+ argument like
18521852+ --foo-callback 'BODY'
18531853+18541854+ the following code will be compiled and called:
18551855+ def foo_callback(foo):
18561856+ BODY
18571857+18581858+ The exception on callbacks is the --file-info-callback, which will be
18591859+ discussed further below.
18601860+18611861+ Given the callback style, we can thus make a simple callback to replace
18621862+ 'Jon' with 'John' in author/committer/tagger names:
18631863+ git filter-repo --name-callback 'return name.replace(b"Jon", b"John")'
18641864+18651865+ To remove all 'Tested-by' tags in commit (or tag) messages:
18661866+ git filter-repo --message-callback 'return re.sub(br"\\nTested-by:.*", "", message)'
18671867+18681868+ To remove all .DS_Store files:
18691869+ git filter-repo --filename-callback 'return None if os.path.basename(filename) == b".DS_Store" else filename'
18701870+18711871+ Note that if BODY resolves to a filename, then the contents of that file
18721872+ will be used as the BODY in the callback function.
18731873+18741874+ The --file-info-callback has a more involved function callback; for it the
18751875+ following code will be compiled and called:
18761876+ def file_info_callback(filename, mode, blob_id, value):
18771877+ BODY
18781878+18791879+ It is designed to be used in cases where filtering depends on both
18801880+ filename and contents (and maybe mode). It is called for file changes
18811881+ other than deletions (since deletions have no file contents to operate
18821882+ on). This callback is expected to return a tuple of (filename, mode,
18831883+ blob_id). It can make use of the following functions from the value
18841884+ instance:
18851885+ value.get_contents_by_identifier(blob_id) -> contents (bytestring)
18861886+ value.get_size_by_identifier(blob_id) -> size_of_blob (int)
18871887+ value.insert_file_with_contents(contents) -> blob_id
18881888+ value.is_binary(contents) -> bool
18891889+ value.apply_replace_text(contents) -> new_contents (bytestring)
18901890+ and can read/write the following data member from the value instance:
18911891+ value.data (dict)
18921892+18931893+ The filename can be used for renaming the file similar to
18941894+ --filename-callback (or None to drop the change), and mode is one
18951895+ of b'100644', b'100755', b'120000', or b'160000'.
18961896+18971897+ For more detailed examples and explanations AND caveats, see
18981898+ https://htmlpreview.github.io/?https://github.com/newren/git-filter-repo/blob/docs/html/git-filter-repo.html#CALLBACKS
18991899+19001900+EXAMPLES
19011901+19021902+ To get a bunch of reports mentioning renames that have occurred in
19031903+ your repo and listing sizes of objects aggregated by any of path,
19041904+ directory, extension, or blob-id:
19051905+ git filter-repo --analyze
19061906+19071907+ (These reports can help you choose how to filter your repo; it can
19081908+ be useful to re-run this command after filtering to regenerate the
19091909+ report and verify the changes look correct.)
19101910+19111911+ To extract the history that touched just 'guides' and 'tools/releases':
19121912+ git filter-repo --path guides/ --path tools/releases
19131913+19141914+ To remove foo.zip and bar/baz/zips from every revision in history:
19151915+ git filter-repo --path foo.zip --path bar/baz/zips/ --invert-paths
19161916+19171917+ To replace the text 'password' with 'p455w0rd':
19181918+ git filter-repo --replace-text <(echo "password==>p455w0rd")
19191919+19201920+ To use the current version of the .mailmap file to update authors,
19211921+ committers, and taggers throughout history and make it permanent:
19221922+ git filter-repo --use-mailmap
19231923+19241924+ To extract the history of 'src/', rename all files to have a new leading
19251925+ directory 'my-module' (e.g. src/foo.java -> my-module/src/foo.java), and
19261926+ add a 'my-module-' prefix to all tags:
19271927+ git filter-repo --path src/ --to-subdirectory-filter my-module --tag-rename '':'my-module-'
19281928+19291929+ For more detailed examples and explanations, see
19301930+ https://htmlpreview.github.io/?https://github.com/newren/git-filter-repo/blob/docs/html/git-filter-repo.html#EXAMPLES''')
19311931+19321932+ # Create the basic parser
19331933+ parser = argparse.ArgumentParser(description=summary,
19341934+ usage = argparse.SUPPRESS,
19351935+ add_help = False,
19361936+ epilog = example_text,
19371937+ formatter_class=argparse.RawDescriptionHelpFormatter)
19381938+19391939+ analyze = parser.add_argument_group(title=_("Analysis"))
19401940+ analyze.add_argument('--analyze', action='store_true',
19411941+ help=_("Analyze repository history and create a report that may be "
19421942+ "useful in determining what to filter in a subsequent run. "
19431943+ "Will not modify your repo."))
19441944+ analyze.add_argument('--report-dir',
19451945+ metavar='DIR_OR_FILE',
19461946+ type=os.fsencode,
19471947+ dest='report_dir',
19481948+ help=_("Directory to write report, defaults to GIT_DIR/filter_repo/analysis,"
19491949+ "refuses to run if exists, --force delete existing dir first."))
19501950+19511951+ path = parser.add_argument_group(title=_("Filtering based on paths "
19521952+ "(see also --filename-callback)"),
19531953+ description=textwrap.dedent(_("""
19541954+ These options specify the paths to select. Note that much like git
19551955+ itself, renames are NOT followed so you may need to specify multiple
19561956+ paths, e.g. `--path olddir/ --path newdir/`
19571957+ """[1:])))
19581958+19591959+ path.add_argument('--invert-paths', action='store_false', dest='inclusive',
19601960+ help=_("Invert the selection of files from the specified "
19611961+ "--path-{match,glob,regex} options below, i.e. only select "
19621962+ "files matching none of those options."))
19631963+19641964+ path.add_argument('--path-match', '--path', metavar='DIR_OR_FILE',
19651965+ type=os.fsencode,
19661966+ action=FilteringOptions.AppendFilter, dest='path_changes',
19671967+ help=_("Exact paths (files or directories) to include in filtered "
19681968+ "history. Multiple --path options can be specified to get "
19691969+ "a union of paths."))
19701970+ path.add_argument('--path-glob', metavar='GLOB', type=os.fsencode,
19711971+ action=FilteringOptions.AppendFilter, dest='path_changes',
19721972+ help=_("Glob of paths to include in filtered history. Multiple "
19731973+ "--path-glob options can be specified to get a union of "
19741974+ "paths."))
19751975+ path.add_argument('--path-regex', metavar='REGEX', type=os.fsencode,
19761976+ action=FilteringOptions.AppendFilter, dest='path_changes',
19771977+ help=_("Regex of paths to include in filtered history. Multiple "
19781978+ "--path-regex options can be specified to get a union of "
19791979+ "paths"))
19801980+ path.add_argument('--use-base-name', action='store_true',
19811981+ help=_("Match on file base name instead of full path from the top "
19821982+ "of the repo. Incompatible with --path-rename, and "
19831983+ "incompatible with matching against directory names."))
19841984+19851985+ rename = parser.add_argument_group(title=_("Renaming based on paths "
19861986+ "(see also --filename-callback)"))
19871987+ rename.add_argument('--path-rename', '--path-rename-match',
19881988+ metavar='OLD_NAME:NEW_NAME', dest='path_changes', type=os.fsencode,
19891989+ action=FilteringOptions.AppendFilter,
19901990+ help=_("Path to rename; if filename or directory matches OLD_NAME "
19911991+ "rename to NEW_NAME. Multiple --path-rename options can be "
19921992+ "specified. NOTE: If you combine filtering options with "
19931993+ "renaming ones, do not rely on a rename argument to select "
19941994+ "paths; you also need a filter to select them."))
19951995+19961996+ helpers = parser.add_argument_group(title=_("Path shortcuts"))
19971997+ helpers.add_argument('--paths', help=argparse.SUPPRESS, metavar='IGNORE')
19981998+ helpers.add_argument('--paths-from-file', metavar='FILENAME',
19991999+ type=os.fsencode,
20002000+ action=FilteringOptions.FileWithPathsFilter, dest='path_changes',
20012001+ help=_("Specify several path filtering and renaming directives, one "
20022002+ "per line. Lines with '==>' in them specify path renames, "
20032003+ "and lines can begin with 'literal:' (the default), 'glob:', "
20042004+ "or 'regex:' to specify different matching styles. Blank "
20052005+ "lines and lines starting with a '#' are ignored."))
20062006+ helpers.add_argument('--subdirectory-filter', metavar='DIRECTORY',
20072007+ action=FilteringOptions.HelperFilter, type=os.fsencode,
20082008+ help=_("Only look at history that touches the given subdirectory "
20092009+ "and treat that directory as the project root. Equivalent "
20102010+ "to using '--path DIRECTORY/ --path-rename DIRECTORY/:'"))
20112011+ helpers.add_argument('--to-subdirectory-filter', metavar='DIRECTORY',
20122012+ action=FilteringOptions.HelperFilter, type=os.fsencode,
20132013+ help=_("Treat the project root as if it were under DIRECTORY. "
20142014+ "Equivalent to using '--path-rename :DIRECTORY/'"))
20152015+20162016+ contents = parser.add_argument_group(title=_("Content editing filters "
20172017+ "(see also --blob-callback)"))
20182018+ contents.add_argument('--replace-text', metavar='EXPRESSIONS_FILE',
20192019+ help=_("A file with expressions that, if found, will be replaced. "
20202020+ "By default, each expression is treated as literal text, "
20212021+ "but 'regex:' and 'glob:' prefixes are supported. You can "
20222022+ "end the line with '==>' and some replacement text to "
20232023+ "choose a replacement choice other than the default of '{}'."
20242024+ .format(decode(FilteringOptions.default_replace_text))))
20252025+ contents.add_argument('--strip-blobs-bigger-than', metavar='SIZE',
20262026+ dest='max_blob_size', default=0,
20272027+ help=_("Strip blobs (files) bigger than specified size (e.g. '5M', "
20282028+ "'2G', etc)"))
20292029+ contents.add_argument('--strip-blobs-with-ids', metavar='BLOB-ID-FILENAME',
20302030+ help=_("Read git object ids from each line of the given file, and "
20312031+ "strip all of them from history"))
20322032+20332033+ refrename = parser.add_argument_group(title=_("Renaming of refs "
20342034+ "(see also --refname-callback)"))
20352035+ refrename.add_argument('--tag-rename', metavar='OLD:NEW', type=os.fsencode,
20362036+ help=_("Rename tags starting with OLD to start with NEW. For "
20372037+ "example, --tag-rename foo:bar will rename tag foo-1.2.3 "
20382038+ "to bar-1.2.3; either OLD or NEW can be empty."))
20392039+20402040+ messages = parser.add_argument_group(title=_("Filtering of commit messages "
20412041+ "(see also --message-callback)"))
20422042+ messages.add_argument('--replace-message', metavar='EXPRESSIONS_FILE',
20432043+ help=_("A file with expressions that, if found in commit or tag "
20442044+ "messages, will be replaced. This file uses the same syntax "
20452045+ "as --replace-text."))
20462046+ messages.add_argument('--preserve-commit-hashes', action='store_true',
20472047+ help=_("By default, since commits are rewritten and thus gain new "
20482048+ "hashes, references to old commit hashes in commit messages "
20492049+ "are replaced with new commit hashes (abbreviated to the same "
20502050+ "length as the old reference). Use this flag to turn off "
20512051+ "updating commit hashes in commit messages."))
20522052+ messages.add_argument('--preserve-commit-encoding', action='store_true',
20532053+ help=_("Do not reencode commit messages into UTF-8. By default, if "
20542054+ "the commit object specifies an encoding for the commit "
20552055+ "message, the message is re-encoded into UTF-8."))
20562056+20572057+ people = parser.add_argument_group(title=_("Filtering of names & emails "
20582058+ "(see also --name-callback "
20592059+ "and --email-callback)"))
20602060+ people.add_argument('--mailmap', dest='mailmap', metavar='FILENAME',
20612061+ type=os.fsencode,
20622062+ help=_("Use specified mailmap file (see git-shortlog(1) for "
20632063+ "details on the format) when rewriting author, committer, "
20642064+ "and tagger names and emails. If the specified file is "
20652065+ "part of git history, historical versions of the file will "
20662066+ "be ignored; only the current contents are consulted."))
20672067+ people.add_argument('--use-mailmap', dest='mailmap',
20682068+ action='store_const', const=b'.mailmap',
20692069+ help=_("Same as: '--mailmap .mailmap' "))
20702070+20712071+ parents = parser.add_argument_group(title=_("Parent rewriting"))
20722072+ parents.add_argument('--replace-refs', default=None,
20732073+ choices=['delete-no-add', 'delete-and-add',
20742074+ 'update-no-add', 'update-or-add',
20752075+ 'update-and-add', 'old-default'],
20762076+ help=_("How to handle replace refs (see git-replace(1)). Replace "
20772077+ "refs can be added during the history rewrite as a way to "
20782078+ "allow users to pass old commit IDs (from before "
20792079+ "git-filter-repo was run) to git commands and have git know "
20802080+ "how to translate those old commit IDs to the new "
20812081+ "(post-rewrite) commit IDs. Also, replace refs that existed "
20822082+ "before the rewrite can either be deleted or updated. The "
20832083+ "choices to pass to --replace-refs thus need to specify both "
20842084+ "what to do with existing refs and what to do with commit "
20852085+ "rewrites. Thus 'update-and-add' means to update existing "
20862086+ "replace refs, and for any commit rewrite (even if already "
20872087+ "pointed at by a replace ref) add a new refs/replace/ reference "
20882088+ "to map from the old commit ID to the new commit ID. The "
20892089+ "default is update-no-add, meaning update existing replace refs "
20902090+ "but do not add any new ones. There is also a special "
20912091+ "'old-default' option for picking the default used in versions "
20922092+ "prior to git-filter-repo-2.45, namely 'update-and-add' upon "
20932093+ "the first run of git-filter-repo in a repository and "
20942094+ "'update-or-add' if running git-filter-repo again on a "
20952095+ "repository."))
20962096+ parents.add_argument('--prune-empty', default='auto',
20972097+ choices=['always', 'auto', 'never'],
20982098+ help=_("Whether to prune empty commits. 'auto' (the default) means "
20992099+ "only prune commits which become empty (not commits which were "
21002100+ "empty in the original repo, unless their parent was pruned). "
21012101+ "When the parent of a commit is pruned, the first non-pruned "
21022102+ "ancestor becomes the new parent."))
21032103+ parents.add_argument('--prune-degenerate', default='auto',
21042104+ choices=['always', 'auto', 'never'],
21052105+ help=_("Since merge commits are needed for history topology, they "
21062106+ "are typically exempt from pruning. However, they can become "
21072107+ "degenerate with the pruning of other commits (having fewer "
21082108+ "than two parents, having one commit serve as both parents, or "
21092109+ "having one parent as the ancestor of the other.) If such "
21102110+ "merge commits have no file changes, they can be pruned. The "
21112111+ "default ('auto') is to only prune empty merge commits which "
21122112+ "become degenerate (not which started as such)."))
21132113+ parents.add_argument('--no-ff', action='store_true',
21142114+ help=_("Even if the first parent is or becomes an ancestor of another "
21152115+ "parent, do not prune it. This modifies how "
21162116+ "--prune-degenerate behaves, and may be useful in projects who "
21172117+ "always use merge --no-ff."))
21182118+21192119+ callback = parser.add_argument_group(title=_("Generic callback code snippets"))
21202120+ callback.add_argument('--filename-callback', metavar="FUNCTION_BODY_OR_FILE",
21212121+ help=_("Python code body for processing filenames; see CALLBACKS "
21222122+ "sections below."))
21232123+ callback.add_argument('--file-info-callback', metavar="FUNCTION_BODY_OR_FILE",
21242124+ help=_("Python code body for processing file and metadata; see "
21252125+ "CALLBACKS sections below."))
21262126+ callback.add_argument('--message-callback', metavar="FUNCTION_BODY_OR_FILE",
21272127+ help=_("Python code body for processing messages (both commit "
21282128+ "messages and tag messages); see CALLBACKS section below."))
21292129+ callback.add_argument('--name-callback', metavar="FUNCTION_BODY_OR_FILE",
21302130+ help=_("Python code body for processing names of people; see "
21312131+ "CALLBACKS section below."))
21322132+ callback.add_argument('--email-callback', metavar="FUNCTION_BODY_OR_FILE",
21332133+ help=_("Python code body for processing emails addresses; see "
21342134+ "CALLBACKS section below."))
21352135+ callback.add_argument('--refname-callback', metavar="FUNCTION_BODY_OR_FILE",
21362136+ help=_("Python code body for processing refnames; see CALLBACKS "
21372137+ "section below."))
21382138+21392139+ callback.add_argument('--blob-callback', metavar="FUNCTION_BODY_OR_FILE",
21402140+ help=_("Python code body for processing blob objects; see "
21412141+ "CALLBACKS section below."))
21422142+ callback.add_argument('--commit-callback', metavar="FUNCTION_BODY_OR_FILE",
21432143+ help=_("Python code body for processing commit objects; see "
21442144+ "CALLBACKS section below."))
21452145+ callback.add_argument('--tag-callback', metavar="FUNCTION_BODY_OR_FILE",
21462146+ help=_("Python code body for processing tag objects. Note that "
21472147+ "lightweight tags have no tag object and are thus not "
21482148+ "handled by this callback. See CALLBACKS section below."))
21492149+ callback.add_argument('--reset-callback', metavar="FUNCTION_BODY_OR_FILE",
21502150+ help=_("Python code body for processing reset objects; see "
21512151+ "CALLBACKS section below."))
21522152+21532153+ sdr = parser.add_argument_group(title=_("Sensitive Data Removal Handling"))
21542154+ sdr.add_argument('--sensitive-data-removal', '--sdr', action='store_true',
21552155+ help=_("This rewrite is intended to remove sensitive data from a "
21562156+ "repository. Gather extra information from the rewrite needed "
21572157+ "to provide additional instructions on how to clean up other "
21582158+ "copies."))
21592159+ sdr.add_argument('--no-fetch', action='store_true',
21602160+ help=_("By default, --sensitive-data-removal will trigger a "
21612161+ "mirror-like fetch of all refs from origin, discarding local "
21622162+ "changes, but ensuring that _all_ fetchable refs that hold on "
21632163+ "to the sensitve data are rewritten. This flag removes that "
21642164+ "fetch, risking that other refs continue holding on to the "
21652165+ "sensitive data. This option is implied by --partial or any "
21662166+ "flag that implies --partial."))
21672167+21682168+ desc = _(
21692169+ "Specifying alternate source or target locations implies --partial,\n"
21702170+ "except that the normal default for --replace-refs is used. However,\n"
21712171+ "unlike normal uses of --partial, this doesn't risk mixing old and new\n"
21722172+ "history since the old and new histories are in different repositories.")
21732173+ location = parser.add_argument_group(title=_("Location to filter from/to"),
21742174+ description=desc)
21752175+ location.add_argument('--source', type=os.fsencode,
21762176+ help=_("Git repository to read from"))
21772177+ location.add_argument('--target', type=os.fsencode,
21782178+ help=_("Git repository to overwrite with filtered history"))
21792179+21802180+ order = parser.add_argument_group(title=_("Ordering of commits"))
21812181+ order.add_argument('--date-order', action='store_true',
21822182+ help=_("Processes commits in commit timestamp order."))
21832183+21842184+ misc = parser.add_argument_group(title=_("Miscellaneous options"))
21852185+ misc.add_argument('--help', '-h', action='store_true',
21862186+ help=_("Show this help message and exit."))
21872187+ misc.add_argument('--version', action='store_true',
21882188+ help=_("Display filter-repo's version and exit."))
21892189+ misc.add_argument('--proceed', action='store_true',
21902190+ help=_("Avoid triggering the no-arguments-specified check."))
21912191+ misc.add_argument('--force', '-f', action='store_true',
21922192+ help=_("Rewrite repository history even if the current repo does not "
21932193+ "look like a fresh clone. History rewriting is irreversible "
21942194+ "(and includes immediate pruning of reflogs and old objects), "
21952195+ "so be cautious about using this flag."))
21962196+ misc.add_argument('--partial', action='store_true',
21972197+ help=_("Do a partial history rewrite, resulting in the mixture of "
21982198+ "old and new history. This disables rewriting "
21992199+ "refs/remotes/origin/* to refs/heads/*, disables removing "
22002200+ "of the 'origin' remote, disables removing unexported refs, "
22012201+ "disables expiring the reflog, and disables the automatic "
22022202+ "post-filter gc. Also, this modifies --tag-rename and "
22032203+ "--refname-callback options such that instead of replacing "
22042204+ "old refs with new refnames, it will instead create new "
22052205+ "refs and keep the old ones around. Use with caution."))
22062206+ misc.add_argument('--no-gc', action='store_true',
22072207+ help=_("Do not run 'git gc' after filtering."))
22082208+ # WARNING: --refs presents a problem with become-degenerate pruning:
22092209+ # * Excluding a commit also excludes its ancestors so when some other
22102210+ # commit has an excluded ancestor as a parent we have no way of
22112211+ # knowing what it is an ancestor of without doing a special
22122212+ # full-graph walk.
22132213+ misc.add_argument('--refs', nargs='+',
22142214+ help=_("Limit history rewriting to the specified refs. Implies "
22152215+ "--partial. In addition to the normal caveats of --partial "
22162216+ "(mixing old and new history, no automatic remapping of "
22172217+ "refs/remotes/origin/* to refs/heads/*, etc.), this also may "
22182218+ "cause problems for pruning of degenerate empty merge "
22192219+ "commits when negative revisions are specified."))
22202220+22212221+ misc.add_argument('--dry-run', action='store_true',
22222222+ help=_("Do not change the repository. Run `git fast-export` and "
22232223+ "filter its output, and save both the original and the "
22242224+ "filtered version for comparison. This also disables "
22252225+ "rewriting commit messages due to not knowing new commit "
22262226+ "IDs and disables filtering of some empty commits due to "
22272227+ "inability to query the fast-import backend." ))
22282228+ misc.add_argument('--debug', action='store_true',
22292229+ help=_("Print additional information about operations being "
22302230+ "performed and commands being run. When used together "
22312231+ "with --dry-run, also show extra information about what "
22322232+ "would be run."))
22332233+ # WARNING: --state-branch has some problems:
22342234+ # * It does not work well with manually inserted objects (user creating
22352235+ # Blob() or Commit() or Tag() objects and calling
22362236+ # RepoFilter.insert(obj) on them).
22372237+ # * It does not work well with multiple source or multiple target repos
22382238+ # * It doesn't work so well with pruning become-empty commits (though
22392239+ # --refs doesn't work so well with it either)
22402240+ # These are probably fixable, given some work (e.g. re-importing the
22412241+ # graph at the beginning to get the AncestryGraph right, doing our own
22422242+ # export of marks instead of using fast-export --export-marks, etc.), but
22432243+ # for now just hide the option.
22442244+ misc.add_argument('--state-branch',
22452245+ #help=_("Enable incremental filtering by saving the mapping of old "
22462246+ # "to new objects to the specified branch upon exit, and"
22472247+ # "loading that mapping from that branch (if it exists) "
22482248+ # "upon startup."))
22492249+ help=argparse.SUPPRESS)
22502250+ misc.add_argument('--stdin', action='store_true',
22512251+ help=_("Instead of running `git fast-export` and filtering its "
22522252+ "output, filter the fast-export stream from stdin. The "
22532253+ "stdin must be in the expected input format (e.g. it needs "
22542254+ "to include original-oid directives)."))
22552255+ misc.add_argument('--quiet', action='store_true',
22562256+ help=_("Pass --quiet to other git commands called"))
22572257+ return parser
22582258+22592259+ @staticmethod
22602260+ def sanity_check_args(args):
22612261+ if args.analyze and args.path_changes:
22622262+ raise SystemExit(_("Error: --analyze is incompatible with --path* flags; "
22632263+ "it's a read-only operation."))
22642264+ if args.analyze and args.stdin:
22652265+ raise SystemExit(_("Error: --analyze is incompatible with --stdin."))
22662266+ # If no path_changes are found, initialize with empty list but mark as
22672267+ # not inclusive so that all files match
22682268+ if args.path_changes == None:
22692269+ args.path_changes = []
22702270+ args.inclusive = False
22712271+ else:
22722272+ # Similarly, if we have no filtering paths, then no path should be
22732273+ # filtered out. Based on how newname() works, the easiest way to
22742274+ # achieve that is setting args.inclusive to False.
22752275+ if not any(x[0] == 'filter' for x in args.path_changes):
22762276+ args.inclusive = False
22772277+ # Also check for incompatible --use-base-name and --path-rename flags.
22782278+ if args.use_base_name:
22792279+ if any(x[0] == 'rename' for x in args.path_changes):
22802280+ raise SystemExit(_("Error: --use-base-name and --path-rename are "
22812281+ "incompatible."))
22822282+ # Also throw some sanity checks on git version here;
22832283+ # PERF: remove these checks once new enough git versions are common
22842284+ p = subproc.Popen('git fast-export -h'.split(),
22852285+ stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
22862286+ output = p.stdout.read()
22872287+ if b'--anonymize-map' not in output: # pragma: no cover
22882288+ global date_format_permissive
22892289+ date_format_permissive = False
22902290+ if not any(x in output for x in [b'--mark-tags',b'--[no-]mark-tags']): # pragma: no cover
22912291+ global write_marks
22922292+ write_marks = False
22932293+ if args.state_branch:
22942294+ # We need a version of git-fast-export with --mark-tags
22952295+ raise SystemExit(_("Error: need git >= 2.24.0"))
22962296+ if not any(x in output for x in [b'--reencode', b'--[no-]reencode']): # pragma: no cover
22972297+ if args.preserve_commit_encoding:
22982298+ # We need a version of git-fast-export with --reencode
22992299+ raise SystemExit(_("Error: need git >= 2.23.0"))
23002300+ else:
23012301+ # Set args.preserve_commit_encoding to None which we'll check for later
23022302+ # to avoid passing --reencode=yes to fast-export (that option was the
23032303+ # default prior to git-2.23)
23042304+ args.preserve_commit_encoding = None
23052305+ # If we don't have fast-exoprt --reencode, we may also be missing
23062306+ # diff-tree --combined-all-paths, which is even more important...
23072307+ p = subproc.Popen('git diff-tree -h'.split(),
23082308+ stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
23092309+ output = p.stdout.read()
23102310+ if b'--combined-all-paths' not in output:
23112311+ # We need a version of git-diff-tree with --combined-all-paths
23122312+ raise SystemExit(_("Error: need git >= 2.22.0"))
23132313+ if args.sensitive_data_removal:
23142314+ p = subproc.Popen('git cat-file -h'.split(),
23152315+ stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
23162316+ output = p.stdout.read()
23172317+ if b"--batch-command" not in output: # pragma: no cover
23182318+ raise SystemExit(_("Error: need git >= 2.36.0"))
23192319+ # End of sanity checks on git version
23202320+ if args.max_blob_size:
23212321+ suffix = args.max_blob_size[-1]
23222322+ if suffix not in '1234567890':
23232323+ mult = {'K': 1024, 'M': 1024**2, 'G': 1024**3}
23242324+ if suffix not in mult:
23252325+ raise SystemExit(_("Error: could not parse --strip-blobs-bigger-than"
23262326+ " argument %s")
23272327+ % args.max_blob_size)
23282328+ args.max_blob_size = int(args.max_blob_size[0:-1]) * mult[suffix]
23292329+ else:
23302330+ args.max_blob_size = int(args.max_blob_size)
23312331+ if args.file_info_callback and (
23322332+ args.stdin or args.blob_callback or args.filename_callback):
23332333+ raise SystemExit(_("Error: --file-info-callback is incompatible with "
23342334+ "--stdin, --blob-callback,\nand --filename-callback."))
23352335+23362336+ @staticmethod
23372337+ def get_replace_text(filename):
23382338+ replace_literals = []
23392339+ replace_regexes = []
23402340+ with open(filename, 'br') as f:
23412341+ for line in f:
23422342+ line = line.rstrip(b'\r\n')
23432343+23442344+ # Determine the replacement
23452345+ replacement = FilteringOptions.default_replace_text
23462346+ if b'==>' in line:
23472347+ line, replacement = line.rsplit(b'==>', 1)
23482348+23492349+ # See if we need to match via regex
23502350+ regex = None
23512351+ if line.startswith(b'regex:'):
23522352+ regex = line[6:]
23532353+ elif line.startswith(b'glob:'):
23542354+ regex = glob_to_regex(line[5:])
23552355+ if regex:
23562356+ replace_regexes.append((re.compile(regex), replacement))
23572357+ else:
23582358+ # Otherwise, find the literal we need to replace
23592359+ if line.startswith(b'literal:'):
23602360+ line = line[8:]
23612361+ if not line:
23622362+ continue
23632363+ replace_literals.append((line, replacement))
23642364+ return {'literals': replace_literals, 'regexes': replace_regexes}
23652365+23662366+ @staticmethod
23672367+ def get_paths_from_file(filename):
23682368+ new_path_changes = []
23692369+ with open(filename, 'br') as f:
23702370+ for line in f:
23712371+ line = line.rstrip(b'\r\n')
23722372+23732373+ # Skip blank lines
23742374+ if not line:
23752375+ continue
23762376+ # Skip comment lines
23772377+ if line.startswith(b'#'):
23782378+ continue
23792379+23802380+ # Determine the replacement
23812381+ match_type, repl = 'literal', None
23822382+ if b'==>' in line:
23832383+ line, repl = line.rsplit(b'==>', 1)
23842384+23852385+ # See if we need to match via regex
23862386+ match_type = 'match' # a.k.a. 'literal'
23872387+ if line.startswith(b'regex:'):
23882388+ match_type = 'regex'
23892389+ match = re.compile(line[6:])
23902390+ elif line.startswith(b'glob:'):
23912391+ match_type = 'glob'
23922392+ match = line[5:]
23932393+ if repl:
23942394+ raise SystemExit(_("Error: In %s, 'glob:' and '==>' are incompatible (renaming globs makes no sense)" % decode(filename)))
23952395+ else:
23962396+ if line.startswith(b'literal:'):
23972397+ match = line[8:]
23982398+ else:
23992399+ match = line
24002400+ if repl is not None:
24012401+ if match and repl and match.endswith(b'/') != repl.endswith(b'/'):
24022402+ raise SystemExit(_("Error: When rename directories, if OLDNAME "
24032403+ "and NEW_NAME are both non-empty and either "
24042404+ "ends with a slash then both must."))
24052405+24062406+ # Record the filter or rename
24072407+ if repl is not None:
24082408+ new_path_changes.append(['rename', match_type, (match, repl)])
24092409+ else:
24102410+ new_path_changes.append(['filter', match_type, match])
24112411+ if match_type == 'glob' and not match.endswith(b'*'):
24122412+ extension = b'*' if match.endswith(b'/') else b'/*'
24132413+ new_path_changes.append(['filter', match_type, match+extension])
24142414+ return new_path_changes
24152415+24162416+ @staticmethod
24172417+ def default_options():
24182418+ return FilteringOptions.parse_args([], error_on_empty = False)
24192419+24202420+ @staticmethod
24212421+ def parse_args(input_args, error_on_empty = True):
24222422+ parser = FilteringOptions.create_arg_parser()
24232423+ if not input_args and error_on_empty:
24242424+ parser.print_usage()
24252425+ raise SystemExit(_("No arguments specified."))
24262426+ args = parser.parse_args(input_args)
24272427+ if args.help:
24282428+ parser.print_help()
24292429+ raise SystemExit()
24302430+ if args.paths:
24312431+ raise SystemExit("Error: Option `--paths` unrecognized; did you mean --path or --paths-from-file?")
24322432+ if args.version:
24332433+ GitUtils.print_my_version()
24342434+ raise SystemExit()
24352435+ FilteringOptions.sanity_check_args(args)
24362436+ if args.mailmap:
24372437+ args.mailmap = MailmapInfo(args.mailmap)
24382438+ if args.replace_text:
24392439+ args.replace_text = FilteringOptions.get_replace_text(args.replace_text)
24402440+ if args.replace_message:
24412441+ args.replace_message = FilteringOptions.get_replace_text(args.replace_message)
24422442+ if args.strip_blobs_with_ids:
24432443+ with open(args.strip_blobs_with_ids, 'br') as f:
24442444+ args.strip_blobs_with_ids = set(f.read().split())
24452445+ else:
24462446+ args.strip_blobs_with_ids = set()
24472447+ if (args.partial or args.refs) and not args.replace_refs:
24482448+ args.replace_refs = 'update-no-add'
24492449+ args.repack = not (args.partial or args.refs or args.no_gc)
24502450+ if args.refs or args.source or args.target:
24512451+ args.partial = True
24522452+ if args.partial:
24532453+ args.no_fetch = True
24542454+ if not args.refs:
24552455+ args.refs = ['--all']
24562456+ return args
24572457+24582458+class RepoAnalyze(object):
24592459+24602460+ # First, several helper functions for analyze_commit()
24612461+24622462+ @staticmethod
24632463+ def equiv_class(stats, filename):
24642464+ return stats['equivalence'].get(filename, (filename,))
24652465+24662466+ @staticmethod
24672467+ def setup_equivalence_for_rename(stats, oldname, newname):
24682468+ # if A is renamed to B and B is renamed to C, then the user thinks of
24692469+ # A, B, and C as all being different names for the same 'file'. We record
24702470+ # this as an equivalence class:
24712471+ # stats['equivalence'][name] = (A,B,C)
24722472+ # for name being each of A, B, and C.
24732473+ old_tuple = stats['equivalence'].get(oldname, ())
24742474+ if newname in old_tuple:
24752475+ return
24762476+ elif old_tuple:
24772477+ new_tuple = tuple(list(old_tuple)+[newname])
24782478+ else:
24792479+ new_tuple = (oldname, newname)
24802480+ for f in new_tuple:
24812481+ stats['equivalence'][f] = new_tuple
24822482+24832483+ @staticmethod
24842484+ def setup_or_update_rename_history(stats, commit, oldname, newname):
24852485+ rename_commits = stats['rename_history'].get(oldname, set())
24862486+ rename_commits.add(commit)
24872487+ stats['rename_history'][oldname] = rename_commits
24882488+24892489+ @staticmethod
24902490+ def handle_renames(stats, commit, change_types, filenames):
24912491+ for index, change_type in enumerate(change_types):
24922492+ if change_type == ord(b'R'):
24932493+ oldname, newname = filenames[index], filenames[-1]
24942494+ RepoAnalyze.setup_equivalence_for_rename(stats, oldname, newname)
24952495+ RepoAnalyze.setup_or_update_rename_history(stats, commit,
24962496+ oldname, newname)
24972497+24982498+ @staticmethod
24992499+ def handle_file(stats, graph, commit, modes, shas, filenames):
25002500+ mode, sha, filename = modes[-1], shas[-1], filenames[-1]
25012501+25022502+ # Figure out kind of deletions to undo for this file, and update lists
25032503+ # of all-names-by-sha and all-filenames
25042504+ delmode = 'tree_deletions'
25052505+ if mode != b'040000':
25062506+ delmode = 'file_deletions'
25072507+ stats['names'][sha].add(filename)
25082508+ stats['allnames'].add(filename)
25092509+25102510+ # If the file (or equivalence class of files) was recorded as deleted,
25112511+ # clearly it isn't anymore
25122512+ equiv = RepoAnalyze.equiv_class(stats, filename)
25132513+ for f in equiv:
25142514+ stats[delmode].pop(f, None)
25152515+25162516+ # If we get a modify/add for a path that was renamed, we may need to break
25172517+ # the equivalence class. However, if the modify/add was on a branch that
25182518+ # doesn't have the rename in its history, we are still okay.
25192519+ need_to_break_equivalence = False
25202520+ if equiv[-1] != filename:
25212521+ for rename_commit in stats['rename_history'][filename]:
25222522+ if graph.is_ancestor(rename_commit, commit):
25232523+ need_to_break_equivalence = True
25242524+25252525+ if need_to_break_equivalence:
25262526+ for f in equiv:
25272527+ if f in stats['equivalence']:
25282528+ del stats['equivalence'][f]
25292529+25302530+ @staticmethod
25312531+ def analyze_commit(stats, graph, commit, parents, date, file_changes):
25322532+ graph.add_commit_and_parents(commit, parents)
25332533+ for change in file_changes:
25342534+ modes, shas, change_types, filenames = change
25352535+ if len(parents) == 1 and change_types.startswith(b'R'):
25362536+ change_types = b'R' # remove the rename score; we don't care
25372537+ if modes[-1] == b'160000':
25382538+ continue
25392539+ elif modes[-1] == b'000000':
25402540+ # Track when files/directories are deleted
25412541+ for f in RepoAnalyze.equiv_class(stats, filenames[-1]):
25422542+ if any(x == b'040000' for x in modes[0:-1]):
25432543+ stats['tree_deletions'][f] = date
25442544+ else:
25452545+ stats['file_deletions'][f] = date
25462546+ elif change_types.strip(b'AMT') == b'':
25472547+ RepoAnalyze.handle_file(stats, graph, commit, modes, shas, filenames)
25482548+ elif modes[-1] == b'040000' and change_types.strip(b'RAM') == b'':
25492549+ RepoAnalyze.handle_file(stats, graph, commit, modes, shas, filenames)
25502550+ elif change_types.strip(b'RAMT') == b'':
25512551+ RepoAnalyze.handle_file(stats, graph, commit, modes, shas, filenames)
25522552+ RepoAnalyze.handle_renames(stats, commit, change_types, filenames)
25532553+ else:
25542554+ raise SystemExit(_("Unhandled change type(s): %(change_type)s "
25552555+ "(in commit %(commit)s)")
25562556+ % ({'change_type': change_types, 'commit': commit})
25572557+ ) # pragma: no cover
25582558+25592559+ @staticmethod
25602560+ def gather_data(args):
25612561+ unpacked_size, packed_size = GitUtils.get_blob_sizes()
25622562+ stats = {'names': collections.defaultdict(set),
25632563+ 'allnames' : set(),
25642564+ 'file_deletions': {},
25652565+ 'tree_deletions': {},
25662566+ 'equivalence': {},
25672567+ 'rename_history': collections.defaultdict(set),
25682568+ 'unpacked_size': unpacked_size,
25692569+ 'packed_size': packed_size,
25702570+ 'num_commits': 0}
25712571+25722572+ # Setup the rev-list/diff-tree process
25732573+ processed_commits_msg = _("Processed %d commits")
25742574+ commit_parse_progress = ProgressWriter()
25752575+ num_commits = 0
25762576+ cmd = ('git rev-list --topo-order --reverse {}'.format(' '.join(args.refs)) +
25772577+ ' | git diff-tree --stdin --always --root --format=%H%n%P%n%cd' +
25782578+ ' --date=short -M -t -c --raw --combined-all-paths')
25792579+ dtp = subproc.Popen(cmd, shell=True, bufsize=-1, stdout=subprocess.PIPE)
25802580+ f = dtp.stdout
25812581+ line = f.readline()
25822582+ if not line:
25832583+ raise SystemExit(_("Nothing to analyze; repository is empty."))
25842584+ cont = bool(line)
25852585+ graph = AncestryGraph()
25862586+ while cont:
25872587+ commit = line.rstrip()
25882588+ parents = f.readline().split()
25892589+ date = f.readline().rstrip()
25902590+25912591+ # We expect a blank line next; if we get a non-blank line then
25922592+ # this commit modified no files and we need to move on to the next.
25932593+ # If there is no line, we've reached end-of-input.
25942594+ line = f.readline()
25952595+ if not line:
25962596+ cont = False
25972597+ line = line.rstrip()
25982598+25992599+ # If we haven't reached end of input, and we got a blank line meaning
26002600+ # a commit that has modified files, then get the file changes associated
26012601+ # with this commit.
26022602+ file_changes = []
26032603+ if cont and not line:
26042604+ cont = False
26052605+ for line in f:
26062606+ if not line.startswith(b':'):
26072607+ cont = True
26082608+ break
26092609+ n = 1+max(1, len(parents))
26102610+ assert line.startswith(b':'*(n-1))
26112611+ relevant = line[n-1:-1]
26122612+ splits = relevant.split(None, n)
26132613+ modes = splits[0:n]
26142614+ splits = splits[n].split(None, n)
26152615+ shas = splits[0:n]
26162616+ splits = splits[n].split(b'\t')
26172617+ change_types = splits[0]
26182618+ filenames = [PathQuoting.dequote(x) for x in splits[1:]]
26192619+ file_changes.append([modes, shas, change_types, filenames])
26202620+26212621+ # If someone is trying to analyze a subset of the history, make sure
26222622+ # to avoid dying on commits with parents that we haven't seen before
26232623+ if args.refs:
26242624+ graph.record_external_commits([p for p in parents
26252625+ if not p in graph.value])
26262626+26272627+ # Analyze this commit and update progress
26282628+ RepoAnalyze.analyze_commit(stats, graph, commit, parents, date,
26292629+ file_changes)
26302630+ num_commits += 1
26312631+ commit_parse_progress.show(processed_commits_msg % num_commits)
26322632+26332633+ # Show the final commits processed message and record the number of commits
26342634+ commit_parse_progress.finish()
26352635+ stats['num_commits'] = num_commits
26362636+26372637+ # Close the output, ensure rev-list|diff-tree pipeline completed successfully
26382638+ dtp.stdout.close()
26392639+ if dtp.wait():
26402640+ raise SystemExit(_("Error: rev-list|diff-tree pipeline failed; see above.")) # pragma: no cover
26412641+26422642+ return stats
26432643+26442644+ @staticmethod
26452645+ def write_report(reportdir, stats):
26462646+ def datestr(datetimestr):
26472647+ return datetimestr if datetimestr else _('<present>').encode()
26482648+26492649+ def dirnames(path):
26502650+ while True:
26512651+ path = os.path.dirname(path)
26522652+ yield path
26532653+ if path == b'':
26542654+ break
26552655+26562656+ # Compute aggregate size information for paths, extensions, and dirs
26572657+ total_size = {'packed': 0, 'unpacked': 0}
26582658+ path_size = {'packed': collections.defaultdict(int),
26592659+ 'unpacked': collections.defaultdict(int)}
26602660+ ext_size = {'packed': collections.defaultdict(int),
26612661+ 'unpacked': collections.defaultdict(int)}
26622662+ dir_size = {'packed': collections.defaultdict(int),
26632663+ 'unpacked': collections.defaultdict(int)}
26642664+ for sha in stats['names']:
26652665+ size = {'packed': stats['packed_size'][sha],
26662666+ 'unpacked': stats['unpacked_size'][sha]}
26672667+ for which in ('packed', 'unpacked'):
26682668+ for name in stats['names'][sha]:
26692669+ total_size[which] += size[which]
26702670+ path_size[which][name] += size[which]
26712671+ basename, ext = os.path.splitext(name)
26722672+ ext_size[which][ext] += size[which]
26732673+ for dirname in dirnames(name):
26742674+ dir_size[which][dirname] += size[which]
26752675+26762676+ # Determine if and when extensions and directories were deleted
26772677+ ext_deleted_data = {}
26782678+ for name in stats['allnames']:
26792679+ when = stats['file_deletions'].get(name, None)
26802680+26812681+ # Update the extension
26822682+ basename, ext = os.path.splitext(name)
26832683+ if when is None:
26842684+ ext_deleted_data[ext] = None
26852685+ elif ext in ext_deleted_data:
26862686+ if ext_deleted_data[ext] is not None:
26872687+ ext_deleted_data[ext] = max(ext_deleted_data[ext], when)
26882688+ else:
26892689+ ext_deleted_data[ext] = when
26902690+26912691+ dir_deleted_data = {}
26922692+ for name in dir_size['packed']:
26932693+ dir_deleted_data[name] = stats['tree_deletions'].get(name, None)
26942694+26952695+ with open(os.path.join(reportdir, b"README"), 'bw') as f:
26962696+ # Give a basic overview of this file
26972697+ f.write(b"== %s ==\n" % _("Overall Statistics").encode())
26982698+ f.write((" %s: %d\n" % (_("Number of commits"),
26992699+ stats['num_commits'])).encode())
27002700+ f.write((" %s: %d\n" % (_("Number of filenames"),
27012701+ len(path_size['packed']))).encode())
27022702+ f.write((" %s: %d\n" % (_("Number of directories"),
27032703+ len(dir_size['packed']))).encode())
27042704+ f.write((" %s: %d\n" % (_("Number of file extensions"),
27052705+ len(ext_size['packed']))).encode())
27062706+ f.write(b"\n")
27072707+ f.write((" %s: %d\n" % (_("Total unpacked size (bytes)"),
27082708+ total_size['unpacked'])).encode())
27092709+ f.write((" %s: %d\n" % (_("Total packed size (bytes)"),
27102710+ total_size['packed'])).encode())
27112711+ f.write(b"\n")
27122712+27132713+ # Mention issues with the report
27142714+ f.write(("== %s ==\n" % _("Caveats")).encode())
27152715+ f.write(("=== %s ===\n" % _("Sizes")).encode())
27162716+ f.write(textwrap.dedent(_("""
27172717+ Packed size represents what size your repository would be if no
27182718+ trees, commits, tags, or other metadata were included (though it may
27192719+ fail to represent de-duplication; see below). It also represents the
27202720+ current packing, which may be suboptimal if you haven't gc'ed for a
27212721+ while.
27222722+27232723+ Unpacked size represents what size your repository would be if no
27242724+ trees, commits, tags, or other metadata were included AND if no
27252725+ files were packed; i.e., without delta-ing or compression.
27262726+27272727+ Both unpacked and packed sizes can be slightly misleading. Deleting
27282728+ a blob from history not save as much space as the unpacked size,
27292729+ because it is obviously normally stored in packed form. Also,
27302730+ deleting a blob from history may not save as much space as its packed
27312731+ size either, because another blob could be stored as a delta against
27322732+ that blob, so when you remove one blob another blob's packed size may
27332733+ grow.
27342734+27352735+ Also, the sum of the packed sizes can add up to more than the
27362736+ repository size; if the same contents appeared in the repository in
27372737+ multiple places, git will automatically de-dupe and store only one
27382738+ copy, while the way sizes are added in this analysis adds the size
27392739+ for each file path that has those contents. Further, if a file is
27402740+ ever reverted to a previous version's contents, the previous
27412741+ version's size will be counted multiple times in this analysis, even
27422742+ though git will only store it once.
27432743+ """)[1:]).encode())
27442744+ f.write(b"\n")
27452745+ f.write(("=== %s ===\n" % _("Deletions")).encode())
27462746+ f.write(textwrap.dedent(_("""
27472747+ Whether a file is deleted is not a binary quality, since it can be
27482748+ deleted on some branches but still exist in others. Also, it might
27492749+ exist in an old tag, but have been deleted in versions newer than
27502750+ that. More thorough tracking could be done, including looking at
27512751+ merge commits where one side of history deleted and the other modified,
27522752+ in order to give a more holistic picture of deletions. However, that
27532753+ algorithm would not only be more complex to implement, it'd also be
27542754+ quite difficult to present and interpret by users. Since --analyze
27552755+ is just about getting a high-level rough picture of history, it instead
27562756+ implements the simplistic rule that is good enough for 98% of cases:
27572757+ A file is marked as deleted if the last commit in the fast-export
27582758+ stream that mentions the file lists it as deleted.
27592759+ This makes it dependent on topological ordering, but generally gives
27602760+ the "right" answer.
27612761+ """)[1:]).encode())
27622762+ f.write(b"\n")
27632763+ f.write(("=== %s ===\n" % _("Renames")).encode())
27642764+ f.write(textwrap.dedent(_("""
27652765+ Renames share the same non-binary nature that deletions do, plus
27662766+ additional challenges:
27672767+ * If the renamed file is renamed again, instead of just two names for
27682768+ a path you can have three or more.
27692769+ * Rename pairs of the form (oldname, newname) that we consider to be
27702770+ different names of the "same file" might only be valid over certain
27712771+ commit ranges. For example, if a new commit reintroduces a file
27722772+ named oldname, then new versions of oldname aren't the "same file"
27732773+ anymore. We could try to portray this to the user, but it's easier
27742774+ for the user to just break the pairing and only report unbroken
27752775+ rename pairings to the user.
27762776+ * The ability for users to rename files differently in different
27772777+ branches means that our chains of renames will not necessarily be
27782778+ linear but may branch out.
27792779+ """)[1:]).encode())
27802780+ f.write(b"\n")
27812781+27822782+ # Equivalence classes for names, so if folks only want to keep a
27832783+ # certain set of paths, they know the old names they want to include
27842784+ # too.
27852785+ with open(os.path.join(reportdir, b"renames.txt"), 'bw') as f:
27862786+ seen = set()
27872787+ for pathname,equiv_group in sorted(stats['equivalence'].items(),
27882788+ key=lambda x:(x[1], x[0])):
27892789+ if equiv_group in seen:
27902790+ continue
27912791+ seen.add(equiv_group)
27922792+ f.write(("{} ->\n ".format(decode(equiv_group[0])) +
27932793+ "\n ".join(decode(x) for x in equiv_group[1:]) +
27942794+ "\n").encode())
27952795+27962796+ # List directories in reverse sorted order of unpacked size
27972797+ with open(os.path.join(reportdir, b"directories-deleted-sizes.txt"), 'bw') as f:
27982798+ msg = "=== %s ===\n" % _("Deleted directories by reverse size")
27992799+ f.write(msg.encode())
28002800+ msg = _("Format: unpacked size, packed size, date deleted, directory name\n")
28012801+ f.write(msg.encode())
28022802+ for dirname, size in sorted(dir_size['packed'].items(),
28032803+ key=lambda x:(x[1],x[0]), reverse=True):
28042804+ if (dir_deleted_data[dirname]):
28052805+ f.write(b" %10d %10d %-10s %s\n" % (dir_size['unpacked'][dirname],
28062806+ size,
28072807+ datestr(dir_deleted_data[dirname]),
28082808+ dirname or _('<toplevel>').encode()))
28092809+28102810+ with open(os.path.join(reportdir, b"directories-all-sizes.txt"), 'bw') as f:
28112811+ f.write(("=== %s ===\n" % _("All directories by reverse size")).encode())
28122812+ msg = _("Format: unpacked size, packed size, date deleted, directory name\n")
28132813+ f.write(msg.encode())
28142814+ for dirname, size in sorted(dir_size['packed'].items(),
28152815+ key=lambda x:(x[1],x[0]), reverse=True):
28162816+ f.write(b" %10d %10d %-10s %s\n" % (dir_size['unpacked'][dirname],
28172817+ size,
28182818+ datestr(dir_deleted_data[dirname]),
28192819+ dirname or _("<toplevel>").encode()))
28202820+28212821+ # List extensions in reverse sorted order of unpacked size
28222822+ with open(os.path.join(reportdir, b"extensions-deleted-sizes.txt"), 'bw') as f:
28232823+ msg = "=== %s ===\n" % _("Deleted extensions by reverse size")
28242824+ f.write(msg.encode())
28252825+ msg = _("Format: unpacked size, packed size, date deleted, extension name\n")
28262826+ f.write(msg.encode())
28272827+ for extname, size in sorted(ext_size['packed'].items(),
28282828+ key=lambda x:(x[1],x[0]), reverse=True):
28292829+ if (ext_deleted_data[extname]):
28302830+ f.write(b" %10d %10d %-10s %s\n" % (ext_size['unpacked'][extname],
28312831+ size,
28322832+ datestr(ext_deleted_data[extname]),
28332833+ extname or _('<no extension>').encode()))
28342834+28352835+ with open(os.path.join(reportdir, b"extensions-all-sizes.txt"), 'bw') as f:
28362836+ f.write(("=== %s ===\n" % _("All extensions by reverse size")).encode())
28372837+ msg = _("Format: unpacked size, packed size, date deleted, extension name\n")
28382838+ f.write(msg.encode())
28392839+ for extname, size in sorted(ext_size['packed'].items(),
28402840+ key=lambda x:(x[1],x[0]), reverse=True):
28412841+ f.write(b" %10d %10d %-10s %s\n" % (ext_size['unpacked'][extname],
28422842+ size,
28432843+ datestr(ext_deleted_data[extname]),
28442844+ extname or _('<no extension>').encode()))
28452845+28462846+ # List files in reverse sorted order of unpacked size
28472847+ with open(os.path.join(reportdir, b"path-deleted-sizes.txt"), 'bw') as f:
28482848+ msg = "=== %s ===\n" % _("Deleted paths by reverse accumulated size")
28492849+ f.write(msg.encode())
28502850+ msg = _("Format: unpacked size, packed size, date deleted, path name(s)\n")
28512851+ f.write(msg.encode())
28522852+ for pathname, size in sorted(path_size['packed'].items(),
28532853+ key=lambda x:(x[1],x[0]), reverse=True):
28542854+ when = stats['file_deletions'].get(pathname, None)
28552855+ if when:
28562856+ f.write(b" %10d %10d %-10s %s\n" % (path_size['unpacked'][pathname],
28572857+ size,
28582858+ datestr(when),
28592859+ pathname))
28602860+28612861+ with open(os.path.join(reportdir, b"path-all-sizes.txt"), 'bw') as f:
28622862+ msg = "=== %s ===\n" % _("All paths by reverse accumulated size")
28632863+ f.write(msg.encode())
28642864+ msg = _("Format: unpacked size, packed size, date deleted, path name\n")
28652865+ f.write(msg.encode())
28662866+ for pathname, size in sorted(path_size['packed'].items(),
28672867+ key=lambda x:(x[1],x[0]), reverse=True):
28682868+ when = stats['file_deletions'].get(pathname, None)
28692869+ f.write(b" %10d %10d %-10s %s\n" % (path_size['unpacked'][pathname],
28702870+ size,
28712871+ datestr(when),
28722872+ pathname))
28732873+28742874+ # List of filenames and sizes in descending order
28752875+ with open(os.path.join(reportdir, b"blob-shas-and-paths.txt"), 'bw') as f:
28762876+ f.write(("=== %s ===\n" % _("Files by sha and associated pathnames in reverse size")).encode())
28772877+ f.write(_("Format: sha, unpacked size, packed size, filename(s) object stored as\n").encode())
28782878+ for sha, size in sorted(stats['packed_size'].items(),
28792879+ key=lambda x:(x[1],x[0]), reverse=True):
28802880+ if sha not in stats['names']:
28812881+ # Some objects in the repository might not be referenced, or not
28822882+ # referenced by the branches/tags the user cares about; skip them.
28832883+ continue
28842884+ names_with_sha = stats['names'][sha]
28852885+ if len(names_with_sha) == 1:
28862886+ names_with_sha = names_with_sha.pop()
28872887+ else:
28882888+ names_with_sha = b'[' + b', '.join(sorted(names_with_sha)) + b']'
28892889+ f.write(b" %s %10d %10d %s\n" % (sha,
28902890+ stats['unpacked_size'][sha],
28912891+ size,
28922892+ names_with_sha))
28932893+28942894+ @staticmethod
28952895+ def run(args):
28962896+ if args.report_dir:
28972897+ reportdir = args.report_dir
28982898+ else:
28992899+ git_dir = GitUtils.determine_git_dir(b'.')
29002900+29012901+ # Create the report directory as necessary
29022902+ results_tmp_dir = os.path.join(git_dir, b'filter-repo')
29032903+ if not os.path.isdir(results_tmp_dir):
29042904+ os.mkdir(results_tmp_dir)
29052905+ reportdir = os.path.join(results_tmp_dir, b"analysis")
29062906+29072907+ if os.path.isdir(reportdir):
29082908+ if args.force:
29092909+ sys.stdout.write(_("Warning: Removing recursively: \"%s\"\n") % decode(reportdir))
29102910+ shutil.rmtree(reportdir)
29112911+ else:
29122912+ sys.stdout.write(_("Error: dir already exists (use --force to delete): \"%s\"\n") % decode(reportdir))
29132913+ sys.exit(1)
29142914+29152915+ os.mkdir(reportdir)
29162916+29172917+ # Gather the data we need
29182918+ stats = RepoAnalyze.gather_data(args)
29192919+29202920+ # Write the reports
29212921+ sys.stdout.write(_("Writing reports to \"%s\"...") % decode(reportdir))
29222922+ sys.stdout.flush()
29232923+ RepoAnalyze.write_report(reportdir, stats)
29242924+ sys.stdout.write(_("done.\n"))
29252925+ sys.stdout.write(_("README: \"%s\"\n") % decode( os.path.join(reportdir, b"README") ))
29262926+29272927+class FileInfoValueHelper:
29282928+ def __init__(self, replace_text, insert_blob_func, source_working_dir):
29292929+ self.data = {}
29302930+ self._replace_text = replace_text
29312931+ self._insert_blob_func = insert_blob_func
29322932+ cmd = ['git', 'cat-file', '--batch-command']
29332933+ self._cat_file_process = subproc.Popen(cmd,
29342934+ stdin = subprocess.PIPE,
29352935+ stdout = subprocess.PIPE,
29362936+ cwd = source_working_dir)
29372937+29382938+ def finalize(self):
29392939+ self._cat_file_process.stdin.close()
29402940+ self._cat_file_process.wait()
29412941+29422942+ def get_contents_by_identifier(self, blobhash):
29432943+ self._cat_file_process.stdin.write(b'contents '+blobhash+b'\n')
29442944+ self._cat_file_process.stdin.flush()
29452945+ line = self._cat_file_process.stdout.readline()
29462946+ try:
29472947+ (oid, oidtype, size) = line.split()
29482948+ except ValueError:
29492949+ assert(line == blobhash+b" missing\n")
29502950+ return None
29512951+ size = int(size) # Convert e.g. b'6283' to 6283
29522952+ assert(oidtype == b'blob')
29532953+ contents_plus_newline = self._cat_file_process.stdout.read(size+1)
29542954+ return contents_plus_newline[:-1] # return all but the newline
29552955+29562956+ def get_size_by_identifier(self, blobhash):
29572957+ self._cat_file_process.stdin.write(b'info '+blobhash+b'\n')
29582958+ self._cat_file_process.stdin.flush()
29592959+ line = self._cat_file_process.stdout.readline()
29602960+ (oid, oidtype, size) = line.split()
29612961+ size = int(size) # Convert e.g. b'6283' to 6283
29622962+ assert(oidtype == b'blob')
29632963+ return size
29642964+29652965+ def insert_file_with_contents(self, contents):
29662966+ blob = Blob(contents)
29672967+ self._insert_blob_func(blob)
29682968+ return blob.id
29692969+29702970+ def is_binary(self, contents):
29712971+ return b"\0" in contents[0:8192]
29722972+29732973+ def apply_replace_text(self, contents):
29742974+ new_contents = contents
29752975+ for literal, replacement in self._replace_text['literals']:
29762976+ new_contents = new_contents.replace(literal, replacement)
29772977+ for regex, replacement in self._replace_text['regexes']:
29782978+ new_contents = regex.sub(replacement, new_contents)
29792979+ return new_contents
29802980+29812981+class LFSObjectTracker:
29822982+ class LFSObjs:
29832983+ def __init__(self):
29842984+ self.id_to_object_map = {}
29852985+ self.objects = set()
29862986+29872987+ def __init__(self, file_info, check_sources, check_targets):
29882988+ self.source_objects = LFSObjectTracker.LFSObjs()
29892989+ self.target_objects = LFSObjectTracker.LFSObjs()
29902990+ self.hash_to_object_map = {}
29912991+ self.file_info = file_info
29922992+ self.check_sources = check_sources
29932993+ self.check_targets = check_targets
29942994+ self.objects_orphaned = False
29952995+29962996+ def _get_lfs_values(self, contents):
29972997+ values = {}
29982998+ if len(contents) > 1024:
29992999+ return {}
30003000+ for line in contents.splitlines():
30013001+ try:
30023002+ (key, value) = line.split(b' ', 1)
30033003+ except ValueError:
30043004+ return {}
30053005+ if not values and key != b'version':
30063006+ return values
30073007+ values[key] = value
30083008+ return values
30093009+30103010+ def check_blob_data(self, contents, fast_export_id, source):
30113011+ if source and not self.check_sources:
30123012+ return
30133013+ mymap = self.source_objects if source else self.target_objects
30143014+ lfs_object_id = self._get_lfs_values(contents).get(b'oid')
30153015+ if lfs_object_id:
30163016+ mymap.id_to_object_map[fast_export_id] = lfs_object_id
30173017+30183018+ def check_file_change_data(self, git_id, source):
30193019+ if source and not self.check_sources:
30203020+ return
30213021+ mymap = self.source_objects if source else self.target_objects
30223022+ if isinstance(git_id, int):
30233023+ lfs_object_id = mymap.id_to_object_map.get(git_id)
30243024+ if lfs_object_id:
30253025+ mymap.objects.add(lfs_object_id)
30263026+ else:
30273027+ if git_id in self.hash_to_object_map:
30283028+ mymap.objects.add(self.hash_to_object_map[git_id])
30293029+ return
30303030+ size = self.file_info.get_size_by_identifier(git_id)
30313031+ if size >= 1024:
30323032+ return
30333033+ contents = self.file_info.get_contents_by_identifier(git_id)
30343034+ lfs_object_id = self._get_lfs_values(contents).get(b'oid')
30353035+ if lfs_object_id:
30363036+ self.hash_to_object_map[git_id] = lfs_object_id
30373037+ mymap.objects.add(lfs_object_id)
30383038+30393039+ def check_output_object(self, obj):
30403040+ if not self.check_targets:
30413041+ return
30423042+ if type(obj) == Blob:
30433043+ self.check_blob_data(obj.data, obj.id, False)
30443044+ elif type(obj) == Commit:
30453045+ for change in obj.file_changes:
30463046+ sys.stdout.flush()
30473047+ if change.type != b'M' or change.mode == b'160000':
30483048+ continue
30493049+ self.check_file_change_data(change.blob_id, False)
30503050+30513051+ def find_all_lfs_objects_in_repo(self, repo, source):
30523052+ if not source:
30533053+ self.file_info = FileInfoValueHelper(None, None, repo)
30543054+ p = subproc.Popen(["git", "rev-list", "--objects", "--all"],
30553055+ stdout=subprocess.PIPE, stderr=subprocess.PIPE,
30563056+ cwd=repo)
30573057+ for line in p.stdout.readlines():
30583058+ try:
30593059+ (git_oid, filename) = line.split()
30603060+ except ValueError:
30613061+ # Commit and tree objects only have oid
30623062+ continue
30633063+30643064+ mymap = self.source_objects if source else self.target_objects
30653065+ size = self.file_info.get_size_by_identifier(git_oid)
30663066+ if size >= 1024:
30673067+ continue
30683068+ contents = self.file_info.get_contents_by_identifier(git_oid)
30693069+ lfs_object_id = self._get_lfs_values(contents).get(b'oid')
30703070+ if lfs_object_id:
30713071+ mymap.objects.add(lfs_object_id)
30723072+ if not source:
30733073+ self.file_info.finalize()
30743074+30753075+class InputFileBackup:
30763076+ def __init__(self, input_file, output_file):
30773077+ self.input_file = input_file
30783078+ self.output_file = output_file
30793079+30803080+ def close(self):
30813081+ self.input_file.close()
30823082+ self.output_file.close()
30833083+30843084+ def read(self, size):
30853085+ output = self.input_file.read(size)
30863086+ self.output_file.write(output)
30873087+ return output
30883088+30893089+ def readline(self):
30903090+ line = self.input_file.readline()
30913091+ self.output_file.write(line)
30923092+ return line
30933093+30943094+class DualFileWriter:
30953095+ def __init__(self, file1, file2):
30963096+ self.file1 = file1
30973097+ self.file2 = file2
30983098+30993099+ def write(self, *args):
31003100+ self.file1.write(*args)
31013101+ self.file2.write(*args)
31023102+31033103+ def flush(self):
31043104+ self.file1.flush()
31053105+ self.file2.flush()
31063106+31073107+ def close(self):
31083108+ self.file1.close()
31093109+ self.file2.close()
31103110+31113111+class RepoFilter(object):
31123112+ def __init__(self,
31133113+ args,
31143114+ filename_callback = None,
31153115+ message_callback = None,
31163116+ name_callback = None,
31173117+ email_callback = None,
31183118+ refname_callback = None,
31193119+ blob_callback = None,
31203120+ commit_callback = None,
31213121+ tag_callback = None,
31223122+ reset_callback = None,
31233123+ done_callback = None,
31243124+ file_info_callback = None):
31253125+31263126+ self._args = args
31273127+31283128+ # Repo we are exporting
31293129+ self._repo_working_dir = None
31303130+31313131+ # Store callbacks for acting on objects printed by FastExport
31323132+ self._blob_callback = blob_callback
31333133+ self._commit_callback = commit_callback
31343134+ self._tag_callback = tag_callback
31353135+ self._reset_callback = reset_callback
31363136+ self._done_callback = done_callback
31373137+31383138+ # Store callbacks for acting on slices of FastExport objects
31393139+ self._filename_callback = filename_callback # filenames from commits
31403140+ self._message_callback = message_callback # commit OR tag message
31413141+ self._name_callback = name_callback # author, committer, tagger
31423142+ self._email_callback = email_callback # author, committer, tagger
31433143+ self._refname_callback = refname_callback # from commit/tag/reset
31443144+ self._file_info_callback = file_info_callback # various file info
31453145+ self._handle_arg_callbacks()
31463146+31473147+ # Helpers for callbacks
31483148+ self._file_info_value = None
31493149+31503150+ # Defaults for input
31513151+ self._input = None
31523152+ self._fep = None # Fast Export Process
31533153+ self._fe_orig = None # Path to where original fast-export output stored
31543154+ self._fe_filt = None # Path to where filtered fast-export output stored
31553155+ self._parser = None # FastExportParser object we are working with
31563156+31573157+ # Defaults for output
31583158+ self._output = None
31593159+ self._fip = None # Fast Import Process
31603160+ self._import_pipes = None
31613161+ self._managed_output = True
31623162+31633163+ # A tuple of (depth, list-of-ancestors). Commits and ancestors are
31643164+ # identified by their id (their 'mark' in fast-export or fast-import
31653165+ # speak). The depth of a commit is one more than the max depth of any
31663166+ # of its ancestors.
31673167+ self._graph = AncestryGraph()
31683168+ # Another one, for ancestry of commits in the original repo
31693169+ self._orig_graph = AncestryGraph()
31703170+31713171+ # Names of files that were tweaked in any commit; such paths could lead
31723172+ # to subsequent commits being empty
31733173+ self._files_tweaked = set()
31743174+31753175+ # A set of commit hash pairs (oldhash, newhash) which used to be merge
31763176+ # commits but due to filtering were turned into non-merge commits.
31773177+ # The commits probably have suboptimal commit messages (e.g. "Merge branch
31783178+ # next into master").
31793179+ self._commits_no_longer_merges = []
31803180+31813181+ # A dict of original_ids to new_ids; filtering commits means getting
31823182+ # new commit hash (sha1sums), and we record the mapping both for
31833183+ # diagnostic purposes and so we can rewrite commit messages. Note that
31843184+ # the new_id can be None rather than a commit hash if the original
31853185+ # commit became empty and was pruned or was otherwise dropped.
31863186+ self._commit_renames = {}
31873187+31883188+ # A set of original_ids (i.e. original hashes) for which we have not yet
31893189+ # gotten the new hashses; the value is always the corresponding fast-export
31903190+ # id (i.e. commit.id)
31913191+ self._pending_renames = collections.OrderedDict()
31923192+31933193+ # A dict of commit_hash[0:7] -> set(commit_hashes with that prefix).
31943194+ #
31953195+ # It's common for commit messages to refer to commits by abbreviated
31963196+ # commit hashes, as short as 7 characters. To facilitate translating
31973197+ # such short hashes, we have a mapping of prefixes to full old hashes.
31983198+ self._commit_short_old_hashes = collections.defaultdict(set)
31993199+32003200+ # A set of commit hash references appearing in commit messages which
32013201+ # mapped to a valid commit that was removed entirely in the filtering
32023202+ # process. The commit message will continue to reference the
32033203+ # now-missing commit hash, since there was nothing to map it to.
32043204+ self._commits_referenced_but_removed = set()
32053205+32063206+ # Other vars related to metadata tracking
32073207+ self._already_ran = False
32083208+ self._changed_refs = set()
32093209+ self._lfs_object_tracker = None
32103210+32113211+ # Progress handling (number of commits parsed, etc.)
32123212+ self._progress_writer = ProgressWriter()
32133213+ self._num_commits = 0
32143214+32153215+ # Size of blobs in the repo
32163216+ self._unpacked_size = {}
32173217+32183218+ # Other vars
32193219+ self._sanity_checks_handled = False
32203220+ self._finalize_handled = False
32213221+ self._orig_refs = None
32223222+ self._config_settings = {}
32233223+ self._newnames = {}
32243224+ self._stash = None
32253225+32263226+ # Cache a few message translations for performance reasons
32273227+ self._parsed_message = _("Parsed %d commits")
32283228+32293229+ # Compile some regexes and cache those
32303230+ self._hash_re = re.compile(br'(\b[0-9a-f]{7,40}\b)')
32313231+32323232+ def _handle_arg_callbacks(self):
32333233+ def make_callback(args, bdy):
32343234+ callback_globals = {g: globals()[g] for g in public_globals}
32353235+ callback_locals = {}
32363236+ if type(args) == str:
32373237+ args = (args, '_do_not_use_this_var = None')
32383238+ exec('def callback({}):\n'.format(', '.join(args))+
32393239+ ' '+'\n '.join(bdy.splitlines()), callback_globals, callback_locals)
32403240+ return callback_locals['callback']
32413241+ def handle(which, args=None):
32423242+ which_under = which.replace('-','_')
32433243+ if not args:
32443244+ args = which
32453245+ callback_field = '_{}_callback'.format(which_under)
32463246+ code_string = getattr(self._args, which_under+'_callback')
32473247+ if code_string:
32483248+ if os.path.exists(code_string):
32493249+ with open(code_string, 'r', encoding='utf-8') as f:
32503250+ code_string = f.read()
32513251+ if getattr(self, callback_field):
32523252+ raise SystemExit(_("Error: Cannot pass a %s_callback to RepoFilter "
32533253+ "AND pass --%s-callback"
32543254+ % (which_under, which)))
32553255+ if 'return ' not in code_string and \
32563256+ which not in ('blob', 'commit', 'tag', 'reset'):
32573257+ raise SystemExit(_("Error: --%s-callback should have a return statement")
32583258+ % which)
32593259+ setattr(self, callback_field, make_callback(args, code_string))
32603260+ handle('filename')
32613261+ handle('message')
32623262+ handle('name')
32633263+ handle('email')
32643264+ handle('refname')
32653265+ handle('blob')
32663266+ handle('commit')
32673267+ handle('tag')
32683268+ handle('reset')
32693269+ handle('file-info', ('filename', 'mode', 'blob_id', 'value'))
32703270+32713271+ def _run_sanity_checks(self):
32723272+ self._sanity_checks_handled = True
32733273+ if not self._managed_output:
32743274+ if not self._args.replace_refs:
32753275+ # If not _managed_output we don't want to make extra changes to the
32763276+ # repo, so set default to no-op 'update-no-add'
32773277+ self._args.replace_refs = 'update-no-add'
32783278+ return
32793279+32803280+ if self._args.debug:
32813281+ print("[DEBUG] Passed arguments:\n{}".format(self._args))
32823282+32833283+ # Determine basic repository information
32843284+ target_working_dir = self._args.target or b'.'
32853285+ self._orig_refs = GitUtils.get_refs(target_working_dir)
32863286+ is_bare = GitUtils.is_repository_bare(target_working_dir)
32873287+ self._config_settings = GitUtils.get_config_settings(target_working_dir)
32883288+32893289+ # Determine if this is second or later run of filter-repo
32903290+ tmp_dir = self.results_tmp_dir(create_if_missing=False)
32913291+ ran_path = os.path.join(tmp_dir, b'already_ran')
32923292+ self._already_ran = os.path.isfile(ran_path)
32933293+ if self._already_ran:
32943294+ current_time = time.time()
32953295+ file_mod_time = os.path.getmtime(ran_path)
32963296+ file_age = current_time - file_mod_time
32973297+ if file_age > 86400: # file older than a day
32983298+ msg = (f"The previous run is older than a day ({decode(ran_path)} already exists).\n"
32993299+ f"See \"Already Ran\" section in the manual for more information.\n"
33003300+ f"Treat this run as a continuation of filtering in the previous run (Y/N)? ")
33013301+ response = input(msg)
33023302+33033303+ if response.lower() != 'y':
33043304+ os.remove(ran_path)
33053305+ self._already_ran = False
33063306+33073307+ # Interaction between --already-ran and --sensitive_data_removal
33083308+ msg = textwrap.dedent(_("""\
33093309+ Error: Cannot specify --sensitive-data-removal on a follow-up invocation
33103310+ of git-filter-repo unless it was specified in previously runs."""))
33113311+ if self._already_ran:
33123312+ sdr_path = os.path.join(tmp_dir, b'sensitive_data_removal')
33133313+ sdr_previously = os.path.isfile(sdr_path)
33143314+ if not sdr_previously and self._args.sensitive_data_removal:
33153315+ raise SystemExit(msg)
33163316+ # Treat this as a --sensitive-data-removal run if a previous run was,
33173317+ # even if it wasn't specified this time
33183318+ self._args.sensitive_data_removal = sdr_previously
33193319+33203320+ # Have to check sensitive_data_removal interactions here instead of
33213321+ # sanity_check_args because of the above interaction with already_ran stuff
33223322+ if self._args.sensitive_data_removal:
33233323+ if self._args.stdin:
33243324+ msg = _("Error: sensitive data removal is incompatible with --stdin")
33253325+ raise SystemExit(msg)
33263326+ if self._args.source or self._args.target:
33273327+ msg = _("Error: sensitive data removal is incompatible with --source and --target")
33283328+ raise SystemExit(msg)
33293329+33303330+ # Default for --replace-refs
33313331+ if not self._args.replace_refs:
33323332+ self._args.replace_refs = 'delete-no-add'
33333333+ if self._args.replace_refs == 'old-default':
33343334+ self._args.replace_refs = ('update-or-add' if self._already_ran
33353335+ else 'update-and-add')
33363336+33373337+ # Do sanity checks from the correct directory
33383338+ if not self._args.force and not self._already_ran:
33393339+ cwd = os.getcwd()
33403340+ os.chdir(target_working_dir)
33413341+ RepoFilter.sanity_check(self._orig_refs, is_bare, self._config_settings)
33423342+ os.chdir(cwd)
33433343+33443344+ def _setup_lfs_orphaning_checks(self):
33453345+ # Do a couple checks to see if we want to do lfs orphaning checks
33463346+ if not self._args.sensitive_data_removal:
33473347+ return
33483348+ metadata_dir = self.results_tmp_dir()
33493349+ lfs_objects_file = os.path.join(metadata_dir, b'original_lfs_objects')
33503350+ if self._already_ran:
33513351+ # Check if we did lfs filtering in the previous run
33523352+ if not os.path.isfile(lfs_objects_file):
33533353+ return
33543354+33553355+ # Set up self._file_info_value so we can query git for stuff
33563356+ source_working_dir = self._args.source or b'.'
33573357+ self._file_info_value = FileInfoValueHelper(self._args.replace_text,
33583358+ self.insert,
33593359+ source_working_dir)
33603360+33613361+ # One more check to see if we want to do lfs orphaning checks
33623362+ if not self._already_ran:
33633363+ # Check if lfs filtering is active in HEAD's .gitattributes file
33643364+ a = self._file_info_value.get_contents_by_identifier(b"HEAD:.gitattributes")
33653365+ if not a or not re.search(rb'\bfilter=lfs\b', a):
33663366+ return
33673367+33683368+ # Set up the object tracker
33693369+ check_sources = not self._already_ran and not self._args.partial
33703370+ check_targets = not self._args.partial
33713371+ self._lfs_object_tracker = LFSObjectTracker(self._file_info_value,
33723372+ check_sources,
33733373+ check_targets)
33743374+ self._parser._lfs_object_tracker = self._lfs_object_tracker # kinda gross
33753375+33763376+ # Get initial objects
33773377+ if self._already_ran:
33783378+ with open(lfs_objects_file, 'br') as f:
33793379+ for line in f:
33803380+ self._lfs_object_tracker.source_objects.objects.add(line.strip())
33813381+ elif self._args.partial:
33823382+ source = True
33833383+ self._lfs_object_tracker.find_all_lfs_objects_in_repo(source_working_dir,
33843384+ source)
33853385+33863386+ @staticmethod
33873387+ def loose_objects_are_replace_refs(git_dir, refs, num_loose_objects):
33883388+ replace_objects = set()
33893389+ for refname, rev in refs.items():
33903390+ if not refname.startswith(b'refs/replace/'):
33913391+ continue
33923392+ replace_objects.add(rev)
33933393+33943394+ validobj_re = re.compile(rb'^[0-9a-f]{40}$')
33953395+ object_dir=os.path.join(git_dir, b'objects')
33963396+ for root, dirs, files in os.walk(object_dir):
33973397+ for filename in files:
33983398+ objname = os.path.basename(root)+filename
33993399+ if objname not in replace_objects and validobj_re.match(objname):
34003400+ return False
34013401+34023402+ return True
34033403+34043404+ @staticmethod
34053405+ def sanity_check(refs, is_bare, config_settings):
34063406+ def abort(reason):
34073407+ dirname = config_settings.get(b'remote.origin.url', b'')
34083408+ msg = ""
34093409+ if dirname and os.path.isdir(dirname):
34103410+ msg = _("Note: when cloning local repositories, you need to pass\n"
34113411+ " --no-local to git clone to avoid this issue.\n")
34123412+ raise SystemExit(
34133413+ _("Aborting: Refusing to destructively overwrite repo history since\n"
34143414+ "this does not look like a fresh clone.\n"
34153415+ " (%s)\n%s"
34163416+ "Please operate on a fresh clone instead. If you want to proceed\n"
34173417+ "anyway, use --force.") % (reason, msg))
34183418+34193419+ # Avoid letting people running with weird setups and overwriting GIT_DIR
34203420+ # elsewhere
34213421+ git_dir = GitUtils.determine_git_dir(b'.')
34223422+ if is_bare and git_dir != b'.':
34233423+ abort(_("GIT_DIR must be ."))
34243424+ elif not is_bare and git_dir != b'.git':
34253425+ abort(_("GIT_DIR must be .git"))
34263426+34273427+ # Check for refname collisions
34283428+ if config_settings.get(b'core.ignorecase', b'false') == b'true':
34293429+ collisions = collections.defaultdict(list)
34303430+ for ref in refs:
34313431+ collisions[ref.lower()].append(ref)
34323432+ msg = ""
34333433+ for ref in collisions:
34343434+ if len(collisions[ref]) >= 2:
34353435+ msg += " " + decode(b", ".join(collisions[ref])) + "\n"
34363436+ if msg:
34373437+ raise SystemExit(
34383438+ _("Aborting: Cannot rewrite history on a case insensitive\n"
34393439+ "filesystem since you have refs that differ in case only:\n"
34403440+ "%s") % msg)
34413441+ if config_settings.get(b'core.precomposeunicode', b'false') == b'true':
34423442+ import unicodedata # Mac users need to have python-3.8
34433443+ collisions = collections.defaultdict(list)
34443444+ for ref in refs:
34453445+ strref = decode(ref)
34463446+ collisions[unicodedata.normalize('NFC', strref)].append(strref)
34473447+ msg = ""
34483448+ for ref in collisions:
34493449+ if len(collisions[ref]) >= 2:
34503450+ msg += " " + ", ".join(collisions[ref]) + "\n"
34513451+ if msg:
34523452+ raise SystemExit(
34533453+ _("Aborting: Cannot rewrite history on a character normalizing\n"
34543454+ "filesystem since you have refs that differ in normalization:\n"
34553455+ "%s") % msg)
34563456+34573457+ # Make sure repo is fully packed, just like a fresh clone would be.
34583458+ # Note that transfer.unpackLimit defaults to 100, meaning that a
34593459+ # repository with no packs and less than 100 objects should be considered
34603460+ # fully packed.
34613461+ output = subproc.check_output('git count-objects -v'.split())
34623462+ stats = dict(x.split(b': ') for x in output.splitlines())
34633463+ num_packs = int(stats[b'packs'])
34643464+ num_loose_objects = int(stats[b'count'])
34653465+ if num_packs > 1 or \
34663466+ num_loose_objects >= 100 or \
34673467+ (num_packs == 1 and num_loose_objects > 0 and
34683468+ not RepoFilter.loose_objects_are_replace_refs(git_dir, refs,
34693469+ num_loose_objects)):
34703470+ abort(_("expected freshly packed repo"))
34713471+34723472+ # Make sure there is precisely one remote, named "origin"...or that this
34733473+ # is a new bare repo with no packs and no remotes
34743474+ output = subproc.check_output('git remote'.split()).strip()
34753475+ if not (output == b"origin" or (num_packs == 0 and not output)):
34763476+ abort(_("expected one remote, origin"))
34773477+34783478+ # Make sure that all reflogs have precisely one entry
34793479+ reflog_dir=os.path.join(git_dir, b'logs')
34803480+ for root, dirs, files in os.walk(reflog_dir):
34813481+ for filename in files:
34823482+ pathname = os.path.join(root, filename)
34833483+ with open(pathname, 'br') as f:
34843484+ if len(f.read().splitlines()) > 1:
34853485+ shortpath = pathname[len(reflog_dir)+1:]
34863486+ abort(_("expected at most one entry in the reflog for %s") %
34873487+ decode(shortpath))
34883488+34893489+ # Make sure there are no stashed changes
34903490+ if b'refs/stash' in refs:
34913491+ abort(_("has stashed changes"))
34923492+34933493+ # Do extra checks in non-bare repos
34943494+ if not is_bare:
34953495+ # Avoid uncommitted, unstaged, or untracked changes
34963496+ if subproc.call('git diff --staged --quiet'.split()):
34973497+ abort(_("you have uncommitted changes"))
34983498+ if subproc.call('git diff --quiet'.split()):
34993499+ abort(_("you have unstaged changes"))
35003500+ untracked_output = subproc.check_output('git ls-files -o'.split())
35013501+ if len(untracked_output) > 0:
35023502+ uf = untracked_output.rstrip(b'\n').split(b'\n')
35033503+ # Since running git-filter-repo can result in files being written to
35043504+ # __pycache__ (depending on python version, env vars, etc.), let's
35053505+ # ignore those as far as "clean clone" is concerned.
35063506+ relevant_uf = [x for x in uf
35073507+ if not x.startswith(b'__pycache__/git_filter_repo.')]
35083508+ if len(relevant_uf) > 0:
35093509+ abort(_("you have untracked changes"))
35103510+35113511+ # Avoid unpushed changes
35123512+ for refname, rev in refs.items():
35133513+ if not refname.startswith(b'refs/heads/'):
35143514+ continue
35153515+ origin_ref = refname.replace(b'refs/heads/', b'refs/remotes/origin/')
35163516+ if origin_ref not in refs:
35173517+ abort(_('%s exists, but %s not found') % (decode(refname),
35183518+ decode(origin_ref)))
35193519+ if rev != refs[origin_ref]:
35203520+ abort(_('%s does not match %s') % (decode(refname),
35213521+ decode(origin_ref)))
35223522+35233523+ # Make sure there is only one worktree
35243524+ output = subproc.check_output('git worktree list'.split())
35253525+ if len(output.splitlines()) > 1:
35263526+ abort(_('you have multiple worktrees'))
35273527+35283528+ def cleanup(self, repo, repack, reset,
35293529+ run_quietly=False, show_debuginfo=False):
35303530+ ''' cleanup repo; if repack then expire reflogs and do a gc --prune=now.
35313531+ if reset then do a reset --hard. Optionally also curb output if
35323532+ run_quietly is True, or go the opposite direction and show extra
35333533+ output if show_debuginfo is True. '''
35343534+ assert not (run_quietly and show_debuginfo)
35353535+35363536+ if (repack and not run_quietly and not show_debuginfo):
35373537+ print(_("Repacking your repo and cleaning out old unneeded objects"))
35383538+ quiet_flags = '--quiet' if run_quietly else ''
35393539+ cleanup_cmds = []
35403540+ if repack:
35413541+ cleanup_cmds = ['git reflog expire --expire=now --all'.split(),
35423542+ 'git gc {} --prune=now'.format(quiet_flags).split()]
35433543+ if reset:
35443544+ cleanup_cmds.insert(0, 'git reset {} --hard'.format(quiet_flags).split())
35453545+ location_info = ' (in {})'.format(decode(repo)) if repo != b'.' else ''
35463546+ for cmd in cleanup_cmds:
35473547+ if show_debuginfo:
35483548+ print("[DEBUG] Running{}: {}".format(location_info, ' '.join(cmd)))
35493549+ ret = subproc.call(cmd, cwd=repo)
35503550+ if ret != 0:
35513551+ raise SystemExit("fatal: running '%s' failed!" % ' '.join(cmd))
35523552+ if cmd[0:3] == 'git reflog expire'.split():
35533553+ self._write_stash()
35543554+35553555+ def _get_rename(self, old_hash):
35563556+ # If we already know the rename, just return it
35573557+ new_hash = self._commit_renames.get(old_hash, None)
35583558+ if new_hash:
35593559+ return new_hash
35603560+35613561+ # If it's not in the remaining pending renames, we don't know it
35623562+ if old_hash is not None and old_hash not in self._pending_renames:
35633563+ return None
35643564+35653565+ # Read through the pending renames until we find it or we've read them all,
35663566+ # and return whatever we might find
35673567+ self._flush_renames(old_hash)
35683568+ return self._commit_renames.get(old_hash, None)
35693569+35703570+ def _flush_renames(self, old_hash=None, limit=0):
35713571+ # Parse through self._pending_renames until we have read enough. We have
35723572+ # read enough if:
35733573+ # self._pending_renames is empty
35743574+ # old_hash != None and we found a rename for old_hash
35753575+ # limit > 0 and len(self._pending_renames) started less than 2*limit
35763576+ # limit > 0 and len(self._pending_renames) < limit
35773577+ if limit and len(self._pending_renames) < 2 * limit:
35783578+ return
35793579+ fi_input, fi_output = self._import_pipes
35803580+ while self._pending_renames:
35813581+ orig_hash, new_fast_export_id = self._pending_renames.popitem(last=False)
35823582+ new_hash = fi_output.readline().rstrip()
35833583+ self._commit_renames[orig_hash] = new_hash
35843584+ self._graph.record_hash(new_fast_export_id, new_hash)
35853585+ if old_hash == orig_hash:
35863586+ return
35873587+ if limit and len(self._pending_renames) < limit:
35883588+ return
35893589+35903590+ def _translate_commit_hash(self, matchobj_or_oldhash):
35913591+ old_hash = matchobj_or_oldhash
35923592+ if not isinstance(matchobj_or_oldhash, bytes):
35933593+ old_hash = matchobj_or_oldhash.group(1)
35943594+ orig_len = len(old_hash)
35953595+ new_hash = self._get_rename(old_hash)
35963596+ if new_hash is None:
35973597+ if old_hash[0:7] not in self._commit_short_old_hashes:
35983598+ self._commits_referenced_but_removed.add(old_hash)
35993599+ return old_hash
36003600+ possibilities = self._commit_short_old_hashes[old_hash[0:7]]
36013601+ matches = [x for x in possibilities
36023602+ if x[0:orig_len] == old_hash]
36033603+ if len(matches) != 1:
36043604+ self._commits_referenced_but_removed.add(old_hash)
36053605+ return old_hash
36063606+ old_hash = matches[0]
36073607+ new_hash = self._get_rename(old_hash)
36083608+36093609+ assert new_hash is not None
36103610+ return new_hash[0:orig_len]
36113611+36123612+ def _maybe_trim_extra_parents(self, orig_parents, parents):
36133613+ '''Due to pruning of empty commits, some parents could be non-existent
36143614+ (None) or otherwise redundant. Remove the non-existent parents, and
36153615+ remove redundant parents ***SO LONG AS*** that doesn't transform a
36163616+ merge commit into a non-merge commit.
36173617+36183618+ Returns a tuple:
36193619+ (parents, new_first_parent_if_would_become_non_merge)'''
36203620+36213621+ always_prune = (self._args.prune_degenerate == 'always')
36223622+36233623+ # Pruning of empty commits means multiple things:
36243624+ # * An original parent of this commit may have been pruned causing the
36253625+ # need to rewrite the reported parent to the nearest ancestor. We
36263626+ # want to know when we're dealing with such a parent.
36273627+ # * Further, there may be no "nearest ancestor" if the entire history
36283628+ # of that parent was also pruned. (Detectable by the parent being
36293629+ # 'None')
36303630+ # Remove all parents rewritten to None, and keep track of which parents
36313631+ # were rewritten to an ancestor.
36323632+ tmp = zip(parents,
36333633+ orig_parents,
36343634+ [(x in _SKIPPED_COMMITS or always_prune) for x in orig_parents])
36353635+ tmp2 = [x for x in tmp if x[0] is not None]
36363636+ if not tmp2:
36373637+ # All ancestors have been pruned; we have no parents.
36383638+ return [], None
36393639+ parents, orig_parents, is_rewritten = [list(x) for x in zip(*tmp2)]
36403640+36413641+ # We can't have redundant parents if we don't have at least 2 parents
36423642+ if len(parents) < 2:
36433643+ return parents, None
36443644+36453645+ # Don't remove redundant parents if user doesn't want us to
36463646+ if self._args.prune_degenerate == 'never':
36473647+ return parents, None
36483648+36493649+ # Remove duplicate parents (if both sides of history have lots of commits
36503650+ # which become empty due to pruning, the most recent ancestor on both
36513651+ # sides may be the same commit), except only remove parents that have
36523652+ # been rewritten due to previous empty pruning.
36533653+ seen = set()
36543654+ seen_add = seen.add
36553655+ # Deleting duplicate rewritten parents means keeping parents if either
36563656+ # they have not been seen or they are ones that have not been rewritten.
36573657+ parents_copy = parents
36583658+ uniq = [[p, orig_parents[i], is_rewritten[i]] for i, p in enumerate(parents)
36593659+ if not (p in seen or seen_add(p)) or not is_rewritten[i]]
36603660+ parents, orig_parents, is_rewritten = [list(x) for x in zip(*uniq)]
36613661+ if len(parents) < 2:
36623662+ return parents_copy, parents[0]
36633663+36643664+ # Flatten unnecessary merges. (If one side of history is entirely
36653665+ # empty commits that were pruned, we may end up attempting to
36663666+ # merge a commit with its ancestor. Remove parents that are an
36673667+ # ancestor of another parent.)
36683668+ num_parents = len(parents)
36693669+ to_remove = []
36703670+ for cur in range(num_parents):
36713671+ if not is_rewritten[cur]:
36723672+ continue
36733673+ for other in range(num_parents):
36743674+ if cur == other:
36753675+ continue
36763676+ if not self._graph.is_ancestor(parents[cur], parents[other]):
36773677+ continue
36783678+ # parents[cur] is an ancestor of parents[other], so parents[cur]
36793679+ # seems redundant. However, if it was intentionally redundant
36803680+ # (e.g. a no-ff merge) in the original, then we want to keep it.
36813681+ if not always_prune and \
36823682+ self._orig_graph.is_ancestor(orig_parents[cur],
36833683+ orig_parents[other]):
36843684+ continue
36853685+ # Some folks want their history to have all first parents be merge
36863686+ # commits (except for any root commits), and always do a merge --no-ff.
36873687+ # For such folks, don't remove the first parent even if it's an
36883688+ # ancestor of other commits.
36893689+ if self._args.no_ff and cur == 0:
36903690+ continue
36913691+ # Okay so the cur-th parent is an ancestor of the other-th parent,
36923692+ # and it wasn't that way in the original repository; mark the
36933693+ # cur-th parent as removable.
36943694+ to_remove.append(cur)
36953695+ break # cur removed, so skip rest of others -- i.e. check cur+=1
36963696+ for x in reversed(to_remove):
36973697+ parents.pop(x)
36983698+ if len(parents) < 2:
36993699+ return parents_copy, parents[0]
37003700+37013701+ return parents, None
37023702+37033703+ def _prunable(self, commit, new_1st_parent, had_file_changes, orig_parents):
37043704+ parents = commit.parents
37053705+37063706+ if self._args.prune_empty == 'never':
37073707+ return False
37083708+ always_prune = (self._args.prune_empty == 'always')
37093709+37103710+ # For merge commits, unless there are prunable (redundant) parents, we
37113711+ # do not want to prune
37123712+ if len(parents) >= 2 and not new_1st_parent:
37133713+ return False
37143714+37153715+ if len(parents) < 2:
37163716+ # Special logic for commits that started empty...
37173717+ if not had_file_changes and not always_prune:
37183718+ had_parents_pruned = (len(parents) < len(orig_parents) or
37193719+ (len(orig_parents) == 1 and
37203720+ orig_parents[0] in _SKIPPED_COMMITS))
37213721+ # If the commit remains empty and had parents which were pruned,
37223722+ # then prune this commit; otherwise, retain it
37233723+ return (not commit.file_changes and had_parents_pruned)
37243724+37253725+ # We can only get here if the commit didn't start empty, so if it's
37263726+ # empty now, it obviously became empty
37273727+ if not commit.file_changes:
37283728+ return True
37293729+37303730+ # If there are no parents of this commit and we didn't match the case
37313731+ # above, then this commit cannot be pruned. Since we have no parent(s)
37323732+ # to compare to, abort now to prevent future checks from failing.
37333733+ if not parents:
37343734+ return False
37353735+37363736+ # Similarly, we cannot handle the hard cases if we don't have a pipe
37373737+ # to communicate with fast-import
37383738+ if not self._import_pipes:
37393739+ return False
37403740+37413741+ # If there have not been renames/remappings of IDs (due to insertion of
37423742+ # new blobs), then we can sometimes know things aren't prunable with a
37433743+ # simple check
37443744+ if not _IDS.has_renames():
37453745+ # non-merge commits can only be empty if blob/file-change editing caused
37463746+ # all file changes in the commit to have the same file contents as
37473747+ # the parent.
37483748+ changed_files = set(change.filename for change in commit.file_changes)
37493749+ if len(orig_parents) < 2 and changed_files - self._files_tweaked:
37503750+ return False
37513751+37523752+ # Finally, the hard case: due to either blob rewriting, or due to pruning
37533753+ # of empty commits wiping out the first parent history back to the merge
37543754+ # base, the list of file_changes we have may not actually differ from our
37553755+ # (new) first parent's version of the files, i.e. this would actually be
37563756+ # an empty commit. Check by comparing the contents of this commit to its
37573757+ # (remaining) parent.
37583758+ #
37593759+ # NOTE on why this works, for the case of original first parent history
37603760+ # having been pruned away due to being empty:
37613761+ # The first parent history having been pruned away due to being
37623762+ # empty implies the original first parent would have a tree (after
37633763+ # filtering) that matched the merge base's tree. Since
37643764+ # file_changes has the changes needed to go from what would have
37653765+ # been the first parent to our new commit, and what would have been
37663766+ # our first parent has a tree that matches the merge base, then if
37673767+ # the new first parent has a tree matching the versions of files in
37683768+ # file_changes, then this new commit is empty and thus prunable.
37693769+ fi_input, fi_output = self._import_pipes
37703770+ self._flush_renames() # Avoid fi_output having other stuff present
37713771+ # Optimization note: we could have two loops over file_changes, the
37723772+ # first doing all the self._output.write() calls, and the second doing
37733773+ # the rest. But I'm worried about fast-import blocking on fi_output
37743774+ # buffers filling up so I instead read from it as I go.
37753775+ for change in commit.file_changes:
37763776+ parent = new_1st_parent or commit.parents[0] # exists due to above checks
37773777+ quoted_filename = PathQuoting.enquote(change.filename)
37783778+ if isinstance(parent, int):
37793779+ self._output.write(b"ls :%d %s\n" % (parent, quoted_filename))
37803780+ else:
37813781+ self._output.write(b"ls %s %s\n" % (parent, quoted_filename))
37823782+ self._output.flush()
37833783+ parent_version = fi_output.readline().split()
37843784+ if change.type == b'D':
37853785+ if parent_version != [b'missing', quoted_filename]:
37863786+ return False
37873787+ else:
37883788+ blob_sha = change.blob_id
37893789+ if isinstance(change.blob_id, int):
37903790+ self._output.write(b"get-mark :%d\n" % change.blob_id)
37913791+ self._output.flush()
37923792+ blob_sha = fi_output.readline().rstrip()
37933793+ if parent_version != [change.mode, b'blob', blob_sha, quoted_filename]:
37943794+ return False
37953795+37963796+ return True
37973797+37983798+ def _record_remapping(self, commit, orig_parents):
37993799+ new_id = None
38003800+ # Record the mapping of old commit hash to new one
38013801+ if commit.original_id and self._import_pipes:
38023802+ fi_input, fi_output = self._import_pipes
38033803+ self._output.write(b"get-mark :%d\n" % commit.id)
38043804+ self._output.flush()
38053805+ orig_id = commit.original_id
38063806+ self._commit_short_old_hashes[orig_id[0:7]].add(orig_id)
38073807+ # Note that we have queued up an id for later reading; flush a
38083808+ # few of the older ones if we have too many queued up
38093809+ self._pending_renames[orig_id] = commit.id
38103810+ self._flush_renames(None, limit=40)
38113811+ # Also, record if this was a merge commit that turned into a non-merge
38123812+ # commit.
38133813+ if len(orig_parents) >= 2 and len(commit.parents) < 2:
38143814+ self._commits_no_longer_merges.append((commit.original_id, new_id))
38153815+38163816+ def callback_metadata(self, extra_items = dict()):
38173817+ return {'commit_rename_func': self._translate_commit_hash,
38183818+ 'ancestry_graph': self._graph,
38193819+ 'original_ancestry_graph': self._orig_graph,
38203820+ **extra_items}
38213821+38223822+ def _tweak_blob(self, blob):
38233823+ if self._args.max_blob_size and len(blob.data) > self._args.max_blob_size:
38243824+ blob.skip()
38253825+38263826+ if blob.original_id in self._args.strip_blobs_with_ids:
38273827+ blob.skip()
38283828+38293829+ if ( self._args.replace_text
38303830+ and not self._file_info_callback
38313831+ # not (if blob contains zero byte in the first 8Kb, that is, if blob is binary data)
38323832+ and not b"\0" in blob.data[0:8192]
38333833+ ):
38343834+ for literal, replacement in self._args.replace_text['literals']:
38353835+ blob.data = blob.data.replace(literal, replacement)
38363836+ for regex, replacement in self._args.replace_text['regexes']:
38373837+ blob.data = regex.sub(replacement, blob.data)
38383838+38393839+ if self._blob_callback:
38403840+ self._blob_callback(blob, self.callback_metadata())
38413841+38423842+ self._insert_into_stream(blob)
38433843+38443844+ def _filter_files(self, commit):
38453845+ def filename_matches(path_expression, pathname):
38463846+ ''' Returns whether path_expression matches pathname or a leading
38473847+ directory thereof, allowing path_expression to not have a trailing
38483848+ slash even if it is meant to match a leading directory. '''
38493849+ if path_expression == b'':
38503850+ return True
38513851+ n = len(path_expression)
38523852+ if (pathname.startswith(path_expression) and
38533853+ (path_expression[n-1:n] == b'/' or
38543854+ len(pathname) == n or
38553855+ pathname[n:n+1] == b'/')):
38563856+ return True
38573857+ return False
38583858+38593859+ def newname(path_changes, pathname, use_base_name, filtering_is_inclusive):
38603860+ ''' Applies filtering and rename changes from path_changes to pathname,
38613861+ returning any of None (file isn't wanted), original filename (file
38623862+ is wanted with original name), or new filename. '''
38633863+ wanted = False
38643864+ full_pathname = pathname
38653865+ if use_base_name:
38663866+ pathname = os.path.basename(pathname)
38673867+ for (mod_type, match_type, path_exp) in path_changes:
38683868+ if mod_type == 'filter' and not wanted:
38693869+ assert match_type in ('match', 'glob', 'regex')
38703870+ if match_type == 'match' and filename_matches(path_exp, pathname):
38713871+ wanted = True
38723872+ if match_type == 'glob' and fnmatch.fnmatch(pathname, path_exp):
38733873+ wanted = True
38743874+ if match_type == 'regex' and path_exp.search(pathname):
38753875+ wanted = True
38763876+ elif mod_type == 'rename':
38773877+ match, repl = path_exp
38783878+ assert match_type in ('match','regex') # glob was translated to regex
38793879+ if match_type == 'match' and filename_matches(match, full_pathname):
38803880+ full_pathname = full_pathname.replace(match, repl, 1)
38813881+ pathname = full_pathname # rename incompatible with use_base_name
38823882+ if match_type == 'regex':
38833883+ full_pathname = match.sub(repl, full_pathname)
38843884+ pathname = full_pathname # rename incompatible with use_base_name
38853885+ return full_pathname if (wanted == filtering_is_inclusive) else None
38863886+38873887+ args = self._args
38883888+ new_file_changes = {} # Assumes no renames or copies, otherwise collisions
38893889+ for change in commit.file_changes:
38903890+ # NEEDSWORK: _If_ we ever want to pass `--full-tree` to fast-export and
38913891+ # parse that output, we'll need to modify this block; `--full-tree`
38923892+ # issues a deleteall directive which has no filename, and thus this
38933893+ # block would normally strip it. Of course, FileChange() and
38943894+ # _parse_optional_filechange() would need updates too.
38953895+ if change.type == b'DELETEALL':
38963896+ new_file_changes[b''] = change
38973897+ continue
38983898+ if change.filename in self._newnames:
38993899+ change.filename = self._newnames[change.filename]
39003900+ else:
39013901+ original_filename = change.filename
39023902+ change.filename = newname(args.path_changes, change.filename,
39033903+ args.use_base_name, args.inclusive)
39043904+ if self._filename_callback:
39053905+ change.filename = self._filename_callback(change.filename)
39063906+ self._newnames[original_filename] = change.filename
39073907+ if not change.filename:
39083908+ continue # Filtering criteria excluded this file; move on to next one
39093909+ if change.filename in new_file_changes:
39103910+ # Getting here means that path renaming is in effect, and caused one
39113911+ # path to collide with another. That's usually bad, but can be okay
39123912+ # under two circumstances:
39133913+ # 1) Sometimes people have a file named OLDFILE in old revisions of
39143914+ # history, and they rename to NEWFILE, and would like to rewrite
39153915+ # history so that all revisions refer to it as NEWFILE. As such,
39163916+ # we can allow a collision when (at least) one of the two paths
39173917+ # is a deletion. Note that if OLDFILE and NEWFILE are unrelated
39183918+ # this also allows the rewrite to continue, which makes sense
39193919+ # since OLDFILE is no longer in the way.
39203920+ # 2) If OLDFILE and NEWFILE are exactly equal, then writing them
39213921+ # both to the same location poses no problem; we only need one
39223922+ # file. (This could come up if someone copied a file in some
39233923+ # commit, then later either deleted the file or kept it exactly
39243924+ # in sync with the original with any changes, and then decides
39253925+ # they want to rewrite history to only have one of the two files)
39263926+ colliding_change = new_file_changes[change.filename]
39273927+ if change.type == b'D':
39283928+ # We can just throw this one away and keep the other
39293929+ continue
39303930+ elif change.type == b'M' and (
39313931+ change.mode == colliding_change.mode and
39323932+ change.blob_id == colliding_change.blob_id):
39333933+ # The two are identical, so we can throw this one away and keep other
39343934+ continue
39353935+ elif new_file_changes[change.filename].type != b'D':
39363936+ raise SystemExit(_("File renaming caused colliding pathnames!\n") +
39373937+ _(" Commit: {}\n").format(commit.original_id) +
39383938+ _(" Filename: {}").format(change.filename))
39393939+ # Strip files that are too large
39403940+ if self._args.max_blob_size and \
39413941+ self._unpacked_size.get(change.blob_id, 0) > self._args.max_blob_size:
39423942+ continue
39433943+ if self._args.strip_blobs_with_ids and \
39443944+ change.blob_id in self._args.strip_blobs_with_ids:
39453945+ continue
39463946+ # Otherwise, record the change
39473947+ new_file_changes[change.filename] = change
39483948+ commit.file_changes = [v for k,v in sorted(new_file_changes.items())]
39493949+39503950+ def _tweak_commit(self, commit, aux_info):
39513951+ if self._args.replace_message:
39523952+ for literal, replacement in self._args.replace_message['literals']:
39533953+ commit.message = commit.message.replace(literal, replacement)
39543954+ for regex, replacement in self._args.replace_message['regexes']:
39553955+ commit.message = regex.sub(replacement, commit.message)
39563956+ if self._message_callback:
39573957+ commit.message = self._message_callback(commit.message)
39583958+39593959+ # Change the commit message according to callback
39603960+ if not self._args.preserve_commit_hashes:
39613961+ commit.message = self._hash_re.sub(self._translate_commit_hash,
39623962+ commit.message)
39633963+39643964+ # Change the author & committer according to mailmap rules
39653965+ args = self._args
39663966+ if args.mailmap:
39673967+ commit.author_name, commit.author_email = \
39683968+ args.mailmap.translate(commit.author_name, commit.author_email)
39693969+ commit.committer_name, commit.committer_email = \
39703970+ args.mailmap.translate(commit.committer_name, commit.committer_email)
39713971+ # Change author & committer according to callbacks
39723972+ if self._name_callback:
39733973+ commit.author_name = self._name_callback(commit.author_name)
39743974+ commit.committer_name = self._name_callback(commit.committer_name)
39753975+ if self._email_callback:
39763976+ commit.author_email = self._email_callback(commit.author_email)
39773977+ commit.committer_email = self._email_callback(commit.committer_email)
39783978+39793979+ # Sometimes the 'branch' given is a tag; if so, rename it as requested so
39803980+ # we don't get any old tagnames
39813981+ if self._args.tag_rename:
39823982+ commit.branch = RepoFilter._do_tag_rename(args.tag_rename, commit.branch)
39833983+ if self._refname_callback:
39843984+ commit.branch = self._refname_callback(commit.branch)
39853985+39863986+ # Filter or rename the list of file changes
39873987+ orig_file_changes = set(commit.file_changes)
39883988+ self._filter_files(commit)
39893989+39903990+ # Record ancestry graph
39913991+ parents, orig_parents = commit.parents, aux_info['orig_parents']
39923992+ if self._args.state_branch:
39933993+ external_parents = parents
39943994+ else:
39953995+ external_parents = [p for p in parents if not isinstance(p, int)]
39963996+ # The use of 'reversed' is intentional here; there is a risk that we have
39973997+ # duplicates in parents, and we want to map from parents to the first
39983998+ # entry we find in orig_parents in such cases.
39993999+ parent_reverse_dict = dict(zip(reversed(parents), reversed(orig_parents)))
40004000+40014001+ self._graph.record_external_commits(external_parents)
40024002+ self._orig_graph.record_external_commits(external_parents)
40034003+ self._graph.add_commit_and_parents(commit.id, parents) # new githash unknown
40044004+ self._orig_graph.add_commit_and_parents(commit.old_id, orig_parents,
40054005+ commit.original_id)
40064006+40074007+ # Prune parents (due to pruning of empty commits) if relevant, note that
40084008+ # new_1st_parent is None unless this was a merge commit that is becoming
40094009+ # a non-merge
40104010+ prev_1st_parent = parents[0] if parents else None
40114011+ parents, new_1st_parent = self._maybe_trim_extra_parents(orig_parents,
40124012+ parents)
40134013+ commit.parents = parents
40144014+40154015+ # If parents were pruned, then we need our file changes to be relative
40164016+ # to the new first parent
40174017+ #
40184018+ # Notes:
40194019+ # * new_1st_parent and new_1st_parent != parents[0] uniquely happens for example when:
40204020+ # working on merge, selecting subset of files and merge base still
40214021+ # valid while first parent history doesn't touch any of those paths,
40224022+ # but second parent history does. prev_1st_parent had already been
40234023+ # rewritten to the non-None first ancestor and it remains valid.
40244024+ # self._maybe_trim_extra_parents() avoids removing this first parent
40254025+ # because it'd make the commit a non-merge. However, if there are
40264026+ # no file_changes of note, we'll drop this commit and mark
40274027+ # new_1st_parent as the new replacement. To correctly determine if
40284028+ # there are no file_changes of note, we need to have the list of
40294029+ # file_changes relative to new_1st_parent.
40304030+ # (See t9390#3, "basic -> basic-ten using '--path ten'")
40314031+ # * prev_1st_parent != parents[0] happens for example when:
40324032+ # similar to above, but the merge base is no longer valid and was
40334033+ # pruned away as well. Then parents started as e.g. [None, $num],
40344034+ # and both prev_1st_parent and new_1st_parent are None, while parents
40354035+ # after self._maybe_trim_extra_parents() becomes just [$num].
40364036+ # (See t9390#67, "degenerate merge with non-matching filename".)
40374037+ # Since $num was originally a second parent, we need to rewrite
40384038+ # file changes to be relative to parents[0].
40394039+ # * TODO: We should be getting the changes relative to the new first
40404040+ # parent even if self._fep is None, BUT we can't. Our method of
40414041+ # getting the changes right now is an external git diff invocation,
40424042+ # which we can't do if we just have a fast export stream. We can't
40434043+ # really work around it by querying the fast-import stream either,
40444044+ # because the 'ls' directive only allows us to list info about
40454045+ # specific paths, but we need to find out which paths exist in two
40464046+ # commits and then query them. We could maybe force checkpointing in
40474047+ # fast-import, then doing a diff from what'll be the new first parent
40484048+ # back to prev_1st_parent (which may be None, i.e. empty tree), using
40494049+ # the fact that in A->{B,C}->D, where D is merge of B & C, the diff
40504050+ # from C->D == C->A + A->B + B->D, and in these cases A==B, so it
40514051+ # simplifies to C->D == C->A + B->D, and C is our new 1st parent
40524052+ # commit, A is prev_1st_commit, and B->D is commit.file_changes that
40534053+ # we already have. However, checkpointing the fast-import process
40544054+ # and figuring out how long to wait before we can run our diff just
40554055+ # seems excessive. For now, just punt and assume the merge wasn't
40564056+ # "evil" (i.e. that it's remerge-diff is empty, as is true for most
40574057+ # merges). If the merge isn't evil, no further steps are necessary.
40584058+ if parents and self._fep and (
40594059+ prev_1st_parent != parents[0] or
40604060+ new_1st_parent and new_1st_parent != parents[0]):
40614061+ # Get the id from the original fast export stream corresponding to the
40624062+ # new 1st parent. As noted above, that new 1st parent might be
40634063+ # new_1st_parent, or if that is None, it'll be parents[0].
40644064+ will_be_1st = new_1st_parent or parents[0]
40654065+ old_id = parent_reverse_dict[will_be_1st]
40664066+ # Now, translate that to a hash
40674067+ will_be_1st_commit_hash = self._orig_graph.map_to_hash(old_id)
40684068+ # Get the changes from what is going to be the new 1st parent to this
40694069+ # merge commit. Note that since we are going from the new 1st parent
40704070+ # to the merge commit, we can just replace the existing
40714071+ # commit.file_changes rather than getting something we need to combine
40724072+ # with the existing commit.file_changes. Also, we can just replace
40734073+ # because prev_1st_parent is an ancestor of will_be_1st_commit_hash
40744074+ # (or prev_1st_parent is None and first parent history is gone), so
40754075+ # even if we retain prev_1st_parent and do not prune it, the changes
40764076+ # will still work given the snapshot-based way fast-export/fast-import
40774077+ # work.
40784078+ commit.file_changes = GitUtils.get_file_changes(self._repo_working_dir,
40794079+ will_be_1st_commit_hash,
40804080+ commit.original_id)
40814081+40824082+ # Save these and filter them
40834083+ orig_file_changes = set(commit.file_changes)
40844084+ self._filter_files(commit)
40854085+40864086+ # Process the --file-info-callback
40874087+ if self._file_info_callback:
40884088+ if self._file_info_value is None:
40894089+ source_working_dir = self._args.source or b'.'
40904090+ self._file_info_value = FileInfoValueHelper(self._args.replace_text,
40914091+ self.insert,
40924092+ source_working_dir)
40934093+ new_file_changes = []
40944094+ for change in commit.file_changes:
40954095+ if change.type != b'D':
40964096+ assert(change.type == b'M')
40974097+ (filename, mode, blob_id) = \
40984098+ self._file_info_callback(change.filename,
40994099+ change.mode,
41004100+ change.blob_id,
41014101+ self._file_info_value)
41024102+ if mode is None:
41034103+ # TODO: Should deletion of the file even be a feature? Might
41044104+ # want to remove this branch of the if-elif-else.
41054105+ assert(filename is not None)
41064106+ assert(blob_id is not None)
41074107+ new_change = FileChange(b'D', filename)
41084108+ elif filename is None:
41094109+ continue # Drop the FileChange from this commit
41104110+ else:
41114111+ new_change = FileChange(b'M', filename, blob_id, mode)
41124112+ else:
41134113+ new_change = change # use change as-is for deletions
41144114+ new_file_changes.append(new_change)
41154115+ commit.file_changes = new_file_changes
41164116+41174117+ # Call the user-defined callback, if any
41184118+ if self._commit_callback:
41194119+ self._commit_callback(commit, self.callback_metadata(aux_info))
41204120+41214121+ # Find out which files were modified by the callbacks. Such paths could
41224122+ # lead to subsequent commits being empty (e.g. if removing a line containing
41234123+ # a password from every version of a file that had the password, and some
41244124+ # later commit did nothing more than remove that line)
41254125+ final_file_changes = set(commit.file_changes)
41264126+ if self._args.replace_text or self._blob_callback:
41274127+ differences = orig_file_changes.union(final_file_changes)
41284128+ else:
41294129+ differences = orig_file_changes.symmetric_difference(final_file_changes)
41304130+ self._files_tweaked.update(x.filename for x in differences)
41314131+41324132+ # Now print the resulting commit, or if prunable skip it
41334133+ if not commit.dumped:
41344134+ if not self._prunable(commit, new_1st_parent,
41354135+ aux_info['had_file_changes'], orig_parents):
41364136+ self._insert_into_stream(commit)
41374137+ self._record_remapping(commit, orig_parents)
41384138+ else:
41394139+ rewrite_to = new_1st_parent or commit.first_parent()
41404140+ commit.skip(new_id = rewrite_to)
41414141+ if self._args.state_branch:
41424142+ alias = Alias(commit.old_id or commit.id, rewrite_to or deleted_hash)
41434143+ self._insert_into_stream(alias)
41444144+ if commit.branch.startswith(b'refs/') or commit.branch == b'HEAD':
41454145+ # The special check above is because when direct revisions are passed
41464146+ # along to fast-export (such as with stashes), there is a chance the
41474147+ # revision is rewritten to nothing. In such cases, we don't want to
41484148+ # point an invalid ref that just names a revision to some other point.
41494149+ reset = Reset(commit.branch, rewrite_to or deleted_hash)
41504150+ self._insert_into_stream(reset)
41514151+ self._commit_renames[commit.original_id] = None
41524152+41534153+ # Show progress
41544154+ self._num_commits += 1
41554155+ if not self._args.quiet:
41564156+ self._progress_writer.show(self._parsed_message % self._num_commits)
41574157+41584158+ @staticmethod
41594159+ def _do_tag_rename(rename_pair, tagname):
41604160+ old, new = rename_pair.split(b':', 1)
41614161+ old, new = b'refs/tags/'+old, b'refs/tags/'+new
41624162+ if tagname.startswith(old):
41634163+ return tagname.replace(old, new, 1)
41644164+ return tagname
41654165+41664166+ def _tweak_tag(self, tag):
41674167+ # Tweak the tag message according to callbacks
41684168+ if self._args.replace_message:
41694169+ for literal, replacement in self._args.replace_message['literals']:
41704170+ tag.message = tag.message.replace(literal, replacement)
41714171+ for regex, replacement in self._args.replace_message['regexes']:
41724172+ tag.message = regex.sub(replacement, tag.message)
41734173+ if self._message_callback:
41744174+ tag.message = self._message_callback(tag.message)
41754175+41764176+ # Tweak the tag name according to tag-name-related callbacks
41774177+ tag_prefix = b'refs/tags/'
41784178+ fullref = tag_prefix+tag.ref
41794179+ if self._args.tag_rename:
41804180+ fullref = RepoFilter._do_tag_rename(self._args.tag_rename, fullref)
41814181+ if self._refname_callback:
41824182+ fullref = self._refname_callback(fullref)
41834183+ if not fullref.startswith(tag_prefix):
41844184+ msg = "Error: fast-import requires tags to be in refs/tags/ namespace."
41854185+ msg += "\n {} renamed to {}".format(tag_prefix+tag.ref, fullref)
41864186+ raise SystemExit(msg)
41874187+ tag.ref = fullref[len(tag_prefix):]
41884188+41894189+ # Tweak the tagger according to callbacks
41904190+ if self._args.mailmap:
41914191+ tag.tagger_name, tag.tagger_email = \
41924192+ self._args.mailmap.translate(tag.tagger_name, tag.tagger_email)
41934193+ if self._name_callback:
41944194+ tag.tagger_name = self._name_callback(tag.tagger_name)
41954195+ if self._email_callback:
41964196+ tag.tagger_email = self._email_callback(tag.tagger_email)
41974197+41984198+ # Call general purpose tag callback
41994199+ if self._tag_callback:
42004200+ self._tag_callback(tag, self.callback_metadata())
42014201+42024202+ def _tweak_reset(self, reset):
42034203+ if self._args.tag_rename:
42044204+ reset.ref = RepoFilter._do_tag_rename(self._args.tag_rename, reset.ref)
42054205+ if self._refname_callback:
42064206+ reset.ref = self._refname_callback(reset.ref)
42074207+ if self._reset_callback:
42084208+ self._reset_callback(reset, self.callback_metadata())
42094209+42104210+ def results_tmp_dir(self, create_if_missing=True):
42114211+ target_working_dir = self._args.target or b'.'
42124212+ git_dir = GitUtils.determine_git_dir(target_working_dir)
42134213+ d = os.path.join(git_dir, b'filter-repo')
42144214+ if create_if_missing and not os.path.isdir(d):
42154215+ os.mkdir(d)
42164216+ return d
42174217+42184218+ def _load_marks_file(self, marks_basename):
42194219+ full_branch = 'refs/heads/{}'.format(self._args.state_branch)
42204220+ marks_file = os.path.join(self.results_tmp_dir(), marks_basename)
42214221+ working_dir = self._args.target or b'.'
42224222+ cmd = ['git', '-C', working_dir, 'show-ref', full_branch]
42234223+ contents = b''
42244224+ if subproc.call(cmd, stdout=subprocess.DEVNULL) == 0:
42254225+ cmd = ['git', '-C', working_dir, 'show',
42264226+ '%s:%s' % (full_branch, decode(marks_basename))]
42274227+ try:
42284228+ contents = subproc.check_output(cmd)
42294229+ except subprocess.CalledProcessError as e: # pragma: no cover
42304230+ raise SystemExit(_("Failed loading %s from %s") %
42314231+ (decode(marks_basename), full_branch))
42324232+ if contents:
42334233+ biggest_id = max(int(x.split()[0][1:]) for x in contents.splitlines())
42344234+ _IDS._next_id = max(_IDS._next_id, biggest_id+1)
42354235+ with open(marks_file, 'bw') as f:
42364236+ f.write(contents)
42374237+ return marks_file
42384238+42394239+ def _save_marks_files(self):
42404240+ basenames = [b'source-marks', b'target-marks']
42414241+ working_dir = self._args.target or b'.'
42424242+42434243+ # Check whether the branch exists
42444244+ parent = []
42454245+ full_branch = 'refs/heads/{}'.format(self._args.state_branch)
42464246+ cmd = ['git', '-C', working_dir, 'show-ref', full_branch]
42474247+ if subproc.call(cmd, stdout=subprocess.DEVNULL) == 0:
42484248+ parent = ['-p', full_branch]
42494249+42504250+ # Run 'git hash-object $MARKS_FILE' for each marks file, save result
42514251+ blob_hashes = {}
42524252+ for marks_basename in basenames:
42534253+ marks_file = os.path.join(self.results_tmp_dir(), marks_basename)
42544254+ if not os.path.isfile(marks_file): # pragma: no cover
42554255+ raise SystemExit(_("Failed to find %s to save to %s")
42564256+ % (marks_file, self._args.state_branch))
42574257+ cmd = ['git', '-C', working_dir, 'hash-object', '-w', marks_file]
42584258+ blob_hashes[marks_basename] = subproc.check_output(cmd).strip()
42594259+42604260+ # Run 'git mktree' to create a tree out of it
42614261+ p = subproc.Popen(['git', '-C', working_dir, 'mktree'],
42624262+ stdin=subprocess.PIPE, stdout=subprocess.PIPE)
42634263+ for b in basenames:
42644264+ p.stdin.write(b'100644 blob %s\t%s\n' % (blob_hashes[b], b))
42654265+ p.stdin.close()
42664266+ p.wait()
42674267+ tree = p.stdout.read().strip()
42684268+42694269+ # Create the new commit
42704270+ cmd = (['git', '-C', working_dir, 'commit-tree', '-m', 'New mark files',
42714271+ tree] + parent)
42724272+ commit = subproc.check_output(cmd).strip()
42734273+ subproc.call(['git', '-C', working_dir, 'update-ref', full_branch, commit])
42744274+42754275+ def importer_only(self):
42764276+ self._run_sanity_checks()
42774277+ self._setup_output()
42784278+42794279+ def set_output(self, outputRepoFilter):
42804280+ assert outputRepoFilter._output
42814281+42824282+ # set_output implies this RepoFilter is doing exporting, though may not
42834283+ # be the only one.
42844284+ self._setup_input(use_done_feature = False)
42854285+42864286+ # Set our output management up to pipe to outputRepoFilter's locations
42874287+ self._managed_output = False
42884288+ self._output = outputRepoFilter._output
42894289+ self._import_pipes = outputRepoFilter._import_pipes
42904290+42914291+ # Handle sanity checks, though currently none needed for export-only cases
42924292+ self._run_sanity_checks()
42934293+42944294+ def _read_stash(self):
42954295+ if self._stash:
42964296+ return
42974297+ if self._orig_refs and b'refs/stash' in self._orig_refs and \
42984298+ self._args.refs == ['--all']:
42994299+ repo_working_dir = self._args.source or b'.'
43004300+ git_dir = GitUtils.determine_git_dir(repo_working_dir)
43014301+ stash = os.path.join(git_dir, b'logs', b'refs', b'stash')
43024302+ if os.path.exists(stash):
43034303+ self._stash = []
43044304+ with open(stash, 'br') as f:
43054305+ for line in f:
43064306+ (oldhash, newhash, rest) = line.split(None, 2)
43074307+ self._stash.append((newhash, rest))
43084308+ self._args.refs.extend([x[0] for x in self._stash])
43094309+43104310+ def _write_stash(self):
43114311+ last = deleted_hash
43124312+ if self._stash:
43134313+ target_working_dir = self._args.target or b'.'
43144314+ git_dir = GitUtils.determine_git_dir(target_working_dir)
43154315+ stash = os.path.join(git_dir, b'logs', b'refs', b'stash')
43164316+ with open(stash, 'bw') as f:
43174317+ for (hash, rest) in self._stash:
43184318+ new_hash = self._get_rename(hash)
43194319+ if new_hash is None:
43204320+ continue
43214321+ f.write(b' '.join([last, new_hash, rest]) + b'\n')
43224322+ last = new_hash
43234323+ print(_("Rewrote the stash."))
43244324+43254325+ def _setup_input(self, use_done_feature):
43264326+ if self._args.stdin:
43274327+ self._input = sys.stdin.detach()
43284328+ sys.stdin = None # Make sure no one tries to accidentally use it
43294329+ self._fe_orig = None
43304330+ else:
43314331+ self._read_stash()
43324332+ skip_blobs = (self._blob_callback is None and
43334333+ (self._args.replace_text is None or
43344334+ self._file_info_callback is not None) and
43354335+ self._args.source == self._args.target)
43364336+ extra_flags = []
43374337+ if skip_blobs:
43384338+ extra_flags.append('--no-data')
43394339+ if self._args.max_blob_size:
43404340+ self._unpacked_size, packed_size = GitUtils.get_blob_sizes()
43414341+ if use_done_feature:
43424342+ extra_flags.append('--use-done-feature')
43434343+ if write_marks:
43444344+ extra_flags.append(b'--mark-tags')
43454345+ if self._args.state_branch:
43464346+ assert(write_marks)
43474347+ source_marks_file = self._load_marks_file(b'source-marks')
43484348+ extra_flags.extend([b'--export-marks='+source_marks_file,
43494349+ b'--import-marks='+source_marks_file])
43504350+ if self._args.preserve_commit_encoding is not None: # pragma: no cover
43514351+ reencode = 'no' if self._args.preserve_commit_encoding else 'yes'
43524352+ extra_flags.append('--reencode='+reencode)
43534353+ if self._args.date_order:
43544354+ extra_flags.append('--date-order')
43554355+ location = ['-C', self._args.source] if self._args.source else []
43564356+ fep_cmd = ['git'] + location + ['fast-export', '--show-original-ids',
43574357+ '--signed-tags=strip', '--tag-of-filtered-object=rewrite',
43584358+ '--fake-missing-tagger', '--reference-excluded-parents'
43594359+ ] + extra_flags + self._args.refs
43604360+ self._fep = subproc.Popen(fep_cmd, bufsize=-1, stdout=subprocess.PIPE)
43614361+ self._input = self._fep.stdout
43624362+ if self._args.dry_run or self._args.debug:
43634363+ self._fe_orig = os.path.join(self.results_tmp_dir(),
43644364+ b'fast-export.original')
43654365+ output = open(self._fe_orig, 'bw')
43664366+ self._input = InputFileBackup(self._input, output)
43674367+ if self._args.debug:
43684368+ tmp = [decode(x) if isinstance(x, bytes) else x for x in fep_cmd]
43694369+ print("[DEBUG] Running: {}".format(' '.join(tmp)))
43704370+ print(" (saving a copy of the output at {})"
43714371+ .format(decode(self._fe_orig)))
43724372+43734373+ def _setup_output(self):
43744374+ if not self._args.dry_run:
43754375+ location = ['-C', self._args.target] if self._args.target else []
43764376+ fip_cmd = ['git'] + location + ['-c', 'core.ignorecase=false',
43774377+ 'fast-import', '--force', '--quiet']
43784378+ if date_format_permissive:
43794379+ fip_cmd.append('--date-format=raw-permissive')
43804380+ if self._args.state_branch:
43814381+ target_marks_file = self._load_marks_file(b'target-marks')
43824382+ fip_cmd.extend([b'--export-marks='+target_marks_file,
43834383+ b'--import-marks='+target_marks_file])
43844384+ self._fip = subproc.Popen(fip_cmd, bufsize=-1,
43854385+ stdin=subprocess.PIPE, stdout=subprocess.PIPE)
43864386+ self._import_pipes = (self._fip.stdin, self._fip.stdout)
43874387+ if self._args.dry_run or self._args.debug:
43884388+ self._fe_filt = os.path.join(self.results_tmp_dir(),
43894389+ b'fast-export.filtered')
43904390+ self._output = open(self._fe_filt, 'bw')
43914391+ else:
43924392+ self._output = self._fip.stdin
43934393+ if self._args.debug and not self._args.dry_run:
43944394+ self._output = DualFileWriter(self._fip.stdin, self._output)
43954395+ tmp = [decode(x) if isinstance(x, bytes) else x for x in fip_cmd]
43964396+ print("[DEBUG] Running: {}".format(' '.join(tmp)))
43974397+ print(" (using the following file as input: {})"
43984398+ .format(decode(self._fe_filt)))
43994399+44004400+ def _migrate_origin_to_heads(self):
44014401+ source_working_dir = self._args.source or b'.'
44024402+ target_working_dir = self._args.target or b'.'
44034403+ refs_to_migrate = set(x for x in self._orig_refs
44044404+ if x.startswith(b'refs/remotes/origin/'))
44054405+ refs_to_warn_about = set()
44064406+ if refs_to_migrate:
44074407+ if self._args.debug:
44084408+ print("[DEBUG] Migrating refs/remotes/origin/* -> refs/heads/*")
44094409+ p = subproc.Popen('git update-ref --no-deref --stdin'.split(),
44104410+ stdin=subprocess.PIPE, cwd=source_working_dir)
44114411+ for ref in refs_to_migrate:
44124412+ if ref == b'refs/remotes/origin/HEAD':
44134413+ p.stdin.write(b'delete %s %s\n' % (ref, self._orig_refs[ref]))
44144414+ del self._orig_refs[ref]
44154415+ continue
44164416+ newref = ref.replace(b'refs/remotes/origin/', b'refs/heads/')
44174417+ if newref not in self._orig_refs:
44184418+ p.stdin.write(b'create %s %s\n' % (newref, self._orig_refs[ref]))
44194419+ self._orig_refs[newref] = self._orig_refs[ref]
44204420+ elif self._orig_refs[ref] != self._orig_refs[newref]:
44214421+ refs_to_warn_about.add(newref)
44224422+ p.stdin.write(b'delete %s %s\n' % (ref, self._orig_refs[ref]))
44234423+ del self._orig_refs[ref]
44244424+ p.stdin.close()
44254425+ if p.wait(): # pragma: no cover
44264426+ msg = _("git update-ref failed; see above")
44274427+ raise SystemExit(msg)
44284428+44294429+ if b'remote.origin.url' not in self._config_settings:
44304430+ return
44314431+44324432+ # For sensitive data removals, fetch ALL refs. Non-mirror clones normally
44334433+ # only grab branches and tags, but other refs may hold on to the sensitive
44344434+ # data as well.
44354435+ if self._args.sensitive_data_removal and \
44364436+ not self._args.no_fetch and \
44374437+ not self._already_ran and \
44384438+ self._config_settings.get(b'remote.origin.mirror', b'false') != b'true':
44394439+44404440+ if refs_to_warn_about:
44414441+ msg = ("Warning: You have refs modified from upstream:\n " +
44424442+ "\n ".join([decode(x) for x in refs_to_warn_about]) +
44434443+ "\n" +
44444444+ " We want to forcibly fetch from upstream to ensure\n" +
44454445+ " that all relevent refs are rewritten, but this will\n" +
44464446+ " discard your local changes before starting the\n" +
44474447+ " rewrite. Proceed with fetch (Y/N)?")
44484448+ response = input(msg)
44494449+44504450+ if response.lower() != 'y':
44514451+ self._args.no_fetch = True
44524452+ # Don't do the fetch, and don't remove the origin remote
44534453+ return
44544454+44554455+ cmd = 'git fetch -q --prune --update-head-ok --refmap "" origin +refs/*:refs/*'
44564456+ m = _("NOTICE: Fetching all refs from origin to make sure we rewrite\n"
44574457+ " all history that may reference the sensitive data, via\n"
44584458+ " "+cmd)
44594459+ print(m)
44604460+ ret = subproc.call([arg if arg != '""' else '' for arg in cmd.split()],
44614461+ cwd=source_working_dir)
44624462+ if ret != 0: # pragma: no cover
44634463+ m = _("Warning: Fetching all refs from origin failed")
44644464+ print(m)
44654465+ if self._args.sensitive_data_removal:
44664466+ return
44674467+44684468+ # Now remove the origin remote
44694469+ url = self._config_settings[b'remote.origin.url'].decode(errors='replace')
44704470+ m = _("NOTICE: Removing 'origin' remote; see 'Why is my origin removed?'\n"
44714471+ " in the manual if you want to push back there.\n"
44724472+ " (was %s)") % url
44734473+ print(m)
44744474+ subproc.call('git remote rm origin'.split(), cwd=target_working_dir)
44754475+44764476+ def _final_commands(self):
44774477+ self._finalize_handled = True
44784478+ self._done_callback and self._done_callback()
44794479+44804480+ if self._file_info_value:
44814481+ self._file_info_value.finalize()
44824482+ if not self._args.quiet:
44834483+ self._progress_writer.finish()
44844484+44854485+ def _ref_update(self, target_working_dir):
44864486+ # Start the update-ref process
44874487+ p = subproc.Popen('git update-ref --no-deref --stdin'.split(),
44884488+ stdin=subprocess.PIPE,
44894489+ cwd=target_working_dir)
44904490+44914491+ # Remove replace_refs from _orig_refs
44924492+ replace_refs = {k:v for k, v in self._orig_refs.items()
44934493+ if k.startswith(b'refs/replace/')}
44944494+ reverse_replace_refs = collections.defaultdict(list)
44954495+ for k,v in replace_refs.items():
44964496+ reverse_replace_refs[v].append(k)
44974497+ all(map(self._orig_refs.pop, replace_refs))
44984498+44994499+ # Remove unused refs
45004500+ exported_refs, imported_refs = self.get_exported_and_imported_refs()
45014501+ refs_to_nuke = exported_refs - imported_refs
45024502+ # Because revisions can be passed to fast-export which handles them as
45034503+ # though they were refs, we might have bad "refs" to nuke; strip them out.
45044504+ refs_to_nuke = [x for x in refs_to_nuke
45054505+ if x.startswith(b'refs/') or x == b'HEAD']
45064506+ if self._args.partial:
45074507+ refs_to_nuke = set()
45084508+ if refs_to_nuke and self._args.debug:
45094509+ print("[DEBUG] Deleting the following refs:\n "+
45104510+ decode(b"\n ".join(sorted(refs_to_nuke))))
45114511+ p.stdin.write(b''.join([b"delete %s\n" % x
45124512+ for x in refs_to_nuke]))
45134513+45144514+ # Delete or update and add replace_refs; note that fast-export automatically
45154515+ # handles 'update-no-add', we only need to take action for the other four
45164516+ # choices for replace_refs.
45174517+ self._flush_renames()
45184518+ actual_renames = {k:v for k,v in self._commit_renames.items() if k != v}
45194519+ if self._args.replace_refs in ['delete-no-add', 'delete-and-add']:
45204520+ # Delete old replace refs, if unwanted
45214521+ replace_refs_to_nuke = set(replace_refs)
45224522+ if self._args.replace_refs == 'delete-and-add':
45234523+ # git-update-ref won't allow us to update a ref twice, so be careful
45244524+ # to avoid deleting refs we'll later update
45254525+ replace_refs_to_nuke = replace_refs_to_nuke.difference(
45264526+ [b'refs/replace/'+x for x in actual_renames])
45274527+ p.stdin.write(b''.join([b"delete %s\n" % x
45284528+ for x in replace_refs_to_nuke]))
45294529+ if self._args.replace_refs in ['delete-and-add', 'update-or-add',
45304530+ 'update-and-add']:
45314531+ # Add new replace refs
45324532+ update_only = (self._args.replace_refs == 'update-or-add')
45334533+ p.stdin.write(b''.join([b"update refs/replace/%s %s\n" % (old, new)
45344534+ for old,new in actual_renames.items()
45354535+ if new and not (update_only and
45364536+ old in reverse_replace_refs)]))
45374537+45384538+ # Complete the update-ref process
45394539+ p.stdin.close()
45404540+ if p.wait():
45414541+ raise SystemExit(_("git update-ref failed; see above")) # pragma: no cover
45424542+45434543+ def _remap_to(self, oldish_hash):
45444544+ '''
45454545+ Given an oldish_hash (from the beginning of the current run), return:
45464546+ IF oldish_hash is NOT pruned:
45474547+ the hash of the rewrite of oldish_hash
45484548+ otherwise:
45494549+ the hash of the rewrite of the first unpruned ancestor of oldish_hash
45504550+ '''
45514551+ old_id = self._orig_graph._hash_to_id[oldish_hash]
45524552+ new_id = _IDS.translate(old_id)
45534553+ new_hash = self._graph.git_hash[new_id] if new_id else deleted_hash
45544554+ return new_hash
45554555+45564556+ def _compute_metadata(self, metadata_dir, orig_refs):
45574557+ #
45584558+ # First, handle commit_renames
45594559+ #
45604560+ old_commit_renames = dict()
45614561+ if not self._already_ran:
45624562+ commit_renames = {old: new
45634563+ for old, new in self._commit_renames.items()
45644564+ }
45654565+ else:
45664566+ # Read commit-map into old_commit_renames
45674567+ with open(os.path.join(metadata_dir, b'commit-map'), 'br') as f:
45684568+ f.readline() # Skip the header line
45694569+ for line in f:
45704570+ (old,new) = line.split()
45714571+ old_commit_renames[old] = new
45724572+ # Use A->B mappings in old_commit_renames, and B->C mappings in
45734573+ # self._commit_renames to yield A->C mappings in commit_renames
45744574+ commit_renames = {old: self._commit_renames.get(newish, newish)
45754575+ for old, newish in old_commit_renames.items()}
45764576+ # If there are any B->C mappings in self._commit_renames for which
45774577+ # there was no A->B mapping in old_commit_renames, then add the
45784578+ # B->C mapping to commit_renames too.
45794579+ seen = set(old_commit_renames.values())
45804580+ commit_renames.update({old: new
45814581+ for old, new in self._commit_renames.items()
45824582+ if old not in seen})
45834583+45844584+ #
45854585+ # Second, handle ref_maps
45864586+ #
45874587+ exported_refs, imported_refs = self.get_exported_and_imported_refs()
45884588+45894589+ old_commit_unrenames = dict()
45904590+ if not self._already_ran:
45914591+ old_ref_map = dict((refname, (old_hash, deleted_hash))
45924592+ for refname, old_hash in orig_refs.items()
45934593+ if refname in exported_refs)
45944594+ else:
45954595+ # old_commit_renames talk about how commits were renamed in the original
45964596+ # run. Let's reverse it to find out how to get from the intermediate
45974597+ # commit name, back to the original. Because everything in orig_refs
45984598+ # right now refers to the intermediate commits after the first run(s),
45994599+ # and we need to map them back to what they were before any changes.
46004600+ old_commit_unrenames = dict((v,k) for (k,v) in old_commit_renames.items())
46014601+46024602+ old_ref_map = {}
46034603+ # Populate old_ref_map from the 'ref-map' file
46044604+ with open(os.path.join(metadata_dir, b'ref-map'), 'br') as f:
46054605+ f.readline() # Skip the header line
46064606+ for line in f:
46074607+ (old,intermediate,ref) = line.split()
46084608+ old_ref_map[ref] = (old, intermediate)
46094609+ # Append to old_ref_map items from orig_refs that were exported, but
46104610+ # get the actual original commit name
46114611+ for refname, old_hash in orig_refs.items():
46124612+ if refname in old_ref_map:
46134613+ continue
46144614+ if refname not in exported_refs:
46154615+ continue
46164616+ # Compute older_hash
46174617+ original_hash = old_commit_unrenames.get(old_hash, old_hash)
46184618+ old_ref_map[refname] = (original_hash, deleted_hash)
46194619+46204620+ new_refs = {}
46214621+ new_refs_initialized = False
46224622+ ref_maps = {}
46234623+ self._orig_graph._ensure_reverse_maps_populated()
46244624+ for refname, pair in old_ref_map.items():
46254625+ old_hash, hash_ref_becomes_if_not_imported_in_this_run = pair
46264626+ if refname not in imported_refs:
46274627+ new_hash = hash_ref_becomes_if_not_imported_in_this_run
46284628+ elif old_hash in commit_renames:
46294629+ intermediate = old_commit_renames.get(old_hash,old_hash)
46304630+ if intermediate in self._commit_renames:
46314631+ new_hash = self._remap_to(intermediate)
46324632+ else:
46334633+ new_hash = intermediate
46344634+ else: # Must be either an annotated tag, or a ref whose tip was pruned
46354635+ if not new_refs_initialized:
46364636+ target_working_dir = self._args.target or b'.'
46374637+ new_refs = GitUtils.get_refs(target_working_dir)
46384638+ new_refs_initialized = True
46394639+ if refname in new_refs:
46404640+ new_hash = new_refs[refname]
46414641+ else:
46424642+ new_hash = deleted_hash
46434643+ ref_maps[refname] = (old_hash, new_hash)
46444644+ if self._args.source or self._args.target:
46454645+ if not new_refs_initialized:
46464646+ target_working_dir = self._args.target or b'.'
46474647+ new_refs = GitUtils.get_refs(target_working_dir)
46484648+ new_refs_initialized = True
46494649+ for ref, new_hash in new_refs.items():
46504650+ if ref not in orig_refs and not ref.startswith(b'refs/replace/'):
46514651+ old_hash = b'0'*len(new_hash)
46524652+ ref_maps[ref] = (old_hash, new_hash)
46534653+46544654+ #
46554655+ # Third, handle first_changes
46564656+ #
46574657+46584658+ old_first_changes = dict()
46594659+ if self._already_ran:
46604660+ # Read first_changes into old_first_changes
46614661+ with open(os.path.join(metadata_dir, b'first-changed-commits'), 'br') as f:
46624662+ for line in f:
46634663+ changed_commit, undeleted_self_or_ancestor = line.strip().split()
46644664+ old_first_changes[changed_commit] = undeleted_self_or_ancestor
46654665+ # We need to find the commits that were modified whose parents were not.
46664666+ # To be able to find parents, we need the commit names as of the beginning
46674667+ # of this run, and then when we are done, we need to map them back to the
46684668+ # name of the commits from before any git-filter-repo runs.
46694669+ #
46704670+ # We are excluding here any commits deleted in previous git-filter-repo
46714671+ # runs
46724672+ undo_old_commit_renames = dict((v,k) for (k,v) in old_commit_renames.items()
46734673+ if v != deleted_hash)
46744674+ # Get a list of all commits that were changed, as of the beginning of
46754675+ # this latest run.
46764676+ changed_commits = {new
46774677+ for (old,new) in old_commit_renames.items()
46784678+ if old != new and new != deleted_hash} | \
46794679+ {old
46804680+ for (old,new) in self._commit_renames.items()
46814681+ if old != new}
46824682+ special_changed_commits = {old
46834683+ for (old,new) in old_commit_renames.items()
46844684+ if new == deleted_hash}
46854685+ first_changes = dict()
46864686+ for (old,new) in self._commit_renames.items():
46874687+ if old == new:
46884688+ # old wasn't modified, can't be first change if not even a change
46894689+ continue
46904690+ if old_commit_unrenames.get(old,old) != old:
46914691+ # old was already modified in previous run; while it might represent
46924692+ # something that is still a first change, we'll handle that as we
46934693+ # loop over old_first_changes below
46944694+ continue
46954695+ if any(parent in changed_commits
46964696+ for parent in self._orig_graph.get_parent_hashes(old)):
46974697+ # a parent of old was modified, so old is not a first change
46984698+ continue
46994699+ # At this point, old IS a first change. We need to find out what new
47004700+ # commit it maps to, or if it doesn't map to one, what new commit was
47014701+ # its most recent ancestor that wasn't pruned.
47024702+ if new is None:
47034703+ new = self._remap_to(old)
47044704+ first_changes[old] = (new if new is not None else deleted_hash)
47054705+ for (old,undeleted_self_or_ancestor) in old_first_changes.items():
47064706+ if undeleted_self_or_ancestor == deleted_hash:
47074707+ # old represents a commit that was pruned and whose entire ancestry
47084708+ # was pruned. So, old is still a first change
47094709+ first_changes[old] = undeleted_self_or_ancestor
47104710+ continue
47114711+ intermediate = old_commit_renames.get(old, old)
47124712+ usoa = undeleted_self_or_ancestor
47134713+ new_ancestor = self._commit_renames.get(usoa, usoa)
47144714+ if intermediate == deleted_hash:
47154715+ # old was pruned in previous rewrite
47164716+ if usoa != new_ancestor:
47174717+ # old's ancestor got rewritten in this filtering run; we can drop
47184718+ # this one from first_changes.
47194719+ continue
47204720+ # Getting here means old was a first change and old was pruned in a
47214721+ # previous run, and its ancestors that survived were non rewritten in
47224722+ # this run, so old remains a first change
47234723+ first_changes[old] = new_ancestor # or usoa, since new_ancestor == usoa
47244724+ continue
47254725+ assert(usoa == intermediate) # old wasn't pruned => usoa == intermediate
47264726+47274727+ # Check whether parents of intermediate were rewritten. Note that
47284728+ # intermediate in self._commit_renames only means that intermediate was
47294729+ # processed by the latest filtering (not necessarily that it changed),
47304730+ # but we need to know that before we can check for parent hashes having
47314731+ # changed.
47324732+ if intermediate not in self._commit_renames:
47334733+ # This commit was not processed by this run, so it remains a first
47344734+ # change
47354735+ first_changes[old] = usoa
47364736+ continue
47374737+ if any(parent in changed_commits
47384738+ for parent in self._orig_graph.get_parent_hashes(intermediate)):
47394739+ # An ancestor was modified by this run, so it is no longer a first
47404740+ # change; continue to the next one.
47414741+ continue
47424742+ # This change is a first_change; find the new commit its usoa maps to
47434743+ new = self._remap_to(intermediate)
47444744+ assert(new is not None)
47454745+ first_changes[old] = new
47464746+47474747+ return commit_renames, ref_maps, first_changes
47484748+47494749+ def _handle_lfs_metadata(self, metadata_dir):
47504750+ if self._lfs_object_tracker is None:
47514751+ print("NOTE: LFS object orphaning not checked (LFS not in use)")
47524752+ return
47534753+47544754+ if self._args.partial:
47554755+ target_working_dir = self._args.target or b'.'
47564756+ source = False
47574757+ self._lfs_object_tracker.find_all_lfs_objects_in_repo(target_working_dir,
47584758+ source)
47594759+47604760+ with open(os.path.join(metadata_dir, b'original_lfs_objects'), 'bw') as f:
47614761+ for obj in sorted(self._lfs_object_tracker.source_objects.objects):
47624762+ f.write(obj+b"\n")
47634763+47644764+ orphaned_lfs_path = os.path.join(metadata_dir, b'orphaned_lfs_objects')
47654765+ msg = textwrap.dedent(_(f"""\
47664766+ NOTE: There were LFS Objects Orphaned by this rewrite recorded in
47674767+ {decode(orphaned_lfs_path)}."""))
47684768+ with open(orphaned_lfs_path, 'bw') as f:
47694769+ differences = self._lfs_object_tracker.source_objects.objects - \
47704770+ self._lfs_object_tracker.target_objects.objects
47714771+ for obj in sorted(differences):
47724772+ f.write(obj+b"\n")
47734773+ if differences:
47744774+ self._lfs_object_tracker.objects_orphaned = True
47754775+ print(msg)
47764776+47774777+ def _record_metadata(self, metadata_dir, orig_refs):
47784778+ self._flush_renames()
47794779+ commit_renames, ref_maps, first_changes = \
47804780+ self._compute_metadata(metadata_dir, orig_refs)
47814781+47824782+ if self._args.sensitive_data_removal:
47834783+ changed_commits = sum(k!=v for (k,v) in commit_renames.items())
47844784+ print(f"You rewrote {changed_commits} (of {len(commit_renames)}) commits.")
47854785+ print("") # Add a blank line before important rewrite information
47864786+ print(f"NOTE: First Changed Commit(s) is/are:\n "
47874787+ + decode(b"\n ".join(x for x in first_changes)))
47884788+47894789+ with open(os.path.join(metadata_dir, b'sensitive_data_removal'), 'bw') as f:
47904790+ pass # Write nothing; we only need the file created
47914791+47924792+ self._handle_lfs_metadata(metadata_dir)
47934793+ print("") # Add a blank line after important rewrite information
47944794+47954795+ with open(os.path.join(metadata_dir, b'commit-map'), 'bw') as f:
47964796+ f.write(("%-40s %s\n" % (_("old"), _("new"))).encode())
47974797+ for (old,new) in sorted(commit_renames.items()):
47984798+ msg = b'%s %s\n' % (old, new if new != None else deleted_hash)
47994799+ f.write(msg)
48004800+48014801+ with open(os.path.join(metadata_dir, b'ref-map'), 'bw') as f:
48024802+ f.write(("%-40s %-40s %s\n" % (_("old"), _("new"), _("ref"))).encode())
48034803+ for refname, hash_pair in sorted(ref_maps.items()):
48044804+ (old_hash, new_hash) = hash_pair
48054805+ f.write(b'%s %s %s\n' % (old_hash, new_hash, refname))
48064806+ if old_hash != new_hash:
48074807+ self._changed_refs.add(refname)
48084808+48094809+ with open(os.path.join(metadata_dir, b'changed-refs'), 'bw') as f:
48104810+ for refname in sorted(self._changed_refs):
48114811+ f.write(b'%s\n' % refname)
48124812+48134813+ with open(os.path.join(metadata_dir, b'first-changed-commits'), 'bw') as f:
48144814+ for commit, undeleted_self_or_ancestor in sorted(first_changes.items()):
48154815+ f.write(b'%s %s\n' % (commit, undeleted_self_or_ancestor))
48164816+48174817+ with open(os.path.join(metadata_dir, b'suboptimal-issues'), 'bw') as f:
48184818+ issues_found = False
48194819+ if self._commits_no_longer_merges:
48204820+ issues_found = True
48214821+48224822+ f.write(textwrap.dedent(_('''
48234823+ The following commits used to be merge commits but due to filtering
48244824+ are now regular commits; they likely have suboptimal commit messages
48254825+ (e.g. "Merge branch next into master"). Original commit hash on the
48264826+ left, commit hash after filtering/rewriting on the right:
48274827+ ''')[1:]).encode())
48284828+ for oldhash, newhash in self._commits_no_longer_merges:
48294829+ f.write(' {} {}\n'.format(oldhash, newhash).encode())
48304830+ f.write(b'\n')
48314831+48324832+ if self._commits_referenced_but_removed:
48334833+ issues_found = True
48344834+ f.write(textwrap.dedent(_('''
48354835+ The following commits were filtered out, but referenced in another
48364836+ commit message. The reference to the now-nonexistent commit hash
48374837+ (or a substring thereof) was left as-is in any commit messages:
48384838+ ''')[1:]).encode())
48394839+ for bad_commit_reference in self._commits_referenced_but_removed:
48404840+ f.write(' {}\n'.format(bad_commit_reference).encode())
48414841+ f.write(b'\n')
48424842+48434843+ if not issues_found:
48444844+ f.write(_("No filtering problems encountered.\n").encode())
48454845+48464846+ with open(os.path.join(metadata_dir, b'already_ran'), 'bw') as f:
48474847+ f.write(_("This file exists to allow you to filter again without --force,\n"
48484848+ "and to specify that metadata files should be updated instead\n"
48494849+ "of rewritten").encode())
48504850+48514851+ def finish(self):
48524852+ ''' Alternative to run() when there is no input of our own to parse,
48534853+ meaning that run only really needs to close the handle to fast-import
48544854+ and let it finish, thus making a call to "run" feel like a misnomer. '''
48554855+ assert not self._input
48564856+ assert self._managed_output
48574857+ self.run()
48584858+48594859+ def insert(self, obj, direct_insertion = False):
48604860+ if not direct_insertion:
48614861+ if type(obj) == Blob:
48624862+ self._tweak_blob(obj)
48634863+ elif type(obj) == Commit:
48644864+ aux_info = {'orig_parents': obj.parents,
48654865+ 'had_file_changes': bool(obj.file_changes)}
48664866+ self._tweak_commit(obj, aux_info)
48674867+ elif type(obj) == Reset:
48684868+ self._tweak_reset(obj)
48694869+ elif type(obj) == Tag:
48704870+ self._tweak_tag(obj)
48714871+ self._insert_into_stream(obj)
48724872+48734873+ def _insert_into_stream(self, obj):
48744874+ if not obj.dumped:
48754875+ if self._lfs_object_tracker:
48764876+ self._lfs_object_tracker.check_output_object(obj)
48774877+ if self._parser:
48784878+ self._parser.insert(obj)
48794879+ else:
48804880+ obj.dump(self._output)
48814881+48824882+ def get_exported_and_imported_refs(self):
48834883+ return self._parser.get_exported_and_imported_refs()
48844884+48854885+ def run(self):
48864886+ start = time.time()
48874887+ if not self._input and not self._output:
48884888+ self._run_sanity_checks()
48894889+ if not self._args.dry_run and not self._args.partial:
48904890+ self._read_stash()
48914891+ self._migrate_origin_to_heads()
48924892+ self._setup_input(use_done_feature = True)
48934893+ self._setup_output()
48944894+ assert self._sanity_checks_handled
48954895+48964896+ if self._input:
48974897+ # Create and run the filter
48984898+ self._repo_working_dir = self._args.source or b'.'
48994899+ self._parser = FastExportParser(blob_callback = self._tweak_blob,
49004900+ commit_callback = self._tweak_commit,
49014901+ tag_callback = self._tweak_tag,
49024902+ reset_callback = self._tweak_reset,
49034903+ done_callback = self._final_commands)
49044904+ self._setup_lfs_orphaning_checks()
49054905+ self._parser.run(self._input, self._output)
49064906+ if not self._finalize_handled:
49074907+ self._final_commands()
49084908+49094909+ # Make sure fast-export completed successfully
49104910+ if not self._args.stdin and self._fep.wait():
49114911+ raise SystemExit(_("Error: fast-export failed; see above.")) # pragma: no cover
49124912+ self._input.close()
49134913+49144914+ # If we're not the manager of self._output, we should avoid post-run cleanup
49154915+ if not self._managed_output:
49164916+ return
49174917+49184918+ # Close the output and ensure fast-import successfully completes
49194919+ self._output.close()
49204920+ if not self._args.dry_run and self._fip.wait():
49214921+ raise SystemExit(_("Error: fast-import failed; see above.")) # pragma: no cover
49224922+49234923+ # With fast-export and fast-import complete, update state if requested
49244924+ if self._args.state_branch:
49254925+ self._save_marks_files()
49264926+49274927+ # Notify user how long it took, before doing a gc and such
49284928+ msg = "New history written in {:.2f} seconds..."
49294929+ if self._args.repack:
49304930+ msg = "New history written in {:.2f} seconds; now repacking/cleaning..."
49314931+ print(msg.format(time.time()-start))
49324932+49334933+ # Exit early, if requested
49344934+ if self._args.dry_run:
49354935+ print(_("NOTE: Not running fast-import or cleaning up; --dry-run passed."))
49364936+ if self._fe_orig:
49374937+ print(_(" Requested filtering can be seen by comparing:"))
49384938+ print(" " + decode(self._fe_orig))
49394939+ else:
49404940+ print(_(" Requested filtering can be seen at:"))
49414941+ print(" " + decode(self._fe_filt))
49424942+ return
49434943+49444944+ target_working_dir = self._args.target or b'.'
49454945+ if self._input:
49464946+ self._ref_update(target_working_dir)
49474947+49484948+ # Write out data about run
49494949+ self._record_metadata(self.results_tmp_dir(), self._orig_refs)
49504950+49514951+ # Final cleanup:
49524952+ # If we need a repack, then nuke the reflogs and repack.
49534953+ # If we need a reset, do a reset --hard
49544954+ reset = not GitUtils.is_repository_bare(target_working_dir)
49554955+ self.cleanup(target_working_dir, self._args.repack, reset,
49564956+ run_quietly=self._args.quiet,
49574957+ show_debuginfo=self._args.debug)
49584958+49594959+ # Let user know how long it took
49604960+ print(_("Completely finished after {:.2f} seconds.")
49614961+ .format(time.time()-start))
49624962+49634963+ # Give post-rewrite instructions for cleaning up other copies for SDR
49644964+ if self._args.sensitive_data_removal:
49654965+ lfs_note = ""
49664966+ if self._lfs_object_tracker and \
49674967+ self._lfs_object_tracker.objects_orphaned == True:
49684968+ lfs_note = _(" and LFS Objects Orphaned")
49694969+ push_command = "git push --force --mirror origin"
49704970+ if self._args.no_fetch:
49714971+ if self._args.partial:
49724972+ push_command = "git push --force origin " + \
49734973+ " ".join(sorted([decode(x) for x in self._changed_refs]))
49744974+ else:
49754975+ push_command = "git push --all --tags origin"
49764976+ print("")
49774977+ print(sdr_next_steps % (push_command, lfs_note, lfs_note))
49784978+49794979+def main():
49804980+ setup_gettext()
49814981+ args = FilteringOptions.parse_args(sys.argv[1:])
49824982+ if args.analyze:
49834983+ RepoAnalyze.run(args)
49844984+ else:
49854985+ filter = RepoFilter(args)
49864986+ filter.run()
49874987+49884988+if __name__ == '__main__':
49894989+ main()
+652
lib/audit.ml
···11+(** Structured audit logging for unpac operations. *)
22+33+let src = Logs.Src.create "unpac.audit" ~doc:"Audit logging"
44+module Log = (val Logs.src_log src : Logs.LOG)
55+66+(* Git operation types *)
77+88+type git_result = {
99+ exit_code : int;
1010+ stdout : string;
1111+ stderr : string;
1212+}
1313+1414+type git_operation = {
1515+ git_id : string;
1616+ git_timestamp : float;
1717+ git_cmd : string list;
1818+ git_cwd : string;
1919+ git_duration_ms : int;
2020+ git_result : git_result;
2121+}
2222+2323+(* Unpac operation types *)
2424+2525+type status =
2626+ | Success
2727+ | Failed of string
2828+ | Conflict of string list
2929+3030+type operation_type =
3131+ | Init
3232+ | Project_new
3333+ | Project_promote
3434+ | Project_set_remote
3535+ | Opam_add
3636+ | Opam_init
3737+ | Opam_promote
3838+ | Opam_update
3939+ | Opam_merge
4040+ | Opam_edit
4141+ | Opam_done
4242+ | Opam_remove
4343+ | Git_add
4444+ | Git_update
4545+ | Git_merge
4646+ | Git_remove
4747+ | Push
4848+ | Unknown of string
4949+5050+type operation = {
5151+ id : string;
5252+ timestamp : float;
5353+ operation_type : operation_type;
5454+ args : string list;
5555+ cwd : string;
5656+ duration_ms : int;
5757+ status : status;
5858+ git_operations : git_operation list;
5959+}
6060+6161+type log = {
6262+ version : string;
6363+ entries : operation list;
6464+}
6565+6666+let current_version = "1.0"
6767+6868+(* UUID generation - simple random hex *)
6969+let () = Random.self_init ()
7070+7171+let generate_id () =
7272+ let buf = Buffer.create 32 in
7373+ for _ = 1 to 8 do
7474+ Buffer.add_string buf (Printf.sprintf "%04x" (Random.int 0x10000))
7575+ done;
7676+ let s = Buffer.contents buf in
7777+ (* Format as UUID: 8-4-4-4-12 *)
7878+ Printf.sprintf "%s-%s-%s-%s-%s"
7979+ (String.sub s 0 8)
8080+ (String.sub s 8 4)
8181+ (String.sub s 12 4)
8282+ (String.sub s 16 4)
8383+ (String.sub s 20 12)
8484+8585+(* JSON codecs *)
8686+8787+let git_result_jsont =
8888+ Jsont.Object.map
8989+ ~kind:"git_result"
9090+ (fun exit_code stdout stderr -> { exit_code; stdout; stderr })
9191+ |> Jsont.Object.mem "exit_code" Jsont.int ~enc:(fun r -> r.exit_code)
9292+ |> Jsont.Object.mem "stdout" Jsont.string ~enc:(fun r -> r.stdout)
9393+ |> Jsont.Object.mem "stderr" Jsont.string ~enc:(fun r -> r.stderr)
9494+ |> Jsont.Object.finish
9595+9696+let git_operation_jsont =
9797+ Jsont.Object.map
9898+ ~kind:"git_operation"
9999+ (fun git_id git_timestamp git_cmd git_cwd git_duration_ms git_result ->
100100+ { git_id; git_timestamp; git_cmd; git_cwd; git_duration_ms; git_result })
101101+ |> Jsont.Object.mem "id" Jsont.string ~enc:(fun g -> g.git_id)
102102+ |> Jsont.Object.mem "timestamp" Jsont.number ~enc:(fun g -> g.git_timestamp)
103103+ |> Jsont.Object.mem "cmd" (Jsont.list Jsont.string) ~enc:(fun g -> g.git_cmd)
104104+ |> Jsont.Object.mem "cwd" Jsont.string ~enc:(fun g -> g.git_cwd)
105105+ |> Jsont.Object.mem "duration_ms" Jsont.int ~enc:(fun g -> g.git_duration_ms)
106106+ |> Jsont.Object.mem "result" git_result_jsont ~enc:(fun g -> g.git_result)
107107+ |> Jsont.Object.finish
108108+109109+let status_jsont =
110110+ (* Encode status as a simple object with status field and optional data *)
111111+ Jsont.Object.map ~kind:"status"
112112+ (fun status data_opt ->
113113+ match status, data_opt with
114114+ | "success", _ -> Success
115115+ | "failed", Some msg -> Failed msg
116116+ | "conflict", Some files_str ->
117117+ Conflict (String.split_on_char ',' files_str)
118118+ | s, _ -> Failed (Printf.sprintf "Unknown status: %s" s))
119119+ |> Jsont.Object.mem "status" Jsont.string
120120+ ~enc:(function
121121+ | Success -> "success"
122122+ | Failed _ -> "failed"
123123+ | Conflict _ -> "conflict")
124124+ |> Jsont.Object.opt_mem "data" Jsont.string
125125+ ~enc:(function
126126+ | Success -> None
127127+ | Failed msg -> Some msg
128128+ | Conflict files -> Some (String.concat "," files))
129129+ |> Jsont.Object.finish
130130+131131+let operation_type_to_string = function
132132+ | Init -> "init"
133133+ | Project_new -> "project.new"
134134+ | Project_promote -> "project.promote"
135135+ | Project_set_remote -> "project.set-remote"
136136+ | Opam_add -> "opam.add"
137137+ | Opam_init -> "opam.init"
138138+ | Opam_promote -> "opam.promote"
139139+ | Opam_update -> "opam.update"
140140+ | Opam_merge -> "opam.merge"
141141+ | Opam_edit -> "opam.edit"
142142+ | Opam_done -> "opam.done"
143143+ | Opam_remove -> "opam.remove"
144144+ | Git_add -> "git.add"
145145+ | Git_update -> "git.update"
146146+ | Git_merge -> "git.merge"
147147+ | Git_remove -> "git.remove"
148148+ | Push -> "push"
149149+ | Unknown s -> s
150150+151151+let operation_type_of_string = function
152152+ | "init" -> Init
153153+ | "project.new" -> Project_new
154154+ | "project.promote" -> Project_promote
155155+ | "project.set-remote" -> Project_set_remote
156156+ | "opam.add" -> Opam_add
157157+ | "opam.init" -> Opam_init
158158+ | "opam.promote" -> Opam_promote
159159+ | "opam.update" -> Opam_update
160160+ | "opam.merge" -> Opam_merge
161161+ | "opam.edit" -> Opam_edit
162162+ | "opam.done" -> Opam_done
163163+ | "opam.remove" -> Opam_remove
164164+ | "git.add" -> Git_add
165165+ | "git.update" -> Git_update
166166+ | "git.merge" -> Git_merge
167167+ | "git.remove" -> Git_remove
168168+ | "push" -> Push
169169+ | s -> Unknown s
170170+171171+let operation_type_jsont =
172172+ Jsont.string
173173+ |> Jsont.map ~dec:operation_type_of_string ~enc:operation_type_to_string
174174+175175+let operation_jsont =
176176+ Jsont.Object.map
177177+ ~kind:"operation"
178178+ (fun id timestamp operation_type args cwd duration_ms status git_operations ->
179179+ { id; timestamp; operation_type; args; cwd; duration_ms; status; git_operations })
180180+ |> Jsont.Object.mem "id" Jsont.string ~enc:(fun o -> o.id)
181181+ |> Jsont.Object.mem "timestamp" Jsont.number ~enc:(fun o -> o.timestamp)
182182+ |> Jsont.Object.mem "operation" operation_type_jsont ~enc:(fun o -> o.operation_type)
183183+ |> Jsont.Object.mem "args" (Jsont.list Jsont.string) ~enc:(fun o -> o.args)
184184+ |> Jsont.Object.mem "cwd" Jsont.string ~enc:(fun o -> o.cwd)
185185+ |> Jsont.Object.mem "duration_ms" Jsont.int ~enc:(fun o -> o.duration_ms)
186186+ |> Jsont.Object.mem "status" status_jsont ~enc:(fun o -> o.status)
187187+ |> Jsont.Object.mem "git_operations" (Jsont.list git_operation_jsont)
188188+ ~enc:(fun o -> o.git_operations)
189189+ |> Jsont.Object.finish
190190+191191+let log_jsont =
192192+ Jsont.Object.map
193193+ ~kind:"audit_log"
194194+ (fun version entries -> { version; entries })
195195+ |> Jsont.Object.mem "version" Jsont.string ~enc:(fun l -> l.version)
196196+ |> Jsont.Object.mem "entries" (Jsont.list operation_jsont) ~enc:(fun l -> l.entries)
197197+ |> Jsont.Object.finish
198198+199199+(* Context for accumulating git operations *)
200200+201201+type context = {
202202+ ctx_id : string;
203203+ ctx_operation_type : operation_type;
204204+ ctx_args : string list;
205205+ ctx_cwd : string;
206206+ ctx_start : float;
207207+ mutable ctx_git_ops : git_operation list;
208208+}
209209+210210+let start_operation ~operation_type ~args ~cwd =
211211+ let ctx = {
212212+ ctx_id = generate_id ();
213213+ ctx_operation_type = operation_type;
214214+ ctx_args = args;
215215+ ctx_cwd = cwd;
216216+ ctx_start = Unix.gettimeofday ();
217217+ ctx_git_ops = [];
218218+ } in
219219+ Log.debug (fun m -> m "Starting operation %s: %s %a"
220220+ ctx.ctx_id (operation_type_to_string operation_type)
221221+ Fmt.(list ~sep:sp string) args);
222222+ ctx
223223+224224+let record_git ctx ~cmd ~cwd ~started ~result =
225225+ let now = Unix.gettimeofday () in
226226+ let duration_ms = int_of_float ((now -. started) *. 1000.0) in
227227+ let op = {
228228+ git_id = generate_id ();
229229+ git_timestamp = started;
230230+ git_cmd = cmd;
231231+ git_cwd = cwd;
232232+ git_duration_ms = duration_ms;
233233+ git_result = result;
234234+ } in
235235+ ctx.ctx_git_ops <- op :: ctx.ctx_git_ops;
236236+ Log.debug (fun m -> m "Recorded git: %a (exit %d, %dms)"
237237+ Fmt.(list ~sep:sp string) cmd result.exit_code duration_ms)
238238+239239+let finalize_operation ctx status =
240240+ let now = Unix.gettimeofday () in
241241+ let duration_ms = int_of_float ((now -. ctx.ctx_start) *. 1000.0) in
242242+ let op = {
243243+ id = ctx.ctx_id;
244244+ timestamp = ctx.ctx_start;
245245+ operation_type = ctx.ctx_operation_type;
246246+ args = ctx.ctx_args;
247247+ cwd = ctx.ctx_cwd;
248248+ duration_ms;
249249+ status;
250250+ git_operations = List.rev ctx.ctx_git_ops;
251251+ } in
252252+ Log.info (fun m -> m "Completed operation %s in %dms" ctx.ctx_id duration_ms);
253253+ op
254254+255255+let complete_success ctx = finalize_operation ctx Success
256256+257257+let complete_failed ctx ~error =
258258+ Log.warn (fun m -> m "Operation %s failed: %s" ctx.ctx_id error);
259259+ finalize_operation ctx (Failed error)
260260+261261+let complete_conflict ctx ~files =
262262+ Log.warn (fun m -> m "Operation %s had conflicts in %d files" ctx.ctx_id (List.length files));
263263+ finalize_operation ctx (Conflict files)
264264+265265+(* Log file management *)
266266+267267+let default_log_file = ".unpac-audit.json"
268268+269269+let load path =
270270+ if not (Sys.file_exists path) then
271271+ Ok { version = current_version; entries = [] }
272272+ else
273273+ try
274274+ let ic = open_in path in
275275+ let content = really_input_string ic (in_channel_length ic) in
276276+ close_in ic;
277277+ match Jsont_bytesrw.decode_string' log_jsont content with
278278+ | Ok log -> Ok log
279279+ | Error e -> Error (Printf.sprintf "Parse error: %s" (Jsont.Error.to_string e))
280280+ with
281281+ | Sys_error msg -> Error msg
282282+283283+let save path log =
284284+ try
285285+ match Jsont_bytesrw.encode_string ~format:Jsont.Indent log_jsont log with
286286+ | Ok content ->
287287+ let oc = open_out path in
288288+ output_string oc content;
289289+ close_out oc;
290290+ Ok ()
291291+ | Error e -> Error (Printf.sprintf "Encode error: %s" e)
292292+ with
293293+ | Sys_error msg -> Error msg
294294+295295+let append path op =
296296+ match load path with
297297+ | Error e -> Error e
298298+ | Ok log ->
299299+ let log' = { log with entries = op :: log.entries } in
300300+ save path log'
301301+302302+(* Pretty printing *)
303303+304304+let pp_status fmt = function
305305+ | Success -> Format.fprintf fmt "@{<green>SUCCESS@}"
306306+ | Failed msg -> Format.fprintf fmt "@{<red>FAILED@}: %s" msg
307307+ | Conflict files ->
308308+ Format.fprintf fmt "@{<yellow>CONFLICT@}: %a"
309309+ Fmt.(list ~sep:comma string) files
310310+311311+let pp_git_operation fmt op =
312312+ let status_color = if op.git_result.exit_code = 0 then "green" else "red" in
313313+ Format.fprintf fmt " @{<%s>[%d]@} git %a (%dms)@."
314314+ status_color op.git_result.exit_code
315315+ Fmt.(list ~sep:sp string) op.git_cmd
316316+ op.git_duration_ms
317317+318318+let pp_operation fmt op =
319319+ let tm = Unix.localtime op.timestamp in
320320+ Format.fprintf fmt "@[<v>";
321321+ Format.fprintf fmt "[%04d-%02d-%02d %02d:%02d:%02d] %s %a@."
322322+ (tm.Unix.tm_year + 1900) (tm.Unix.tm_mon + 1) tm.Unix.tm_mday
323323+ tm.Unix.tm_hour tm.Unix.tm_min tm.Unix.tm_sec
324324+ (operation_type_to_string op.operation_type)
325325+ Fmt.(list ~sep:sp string) op.args;
326326+ Format.fprintf fmt " ID: %s | Duration: %dms@." op.id op.duration_ms;
327327+ Format.fprintf fmt " Status: %a@." pp_status op.status;
328328+ if op.git_operations <> [] then begin
329329+ Format.fprintf fmt " Git operations (%d):@." (List.length op.git_operations);
330330+ List.iter (pp_git_operation fmt) op.git_operations
331331+ end;
332332+ Format.fprintf fmt "@]"
333333+334334+let pp_log fmt log =
335335+ Format.fprintf fmt "@[<v>Unpac Audit Log (version %s)@." log.version;
336336+ Format.fprintf fmt "Total operations: %d@.@." (List.length log.entries);
337337+ List.iter (fun op ->
338338+ pp_operation fmt op;
339339+ Format.fprintf fmt "@."
340340+ ) log.entries;
341341+ Format.fprintf fmt "@]"
342342+343343+(* HTML generation *)
344344+345345+let html_escape s =
346346+ let buf = Buffer.create (String.length s) in
347347+ String.iter (function
348348+ | '<' -> Buffer.add_string buf "<"
349349+ | '>' -> Buffer.add_string buf ">"
350350+ | '&' -> Buffer.add_string buf "&"
351351+ | '"' -> Buffer.add_string buf """
352352+ | c -> Buffer.add_char buf c
353353+ ) s;
354354+ Buffer.contents buf
355355+356356+(* Commit audit log to git *)
357357+358358+let commit_log ~proc_mgr ~main_wt ~log_path =
359359+ (* Stage the audit log *)
360360+ let rel_path = Filename.basename log_path in
361361+ let started = Unix.gettimeofday () in
362362+ let result =
363363+ try
364364+ (* Add the file *)
365365+ Eio.Switch.run @@ fun sw ->
366366+ let stdout_r, stdout_w = Eio.Process.pipe proc_mgr ~sw in
367367+ let stderr_r, stderr_w = Eio.Process.pipe proc_mgr ~sw in
368368+ let child = Eio.Process.spawn proc_mgr ~sw
369369+ ~cwd:(main_wt :> Eio.Fs.dir_ty Eio.Path.t)
370370+ ~stdout:stdout_w ~stderr:stderr_w
371371+ ["git"; "add"; rel_path]
372372+ in
373373+ Eio.Flow.close stdout_w;
374374+ Eio.Flow.close stderr_w;
375375+ (* Drain outputs *)
376376+ let stdout_buf = Buffer.create 64 in
377377+ let stderr_buf = Buffer.create 64 in
378378+ Eio.Fiber.both
379379+ (fun () ->
380380+ try
381381+ while true do
382382+ let chunk = Cstruct.create 1024 in
383383+ let n = Eio.Flow.single_read stdout_r chunk in
384384+ Buffer.add_string stdout_buf (Cstruct.to_string (Cstruct.sub chunk 0 n))
385385+ done
386386+ with End_of_file -> ())
387387+ (fun () ->
388388+ try
389389+ while true do
390390+ let chunk = Cstruct.create 1024 in
391391+ let n = Eio.Flow.single_read stderr_r chunk in
392392+ Buffer.add_string stderr_buf (Cstruct.to_string (Cstruct.sub chunk 0 n))
393393+ done
394394+ with End_of_file -> ());
395395+ let status = Eio.Process.await child in
396396+ match status with
397397+ | `Exited 0 -> Ok ()
398398+ | `Exited code -> Error (Printf.sprintf "git add failed (exit %d): %s" code (Buffer.contents stderr_buf))
399399+ | `Signaled sig_ -> Error (Printf.sprintf "git add killed by signal %d" sig_)
400400+ with exn -> Error (Printf.sprintf "Exception: %s" (Printexc.to_string exn))
401401+ in
402402+ match result with
403403+ | Error e ->
404404+ Log.warn (fun m -> m "Failed to stage audit log: %s" e);
405405+ Error e
406406+ | Ok () ->
407407+ (* Commit the file *)
408408+ let result =
409409+ try
410410+ Eio.Switch.run @@ fun sw ->
411411+ let stdout_r, stdout_w = Eio.Process.pipe proc_mgr ~sw in
412412+ let stderr_r, stderr_w = Eio.Process.pipe proc_mgr ~sw in
413413+ let child = Eio.Process.spawn proc_mgr ~sw
414414+ ~cwd:(main_wt :> Eio.Fs.dir_ty Eio.Path.t)
415415+ ~stdout:stdout_w ~stderr:stderr_w
416416+ ["git"; "commit"; "-m"; "Update audit log"; "--no-verify"]
417417+ in
418418+ Eio.Flow.close stdout_w;
419419+ Eio.Flow.close stderr_w;
420420+ (* Drain outputs *)
421421+ let stdout_buf = Buffer.create 64 in
422422+ let stderr_buf = Buffer.create 64 in
423423+ Eio.Fiber.both
424424+ (fun () ->
425425+ try
426426+ while true do
427427+ let chunk = Cstruct.create 1024 in
428428+ let n = Eio.Flow.single_read stdout_r chunk in
429429+ Buffer.add_string stdout_buf (Cstruct.to_string (Cstruct.sub chunk 0 n))
430430+ done
431431+ with End_of_file -> ())
432432+ (fun () ->
433433+ try
434434+ while true do
435435+ let chunk = Cstruct.create 1024 in
436436+ let n = Eio.Flow.single_read stderr_r chunk in
437437+ Buffer.add_string stderr_buf (Cstruct.to_string (Cstruct.sub chunk 0 n))
438438+ done
439439+ with End_of_file -> ());
440440+ let status = Eio.Process.await child in
441441+ match status with
442442+ | `Exited 0 -> Ok ()
443443+ | `Exited 1 when String.length (Buffer.contents stdout_buf) > 0 &&
444444+ (String.exists (fun c -> c = 'n') (Buffer.contents stdout_buf)) ->
445445+ (* "nothing to commit" - this is fine *)
446446+ Ok ()
447447+ | `Exited code -> Error (Printf.sprintf "git commit failed (exit %d): %s" code (Buffer.contents stderr_buf))
448448+ | `Signaled sig_ -> Error (Printf.sprintf "git commit killed by signal %d" sig_)
449449+ with exn -> Error (Printf.sprintf "Exception: %s" (Printexc.to_string exn))
450450+ in
451451+ let duration = int_of_float ((Unix.gettimeofday () -. started) *. 1000.0) in
452452+ (match result with
453453+ | Ok () -> Log.debug (fun m -> m "Committed audit log (%dms)" duration)
454454+ | Error e -> Log.warn (fun m -> m "Failed to commit audit log: %s" e));
455455+ result
456456+457457+(** Full audit manager that wraps operations *)
458458+type manager = {
459459+ proc_mgr : [ `Generic | `Unix ] Eio.Process.mgr_ty Eio.Resource.t;
460460+ main_wt : Eio.Fs.dir_ty Eio.Path.t;
461461+ log_path : string;
462462+ mutable current_ctx : context option;
463463+}
464464+465465+let create_manager ~proc_mgr ~main_wt =
466466+ let log_path = Eio.Path.(main_wt / default_log_file) |> snd in
467467+ { proc_mgr; main_wt; log_path; current_ctx = None }
468468+469469+let begin_operation mgr ~operation_type ~args =
470470+ let cwd = snd mgr.main_wt in
471471+ let ctx = start_operation ~operation_type ~args ~cwd in
472472+ mgr.current_ctx <- Some ctx;
473473+ ctx
474474+475475+let end_operation mgr status =
476476+ match mgr.current_ctx with
477477+ | None ->
478478+ Log.warn (fun m -> m "end_operation called without active context");
479479+ Error "No active operation"
480480+ | Some ctx ->
481481+ mgr.current_ctx <- None;
482482+ let op = finalize_operation ctx status in
483483+ (* Append to log file *)
484484+ (match append mgr.log_path op with
485485+ | Error e ->
486486+ Log.err (fun m -> m "Failed to append to audit log: %s" e);
487487+ Error e
488488+ | Ok () ->
489489+ (* Commit the log *)
490490+ match commit_log ~proc_mgr:mgr.proc_mgr ~main_wt:mgr.main_wt ~log_path:mgr.log_path with
491491+ | Error e ->
492492+ Log.warn (fun m -> m "Failed to commit audit log (will retry next operation): %s" e);
493493+ Ok op (* Still return success - the log is saved, just not committed *)
494494+ | Ok () ->
495495+ Ok op)
496496+497497+let end_success mgr = end_operation mgr Success
498498+let end_failed mgr ~error = end_operation mgr (Failed error)
499499+let end_conflict mgr ~files = end_operation mgr (Conflict files)
500500+501501+let get_context mgr = mgr.current_ctx
502502+503503+let to_html log =
504504+ let buf = Buffer.create 4096 in
505505+ let add = Buffer.add_string buf in
506506+ add {|<!DOCTYPE html>
507507+<html lang="en">
508508+<head>
509509+ <meta charset="UTF-8">
510510+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
511511+ <title>Unpac Audit Log</title>
512512+ <style>
513513+ :root {
514514+ --bg: #1a1a2e;
515515+ --card: #16213e;
516516+ --text: #e4e4e4;
517517+ --accent: #0f3460;
518518+ --success: #4ecca3;
519519+ --error: #e94560;
520520+ --warning: #f39c12;
521521+ }
522522+ body {
523523+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
524524+ background: var(--bg);
525525+ color: var(--text);
526526+ margin: 0;
527527+ padding: 20px;
528528+ line-height: 1.6;
529529+ }
530530+ h1 { color: var(--success); margin-bottom: 10px; }
531531+ .meta { color: #888; margin-bottom: 30px; }
532532+ .operation {
533533+ background: var(--card);
534534+ border-radius: 8px;
535535+ padding: 20px;
536536+ margin-bottom: 20px;
537537+ border-left: 4px solid var(--accent);
538538+ }
539539+ .operation.success { border-left-color: var(--success); }
540540+ .operation.failed { border-left-color: var(--error); }
541541+ .operation.conflict { border-left-color: var(--warning); }
542542+ .op-header {
543543+ display: flex;
544544+ justify-content: space-between;
545545+ align-items: center;
546546+ margin-bottom: 10px;
547547+ }
548548+ .op-type {
549549+ font-weight: bold;
550550+ font-size: 1.1em;
551551+ color: var(--success);
552552+ }
553553+ .op-time { color: #888; font-size: 0.9em; }
554554+ .op-args { font-family: monospace; color: #888; margin: 5px 0; }
555555+ .status {
556556+ display: inline-block;
557557+ padding: 2px 8px;
558558+ border-radius: 4px;
559559+ font-size: 0.85em;
560560+ font-weight: bold;
561561+ }
562562+ .status.success { background: var(--success); color: #000; }
563563+ .status.failed { background: var(--error); color: #fff; }
564564+ .status.conflict { background: var(--warning); color: #000; }
565565+ .git-ops {
566566+ margin-top: 15px;
567567+ padding-top: 15px;
568568+ border-top: 1px solid var(--accent);
569569+ }
570570+ .git-ops summary {
571571+ cursor: pointer;
572572+ color: #888;
573573+ }
574574+ .git-op {
575575+ font-family: monospace;
576576+ font-size: 0.9em;
577577+ padding: 5px 10px;
578578+ margin: 5px 0;
579579+ background: var(--accent);
580580+ border-radius: 4px;
581581+ }
582582+ .git-op.error { border-left: 3px solid var(--error); }
583583+ .git-cmd { color: var(--success); }
584584+ .git-exit { color: #888; }
585585+ .git-duration { color: #888; float: right; }
586586+ </style>
587587+</head>
588588+<body>
589589+ <h1>Unpac Audit Log</h1>
590590+ <div class="meta">Version |};
591591+ add (html_escape log.version);
592592+ add {| | |};
593593+ add (string_of_int (List.length log.entries));
594594+ add {| operations</div>
595595+|};
596596+ List.iter (fun op ->
597597+ let status_class = match op.status with
598598+ | Success -> "success"
599599+ | Failed _ -> "failed"
600600+ | Conflict _ -> "conflict"
601601+ in
602602+ let tm = Unix.localtime op.timestamp in
603603+ add (Printf.sprintf {| <div class="operation %s">
604604+ <div class="op-header">
605605+ <span class="op-type">%s</span>
606606+ <span class="op-time">%04d-%02d-%02d %02d:%02d:%02d (%dms)</span>
607607+ </div>
608608+ <div class="op-args">%s</div>
609609+ <span class="status %s">%s</span>
610610+|}
611611+ status_class
612612+ (html_escape (operation_type_to_string op.operation_type))
613613+ (tm.Unix.tm_year + 1900) (tm.Unix.tm_mon + 1) tm.Unix.tm_mday
614614+ tm.Unix.tm_hour tm.Unix.tm_min tm.Unix.tm_sec
615615+ op.duration_ms
616616+ (html_escape (String.concat " " op.args))
617617+ status_class
618618+ (match op.status with
619619+ | Success -> "SUCCESS"
620620+ | Failed msg -> "FAILED: " ^ html_escape msg
621621+ | Conflict files -> "CONFLICT: " ^ html_escape (String.concat ", " files)));
622622+ if op.git_operations <> [] then begin
623623+ add {| <div class="git-ops">
624624+ <details>
625625+ <summary>|};
626626+ add (string_of_int (List.length op.git_operations));
627627+ add {| git operations</summary>
628628+|};
629629+ List.iter (fun git_op ->
630630+ let error_class = if git_op.git_result.exit_code <> 0 then " error" else "" in
631631+ add (Printf.sprintf {| <div class="git-op%s">
632632+ <span class="git-cmd">git %s</span>
633633+ <span class="git-duration">%dms</span>
634634+ <span class="git-exit">[exit %d]</span>
635635+ </div>
636636+|}
637637+ error_class
638638+ (html_escape (String.concat " " git_op.git_cmd))
639639+ git_op.git_duration_ms
640640+ git_op.git_result.exit_code)
641641+ ) op.git_operations;
642642+ add {| </details>
643643+ </div>
644644+|}
645645+ end;
646646+ add {| </div>
647647+|}
648648+ ) log.entries;
649649+ add {|</body>
650650+</html>
651651+|};
652652+ Buffer.contents buf
+188
lib/audit.mli
···11+(** Structured audit logging for unpac operations.
22+33+ This module provides hierarchical logging with JSON serialization,
44+ enabling both human-readable and machine-processable audit trails.
55+66+ All unpac operations are logged with their constituent git operations,
77+ timestamps, durations, and outcomes. *)
88+99+(** {1 Git Operation Logging} *)
1010+1111+(** Result of a git command *)
1212+type git_result = {
1313+ exit_code : int;
1414+ stdout : string;
1515+ stderr : string;
1616+}
1717+1818+(** A single git command execution *)
1919+type git_operation = {
2020+ git_id : string; (** Unique ID for this operation *)
2121+ git_timestamp : float; (** Unix timestamp when started *)
2222+ git_cmd : string list; (** Git command args (without 'git') *)
2323+ git_cwd : string; (** Working directory *)
2424+ git_duration_ms : int; (** Duration in milliseconds *)
2525+ git_result : git_result; (** Command result *)
2626+}
2727+2828+val git_operation_jsont : git_operation Jsont.t
2929+(** JSON codec for git operations *)
3030+3131+(** {1 Unpac Operation Logging} *)
3232+3333+(** Status of an unpac operation *)
3434+type status =
3535+ | Success
3636+ | Failed of string
3737+ | Conflict of string list
3838+3939+val status_jsont : status Jsont.t
4040+4141+(** Type of unpac operation *)
4242+type operation_type =
4343+ | Init
4444+ | Project_new
4545+ | Project_promote
4646+ | Project_set_remote
4747+ | Opam_add
4848+ | Opam_init
4949+ | Opam_promote
5050+ | Opam_update
5151+ | Opam_merge
5252+ | Opam_edit
5353+ | Opam_done
5454+ | Opam_remove
5555+ | Git_add
5656+ | Git_update
5757+ | Git_merge
5858+ | Git_remove
5959+ | Push
6060+ | Unknown of string
6161+6262+val operation_type_jsont : operation_type Jsont.t
6363+6464+val operation_type_to_string : operation_type -> string
6565+(** Convert operation type to string representation *)
6666+6767+(** An unpac operation with its git operations *)
6868+type operation = {
6969+ id : string; (** Unique operation ID (UUID) *)
7070+ timestamp : float; (** Unix timestamp when started *)
7171+ operation_type : operation_type;
7272+ args : string list; (** Command arguments *)
7373+ cwd : string; (** Working directory *)
7474+ duration_ms : int; (** Total duration in milliseconds *)
7575+ status : status; (** Final status *)
7676+ git_operations : git_operation list; (** Constituent git operations *)
7777+}
7878+7979+val operation_jsont : operation Jsont.t
8080+(** JSON codec for unpac operations *)
8181+8282+(** {1 Audit Log} *)
8383+8484+(** Complete audit log *)
8585+type log = {
8686+ version : string; (** Log format version *)
8787+ entries : operation list; (** Log entries, newest first *)
8888+}
8989+9090+val log_jsont : log Jsont.t
9191+(** JSON codec for the complete audit log *)
9292+9393+(** Current log format version *)
9494+val current_version : string
9595+9696+(** {1 Logging API} *)
9797+9898+(** Active operation context for accumulating git operations *)
9999+type context
100100+101101+(** Start a new unpac operation.
102102+ Returns a context for recording git operations. *)
103103+val start_operation :
104104+ operation_type:operation_type ->
105105+ args:string list ->
106106+ cwd:string ->
107107+ context
108108+109109+(** Record a git operation within the current context.
110110+ Call this after each git command completes. *)
111111+val record_git :
112112+ context ->
113113+ cmd:string list ->
114114+ cwd:string ->
115115+ started:float ->
116116+ result:git_result ->
117117+ unit
118118+119119+(** Complete an operation successfully *)
120120+val complete_success : context -> operation
121121+122122+(** Complete an operation with failure *)
123123+val complete_failed : context -> error:string -> operation
124124+125125+(** Complete an operation with conflict *)
126126+val complete_conflict : context -> files:string list -> operation
127127+128128+(** {1 Log File Management} *)
129129+130130+(** Default log file path relative to project root *)
131131+val default_log_file : string
132132+133133+(** Load audit log from file. Returns empty log if file doesn't exist. *)
134134+val load : string -> (log, string) result
135135+136136+(** Save audit log to file *)
137137+val save : string -> log -> (unit, string) result
138138+139139+(** Append an operation to the log file *)
140140+val append : string -> operation -> (unit, string) result
141141+142142+(** {1 Formatting} *)
143143+144144+(** Pretty-print an operation for terminal output *)
145145+val pp_operation : Format.formatter -> operation -> unit
146146+147147+(** Pretty-print the log for terminal output *)
148148+val pp_log : Format.formatter -> log -> unit
149149+150150+(** Generate HTML report from log *)
151151+val to_html : log -> string
152152+153153+(** {1 Audit Manager} *)
154154+155155+(** Manager that handles full operation lifecycle with auto-commit *)
156156+type manager
157157+158158+(** Create an audit manager for the given workspace *)
159159+val create_manager :
160160+ proc_mgr:[ `Generic | `Unix ] Eio.Process.mgr_ty Eio.Resource.t ->
161161+ main_wt:Eio.Fs.dir_ty Eio.Path.t ->
162162+ manager
163163+164164+(** Begin a new audited operation. Returns the context for recording git ops. *)
165165+val begin_operation :
166166+ manager ->
167167+ operation_type:operation_type ->
168168+ args:string list ->
169169+ context
170170+171171+(** End an operation successfully. Appends to log and commits. *)
172172+val end_success : manager -> (operation, string) result
173173+174174+(** End an operation with failure. Appends to log and commits. *)
175175+val end_failed : manager -> error:string -> (operation, string) result
176176+177177+(** End an operation with merge conflict. Appends to log and commits. *)
178178+val end_conflict : manager -> files:string list -> (operation, string) result
179179+180180+(** Get the current context if one is active *)
181181+val get_context : manager -> context option
182182+183183+(** Commit the audit log to git *)
184184+val commit_log :
185185+ proc_mgr:[ `Generic | `Unix ] Eio.Process.mgr_ty Eio.Resource.t ->
186186+ main_wt:Eio.Fs.dir_ty Eio.Path.t ->
187187+ log_path:string ->
188188+ (unit, string) result
+97
lib/backend.ml
···11+(** Backend module signature for package managers.
22+33+ Each backend (opam, cargo, etc.) implements this interface to provide
44+ vendoring capabilities. *)
55+66+(** {1 Types} *)
77+88+type package_info = {
99+ name : string;
1010+ url : string;
1111+ branch : string option; (** Branch/tag/ref to use *)
1212+}
1313+(** Information about a package to vendor. *)
1414+1515+type add_result =
1616+ | Added of { name : string; sha : string }
1717+ | Already_exists of string
1818+ | Failed of { name : string; error : string }
1919+2020+type update_result =
2121+ | Updated of { name : string; old_sha : string; new_sha : string }
2222+ | No_changes of string
2323+ | Update_failed of { name : string; error : string }
2424+2525+(** {1 Backend Signature} *)
2626+2727+module type S = sig
2828+ val name : string
2929+ (** Backend name, e.g. "opam", "cargo". *)
3030+3131+ (** {2 Branch Naming} *)
3232+3333+ val upstream_branch : string -> string
3434+ (** [upstream_branch pkg] returns branch name, e.g. "opam/upstream/astring". *)
3535+3636+ val vendor_branch : string -> string
3737+ (** [vendor_branch pkg] returns branch name, e.g. "opam/vendor/astring". *)
3838+3939+ val patches_branch : string -> string
4040+ (** [patches_branch pkg] returns branch name, e.g. "opam/patches/astring". *)
4141+4242+ val vendor_path : string -> string
4343+ (** [vendor_path pkg] returns path prefix, e.g. "vendor/opam/astring". *)
4444+4545+ (** {2 Worktree Kinds} *)
4646+4747+ val upstream_kind : string -> Worktree.kind
4848+ val vendor_kind : string -> Worktree.kind
4949+ val patches_kind : string -> Worktree.kind
5050+5151+ (** {2 Package Operations} *)
5252+5353+ val add_package :
5454+ proc_mgr:Git.proc_mgr ->
5555+ root:Worktree.root ->
5656+ package_info ->
5757+ add_result
5858+ (** [add_package ~proc_mgr ~root info] vendors a single package.
5959+6060+ 1. Creates/updates opam/upstream/<pkg> from URL
6161+ 2. Creates opam/vendor/<pkg> orphan with vendor/ prefix
6262+ 3. Creates opam/patches/<pkg> from vendor *)
6363+6464+ val update_package :
6565+ proc_mgr:Git.proc_mgr ->
6666+ root:Worktree.root ->
6767+ string ->
6868+ update_result
6969+ (** [update_package ~proc_mgr ~root name] updates a package from upstream.
7070+7171+ 1. Fetches latest into opam/upstream/<pkg>
7272+ 2. Updates opam/vendor/<pkg> with new content
7373+ Does NOT rebase patches - that's a separate operation. *)
7474+7575+ val list_packages :
7676+ proc_mgr:Git.proc_mgr ->
7777+ root:Worktree.root ->
7878+ string list
7979+ (** [list_packages ~proc_mgr root] returns all vendored package names. *)
8080+end
8181+8282+(** {1 Merge Operations} *)
8383+8484+(** These operations are backend-agnostic and work on any patches branch. *)
8585+8686+let merge_to_project ~proc_mgr ~root ~project ~patches_branch =
8787+ let project_wt = Worktree.path root (Worktree.Project project) in
8888+ Git.merge_allow_unrelated ~proc_mgr ~cwd:project_wt
8989+ ~branch:patches_branch
9090+ ~message:(Printf.sprintf "Merge %s" patches_branch)
9191+9292+let rebase_patches ~proc_mgr ~root ~patches_kind ~onto =
9393+ Worktree.ensure ~proc_mgr root patches_kind;
9494+ let patches_wt = Worktree.path root patches_kind in
9595+ let result = Git.rebase ~proc_mgr ~cwd:patches_wt ~onto in
9696+ Worktree.remove ~proc_mgr root patches_kind;
9797+ result
+389
lib/claude/agent.ml
···11+(** Ralph-loop style Claude agent for unpac workspace analysis.
22+33+ Implements the ralph-loop pattern: same prompt fed each iteration,
44+ with state persisting in files. Runs up to max_iterations per project,
55+ exiting early on completion promise.
66+77+ Uses the Claude SDK's MCP-based custom tool architecture. Custom unpac
88+ tools are registered via an in-process MCP server, while Claude's built-in
99+ tools (Read, Write, Bash, etc.) are handled by Claude CLI directly. *)
1010+1111+let src = Logs.Src.create "unpac.claude.agent" ~doc:"Claude agent"
1212+module Log = (val Logs.src_log src : Logs.LOG)
1313+1414+(* ANSI color codes *)
1515+module Color = struct
1616+ let reset = "\x1b[0m"
1717+ let bold = "\x1b[1m"
1818+ let dim = "\x1b[2m"
1919+ let red = "\x1b[31m"
2020+ let green = "\x1b[32m"
2121+ let yellow = "\x1b[33m"
2222+ let blue = "\x1b[34m"
2323+ let magenta = "\x1b[35m"
2424+ let cyan = "\x1b[36m"
2525+end
2626+2727+type config = {
2828+ verbose : bool;
2929+ web_port : int option;
3030+ max_iterations : int;
3131+ project : string option;
3232+}
3333+3434+let default_config = {
3535+ verbose = false;
3636+ web_port = None;
3737+ max_iterations = 20;
3838+ project = None;
3939+}
4040+4141+let completion_promise = "AGENTIC-HUMPS-COUNT-2"
4242+4343+(* Format tool call for logging - full paths, no truncation *)
4444+let format_tool_call name (input : Claude.Tool_input.t) =
4545+ let get_string key = Claude.Tool_input.get_string input key in
4646+ match name with
4747+ | "Read" ->
4848+ let path = get_string "file_path" |> Option.value ~default:"?" in
4949+ Printf.sprintf "Read %s" path
5050+ | "Write" ->
5151+ let path = get_string "file_path" |> Option.value ~default:"?" in
5252+ Printf.sprintf "Write %s" path
5353+ | "Edit" ->
5454+ let path = get_string "file_path" |> Option.value ~default:"?" in
5555+ Printf.sprintf "Edit %s" path
5656+ | "Bash" ->
5757+ let cmd = get_string "command" |> Option.value ~default:"?" in
5858+ Printf.sprintf "$ %s" cmd
5959+ | "Glob" ->
6060+ let pattern = get_string "pattern" |> Option.value ~default:"*" in
6161+ let path = get_string "path" |> Option.value ~default:"" in
6262+ if path = "" then Printf.sprintf "Glob %s" pattern
6363+ else Printf.sprintf "Glob %s in %s" pattern path
6464+ | "Grep" ->
6565+ let pattern = get_string "pattern" |> Option.value ~default:"?" in
6666+ let path = get_string "path" |> Option.value ~default:"" in
6767+ if path = "" then Printf.sprintf "Grep %s" pattern
6868+ else Printf.sprintf "Grep %s in %s" pattern path
6969+ (* MCP tools are prefixed with mcp__unpac__ *)
7070+ | s when String.length s > 12 && String.sub s 0 12 = "mcp__unpac__" ->
7171+ let tool_name = String.sub s 12 (String.length s - 12) in
7272+ (match tool_name with
7373+ | "read_file" ->
7474+ let path = get_string "path" |> Option.value ~default:"?" in
7575+ Printf.sprintf "unpac:read %s" path
7676+ | "write_file" ->
7777+ let path = get_string "path" |> Option.value ~default:"?" in
7878+ Printf.sprintf "unpac:write %s" path
7979+ | "list_directory" ->
8080+ let path = get_string "path" |> Option.value ~default:"." in
8181+ Printf.sprintf "unpac:ls %s" path
8282+ | "glob_files" ->
8383+ let pattern = get_string "pattern" |> Option.value ~default:"*" in
8484+ Printf.sprintf "unpac:glob %s" pattern
8585+ | "run_shell" ->
8686+ let cmd = get_string "command" |> Option.value ~default:"?" in
8787+ Printf.sprintf "unpac:$ %s" cmd
8888+ | "git_commit" ->
8989+ let msg = get_string "message" |> Option.value ~default:"" in
9090+ Printf.sprintf "unpac:commit %s" msg
9191+ | "unpac_status" -> "unpac:status"
9292+ | "unpac_status_sync" -> "unpac:status --sync"
9393+ | "unpac_push" ->
9494+ let remote = get_string "remote" |> Option.value ~default:"origin" in
9595+ Printf.sprintf "unpac:push %s" remote
9696+ | "unpac_project_list" -> "unpac:projects"
9797+ | "unpac_opam_list" -> "unpac:opam list"
9898+ | "unpac_git_list" -> "unpac:git list"
9999+ | "unpac_git_add" ->
100100+ let url = get_string "url" |> Option.value ~default:"?" in
101101+ Printf.sprintf "unpac:git add %s" url
102102+ | "unpac_git_info" ->
103103+ let n = get_string "name" |> Option.value ~default:"?" in
104104+ Printf.sprintf "unpac:git info %s" n
105105+ | "unpac_git_diff" ->
106106+ let n = get_string "name" |> Option.value ~default:"?" in
107107+ Printf.sprintf "unpac:git diff %s" n
108108+ | _ -> Printf.sprintf "unpac:%s" tool_name)
109109+ | _ -> name
110110+111111+(* Find unpac root from a given directory *)
112112+let find_root_from fs dir =
113113+ let rec search path depth =
114114+ if depth > 10 then None
115115+ else begin
116116+ let git_path = Eio.Path.(path / "git") in
117117+ let main_path = Eio.Path.(path / "main") in
118118+ if Eio.Path.is_directory git_path && Eio.Path.is_directory main_path then
119119+ Some path
120120+ else
121121+ match Eio.Path.split path with
122122+ | Some (parent, _) -> search parent (depth + 1)
123123+ | None -> None
124124+ end
125125+ in
126126+ search (Eio.Path.(fs / dir)) 0
127127+128128+let string_contains ~sub s =
129129+ let len_sub = String.length sub in
130130+ let len_s = String.length s in
131131+ if len_sub > len_s then false
132132+ else begin
133133+ let rec check i =
134134+ if i > len_s - len_sub then false
135135+ else if String.sub s i len_sub = sub then true
136136+ else check (i + 1)
137137+ in
138138+ check 0
139139+ end
140140+141141+(* Shuffle a list randomly *)
142142+let shuffle list =
143143+ let arr = Array.of_list list in
144144+ let n = Array.length arr in
145145+ for i = n - 1 downto 1 do
146146+ let j = Random.int (i + 1) in
147147+ let tmp = arr.(i) in
148148+ arr.(i) <- arr.(j);
149149+ arr.(j) <- tmp
150150+ done;
151151+ Array.to_list arr
152152+153153+(* Ensure working directory exists *)
154154+let ensure_work_dir fs root project =
155155+ let root_dir = snd root in
156156+ let work_base = Eio.Path.(fs / root_dir / ".unpac-claude") in
157157+ let work_dir = Eio.Path.(work_base / project) in
158158+ let claude_dir = Eio.Path.(work_dir / ".claude") in
159159+ (try Eio.Path.mkdir ~perm:0o755 work_base with _ -> ());
160160+ (try Eio.Path.mkdir ~perm:0o755 work_dir with _ -> ());
161161+ (try Eio.Path.mkdir ~perm:0o755 claude_dir with _ -> ());
162162+ work_dir
163163+164164+(* Run ralph-loop for a single project *)
165165+let run_project_ralph_loop ~env ~config ~root ~project ~event_bus =
166166+ let proc_mgr = Eio.Stdenv.process_mgr env in
167167+ let fs = Eio.Stdenv.fs env in
168168+169169+ let prefix = Printf.sprintf "[%s] " project in
170170+ let log msg = Log.info (fun m -> m "%s%s" prefix msg) in
171171+ let emit event = Event.emit event_bus event in
172172+173173+ log "Starting ralph-loop agent";
174174+ Format.printf "@.%s%s═══ Project: %s ═══%s@." Color.bold Color.blue project Color.reset;
175175+ emit (Event.Text (Printf.sprintf "\n=== Starting ralph-loop for %s ===\n" project));
176176+177177+ (* Ensure working directory *)
178178+ let _work_dir = ensure_work_dir fs root project in
179179+180180+ (* Get project path *)
181181+ let project_path = Unpac.Worktree.path root (Unpac.Worktree.Project project) in
182182+ let project_dir = snd project_path in
183183+184184+ (* Generate the prompt (same prompt used every iteration - ralph-loop style) *)
185185+ let prompt = Prompt.generate_for_project ~proc_mgr ~root ~project in
186186+187187+ (* Create MCP server with custom unpac tools *)
188188+ let mcp_server = Tools.create_mcp_server ~proc_mgr ~fs ~root in
189189+190190+ (* Build Claude options - always Opus 4.5 *)
191191+ (* Register MCP server so custom tools are available via mcp__unpac__<tool> *)
192192+ let options =
193193+ Claude.Options.default
194194+ |> Claude.Options.with_model (`Custom "claude-opus-4-5")
195195+ |> Claude.Options.with_system_prompt prompt
196196+ |> Claude.Options.with_permission_mode Claude.Permissions.Mode.Bypass_permissions
197197+ |> Claude.Options.with_mcp_server ~name:"unpac" mcp_server
198198+ in
199199+200200+ (* Ralph-loop: same prompt each iteration *)
201201+ let iteration_prompt = Printf.sprintf
202202+ "You are working on the '%s' project at %s.\n\n\
203203+ Analyze the project, make improvements, update STATUS.md, and commit changes.\n\n\
204204+ You have access to:\n\
205205+ - Claude's built-in tools: Read, Write, Edit, Bash, Glob, Grep\n\
206206+ - Custom unpac tools (mcp__unpac__*): unpac_status, unpac_git_list, etc.\n\n\
207207+ When ALL significant work is complete, output exactly: %s\n\n\
208208+ Begin."
209209+ project project_dir completion_promise
210210+ in
211211+212212+ (* Run the ralph-loop *)
213213+ let rec ralph_loop iteration =
214214+ if iteration > config.max_iterations then begin
215215+ log (Printf.sprintf "Max iterations (%d) reached" config.max_iterations);
216216+ Format.printf "@.%s%s⚠ Max iterations (%d) reached%s@."
217217+ Color.bold Color.yellow config.max_iterations Color.reset;
218218+ emit (Event.Text (Printf.sprintf "\n[%s] Max iterations reached.\n" project))
219219+ end else begin
220220+ log (Printf.sprintf "Iteration %d/%d" iteration config.max_iterations);
221221+ Format.printf "@.%s%s── Iteration %d/%d ──%s@."
222222+ Color.bold Color.yellow iteration config.max_iterations Color.reset;
223223+ emit (Event.Text (Printf.sprintf "\n[%s] --- Iteration %d/%d ---\n"
224224+ project iteration config.max_iterations));
225225+226226+ let accumulated_response = ref "" in
227227+ let completion_detected = ref false in
228228+ let last_was_tool = ref false in
229229+230230+ begin
231231+ try
232232+ Eio.Switch.run @@ fun inner_sw ->
233233+ let client = Claude.Client.create ~sw:inner_sw ~process_mgr:proc_mgr
234234+ ~clock:(Eio.Stdenv.clock env) ~options () in
235235+236236+ emit Event.Thinking;
237237+238238+ let handler = object
239239+ inherit Claude.Handler.default
240240+241241+ method! on_text text =
242242+ let content = Claude.Response.Text.content text in
243243+ accumulated_response := !accumulated_response ^ content;
244244+ if !last_was_tool then begin
245245+ Format.printf "@.";
246246+ last_was_tool := false
247247+ end;
248248+ Format.printf "%s@?" content;
249249+ emit (Event.Text (Printf.sprintf "[%s] %s" project content));
250250+ if string_contains ~sub:completion_promise !accumulated_response then
251251+ completion_detected := true
252252+253253+ method! on_thinking thinking =
254254+ let content = Claude.Response.Thinking.content thinking in
255255+ Format.printf "%s%s 💭 %s%s@." Color.dim Color.magenta content Color.reset
256256+257257+ method! on_tool_use tool =
258258+ (* Just log tool usage - execution is handled by Claude CLI for built-in
259259+ tools and by MCP server for custom tools *)
260260+ let name = Claude.Response.Tool_use.name tool in
261261+ let id = Claude.Response.Tool_use.id tool in
262262+ let input = Claude.Response.Tool_use.input tool in
263263+264264+ let call_summary = format_tool_call name input in
265265+266266+ if config.verbose then
267267+ log (Printf.sprintf "Tool %s (id: %s)" name id);
268268+269269+ emit (Event.Tool_call { id; name; input = call_summary });
270270+271271+ (* Print tool call with color *)
272272+ Format.printf " %s→%s %s@." Color.cyan Color.reset call_summary;
273273+ last_was_tool := true
274274+275275+ method! on_tool_result result =
276276+ (* Log tool results for observability *)
277277+ let tool_use_id = Claude.Content_block.Tool_result.tool_use_id result in
278278+ let is_error = Claude.Content_block.Tool_result.is_error result
279279+ |> Option.value ~default:false in
280280+ let result_color = if is_error then Color.red else Color.green in
281281+ let status = if is_error then "ERROR" else "ok" in
282282+ Format.printf " %s←%s %s@." result_color Color.reset status;
283283+ emit (Event.Tool_result { id = tool_use_id; name = ""; output = status; is_error })
284284+285285+ method! on_complete result =
286286+ let cost = Claude.Response.Complete.total_cost_usd result in
287287+ emit (Event.Turn_complete { turn = iteration; cost_usd = cost })
288288+289289+ method! on_error err =
290290+ let msg = Claude.Response.Error.message err in
291291+ log (Printf.sprintf "Error: %s" msg);
292292+ Format.printf "%s%sError: %s%s@." Color.bold Color.red msg Color.reset;
293293+ emit (Event.Error (Printf.sprintf "[%s] %s" project msg))
294294+ end in
295295+296296+ (* Same prompt every iteration - ralph-loop style *)
297297+ Claude.Client.query client iteration_prompt;
298298+ Claude.Client.run client ~handler
299299+ with exn ->
300300+ let msg = Printexc.to_string exn in
301301+ log (Printf.sprintf "Exception: %s" msg);
302302+ emit (Event.Error (Printf.sprintf "[%s] %s" project msg))
303303+ end;
304304+305305+ (* Check if we should stop *)
306306+ if !completion_detected then begin
307307+ log "Completion promise detected!";
308308+ Format.printf "@.%s%s✓ Completion promise detected%s@." Color.bold Color.green Color.reset;
309309+ emit (Event.Text (Printf.sprintf "\n[%s] ✓ Completion promise detected.\n" project))
310310+ end else
311311+ ralph_loop (iteration + 1)
312312+ end
313313+ in
314314+315315+ ralph_loop 1;
316316+ log "Ralph-loop complete";
317317+ Format.printf "@.%s%s─── Project complete: %s ───%s@." Color.dim Color.blue project Color.reset;
318318+ emit (Event.Text (Printf.sprintf "\n=== Ralph-loop complete for %s ===\n" project))
319319+320320+(* Main entry point *)
321321+let run ~env ~config ~workspace_path () =
322322+ Random.self_init ();
323323+324324+ let fs = Eio.Stdenv.fs env in
325325+ let net = Eio.Stdenv.net env in
326326+327327+ (* Find unpac root *)
328328+ let root = match find_root_from fs workspace_path with
329329+ | Some r -> r
330330+ | None ->
331331+ Format.eprintf "Error: '%s' is not an unpac workspace.@." workspace_path;
332332+ exit 1
333333+ in
334334+335335+ Log.info (fun m -> m "Starting ralph-loop agent in workspace: %s" (snd root));
336336+337337+ (* Get projects to process *)
338338+ let all_projects = Unpac.Worktree.list_projects ~proc_mgr:(Eio.Stdenv.process_mgr env) root in
339339+340340+ if all_projects = [] then begin
341341+ Format.eprintf "No projects found in workspace.@.";
342342+ exit 1
343343+ end;
344344+345345+ let projects = match config.project with
346346+ | Some p ->
347347+ if List.mem p all_projects then [p]
348348+ else begin
349349+ Format.eprintf "Project '%s' not found. Available: %s@."
350350+ p (String.concat ", " all_projects);
351351+ exit 1
352352+ end
353353+ | None ->
354354+ shuffle all_projects
355355+ in
356356+357357+ Log.info (fun m -> m "Projects to process: %s" (String.concat ", " projects));
358358+359359+ (* Create shared event bus *)
360360+ let event_bus = Event.create_bus () in
361361+362362+ Eio.Switch.run @@ fun sw ->
363363+364364+ (* Start web server if enabled *)
365365+ (match config.web_port with
366366+ | Some port ->
367367+ let _web = Web.start ~sw ~net ~port event_bus in
368368+ Log.info (fun m -> m "Web UI available at http://localhost:%d" port);
369369+ Format.printf "Web UI: http://localhost:%d@." port
370370+ | None -> ());
371371+372372+ Event.emit event_bus Event.Agent_start;
373373+374374+ Format.printf "%s%sRalph-loop agent starting...%s@." Color.bold Color.cyan Color.reset;
375375+ Format.printf " %sModel:%s Opus 4.5@." Color.dim Color.reset;
376376+ Format.printf " %sMax iterations:%s %d@." Color.dim Color.reset config.max_iterations;
377377+ Format.printf " %sCompletion promise:%s %s@." Color.dim Color.reset completion_promise;
378378+ Format.printf " %sCustom tools:%s mcp__unpac__* (via MCP server)@." Color.dim Color.reset;
379379+ Format.printf " %sProjects (%d):%s %s@."
380380+ Color.dim (List.length projects) Color.reset (String.concat ", " projects);
381381+382382+ (* Process projects sequentially *)
383383+ List.iter (fun project ->
384384+ run_project_ralph_loop ~env ~config ~root ~project ~event_bus
385385+ ) projects;
386386+387387+ Event.emit event_bus Event.Agent_stop;
388388+389389+ Format.printf "@.%s%s✓ All projects complete.%s@." Color.bold Color.green Color.reset
+45
lib/claude/agent.mli
···11+(** Ralph-loop style Claude agent for unpac workspace analysis.
22+33+ Runs a single autonomous agent per project using ralph-loop iteration:
44+ - Same prompt fed each iteration (state persists in files)
55+ - Up to 20 iterations per project
66+ - Early exit on completion promise
77+ - Projects processed sequentially in random order *)
88+99+(** {1 Agent Configuration} *)
1010+1111+type config = {
1212+ verbose : bool;
1313+ web_port : int option; (** Port for web UI, None = disabled *)
1414+ max_iterations : int; (** Max ralph-loop iterations per project *)
1515+ project : string option; (** Specific project, or None for all *)
1616+}
1717+1818+val default_config : config
1919+2020+(** {1 Completion Promise} *)
2121+2222+val completion_promise : string
2323+(** The phrase that signals work is complete: "AGENTIC-HUMPS-COUNT-2" *)
2424+2525+(** {1 Running Agents} *)
2626+2727+val run :
2828+ env:Eio_unix.Stdenv.base ->
2929+ config:config ->
3030+ workspace_path:string ->
3131+ unit ->
3232+ unit
3333+(** [run ~env ~config ~workspace_path ()] runs ralph-loop agents for projects
3434+ in the workspace.
3535+3636+ If [config.project] is specified, runs only that project.
3737+ Otherwise, runs all projects sequentially in random order.
3838+3939+ Each project agent:
4040+ - Uses Opus 4.5 model
4141+ - Runs up to [max_iterations] iterations
4242+ - Exits early if response contains [completion_promise]
4343+ - Works from [workspace/.unpac-claude/project/] directory
4444+4545+ The function blocks until all projects complete. *)
···11+(** Event types emitted by the Claude agent for live UI updates. *)
22+33+type tool_call = {
44+ id : string;
55+ name : string;
66+ input : string; (* JSON string *)
77+}
88+99+type tool_result = {
1010+ id : string;
1111+ name : string;
1212+ output : string;
1313+ is_error : bool;
1414+}
1515+1616+type t =
1717+ | Thinking
1818+ | Text of string
1919+ | Tool_call of tool_call
2020+ | Tool_result of tool_result
2121+ | Error of string
2222+ | Sync of string (* "status" or "push" *)
2323+ | Turn_complete of { turn : int; cost_usd : float option }
2424+ | Agent_start
2525+ | Agent_stop
2626+2727+(* Simple JSON string escaping *)
2828+let escape_json_string s =
2929+ let buf = Buffer.create (String.length s + 16) in
3030+ Buffer.add_char buf '"';
3131+ String.iter (fun c ->
3232+ match c with
3333+ | '"' -> Buffer.add_string buf "\\\""
3434+ | '\\' -> Buffer.add_string buf "\\\\"
3535+ | '\n' -> Buffer.add_string buf "\\n"
3636+ | '\r' -> Buffer.add_string buf "\\r"
3737+ | '\t' -> Buffer.add_string buf "\\t"
3838+ | c when Char.code c < 32 ->
3939+ Buffer.add_string buf (Printf.sprintf "\\u%04x" (Char.code c))
4040+ | c -> Buffer.add_char buf c
4141+ ) s;
4242+ Buffer.add_char buf '"';
4343+ Buffer.contents buf
4444+4545+let to_json = function
4646+ | Thinking ->
4747+ {|{"type":"thinking"}|}
4848+ | Text s ->
4949+ Printf.sprintf {|{"type":"text","content":%s}|}
5050+ (escape_json_string s)
5151+ | Tool_call { id; name; input } ->
5252+ Printf.sprintf {|{"type":"tool_call","id":%s,"name":%s,"input":%s}|}
5353+ (escape_json_string id)
5454+ (escape_json_string name)
5555+ input (* input is already JSON *)
5656+ | Tool_result { id; name; output; is_error } ->
5757+ Printf.sprintf {|{"type":"tool_result","id":%s,"name":%s,"output":%s,"is_error":%b}|}
5858+ (escape_json_string id)
5959+ (escape_json_string name)
6060+ (escape_json_string output)
6161+ is_error
6262+ | Error msg ->
6363+ Printf.sprintf {|{"type":"error","message":%s}|}
6464+ (escape_json_string msg)
6565+ | Sync action ->
6666+ Printf.sprintf {|{"type":"sync","action":%s}|}
6767+ (escape_json_string action)
6868+ | Turn_complete { turn; cost_usd } ->
6969+ let cost_str = match cost_usd with
7070+ | Some c -> Printf.sprintf "%.6f" c
7171+ | None -> "null"
7272+ in
7373+ Printf.sprintf {|{"type":"turn_complete","turn":%d,"cost_usd":%s}|}
7474+ turn cost_str
7575+ | Agent_start ->
7676+ {|{"type":"agent_start"}|}
7777+ | Agent_stop ->
7878+ {|{"type":"agent_stop"}|}
7979+8080+(* Event bus for broadcasting to listeners *)
8181+type listener = t -> unit
8282+8383+type bus = {
8484+ mutable listeners : listener list;
8585+}
8686+8787+let create_bus () = { listeners = [] }
8888+8989+let subscribe bus listener =
9090+ bus.listeners <- listener :: bus.listeners
9191+9292+let unsubscribe bus listener =
9393+ bus.listeners <- List.filter (fun l -> l != listener) bus.listeners
9494+9595+let emit bus event =
9696+ List.iter (fun l -> l event) bus.listeners
+41
lib/claude/event.mli
···11+(** Event types for live agent UI updates. *)
22+33+(** Tool call event data. *)
44+type tool_call = {
55+ id : string;
66+ name : string;
77+ input : string;
88+}
99+1010+(** Tool result event data. *)
1111+type tool_result = {
1212+ id : string;
1313+ name : string;
1414+ output : string;
1515+ is_error : bool;
1616+}
1717+1818+(** Agent events. *)
1919+type t =
2020+ | Thinking
2121+ | Text of string
2222+ | Tool_call of tool_call
2323+ | Tool_result of tool_result
2424+ | Error of string
2525+ | Sync of string
2626+ | Turn_complete of { turn : int; cost_usd : float option }
2727+ | Agent_start
2828+ | Agent_stop
2929+3030+val to_json : t -> string
3131+(** Convert event to JSON string. *)
3232+3333+(** Event bus for broadcasting to listeners. *)
3434+3535+type listener = t -> unit
3636+type bus
3737+3838+val create_bus : unit -> bus
3939+val subscribe : bus -> listener -> unit
4040+val unsubscribe : bus -> listener -> unit
4141+val emit : bus -> t -> unit
+423
lib/claude/prompt.ml
···11+(** Dynamic system prompt generation for autonomous Claude agent. *)
22+33+let src = Logs.Src.create "unpac.claude.prompt" ~doc:"Prompt generation"
44+module Log = (val Logs.src_log src : Logs.LOG)
55+66+let autonomous_base_prompt = {|You are an autonomous code maintenance agent for OCaml projects in an unpac workspace.
77+88+## Your Mission
99+1010+You continuously analyze and improve the codebase by:
1111+1212+1. **Analyzing Projects**: Review each project's code, STATUS.md, and tests
1313+2. **Completing Features**: Implement incomplete functionality marked in STATUS.md
1414+3. **Recording Status**: Faithfully update STATUS.md with current state and shortcomings
1515+4. **Code Quality**: Refactor using OCaml Stdlib combinators and higher-order functions
1616+5. **Test Coverage**: Identify missing tests and add them where needed
1717+6. **Syncing Changes**: Regularly run unpac_status_sync and unpac_push to keep remote updated
1818+1919+## OCaml Code Quality Guidelines
2020+2121+When reviewing and improving OCaml code, look for:
2222+2323+### Replace Imperative Patterns with Functional Idioms
2424+- Replace `for` loops with `List.iter`, `List.map`, `List.fold_left`
2525+- Replace mutable refs with functional accumulation
2626+- Use `Option.map`, `Option.bind`, `Result.map`, `Result.bind` instead of pattern matching
2727+- Use `|>` pipeline operator for cleaner composition
2828+2929+### Stdlib Combinators to Prefer
3030+```ocaml
3131+(* Instead of manual recursion, use: *)
3232+List.filter_map (* filter and map in one pass *)
3333+List.concat_map (* map then flatten *)
3434+List.find_opt (* safe find *)
3535+List.assoc_opt (* safe association lookup *)
3636+Option.value (* provide default *)
3737+Option.join (* flatten option option *)
3838+String.concat (* join strings *)
3939+String.split_on_char
4040+```
4141+4242+### Common Refactoring Patterns
4343+```ocaml
4444+(* BEFORE: *)
4545+let result = ref [] in
4646+List.iter (fun x ->
4747+ if pred x then result := transform x :: !result
4848+) items;
4949+List.rev !result
5050+5151+(* AFTER: *)
5252+items |> List.filter_map (fun x ->
5353+ if pred x then Some (transform x) else None
5454+)
5555+5656+(* BEFORE: *)
5757+match opt with
5858+| Some x -> Some (f x)
5959+| None -> None
6060+6161+(* AFTER: *)
6262+Option.map f opt
6363+6464+(* BEFORE: *)
6565+match foo () with
6666+| Ok x -> bar x
6767+| Error _ as e -> e
6868+6969+(* AFTER: *)
7070+Result.bind (foo ()) bar
7171+```
7272+7373+## STATUS.md Format
7474+7575+Each project should have a STATUS.md with:
7676+7777+```markdown
7878+# Project Name
7979+8080+**Status**: [STUB | IN_PROGRESS | COMPLETE | NEEDS_REVIEW]
8181+8282+## Overview
8383+Brief description of what this project does.
8484+8585+## Current State
8686+- What is implemented
8787+- What works
8888+8989+## TODO
9090+- [ ] Task 1
9191+- [ ] Task 2
9292+- [x] Completed task
9393+9494+## Known Issues
9595+- Issue 1
9696+- Issue 2
9797+9898+## Test Coverage
9999+- What is tested
100100+- What needs tests
101101+102102+## Dependencies
103103+- Required packages
104104+```
105105+106106+## Workflow
107107+108108+1. **Start**: List all projects with unpac_project_list
109109+2. **For each project**:
110110+ - Read STATUS.md if it exists
111111+ - Glob all *.ml files
112112+ - Read key source files
113113+ - Analyze code quality
114114+ - Check for tests (look for test/ or *_test.ml files)
115115+ - Update STATUS.md with findings
116116+ - Make small, focused improvements
117117+ - Commit changes with clear messages
118118+3. **Periodically**: Run unpac_status_sync and unpac_push to sync
119119+120120+## Rate Limit Handling
121121+122122+If you encounter rate limit errors:
123123+- Wait the indicated time before retrying
124124+- The system will handle backoff automatically
125125+- Focus on one project at a time to avoid rapid API calls
126126+127127+## Important Rules
128128+129129+1. **Small Changes**: Make incremental improvements, not sweeping rewrites
130130+2. **Commit Often**: Commit after each logical change
131131+3. **Document**: Always update STATUS.md to reflect current state
132132+4. **Test First**: Run dune build before committing code changes
133133+5. **Push Regularly**: Keep remote in sync with local changes
134134+6. **Be Honest**: Record actual shortcomings, don't hide problems
135135+136136+## Available Tools
137137+138138+You have access to these tools:
139139+- **unpac_status**: Get workspace overview
140140+- **unpac_status_sync**: Update README.md and sync state
141141+- **unpac_push**: Push all branches to remote
142142+- **unpac_project_list**: List projects
143143+- **unpac_opam_list**: List vendored packages
144144+- **unpac_git_list**: List vendored git repos
145145+- **read_file**: Read source code and config files
146146+- **write_file**: Update code or STATUS.md
147147+- **list_directory**: Explore directory structure
148148+- **glob_files**: Find files by pattern
149149+- **run_shell**: Run dune build/test commands
150150+- **git_commit**: Commit changes
151151+152152+Start by getting the workspace status and listing all projects.
153153+|}
154154+155155+let interactive_base_prompt = {|You are an autonomous coding agent running in an unpac workspace.
156156+157157+Unpac is a monorepo vendoring tool that uses git worktrees to manage dependencies.
158158+It supports two backends:
159159+1. **opam** - OCaml package vendoring with dependency solving
160160+2. **git** - Direct git repository vendoring without solving
161161+162162+Both backends use a three-tier branch model:
163163+- upstream/* - pristine upstream code
164164+- vendor/* - history-rewritten with vendor/<backend>/<name>/ prefix
165165+- patches/* - local modifications
166166+167167+This architecture allows:
168168+- Full git history preservation (git blame/log work)
169169+- Conflict-free merging into multiple project branches
170170+- Local patches that survive upstream updates
171171+172172+Your role is to help explore and develop code in this workspace. You can:
173173+- Add new git repositories as dependencies
174174+- Explore vendored code
175175+- Make local patches
176176+- Merge dependencies into projects
177177+- Analyze and improve code quality
178178+- Update STATUS.md documentation
179179+180180+Always use the provided tools to interact with unpac. Query the workspace state
181181+before making changes to understand the current configuration.
182182+|}
183183+184184+let run_help ~proc_mgr =
185185+ try
186186+ (* Run unpac --help to get CLI documentation *)
187187+ let output = Eio.Switch.run @@ fun sw ->
188188+ let stdout_buf = Buffer.create 4096 in
189189+ let stdout_r, stdout_w = Eio.Process.pipe proc_mgr ~sw in
190190+ let child = Eio.Process.spawn proc_mgr ~sw
191191+ ~stdout:stdout_w
192192+ ["unpac"; "--help"]
193193+ in
194194+ Eio.Flow.close stdout_w;
195195+ (* Read output *)
196196+ let chunk = Cstruct.create 4096 in
197197+ let rec loop () =
198198+ match Eio.Flow.single_read stdout_r chunk with
199199+ | n ->
200200+ Buffer.add_string stdout_buf (Cstruct.to_string (Cstruct.sub chunk 0 n));
201201+ loop ()
202202+ | exception End_of_file -> ()
203203+ in
204204+ loop ();
205205+ ignore (Eio.Process.await child);
206206+ Buffer.contents stdout_buf
207207+ in
208208+ Some output
209209+ with _ ->
210210+ Log.warn (fun m -> m "Could not run unpac --help");
211211+ None
212212+213213+let read_architecture ~root =
214214+ try
215215+ let main_path = Unpac.Worktree.path root Unpac.Worktree.Main in
216216+ let arch_path = Eio.Path.(main_path / "ARCHITECTURE.md") in
217217+ if Eio.Path.is_file arch_path then begin
218218+ let content = Eio.Path.load arch_path in
219219+ (* Truncate if too long *)
220220+ let max_len = 8000 in
221221+ if String.length content > max_len then
222222+ Some (String.sub content 0 max_len ^ "\n\n[... truncated ...]")
223223+ else
224224+ Some content
225225+ end else
226226+ None
227227+ with _ ->
228228+ Log.debug (fun m -> m "No ARCHITECTURE.md found");
229229+ None
230230+231231+let get_workspace_state ~proc_mgr ~root =
232232+ let buf = Buffer.create 1024 in
233233+ let add s = Buffer.add_string buf s in
234234+235235+ add "## Current Workspace State\n\n";
236236+237237+ (* Projects *)
238238+ let projects = Unpac.Worktree.list_projects ~proc_mgr root in
239239+ add (Printf.sprintf "**Projects** (%d):\n" (List.length projects));
240240+ List.iter (fun p -> add (Printf.sprintf "- %s\n" p)) projects;
241241+ if projects = [] then add "- (none)\n";
242242+ add "\n";
243243+244244+ (* Git repos *)
245245+ let git_repos = Unpac.Git_backend.list_repos ~proc_mgr ~root in
246246+ add (Printf.sprintf "**Git Repositories** (%d):\n" (List.length git_repos));
247247+ List.iter (fun r -> add (Printf.sprintf "- %s\n" r)) git_repos;
248248+ if git_repos = [] then add "- (none)\n";
249249+ add "\n";
250250+251251+ (* Opam packages *)
252252+ let opam_pkgs = Unpac.Worktree.list_opam_packages ~proc_mgr root in
253253+ add (Printf.sprintf "**Opam Packages** (%d):\n" (List.length opam_pkgs));
254254+ List.iter (fun p -> add (Printf.sprintf "- %s\n" p)) opam_pkgs;
255255+ if opam_pkgs = [] then add "- (none)\n";
256256+257257+ Buffer.contents buf
258258+259259+let generate ~proc_mgr ~root ~autonomous =
260260+ let buf = Buffer.create 16384 in
261261+ let add s = Buffer.add_string buf s in
262262+263263+ (* Choose base prompt based on mode *)
264264+ if autonomous then
265265+ add autonomous_base_prompt
266266+ else
267267+ add interactive_base_prompt;
268268+269269+ add "\n\n---\n\n";
270270+271271+ (* Add CLI help if available *)
272272+ (match run_help ~proc_mgr with
273273+ | Some help ->
274274+ add "## CLI Reference\n\n";
275275+ add "```\n";
276276+ add help;
277277+ add "```\n\n";
278278+ | None -> ());
279279+280280+ (* Add architecture docs if available *)
281281+ (match read_architecture ~root with
282282+ | Some arch ->
283283+ add "## Architecture Documentation\n\n";
284284+ add arch;
285285+ add "\n\n";
286286+ | None -> ());
287287+288288+ (* Add current workspace state *)
289289+ add (get_workspace_state ~proc_mgr ~root);
290290+291291+ Buffer.contents buf
292292+293293+let project_base_prompt project project_dir = Printf.sprintf
294294+{|You are an autonomous coding agent assigned to work on the '%s' project.
295295+296296+## Your Mission
297297+298298+You are working EXCLUSIVELY on the '%s' project located at: %s
299299+300300+Your goals are to:
301301+1. **Understand**: Read and analyze all project source code
302302+2. **Document**: Update STATUS.md with accurate project state
303303+3. **Improve**: Make focused code quality improvements
304304+4. **Test**: Ensure code builds and tests pass
305305+5. **Commit**: Commit meaningful changes with clear messages
306306+307307+## OCaml Code Quality Guidelines
308308+309309+When improving code, look for:
310310+311311+### Functional Idioms
312312+- Replace `for` loops with `List.iter`, `List.map`, `List.fold_left`
313313+- Use `Option.map`, `Option.bind`, `Result.map`, `Result.bind`
314314+- Use `|>` pipeline operator for cleaner composition
315315+- Prefer `List.filter_map` over filter + map
316316+317317+### Stdlib Combinators
318318+```ocaml
319319+List.filter_map (* filter and map in one pass *)
320320+List.concat_map (* map then flatten *)
321321+List.find_opt (* safe find *)
322322+Option.value (* provide default *)
323323+Option.join (* flatten option option *)
324324+String.concat (* join strings *)
325325+String.split_on_char
326326+```
327327+328328+## STATUS.md Format
329329+330330+Maintain a STATUS.md in the project with:
331331+332332+```markdown
333333+# %s
334334+335335+**Status**: [STUB | IN_PROGRESS | COMPLETE | NEEDS_REVIEW]
336336+337337+## Overview
338338+Brief description of what this project does.
339339+340340+## Current State
341341+- What is implemented
342342+- What works
343343+344344+## TODO
345345+- [ ] Task 1
346346+- [ ] Task 2
347347+- [x] Completed task
348348+349349+## Known Issues
350350+- Issue 1
351351+352352+## Test Coverage
353353+- What is tested
354354+- What needs tests
355355+```
356356+357357+## Workflow
358358+359359+1. Check if STATUS.md exists and read it
360360+2. Glob all *.ml and *.mli files
361361+3. Read and analyze source files
362362+4. Run dune build to check compilation
363363+5. Update STATUS.md with findings
364364+6. Make focused improvements
365365+7. Commit changes
366366+8. Repeat until work is complete
367367+368368+## Completion Signal
369369+370370+When you have completed all significant work on this project:
371371+- Updated STATUS.md comprehensively
372372+- Made all reasonable improvements
373373+- Verified builds pass
374374+- No obvious remaining issues
375375+376376+Then output exactly: AGENTIC-HUMPS-COUNT-2
377377+378378+This signals the ralph-loop to stop iterating.
379379+380380+## Important Rules
381381+382382+1. **Focus**: Only work on %s - ignore other projects
383383+2. **Small Changes**: Make incremental improvements
384384+3. **Test First**: Run dune build before committing
385385+4. **Be Honest**: Record actual shortcomings in STATUS.md
386386+5. **Commit Often**: Commit after each logical change
387387+388388+## Available Tools
389389+390390+- **read_file**: Read source code and config files
391391+- **write_file**: Update code or STATUS.md
392392+- **list_directory**: Explore directory structure
393393+- **glob_files**: Find files by pattern
394394+- **run_shell**: Run dune build/test commands
395395+- **git_commit**: Commit changes
396396+- **unpac_status_sync**: Update workspace status
397397+- **unpac_push**: Push changes to remote
398398+399399+Start by exploring the project structure and reading existing files.
400400+|} project project project_dir project project
401401+402402+let generate_for_project ~proc_mgr:_ ~root ~project =
403403+ let buf = Buffer.create 16384 in
404404+ let add s = Buffer.add_string buf s in
405405+406406+ (* Get project directory *)
407407+ let project_path = Unpac.Worktree.path root (Unpac.Worktree.Project project) in
408408+ let project_dir = snd project_path in
409409+410410+ (* Add project-specific prompt *)
411411+ add (project_base_prompt project project_dir);
412412+413413+ add "\n\n---\n\n";
414414+415415+ (* Add architecture docs if available *)
416416+ (match read_architecture ~root with
417417+ | Some arch ->
418418+ add "## Workspace Architecture\n\n";
419419+ add arch;
420420+ add "\n\n";
421421+ | None -> ());
422422+423423+ Buffer.contents buf
+29
lib/claude/prompt.mli
···11+(** Dynamic system prompt generation for Claude agent.
22+33+ Builds a comprehensive system prompt by:
44+ 1. Running 'unpac --help' to get current CLI documentation
55+ 2. Reading ARCHITECTURE.md if present
66+ 3. Querying current workspace state *)
77+88+val generate :
99+ proc_mgr:Unpac.Git.proc_mgr ->
1010+ root:Unpac.Worktree.root ->
1111+ autonomous:bool ->
1212+ string
1313+(** Generate a system prompt with full unpac knowledge.
1414+ If [autonomous] is true, includes detailed instructions for autonomous
1515+ code maintenance and improvement. *)
1616+1717+val generate_for_project :
1818+ proc_mgr:Unpac.Git.proc_mgr ->
1919+ root:Unpac.Worktree.root ->
2020+ project:string ->
2121+ string
2222+(** Generate a system prompt for a specific project agent.
2323+ The agent will focus exclusively on the given project. *)
2424+2525+val autonomous_base_prompt : string
2626+(** Base system prompt for autonomous mode. *)
2727+2828+val interactive_base_prompt : string
2929+(** Base system prompt for interactive mode. *)
+679
lib/claude/tools.ml
···11+(** Tool definitions for Claude to interact with unpac and analyze code.
22+33+ Uses the Claude SDK's MCP-based custom tool architecture. Tools are
44+ defined as Claude.Tool.t values and bundled into an Mcp_server that
55+ gets registered with the Claude client. *)
66+77+let src = Logs.Src.create "unpac.claude.tools" ~doc:"Claude tools"
88+module Log = (val Logs.src_log src : Logs.LOG)
99+1010+(* Helper to truncate long output *)
1111+let truncate_output ?(max_len=50000) s =
1212+ if String.length s > max_len then
1313+ String.sub s 0 max_len ^ "\n\n[... truncated ...]"
1414+ else s
1515+1616+(* Tool result helpers - convert to Claude.Tool format *)
1717+let ok s = Ok (Claude.Tool.text_result s)
1818+let err s = Error s
1919+2020+(* === TOOL IMPLEMENTATIONS === *)
2121+2222+(* Git list tool *)
2323+let git_list ~proc_mgr ~root () =
2424+ try
2525+ let repos = Unpac.Git_backend.list_repos ~proc_mgr ~root in
2626+ if repos = [] then
2727+ ok "No git repositories vendored.\n\nTo add one: use git_add tool with url parameter."
2828+ else
2929+ let buf = Buffer.create 256 in
3030+ Buffer.add_string buf "Vendored git repositories:\n";
3131+ List.iter (fun r -> Buffer.add_string buf (Printf.sprintf "- %s\n" r)) repos;
3232+ ok (Buffer.contents buf)
3333+ with exn ->
3434+ err (Printf.sprintf "Failed to list git repos: %s" (Printexc.to_string exn))
3535+3636+(* Git add tool *)
3737+let git_add ~proc_mgr ~fs ~root ~url ?name ?branch ?subdir () =
3838+ try
3939+ let repo_name = match name with
4040+ | Some n -> n
4141+ | None ->
4242+ let base = Filename.basename url in
4343+ if String.ends_with ~suffix:".git" base then
4444+ String.sub base 0 (String.length base - 4)
4545+ else base
4646+ in
4747+4848+ let info : Unpac.Git_backend.repo_info = {
4949+ name = repo_name;
5050+ url;
5151+ branch;
5252+ subdir;
5353+ } in
5454+5555+ let config_path = Filename.concat (snd (Unpac.Worktree.path root Unpac.Worktree.Main))
5656+ "unpac.toml" in
5757+ let cache = if Sys.file_exists config_path then begin
5858+ match Unpac.Config.load config_path with
5959+ | Ok config -> Unpac.Config.resolve_vendor_cache config
6060+ | Error _ -> None
6161+ end else None in
6262+6363+ let cache = match cache with
6464+ | Some path -> Some (Eio.Path.(fs / path))
6565+ | None -> None
6666+ in
6767+6868+ match Unpac.Git_backend.add_repo ~proc_mgr ~root ?cache info with
6969+ | Unpac.Backend.Added { name = added_name; sha } ->
7070+ ok (Printf.sprintf
7171+ "Successfully added repository '%s' (commit %s).\n\n\
7272+ Next steps:\n\
7373+ - Use git_info %s to see repository details\n\
7474+ - Use git_diff %s to see any local changes\n\
7575+ - Merge into a project when ready" added_name (String.sub sha 0 7)
7676+ added_name added_name)
7777+ | Unpac.Backend.Already_exists name ->
7878+ ok (Printf.sprintf "Repository '%s' is already vendored." name)
7979+ | Unpac.Backend.Failed { name; error } ->
8080+ err (Printf.sprintf "Failed to add '%s': %s" name error)
8181+ with exn ->
8282+ err (Printf.sprintf "Failed to add repository: %s" (Printexc.to_string exn))
8383+8484+(* Git info tool *)
8585+let git_info ~proc_mgr ~root ~name () =
8686+ try
8787+ let git = Unpac.Worktree.git_dir root in
8888+ let repos = Unpac.Git_backend.list_repos ~proc_mgr ~root in
8989+ if not (List.mem name repos) then
9090+ err (Printf.sprintf "Repository '%s' is not vendored" name)
9191+ else begin
9292+ let buf = Buffer.create 512 in
9393+ let add s = Buffer.add_string buf s in
9494+9595+ add (Printf.sprintf "Repository: %s\n" name);
9696+9797+ let remote = "origin-" ^ name in
9898+ (match Unpac.Git.remote_url ~proc_mgr ~cwd:git remote with
9999+ | Some u -> add (Printf.sprintf "URL: %s\n" u)
100100+ | None -> ());
101101+102102+ let upstream = Unpac.Git_backend.upstream_branch name in
103103+ let vendor = Unpac.Git_backend.vendor_branch name in
104104+ let patches = Unpac.Git_backend.patches_branch name in
105105+106106+ (match Unpac.Git.rev_parse ~proc_mgr ~cwd:git upstream with
107107+ | Some sha -> add (Printf.sprintf "Upstream: %s\n" (String.sub sha 0 7))
108108+ | None -> ());
109109+ (match Unpac.Git.rev_parse ~proc_mgr ~cwd:git vendor with
110110+ | Some sha -> add (Printf.sprintf "Vendor: %s\n" (String.sub sha 0 7))
111111+ | None -> ());
112112+ (match Unpac.Git.rev_parse ~proc_mgr ~cwd:git patches with
113113+ | Some sha -> add (Printf.sprintf "Patches: %s\n" (String.sub sha 0 7))
114114+ | None -> ());
115115+116116+ let log_output = Unpac.Git.run_exn ~proc_mgr ~cwd:git
117117+ ["log"; "--oneline"; vendor ^ ".." ^ patches] in
118118+ let commits = List.length (String.split_on_char '\n' log_output |>
119119+ List.filter (fun s -> String.trim s <> "")) in
120120+ add (Printf.sprintf "Local commits: %d\n" commits);
121121+122122+ ok (Buffer.contents buf)
123123+ end
124124+ with exn ->
125125+ err (Printf.sprintf "Failed to get info for '%s': %s" name (Printexc.to_string exn))
126126+127127+(* Git diff tool *)
128128+let git_diff ~proc_mgr ~root ~name () =
129129+ try
130130+ let git = Unpac.Worktree.git_dir root in
131131+ let repos = Unpac.Git_backend.list_repos ~proc_mgr ~root in
132132+ if not (List.mem name repos) then
133133+ err (Printf.sprintf "Repository '%s' is not vendored" name)
134134+ else begin
135135+ let vendor = Unpac.Git_backend.vendor_branch name in
136136+ let patches = Unpac.Git_backend.patches_branch name in
137137+ let diff = Unpac.Git.run_exn ~proc_mgr ~cwd:git ["diff"; vendor; patches] in
138138+ if String.trim diff = "" then
139139+ ok (Printf.sprintf "No local changes in '%s'." name)
140140+ else
141141+ ok (truncate_output (Printf.sprintf "Diff for '%s':\n\n%s" name diff))
142142+ end
143143+ with exn ->
144144+ err (Printf.sprintf "Failed to get diff for '%s': %s" name (Printexc.to_string exn))
145145+146146+(* Opam list tool *)
147147+let opam_list ~proc_mgr ~root () =
148148+ try
149149+ let pkgs = Unpac.Worktree.list_opam_packages ~proc_mgr root in
150150+ if pkgs = [] then
151151+ ok "No opam packages vendored."
152152+ else begin
153153+ let buf = Buffer.create 256 in
154154+ Buffer.add_string buf "Vendored opam packages:\n";
155155+ List.iter (fun p -> Buffer.add_string buf (Printf.sprintf "- %s\n" p)) pkgs;
156156+ ok (Buffer.contents buf)
157157+ end
158158+ with exn ->
159159+ err (Printf.sprintf "Failed to list opam packages: %s" (Printexc.to_string exn))
160160+161161+(* Project list tool *)
162162+let project_list ~proc_mgr ~root () =
163163+ try
164164+ let projects = Unpac.Worktree.list_projects ~proc_mgr root in
165165+ if projects = [] then
166166+ ok "No projects configured."
167167+ else begin
168168+ let buf = Buffer.create 256 in
169169+ Buffer.add_string buf "Projects:\n";
170170+ List.iter (fun p -> Buffer.add_string buf (Printf.sprintf "- %s\n" p)) projects;
171171+ ok (Buffer.contents buf)
172172+ end
173173+ with exn ->
174174+ err (Printf.sprintf "Failed to list projects: %s" (Printexc.to_string exn))
175175+176176+(* Status tool - overview of the workspace *)
177177+let status ~proc_mgr ~root () =
178178+ try
179179+ let buf = Buffer.create 1024 in
180180+ let add s = Buffer.add_string buf s in
181181+182182+ add "=== Unpac Workspace Status ===\n\n";
183183+184184+ let projects = Unpac.Worktree.list_projects ~proc_mgr root in
185185+ add (Printf.sprintf "Projects (%d):\n" (List.length projects));
186186+ List.iter (fun p -> add (Printf.sprintf " - %s\n" p)) projects;
187187+ if projects = [] then add " (none)\n";
188188+ add "\n";
189189+190190+ let git_repos = Unpac.Git_backend.list_repos ~proc_mgr ~root in
191191+ add (Printf.sprintf "Git Repositories (%d):\n" (List.length git_repos));
192192+ List.iter (fun r -> add (Printf.sprintf " - %s\n" r)) git_repos;
193193+ if git_repos = [] then add " (none)\n";
194194+ add "\n";
195195+196196+ let opam_pkgs = Unpac.Worktree.list_opam_packages ~proc_mgr root in
197197+ add (Printf.sprintf "Opam Packages (%d):\n" (List.length opam_pkgs));
198198+ List.iter (fun p -> add (Printf.sprintf " - %s\n" p)) opam_pkgs;
199199+ if opam_pkgs = [] then add " (none)\n";
200200+201201+ ok (Buffer.contents buf)
202202+ with exn ->
203203+ err (Printf.sprintf "Failed to get status: %s" (Printexc.to_string exn))
204204+205205+(* Read file tool *)
206206+let read_file ~fs ~path () =
207207+ try
208208+ let full_path = Eio.Path.(fs / path) in
209209+ if not (Eio.Path.is_file full_path) then
210210+ err (Printf.sprintf "File not found: %s" path)
211211+ else begin
212212+ let content = Eio.Path.load full_path in
213213+ ok (truncate_output content)
214214+ end
215215+ with exn ->
216216+ err (Printf.sprintf "Failed to read '%s': %s" path (Printexc.to_string exn))
217217+218218+(* Write file tool *)
219219+let write_file ~fs ~path ~content () =
220220+ try
221221+ let full_path = Eio.Path.(fs / path) in
222222+ let parent = Filename.dirname path in
223223+ if parent <> "." && parent <> "/" then begin
224224+ let parent_path = Eio.Path.(fs / parent) in
225225+ Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 parent_path
226226+ end;
227227+ Eio.Path.save ~create:(`Or_truncate 0o644) full_path content;
228228+ ok (Printf.sprintf "Successfully wrote %d bytes to %s" (String.length content) path)
229229+ with exn ->
230230+ err (Printf.sprintf "Failed to write '%s': %s" path (Printexc.to_string exn))
231231+232232+(* List directory tool *)
233233+let list_dir ~fs ~path () =
234234+ try
235235+ let full_path = Eio.Path.(fs / path) in
236236+ if not (Eio.Path.is_directory full_path) then
237237+ err (Printf.sprintf "Not a directory: %s" path)
238238+ else begin
239239+ let entries = Eio.Path.read_dir full_path in
240240+ let entries = List.sort String.compare entries in
241241+ let buf = Buffer.create 256 in
242242+ Buffer.add_string buf (Printf.sprintf "Contents of %s:\n" path);
243243+ List.iter (fun e ->
244244+ let entry_path = Eio.Path.(full_path / e) in
245245+ let suffix = if Eio.Path.is_directory entry_path then "/" else "" in
246246+ Buffer.add_string buf (Printf.sprintf " %s%s\n" e suffix)
247247+ ) entries;
248248+ ok (Buffer.contents buf)
249249+ end
250250+ with exn ->
251251+ err (Printf.sprintf "Failed to list '%s': %s" path (Printexc.to_string exn))
252252+253253+(* Glob files tool *)
254254+let glob_files ~fs ~pattern ~base_path () =
255255+ try
256256+ let full_base = Eio.Path.(fs / base_path) in
257257+ if not (Eio.Path.is_directory full_base) then
258258+ err (Printf.sprintf "Base path not a directory: %s" base_path)
259259+ else begin
260260+ let results = ref [] in
261261+ let rec walk dir rel_path =
262262+ let entries = try Eio.Path.read_dir dir with _ -> [] in
263263+ List.iter (fun name ->
264264+ let entry_path = Eio.Path.(dir / name) in
265265+ let rel = if rel_path = "" then name else rel_path ^ "/" ^ name in
266266+ if Eio.Path.is_directory entry_path then
267267+ walk entry_path rel
268268+ else begin
269269+ let matches =
270270+ if String.starts_with ~prefix:"**/" pattern then
271271+ let ext = String.sub pattern 3 (String.length pattern - 3) in
272272+ String.ends_with ~suffix:ext name
273273+ else if String.starts_with ~prefix:"*" pattern then
274274+ let ext = String.sub pattern 1 (String.length pattern - 1) in
275275+ String.ends_with ~suffix:ext name
276276+ else
277277+ name = pattern
278278+ in
279279+ if matches then results := rel :: !results
280280+ end
281281+ ) entries
282282+ in
283283+ walk full_base "";
284284+ let files = List.sort String.compare !results in
285285+ if files = [] then
286286+ ok (Printf.sprintf "No files matching '%s' in %s" pattern base_path)
287287+ else begin
288288+ let buf = Buffer.create 256 in
289289+ Buffer.add_string buf (Printf.sprintf "Files matching '%s' in %s:\n" pattern base_path);
290290+ List.iter (fun f -> Buffer.add_string buf (Printf.sprintf " %s\n" f)) files;
291291+ ok (Buffer.contents buf)
292292+ end
293293+ end
294294+ with exn ->
295295+ err (Printf.sprintf "Failed to glob '%s': %s" pattern (Printexc.to_string exn))
296296+297297+(* Shell execution tool *)
298298+let run_shell ~proc_mgr ~fs ~cwd ~command () =
299299+ try
300300+ let result = Eio.Switch.run @@ fun sw ->
301301+ let stdout_buf = Buffer.create 4096 in
302302+ let stderr_buf = Buffer.create 4096 in
303303+ let stdout_r, stdout_w = Eio.Process.pipe proc_mgr ~sw in
304304+ let stderr_r, stderr_w = Eio.Process.pipe proc_mgr ~sw in
305305+306306+ let cwd_path = Eio.Path.(fs / cwd) in
307307+308308+ let child = Eio.Process.spawn proc_mgr ~sw
309309+ ~cwd:(cwd_path :> Eio.Fs.dir_ty Eio.Path.t)
310310+ ~stdout:stdout_w ~stderr:stderr_w
311311+ ["sh"; "-c"; command]
312312+ in
313313+ Eio.Flow.close stdout_w;
314314+ Eio.Flow.close stderr_w;
315315+316316+ Eio.Fiber.both
317317+ (fun () ->
318318+ let chunk = Cstruct.create 4096 in
319319+ let rec loop () =
320320+ match Eio.Flow.single_read stdout_r chunk with
321321+ | n ->
322322+ Buffer.add_string stdout_buf (Cstruct.to_string (Cstruct.sub chunk 0 n));
323323+ loop ()
324324+ | exception End_of_file -> ()
325325+ in loop ())
326326+ (fun () ->
327327+ let chunk = Cstruct.create 4096 in
328328+ let rec loop () =
329329+ match Eio.Flow.single_read stderr_r chunk with
330330+ | n ->
331331+ Buffer.add_string stderr_buf (Cstruct.to_string (Cstruct.sub chunk 0 n));
332332+ loop ()
333333+ | exception End_of_file -> ()
334334+ in loop ());
335335+336336+ let status = Eio.Process.await child in
337337+ let stdout = Buffer.contents stdout_buf in
338338+ let stderr = Buffer.contents stderr_buf in
339339+ (status, stdout, stderr)
340340+ in
341341+ let (status, stdout, stderr) = result in
342342+ let exit_code = match status with
343343+ | `Exited c -> c
344344+ | `Signaled s -> 128 + s
345345+ in
346346+ let buf = Buffer.create 256 in
347347+ Buffer.add_string buf (Printf.sprintf "Exit code: %d\n" exit_code);
348348+ if stdout <> "" then begin
349349+ Buffer.add_string buf "\n=== STDOUT ===\n";
350350+ Buffer.add_string buf stdout
351351+ end;
352352+ if stderr <> "" then begin
353353+ Buffer.add_string buf "\n=== STDERR ===\n";
354354+ Buffer.add_string buf stderr
355355+ end;
356356+ if exit_code = 0 then
357357+ ok (truncate_output (Buffer.contents buf))
358358+ else
359359+ err (truncate_output (Buffer.contents buf))
360360+ with exn ->
361361+ err (Printf.sprintf "Failed to run command: %s" (Printexc.to_string exn))
362362+363363+(* Unpac status sync *)
364364+let unpac_status_sync ~proc_mgr ~root () =
365365+ try
366366+ let main_wt = Unpac.Worktree.path root Unpac.Worktree.Main in
367367+ let result = Eio.Switch.run @@ fun sw ->
368368+ let stdout_buf = Buffer.create 4096 in
369369+ let stderr_buf = Buffer.create 4096 in
370370+ let stdout_r, stdout_w = Eio.Process.pipe proc_mgr ~sw in
371371+ let stderr_r, stderr_w = Eio.Process.pipe proc_mgr ~sw in
372372+373373+ let child = Eio.Process.spawn proc_mgr ~sw
374374+ ~cwd:(main_wt :> Eio.Fs.dir_ty Eio.Path.t)
375375+ ~stdout:stdout_w ~stderr:stderr_w
376376+ ["unpac"; "status"]
377377+ in
378378+ Eio.Flow.close stdout_w;
379379+ Eio.Flow.close stderr_w;
380380+381381+ Eio.Fiber.both
382382+ (fun () ->
383383+ let chunk = Cstruct.create 4096 in
384384+ let rec loop () =
385385+ match Eio.Flow.single_read stdout_r chunk with
386386+ | n -> Buffer.add_string stdout_buf (Cstruct.to_string (Cstruct.sub chunk 0 n)); loop ()
387387+ | exception End_of_file -> ()
388388+ in loop ())
389389+ (fun () ->
390390+ let chunk = Cstruct.create 4096 in
391391+ let rec loop () =
392392+ match Eio.Flow.single_read stderr_r chunk with
393393+ | n -> Buffer.add_string stderr_buf (Cstruct.to_string (Cstruct.sub chunk 0 n)); loop ()
394394+ | exception End_of_file -> ()
395395+ in loop ());
396396+397397+ ignore (Eio.Process.await child);
398398+ Buffer.contents stdout_buf
399399+ in
400400+ ok (Printf.sprintf "Ran unpac status:\n%s" (truncate_output result))
401401+ with exn ->
402402+ err (Printf.sprintf "Failed to run unpac status: %s" (Printexc.to_string exn))
403403+404404+(* Unpac push *)
405405+let unpac_push ~proc_mgr ~root ~remote () =
406406+ try
407407+ let main_wt = Unpac.Worktree.path root Unpac.Worktree.Main in
408408+ let result = Eio.Switch.run @@ fun sw ->
409409+ let stdout_buf = Buffer.create 4096 in
410410+ let stderr_buf = Buffer.create 4096 in
411411+ let stdout_r, stdout_w = Eio.Process.pipe proc_mgr ~sw in
412412+ let stderr_r, stderr_w = Eio.Process.pipe proc_mgr ~sw in
413413+414414+ let child = Eio.Process.spawn proc_mgr ~sw
415415+ ~cwd:(main_wt :> Eio.Fs.dir_ty Eio.Path.t)
416416+ ~stdout:stdout_w ~stderr:stderr_w
417417+ ["unpac"; "push"; remote]
418418+ in
419419+ Eio.Flow.close stdout_w;
420420+ Eio.Flow.close stderr_w;
421421+422422+ Eio.Fiber.both
423423+ (fun () ->
424424+ let chunk = Cstruct.create 4096 in
425425+ let rec loop () =
426426+ match Eio.Flow.single_read stdout_r chunk with
427427+ | n -> Buffer.add_string stdout_buf (Cstruct.to_string (Cstruct.sub chunk 0 n)); loop ()
428428+ | exception End_of_file -> ()
429429+ in loop ())
430430+ (fun () ->
431431+ let chunk = Cstruct.create 4096 in
432432+ let rec loop () =
433433+ match Eio.Flow.single_read stderr_r chunk with
434434+ | n -> Buffer.add_string stderr_buf (Cstruct.to_string (Cstruct.sub chunk 0 n)); loop ()
435435+ | exception End_of_file -> ()
436436+ in loop ());
437437+438438+ let status = Eio.Process.await child in
439439+ let stdout = Buffer.contents stdout_buf in
440440+ let stderr = Buffer.contents stderr_buf in
441441+ (status, stdout, stderr)
442442+ in
443443+ let (status, stdout, stderr) = result in
444444+ let exit_code = match status with `Exited c -> c | `Signaled s -> 128 + s in
445445+ if exit_code = 0 then
446446+ ok (Printf.sprintf "Pushed to %s:\n%s" remote (truncate_output stdout))
447447+ else
448448+ err (Printf.sprintf "Push failed (exit %d):\n%s\n%s" exit_code stdout stderr)
449449+ with exn ->
450450+ err (Printf.sprintf "Failed to push: %s" (Printexc.to_string exn))
451451+452452+(* Git commit tool *)
453453+let git_commit ~proc_mgr ~cwd ~message () =
454454+ try
455455+ let result = Eio.Switch.run @@ fun sw ->
456456+ let add_child = Eio.Process.spawn proc_mgr ~sw
457457+ ~cwd:(cwd :> Eio.Fs.dir_ty Eio.Path.t)
458458+ ["git"; "add"; "-A"]
459459+ in
460460+ let add_status = Eio.Process.await add_child in
461461+ (match add_status with
462462+ | `Exited 0 -> ()
463463+ | _ -> failwith "git add failed");
464464+465465+ let stdout_r, stdout_w = Eio.Process.pipe proc_mgr ~sw in
466466+ let stderr_r, stderr_w = Eio.Process.pipe proc_mgr ~sw in
467467+ let stdout_buf = Buffer.create 256 in
468468+ let stderr_buf = Buffer.create 256 in
469469+470470+ let commit_child = Eio.Process.spawn proc_mgr ~sw
471471+ ~cwd:(cwd :> Eio.Fs.dir_ty Eio.Path.t)
472472+ ~stdout:stdout_w ~stderr:stderr_w
473473+ ["git"; "commit"; "-m"; message]
474474+ in
475475+ Eio.Flow.close stdout_w;
476476+ Eio.Flow.close stderr_w;
477477+478478+ Eio.Fiber.both
479479+ (fun () ->
480480+ let chunk = Cstruct.create 1024 in
481481+ let rec loop () =
482482+ match Eio.Flow.single_read stdout_r chunk with
483483+ | n -> Buffer.add_string stdout_buf (Cstruct.to_string (Cstruct.sub chunk 0 n)); loop ()
484484+ | exception End_of_file -> ()
485485+ in loop ())
486486+ (fun () ->
487487+ let chunk = Cstruct.create 1024 in
488488+ let rec loop () =
489489+ match Eio.Flow.single_read stderr_r chunk with
490490+ | n -> Buffer.add_string stderr_buf (Cstruct.to_string (Cstruct.sub chunk 0 n)); loop ()
491491+ | exception End_of_file -> ()
492492+ in loop ());
493493+494494+ let status = Eio.Process.await commit_child in
495495+ (status, Buffer.contents stdout_buf, Buffer.contents stderr_buf)
496496+ in
497497+ let (status, stdout, stderr) = result in
498498+ match status with
499499+ | `Exited 0 -> ok (Printf.sprintf "Committed:\n%s" stdout)
500500+ | `Exited 1 when String.length stdout > 0 ->
501501+ ok "Nothing to commit (working tree clean)"
502502+ | _ -> err (Printf.sprintf "Commit failed:\n%s\n%s" stdout stderr)
503503+ with exn ->
504504+ err (Printf.sprintf "Failed to commit: %s" (Printexc.to_string exn))
505505+506506+(* === MCP SERVER CREATION === *)
507507+508508+(** Create an MCP server with all unpac tools.
509509+510510+ The server name will be "unpac" so tools are accessible as mcp__unpac__<tool_name>.
511511+ Call this with the Eio environment to create handlers with captured context. *)
512512+let create_mcp_server ~proc_mgr ~fs ~root =
513513+ let open Claude.Tool in
514514+515515+ let tools = [
516516+ (* Workspace status tools *)
517517+ create
518518+ ~name:"unpac_status"
519519+ ~description:"Get an overview of the unpac workspace, including all projects, \
520520+ vendored git repositories, and opam packages."
521521+ ~input_schema:(schema_object [] ~required:[])
522522+ ~handler:(fun _args -> status ~proc_mgr ~root ());
523523+524524+ create
525525+ ~name:"unpac_status_sync"
526526+ ~description:"Run 'unpac status' to update README.md and sync workspace state. \
527527+ Call this periodically to keep the workspace documentation current."
528528+ ~input_schema:(schema_object [] ~required:[])
529529+ ~handler:(fun _args -> unpac_status_sync ~proc_mgr ~root ());
530530+531531+ create
532532+ ~name:"unpac_push"
533533+ ~description:"Push all branches to the remote repository. Call this after making \
534534+ changes to sync with the remote."
535535+ ~input_schema:(schema_object [("remote", schema_string)] ~required:["remote"])
536536+ ~handler:(fun args ->
537537+ match Claude.Tool_input.get_string args "remote" with
538538+ | None -> err "Missing required parameter: remote"
539539+ | Some remote -> unpac_push ~proc_mgr ~root ~remote ());
540540+541541+ (* Git vendoring tools *)
542542+ create
543543+ ~name:"unpac_git_list"
544544+ ~description:"List all vendored git repositories in the workspace."
545545+ ~input_schema:(schema_object [] ~required:[])
546546+ ~handler:(fun _args -> git_list ~proc_mgr ~root ());
547547+548548+ create
549549+ ~name:"unpac_git_add"
550550+ ~description:"Vendor a new git repository. Clones the repo and creates the three-tier \
551551+ branch structure for conflict-free vendoring with full history preservation."
552552+ ~input_schema:(schema_object [
553553+ ("url", schema_string);
554554+ ("name", schema_string);
555555+ ("branch", schema_string);
556556+ ("subdir", schema_string);
557557+ ] ~required:["url"])
558558+ ~handler:(fun args ->
559559+ match Claude.Tool_input.get_string args "url" with
560560+ | None -> err "Missing required parameter: url"
561561+ | Some url ->
562562+ let name = Claude.Tool_input.get_string args "name" in
563563+ let branch = Claude.Tool_input.get_string args "branch" in
564564+ let subdir = Claude.Tool_input.get_string args "subdir" in
565565+ git_add ~proc_mgr ~fs ~root ~url ?name ?branch ?subdir ());
566566+567567+ create
568568+ ~name:"unpac_git_info"
569569+ ~description:"Show detailed information about a vendored git repository, including \
570570+ branch SHAs and number of local commits."
571571+ ~input_schema:(schema_object [("name", schema_string)] ~required:["name"])
572572+ ~handler:(fun args ->
573573+ match Claude.Tool_input.get_string args "name" with
574574+ | None -> err "Missing required parameter: name"
575575+ | Some name -> git_info ~proc_mgr ~root ~name ());
576576+577577+ create
578578+ ~name:"unpac_git_diff"
579579+ ~description:"Show the diff between vendor and patches branches for a git repository. \
580580+ This shows what local modifications have been made."
581581+ ~input_schema:(schema_object [("name", schema_string)] ~required:["name"])
582582+ ~handler:(fun args ->
583583+ match Claude.Tool_input.get_string args "name" with
584584+ | None -> err "Missing required parameter: name"
585585+ | Some name -> git_diff ~proc_mgr ~root ~name ());
586586+587587+ (* Opam tools *)
588588+ create
589589+ ~name:"unpac_opam_list"
590590+ ~description:"List all vendored opam packages in the workspace."
591591+ ~input_schema:(schema_object [] ~required:[])
592592+ ~handler:(fun _args -> opam_list ~proc_mgr ~root ());
593593+594594+ create
595595+ ~name:"unpac_project_list"
596596+ ~description:"List all projects in the workspace."
597597+ ~input_schema:(schema_object [] ~required:[])
598598+ ~handler:(fun _args -> project_list ~proc_mgr ~root ());
599599+600600+ (* File operation tools *)
601601+ create
602602+ ~name:"read_file"
603603+ ~description:"Read the contents of a file. Use this to analyze source code, \
604604+ STATUS.md files, test files, etc."
605605+ ~input_schema:(schema_object [("path", schema_string)] ~required:["path"])
606606+ ~handler:(fun args ->
607607+ match Claude.Tool_input.get_string args "path" with
608608+ | None -> err "Missing required parameter: path"
609609+ | Some path -> read_file ~fs ~path ());
610610+611611+ create
612612+ ~name:"write_file"
613613+ ~description:"Write content to a file. Use this to update STATUS.md, fix code, \
614614+ add tests, etc. Parent directories are created if needed."
615615+ ~input_schema:(schema_object [
616616+ ("path", schema_string);
617617+ ("content", schema_string);
618618+ ] ~required:["path"; "content"])
619619+ ~handler:(fun args ->
620620+ match Claude.Tool_input.get_string args "path", Claude.Tool_input.get_string args "content" with
621621+ | None, _ -> err "Missing required parameter: path"
622622+ | _, None -> err "Missing required parameter: content"
623623+ | Some path, Some content -> write_file ~fs ~path ~content ());
624624+625625+ create
626626+ ~name:"list_directory"
627627+ ~description:"List the contents of a directory."
628628+ ~input_schema:(schema_object [("path", schema_string)] ~required:["path"])
629629+ ~handler:(fun args ->
630630+ match Claude.Tool_input.get_string args "path" with
631631+ | None -> err "Missing required parameter: path"
632632+ | Some path -> list_dir ~fs ~path ());
633633+634634+ create
635635+ ~name:"glob_files"
636636+ ~description:"Find files matching a glob pattern. Supports *.ml, **/*.ml patterns."
637637+ ~input_schema:(schema_object [
638638+ ("pattern", schema_string);
639639+ ("base_path", schema_string);
640640+ ] ~required:["pattern"; "base_path"])
641641+ ~handler:(fun args ->
642642+ match Claude.Tool_input.get_string args "pattern", Claude.Tool_input.get_string args "base_path" with
643643+ | None, _ -> err "Missing required parameter: pattern"
644644+ | _, None -> err "Missing required parameter: base_path"
645645+ | Some pattern, Some base_path -> glob_files ~fs ~pattern ~base_path ());
646646+647647+ (* Shell execution *)
648648+ create
649649+ ~name:"run_shell"
650650+ ~description:"Execute a shell command. Use for building (dune build), testing \
651651+ (dune test), or other operations. Be careful with destructive commands."
652652+ ~input_schema:(schema_object [
653653+ ("command", schema_string);
654654+ ("cwd", schema_string);
655655+ ] ~required:["command"; "cwd"])
656656+ ~handler:(fun args ->
657657+ match Claude.Tool_input.get_string args "command", Claude.Tool_input.get_string args "cwd" with
658658+ | None, _ -> err "Missing required parameter: command"
659659+ | _, None -> err "Missing required parameter: cwd"
660660+ | Some command, Some cwd -> run_shell ~proc_mgr ~fs ~cwd ~command ());
661661+662662+ (* Git commit *)
663663+ create
664664+ ~name:"git_commit"
665665+ ~description:"Stage all changes and create a git commit with the given message."
666666+ ~input_schema:(schema_object [
667667+ ("cwd", schema_string);
668668+ ("message", schema_string);
669669+ ] ~required:["cwd"; "message"])
670670+ ~handler:(fun args ->
671671+ match Claude.Tool_input.get_string args "cwd", Claude.Tool_input.get_string args "message" with
672672+ | None, _ -> err "Missing required parameter: cwd"
673673+ | _, None -> err "Missing required parameter: message"
674674+ | Some cwd, Some message ->
675675+ let cwd_path = Eio.Path.(fs / cwd) in
676676+ git_commit ~proc_mgr ~cwd:cwd_path ~message ());
677677+ ] in
678678+679679+ Claude.Mcp_server.create ~name:"unpac" ~version:"1.0.0" ~tools ()
+42
lib/claude/tools.mli
···11+(** Tool definitions for Claude to interact with unpac.
22+33+ Uses the Claude SDK's MCP-based custom tool architecture. All tools are
44+ bundled into an in-process MCP server that gets registered with the
55+ Claude client, making them available as mcp__unpac__<tool_name>.
66+77+ {1 Available Tools}
88+99+ Workspace status:
1010+ - [unpac_status] - Overview of workspace (projects, git repos, opam packages)
1111+ - [unpac_status_sync] - Run 'unpac status' to update README.md
1212+ - [unpac_push] - Push all branches to remote
1313+1414+ Git vendoring:
1515+ - [unpac_git_list] - List vendored git repositories
1616+ - [unpac_git_add] - Vendor a new git repository
1717+ - [unpac_git_info] - Show details about a vendored repository
1818+ - [unpac_git_diff] - Show local changes in a vendored repository
1919+2020+ Opam:
2121+ - [unpac_opam_list] - List vendored opam packages
2222+ - [unpac_project_list] - List projects
2323+2424+ File operations:
2525+ - [read_file] - Read file contents
2626+ - [write_file] - Write content to a file
2727+ - [list_directory] - List directory contents
2828+ - [glob_files] - Find files matching a pattern
2929+3030+ Shell:
3131+ - [run_shell] - Execute a shell command
3232+ - [git_commit] - Stage and commit changes *)
3333+3434+val create_mcp_server :
3535+ proc_mgr:Unpac.Git.proc_mgr ->
3636+ fs:Eio.Fs.dir_ty Eio.Path.t ->
3737+ root:Unpac.Worktree.root ->
3838+ Claude.Mcp_server.t
3939+(** Create an MCP server with all unpac tools.
4040+4141+ The server is named "unpac" so tools are accessible as [mcp__unpac__<tool_name>].
4242+ Register it with [Claude.Options.with_mcp_server ~name:"unpac" server]. *)
+7
lib/claude/unpac_claude.ml
···11+(** Unpac Claude agent - autonomous coding assistant for unpac workflows. *)
22+33+module Tools = Tools
44+module Prompt = Prompt
55+module Agent = Agent
66+module Event = Event
77+module Web = Web
+460
lib/claude/web.ml
···11+(** Minimal WebSocket server for live agent UI.
22+33+ Uses cohttp-eio for the HTTP upgrade handshake,
44+ then raw Eio sockets for WebSocket frames. *)
55+66+let src = Logs.Src.create "unpac.claude.web" ~doc:"Web server"
77+module Log = (val Logs.src_log src : Logs.LOG)
88+99+(* WebSocket frame helpers *)
1010+module Ws = struct
1111+ (* Compute Sec-WebSocket-Accept from client key *)
1212+ let accept_key client_key =
1313+ let magic = "258EAFA5-E914-47DA-95CA-C5AB0DC85B11" in
1414+ let combined = client_key ^ magic in
1515+ let hash = Digestif.SHA1.digest_string combined in
1616+ Base64.encode_exn (Digestif.SHA1.to_raw_string hash)
1717+1818+ (* Send a WebSocket text frame *)
1919+ let send_text flow text =
2020+ let len = String.length text in
2121+ let header =
2222+ if len < 126 then
2323+ let h = Bytes.create 2 in
2424+ Bytes.set_uint8 h 0 0x81; (* FIN + text opcode *)
2525+ Bytes.set_uint8 h 1 len;
2626+ Bytes.to_string h
2727+ else if len < 65536 then
2828+ let h = Bytes.create 4 in
2929+ Bytes.set_uint8 h 0 0x81;
3030+ Bytes.set_uint8 h 1 126;
3131+ Bytes.set_uint16_be h 2 len;
3232+ Bytes.to_string h
3333+ else
3434+ let h = Bytes.create 10 in
3535+ Bytes.set_uint8 h 0 0x81;
3636+ Bytes.set_uint8 h 1 127;
3737+ Bytes.set_int64_be h 2 (Int64.of_int len);
3838+ Bytes.to_string h
3939+ in
4040+ Eio.Flow.copy_string (header ^ text) flow
4141+4242+ (* Send close frame *)
4343+ let send_close flow =
4444+ let frame = Bytes.create 2 in
4545+ Bytes.set_uint8 frame 0 0x88; (* FIN + close opcode *)
4646+ Bytes.set_uint8 frame 1 0;
4747+ Eio.Flow.copy_string (Bytes.to_string frame) flow
4848+4949+ (* Read a WebSocket frame, returns (opcode, payload) or None on close *)
5050+ let read_frame flow =
5151+ let buf = Cstruct.create 2 in
5252+ match Eio.Flow.read_exact flow buf with
5353+ | exception End_of_file -> None
5454+ | () ->
5555+ let b0 = Cstruct.get_uint8 buf 0 in
5656+ let b1 = Cstruct.get_uint8 buf 1 in
5757+ let _fin = (b0 land 0x80) <> 0 in
5858+ let opcode = b0 land 0x0F in
5959+ let masked = (b1 land 0x80) <> 0 in
6060+ let len0 = b1 land 0x7F in
6161+6262+ (* Get actual length *)
6363+ let len =
6464+ if len0 < 126 then len0
6565+ else if len0 = 126 then begin
6666+ let buf = Cstruct.create 2 in
6767+ Eio.Flow.read_exact flow buf;
6868+ Cstruct.BE.get_uint16 buf 0
6969+ end else begin
7070+ let buf = Cstruct.create 8 in
7171+ Eio.Flow.read_exact flow buf;
7272+ Int64.to_int (Cstruct.BE.get_uint64 buf 0)
7373+ end
7474+ in
7575+7676+ (* Get mask if present *)
7777+ let mask =
7878+ if masked then begin
7979+ let buf = Cstruct.create 4 in
8080+ Eio.Flow.read_exact flow buf;
8181+ Some (Cstruct.to_bytes buf)
8282+ end else None
8383+ in
8484+8585+ (* Read payload *)
8686+ let payload = Cstruct.create len in
8787+ if len > 0 then Eio.Flow.read_exact flow payload;
8888+8989+ (* Unmask if needed *)
9090+ let data =
9191+ match mask with
9292+ | None -> Cstruct.to_string payload
9393+ | Some m ->
9494+ let bytes = Cstruct.to_bytes payload in
9595+ for i = 0 to len - 1 do
9696+ let b = Bytes.get_uint8 bytes i in
9797+ let k = Bytes.get_uint8 m (i mod 4) in
9898+ Bytes.set_uint8 bytes i (b lxor k)
9999+ done;
100100+ Bytes.to_string bytes
101101+ in
102102+ Some (opcode, data)
103103+end
104104+105105+(* Static HTML page *)
106106+let index_html = {|<!DOCTYPE html>
107107+<html lang="en">
108108+<head>
109109+ <meta charset="UTF-8">
110110+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
111111+ <title>unpac-claude</title>
112112+ <style>
113113+ :root {
114114+ --bg: #0d1117;
115115+ --fg: #c9d1d9;
116116+ --dim: #6e7681;
117117+ --border: #30363d;
118118+ --accent: #58a6ff;
119119+ --green: #3fb950;
120120+ --red: #f85149;
121121+ --yellow: #d29922;
122122+ }
123123+ * { box-sizing: border-box; margin: 0; padding: 0; }
124124+ body {
125125+ font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;
126126+ font-size: 14px;
127127+ line-height: 1.5;
128128+ background: var(--bg);
129129+ color: var(--fg);
130130+ height: 100vh;
131131+ display: flex;
132132+ flex-direction: column;
133133+ }
134134+ header {
135135+ padding: 12px 16px;
136136+ border-bottom: 1px solid var(--border);
137137+ display: flex;
138138+ align-items: center;
139139+ gap: 12px;
140140+ }
141141+ header h1 {
142142+ font-size: 16px;
143143+ font-weight: 600;
144144+ }
145145+ .status {
146146+ font-size: 12px;
147147+ padding: 2px 8px;
148148+ border-radius: 12px;
149149+ background: var(--border);
150150+ }
151151+ .status.connected { background: var(--green); color: #000; }
152152+ .status.error { background: var(--red); color: #fff; }
153153+ #events {
154154+ flex: 1;
155155+ overflow-y: auto;
156156+ padding: 16px;
157157+ }
158158+ .event {
159159+ margin-bottom: 8px;
160160+ padding: 8px 12px;
161161+ border-radius: 6px;
162162+ border-left: 3px solid var(--border);
163163+ }
164164+ .event.thinking { border-color: var(--yellow); color: var(--dim); }
165165+ .event.text { border-color: var(--fg); white-space: pre-wrap; }
166166+ .event.tool_call { border-color: var(--accent); }
167167+ .event.tool_result { border-color: var(--green); }
168168+ .event.tool_result.error { border-color: var(--red); }
169169+ .event.error { border-color: var(--red); background: rgba(248,81,73,0.1); }
170170+ .event.sync { border-color: var(--yellow); }
171171+ .event.turn_complete { border-color: var(--dim); color: var(--dim); font-size: 12px; }
172172+ .tool-name {
173173+ color: var(--accent);
174174+ font-weight: 600;
175175+ }
176176+ .tool-input, .tool-output {
177177+ margin-top: 4px;
178178+ padding: 8px;
179179+ background: rgba(0,0,0,0.3);
180180+ border-radius: 4px;
181181+ font-size: 12px;
182182+ max-height: 200px;
183183+ overflow: auto;
184184+ white-space: pre-wrap;
185185+ word-break: break-all;
186186+ }
187187+ .cost { color: var(--dim); }
188188+ footer {
189189+ padding: 8px 16px;
190190+ border-top: 1px solid var(--border);
191191+ font-size: 12px;
192192+ color: var(--dim);
193193+ }
194194+ </style>
195195+</head>
196196+<body>
197197+ <header>
198198+ <h1>unpac-claude</h1>
199199+ <span id="status" class="status">connecting...</span>
200200+ </header>
201201+ <div id="events"></div>
202202+ <footer>
203203+ <span id="turn">Turn: 0</span> |
204204+ <span id="cost">Cost: $0.00</span>
205205+ </footer>
206206+ <script>
207207+ const events = document.getElementById('events');
208208+ const status = document.getElementById('status');
209209+ const turnEl = document.getElementById('turn');
210210+ const costEl = document.getElementById('cost');
211211+ let totalCost = 0;
212212+ let currentTurn = 0;
213213+214214+ function connect() {
215215+ const ws = new WebSocket(`ws://${location.host}/ws`);
216216+217217+ ws.onopen = () => {
218218+ status.textContent = 'connected';
219219+ status.className = 'status connected';
220220+ };
221221+222222+ ws.onclose = () => {
223223+ status.textContent = 'disconnected';
224224+ status.className = 'status';
225225+ setTimeout(connect, 2000);
226226+ };
227227+228228+ ws.onerror = () => {
229229+ status.textContent = 'error';
230230+ status.className = 'status error';
231231+ };
232232+233233+ ws.onmessage = (e) => {
234234+ const data = JSON.parse(e.data);
235235+ handleEvent(data);
236236+ };
237237+ }
238238+239239+ function handleEvent(e) {
240240+ const div = document.createElement('div');
241241+ div.className = 'event ' + e.type;
242242+243243+ switch (e.type) {
244244+ case 'thinking':
245245+ div.textContent = '⋯ thinking...';
246246+ break;
247247+ case 'text':
248248+ div.textContent = e.content;
249249+ break;
250250+ case 'tool_call':
251251+ div.innerHTML = `<span class="tool-name">${esc(e.name)}</span>` +
252252+ `<div class="tool-input">${esc(formatJson(e.input))}</div>`;
253253+ break;
254254+ case 'tool_result':
255255+ if (e.is_error) div.classList.add('error');
256256+ div.innerHTML = `<span class="tool-name">${esc(e.name)}</span> ${e.is_error ? '✗' : '✓'}` +
257257+ `<div class="tool-output">${esc(truncate(e.output, 2000))}</div>`;
258258+ break;
259259+ case 'error':
260260+ div.textContent = '✗ ' + e.message;
261261+ break;
262262+ case 'sync':
263263+ div.textContent = '↻ sync: ' + e.action;
264264+ break;
265265+ case 'turn_complete':
266266+ currentTurn = e.turn;
267267+ if (e.cost_usd) totalCost += e.cost_usd;
268268+ turnEl.textContent = 'Turn: ' + currentTurn;
269269+ costEl.textContent = 'Cost: $' + totalCost.toFixed(4);
270270+ div.textContent = `Turn ${e.turn} complete` + (e.cost_usd ? ` ($${e.cost_usd.toFixed(4)})` : '');
271271+ break;
272272+ case 'agent_start':
273273+ div.textContent = '▶ Agent started';
274274+ div.style.borderColor = 'var(--green)';
275275+ break;
276276+ case 'agent_stop':
277277+ div.textContent = '■ Agent stopped';
278278+ div.style.borderColor = 'var(--red)';
279279+ break;
280280+ default:
281281+ div.textContent = JSON.stringify(e);
282282+ }
283283+284284+ events.appendChild(div);
285285+ events.scrollTop = events.scrollHeight;
286286+ }
287287+288288+ function esc(s) {
289289+ return s.replace(/&/g,'&').replace(/</g,'<').replace(/>/g,'>');
290290+ }
291291+292292+ function truncate(s, n) {
293293+ return s.length > n ? s.slice(0, n) + '...[truncated]' : s;
294294+ }
295295+296296+ function formatJson(s) {
297297+ try {
298298+ return JSON.stringify(JSON.parse(s), null, 2);
299299+ } catch {
300300+ return s;
301301+ }
302302+ }
303303+304304+ connect();
305305+ </script>
306306+</body>
307307+</html>|}
308308+309309+(* We don't track clients for WebSocket broadcasting in this simple implementation.
310310+ Instead, each WebSocket connection runs in its own fiber and subscribes to events. *)
311311+312312+type t = unit
313313+314314+let create _event_bus = ()
315315+316316+(* Broadcast is now a no-op since each connection handles its own events *)
317317+let broadcast _t _event = ()
318318+319319+(* Handle WebSocket connection - each connection subscribes to events directly *)
320320+let handle_websocket event_bus (flow : _ Eio.Net.stream_socket) =
321321+ let closed = ref false in
322322+ Log.info (fun m -> m "WebSocket client connected");
323323+324324+ (* Subscribe to events and send them to this client *)
325325+ let listener event =
326326+ if not !closed then begin
327327+ try
328328+ let json = Event.to_json event in
329329+ Ws.send_text flow json
330330+ with _ ->
331331+ closed := true
332332+ end
333333+ in
334334+ Event.subscribe event_bus listener;
335335+336336+ (* Read loop - handle pings and close *)
337337+ let rec loop () =
338338+ match Ws.read_frame flow with
339339+ | None ->
340340+ closed := true
341341+ | Some (0x8, _) -> (* Close *)
342342+ Ws.send_close flow;
343343+ closed := true
344344+ | Some (0x9, data) -> (* Ping -> Pong *)
345345+ let pong = Bytes.create (2 + String.length data) in
346346+ Bytes.set_uint8 pong 0 0x8A; (* FIN + pong *)
347347+ Bytes.set_uint8 pong 1 (String.length data);
348348+ Bytes.blit_string data 0 pong 2 (String.length data);
349349+ Eio.Flow.copy_string (Bytes.to_string pong) flow;
350350+ loop ()
351351+ | Some _ ->
352352+ loop ()
353353+ in
354354+ (try loop () with _ -> ());
355355+ closed := true;
356356+ Event.unsubscribe event_bus listener;
357357+ Log.info (fun m -> m "WebSocket client disconnected")
358358+359359+(* Parse HTTP request headers *)
360360+let parse_request data =
361361+ let lines = String.split_on_char '\n' data in
362362+ let headers = Hashtbl.create 16 in
363363+ let path = ref "/" in
364364+ List.iteri (fun i line ->
365365+ let line = String.trim line in
366366+ if i = 0 then begin
367367+ (* Request line: GET /path HTTP/1.1 *)
368368+ match String.split_on_char ' ' line with
369369+ | _ :: p :: _ -> path := p
370370+ | _ -> ()
371371+ end else begin
372372+ match String.index_opt line ':' with
373373+ | Some idx ->
374374+ let key = String.lowercase_ascii (String.trim (String.sub line 0 idx)) in
375375+ let value = String.trim (String.sub line (idx + 1) (String.length line - idx - 1)) in
376376+ Hashtbl.add headers key value
377377+ | None -> ()
378378+ end
379379+ ) lines;
380380+ (!path, headers)
381381+382382+(* Handle HTTP request *)
383383+let handle_request event_bus (flow : _ Eio.Net.stream_socket) =
384384+ let buf = Buffer.create 4096 in
385385+ let chunk = Cstruct.create 4096 in
386386+387387+ (* Read request *)
388388+ let rec read_headers () =
389389+ match Eio.Flow.single_read flow chunk with
390390+ | n ->
391391+ Buffer.add_string buf (Cstruct.to_string (Cstruct.sub chunk 0 n));
392392+ let data = Buffer.contents buf in
393393+ if String.length data > 4 &&
394394+ String.sub data (String.length data - 4) 4 = "\r\n\r\n"
395395+ then data
396396+ else read_headers ()
397397+ | exception End_of_file -> Buffer.contents buf
398398+ in
399399+ let request = read_headers () in
400400+ let (path, headers) = parse_request request in
401401+402402+ Log.debug (fun m -> m "Request: %s" path);
403403+404404+ (* Check for WebSocket upgrade *)
405405+ let is_upgrade =
406406+ Hashtbl.find_opt headers "upgrade" = Some "websocket" &&
407407+ Hashtbl.mem headers "sec-websocket-key"
408408+ in
409409+410410+ if path = "/ws" && is_upgrade then begin
411411+ (* WebSocket handshake *)
412412+ let key = Hashtbl.find headers "sec-websocket-key" in
413413+ let accept = Ws.accept_key key in
414414+ let response = Printf.sprintf
415415+ "HTTP/1.1 101 Switching Protocols\r\n\
416416+ Upgrade: websocket\r\n\
417417+ Connection: Upgrade\r\n\
418418+ Sec-WebSocket-Accept: %s\r\n\r\n" accept
419419+ in
420420+ Eio.Flow.copy_string response flow;
421421+ handle_websocket event_bus flow
422422+ end else begin
423423+ (* Serve static content *)
424424+ let (status, content_type, body) =
425425+ if path = "/" || path = "/index.html" then
426426+ ("200 OK", "text/html", index_html)
427427+ else
428428+ ("404 Not Found", "text/plain", "Not Found")
429429+ in
430430+ let response = Printf.sprintf
431431+ "HTTP/1.1 %s\r\n\
432432+ Content-Type: %s\r\n\
433433+ Content-Length: %d\r\n\
434434+ Connection: close\r\n\r\n%s"
435435+ status content_type (String.length body) body
436436+ in
437437+ Eio.Flow.copy_string response flow
438438+ end
439439+440440+(* Start the web server *)
441441+let start ~sw ~net ~port event_bus =
442442+ let t = create event_bus in
443443+444444+ (* Listen for connections *)
445445+ let addr = `Tcp (Eio.Net.Ipaddr.V4.loopback, port) in
446446+ let socket = Eio.Net.listen net ~sw ~reuse_addr:true ~backlog:10 addr in
447447+ Log.info (fun m -> m "Web server listening on http://localhost:%d" port);
448448+449449+ (* Accept loop *)
450450+ let rec accept_loop () =
451451+ let flow, _addr = Eio.Net.accept ~sw socket in
452452+ Eio.Fiber.fork ~sw (fun () ->
453453+ try handle_request event_bus flow
454454+ with exn ->
455455+ Log.warn (fun m -> m "Request error: %s" (Printexc.to_string exn))
456456+ );
457457+ accept_loop ()
458458+ in
459459+ Eio.Fiber.fork ~sw accept_loop;
460460+ t
+23
lib/claude/web.mli
···11+(** Minimal WebSocket server for live agent UI.
22+33+ Serves a single-page web UI that displays agent events in real-time. *)
44+55+type t
66+(** Web server state. *)
77+88+val start :
99+ sw:Eio.Switch.t ->
1010+ net:_ Eio.Net.t ->
1111+ port:int ->
1212+ Event.bus ->
1313+ t
1414+(** [start ~sw ~net ~port event_bus] starts the web server.
1515+1616+ Listens on [port] and serves:
1717+ - GET / - The HTML UI
1818+ - WS /ws - WebSocket for event streaming
1919+2020+ Subscribes to [event_bus] and broadcasts all events to connected clients. *)
2121+2222+val broadcast : t -> Event.t -> unit
2323+(** [broadcast t event] sends an event to all connected WebSocket clients. *)
+239
lib/config.ml
···11+(** Configuration file handling for unpac.
22+33+ Loads and parses main/unpac.toml configuration files. *)
44+55+(** {1 Types} *)
66+77+type repo_source =
88+ | Local of string
99+ | Remote of string
1010+1111+type repo_config = {
1212+ repo_name : string;
1313+ source : repo_source;
1414+}
1515+1616+type vendored_package = {
1717+ pkg_name : string; (** Package name (used as vendor name) *)
1818+ pkg_url : string; (** Original remote URL *)
1919+ pkg_branch : string option; (** Original branch if specified *)
2020+}
2121+2222+type opam_config = {
2323+ repositories : repo_config list;
2424+ compiler : string option;
2525+ vendored : vendored_package list; (** Tracked vendored packages *)
2626+}
2727+2828+(** Git repository configuration for direct git vendoring *)
2929+type git_repo_config = {
3030+ git_name : string; (** User-specified name for the repo *)
3131+ git_url : string; (** Git URL to clone from *)
3232+ git_branch : string option; (** Optional branch/tag to track *)
3333+ git_subdir : string option; (** Optional subdirectory to extract *)
3434+}
3535+3636+type git_config = {
3737+ repos : git_repo_config list;
3838+}
3939+4040+type project_config = {
4141+ project_name : string;
4242+}
4343+4444+type t = {
4545+ opam : opam_config;
4646+ git : git_config;
4747+ vendor_cache : string option;
4848+ projects : project_config list;
4949+}
5050+5151+(** {1 TOML Codecs} *)
5252+5353+let repo_config_codec : repo_config Tomlt.t =
5454+ let open Tomlt in
5555+ let open Table in
5656+ let make repo_name path url : repo_config =
5757+ let source =
5858+ match (path, url) with
5959+ | Some p, None -> Local p
6060+ | None, Some u -> Remote u
6161+ | Some _, Some _ ->
6262+ failwith "Repository cannot have both 'path' and 'url'"
6363+ | None, None -> failwith "Repository must have either 'path' or 'url'"
6464+ in
6565+ { repo_name; source }
6666+ in
6767+ let enc_path (r : repo_config) =
6868+ match r.source with Local p -> Some p | Remote _ -> None
6969+ in
7070+ let enc_url (r : repo_config) =
7171+ match r.source with Remote u -> Some u | Local _ -> None
7272+ in
7373+ obj make
7474+ |> mem "name" string ~enc:(fun (r : repo_config) -> r.repo_name)
7575+ |> opt_mem "path" string ~enc:enc_path
7676+ |> opt_mem "url" string ~enc:enc_url
7777+ |> finish
7878+7979+let vendored_package_codec : vendored_package Tomlt.t =
8080+ let open Tomlt in
8181+ let open Table in
8282+ obj (fun pkg_name pkg_url pkg_branch : vendored_package ->
8383+ { pkg_name; pkg_url; pkg_branch })
8484+ |> mem "name" string ~enc:(fun (p : vendored_package) -> p.pkg_name)
8585+ |> mem "url" string ~enc:(fun (p : vendored_package) -> p.pkg_url)
8686+ |> opt_mem "branch" string ~enc:(fun (p : vendored_package) -> p.pkg_branch)
8787+ |> finish
8888+8989+let opam_config_codec : opam_config Tomlt.t =
9090+ let open Tomlt in
9191+ let open Table in
9292+ obj (fun repositories compiler vendored : opam_config ->
9393+ { repositories; compiler; vendored = Option.value ~default:[] vendored })
9494+ |> mem "repositories" (list repo_config_codec)
9595+ ~enc:(fun (c : opam_config) -> c.repositories)
9696+ |> opt_mem "compiler" string ~enc:(fun (c : opam_config) -> c.compiler)
9797+ |> opt_mem "vendored" (list vendored_package_codec)
9898+ ~enc:(fun (c : opam_config) -> if c.vendored = [] then None else Some c.vendored)
9999+ |> finish
100100+101101+let git_repo_config_codec : git_repo_config Tomlt.t =
102102+ let open Tomlt in
103103+ let open Table in
104104+ obj (fun git_name git_url git_branch git_subdir : git_repo_config ->
105105+ { git_name; git_url; git_branch; git_subdir })
106106+ |> mem "name" string ~enc:(fun (r : git_repo_config) -> r.git_name)
107107+ |> mem "url" string ~enc:(fun (r : git_repo_config) -> r.git_url)
108108+ |> opt_mem "branch" string ~enc:(fun (r : git_repo_config) -> r.git_branch)
109109+ |> opt_mem "subdir" string ~enc:(fun (r : git_repo_config) -> r.git_subdir)
110110+ |> finish
111111+112112+let git_config_codec : git_config Tomlt.t =
113113+ let open Tomlt in
114114+ let open Table in
115115+ obj (fun repos : git_config -> { repos })
116116+ |> mem "repos" (list git_repo_config_codec)
117117+ ~enc:(fun (c : git_config) -> c.repos)
118118+ |> finish
119119+120120+let empty_git = { repos = [] }
121121+122122+(* For now, projects is not parsed from TOML - derived from git branches *)
123123+type config = t
124124+125125+let codec : config Tomlt.t =
126126+ let open Tomlt in
127127+ let open Table in
128128+ obj (fun opam git vendor_cache : config ->
129129+ { opam; git = Option.value ~default:empty_git git; vendor_cache; projects = [] })
130130+ |> mem "opam" opam_config_codec ~enc:(fun (c : config) -> c.opam)
131131+ |> opt_mem "git" git_config_codec ~enc:(fun (c : config) ->
132132+ if c.git.repos = [] then None else Some c.git)
133133+ |> opt_mem "vendor_cache" string ~enc:(fun (c : config) -> c.vendor_cache)
134134+ |> finish
135135+136136+(** {1 Loading} *)
137137+138138+let load path =
139139+ try
140140+ let content = In_channel.with_open_text path In_channel.input_all in
141141+ Tomlt_bytesrw.decode_string codec content
142142+ |> Result.map_error Tomlt.Toml.Error.to_string
143143+ with
144144+ | Sys_error msg -> Error msg
145145+ | Failure msg -> Error msg
146146+147147+let load_exn path =
148148+ match load path with Ok c -> c | Error msg -> failwith msg
149149+150150+(** {1 Saving} *)
151151+152152+let save path config =
153153+ try
154154+ let content = Tomlt_bytesrw.encode_string codec config in
155155+ Out_channel.with_open_text path (fun oc ->
156156+ Out_channel.output_string oc content);
157157+ Ok ()
158158+ with
159159+ | Sys_error msg -> Error msg
160160+ | Failure msg -> Error msg
161161+162162+let save_exn path config =
163163+ match save path config with
164164+ | Ok () -> ()
165165+ | Error msg -> failwith msg
166166+167167+(** {1 Helpers} *)
168168+169169+let empty_opam = { repositories = []; compiler = None; vendored = [] }
170170+let empty = { opam = empty_opam; git = empty_git; vendor_cache = None; projects = [] }
171171+172172+let find_project config name =
173173+ List.find_opt (fun p -> p.project_name = name) config.projects
174174+175175+(* Opam repo helpers *)
176176+let add_repo config repo =
177177+ let repos = config.opam.repositories @ [repo] in
178178+ { config with opam = { config.opam with repositories = repos } }
179179+180180+let remove_repo config name =
181181+ let repos = List.filter (fun r -> r.repo_name <> name) config.opam.repositories in
182182+ { config with opam = { config.opam with repositories = repos } }
183183+184184+let find_repo config name =
185185+ List.find_opt (fun r -> r.repo_name = name) config.opam.repositories
186186+187187+let set_compiler config version =
188188+ { config with opam = { config.opam with compiler = Some version } }
189189+190190+let get_compiler config =
191191+ config.opam.compiler
192192+193193+(* Git repo helpers *)
194194+let add_git_repo config (repo : git_repo_config) =
195195+ let repos = config.git.repos @ [repo] in
196196+ { config with git = { repos } }
197197+198198+let remove_git_repo config name =
199199+ let repos = List.filter (fun (r : git_repo_config) -> r.git_name <> name) config.git.repos in
200200+ { config with git = { repos } }
201201+202202+let find_git_repo config name =
203203+ List.find_opt (fun (r : git_repo_config) -> r.git_name = name) config.git.repos
204204+205205+let list_git_repos config =
206206+ config.git.repos
207207+208208+(* Vendor cache helpers *)
209209+let set_vendor_cache config path =
210210+ { config with vendor_cache = Some path }
211211+212212+let get_vendor_cache config =
213213+ config.vendor_cache
214214+215215+let resolve_vendor_cache ?cli_override config =
216216+ (* Priority: CLI flag > env var > config file > default *)
217217+ match cli_override with
218218+ | Some path -> Some path
219219+ | None ->
220220+ match Sys.getenv_opt "UNPAC_VENDOR_CACHE" with
221221+ | Some path -> Some path
222222+ | None -> config.vendor_cache
223223+224224+(* Vendored package helpers *)
225225+let add_vendored_package config (pkg : vendored_package) =
226226+ (* Replace if exists, otherwise append *)
227227+ let vendored = List.filter (fun p -> p.pkg_name <> pkg.pkg_name) config.opam.vendored in
228228+ let vendored = vendored @ [pkg] in
229229+ { config with opam = { config.opam with vendored } }
230230+231231+let remove_vendored_package config name =
232232+ let vendored = List.filter (fun p -> p.pkg_name <> name) config.opam.vendored in
233233+ { config with opam = { config.opam with vendored } }
234234+235235+let find_vendored_package config name =
236236+ List.find_opt (fun p -> p.pkg_name = name) config.opam.vendored
237237+238238+let list_vendored_packages config =
239239+ config.opam.vendored
+136
lib/config.mli
···11+(** Configuration file handling for unpac.
22+33+ Loads and parses main/unpac.toml configuration files. *)
44+55+(** {1 Types} *)
66+77+type repo_source =
88+ | Local of string
99+ | Remote of string
1010+1111+type repo_config = {
1212+ repo_name : string;
1313+ source : repo_source;
1414+}
1515+1616+type vendored_package = {
1717+ pkg_name : string; (** Package name (used as vendor name) *)
1818+ pkg_url : string; (** Original remote URL *)
1919+ pkg_branch : string option; (** Original branch if specified *)
2020+}
2121+2222+type opam_config = {
2323+ repositories : repo_config list;
2424+ compiler : string option;
2525+ vendored : vendored_package list; (** Tracked vendored packages *)
2626+}
2727+2828+(** Git repository configuration for direct git vendoring *)
2929+type git_repo_config = {
3030+ git_name : string; (** User-specified name for the repo *)
3131+ git_url : string; (** Git URL to clone from *)
3232+ git_branch : string option; (** Optional branch/tag to track *)
3333+ git_subdir : string option; (** Optional subdirectory to extract *)
3434+}
3535+3636+type git_config = {
3737+ repos : git_repo_config list;
3838+}
3939+4040+type project_config = {
4141+ project_name : string;
4242+}
4343+4444+type t = {
4545+ opam : opam_config;
4646+ git : git_config;
4747+ vendor_cache : string option;
4848+ projects : project_config list;
4949+}
5050+5151+(** {1 Loading} *)
5252+5353+val load : string -> (t, string) result
5454+(** [load path] loads configuration from the TOML file at [path]. *)
5555+5656+val load_exn : string -> t
5757+(** [load_exn path] is like {!load} but raises on error. *)
5858+5959+(** {1 Saving} *)
6060+6161+val save : string -> t -> (unit, string) result
6262+(** [save path config] saves configuration to the TOML file at [path]. *)
6363+6464+val save_exn : string -> t -> unit
6565+(** [save_exn path config] is like {!save} but raises on error. *)
6666+6767+(** {1 Helpers} *)
6868+6969+val empty : t
7070+(** Empty configuration. *)
7171+7272+val find_project : t -> string -> project_config option
7373+(** [find_project config name] finds a project by name. *)
7474+7575+(** {2 Opam Repository Helpers} *)
7676+7777+val add_repo : t -> repo_config -> t
7878+(** [add_repo config repo] adds an opam repository to the config. *)
7979+8080+val remove_repo : t -> string -> t
8181+(** [remove_repo config name] removes an opam repository by name. *)
8282+8383+val find_repo : t -> string -> repo_config option
8484+(** [find_repo config name] finds an opam repository by name. *)
8585+8686+val set_compiler : t -> string -> t
8787+(** [set_compiler config version] sets the OCaml compiler version. *)
8888+8989+val get_compiler : t -> string option
9090+(** [get_compiler config] gets the configured OCaml compiler version. *)
9191+9292+(** {2 Git Repository Helpers} *)
9393+9494+val add_git_repo : t -> git_repo_config -> t
9595+(** [add_git_repo config repo] adds a git repository to the config. *)
9696+9797+val remove_git_repo : t -> string -> t
9898+(** [remove_git_repo config name] removes a git repository by name. *)
9999+100100+val find_git_repo : t -> string -> git_repo_config option
101101+(** [find_git_repo config name] finds a git repository by name. *)
102102+103103+val list_git_repos : t -> git_repo_config list
104104+(** [list_git_repos config] returns all configured git repositories. *)
105105+106106+(** {2 Vendor Cache Helpers} *)
107107+108108+val set_vendor_cache : t -> string -> t
109109+(** [set_vendor_cache config path] sets the vendor cache path. *)
110110+111111+val get_vendor_cache : t -> string option
112112+(** [get_vendor_cache config] gets the configured vendor cache path. *)
113113+114114+val resolve_vendor_cache : ?cli_override:string -> t -> string option
115115+(** [resolve_vendor_cache ?cli_override config] resolves vendor cache path.
116116+ Priority: CLI flag > UNPAC_VENDOR_CACHE env var > config file.
117117+ Returns None if not configured anywhere. *)
118118+119119+(** {2 Vendored Package Helpers} *)
120120+121121+val add_vendored_package : t -> vendored_package -> t
122122+(** [add_vendored_package config pkg] adds or replaces a vendored package entry. *)
123123+124124+val remove_vendored_package : t -> string -> t
125125+(** [remove_vendored_package config name] removes a vendored package by name. *)
126126+127127+val find_vendored_package : t -> string -> vendored_package option
128128+(** [find_vendored_package config name] finds a vendored package by name. *)
129129+130130+val list_vendored_packages : t -> vendored_package list
131131+(** [list_vendored_packages config] returns all vendored packages. *)
132132+133133+(** {1 Codecs} *)
134134+135135+val codec : t Tomlt.t
136136+(** TOML codec for the configuration type. *)
···11+(** Git operations wrapped with Eio and robust error handling. *)
22+33+let src = Logs.Src.create "unpac.git" ~doc:"Git operations"
44+module Log = (val Logs.src_log src : Logs.LOG)
55+66+(* Error types *)
77+88+type error =
99+ | Command_failed of {
1010+ cmd : string list;
1111+ exit_code : int;
1212+ stdout : string;
1313+ stderr : string;
1414+ }
1515+ | Not_a_repository
1616+ | Remote_exists of string
1717+ | Remote_not_found of string
1818+ | Branch_exists of string
1919+ | Branch_not_found of string
2020+ | Merge_conflict of { branch : string; conflicting_files : string list }
2121+ | Rebase_conflict of { onto : string; hint : string }
2222+ | Uncommitted_changes
2323+ | Not_on_branch
2424+ | Detached_head
2525+2626+let pp_error fmt = function
2727+ | Command_failed { cmd; exit_code; stderr; _ } ->
2828+ Format.fprintf fmt "git %a failed (exit %d): %s"
2929+ Fmt.(list ~sep:sp string) cmd exit_code
3030+ (String.trim stderr)
3131+ | Not_a_repository ->
3232+ Format.fprintf fmt "not a git repository"
3333+ | Remote_exists name ->
3434+ Format.fprintf fmt "remote '%s' already exists" name
3535+ | Remote_not_found name ->
3636+ Format.fprintf fmt "remote '%s' not found" name
3737+ | Branch_exists name ->
3838+ Format.fprintf fmt "branch '%s' already exists" name
3939+ | Branch_not_found name ->
4040+ Format.fprintf fmt "branch '%s' not found" name
4141+ | Merge_conflict { branch; conflicting_files } ->
4242+ Format.fprintf fmt "merge conflict in '%s': %a" branch
4343+ Fmt.(list ~sep:comma string) conflicting_files
4444+ | Rebase_conflict { onto; hint } ->
4545+ Format.fprintf fmt "rebase conflict onto '%s': %s" onto hint
4646+ | Uncommitted_changes ->
4747+ Format.fprintf fmt "uncommitted changes in working directory"
4848+ | Not_on_branch ->
4949+ Format.fprintf fmt "not on any branch"
5050+ | Detached_head ->
5151+ Format.fprintf fmt "HEAD is detached"
5252+5353+type Eio.Exn.err += E of error
5454+5555+let () =
5656+ Eio.Exn.register_pp (fun fmt -> function
5757+ | E e -> Format.fprintf fmt "Git %a" pp_error e; true
5858+ | _ -> false)
5959+6060+let err e = Eio.Exn.create (E e)
6161+6262+(* Types *)
6363+6464+type proc_mgr = [ `Generic | `Unix ] Eio.Process.mgr_ty Eio.Resource.t
6565+type path = Eio.Fs.dir_ty Eio.Path.t
6666+6767+(* Helpers *)
6868+6969+let string_trim s = String.trim s
7070+7171+let lines s =
7272+ String.split_on_char '\n' s
7373+ |> List.filter (fun s -> String.trim s <> "")
7474+7575+(* Low-level execution *)
7676+7777+let run ~proc_mgr ?cwd ?audit args =
7878+ let full_cmd = "git" :: args in
7979+ Log.debug (fun m -> m "Running: %a" Fmt.(list ~sep:sp string) full_cmd);
8080+ let started = Unix.gettimeofday () in
8181+ let cwd_str = match cwd with Some p -> snd p | None -> Sys.getcwd () in
8282+ let stdout_buf = Buffer.create 256 in
8383+ let stderr_buf = Buffer.create 256 in
8484+ try
8585+ Eio.Switch.run @@ fun sw ->
8686+ let stdout_r, stdout_w = Eio.Process.pipe proc_mgr ~sw in
8787+ let stderr_r, stderr_w = Eio.Process.pipe proc_mgr ~sw in
8888+ let child = Eio.Process.spawn proc_mgr ~sw
8989+ ?cwd:(Option.map (fun p -> (p :> Eio.Fs.dir_ty Eio.Path.t)) cwd)
9090+ ~stdout:stdout_w ~stderr:stderr_w
9191+ full_cmd
9292+ in
9393+ Eio.Flow.close stdout_w;
9494+ Eio.Flow.close stderr_w;
9595+ (* Read stdout and stderr concurrently *)
9696+ Eio.Fiber.both
9797+ (fun () ->
9898+ let chunk = Cstruct.create 4096 in
9999+ let rec loop () =
100100+ match Eio.Flow.single_read stdout_r chunk with
101101+ | n ->
102102+ Buffer.add_string stdout_buf (Cstruct.to_string (Cstruct.sub chunk 0 n));
103103+ loop ()
104104+ | exception End_of_file -> ()
105105+ in
106106+ loop ())
107107+ (fun () ->
108108+ let chunk = Cstruct.create 4096 in
109109+ let rec loop () =
110110+ match Eio.Flow.single_read stderr_r chunk with
111111+ | n ->
112112+ Buffer.add_string stderr_buf (Cstruct.to_string (Cstruct.sub chunk 0 n));
113113+ loop ()
114114+ | exception End_of_file -> ()
115115+ in
116116+ loop ());
117117+ let status = Eio.Process.await child in
118118+ let stdout = Buffer.contents stdout_buf in
119119+ let stderr = Buffer.contents stderr_buf in
120120+ let exit_code, result = match status with
121121+ | `Exited 0 ->
122122+ Log.debug (fun m -> m "Output: %s" (string_trim stdout));
123123+ 0, Ok stdout
124124+ | `Exited code ->
125125+ Log.debug (fun m -> m "Failed (exit %d): %s" code (string_trim stderr));
126126+ code, Error (Command_failed { cmd = args; exit_code = code; stdout; stderr })
127127+ | `Signaled signal ->
128128+ Log.debug (fun m -> m "Killed by signal %d" signal);
129129+ let code = 128 + signal in
130130+ code, Error (Command_failed { cmd = args; exit_code = code; stdout; stderr })
131131+ in
132132+ (* Record to audit if provided *)
133133+ Option.iter (fun ctx ->
134134+ let git_result : Audit.git_result = { exit_code; stdout; stderr } in
135135+ Audit.record_git ctx ~cmd:args ~cwd:cwd_str ~started ~result:git_result
136136+ ) audit;
137137+ result
138138+ with exn ->
139139+ Log.err (fun m -> m "Exception running git: %a" Fmt.exn exn);
140140+ raise exn
141141+142142+let run_exn ~proc_mgr ?cwd ?audit args =
143143+ match run ~proc_mgr ?cwd ?audit args with
144144+ | Ok output -> output
145145+ | Error e ->
146146+ let ex = err e in
147147+ raise (Eio.Exn.add_context ex "running git %a" Fmt.(list ~sep:sp string) args)
148148+149149+let run_lines ~proc_mgr ?cwd ?audit args =
150150+ run_exn ~proc_mgr ?cwd ?audit args |> string_trim |> lines
151151+152152+(* Queries *)
153153+154154+let is_repository path =
155155+ let git_dir = Eio.Path.(path / ".git") in
156156+ match Eio.Path.kind ~follow:false git_dir with
157157+ | `Directory | `Regular_file -> true (* .git can be a file for worktrees *)
158158+ | _ -> false
159159+ | exception _ -> false
160160+161161+let current_branch ~proc_mgr ~cwd =
162162+ match run ~proc_mgr ~cwd ["symbolic-ref"; "--short"; "HEAD"] with
163163+ | Ok output -> Some (string_trim output)
164164+ | Error _ -> None
165165+166166+let current_branch_exn ~proc_mgr ~cwd =
167167+ match current_branch ~proc_mgr ~cwd with
168168+ | Some b -> b
169169+ | None -> raise (err Not_on_branch)
170170+171171+let current_head ~proc_mgr ~cwd =
172172+ run_exn ~proc_mgr ~cwd ["rev-parse"; "HEAD"] |> string_trim
173173+174174+let has_uncommitted_changes ~proc_mgr ~cwd =
175175+ let status = run_exn ~proc_mgr ~cwd ["status"; "--porcelain"] in
176176+ String.trim status <> ""
177177+178178+let remote_exists ~proc_mgr ~cwd name =
179179+ match run ~proc_mgr ~cwd ["remote"; "get-url"; name] with
180180+ | Ok _ -> true
181181+ | Error _ -> false
182182+183183+let branch_exists ~proc_mgr ~cwd name =
184184+ match run ~proc_mgr ~cwd ["show-ref"; "--verify"; "--quiet"; "refs/heads/" ^ name] with
185185+ | Ok _ -> true
186186+ | Error _ -> false
187187+188188+let rev_parse ~proc_mgr ~cwd ref_ =
189189+ match run ~proc_mgr ~cwd ["rev-parse"; "--verify"; "--quiet"; ref_] with
190190+ | Ok output -> Some (string_trim output)
191191+ | Error _ -> None
192192+193193+let rev_parse_exn ~proc_mgr ~cwd ref_ =
194194+ match rev_parse ~proc_mgr ~cwd ref_ with
195195+ | Some sha -> sha
196196+ | None -> raise (err (Branch_not_found ref_))
197197+198198+let rev_parse_short ~proc_mgr ~cwd ref_ =
199199+ run_exn ~proc_mgr ~cwd ["rev-parse"; "--short"; ref_] |> string_trim
200200+201201+let ls_remote_default_branch ~proc_mgr ~cwd ~url =
202202+ Log.info (fun m -> m "Detecting default branch for %s..." url);
203203+ (* Try to get the default branch from the remote *)
204204+ let output = run_exn ~proc_mgr ~cwd ["ls-remote"; "--symref"; url; "HEAD"] in
205205+ (* Parse output like: ref: refs/heads/main\tHEAD *)
206206+ let default =
207207+ let lines = String.split_on_char '\n' output in
208208+ List.find_map (fun line ->
209209+ if String.starts_with ~prefix:"ref:" line then
210210+ let parts = String.split_on_char '\t' line in
211211+ match parts with
212212+ | ref_part :: _ ->
213213+ let ref_part = String.trim ref_part in
214214+ if String.starts_with ~prefix:"ref: refs/heads/" ref_part then
215215+ Some (String.sub ref_part 16 (String.length ref_part - 16))
216216+ else None
217217+ | _ -> None
218218+ else None
219219+ ) lines
220220+ in
221221+ match default with
222222+ | Some branch ->
223223+ Log.info (fun m -> m "Default branch: %s" branch);
224224+ branch
225225+ | None ->
226226+ (* Fallback: try common branch names *)
227227+ Log.debug (fun m -> m "Could not detect default branch, trying common names...");
228228+ let try_branch name =
229229+ match run ~proc_mgr ~cwd ["ls-remote"; "--heads"; url; name] with
230230+ | Ok output when String.trim output <> "" -> true
231231+ | _ -> false
232232+ in
233233+ if try_branch "main" then "main"
234234+ else if try_branch "master" then "master"
235235+ else begin
236236+ Log.warn (fun m -> m "Could not detect default branch, assuming 'main'");
237237+ "main"
238238+ end
239239+240240+let list_remotes ~proc_mgr ~cwd =
241241+ run_lines ~proc_mgr ~cwd ["remote"]
242242+243243+let remote_url ~proc_mgr ~cwd name =
244244+ match run ~proc_mgr ~cwd ["remote"; "get-url"; name] with
245245+ | Ok output -> Some (string_trim output)
246246+ | Error _ -> None
247247+248248+let log_oneline ~proc_mgr ~cwd ?max_count from_ref to_ref =
249249+ let range = from_ref ^ ".." ^ to_ref in
250250+ let args = ["log"; "--oneline"; range] in
251251+ let args = match max_count with
252252+ | Some n -> args @ ["--max-count"; string_of_int n]
253253+ | None -> args
254254+ in
255255+ run_lines ~proc_mgr ~cwd args
256256+257257+let diff_stat ~proc_mgr ~cwd from_ref to_ref =
258258+ let range = from_ref ^ ".." ^ to_ref in
259259+ run_exn ~proc_mgr ~cwd ["diff"; "--stat"; range]
260260+261261+let ls_tree ~proc_mgr ~cwd ~tree ~path =
262262+ match run ~proc_mgr ~cwd ["ls-tree"; tree; path] with
263263+ | Ok output -> String.trim output <> ""
264264+ | Error _ -> false
265265+266266+let rev_list_count ~proc_mgr ~cwd from_ref to_ref =
267267+ let range = from_ref ^ ".." ^ to_ref in
268268+ let output = run_exn ~proc_mgr ~cwd ["rev-list"; "--count"; range] in
269269+ int_of_string (string_trim output)
270270+271271+(* Idempotent mutations *)
272272+273273+let ensure_remote ~proc_mgr ~cwd ~name ~url =
274274+ match remote_url ~proc_mgr ~cwd name with
275275+ | None ->
276276+ Log.info (fun m -> m "Adding remote %s -> %s" name url);
277277+ run_exn ~proc_mgr ~cwd ["remote"; "add"; name; url] |> ignore;
278278+ `Created
279279+ | Some existing_url ->
280280+ if existing_url = url then begin
281281+ Log.debug (fun m -> m "Remote %s already exists with correct URL" name);
282282+ `Existed
283283+ end else begin
284284+ Log.info (fun m -> m "Updating remote %s URL: %s -> %s" name existing_url url);
285285+ run_exn ~proc_mgr ~cwd ["remote"; "set-url"; name; url] |> ignore;
286286+ `Updated
287287+ end
288288+289289+let ensure_branch ~proc_mgr ~cwd ~name ~start_point =
290290+ if branch_exists ~proc_mgr ~cwd name then begin
291291+ Log.debug (fun m -> m "Branch %s already exists" name);
292292+ `Existed
293293+ end else begin
294294+ Log.info (fun m -> m "Creating branch %s at %s" name start_point);
295295+ run_exn ~proc_mgr ~cwd ["branch"; name; start_point] |> ignore;
296296+ `Created
297297+ end
298298+299299+let ensure_vendored_remotes ~proc_mgr ~cwd (packages : Config.vendored_package list) =
300300+ let created = ref 0 in
301301+ List.iter (fun (pkg : Config.vendored_package) ->
302302+ let remote_name = "origin-" ^ pkg.pkg_name in
303303+ match ensure_remote ~proc_mgr ~cwd ~name:remote_name ~url:pkg.pkg_url with
304304+ | `Created ->
305305+ Log.info (fun m -> m "Recreated remote %s -> %s" remote_name pkg.pkg_url);
306306+ incr created
307307+ | `Updated ->
308308+ Log.info (fun m -> m "Updated remote %s -> %s" remote_name pkg.pkg_url)
309309+ | `Existed -> ()
310310+ ) packages;
311311+ !created
312312+313313+(* State-changing operations *)
314314+315315+let init ~proc_mgr ~cwd =
316316+ Log.info (fun m -> m "Initializing git repository...");
317317+ run_exn ~proc_mgr ~cwd ["init"] |> ignore
318318+319319+let fetch ~proc_mgr ~cwd ~remote =
320320+ Log.info (fun m -> m "Fetching from %s..." remote);
321321+ run_exn ~proc_mgr ~cwd ["fetch"; remote] |> ignore
322322+323323+let fetch_with_tags ~proc_mgr ~cwd ~remote =
324324+ Log.info (fun m -> m "Fetching from %s (with tags)..." remote);
325325+ run_exn ~proc_mgr ~cwd ["fetch"; "--tags"; "--force"; remote] |> ignore
326326+327327+let resolve_branch_or_tag ~proc_mgr ~cwd ~remote ~ref_name =
328328+ (* Try as a remote tracking branch first *)
329329+ let branch_ref = remote ^ "/" ^ ref_name in
330330+ match rev_parse ~proc_mgr ~cwd branch_ref with
331331+ | Some _ -> branch_ref
332332+ | None ->
333333+ (* Try as a tag *)
334334+ let tag_ref = "refs/tags/" ^ ref_name in
335335+ match rev_parse ~proc_mgr ~cwd tag_ref with
336336+ | Some _ -> tag_ref
337337+ | None ->
338338+ failwith (Printf.sprintf "Ref not found: %s (tried branch %s and tag %s)"
339339+ ref_name branch_ref tag_ref)
340340+341341+let checkout ~proc_mgr ~cwd ref_ =
342342+ Log.debug (fun m -> m "Checking out %s" ref_);
343343+ run_exn ~proc_mgr ~cwd ["checkout"; ref_] |> ignore
344344+345345+let checkout_orphan ~proc_mgr ~cwd name =
346346+ Log.info (fun m -> m "Creating orphan branch %s" name);
347347+ run_exn ~proc_mgr ~cwd ["checkout"; "--orphan"; name] |> ignore
348348+349349+let read_tree_prefix ~proc_mgr ~cwd ~prefix ~tree =
350350+ Log.debug (fun m -> m "Reading tree %s with prefix %s" tree prefix);
351351+ run_exn ~proc_mgr ~cwd ["read-tree"; "--prefix=" ^ prefix; tree] |> ignore
352352+353353+let checkout_index ~proc_mgr ~cwd =
354354+ Log.debug (fun m -> m "Checking out index to working directory");
355355+ run_exn ~proc_mgr ~cwd ["checkout-index"; "-a"; "-f"] |> ignore
356356+357357+let rm_rf ~proc_mgr ~cwd ~target =
358358+ Log.debug (fun m -> m "Removing %s from git" target);
359359+ (* Ignore errors - target might not exist *)
360360+ ignore (run ~proc_mgr ~cwd ["rm"; "-rf"; target])
361361+362362+let rm_cached_rf ~proc_mgr ~cwd =
363363+ Log.debug (fun m -> m "Removing all files from index");
364364+ (* Ignore errors - index might be empty *)
365365+ ignore (run ~proc_mgr ~cwd ["rm"; "-rf"; "--cached"; "."])
366366+367367+let add_all ~proc_mgr ~cwd =
368368+ Log.debug (fun m -> m "Staging all changes");
369369+ run_exn ~proc_mgr ~cwd ["add"; "-A"] |> ignore
370370+371371+let commit ~proc_mgr ~cwd ~message =
372372+ Log.debug (fun m -> m "Committing: %s" (String.sub message 0 (min 50 (String.length message))));
373373+ run_exn ~proc_mgr ~cwd ["commit"; "-m"; message] |> ignore
374374+375375+let commit_allow_empty ~proc_mgr ~cwd ~message =
376376+ Log.debug (fun m -> m "Committing (allow empty): %s" (String.sub message 0 (min 50 (String.length message))));
377377+ run_exn ~proc_mgr ~cwd ["commit"; "--allow-empty"; "-m"; message] |> ignore
378378+379379+let branch_create ~proc_mgr ~cwd ~name ~start_point =
380380+ Log.info (fun m -> m "Creating branch %s at %s" name start_point);
381381+ run_exn ~proc_mgr ~cwd ["branch"; name; start_point] |> ignore
382382+383383+let branch_force ~proc_mgr ~cwd ~name ~point =
384384+ Log.info (fun m -> m "Force-moving branch %s to %s" name point);
385385+ run_exn ~proc_mgr ~cwd ["branch"; "-f"; name; point] |> ignore
386386+387387+let remote_add ~proc_mgr ~cwd ~name ~url =
388388+ Log.info (fun m -> m "Adding remote %s -> %s" name url);
389389+ run_exn ~proc_mgr ~cwd ["remote"; "add"; name; url] |> ignore
390390+391391+let remote_set_url ~proc_mgr ~cwd ~name ~url =
392392+ Log.info (fun m -> m "Setting remote %s URL to %s" name url);
393393+ run_exn ~proc_mgr ~cwd ["remote"; "set-url"; name; url] |> ignore
394394+395395+let merge_allow_unrelated ~proc_mgr ~cwd ~branch ~message =
396396+ Log.info (fun m -> m "Merging %s (allow unrelated histories)..." branch);
397397+ match run ~proc_mgr ~cwd ["merge"; "--allow-unrelated-histories"; "-m"; message; branch] with
398398+ | Ok _ -> Ok ()
399399+ | Error (Command_failed { exit_code = 1; _ }) ->
400400+ (* Merge conflict - get list of conflicting files *)
401401+ let output = run_exn ~proc_mgr ~cwd ["diff"; "--name-only"; "--diff-filter=U"] in
402402+ let files = lines output in
403403+ Log.warn (fun m -> m "Merge conflict: %a" Fmt.(list ~sep:comma string) files);
404404+ Error (`Conflict files)
405405+ | Error e ->
406406+ raise (err e)
407407+408408+let rebase ~proc_mgr ~cwd ~onto =
409409+ Log.info (fun m -> m "Rebasing onto %s..." onto);
410410+ match run ~proc_mgr ~cwd ["rebase"; onto] with
411411+ | Ok _ -> Ok ()
412412+ | Error (Command_failed { stderr; _ }) ->
413413+ let hint =
414414+ if String.length stderr > 200 then
415415+ String.sub stderr 0 200 ^ "..."
416416+ else
417417+ stderr
418418+ in
419419+ Log.warn (fun m -> m "Rebase conflict onto %s" onto);
420420+ Error (`Conflict hint)
421421+ | Error e ->
422422+ raise (err e)
423423+424424+let rebase_abort ~proc_mgr ~cwd =
425425+ Log.info (fun m -> m "Aborting rebase...");
426426+ ignore (run ~proc_mgr ~cwd ["rebase"; "--abort"])
427427+428428+let merge_abort ~proc_mgr ~cwd =
429429+ Log.info (fun m -> m "Aborting merge...");
430430+ ignore (run ~proc_mgr ~cwd ["merge"; "--abort"])
431431+432432+let reset_hard ~proc_mgr ~cwd ref_ =
433433+ Log.info (fun m -> m "Hard reset to %s" ref_);
434434+ run_exn ~proc_mgr ~cwd ["reset"; "--hard"; ref_] |> ignore
435435+436436+let clean_fd ~proc_mgr ~cwd =
437437+ Log.debug (fun m -> m "Cleaning untracked files");
438438+ run_exn ~proc_mgr ~cwd ["clean"; "-fd"] |> ignore
439439+440440+let filter_repo_to_subdirectory ~proc_mgr ~cwd ~branch ~subdirectory =
441441+ Log.info (fun m -> m "Rewriting history of %s into subdirectory %s..." branch subdirectory);
442442+ (* Use git-filter-repo with --to-subdirectory-filter to rewrite all paths into subdirectory.
443443+ This preserves full history with paths prefixed. Much faster than filter-branch.
444444+445445+ For bare repositories, we need to create a temporary worktree, run filter-repo
446446+ there, and then update the branch in the bare repo. *)
447447+448448+ (* Create a unique temporary worktree name using the branch name *)
449449+ let safe_branch = String.map (fun c -> if c = '/' then '-' else c) branch in
450450+ let temp_wt_name = ".filter-tmp-" ^ safe_branch in
451451+ let temp_wt_relpath = "../" ^ temp_wt_name in
452452+453453+ (* Construct the worktree path - cwd is (fs, path_string), so we go up one level *)
454454+ let fs = fst cwd in
455455+ let git_path = snd cwd in
456456+ let parent_path = Filename.dirname git_path in
457457+ let temp_wt_path = Filename.concat parent_path temp_wt_name in
458458+ let temp_wt : path = (fs, temp_wt_path) in
459459+460460+ (* Remove any existing temp worktree *)
461461+ ignore (run ~proc_mgr ~cwd ["worktree"; "remove"; "-f"; temp_wt_relpath]);
462462+463463+ (* Create worktree for the branch *)
464464+ run_exn ~proc_mgr ~cwd ["worktree"; "add"; temp_wt_relpath; branch] |> ignore;
465465+466466+ (* Run git-filter-repo in the worktree *)
467467+ let result = run ~proc_mgr ~cwd:temp_wt [
468468+ "filter-repo";
469469+ "--to-subdirectory-filter"; subdirectory;
470470+ "--force";
471471+ "--refs"; "HEAD"
472472+ ] in
473473+474474+ (* Handle result: get the new SHA, cleanup worktree, then update branch *)
475475+ (match result with
476476+ | Ok _ ->
477477+ (* Get the new HEAD SHA from the worktree BEFORE removing it *)
478478+ let new_sha = run_exn ~proc_mgr ~cwd:temp_wt ["rev-parse"; "HEAD"] |> string_trim in
479479+ (* Cleanup temporary worktree first (must do this before updating branch) *)
480480+ ignore (run ~proc_mgr ~cwd ["worktree"; "remove"; "-f"; temp_wt_relpath]);
481481+ (* Now update the branch in the bare repo *)
482482+ run_exn ~proc_mgr ~cwd ["branch"; "-f"; branch; new_sha] |> ignore
483483+ | Error e ->
484484+ (* Cleanup and re-raise *)
485485+ ignore (run ~proc_mgr ~cwd ["worktree"; "remove"; "-f"; temp_wt_relpath]);
486486+ raise (err e))
487487+488488+let filter_repo_from_subdirectory ~proc_mgr ~cwd ~branch ~subdirectory =
489489+ Log.info (fun m -> m "Extracting %s from subdirectory %s to root..." branch subdirectory);
490490+ (* Use git-filter-repo with --subdirectory-filter to extract files from subdirectory
491491+ to root. This is the inverse of --to-subdirectory-filter.
492492+ Preserves history for files that were in the subdirectory.
493493+494494+ For bare repositories, we need to create a temporary worktree, run filter-repo
495495+ there, and then update the branch in the bare repo. *)
496496+497497+ (* Create a unique temporary worktree name using the branch name *)
498498+ let safe_branch = String.map (fun c -> if c = '/' then '-' else c) branch in
499499+ let temp_wt_name = ".filter-tmp-" ^ safe_branch in
500500+ let temp_wt_relpath = "../" ^ temp_wt_name in
501501+502502+ (* Construct the worktree path - cwd is (fs, path_string), so we go up one level *)
503503+ let fs = fst cwd in
504504+ let git_path = snd cwd in
505505+ let parent_path = Filename.dirname git_path in
506506+ let temp_wt_path = Filename.concat parent_path temp_wt_name in
507507+ let temp_wt : path = (fs, temp_wt_path) in
508508+509509+ (* Remove any existing temp worktree *)
510510+ ignore (run ~proc_mgr ~cwd ["worktree"; "remove"; "-f"; temp_wt_relpath]);
511511+512512+ (* Create worktree for the branch *)
513513+ run_exn ~proc_mgr ~cwd ["worktree"; "add"; temp_wt_relpath; branch] |> ignore;
514514+515515+ (* Run git-filter-repo in the worktree with --subdirectory-filter *)
516516+ let result = run ~proc_mgr ~cwd:temp_wt [
517517+ "filter-repo";
518518+ "--subdirectory-filter"; subdirectory;
519519+ "--force";
520520+ "--refs"; "HEAD"
521521+ ] in
522522+523523+ (* Handle result: get the new SHA, cleanup worktree, then update branch *)
524524+ (match result with
525525+ | Ok _ ->
526526+ (* Get the new HEAD SHA from the worktree BEFORE removing it *)
527527+ let new_sha = run_exn ~proc_mgr ~cwd:temp_wt ["rev-parse"; "HEAD"] |> string_trim in
528528+ (* Cleanup temporary worktree first (must do this before updating branch) *)
529529+ ignore (run ~proc_mgr ~cwd ["worktree"; "remove"; "-f"; temp_wt_relpath]);
530530+ (* Now update the branch in the bare repo *)
531531+ run_exn ~proc_mgr ~cwd ["branch"; "-f"; branch; new_sha] |> ignore
532532+ | Error e ->
533533+ (* Cleanup and re-raise *)
534534+ ignore (run ~proc_mgr ~cwd ["worktree"; "remove"; "-f"; temp_wt_relpath]);
535535+ raise (err e))
+399
lib/git.mli
···11+(** Git operations wrapped with Eio and robust error handling.
22+33+ All git commands are executed via [Eio.Process] with proper logging
44+ and error context. Errors are wrapped in [Eio.Exn.Io] with context
55+ chains for debugging. *)
66+77+(** {1 Error Types} *)
88+99+type error =
1010+ | Command_failed of {
1111+ cmd : string list;
1212+ exit_code : int;
1313+ stdout : string;
1414+ stderr : string;
1515+ }
1616+ | Not_a_repository
1717+ | Remote_exists of string
1818+ | Remote_not_found of string
1919+ | Branch_exists of string
2020+ | Branch_not_found of string
2121+ | Merge_conflict of { branch : string; conflicting_files : string list }
2222+ | Rebase_conflict of { onto : string; hint : string }
2323+ | Uncommitted_changes
2424+ | Not_on_branch
2525+ | Detached_head
2626+2727+val pp_error : Format.formatter -> error -> unit
2828+2929+type Eio.Exn.err += E of error
3030+3131+val err : error -> exn
3232+(** [err e] creates an [Eio.Exn.Io] exception with the given error. *)
3333+3434+(** {1 Types} *)
3535+3636+type proc_mgr = [ `Generic | `Unix ] Eio.Process.mgr_ty Eio.Resource.t
3737+type path = Eio.Fs.dir_ty Eio.Path.t
3838+3939+(** {1 Low-level execution} *)
4040+4141+val run :
4242+ proc_mgr:proc_mgr ->
4343+ ?cwd:path ->
4444+ ?audit:Audit.context ->
4545+ string list ->
4646+ (string, error) result
4747+(** [run ~proc_mgr args] executes [git args] and returns stdout on success.
4848+ If [audit] is provided, records the operation to the audit context. *)
4949+5050+val run_exn :
5151+ proc_mgr:proc_mgr ->
5252+ ?cwd:path ->
5353+ ?audit:Audit.context ->
5454+ string list ->
5555+ string
5656+(** [run_exn ~proc_mgr args] executes [git args] and returns stdout.
5757+ Raises on failure with context. If [audit] is provided, records the operation. *)
5858+5959+val run_lines :
6060+ proc_mgr:proc_mgr ->
6161+ ?cwd:path ->
6262+ ?audit:Audit.context ->
6363+ string list ->
6464+ string list
6565+(** [run_lines ~proc_mgr args] executes and splits output by newlines.
6666+ If [audit] is provided, records the operation. *)
6767+6868+(** {1 Queries - Safe read-only operations} *)
6969+7070+val is_repository : path -> bool
7171+(** [is_repository path] checks if [path] contains a [.git] directory. *)
7272+7373+val current_branch :
7474+ proc_mgr:proc_mgr ->
7575+ cwd:path ->
7676+ string option
7777+(** [current_branch] returns [Some branch] if on a branch, [None] if detached. *)
7878+7979+val current_branch_exn :
8080+ proc_mgr:proc_mgr ->
8181+ cwd:path ->
8282+ string
8383+(** [current_branch_exn] returns current branch or raises [Not_on_branch]. *)
8484+8585+val current_head :
8686+ proc_mgr:proc_mgr ->
8787+ cwd:path ->
8888+ string
8989+(** [current_head] returns the current HEAD SHA. *)
9090+9191+val has_uncommitted_changes :
9292+ proc_mgr:proc_mgr ->
9393+ cwd:path ->
9494+ bool
9595+(** [has_uncommitted_changes] returns true if there are staged or unstaged changes. *)
9696+9797+val remote_exists :
9898+ proc_mgr:proc_mgr ->
9999+ cwd:path ->
100100+ string ->
101101+ bool
102102+(** [remote_exists ~proc_mgr ~cwd name] checks if remote [name] exists. *)
103103+104104+val branch_exists :
105105+ proc_mgr:proc_mgr ->
106106+ cwd:path ->
107107+ string ->
108108+ bool
109109+(** [branch_exists ~proc_mgr ~cwd name] checks if branch [name] exists. *)
110110+111111+val rev_parse :
112112+ proc_mgr:proc_mgr ->
113113+ cwd:path ->
114114+ string ->
115115+ string option
116116+(** [rev_parse ~proc_mgr ~cwd ref] returns the SHA for [ref], or [None]. *)
117117+118118+val rev_parse_exn :
119119+ proc_mgr:proc_mgr ->
120120+ cwd:path ->
121121+ string ->
122122+ string
123123+(** [rev_parse_exn] returns SHA or raises. *)
124124+125125+val rev_parse_short :
126126+ proc_mgr:proc_mgr ->
127127+ cwd:path ->
128128+ string ->
129129+ string
130130+(** [rev_parse_short] returns abbreviated SHA. *)
131131+132132+val ls_remote_default_branch :
133133+ proc_mgr:proc_mgr ->
134134+ cwd:path ->
135135+ url:string ->
136136+ string
137137+(** [ls_remote_default_branch ~proc_mgr ~cwd ~url] detects the default branch of remote. *)
138138+139139+val list_remotes :
140140+ proc_mgr:proc_mgr ->
141141+ cwd:path ->
142142+ string list
143143+(** [list_remotes] returns all remote names. *)
144144+145145+val remote_url :
146146+ proc_mgr:proc_mgr ->
147147+ cwd:path ->
148148+ string ->
149149+ string option
150150+(** [remote_url ~proc_mgr ~cwd name] returns the URL for remote [name]. *)
151151+152152+val log_oneline :
153153+ proc_mgr:proc_mgr ->
154154+ cwd:path ->
155155+ ?max_count:int ->
156156+ string ->
157157+ string ->
158158+ string list
159159+(** [log_oneline ~proc_mgr ~cwd from_ref to_ref] returns commit summaries. *)
160160+161161+val diff_stat :
162162+ proc_mgr:proc_mgr ->
163163+ cwd:path ->
164164+ string ->
165165+ string ->
166166+ string
167167+(** [diff_stat ~proc_mgr ~cwd from_ref to_ref] returns diff statistics. *)
168168+169169+val ls_tree :
170170+ proc_mgr:proc_mgr ->
171171+ cwd:path ->
172172+ tree:string ->
173173+ path:string ->
174174+ bool
175175+(** [ls_tree ~proc_mgr ~cwd ~tree ~path] checks if [path] exists in [tree]. *)
176176+177177+val rev_list_count :
178178+ proc_mgr:proc_mgr ->
179179+ cwd:path ->
180180+ string ->
181181+ string ->
182182+ int
183183+(** [rev_list_count ~proc_mgr ~cwd from_ref to_ref] counts commits between refs. *)
184184+185185+(** {1 Idempotent mutations - Safe to re-run} *)
186186+187187+val ensure_remote :
188188+ proc_mgr:proc_mgr ->
189189+ cwd:path ->
190190+ name:string ->
191191+ url:string ->
192192+ [ `Created | `Existed | `Updated ]
193193+(** [ensure_remote] adds remote if missing, updates URL if different. *)
194194+195195+val ensure_branch :
196196+ proc_mgr:proc_mgr ->
197197+ cwd:path ->
198198+ name:string ->
199199+ start_point:string ->
200200+ [ `Created | `Existed ]
201201+(** [ensure_branch] creates branch if it doesn't exist. *)
202202+203203+val ensure_vendored_remotes :
204204+ proc_mgr:proc_mgr ->
205205+ cwd:path ->
206206+ Config.vendored_package list ->
207207+ int
208208+(** [ensure_vendored_remotes ~proc_mgr ~cwd packages] ensures remotes exist for
209209+ all vendored packages. Returns the number of remotes created.
210210+ Use this to recreate remotes after cloning a workspace. *)
211211+212212+(** {1 State-changing operations} *)
213213+214214+val init :
215215+ proc_mgr:proc_mgr ->
216216+ cwd:path ->
217217+ unit
218218+(** [init] initializes a new git repository. *)
219219+220220+val fetch :
221221+ proc_mgr:proc_mgr ->
222222+ cwd:path ->
223223+ remote:string ->
224224+ unit
225225+(** [fetch] fetches from a remote. *)
226226+227227+val fetch_with_tags :
228228+ proc_mgr:proc_mgr ->
229229+ cwd:path ->
230230+ remote:string ->
231231+ unit
232232+(** [fetch_with_tags] fetches from a remote including all tags. *)
233233+234234+val resolve_branch_or_tag :
235235+ proc_mgr:proc_mgr ->
236236+ cwd:path ->
237237+ remote:string ->
238238+ ref_name:string ->
239239+ string
240240+(** [resolve_branch_or_tag] tries to resolve a ref first as a remote tracking
241241+ branch (remote/ref_name), then as a tag (refs/tags/ref_name). Returns the
242242+ resolved ref or raises an exception if neither exists. *)
243243+244244+val checkout :
245245+ proc_mgr:proc_mgr ->
246246+ cwd:path ->
247247+ string ->
248248+ unit
249249+(** [checkout] switches to a branch or commit. *)
250250+251251+val checkout_orphan :
252252+ proc_mgr:proc_mgr ->
253253+ cwd:path ->
254254+ string ->
255255+ unit
256256+(** [checkout_orphan] creates and switches to a new orphan branch. *)
257257+258258+val read_tree_prefix :
259259+ proc_mgr:proc_mgr ->
260260+ cwd:path ->
261261+ prefix:string ->
262262+ tree:string ->
263263+ unit
264264+(** [read_tree_prefix] reads a tree into the index with a path prefix. *)
265265+266266+val checkout_index :
267267+ proc_mgr:proc_mgr ->
268268+ cwd:path ->
269269+ unit
270270+(** [checkout_index] checks out files from the index to working directory. *)
271271+272272+val rm_rf :
273273+ proc_mgr:proc_mgr ->
274274+ cwd:path ->
275275+ target:string ->
276276+ unit
277277+(** [rm_rf] removes files/directories from git tracking. *)
278278+279279+val rm_cached_rf :
280280+ proc_mgr:proc_mgr ->
281281+ cwd:path ->
282282+ unit
283283+(** [rm_cached_rf] removes all files from index (for orphan branch setup). *)
284284+285285+val add_all :
286286+ proc_mgr:proc_mgr ->
287287+ cwd:path ->
288288+ unit
289289+(** [add_all] stages all changes. *)
290290+291291+val commit :
292292+ proc_mgr:proc_mgr ->
293293+ cwd:path ->
294294+ message:string ->
295295+ unit
296296+(** [commit] creates a commit with the given message. *)
297297+298298+val commit_allow_empty :
299299+ proc_mgr:proc_mgr ->
300300+ cwd:path ->
301301+ message:string ->
302302+ unit
303303+(** [commit_allow_empty] creates a commit even if there are no changes. *)
304304+305305+val branch_create :
306306+ proc_mgr:proc_mgr ->
307307+ cwd:path ->
308308+ name:string ->
309309+ start_point:string ->
310310+ unit
311311+(** [branch_create] creates a new branch at [start_point]. *)
312312+313313+val branch_force :
314314+ proc_mgr:proc_mgr ->
315315+ cwd:path ->
316316+ name:string ->
317317+ point:string ->
318318+ unit
319319+(** [branch_force] moves branch to point (creates if needed). *)
320320+321321+val remote_add :
322322+ proc_mgr:proc_mgr ->
323323+ cwd:path ->
324324+ name:string ->
325325+ url:string ->
326326+ unit
327327+(** [remote_add] adds a new remote. *)
328328+329329+val remote_set_url :
330330+ proc_mgr:proc_mgr ->
331331+ cwd:path ->
332332+ name:string ->
333333+ url:string ->
334334+ unit
335335+(** [remote_set_url] updates the URL of an existing remote. *)
336336+337337+val merge_allow_unrelated :
338338+ proc_mgr:proc_mgr ->
339339+ cwd:path ->
340340+ branch:string ->
341341+ message:string ->
342342+ (unit, [ `Conflict of string list ]) result
343343+(** [merge_allow_unrelated] merges with [--allow-unrelated-histories].
344344+ Returns [Error (`Conflict files)] if there are conflicts. *)
345345+346346+val rebase :
347347+ proc_mgr:proc_mgr ->
348348+ cwd:path ->
349349+ onto:string ->
350350+ (unit, [ `Conflict of string ]) result
351351+(** [rebase] rebases current branch onto [onto].
352352+ Returns [Error (`Conflict hint)] if there are conflicts. *)
353353+354354+val rebase_abort :
355355+ proc_mgr:proc_mgr ->
356356+ cwd:path ->
357357+ unit
358358+(** [rebase_abort] aborts an in-progress rebase. *)
359359+360360+val merge_abort :
361361+ proc_mgr:proc_mgr ->
362362+ cwd:path ->
363363+ unit
364364+(** [merge_abort] aborts an in-progress merge. *)
365365+366366+val reset_hard :
367367+ proc_mgr:proc_mgr ->
368368+ cwd:path ->
369369+ string ->
370370+ unit
371371+(** [reset_hard] does a hard reset to the given ref. *)
372372+373373+val clean_fd :
374374+ proc_mgr:proc_mgr ->
375375+ cwd:path ->
376376+ unit
377377+(** [clean_fd] removes untracked files and directories. *)
378378+379379+val filter_repo_to_subdirectory :
380380+ proc_mgr:proc_mgr ->
381381+ cwd:path ->
382382+ branch:string ->
383383+ subdirectory:string ->
384384+ unit
385385+(** [filter_repo_to_subdirectory ~proc_mgr ~cwd ~branch ~subdirectory]
386386+ rewrites the history of [branch] so all files are moved into [subdirectory].
387387+ Uses git-filter-repo for fast history rewriting. Preserves full commit history. *)
388388+389389+val filter_repo_from_subdirectory :
390390+ proc_mgr:proc_mgr ->
391391+ cwd:path ->
392392+ branch:string ->
393393+ subdirectory:string ->
394394+ unit
395395+(** [filter_repo_from_subdirectory ~proc_mgr ~cwd ~branch ~subdirectory]
396396+ rewrites the history of [branch] extracting only files from [subdirectory]
397397+ and placing them at the repository root. This is the inverse of
398398+ [filter_repo_to_subdirectory]. Uses git-filter-repo --subdirectory-filter.
399399+ Preserves full commit history for files that were in the subdirectory. *)
+242
lib/git_backend.ml
···11+(** Git backend for direct repository vendoring.
22+33+ Implements vendoring of arbitrary git repositories using the three-tier branch model:
44+ - git/upstream/<name> - pristine upstream code
55+ - git/vendor/<name> - upstream history rewritten with vendor/git/<name>/ prefix
66+ - git/patches/<name> - local modifications *)
77+88+(** {1 Branch Naming} *)
99+1010+let upstream_branch name = "git/upstream/" ^ name
1111+let vendor_branch name = "git/vendor/" ^ name
1212+let patches_branch name = "git/patches/" ^ name
1313+let vendor_path name = "vendor/git/" ^ name
1414+1515+(** {1 Worktree Kinds} *)
1616+1717+let upstream_kind name = Worktree.Git_upstream name
1818+let vendor_kind name = Worktree.Git_vendor name
1919+let patches_kind name = Worktree.Git_patches name
2020+2121+(** {1 Repository Info} *)
2222+2323+type repo_info = {
2424+ name : string;
2525+ url : string;
2626+ branch : string option;
2727+ subdir : string option;
2828+}
2929+3030+(** {1 Repository Operations} *)
3131+3232+let add_repo ~proc_mgr ~root ?cache info =
3333+ let repo_name = info.name in
3434+ let git = Worktree.git_dir root in
3535+3636+ try
3737+ (* Check if already exists *)
3838+ if Worktree.branch_exists ~proc_mgr root (patches_kind repo_name) then
3939+ Backend.Already_exists repo_name
4040+ else begin
4141+ (* Rewrite URL for known mirrors *)
4242+ let url = Git_repo_lookup.rewrite_url info.url in
4343+4444+ (* Determine the ref to use: explicit > override > default *)
4545+ let branch = match info.branch with
4646+ | Some b -> b
4747+ | None ->
4848+ match Git_repo_lookup.branch_override ~name:repo_name ~url with
4949+ | Some b -> b
5050+ | None -> Git.ls_remote_default_branch ~proc_mgr ~cwd:git ~url
5151+ in
5252+5353+ (* Fetch - either via cache or directly *)
5454+ let ref_point = match cache with
5555+ | Some cache_path ->
5656+ (* Fetch through vendor cache *)
5757+ Vendor_cache.fetch_to_project ~proc_mgr
5858+ ~cache:cache_path ~project_git:git ~url ~branch
5959+ | None ->
6060+ (* Direct fetch (with tags to support version tags) *)
6161+ let remote = "origin-" ^ repo_name in
6262+ ignore (Git.ensure_remote ~proc_mgr ~cwd:git ~name:remote ~url);
6363+ Git.fetch_with_tags ~proc_mgr ~cwd:git ~remote;
6464+ Git.resolve_branch_or_tag ~proc_mgr ~cwd:git ~remote ~ref_name:branch
6565+ in
6666+6767+ (* Step 1: Create upstream branch from fetched ref *)
6868+ Git.branch_force ~proc_mgr ~cwd:git
6969+ ~name:(upstream_branch repo_name) ~point:ref_point;
7070+7171+ (* Step 2: Create vendor branch from upstream and rewrite history *)
7272+ Git.branch_force ~proc_mgr ~cwd:git
7373+ ~name:(vendor_branch repo_name) ~point:(upstream_branch repo_name);
7474+7575+ (* If subdir is specified, we first filter to that subdirectory,
7676+ then move to vendor path. Otherwise, just move to vendor path. *)
7777+ (match info.subdir with
7878+ | Some subdir ->
7979+ (* First filter to extract only the subdirectory *)
8080+ Git.filter_repo_to_subdirectory ~proc_mgr ~cwd:git
8181+ ~branch:(vendor_branch repo_name)
8282+ ~subdirectory:subdir;
8383+ (* Now the subdir is at root, rewrite to vendor path *)
8484+ Git.filter_repo_to_subdirectory ~proc_mgr ~cwd:git
8585+ ~branch:(vendor_branch repo_name)
8686+ ~subdirectory:(vendor_path repo_name)
8787+ | None ->
8888+ (* Rewrite vendor branch history to move all files into vendor/git/<name>/ *)
8989+ Git.filter_repo_to_subdirectory ~proc_mgr ~cwd:git
9090+ ~branch:(vendor_branch repo_name)
9191+ ~subdirectory:(vendor_path repo_name));
9292+9393+ (* Get the vendor SHA after rewriting *)
9494+ let vendor_sha = match Git.rev_parse ~proc_mgr ~cwd:git (vendor_branch repo_name) with
9595+ | Some sha -> sha
9696+ | None -> failwith "Vendor branch not found after filter-repo"
9797+ in
9898+9999+ (* Step 3: Create patches branch from vendor *)
100100+ Git.branch_create ~proc_mgr ~cwd:git
101101+ ~name:(patches_branch repo_name)
102102+ ~start_point:(vendor_branch repo_name);
103103+104104+ Backend.Added { name = repo_name; sha = vendor_sha }
105105+ end
106106+ with exn ->
107107+ (* Cleanup on failure *)
108108+ (try Worktree.remove_force ~proc_mgr root (upstream_kind repo_name) with _ -> ());
109109+ (try Worktree.remove_force ~proc_mgr root (vendor_kind repo_name) with _ -> ());
110110+ Backend.Failed { name = repo_name; error = Printexc.to_string exn }
111111+112112+let copy_with_prefix ~src_dir ~dst_dir ~prefix =
113113+ (* Recursively copy files from src_dir to dst_dir/prefix/ *)
114114+ let prefix_dir = Eio.Path.(dst_dir / prefix) in
115115+ Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 prefix_dir;
116116+117117+ let rec copy_dir src dst =
118118+ Eio.Path.read_dir src |> List.iter (fun name ->
119119+ let src_path = Eio.Path.(src / name) in
120120+ let dst_path = Eio.Path.(dst / name) in
121121+ if Eio.Path.is_directory src_path then begin
122122+ Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 dst_path;
123123+ copy_dir src_path dst_path
124124+ end else begin
125125+ let content = Eio.Path.load src_path in
126126+ Eio.Path.save ~create:(`Or_truncate 0o644) dst_path content
127127+ end
128128+ )
129129+ in
130130+131131+ (* Copy everything except .git *)
132132+ Eio.Path.read_dir src_dir |> List.iter (fun name ->
133133+ if name <> ".git" then begin
134134+ let src_path = Eio.Path.(src_dir / name) in
135135+ let dst_path = Eio.Path.(prefix_dir / name) in
136136+ if Eio.Path.is_directory src_path then begin
137137+ Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 dst_path;
138138+ copy_dir src_path dst_path
139139+ end else begin
140140+ let content = Eio.Path.load src_path in
141141+ Eio.Path.save ~create:(`Or_truncate 0o644) dst_path content
142142+ end
143143+ end
144144+ )
145145+146146+let update_repo ~proc_mgr ~root ?cache repo_name =
147147+ let git = Worktree.git_dir root in
148148+149149+ try
150150+ (* Check if repo exists *)
151151+ if not (Worktree.branch_exists ~proc_mgr root (patches_kind repo_name)) then
152152+ Backend.Update_failed { name = repo_name; error = "Repository not vendored" }
153153+ else begin
154154+ (* Get remote URL *)
155155+ let remote = "origin-" ^ repo_name in
156156+ let url = match Git.remote_url ~proc_mgr ~cwd:git remote with
157157+ | Some u -> u
158158+ | None -> failwith ("Remote not found: " ^ remote)
159159+ in
160160+161161+ (* Fetch latest - either via cache or directly (with tags for completeness) *)
162162+ (match cache with
163163+ | Some cache_path ->
164164+ let branch = Git.ls_remote_default_branch ~proc_mgr ~cwd:git ~url in
165165+ ignore (Vendor_cache.fetch_to_project ~proc_mgr
166166+ ~cache:cache_path ~project_git:git ~url ~branch)
167167+ | None ->
168168+ Git.fetch_with_tags ~proc_mgr ~cwd:git ~remote);
169169+170170+ (* Get old SHA *)
171171+ let old_sha = match Git.rev_parse ~proc_mgr ~cwd:git (upstream_branch repo_name) with
172172+ | Some sha -> sha
173173+ | None -> failwith "Upstream branch not found"
174174+ in
175175+176176+ (* Determine default branch and update upstream *)
177177+ let default_branch = Git.ls_remote_default_branch ~proc_mgr ~cwd:git ~url in
178178+ let ref_point = remote ^ "/" ^ default_branch in
179179+ Git.branch_force ~proc_mgr ~cwd:git
180180+ ~name:(upstream_branch repo_name) ~point:ref_point;
181181+182182+ (* Get new SHA *)
183183+ let new_sha = match Git.rev_parse ~proc_mgr ~cwd:git (upstream_branch repo_name) with
184184+ | Some sha -> sha
185185+ | None -> failwith "Upstream branch not found"
186186+ in
187187+188188+ if old_sha = new_sha then
189189+ Backend.No_changes repo_name
190190+ else begin
191191+ (* Create worktrees *)
192192+ Worktree.ensure ~proc_mgr root (upstream_kind repo_name);
193193+ Worktree.ensure ~proc_mgr root (vendor_kind repo_name);
194194+195195+ let upstream_wt = Worktree.path root (upstream_kind repo_name) in
196196+ let vendor_wt = Worktree.path root (vendor_kind repo_name) in
197197+198198+ (* Clear vendor content and copy new *)
199199+ let vendor_pkg_path = Eio.Path.(vendor_wt / "vendor" / "git" / repo_name) in
200200+ (try Eio.Path.rmtree vendor_pkg_path with _ -> ());
201201+202202+ copy_with_prefix
203203+ ~src_dir:upstream_wt
204204+ ~dst_dir:vendor_wt
205205+ ~prefix:(vendor_path repo_name);
206206+207207+ (* Commit *)
208208+ Git.add_all ~proc_mgr ~cwd:vendor_wt;
209209+ Git.commit ~proc_mgr ~cwd:vendor_wt
210210+ ~message:(Printf.sprintf "Update %s to %s" repo_name (String.sub new_sha 0 7));
211211+212212+ (* Cleanup *)
213213+ Worktree.remove ~proc_mgr root (upstream_kind repo_name);
214214+ Worktree.remove ~proc_mgr root (vendor_kind repo_name);
215215+216216+ Backend.Updated { name = repo_name; old_sha; new_sha }
217217+ end
218218+ end
219219+ with exn ->
220220+ (try Worktree.remove_force ~proc_mgr root (upstream_kind repo_name) with _ -> ());
221221+ (try Worktree.remove_force ~proc_mgr root (vendor_kind repo_name) with _ -> ());
222222+ Backend.Update_failed { name = repo_name; error = Printexc.to_string exn }
223223+224224+let list_repos ~proc_mgr ~root =
225225+ Worktree.list_git_repos ~proc_mgr root
226226+227227+let remove_repo ~proc_mgr ~root repo_name =
228228+ let git = Worktree.git_dir root in
229229+230230+ (* Remove worktrees if exist *)
231231+ (try Worktree.remove_force ~proc_mgr root (upstream_kind repo_name) with _ -> ());
232232+ (try Worktree.remove_force ~proc_mgr root (vendor_kind repo_name) with _ -> ());
233233+ (try Worktree.remove_force ~proc_mgr root (patches_kind repo_name) with _ -> ());
234234+235235+ (* Delete branches *)
236236+ (try Git.run_exn ~proc_mgr ~cwd:git ["branch"; "-D"; upstream_branch repo_name] |> ignore with _ -> ());
237237+ (try Git.run_exn ~proc_mgr ~cwd:git ["branch"; "-D"; vendor_branch repo_name] |> ignore with _ -> ());
238238+ (try Git.run_exn ~proc_mgr ~cwd:git ["branch"; "-D"; patches_branch repo_name] |> ignore with _ -> ());
239239+240240+ (* Remove remote *)
241241+ let remote = "origin-" ^ repo_name in
242242+ (try Git.run_exn ~proc_mgr ~cwd:git ["remote"; "remove"; remote] |> ignore with _ -> ())
+70
lib/git_backend.mli
···11+(** Git backend for direct repository vendoring.
22+33+ Implements vendoring of arbitrary git repositories using the three-tier branch model:
44+ - git/upstream/<name> - pristine upstream code
55+ - git/vendor/<name> - upstream history rewritten with vendor/git/<name>/ prefix
66+ - git/patches/<name> - local modifications
77+88+ Unlike the opam backend which discovers packages via opam repositories,
99+ this backend allows cloning any git repository directly. *)
1010+1111+(** {1 Branch Naming} *)
1212+1313+val upstream_branch : string -> string
1414+(** [upstream_branch name] returns the upstream branch name "git/upstream/<name>". *)
1515+1616+val vendor_branch : string -> string
1717+(** [vendor_branch name] returns the vendor branch name "git/vendor/<name>". *)
1818+1919+val patches_branch : string -> string
2020+(** [patches_branch name] returns the patches branch name "git/patches/<name>". *)
2121+2222+val vendor_path : string -> string
2323+(** [vendor_path name] returns the vendor directory path "vendor/git/<name>". *)
2424+2525+(** {1 Repository Info} *)
2626+2727+type repo_info = {
2828+ name : string; (** User-specified name *)
2929+ url : string; (** Git URL to clone from *)
3030+ branch : string option; (** Optional branch/tag to track *)
3131+ subdir : string option; (** Optional subdirectory to extract *)
3232+}
3333+3434+(** {1 Repository Operations} *)
3535+3636+val add_repo :
3737+ proc_mgr:Git.proc_mgr ->
3838+ root:Worktree.root ->
3939+ ?cache:Vendor_cache.t ->
4040+ repo_info ->
4141+ Backend.add_result
4242+(** [add_repo ~proc_mgr ~root ?cache info] vendors a git repository.
4343+4444+ Creates the three-tier branch structure:
4545+ 1. Fetches from url into git/upstream/<name>
4646+ 2. Rewrites history into git/vendor/<name> with vendor/git/<name>/ prefix
4747+ 3. Creates git/patches/<name> for local modifications
4848+4949+ If [subdir] is specified, only that subdirectory is extracted from the repo. *)
5050+5151+val update_repo :
5252+ proc_mgr:Git.proc_mgr ->
5353+ root:Worktree.root ->
5454+ ?cache:Vendor_cache.t ->
5555+ string ->
5656+ Backend.update_result
5757+(** [update_repo ~proc_mgr ~root ?cache name] updates a vendored repository from upstream. *)
5858+5959+val list_repos :
6060+ proc_mgr:Git.proc_mgr ->
6161+ root:Worktree.root ->
6262+ string list
6363+(** [list_repos ~proc_mgr ~root] returns names of all vendored git repositories. *)
6464+6565+val remove_repo :
6666+ proc_mgr:Git.proc_mgr ->
6767+ root:Worktree.root ->
6868+ string ->
6969+ unit
7070+(** [remove_repo ~proc_mgr ~root name] removes a vendored repository. *)
+73
lib/git_repo_lookup.ml
···11+(** Git repository URL lookup and rewriting.
22+33+ This module handles URL rewriting for git repositories, mapping known
44+ slow upstream URLs to faster mirrors, and branch/tag overrides for
55+ specific packages. *)
66+77+(** Rewrite a git URL to use a faster mirror if available.
88+99+ Currently handles:
1010+ - erratique.ch repos are mirrored on GitHub under dbuenzli
1111+ - git.robur.coop repos are mirrored on GitHub under robur-coop
1212+ (strips the org prefix: git.robur.coop/robur/X -> github.com/robur-coop/X) *)
1313+let rewrite_url url =
1414+ (* Helper to check and rewrite prefix *)
1515+ let try_rewrite ~prefix ~replacement url =
1616+ if String.length url > String.length prefix
1717+ && String.sub url 0 (String.length prefix) = prefix
1818+ then
1919+ let rest = String.sub url (String.length prefix)
2020+ (String.length url - String.length prefix) in
2121+ Some (replacement ^ rest)
2222+ else None
2323+ in
2424+ (* Helper to rewrite robur.coop URLs, stripping the org path component *)
2525+ let try_rewrite_robur ~prefix url =
2626+ if String.length url > String.length prefix
2727+ && String.sub url 0 (String.length prefix) = prefix
2828+ then
2929+ (* rest is e.g. "robur/ohex.git" - strip org prefix *)
3030+ let rest = String.sub url (String.length prefix)
3131+ (String.length url - String.length prefix) in
3232+ (* Find the first slash to strip the org *)
3333+ match String.index_opt rest '/' with
3434+ | Some idx ->
3535+ let repo = String.sub rest (idx + 1) (String.length rest - idx - 1) in
3636+ Some ("https://github.com/robur-coop/" ^ repo)
3737+ | None -> Some ("https://github.com/robur-coop/" ^ rest)
3838+ else None
3939+ in
4040+ (* Try each rewrite rule in order *)
4141+ match try_rewrite ~prefix:"https://erratique.ch/repos/"
4242+ ~replacement:"https://github.com/dbuenzli/" url with
4343+ | Some u -> u
4444+ | None ->
4545+ match try_rewrite ~prefix:"http://erratique.ch/repos/"
4646+ ~replacement:"https://github.com/dbuenzli/" url with
4747+ | Some u -> u
4848+ | None ->
4949+ match try_rewrite_robur ~prefix:"https://git.robur.coop/" url with
5050+ | Some u -> u
5151+ | None ->
5252+ match try_rewrite_robur ~prefix:"git://git.robur.coop/" url with
5353+ | Some u -> u
5454+ | None -> url
5555+5656+(** Override branch/tag for specific packages.
5757+5858+ Some packages have unstable main branches or we want to pin to specific
5959+ versions. This returns Some ref if an override exists, None otherwise.
6060+6161+ Currently handles:
6262+ - dune: use tag 3.20.2 instead of main branch *)
6363+let branch_override ~name ~url =
6464+ (* Dune's main branch can be unstable; pin to release tag *)
6565+ let is_dune_url =
6666+ String.equal url "https://github.com/ocaml/dune.git" ||
6767+ String.equal url "https://github.com/ocaml/dune" ||
6868+ String.equal url "git://github.com/ocaml/dune.git"
6969+ in
7070+ if name = "dune" || is_dune_url then
7171+ Some "3.20.2"
7272+ else
7373+ None
+164
lib/init.ml
···11+(** Project initialization for unpac.
22+33+ Creates the bare repository structure and initial main worktree. *)
44+55+let default_unpac_toml = {|[opam]
66+repositories = []
77+# compiler = "5.4.0"
88+99+# Vendor cache location (default: XDG cache directory)
1010+# vendor_cache = "/path/to/cache"
1111+1212+[projects]
1313+# Projects will be added here
1414+|}
1515+1616+let project_dune_project name = Printf.sprintf {|(lang dune 3.20)
1717+(name %s)
1818+|} name
1919+2020+let project_dune = {|(vendored_dirs vendor)
2121+|}
2222+2323+let project_gitignore = {|_build/
2424+*.install
2525+|}
2626+2727+let vendor_dune = {|(vendored_dirs opam)
2828+|}
2929+3030+(** Initialize a new unpac project at the given path. *)
3131+let init ~proc_mgr ~fs path =
3232+ (* Convert relative paths to absolute *)
3333+ let abs_path =
3434+ if Filename.is_relative path then
3535+ Filename.concat (Sys.getcwd ()) path
3636+ else path
3737+ in
3838+ let root = Eio.Path.(fs / abs_path) in
3939+4040+ (* Create root directory *)
4141+ Eio.Path.mkdirs ~exists_ok:false ~perm:0o755 root;
4242+4343+ (* Initialize bare repository *)
4444+ let git_path = Eio.Path.(root / "git") in
4545+ Eio.Path.mkdirs ~exists_ok:false ~perm:0o755 git_path;
4646+ Git.run_exn ~proc_mgr ~cwd:git_path ["init"; "--bare"] |> ignore;
4747+4848+ (* Create initial main branch with unpac.toml *)
4949+ (* First create a temporary worktree to make the initial commit *)
5050+ let main_path = Eio.Path.(root / "main") in
5151+ Eio.Path.mkdirs ~exists_ok:false ~perm:0o755 main_path;
5252+5353+ (* Initialize as a regular repo temporarily to create first commit *)
5454+ Git.run_exn ~proc_mgr ~cwd:main_path ["init"] |> ignore;
5555+5656+ (* Write unpac.toml *)
5757+ Eio.Path.save ~create:(`Or_truncate 0o644)
5858+ Eio.Path.(main_path / "unpac.toml")
5959+ default_unpac_toml;
6060+6161+ (* Create initial commit *)
6262+ Git.run_exn ~proc_mgr ~cwd:main_path ["add"; "unpac.toml"] |> ignore;
6363+ Git.run_exn ~proc_mgr ~cwd:main_path
6464+ ["commit"; "-m"; "Initial commit"] |> ignore;
6565+6666+ (* Rename branch to main if needed *)
6767+ Git.run_exn ~proc_mgr ~cwd:main_path ["branch"; "-M"; "main"] |> ignore;
6868+6969+ (* Push to bare repo and convert to worktree *)
7070+ Git.run_exn ~proc_mgr ~cwd:main_path
7171+ ["remote"; "add"; "origin"; "../git"] |> ignore;
7272+ Git.run_exn ~proc_mgr ~cwd:main_path
7373+ ["push"; "-u"; "origin"; "main"] |> ignore;
7474+7575+ (* Remove the temporary clone and add main as a worktree of the bare repo *)
7676+ Eio.Path.rmtree main_path;
7777+7878+ (* Add main as a worktree of the bare repo *)
7979+ Git.run_exn ~proc_mgr ~cwd:git_path
8080+ ["worktree"; "add"; "../main"; "main"] |> ignore;
8181+8282+ root
8383+8484+(** Check if a path is an unpac project root. *)
8585+let is_unpac_root path =
8686+ Eio.Path.is_directory Eio.Path.(path / "git") &&
8787+ Eio.Path.is_directory Eio.Path.(path / "main") &&
8888+ Eio.Path.is_file Eio.Path.(path / "main" / "unpac.toml")
8989+9090+(** Find the unpac root by walking up from current directory. *)
9191+let find_root ~fs ~cwd =
9292+ let rec go path =
9393+ if is_unpac_root path then Some path
9494+ else match Eio.Path.split path with
9595+ | Some (parent, _) -> go parent
9696+ | None -> None
9797+ in
9898+ go Eio.Path.(fs / cwd)
9999+100100+(** Create a new project branch with template. *)
101101+let create_project ~proc_mgr root name =
102102+ let project_path = Worktree.path root (Project name) in
103103+104104+ (* Ensure project directory parent exists *)
105105+ let project_dir = Eio.Path.(root / "project") in
106106+ Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 project_dir;
107107+108108+ (* Create orphan branch *)
109109+ Worktree.ensure_orphan ~proc_mgr root (Project name);
110110+111111+ (* Write template files *)
112112+ Eio.Path.save ~create:(`Or_truncate 0o644)
113113+ Eio.Path.(project_path / "dune-project")
114114+ (project_dune_project name);
115115+116116+ Eio.Path.save ~create:(`Or_truncate 0o644)
117117+ Eio.Path.(project_path / "dune")
118118+ project_dune;
119119+120120+ Eio.Path.save ~create:(`Or_truncate 0o644)
121121+ Eio.Path.(project_path / ".gitignore")
122122+ project_gitignore;
123123+124124+ (* Create vendor directory structure with dune file *)
125125+ Eio.Path.mkdirs ~exists_ok:true ~perm:0o755
126126+ Eio.Path.(project_path / "vendor" / "opam");
127127+128128+ Eio.Path.save ~create:(`Or_truncate 0o644)
129129+ Eio.Path.(project_path / "vendor" / "dune")
130130+ vendor_dune;
131131+132132+ (* Commit template *)
133133+ Git.run_exn ~proc_mgr ~cwd:project_path ["add"; "-A"] |> ignore;
134134+ Git.run_exn ~proc_mgr ~cwd:project_path
135135+ ["commit"; "-m"; "Initialize project " ^ name] |> ignore;
136136+137137+ (* Update main/unpac.toml to register project *)
138138+ let main_path = Worktree.path root Main in
139139+ let toml_path = Eio.Path.(main_path / "unpac.toml") in
140140+ let content = Eio.Path.load toml_path in
141141+142142+ (* Simple append to [projects] section - a proper implementation would parse TOML *)
143143+ let updated =
144144+ if content = "" || not (String.ends_with ~suffix:"\n" content)
145145+ then content ^ "\n" ^ name ^ " = {}\n"
146146+ else content ^ name ^ " = {}\n"
147147+ in
148148+ Eio.Path.save ~create:(`Or_truncate 0o644) toml_path updated;
149149+150150+ Git.run_exn ~proc_mgr ~cwd:main_path ["add"; "unpac.toml"] |> ignore;
151151+ Git.run_exn ~proc_mgr ~cwd:main_path
152152+ ["commit"; "-m"; "Add project " ^ name] |> ignore;
153153+154154+ project_path
155155+156156+(** Remove a project branch and worktree. *)
157157+let remove_project ~proc_mgr root name =
158158+ (* Remove worktree if exists *)
159159+ Worktree.remove_force ~proc_mgr root (Project name);
160160+161161+ (* Delete the branch *)
162162+ let git = Worktree.git_dir root in
163163+ let branch = Worktree.branch (Project name) in
164164+ Git.run_exn ~proc_mgr ~cwd:git ["branch"; "-D"; branch] |> ignore
+44
lib/init.mli
···11+(** Project initialization for unpac.
22+33+ Creates the bare repository structure and initial main worktree. *)
44+55+val init :
66+ proc_mgr:Git.proc_mgr ->
77+ fs:Eio.Fs.dir_ty Eio.Path.t ->
88+ string ->
99+ Worktree.root
1010+(** [init ~proc_mgr ~fs path] creates a new unpac project at [path].
1111+1212+ Creates:
1313+ - [path/git/] - bare git repository
1414+ - [path/main/] - worktree for main branch with unpac.toml *)
1515+1616+val is_unpac_root : Eio.Fs.dir_ty Eio.Path.t -> bool
1717+(** [is_unpac_root path] checks if [path] is an unpac project root. *)
1818+1919+val find_root :
2020+ fs:Eio.Fs.dir_ty Eio.Path.t ->
2121+ cwd:string ->
2222+ Worktree.root option
2323+(** [find_root ~fs ~cwd] walks up from [cwd] to find the unpac root. *)
2424+2525+val create_project :
2626+ proc_mgr:Git.proc_mgr ->
2727+ Worktree.root ->
2828+ string ->
2929+ Eio.Fs.dir_ty Eio.Path.t
3030+(** [create_project ~proc_mgr root name] creates a new project branch.
3131+3232+ Creates orphan branch [project/<name>] with template:
3333+ - dune-project (lang dune 3.20)
3434+ - dune with (vendored_dirs vendor)
3535+ - vendor/opam/ directory
3636+3737+ Updates main/unpac.toml to register the project. *)
3838+3939+val remove_project :
4040+ proc_mgr:Git.proc_mgr ->
4141+ Worktree.root ->
4242+ string ->
4343+ unit
4444+(** [remove_project ~proc_mgr root name] removes a project branch and worktree. *)
+304
lib/monorepo.ml
···11+(** Monorepo export: create a standalone buildable directory from unpac workspace.
22+33+ Combines all projects and their vendored dependencies into a single directory
44+ structure suitable for building with dune. No git history is included. *)
55+66+let src = Logs.Src.create "unpac.monorepo" ~doc:"Monorepo export"
77+module Log = (val Logs.src_log src : Logs.LOG)
88+99+type export_config = {
1010+ output_dir : string;
1111+ projects : string list option; (** None = all projects *)
1212+ include_opam : bool;
1313+ include_git : bool;
1414+}
1515+1616+type export_result = {
1717+ projects_exported : string list;
1818+ opam_packages : string list;
1919+ git_repos : string list;
2020+ output_path : string;
2121+}
2222+2323+let default_config ~output_dir = {
2424+ output_dir;
2525+ projects = None;
2626+ include_opam = true;
2727+ include_git = true;
2828+}
2929+3030+(* Copy a directory tree recursively, excluding .git and _build *)
3131+let rec copy_tree ~src ~dst =
3232+ if Sys.is_directory src then begin
3333+ if not (Sys.file_exists dst) then
3434+ Unix.mkdir dst 0o755;
3535+ let entries = Sys.readdir src in
3636+ Array.iter (fun name ->
3737+ if name <> ".git" && name <> "_build" then begin
3838+ let src_path = Filename.concat src name in
3939+ let dst_path = Filename.concat dst name in
4040+ copy_tree ~src:src_path ~dst:dst_path
4141+ end
4242+ ) entries
4343+ end else begin
4444+ (* Copy file *)
4545+ let content = In_channel.with_open_bin src In_channel.input_all in
4646+ Out_channel.with_open_bin dst (fun oc ->
4747+ Out_channel.output_string oc content)
4848+ end
4949+5050+(* Remove a directory tree recursively *)
5151+let rec remove_tree path =
5252+ if Sys.file_exists path then begin
5353+ if Sys.is_directory path then begin
5454+ Array.iter (fun name ->
5555+ remove_tree (Filename.concat path name)
5656+ ) (Sys.readdir path);
5757+ Unix.rmdir path
5858+ end else
5959+ Sys.remove path
6060+ end
6161+6262+(* Export files from a git branch to a directory (without git history) *)
6363+let export_branch_to_dir ~proc_mgr ~git_dir ~branch ~output_dir =
6464+ Log.info (fun m -> m "Exporting branch %s to %s" branch output_dir);
6565+ let temp_dir = Filename.temp_dir "unpac-export" "" in
6666+ begin
6767+ try
6868+ (* Check if branch exists *)
6969+ let branch_exists =
7070+ match Git.rev_parse ~proc_mgr ~cwd:git_dir branch with
7171+ | Some _ -> true
7272+ | None -> false
7373+ in
7474+ if not branch_exists then begin
7575+ Log.warn (fun m -> m "Branch %s does not exist, skipping" branch);
7676+ false
7777+ end else begin
7878+ (* Create temporary worktree *)
7979+ Git.run_exn ~proc_mgr ~cwd:git_dir
8080+ ["worktree"; "add"; "--detach"; temp_dir; branch]
8181+ |> ignore;
8282+ (* Copy files to output *)
8383+ if not (Sys.file_exists output_dir) then
8484+ Unix.mkdir output_dir 0o755;
8585+ copy_tree ~src:temp_dir ~dst:output_dir;
8686+ (* Remove worktree *)
8787+ Git.run_exn ~proc_mgr ~cwd:git_dir
8888+ ["worktree"; "remove"; "--force"; temp_dir]
8989+ |> ignore;
9090+ true
9191+ end
9292+ with exn ->
9393+ (* Clean up on error *)
9494+ (try
9595+ Git.run_exn ~proc_mgr ~cwd:git_dir
9696+ ["worktree"; "remove"; "--force"; temp_dir]
9797+ |> ignore
9898+ with _ -> ());
9999+ (try remove_tree temp_dir with _ -> ());
100100+ raise exn
101101+ end
102102+103103+(* Export a project, stripping its vendor/ directory *)
104104+let export_project ~proc_mgr ~git_dir ~project ~output_dir =
105105+ let branch = "project/" ^ project in
106106+ let project_dir = Filename.concat output_dir project in
107107+ Log.info (fun m -> m "Exporting project %s" project);
108108+109109+ if export_branch_to_dir ~proc_mgr ~git_dir ~branch ~output_dir:project_dir then begin
110110+ (* Remove the vendor/ directory from exported project - deps go in root vendor/ *)
111111+ let vendor_dir = Filename.concat project_dir "vendor" in
112112+ if Sys.file_exists vendor_dir then begin
113113+ Log.info (fun m -> m "Removing vendor/ from project %s (will use root vendor/)" project);
114114+ remove_tree vendor_dir
115115+ end;
116116+ true
117117+ end else
118118+ false
119119+120120+(* Export an opam package from patches branch *)
121121+let export_opam_package ~proc_mgr ~git_dir ~package ~vendor_dir =
122122+ let branch = "opam/patches/" ^ package in
123123+ let package_dir = Filename.concat (Filename.concat vendor_dir "opam") package in
124124+ Log.info (fun m -> m "Exporting opam package %s" package);
125125+ export_branch_to_dir ~proc_mgr ~git_dir ~branch ~output_dir:package_dir
126126+127127+(* Export a git repo from patches branch *)
128128+let export_git_repo ~proc_mgr ~git_dir ~repo ~vendor_dir =
129129+ let branch = "git-repos/patches/" ^ repo in
130130+ let repo_dir = Filename.concat (Filename.concat vendor_dir "git") repo in
131131+ Log.info (fun m -> m "Exporting git repo %s" repo);
132132+ export_branch_to_dir ~proc_mgr ~git_dir ~branch ~output_dir:repo_dir
133133+134134+(* Generate root dune-project file *)
135135+let generate_dune_project ~output_dir ~projects =
136136+ let content = Printf.sprintf
137137+{|(lang dune 3.0)
138138+(name unpac-monorepo)
139139+140140+; Combined monorepo from unpac workspace
141141+; Projects: %s
142142+143143+(generate_opam_files false)
144144+|}
145145+ (String.concat ", " projects)
146146+ in
147147+ let path = Filename.concat output_dir "dune-project" in
148148+ Out_channel.with_open_bin path (fun oc ->
149149+ Out_channel.output_string oc content)
150150+151151+(* Generate root dune file with vendored_dirs and includes *)
152152+let generate_root_dune ~output_dir ~projects ~has_opam ~has_git =
153153+ let vendor_stanzas =
154154+ if has_opam || has_git then
155155+ "(vendored_dirs vendor)\n"
156156+ else ""
157157+ in
158158+ (* Simple root dune - projects are subdirectories *)
159159+ let content = Printf.sprintf
160160+{|; Root dune file for unpac monorepo
161161+; Auto-generated - do not edit
162162+; Projects: %s
163163+164164+%s|}
165165+ (String.concat ", " projects)
166166+ vendor_stanzas
167167+ in
168168+ let path = Filename.concat output_dir "dune" in
169169+ Out_channel.with_open_bin path (fun oc ->
170170+ Out_channel.output_string oc content)
171171+172172+(* Generate vendor/dune file *)
173173+let generate_vendor_dune ~vendor_dir ~has_opam ~has_git =
174174+ let subdirs =
175175+ (if has_opam then ["opam"] else []) @
176176+ (if has_git then ["git"] else [])
177177+ in
178178+ if subdirs <> [] then begin
179179+ let content = Printf.sprintf
180180+{|; Vendor dune file for unpac monorepo
181181+(vendored_dirs %s)
182182+|}
183183+ (String.concat " " subdirs)
184184+ in
185185+ let path = Filename.concat vendor_dir "dune" in
186186+ Out_channel.with_open_bin path (fun oc ->
187187+ Out_channel.output_string oc content)
188188+ end
189189+190190+(* Update project dune files to reference parent vendor/ *)
191191+let update_project_dune ~project_dir =
192192+ let dune_path = Filename.concat project_dir "dune" in
193193+ if Sys.file_exists dune_path then begin
194194+ let content = In_channel.with_open_bin dune_path In_channel.input_all in
195195+ (* Remove local vendored_dirs since we use root-level vendor/ *)
196196+ if String.length content > 0 then begin
197197+ let lines = String.split_on_char '\n' content in
198198+ let filtered = List.filter (fun line ->
199199+ let trimmed = String.trim line in
200200+ not (String.length trimmed >= 14 &&
201201+ String.sub trimmed 0 14 = "(vendored_dirs")
202202+ ) lines in
203203+ let updated = String.concat "\n" filtered in
204204+ Out_channel.with_open_bin dune_path (fun oc ->
205205+ Out_channel.output_string oc updated)
206206+ end
207207+ end
208208+209209+(* Main export function *)
210210+let export ~proc_mgr ~root ~config =
211211+ let git_dir = Worktree.git_dir root in
212212+213213+ (* Create output directory *)
214214+ if not (Sys.file_exists config.output_dir) then
215215+ Unix.mkdir config.output_dir 0o755;
216216+217217+ (* Get list of projects to export *)
218218+ let all_projects = Worktree.list_projects ~proc_mgr root in
219219+ let projects = match config.projects with
220220+ | Some ps -> List.filter (fun p -> List.mem p all_projects) ps
221221+ | None -> all_projects
222222+ in
223223+224224+ if projects = [] then begin
225225+ Log.warn (fun m -> m "No projects to export");
226226+ { projects_exported = []; opam_packages = []; git_repos = [];
227227+ output_path = config.output_dir }
228228+ end else begin
229229+ Log.info (fun m -> m "Exporting %d projects: %s"
230230+ (List.length projects) (String.concat ", " projects));
231231+232232+ (* Export each project *)
233233+ let exported_projects = List.filter_map (fun project ->
234234+ if export_project ~proc_mgr ~git_dir ~project ~output_dir:config.output_dir then
235235+ Some project
236236+ else
237237+ None
238238+ ) projects in
239239+240240+ (* Create vendor directory *)
241241+ let vendor_dir = Filename.concat config.output_dir "vendor" in
242242+ if not (Sys.file_exists vendor_dir) then
243243+ Unix.mkdir vendor_dir 0o755;
244244+245245+ (* Export opam packages *)
246246+ let opam_packages =
247247+ if config.include_opam then begin
248248+ let all_opam = Worktree.list_opam_packages ~proc_mgr root in
249249+ Log.info (fun m -> m "Exporting %d opam packages" (List.length all_opam));
250250+ (* Create opam subdirectory *)
251251+ let opam_dir = Filename.concat vendor_dir "opam" in
252252+ if not (Sys.file_exists opam_dir) && all_opam <> [] then
253253+ Unix.mkdir opam_dir 0o755;
254254+ List.filter_map (fun pkg ->
255255+ if export_opam_package ~proc_mgr ~git_dir ~package:pkg ~vendor_dir then
256256+ Some pkg
257257+ else
258258+ None
259259+ ) all_opam
260260+ end else []
261261+ in
262262+263263+ (* Export git repos *)
264264+ let git_repos =
265265+ if config.include_git then begin
266266+ let all_git = Git_backend.list_repos ~proc_mgr ~root in
267267+ Log.info (fun m -> m "Exporting %d git repos" (List.length all_git));
268268+ (* Create git subdirectory *)
269269+ let git_subdir = Filename.concat vendor_dir "git" in
270270+ if not (Sys.file_exists git_subdir) && all_git <> [] then
271271+ Unix.mkdir git_subdir 0o755;
272272+ List.filter_map (fun repo ->
273273+ if export_git_repo ~proc_mgr ~git_dir ~repo ~vendor_dir then
274274+ Some repo
275275+ else
276276+ None
277277+ ) all_git
278278+ end else []
279279+ in
280280+281281+ (* Generate dune files *)
282282+ let has_opam = opam_packages <> [] in
283283+ let has_git = git_repos <> [] in
284284+285285+ generate_dune_project ~output_dir:config.output_dir ~projects:exported_projects;
286286+ generate_root_dune ~output_dir:config.output_dir ~projects:exported_projects
287287+ ~has_opam ~has_git;
288288+289289+ if has_opam || has_git then
290290+ generate_vendor_dune ~vendor_dir ~has_opam ~has_git;
291291+292292+ (* Update project dune files *)
293293+ List.iter (fun project ->
294294+ let project_dir = Filename.concat config.output_dir project in
295295+ update_project_dune ~project_dir
296296+ ) exported_projects;
297297+298298+ Log.info (fun m -> m "Monorepo export complete: %s" config.output_dir);
299299+300300+ { projects_exported = exported_projects;
301301+ opam_packages;
302302+ git_repos;
303303+ output_path = config.output_dir }
304304+ end
+79
lib/monorepo.mli
···11+(** Monorepo export: create a standalone buildable directory from unpac workspace.
22+33+ Combines all projects and their vendored dependencies into a single directory
44+ structure suitable for building with dune. No git history is included.
55+66+ {1 Output Structure}
77+88+ The exported monorepo has this structure:
99+ {v
1010+ output/
1111+ ├── dune-project # Combined project metadata
1212+ ├── dune # Root dune with vendored_dirs
1313+ ├── project1/ # First project
1414+ │ ├── src/
1515+ │ ├── dune
1616+ │ └── dune-project
1717+ ├── project2/ # Second project
1818+ │ └── ...
1919+ └── vendor/ # All vendored dependencies
2020+ ├── dune # (vendored_dirs opam git)
2121+ ├── opam/
2222+ │ ├── astring/
2323+ │ ├── eio/
2424+ │ └── ...
2525+ └── git/
2626+ ├── mylib/
2727+ └── ...
2828+ v}
2929+3030+ {1 Usage}
3131+3232+ {v
3333+ unpac monorepo /path/to/output
3434+ unpac monorepo -p myproject /path/to/output # single project
3535+ unpac monorepo --no-opam /path/to/output # skip opam packages
3636+ v}
3737+3838+ The output can be built directly with [dune build] or [dune build @doc].
3939+*)
4040+4141+(** {1 Configuration} *)
4242+4343+type export_config = {
4444+ output_dir : string; (** Target directory for export *)
4545+ projects : string list option; (** Projects to include (None = all) *)
4646+ include_opam : bool; (** Include vendored opam packages *)
4747+ include_git : bool; (** Include vendored git repositories *)
4848+}
4949+5050+val default_config : output_dir:string -> export_config
5151+(** Create default config exporting all projects and dependencies. *)
5252+5353+(** {1 Export Result} *)
5454+5555+type export_result = {
5656+ projects_exported : string list; (** Projects that were exported *)
5757+ opam_packages : string list; (** Opam packages in vendor/ *)
5858+ git_repos : string list; (** Git repos in vendor/ *)
5959+ output_path : string; (** Path to output directory *)
6060+}
6161+6262+(** {1 Export Function} *)
6363+6464+val export :
6565+ proc_mgr:Git.proc_mgr ->
6666+ root:Worktree.root ->
6767+ config:export_config ->
6868+ export_result
6969+(** [export ~proc_mgr ~root ~config] creates a standalone monorepo.
7070+7171+ The function:
7272+ 1. Exports each project from its [project/<name>] branch
7373+ 2. Strips the [vendor/] directory from each project
7474+ 3. Exports all vendored opam packages from [opam/patches/*] branches
7575+ 4. Exports all vendored git repos from [git-repos/patches/*] branches
7676+ 5. Places dependencies in a shared [vendor/] directory
7777+ 6. Generates appropriate dune files for building
7878+7979+ No git history is preserved - only the current state of each branch. *)
···11+(** Opam backend for unpac.
22+33+ Implements vendoring of opam packages using the three-tier branch model:
44+ - opam/upstream/<pkg> - pristine upstream code
55+ - opam/vendor/<pkg> - upstream history rewritten with vendor/opam/<pkg>/ prefix
66+ - opam/patches/<pkg> - local modifications
77+88+ The vendor branch preserves full git history from upstream, with all paths
99+ rewritten to be under vendor/opam/<pkg>/. This allows git blame/log to work
1010+ correctly on vendored files. *)
1111+1212+module Worktree = Unpac.Worktree
1313+module Git = Unpac.Git
1414+module Git_repo_lookup = Unpac.Git_repo_lookup
1515+module Vendor_cache = Unpac.Vendor_cache
1616+module Backend = Unpac.Backend
1717+1818+let name = "opam"
1919+2020+(** {1 Branch Naming} *)
2121+2222+let upstream_branch pkg = "opam/upstream/" ^ pkg
2323+let vendor_branch pkg = "opam/vendor/" ^ pkg
2424+let patches_branch pkg = "opam/patches/" ^ pkg
2525+let vendor_path pkg = "vendor/opam/" ^ pkg
2626+2727+(** {1 Worktree Kinds} *)
2828+2929+let upstream_kind pkg = Worktree.Opam_upstream pkg
3030+let vendor_kind pkg = Worktree.Opam_vendor pkg
3131+let patches_kind pkg = Worktree.Opam_patches pkg
3232+3333+(** {1 Package Operations} *)
3434+3535+let copy_with_prefix ~src_dir ~dst_dir ~prefix =
3636+ (* Recursively copy files from src_dir to dst_dir/prefix/ *)
3737+ let prefix_dir = Eio.Path.(dst_dir / prefix) in
3838+ Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 prefix_dir;
3939+4040+ let rec copy_dir src dst =
4141+ Eio.Path.read_dir src |> List.iter (fun name ->
4242+ let src_path = Eio.Path.(src / name) in
4343+ let dst_path = Eio.Path.(dst / name) in
4444+ if Eio.Path.is_directory src_path then begin
4545+ Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 dst_path;
4646+ copy_dir src_path dst_path
4747+ end else begin
4848+ let content = Eio.Path.load src_path in
4949+ Eio.Path.save ~create:(`Or_truncate 0o644) dst_path content
5050+ end
5151+ )
5252+ in
5353+5454+ (* Copy everything except .git *)
5555+ Eio.Path.read_dir src_dir |> List.iter (fun name ->
5656+ if name <> ".git" then begin
5757+ let src_path = Eio.Path.(src_dir / name) in
5858+ let dst_path = Eio.Path.(prefix_dir / name) in
5959+ if Eio.Path.is_directory src_path then begin
6060+ Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 dst_path;
6161+ copy_dir src_path dst_path
6262+ end else begin
6363+ let content = Eio.Path.load src_path in
6464+ Eio.Path.save ~create:(`Or_truncate 0o644) dst_path content
6565+ end
6666+ end
6767+ )
6868+6969+let add_package ~proc_mgr ~root ?cache (info : Backend.package_info) =
7070+ let pkg = info.name in
7171+ let git = Worktree.git_dir root in
7272+7373+ try
7474+ (* Check if already exists *)
7575+ if Worktree.branch_exists ~proc_mgr root (patches_kind pkg) then
7676+ Backend.Already_exists pkg
7777+ else begin
7878+ (* Rewrite URL for known mirrors *)
7979+ let url = Git_repo_lookup.rewrite_url info.url in
8080+8181+ (* Determine the ref to use: explicit > override > default *)
8282+ let branch = match info.branch with
8383+ | Some b -> b
8484+ | None ->
8585+ match Git_repo_lookup.branch_override ~name:pkg ~url with
8686+ | Some b -> b
8787+ | None -> Git.ls_remote_default_branch ~proc_mgr ~cwd:git ~url
8888+ in
8989+9090+ (* Fetch - either via cache or directly *)
9191+ let ref_point = match cache with
9292+ | Some cache_path ->
9393+ (* Fetch through vendor cache *)
9494+ Vendor_cache.fetch_to_project ~proc_mgr
9595+ ~cache:cache_path ~project_git:git ~url ~branch
9696+ | None ->
9797+ (* Direct fetch (with tags to support version tags like 3.20.2) *)
9898+ let remote = "origin-" ^ pkg in
9999+ ignore (Git.ensure_remote ~proc_mgr ~cwd:git ~name:remote ~url);
100100+ Git.fetch_with_tags ~proc_mgr ~cwd:git ~remote;
101101+ Git.resolve_branch_or_tag ~proc_mgr ~cwd:git ~remote ~ref_name:branch
102102+ in
103103+104104+ (* Step 1: Create upstream branch from fetched ref *)
105105+ Git.branch_force ~proc_mgr ~cwd:git
106106+ ~name:(upstream_branch pkg) ~point:ref_point;
107107+108108+ (* Step 2: Create vendor branch from upstream and rewrite history *)
109109+ Git.branch_force ~proc_mgr ~cwd:git
110110+ ~name:(vendor_branch pkg) ~point:(upstream_branch pkg);
111111+112112+ (* Rewrite vendor branch history to move all files into vendor/opam/<pkg>/ *)
113113+ Git.filter_repo_to_subdirectory ~proc_mgr ~cwd:git
114114+ ~branch:(vendor_branch pkg)
115115+ ~subdirectory:(vendor_path pkg);
116116+117117+ (* Get the vendor SHA after rewriting *)
118118+ let vendor_sha = match Git.rev_parse ~proc_mgr ~cwd:git (vendor_branch pkg) with
119119+ | Some sha -> sha
120120+ | None -> failwith "Vendor branch not found after filter-repo"
121121+ in
122122+123123+ (* Step 3: Create patches branch from vendor *)
124124+ Git.branch_create ~proc_mgr ~cwd:git
125125+ ~name:(patches_branch pkg)
126126+ ~start_point:(vendor_branch pkg);
127127+128128+ Backend.Added { name = pkg; sha = vendor_sha }
129129+ end
130130+ with exn ->
131131+ (* Cleanup on failure *)
132132+ (try Worktree.remove_force ~proc_mgr root (upstream_kind pkg) with _ -> ());
133133+ (try Worktree.remove_force ~proc_mgr root (vendor_kind pkg) with _ -> ());
134134+ Backend.Failed { name = pkg; error = Printexc.to_string exn }
135135+136136+let update_package ~proc_mgr ~root ?cache pkg =
137137+ let git = Worktree.git_dir root in
138138+139139+ try
140140+ (* Check if package exists *)
141141+ if not (Worktree.branch_exists ~proc_mgr root (patches_kind pkg)) then
142142+ Backend.Update_failed { name = pkg; error = "Package not vendored" }
143143+ else begin
144144+ (* Get remote URL - check origin-<pkg> first, then upstream-<pkg> for promoted packages *)
145145+ let origin_remote = "origin-" ^ pkg in
146146+ let upstream_remote = "upstream-" ^ pkg in
147147+ let (remote, url) = match Git.remote_url ~proc_mgr ~cwd:git origin_remote with
148148+ | Some u -> (origin_remote, u)
149149+ | None ->
150150+ (* Try upstream remote for promoted/local packages *)
151151+ match Git.remote_url ~proc_mgr ~cwd:git upstream_remote with
152152+ | Some u -> (upstream_remote, u)
153153+ | None -> failwith (Printf.sprintf "No remote found. Set one with: unpac opam set-upstream %s <url>" pkg)
154154+ in
155155+156156+ (* Fetch latest - either via cache or directly (with tags for completeness) *)
157157+ (match cache with
158158+ | Some cache_path ->
159159+ let branch = Git.ls_remote_default_branch ~proc_mgr ~cwd:git ~url in
160160+ ignore (Vendor_cache.fetch_to_project ~proc_mgr
161161+ ~cache:cache_path ~project_git:git ~url ~branch)
162162+ | None ->
163163+ Git.fetch_with_tags ~proc_mgr ~cwd:git ~remote);
164164+165165+ (* Get old SHA *)
166166+ let old_sha = match Git.rev_parse ~proc_mgr ~cwd:git (upstream_branch pkg) with
167167+ | Some sha -> sha
168168+ | None -> failwith "Upstream branch not found"
169169+ in
170170+171171+ (* Determine default branch and update upstream *)
172172+ let default_branch = Git.ls_remote_default_branch ~proc_mgr ~cwd:git ~url in
173173+ let ref_point = remote ^ "/" ^ default_branch in
174174+ Git.branch_force ~proc_mgr ~cwd:git
175175+ ~name:(upstream_branch pkg) ~point:ref_point;
176176+177177+ (* Get new SHA *)
178178+ let new_sha = match Git.rev_parse ~proc_mgr ~cwd:git (upstream_branch pkg) with
179179+ | Some sha -> sha
180180+ | None -> failwith "Upstream branch not found"
181181+ in
182182+183183+ if old_sha = new_sha then
184184+ Backend.No_changes pkg
185185+ else begin
186186+ (* Create worktrees *)
187187+ Worktree.ensure ~proc_mgr root (upstream_kind pkg);
188188+ Worktree.ensure ~proc_mgr root (vendor_kind pkg);
189189+190190+ let upstream_wt = Worktree.path root (upstream_kind pkg) in
191191+ let vendor_wt = Worktree.path root (vendor_kind pkg) in
192192+193193+ (* Clear vendor content and copy new *)
194194+ let vendor_pkg_path = Eio.Path.(vendor_wt / "vendor" / "opam" / pkg) in
195195+ (try Eio.Path.rmtree vendor_pkg_path with _ -> ());
196196+197197+ copy_with_prefix
198198+ ~src_dir:upstream_wt
199199+ ~dst_dir:vendor_wt
200200+ ~prefix:(vendor_path pkg);
201201+202202+ (* Commit *)
203203+ Git.add_all ~proc_mgr ~cwd:vendor_wt;
204204+ Git.commit ~proc_mgr ~cwd:vendor_wt
205205+ ~message:(Printf.sprintf "Update %s to %s" pkg (String.sub new_sha 0 7));
206206+207207+ (* Cleanup *)
208208+ Worktree.remove ~proc_mgr root (upstream_kind pkg);
209209+ Worktree.remove ~proc_mgr root (vendor_kind pkg);
210210+211211+ Backend.Updated { name = pkg; old_sha; new_sha }
212212+ end
213213+ end
214214+ with exn ->
215215+ (try Worktree.remove_force ~proc_mgr root (upstream_kind pkg) with _ -> ());
216216+ (try Worktree.remove_force ~proc_mgr root (vendor_kind pkg) with _ -> ());
217217+ Backend.Update_failed { name = pkg; error = Printexc.to_string exn }
218218+219219+let list_packages ~proc_mgr ~root =
220220+ Worktree.list_opam_packages ~proc_mgr root
+67
lib/opam/opam.mli
···11+(** Opam backend for unpac.
22+33+ Implements vendoring of opam packages using the three-tier branch model:
44+ - opam/upstream/<pkg> - pristine upstream code
55+ - opam/vendor/<pkg> - orphan branch with vendor/opam/<pkg>/ prefix
66+ - opam/patches/<pkg> - local modifications *)
77+88+val name : string
99+(** Backend name: "opam" *)
1010+1111+(** {1 Branch Naming} *)
1212+1313+val upstream_branch : string -> string
1414+(** [upstream_branch pkg] returns "opam/upstream/<pkg>". *)
1515+1616+val vendor_branch : string -> string
1717+(** [vendor_branch pkg] returns "opam/vendor/<pkg>". *)
1818+1919+val patches_branch : string -> string
2020+(** [patches_branch pkg] returns "opam/patches/<pkg>". *)
2121+2222+val vendor_path : string -> string
2323+(** [vendor_path pkg] returns "vendor/opam/<pkg>". *)
2424+2525+(** {1 Worktree Kinds} *)
2626+2727+val upstream_kind : string -> Unpac.Worktree.kind
2828+val vendor_kind : string -> Unpac.Worktree.kind
2929+val patches_kind : string -> Unpac.Worktree.kind
3030+3131+(** {1 Package Operations} *)
3232+3333+val add_package :
3434+ proc_mgr:Unpac.Git.proc_mgr ->
3535+ root:Unpac.Worktree.root ->
3636+ ?cache:Unpac.Vendor_cache.t ->
3737+ Unpac.Backend.package_info ->
3838+ Unpac.Backend.add_result
3939+(** [add_package ~proc_mgr ~root ?cache info] vendors a single package.
4040+4141+ 1. Fetches upstream into opam/upstream/<pkg> (via cache if provided)
4242+ 2. Creates opam/vendor/<pkg> with vendor/opam/<pkg>/ prefix (preserving history)
4343+ 3. Creates opam/patches/<pkg> from vendor
4444+4545+ Uses git-filter-repo for fast history rewriting.
4646+ @param cache Optional vendor cache for shared fetches across projects. *)
4747+4848+val update_package :
4949+ proc_mgr:Unpac.Git.proc_mgr ->
5050+ root:Unpac.Worktree.root ->
5151+ ?cache:Unpac.Vendor_cache.t ->
5252+ string ->
5353+ Unpac.Backend.update_result
5454+(** [update_package ~proc_mgr ~root ?cache name] updates a package from upstream.
5555+5656+ 1. Fetches latest into opam/upstream/<pkg> (via cache if provided)
5757+ 2. Updates opam/vendor/<pkg> with new content
5858+5959+ Does NOT rebase patches - call [Backend.rebase_patches] separately.
6060+6161+ @param cache Optional vendor cache for shared fetches across projects. *)
6262+6363+val list_packages :
6464+ proc_mgr:Unpac.Git.proc_mgr ->
6565+ root:Unpac.Worktree.root ->
6666+ string list
6767+(** [list_packages ~proc_mgr root] returns all vendored opam package names. *)
+107
lib/opam/opam_file.ml
···11+(** Opam file parsing for extracting package metadata. *)
22+33+type metadata = {
44+ name : string;
55+ version : string;
66+ dev_repo : string option;
77+ synopsis : string option;
88+}
99+1010+let empty_metadata = {
1111+ name = "";
1212+ version = "";
1313+ dev_repo = None;
1414+ synopsis = None;
1515+}
1616+1717+(** Parse an opam file and extract metadata. *)
1818+let parse ~name ~version content =
1919+ try
2020+ let opam = OpamParser.FullPos.string content "<opam>" in
2121+ let items = opam.file_contents in
2222+2323+ let dev_repo = ref None in
2424+ let synopsis = ref None in
2525+2626+ List.iter (fun item ->
2727+ match item.OpamParserTypes.FullPos.pelem with
2828+ | OpamParserTypes.FullPos.Variable (name_pos, value_pos) ->
2929+ let var_name = name_pos.OpamParserTypes.FullPos.pelem in
3030+ (match var_name, value_pos.OpamParserTypes.FullPos.pelem with
3131+ | "dev-repo", OpamParserTypes.FullPos.String s ->
3232+ dev_repo := Some s
3333+ | "synopsis", OpamParserTypes.FullPos.String s ->
3434+ synopsis := Some s
3535+ | _ -> ())
3636+ | _ -> ()
3737+ ) items;
3838+3939+ { name; version; dev_repo = !dev_repo; synopsis = !synopsis }
4040+ with _ ->
4141+ { empty_metadata with name; version }
4242+4343+(** Parse an opam file from a path. *)
4444+let parse_file ~name ~version path =
4545+ let content = In_channel.with_open_text path In_channel.input_all in
4646+ parse ~name ~version content
4747+4848+(** Find a package in an opam repository directory.
4949+ Returns the path to the opam file if found. *)
5050+let find_in_repo ~repo_path ~name ?version () =
5151+ let packages_dir = Filename.concat repo_path "packages" in
5252+ let pkg_dir = Filename.concat packages_dir name in
5353+5454+ if not (Sys.file_exists pkg_dir && Sys.is_directory pkg_dir) then
5555+ None
5656+ else
5757+ (* List version directories *)
5858+ let entries = Sys.readdir pkg_dir |> Array.to_list in
5959+ let version_dirs = List.filter (fun entry ->
6060+ let full = Filename.concat pkg_dir entry in
6161+ Sys.is_directory full && String.starts_with ~prefix:(name ^ ".") entry
6262+ ) entries in
6363+6464+ match version with
6565+ | Some v ->
6666+ (* Look for specific version *)
6767+ let target = name ^ "." ^ v in
6868+ if List.mem target version_dirs then
6969+ let opam_path = Filename.concat (Filename.concat pkg_dir target) "opam" in
7070+ if Sys.file_exists opam_path then Some (opam_path, v)
7171+ else None
7272+ else None
7373+ | None ->
7474+ (* Find latest version (simple string sort, works for semver) *)
7575+ let sorted = List.sort (fun a b -> String.compare b a) version_dirs in
7676+ match sorted with
7777+ | [] -> None
7878+ | latest :: _ ->
7979+ let v = String.sub latest (String.length name + 1)
8080+ (String.length latest - String.length name - 1) in
8181+ let opam_path = Filename.concat (Filename.concat pkg_dir latest) "opam" in
8282+ if Sys.file_exists opam_path then Some (opam_path, v)
8383+ else None
8484+8585+(** Get metadata for a package from an opam repository. *)
8686+let get_metadata ~repo_path ~name ?version () =
8787+ match find_in_repo ~repo_path ~name ?version () with
8888+ | None -> None
8989+ | Some (opam_path, v) ->
9090+ Some (parse_file ~name ~version:v opam_path)
9191+9292+(** List all versions of a package in a repository. *)
9393+let list_versions ~repo_path ~name =
9494+ let packages_dir = Filename.concat repo_path "packages" in
9595+ let pkg_dir = Filename.concat packages_dir name in
9696+9797+ if not (Sys.file_exists pkg_dir && Sys.is_directory pkg_dir) then
9898+ []
9999+ else
100100+ Sys.readdir pkg_dir
101101+ |> Array.to_list
102102+ |> List.filter_map (fun entry ->
103103+ if String.starts_with ~prefix:(name ^ ".") entry then
104104+ Some (String.sub entry (String.length name + 1)
105105+ (String.length entry - String.length name - 1))
106106+ else None)
107107+ |> List.sort String.compare
+24
lib/opam/opam_file.mli
···11+(** Opam file parsing for extracting package metadata. *)
22+33+type metadata = {
44+ name : string;
55+ version : string;
66+ dev_repo : string option;
77+ synopsis : string option;
88+}
99+1010+val parse : name:string -> version:string -> string -> metadata
1111+(** [parse ~name ~version content] parses opam file content. *)
1212+1313+val parse_file : name:string -> version:string -> string -> metadata
1414+(** [parse_file ~name ~version path] parses an opam file from disk. *)
1515+1616+val find_in_repo : repo_path:string -> name:string -> ?version:string -> unit -> (string * string) option
1717+(** [find_in_repo ~repo_path ~name ?version ()] finds a package in an opam repository.
1818+ Returns [Some (opam_file_path, version)] if found. *)
1919+2020+val get_metadata : repo_path:string -> name:string -> ?version:string -> unit -> metadata option
2121+(** [get_metadata ~repo_path ~name ?version ()] gets package metadata from a repository. *)
2222+2323+val list_versions : repo_path:string -> name:string -> string list
2424+(** [list_versions ~repo_path ~name] lists all available versions of a package. *)
+71
lib/opam/repo.ml
···11+(** Opam repository operations. *)
22+33+type repo = {
44+ name : string;
55+ path : string;
66+}
77+88+type search_result = {
99+ repo : repo;
1010+ metadata : Opam_file.metadata;
1111+}
1212+1313+(** Resolve repository path from config. *)
1414+let resolve_repo (cfg : Unpac.Config.repo_config) : repo option =
1515+ match cfg.source with
1616+ | Unpac.Config.Local path ->
1717+ if Sys.file_exists path && Sys.is_directory path then
1818+ Some { name = cfg.repo_name; path }
1919+ else None
2020+ | Unpac.Config.Remote _url ->
2121+ (* Remote repos not yet supported *)
2222+ None
2323+2424+(** Search for a package in configured repositories. *)
2525+let find_package ~repos ~name ?version () : search_result option =
2626+ let rec search = function
2727+ | [] -> None
2828+ | cfg :: rest ->
2929+ match resolve_repo cfg with
3030+ | None -> search rest
3131+ | Some repo ->
3232+ match Opam_file.get_metadata ~repo_path:repo.path ~name ?version () with
3333+ | None -> search rest
3434+ | Some metadata -> Some { repo; metadata }
3535+ in
3636+ search repos
3737+3838+(** List all versions of a package across repositories. *)
3939+let list_versions ~repos ~name : (repo * string list) list =
4040+ List.filter_map (fun cfg ->
4141+ match resolve_repo cfg with
4242+ | None -> None
4343+ | Some repo ->
4444+ let versions = Opam_file.list_versions ~repo_path:repo.path ~name in
4545+ if versions = [] then None
4646+ else Some (repo, versions)
4747+ ) repos
4848+4949+(** Search for packages matching a pattern. *)
5050+let search_packages ~repos ~pattern : (repo * string) list =
5151+ List.concat_map (fun cfg ->
5252+ match resolve_repo cfg with
5353+ | None -> []
5454+ | Some repo ->
5555+ let packages_dir = Filename.concat repo.path "packages" in
5656+ if not (Sys.file_exists packages_dir) then []
5757+ else
5858+ Sys.readdir packages_dir
5959+ |> Array.to_list
6060+ |> List.filter (fun name ->
6161+ (* Simple substring match *)
6262+ let pattern_lower = String.lowercase_ascii pattern in
6363+ let name_lower = String.lowercase_ascii name in
6464+ String.length pattern_lower <= String.length name_lower &&
6565+ (let rec check i =
6666+ if i > String.length name_lower - String.length pattern_lower then false
6767+ else if String.sub name_lower i (String.length pattern_lower) = pattern_lower then true
6868+ else check (i + 1)
6969+ in check 0))
7070+ |> List.map (fun name -> (repo, name))
7171+ ) repos
+32
lib/opam/repo.mli
···11+(** Opam repository operations. *)
22+33+type repo = {
44+ name : string;
55+ path : string;
66+}
77+88+type search_result = {
99+ repo : repo;
1010+ metadata : Opam_file.metadata;
1111+}
1212+1313+val find_package :
1414+ repos:Unpac.Config.repo_config list ->
1515+ name:string ->
1616+ ?version:string ->
1717+ unit ->
1818+ search_result option
1919+(** [find_package ~repos ~name ?version ()] searches for a package in repositories.
2020+ Returns the first match found. *)
2121+2222+val list_versions :
2323+ repos:Unpac.Config.repo_config list ->
2424+ name:string ->
2525+ (repo * string list) list
2626+(** [list_versions ~repos ~name] lists all versions across repositories. *)
2727+2828+val search_packages :
2929+ repos:Unpac.Config.repo_config list ->
3030+ pattern:string ->
3131+ (repo * string) list
3232+(** [search_packages ~repos ~pattern] searches for packages matching a pattern. *)
+169
lib/opam/solver.ml
···11+(** Dependency solver using 0install algorithm. *)
22+33+let ( / ) = Filename.concat
44+55+(** List directory entries, returns empty list if directory doesn't exist. *)
66+let list_dir path =
77+ try Sys.readdir path |> Array.to_list
88+ with Sys_error _ -> []
99+1010+(** Known compiler packages to filter out. *)
1111+let is_compiler_package name =
1212+ let s = OpamPackage.Name.to_string name in
1313+ String.starts_with ~prefix:"ocaml-base-compiler" s ||
1414+ String.starts_with ~prefix:"ocaml-variants" s ||
1515+ String.starts_with ~prefix:"ocaml-system" s ||
1616+ String.starts_with ~prefix:"ocaml-config" s ||
1717+ s = "ocaml" ||
1818+ s = "base-unix" ||
1919+ s = "base-threads" ||
2020+ s = "base-bigarray" ||
2121+ s = "base-domains" ||
2222+ s = "base-nnp"
2323+2424+(** Check if a package has the compiler flag. *)
2525+let has_compiler_flag opam =
2626+ let flags = OpamFile.OPAM.flags opam in
2727+ List.mem OpamTypes.Pkgflag_Compiler flags
2828+2929+(** Multi-repo context that searches multiple opam repository directories. *)
3030+module Multi_context : sig
3131+ include Opam_0install.S.CONTEXT
3232+3333+ val create :
3434+ ?constraints:OpamFormula.version_constraint OpamTypes.name_map ->
3535+ repos:string list ->
3636+ ocaml_version:string ->
3737+ unit -> t
3838+end = struct
3939+ type rejection =
4040+ | UserConstraint of OpamFormula.atom
4141+ | Unavailable
4242+ | CompilerPackage
4343+4444+ let pp_rejection f = function
4545+ | UserConstraint x -> Fmt.pf f "Rejected by user-specified constraint %s" (OpamFormula.string_of_atom x)
4646+ | Unavailable -> Fmt.pf f "Availability condition not satisfied"
4747+ | CompilerPackage -> Fmt.pf f "Compiler package (filtered out)"
4848+4949+ type t = {
5050+ repos : string list; (* List of packages/ directories *)
5151+ constraints : OpamFormula.version_constraint OpamTypes.name_map;
5252+ ocaml_version : string;
5353+ }
5454+5555+ let env t _pkg v =
5656+ match OpamVariable.Full.to_string v with
5757+ | "arch" -> Some (OpamTypes.S "x86_64")
5858+ | "os" -> Some (OpamTypes.S "linux")
5959+ | "os-distribution" -> Some (OpamTypes.S "debian")
6060+ | "os-version" -> Some (OpamTypes.S "12")
6161+ | "os-family" -> Some (OpamTypes.S "debian")
6262+ | "opam-version" -> Some (OpamTypes.S "2.2.0")
6363+ | "sys-ocaml-version" -> Some (OpamTypes.S t.ocaml_version)
6464+ | "ocaml:native" -> Some (OpamTypes.B true)
6565+ | _ -> None
6666+6767+ let filter_deps t pkg f =
6868+ f
6969+ |> OpamFilter.partial_filter_formula (env t pkg)
7070+ |> OpamFilter.filter_deps ~build:true ~post:true ~test:false ~doc:false ~dev:false ~dev_setup:false ~default:false
7171+7272+ let user_restrictions t name =
7373+ OpamPackage.Name.Map.find_opt name t.constraints
7474+7575+ (** Load opam file from path. *)
7676+ let load_opam path =
7777+ try Some (OpamFile.OPAM.read (OpamFile.make (OpamFilename.raw path)))
7878+ with _ -> None
7979+8080+ (** Create a minimal virtual opam file for base packages. *)
8181+ let virtual_opam () =
8282+ OpamFile.OPAM.empty
8383+8484+ (** Find all versions of a package across all repos. *)
8585+ let find_versions t name =
8686+ let name_str = OpamPackage.Name.to_string name in
8787+ (* Collect versions from all repos, first repo wins for duplicates *)
8888+ let seen = Hashtbl.create 16 in
8989+ List.iter (fun packages_dir ->
9090+ let pkg_dir = packages_dir / name_str in
9191+ list_dir pkg_dir |> List.iter (fun entry ->
9292+ match OpamPackage.of_string_opt entry with
9393+ | Some pkg when OpamPackage.name pkg = name ->
9494+ let v = OpamPackage.version pkg in
9595+ if not (Hashtbl.mem seen v) then begin
9696+ let opam_path = pkg_dir / entry / "opam" in
9797+ Hashtbl.add seen v opam_path
9898+ end
9999+ | _ -> ()
100100+ )
101101+ ) t.repos;
102102+ Hashtbl.fold (fun v path acc -> (v, path) :: acc) seen []
103103+104104+ let candidates t name =
105105+ let name_str = OpamPackage.Name.to_string name in
106106+ (* Provide virtual packages for compiler/base packages at the configured version *)
107107+ if name_str = "ocaml" then
108108+ let v = OpamPackage.Version.of_string t.ocaml_version in
109109+ [v, Ok (virtual_opam ())]
110110+ else if name_str = "base-unix" || name_str = "base-threads" ||
111111+ name_str = "base-bigarray" || name_str = "base-domains" ||
112112+ name_str = "base-nnp" then
113113+ let v = OpamPackage.Version.of_string "base" in
114114+ [v, Ok (virtual_opam ())]
115115+ else if is_compiler_package name then
116116+ (* Other compiler packages - not available *)
117117+ []
118118+ else
119119+ let user_constraints = user_restrictions t name in
120120+ find_versions t name
121121+ |> List.sort (fun (v1, _) (v2, _) -> OpamPackage.Version.compare v2 v1) (* Prefer newest *)
122122+ |> List.map (fun (v, opam_path) ->
123123+ match user_constraints with
124124+ | Some test when not (OpamFormula.check_version_formula (OpamFormula.Atom test) v) ->
125125+ v, Error (UserConstraint (name, Some test))
126126+ | _ ->
127127+ match load_opam opam_path with
128128+ | None -> v, Error Unavailable
129129+ | Some opam ->
130130+ (* Check flags:compiler *)
131131+ if has_compiler_flag opam then
132132+ v, Error CompilerPackage
133133+ else
134134+ (* Check available filter *)
135135+ let pkg = OpamPackage.create name v in
136136+ let available = OpamFile.OPAM.available opam in
137137+ match OpamFilter.eval ~default:(OpamTypes.B false) (env t pkg) available with
138138+ | B true -> v, Ok opam
139139+ | _ -> v, Error Unavailable
140140+ )
141141+142142+ let create ?(constraints=OpamPackage.Name.Map.empty) ~repos ~ocaml_version () =
143143+ (* Convert repo roots to packages/ directories *)
144144+ let packages_dirs = List.map (fun r -> r / "packages") repos in
145145+ { repos = packages_dirs; constraints; ocaml_version }
146146+end
147147+148148+module Solver = Opam_0install.Solver.Make(Multi_context)
149149+150150+type solve_result = {
151151+ packages : OpamPackage.t list;
152152+}
153153+154154+type solve_error = string
155155+156156+(** Solve dependencies for a list of package names. *)
157157+let solve ~repos ~ocaml_version ~packages : (solve_result, solve_error) result =
158158+ let context = Multi_context.create ~repos ~ocaml_version () in
159159+ let names = List.map OpamPackage.Name.of_string packages in
160160+ match Solver.solve context names with
161161+ | Ok selections ->
162162+ let pkgs = Solver.packages_of_result selections in
163163+ (* Filter out compiler packages from result *)
164164+ let pkgs = List.filter (fun pkg ->
165165+ not (is_compiler_package (OpamPackage.name pkg))
166166+ ) pkgs in
167167+ Ok { packages = pkgs }
168168+ | Error diagnostics ->
169169+ Error (Solver.diagnostics diagnostics)
+33
lib/opam/solver.mli
···11+(** Dependency solver using 0install algorithm.
22+33+ Solves package dependencies across multiple configured opam repositories,
44+ filtering out compiler packages and respecting availability constraints. *)
55+66+type solve_result = {
77+ packages : OpamPackage.t list;
88+ (** List of packages that need to be installed, including transitive deps. *)
99+}
1010+1111+type solve_error = string
1212+(** Human-readable error message explaining why solving failed. *)
1313+1414+val solve :
1515+ repos:string list ->
1616+ ocaml_version:string ->
1717+ packages:string list ->
1818+ (solve_result, solve_error) result
1919+(** [solve ~repos ~ocaml_version ~packages] solves dependencies for [packages].
2020+2121+ @param repos List of opam repository root directories (containing packages/)
2222+ @param ocaml_version The OCaml compiler version to solve for (e.g. "5.2.0")
2323+ @param packages List of package names to solve for
2424+2525+ Returns the full list of packages (including transitive dependencies) that
2626+ need to be installed, or an error message if solving failed.
2727+2828+ Compiler packages (ocaml-base-compiler, base-*, etc.) are automatically
2929+ filtered out since they are assumed to be pre-installed. *)
3030+3131+val is_compiler_package : OpamPackage.Name.t -> bool
3232+(** [is_compiler_package name] returns true if [name] is a known compiler
3333+ or base package that should be filtered out. *)
+454
lib/promote.ml
···11+(** Project promotion to vendor library.
22+33+ Promotes a locally-developed project to a vendored library by:
44+ 1. Filtering out the vendor/ directory from the project history
55+ 2. Creating vendor branches (upstream/vendor/patches) for the specified backend
66+ 3. Recording the promotion in the audit log
77+88+ This allows the project to be merged into other projects as a dependency. *)
99+1010+let src = Logs.Src.create "unpac.promote" ~doc:"Project promotion"
1111+module Log = (val Logs.src_log src : Logs.LOG)
1212+1313+(** Backend types for promotion *)
1414+type backend =
1515+ | Opam
1616+ | Git
1717+1818+let backend_of_string = function
1919+ | "opam" -> Some Opam
2020+ | "git" -> Some Git
2121+ | _ -> None
2222+2323+let backend_to_string = function
2424+ | Opam -> "opam"
2525+ | Git -> "git"
2626+2727+(** Branch names for a backend *)
2828+let upstream_branch backend name = match backend with
2929+ | Opam -> "opam/upstream/" ^ name
3030+ | Git -> "git/upstream/" ^ name
3131+3232+let vendor_branch backend name = match backend with
3333+ | Opam -> "opam/vendor/" ^ name
3434+ | Git -> "git/vendor/" ^ name
3535+3636+let patches_branch backend name = match backend with
3737+ | Opam -> "opam/patches/" ^ name
3838+ | Git -> "git/patches/" ^ name
3939+4040+let vendor_path backend name = match backend with
4141+ | Opam -> "vendor/opam/" ^ name
4242+ | Git -> "vendor/git/" ^ name
4343+4444+(** Result of promotion *)
4545+type promote_result =
4646+ | Promoted of {
4747+ name : string;
4848+ backend : backend;
4949+ original_commits : int;
5050+ filtered_commits : int;
5151+ }
5252+ | Already_promoted of string
5353+ | Project_not_found of string
5454+ | Failed of { name : string; error : string }
5555+5656+(** Filter a branch to exclude vendor/ directory.
5757+ Uses git-filter-repo to rewrite history. *)
5858+let filter_vendor_directory ~proc_mgr ~cwd ~branch =
5959+ Log.info (fun m -> m "Filtering vendor/ directory from branch %s..." branch);
6060+6161+ (* Use git-filter-repo with path filtering to exclude vendor/ *)
6262+ let fs = fst cwd in
6363+ let git_path = snd cwd in
6464+ let parent_path = Filename.dirname git_path in
6565+6666+ (* Create a unique temporary worktree *)
6767+ let safe_branch = String.map (fun c -> if c = '/' then '-' else c) branch in
6868+ let temp_wt_name = ".filter-vendor-" ^ safe_branch in
6969+ let temp_wt_relpath = "../" ^ temp_wt_name in
7070+ let temp_wt_path = Filename.concat parent_path temp_wt_name in
7171+ let temp_wt : Git.path = (fs, temp_wt_path) in
7272+7373+ (* Remove any existing temp worktree *)
7474+ ignore (Git.run ~proc_mgr ~cwd ["worktree"; "remove"; "-f"; temp_wt_relpath]);
7575+7676+ (* Create worktree for the branch *)
7777+ Git.run_exn ~proc_mgr ~cwd ["worktree"; "add"; temp_wt_relpath; branch] |> ignore;
7878+7979+ (* Count commits before filtering *)
8080+ let commits_before =
8181+ int_of_string (String.trim (Git.run_exn ~proc_mgr ~cwd:temp_wt ["rev-list"; "--count"; "HEAD"]))
8282+ in
8383+8484+ (* Run git-filter-repo to exclude vendor/ *)
8585+ let result = Git.run ~proc_mgr ~cwd:temp_wt [
8686+ "filter-repo";
8787+ "--invert-paths";
8888+ "--path"; "vendor/";
8989+ "--force";
9090+ "--refs"; "HEAD"
9191+ ] in
9292+9393+ match result with
9494+ | Ok _ ->
9595+ (* Count commits after filtering *)
9696+ let commits_after =
9797+ int_of_string (String.trim (Git.run_exn ~proc_mgr ~cwd:temp_wt ["rev-list"; "--count"; "HEAD"]))
9898+ in
9999+ (* Get the new HEAD SHA *)
100100+ let new_sha = Git.run_exn ~proc_mgr ~cwd:temp_wt ["rev-parse"; "HEAD"] |> String.trim in
101101+ (* Cleanup temporary worktree *)
102102+ ignore (Git.run ~proc_mgr ~cwd ["worktree"; "remove"; "-f"; temp_wt_relpath]);
103103+ (* Update the branch in the bare repo *)
104104+ Git.run_exn ~proc_mgr ~cwd ["branch"; "-f"; branch; new_sha] |> ignore;
105105+ Ok (commits_before, commits_after)
106106+ | Error e ->
107107+ (* Cleanup and return error *)
108108+ ignore (Git.run ~proc_mgr ~cwd ["worktree"; "remove"; "-f"; temp_wt_relpath]);
109109+ Error (Fmt.str "%a" Git.pp_error e)
110110+111111+(** Promote a project to a vendored library *)
112112+let promote ~proc_mgr ~root ~project ~backend ~vendor_name =
113113+ let git = Worktree.git_dir root in
114114+ let name = Option.value ~default:project vendor_name in
115115+116116+ (* Check if project exists *)
117117+ if not (Worktree.branch_exists ~proc_mgr root (Worktree.Project project)) then
118118+ Project_not_found project
119119+ else begin
120120+ (* Check if already promoted for this backend *)
121121+ let patches_br = patches_branch backend name in
122122+ if Git.branch_exists ~proc_mgr ~cwd:git patches_br then
123123+ Already_promoted name
124124+ else begin
125125+ try
126126+ Log.info (fun m -> m "Promoting project %s as %s vendor %s..." project (backend_to_string backend) name);
127127+128128+ let project_branch = Worktree.branch (Worktree.Project project) in
129129+130130+ (* Step 1: Create a temporary branch from the project for filtering *)
131131+ let temp_branch = "promote-temp-" ^ name in
132132+ Git.run_exn ~proc_mgr ~cwd:git ["branch"; "-f"; temp_branch; project_branch] |> ignore;
133133+134134+ (* Step 2: Filter out vendor/ directory from the temp branch *)
135135+ let (commits_before, commits_after) =
136136+ match filter_vendor_directory ~proc_mgr ~cwd:git ~branch:temp_branch with
137137+ | Ok counts -> counts
138138+ | Error msg ->
139139+ (* Cleanup temp branch *)
140140+ ignore (Git.run ~proc_mgr ~cwd:git ["branch"; "-D"; temp_branch]);
141141+ failwith msg
142142+ in
143143+144144+ Log.info (fun m -> m "Filtered %d -> %d commits" commits_before commits_after);
145145+146146+ (* Step 3: Create upstream branch (filtered, files at root) *)
147147+ (* For local projects, upstream is the same as filtered temp - no external upstream *)
148148+ Git.run_exn ~proc_mgr ~cwd:git ["branch"; "-f"; upstream_branch backend name; temp_branch] |> ignore;
149149+150150+ (* Step 4: Create vendor branch from upstream and rewrite to vendor path *)
151151+ Git.run_exn ~proc_mgr ~cwd:git ["branch"; "-f"; vendor_branch backend name; upstream_branch backend name] |> ignore;
152152+153153+ (* Rewrite vendor branch to move files into vendor/<backend>/<name>/ *)
154154+ Git.filter_repo_to_subdirectory ~proc_mgr ~cwd:git
155155+ ~branch:(vendor_branch backend name)
156156+ ~subdirectory:(vendor_path backend name);
157157+158158+ (* Step 5: Create patches branch from vendor *)
159159+ Git.run_exn ~proc_mgr ~cwd:git ["branch"; patches_branch backend name; vendor_branch backend name] |> ignore;
160160+161161+ (* Step 6: Cleanup temp branch *)
162162+ ignore (Git.run ~proc_mgr ~cwd:git ["branch"; "-D"; temp_branch]);
163163+164164+ Promoted {
165165+ name;
166166+ backend;
167167+ original_commits = commits_before;
168168+ filtered_commits = commits_after
169169+ }
170170+ with exn ->
171171+ (* Cleanup on failure *)
172172+ let temp_branch = "promote-temp-" ^ name in
173173+ ignore (Git.run ~proc_mgr ~cwd:git ["branch"; "-D"; temp_branch]);
174174+ ignore (Git.run ~proc_mgr ~cwd:git ["branch"; "-D"; upstream_branch backend name]);
175175+ ignore (Git.run ~proc_mgr ~cwd:git ["branch"; "-D"; vendor_branch backend name]);
176176+ Failed { name = project; error = Printexc.to_string exn }
177177+ end
178178+ end
179179+180180+(** {1 Remote Management} *)
181181+182182+(** Remote name for a project *)
183183+let project_remote_name project = "origin-" ^ project
184184+185185+(** Result of set-remote operation *)
186186+type set_remote_result =
187187+ | Remote_set of { project : string; url : string; created : bool }
188188+ | Project_not_found of string
189189+ | Set_remote_failed of { project : string; error : string }
190190+191191+(** Set the remote URL for a project *)
192192+let set_remote ~proc_mgr ~root ~project ~url =
193193+ let git = Worktree.git_dir root in
194194+195195+ (* Check if project exists *)
196196+ if not (Worktree.branch_exists ~proc_mgr root (Worktree.Project project)) then
197197+ Project_not_found project
198198+ else begin
199199+ try
200200+ let remote_name = project_remote_name project in
201201+ Log.info (fun m -> m "Setting remote %s -> %s for project %s" remote_name url project);
202202+203203+ let created = match Git.ensure_remote ~proc_mgr ~cwd:git ~name:remote_name ~url with
204204+ | `Created -> true
205205+ | `Updated | `Existed -> false
206206+ in
207207+208208+ Remote_set { project; url; created }
209209+ with exn ->
210210+ Set_remote_failed { project; error = Printexc.to_string exn }
211211+ end
212212+213213+(** Get the remote URL for a project *)
214214+let get_remote ~proc_mgr ~root ~project =
215215+ let git = Worktree.git_dir root in
216216+ let remote_name = project_remote_name project in
217217+ Git.remote_url ~proc_mgr ~cwd:git remote_name
218218+219219+(** Result of push operation *)
220220+type push_result =
221221+ | Pushed of { project : string; branch : string; remote : string }
222222+ | No_remote of string
223223+ | Project_not_found of string
224224+ | Push_failed of { project : string; error : string }
225225+226226+(** Push a project to its configured remote *)
227227+let push ~proc_mgr ~root ~project =
228228+ let git = Worktree.git_dir root in
229229+230230+ (* Check if project exists *)
231231+ if not (Worktree.branch_exists ~proc_mgr root (Worktree.Project project)) then
232232+ Project_not_found project
233233+ else begin
234234+ let remote_name = project_remote_name project in
235235+ match Git.remote_url ~proc_mgr ~cwd:git remote_name with
236236+ | None -> No_remote project
237237+ | Some _url ->
238238+ try
239239+ let branch = Worktree.branch (Worktree.Project project) in
240240+ Log.info (fun m -> m "Pushing %s to %s..." branch remote_name);
241241+ Git.run_exn ~proc_mgr ~cwd:git ["push"; "-u"; remote_name; branch] |> ignore;
242242+ Pushed { project; branch; remote = remote_name }
243243+ with exn ->
244244+ Push_failed { project; error = Printexc.to_string exn }
245245+ end
246246+247247+(** {1 Project Info} *)
248248+249249+type project_info = {
250250+ name : string;
251251+ origin : [`Local | `Vendored];
252252+ remote : string option;
253253+ promoted_as : (backend * string) option; (* backend, vendor_name *)
254254+}
255255+256256+(** Get information about a project *)
257257+let get_info ~proc_mgr ~root ~project =
258258+ let git = Worktree.git_dir root in
259259+260260+ if not (Worktree.branch_exists ~proc_mgr root (Worktree.Project project)) then
261261+ None
262262+ else begin
263263+ (* Check for remote *)
264264+ let remote = get_remote ~proc_mgr ~root ~project in
265265+266266+ (* Check if promoted - look for opam/patches/<project> or git/patches/<project> *)
267267+ let promoted_as =
268268+ if Git.branch_exists ~proc_mgr ~cwd:git (patches_branch Opam project) then
269269+ Some (Opam, project)
270270+ else if Git.branch_exists ~proc_mgr ~cwd:git (patches_branch Git project) then
271271+ Some (Git, project)
272272+ else
273273+ None
274274+ in
275275+276276+ Some {
277277+ name = project;
278278+ origin = `Local; (* All projects created via `unpac project new` are local *)
279279+ remote;
280280+ promoted_as;
281281+ }
282282+ end
283283+284284+(** {1 Export (Unvendor)} *)
285285+286286+(** Export branch name - where unvendored code goes *)
287287+let export_branch backend name = match backend with
288288+ | Opam -> "opam/export/" ^ name
289289+ | Git -> "git/export/" ^ name
290290+291291+(** Result of export operation *)
292292+type export_result =
293293+ | Exported of {
294294+ name : string;
295295+ backend : backend;
296296+ source_branch : string;
297297+ export_branch : string;
298298+ commits : int;
299299+ }
300300+ | Not_vendored of string
301301+ | Already_exported of string
302302+ | Export_failed of { name : string; error : string }
303303+304304+(** Export a vendored package back to root-level files.
305305+ This is the inverse of vendoring - takes a vendor branch and creates
306306+ an export branch with files moved from vendor/<backend>/<name>/ to root.
307307+308308+ Can export from either vendor/* or patches/* branch. *)
309309+let export ~proc_mgr ~root ~name ~backend ~from_patches =
310310+ let git = Worktree.git_dir root in
311311+312312+ (* Determine source branch *)
313313+ let source_br = if from_patches then patches_branch backend name
314314+ else vendor_branch backend name in
315315+ let export_br = export_branch backend name in
316316+ let subdir = vendor_path backend name in
317317+318318+ (* Check if source branch exists *)
319319+ if not (Git.branch_exists ~proc_mgr ~cwd:git source_br) then
320320+ Not_vendored name
321321+ else if Git.branch_exists ~proc_mgr ~cwd:git export_br then
322322+ Already_exported name
323323+ else begin
324324+ try
325325+ Log.info (fun m -> m "Exporting %s from %s to %s..." name source_br export_br);
326326+327327+ (* Step 1: Create export branch from source *)
328328+ Git.run_exn ~proc_mgr ~cwd:git ["branch"; "-f"; export_br; source_br] |> ignore;
329329+330330+ (* Step 2: Count commits before transformation *)
331331+ let commits =
332332+ int_of_string (String.trim (
333333+ Git.run_exn ~proc_mgr ~cwd:git ["rev-list"; "--count"; export_br]))
334334+ in
335335+336336+ (* Step 3: Rewrite export branch to move files from subdirectory to root *)
337337+ Git.filter_repo_from_subdirectory ~proc_mgr ~cwd:git
338338+ ~branch:export_br
339339+ ~subdirectory:subdir;
340340+341341+ Exported {
342342+ name;
343343+ backend;
344344+ source_branch = source_br;
345345+ export_branch = export_br;
346346+ commits;
347347+ }
348348+ with exn ->
349349+ (* Cleanup on failure *)
350350+ ignore (Git.run ~proc_mgr ~cwd:git ["branch"; "-D"; export_br]);
351351+ Export_failed { name; error = Printexc.to_string exn }
352352+ end
353353+354354+(** Remote name for export (where we push to) *)
355355+let export_remote_name name = "export-" ^ name
356356+357357+(** Remote name for upstream (where we fetch from) *)
358358+let upstream_remote_name name = "upstream-" ^ name
359359+360360+(** Result of export push operation *)
361361+type export_push_result =
362362+ | Export_pushed of {
363363+ name : string;
364364+ backend : backend;
365365+ remote : string;
366366+ branch : string;
367367+ commits : int;
368368+ }
369369+ | Export_not_found of string
370370+ | No_export_remote of string
371371+ | Export_push_failed of { name : string; error : string }
372372+373373+(** Set the remote URL for exporting a package *)
374374+let set_export_remote ~proc_mgr ~root ~name ~url =
375375+ let git = Worktree.git_dir root in
376376+ let remote_name = export_remote_name name in
377377+ Log.info (fun m -> m "Setting export remote %s -> %s" remote_name url);
378378+ Git.ensure_remote ~proc_mgr ~cwd:git ~name:remote_name ~url
379379+380380+(** Get the export remote URL for a package *)
381381+let get_export_remote ~proc_mgr ~root ~name =
382382+ let git = Worktree.git_dir root in
383383+ let remote_name = export_remote_name name in
384384+ Git.remote_url ~proc_mgr ~cwd:git remote_name
385385+386386+(** Set the remote URL for fetching upstream updates.
387387+ This is used for promoted local packages that don't have an opam source URL. *)
388388+let set_upstream_remote ~proc_mgr ~root ~name ~url =
389389+ let git = Worktree.git_dir root in
390390+ let remote_name = upstream_remote_name name in
391391+ Log.info (fun m -> m "Setting upstream remote %s -> %s" remote_name url);
392392+ Git.ensure_remote ~proc_mgr ~cwd:git ~name:remote_name ~url
393393+394394+(** Get the upstream remote URL for a package *)
395395+let get_upstream_remote ~proc_mgr ~root ~name =
396396+ let git = Worktree.git_dir root in
397397+ let remote_name = upstream_remote_name name in
398398+ Git.remote_url ~proc_mgr ~cwd:git remote_name
399399+400400+(** Push an exported branch to its remote *)
401401+let push_export ~proc_mgr ~root ~name ~backend =
402402+ let git = Worktree.git_dir root in
403403+ let export_br = export_branch backend name in
404404+ let remote_name = export_remote_name name in
405405+406406+ (* Check if export branch exists *)
407407+ if not (Git.branch_exists ~proc_mgr ~cwd:git export_br) then
408408+ Export_not_found name
409409+ else begin
410410+ match Git.remote_url ~proc_mgr ~cwd:git remote_name with
411411+ | None -> No_export_remote name
412412+ | Some _url ->
413413+ try
414414+ (* Count commits *)
415415+ let commits =
416416+ int_of_string (String.trim (
417417+ Git.run_exn ~proc_mgr ~cwd:git ["rev-list"; "--count"; export_br]))
418418+ in
419419+420420+ Log.info (fun m -> m "Pushing %s to %s..." export_br remote_name);
421421+ (* Push the export branch - push to main/master on the remote *)
422422+ Git.run_exn ~proc_mgr ~cwd:git [
423423+ "push"; "-u"; remote_name;
424424+ export_br ^ ":main" (* Push export branch as 'main' on remote *)
425425+ ] |> ignore;
426426+427427+ Export_pushed {
428428+ name;
429429+ backend;
430430+ remote = remote_name;
431431+ branch = export_br;
432432+ commits;
433433+ }
434434+ with exn ->
435435+ Export_push_failed { name; error = Printexc.to_string exn }
436436+ end
437437+438438+(** List all exported packages *)
439439+let list_exports ~proc_mgr ~root =
440440+ let git = Worktree.git_dir root in
441441+ let branches = Git.run_lines ~proc_mgr ~cwd:git ["branch"; "--list"; "*/export/*"] in
442442+ List.filter_map (fun line ->
443443+ let branch = String.trim line in
444444+ let branch = if String.length branch > 0 && branch.[0] = '*' then
445445+ String.trim (String.sub branch 1 (String.length branch - 1))
446446+ else branch in
447447+ (* Parse backend/export/name *)
448448+ match String.split_on_char '/' branch with
449449+ | [backend_str; "export"; name] ->
450450+ (match backend_of_string backend_str with
451451+ | Some backend -> Some (backend, name)
452452+ | None -> None)
453453+ | _ -> None
454454+ ) branches
+292
lib/promote.mli
···11+(** Project promotion to vendor library.
22+33+ Promotes a locally-developed project to a vendored library by:
44+ 1. Filtering out the vendor/ directory from the project history
55+ 2. Creating vendor branches (upstream/vendor/patches) for the specified backend
66+ 3. Recording the promotion in the audit log
77+88+ This allows the project to be merged into other projects as a dependency. *)
99+1010+(** {1 Backend Types} *)
1111+1212+(** Vendor backend for the promoted library *)
1313+type backend =
1414+ | Opam (** OCaml package - creates opam/* branches, vendor/opam/<name>/ path *)
1515+ | Git (** Git repository - creates git/* branches, vendor/git/<name>/ path *)
1616+1717+val backend_of_string : string -> backend option
1818+(** Parse backend from string: "opam" or "git" *)
1919+2020+val backend_to_string : backend -> string
2121+(** Convert backend to string *)
2222+2323+(** {1 Branch Names} *)
2424+2525+val upstream_branch : backend -> string -> string
2626+(** [upstream_branch backend name] returns the upstream branch name,
2727+ e.g., "opam/upstream/brotli" or "git/upstream/brotli" *)
2828+2929+val vendor_branch : backend -> string -> string
3030+(** [vendor_branch backend name] returns the vendor branch name *)
3131+3232+val patches_branch : backend -> string -> string
3333+(** [patches_branch backend name] returns the patches branch name *)
3434+3535+val vendor_path : backend -> string -> string
3636+(** [vendor_path backend name] returns the vendor directory path,
3737+ e.g., "vendor/opam/brotli" or "vendor/git/brotli" *)
3838+3939+(** {1 Promotion} *)
4040+4141+(** Result of a promote operation *)
4242+type promote_result =
4343+ | Promoted of {
4444+ name : string; (** Vendor library name *)
4545+ backend : backend; (** Backend used *)
4646+ original_commits : int; (** Commits in project before filtering *)
4747+ filtered_commits : int; (** Commits after removing vendor/ *)
4848+ }
4949+ | Already_promoted of string
5050+ (** Library already exists with this name *)
5151+ | Project_not_found of string
5252+ (** Source project does not exist *)
5353+ | Failed of { name : string; error : string }
5454+ (** Promotion failed *)
5555+5656+val promote :
5757+ proc_mgr:Git.proc_mgr ->
5858+ root:Worktree.root ->
5959+ project:string ->
6060+ backend:backend ->
6161+ vendor_name:string option ->
6262+ promote_result
6363+(** [promote ~proc_mgr ~root ~project ~backend ~vendor_name] promotes
6464+ a local project to a vendored library.
6565+6666+ The operation:
6767+ 1. Checks that the project exists and hasn't been promoted yet
6868+ 2. Creates a filtered copy of project history (excluding vendor/)
6969+ 3. Creates upstream/vendor/patches branches for the backend
7070+ 4. The original project branch is preserved unchanged
7171+7272+ @param project Name of the project to promote (e.g., "brotli")
7373+ @param backend Backend type (Opam or Git)
7474+ @param vendor_name Optional override for the vendor library name
7575+7676+ After promotion, the library can be merged into other projects using:
7777+ - [unpac opam merge <name> <project>] for Opam backend
7878+ - [unpac git merge <name> <project>] for Git backend *)
7979+8080+(** {1 Remote Management} *)
8181+8282+val project_remote_name : string -> string
8383+(** [project_remote_name project] returns the git remote name for a project,
8484+ e.g., "origin-brotli" *)
8585+8686+(** Result of set-remote operation *)
8787+type set_remote_result =
8888+ | Remote_set of { project : string; url : string; created : bool }
8989+ | Project_not_found of string
9090+ | Set_remote_failed of { project : string; error : string }
9191+9292+val set_remote :
9393+ proc_mgr:Git.proc_mgr ->
9494+ root:Worktree.root ->
9595+ project:string ->
9696+ url:string ->
9797+ set_remote_result
9898+(** [set_remote ~proc_mgr ~root ~project ~url] sets the remote URL for a project.
9999+100100+ Creates or updates a git remote named "origin-<project>" pointing to the URL.
101101+ This allows the project to be pushed independently using [push]. *)
102102+103103+val get_remote :
104104+ proc_mgr:Git.proc_mgr ->
105105+ root:Worktree.root ->
106106+ project:string ->
107107+ string option
108108+(** [get_remote ~proc_mgr ~root ~project] returns the remote URL for a project,
109109+ or None if no remote is configured. *)
110110+111111+(** Result of push operation *)
112112+type push_result =
113113+ | Pushed of { project : string; branch : string; remote : string }
114114+ | No_remote of string
115115+ | Project_not_found of string
116116+ | Push_failed of { project : string; error : string }
117117+118118+val push :
119119+ proc_mgr:Git.proc_mgr ->
120120+ root:Worktree.root ->
121121+ project:string ->
122122+ push_result
123123+(** [push ~proc_mgr ~root ~project] pushes a project to its configured remote.
124124+125125+ Pushes the project/<name> branch to the remote configured via [set_remote].
126126+ Returns [No_remote] if no remote has been configured. *)
127127+128128+(** {1 Project Info} *)
129129+130130+type project_info = {
131131+ name : string;
132132+ origin : [`Local | `Vendored];
133133+ remote : string option;
134134+ promoted_as : (backend * string) option; (** backend, vendor_name *)
135135+}
136136+137137+val get_info :
138138+ proc_mgr:Git.proc_mgr ->
139139+ root:Worktree.root ->
140140+ project:string ->
141141+ project_info option
142142+(** [get_info ~proc_mgr ~root ~project] returns information about a project,
143143+ or None if the project doesn't exist. *)
144144+145145+(** {1 Export (Unvendor)}
146146+147147+ Export reverses the vendoring process, creating a branch with files
148148+ at the repository root suitable for pushing to an external git repo.
149149+150150+ This is the inverse of vendoring:
151151+ - Vendoring: files at root → files in vendor/<backend>/<name>/
152152+ - Exporting: files in vendor/<backend>/<name>/ → files at root *)
153153+154154+val export_branch : backend -> string -> string
155155+(** [export_branch backend name] returns the export branch name,
156156+ e.g., "opam/export/brotli" or "git/export/brotli" *)
157157+158158+(** Result of export operation *)
159159+type export_result =
160160+ | Exported of {
161161+ name : string; (** Package name *)
162162+ backend : backend; (** Backend used *)
163163+ source_branch : string; (** Branch exported from (vendor or patches) *)
164164+ export_branch : string; (** Created export branch *)
165165+ commits : int; (** Number of commits in export *)
166166+ }
167167+ | Not_vendored of string
168168+ (** No vendor branch exists for this package *)
169169+ | Already_exported of string
170170+ (** Export branch already exists *)
171171+ | Export_failed of { name : string; error : string }
172172+ (** Export operation failed *)
173173+174174+val export :
175175+ proc_mgr:Git.proc_mgr ->
176176+ root:Worktree.root ->
177177+ name:string ->
178178+ backend:backend ->
179179+ from_patches:bool ->
180180+ export_result
181181+(** [export ~proc_mgr ~root ~name ~backend ~from_patches] exports a vendored
182182+ package back to root-level files.
183183+184184+ Creates an export branch where files are moved from [vendor/<backend>/<name>/]
185185+ to the repository root. This branch can then be pushed to an upstream repo.
186186+187187+ @param name The vendored package name
188188+ @param backend The backend (Opam or Git)
189189+ @param from_patches If true, exports from patches/* branch (includes local mods);
190190+ if false, exports from vendor/* branch (pristine upstream)
191191+192192+ The export branch is named [<backend>/export/<name>], e.g., "git/export/brotli".
193193+194194+ Example workflow:
195195+ {[
196196+ (* Export with local patches *)
197197+ export ~from_patches:true ...
198198+199199+ (* Set remote and push *)
200200+ set_export_remote ~url:"git@github.com:me/brotli.git" ...
201201+ push_export ...
202202+ ]} *)
203203+204204+val export_remote_name : string -> string
205205+(** [export_remote_name name] returns the git remote name for exports,
206206+ e.g., "export-brotli" *)
207207+208208+val set_export_remote :
209209+ proc_mgr:Git.proc_mgr ->
210210+ root:Worktree.root ->
211211+ name:string ->
212212+ url:string ->
213213+ [ `Created | `Existed | `Updated ]
214214+(** [set_export_remote ~proc_mgr ~root ~name ~url] sets the remote URL
215215+ for pushing exports of a package. Creates remote "export-<name>". *)
216216+217217+val get_export_remote :
218218+ proc_mgr:Git.proc_mgr ->
219219+ root:Worktree.root ->
220220+ name:string ->
221221+ string option
222222+(** [get_export_remote ~proc_mgr ~root ~name] returns the export remote URL,
223223+ or None if no export remote is configured. *)
224224+225225+(** {2 Upstream Remote}
226226+227227+ The upstream remote is where we fetch updates from. For packages added
228228+ via [opam add], the upstream is automatically configured. For promoted
229229+ local projects, use [set_upstream_remote] to configure where updates
230230+ should be fetched from. *)
231231+232232+val upstream_remote_name : string -> string
233233+(** [upstream_remote_name name] returns the git remote name for upstream,
234234+ e.g., "upstream-brotli" *)
235235+236236+val set_upstream_remote :
237237+ proc_mgr:Git.proc_mgr ->
238238+ root:Worktree.root ->
239239+ name:string ->
240240+ url:string ->
241241+ [ `Created | `Existed | `Updated ]
242242+(** [set_upstream_remote ~proc_mgr ~root ~name ~url] sets the remote URL
243243+ for fetching upstream updates. Creates remote "upstream-<name>".
244244+245245+ This is used by [opam update] to fetch new changes. For promoted local
246246+ projects, this typically points to the same repo as the export remote. *)
247247+248248+val get_upstream_remote :
249249+ proc_mgr:Git.proc_mgr ->
250250+ root:Worktree.root ->
251251+ name:string ->
252252+ string option
253253+(** [get_upstream_remote ~proc_mgr ~root ~name] returns the upstream remote URL,
254254+ or None if no upstream remote is configured. *)
255255+256256+(** Result of export push operation *)
257257+type export_push_result =
258258+ | Export_pushed of {
259259+ name : string;
260260+ backend : backend;
261261+ remote : string;
262262+ branch : string;
263263+ commits : int;
264264+ }
265265+ | Export_not_found of string
266266+ (** No export branch exists for this package *)
267267+ | No_export_remote of string
268268+ (** No export remote configured *)
269269+ | Export_push_failed of { name : string; error : string }
270270+ (** Push operation failed *)
271271+272272+val push_export :
273273+ proc_mgr:Git.proc_mgr ->
274274+ root:Worktree.root ->
275275+ name:string ->
276276+ backend:backend ->
277277+ export_push_result
278278+(** [push_export ~proc_mgr ~root ~name ~backend] pushes an export branch
279279+ to its configured remote.
280280+281281+ Pushes the [<backend>/export/<name>] branch to the remote configured
282282+ via [set_export_remote], targeting the 'main' branch on the remote.
283283+284284+ Returns [Export_not_found] if the package hasn't been exported yet.
285285+ Returns [No_export_remote] if no remote has been configured. *)
286286+287287+val list_exports :
288288+ proc_mgr:Git.proc_mgr ->
289289+ root:Worktree.root ->
290290+ (backend * string) list
291291+(** [list_exports ~proc_mgr ~root] returns all exported packages as
292292+ (backend, name) pairs. *)