The unpac monorepo manager self-hosting as a monorepo using unpac
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Add unpac source code

+15758
+1
.ocamlformat
··· 1 + version=0.28.1
+5
bin/dune
··· 1 + (executable 2 + (name main) 3 + (public_name unpac) 4 + (package unpac) 5 + (libraries unpac unpac_opam cmdliner eio_main logs logs.fmt fmt.tty jsont jsont.bytesrw str))
+3113
bin/main.ml
··· 1 + open Cmdliner 2 + 3 + let src = Logs.Src.create "unpac.main" ~doc:"Main CLI operations" 4 + module Log = (val Logs.src_log src : Logs.LOG) 5 + 6 + (* Logging setup *) 7 + let setup_logging ?(verbose=false) () = 8 + Fmt_tty.setup_std_outputs (); 9 + let level = if verbose then Logs.Debug else Logs.Info in 10 + Logs.set_level (Some level); 11 + Logs.set_reporter (Logs_fmt.reporter ()) 12 + 13 + let logging_term = 14 + Term.(const (setup_logging ~verbose:false) $ const ()) 15 + 16 + (* Helper to find project root *) 17 + let with_root f = 18 + Eio_main.run @@ fun env -> 19 + let fs = Eio.Stdenv.fs env in 20 + let proc_mgr = Eio.Stdenv.process_mgr env in 21 + let cwd = Sys.getcwd () in 22 + match Unpac.Init.find_root ~fs ~cwd with 23 + | None -> 24 + Format.eprintf "Error: Not in an unpac project.@."; 25 + exit 1 26 + | Some root -> 27 + f ~env ~fs ~proc_mgr ~root 28 + 29 + (* Helper to wrap operations with audit logging *) 30 + let with_audit ~proc_mgr ~root ~operation_type ~args f = 31 + let main_wt = Unpac.Worktree.path root Unpac.Worktree.Main in 32 + let mgr = Unpac.Audit.create_manager ~proc_mgr ~main_wt in 33 + let ctx = Unpac.Audit.begin_operation mgr ~operation_type ~args in 34 + try 35 + let result = f ctx in 36 + ignore (Unpac.Audit.end_success mgr); 37 + result 38 + with exn -> 39 + ignore (Unpac.Audit.end_failed mgr ~error:(Printexc.to_string exn)); 40 + raise exn 41 + 42 + (* Helper to get config path *) 43 + let config_path root = 44 + let main_path = Unpac.Worktree.path root Unpac.Worktree.Main in 45 + Eio.Path.(main_path / "unpac.toml") |> snd 46 + 47 + (* Helper to load config *) 48 + let load_config root = 49 + let path = config_path root in 50 + match Unpac.Config.load path with 51 + | Ok cfg -> cfg 52 + | Error _ -> Unpac.Config.empty 53 + 54 + (* Helper to save config and commit *) 55 + let save_config ~proc_mgr root config msg = 56 + let path = config_path root in 57 + Unpac.Config.save_exn path config; 58 + let main_wt = Unpac.Worktree.path root Unpac.Worktree.Main in 59 + Unpac.Git.run_exn ~proc_mgr ~cwd:main_wt ["add"; "unpac.toml"] |> ignore; 60 + Unpac.Git.run_exn ~proc_mgr ~cwd:main_wt ["commit"; "-m"; msg] |> ignore 61 + 62 + (* Check if string looks like a URL or path (vs a package name) *) 63 + let is_url_or_path s = 64 + String.starts_with ~prefix:"http://" s || 65 + String.starts_with ~prefix:"https://" s || 66 + String.starts_with ~prefix:"git@" s || 67 + String.starts_with ~prefix:"git://" s || 68 + String.starts_with ~prefix:"ssh://" s || 69 + String.starts_with ~prefix:"file://" s || 70 + String.starts_with ~prefix:"/" s || (* Absolute path *) 71 + String.starts_with ~prefix:"./" s || (* Relative path *) 72 + String.starts_with ~prefix:"../" s || (* Relative path *) 73 + String.contains s ':' (* URL with scheme *) 74 + 75 + (* Normalize a dev-repo URL for grouping comparison *) 76 + let normalize_dev_repo url = 77 + let s = url in 78 + (* Strip git+ prefix *) 79 + let s = if String.starts_with ~prefix:"git+" s then 80 + String.sub s 4 (String.length s - 4) else s in 81 + (* Strip trailing .git *) 82 + let s = if String.ends_with ~suffix:".git" s then 83 + String.sub s 0 (String.length s - 4) else s in 84 + (* Strip trailing slash *) 85 + let s = if String.ends_with ~suffix:"/" s then 86 + String.sub s 0 (String.length s - 1) else s in 87 + (* Normalize github URLs: git@github.com:x/y -> https://github.com/x/y *) 88 + let s = if String.starts_with ~prefix:"git@github.com:" s then 89 + "https://github.com/" ^ String.sub s 15 (String.length s - 15) else s in 90 + String.lowercase_ascii s 91 + 92 + (* Group solved packages by their dev-repo *) 93 + type package_group = { 94 + canonical_name : string; (* First package name, used as vendor name *) 95 + dev_repo : string; (* Original dev-repo URL *) 96 + packages : string list; (* All package names in this group *) 97 + } 98 + 99 + let group_packages_by_dev_repo ~config (pkgs : OpamPackage.t list) : package_group list = 100 + let repos = config.Unpac.Config.opam.repositories in 101 + (* Build a map from normalized dev-repo to package info *) 102 + let groups = Hashtbl.create 16 in 103 + List.iter (fun pkg -> 104 + let name = OpamPackage.Name.to_string (OpamPackage.name pkg) in 105 + let version = OpamPackage.Version.to_string (OpamPackage.version pkg) in 106 + match Unpac_opam.Repo.find_package ~repos ~name ~version () with 107 + | None -> () (* Skip packages not found *) 108 + | Some result -> 109 + match result.metadata.dev_repo with 110 + | None -> () (* Skip packages without dev-repo *) 111 + | Some dev_repo -> 112 + let key = normalize_dev_repo dev_repo in 113 + match Hashtbl.find_opt groups key with 114 + | None -> 115 + Hashtbl.add groups key (dev_repo, [name]) 116 + | Some (orig_url, names) -> 117 + Hashtbl.replace groups key (orig_url, name :: names) 118 + ) pkgs; 119 + (* Convert to list of groups *) 120 + Hashtbl.fold (fun _key (dev_repo, names) acc -> 121 + let names = List.rev names in (* Preserve order *) 122 + let canonical_name = List.hd names in 123 + { canonical_name; dev_repo; packages = names } :: acc 124 + ) groups [] 125 + |> List.sort (fun a b -> String.compare a.canonical_name b.canonical_name) 126 + 127 + (* Helper to resolve vendor cache *) 128 + let resolve_cache ~proc_mgr ~fs ~config ~cli_cache = 129 + match Unpac.Config.resolve_vendor_cache ?cli_override:cli_cache config with 130 + | None -> None 131 + | Some path -> 132 + Some (Unpac.Vendor_cache.init ~proc_mgr ~fs ~path ()) 133 + 134 + (* Init command *) 135 + let init_cmd = 136 + let doc = "Initialize a new unpac workspace." in 137 + let man = [ 138 + `S Manpage.s_description; 139 + `P "Creates a new unpac workspace with the standard directory structure:"; 140 + `Pre " <path>/ 141 + git/ # Bare git repository (all branches stored here) 142 + main/ # Main worktree (unpac.toml config lives here) 143 + vendor/ # Vendor worktrees (created as needed) 144 + opam/ # Opam package worktrees 145 + git/ # Git repository worktrees 146 + project/ # Project worktrees"; 147 + `P "The workspace uses git worktrees to maintain isolated views of \ 148 + vendored dependencies. Each vendored item has three branches:"; 149 + `I ("upstream/*", "Tracks original repository state"); 150 + `I ("vendor/*", "Clean snapshot for merging"); 151 + `I ("patches/*", "Local modifications"); 152 + `S Manpage.s_examples; 153 + `P "Create a new workspace:"; 154 + `Pre " unpac init my-project 155 + cd my-project"; 156 + `S "SEE ALSO"; 157 + `P "unpac-project(1), unpac-opam(1), unpac-git(1)"; 158 + ] in 159 + let path_arg = 160 + let doc = "Path for the new workspace. Will create the directory if it doesn't exist." in 161 + Arg.(required & pos 0 (some string) None & info [] ~docv:"PATH" ~doc) 162 + in 163 + let run () path = 164 + Eio_main.run @@ fun env -> 165 + let fs = Eio.Stdenv.fs env in 166 + let proc_mgr = Eio.Stdenv.process_mgr env in 167 + let _root = Unpac.Init.init ~proc_mgr ~fs path in 168 + Format.printf "Initialized unpac workspace at %s@." path; 169 + Format.printf "@.Next steps:@."; 170 + Format.printf " cd %s@." path; 171 + Format.printf " unpac opam repo add <name> <path> # configure opam repository@."; 172 + Format.printf " unpac project new <name> # create a project@." 173 + in 174 + let info = Cmd.info "init" ~doc ~man in 175 + Cmd.v info Term.(const run $ logging_term $ path_arg) 176 + 177 + (* Project new command *) 178 + let project_new_cmd = 179 + let doc = "Create a new project branch." in 180 + let name_arg = 181 + let doc = "Name of the project." in 182 + Arg.(required & pos 0 (some string) None & info [] ~docv:"NAME" ~doc) 183 + in 184 + let run () name = 185 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root -> 186 + with_audit ~proc_mgr ~root ~operation_type:Unpac.Audit.Project_new ~args:[name] @@ fun _ctx -> 187 + let _path = Unpac.Init.create_project ~proc_mgr root name in 188 + Format.printf "Created project %s@." name; 189 + Format.printf "@.Next steps:@."; 190 + Format.printf " unpac opam add <package> # vendor a package@."; 191 + Format.printf " unpac opam merge <package> %s # merge package into project@." name 192 + in 193 + let info = Cmd.info "new" ~doc in 194 + Cmd.v info Term.(const run $ logging_term $ name_arg) 195 + 196 + (* Project list command *) 197 + let project_list_cmd = 198 + let doc = "List projects." in 199 + let run () = 200 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root -> 201 + let projects = Unpac.Worktree.list_projects ~proc_mgr root in 202 + List.iter (Format.printf "%s@.") projects 203 + in 204 + let info = Cmd.info "list" ~doc in 205 + Cmd.v info Term.(const run $ logging_term) 206 + 207 + (* Project promote command *) 208 + let project_promote_cmd = 209 + let doc = "Promote a local project to a vendored library." in 210 + let man = [ 211 + `S Manpage.s_description; 212 + `P "Converts a locally-developed project into the vendor branch structure \ 213 + used by unpac for dependency management. This allows the project to be:"; 214 + `I ("•", "Merged into other projects as a dependency"); 215 + `I ("•", "Pushed to an independent git repository"); 216 + `I ("•", "Updated independently of the workspace"); 217 + `S "FILTERING"; 218 + `P "The promotion process filters the project history to remove \ 219 + vendored dependencies (the vendor/ directory), producing a clean \ 220 + library that can be independently distributed."; 221 + `P "Specifically, it:"; 222 + `I ("1.", "Extracts project/<name> branch history"); 223 + `I ("2.", "Filters out vendor/ directory (all backends' vendored code)"); 224 + `I ("3.", "Creates clean <backend>/upstream/<name> with filtered history"); 225 + `I ("4.", "Creates <backend>/vendor/<name> with path prefix applied"); 226 + `I ("5.", "Creates <backend>/patches/<name> for local modifications"); 227 + `P "The original project/<name> branch is preserved unchanged."; 228 + `S "BACKENDS"; 229 + `P "The --backend flag determines how the library is structured:"; 230 + `I ("opam", "Use for OCaml libraries built with dune. \ 231 + Creates vendor/opam/<name>/ structure. \ 232 + Merge with: unpac opam merge <name> <project>"); 233 + `I ("git", "Use for reference code, C libraries, or non-OCaml sources. \ 234 + Creates vendor/git/<name>/ structure. \ 235 + Merge with: unpac git merge <name> <project>"); 236 + `S Manpage.s_examples; 237 + `P "Promote a completed OCaml library:"; 238 + `Pre " unpac project promote brotli --backend opam"; 239 + `P "Promote with a different vendor name:"; 240 + `Pre " unpac project promote mybrotli --backend opam --name brotli"; 241 + `P "Promote a reference implementation:"; 242 + `Pre " unpac project promote zstd-reference --backend git"; 243 + `P "Full workflow from development to distribution:"; 244 + `Pre " unpac project new mybrotli 245 + # ... develop the library ... 246 + unpac project promote mybrotli --backend opam 247 + unpac project set-remote mybrotli git@github.com:me/mybrotli.git 248 + unpac opam merge mybrotli other-project"; 249 + `S "SEE ALSO"; 250 + `P "unpac-project-new(1), unpac-opam-merge(1), unpac-git-merge(1)"; 251 + ] in 252 + let name_arg = 253 + let doc = "Name of the project to promote." in 254 + Arg.(required & pos 0 (some string) None & info [] ~docv:"PROJECT" ~doc) 255 + in 256 + let backend_arg = 257 + let doc = "Vendor backend type: opam or git. \ 258 + Determines branch structure and merge semantics." in 259 + Arg.(required & opt (some string) None & info ["backend"; "b"] ~docv:"BACKEND" ~doc) 260 + in 261 + let vendor_name_arg = 262 + let doc = "Override the vendor library name (defaults to project name)." in 263 + Arg.(value & opt (some string) None & info ["name"; "n"] ~docv:"NAME" ~doc) 264 + in 265 + let run () project backend_str vendor_name = 266 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root -> 267 + (* Parse backend *) 268 + let backend = match Unpac.Promote.backend_of_string backend_str with 269 + | Some b -> b 270 + | None -> 271 + Format.eprintf "Error: Unknown backend '%s'. Use 'opam' or 'git'.@." backend_str; 272 + exit 1 273 + in 274 + with_audit ~proc_mgr ~root ~operation_type:Unpac.Audit.Project_promote 275 + ~args:( 276 + [project; "--backend"; backend_str] @ 277 + (match vendor_name with Some n -> ["--name"; n] | None -> []) 278 + ) @@ fun _ctx -> 279 + match Unpac.Promote.promote ~proc_mgr ~root ~project ~backend ~vendor_name with 280 + | Unpac.Promote.Promoted { name; backend; original_commits; filtered_commits } -> 281 + Format.printf "Promoted %s as %s vendor@." project (Unpac.Promote.backend_to_string backend); 282 + Format.printf "@.Filtered history: %d → %d commits (removed vendor/ directory)@." 283 + original_commits filtered_commits; 284 + Format.printf "@.Created branches:@."; 285 + Format.printf " %s@." (Unpac.Promote.upstream_branch backend name); 286 + Format.printf " %s@." (Unpac.Promote.vendor_branch backend name); 287 + Format.printf " %s@." (Unpac.Promote.patches_branch backend name); 288 + Format.printf "@.%s can now be merged into other projects:@." name; 289 + (match backend with 290 + | Unpac.Promote.Opam -> 291 + Format.printf " unpac opam merge %s <project>@." name 292 + | Unpac.Promote.Git -> 293 + Format.printf " unpac git merge %s <project>@." name) 294 + | Unpac.Promote.Already_promoted name -> 295 + Format.eprintf "Error: %s is already promoted.@." name; 296 + exit 1 297 + | Unpac.Promote.Project_not_found name -> 298 + Format.eprintf "Error: Project '%s' not found.@." name; 299 + exit 1 300 + | Unpac.Promote.Failed { name; error } -> 301 + Format.eprintf "Error promoting %s: %s@." name error; 302 + exit 1 303 + in 304 + let info = Cmd.info "promote" ~doc ~man in 305 + Cmd.v info Term.(const run $ logging_term $ name_arg $ backend_arg $ vendor_name_arg) 306 + 307 + (* Project set-remote command *) 308 + let project_set_remote_cmd = 309 + let doc = "Set the remote URL for a project." in 310 + let man = [ 311 + `S Manpage.s_description; 312 + `P "Configures a git remote for pushing a project to an independent repository. \ 313 + This allows projects developed in the workspace to be published separately."; 314 + `P "The remote is named 'origin-<project>' and is stored in the bare git \ 315 + repository. Use 'unpac project push' to push to this remote."; 316 + `S Manpage.s_examples; 317 + `P "Set remote for a project:"; 318 + `Pre " unpac project set-remote brotli git@github.com:user/ocaml-brotli.git"; 319 + `P "Full workflow:"; 320 + `Pre " unpac project new mylib 321 + # ... develop the library ... 322 + unpac project promote mylib --backend opam 323 + unpac project set-remote mylib git@github.com:me/mylib.git 324 + unpac project push mylib"; 325 + `S "SEE ALSO"; 326 + `P "unpac-project-push(1), unpac-project-promote(1)"; 327 + ] in 328 + let name_arg = 329 + let doc = "Name of the project." in 330 + Arg.(required & pos 0 (some string) None & info [] ~docv:"PROJECT" ~doc) 331 + in 332 + let url_arg = 333 + let doc = "Remote URL (git SSH or HTTPS URL)." in 334 + Arg.(required & pos 1 (some string) None & info [] ~docv:"URL" ~doc) 335 + in 336 + let run () project url = 337 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root -> 338 + with_audit ~proc_mgr ~root ~operation_type:Unpac.Audit.Project_set_remote 339 + ~args:[project; url] @@ fun _ctx -> 340 + match Unpac.Promote.set_remote ~proc_mgr ~root ~project ~url with 341 + | Unpac.Promote.Remote_set { project; url; created } -> 342 + if created then 343 + Format.printf "Created remote for %s: %s@." project url 344 + else 345 + Format.printf "Updated remote for %s: %s@." project url; 346 + Format.printf "@.Push with: unpac project push %s@." project 347 + | Unpac.Promote.Project_not_found name -> 348 + Format.eprintf "Error: Project '%s' not found.@." name; 349 + exit 1 350 + | Unpac.Promote.Set_remote_failed { project; error } -> 351 + Format.eprintf "Error setting remote for %s: %s@." project error; 352 + exit 1 353 + in 354 + let info = Cmd.info "set-remote" ~doc ~man in 355 + Cmd.v info Term.(const run $ logging_term $ name_arg $ url_arg) 356 + 357 + (* Project push command *) 358 + let project_push_cmd = 359 + let doc = "Push a project to its configured remote." in 360 + let man = [ 361 + `S Manpage.s_description; 362 + `P "Pushes a project branch to the remote configured via 'set-remote'. \ 363 + This allows publishing projects developed in the workspace to \ 364 + independent repositories."; 365 + `S Manpage.s_examples; 366 + `P "Push a project:"; 367 + `Pre " unpac project push brotli"; 368 + `S "SEE ALSO"; 369 + `P "unpac-project-set-remote(1)"; 370 + ] in 371 + let name_arg = 372 + let doc = "Name of the project to push." in 373 + Arg.(required & pos 0 (some string) None & info [] ~docv:"PROJECT" ~doc) 374 + in 375 + let run () project = 376 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root -> 377 + match Unpac.Promote.push ~proc_mgr ~root ~project with 378 + | Unpac.Promote.Pushed { project; branch; remote } -> 379 + Format.printf "Pushed %s (%s) to %s@." project branch remote 380 + | Unpac.Promote.No_remote project -> 381 + Format.eprintf "Error: No remote configured for %s.@." project; 382 + Format.eprintf "Set one with: unpac project set-remote %s <url>@." project; 383 + exit 1 384 + | Unpac.Promote.Project_not_found name -> 385 + Format.eprintf "Error: Project '%s' not found.@." name; 386 + exit 1 387 + | Unpac.Promote.Push_failed { project; error } -> 388 + Format.eprintf "Error pushing %s: %s@." project error; 389 + exit 1 390 + in 391 + let info = Cmd.info "push" ~doc ~man in 392 + Cmd.v info Term.(const run $ logging_term $ name_arg) 393 + 394 + (* Project info command *) 395 + let project_info_cmd = 396 + let doc = "Show detailed information about a project." in 397 + let man = [ 398 + `S Manpage.s_description; 399 + `P "Displays information about a project including:"; 400 + `I ("Origin", "Whether the project was created locally or vendored"); 401 + `I ("Remote", "Configured push URL (if any)"); 402 + `I ("Promoted", "Whether promoted to vendor library and which backend"); 403 + `S Manpage.s_examples; 404 + `Pre " unpac project info brotli"; 405 + ] in 406 + let name_arg = 407 + let doc = "Name of the project." in 408 + Arg.(required & pos 0 (some string) None & info [] ~docv:"PROJECT" ~doc) 409 + in 410 + let run () project = 411 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root -> 412 + match Unpac.Promote.get_info ~proc_mgr ~root ~project with 413 + | None -> 414 + Format.eprintf "Error: Project '%s' not found.@." project; 415 + exit 1 416 + | Some info -> 417 + Format.printf "Project: %s@." info.name; 418 + Format.printf "Origin: %s@." 419 + (match info.origin with `Local -> "local" | `Vendored -> "vendored"); 420 + Format.printf "Remote: %s@." 421 + (match info.remote with Some url -> url | None -> "(none)"); 422 + Format.printf "Promoted: %s@." 423 + (match info.promoted_as with 424 + | Some (backend, name) -> 425 + Printf.sprintf "%s vendor (%s)" (Unpac.Promote.backend_to_string backend) name 426 + | None -> "no") 427 + in 428 + let info = Cmd.info "info" ~doc ~man in 429 + Cmd.v info Term.(const run $ logging_term $ name_arg) 430 + 431 + (* Export command - unvendor a package for upstream push *) 432 + let export_cmd = 433 + let doc = "Export a vendored package for pushing to upstream." in 434 + let man = [ 435 + `S Manpage.s_description; 436 + `P "Creates an export branch from a vendored package with files moved \ 437 + from vendor/<backend>/<name>/ back to the repository root. This is \ 438 + the inverse of vendoring, producing a branch suitable for pushing \ 439 + to an upstream git repository."; 440 + `P "Use --from-patches to include local modifications in the export. \ 441 + Without this flag, exports from the vendor/* branch (pristine upstream)."; 442 + `S "WORKFLOW"; 443 + `P "The typical export workflow is:"; 444 + `Pre " # Export with local patches 445 + unpac export brotli --backend opam --from-patches 446 + 447 + # Set upstream remote 448 + unpac export-set-remote brotli git@github.com:me/brotli.git 449 + 450 + # Push to upstream 451 + unpac export-push brotli --backend opam"; 452 + `S Manpage.s_examples; 453 + `P "Export an opam package (pristine upstream):"; 454 + `Pre " unpac export brotli --backend opam"; 455 + `P "Export with local patches included:"; 456 + `Pre " unpac export brotli --backend opam --from-patches"; 457 + `P "Export a git-vendored package:"; 458 + `Pre " unpac export zstd --backend git"; 459 + ] in 460 + let name_arg = 461 + let doc = "Name of the vendored package to export." in 462 + Arg.(required & pos 0 (some string) None & info [] ~docv:"NAME" ~doc) 463 + in 464 + let backend_arg = 465 + let doc = "Vendor backend type: opam or git." in 466 + Arg.(required & opt (some string) None & info ["backend"; "b"] ~docv:"BACKEND" ~doc) 467 + in 468 + let from_patches_arg = 469 + let doc = "Export from patches/* branch (includes local modifications) \ 470 + instead of vendor/* branch (pristine upstream)." in 471 + Arg.(value & flag & info ["from-patches"; "p"] ~doc) 472 + in 473 + let run () name backend_str from_patches = 474 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root -> 475 + let backend = match Unpac.Promote.backend_of_string backend_str with 476 + | Some b -> b 477 + | None -> 478 + Format.eprintf "Error: Unknown backend '%s'. Use 'opam' or 'git'.@." backend_str; 479 + exit 1 480 + in 481 + match Unpac.Promote.export ~proc_mgr ~root ~name ~backend ~from_patches with 482 + | Unpac.Promote.Exported { name; backend; source_branch; export_branch; commits } -> 483 + Format.printf "Exported %s (%s backend)@." name (Unpac.Promote.backend_to_string backend); 484 + Format.printf " Source: %s@." source_branch; 485 + Format.printf " Export: %s (%d commits)@." export_branch commits; 486 + Format.printf "@.Files moved from vendor/%s/%s/ to repository root.@." 487 + (Unpac.Promote.backend_to_string backend) name; 488 + Format.printf "@.Next steps:@."; 489 + Format.printf " unpac export-set-remote %s <url>@." name; 490 + Format.printf " unpac export-push %s --backend %s@." name backend_str 491 + | Unpac.Promote.Not_vendored name -> 492 + Format.eprintf "Error: No vendor branch found for '%s'.@." name; 493 + Format.eprintf "Check available packages with: unpac opam list / unpac git list@."; 494 + exit 1 495 + | Unpac.Promote.Already_exported name -> 496 + Format.eprintf "Error: Export branch already exists for '%s'.@." name; 497 + Format.eprintf "Delete it first with: git branch -D %s/export/%s@." 498 + backend_str name; 499 + exit 1 500 + | Unpac.Promote.Export_failed { name; error } -> 501 + Format.eprintf "Error exporting %s: %s@." name error; 502 + exit 1 503 + in 504 + let info = Cmd.info "export" ~doc ~man in 505 + Cmd.v info Term.(const run $ logging_term $ name_arg $ backend_arg $ from_patches_arg) 506 + 507 + (* Export set-remote command *) 508 + let export_set_remote_cmd = 509 + let doc = "Set the remote URL for pushing exports." in 510 + let man = [ 511 + `S Manpage.s_description; 512 + `P "Configures a git remote for pushing exported packages to an upstream \ 513 + repository. The remote is named 'export-<name>'."; 514 + `S Manpage.s_examples; 515 + `Pre " unpac export-set-remote brotli git@github.com:me/brotli.git"; 516 + ] in 517 + let name_arg = 518 + let doc = "Name of the package." in 519 + Arg.(required & pos 0 (some string) None & info [] ~docv:"NAME" ~doc) 520 + in 521 + let url_arg = 522 + let doc = "Remote URL (git SSH or HTTPS URL)." in 523 + Arg.(required & pos 1 (some string) None & info [] ~docv:"URL" ~doc) 524 + in 525 + let run () name url = 526 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root -> 527 + match Unpac.Promote.set_export_remote ~proc_mgr ~root ~name ~url with 528 + | `Created -> 529 + Format.printf "Created export remote for %s: %s@." name url; 530 + Format.printf "@.Push with: unpac export-push %s --backend <backend>@." name 531 + | `Updated -> 532 + Format.printf "Updated export remote for %s: %s@." name url 533 + | `Existed -> 534 + Format.printf "Export remote already set for %s: %s@." name url 535 + in 536 + let info = Cmd.info "export-set-remote" ~doc ~man in 537 + Cmd.v info Term.(const run $ logging_term $ name_arg $ url_arg) 538 + 539 + (* Export push command *) 540 + let export_push_cmd = 541 + let doc = "Push an export branch to its configured remote." in 542 + let man = [ 543 + `S Manpage.s_description; 544 + `P "Pushes an export branch to the remote configured via 'export-set-remote'. \ 545 + The export branch is pushed as 'main' on the remote repository."; 546 + `S Manpage.s_examples; 547 + `Pre " unpac export-push brotli --backend opam"; 548 + ] in 549 + let name_arg = 550 + let doc = "Name of the package to push." in 551 + Arg.(required & pos 0 (some string) None & info [] ~docv:"NAME" ~doc) 552 + in 553 + let backend_arg = 554 + let doc = "Vendor backend type: opam or git." in 555 + Arg.(required & opt (some string) None & info ["backend"; "b"] ~docv:"BACKEND" ~doc) 556 + in 557 + let run () name backend_str = 558 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root -> 559 + let backend = match Unpac.Promote.backend_of_string backend_str with 560 + | Some b -> b 561 + | None -> 562 + Format.eprintf "Error: Unknown backend '%s'. Use 'opam' or 'git'.@." backend_str; 563 + exit 1 564 + in 565 + match Unpac.Promote.push_export ~proc_mgr ~root ~name ~backend with 566 + | Unpac.Promote.Export_pushed { name = _; backend; remote; branch; commits } -> 567 + Format.printf "Pushed %s (%d commits) to %s@." branch commits remote; 568 + Format.printf "Backend: %s@." (Unpac.Promote.backend_to_string backend); 569 + Format.printf "@.Export pushed as 'main' on remote.@." 570 + | Unpac.Promote.Export_not_found name -> 571 + Format.eprintf "Error: No export branch found for '%s'.@." name; 572 + Format.eprintf "Export first with: unpac export %s --backend %s@." name backend_str; 573 + exit 1 574 + | Unpac.Promote.No_export_remote name -> 575 + Format.eprintf "Error: No export remote configured for '%s'.@." name; 576 + Format.eprintf "Set one with: unpac export-set-remote %s <url>@." name; 577 + exit 1 578 + | Unpac.Promote.Export_push_failed { name; error } -> 579 + Format.eprintf "Error pushing export %s: %s@." name error; 580 + exit 1 581 + in 582 + let info = Cmd.info "export-push" ~doc ~man in 583 + Cmd.v info Term.(const run $ logging_term $ name_arg $ backend_arg) 584 + 585 + (* Export list command *) 586 + let export_list_cmd = 587 + let doc = "List all exported packages." in 588 + let run () = 589 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root -> 590 + let exports = Unpac.Promote.list_exports ~proc_mgr ~root in 591 + if exports = [] then 592 + Format.printf "No exported packages.@." 593 + else begin 594 + Format.printf "Exported packages:@."; 595 + List.iter (fun (backend, name) -> 596 + let remote = Unpac.Promote.get_export_remote ~proc_mgr ~root ~name in 597 + Format.printf " %s (%s)%s@." name 598 + (Unpac.Promote.backend_to_string backend) 599 + (match remote with Some url -> " → " ^ url | None -> "") 600 + ) exports 601 + end 602 + in 603 + let info = Cmd.info "export-list" ~doc in 604 + Cmd.v info Term.(const run $ logging_term) 605 + 606 + (* Project command group *) 607 + let project_cmd = 608 + let doc = "Project management commands." in 609 + let man = [ 610 + `S Manpage.s_description; 611 + `P "Projects are isolated branches where you merge vendored dependencies \ 612 + and build your application. Each project is a git worktree at \ 613 + project/<name>/ with its own branch project/<name>."; 614 + `P "Workflow:"; 615 + `Pre " 1. Create a project: unpac project new myapp 616 + 2. Vendor dependencies: unpac opam add foo 617 + 3. Merge into project: unpac opam merge foo myapp 618 + 4. Build in project: cd project/myapp && dune build"; 619 + `P "Multiple projects can share the same vendored dependencies - \ 620 + each project merges the patches branch independently."; 621 + `S "PROMOTING PROJECTS"; 622 + `P "Once a project is complete, you can promote it to a vendored library:"; 623 + `Pre " unpac project promote mylib --backend opam"; 624 + `P "This creates clean vendor branches (filtering out vendored deps) so \ 625 + the library can be merged into other projects."; 626 + `S "PUBLISHING PROJECTS"; 627 + `P "Projects can be pushed to independent repositories:"; 628 + `Pre " unpac project set-remote mylib git@github.com:me/mylib.git 629 + unpac project push mylib"; 630 + `S "EXPORTING AS STANDALONE LIBRARY"; 631 + `P "To publish a promoted project as a standalone opam library:"; 632 + `Pre " # 1. Promote project to opam vendor 633 + unpac project promote mylib --backend opam 634 + 635 + # 2. Export with your patches (files moved to root) 636 + unpac export mylib --backend opam --from-patches 637 + 638 + # 3. Configure remotes and push 639 + unpac export-set-remote mylib git@github.com:me/mylib.git 640 + unpac export-push mylib --backend opam 641 + 642 + # 4. Configure upstream for pulling updates 643 + unpac opam set-upstream mylib git@github.com:me/mylib.git"; 644 + `P "The export branch has files at repository root (not in vendor/), \ 645 + suitable for a standalone git repository. The upstream remote \ 646 + allows 'unpac opam update' to fetch changes back."; 647 + ] in 648 + let info = Cmd.info "project" ~doc ~man in 649 + Cmd.group info [ 650 + project_new_cmd; 651 + project_list_cmd; 652 + project_info_cmd; 653 + project_promote_cmd; 654 + project_set_remote_cmd; 655 + project_push_cmd; 656 + ] 657 + 658 + (* Opam repo add command *) 659 + let opam_repo_add_cmd = 660 + let doc = "Add an opam repository for package lookups." in 661 + let name_arg = 662 + let doc = "Name for the repository." in 663 + Arg.(required & pos 0 (some string) None & info [] ~docv:"NAME" ~doc) 664 + in 665 + let path_arg = 666 + let doc = "Path to the repository (local directory)." in 667 + Arg.(required & pos 1 (some string) None & info [] ~docv:"PATH" ~doc) 668 + in 669 + let run () name path = 670 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root -> 671 + let config = load_config root in 672 + (* Check if already exists *) 673 + if Unpac.Config.find_repo config name <> None then begin 674 + Format.eprintf "Repository '%s' already exists@." name; 675 + exit 1 676 + end; 677 + (* Resolve to absolute path *) 678 + let abs_path = 679 + if Filename.is_relative path then 680 + Filename.concat (Sys.getcwd ()) path 681 + else path 682 + in 683 + (* Check path exists *) 684 + if not (Sys.file_exists abs_path && Sys.is_directory abs_path) then begin 685 + Format.eprintf "Error: '%s' is not a valid directory@." abs_path; 686 + exit 1 687 + end; 688 + let repo : Unpac.Config.repo_config = { 689 + repo_name = name; 690 + source = Local abs_path; 691 + } in 692 + let config' = Unpac.Config.add_repo config repo in 693 + save_config ~proc_mgr root config' (Printf.sprintf "Add repository %s" name); 694 + Format.printf "Added repository %s at %s@." name abs_path; 695 + Format.printf "@.Next: unpac opam add <package> # vendor a package by name@." 696 + in 697 + let info = Cmd.info "add" ~doc in 698 + Cmd.v info Term.(const run $ logging_term $ name_arg $ path_arg) 699 + 700 + (* Opam repo list command *) 701 + let opam_repo_list_cmd = 702 + let doc = "List configured opam repositories." in 703 + let run () = 704 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr:_ ~root -> 705 + let config = load_config root in 706 + if config.opam.repositories = [] then begin 707 + Format.printf "No repositories configured@."; 708 + Format.printf "@.Hint: unpac opam repo add <name> <path>@." 709 + end else 710 + List.iter (fun (r : Unpac.Config.repo_config) -> 711 + let path = match r.source with 712 + | Local p -> p 713 + | Remote u -> u 714 + in 715 + Format.printf "%s: %s@." r.repo_name path 716 + ) config.opam.repositories 717 + in 718 + let info = Cmd.info "list" ~doc in 719 + Cmd.v info Term.(const run $ logging_term) 720 + 721 + (* Opam repo remove command *) 722 + let opam_repo_remove_cmd = 723 + let doc = "Remove an opam repository." in 724 + let name_arg = 725 + let doc = "Name of the repository to remove." in 726 + Arg.(required & pos 0 (some string) None & info [] ~docv:"NAME" ~doc) 727 + in 728 + let run () name = 729 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root -> 730 + let config = load_config root in 731 + if Unpac.Config.find_repo config name = None then begin 732 + Format.eprintf "Repository '%s' not found@." name; 733 + exit 1 734 + end; 735 + let config' = Unpac.Config.remove_repo config name in 736 + save_config ~proc_mgr root config' (Printf.sprintf "Remove repository %s" name); 737 + Format.printf "Removed repository %s@." name 738 + in 739 + let info = Cmd.info "remove" ~doc in 740 + Cmd.v info Term.(const run $ logging_term $ name_arg) 741 + 742 + (* Opam repo command group *) 743 + let opam_repo_cmd = 744 + let doc = "Manage opam repositories." in 745 + let info = Cmd.info "repo" ~doc in 746 + Cmd.group info [opam_repo_add_cmd; opam_repo_list_cmd; opam_repo_remove_cmd] 747 + 748 + (* Opam config compiler command *) 749 + let opam_config_compiler_cmd = 750 + let doc = "Set or show the OCaml compiler version for dependency solving." in 751 + let version_arg = 752 + let doc = "OCaml version to use (e.g., 5.2.0)." in 753 + Arg.(value & pos 0 (some string) None & info [] ~docv:"VERSION" ~doc) 754 + in 755 + let run () version_opt = 756 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root -> 757 + let config = load_config root in 758 + match version_opt with 759 + | None -> 760 + (* Show current compiler *) 761 + (match Unpac.Config.get_compiler config with 762 + | Some v -> Format.printf "Compiler: %s@." v 763 + | None -> Format.printf "No compiler configured@.@.Hint: unpac opam config compiler 5.2.0@.") 764 + | Some version -> 765 + (* Set compiler *) 766 + let config' = Unpac.Config.set_compiler config version in 767 + save_config ~proc_mgr root config' (Printf.sprintf "Set compiler to %s" version); 768 + Format.printf "Compiler set to %s@." version 769 + in 770 + let info = Cmd.info "compiler" ~doc in 771 + Cmd.v info Term.(const run $ logging_term $ version_arg) 772 + 773 + (* Opam config command group *) 774 + let opam_config_cmd = 775 + let doc = "Configure opam settings." in 776 + let info = Cmd.info "config" ~doc in 777 + Cmd.group info [opam_config_compiler_cmd] 778 + 779 + (* Opam add command - enhanced to support package names and dependency solving *) 780 + let opam_add_cmd = 781 + let doc = "Vendor an opam package (by name or git URL)." in 782 + let pkg_arg = 783 + let doc = "Package name or git URL." in 784 + Arg.(required & pos 0 (some string) None & info [] ~docv:"PACKAGE" ~doc) 785 + in 786 + let name_arg = 787 + let doc = "Override package name." in 788 + Arg.(value & opt (some string) None & info ["n"; "name"] ~docv:"NAME" ~doc) 789 + in 790 + let version_arg = 791 + let doc = "Package version (when adding by name)." in 792 + Arg.(value & opt (some string) None & info ["V"; "pkg-version"] ~docv:"VERSION" ~doc) 793 + in 794 + let branch_arg = 795 + let doc = "Git branch to vendor (defaults to remote default)." in 796 + Arg.(value & opt (some string) None & info ["b"; "branch"] ~docv:"BRANCH" ~doc) 797 + in 798 + let solve_arg = 799 + let doc = "Solve dependencies and vendor all required packages." in 800 + Arg.(value & flag & info ["solve"] ~doc) 801 + in 802 + let cache_arg = 803 + let doc = "Path to vendor cache (overrides config and UNPAC_VENDOR_CACHE env var)." in 804 + Arg.(value & opt (some string) None & info ["cache"] ~docv:"PATH" ~doc) 805 + in 806 + let run () pkg name_opt version_opt branch_opt solve cli_cache = 807 + with_root @@ fun ~env:_ ~fs ~proc_mgr ~root -> 808 + let config = load_config root in 809 + let cache = resolve_cache ~proc_mgr ~fs ~config ~cli_cache in 810 + 811 + (* Wrap entire operation with audit logging *) 812 + let args = [pkg] @ (match name_opt with Some n -> ["--name"; n] | None -> []) 813 + @ (if solve then ["--solve"] else []) in 814 + with_audit ~proc_mgr ~root ~operation_type:Unpac.Audit.Opam_add ~args @@ fun _ctx -> 815 + 816 + if solve then begin 817 + (* Solve dependencies and add all packages *) 818 + let repos = config.opam.repositories in 819 + if repos = [] then begin 820 + Format.eprintf "No repositories configured. Add one with: unpac opam repo add <name> <path>@."; 821 + exit 1 822 + end; 823 + let ocaml_version = match Unpac.Config.get_compiler config with 824 + | Some v -> v 825 + | None -> 826 + Format.eprintf "No compiler version configured.@."; 827 + Format.eprintf "Set one with: unpac opam config compiler 5.2.0@."; 828 + exit 1 829 + in 830 + (* Get repo paths *) 831 + let repo_paths = List.map (fun (r : Unpac.Config.repo_config) -> 832 + match r.source with 833 + | Unpac.Config.Local p -> p 834 + | Unpac.Config.Remote u -> u (* TODO: handle remote repos *) 835 + ) repos in 836 + Format.printf "Solving dependencies for %s...@." pkg; 837 + match Unpac_opam.Solver.solve ~repos:repo_paths ~ocaml_version ~packages:[pkg] with 838 + | Error msg -> 839 + Format.eprintf "Dependency solving failed:@.%s@." msg; 840 + exit 1 841 + | Ok result -> 842 + let pkgs = result.packages in 843 + Format.printf "Solution found: %d packages@." (List.length pkgs); 844 + List.iter (fun p -> 845 + Format.printf " %s.%s@." 846 + (OpamPackage.Name.to_string (OpamPackage.name p)) 847 + (OpamPackage.Version.to_string (OpamPackage.version p)) 848 + ) pkgs; 849 + 850 + (* Group packages by dev-repo to avoid duplicating sources *) 851 + let groups = group_packages_by_dev_repo ~config pkgs in 852 + Format.printf "@.Grouped into %d unique repositories:@." (List.length groups); 853 + List.iter (fun (g : package_group) -> 854 + if List.length g.packages > 1 then 855 + Format.printf " %s (%d packages: %s)@." 856 + g.canonical_name 857 + (List.length g.packages) 858 + (String.concat ", " g.packages) 859 + else 860 + Format.printf " %s@." g.canonical_name 861 + ) groups; 862 + 863 + Format.printf "@.Vendoring repositories...@."; 864 + let added = ref 0 in 865 + let failed = ref 0 in 866 + let config = ref config in 867 + List.iter (fun (g : package_group) -> 868 + (* Use canonical name as vendor name, dev-repo as URL *) 869 + let url = if String.starts_with ~prefix:"git+" g.dev_repo then 870 + String.sub g.dev_repo 4 (String.length g.dev_repo - 4) 871 + else g.dev_repo in 872 + let info : Unpac.Backend.package_info = { 873 + name = g.canonical_name; 874 + url; 875 + branch = None; 876 + } in 877 + match Unpac_opam.Opam.add_package ~proc_mgr ~root ?cache info with 878 + | Unpac.Backend.Added { name = pkg_name; sha } -> 879 + (* Record in config for remote recreation *) 880 + let vendored : Unpac.Config.vendored_package = { 881 + pkg_name; pkg_url = url; pkg_branch = None 882 + } in 883 + config := Unpac.Config.add_vendored_package !config vendored; 884 + Format.printf "Added %s (%s)@." pkg_name (String.sub sha 0 7); 885 + if List.length g.packages > 1 then 886 + Format.printf " Contains: %s@." (String.concat ", " g.packages); 887 + incr added 888 + | Unpac.Backend.Already_exists pkg_name -> 889 + Format.printf "Package %s already vendored@." pkg_name 890 + | Unpac.Backend.Failed { name = pkg_name; error } -> 891 + Format.eprintf "Error adding %s: %s@." pkg_name error; 892 + incr failed 893 + ) groups; 894 + (* Save config with all vendored packages *) 895 + if !added > 0 then 896 + save_config ~proc_mgr root !config "Record vendored packages in config"; 897 + Format.printf "@.Done: %d repositories added, %d failed@." !added !failed; 898 + if !failed > 0 then exit 1 899 + end else begin 900 + (* Single package mode *) 901 + let url, name = 902 + if is_url_or_path pkg then begin 903 + (* It's a URL *) 904 + let n = match name_opt with 905 + | Some n -> n 906 + | None -> 907 + let base = Filename.basename pkg in 908 + if String.ends_with ~suffix:".git" base then 909 + String.sub base 0 (String.length base - 4) 910 + else base 911 + in 912 + (pkg, n) 913 + end else begin 914 + (* It's a package name - look up in repositories *) 915 + let repos = config.opam.repositories in 916 + if repos = [] then begin 917 + Format.eprintf "No repositories configured. Add one with: unpac opam repo add <name> <path>@."; 918 + exit 1 919 + end; 920 + match Unpac_opam.Repo.find_package ~repos ~name:pkg ?version:version_opt () with 921 + | None -> 922 + Format.eprintf "Package '%s' not found in configured repositories@." pkg; 923 + exit 1 924 + | Some result -> 925 + match result.metadata.dev_repo with 926 + | None -> 927 + Format.eprintf "Package '%s' has no dev-repo field@." pkg; 928 + exit 1 929 + | Some dev_repo -> 930 + (* Strip git+ prefix if present (opam dev-repo format) *) 931 + let url = if String.starts_with ~prefix:"git+" dev_repo then 932 + String.sub dev_repo 4 (String.length dev_repo - 4) 933 + else dev_repo in 934 + let n = match name_opt with Some n -> n | None -> pkg in 935 + (url, n) 936 + end 937 + in 938 + 939 + let info : Unpac.Backend.package_info = { 940 + name; 941 + url; 942 + branch = branch_opt; 943 + } in 944 + match Unpac_opam.Opam.add_package ~proc_mgr ~root ?cache info with 945 + | Unpac.Backend.Added { name = pkg_name; sha } -> 946 + (* Record in config for remote recreation on fresh clones *) 947 + let vendored : Unpac.Config.vendored_package = { 948 + pkg_name; pkg_url = url; pkg_branch = branch_opt 949 + } in 950 + let config = Unpac.Config.add_vendored_package config vendored in 951 + save_config ~proc_mgr root config "Record vendored package in config"; 952 + Format.printf "Added %s (%s)@." pkg_name (String.sub sha 0 7); 953 + Format.printf "@.Next steps:@."; 954 + Format.printf " unpac opam edit %s # make local changes@." pkg_name; 955 + Format.printf " unpac opam merge %s <project> # merge into a project@." pkg_name 956 + | Unpac.Backend.Already_exists name -> 957 + Format.printf "Package %s already vendored@." name 958 + | Unpac.Backend.Failed { name; error } -> 959 + Format.eprintf "Error adding %s: %s@." name error; 960 + exit 1 961 + end 962 + in 963 + let info = Cmd.info "add" ~doc in 964 + Cmd.v info Term.(const run $ logging_term $ pkg_arg $ name_arg $ version_arg $ branch_arg $ solve_arg $ cache_arg) 965 + 966 + (* Opam list command *) 967 + let opam_list_cmd = 968 + let doc = "List vendored opam packages." in 969 + let run () = 970 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root -> 971 + let packages = Unpac_opam.Opam.list_packages ~proc_mgr ~root in 972 + if packages = [] then begin 973 + Format.printf "No packages vendored@."; 974 + Format.printf "@.Hint: unpac opam add <package>@." 975 + end else 976 + List.iter (Format.printf "%s@.") packages 977 + in 978 + let info = Cmd.info "list" ~doc in 979 + Cmd.v info Term.(const run $ logging_term) 980 + 981 + (* Opam edit command *) 982 + let opam_edit_cmd = 983 + let doc = "Open a package's patches worktree for editing. \ 984 + Also creates a vendor worktree for reference." in 985 + let pkg_arg = 986 + let doc = "Package name to edit." in 987 + Arg.(required & pos 0 (some string) None & info [] ~docv:"PACKAGE" ~doc) 988 + in 989 + let run () pkg = 990 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root -> 991 + (* Check package exists *) 992 + let packages = Unpac_opam.Opam.list_packages ~proc_mgr ~root in 993 + if not (List.mem pkg packages) then begin 994 + Format.eprintf "Package '%s' is not vendored@." pkg; 995 + exit 1 996 + end; 997 + (* Ensure both patches and vendor worktrees exist *) 998 + Unpac.Worktree.ensure ~proc_mgr root (Unpac.Worktree.Opam_patches pkg); 999 + Unpac.Worktree.ensure ~proc_mgr root (Unpac.Worktree.Opam_vendor pkg); 1000 + let patches_path = snd (Unpac.Worktree.path root (Unpac.Worktree.Opam_patches pkg)) in 1001 + let vendor_path = snd (Unpac.Worktree.path root (Unpac.Worktree.Opam_vendor pkg)) in 1002 + Format.printf "Editing %s@." pkg; 1003 + Format.printf "@."; 1004 + Format.printf "Worktrees created:@."; 1005 + Format.printf " patches: %s (make changes here)@." patches_path; 1006 + Format.printf " vendor: %s (original for reference)@." vendor_path; 1007 + Format.printf "@."; 1008 + Format.printf "Make your changes in the patches worktree, then:@."; 1009 + Format.printf " cd %s@." patches_path; 1010 + Format.printf " git add -A && git commit -m 'your message'@."; 1011 + Format.printf "@."; 1012 + Format.printf "When done: unpac opam done %s@." pkg 1013 + in 1014 + let info = Cmd.info "edit" ~doc in 1015 + Cmd.v info Term.(const run $ logging_term $ pkg_arg) 1016 + 1017 + (* Opam done command *) 1018 + let opam_done_cmd = 1019 + let doc = "Close a package's patches and vendor worktrees after editing." in 1020 + let pkg_arg = 1021 + let doc = "Package name." in 1022 + Arg.(required & pos 0 (some string) None & info [] ~docv:"PACKAGE" ~doc) 1023 + in 1024 + let run () pkg = 1025 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root -> 1026 + let patches_kind = Unpac.Worktree.Opam_patches pkg in 1027 + let vendor_kind = Unpac.Worktree.Opam_vendor pkg in 1028 + if not (Unpac.Worktree.exists root patches_kind) then begin 1029 + Format.eprintf "No editing session for '%s'@." pkg; 1030 + exit 1 1031 + end; 1032 + (* Check for uncommitted changes in patches worktree *) 1033 + let wt_path = Unpac.Worktree.path root patches_kind in 1034 + let status = Unpac.Git.run_exn ~proc_mgr ~cwd:wt_path ["status"; "--porcelain"] in 1035 + if String.trim status <> "" then begin 1036 + Format.eprintf "Warning: uncommitted changes in %s@." pkg; 1037 + Format.eprintf "Commit or discard them before closing.@."; 1038 + exit 1 1039 + end; 1040 + (* Remove both worktrees *) 1041 + Unpac.Worktree.remove ~proc_mgr root patches_kind; 1042 + if Unpac.Worktree.exists root vendor_kind then 1043 + Unpac.Worktree.remove ~proc_mgr root vendor_kind; 1044 + Format.printf "Closed editing session for %s@." pkg; 1045 + Format.printf "@.Next steps:@."; 1046 + Format.printf " unpac opam diff %s # view your changes@." pkg; 1047 + Format.printf " unpac opam merge %s <project> # merge into a project@." pkg 1048 + in 1049 + let info = Cmd.info "done" ~doc in 1050 + Cmd.v info Term.(const run $ logging_term $ pkg_arg) 1051 + 1052 + (* Opam set-upstream command *) 1053 + let opam_set_upstream_cmd = 1054 + let doc = "Set the upstream URL for a vendored opam package." in 1055 + let man = [ 1056 + `S Manpage.s_description; 1057 + `P "Configures the upstream git URL for a vendored opam package. \ 1058 + This is used by 'unpac opam update' to fetch new changes from upstream."; 1059 + `P "For packages added via 'unpac opam add', the upstream is automatically \ 1060 + configured from the opam source URL. This command is mainly useful for \ 1061 + promoted local projects that don't have an opam source."; 1062 + `S Manpage.s_examples; 1063 + `Pre " unpac opam set-upstream ocaml-zstd git@github.com:user/ocaml-zstd.git"; 1064 + `S "SEE ALSO"; 1065 + `P "unpac-opam-update(1), unpac-export-set-remote(1)"; 1066 + ] in 1067 + let name_arg = 1068 + let doc = "Name of the package." in 1069 + Arg.(required & pos 0 (some string) None & info [] ~docv:"NAME" ~doc) 1070 + in 1071 + let url_arg = 1072 + let doc = "Upstream URL (git SSH or HTTPS URL)." in 1073 + Arg.(required & pos 1 (some string) None & info [] ~docv:"URL" ~doc) 1074 + in 1075 + let run () name url = 1076 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root -> 1077 + match Unpac.Promote.set_upstream_remote ~proc_mgr ~root ~name ~url with 1078 + | `Created -> 1079 + Format.printf "Set upstream for %s: %s@." name url; 1080 + Format.printf "@.You can now run: unpac opam update %s@." name 1081 + | `Updated -> 1082 + Format.printf "Updated upstream for %s: %s@." name url 1083 + | `Existed -> 1084 + Format.printf "Upstream already set for %s: %s@." name url 1085 + in 1086 + let info = Cmd.info "set-upstream" ~doc ~man in 1087 + Cmd.v info Term.(const run $ logging_term $ name_arg $ url_arg) 1088 + 1089 + (* Opam update command *) 1090 + let opam_update_cmd = 1091 + let doc = "Update a vendored opam package from upstream." in 1092 + let name_arg = 1093 + let doc = "Package name to update." in 1094 + Arg.(required & pos 0 (some string) None & info [] ~docv:"NAME" ~doc) 1095 + in 1096 + let run () name = 1097 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root -> 1098 + with_audit ~proc_mgr ~root ~operation_type:Unpac.Audit.Opam_update ~args:[name] @@ fun _ctx -> 1099 + match Unpac_opam.Opam.update_package ~proc_mgr ~root name with 1100 + | Unpac.Backend.Updated { name = pkg_name; old_sha; new_sha } -> 1101 + Format.printf "Updated %s: %s -> %s@." pkg_name 1102 + (String.sub old_sha 0 7) (String.sub new_sha 0 7); 1103 + Format.printf "@.Next steps:@."; 1104 + Format.printf " unpac opam diff %s # view changes@." pkg_name; 1105 + Format.printf " unpac opam merge %s <project> # merge into a project@." pkg_name 1106 + | Unpac.Backend.No_changes name -> 1107 + Format.printf "%s is up to date@." name 1108 + | Unpac.Backend.Update_failed { name; error } -> 1109 + Format.eprintf "Error updating %s: %s@." name error; 1110 + exit 1 1111 + in 1112 + let info = Cmd.info "update" ~doc in 1113 + Cmd.v info Term.(const run $ logging_term $ name_arg) 1114 + 1115 + (* Opam merge command *) 1116 + let opam_merge_cmd = 1117 + let doc = "Merge vendored opam packages into a project. \ 1118 + Use --solve to merge a package and its dependencies, \ 1119 + or --all to merge all vendored packages." in 1120 + let args = 1121 + let doc = "PACKAGE PROJECT (or just PROJECT with --all)." in 1122 + Arg.(value & pos_all string [] & info [] ~docv:"ARGS" ~doc) 1123 + in 1124 + let all_flag = 1125 + let doc = "Merge all vendored packages into the project." in 1126 + Arg.(value & flag & info ["all"] ~doc) 1127 + in 1128 + let solve_flag = 1129 + let doc = "Solve dependencies for PACKAGE and merge all solved packages into the project." in 1130 + Arg.(value & flag & info ["solve"] ~doc) 1131 + in 1132 + let run () args all solve = 1133 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root -> 1134 + let config = load_config root in 1135 + let audit_args = args @ (if all then ["--all"] else []) @ (if solve then ["--solve"] else []) in 1136 + with_audit ~proc_mgr ~root ~operation_type:Unpac.Audit.Opam_merge ~args:audit_args @@ fun _ctx -> 1137 + 1138 + let merge_one ~project pkg = 1139 + let patches_branch = Unpac_opam.Opam.patches_branch pkg in 1140 + match Unpac.Backend.merge_to_project ~proc_mgr ~root ~project ~patches_branch with 1141 + | Ok () -> 1142 + Format.printf "Merged %s@." pkg; 1143 + true 1144 + | Error (`Conflict files) -> 1145 + Format.eprintf "Merge conflict in %s:@." pkg; 1146 + List.iter (Format.eprintf " %s@.") files; 1147 + false 1148 + in 1149 + 1150 + let merge_packages packages project = 1151 + Format.printf "Merging %d packages into project %s...@." (List.length packages) project; 1152 + let (successes, failures) = List.fold_left (fun (s, f) pkg -> 1153 + if merge_one ~project pkg then (s + 1, f) else (s, f + 1) 1154 + ) (0, 0) packages in 1155 + Format.printf "@.Done: %d merged" successes; 1156 + if failures > 0 then Format.printf ", %d had conflicts" failures; 1157 + Format.printf "@."; 1158 + if failures > 0 then begin 1159 + Format.eprintf "Resolve conflicts in project/%s and commit.@." project; 1160 + exit 1 1161 + end else 1162 + Format.printf "Next: Build your project in project/%s@." project 1163 + in 1164 + 1165 + if solve then begin 1166 + (* Solve dependencies and merge all solved packages that are vendored *) 1167 + let pkg, project = match args with 1168 + | [pkg; project] -> pkg, project 1169 + | _ -> 1170 + Format.eprintf "Usage: unpac opam merge --solve PACKAGE PROJECT@."; 1171 + exit 1 1172 + in 1173 + let repos = config.opam.repositories in 1174 + if repos = [] then begin 1175 + Format.eprintf "No repositories configured. Add one with: unpac opam repo add <name> <path>@."; 1176 + exit 1 1177 + end; 1178 + let ocaml_version = match Unpac.Config.get_compiler config with 1179 + | Some v -> v 1180 + | None -> 1181 + Format.eprintf "No compiler version configured.@."; 1182 + Format.eprintf "Set one with: unpac opam config compiler 5.2.0@."; 1183 + exit 1 1184 + in 1185 + let repo_paths = List.map (fun (r : Unpac.Config.repo_config) -> 1186 + match r.source with 1187 + | Unpac.Config.Local p -> p 1188 + | Unpac.Config.Remote u -> u 1189 + ) repos in 1190 + Format.printf "Solving dependencies for %s...@." pkg; 1191 + match Unpac_opam.Solver.solve ~repos:repo_paths ~ocaml_version ~packages:[pkg] with 1192 + | Error msg -> 1193 + Format.eprintf "Dependency solving failed:@.%s@." msg; 1194 + exit 1 1195 + | Ok result -> 1196 + (* Group by dev-repo to get canonical names *) 1197 + let groups = group_packages_by_dev_repo ~config result.packages in 1198 + let canonical_names = List.map (fun (g : package_group) -> g.canonical_name) groups in 1199 + (* Filter to only vendored packages *) 1200 + let vendored = Unpac_opam.Opam.list_packages ~proc_mgr ~root in 1201 + let to_merge = List.filter (fun name -> List.mem name vendored) canonical_names in 1202 + if to_merge = [] then begin 1203 + Format.eprintf "No vendored packages match the solved dependencies.@."; 1204 + Format.eprintf "Run 'unpac opam add %s --solve' first to vendor them.@." pkg; 1205 + exit 1 1206 + end; 1207 + Format.printf "Found %d vendored packages to merge.@.@." (List.length to_merge); 1208 + merge_packages to_merge project 1209 + end else if all then begin 1210 + (* Merge all vendored packages *) 1211 + let project = match args with 1212 + | [project] -> project 1213 + | _ -> 1214 + Format.eprintf "Usage: unpac opam merge --all PROJECT@."; 1215 + exit 1 1216 + in 1217 + let packages = Unpac_opam.Opam.list_packages ~proc_mgr ~root in 1218 + if packages = [] then begin 1219 + Format.eprintf "No vendored packages to merge.@."; 1220 + exit 1 1221 + end; 1222 + merge_packages packages project 1223 + end else begin 1224 + (* Single package mode *) 1225 + let pkg, project = match args with 1226 + | [pkg; project] -> pkg, project 1227 + | _ -> 1228 + Format.eprintf "Usage: unpac opam merge PACKAGE PROJECT@."; 1229 + exit 1 1230 + in 1231 + if merge_one ~project pkg then 1232 + Format.printf "@.Next: Build your project in project/%s@." project 1233 + else begin 1234 + Format.eprintf "Resolve conflicts in project/%s and commit.@." project; 1235 + exit 1 1236 + end 1237 + end 1238 + in 1239 + let info = Cmd.info "merge" ~doc in 1240 + Cmd.v info Term.(const run $ logging_term $ args $ all_flag $ solve_flag) 1241 + 1242 + (* Opam info command *) 1243 + let opam_info_cmd = 1244 + let doc = "Show information about a vendored package." in 1245 + let pkg_arg = 1246 + let doc = "Package name." in 1247 + Arg.(required & pos 0 (some string) None & info [] ~docv:"PACKAGE" ~doc) 1248 + in 1249 + let run () pkg = 1250 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root -> 1251 + let git = Unpac.Worktree.git_dir root in 1252 + (* Check package exists *) 1253 + let packages = Unpac_opam.Opam.list_packages ~proc_mgr ~root in 1254 + if not (List.mem pkg packages) then begin 1255 + Format.eprintf "Package '%s' is not vendored@." pkg; 1256 + exit 1 1257 + end; 1258 + (* Get remote URL *) 1259 + let remote = "origin-" ^ pkg in 1260 + let url = Unpac.Git.remote_url ~proc_mgr ~cwd:git remote in 1261 + Format.printf "Package: %s@." pkg; 1262 + (match url with 1263 + | Some u -> Format.printf "URL: %s@." u 1264 + | None -> ()); 1265 + (* Get branch SHAs *) 1266 + let upstream = Unpac_opam.Opam.upstream_branch pkg in 1267 + let vendor = Unpac_opam.Opam.vendor_branch pkg in 1268 + let patches = Unpac_opam.Opam.patches_branch pkg in 1269 + (match Unpac.Git.rev_parse ~proc_mgr ~cwd:git upstream with 1270 + | Some sha -> Format.printf "Upstream: %s@." (String.sub sha 0 7) 1271 + | None -> ()); 1272 + (match Unpac.Git.rev_parse ~proc_mgr ~cwd:git vendor with 1273 + | Some sha -> Format.printf "Vendor: %s@." (String.sub sha 0 7) 1274 + | None -> ()); 1275 + (match Unpac.Git.rev_parse ~proc_mgr ~cwd:git patches with 1276 + | Some sha -> Format.printf "Patches: %s@." (String.sub sha 0 7) 1277 + | None -> ()); 1278 + (* Count commits ahead *) 1279 + let log_output = Unpac.Git.run_exn ~proc_mgr ~cwd:git 1280 + ["log"; "--oneline"; vendor ^ ".." ^ patches] in 1281 + let commits = List.length (String.split_on_char '\n' log_output |> 1282 + List.filter (fun s -> String.trim s <> "")) in 1283 + Format.printf "Local commits: %d@." commits; 1284 + Format.printf "@.Commands:@."; 1285 + Format.printf " unpac opam diff %s # view local changes@." pkg; 1286 + Format.printf " unpac opam edit %s # edit package@." pkg; 1287 + Format.printf " unpac opam update %s # fetch upstream@." pkg 1288 + in 1289 + let info = Cmd.info "info" ~doc in 1290 + Cmd.v info Term.(const run $ logging_term $ pkg_arg) 1291 + 1292 + (* Opam diff command *) 1293 + let opam_diff_cmd = 1294 + let doc = "Show diff between vendor and patches branches." in 1295 + let pkg_arg = 1296 + let doc = "Package name." in 1297 + Arg.(required & pos 0 (some string) None & info [] ~docv:"PACKAGE" ~doc) 1298 + in 1299 + let run () pkg = 1300 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root -> 1301 + let git = Unpac.Worktree.git_dir root in 1302 + (* Check package exists *) 1303 + let packages = Unpac_opam.Opam.list_packages ~proc_mgr ~root in 1304 + if not (List.mem pkg packages) then begin 1305 + Format.eprintf "Package '%s' is not vendored@." pkg; 1306 + exit 1 1307 + end; 1308 + let vendor = Unpac_opam.Opam.vendor_branch pkg in 1309 + let patches = Unpac_opam.Opam.patches_branch pkg in 1310 + let diff = Unpac.Git.run_exn ~proc_mgr ~cwd:git 1311 + ["diff"; vendor; patches] in 1312 + if String.trim diff = "" then begin 1313 + Format.printf "No local changes@."; 1314 + Format.printf "@.Hint: unpac opam edit %s # to make changes@." pkg 1315 + end else begin 1316 + print_string diff; 1317 + Format.printf "@.Next: unpac opam merge %s <project>@." pkg 1318 + end 1319 + in 1320 + let info = Cmd.info "diff" ~doc in 1321 + Cmd.v info Term.(const run $ logging_term $ pkg_arg) 1322 + 1323 + (* Opam remove command *) 1324 + let opam_remove_cmd = 1325 + let doc = "Remove a vendored package." in 1326 + let pkg_arg = 1327 + let doc = "Package name to remove." in 1328 + Arg.(required & pos 0 (some string) None & info [] ~docv:"PACKAGE" ~doc) 1329 + in 1330 + let run () pkg = 1331 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root -> 1332 + let git = Unpac.Worktree.git_dir root in 1333 + (* Check package exists *) 1334 + let packages = Unpac_opam.Opam.list_packages ~proc_mgr ~root in 1335 + if not (List.mem pkg packages) then begin 1336 + Format.eprintf "Package '%s' is not vendored@." pkg; 1337 + exit 1 1338 + end; 1339 + (* Remove worktrees if exist *) 1340 + (try Unpac.Worktree.remove_force ~proc_mgr root (Unpac.Worktree.Opam_upstream pkg) with _ -> ()); 1341 + (try Unpac.Worktree.remove_force ~proc_mgr root (Unpac.Worktree.Opam_vendor pkg) with _ -> ()); 1342 + (try Unpac.Worktree.remove_force ~proc_mgr root (Unpac.Worktree.Opam_patches pkg) with _ -> ()); 1343 + (* Delete branches *) 1344 + let upstream = Unpac_opam.Opam.upstream_branch pkg in 1345 + let vendor = Unpac_opam.Opam.vendor_branch pkg in 1346 + let patches = Unpac_opam.Opam.patches_branch pkg in 1347 + (try Unpac.Git.run_exn ~proc_mgr ~cwd:git ["branch"; "-D"; upstream] |> ignore with _ -> ()); 1348 + (try Unpac.Git.run_exn ~proc_mgr ~cwd:git ["branch"; "-D"; vendor] |> ignore with _ -> ()); 1349 + (try Unpac.Git.run_exn ~proc_mgr ~cwd:git ["branch"; "-D"; patches] |> ignore with _ -> ()); 1350 + (* Remove remote *) 1351 + let remote = "origin-" ^ pkg in 1352 + (try Unpac.Git.run_exn ~proc_mgr ~cwd:git ["remote"; "remove"; remote] |> ignore with _ -> ()); 1353 + Format.printf "Removed %s@." pkg; 1354 + Format.printf "@.Hint: unpac opam add <package> # to add another package@." 1355 + in 1356 + let info = Cmd.info "remove" ~doc in 1357 + Cmd.v info Term.(const run $ logging_term $ pkg_arg) 1358 + 1359 + (* Opam init command - create a new local opam package *) 1360 + let opam_init_cmd = 1361 + let doc = "Create a new local opam package (no upstream repository)." in 1362 + let man = [ 1363 + `S Manpage.s_description; 1364 + `P "Creates a new opam package that originates locally rather than from \ 1365 + an external repository. This is useful for:"; 1366 + `I ("New libraries", "Starting a new OCaml library from scratch"); 1367 + `I ("Internal packages", "Creating packages that will never be published"); 1368 + `I ("Agent-created packages", "AI agents can create new dependencies on-demand"); 1369 + `P "The package is created with a minimal scaffold including dune-project \ 1370 + and a .opam file. It uses the standard three-tier branch model but \ 1371 + with no upstream branch (url='local' in config)."; 1372 + `S "PACKAGE STRUCTURE"; 1373 + `P "The created package will have:"; 1374 + `Pre " vendor/opam/<name>/ 1375 + dune-project # Dune project file 1376 + <name>.opam # Opam package file 1377 + lib/ 1378 + dune # Library build rules 1379 + <name>.ml # Main module (empty) 1380 + <name>.mli # Interface file (empty)"; 1381 + `S Manpage.s_examples; 1382 + `P "Create a new local library:"; 1383 + `Pre " unpac opam init mylib 1384 + unpac opam merge mylib myproject"; 1385 + `P "Create with description:"; 1386 + `Pre " unpac opam init mylib --synopsis 'My utility library'"; 1387 + `S "LIFECYCLE"; 1388 + `P "Local packages can later be published by:"; 1389 + `Pre " 1. Push the opam/patches/<name> branch to a git repository 1390 + 2. Update config with: unpac opam set-upstream <name> <url> 1391 + 3. Submit to opam-repository if desired"; 1392 + `S "SEE ALSO"; 1393 + `P "unpac-opam-promote(1) for graduating projects to dependencies."; 1394 + ] in 1395 + let name_arg = 1396 + let doc = "Name for the new package. Should be a valid opam package name \ 1397 + (lowercase, alphanumeric, hyphens allowed)." in 1398 + Arg.(required & pos 0 (some string) None & info [] ~docv:"NAME" ~doc) 1399 + in 1400 + let synopsis_arg = 1401 + let doc = "One-line synopsis for the package." in 1402 + Arg.(value & opt string "A local opam package" & info ["synopsis"; "s"] ~docv:"TEXT" ~doc) 1403 + in 1404 + let run () name synopsis = 1405 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root -> 1406 + with_audit ~proc_mgr ~root ~operation_type:Unpac.Audit.Opam_init ~args:[name] @@ fun _ctx -> 1407 + let git = Unpac.Worktree.git_dir root in 1408 + let config = load_config root in 1409 + 1410 + (* Validate package name *) 1411 + if String.length name = 0 then begin 1412 + Format.eprintf "Error: Package name cannot be empty@."; 1413 + exit 1 1414 + end; 1415 + let valid_char c = (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c = '-' || c = '_' in 1416 + if not (String.for_all valid_char name) then begin 1417 + Format.eprintf "Error: Package name must be lowercase alphanumeric (hyphens/underscores allowed)@."; 1418 + exit 1 1419 + end; 1420 + 1421 + (* Check if already exists *) 1422 + let packages = Unpac_opam.Opam.list_packages ~proc_mgr ~root in 1423 + if List.mem name packages then begin 1424 + Format.eprintf "Package '%s' already exists@." name; 1425 + exit 1 1426 + end; 1427 + 1428 + (* Create an orphan branch for vendor *) 1429 + let vendor_branch = Unpac_opam.Opam.vendor_branch name in 1430 + let patches_branch = Unpac_opam.Opam.patches_branch name in 1431 + let vendor_path = "vendor/opam/" ^ name in 1432 + 1433 + (* Create orphan branch with initial content *) 1434 + Unpac.Git.checkout_orphan ~proc_mgr ~cwd:git vendor_branch; 1435 + 1436 + (* Remove any existing index content *) 1437 + Unpac.Git.rm_cached_rf ~proc_mgr ~cwd:git; 1438 + 1439 + (* Create scaffold files in a temporary worktree *) 1440 + let wt_path = Unpac.Worktree.path root (Unpac.Worktree.Opam_vendor name) in 1441 + Unpac.Worktree.ensure ~proc_mgr root (Unpac.Worktree.Opam_vendor name); 1442 + 1443 + (* Create directory structure *) 1444 + let pkg_dir = Eio.Path.(wt_path / vendor_path) in 1445 + let lib_dir = Eio.Path.(pkg_dir / "lib") in 1446 + Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 lib_dir; 1447 + 1448 + (* Create dune-project *) 1449 + let dune_project = Printf.sprintf {|(lang dune 3.0) 1450 + (name %s) 1451 + (generate_opam_files true) 1452 + (source (uri "local")) 1453 + (authors "Local") 1454 + (maintainers "Local") 1455 + (package 1456 + (name %s) 1457 + (synopsis "%s") 1458 + (depends 1459 + (ocaml (>= 4.14)))) 1460 + |} name name synopsis in 1461 + Eio.Path.save ~create:(`Or_truncate 0o644) 1462 + Eio.Path.(pkg_dir / "dune-project") dune_project; 1463 + 1464 + (* Create lib/dune *) 1465 + let lib_dune = Printf.sprintf {|(library 1466 + (name %s) 1467 + (public_name %s)) 1468 + |} (String.map (fun c -> if c = '-' then '_' else c) name) name in 1469 + Eio.Path.save ~create:(`Or_truncate 0o644) 1470 + Eio.Path.(lib_dir / "dune") lib_dune; 1471 + 1472 + (* Create lib/<name>.ml *) 1473 + let ml_file = Printf.sprintf {|(* %s - A local opam package *) 1474 + 1475 + (** This module was created by [unpac opam init]. 1476 + Add your implementation here. *) 1477 + |} name in 1478 + let ml_name = String.map (fun c -> if c = '-' then '_' else c) name in 1479 + Eio.Path.save ~create:(`Or_truncate 0o644) 1480 + Eio.Path.(lib_dir / (ml_name ^ ".ml")) ml_file; 1481 + 1482 + (* Create lib/<name>.mli *) 1483 + let mli_file = Printf.sprintf {|(* %s - A local opam package *) 1484 + 1485 + (** This module was created by [unpac opam init]. 1486 + Define your interface here. *) 1487 + |} name in 1488 + Eio.Path.save ~create:(`Or_truncate 0o644) 1489 + Eio.Path.(lib_dir / (ml_name ^ ".mli")) mli_file; 1490 + 1491 + (* Commit the scaffold *) 1492 + Unpac.Git.add_all ~proc_mgr ~cwd:wt_path; 1493 + Unpac.Git.commit ~proc_mgr ~cwd:wt_path 1494 + ~message:(Printf.sprintf "Initialize local package %s" name); 1495 + 1496 + (* Get the commit SHA *) 1497 + let sha = Unpac.Git.current_head ~proc_mgr ~cwd:wt_path in 1498 + 1499 + (* Create patches branch from vendor *) 1500 + Unpac.Git.branch_create ~proc_mgr ~cwd:git 1501 + ~name:patches_branch ~start_point:vendor_branch; 1502 + 1503 + (* Cleanup worktree *) 1504 + Unpac.Worktree.remove ~proc_mgr root (Unpac.Worktree.Opam_vendor name); 1505 + 1506 + (* Switch back to main *) 1507 + Unpac.Git.checkout ~proc_mgr ~cwd:git "main"; 1508 + 1509 + (* Record in config with url = "local" *) 1510 + let vendored : Unpac.Config.vendored_package = { 1511 + pkg_name = name; pkg_url = "local"; pkg_branch = None 1512 + } in 1513 + let config = Unpac.Config.add_vendored_package config vendored in 1514 + save_config ~proc_mgr root config (Printf.sprintf "Add local package %s" name); 1515 + 1516 + Format.printf "Created local package %s (%s)@." name (String.sub sha 0 7); 1517 + Format.printf "@.Package structure:@."; 1518 + Format.printf " %s/@." vendor_path; 1519 + Format.printf " dune-project@."; 1520 + Format.printf " lib/dune@."; 1521 + Format.printf " lib/%s.ml@." ml_name; 1522 + Format.printf " lib/%s.mli@." ml_name; 1523 + Format.printf "@.Next steps:@."; 1524 + Format.printf " unpac opam edit %s # add code to the package@." name; 1525 + Format.printf " unpac opam merge %s <project> # use in a project@." name 1526 + in 1527 + let info = Cmd.info "init" ~doc ~man in 1528 + Cmd.v info Term.(const run $ logging_term $ name_arg $ synopsis_arg) 1529 + 1530 + (* Opam promote command - graduate a project to a vendored dependency *) 1531 + let opam_promote_cmd = 1532 + let doc = "Promote a project to a vendored opam dependency." in 1533 + let man = [ 1534 + `S Manpage.s_description; 1535 + `P "Graduates a project branch to become a vendored opam dependency that \ 1536 + other projects can use. This is the lifecycle path for code that:"; 1537 + `I ("Started as a project", "Code developed in project/<name> that should \ 1538 + become a shared library"); 1539 + `I ("Needs reuse", "A project that other projects want to depend on"); 1540 + `I ("Agent refactoring", "AI agents can extract common code into libraries"); 1541 + `P "The project's content is copied to create opam/vendor/<name> and \ 1542 + opam/patches/<name> branches. The original project remains unchanged \ 1543 + and can be deleted if no longer needed."; 1544 + `S "REQUIREMENTS"; 1545 + `P "The project directory should contain a valid dune-project file with \ 1546 + the package definition. If not present, a basic one will be created."; 1547 + `S Manpage.s_examples; 1548 + `P "Promote a project to a dependency:"; 1549 + `Pre " unpac opam promote my-utils 1550 + unpac opam merge my-utils other-project"; 1551 + `P "Promote with a different name:"; 1552 + `Pre " unpac opam promote my-app --as my-lib"; 1553 + `S "LIFECYCLE"; 1554 + `P "After promotion:"; 1555 + `Pre " 1. The new package appears in 'unpac opam list' 1556 + 2. Other projects can merge it with 'unpac opam merge' 1557 + 3. Edit with 'unpac opam edit' (changes go to patches branch) 1558 + 4. Original project can be deleted if desired"; 1559 + `S "SEE ALSO"; 1560 + `P "unpac-opam-init(1) for creating new packages from scratch."; 1561 + ] in 1562 + let project_arg = 1563 + let doc = "Name of the project to promote." in 1564 + Arg.(required & pos 0 (some string) None & info [] ~docv:"PROJECT" ~doc) 1565 + in 1566 + let as_arg = 1567 + let doc = "Name for the opam package (defaults to project name)." in 1568 + Arg.(value & opt (some string) None & info ["as"] ~docv:"NAME" ~doc) 1569 + in 1570 + let run () project pkg_name_opt = 1571 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root -> 1572 + let pkg_name = match pkg_name_opt with Some n -> n | None -> project in 1573 + with_audit ~proc_mgr ~root ~operation_type:Unpac.Audit.Opam_promote ~args:[project; pkg_name] @@ fun _ctx -> 1574 + let git = Unpac.Worktree.git_dir root in 1575 + let config = load_config root in 1576 + 1577 + (* Check project exists *) 1578 + let projects = Unpac.Worktree.list_projects ~proc_mgr root in 1579 + if not (List.mem project projects) then begin 1580 + Format.eprintf "Project '%s' does not exist@." project; 1581 + Format.eprintf "@.Available projects:@."; 1582 + List.iter (Format.eprintf " %s@.") projects; 1583 + exit 1 1584 + end; 1585 + 1586 + (* Check package doesn't already exist *) 1587 + let packages = Unpac_opam.Opam.list_packages ~proc_mgr ~root in 1588 + if List.mem pkg_name packages then begin 1589 + Format.eprintf "Package '%s' already exists@." pkg_name; 1590 + exit 1 1591 + end; 1592 + 1593 + let vendor_branch = Unpac_opam.Opam.vendor_branch pkg_name in 1594 + let patches_branch = Unpac_opam.Opam.patches_branch pkg_name in 1595 + let vendor_path = "vendor/opam/" ^ pkg_name in 1596 + 1597 + (* Create orphan branch for vendor *) 1598 + Unpac.Git.checkout_orphan ~proc_mgr ~cwd:git vendor_branch; 1599 + Unpac.Git.rm_cached_rf ~proc_mgr ~cwd:git; 1600 + 1601 + (* Create vendor worktree *) 1602 + Unpac.Worktree.ensure ~proc_mgr root (Unpac.Worktree.Opam_vendor pkg_name); 1603 + let vendor_wt = Unpac.Worktree.path root (Unpac.Worktree.Opam_vendor pkg_name) in 1604 + 1605 + (* Get project worktree or create temporary one *) 1606 + let project_wt = Unpac.Worktree.path root (Unpac.Worktree.Project project) in 1607 + let created_project_wt = not (Sys.file_exists (snd project_wt)) in 1608 + if created_project_wt then 1609 + Unpac.Worktree.ensure ~proc_mgr root (Unpac.Worktree.Project project); 1610 + 1611 + (* Create target directory *) 1612 + let pkg_dir = Eio.Path.(vendor_wt / vendor_path) in 1613 + Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 pkg_dir; 1614 + 1615 + (* Copy project content to vendor path *) 1616 + let rec copy_dir src dst = 1617 + Eio.Path.read_dir src |> List.iter (fun name -> 1618 + if name <> ".git" then begin 1619 + let src_path = Eio.Path.(src / name) in 1620 + let dst_path = Eio.Path.(dst / name) in 1621 + if Eio.Path.is_directory src_path then begin 1622 + Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 dst_path; 1623 + copy_dir src_path dst_path 1624 + end else begin 1625 + let content = Eio.Path.load src_path in 1626 + Eio.Path.save ~create:(`Or_truncate 0o644) dst_path content 1627 + end 1628 + end 1629 + ) 1630 + in 1631 + copy_dir project_wt pkg_dir; 1632 + 1633 + (* Commit *) 1634 + Unpac.Git.add_all ~proc_mgr ~cwd:vendor_wt; 1635 + Unpac.Git.commit ~proc_mgr ~cwd:vendor_wt 1636 + ~message:(Printf.sprintf "Promote project %s to package %s" project pkg_name); 1637 + 1638 + (* Get SHA *) 1639 + let sha = Unpac.Git.current_head ~proc_mgr ~cwd:vendor_wt in 1640 + 1641 + (* Create patches branch from vendor *) 1642 + Unpac.Git.branch_create ~proc_mgr ~cwd:git 1643 + ~name:patches_branch ~start_point:vendor_branch; 1644 + 1645 + (* Cleanup *) 1646 + Unpac.Worktree.remove ~proc_mgr root (Unpac.Worktree.Opam_vendor pkg_name); 1647 + if created_project_wt then 1648 + Unpac.Worktree.remove ~proc_mgr root (Unpac.Worktree.Project project); 1649 + 1650 + (* Switch back to main *) 1651 + Unpac.Git.checkout ~proc_mgr ~cwd:git "main"; 1652 + 1653 + (* Record in config *) 1654 + let vendored : Unpac.Config.vendored_package = { 1655 + pkg_name; pkg_url = "local"; pkg_branch = None 1656 + } in 1657 + let config = Unpac.Config.add_vendored_package config vendored in 1658 + save_config ~proc_mgr root config (Printf.sprintf "Promote project %s to package %s" project pkg_name); 1659 + 1660 + Format.printf "Promoted project %s to package %s (%s)@." project pkg_name (String.sub sha 0 7); 1661 + Format.printf "@.The package is now available as a vendored dependency.@."; 1662 + Format.printf "@.Next steps:@."; 1663 + Format.printf " unpac opam merge %s <other-project> # use in another project@." pkg_name; 1664 + Format.printf " unpac opam edit %s # make changes@." pkg_name; 1665 + if project <> pkg_name then 1666 + Format.printf " unpac project remove %s # remove original project (optional)@." project 1667 + in 1668 + let info = Cmd.info "promote" ~doc ~man in 1669 + Cmd.v info Term.(const run $ logging_term $ project_arg $ as_arg) 1670 + 1671 + (* Opam command group *) 1672 + let opam_cmd = 1673 + let doc = "Opam package vendoring commands." in 1674 + let man = [ 1675 + `S Manpage.s_description; 1676 + `P "Vendor OCaml packages from opam repositories or create new local packages. \ 1677 + Uses a three-tier branch model for conflict-free vendoring:"; 1678 + `I ("opam/upstream/<pkg>", "Tracks the original repository state (empty for local packages)"); 1679 + `I ("opam/vendor/<pkg>", "Clean snapshot used as merge base"); 1680 + `I ("opam/patches/<pkg>", "Local modifications on top of vendor"); 1681 + `S "PACKAGE SOURCES"; 1682 + `P "Packages can come from three sources:"; 1683 + `I ("External (unpac opam add)", "Vendor from opam repository or git URL. \ 1684 + Has upstream tracking for updates."); 1685 + `I ("Local (unpac opam init)", "Create a new package from scratch. \ 1686 + No upstream, recorded as url='local' in config."); 1687 + `I ("Promoted (unpac opam promote)", "Graduate a project to a dependency. \ 1688 + Allows code reuse between projects."); 1689 + `S "TYPICAL WORKFLOW - External Packages"; 1690 + `P "1. Configure an opam repository:"; 1691 + `Pre " unpac opam repo add default /path/to/opam-repository"; 1692 + `P "2. Set the OCaml compiler version for dependency solving:"; 1693 + `Pre " unpac opam config compiler 5.2.0"; 1694 + `P "3. Vendor a package with dependency solving:"; 1695 + `Pre " unpac opam add mypackage --solve"; 1696 + `P "4. Create a project and merge dependencies:"; 1697 + `Pre " unpac project new myapp 1698 + unpac opam merge mypackage myapp --solve"; 1699 + `P "5. Build in the project directory:"; 1700 + `Pre " cd project/myapp && dune build"; 1701 + `S "TYPICAL WORKFLOW - Local Packages"; 1702 + `P "1. Create a new local package:"; 1703 + `Pre " unpac opam init mylib --synopsis 'My utility library'"; 1704 + `P "2. Add code to the package:"; 1705 + `Pre " unpac opam edit mylib 1706 + # edit files in vendor/opam/mylib-patches/ 1707 + git add -A && git commit -m 'implement mylib' 1708 + unpac opam done mylib"; 1709 + `P "3. Use in a project:"; 1710 + `Pre " unpac opam merge mylib myproject"; 1711 + `S "TYPICAL WORKFLOW - Promoting Projects"; 1712 + `P "When a project should become a shared library:"; 1713 + `Pre " unpac opam promote myproject --as mylib 1714 + unpac opam merge mylib other-project"; 1715 + `S "MAKING LOCAL CHANGES"; 1716 + `P "1. Open package for editing (creates worktrees):"; 1717 + `Pre " unpac opam edit mypackage"; 1718 + `P "2. Make changes in the patches worktree:"; 1719 + `Pre " cd vendor/opam/mypackage-patches 1720 + # edit files... 1721 + git add -A && git commit -m 'my changes'"; 1722 + `P "3. Close the editing session:"; 1723 + `Pre " unpac opam done mypackage"; 1724 + `P "4. View your changes:"; 1725 + `Pre " unpac opam diff mypackage"; 1726 + `S "UPDATING FROM UPSTREAM"; 1727 + `P "For packages with external upstreams (added via 'opam add'):"; 1728 + `Pre " unpac opam update mypackage 1729 + unpac opam merge mypackage myapp"; 1730 + `P "For promoted local packages, first configure the upstream URL:"; 1731 + `Pre " unpac opam set-upstream mylib git@github.com:me/mylib.git 1732 + unpac opam update mylib"; 1733 + `S "FOR AI AGENTS"; 1734 + `P "When an agent needs to create a new dependency:"; 1735 + `Pre " # Option 1: Create from scratch 1736 + unpac opam init new-lib --synopsis 'Agent-created library' 1737 + unpac opam edit new-lib 1738 + # ... add implementation ... 1739 + unpac opam done new-lib 1740 + unpac opam merge new-lib target-project"; 1741 + `Pre " # Option 2: Extract from existing project 1742 + unpac opam promote existing-project --as new-lib 1743 + unpac opam merge new-lib other-project"; 1744 + `P "Local packages have url='local' in unpac.toml and can be identified with:"; 1745 + `Pre " unpac opam info <package> # shows URL: local"; 1746 + `S "COMMANDS"; 1747 + ] in 1748 + let info = Cmd.info "opam" ~doc ~man in 1749 + Cmd.group info [ 1750 + opam_repo_cmd; 1751 + opam_config_cmd; 1752 + opam_add_cmd; 1753 + opam_init_cmd; 1754 + opam_promote_cmd; 1755 + opam_list_cmd; 1756 + opam_edit_cmd; 1757 + opam_done_cmd; 1758 + opam_set_upstream_cmd; 1759 + opam_update_cmd; 1760 + opam_merge_cmd; 1761 + opam_info_cmd; 1762 + opam_diff_cmd; 1763 + opam_remove_cmd; 1764 + ] 1765 + 1766 + (* Git add command *) 1767 + let git_add_cmd = 1768 + let doc = "Vendor a git repository." in 1769 + let url_arg = 1770 + let doc = "Git URL to clone from." in 1771 + Arg.(required & pos 0 (some string) None & info [] ~docv:"URL" ~doc) 1772 + in 1773 + let name_arg = 1774 + let doc = "Override repository name (default: derived from URL)." in 1775 + Arg.(value & opt (some string) None & info ["n"; "name"] ~docv:"NAME" ~doc) 1776 + in 1777 + let branch_arg = 1778 + let doc = "Git branch or tag to vendor (default: remote default)." in 1779 + Arg.(value & opt (some string) None & info ["b"; "branch"] ~docv:"REF" ~doc) 1780 + in 1781 + let subdir_arg = 1782 + let doc = "Extract only this subdirectory from the repository." in 1783 + Arg.(value & opt (some string) None & info ["subdir"] ~docv:"PATH" ~doc) 1784 + in 1785 + let cache_arg = 1786 + let doc = "Path to vendor cache." in 1787 + Arg.(value & opt (some string) None & info ["cache"] ~docv:"PATH" ~doc) 1788 + in 1789 + let run () url name_opt branch_opt subdir_opt cli_cache = 1790 + with_root @@ fun ~env:_ ~fs ~proc_mgr ~root -> 1791 + let config = load_config root in 1792 + let cache = resolve_cache ~proc_mgr ~fs ~config ~cli_cache in 1793 + let audit_args = [url] @ (match name_opt with Some n -> ["--name"; n] | None -> []) in 1794 + with_audit ~proc_mgr ~root ~operation_type:Unpac.Audit.Git_add ~args:audit_args @@ fun _ctx -> 1795 + 1796 + let name = match name_opt with 1797 + | Some n -> n 1798 + | None -> 1799 + let base = Filename.basename url in 1800 + if String.ends_with ~suffix:".git" base then 1801 + String.sub base 0 (String.length base - 4) 1802 + else base 1803 + in 1804 + 1805 + let info : Unpac.Git_backend.repo_info = { 1806 + name; url; branch = branch_opt; subdir = subdir_opt; 1807 + } in 1808 + 1809 + match Unpac.Git_backend.add_repo ~proc_mgr ~root ?cache info with 1810 + | Unpac.Backend.Added { name = repo_name; sha } -> 1811 + Format.printf "Added %s (%s)@." repo_name (String.sub sha 0 7); 1812 + let repo_config : Unpac.Config.git_repo_config = { 1813 + git_name = name; git_url = url; 1814 + git_branch = branch_opt; git_subdir = subdir_opt; 1815 + } in 1816 + let config' = Unpac.Config.add_git_repo config repo_config in 1817 + save_config ~proc_mgr root config' (Printf.sprintf "Add git repo %s" name); 1818 + Format.printf "@.Next steps:@."; 1819 + Format.printf " unpac git edit %s # make local changes@." repo_name; 1820 + Format.printf " unpac git merge %s <project> # merge into a project@." repo_name 1821 + | Unpac.Backend.Already_exists name -> 1822 + Format.printf "Repository %s already vendored@." name 1823 + | Unpac.Backend.Failed { name; error } -> 1824 + Format.eprintf "Error adding %s: %s@." name error; 1825 + exit 1 1826 + in 1827 + let info = Cmd.info "add" ~doc in 1828 + Cmd.v info Term.(const run $ logging_term $ url_arg $ name_arg $ branch_arg $ subdir_arg $ cache_arg) 1829 + 1830 + (* Git list command *) 1831 + let git_list_cmd = 1832 + let doc = "List vendored git repositories." in 1833 + let run () = 1834 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root -> 1835 + let repos = Unpac.Git_backend.list_repos ~proc_mgr ~root in 1836 + if repos = [] then begin 1837 + Format.printf "No git repositories vendored@."; 1838 + Format.printf "@.Hint: unpac git add <url>@." 1839 + end else 1840 + List.iter (Format.printf "%s@.") repos 1841 + in 1842 + let info = Cmd.info "list" ~doc in 1843 + Cmd.v info Term.(const run $ logging_term) 1844 + 1845 + (* Git update command *) 1846 + let git_update_cmd = 1847 + let doc = "Update a vendored git repository from upstream." in 1848 + let name_arg = 1849 + let doc = "Repository name to update." in 1850 + Arg.(required & pos 0 (some string) None & info [] ~docv:"NAME" ~doc) 1851 + in 1852 + let run () name = 1853 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root -> 1854 + with_audit ~proc_mgr ~root ~operation_type:Unpac.Audit.Git_update ~args:[name] @@ fun _ctx -> 1855 + match Unpac.Git_backend.update_repo ~proc_mgr ~root name with 1856 + | Unpac.Backend.Updated { name = repo_name; old_sha; new_sha } -> 1857 + Format.printf "Updated %s: %s -> %s@." repo_name 1858 + (String.sub old_sha 0 7) (String.sub new_sha 0 7) 1859 + | Unpac.Backend.No_changes name -> 1860 + Format.printf "%s is up to date@." name 1861 + | Unpac.Backend.Update_failed { name; error } -> 1862 + Format.eprintf "Error updating %s: %s@." name error; 1863 + exit 1 1864 + in 1865 + let info = Cmd.info "update" ~doc in 1866 + Cmd.v info Term.(const run $ logging_term $ name_arg) 1867 + 1868 + (* Git merge command *) 1869 + let git_merge_cmd = 1870 + let doc = "Merge a vendored git repository into a project." in 1871 + let name_arg = 1872 + let doc = "Repository name to merge." in 1873 + Arg.(required & pos 0 (some string) None & info [] ~docv:"NAME" ~doc) 1874 + in 1875 + let project_arg = 1876 + let doc = "Project to merge into." in 1877 + Arg.(required & pos 1 (some string) None & info [] ~docv:"PROJECT" ~doc) 1878 + in 1879 + let run () name project = 1880 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root -> 1881 + with_audit ~proc_mgr ~root ~operation_type:Unpac.Audit.Git_merge ~args:[name; project] @@ fun _ctx -> 1882 + let patches_branch = Unpac.Git_backend.patches_branch name in 1883 + match Unpac.Backend.merge_to_project ~proc_mgr ~root ~project ~patches_branch with 1884 + | Ok () -> 1885 + Format.printf "Merged %s into %s@." name project; 1886 + Format.printf "@.Next: Build your project in project/%s@." project 1887 + | Error (`Conflict files) -> 1888 + Format.eprintf "Merge conflict in %s:@." name; 1889 + List.iter (Format.eprintf " %s@.") files; 1890 + Format.eprintf "Resolve conflicts in project/%s and commit.@." project; 1891 + exit 1 1892 + in 1893 + let info = Cmd.info "merge" ~doc in 1894 + Cmd.v info Term.(const run $ logging_term $ name_arg $ project_arg) 1895 + 1896 + (* Git info command *) 1897 + let git_info_cmd = 1898 + let doc = "Show information about a vendored git repository." in 1899 + let name_arg = 1900 + let doc = "Repository name." in 1901 + Arg.(required & pos 0 (some string) None & info [] ~docv:"NAME" ~doc) 1902 + in 1903 + let run () name = 1904 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root -> 1905 + let git = Unpac.Worktree.git_dir root in 1906 + let repos = Unpac.Git_backend.list_repos ~proc_mgr ~root in 1907 + if not (List.mem name repos) then begin 1908 + Format.eprintf "Repository '%s' is not vendored@." name; 1909 + exit 1 1910 + end; 1911 + let remote = "origin-" ^ name in 1912 + let url = Unpac.Git.remote_url ~proc_mgr ~cwd:git remote in 1913 + Format.printf "Repository: %s@." name; 1914 + (match url with Some u -> Format.printf "URL: %s@." u | None -> ()); 1915 + let upstream = Unpac.Git_backend.upstream_branch name in 1916 + let vendor = Unpac.Git_backend.vendor_branch name in 1917 + let patches = Unpac.Git_backend.patches_branch name in 1918 + (match Unpac.Git.rev_parse ~proc_mgr ~cwd:git upstream with 1919 + | Some sha -> Format.printf "Upstream: %s@." (String.sub sha 0 7) | None -> ()); 1920 + (match Unpac.Git.rev_parse ~proc_mgr ~cwd:git vendor with 1921 + | Some sha -> Format.printf "Vendor: %s@." (String.sub sha 0 7) | None -> ()); 1922 + (match Unpac.Git.rev_parse ~proc_mgr ~cwd:git patches with 1923 + | Some sha -> Format.printf "Patches: %s@." (String.sub sha 0 7) | None -> ()); 1924 + let log_output = Unpac.Git.run_exn ~proc_mgr ~cwd:git 1925 + ["log"; "--oneline"; vendor ^ ".." ^ patches] in 1926 + let commits = List.length (String.split_on_char '\n' log_output |> 1927 + List.filter (fun s -> String.trim s <> "")) in 1928 + Format.printf "Local commits: %d@." commits 1929 + in 1930 + let info = Cmd.info "info" ~doc in 1931 + Cmd.v info Term.(const run $ logging_term $ name_arg) 1932 + 1933 + (* Git diff command *) 1934 + let git_diff_cmd = 1935 + let doc = "Show diff between vendor and patches branches." in 1936 + let name_arg = 1937 + let doc = "Repository name." in 1938 + Arg.(required & pos 0 (some string) None & info [] ~docv:"NAME" ~doc) 1939 + in 1940 + let run () name = 1941 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root -> 1942 + let git = Unpac.Worktree.git_dir root in 1943 + let repos = Unpac.Git_backend.list_repos ~proc_mgr ~root in 1944 + if not (List.mem name repos) then begin 1945 + Format.eprintf "Repository '%s' is not vendored@." name; 1946 + exit 1 1947 + end; 1948 + let vendor = Unpac.Git_backend.vendor_branch name in 1949 + let patches = Unpac.Git_backend.patches_branch name in 1950 + let diff = Unpac.Git.run_exn ~proc_mgr ~cwd:git ["diff"; vendor; patches] in 1951 + if String.trim diff = "" then 1952 + Format.printf "No local changes@." 1953 + else 1954 + print_string diff 1955 + in 1956 + let info = Cmd.info "diff" ~doc in 1957 + Cmd.v info Term.(const run $ logging_term $ name_arg) 1958 + 1959 + (* Git edit command *) 1960 + let git_edit_cmd = 1961 + let doc = "Open a repository's patches worktree for editing." in 1962 + let name_arg = 1963 + let doc = "Repository name to edit." in 1964 + Arg.(required & pos 0 (some string) None & info [] ~docv:"NAME" ~doc) 1965 + in 1966 + let run () name = 1967 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root -> 1968 + let repos = Unpac.Git_backend.list_repos ~proc_mgr ~root in 1969 + if not (List.mem name repos) then begin 1970 + Format.eprintf "Repository '%s' is not vendored@." name; 1971 + exit 1 1972 + end; 1973 + Unpac.Worktree.ensure ~proc_mgr root (Unpac.Worktree.Git_patches name); 1974 + Unpac.Worktree.ensure ~proc_mgr root (Unpac.Worktree.Git_vendor name); 1975 + let patches_path = snd (Unpac.Worktree.path root (Unpac.Worktree.Git_patches name)) in 1976 + let vendor_path = snd (Unpac.Worktree.path root (Unpac.Worktree.Git_vendor name)) in 1977 + Format.printf "Editing %s@.@." name; 1978 + Format.printf "Worktrees created:@."; 1979 + Format.printf " patches: %s (make changes here)@." patches_path; 1980 + Format.printf " vendor: %s (original for reference)@." vendor_path; 1981 + Format.printf "@.When done: unpac git done %s@." name 1982 + in 1983 + let info = Cmd.info "edit" ~doc in 1984 + Cmd.v info Term.(const run $ logging_term $ name_arg) 1985 + 1986 + (* Git done command *) 1987 + let git_done_cmd = 1988 + let doc = "Close a repository's patches and vendor worktrees." in 1989 + let name_arg = 1990 + let doc = "Repository name." in 1991 + Arg.(required & pos 0 (some string) None & info [] ~docv:"NAME" ~doc) 1992 + in 1993 + let run () name = 1994 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root -> 1995 + let patches_kind = Unpac.Worktree.Git_patches name in 1996 + let vendor_kind = Unpac.Worktree.Git_vendor name in 1997 + if not (Unpac.Worktree.exists root patches_kind) then begin 1998 + Format.eprintf "No editing session for '%s'@." name; 1999 + exit 1 2000 + end; 2001 + let wt_path = Unpac.Worktree.path root patches_kind in 2002 + let status = Unpac.Git.run_exn ~proc_mgr ~cwd:wt_path ["status"; "--porcelain"] in 2003 + if String.trim status <> "" then begin 2004 + Format.eprintf "Warning: uncommitted changes in %s@." name; 2005 + Format.eprintf "Commit or discard them before closing.@."; 2006 + exit 1 2007 + end; 2008 + Unpac.Worktree.remove ~proc_mgr root patches_kind; 2009 + if Unpac.Worktree.exists root vendor_kind then 2010 + Unpac.Worktree.remove ~proc_mgr root vendor_kind; 2011 + Format.printf "Closed editing session for %s@." name 2012 + in 2013 + let info = Cmd.info "done" ~doc in 2014 + Cmd.v info Term.(const run $ logging_term $ name_arg) 2015 + 2016 + (* Git remove command *) 2017 + let git_remove_cmd = 2018 + let doc = "Remove a vendored git repository." in 2019 + let name_arg = 2020 + let doc = "Repository name to remove." in 2021 + Arg.(required & pos 0 (some string) None & info [] ~docv:"NAME" ~doc) 2022 + in 2023 + let run () name = 2024 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root -> 2025 + let repos = Unpac.Git_backend.list_repos ~proc_mgr ~root in 2026 + if not (List.mem name repos) then begin 2027 + Format.eprintf "Repository '%s' is not vendored@." name; 2028 + exit 1 2029 + end; 2030 + Unpac.Git_backend.remove_repo ~proc_mgr ~root name; 2031 + let config = load_config root in 2032 + let config' = Unpac.Config.remove_git_repo config name in 2033 + save_config ~proc_mgr root config' (Printf.sprintf "Remove git repo %s" name); 2034 + Format.printf "Removed %s@." name 2035 + in 2036 + let info = Cmd.info "remove" ~doc in 2037 + Cmd.v info Term.(const run $ logging_term $ name_arg) 2038 + 2039 + (* Git command group *) 2040 + let git_cmd = 2041 + let doc = "Git repository vendoring commands." in 2042 + let man = [ 2043 + `S Manpage.s_description; 2044 + `P "Vendor arbitrary git repositories with full history preservation. \ 2045 + Uses the three-tier branch model:"; 2046 + `I ("git/upstream/<name>", "Tracks the original repository state"); 2047 + `I ("git/vendor/<name>", "Clean snapshot used as merge base"); 2048 + `I ("git/patches/<name>", "Local modifications on top of vendor"); 2049 + `S "REQUIREMENTS"; 2050 + `P "git-filter-repo must be installed and in PATH. Install with:"; 2051 + `Pre " curl -o ~/.local/bin/git-filter-repo \\ 2052 + https://raw.githubusercontent.com/newren/git-filter-repo/refs/heads/main/git-filter-repo 2053 + chmod +x ~/.local/bin/git-filter-repo"; 2054 + `S "TYPICAL WORKFLOW"; 2055 + `P "1. Vendor a git repository:"; 2056 + `Pre " unpac git add https://github.com/owner/repo.git"; 2057 + `P "2. Optionally extract only a subdirectory:"; 2058 + `Pre " unpac git add https://github.com/owner/monorepo.git --subdir lib/component"; 2059 + `P "3. Create a project and merge:"; 2060 + `Pre " unpac project new myapp 2061 + unpac git merge repo myapp"; 2062 + `S "MAKING LOCAL CHANGES"; 2063 + `P "1. Open repository for editing:"; 2064 + `Pre " unpac git edit repo"; 2065 + `P "2. Make changes in vendor/git/repo-patches/"; 2066 + `P "3. Close the editing session:"; 2067 + `Pre " unpac git done repo"; 2068 + `S "COMMANDS"; 2069 + ] in 2070 + let info = Cmd.info "git" ~doc ~man in 2071 + Cmd.group info [ 2072 + git_add_cmd; git_list_cmd; git_update_cmd; git_merge_cmd; 2073 + git_info_cmd; git_diff_cmd; git_edit_cmd; git_done_cmd; git_remove_cmd; 2074 + ] 2075 + 2076 + (* Log command *) 2077 + let log_cmd = 2078 + let doc = "Show audit log of unpac operations." in 2079 + let man = [ 2080 + `S Manpage.s_description; 2081 + `P "Display the audit log of all unpac operations. The log contains \ 2082 + hierarchical records including nested git commands."; 2083 + `S Manpage.s_examples; 2084 + `P "View recent operations:"; 2085 + `Pre " unpac log -n 5"; 2086 + `P "Export as JSON:"; 2087 + `Pre " unpac log --json > ops.json"; 2088 + `P "Generate HTML report:"; 2089 + `Pre " unpac log --html -o report.html"; 2090 + ] in 2091 + let json_flag = 2092 + let doc = "Output raw JSON instead of text." in 2093 + Arg.(value & flag & info ["json"] ~doc) 2094 + in 2095 + let html_flag = 2096 + let doc = "Generate HTML report." in 2097 + Arg.(value & flag & info ["html"] ~doc) 2098 + in 2099 + let output_file = 2100 + let doc = "Output file (defaults to stdout)." in 2101 + Arg.(value & opt (some string) None & info ["o"; "output"] ~docv:"FILE" ~doc) 2102 + in 2103 + let last_n = 2104 + let doc = "Show only the last N operations." in 2105 + Arg.(value & opt (some int) None & info ["n"; "last"] ~docv:"N" ~doc) 2106 + in 2107 + let run () json html output last_n_opt = 2108 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr:_ ~root -> 2109 + let log_path = Filename.concat (snd (Unpac.Worktree.path root Unpac.Worktree.Main)) 2110 + Unpac.Audit.default_log_file in 2111 + match Unpac.Audit.load log_path with 2112 + | Error msg -> 2113 + Format.eprintf "Failed to load audit log: %s@." msg; 2114 + exit 1 2115 + | Ok log -> 2116 + let log = match last_n_opt with 2117 + | None -> log 2118 + | Some n -> 2119 + let entries = List.filteri (fun i _ -> i < n) log.entries in 2120 + { log with entries } 2121 + in 2122 + if html then begin 2123 + let html_content = Unpac.Audit.to_html log in 2124 + match output with 2125 + | Some path -> 2126 + let oc = open_out path in 2127 + output_string oc html_content; 2128 + close_out oc; 2129 + Format.printf "HTML report written to %s@." path 2130 + | None -> print_string html_content 2131 + end else if json then begin 2132 + match Jsont_bytesrw.encode_string ~format:Jsont.Indent Unpac.Audit.log_jsont log with 2133 + | Ok s -> print_string s; print_newline () 2134 + | Error e -> Format.eprintf "Failed to encode JSON: %s@." e; exit 1 2135 + end else begin 2136 + if log.entries = [] then 2137 + Format.printf "No operations recorded.@." 2138 + else 2139 + Format.printf "%a" Unpac.Audit.pp_log log 2140 + end 2141 + in 2142 + let info = Cmd.info "log" ~doc ~man in 2143 + Cmd.v info Term.(const run $ logging_term $ json_flag $ html_flag $ output_file $ last_n) 2144 + 2145 + (* Push command - push all unpac branches to a remote *) 2146 + let push_cmd = 2147 + let doc = "Push all unpac branches to a remote." in 2148 + let remote_arg = 2149 + let doc = "Remote name (e.g., origin)." in 2150 + Arg.(required & pos 0 (some string) None & info [] ~docv:"REMOTE" ~doc) 2151 + in 2152 + let force_arg = 2153 + let doc = "Force push (use with caution)." in 2154 + Arg.(value & flag & info ["f"; "force"] ~doc) 2155 + in 2156 + let dry_run_arg = 2157 + let doc = "Show what would be pushed without actually pushing." in 2158 + Arg.(value & flag & info ["n"; "dry-run"] ~doc) 2159 + in 2160 + let run () remote force dry_run = 2161 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root -> 2162 + let git = Unpac.Worktree.git_dir root in 2163 + 2164 + (* Check if remote exists *) 2165 + (match Unpac.Git.remote_url ~proc_mgr ~cwd:git remote with 2166 + | None -> 2167 + Format.eprintf "Remote '%s' not configured.@." remote; 2168 + Format.eprintf "Add it with: git -C %s remote add %s <url>@." (snd git) remote; 2169 + exit 1 2170 + | Some _ -> ()); 2171 + 2172 + (* Get all branches *) 2173 + let all_branches = Unpac.Git.run_lines ~proc_mgr ~cwd:git ["branch"; "--format=%(refname:short)"] in 2174 + 2175 + (* Filter to only unpac-managed branches *) 2176 + let unpac_branches = List.filter (fun b -> 2177 + b = "main" || 2178 + String.starts_with ~prefix:"opam/" b || 2179 + String.starts_with ~prefix:"project/" b 2180 + ) all_branches in 2181 + 2182 + if unpac_branches = [] then begin 2183 + Format.printf "No branches to push@."; 2184 + exit 0 2185 + end; 2186 + 2187 + Format.printf "Branches to push to %s:@." remote; 2188 + List.iter (fun b -> Format.printf " %s@." b) unpac_branches; 2189 + Format.printf "@."; 2190 + 2191 + if dry_run then begin 2192 + Format.printf "(dry run - no changes made)@." 2193 + end else begin 2194 + (* Build push command *) 2195 + let force_flag = if force then ["--force"] else [] in 2196 + let push_args = ["push"] @ force_flag @ [remote; "--"] @ unpac_branches in 2197 + 2198 + Format.printf "Pushing %d branches...@." (List.length unpac_branches); 2199 + try 2200 + Unpac.Git.run_exn ~proc_mgr ~cwd:git push_args |> ignore; 2201 + Format.printf "Done.@." 2202 + with e -> 2203 + Format.eprintf "Push failed: %s@." (Printexc.to_string e); 2204 + exit 1 2205 + end 2206 + in 2207 + let info = Cmd.info "push" ~doc in 2208 + Cmd.v info Term.(const run $ logging_term $ remote_arg $ force_arg $ dry_run_arg) 2209 + 2210 + (* Vendor status command *) 2211 + let vendor_status_cmd = 2212 + let doc = "Show status of all vendored packages." in 2213 + let run () = 2214 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root -> 2215 + let git = Unpac.Worktree.git_dir root in 2216 + 2217 + (* Get all vendored packages *) 2218 + let packages = Unpac_opam.Opam.list_packages ~proc_mgr ~root in 2219 + if packages = [] then begin 2220 + Format.printf "No vendored packages.@."; 2221 + exit 0 2222 + end; 2223 + 2224 + (* Get all project branches *) 2225 + let all_branches = Unpac.Git.run_lines ~proc_mgr ~cwd:git 2226 + ["branch"; "--format=%(refname:short)"] in 2227 + let project_branches = List.filter (fun b -> 2228 + String.starts_with ~prefix:"project/" b 2229 + ) all_branches in 2230 + let project_names = List.map (fun b -> 2231 + String.sub b 8 (String.length b - 8) (* Remove "project/" prefix *) 2232 + ) project_branches in 2233 + 2234 + (* Print header *) 2235 + Format.printf "%-25s %8s %s@." "Package" "Patches" "Merged into"; 2236 + Format.printf "%s@." (String.make 70 '-'); 2237 + 2238 + (* For each package, get patch count and merge status *) 2239 + List.iter (fun pkg -> 2240 + let vendor_branch = Unpac_opam.Opam.vendor_branch pkg in 2241 + let patches_branch = Unpac_opam.Opam.patches_branch pkg in 2242 + 2243 + (* Count commits on patches that aren't on vendor *) 2244 + let patch_count = 2245 + let output = Unpac.Git.run_exn ~proc_mgr ~cwd:git 2246 + ["rev-list"; "--count"; vendor_branch ^ ".." ^ patches_branch] in 2247 + int_of_string (String.trim output) 2248 + in 2249 + 2250 + (* Check which projects contain this package's patches *) 2251 + let merged_into = List.filter (fun proj_name -> 2252 + let proj_branch = "project/" ^ proj_name in 2253 + (* Check if patches branch is an ancestor of project branch *) 2254 + match Unpac.Git.run ~proc_mgr ~cwd:git 2255 + ["merge-base"; "--is-ancestor"; patches_branch; proj_branch] with 2256 + | Ok _ -> true 2257 + | Error _ -> false 2258 + ) project_names in 2259 + 2260 + let merged_str = if merged_into = [] then "-" 2261 + else String.concat ", " merged_into in 2262 + 2263 + Format.printf "%-25s %8d %s@." pkg patch_count merged_str 2264 + ) packages; 2265 + 2266 + Format.printf "@.Total: %d packages@." (List.length packages) 2267 + in 2268 + let info = Cmd.info "status" ~doc in 2269 + Cmd.v info Term.(const run $ logging_term) 2270 + 2271 + (* Vendor command group *) 2272 + let vendor_cmd = 2273 + let doc = "Vendor status and management commands." in 2274 + let info = Cmd.info "vendor" ~doc in 2275 + Cmd.group info [vendor_status_cmd] 2276 + 2277 + (* Status command - comprehensive workspace status *) 2278 + let status_cmd = 2279 + let doc = "Show comprehensive workspace status." in 2280 + let man = [ 2281 + `S Manpage.s_description; 2282 + `P "Shows the overall state of the unpac workspace including:"; 2283 + `I ("Projects", "All project branches and their merge status"); 2284 + `I ("Opam packages", "Vendored packages, patch counts, and merge status"); 2285 + `I ("Git repos", "Vendored git repositories and their status"); 2286 + `I ("Worktrees", "Any active worktrees with uncommitted changes"); 2287 + `P "Also updates README.md in the main branch with status in markdown format."; 2288 + `S Manpage.s_examples; 2289 + `Pre " unpac status # Full status 2290 + unpac status --short # Compact summary"; 2291 + ] in 2292 + let short_flag = 2293 + let doc = "Show compact summary only." in 2294 + Arg.(value & flag & info ["s"; "short"] ~doc) 2295 + in 2296 + let no_readme_flag = 2297 + let doc = "Don't update README.md." in 2298 + Arg.(value & flag & info ["no-readme"] ~doc) 2299 + in 2300 + let verbose_flag = 2301 + let doc = "Enable verbose/debug logging to help diagnose issues." in 2302 + Arg.(value & flag & info ["v"; "verbose"] ~doc) 2303 + in 2304 + let run () short no_readme verbose = 2305 + setup_logging ~verbose (); 2306 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root -> 2307 + Log.debug (fun m -> m "Starting status command..."); 2308 + let git = Unpac.Worktree.git_dir root in 2309 + let main_wt = Unpac.Worktree.path root Unpac.Worktree.Main in 2310 + Log.debug (fun m -> m "Git dir: %s" (snd git)); 2311 + Log.debug (fun m -> m "Main worktree: %s" (snd main_wt)); 2312 + 2313 + (* Get all branches *) 2314 + Log.debug (fun m -> m "Listing all branches..."); 2315 + let all_branches = Unpac.Git.run_lines ~proc_mgr ~cwd:git 2316 + ["branch"; "--format=%(refname:short)"] in 2317 + Log.debug (fun m -> m "Found %d branches" (List.length all_branches)); 2318 + 2319 + (* Categorize branches *) 2320 + Log.debug (fun m -> m "Categorizing branches..."); 2321 + let project_branches = List.filter (fun b -> 2322 + String.starts_with ~prefix:"project/" b 2323 + ) all_branches in 2324 + Log.debug (fun m -> m "Found %d project branches" (List.length project_branches)); 2325 + 2326 + Log.debug (fun m -> m "Listing opam packages..."); 2327 + let opam_packages = Unpac_opam.Opam.list_packages ~proc_mgr ~root in 2328 + Log.debug (fun m -> m "Found %d opam packages" (List.length opam_packages)); 2329 + 2330 + Log.debug (fun m -> m "Listing git repos..."); 2331 + let git_repos = Unpac.Git_backend.list_repos ~proc_mgr ~root in 2332 + Log.debug (fun m -> m "Found %d git repos" (List.length git_repos)); 2333 + 2334 + (* Parallel map helper using Eio fibers *) 2335 + let parallel_map f items = 2336 + Log.debug (fun m -> m "Running %d operations in parallel..." (List.length items)); 2337 + Eio.Switch.run @@ fun sw -> 2338 + let fibers = List.map (fun item -> 2339 + Eio.Fiber.fork_promise ~sw (fun () -> f item) 2340 + ) items in 2341 + List.map Eio.Promise.await_exn fibers 2342 + in 2343 + 2344 + (* Parallel commit count for all packages *) 2345 + let commit_count_calls = ref 0 in 2346 + let parallel_commit_counts pkgs vendor_fn patches_fn = 2347 + Log.debug (fun m -> m "Counting commits for %d items in parallel..." (List.length pkgs)); 2348 + parallel_map (fun pkg -> 2349 + let from_ref = vendor_fn pkg in 2350 + let to_ref = patches_fn pkg in 2351 + incr commit_count_calls; 2352 + try 2353 + let output = Unpac.Git.run_exn ~proc_mgr ~cwd:git 2354 + ["rev-list"; "--count"; from_ref ^ ".." ^ to_ref] in 2355 + (pkg, int_of_string (String.trim output)) 2356 + with _ -> (pkg, 0) 2357 + ) pkgs 2358 + in 2359 + 2360 + (* Helper to check if branch A is ancestor of B - single call *) 2361 + let is_ancestor_calls = ref 0 in 2362 + let is_ancestor a b = 2363 + incr is_ancestor_calls; 2364 + Log.debug (fun m -> m "is_ancestor #%d: %s in %s" !is_ancestor_calls a b); 2365 + match Unpac.Git.run ~proc_mgr ~cwd:git 2366 + ["merge-base"; "--is-ancestor"; a; b] with 2367 + | Ok _ -> true 2368 + | Error _ -> false 2369 + in 2370 + 2371 + (* Parallel is_ancestor check for a list of (source, target) pairs *) 2372 + let parallel_is_ancestor pairs = 2373 + Log.debug (fun m -> m "Checking %d ancestry relations in parallel..." (List.length pairs)); 2374 + parallel_map (fun (a, b) -> 2375 + incr is_ancestor_calls; 2376 + match Unpac.Git.run ~proc_mgr ~cwd:git 2377 + ["merge-base"; "--is-ancestor"; a; b] with 2378 + | Ok _ -> (a, b, true) 2379 + | Error _ -> (a, b, false) 2380 + ) pairs 2381 + in 2382 + 2383 + (* Check for uncommitted changes in a worktree *) 2384 + let has_changes_calls = ref 0 in 2385 + let has_changes wt_path = 2386 + incr has_changes_calls; 2387 + Log.debug (fun m -> m "has_changes #%d: %s" !has_changes_calls (snd wt_path)); 2388 + if Sys.file_exists (snd wt_path) then 2389 + let status = Unpac.Git.run_exn ~proc_mgr ~cwd:wt_path ["status"; "--porcelain"] in 2390 + String.trim status <> "" 2391 + else false 2392 + in 2393 + 2394 + (* Project names *) 2395 + let project_names = List.map (fun b -> 2396 + String.sub b 8 (String.length b - 8) 2397 + ) project_branches in 2398 + Log.debug (fun m -> m "Project names: %a" Fmt.(list ~sep:comma string) project_names); 2399 + 2400 + if short then begin 2401 + (* Short summary *) 2402 + Log.debug (fun m -> m "Generating short summary..."); 2403 + Format.printf "Workspace: %s@." (snd (Unpac.Worktree.git_dir root) |> Filename.dirname); 2404 + Format.printf "Projects: %d | Opam: %d | Git: %d@." 2405 + (List.length project_branches) 2406 + (List.length opam_packages) 2407 + (List.length git_repos); 2408 + 2409 + (* Count total patches - parallel *) 2410 + Log.debug (fun m -> m "Counting opam patches (%d packages) in parallel..." (List.length opam_packages)); 2411 + let opam_patch_counts = parallel_commit_counts opam_packages 2412 + Unpac_opam.Opam.vendor_branch Unpac_opam.Opam.patches_branch in 2413 + let opam_patches = List.fold_left (fun acc (_, n) -> acc + n) 0 opam_patch_counts in 2414 + Log.debug (fun m -> m "Counting git patches (%d repos) in parallel..." (List.length git_repos)); 2415 + let git_patch_counts = parallel_commit_counts git_repos 2416 + Unpac.Git_backend.vendor_branch Unpac.Git_backend.patches_branch in 2417 + let git_patches = List.fold_left (fun acc (_, n) -> acc + n) 0 git_patch_counts in 2418 + if opam_patches + git_patches > 0 then 2419 + Format.printf "Local patches: %d commits@." (opam_patches + git_patches); 2420 + 2421 + (* Check main for uncommitted *) 2422 + Log.debug (fun m -> m "Checking main worktree for changes..."); 2423 + if has_changes main_wt then 2424 + Format.printf "Warning: Uncommitted changes in main@."; 2425 + Log.debug (fun m -> m "Short summary complete.") 2426 + end else begin 2427 + (* Full status *) 2428 + Log.debug (fun m -> m "Generating full status..."); 2429 + Format.printf "=== Unpac Workspace Status ===@.@."; 2430 + 2431 + (* Main worktree status *) 2432 + Log.debug (fun m -> m "Checking main worktree status..."); 2433 + Format.printf "Main worktree: %s@." (snd main_wt); 2434 + if has_changes main_wt then 2435 + Format.printf " @{<yellow>Warning: Uncommitted changes@}@." 2436 + else 2437 + Format.printf " Clean@."; 2438 + Format.printf "@."; 2439 + 2440 + (* Projects - precompute all ancestry relationships in parallel *) 2441 + Log.debug (fun m -> m "Processing %d projects..." (List.length project_names)); 2442 + Format.printf "=== Projects (%d) ===@." (List.length project_names); 2443 + if project_names = [] then 2444 + Format.printf " (none)@." 2445 + else begin 2446 + (* Build all ancestry pairs to check: (patches_branch, project_branch) *) 2447 + let opam_pairs = List.concat_map (fun proj -> 2448 + let proj_branch = "project/" ^ proj in 2449 + List.map (fun pkg -> 2450 + (Unpac_opam.Opam.patches_branch pkg, proj_branch) 2451 + ) opam_packages 2452 + ) project_names in 2453 + let git_pairs = List.concat_map (fun proj -> 2454 + let proj_branch = "project/" ^ proj in 2455 + List.map (fun repo -> 2456 + (Unpac.Git_backend.patches_branch repo, proj_branch) 2457 + ) git_repos 2458 + ) project_names in 2459 + 2460 + (* Check all ancestry in parallel *) 2461 + Log.debug (fun m -> m "Checking %d ancestry relations in parallel..." 2462 + (List.length opam_pairs + List.length git_pairs)); 2463 + let all_pairs = opam_pairs @ git_pairs in 2464 + let ancestry_results = parallel_is_ancestor all_pairs in 2465 + 2466 + (* Build lookup table: (patches_branch, project_branch) -> is_ancestor *) 2467 + let ancestry_table = Hashtbl.create (List.length ancestry_results) in 2468 + List.iter (fun (a, b, result) -> 2469 + Hashtbl.add ancestry_table (a, b) result 2470 + ) ancestry_results; 2471 + 2472 + let is_ancestor_cached a b = 2473 + try Hashtbl.find ancestry_table (a, b) 2474 + with Not_found -> is_ancestor a b (* fallback *) 2475 + in 2476 + 2477 + List.iteri (fun i proj -> 2478 + Log.debug (fun m -> m " Project %d/%d: %s" (i+1) (List.length project_names) proj); 2479 + let proj_branch = "project/" ^ proj in 2480 + let proj_wt = Unpac.Worktree.path root (Unpac.Worktree.Project proj) in 2481 + let wt_exists = Sys.file_exists (snd proj_wt) in 2482 + let dirty = wt_exists && has_changes proj_wt in 2483 + 2484 + (* Count merged packages - use cached results *) 2485 + let merged_opam = List.filter (fun pkg -> 2486 + is_ancestor_cached (Unpac_opam.Opam.patches_branch pkg) proj_branch 2487 + ) opam_packages in 2488 + let merged_git = List.filter (fun repo -> 2489 + is_ancestor_cached (Unpac.Git_backend.patches_branch repo) proj_branch 2490 + ) git_repos in 2491 + 2492 + Format.printf " %s" proj; 2493 + if wt_exists then Format.printf " [worktree]"; 2494 + if dirty then Format.printf " @{<yellow>*dirty*@}"; 2495 + Format.printf "@."; 2496 + Format.printf " Merged: %d opam, %d git@." 2497 + (List.length merged_opam) (List.length merged_git) 2498 + ) project_names 2499 + end; 2500 + Format.printf "@."; 2501 + 2502 + (* Opam packages - parallel commit counts and cached ancestry *) 2503 + Log.debug (fun m -> m "Processing %d opam packages..." (List.length opam_packages)); 2504 + Format.printf "=== Opam Packages (%d) ===@." (List.length opam_packages); 2505 + if opam_packages = [] then 2506 + Format.printf " (none)@." 2507 + else begin 2508 + (* Parallel commit counts *) 2509 + let opam_counts = parallel_commit_counts opam_packages 2510 + Unpac_opam.Opam.vendor_branch Unpac_opam.Opam.patches_branch in 2511 + let opam_count_table = Hashtbl.create (List.length opam_counts) in 2512 + List.iter (fun (pkg, count) -> Hashtbl.add opam_count_table pkg count) opam_counts; 2513 + 2514 + (* Build ancestry pairs for opam -> projects (reuse if possible) *) 2515 + let opam_to_proj_pairs = List.concat_map (fun pkg -> 2516 + let patches_branch = Unpac_opam.Opam.patches_branch pkg in 2517 + List.map (fun proj -> (patches_branch, "project/" ^ proj)) project_names 2518 + ) opam_packages in 2519 + let opam_ancestry_results = parallel_is_ancestor opam_to_proj_pairs in 2520 + let opam_ancestry_table = Hashtbl.create (List.length opam_ancestry_results) in 2521 + List.iter (fun (a, b, result) -> 2522 + Hashtbl.add opam_ancestry_table (a, b) result 2523 + ) opam_ancestry_results; 2524 + 2525 + Format.printf " %-25s %8s %s@." "Package" "Patches" "Merged into"; 2526 + Format.printf " %s@." (String.make 60 '-'); 2527 + List.iteri (fun i pkg -> 2528 + Log.debug (fun m -> m " Opam package %d/%d: %s" (i+1) (List.length opam_packages) pkg); 2529 + let patches_branch = Unpac_opam.Opam.patches_branch pkg in 2530 + let patch_count = try Hashtbl.find opam_count_table pkg with Not_found -> 0 in 2531 + 2532 + (* Check active worktrees *) 2533 + let patches_wt = Unpac.Worktree.path root (Unpac.Worktree.Opam_patches pkg) in 2534 + let has_wt = Sys.file_exists (snd patches_wt) in 2535 + let dirty = has_wt && has_changes patches_wt in 2536 + 2537 + (* Check merged into which projects - use cached results *) 2538 + let merged_into = List.filter (fun proj -> 2539 + try Hashtbl.find opam_ancestry_table (patches_branch, "project/" ^ proj) 2540 + with Not_found -> false 2541 + ) project_names in 2542 + 2543 + let merged_str = if merged_into = [] then "-" 2544 + else String.concat ", " merged_into in 2545 + 2546 + Format.printf " %-25s" pkg; 2547 + if has_wt then Format.printf "*" else Format.printf " "; 2548 + Format.printf "%7d %s" patch_count merged_str; 2549 + if dirty then Format.printf " @{<yellow>(uncommitted)@}"; 2550 + Format.printf "@." 2551 + ) opam_packages 2552 + end; 2553 + Format.printf "@."; 2554 + 2555 + (* Git repos - parallel commit counts and cached ancestry *) 2556 + Log.debug (fun m -> m "Processing %d git repos..." (List.length git_repos)); 2557 + Format.printf "=== Git Repositories (%d) ===@." (List.length git_repos); 2558 + if git_repos = [] then 2559 + Format.printf " (none)@." 2560 + else begin 2561 + (* Parallel commit counts *) 2562 + let git_counts = parallel_commit_counts git_repos 2563 + Unpac.Git_backend.vendor_branch Unpac.Git_backend.patches_branch in 2564 + let git_count_table = Hashtbl.create (List.length git_counts) in 2565 + List.iter (fun (repo, count) -> Hashtbl.add git_count_table repo count) git_counts; 2566 + 2567 + (* Build ancestry pairs for git -> projects *) 2568 + let git_to_proj_pairs = List.concat_map (fun repo -> 2569 + let patches_branch = Unpac.Git_backend.patches_branch repo in 2570 + List.map (fun proj -> (patches_branch, "project/" ^ proj)) project_names 2571 + ) git_repos in 2572 + let git_ancestry_results = parallel_is_ancestor git_to_proj_pairs in 2573 + let git_ancestry_table = Hashtbl.create (List.length git_ancestry_results) in 2574 + List.iter (fun (a, b, result) -> 2575 + Hashtbl.add git_ancestry_table (a, b) result 2576 + ) git_ancestry_results; 2577 + 2578 + Format.printf " %-25s %8s %s@." "Repository" "Patches" "Merged into"; 2579 + Format.printf " %s@." (String.make 60 '-'); 2580 + List.iteri (fun i repo -> 2581 + Log.debug (fun m -> m " Git repo %d/%d: %s" (i+1) (List.length git_repos) repo); 2582 + let patches_branch = Unpac.Git_backend.patches_branch repo in 2583 + let patch_count = try Hashtbl.find git_count_table repo with Not_found -> 0 in 2584 + 2585 + let patches_wt = Unpac.Worktree.path root (Unpac.Worktree.Git_patches repo) in 2586 + let has_wt = Sys.file_exists (snd patches_wt) in 2587 + let dirty = has_wt && has_changes patches_wt in 2588 + 2589 + let merged_into = List.filter (fun proj -> 2590 + try Hashtbl.find git_ancestry_table (patches_branch, "project/" ^ proj) 2591 + with Not_found -> false 2592 + ) project_names in 2593 + 2594 + let merged_str = if merged_into = [] then "-" 2595 + else String.concat ", " merged_into in 2596 + 2597 + Format.printf " %-25s" repo; 2598 + if has_wt then Format.printf "*" else Format.printf " "; 2599 + Format.printf "%7d %s" patch_count merged_str; 2600 + if dirty then Format.printf " @{<yellow>(uncommitted)@}"; 2601 + Format.printf "@." 2602 + ) git_repos 2603 + end; 2604 + Format.printf "@."; 2605 + 2606 + (* Active worktrees summary *) 2607 + let active_worktrees = ref [] in 2608 + List.iter (fun pkg -> 2609 + let wt = Unpac.Worktree.path root (Unpac.Worktree.Opam_patches pkg) in 2610 + if Sys.file_exists (snd wt) then 2611 + active_worktrees := ("opam/" ^ pkg ^ "-patches", has_changes wt) :: !active_worktrees 2612 + ) opam_packages; 2613 + List.iter (fun repo -> 2614 + let wt = Unpac.Worktree.path root (Unpac.Worktree.Git_patches repo) in 2615 + if Sys.file_exists (snd wt) then 2616 + active_worktrees := ("git/" ^ repo ^ "-patches", has_changes wt) :: !active_worktrees 2617 + ) git_repos; 2618 + 2619 + if !active_worktrees <> [] then begin 2620 + Format.printf "=== Active Worktrees ===@."; 2621 + List.iter (fun (name, dirty) -> 2622 + Format.printf " %s" name; 2623 + if dirty then Format.printf " @{<yellow>*uncommitted*@}"; 2624 + Format.printf "@." 2625 + ) (List.rev !active_worktrees); 2626 + Format.printf "@." 2627 + end; 2628 + 2629 + (* Legend *) 2630 + Format.printf "Legend: * = worktree active@."; 2631 + Log.debug (fun m -> m "Full status output complete.") 2632 + end; 2633 + 2634 + (* Generate README.md unless --no-readme *) 2635 + if not no_readme then begin 2636 + Log.debug (fun m -> m "Generating README.md..."); 2637 + let buf = Buffer.create 4096 in 2638 + let add = Buffer.add_string buf in 2639 + let addf fmt = Printf.ksprintf add fmt in 2640 + let timestamp = 2641 + let tm = Unix.localtime (Unix.time ()) in 2642 + Printf.sprintf "%04d-%02d-%02d %02d:%02d:%02d" 2643 + (tm.Unix.tm_year + 1900) (tm.Unix.tm_mon + 1) tm.Unix.tm_mday 2644 + tm.Unix.tm_hour tm.Unix.tm_min tm.Unix.tm_sec 2645 + in 2646 + 2647 + (* Get tangled.org base URL from origin remote *) 2648 + Log.debug (fun m -> m "Getting origin remote URL..."); 2649 + let tangled_base = 2650 + match Unpac.Git.remote_url ~proc_mgr ~cwd:git "origin" with 2651 + | None -> 2652 + Log.debug (fun m -> m "No origin remote found"); 2653 + None 2654 + | Some url -> 2655 + (* Parse git@git.recoil.org:user/repo or similar *) 2656 + let url = String.trim url in 2657 + (* Handle git@host:user/repo format *) 2658 + if String.starts_with ~prefix:"git@" url then 2659 + match String.index_opt url ':' with 2660 + | None -> None 2661 + | Some colon_pos -> 2662 + let path = String.sub url (colon_pos + 1) (String.length url - colon_pos - 1) in 2663 + (* Strip .git suffix if present *) 2664 + let path = if String.ends_with ~suffix:".git" path then 2665 + String.sub path 0 (String.length path - 4) else path in 2666 + Some (Printf.sprintf "https://tangled.org/%s" path) 2667 + else None 2668 + in 2669 + 2670 + (* URL encode a branch name for tree URLs *) 2671 + let url_encode s = 2672 + let buf = Buffer.create (String.length s * 2) in 2673 + String.iter (fun c -> 2674 + match c with 2675 + | '/' -> Buffer.add_string buf "%2F" 2676 + | ' ' -> Buffer.add_string buf "%20" 2677 + | c -> Buffer.add_char buf c 2678 + ) s; 2679 + Buffer.contents buf 2680 + in 2681 + 2682 + (* Create a markdown link to a branch tree, or just the name if no base URL *) 2683 + let branch_link name branch = 2684 + match tangled_base with 2685 + | None -> name 2686 + | Some base -> Printf.sprintf "[%s](%s/tree/%s)" name base (url_encode branch) 2687 + in 2688 + 2689 + add "# Unpac Workspace Status\n\n"; 2690 + addf "_Last updated: %s_\n\n" timestamp; 2691 + 2692 + (* Summary *) 2693 + add "## Summary\n\n"; 2694 + addf "| Category | Count |\n"; 2695 + addf "|----------|-------|\n"; 2696 + addf "| Projects | %d |\n" (List.length project_names); 2697 + addf "| Opam Packages | %d |\n" (List.length opam_packages); 2698 + addf "| Git Repositories | %d |\n\n" (List.length git_repos); 2699 + 2700 + (* Projects section - parallel ancestry checks *) 2701 + Log.debug (fun m -> m "README: Processing %d projects..." (List.length project_names)); 2702 + add "## Projects\n\n"; 2703 + if project_names = [] then 2704 + add "_No projects created yet._\n\n" 2705 + else begin 2706 + (* Precompute all ancestry in parallel for README *) 2707 + let readme_opam_pairs = List.concat_map (fun proj -> 2708 + let proj_branch = "project/" ^ proj in 2709 + List.map (fun pkg -> 2710 + (Unpac_opam.Opam.patches_branch pkg, proj_branch) 2711 + ) opam_packages 2712 + ) project_names in 2713 + let readme_git_pairs = List.concat_map (fun proj -> 2714 + let proj_branch = "project/" ^ proj in 2715 + List.map (fun repo -> 2716 + (Unpac.Git_backend.patches_branch repo, proj_branch) 2717 + ) git_repos 2718 + ) project_names in 2719 + Log.debug (fun m -> m "README: Checking %d ancestry relations in parallel..." 2720 + (List.length readme_opam_pairs + List.length readme_git_pairs)); 2721 + let readme_ancestry_results = parallel_is_ancestor (readme_opam_pairs @ readme_git_pairs) in 2722 + let readme_ancestry_table = Hashtbl.create (List.length readme_ancestry_results) in 2723 + List.iter (fun (a, b, result) -> 2724 + Hashtbl.add readme_ancestry_table (a, b) result 2725 + ) readme_ancestry_results; 2726 + let readme_is_ancestor a b = 2727 + try Hashtbl.find readme_ancestry_table (a, b) 2728 + with Not_found -> false 2729 + in 2730 + 2731 + add "| Project | Opam Merged | Git Merged | Status |\n"; 2732 + add "|---------|-------------|------------|--------|\n"; 2733 + List.iteri (fun i proj -> 2734 + Log.debug (fun m -> m "README: Project %d/%d: %s" (i+1) (List.length project_names) proj); 2735 + let proj_branch = "project/" ^ proj in 2736 + let proj_wt = Unpac.Worktree.path root (Unpac.Worktree.Project proj) in 2737 + let wt_exists = Sys.file_exists (snd proj_wt) in 2738 + let dirty = wt_exists && has_changes proj_wt in 2739 + 2740 + let merged_opam = List.filter (fun pkg -> 2741 + readme_is_ancestor (Unpac_opam.Opam.patches_branch pkg) proj_branch 2742 + ) opam_packages in 2743 + let merged_git = List.filter (fun repo -> 2744 + readme_is_ancestor (Unpac.Git_backend.patches_branch repo) proj_branch 2745 + ) git_repos in 2746 + 2747 + let status = 2748 + if dirty then "⚠️ uncommitted" 2749 + else if wt_exists then "📂 worktree active" 2750 + else "✓" in 2751 + addf "| %s | %d | %d | %s |\n" 2752 + (branch_link proj proj_branch) (List.length merged_opam) (List.length merged_git) status 2753 + ) project_names; 2754 + add "\n" 2755 + end; 2756 + 2757 + (* Opam packages section - parallel *) 2758 + Log.debug (fun m -> m "README: Processing %d opam packages..." (List.length opam_packages)); 2759 + add "## Opam Packages\n\n"; 2760 + if opam_packages = [] then 2761 + add "_No opam packages vendored yet._\n\n" 2762 + else begin 2763 + (* Parallel commit counts *) 2764 + let readme_opam_counts = parallel_commit_counts opam_packages 2765 + Unpac_opam.Opam.vendor_branch Unpac_opam.Opam.patches_branch in 2766 + let readme_opam_count_table = Hashtbl.create (List.length readme_opam_counts) in 2767 + List.iter (fun (pkg, count) -> Hashtbl.add readme_opam_count_table pkg count) readme_opam_counts; 2768 + 2769 + (* Parallel ancestry for opam -> projects *) 2770 + let readme_opam_to_proj = List.concat_map (fun pkg -> 2771 + let patches_branch = Unpac_opam.Opam.patches_branch pkg in 2772 + List.map (fun proj -> (patches_branch, "project/" ^ proj)) project_names 2773 + ) opam_packages in 2774 + let readme_opam_ancestry = parallel_is_ancestor readme_opam_to_proj in 2775 + let readme_opam_anc_table = Hashtbl.create (List.length readme_opam_ancestry) in 2776 + List.iter (fun (a, b, result) -> 2777 + Hashtbl.add readme_opam_anc_table (a, b) result 2778 + ) readme_opam_ancestry; 2779 + 2780 + add "| Package | Patches | Merged Into | Status |\n"; 2781 + add "|---------|---------|-------------|--------|\n"; 2782 + List.iteri (fun i pkg -> 2783 + Log.debug (fun m -> m "README: Opam package %d/%d: %s" (i+1) (List.length opam_packages) pkg); 2784 + let patches_branch = Unpac_opam.Opam.patches_branch pkg in 2785 + let patch_count = try Hashtbl.find readme_opam_count_table pkg with Not_found -> 0 in 2786 + 2787 + let patches_wt = Unpac.Worktree.path root (Unpac.Worktree.Opam_patches pkg) in 2788 + let has_wt = Sys.file_exists (snd patches_wt) in 2789 + let dirty = has_wt && has_changes patches_wt in 2790 + 2791 + let merged_into = List.filter (fun proj -> 2792 + try Hashtbl.find readme_opam_anc_table (patches_branch, "project/" ^ proj) 2793 + with Not_found -> false 2794 + ) project_names in 2795 + 2796 + let merged_str = if merged_into = [] then "-" 2797 + else String.concat ", " (List.map (fun p -> branch_link p ("project/" ^ p)) merged_into) in 2798 + 2799 + let status = 2800 + if dirty then "⚠️ uncommitted" 2801 + else if has_wt then "📂 editing" 2802 + else "✓" in 2803 + 2804 + addf "| %s | %d | %s | %s |\n" (branch_link pkg patches_branch) patch_count merged_str status 2805 + ) opam_packages; 2806 + add "\n" 2807 + end; 2808 + 2809 + (* Git repositories section - parallel *) 2810 + Log.debug (fun m -> m "README: Processing %d git repos..." (List.length git_repos)); 2811 + add "## Git Repositories\n\n"; 2812 + if git_repos = [] then 2813 + add "_No git repositories vendored yet._\n\n" 2814 + else begin 2815 + (* Parallel commit counts *) 2816 + let readme_git_counts = parallel_commit_counts git_repos 2817 + Unpac.Git_backend.vendor_branch Unpac.Git_backend.patches_branch in 2818 + let readme_git_count_table = Hashtbl.create (List.length readme_git_counts) in 2819 + List.iter (fun (repo, count) -> Hashtbl.add readme_git_count_table repo count) readme_git_counts; 2820 + 2821 + (* Parallel ancestry for git -> projects *) 2822 + let readme_git_to_proj = List.concat_map (fun repo -> 2823 + let patches_branch = Unpac.Git_backend.patches_branch repo in 2824 + List.map (fun proj -> (patches_branch, "project/" ^ proj)) project_names 2825 + ) git_repos in 2826 + let readme_git_ancestry = parallel_is_ancestor readme_git_to_proj in 2827 + let readme_git_anc_table = Hashtbl.create (List.length readme_git_ancestry) in 2828 + List.iter (fun (a, b, result) -> 2829 + Hashtbl.add readme_git_anc_table (a, b) result 2830 + ) readme_git_ancestry; 2831 + 2832 + add "| Repository | Patches | Merged Into | Status |\n"; 2833 + add "|------------|---------|-------------|--------|\n"; 2834 + List.iteri (fun i repo -> 2835 + Log.debug (fun m -> m "README: Git repo %d/%d: %s" (i+1) (List.length git_repos) repo); 2836 + let patches_branch = Unpac.Git_backend.patches_branch repo in 2837 + let patch_count = try Hashtbl.find readme_git_count_table repo with Not_found -> 0 in 2838 + 2839 + let patches_wt = Unpac.Worktree.path root (Unpac.Worktree.Git_patches repo) in 2840 + let has_wt = Sys.file_exists (snd patches_wt) in 2841 + let dirty = has_wt && has_changes patches_wt in 2842 + 2843 + let merged_into = List.filter (fun proj -> 2844 + try Hashtbl.find readme_git_anc_table (patches_branch, "project/" ^ proj) 2845 + with Not_found -> false 2846 + ) project_names in 2847 + 2848 + let merged_str = if merged_into = [] then "-" 2849 + else String.concat ", " (List.map (fun p -> branch_link p ("project/" ^ p)) merged_into) in 2850 + 2851 + let status = 2852 + if dirty then "⚠️ uncommitted" 2853 + else if has_wt then "📂 editing" 2854 + else "✓" in 2855 + 2856 + addf "| %s | %d | %s | %s |\n" (branch_link repo patches_branch) patch_count merged_str status 2857 + ) git_repos; 2858 + add "\n" 2859 + end; 2860 + 2861 + (* Active worktrees *) 2862 + let active_wts = ref [] in 2863 + List.iter (fun pkg -> 2864 + let wt = Unpac.Worktree.path root (Unpac.Worktree.Opam_patches pkg) in 2865 + if Sys.file_exists (snd wt) then 2866 + active_wts := (Printf.sprintf "vendor/opam/%s-patches" pkg, has_changes wt) :: !active_wts 2867 + ) opam_packages; 2868 + List.iter (fun repo -> 2869 + let wt = Unpac.Worktree.path root (Unpac.Worktree.Git_patches repo) in 2870 + if Sys.file_exists (snd wt) then 2871 + active_wts := (Printf.sprintf "vendor/git/%s-patches" repo, has_changes wt) :: !active_wts 2872 + ) git_repos; 2873 + 2874 + if !active_wts <> [] then begin 2875 + add "## Active Worktrees\n\n"; 2876 + add "| Path | Status |\n"; 2877 + add "|------|--------|\n"; 2878 + List.iter (fun (name, dirty) -> 2879 + let status = if dirty then "⚠️ uncommitted changes" else "✓ clean" in 2880 + addf "| `%s` | %s |\n" name status 2881 + ) (List.rev !active_wts); 2882 + add "\n" 2883 + end; 2884 + 2885 + (* Changes section from audit log *) 2886 + Log.debug (fun m -> m "README: Generating Changes section from audit log..."); 2887 + let audit_path = Eio.Path.(main_wt / ".unpac-audit.json") |> snd in 2888 + (match Unpac.Audit.load audit_path with 2889 + | Error e -> 2890 + Log.debug (fun m -> m "README: Could not load audit log: %s" e) 2891 + | Ok audit_log -> 2892 + (* Filter to significant events and take most recent *) 2893 + let significant_ops = List.filter (fun (op : Unpac.Audit.operation) -> 2894 + match op.operation_type with 2895 + | Unpac.Audit.Project_new 2896 + | Unpac.Audit.Project_promote 2897 + | Unpac.Audit.Opam_add 2898 + | Unpac.Audit.Git_add 2899 + | Unpac.Audit.Init -> true 2900 + | _ -> false 2901 + ) audit_log.entries in 2902 + (* Take most recent 20 events, reverse to show oldest first *) 2903 + let recent_ops = 2904 + significant_ops 2905 + |> (fun l -> if List.length l > 20 then 2906 + List.filteri (fun i _ -> i < 20) l 2907 + else l) 2908 + |> List.rev 2909 + in 2910 + if recent_ops <> [] then begin 2911 + add "## Changes\n\n"; 2912 + add "| Date | Event | Details |\n"; 2913 + add "|------|-------|--------|\n"; 2914 + List.iter (fun (op : Unpac.Audit.operation) -> 2915 + let tm = Unix.localtime op.timestamp in 2916 + let date = Printf.sprintf "%04d-%02d-%02d" 2917 + (tm.Unix.tm_year + 1900) (tm.Unix.tm_mon + 1) tm.Unix.tm_mday in 2918 + let (event, details) = match op.operation_type, op.args with 2919 + | Unpac.Audit.Init, _ -> 2920 + ("Workspace initialized", "") 2921 + | Unpac.Audit.Project_new, name :: _ -> 2922 + ("Project created", Printf.sprintf "`%s`" name) 2923 + | Unpac.Audit.Project_promote, name :: _ -> 2924 + let backend = List.find_map (fun arg -> 2925 + if String.starts_with ~prefix:"--backend" arg then None 2926 + else match List.nth_opt op.args (1 + (List.length (List.filter ((=) arg) (List.filteri (fun i _ -> i = 0) op.args)))) with 2927 + | _ -> None 2928 + ) op.args in 2929 + let backend_str = match backend with Some b -> b | None -> 2930 + (* Try to find backend in args *) 2931 + let rec find_backend = function 2932 + | "--backend" :: b :: _ -> b 2933 + | "-b" :: b :: _ -> b 2934 + | _ :: rest -> find_backend rest 2935 + | [] -> "opam" 2936 + in find_backend op.args 2937 + in 2938 + ("Project promoted", Printf.sprintf "`%s` → %s vendor" name backend_str) 2939 + | Unpac.Audit.Opam_add, pkgs -> 2940 + let pkg_list = String.concat ", " (List.map (fun p -> Printf.sprintf "`%s`" p) pkgs) in 2941 + ("Opam packages added", pkg_list) 2942 + | Unpac.Audit.Git_add, name :: _ -> 2943 + ("Git repo added", Printf.sprintf "`%s`" name) 2944 + | _, args -> 2945 + (Unpac.Audit.operation_type_to_string op.operation_type, 2946 + String.concat " " args) 2947 + in 2948 + addf "| %s | %s | %s |\n" date event details 2949 + ) recent_ops; 2950 + add "\n" 2951 + end); 2952 + 2953 + (* Footer *) 2954 + add "---\n\n"; 2955 + add "_Generated by `unpac status`_\n"; 2956 + 2957 + (* Write README.md *) 2958 + Log.debug (fun m -> m "README: Checking if README.md needs update..."); 2959 + let readme_path = Filename.concat (snd main_wt) "README.md" in 2960 + let content = Buffer.contents buf in 2961 + 2962 + (* Check if content changed *) 2963 + let old_content = 2964 + if Sys.file_exists readme_path then begin 2965 + Log.debug (fun m -> m "README: Reading existing README.md..."); 2966 + let ic = open_in readme_path in 2967 + let len = in_channel_length ic in 2968 + let s = really_input_string ic len in 2969 + close_in ic; 2970 + Some s 2971 + end else begin 2972 + Log.debug (fun m -> m "README: No existing README.md"); 2973 + None 2974 + end 2975 + in 2976 + 2977 + (* Only write and commit if changed (ignoring timestamp line) *) 2978 + let content_without_timestamp s = 2979 + (* Remove the timestamp line for comparison *) 2980 + Str.global_replace (Str.regexp "_Last updated:.*_") "" s 2981 + in 2982 + let changed = match old_content with 2983 + | None -> true 2984 + | Some old -> content_without_timestamp old <> content_without_timestamp content 2985 + in 2986 + 2987 + if changed then begin 2988 + Log.debug (fun m -> m "README: Writing updated README.md..."); 2989 + let oc = open_out readme_path in 2990 + output_string oc content; 2991 + close_out oc; 2992 + Format.printf "@.README.md updated.@."; 2993 + (* Git add and commit *) 2994 + Log.debug (fun m -> m "README: Staging README.md..."); 2995 + Unpac.Git.run_exn ~proc_mgr ~cwd:main_wt ["add"; "README.md"] |> ignore; 2996 + (try 2997 + Log.debug (fun m -> m "README: Committing README.md..."); 2998 + Unpac.Git.run_exn ~proc_mgr ~cwd:main_wt 2999 + ["commit"; "-m"; "Update workspace status in README.md"] |> ignore; 3000 + Format.printf "Committed README.md changes.@." 3001 + with _ -> 3002 + (* Commit might fail if nothing staged (e.g., only timestamp changed) *) 3003 + Log.debug (fun m -> m "README: Commit failed (likely nothing to commit)")) 3004 + end else 3005 + Log.debug (fun m -> m "README: No changes, skipping write"); 3006 + Log.debug (fun m -> m "Status command complete.") 3007 + end 3008 + in 3009 + let info = Cmd.info "status" ~doc ~man in 3010 + Cmd.v info Term.(const run $ const () $ short_flag $ no_readme_flag $ verbose_flag) 3011 + 3012 + (* Monorepo export command *) 3013 + let monorepo_cmd = 3014 + let doc = "Export a standalone buildable monorepo." in 3015 + let man = [ 3016 + `S Manpage.s_description; 3017 + `P "Creates a standalone directory containing all projects and their \ 3018 + vendored dependencies, suitable for building with dune. No git history \ 3019 + is included - only the current state of each branch."; 3020 + `S "OUTPUT STRUCTURE"; 3021 + `Pre " output/ 3022 + ├── dune-project 3023 + ├── dune 3024 + ├── project1/ 3025 + │ ├── src/ 3026 + │ └── dune 3027 + ├── project2/ 3028 + │ └── ... 3029 + └── vendor/ 3030 + ├── opam/ 3031 + │ ├── pkg1/ 3032 + │ └── pkg2/ 3033 + └── git/ 3034 + └── repo1/"; 3035 + `S Manpage.s_examples; 3036 + `P "Export all projects:"; 3037 + `Pre " unpac monorepo /path/to/output"; 3038 + `P "Export specific projects:"; 3039 + `Pre " unpac monorepo -p myapp -p mylib /path/to/output"; 3040 + `P "Export without opam packages:"; 3041 + `Pre " unpac monorepo --no-opam /path/to/output"; 3042 + ] in 3043 + let output_arg = 3044 + let doc = "Output directory for the monorepo." in 3045 + Arg.(required & pos 0 (some string) None & info [] ~docv:"OUTPUT" ~doc) 3046 + in 3047 + let projects_arg = 3048 + let doc = "Specific projects to include (can be repeated). Default: all projects." in 3049 + Arg.(value & opt_all string [] & info ["p"; "project"] ~docv:"NAME" ~doc) 3050 + in 3051 + let no_opam_arg = 3052 + let doc = "Exclude vendored opam packages." in 3053 + Arg.(value & flag & info ["no-opam"] ~doc) 3054 + in 3055 + let no_git_arg = 3056 + let doc = "Exclude vendored git repositories." in 3057 + Arg.(value & flag & info ["no-git"] ~doc) 3058 + in 3059 + let run () output_dir projects no_opam no_git = 3060 + with_root @@ fun ~env:_ ~fs:_ ~proc_mgr ~root -> 3061 + let config : Unpac.Monorepo.export_config = { 3062 + output_dir; 3063 + projects = if projects = [] then None else Some projects; 3064 + include_opam = not no_opam; 3065 + include_git = not no_git; 3066 + } in 3067 + let result = Unpac.Monorepo.export ~proc_mgr ~root ~config in 3068 + Format.printf "@.Monorepo exported to %s@." result.output_path; 3069 + Format.printf "@.Contents:@."; 3070 + Format.printf " Projects: %s@." (String.concat ", " result.projects_exported); 3071 + if result.opam_packages <> [] then 3072 + Format.printf " Opam packages: %d@." (List.length result.opam_packages); 3073 + if result.git_repos <> [] then 3074 + Format.printf " Git repos: %d@." (List.length result.git_repos); 3075 + Format.printf "@.Build with:@."; 3076 + Format.printf " cd %s && dune build@." output_dir; 3077 + Format.printf " cd %s && dune build @doc@." output_dir 3078 + in 3079 + let info = Cmd.info "monorepo" ~doc ~man in 3080 + Cmd.v info Term.(const run $ logging_term $ output_arg $ projects_arg $ no_opam_arg $ no_git_arg) 3081 + 3082 + (* Main command *) 3083 + let main_cmd = 3084 + let doc = "Multi-backend vendoring tool using git worktrees." in 3085 + let man = [ 3086 + `S Manpage.s_description; 3087 + `P "Unpac is a vendoring tool that maintains third-party dependencies \ 3088 + as git branches with full history. It uses git worktrees to provide \ 3089 + isolated views for editing, and a three-tier branch model \ 3090 + (upstream/vendor/patches) for conflict-free updates."; 3091 + `S "VENDORING MODES"; 3092 + `I ("unpac opam", "Vendor OCaml packages from opam repositories with \ 3093 + dependency solving."); 3094 + `I ("unpac git", "Vendor arbitrary git repositories directly by URL."); 3095 + `S "THREE-TIER BRANCH MODEL"; 3096 + `P "Each vendored item has three branches:"; 3097 + `I ("upstream/*", "Tracks the original repository"); 3098 + `I ("vendor/*", "Clean snapshot used as merge base"); 3099 + `I ("patches/*", "Your local modifications"); 3100 + `S "QUICK START"; 3101 + `Pre " unpac init myproject && cd myproject 3102 + unpac opam repo add default /path/to/opam-repository 3103 + unpac opam config compiler 5.2.0 3104 + unpac project new main 3105 + unpac opam add mylib --solve 3106 + unpac opam merge --all main"; 3107 + `S "COMMANDS"; 3108 + ] in 3109 + let info = Cmd.info "unpac" ~version:"0.1.0" ~doc ~man in 3110 + Cmd.group info [init_cmd; status_cmd; project_cmd; opam_cmd; git_cmd; vendor_cmd; push_cmd; log_cmd; 3111 + export_cmd; export_set_remote_cmd; export_push_cmd; export_list_cmd; monorepo_cmd] 3112 + 3113 + let () = exit (Cmd.eval main_cmd)
+5
bin/unpac-claude/dune
··· 1 + (executable 2 + (name main) 3 + (public_name unpac-claude) 4 + (package unpac-claude) 5 + (libraries unpac_claude cmdliner eio_main logs logs.fmt fmt.tty))
+89
bin/unpac-claude/main.ml
··· 1 + (** Unpac Claude agent - ralph-loop style autonomous coding for workspace projects. *) 2 + 3 + open Cmdliner 4 + 5 + let setup_logging verbose = 6 + Fmt_tty.setup_std_outputs (); 7 + (* Normal mode: Warning level (suppress Claude lib's JSON INFO logs) 8 + Verbose mode: Debug level (show everything) *) 9 + let level = if verbose then Logs.Debug else Logs.Warning in 10 + Logs.set_level (Some level); 11 + Logs.set_reporter (Logs_fmt.reporter ()) 12 + 13 + let run_agent verbose web_port project workspace_path = 14 + setup_logging verbose; 15 + Eio_main.run @@ fun env -> 16 + let config : Unpac_claude.Agent.config = { 17 + verbose; 18 + web_port; 19 + max_iterations = 20; 20 + project; 21 + } in 22 + Unpac_claude.Agent.run ~env ~config ~workspace_path () 23 + 24 + (* CLI *) 25 + let verbose_arg = 26 + let doc = "Enable verbose logging." in 27 + Arg.(value & flag & info ["v"; "verbose"] ~doc) 28 + 29 + let web_port_arg = 30 + let doc = "Enable web UI on this port. Shows live streaming events." in 31 + Arg.(value & opt (some int) None & info ["web"] ~docv:"PORT" ~doc) 32 + 33 + let project_arg = 34 + let doc = "Specific project to work on. If not specified, runs all \ 35 + projects sequentially in random order." in 36 + Arg.(value & opt (some string) None & info ["p"; "project"] ~docv:"NAME" ~doc) 37 + 38 + let workspace_arg = 39 + let doc = "Path to the unpac workspace. Required." in 40 + Arg.(required & pos 0 (some string) None & info [] ~docv:"WORKSPACE" ~doc) 41 + 42 + let cmd = 43 + let doc = "Ralph-loop style Claude agent for unpac workspace projects" in 44 + let man = [ 45 + `S Manpage.s_description; 46 + `P "Runs an autonomous Claude agent using the ralph-loop pattern: \ 47 + the same prompt is fed each iteration, with state persisting in \ 48 + files. The agent works on projects until either:"; 49 + `I ("Iterations", "20 iterations have completed"); 50 + `I ("Completion", "The agent outputs the completion promise"); 51 + `S "RALPH-LOOP PATTERN"; 52 + `P "Unlike traditional agentic loops that vary prompts based on \ 53 + previous responses, ralph-loop feeds the SAME prompt every \ 54 + iteration. Claude's progress persists in files (STATUS.md, \ 55 + source code, git commits) which it reads on each iteration."; 56 + `P "This creates a self-referential improvement loop where Claude \ 57 + sees its own previous work and continues from there."; 58 + `S "COMPLETION PROMISE"; 59 + `P (Printf.sprintf "When all significant work is complete, Claude \ 60 + outputs exactly: %s" Unpac_claude.Agent.completion_promise); 61 + `P "This signals the loop to stop early before 20 iterations."; 62 + `S "MODEL"; 63 + `P "Always uses Claude Opus 4.5 for maximum capability."; 64 + `S "WEB UI"; 65 + `P "Use --web PORT to enable a live web dashboard showing:"; 66 + `I ("Events", "Real-time streaming from the agent"); 67 + `I ("Tool calls", "Each tool invocation with input/output"); 68 + `I ("Iterations", "Current iteration progress"); 69 + `S Manpage.s_examples; 70 + `P "Run agent on all projects (random order):"; 71 + `Pre " unpac-claude /path/to/workspace"; 72 + `P "Run agent on a specific project:"; 73 + `Pre " unpac-claude -p mylib /path/to/workspace"; 74 + `P "With web UI on port 8080:"; 75 + `Pre " unpac-claude --web 8080 /path/to/workspace"; 76 + `P "Verbose logging:"; 77 + `Pre " unpac-claude -v /path/to/workspace"; 78 + `S "WORKING DIRECTORY"; 79 + `P "State is maintained in <workspace>/.unpac-claude/<project>/ \ 80 + with a .claude subdirectory for ralph-loop state."; 81 + `S "EXIT STATUS"; 82 + `P "Exits with 0 when all projects complete (either by iteration \ 83 + limit or completion promise). Can be interrupted with Ctrl+C."; 84 + ] in 85 + let info = Cmd.info "unpac-claude" ~version:"0.5.0" ~doc ~man in 86 + Cmd.v info Term.(const run_agent $ verbose_arg $ web_port_arg $ 87 + project_arg $ workspace_arg) 88 + 89 + let () = exit (Cmd.eval cmd)
+47
dune-project
··· 1 1 (lang dune 3.20) 2 2 (name unpac) 3 + (generate_opam_files true) 4 + 5 + (package 6 + (name unpac) 7 + (synopsis "Monorepo management tool") 8 + (description "A tool for managing OCaml monorepos with opam repository integration") 9 + (authors "Anil Madhavapeddy") 10 + (license ISC) 11 + (depends 12 + (ocaml (>= 5.1.0)) 13 + (eio_main (>= 1.0)) 14 + (logs (>= 0.7.0)) 15 + (fmt (>= 0.9.0)) 16 + tomlt 17 + (jsont (>= 0.1.0)))) 18 + 19 + (package 20 + (name unpac-opam) 21 + (synopsis "Opam backend for unpac") 22 + (description "Opam package vendoring backend for unpac") 23 + (authors "Anil Madhavapeddy") 24 + (license ISC) 25 + (depends 26 + (ocaml (>= 5.1.0)) 27 + unpac 28 + opam-format 29 + opam-core 30 + opam-state 31 + opam-0install 32 + (cmdliner (>= 1.2.0)))) 33 + 34 + (package 35 + (name unpac-claude) 36 + (synopsis "Claude AI agent for unpac") 37 + (description "An autonomous Claude agent that understands unpac workflows and can explore/code in a loop") 38 + (authors "Anil Madhavapeddy") 39 + (license ISC) 40 + (depends 41 + (ocaml (>= 5.1.0)) 42 + unpac 43 + claude 44 + (eio_main (>= 1.0)) 45 + (cmdliner (>= 1.2.0)) 46 + (logs (>= 0.7.0)) 47 + (fmt (>= 0.9.0)) 48 + (digestif (>= 1.0.0)) 49 + (base64 (>= 3.0.0))))
+4989
git-filter-repo
··· 1 + #!/usr/bin/env python3 2 + 3 + """ 4 + git-filter-repo filters git repositories, similar to git filter-branch, BFG 5 + repo cleaner, and others. The basic idea is that it works by running 6 + git fast-export <options> | filter | git fast-import <options> 7 + where this program not only launches the whole pipeline but also serves as 8 + the 'filter' in the middle. It does a few additional things on top as well 9 + in order to make it into a well-rounded filtering tool. 10 + 11 + git-filter-repo can also be used as a library for more involved filtering 12 + operations; however: 13 + ***** API BACKWARD COMPATIBILITY CAVEAT ***** 14 + Programs using git-filter-repo as a library can reach pretty far into its 15 + internals, but I am not prepared to guarantee backward compatibility of 16 + all APIs. I suspect changes will be rare, but I reserve the right to 17 + change any API. Since it is assumed that repository filtering is 18 + something one would do very rarely, and in particular that it's a 19 + one-shot operation, this should not be a problem in practice for anyone. 20 + However, if you want to re-use a program you have written that uses 21 + git-filter-repo as a library (or makes use of one of its --*-callback 22 + arguments), you should either make sure you are using the same version of 23 + git and git-filter-repo, or make sure to re-test it. 24 + 25 + If there are particular pieces of the API you are concerned about, and 26 + there is not already a testcase for it in t9391-lib-usage.sh or 27 + t9392-python-callback.sh, please contribute a testcase. That will not 28 + prevent me from changing the API, but it will allow you to look at the 29 + history of a testcase to see whether and how the API changed. 30 + ***** END API BACKWARD COMPATIBILITY CAVEAT ***** 31 + """ 32 + 33 + import argparse 34 + import collections 35 + import fnmatch 36 + import gettext 37 + import io 38 + import os 39 + import platform 40 + import re 41 + import shutil 42 + import subprocess 43 + import sys 44 + import time 45 + import textwrap 46 + 47 + from datetime import tzinfo, timedelta, datetime 48 + 49 + __all__ = ["Blob", "Reset", "FileChange", "Commit", "Tag", "Progress", 50 + "Checkpoint", "FastExportParser", "ProgressWriter", 51 + "string_to_date", "date_to_string", 52 + "record_id_rename", "GitUtils", "FilteringOptions", "RepoFilter"] 53 + 54 + # The globals to make visible to callbacks. They will see all our imports for 55 + # free, as well as our public API. 56 + public_globals = ["__builtins__", "argparse", "collections", "fnmatch", 57 + "gettext", "io", "os", "platform", "re", "shutil", 58 + "subprocess", "sys", "time", "textwrap", "tzinfo", 59 + "timedelta", "datetime"] + __all__ 60 + 61 + deleted_hash = b'0'*40 62 + write_marks = True 63 + date_format_permissive = True 64 + 65 + def gettext_poison(msg): 66 + if "GIT_TEST_GETTEXT_POISON" in os.environ: # pragma: no cover 67 + return "# GETTEXT POISON #" 68 + return gettext.gettext(msg) 69 + 70 + _ = gettext_poison 71 + 72 + def setup_gettext(): 73 + TEXTDOMAIN="git-filter-repo" 74 + podir = os.environ.get("GIT_TEXTDOMAINDIR") or "@@LOCALEDIR@@" 75 + if not os.path.isdir(podir): # pragma: no cover 76 + podir = None # Python has its own fallback; use that 77 + 78 + ## This looks like the most straightforward translation of the relevant 79 + ## code in git.git:gettext.c and git.git:perl/Git/I18n.pm: 80 + #import locale 81 + #locale.setlocale(locale.LC_MESSAGES, ""); 82 + #locale.setlocale(locale.LC_TIME, ""); 83 + #locale.textdomain(TEXTDOMAIN); 84 + #locale.bindtextdomain(TEXTDOMAIN, podir); 85 + ## but the python docs suggest using the gettext module (which doesn't 86 + ## have setlocale()) instead, so: 87 + gettext.textdomain(TEXTDOMAIN); 88 + gettext.bindtextdomain(TEXTDOMAIN, podir); 89 + 90 + def _timedelta_to_seconds(delta): 91 + """ 92 + Converts timedelta to seconds 93 + """ 94 + offset = delta.days*86400 + delta.seconds + (delta.microseconds+0.0)/1000000 95 + return round(offset) 96 + 97 + class FixedTimeZone(tzinfo): 98 + """ 99 + Fixed offset in minutes east from UTC. 100 + """ 101 + 102 + tz_re = re.compile(br'^([-+]?)(\d\d)(\d\d)$') 103 + 104 + def __init__(self, offset_string): 105 + tzinfo.__init__(self) 106 + sign, hh, mm = FixedTimeZone.tz_re.match(offset_string).groups() 107 + factor = -1 if (sign and sign == b'-') else 1 108 + self._offset = timedelta(minutes = factor*(60*int(hh) + int(mm))) 109 + self._offset_string = offset_string 110 + 111 + def utcoffset(self, dt): 112 + return self._offset 113 + 114 + def tzname(self, dt): 115 + return self._offset_string 116 + 117 + def dst(self, dt): 118 + return timedelta(0) 119 + 120 + def string_to_date(datestring): 121 + (unix_timestamp, tz_offset) = datestring.split() 122 + return datetime.fromtimestamp(int(unix_timestamp), 123 + FixedTimeZone(tz_offset)) 124 + 125 + def date_to_string(dateobj): 126 + epoch = datetime.fromtimestamp(0, dateobj.tzinfo) 127 + return(b'%d %s' % (int(_timedelta_to_seconds(dateobj - epoch)), 128 + dateobj.tzinfo.tzname(0))) 129 + 130 + def decode(bytestr): 131 + 'Try to convert bytestr to utf-8 for outputting as an error message.' 132 + return bytestr.decode('utf-8', 'backslashreplace') 133 + 134 + def glob_to_regex(glob_bytestr): 135 + 'Translate glob_bytestr into a regex on bytestrings' 136 + 137 + # fnmatch.translate is idiotic and won't accept bytestrings 138 + if (decode(glob_bytestr).encode() != glob_bytestr): # pragma: no cover 139 + raise SystemExit(_("Error: Cannot handle glob %s").format(glob_bytestr)) 140 + 141 + # Create regex operating on string 142 + regex = fnmatch.translate(decode(glob_bytestr)) 143 + 144 + # FIXME: This is an ugly hack... 145 + # fnmatch.translate tries to do multi-line matching and wants the glob to 146 + # match up to the end of the input, which isn't relevant for us, so we 147 + # have to modify the regex. fnmatch.translate has used different regex 148 + # constructs to achieve this with different python versions, so we have 149 + # to check for each of them and then fix it up. It would be much better 150 + # if fnmatch.translate could just take some flags to allow us to specify 151 + # what we want rather than employing this hackery, but since it 152 + # doesn't... 153 + if regex.endswith(r'\Z(?ms)'): # pragma: no cover 154 + regex = regex[0:-7] 155 + elif regex.startswith(r'(?s:') and regex.endswith(r')\Z'): # pragma: no cover 156 + regex = regex[4:-3] 157 + elif regex.startswith(r'(?s:') and regex.endswith(r')\z'): # pragma: no cover 158 + # Yaay, python3.14 for senselessly duplicating \Z as \z... 159 + regex = regex[4:-3] 160 + 161 + # Finally, convert back to regex operating on bytestr 162 + return regex.encode() 163 + 164 + class PathQuoting: 165 + _unescape = {b'a': b'\a', 166 + b'b': b'\b', 167 + b'f': b'\f', 168 + b'n': b'\n', 169 + b'r': b'\r', 170 + b't': b'\t', 171 + b'v': b'\v', 172 + b'"': b'"', 173 + b'\\':b'\\'} 174 + _unescape_re = re.compile(br'\\([a-z"\\]|[0-9]{3})') 175 + _escape = [bytes([x]) for x in range(127)]+[ 176 + b'\\'+bytes(ord(c) for c in oct(x)[2:]) for x in range(127,256)] 177 + _reverse = dict(map(reversed, _unescape.items())) 178 + for x in _reverse: 179 + _escape[ord(x)] = b'\\'+_reverse[x] 180 + _special_chars = [len(x) > 1 for x in _escape] 181 + 182 + @staticmethod 183 + def unescape_sequence(orig): 184 + seq = orig.group(1) 185 + return PathQuoting._unescape[seq] if len(seq) == 1 else bytes([int(seq, 8)]) 186 + 187 + @staticmethod 188 + def dequote(quoted_string): 189 + if quoted_string.startswith(b'"'): 190 + assert quoted_string.endswith(b'"') 191 + return PathQuoting._unescape_re.sub(PathQuoting.unescape_sequence, 192 + quoted_string[1:-1]) 193 + return quoted_string 194 + 195 + @staticmethod 196 + def enquote(unquoted_string): 197 + # Option 1: Quoting when fast-export would: 198 + # pqsc = PathQuoting._special_chars 199 + # if any(pqsc[x] for x in set(unquoted_string)): 200 + # Option 2, perf hack: do minimal amount of quoting required by fast-import 201 + if unquoted_string.startswith(b'"') or b'\n' in unquoted_string: 202 + pqe = PathQuoting._escape 203 + return b'"' + b''.join(pqe[x] for x in unquoted_string) + b'"' 204 + return unquoted_string 205 + 206 + class AncestryGraph(object): 207 + """ 208 + A class that maintains a direct acycle graph of commits for the purpose of 209 + determining if one commit is the ancestor of another. 210 + 211 + A note about identifiers in Commit objects: 212 + * Commit objects have 2 identifiers: commit.old_id and commit.id, because: 213 + * The original fast-export stream identified commits by an identifier. 214 + This is often an integer, but is sometimes a hash (particularly when 215 + --reference-excluded-parents is provided) 216 + * The new fast-import stream we use may not use the same identifiers. 217 + If new blobs or commits are inserted (such as lint-history does), then 218 + the integer (or hash) are no longer valid. 219 + 220 + A note about identifiers in AncestryGraph objects, of which there are three: 221 + * A given AncestryGraph is based on either commit.old_id or commit.id, but 222 + not both. These are the keys for self.value. 223 + * Using full hashes (occasionally) for children in self.graph felt 224 + wasteful, so we use our own internal integer within self.graph. 225 + self.value maps from commit {old_}id to our internal integer id. 226 + * When working with commit.old_id, it is also sometimes useful to be able 227 + to map these to the original hash, i.e. commit.original_id. So, we 228 + also have self.git_hash for mapping from commit.old_id to git's commit 229 + hash. 230 + """ 231 + 232 + def __init__(self): 233 + # The next internal identifier we will use; increments with every commit 234 + # added to the AncestryGraph 235 + self.cur_value = 0 236 + 237 + # A mapping from the external identifers given to us to the simple integers 238 + # we use in self.graph 239 + self.value = {} 240 + 241 + # A tuple of (depth, list-of-ancestors). Values and keys in this graph are 242 + # all integers from the (values of the) self.value dict. The depth of a 243 + # commit is one more than the max depth of any of its ancestors. 244 + self.graph = {} 245 + 246 + # A mapping from external identifier (i.e. from the keys of self.value) to 247 + # the hash of the given commit. Only populated for graphs based on 248 + # commit.old_id, since we won't know until later what the git_hash for 249 + # graphs based on commit.id (since we have to wait for fast-import to 250 + # create the commit and notify us of its hash; see _pending_renames). 251 + # elsewhere 252 + self.git_hash = {} 253 + 254 + # Reverse maps; only populated if needed. Caller responsible to check 255 + # and ensure they are populated 256 + self._reverse_value = {} 257 + self._hash_to_id = {} 258 + 259 + # Cached results from previous calls to is_ancestor(). 260 + self._cached_is_ancestor = {} 261 + 262 + def record_external_commits(self, external_commits): 263 + """ 264 + Record in graph that each commit in external_commits exists, and is 265 + treated as a root commit with no parents. 266 + """ 267 + for c in external_commits: 268 + if c not in self.value: 269 + self.cur_value += 1 270 + self.value[c] = self.cur_value 271 + self.graph[self.cur_value] = (1, []) 272 + self.git_hash[c] = c 273 + 274 + def add_commit_and_parents(self, commit, parents, githash = None): 275 + """ 276 + Record in graph that commit has the given parents (all identified by 277 + fast export stream identifiers, usually integers but sometimes hashes). 278 + parents _MUST_ have been first recorded. commit _MUST_ not have been 279 + recorded yet. Also, record the mapping between commit and githash, if 280 + githash is given. 281 + """ 282 + assert all(p in self.value for p in parents) 283 + assert commit not in self.value 284 + 285 + # Get values for commit and parents 286 + self.cur_value += 1 287 + self.value[commit] = self.cur_value 288 + if githash: 289 + self.git_hash[commit] = githash 290 + graph_parents = [self.value[x] for x in parents] 291 + 292 + # Determine depth for commit, then insert the info into the graph 293 + depth = 1 294 + if parents: 295 + depth += max(self.graph[p][0] for p in graph_parents) 296 + self.graph[self.cur_value] = (depth, graph_parents) 297 + 298 + def record_hash(self, commit_id, githash): 299 + ''' 300 + If a githash was not recorded for commit_id, when add_commit_and_parents 301 + was called, add it now. 302 + ''' 303 + assert commit_id in self.value 304 + assert commit_id not in self.git_hash 305 + self.git_hash[commit_id] = githash 306 + 307 + def _ensure_reverse_maps_populated(self): 308 + if not self._hash_to_id: 309 + assert not self._reverse_value 310 + self._hash_to_id = {v: k for k, v in self.git_hash.items()} 311 + self._reverse_value = {v: k for k, v in self.value.items()} 312 + 313 + def get_parent_hashes(self, commit_hash): 314 + ''' 315 + Given a commit_hash, return its parents hashes 316 + ''' 317 + # 318 + # We have to map: 319 + # commit hash -> fast export stream id -> graph id 320 + # then lookup 321 + # parent graph ids for given graph id 322 + # then we need to map 323 + # parent graph ids -> parent fast export ids -> parent commit hashes 324 + # 325 + self._ensure_reverse_maps_populated() 326 + commit_fast_export_id = self._hash_to_id[commit_hash] 327 + commit_graph_id = self.value[commit_fast_export_id] 328 + parent_graph_ids = self.graph[commit_graph_id][1] 329 + parent_fast_export_ids = [self._reverse_value[x] for x in parent_graph_ids] 330 + parent_hashes = [self.git_hash[x] for x in parent_fast_export_ids] 331 + return parent_hashes 332 + 333 + def map_to_hash(self, commit_id): 334 + ''' 335 + Given a commit (by fast export stream id), return its hash 336 + ''' 337 + return self.git_hash.get(commit_id, None) 338 + 339 + def is_ancestor(self, possible_ancestor, check): 340 + """ 341 + Return whether possible_ancestor is an ancestor of check 342 + """ 343 + a, b = self.value[possible_ancestor], self.value[check] 344 + original_pair = (a,b) 345 + a_depth = self.graph[a][0] 346 + ancestors = [b] 347 + visited = set() 348 + while ancestors: 349 + ancestor = ancestors.pop() 350 + prev_pair = (a, ancestor) 351 + if prev_pair in self._cached_is_ancestor: 352 + if not self._cached_is_ancestor[prev_pair]: 353 + continue 354 + self._cached_is_ancestor[original_pair] = True 355 + return True 356 + if ancestor in visited: 357 + continue 358 + visited.add(ancestor) 359 + depth, more_ancestors = self.graph[ancestor] 360 + if ancestor == a: 361 + self._cached_is_ancestor[original_pair] = True 362 + return True 363 + elif depth <= a_depth: 364 + continue 365 + ancestors.extend(more_ancestors) 366 + self._cached_is_ancestor[original_pair] = False 367 + return False 368 + 369 + class MailmapInfo(object): 370 + def __init__(self, filename): 371 + self.changes = {} 372 + self._parse_file(filename) 373 + 374 + def _parse_file(self, filename): 375 + name_and_email_re = re.compile(br'(.*?)\s*<([^>]*)>\s*') 376 + comment_re = re.compile(br'\s*#.*') 377 + if not os.access(filename, os.R_OK): 378 + raise SystemExit(_("Cannot read %s") % decode(filename)) 379 + with open(filename, 'br') as f: 380 + count = 0 381 + for line in f: 382 + count += 1 383 + err = "Unparseable mailmap file: line #{} is bad: {}".format(count, line) 384 + # Remove comments 385 + line = comment_re.sub(b'', line) 386 + # Remove leading and trailing whitespace 387 + line = line.strip() 388 + if not line: 389 + continue 390 + 391 + m = name_and_email_re.match(line) 392 + if not m: 393 + raise SystemExit(err) 394 + proper_name, proper_email = m.groups() 395 + if len(line) == m.end(): 396 + self.changes[(None, proper_email)] = (proper_name, proper_email) 397 + continue 398 + rest = line[m.end():] 399 + m = name_and_email_re.match(rest) 400 + if m: 401 + commit_name, commit_email = m.groups() 402 + if len(rest) != m.end(): 403 + raise SystemExit(err) 404 + else: 405 + commit_name, commit_email = rest, None 406 + self.changes[(commit_name, commit_email)] = (proper_name, proper_email) 407 + 408 + def translate(self, name, email): 409 + ''' Given a name and email, return the expected new name and email from the 410 + mailmap if there is a translation rule for it, otherwise just return 411 + the given name and email.''' 412 + for old, new in self.changes.items(): 413 + old_name, old_email = old 414 + new_name, new_email = new 415 + if (old_email is None or email.lower() == old_email.lower()) and ( 416 + name == old_name or not old_name): 417 + return (new_name or name, new_email or email) 418 + return (name, email) 419 + 420 + class ProgressWriter(object): 421 + def __init__(self): 422 + self._last_progress_update = time.time() 423 + self._last_message = None 424 + 425 + def show(self, msg): 426 + self._last_message = msg 427 + now = time.time() 428 + if now - self._last_progress_update > .1: 429 + self._last_progress_update = now 430 + sys.stdout.write("\r{}".format(msg)) 431 + sys.stdout.flush() 432 + 433 + def finish(self): 434 + self._last_progress_update = 0 435 + if self._last_message: 436 + self.show(self._last_message) 437 + sys.stdout.write("\n") 438 + 439 + class _IDs(object): 440 + """ 441 + A class that maintains the 'name domain' of all the 'marks' (short int 442 + id for a blob/commit git object). There are two reasons this mechanism 443 + is necessary: 444 + (1) the output text of fast-export may refer to an object using a different 445 + mark than the mark that was assigned to that object using IDS.new(). 446 + (This class allows you to translate the fast-export marks, "old" to 447 + the marks assigned from IDS.new(), "new"). 448 + (2) when we prune a commit, its "old" id becomes invalid. Any commits 449 + which had that commit as a parent needs to use the nearest unpruned 450 + ancestor as its parent instead. 451 + 452 + Note that for purpose (1) above, this typically comes about because the user 453 + manually creates Blob or Commit objects (for insertion into the stream). 454 + It could also come about if we attempt to read the data from two different 455 + repositories and trying to combine the data (git fast-export will number ids 456 + from 1...n, and having two 1's, two 2's, two 3's, causes issues; granted, we 457 + this scheme doesn't handle the two streams perfectly either, but if the first 458 + fast export stream is entirely processed and handled before the second stream 459 + is started, this mechanism may be sufficient to handle it). 460 + """ 461 + 462 + def __init__(self): 463 + """ 464 + Init 465 + """ 466 + # The id for the next created blob/commit object 467 + self._next_id = 1 468 + 469 + # A map of old-ids to new-ids (1:1 map) 470 + self._translation = {} 471 + 472 + # A map of new-ids to every old-id that points to the new-id (1:N map) 473 + self._reverse_translation = {} 474 + 475 + def has_renames(self): 476 + """ 477 + Return whether there have been ids remapped to new values 478 + """ 479 + return bool(self._translation) 480 + 481 + def new(self): 482 + """ 483 + Should be called whenever a new blob or commit object is created. The 484 + returned value should be used as the id/mark for that object. 485 + """ 486 + rv = self._next_id 487 + self._next_id += 1 488 + return rv 489 + 490 + def record_rename(self, old_id, new_id, handle_transitivity = False): 491 + """ 492 + Record that old_id is being renamed to new_id. 493 + """ 494 + if old_id != new_id or old_id in self._translation: 495 + # old_id -> new_id 496 + self._translation[old_id] = new_id 497 + 498 + # Transitivity will be needed if new commits are being inserted mid-way 499 + # through a branch. 500 + if handle_transitivity: 501 + # Anything that points to old_id should point to new_id 502 + if old_id in self._reverse_translation: 503 + for id_ in self._reverse_translation[old_id]: 504 + self._translation[id_] = new_id 505 + 506 + # Record that new_id is pointed to by old_id 507 + if new_id not in self._reverse_translation: 508 + self._reverse_translation[new_id] = [] 509 + self._reverse_translation[new_id].append(old_id) 510 + 511 + def translate(self, old_id): 512 + """ 513 + If old_id has been mapped to an alternate id, return the alternate id. 514 + """ 515 + if old_id in self._translation: 516 + return self._translation[old_id] 517 + else: 518 + return old_id 519 + 520 + def __str__(self): 521 + """ 522 + Convert IDs to string; used for debugging 523 + """ 524 + rv = "Current count: %d\nTranslation:\n" % self._next_id 525 + for k in sorted(self._translation): 526 + rv += " %d -> %s\n" % (k, self._translation[k]) 527 + 528 + rv += "Reverse translation:\n" 529 + reverse_keys = list(self._reverse_translation.keys()) 530 + if None in reverse_keys: # pragma: no cover 531 + reverse_keys.remove(None) 532 + reverse_keys = sorted(reverse_keys) 533 + reverse_keys.append(None) 534 + for k in reverse_keys: 535 + rv += " " + str(k) + " -> " + str(self._reverse_translation[k]) + "\n" 536 + 537 + return rv 538 + 539 + class _GitElement(object): 540 + """ 541 + The base class for all git elements that we create. 542 + """ 543 + 544 + def __init__(self): 545 + # A string that describes what type of Git element this is 546 + self.type = None 547 + 548 + # A flag telling us if this Git element has been dumped 549 + # (i.e. printed) or skipped. Typically elements that have been 550 + # dumped or skipped will not be dumped again. 551 + self.dumped = 0 552 + 553 + def dump(self, file_): 554 + """ 555 + This version should never be called. Derived classes need to 556 + override! We should note that subclasses should implement this 557 + method such that the output would match the format produced by 558 + fast-export. 559 + """ 560 + raise SystemExit(_("Unimplemented function: %s") % type(self).__name__ 561 + +".dump()") # pragma: no cover 562 + 563 + def __bytes__(self): 564 + """ 565 + Convert GitElement to bytestring; used for debugging 566 + """ 567 + old_dumped = self.dumped 568 + writeme = io.BytesIO() 569 + self.dump(writeme) 570 + output_lines = writeme.getvalue().splitlines() 571 + writeme.close() 572 + self.dumped = old_dumped 573 + return b"%s:\n %s" % (type(self).__name__.encode(), 574 + b"\n ".join(output_lines)) 575 + 576 + def skip(self, new_id=None): 577 + """ 578 + Ensures this element will not be written to output 579 + """ 580 + self.dumped = 2 581 + 582 + class _GitElementWithId(_GitElement): 583 + """ 584 + The base class for Git elements that have IDs (commits and blobs) 585 + """ 586 + 587 + def __init__(self): 588 + _GitElement.__init__(self) 589 + 590 + # The mark (short, portable id) for this element 591 + self.id = _IDS.new() 592 + 593 + # The previous mark for this element 594 + self.old_id = None 595 + 596 + def skip(self, new_id=None): 597 + """ 598 + This element will no longer be automatically written to output. When a 599 + commit gets skipped, it's ID will need to be translated to that of its 600 + parent. 601 + """ 602 + self.dumped = 2 603 + 604 + _IDS.record_rename(self.old_id or self.id, new_id) 605 + 606 + class Blob(_GitElementWithId): 607 + """ 608 + This class defines our representation of git blob elements (i.e. our 609 + way of representing file contents). 610 + """ 611 + 612 + def __init__(self, data, original_id = None): 613 + _GitElementWithId.__init__(self) 614 + 615 + # Denote that this is a blob 616 + self.type = 'blob' 617 + 618 + # Record original id 619 + self.original_id = original_id 620 + 621 + # Stores the blob's data 622 + assert(type(data) == bytes) 623 + self.data = data 624 + 625 + def dump(self, file_): 626 + """ 627 + Write this blob element to a file. 628 + """ 629 + self.dumped = 1 630 + BLOB_HASH_TO_NEW_ID[self.original_id] = self.id 631 + BLOB_NEW_ID_TO_HASH[self.id] = self.original_id 632 + 633 + file_.write(b'blob\n') 634 + file_.write(b'mark :%d\n' % self.id) 635 + file_.write(b'data %d\n%s' % (len(self.data), self.data)) 636 + file_.write(b'\n') 637 + 638 + 639 + class Reset(_GitElement): 640 + """ 641 + This class defines our representation of git reset elements. A reset 642 + event is the creation (or recreation) of a named branch, optionally 643 + starting from a specific revision). 644 + """ 645 + 646 + def __init__(self, ref, from_ref = None): 647 + _GitElement.__init__(self) 648 + 649 + # Denote that this is a reset 650 + self.type = 'reset' 651 + 652 + # The name of the branch being (re)created 653 + self.ref = ref 654 + 655 + # Some reference to the branch/commit we are resetting from 656 + self.from_ref = from_ref 657 + 658 + def dump(self, file_): 659 + """ 660 + Write this reset element to a file 661 + """ 662 + self.dumped = 1 663 + 664 + file_.write(b'reset %s\n' % self.ref) 665 + if self.from_ref: 666 + if isinstance(self.from_ref, int): 667 + file_.write(b'from :%d\n' % self.from_ref) 668 + else: 669 + file_.write(b'from %s\n' % self.from_ref) 670 + file_.write(b'\n') 671 + 672 + class FileChange(_GitElement): 673 + """ 674 + This class defines our representation of file change elements. File change 675 + elements are components within a Commit element. 676 + """ 677 + 678 + def __init__(self, type_, filename = None, id_ = None, mode = None): 679 + _GitElement.__init__(self) 680 + 681 + # Denote the type of file-change (b'M' for modify, b'D' for delete, etc) 682 + # We could 683 + # assert(type(type_) == bytes) 684 + # here but I don't just due to worries about performance overhead... 685 + self.type = type_ 686 + 687 + # Record the name of the file being changed 688 + self.filename = filename 689 + 690 + # Record the mode (mode describes type of file entry (non-executable, 691 + # executable, or symlink)). 692 + self.mode = mode 693 + 694 + # blob_id is the id (mark) of the affected blob 695 + self.blob_id = id_ 696 + 697 + if type_ == b'DELETEALL': 698 + assert filename is None and id_ is None and mode is None 699 + self.filename = b'' # Just so PathQuoting.enquote doesn't die 700 + else: 701 + assert filename is not None 702 + 703 + if type_ == b'M': 704 + assert id_ is not None and mode is not None 705 + elif type_ == b'D': 706 + assert id_ is None and mode is None 707 + elif type_ == b'R': # pragma: no cover (now avoid fast-export renames) 708 + assert mode is None 709 + if id_ is None: 710 + raise SystemExit(_("new name needed for rename of %s") % filename) 711 + self.filename = (self.filename, id_) 712 + self.blob_id = None 713 + 714 + def dump(self, file_): 715 + """ 716 + Write this file-change element to a file 717 + """ 718 + skipped_blob = (self.type == b'M' and self.blob_id is None) 719 + if skipped_blob: return 720 + self.dumped = 1 721 + 722 + quoted_filename = PathQuoting.enquote(self.filename) 723 + if self.type == b'M' and isinstance(self.blob_id, int): 724 + file_.write(b'M %s :%d %s\n' % (self.mode, self.blob_id, quoted_filename)) 725 + elif self.type == b'M': 726 + file_.write(b'M %s %s %s\n' % (self.mode, self.blob_id, quoted_filename)) 727 + elif self.type == b'D': 728 + file_.write(b'D %s\n' % quoted_filename) 729 + elif self.type == b'DELETEALL': 730 + file_.write(b'deleteall\n') 731 + else: 732 + raise SystemExit(_("Unhandled filechange type: %s") % self.type) # pragma: no cover 733 + 734 + class Commit(_GitElementWithId): 735 + """ 736 + This class defines our representation of commit elements. Commit elements 737 + contain all the information associated with a commit. 738 + """ 739 + 740 + def __init__(self, branch, 741 + author_name, author_email, author_date, 742 + committer_name, committer_email, committer_date, 743 + message, 744 + file_changes, 745 + parents, 746 + original_id = None, 747 + encoding = None, # encoding for message; None implies UTF-8 748 + **kwargs): 749 + _GitElementWithId.__init__(self) 750 + self.old_id = self.id 751 + 752 + # Denote that this is a commit element 753 + self.type = 'commit' 754 + 755 + # Record the affected branch 756 + self.branch = branch 757 + 758 + # Record original id 759 + self.original_id = original_id 760 + 761 + # Record author's name 762 + self.author_name = author_name 763 + 764 + # Record author's email 765 + self.author_email = author_email 766 + 767 + # Record date of authoring 768 + self.author_date = author_date 769 + 770 + # Record committer's name 771 + self.committer_name = committer_name 772 + 773 + # Record committer's email 774 + self.committer_email = committer_email 775 + 776 + # Record date the commit was made 777 + self.committer_date = committer_date 778 + 779 + # Record commit message and its encoding 780 + self.encoding = encoding 781 + self.message = message 782 + 783 + # List of file-changes associated with this commit. Note that file-changes 784 + # are also represented as git elements 785 + self.file_changes = file_changes 786 + 787 + self.parents = parents 788 + 789 + def dump(self, file_): 790 + """ 791 + Write this commit element to a file. 792 + """ 793 + self.dumped = 1 794 + 795 + # Make output to fast-import slightly easier for humans to read if the 796 + # message has no trailing newline of its own; cosmetic, but a nice touch... 797 + extra_newline = b'\n' 798 + if self.message.endswith(b'\n') or not (self.parents or self.file_changes): 799 + extra_newline = b'' 800 + 801 + if not self.parents: 802 + file_.write(b'reset %s\n' % self.branch) 803 + file_.write((b'commit %s\n' 804 + b'mark :%d\n' 805 + b'author %s <%s> %s\n' 806 + b'committer %s <%s> %s\n' 807 + ) % ( 808 + self.branch, self.id, 809 + self.author_name, self.author_email, self.author_date, 810 + self.committer_name, self.committer_email, self.committer_date 811 + )) 812 + if self.encoding: 813 + file_.write(b'encoding %s\n' % self.encoding) 814 + file_.write(b'data %d\n%s%s' % 815 + (len(self.message), self.message, extra_newline)) 816 + for i, parent in enumerate(self.parents): 817 + file_.write(b'from ' if i==0 else b'merge ') 818 + if isinstance(parent, int): 819 + file_.write(b':%d\n' % parent) 820 + else: 821 + file_.write(b'%s\n' % parent) 822 + for change in self.file_changes: 823 + change.dump(file_) 824 + if not self.parents and not self.file_changes: 825 + # Workaround a bug in pre-git-2.22 versions of fast-import with 826 + # the get-mark directive. 827 + file_.write(b'\n') 828 + file_.write(b'\n') 829 + 830 + def first_parent(self): 831 + """ 832 + Return first parent commit 833 + """ 834 + if self.parents: 835 + return self.parents[0] 836 + return None 837 + 838 + def skip(self, new_id=None): 839 + _SKIPPED_COMMITS.add(self.old_id or self.id) 840 + _GitElementWithId.skip(self, new_id) 841 + 842 + class Tag(_GitElementWithId): 843 + """ 844 + This class defines our representation of annotated tag elements. 845 + """ 846 + 847 + def __init__(self, ref, from_ref, 848 + tagger_name, tagger_email, tagger_date, tag_msg, 849 + original_id = None): 850 + _GitElementWithId.__init__(self) 851 + self.old_id = self.id 852 + 853 + # Denote that this is a tag element 854 + self.type = 'tag' 855 + 856 + # Store the name of the tag 857 + self.ref = ref 858 + 859 + # Store the entity being tagged (this should be a commit) 860 + self.from_ref = from_ref 861 + 862 + # Record original id 863 + self.original_id = original_id 864 + 865 + # Store the name of the tagger 866 + self.tagger_name = tagger_name 867 + 868 + # Store the email of the tagger 869 + self.tagger_email = tagger_email 870 + 871 + # Store the date 872 + self.tagger_date = tagger_date 873 + 874 + # Store the tag message 875 + self.message = tag_msg 876 + 877 + def dump(self, file_): 878 + """ 879 + Write this tag element to a file 880 + """ 881 + 882 + self.dumped = 1 883 + 884 + file_.write(b'tag %s\n' % self.ref) 885 + if (write_marks and self.id): 886 + file_.write(b'mark :%d\n' % self.id) 887 + markfmt = b'from :%d\n' if isinstance(self.from_ref, int) else b'from %s\n' 888 + file_.write(markfmt % self.from_ref) 889 + if self.tagger_name: 890 + file_.write(b'tagger %s <%s> ' % (self.tagger_name, self.tagger_email)) 891 + file_.write(self.tagger_date) 892 + file_.write(b'\n') 893 + file_.write(b'data %d\n%s' % (len(self.message), self.message)) 894 + file_.write(b'\n') 895 + 896 + class Progress(_GitElement): 897 + """ 898 + This class defines our representation of progress elements. The progress 899 + element only contains a progress message, which is printed by fast-import 900 + when it processes the progress output. 901 + """ 902 + 903 + def __init__(self, message): 904 + _GitElement.__init__(self) 905 + 906 + # Denote that this is a progress element 907 + self.type = 'progress' 908 + 909 + # Store the progress message 910 + self.message = message 911 + 912 + def dump(self, file_): 913 + """ 914 + Write this progress element to a file 915 + """ 916 + self.dumped = 1 917 + 918 + file_.write(b'progress %s\n' % self.message) 919 + file_.write(b'\n') 920 + 921 + class Checkpoint(_GitElement): 922 + """ 923 + This class defines our representation of checkpoint elements. These 924 + elements represent events which force fast-import to close the current 925 + packfile, start a new one, and to save out all current branch refs, tags 926 + and marks. 927 + """ 928 + 929 + def __init__(self): 930 + _GitElement.__init__(self) 931 + 932 + # Denote that this is a checkpoint element 933 + self.type = 'checkpoint' 934 + 935 + def dump(self, file_): 936 + """ 937 + Write this checkpoint element to a file 938 + """ 939 + self.dumped = 1 940 + 941 + file_.write(b'checkpoint\n') 942 + file_.write(b'\n') 943 + 944 + class LiteralCommand(_GitElement): 945 + """ 946 + This class defines our representation of commands. The literal command 947 + includes only a single line, and is not processed in any special way. 948 + """ 949 + 950 + def __init__(self, line): 951 + _GitElement.__init__(self) 952 + 953 + # Denote that this is a literal element 954 + self.type = 'literal' 955 + 956 + # Store the command 957 + self.line = line 958 + 959 + def dump(self, file_): 960 + """ 961 + Write this progress element to a file 962 + """ 963 + self.dumped = 1 964 + 965 + file_.write(self.line) 966 + 967 + class Alias(_GitElement): 968 + """ 969 + This class defines our representation of fast-import alias elements. An 970 + alias element is the setting of one mark to the same sha1sum as another, 971 + usually because the newer mark corresponded to a pruned commit. 972 + """ 973 + 974 + def __init__(self, ref, to_ref): 975 + _GitElement.__init__(self) 976 + # Denote that this is a reset 977 + self.type = 'alias' 978 + 979 + self.ref = ref 980 + self.to_ref = to_ref 981 + 982 + def dump(self, file_): 983 + """ 984 + Write this reset element to a file 985 + """ 986 + self.dumped = 1 987 + 988 + file_.write(b'alias\nmark :%d\nto :%d\n\n' % (self.ref, self.to_ref)) 989 + 990 + class FastExportParser(object): 991 + """ 992 + A class for parsing and handling the output from fast-export. This 993 + class allows the user to register callbacks when various types of 994 + data are encountered in the fast-export output. The basic idea is that, 995 + FastExportParser takes fast-export output, creates the various objects 996 + as it encounters them, the user gets to use/modify these objects via 997 + callbacks, and finally FastExportParser outputs the modified objects 998 + in fast-import format (presumably so they can be used to create a new 999 + repo). 1000 + """ 1001 + 1002 + def __init__(self, 1003 + tag_callback = None, commit_callback = None, 1004 + blob_callback = None, progress_callback = None, 1005 + reset_callback = None, checkpoint_callback = None, 1006 + done_callback = None): 1007 + # Members below simply store callback functions for the various git 1008 + # elements 1009 + self._tag_callback = tag_callback 1010 + self._blob_callback = blob_callback 1011 + self._reset_callback = reset_callback 1012 + self._commit_callback = commit_callback 1013 + self._progress_callback = progress_callback 1014 + self._checkpoint_callback = checkpoint_callback 1015 + self._done_callback = done_callback 1016 + 1017 + # Keep track of which refs appear from the export, and which make it to 1018 + # the import (pruning of empty commits, renaming of refs, and creating 1019 + # new manual objects and inserting them can cause these to differ). 1020 + self._exported_refs = set() 1021 + self._imported_refs = set() 1022 + 1023 + # A list of the branches we've seen, plus the last known commit they 1024 + # pointed to. An entry in latest_*commit will be deleted if we get a 1025 + # reset for that branch. These are used because of fast-import's weird 1026 + # decision to allow having an implicit parent via naming the branch 1027 + # instead of requiring branches to be specified via 'from' directives. 1028 + self._latest_commit = {} 1029 + self._latest_orig_commit = {} 1030 + 1031 + # A handle to the input source for the fast-export data 1032 + self._input = None 1033 + 1034 + # A handle to the output file for the output we generate (we call dump 1035 + # on many of the git elements we create). 1036 + self._output = None 1037 + 1038 + # Stores the contents of the current line of input being parsed 1039 + self._currentline = '' 1040 + 1041 + # Tracks LFS objects we have found 1042 + self._lfs_object_tracker = None 1043 + 1044 + # Compile some regexes and cache those 1045 + self._mark_re = re.compile(br'mark :(\d+)\n$') 1046 + self._parent_regexes = {} 1047 + parent_regex_rules = (br' :(\d+)\n$', br' ([0-9a-f]{40})\n') 1048 + for parent_refname in (b'from', b'merge'): 1049 + ans = [re.compile(parent_refname+x) for x in parent_regex_rules] 1050 + self._parent_regexes[parent_refname] = ans 1051 + self._quoted_string_re = re.compile(br'"(?:[^"\\]|\\.)*"') 1052 + self._refline_regexes = {} 1053 + for refline_name in (b'reset', b'commit', b'tag', b'progress'): 1054 + self._refline_regexes[refline_name] = re.compile(refline_name+b' (.*)\n$') 1055 + self._user_regexes = {} 1056 + for user in (b'author', b'committer', b'tagger'): 1057 + self._user_regexes[user] = re.compile(user + b' (.*?) <(.*?)> (.*)\n$') 1058 + 1059 + def _advance_currentline(self): 1060 + """ 1061 + Grab the next line of input 1062 + """ 1063 + self._currentline = self._input.readline() 1064 + 1065 + def _parse_optional_mark(self): 1066 + """ 1067 + If the current line contains a mark, parse it and advance to the 1068 + next line; return None otherwise 1069 + """ 1070 + mark = None 1071 + matches = self._mark_re.match(self._currentline) 1072 + if matches: 1073 + mark = int(matches.group(1)) 1074 + self._advance_currentline() 1075 + return mark 1076 + 1077 + def _parse_optional_parent_ref(self, refname): 1078 + """ 1079 + If the current line contains a reference to a parent commit, then 1080 + parse it and advance the current line; otherwise return None. Note 1081 + that the name of the reference ('from', 'merge') must match the 1082 + refname arg. 1083 + """ 1084 + orig_baseref, baseref = None, None 1085 + rule, altrule = self._parent_regexes[refname] 1086 + matches = rule.match(self._currentline) 1087 + if matches: 1088 + orig_baseref = int(matches.group(1)) 1089 + # We translate the parent commit mark to what it needs to be in 1090 + # our mark namespace 1091 + baseref = _IDS.translate(orig_baseref) 1092 + self._advance_currentline() 1093 + else: 1094 + matches = altrule.match(self._currentline) 1095 + if matches: 1096 + orig_baseref = matches.group(1) 1097 + baseref = orig_baseref 1098 + self._advance_currentline() 1099 + return orig_baseref, baseref 1100 + 1101 + def _parse_optional_filechange(self): 1102 + """ 1103 + If the current line contains a file-change object, then parse it 1104 + and advance the current line; otherwise return None. We only care 1105 + about file changes of type b'M' and b'D' (these are the only types 1106 + of file-changes that fast-export will provide). 1107 + """ 1108 + filechange = None 1109 + changetype = self._currentline[0:1] 1110 + if changetype == b'M': 1111 + (changetype, mode, idnum, path) = self._currentline.split(None, 3) 1112 + if idnum[0:1] == b':': 1113 + idnum = idnum[1:] 1114 + path = path.rstrip(b'\n') 1115 + # Check for LFS objects from sources before we might toss this filechange 1116 + if mode != b'160000' and self._lfs_object_tracker: 1117 + value = int(idnum) if len(idnum) != 40 else idnum 1118 + self._lfs_object_tracker.check_file_change_data(value, True) 1119 + # We translate the idnum to our id system 1120 + if len(idnum) != 40: 1121 + idnum = _IDS.translate( int(idnum) ) 1122 + if idnum is not None: 1123 + if path.startswith(b'"'): 1124 + path = PathQuoting.dequote(path) 1125 + filechange = FileChange(b'M', path, idnum, mode) 1126 + else: 1127 + filechange = b'skipped' 1128 + self._advance_currentline() 1129 + elif changetype == b'D': 1130 + (changetype, path) = self._currentline.split(None, 1) 1131 + path = path.rstrip(b'\n') 1132 + if path.startswith(b'"'): 1133 + path = PathQuoting.dequote(path) 1134 + filechange = FileChange(b'D', path) 1135 + self._advance_currentline() 1136 + elif changetype == b'R': # pragma: no cover (now avoid fast-export renames) 1137 + rest = self._currentline[2:-1] 1138 + if rest.startswith(b'"'): 1139 + m = self._quoted_string_re.match(rest) 1140 + if not m: 1141 + raise SystemExit(_("Couldn't parse rename source")) 1142 + orig = PathQuoting.dequote(m.group(0)) 1143 + new = rest[m.end()+1:] 1144 + else: 1145 + orig, new = rest.split(b' ', 1) 1146 + if new.startswith(b'"'): 1147 + new = PathQuoting.dequote(new) 1148 + filechange = FileChange(b'R', orig, new) 1149 + self._advance_currentline() 1150 + return filechange 1151 + 1152 + def _parse_original_id(self): 1153 + original_id = self._currentline[len(b'original-oid '):].rstrip() 1154 + self._advance_currentline() 1155 + return original_id 1156 + 1157 + def _parse_encoding(self): 1158 + encoding = self._currentline[len(b'encoding '):].rstrip() 1159 + self._advance_currentline() 1160 + return encoding 1161 + 1162 + def _parse_ref_line(self, refname): 1163 + """ 1164 + Parses string data (often a branch name) from current-line. The name of 1165 + the string data must match the refname arg. The program will crash if 1166 + current-line does not match, so current-line will always be advanced if 1167 + this method returns. 1168 + """ 1169 + matches = self._refline_regexes[refname].match(self._currentline) 1170 + if not matches: 1171 + raise SystemExit(_("Malformed %(refname)s line: '%(line)s'") % 1172 + ({'refname': refname, 'line':self._currentline}) 1173 + ) # pragma: no cover 1174 + ref = matches.group(1) 1175 + self._advance_currentline() 1176 + return ref 1177 + 1178 + def _parse_user(self, usertype): 1179 + """ 1180 + Get user name, email, datestamp from current-line. Current-line will 1181 + be advanced. 1182 + """ 1183 + user_regex = self._user_regexes[usertype] 1184 + (name, email, when) = user_regex.match(self._currentline).groups() 1185 + 1186 + self._advance_currentline() 1187 + return (name, email, when) 1188 + 1189 + def _parse_data(self): 1190 + """ 1191 + Reads data from _input. Current-line will be advanced until it is beyond 1192 + the data. 1193 + """ 1194 + fields = self._currentline.split() 1195 + assert fields[0] == b'data' 1196 + size = int(fields[1]) 1197 + data = self._input.read(size) 1198 + self._advance_currentline() 1199 + if self._currentline == b'\n': 1200 + self._advance_currentline() 1201 + return data 1202 + 1203 + def _parse_blob(self): 1204 + """ 1205 + Parse input data into a Blob object. Once the Blob has been created, it 1206 + will be handed off to the appropriate callbacks. Current-line will be 1207 + advanced until it is beyond this blob's data. The Blob will be dumped 1208 + to _output once everything else is done (unless it has been skipped by 1209 + the callback). 1210 + """ 1211 + # Parse the Blob 1212 + self._advance_currentline() 1213 + id_ = self._parse_optional_mark() 1214 + 1215 + original_id = None 1216 + if self._currentline.startswith(b'original-oid'): 1217 + original_id = self._parse_original_id(); 1218 + 1219 + data = self._parse_data() 1220 + if self._currentline == b'\n': 1221 + self._advance_currentline() 1222 + 1223 + # Create the blob 1224 + blob = Blob(data, original_id) 1225 + 1226 + # If fast-export text had a mark for this blob, need to make sure this 1227 + # mark translates to the blob's true id. 1228 + if id_: 1229 + blob.old_id = id_ 1230 + _IDS.record_rename(id_, blob.id) 1231 + 1232 + # Check for LFS objects 1233 + if self._lfs_object_tracker: 1234 + self._lfs_object_tracker.check_blob_data(data, blob.old_id, True) 1235 + 1236 + # Call any user callback to allow them to use/modify the blob 1237 + if self._blob_callback: 1238 + self._blob_callback(blob) 1239 + 1240 + # Now print the resulting blob 1241 + if not blob.dumped: 1242 + blob.dump(self._output) 1243 + 1244 + def _parse_reset(self): 1245 + """ 1246 + Parse input data into a Reset object. Once the Reset has been created, 1247 + it will be handed off to the appropriate callbacks. Current-line will 1248 + be advanced until it is beyond the reset data. The Reset will be dumped 1249 + to _output once everything else is done (unless it has been skipped by 1250 + the callback). 1251 + """ 1252 + # Parse the Reset 1253 + ref = self._parse_ref_line(b'reset') 1254 + self._exported_refs.add(ref) 1255 + ignoreme, from_ref = self._parse_optional_parent_ref(b'from') 1256 + if self._currentline == b'\n': 1257 + self._advance_currentline() 1258 + 1259 + # fast-export likes to print extraneous resets that serve no purpose. 1260 + # While we could continue processing such resets, that is a waste of 1261 + # resources. Also, we want to avoid recording that this ref was 1262 + # seen in such cases, since this ref could be rewritten to nothing. 1263 + if not from_ref: 1264 + self._latest_commit.pop(ref, None) 1265 + self._latest_orig_commit.pop(ref, None) 1266 + return 1267 + 1268 + # Create the reset 1269 + reset = Reset(ref, from_ref) 1270 + 1271 + # Call any user callback to allow them to modify the reset 1272 + if self._reset_callback: 1273 + self._reset_callback(reset) 1274 + 1275 + # Update metadata 1276 + self._latest_commit[reset.ref] = reset.from_ref 1277 + self._latest_orig_commit[reset.ref] = reset.from_ref 1278 + 1279 + # Now print the resulting reset 1280 + if not reset.dumped: 1281 + self._imported_refs.add(reset.ref) 1282 + reset.dump(self._output) 1283 + 1284 + def _parse_commit(self): 1285 + """ 1286 + Parse input data into a Commit object. Once the Commit has been created, 1287 + it will be handed off to the appropriate callbacks. Current-line will 1288 + be advanced until it is beyond the commit data. The Commit will be dumped 1289 + to _output once everything else is done (unless it has been skipped by 1290 + the callback OR the callback has removed all file-changes from the commit). 1291 + """ 1292 + # Parse the Commit. This may look involved, but it's pretty simple; it only 1293 + # looks bad because a commit object contains many pieces of data. 1294 + branch = self._parse_ref_line(b'commit') 1295 + self._exported_refs.add(branch) 1296 + id_ = self._parse_optional_mark() 1297 + 1298 + original_id = None 1299 + if self._currentline.startswith(b'original-oid'): 1300 + original_id = self._parse_original_id(); 1301 + 1302 + author_name = None 1303 + author_email = None 1304 + if self._currentline.startswith(b'author'): 1305 + (author_name, author_email, author_date) = self._parse_user(b'author') 1306 + 1307 + (committer_name, committer_email, committer_date) = \ 1308 + self._parse_user(b'committer') 1309 + 1310 + if not author_name and not author_email: 1311 + (author_name, author_email, author_date) = \ 1312 + (committer_name, committer_email, committer_date) 1313 + 1314 + encoding = None 1315 + if self._currentline.startswith(b'encoding '): 1316 + encoding = self._parse_encoding() 1317 + 1318 + commit_msg = self._parse_data() 1319 + 1320 + pinfo = [self._parse_optional_parent_ref(b'from')] 1321 + # Due to empty pruning, we can have real 'from' and 'merge' lines that 1322 + # due to commit rewriting map to a parent of None. We need to record 1323 + # 'from' if its non-None, and we need to parse all 'merge' lines. 1324 + while self._currentline.startswith(b'merge '): 1325 + pinfo.append(self._parse_optional_parent_ref(b'merge')) 1326 + orig_parents, parents = [list(tmp) for tmp in zip(*pinfo)] 1327 + 1328 + # No parents is oddly represented as [None] instead of [], due to the 1329 + # special 'from' handling. Convert it here to a more canonical form. 1330 + if parents == [None]: 1331 + parents = [] 1332 + if orig_parents == [None]: 1333 + orig_parents = [] 1334 + 1335 + # fast-import format is kinda stupid in that it allows implicit parents 1336 + # based on the branch name instead of requiring them to be specified by 1337 + # 'from' directives. The only way to get no parent is by using a reset 1338 + # directive first, which clears the latest_commit_for_this_branch tracking. 1339 + if not orig_parents and self._latest_commit.get(branch): 1340 + parents = [self._latest_commit[branch]] 1341 + if not orig_parents and self._latest_orig_commit.get(branch): 1342 + orig_parents = [self._latest_orig_commit[branch]] 1343 + 1344 + # Get the list of file changes 1345 + file_changes = [] 1346 + file_change = self._parse_optional_filechange() 1347 + had_file_changes = file_change is not None 1348 + while file_change: 1349 + if not (type(file_change) == bytes and file_change == b'skipped'): 1350 + file_changes.append(file_change) 1351 + file_change = self._parse_optional_filechange() 1352 + if self._currentline == b'\n': 1353 + self._advance_currentline() 1354 + 1355 + # Okay, now we can finally create the Commit object 1356 + commit = Commit(branch, 1357 + author_name, author_email, author_date, 1358 + committer_name, committer_email, committer_date, 1359 + commit_msg, file_changes, parents, original_id, encoding) 1360 + 1361 + # If fast-export text had a mark for this commit, need to make sure this 1362 + # mark translates to the commit's true id. 1363 + if id_: 1364 + commit.old_id = id_ 1365 + _IDS.record_rename(id_, commit.id) 1366 + 1367 + # refs/notes/ put commit-message-related material in blobs, and name their 1368 + # files according to the hash of other commits. That totally messes with 1369 + # all normal callbacks; fast-export should really export these as different 1370 + # kinds of objects. Until then, let's just pass these commits through as-is 1371 + # and hope the blob callbacks don't mess things up. 1372 + if commit.branch.startswith(b'refs/notes/'): 1373 + self._imported_refs.add(commit.branch) 1374 + commit.dump(self._output) 1375 + return 1376 + 1377 + # Call any user callback to allow them to modify the commit 1378 + aux_info = {'orig_parents': orig_parents, 1379 + 'had_file_changes': had_file_changes} 1380 + if self._commit_callback: 1381 + self._commit_callback(commit, aux_info) 1382 + 1383 + # Now print the resulting commit, or if prunable skip it 1384 + self._latest_orig_commit[branch] = commit.id 1385 + if not (commit.old_id or commit.id) in _SKIPPED_COMMITS: 1386 + self._latest_commit[branch] = commit.id 1387 + if not commit.dumped: 1388 + self._imported_refs.add(commit.branch) 1389 + commit.dump(self._output) 1390 + 1391 + def _parse_tag(self): 1392 + """ 1393 + Parse input data into a Tag object. Once the Tag has been created, 1394 + it will be handed off to the appropriate callbacks. Current-line will 1395 + be advanced until it is beyond the tag data. The Tag will be dumped 1396 + to _output once everything else is done (unless it has been skipped by 1397 + the callback). 1398 + """ 1399 + # Parse the Tag 1400 + tag = self._parse_ref_line(b'tag') 1401 + self._exported_refs.add(b'refs/tags/'+tag) 1402 + id_ = self._parse_optional_mark() 1403 + ignoreme, from_ref = self._parse_optional_parent_ref(b'from') 1404 + 1405 + original_id = None 1406 + if self._currentline.startswith(b'original-oid'): 1407 + original_id = self._parse_original_id(); 1408 + 1409 + tagger_name, tagger_email, tagger_date = None, None, None 1410 + if self._currentline.startswith(b'tagger'): 1411 + (tagger_name, tagger_email, tagger_date) = self._parse_user(b'tagger') 1412 + tag_msg = self._parse_data() 1413 + if self._currentline == b'\n': 1414 + self._advance_currentline() 1415 + 1416 + # Create the tag 1417 + tag = Tag(tag, from_ref, 1418 + tagger_name, tagger_email, tagger_date, tag_msg, 1419 + original_id) 1420 + 1421 + # If fast-export text had a mark for this tag, need to make sure this 1422 + # mark translates to the tag's true id. 1423 + if id_: 1424 + tag.old_id = id_ 1425 + _IDS.record_rename(id_, tag.id) 1426 + 1427 + # Call any user callback to allow them to modify the tag 1428 + if self._tag_callback: 1429 + self._tag_callback(tag) 1430 + 1431 + # The tag might not point at anything that still exists (self.from_ref 1432 + # will be None if the commit it pointed to and all its ancestors were 1433 + # pruned due to being empty) 1434 + if tag.from_ref: 1435 + # Print out this tag's information 1436 + if not tag.dumped: 1437 + self._imported_refs.add(b'refs/tags/'+tag.ref) 1438 + tag.dump(self._output) 1439 + else: 1440 + tag.skip() 1441 + 1442 + def _parse_progress(self): 1443 + """ 1444 + Parse input data into a Progress object. Once the Progress has 1445 + been created, it will be handed off to the appropriate 1446 + callbacks. Current-line will be advanced until it is beyond the 1447 + progress data. The Progress will be dumped to _output once 1448 + everything else is done (unless it has been skipped by the callback). 1449 + """ 1450 + # Parse the Progress 1451 + message = self._parse_ref_line(b'progress') 1452 + if self._currentline == b'\n': 1453 + self._advance_currentline() 1454 + 1455 + # Create the progress message 1456 + progress = Progress(message) 1457 + 1458 + # Call any user callback to allow them to modify the progress messsage 1459 + if self._progress_callback: 1460 + self._progress_callback(progress) 1461 + 1462 + # NOTE: By default, we do NOT print the progress message; git 1463 + # fast-import would write it to fast_import_pipes which could mess with 1464 + # our parsing of output from the 'ls' and 'get-mark' directives we send 1465 + # to fast-import. If users want these messages, they need to process 1466 + # and handle them in the appropriate callback above. 1467 + 1468 + def _parse_checkpoint(self): 1469 + """ 1470 + Parse input data into a Checkpoint object. Once the Checkpoint has 1471 + been created, it will be handed off to the appropriate 1472 + callbacks. Current-line will be advanced until it is beyond the 1473 + checkpoint data. The Checkpoint will be dumped to _output once 1474 + everything else is done (unless it has been skipped by the callback). 1475 + """ 1476 + # Parse the Checkpoint 1477 + self._advance_currentline() 1478 + if self._currentline == b'\n': 1479 + self._advance_currentline() 1480 + 1481 + # Create the checkpoint 1482 + checkpoint = Checkpoint() 1483 + 1484 + # Call any user callback to allow them to drop the checkpoint 1485 + if self._checkpoint_callback: 1486 + self._checkpoint_callback(checkpoint) 1487 + 1488 + # NOTE: By default, we do NOT print the checkpoint message; although it 1489 + # we would only realistically get them with --stdin, the fact that we 1490 + # are filtering makes me think the checkpointing is less likely to be 1491 + # reasonable. In fact, I don't think it's necessary in general. If 1492 + # users do want it, they should process it in the checkpoint_callback. 1493 + 1494 + def _parse_literal_command(self): 1495 + """ 1496 + Parse literal command. Then just dump the line as is. 1497 + """ 1498 + # Create the literal command object 1499 + command = LiteralCommand(self._currentline) 1500 + self._advance_currentline() 1501 + 1502 + # Now print the resulting literal command 1503 + if not command.dumped: 1504 + command.dump(self._output) 1505 + 1506 + def insert(self, obj): 1507 + assert not obj.dumped 1508 + obj.dump(self._output) 1509 + if type(obj) == Commit: 1510 + self._imported_refs.add(obj.branch) 1511 + elif type(obj) in (Reset, Tag): 1512 + self._imported_refs.add(obj.ref) 1513 + 1514 + def run(self, input, output): 1515 + """ 1516 + This method filters fast export output. 1517 + """ 1518 + # Set input. If no args provided, use stdin. 1519 + self._input = input 1520 + self._output = output 1521 + 1522 + # Run over the input and do the filtering 1523 + self._advance_currentline() 1524 + while self._currentline: 1525 + if self._currentline.startswith(b'blob'): 1526 + self._parse_blob() 1527 + elif self._currentline.startswith(b'reset'): 1528 + self._parse_reset() 1529 + elif self._currentline.startswith(b'commit'): 1530 + self._parse_commit() 1531 + elif self._currentline.startswith(b'tag'): 1532 + self._parse_tag() 1533 + elif self._currentline.startswith(b'progress'): 1534 + self._parse_progress() 1535 + elif self._currentline.startswith(b'checkpoint'): 1536 + self._parse_checkpoint() 1537 + elif self._currentline.startswith(b'feature'): 1538 + self._parse_literal_command() 1539 + elif self._currentline.startswith(b'option'): 1540 + self._parse_literal_command() 1541 + elif self._currentline.startswith(b'done'): 1542 + if self._done_callback: 1543 + self._done_callback() 1544 + self._parse_literal_command() 1545 + # Prevent confusion from others writing additional stuff that'll just 1546 + # be ignored 1547 + self._output.close() 1548 + elif self._currentline.startswith(b'#'): 1549 + self._parse_literal_command() 1550 + elif self._currentline.startswith(b'get-mark') or \ 1551 + self._currentline.startswith(b'cat-blob') or \ 1552 + self._currentline.startswith(b'ls'): 1553 + raise SystemExit(_("Unsupported command: '%s'") % self._currentline) 1554 + else: 1555 + raise SystemExit(_("Could not parse line: '%s'") % self._currentline) 1556 + 1557 + def get_exported_and_imported_refs(self): 1558 + return self._exported_refs, self._imported_refs 1559 + 1560 + def record_id_rename(old_id, new_id): 1561 + """ 1562 + Register a new translation 1563 + """ 1564 + handle_transitivity = True 1565 + _IDS.record_rename(old_id, new_id, handle_transitivity) 1566 + 1567 + # Internal globals 1568 + _IDS = _IDs() 1569 + _SKIPPED_COMMITS = set() 1570 + BLOB_HASH_TO_NEW_ID = {} 1571 + BLOB_NEW_ID_TO_HASH = {} 1572 + sdr_next_steps = _(""" 1573 + NEXT STEPS FOR YOUR SENSITIVE DATA REMOVAL: 1574 + * If you are doing your rewrite in multiple steps, ignore these next steps 1575 + until you have completed all your invocations of git-filter-repo. 1576 + * See the "Sensitive Data Removal" subsection of the "DISCUSSION" section 1577 + of the manual for more details about any of the steps below. 1578 + * Inspect this repository and verify that the sensitive data is indeed 1579 + completely removed from all commits. 1580 + * Force push the rewritten history to the server: 1581 + %s 1582 + * Contact the server admins for additional steps they need to take; the 1583 + First Changed Commit(s)%s may come in handy here. 1584 + * Have other colleagues with a clone either discard their clone and reclone 1585 + OR follow the detailed steps in the manual to repeatedly rebase and 1586 + purge the sensitive data from their copy. Again, the First Changed 1587 + Commit(s)%s may come in handy. 1588 + * See the "Prevent repeats and avoid future sensitive data spills" section 1589 + of the manual. 1590 + """[1:]) 1591 + 1592 + class SubprocessWrapper(object): 1593 + @staticmethod 1594 + def decodify(args): 1595 + if type(args) == str: 1596 + return args 1597 + else: 1598 + assert type(args) == list 1599 + return [decode(x) if type(x)==bytes else x for x in args] 1600 + 1601 + @staticmethod 1602 + def call(*args, **kwargs): 1603 + if 'cwd' in kwargs: 1604 + kwargs['cwd'] = decode(kwargs['cwd']) 1605 + return subprocess.call(SubprocessWrapper.decodify(*args), **kwargs) 1606 + 1607 + @staticmethod 1608 + def check_output(*args, **kwargs): 1609 + if 'cwd' in kwargs: 1610 + kwargs['cwd'] = decode(kwargs['cwd']) 1611 + return subprocess.check_output(SubprocessWrapper.decodify(*args), **kwargs) 1612 + 1613 + @staticmethod 1614 + def check_call(*args, **kwargs): # pragma: no cover # used by filter-lamely 1615 + if 'cwd' in kwargs: 1616 + kwargs['cwd'] = decode(kwargs['cwd']) 1617 + return subprocess.check_call(SubprocessWrapper.decodify(*args), **kwargs) 1618 + 1619 + @staticmethod 1620 + def Popen(*args, **kwargs): 1621 + if 'cwd' in kwargs: 1622 + kwargs['cwd'] = decode(kwargs['cwd']) 1623 + return subprocess.Popen(SubprocessWrapper.decodify(*args), **kwargs) 1624 + 1625 + subproc = subprocess 1626 + if platform.system() == 'Windows' or 'PRETEND_UNICODE_ARGS' in os.environ: 1627 + subproc = SubprocessWrapper 1628 + 1629 + class GitUtils(object): 1630 + @staticmethod 1631 + def get_commit_count(repo, *args): 1632 + """ 1633 + Return the number of commits that have been made on repo. 1634 + """ 1635 + if not args: 1636 + args = ['--all'] 1637 + if len(args) == 1 and isinstance(args[0], list): 1638 + args = args[0] 1639 + p = subproc.Popen(["git", "rev-list", "--count"] + args, 1640 + stdout=subprocess.PIPE, stderr=subprocess.PIPE, 1641 + cwd=repo) 1642 + if p.wait() != 0: 1643 + raise SystemExit(_("%s does not appear to be a valid git repository") 1644 + % decode(repo)) 1645 + return int(p.stdout.read()) 1646 + 1647 + @staticmethod 1648 + def get_total_objects(repo): 1649 + """ 1650 + Return the number of objects (both packed and unpacked) 1651 + """ 1652 + p1 = subproc.Popen(["git", "count-objects", "-v"], 1653 + stdout=subprocess.PIPE, cwd=repo) 1654 + lines = p1.stdout.read().splitlines() 1655 + # Return unpacked objects + packed-objects 1656 + return int(lines[0].split()[1]) + int(lines[2].split()[1]) 1657 + 1658 + @staticmethod 1659 + def is_repository_bare(repo_working_dir): 1660 + out = subproc.check_output('git rev-parse --is-bare-repository'.split(), 1661 + cwd=repo_working_dir) 1662 + return (out.strip() == b'true') 1663 + 1664 + @staticmethod 1665 + def determine_git_dir(repo_working_dir): 1666 + d = subproc.check_output('git rev-parse --git-dir'.split(), 1667 + cwd=repo_working_dir).strip() 1668 + if repo_working_dir==b'.' or d.startswith(b'/'): 1669 + return d 1670 + return os.path.join(repo_working_dir, d) 1671 + 1672 + @staticmethod 1673 + def get_refs(repo_working_dir): 1674 + try: 1675 + output = subproc.check_output('git show-ref'.split(), 1676 + cwd=repo_working_dir) 1677 + except subprocess.CalledProcessError as e: 1678 + # If error code is 1, there just aren't any refs; i.e. new repo. 1679 + # If error code is other than 1, some other error (e.g. not a git repo) 1680 + if e.returncode != 1: 1681 + raise SystemExit('fatal: {}'.format(e)) 1682 + output = '' 1683 + return dict(reversed(x.split()) for x in output.splitlines()) 1684 + 1685 + @staticmethod 1686 + def get_config_settings(repo_working_dir): 1687 + output = '' 1688 + try: 1689 + output = subproc.check_output('git config --list --null'.split(), 1690 + cwd=repo_working_dir) 1691 + except subprocess.CalledProcessError as e: # pragma: no cover 1692 + raise SystemExit('fatal: {}'.format(e)) 1693 + 1694 + # FIXME: Ignores multi-valued keys, just let them overwrite for now 1695 + return dict(item.split(b'\n', maxsplit=1) 1696 + for item in output.strip().split(b"\0") if item) 1697 + 1698 + @staticmethod 1699 + def get_blob_sizes(quiet = False): 1700 + blob_size_progress = ProgressWriter() 1701 + num_blobs = 0 1702 + processed_blobs_msg = _("Processed %d blob sizes") 1703 + 1704 + # Get sizes of blobs by sha1 1705 + cmd = '--batch-check=%(objectname) %(objecttype) ' + \ 1706 + '%(objectsize) %(objectsize:disk)' 1707 + cf = subproc.Popen(['git', 'cat-file', '--batch-all-objects', cmd], 1708 + bufsize = -1, 1709 + stdout = subprocess.PIPE) 1710 + unpacked_size = {} 1711 + packed_size = {} 1712 + for line in cf.stdout: 1713 + try: 1714 + sha, objtype, objsize, objdisksize = line.split() 1715 + objsize, objdisksize = int(objsize), int(objdisksize) 1716 + if objtype == b'blob': 1717 + unpacked_size[sha] = objsize 1718 + packed_size[sha] = objdisksize 1719 + num_blobs += 1 1720 + except ValueError: # pragma: no cover 1721 + sys.stderr.write(_("Error: unexpected `git cat-file` output: \"%s\"\n") % line) 1722 + if not quiet: 1723 + blob_size_progress.show(processed_blobs_msg % num_blobs) 1724 + cf.wait() 1725 + if not quiet: 1726 + blob_size_progress.finish() 1727 + return unpacked_size, packed_size 1728 + 1729 + @staticmethod 1730 + def get_file_changes(repo, parent_hash, commit_hash): 1731 + """ 1732 + Return a FileChanges list with the differences between parent_hash 1733 + and commit_hash 1734 + """ 1735 + file_changes = [] 1736 + 1737 + cmd = ["git", "diff-tree", "-r", parent_hash, commit_hash] 1738 + output = subproc.check_output(cmd, cwd=repo) 1739 + for line in output.splitlines(): 1740 + fileinfo, path = line.split(b'\t', 1) 1741 + if path.startswith(b'"'): 1742 + path = PathQuoting.dequote(path) 1743 + oldmode, mode, oldhash, newhash, changetype = fileinfo.split() 1744 + if changetype == b'D': 1745 + file_changes.append(FileChange(b'D', path)) 1746 + elif changetype in (b'A', b'M', b'T'): 1747 + identifier = BLOB_HASH_TO_NEW_ID.get(newhash, newhash) 1748 + file_changes.append(FileChange(b'M', path, identifier, mode)) 1749 + else: # pragma: no cover 1750 + raise SystemExit("Unknown change type for line {}".format(line)) 1751 + 1752 + return file_changes 1753 + 1754 + @staticmethod 1755 + def print_my_version(): 1756 + with open(__file__, 'br') as f: 1757 + contents = f.read() 1758 + # If people replaced @@LOCALEDIR@@ string to point at their local 1759 + # directory, undo it so we can get original source version. 1760 + contents = re.sub(br'\A#\!.*', 1761 + br'#!/usr/bin/env python3', contents) 1762 + contents = re.sub(br'(\("GIT_TEXTDOMAINDIR"\) or ").*"', 1763 + br'\1@@LOCALEDIR@@"', contents) 1764 + 1765 + cmd = 'git hash-object --stdin'.split() 1766 + version = subproc.check_output(cmd, input=contents).strip() 1767 + print(decode(version[0:12])) 1768 + 1769 + class FilteringOptions(object): 1770 + default_replace_text = b'***REMOVED***' 1771 + class AppendFilter(argparse.Action): 1772 + def __call__(self, parser, namespace, values, option_string=None): 1773 + user_path = values 1774 + suffix = option_string[len('--path-'):] or 'match' 1775 + if suffix.startswith('rename'): 1776 + mod_type = 'rename' 1777 + match_type = option_string[len('--path-rename-'):] or 'match' 1778 + values = values.split(b':') 1779 + if len(values) != 2: 1780 + raise SystemExit(_("Error: --path-rename expects one colon in its" 1781 + " argument: <old_name:new_name>.")) 1782 + if values[0] and values[1] and not ( 1783 + values[0].endswith(b'/') == values[1].endswith(b'/')): 1784 + raise SystemExit(_("Error: With --path-rename, if OLD_NAME and " 1785 + "NEW_NAME are both non-empty and either ends " 1786 + "with a slash then both must.")) 1787 + if any(v.startswith(b'/') for v in values): 1788 + raise SystemExit(_("Error: Pathnames cannot begin with a '/'")) 1789 + components = values[0].split(b'/') + values[1].split(b'/') 1790 + else: 1791 + mod_type = 'filter' 1792 + match_type = suffix 1793 + components = values.split(b'/') 1794 + if values.startswith(b'/'): 1795 + raise SystemExit(_("Error: Pathnames cannot begin with a '/'")) 1796 + for illegal_path in [b'.', b'..']: 1797 + if illegal_path in components: 1798 + raise SystemExit(_("Error: Invalid path component '%s' found in '%s'") 1799 + % (decode(illegal_path), decode(user_path))) 1800 + if match_type == 'regex': 1801 + values = re.compile(values) 1802 + items = getattr(namespace, self.dest, []) or [] 1803 + items.append((mod_type, match_type, values)) 1804 + if (match_type, mod_type) == ('glob', 'filter'): 1805 + if not values.endswith(b'*'): 1806 + extension = b'*' if values.endswith(b'/') else b'/*' 1807 + items.append((mod_type, match_type, values+extension)) 1808 + setattr(namespace, self.dest, items) 1809 + 1810 + class HelperFilter(argparse.Action): 1811 + def __call__(self, parser, namespace, values, option_string=None): 1812 + af = FilteringOptions.AppendFilter(dest='path_changes', 1813 + option_strings=None) 1814 + dirname = values if values[-1:] == b'/' else values+b'/' 1815 + if option_string == '--subdirectory-filter': 1816 + af(parser, namespace, dirname, '--path-match') 1817 + af(parser, namespace, dirname+b':', '--path-rename') 1818 + elif option_string == '--to-subdirectory-filter': 1819 + af(parser, namespace, b':'+dirname, '--path-rename') 1820 + else: 1821 + raise SystemExit(_("Error: HelperFilter given invalid option_string: %s") 1822 + % option_string) # pragma: no cover 1823 + 1824 + class FileWithPathsFilter(argparse.Action): 1825 + def __call__(self, parser, namespace, values, option_string=None): 1826 + if not namespace.path_changes: 1827 + namespace.path_changes = [] 1828 + namespace.path_changes += FilteringOptions.get_paths_from_file(values) 1829 + 1830 + @staticmethod 1831 + def create_arg_parser(): 1832 + # Include usage in the summary, so we can put the description first 1833 + summary = _('''Rewrite (or analyze) repository history 1834 + 1835 + git-filter-repo destructively rewrites history (unless --analyze or 1836 + --dry-run are given) according to specified rules. It refuses to do any 1837 + rewriting unless either run from a clean fresh clone, or --force was 1838 + given. 1839 + 1840 + Basic Usage: 1841 + git-filter-repo --analyze 1842 + git-filter-repo [FILTER/RENAME/CONTROL OPTIONS] 1843 + 1844 + See EXAMPLES section for details. 1845 + ''').rstrip() 1846 + 1847 + # Provide a long helpful examples section 1848 + example_text = _('''CALLBACKS 1849 + 1850 + Most callback functions are of the same general format. For a command line 1851 + argument like 1852 + --foo-callback 'BODY' 1853 + 1854 + the following code will be compiled and called: 1855 + def foo_callback(foo): 1856 + BODY 1857 + 1858 + The exception on callbacks is the --file-info-callback, which will be 1859 + discussed further below. 1860 + 1861 + Given the callback style, we can thus make a simple callback to replace 1862 + 'Jon' with 'John' in author/committer/tagger names: 1863 + git filter-repo --name-callback 'return name.replace(b"Jon", b"John")' 1864 + 1865 + To remove all 'Tested-by' tags in commit (or tag) messages: 1866 + git filter-repo --message-callback 'return re.sub(br"\\nTested-by:.*", "", message)' 1867 + 1868 + To remove all .DS_Store files: 1869 + git filter-repo --filename-callback 'return None if os.path.basename(filename) == b".DS_Store" else filename' 1870 + 1871 + Note that if BODY resolves to a filename, then the contents of that file 1872 + will be used as the BODY in the callback function. 1873 + 1874 + The --file-info-callback has a more involved function callback; for it the 1875 + following code will be compiled and called: 1876 + def file_info_callback(filename, mode, blob_id, value): 1877 + BODY 1878 + 1879 + It is designed to be used in cases where filtering depends on both 1880 + filename and contents (and maybe mode). It is called for file changes 1881 + other than deletions (since deletions have no file contents to operate 1882 + on). This callback is expected to return a tuple of (filename, mode, 1883 + blob_id). It can make use of the following functions from the value 1884 + instance: 1885 + value.get_contents_by_identifier(blob_id) -> contents (bytestring) 1886 + value.get_size_by_identifier(blob_id) -> size_of_blob (int) 1887 + value.insert_file_with_contents(contents) -> blob_id 1888 + value.is_binary(contents) -> bool 1889 + value.apply_replace_text(contents) -> new_contents (bytestring) 1890 + and can read/write the following data member from the value instance: 1891 + value.data (dict) 1892 + 1893 + The filename can be used for renaming the file similar to 1894 + --filename-callback (or None to drop the change), and mode is one 1895 + of b'100644', b'100755', b'120000', or b'160000'. 1896 + 1897 + For more detailed examples and explanations AND caveats, see 1898 + https://htmlpreview.github.io/?https://github.com/newren/git-filter-repo/blob/docs/html/git-filter-repo.html#CALLBACKS 1899 + 1900 + EXAMPLES 1901 + 1902 + To get a bunch of reports mentioning renames that have occurred in 1903 + your repo and listing sizes of objects aggregated by any of path, 1904 + directory, extension, or blob-id: 1905 + git filter-repo --analyze 1906 + 1907 + (These reports can help you choose how to filter your repo; it can 1908 + be useful to re-run this command after filtering to regenerate the 1909 + report and verify the changes look correct.) 1910 + 1911 + To extract the history that touched just 'guides' and 'tools/releases': 1912 + git filter-repo --path guides/ --path tools/releases 1913 + 1914 + To remove foo.zip and bar/baz/zips from every revision in history: 1915 + git filter-repo --path foo.zip --path bar/baz/zips/ --invert-paths 1916 + 1917 + To replace the text 'password' with 'p455w0rd': 1918 + git filter-repo --replace-text <(echo "password==>p455w0rd") 1919 + 1920 + To use the current version of the .mailmap file to update authors, 1921 + committers, and taggers throughout history and make it permanent: 1922 + git filter-repo --use-mailmap 1923 + 1924 + To extract the history of 'src/', rename all files to have a new leading 1925 + directory 'my-module' (e.g. src/foo.java -> my-module/src/foo.java), and 1926 + add a 'my-module-' prefix to all tags: 1927 + git filter-repo --path src/ --to-subdirectory-filter my-module --tag-rename '':'my-module-' 1928 + 1929 + For more detailed examples and explanations, see 1930 + https://htmlpreview.github.io/?https://github.com/newren/git-filter-repo/blob/docs/html/git-filter-repo.html#EXAMPLES''') 1931 + 1932 + # Create the basic parser 1933 + parser = argparse.ArgumentParser(description=summary, 1934 + usage = argparse.SUPPRESS, 1935 + add_help = False, 1936 + epilog = example_text, 1937 + formatter_class=argparse.RawDescriptionHelpFormatter) 1938 + 1939 + analyze = parser.add_argument_group(title=_("Analysis")) 1940 + analyze.add_argument('--analyze', action='store_true', 1941 + help=_("Analyze repository history and create a report that may be " 1942 + "useful in determining what to filter in a subsequent run. " 1943 + "Will not modify your repo.")) 1944 + analyze.add_argument('--report-dir', 1945 + metavar='DIR_OR_FILE', 1946 + type=os.fsencode, 1947 + dest='report_dir', 1948 + help=_("Directory to write report, defaults to GIT_DIR/filter_repo/analysis," 1949 + "refuses to run if exists, --force delete existing dir first.")) 1950 + 1951 + path = parser.add_argument_group(title=_("Filtering based on paths " 1952 + "(see also --filename-callback)"), 1953 + description=textwrap.dedent(_(""" 1954 + These options specify the paths to select. Note that much like git 1955 + itself, renames are NOT followed so you may need to specify multiple 1956 + paths, e.g. `--path olddir/ --path newdir/` 1957 + """[1:]))) 1958 + 1959 + path.add_argument('--invert-paths', action='store_false', dest='inclusive', 1960 + help=_("Invert the selection of files from the specified " 1961 + "--path-{match,glob,regex} options below, i.e. only select " 1962 + "files matching none of those options.")) 1963 + 1964 + path.add_argument('--path-match', '--path', metavar='DIR_OR_FILE', 1965 + type=os.fsencode, 1966 + action=FilteringOptions.AppendFilter, dest='path_changes', 1967 + help=_("Exact paths (files or directories) to include in filtered " 1968 + "history. Multiple --path options can be specified to get " 1969 + "a union of paths.")) 1970 + path.add_argument('--path-glob', metavar='GLOB', type=os.fsencode, 1971 + action=FilteringOptions.AppendFilter, dest='path_changes', 1972 + help=_("Glob of paths to include in filtered history. Multiple " 1973 + "--path-glob options can be specified to get a union of " 1974 + "paths.")) 1975 + path.add_argument('--path-regex', metavar='REGEX', type=os.fsencode, 1976 + action=FilteringOptions.AppendFilter, dest='path_changes', 1977 + help=_("Regex of paths to include in filtered history. Multiple " 1978 + "--path-regex options can be specified to get a union of " 1979 + "paths")) 1980 + path.add_argument('--use-base-name', action='store_true', 1981 + help=_("Match on file base name instead of full path from the top " 1982 + "of the repo. Incompatible with --path-rename, and " 1983 + "incompatible with matching against directory names.")) 1984 + 1985 + rename = parser.add_argument_group(title=_("Renaming based on paths " 1986 + "(see also --filename-callback)")) 1987 + rename.add_argument('--path-rename', '--path-rename-match', 1988 + metavar='OLD_NAME:NEW_NAME', dest='path_changes', type=os.fsencode, 1989 + action=FilteringOptions.AppendFilter, 1990 + help=_("Path to rename; if filename or directory matches OLD_NAME " 1991 + "rename to NEW_NAME. Multiple --path-rename options can be " 1992 + "specified. NOTE: If you combine filtering options with " 1993 + "renaming ones, do not rely on a rename argument to select " 1994 + "paths; you also need a filter to select them.")) 1995 + 1996 + helpers = parser.add_argument_group(title=_("Path shortcuts")) 1997 + helpers.add_argument('--paths', help=argparse.SUPPRESS, metavar='IGNORE') 1998 + helpers.add_argument('--paths-from-file', metavar='FILENAME', 1999 + type=os.fsencode, 2000 + action=FilteringOptions.FileWithPathsFilter, dest='path_changes', 2001 + help=_("Specify several path filtering and renaming directives, one " 2002 + "per line. Lines with '==>' in them specify path renames, " 2003 + "and lines can begin with 'literal:' (the default), 'glob:', " 2004 + "or 'regex:' to specify different matching styles. Blank " 2005 + "lines and lines starting with a '#' are ignored.")) 2006 + helpers.add_argument('--subdirectory-filter', metavar='DIRECTORY', 2007 + action=FilteringOptions.HelperFilter, type=os.fsencode, 2008 + help=_("Only look at history that touches the given subdirectory " 2009 + "and treat that directory as the project root. Equivalent " 2010 + "to using '--path DIRECTORY/ --path-rename DIRECTORY/:'")) 2011 + helpers.add_argument('--to-subdirectory-filter', metavar='DIRECTORY', 2012 + action=FilteringOptions.HelperFilter, type=os.fsencode, 2013 + help=_("Treat the project root as if it were under DIRECTORY. " 2014 + "Equivalent to using '--path-rename :DIRECTORY/'")) 2015 + 2016 + contents = parser.add_argument_group(title=_("Content editing filters " 2017 + "(see also --blob-callback)")) 2018 + contents.add_argument('--replace-text', metavar='EXPRESSIONS_FILE', 2019 + help=_("A file with expressions that, if found, will be replaced. " 2020 + "By default, each expression is treated as literal text, " 2021 + "but 'regex:' and 'glob:' prefixes are supported. You can " 2022 + "end the line with '==>' and some replacement text to " 2023 + "choose a replacement choice other than the default of '{}'." 2024 + .format(decode(FilteringOptions.default_replace_text)))) 2025 + contents.add_argument('--strip-blobs-bigger-than', metavar='SIZE', 2026 + dest='max_blob_size', default=0, 2027 + help=_("Strip blobs (files) bigger than specified size (e.g. '5M', " 2028 + "'2G', etc)")) 2029 + contents.add_argument('--strip-blobs-with-ids', metavar='BLOB-ID-FILENAME', 2030 + help=_("Read git object ids from each line of the given file, and " 2031 + "strip all of them from history")) 2032 + 2033 + refrename = parser.add_argument_group(title=_("Renaming of refs " 2034 + "(see also --refname-callback)")) 2035 + refrename.add_argument('--tag-rename', metavar='OLD:NEW', type=os.fsencode, 2036 + help=_("Rename tags starting with OLD to start with NEW. For " 2037 + "example, --tag-rename foo:bar will rename tag foo-1.2.3 " 2038 + "to bar-1.2.3; either OLD or NEW can be empty.")) 2039 + 2040 + messages = parser.add_argument_group(title=_("Filtering of commit messages " 2041 + "(see also --message-callback)")) 2042 + messages.add_argument('--replace-message', metavar='EXPRESSIONS_FILE', 2043 + help=_("A file with expressions that, if found in commit or tag " 2044 + "messages, will be replaced. This file uses the same syntax " 2045 + "as --replace-text.")) 2046 + messages.add_argument('--preserve-commit-hashes', action='store_true', 2047 + help=_("By default, since commits are rewritten and thus gain new " 2048 + "hashes, references to old commit hashes in commit messages " 2049 + "are replaced with new commit hashes (abbreviated to the same " 2050 + "length as the old reference). Use this flag to turn off " 2051 + "updating commit hashes in commit messages.")) 2052 + messages.add_argument('--preserve-commit-encoding', action='store_true', 2053 + help=_("Do not reencode commit messages into UTF-8. By default, if " 2054 + "the commit object specifies an encoding for the commit " 2055 + "message, the message is re-encoded into UTF-8.")) 2056 + 2057 + people = parser.add_argument_group(title=_("Filtering of names & emails " 2058 + "(see also --name-callback " 2059 + "and --email-callback)")) 2060 + people.add_argument('--mailmap', dest='mailmap', metavar='FILENAME', 2061 + type=os.fsencode, 2062 + help=_("Use specified mailmap file (see git-shortlog(1) for " 2063 + "details on the format) when rewriting author, committer, " 2064 + "and tagger names and emails. If the specified file is " 2065 + "part of git history, historical versions of the file will " 2066 + "be ignored; only the current contents are consulted.")) 2067 + people.add_argument('--use-mailmap', dest='mailmap', 2068 + action='store_const', const=b'.mailmap', 2069 + help=_("Same as: '--mailmap .mailmap' ")) 2070 + 2071 + parents = parser.add_argument_group(title=_("Parent rewriting")) 2072 + parents.add_argument('--replace-refs', default=None, 2073 + choices=['delete-no-add', 'delete-and-add', 2074 + 'update-no-add', 'update-or-add', 2075 + 'update-and-add', 'old-default'], 2076 + help=_("How to handle replace refs (see git-replace(1)). Replace " 2077 + "refs can be added during the history rewrite as a way to " 2078 + "allow users to pass old commit IDs (from before " 2079 + "git-filter-repo was run) to git commands and have git know " 2080 + "how to translate those old commit IDs to the new " 2081 + "(post-rewrite) commit IDs. Also, replace refs that existed " 2082 + "before the rewrite can either be deleted or updated. The " 2083 + "choices to pass to --replace-refs thus need to specify both " 2084 + "what to do with existing refs and what to do with commit " 2085 + "rewrites. Thus 'update-and-add' means to update existing " 2086 + "replace refs, and for any commit rewrite (even if already " 2087 + "pointed at by a replace ref) add a new refs/replace/ reference " 2088 + "to map from the old commit ID to the new commit ID. The " 2089 + "default is update-no-add, meaning update existing replace refs " 2090 + "but do not add any new ones. There is also a special " 2091 + "'old-default' option for picking the default used in versions " 2092 + "prior to git-filter-repo-2.45, namely 'update-and-add' upon " 2093 + "the first run of git-filter-repo in a repository and " 2094 + "'update-or-add' if running git-filter-repo again on a " 2095 + "repository.")) 2096 + parents.add_argument('--prune-empty', default='auto', 2097 + choices=['always', 'auto', 'never'], 2098 + help=_("Whether to prune empty commits. 'auto' (the default) means " 2099 + "only prune commits which become empty (not commits which were " 2100 + "empty in the original repo, unless their parent was pruned). " 2101 + "When the parent of a commit is pruned, the first non-pruned " 2102 + "ancestor becomes the new parent.")) 2103 + parents.add_argument('--prune-degenerate', default='auto', 2104 + choices=['always', 'auto', 'never'], 2105 + help=_("Since merge commits are needed for history topology, they " 2106 + "are typically exempt from pruning. However, they can become " 2107 + "degenerate with the pruning of other commits (having fewer " 2108 + "than two parents, having one commit serve as both parents, or " 2109 + "having one parent as the ancestor of the other.) If such " 2110 + "merge commits have no file changes, they can be pruned. The " 2111 + "default ('auto') is to only prune empty merge commits which " 2112 + "become degenerate (not which started as such).")) 2113 + parents.add_argument('--no-ff', action='store_true', 2114 + help=_("Even if the first parent is or becomes an ancestor of another " 2115 + "parent, do not prune it. This modifies how " 2116 + "--prune-degenerate behaves, and may be useful in projects who " 2117 + "always use merge --no-ff.")) 2118 + 2119 + callback = parser.add_argument_group(title=_("Generic callback code snippets")) 2120 + callback.add_argument('--filename-callback', metavar="FUNCTION_BODY_OR_FILE", 2121 + help=_("Python code body for processing filenames; see CALLBACKS " 2122 + "sections below.")) 2123 + callback.add_argument('--file-info-callback', metavar="FUNCTION_BODY_OR_FILE", 2124 + help=_("Python code body for processing file and metadata; see " 2125 + "CALLBACKS sections below.")) 2126 + callback.add_argument('--message-callback', metavar="FUNCTION_BODY_OR_FILE", 2127 + help=_("Python code body for processing messages (both commit " 2128 + "messages and tag messages); see CALLBACKS section below.")) 2129 + callback.add_argument('--name-callback', metavar="FUNCTION_BODY_OR_FILE", 2130 + help=_("Python code body for processing names of people; see " 2131 + "CALLBACKS section below.")) 2132 + callback.add_argument('--email-callback', metavar="FUNCTION_BODY_OR_FILE", 2133 + help=_("Python code body for processing emails addresses; see " 2134 + "CALLBACKS section below.")) 2135 + callback.add_argument('--refname-callback', metavar="FUNCTION_BODY_OR_FILE", 2136 + help=_("Python code body for processing refnames; see CALLBACKS " 2137 + "section below.")) 2138 + 2139 + callback.add_argument('--blob-callback', metavar="FUNCTION_BODY_OR_FILE", 2140 + help=_("Python code body for processing blob objects; see " 2141 + "CALLBACKS section below.")) 2142 + callback.add_argument('--commit-callback', metavar="FUNCTION_BODY_OR_FILE", 2143 + help=_("Python code body for processing commit objects; see " 2144 + "CALLBACKS section below.")) 2145 + callback.add_argument('--tag-callback', metavar="FUNCTION_BODY_OR_FILE", 2146 + help=_("Python code body for processing tag objects. Note that " 2147 + "lightweight tags have no tag object and are thus not " 2148 + "handled by this callback. See CALLBACKS section below.")) 2149 + callback.add_argument('--reset-callback', metavar="FUNCTION_BODY_OR_FILE", 2150 + help=_("Python code body for processing reset objects; see " 2151 + "CALLBACKS section below.")) 2152 + 2153 + sdr = parser.add_argument_group(title=_("Sensitive Data Removal Handling")) 2154 + sdr.add_argument('--sensitive-data-removal', '--sdr', action='store_true', 2155 + help=_("This rewrite is intended to remove sensitive data from a " 2156 + "repository. Gather extra information from the rewrite needed " 2157 + "to provide additional instructions on how to clean up other " 2158 + "copies.")) 2159 + sdr.add_argument('--no-fetch', action='store_true', 2160 + help=_("By default, --sensitive-data-removal will trigger a " 2161 + "mirror-like fetch of all refs from origin, discarding local " 2162 + "changes, but ensuring that _all_ fetchable refs that hold on " 2163 + "to the sensitve data are rewritten. This flag removes that " 2164 + "fetch, risking that other refs continue holding on to the " 2165 + "sensitive data. This option is implied by --partial or any " 2166 + "flag that implies --partial.")) 2167 + 2168 + desc = _( 2169 + "Specifying alternate source or target locations implies --partial,\n" 2170 + "except that the normal default for --replace-refs is used. However,\n" 2171 + "unlike normal uses of --partial, this doesn't risk mixing old and new\n" 2172 + "history since the old and new histories are in different repositories.") 2173 + location = parser.add_argument_group(title=_("Location to filter from/to"), 2174 + description=desc) 2175 + location.add_argument('--source', type=os.fsencode, 2176 + help=_("Git repository to read from")) 2177 + location.add_argument('--target', type=os.fsencode, 2178 + help=_("Git repository to overwrite with filtered history")) 2179 + 2180 + order = parser.add_argument_group(title=_("Ordering of commits")) 2181 + order.add_argument('--date-order', action='store_true', 2182 + help=_("Processes commits in commit timestamp order.")) 2183 + 2184 + misc = parser.add_argument_group(title=_("Miscellaneous options")) 2185 + misc.add_argument('--help', '-h', action='store_true', 2186 + help=_("Show this help message and exit.")) 2187 + misc.add_argument('--version', action='store_true', 2188 + help=_("Display filter-repo's version and exit.")) 2189 + misc.add_argument('--proceed', action='store_true', 2190 + help=_("Avoid triggering the no-arguments-specified check.")) 2191 + misc.add_argument('--force', '-f', action='store_true', 2192 + help=_("Rewrite repository history even if the current repo does not " 2193 + "look like a fresh clone. History rewriting is irreversible " 2194 + "(and includes immediate pruning of reflogs and old objects), " 2195 + "so be cautious about using this flag.")) 2196 + misc.add_argument('--partial', action='store_true', 2197 + help=_("Do a partial history rewrite, resulting in the mixture of " 2198 + "old and new history. This disables rewriting " 2199 + "refs/remotes/origin/* to refs/heads/*, disables removing " 2200 + "of the 'origin' remote, disables removing unexported refs, " 2201 + "disables expiring the reflog, and disables the automatic " 2202 + "post-filter gc. Also, this modifies --tag-rename and " 2203 + "--refname-callback options such that instead of replacing " 2204 + "old refs with new refnames, it will instead create new " 2205 + "refs and keep the old ones around. Use with caution.")) 2206 + misc.add_argument('--no-gc', action='store_true', 2207 + help=_("Do not run 'git gc' after filtering.")) 2208 + # WARNING: --refs presents a problem with become-degenerate pruning: 2209 + # * Excluding a commit also excludes its ancestors so when some other 2210 + # commit has an excluded ancestor as a parent we have no way of 2211 + # knowing what it is an ancestor of without doing a special 2212 + # full-graph walk. 2213 + misc.add_argument('--refs', nargs='+', 2214 + help=_("Limit history rewriting to the specified refs. Implies " 2215 + "--partial. In addition to the normal caveats of --partial " 2216 + "(mixing old and new history, no automatic remapping of " 2217 + "refs/remotes/origin/* to refs/heads/*, etc.), this also may " 2218 + "cause problems for pruning of degenerate empty merge " 2219 + "commits when negative revisions are specified.")) 2220 + 2221 + misc.add_argument('--dry-run', action='store_true', 2222 + help=_("Do not change the repository. Run `git fast-export` and " 2223 + "filter its output, and save both the original and the " 2224 + "filtered version for comparison. This also disables " 2225 + "rewriting commit messages due to not knowing new commit " 2226 + "IDs and disables filtering of some empty commits due to " 2227 + "inability to query the fast-import backend." )) 2228 + misc.add_argument('--debug', action='store_true', 2229 + help=_("Print additional information about operations being " 2230 + "performed and commands being run. When used together " 2231 + "with --dry-run, also show extra information about what " 2232 + "would be run.")) 2233 + # WARNING: --state-branch has some problems: 2234 + # * It does not work well with manually inserted objects (user creating 2235 + # Blob() or Commit() or Tag() objects and calling 2236 + # RepoFilter.insert(obj) on them). 2237 + # * It does not work well with multiple source or multiple target repos 2238 + # * It doesn't work so well with pruning become-empty commits (though 2239 + # --refs doesn't work so well with it either) 2240 + # These are probably fixable, given some work (e.g. re-importing the 2241 + # graph at the beginning to get the AncestryGraph right, doing our own 2242 + # export of marks instead of using fast-export --export-marks, etc.), but 2243 + # for now just hide the option. 2244 + misc.add_argument('--state-branch', 2245 + #help=_("Enable incremental filtering by saving the mapping of old " 2246 + # "to new objects to the specified branch upon exit, and" 2247 + # "loading that mapping from that branch (if it exists) " 2248 + # "upon startup.")) 2249 + help=argparse.SUPPRESS) 2250 + misc.add_argument('--stdin', action='store_true', 2251 + help=_("Instead of running `git fast-export` and filtering its " 2252 + "output, filter the fast-export stream from stdin. The " 2253 + "stdin must be in the expected input format (e.g. it needs " 2254 + "to include original-oid directives).")) 2255 + misc.add_argument('--quiet', action='store_true', 2256 + help=_("Pass --quiet to other git commands called")) 2257 + return parser 2258 + 2259 + @staticmethod 2260 + def sanity_check_args(args): 2261 + if args.analyze and args.path_changes: 2262 + raise SystemExit(_("Error: --analyze is incompatible with --path* flags; " 2263 + "it's a read-only operation.")) 2264 + if args.analyze and args.stdin: 2265 + raise SystemExit(_("Error: --analyze is incompatible with --stdin.")) 2266 + # If no path_changes are found, initialize with empty list but mark as 2267 + # not inclusive so that all files match 2268 + if args.path_changes == None: 2269 + args.path_changes = [] 2270 + args.inclusive = False 2271 + else: 2272 + # Similarly, if we have no filtering paths, then no path should be 2273 + # filtered out. Based on how newname() works, the easiest way to 2274 + # achieve that is setting args.inclusive to False. 2275 + if not any(x[0] == 'filter' for x in args.path_changes): 2276 + args.inclusive = False 2277 + # Also check for incompatible --use-base-name and --path-rename flags. 2278 + if args.use_base_name: 2279 + if any(x[0] == 'rename' for x in args.path_changes): 2280 + raise SystemExit(_("Error: --use-base-name and --path-rename are " 2281 + "incompatible.")) 2282 + # Also throw some sanity checks on git version here; 2283 + # PERF: remove these checks once new enough git versions are common 2284 + p = subproc.Popen('git fast-export -h'.split(), 2285 + stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 2286 + output = p.stdout.read() 2287 + if b'--anonymize-map' not in output: # pragma: no cover 2288 + global date_format_permissive 2289 + date_format_permissive = False 2290 + if not any(x in output for x in [b'--mark-tags',b'--[no-]mark-tags']): # pragma: no cover 2291 + global write_marks 2292 + write_marks = False 2293 + if args.state_branch: 2294 + # We need a version of git-fast-export with --mark-tags 2295 + raise SystemExit(_("Error: need git >= 2.24.0")) 2296 + if not any(x in output for x in [b'--reencode', b'--[no-]reencode']): # pragma: no cover 2297 + if args.preserve_commit_encoding: 2298 + # We need a version of git-fast-export with --reencode 2299 + raise SystemExit(_("Error: need git >= 2.23.0")) 2300 + else: 2301 + # Set args.preserve_commit_encoding to None which we'll check for later 2302 + # to avoid passing --reencode=yes to fast-export (that option was the 2303 + # default prior to git-2.23) 2304 + args.preserve_commit_encoding = None 2305 + # If we don't have fast-exoprt --reencode, we may also be missing 2306 + # diff-tree --combined-all-paths, which is even more important... 2307 + p = subproc.Popen('git diff-tree -h'.split(), 2308 + stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 2309 + output = p.stdout.read() 2310 + if b'--combined-all-paths' not in output: 2311 + # We need a version of git-diff-tree with --combined-all-paths 2312 + raise SystemExit(_("Error: need git >= 2.22.0")) 2313 + if args.sensitive_data_removal: 2314 + p = subproc.Popen('git cat-file -h'.split(), 2315 + stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 2316 + output = p.stdout.read() 2317 + if b"--batch-command" not in output: # pragma: no cover 2318 + raise SystemExit(_("Error: need git >= 2.36.0")) 2319 + # End of sanity checks on git version 2320 + if args.max_blob_size: 2321 + suffix = args.max_blob_size[-1] 2322 + if suffix not in '1234567890': 2323 + mult = {'K': 1024, 'M': 1024**2, 'G': 1024**3} 2324 + if suffix not in mult: 2325 + raise SystemExit(_("Error: could not parse --strip-blobs-bigger-than" 2326 + " argument %s") 2327 + % args.max_blob_size) 2328 + args.max_blob_size = int(args.max_blob_size[0:-1]) * mult[suffix] 2329 + else: 2330 + args.max_blob_size = int(args.max_blob_size) 2331 + if args.file_info_callback and ( 2332 + args.stdin or args.blob_callback or args.filename_callback): 2333 + raise SystemExit(_("Error: --file-info-callback is incompatible with " 2334 + "--stdin, --blob-callback,\nand --filename-callback.")) 2335 + 2336 + @staticmethod 2337 + def get_replace_text(filename): 2338 + replace_literals = [] 2339 + replace_regexes = [] 2340 + with open(filename, 'br') as f: 2341 + for line in f: 2342 + line = line.rstrip(b'\r\n') 2343 + 2344 + # Determine the replacement 2345 + replacement = FilteringOptions.default_replace_text 2346 + if b'==>' in line: 2347 + line, replacement = line.rsplit(b'==>', 1) 2348 + 2349 + # See if we need to match via regex 2350 + regex = None 2351 + if line.startswith(b'regex:'): 2352 + regex = line[6:] 2353 + elif line.startswith(b'glob:'): 2354 + regex = glob_to_regex(line[5:]) 2355 + if regex: 2356 + replace_regexes.append((re.compile(regex), replacement)) 2357 + else: 2358 + # Otherwise, find the literal we need to replace 2359 + if line.startswith(b'literal:'): 2360 + line = line[8:] 2361 + if not line: 2362 + continue 2363 + replace_literals.append((line, replacement)) 2364 + return {'literals': replace_literals, 'regexes': replace_regexes} 2365 + 2366 + @staticmethod 2367 + def get_paths_from_file(filename): 2368 + new_path_changes = [] 2369 + with open(filename, 'br') as f: 2370 + for line in f: 2371 + line = line.rstrip(b'\r\n') 2372 + 2373 + # Skip blank lines 2374 + if not line: 2375 + continue 2376 + # Skip comment lines 2377 + if line.startswith(b'#'): 2378 + continue 2379 + 2380 + # Determine the replacement 2381 + match_type, repl = 'literal', None 2382 + if b'==>' in line: 2383 + line, repl = line.rsplit(b'==>', 1) 2384 + 2385 + # See if we need to match via regex 2386 + match_type = 'match' # a.k.a. 'literal' 2387 + if line.startswith(b'regex:'): 2388 + match_type = 'regex' 2389 + match = re.compile(line[6:]) 2390 + elif line.startswith(b'glob:'): 2391 + match_type = 'glob' 2392 + match = line[5:] 2393 + if repl: 2394 + raise SystemExit(_("Error: In %s, 'glob:' and '==>' are incompatible (renaming globs makes no sense)" % decode(filename))) 2395 + else: 2396 + if line.startswith(b'literal:'): 2397 + match = line[8:] 2398 + else: 2399 + match = line 2400 + if repl is not None: 2401 + if match and repl and match.endswith(b'/') != repl.endswith(b'/'): 2402 + raise SystemExit(_("Error: When rename directories, if OLDNAME " 2403 + "and NEW_NAME are both non-empty and either " 2404 + "ends with a slash then both must.")) 2405 + 2406 + # Record the filter or rename 2407 + if repl is not None: 2408 + new_path_changes.append(['rename', match_type, (match, repl)]) 2409 + else: 2410 + new_path_changes.append(['filter', match_type, match]) 2411 + if match_type == 'glob' and not match.endswith(b'*'): 2412 + extension = b'*' if match.endswith(b'/') else b'/*' 2413 + new_path_changes.append(['filter', match_type, match+extension]) 2414 + return new_path_changes 2415 + 2416 + @staticmethod 2417 + def default_options(): 2418 + return FilteringOptions.parse_args([], error_on_empty = False) 2419 + 2420 + @staticmethod 2421 + def parse_args(input_args, error_on_empty = True): 2422 + parser = FilteringOptions.create_arg_parser() 2423 + if not input_args and error_on_empty: 2424 + parser.print_usage() 2425 + raise SystemExit(_("No arguments specified.")) 2426 + args = parser.parse_args(input_args) 2427 + if args.help: 2428 + parser.print_help() 2429 + raise SystemExit() 2430 + if args.paths: 2431 + raise SystemExit("Error: Option `--paths` unrecognized; did you mean --path or --paths-from-file?") 2432 + if args.version: 2433 + GitUtils.print_my_version() 2434 + raise SystemExit() 2435 + FilteringOptions.sanity_check_args(args) 2436 + if args.mailmap: 2437 + args.mailmap = MailmapInfo(args.mailmap) 2438 + if args.replace_text: 2439 + args.replace_text = FilteringOptions.get_replace_text(args.replace_text) 2440 + if args.replace_message: 2441 + args.replace_message = FilteringOptions.get_replace_text(args.replace_message) 2442 + if args.strip_blobs_with_ids: 2443 + with open(args.strip_blobs_with_ids, 'br') as f: 2444 + args.strip_blobs_with_ids = set(f.read().split()) 2445 + else: 2446 + args.strip_blobs_with_ids = set() 2447 + if (args.partial or args.refs) and not args.replace_refs: 2448 + args.replace_refs = 'update-no-add' 2449 + args.repack = not (args.partial or args.refs or args.no_gc) 2450 + if args.refs or args.source or args.target: 2451 + args.partial = True 2452 + if args.partial: 2453 + args.no_fetch = True 2454 + if not args.refs: 2455 + args.refs = ['--all'] 2456 + return args 2457 + 2458 + class RepoAnalyze(object): 2459 + 2460 + # First, several helper functions for analyze_commit() 2461 + 2462 + @staticmethod 2463 + def equiv_class(stats, filename): 2464 + return stats['equivalence'].get(filename, (filename,)) 2465 + 2466 + @staticmethod 2467 + def setup_equivalence_for_rename(stats, oldname, newname): 2468 + # if A is renamed to B and B is renamed to C, then the user thinks of 2469 + # A, B, and C as all being different names for the same 'file'. We record 2470 + # this as an equivalence class: 2471 + # stats['equivalence'][name] = (A,B,C) 2472 + # for name being each of A, B, and C. 2473 + old_tuple = stats['equivalence'].get(oldname, ()) 2474 + if newname in old_tuple: 2475 + return 2476 + elif old_tuple: 2477 + new_tuple = tuple(list(old_tuple)+[newname]) 2478 + else: 2479 + new_tuple = (oldname, newname) 2480 + for f in new_tuple: 2481 + stats['equivalence'][f] = new_tuple 2482 + 2483 + @staticmethod 2484 + def setup_or_update_rename_history(stats, commit, oldname, newname): 2485 + rename_commits = stats['rename_history'].get(oldname, set()) 2486 + rename_commits.add(commit) 2487 + stats['rename_history'][oldname] = rename_commits 2488 + 2489 + @staticmethod 2490 + def handle_renames(stats, commit, change_types, filenames): 2491 + for index, change_type in enumerate(change_types): 2492 + if change_type == ord(b'R'): 2493 + oldname, newname = filenames[index], filenames[-1] 2494 + RepoAnalyze.setup_equivalence_for_rename(stats, oldname, newname) 2495 + RepoAnalyze.setup_or_update_rename_history(stats, commit, 2496 + oldname, newname) 2497 + 2498 + @staticmethod 2499 + def handle_file(stats, graph, commit, modes, shas, filenames): 2500 + mode, sha, filename = modes[-1], shas[-1], filenames[-1] 2501 + 2502 + # Figure out kind of deletions to undo for this file, and update lists 2503 + # of all-names-by-sha and all-filenames 2504 + delmode = 'tree_deletions' 2505 + if mode != b'040000': 2506 + delmode = 'file_deletions' 2507 + stats['names'][sha].add(filename) 2508 + stats['allnames'].add(filename) 2509 + 2510 + # If the file (or equivalence class of files) was recorded as deleted, 2511 + # clearly it isn't anymore 2512 + equiv = RepoAnalyze.equiv_class(stats, filename) 2513 + for f in equiv: 2514 + stats[delmode].pop(f, None) 2515 + 2516 + # If we get a modify/add for a path that was renamed, we may need to break 2517 + # the equivalence class. However, if the modify/add was on a branch that 2518 + # doesn't have the rename in its history, we are still okay. 2519 + need_to_break_equivalence = False 2520 + if equiv[-1] != filename: 2521 + for rename_commit in stats['rename_history'][filename]: 2522 + if graph.is_ancestor(rename_commit, commit): 2523 + need_to_break_equivalence = True 2524 + 2525 + if need_to_break_equivalence: 2526 + for f in equiv: 2527 + if f in stats['equivalence']: 2528 + del stats['equivalence'][f] 2529 + 2530 + @staticmethod 2531 + def analyze_commit(stats, graph, commit, parents, date, file_changes): 2532 + graph.add_commit_and_parents(commit, parents) 2533 + for change in file_changes: 2534 + modes, shas, change_types, filenames = change 2535 + if len(parents) == 1 and change_types.startswith(b'R'): 2536 + change_types = b'R' # remove the rename score; we don't care 2537 + if modes[-1] == b'160000': 2538 + continue 2539 + elif modes[-1] == b'000000': 2540 + # Track when files/directories are deleted 2541 + for f in RepoAnalyze.equiv_class(stats, filenames[-1]): 2542 + if any(x == b'040000' for x in modes[0:-1]): 2543 + stats['tree_deletions'][f] = date 2544 + else: 2545 + stats['file_deletions'][f] = date 2546 + elif change_types.strip(b'AMT') == b'': 2547 + RepoAnalyze.handle_file(stats, graph, commit, modes, shas, filenames) 2548 + elif modes[-1] == b'040000' and change_types.strip(b'RAM') == b'': 2549 + RepoAnalyze.handle_file(stats, graph, commit, modes, shas, filenames) 2550 + elif change_types.strip(b'RAMT') == b'': 2551 + RepoAnalyze.handle_file(stats, graph, commit, modes, shas, filenames) 2552 + RepoAnalyze.handle_renames(stats, commit, change_types, filenames) 2553 + else: 2554 + raise SystemExit(_("Unhandled change type(s): %(change_type)s " 2555 + "(in commit %(commit)s)") 2556 + % ({'change_type': change_types, 'commit': commit}) 2557 + ) # pragma: no cover 2558 + 2559 + @staticmethod 2560 + def gather_data(args): 2561 + unpacked_size, packed_size = GitUtils.get_blob_sizes() 2562 + stats = {'names': collections.defaultdict(set), 2563 + 'allnames' : set(), 2564 + 'file_deletions': {}, 2565 + 'tree_deletions': {}, 2566 + 'equivalence': {}, 2567 + 'rename_history': collections.defaultdict(set), 2568 + 'unpacked_size': unpacked_size, 2569 + 'packed_size': packed_size, 2570 + 'num_commits': 0} 2571 + 2572 + # Setup the rev-list/diff-tree process 2573 + processed_commits_msg = _("Processed %d commits") 2574 + commit_parse_progress = ProgressWriter() 2575 + num_commits = 0 2576 + cmd = ('git rev-list --topo-order --reverse {}'.format(' '.join(args.refs)) + 2577 + ' | git diff-tree --stdin --always --root --format=%H%n%P%n%cd' + 2578 + ' --date=short -M -t -c --raw --combined-all-paths') 2579 + dtp = subproc.Popen(cmd, shell=True, bufsize=-1, stdout=subprocess.PIPE) 2580 + f = dtp.stdout 2581 + line = f.readline() 2582 + if not line: 2583 + raise SystemExit(_("Nothing to analyze; repository is empty.")) 2584 + cont = bool(line) 2585 + graph = AncestryGraph() 2586 + while cont: 2587 + commit = line.rstrip() 2588 + parents = f.readline().split() 2589 + date = f.readline().rstrip() 2590 + 2591 + # We expect a blank line next; if we get a non-blank line then 2592 + # this commit modified no files and we need to move on to the next. 2593 + # If there is no line, we've reached end-of-input. 2594 + line = f.readline() 2595 + if not line: 2596 + cont = False 2597 + line = line.rstrip() 2598 + 2599 + # If we haven't reached end of input, and we got a blank line meaning 2600 + # a commit that has modified files, then get the file changes associated 2601 + # with this commit. 2602 + file_changes = [] 2603 + if cont and not line: 2604 + cont = False 2605 + for line in f: 2606 + if not line.startswith(b':'): 2607 + cont = True 2608 + break 2609 + n = 1+max(1, len(parents)) 2610 + assert line.startswith(b':'*(n-1)) 2611 + relevant = line[n-1:-1] 2612 + splits = relevant.split(None, n) 2613 + modes = splits[0:n] 2614 + splits = splits[n].split(None, n) 2615 + shas = splits[0:n] 2616 + splits = splits[n].split(b'\t') 2617 + change_types = splits[0] 2618 + filenames = [PathQuoting.dequote(x) for x in splits[1:]] 2619 + file_changes.append([modes, shas, change_types, filenames]) 2620 + 2621 + # If someone is trying to analyze a subset of the history, make sure 2622 + # to avoid dying on commits with parents that we haven't seen before 2623 + if args.refs: 2624 + graph.record_external_commits([p for p in parents 2625 + if not p in graph.value]) 2626 + 2627 + # Analyze this commit and update progress 2628 + RepoAnalyze.analyze_commit(stats, graph, commit, parents, date, 2629 + file_changes) 2630 + num_commits += 1 2631 + commit_parse_progress.show(processed_commits_msg % num_commits) 2632 + 2633 + # Show the final commits processed message and record the number of commits 2634 + commit_parse_progress.finish() 2635 + stats['num_commits'] = num_commits 2636 + 2637 + # Close the output, ensure rev-list|diff-tree pipeline completed successfully 2638 + dtp.stdout.close() 2639 + if dtp.wait(): 2640 + raise SystemExit(_("Error: rev-list|diff-tree pipeline failed; see above.")) # pragma: no cover 2641 + 2642 + return stats 2643 + 2644 + @staticmethod 2645 + def write_report(reportdir, stats): 2646 + def datestr(datetimestr): 2647 + return datetimestr if datetimestr else _('<present>').encode() 2648 + 2649 + def dirnames(path): 2650 + while True: 2651 + path = os.path.dirname(path) 2652 + yield path 2653 + if path == b'': 2654 + break 2655 + 2656 + # Compute aggregate size information for paths, extensions, and dirs 2657 + total_size = {'packed': 0, 'unpacked': 0} 2658 + path_size = {'packed': collections.defaultdict(int), 2659 + 'unpacked': collections.defaultdict(int)} 2660 + ext_size = {'packed': collections.defaultdict(int), 2661 + 'unpacked': collections.defaultdict(int)} 2662 + dir_size = {'packed': collections.defaultdict(int), 2663 + 'unpacked': collections.defaultdict(int)} 2664 + for sha in stats['names']: 2665 + size = {'packed': stats['packed_size'][sha], 2666 + 'unpacked': stats['unpacked_size'][sha]} 2667 + for which in ('packed', 'unpacked'): 2668 + for name in stats['names'][sha]: 2669 + total_size[which] += size[which] 2670 + path_size[which][name] += size[which] 2671 + basename, ext = os.path.splitext(name) 2672 + ext_size[which][ext] += size[which] 2673 + for dirname in dirnames(name): 2674 + dir_size[which][dirname] += size[which] 2675 + 2676 + # Determine if and when extensions and directories were deleted 2677 + ext_deleted_data = {} 2678 + for name in stats['allnames']: 2679 + when = stats['file_deletions'].get(name, None) 2680 + 2681 + # Update the extension 2682 + basename, ext = os.path.splitext(name) 2683 + if when is None: 2684 + ext_deleted_data[ext] = None 2685 + elif ext in ext_deleted_data: 2686 + if ext_deleted_data[ext] is not None: 2687 + ext_deleted_data[ext] = max(ext_deleted_data[ext], when) 2688 + else: 2689 + ext_deleted_data[ext] = when 2690 + 2691 + dir_deleted_data = {} 2692 + for name in dir_size['packed']: 2693 + dir_deleted_data[name] = stats['tree_deletions'].get(name, None) 2694 + 2695 + with open(os.path.join(reportdir, b"README"), 'bw') as f: 2696 + # Give a basic overview of this file 2697 + f.write(b"== %s ==\n" % _("Overall Statistics").encode()) 2698 + f.write((" %s: %d\n" % (_("Number of commits"), 2699 + stats['num_commits'])).encode()) 2700 + f.write((" %s: %d\n" % (_("Number of filenames"), 2701 + len(path_size['packed']))).encode()) 2702 + f.write((" %s: %d\n" % (_("Number of directories"), 2703 + len(dir_size['packed']))).encode()) 2704 + f.write((" %s: %d\n" % (_("Number of file extensions"), 2705 + len(ext_size['packed']))).encode()) 2706 + f.write(b"\n") 2707 + f.write((" %s: %d\n" % (_("Total unpacked size (bytes)"), 2708 + total_size['unpacked'])).encode()) 2709 + f.write((" %s: %d\n" % (_("Total packed size (bytes)"), 2710 + total_size['packed'])).encode()) 2711 + f.write(b"\n") 2712 + 2713 + # Mention issues with the report 2714 + f.write(("== %s ==\n" % _("Caveats")).encode()) 2715 + f.write(("=== %s ===\n" % _("Sizes")).encode()) 2716 + f.write(textwrap.dedent(_(""" 2717 + Packed size represents what size your repository would be if no 2718 + trees, commits, tags, or other metadata were included (though it may 2719 + fail to represent de-duplication; see below). It also represents the 2720 + current packing, which may be suboptimal if you haven't gc'ed for a 2721 + while. 2722 + 2723 + Unpacked size represents what size your repository would be if no 2724 + trees, commits, tags, or other metadata were included AND if no 2725 + files were packed; i.e., without delta-ing or compression. 2726 + 2727 + Both unpacked and packed sizes can be slightly misleading. Deleting 2728 + a blob from history not save as much space as the unpacked size, 2729 + because it is obviously normally stored in packed form. Also, 2730 + deleting a blob from history may not save as much space as its packed 2731 + size either, because another blob could be stored as a delta against 2732 + that blob, so when you remove one blob another blob's packed size may 2733 + grow. 2734 + 2735 + Also, the sum of the packed sizes can add up to more than the 2736 + repository size; if the same contents appeared in the repository in 2737 + multiple places, git will automatically de-dupe and store only one 2738 + copy, while the way sizes are added in this analysis adds the size 2739 + for each file path that has those contents. Further, if a file is 2740 + ever reverted to a previous version's contents, the previous 2741 + version's size will be counted multiple times in this analysis, even 2742 + though git will only store it once. 2743 + """)[1:]).encode()) 2744 + f.write(b"\n") 2745 + f.write(("=== %s ===\n" % _("Deletions")).encode()) 2746 + f.write(textwrap.dedent(_(""" 2747 + Whether a file is deleted is not a binary quality, since it can be 2748 + deleted on some branches but still exist in others. Also, it might 2749 + exist in an old tag, but have been deleted in versions newer than 2750 + that. More thorough tracking could be done, including looking at 2751 + merge commits where one side of history deleted and the other modified, 2752 + in order to give a more holistic picture of deletions. However, that 2753 + algorithm would not only be more complex to implement, it'd also be 2754 + quite difficult to present and interpret by users. Since --analyze 2755 + is just about getting a high-level rough picture of history, it instead 2756 + implements the simplistic rule that is good enough for 98% of cases: 2757 + A file is marked as deleted if the last commit in the fast-export 2758 + stream that mentions the file lists it as deleted. 2759 + This makes it dependent on topological ordering, but generally gives 2760 + the "right" answer. 2761 + """)[1:]).encode()) 2762 + f.write(b"\n") 2763 + f.write(("=== %s ===\n" % _("Renames")).encode()) 2764 + f.write(textwrap.dedent(_(""" 2765 + Renames share the same non-binary nature that deletions do, plus 2766 + additional challenges: 2767 + * If the renamed file is renamed again, instead of just two names for 2768 + a path you can have three or more. 2769 + * Rename pairs of the form (oldname, newname) that we consider to be 2770 + different names of the "same file" might only be valid over certain 2771 + commit ranges. For example, if a new commit reintroduces a file 2772 + named oldname, then new versions of oldname aren't the "same file" 2773 + anymore. We could try to portray this to the user, but it's easier 2774 + for the user to just break the pairing and only report unbroken 2775 + rename pairings to the user. 2776 + * The ability for users to rename files differently in different 2777 + branches means that our chains of renames will not necessarily be 2778 + linear but may branch out. 2779 + """)[1:]).encode()) 2780 + f.write(b"\n") 2781 + 2782 + # Equivalence classes for names, so if folks only want to keep a 2783 + # certain set of paths, they know the old names they want to include 2784 + # too. 2785 + with open(os.path.join(reportdir, b"renames.txt"), 'bw') as f: 2786 + seen = set() 2787 + for pathname,equiv_group in sorted(stats['equivalence'].items(), 2788 + key=lambda x:(x[1], x[0])): 2789 + if equiv_group in seen: 2790 + continue 2791 + seen.add(equiv_group) 2792 + f.write(("{} ->\n ".format(decode(equiv_group[0])) + 2793 + "\n ".join(decode(x) for x in equiv_group[1:]) + 2794 + "\n").encode()) 2795 + 2796 + # List directories in reverse sorted order of unpacked size 2797 + with open(os.path.join(reportdir, b"directories-deleted-sizes.txt"), 'bw') as f: 2798 + msg = "=== %s ===\n" % _("Deleted directories by reverse size") 2799 + f.write(msg.encode()) 2800 + msg = _("Format: unpacked size, packed size, date deleted, directory name\n") 2801 + f.write(msg.encode()) 2802 + for dirname, size in sorted(dir_size['packed'].items(), 2803 + key=lambda x:(x[1],x[0]), reverse=True): 2804 + if (dir_deleted_data[dirname]): 2805 + f.write(b" %10d %10d %-10s %s\n" % (dir_size['unpacked'][dirname], 2806 + size, 2807 + datestr(dir_deleted_data[dirname]), 2808 + dirname or _('<toplevel>').encode())) 2809 + 2810 + with open(os.path.join(reportdir, b"directories-all-sizes.txt"), 'bw') as f: 2811 + f.write(("=== %s ===\n" % _("All directories by reverse size")).encode()) 2812 + msg = _("Format: unpacked size, packed size, date deleted, directory name\n") 2813 + f.write(msg.encode()) 2814 + for dirname, size in sorted(dir_size['packed'].items(), 2815 + key=lambda x:(x[1],x[0]), reverse=True): 2816 + f.write(b" %10d %10d %-10s %s\n" % (dir_size['unpacked'][dirname], 2817 + size, 2818 + datestr(dir_deleted_data[dirname]), 2819 + dirname or _("<toplevel>").encode())) 2820 + 2821 + # List extensions in reverse sorted order of unpacked size 2822 + with open(os.path.join(reportdir, b"extensions-deleted-sizes.txt"), 'bw') as f: 2823 + msg = "=== %s ===\n" % _("Deleted extensions by reverse size") 2824 + f.write(msg.encode()) 2825 + msg = _("Format: unpacked size, packed size, date deleted, extension name\n") 2826 + f.write(msg.encode()) 2827 + for extname, size in sorted(ext_size['packed'].items(), 2828 + key=lambda x:(x[1],x[0]), reverse=True): 2829 + if (ext_deleted_data[extname]): 2830 + f.write(b" %10d %10d %-10s %s\n" % (ext_size['unpacked'][extname], 2831 + size, 2832 + datestr(ext_deleted_data[extname]), 2833 + extname or _('<no extension>').encode())) 2834 + 2835 + with open(os.path.join(reportdir, b"extensions-all-sizes.txt"), 'bw') as f: 2836 + f.write(("=== %s ===\n" % _("All extensions by reverse size")).encode()) 2837 + msg = _("Format: unpacked size, packed size, date deleted, extension name\n") 2838 + f.write(msg.encode()) 2839 + for extname, size in sorted(ext_size['packed'].items(), 2840 + key=lambda x:(x[1],x[0]), reverse=True): 2841 + f.write(b" %10d %10d %-10s %s\n" % (ext_size['unpacked'][extname], 2842 + size, 2843 + datestr(ext_deleted_data[extname]), 2844 + extname or _('<no extension>').encode())) 2845 + 2846 + # List files in reverse sorted order of unpacked size 2847 + with open(os.path.join(reportdir, b"path-deleted-sizes.txt"), 'bw') as f: 2848 + msg = "=== %s ===\n" % _("Deleted paths by reverse accumulated size") 2849 + f.write(msg.encode()) 2850 + msg = _("Format: unpacked size, packed size, date deleted, path name(s)\n") 2851 + f.write(msg.encode()) 2852 + for pathname, size in sorted(path_size['packed'].items(), 2853 + key=lambda x:(x[1],x[0]), reverse=True): 2854 + when = stats['file_deletions'].get(pathname, None) 2855 + if when: 2856 + f.write(b" %10d %10d %-10s %s\n" % (path_size['unpacked'][pathname], 2857 + size, 2858 + datestr(when), 2859 + pathname)) 2860 + 2861 + with open(os.path.join(reportdir, b"path-all-sizes.txt"), 'bw') as f: 2862 + msg = "=== %s ===\n" % _("All paths by reverse accumulated size") 2863 + f.write(msg.encode()) 2864 + msg = _("Format: unpacked size, packed size, date deleted, path name\n") 2865 + f.write(msg.encode()) 2866 + for pathname, size in sorted(path_size['packed'].items(), 2867 + key=lambda x:(x[1],x[0]), reverse=True): 2868 + when = stats['file_deletions'].get(pathname, None) 2869 + f.write(b" %10d %10d %-10s %s\n" % (path_size['unpacked'][pathname], 2870 + size, 2871 + datestr(when), 2872 + pathname)) 2873 + 2874 + # List of filenames and sizes in descending order 2875 + with open(os.path.join(reportdir, b"blob-shas-and-paths.txt"), 'bw') as f: 2876 + f.write(("=== %s ===\n" % _("Files by sha and associated pathnames in reverse size")).encode()) 2877 + f.write(_("Format: sha, unpacked size, packed size, filename(s) object stored as\n").encode()) 2878 + for sha, size in sorted(stats['packed_size'].items(), 2879 + key=lambda x:(x[1],x[0]), reverse=True): 2880 + if sha not in stats['names']: 2881 + # Some objects in the repository might not be referenced, or not 2882 + # referenced by the branches/tags the user cares about; skip them. 2883 + continue 2884 + names_with_sha = stats['names'][sha] 2885 + if len(names_with_sha) == 1: 2886 + names_with_sha = names_with_sha.pop() 2887 + else: 2888 + names_with_sha = b'[' + b', '.join(sorted(names_with_sha)) + b']' 2889 + f.write(b" %s %10d %10d %s\n" % (sha, 2890 + stats['unpacked_size'][sha], 2891 + size, 2892 + names_with_sha)) 2893 + 2894 + @staticmethod 2895 + def run(args): 2896 + if args.report_dir: 2897 + reportdir = args.report_dir 2898 + else: 2899 + git_dir = GitUtils.determine_git_dir(b'.') 2900 + 2901 + # Create the report directory as necessary 2902 + results_tmp_dir = os.path.join(git_dir, b'filter-repo') 2903 + if not os.path.isdir(results_tmp_dir): 2904 + os.mkdir(results_tmp_dir) 2905 + reportdir = os.path.join(results_tmp_dir, b"analysis") 2906 + 2907 + if os.path.isdir(reportdir): 2908 + if args.force: 2909 + sys.stdout.write(_("Warning: Removing recursively: \"%s\"\n") % decode(reportdir)) 2910 + shutil.rmtree(reportdir) 2911 + else: 2912 + sys.stdout.write(_("Error: dir already exists (use --force to delete): \"%s\"\n") % decode(reportdir)) 2913 + sys.exit(1) 2914 + 2915 + os.mkdir(reportdir) 2916 + 2917 + # Gather the data we need 2918 + stats = RepoAnalyze.gather_data(args) 2919 + 2920 + # Write the reports 2921 + sys.stdout.write(_("Writing reports to \"%s\"...") % decode(reportdir)) 2922 + sys.stdout.flush() 2923 + RepoAnalyze.write_report(reportdir, stats) 2924 + sys.stdout.write(_("done.\n")) 2925 + sys.stdout.write(_("README: \"%s\"\n") % decode( os.path.join(reportdir, b"README") )) 2926 + 2927 + class FileInfoValueHelper: 2928 + def __init__(self, replace_text, insert_blob_func, source_working_dir): 2929 + self.data = {} 2930 + self._replace_text = replace_text 2931 + self._insert_blob_func = insert_blob_func 2932 + cmd = ['git', 'cat-file', '--batch-command'] 2933 + self._cat_file_process = subproc.Popen(cmd, 2934 + stdin = subprocess.PIPE, 2935 + stdout = subprocess.PIPE, 2936 + cwd = source_working_dir) 2937 + 2938 + def finalize(self): 2939 + self._cat_file_process.stdin.close() 2940 + self._cat_file_process.wait() 2941 + 2942 + def get_contents_by_identifier(self, blobhash): 2943 + self._cat_file_process.stdin.write(b'contents '+blobhash+b'\n') 2944 + self._cat_file_process.stdin.flush() 2945 + line = self._cat_file_process.stdout.readline() 2946 + try: 2947 + (oid, oidtype, size) = line.split() 2948 + except ValueError: 2949 + assert(line == blobhash+b" missing\n") 2950 + return None 2951 + size = int(size) # Convert e.g. b'6283' to 6283 2952 + assert(oidtype == b'blob') 2953 + contents_plus_newline = self._cat_file_process.stdout.read(size+1) 2954 + return contents_plus_newline[:-1] # return all but the newline 2955 + 2956 + def get_size_by_identifier(self, blobhash): 2957 + self._cat_file_process.stdin.write(b'info '+blobhash+b'\n') 2958 + self._cat_file_process.stdin.flush() 2959 + line = self._cat_file_process.stdout.readline() 2960 + (oid, oidtype, size) = line.split() 2961 + size = int(size) # Convert e.g. b'6283' to 6283 2962 + assert(oidtype == b'blob') 2963 + return size 2964 + 2965 + def insert_file_with_contents(self, contents): 2966 + blob = Blob(contents) 2967 + self._insert_blob_func(blob) 2968 + return blob.id 2969 + 2970 + def is_binary(self, contents): 2971 + return b"\0" in contents[0:8192] 2972 + 2973 + def apply_replace_text(self, contents): 2974 + new_contents = contents 2975 + for literal, replacement in self._replace_text['literals']: 2976 + new_contents = new_contents.replace(literal, replacement) 2977 + for regex, replacement in self._replace_text['regexes']: 2978 + new_contents = regex.sub(replacement, new_contents) 2979 + return new_contents 2980 + 2981 + class LFSObjectTracker: 2982 + class LFSObjs: 2983 + def __init__(self): 2984 + self.id_to_object_map = {} 2985 + self.objects = set() 2986 + 2987 + def __init__(self, file_info, check_sources, check_targets): 2988 + self.source_objects = LFSObjectTracker.LFSObjs() 2989 + self.target_objects = LFSObjectTracker.LFSObjs() 2990 + self.hash_to_object_map = {} 2991 + self.file_info = file_info 2992 + self.check_sources = check_sources 2993 + self.check_targets = check_targets 2994 + self.objects_orphaned = False 2995 + 2996 + def _get_lfs_values(self, contents): 2997 + values = {} 2998 + if len(contents) > 1024: 2999 + return {} 3000 + for line in contents.splitlines(): 3001 + try: 3002 + (key, value) = line.split(b' ', 1) 3003 + except ValueError: 3004 + return {} 3005 + if not values and key != b'version': 3006 + return values 3007 + values[key] = value 3008 + return values 3009 + 3010 + def check_blob_data(self, contents, fast_export_id, source): 3011 + if source and not self.check_sources: 3012 + return 3013 + mymap = self.source_objects if source else self.target_objects 3014 + lfs_object_id = self._get_lfs_values(contents).get(b'oid') 3015 + if lfs_object_id: 3016 + mymap.id_to_object_map[fast_export_id] = lfs_object_id 3017 + 3018 + def check_file_change_data(self, git_id, source): 3019 + if source and not self.check_sources: 3020 + return 3021 + mymap = self.source_objects if source else self.target_objects 3022 + if isinstance(git_id, int): 3023 + lfs_object_id = mymap.id_to_object_map.get(git_id) 3024 + if lfs_object_id: 3025 + mymap.objects.add(lfs_object_id) 3026 + else: 3027 + if git_id in self.hash_to_object_map: 3028 + mymap.objects.add(self.hash_to_object_map[git_id]) 3029 + return 3030 + size = self.file_info.get_size_by_identifier(git_id) 3031 + if size >= 1024: 3032 + return 3033 + contents = self.file_info.get_contents_by_identifier(git_id) 3034 + lfs_object_id = self._get_lfs_values(contents).get(b'oid') 3035 + if lfs_object_id: 3036 + self.hash_to_object_map[git_id] = lfs_object_id 3037 + mymap.objects.add(lfs_object_id) 3038 + 3039 + def check_output_object(self, obj): 3040 + if not self.check_targets: 3041 + return 3042 + if type(obj) == Blob: 3043 + self.check_blob_data(obj.data, obj.id, False) 3044 + elif type(obj) == Commit: 3045 + for change in obj.file_changes: 3046 + sys.stdout.flush() 3047 + if change.type != b'M' or change.mode == b'160000': 3048 + continue 3049 + self.check_file_change_data(change.blob_id, False) 3050 + 3051 + def find_all_lfs_objects_in_repo(self, repo, source): 3052 + if not source: 3053 + self.file_info = FileInfoValueHelper(None, None, repo) 3054 + p = subproc.Popen(["git", "rev-list", "--objects", "--all"], 3055 + stdout=subprocess.PIPE, stderr=subprocess.PIPE, 3056 + cwd=repo) 3057 + for line in p.stdout.readlines(): 3058 + try: 3059 + (git_oid, filename) = line.split() 3060 + except ValueError: 3061 + # Commit and tree objects only have oid 3062 + continue 3063 + 3064 + mymap = self.source_objects if source else self.target_objects 3065 + size = self.file_info.get_size_by_identifier(git_oid) 3066 + if size >= 1024: 3067 + continue 3068 + contents = self.file_info.get_contents_by_identifier(git_oid) 3069 + lfs_object_id = self._get_lfs_values(contents).get(b'oid') 3070 + if lfs_object_id: 3071 + mymap.objects.add(lfs_object_id) 3072 + if not source: 3073 + self.file_info.finalize() 3074 + 3075 + class InputFileBackup: 3076 + def __init__(self, input_file, output_file): 3077 + self.input_file = input_file 3078 + self.output_file = output_file 3079 + 3080 + def close(self): 3081 + self.input_file.close() 3082 + self.output_file.close() 3083 + 3084 + def read(self, size): 3085 + output = self.input_file.read(size) 3086 + self.output_file.write(output) 3087 + return output 3088 + 3089 + def readline(self): 3090 + line = self.input_file.readline() 3091 + self.output_file.write(line) 3092 + return line 3093 + 3094 + class DualFileWriter: 3095 + def __init__(self, file1, file2): 3096 + self.file1 = file1 3097 + self.file2 = file2 3098 + 3099 + def write(self, *args): 3100 + self.file1.write(*args) 3101 + self.file2.write(*args) 3102 + 3103 + def flush(self): 3104 + self.file1.flush() 3105 + self.file2.flush() 3106 + 3107 + def close(self): 3108 + self.file1.close() 3109 + self.file2.close() 3110 + 3111 + class RepoFilter(object): 3112 + def __init__(self, 3113 + args, 3114 + filename_callback = None, 3115 + message_callback = None, 3116 + name_callback = None, 3117 + email_callback = None, 3118 + refname_callback = None, 3119 + blob_callback = None, 3120 + commit_callback = None, 3121 + tag_callback = None, 3122 + reset_callback = None, 3123 + done_callback = None, 3124 + file_info_callback = None): 3125 + 3126 + self._args = args 3127 + 3128 + # Repo we are exporting 3129 + self._repo_working_dir = None 3130 + 3131 + # Store callbacks for acting on objects printed by FastExport 3132 + self._blob_callback = blob_callback 3133 + self._commit_callback = commit_callback 3134 + self._tag_callback = tag_callback 3135 + self._reset_callback = reset_callback 3136 + self._done_callback = done_callback 3137 + 3138 + # Store callbacks for acting on slices of FastExport objects 3139 + self._filename_callback = filename_callback # filenames from commits 3140 + self._message_callback = message_callback # commit OR tag message 3141 + self._name_callback = name_callback # author, committer, tagger 3142 + self._email_callback = email_callback # author, committer, tagger 3143 + self._refname_callback = refname_callback # from commit/tag/reset 3144 + self._file_info_callback = file_info_callback # various file info 3145 + self._handle_arg_callbacks() 3146 + 3147 + # Helpers for callbacks 3148 + self._file_info_value = None 3149 + 3150 + # Defaults for input 3151 + self._input = None 3152 + self._fep = None # Fast Export Process 3153 + self._fe_orig = None # Path to where original fast-export output stored 3154 + self._fe_filt = None # Path to where filtered fast-export output stored 3155 + self._parser = None # FastExportParser object we are working with 3156 + 3157 + # Defaults for output 3158 + self._output = None 3159 + self._fip = None # Fast Import Process 3160 + self._import_pipes = None 3161 + self._managed_output = True 3162 + 3163 + # A tuple of (depth, list-of-ancestors). Commits and ancestors are 3164 + # identified by their id (their 'mark' in fast-export or fast-import 3165 + # speak). The depth of a commit is one more than the max depth of any 3166 + # of its ancestors. 3167 + self._graph = AncestryGraph() 3168 + # Another one, for ancestry of commits in the original repo 3169 + self._orig_graph = AncestryGraph() 3170 + 3171 + # Names of files that were tweaked in any commit; such paths could lead 3172 + # to subsequent commits being empty 3173 + self._files_tweaked = set() 3174 + 3175 + # A set of commit hash pairs (oldhash, newhash) which used to be merge 3176 + # commits but due to filtering were turned into non-merge commits. 3177 + # The commits probably have suboptimal commit messages (e.g. "Merge branch 3178 + # next into master"). 3179 + self._commits_no_longer_merges = [] 3180 + 3181 + # A dict of original_ids to new_ids; filtering commits means getting 3182 + # new commit hash (sha1sums), and we record the mapping both for 3183 + # diagnostic purposes and so we can rewrite commit messages. Note that 3184 + # the new_id can be None rather than a commit hash if the original 3185 + # commit became empty and was pruned or was otherwise dropped. 3186 + self._commit_renames = {} 3187 + 3188 + # A set of original_ids (i.e. original hashes) for which we have not yet 3189 + # gotten the new hashses; the value is always the corresponding fast-export 3190 + # id (i.e. commit.id) 3191 + self._pending_renames = collections.OrderedDict() 3192 + 3193 + # A dict of commit_hash[0:7] -> set(commit_hashes with that prefix). 3194 + # 3195 + # It's common for commit messages to refer to commits by abbreviated 3196 + # commit hashes, as short as 7 characters. To facilitate translating 3197 + # such short hashes, we have a mapping of prefixes to full old hashes. 3198 + self._commit_short_old_hashes = collections.defaultdict(set) 3199 + 3200 + # A set of commit hash references appearing in commit messages which 3201 + # mapped to a valid commit that was removed entirely in the filtering 3202 + # process. The commit message will continue to reference the 3203 + # now-missing commit hash, since there was nothing to map it to. 3204 + self._commits_referenced_but_removed = set() 3205 + 3206 + # Other vars related to metadata tracking 3207 + self._already_ran = False 3208 + self._changed_refs = set() 3209 + self._lfs_object_tracker = None 3210 + 3211 + # Progress handling (number of commits parsed, etc.) 3212 + self._progress_writer = ProgressWriter() 3213 + self._num_commits = 0 3214 + 3215 + # Size of blobs in the repo 3216 + self._unpacked_size = {} 3217 + 3218 + # Other vars 3219 + self._sanity_checks_handled = False 3220 + self._finalize_handled = False 3221 + self._orig_refs = None 3222 + self._config_settings = {} 3223 + self._newnames = {} 3224 + self._stash = None 3225 + 3226 + # Cache a few message translations for performance reasons 3227 + self._parsed_message = _("Parsed %d commits") 3228 + 3229 + # Compile some regexes and cache those 3230 + self._hash_re = re.compile(br'(\b[0-9a-f]{7,40}\b)') 3231 + 3232 + def _handle_arg_callbacks(self): 3233 + def make_callback(args, bdy): 3234 + callback_globals = {g: globals()[g] for g in public_globals} 3235 + callback_locals = {} 3236 + if type(args) == str: 3237 + args = (args, '_do_not_use_this_var = None') 3238 + exec('def callback({}):\n'.format(', '.join(args))+ 3239 + ' '+'\n '.join(bdy.splitlines()), callback_globals, callback_locals) 3240 + return callback_locals['callback'] 3241 + def handle(which, args=None): 3242 + which_under = which.replace('-','_') 3243 + if not args: 3244 + args = which 3245 + callback_field = '_{}_callback'.format(which_under) 3246 + code_string = getattr(self._args, which_under+'_callback') 3247 + if code_string: 3248 + if os.path.exists(code_string): 3249 + with open(code_string, 'r', encoding='utf-8') as f: 3250 + code_string = f.read() 3251 + if getattr(self, callback_field): 3252 + raise SystemExit(_("Error: Cannot pass a %s_callback to RepoFilter " 3253 + "AND pass --%s-callback" 3254 + % (which_under, which))) 3255 + if 'return ' not in code_string and \ 3256 + which not in ('blob', 'commit', 'tag', 'reset'): 3257 + raise SystemExit(_("Error: --%s-callback should have a return statement") 3258 + % which) 3259 + setattr(self, callback_field, make_callback(args, code_string)) 3260 + handle('filename') 3261 + handle('message') 3262 + handle('name') 3263 + handle('email') 3264 + handle('refname') 3265 + handle('blob') 3266 + handle('commit') 3267 + handle('tag') 3268 + handle('reset') 3269 + handle('file-info', ('filename', 'mode', 'blob_id', 'value')) 3270 + 3271 + def _run_sanity_checks(self): 3272 + self._sanity_checks_handled = True 3273 + if not self._managed_output: 3274 + if not self._args.replace_refs: 3275 + # If not _managed_output we don't want to make extra changes to the 3276 + # repo, so set default to no-op 'update-no-add' 3277 + self._args.replace_refs = 'update-no-add' 3278 + return 3279 + 3280 + if self._args.debug: 3281 + print("[DEBUG] Passed arguments:\n{}".format(self._args)) 3282 + 3283 + # Determine basic repository information 3284 + target_working_dir = self._args.target or b'.' 3285 + self._orig_refs = GitUtils.get_refs(target_working_dir) 3286 + is_bare = GitUtils.is_repository_bare(target_working_dir) 3287 + self._config_settings = GitUtils.get_config_settings(target_working_dir) 3288 + 3289 + # Determine if this is second or later run of filter-repo 3290 + tmp_dir = self.results_tmp_dir(create_if_missing=False) 3291 + ran_path = os.path.join(tmp_dir, b'already_ran') 3292 + self._already_ran = os.path.isfile(ran_path) 3293 + if self._already_ran: 3294 + current_time = time.time() 3295 + file_mod_time = os.path.getmtime(ran_path) 3296 + file_age = current_time - file_mod_time 3297 + if file_age > 86400: # file older than a day 3298 + msg = (f"The previous run is older than a day ({decode(ran_path)} already exists).\n" 3299 + f"See \"Already Ran\" section in the manual for more information.\n" 3300 + f"Treat this run as a continuation of filtering in the previous run (Y/N)? ") 3301 + response = input(msg) 3302 + 3303 + if response.lower() != 'y': 3304 + os.remove(ran_path) 3305 + self._already_ran = False 3306 + 3307 + # Interaction between --already-ran and --sensitive_data_removal 3308 + msg = textwrap.dedent(_("""\ 3309 + Error: Cannot specify --sensitive-data-removal on a follow-up invocation 3310 + of git-filter-repo unless it was specified in previously runs.""")) 3311 + if self._already_ran: 3312 + sdr_path = os.path.join(tmp_dir, b'sensitive_data_removal') 3313 + sdr_previously = os.path.isfile(sdr_path) 3314 + if not sdr_previously and self._args.sensitive_data_removal: 3315 + raise SystemExit(msg) 3316 + # Treat this as a --sensitive-data-removal run if a previous run was, 3317 + # even if it wasn't specified this time 3318 + self._args.sensitive_data_removal = sdr_previously 3319 + 3320 + # Have to check sensitive_data_removal interactions here instead of 3321 + # sanity_check_args because of the above interaction with already_ran stuff 3322 + if self._args.sensitive_data_removal: 3323 + if self._args.stdin: 3324 + msg = _("Error: sensitive data removal is incompatible with --stdin") 3325 + raise SystemExit(msg) 3326 + if self._args.source or self._args.target: 3327 + msg = _("Error: sensitive data removal is incompatible with --source and --target") 3328 + raise SystemExit(msg) 3329 + 3330 + # Default for --replace-refs 3331 + if not self._args.replace_refs: 3332 + self._args.replace_refs = 'delete-no-add' 3333 + if self._args.replace_refs == 'old-default': 3334 + self._args.replace_refs = ('update-or-add' if self._already_ran 3335 + else 'update-and-add') 3336 + 3337 + # Do sanity checks from the correct directory 3338 + if not self._args.force and not self._already_ran: 3339 + cwd = os.getcwd() 3340 + os.chdir(target_working_dir) 3341 + RepoFilter.sanity_check(self._orig_refs, is_bare, self._config_settings) 3342 + os.chdir(cwd) 3343 + 3344 + def _setup_lfs_orphaning_checks(self): 3345 + # Do a couple checks to see if we want to do lfs orphaning checks 3346 + if not self._args.sensitive_data_removal: 3347 + return 3348 + metadata_dir = self.results_tmp_dir() 3349 + lfs_objects_file = os.path.join(metadata_dir, b'original_lfs_objects') 3350 + if self._already_ran: 3351 + # Check if we did lfs filtering in the previous run 3352 + if not os.path.isfile(lfs_objects_file): 3353 + return 3354 + 3355 + # Set up self._file_info_value so we can query git for stuff 3356 + source_working_dir = self._args.source or b'.' 3357 + self._file_info_value = FileInfoValueHelper(self._args.replace_text, 3358 + self.insert, 3359 + source_working_dir) 3360 + 3361 + # One more check to see if we want to do lfs orphaning checks 3362 + if not self._already_ran: 3363 + # Check if lfs filtering is active in HEAD's .gitattributes file 3364 + a = self._file_info_value.get_contents_by_identifier(b"HEAD:.gitattributes") 3365 + if not a or not re.search(rb'\bfilter=lfs\b', a): 3366 + return 3367 + 3368 + # Set up the object tracker 3369 + check_sources = not self._already_ran and not self._args.partial 3370 + check_targets = not self._args.partial 3371 + self._lfs_object_tracker = LFSObjectTracker(self._file_info_value, 3372 + check_sources, 3373 + check_targets) 3374 + self._parser._lfs_object_tracker = self._lfs_object_tracker # kinda gross 3375 + 3376 + # Get initial objects 3377 + if self._already_ran: 3378 + with open(lfs_objects_file, 'br') as f: 3379 + for line in f: 3380 + self._lfs_object_tracker.source_objects.objects.add(line.strip()) 3381 + elif self._args.partial: 3382 + source = True 3383 + self._lfs_object_tracker.find_all_lfs_objects_in_repo(source_working_dir, 3384 + source) 3385 + 3386 + @staticmethod 3387 + def loose_objects_are_replace_refs(git_dir, refs, num_loose_objects): 3388 + replace_objects = set() 3389 + for refname, rev in refs.items(): 3390 + if not refname.startswith(b'refs/replace/'): 3391 + continue 3392 + replace_objects.add(rev) 3393 + 3394 + validobj_re = re.compile(rb'^[0-9a-f]{40}$') 3395 + object_dir=os.path.join(git_dir, b'objects') 3396 + for root, dirs, files in os.walk(object_dir): 3397 + for filename in files: 3398 + objname = os.path.basename(root)+filename 3399 + if objname not in replace_objects and validobj_re.match(objname): 3400 + return False 3401 + 3402 + return True 3403 + 3404 + @staticmethod 3405 + def sanity_check(refs, is_bare, config_settings): 3406 + def abort(reason): 3407 + dirname = config_settings.get(b'remote.origin.url', b'') 3408 + msg = "" 3409 + if dirname and os.path.isdir(dirname): 3410 + msg = _("Note: when cloning local repositories, you need to pass\n" 3411 + " --no-local to git clone to avoid this issue.\n") 3412 + raise SystemExit( 3413 + _("Aborting: Refusing to destructively overwrite repo history since\n" 3414 + "this does not look like a fresh clone.\n" 3415 + " (%s)\n%s" 3416 + "Please operate on a fresh clone instead. If you want to proceed\n" 3417 + "anyway, use --force.") % (reason, msg)) 3418 + 3419 + # Avoid letting people running with weird setups and overwriting GIT_DIR 3420 + # elsewhere 3421 + git_dir = GitUtils.determine_git_dir(b'.') 3422 + if is_bare and git_dir != b'.': 3423 + abort(_("GIT_DIR must be .")) 3424 + elif not is_bare and git_dir != b'.git': 3425 + abort(_("GIT_DIR must be .git")) 3426 + 3427 + # Check for refname collisions 3428 + if config_settings.get(b'core.ignorecase', b'false') == b'true': 3429 + collisions = collections.defaultdict(list) 3430 + for ref in refs: 3431 + collisions[ref.lower()].append(ref) 3432 + msg = "" 3433 + for ref in collisions: 3434 + if len(collisions[ref]) >= 2: 3435 + msg += " " + decode(b", ".join(collisions[ref])) + "\n" 3436 + if msg: 3437 + raise SystemExit( 3438 + _("Aborting: Cannot rewrite history on a case insensitive\n" 3439 + "filesystem since you have refs that differ in case only:\n" 3440 + "%s") % msg) 3441 + if config_settings.get(b'core.precomposeunicode', b'false') == b'true': 3442 + import unicodedata # Mac users need to have python-3.8 3443 + collisions = collections.defaultdict(list) 3444 + for ref in refs: 3445 + strref = decode(ref) 3446 + collisions[unicodedata.normalize('NFC', strref)].append(strref) 3447 + msg = "" 3448 + for ref in collisions: 3449 + if len(collisions[ref]) >= 2: 3450 + msg += " " + ", ".join(collisions[ref]) + "\n" 3451 + if msg: 3452 + raise SystemExit( 3453 + _("Aborting: Cannot rewrite history on a character normalizing\n" 3454 + "filesystem since you have refs that differ in normalization:\n" 3455 + "%s") % msg) 3456 + 3457 + # Make sure repo is fully packed, just like a fresh clone would be. 3458 + # Note that transfer.unpackLimit defaults to 100, meaning that a 3459 + # repository with no packs and less than 100 objects should be considered 3460 + # fully packed. 3461 + output = subproc.check_output('git count-objects -v'.split()) 3462 + stats = dict(x.split(b': ') for x in output.splitlines()) 3463 + num_packs = int(stats[b'packs']) 3464 + num_loose_objects = int(stats[b'count']) 3465 + if num_packs > 1 or \ 3466 + num_loose_objects >= 100 or \ 3467 + (num_packs == 1 and num_loose_objects > 0 and 3468 + not RepoFilter.loose_objects_are_replace_refs(git_dir, refs, 3469 + num_loose_objects)): 3470 + abort(_("expected freshly packed repo")) 3471 + 3472 + # Make sure there is precisely one remote, named "origin"...or that this 3473 + # is a new bare repo with no packs and no remotes 3474 + output = subproc.check_output('git remote'.split()).strip() 3475 + if not (output == b"origin" or (num_packs == 0 and not output)): 3476 + abort(_("expected one remote, origin")) 3477 + 3478 + # Make sure that all reflogs have precisely one entry 3479 + reflog_dir=os.path.join(git_dir, b'logs') 3480 + for root, dirs, files in os.walk(reflog_dir): 3481 + for filename in files: 3482 + pathname = os.path.join(root, filename) 3483 + with open(pathname, 'br') as f: 3484 + if len(f.read().splitlines()) > 1: 3485 + shortpath = pathname[len(reflog_dir)+1:] 3486 + abort(_("expected at most one entry in the reflog for %s") % 3487 + decode(shortpath)) 3488 + 3489 + # Make sure there are no stashed changes 3490 + if b'refs/stash' in refs: 3491 + abort(_("has stashed changes")) 3492 + 3493 + # Do extra checks in non-bare repos 3494 + if not is_bare: 3495 + # Avoid uncommitted, unstaged, or untracked changes 3496 + if subproc.call('git diff --staged --quiet'.split()): 3497 + abort(_("you have uncommitted changes")) 3498 + if subproc.call('git diff --quiet'.split()): 3499 + abort(_("you have unstaged changes")) 3500 + untracked_output = subproc.check_output('git ls-files -o'.split()) 3501 + if len(untracked_output) > 0: 3502 + uf = untracked_output.rstrip(b'\n').split(b'\n') 3503 + # Since running git-filter-repo can result in files being written to 3504 + # __pycache__ (depending on python version, env vars, etc.), let's 3505 + # ignore those as far as "clean clone" is concerned. 3506 + relevant_uf = [x for x in uf 3507 + if not x.startswith(b'__pycache__/git_filter_repo.')] 3508 + if len(relevant_uf) > 0: 3509 + abort(_("you have untracked changes")) 3510 + 3511 + # Avoid unpushed changes 3512 + for refname, rev in refs.items(): 3513 + if not refname.startswith(b'refs/heads/'): 3514 + continue 3515 + origin_ref = refname.replace(b'refs/heads/', b'refs/remotes/origin/') 3516 + if origin_ref not in refs: 3517 + abort(_('%s exists, but %s not found') % (decode(refname), 3518 + decode(origin_ref))) 3519 + if rev != refs[origin_ref]: 3520 + abort(_('%s does not match %s') % (decode(refname), 3521 + decode(origin_ref))) 3522 + 3523 + # Make sure there is only one worktree 3524 + output = subproc.check_output('git worktree list'.split()) 3525 + if len(output.splitlines()) > 1: 3526 + abort(_('you have multiple worktrees')) 3527 + 3528 + def cleanup(self, repo, repack, reset, 3529 + run_quietly=False, show_debuginfo=False): 3530 + ''' cleanup repo; if repack then expire reflogs and do a gc --prune=now. 3531 + if reset then do a reset --hard. Optionally also curb output if 3532 + run_quietly is True, or go the opposite direction and show extra 3533 + output if show_debuginfo is True. ''' 3534 + assert not (run_quietly and show_debuginfo) 3535 + 3536 + if (repack and not run_quietly and not show_debuginfo): 3537 + print(_("Repacking your repo and cleaning out old unneeded objects")) 3538 + quiet_flags = '--quiet' if run_quietly else '' 3539 + cleanup_cmds = [] 3540 + if repack: 3541 + cleanup_cmds = ['git reflog expire --expire=now --all'.split(), 3542 + 'git gc {} --prune=now'.format(quiet_flags).split()] 3543 + if reset: 3544 + cleanup_cmds.insert(0, 'git reset {} --hard'.format(quiet_flags).split()) 3545 + location_info = ' (in {})'.format(decode(repo)) if repo != b'.' else '' 3546 + for cmd in cleanup_cmds: 3547 + if show_debuginfo: 3548 + print("[DEBUG] Running{}: {}".format(location_info, ' '.join(cmd))) 3549 + ret = subproc.call(cmd, cwd=repo) 3550 + if ret != 0: 3551 + raise SystemExit("fatal: running '%s' failed!" % ' '.join(cmd)) 3552 + if cmd[0:3] == 'git reflog expire'.split(): 3553 + self._write_stash() 3554 + 3555 + def _get_rename(self, old_hash): 3556 + # If we already know the rename, just return it 3557 + new_hash = self._commit_renames.get(old_hash, None) 3558 + if new_hash: 3559 + return new_hash 3560 + 3561 + # If it's not in the remaining pending renames, we don't know it 3562 + if old_hash is not None and old_hash not in self._pending_renames: 3563 + return None 3564 + 3565 + # Read through the pending renames until we find it or we've read them all, 3566 + # and return whatever we might find 3567 + self._flush_renames(old_hash) 3568 + return self._commit_renames.get(old_hash, None) 3569 + 3570 + def _flush_renames(self, old_hash=None, limit=0): 3571 + # Parse through self._pending_renames until we have read enough. We have 3572 + # read enough if: 3573 + # self._pending_renames is empty 3574 + # old_hash != None and we found a rename for old_hash 3575 + # limit > 0 and len(self._pending_renames) started less than 2*limit 3576 + # limit > 0 and len(self._pending_renames) < limit 3577 + if limit and len(self._pending_renames) < 2 * limit: 3578 + return 3579 + fi_input, fi_output = self._import_pipes 3580 + while self._pending_renames: 3581 + orig_hash, new_fast_export_id = self._pending_renames.popitem(last=False) 3582 + new_hash = fi_output.readline().rstrip() 3583 + self._commit_renames[orig_hash] = new_hash 3584 + self._graph.record_hash(new_fast_export_id, new_hash) 3585 + if old_hash == orig_hash: 3586 + return 3587 + if limit and len(self._pending_renames) < limit: 3588 + return 3589 + 3590 + def _translate_commit_hash(self, matchobj_or_oldhash): 3591 + old_hash = matchobj_or_oldhash 3592 + if not isinstance(matchobj_or_oldhash, bytes): 3593 + old_hash = matchobj_or_oldhash.group(1) 3594 + orig_len = len(old_hash) 3595 + new_hash = self._get_rename(old_hash) 3596 + if new_hash is None: 3597 + if old_hash[0:7] not in self._commit_short_old_hashes: 3598 + self._commits_referenced_but_removed.add(old_hash) 3599 + return old_hash 3600 + possibilities = self._commit_short_old_hashes[old_hash[0:7]] 3601 + matches = [x for x in possibilities 3602 + if x[0:orig_len] == old_hash] 3603 + if len(matches) != 1: 3604 + self._commits_referenced_but_removed.add(old_hash) 3605 + return old_hash 3606 + old_hash = matches[0] 3607 + new_hash = self._get_rename(old_hash) 3608 + 3609 + assert new_hash is not None 3610 + return new_hash[0:orig_len] 3611 + 3612 + def _maybe_trim_extra_parents(self, orig_parents, parents): 3613 + '''Due to pruning of empty commits, some parents could be non-existent 3614 + (None) or otherwise redundant. Remove the non-existent parents, and 3615 + remove redundant parents ***SO LONG AS*** that doesn't transform a 3616 + merge commit into a non-merge commit. 3617 + 3618 + Returns a tuple: 3619 + (parents, new_first_parent_if_would_become_non_merge)''' 3620 + 3621 + always_prune = (self._args.prune_degenerate == 'always') 3622 + 3623 + # Pruning of empty commits means multiple things: 3624 + # * An original parent of this commit may have been pruned causing the 3625 + # need to rewrite the reported parent to the nearest ancestor. We 3626 + # want to know when we're dealing with such a parent. 3627 + # * Further, there may be no "nearest ancestor" if the entire history 3628 + # of that parent was also pruned. (Detectable by the parent being 3629 + # 'None') 3630 + # Remove all parents rewritten to None, and keep track of which parents 3631 + # were rewritten to an ancestor. 3632 + tmp = zip(parents, 3633 + orig_parents, 3634 + [(x in _SKIPPED_COMMITS or always_prune) for x in orig_parents]) 3635 + tmp2 = [x for x in tmp if x[0] is not None] 3636 + if not tmp2: 3637 + # All ancestors have been pruned; we have no parents. 3638 + return [], None 3639 + parents, orig_parents, is_rewritten = [list(x) for x in zip(*tmp2)] 3640 + 3641 + # We can't have redundant parents if we don't have at least 2 parents 3642 + if len(parents) < 2: 3643 + return parents, None 3644 + 3645 + # Don't remove redundant parents if user doesn't want us to 3646 + if self._args.prune_degenerate == 'never': 3647 + return parents, None 3648 + 3649 + # Remove duplicate parents (if both sides of history have lots of commits 3650 + # which become empty due to pruning, the most recent ancestor on both 3651 + # sides may be the same commit), except only remove parents that have 3652 + # been rewritten due to previous empty pruning. 3653 + seen = set() 3654 + seen_add = seen.add 3655 + # Deleting duplicate rewritten parents means keeping parents if either 3656 + # they have not been seen or they are ones that have not been rewritten. 3657 + parents_copy = parents 3658 + uniq = [[p, orig_parents[i], is_rewritten[i]] for i, p in enumerate(parents) 3659 + if not (p in seen or seen_add(p)) or not is_rewritten[i]] 3660 + parents, orig_parents, is_rewritten = [list(x) for x in zip(*uniq)] 3661 + if len(parents) < 2: 3662 + return parents_copy, parents[0] 3663 + 3664 + # Flatten unnecessary merges. (If one side of history is entirely 3665 + # empty commits that were pruned, we may end up attempting to 3666 + # merge a commit with its ancestor. Remove parents that are an 3667 + # ancestor of another parent.) 3668 + num_parents = len(parents) 3669 + to_remove = [] 3670 + for cur in range(num_parents): 3671 + if not is_rewritten[cur]: 3672 + continue 3673 + for other in range(num_parents): 3674 + if cur == other: 3675 + continue 3676 + if not self._graph.is_ancestor(parents[cur], parents[other]): 3677 + continue 3678 + # parents[cur] is an ancestor of parents[other], so parents[cur] 3679 + # seems redundant. However, if it was intentionally redundant 3680 + # (e.g. a no-ff merge) in the original, then we want to keep it. 3681 + if not always_prune and \ 3682 + self._orig_graph.is_ancestor(orig_parents[cur], 3683 + orig_parents[other]): 3684 + continue 3685 + # Some folks want their history to have all first parents be merge 3686 + # commits (except for any root commits), and always do a merge --no-ff. 3687 + # For such folks, don't remove the first parent even if it's an 3688 + # ancestor of other commits. 3689 + if self._args.no_ff and cur == 0: 3690 + continue 3691 + # Okay so the cur-th parent is an ancestor of the other-th parent, 3692 + # and it wasn't that way in the original repository; mark the 3693 + # cur-th parent as removable. 3694 + to_remove.append(cur) 3695 + break # cur removed, so skip rest of others -- i.e. check cur+=1 3696 + for x in reversed(to_remove): 3697 + parents.pop(x) 3698 + if len(parents) < 2: 3699 + return parents_copy, parents[0] 3700 + 3701 + return parents, None 3702 + 3703 + def _prunable(self, commit, new_1st_parent, had_file_changes, orig_parents): 3704 + parents = commit.parents 3705 + 3706 + if self._args.prune_empty == 'never': 3707 + return False 3708 + always_prune = (self._args.prune_empty == 'always') 3709 + 3710 + # For merge commits, unless there are prunable (redundant) parents, we 3711 + # do not want to prune 3712 + if len(parents) >= 2 and not new_1st_parent: 3713 + return False 3714 + 3715 + if len(parents) < 2: 3716 + # Special logic for commits that started empty... 3717 + if not had_file_changes and not always_prune: 3718 + had_parents_pruned = (len(parents) < len(orig_parents) or 3719 + (len(orig_parents) == 1 and 3720 + orig_parents[0] in _SKIPPED_COMMITS)) 3721 + # If the commit remains empty and had parents which were pruned, 3722 + # then prune this commit; otherwise, retain it 3723 + return (not commit.file_changes and had_parents_pruned) 3724 + 3725 + # We can only get here if the commit didn't start empty, so if it's 3726 + # empty now, it obviously became empty 3727 + if not commit.file_changes: 3728 + return True 3729 + 3730 + # If there are no parents of this commit and we didn't match the case 3731 + # above, then this commit cannot be pruned. Since we have no parent(s) 3732 + # to compare to, abort now to prevent future checks from failing. 3733 + if not parents: 3734 + return False 3735 + 3736 + # Similarly, we cannot handle the hard cases if we don't have a pipe 3737 + # to communicate with fast-import 3738 + if not self._import_pipes: 3739 + return False 3740 + 3741 + # If there have not been renames/remappings of IDs (due to insertion of 3742 + # new blobs), then we can sometimes know things aren't prunable with a 3743 + # simple check 3744 + if not _IDS.has_renames(): 3745 + # non-merge commits can only be empty if blob/file-change editing caused 3746 + # all file changes in the commit to have the same file contents as 3747 + # the parent. 3748 + changed_files = set(change.filename for change in commit.file_changes) 3749 + if len(orig_parents) < 2 and changed_files - self._files_tweaked: 3750 + return False 3751 + 3752 + # Finally, the hard case: due to either blob rewriting, or due to pruning 3753 + # of empty commits wiping out the first parent history back to the merge 3754 + # base, the list of file_changes we have may not actually differ from our 3755 + # (new) first parent's version of the files, i.e. this would actually be 3756 + # an empty commit. Check by comparing the contents of this commit to its 3757 + # (remaining) parent. 3758 + # 3759 + # NOTE on why this works, for the case of original first parent history 3760 + # having been pruned away due to being empty: 3761 + # The first parent history having been pruned away due to being 3762 + # empty implies the original first parent would have a tree (after 3763 + # filtering) that matched the merge base's tree. Since 3764 + # file_changes has the changes needed to go from what would have 3765 + # been the first parent to our new commit, and what would have been 3766 + # our first parent has a tree that matches the merge base, then if 3767 + # the new first parent has a tree matching the versions of files in 3768 + # file_changes, then this new commit is empty and thus prunable. 3769 + fi_input, fi_output = self._import_pipes 3770 + self._flush_renames() # Avoid fi_output having other stuff present 3771 + # Optimization note: we could have two loops over file_changes, the 3772 + # first doing all the self._output.write() calls, and the second doing 3773 + # the rest. But I'm worried about fast-import blocking on fi_output 3774 + # buffers filling up so I instead read from it as I go. 3775 + for change in commit.file_changes: 3776 + parent = new_1st_parent or commit.parents[0] # exists due to above checks 3777 + quoted_filename = PathQuoting.enquote(change.filename) 3778 + if isinstance(parent, int): 3779 + self._output.write(b"ls :%d %s\n" % (parent, quoted_filename)) 3780 + else: 3781 + self._output.write(b"ls %s %s\n" % (parent, quoted_filename)) 3782 + self._output.flush() 3783 + parent_version = fi_output.readline().split() 3784 + if change.type == b'D': 3785 + if parent_version != [b'missing', quoted_filename]: 3786 + return False 3787 + else: 3788 + blob_sha = change.blob_id 3789 + if isinstance(change.blob_id, int): 3790 + self._output.write(b"get-mark :%d\n" % change.blob_id) 3791 + self._output.flush() 3792 + blob_sha = fi_output.readline().rstrip() 3793 + if parent_version != [change.mode, b'blob', blob_sha, quoted_filename]: 3794 + return False 3795 + 3796 + return True 3797 + 3798 + def _record_remapping(self, commit, orig_parents): 3799 + new_id = None 3800 + # Record the mapping of old commit hash to new one 3801 + if commit.original_id and self._import_pipes: 3802 + fi_input, fi_output = self._import_pipes 3803 + self._output.write(b"get-mark :%d\n" % commit.id) 3804 + self._output.flush() 3805 + orig_id = commit.original_id 3806 + self._commit_short_old_hashes[orig_id[0:7]].add(orig_id) 3807 + # Note that we have queued up an id for later reading; flush a 3808 + # few of the older ones if we have too many queued up 3809 + self._pending_renames[orig_id] = commit.id 3810 + self._flush_renames(None, limit=40) 3811 + # Also, record if this was a merge commit that turned into a non-merge 3812 + # commit. 3813 + if len(orig_parents) >= 2 and len(commit.parents) < 2: 3814 + self._commits_no_longer_merges.append((commit.original_id, new_id)) 3815 + 3816 + def callback_metadata(self, extra_items = dict()): 3817 + return {'commit_rename_func': self._translate_commit_hash, 3818 + 'ancestry_graph': self._graph, 3819 + 'original_ancestry_graph': self._orig_graph, 3820 + **extra_items} 3821 + 3822 + def _tweak_blob(self, blob): 3823 + if self._args.max_blob_size and len(blob.data) > self._args.max_blob_size: 3824 + blob.skip() 3825 + 3826 + if blob.original_id in self._args.strip_blobs_with_ids: 3827 + blob.skip() 3828 + 3829 + if ( self._args.replace_text 3830 + and not self._file_info_callback 3831 + # not (if blob contains zero byte in the first 8Kb, that is, if blob is binary data) 3832 + and not b"\0" in blob.data[0:8192] 3833 + ): 3834 + for literal, replacement in self._args.replace_text['literals']: 3835 + blob.data = blob.data.replace(literal, replacement) 3836 + for regex, replacement in self._args.replace_text['regexes']: 3837 + blob.data = regex.sub(replacement, blob.data) 3838 + 3839 + if self._blob_callback: 3840 + self._blob_callback(blob, self.callback_metadata()) 3841 + 3842 + self._insert_into_stream(blob) 3843 + 3844 + def _filter_files(self, commit): 3845 + def filename_matches(path_expression, pathname): 3846 + ''' Returns whether path_expression matches pathname or a leading 3847 + directory thereof, allowing path_expression to not have a trailing 3848 + slash even if it is meant to match a leading directory. ''' 3849 + if path_expression == b'': 3850 + return True 3851 + n = len(path_expression) 3852 + if (pathname.startswith(path_expression) and 3853 + (path_expression[n-1:n] == b'/' or 3854 + len(pathname) == n or 3855 + pathname[n:n+1] == b'/')): 3856 + return True 3857 + return False 3858 + 3859 + def newname(path_changes, pathname, use_base_name, filtering_is_inclusive): 3860 + ''' Applies filtering and rename changes from path_changes to pathname, 3861 + returning any of None (file isn't wanted), original filename (file 3862 + is wanted with original name), or new filename. ''' 3863 + wanted = False 3864 + full_pathname = pathname 3865 + if use_base_name: 3866 + pathname = os.path.basename(pathname) 3867 + for (mod_type, match_type, path_exp) in path_changes: 3868 + if mod_type == 'filter' and not wanted: 3869 + assert match_type in ('match', 'glob', 'regex') 3870 + if match_type == 'match' and filename_matches(path_exp, pathname): 3871 + wanted = True 3872 + if match_type == 'glob' and fnmatch.fnmatch(pathname, path_exp): 3873 + wanted = True 3874 + if match_type == 'regex' and path_exp.search(pathname): 3875 + wanted = True 3876 + elif mod_type == 'rename': 3877 + match, repl = path_exp 3878 + assert match_type in ('match','regex') # glob was translated to regex 3879 + if match_type == 'match' and filename_matches(match, full_pathname): 3880 + full_pathname = full_pathname.replace(match, repl, 1) 3881 + pathname = full_pathname # rename incompatible with use_base_name 3882 + if match_type == 'regex': 3883 + full_pathname = match.sub(repl, full_pathname) 3884 + pathname = full_pathname # rename incompatible with use_base_name 3885 + return full_pathname if (wanted == filtering_is_inclusive) else None 3886 + 3887 + args = self._args 3888 + new_file_changes = {} # Assumes no renames or copies, otherwise collisions 3889 + for change in commit.file_changes: 3890 + # NEEDSWORK: _If_ we ever want to pass `--full-tree` to fast-export and 3891 + # parse that output, we'll need to modify this block; `--full-tree` 3892 + # issues a deleteall directive which has no filename, and thus this 3893 + # block would normally strip it. Of course, FileChange() and 3894 + # _parse_optional_filechange() would need updates too. 3895 + if change.type == b'DELETEALL': 3896 + new_file_changes[b''] = change 3897 + continue 3898 + if change.filename in self._newnames: 3899 + change.filename = self._newnames[change.filename] 3900 + else: 3901 + original_filename = change.filename 3902 + change.filename = newname(args.path_changes, change.filename, 3903 + args.use_base_name, args.inclusive) 3904 + if self._filename_callback: 3905 + change.filename = self._filename_callback(change.filename) 3906 + self._newnames[original_filename] = change.filename 3907 + if not change.filename: 3908 + continue # Filtering criteria excluded this file; move on to next one 3909 + if change.filename in new_file_changes: 3910 + # Getting here means that path renaming is in effect, and caused one 3911 + # path to collide with another. That's usually bad, but can be okay 3912 + # under two circumstances: 3913 + # 1) Sometimes people have a file named OLDFILE in old revisions of 3914 + # history, and they rename to NEWFILE, and would like to rewrite 3915 + # history so that all revisions refer to it as NEWFILE. As such, 3916 + # we can allow a collision when (at least) one of the two paths 3917 + # is a deletion. Note that if OLDFILE and NEWFILE are unrelated 3918 + # this also allows the rewrite to continue, which makes sense 3919 + # since OLDFILE is no longer in the way. 3920 + # 2) If OLDFILE and NEWFILE are exactly equal, then writing them 3921 + # both to the same location poses no problem; we only need one 3922 + # file. (This could come up if someone copied a file in some 3923 + # commit, then later either deleted the file or kept it exactly 3924 + # in sync with the original with any changes, and then decides 3925 + # they want to rewrite history to only have one of the two files) 3926 + colliding_change = new_file_changes[change.filename] 3927 + if change.type == b'D': 3928 + # We can just throw this one away and keep the other 3929 + continue 3930 + elif change.type == b'M' and ( 3931 + change.mode == colliding_change.mode and 3932 + change.blob_id == colliding_change.blob_id): 3933 + # The two are identical, so we can throw this one away and keep other 3934 + continue 3935 + elif new_file_changes[change.filename].type != b'D': 3936 + raise SystemExit(_("File renaming caused colliding pathnames!\n") + 3937 + _(" Commit: {}\n").format(commit.original_id) + 3938 + _(" Filename: {}").format(change.filename)) 3939 + # Strip files that are too large 3940 + if self._args.max_blob_size and \ 3941 + self._unpacked_size.get(change.blob_id, 0) > self._args.max_blob_size: 3942 + continue 3943 + if self._args.strip_blobs_with_ids and \ 3944 + change.blob_id in self._args.strip_blobs_with_ids: 3945 + continue 3946 + # Otherwise, record the change 3947 + new_file_changes[change.filename] = change 3948 + commit.file_changes = [v for k,v in sorted(new_file_changes.items())] 3949 + 3950 + def _tweak_commit(self, commit, aux_info): 3951 + if self._args.replace_message: 3952 + for literal, replacement in self._args.replace_message['literals']: 3953 + commit.message = commit.message.replace(literal, replacement) 3954 + for regex, replacement in self._args.replace_message['regexes']: 3955 + commit.message = regex.sub(replacement, commit.message) 3956 + if self._message_callback: 3957 + commit.message = self._message_callback(commit.message) 3958 + 3959 + # Change the commit message according to callback 3960 + if not self._args.preserve_commit_hashes: 3961 + commit.message = self._hash_re.sub(self._translate_commit_hash, 3962 + commit.message) 3963 + 3964 + # Change the author & committer according to mailmap rules 3965 + args = self._args 3966 + if args.mailmap: 3967 + commit.author_name, commit.author_email = \ 3968 + args.mailmap.translate(commit.author_name, commit.author_email) 3969 + commit.committer_name, commit.committer_email = \ 3970 + args.mailmap.translate(commit.committer_name, commit.committer_email) 3971 + # Change author & committer according to callbacks 3972 + if self._name_callback: 3973 + commit.author_name = self._name_callback(commit.author_name) 3974 + commit.committer_name = self._name_callback(commit.committer_name) 3975 + if self._email_callback: 3976 + commit.author_email = self._email_callback(commit.author_email) 3977 + commit.committer_email = self._email_callback(commit.committer_email) 3978 + 3979 + # Sometimes the 'branch' given is a tag; if so, rename it as requested so 3980 + # we don't get any old tagnames 3981 + if self._args.tag_rename: 3982 + commit.branch = RepoFilter._do_tag_rename(args.tag_rename, commit.branch) 3983 + if self._refname_callback: 3984 + commit.branch = self._refname_callback(commit.branch) 3985 + 3986 + # Filter or rename the list of file changes 3987 + orig_file_changes = set(commit.file_changes) 3988 + self._filter_files(commit) 3989 + 3990 + # Record ancestry graph 3991 + parents, orig_parents = commit.parents, aux_info['orig_parents'] 3992 + if self._args.state_branch: 3993 + external_parents = parents 3994 + else: 3995 + external_parents = [p for p in parents if not isinstance(p, int)] 3996 + # The use of 'reversed' is intentional here; there is a risk that we have 3997 + # duplicates in parents, and we want to map from parents to the first 3998 + # entry we find in orig_parents in such cases. 3999 + parent_reverse_dict = dict(zip(reversed(parents), reversed(orig_parents))) 4000 + 4001 + self._graph.record_external_commits(external_parents) 4002 + self._orig_graph.record_external_commits(external_parents) 4003 + self._graph.add_commit_and_parents(commit.id, parents) # new githash unknown 4004 + self._orig_graph.add_commit_and_parents(commit.old_id, orig_parents, 4005 + commit.original_id) 4006 + 4007 + # Prune parents (due to pruning of empty commits) if relevant, note that 4008 + # new_1st_parent is None unless this was a merge commit that is becoming 4009 + # a non-merge 4010 + prev_1st_parent = parents[0] if parents else None 4011 + parents, new_1st_parent = self._maybe_trim_extra_parents(orig_parents, 4012 + parents) 4013 + commit.parents = parents 4014 + 4015 + # If parents were pruned, then we need our file changes to be relative 4016 + # to the new first parent 4017 + # 4018 + # Notes: 4019 + # * new_1st_parent and new_1st_parent != parents[0] uniquely happens for example when: 4020 + # working on merge, selecting subset of files and merge base still 4021 + # valid while first parent history doesn't touch any of those paths, 4022 + # but second parent history does. prev_1st_parent had already been 4023 + # rewritten to the non-None first ancestor and it remains valid. 4024 + # self._maybe_trim_extra_parents() avoids removing this first parent 4025 + # because it'd make the commit a non-merge. However, if there are 4026 + # no file_changes of note, we'll drop this commit and mark 4027 + # new_1st_parent as the new replacement. To correctly determine if 4028 + # there are no file_changes of note, we need to have the list of 4029 + # file_changes relative to new_1st_parent. 4030 + # (See t9390#3, "basic -> basic-ten using '--path ten'") 4031 + # * prev_1st_parent != parents[0] happens for example when: 4032 + # similar to above, but the merge base is no longer valid and was 4033 + # pruned away as well. Then parents started as e.g. [None, $num], 4034 + # and both prev_1st_parent and new_1st_parent are None, while parents 4035 + # after self._maybe_trim_extra_parents() becomes just [$num]. 4036 + # (See t9390#67, "degenerate merge with non-matching filename".) 4037 + # Since $num was originally a second parent, we need to rewrite 4038 + # file changes to be relative to parents[0]. 4039 + # * TODO: We should be getting the changes relative to the new first 4040 + # parent even if self._fep is None, BUT we can't. Our method of 4041 + # getting the changes right now is an external git diff invocation, 4042 + # which we can't do if we just have a fast export stream. We can't 4043 + # really work around it by querying the fast-import stream either, 4044 + # because the 'ls' directive only allows us to list info about 4045 + # specific paths, but we need to find out which paths exist in two 4046 + # commits and then query them. We could maybe force checkpointing in 4047 + # fast-import, then doing a diff from what'll be the new first parent 4048 + # back to prev_1st_parent (which may be None, i.e. empty tree), using 4049 + # the fact that in A->{B,C}->D, where D is merge of B & C, the diff 4050 + # from C->D == C->A + A->B + B->D, and in these cases A==B, so it 4051 + # simplifies to C->D == C->A + B->D, and C is our new 1st parent 4052 + # commit, A is prev_1st_commit, and B->D is commit.file_changes that 4053 + # we already have. However, checkpointing the fast-import process 4054 + # and figuring out how long to wait before we can run our diff just 4055 + # seems excessive. For now, just punt and assume the merge wasn't 4056 + # "evil" (i.e. that it's remerge-diff is empty, as is true for most 4057 + # merges). If the merge isn't evil, no further steps are necessary. 4058 + if parents and self._fep and ( 4059 + prev_1st_parent != parents[0] or 4060 + new_1st_parent and new_1st_parent != parents[0]): 4061 + # Get the id from the original fast export stream corresponding to the 4062 + # new 1st parent. As noted above, that new 1st parent might be 4063 + # new_1st_parent, or if that is None, it'll be parents[0]. 4064 + will_be_1st = new_1st_parent or parents[0] 4065 + old_id = parent_reverse_dict[will_be_1st] 4066 + # Now, translate that to a hash 4067 + will_be_1st_commit_hash = self._orig_graph.map_to_hash(old_id) 4068 + # Get the changes from what is going to be the new 1st parent to this 4069 + # merge commit. Note that since we are going from the new 1st parent 4070 + # to the merge commit, we can just replace the existing 4071 + # commit.file_changes rather than getting something we need to combine 4072 + # with the existing commit.file_changes. Also, we can just replace 4073 + # because prev_1st_parent is an ancestor of will_be_1st_commit_hash 4074 + # (or prev_1st_parent is None and first parent history is gone), so 4075 + # even if we retain prev_1st_parent and do not prune it, the changes 4076 + # will still work given the snapshot-based way fast-export/fast-import 4077 + # work. 4078 + commit.file_changes = GitUtils.get_file_changes(self._repo_working_dir, 4079 + will_be_1st_commit_hash, 4080 + commit.original_id) 4081 + 4082 + # Save these and filter them 4083 + orig_file_changes = set(commit.file_changes) 4084 + self._filter_files(commit) 4085 + 4086 + # Process the --file-info-callback 4087 + if self._file_info_callback: 4088 + if self._file_info_value is None: 4089 + source_working_dir = self._args.source or b'.' 4090 + self._file_info_value = FileInfoValueHelper(self._args.replace_text, 4091 + self.insert, 4092 + source_working_dir) 4093 + new_file_changes = [] 4094 + for change in commit.file_changes: 4095 + if change.type != b'D': 4096 + assert(change.type == b'M') 4097 + (filename, mode, blob_id) = \ 4098 + self._file_info_callback(change.filename, 4099 + change.mode, 4100 + change.blob_id, 4101 + self._file_info_value) 4102 + if mode is None: 4103 + # TODO: Should deletion of the file even be a feature? Might 4104 + # want to remove this branch of the if-elif-else. 4105 + assert(filename is not None) 4106 + assert(blob_id is not None) 4107 + new_change = FileChange(b'D', filename) 4108 + elif filename is None: 4109 + continue # Drop the FileChange from this commit 4110 + else: 4111 + new_change = FileChange(b'M', filename, blob_id, mode) 4112 + else: 4113 + new_change = change # use change as-is for deletions 4114 + new_file_changes.append(new_change) 4115 + commit.file_changes = new_file_changes 4116 + 4117 + # Call the user-defined callback, if any 4118 + if self._commit_callback: 4119 + self._commit_callback(commit, self.callback_metadata(aux_info)) 4120 + 4121 + # Find out which files were modified by the callbacks. Such paths could 4122 + # lead to subsequent commits being empty (e.g. if removing a line containing 4123 + # a password from every version of a file that had the password, and some 4124 + # later commit did nothing more than remove that line) 4125 + final_file_changes = set(commit.file_changes) 4126 + if self._args.replace_text or self._blob_callback: 4127 + differences = orig_file_changes.union(final_file_changes) 4128 + else: 4129 + differences = orig_file_changes.symmetric_difference(final_file_changes) 4130 + self._files_tweaked.update(x.filename for x in differences) 4131 + 4132 + # Now print the resulting commit, or if prunable skip it 4133 + if not commit.dumped: 4134 + if not self._prunable(commit, new_1st_parent, 4135 + aux_info['had_file_changes'], orig_parents): 4136 + self._insert_into_stream(commit) 4137 + self._record_remapping(commit, orig_parents) 4138 + else: 4139 + rewrite_to = new_1st_parent or commit.first_parent() 4140 + commit.skip(new_id = rewrite_to) 4141 + if self._args.state_branch: 4142 + alias = Alias(commit.old_id or commit.id, rewrite_to or deleted_hash) 4143 + self._insert_into_stream(alias) 4144 + if commit.branch.startswith(b'refs/') or commit.branch == b'HEAD': 4145 + # The special check above is because when direct revisions are passed 4146 + # along to fast-export (such as with stashes), there is a chance the 4147 + # revision is rewritten to nothing. In such cases, we don't want to 4148 + # point an invalid ref that just names a revision to some other point. 4149 + reset = Reset(commit.branch, rewrite_to or deleted_hash) 4150 + self._insert_into_stream(reset) 4151 + self._commit_renames[commit.original_id] = None 4152 + 4153 + # Show progress 4154 + self._num_commits += 1 4155 + if not self._args.quiet: 4156 + self._progress_writer.show(self._parsed_message % self._num_commits) 4157 + 4158 + @staticmethod 4159 + def _do_tag_rename(rename_pair, tagname): 4160 + old, new = rename_pair.split(b':', 1) 4161 + old, new = b'refs/tags/'+old, b'refs/tags/'+new 4162 + if tagname.startswith(old): 4163 + return tagname.replace(old, new, 1) 4164 + return tagname 4165 + 4166 + def _tweak_tag(self, tag): 4167 + # Tweak the tag message according to callbacks 4168 + if self._args.replace_message: 4169 + for literal, replacement in self._args.replace_message['literals']: 4170 + tag.message = tag.message.replace(literal, replacement) 4171 + for regex, replacement in self._args.replace_message['regexes']: 4172 + tag.message = regex.sub(replacement, tag.message) 4173 + if self._message_callback: 4174 + tag.message = self._message_callback(tag.message) 4175 + 4176 + # Tweak the tag name according to tag-name-related callbacks 4177 + tag_prefix = b'refs/tags/' 4178 + fullref = tag_prefix+tag.ref 4179 + if self._args.tag_rename: 4180 + fullref = RepoFilter._do_tag_rename(self._args.tag_rename, fullref) 4181 + if self._refname_callback: 4182 + fullref = self._refname_callback(fullref) 4183 + if not fullref.startswith(tag_prefix): 4184 + msg = "Error: fast-import requires tags to be in refs/tags/ namespace." 4185 + msg += "\n {} renamed to {}".format(tag_prefix+tag.ref, fullref) 4186 + raise SystemExit(msg) 4187 + tag.ref = fullref[len(tag_prefix):] 4188 + 4189 + # Tweak the tagger according to callbacks 4190 + if self._args.mailmap: 4191 + tag.tagger_name, tag.tagger_email = \ 4192 + self._args.mailmap.translate(tag.tagger_name, tag.tagger_email) 4193 + if self._name_callback: 4194 + tag.tagger_name = self._name_callback(tag.tagger_name) 4195 + if self._email_callback: 4196 + tag.tagger_email = self._email_callback(tag.tagger_email) 4197 + 4198 + # Call general purpose tag callback 4199 + if self._tag_callback: 4200 + self._tag_callback(tag, self.callback_metadata()) 4201 + 4202 + def _tweak_reset(self, reset): 4203 + if self._args.tag_rename: 4204 + reset.ref = RepoFilter._do_tag_rename(self._args.tag_rename, reset.ref) 4205 + if self._refname_callback: 4206 + reset.ref = self._refname_callback(reset.ref) 4207 + if self._reset_callback: 4208 + self._reset_callback(reset, self.callback_metadata()) 4209 + 4210 + def results_tmp_dir(self, create_if_missing=True): 4211 + target_working_dir = self._args.target or b'.' 4212 + git_dir = GitUtils.determine_git_dir(target_working_dir) 4213 + d = os.path.join(git_dir, b'filter-repo') 4214 + if create_if_missing and not os.path.isdir(d): 4215 + os.mkdir(d) 4216 + return d 4217 + 4218 + def _load_marks_file(self, marks_basename): 4219 + full_branch = 'refs/heads/{}'.format(self._args.state_branch) 4220 + marks_file = os.path.join(self.results_tmp_dir(), marks_basename) 4221 + working_dir = self._args.target or b'.' 4222 + cmd = ['git', '-C', working_dir, 'show-ref', full_branch] 4223 + contents = b'' 4224 + if subproc.call(cmd, stdout=subprocess.DEVNULL) == 0: 4225 + cmd = ['git', '-C', working_dir, 'show', 4226 + '%s:%s' % (full_branch, decode(marks_basename))] 4227 + try: 4228 + contents = subproc.check_output(cmd) 4229 + except subprocess.CalledProcessError as e: # pragma: no cover 4230 + raise SystemExit(_("Failed loading %s from %s") % 4231 + (decode(marks_basename), full_branch)) 4232 + if contents: 4233 + biggest_id = max(int(x.split()[0][1:]) for x in contents.splitlines()) 4234 + _IDS._next_id = max(_IDS._next_id, biggest_id+1) 4235 + with open(marks_file, 'bw') as f: 4236 + f.write(contents) 4237 + return marks_file 4238 + 4239 + def _save_marks_files(self): 4240 + basenames = [b'source-marks', b'target-marks'] 4241 + working_dir = self._args.target or b'.' 4242 + 4243 + # Check whether the branch exists 4244 + parent = [] 4245 + full_branch = 'refs/heads/{}'.format(self._args.state_branch) 4246 + cmd = ['git', '-C', working_dir, 'show-ref', full_branch] 4247 + if subproc.call(cmd, stdout=subprocess.DEVNULL) == 0: 4248 + parent = ['-p', full_branch] 4249 + 4250 + # Run 'git hash-object $MARKS_FILE' for each marks file, save result 4251 + blob_hashes = {} 4252 + for marks_basename in basenames: 4253 + marks_file = os.path.join(self.results_tmp_dir(), marks_basename) 4254 + if not os.path.isfile(marks_file): # pragma: no cover 4255 + raise SystemExit(_("Failed to find %s to save to %s") 4256 + % (marks_file, self._args.state_branch)) 4257 + cmd = ['git', '-C', working_dir, 'hash-object', '-w', marks_file] 4258 + blob_hashes[marks_basename] = subproc.check_output(cmd).strip() 4259 + 4260 + # Run 'git mktree' to create a tree out of it 4261 + p = subproc.Popen(['git', '-C', working_dir, 'mktree'], 4262 + stdin=subprocess.PIPE, stdout=subprocess.PIPE) 4263 + for b in basenames: 4264 + p.stdin.write(b'100644 blob %s\t%s\n' % (blob_hashes[b], b)) 4265 + p.stdin.close() 4266 + p.wait() 4267 + tree = p.stdout.read().strip() 4268 + 4269 + # Create the new commit 4270 + cmd = (['git', '-C', working_dir, 'commit-tree', '-m', 'New mark files', 4271 + tree] + parent) 4272 + commit = subproc.check_output(cmd).strip() 4273 + subproc.call(['git', '-C', working_dir, 'update-ref', full_branch, commit]) 4274 + 4275 + def importer_only(self): 4276 + self._run_sanity_checks() 4277 + self._setup_output() 4278 + 4279 + def set_output(self, outputRepoFilter): 4280 + assert outputRepoFilter._output 4281 + 4282 + # set_output implies this RepoFilter is doing exporting, though may not 4283 + # be the only one. 4284 + self._setup_input(use_done_feature = False) 4285 + 4286 + # Set our output management up to pipe to outputRepoFilter's locations 4287 + self._managed_output = False 4288 + self._output = outputRepoFilter._output 4289 + self._import_pipes = outputRepoFilter._import_pipes 4290 + 4291 + # Handle sanity checks, though currently none needed for export-only cases 4292 + self._run_sanity_checks() 4293 + 4294 + def _read_stash(self): 4295 + if self._stash: 4296 + return 4297 + if self._orig_refs and b'refs/stash' in self._orig_refs and \ 4298 + self._args.refs == ['--all']: 4299 + repo_working_dir = self._args.source or b'.' 4300 + git_dir = GitUtils.determine_git_dir(repo_working_dir) 4301 + stash = os.path.join(git_dir, b'logs', b'refs', b'stash') 4302 + if os.path.exists(stash): 4303 + self._stash = [] 4304 + with open(stash, 'br') as f: 4305 + for line in f: 4306 + (oldhash, newhash, rest) = line.split(None, 2) 4307 + self._stash.append((newhash, rest)) 4308 + self._args.refs.extend([x[0] for x in self._stash]) 4309 + 4310 + def _write_stash(self): 4311 + last = deleted_hash 4312 + if self._stash: 4313 + target_working_dir = self._args.target or b'.' 4314 + git_dir = GitUtils.determine_git_dir(target_working_dir) 4315 + stash = os.path.join(git_dir, b'logs', b'refs', b'stash') 4316 + with open(stash, 'bw') as f: 4317 + for (hash, rest) in self._stash: 4318 + new_hash = self._get_rename(hash) 4319 + if new_hash is None: 4320 + continue 4321 + f.write(b' '.join([last, new_hash, rest]) + b'\n') 4322 + last = new_hash 4323 + print(_("Rewrote the stash.")) 4324 + 4325 + def _setup_input(self, use_done_feature): 4326 + if self._args.stdin: 4327 + self._input = sys.stdin.detach() 4328 + sys.stdin = None # Make sure no one tries to accidentally use it 4329 + self._fe_orig = None 4330 + else: 4331 + self._read_stash() 4332 + skip_blobs = (self._blob_callback is None and 4333 + (self._args.replace_text is None or 4334 + self._file_info_callback is not None) and 4335 + self._args.source == self._args.target) 4336 + extra_flags = [] 4337 + if skip_blobs: 4338 + extra_flags.append('--no-data') 4339 + if self._args.max_blob_size: 4340 + self._unpacked_size, packed_size = GitUtils.get_blob_sizes() 4341 + if use_done_feature: 4342 + extra_flags.append('--use-done-feature') 4343 + if write_marks: 4344 + extra_flags.append(b'--mark-tags') 4345 + if self._args.state_branch: 4346 + assert(write_marks) 4347 + source_marks_file = self._load_marks_file(b'source-marks') 4348 + extra_flags.extend([b'--export-marks='+source_marks_file, 4349 + b'--import-marks='+source_marks_file]) 4350 + if self._args.preserve_commit_encoding is not None: # pragma: no cover 4351 + reencode = 'no' if self._args.preserve_commit_encoding else 'yes' 4352 + extra_flags.append('--reencode='+reencode) 4353 + if self._args.date_order: 4354 + extra_flags.append('--date-order') 4355 + location = ['-C', self._args.source] if self._args.source else [] 4356 + fep_cmd = ['git'] + location + ['fast-export', '--show-original-ids', 4357 + '--signed-tags=strip', '--tag-of-filtered-object=rewrite', 4358 + '--fake-missing-tagger', '--reference-excluded-parents' 4359 + ] + extra_flags + self._args.refs 4360 + self._fep = subproc.Popen(fep_cmd, bufsize=-1, stdout=subprocess.PIPE) 4361 + self._input = self._fep.stdout 4362 + if self._args.dry_run or self._args.debug: 4363 + self._fe_orig = os.path.join(self.results_tmp_dir(), 4364 + b'fast-export.original') 4365 + output = open(self._fe_orig, 'bw') 4366 + self._input = InputFileBackup(self._input, output) 4367 + if self._args.debug: 4368 + tmp = [decode(x) if isinstance(x, bytes) else x for x in fep_cmd] 4369 + print("[DEBUG] Running: {}".format(' '.join(tmp))) 4370 + print(" (saving a copy of the output at {})" 4371 + .format(decode(self._fe_orig))) 4372 + 4373 + def _setup_output(self): 4374 + if not self._args.dry_run: 4375 + location = ['-C', self._args.target] if self._args.target else [] 4376 + fip_cmd = ['git'] + location + ['-c', 'core.ignorecase=false', 4377 + 'fast-import', '--force', '--quiet'] 4378 + if date_format_permissive: 4379 + fip_cmd.append('--date-format=raw-permissive') 4380 + if self._args.state_branch: 4381 + target_marks_file = self._load_marks_file(b'target-marks') 4382 + fip_cmd.extend([b'--export-marks='+target_marks_file, 4383 + b'--import-marks='+target_marks_file]) 4384 + self._fip = subproc.Popen(fip_cmd, bufsize=-1, 4385 + stdin=subprocess.PIPE, stdout=subprocess.PIPE) 4386 + self._import_pipes = (self._fip.stdin, self._fip.stdout) 4387 + if self._args.dry_run or self._args.debug: 4388 + self._fe_filt = os.path.join(self.results_tmp_dir(), 4389 + b'fast-export.filtered') 4390 + self._output = open(self._fe_filt, 'bw') 4391 + else: 4392 + self._output = self._fip.stdin 4393 + if self._args.debug and not self._args.dry_run: 4394 + self._output = DualFileWriter(self._fip.stdin, self._output) 4395 + tmp = [decode(x) if isinstance(x, bytes) else x for x in fip_cmd] 4396 + print("[DEBUG] Running: {}".format(' '.join(tmp))) 4397 + print(" (using the following file as input: {})" 4398 + .format(decode(self._fe_filt))) 4399 + 4400 + def _migrate_origin_to_heads(self): 4401 + source_working_dir = self._args.source or b'.' 4402 + target_working_dir = self._args.target or b'.' 4403 + refs_to_migrate = set(x for x in self._orig_refs 4404 + if x.startswith(b'refs/remotes/origin/')) 4405 + refs_to_warn_about = set() 4406 + if refs_to_migrate: 4407 + if self._args.debug: 4408 + print("[DEBUG] Migrating refs/remotes/origin/* -> refs/heads/*") 4409 + p = subproc.Popen('git update-ref --no-deref --stdin'.split(), 4410 + stdin=subprocess.PIPE, cwd=source_working_dir) 4411 + for ref in refs_to_migrate: 4412 + if ref == b'refs/remotes/origin/HEAD': 4413 + p.stdin.write(b'delete %s %s\n' % (ref, self._orig_refs[ref])) 4414 + del self._orig_refs[ref] 4415 + continue 4416 + newref = ref.replace(b'refs/remotes/origin/', b'refs/heads/') 4417 + if newref not in self._orig_refs: 4418 + p.stdin.write(b'create %s %s\n' % (newref, self._orig_refs[ref])) 4419 + self._orig_refs[newref] = self._orig_refs[ref] 4420 + elif self._orig_refs[ref] != self._orig_refs[newref]: 4421 + refs_to_warn_about.add(newref) 4422 + p.stdin.write(b'delete %s %s\n' % (ref, self._orig_refs[ref])) 4423 + del self._orig_refs[ref] 4424 + p.stdin.close() 4425 + if p.wait(): # pragma: no cover 4426 + msg = _("git update-ref failed; see above") 4427 + raise SystemExit(msg) 4428 + 4429 + if b'remote.origin.url' not in self._config_settings: 4430 + return 4431 + 4432 + # For sensitive data removals, fetch ALL refs. Non-mirror clones normally 4433 + # only grab branches and tags, but other refs may hold on to the sensitive 4434 + # data as well. 4435 + if self._args.sensitive_data_removal and \ 4436 + not self._args.no_fetch and \ 4437 + not self._already_ran and \ 4438 + self._config_settings.get(b'remote.origin.mirror', b'false') != b'true': 4439 + 4440 + if refs_to_warn_about: 4441 + msg = ("Warning: You have refs modified from upstream:\n " + 4442 + "\n ".join([decode(x) for x in refs_to_warn_about]) + 4443 + "\n" + 4444 + " We want to forcibly fetch from upstream to ensure\n" + 4445 + " that all relevent refs are rewritten, but this will\n" + 4446 + " discard your local changes before starting the\n" + 4447 + " rewrite. Proceed with fetch (Y/N)?") 4448 + response = input(msg) 4449 + 4450 + if response.lower() != 'y': 4451 + self._args.no_fetch = True 4452 + # Don't do the fetch, and don't remove the origin remote 4453 + return 4454 + 4455 + cmd = 'git fetch -q --prune --update-head-ok --refmap "" origin +refs/*:refs/*' 4456 + m = _("NOTICE: Fetching all refs from origin to make sure we rewrite\n" 4457 + " all history that may reference the sensitive data, via\n" 4458 + " "+cmd) 4459 + print(m) 4460 + ret = subproc.call([arg if arg != '""' else '' for arg in cmd.split()], 4461 + cwd=source_working_dir) 4462 + if ret != 0: # pragma: no cover 4463 + m = _("Warning: Fetching all refs from origin failed") 4464 + print(m) 4465 + if self._args.sensitive_data_removal: 4466 + return 4467 + 4468 + # Now remove the origin remote 4469 + url = self._config_settings[b'remote.origin.url'].decode(errors='replace') 4470 + m = _("NOTICE: Removing 'origin' remote; see 'Why is my origin removed?'\n" 4471 + " in the manual if you want to push back there.\n" 4472 + " (was %s)") % url 4473 + print(m) 4474 + subproc.call('git remote rm origin'.split(), cwd=target_working_dir) 4475 + 4476 + def _final_commands(self): 4477 + self._finalize_handled = True 4478 + self._done_callback and self._done_callback() 4479 + 4480 + if self._file_info_value: 4481 + self._file_info_value.finalize() 4482 + if not self._args.quiet: 4483 + self._progress_writer.finish() 4484 + 4485 + def _ref_update(self, target_working_dir): 4486 + # Start the update-ref process 4487 + p = subproc.Popen('git update-ref --no-deref --stdin'.split(), 4488 + stdin=subprocess.PIPE, 4489 + cwd=target_working_dir) 4490 + 4491 + # Remove replace_refs from _orig_refs 4492 + replace_refs = {k:v for k, v in self._orig_refs.items() 4493 + if k.startswith(b'refs/replace/')} 4494 + reverse_replace_refs = collections.defaultdict(list) 4495 + for k,v in replace_refs.items(): 4496 + reverse_replace_refs[v].append(k) 4497 + all(map(self._orig_refs.pop, replace_refs)) 4498 + 4499 + # Remove unused refs 4500 + exported_refs, imported_refs = self.get_exported_and_imported_refs() 4501 + refs_to_nuke = exported_refs - imported_refs 4502 + # Because revisions can be passed to fast-export which handles them as 4503 + # though they were refs, we might have bad "refs" to nuke; strip them out. 4504 + refs_to_nuke = [x for x in refs_to_nuke 4505 + if x.startswith(b'refs/') or x == b'HEAD'] 4506 + if self._args.partial: 4507 + refs_to_nuke = set() 4508 + if refs_to_nuke and self._args.debug: 4509 + print("[DEBUG] Deleting the following refs:\n "+ 4510 + decode(b"\n ".join(sorted(refs_to_nuke)))) 4511 + p.stdin.write(b''.join([b"delete %s\n" % x 4512 + for x in refs_to_nuke])) 4513 + 4514 + # Delete or update and add replace_refs; note that fast-export automatically 4515 + # handles 'update-no-add', we only need to take action for the other four 4516 + # choices for replace_refs. 4517 + self._flush_renames() 4518 + actual_renames = {k:v for k,v in self._commit_renames.items() if k != v} 4519 + if self._args.replace_refs in ['delete-no-add', 'delete-and-add']: 4520 + # Delete old replace refs, if unwanted 4521 + replace_refs_to_nuke = set(replace_refs) 4522 + if self._args.replace_refs == 'delete-and-add': 4523 + # git-update-ref won't allow us to update a ref twice, so be careful 4524 + # to avoid deleting refs we'll later update 4525 + replace_refs_to_nuke = replace_refs_to_nuke.difference( 4526 + [b'refs/replace/'+x for x in actual_renames]) 4527 + p.stdin.write(b''.join([b"delete %s\n" % x 4528 + for x in replace_refs_to_nuke])) 4529 + if self._args.replace_refs in ['delete-and-add', 'update-or-add', 4530 + 'update-and-add']: 4531 + # Add new replace refs 4532 + update_only = (self._args.replace_refs == 'update-or-add') 4533 + p.stdin.write(b''.join([b"update refs/replace/%s %s\n" % (old, new) 4534 + for old,new in actual_renames.items() 4535 + if new and not (update_only and 4536 + old in reverse_replace_refs)])) 4537 + 4538 + # Complete the update-ref process 4539 + p.stdin.close() 4540 + if p.wait(): 4541 + raise SystemExit(_("git update-ref failed; see above")) # pragma: no cover 4542 + 4543 + def _remap_to(self, oldish_hash): 4544 + ''' 4545 + Given an oldish_hash (from the beginning of the current run), return: 4546 + IF oldish_hash is NOT pruned: 4547 + the hash of the rewrite of oldish_hash 4548 + otherwise: 4549 + the hash of the rewrite of the first unpruned ancestor of oldish_hash 4550 + ''' 4551 + old_id = self._orig_graph._hash_to_id[oldish_hash] 4552 + new_id = _IDS.translate(old_id) 4553 + new_hash = self._graph.git_hash[new_id] if new_id else deleted_hash 4554 + return new_hash 4555 + 4556 + def _compute_metadata(self, metadata_dir, orig_refs): 4557 + # 4558 + # First, handle commit_renames 4559 + # 4560 + old_commit_renames = dict() 4561 + if not self._already_ran: 4562 + commit_renames = {old: new 4563 + for old, new in self._commit_renames.items() 4564 + } 4565 + else: 4566 + # Read commit-map into old_commit_renames 4567 + with open(os.path.join(metadata_dir, b'commit-map'), 'br') as f: 4568 + f.readline() # Skip the header line 4569 + for line in f: 4570 + (old,new) = line.split() 4571 + old_commit_renames[old] = new 4572 + # Use A->B mappings in old_commit_renames, and B->C mappings in 4573 + # self._commit_renames to yield A->C mappings in commit_renames 4574 + commit_renames = {old: self._commit_renames.get(newish, newish) 4575 + for old, newish in old_commit_renames.items()} 4576 + # If there are any B->C mappings in self._commit_renames for which 4577 + # there was no A->B mapping in old_commit_renames, then add the 4578 + # B->C mapping to commit_renames too. 4579 + seen = set(old_commit_renames.values()) 4580 + commit_renames.update({old: new 4581 + for old, new in self._commit_renames.items() 4582 + if old not in seen}) 4583 + 4584 + # 4585 + # Second, handle ref_maps 4586 + # 4587 + exported_refs, imported_refs = self.get_exported_and_imported_refs() 4588 + 4589 + old_commit_unrenames = dict() 4590 + if not self._already_ran: 4591 + old_ref_map = dict((refname, (old_hash, deleted_hash)) 4592 + for refname, old_hash in orig_refs.items() 4593 + if refname in exported_refs) 4594 + else: 4595 + # old_commit_renames talk about how commits were renamed in the original 4596 + # run. Let's reverse it to find out how to get from the intermediate 4597 + # commit name, back to the original. Because everything in orig_refs 4598 + # right now refers to the intermediate commits after the first run(s), 4599 + # and we need to map them back to what they were before any changes. 4600 + old_commit_unrenames = dict((v,k) for (k,v) in old_commit_renames.items()) 4601 + 4602 + old_ref_map = {} 4603 + # Populate old_ref_map from the 'ref-map' file 4604 + with open(os.path.join(metadata_dir, b'ref-map'), 'br') as f: 4605 + f.readline() # Skip the header line 4606 + for line in f: 4607 + (old,intermediate,ref) = line.split() 4608 + old_ref_map[ref] = (old, intermediate) 4609 + # Append to old_ref_map items from orig_refs that were exported, but 4610 + # get the actual original commit name 4611 + for refname, old_hash in orig_refs.items(): 4612 + if refname in old_ref_map: 4613 + continue 4614 + if refname not in exported_refs: 4615 + continue 4616 + # Compute older_hash 4617 + original_hash = old_commit_unrenames.get(old_hash, old_hash) 4618 + old_ref_map[refname] = (original_hash, deleted_hash) 4619 + 4620 + new_refs = {} 4621 + new_refs_initialized = False 4622 + ref_maps = {} 4623 + self._orig_graph._ensure_reverse_maps_populated() 4624 + for refname, pair in old_ref_map.items(): 4625 + old_hash, hash_ref_becomes_if_not_imported_in_this_run = pair 4626 + if refname not in imported_refs: 4627 + new_hash = hash_ref_becomes_if_not_imported_in_this_run 4628 + elif old_hash in commit_renames: 4629 + intermediate = old_commit_renames.get(old_hash,old_hash) 4630 + if intermediate in self._commit_renames: 4631 + new_hash = self._remap_to(intermediate) 4632 + else: 4633 + new_hash = intermediate 4634 + else: # Must be either an annotated tag, or a ref whose tip was pruned 4635 + if not new_refs_initialized: 4636 + target_working_dir = self._args.target or b'.' 4637 + new_refs = GitUtils.get_refs(target_working_dir) 4638 + new_refs_initialized = True 4639 + if refname in new_refs: 4640 + new_hash = new_refs[refname] 4641 + else: 4642 + new_hash = deleted_hash 4643 + ref_maps[refname] = (old_hash, new_hash) 4644 + if self._args.source or self._args.target: 4645 + if not new_refs_initialized: 4646 + target_working_dir = self._args.target or b'.' 4647 + new_refs = GitUtils.get_refs(target_working_dir) 4648 + new_refs_initialized = True 4649 + for ref, new_hash in new_refs.items(): 4650 + if ref not in orig_refs and not ref.startswith(b'refs/replace/'): 4651 + old_hash = b'0'*len(new_hash) 4652 + ref_maps[ref] = (old_hash, new_hash) 4653 + 4654 + # 4655 + # Third, handle first_changes 4656 + # 4657 + 4658 + old_first_changes = dict() 4659 + if self._already_ran: 4660 + # Read first_changes into old_first_changes 4661 + with open(os.path.join(metadata_dir, b'first-changed-commits'), 'br') as f: 4662 + for line in f: 4663 + changed_commit, undeleted_self_or_ancestor = line.strip().split() 4664 + old_first_changes[changed_commit] = undeleted_self_or_ancestor 4665 + # We need to find the commits that were modified whose parents were not. 4666 + # To be able to find parents, we need the commit names as of the beginning 4667 + # of this run, and then when we are done, we need to map them back to the 4668 + # name of the commits from before any git-filter-repo runs. 4669 + # 4670 + # We are excluding here any commits deleted in previous git-filter-repo 4671 + # runs 4672 + undo_old_commit_renames = dict((v,k) for (k,v) in old_commit_renames.items() 4673 + if v != deleted_hash) 4674 + # Get a list of all commits that were changed, as of the beginning of 4675 + # this latest run. 4676 + changed_commits = {new 4677 + for (old,new) in old_commit_renames.items() 4678 + if old != new and new != deleted_hash} | \ 4679 + {old 4680 + for (old,new) in self._commit_renames.items() 4681 + if old != new} 4682 + special_changed_commits = {old 4683 + for (old,new) in old_commit_renames.items() 4684 + if new == deleted_hash} 4685 + first_changes = dict() 4686 + for (old,new) in self._commit_renames.items(): 4687 + if old == new: 4688 + # old wasn't modified, can't be first change if not even a change 4689 + continue 4690 + if old_commit_unrenames.get(old,old) != old: 4691 + # old was already modified in previous run; while it might represent 4692 + # something that is still a first change, we'll handle that as we 4693 + # loop over old_first_changes below 4694 + continue 4695 + if any(parent in changed_commits 4696 + for parent in self._orig_graph.get_parent_hashes(old)): 4697 + # a parent of old was modified, so old is not a first change 4698 + continue 4699 + # At this point, old IS a first change. We need to find out what new 4700 + # commit it maps to, or if it doesn't map to one, what new commit was 4701 + # its most recent ancestor that wasn't pruned. 4702 + if new is None: 4703 + new = self._remap_to(old) 4704 + first_changes[old] = (new if new is not None else deleted_hash) 4705 + for (old,undeleted_self_or_ancestor) in old_first_changes.items(): 4706 + if undeleted_self_or_ancestor == deleted_hash: 4707 + # old represents a commit that was pruned and whose entire ancestry 4708 + # was pruned. So, old is still a first change 4709 + first_changes[old] = undeleted_self_or_ancestor 4710 + continue 4711 + intermediate = old_commit_renames.get(old, old) 4712 + usoa = undeleted_self_or_ancestor 4713 + new_ancestor = self._commit_renames.get(usoa, usoa) 4714 + if intermediate == deleted_hash: 4715 + # old was pruned in previous rewrite 4716 + if usoa != new_ancestor: 4717 + # old's ancestor got rewritten in this filtering run; we can drop 4718 + # this one from first_changes. 4719 + continue 4720 + # Getting here means old was a first change and old was pruned in a 4721 + # previous run, and its ancestors that survived were non rewritten in 4722 + # this run, so old remains a first change 4723 + first_changes[old] = new_ancestor # or usoa, since new_ancestor == usoa 4724 + continue 4725 + assert(usoa == intermediate) # old wasn't pruned => usoa == intermediate 4726 + 4727 + # Check whether parents of intermediate were rewritten. Note that 4728 + # intermediate in self._commit_renames only means that intermediate was 4729 + # processed by the latest filtering (not necessarily that it changed), 4730 + # but we need to know that before we can check for parent hashes having 4731 + # changed. 4732 + if intermediate not in self._commit_renames: 4733 + # This commit was not processed by this run, so it remains a first 4734 + # change 4735 + first_changes[old] = usoa 4736 + continue 4737 + if any(parent in changed_commits 4738 + for parent in self._orig_graph.get_parent_hashes(intermediate)): 4739 + # An ancestor was modified by this run, so it is no longer a first 4740 + # change; continue to the next one. 4741 + continue 4742 + # This change is a first_change; find the new commit its usoa maps to 4743 + new = self._remap_to(intermediate) 4744 + assert(new is not None) 4745 + first_changes[old] = new 4746 + 4747 + return commit_renames, ref_maps, first_changes 4748 + 4749 + def _handle_lfs_metadata(self, metadata_dir): 4750 + if self._lfs_object_tracker is None: 4751 + print("NOTE: LFS object orphaning not checked (LFS not in use)") 4752 + return 4753 + 4754 + if self._args.partial: 4755 + target_working_dir = self._args.target or b'.' 4756 + source = False 4757 + self._lfs_object_tracker.find_all_lfs_objects_in_repo(target_working_dir, 4758 + source) 4759 + 4760 + with open(os.path.join(metadata_dir, b'original_lfs_objects'), 'bw') as f: 4761 + for obj in sorted(self._lfs_object_tracker.source_objects.objects): 4762 + f.write(obj+b"\n") 4763 + 4764 + orphaned_lfs_path = os.path.join(metadata_dir, b'orphaned_lfs_objects') 4765 + msg = textwrap.dedent(_(f"""\ 4766 + NOTE: There were LFS Objects Orphaned by this rewrite recorded in 4767 + {decode(orphaned_lfs_path)}.""")) 4768 + with open(orphaned_lfs_path, 'bw') as f: 4769 + differences = self._lfs_object_tracker.source_objects.objects - \ 4770 + self._lfs_object_tracker.target_objects.objects 4771 + for obj in sorted(differences): 4772 + f.write(obj+b"\n") 4773 + if differences: 4774 + self._lfs_object_tracker.objects_orphaned = True 4775 + print(msg) 4776 + 4777 + def _record_metadata(self, metadata_dir, orig_refs): 4778 + self._flush_renames() 4779 + commit_renames, ref_maps, first_changes = \ 4780 + self._compute_metadata(metadata_dir, orig_refs) 4781 + 4782 + if self._args.sensitive_data_removal: 4783 + changed_commits = sum(k!=v for (k,v) in commit_renames.items()) 4784 + print(f"You rewrote {changed_commits} (of {len(commit_renames)}) commits.") 4785 + print("") # Add a blank line before important rewrite information 4786 + print(f"NOTE: First Changed Commit(s) is/are:\n " 4787 + + decode(b"\n ".join(x for x in first_changes))) 4788 + 4789 + with open(os.path.join(metadata_dir, b'sensitive_data_removal'), 'bw') as f: 4790 + pass # Write nothing; we only need the file created 4791 + 4792 + self._handle_lfs_metadata(metadata_dir) 4793 + print("") # Add a blank line after important rewrite information 4794 + 4795 + with open(os.path.join(metadata_dir, b'commit-map'), 'bw') as f: 4796 + f.write(("%-40s %s\n" % (_("old"), _("new"))).encode()) 4797 + for (old,new) in sorted(commit_renames.items()): 4798 + msg = b'%s %s\n' % (old, new if new != None else deleted_hash) 4799 + f.write(msg) 4800 + 4801 + with open(os.path.join(metadata_dir, b'ref-map'), 'bw') as f: 4802 + f.write(("%-40s %-40s %s\n" % (_("old"), _("new"), _("ref"))).encode()) 4803 + for refname, hash_pair in sorted(ref_maps.items()): 4804 + (old_hash, new_hash) = hash_pair 4805 + f.write(b'%s %s %s\n' % (old_hash, new_hash, refname)) 4806 + if old_hash != new_hash: 4807 + self._changed_refs.add(refname) 4808 + 4809 + with open(os.path.join(metadata_dir, b'changed-refs'), 'bw') as f: 4810 + for refname in sorted(self._changed_refs): 4811 + f.write(b'%s\n' % refname) 4812 + 4813 + with open(os.path.join(metadata_dir, b'first-changed-commits'), 'bw') as f: 4814 + for commit, undeleted_self_or_ancestor in sorted(first_changes.items()): 4815 + f.write(b'%s %s\n' % (commit, undeleted_self_or_ancestor)) 4816 + 4817 + with open(os.path.join(metadata_dir, b'suboptimal-issues'), 'bw') as f: 4818 + issues_found = False 4819 + if self._commits_no_longer_merges: 4820 + issues_found = True 4821 + 4822 + f.write(textwrap.dedent(_(''' 4823 + The following commits used to be merge commits but due to filtering 4824 + are now regular commits; they likely have suboptimal commit messages 4825 + (e.g. "Merge branch next into master"). Original commit hash on the 4826 + left, commit hash after filtering/rewriting on the right: 4827 + ''')[1:]).encode()) 4828 + for oldhash, newhash in self._commits_no_longer_merges: 4829 + f.write(' {} {}\n'.format(oldhash, newhash).encode()) 4830 + f.write(b'\n') 4831 + 4832 + if self._commits_referenced_but_removed: 4833 + issues_found = True 4834 + f.write(textwrap.dedent(_(''' 4835 + The following commits were filtered out, but referenced in another 4836 + commit message. The reference to the now-nonexistent commit hash 4837 + (or a substring thereof) was left as-is in any commit messages: 4838 + ''')[1:]).encode()) 4839 + for bad_commit_reference in self._commits_referenced_but_removed: 4840 + f.write(' {}\n'.format(bad_commit_reference).encode()) 4841 + f.write(b'\n') 4842 + 4843 + if not issues_found: 4844 + f.write(_("No filtering problems encountered.\n").encode()) 4845 + 4846 + with open(os.path.join(metadata_dir, b'already_ran'), 'bw') as f: 4847 + f.write(_("This file exists to allow you to filter again without --force,\n" 4848 + "and to specify that metadata files should be updated instead\n" 4849 + "of rewritten").encode()) 4850 + 4851 + def finish(self): 4852 + ''' Alternative to run() when there is no input of our own to parse, 4853 + meaning that run only really needs to close the handle to fast-import 4854 + and let it finish, thus making a call to "run" feel like a misnomer. ''' 4855 + assert not self._input 4856 + assert self._managed_output 4857 + self.run() 4858 + 4859 + def insert(self, obj, direct_insertion = False): 4860 + if not direct_insertion: 4861 + if type(obj) == Blob: 4862 + self._tweak_blob(obj) 4863 + elif type(obj) == Commit: 4864 + aux_info = {'orig_parents': obj.parents, 4865 + 'had_file_changes': bool(obj.file_changes)} 4866 + self._tweak_commit(obj, aux_info) 4867 + elif type(obj) == Reset: 4868 + self._tweak_reset(obj) 4869 + elif type(obj) == Tag: 4870 + self._tweak_tag(obj) 4871 + self._insert_into_stream(obj) 4872 + 4873 + def _insert_into_stream(self, obj): 4874 + if not obj.dumped: 4875 + if self._lfs_object_tracker: 4876 + self._lfs_object_tracker.check_output_object(obj) 4877 + if self._parser: 4878 + self._parser.insert(obj) 4879 + else: 4880 + obj.dump(self._output) 4881 + 4882 + def get_exported_and_imported_refs(self): 4883 + return self._parser.get_exported_and_imported_refs() 4884 + 4885 + def run(self): 4886 + start = time.time() 4887 + if not self._input and not self._output: 4888 + self._run_sanity_checks() 4889 + if not self._args.dry_run and not self._args.partial: 4890 + self._read_stash() 4891 + self._migrate_origin_to_heads() 4892 + self._setup_input(use_done_feature = True) 4893 + self._setup_output() 4894 + assert self._sanity_checks_handled 4895 + 4896 + if self._input: 4897 + # Create and run the filter 4898 + self._repo_working_dir = self._args.source or b'.' 4899 + self._parser = FastExportParser(blob_callback = self._tweak_blob, 4900 + commit_callback = self._tweak_commit, 4901 + tag_callback = self._tweak_tag, 4902 + reset_callback = self._tweak_reset, 4903 + done_callback = self._final_commands) 4904 + self._setup_lfs_orphaning_checks() 4905 + self._parser.run(self._input, self._output) 4906 + if not self._finalize_handled: 4907 + self._final_commands() 4908 + 4909 + # Make sure fast-export completed successfully 4910 + if not self._args.stdin and self._fep.wait(): 4911 + raise SystemExit(_("Error: fast-export failed; see above.")) # pragma: no cover 4912 + self._input.close() 4913 + 4914 + # If we're not the manager of self._output, we should avoid post-run cleanup 4915 + if not self._managed_output: 4916 + return 4917 + 4918 + # Close the output and ensure fast-import successfully completes 4919 + self._output.close() 4920 + if not self._args.dry_run and self._fip.wait(): 4921 + raise SystemExit(_("Error: fast-import failed; see above.")) # pragma: no cover 4922 + 4923 + # With fast-export and fast-import complete, update state if requested 4924 + if self._args.state_branch: 4925 + self._save_marks_files() 4926 + 4927 + # Notify user how long it took, before doing a gc and such 4928 + msg = "New history written in {:.2f} seconds..." 4929 + if self._args.repack: 4930 + msg = "New history written in {:.2f} seconds; now repacking/cleaning..." 4931 + print(msg.format(time.time()-start)) 4932 + 4933 + # Exit early, if requested 4934 + if self._args.dry_run: 4935 + print(_("NOTE: Not running fast-import or cleaning up; --dry-run passed.")) 4936 + if self._fe_orig: 4937 + print(_(" Requested filtering can be seen by comparing:")) 4938 + print(" " + decode(self._fe_orig)) 4939 + else: 4940 + print(_(" Requested filtering can be seen at:")) 4941 + print(" " + decode(self._fe_filt)) 4942 + return 4943 + 4944 + target_working_dir = self._args.target or b'.' 4945 + if self._input: 4946 + self._ref_update(target_working_dir) 4947 + 4948 + # Write out data about run 4949 + self._record_metadata(self.results_tmp_dir(), self._orig_refs) 4950 + 4951 + # Final cleanup: 4952 + # If we need a repack, then nuke the reflogs and repack. 4953 + # If we need a reset, do a reset --hard 4954 + reset = not GitUtils.is_repository_bare(target_working_dir) 4955 + self.cleanup(target_working_dir, self._args.repack, reset, 4956 + run_quietly=self._args.quiet, 4957 + show_debuginfo=self._args.debug) 4958 + 4959 + # Let user know how long it took 4960 + print(_("Completely finished after {:.2f} seconds.") 4961 + .format(time.time()-start)) 4962 + 4963 + # Give post-rewrite instructions for cleaning up other copies for SDR 4964 + if self._args.sensitive_data_removal: 4965 + lfs_note = "" 4966 + if self._lfs_object_tracker and \ 4967 + self._lfs_object_tracker.objects_orphaned == True: 4968 + lfs_note = _(" and LFS Objects Orphaned") 4969 + push_command = "git push --force --mirror origin" 4970 + if self._args.no_fetch: 4971 + if self._args.partial: 4972 + push_command = "git push --force origin " + \ 4973 + " ".join(sorted([decode(x) for x in self._changed_refs])) 4974 + else: 4975 + push_command = "git push --all --tags origin" 4976 + print("") 4977 + print(sdr_next_steps % (push_command, lfs_note, lfs_note)) 4978 + 4979 + def main(): 4980 + setup_gettext() 4981 + args = FilteringOptions.parse_args(sys.argv[1:]) 4982 + if args.analyze: 4983 + RepoAnalyze.run(args) 4984 + else: 4985 + filter = RepoFilter(args) 4986 + filter.run() 4987 + 4988 + if __name__ == '__main__': 4989 + main()
+652
lib/audit.ml
··· 1 + (** Structured audit logging for unpac operations. *) 2 + 3 + let src = Logs.Src.create "unpac.audit" ~doc:"Audit logging" 4 + module Log = (val Logs.src_log src : Logs.LOG) 5 + 6 + (* Git operation types *) 7 + 8 + type git_result = { 9 + exit_code : int; 10 + stdout : string; 11 + stderr : string; 12 + } 13 + 14 + type git_operation = { 15 + git_id : string; 16 + git_timestamp : float; 17 + git_cmd : string list; 18 + git_cwd : string; 19 + git_duration_ms : int; 20 + git_result : git_result; 21 + } 22 + 23 + (* Unpac operation types *) 24 + 25 + type status = 26 + | Success 27 + | Failed of string 28 + | Conflict of string list 29 + 30 + type operation_type = 31 + | Init 32 + | Project_new 33 + | Project_promote 34 + | Project_set_remote 35 + | Opam_add 36 + | Opam_init 37 + | Opam_promote 38 + | Opam_update 39 + | Opam_merge 40 + | Opam_edit 41 + | Opam_done 42 + | Opam_remove 43 + | Git_add 44 + | Git_update 45 + | Git_merge 46 + | Git_remove 47 + | Push 48 + | Unknown of string 49 + 50 + type operation = { 51 + id : string; 52 + timestamp : float; 53 + operation_type : operation_type; 54 + args : string list; 55 + cwd : string; 56 + duration_ms : int; 57 + status : status; 58 + git_operations : git_operation list; 59 + } 60 + 61 + type log = { 62 + version : string; 63 + entries : operation list; 64 + } 65 + 66 + let current_version = "1.0" 67 + 68 + (* UUID generation - simple random hex *) 69 + let () = Random.self_init () 70 + 71 + let generate_id () = 72 + let buf = Buffer.create 32 in 73 + for _ = 1 to 8 do 74 + Buffer.add_string buf (Printf.sprintf "%04x" (Random.int 0x10000)) 75 + done; 76 + let s = Buffer.contents buf in 77 + (* Format as UUID: 8-4-4-4-12 *) 78 + Printf.sprintf "%s-%s-%s-%s-%s" 79 + (String.sub s 0 8) 80 + (String.sub s 8 4) 81 + (String.sub s 12 4) 82 + (String.sub s 16 4) 83 + (String.sub s 20 12) 84 + 85 + (* JSON codecs *) 86 + 87 + let git_result_jsont = 88 + Jsont.Object.map 89 + ~kind:"git_result" 90 + (fun exit_code stdout stderr -> { exit_code; stdout; stderr }) 91 + |> Jsont.Object.mem "exit_code" Jsont.int ~enc:(fun r -> r.exit_code) 92 + |> Jsont.Object.mem "stdout" Jsont.string ~enc:(fun r -> r.stdout) 93 + |> Jsont.Object.mem "stderr" Jsont.string ~enc:(fun r -> r.stderr) 94 + |> Jsont.Object.finish 95 + 96 + let git_operation_jsont = 97 + Jsont.Object.map 98 + ~kind:"git_operation" 99 + (fun git_id git_timestamp git_cmd git_cwd git_duration_ms git_result -> 100 + { git_id; git_timestamp; git_cmd; git_cwd; git_duration_ms; git_result }) 101 + |> Jsont.Object.mem "id" Jsont.string ~enc:(fun g -> g.git_id) 102 + |> Jsont.Object.mem "timestamp" Jsont.number ~enc:(fun g -> g.git_timestamp) 103 + |> Jsont.Object.mem "cmd" (Jsont.list Jsont.string) ~enc:(fun g -> g.git_cmd) 104 + |> Jsont.Object.mem "cwd" Jsont.string ~enc:(fun g -> g.git_cwd) 105 + |> Jsont.Object.mem "duration_ms" Jsont.int ~enc:(fun g -> g.git_duration_ms) 106 + |> Jsont.Object.mem "result" git_result_jsont ~enc:(fun g -> g.git_result) 107 + |> Jsont.Object.finish 108 + 109 + let status_jsont = 110 + (* Encode status as a simple object with status field and optional data *) 111 + Jsont.Object.map ~kind:"status" 112 + (fun status data_opt -> 113 + match status, data_opt with 114 + | "success", _ -> Success 115 + | "failed", Some msg -> Failed msg 116 + | "conflict", Some files_str -> 117 + Conflict (String.split_on_char ',' files_str) 118 + | s, _ -> Failed (Printf.sprintf "Unknown status: %s" s)) 119 + |> Jsont.Object.mem "status" Jsont.string 120 + ~enc:(function 121 + | Success -> "success" 122 + | Failed _ -> "failed" 123 + | Conflict _ -> "conflict") 124 + |> Jsont.Object.opt_mem "data" Jsont.string 125 + ~enc:(function 126 + | Success -> None 127 + | Failed msg -> Some msg 128 + | Conflict files -> Some (String.concat "," files)) 129 + |> Jsont.Object.finish 130 + 131 + let operation_type_to_string = function 132 + | Init -> "init" 133 + | Project_new -> "project.new" 134 + | Project_promote -> "project.promote" 135 + | Project_set_remote -> "project.set-remote" 136 + | Opam_add -> "opam.add" 137 + | Opam_init -> "opam.init" 138 + | Opam_promote -> "opam.promote" 139 + | Opam_update -> "opam.update" 140 + | Opam_merge -> "opam.merge" 141 + | Opam_edit -> "opam.edit" 142 + | Opam_done -> "opam.done" 143 + | Opam_remove -> "opam.remove" 144 + | Git_add -> "git.add" 145 + | Git_update -> "git.update" 146 + | Git_merge -> "git.merge" 147 + | Git_remove -> "git.remove" 148 + | Push -> "push" 149 + | Unknown s -> s 150 + 151 + let operation_type_of_string = function 152 + | "init" -> Init 153 + | "project.new" -> Project_new 154 + | "project.promote" -> Project_promote 155 + | "project.set-remote" -> Project_set_remote 156 + | "opam.add" -> Opam_add 157 + | "opam.init" -> Opam_init 158 + | "opam.promote" -> Opam_promote 159 + | "opam.update" -> Opam_update 160 + | "opam.merge" -> Opam_merge 161 + | "opam.edit" -> Opam_edit 162 + | "opam.done" -> Opam_done 163 + | "opam.remove" -> Opam_remove 164 + | "git.add" -> Git_add 165 + | "git.update" -> Git_update 166 + | "git.merge" -> Git_merge 167 + | "git.remove" -> Git_remove 168 + | "push" -> Push 169 + | s -> Unknown s 170 + 171 + let operation_type_jsont = 172 + Jsont.string 173 + |> Jsont.map ~dec:operation_type_of_string ~enc:operation_type_to_string 174 + 175 + let operation_jsont = 176 + Jsont.Object.map 177 + ~kind:"operation" 178 + (fun id timestamp operation_type args cwd duration_ms status git_operations -> 179 + { id; timestamp; operation_type; args; cwd; duration_ms; status; git_operations }) 180 + |> Jsont.Object.mem "id" Jsont.string ~enc:(fun o -> o.id) 181 + |> Jsont.Object.mem "timestamp" Jsont.number ~enc:(fun o -> o.timestamp) 182 + |> Jsont.Object.mem "operation" operation_type_jsont ~enc:(fun o -> o.operation_type) 183 + |> Jsont.Object.mem "args" (Jsont.list Jsont.string) ~enc:(fun o -> o.args) 184 + |> Jsont.Object.mem "cwd" Jsont.string ~enc:(fun o -> o.cwd) 185 + |> Jsont.Object.mem "duration_ms" Jsont.int ~enc:(fun o -> o.duration_ms) 186 + |> Jsont.Object.mem "status" status_jsont ~enc:(fun o -> o.status) 187 + |> Jsont.Object.mem "git_operations" (Jsont.list git_operation_jsont) 188 + ~enc:(fun o -> o.git_operations) 189 + |> Jsont.Object.finish 190 + 191 + let log_jsont = 192 + Jsont.Object.map 193 + ~kind:"audit_log" 194 + (fun version entries -> { version; entries }) 195 + |> Jsont.Object.mem "version" Jsont.string ~enc:(fun l -> l.version) 196 + |> Jsont.Object.mem "entries" (Jsont.list operation_jsont) ~enc:(fun l -> l.entries) 197 + |> Jsont.Object.finish 198 + 199 + (* Context for accumulating git operations *) 200 + 201 + type context = { 202 + ctx_id : string; 203 + ctx_operation_type : operation_type; 204 + ctx_args : string list; 205 + ctx_cwd : string; 206 + ctx_start : float; 207 + mutable ctx_git_ops : git_operation list; 208 + } 209 + 210 + let start_operation ~operation_type ~args ~cwd = 211 + let ctx = { 212 + ctx_id = generate_id (); 213 + ctx_operation_type = operation_type; 214 + ctx_args = args; 215 + ctx_cwd = cwd; 216 + ctx_start = Unix.gettimeofday (); 217 + ctx_git_ops = []; 218 + } in 219 + Log.debug (fun m -> m "Starting operation %s: %s %a" 220 + ctx.ctx_id (operation_type_to_string operation_type) 221 + Fmt.(list ~sep:sp string) args); 222 + ctx 223 + 224 + let record_git ctx ~cmd ~cwd ~started ~result = 225 + let now = Unix.gettimeofday () in 226 + let duration_ms = int_of_float ((now -. started) *. 1000.0) in 227 + let op = { 228 + git_id = generate_id (); 229 + git_timestamp = started; 230 + git_cmd = cmd; 231 + git_cwd = cwd; 232 + git_duration_ms = duration_ms; 233 + git_result = result; 234 + } in 235 + ctx.ctx_git_ops <- op :: ctx.ctx_git_ops; 236 + Log.debug (fun m -> m "Recorded git: %a (exit %d, %dms)" 237 + Fmt.(list ~sep:sp string) cmd result.exit_code duration_ms) 238 + 239 + let finalize_operation ctx status = 240 + let now = Unix.gettimeofday () in 241 + let duration_ms = int_of_float ((now -. ctx.ctx_start) *. 1000.0) in 242 + let op = { 243 + id = ctx.ctx_id; 244 + timestamp = ctx.ctx_start; 245 + operation_type = ctx.ctx_operation_type; 246 + args = ctx.ctx_args; 247 + cwd = ctx.ctx_cwd; 248 + duration_ms; 249 + status; 250 + git_operations = List.rev ctx.ctx_git_ops; 251 + } in 252 + Log.info (fun m -> m "Completed operation %s in %dms" ctx.ctx_id duration_ms); 253 + op 254 + 255 + let complete_success ctx = finalize_operation ctx Success 256 + 257 + let complete_failed ctx ~error = 258 + Log.warn (fun m -> m "Operation %s failed: %s" ctx.ctx_id error); 259 + finalize_operation ctx (Failed error) 260 + 261 + let complete_conflict ctx ~files = 262 + Log.warn (fun m -> m "Operation %s had conflicts in %d files" ctx.ctx_id (List.length files)); 263 + finalize_operation ctx (Conflict files) 264 + 265 + (* Log file management *) 266 + 267 + let default_log_file = ".unpac-audit.json" 268 + 269 + let load path = 270 + if not (Sys.file_exists path) then 271 + Ok { version = current_version; entries = [] } 272 + else 273 + try 274 + let ic = open_in path in 275 + let content = really_input_string ic (in_channel_length ic) in 276 + close_in ic; 277 + match Jsont_bytesrw.decode_string' log_jsont content with 278 + | Ok log -> Ok log 279 + | Error e -> Error (Printf.sprintf "Parse error: %s" (Jsont.Error.to_string e)) 280 + with 281 + | Sys_error msg -> Error msg 282 + 283 + let save path log = 284 + try 285 + match Jsont_bytesrw.encode_string ~format:Jsont.Indent log_jsont log with 286 + | Ok content -> 287 + let oc = open_out path in 288 + output_string oc content; 289 + close_out oc; 290 + Ok () 291 + | Error e -> Error (Printf.sprintf "Encode error: %s" e) 292 + with 293 + | Sys_error msg -> Error msg 294 + 295 + let append path op = 296 + match load path with 297 + | Error e -> Error e 298 + | Ok log -> 299 + let log' = { log with entries = op :: log.entries } in 300 + save path log' 301 + 302 + (* Pretty printing *) 303 + 304 + let pp_status fmt = function 305 + | Success -> Format.fprintf fmt "@{<green>SUCCESS@}" 306 + | Failed msg -> Format.fprintf fmt "@{<red>FAILED@}: %s" msg 307 + | Conflict files -> 308 + Format.fprintf fmt "@{<yellow>CONFLICT@}: %a" 309 + Fmt.(list ~sep:comma string) files 310 + 311 + let pp_git_operation fmt op = 312 + let status_color = if op.git_result.exit_code = 0 then "green" else "red" in 313 + Format.fprintf fmt " @{<%s>[%d]@} git %a (%dms)@." 314 + status_color op.git_result.exit_code 315 + Fmt.(list ~sep:sp string) op.git_cmd 316 + op.git_duration_ms 317 + 318 + let pp_operation fmt op = 319 + let tm = Unix.localtime op.timestamp in 320 + Format.fprintf fmt "@[<v>"; 321 + Format.fprintf fmt "[%04d-%02d-%02d %02d:%02d:%02d] %s %a@." 322 + (tm.Unix.tm_year + 1900) (tm.Unix.tm_mon + 1) tm.Unix.tm_mday 323 + tm.Unix.tm_hour tm.Unix.tm_min tm.Unix.tm_sec 324 + (operation_type_to_string op.operation_type) 325 + Fmt.(list ~sep:sp string) op.args; 326 + Format.fprintf fmt " ID: %s | Duration: %dms@." op.id op.duration_ms; 327 + Format.fprintf fmt " Status: %a@." pp_status op.status; 328 + if op.git_operations <> [] then begin 329 + Format.fprintf fmt " Git operations (%d):@." (List.length op.git_operations); 330 + List.iter (pp_git_operation fmt) op.git_operations 331 + end; 332 + Format.fprintf fmt "@]" 333 + 334 + let pp_log fmt log = 335 + Format.fprintf fmt "@[<v>Unpac Audit Log (version %s)@." log.version; 336 + Format.fprintf fmt "Total operations: %d@.@." (List.length log.entries); 337 + List.iter (fun op -> 338 + pp_operation fmt op; 339 + Format.fprintf fmt "@." 340 + ) log.entries; 341 + Format.fprintf fmt "@]" 342 + 343 + (* HTML generation *) 344 + 345 + let html_escape s = 346 + let buf = Buffer.create (String.length s) in 347 + String.iter (function 348 + | '<' -> Buffer.add_string buf "&lt;" 349 + | '>' -> Buffer.add_string buf "&gt;" 350 + | '&' -> Buffer.add_string buf "&amp;" 351 + | '"' -> Buffer.add_string buf "&quot;" 352 + | c -> Buffer.add_char buf c 353 + ) s; 354 + Buffer.contents buf 355 + 356 + (* Commit audit log to git *) 357 + 358 + let commit_log ~proc_mgr ~main_wt ~log_path = 359 + (* Stage the audit log *) 360 + let rel_path = Filename.basename log_path in 361 + let started = Unix.gettimeofday () in 362 + let result = 363 + try 364 + (* Add the file *) 365 + Eio.Switch.run @@ fun sw -> 366 + let stdout_r, stdout_w = Eio.Process.pipe proc_mgr ~sw in 367 + let stderr_r, stderr_w = Eio.Process.pipe proc_mgr ~sw in 368 + let child = Eio.Process.spawn proc_mgr ~sw 369 + ~cwd:(main_wt :> Eio.Fs.dir_ty Eio.Path.t) 370 + ~stdout:stdout_w ~stderr:stderr_w 371 + ["git"; "add"; rel_path] 372 + in 373 + Eio.Flow.close stdout_w; 374 + Eio.Flow.close stderr_w; 375 + (* Drain outputs *) 376 + let stdout_buf = Buffer.create 64 in 377 + let stderr_buf = Buffer.create 64 in 378 + Eio.Fiber.both 379 + (fun () -> 380 + try 381 + while true do 382 + let chunk = Cstruct.create 1024 in 383 + let n = Eio.Flow.single_read stdout_r chunk in 384 + Buffer.add_string stdout_buf (Cstruct.to_string (Cstruct.sub chunk 0 n)) 385 + done 386 + with End_of_file -> ()) 387 + (fun () -> 388 + try 389 + while true do 390 + let chunk = Cstruct.create 1024 in 391 + let n = Eio.Flow.single_read stderr_r chunk in 392 + Buffer.add_string stderr_buf (Cstruct.to_string (Cstruct.sub chunk 0 n)) 393 + done 394 + with End_of_file -> ()); 395 + let status = Eio.Process.await child in 396 + match status with 397 + | `Exited 0 -> Ok () 398 + | `Exited code -> Error (Printf.sprintf "git add failed (exit %d): %s" code (Buffer.contents stderr_buf)) 399 + | `Signaled sig_ -> Error (Printf.sprintf "git add killed by signal %d" sig_) 400 + with exn -> Error (Printf.sprintf "Exception: %s" (Printexc.to_string exn)) 401 + in 402 + match result with 403 + | Error e -> 404 + Log.warn (fun m -> m "Failed to stage audit log: %s" e); 405 + Error e 406 + | Ok () -> 407 + (* Commit the file *) 408 + let result = 409 + try 410 + Eio.Switch.run @@ fun sw -> 411 + let stdout_r, stdout_w = Eio.Process.pipe proc_mgr ~sw in 412 + let stderr_r, stderr_w = Eio.Process.pipe proc_mgr ~sw in 413 + let child = Eio.Process.spawn proc_mgr ~sw 414 + ~cwd:(main_wt :> Eio.Fs.dir_ty Eio.Path.t) 415 + ~stdout:stdout_w ~stderr:stderr_w 416 + ["git"; "commit"; "-m"; "Update audit log"; "--no-verify"] 417 + in 418 + Eio.Flow.close stdout_w; 419 + Eio.Flow.close stderr_w; 420 + (* Drain outputs *) 421 + let stdout_buf = Buffer.create 64 in 422 + let stderr_buf = Buffer.create 64 in 423 + Eio.Fiber.both 424 + (fun () -> 425 + try 426 + while true do 427 + let chunk = Cstruct.create 1024 in 428 + let n = Eio.Flow.single_read stdout_r chunk in 429 + Buffer.add_string stdout_buf (Cstruct.to_string (Cstruct.sub chunk 0 n)) 430 + done 431 + with End_of_file -> ()) 432 + (fun () -> 433 + try 434 + while true do 435 + let chunk = Cstruct.create 1024 in 436 + let n = Eio.Flow.single_read stderr_r chunk in 437 + Buffer.add_string stderr_buf (Cstruct.to_string (Cstruct.sub chunk 0 n)) 438 + done 439 + with End_of_file -> ()); 440 + let status = Eio.Process.await child in 441 + match status with 442 + | `Exited 0 -> Ok () 443 + | `Exited 1 when String.length (Buffer.contents stdout_buf) > 0 && 444 + (String.exists (fun c -> c = 'n') (Buffer.contents stdout_buf)) -> 445 + (* "nothing to commit" - this is fine *) 446 + Ok () 447 + | `Exited code -> Error (Printf.sprintf "git commit failed (exit %d): %s" code (Buffer.contents stderr_buf)) 448 + | `Signaled sig_ -> Error (Printf.sprintf "git commit killed by signal %d" sig_) 449 + with exn -> Error (Printf.sprintf "Exception: %s" (Printexc.to_string exn)) 450 + in 451 + let duration = int_of_float ((Unix.gettimeofday () -. started) *. 1000.0) in 452 + (match result with 453 + | Ok () -> Log.debug (fun m -> m "Committed audit log (%dms)" duration) 454 + | Error e -> Log.warn (fun m -> m "Failed to commit audit log: %s" e)); 455 + result 456 + 457 + (** Full audit manager that wraps operations *) 458 + type manager = { 459 + proc_mgr : [ `Generic | `Unix ] Eio.Process.mgr_ty Eio.Resource.t; 460 + main_wt : Eio.Fs.dir_ty Eio.Path.t; 461 + log_path : string; 462 + mutable current_ctx : context option; 463 + } 464 + 465 + let create_manager ~proc_mgr ~main_wt = 466 + let log_path = Eio.Path.(main_wt / default_log_file) |> snd in 467 + { proc_mgr; main_wt; log_path; current_ctx = None } 468 + 469 + let begin_operation mgr ~operation_type ~args = 470 + let cwd = snd mgr.main_wt in 471 + let ctx = start_operation ~operation_type ~args ~cwd in 472 + mgr.current_ctx <- Some ctx; 473 + ctx 474 + 475 + let end_operation mgr status = 476 + match mgr.current_ctx with 477 + | None -> 478 + Log.warn (fun m -> m "end_operation called without active context"); 479 + Error "No active operation" 480 + | Some ctx -> 481 + mgr.current_ctx <- None; 482 + let op = finalize_operation ctx status in 483 + (* Append to log file *) 484 + (match append mgr.log_path op with 485 + | Error e -> 486 + Log.err (fun m -> m "Failed to append to audit log: %s" e); 487 + Error e 488 + | Ok () -> 489 + (* Commit the log *) 490 + match commit_log ~proc_mgr:mgr.proc_mgr ~main_wt:mgr.main_wt ~log_path:mgr.log_path with 491 + | Error e -> 492 + Log.warn (fun m -> m "Failed to commit audit log (will retry next operation): %s" e); 493 + Ok op (* Still return success - the log is saved, just not committed *) 494 + | Ok () -> 495 + Ok op) 496 + 497 + let end_success mgr = end_operation mgr Success 498 + let end_failed mgr ~error = end_operation mgr (Failed error) 499 + let end_conflict mgr ~files = end_operation mgr (Conflict files) 500 + 501 + let get_context mgr = mgr.current_ctx 502 + 503 + let to_html log = 504 + let buf = Buffer.create 4096 in 505 + let add = Buffer.add_string buf in 506 + add {|<!DOCTYPE html> 507 + <html lang="en"> 508 + <head> 509 + <meta charset="UTF-8"> 510 + <meta name="viewport" content="width=device-width, initial-scale=1.0"> 511 + <title>Unpac Audit Log</title> 512 + <style> 513 + :root { 514 + --bg: #1a1a2e; 515 + --card: #16213e; 516 + --text: #e4e4e4; 517 + --accent: #0f3460; 518 + --success: #4ecca3; 519 + --error: #e94560; 520 + --warning: #f39c12; 521 + } 522 + body { 523 + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; 524 + background: var(--bg); 525 + color: var(--text); 526 + margin: 0; 527 + padding: 20px; 528 + line-height: 1.6; 529 + } 530 + h1 { color: var(--success); margin-bottom: 10px; } 531 + .meta { color: #888; margin-bottom: 30px; } 532 + .operation { 533 + background: var(--card); 534 + border-radius: 8px; 535 + padding: 20px; 536 + margin-bottom: 20px; 537 + border-left: 4px solid var(--accent); 538 + } 539 + .operation.success { border-left-color: var(--success); } 540 + .operation.failed { border-left-color: var(--error); } 541 + .operation.conflict { border-left-color: var(--warning); } 542 + .op-header { 543 + display: flex; 544 + justify-content: space-between; 545 + align-items: center; 546 + margin-bottom: 10px; 547 + } 548 + .op-type { 549 + font-weight: bold; 550 + font-size: 1.1em; 551 + color: var(--success); 552 + } 553 + .op-time { color: #888; font-size: 0.9em; } 554 + .op-args { font-family: monospace; color: #888; margin: 5px 0; } 555 + .status { 556 + display: inline-block; 557 + padding: 2px 8px; 558 + border-radius: 4px; 559 + font-size: 0.85em; 560 + font-weight: bold; 561 + } 562 + .status.success { background: var(--success); color: #000; } 563 + .status.failed { background: var(--error); color: #fff; } 564 + .status.conflict { background: var(--warning); color: #000; } 565 + .git-ops { 566 + margin-top: 15px; 567 + padding-top: 15px; 568 + border-top: 1px solid var(--accent); 569 + } 570 + .git-ops summary { 571 + cursor: pointer; 572 + color: #888; 573 + } 574 + .git-op { 575 + font-family: monospace; 576 + font-size: 0.9em; 577 + padding: 5px 10px; 578 + margin: 5px 0; 579 + background: var(--accent); 580 + border-radius: 4px; 581 + } 582 + .git-op.error { border-left: 3px solid var(--error); } 583 + .git-cmd { color: var(--success); } 584 + .git-exit { color: #888; } 585 + .git-duration { color: #888; float: right; } 586 + </style> 587 + </head> 588 + <body> 589 + <h1>Unpac Audit Log</h1> 590 + <div class="meta">Version |}; 591 + add (html_escape log.version); 592 + add {| | |}; 593 + add (string_of_int (List.length log.entries)); 594 + add {| operations</div> 595 + |}; 596 + List.iter (fun op -> 597 + let status_class = match op.status with 598 + | Success -> "success" 599 + | Failed _ -> "failed" 600 + | Conflict _ -> "conflict" 601 + in 602 + let tm = Unix.localtime op.timestamp in 603 + add (Printf.sprintf {| <div class="operation %s"> 604 + <div class="op-header"> 605 + <span class="op-type">%s</span> 606 + <span class="op-time">%04d-%02d-%02d %02d:%02d:%02d (%dms)</span> 607 + </div> 608 + <div class="op-args">%s</div> 609 + <span class="status %s">%s</span> 610 + |} 611 + status_class 612 + (html_escape (operation_type_to_string op.operation_type)) 613 + (tm.Unix.tm_year + 1900) (tm.Unix.tm_mon + 1) tm.Unix.tm_mday 614 + tm.Unix.tm_hour tm.Unix.tm_min tm.Unix.tm_sec 615 + op.duration_ms 616 + (html_escape (String.concat " " op.args)) 617 + status_class 618 + (match op.status with 619 + | Success -> "SUCCESS" 620 + | Failed msg -> "FAILED: " ^ html_escape msg 621 + | Conflict files -> "CONFLICT: " ^ html_escape (String.concat ", " files))); 622 + if op.git_operations <> [] then begin 623 + add {| <div class="git-ops"> 624 + <details> 625 + <summary>|}; 626 + add (string_of_int (List.length op.git_operations)); 627 + add {| git operations</summary> 628 + |}; 629 + List.iter (fun git_op -> 630 + let error_class = if git_op.git_result.exit_code <> 0 then " error" else "" in 631 + add (Printf.sprintf {| <div class="git-op%s"> 632 + <span class="git-cmd">git %s</span> 633 + <span class="git-duration">%dms</span> 634 + <span class="git-exit">[exit %d]</span> 635 + </div> 636 + |} 637 + error_class 638 + (html_escape (String.concat " " git_op.git_cmd)) 639 + git_op.git_duration_ms 640 + git_op.git_result.exit_code) 641 + ) op.git_operations; 642 + add {| </details> 643 + </div> 644 + |} 645 + end; 646 + add {| </div> 647 + |} 648 + ) log.entries; 649 + add {|</body> 650 + </html> 651 + |}; 652 + Buffer.contents buf
+188
lib/audit.mli
··· 1 + (** Structured audit logging for unpac operations. 2 + 3 + This module provides hierarchical logging with JSON serialization, 4 + enabling both human-readable and machine-processable audit trails. 5 + 6 + All unpac operations are logged with their constituent git operations, 7 + timestamps, durations, and outcomes. *) 8 + 9 + (** {1 Git Operation Logging} *) 10 + 11 + (** Result of a git command *) 12 + type git_result = { 13 + exit_code : int; 14 + stdout : string; 15 + stderr : string; 16 + } 17 + 18 + (** A single git command execution *) 19 + type git_operation = { 20 + git_id : string; (** Unique ID for this operation *) 21 + git_timestamp : float; (** Unix timestamp when started *) 22 + git_cmd : string list; (** Git command args (without 'git') *) 23 + git_cwd : string; (** Working directory *) 24 + git_duration_ms : int; (** Duration in milliseconds *) 25 + git_result : git_result; (** Command result *) 26 + } 27 + 28 + val git_operation_jsont : git_operation Jsont.t 29 + (** JSON codec for git operations *) 30 + 31 + (** {1 Unpac Operation Logging} *) 32 + 33 + (** Status of an unpac operation *) 34 + type status = 35 + | Success 36 + | Failed of string 37 + | Conflict of string list 38 + 39 + val status_jsont : status Jsont.t 40 + 41 + (** Type of unpac operation *) 42 + type operation_type = 43 + | Init 44 + | Project_new 45 + | Project_promote 46 + | Project_set_remote 47 + | Opam_add 48 + | Opam_init 49 + | Opam_promote 50 + | Opam_update 51 + | Opam_merge 52 + | Opam_edit 53 + | Opam_done 54 + | Opam_remove 55 + | Git_add 56 + | Git_update 57 + | Git_merge 58 + | Git_remove 59 + | Push 60 + | Unknown of string 61 + 62 + val operation_type_jsont : operation_type Jsont.t 63 + 64 + val operation_type_to_string : operation_type -> string 65 + (** Convert operation type to string representation *) 66 + 67 + (** An unpac operation with its git operations *) 68 + type operation = { 69 + id : string; (** Unique operation ID (UUID) *) 70 + timestamp : float; (** Unix timestamp when started *) 71 + operation_type : operation_type; 72 + args : string list; (** Command arguments *) 73 + cwd : string; (** Working directory *) 74 + duration_ms : int; (** Total duration in milliseconds *) 75 + status : status; (** Final status *) 76 + git_operations : git_operation list; (** Constituent git operations *) 77 + } 78 + 79 + val operation_jsont : operation Jsont.t 80 + (** JSON codec for unpac operations *) 81 + 82 + (** {1 Audit Log} *) 83 + 84 + (** Complete audit log *) 85 + type log = { 86 + version : string; (** Log format version *) 87 + entries : operation list; (** Log entries, newest first *) 88 + } 89 + 90 + val log_jsont : log Jsont.t 91 + (** JSON codec for the complete audit log *) 92 + 93 + (** Current log format version *) 94 + val current_version : string 95 + 96 + (** {1 Logging API} *) 97 + 98 + (** Active operation context for accumulating git operations *) 99 + type context 100 + 101 + (** Start a new unpac operation. 102 + Returns a context for recording git operations. *) 103 + val start_operation : 104 + operation_type:operation_type -> 105 + args:string list -> 106 + cwd:string -> 107 + context 108 + 109 + (** Record a git operation within the current context. 110 + Call this after each git command completes. *) 111 + val record_git : 112 + context -> 113 + cmd:string list -> 114 + cwd:string -> 115 + started:float -> 116 + result:git_result -> 117 + unit 118 + 119 + (** Complete an operation successfully *) 120 + val complete_success : context -> operation 121 + 122 + (** Complete an operation with failure *) 123 + val complete_failed : context -> error:string -> operation 124 + 125 + (** Complete an operation with conflict *) 126 + val complete_conflict : context -> files:string list -> operation 127 + 128 + (** {1 Log File Management} *) 129 + 130 + (** Default log file path relative to project root *) 131 + val default_log_file : string 132 + 133 + (** Load audit log from file. Returns empty log if file doesn't exist. *) 134 + val load : string -> (log, string) result 135 + 136 + (** Save audit log to file *) 137 + val save : string -> log -> (unit, string) result 138 + 139 + (** Append an operation to the log file *) 140 + val append : string -> operation -> (unit, string) result 141 + 142 + (** {1 Formatting} *) 143 + 144 + (** Pretty-print an operation for terminal output *) 145 + val pp_operation : Format.formatter -> operation -> unit 146 + 147 + (** Pretty-print the log for terminal output *) 148 + val pp_log : Format.formatter -> log -> unit 149 + 150 + (** Generate HTML report from log *) 151 + val to_html : log -> string 152 + 153 + (** {1 Audit Manager} *) 154 + 155 + (** Manager that handles full operation lifecycle with auto-commit *) 156 + type manager 157 + 158 + (** Create an audit manager for the given workspace *) 159 + val create_manager : 160 + proc_mgr:[ `Generic | `Unix ] Eio.Process.mgr_ty Eio.Resource.t -> 161 + main_wt:Eio.Fs.dir_ty Eio.Path.t -> 162 + manager 163 + 164 + (** Begin a new audited operation. Returns the context for recording git ops. *) 165 + val begin_operation : 166 + manager -> 167 + operation_type:operation_type -> 168 + args:string list -> 169 + context 170 + 171 + (** End an operation successfully. Appends to log and commits. *) 172 + val end_success : manager -> (operation, string) result 173 + 174 + (** End an operation with failure. Appends to log and commits. *) 175 + val end_failed : manager -> error:string -> (operation, string) result 176 + 177 + (** End an operation with merge conflict. Appends to log and commits. *) 178 + val end_conflict : manager -> files:string list -> (operation, string) result 179 + 180 + (** Get the current context if one is active *) 181 + val get_context : manager -> context option 182 + 183 + (** Commit the audit log to git *) 184 + val commit_log : 185 + proc_mgr:[ `Generic | `Unix ] Eio.Process.mgr_ty Eio.Resource.t -> 186 + main_wt:Eio.Fs.dir_ty Eio.Path.t -> 187 + log_path:string -> 188 + (unit, string) result
+97
lib/backend.ml
··· 1 + (** Backend module signature for package managers. 2 + 3 + Each backend (opam, cargo, etc.) implements this interface to provide 4 + vendoring capabilities. *) 5 + 6 + (** {1 Types} *) 7 + 8 + type package_info = { 9 + name : string; 10 + url : string; 11 + branch : string option; (** Branch/tag/ref to use *) 12 + } 13 + (** Information about a package to vendor. *) 14 + 15 + type add_result = 16 + | Added of { name : string; sha : string } 17 + | Already_exists of string 18 + | Failed of { name : string; error : string } 19 + 20 + type update_result = 21 + | Updated of { name : string; old_sha : string; new_sha : string } 22 + | No_changes of string 23 + | Update_failed of { name : string; error : string } 24 + 25 + (** {1 Backend Signature} *) 26 + 27 + module type S = sig 28 + val name : string 29 + (** Backend name, e.g. "opam", "cargo". *) 30 + 31 + (** {2 Branch Naming} *) 32 + 33 + val upstream_branch : string -> string 34 + (** [upstream_branch pkg] returns branch name, e.g. "opam/upstream/astring". *) 35 + 36 + val vendor_branch : string -> string 37 + (** [vendor_branch pkg] returns branch name, e.g. "opam/vendor/astring". *) 38 + 39 + val patches_branch : string -> string 40 + (** [patches_branch pkg] returns branch name, e.g. "opam/patches/astring". *) 41 + 42 + val vendor_path : string -> string 43 + (** [vendor_path pkg] returns path prefix, e.g. "vendor/opam/astring". *) 44 + 45 + (** {2 Worktree Kinds} *) 46 + 47 + val upstream_kind : string -> Worktree.kind 48 + val vendor_kind : string -> Worktree.kind 49 + val patches_kind : string -> Worktree.kind 50 + 51 + (** {2 Package Operations} *) 52 + 53 + val add_package : 54 + proc_mgr:Git.proc_mgr -> 55 + root:Worktree.root -> 56 + package_info -> 57 + add_result 58 + (** [add_package ~proc_mgr ~root info] vendors a single package. 59 + 60 + 1. Creates/updates opam/upstream/<pkg> from URL 61 + 2. Creates opam/vendor/<pkg> orphan with vendor/ prefix 62 + 3. Creates opam/patches/<pkg> from vendor *) 63 + 64 + val update_package : 65 + proc_mgr:Git.proc_mgr -> 66 + root:Worktree.root -> 67 + string -> 68 + update_result 69 + (** [update_package ~proc_mgr ~root name] updates a package from upstream. 70 + 71 + 1. Fetches latest into opam/upstream/<pkg> 72 + 2. Updates opam/vendor/<pkg> with new content 73 + Does NOT rebase patches - that's a separate operation. *) 74 + 75 + val list_packages : 76 + proc_mgr:Git.proc_mgr -> 77 + root:Worktree.root -> 78 + string list 79 + (** [list_packages ~proc_mgr root] returns all vendored package names. *) 80 + end 81 + 82 + (** {1 Merge Operations} *) 83 + 84 + (** These operations are backend-agnostic and work on any patches branch. *) 85 + 86 + let merge_to_project ~proc_mgr ~root ~project ~patches_branch = 87 + let project_wt = Worktree.path root (Worktree.Project project) in 88 + Git.merge_allow_unrelated ~proc_mgr ~cwd:project_wt 89 + ~branch:patches_branch 90 + ~message:(Printf.sprintf "Merge %s" patches_branch) 91 + 92 + let rebase_patches ~proc_mgr ~root ~patches_kind ~onto = 93 + Worktree.ensure ~proc_mgr root patches_kind; 94 + let patches_wt = Worktree.path root patches_kind in 95 + let result = Git.rebase ~proc_mgr ~cwd:patches_wt ~onto in 96 + Worktree.remove ~proc_mgr root patches_kind; 97 + result
+389
lib/claude/agent.ml
··· 1 + (** Ralph-loop style Claude agent for unpac workspace analysis. 2 + 3 + Implements the ralph-loop pattern: same prompt fed each iteration, 4 + with state persisting in files. Runs up to max_iterations per project, 5 + exiting early on completion promise. 6 + 7 + Uses the Claude SDK's MCP-based custom tool architecture. Custom unpac 8 + tools are registered via an in-process MCP server, while Claude's built-in 9 + tools (Read, Write, Bash, etc.) are handled by Claude CLI directly. *) 10 + 11 + let src = Logs.Src.create "unpac.claude.agent" ~doc:"Claude agent" 12 + module Log = (val Logs.src_log src : Logs.LOG) 13 + 14 + (* ANSI color codes *) 15 + module Color = struct 16 + let reset = "\x1b[0m" 17 + let bold = "\x1b[1m" 18 + let dim = "\x1b[2m" 19 + let red = "\x1b[31m" 20 + let green = "\x1b[32m" 21 + let yellow = "\x1b[33m" 22 + let blue = "\x1b[34m" 23 + let magenta = "\x1b[35m" 24 + let cyan = "\x1b[36m" 25 + end 26 + 27 + type config = { 28 + verbose : bool; 29 + web_port : int option; 30 + max_iterations : int; 31 + project : string option; 32 + } 33 + 34 + let default_config = { 35 + verbose = false; 36 + web_port = None; 37 + max_iterations = 20; 38 + project = None; 39 + } 40 + 41 + let completion_promise = "AGENTIC-HUMPS-COUNT-2" 42 + 43 + (* Format tool call for logging - full paths, no truncation *) 44 + let format_tool_call name (input : Claude.Tool_input.t) = 45 + let get_string key = Claude.Tool_input.get_string input key in 46 + match name with 47 + | "Read" -> 48 + let path = get_string "file_path" |> Option.value ~default:"?" in 49 + Printf.sprintf "Read %s" path 50 + | "Write" -> 51 + let path = get_string "file_path" |> Option.value ~default:"?" in 52 + Printf.sprintf "Write %s" path 53 + | "Edit" -> 54 + let path = get_string "file_path" |> Option.value ~default:"?" in 55 + Printf.sprintf "Edit %s" path 56 + | "Bash" -> 57 + let cmd = get_string "command" |> Option.value ~default:"?" in 58 + Printf.sprintf "$ %s" cmd 59 + | "Glob" -> 60 + let pattern = get_string "pattern" |> Option.value ~default:"*" in 61 + let path = get_string "path" |> Option.value ~default:"" in 62 + if path = "" then Printf.sprintf "Glob %s" pattern 63 + else Printf.sprintf "Glob %s in %s" pattern path 64 + | "Grep" -> 65 + let pattern = get_string "pattern" |> Option.value ~default:"?" in 66 + let path = get_string "path" |> Option.value ~default:"" in 67 + if path = "" then Printf.sprintf "Grep %s" pattern 68 + else Printf.sprintf "Grep %s in %s" pattern path 69 + (* MCP tools are prefixed with mcp__unpac__ *) 70 + | s when String.length s > 12 && String.sub s 0 12 = "mcp__unpac__" -> 71 + let tool_name = String.sub s 12 (String.length s - 12) in 72 + (match tool_name with 73 + | "read_file" -> 74 + let path = get_string "path" |> Option.value ~default:"?" in 75 + Printf.sprintf "unpac:read %s" path 76 + | "write_file" -> 77 + let path = get_string "path" |> Option.value ~default:"?" in 78 + Printf.sprintf "unpac:write %s" path 79 + | "list_directory" -> 80 + let path = get_string "path" |> Option.value ~default:"." in 81 + Printf.sprintf "unpac:ls %s" path 82 + | "glob_files" -> 83 + let pattern = get_string "pattern" |> Option.value ~default:"*" in 84 + Printf.sprintf "unpac:glob %s" pattern 85 + | "run_shell" -> 86 + let cmd = get_string "command" |> Option.value ~default:"?" in 87 + Printf.sprintf "unpac:$ %s" cmd 88 + | "git_commit" -> 89 + let msg = get_string "message" |> Option.value ~default:"" in 90 + Printf.sprintf "unpac:commit %s" msg 91 + | "unpac_status" -> "unpac:status" 92 + | "unpac_status_sync" -> "unpac:status --sync" 93 + | "unpac_push" -> 94 + let remote = get_string "remote" |> Option.value ~default:"origin" in 95 + Printf.sprintf "unpac:push %s" remote 96 + | "unpac_project_list" -> "unpac:projects" 97 + | "unpac_opam_list" -> "unpac:opam list" 98 + | "unpac_git_list" -> "unpac:git list" 99 + | "unpac_git_add" -> 100 + let url = get_string "url" |> Option.value ~default:"?" in 101 + Printf.sprintf "unpac:git add %s" url 102 + | "unpac_git_info" -> 103 + let n = get_string "name" |> Option.value ~default:"?" in 104 + Printf.sprintf "unpac:git info %s" n 105 + | "unpac_git_diff" -> 106 + let n = get_string "name" |> Option.value ~default:"?" in 107 + Printf.sprintf "unpac:git diff %s" n 108 + | _ -> Printf.sprintf "unpac:%s" tool_name) 109 + | _ -> name 110 + 111 + (* Find unpac root from a given directory *) 112 + let find_root_from fs dir = 113 + let rec search path depth = 114 + if depth > 10 then None 115 + else begin 116 + let git_path = Eio.Path.(path / "git") in 117 + let main_path = Eio.Path.(path / "main") in 118 + if Eio.Path.is_directory git_path && Eio.Path.is_directory main_path then 119 + Some path 120 + else 121 + match Eio.Path.split path with 122 + | Some (parent, _) -> search parent (depth + 1) 123 + | None -> None 124 + end 125 + in 126 + search (Eio.Path.(fs / dir)) 0 127 + 128 + let string_contains ~sub s = 129 + let len_sub = String.length sub in 130 + let len_s = String.length s in 131 + if len_sub > len_s then false 132 + else begin 133 + let rec check i = 134 + if i > len_s - len_sub then false 135 + else if String.sub s i len_sub = sub then true 136 + else check (i + 1) 137 + in 138 + check 0 139 + end 140 + 141 + (* Shuffle a list randomly *) 142 + let shuffle list = 143 + let arr = Array.of_list list in 144 + let n = Array.length arr in 145 + for i = n - 1 downto 1 do 146 + let j = Random.int (i + 1) in 147 + let tmp = arr.(i) in 148 + arr.(i) <- arr.(j); 149 + arr.(j) <- tmp 150 + done; 151 + Array.to_list arr 152 + 153 + (* Ensure working directory exists *) 154 + let ensure_work_dir fs root project = 155 + let root_dir = snd root in 156 + let work_base = Eio.Path.(fs / root_dir / ".unpac-claude") in 157 + let work_dir = Eio.Path.(work_base / project) in 158 + let claude_dir = Eio.Path.(work_dir / ".claude") in 159 + (try Eio.Path.mkdir ~perm:0o755 work_base with _ -> ()); 160 + (try Eio.Path.mkdir ~perm:0o755 work_dir with _ -> ()); 161 + (try Eio.Path.mkdir ~perm:0o755 claude_dir with _ -> ()); 162 + work_dir 163 + 164 + (* Run ralph-loop for a single project *) 165 + let run_project_ralph_loop ~env ~config ~root ~project ~event_bus = 166 + let proc_mgr = Eio.Stdenv.process_mgr env in 167 + let fs = Eio.Stdenv.fs env in 168 + 169 + let prefix = Printf.sprintf "[%s] " project in 170 + let log msg = Log.info (fun m -> m "%s%s" prefix msg) in 171 + let emit event = Event.emit event_bus event in 172 + 173 + log "Starting ralph-loop agent"; 174 + Format.printf "@.%s%s═══ Project: %s ═══%s@." Color.bold Color.blue project Color.reset; 175 + emit (Event.Text (Printf.sprintf "\n=== Starting ralph-loop for %s ===\n" project)); 176 + 177 + (* Ensure working directory *) 178 + let _work_dir = ensure_work_dir fs root project in 179 + 180 + (* Get project path *) 181 + let project_path = Unpac.Worktree.path root (Unpac.Worktree.Project project) in 182 + let project_dir = snd project_path in 183 + 184 + (* Generate the prompt (same prompt used every iteration - ralph-loop style) *) 185 + let prompt = Prompt.generate_for_project ~proc_mgr ~root ~project in 186 + 187 + (* Create MCP server with custom unpac tools *) 188 + let mcp_server = Tools.create_mcp_server ~proc_mgr ~fs ~root in 189 + 190 + (* Build Claude options - always Opus 4.5 *) 191 + (* Register MCP server so custom tools are available via mcp__unpac__<tool> *) 192 + let options = 193 + Claude.Options.default 194 + |> Claude.Options.with_model (`Custom "claude-opus-4-5") 195 + |> Claude.Options.with_system_prompt prompt 196 + |> Claude.Options.with_permission_mode Claude.Permissions.Mode.Bypass_permissions 197 + |> Claude.Options.with_mcp_server ~name:"unpac" mcp_server 198 + in 199 + 200 + (* Ralph-loop: same prompt each iteration *) 201 + let iteration_prompt = Printf.sprintf 202 + "You are working on the '%s' project at %s.\n\n\ 203 + Analyze the project, make improvements, update STATUS.md, and commit changes.\n\n\ 204 + You have access to:\n\ 205 + - Claude's built-in tools: Read, Write, Edit, Bash, Glob, Grep\n\ 206 + - Custom unpac tools (mcp__unpac__*): unpac_status, unpac_git_list, etc.\n\n\ 207 + When ALL significant work is complete, output exactly: %s\n\n\ 208 + Begin." 209 + project project_dir completion_promise 210 + in 211 + 212 + (* Run the ralph-loop *) 213 + let rec ralph_loop iteration = 214 + if iteration > config.max_iterations then begin 215 + log (Printf.sprintf "Max iterations (%d) reached" config.max_iterations); 216 + Format.printf "@.%s%s⚠ Max iterations (%d) reached%s@." 217 + Color.bold Color.yellow config.max_iterations Color.reset; 218 + emit (Event.Text (Printf.sprintf "\n[%s] Max iterations reached.\n" project)) 219 + end else begin 220 + log (Printf.sprintf "Iteration %d/%d" iteration config.max_iterations); 221 + Format.printf "@.%s%s── Iteration %d/%d ──%s@." 222 + Color.bold Color.yellow iteration config.max_iterations Color.reset; 223 + emit (Event.Text (Printf.sprintf "\n[%s] --- Iteration %d/%d ---\n" 224 + project iteration config.max_iterations)); 225 + 226 + let accumulated_response = ref "" in 227 + let completion_detected = ref false in 228 + let last_was_tool = ref false in 229 + 230 + begin 231 + try 232 + Eio.Switch.run @@ fun inner_sw -> 233 + let client = Claude.Client.create ~sw:inner_sw ~process_mgr:proc_mgr 234 + ~clock:(Eio.Stdenv.clock env) ~options () in 235 + 236 + emit Event.Thinking; 237 + 238 + let handler = object 239 + inherit Claude.Handler.default 240 + 241 + method! on_text text = 242 + let content = Claude.Response.Text.content text in 243 + accumulated_response := !accumulated_response ^ content; 244 + if !last_was_tool then begin 245 + Format.printf "@."; 246 + last_was_tool := false 247 + end; 248 + Format.printf "%s@?" content; 249 + emit (Event.Text (Printf.sprintf "[%s] %s" project content)); 250 + if string_contains ~sub:completion_promise !accumulated_response then 251 + completion_detected := true 252 + 253 + method! on_thinking thinking = 254 + let content = Claude.Response.Thinking.content thinking in 255 + Format.printf "%s%s 💭 %s%s@." Color.dim Color.magenta content Color.reset 256 + 257 + method! on_tool_use tool = 258 + (* Just log tool usage - execution is handled by Claude CLI for built-in 259 + tools and by MCP server for custom tools *) 260 + let name = Claude.Response.Tool_use.name tool in 261 + let id = Claude.Response.Tool_use.id tool in 262 + let input = Claude.Response.Tool_use.input tool in 263 + 264 + let call_summary = format_tool_call name input in 265 + 266 + if config.verbose then 267 + log (Printf.sprintf "Tool %s (id: %s)" name id); 268 + 269 + emit (Event.Tool_call { id; name; input = call_summary }); 270 + 271 + (* Print tool call with color *) 272 + Format.printf " %s→%s %s@." Color.cyan Color.reset call_summary; 273 + last_was_tool := true 274 + 275 + method! on_tool_result result = 276 + (* Log tool results for observability *) 277 + let tool_use_id = Claude.Content_block.Tool_result.tool_use_id result in 278 + let is_error = Claude.Content_block.Tool_result.is_error result 279 + |> Option.value ~default:false in 280 + let result_color = if is_error then Color.red else Color.green in 281 + let status = if is_error then "ERROR" else "ok" in 282 + Format.printf " %s←%s %s@." result_color Color.reset status; 283 + emit (Event.Tool_result { id = tool_use_id; name = ""; output = status; is_error }) 284 + 285 + method! on_complete result = 286 + let cost = Claude.Response.Complete.total_cost_usd result in 287 + emit (Event.Turn_complete { turn = iteration; cost_usd = cost }) 288 + 289 + method! on_error err = 290 + let msg = Claude.Response.Error.message err in 291 + log (Printf.sprintf "Error: %s" msg); 292 + Format.printf "%s%sError: %s%s@." Color.bold Color.red msg Color.reset; 293 + emit (Event.Error (Printf.sprintf "[%s] %s" project msg)) 294 + end in 295 + 296 + (* Same prompt every iteration - ralph-loop style *) 297 + Claude.Client.query client iteration_prompt; 298 + Claude.Client.run client ~handler 299 + with exn -> 300 + let msg = Printexc.to_string exn in 301 + log (Printf.sprintf "Exception: %s" msg); 302 + emit (Event.Error (Printf.sprintf "[%s] %s" project msg)) 303 + end; 304 + 305 + (* Check if we should stop *) 306 + if !completion_detected then begin 307 + log "Completion promise detected!"; 308 + Format.printf "@.%s%s✓ Completion promise detected%s@." Color.bold Color.green Color.reset; 309 + emit (Event.Text (Printf.sprintf "\n[%s] ✓ Completion promise detected.\n" project)) 310 + end else 311 + ralph_loop (iteration + 1) 312 + end 313 + in 314 + 315 + ralph_loop 1; 316 + log "Ralph-loop complete"; 317 + Format.printf "@.%s%s─── Project complete: %s ───%s@." Color.dim Color.blue project Color.reset; 318 + emit (Event.Text (Printf.sprintf "\n=== Ralph-loop complete for %s ===\n" project)) 319 + 320 + (* Main entry point *) 321 + let run ~env ~config ~workspace_path () = 322 + Random.self_init (); 323 + 324 + let fs = Eio.Stdenv.fs env in 325 + let net = Eio.Stdenv.net env in 326 + 327 + (* Find unpac root *) 328 + let root = match find_root_from fs workspace_path with 329 + | Some r -> r 330 + | None -> 331 + Format.eprintf "Error: '%s' is not an unpac workspace.@." workspace_path; 332 + exit 1 333 + in 334 + 335 + Log.info (fun m -> m "Starting ralph-loop agent in workspace: %s" (snd root)); 336 + 337 + (* Get projects to process *) 338 + let all_projects = Unpac.Worktree.list_projects ~proc_mgr:(Eio.Stdenv.process_mgr env) root in 339 + 340 + if all_projects = [] then begin 341 + Format.eprintf "No projects found in workspace.@."; 342 + exit 1 343 + end; 344 + 345 + let projects = match config.project with 346 + | Some p -> 347 + if List.mem p all_projects then [p] 348 + else begin 349 + Format.eprintf "Project '%s' not found. Available: %s@." 350 + p (String.concat ", " all_projects); 351 + exit 1 352 + end 353 + | None -> 354 + shuffle all_projects 355 + in 356 + 357 + Log.info (fun m -> m "Projects to process: %s" (String.concat ", " projects)); 358 + 359 + (* Create shared event bus *) 360 + let event_bus = Event.create_bus () in 361 + 362 + Eio.Switch.run @@ fun sw -> 363 + 364 + (* Start web server if enabled *) 365 + (match config.web_port with 366 + | Some port -> 367 + let _web = Web.start ~sw ~net ~port event_bus in 368 + Log.info (fun m -> m "Web UI available at http://localhost:%d" port); 369 + Format.printf "Web UI: http://localhost:%d@." port 370 + | None -> ()); 371 + 372 + Event.emit event_bus Event.Agent_start; 373 + 374 + Format.printf "%s%sRalph-loop agent starting...%s@." Color.bold Color.cyan Color.reset; 375 + Format.printf " %sModel:%s Opus 4.5@." Color.dim Color.reset; 376 + Format.printf " %sMax iterations:%s %d@." Color.dim Color.reset config.max_iterations; 377 + Format.printf " %sCompletion promise:%s %s@." Color.dim Color.reset completion_promise; 378 + Format.printf " %sCustom tools:%s mcp__unpac__* (via MCP server)@." Color.dim Color.reset; 379 + Format.printf " %sProjects (%d):%s %s@." 380 + Color.dim (List.length projects) Color.reset (String.concat ", " projects); 381 + 382 + (* Process projects sequentially *) 383 + List.iter (fun project -> 384 + run_project_ralph_loop ~env ~config ~root ~project ~event_bus 385 + ) projects; 386 + 387 + Event.emit event_bus Event.Agent_stop; 388 + 389 + Format.printf "@.%s%s✓ All projects complete.%s@." Color.bold Color.green Color.reset
+45
lib/claude/agent.mli
··· 1 + (** Ralph-loop style Claude agent for unpac workspace analysis. 2 + 3 + Runs a single autonomous agent per project using ralph-loop iteration: 4 + - Same prompt fed each iteration (state persists in files) 5 + - Up to 20 iterations per project 6 + - Early exit on completion promise 7 + - Projects processed sequentially in random order *) 8 + 9 + (** {1 Agent Configuration} *) 10 + 11 + type config = { 12 + verbose : bool; 13 + web_port : int option; (** Port for web UI, None = disabled *) 14 + max_iterations : int; (** Max ralph-loop iterations per project *) 15 + project : string option; (** Specific project, or None for all *) 16 + } 17 + 18 + val default_config : config 19 + 20 + (** {1 Completion Promise} *) 21 + 22 + val completion_promise : string 23 + (** The phrase that signals work is complete: "AGENTIC-HUMPS-COUNT-2" *) 24 + 25 + (** {1 Running Agents} *) 26 + 27 + val run : 28 + env:Eio_unix.Stdenv.base -> 29 + config:config -> 30 + workspace_path:string -> 31 + unit -> 32 + unit 33 + (** [run ~env ~config ~workspace_path ()] runs ralph-loop agents for projects 34 + in the workspace. 35 + 36 + If [config.project] is specified, runs only that project. 37 + Otherwise, runs all projects sequentially in random order. 38 + 39 + Each project agent: 40 + - Uses Opus 4.5 model 41 + - Runs up to [max_iterations] iterations 42 + - Exits early if response contains [completion_promise] 43 + - Works from [workspace/.unpac-claude/project/] directory 44 + 45 + The function blocks until all projects complete. *)
+4
lib/claude/dune
··· 1 + (library 2 + (name unpac_claude) 3 + (public_name unpac-claude) 4 + (libraries unpac claude eio eio_main logs fmt jsont digestif base64))
+96
lib/claude/event.ml
··· 1 + (** Event types emitted by the Claude agent for live UI updates. *) 2 + 3 + type tool_call = { 4 + id : string; 5 + name : string; 6 + input : string; (* JSON string *) 7 + } 8 + 9 + type tool_result = { 10 + id : string; 11 + name : string; 12 + output : string; 13 + is_error : bool; 14 + } 15 + 16 + type t = 17 + | Thinking 18 + | Text of string 19 + | Tool_call of tool_call 20 + | Tool_result of tool_result 21 + | Error of string 22 + | Sync of string (* "status" or "push" *) 23 + | Turn_complete of { turn : int; cost_usd : float option } 24 + | Agent_start 25 + | Agent_stop 26 + 27 + (* Simple JSON string escaping *) 28 + let escape_json_string s = 29 + let buf = Buffer.create (String.length s + 16) in 30 + Buffer.add_char buf '"'; 31 + String.iter (fun c -> 32 + match c with 33 + | '"' -> Buffer.add_string buf "\\\"" 34 + | '\\' -> Buffer.add_string buf "\\\\" 35 + | '\n' -> Buffer.add_string buf "\\n" 36 + | '\r' -> Buffer.add_string buf "\\r" 37 + | '\t' -> Buffer.add_string buf "\\t" 38 + | c when Char.code c < 32 -> 39 + Buffer.add_string buf (Printf.sprintf "\\u%04x" (Char.code c)) 40 + | c -> Buffer.add_char buf c 41 + ) s; 42 + Buffer.add_char buf '"'; 43 + Buffer.contents buf 44 + 45 + let to_json = function 46 + | Thinking -> 47 + {|{"type":"thinking"}|} 48 + | Text s -> 49 + Printf.sprintf {|{"type":"text","content":%s}|} 50 + (escape_json_string s) 51 + | Tool_call { id; name; input } -> 52 + Printf.sprintf {|{"type":"tool_call","id":%s,"name":%s,"input":%s}|} 53 + (escape_json_string id) 54 + (escape_json_string name) 55 + input (* input is already JSON *) 56 + | Tool_result { id; name; output; is_error } -> 57 + Printf.sprintf {|{"type":"tool_result","id":%s,"name":%s,"output":%s,"is_error":%b}|} 58 + (escape_json_string id) 59 + (escape_json_string name) 60 + (escape_json_string output) 61 + is_error 62 + | Error msg -> 63 + Printf.sprintf {|{"type":"error","message":%s}|} 64 + (escape_json_string msg) 65 + | Sync action -> 66 + Printf.sprintf {|{"type":"sync","action":%s}|} 67 + (escape_json_string action) 68 + | Turn_complete { turn; cost_usd } -> 69 + let cost_str = match cost_usd with 70 + | Some c -> Printf.sprintf "%.6f" c 71 + | None -> "null" 72 + in 73 + Printf.sprintf {|{"type":"turn_complete","turn":%d,"cost_usd":%s}|} 74 + turn cost_str 75 + | Agent_start -> 76 + {|{"type":"agent_start"}|} 77 + | Agent_stop -> 78 + {|{"type":"agent_stop"}|} 79 + 80 + (* Event bus for broadcasting to listeners *) 81 + type listener = t -> unit 82 + 83 + type bus = { 84 + mutable listeners : listener list; 85 + } 86 + 87 + let create_bus () = { listeners = [] } 88 + 89 + let subscribe bus listener = 90 + bus.listeners <- listener :: bus.listeners 91 + 92 + let unsubscribe bus listener = 93 + bus.listeners <- List.filter (fun l -> l != listener) bus.listeners 94 + 95 + let emit bus event = 96 + List.iter (fun l -> l event) bus.listeners
+41
lib/claude/event.mli
··· 1 + (** Event types for live agent UI updates. *) 2 + 3 + (** Tool call event data. *) 4 + type tool_call = { 5 + id : string; 6 + name : string; 7 + input : string; 8 + } 9 + 10 + (** Tool result event data. *) 11 + type tool_result = { 12 + id : string; 13 + name : string; 14 + output : string; 15 + is_error : bool; 16 + } 17 + 18 + (** Agent events. *) 19 + type t = 20 + | Thinking 21 + | Text of string 22 + | Tool_call of tool_call 23 + | Tool_result of tool_result 24 + | Error of string 25 + | Sync of string 26 + | Turn_complete of { turn : int; cost_usd : float option } 27 + | Agent_start 28 + | Agent_stop 29 + 30 + val to_json : t -> string 31 + (** Convert event to JSON string. *) 32 + 33 + (** Event bus for broadcasting to listeners. *) 34 + 35 + type listener = t -> unit 36 + type bus 37 + 38 + val create_bus : unit -> bus 39 + val subscribe : bus -> listener -> unit 40 + val unsubscribe : bus -> listener -> unit 41 + val emit : bus -> t -> unit
+423
lib/claude/prompt.ml
··· 1 + (** Dynamic system prompt generation for autonomous Claude agent. *) 2 + 3 + let src = Logs.Src.create "unpac.claude.prompt" ~doc:"Prompt generation" 4 + module Log = (val Logs.src_log src : Logs.LOG) 5 + 6 + let autonomous_base_prompt = {|You are an autonomous code maintenance agent for OCaml projects in an unpac workspace. 7 + 8 + ## Your Mission 9 + 10 + You continuously analyze and improve the codebase by: 11 + 12 + 1. **Analyzing Projects**: Review each project's code, STATUS.md, and tests 13 + 2. **Completing Features**: Implement incomplete functionality marked in STATUS.md 14 + 3. **Recording Status**: Faithfully update STATUS.md with current state and shortcomings 15 + 4. **Code Quality**: Refactor using OCaml Stdlib combinators and higher-order functions 16 + 5. **Test Coverage**: Identify missing tests and add them where needed 17 + 6. **Syncing Changes**: Regularly run unpac_status_sync and unpac_push to keep remote updated 18 + 19 + ## OCaml Code Quality Guidelines 20 + 21 + When reviewing and improving OCaml code, look for: 22 + 23 + ### Replace Imperative Patterns with Functional Idioms 24 + - Replace `for` loops with `List.iter`, `List.map`, `List.fold_left` 25 + - Replace mutable refs with functional accumulation 26 + - Use `Option.map`, `Option.bind`, `Result.map`, `Result.bind` instead of pattern matching 27 + - Use `|>` pipeline operator for cleaner composition 28 + 29 + ### Stdlib Combinators to Prefer 30 + ```ocaml 31 + (* Instead of manual recursion, use: *) 32 + List.filter_map (* filter and map in one pass *) 33 + List.concat_map (* map then flatten *) 34 + List.find_opt (* safe find *) 35 + List.assoc_opt (* safe association lookup *) 36 + Option.value (* provide default *) 37 + Option.join (* flatten option option *) 38 + String.concat (* join strings *) 39 + String.split_on_char 40 + ``` 41 + 42 + ### Common Refactoring Patterns 43 + ```ocaml 44 + (* BEFORE: *) 45 + let result = ref [] in 46 + List.iter (fun x -> 47 + if pred x then result := transform x :: !result 48 + ) items; 49 + List.rev !result 50 + 51 + (* AFTER: *) 52 + items |> List.filter_map (fun x -> 53 + if pred x then Some (transform x) else None 54 + ) 55 + 56 + (* BEFORE: *) 57 + match opt with 58 + | Some x -> Some (f x) 59 + | None -> None 60 + 61 + (* AFTER: *) 62 + Option.map f opt 63 + 64 + (* BEFORE: *) 65 + match foo () with 66 + | Ok x -> bar x 67 + | Error _ as e -> e 68 + 69 + (* AFTER: *) 70 + Result.bind (foo ()) bar 71 + ``` 72 + 73 + ## STATUS.md Format 74 + 75 + Each project should have a STATUS.md with: 76 + 77 + ```markdown 78 + # Project Name 79 + 80 + **Status**: [STUB | IN_PROGRESS | COMPLETE | NEEDS_REVIEW] 81 + 82 + ## Overview 83 + Brief description of what this project does. 84 + 85 + ## Current State 86 + - What is implemented 87 + - What works 88 + 89 + ## TODO 90 + - [ ] Task 1 91 + - [ ] Task 2 92 + - [x] Completed task 93 + 94 + ## Known Issues 95 + - Issue 1 96 + - Issue 2 97 + 98 + ## Test Coverage 99 + - What is tested 100 + - What needs tests 101 + 102 + ## Dependencies 103 + - Required packages 104 + ``` 105 + 106 + ## Workflow 107 + 108 + 1. **Start**: List all projects with unpac_project_list 109 + 2. **For each project**: 110 + - Read STATUS.md if it exists 111 + - Glob all *.ml files 112 + - Read key source files 113 + - Analyze code quality 114 + - Check for tests (look for test/ or *_test.ml files) 115 + - Update STATUS.md with findings 116 + - Make small, focused improvements 117 + - Commit changes with clear messages 118 + 3. **Periodically**: Run unpac_status_sync and unpac_push to sync 119 + 120 + ## Rate Limit Handling 121 + 122 + If you encounter rate limit errors: 123 + - Wait the indicated time before retrying 124 + - The system will handle backoff automatically 125 + - Focus on one project at a time to avoid rapid API calls 126 + 127 + ## Important Rules 128 + 129 + 1. **Small Changes**: Make incremental improvements, not sweeping rewrites 130 + 2. **Commit Often**: Commit after each logical change 131 + 3. **Document**: Always update STATUS.md to reflect current state 132 + 4. **Test First**: Run dune build before committing code changes 133 + 5. **Push Regularly**: Keep remote in sync with local changes 134 + 6. **Be Honest**: Record actual shortcomings, don't hide problems 135 + 136 + ## Available Tools 137 + 138 + You have access to these tools: 139 + - **unpac_status**: Get workspace overview 140 + - **unpac_status_sync**: Update README.md and sync state 141 + - **unpac_push**: Push all branches to remote 142 + - **unpac_project_list**: List projects 143 + - **unpac_opam_list**: List vendored packages 144 + - **unpac_git_list**: List vendored git repos 145 + - **read_file**: Read source code and config files 146 + - **write_file**: Update code or STATUS.md 147 + - **list_directory**: Explore directory structure 148 + - **glob_files**: Find files by pattern 149 + - **run_shell**: Run dune build/test commands 150 + - **git_commit**: Commit changes 151 + 152 + Start by getting the workspace status and listing all projects. 153 + |} 154 + 155 + let interactive_base_prompt = {|You are an autonomous coding agent running in an unpac workspace. 156 + 157 + Unpac is a monorepo vendoring tool that uses git worktrees to manage dependencies. 158 + It supports two backends: 159 + 1. **opam** - OCaml package vendoring with dependency solving 160 + 2. **git** - Direct git repository vendoring without solving 161 + 162 + Both backends use a three-tier branch model: 163 + - upstream/* - pristine upstream code 164 + - vendor/* - history-rewritten with vendor/<backend>/<name>/ prefix 165 + - patches/* - local modifications 166 + 167 + This architecture allows: 168 + - Full git history preservation (git blame/log work) 169 + - Conflict-free merging into multiple project branches 170 + - Local patches that survive upstream updates 171 + 172 + Your role is to help explore and develop code in this workspace. You can: 173 + - Add new git repositories as dependencies 174 + - Explore vendored code 175 + - Make local patches 176 + - Merge dependencies into projects 177 + - Analyze and improve code quality 178 + - Update STATUS.md documentation 179 + 180 + Always use the provided tools to interact with unpac. Query the workspace state 181 + before making changes to understand the current configuration. 182 + |} 183 + 184 + let run_help ~proc_mgr = 185 + try 186 + (* Run unpac --help to get CLI documentation *) 187 + let output = Eio.Switch.run @@ fun sw -> 188 + let stdout_buf = Buffer.create 4096 in 189 + let stdout_r, stdout_w = Eio.Process.pipe proc_mgr ~sw in 190 + let child = Eio.Process.spawn proc_mgr ~sw 191 + ~stdout:stdout_w 192 + ["unpac"; "--help"] 193 + in 194 + Eio.Flow.close stdout_w; 195 + (* Read output *) 196 + let chunk = Cstruct.create 4096 in 197 + let rec loop () = 198 + match Eio.Flow.single_read stdout_r chunk with 199 + | n -> 200 + Buffer.add_string stdout_buf (Cstruct.to_string (Cstruct.sub chunk 0 n)); 201 + loop () 202 + | exception End_of_file -> () 203 + in 204 + loop (); 205 + ignore (Eio.Process.await child); 206 + Buffer.contents stdout_buf 207 + in 208 + Some output 209 + with _ -> 210 + Log.warn (fun m -> m "Could not run unpac --help"); 211 + None 212 + 213 + let read_architecture ~root = 214 + try 215 + let main_path = Unpac.Worktree.path root Unpac.Worktree.Main in 216 + let arch_path = Eio.Path.(main_path / "ARCHITECTURE.md") in 217 + if Eio.Path.is_file arch_path then begin 218 + let content = Eio.Path.load arch_path in 219 + (* Truncate if too long *) 220 + let max_len = 8000 in 221 + if String.length content > max_len then 222 + Some (String.sub content 0 max_len ^ "\n\n[... truncated ...]") 223 + else 224 + Some content 225 + end else 226 + None 227 + with _ -> 228 + Log.debug (fun m -> m "No ARCHITECTURE.md found"); 229 + None 230 + 231 + let get_workspace_state ~proc_mgr ~root = 232 + let buf = Buffer.create 1024 in 233 + let add s = Buffer.add_string buf s in 234 + 235 + add "## Current Workspace State\n\n"; 236 + 237 + (* Projects *) 238 + let projects = Unpac.Worktree.list_projects ~proc_mgr root in 239 + add (Printf.sprintf "**Projects** (%d):\n" (List.length projects)); 240 + List.iter (fun p -> add (Printf.sprintf "- %s\n" p)) projects; 241 + if projects = [] then add "- (none)\n"; 242 + add "\n"; 243 + 244 + (* Git repos *) 245 + let git_repos = Unpac.Git_backend.list_repos ~proc_mgr ~root in 246 + add (Printf.sprintf "**Git Repositories** (%d):\n" (List.length git_repos)); 247 + List.iter (fun r -> add (Printf.sprintf "- %s\n" r)) git_repos; 248 + if git_repos = [] then add "- (none)\n"; 249 + add "\n"; 250 + 251 + (* Opam packages *) 252 + let opam_pkgs = Unpac.Worktree.list_opam_packages ~proc_mgr root in 253 + add (Printf.sprintf "**Opam Packages** (%d):\n" (List.length opam_pkgs)); 254 + List.iter (fun p -> add (Printf.sprintf "- %s\n" p)) opam_pkgs; 255 + if opam_pkgs = [] then add "- (none)\n"; 256 + 257 + Buffer.contents buf 258 + 259 + let generate ~proc_mgr ~root ~autonomous = 260 + let buf = Buffer.create 16384 in 261 + let add s = Buffer.add_string buf s in 262 + 263 + (* Choose base prompt based on mode *) 264 + if autonomous then 265 + add autonomous_base_prompt 266 + else 267 + add interactive_base_prompt; 268 + 269 + add "\n\n---\n\n"; 270 + 271 + (* Add CLI help if available *) 272 + (match run_help ~proc_mgr with 273 + | Some help -> 274 + add "## CLI Reference\n\n"; 275 + add "```\n"; 276 + add help; 277 + add "```\n\n"; 278 + | None -> ()); 279 + 280 + (* Add architecture docs if available *) 281 + (match read_architecture ~root with 282 + | Some arch -> 283 + add "## Architecture Documentation\n\n"; 284 + add arch; 285 + add "\n\n"; 286 + | None -> ()); 287 + 288 + (* Add current workspace state *) 289 + add (get_workspace_state ~proc_mgr ~root); 290 + 291 + Buffer.contents buf 292 + 293 + let project_base_prompt project project_dir = Printf.sprintf 294 + {|You are an autonomous coding agent assigned to work on the '%s' project. 295 + 296 + ## Your Mission 297 + 298 + You are working EXCLUSIVELY on the '%s' project located at: %s 299 + 300 + Your goals are to: 301 + 1. **Understand**: Read and analyze all project source code 302 + 2. **Document**: Update STATUS.md with accurate project state 303 + 3. **Improve**: Make focused code quality improvements 304 + 4. **Test**: Ensure code builds and tests pass 305 + 5. **Commit**: Commit meaningful changes with clear messages 306 + 307 + ## OCaml Code Quality Guidelines 308 + 309 + When improving code, look for: 310 + 311 + ### Functional Idioms 312 + - Replace `for` loops with `List.iter`, `List.map`, `List.fold_left` 313 + - Use `Option.map`, `Option.bind`, `Result.map`, `Result.bind` 314 + - Use `|>` pipeline operator for cleaner composition 315 + - Prefer `List.filter_map` over filter + map 316 + 317 + ### Stdlib Combinators 318 + ```ocaml 319 + List.filter_map (* filter and map in one pass *) 320 + List.concat_map (* map then flatten *) 321 + List.find_opt (* safe find *) 322 + Option.value (* provide default *) 323 + Option.join (* flatten option option *) 324 + String.concat (* join strings *) 325 + String.split_on_char 326 + ``` 327 + 328 + ## STATUS.md Format 329 + 330 + Maintain a STATUS.md in the project with: 331 + 332 + ```markdown 333 + # %s 334 + 335 + **Status**: [STUB | IN_PROGRESS | COMPLETE | NEEDS_REVIEW] 336 + 337 + ## Overview 338 + Brief description of what this project does. 339 + 340 + ## Current State 341 + - What is implemented 342 + - What works 343 + 344 + ## TODO 345 + - [ ] Task 1 346 + - [ ] Task 2 347 + - [x] Completed task 348 + 349 + ## Known Issues 350 + - Issue 1 351 + 352 + ## Test Coverage 353 + - What is tested 354 + - What needs tests 355 + ``` 356 + 357 + ## Workflow 358 + 359 + 1. Check if STATUS.md exists and read it 360 + 2. Glob all *.ml and *.mli files 361 + 3. Read and analyze source files 362 + 4. Run dune build to check compilation 363 + 5. Update STATUS.md with findings 364 + 6. Make focused improvements 365 + 7. Commit changes 366 + 8. Repeat until work is complete 367 + 368 + ## Completion Signal 369 + 370 + When you have completed all significant work on this project: 371 + - Updated STATUS.md comprehensively 372 + - Made all reasonable improvements 373 + - Verified builds pass 374 + - No obvious remaining issues 375 + 376 + Then output exactly: AGENTIC-HUMPS-COUNT-2 377 + 378 + This signals the ralph-loop to stop iterating. 379 + 380 + ## Important Rules 381 + 382 + 1. **Focus**: Only work on %s - ignore other projects 383 + 2. **Small Changes**: Make incremental improvements 384 + 3. **Test First**: Run dune build before committing 385 + 4. **Be Honest**: Record actual shortcomings in STATUS.md 386 + 5. **Commit Often**: Commit after each logical change 387 + 388 + ## Available Tools 389 + 390 + - **read_file**: Read source code and config files 391 + - **write_file**: Update code or STATUS.md 392 + - **list_directory**: Explore directory structure 393 + - **glob_files**: Find files by pattern 394 + - **run_shell**: Run dune build/test commands 395 + - **git_commit**: Commit changes 396 + - **unpac_status_sync**: Update workspace status 397 + - **unpac_push**: Push changes to remote 398 + 399 + Start by exploring the project structure and reading existing files. 400 + |} project project project_dir project project 401 + 402 + let generate_for_project ~proc_mgr:_ ~root ~project = 403 + let buf = Buffer.create 16384 in 404 + let add s = Buffer.add_string buf s in 405 + 406 + (* Get project directory *) 407 + let project_path = Unpac.Worktree.path root (Unpac.Worktree.Project project) in 408 + let project_dir = snd project_path in 409 + 410 + (* Add project-specific prompt *) 411 + add (project_base_prompt project project_dir); 412 + 413 + add "\n\n---\n\n"; 414 + 415 + (* Add architecture docs if available *) 416 + (match read_architecture ~root with 417 + | Some arch -> 418 + add "## Workspace Architecture\n\n"; 419 + add arch; 420 + add "\n\n"; 421 + | None -> ()); 422 + 423 + Buffer.contents buf
+29
lib/claude/prompt.mli
··· 1 + (** Dynamic system prompt generation for Claude agent. 2 + 3 + Builds a comprehensive system prompt by: 4 + 1. Running 'unpac --help' to get current CLI documentation 5 + 2. Reading ARCHITECTURE.md if present 6 + 3. Querying current workspace state *) 7 + 8 + val generate : 9 + proc_mgr:Unpac.Git.proc_mgr -> 10 + root:Unpac.Worktree.root -> 11 + autonomous:bool -> 12 + string 13 + (** Generate a system prompt with full unpac knowledge. 14 + If [autonomous] is true, includes detailed instructions for autonomous 15 + code maintenance and improvement. *) 16 + 17 + val generate_for_project : 18 + proc_mgr:Unpac.Git.proc_mgr -> 19 + root:Unpac.Worktree.root -> 20 + project:string -> 21 + string 22 + (** Generate a system prompt for a specific project agent. 23 + The agent will focus exclusively on the given project. *) 24 + 25 + val autonomous_base_prompt : string 26 + (** Base system prompt for autonomous mode. *) 27 + 28 + val interactive_base_prompt : string 29 + (** Base system prompt for interactive mode. *)
+679
lib/claude/tools.ml
··· 1 + (** Tool definitions for Claude to interact with unpac and analyze code. 2 + 3 + Uses the Claude SDK's MCP-based custom tool architecture. Tools are 4 + defined as Claude.Tool.t values and bundled into an Mcp_server that 5 + gets registered with the Claude client. *) 6 + 7 + let src = Logs.Src.create "unpac.claude.tools" ~doc:"Claude tools" 8 + module Log = (val Logs.src_log src : Logs.LOG) 9 + 10 + (* Helper to truncate long output *) 11 + let truncate_output ?(max_len=50000) s = 12 + if String.length s > max_len then 13 + String.sub s 0 max_len ^ "\n\n[... truncated ...]" 14 + else s 15 + 16 + (* Tool result helpers - convert to Claude.Tool format *) 17 + let ok s = Ok (Claude.Tool.text_result s) 18 + let err s = Error s 19 + 20 + (* === TOOL IMPLEMENTATIONS === *) 21 + 22 + (* Git list tool *) 23 + let git_list ~proc_mgr ~root () = 24 + try 25 + let repos = Unpac.Git_backend.list_repos ~proc_mgr ~root in 26 + if repos = [] then 27 + ok "No git repositories vendored.\n\nTo add one: use git_add tool with url parameter." 28 + else 29 + let buf = Buffer.create 256 in 30 + Buffer.add_string buf "Vendored git repositories:\n"; 31 + List.iter (fun r -> Buffer.add_string buf (Printf.sprintf "- %s\n" r)) repos; 32 + ok (Buffer.contents buf) 33 + with exn -> 34 + err (Printf.sprintf "Failed to list git repos: %s" (Printexc.to_string exn)) 35 + 36 + (* Git add tool *) 37 + let git_add ~proc_mgr ~fs ~root ~url ?name ?branch ?subdir () = 38 + try 39 + let repo_name = match name with 40 + | Some n -> n 41 + | None -> 42 + let base = Filename.basename url in 43 + if String.ends_with ~suffix:".git" base then 44 + String.sub base 0 (String.length base - 4) 45 + else base 46 + in 47 + 48 + let info : Unpac.Git_backend.repo_info = { 49 + name = repo_name; 50 + url; 51 + branch; 52 + subdir; 53 + } in 54 + 55 + let config_path = Filename.concat (snd (Unpac.Worktree.path root Unpac.Worktree.Main)) 56 + "unpac.toml" in 57 + let cache = if Sys.file_exists config_path then begin 58 + match Unpac.Config.load config_path with 59 + | Ok config -> Unpac.Config.resolve_vendor_cache config 60 + | Error _ -> None 61 + end else None in 62 + 63 + let cache = match cache with 64 + | Some path -> Some (Eio.Path.(fs / path)) 65 + | None -> None 66 + in 67 + 68 + match Unpac.Git_backend.add_repo ~proc_mgr ~root ?cache info with 69 + | Unpac.Backend.Added { name = added_name; sha } -> 70 + ok (Printf.sprintf 71 + "Successfully added repository '%s' (commit %s).\n\n\ 72 + Next steps:\n\ 73 + - Use git_info %s to see repository details\n\ 74 + - Use git_diff %s to see any local changes\n\ 75 + - Merge into a project when ready" added_name (String.sub sha 0 7) 76 + added_name added_name) 77 + | Unpac.Backend.Already_exists name -> 78 + ok (Printf.sprintf "Repository '%s' is already vendored." name) 79 + | Unpac.Backend.Failed { name; error } -> 80 + err (Printf.sprintf "Failed to add '%s': %s" name error) 81 + with exn -> 82 + err (Printf.sprintf "Failed to add repository: %s" (Printexc.to_string exn)) 83 + 84 + (* Git info tool *) 85 + let git_info ~proc_mgr ~root ~name () = 86 + try 87 + let git = Unpac.Worktree.git_dir root in 88 + let repos = Unpac.Git_backend.list_repos ~proc_mgr ~root in 89 + if not (List.mem name repos) then 90 + err (Printf.sprintf "Repository '%s' is not vendored" name) 91 + else begin 92 + let buf = Buffer.create 512 in 93 + let add s = Buffer.add_string buf s in 94 + 95 + add (Printf.sprintf "Repository: %s\n" name); 96 + 97 + let remote = "origin-" ^ name in 98 + (match Unpac.Git.remote_url ~proc_mgr ~cwd:git remote with 99 + | Some u -> add (Printf.sprintf "URL: %s\n" u) 100 + | None -> ()); 101 + 102 + let upstream = Unpac.Git_backend.upstream_branch name in 103 + let vendor = Unpac.Git_backend.vendor_branch name in 104 + let patches = Unpac.Git_backend.patches_branch name in 105 + 106 + (match Unpac.Git.rev_parse ~proc_mgr ~cwd:git upstream with 107 + | Some sha -> add (Printf.sprintf "Upstream: %s\n" (String.sub sha 0 7)) 108 + | None -> ()); 109 + (match Unpac.Git.rev_parse ~proc_mgr ~cwd:git vendor with 110 + | Some sha -> add (Printf.sprintf "Vendor: %s\n" (String.sub sha 0 7)) 111 + | None -> ()); 112 + (match Unpac.Git.rev_parse ~proc_mgr ~cwd:git patches with 113 + | Some sha -> add (Printf.sprintf "Patches: %s\n" (String.sub sha 0 7)) 114 + | None -> ()); 115 + 116 + let log_output = Unpac.Git.run_exn ~proc_mgr ~cwd:git 117 + ["log"; "--oneline"; vendor ^ ".." ^ patches] in 118 + let commits = List.length (String.split_on_char '\n' log_output |> 119 + List.filter (fun s -> String.trim s <> "")) in 120 + add (Printf.sprintf "Local commits: %d\n" commits); 121 + 122 + ok (Buffer.contents buf) 123 + end 124 + with exn -> 125 + err (Printf.sprintf "Failed to get info for '%s': %s" name (Printexc.to_string exn)) 126 + 127 + (* Git diff tool *) 128 + let git_diff ~proc_mgr ~root ~name () = 129 + try 130 + let git = Unpac.Worktree.git_dir root in 131 + let repos = Unpac.Git_backend.list_repos ~proc_mgr ~root in 132 + if not (List.mem name repos) then 133 + err (Printf.sprintf "Repository '%s' is not vendored" name) 134 + else begin 135 + let vendor = Unpac.Git_backend.vendor_branch name in 136 + let patches = Unpac.Git_backend.patches_branch name in 137 + let diff = Unpac.Git.run_exn ~proc_mgr ~cwd:git ["diff"; vendor; patches] in 138 + if String.trim diff = "" then 139 + ok (Printf.sprintf "No local changes in '%s'." name) 140 + else 141 + ok (truncate_output (Printf.sprintf "Diff for '%s':\n\n%s" name diff)) 142 + end 143 + with exn -> 144 + err (Printf.sprintf "Failed to get diff for '%s': %s" name (Printexc.to_string exn)) 145 + 146 + (* Opam list tool *) 147 + let opam_list ~proc_mgr ~root () = 148 + try 149 + let pkgs = Unpac.Worktree.list_opam_packages ~proc_mgr root in 150 + if pkgs = [] then 151 + ok "No opam packages vendored." 152 + else begin 153 + let buf = Buffer.create 256 in 154 + Buffer.add_string buf "Vendored opam packages:\n"; 155 + List.iter (fun p -> Buffer.add_string buf (Printf.sprintf "- %s\n" p)) pkgs; 156 + ok (Buffer.contents buf) 157 + end 158 + with exn -> 159 + err (Printf.sprintf "Failed to list opam packages: %s" (Printexc.to_string exn)) 160 + 161 + (* Project list tool *) 162 + let project_list ~proc_mgr ~root () = 163 + try 164 + let projects = Unpac.Worktree.list_projects ~proc_mgr root in 165 + if projects = [] then 166 + ok "No projects configured." 167 + else begin 168 + let buf = Buffer.create 256 in 169 + Buffer.add_string buf "Projects:\n"; 170 + List.iter (fun p -> Buffer.add_string buf (Printf.sprintf "- %s\n" p)) projects; 171 + ok (Buffer.contents buf) 172 + end 173 + with exn -> 174 + err (Printf.sprintf "Failed to list projects: %s" (Printexc.to_string exn)) 175 + 176 + (* Status tool - overview of the workspace *) 177 + let status ~proc_mgr ~root () = 178 + try 179 + let buf = Buffer.create 1024 in 180 + let add s = Buffer.add_string buf s in 181 + 182 + add "=== Unpac Workspace Status ===\n\n"; 183 + 184 + let projects = Unpac.Worktree.list_projects ~proc_mgr root in 185 + add (Printf.sprintf "Projects (%d):\n" (List.length projects)); 186 + List.iter (fun p -> add (Printf.sprintf " - %s\n" p)) projects; 187 + if projects = [] then add " (none)\n"; 188 + add "\n"; 189 + 190 + let git_repos = Unpac.Git_backend.list_repos ~proc_mgr ~root in 191 + add (Printf.sprintf "Git Repositories (%d):\n" (List.length git_repos)); 192 + List.iter (fun r -> add (Printf.sprintf " - %s\n" r)) git_repos; 193 + if git_repos = [] then add " (none)\n"; 194 + add "\n"; 195 + 196 + let opam_pkgs = Unpac.Worktree.list_opam_packages ~proc_mgr root in 197 + add (Printf.sprintf "Opam Packages (%d):\n" (List.length opam_pkgs)); 198 + List.iter (fun p -> add (Printf.sprintf " - %s\n" p)) opam_pkgs; 199 + if opam_pkgs = [] then add " (none)\n"; 200 + 201 + ok (Buffer.contents buf) 202 + with exn -> 203 + err (Printf.sprintf "Failed to get status: %s" (Printexc.to_string exn)) 204 + 205 + (* Read file tool *) 206 + let read_file ~fs ~path () = 207 + try 208 + let full_path = Eio.Path.(fs / path) in 209 + if not (Eio.Path.is_file full_path) then 210 + err (Printf.sprintf "File not found: %s" path) 211 + else begin 212 + let content = Eio.Path.load full_path in 213 + ok (truncate_output content) 214 + end 215 + with exn -> 216 + err (Printf.sprintf "Failed to read '%s': %s" path (Printexc.to_string exn)) 217 + 218 + (* Write file tool *) 219 + let write_file ~fs ~path ~content () = 220 + try 221 + let full_path = Eio.Path.(fs / path) in 222 + let parent = Filename.dirname path in 223 + if parent <> "." && parent <> "/" then begin 224 + let parent_path = Eio.Path.(fs / parent) in 225 + Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 parent_path 226 + end; 227 + Eio.Path.save ~create:(`Or_truncate 0o644) full_path content; 228 + ok (Printf.sprintf "Successfully wrote %d bytes to %s" (String.length content) path) 229 + with exn -> 230 + err (Printf.sprintf "Failed to write '%s': %s" path (Printexc.to_string exn)) 231 + 232 + (* List directory tool *) 233 + let list_dir ~fs ~path () = 234 + try 235 + let full_path = Eio.Path.(fs / path) in 236 + if not (Eio.Path.is_directory full_path) then 237 + err (Printf.sprintf "Not a directory: %s" path) 238 + else begin 239 + let entries = Eio.Path.read_dir full_path in 240 + let entries = List.sort String.compare entries in 241 + let buf = Buffer.create 256 in 242 + Buffer.add_string buf (Printf.sprintf "Contents of %s:\n" path); 243 + List.iter (fun e -> 244 + let entry_path = Eio.Path.(full_path / e) in 245 + let suffix = if Eio.Path.is_directory entry_path then "/" else "" in 246 + Buffer.add_string buf (Printf.sprintf " %s%s\n" e suffix) 247 + ) entries; 248 + ok (Buffer.contents buf) 249 + end 250 + with exn -> 251 + err (Printf.sprintf "Failed to list '%s': %s" path (Printexc.to_string exn)) 252 + 253 + (* Glob files tool *) 254 + let glob_files ~fs ~pattern ~base_path () = 255 + try 256 + let full_base = Eio.Path.(fs / base_path) in 257 + if not (Eio.Path.is_directory full_base) then 258 + err (Printf.sprintf "Base path not a directory: %s" base_path) 259 + else begin 260 + let results = ref [] in 261 + let rec walk dir rel_path = 262 + let entries = try Eio.Path.read_dir dir with _ -> [] in 263 + List.iter (fun name -> 264 + let entry_path = Eio.Path.(dir / name) in 265 + let rel = if rel_path = "" then name else rel_path ^ "/" ^ name in 266 + if Eio.Path.is_directory entry_path then 267 + walk entry_path rel 268 + else begin 269 + let matches = 270 + if String.starts_with ~prefix:"**/" pattern then 271 + let ext = String.sub pattern 3 (String.length pattern - 3) in 272 + String.ends_with ~suffix:ext name 273 + else if String.starts_with ~prefix:"*" pattern then 274 + let ext = String.sub pattern 1 (String.length pattern - 1) in 275 + String.ends_with ~suffix:ext name 276 + else 277 + name = pattern 278 + in 279 + if matches then results := rel :: !results 280 + end 281 + ) entries 282 + in 283 + walk full_base ""; 284 + let files = List.sort String.compare !results in 285 + if files = [] then 286 + ok (Printf.sprintf "No files matching '%s' in %s" pattern base_path) 287 + else begin 288 + let buf = Buffer.create 256 in 289 + Buffer.add_string buf (Printf.sprintf "Files matching '%s' in %s:\n" pattern base_path); 290 + List.iter (fun f -> Buffer.add_string buf (Printf.sprintf " %s\n" f)) files; 291 + ok (Buffer.contents buf) 292 + end 293 + end 294 + with exn -> 295 + err (Printf.sprintf "Failed to glob '%s': %s" pattern (Printexc.to_string exn)) 296 + 297 + (* Shell execution tool *) 298 + let run_shell ~proc_mgr ~fs ~cwd ~command () = 299 + try 300 + let result = Eio.Switch.run @@ fun sw -> 301 + let stdout_buf = Buffer.create 4096 in 302 + let stderr_buf = Buffer.create 4096 in 303 + let stdout_r, stdout_w = Eio.Process.pipe proc_mgr ~sw in 304 + let stderr_r, stderr_w = Eio.Process.pipe proc_mgr ~sw in 305 + 306 + let cwd_path = Eio.Path.(fs / cwd) in 307 + 308 + let child = Eio.Process.spawn proc_mgr ~sw 309 + ~cwd:(cwd_path :> Eio.Fs.dir_ty Eio.Path.t) 310 + ~stdout:stdout_w ~stderr:stderr_w 311 + ["sh"; "-c"; command] 312 + in 313 + Eio.Flow.close stdout_w; 314 + Eio.Flow.close stderr_w; 315 + 316 + Eio.Fiber.both 317 + (fun () -> 318 + let chunk = Cstruct.create 4096 in 319 + let rec loop () = 320 + match Eio.Flow.single_read stdout_r chunk with 321 + | n -> 322 + Buffer.add_string stdout_buf (Cstruct.to_string (Cstruct.sub chunk 0 n)); 323 + loop () 324 + | exception End_of_file -> () 325 + in loop ()) 326 + (fun () -> 327 + let chunk = Cstruct.create 4096 in 328 + let rec loop () = 329 + match Eio.Flow.single_read stderr_r chunk with 330 + | n -> 331 + Buffer.add_string stderr_buf (Cstruct.to_string (Cstruct.sub chunk 0 n)); 332 + loop () 333 + | exception End_of_file -> () 334 + in loop ()); 335 + 336 + let status = Eio.Process.await child in 337 + let stdout = Buffer.contents stdout_buf in 338 + let stderr = Buffer.contents stderr_buf in 339 + (status, stdout, stderr) 340 + in 341 + let (status, stdout, stderr) = result in 342 + let exit_code = match status with 343 + | `Exited c -> c 344 + | `Signaled s -> 128 + s 345 + in 346 + let buf = Buffer.create 256 in 347 + Buffer.add_string buf (Printf.sprintf "Exit code: %d\n" exit_code); 348 + if stdout <> "" then begin 349 + Buffer.add_string buf "\n=== STDOUT ===\n"; 350 + Buffer.add_string buf stdout 351 + end; 352 + if stderr <> "" then begin 353 + Buffer.add_string buf "\n=== STDERR ===\n"; 354 + Buffer.add_string buf stderr 355 + end; 356 + if exit_code = 0 then 357 + ok (truncate_output (Buffer.contents buf)) 358 + else 359 + err (truncate_output (Buffer.contents buf)) 360 + with exn -> 361 + err (Printf.sprintf "Failed to run command: %s" (Printexc.to_string exn)) 362 + 363 + (* Unpac status sync *) 364 + let unpac_status_sync ~proc_mgr ~root () = 365 + try 366 + let main_wt = Unpac.Worktree.path root Unpac.Worktree.Main in 367 + let result = Eio.Switch.run @@ fun sw -> 368 + let stdout_buf = Buffer.create 4096 in 369 + let stderr_buf = Buffer.create 4096 in 370 + let stdout_r, stdout_w = Eio.Process.pipe proc_mgr ~sw in 371 + let stderr_r, stderr_w = Eio.Process.pipe proc_mgr ~sw in 372 + 373 + let child = Eio.Process.spawn proc_mgr ~sw 374 + ~cwd:(main_wt :> Eio.Fs.dir_ty Eio.Path.t) 375 + ~stdout:stdout_w ~stderr:stderr_w 376 + ["unpac"; "status"] 377 + in 378 + Eio.Flow.close stdout_w; 379 + Eio.Flow.close stderr_w; 380 + 381 + Eio.Fiber.both 382 + (fun () -> 383 + let chunk = Cstruct.create 4096 in 384 + let rec loop () = 385 + match Eio.Flow.single_read stdout_r chunk with 386 + | n -> Buffer.add_string stdout_buf (Cstruct.to_string (Cstruct.sub chunk 0 n)); loop () 387 + | exception End_of_file -> () 388 + in loop ()) 389 + (fun () -> 390 + let chunk = Cstruct.create 4096 in 391 + let rec loop () = 392 + match Eio.Flow.single_read stderr_r chunk with 393 + | n -> Buffer.add_string stderr_buf (Cstruct.to_string (Cstruct.sub chunk 0 n)); loop () 394 + | exception End_of_file -> () 395 + in loop ()); 396 + 397 + ignore (Eio.Process.await child); 398 + Buffer.contents stdout_buf 399 + in 400 + ok (Printf.sprintf "Ran unpac status:\n%s" (truncate_output result)) 401 + with exn -> 402 + err (Printf.sprintf "Failed to run unpac status: %s" (Printexc.to_string exn)) 403 + 404 + (* Unpac push *) 405 + let unpac_push ~proc_mgr ~root ~remote () = 406 + try 407 + let main_wt = Unpac.Worktree.path root Unpac.Worktree.Main in 408 + let result = Eio.Switch.run @@ fun sw -> 409 + let stdout_buf = Buffer.create 4096 in 410 + let stderr_buf = Buffer.create 4096 in 411 + let stdout_r, stdout_w = Eio.Process.pipe proc_mgr ~sw in 412 + let stderr_r, stderr_w = Eio.Process.pipe proc_mgr ~sw in 413 + 414 + let child = Eio.Process.spawn proc_mgr ~sw 415 + ~cwd:(main_wt :> Eio.Fs.dir_ty Eio.Path.t) 416 + ~stdout:stdout_w ~stderr:stderr_w 417 + ["unpac"; "push"; remote] 418 + in 419 + Eio.Flow.close stdout_w; 420 + Eio.Flow.close stderr_w; 421 + 422 + Eio.Fiber.both 423 + (fun () -> 424 + let chunk = Cstruct.create 4096 in 425 + let rec loop () = 426 + match Eio.Flow.single_read stdout_r chunk with 427 + | n -> Buffer.add_string stdout_buf (Cstruct.to_string (Cstruct.sub chunk 0 n)); loop () 428 + | exception End_of_file -> () 429 + in loop ()) 430 + (fun () -> 431 + let chunk = Cstruct.create 4096 in 432 + let rec loop () = 433 + match Eio.Flow.single_read stderr_r chunk with 434 + | n -> Buffer.add_string stderr_buf (Cstruct.to_string (Cstruct.sub chunk 0 n)); loop () 435 + | exception End_of_file -> () 436 + in loop ()); 437 + 438 + let status = Eio.Process.await child in 439 + let stdout = Buffer.contents stdout_buf in 440 + let stderr = Buffer.contents stderr_buf in 441 + (status, stdout, stderr) 442 + in 443 + let (status, stdout, stderr) = result in 444 + let exit_code = match status with `Exited c -> c | `Signaled s -> 128 + s in 445 + if exit_code = 0 then 446 + ok (Printf.sprintf "Pushed to %s:\n%s" remote (truncate_output stdout)) 447 + else 448 + err (Printf.sprintf "Push failed (exit %d):\n%s\n%s" exit_code stdout stderr) 449 + with exn -> 450 + err (Printf.sprintf "Failed to push: %s" (Printexc.to_string exn)) 451 + 452 + (* Git commit tool *) 453 + let git_commit ~proc_mgr ~cwd ~message () = 454 + try 455 + let result = Eio.Switch.run @@ fun sw -> 456 + let add_child = Eio.Process.spawn proc_mgr ~sw 457 + ~cwd:(cwd :> Eio.Fs.dir_ty Eio.Path.t) 458 + ["git"; "add"; "-A"] 459 + in 460 + let add_status = Eio.Process.await add_child in 461 + (match add_status with 462 + | `Exited 0 -> () 463 + | _ -> failwith "git add failed"); 464 + 465 + let stdout_r, stdout_w = Eio.Process.pipe proc_mgr ~sw in 466 + let stderr_r, stderr_w = Eio.Process.pipe proc_mgr ~sw in 467 + let stdout_buf = Buffer.create 256 in 468 + let stderr_buf = Buffer.create 256 in 469 + 470 + let commit_child = Eio.Process.spawn proc_mgr ~sw 471 + ~cwd:(cwd :> Eio.Fs.dir_ty Eio.Path.t) 472 + ~stdout:stdout_w ~stderr:stderr_w 473 + ["git"; "commit"; "-m"; message] 474 + in 475 + Eio.Flow.close stdout_w; 476 + Eio.Flow.close stderr_w; 477 + 478 + Eio.Fiber.both 479 + (fun () -> 480 + let chunk = Cstruct.create 1024 in 481 + let rec loop () = 482 + match Eio.Flow.single_read stdout_r chunk with 483 + | n -> Buffer.add_string stdout_buf (Cstruct.to_string (Cstruct.sub chunk 0 n)); loop () 484 + | exception End_of_file -> () 485 + in loop ()) 486 + (fun () -> 487 + let chunk = Cstruct.create 1024 in 488 + let rec loop () = 489 + match Eio.Flow.single_read stderr_r chunk with 490 + | n -> Buffer.add_string stderr_buf (Cstruct.to_string (Cstruct.sub chunk 0 n)); loop () 491 + | exception End_of_file -> () 492 + in loop ()); 493 + 494 + let status = Eio.Process.await commit_child in 495 + (status, Buffer.contents stdout_buf, Buffer.contents stderr_buf) 496 + in 497 + let (status, stdout, stderr) = result in 498 + match status with 499 + | `Exited 0 -> ok (Printf.sprintf "Committed:\n%s" stdout) 500 + | `Exited 1 when String.length stdout > 0 -> 501 + ok "Nothing to commit (working tree clean)" 502 + | _ -> err (Printf.sprintf "Commit failed:\n%s\n%s" stdout stderr) 503 + with exn -> 504 + err (Printf.sprintf "Failed to commit: %s" (Printexc.to_string exn)) 505 + 506 + (* === MCP SERVER CREATION === *) 507 + 508 + (** Create an MCP server with all unpac tools. 509 + 510 + The server name will be "unpac" so tools are accessible as mcp__unpac__<tool_name>. 511 + Call this with the Eio environment to create handlers with captured context. *) 512 + let create_mcp_server ~proc_mgr ~fs ~root = 513 + let open Claude.Tool in 514 + 515 + let tools = [ 516 + (* Workspace status tools *) 517 + create 518 + ~name:"unpac_status" 519 + ~description:"Get an overview of the unpac workspace, including all projects, \ 520 + vendored git repositories, and opam packages." 521 + ~input_schema:(schema_object [] ~required:[]) 522 + ~handler:(fun _args -> status ~proc_mgr ~root ()); 523 + 524 + create 525 + ~name:"unpac_status_sync" 526 + ~description:"Run 'unpac status' to update README.md and sync workspace state. \ 527 + Call this periodically to keep the workspace documentation current." 528 + ~input_schema:(schema_object [] ~required:[]) 529 + ~handler:(fun _args -> unpac_status_sync ~proc_mgr ~root ()); 530 + 531 + create 532 + ~name:"unpac_push" 533 + ~description:"Push all branches to the remote repository. Call this after making \ 534 + changes to sync with the remote." 535 + ~input_schema:(schema_object [("remote", schema_string)] ~required:["remote"]) 536 + ~handler:(fun args -> 537 + match Claude.Tool_input.get_string args "remote" with 538 + | None -> err "Missing required parameter: remote" 539 + | Some remote -> unpac_push ~proc_mgr ~root ~remote ()); 540 + 541 + (* Git vendoring tools *) 542 + create 543 + ~name:"unpac_git_list" 544 + ~description:"List all vendored git repositories in the workspace." 545 + ~input_schema:(schema_object [] ~required:[]) 546 + ~handler:(fun _args -> git_list ~proc_mgr ~root ()); 547 + 548 + create 549 + ~name:"unpac_git_add" 550 + ~description:"Vendor a new git repository. Clones the repo and creates the three-tier \ 551 + branch structure for conflict-free vendoring with full history preservation." 552 + ~input_schema:(schema_object [ 553 + ("url", schema_string); 554 + ("name", schema_string); 555 + ("branch", schema_string); 556 + ("subdir", schema_string); 557 + ] ~required:["url"]) 558 + ~handler:(fun args -> 559 + match Claude.Tool_input.get_string args "url" with 560 + | None -> err "Missing required parameter: url" 561 + | Some url -> 562 + let name = Claude.Tool_input.get_string args "name" in 563 + let branch = Claude.Tool_input.get_string args "branch" in 564 + let subdir = Claude.Tool_input.get_string args "subdir" in 565 + git_add ~proc_mgr ~fs ~root ~url ?name ?branch ?subdir ()); 566 + 567 + create 568 + ~name:"unpac_git_info" 569 + ~description:"Show detailed information about a vendored git repository, including \ 570 + branch SHAs and number of local commits." 571 + ~input_schema:(schema_object [("name", schema_string)] ~required:["name"]) 572 + ~handler:(fun args -> 573 + match Claude.Tool_input.get_string args "name" with 574 + | None -> err "Missing required parameter: name" 575 + | Some name -> git_info ~proc_mgr ~root ~name ()); 576 + 577 + create 578 + ~name:"unpac_git_diff" 579 + ~description:"Show the diff between vendor and patches branches for a git repository. \ 580 + This shows what local modifications have been made." 581 + ~input_schema:(schema_object [("name", schema_string)] ~required:["name"]) 582 + ~handler:(fun args -> 583 + match Claude.Tool_input.get_string args "name" with 584 + | None -> err "Missing required parameter: name" 585 + | Some name -> git_diff ~proc_mgr ~root ~name ()); 586 + 587 + (* Opam tools *) 588 + create 589 + ~name:"unpac_opam_list" 590 + ~description:"List all vendored opam packages in the workspace." 591 + ~input_schema:(schema_object [] ~required:[]) 592 + ~handler:(fun _args -> opam_list ~proc_mgr ~root ()); 593 + 594 + create 595 + ~name:"unpac_project_list" 596 + ~description:"List all projects in the workspace." 597 + ~input_schema:(schema_object [] ~required:[]) 598 + ~handler:(fun _args -> project_list ~proc_mgr ~root ()); 599 + 600 + (* File operation tools *) 601 + create 602 + ~name:"read_file" 603 + ~description:"Read the contents of a file. Use this to analyze source code, \ 604 + STATUS.md files, test files, etc." 605 + ~input_schema:(schema_object [("path", schema_string)] ~required:["path"]) 606 + ~handler:(fun args -> 607 + match Claude.Tool_input.get_string args "path" with 608 + | None -> err "Missing required parameter: path" 609 + | Some path -> read_file ~fs ~path ()); 610 + 611 + create 612 + ~name:"write_file" 613 + ~description:"Write content to a file. Use this to update STATUS.md, fix code, \ 614 + add tests, etc. Parent directories are created if needed." 615 + ~input_schema:(schema_object [ 616 + ("path", schema_string); 617 + ("content", schema_string); 618 + ] ~required:["path"; "content"]) 619 + ~handler:(fun args -> 620 + match Claude.Tool_input.get_string args "path", Claude.Tool_input.get_string args "content" with 621 + | None, _ -> err "Missing required parameter: path" 622 + | _, None -> err "Missing required parameter: content" 623 + | Some path, Some content -> write_file ~fs ~path ~content ()); 624 + 625 + create 626 + ~name:"list_directory" 627 + ~description:"List the contents of a directory." 628 + ~input_schema:(schema_object [("path", schema_string)] ~required:["path"]) 629 + ~handler:(fun args -> 630 + match Claude.Tool_input.get_string args "path" with 631 + | None -> err "Missing required parameter: path" 632 + | Some path -> list_dir ~fs ~path ()); 633 + 634 + create 635 + ~name:"glob_files" 636 + ~description:"Find files matching a glob pattern. Supports *.ml, **/*.ml patterns." 637 + ~input_schema:(schema_object [ 638 + ("pattern", schema_string); 639 + ("base_path", schema_string); 640 + ] ~required:["pattern"; "base_path"]) 641 + ~handler:(fun args -> 642 + match Claude.Tool_input.get_string args "pattern", Claude.Tool_input.get_string args "base_path" with 643 + | None, _ -> err "Missing required parameter: pattern" 644 + | _, None -> err "Missing required parameter: base_path" 645 + | Some pattern, Some base_path -> glob_files ~fs ~pattern ~base_path ()); 646 + 647 + (* Shell execution *) 648 + create 649 + ~name:"run_shell" 650 + ~description:"Execute a shell command. Use for building (dune build), testing \ 651 + (dune test), or other operations. Be careful with destructive commands." 652 + ~input_schema:(schema_object [ 653 + ("command", schema_string); 654 + ("cwd", schema_string); 655 + ] ~required:["command"; "cwd"]) 656 + ~handler:(fun args -> 657 + match Claude.Tool_input.get_string args "command", Claude.Tool_input.get_string args "cwd" with 658 + | None, _ -> err "Missing required parameter: command" 659 + | _, None -> err "Missing required parameter: cwd" 660 + | Some command, Some cwd -> run_shell ~proc_mgr ~fs ~cwd ~command ()); 661 + 662 + (* Git commit *) 663 + create 664 + ~name:"git_commit" 665 + ~description:"Stage all changes and create a git commit with the given message." 666 + ~input_schema:(schema_object [ 667 + ("cwd", schema_string); 668 + ("message", schema_string); 669 + ] ~required:["cwd"; "message"]) 670 + ~handler:(fun args -> 671 + match Claude.Tool_input.get_string args "cwd", Claude.Tool_input.get_string args "message" with 672 + | None, _ -> err "Missing required parameter: cwd" 673 + | _, None -> err "Missing required parameter: message" 674 + | Some cwd, Some message -> 675 + let cwd_path = Eio.Path.(fs / cwd) in 676 + git_commit ~proc_mgr ~cwd:cwd_path ~message ()); 677 + ] in 678 + 679 + Claude.Mcp_server.create ~name:"unpac" ~version:"1.0.0" ~tools ()
+42
lib/claude/tools.mli
··· 1 + (** Tool definitions for Claude to interact with unpac. 2 + 3 + Uses the Claude SDK's MCP-based custom tool architecture. All tools are 4 + bundled into an in-process MCP server that gets registered with the 5 + Claude client, making them available as mcp__unpac__<tool_name>. 6 + 7 + {1 Available Tools} 8 + 9 + Workspace status: 10 + - [unpac_status] - Overview of workspace (projects, git repos, opam packages) 11 + - [unpac_status_sync] - Run 'unpac status' to update README.md 12 + - [unpac_push] - Push all branches to remote 13 + 14 + Git vendoring: 15 + - [unpac_git_list] - List vendored git repositories 16 + - [unpac_git_add] - Vendor a new git repository 17 + - [unpac_git_info] - Show details about a vendored repository 18 + - [unpac_git_diff] - Show local changes in a vendored repository 19 + 20 + Opam: 21 + - [unpac_opam_list] - List vendored opam packages 22 + - [unpac_project_list] - List projects 23 + 24 + File operations: 25 + - [read_file] - Read file contents 26 + - [write_file] - Write content to a file 27 + - [list_directory] - List directory contents 28 + - [glob_files] - Find files matching a pattern 29 + 30 + Shell: 31 + - [run_shell] - Execute a shell command 32 + - [git_commit] - Stage and commit changes *) 33 + 34 + val create_mcp_server : 35 + proc_mgr:Unpac.Git.proc_mgr -> 36 + fs:Eio.Fs.dir_ty Eio.Path.t -> 37 + root:Unpac.Worktree.root -> 38 + Claude.Mcp_server.t 39 + (** Create an MCP server with all unpac tools. 40 + 41 + The server is named "unpac" so tools are accessible as [mcp__unpac__<tool_name>]. 42 + Register it with [Claude.Options.with_mcp_server ~name:"unpac" server]. *)
+7
lib/claude/unpac_claude.ml
··· 1 + (** Unpac Claude agent - autonomous coding assistant for unpac workflows. *) 2 + 3 + module Tools = Tools 4 + module Prompt = Prompt 5 + module Agent = Agent 6 + module Event = Event 7 + module Web = Web
+460
lib/claude/web.ml
··· 1 + (** Minimal WebSocket server for live agent UI. 2 + 3 + Uses cohttp-eio for the HTTP upgrade handshake, 4 + then raw Eio sockets for WebSocket frames. *) 5 + 6 + let src = Logs.Src.create "unpac.claude.web" ~doc:"Web server" 7 + module Log = (val Logs.src_log src : Logs.LOG) 8 + 9 + (* WebSocket frame helpers *) 10 + module Ws = struct 11 + (* Compute Sec-WebSocket-Accept from client key *) 12 + let accept_key client_key = 13 + let magic = "258EAFA5-E914-47DA-95CA-C5AB0DC85B11" in 14 + let combined = client_key ^ magic in 15 + let hash = Digestif.SHA1.digest_string combined in 16 + Base64.encode_exn (Digestif.SHA1.to_raw_string hash) 17 + 18 + (* Send a WebSocket text frame *) 19 + let send_text flow text = 20 + let len = String.length text in 21 + let header = 22 + if len < 126 then 23 + let h = Bytes.create 2 in 24 + Bytes.set_uint8 h 0 0x81; (* FIN + text opcode *) 25 + Bytes.set_uint8 h 1 len; 26 + Bytes.to_string h 27 + else if len < 65536 then 28 + let h = Bytes.create 4 in 29 + Bytes.set_uint8 h 0 0x81; 30 + Bytes.set_uint8 h 1 126; 31 + Bytes.set_uint16_be h 2 len; 32 + Bytes.to_string h 33 + else 34 + let h = Bytes.create 10 in 35 + Bytes.set_uint8 h 0 0x81; 36 + Bytes.set_uint8 h 1 127; 37 + Bytes.set_int64_be h 2 (Int64.of_int len); 38 + Bytes.to_string h 39 + in 40 + Eio.Flow.copy_string (header ^ text) flow 41 + 42 + (* Send close frame *) 43 + let send_close flow = 44 + let frame = Bytes.create 2 in 45 + Bytes.set_uint8 frame 0 0x88; (* FIN + close opcode *) 46 + Bytes.set_uint8 frame 1 0; 47 + Eio.Flow.copy_string (Bytes.to_string frame) flow 48 + 49 + (* Read a WebSocket frame, returns (opcode, payload) or None on close *) 50 + let read_frame flow = 51 + let buf = Cstruct.create 2 in 52 + match Eio.Flow.read_exact flow buf with 53 + | exception End_of_file -> None 54 + | () -> 55 + let b0 = Cstruct.get_uint8 buf 0 in 56 + let b1 = Cstruct.get_uint8 buf 1 in 57 + let _fin = (b0 land 0x80) <> 0 in 58 + let opcode = b0 land 0x0F in 59 + let masked = (b1 land 0x80) <> 0 in 60 + let len0 = b1 land 0x7F in 61 + 62 + (* Get actual length *) 63 + let len = 64 + if len0 < 126 then len0 65 + else if len0 = 126 then begin 66 + let buf = Cstruct.create 2 in 67 + Eio.Flow.read_exact flow buf; 68 + Cstruct.BE.get_uint16 buf 0 69 + end else begin 70 + let buf = Cstruct.create 8 in 71 + Eio.Flow.read_exact flow buf; 72 + Int64.to_int (Cstruct.BE.get_uint64 buf 0) 73 + end 74 + in 75 + 76 + (* Get mask if present *) 77 + let mask = 78 + if masked then begin 79 + let buf = Cstruct.create 4 in 80 + Eio.Flow.read_exact flow buf; 81 + Some (Cstruct.to_bytes buf) 82 + end else None 83 + in 84 + 85 + (* Read payload *) 86 + let payload = Cstruct.create len in 87 + if len > 0 then Eio.Flow.read_exact flow payload; 88 + 89 + (* Unmask if needed *) 90 + let data = 91 + match mask with 92 + | None -> Cstruct.to_string payload 93 + | Some m -> 94 + let bytes = Cstruct.to_bytes payload in 95 + for i = 0 to len - 1 do 96 + let b = Bytes.get_uint8 bytes i in 97 + let k = Bytes.get_uint8 m (i mod 4) in 98 + Bytes.set_uint8 bytes i (b lxor k) 99 + done; 100 + Bytes.to_string bytes 101 + in 102 + Some (opcode, data) 103 + end 104 + 105 + (* Static HTML page *) 106 + let index_html = {|<!DOCTYPE html> 107 + <html lang="en"> 108 + <head> 109 + <meta charset="UTF-8"> 110 + <meta name="viewport" content="width=device-width, initial-scale=1.0"> 111 + <title>unpac-claude</title> 112 + <style> 113 + :root { 114 + --bg: #0d1117; 115 + --fg: #c9d1d9; 116 + --dim: #6e7681; 117 + --border: #30363d; 118 + --accent: #58a6ff; 119 + --green: #3fb950; 120 + --red: #f85149; 121 + --yellow: #d29922; 122 + } 123 + * { box-sizing: border-box; margin: 0; padding: 0; } 124 + body { 125 + font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, monospace; 126 + font-size: 14px; 127 + line-height: 1.5; 128 + background: var(--bg); 129 + color: var(--fg); 130 + height: 100vh; 131 + display: flex; 132 + flex-direction: column; 133 + } 134 + header { 135 + padding: 12px 16px; 136 + border-bottom: 1px solid var(--border); 137 + display: flex; 138 + align-items: center; 139 + gap: 12px; 140 + } 141 + header h1 { 142 + font-size: 16px; 143 + font-weight: 600; 144 + } 145 + .status { 146 + font-size: 12px; 147 + padding: 2px 8px; 148 + border-radius: 12px; 149 + background: var(--border); 150 + } 151 + .status.connected { background: var(--green); color: #000; } 152 + .status.error { background: var(--red); color: #fff; } 153 + #events { 154 + flex: 1; 155 + overflow-y: auto; 156 + padding: 16px; 157 + } 158 + .event { 159 + margin-bottom: 8px; 160 + padding: 8px 12px; 161 + border-radius: 6px; 162 + border-left: 3px solid var(--border); 163 + } 164 + .event.thinking { border-color: var(--yellow); color: var(--dim); } 165 + .event.text { border-color: var(--fg); white-space: pre-wrap; } 166 + .event.tool_call { border-color: var(--accent); } 167 + .event.tool_result { border-color: var(--green); } 168 + .event.tool_result.error { border-color: var(--red); } 169 + .event.error { border-color: var(--red); background: rgba(248,81,73,0.1); } 170 + .event.sync { border-color: var(--yellow); } 171 + .event.turn_complete { border-color: var(--dim); color: var(--dim); font-size: 12px; } 172 + .tool-name { 173 + color: var(--accent); 174 + font-weight: 600; 175 + } 176 + .tool-input, .tool-output { 177 + margin-top: 4px; 178 + padding: 8px; 179 + background: rgba(0,0,0,0.3); 180 + border-radius: 4px; 181 + font-size: 12px; 182 + max-height: 200px; 183 + overflow: auto; 184 + white-space: pre-wrap; 185 + word-break: break-all; 186 + } 187 + .cost { color: var(--dim); } 188 + footer { 189 + padding: 8px 16px; 190 + border-top: 1px solid var(--border); 191 + font-size: 12px; 192 + color: var(--dim); 193 + } 194 + </style> 195 + </head> 196 + <body> 197 + <header> 198 + <h1>unpac-claude</h1> 199 + <span id="status" class="status">connecting...</span> 200 + </header> 201 + <div id="events"></div> 202 + <footer> 203 + <span id="turn">Turn: 0</span> | 204 + <span id="cost">Cost: $0.00</span> 205 + </footer> 206 + <script> 207 + const events = document.getElementById('events'); 208 + const status = document.getElementById('status'); 209 + const turnEl = document.getElementById('turn'); 210 + const costEl = document.getElementById('cost'); 211 + let totalCost = 0; 212 + let currentTurn = 0; 213 + 214 + function connect() { 215 + const ws = new WebSocket(`ws://${location.host}/ws`); 216 + 217 + ws.onopen = () => { 218 + status.textContent = 'connected'; 219 + status.className = 'status connected'; 220 + }; 221 + 222 + ws.onclose = () => { 223 + status.textContent = 'disconnected'; 224 + status.className = 'status'; 225 + setTimeout(connect, 2000); 226 + }; 227 + 228 + ws.onerror = () => { 229 + status.textContent = 'error'; 230 + status.className = 'status error'; 231 + }; 232 + 233 + ws.onmessage = (e) => { 234 + const data = JSON.parse(e.data); 235 + handleEvent(data); 236 + }; 237 + } 238 + 239 + function handleEvent(e) { 240 + const div = document.createElement('div'); 241 + div.className = 'event ' + e.type; 242 + 243 + switch (e.type) { 244 + case 'thinking': 245 + div.textContent = '⋯ thinking...'; 246 + break; 247 + case 'text': 248 + div.textContent = e.content; 249 + break; 250 + case 'tool_call': 251 + div.innerHTML = `<span class="tool-name">${esc(e.name)}</span>` + 252 + `<div class="tool-input">${esc(formatJson(e.input))}</div>`; 253 + break; 254 + case 'tool_result': 255 + if (e.is_error) div.classList.add('error'); 256 + div.innerHTML = `<span class="tool-name">${esc(e.name)}</span> ${e.is_error ? '✗' : '✓'}` + 257 + `<div class="tool-output">${esc(truncate(e.output, 2000))}</div>`; 258 + break; 259 + case 'error': 260 + div.textContent = '✗ ' + e.message; 261 + break; 262 + case 'sync': 263 + div.textContent = '↻ sync: ' + e.action; 264 + break; 265 + case 'turn_complete': 266 + currentTurn = e.turn; 267 + if (e.cost_usd) totalCost += e.cost_usd; 268 + turnEl.textContent = 'Turn: ' + currentTurn; 269 + costEl.textContent = 'Cost: $' + totalCost.toFixed(4); 270 + div.textContent = `Turn ${e.turn} complete` + (e.cost_usd ? ` ($${e.cost_usd.toFixed(4)})` : ''); 271 + break; 272 + case 'agent_start': 273 + div.textContent = '▶ Agent started'; 274 + div.style.borderColor = 'var(--green)'; 275 + break; 276 + case 'agent_stop': 277 + div.textContent = '■ Agent stopped'; 278 + div.style.borderColor = 'var(--red)'; 279 + break; 280 + default: 281 + div.textContent = JSON.stringify(e); 282 + } 283 + 284 + events.appendChild(div); 285 + events.scrollTop = events.scrollHeight; 286 + } 287 + 288 + function esc(s) { 289 + return s.replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;'); 290 + } 291 + 292 + function truncate(s, n) { 293 + return s.length > n ? s.slice(0, n) + '...[truncated]' : s; 294 + } 295 + 296 + function formatJson(s) { 297 + try { 298 + return JSON.stringify(JSON.parse(s), null, 2); 299 + } catch { 300 + return s; 301 + } 302 + } 303 + 304 + connect(); 305 + </script> 306 + </body> 307 + </html>|} 308 + 309 + (* We don't track clients for WebSocket broadcasting in this simple implementation. 310 + Instead, each WebSocket connection runs in its own fiber and subscribes to events. *) 311 + 312 + type t = unit 313 + 314 + let create _event_bus = () 315 + 316 + (* Broadcast is now a no-op since each connection handles its own events *) 317 + let broadcast _t _event = () 318 + 319 + (* Handle WebSocket connection - each connection subscribes to events directly *) 320 + let handle_websocket event_bus (flow : _ Eio.Net.stream_socket) = 321 + let closed = ref false in 322 + Log.info (fun m -> m "WebSocket client connected"); 323 + 324 + (* Subscribe to events and send them to this client *) 325 + let listener event = 326 + if not !closed then begin 327 + try 328 + let json = Event.to_json event in 329 + Ws.send_text flow json 330 + with _ -> 331 + closed := true 332 + end 333 + in 334 + Event.subscribe event_bus listener; 335 + 336 + (* Read loop - handle pings and close *) 337 + let rec loop () = 338 + match Ws.read_frame flow with 339 + | None -> 340 + closed := true 341 + | Some (0x8, _) -> (* Close *) 342 + Ws.send_close flow; 343 + closed := true 344 + | Some (0x9, data) -> (* Ping -> Pong *) 345 + let pong = Bytes.create (2 + String.length data) in 346 + Bytes.set_uint8 pong 0 0x8A; (* FIN + pong *) 347 + Bytes.set_uint8 pong 1 (String.length data); 348 + Bytes.blit_string data 0 pong 2 (String.length data); 349 + Eio.Flow.copy_string (Bytes.to_string pong) flow; 350 + loop () 351 + | Some _ -> 352 + loop () 353 + in 354 + (try loop () with _ -> ()); 355 + closed := true; 356 + Event.unsubscribe event_bus listener; 357 + Log.info (fun m -> m "WebSocket client disconnected") 358 + 359 + (* Parse HTTP request headers *) 360 + let parse_request data = 361 + let lines = String.split_on_char '\n' data in 362 + let headers = Hashtbl.create 16 in 363 + let path = ref "/" in 364 + List.iteri (fun i line -> 365 + let line = String.trim line in 366 + if i = 0 then begin 367 + (* Request line: GET /path HTTP/1.1 *) 368 + match String.split_on_char ' ' line with 369 + | _ :: p :: _ -> path := p 370 + | _ -> () 371 + end else begin 372 + match String.index_opt line ':' with 373 + | Some idx -> 374 + let key = String.lowercase_ascii (String.trim (String.sub line 0 idx)) in 375 + let value = String.trim (String.sub line (idx + 1) (String.length line - idx - 1)) in 376 + Hashtbl.add headers key value 377 + | None -> () 378 + end 379 + ) lines; 380 + (!path, headers) 381 + 382 + (* Handle HTTP request *) 383 + let handle_request event_bus (flow : _ Eio.Net.stream_socket) = 384 + let buf = Buffer.create 4096 in 385 + let chunk = Cstruct.create 4096 in 386 + 387 + (* Read request *) 388 + let rec read_headers () = 389 + match Eio.Flow.single_read flow chunk with 390 + | n -> 391 + Buffer.add_string buf (Cstruct.to_string (Cstruct.sub chunk 0 n)); 392 + let data = Buffer.contents buf in 393 + if String.length data > 4 && 394 + String.sub data (String.length data - 4) 4 = "\r\n\r\n" 395 + then data 396 + else read_headers () 397 + | exception End_of_file -> Buffer.contents buf 398 + in 399 + let request = read_headers () in 400 + let (path, headers) = parse_request request in 401 + 402 + Log.debug (fun m -> m "Request: %s" path); 403 + 404 + (* Check for WebSocket upgrade *) 405 + let is_upgrade = 406 + Hashtbl.find_opt headers "upgrade" = Some "websocket" && 407 + Hashtbl.mem headers "sec-websocket-key" 408 + in 409 + 410 + if path = "/ws" && is_upgrade then begin 411 + (* WebSocket handshake *) 412 + let key = Hashtbl.find headers "sec-websocket-key" in 413 + let accept = Ws.accept_key key in 414 + let response = Printf.sprintf 415 + "HTTP/1.1 101 Switching Protocols\r\n\ 416 + Upgrade: websocket\r\n\ 417 + Connection: Upgrade\r\n\ 418 + Sec-WebSocket-Accept: %s\r\n\r\n" accept 419 + in 420 + Eio.Flow.copy_string response flow; 421 + handle_websocket event_bus flow 422 + end else begin 423 + (* Serve static content *) 424 + let (status, content_type, body) = 425 + if path = "/" || path = "/index.html" then 426 + ("200 OK", "text/html", index_html) 427 + else 428 + ("404 Not Found", "text/plain", "Not Found") 429 + in 430 + let response = Printf.sprintf 431 + "HTTP/1.1 %s\r\n\ 432 + Content-Type: %s\r\n\ 433 + Content-Length: %d\r\n\ 434 + Connection: close\r\n\r\n%s" 435 + status content_type (String.length body) body 436 + in 437 + Eio.Flow.copy_string response flow 438 + end 439 + 440 + (* Start the web server *) 441 + let start ~sw ~net ~port event_bus = 442 + let t = create event_bus in 443 + 444 + (* Listen for connections *) 445 + let addr = `Tcp (Eio.Net.Ipaddr.V4.loopback, port) in 446 + let socket = Eio.Net.listen net ~sw ~reuse_addr:true ~backlog:10 addr in 447 + Log.info (fun m -> m "Web server listening on http://localhost:%d" port); 448 + 449 + (* Accept loop *) 450 + let rec accept_loop () = 451 + let flow, _addr = Eio.Net.accept ~sw socket in 452 + Eio.Fiber.fork ~sw (fun () -> 453 + try handle_request event_bus flow 454 + with exn -> 455 + Log.warn (fun m -> m "Request error: %s" (Printexc.to_string exn)) 456 + ); 457 + accept_loop () 458 + in 459 + Eio.Fiber.fork ~sw accept_loop; 460 + t
+23
lib/claude/web.mli
··· 1 + (** Minimal WebSocket server for live agent UI. 2 + 3 + Serves a single-page web UI that displays agent events in real-time. *) 4 + 5 + type t 6 + (** Web server state. *) 7 + 8 + val start : 9 + sw:Eio.Switch.t -> 10 + net:_ Eio.Net.t -> 11 + port:int -> 12 + Event.bus -> 13 + t 14 + (** [start ~sw ~net ~port event_bus] starts the web server. 15 + 16 + Listens on [port] and serves: 17 + - GET / - The HTML UI 18 + - WS /ws - WebSocket for event streaming 19 + 20 + Subscribes to [event_bus] and broadcasts all events to connected clients. *) 21 + 22 + val broadcast : t -> Event.t -> unit 23 + (** [broadcast t event] sends an event to all connected WebSocket clients. *)
+239
lib/config.ml
··· 1 + (** Configuration file handling for unpac. 2 + 3 + Loads and parses main/unpac.toml configuration files. *) 4 + 5 + (** {1 Types} *) 6 + 7 + type repo_source = 8 + | Local of string 9 + | Remote of string 10 + 11 + type repo_config = { 12 + repo_name : string; 13 + source : repo_source; 14 + } 15 + 16 + type vendored_package = { 17 + pkg_name : string; (** Package name (used as vendor name) *) 18 + pkg_url : string; (** Original remote URL *) 19 + pkg_branch : string option; (** Original branch if specified *) 20 + } 21 + 22 + type opam_config = { 23 + repositories : repo_config list; 24 + compiler : string option; 25 + vendored : vendored_package list; (** Tracked vendored packages *) 26 + } 27 + 28 + (** Git repository configuration for direct git vendoring *) 29 + type git_repo_config = { 30 + git_name : string; (** User-specified name for the repo *) 31 + git_url : string; (** Git URL to clone from *) 32 + git_branch : string option; (** Optional branch/tag to track *) 33 + git_subdir : string option; (** Optional subdirectory to extract *) 34 + } 35 + 36 + type git_config = { 37 + repos : git_repo_config list; 38 + } 39 + 40 + type project_config = { 41 + project_name : string; 42 + } 43 + 44 + type t = { 45 + opam : opam_config; 46 + git : git_config; 47 + vendor_cache : string option; 48 + projects : project_config list; 49 + } 50 + 51 + (** {1 TOML Codecs} *) 52 + 53 + let repo_config_codec : repo_config Tomlt.t = 54 + let open Tomlt in 55 + let open Table in 56 + let make repo_name path url : repo_config = 57 + let source = 58 + match (path, url) with 59 + | Some p, None -> Local p 60 + | None, Some u -> Remote u 61 + | Some _, Some _ -> 62 + failwith "Repository cannot have both 'path' and 'url'" 63 + | None, None -> failwith "Repository must have either 'path' or 'url'" 64 + in 65 + { repo_name; source } 66 + in 67 + let enc_path (r : repo_config) = 68 + match r.source with Local p -> Some p | Remote _ -> None 69 + in 70 + let enc_url (r : repo_config) = 71 + match r.source with Remote u -> Some u | Local _ -> None 72 + in 73 + obj make 74 + |> mem "name" string ~enc:(fun (r : repo_config) -> r.repo_name) 75 + |> opt_mem "path" string ~enc:enc_path 76 + |> opt_mem "url" string ~enc:enc_url 77 + |> finish 78 + 79 + let vendored_package_codec : vendored_package Tomlt.t = 80 + let open Tomlt in 81 + let open Table in 82 + obj (fun pkg_name pkg_url pkg_branch : vendored_package -> 83 + { pkg_name; pkg_url; pkg_branch }) 84 + |> mem "name" string ~enc:(fun (p : vendored_package) -> p.pkg_name) 85 + |> mem "url" string ~enc:(fun (p : vendored_package) -> p.pkg_url) 86 + |> opt_mem "branch" string ~enc:(fun (p : vendored_package) -> p.pkg_branch) 87 + |> finish 88 + 89 + let opam_config_codec : opam_config Tomlt.t = 90 + let open Tomlt in 91 + let open Table in 92 + obj (fun repositories compiler vendored : opam_config -> 93 + { repositories; compiler; vendored = Option.value ~default:[] vendored }) 94 + |> mem "repositories" (list repo_config_codec) 95 + ~enc:(fun (c : opam_config) -> c.repositories) 96 + |> opt_mem "compiler" string ~enc:(fun (c : opam_config) -> c.compiler) 97 + |> opt_mem "vendored" (list vendored_package_codec) 98 + ~enc:(fun (c : opam_config) -> if c.vendored = [] then None else Some c.vendored) 99 + |> finish 100 + 101 + let git_repo_config_codec : git_repo_config Tomlt.t = 102 + let open Tomlt in 103 + let open Table in 104 + obj (fun git_name git_url git_branch git_subdir : git_repo_config -> 105 + { git_name; git_url; git_branch; git_subdir }) 106 + |> mem "name" string ~enc:(fun (r : git_repo_config) -> r.git_name) 107 + |> mem "url" string ~enc:(fun (r : git_repo_config) -> r.git_url) 108 + |> opt_mem "branch" string ~enc:(fun (r : git_repo_config) -> r.git_branch) 109 + |> opt_mem "subdir" string ~enc:(fun (r : git_repo_config) -> r.git_subdir) 110 + |> finish 111 + 112 + let git_config_codec : git_config Tomlt.t = 113 + let open Tomlt in 114 + let open Table in 115 + obj (fun repos : git_config -> { repos }) 116 + |> mem "repos" (list git_repo_config_codec) 117 + ~enc:(fun (c : git_config) -> c.repos) 118 + |> finish 119 + 120 + let empty_git = { repos = [] } 121 + 122 + (* For now, projects is not parsed from TOML - derived from git branches *) 123 + type config = t 124 + 125 + let codec : config Tomlt.t = 126 + let open Tomlt in 127 + let open Table in 128 + obj (fun opam git vendor_cache : config -> 129 + { opam; git = Option.value ~default:empty_git git; vendor_cache; projects = [] }) 130 + |> mem "opam" opam_config_codec ~enc:(fun (c : config) -> c.opam) 131 + |> opt_mem "git" git_config_codec ~enc:(fun (c : config) -> 132 + if c.git.repos = [] then None else Some c.git) 133 + |> opt_mem "vendor_cache" string ~enc:(fun (c : config) -> c.vendor_cache) 134 + |> finish 135 + 136 + (** {1 Loading} *) 137 + 138 + let load path = 139 + try 140 + let content = In_channel.with_open_text path In_channel.input_all in 141 + Tomlt_bytesrw.decode_string codec content 142 + |> Result.map_error Tomlt.Toml.Error.to_string 143 + with 144 + | Sys_error msg -> Error msg 145 + | Failure msg -> Error msg 146 + 147 + let load_exn path = 148 + match load path with Ok c -> c | Error msg -> failwith msg 149 + 150 + (** {1 Saving} *) 151 + 152 + let save path config = 153 + try 154 + let content = Tomlt_bytesrw.encode_string codec config in 155 + Out_channel.with_open_text path (fun oc -> 156 + Out_channel.output_string oc content); 157 + Ok () 158 + with 159 + | Sys_error msg -> Error msg 160 + | Failure msg -> Error msg 161 + 162 + let save_exn path config = 163 + match save path config with 164 + | Ok () -> () 165 + | Error msg -> failwith msg 166 + 167 + (** {1 Helpers} *) 168 + 169 + let empty_opam = { repositories = []; compiler = None; vendored = [] } 170 + let empty = { opam = empty_opam; git = empty_git; vendor_cache = None; projects = [] } 171 + 172 + let find_project config name = 173 + List.find_opt (fun p -> p.project_name = name) config.projects 174 + 175 + (* Opam repo helpers *) 176 + let add_repo config repo = 177 + let repos = config.opam.repositories @ [repo] in 178 + { config with opam = { config.opam with repositories = repos } } 179 + 180 + let remove_repo config name = 181 + let repos = List.filter (fun r -> r.repo_name <> name) config.opam.repositories in 182 + { config with opam = { config.opam with repositories = repos } } 183 + 184 + let find_repo config name = 185 + List.find_opt (fun r -> r.repo_name = name) config.opam.repositories 186 + 187 + let set_compiler config version = 188 + { config with opam = { config.opam with compiler = Some version } } 189 + 190 + let get_compiler config = 191 + config.opam.compiler 192 + 193 + (* Git repo helpers *) 194 + let add_git_repo config (repo : git_repo_config) = 195 + let repos = config.git.repos @ [repo] in 196 + { config with git = { repos } } 197 + 198 + let remove_git_repo config name = 199 + let repos = List.filter (fun (r : git_repo_config) -> r.git_name <> name) config.git.repos in 200 + { config with git = { repos } } 201 + 202 + let find_git_repo config name = 203 + List.find_opt (fun (r : git_repo_config) -> r.git_name = name) config.git.repos 204 + 205 + let list_git_repos config = 206 + config.git.repos 207 + 208 + (* Vendor cache helpers *) 209 + let set_vendor_cache config path = 210 + { config with vendor_cache = Some path } 211 + 212 + let get_vendor_cache config = 213 + config.vendor_cache 214 + 215 + let resolve_vendor_cache ?cli_override config = 216 + (* Priority: CLI flag > env var > config file > default *) 217 + match cli_override with 218 + | Some path -> Some path 219 + | None -> 220 + match Sys.getenv_opt "UNPAC_VENDOR_CACHE" with 221 + | Some path -> Some path 222 + | None -> config.vendor_cache 223 + 224 + (* Vendored package helpers *) 225 + let add_vendored_package config (pkg : vendored_package) = 226 + (* Replace if exists, otherwise append *) 227 + let vendored = List.filter (fun p -> p.pkg_name <> pkg.pkg_name) config.opam.vendored in 228 + let vendored = vendored @ [pkg] in 229 + { config with opam = { config.opam with vendored } } 230 + 231 + let remove_vendored_package config name = 232 + let vendored = List.filter (fun p -> p.pkg_name <> name) config.opam.vendored in 233 + { config with opam = { config.opam with vendored } } 234 + 235 + let find_vendored_package config name = 236 + List.find_opt (fun p -> p.pkg_name = name) config.opam.vendored 237 + 238 + let list_vendored_packages config = 239 + config.opam.vendored
+136
lib/config.mli
··· 1 + (** Configuration file handling for unpac. 2 + 3 + Loads and parses main/unpac.toml configuration files. *) 4 + 5 + (** {1 Types} *) 6 + 7 + type repo_source = 8 + | Local of string 9 + | Remote of string 10 + 11 + type repo_config = { 12 + repo_name : string; 13 + source : repo_source; 14 + } 15 + 16 + type vendored_package = { 17 + pkg_name : string; (** Package name (used as vendor name) *) 18 + pkg_url : string; (** Original remote URL *) 19 + pkg_branch : string option; (** Original branch if specified *) 20 + } 21 + 22 + type opam_config = { 23 + repositories : repo_config list; 24 + compiler : string option; 25 + vendored : vendored_package list; (** Tracked vendored packages *) 26 + } 27 + 28 + (** Git repository configuration for direct git vendoring *) 29 + type git_repo_config = { 30 + git_name : string; (** User-specified name for the repo *) 31 + git_url : string; (** Git URL to clone from *) 32 + git_branch : string option; (** Optional branch/tag to track *) 33 + git_subdir : string option; (** Optional subdirectory to extract *) 34 + } 35 + 36 + type git_config = { 37 + repos : git_repo_config list; 38 + } 39 + 40 + type project_config = { 41 + project_name : string; 42 + } 43 + 44 + type t = { 45 + opam : opam_config; 46 + git : git_config; 47 + vendor_cache : string option; 48 + projects : project_config list; 49 + } 50 + 51 + (** {1 Loading} *) 52 + 53 + val load : string -> (t, string) result 54 + (** [load path] loads configuration from the TOML file at [path]. *) 55 + 56 + val load_exn : string -> t 57 + (** [load_exn path] is like {!load} but raises on error. *) 58 + 59 + (** {1 Saving} *) 60 + 61 + val save : string -> t -> (unit, string) result 62 + (** [save path config] saves configuration to the TOML file at [path]. *) 63 + 64 + val save_exn : string -> t -> unit 65 + (** [save_exn path config] is like {!save} but raises on error. *) 66 + 67 + (** {1 Helpers} *) 68 + 69 + val empty : t 70 + (** Empty configuration. *) 71 + 72 + val find_project : t -> string -> project_config option 73 + (** [find_project config name] finds a project by name. *) 74 + 75 + (** {2 Opam Repository Helpers} *) 76 + 77 + val add_repo : t -> repo_config -> t 78 + (** [add_repo config repo] adds an opam repository to the config. *) 79 + 80 + val remove_repo : t -> string -> t 81 + (** [remove_repo config name] removes an opam repository by name. *) 82 + 83 + val find_repo : t -> string -> repo_config option 84 + (** [find_repo config name] finds an opam repository by name. *) 85 + 86 + val set_compiler : t -> string -> t 87 + (** [set_compiler config version] sets the OCaml compiler version. *) 88 + 89 + val get_compiler : t -> string option 90 + (** [get_compiler config] gets the configured OCaml compiler version. *) 91 + 92 + (** {2 Git Repository Helpers} *) 93 + 94 + val add_git_repo : t -> git_repo_config -> t 95 + (** [add_git_repo config repo] adds a git repository to the config. *) 96 + 97 + val remove_git_repo : t -> string -> t 98 + (** [remove_git_repo config name] removes a git repository by name. *) 99 + 100 + val find_git_repo : t -> string -> git_repo_config option 101 + (** [find_git_repo config name] finds a git repository by name. *) 102 + 103 + val list_git_repos : t -> git_repo_config list 104 + (** [list_git_repos config] returns all configured git repositories. *) 105 + 106 + (** {2 Vendor Cache Helpers} *) 107 + 108 + val set_vendor_cache : t -> string -> t 109 + (** [set_vendor_cache config path] sets the vendor cache path. *) 110 + 111 + val get_vendor_cache : t -> string option 112 + (** [get_vendor_cache config] gets the configured vendor cache path. *) 113 + 114 + val resolve_vendor_cache : ?cli_override:string -> t -> string option 115 + (** [resolve_vendor_cache ?cli_override config] resolves vendor cache path. 116 + Priority: CLI flag > UNPAC_VENDOR_CACHE env var > config file. 117 + Returns None if not configured anywhere. *) 118 + 119 + (** {2 Vendored Package Helpers} *) 120 + 121 + val add_vendored_package : t -> vendored_package -> t 122 + (** [add_vendored_package config pkg] adds or replaces a vendored package entry. *) 123 + 124 + val remove_vendored_package : t -> string -> t 125 + (** [remove_vendored_package config name] removes a vendored package by name. *) 126 + 127 + val find_vendored_package : t -> string -> vendored_package option 128 + (** [find_vendored_package config name] finds a vendored package by name. *) 129 + 130 + val list_vendored_packages : t -> vendored_package list 131 + (** [list_vendored_packages config] returns all vendored packages. *) 132 + 133 + (** {1 Codecs} *) 134 + 135 + val codec : t Tomlt.t 136 + (** TOML codec for the configuration type. *)
+13
lib/dune
··· 1 + (library 2 + (name unpac) 3 + (public_name unpac) 4 + (libraries 5 + eio 6 + logs 7 + logs.fmt 8 + fmt 9 + fmt.tty 10 + tomlt 11 + tomlt.bytesrw 12 + jsont 13 + jsont.bytesrw))
+535
lib/git.ml
··· 1 + (** Git operations wrapped with Eio and robust error handling. *) 2 + 3 + let src = Logs.Src.create "unpac.git" ~doc:"Git operations" 4 + module Log = (val Logs.src_log src : Logs.LOG) 5 + 6 + (* Error types *) 7 + 8 + type error = 9 + | Command_failed of { 10 + cmd : string list; 11 + exit_code : int; 12 + stdout : string; 13 + stderr : string; 14 + } 15 + | Not_a_repository 16 + | Remote_exists of string 17 + | Remote_not_found of string 18 + | Branch_exists of string 19 + | Branch_not_found of string 20 + | Merge_conflict of { branch : string; conflicting_files : string list } 21 + | Rebase_conflict of { onto : string; hint : string } 22 + | Uncommitted_changes 23 + | Not_on_branch 24 + | Detached_head 25 + 26 + let pp_error fmt = function 27 + | Command_failed { cmd; exit_code; stderr; _ } -> 28 + Format.fprintf fmt "git %a failed (exit %d): %s" 29 + Fmt.(list ~sep:sp string) cmd exit_code 30 + (String.trim stderr) 31 + | Not_a_repository -> 32 + Format.fprintf fmt "not a git repository" 33 + | Remote_exists name -> 34 + Format.fprintf fmt "remote '%s' already exists" name 35 + | Remote_not_found name -> 36 + Format.fprintf fmt "remote '%s' not found" name 37 + | Branch_exists name -> 38 + Format.fprintf fmt "branch '%s' already exists" name 39 + | Branch_not_found name -> 40 + Format.fprintf fmt "branch '%s' not found" name 41 + | Merge_conflict { branch; conflicting_files } -> 42 + Format.fprintf fmt "merge conflict in '%s': %a" branch 43 + Fmt.(list ~sep:comma string) conflicting_files 44 + | Rebase_conflict { onto; hint } -> 45 + Format.fprintf fmt "rebase conflict onto '%s': %s" onto hint 46 + | Uncommitted_changes -> 47 + Format.fprintf fmt "uncommitted changes in working directory" 48 + | Not_on_branch -> 49 + Format.fprintf fmt "not on any branch" 50 + | Detached_head -> 51 + Format.fprintf fmt "HEAD is detached" 52 + 53 + type Eio.Exn.err += E of error 54 + 55 + let () = 56 + Eio.Exn.register_pp (fun fmt -> function 57 + | E e -> Format.fprintf fmt "Git %a" pp_error e; true 58 + | _ -> false) 59 + 60 + let err e = Eio.Exn.create (E e) 61 + 62 + (* Types *) 63 + 64 + type proc_mgr = [ `Generic | `Unix ] Eio.Process.mgr_ty Eio.Resource.t 65 + type path = Eio.Fs.dir_ty Eio.Path.t 66 + 67 + (* Helpers *) 68 + 69 + let string_trim s = String.trim s 70 + 71 + let lines s = 72 + String.split_on_char '\n' s 73 + |> List.filter (fun s -> String.trim s <> "") 74 + 75 + (* Low-level execution *) 76 + 77 + let run ~proc_mgr ?cwd ?audit args = 78 + let full_cmd = "git" :: args in 79 + Log.debug (fun m -> m "Running: %a" Fmt.(list ~sep:sp string) full_cmd); 80 + let started = Unix.gettimeofday () in 81 + let cwd_str = match cwd with Some p -> snd p | None -> Sys.getcwd () in 82 + let stdout_buf = Buffer.create 256 in 83 + let stderr_buf = Buffer.create 256 in 84 + try 85 + Eio.Switch.run @@ fun sw -> 86 + let stdout_r, stdout_w = Eio.Process.pipe proc_mgr ~sw in 87 + let stderr_r, stderr_w = Eio.Process.pipe proc_mgr ~sw in 88 + let child = Eio.Process.spawn proc_mgr ~sw 89 + ?cwd:(Option.map (fun p -> (p :> Eio.Fs.dir_ty Eio.Path.t)) cwd) 90 + ~stdout:stdout_w ~stderr:stderr_w 91 + full_cmd 92 + in 93 + Eio.Flow.close stdout_w; 94 + Eio.Flow.close stderr_w; 95 + (* Read stdout and stderr concurrently *) 96 + Eio.Fiber.both 97 + (fun () -> 98 + let chunk = Cstruct.create 4096 in 99 + let rec loop () = 100 + match Eio.Flow.single_read stdout_r chunk with 101 + | n -> 102 + Buffer.add_string stdout_buf (Cstruct.to_string (Cstruct.sub chunk 0 n)); 103 + loop () 104 + | exception End_of_file -> () 105 + in 106 + loop ()) 107 + (fun () -> 108 + let chunk = Cstruct.create 4096 in 109 + let rec loop () = 110 + match Eio.Flow.single_read stderr_r chunk with 111 + | n -> 112 + Buffer.add_string stderr_buf (Cstruct.to_string (Cstruct.sub chunk 0 n)); 113 + loop () 114 + | exception End_of_file -> () 115 + in 116 + loop ()); 117 + let status = Eio.Process.await child in 118 + let stdout = Buffer.contents stdout_buf in 119 + let stderr = Buffer.contents stderr_buf in 120 + let exit_code, result = match status with 121 + | `Exited 0 -> 122 + Log.debug (fun m -> m "Output: %s" (string_trim stdout)); 123 + 0, Ok stdout 124 + | `Exited code -> 125 + Log.debug (fun m -> m "Failed (exit %d): %s" code (string_trim stderr)); 126 + code, Error (Command_failed { cmd = args; exit_code = code; stdout; stderr }) 127 + | `Signaled signal -> 128 + Log.debug (fun m -> m "Killed by signal %d" signal); 129 + let code = 128 + signal in 130 + code, Error (Command_failed { cmd = args; exit_code = code; stdout; stderr }) 131 + in 132 + (* Record to audit if provided *) 133 + Option.iter (fun ctx -> 134 + let git_result : Audit.git_result = { exit_code; stdout; stderr } in 135 + Audit.record_git ctx ~cmd:args ~cwd:cwd_str ~started ~result:git_result 136 + ) audit; 137 + result 138 + with exn -> 139 + Log.err (fun m -> m "Exception running git: %a" Fmt.exn exn); 140 + raise exn 141 + 142 + let run_exn ~proc_mgr ?cwd ?audit args = 143 + match run ~proc_mgr ?cwd ?audit args with 144 + | Ok output -> output 145 + | Error e -> 146 + let ex = err e in 147 + raise (Eio.Exn.add_context ex "running git %a" Fmt.(list ~sep:sp string) args) 148 + 149 + let run_lines ~proc_mgr ?cwd ?audit args = 150 + run_exn ~proc_mgr ?cwd ?audit args |> string_trim |> lines 151 + 152 + (* Queries *) 153 + 154 + let is_repository path = 155 + let git_dir = Eio.Path.(path / ".git") in 156 + match Eio.Path.kind ~follow:false git_dir with 157 + | `Directory | `Regular_file -> true (* .git can be a file for worktrees *) 158 + | _ -> false 159 + | exception _ -> false 160 + 161 + let current_branch ~proc_mgr ~cwd = 162 + match run ~proc_mgr ~cwd ["symbolic-ref"; "--short"; "HEAD"] with 163 + | Ok output -> Some (string_trim output) 164 + | Error _ -> None 165 + 166 + let current_branch_exn ~proc_mgr ~cwd = 167 + match current_branch ~proc_mgr ~cwd with 168 + | Some b -> b 169 + | None -> raise (err Not_on_branch) 170 + 171 + let current_head ~proc_mgr ~cwd = 172 + run_exn ~proc_mgr ~cwd ["rev-parse"; "HEAD"] |> string_trim 173 + 174 + let has_uncommitted_changes ~proc_mgr ~cwd = 175 + let status = run_exn ~proc_mgr ~cwd ["status"; "--porcelain"] in 176 + String.trim status <> "" 177 + 178 + let remote_exists ~proc_mgr ~cwd name = 179 + match run ~proc_mgr ~cwd ["remote"; "get-url"; name] with 180 + | Ok _ -> true 181 + | Error _ -> false 182 + 183 + let branch_exists ~proc_mgr ~cwd name = 184 + match run ~proc_mgr ~cwd ["show-ref"; "--verify"; "--quiet"; "refs/heads/" ^ name] with 185 + | Ok _ -> true 186 + | Error _ -> false 187 + 188 + let rev_parse ~proc_mgr ~cwd ref_ = 189 + match run ~proc_mgr ~cwd ["rev-parse"; "--verify"; "--quiet"; ref_] with 190 + | Ok output -> Some (string_trim output) 191 + | Error _ -> None 192 + 193 + let rev_parse_exn ~proc_mgr ~cwd ref_ = 194 + match rev_parse ~proc_mgr ~cwd ref_ with 195 + | Some sha -> sha 196 + | None -> raise (err (Branch_not_found ref_)) 197 + 198 + let rev_parse_short ~proc_mgr ~cwd ref_ = 199 + run_exn ~proc_mgr ~cwd ["rev-parse"; "--short"; ref_] |> string_trim 200 + 201 + let ls_remote_default_branch ~proc_mgr ~cwd ~url = 202 + Log.info (fun m -> m "Detecting default branch for %s..." url); 203 + (* Try to get the default branch from the remote *) 204 + let output = run_exn ~proc_mgr ~cwd ["ls-remote"; "--symref"; url; "HEAD"] in 205 + (* Parse output like: ref: refs/heads/main\tHEAD *) 206 + let default = 207 + let lines = String.split_on_char '\n' output in 208 + List.find_map (fun line -> 209 + if String.starts_with ~prefix:"ref:" line then 210 + let parts = String.split_on_char '\t' line in 211 + match parts with 212 + | ref_part :: _ -> 213 + let ref_part = String.trim ref_part in 214 + if String.starts_with ~prefix:"ref: refs/heads/" ref_part then 215 + Some (String.sub ref_part 16 (String.length ref_part - 16)) 216 + else None 217 + | _ -> None 218 + else None 219 + ) lines 220 + in 221 + match default with 222 + | Some branch -> 223 + Log.info (fun m -> m "Default branch: %s" branch); 224 + branch 225 + | None -> 226 + (* Fallback: try common branch names *) 227 + Log.debug (fun m -> m "Could not detect default branch, trying common names..."); 228 + let try_branch name = 229 + match run ~proc_mgr ~cwd ["ls-remote"; "--heads"; url; name] with 230 + | Ok output when String.trim output <> "" -> true 231 + | _ -> false 232 + in 233 + if try_branch "main" then "main" 234 + else if try_branch "master" then "master" 235 + else begin 236 + Log.warn (fun m -> m "Could not detect default branch, assuming 'main'"); 237 + "main" 238 + end 239 + 240 + let list_remotes ~proc_mgr ~cwd = 241 + run_lines ~proc_mgr ~cwd ["remote"] 242 + 243 + let remote_url ~proc_mgr ~cwd name = 244 + match run ~proc_mgr ~cwd ["remote"; "get-url"; name] with 245 + | Ok output -> Some (string_trim output) 246 + | Error _ -> None 247 + 248 + let log_oneline ~proc_mgr ~cwd ?max_count from_ref to_ref = 249 + let range = from_ref ^ ".." ^ to_ref in 250 + let args = ["log"; "--oneline"; range] in 251 + let args = match max_count with 252 + | Some n -> args @ ["--max-count"; string_of_int n] 253 + | None -> args 254 + in 255 + run_lines ~proc_mgr ~cwd args 256 + 257 + let diff_stat ~proc_mgr ~cwd from_ref to_ref = 258 + let range = from_ref ^ ".." ^ to_ref in 259 + run_exn ~proc_mgr ~cwd ["diff"; "--stat"; range] 260 + 261 + let ls_tree ~proc_mgr ~cwd ~tree ~path = 262 + match run ~proc_mgr ~cwd ["ls-tree"; tree; path] with 263 + | Ok output -> String.trim output <> "" 264 + | Error _ -> false 265 + 266 + let rev_list_count ~proc_mgr ~cwd from_ref to_ref = 267 + let range = from_ref ^ ".." ^ to_ref in 268 + let output = run_exn ~proc_mgr ~cwd ["rev-list"; "--count"; range] in 269 + int_of_string (string_trim output) 270 + 271 + (* Idempotent mutations *) 272 + 273 + let ensure_remote ~proc_mgr ~cwd ~name ~url = 274 + match remote_url ~proc_mgr ~cwd name with 275 + | None -> 276 + Log.info (fun m -> m "Adding remote %s -> %s" name url); 277 + run_exn ~proc_mgr ~cwd ["remote"; "add"; name; url] |> ignore; 278 + `Created 279 + | Some existing_url -> 280 + if existing_url = url then begin 281 + Log.debug (fun m -> m "Remote %s already exists with correct URL" name); 282 + `Existed 283 + end else begin 284 + Log.info (fun m -> m "Updating remote %s URL: %s -> %s" name existing_url url); 285 + run_exn ~proc_mgr ~cwd ["remote"; "set-url"; name; url] |> ignore; 286 + `Updated 287 + end 288 + 289 + let ensure_branch ~proc_mgr ~cwd ~name ~start_point = 290 + if branch_exists ~proc_mgr ~cwd name then begin 291 + Log.debug (fun m -> m "Branch %s already exists" name); 292 + `Existed 293 + end else begin 294 + Log.info (fun m -> m "Creating branch %s at %s" name start_point); 295 + run_exn ~proc_mgr ~cwd ["branch"; name; start_point] |> ignore; 296 + `Created 297 + end 298 + 299 + let ensure_vendored_remotes ~proc_mgr ~cwd (packages : Config.vendored_package list) = 300 + let created = ref 0 in 301 + List.iter (fun (pkg : Config.vendored_package) -> 302 + let remote_name = "origin-" ^ pkg.pkg_name in 303 + match ensure_remote ~proc_mgr ~cwd ~name:remote_name ~url:pkg.pkg_url with 304 + | `Created -> 305 + Log.info (fun m -> m "Recreated remote %s -> %s" remote_name pkg.pkg_url); 306 + incr created 307 + | `Updated -> 308 + Log.info (fun m -> m "Updated remote %s -> %s" remote_name pkg.pkg_url) 309 + | `Existed -> () 310 + ) packages; 311 + !created 312 + 313 + (* State-changing operations *) 314 + 315 + let init ~proc_mgr ~cwd = 316 + Log.info (fun m -> m "Initializing git repository..."); 317 + run_exn ~proc_mgr ~cwd ["init"] |> ignore 318 + 319 + let fetch ~proc_mgr ~cwd ~remote = 320 + Log.info (fun m -> m "Fetching from %s..." remote); 321 + run_exn ~proc_mgr ~cwd ["fetch"; remote] |> ignore 322 + 323 + let fetch_with_tags ~proc_mgr ~cwd ~remote = 324 + Log.info (fun m -> m "Fetching from %s (with tags)..." remote); 325 + run_exn ~proc_mgr ~cwd ["fetch"; "--tags"; "--force"; remote] |> ignore 326 + 327 + let resolve_branch_or_tag ~proc_mgr ~cwd ~remote ~ref_name = 328 + (* Try as a remote tracking branch first *) 329 + let branch_ref = remote ^ "/" ^ ref_name in 330 + match rev_parse ~proc_mgr ~cwd branch_ref with 331 + | Some _ -> branch_ref 332 + | None -> 333 + (* Try as a tag *) 334 + let tag_ref = "refs/tags/" ^ ref_name in 335 + match rev_parse ~proc_mgr ~cwd tag_ref with 336 + | Some _ -> tag_ref 337 + | None -> 338 + failwith (Printf.sprintf "Ref not found: %s (tried branch %s and tag %s)" 339 + ref_name branch_ref tag_ref) 340 + 341 + let checkout ~proc_mgr ~cwd ref_ = 342 + Log.debug (fun m -> m "Checking out %s" ref_); 343 + run_exn ~proc_mgr ~cwd ["checkout"; ref_] |> ignore 344 + 345 + let checkout_orphan ~proc_mgr ~cwd name = 346 + Log.info (fun m -> m "Creating orphan branch %s" name); 347 + run_exn ~proc_mgr ~cwd ["checkout"; "--orphan"; name] |> ignore 348 + 349 + let read_tree_prefix ~proc_mgr ~cwd ~prefix ~tree = 350 + Log.debug (fun m -> m "Reading tree %s with prefix %s" tree prefix); 351 + run_exn ~proc_mgr ~cwd ["read-tree"; "--prefix=" ^ prefix; tree] |> ignore 352 + 353 + let checkout_index ~proc_mgr ~cwd = 354 + Log.debug (fun m -> m "Checking out index to working directory"); 355 + run_exn ~proc_mgr ~cwd ["checkout-index"; "-a"; "-f"] |> ignore 356 + 357 + let rm_rf ~proc_mgr ~cwd ~target = 358 + Log.debug (fun m -> m "Removing %s from git" target); 359 + (* Ignore errors - target might not exist *) 360 + ignore (run ~proc_mgr ~cwd ["rm"; "-rf"; target]) 361 + 362 + let rm_cached_rf ~proc_mgr ~cwd = 363 + Log.debug (fun m -> m "Removing all files from index"); 364 + (* Ignore errors - index might be empty *) 365 + ignore (run ~proc_mgr ~cwd ["rm"; "-rf"; "--cached"; "."]) 366 + 367 + let add_all ~proc_mgr ~cwd = 368 + Log.debug (fun m -> m "Staging all changes"); 369 + run_exn ~proc_mgr ~cwd ["add"; "-A"] |> ignore 370 + 371 + let commit ~proc_mgr ~cwd ~message = 372 + Log.debug (fun m -> m "Committing: %s" (String.sub message 0 (min 50 (String.length message)))); 373 + run_exn ~proc_mgr ~cwd ["commit"; "-m"; message] |> ignore 374 + 375 + let commit_allow_empty ~proc_mgr ~cwd ~message = 376 + Log.debug (fun m -> m "Committing (allow empty): %s" (String.sub message 0 (min 50 (String.length message)))); 377 + run_exn ~proc_mgr ~cwd ["commit"; "--allow-empty"; "-m"; message] |> ignore 378 + 379 + let branch_create ~proc_mgr ~cwd ~name ~start_point = 380 + Log.info (fun m -> m "Creating branch %s at %s" name start_point); 381 + run_exn ~proc_mgr ~cwd ["branch"; name; start_point] |> ignore 382 + 383 + let branch_force ~proc_mgr ~cwd ~name ~point = 384 + Log.info (fun m -> m "Force-moving branch %s to %s" name point); 385 + run_exn ~proc_mgr ~cwd ["branch"; "-f"; name; point] |> ignore 386 + 387 + let remote_add ~proc_mgr ~cwd ~name ~url = 388 + Log.info (fun m -> m "Adding remote %s -> %s" name url); 389 + run_exn ~proc_mgr ~cwd ["remote"; "add"; name; url] |> ignore 390 + 391 + let remote_set_url ~proc_mgr ~cwd ~name ~url = 392 + Log.info (fun m -> m "Setting remote %s URL to %s" name url); 393 + run_exn ~proc_mgr ~cwd ["remote"; "set-url"; name; url] |> ignore 394 + 395 + let merge_allow_unrelated ~proc_mgr ~cwd ~branch ~message = 396 + Log.info (fun m -> m "Merging %s (allow unrelated histories)..." branch); 397 + match run ~proc_mgr ~cwd ["merge"; "--allow-unrelated-histories"; "-m"; message; branch] with 398 + | Ok _ -> Ok () 399 + | Error (Command_failed { exit_code = 1; _ }) -> 400 + (* Merge conflict - get list of conflicting files *) 401 + let output = run_exn ~proc_mgr ~cwd ["diff"; "--name-only"; "--diff-filter=U"] in 402 + let files = lines output in 403 + Log.warn (fun m -> m "Merge conflict: %a" Fmt.(list ~sep:comma string) files); 404 + Error (`Conflict files) 405 + | Error e -> 406 + raise (err e) 407 + 408 + let rebase ~proc_mgr ~cwd ~onto = 409 + Log.info (fun m -> m "Rebasing onto %s..." onto); 410 + match run ~proc_mgr ~cwd ["rebase"; onto] with 411 + | Ok _ -> Ok () 412 + | Error (Command_failed { stderr; _ }) -> 413 + let hint = 414 + if String.length stderr > 200 then 415 + String.sub stderr 0 200 ^ "..." 416 + else 417 + stderr 418 + in 419 + Log.warn (fun m -> m "Rebase conflict onto %s" onto); 420 + Error (`Conflict hint) 421 + | Error e -> 422 + raise (err e) 423 + 424 + let rebase_abort ~proc_mgr ~cwd = 425 + Log.info (fun m -> m "Aborting rebase..."); 426 + ignore (run ~proc_mgr ~cwd ["rebase"; "--abort"]) 427 + 428 + let merge_abort ~proc_mgr ~cwd = 429 + Log.info (fun m -> m "Aborting merge..."); 430 + ignore (run ~proc_mgr ~cwd ["merge"; "--abort"]) 431 + 432 + let reset_hard ~proc_mgr ~cwd ref_ = 433 + Log.info (fun m -> m "Hard reset to %s" ref_); 434 + run_exn ~proc_mgr ~cwd ["reset"; "--hard"; ref_] |> ignore 435 + 436 + let clean_fd ~proc_mgr ~cwd = 437 + Log.debug (fun m -> m "Cleaning untracked files"); 438 + run_exn ~proc_mgr ~cwd ["clean"; "-fd"] |> ignore 439 + 440 + let filter_repo_to_subdirectory ~proc_mgr ~cwd ~branch ~subdirectory = 441 + Log.info (fun m -> m "Rewriting history of %s into subdirectory %s..." branch subdirectory); 442 + (* Use git-filter-repo with --to-subdirectory-filter to rewrite all paths into subdirectory. 443 + This preserves full history with paths prefixed. Much faster than filter-branch. 444 + 445 + For bare repositories, we need to create a temporary worktree, run filter-repo 446 + there, and then update the branch in the bare repo. *) 447 + 448 + (* Create a unique temporary worktree name using the branch name *) 449 + let safe_branch = String.map (fun c -> if c = '/' then '-' else c) branch in 450 + let temp_wt_name = ".filter-tmp-" ^ safe_branch in 451 + let temp_wt_relpath = "../" ^ temp_wt_name in 452 + 453 + (* Construct the worktree path - cwd is (fs, path_string), so we go up one level *) 454 + let fs = fst cwd in 455 + let git_path = snd cwd in 456 + let parent_path = Filename.dirname git_path in 457 + let temp_wt_path = Filename.concat parent_path temp_wt_name in 458 + let temp_wt : path = (fs, temp_wt_path) in 459 + 460 + (* Remove any existing temp worktree *) 461 + ignore (run ~proc_mgr ~cwd ["worktree"; "remove"; "-f"; temp_wt_relpath]); 462 + 463 + (* Create worktree for the branch *) 464 + run_exn ~proc_mgr ~cwd ["worktree"; "add"; temp_wt_relpath; branch] |> ignore; 465 + 466 + (* Run git-filter-repo in the worktree *) 467 + let result = run ~proc_mgr ~cwd:temp_wt [ 468 + "filter-repo"; 469 + "--to-subdirectory-filter"; subdirectory; 470 + "--force"; 471 + "--refs"; "HEAD" 472 + ] in 473 + 474 + (* Handle result: get the new SHA, cleanup worktree, then update branch *) 475 + (match result with 476 + | Ok _ -> 477 + (* Get the new HEAD SHA from the worktree BEFORE removing it *) 478 + let new_sha = run_exn ~proc_mgr ~cwd:temp_wt ["rev-parse"; "HEAD"] |> string_trim in 479 + (* Cleanup temporary worktree first (must do this before updating branch) *) 480 + ignore (run ~proc_mgr ~cwd ["worktree"; "remove"; "-f"; temp_wt_relpath]); 481 + (* Now update the branch in the bare repo *) 482 + run_exn ~proc_mgr ~cwd ["branch"; "-f"; branch; new_sha] |> ignore 483 + | Error e -> 484 + (* Cleanup and re-raise *) 485 + ignore (run ~proc_mgr ~cwd ["worktree"; "remove"; "-f"; temp_wt_relpath]); 486 + raise (err e)) 487 + 488 + let filter_repo_from_subdirectory ~proc_mgr ~cwd ~branch ~subdirectory = 489 + Log.info (fun m -> m "Extracting %s from subdirectory %s to root..." branch subdirectory); 490 + (* Use git-filter-repo with --subdirectory-filter to extract files from subdirectory 491 + to root. This is the inverse of --to-subdirectory-filter. 492 + Preserves history for files that were in the subdirectory. 493 + 494 + For bare repositories, we need to create a temporary worktree, run filter-repo 495 + there, and then update the branch in the bare repo. *) 496 + 497 + (* Create a unique temporary worktree name using the branch name *) 498 + let safe_branch = String.map (fun c -> if c = '/' then '-' else c) branch in 499 + let temp_wt_name = ".filter-tmp-" ^ safe_branch in 500 + let temp_wt_relpath = "../" ^ temp_wt_name in 501 + 502 + (* Construct the worktree path - cwd is (fs, path_string), so we go up one level *) 503 + let fs = fst cwd in 504 + let git_path = snd cwd in 505 + let parent_path = Filename.dirname git_path in 506 + let temp_wt_path = Filename.concat parent_path temp_wt_name in 507 + let temp_wt : path = (fs, temp_wt_path) in 508 + 509 + (* Remove any existing temp worktree *) 510 + ignore (run ~proc_mgr ~cwd ["worktree"; "remove"; "-f"; temp_wt_relpath]); 511 + 512 + (* Create worktree for the branch *) 513 + run_exn ~proc_mgr ~cwd ["worktree"; "add"; temp_wt_relpath; branch] |> ignore; 514 + 515 + (* Run git-filter-repo in the worktree with --subdirectory-filter *) 516 + let result = run ~proc_mgr ~cwd:temp_wt [ 517 + "filter-repo"; 518 + "--subdirectory-filter"; subdirectory; 519 + "--force"; 520 + "--refs"; "HEAD" 521 + ] in 522 + 523 + (* Handle result: get the new SHA, cleanup worktree, then update branch *) 524 + (match result with 525 + | Ok _ -> 526 + (* Get the new HEAD SHA from the worktree BEFORE removing it *) 527 + let new_sha = run_exn ~proc_mgr ~cwd:temp_wt ["rev-parse"; "HEAD"] |> string_trim in 528 + (* Cleanup temporary worktree first (must do this before updating branch) *) 529 + ignore (run ~proc_mgr ~cwd ["worktree"; "remove"; "-f"; temp_wt_relpath]); 530 + (* Now update the branch in the bare repo *) 531 + run_exn ~proc_mgr ~cwd ["branch"; "-f"; branch; new_sha] |> ignore 532 + | Error e -> 533 + (* Cleanup and re-raise *) 534 + ignore (run ~proc_mgr ~cwd ["worktree"; "remove"; "-f"; temp_wt_relpath]); 535 + raise (err e))
+399
lib/git.mli
··· 1 + (** Git operations wrapped with Eio and robust error handling. 2 + 3 + All git commands are executed via [Eio.Process] with proper logging 4 + and error context. Errors are wrapped in [Eio.Exn.Io] with context 5 + chains for debugging. *) 6 + 7 + (** {1 Error Types} *) 8 + 9 + type error = 10 + | Command_failed of { 11 + cmd : string list; 12 + exit_code : int; 13 + stdout : string; 14 + stderr : string; 15 + } 16 + | Not_a_repository 17 + | Remote_exists of string 18 + | Remote_not_found of string 19 + | Branch_exists of string 20 + | Branch_not_found of string 21 + | Merge_conflict of { branch : string; conflicting_files : string list } 22 + | Rebase_conflict of { onto : string; hint : string } 23 + | Uncommitted_changes 24 + | Not_on_branch 25 + | Detached_head 26 + 27 + val pp_error : Format.formatter -> error -> unit 28 + 29 + type Eio.Exn.err += E of error 30 + 31 + val err : error -> exn 32 + (** [err e] creates an [Eio.Exn.Io] exception with the given error. *) 33 + 34 + (** {1 Types} *) 35 + 36 + type proc_mgr = [ `Generic | `Unix ] Eio.Process.mgr_ty Eio.Resource.t 37 + type path = Eio.Fs.dir_ty Eio.Path.t 38 + 39 + (** {1 Low-level execution} *) 40 + 41 + val run : 42 + proc_mgr:proc_mgr -> 43 + ?cwd:path -> 44 + ?audit:Audit.context -> 45 + string list -> 46 + (string, error) result 47 + (** [run ~proc_mgr args] executes [git args] and returns stdout on success. 48 + If [audit] is provided, records the operation to the audit context. *) 49 + 50 + val run_exn : 51 + proc_mgr:proc_mgr -> 52 + ?cwd:path -> 53 + ?audit:Audit.context -> 54 + string list -> 55 + string 56 + (** [run_exn ~proc_mgr args] executes [git args] and returns stdout. 57 + Raises on failure with context. If [audit] is provided, records the operation. *) 58 + 59 + val run_lines : 60 + proc_mgr:proc_mgr -> 61 + ?cwd:path -> 62 + ?audit:Audit.context -> 63 + string list -> 64 + string list 65 + (** [run_lines ~proc_mgr args] executes and splits output by newlines. 66 + If [audit] is provided, records the operation. *) 67 + 68 + (** {1 Queries - Safe read-only operations} *) 69 + 70 + val is_repository : path -> bool 71 + (** [is_repository path] checks if [path] contains a [.git] directory. *) 72 + 73 + val current_branch : 74 + proc_mgr:proc_mgr -> 75 + cwd:path -> 76 + string option 77 + (** [current_branch] returns [Some branch] if on a branch, [None] if detached. *) 78 + 79 + val current_branch_exn : 80 + proc_mgr:proc_mgr -> 81 + cwd:path -> 82 + string 83 + (** [current_branch_exn] returns current branch or raises [Not_on_branch]. *) 84 + 85 + val current_head : 86 + proc_mgr:proc_mgr -> 87 + cwd:path -> 88 + string 89 + (** [current_head] returns the current HEAD SHA. *) 90 + 91 + val has_uncommitted_changes : 92 + proc_mgr:proc_mgr -> 93 + cwd:path -> 94 + bool 95 + (** [has_uncommitted_changes] returns true if there are staged or unstaged changes. *) 96 + 97 + val remote_exists : 98 + proc_mgr:proc_mgr -> 99 + cwd:path -> 100 + string -> 101 + bool 102 + (** [remote_exists ~proc_mgr ~cwd name] checks if remote [name] exists. *) 103 + 104 + val branch_exists : 105 + proc_mgr:proc_mgr -> 106 + cwd:path -> 107 + string -> 108 + bool 109 + (** [branch_exists ~proc_mgr ~cwd name] checks if branch [name] exists. *) 110 + 111 + val rev_parse : 112 + proc_mgr:proc_mgr -> 113 + cwd:path -> 114 + string -> 115 + string option 116 + (** [rev_parse ~proc_mgr ~cwd ref] returns the SHA for [ref], or [None]. *) 117 + 118 + val rev_parse_exn : 119 + proc_mgr:proc_mgr -> 120 + cwd:path -> 121 + string -> 122 + string 123 + (** [rev_parse_exn] returns SHA or raises. *) 124 + 125 + val rev_parse_short : 126 + proc_mgr:proc_mgr -> 127 + cwd:path -> 128 + string -> 129 + string 130 + (** [rev_parse_short] returns abbreviated SHA. *) 131 + 132 + val ls_remote_default_branch : 133 + proc_mgr:proc_mgr -> 134 + cwd:path -> 135 + url:string -> 136 + string 137 + (** [ls_remote_default_branch ~proc_mgr ~cwd ~url] detects the default branch of remote. *) 138 + 139 + val list_remotes : 140 + proc_mgr:proc_mgr -> 141 + cwd:path -> 142 + string list 143 + (** [list_remotes] returns all remote names. *) 144 + 145 + val remote_url : 146 + proc_mgr:proc_mgr -> 147 + cwd:path -> 148 + string -> 149 + string option 150 + (** [remote_url ~proc_mgr ~cwd name] returns the URL for remote [name]. *) 151 + 152 + val log_oneline : 153 + proc_mgr:proc_mgr -> 154 + cwd:path -> 155 + ?max_count:int -> 156 + string -> 157 + string -> 158 + string list 159 + (** [log_oneline ~proc_mgr ~cwd from_ref to_ref] returns commit summaries. *) 160 + 161 + val diff_stat : 162 + proc_mgr:proc_mgr -> 163 + cwd:path -> 164 + string -> 165 + string -> 166 + string 167 + (** [diff_stat ~proc_mgr ~cwd from_ref to_ref] returns diff statistics. *) 168 + 169 + val ls_tree : 170 + proc_mgr:proc_mgr -> 171 + cwd:path -> 172 + tree:string -> 173 + path:string -> 174 + bool 175 + (** [ls_tree ~proc_mgr ~cwd ~tree ~path] checks if [path] exists in [tree]. *) 176 + 177 + val rev_list_count : 178 + proc_mgr:proc_mgr -> 179 + cwd:path -> 180 + string -> 181 + string -> 182 + int 183 + (** [rev_list_count ~proc_mgr ~cwd from_ref to_ref] counts commits between refs. *) 184 + 185 + (** {1 Idempotent mutations - Safe to re-run} *) 186 + 187 + val ensure_remote : 188 + proc_mgr:proc_mgr -> 189 + cwd:path -> 190 + name:string -> 191 + url:string -> 192 + [ `Created | `Existed | `Updated ] 193 + (** [ensure_remote] adds remote if missing, updates URL if different. *) 194 + 195 + val ensure_branch : 196 + proc_mgr:proc_mgr -> 197 + cwd:path -> 198 + name:string -> 199 + start_point:string -> 200 + [ `Created | `Existed ] 201 + (** [ensure_branch] creates branch if it doesn't exist. *) 202 + 203 + val ensure_vendored_remotes : 204 + proc_mgr:proc_mgr -> 205 + cwd:path -> 206 + Config.vendored_package list -> 207 + int 208 + (** [ensure_vendored_remotes ~proc_mgr ~cwd packages] ensures remotes exist for 209 + all vendored packages. Returns the number of remotes created. 210 + Use this to recreate remotes after cloning a workspace. *) 211 + 212 + (** {1 State-changing operations} *) 213 + 214 + val init : 215 + proc_mgr:proc_mgr -> 216 + cwd:path -> 217 + unit 218 + (** [init] initializes a new git repository. *) 219 + 220 + val fetch : 221 + proc_mgr:proc_mgr -> 222 + cwd:path -> 223 + remote:string -> 224 + unit 225 + (** [fetch] fetches from a remote. *) 226 + 227 + val fetch_with_tags : 228 + proc_mgr:proc_mgr -> 229 + cwd:path -> 230 + remote:string -> 231 + unit 232 + (** [fetch_with_tags] fetches from a remote including all tags. *) 233 + 234 + val resolve_branch_or_tag : 235 + proc_mgr:proc_mgr -> 236 + cwd:path -> 237 + remote:string -> 238 + ref_name:string -> 239 + string 240 + (** [resolve_branch_or_tag] tries to resolve a ref first as a remote tracking 241 + branch (remote/ref_name), then as a tag (refs/tags/ref_name). Returns the 242 + resolved ref or raises an exception if neither exists. *) 243 + 244 + val checkout : 245 + proc_mgr:proc_mgr -> 246 + cwd:path -> 247 + string -> 248 + unit 249 + (** [checkout] switches to a branch or commit. *) 250 + 251 + val checkout_orphan : 252 + proc_mgr:proc_mgr -> 253 + cwd:path -> 254 + string -> 255 + unit 256 + (** [checkout_orphan] creates and switches to a new orphan branch. *) 257 + 258 + val read_tree_prefix : 259 + proc_mgr:proc_mgr -> 260 + cwd:path -> 261 + prefix:string -> 262 + tree:string -> 263 + unit 264 + (** [read_tree_prefix] reads a tree into the index with a path prefix. *) 265 + 266 + val checkout_index : 267 + proc_mgr:proc_mgr -> 268 + cwd:path -> 269 + unit 270 + (** [checkout_index] checks out files from the index to working directory. *) 271 + 272 + val rm_rf : 273 + proc_mgr:proc_mgr -> 274 + cwd:path -> 275 + target:string -> 276 + unit 277 + (** [rm_rf] removes files/directories from git tracking. *) 278 + 279 + val rm_cached_rf : 280 + proc_mgr:proc_mgr -> 281 + cwd:path -> 282 + unit 283 + (** [rm_cached_rf] removes all files from index (for orphan branch setup). *) 284 + 285 + val add_all : 286 + proc_mgr:proc_mgr -> 287 + cwd:path -> 288 + unit 289 + (** [add_all] stages all changes. *) 290 + 291 + val commit : 292 + proc_mgr:proc_mgr -> 293 + cwd:path -> 294 + message:string -> 295 + unit 296 + (** [commit] creates a commit with the given message. *) 297 + 298 + val commit_allow_empty : 299 + proc_mgr:proc_mgr -> 300 + cwd:path -> 301 + message:string -> 302 + unit 303 + (** [commit_allow_empty] creates a commit even if there are no changes. *) 304 + 305 + val branch_create : 306 + proc_mgr:proc_mgr -> 307 + cwd:path -> 308 + name:string -> 309 + start_point:string -> 310 + unit 311 + (** [branch_create] creates a new branch at [start_point]. *) 312 + 313 + val branch_force : 314 + proc_mgr:proc_mgr -> 315 + cwd:path -> 316 + name:string -> 317 + point:string -> 318 + unit 319 + (** [branch_force] moves branch to point (creates if needed). *) 320 + 321 + val remote_add : 322 + proc_mgr:proc_mgr -> 323 + cwd:path -> 324 + name:string -> 325 + url:string -> 326 + unit 327 + (** [remote_add] adds a new remote. *) 328 + 329 + val remote_set_url : 330 + proc_mgr:proc_mgr -> 331 + cwd:path -> 332 + name:string -> 333 + url:string -> 334 + unit 335 + (** [remote_set_url] updates the URL of an existing remote. *) 336 + 337 + val merge_allow_unrelated : 338 + proc_mgr:proc_mgr -> 339 + cwd:path -> 340 + branch:string -> 341 + message:string -> 342 + (unit, [ `Conflict of string list ]) result 343 + (** [merge_allow_unrelated] merges with [--allow-unrelated-histories]. 344 + Returns [Error (`Conflict files)] if there are conflicts. *) 345 + 346 + val rebase : 347 + proc_mgr:proc_mgr -> 348 + cwd:path -> 349 + onto:string -> 350 + (unit, [ `Conflict of string ]) result 351 + (** [rebase] rebases current branch onto [onto]. 352 + Returns [Error (`Conflict hint)] if there are conflicts. *) 353 + 354 + val rebase_abort : 355 + proc_mgr:proc_mgr -> 356 + cwd:path -> 357 + unit 358 + (** [rebase_abort] aborts an in-progress rebase. *) 359 + 360 + val merge_abort : 361 + proc_mgr:proc_mgr -> 362 + cwd:path -> 363 + unit 364 + (** [merge_abort] aborts an in-progress merge. *) 365 + 366 + val reset_hard : 367 + proc_mgr:proc_mgr -> 368 + cwd:path -> 369 + string -> 370 + unit 371 + (** [reset_hard] does a hard reset to the given ref. *) 372 + 373 + val clean_fd : 374 + proc_mgr:proc_mgr -> 375 + cwd:path -> 376 + unit 377 + (** [clean_fd] removes untracked files and directories. *) 378 + 379 + val filter_repo_to_subdirectory : 380 + proc_mgr:proc_mgr -> 381 + cwd:path -> 382 + branch:string -> 383 + subdirectory:string -> 384 + unit 385 + (** [filter_repo_to_subdirectory ~proc_mgr ~cwd ~branch ~subdirectory] 386 + rewrites the history of [branch] so all files are moved into [subdirectory]. 387 + Uses git-filter-repo for fast history rewriting. Preserves full commit history. *) 388 + 389 + val filter_repo_from_subdirectory : 390 + proc_mgr:proc_mgr -> 391 + cwd:path -> 392 + branch:string -> 393 + subdirectory:string -> 394 + unit 395 + (** [filter_repo_from_subdirectory ~proc_mgr ~cwd ~branch ~subdirectory] 396 + rewrites the history of [branch] extracting only files from [subdirectory] 397 + and placing them at the repository root. This is the inverse of 398 + [filter_repo_to_subdirectory]. Uses git-filter-repo --subdirectory-filter. 399 + Preserves full commit history for files that were in the subdirectory. *)
+242
lib/git_backend.ml
··· 1 + (** Git backend for direct repository vendoring. 2 + 3 + Implements vendoring of arbitrary git repositories using the three-tier branch model: 4 + - git/upstream/<name> - pristine upstream code 5 + - git/vendor/<name> - upstream history rewritten with vendor/git/<name>/ prefix 6 + - git/patches/<name> - local modifications *) 7 + 8 + (** {1 Branch Naming} *) 9 + 10 + let upstream_branch name = "git/upstream/" ^ name 11 + let vendor_branch name = "git/vendor/" ^ name 12 + let patches_branch name = "git/patches/" ^ name 13 + let vendor_path name = "vendor/git/" ^ name 14 + 15 + (** {1 Worktree Kinds} *) 16 + 17 + let upstream_kind name = Worktree.Git_upstream name 18 + let vendor_kind name = Worktree.Git_vendor name 19 + let patches_kind name = Worktree.Git_patches name 20 + 21 + (** {1 Repository Info} *) 22 + 23 + type repo_info = { 24 + name : string; 25 + url : string; 26 + branch : string option; 27 + subdir : string option; 28 + } 29 + 30 + (** {1 Repository Operations} *) 31 + 32 + let add_repo ~proc_mgr ~root ?cache info = 33 + let repo_name = info.name in 34 + let git = Worktree.git_dir root in 35 + 36 + try 37 + (* Check if already exists *) 38 + if Worktree.branch_exists ~proc_mgr root (patches_kind repo_name) then 39 + Backend.Already_exists repo_name 40 + else begin 41 + (* Rewrite URL for known mirrors *) 42 + let url = Git_repo_lookup.rewrite_url info.url in 43 + 44 + (* Determine the ref to use: explicit > override > default *) 45 + let branch = match info.branch with 46 + | Some b -> b 47 + | None -> 48 + match Git_repo_lookup.branch_override ~name:repo_name ~url with 49 + | Some b -> b 50 + | None -> Git.ls_remote_default_branch ~proc_mgr ~cwd:git ~url 51 + in 52 + 53 + (* Fetch - either via cache or directly *) 54 + let ref_point = match cache with 55 + | Some cache_path -> 56 + (* Fetch through vendor cache *) 57 + Vendor_cache.fetch_to_project ~proc_mgr 58 + ~cache:cache_path ~project_git:git ~url ~branch 59 + | None -> 60 + (* Direct fetch (with tags to support version tags) *) 61 + let remote = "origin-" ^ repo_name in 62 + ignore (Git.ensure_remote ~proc_mgr ~cwd:git ~name:remote ~url); 63 + Git.fetch_with_tags ~proc_mgr ~cwd:git ~remote; 64 + Git.resolve_branch_or_tag ~proc_mgr ~cwd:git ~remote ~ref_name:branch 65 + in 66 + 67 + (* Step 1: Create upstream branch from fetched ref *) 68 + Git.branch_force ~proc_mgr ~cwd:git 69 + ~name:(upstream_branch repo_name) ~point:ref_point; 70 + 71 + (* Step 2: Create vendor branch from upstream and rewrite history *) 72 + Git.branch_force ~proc_mgr ~cwd:git 73 + ~name:(vendor_branch repo_name) ~point:(upstream_branch repo_name); 74 + 75 + (* If subdir is specified, we first filter to that subdirectory, 76 + then move to vendor path. Otherwise, just move to vendor path. *) 77 + (match info.subdir with 78 + | Some subdir -> 79 + (* First filter to extract only the subdirectory *) 80 + Git.filter_repo_to_subdirectory ~proc_mgr ~cwd:git 81 + ~branch:(vendor_branch repo_name) 82 + ~subdirectory:subdir; 83 + (* Now the subdir is at root, rewrite to vendor path *) 84 + Git.filter_repo_to_subdirectory ~proc_mgr ~cwd:git 85 + ~branch:(vendor_branch repo_name) 86 + ~subdirectory:(vendor_path repo_name) 87 + | None -> 88 + (* Rewrite vendor branch history to move all files into vendor/git/<name>/ *) 89 + Git.filter_repo_to_subdirectory ~proc_mgr ~cwd:git 90 + ~branch:(vendor_branch repo_name) 91 + ~subdirectory:(vendor_path repo_name)); 92 + 93 + (* Get the vendor SHA after rewriting *) 94 + let vendor_sha = match Git.rev_parse ~proc_mgr ~cwd:git (vendor_branch repo_name) with 95 + | Some sha -> sha 96 + | None -> failwith "Vendor branch not found after filter-repo" 97 + in 98 + 99 + (* Step 3: Create patches branch from vendor *) 100 + Git.branch_create ~proc_mgr ~cwd:git 101 + ~name:(patches_branch repo_name) 102 + ~start_point:(vendor_branch repo_name); 103 + 104 + Backend.Added { name = repo_name; sha = vendor_sha } 105 + end 106 + with exn -> 107 + (* Cleanup on failure *) 108 + (try Worktree.remove_force ~proc_mgr root (upstream_kind repo_name) with _ -> ()); 109 + (try Worktree.remove_force ~proc_mgr root (vendor_kind repo_name) with _ -> ()); 110 + Backend.Failed { name = repo_name; error = Printexc.to_string exn } 111 + 112 + let copy_with_prefix ~src_dir ~dst_dir ~prefix = 113 + (* Recursively copy files from src_dir to dst_dir/prefix/ *) 114 + let prefix_dir = Eio.Path.(dst_dir / prefix) in 115 + Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 prefix_dir; 116 + 117 + let rec copy_dir src dst = 118 + Eio.Path.read_dir src |> List.iter (fun name -> 119 + let src_path = Eio.Path.(src / name) in 120 + let dst_path = Eio.Path.(dst / name) in 121 + if Eio.Path.is_directory src_path then begin 122 + Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 dst_path; 123 + copy_dir src_path dst_path 124 + end else begin 125 + let content = Eio.Path.load src_path in 126 + Eio.Path.save ~create:(`Or_truncate 0o644) dst_path content 127 + end 128 + ) 129 + in 130 + 131 + (* Copy everything except .git *) 132 + Eio.Path.read_dir src_dir |> List.iter (fun name -> 133 + if name <> ".git" then begin 134 + let src_path = Eio.Path.(src_dir / name) in 135 + let dst_path = Eio.Path.(prefix_dir / name) in 136 + if Eio.Path.is_directory src_path then begin 137 + Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 dst_path; 138 + copy_dir src_path dst_path 139 + end else begin 140 + let content = Eio.Path.load src_path in 141 + Eio.Path.save ~create:(`Or_truncate 0o644) dst_path content 142 + end 143 + end 144 + ) 145 + 146 + let update_repo ~proc_mgr ~root ?cache repo_name = 147 + let git = Worktree.git_dir root in 148 + 149 + try 150 + (* Check if repo exists *) 151 + if not (Worktree.branch_exists ~proc_mgr root (patches_kind repo_name)) then 152 + Backend.Update_failed { name = repo_name; error = "Repository not vendored" } 153 + else begin 154 + (* Get remote URL *) 155 + let remote = "origin-" ^ repo_name in 156 + let url = match Git.remote_url ~proc_mgr ~cwd:git remote with 157 + | Some u -> u 158 + | None -> failwith ("Remote not found: " ^ remote) 159 + in 160 + 161 + (* Fetch latest - either via cache or directly (with tags for completeness) *) 162 + (match cache with 163 + | Some cache_path -> 164 + let branch = Git.ls_remote_default_branch ~proc_mgr ~cwd:git ~url in 165 + ignore (Vendor_cache.fetch_to_project ~proc_mgr 166 + ~cache:cache_path ~project_git:git ~url ~branch) 167 + | None -> 168 + Git.fetch_with_tags ~proc_mgr ~cwd:git ~remote); 169 + 170 + (* Get old SHA *) 171 + let old_sha = match Git.rev_parse ~proc_mgr ~cwd:git (upstream_branch repo_name) with 172 + | Some sha -> sha 173 + | None -> failwith "Upstream branch not found" 174 + in 175 + 176 + (* Determine default branch and update upstream *) 177 + let default_branch = Git.ls_remote_default_branch ~proc_mgr ~cwd:git ~url in 178 + let ref_point = remote ^ "/" ^ default_branch in 179 + Git.branch_force ~proc_mgr ~cwd:git 180 + ~name:(upstream_branch repo_name) ~point:ref_point; 181 + 182 + (* Get new SHA *) 183 + let new_sha = match Git.rev_parse ~proc_mgr ~cwd:git (upstream_branch repo_name) with 184 + | Some sha -> sha 185 + | None -> failwith "Upstream branch not found" 186 + in 187 + 188 + if old_sha = new_sha then 189 + Backend.No_changes repo_name 190 + else begin 191 + (* Create worktrees *) 192 + Worktree.ensure ~proc_mgr root (upstream_kind repo_name); 193 + Worktree.ensure ~proc_mgr root (vendor_kind repo_name); 194 + 195 + let upstream_wt = Worktree.path root (upstream_kind repo_name) in 196 + let vendor_wt = Worktree.path root (vendor_kind repo_name) in 197 + 198 + (* Clear vendor content and copy new *) 199 + let vendor_pkg_path = Eio.Path.(vendor_wt / "vendor" / "git" / repo_name) in 200 + (try Eio.Path.rmtree vendor_pkg_path with _ -> ()); 201 + 202 + copy_with_prefix 203 + ~src_dir:upstream_wt 204 + ~dst_dir:vendor_wt 205 + ~prefix:(vendor_path repo_name); 206 + 207 + (* Commit *) 208 + Git.add_all ~proc_mgr ~cwd:vendor_wt; 209 + Git.commit ~proc_mgr ~cwd:vendor_wt 210 + ~message:(Printf.sprintf "Update %s to %s" repo_name (String.sub new_sha 0 7)); 211 + 212 + (* Cleanup *) 213 + Worktree.remove ~proc_mgr root (upstream_kind repo_name); 214 + Worktree.remove ~proc_mgr root (vendor_kind repo_name); 215 + 216 + Backend.Updated { name = repo_name; old_sha; new_sha } 217 + end 218 + end 219 + with exn -> 220 + (try Worktree.remove_force ~proc_mgr root (upstream_kind repo_name) with _ -> ()); 221 + (try Worktree.remove_force ~proc_mgr root (vendor_kind repo_name) with _ -> ()); 222 + Backend.Update_failed { name = repo_name; error = Printexc.to_string exn } 223 + 224 + let list_repos ~proc_mgr ~root = 225 + Worktree.list_git_repos ~proc_mgr root 226 + 227 + let remove_repo ~proc_mgr ~root repo_name = 228 + let git = Worktree.git_dir root in 229 + 230 + (* Remove worktrees if exist *) 231 + (try Worktree.remove_force ~proc_mgr root (upstream_kind repo_name) with _ -> ()); 232 + (try Worktree.remove_force ~proc_mgr root (vendor_kind repo_name) with _ -> ()); 233 + (try Worktree.remove_force ~proc_mgr root (patches_kind repo_name) with _ -> ()); 234 + 235 + (* Delete branches *) 236 + (try Git.run_exn ~proc_mgr ~cwd:git ["branch"; "-D"; upstream_branch repo_name] |> ignore with _ -> ()); 237 + (try Git.run_exn ~proc_mgr ~cwd:git ["branch"; "-D"; vendor_branch repo_name] |> ignore with _ -> ()); 238 + (try Git.run_exn ~proc_mgr ~cwd:git ["branch"; "-D"; patches_branch repo_name] |> ignore with _ -> ()); 239 + 240 + (* Remove remote *) 241 + let remote = "origin-" ^ repo_name in 242 + (try Git.run_exn ~proc_mgr ~cwd:git ["remote"; "remove"; remote] |> ignore with _ -> ())
+70
lib/git_backend.mli
··· 1 + (** Git backend for direct repository vendoring. 2 + 3 + Implements vendoring of arbitrary git repositories using the three-tier branch model: 4 + - git/upstream/<name> - pristine upstream code 5 + - git/vendor/<name> - upstream history rewritten with vendor/git/<name>/ prefix 6 + - git/patches/<name> - local modifications 7 + 8 + Unlike the opam backend which discovers packages via opam repositories, 9 + this backend allows cloning any git repository directly. *) 10 + 11 + (** {1 Branch Naming} *) 12 + 13 + val upstream_branch : string -> string 14 + (** [upstream_branch name] returns the upstream branch name "git/upstream/<name>". *) 15 + 16 + val vendor_branch : string -> string 17 + (** [vendor_branch name] returns the vendor branch name "git/vendor/<name>". *) 18 + 19 + val patches_branch : string -> string 20 + (** [patches_branch name] returns the patches branch name "git/patches/<name>". *) 21 + 22 + val vendor_path : string -> string 23 + (** [vendor_path name] returns the vendor directory path "vendor/git/<name>". *) 24 + 25 + (** {1 Repository Info} *) 26 + 27 + type repo_info = { 28 + name : string; (** User-specified name *) 29 + url : string; (** Git URL to clone from *) 30 + branch : string option; (** Optional branch/tag to track *) 31 + subdir : string option; (** Optional subdirectory to extract *) 32 + } 33 + 34 + (** {1 Repository Operations} *) 35 + 36 + val add_repo : 37 + proc_mgr:Git.proc_mgr -> 38 + root:Worktree.root -> 39 + ?cache:Vendor_cache.t -> 40 + repo_info -> 41 + Backend.add_result 42 + (** [add_repo ~proc_mgr ~root ?cache info] vendors a git repository. 43 + 44 + Creates the three-tier branch structure: 45 + 1. Fetches from url into git/upstream/<name> 46 + 2. Rewrites history into git/vendor/<name> with vendor/git/<name>/ prefix 47 + 3. Creates git/patches/<name> for local modifications 48 + 49 + If [subdir] is specified, only that subdirectory is extracted from the repo. *) 50 + 51 + val update_repo : 52 + proc_mgr:Git.proc_mgr -> 53 + root:Worktree.root -> 54 + ?cache:Vendor_cache.t -> 55 + string -> 56 + Backend.update_result 57 + (** [update_repo ~proc_mgr ~root ?cache name] updates a vendored repository from upstream. *) 58 + 59 + val list_repos : 60 + proc_mgr:Git.proc_mgr -> 61 + root:Worktree.root -> 62 + string list 63 + (** [list_repos ~proc_mgr ~root] returns names of all vendored git repositories. *) 64 + 65 + val remove_repo : 66 + proc_mgr:Git.proc_mgr -> 67 + root:Worktree.root -> 68 + string -> 69 + unit 70 + (** [remove_repo ~proc_mgr ~root name] removes a vendored repository. *)
+73
lib/git_repo_lookup.ml
··· 1 + (** Git repository URL lookup and rewriting. 2 + 3 + This module handles URL rewriting for git repositories, mapping known 4 + slow upstream URLs to faster mirrors, and branch/tag overrides for 5 + specific packages. *) 6 + 7 + (** Rewrite a git URL to use a faster mirror if available. 8 + 9 + Currently handles: 10 + - erratique.ch repos are mirrored on GitHub under dbuenzli 11 + - git.robur.coop repos are mirrored on GitHub under robur-coop 12 + (strips the org prefix: git.robur.coop/robur/X -> github.com/robur-coop/X) *) 13 + let rewrite_url url = 14 + (* Helper to check and rewrite prefix *) 15 + let try_rewrite ~prefix ~replacement url = 16 + if String.length url > String.length prefix 17 + && String.sub url 0 (String.length prefix) = prefix 18 + then 19 + let rest = String.sub url (String.length prefix) 20 + (String.length url - String.length prefix) in 21 + Some (replacement ^ rest) 22 + else None 23 + in 24 + (* Helper to rewrite robur.coop URLs, stripping the org path component *) 25 + let try_rewrite_robur ~prefix url = 26 + if String.length url > String.length prefix 27 + && String.sub url 0 (String.length prefix) = prefix 28 + then 29 + (* rest is e.g. "robur/ohex.git" - strip org prefix *) 30 + let rest = String.sub url (String.length prefix) 31 + (String.length url - String.length prefix) in 32 + (* Find the first slash to strip the org *) 33 + match String.index_opt rest '/' with 34 + | Some idx -> 35 + let repo = String.sub rest (idx + 1) (String.length rest - idx - 1) in 36 + Some ("https://github.com/robur-coop/" ^ repo) 37 + | None -> Some ("https://github.com/robur-coop/" ^ rest) 38 + else None 39 + in 40 + (* Try each rewrite rule in order *) 41 + match try_rewrite ~prefix:"https://erratique.ch/repos/" 42 + ~replacement:"https://github.com/dbuenzli/" url with 43 + | Some u -> u 44 + | None -> 45 + match try_rewrite ~prefix:"http://erratique.ch/repos/" 46 + ~replacement:"https://github.com/dbuenzli/" url with 47 + | Some u -> u 48 + | None -> 49 + match try_rewrite_robur ~prefix:"https://git.robur.coop/" url with 50 + | Some u -> u 51 + | None -> 52 + match try_rewrite_robur ~prefix:"git://git.robur.coop/" url with 53 + | Some u -> u 54 + | None -> url 55 + 56 + (** Override branch/tag for specific packages. 57 + 58 + Some packages have unstable main branches or we want to pin to specific 59 + versions. This returns Some ref if an override exists, None otherwise. 60 + 61 + Currently handles: 62 + - dune: use tag 3.20.2 instead of main branch *) 63 + let branch_override ~name ~url = 64 + (* Dune's main branch can be unstable; pin to release tag *) 65 + let is_dune_url = 66 + String.equal url "https://github.com/ocaml/dune.git" || 67 + String.equal url "https://github.com/ocaml/dune" || 68 + String.equal url "git://github.com/ocaml/dune.git" 69 + in 70 + if name = "dune" || is_dune_url then 71 + Some "3.20.2" 72 + else 73 + None
+164
lib/init.ml
··· 1 + (** Project initialization for unpac. 2 + 3 + Creates the bare repository structure and initial main worktree. *) 4 + 5 + let default_unpac_toml = {|[opam] 6 + repositories = [] 7 + # compiler = "5.4.0" 8 + 9 + # Vendor cache location (default: XDG cache directory) 10 + # vendor_cache = "/path/to/cache" 11 + 12 + [projects] 13 + # Projects will be added here 14 + |} 15 + 16 + let project_dune_project name = Printf.sprintf {|(lang dune 3.20) 17 + (name %s) 18 + |} name 19 + 20 + let project_dune = {|(vendored_dirs vendor) 21 + |} 22 + 23 + let project_gitignore = {|_build/ 24 + *.install 25 + |} 26 + 27 + let vendor_dune = {|(vendored_dirs opam) 28 + |} 29 + 30 + (** Initialize a new unpac project at the given path. *) 31 + let init ~proc_mgr ~fs path = 32 + (* Convert relative paths to absolute *) 33 + let abs_path = 34 + if Filename.is_relative path then 35 + Filename.concat (Sys.getcwd ()) path 36 + else path 37 + in 38 + let root = Eio.Path.(fs / abs_path) in 39 + 40 + (* Create root directory *) 41 + Eio.Path.mkdirs ~exists_ok:false ~perm:0o755 root; 42 + 43 + (* Initialize bare repository *) 44 + let git_path = Eio.Path.(root / "git") in 45 + Eio.Path.mkdirs ~exists_ok:false ~perm:0o755 git_path; 46 + Git.run_exn ~proc_mgr ~cwd:git_path ["init"; "--bare"] |> ignore; 47 + 48 + (* Create initial main branch with unpac.toml *) 49 + (* First create a temporary worktree to make the initial commit *) 50 + let main_path = Eio.Path.(root / "main") in 51 + Eio.Path.mkdirs ~exists_ok:false ~perm:0o755 main_path; 52 + 53 + (* Initialize as a regular repo temporarily to create first commit *) 54 + Git.run_exn ~proc_mgr ~cwd:main_path ["init"] |> ignore; 55 + 56 + (* Write unpac.toml *) 57 + Eio.Path.save ~create:(`Or_truncate 0o644) 58 + Eio.Path.(main_path / "unpac.toml") 59 + default_unpac_toml; 60 + 61 + (* Create initial commit *) 62 + Git.run_exn ~proc_mgr ~cwd:main_path ["add"; "unpac.toml"] |> ignore; 63 + Git.run_exn ~proc_mgr ~cwd:main_path 64 + ["commit"; "-m"; "Initial commit"] |> ignore; 65 + 66 + (* Rename branch to main if needed *) 67 + Git.run_exn ~proc_mgr ~cwd:main_path ["branch"; "-M"; "main"] |> ignore; 68 + 69 + (* Push to bare repo and convert to worktree *) 70 + Git.run_exn ~proc_mgr ~cwd:main_path 71 + ["remote"; "add"; "origin"; "../git"] |> ignore; 72 + Git.run_exn ~proc_mgr ~cwd:main_path 73 + ["push"; "-u"; "origin"; "main"] |> ignore; 74 + 75 + (* Remove the temporary clone and add main as a worktree of the bare repo *) 76 + Eio.Path.rmtree main_path; 77 + 78 + (* Add main as a worktree of the bare repo *) 79 + Git.run_exn ~proc_mgr ~cwd:git_path 80 + ["worktree"; "add"; "../main"; "main"] |> ignore; 81 + 82 + root 83 + 84 + (** Check if a path is an unpac project root. *) 85 + let is_unpac_root path = 86 + Eio.Path.is_directory Eio.Path.(path / "git") && 87 + Eio.Path.is_directory Eio.Path.(path / "main") && 88 + Eio.Path.is_file Eio.Path.(path / "main" / "unpac.toml") 89 + 90 + (** Find the unpac root by walking up from current directory. *) 91 + let find_root ~fs ~cwd = 92 + let rec go path = 93 + if is_unpac_root path then Some path 94 + else match Eio.Path.split path with 95 + | Some (parent, _) -> go parent 96 + | None -> None 97 + in 98 + go Eio.Path.(fs / cwd) 99 + 100 + (** Create a new project branch with template. *) 101 + let create_project ~proc_mgr root name = 102 + let project_path = Worktree.path root (Project name) in 103 + 104 + (* Ensure project directory parent exists *) 105 + let project_dir = Eio.Path.(root / "project") in 106 + Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 project_dir; 107 + 108 + (* Create orphan branch *) 109 + Worktree.ensure_orphan ~proc_mgr root (Project name); 110 + 111 + (* Write template files *) 112 + Eio.Path.save ~create:(`Or_truncate 0o644) 113 + Eio.Path.(project_path / "dune-project") 114 + (project_dune_project name); 115 + 116 + Eio.Path.save ~create:(`Or_truncate 0o644) 117 + Eio.Path.(project_path / "dune") 118 + project_dune; 119 + 120 + Eio.Path.save ~create:(`Or_truncate 0o644) 121 + Eio.Path.(project_path / ".gitignore") 122 + project_gitignore; 123 + 124 + (* Create vendor directory structure with dune file *) 125 + Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 126 + Eio.Path.(project_path / "vendor" / "opam"); 127 + 128 + Eio.Path.save ~create:(`Or_truncate 0o644) 129 + Eio.Path.(project_path / "vendor" / "dune") 130 + vendor_dune; 131 + 132 + (* Commit template *) 133 + Git.run_exn ~proc_mgr ~cwd:project_path ["add"; "-A"] |> ignore; 134 + Git.run_exn ~proc_mgr ~cwd:project_path 135 + ["commit"; "-m"; "Initialize project " ^ name] |> ignore; 136 + 137 + (* Update main/unpac.toml to register project *) 138 + let main_path = Worktree.path root Main in 139 + let toml_path = Eio.Path.(main_path / "unpac.toml") in 140 + let content = Eio.Path.load toml_path in 141 + 142 + (* Simple append to [projects] section - a proper implementation would parse TOML *) 143 + let updated = 144 + if content = "" || not (String.ends_with ~suffix:"\n" content) 145 + then content ^ "\n" ^ name ^ " = {}\n" 146 + else content ^ name ^ " = {}\n" 147 + in 148 + Eio.Path.save ~create:(`Or_truncate 0o644) toml_path updated; 149 + 150 + Git.run_exn ~proc_mgr ~cwd:main_path ["add"; "unpac.toml"] |> ignore; 151 + Git.run_exn ~proc_mgr ~cwd:main_path 152 + ["commit"; "-m"; "Add project " ^ name] |> ignore; 153 + 154 + project_path 155 + 156 + (** Remove a project branch and worktree. *) 157 + let remove_project ~proc_mgr root name = 158 + (* Remove worktree if exists *) 159 + Worktree.remove_force ~proc_mgr root (Project name); 160 + 161 + (* Delete the branch *) 162 + let git = Worktree.git_dir root in 163 + let branch = Worktree.branch (Project name) in 164 + Git.run_exn ~proc_mgr ~cwd:git ["branch"; "-D"; branch] |> ignore
+44
lib/init.mli
··· 1 + (** Project initialization for unpac. 2 + 3 + Creates the bare repository structure and initial main worktree. *) 4 + 5 + val init : 6 + proc_mgr:Git.proc_mgr -> 7 + fs:Eio.Fs.dir_ty Eio.Path.t -> 8 + string -> 9 + Worktree.root 10 + (** [init ~proc_mgr ~fs path] creates a new unpac project at [path]. 11 + 12 + Creates: 13 + - [path/git/] - bare git repository 14 + - [path/main/] - worktree for main branch with unpac.toml *) 15 + 16 + val is_unpac_root : Eio.Fs.dir_ty Eio.Path.t -> bool 17 + (** [is_unpac_root path] checks if [path] is an unpac project root. *) 18 + 19 + val find_root : 20 + fs:Eio.Fs.dir_ty Eio.Path.t -> 21 + cwd:string -> 22 + Worktree.root option 23 + (** [find_root ~fs ~cwd] walks up from [cwd] to find the unpac root. *) 24 + 25 + val create_project : 26 + proc_mgr:Git.proc_mgr -> 27 + Worktree.root -> 28 + string -> 29 + Eio.Fs.dir_ty Eio.Path.t 30 + (** [create_project ~proc_mgr root name] creates a new project branch. 31 + 32 + Creates orphan branch [project/<name>] with template: 33 + - dune-project (lang dune 3.20) 34 + - dune with (vendored_dirs vendor) 35 + - vendor/opam/ directory 36 + 37 + Updates main/unpac.toml to register the project. *) 38 + 39 + val remove_project : 40 + proc_mgr:Git.proc_mgr -> 41 + Worktree.root -> 42 + string -> 43 + unit 44 + (** [remove_project ~proc_mgr root name] removes a project branch and worktree. *)
+304
lib/monorepo.ml
··· 1 + (** Monorepo export: create a standalone buildable directory from unpac workspace. 2 + 3 + Combines all projects and their vendored dependencies into a single directory 4 + structure suitable for building with dune. No git history is included. *) 5 + 6 + let src = Logs.Src.create "unpac.monorepo" ~doc:"Monorepo export" 7 + module Log = (val Logs.src_log src : Logs.LOG) 8 + 9 + type export_config = { 10 + output_dir : string; 11 + projects : string list option; (** None = all projects *) 12 + include_opam : bool; 13 + include_git : bool; 14 + } 15 + 16 + type export_result = { 17 + projects_exported : string list; 18 + opam_packages : string list; 19 + git_repos : string list; 20 + output_path : string; 21 + } 22 + 23 + let default_config ~output_dir = { 24 + output_dir; 25 + projects = None; 26 + include_opam = true; 27 + include_git = true; 28 + } 29 + 30 + (* Copy a directory tree recursively, excluding .git and _build *) 31 + let rec copy_tree ~src ~dst = 32 + if Sys.is_directory src then begin 33 + if not (Sys.file_exists dst) then 34 + Unix.mkdir dst 0o755; 35 + let entries = Sys.readdir src in 36 + Array.iter (fun name -> 37 + if name <> ".git" && name <> "_build" then begin 38 + let src_path = Filename.concat src name in 39 + let dst_path = Filename.concat dst name in 40 + copy_tree ~src:src_path ~dst:dst_path 41 + end 42 + ) entries 43 + end else begin 44 + (* Copy file *) 45 + let content = In_channel.with_open_bin src In_channel.input_all in 46 + Out_channel.with_open_bin dst (fun oc -> 47 + Out_channel.output_string oc content) 48 + end 49 + 50 + (* Remove a directory tree recursively *) 51 + let rec remove_tree path = 52 + if Sys.file_exists path then begin 53 + if Sys.is_directory path then begin 54 + Array.iter (fun name -> 55 + remove_tree (Filename.concat path name) 56 + ) (Sys.readdir path); 57 + Unix.rmdir path 58 + end else 59 + Sys.remove path 60 + end 61 + 62 + (* Export files from a git branch to a directory (without git history) *) 63 + let export_branch_to_dir ~proc_mgr ~git_dir ~branch ~output_dir = 64 + Log.info (fun m -> m "Exporting branch %s to %s" branch output_dir); 65 + let temp_dir = Filename.temp_dir "unpac-export" "" in 66 + begin 67 + try 68 + (* Check if branch exists *) 69 + let branch_exists = 70 + match Git.rev_parse ~proc_mgr ~cwd:git_dir branch with 71 + | Some _ -> true 72 + | None -> false 73 + in 74 + if not branch_exists then begin 75 + Log.warn (fun m -> m "Branch %s does not exist, skipping" branch); 76 + false 77 + end else begin 78 + (* Create temporary worktree *) 79 + Git.run_exn ~proc_mgr ~cwd:git_dir 80 + ["worktree"; "add"; "--detach"; temp_dir; branch] 81 + |> ignore; 82 + (* Copy files to output *) 83 + if not (Sys.file_exists output_dir) then 84 + Unix.mkdir output_dir 0o755; 85 + copy_tree ~src:temp_dir ~dst:output_dir; 86 + (* Remove worktree *) 87 + Git.run_exn ~proc_mgr ~cwd:git_dir 88 + ["worktree"; "remove"; "--force"; temp_dir] 89 + |> ignore; 90 + true 91 + end 92 + with exn -> 93 + (* Clean up on error *) 94 + (try 95 + Git.run_exn ~proc_mgr ~cwd:git_dir 96 + ["worktree"; "remove"; "--force"; temp_dir] 97 + |> ignore 98 + with _ -> ()); 99 + (try remove_tree temp_dir with _ -> ()); 100 + raise exn 101 + end 102 + 103 + (* Export a project, stripping its vendor/ directory *) 104 + let export_project ~proc_mgr ~git_dir ~project ~output_dir = 105 + let branch = "project/" ^ project in 106 + let project_dir = Filename.concat output_dir project in 107 + Log.info (fun m -> m "Exporting project %s" project); 108 + 109 + if export_branch_to_dir ~proc_mgr ~git_dir ~branch ~output_dir:project_dir then begin 110 + (* Remove the vendor/ directory from exported project - deps go in root vendor/ *) 111 + let vendor_dir = Filename.concat project_dir "vendor" in 112 + if Sys.file_exists vendor_dir then begin 113 + Log.info (fun m -> m "Removing vendor/ from project %s (will use root vendor/)" project); 114 + remove_tree vendor_dir 115 + end; 116 + true 117 + end else 118 + false 119 + 120 + (* Export an opam package from patches branch *) 121 + let export_opam_package ~proc_mgr ~git_dir ~package ~vendor_dir = 122 + let branch = "opam/patches/" ^ package in 123 + let package_dir = Filename.concat (Filename.concat vendor_dir "opam") package in 124 + Log.info (fun m -> m "Exporting opam package %s" package); 125 + export_branch_to_dir ~proc_mgr ~git_dir ~branch ~output_dir:package_dir 126 + 127 + (* Export a git repo from patches branch *) 128 + let export_git_repo ~proc_mgr ~git_dir ~repo ~vendor_dir = 129 + let branch = "git-repos/patches/" ^ repo in 130 + let repo_dir = Filename.concat (Filename.concat vendor_dir "git") repo in 131 + Log.info (fun m -> m "Exporting git repo %s" repo); 132 + export_branch_to_dir ~proc_mgr ~git_dir ~branch ~output_dir:repo_dir 133 + 134 + (* Generate root dune-project file *) 135 + let generate_dune_project ~output_dir ~projects = 136 + let content = Printf.sprintf 137 + {|(lang dune 3.0) 138 + (name unpac-monorepo) 139 + 140 + ; Combined monorepo from unpac workspace 141 + ; Projects: %s 142 + 143 + (generate_opam_files false) 144 + |} 145 + (String.concat ", " projects) 146 + in 147 + let path = Filename.concat output_dir "dune-project" in 148 + Out_channel.with_open_bin path (fun oc -> 149 + Out_channel.output_string oc content) 150 + 151 + (* Generate root dune file with vendored_dirs and includes *) 152 + let generate_root_dune ~output_dir ~projects ~has_opam ~has_git = 153 + let vendor_stanzas = 154 + if has_opam || has_git then 155 + "(vendored_dirs vendor)\n" 156 + else "" 157 + in 158 + (* Simple root dune - projects are subdirectories *) 159 + let content = Printf.sprintf 160 + {|; Root dune file for unpac monorepo 161 + ; Auto-generated - do not edit 162 + ; Projects: %s 163 + 164 + %s|} 165 + (String.concat ", " projects) 166 + vendor_stanzas 167 + in 168 + let path = Filename.concat output_dir "dune" in 169 + Out_channel.with_open_bin path (fun oc -> 170 + Out_channel.output_string oc content) 171 + 172 + (* Generate vendor/dune file *) 173 + let generate_vendor_dune ~vendor_dir ~has_opam ~has_git = 174 + let subdirs = 175 + (if has_opam then ["opam"] else []) @ 176 + (if has_git then ["git"] else []) 177 + in 178 + if subdirs <> [] then begin 179 + let content = Printf.sprintf 180 + {|; Vendor dune file for unpac monorepo 181 + (vendored_dirs %s) 182 + |} 183 + (String.concat " " subdirs) 184 + in 185 + let path = Filename.concat vendor_dir "dune" in 186 + Out_channel.with_open_bin path (fun oc -> 187 + Out_channel.output_string oc content) 188 + end 189 + 190 + (* Update project dune files to reference parent vendor/ *) 191 + let update_project_dune ~project_dir = 192 + let dune_path = Filename.concat project_dir "dune" in 193 + if Sys.file_exists dune_path then begin 194 + let content = In_channel.with_open_bin dune_path In_channel.input_all in 195 + (* Remove local vendored_dirs since we use root-level vendor/ *) 196 + if String.length content > 0 then begin 197 + let lines = String.split_on_char '\n' content in 198 + let filtered = List.filter (fun line -> 199 + let trimmed = String.trim line in 200 + not (String.length trimmed >= 14 && 201 + String.sub trimmed 0 14 = "(vendored_dirs") 202 + ) lines in 203 + let updated = String.concat "\n" filtered in 204 + Out_channel.with_open_bin dune_path (fun oc -> 205 + Out_channel.output_string oc updated) 206 + end 207 + end 208 + 209 + (* Main export function *) 210 + let export ~proc_mgr ~root ~config = 211 + let git_dir = Worktree.git_dir root in 212 + 213 + (* Create output directory *) 214 + if not (Sys.file_exists config.output_dir) then 215 + Unix.mkdir config.output_dir 0o755; 216 + 217 + (* Get list of projects to export *) 218 + let all_projects = Worktree.list_projects ~proc_mgr root in 219 + let projects = match config.projects with 220 + | Some ps -> List.filter (fun p -> List.mem p all_projects) ps 221 + | None -> all_projects 222 + in 223 + 224 + if projects = [] then begin 225 + Log.warn (fun m -> m "No projects to export"); 226 + { projects_exported = []; opam_packages = []; git_repos = []; 227 + output_path = config.output_dir } 228 + end else begin 229 + Log.info (fun m -> m "Exporting %d projects: %s" 230 + (List.length projects) (String.concat ", " projects)); 231 + 232 + (* Export each project *) 233 + let exported_projects = List.filter_map (fun project -> 234 + if export_project ~proc_mgr ~git_dir ~project ~output_dir:config.output_dir then 235 + Some project 236 + else 237 + None 238 + ) projects in 239 + 240 + (* Create vendor directory *) 241 + let vendor_dir = Filename.concat config.output_dir "vendor" in 242 + if not (Sys.file_exists vendor_dir) then 243 + Unix.mkdir vendor_dir 0o755; 244 + 245 + (* Export opam packages *) 246 + let opam_packages = 247 + if config.include_opam then begin 248 + let all_opam = Worktree.list_opam_packages ~proc_mgr root in 249 + Log.info (fun m -> m "Exporting %d opam packages" (List.length all_opam)); 250 + (* Create opam subdirectory *) 251 + let opam_dir = Filename.concat vendor_dir "opam" in 252 + if not (Sys.file_exists opam_dir) && all_opam <> [] then 253 + Unix.mkdir opam_dir 0o755; 254 + List.filter_map (fun pkg -> 255 + if export_opam_package ~proc_mgr ~git_dir ~package:pkg ~vendor_dir then 256 + Some pkg 257 + else 258 + None 259 + ) all_opam 260 + end else [] 261 + in 262 + 263 + (* Export git repos *) 264 + let git_repos = 265 + if config.include_git then begin 266 + let all_git = Git_backend.list_repos ~proc_mgr ~root in 267 + Log.info (fun m -> m "Exporting %d git repos" (List.length all_git)); 268 + (* Create git subdirectory *) 269 + let git_subdir = Filename.concat vendor_dir "git" in 270 + if not (Sys.file_exists git_subdir) && all_git <> [] then 271 + Unix.mkdir git_subdir 0o755; 272 + List.filter_map (fun repo -> 273 + if export_git_repo ~proc_mgr ~git_dir ~repo ~vendor_dir then 274 + Some repo 275 + else 276 + None 277 + ) all_git 278 + end else [] 279 + in 280 + 281 + (* Generate dune files *) 282 + let has_opam = opam_packages <> [] in 283 + let has_git = git_repos <> [] in 284 + 285 + generate_dune_project ~output_dir:config.output_dir ~projects:exported_projects; 286 + generate_root_dune ~output_dir:config.output_dir ~projects:exported_projects 287 + ~has_opam ~has_git; 288 + 289 + if has_opam || has_git then 290 + generate_vendor_dune ~vendor_dir ~has_opam ~has_git; 291 + 292 + (* Update project dune files *) 293 + List.iter (fun project -> 294 + let project_dir = Filename.concat config.output_dir project in 295 + update_project_dune ~project_dir 296 + ) exported_projects; 297 + 298 + Log.info (fun m -> m "Monorepo export complete: %s" config.output_dir); 299 + 300 + { projects_exported = exported_projects; 301 + opam_packages; 302 + git_repos; 303 + output_path = config.output_dir } 304 + end
+79
lib/monorepo.mli
··· 1 + (** Monorepo export: create a standalone buildable directory from unpac workspace. 2 + 3 + Combines all projects and their vendored dependencies into a single directory 4 + structure suitable for building with dune. No git history is included. 5 + 6 + {1 Output Structure} 7 + 8 + The exported monorepo has this structure: 9 + {v 10 + output/ 11 + ├── dune-project # Combined project metadata 12 + ├── dune # Root dune with vendored_dirs 13 + ├── project1/ # First project 14 + │ ├── src/ 15 + │ ├── dune 16 + │ └── dune-project 17 + ├── project2/ # Second project 18 + │ └── ... 19 + └── vendor/ # All vendored dependencies 20 + ├── dune # (vendored_dirs opam git) 21 + ├── opam/ 22 + │ ├── astring/ 23 + │ ├── eio/ 24 + │ └── ... 25 + └── git/ 26 + ├── mylib/ 27 + └── ... 28 + v} 29 + 30 + {1 Usage} 31 + 32 + {v 33 + unpac monorepo /path/to/output 34 + unpac monorepo -p myproject /path/to/output # single project 35 + unpac monorepo --no-opam /path/to/output # skip opam packages 36 + v} 37 + 38 + The output can be built directly with [dune build] or [dune build @doc]. 39 + *) 40 + 41 + (** {1 Configuration} *) 42 + 43 + type export_config = { 44 + output_dir : string; (** Target directory for export *) 45 + projects : string list option; (** Projects to include (None = all) *) 46 + include_opam : bool; (** Include vendored opam packages *) 47 + include_git : bool; (** Include vendored git repositories *) 48 + } 49 + 50 + val default_config : output_dir:string -> export_config 51 + (** Create default config exporting all projects and dependencies. *) 52 + 53 + (** {1 Export Result} *) 54 + 55 + type export_result = { 56 + projects_exported : string list; (** Projects that were exported *) 57 + opam_packages : string list; (** Opam packages in vendor/ *) 58 + git_repos : string list; (** Git repos in vendor/ *) 59 + output_path : string; (** Path to output directory *) 60 + } 61 + 62 + (** {1 Export Function} *) 63 + 64 + val export : 65 + proc_mgr:Git.proc_mgr -> 66 + root:Worktree.root -> 67 + config:export_config -> 68 + export_result 69 + (** [export ~proc_mgr ~root ~config] creates a standalone monorepo. 70 + 71 + The function: 72 + 1. Exports each project from its [project/<name>] branch 73 + 2. Strips the [vendor/] directory from each project 74 + 3. Exports all vendored opam packages from [opam/patches/*] branches 75 + 4. Exports all vendored git repos from [git-repos/patches/*] branches 76 + 5. Places dependencies in a shared [vendor/] directory 77 + 6. Generates appropriate dune files for building 78 + 79 + No git history is preserved - only the current state of each branch. *)
+10
lib/opam/dune
··· 1 + (library 2 + (name unpac_opam) 3 + (public_name unpac-opam) 4 + (libraries 5 + unpac 6 + cmdliner 7 + opam-format 8 + opam-core 9 + opam-state 10 + opam-0install))
+220
lib/opam/opam.ml
··· 1 + (** Opam backend for unpac. 2 + 3 + Implements vendoring of opam packages using the three-tier branch model: 4 + - opam/upstream/<pkg> - pristine upstream code 5 + - opam/vendor/<pkg> - upstream history rewritten with vendor/opam/<pkg>/ prefix 6 + - opam/patches/<pkg> - local modifications 7 + 8 + The vendor branch preserves full git history from upstream, with all paths 9 + rewritten to be under vendor/opam/<pkg>/. This allows git blame/log to work 10 + correctly on vendored files. *) 11 + 12 + module Worktree = Unpac.Worktree 13 + module Git = Unpac.Git 14 + module Git_repo_lookup = Unpac.Git_repo_lookup 15 + module Vendor_cache = Unpac.Vendor_cache 16 + module Backend = Unpac.Backend 17 + 18 + let name = "opam" 19 + 20 + (** {1 Branch Naming} *) 21 + 22 + let upstream_branch pkg = "opam/upstream/" ^ pkg 23 + let vendor_branch pkg = "opam/vendor/" ^ pkg 24 + let patches_branch pkg = "opam/patches/" ^ pkg 25 + let vendor_path pkg = "vendor/opam/" ^ pkg 26 + 27 + (** {1 Worktree Kinds} *) 28 + 29 + let upstream_kind pkg = Worktree.Opam_upstream pkg 30 + let vendor_kind pkg = Worktree.Opam_vendor pkg 31 + let patches_kind pkg = Worktree.Opam_patches pkg 32 + 33 + (** {1 Package Operations} *) 34 + 35 + let copy_with_prefix ~src_dir ~dst_dir ~prefix = 36 + (* Recursively copy files from src_dir to dst_dir/prefix/ *) 37 + let prefix_dir = Eio.Path.(dst_dir / prefix) in 38 + Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 prefix_dir; 39 + 40 + let rec copy_dir src dst = 41 + Eio.Path.read_dir src |> List.iter (fun name -> 42 + let src_path = Eio.Path.(src / name) in 43 + let dst_path = Eio.Path.(dst / name) in 44 + if Eio.Path.is_directory src_path then begin 45 + Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 dst_path; 46 + copy_dir src_path dst_path 47 + end else begin 48 + let content = Eio.Path.load src_path in 49 + Eio.Path.save ~create:(`Or_truncate 0o644) dst_path content 50 + end 51 + ) 52 + in 53 + 54 + (* Copy everything except .git *) 55 + Eio.Path.read_dir src_dir |> List.iter (fun name -> 56 + if name <> ".git" then begin 57 + let src_path = Eio.Path.(src_dir / name) in 58 + let dst_path = Eio.Path.(prefix_dir / name) in 59 + if Eio.Path.is_directory src_path then begin 60 + Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 dst_path; 61 + copy_dir src_path dst_path 62 + end else begin 63 + let content = Eio.Path.load src_path in 64 + Eio.Path.save ~create:(`Or_truncate 0o644) dst_path content 65 + end 66 + end 67 + ) 68 + 69 + let add_package ~proc_mgr ~root ?cache (info : Backend.package_info) = 70 + let pkg = info.name in 71 + let git = Worktree.git_dir root in 72 + 73 + try 74 + (* Check if already exists *) 75 + if Worktree.branch_exists ~proc_mgr root (patches_kind pkg) then 76 + Backend.Already_exists pkg 77 + else begin 78 + (* Rewrite URL for known mirrors *) 79 + let url = Git_repo_lookup.rewrite_url info.url in 80 + 81 + (* Determine the ref to use: explicit > override > default *) 82 + let branch = match info.branch with 83 + | Some b -> b 84 + | None -> 85 + match Git_repo_lookup.branch_override ~name:pkg ~url with 86 + | Some b -> b 87 + | None -> Git.ls_remote_default_branch ~proc_mgr ~cwd:git ~url 88 + in 89 + 90 + (* Fetch - either via cache or directly *) 91 + let ref_point = match cache with 92 + | Some cache_path -> 93 + (* Fetch through vendor cache *) 94 + Vendor_cache.fetch_to_project ~proc_mgr 95 + ~cache:cache_path ~project_git:git ~url ~branch 96 + | None -> 97 + (* Direct fetch (with tags to support version tags like 3.20.2) *) 98 + let remote = "origin-" ^ pkg in 99 + ignore (Git.ensure_remote ~proc_mgr ~cwd:git ~name:remote ~url); 100 + Git.fetch_with_tags ~proc_mgr ~cwd:git ~remote; 101 + Git.resolve_branch_or_tag ~proc_mgr ~cwd:git ~remote ~ref_name:branch 102 + in 103 + 104 + (* Step 1: Create upstream branch from fetched ref *) 105 + Git.branch_force ~proc_mgr ~cwd:git 106 + ~name:(upstream_branch pkg) ~point:ref_point; 107 + 108 + (* Step 2: Create vendor branch from upstream and rewrite history *) 109 + Git.branch_force ~proc_mgr ~cwd:git 110 + ~name:(vendor_branch pkg) ~point:(upstream_branch pkg); 111 + 112 + (* Rewrite vendor branch history to move all files into vendor/opam/<pkg>/ *) 113 + Git.filter_repo_to_subdirectory ~proc_mgr ~cwd:git 114 + ~branch:(vendor_branch pkg) 115 + ~subdirectory:(vendor_path pkg); 116 + 117 + (* Get the vendor SHA after rewriting *) 118 + let vendor_sha = match Git.rev_parse ~proc_mgr ~cwd:git (vendor_branch pkg) with 119 + | Some sha -> sha 120 + | None -> failwith "Vendor branch not found after filter-repo" 121 + in 122 + 123 + (* Step 3: Create patches branch from vendor *) 124 + Git.branch_create ~proc_mgr ~cwd:git 125 + ~name:(patches_branch pkg) 126 + ~start_point:(vendor_branch pkg); 127 + 128 + Backend.Added { name = pkg; sha = vendor_sha } 129 + end 130 + with exn -> 131 + (* Cleanup on failure *) 132 + (try Worktree.remove_force ~proc_mgr root (upstream_kind pkg) with _ -> ()); 133 + (try Worktree.remove_force ~proc_mgr root (vendor_kind pkg) with _ -> ()); 134 + Backend.Failed { name = pkg; error = Printexc.to_string exn } 135 + 136 + let update_package ~proc_mgr ~root ?cache pkg = 137 + let git = Worktree.git_dir root in 138 + 139 + try 140 + (* Check if package exists *) 141 + if not (Worktree.branch_exists ~proc_mgr root (patches_kind pkg)) then 142 + Backend.Update_failed { name = pkg; error = "Package not vendored" } 143 + else begin 144 + (* Get remote URL - check origin-<pkg> first, then upstream-<pkg> for promoted packages *) 145 + let origin_remote = "origin-" ^ pkg in 146 + let upstream_remote = "upstream-" ^ pkg in 147 + let (remote, url) = match Git.remote_url ~proc_mgr ~cwd:git origin_remote with 148 + | Some u -> (origin_remote, u) 149 + | None -> 150 + (* Try upstream remote for promoted/local packages *) 151 + match Git.remote_url ~proc_mgr ~cwd:git upstream_remote with 152 + | Some u -> (upstream_remote, u) 153 + | None -> failwith (Printf.sprintf "No remote found. Set one with: unpac opam set-upstream %s <url>" pkg) 154 + in 155 + 156 + (* Fetch latest - either via cache or directly (with tags for completeness) *) 157 + (match cache with 158 + | Some cache_path -> 159 + let branch = Git.ls_remote_default_branch ~proc_mgr ~cwd:git ~url in 160 + ignore (Vendor_cache.fetch_to_project ~proc_mgr 161 + ~cache:cache_path ~project_git:git ~url ~branch) 162 + | None -> 163 + Git.fetch_with_tags ~proc_mgr ~cwd:git ~remote); 164 + 165 + (* Get old SHA *) 166 + let old_sha = match Git.rev_parse ~proc_mgr ~cwd:git (upstream_branch pkg) with 167 + | Some sha -> sha 168 + | None -> failwith "Upstream branch not found" 169 + in 170 + 171 + (* Determine default branch and update upstream *) 172 + let default_branch = Git.ls_remote_default_branch ~proc_mgr ~cwd:git ~url in 173 + let ref_point = remote ^ "/" ^ default_branch in 174 + Git.branch_force ~proc_mgr ~cwd:git 175 + ~name:(upstream_branch pkg) ~point:ref_point; 176 + 177 + (* Get new SHA *) 178 + let new_sha = match Git.rev_parse ~proc_mgr ~cwd:git (upstream_branch pkg) with 179 + | Some sha -> sha 180 + | None -> failwith "Upstream branch not found" 181 + in 182 + 183 + if old_sha = new_sha then 184 + Backend.No_changes pkg 185 + else begin 186 + (* Create worktrees *) 187 + Worktree.ensure ~proc_mgr root (upstream_kind pkg); 188 + Worktree.ensure ~proc_mgr root (vendor_kind pkg); 189 + 190 + let upstream_wt = Worktree.path root (upstream_kind pkg) in 191 + let vendor_wt = Worktree.path root (vendor_kind pkg) in 192 + 193 + (* Clear vendor content and copy new *) 194 + let vendor_pkg_path = Eio.Path.(vendor_wt / "vendor" / "opam" / pkg) in 195 + (try Eio.Path.rmtree vendor_pkg_path with _ -> ()); 196 + 197 + copy_with_prefix 198 + ~src_dir:upstream_wt 199 + ~dst_dir:vendor_wt 200 + ~prefix:(vendor_path pkg); 201 + 202 + (* Commit *) 203 + Git.add_all ~proc_mgr ~cwd:vendor_wt; 204 + Git.commit ~proc_mgr ~cwd:vendor_wt 205 + ~message:(Printf.sprintf "Update %s to %s" pkg (String.sub new_sha 0 7)); 206 + 207 + (* Cleanup *) 208 + Worktree.remove ~proc_mgr root (upstream_kind pkg); 209 + Worktree.remove ~proc_mgr root (vendor_kind pkg); 210 + 211 + Backend.Updated { name = pkg; old_sha; new_sha } 212 + end 213 + end 214 + with exn -> 215 + (try Worktree.remove_force ~proc_mgr root (upstream_kind pkg) with _ -> ()); 216 + (try Worktree.remove_force ~proc_mgr root (vendor_kind pkg) with _ -> ()); 217 + Backend.Update_failed { name = pkg; error = Printexc.to_string exn } 218 + 219 + let list_packages ~proc_mgr ~root = 220 + Worktree.list_opam_packages ~proc_mgr root
+67
lib/opam/opam.mli
··· 1 + (** Opam backend for unpac. 2 + 3 + Implements vendoring of opam packages using the three-tier branch model: 4 + - opam/upstream/<pkg> - pristine upstream code 5 + - opam/vendor/<pkg> - orphan branch with vendor/opam/<pkg>/ prefix 6 + - opam/patches/<pkg> - local modifications *) 7 + 8 + val name : string 9 + (** Backend name: "opam" *) 10 + 11 + (** {1 Branch Naming} *) 12 + 13 + val upstream_branch : string -> string 14 + (** [upstream_branch pkg] returns "opam/upstream/<pkg>". *) 15 + 16 + val vendor_branch : string -> string 17 + (** [vendor_branch pkg] returns "opam/vendor/<pkg>". *) 18 + 19 + val patches_branch : string -> string 20 + (** [patches_branch pkg] returns "opam/patches/<pkg>". *) 21 + 22 + val vendor_path : string -> string 23 + (** [vendor_path pkg] returns "vendor/opam/<pkg>". *) 24 + 25 + (** {1 Worktree Kinds} *) 26 + 27 + val upstream_kind : string -> Unpac.Worktree.kind 28 + val vendor_kind : string -> Unpac.Worktree.kind 29 + val patches_kind : string -> Unpac.Worktree.kind 30 + 31 + (** {1 Package Operations} *) 32 + 33 + val add_package : 34 + proc_mgr:Unpac.Git.proc_mgr -> 35 + root:Unpac.Worktree.root -> 36 + ?cache:Unpac.Vendor_cache.t -> 37 + Unpac.Backend.package_info -> 38 + Unpac.Backend.add_result 39 + (** [add_package ~proc_mgr ~root ?cache info] vendors a single package. 40 + 41 + 1. Fetches upstream into opam/upstream/<pkg> (via cache if provided) 42 + 2. Creates opam/vendor/<pkg> with vendor/opam/<pkg>/ prefix (preserving history) 43 + 3. Creates opam/patches/<pkg> from vendor 44 + 45 + Uses git-filter-repo for fast history rewriting. 46 + @param cache Optional vendor cache for shared fetches across projects. *) 47 + 48 + val update_package : 49 + proc_mgr:Unpac.Git.proc_mgr -> 50 + root:Unpac.Worktree.root -> 51 + ?cache:Unpac.Vendor_cache.t -> 52 + string -> 53 + Unpac.Backend.update_result 54 + (** [update_package ~proc_mgr ~root ?cache name] updates a package from upstream. 55 + 56 + 1. Fetches latest into opam/upstream/<pkg> (via cache if provided) 57 + 2. Updates opam/vendor/<pkg> with new content 58 + 59 + Does NOT rebase patches - call [Backend.rebase_patches] separately. 60 + 61 + @param cache Optional vendor cache for shared fetches across projects. *) 62 + 63 + val list_packages : 64 + proc_mgr:Unpac.Git.proc_mgr -> 65 + root:Unpac.Worktree.root -> 66 + string list 67 + (** [list_packages ~proc_mgr root] returns all vendored opam package names. *)
+107
lib/opam/opam_file.ml
··· 1 + (** Opam file parsing for extracting package metadata. *) 2 + 3 + type metadata = { 4 + name : string; 5 + version : string; 6 + dev_repo : string option; 7 + synopsis : string option; 8 + } 9 + 10 + let empty_metadata = { 11 + name = ""; 12 + version = ""; 13 + dev_repo = None; 14 + synopsis = None; 15 + } 16 + 17 + (** Parse an opam file and extract metadata. *) 18 + let parse ~name ~version content = 19 + try 20 + let opam = OpamParser.FullPos.string content "<opam>" in 21 + let items = opam.file_contents in 22 + 23 + let dev_repo = ref None in 24 + let synopsis = ref None in 25 + 26 + List.iter (fun item -> 27 + match item.OpamParserTypes.FullPos.pelem with 28 + | OpamParserTypes.FullPos.Variable (name_pos, value_pos) -> 29 + let var_name = name_pos.OpamParserTypes.FullPos.pelem in 30 + (match var_name, value_pos.OpamParserTypes.FullPos.pelem with 31 + | "dev-repo", OpamParserTypes.FullPos.String s -> 32 + dev_repo := Some s 33 + | "synopsis", OpamParserTypes.FullPos.String s -> 34 + synopsis := Some s 35 + | _ -> ()) 36 + | _ -> () 37 + ) items; 38 + 39 + { name; version; dev_repo = !dev_repo; synopsis = !synopsis } 40 + with _ -> 41 + { empty_metadata with name; version } 42 + 43 + (** Parse an opam file from a path. *) 44 + let parse_file ~name ~version path = 45 + let content = In_channel.with_open_text path In_channel.input_all in 46 + parse ~name ~version content 47 + 48 + (** Find a package in an opam repository directory. 49 + Returns the path to the opam file if found. *) 50 + let find_in_repo ~repo_path ~name ?version () = 51 + let packages_dir = Filename.concat repo_path "packages" in 52 + let pkg_dir = Filename.concat packages_dir name in 53 + 54 + if not (Sys.file_exists pkg_dir && Sys.is_directory pkg_dir) then 55 + None 56 + else 57 + (* List version directories *) 58 + let entries = Sys.readdir pkg_dir |> Array.to_list in 59 + let version_dirs = List.filter (fun entry -> 60 + let full = Filename.concat pkg_dir entry in 61 + Sys.is_directory full && String.starts_with ~prefix:(name ^ ".") entry 62 + ) entries in 63 + 64 + match version with 65 + | Some v -> 66 + (* Look for specific version *) 67 + let target = name ^ "." ^ v in 68 + if List.mem target version_dirs then 69 + let opam_path = Filename.concat (Filename.concat pkg_dir target) "opam" in 70 + if Sys.file_exists opam_path then Some (opam_path, v) 71 + else None 72 + else None 73 + | None -> 74 + (* Find latest version (simple string sort, works for semver) *) 75 + let sorted = List.sort (fun a b -> String.compare b a) version_dirs in 76 + match sorted with 77 + | [] -> None 78 + | latest :: _ -> 79 + let v = String.sub latest (String.length name + 1) 80 + (String.length latest - String.length name - 1) in 81 + let opam_path = Filename.concat (Filename.concat pkg_dir latest) "opam" in 82 + if Sys.file_exists opam_path then Some (opam_path, v) 83 + else None 84 + 85 + (** Get metadata for a package from an opam repository. *) 86 + let get_metadata ~repo_path ~name ?version () = 87 + match find_in_repo ~repo_path ~name ?version () with 88 + | None -> None 89 + | Some (opam_path, v) -> 90 + Some (parse_file ~name ~version:v opam_path) 91 + 92 + (** List all versions of a package in a repository. *) 93 + let list_versions ~repo_path ~name = 94 + let packages_dir = Filename.concat repo_path "packages" in 95 + let pkg_dir = Filename.concat packages_dir name in 96 + 97 + if not (Sys.file_exists pkg_dir && Sys.is_directory pkg_dir) then 98 + [] 99 + else 100 + Sys.readdir pkg_dir 101 + |> Array.to_list 102 + |> List.filter_map (fun entry -> 103 + if String.starts_with ~prefix:(name ^ ".") entry then 104 + Some (String.sub entry (String.length name + 1) 105 + (String.length entry - String.length name - 1)) 106 + else None) 107 + |> List.sort String.compare
+24
lib/opam/opam_file.mli
··· 1 + (** Opam file parsing for extracting package metadata. *) 2 + 3 + type metadata = { 4 + name : string; 5 + version : string; 6 + dev_repo : string option; 7 + synopsis : string option; 8 + } 9 + 10 + val parse : name:string -> version:string -> string -> metadata 11 + (** [parse ~name ~version content] parses opam file content. *) 12 + 13 + val parse_file : name:string -> version:string -> string -> metadata 14 + (** [parse_file ~name ~version path] parses an opam file from disk. *) 15 + 16 + val find_in_repo : repo_path:string -> name:string -> ?version:string -> unit -> (string * string) option 17 + (** [find_in_repo ~repo_path ~name ?version ()] finds a package in an opam repository. 18 + Returns [Some (opam_file_path, version)] if found. *) 19 + 20 + val get_metadata : repo_path:string -> name:string -> ?version:string -> unit -> metadata option 21 + (** [get_metadata ~repo_path ~name ?version ()] gets package metadata from a repository. *) 22 + 23 + val list_versions : repo_path:string -> name:string -> string list 24 + (** [list_versions ~repo_path ~name] lists all available versions of a package. *)
+71
lib/opam/repo.ml
··· 1 + (** Opam repository operations. *) 2 + 3 + type repo = { 4 + name : string; 5 + path : string; 6 + } 7 + 8 + type search_result = { 9 + repo : repo; 10 + metadata : Opam_file.metadata; 11 + } 12 + 13 + (** Resolve repository path from config. *) 14 + let resolve_repo (cfg : Unpac.Config.repo_config) : repo option = 15 + match cfg.source with 16 + | Unpac.Config.Local path -> 17 + if Sys.file_exists path && Sys.is_directory path then 18 + Some { name = cfg.repo_name; path } 19 + else None 20 + | Unpac.Config.Remote _url -> 21 + (* Remote repos not yet supported *) 22 + None 23 + 24 + (** Search for a package in configured repositories. *) 25 + let find_package ~repos ~name ?version () : search_result option = 26 + let rec search = function 27 + | [] -> None 28 + | cfg :: rest -> 29 + match resolve_repo cfg with 30 + | None -> search rest 31 + | Some repo -> 32 + match Opam_file.get_metadata ~repo_path:repo.path ~name ?version () with 33 + | None -> search rest 34 + | Some metadata -> Some { repo; metadata } 35 + in 36 + search repos 37 + 38 + (** List all versions of a package across repositories. *) 39 + let list_versions ~repos ~name : (repo * string list) list = 40 + List.filter_map (fun cfg -> 41 + match resolve_repo cfg with 42 + | None -> None 43 + | Some repo -> 44 + let versions = Opam_file.list_versions ~repo_path:repo.path ~name in 45 + if versions = [] then None 46 + else Some (repo, versions) 47 + ) repos 48 + 49 + (** Search for packages matching a pattern. *) 50 + let search_packages ~repos ~pattern : (repo * string) list = 51 + List.concat_map (fun cfg -> 52 + match resolve_repo cfg with 53 + | None -> [] 54 + | Some repo -> 55 + let packages_dir = Filename.concat repo.path "packages" in 56 + if not (Sys.file_exists packages_dir) then [] 57 + else 58 + Sys.readdir packages_dir 59 + |> Array.to_list 60 + |> List.filter (fun name -> 61 + (* Simple substring match *) 62 + let pattern_lower = String.lowercase_ascii pattern in 63 + let name_lower = String.lowercase_ascii name in 64 + String.length pattern_lower <= String.length name_lower && 65 + (let rec check i = 66 + if i > String.length name_lower - String.length pattern_lower then false 67 + else if String.sub name_lower i (String.length pattern_lower) = pattern_lower then true 68 + else check (i + 1) 69 + in check 0)) 70 + |> List.map (fun name -> (repo, name)) 71 + ) repos
+32
lib/opam/repo.mli
··· 1 + (** Opam repository operations. *) 2 + 3 + type repo = { 4 + name : string; 5 + path : string; 6 + } 7 + 8 + type search_result = { 9 + repo : repo; 10 + metadata : Opam_file.metadata; 11 + } 12 + 13 + val find_package : 14 + repos:Unpac.Config.repo_config list -> 15 + name:string -> 16 + ?version:string -> 17 + unit -> 18 + search_result option 19 + (** [find_package ~repos ~name ?version ()] searches for a package in repositories. 20 + Returns the first match found. *) 21 + 22 + val list_versions : 23 + repos:Unpac.Config.repo_config list -> 24 + name:string -> 25 + (repo * string list) list 26 + (** [list_versions ~repos ~name] lists all versions across repositories. *) 27 + 28 + val search_packages : 29 + repos:Unpac.Config.repo_config list -> 30 + pattern:string -> 31 + (repo * string) list 32 + (** [search_packages ~repos ~pattern] searches for packages matching a pattern. *)
+169
lib/opam/solver.ml
··· 1 + (** Dependency solver using 0install algorithm. *) 2 + 3 + let ( / ) = Filename.concat 4 + 5 + (** List directory entries, returns empty list if directory doesn't exist. *) 6 + let list_dir path = 7 + try Sys.readdir path |> Array.to_list 8 + with Sys_error _ -> [] 9 + 10 + (** Known compiler packages to filter out. *) 11 + let is_compiler_package name = 12 + let s = OpamPackage.Name.to_string name in 13 + String.starts_with ~prefix:"ocaml-base-compiler" s || 14 + String.starts_with ~prefix:"ocaml-variants" s || 15 + String.starts_with ~prefix:"ocaml-system" s || 16 + String.starts_with ~prefix:"ocaml-config" s || 17 + s = "ocaml" || 18 + s = "base-unix" || 19 + s = "base-threads" || 20 + s = "base-bigarray" || 21 + s = "base-domains" || 22 + s = "base-nnp" 23 + 24 + (** Check if a package has the compiler flag. *) 25 + let has_compiler_flag opam = 26 + let flags = OpamFile.OPAM.flags opam in 27 + List.mem OpamTypes.Pkgflag_Compiler flags 28 + 29 + (** Multi-repo context that searches multiple opam repository directories. *) 30 + module Multi_context : sig 31 + include Opam_0install.S.CONTEXT 32 + 33 + val create : 34 + ?constraints:OpamFormula.version_constraint OpamTypes.name_map -> 35 + repos:string list -> 36 + ocaml_version:string -> 37 + unit -> t 38 + end = struct 39 + type rejection = 40 + | UserConstraint of OpamFormula.atom 41 + | Unavailable 42 + | CompilerPackage 43 + 44 + let pp_rejection f = function 45 + | UserConstraint x -> Fmt.pf f "Rejected by user-specified constraint %s" (OpamFormula.string_of_atom x) 46 + | Unavailable -> Fmt.pf f "Availability condition not satisfied" 47 + | CompilerPackage -> Fmt.pf f "Compiler package (filtered out)" 48 + 49 + type t = { 50 + repos : string list; (* List of packages/ directories *) 51 + constraints : OpamFormula.version_constraint OpamTypes.name_map; 52 + ocaml_version : string; 53 + } 54 + 55 + let env t _pkg v = 56 + match OpamVariable.Full.to_string v with 57 + | "arch" -> Some (OpamTypes.S "x86_64") 58 + | "os" -> Some (OpamTypes.S "linux") 59 + | "os-distribution" -> Some (OpamTypes.S "debian") 60 + | "os-version" -> Some (OpamTypes.S "12") 61 + | "os-family" -> Some (OpamTypes.S "debian") 62 + | "opam-version" -> Some (OpamTypes.S "2.2.0") 63 + | "sys-ocaml-version" -> Some (OpamTypes.S t.ocaml_version) 64 + | "ocaml:native" -> Some (OpamTypes.B true) 65 + | _ -> None 66 + 67 + let filter_deps t pkg f = 68 + f 69 + |> OpamFilter.partial_filter_formula (env t pkg) 70 + |> OpamFilter.filter_deps ~build:true ~post:true ~test:false ~doc:false ~dev:false ~dev_setup:false ~default:false 71 + 72 + let user_restrictions t name = 73 + OpamPackage.Name.Map.find_opt name t.constraints 74 + 75 + (** Load opam file from path. *) 76 + let load_opam path = 77 + try Some (OpamFile.OPAM.read (OpamFile.make (OpamFilename.raw path))) 78 + with _ -> None 79 + 80 + (** Create a minimal virtual opam file for base packages. *) 81 + let virtual_opam () = 82 + OpamFile.OPAM.empty 83 + 84 + (** Find all versions of a package across all repos. *) 85 + let find_versions t name = 86 + let name_str = OpamPackage.Name.to_string name in 87 + (* Collect versions from all repos, first repo wins for duplicates *) 88 + let seen = Hashtbl.create 16 in 89 + List.iter (fun packages_dir -> 90 + let pkg_dir = packages_dir / name_str in 91 + list_dir pkg_dir |> List.iter (fun entry -> 92 + match OpamPackage.of_string_opt entry with 93 + | Some pkg when OpamPackage.name pkg = name -> 94 + let v = OpamPackage.version pkg in 95 + if not (Hashtbl.mem seen v) then begin 96 + let opam_path = pkg_dir / entry / "opam" in 97 + Hashtbl.add seen v opam_path 98 + end 99 + | _ -> () 100 + ) 101 + ) t.repos; 102 + Hashtbl.fold (fun v path acc -> (v, path) :: acc) seen [] 103 + 104 + let candidates t name = 105 + let name_str = OpamPackage.Name.to_string name in 106 + (* Provide virtual packages for compiler/base packages at the configured version *) 107 + if name_str = "ocaml" then 108 + let v = OpamPackage.Version.of_string t.ocaml_version in 109 + [v, Ok (virtual_opam ())] 110 + else if name_str = "base-unix" || name_str = "base-threads" || 111 + name_str = "base-bigarray" || name_str = "base-domains" || 112 + name_str = "base-nnp" then 113 + let v = OpamPackage.Version.of_string "base" in 114 + [v, Ok (virtual_opam ())] 115 + else if is_compiler_package name then 116 + (* Other compiler packages - not available *) 117 + [] 118 + else 119 + let user_constraints = user_restrictions t name in 120 + find_versions t name 121 + |> List.sort (fun (v1, _) (v2, _) -> OpamPackage.Version.compare v2 v1) (* Prefer newest *) 122 + |> List.map (fun (v, opam_path) -> 123 + match user_constraints with 124 + | Some test when not (OpamFormula.check_version_formula (OpamFormula.Atom test) v) -> 125 + v, Error (UserConstraint (name, Some test)) 126 + | _ -> 127 + match load_opam opam_path with 128 + | None -> v, Error Unavailable 129 + | Some opam -> 130 + (* Check flags:compiler *) 131 + if has_compiler_flag opam then 132 + v, Error CompilerPackage 133 + else 134 + (* Check available filter *) 135 + let pkg = OpamPackage.create name v in 136 + let available = OpamFile.OPAM.available opam in 137 + match OpamFilter.eval ~default:(OpamTypes.B false) (env t pkg) available with 138 + | B true -> v, Ok opam 139 + | _ -> v, Error Unavailable 140 + ) 141 + 142 + let create ?(constraints=OpamPackage.Name.Map.empty) ~repos ~ocaml_version () = 143 + (* Convert repo roots to packages/ directories *) 144 + let packages_dirs = List.map (fun r -> r / "packages") repos in 145 + { repos = packages_dirs; constraints; ocaml_version } 146 + end 147 + 148 + module Solver = Opam_0install.Solver.Make(Multi_context) 149 + 150 + type solve_result = { 151 + packages : OpamPackage.t list; 152 + } 153 + 154 + type solve_error = string 155 + 156 + (** Solve dependencies for a list of package names. *) 157 + let solve ~repos ~ocaml_version ~packages : (solve_result, solve_error) result = 158 + let context = Multi_context.create ~repos ~ocaml_version () in 159 + let names = List.map OpamPackage.Name.of_string packages in 160 + match Solver.solve context names with 161 + | Ok selections -> 162 + let pkgs = Solver.packages_of_result selections in 163 + (* Filter out compiler packages from result *) 164 + let pkgs = List.filter (fun pkg -> 165 + not (is_compiler_package (OpamPackage.name pkg)) 166 + ) pkgs in 167 + Ok { packages = pkgs } 168 + | Error diagnostics -> 169 + Error (Solver.diagnostics diagnostics)
+33
lib/opam/solver.mli
··· 1 + (** Dependency solver using 0install algorithm. 2 + 3 + Solves package dependencies across multiple configured opam repositories, 4 + filtering out compiler packages and respecting availability constraints. *) 5 + 6 + type solve_result = { 7 + packages : OpamPackage.t list; 8 + (** List of packages that need to be installed, including transitive deps. *) 9 + } 10 + 11 + type solve_error = string 12 + (** Human-readable error message explaining why solving failed. *) 13 + 14 + val solve : 15 + repos:string list -> 16 + ocaml_version:string -> 17 + packages:string list -> 18 + (solve_result, solve_error) result 19 + (** [solve ~repos ~ocaml_version ~packages] solves dependencies for [packages]. 20 + 21 + @param repos List of opam repository root directories (containing packages/) 22 + @param ocaml_version The OCaml compiler version to solve for (e.g. "5.2.0") 23 + @param packages List of package names to solve for 24 + 25 + Returns the full list of packages (including transitive dependencies) that 26 + need to be installed, or an error message if solving failed. 27 + 28 + Compiler packages (ocaml-base-compiler, base-*, etc.) are automatically 29 + filtered out since they are assumed to be pre-installed. *) 30 + 31 + val is_compiler_package : OpamPackage.Name.t -> bool 32 + (** [is_compiler_package name] returns true if [name] is a known compiler 33 + or base package that should be filtered out. *)
+454
lib/promote.ml
··· 1 + (** Project promotion to vendor library. 2 + 3 + Promotes a locally-developed project to a vendored library by: 4 + 1. Filtering out the vendor/ directory from the project history 5 + 2. Creating vendor branches (upstream/vendor/patches) for the specified backend 6 + 3. Recording the promotion in the audit log 7 + 8 + This allows the project to be merged into other projects as a dependency. *) 9 + 10 + let src = Logs.Src.create "unpac.promote" ~doc:"Project promotion" 11 + module Log = (val Logs.src_log src : Logs.LOG) 12 + 13 + (** Backend types for promotion *) 14 + type backend = 15 + | Opam 16 + | Git 17 + 18 + let backend_of_string = function 19 + | "opam" -> Some Opam 20 + | "git" -> Some Git 21 + | _ -> None 22 + 23 + let backend_to_string = function 24 + | Opam -> "opam" 25 + | Git -> "git" 26 + 27 + (** Branch names for a backend *) 28 + let upstream_branch backend name = match backend with 29 + | Opam -> "opam/upstream/" ^ name 30 + | Git -> "git/upstream/" ^ name 31 + 32 + let vendor_branch backend name = match backend with 33 + | Opam -> "opam/vendor/" ^ name 34 + | Git -> "git/vendor/" ^ name 35 + 36 + let patches_branch backend name = match backend with 37 + | Opam -> "opam/patches/" ^ name 38 + | Git -> "git/patches/" ^ name 39 + 40 + let vendor_path backend name = match backend with 41 + | Opam -> "vendor/opam/" ^ name 42 + | Git -> "vendor/git/" ^ name 43 + 44 + (** Result of promotion *) 45 + type promote_result = 46 + | Promoted of { 47 + name : string; 48 + backend : backend; 49 + original_commits : int; 50 + filtered_commits : int; 51 + } 52 + | Already_promoted of string 53 + | Project_not_found of string 54 + | Failed of { name : string; error : string } 55 + 56 + (** Filter a branch to exclude vendor/ directory. 57 + Uses git-filter-repo to rewrite history. *) 58 + let filter_vendor_directory ~proc_mgr ~cwd ~branch = 59 + Log.info (fun m -> m "Filtering vendor/ directory from branch %s..." branch); 60 + 61 + (* Use git-filter-repo with path filtering to exclude vendor/ *) 62 + let fs = fst cwd in 63 + let git_path = snd cwd in 64 + let parent_path = Filename.dirname git_path in 65 + 66 + (* Create a unique temporary worktree *) 67 + let safe_branch = String.map (fun c -> if c = '/' then '-' else c) branch in 68 + let temp_wt_name = ".filter-vendor-" ^ safe_branch in 69 + let temp_wt_relpath = "../" ^ temp_wt_name in 70 + let temp_wt_path = Filename.concat parent_path temp_wt_name in 71 + let temp_wt : Git.path = (fs, temp_wt_path) in 72 + 73 + (* Remove any existing temp worktree *) 74 + ignore (Git.run ~proc_mgr ~cwd ["worktree"; "remove"; "-f"; temp_wt_relpath]); 75 + 76 + (* Create worktree for the branch *) 77 + Git.run_exn ~proc_mgr ~cwd ["worktree"; "add"; temp_wt_relpath; branch] |> ignore; 78 + 79 + (* Count commits before filtering *) 80 + let commits_before = 81 + int_of_string (String.trim (Git.run_exn ~proc_mgr ~cwd:temp_wt ["rev-list"; "--count"; "HEAD"])) 82 + in 83 + 84 + (* Run git-filter-repo to exclude vendor/ *) 85 + let result = Git.run ~proc_mgr ~cwd:temp_wt [ 86 + "filter-repo"; 87 + "--invert-paths"; 88 + "--path"; "vendor/"; 89 + "--force"; 90 + "--refs"; "HEAD" 91 + ] in 92 + 93 + match result with 94 + | Ok _ -> 95 + (* Count commits after filtering *) 96 + let commits_after = 97 + int_of_string (String.trim (Git.run_exn ~proc_mgr ~cwd:temp_wt ["rev-list"; "--count"; "HEAD"])) 98 + in 99 + (* Get the new HEAD SHA *) 100 + let new_sha = Git.run_exn ~proc_mgr ~cwd:temp_wt ["rev-parse"; "HEAD"] |> String.trim in 101 + (* Cleanup temporary worktree *) 102 + ignore (Git.run ~proc_mgr ~cwd ["worktree"; "remove"; "-f"; temp_wt_relpath]); 103 + (* Update the branch in the bare repo *) 104 + Git.run_exn ~proc_mgr ~cwd ["branch"; "-f"; branch; new_sha] |> ignore; 105 + Ok (commits_before, commits_after) 106 + | Error e -> 107 + (* Cleanup and return error *) 108 + ignore (Git.run ~proc_mgr ~cwd ["worktree"; "remove"; "-f"; temp_wt_relpath]); 109 + Error (Fmt.str "%a" Git.pp_error e) 110 + 111 + (** Promote a project to a vendored library *) 112 + let promote ~proc_mgr ~root ~project ~backend ~vendor_name = 113 + let git = Worktree.git_dir root in 114 + let name = Option.value ~default:project vendor_name in 115 + 116 + (* Check if project exists *) 117 + if not (Worktree.branch_exists ~proc_mgr root (Worktree.Project project)) then 118 + Project_not_found project 119 + else begin 120 + (* Check if already promoted for this backend *) 121 + let patches_br = patches_branch backend name in 122 + if Git.branch_exists ~proc_mgr ~cwd:git patches_br then 123 + Already_promoted name 124 + else begin 125 + try 126 + Log.info (fun m -> m "Promoting project %s as %s vendor %s..." project (backend_to_string backend) name); 127 + 128 + let project_branch = Worktree.branch (Worktree.Project project) in 129 + 130 + (* Step 1: Create a temporary branch from the project for filtering *) 131 + let temp_branch = "promote-temp-" ^ name in 132 + Git.run_exn ~proc_mgr ~cwd:git ["branch"; "-f"; temp_branch; project_branch] |> ignore; 133 + 134 + (* Step 2: Filter out vendor/ directory from the temp branch *) 135 + let (commits_before, commits_after) = 136 + match filter_vendor_directory ~proc_mgr ~cwd:git ~branch:temp_branch with 137 + | Ok counts -> counts 138 + | Error msg -> 139 + (* Cleanup temp branch *) 140 + ignore (Git.run ~proc_mgr ~cwd:git ["branch"; "-D"; temp_branch]); 141 + failwith msg 142 + in 143 + 144 + Log.info (fun m -> m "Filtered %d -> %d commits" commits_before commits_after); 145 + 146 + (* Step 3: Create upstream branch (filtered, files at root) *) 147 + (* For local projects, upstream is the same as filtered temp - no external upstream *) 148 + Git.run_exn ~proc_mgr ~cwd:git ["branch"; "-f"; upstream_branch backend name; temp_branch] |> ignore; 149 + 150 + (* Step 4: Create vendor branch from upstream and rewrite to vendor path *) 151 + Git.run_exn ~proc_mgr ~cwd:git ["branch"; "-f"; vendor_branch backend name; upstream_branch backend name] |> ignore; 152 + 153 + (* Rewrite vendor branch to move files into vendor/<backend>/<name>/ *) 154 + Git.filter_repo_to_subdirectory ~proc_mgr ~cwd:git 155 + ~branch:(vendor_branch backend name) 156 + ~subdirectory:(vendor_path backend name); 157 + 158 + (* Step 5: Create patches branch from vendor *) 159 + Git.run_exn ~proc_mgr ~cwd:git ["branch"; patches_branch backend name; vendor_branch backend name] |> ignore; 160 + 161 + (* Step 6: Cleanup temp branch *) 162 + ignore (Git.run ~proc_mgr ~cwd:git ["branch"; "-D"; temp_branch]); 163 + 164 + Promoted { 165 + name; 166 + backend; 167 + original_commits = commits_before; 168 + filtered_commits = commits_after 169 + } 170 + with exn -> 171 + (* Cleanup on failure *) 172 + let temp_branch = "promote-temp-" ^ name in 173 + ignore (Git.run ~proc_mgr ~cwd:git ["branch"; "-D"; temp_branch]); 174 + ignore (Git.run ~proc_mgr ~cwd:git ["branch"; "-D"; upstream_branch backend name]); 175 + ignore (Git.run ~proc_mgr ~cwd:git ["branch"; "-D"; vendor_branch backend name]); 176 + Failed { name = project; error = Printexc.to_string exn } 177 + end 178 + end 179 + 180 + (** {1 Remote Management} *) 181 + 182 + (** Remote name for a project *) 183 + let project_remote_name project = "origin-" ^ project 184 + 185 + (** Result of set-remote operation *) 186 + type set_remote_result = 187 + | Remote_set of { project : string; url : string; created : bool } 188 + | Project_not_found of string 189 + | Set_remote_failed of { project : string; error : string } 190 + 191 + (** Set the remote URL for a project *) 192 + let set_remote ~proc_mgr ~root ~project ~url = 193 + let git = Worktree.git_dir root in 194 + 195 + (* Check if project exists *) 196 + if not (Worktree.branch_exists ~proc_mgr root (Worktree.Project project)) then 197 + Project_not_found project 198 + else begin 199 + try 200 + let remote_name = project_remote_name project in 201 + Log.info (fun m -> m "Setting remote %s -> %s for project %s" remote_name url project); 202 + 203 + let created = match Git.ensure_remote ~proc_mgr ~cwd:git ~name:remote_name ~url with 204 + | `Created -> true 205 + | `Updated | `Existed -> false 206 + in 207 + 208 + Remote_set { project; url; created } 209 + with exn -> 210 + Set_remote_failed { project; error = Printexc.to_string exn } 211 + end 212 + 213 + (** Get the remote URL for a project *) 214 + let get_remote ~proc_mgr ~root ~project = 215 + let git = Worktree.git_dir root in 216 + let remote_name = project_remote_name project in 217 + Git.remote_url ~proc_mgr ~cwd:git remote_name 218 + 219 + (** Result of push operation *) 220 + type push_result = 221 + | Pushed of { project : string; branch : string; remote : string } 222 + | No_remote of string 223 + | Project_not_found of string 224 + | Push_failed of { project : string; error : string } 225 + 226 + (** Push a project to its configured remote *) 227 + let push ~proc_mgr ~root ~project = 228 + let git = Worktree.git_dir root in 229 + 230 + (* Check if project exists *) 231 + if not (Worktree.branch_exists ~proc_mgr root (Worktree.Project project)) then 232 + Project_not_found project 233 + else begin 234 + let remote_name = project_remote_name project in 235 + match Git.remote_url ~proc_mgr ~cwd:git remote_name with 236 + | None -> No_remote project 237 + | Some _url -> 238 + try 239 + let branch = Worktree.branch (Worktree.Project project) in 240 + Log.info (fun m -> m "Pushing %s to %s..." branch remote_name); 241 + Git.run_exn ~proc_mgr ~cwd:git ["push"; "-u"; remote_name; branch] |> ignore; 242 + Pushed { project; branch; remote = remote_name } 243 + with exn -> 244 + Push_failed { project; error = Printexc.to_string exn } 245 + end 246 + 247 + (** {1 Project Info} *) 248 + 249 + type project_info = { 250 + name : string; 251 + origin : [`Local | `Vendored]; 252 + remote : string option; 253 + promoted_as : (backend * string) option; (* backend, vendor_name *) 254 + } 255 + 256 + (** Get information about a project *) 257 + let get_info ~proc_mgr ~root ~project = 258 + let git = Worktree.git_dir root in 259 + 260 + if not (Worktree.branch_exists ~proc_mgr root (Worktree.Project project)) then 261 + None 262 + else begin 263 + (* Check for remote *) 264 + let remote = get_remote ~proc_mgr ~root ~project in 265 + 266 + (* Check if promoted - look for opam/patches/<project> or git/patches/<project> *) 267 + let promoted_as = 268 + if Git.branch_exists ~proc_mgr ~cwd:git (patches_branch Opam project) then 269 + Some (Opam, project) 270 + else if Git.branch_exists ~proc_mgr ~cwd:git (patches_branch Git project) then 271 + Some (Git, project) 272 + else 273 + None 274 + in 275 + 276 + Some { 277 + name = project; 278 + origin = `Local; (* All projects created via `unpac project new` are local *) 279 + remote; 280 + promoted_as; 281 + } 282 + end 283 + 284 + (** {1 Export (Unvendor)} *) 285 + 286 + (** Export branch name - where unvendored code goes *) 287 + let export_branch backend name = match backend with 288 + | Opam -> "opam/export/" ^ name 289 + | Git -> "git/export/" ^ name 290 + 291 + (** Result of export operation *) 292 + type export_result = 293 + | Exported of { 294 + name : string; 295 + backend : backend; 296 + source_branch : string; 297 + export_branch : string; 298 + commits : int; 299 + } 300 + | Not_vendored of string 301 + | Already_exported of string 302 + | Export_failed of { name : string; error : string } 303 + 304 + (** Export a vendored package back to root-level files. 305 + This is the inverse of vendoring - takes a vendor branch and creates 306 + an export branch with files moved from vendor/<backend>/<name>/ to root. 307 + 308 + Can export from either vendor/* or patches/* branch. *) 309 + let export ~proc_mgr ~root ~name ~backend ~from_patches = 310 + let git = Worktree.git_dir root in 311 + 312 + (* Determine source branch *) 313 + let source_br = if from_patches then patches_branch backend name 314 + else vendor_branch backend name in 315 + let export_br = export_branch backend name in 316 + let subdir = vendor_path backend name in 317 + 318 + (* Check if source branch exists *) 319 + if not (Git.branch_exists ~proc_mgr ~cwd:git source_br) then 320 + Not_vendored name 321 + else if Git.branch_exists ~proc_mgr ~cwd:git export_br then 322 + Already_exported name 323 + else begin 324 + try 325 + Log.info (fun m -> m "Exporting %s from %s to %s..." name source_br export_br); 326 + 327 + (* Step 1: Create export branch from source *) 328 + Git.run_exn ~proc_mgr ~cwd:git ["branch"; "-f"; export_br; source_br] |> ignore; 329 + 330 + (* Step 2: Count commits before transformation *) 331 + let commits = 332 + int_of_string (String.trim ( 333 + Git.run_exn ~proc_mgr ~cwd:git ["rev-list"; "--count"; export_br])) 334 + in 335 + 336 + (* Step 3: Rewrite export branch to move files from subdirectory to root *) 337 + Git.filter_repo_from_subdirectory ~proc_mgr ~cwd:git 338 + ~branch:export_br 339 + ~subdirectory:subdir; 340 + 341 + Exported { 342 + name; 343 + backend; 344 + source_branch = source_br; 345 + export_branch = export_br; 346 + commits; 347 + } 348 + with exn -> 349 + (* Cleanup on failure *) 350 + ignore (Git.run ~proc_mgr ~cwd:git ["branch"; "-D"; export_br]); 351 + Export_failed { name; error = Printexc.to_string exn } 352 + end 353 + 354 + (** Remote name for export (where we push to) *) 355 + let export_remote_name name = "export-" ^ name 356 + 357 + (** Remote name for upstream (where we fetch from) *) 358 + let upstream_remote_name name = "upstream-" ^ name 359 + 360 + (** Result of export push operation *) 361 + type export_push_result = 362 + | Export_pushed of { 363 + name : string; 364 + backend : backend; 365 + remote : string; 366 + branch : string; 367 + commits : int; 368 + } 369 + | Export_not_found of string 370 + | No_export_remote of string 371 + | Export_push_failed of { name : string; error : string } 372 + 373 + (** Set the remote URL for exporting a package *) 374 + let set_export_remote ~proc_mgr ~root ~name ~url = 375 + let git = Worktree.git_dir root in 376 + let remote_name = export_remote_name name in 377 + Log.info (fun m -> m "Setting export remote %s -> %s" remote_name url); 378 + Git.ensure_remote ~proc_mgr ~cwd:git ~name:remote_name ~url 379 + 380 + (** Get the export remote URL for a package *) 381 + let get_export_remote ~proc_mgr ~root ~name = 382 + let git = Worktree.git_dir root in 383 + let remote_name = export_remote_name name in 384 + Git.remote_url ~proc_mgr ~cwd:git remote_name 385 + 386 + (** Set the remote URL for fetching upstream updates. 387 + This is used for promoted local packages that don't have an opam source URL. *) 388 + let set_upstream_remote ~proc_mgr ~root ~name ~url = 389 + let git = Worktree.git_dir root in 390 + let remote_name = upstream_remote_name name in 391 + Log.info (fun m -> m "Setting upstream remote %s -> %s" remote_name url); 392 + Git.ensure_remote ~proc_mgr ~cwd:git ~name:remote_name ~url 393 + 394 + (** Get the upstream remote URL for a package *) 395 + let get_upstream_remote ~proc_mgr ~root ~name = 396 + let git = Worktree.git_dir root in 397 + let remote_name = upstream_remote_name name in 398 + Git.remote_url ~proc_mgr ~cwd:git remote_name 399 + 400 + (** Push an exported branch to its remote *) 401 + let push_export ~proc_mgr ~root ~name ~backend = 402 + let git = Worktree.git_dir root in 403 + let export_br = export_branch backend name in 404 + let remote_name = export_remote_name name in 405 + 406 + (* Check if export branch exists *) 407 + if not (Git.branch_exists ~proc_mgr ~cwd:git export_br) then 408 + Export_not_found name 409 + else begin 410 + match Git.remote_url ~proc_mgr ~cwd:git remote_name with 411 + | None -> No_export_remote name 412 + | Some _url -> 413 + try 414 + (* Count commits *) 415 + let commits = 416 + int_of_string (String.trim ( 417 + Git.run_exn ~proc_mgr ~cwd:git ["rev-list"; "--count"; export_br])) 418 + in 419 + 420 + Log.info (fun m -> m "Pushing %s to %s..." export_br remote_name); 421 + (* Push the export branch - push to main/master on the remote *) 422 + Git.run_exn ~proc_mgr ~cwd:git [ 423 + "push"; "-u"; remote_name; 424 + export_br ^ ":main" (* Push export branch as 'main' on remote *) 425 + ] |> ignore; 426 + 427 + Export_pushed { 428 + name; 429 + backend; 430 + remote = remote_name; 431 + branch = export_br; 432 + commits; 433 + } 434 + with exn -> 435 + Export_push_failed { name; error = Printexc.to_string exn } 436 + end 437 + 438 + (** List all exported packages *) 439 + let list_exports ~proc_mgr ~root = 440 + let git = Worktree.git_dir root in 441 + let branches = Git.run_lines ~proc_mgr ~cwd:git ["branch"; "--list"; "*/export/*"] in 442 + List.filter_map (fun line -> 443 + let branch = String.trim line in 444 + let branch = if String.length branch > 0 && branch.[0] = '*' then 445 + String.trim (String.sub branch 1 (String.length branch - 1)) 446 + else branch in 447 + (* Parse backend/export/name *) 448 + match String.split_on_char '/' branch with 449 + | [backend_str; "export"; name] -> 450 + (match backend_of_string backend_str with 451 + | Some backend -> Some (backend, name) 452 + | None -> None) 453 + | _ -> None 454 + ) branches
+292
lib/promote.mli
··· 1 + (** Project promotion to vendor library. 2 + 3 + Promotes a locally-developed project to a vendored library by: 4 + 1. Filtering out the vendor/ directory from the project history 5 + 2. Creating vendor branches (upstream/vendor/patches) for the specified backend 6 + 3. Recording the promotion in the audit log 7 + 8 + This allows the project to be merged into other projects as a dependency. *) 9 + 10 + (** {1 Backend Types} *) 11 + 12 + (** Vendor backend for the promoted library *) 13 + type backend = 14 + | Opam (** OCaml package - creates opam/* branches, vendor/opam/<name>/ path *) 15 + | Git (** Git repository - creates git/* branches, vendor/git/<name>/ path *) 16 + 17 + val backend_of_string : string -> backend option 18 + (** Parse backend from string: "opam" or "git" *) 19 + 20 + val backend_to_string : backend -> string 21 + (** Convert backend to string *) 22 + 23 + (** {1 Branch Names} *) 24 + 25 + val upstream_branch : backend -> string -> string 26 + (** [upstream_branch backend name] returns the upstream branch name, 27 + e.g., "opam/upstream/brotli" or "git/upstream/brotli" *) 28 + 29 + val vendor_branch : backend -> string -> string 30 + (** [vendor_branch backend name] returns the vendor branch name *) 31 + 32 + val patches_branch : backend -> string -> string 33 + (** [patches_branch backend name] returns the patches branch name *) 34 + 35 + val vendor_path : backend -> string -> string 36 + (** [vendor_path backend name] returns the vendor directory path, 37 + e.g., "vendor/opam/brotli" or "vendor/git/brotli" *) 38 + 39 + (** {1 Promotion} *) 40 + 41 + (** Result of a promote operation *) 42 + type promote_result = 43 + | Promoted of { 44 + name : string; (** Vendor library name *) 45 + backend : backend; (** Backend used *) 46 + original_commits : int; (** Commits in project before filtering *) 47 + filtered_commits : int; (** Commits after removing vendor/ *) 48 + } 49 + | Already_promoted of string 50 + (** Library already exists with this name *) 51 + | Project_not_found of string 52 + (** Source project does not exist *) 53 + | Failed of { name : string; error : string } 54 + (** Promotion failed *) 55 + 56 + val promote : 57 + proc_mgr:Git.proc_mgr -> 58 + root:Worktree.root -> 59 + project:string -> 60 + backend:backend -> 61 + vendor_name:string option -> 62 + promote_result 63 + (** [promote ~proc_mgr ~root ~project ~backend ~vendor_name] promotes 64 + a local project to a vendored library. 65 + 66 + The operation: 67 + 1. Checks that the project exists and hasn't been promoted yet 68 + 2. Creates a filtered copy of project history (excluding vendor/) 69 + 3. Creates upstream/vendor/patches branches for the backend 70 + 4. The original project branch is preserved unchanged 71 + 72 + @param project Name of the project to promote (e.g., "brotli") 73 + @param backend Backend type (Opam or Git) 74 + @param vendor_name Optional override for the vendor library name 75 + 76 + After promotion, the library can be merged into other projects using: 77 + - [unpac opam merge <name> <project>] for Opam backend 78 + - [unpac git merge <name> <project>] for Git backend *) 79 + 80 + (** {1 Remote Management} *) 81 + 82 + val project_remote_name : string -> string 83 + (** [project_remote_name project] returns the git remote name for a project, 84 + e.g., "origin-brotli" *) 85 + 86 + (** Result of set-remote operation *) 87 + type set_remote_result = 88 + | Remote_set of { project : string; url : string; created : bool } 89 + | Project_not_found of string 90 + | Set_remote_failed of { project : string; error : string } 91 + 92 + val set_remote : 93 + proc_mgr:Git.proc_mgr -> 94 + root:Worktree.root -> 95 + project:string -> 96 + url:string -> 97 + set_remote_result 98 + (** [set_remote ~proc_mgr ~root ~project ~url] sets the remote URL for a project. 99 + 100 + Creates or updates a git remote named "origin-<project>" pointing to the URL. 101 + This allows the project to be pushed independently using [push]. *) 102 + 103 + val get_remote : 104 + proc_mgr:Git.proc_mgr -> 105 + root:Worktree.root -> 106 + project:string -> 107 + string option 108 + (** [get_remote ~proc_mgr ~root ~project] returns the remote URL for a project, 109 + or None if no remote is configured. *) 110 + 111 + (** Result of push operation *) 112 + type push_result = 113 + | Pushed of { project : string; branch : string; remote : string } 114 + | No_remote of string 115 + | Project_not_found of string 116 + | Push_failed of { project : string; error : string } 117 + 118 + val push : 119 + proc_mgr:Git.proc_mgr -> 120 + root:Worktree.root -> 121 + project:string -> 122 + push_result 123 + (** [push ~proc_mgr ~root ~project] pushes a project to its configured remote. 124 + 125 + Pushes the project/<name> branch to the remote configured via [set_remote]. 126 + Returns [No_remote] if no remote has been configured. *) 127 + 128 + (** {1 Project Info} *) 129 + 130 + type project_info = { 131 + name : string; 132 + origin : [`Local | `Vendored]; 133 + remote : string option; 134 + promoted_as : (backend * string) option; (** backend, vendor_name *) 135 + } 136 + 137 + val get_info : 138 + proc_mgr:Git.proc_mgr -> 139 + root:Worktree.root -> 140 + project:string -> 141 + project_info option 142 + (** [get_info ~proc_mgr ~root ~project] returns information about a project, 143 + or None if the project doesn't exist. *) 144 + 145 + (** {1 Export (Unvendor)} 146 + 147 + Export reverses the vendoring process, creating a branch with files 148 + at the repository root suitable for pushing to an external git repo. 149 + 150 + This is the inverse of vendoring: 151 + - Vendoring: files at root → files in vendor/<backend>/<name>/ 152 + - Exporting: files in vendor/<backend>/<name>/ → files at root *) 153 + 154 + val export_branch : backend -> string -> string 155 + (** [export_branch backend name] returns the export branch name, 156 + e.g., "opam/export/brotli" or "git/export/brotli" *) 157 + 158 + (** Result of export operation *) 159 + type export_result = 160 + | Exported of { 161 + name : string; (** Package name *) 162 + backend : backend; (** Backend used *) 163 + source_branch : string; (** Branch exported from (vendor or patches) *) 164 + export_branch : string; (** Created export branch *) 165 + commits : int; (** Number of commits in export *) 166 + } 167 + | Not_vendored of string 168 + (** No vendor branch exists for this package *) 169 + | Already_exported of string 170 + (** Export branch already exists *) 171 + | Export_failed of { name : string; error : string } 172 + (** Export operation failed *) 173 + 174 + val export : 175 + proc_mgr:Git.proc_mgr -> 176 + root:Worktree.root -> 177 + name:string -> 178 + backend:backend -> 179 + from_patches:bool -> 180 + export_result 181 + (** [export ~proc_mgr ~root ~name ~backend ~from_patches] exports a vendored 182 + package back to root-level files. 183 + 184 + Creates an export branch where files are moved from [vendor/<backend>/<name>/] 185 + to the repository root. This branch can then be pushed to an upstream repo. 186 + 187 + @param name The vendored package name 188 + @param backend The backend (Opam or Git) 189 + @param from_patches If true, exports from patches/* branch (includes local mods); 190 + if false, exports from vendor/* branch (pristine upstream) 191 + 192 + The export branch is named [<backend>/export/<name>], e.g., "git/export/brotli". 193 + 194 + Example workflow: 195 + {[ 196 + (* Export with local patches *) 197 + export ~from_patches:true ... 198 + 199 + (* Set remote and push *) 200 + set_export_remote ~url:"git@github.com:me/brotli.git" ... 201 + push_export ... 202 + ]} *) 203 + 204 + val export_remote_name : string -> string 205 + (** [export_remote_name name] returns the git remote name for exports, 206 + e.g., "export-brotli" *) 207 + 208 + val set_export_remote : 209 + proc_mgr:Git.proc_mgr -> 210 + root:Worktree.root -> 211 + name:string -> 212 + url:string -> 213 + [ `Created | `Existed | `Updated ] 214 + (** [set_export_remote ~proc_mgr ~root ~name ~url] sets the remote URL 215 + for pushing exports of a package. Creates remote "export-<name>". *) 216 + 217 + val get_export_remote : 218 + proc_mgr:Git.proc_mgr -> 219 + root:Worktree.root -> 220 + name:string -> 221 + string option 222 + (** [get_export_remote ~proc_mgr ~root ~name] returns the export remote URL, 223 + or None if no export remote is configured. *) 224 + 225 + (** {2 Upstream Remote} 226 + 227 + The upstream remote is where we fetch updates from. For packages added 228 + via [opam add], the upstream is automatically configured. For promoted 229 + local projects, use [set_upstream_remote] to configure where updates 230 + should be fetched from. *) 231 + 232 + val upstream_remote_name : string -> string 233 + (** [upstream_remote_name name] returns the git remote name for upstream, 234 + e.g., "upstream-brotli" *) 235 + 236 + val set_upstream_remote : 237 + proc_mgr:Git.proc_mgr -> 238 + root:Worktree.root -> 239 + name:string -> 240 + url:string -> 241 + [ `Created | `Existed | `Updated ] 242 + (** [set_upstream_remote ~proc_mgr ~root ~name ~url] sets the remote URL 243 + for fetching upstream updates. Creates remote "upstream-<name>". 244 + 245 + This is used by [opam update] to fetch new changes. For promoted local 246 + projects, this typically points to the same repo as the export remote. *) 247 + 248 + val get_upstream_remote : 249 + proc_mgr:Git.proc_mgr -> 250 + root:Worktree.root -> 251 + name:string -> 252 + string option 253 + (** [get_upstream_remote ~proc_mgr ~root ~name] returns the upstream remote URL, 254 + or None if no upstream remote is configured. *) 255 + 256 + (** Result of export push operation *) 257 + type export_push_result = 258 + | Export_pushed of { 259 + name : string; 260 + backend : backend; 261 + remote : string; 262 + branch : string; 263 + commits : int; 264 + } 265 + | Export_not_found of string 266 + (** No export branch exists for this package *) 267 + | No_export_remote of string 268 + (** No export remote configured *) 269 + | Export_push_failed of { name : string; error : string } 270 + (** Push operation failed *) 271 + 272 + val push_export : 273 + proc_mgr:Git.proc_mgr -> 274 + root:Worktree.root -> 275 + name:string -> 276 + backend:backend -> 277 + export_push_result 278 + (** [push_export ~proc_mgr ~root ~name ~backend] pushes an export branch 279 + to its configured remote. 280 + 281 + Pushes the [<backend>/export/<name>] branch to the remote configured 282 + via [set_export_remote], targeting the 'main' branch on the remote. 283 + 284 + Returns [Export_not_found] if the package hasn't been exported yet. 285 + Returns [No_export_remote] if no remote has been configured. *) 286 + 287 + val list_exports : 288 + proc_mgr:Git.proc_mgr -> 289 + root:Worktree.root -> 290 + (backend * string) list 291 + (** [list_exports ~proc_mgr ~root] returns all exported packages as 292 + (backend, name) pairs. *)
+15
lib/unpac.ml
··· 1 + (** Unpac - Multi-backend vendoring library using git worktrees. *) 2 + 3 + (** {1 Core Modules} *) 4 + 5 + module Git = Git 6 + module Git_repo_lookup = Git_repo_lookup 7 + module Worktree = Worktree 8 + module Config = Config 9 + module Vendor_cache = Vendor_cache 10 + module Init = Init 11 + module Backend = Backend 12 + module Audit = Audit 13 + module Git_backend = Git_backend 14 + module Promote = Promote 15 + module Monorepo = Monorepo
+157
lib/vendor_cache.ml
··· 1 + (** Vendor cache - a persistent bare git repository for caching upstream fetches. 2 + 3 + The cache stores fetched repositories as remotes/branches, allowing multiple 4 + unpac projects to share fetched content without re-downloading. *) 5 + 6 + (** {1 Types} *) 7 + 8 + type t = Eio.Fs.dir_ty Eio.Path.t 9 + (** Path to the cache bare repository *) 10 + 11 + (** {1 Cache Location} *) 12 + 13 + let default_path () = 14 + let cache_home = 15 + match Sys.getenv_opt "XDG_CACHE_HOME" with 16 + | Some dir -> dir 17 + | None -> 18 + match Sys.getenv_opt "HOME" with 19 + | Some home -> Filename.concat home ".cache" 20 + | None -> "/tmp" 21 + in 22 + Filename.concat cache_home "unpac/vendor-cache" 23 + 24 + (** {1 Initialization} *) 25 + 26 + let init ~proc_mgr ~fs ?path () = 27 + let cache_path = match path with 28 + | Some p -> p 29 + | None -> default_path () 30 + in 31 + let cache = Eio.Path.(fs / cache_path) in 32 + 33 + (* Check if already initialized *) 34 + if Eio.Path.is_directory cache then 35 + cache 36 + else begin 37 + (* Create parent directories *) 38 + let parent = Filename.dirname cache_path in 39 + let parent_path = Eio.Path.(fs / parent) in 40 + Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 parent_path; 41 + 42 + (* Initialize bare repository *) 43 + Eio.Path.mkdirs ~exists_ok:false ~perm:0o755 cache; 44 + Git.run_exn ~proc_mgr ~cwd:cache ["init"; "--bare"] |> ignore; 45 + cache 46 + end 47 + 48 + (** {1 Remote Naming} 49 + 50 + We use URL-based remote names to avoid conflicts. 51 + e.g., "github.com/dbuenzli/astring" for https://github.com/dbuenzli/astring.git *) 52 + 53 + let url_to_remote_name url = 54 + (* Strip protocol and .git suffix *) 55 + let url = 56 + let prefixes = ["https://"; "http://"; "git://"; "ssh://"; "git@"] in 57 + List.fold_left (fun u prefix -> 58 + if String.starts_with ~prefix u then 59 + String.sub u (String.length prefix) (String.length u - String.length prefix) 60 + else u 61 + ) url prefixes 62 + in 63 + let url = 64 + if String.ends_with ~suffix:".git" url then 65 + String.sub url 0 (String.length url - 4) 66 + else url 67 + in 68 + (* Replace : with / for git@ style URLs *) 69 + String.map (fun c -> if c = ':' then '/' else c) url 70 + 71 + let branch_name ~remote ~branch = 72 + remote ^ "/" ^ branch 73 + 74 + (** {1 Cache Operations} *) 75 + 76 + let has_remote ~proc_mgr cache remote_name = 77 + match Git.remote_url ~proc_mgr ~cwd:cache remote_name with 78 + | Some _ -> true 79 + | None -> false 80 + 81 + let ensure_remote ~proc_mgr cache ~url = 82 + let remote_name = url_to_remote_name url in 83 + if has_remote ~proc_mgr cache remote_name then 84 + remote_name 85 + else begin 86 + Git.run_exn ~proc_mgr ~cwd:cache 87 + ["remote"; "add"; remote_name; url] |> ignore; 88 + remote_name 89 + end 90 + 91 + let fetch ~proc_mgr cache ~url = 92 + let remote_name = ensure_remote ~proc_mgr cache ~url in 93 + Git.fetch ~proc_mgr ~cwd:cache ~remote:remote_name; 94 + remote_name 95 + 96 + let get_ref ~proc_mgr cache ~url ~branch = 97 + let remote_name = url_to_remote_name url in 98 + let ref_name = branch_name ~remote:remote_name ~branch in 99 + match Git.rev_parse ~proc_mgr ~cwd:cache ref_name with 100 + | Some sha -> Some sha 101 + | None -> None 102 + 103 + (** Fetch to cache, then clone ref into project's bare repo *) 104 + let fetch_to_project ~proc_mgr ~cache ~project_git ~url ~branch = 105 + (* First, fetch to cache (include tags, force update to avoid conflicts) *) 106 + let remote_name = ensure_remote ~proc_mgr cache ~url in 107 + Git.run_exn ~proc_mgr ~cwd:cache 108 + ["fetch"; "--tags"; "--force"; remote_name] |> ignore; 109 + 110 + (* Determine if this is a branch or tag *) 111 + let branch_ref = branch_name ~remote:remote_name ~branch in 112 + let tag_ref = "refs/tags/" ^ branch in 113 + 114 + (* Check which ref exists in cache *) 115 + let cache_ref = 116 + match Git.rev_parse ~proc_mgr ~cwd:cache branch_ref with 117 + | Some _ -> branch_ref 118 + | None -> 119 + (* Try as a tag *) 120 + match Git.rev_parse ~proc_mgr ~cwd:cache tag_ref with 121 + | Some _ -> tag_ref 122 + | None -> failwith (Printf.sprintf "Ref not found: %s (tried branch %s and tag %s)" 123 + branch branch_ref tag_ref) 124 + in 125 + 126 + (* Now fetch from cache into project *) 127 + let cache_path = snd cache in 128 + 129 + (* Add cache as a remote in project if not exists *) 130 + let cache_remote = "vendor-cache" in 131 + (match Git.remote_url ~proc_mgr ~cwd:project_git cache_remote with 132 + | None -> 133 + Git.run_exn ~proc_mgr ~cwd:project_git 134 + ["remote"; "add"; cache_remote; cache_path] |> ignore 135 + | Some _ -> ()); 136 + 137 + (* Fetch the specific ref from cache *) 138 + Git.run_exn ~proc_mgr ~cwd:project_git 139 + ["fetch"; cache_remote; cache_ref ^ ":" ^ cache_ref] |> ignore; 140 + 141 + cache_ref 142 + 143 + (** {1 Listing} *) 144 + 145 + let list_remotes ~proc_mgr cache = 146 + Git.run_lines ~proc_mgr ~cwd:cache ["remote"] 147 + 148 + let list_branches ~proc_mgr cache = 149 + Git.run_lines ~proc_mgr ~cwd:cache ["branch"; "-a"] 150 + |> List.filter_map (fun line -> 151 + let line = String.trim line in 152 + if String.starts_with ~prefix:"* " line then 153 + Some (String.sub line 2 (String.length line - 2)) 154 + else if line <> "" then 155 + Some line 156 + else 157 + None)
+63
lib/vendor_cache.mli
··· 1 + (** Vendor cache - a persistent bare git repository for caching upstream fetches. 2 + 3 + The cache stores fetched repositories as remotes/branches, allowing multiple 4 + unpac projects to share fetched content without re-downloading. *) 5 + 6 + (** {1 Types} *) 7 + 8 + type t = Eio.Fs.dir_ty Eio.Path.t 9 + (** Path to the cache bare repository *) 10 + 11 + (** {1 Cache Location} *) 12 + 13 + val default_path : unit -> string 14 + (** Returns the default cache path (XDG_CACHE_HOME/unpac/vendor-cache) *) 15 + 16 + (** {1 Initialization} *) 17 + 18 + val init : proc_mgr:Git.proc_mgr -> fs:Eio.Fs.dir_ty Eio.Path.t -> ?path:string -> unit -> t 19 + (** [init ~proc_mgr ~fs ?path ()] initializes and returns the cache. 20 + Creates the bare repository if it doesn't exist. 21 + @param path Optional custom cache path. Uses default if not provided. *) 22 + 23 + (** {1 Remote Naming} *) 24 + 25 + val url_to_remote_name : string -> string 26 + (** [url_to_remote_name url] converts a git URL to a remote name. 27 + e.g., "https://github.com/dbuenzli/astring.git" -> "github.com/dbuenzli/astring" *) 28 + 29 + val branch_name : remote:string -> branch:string -> string 30 + (** [branch_name ~remote ~branch] returns the full branch name in cache. *) 31 + 32 + (** {1 Cache Operations} *) 33 + 34 + val has_remote : proc_mgr:Git.proc_mgr -> t -> string -> bool 35 + (** [has_remote ~proc_mgr cache name] checks if a remote exists in cache. *) 36 + 37 + val ensure_remote : proc_mgr:Git.proc_mgr -> t -> url:string -> string 38 + (** [ensure_remote ~proc_mgr cache ~url] adds remote if needed, returns remote name. *) 39 + 40 + val fetch : proc_mgr:Git.proc_mgr -> t -> url:string -> string 41 + (** [fetch ~proc_mgr cache ~url] fetches from URL into cache, returns remote name. *) 42 + 43 + val get_ref : proc_mgr:Git.proc_mgr -> t -> url:string -> branch:string -> string option 44 + (** [get_ref ~proc_mgr cache ~url ~branch] returns the SHA for a cached ref. *) 45 + 46 + val fetch_to_project : 47 + proc_mgr:Git.proc_mgr -> 48 + cache:t -> 49 + project_git:Eio.Fs.dir_ty Eio.Path.t -> 50 + url:string -> 51 + branch:string -> 52 + string 53 + (** [fetch_to_project ~proc_mgr ~cache ~project_git ~url ~branch] 54 + fetches from upstream to cache, then from cache to project's bare repo. 55 + Returns the cache ref name. *) 56 + 57 + (** {1 Listing} *) 58 + 59 + val list_remotes : proc_mgr:Git.proc_mgr -> t -> string list 60 + (** List all remotes in the cache. *) 61 + 62 + val list_branches : proc_mgr:Git.proc_mgr -> t -> string list 63 + (** List all branches in the cache. *)
+212
lib/worktree.ml
··· 1 + (** Git worktree lifecycle management for unpac. 2 + 3 + Manages creation, cleanup, and paths of worktrees within the unpac 4 + directory structure. All branch operations happen in isolated worktrees. *) 5 + 6 + (** {1 Types} *) 7 + 8 + type root = Eio.Fs.dir_ty Eio.Path.t 9 + (** The unpac project root directory (contains git/, main/, etc.) *) 10 + 11 + type kind = 12 + | Main 13 + | Project of string 14 + | Opam_upstream of string 15 + | Opam_vendor of string 16 + | Opam_patches of string 17 + | Git_upstream of string 18 + | Git_vendor of string 19 + | Git_patches of string 20 + (** Worktree kinds with their associated names. 21 + Opam_* variants are for opam package vendoring. 22 + Git_* variants are for direct git repository vendoring. *) 23 + 24 + (** {1 Path and Branch Helpers} *) 25 + 26 + let git_dir root = Eio.Path.(root / "git") 27 + (** Path to the bare git repository. *) 28 + 29 + let path root = function 30 + | Main -> Eio.Path.(root / "main") 31 + | Project name -> Eio.Path.(root / "project" / name) 32 + | Opam_upstream name -> Eio.Path.(root / "opam" / "upstream" / name) 33 + | Opam_vendor name -> Eio.Path.(root / "opam" / "vendor" / name) 34 + | Opam_patches name -> Eio.Path.(root / "opam" / "patches" / name) 35 + | Git_upstream name -> Eio.Path.(root / "git-repos" / "upstream" / name) 36 + | Git_vendor name -> Eio.Path.(root / "git-repos" / "vendor" / name) 37 + | Git_patches name -> Eio.Path.(root / "git-repos" / "patches" / name) 38 + 39 + let branch = function 40 + | Main -> "main" 41 + | Project name -> "project/" ^ name 42 + | Opam_upstream name -> "opam/upstream/" ^ name 43 + | Opam_vendor name -> "opam/vendor/" ^ name 44 + | Opam_patches name -> "opam/patches/" ^ name 45 + | Git_upstream name -> "git/upstream/" ^ name 46 + | Git_vendor name -> "git/vendor/" ^ name 47 + | Git_patches name -> "git/patches/" ^ name 48 + 49 + let relative_path = function 50 + | Main -> "main" 51 + | Project name -> "project/" ^ name 52 + | Opam_upstream name -> "opam/upstream/" ^ name 53 + | Opam_vendor name -> "opam/vendor/" ^ name 54 + | Opam_patches name -> "opam/patches/" ^ name 55 + | Git_upstream name -> "git-repos/upstream/" ^ name 56 + | Git_vendor name -> "git-repos/vendor/" ^ name 57 + | Git_patches name -> "git-repos/patches/" ^ name 58 + 59 + (** {1 Queries} *) 60 + 61 + let exists root kind = 62 + let p = path root kind in 63 + Eio.Path.is_directory p 64 + 65 + let branch_exists ~proc_mgr root kind = 66 + let git = git_dir root in 67 + Git.branch_exists ~proc_mgr ~cwd:git (branch kind) 68 + 69 + (** {1 Operations} *) 70 + 71 + let ensure ~proc_mgr root kind = 72 + if exists root kind then () 73 + else begin 74 + let git = git_dir root in 75 + let wt_path = path root kind in 76 + let rel_path = "../" ^ relative_path kind in (* Relative to git/ dir *) 77 + let br = branch kind in 78 + 79 + (* Ensure parent directories exist *) 80 + let parent = Eio.Path.split wt_path |> Option.map fst in 81 + Option.iter (fun p -> Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 p) parent; 82 + 83 + (* Create worktree *) 84 + Git.run_exn ~proc_mgr ~cwd:git 85 + ["worktree"; "add"; rel_path; br] |> ignore 86 + end 87 + 88 + let ensure_orphan ~proc_mgr root kind = 89 + if exists root kind then () 90 + else begin 91 + let git = git_dir root in 92 + let wt_path = path root kind in 93 + let rel_path = "../" ^ relative_path kind in (* Relative to git/ dir *) 94 + let br = branch kind in 95 + 96 + (* Ensure parent directories exist *) 97 + let parent = Eio.Path.split wt_path |> Option.map fst in 98 + Option.iter (fun p -> Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 p) parent; 99 + 100 + (* Create a detached worktree from main branch, then make it an orphan *) 101 + let start_commit = Git.run_exn ~proc_mgr ~cwd:git ["rev-parse"; "main"] |> String.trim in 102 + Git.run_exn ~proc_mgr ~cwd:git 103 + ["worktree"; "add"; "--detach"; rel_path; start_commit] |> ignore; 104 + 105 + (* Now in the worktree, create an orphan branch and clear files *) 106 + Git.run_exn ~proc_mgr ~cwd:wt_path ["checkout"; "--orphan"; br] |> ignore; 107 + (* Remove all tracked files from index *) 108 + Git.run_exn ~proc_mgr ~cwd:wt_path ["rm"; "-rf"; "--cached"; "."] |> ignore; 109 + (* Clean the working directory *) 110 + Git.run_exn ~proc_mgr ~cwd:wt_path ["clean"; "-fd"] |> ignore 111 + end 112 + 113 + let ensure_detached ~proc_mgr root kind ~commit = 114 + if exists root kind then () 115 + else begin 116 + let git = git_dir root in 117 + let wt_path = path root kind in 118 + let rel_path = "../" ^ relative_path kind in (* Relative to git/ dir *) 119 + 120 + (* Ensure parent directories exist *) 121 + let parent = Eio.Path.split wt_path |> Option.map fst in 122 + Option.iter (fun p -> Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 p) parent; 123 + 124 + (* Create detached worktree at commit *) 125 + Git.run_exn ~proc_mgr ~cwd:git 126 + ["worktree"; "add"; "--detach"; rel_path; commit] |> ignore 127 + end 128 + 129 + let remove ~proc_mgr root kind = 130 + if not (exists root kind) then () 131 + else begin 132 + let git = git_dir root in 133 + let rel_path = "../" ^ relative_path kind in (* Relative to git/ dir *) 134 + Git.run_exn ~proc_mgr ~cwd:git 135 + ["worktree"; "remove"; rel_path] |> ignore 136 + end 137 + 138 + let remove_force ~proc_mgr root kind = 139 + if not (exists root kind) then () 140 + else begin 141 + let git = git_dir root in 142 + let rel_path = "../" ^ relative_path kind in (* Relative to git/ dir *) 143 + Git.run_exn ~proc_mgr ~cwd:git 144 + ["worktree"; "remove"; "--force"; rel_path] |> ignore 145 + end 146 + 147 + let with_temp ~proc_mgr root kind f = 148 + ensure ~proc_mgr root kind; 149 + Fun.protect 150 + ~finally:(fun () -> remove ~proc_mgr root kind) 151 + (fun () -> f (path root kind)) 152 + 153 + let with_temp_orphan ~proc_mgr root kind f = 154 + ensure_orphan ~proc_mgr root kind; 155 + Fun.protect 156 + ~finally:(fun () -> remove ~proc_mgr root kind) 157 + (fun () -> f (path root kind)) 158 + 159 + (** {1 Listing} *) 160 + 161 + let list_worktrees ~proc_mgr root = 162 + let git = git_dir root in 163 + Git.run_lines ~proc_mgr ~cwd:git ["worktree"; "list"; "--porcelain"] 164 + |> List.filter_map (fun line -> 165 + if String.starts_with ~prefix:"worktree " line then 166 + Some (String.sub line 9 (String.length line - 9)) 167 + else None) 168 + 169 + let list_projects ~proc_mgr root = 170 + let git = git_dir root in 171 + Git.run_lines ~proc_mgr ~cwd:git ["branch"; "--list"; "project/*"] 172 + |> List.filter_map (fun line -> 173 + let line = String.trim line in 174 + (* Strip "* " (current) or "+ " (linked worktree) prefix *) 175 + let line = 176 + if String.starts_with ~prefix:"* " line || String.starts_with ~prefix:"+ " line 177 + then String.sub line 2 (String.length line - 2) 178 + else line 179 + in 180 + if String.starts_with ~prefix:"project/" line then 181 + Some (String.sub line 8 (String.length line - 8)) 182 + else None) 183 + 184 + let list_opam_packages ~proc_mgr root = 185 + let git = git_dir root in 186 + Git.run_lines ~proc_mgr ~cwd:git ["branch"; "--list"; "opam/patches/*"] 187 + |> List.filter_map (fun line -> 188 + let line = String.trim line in 189 + (* Strip "* " (current) or "+ " (linked worktree) prefix *) 190 + let line = 191 + if String.starts_with ~prefix:"* " line || String.starts_with ~prefix:"+ " line 192 + then String.sub line 2 (String.length line - 2) 193 + else line 194 + in 195 + if String.starts_with ~prefix:"opam/patches/" line then 196 + Some (String.sub line 13 (String.length line - 13)) 197 + else None) 198 + 199 + let list_git_repos ~proc_mgr root = 200 + let git = git_dir root in 201 + Git.run_lines ~proc_mgr ~cwd:git ["branch"; "--list"; "git/patches/*"] 202 + |> List.filter_map (fun line -> 203 + let line = String.trim line in 204 + (* Strip "* " (current) or "+ " (linked worktree) prefix *) 205 + let line = 206 + if String.starts_with ~prefix:"* " line || String.starts_with ~prefix:"+ " line 207 + then String.sub line 2 (String.length line - 2) 208 + else line 209 + in 210 + if String.starts_with ~prefix:"git/patches/" line then 211 + Some (String.sub line 12 (String.length line - 12)) 212 + else None)
+110
lib/worktree.mli
··· 1 + (** Git worktree lifecycle management for unpac. 2 + 3 + Manages creation, cleanup, and paths of worktrees within the unpac 4 + directory structure. All branch operations happen in isolated worktrees. 5 + 6 + {2 Directory Structure} 7 + 8 + An unpac project has this layout: 9 + {v 10 + my-project/ 11 + ├── git/ # Bare repository 12 + ├── main/ # Worktree → main branch 13 + ├── project/ 14 + │ └── myapp/ # Worktree → project/myapp 15 + ├── opam/ 16 + │ ├── upstream/ 17 + │ │ └── pkg/ # Worktree → opam/upstream/pkg 18 + │ ├── vendor/ 19 + │ │ └── pkg/ # Worktree → opam/vendor/pkg 20 + │ └── patches/ 21 + │ └── pkg/ # Worktree → opam/patches/pkg 22 + └── git-repos/ 23 + ├── upstream/ 24 + │ └── repo/ # Worktree → git/upstream/repo 25 + ├── vendor/ 26 + │ └── repo/ # Worktree → git/vendor/repo 27 + └── patches/ 28 + └── repo/ # Worktree → git/patches/repo 29 + v} *) 30 + 31 + (** {1 Types} *) 32 + 33 + type root = Eio.Fs.dir_ty Eio.Path.t 34 + (** The unpac project root directory (contains git/, main/, etc.) *) 35 + 36 + type kind = 37 + | Main 38 + | Project of string 39 + | Opam_upstream of string 40 + | Opam_vendor of string 41 + | Opam_patches of string 42 + | Git_upstream of string 43 + | Git_vendor of string 44 + | Git_patches of string 45 + (** Worktree kinds with their associated names. 46 + Opam_* variants are for opam package vendoring. 47 + Git_* variants are for direct git repository vendoring. *) 48 + 49 + (** {1 Path and Branch Helpers} *) 50 + 51 + val git_dir : root -> Eio.Fs.dir_ty Eio.Path.t 52 + (** [git_dir root] returns the path to the bare git repository. *) 53 + 54 + val path : root -> kind -> Eio.Fs.dir_ty Eio.Path.t 55 + (** [path root kind] returns the filesystem path for the worktree. *) 56 + 57 + val branch : kind -> string 58 + (** [branch kind] returns the git branch name for the worktree kind. *) 59 + 60 + (** {1 Queries} *) 61 + 62 + val exists : root -> kind -> bool 63 + (** [exists root kind] checks if the worktree directory exists. *) 64 + 65 + val branch_exists : proc_mgr:Git.proc_mgr -> root -> kind -> bool 66 + (** [branch_exists ~proc_mgr root kind] checks if the branch exists in git. *) 67 + 68 + (** {1 Operations} *) 69 + 70 + val ensure : proc_mgr:Git.proc_mgr -> root -> kind -> unit 71 + (** [ensure ~proc_mgr root kind] creates the worktree if it doesn't exist. 72 + The branch must already exist. *) 73 + 74 + val ensure_orphan : proc_mgr:Git.proc_mgr -> root -> kind -> unit 75 + (** [ensure_orphan ~proc_mgr root kind] creates an orphan worktree. 76 + Creates a new orphan branch. *) 77 + 78 + val ensure_detached : proc_mgr:Git.proc_mgr -> root -> kind -> commit:string -> unit 79 + (** [ensure_detached ~proc_mgr root kind ~commit] creates a detached worktree 80 + at the given commit. Does not create a branch. *) 81 + 82 + val remove : proc_mgr:Git.proc_mgr -> root -> kind -> unit 83 + (** [remove ~proc_mgr root kind] removes the worktree (keeps the branch). *) 84 + 85 + val remove_force : proc_mgr:Git.proc_mgr -> root -> kind -> unit 86 + (** [remove_force ~proc_mgr root kind] forcibly removes the worktree. *) 87 + 88 + val with_temp : proc_mgr:Git.proc_mgr -> root -> kind -> (Eio.Fs.dir_ty Eio.Path.t -> 'a) -> 'a 89 + (** [with_temp ~proc_mgr root kind f] creates the worktree, runs [f] with 90 + the worktree path, then removes the worktree. *) 91 + 92 + val with_temp_orphan : proc_mgr:Git.proc_mgr -> root -> kind -> (Eio.Fs.dir_ty Eio.Path.t -> 'a) -> 'a 93 + (** [with_temp_orphan ~proc_mgr root kind f] creates an orphan worktree, 94 + runs [f], then removes the worktree. *) 95 + 96 + (** {1 Listing} *) 97 + 98 + val list_worktrees : proc_mgr:Git.proc_mgr -> root -> string list 99 + (** [list_worktrees ~proc_mgr root] returns paths of all worktrees. *) 100 + 101 + val list_projects : proc_mgr:Git.proc_mgr -> root -> string list 102 + (** [list_projects ~proc_mgr root] returns names of all project branches. *) 103 + 104 + val list_opam_packages : proc_mgr:Git.proc_mgr -> root -> string list 105 + (** [list_opam_packages ~proc_mgr root] returns names of all vendored opam packages 106 + (packages with opam/patches/* branches). *) 107 + 108 + val list_git_repos : proc_mgr:Git.proc_mgr -> root -> string list 109 + (** [list_git_repos ~proc_mgr root] returns names of all vendored git repositories 110 + (repos with git/patches/* branches). *)