(** batch command: solve, build, and optionally generate docs *) open Cmdliner module Build = Day11_opam_layer.Build module Tool = Day11_opam_layer.Tool module Layer = Day11_layer.Layer type build = Build.t type tool = Tool.t let cleanup_stale_mounts () = (* Unmount and remove any leaked day11_run_* overlay mounts from previous runs that were killed without cleanup *) let tmp = Filename.get_temp_dir_name () in let entries = try Sys.readdir tmp |> Array.to_list with _ -> [] in let stale = List.filter (fun name -> String.length name > 10 && String.sub name 0 10 = "day11_run_" ) entries in if stale <> [] then begin Printf.printf "Cleaning up %d stale temp dirs...\n%!" (List.length stale); List.iter (fun name -> let merged = Filename.concat (Filename.concat tmp name) "merged" in ignore (Sys.command (Printf.sprintf "sudo umount %s 2>/dev/null" merged)); ) stale; ignore (Sys.command (Printf.sprintf "sudo rm -rf %s" (String.concat " " (List.map (fun name -> Filename.concat tmp name) stale)))) end let run profile_name profile_dir np solve_only dry_run rebuild_failed rebuild_base fake_build target_override = cleanup_stale_mounts (); let profile, paths = match Common.load_profile ~profile_dir ~name:profile_name with | Ok x -> x | Error (`Msg e) -> Printf.eprintf "Error: %s\n" e; exit 1 in Common.ensure_paths paths; (* Warn if base image digest is stale or not pinned *) if Day11_batch.Profile.base_image_stale profile then Printf.printf "WARNING: Base image digest is %s. Run 'day11 profile refresh-base --name %s' to update.\n%!" (match profile.base_image_digest with | None -> "not pinned" | Some _ -> "more than 30 days old") profile_name; let cache_dir = paths.cache_dir in let os_dir = paths.os_dir in let ocaml_version = Common.parse_ocaml_version profile.compiler in let driver_compiler = if profile.driver_compiler = "" then None else Some (OpamPackage.of_string profile.driver_compiler) in let opam_repositories = profile.opam_repositories in let with_doc = profile.with_doc in let os_distribution = profile.os_distribution in let os_version = profile.os_version in let arch = profile.arch in let patches_dir = profile.patches_dir in let opam_build_repo = profile.opam_build_repo in let extra_pins = profile.extra_pins in let odoc_repo = profile.odoc_repo in let jtw_repo = if profile.with_jtw then profile.jtw_repo else None in let small_universe, all_versions, target = match target_override with | Some t -> (* CLI target overrides the profile's target mode *) (false, false, Some t) | None -> let sm = profile.target_mode = Day11_batch.Profile.Small_universe in let av = profile.target_mode = Day11_batch.Profile.All_versions in let tgt = match profile.target_mode with | Day11_batch.Profile.Packages (pkg :: _) -> Some pkg | _ -> None in (sm, av, tgt) in let git_packages, repos_with_shas, opam_env = Common.setup_solver opam_repositories in let targets = Day11_batch.Targets.resolve ~small:small_universe ~all_versions git_packages target in Printf.printf "Targets: %d packages\n%!" (List.length targets); (* Snapshot — deterministic dir keyed by repo HEADs *) let snapshot = Day11_batch.Snapshot.current profile in let snapshot_dir = Fpath.(paths.snapshots_base / snapshot.key) in ignore (Bos.OS.Dir.create ~path:true snapshot_dir); ignore (Day11_batch.Snapshot.save snapshot_dir snapshot); Printf.printf "Snapshot: %s\n%!" snapshot.key; (* Start run log *) Day11_lib.Run_log.set_log_base_dir (Fpath.to_string snapshot_dir); let run_log = Day11_lib.Run_log.start_run () in Day11_lib.Run_log.write_plan run_log ~repos_with_shas ~n_targets:(List.length targets) ~ocaml_version:(Option.map OpamPackage.to_string ocaml_version) ~with_doc ~all_versions ~small_universe; (match ocaml_version with | Some v -> Printf.printf "Compiler: %s\n%!" (OpamPackage.to_string v) | None -> ()); (* Solve — load cached solutions where possible *) let solutions_dir = Day11_batch.Snapshot.solutions_dir snapshot_dir in Bos.OS.Dir.create ~path:true solutions_dir |> ignore; let cached = ref 0 in let need_solve = List.filter (fun target -> let cache_file = Fpath.(solutions_dir / (OpamPackage.to_string target ^ ".json")) in if Sys.file_exists (Fpath.to_string cache_file) then begin incr cached; false end else true ) targets in Printf.printf "Solving: %d cached, %d need solving (%d workers)...\n%!" !cached (List.length need_solve) np; let results = Day11_solver_pool.Solver_pool.solve_many ?ocaml_version ~np ~repos:repos_with_shas need_solve in (* Retry failed solves with older versions (useful for overlays that pin transitive deps to specific versions) *) let results, targets = if not small_universe then (results, targets) else let new_results = ref [] in let new_targets = ref [] in let solved_names = Hashtbl.create 16 in (* First pass: collect successes *) List.iter (fun (target, result) -> match result with | Ok _ -> new_results := (target, result) :: !new_results; Hashtbl.replace solved_names (OpamPackage.name target) target | Error _ -> () ) results; (* Second pass: retry failures *) List.iter (fun (target, result) -> match result with | Ok _ -> () | Error _ -> let name = OpamPackage.Name.to_string (OpamPackage.name target) in let candidates = Day11_batch.Targets.pick_latest_version git_packages name in let older = List.filter (fun pkg -> OpamPackage.Version.compare (OpamPackage.version pkg) (OpamPackage.version target) < 0 ) candidates in if older = [] then new_results := (target, result) :: !new_results else begin Printf.printf " Retrying %s with older versions...\n%!" name; let retries = Day11_solver_pool.Solver_pool.solve_many ?ocaml_version ~np:1 ~repos:repos_with_shas older in match List.find_opt (fun (_, r) -> Result.is_ok r) retries with | Some hit -> Printf.printf " %s -> %s\n%!" name (OpamPackage.to_string (fst hit)); new_results := hit :: !new_results; Hashtbl.replace solved_names (OpamPackage.name target) (fst hit) | None -> new_results := (target, result) :: !new_results end ) results; (* Update targets to use the versions that actually solved *) List.iter (fun target -> match Hashtbl.find_opt solved_names (OpamPackage.name target) with | Some pkg -> new_targets := pkg :: !new_targets | None -> new_targets := target :: !new_targets ) targets; (List.rev !new_results, List.rev !new_targets) in (* Save new solutions *) List.iter (fun (target, result) -> let entry = match result with | Ok result -> Day11_batch.Incremental_solver.Cached_solution { package = target; result } | Error (msg, examined) -> Day11_batch.Incremental_solver.Cached_failure { package = target; error = msg; examined } in ignore (Day11_batch.Incremental_solver.save Fpath.(solutions_dir / (OpamPackage.to_string target ^ ".json")) entry) ) results; (* Load all solutions (cached + new) *) let solutions = List.filter_map (fun target -> let cache_file = Fpath.(solutions_dir / (OpamPackage.to_string target ^ ".json")) in match Day11_batch.Incremental_solver.load cache_file with | Ok (Day11_batch.Incremental_solver.Cached_solution { result; _ }) -> Some (target, result) | _ -> None ) targets in (* Extract build_deps for consumers that don't need doc_deps *) let build_solutions = List.map (fun (t, r) -> (t, (r : Day11_solution.Solve_result.t).build_deps)) solutions in let n_solved = List.length solutions in let n_failed = List.length targets - n_solved in Printf.printf "Solved: %d/%d (%d failed)\n%!" n_solved (List.length targets) n_failed; Day11_lib.Run_log.write_solve run_log ~n_solved ~n_failed; if solve_only then begin Printf.printf "Solutions cached in %s\n%!" (Fpath.to_string solutions_dir); 0 end else let find_opam = Day11_opam.Git_packages.find_package git_packages in let patches = Option.map (fun dir -> Day11_opam_build.Patches.create (Fpath.v dir)) patches_dir in (* Delete base image early if --rebuild-base, before loading *) if rebuild_base then begin let base_dir = Fpath.(cache_dir / "base") in Printf.printf "Deleting base image and all build layers for rebuild...\n%!"; (* Both dirs have root-owned files — go straight to sudo rm -rf *) ignore (Sys.command (Printf.sprintf "sudo rm -rf %s %s" (Fpath.to_string base_dir) (Fpath.to_string os_dir))) end; let base_opt = Day11_opam_build.Base.load_cached ~cache_dir ~os_distribution ~os_version in let base_hash = Day11_opam_build.Base.build_hash ~os_distribution ~os_version ~arch ?digest:profile.base_image_digest () in (* Build DAG — no Eio needed *) let cache = Day11_opam_build.Hash_cache.create ~find_opam ?patches () in let nodes = Day11_opam_build.Dag.build_dag cache ~base_hash build_solutions in Printf.printf "DAG: %d unique build nodes\n%!" (List.length nodes); (* Delete failed layers if --rebuild-failed *) if rebuild_failed then begin let root_deleted = ref 0 in let cascade_deleted = ref 0 in List.iter (fun (node : Day11_opam_layer.Build.t) -> let layer = Build.layer ~os_dir node in if Layer.exists layer then match Day11_layer.Meta.load (Layer.meta_path layer) with | Ok { exit_status; failed_dep; _ } when exit_status <> 0 -> ignore (Bos.OS.Path.delete ~recurse:true (Layer.dir layer)); if failed_dep = None then incr root_deleted else incr cascade_deleted | _ -> () ) nodes; if !root_deleted + !cascade_deleted > 0 then Printf.printf "Deleted %d root failures + %d cascade failures for rebuild\n%!" !root_deleted !cascade_deleted end; (* Check which layers already exist *) let n_cached = List.length (List.filter (fun (node : Day11_opam_layer.Build.t) -> Layer.exists (Build.layer ~os_dir node) ) nodes) in let n_need_build = List.length nodes - n_cached in Printf.printf "Layers: %d cached, %d need building\n%!" n_cached n_need_build; Day11_lib.Run_log.write_dag run_log ~n_build:(List.length nodes) ~n_cached ~n_need_build; if dry_run then begin if n_need_build > 0 then begin Printf.printf "\nLayers to build:\n"; List.iter (fun (node : Day11_opam_layer.Build.t) -> if not (Layer.exists (Build.layer ~os_dir node)) then Printf.printf " %s (%d deps)\n" (OpamPackage.to_string node.pkg) (List.length node.deps) ) nodes end; 0 end else begin (* === Build phase (needs Eio, base image, containers) === *) Common.with_eio @@ fun env -> (* Build opam-build separately if needed *) if rebuild_base then begin let bin = Fpath.(cache_dir / "opam-build-bin") in ignore (Bos.OS.File.delete bin) end; let opam_build_repo_fpath = Option.map Fpath.v opam_build_repo in (match Day11_opam_build.Base.build_opam_build env ~cache_dir ~arch ?opam_build_repo:opam_build_repo_fpath () with | Ok path -> Printf.printf "opam-build: %s\n%!" (Fpath.to_string path) | Error (`Msg e) -> Printf.eprintf "opam-build build failed: %s\n%!" e; exit 1); let base = match base_opt with | Some b -> b | None -> Printf.printf "Building base image...\n%!"; let uid = Unix.getuid () and gid = Unix.getgid () in (match Day11_opam_build.Base.build env ~cache_dir ~os_distribution ~os_version ~arch ~opam_repositories:(List.map Fpath.v opam_repositories) ~uid ~gid ?digest:profile.base_image_digest () with | Ok base -> base | Error (`Msg e) -> Printf.eprintf "Base image build failed: %s\n%!" e; exit 1) in let benv = Day11_opam_build.Types.make_build_env ~base ~os_dir () in Day11_opam_build.Types.ensure_dirs benv; (* Create merged opam-repository and mount into containers — picks up changes without rebuilding the base image *) let merged_repo_dir = Fpath.(snapshot_dir / "merged-repo") in ignore (Day11_exec.Sudo.rm_rf env merged_repo_dir); Bos.OS.Dir.create ~path:true merged_repo_dir |> ignore; List.iteri (fun i repo -> let src = Fpath.v repo in if i = 0 then ignore (Day11_exec.Tree.copy ~source:src ~target:merged_repo_dir) else (* Overlay: copy packages/ from later repos, overwriting *) let src_pkgs = Fpath.(src / "packages") in if Bos.OS.Dir.exists src_pkgs |> Result.get_ok then ignore (Sys.command (Printf.sprintf "cp -a %s/* %s/packages/" (Fpath.to_string src_pkgs) (Fpath.to_string merged_repo_dir))) ) opam_repositories; let repo_mount = Day11_container.Mount.bind_rw ~src:(Fpath.to_string merged_repo_dir) "/home/opam/.opam/repo/default" in let base_mounts = [ repo_mount ] @ (match Day11_opam_build.Base.opam_build_mount ~cache_dir with | Some m -> [ m ] | None -> []) in (* Bless *) let blessing_maps = Day11_batch.Blessing.compute_blessings build_solutions in (* Build function for the unified DAG *) let packages_dir = Day11_batch.Snapshot.packages_dir snapshot_dir in ignore (Bos.OS.Dir.create ~path:true packages_dir); let fake_strategy pkg = let pkg_str = OpamPackage.to_string pkg in { Day11_opam_build.Types.cmd = Printf.sprintf "echo 'fake-build %s'" pkg_str; cleanup = Day11_opam_build.Build_layer.opam_build_cleanup } in (* Accumulate build outcomes for Summary *) let build_outcomes_lock = Mutex.create () in let build_outcomes : Day11_batch.Summary.build_outcome list ref = ref [] in let record_build_outcome (node : Day11_opam_layer.Build.t) success = let blessed = List.exists (fun (_target, map) -> Day11_batch.Blessing.is_blessed map node.pkg ) blessing_maps in let log_file = let dir = Day11_opam_layer.Build.dir ~os_dir node in let p = Fpath.(dir / "build.log") in if Sys.file_exists (Fpath.to_string p) then Some p else None in let outcome : Day11_batch.Summary.build_outcome = { pkg = node.pkg; build_hash = node.hash; success; log_file; blessed; } in Mutex.lock build_outcomes_lock; build_outcomes := outcome :: !build_outcomes; Mutex.unlock build_outcomes_lock in let build_one (node : Day11_opam_layer.Build.t) = let strategy = if fake_build then Some (fake_strategy node.pkg) else None in let pkg_str = OpamPackage.to_string node.pkg in let on_extract ~layer_dir ~success:_ = let installed_libs = Day11_opam_layer.Installed_files.scan_libs ~layer_dir in let installed_docs = Day11_opam_layer.Installed_files.scan_docs ~layer_dir in let bm : Day11_opam_layer.Build_meta.t = { package = pkg_str; deps = List.map (fun (d : Day11_opam_layer.Build.t) -> OpamPackage.to_string d.pkg) node.deps; installed_libs; installed_docs; patches = (match patches with | Some p -> Day11_opam_build.Patches.patch_filenames p node.pkg | None -> []); } in ignore (Day11_opam_layer.Build_meta.save layer_dir bm) in match Day11_opam_build.Build_layer.build env benv ?patches ~mounts:base_mounts ~on_extract node ?strategy () with | Day11_opam_build.Types.Success _ -> let layer_name = Day11_opam_layer.Build.dir_name node in ignore (Day11_layer.Symlinks.ensure ~packages_dir ~id:pkg_str ~layer_name); record_build_outcome node true; true | _ -> record_build_outcome node false; false in (* Build + Docs (unified pipeline when --with-doc) *) if with_doc then begin Day11_doc.Generate.build_tools_and_run env benv ~np ~os_dir ~packages:git_packages ~repos:repos_with_shas ~opam_env ~mounts:base_mounts ?driver_compiler ~odoc_repo ~build_one ~opam_repositories ~cache ~run_log ~nodes ~solutions ~blessing_maps () end else begin (* Build only — no docs *) let is_cached node = let layer = Build.layer ~os_dir node in if not (Layer.exists layer) then Day11_opam_build.Dag_executor.Not_cached else begin Day11_layer.Last_used.touch (Layer.dir layer); match Day11_layer.Meta.load (Layer.meta_path layer) with | Ok meta -> let success = meta.exit_status = 0 in record_build_outcome node success; if success then Day11_opam_build.Dag_executor.Cached_ok else Day11_opam_build.Dag_executor.Cached_fail | Error _ -> record_build_outcome node false; Day11_opam_build.Dag_executor.Cached_fail end in let cascaded_set : (string, unit) Hashtbl.t = Hashtbl.create 256 in Day11_opam_build.Dag_executor.execute env ~np ~is_cached ~on_complete:(fun ~stats node success -> let open Day11_opam_build.Dag_executor in if Hashtbl.mem cascaded_set node.hash then () else begin let status = if success then "ok" else "fail" in let layer = Fpath.to_string (Day11_opam_layer.Build.dir ~os_dir node) in Day11_lib.Run_log.log_build_result run_log ~pkg:(OpamPackage.to_string node.pkg) ~hash:node.hash ~status ~failed_dep:None ~kind:"build" ~layer_dir:layer (); if not success then Printf.printf "[%d/%d, %d ok, %d failed, %d cascade] FAIL: %s\n%!" stats.completed stats.total stats.ok stats.failed stats.cascaded (OpamPackage.to_string node.pkg) else if stats.completed mod 100 = 0 then Printf.printf "[%d/%d, %d ok, %d failed, %d cascade] %s\n%!" stats.completed stats.total stats.ok stats.failed stats.cascaded (OpamPackage.to_string node.pkg) end) ~on_cascade:(fun ~failed ~failed_dep -> Hashtbl.replace cascaded_set failed.hash (); (* Write a skeleton layer.json so re-runs skip this node *) let layer = Build.layer ~os_dir failed in ignore (Bos.OS.Dir.create ~path:true (Layer.dir layer)); if not (Layer.exists layer) then begin let meta : Day11_layer.Meta.t = { exit_status = 1; parent_hashes = []; uid = benv.uid; gid = benv.gid; base_hash = benv.base.hash; disk_usage = 0; timing = Day11_layer.Meta.empty_timing; created_at = ""; failed_dep = Some (Day11_opam_layer.Build.dir_name failed_dep); } in ignore (Day11_layer.Meta.save (Layer.meta_path layer) meta) end; (* Create package symlink so the failure is discoverable *) let pkg_str = OpamPackage.to_string failed.pkg in let layer_name = Day11_opam_layer.Build.dir_name failed in ignore (Day11_layer.Symlinks.ensure ~packages_dir ~id:pkg_str ~layer_name); Day11_lib.Run_log.log_build_result run_log ~pkg:(OpamPackage.to_string failed.pkg) ~hash:failed.hash ~status:"cascade" ~failed_dep:(Some (OpamPackage.to_string failed_dep.pkg)) ~kind:"build" (); record_build_outcome failed false) nodes build_one end; (* JTW *) (match jtw_repo with | Some dir -> let output = Fpath.to_string Fpath.(cache_dir / "jtw-output") in Day11_jtw.Build_tools.build_and_run env benv ~np ~os_dir ~packages:git_packages ~repos:repos_with_shas ~mounts:[repo_mount] ~extra_repo_dirs:extra_pins ~repo_dir:dir ~output ~nodes ~solutions:build_solutions | None -> ()); (* Write final summary via Summary module *) Day11_lib.Run_log.close_build_log (); let compiler = match ocaml_version with | Some v -> OpamPackage.to_string v | None -> "unknown" in let results : Day11_batch.Summary.results = { builds = !build_outcomes; docs = []; targets; } in ignore (Day11_batch.Summary.finish ~snapshot_dir ~packages_dir ~run_info:run_log ~compiler results); 0 end let solve_only_term = let doc = "Solve only — cache solutions and exit without building" in Arg.(value & flag & info [ "solve-only" ] ~doc) let dry_run_term = let doc = "Show what would be built without actually building" in Arg.(value & flag & info [ "dry-run" ] ~doc) let rebuild_failed_term = let doc = "Delete failed layers and rebuild them" in Arg.(value & flag & info [ "rebuild-failed" ] ~doc) let rebuild_base_term = let doc = "Delete and rebuild the base image (use when repos or opam-build change)" in Arg.(value & flag & info [ "rebuild-base" ] ~doc) let fake_build_term = let doc = "Replace opam-build with a trivial echo command (for testing)" in Arg.(value & flag & info [ "fake-build" ] ~doc) let target_term = let doc = "Optional target package (overrides profile's target mode)" in Arg.(value & pos 0 (some string) None & info [] ~docv:"TARGET" ~doc) let cmd = let info = Cmd.info "batch" ~doc:"Solve, build, and document packages" in let term = Term.(const run $ Common.profile_term $ Common.profile_dir_term $ Common.np_term $ solve_only_term $ dry_run_term $ rebuild_failed_term $ rebuild_base_term $ fake_build_term $ target_term) in Cmd.v info term