My aggregated monorepo of OCaml code, automaintained
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Updates

authored by

Jon Ludlam and committed by
Jon Ludlam's Agent
7179ef29 c9ac930e

+1020
+150
day10/bin/git_context.ml
··· 1 + type rejection = 2 + | UserConstraint of OpamFormula.atom 3 + | Unavailable 4 + 5 + type t = { 6 + env : string -> OpamVariable.variable_contents option; 7 + packages : Git_packages.t; 8 + pins : (OpamPackage.Version.t * OpamFile.OPAM.t) OpamPackage.Name.Map.t; 9 + constraints : OpamFormula.version_constraint OpamTypes.name_map; 10 + test : OpamPackage.Name.Set.t; 11 + prefer_oldest : bool; 12 + doc : bool; 13 + post : bool; 14 + examined_packages : OpamPackage.Name.Set.t ref; 15 + } 16 + 17 + let user_restrictions t name = 18 + OpamPackage.Name.Map.find_opt name t.constraints 19 + 20 + let dev = OpamPackage.Version.of_string "dev" 21 + 22 + let env t pkg v = 23 + if List.mem v OpamPackageVar.predefined_depends_variables then None 24 + else 25 + match OpamVariable.Full.to_string v with 26 + | "version" -> Some (OpamTypes.S (OpamPackage.Version.to_string (OpamPackage.version pkg))) 27 + | x -> t.env x 28 + 29 + let filter_deps t pkg f = 30 + let dev = OpamPackage.Version.compare (OpamPackage.version pkg) dev = 0 in 31 + let test = OpamPackage.Name.Set.mem (OpamPackage.name pkg) t.test in 32 + f 33 + |> OpamFilter.partial_filter_formula (env t pkg) 34 + |> OpamFilter.filter_deps ~build:true ~post:t.post ~test ~doc:t.doc ~dev ~dev_setup:false ~default:false 35 + 36 + let filter_available t pkg opam = 37 + let available = OpamFile.OPAM.available opam in 38 + match OpamFilter.eval ~default:(B false) (env t pkg) available with 39 + | B true -> Ok opam 40 + | B false -> Error Unavailable 41 + | _ -> 42 + OpamConsole.error "Available expression not a boolean: %s" 43 + (OpamFilter.to_string available); 44 + Error Unavailable 45 + 46 + let version_compare t (v1, v1_avoid, _) (v2, v2_avoid, _) = 47 + match (v1_avoid, v2_avoid) with 48 + | true, true 49 + | false, false -> 50 + if t.prefer_oldest then OpamPackage.Version.compare v1 v2 51 + else OpamPackage.Version.compare v2 v1 52 + | true, false -> 1 53 + | false, true -> -1 54 + 55 + let candidates t name = 56 + t.examined_packages := OpamPackage.Name.Set.add name !(t.examined_packages); 57 + match OpamPackage.Name.Map.find_opt name t.pins with 58 + | Some (version, opam) -> 59 + let pkg = OpamPackage.create name version in 60 + [ (version, filter_available t pkg opam) ] 61 + | None -> 62 + let versions = Git_packages.get_versions t.packages name in 63 + let user_constraints = user_restrictions t name in 64 + OpamPackage.Version.Map.bindings versions 65 + |> List.filter_map (fun (v, opam) -> 66 + let pkg = OpamPackage.create name v in 67 + let avoid = OpamFile.OPAM.has_flag Pkgflag_AvoidVersion opam in 68 + let available = OpamFile.OPAM.available opam in 69 + match OpamFilter.eval_to_bool ~default:false (env t pkg) available with 70 + | true -> Some (v, avoid, opam) 71 + | false -> None) 72 + |> (fun l -> if List.for_all (fun (_, avoid, _) -> avoid) l then [] else l) 73 + |> List.sort (version_compare t) 74 + |> List.map (fun (v, _, opam) -> 75 + match user_constraints with 76 + | Some test when not (OpamFormula.check_version_formula (OpamFormula.Atom test) v) -> 77 + (v, Error (UserConstraint (name, Some test))) 78 + | _ -> (v, Ok opam)) 79 + 80 + let pp_rejection f = function 81 + | UserConstraint x -> 82 + Fmt.pf f "Rejected by user-specified constraint %s" 83 + (OpamFormula.string_of_atom x) 84 + | Unavailable -> Fmt.string f "Availability condition not satisfied" 85 + 86 + let create ?(prefer_oldest = false) ?(test = OpamPackage.Name.Set.empty) 87 + ?(pins = OpamPackage.Name.Map.empty) ?(doc = false) ?(post = true) 88 + ~constraints ~env ~packages () = 89 + { env; packages; pins; constraints; test; prefer_oldest; doc; post; 90 + examined_packages = ref OpamPackage.Name.Set.empty } 91 + 92 + let examined_packages t = !(t.examined_packages) 93 + 94 + (** Create a new context with different doc/post settings. *) 95 + let with_doc_post ~doc ~post t = 96 + { t with doc; post } 97 + 98 + (** Extract x-extra-doc-deps from an opam file. *) 99 + let get_extra_doc_deps opamfile = 100 + let open OpamParserTypes.FullPos in 101 + let extensions = OpamFile.OPAM.extensions opamfile in 102 + match OpamStd.String.Map.find_opt "x-extra-doc-deps" extensions with 103 + | None -> OpamPackage.Name.Set.empty 104 + | Some value -> 105 + let extract_name item = 106 + match item.pelem with 107 + | String name -> Some name 108 + | Option (inner, _) -> 109 + (match inner.pelem with 110 + | String name -> Some name 111 + | _ -> None) 112 + | _ -> None 113 + in 114 + let extract_names acc v = 115 + match v.pelem with 116 + | List { pelem = items; _ } -> 117 + List.fold_left (fun acc item -> 118 + match extract_name item with 119 + | Some name -> 120 + OpamPackage.Name.Set.add (OpamPackage.Name.of_string name) acc 121 + | None -> acc 122 + ) acc items 123 + | _ -> acc 124 + in 125 + extract_names OpamPackage.Name.Set.empty value 126 + 127 + (** Create an extended context where x-extra-doc-deps are added to each package's 128 + regular depends. *) 129 + let extend_with_extra_doc_deps t = 130 + let new_pins = OpamPackage.Name.Map.mapi (fun _name (version, opam) -> 131 + let extra_deps = get_extra_doc_deps opam in 132 + if OpamPackage.Name.Set.is_empty extra_deps then 133 + (version, opam) 134 + else begin 135 + let depends = OpamFile.OPAM.depends opam in 136 + let extra_formula = 137 + OpamPackage.Name.Set.fold (fun dep_name acc -> 138 + let atom = OpamFormula.Atom (dep_name, OpamFormula.Empty) in 139 + OpamFormula.And (acc, atom) 140 + ) extra_deps OpamFormula.Empty 141 + in 142 + let new_depends = match extra_formula with 143 + | OpamFormula.Empty -> depends 144 + | _ -> OpamFormula.And (depends, extra_formula) 145 + in 146 + let new_opam = OpamFile.OPAM.with_depends new_depends opam in 147 + (version, new_opam) 148 + end 149 + ) t.pins in 150 + { t with pins = new_pins }
+151
day10/bin/git_packages.ml
··· 1 + open Lwt.Infix 2 + module Store = Git_unix.Store 3 + module Search = Git.Search.Make (Digestif.SHA1) (Store) 4 + 5 + type package = OpamFile.OPAM.t 6 + type t = package OpamPackage.Version.Map.t Lazy.t OpamPackage.Name.Map.t 7 + 8 + let empty = OpamPackage.Name.Map.empty 9 + 10 + let read_dir store hash = 11 + Store.read store hash >|= function 12 + | Error e -> Fmt.failwith "Failed to read tree: %a" Store.pp_error e 13 + | Ok (Git.Value.Tree tree) -> Some tree 14 + | Ok _ -> None 15 + 16 + let load_opam_from_string pkg opam_content = 17 + let opam = OpamFile.OPAM.read_from_string opam_content in 18 + let opam = OpamFile.OPAM.with_name (OpamPackage.name pkg) opam in 19 + OpamFile.OPAM.with_version (OpamPackage.version pkg) opam 20 + 21 + let read_package store pkg hash = 22 + Search.find store hash (`Path [ "opam" ]) >>= function 23 + | None -> Fmt.failwith "opam file not found for %s" (OpamPackage.to_string pkg) 24 + | Some hash -> ( 25 + Store.read store hash >|= function 26 + | Ok (Git.Value.Blob blob) -> ( 27 + try 28 + let opam_content = Store.Value.Blob.to_string blob in 29 + load_opam_from_string pkg opam_content 30 + with 31 + | ex -> Fmt.failwith "Error parsing %s: %s" (OpamPackage.to_string pkg) (Printexc.to_string ex)) 32 + | _ -> Fmt.failwith "Bad Git object type for %s!" (OpamPackage.to_string pkg)) 33 + 34 + (* Get a map of the versions inside [entry] (an entry under "packages") — Lwt version *) 35 + let read_versions_lwt store (entry : Store.Value.Tree.entry) = 36 + read_dir store entry.node >>= function 37 + | None -> Lwt.return OpamPackage.Version.Map.empty 38 + | Some tree -> 39 + Store.Value.Tree.to_list tree 40 + |> Lwt_list.fold_left_s 41 + (fun acc (entry : Store.Value.Tree.entry) -> 42 + match OpamPackage.of_string_opt entry.name with 43 + | Some pkg -> 44 + read_package store pkg entry.node >|= fun opam -> 45 + OpamPackage.Version.Map.add pkg.version opam acc 46 + | None -> 47 + Lwt.return acc) 48 + OpamPackage.Version.Map.empty 49 + 50 + (* Lazy version: wraps read_versions_lwt in Lwt_main.run. 51 + Only safe when not already inside Lwt_main.run (i.e. single-domain use). *) 52 + let read_versions store entry = 53 + Lwt_main.run @@ read_versions_lwt store entry 54 + 55 + let read_packages ~store tree = 56 + Store.Value.Tree.to_list tree 57 + |> List.filter_map (fun (entry : Store.Value.Tree.entry) -> 58 + match OpamPackage.Name.of_string entry.name with 59 + | exception _ -> None 60 + | name -> Some (name, lazy (read_versions store entry))) 61 + |> OpamPackage.Name.Map.of_list 62 + 63 + (* Eager version: reads all versions within a single Lwt_main.run context. 64 + Safe for use with Domains. *) 65 + let read_packages_eager ~store tree = 66 + Store.Value.Tree.to_list tree 67 + |> List.filter_map (fun (entry : Store.Value.Tree.entry) -> 68 + match OpamPackage.Name.of_string entry.name with 69 + | exception _ -> None 70 + | name -> 71 + let versions = read_versions_lwt store entry in 72 + Some (name, versions)) 73 + 74 + let of_commit_eager store commit : t = 75 + Lwt_main.run @@ 76 + (Search.find store commit (`Commit (`Path [ "packages" ])) >>= function 77 + | None -> Fmt.failwith "Failed to find packages directory!" 78 + | Some tree_hash -> ( 79 + read_dir store tree_hash >>= function 80 + | None -> Fmt.failwith "'packages' is not a directory!" 81 + | Some tree -> 82 + let entries = read_packages_eager ~store tree in 83 + Lwt_list.map_s (fun (name, versions_lwt) -> 84 + versions_lwt >|= fun versions -> 85 + (name, lazy versions) 86 + ) entries >|= fun resolved -> 87 + OpamPackage.Name.Map.of_list resolved)) 88 + 89 + let diff_packages_lwt ~store commit1 commit2 = 90 + Search.find store commit1 (`Commit (`Path [ "packages" ])) >>= function 91 + | None -> Fmt.failwith "Failed to find packages directory in commit1" 92 + | Some tree1_hash -> ( 93 + read_dir store tree1_hash >>= function 94 + | None -> Fmt.failwith "'packages' is not a directory in commit1" 95 + | Some tree1 -> ( 96 + Search.find store commit2 (`Commit (`Path [ "packages" ])) >>= function 97 + | None -> Fmt.failwith "Failed to find packages directory in commit2" 98 + | Some tree2_hash -> ( 99 + read_dir store tree2_hash >>= function 100 + | None -> Fmt.failwith "'packages' is not a directory in commit2" 101 + | Some tree2 -> 102 + let tree1_list = Store.Value.Tree.to_list tree1 in 103 + let htbl = Hashtbl.create (List.length tree1_list) in 104 + let tree2_list = Store.Value.Tree.to_list tree2 in 105 + List.iter (fun (entry : Store.Value.Tree.entry) -> 106 + Hashtbl.add htbl entry.name entry) tree2_list; 107 + Lwt.return 108 + (List.fold_left 109 + (fun acc (entry : Store.Value.Tree.entry) -> 110 + match Hashtbl.find_opt htbl entry.name with 111 + | Some entry2 when entry.node = entry2.node -> acc 112 + | _ -> ( 113 + match OpamPackage.Name.of_string entry.name with 114 + | exception _ -> acc 115 + | name -> name :: acc)) 116 + [] tree1_list)))) 117 + 118 + let diff_packages ~store commit1 commit2 = 119 + Lwt_main.run @@ 120 + diff_packages_lwt ~store commit1 commit2 121 + 122 + let overlay v1 v2 = 123 + lazy ( 124 + let v1 = Lazy.force v1 in 125 + let v2 = Lazy.force v2 in 126 + OpamPackage.Version.Map.union (fun _ v2 -> v2) v1 v2) 127 + 128 + let of_commit ?(super = empty) store commit : t = 129 + Lwt_main.run @@ 130 + (Search.find store commit (`Commit (`Path [ "packages" ])) >>= function 131 + | None -> Fmt.failwith "Failed to find packages directory!" 132 + | Some tree_hash -> ( 133 + read_dir store tree_hash >>= function 134 + | None -> Fmt.failwith "'packages' is not a directory!" 135 + | Some tree -> 136 + let packages = read_packages ~store tree in 137 + Lwt.return (OpamPackage.Name.Map.union overlay super packages))) 138 + 139 + let get_versions (t : t) name = 140 + match OpamPackage.Name.Map.find_opt name t with 141 + | None -> OpamPackage.Version.Map.empty 142 + | Some versions -> Lazy.force versions 143 + 144 + let get_package (t : t) pkg = 145 + let versions = get_versions t (OpamPackage.name pkg) in 146 + OpamPackage.Version.Map.find (OpamPackage.version pkg) versions 147 + 148 + let of_opam_repository repo_path = 149 + let store, commit = Git_utils.get_git_repo_store_and_hash repo_path in 150 + let packages = of_commit store commit in 151 + (packages, store, commit)
+51
day10/bin/git_utils.ml
··· 1 + open Lwt.Infix 2 + module Store = Git_unix.Store 3 + 4 + let get_git_repo_store_and_hash_commit_lwt repo_path commit_opt = 5 + Store.v (Fpath.v repo_path) >>= function 6 + | Error e -> Fmt.failwith "Failed to open git store at %s: %a" repo_path Store.pp_error e 7 + | Ok store -> 8 + (match commit_opt with 9 + | Some commit_str -> 10 + let ref_name = Git.Reference.v commit_str in 11 + Store.Ref.resolve store ref_name >>= (function 12 + | Ok hash -> Lwt.return hash 13 + | Error _ -> 14 + (try 15 + let hash = Store.Hash.of_hex commit_str in 16 + Lwt.return hash 17 + with _ -> 18 + Fmt.failwith "Cannot resolve commit %s in %s" commit_str repo_path)) 19 + | None -> 20 + Store.Ref.resolve store Git.Reference.head >>= function 21 + | Error e -> Fmt.failwith "Failed to resolve HEAD in %s: %a" repo_path Store.pp_error e 22 + | Ok hash -> Lwt.return hash) >>= fun hash -> 23 + Lwt.return (store, hash) 24 + 25 + let get_git_repo_store_and_hash_commit repo_path commit_opt = 26 + Lwt_main.run @@ 27 + get_git_repo_store_and_hash_commit_lwt repo_path commit_opt 28 + 29 + let get_git_repo_store_and_hash repo_path = 30 + get_git_repo_store_and_hash_commit repo_path None 31 + 32 + let resolve_commit_in_store_lwt store commit_opt = 33 + match commit_opt with 34 + | Some commit_str -> 35 + let ref_name = Git.Reference.v commit_str in 36 + Store.Ref.resolve store ref_name >>= (function 37 + | Ok hash -> Lwt.return hash 38 + | Error _ -> 39 + (try 40 + let hash = Store.Hash.of_hex commit_str in 41 + Lwt.return hash 42 + with _ -> 43 + Fmt.failwith "Cannot resolve commit %s" commit_str)) 44 + | None -> 45 + Store.Ref.resolve store Git.Reference.head >>= function 46 + | Error e -> Fmt.failwith "Failed to resolve HEAD: %a" Store.pp_error e 47 + | Ok hash -> Lwt.return hash 48 + 49 + let resolve_commit_in_store store commit_opt = 50 + Lwt_main.run @@ 51 + resolve_commit_in_store_lwt store commit_opt
+376
day10/bin/test_incr_solver.ml
··· 1 + (** Test incremental solver reuse decisions across consecutive commits. 2 + 3 + For each pair of consecutive commits (A, B): 4 + 1. Eagerly load packages from both commits 5 + 2. Solve all packages for both commits in parallel via Domains 6 + 3. Compute changed packages between A and B 7 + 4. For each package where reuse would apply (examined ∩ changed = ∅): 8 + verify that solution_A == solution_B 9 + 5. Report any false negatives (reuse says "keep" but solutions differ) *) 10 + 11 + module Git_solver = Opam_0install.Solver.Make (Git_context) 12 + module Git_input = Git_solver.Input 13 + module Git_output = Git_solver.Solver.Output 14 + module Git_role_map = Git_output.RoleMap 15 + 16 + (* --- Solver --- *) 17 + 18 + let opam_env ~(config : Config.t) pkg v = 19 + match OpamVariable.Full.to_string v with 20 + | "version" -> Some (OpamTypes.S (OpamPackage.Version.to_string (OpamPackage.version pkg))) 21 + | "with-test" -> 22 + let is_tested_pkg = String.equal (OpamPackage.to_string pkg) config.package in 23 + Some (OpamTypes.B (config.with_test && is_tested_pkg)) 24 + | "with-dev" | "with-dev-setup" | "dev" | "with-doc" -> Some (OpamTypes.B false) 25 + | "build" -> Some (OpamTypes.B true) 26 + | "post" -> None 27 + | x -> Config.std_env ~config x 28 + 29 + let solve_git ~packages (config : Config.t) pkg = 30 + let pkg_constraint = (OpamPackage.name pkg, (`Eq, OpamPackage.version pkg)) in 31 + let constraints = 32 + match config.ocaml_version with 33 + | Some ocaml_ver -> 34 + OpamPackage.Name.Map.of_list 35 + [ (OpamPackage.name ocaml_ver, (`Eq, OpamPackage.version ocaml_ver)); pkg_constraint ] 36 + | None -> 37 + let ocaml_constraint = (OpamPackage.Name.of_string "ocaml-base-compiler", 38 + (`Geq, OpamPackage.Version.of_string "4.08.0")) in 39 + OpamPackage.Name.Map.of_list [ ocaml_constraint; pkg_constraint ] 40 + in 41 + let test = if config.with_test then OpamPackage.Name.Set.singleton (OpamPackage.name pkg) 42 + else OpamPackage.Name.Set.empty in 43 + let context = 44 + Git_context.create ~env:(Config.std_env ~config) ~constraints 45 + ~pins:OpamPackage.Name.Map.empty ~test ~packages () 46 + in 47 + let ocaml_name = match config.ocaml_version with 48 + | Some v -> OpamPackage.name v 49 + | None -> OpamPackage.Name.of_string "ocaml-base-compiler" 50 + in 51 + let roots = [ ocaml_name; OpamPackage.name pkg ] in 52 + let r = Git_solver.solve context roots in 53 + let examined = Git_context.examined_packages context in 54 + match r with 55 + | Ok out -> 56 + let sels = Git_output.to_map out in 57 + let depends = Hashtbl.create 100 in 58 + let classify x = 59 + match Git_solver.package_name x with 60 + | Some pkg -> `Opam pkg 61 + | None -> `Virtual x 62 + in 63 + Git_role_map.iter 64 + (fun role sel -> 65 + let impl = Git_output.unwrap sel in 66 + Git_solver.Input.requires role impl |> fst 67 + |> List.iter (fun dep -> 68 + let dep = Git_input.dep_info dep in 69 + let dep_role = dep.dep_role in 70 + if dep.dep_importance <> `Restricts then 71 + Hashtbl.add depends (classify role) (classify dep_role))) 72 + sels; 73 + let rec expand role = 74 + Hashtbl.find_all depends role 75 + |> List.concat_map (function 76 + | `Opam dep -> [ dep ] 77 + | `Virtual _ as role -> expand role) 78 + in 79 + let pkgs = Git_solver.packages_of_result out |> OpamPackage.Set.of_list in 80 + let pkgnames = OpamPackage.names_of_packages pkgs in 81 + let deptree = 82 + OpamPackage.Set.fold 83 + (fun pkg acc -> 84 + let opam = Git_packages.get_package packages pkg in 85 + let deps = OpamFile.OPAM.depends opam |> OpamFilter.partial_filter_formula (opam_env ~config pkg) in 86 + let with_post = OpamFilter.filter_deps ~build:true ~post:true deps |> OpamFormula.all_names in 87 + let without_post = OpamFilter.filter_deps ~build:true ~post:false deps |> OpamFormula.all_names in 88 + let deppost = OpamPackage.Name.Set.diff with_post without_post in 89 + let depopts = OpamFile.OPAM.depopts opam |> OpamFormula.all_names in 90 + let depopts = OpamPackage.Name.Set.inter depopts pkgnames |> OpamPackage.Name.Set.to_list in 91 + let name = OpamPackage.name pkg in 92 + let deps = 93 + expand (`Opam name) @ depopts |> OpamPackage.Name.Set.of_list |> fun x -> 94 + OpamPackage.Name.Set.diff x deppost |> OpamPackage.packages_of_names pkgs 95 + in 96 + OpamPackage.Map.add pkg deps acc) 97 + pkgs OpamPackage.Map.empty 98 + in 99 + let rec dfs map pkg = 100 + let deps = OpamPackage.Map.find pkg deptree in 101 + OpamPackage.Set.fold 102 + (fun p acc -> 103 + match OpamPackage.Map.mem p acc with 104 + | true -> acc 105 + | false -> dfs acc p) 106 + deps (OpamPackage.Map.add pkg deps map) 107 + in 108 + let solution = dfs OpamPackage.Map.empty pkg in 109 + Ok (solution, examined) 110 + | Error problem -> Error (Git_solver.diagnostics problem, examined) 111 + 112 + (* --- Domain-based parallel map --- *) 113 + 114 + let parallel_map ~np (f : 'a -> 'b) (items : 'a array) : 'b array = 115 + let n = Array.length items in 116 + if n = 0 then [||] 117 + else begin 118 + let results = Array.make n (f items.(0)) in 119 + let next = Atomic.make 1 in (* 0 already computed above *) 120 + let domains = Array.init (min (np - 1) (n - 1)) (fun _ -> 121 + Domain.spawn (fun () -> 122 + let rec loop () = 123 + let i = Atomic.fetch_and_add next 1 in 124 + if i < n then begin 125 + results.(i) <- f items.(i); 126 + loop () 127 + end 128 + in 129 + loop () 130 + ) 131 + ) in 132 + (* Parent also does work *) 133 + let rec loop () = 134 + let i = Atomic.fetch_and_add next 1 in 135 + if i < n then begin 136 + results.(i) <- f items.(i); 137 + loop () 138 + end 139 + in 140 + loop (); 141 + Array.iter Domain.join domains; 142 + results 143 + end 144 + 145 + (* --- Package enumeration --- *) 146 + 147 + let enumerate_packages (packages : Git_packages.t) = 148 + OpamPackage.Name.Map.fold (fun name versions_lazy acc -> 149 + let versions = Lazy.force versions_lazy in 150 + match OpamPackage.Version.Map.max_binding_opt versions with 151 + | Some (v, _) -> OpamPackage.create name v :: acc 152 + | None -> acc 153 + ) packages [] 154 + |> List.sort OpamPackage.compare 155 + 156 + let solutions_match s1 s2 = 157 + OpamPackage.Map.equal OpamPackage.Set.equal s1 s2 158 + 159 + (* --- Main --- *) 160 + 161 + let () = OpamFormatConfig.init () 162 + let () = OpamCoreConfig.init () 163 + 164 + let () = 165 + let usage = "test_incr_solver <opam-repository-path> <commit1> <commit2> [commit3 ...] [--limit N] [--domains N]" in 166 + let args = Array.to_list Sys.argv |> List.tl in 167 + let limit = ref None in 168 + let np = ref (Domain.recommended_domain_count ()) in 169 + let positional = ref [] in 170 + let rec parse = function 171 + | [] -> () 172 + | "--limit" :: n :: rest -> limit := Some (int_of_string n); parse rest 173 + | "--domains" :: n :: rest -> np := int_of_string n; parse rest 174 + | x :: rest -> positional := x :: !positional; parse rest 175 + in 176 + parse args; 177 + let positional = List.rev !positional in 178 + match positional with 179 + | repo_path :: commits when List.length commits >= 2 -> 180 + let limit = !limit in 181 + let np = !np in 182 + 183 + Printf.printf "Repository: %s\n%!" repo_path; 184 + Printf.printf "Commits: %s\n%!" (String.concat " -> " commits); 185 + Printf.printf "Parallelism: %d domains\n%!" np; 186 + 187 + (* Open git store once *) 188 + let store, _ = Git_utils.get_git_repo_store_and_hash repo_path in 189 + 190 + let config : Config.t = { 191 + dir = "/tmp/test-incr-solver"; 192 + ocaml_version = None; 193 + opam_repositories = [ repo_path ]; 194 + package = ""; 195 + arch = "x86_64"; 196 + os = "linux"; 197 + os_distribution = "ubuntu"; 198 + os_family = "debian"; 199 + os_version = "24.04"; 200 + directory = None; 201 + md = None; 202 + json = None; 203 + dot = None; 204 + with_test = false; 205 + with_doc = false; 206 + with_jtw = false; 207 + doc_tools_repo = ""; 208 + doc_tools_branch = ""; 209 + jtw_tools_repo = ""; 210 + jtw_tools_branch = ""; 211 + local_repos = []; 212 + html_output = None; 213 + jtw_output = None; 214 + tag = None; 215 + log = false; 216 + dry_run = false; 217 + fork = None; 218 + prune_layers = false; 219 + blessed_map = None; 220 + } in 221 + 222 + (* Preload ALL commits — Lwt_main.run must finish before we spawn domains *) 223 + let pairs = List.combine (List.rev (List.tl (List.rev commits))) (List.tl commits) in 224 + let total_wrong = ref 0 in 225 + 226 + let resolve sha = 227 + let cmd = Printf.sprintf "git -C %s rev-parse %s" repo_path sha in 228 + Os.run cmd |> String.trim 229 + in 230 + 231 + Printf.printf "\n=== Preloading packages for all %d commits ===\n%!" (List.length commits); 232 + let commit_data = List.map (fun sha_str -> 233 + let full = resolve sha_str in 234 + let hash = Git_unix.Store.Hash.of_hex full in 235 + Printf.printf "Loading packages for %s (%s)...\n%!" sha_str full; 236 + let t = Unix.gettimeofday () in 237 + let packages = Git_packages.of_commit_eager store hash in 238 + Printf.printf " Loaded in %.1fs\n%!" (Unix.gettimeofday () -. t); 239 + (sha_str, full, hash, packages) 240 + ) commits in 241 + 242 + (* Precompute changed sets *) 243 + let pair_data = List.map (fun (a_str, b_str) -> 244 + let (_, _, hash_a, _) = List.find (fun (s, _, _, _) -> s = a_str) commit_data in 245 + let (_, _, hash_b, _) = List.find (fun (s, _, _, _) -> s = b_str) commit_data in 246 + let changed_names = Git_packages.diff_packages ~store hash_a hash_b in 247 + let changed_set = OpamPackage.Name.Set.of_list changed_names in 248 + (a_str, b_str, changed_set) 249 + ) pairs in 250 + Printf.printf "=== Preloading complete ===\n%!"; 251 + 252 + let find_commit sha_str = 253 + List.find (fun (s, _, _, _) -> s = sha_str) commit_data 254 + in 255 + 256 + List.iter (fun (commit_a_str, commit_b_str, changed_set) -> 257 + Printf.printf "\n================================================================\n%!"; 258 + Printf.printf "Testing pair: %s -> %s\n%!" commit_a_str commit_b_str; 259 + Printf.printf "================================================================\n%!"; 260 + 261 + let (_, commit_a_full, _, packages_a) = find_commit commit_a_str in 262 + let (_, commit_b_full, _, packages_b) = find_commit commit_b_str in 263 + Printf.printf "Commit A: %s (%s)\n%!" commit_a_str commit_a_full; 264 + Printf.printf "Commit B: %s (%s)\n%!" commit_b_str commit_b_full; 265 + 266 + let t0 = Unix.gettimeofday () in 267 + Printf.printf "Changed packages: %d\n%!" 268 + (OpamPackage.Name.Set.cardinal changed_set); 269 + OpamPackage.Name.Set.iter (fun n -> 270 + Printf.printf " - %s\n%!" (OpamPackage.Name.to_string n) 271 + ) changed_set; 272 + 273 + (* Enumerate packages: latest version per name from commit A *) 274 + let all_packages = enumerate_packages packages_a in 275 + let pkg_list = match limit with 276 + | Some n -> List.filteri (fun i _ -> i < n) all_packages 277 + | None -> all_packages 278 + in 279 + let pkg_arr = Array.of_list pkg_list in 280 + Printf.printf "Packages to test: %d%s\n%!" 281 + (Array.length pkg_arr) 282 + (match limit with Some n -> Printf.sprintf " (limited to %d)" n | None -> ""); 283 + 284 + let do_solve ~packages pkg = 285 + let pkg_name = OpamPackage.to_string pkg in 286 + let pkg_config = { config with package = pkg_name } in 287 + solve_git ~packages pkg_config pkg 288 + in 289 + 290 + (* Solve all packages for both commits in parallel *) 291 + Printf.printf "\nSolving for commit A (%s)...\n%!" commit_a_str; 292 + let t1 = Unix.gettimeofday () in 293 + let results_a = parallel_map ~np (do_solve ~packages:packages_a) pkg_arr in 294 + let t2 = Unix.gettimeofday () in 295 + Printf.printf " Done in %.1fs\n%!" (t2 -. t1); 296 + 297 + Printf.printf "Solving for commit B (%s)...\n%!" commit_b_str; 298 + let results_b = parallel_map ~np (do_solve ~packages:packages_b) pkg_arr in 299 + let t3 = Unix.gettimeofday () in 300 + Printf.printf " Done in %.1fs\n%!" (t3 -. t2); 301 + 302 + (* Analyze results *) 303 + let n_reusable = ref 0 in 304 + let n_correct_reuse = ref 0 in 305 + let n_wrong_reuse = ref 0 in 306 + let n_not_reusable = ref 0 in 307 + let n_both_ok = ref 0 in 308 + let n_a_failed = ref 0 in 309 + let n_b_failed = ref 0 in 310 + let wrong_details = Buffer.create 1024 in 311 + 312 + Array.iteri (fun i pkg -> 313 + let pkg_name = OpamPackage.to_string pkg in 314 + match results_a.(i), results_b.(i) with 315 + | Ok (sol_a, examined_a), Ok (sol_b, _) -> 316 + incr n_both_ok; 317 + if OpamPackage.Name.Set.disjoint examined_a changed_set then begin 318 + incr n_reusable; 319 + if solutions_match sol_a sol_b then 320 + incr n_correct_reuse 321 + else begin 322 + incr n_wrong_reuse; 323 + let in_a_not_b = OpamPackage.Map.filter (fun k _ -> not (OpamPackage.Map.mem k sol_b)) sol_a in 324 + let in_b_not_a = OpamPackage.Map.filter (fun k _ -> not (OpamPackage.Map.mem k sol_a)) sol_b in 325 + let msg = Printf.sprintf " WRONG REUSE: %s (A has %d extra, B has %d extra, examined %d pkgs)\n" 326 + pkg_name 327 + (OpamPackage.Map.cardinal in_a_not_b) 328 + (OpamPackage.Map.cardinal in_b_not_a) 329 + (OpamPackage.Name.Set.cardinal examined_a) in 330 + Printf.printf "%s%!" msg; 331 + Buffer.add_string wrong_details msg; 332 + let detail p tag = 333 + let s = Printf.sprintf " %s %s\n" tag (OpamPackage.to_string p) in 334 + Printf.printf "%s%!" s; 335 + Buffer.add_string wrong_details s 336 + in 337 + OpamPackage.Map.iter (fun p _ -> detail p "+ (in A only)") in_a_not_b; 338 + OpamPackage.Map.iter (fun p _ -> detail p "- (in B only)") in_b_not_a; 339 + let examined_list = OpamPackage.Name.Set.elements examined_a 340 + |> List.map OpamPackage.Name.to_string in 341 + let s = Printf.sprintf " examined: %s\n" (String.concat ", " examined_list) in 342 + Printf.printf "%s%!" s; 343 + Buffer.add_string wrong_details s 344 + end 345 + end else 346 + incr n_not_reusable 347 + | Error _, _ -> incr n_a_failed 348 + | _, Error _ -> incr n_b_failed 349 + ) pkg_arr; 350 + 351 + let t4 = Unix.gettimeofday () in 352 + Printf.printf "\n--- Results for %s -> %s (%.1fs total) ---\n%!" commit_a_str commit_b_str (t4 -. t0); 353 + Printf.printf "Both solved OK: %d\n%!" !n_both_ok; 354 + Printf.printf "Reusable: %d (%.1f%%)\n%!" !n_reusable 355 + (if !n_both_ok > 0 then 100.0 *. float_of_int !n_reusable /. float_of_int !n_both_ok else 0.0); 356 + Printf.printf " Correct reuse: %d\n%!" !n_correct_reuse; 357 + Printf.printf " WRONG reuse: %d\n%!" !n_wrong_reuse; 358 + Printf.printf "Not reusable: %d (would re-solve)\n%!" !n_not_reusable; 359 + Printf.printf "A failed: %d\n%!" !n_a_failed; 360 + Printf.printf "B failed: %d\n%!" !n_b_failed; 361 + if !n_wrong_reuse > 0 then begin 362 + Printf.printf "\nWrong reuse details:\n%!"; 363 + print_string (Buffer.contents wrong_details) 364 + end else 365 + Printf.printf "All reuse decisions are correct!\n%!"; 366 + total_wrong := !total_wrong + !n_wrong_reuse 367 + ) pair_data; 368 + 369 + Printf.printf "\n================================================================\n%!"; 370 + Printf.printf "OVERALL: %d wrong reuse decisions across %d commit pairs\n%!" 371 + !total_wrong (List.length pairs); 372 + if !total_wrong > 0 then exit 1 373 + 374 + | _ -> 375 + Printf.eprintf "Usage: %s\n" usage; 376 + exit 1
+292
day10/docs/ON_DISK_STRUCTURE.md
··· 1 + # On-Disk File Structure 2 + 3 + This document describes the files and directories managed by day10's build 4 + cache, and how each CLI command reads and writes them. 5 + 6 + ## Directory Layout 7 + 8 + ``` 9 + <cache-dir>/ 10 + ├── solutions/ 11 + │ └── <opam-repo-sha>/ 12 + │ └── <package>.json # Cached solver solution 13 + 14 + ├── logs/ 15 + │ ├── <pid>.log # Per-process execution log (timestamps + commands) 16 + │ └── runs/ 17 + │ └── <run-id>/ # YYYY-MM-DD-HHMMSS 18 + │ ├── summary.json # Run results and statistics 19 + │ ├── progress.json # Real-time progress (deleted on completion) 20 + │ ├── build/ 21 + │ │ └── <package>.log # Symlink to build layer's build.log 22 + │ └── docs/ 23 + │ └── <package>.log # Symlink to doc layer's log 24 + 25 + └── <os-key>/ # e.g. "debian-12-x86_64" 26 + ├── build-config.json # Saved batch configuration 27 + ├── status.json # Current build status overview 28 + 29 + ├── base/ # Base container image layer 30 + │ ├── build.log # Base image build log 31 + │ ├── Dockerfile # Generated Dockerfile 32 + │ ├── fs/ # Root filesystem 33 + │ └── opam-repository/ # Checked-out opam repository 34 + 35 + ├── build-<hash>/ # Build layer (one per unique dependency set) 36 + │ ├── layer.json # Metadata: package, exit_status, deps, hashes, 37 + │ │ # created, installed_libs, installed_docs 38 + │ ├── build.log # Build output (absent for dep-failure skeletons) 39 + │ ├── fs/ # Layer filesystem delta (absent for dep-failure skeletons) 40 + │ └── opam-repository/ # Opam repo snapshot: exact opam files for this 41 + │ # package and all its deps. Used by rerun/cascade 42 + │ # so they don't need an external opam-repository path. 43 + 44 + ├── doc-<hash>/ # Documentation layer 45 + │ ├── layer.json # Metadata: package, status, build_hash reference 46 + │ └── odoc-voodoo-all.log # Doc generation log 47 + 48 + ├── jtw-<hash>/ # JS Top Worker layer 49 + │ ├── layer.json # Metadata: package, status, build_hash reference 50 + │ └── jtw.log # Worker generation log 51 + 52 + ├── universes/ 53 + │ └── <hash>.json # Universe snapshot: universe_hash + packages array 54 + 55 + └── packages/ 56 + └── <package>/ # e.g. "dune.3.17.2" 57 + ├── history.jsonl # Build history (append-only, one JSON object per line) 58 + ├── build-<hash> # Symlink -> ../../build-<hash> 59 + ├── blessed-build # Symlink -> ../../build-<hash> (blessed result) 60 + └── blessed-docs # Symlink -> ../../doc-<hash> (blessed docs) 61 + ``` 62 + 63 + ### OS Key 64 + 65 + `<os-key>` = `<os_distribution>-<os_version>-<arch>`, e.g. `debian-12-x86_64`, 66 + `ubuntu-24.04-aarch64`, `freebsd-14.2-amd64`. 67 + 68 + ### Content Addressing 69 + 70 + Build layer hashes are computed from: 71 + - The `base_hash` (OS distribution + version + arch, plus opam-build source hash on Linux) 72 + - The opam file contents of every dependency in the layer 73 + 74 + This means identical dependency sets always produce the same hash, enabling 75 + cross-solution cache reuse. 76 + 77 + ### Skeleton Layers (Dependency Failures) 78 + 79 + When a package is skipped because a dependency failed, a skeleton 80 + `build-<hash>/` directory is still created containing: 81 + - `layer.json` with `exit_status: -1` (marks it as never built) 82 + - `opam-repository/` with the package's and all deps' opam files 83 + 84 + This allows `rerun` and `cascade` to rebuild the package later using the 85 + exact same opam files, without needing access to the original opam-repository 86 + path or `build-config.json`. The skeleton has no `fs/` or `build.log`. 87 + 88 + ## Key File Formats 89 + 90 + ### history.jsonl 91 + 92 + One JSON object per line, appended after each build attempt: 93 + 94 + ```json 95 + {"ts":"2026-03-09T13:01:44","run":"2026-03-09-125444","build_hash":"build-68cface...","status":"success","category":"success","compiler":"4.14.3","blessed":false} 96 + ``` 97 + 98 + Categories: `success`, `failure`, `dep-failure`, `aborted`. 99 + 100 + ### layer.json 101 + 102 + Build layer metadata written when a layer completes: 103 + 104 + ```json 105 + {"package":"either.1.0.0","exit_status":0,"deps":["ocaml.4.14.3"],"hashes":{"ocaml.4.14.3":"build-abc123"},"created":"2026-03-09T13:01:44","installed_libs":["either"],"installed_docs":["either"]} 106 + ``` 107 + 108 + ### status.json 109 + 110 + Snapshot of the most recent run's results: 111 + 112 + ```json 113 + {"generated":"2026-03-09T13:05:00","run_id":"2026-03-09-125444","blessed_totals":{"success":1200,"failure":45,"dep-failure":12},"non_blessed_totals":{"success":800},"changes_since_last":[...],"new_packages":[...]} 114 + ``` 115 + 116 + ### build-config.json 117 + 118 + Saved configuration from the batch that created the cache. Used by `rerun` 119 + and `cascade` so they don't need `--opam-repository` arguments: 120 + 121 + ```json 122 + {"opam_repositories":["/path/to/opam-repository"],"local_repos":[{"path":"/path/to/repo","packages":["pkg1","pkg2"]}],"with_doc":true,"html_output":"/var/www/docs","jtw_output":"/var/www/jtw"} 123 + ``` 124 + 125 + ### solutions/\<sha\>/\<package\>.json 126 + 127 + Cached opam solver result, keyed by opam repository commit SHA: 128 + 129 + ```json 130 + {"package":"dune.3.17.2","solution":["ocaml.4.14.3","ocaml-base-compiler.4.14.3","dune.3.17.2"]} 131 + ``` 132 + 133 + ## Per-Command File Usage 134 + 135 + ### status 136 + 137 + Displays an overview of build results (success/failure/dep-failure counts). 138 + 139 + | Operation | Files | 140 + |-----------|-------| 141 + | Reads | `<os-key>/status.json` | 142 + | Reads (with `--details`) | `<os-key>/packages/<pkg>/history.jsonl`, `<os-key>/build-<hash>/layer.json` | 143 + 144 + ### query 145 + 146 + Shows detailed build information for a specific package. 147 + 148 + | Operation | Files | 149 + |-----------|-------| 150 + | Reads | `<os-key>/packages/<pkg>/history.jsonl` | 151 + | Reads | `<os-key>/build-<hash>/layer.json` | 152 + | Reads (with `--log`) | `<os-key>/build-<hash>/build.log` | 153 + 154 + ### failures 155 + 156 + Lists all packages with failing builds, with optional category filtering. 157 + 158 + | Operation | Files | 159 + |-----------|-------| 160 + | Reads | `<os-key>/packages/` (directory listing) | 161 + | Reads | `<os-key>/packages/<pkg>/history.jsonl` (for each package) | 162 + 163 + ### changes 164 + 165 + Shows status transitions since the last run (e.g. success -> failure). 166 + 167 + | Operation | Files | 168 + |-----------|-------| 169 + | Reads | `<os-key>/status.json` (the `changes_since_last` field) | 170 + 171 + ### disk 172 + 173 + Reports disk usage breakdown by category. 174 + 175 + | Operation | Files | 176 + |-----------|-------| 177 + | Reads (size only) | `<os-key>/base/`, `<os-key>/build-*/`, `<os-key>/doc-*/`, `<os-key>/packages/`, `logs/`, `solutions/` | 178 + 179 + ### log 180 + 181 + Displays the build or doc log for a specific layer hash. 182 + 183 + | Operation | Files | 184 + |-----------|-------| 185 + | Reads | `<os-key>/<layer>/layer.json` | 186 + | Reads | `<os-key>/<layer>/build.log` or `odoc-voodoo-all.log` | 187 + 188 + ### rerun 189 + 190 + Retries a failed build. Uses the opam files stored in the layer's own 191 + `opam-repository/` directory, so no external opam-repository path is needed 192 + and the rebuild uses the exact same opam files as the original build. 193 + 194 + | Operation | Files | 195 + |-----------|-------| 196 + | Reads | `<os-key>/build-<hash>/layer.json` (package, deps, hashes) | 197 + | Reads | `<os-key>/build-<hash>/opam-repository/` (saved opam files for rebuild) | 198 + | Reads | `<os-key>/packages/<pkg>/history.jsonl` | 199 + | Reads (with `--cascade`) | `<os-key>/build-config.json` (cascade re-solves, needs opam repo paths) | 200 + | Writes | `<os-key>/build-<hash>/layer.json`, `build.log`, `fs/` (new layer) | 201 + | Writes | `<os-key>/packages/<pkg>/history.jsonl` (appends result) | 202 + | Writes | `<os-key>/status.json` (regenerated) | 203 + | Writes | `logs/runs/<run-id>/summary.json`, `build/<pkg>.log` | 204 + 205 + ### cascade 206 + 207 + Reruns packages whose dependency failures have since been fixed. Like `rerun`, 208 + uses the opam files stored in each layer's own `opam-repository/` directory. 209 + No `build-config.json` or external opam-repository path needed. 210 + 211 + | Operation | Files | 212 + |-----------|-------| 213 + | Reads | `<os-key>/packages/` (all packages) | 214 + | Reads | `<os-key>/packages/<pkg>/history.jsonl` (finds dep-failure entries) | 215 + | Reads | `<os-key>/build-<hash>/layer.json` (checks if failed dep now succeeds) | 216 + | Reads | `<os-key>/build-<hash>/opam-repository/` (saved opam files for rebuild) | 217 + | Writes | `<os-key>/build-<hash>/layer.json`, `build.log`, `fs/` (new layers) | 218 + | Writes | `<os-key>/packages/<pkg>/history.jsonl` (appends results) | 219 + | Writes | `<os-key>/status.json` (regenerated) | 220 + 221 + ### rdeps 222 + 223 + Finds reverse dependencies of a package by scanning cached solutions. 224 + 225 + | Operation | Files | 226 + |-----------|-------| 227 + | Reads | `solutions/<sha>/<pkg>.json` (all cached solutions) | 228 + | Reads (with `--failing`) | `<os-key>/packages/<pkg>/history.jsonl` | 229 + 230 + ### gc 231 + 232 + Reclaims disk space by removing unreferenced layers and old logs. 233 + 234 + | Operation | Files | 235 + |-----------|-------| 236 + | Reads | `<os-key>/` (lists all layer directories) | 237 + | Reads | `<os-key>/packages/<pkg>/history.jsonl` | 238 + | Reads | `logs/runs/` (lists all run directories) | 239 + | Deletes | `<os-key>/build-<hash>/`, `doc-<hash>/`, `jtw-<hash>/` (unreferenced) | 240 + | Deletes | `logs/runs/<old-run-id>/` (per `--keep-runs` policy) | 241 + | Compacts | `<os-key>/packages/<pkg>/history.jsonl` (merges old entries) | 242 + 243 + **Never deleted by gc:** `base/`, `packages/`, `universes/`, `solutions/`. 244 + 245 + ### notify 246 + 247 + Sends build status notifications to external channels. No file I/O. 248 + 249 + ### batch (the main build command) 250 + 251 + Runs a full build across all packages. This is the primary producer of cache data. 252 + 253 + | Operation | Files | 254 + |-----------|-------| 255 + | Writes | `<os-key>/build-config.json` | 256 + | Writes | `solutions/<sha>/<pkg>.json` (cached solver results) | 257 + | Writes | `<os-key>/base/` (Dockerfile, fs/, opam-repository/, build.log) | 258 + | Writes | `<os-key>/build-<hash>/` (layer.json, build.log, fs/) | 259 + | Writes | `<os-key>/doc-<hash>/` (layer.json, log) — if `--with-doc` | 260 + | Writes | `<os-key>/jtw-<hash>/` (layer.json, log) — if `--with-jtw` | 261 + | Writes | `<os-key>/universes/<hash>.json` | 262 + | Writes | `<os-key>/packages/<pkg>/history.jsonl` (appends per build) | 263 + | Writes | `<os-key>/packages/<pkg>/build-<hash>` (symlink) | 264 + | Writes | `<os-key>/packages/<pkg>/blessed-build` (symlink, if blessed) | 265 + | Writes | `<os-key>/status.json` (at end of run) | 266 + | Writes | `logs/runs/<run-id>/summary.json`, `progress.json`, `build/`, `docs/` | 267 + 268 + ## Lock Files 269 + 270 + When building a layer, `create_directory_exclusively` creates the layer 271 + directory atomically. If it already exists, another process owns it. The lock 272 + file inside contains: 273 + 274 + - PID of the owning process 275 + - Timestamp 276 + - Layer name 277 + - Path to the temporary build log 278 + 279 + This allows multiple workers to build concurrently without collisions — if 280 + two workers need the same layer, the second one waits for the first to finish. 281 + 282 + ## DAG Execution (--fork mode) 283 + 284 + When `--fork N` is specified, batch mode builds a global DAG across all 285 + solutions. Layers are deduplicated by hash and executed in topological order 286 + with up to N concurrent workers. Cache hits are resolved inline without 287 + forking. Dependency failures propagate immediately, skipping downstream layers. 288 + 289 + Progress is logged to `logs/<pid>.log` with entries like: 290 + ``` 291 + [2026-03-09T13:01:44] dag: [5/42] ok either.1.0.0 (build-68cface...) 292 + ```