···114114let reporepo_url () =
115115 getenv_or ~default:Oi.Source.Reporepo.default_url "OI_REPOREPO_URL"
116116117117-let remote_of_registry = function
118118- | "" -> None
119119- | url -> Some (`Http_remote url : D10.Layer.remote)
117117+let use_registry_conv =
118118+ let parser s =
119119+ match Oi.Use_registry.of_string s with
120120+ | Ok v -> Ok v
121121+ | Error msg -> Error (`Msg msg)
122122+ in
123123+ Arg.conv ~docv:"MODE" (parser, Oi.Use_registry.pp)
124124+125125+let use_registry =
126126+ let doc =
127127+ "What to consult the remote registry for. $(b,all) (default): binary \
128128+ layers and source archives. $(b,archives): source archives only — every \
129129+ layer is built from scratch (CI registry-build mode). $(b,never): offline \
130130+ w.r.t. the registry; upstream source fetches still happen."
131131+ in
132132+ Arg.(
133133+ value
134134+ & opt use_registry_conv Oi.Use_registry.All
135135+ & info ~docv:"MODE" ~doc [ "use-registry" ])
136136+137137+type remotes = {
138138+ layer_remote : D10.Layer.remote option;
139139+ source_remote : D10.Layer.remote option;
140140+}
141141+142142+let remotes_of ~url ~(mode : Oi.Use_registry.t) =
143143+ match mode with
144144+ | Never -> { layer_remote = None; source_remote = None }
145145+ | All | Archives ->
146146+ if url = "" then
147147+ Oi.Error.config_error
148148+ "--use-registry=%s requires a non-empty --registry URL (use \
149149+ --use-registry=never for fully offline)"
150150+ (Oi.Use_registry.to_string mode);
151151+ let r : D10.Layer.remote = `Http_remote url in
152152+ {
153153+ layer_remote = (if mode = All then Some r else None);
154154+ source_remote = Some r;
155155+ }
+12-3
lib/cmd/terms.mli
···5454val default_registry : string
5555(** [https://oi.ci.dev]. *)
56565757-val remote_of_registry : string -> D10.Layer.remote option
5858-(** Convert a registry URL string to a {!D10.Layer.remote}. Empty string returns
5959- [None] (registry disabled). *)
5757+val use_registry : Oi.Use_registry.t Cmdliner.Term.t
5858+(** [--use-registry] term, accepting [all], [archives], or [never]. Defaults to
5959+ {!Oi.Use_registry.All}. *)
6060+6161+type remotes = {
6262+ layer_remote : D10.Layer.remote option;
6363+ source_remote : D10.Layer.remote option;
6464+}
6565+6666+val remotes_of : url:string -> mode:Oi.Use_registry.t -> remotes
6767+(** Split [(url, mode)] into the two remotes the pipeline consumes separately.
6868+ Errors if [url] is empty unless [mode = Never]. *)
+12-12
lib/oi/pipeline.ml
···153153154154(* -- Build helpers ------------------------------------------------------- *)
155155156156-let cache_urls ~cache ~remote =
156156+let cache_urls ~cache ~source_remote =
157157 let local = Source.Mirror.url ~cache in
158158- match remote with
158158+ match source_remote with
159159 | Some (`Http_remote r) -> [ local; Source.Mirror.remote_url ~registry:r ]
160160 | None | Some _ -> [ local ]
161161···175175 Fmt.str "%.0fKB" (Int64.to_float n /. 1024.)
176176 else Fmt.str "%LdB" n
177177178178-let fetch_remote_layers ?on_phase ?on_progress ?jobs ~remote ~d10 ~packages_dirs
179179- ~ctx ~pkgs build_plan =
180180- match remote with
178178+let fetch_remote_layers ?on_phase ?on_progress ?jobs ~layer_remote ~d10
179179+ ~packages_dirs ~ctx ~pkgs build_plan =
180180+ match layer_remote with
181181 | None -> build_plan
182182 | Some r ->
183183 let source_hashes =
···289289290290let build ~sys ~proc_mgr ~fs ~clock ~cache ~data_dir ~conf ~os_key
291291 ?(dry_run = false) ?(extra_repos = []) ?(pins = []) ?(refresh = false)
292292- ?remote ?jobs ?toolchain ?(constraints = OpamPackage.Name.Map.empty)
293293- ?project_root ?local_packages_dir ?on_phase ?on_progress ?preflight_done
294294- names =
292292+ ?layer_remote ?source_remote ?jobs ?toolchain
293293+ ?(constraints = OpamPackage.Name.Map.empty) ?project_root
294294+ ?local_packages_dir ?on_phase ?on_progress ?preflight_done names =
295295 let _ = preflight_done in
296296 let on_phase =
297297 match on_phase with
···394394 let build_plan = Plan.build ctx ~d10 ~packages_dirs pkgs in
395395 if dry_run then begin
396396 let remote_has =
397397- match remote with
397397+ match layer_remote with
398398 | Some r ->
399399 let idx = D10.Layer.fetch_remote_index d10 ~remote:r in
400400 fun h -> Hashtbl.mem idx h
···404404 exit 0
405405 end;
406406 let build_plan =
407407- match remote with
407407+ match layer_remote with
408408 | None -> build_plan
409409 | Some _ ->
410410 on_phase "Checking registry for prebuilt layers";
411411- fetch_remote_layers ~on_phase ?on_progress ?jobs ~remote ~d10
411411+ fetch_remote_layers ~on_phase ?on_progress ?jobs ~layer_remote ~d10
412412 ~packages_dirs ~ctx ~pkgs build_plan
413413 in
414414 let hashes = Plan.layer_hashes build_plan in
···522522 Plan.resolve ctx ~packages_dirs ~cache_root ~os_key
523523 ~ocaml_version:conf.ocaml_version build_plan
524524 in
525525- let urls = cache_urls ~cache ~remote in
525525+ let urls = cache_urls ~cache ~source_remote in
526526 Execute.run ~cache_urls:urls ~proc_mgr ~fs ?jobs
527527 ~clock:(clock :> D10.Config.clk)
528528 ~sys ~os_key exec_plan;
+8-7
lib/oi/pipeline.mli
···105105(** {1 Build pipeline} *)
106106107107val cache_urls :
108108- cache:Cache.t -> remote:D10.Layer.remote option -> OpamUrl.t list
108108+ cache:Cache.t -> source_remote:D10.Layer.remote option -> OpamUrl.t list
109109(** [cache_urls] for opam's [pull_tree]/[pull_file] to probe before falling back
110110 to upstream: always includes the local {!Source.Mirror}; with a remote
111111- registry, also the registry's [sources/] subtree. *)
111111+ [source_remote], also the registry's [sources/] subtree. *)
112112113113val fetch_remote_layers :
114114 ?on_phase:(string -> unit) ->
115115 ?on_progress:(string -> unit) ->
116116 ?jobs:int ->
117117- remote:D10.Layer.remote option ->
117117+ layer_remote:D10.Layer.remote option ->
118118 d10:D10.Config.t ->
119119 packages_dirs:string list ->
120120 ctx:Solver.Ctx.t ->
121121 pkgs:OpamPackage.t list ->
122122 Plan.graph ->
123123 Plan.graph
124124-(** Try fetching uncached [Source] layers from [remote]. Returns a new plan
125125- graph with downloaded layers promoted to [Binary]. No-op when
126126- [remote = None] or every layer is already cached.
124124+(** Try fetching uncached [Source] layers from [layer_remote]. Returns a new
125125+ plan graph with downloaded layers promoted to [Binary]. No-op when
126126+ [layer_remote = None] or every layer is already cached.
127127128128 Progress reporting is split:
129129 - [on_phase] receives one-shot milestones (e.g. the final "Fetched N/M
···147147 ?extra_repos:Project.extra_repo list ->
148148 ?pins:Project.pin list ->
149149 ?refresh:bool ->
150150- ?remote:D10.Layer.remote ->
150150+ ?layer_remote:D10.Layer.remote ->
151151+ ?source_remote:D10.Layer.remote ->
151152 ?jobs:int ->
152153 ?toolchain:Toolchain.info ->
153154 ?constraints:OpamFormula.version_constraint OpamTypes.name_map ->
+216-31
lib/oi/source.ml
···146146 let overlay_packages_dir ~path ~handle =
147147 overlay_dir ~path ~handle / "packages"
148148149149+ let iter_opam_files ~path ?(include_handles = []) ?(skip_handles = []) f =
150150+ let v1 = v1_root ~path in
151151+ if not (Sys.file_exists v1) then ()
152152+ else
153153+ let sorted_subdirs root =
154154+ if not (Sys.file_exists root) then []
155155+ else
156156+ Sys.readdir root |> Array.to_list
157157+ |> List.filter (fun n ->
158158+ (not (String.starts_with ~prefix:"." n))
159159+ && Sys.is_directory (root / n))
160160+ |> List.sort String.compare
161161+ in
162162+ let handle_ok h =
163163+ (* [v1/reporepo/] is the meta-overlay holding handle-registration
164164+ entries, not an archive-bearing overlay. *)
165165+ h <> "reporepo"
166166+ && (include_handles = [] || List.mem h include_handles)
167167+ && not (List.mem h skip_handles)
168168+ in
169169+ let strip_pkg_prefix pkg pkg_ver_dir =
170170+ let prefix = pkg ^ "." in
171171+ if String.starts_with ~prefix pkg_ver_dir then
172172+ String.sub pkg_ver_dir (String.length prefix)
173173+ (String.length pkg_ver_dir - String.length prefix)
174174+ else pkg_ver_dir
175175+ in
176176+ sorted_subdirs v1 |> List.filter handle_ok
177177+ |> List.iter (fun handle ->
178178+ let pkgs_dir = overlay_packages_dir ~path ~handle in
179179+ sorted_subdirs pkgs_dir
180180+ |> List.iter (fun pkg ->
181181+ let pkg_dir = pkgs_dir / pkg in
182182+ sorted_subdirs pkg_dir
183183+ |> List.filter (fun pv -> Sys.file_exists (pkg_dir / pv / "opam"))
184184+ |> List.iter (fun pkg_ver_dir ->
185185+ let opam_path = pkg_dir / pkg_ver_dir / "opam" in
186186+ let version = strip_pkg_prefix pkg pkg_ver_dir in
187187+ f ~handle ~pkg ~version ~opam_path)))
188188+149189 type entry = {
150190 handle : string;
151191 version : string;
···938978 "%s: %s URLs are not supported in v1 materialisation (URL: %s)" where
939979 name (OpamUrl.to_string u)
940980941941- (* Special-case host rewrite: tangled.org's plain HTTPS clone path is
942942- flaky in our hands, but the same content is mirrored on
943943- git.recoil.org without the [.git] suffix. We rewrite up front so
944944- both the [ls-remote] query and the URL we bake into the opam file
945945- point at the more reliable host. *)
946946- let rewrite_host (u : OpamUrl.t) : OpamUrl.t =
947947- let prefix = "tangled.org/" in
948948- if u.transport = "https" && String.starts_with ~prefix u.path then
949949- let after =
950950- String.sub u.path (String.length prefix)
951951- (String.length u.path - String.length prefix)
952952- in
953953- let after =
954954- if
955955- String.length after >= 4
956956- && String.sub after (String.length after - 4) 4 = ".git"
957957- then String.sub after 0 (String.length after - 4)
958958- else after
959959- in
960960- { u with path = "git.recoil.org/" ^ after }
961961- else u
962962-963981 (* Resolve a single URL into a content-addressed form. The caller maps
964982 the polymorphic-variant outcome to whatever opam-file mutation it
965965- needs; the four cases are: keep as-is, replace the URL (sha or host
966966- rewrite), add a checksum to a tarball that lacked one, or report
967967- why we couldn't pin it. *)
968968- let try_resolve_url ~fs ~sys ~where (u_in : OpamUrl.t) ~has_checksum :
983983+ needs; the three cases are: keep as-is, replace the URL (sha pin),
984984+ add a checksum to a tarball that lacked one, or report why we
985985+ couldn't pin it. *)
986986+ let try_resolve_url ~fs ~sys ~where (u : OpamUrl.t) ~has_checksum :
969987 [ `Keep
970988 | `Replace_url of OpamUrl.t
971989 | `Add_checksum of OpamHash.t
972990 | `Failed of string ] =
973973- let u = rewrite_host u_in in
974974- let host_changed = u.path <> u_in.path in
975991 match classify_url ~where u with
976992 | `Git ->
977993 begin match u.hash with
978978- | Some sha when is_sha_string sha ->
979979- if host_changed then `Replace_url u else `Keep
994994+ | Some sha when is_sha_string sha -> `Keep
980995 | ref_opt -> begin
981996 (* git(1) doesn't understand opam's [git+https://...]
982997 spelling — strip the [git+] backend prefix and feed it the
···9951010 end
9961011 end
9971012 | `Tarball ->
998998- if has_checksum then if host_changed then `Replace_url u else `Keep
10131013+ if has_checksum then `Keep
9991014 else begin
10001015 let url_str = OpamUrl.to_string u in
10011016 let tmp = Filename.temp_file "oi-bump-tarball-" ".bin" in
···19201935 put dst
19211936 with _ -> ());
19221937 !promoted)
19381938+19391939+ (* -- Bulk fetch into the mirror (used by [oi build --archives-only]) --- *)
19401940+19411941+ type archive = { url : OpamUrl.t; checksums : OpamHash.t list; pkg : string }
19421942+19431943+ type fetch_summary = {
19441944+ fetched : int;
19451945+ cached : int;
19461946+ failed : (string * string) list;
19471947+ bytes_added : int64;
19481948+ }
19491949+19501950+ let read_opam path =
19511951+ try Some (OpamFile.OPAM.read (OpamFile.make (OpamFilename.raw path)))
19521952+ with _ -> None
19531953+19541954+ let archive_of_url ~pkg u =
19551955+ { url = OpamFile.URL.url u; checksums = OpamFile.URL.checksum u; pkg }
19561956+19571957+ (* Drop checksum-less entries: a content-addressed mirror needs a hash
19581958+ to key on. In practice this skips git+ pins (the commit hash takes
19591959+ the place of an integrity check), which are resolved by clone, not
19601960+ by archive download. *)
19611961+ let archives_of_opam ~pkg opam =
19621962+ let main =
19631963+ match OpamFile.OPAM.url opam with None -> [] | Some u -> [ u ]
19641964+ in
19651965+ let extras = List.map snd (OpamFile.OPAM.extra_sources opam) in
19661966+ main @ extras
19671967+ |> List.map (archive_of_url ~pkg)
19681968+ |> List.filter (fun a -> a.checksums <> [])
19691969+19701970+ let archives_of_opam_file ~path ~pkg =
19711971+ match read_opam path with
19721972+ | None ->
19731973+ Log.info (fun m -> m "skipping unreadable opam file: %s" path);
19741974+ []
19751975+ | Some opam -> archives_of_opam ~pkg opam
19761976+19771977+ let dedup_by_url archives =
19781978+ let seen : (string, unit) Hashtbl.t = Hashtbl.create 256 in
19791979+ List.filter
19801980+ (fun a ->
19811981+ let key = OpamUrl.to_string a.url in
19821982+ if Hashtbl.mem seen key then false
19831983+ else (
19841984+ Hashtbl.add seen key ();
19851985+ true))
19861986+ archives
19871987+19881988+ (* Compact progress label: hostname + final path component. The full URL
19891989+ is too long for a single-line in-place progress sink. *)
19901990+ let label_of_url (u : OpamUrl.t) =
19911991+ let strip_scheme s =
19921992+ match String.index_opt s ':' with
19931993+ | None -> s
19941994+ | Some i ->
19951995+ let rest = String.sub s (i + 1) (String.length s - i - 1) in
19961996+ let rec drop_slashes s =
19971997+ if String.length s > 0 && s.[0] = '/' then
19981998+ drop_slashes (String.sub s 1 (String.length s - 1))
19991999+ else s
20002000+ in
20012001+ drop_slashes rest
20022002+ in
20032003+ let no_scheme = strip_scheme (OpamUrl.to_string u) in
20042004+ let host, rest =
20052005+ match String.index_opt no_scheme '/' with
20062006+ | None -> (no_scheme, "")
20072007+ | Some i ->
20082008+ ( String.sub no_scheme 0 i,
20092009+ String.sub no_scheme (i + 1) (String.length no_scheme - i - 1) )
20102010+ in
20112011+ let basename =
20122012+ match String.rindex_opt rest '/' with
20132013+ | None -> rest
20142014+ | Some i -> String.sub rest (i + 1) (String.length rest - i - 1)
20152015+ in
20162016+ if basename = "" then host else host ^ "/" ^ basename
20172017+20182018+ let collect_archives ~packages_dirs pkgs =
20192019+ let opam_path_for pkg =
20202020+ let name_s = OpamPackage.Name.to_string (OpamPackage.name pkg) in
20212021+ let pkg_s = OpamPackage.to_string pkg in
20222022+ List.find_opt Sys.file_exists
20232023+ (List.map (fun d -> d / name_s / pkg_s / "opam") packages_dirs)
20242024+ in
20252025+ pkgs
20262026+ |> List.concat_map (fun pkg ->
20272027+ match opam_path_for pkg with
20282028+ | None -> []
20292029+ | Some path ->
20302030+ archives_of_opam_file ~path ~pkg:(OpamPackage.to_string pkg))
20312031+ |> dedup_by_url
20322032+20332033+ let mirror_has ~mirror_dir checksums =
20342034+ List.exists
20352035+ (fun ck ->
20362036+ Sys.file_exists (List.fold_left ( / ) mirror_dir (OpamHash.to_path ck)))
20372037+ checksums
20382038+20392039+ let fetch_one ~fs ~mirror_dir ~cache_root ~cache_dir ~tmp_dir a =
20402040+ let tmp = tmp_dir / Fmt.str "%d.%d.bin" (Unix.getpid ()) (Random.bits ()) in
20412041+ (try Unix.unlink tmp with Unix.Unix_error _ -> ());
20422042+ let dst_file = OpamFilename.of_string tmp in
20432043+ let result =
20442044+ try
20452045+ OpamRepository.pull_file a.pkg ~cache_dir ~cache_urls:[]
20462046+ ~silent_hits:true dst_file a.checksums [ a.url ]
20472047+ |> OpamProcess.Job.run
20482048+ with exn -> OpamTypes.Not_available (None, Printexc.to_string exn)
20492049+ in
20502050+ let outcome =
20512051+ match result with
20522052+ | OpamTypes.Result () | OpamTypes.Up_to_date () -> (
20532053+ try
20542054+ let _ = promote ~fs ~cache_root a.checksums in
20552055+ (* Size lookup goes through the mirror because opam may have
20562056+ served from its download-cache without writing to [tmp]. *)
20572057+ let bytes =
20582058+ match a.checksums with
20592059+ | ck :: _ ->
20602060+ file_size
20612061+ (List.fold_left ( / ) mirror_dir (OpamHash.to_path ck))
20622062+ | [] -> 0L
20632063+ in
20642064+ `Fetched bytes
20652065+ with exn -> `Failed (Printexc.to_string exn))
20662066+ | OpamTypes.Not_available (_, msg) -> `Failed msg
20672067+ in
20682068+ (try Sys.remove tmp with Sys_error _ -> ());
20692069+ outcome
20702070+20712071+ let fetch_archives ~fs ~cache
20722072+ ?(on_progress = fun ~fetched:_ ~total:_ ~current:_ -> ()) archives =
20732073+ let mirror_dir = dir ~cache in
20742074+ let cache_root = Cache.root_s cache in
20752075+ let cache_dir =
20762076+ OpamRepositoryPath.download_cache OpamStateConfig.(!r.root_dir)
20772077+ in
20782078+ let tmp_dir = mirror_dir / ".incoming" in
20792079+ mkdir_p ~fs mirror_dir;
20802080+ mkdir_p ~fs tmp_dir;
20812081+ let total = List.length archives in
20822082+ let fetched = ref 0 and cached = ref 0 and failed = ref [] in
20832083+ let bytes_added = ref 0L in
20842084+ let done_count () = !fetched + !cached + List.length !failed in
20852085+ List.iter
20862086+ (fun a ->
20872087+ on_progress ~fetched:(done_count ()) ~total
20882088+ ~current:(Some (label_of_url a.url));
20892089+ if mirror_has ~mirror_dir a.checksums then incr cached
20902090+ else
20912091+ match fetch_one ~fs ~mirror_dir ~cache_root ~cache_dir ~tmp_dir a with
20922092+ | `Fetched bytes ->
20932093+ incr fetched;
20942094+ bytes_added := Int64.add !bytes_added bytes
20952095+ | `Failed msg ->
20962096+ Log.info (fun m ->
20972097+ m "fetch failed: %s -> %s" (OpamUrl.to_string a.url) msg);
20982098+ failed := (OpamUrl.to_string a.url, msg) :: !failed)
20992099+ archives;
21002100+ on_progress ~fetched:total ~total ~current:None;
21012101+ (try Unix.rmdir tmp_dir with Unix.Unix_error _ -> ());
21022102+ {
21032103+ fetched = !fetched;
21042104+ cached = !cached;
21052105+ failed = List.rev !failed;
21062106+ bytes_added = !bytes_added;
21072107+ }
19232108end
+53
lib/oi/source.mli
···138138 val overlay_dir : path:string -> handle:string -> string
139139 (** [<reporepo>/v1/<handle>]. *)
140140141141+ val iter_opam_files :
142142+ path:string ->
143143+ ?include_handles:string list ->
144144+ ?skip_handles:string list ->
145145+ (handle:string -> pkg:string -> version:string -> opam_path:string -> unit) ->
146146+ unit
147147+ (** Visit every [<path>/v1/<handle>/packages/<pkg>/<pkg.version>/opam] in the
148148+ reporepo. Empty [include_handles] means every overlay (including
149149+ [default]); [skip_handles] is applied last. The meta-overlay [reporepo] is
150150+ always skipped — it holds handle-registration entries, not archives. *)
151151+141152 val overlay_packages_dir : path:string -> handle:string -> string
142153 (** [<reporepo>/v1/<handle>/packages] — directly consumable as a solver
143154 [packages_dir]. *)
···374385 at [<cache_root>/mirror/<algo>/<XX>/<hash>] for each declared checksum.
375386 Returns the number of blobs newly added; [0] if nothing was promoted (no
376387 checksums supplied, or no cached file found). Idempotent. *)
388388+389389+ type archive = { url : OpamUrl.t; checksums : OpamHash.t list; pkg : string }
390390+ (** One downloadable source entity: either an [url {…}] block or an
391391+ [extra-source] entry. [pkg] is the [name.version] label, only used for
392392+ progress and failure messages. *)
393393+394394+ val collect_archives :
395395+ packages_dirs:string list -> OpamPackage.t list -> archive list
396396+ (** Resolve each [pkg]'s opam file from the first matching [packages_dirs]
397397+ entry, then extract its archives. Deduped by URL so packages sharing a
398398+ mirror tarball contribute one fetch. Drives [oi build --archives-only]
399399+ against the solver's resolved set. *)
400400+401401+ val archives_of_opam_file : path:string -> pkg:string -> archive list
402402+ (** Parse the opam file at [path] directly. Returns [[]] for unreadable or
403403+ sourceless files. Drives [oi build --archives-only --every-version], which
404404+ walks the reporepo's filesystem rather than the solver. *)
405405+406406+ val dedup_by_url : archive list -> archive list
407407+ (** First-occurrence dedup keyed on the URL string. Use after concatenating
408408+ per-group results (e.g. [oi build --archives-only --all]) where the same
409409+ archive is referenced across overlapping solves. *)
410410+411411+ type fetch_summary = {
412412+ fetched : int;
413413+ cached : int;
414414+ failed : (string * string) list; (** [(url, error_message)]. *)
415415+ bytes_added : int64;
416416+ }
417417+418418+ val fetch_archives :
419419+ fs:Eio.Fs.dir_ty Eio.Path.t ->
420420+ cache:Cache.t ->
421421+ ?on_progress:(fetched:int -> total:int -> current:string option -> unit) ->
422422+ archive list ->
423423+ fetch_summary
424424+ (** Fetch each archive and deposit it into the mirror. Skips entries whose
425425+ first declared checksum is already present (the [cached] tally). On a hard
426426+ failure (after retries), records the URL + message in [failed] and moves
427427+ on — no exception is raised. [on_progress] receives [current=Some label]
428428+ just before each fetch and [current=None] after the last; [label] is the
429429+ host + basename of the URL, suitable for an in-place progress line. *)
377430end