Monorepo management for opam overlays
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

monopam/status: parallelise --remote with progress + two clean buckets

Replace the blind per-subtree suffix-variant scan with a structural
probe: for each subtree, check the URL monopam would push to (mine),
and -- if sources.toml declares an upstream with a different basename
-- the URL at our origin named after the upstream basename (old_fork).
That targets exactly the rename case without guessing.

Pre-compute the distinct URL set, then probe in parallel (8 concurrent
git ls-remote) with a Tty.Progress bar so the run is visible instead
of silently hanging. Add a retry on miss to absorb transient failures
under contention; this dropped the false-positive rate from ~10% to
zero in testing.

Emit two buckets: 'Remote missing' (push target unreachable) and 'Old
fork still at origin (upstream-named), consider deleting'. Drops the
arbitrary 'canonical missing' / 'duplicates by suffix variant' scans.

+106 -44
+106 -44
bin/cmd_status.ml
··· 1 1 open Cmdliner 2 2 3 + let src = Logs.Src.create "monopam.status" ~doc:"Monopam status command" 4 + 5 + module Log = (val Logs.src_log src : Logs.LOG) 6 + 3 7 let man = 4 8 [ 5 9 `S Manpage.s_description; ··· 161 165 let canonical_url origin_base subtree = 162 166 Option.map (fun b -> strip_git_plus b ^ "/" ^ subtree) origin_base 163 167 164 - let prober ~proc ~fs = 165 - let cache = ref [] in 166 - fun url -> 167 - match List.assoc_opt url !cache with 168 - | Some r -> r 169 - | None -> 170 - let r = Monopam.Git_cli.ls_remote_exists ~proc ~fs ~url in 171 - cache := (url, r) :: !cache; 172 - r 168 + (** Strip the final path segment from a URL. *) 169 + let url_basename url = 170 + match String.rindex_opt url '/' with 171 + | Some i when i + 1 < String.length url -> 172 + Some (String.sub url (i + 1) (String.length url - i - 1)) 173 + | _ -> None 174 + 175 + (** If [sources.toml] declares an [upstream] URL whose basename differs from the 176 + subtree name, the legacy fork would live at 177 + [origin_base]/[upstream basename] on our own origin. Return that URL, or 178 + [None] if there is no such candidate. *) 179 + let old_fork_url sources origin_base subtree = 180 + match 181 + ( Option.bind sources (fun s -> Monopam.Sources_registry.find s ~subtree), 182 + origin_base ) 183 + with 184 + | Some { upstream = Some up; _ }, Some base -> ( 185 + match url_basename (strip_git_plus up) with 186 + | Some name when name <> subtree -> Some (strip_git_plus base ^ "/" ^ name) 187 + | _ -> None) 188 + | _ -> None 173 189 174 - let obsolete_suffix_variants = [ "t"; "wt"; "io"; "rw" ] 190 + (** Every distinct URL worth probing: the push target for each subtree, plus one 191 + candidate old-fork URL per subtree that has an [upstream]. *) 192 + let probe_set ~sources ~origin_base subtrees = 193 + let set = Hashtbl.create 256 in 194 + let add u = Hashtbl.replace set u () in 195 + List.iter 196 + (fun subtree -> 197 + (match configured_url sources subtree with 198 + | Some _ as u -> Option.iter add u 199 + | None -> Option.iter add (canonical_url origin_base subtree)); 200 + Option.iter add (old_fork_url sources origin_base subtree)) 201 + subtrees; 202 + Hashtbl.to_seq_keys set |> List.of_seq 175 203 204 + (** Probe every URL in parallel (bounded to 8 concurrent git ls-remote calls, 205 + which tangled tolerates without rate-limiting). Shows a [Tty.Progress] bar 206 + while running. Returns a [url -> bool] lookup. *) 207 + let run_probes ~proc ~fs urls = 208 + let n = List.length urls in 209 + Log.info (fun m -> m "probing %d unique URLs (up to 8 in parallel)" n); 210 + let t0 = Unix.gettimeofday () in 211 + let progress = Tty.Progress.v ~total:n "Probing remotes" in 212 + let mutex = Eio.Mutex.create () in 213 + let probe_one url = 214 + let t = Unix.gettimeofday () in 215 + let r = Monopam.Git_cli.ls_remote_exists ~proc ~fs ~url in 216 + (* Parallel ls-remote occasionally returns a false MISS under contention. 217 + Re-probe once before concluding the URL is unreachable. *) 218 + let r = 219 + if r then r 220 + else begin 221 + Log.debug (fun m -> m "retrying after miss: %s" url); 222 + Monopam.Git_cli.ls_remote_exists ~proc ~fs ~url 223 + end 224 + in 225 + let dt = Unix.gettimeofday () -. t in 226 + Log.debug (fun m -> m "%s %s (%.2fs)" (if r then "OK" else "MISS") url dt); 227 + Eio.Mutex.use_rw ~protect:true mutex (fun () -> 228 + Tty.Progress.update progress ~phase:"Probing" ~msg:url); 229 + (url, r) 230 + in 231 + let results = Eio.Fiber.List.map ~max_fibers:8 probe_one urls in 232 + Tty.Progress.finish progress; 233 + let elapsed = Unix.gettimeofday () -. t0 in 234 + let hits = List.filter (fun (_, r) -> r) results |> List.length in 235 + Log.info (fun m -> 236 + m "probed %d URLs in %.1fs: %d reachable, %d missing" n elapsed hits 237 + (n - hits)); 238 + let table = Hashtbl.create (List.length results) in 239 + List.iter (fun (u, r) -> Hashtbl.replace table u r) results; 240 + fun url -> try Hashtbl.find table url with Not_found -> false 241 + 242 + (** Two questions per subtree: 243 + - [mine]: the URL monopam will push to (configured source, or the 244 + default-origin URL). If it does not resolve, push would fail. 245 + - [old_fork]: if [sources.toml] declares an [upstream], the URL at our 246 + origin named after the upstream's basename. If the repo still exists 247 + there, it is a leftover from before the rename. *) 176 248 let classify_subtree ~probe ~sources ~origin_base subtree = 177 - let cfg = configured_url sources subtree in 178 - let canon = canonical_url origin_base subtree in 179 - let cfg_missing = 180 - match cfg with Some url when not (probe url) -> Some url | _ -> None 249 + let mine = 250 + match configured_url sources subtree with 251 + | Some _ as u -> u 252 + | None -> canonical_url origin_base subtree 181 253 in 182 - let canon_missing = 183 - match (canon, cfg) with 184 - | Some c, cfg_opt when Some c <> cfg_opt && not (probe c) -> Some c 254 + let missing = 255 + match mine with Some u when not (probe u) -> Some u | _ -> None 256 + in 257 + let old_fork = 258 + match old_fork_url sources origin_base subtree with 259 + | Some u when probe u -> Some u 185 260 | _ -> None 186 261 in 187 - let duplicates = 188 - List.filter_map 189 - (fun suf -> 190 - match canon with 191 - | Some c -> 192 - let variant = c ^ suf in 193 - if probe variant then Some variant else None 194 - | None -> None) 195 - obsolete_suffix_variants 196 - in 197 - (subtree, cfg_missing, canon_missing, duplicates) 262 + (subtree, missing, old_fork) 198 263 199 264 let print_remote_list ~style ~label rows = 200 265 if rows <> [] then ( ··· 217 282 List.map Monopam.Package.subtree_prefix pkgs 218 283 |> List.sort_uniq String.compare 219 284 in 285 + Log.info (fun m -> 286 + m "checking remotes for %d subtrees" (List.length subtrees)); 220 287 let origin_base = Option.bind sources Monopam.Sources_registry.origin in 221 - let probe = prober ~proc ~fs in 288 + Log.debug (fun m -> 289 + m "origin base: %s" (Option.value ~default:"(none)" origin_base)); 290 + let urls = probe_set ~sources ~origin_base subtrees in 291 + let probe = run_probes ~proc ~fs urls in 222 292 let classified = 223 293 List.map (classify_subtree ~probe ~sources ~origin_base) subtrees 224 294 in 225 - let configured_missing = 295 + let missing = 226 296 List.filter_map 227 - (fun (s, cfg, _, _) -> Option.map (fun u -> (s, u)) cfg) 297 + (fun (s, m, _) -> Option.map (fun u -> (s, u)) m) 228 298 classified 229 299 in 230 - let canonical_missing = 300 + let old_forks = 231 301 List.filter_map 232 - (fun (s, _, canon, _) -> Option.map (fun u -> (s, u)) canon) 233 - classified 234 - in 235 - let duplicates = 236 - List.concat_map 237 - (fun (s, _, _, dups) -> List.map (fun u -> (s, u)) dups) 302 + (fun (s, _, f) -> Option.map (fun u -> (s, u)) f) 238 303 classified 239 304 in 240 - print_remote_list ~style:`Red ~label:"Remote missing (configured):" 241 - configured_missing; 242 - print_remote_list ~style:`Yellow 243 - ~label:"Remote missing (canonical, needs provisioning):" 244 - canonical_missing; 305 + print_remote_list ~style:`Red ~label:"Remote missing:" missing; 245 306 print_remote_list ~style:`Cyan 246 - ~label:"Remote duplicates (old repo, consider deleting):" duplicates 307 + ~label:"Old fork still at origin (upstream-named), consider deleting:" 308 + old_forks 247 309 248 310 let run_status ~sw ~proc ~fs ~config ~show_all ~show_forks ~show_remote = 249 311 match Monopam.status ~sw ~fs ~config () with