···11+open Toru
22+open Cmdliner
33+44+(* Command line arguments *)
55+let registry_dir =
66+ let doc = "Directory containing tessera-manifests registry files" in
77+ Arg.(value & opt string "~/src/git/ucam-eo/tessera-manifests/registry" &
88+ info ["d"; "dir"] ~docv:"DIR" ~doc)
99+1010+let pattern =
1111+ let doc = "File pattern to match (e.g., '*2024*' for 2024 files only)" in
1212+ Arg.(value & opt string "*" & info ["p"; "pattern"] ~docv:"PATTERN" ~doc)
1313+1414+let verbose =
1515+ let doc = "Show verbose output including file details" in
1616+ Arg.(value & flag & info ["v"; "verbose"] ~doc)
1717+1818+let limit =
1919+ let doc = "Limit number of files to process (0 = no limit)" in
2020+ Arg.(value & opt int 0 & info ["l"; "limit"] ~docv:"N" ~doc)
2121+2222+(* Helper functions *)
2323+let rec take n lst =
2424+ match n, lst with
2525+ | 0, _ | _, [] -> []
2626+ | n, x :: xs -> x :: take (n - 1) xs
2727+2828+let expand_tilde path =
2929+ if String.starts_with ~prefix:"~/" path then
3030+ let home = Sys.getenv "HOME" in
3131+ let rest = String.sub path 2 (String.length path - 2) in
3232+ Filename.concat home rest
3333+ else if String.equal path "~" then
3434+ Sys.getenv "HOME"
3535+ else path
3636+3737+let find_registry_files dir pattern limit =
3838+ let expanded_dir = expand_tilde dir in
3939+ let cmd = if String.equal pattern "*" then
4040+ Printf.sprintf "find %s -type f -name '*.txt' | head -%s"
4141+ (Filename.quote expanded_dir)
4242+ (if limit > 0 then string_of_int limit else "9999")
4343+ else
4444+ Printf.sprintf "find %s -type f -name '*.txt' -name %s | head -%s"
4545+ (Filename.quote expanded_dir)
4646+ (Filename.quote pattern)
4747+ (if limit > 0 then string_of_int limit else "9999") in
4848+4949+ let ic = Unix.open_process_in cmd in
5050+ let rec read_lines acc =
5151+ try
5252+ let line = input_line ic in
5353+ read_lines (line :: acc)
5454+ with End_of_file -> List.rev acc
5555+ in
5656+ let files = read_lines [] in
5757+ let _ = Unix.close_process_in ic in
5858+ files
5959+6060+let humanize_bytes bytes =
6161+ let kb = Int64.div bytes 1024L in
6262+ let mb = Int64.div kb 1024L in
6363+ let gb = Int64.div mb 1024L in
6464+ if Int64.compare gb 0L > 0 then Printf.sprintf "%.1f GB" (Int64.to_float gb)
6565+ else if Int64.compare mb 0L > 0 then Printf.sprintf "%.1f MB" (Int64.to_float mb)
6666+ else if Int64.compare kb 0L > 0 then Printf.sprintf "%Ld KB" kb
6767+ else Printf.sprintf "%Ld B" bytes
6868+6969+(* Statistics collection *)
7070+type registry_stats = {
7171+ total_files: int;
7272+ total_entries: int;
7373+ total_size_estimate: int64;
7474+ years: (string * int) list;
7575+ file_types: (string * int) list;
7676+ hash_algorithms: (string * int) list;
7777+}
7878+7979+let analyze_filename filename =
8080+ let basename = Filename.basename filename in
8181+ let year = if String.contains basename '_' then
8282+ let parts = String.split_on_char '_' basename in
8383+ List.nth_opt parts 1
8484+ else None in
8585+ let extension = if String.contains basename '.' then
8686+ let parts = String.split_on_char '.' basename in
8787+ List.nth_opt parts (List.length parts - 1)
8888+ else None in
8989+ (year, extension)
9090+9191+let count_map_incr key map =
9292+ let count = try List.assoc key map with Not_found -> 0 in
9393+ (key, count + 1) :: List.remove_assoc key map
9494+9595+(* Main processing function *)
9696+let process_registries registry_dir pattern verbose limit =
9797+ Printf.printf "🔍 Searching for tessera-manifests registry files...\n";
9898+ Printf.printf "Directory: %s\n" (expand_tilde registry_dir);
9999+ Printf.printf "Pattern: %s\n" pattern;
100100+ if limit > 0 then Printf.printf "Limit: %d files\n" limit;
101101+ Printf.printf "\n";
102102+103103+ let registry_files = find_registry_files registry_dir pattern limit in
104104+105105+ if List.length registry_files = 0 then (
106106+ Printf.printf "❌ No registry files found matching pattern '%s'\n" pattern;
107107+ Printf.printf "Try checking the directory path or adjusting the pattern.\n";
108108+ exit 1
109109+ );
110110+111111+ Printf.printf "📋 Found %d registry files\n\n" (List.length registry_files);
112112+113113+ let total_files = List.length registry_files in
114114+115115+ (* Create a simple progress display *)
116116+ let start_time = Unix.gettimeofday () in
117117+ let print_progress i =
118118+ let pct = (float_of_int (i + 1) /. float_of_int total_files) *. 100.0 in
119119+ let elapsed = Unix.gettimeofday () -. start_time in
120120+ let eta = if i > 0 then
121121+ elapsed /. float_of_int (i + 1) *. float_of_int (total_files - i - 1)
122122+ else 0.0 in
123123+ let bar_width = 30 in
124124+ let filled = int_of_float (pct /. 100.0 *. float_of_int bar_width) in
125125+ let bar = String.make filled '#' ^ String.make (bar_width - filled) '-' in
126126+ Printf.printf "\r🚀 [%s] %.1f%% (%d/%d) ETA: %.0fs "
127127+ bar pct (i + 1) total_files eta;
128128+ flush stdout
129129+ in
130130+131131+ let stats = ref {
132132+ total_files = 0;
133133+ total_entries = 0;
134134+ total_size_estimate = 0L;
135135+ years = [];
136136+ file_types = [];
137137+ hash_algorithms = [];
138138+ } in
139139+140140+ (* Process each registry file *)
141141+ List.iteri (fun i file_path ->
142142+ try
143143+ if not verbose then print_progress i;
144144+ if verbose then Printf.printf "📄 Processing: %s\n" file_path;
145145+146146+ (* Load registry file *)
147147+ let ic = open_in file_path in
148148+ let content = really_input_string ic (in_channel_length ic) in
149149+ close_in ic;
150150+151151+ (* Parse registry with progress feedback if verbose *)
152152+ let registry = Registry.of_string ~progress:(fun current total ->
153153+ if verbose && current mod 100 = 0 then
154154+ Printf.printf "\r 📝 Parsing: %d/%d lines (%.0f%%)"
155155+ current total ((float_of_int current /. float_of_int total) *. 100.0)
156156+ ) content in
157157+ if verbose then Printf.printf "\r ✅ Parsed %d lines%s\n"
158158+ (List.length (String.split_on_char '\n' content)) (String.make 30 ' ');
159159+ let entries = Registry.entries registry in
160160+ let entry_count = List.length entries in
161161+162162+ if verbose then (
163163+ Printf.printf " └─ %d entries\n" entry_count;
164164+165165+ (* Show a few sample entries *)
166166+ let sample_size = min 3 entry_count in
167167+ let samples = take sample_size entries in
168168+ List.iter (fun entry ->
169169+ Printf.printf " • %s (%s)\n"
170170+ (Registry.filename entry)
171171+ (Hash.to_string (Registry.hash entry))
172172+ ) samples;
173173+ if entry_count > sample_size then
174174+ Printf.printf " ... and %d more\n" (entry_count - sample_size);
175175+ Printf.printf "\n"
176176+ );
177177+178178+ (* Collect statistics *)
179179+ stats := { !stats with
180180+ total_files = !stats.total_files + 1;
181181+ total_entries = !stats.total_entries + entry_count;
182182+ };
183183+184184+ (* Analyze entries for additional statistics *)
185185+ List.iter (fun entry ->
186186+ let filename = Registry.filename entry in
187187+ let hash = Registry.hash entry in
188188+ let (year, extension) = analyze_filename filename in
189189+190190+ (* Track years *)
191191+ (match year with
192192+ | Some y -> stats := { !stats with years = count_map_incr y !stats.years }
193193+ | None -> ());
194194+195195+ (* Track file types *)
196196+ (match extension with
197197+ | Some ext -> stats := { !stats with file_types = count_map_incr ext !stats.file_types }
198198+ | None -> ());
199199+200200+ (* Track hash algorithms *)
201201+ let algo_str = match Hash.algorithm hash with
202202+ | Hash.SHA256 -> "SHA256"
203203+ | Hash.SHA1 -> "SHA1"
204204+ | Hash.MD5 -> "MD5" in
205205+ stats := { !stats with hash_algorithms = count_map_incr algo_str !stats.hash_algorithms };
206206+207207+ (* Estimate file size (very rough - assume average 1MB per file) *)
208208+ stats := { !stats with total_size_estimate = Int64.add !stats.total_size_estimate 1048576L };
209209+210210+ ) entries;
211211+212212+213213+ with exn ->
214214+ Printf.printf "❌ Error processing %s: %s\n" file_path (Printexc.to_string exn)
215215+ ) registry_files;
216216+217217+ if not verbose then Printf.printf "\r%s\r✅ Processing complete!\n" (String.make 80 ' ');
218218+219219+ Printf.printf "\n📊 TESSERA REGISTRY ANALYSIS SUMMARY\n";
220220+ Printf.printf "=====================================\n\n";
221221+222222+ Printf.printf "📁 Registry Files: %d\n" !stats.total_files;
223223+ Printf.printf "📄 Total Data Entries: %s\n"
224224+ (Printf.sprintf "%d" !stats.total_entries |>
225225+ fun s -> String.fold_left (fun acc c ->
226226+ if String.length acc mod 4 = 3 then acc ^ "," ^ String.make 1 c
227227+ else acc ^ String.make 1 c) "" s);
228228+ Printf.printf "💾 Estimated Data Size: %s\n" (humanize_bytes !stats.total_size_estimate);
229229+ Printf.printf "\n";
230230+231231+ (* Show top categories *)
232232+ let show_top_list title items =
233233+ if List.length items > 0 then (
234234+ Printf.printf "🏆 %s:\n" title;
235235+ let sorted = List.sort (fun (_, a) (_, b) -> compare b a) items in
236236+ let top5 = take (min 5 (List.length sorted)) sorted in
237237+ List.iter (fun (name, count) ->
238238+ Printf.printf " • %s: %d entries\n" name count
239239+ ) top5;
240240+ Printf.printf "\n"
241241+ )
242242+ in
243243+244244+ show_top_list "Years by Entry Count" !stats.years;
245245+ show_top_list "File Types" !stats.file_types;
246246+ show_top_list "Hash Algorithms" !stats.hash_algorithms;
247247+248248+ Printf.printf "✨ Analysis complete!\n";
249249+ 0
250250+251251+(* Command line interface *)
252252+let tessera_cmd =
253253+ let doc = "Load and analyze tessera-manifests registry files" in
254254+ let info = Cmd.info "tessera-loader" ~doc in
255255+ Cmd.v info Term.(const process_registries $ registry_dir $ pattern $ verbose $ limit)
256256+257257+let () =
258258+ match Cmd.eval_value tessera_cmd with
259259+ | Ok (`Ok exit_code) -> exit exit_code
260260+ | Ok (`Version | `Help) -> exit 0
261261+ | Error _ -> exit 1
+605
toru/bin/toru_cache.ml
···11+(** Toru Cache Management CLI Tool *)
22+33+open Cmdliner
44+55+module File_info = struct
66+ type t = {
77+ name : string;
88+ size : int64;
99+ mtime : Ptime.t;
1010+ path : string;
1111+ }
1212+1313+ let create ~name ~size ~mtime ~path =
1414+ { name; size; mtime; path }
1515+1616+ let compare_by_size a b = Int64.compare b.size a.size (* Largest first *)
1717+ let compare_by_age a b = Ptime.compare a.mtime b.mtime (* Oldest first *)
1818+ let compare_by_name a b = String.compare a.name b.name
1919+end
2020+2121+module Utils = struct
2222+ let human_readable_bytes bytes =
2323+ let units = [|"B"; "KB"; "MB"; "GB"; "TB"|] in
2424+ let rec loop bytes unit_index =
2525+ if bytes < 1024.0 || unit_index >= Array.length units - 1 then
2626+ Printf.sprintf "%.1f %s" bytes units.(unit_index)
2727+ else
2828+ loop (bytes /. 1024.0) (unit_index + 1)
2929+ in
3030+ loop (Int64.to_float bytes) 0
3131+3232+ let format_time_ago ptime =
3333+ let now = Ptime_clock.now () in
3434+ let span = Ptime.diff now ptime in
3535+ let days = Ptime.Span.to_d_ps span |> fst in
3636+ if days = 0 then "today"
3737+ else if days = 1 then "1 day ago"
3838+ else Printf.sprintf "%d days ago" days
3939+4040+4141+ let get_file_info cache_path filename =
4242+ let full_path = Eio.Path.(cache_path / filename) in
4343+ try
4444+ let stat = Eio.Path.stat ~follow:false full_path in
4545+ let mtime = Ptime.of_float_s stat.mtime |> Option.value ~default:(Ptime_clock.now ())
4646+ in
4747+ Some (File_info.create
4848+ ~name:filename
4949+ ~size:(Optint.Int63.to_int64 stat.size)
5050+ ~mtime
5151+ ~path:(Eio.Path.native_exn full_path))
5252+ with
5353+ | _ -> None
5454+5555+ let collect_file_info cache =
5656+ let cache_path = match Toru.Cache.version cache with
5757+ | None -> Toru.Cache.base_path cache
5858+ | Some v -> Eio.Path.(Toru.Cache.base_path cache / v)
5959+ in
6060+ let filenames = Toru.Cache.list_files cache in
6161+ List.filter_map (get_file_info cache_path) filenames
6262+6363+ let print_header title =
6464+ Fmt.(pf stdout "%a@." (styled `Bold (styled `Cyan string)) title);
6565+ Fmt.(pf stdout "%a@." (styled `Cyan string) (String.make (String.length title) '='))
6666+6767+ let print_success msg =
6868+ Fmt.(pf stdout "%a%s@." (styled `Green string) "[OK] " msg)
6969+7070+ let print_warning msg =
7171+ Fmt.(pf stdout "%a%s@." (styled `Yellow string) "[WARN] " msg)
7272+7373+ let print_error msg =
7474+ Fmt.(pf stdout "%a%s@." (styled `Red string) "[ERROR] " msg)
7575+end
7676+7777+(* Global options *)
7878+type global_opts = {
7979+ cache_dir : string option;
8080+ app_name : string;
8181+ version : string option;
8282+}
8383+8484+let global_opts_term =
8585+ let cache_dir =
8686+ let doc = "Override default cache location" in
8787+ Arg.(value & opt (some string) None & info ["cache-dir"; "c"] ~docv:"DIR" ~doc)
8888+ in
8989+ let app_name =
9090+ let doc = "Override application name (default: toru)" in
9191+ Arg.(value & opt string "toru" & info ["app-name"] ~docv:"NAME" ~doc)
9292+ in
9393+ let version =
9494+ let doc = "Target specific cache version" in
9595+ Arg.(value & opt (some string) None & info ["cache-version"; "v"] ~docv:"VER" ~doc)
9696+ in
9797+ Term.(const (fun cache_dir app_name version -> { cache_dir; app_name; version })
9898+ $ cache_dir $ app_name $ version)
9999+100100+(* Command implementations *)
101101+let info_cmd global_opts =
102102+ (Eio_main.run @@ fun env ->
103103+ Eio.Switch.run @@ fun sw ->
104104+ let cache = match global_opts.cache_dir with
105105+ | Some path -> Toru.Cache.create ~sw ~env ?version:global_opts.version path
106106+ | None -> Toru.Cache.default ~sw ~env ~app_name:global_opts.app_name ()
107107+ in
108108+109109+ let cache = match global_opts.version with
110110+ | Some v ->
111111+ (* Create new cache with version override *)
112112+ (match global_opts.cache_dir with
113113+ | Some path -> Toru.Cache.create ~sw ~env ~version:v path
114114+ | None ->
115115+ let base_path = Toru.Cache.default_cache_path ~app_name:global_opts.app_name () in
116116+ Toru.Cache.create ~sw ~env ~version:v base_path)
117117+ | None -> cache
118118+ in
119119+120120+ Utils.print_header "Toru Cache Information";
121121+122122+ let cache_path = Toru.Cache.base_path cache in
123123+ let version_path = match Toru.Cache.version cache with
124124+ | Some v -> Eio.Path.(cache_path / v)
125125+ | None -> cache_path
126126+ in
127127+128128+ Printf.printf "Location: %s\n" (Eio.Path.native_exn version_path);
129129+130130+ (match Toru.Cache.version cache with
131131+ | Some v -> Printf.printf "Version: %s\n" v
132132+ | None -> Printf.printf "Version: none\n");
133133+134134+ let total_size = Toru.Cache.size_bytes cache in
135135+ Printf.printf "Total Size: %s (%Ld bytes)\n"
136136+ (Utils.human_readable_bytes total_size) total_size;
137137+138138+ let file_count = List.length (Toru.Cache.list_files cache) in
139139+ Printf.printf "File Count: %d files\n" file_count;
140140+141141+ if file_count > 0 then (
142142+ let file_infos = Utils.collect_file_info cache in
143143+ match file_infos with
144144+ | [] -> Printf.printf "Age Range: No files found\n"
145145+ | files ->
146146+ let sorted_by_age = List.sort File_info.compare_by_age files in
147147+ let oldest = List.hd sorted_by_age in
148148+ let newest = List.hd (List.rev sorted_by_age) in
149149+ Printf.printf "Age Range: %s to %s\n"
150150+ (Utils.format_time_ago oldest.mtime)
151151+ (Utils.format_time_ago newest.mtime)
152152+ );
153153+154154+ Printf.printf "Free Space: Unable to determine\n");
155155+ 0
156156+157157+let list_cmd global_opts sort_by format limit =
158158+ (Eio_main.run @@ fun env ->
159159+ Eio.Switch.run @@ fun sw ->
160160+ let cache = match global_opts.cache_dir with
161161+ | Some path -> Toru.Cache.create ~sw ~env ?version:global_opts.version path
162162+ | None -> Toru.Cache.default ~sw ~env ~app_name:global_opts.app_name ()
163163+ in
164164+165165+ let cache = match global_opts.version with
166166+ | Some v ->
167167+ (match global_opts.cache_dir with
168168+ | Some path -> Toru.Cache.create ~sw ~env ~version:v path
169169+ | None ->
170170+ let base_path = Toru.Cache.default_cache_path ~app_name:global_opts.app_name () in
171171+ Toru.Cache.create ~sw ~env ~version:v base_path)
172172+ | None -> cache
173173+ in
174174+175175+ let file_infos = Utils.collect_file_info cache in
176176+177177+ let sorted_files = match sort_by with
178178+ | `Size -> List.sort File_info.compare_by_size file_infos
179179+ | `Age -> List.sort File_info.compare_by_age file_infos
180180+ | `Name -> List.sort File_info.compare_by_name file_infos
181181+ in
182182+183183+ let limited_files = match limit with
184184+ | Some n ->
185185+ let rec take n lst acc =
186186+ match n, lst with
187187+ | 0, _ | _, [] -> List.rev acc
188188+ | n, x :: xs -> take (n - 1) xs (x :: acc)
189189+ in
190190+ take n sorted_files []
191191+ | None -> sorted_files
192192+ in
193193+194194+ match format with
195195+ | `Table ->
196196+ if limited_files = [] then
197197+ Printf.printf "No files found in cache.\n"
198198+ else (
199199+ Printf.printf "%-50s %12s %12s %s\n" "Filename" "Size" "Age" "Hash (SHA256)";
200200+ Fmt.(pf stdout "%a@." (styled `Cyan string) (String.make 90 '-'));
201201+202202+ List.iter (fun file ->
203203+ let truncated_name =
204204+ if String.length file.File_info.name > 47 then
205205+ String.sub file.File_info.name 0 44 ^ "..."
206206+ else file.File_info.name
207207+ in
208208+ Printf.printf "%-50s %12s %12s %s\n"
209209+ truncated_name
210210+ (Utils.human_readable_bytes file.File_info.size)
211211+ (Utils.format_time_ago file.File_info.mtime)
212212+ "no hash" (* TODO: Add hash computation if needed *)
213213+ ) limited_files
214214+ )
215215+ | `Json ->
216216+ let json_files = List.map (fun file ->
217217+ `Assoc [
218218+ ("filename", `String file.File_info.name);
219219+ ("size", `Int (Int64.to_int file.File_info.size));
220220+ ("path", `String file.File_info.path);
221221+ ("mtime", `String (Ptime.to_rfc3339 file.File_info.mtime));
222222+ ("age_days", `Int (
223223+ let span = Ptime.diff (Ptime_clock.now ()) file.File_info.mtime in
224224+ Ptime.Span.to_d_ps span |> fst
225225+ ));
226226+ ]
227227+ ) limited_files in
228228+ let json_output = `List json_files in
229229+ Printf.printf "%s\n" (Yojson.Safe.pretty_to_string json_output));
230230+ 0
231231+232232+let size_cmd global_opts breakdown human_readable =
233233+ (Eio_main.run @@ fun env ->
234234+ Eio.Switch.run @@ fun sw ->
235235+ let cache = match global_opts.cache_dir with
236236+ | Some path -> Toru.Cache.create ~sw ~env ?version:global_opts.version path
237237+ | None -> Toru.Cache.default ~sw ~env ~app_name:global_opts.app_name ()
238238+ in
239239+240240+ let cache = match global_opts.version with
241241+ | Some v ->
242242+ (match global_opts.cache_dir with
243243+ | Some path -> Toru.Cache.create ~sw ~env ~version:v path
244244+ | None ->
245245+ let base_path = Toru.Cache.default_cache_path ~app_name:global_opts.app_name () in
246246+ Toru.Cache.create ~sw ~env ~version:v base_path)
247247+ | None -> cache
248248+ in
249249+250250+ let total_size = Toru.Cache.size_bytes cache in
251251+252252+ if human_readable then
253253+ Printf.printf "Total Size: %s\n" (Utils.human_readable_bytes total_size)
254254+ else
255255+ Printf.printf "Total Size: %Ld bytes\n" total_size;
256256+257257+ if breakdown then (
258258+ let file_infos = Utils.collect_file_info cache in
259259+260260+ (* Breakdown by file extension *)
261261+ let ext_map = Hashtbl.create 16 in
262262+ List.iter (fun file ->
263263+ let ext =
264264+ try
265265+ let dot_idx = String.rindex file.File_info.name '.' in
266266+ String.sub file.File_info.name dot_idx (String.length file.File_info.name - dot_idx)
267267+ with
268268+ | Not_found -> "no extension"
269269+ in
270270+ let current = Hashtbl.find_opt ext_map ext |> Option.value ~default:0L in
271271+ Hashtbl.replace ext_map ext (Int64.add current file.File_info.size)
272272+ ) file_infos;
273273+274274+ Printf.printf "\nBreakdown by file type:\n";
275275+ Fmt.(pf stdout "%a@." (styled `Cyan string) (String.make 40 '-'));
276276+277277+ let ext_list = Hashtbl.fold (fun ext size acc -> (ext, size) :: acc) ext_map [] in
278278+ let sorted_ext = List.sort (fun (_, a) (_, b) -> Int64.compare b a) ext_list in
279279+280280+ List.iter (fun (ext, size) ->
281281+ let percentage = if total_size > 0L then
282282+ Int64.to_float size /. Int64.to_float total_size *. 100.0
283283+ else 0.0 in
284284+ if human_readable then
285285+ Printf.printf "%-20s %12s (%5.1f%%)\n" ext (Utils.human_readable_bytes size) percentage
286286+ else
287287+ Printf.printf "%-20s %12Ld (%5.1f%%)\n" ext size percentage
288288+ ) sorted_ext;
289289+290290+ (* Breakdown by age *)
291291+ let now = Ptime_clock.now () in
292292+ let age_buckets = [
293293+ ("< 1 day", 1);
294294+ ("1-7 days", 7);
295295+ ("1-4 weeks", 28);
296296+ ("1-12 months", 365);
297297+ ("> 1 year", max_int);
298298+ ] in
299299+300300+ Printf.printf "\nBreakdown by age:\n";
301301+ Fmt.(pf stdout "%a@." (styled `Cyan string) (String.make 40 '-'));
302302+303303+ List.iter (fun (label, max_days) ->
304304+ let bucket_size = List.fold_left (fun acc file ->
305305+ let span = Ptime.diff now file.File_info.mtime in
306306+ let days = Ptime.Span.to_d_ps span |> fst in
307307+ if days <= max_days then Int64.add acc file.File_info.size else acc
308308+ ) 0L file_infos in
309309+310310+ let percentage = if total_size > 0L then
311311+ Int64.to_float bucket_size /. Int64.to_float total_size *. 100.0
312312+ else 0.0 in
313313+314314+ if human_readable then
315315+ Printf.printf "%-20s %12s (%5.1f%%)\n" label (Utils.human_readable_bytes bucket_size) percentage
316316+ else
317317+ Printf.printf "%-20s %12Ld (%5.1f%%)\n" label bucket_size percentage
318318+ ) age_buckets
319319+ );
320320+ ());
321321+ 0
322322+323323+let clean_cmd global_opts max_size max_age dry_run =
324324+ (Eio_main.run @@ fun env ->
325325+ Eio.Switch.run @@ fun sw ->
326326+ let cache = match global_opts.cache_dir with
327327+ | Some path -> Toru.Cache.create ~sw ~env ?version:global_opts.version path
328328+ | None -> Toru.Cache.default ~sw ~env ~app_name:global_opts.app_name ()
329329+ in
330330+331331+ let cache = match global_opts.version with
332332+ | Some v ->
333333+ (match global_opts.cache_dir with
334334+ | Some path -> Toru.Cache.create ~sw ~env ~version:v path
335335+ | None ->
336336+ let base_path = Toru.Cache.default_cache_path ~app_name:global_opts.app_name () in
337337+ Toru.Cache.create ~sw ~env ~version:v base_path)
338338+ | None -> cache
339339+ in
340340+341341+ let file_infos = Utils.collect_file_info cache in
342342+ let now = Ptime_clock.now () in
343343+344344+ let files_to_remove = List.filter (fun file ->
345345+ (* Check age constraint *)
346346+ let age_match = match max_age with
347347+ | Some max_days ->
348348+ let span = Ptime.diff now file.File_info.mtime in
349349+ let days = Ptime.Span.to_d_ps span |> fst in
350350+ days > max_days
351351+ | None -> true
352352+ in
353353+ age_match
354354+ ) file_infos in
355355+356356+ (* If max_size is specified, sort by age and remove oldest until under limit *)
357357+ let files_to_remove = match max_size with
358358+ | Some max_bytes ->
359359+ let current_size = List.fold_left (fun acc file -> Int64.add acc file.File_info.size) 0L file_infos in
360360+ if current_size <= max_bytes then
361361+ []
362362+ else
363363+ let sorted_by_age = List.sort File_info.compare_by_age file_infos in
364364+ let rec select_for_removal remaining_files target_reduction acc_size acc_files =
365365+ match remaining_files with
366366+ | [] -> acc_files
367367+ | file :: rest ->
368368+ if acc_size >= target_reduction then acc_files
369369+ else select_for_removal rest target_reduction
370370+ (Int64.add acc_size file.File_info.size) (file :: acc_files)
371371+ in
372372+ select_for_removal sorted_by_age (Int64.sub current_size max_bytes) 0L []
373373+ | None -> files_to_remove
374374+ in
375375+376376+ if files_to_remove = [] then (
377377+ Utils.print_success "No files need to be removed.";
378378+ ()
379379+ ) else (
380380+ let total_size_to_remove = List.fold_left (fun acc file -> Int64.add acc file.File_info.size) 0L files_to_remove in
381381+ let file_count = List.length files_to_remove in
382382+383383+ if dry_run then (
384384+ Utils.print_header "Dry Run: Cache Cleanup";
385385+ Printf.printf "Would remove %d files (%s)\n\n"
386386+ file_count (Utils.human_readable_bytes total_size_to_remove);
387387+388388+ Printf.printf "Files to be removed:\n";
389389+ List.iter (fun file ->
390390+ Printf.printf "- %s (%s, %s)\n"
391391+ file.File_info.name
392392+ (Utils.human_readable_bytes file.File_info.size)
393393+ (Utils.format_time_ago file.File_info.mtime)
394394+ ) files_to_remove;
395395+396396+ print_endline "";
397397+ Utils.print_warning "Use without --dry-run to proceed with cleanup."
398398+ ) else (
399399+ Utils.print_header "Cache Cleanup";
400400+ Printf.printf "Removing %d files (%s)...\n\n"
401401+ file_count (Utils.human_readable_bytes total_size_to_remove);
402402+403403+ let removed_count = ref 0 in
404404+ List.iter (fun file ->
405405+ try
406406+ Unix.unlink file.File_info.path;
407407+ incr removed_count;
408408+ Printf.printf "Removed: %s\n" file.File_info.name
409409+ with
410410+ | exn ->
411411+ Utils.print_error (Printf.sprintf "Failed to remove %s: %s"
412412+ file.File_info.name (Printexc.to_string exn))
413413+ ) files_to_remove;
414414+415415+ Printf.printf "\nRemoved %d files successfully.\n" !removed_count
416416+ );
417417+ ()
418418+ ));
419419+ 0
420420+421421+let vacuum_cmd global_opts dry_run =
422422+ (Eio_main.run @@ fun env ->
423423+ Eio.Switch.run @@ fun sw ->
424424+ let cache = match global_opts.cache_dir with
425425+ | Some path -> Toru.Cache.create ~sw ~env ?version:global_opts.version path
426426+ | None -> Toru.Cache.default ~sw ~env ~app_name:global_opts.app_name ()
427427+ in
428428+429429+ let cache = match global_opts.version with
430430+ | Some v ->
431431+ (match global_opts.cache_dir with
432432+ | Some path -> Toru.Cache.create ~sw ~env ~version:v path
433433+ | None ->
434434+ let base_path = Toru.Cache.default_cache_path ~app_name:global_opts.app_name () in
435435+ Toru.Cache.create ~sw ~env ~version:v base_path)
436436+ | None -> cache
437437+ in
438438+439439+ let cache_path = match Toru.Cache.version cache with
440440+ | None -> Toru.Cache.base_path cache
441441+ | Some v -> Eio.Path.(Toru.Cache.base_path cache / v)
442442+ in
443443+444444+ let rec find_empty_dirs path =
445445+ try
446446+ let entries = Eio.Path.read_dir path in
447447+ if entries = [] then
448448+ [path]
449449+ else
450450+ List.fold_left (fun acc entry ->
451451+ let entry_path = Eio.Path.(path / entry) in
452452+ let stat = Eio.Path.stat ~follow:false entry_path in
453453+ match stat.kind with
454454+ | `Directory -> (find_empty_dirs entry_path) @ acc
455455+ | _ -> acc
456456+ ) [] entries
457457+ with
458458+ | _ -> []
459459+ in
460460+461461+ let empty_dirs = find_empty_dirs cache_path in
462462+ let cache_path_str = Eio.Path.native_exn cache_path in
463463+ let empty_dirs = List.filter (fun dir -> not (String.equal (Eio.Path.native_exn dir) cache_path_str)) empty_dirs in
464464+465465+ if empty_dirs = [] then (
466466+ Utils.print_success "No empty directories found.";
467467+ ()
468468+ ) else (
469469+ if dry_run then (
470470+ Utils.print_header "Dry Run: Vacuum Cache";
471471+ Printf.printf "Would remove %d empty directories:\n\n" (List.length empty_dirs);
472472+ List.iter (fun dir ->
473473+ Printf.printf "- %s\n" (Eio.Path.native_exn dir)
474474+ ) empty_dirs;
475475+ print_endline "";
476476+ Utils.print_warning "Use without --dry-run to proceed with vacuum."
477477+ ) else (
478478+ Utils.print_header "Vacuum Cache";
479479+ Printf.printf "Removing %d empty directories...\n\n" (List.length empty_dirs);
480480+481481+ let removed_count = ref 0 in
482482+ List.iter (fun dir ->
483483+ try
484484+ Eio.Path.rmdir dir;
485485+ incr removed_count;
486486+ Printf.printf "Removed: %s\n" (Eio.Path.native_exn dir)
487487+ with
488488+ | exn ->
489489+ Utils.print_error (Printf.sprintf "Failed to remove %s: %s"
490490+ (Eio.Path.native_exn dir) (Printexc.to_string exn))
491491+ ) empty_dirs;
492492+493493+ Printf.printf "\nRemoved %d directories successfully.\n" !removed_count
494494+ );
495495+ ()
496496+ ));
497497+ 0
498498+499499+(* Command definitions *)
500500+let info_cmd_def =
501501+ let doc = "Show cache statistics and location" in
502502+ Cmd.v (Cmd.info "info" ~doc) Term.(const info_cmd $ global_opts_term)
503503+504504+let list_cmd_def =
505505+ let sort_by =
506506+ let doc = "Sort files by size, age, or name" in
507507+ Arg.(value & opt (enum [("size", `Size); ("age", `Age); ("name", `Name)]) `Name &
508508+ info ["sort"] ~docv:"FIELD" ~doc)
509509+ in
510510+ let format =
511511+ let doc = "Output format: table or json" in
512512+ Arg.(value & opt (enum [("table", `Table); ("json", `Json)]) `Table &
513513+ info ["format"; "f"] ~docv:"FORMAT" ~doc)
514514+ in
515515+ let limit =
516516+ let doc = "Limit number of files shown" in
517517+ Arg.(value & opt (some int) None & info ["limit"; "n"] ~docv:"N" ~doc)
518518+ in
519519+ let doc = "List cached files with details" in
520520+ Cmd.v (Cmd.info "list" ~doc) Term.(const list_cmd $ global_opts_term $ sort_by $ format $ limit)
521521+522522+let size_cmd_def =
523523+ let breakdown =
524524+ let doc = "Show size breakdown by file type and age" in
525525+ Arg.(value & flag & info ["breakdown"; "b"] ~doc)
526526+ in
527527+ let human_readable =
528528+ let doc = "Display sizes in human readable format" in
529529+ Arg.(value & flag & info ["human-readable"; "h"] ~doc)
530530+ in
531531+ let doc = "Show cache size information" in
532532+ Cmd.v (Cmd.info "size" ~doc) Term.(const size_cmd $ global_opts_term $ breakdown $ human_readable)
533533+534534+let clean_cmd_def =
535535+ let max_size =
536536+ let doc = "Remove files to get cache under this size (e.g., 1GB, 500MB)" in
537537+ let parse_size s =
538538+ let s = String.uppercase_ascii s in
539539+ let len = String.length s in
540540+ if len < 2 then `Error "Invalid size format"
541541+ else
542542+ let (num_str, unit) =
543543+ if String.sub s (len-2) 2 = "GB" then
544544+ (String.sub s 0 (len-2), Int64.(mul 1024L (mul 1024L 1024L)))
545545+ else if String.sub s (len-2) 2 = "MB" then
546546+ (String.sub s 0 (len-2), Int64.(mul 1024L 1024L))
547547+ else if String.sub s (len-2) 2 = "KB" then
548548+ (String.sub s 0 (len-2), 1024L)
549549+ else if String.sub s (len-1) 1 = "B" then
550550+ (String.sub s 0 (len-1), 1L)
551551+ else
552552+ (s, 1L)
553553+ in
554554+ try
555555+ let num = Float.of_string num_str in
556556+ `Ok (Int64.of_float (num *. Int64.to_float unit))
557557+ with
558558+ | _ -> `Error "Invalid number in size"
559559+ in
560560+ Arg.(value & opt (some (parse_size, fun fmt size ->
561561+ Format.fprintf fmt "%Ld" size)) None & info ["max-size"] ~docv:"SIZE" ~doc)
562562+ in
563563+ let max_age =
564564+ let doc = "Remove files older than this many days" in
565565+ Arg.(value & opt (some int) None & info ["max-age"] ~docv:"DAYS" ~doc)
566566+ in
567567+ let dry_run =
568568+ let doc = "Show what would be removed without actually removing" in
569569+ Arg.(value & flag & info ["dry-run"; "n"] ~doc)
570570+ in
571571+ let doc = "Clean cache with various options" in
572572+ Cmd.v (Cmd.info "clean" ~doc) Term.(const clean_cmd $ global_opts_term $ max_size $ max_age $ dry_run)
573573+574574+let vacuum_cmd_def =
575575+ let dry_run =
576576+ let doc = "Show what would be removed without actually removing" in
577577+ Arg.(value & flag & info ["dry-run"; "n"] ~doc)
578578+ in
579579+ let doc = "Remove empty directories and broken links" in
580580+ Cmd.v (Cmd.info "vacuum" ~doc) Term.(const vacuum_cmd $ global_opts_term $ dry_run)
581581+582582+let main_cmd =
583583+ let doc = "Toru cache management tool" in
584584+ let sdocs = Manpage.s_common_options in
585585+ let man = [
586586+ `S Manpage.s_description;
587587+ `P "$(tname) manages the Toru data cache, providing commands to inspect, clean, and maintain cached data files.";
588588+ `P "The cache follows XDG Base Directory specifications on Unix systems and uses appropriate locations on other platforms.";
589589+ `S Manpage.s_commands;
590590+ `P "Use $(b,$(tname) COMMAND --help) for command-specific help.";
591591+ `S "ENVIRONMENT VARIABLES";
592592+ `P "$(b,XDG_CACHE_HOME) - Override default cache location on Unix systems";
593593+ `P "$(b,TORU_CACHE_DIR) - Override cache location (takes precedence)";
594594+ `S "EXAMPLES";
595595+ `P "$(b,toru-cache info) - Show cache information";
596596+ `P "$(b,toru-cache list --sort=size --limit=10) - Show 10 largest files";
597597+ `P "$(b,toru-cache clean --max-size=1GB --dry-run) - Preview cleanup to 1GB limit";
598598+ `P "$(b,toru-cache size --breakdown -h) - Show human-readable size breakdown";
599599+ ] in
600600+ let default = Term.(const 0) in
601601+ Cmd.group ~default (Cmd.info "toru-cache" ~version:"0.1.0" ~doc ~sdocs ~man)
602602+ [info_cmd_def; list_cmd_def; size_cmd_def; clean_cmd_def; vacuum_cmd_def]
603603+604604+let () =
605605+ exit (Cmd.eval' main_cmd)
+220
toru/bin/toru_make_registry.ml
···11+(** CLI tool for generating Pooch-compatible registry files from directories *)
22+33+open Cmdliner
44+open Eio.Std
55+66+(* CLI argument types *)
77+type output_format = Pooch | JSON
88+type path_format = Relative | Absolute
99+1010+(* CLI arguments *)
1111+let directory_arg =
1212+ let doc = "Directory to scan for files" in
1313+ Arg.(required & pos 0 (some dir) None & info [] ~docv:"DIRECTORY" ~doc)
1414+1515+let output_arg =
1616+ let doc = "Output file for registry (default: stdout)" in
1717+ Arg.(value & pos 1 (some string) None & info [] ~docv:"OUTPUT" ~doc)
1818+1919+let recursive_arg =
2020+ let doc = "Scan directories recursively" in
2121+ Arg.(value & flag & info ["r"; "recursive"] ~doc)
2222+2323+let follow_symlinks_arg =
2424+ let doc = "Follow symbolic links during traversal" in
2525+ Arg.(value & flag & info ["L"; "follow-symlinks"] ~doc)
2626+2727+let algorithm_arg =
2828+ let algorithms = [("sha256", Toru.Hash.SHA256); ("sha1", Toru.Hash.SHA1); ("md5", Toru.Hash.MD5)] in
2929+ let doc = "Hash algorithm to use: sha256, sha1, or md5" in
3030+ Arg.(value & opt (enum algorithms) Toru.Hash.SHA256 & info ["a"; "algorithm"] ~docv:"ALGO" ~doc)
3131+3232+let exclude_arg =
3333+ let doc = "Exclude files matching glob pattern (can be repeated)" in
3434+ Arg.(value & opt_all string [] & info ["e"; "exclude"] ~docv:"PATTERN" ~doc)
3535+3636+let include_hidden_arg =
3737+ let doc = "Include hidden files (starting with .)" in
3838+ Arg.(value & flag & info ["H"; "include-hidden"] ~doc)
3939+4040+let update_arg =
4141+ let doc = "Update existing registry file instead of creating new one" in
4242+ Arg.(value & opt (some file) None & info ["u"; "update"] ~docv:"FILE" ~doc)
4343+4444+let progress_arg =
4545+ let doc = "Show progress during scanning" in
4646+ Arg.(value & flag & info ["p"; "progress"] ~doc)
4747+4848+let format_arg =
4949+ let formats = [("pooch", Pooch); ("json", JSON)] in
5050+ let doc = "Output format: pooch or json" in
5151+ Arg.(value & opt (enum formats) Pooch & info ["f"; "format"] ~docv:"FORMAT" ~doc)
5252+5353+let path_format_arg =
5454+ let formats = [("relative", Relative); ("absolute", Absolute)] in
5555+ let doc = "Path format in output: relative or absolute" in
5656+ Arg.(value & opt (enum formats) Relative & info ["path-format"] ~docv:"FORMAT" ~doc)
5757+5858+(* Progress reporting *)
5959+let create_progress_reporter show_progress =
6060+ if show_progress then (
6161+ let last_update = ref (Unix.gettimeofday ()) in
6262+ fun filename current total ->
6363+ let now = Unix.gettimeofday () in
6464+ if now -. !last_update > 0.1 || current = total then (
6565+ last_update := now;
6666+ let percentage = if total > 0 then (current * 100) / total else 0 in
6767+ Printf.eprintf "\r\027[K[%3d%%] %s (%d/%d)" percentage filename current total;
6868+ if current = total then Printf.eprintf "\n";
6969+ flush stderr
7070+ )
7171+ ) else (
7272+ fun _ _ _ -> ()
7373+ )
7474+7575+(* Output functions *)
7676+let output_registry format registry output_file =
7777+ let content = match format with
7878+ | Pooch ->
7979+ let header = Printf.sprintf "# Generated by toru-make-registry on %s\n# Algorithm: %s\n"
8080+ (Ptime.to_rfc3339 (Ptime_clock.now ()))
8181+ (Toru.Hash.algorithm_to_string Toru.Hash.SHA256)
8282+ in
8383+ header ^ Toru.Registry.to_string registry
8484+ | JSON ->
8585+ (* For JSON, we need enhanced entries *)
8686+ failwith "JSON output requires enhanced entries (not yet implemented in this path)"
8787+ in
8888+ match output_file with
8989+ | Some filename ->
9090+ let oc = open_out filename in
9191+ output_string oc content;
9292+ close_out oc;
9393+ Printf.printf "Registry written to %s\n" filename
9494+ | None ->
9595+ print_string content
9696+9797+let output_enhanced_entries format enhanced_entries algorithm output_file =
9898+ let content = match format with
9999+ | Pooch ->
100100+ let header = Printf.sprintf "# Generated by toru-make-registry on %s\n# Algorithm: %s\n"
101101+ (Ptime.to_rfc3339 (Ptime_clock.now ()))
102102+ (Toru.Hash.algorithm_to_string algorithm)
103103+ in
104104+ let entries_str = String.concat "\n" (List.map (fun enhanced_entry ->
105105+ let entry = Toru.Make_registry.get_entry enhanced_entry in
106106+ let filename = Toru.Registry.filename entry in
107107+ let hash = Toru.Registry.hash entry in
108108+ Printf.sprintf "%s %s" filename (Toru.Hash.value hash)
109109+ ) enhanced_entries) in
110110+ header ^ entries_str ^ "\n"
111111+ | JSON ->
112112+ let json = Toru.Make_registry.enhanced_entries_to_json
113113+ ~algorithm ~generated:(Ptime_clock.now ()) enhanced_entries in
114114+ Yojson.Safe.pretty_to_string json
115115+ in
116116+ match output_file with
117117+ | Some filename ->
118118+ let oc = open_out filename in
119119+ output_string oc content;
120120+ close_out oc;
121121+ Printf.printf "Registry written to %s\n" filename
122122+ | None ->
123123+ print_string content
124124+125125+(* Main function *)
126126+let make_registry_main directory output recursive follow_symlinks algorithm
127127+ excludes include_hidden update_file show_progress format path_format =
128128+129129+ Eio_main.run @@ fun env ->
130130+ Eio.Switch.run @@ fun sw ->
131131+ try
132132+ let dir_path = env#fs |> Eio.Path.(fun fs -> fs / directory) in
133133+134134+ let options = {
135135+ Toru.Make_registry.recursive;
136136+ follow_symlinks;
137137+ hash_algorithm = algorithm;
138138+ exclude_patterns = excludes;
139139+ include_hidden;
140140+ } in
141141+142142+ let progress_fn = create_progress_reporter show_progress in
143143+144144+ let result = match update_file with
145145+ | Some update_filename ->
146146+ (* Update existing registry *)
147147+ let existing_registry =
148148+ let update_path = env#fs |> Eio.Path.(fun fs -> fs / update_filename) in
149149+ Toru.Registry.load update_path
150150+ in
151151+ if show_progress then Printf.eprintf "Updating registry from %s...\n" update_filename;
152152+ let updated_registry = Toru.Make_registry.update_registry ~sw ~env ~options
153153+ existing_registry dir_path in
154154+ output_registry format updated_registry output;
155155+ Ok ()
156156+157157+ | None ->
158158+ (* Create new registry *)
159159+ if show_progress then Printf.eprintf "Scanning directory %s...\n" directory;
160160+ let enhanced_entries = Toru.Make_registry.scan_directory_enhanced ~sw ~env ~options dir_path in
161161+162162+ (* Apply path format conversion if needed *)
163163+ let processed_entries = match path_format with
164164+ | Relative -> enhanced_entries
165165+ | Absolute ->
166166+ List.map (fun enhanced_entry ->
167167+ let metadata = Toru.Make_registry.get_metadata enhanced_entry in
168168+ let entry = Toru.Make_registry.get_entry enhanced_entry in
169169+ let abs_filename = metadata.absolute_path in
170170+ let abs_entry = Toru.Registry.create_entry
171171+ ~filename:abs_filename
172172+ ~hash:(Toru.Registry.hash entry) () in
173173+ Toru.Make_registry.update_entry enhanced_entry abs_entry
174174+ ) enhanced_entries
175175+ in
176176+177177+ output_enhanced_entries format processed_entries algorithm output;
178178+ Ok ()
179179+ in
180180+181181+ match result with
182182+ | Ok () -> 0
183183+ | Error msg ->
184184+ Printf.eprintf "Error: %s\n" msg;
185185+ 1
186186+187187+ with
188188+ | exn ->
189189+ Printf.eprintf "Error: %s\n" (Printexc.to_string exn);
190190+ 1
191191+192192+(* Command definition *)
193193+let cmd =
194194+ let doc = "Generate Pooch-compatible registry files from directories" in
195195+ let man = [
196196+ `S Manpage.s_description;
197197+ `P "$(tname) scans directories and generates registry files compatible with Python Pooch library.";
198198+ `P "The registry format is: 'filename hash' per line, with optional comments starting with #.";
199199+ `S Manpage.s_examples;
200200+ `P "Generate registry for data directory:";
201201+ `P "$(tname) data/ registry.txt";
202202+ `P "";
203203+ `P "Recursive scan with SHA256 and exclude patterns:";
204204+ `P "$(tname) -r -a sha256 -e '*.tmp' -e '*.log' ./dataset/";
205205+ `P "";
206206+ `P "Update existing registry with progress:";
207207+ `P "$(tname) --update existing.txt --progress data/";
208208+ `P "";
209209+ `P "Generate JSON format with absolute paths:";
210210+ `P "$(tname) --format json --path-format absolute data/";
211211+ ] in
212212+213213+ let info = Cmd.info "toru-make-registry" ~version:"1.0" ~doc ~man in
214214+215215+ Cmd.v info Term.(const make_registry_main
216216+ $ directory_arg $ output_arg $ recursive_arg $ follow_symlinks_arg
217217+ $ algorithm_arg $ exclude_arg $ include_hidden_arg $ update_arg
218218+ $ progress_arg $ format_arg $ path_format_arg)
219219+220220+let () = exit (Cmd.eval cmd)
+86
toru/bin/toru_make_registry_simple.ml
···11+(** Simple CLI tool for generating registry files - minimal version *)
22+33+open Cmdliner
44+55+(* CLI arguments *)
66+let directory_arg =
77+ let doc = "Directory to scan for files" in
88+ Arg.(required & pos 0 (some dir) None & info [] ~docv:"DIRECTORY" ~doc)
99+1010+let output_arg =
1111+ let doc = "Output file for registry (default: stdout)" in
1212+ Arg.(value & pos 1 (some string) None & info [] ~docv:"OUTPUT" ~doc)
1313+1414+let recursive_arg =
1515+ let doc = "Scan directories recursively" in
1616+ Arg.(value & flag & info ["r"; "recursive"] ~doc)
1717+1818+let algorithm_arg =
1919+ let algorithms = [("sha256", Toru.Hash.SHA256); ("sha1", Toru.Hash.SHA1); ("md5", Toru.Hash.MD5)] in
2020+ let doc = "Hash algorithm to use: sha256, sha1, or md5" in
2121+ Arg.(value & opt (enum algorithms) Toru.Hash.SHA256 & info ["a"; "algorithm"] ~docv:"ALGO" ~doc)
2222+2323+let progress_arg =
2424+ let doc = "Show progress during scanning" in
2525+ Arg.(value & flag & info ["p"; "progress"] ~doc)
2626+2727+(* Main function *)
2828+let make_registry_main directory output recursive algorithm show_progress () =
2929+ Eio_main.run @@ fun env ->
3030+ Eio.Switch.run @@ fun sw ->
3131+ try
3232+ let dir_path = env#fs |> Eio.Path.(fun fs -> fs / directory) in
3333+3434+ let options = {
3535+ Toru.Make_registry.recursive;
3636+ follow_symlinks = false;
3737+ hash_algorithm = algorithm;
3838+ exclude_patterns = [];
3939+ include_hidden = false;
4040+ } in
4141+4242+ if show_progress then Printf.eprintf "Scanning directory %s...\n" directory;
4343+4444+ let registry = Toru.Make_registry.scan_directory ~sw ~env ~options dir_path in
4545+ let entries = Toru.Registry.entries registry in
4646+4747+ if show_progress then Printf.eprintf "Found %d files\n" (List.length entries);
4848+4949+ (* Generate output *)
5050+ let header = Printf.sprintf "# Generated by toru-make-registry on %s\n# Algorithm: %s\n"
5151+ (Ptime.to_rfc3339 (Ptime_clock.now ()))
5252+ (Toru.Hash.algorithm_to_string algorithm)
5353+ in
5454+ let entries_str = String.concat "\n" (List.map (fun entry ->
5555+ let filename = Toru.Registry.filename entry in
5656+ let hash = Toru.Registry.hash entry in
5757+ Printf.sprintf "%s %s" filename (Toru.Hash.value hash)
5858+ ) entries) in
5959+ let content = header ^ entries_str ^ "\n" in
6060+6161+ (* Output *)
6262+ (match output with
6363+ | Some filename ->
6464+ let oc = open_out filename in
6565+ output_string oc content;
6666+ close_out oc;
6767+ Printf.printf "Registry written to %s\n" filename
6868+ | None ->
6969+ print_string content);
7070+7171+ ()
7272+ with
7373+ | exn ->
7474+ Printf.eprintf "Error: %s\n" (Printexc.to_string exn);
7575+ exit 1
7676+7777+(* Command definition *)
7878+let cmd =
7979+ let doc = "Generate Pooch-compatible registry files from directories (simple version)" in
8080+ let info = Cmd.info "toru-make-registry-simple" ~version:"1.0" ~doc in
8181+8282+ Cmd.v info Term.(const make_registry_main
8383+ $ directory_arg $ output_arg $ recursive_arg
8484+ $ algorithm_arg $ progress_arg $ const ())
8585+8686+let () = Cmd.eval cmd |> exit
+27
toru/dune-project
···11+(lang dune 3.0)
22+33+(name toru)
44+55+(package
66+ (name toru)
77+ (synopsis "OCaml data repository manager compatible with Python Pooch")
88+ (description "Toru is an OCaml library for managing data file downloads and caching, compatible with Python Pooch registry files. It provides automatic downloading, caching, and hash verification of data files from remote repositories using the Eio ecosystem.")
99+ (depends
1010+ ocaml
1111+ dune
1212+ (eio (>= 1.0))
1313+ digestif
1414+ yojson
1515+ cmdliner
1616+ progress
1717+ fmt
1818+ ptime
1919+ xdg)
2020+ (authors "Toru Development Team")
2121+ (maintainers "Toru Development Team")
2222+ (license MIT)
2323+ (homepage "https://github.com/ucam-eo/toru")
2424+ (bug_reports "https://github.com/ucam-eo/toru/issues")
2525+ (source
2626+ (github ucam-eo/toru)))
2727+
+296
toru/lib/toru/cache.ml
···11+(** File info: size in bytes and modification time *)
22+type file_info = {
33+ size: int64;
44+ mtime: float;
55+}
66+77+(** Cache usage statistics *)
88+type usage_stats = {
99+ total_size: int64;
1010+ file_count: int;
1111+ oldest: float;
1212+ newest: float;
1313+}
1414+1515+type t = {
1616+ base_path : Eio.Fs.dir_ty Eio.Path.t;
1717+ version : string option;
1818+ sw : Eio.Switch.t;
1919+ env : Eio_unix.Stdenv.base;
2020+}
2121+2222+let rec create ~sw ~env ?version path_str =
2323+ let base_path = Eio.Path.(env#fs / path_str) in
2424+ { base_path; version; sw; env }
2525+2626+and default ~sw ~env ?app_name () =
2727+ let app_name = Option.value app_name ~default:"toru" in
2828+ let path_str = default_cache_path ~app_name () in
2929+ create ~sw ~env path_str
3030+3131+and default_cache_path ?app_name () =
3232+ let app_name = Option.value app_name ~default:"toru" in
3333+ (* Use the official xdg package for XDG Base Directory Specification *)
3434+ let xdg_dirs = Xdg.create ~env:Sys.getenv_opt () in
3535+ let cache_dir = Xdg.cache_dir xdg_dirs in
3636+ Filename.concat cache_dir app_name
3737+3838+let base_path t = t.base_path
3939+let version t = t.version
4040+4141+let cache_path t =
4242+ Option.fold t.version ~none:t.base_path
4343+ ~some:(fun v -> Eio.Path.(t.base_path / v))
4444+4545+let file_path t filename =
4646+ Option.fold t.version ~none:Eio.Path.(t.base_path / filename)
4747+ ~some:(fun v -> Eio.Path.(t.base_path / v / filename))
4848+4949+let exists t filename =
5050+ let path = file_path t filename in
5151+ (* TODO: Use Eio.Path.exists when available *)
5252+ try
5353+ let _stat = Eio.Path.stat ~follow:false path in
5454+ true
5555+ with
5656+ | _ -> false
5757+5858+let exists_path path =
5959+ try
6060+ let _stat = Eio.Path.stat ~follow:false path in
6161+ true
6262+ with
6363+ | _ -> false
6464+6565+let ensure_dir t =
6666+ let create_dir_recursive path =
6767+ if not (exists_path path) then
6868+ try
6969+ (* Try to create parent directory first *)
7070+ (* Skip parent creation for now, rely on mkdir -p behavior if available *)
7171+ Eio.Path.mkdir path ~perm:0o755
7272+ with
7373+ | _ -> () (* Directory may already exist or creation failed *)
7474+ in
7575+ (* Create base directory first *)
7676+ create_dir_recursive t.base_path;
7777+ (* If version is specified, create version subdirectory *)
7878+ Option.iter (fun v ->
7979+ let version_path = Eio.Path.(t.base_path / v) in
8080+ create_dir_recursive version_path) t.version
8181+8282+let clear t =
8383+ let cache_dir = cache_path t in
8484+ let rec remove_contents path =
8585+ match Eio.Path.read_dir path with
8686+ | [] -> ()
8787+ | entries ->
8888+ List.iter (fun entry ->
8989+ let entry_path = Eio.Path.(path / entry) in
9090+ let stat = Eio.Path.stat ~follow:false entry_path in
9191+ match stat.kind with
9292+ | `Directory ->
9393+ remove_contents entry_path;
9494+ Eio.Path.rmdir entry_path
9595+ | `Regular_file | `Symbolic_link ->
9696+ Eio.Path.unlink entry_path
9797+ | _ -> () (* Skip other file types *)
9898+ ) entries
9999+ in
100100+ if exists_path cache_dir then
101101+ remove_contents cache_dir
102102+103103+let size_bytes t =
104104+ let cache_dir = cache_path t in
105105+ let rec calculate_size path acc =
106106+ if not (exists_path path) then acc
107107+ else
108108+ match Eio.Path.read_dir path with
109109+ | [] -> acc
110110+ | entries ->
111111+ List.fold_left (fun total entry ->
112112+ let entry_path = Eio.Path.(path / entry) in
113113+ let stat = Eio.Path.stat ~follow:false entry_path in
114114+ match stat.kind with
115115+ | `Regular_file -> Int64.add total (Optint.Int63.to_int64 stat.size)
116116+ | `Directory -> calculate_size entry_path total
117117+ | _ -> total
118118+ ) acc entries
119119+ in
120120+ calculate_size cache_dir 0L
121121+122122+let list_files t =
123123+ let cache_dir = cache_path t in
124124+ let rec collect_files path prefix acc =
125125+ if not (exists_path path) then acc
126126+ else
127127+ match Eio.Path.read_dir path with
128128+ | [] -> acc
129129+ | entries ->
130130+ List.fold_left (fun files entry ->
131131+ let entry_path = Eio.Path.(path / entry) in
132132+ let stat = Eio.Path.stat ~follow:false entry_path in
133133+ match stat.kind with
134134+ | `Regular_file ->
135135+ let full_name = if prefix = "" then entry else prefix ^ "/" ^ entry in
136136+ full_name :: files
137137+ | `Directory ->
138138+ let new_prefix = if prefix = "" then entry else prefix ^ "/" ^ entry in
139139+ collect_files entry_path new_prefix files
140140+ | _ -> files
141141+ ) acc entries
142142+ in
143143+ List.rev (collect_files cache_dir "" [])
144144+145145+(** Get file info (size and mtime) *)
146146+let file_info t filename =
147147+ let path = file_path t filename in
148148+ try
149149+ let stat = Eio.Path.stat ~follow:false path in
150150+ match stat.kind with
151151+ | `Regular_file ->
152152+ let size = Optint.Int63.to_int64 stat.size in
153153+ let mtime = stat.mtime in
154154+ Some { size; mtime }
155155+ | _ -> None
156156+ with
157157+ | _ -> None
158158+159159+(** Get cache usage statistics *)
160160+let usage_stats t =
161161+ let cache_dir = cache_path t in
162162+ let rec collect_stats path acc_size acc_count acc_oldest acc_newest =
163163+ if not (exists_path path) then (acc_size, acc_count, acc_oldest, acc_newest)
164164+ else
165165+ match Eio.Path.read_dir path with
166166+ | [] -> (acc_size, acc_count, acc_oldest, acc_newest)
167167+ | entries ->
168168+ List.fold_left (fun (total_size, file_count, oldest, newest) entry ->
169169+ let entry_path = Eio.Path.(path / entry) in
170170+ let stat = Eio.Path.stat ~follow:false entry_path in
171171+ match stat.kind with
172172+ | `Regular_file ->
173173+ let size = Optint.Int63.to_int64 stat.size in
174174+ let mtime = stat.mtime in
175175+ let new_oldest = if oldest = 0.0 || mtime < oldest then mtime else oldest in
176176+ let new_newest = if newest = 0.0 || mtime > newest then mtime else newest in
177177+ (Int64.add total_size size, file_count + 1, new_oldest, new_newest)
178178+ | `Directory ->
179179+ collect_stats entry_path total_size file_count oldest newest
180180+ | _ -> (total_size, file_count, oldest, newest)
181181+ ) (acc_size, acc_count, acc_oldest, acc_newest) entries
182182+ in
183183+ let (total_size, file_count, oldest, newest) = collect_stats cache_dir 0L 0 0.0 0.0 in
184184+ { total_size; file_count; oldest; newest }
185185+186186+(** Remove oldest files to fit within size limit *)
187187+let trim_to_size t max_size =
188188+ let cache_dir = cache_path t in
189189+ let rec collect_files_with_stats path prefix acc =
190190+ if not (exists_path path) then acc
191191+ else
192192+ match Eio.Path.read_dir path with
193193+ | [] -> acc
194194+ | entries ->
195195+ List.fold_left (fun files entry ->
196196+ let entry_path = Eio.Path.(path / entry) in
197197+ let stat = Eio.Path.stat ~follow:false entry_path in
198198+ match stat.kind with
199199+ | `Regular_file ->
200200+ let full_name = if prefix = "" then entry else prefix ^ "/" ^ entry in
201201+ let size = Optint.Int63.to_int64 stat.size in
202202+ let mtime = stat.mtime in
203203+ (full_name, entry_path, size, mtime) :: files
204204+ | `Directory ->
205205+ let new_prefix = if prefix = "" then entry else prefix ^ "/" ^ entry in
206206+ collect_files_with_stats entry_path new_prefix files
207207+ | _ -> files
208208+ ) acc entries
209209+ in
210210+ let files = collect_files_with_stats cache_dir "" [] in
211211+ let total_size = List.fold_left (fun acc (_, _, size, _) -> Int64.add acc size) 0L files in
212212+ if Int64.compare total_size max_size > 0 then (
213213+ (* Sort by modification time (oldest first) *)
214214+ let sorted_files = List.sort (fun (_, _, _, mtime1) (_, _, _, mtime2) ->
215215+ Float.compare mtime1 mtime2) files in
216216+ let rec remove_files remaining_files current_size =
217217+ if Int64.compare current_size max_size <= 0 then ()
218218+ else
219219+ match remaining_files with
220220+ | [] -> ()
221221+ | (_, path, size, _) :: rest ->
222222+ (try Eio.Path.unlink path with _ -> ());
223223+ remove_files rest (Int64.sub current_size size)
224224+ in
225225+ remove_files sorted_files total_size
226226+ )
227227+228228+(** Remove files older than N days *)
229229+let trim_by_age t max_age_days =
230230+ let cache_dir = cache_path t in
231231+ let current_time = Unix.time () in
232232+ let max_age_seconds = max_age_days *. 86400.0 in (* days to seconds *)
233233+ let rec remove_old_files path =
234234+ if exists_path path then
235235+ match Eio.Path.read_dir path with
236236+ | [] -> ()
237237+ | entries ->
238238+ List.iter (fun entry ->
239239+ let entry_path = Eio.Path.(path / entry) in
240240+ let stat = Eio.Path.stat ~follow:false entry_path in
241241+ match stat.kind with
242242+ | `Regular_file ->
243243+ let file_age = current_time -. stat.mtime in
244244+ if file_age > max_age_seconds then (
245245+ try Eio.Path.unlink entry_path with _ -> ()
246246+ )
247247+ | `Directory ->
248248+ remove_old_files entry_path
249249+ | _ -> ()
250250+ ) entries
251251+ in
252252+ remove_old_files cache_dir
253253+254254+(** Remove empty directories and broken links *)
255255+let vacuum t =
256256+ let cache_dir = cache_path t in
257257+ let rec vacuum_directory path =
258258+ if exists_path path then
259259+ match Eio.Path.read_dir path with
260260+ | [] ->
261261+ (* Try to remove empty directory if it's not the base cache path *)
262262+ if path <> cache_dir then (
263263+ try Eio.Path.rmdir path with _ -> ()
264264+ )
265265+ | entries ->
266266+ List.iter (fun entry ->
267267+ let entry_path = Eio.Path.(path / entry) in
268268+ try
269269+ let stat = Eio.Path.stat ~follow:false entry_path in
270270+ match stat.kind with
271271+ | `Directory -> vacuum_directory entry_path
272272+ | `Symbolic_link ->
273273+ (* Check if symlink is broken *)
274274+ (try
275275+ let _ = Eio.Path.stat ~follow:true entry_path in ()
276276+ with
277277+ | _ -> Eio.Path.unlink entry_path)
278278+ | _ -> ()
279279+ with
280280+ | _ ->
281281+ (* If we can't stat it, it might be broken - try to remove *)
282282+ (try Eio.Path.unlink entry_path with _ -> ())
283283+ ) entries;
284284+ (* Check again if directory is now empty *)
285285+ (match Eio.Path.read_dir path with
286286+ | [] when path <> cache_dir ->
287287+ (try Eio.Path.rmdir path with _ -> ())
288288+ | _ -> ())
289289+ in
290290+ vacuum_directory cache_dir
291291+292292+let pp fmt t =
293293+ let version_str = Option.fold t.version ~none:"no version"
294294+ ~some:(fun v -> "version " ^ v) in
295295+ Format.fprintf fmt "Cache at %s (%s)"
296296+ (Eio.Path.native_exn t.base_path) version_str
+83
toru/lib/toru/cache.mli
···11+(** Cache module for managing local file storage *)
22+33+(** Abstract cache type *)
44+type t
55+66+(** {1 Construction} *)
77+88+(** Create cache with explicit path *)
99+val create : sw:Eio.Switch.t -> env:Eio_unix.Stdenv.base ->
1010+ ?version:string -> string -> t
1111+1212+(** Create cache using default OS-specific location *)
1313+val default : sw:Eio.Switch.t -> env:Eio_unix.Stdenv.base ->
1414+ ?app_name:string -> unit -> t
1515+1616+(** {1 Field accessors} *)
1717+1818+(** Get base path of cache *)
1919+val base_path : t -> Eio.Fs.dir_ty Eio.Path.t
2020+2121+(** Get version string (if any) *)
2222+val version : t -> string option
2323+2424+(** {1 Operations} *)
2525+2626+(** Get full path for a filename within cache *)
2727+val file_path : t -> string -> Eio.Fs.dir_ty Eio.Path.t
2828+2929+(** Check if file exists in cache *)
3030+val exists : t -> string -> bool
3131+3232+(** Ensure cache directory exists *)
3333+val ensure_dir : t -> unit
3434+3535+(** Clear all files from cache *)
3636+val clear : t -> unit
3737+3838+(** Get total size of cache in bytes *)
3939+val size_bytes : t -> int64
4040+4141+(** List all files in cache *)
4242+val list_files : t -> string list
4343+4444+(** {1 Cache Management} *)
4545+4646+(** File info: size in bytes and modification time *)
4747+type file_info = {
4848+ size: int64;
4949+ mtime: float;
5050+}
5151+5252+(** Cache usage statistics *)
5353+type usage_stats = {
5454+ total_size: int64;
5555+ file_count: int;
5656+ oldest: float;
5757+ newest: float;
5858+}
5959+6060+(** Remove oldest files to fit within size limit *)
6161+val trim_to_size : t -> int64 -> unit
6262+6363+(** Remove files older than N days *)
6464+val trim_by_age : t -> float -> unit
6565+6666+(** Get file info (size and mtime) *)
6767+val file_info : t -> string -> file_info option
6868+6969+(** Get cache usage statistics *)
7070+val usage_stats : t -> usage_stats
7171+7272+(** Remove empty directories and broken links *)
7373+val vacuum : t -> unit
7474+7575+(** {1 Utilities} *)
7676+7777+(** Get default cache path for application *)
7878+val default_cache_path : ?app_name:string -> unit -> string
7979+8080+(** {1 Pretty printing} *)
8181+8282+(** Pretty printer for cache *)
8383+val pp : Format.formatter -> t -> unit
+256
toru/lib/toru/downloader.ml
···11+module Progress_reporter = struct
22+ type t = {
33+ name : string;
44+ total_bytes : int64 option;
55+ mutable current_bytes : int64;
66+ }
77+88+ let create ?total_bytes name =
99+ { name; total_bytes; current_bytes = 0L }
1010+1111+ let update t bytes =
1212+ t.current_bytes <- bytes
1313+ (* TODO: Integrate with progress library *)
1414+1515+ let finish _t =
1616+ (* TODO: Finish progress bar *)
1717+ ()
1818+end
1919+2020+module Config = struct
2121+ type auth = {
2222+ username : string option;
2323+ password : string option;
2424+ }
2525+end
2626+2727+module type DOWNLOADER = sig
2828+ type t
2929+3030+ val create : sw:Eio.Switch.t -> env:Eio_unix.Stdenv.base ->
3131+ ?auth:Config.auth -> unit -> t
3232+3333+ val download : t ->
3434+ url:string ->
3535+ dest:Eio.Fs.dir_ty Eio.Path.t ->
3636+ ?hash:Hash.t ->
3737+ ?progress:Progress_reporter.t ->
3838+ ?resume:bool ->
3939+ unit -> (unit, string) result
4040+4141+ val supports_resume : t -> bool
4242+ val name : t -> string
4343+end
4444+4545+module Wget_downloader = struct
4646+ type t = {
4747+ sw : Eio.Switch.t;
4848+ env : Eio_unix.Stdenv.base;
4949+ auth : Config.auth option;
5050+ timeout : float;
5151+ }
5252+5353+ let create ~sw ~env ?auth () = { sw; env; auth; timeout = 300.0 }
5454+5555+ let download t ~url ~dest ?hash ?progress:_ ?(resume=true) () =
5656+ let dest_path = Eio.Path.native_exn dest in
5757+5858+ (* Build wget arguments (excluding command name) *)
5959+ let args = [
6060+ "--quiet"; (* Reduce output noise *)
6161+ "--show-progress"; (* Show progress bar *)
6262+ "--timeout=300"; (* 5 minute timeout *)
6363+ "--tries=3"; (* Retry 3 times *)
6464+ "--output-document=" ^ dest_path; (* Output file *)
6565+ ] in
6666+6767+ (* Add authentication if provided *)
6868+ let args = Option.fold t.auth ~none:args ~some:(fun auth ->
6969+ let user_arg = Option.map (fun u -> "--user=" ^ u) auth.Config.username
7070+ |> Option.to_list in
7171+ let pass_arg = Option.map (fun p -> "--password=" ^ p) auth.Config.password
7272+ |> Option.to_list in
7373+ user_arg @ pass_arg @ args) in
7474+7575+ (* Add resume support if enabled *)
7676+ let args = if resume then "--continue" :: args else args in
7777+7878+ (* Add URL as last argument *)
7979+ let args = args @ [url] in
8080+8181+ (* Build command line with wget command *)
8282+ let cmd_args = "wget" :: args in
8383+8484+ try
8585+ (* Run wget using Eio process manager - use shell to handle PATH *)
8686+ let cmd_line = String.concat " " (List.map Filename.quote cmd_args) in
8787+ let process = Eio.Process.spawn t.env#process_mgr ~sw:t.sw
8888+ ~executable:"/bin/sh" ["/bin/sh"; "-c"; cmd_line] in
8989+ let exit_status = Eio.Process.await process in
9090+ if exit_status <> `Exited 0 then
9191+ let error_msg = match exit_status with
9292+ | `Exited n -> Printf.sprintf "wget exited with code %d" n
9393+ | `Signaled n -> Printf.sprintf "wget killed by signal %d" n in
9494+ Error ("Download failed: " ^ error_msg)
9595+ else (
9696+ (* Verify hash if provided *)
9797+ match hash with
9898+ | Some h ->
9999+ if Hash.verify dest h then
100100+ Ok ()
101101+ else
102102+ Error ("Hash verification failed for " ^ dest_path)
103103+ | None -> Ok ()
104104+ )
105105+ with
106106+ | exn -> Error ("wget failed: " ^ (Printexc.to_string exn))
107107+108108+ let supports_resume _ = true
109109+ let name _ = "wget"
110110+end
111111+112112+module Curl_downloader = struct
113113+ type t = {
114114+ sw : Eio.Switch.t;
115115+ env : Eio_unix.Stdenv.base;
116116+ auth : Config.auth option;
117117+ timeout : float;
118118+ }
119119+120120+ let create ~sw ~env ?auth () = { sw; env; auth; timeout = 300.0 }
121121+122122+ let download t ~url ~dest ?hash ?progress:_ ?(resume=true) () =
123123+ let dest_path = Eio.Path.native_exn dest in
124124+125125+ (* Build curl arguments (excluding command name) *)
126126+ let args = [
127127+ "--silent"; (* Reduce output noise *)
128128+ "--show-error"; (* Show error messages *)
129129+ "--location"; (* Follow redirects *)
130130+ "--max-time"; "300"; (* 5 minute timeout *)
131131+ "--retry"; "3"; (* Retry 3 times *)
132132+ "--output"; dest_path; (* Output file *)
133133+ ] in
134134+135135+ (* Add authentication if provided *)
136136+ let args = Option.fold t.auth ~none:args ~some:(fun auth ->
137137+ let auth_str = match auth.Config.username, auth.Config.password with
138138+ | Some user, Some pass -> Some (user ^ ":" ^ pass)
139139+ | Some user, None -> Some user
140140+ | None, _ -> None in
141141+ Option.fold auth_str ~none:args ~some:(fun str -> "--user" :: str :: args)) in
142142+143143+ (* Add resume support if enabled *)
144144+ let args = if resume then args @ ["--continue-at"; "-"] else args in
145145+146146+ (* Add URL as last argument *)
147147+ let args = args @ [url] in
148148+149149+ (* Build command line with curl command *)
150150+ let cmd_args = "curl" :: args in
151151+152152+ try
153153+ (* Run curl using Eio process manager - use shell to handle PATH *)
154154+ let cmd_line = String.concat " " (List.map Filename.quote cmd_args) in
155155+ let process = Eio.Process.spawn t.env#process_mgr ~sw:t.sw
156156+ ~executable:"/bin/sh" ["/bin/sh"; "-c"; cmd_line] in
157157+ let exit_status = Eio.Process.await process in
158158+ if exit_status <> `Exited 0 then
159159+ let error_msg = match exit_status with
160160+ | `Exited n -> Printf.sprintf "curl exited with code %d" n
161161+ | `Signaled n -> Printf.sprintf "curl killed by signal %d" n in
162162+ Error ("Download failed: " ^ error_msg)
163163+ else (
164164+ (* Verify hash if provided *)
165165+ match hash with
166166+ | Some h ->
167167+ if Hash.verify dest h then
168168+ Ok ()
169169+ else
170170+ Error ("Hash verification failed for " ^ dest_path)
171171+ | None -> Ok ()
172172+ )
173173+ with
174174+ | exn -> Error ("curl failed: " ^ (Printexc.to_string exn))
175175+176176+ let supports_resume _ = true
177177+ let name _ = "curl"
178178+end
179179+180180+module Cohttp_downloader = struct
181181+ type t = {
182182+ sw : Eio.Switch.t;
183183+ env : Eio_unix.Stdenv.base;
184184+ auth : Config.auth option;
185185+ timeout : float;
186186+ }
187187+188188+ let create ~sw ~env ?auth () = { sw; env; auth; timeout = 300.0 }
189189+190190+ let download _t ~url:_ ~dest:_ ?hash:_ ?progress:_ ?resume:_ () =
191191+ Error "Cohttp_downloader.download not yet implemented"
192192+193193+ let supports_resume _ = false
194194+ let name _ = "cohttp-eio"
195195+end
196196+197197+module Downloaders = struct
198198+ let wget () = (module Wget_downloader : DOWNLOADER)
199199+200200+ let curl () = (module Curl_downloader : DOWNLOADER)
201201+202202+ let cohttp () = (module Cohttp_downloader : DOWNLOADER)
203203+204204+ let detect_available ~env =
205205+ let test_command cmd =
206206+ try
207207+ Eio.Switch.run @@ fun sw ->
208208+ (* Use 'which' command to check if command exists on PATH *)
209209+ let process = Eio.Process.spawn env#process_mgr ~sw
210210+ ~executable:"/bin/sh" ["/bin/sh"; "-c"; "which " ^ cmd ^ " >/dev/null 2>&1"] in
211211+ let exit_status = Eio.Process.await process in
212212+ exit_status = `Exited 0
213213+ with
214214+ | _ -> false
215215+ in
216216+ [("wget", (module Wget_downloader : DOWNLOADER));
217217+ ("curl", (module Curl_downloader : DOWNLOADER))]
218218+ |> List.filter (fun (cmd, _) -> test_command cmd)
219219+220220+ let create_default ~env =
221221+ let available = detect_available ~env in
222222+ match available with
223223+ | (name, downloader) :: _ ->
224224+ Printf.eprintf "Using %s downloader\n" name;
225225+ downloader
226226+ | [] ->
227227+ failwith "No downloaders available (wget or curl required)"
228228+229229+ let of_string name =
230230+ [("wget", (module Wget_downloader : DOWNLOADER));
231231+ ("curl", (module Curl_downloader : DOWNLOADER))]
232232+ |> List.assoc_opt name
233233+end
234234+235235+module Cli = struct
236236+ type downloader_choice = [ `Wget | `Curl | `Cohttp | `Auto ]
237237+238238+ let downloader_term =
239239+ let open Cmdliner in
240240+ let doc = "Download tool to use. 'auto' detects available tools." in
241241+ let docv = "TOOL" in
242242+ Arg.(value & opt (enum [
243243+ ("wget", `Wget); ("curl", `Curl);
244244+ ("cohttp", `Cohttp); ("auto", `Auto)
245245+ ]) `Auto & info ["downloader"; "d"] ~doc ~docv)
246246+247247+ let downloader_info =
248248+ Cmdliner.Arg.info ["downloader"; "d"]
249249+ ~doc:"Download tool to use"
250250+251251+ let create_downloader ~env = function
252252+ | `Wget -> Downloaders.wget ()
253253+ | `Curl -> Downloaders.curl ()
254254+ | `Cohttp -> Downloaders.cohttp ()
255255+ | `Auto -> Downloaders.create_default ~env
256256+end
+76
toru/lib/toru/downloader.mli
···11+(** Downloader module for fetching files from remote sources *)
22+33+(** Progress reporter for download tracking *)
44+module Progress_reporter : sig
55+ type t
66+77+ val create : ?total_bytes:int64 -> string -> t
88+ val update : t -> int64 -> unit
99+ val finish : t -> unit
1010+end
1111+1212+(** Configuration for authentication *)
1313+module Config : sig
1414+ type auth = {
1515+ username : string option;
1616+ password : string option;
1717+ }
1818+end
1919+2020+(** Abstract downloader interface *)
2121+module type DOWNLOADER = sig
2222+ type t
2323+2424+ val create : sw:Eio.Switch.t -> env:Eio_unix.Stdenv.base ->
2525+ ?auth:Config.auth -> unit -> t
2626+2727+ val download : t ->
2828+ url:string ->
2929+ dest:Eio.Fs.dir_ty Eio.Path.t ->
3030+ ?hash:Hash.t ->
3131+ ?progress:Progress_reporter.t ->
3232+ ?resume:bool ->
3333+ unit -> (unit, string) result
3434+3535+ val supports_resume : t -> bool
3636+ val name : t -> string
3737+end
3838+3939+(** Concrete downloader implementations *)
4040+module Wget_downloader : sig
4141+ include DOWNLOADER
4242+end
4343+4444+module Curl_downloader : sig
4545+ include DOWNLOADER
4646+end
4747+4848+module Cohttp_downloader : sig
4949+ include DOWNLOADER
5050+end
5151+5252+(** Downloader selection utilities *)
5353+module Downloaders : sig
5454+ val wget : unit -> (module DOWNLOADER)
5555+ val curl : unit -> (module DOWNLOADER)
5656+ val cohttp : unit -> (module DOWNLOADER)
5757+5858+ val detect_available : env:Eio_unix.Stdenv.base ->
5959+ (string * (module DOWNLOADER)) list
6060+ val create_default : env:Eio_unix.Stdenv.base ->
6161+ (module DOWNLOADER)
6262+ val of_string : string -> (module DOWNLOADER) option
6363+end
6464+6565+(** CLI integration *)
6666+module Cli : sig
6767+ type downloader_choice = [ `Wget | `Curl | `Cohttp | `Auto ]
6868+6969+ val downloader_term : downloader_choice Cmdliner.Term.t
7070+ val downloader_info : Cmdliner.Arg.info
7171+7272+ val create_downloader :
7373+ env:Eio_unix.Stdenv.base ->
7474+ downloader_choice ->
7575+ (module DOWNLOADER)
7676+end