···11+(** Main Toru CLI tool for registry inspection and manipulation *)
22+33+open Cmdliner
44+open Toru
55+66+(** Global options shared across commands *)
77+type global_opts = {
88+ app_name : string;
99+ cache_dir : string option;
1010+ version : string option;
1111+ verbose_level : int;
1212+ env : Eio_unix.Stdenv.base;
1313+}
1414+1515+(** Resolve cache directory using XDG if none specified *)
1616+let resolve_cache_dir app_name cache_dir =
1717+ match cache_dir with
1818+ | Some dir -> dir
1919+ | None ->
2020+ (* Try application-specific environment variable first *)
2121+ let app_env_var = String.uppercase_ascii app_name ^ "_CACHE_DIR" in
2222+ (match Sys.getenv_opt app_env_var with
2323+ | Some dir -> dir
2424+ | None ->
2525+ (* Use XDG cache directory as default *)
2626+ (match Sys.getenv_opt "XDG_CACHE_HOME" with
2727+ | Some dir -> Filename.concat dir app_name
2828+ | None ->
2929+ (match Sys.getenv_opt "HOME" with
3030+ | Some home -> Filename.concat (Filename.concat home ".cache") app_name
3131+ | None -> Filename.concat "/tmp" ("cache-" ^ app_name))))
3232+3333+let create_global_opts app_name cache_dir version verbose_level env =
3434+ (* Setup logging based on verbosity level *)
3535+ Toru.Logging.setup_logging verbose_level;
3636+ (* Resolve cache directory using XDG *)
3737+ let resolved_cache_dir = resolve_cache_dir app_name cache_dir in
3838+ { app_name; cache_dir = Some resolved_cache_dir; version; verbose_level; env }
3939+4040+let global_opts_t env =
4141+ let app_name =
4242+ let doc = "Application name for cache and config directories" in
4343+ Arg.(value & opt string "toru" & info ["app-name"; "a"] ~doc)
4444+ in
4545+ let cache_dir =
4646+ let doc = "Override default cache directory path (respects XDG_CACHE_HOME)" in
4747+ Arg.(value & opt (some string) None & info ["cache-dir"; "c"] ~doc)
4848+ in
4949+ let version =
5050+ let doc = "Version string for cache subdirectory organization" in
5151+ Arg.(value & opt (some string) None & info ["data-version"] ~doc)
5252+ in
5353+ let verbose_count =
5454+ let doc = "Verbose output (repeat for more verbosity: -v=info, -vv=debug)" in
5555+ Arg.(value & flag_all & info ["verbose"; "v"] ~doc)
5656+ in
5757+ Term.(const (fun app_name cache_dir version verbose_flags ->
5858+ create_global_opts app_name cache_dir version (List.length verbose_flags) env
5959+ ) $ app_name $ cache_dir $ version $ verbose_count)
6060+6161+(** Registry inspect command *)
6262+let inspect_cmd env =
6363+ let registry_source =
6464+ let doc = "Registry file path or URL to inspect" in
6565+ Arg.(required & pos 0 (some string) None & info [] ~docv:"REGISTRY" ~doc)
6666+ in
6767+6868+ let show_stats =
6969+ let doc = "Show detailed statistics about the registry" in
7070+ Arg.(value & flag & info ["stats"; "s"] ~doc)
7171+ in
7272+7373+ let list_files =
7474+ let doc = "List all files in the registry" in
7575+ Arg.(value & flag & info ["list"; "l"] ~doc)
7676+ in
7777+7878+ let search_pattern =
7979+ let doc = "Search for files matching this pattern" in
8080+ Arg.(value & opt (some string) None & info ["search"] ~docv:"PATTERN" ~doc)
8181+ in
8282+8383+ let inspect global_opts registry_source show_stats list_files search_pattern =
8484+ try
8585+ Toru.Logging.Cli.debug (fun m -> m "Starting inspect function");
8686+8787+ Toru.Logging.Cli.info (fun m -> m "Loading registry from: %s" registry_source);
8888+8989+ Toru.Logging.Cli.debug (fun m -> m "Determining if source is URL or file path");
9090+9191+ (* Load the registry *)
9292+ let registry =
9393+ if (String.starts_with ~prefix:"http://" registry_source ||
9494+ String.starts_with ~prefix:"https://" registry_source) then (
9595+ (* Definitely a URL *)
9696+ Toru.Logging.Cli.debug (fun m -> m "Detected URL, loading from network");
9797+ Registry.load_from_url registry_source
9898+ ) else (
9999+ (* Treat as file path *)
100100+ Toru.Logging.Cli.debug (fun m -> m "Detected file path, loading with Eio");
101101+ (* Use Eio for file loading - this is the correct approach *)
102102+ Registry.load (Eio.Path.(global_opts.env#fs / registry_source))
103103+ )
104104+ in
105105+106106+ Toru.Logging.Cli.debug (fun m -> m "Registry loaded successfully");
107107+108108+ let total_entries = Registry.size registry in
109109+ Printf.printf "Registry contains %d entries\n" total_entries;
110110+111111+ if show_stats then (
112112+ Printf.printf "\nRegistry Statistics:\n";
113113+ Printf.printf "===================\n";
114114+ let entries = Registry.entries registry in
115115+116116+ (* Count by hash algorithm *)
117117+ let hash_counts = Hashtbl.create 8 in
118118+ List.iter (fun entry ->
119119+ let hash = Registry.hash entry in
120120+ let algo = Hash.algorithm hash in
121121+ let algo_str = Hash.algorithm_to_string algo in
122122+ let count = match Hashtbl.find_opt hash_counts algo_str with
123123+ | Some n -> n + 1
124124+ | None -> 1
125125+ in
126126+ Hashtbl.replace hash_counts algo_str count
127127+ ) entries;
128128+129129+ Printf.printf "Hash algorithms:\n";
130130+ Hashtbl.iter (fun algo count ->
131131+ Printf.printf " %s: %d entries\n" algo count
132132+ ) hash_counts;
133133+134134+ (* File extension analysis *)
135135+ let ext_counts = Hashtbl.create 16 in
136136+ List.iter (fun entry ->
137137+ let filename = Registry.filename entry in
138138+ let ext =
139139+ match String.rindex_opt filename '.' with
140140+ | Some idx -> String.sub filename (idx + 1) (String.length filename - idx - 1)
141141+ | None -> "(no extension)"
142142+ in
143143+ let count = match Hashtbl.find_opt ext_counts ext with
144144+ | Some n -> n + 1
145145+ | None -> 1
146146+ in
147147+ Hashtbl.replace ext_counts ext count
148148+ ) entries;
149149+150150+ Printf.printf "\nFile extensions:\n";
151151+ Hashtbl.iter (fun ext count ->
152152+ Printf.printf " .%s: %d files\n" ext count
153153+ ) ext_counts
154154+ );
155155+156156+ if list_files then (
157157+ Printf.printf "\nFiles in registry:\n";
158158+ Printf.printf "==================\n";
159159+ let entries = Registry.entries registry in
160160+ List.iter (fun entry ->
161161+ let filename = Registry.filename entry in
162162+ let hash = Registry.hash entry in
163163+ Printf.printf "%s %s\n" (Hash.value hash) filename
164164+ ) entries
165165+ );
166166+167167+ (match search_pattern with
168168+ | Some pattern ->
169169+ Printf.printf "\nFiles matching '%s':\n" pattern;
170170+ Printf.printf "=======================\n";
171171+ let entries = Registry.entries registry in
172172+ let matches = List.filter (fun entry ->
173173+ let filename = Registry.filename entry in
174174+ String.contains filename (String.get pattern 0) ||
175175+ String.length filename >= String.length pattern &&
176176+ String.sub filename 0 (String.length pattern) = pattern
177177+ ) entries in
178178+ List.iter (fun entry ->
179179+ let filename = Registry.filename entry in
180180+ let hash = Registry.hash entry in
181181+ Printf.printf "%s %s\n" (Hash.value hash) filename
182182+ ) matches
183183+ | None -> ());
184184+185185+ `Ok ()
186186+ with
187187+ | exn -> `Error (false, "Failed to inspect registry: " ^ (Printexc.to_string exn))
188188+ in
189189+190190+ let term env = Term.(ret (const inspect $ global_opts_t env $ registry_source $ show_stats $ list_files $ search_pattern)) in
191191+ let info = Cmd.info "inspect" ~doc:"Inspect a registry file or URL" in
192192+ Cmd.v info (term env)
193193+194194+(** Registry validate command *)
195195+let validate_cmd env =
196196+ let registry_source =
197197+ let doc = "Registry file path or URL to validate" in
198198+ Arg.(required & pos 0 (some string) None & info [] ~docv:"REGISTRY" ~doc)
199199+ in
200200+201201+ let check_hashes =
202202+ let doc = "Check if all hash formats are valid" in
203203+ Arg.(value & flag & info ["check-hashes"] ~doc)
204204+ in
205205+206206+ let validate global_opts registry_source check_hashes =
207207+ try
208208+ Toru.Logging.Cli.info (fun m -> m "Validating registry: %s" registry_source);
209209+210210+ let registry =
211211+ if (String.starts_with ~prefix:"http://" registry_source ||
212212+ String.starts_with ~prefix:"https://" registry_source) then (
213213+ Toru.Logging.Cli.debug (fun m -> m "Detected URL, using load_from_url");
214214+ Registry.load_from_url registry_source
215215+ ) else (
216216+ Toru.Logging.Cli.debug (fun m -> m "Detected file path, using Eio");
217217+ (* Use Eio for file loading *)
218218+ Registry.load (Eio.Path.(global_opts.env#fs / registry_source))
219219+ )
220220+ in
221221+222222+ let entries = Registry.entries registry in
223223+ let total = List.length entries in
224224+ Printf.printf "✓ Registry loaded successfully with %d entries\n" total;
225225+226226+ if check_hashes then (
227227+ Printf.printf "Validating hash formats...\n";
228228+ let valid_count = ref 0 in
229229+ let invalid_count = ref 0 in
230230+ List.iter (fun entry ->
231231+ let filename = Registry.filename entry in
232232+ let hash = Registry.hash entry in
233233+ let hash_str = Hash.value hash in
234234+ let algo = Hash.algorithm hash in
235235+ let expected_len = match algo with
236236+ | SHA256 -> 64
237237+ | SHA1 -> 40
238238+ | MD5 -> 32
239239+ in
240240+ if String.length hash_str = expected_len then
241241+ incr valid_count
242242+ else (
243243+ Printf.printf "✗ Invalid hash length for %s: expected %d chars, got %d\n"
244244+ filename expected_len (String.length hash_str);
245245+ incr invalid_count
246246+ )
247247+ ) entries;
248248+249249+ if !invalid_count = 0 then
250250+ Printf.printf "✓ All %d hash formats are valid\n" !valid_count
251251+ else
252252+ Printf.printf "✗ Found %d invalid hashes out of %d total\n" !invalid_count total
253253+ );
254254+255255+ `Ok ()
256256+ with
257257+ | exn -> `Error (false, "Registry validation failed: " ^ (Printexc.to_string exn))
258258+ in
259259+260260+ let term env = Term.(ret (const validate $ global_opts_t env $ registry_source $ check_hashes)) in
261261+ let info = Cmd.info "validate" ~doc:"Validate a registry file format and integrity" in
262262+ Cmd.v info (term env)
263263+264264+(** Registry convert command *)
265265+let convert_cmd env =
266266+ let input_registry =
267267+ let doc = "Input registry file path or URL" in
268268+ Arg.(required & pos 0 (some string) None & info [] ~docv:"INPUT" ~doc)
269269+ in
270270+271271+ let output_file =
272272+ let doc = "Output registry file path" in
273273+ Arg.(required & pos 1 (some string) None & info [] ~docv:"OUTPUT" ~doc)
274274+ in
275275+276276+ let convert global_opts input_registry output_file =
277277+ try
278278+ Toru.Logging.Cli.info (fun m -> m "Converting %s -> %s" input_registry output_file);
279279+280280+ (* Load input registry *)
281281+ let registry =
282282+ if (String.starts_with ~prefix:"http://" input_registry ||
283283+ String.starts_with ~prefix:"https://" input_registry) then (
284284+ Toru.Logging.Cli.debug (fun m -> m "Detected URL, using load_from_url");
285285+ Registry.load_from_url input_registry
286286+ ) else (
287287+ Toru.Logging.Cli.debug (fun m -> m "Detected file path, using Eio");
288288+ (* Use Eio for file loading *)
289289+ Registry.load (Eio.Path.(global_opts.env#fs / input_registry))
290290+ )
291291+ in
292292+293293+ (* Save to output file using Eio *)
294294+ Registry.save (Eio.Path.(global_opts.env#fs / output_file)) registry;
295295+296296+ let count = Registry.size registry in
297297+ Printf.printf "✓ Converted %d entries from %s to %s\n" count input_registry output_file;
298298+299299+ `Ok ()
300300+ with
301301+ | exn -> `Error (false, "Conversion failed: " ^ (Printexc.to_string exn))
302302+ in
303303+304304+ let term env = Term.(ret (const convert $ global_opts_t env $ input_registry $ output_file)) in
305305+ let info = Cmd.info "convert" ~doc:"Convert registry between different formats or sources" in
306306+ Cmd.v info (term env)
307307+308308+(** Main command *)
309309+let main_cmd env =
310310+ let doc = "Toru data repository management tool" in
311311+ let man = [
312312+ `S Cmdliner.Manpage.s_description;
313313+ `P "Toru is an OCaml data repository manager compatible with Python Pooch registry files.";
314314+ `P "It provides automatic downloading, caching, and hash verification of data files from remote repositories.";
315315+ `S "ENVIRONMENT VARIABLES";
316316+ `P "Toru respects the XDG Base Directory Specification:";
317317+ `P "XDG_CACHE_HOME - Override default cache directory location";
318318+ `P "TORU_CACHE_DIR - Application-specific cache directory override";
319319+ `S Cmdliner.Manpage.s_examples;
320320+ `P "Inspect a registry file:";
321321+ `P "$(b,toru inspect registry.txt)";
322322+ `P "Show detailed statistics:";
323323+ `P "$(b,toru inspect --stats registry.txt)";
324324+ `P "Validate registry with hash checking:";
325325+ `P "$(b,toru validate --check-hashes registry.txt)";
326326+ ] in
327327+ let info = Cmd.info "toru" ~version:"0.1.0" ~doc ~man in
328328+ let default_term = Term.(ret (const (`Help (`Pager, None)))) in
329329+ Cmd.group info ~default:default_term [
330330+ inspect_cmd env;
331331+ validate_cmd env;
332332+ convert_cmd env;
333333+ ]
334334+335335+let () =
336336+ (* Run the entire CLI inside Eio_main.run *)
337337+ print_endline "initialising eio";
338338+ Eio_main.run @@ fun env ->
339339+ print_endline "starting";
340340+ exit (Cmd.eval (main_cmd env))
+79
toru/lib/toru/logging.ml
···11+(** Logging module for Toru *)
22+33+(** Log sources for different parts of the system *)
44+let registry_src = Logs.Src.create "toru.registry" ~doc:"Registry operations"
55+let cache_src = Logs.Src.create "toru.cache" ~doc:"Cache operations"
66+let downloader_src = Logs.Src.create "toru.downloader" ~doc:"Download operations"
77+let hash_src = Logs.Src.create "toru.hash" ~doc:"Hash operations"
88+let main_src = Logs.Src.create "toru.main" ~doc:"Main Toru operations"
99+let cli_src = Logs.Src.create "toru.cli" ~doc:"CLI operations"
1010+1111+(** Logging functions for each component *)
1212+module Registry = struct
1313+ let debug msgf = Logs.debug ~src:registry_src msgf
1414+ let info msgf = Logs.info ~src:registry_src msgf
1515+ let warn msgf = Logs.warn ~src:registry_src msgf
1616+ let err msgf = Logs.err ~src:registry_src msgf
1717+end
1818+1919+module Cache = struct
2020+ let debug msgf = Logs.debug ~src:cache_src msgf
2121+ let info msgf = Logs.info ~src:cache_src msgf
2222+ let warn msgf = Logs.warn ~src:cache_src msgf
2323+ let err msgf = Logs.err ~src:cache_src msgf
2424+end
2525+2626+module Downloader = struct
2727+ let debug msgf = Logs.debug ~src:downloader_src msgf
2828+ let info msgf = Logs.info ~src:downloader_src msgf
2929+ let warn msgf = Logs.warn ~src:downloader_src msgf
3030+ let err msgf = Logs.err ~src:downloader_src msgf
3131+end
3232+3333+module Hash = struct
3434+ let debug msgf = Logs.debug ~src:hash_src msgf
3535+ let info msgf = Logs.info ~src:hash_src msgf
3636+ let warn msgf = Logs.warn ~src:hash_src msgf
3737+ let err msgf = Logs.err ~src:hash_src msgf
3838+end
3939+4040+module Main = struct
4141+ let debug msgf = Logs.debug ~src:main_src msgf
4242+ let info msgf = Logs.info ~src:main_src msgf
4343+ let warn msgf = Logs.warn ~src:main_src msgf
4444+ let err msgf = Logs.err ~src:main_src msgf
4545+end
4646+4747+module Cli = struct
4848+ let debug msgf = Logs.debug ~src:cli_src msgf
4949+ let info msgf = Logs.info ~src:cli_src msgf
5050+ let warn msgf = Logs.warn ~src:cli_src msgf
5151+ let err msgf = Logs.err ~src:cli_src msgf
5252+end
5353+5454+(** Setup logging based on verbosity level *)
5555+let setup_logging verbose_level =
5656+ let level = match verbose_level with
5757+ | 0 -> Some Logs.Warning (* Only warnings and errors *)
5858+ | 1 -> Some Logs.Info (* Info, warnings, and errors *)
5959+ | _ -> Some Logs.Debug (* Everything including debug *)
6060+ in
6161+ Logs.set_level level;
6262+ (* Use a simple format for CLI tools *)
6363+ Logs.set_reporter (Logs_fmt.reporter ())
6464+6565+(** Measure and log execution time *)
6666+let time_operation ~src ~operation_name f =
6767+ let start_time = Unix.gettimeofday () in
6868+ Logs.debug ~src (fun m -> m "Starting %s" operation_name);
6969+ let result = f () in
7070+ let end_time = Unix.gettimeofday () in
7171+ let duration = end_time -. start_time in
7272+ Logs.info ~src (fun m -> m "Completed %s in %.3fs" operation_name duration);
7373+ result
7474+7575+(** Log progress for long operations *)
7676+let log_progress ~src ~total ~current ~item_name =
7777+ if current mod 100 = 0 || current = total then
7878+ Logs.info ~src (fun m -> m "Progress: %d/%d %s processed (%.1f%%)"
7979+ current total item_name (100. *. float current /. float total))
+66
toru/lib/toru/logging.mli
···11+(** Logging module for Toru *)
22+33+(** Log sources for different components *)
44+val registry_src : Logs.Src.t
55+val cache_src : Logs.Src.t
66+val downloader_src : Logs.Src.t
77+val hash_src : Logs.Src.t
88+val main_src : Logs.Src.t
99+val cli_src : Logs.Src.t
1010+1111+(** Logging functions for Registry operations *)
1212+module Registry : sig
1313+ val debug : ('a, unit) Logs.msgf -> unit
1414+ val info : ('a, unit) Logs.msgf -> unit
1515+ val warn : ('a, unit) Logs.msgf -> unit
1616+ val err : ('a, unit) Logs.msgf -> unit
1717+end
1818+1919+(** Logging functions for Cache operations *)
2020+module Cache : sig
2121+ val debug : ('a, unit) Logs.msgf -> unit
2222+ val info : ('a, unit) Logs.msgf -> unit
2323+ val warn : ('a, unit) Logs.msgf -> unit
2424+ val err : ('a, unit) Logs.msgf -> unit
2525+end
2626+2727+(** Logging functions for Downloader operations *)
2828+module Downloader : sig
2929+ val debug : ('a, unit) Logs.msgf -> unit
3030+ val info : ('a, unit) Logs.msgf -> unit
3131+ val warn : ('a, unit) Logs.msgf -> unit
3232+ val err : ('a, unit) Logs.msgf -> unit
3333+end
3434+3535+(** Logging functions for Hash operations *)
3636+module Hash : sig
3737+ val debug : ('a, unit) Logs.msgf -> unit
3838+ val info : ('a, unit) Logs.msgf -> unit
3939+ val warn : ('a, unit) Logs.msgf -> unit
4040+ val err : ('a, unit) Logs.msgf -> unit
4141+end
4242+4343+(** Logging functions for Main operations *)
4444+module Main : sig
4545+ val debug : ('a, unit) Logs.msgf -> unit
4646+ val info : ('a, unit) Logs.msgf -> unit
4747+ val warn : ('a, unit) Logs.msgf -> unit
4848+ val err : ('a, unit) Logs.msgf -> unit
4949+end
5050+5151+(** Logging functions for CLI operations *)
5252+module Cli : sig
5353+ val debug : ('a, unit) Logs.msgf -> unit
5454+ val info : ('a, unit) Logs.msgf -> unit
5555+ val warn : ('a, unit) Logs.msgf -> unit
5656+ val err : ('a, unit) Logs.msgf -> unit
5757+end
5858+5959+(** Setup logging based on verbosity level (0=warn, 1=info, 2+=debug) *)
6060+val setup_logging : int -> unit
6161+6262+(** Time an operation and log the duration *)
6363+val time_operation : src:Logs.Src.t -> operation_name:string -> (unit -> 'a) -> 'a
6464+6565+(** Log progress for long operations *)
6666+val log_progress : src:Logs.Src.t -> total:int -> current:int -> item_name:string -> unit