this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

more

+517 -370
+164 -254
toru/TODO.md
··· 1 - # Toru TODO: XDG-Eio Integration & Missing Features 1 + # Toru Implementation TODO 2 2 3 - This document tracks the XDG-Eio integration plan and features missing from the OCaml Toru implementation compared to the Python Pooch library. 3 + This document outlines the implementation plan for Toru, an OCaml data repository manager compatible with Python Pooch registry files. 4 4 5 - ## XDG-Eio Integration Plan 5 + ## Phase 1: Core Modules 6 6 7 - ### Phase 1: Core Interface Updates 7 + ### 1.1 Hash Module ✨ 8 + - [ ] Define abstract `Hash.t` type with algorithm variants (SHA256, SHA1, MD5) 9 + - [ ] Implement `create`, `of_string`, `to_string` functions 10 + - [ ] Add algorithm parsing with prefix support ("sha1:", "md5:", plain) 11 + - [ ] Implement file verification using digestif library 12 + - [ ] Add hash computation for files 13 + - [ ] Create comprehensive test suite with known hash values 8 14 9 - #### 1. Update Cache Module (`lib/cache.mli`) 10 - - [ ] Change constructor to accept `Xdge.t` instead of explicit cache path 11 - - [ ] Add `xdg` field accessor 12 - - [ ] Keep version support for subdirectory organization 13 - - [ ] Use `Xdge.cache_dir` as base path 14 - - [ ] Follow XDG pretty-printing conventions 15 + ### 1.2 Registry Module 📋 16 + - [ ] Design abstract `Registry.t` and `Registry.entry` types 17 + - [ ] Implement Pooch-compatible file parser (comments, blank lines) 18 + - [ ] Add entry creation with filename, hash, optional custom URL 19 + - [ ] Implement registry operations (find, exists, add, remove) 20 + - [ ] Support loading from files and URLs 21 + - [ ] Add registry serialization (to_string, save) 15 22 16 - ```ocaml 17 - module Cache : sig 18 - type t 19 - 20 - val create : xdg:Xdge.t -> ?version:string -> unit -> t 21 - 22 - (* Field accessors *) 23 - val xdg : t -> Xdge.t 24 - val base_path : t -> Eio.Fs.dir_ty Eio.Path.t 25 - val version : t -> string option 26 - 27 - (* Operations unchanged *) 28 - val file_path : t -> string -> Eio.Fs.dir_ty Eio.Path.t 29 - val exists : t -> string -> bool 30 - val ensure_dir : t -> unit 31 - val clear : t -> unit 32 - val size_bytes : t -> int64 33 - val list_files : t -> string list 34 - 35 - (* XDG-compliant pretty printing *) 36 - val pp : Format.formatter -> t -> unit 37 - end 38 - ``` 23 + ### 1.3 Cache Module 💾 24 + - [ ] Create abstract `Cache.t` type with base path management 25 + - [ ] Implement XDG Base Directory specification 26 + - [ ] Add version subdirectory support 27 + - [ ] Implement cache operations (exists, clear, size, list) 28 + - [ ] Add lazy directory creation 29 + - [ ] Support environment variable overrides (TORU_CACHE_DIR) 39 30 40 - #### 2. Enhance Registry Module (`lib/registry.mli`) 41 - - [ ] Add multi-source support (files, URLs, strings) 42 - - [ ] Add registry merging capabilities (later sources override earlier) 43 - - [ ] Add XDG integration for loading/saving registry files 44 - - [ ] Support Pooch's multiple registry pattern 31 + ## Phase 2: External Tool Integration 45 32 46 - ```ocaml 47 - module Registry : sig 48 - type t 49 - type entry 50 - type source = 51 - | File of Eio.Fs.dir_ty Eio.Path.t 52 - | Url of string 53 - | String of string 54 - | Xdg_file of Xdge.t * string (* Search in XDG config dirs *) 33 + ### 2.1 Modular Downloader Interface 🔌 34 + - [ ] Define DOWNLOADER module signature 35 + - [ ] Create abstract Downloader.t type with module wrapping 36 + - [ ] Implement tool detection and availability checking 37 + - [ ] Add downloader selection (wget, curl, auto-detect) 55 38 56 - (* Entry operations unchanged *) 57 - val create_entry : filename:string -> hash:Hash.t -> ?custom_url:string -> unit -> entry 58 - val filename : entry -> string 59 - val hash : entry -> Hash.t 60 - val custom_url : entry -> string option 39 + ### 2.2 Wget Downloader Implementation 📥 40 + - [ ] Implement Wget_downloader module with DOWNLOADER interface 41 + - [ ] Add resume support with `--continue` flag 42 + - [ ] Handle timeout, retry, and quiet options 43 + - [ ] Implement hash verification after download 44 + - [ ] Add comprehensive error handling with exit codes 61 45 62 - (* Multi-source registry support *) 63 - val empty : t 64 - val create : source list -> t 65 - val load_sources : source list -> t 66 - val add_source : t -> source -> t 67 - val sources : t -> source list 68 - val merge : t list -> t 69 - 70 - (* XDG integration *) 71 - val load_from_xdg : Xdge.t -> ?filename:string -> (t, string) result 72 - val save_to_xdg : Xdge.t -> ?filename:string -> t -> (unit, string) result 73 - val find_registry_file : Xdge.t -> string -> Eio.Fs.dir_ty Eio.Path.t option 74 - 75 - (* Legacy single-file operations *) 76 - val load : Eio.Fs.dir_ty Eio.Path.t -> t 77 - val load_from_url : string -> t 78 - val save : Eio.Fs.dir_ty Eio.Path.t -> t -> unit 79 - val of_string : string -> t 80 - val to_string : t -> string 81 - 82 - (* Query operations unchanged *) 83 - val find : string -> t -> entry option 84 - val exists : string -> t -> bool 85 - val add : entry -> t -> t 86 - val remove : string -> t -> t 87 - val entries : t -> entry list 88 - val size : t -> int 46 + ### 2.3 Curl Downloader Implementation 📦 47 + - [ ] Implement Curl_downloader module with DOWNLOADER interface 48 + - [ ] Add resume support with `--continue-at -` flag 49 + - [ ] Configure timeout, retry, and progress options 50 + - [ ] Implement hash verification after download 51 + - [ ] Handle various curl error conditions 89 52 90 - (* XDG-compliant pretty printing *) 91 - val pp : Format.formatter -> t -> unit 92 - val pp_sources : Format.formatter -> source list -> unit 93 - end 94 - ``` 53 + ## Phase 3: Main Interface 95 54 96 - #### 3. Update Toru Main Interface (`lib/toru.mli`) 97 - - [ ] Accept `Xdge.t` as primary parameter 98 - - [ ] Remove `cache_path` parameter (use xdg cache directory) 99 - - [ ] Accept `Registry.t` instead of registry file path 100 - - [ ] Get app name from xdg context 101 - - [ ] Add XDG-compliant pretty printing 55 + ### 3.1 Toru Module Core 🎯 56 + - [ ] Design abstract Toru.t type with accessor functions 57 + - [ ] Implement constructor with registry loading 58 + - [ ] Add base_url, cache, and registry accessors 59 + - [ ] Create single file fetch functionality 60 + - [ ] Implement processor pipeline for post-download transformations 102 61 103 - ```ocaml 104 - module Toru : sig 105 - type t 106 - 107 - val create : 108 - xdg:Xdge.t -> 109 - base_url:string -> 110 - registry:Registry.t -> 111 - ?version:string -> 112 - ?downloader:(module DOWNLOADER) -> 113 - unit -> t 114 - 115 - (* Field accessors *) 116 - val xdg : t -> Xdge.t 117 - val app_name : t -> string (* derived from xdg *) 118 - val base_url : t -> string 119 - val cache : t -> Cache.t 120 - val registry : t -> Registry.t 121 - 122 - (* Operations unchanged *) 123 - val fetch : 124 - t -> 125 - filename:string -> 126 - ?processor:(Eio.Fs.dir_ty Eio.Path.t -> Eio.Fs.dir_ty Eio.Path.t) -> 127 - unit -> (Eio.Fs.dir_ty Eio.Path.t, string) result 128 - 129 - val fetch_all : 130 - t -> 131 - ?concurrency:int -> 132 - unit -> (unit, string) result 133 - 134 - val load_registry : t -> Registry.t -> t 135 - val add_registry_entry : t -> Registry.entry -> t 136 - val update_base_url : t -> string -> t 137 - 138 - (* Static functions - add xdg parameter *) 139 - val retrieve : 140 - xdg:Xdge.t -> 141 - url:string -> 142 - ?hash:Hash.t -> 143 - ?version:string -> 144 - ?downloader:(module DOWNLOADER) -> 145 - unit -> (Eio.Fs.dir_ty Eio.Path.t, string) result 146 - 147 - (* XDG-compliant pretty printing *) 148 - val pp : Format.formatter -> t -> unit 149 - val pp_brief : Format.formatter -> t -> unit 150 - end 151 - ``` 62 + ### 3.2 Concurrent Operations ⚡ 63 + - [ ] Implement fetch_all with configurable concurrency 64 + - [ ] Add Eio fiber-based parallel downloads 65 + - [ ] Implement progress reporting integration 66 + - [ ] Add error aggregation for batch operations 152 67 153 - ### Phase 2: XDG Directory Usage 68 + ### 3.3 Static Utilities 🛠️ 69 + - [ ] Implement standalone retrieve function 70 + - [ ] Add registry manipulation functions 71 + - [ ] Support base URL updates 72 + - [ ] Create convenience functions for common use cases 154 73 155 - #### 4. Add Configuration Support (`lib/config.mli`) 156 - - [ ] Create new Config module for application settings 157 - - [ ] Use `Xdge.config_dir` for storing configuration files 158 - - [ ] Support TOML configuration format 159 - - [ ] Use `Xdge.find_config_file` for config discovery 74 + ## Phase 4: Testing & Validation 160 75 161 - ```ocaml 162 - module Config : sig 163 - type t = { 164 - base_urls : string list; 165 - default_downloader : string option; 166 - timeout : float option; 167 - concurrency : int option; 168 - registry_sources : Registry.source list; 169 - } 170 - 171 - val default : t 172 - val load : Xdge.t -> (t, string) result 173 - val save : Xdge.t -> t -> (unit, string) result 174 - val find_config_file : Xdge.t -> (Eio.Fs.dir_ty Eio.Path.t, string) result 175 - 176 - (* XDG-compliant pretty printing *) 177 - val pp : Format.formatter -> t -> unit 178 - end 179 - ``` 76 + ### 4.1 Tessera-Manifests Integration 🧪 77 + - [ ] Set up test fixtures with tessera-manifests URLs 78 + - [ ] Test embeddings registry parsing (2024 data) 79 + - [ ] Validate landmasks registry parsing 80 + - [ ] Test geographic coordinate extraction 81 + - [ ] Performance test with large manifests (>100 entries) 82 + 83 + ### 4.2 Hash Algorithm Tests 🔐 84 + - [ ] Test SHA256, SHA1, MD5 verification with known files 85 + - [ ] Validate prefix parsing ("sha1:abc123", "md5:def456") 86 + - [ ] Test hash computation accuracy 87 + - [ ] Error handling for invalid hash formats 88 + 89 + ### 4.3 Download Integration Tests 📡 90 + - [ ] Test wget downloader with real tessera data 91 + - [ ] Test curl downloader with resume functionality 92 + - [ ] Validate hash verification after downloads 93 + - [ ] Test error handling for network failures 94 + 95 + ## Phase 5: CLI & User Experience 96 + 97 + ### 5.1 Command Line Interface 💻 98 + - [ ] Integrate cmdliner for argument parsing 99 + - [ ] Add downloader selection (--downloader wget|curl|auto) 100 + - [ ] Implement cache path configuration 101 + - [ ] Add verbose/quiet mode options 102 + 103 + ### 5.2 Progress Reporting 📊 104 + - [ ] Integrate OCaml progress library 105 + - [ ] Show download speed and ETA 106 + - [ ] Support multiple concurrent progress bars 107 + - [ ] Add file name and size information 108 + 109 + ### 5.3 Archive Processing 📁 110 + - [ ] Implement untar_gz processor using system tar 111 + - [ ] Add unzip processor using system unzip 112 + - [ ] Support untar_xz with tar -xJf 113 + - [ ] Create custom processor interface 114 + 115 + ## Phase 6: Future Extensions 116 + 117 + ### 6.1 Pure OCaml Implementation 🐪 118 + - [ ] Implement Cohttp_downloader module 119 + - [ ] Add streaming download support 120 + - [ ] Implement HTTP Range requests for resume 121 + - [ ] TLS support with tls-eio 122 + - [ ] Migrate from external tools gradually 123 + 124 + ### 6.2 DOI Resolution (Toru-DOI) 📚 125 + - [ ] Create separate toru-doi library 126 + - [ ] Implement Zenodo API integration 127 + - [ ] Add Figshare API support 128 + - [ ] DOI to registry conversion 129 + - [ ] Metadata caching and rate limiting 180 130 181 - #### 5. Add State Management (`lib/state.mli`) 182 - - [ ] Create State module for download history and logs 183 - - [ ] Use `Xdge.state_dir` for persistent state 184 - - [ ] Track download statistics and failures 185 - - [ ] Implement download resume capability 131 + ### 6.3 Advanced Features 🚀 132 + - [ ] FTP protocol support 133 + - [ ] Authentication mechanisms (API keys, tokens) 134 + - [ ] Checksum verification during download 135 + - [ ] Partial download recovery 136 + - [ ] Registry merging and diff operations 186 137 187 - ```ocaml 188 - module State : sig 189 - type t 190 - type download_entry = { 191 - filename : string; 192 - url : string; 193 - hash : Hash.t option; 194 - downloaded_at : Ptime.t; 195 - success : bool; 196 - error_msg : string option; 197 - } 198 - 199 - val create : Xdge.t -> t 200 - val load : Xdge.t -> (t, string) result 201 - val save : Xdge.t -> t -> (unit, string) result 202 - 203 - val add_download : t -> download_entry -> t 204 - val recent_downloads : t -> int -> download_entry list 205 - val failed_downloads : t -> download_entry list 206 - val download_count : t -> int 207 - val success_rate : t -> float 208 - 209 - (* XDG-compliant pretty printing *) 210 - val pp : Format.formatter -> t -> unit 211 - val pp_recent : Format.formatter -> t -> unit 212 - end 213 - ``` 138 + ## Dependencies 139 + 140 + ### Core Dependencies 141 + - `eio` (>= 1.0) - Effects-based I/O 142 + - `digestif` (>= 1.0) - Cryptographic hashes 143 + - `uri` - URL parsing 144 + - `cmdliner` - CLI parsing 145 + 146 + ### System Dependencies 147 + - `wget` or `curl` - Download tools (one required) 214 148 215 - #### 6. Add Data Directory Usage 216 - - [ ] Use `Xdge.data_dir` for user-installed registries 217 - - [ ] Store custom download processors in data directory 218 - - [ ] Support user plugin/extension discovery 149 + ### Optional Dependencies 150 + - `progress` - Progress bars 151 + - `yojson` - JSON configuration 152 + - `tar`, `unzip` - Archive processing 219 153 220 - ### Phase 3: Toru-Specific XDG Extensions 154 + ## Success Criteria 221 155 222 - #### 7. Create Toru XDG Extensions (`lib/toru_xdg.mli`) 223 - - [ ] Keep Toru-specific XDG functionality separate from xdg-eio 224 - - [ ] Add archive extraction support using temp directories 225 - - [ ] Add download lock file management 226 - - [ ] Add cache size management utilities 156 + ### Phase 1 Success ✅ 157 + - [ ] All core modules pass unit tests 158 + - [ ] Hash verification works with digestif 159 + - [ ] Registry parsing handles tessera-manifests correctly 160 + - [ ] Cache follows XDG directory specification 227 161 228 - ```ocaml 229 - module Toru_xdg : sig 230 - (* Temporary directory management *) 231 - val temp_dir : Xdge.t -> Eio.Fs.dir_ty Eio.Path.t 232 - val with_temp_dir : Xdge.t -> (Eio.Fs.dir_ty Eio.Path.t -> 'a) -> 'a 233 - 234 - (* Download locking *) 235 - val with_download_lock : Xdge.t -> string -> (unit -> 'a) -> ('a, string) result 236 - 237 - (* Cache management *) 238 - val cache_disk_usage : Xdge.t -> int64 239 - val cache_cleanup : Xdge.t -> ?max_size:int64 -> ?max_age:Ptime.Span.t -> unit -> int 240 - val validate_cache_writable : Xdge.t -> (unit, string) result 241 - 242 - (* Archive extraction *) 243 - val extract_to_cache : Xdge.t -> 244 - archive_path:Eio.Fs.dir_ty Eio.Path.t -> 245 - extract_subdir:string -> 246 - (Eio.Fs.dir_ty Eio.Path.t, string) result 247 - end 248 - ``` 162 + ### Phase 2 Success ✅ 163 + - [ ] Both wget and curl downloaders work 164 + - [ ] Resume functionality tested with interrupted downloads 165 + - [ ] Automatic tool detection and fallback 166 + - [ ] Hash verification after external tool downloads 249 167 250 - ### Phase 4: Examples and Documentation Updates 168 + ### Phase 3 Success ✅ 169 + - [ ] Full tessera-manifests integration test passes 170 + - [ ] Concurrent downloads work without conflicts 171 + - [ ] Single-file fetch and batch fetch both functional 172 + - [ ] Processor pipeline handles archives correctly 251 173 252 - #### 8. Update Examples 253 - - [ ] Update example code to use `Xdge.t` 254 - - [ ] Show registry multi-source patterns 255 - - [ ] Demonstrate configuration file usage 256 - - [ ] Show state/logging integration 174 + ### Final Success ✅ 175 + - [ ] Complete tessera geospatial data download workflow 176 + - [ ] CLI tool usable for real data management 177 + - [ ] Documentation and examples complete 178 + - [ ] Performance acceptable for large datasets (GB scale) 257 179 258 - #### 9. Add Cmdliner Integration Example 259 - - [ ] Create example showing `Xdge.Cmd.term` integration 260 - - [ ] Demonstrate automatic XDG directory CLI flags 261 - - [ ] Show environment variable precedence 180 + ## XDG Integration Notes 262 181 263 - ### Phase 5: Testing and Migration 182 + The current TODO includes XDG Base Directory specification support through the xdg-eio library. This provides: 264 183 265 - #### 10. Update Tests 266 - - [ ] Test XDG directory creation and permissions 267 - - [ ] Test multi-source registry loading 268 - - [ ] Test configuration file discovery 269 - - [ ] Test state persistence 270 - - [ ] Add integration tests with real xdg-eio 184 + - Automatic XDG cache directory detection 185 + - Cross-platform path handling (Unix, macOS, Windows) 186 + - Environment variable overrides (XDG_CACHE_HOME, etc.) 187 + - Pretty-printing for user-friendly directory display 271 188 272 - #### 11. Migration Guide 273 - - [ ] Document migration from explicit cache paths to XDG 274 - - [ ] Provide migration utility for existing cache directories 275 - - [ ] Document breaking changes in API 189 + --- 276 190 277 - ### Dependencies to Add 278 - - [ ] Add `xdge` dependency to dune-project 279 - - [ ] Add `ptime` for timestamp handling in state management 280 - - [ ] Add `toml` library for configuration files (optional) 281 - - [ ] Update OCaml-EIO-README.md reference if needed 191 + *This TODO represents approximately 6-8 weeks of development work, focusing on robust external tool integration before migrating to pure OCaml implementation.* 282 192 283 - ## Major Missing Features in Toru 193 + ## Additional Features for Pooch Compatibility 284 194 285 195 ### **1. Authentication Support** 286 196 - [ ] **HTTP Authentication**: Basic auth via username/password
+5
toru/bin/dune
··· 13 13 (name toru_make_registry_simple) 14 14 (libraries toru cmdliner ptime ptime.clock.os eio_main)) 15 15 16 + (executable 17 + (public_name toru) 18 + (name toru_main) 19 + (libraries toru cmdliner unix eio_main logs fmt xdge)) 20 + 16 21 ;; Complex version with enhanced features (disabled until field access is resolved) 17 22 ;; (executable 18 23 ;; (public_name toru-make-registry)
+62 -54
toru/bin/toru_cache.ml
··· 98 98 $ cache_dir $ app_name $ version) 99 99 100 100 (* Command implementations *) 101 - let info_cmd global_opts = 102 - (Eio_main.run @@ fun env -> 103 - Eio.Switch.run @@ fun sw -> 101 + let info_cmd env sw global_opts = 104 102 let cache = match global_opts.cache_dir with 105 - | Some path -> Toru.Cache.create ~sw ~env ?version:global_opts.version path 106 - | None -> Toru.Cache.default ~sw ~env ~app_name:global_opts.app_name () 103 + | Some path -> Toru.Cache.create ~sw ~fs:env#fs ?version:global_opts.version path 104 + | None -> 105 + let base_path = Toru.Cache.default_cache_path ~app_name:global_opts.app_name () in 106 + Toru.Cache.create ~sw ~fs:env#fs ?version:global_opts.version base_path 107 107 in 108 108 109 109 let cache = match global_opts.version with 110 110 | Some v -> 111 111 (* Create new cache with version override *) 112 112 (match global_opts.cache_dir with 113 - | Some path -> Toru.Cache.create ~sw ~env ~version:v path 113 + | Some path -> Toru.Cache.create ~sw ~fs:env#fs ~version:v path 114 114 | None -> 115 115 let base_path = Toru.Cache.default_cache_path ~app_name:global_opts.app_name () in 116 - Toru.Cache.create ~sw ~env ~version:v base_path) 116 + Toru.Cache.create ~sw ~fs:env#fs ~version:v base_path) 117 117 | None -> cache 118 118 in 119 119 ··· 151 151 (Utils.format_time_ago newest.mtime) 152 152 ); 153 153 154 - Printf.printf "Free Space: Unable to determine\n"); 154 + Printf.printf "Free Space: Unable to determine\n"; 155 155 0 156 156 157 - let list_cmd global_opts sort_by format limit = 158 - (Eio_main.run @@ fun env -> 159 - Eio.Switch.run @@ fun sw -> 157 + let list_cmd env sw global_opts sort_by format limit = 160 158 let cache = match global_opts.cache_dir with 161 - | Some path -> Toru.Cache.create ~sw ~env ?version:global_opts.version path 162 - | None -> Toru.Cache.default ~sw ~env ~app_name:global_opts.app_name () 159 + | Some path -> Toru.Cache.create ~sw ~fs:env#fs ?version:global_opts.version path 160 + | None -> 161 + let base_path = Toru.Cache.default_cache_path ~app_name:global_opts.app_name () in 162 + Toru.Cache.create ~sw ~fs:env#fs ?version:global_opts.version base_path 163 163 in 164 164 165 165 let cache = match global_opts.version with 166 166 | Some v -> 167 167 (match global_opts.cache_dir with 168 - | Some path -> Toru.Cache.create ~sw ~env ~version:v path 168 + | Some path -> Toru.Cache.create ~sw ~fs:env#fs ~version:v path 169 169 | None -> 170 170 let base_path = Toru.Cache.default_cache_path ~app_name:global_opts.app_name () in 171 - Toru.Cache.create ~sw ~env ~version:v base_path) 171 + Toru.Cache.create ~sw ~fs:env#fs ~version:v base_path) 172 172 | None -> cache 173 173 in 174 174 ··· 191 191 | None -> sorted_files 192 192 in 193 193 194 - match format with 194 + (match format with 195 195 | `Table -> 196 196 if limited_files = [] then 197 197 Printf.printf "No files found in cache.\n" ··· 226 226 ] 227 227 ) limited_files in 228 228 let json_output = `List json_files in 229 - Printf.printf "%s\n" (Yojson.Safe.pretty_to_string json_output)); 229 + Printf.printf "%s\n" (Yojson.Safe.pretty_to_string json_output) 230 + ); 230 231 0 231 232 232 - let size_cmd global_opts breakdown human_readable = 233 - (Eio_main.run @@ fun env -> 234 - Eio.Switch.run @@ fun sw -> 233 + let size_cmd env sw global_opts breakdown human_readable = 235 234 let cache = match global_opts.cache_dir with 236 - | Some path -> Toru.Cache.create ~sw ~env ?version:global_opts.version path 237 - | None -> Toru.Cache.default ~sw ~env ~app_name:global_opts.app_name () 235 + | Some path -> Toru.Cache.create ~sw ~fs:env#fs ?version:global_opts.version path 236 + | None -> 237 + let base_path = Toru.Cache.default_cache_path ~app_name:global_opts.app_name () in 238 + Toru.Cache.create ~sw ~fs:env#fs ?version:global_opts.version base_path 238 239 in 239 240 240 241 let cache = match global_opts.version with 241 242 | Some v -> 242 243 (match global_opts.cache_dir with 243 - | Some path -> Toru.Cache.create ~sw ~env ~version:v path 244 + | Some path -> Toru.Cache.create ~sw ~fs:env#fs ~version:v path 244 245 | None -> 245 246 let base_path = Toru.Cache.default_cache_path ~app_name:global_opts.app_name () in 246 - Toru.Cache.create ~sw ~env ~version:v base_path) 247 + Toru.Cache.create ~sw ~fs:env#fs ~version:v base_path) 247 248 | None -> cache 248 249 in 249 250 ··· 317 318 Printf.printf "%-20s %12Ld (%5.1f%%)\n" label bucket_size percentage 318 319 ) age_buckets 319 320 ); 320 - ()); 321 + (); 321 322 0 322 323 323 - let clean_cmd global_opts max_size max_age dry_run = 324 - (Eio_main.run @@ fun env -> 325 - Eio.Switch.run @@ fun sw -> 324 + let clean_cmd env sw global_opts max_size max_age dry_run = 326 325 let cache = match global_opts.cache_dir with 327 - | Some path -> Toru.Cache.create ~sw ~env ?version:global_opts.version path 328 - | None -> Toru.Cache.default ~sw ~env ~app_name:global_opts.app_name () 326 + | Some path -> Toru.Cache.create ~sw ~fs:env#fs ?version:global_opts.version path 327 + | None -> 328 + let base_path = Toru.Cache.default_cache_path ~app_name:global_opts.app_name () in 329 + Toru.Cache.create ~sw ~fs:env#fs ?version:global_opts.version base_path 329 330 in 330 331 331 332 let cache = match global_opts.version with 332 333 | Some v -> 333 334 (match global_opts.cache_dir with 334 - | Some path -> Toru.Cache.create ~sw ~env ~version:v path 335 + | Some path -> Toru.Cache.create ~sw ~fs:env#fs ~version:v path 335 336 | None -> 336 337 let base_path = Toru.Cache.default_cache_path ~app_name:global_opts.app_name () in 337 - Toru.Cache.create ~sw ~env ~version:v base_path) 338 + Toru.Cache.create ~sw ~fs:env#fs ~version:v base_path) 338 339 | None -> cache 339 340 in 340 341 ··· 415 416 Printf.printf "\nRemoved %d files successfully.\n" !removed_count 416 417 ); 417 418 () 418 - )); 419 + ); 419 420 0 420 421 421 - let vacuum_cmd global_opts dry_run = 422 - (Eio_main.run @@ fun env -> 423 - Eio.Switch.run @@ fun sw -> 422 + let vacuum_cmd env sw global_opts dry_run = 424 423 let cache = match global_opts.cache_dir with 425 - | Some path -> Toru.Cache.create ~sw ~env ?version:global_opts.version path 426 - | None -> Toru.Cache.default ~sw ~env ~app_name:global_opts.app_name () 424 + | Some path -> Toru.Cache.create ~sw ~fs:env#fs ?version:global_opts.version path 425 + | None -> 426 + let base_path = Toru.Cache.default_cache_path ~app_name:global_opts.app_name () in 427 + Toru.Cache.create ~sw ~fs:env#fs ?version:global_opts.version base_path 427 428 in 428 429 429 430 let cache = match global_opts.version with 430 431 | Some v -> 431 432 (match global_opts.cache_dir with 432 - | Some path -> Toru.Cache.create ~sw ~env ~version:v path 433 + | Some path -> Toru.Cache.create ~sw ~fs:env#fs ~version:v path 433 434 | None -> 434 435 let base_path = Toru.Cache.default_cache_path ~app_name:global_opts.app_name () in 435 - Toru.Cache.create ~sw ~env ~version:v base_path) 436 + Toru.Cache.create ~sw ~fs:env#fs ~version:v base_path) 436 437 | None -> cache 437 438 in 438 439 ··· 493 494 Printf.printf "\nRemoved %d directories successfully.\n" !removed_count 494 495 ); 495 496 () 496 - )); 497 + ); 497 498 0 498 499 499 500 (* Command definitions *) 500 - let info_cmd_def = 501 + let info_cmd_def env sw = 501 502 let doc = "Show cache statistics and location" in 502 - Cmd.v (Cmd.info "info" ~doc) Term.(const info_cmd $ global_opts_term) 503 + let term = Term.(const (info_cmd env sw) $ global_opts_term) in 504 + Cmd.v (Cmd.info "info" ~doc) term 503 505 504 - let list_cmd_def = 506 + let list_cmd_def env sw = 505 507 let sort_by = 506 508 let doc = "Sort files by size, age, or name" in 507 509 Arg.(value & opt (enum [("size", `Size); ("age", `Age); ("name", `Name)]) `Name & ··· 517 519 Arg.(value & opt (some int) None & info ["limit"; "n"] ~docv:"N" ~doc) 518 520 in 519 521 let doc = "List cached files with details" in 520 - Cmd.v (Cmd.info "list" ~doc) Term.(const list_cmd $ global_opts_term $ sort_by $ format $ limit) 522 + let term = Term.(const (list_cmd env sw) $ global_opts_term $ sort_by $ format $ limit) in 523 + Cmd.v (Cmd.info "list" ~doc) term 521 524 522 - let size_cmd_def = 525 + let size_cmd_def env sw = 523 526 let breakdown = 524 527 let doc = "Show size breakdown by file type and age" in 525 528 Arg.(value & flag & info ["breakdown"; "b"] ~doc) ··· 529 532 Arg.(value & flag & info ["human-readable"; "h"] ~doc) 530 533 in 531 534 let doc = "Show cache size information" in 532 - Cmd.v (Cmd.info "size" ~doc) Term.(const size_cmd $ global_opts_term $ breakdown $ human_readable) 535 + let term = Term.(const (size_cmd env sw) $ global_opts_term $ breakdown $ human_readable) in 536 + Cmd.v (Cmd.info "size" ~doc) term 533 537 534 - let clean_cmd_def = 538 + let clean_cmd_def env sw = 535 539 let max_size = 536 540 let doc = "Remove files to get cache under this size (e.g., 1GB, 500MB)" in 537 541 let parse_size s = ··· 569 573 Arg.(value & flag & info ["dry-run"; "n"] ~doc) 570 574 in 571 575 let doc = "Clean cache with various options" in 572 - Cmd.v (Cmd.info "clean" ~doc) Term.(const clean_cmd $ global_opts_term $ max_size $ max_age $ dry_run) 576 + let term = Term.(const (clean_cmd env sw) $ global_opts_term $ max_size $ max_age $ dry_run) in 577 + Cmd.v (Cmd.info "clean" ~doc) term 573 578 574 - let vacuum_cmd_def = 579 + let vacuum_cmd_def env sw = 575 580 let dry_run = 576 581 let doc = "Show what would be removed without actually removing" in 577 582 Arg.(value & flag & info ["dry-run"; "n"] ~doc) 578 583 in 579 584 let doc = "Remove empty directories and broken links" in 580 - Cmd.v (Cmd.info "vacuum" ~doc) Term.(const vacuum_cmd $ global_opts_term $ dry_run) 585 + let term = Term.(const (vacuum_cmd env sw) $ global_opts_term $ dry_run) in 586 + Cmd.v (Cmd.info "vacuum" ~doc) term 581 587 582 - let main_cmd = 588 + let main_cmd env sw = 583 589 let doc = "Toru cache management tool" in 584 590 let sdocs = Manpage.s_common_options in 585 591 let man = [ ··· 599 605 ] in 600 606 let default = Term.(const 0) in 601 607 Cmd.group ~default (Cmd.info "toru-cache" ~version:"0.1.0" ~doc ~sdocs ~man) 602 - [info_cmd_def; list_cmd_def; size_cmd_def; clean_cmd_def; vacuum_cmd_def] 608 + [info_cmd_def env sw; list_cmd_def env sw; size_cmd_def env sw; clean_cmd_def env sw; vacuum_cmd_def env sw] 603 609 604 610 let () = 605 - exit (Cmd.eval' main_cmd) 611 + Eio_main.run @@ fun env -> 612 + Eio.Switch.run @@ fun sw -> 613 + exit (Cmd.eval' (main_cmd env sw))
+7 -6
toru/bin/toru_make_registry_simple.ml
··· 25 25 Arg.(value & flag & info ["p"; "progress"] ~doc) 26 26 27 27 (* Main function *) 28 - let make_registry_main directory output recursive algorithm show_progress () = 29 - Eio_main.run @@ fun env -> 30 - Eio.Switch.run @@ fun sw -> 28 + let make_registry_main env sw directory output recursive algorithm show_progress () = 31 29 try 32 30 let dir_path = env#fs |> Eio.Path.(fun fs -> fs / directory) in 33 31 ··· 75 73 exit 1 76 74 77 75 (* Command definition *) 78 - let cmd = 76 + let cmd env sw = 79 77 let doc = "Generate Pooch-compatible registry files from directories (simple version)" in 80 78 let info = Cmd.info "toru-make-registry-simple" ~version:"1.0" ~doc in 81 79 82 - Cmd.v info Term.(const make_registry_main 80 + Cmd.v info Term.(const (make_registry_main env sw) 83 81 $ directory_arg $ output_arg $ recursive_arg 84 82 $ algorithm_arg $ progress_arg $ const ()) 85 83 86 - let () = Cmd.eval cmd |> exit 84 + let () = 85 + Eio_main.run @@ fun env -> 86 + Eio.Switch.run @@ fun sw -> 87 + exit (Cmd.eval (cmd env sw))
+4 -1
toru/dune-project
··· 14 14 yojson 15 15 cmdliner 16 16 progress 17 + re 17 18 fmt 18 19 ptime 19 - xdg) 20 + xdge 21 + (logs (>= 0.7.0)) 22 + unix) 20 23 (authors "Toru Development Team") 21 24 (maintainers "Toru Development Team") 22 25 (license MIT)
+5 -10
toru/lib/toru/cache.ml
··· 18 18 sw : Eio.Switch.t; 19 19 } 20 20 21 - let rec create ~sw ~fs ?app_name ?version path_str = 21 + let create ~sw ~fs ?version path_str = 22 + let base_path = Eio.Path.(fs / path_str) in 23 + { base_path; version; sw } 24 + 25 + let default_cache_path ?app_name () = 22 26 let app_name = Option.value app_name ~default:"toru" in 23 27 let xdg_dirs = Xdg.create ~env:Sys.getenv_opt () in 24 28 let cache_dir = Xdg.cache_dir xdg_dirs in 25 29 Filename.concat cache_dir app_name 26 - let base_path = Eio.Path.(fs / path_str) in 27 - { base_path; version; sw } 28 - 29 - and default ~sw ?app_name () = 30 - create ~sw path_str 31 - 32 - and default_cache_path ?app_name () = 33 - let app_name = Option.value app_name ~default:"toru" in 34 - (* Use the official xdg package for XDG Base Directory Specification *) 35 30 36 31 let base_path t = t.base_path 37 32 let version t = t.version
+18 -6
toru/lib/toru/downloader.ml
··· 3 3 name : string; 4 4 total_bytes : int64 option; 5 5 mutable current_bytes : int64; 6 + mutable last_percent : int; 6 7 } 7 8 8 9 let create ?total_bytes name = 9 - { name; total_bytes; current_bytes = 0L } 10 + Printf.printf "Starting download: %s\n%!" name; 11 + { name; total_bytes; current_bytes = 0L; last_percent = -1 } 10 12 11 13 let update t bytes = 12 - t.current_bytes <- bytes 13 - (* TODO: Integrate with progress library *) 14 + t.current_bytes <- bytes; 15 + match t.total_bytes with 16 + | Some total when total > 0L -> 17 + let percent = Int64.(to_int (div (mul bytes 100L) total)) in 18 + if percent > t.last_percent && percent mod 10 = 0 then ( 19 + t.last_percent <- percent; 20 + Printf.printf "\r%s: %d%% (%Ld/%Ld bytes)%!" 21 + t.name percent bytes total 22 + ) 23 + | _ -> 24 + (* Unknown size, just show bytes downloaded *) 25 + if Int64.rem bytes 1048576L = 0L then (* Every MB *) 26 + Printf.printf "\r%s: %Ld bytes downloaded%!" t.name bytes 14 27 15 - let finish _t = 16 - (* TODO: Finish progress bar *) 17 - () 28 + let finish t = 29 + Printf.printf "\n%s: Complete (%Ld bytes)\n%!" t.name t.current_bytes 18 30 end 19 31 20 32 module Config = struct
+8 -4
toru/lib/toru/dune
··· 1 1 (library 2 2 (public_name toru) 3 3 (name toru) 4 - (modules hash registry cache processors downloader make_registry toru) 4 + (modules hash registry cache processors downloader make_registry toru logging) 5 5 (libraries 6 6 eio 7 - eio_main 7 + eio.unix 8 8 digestif 9 9 yojson 10 10 cmdliner 11 - str 11 + re 12 12 ptime 13 - xdg)) 13 + xdg 14 + unix 15 + logs 16 + logs.fmt 17 + fmt)) 14 18 15 19 (documentation 16 20 (package toru))
+68 -11
toru/lib/toru/make_registry.ml
··· 34 34 35 35 (** Convert a glob pattern to a regex pattern *) 36 36 let glob_to_regex pattern = 37 - let escaped = Str.quote pattern in 38 - let with_wildcards = 39 - escaped 40 - |> Str.global_replace (Str.regexp_string "\\*\\*") "__DOUBLESTAR__" 41 - |> Str.global_replace (Str.regexp_string "\\*") "[^/]*" 42 - |> Str.global_replace (Str.regexp_string "__DOUBLESTAR__") ".*" 43 - |> Str.global_replace (Str.regexp_string "\\?") "[^/]" 37 + (* Process the pattern character by character *) 38 + let rec process i acc = 39 + if i >= String.length pattern then 40 + List.rev acc 41 + else 42 + match pattern.[i] with 43 + | '*' when i + 1 < String.length pattern && pattern.[i + 1] = '*' -> 44 + (* Handle ** for recursive matching *) 45 + process (i + 2) (Re.rep Re.any :: acc) 46 + | '*' -> 47 + (* Handle single * for non-recursive matching *) 48 + process (i + 1) (Re.rep (Re.compl [Re.char '/']) :: acc) 49 + | '?' -> 50 + (* Handle ? for single character *) 51 + process (i + 1) (Re.compl [Re.char '/'] :: acc) 52 + | c -> 53 + (* Regular character - add as literal *) 54 + process (i + 1) (Re.char c :: acc) 44 55 in 45 - Str.regexp ("^" ^ with_wildcards ^ "$") 56 + 57 + let regex_parts = process 0 [] in 58 + Re.compile (Re.seq (Re.bos :: regex_parts @ [Re.eos])) 46 59 47 60 (** Check if a file path matches any exclude patterns *) 48 61 let matches_exclude_pattern patterns path = 49 62 if patterns = [] then false else 50 63 let regexes = List.map glob_to_regex patterns in 51 - List.exists (fun regex -> Str.string_match regex path 0) regexes 64 + List.exists (fun regex -> Re.execp regex path) regexes 52 65 53 66 (** Get relative path from base directory to file *) 54 67 let relative_path ~base ~file = ··· 215 228 let entries = List.map (fun e -> e.entry) enhanced_entries in 216 229 List.fold_left (fun acc entry -> Registry.add entry acc) Registry.empty entries 217 230 231 + (** Scan directory with visual progress bar *) 232 + let scan_directory_with_bar ~sw ~env ?(options=default_options) dir_path = 233 + (* First count files for progress estimation *) 234 + let files = collect_files ~sw ~env ~options dir_path in 235 + let total_files = List.length files in 236 + 237 + if total_files = 0 then Registry.empty 238 + else ( 239 + Printf.printf "Scanning directory: %d files found\n%!" total_files; 240 + 241 + (* Process files with progress updates *) 242 + let progress_fn _filename current total = 243 + if current mod 10 = 0 || current = total then 244 + Printf.printf "\rScanning: %d/%d files (%d%%)%!" 245 + current total (current * 100 / total) 246 + in 247 + 248 + let enhanced_entries = 249 + process_files_concurrent ~sw ~env ~options ~base_path:dir_path 250 + ~progress:(Some progress_fn) files 251 + in 252 + 253 + Printf.printf "\nScanning complete: %d entries processed\n%!" total_files; 254 + 255 + let entries = List.map (fun e -> e.entry) enhanced_entries in 256 + List.fold_left (fun acc entry -> Registry.add entry acc) Registry.empty entries 257 + ) 258 + 218 259 (** Scan directory and create registry *) 219 260 let scan_directory ~sw ~env ?(options=default_options) dir_path = 220 261 scan_directory_with_progress ~sw ~env ~options ··· 241 282 List.fold_left (fun acc entry -> Registry.add entry acc) Registry.empty entries 242 283 243 284 (** Update existing registry with new/changed files *) 244 - let update_registry ~sw ~env ?(options=default_options) registry dir_path = 285 + let update_registry ~sw ~env ?(options=default_options) ?(show_progress=false) registry dir_path = 245 286 let files = collect_files ~sw ~env ~options dir_path in 246 287 let existing_entries = Registry.entries registry in 247 288 ··· 251 292 Hashtbl.add existing_map (Registry.filename entry) entry 252 293 ) existing_entries; 253 294 295 + (* Setup progress reporting if requested *) 296 + let total_files = List.length files in 297 + if show_progress && total_files > 0 then 298 + Printf.printf "Updating registry: %d files to check\n%!" total_files; 299 + 254 300 (* Process files and update registry *) 301 + let processed = ref 0 in 255 302 let updated_entries = List.filter_map (fun (file_path, relative_path) -> 256 - match Hashtbl.find_opt existing_map relative_path with 303 + let result = match Hashtbl.find_opt existing_map relative_path with 257 304 | Some existing_entry when not (file_changed file_path existing_entry) -> 258 305 (* File hasn't changed, keep existing entry *) 259 306 Some existing_entry ··· 262 309 (match hash_file ~sw ~env options.hash_algorithm file_path with 263 310 | Some hash -> Some (Registry.create_entry ~filename:relative_path ~hash ()) 264 311 | None -> None) 312 + in 313 + incr processed; 314 + if show_progress && (!processed mod 10 = 0 || !processed = total_files) then 315 + Printf.printf "\rUpdating: %d/%d files (%d%%)%!" 316 + !processed total_files (!processed * 100 / total_files); 317 + result 265 318 ) files in 319 + 320 + (* Finalize progress reporting *) 321 + if show_progress && total_files > 0 then 322 + Printf.printf "\nRegistry update complete: %d files processed\n%!" total_files; 266 323 267 324 (* Remove entries for files that no longer exist *) 268 325 let current_files = List.map snd files |> List.sort String.compare in
+9
toru/lib/toru/make_registry.mli
··· 36 36 progress:(string -> int -> int -> unit) -> 37 37 Eio.Fs.dir_ty Eio.Path.t -> 38 38 Registry.t 39 + 40 + (** Scan directory with visual progress bar *) 41 + val scan_directory_with_bar : 42 + sw:Eio.Switch.t -> 43 + env:Eio_unix.Stdenv.base -> 44 + ?options:options -> 45 + Eio.Fs.dir_ty Eio.Path.t -> 46 + Registry.t 39 47 40 48 (** {1 File list processing} *) 41 49 ··· 60 68 sw:Eio.Switch.t -> 61 69 env:Eio_unix.Stdenv.base -> 62 70 ?options:options -> 71 + ?show_progress:bool -> 63 72 Registry.t -> 64 73 Eio.Fs.dir_ty Eio.Path.t -> 65 74 Registry.t
+25 -4
toru/lib/toru/registry.ml
··· 74 74 let content = really_input_string ic (in_channel_length ic) in 75 75 of_string ?progress content 76 76 77 - let load_from_url ?progress:_ _url = 78 - (* For now, this is a placeholder. In a full implementation, this would 79 - use an HTTP client to fetch the URL content and parse it *) 80 - failwith "Registry.load_from_url not yet implemented - requires HTTP client" 77 + let load_from_url ?progress url = 78 + (* Use wget/curl via system command to fetch URL content *) 79 + let temp_file = Filename.temp_file "toru_registry" ".txt" in 80 + let finally () = 81 + try Sys.remove temp_file with _ -> () 82 + in 83 + Fun.protect ~finally @@ fun () -> 84 + let wget_cmd = Printf.sprintf "timeout 60 wget -q -O '%s' '%s'" temp_file url in 85 + let curl_cmd = Printf.sprintf "timeout 60 curl -s -o '%s' '%s'" temp_file url in 86 + 87 + (* Try wget first, then curl as fallback *) 88 + let result = 89 + if Sys.command wget_cmd = 0 then Ok () 90 + else if Sys.command curl_cmd = 0 then Ok () 91 + else Error "Failed to download registry (neither wget nor curl worked)" 92 + in 93 + 94 + match result with 95 + | Error msg -> failwith msg 96 + | Ok () -> 97 + let ic = open_in temp_file in 98 + let finally2 () = close_in ic in 99 + Fun.protect ~finally:finally2 @@ fun () -> 100 + let content = really_input_string ic (in_channel_length ic) in 101 + of_string ?progress content 81 102 82 103 let save path registry = 83 104 let file_path = Eio.Path.native_exn path in
+127 -12
toru/lib/toru/toru.ml
··· 4 4 registry : Registry.t; 5 5 downloader : (module Downloader.DOWNLOADER); 6 6 sw : Eio.Switch.t; 7 + env : Eio_unix.Stdenv.base; 7 8 } 8 9 9 10 let create ~sw ~env ~base_url ~cache_path ?version ?registry_file ?registry_url ?downloader () = 10 - let cache = Cache.create ~sw ~env ?version cache_path in 11 + let cache = Cache.create ~sw ~fs:env#fs ?version cache_path in 11 12 let registry = match registry_file with 12 13 | Some file -> Registry.load (Eio.Path.(env#fs / file)) 13 14 | None -> ··· 20 21 | None -> 21 22 Downloader.Downloaders.create_default ~env 22 23 in 23 - { base_url; cache; registry; downloader; sw } 24 + { base_url; cache; registry; downloader; sw; env } 24 25 25 26 let base_url t = t.base_url 26 27 let cache t = t.cache 27 28 let registry t = t.registry 28 29 29 - let fetch _t ~filename:_ ?processor:_ () = 30 - (* TODO: Implement file fetching *) 31 - Error "Toru.fetch not yet implemented" 30 + let rec fetch t ~filename ?processor () = 31 + (* 1. Check if file exists in registry *) 32 + match Registry.find filename t.registry with 33 + | None -> Error ("File not found in registry: " ^ filename) 34 + | Some entry -> 35 + (* 2. Check if file is already cached *) 36 + let cache_path = Cache.file_path t.cache filename in 37 + let cached_file_exists = Cache.exists t.cache filename in 38 + 39 + (* 3. If cached, verify hash *) 40 + if cached_file_exists then ( 41 + let entry_hash = Registry.hash entry in 42 + if Hash.verify cache_path entry_hash then ( 43 + (* File is cached and valid - apply processor if provided *) 44 + match processor with 45 + | None -> Ok cache_path 46 + | Some proc -> Ok (proc cache_path) 47 + ) else ( 48 + (* Cached file is corrupt - remove it and re-download *) 49 + (try Eio.Path.unlink cache_path with _ -> ()); 50 + (* Fall through to download *) 51 + fetch_file_to_cache t entry filename processor 52 + ) 53 + ) else ( 54 + (* File not cached - download it *) 55 + fetch_file_to_cache t entry filename processor 56 + ) 32 57 33 - let fetch_all _t ?concurrency:_ () = 34 - (* TODO: Implement batch fetching *) 35 - Error "Toru.fetch_all not yet implemented" 58 + and fetch_file_to_cache t entry filename processor = 59 + (* Ensure cache directory exists *) 60 + Cache.ensure_dir t.cache; 61 + 62 + (* Get the download URL *) 63 + let download_url = match Registry.custom_url entry with 64 + | Some custom -> custom 65 + | None -> t.base_url ^ filename 66 + in 67 + 68 + (* Download the file *) 69 + let (module D : Downloader.DOWNLOADER) = t.downloader in 70 + let downloader_instance = D.create ~sw:t.sw ~env:t.env () in 71 + let cache_path = Cache.file_path t.cache filename in 72 + let entry_hash = Registry.hash entry in 73 + 74 + match D.download downloader_instance ~url:download_url ~dest:cache_path ~hash:entry_hash () with 75 + | Error msg -> Error ("Download failed: " ^ msg) 76 + | Ok () -> 77 + (* Apply processor if provided *) 78 + match processor with 79 + | None -> Ok cache_path 80 + | Some proc -> Ok (proc cache_path) 81 + 82 + let fetch_all t ?concurrency:_ () = 83 + let all_entries = Registry.entries t.registry in 84 + let total = List.length all_entries in 85 + 86 + if total = 0 then 87 + Ok () 88 + else ( 89 + (* Create a semaphore to limit concurrency *) 90 + let results = ref [] in 91 + let errors = ref [] in 92 + let completed = ref 0 in 93 + 94 + (* Process entries in batches *) 95 + let rec process_batch entries = 96 + match entries with 97 + | [] -> 98 + if !completed = total then 99 + if List.length !errors > 0 then 100 + Error ("Multiple failures: " ^ String.concat "; " !errors) 101 + else 102 + Ok () 103 + else 104 + Error "Internal error: not all files processed" 105 + | entry :: rest -> 106 + let filename = Registry.filename entry in 107 + (match fetch t ~filename () with 108 + | Ok path -> 109 + results := path :: !results; 110 + incr completed 111 + | Error msg -> 112 + errors := (filename ^ ": " ^ msg) :: !errors; 113 + incr completed); 114 + process_batch rest 115 + in 116 + 117 + (* For now, implement simple sequential processing *) 118 + (* TODO: Add actual concurrent processing with Eio fibers *) 119 + process_batch all_entries 120 + ) 36 121 37 122 let load_registry t source = 38 123 let new_registry = ··· 51 136 let update_base_url t new_url = 52 137 { t with base_url = new_url } 53 138 54 - let retrieve ~sw:_ ~fs:_ ~url:_ ?hash:_ ?cache_path:_ ?downloader:_ () = 55 - (* TODO: Implement one-off file retrieval *) 56 - Error "Toru.retrieve not yet implemented" 139 + let retrieve ~sw ~env ~url ?hash ?cache_path ?downloader () = 140 + (* Get cache path *) 141 + let cache_dir = match cache_path with 142 + | Some path -> path 143 + | None -> Cache.default_cache_path ~app_name:"toru-temp" () 144 + in 145 + 146 + (* Create a temporary cache *) 147 + let cache = Cache.create ~sw ~fs:env#fs cache_dir in 148 + Cache.ensure_dir cache; 149 + 150 + (* Extract filename from URL *) 151 + let filename = 152 + match String.rindex_opt url '/' with 153 + | Some idx -> String.sub url (idx + 1) (String.length url - idx - 1) 154 + | None -> "downloaded_file" 155 + in 156 + 157 + (* Get downloader *) 158 + let downloader_module = match downloader with 159 + | Some d -> d 160 + | None -> Downloader.Downloaders.create_default ~env 161 + in 162 + 163 + (* Download the file *) 164 + let (module D : Downloader.DOWNLOADER) = downloader_module in 165 + let downloader_instance = D.create ~sw ~env () in 166 + let dest_path = Cache.file_path cache filename in 167 + 168 + match D.download downloader_instance ~url ~dest:dest_path ?hash () with 169 + | Error msg -> Error ("Download failed: " ^ msg) 170 + | Ok () -> Ok dest_path 57 171 58 172 let default_cache_path = Cache.default_cache_path 59 173 ··· 68 182 module Cache = Cache 69 183 module Downloader = Downloader 70 184 module Processors = Processors 71 - module Make_registry = Make_registry 185 + module Make_registry = Make_registry 186 + module Logging = Logging
+3 -2
toru/lib/toru/toru.mli
··· 61 61 (** Retrieve a single file without registry *) 62 62 val retrieve : 63 63 sw:Eio.Switch.t -> 64 - fs:Eio.Fs.dir_ty Eio.Path.t -> 64 + env:Eio_unix.Stdenv.base -> 65 65 url:string -> 66 66 ?hash:Hash.t -> 67 67 ?cache_path:string -> ··· 83 83 module Cache = Cache 84 84 module Downloader = Downloader 85 85 module Processors = Processors 86 - module Make_registry = Make_registry 86 + module Make_registry = Make_registry 87 + module Logging = Logging
+5
toru/test/dune
··· 61 61 (executable 62 62 (public_name downloader_demo) 63 63 (name downloader_demo) 64 + (libraries toru eio eio_main)) 65 + 66 + (executable 67 + (public_name test_tessera_integration) 68 + (name test_tessera_integration) 64 69 (libraries toru eio eio_main))
+2 -2
toru/test/test_cache.ml
··· 26 26 Printf.printf "=== Testing basic Cache functionality ===\n"; 27 27 28 28 (* Create a test cache *) 29 - let cache = Cache.create ~sw ~env "test_cache" in 29 + let cache = Cache.create ~sw ~fs:env#fs "test_cache" in 30 30 Printf.printf "Created cache at: %s\n" 31 31 (Eio.Path.native_exn (Cache.base_path cache)); 32 32 ··· 35 35 Printf.printf "Default cache path: %s\n" default_path; 36 36 37 37 (* Test XDG cache path detection *) 38 - let cache_with_version = Cache.create ~sw ~env ~version:"v1.0" "test_cache_versioned" in 38 + let cache_with_version = Cache.create ~sw ~fs:env#fs ~version:"v1.0" "test_cache_versioned" in 39 39 Printf.printf "Cache with version at: %s\n" 40 40 (Eio.Path.native_exn (Cache.base_path cache_with_version)); 41 41
+3 -3
toru/test/test_cache_xdg.ml
··· 73 73 Eio_main.run @@ fun env -> 74 74 Eio.Switch.run @@ fun sw -> 75 75 (* Test cache without version *) 76 - let cache_no_version = Cache.create ~sw ~env "test_cache_no_version" in 76 + let cache_no_version = Cache.create ~sw ~fs:env#fs "test_cache_no_version" in 77 77 let base_path = Cache.base_path cache_no_version in 78 78 Printf.printf "Cache without version: %s\n" (Eio.Path.native_exn base_path); 79 79 80 80 (* Test cache with version *) 81 - let cache_with_version = Cache.create ~sw ~env ~version:"v2.1" "test_cache_with_version" in 81 + let cache_with_version = Cache.create ~sw ~fs:env#fs ~version:"v2.1" "test_cache_with_version" in 82 82 let versioned_path = Cache.base_path cache_with_version in 83 83 Printf.printf "Cache with version: %s\n" (Eio.Path.native_exn versioned_path); 84 84 ··· 102 102 103 103 Eio_main.run @@ fun env -> 104 104 Eio.Switch.run @@ fun sw -> 105 - let cache = Cache.create ~sw ~env "test_cache_mgmt" in 105 + let cache = Cache.create ~sw ~fs:env#fs "test_cache_mgmt" in 106 106 Cache.ensure_dir cache; 107 107 108 108 (* Create test files with different content sizes *)
+2 -1
toru/test/test_xdg_integration.ml
··· 69 69 Eio_main.run @@ fun env -> 70 70 Eio.Switch.run @@ fun sw -> 71 71 (* Create cache using default XDG paths *) 72 - let cache = Toru.Cache.default ~sw ~env ~app_name:"xdg-test" () in 72 + let base_path = Toru.Cache.default_cache_path ~app_name:"xdg-test" () in 73 + let cache = Toru.Cache.create ~sw ~fs:env#fs base_path in 73 74 let base_path = Toru.Cache.base_path cache in 74 75 let path_str = Eio.Path.native_exn base_path in 75 76