My aggregated monorepo of OCaml code, automaintained
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Add dry-run mode to bushel sync command

- Add --dry-run / -n flag to show commands without executing
- Refactor sync steps to accept dry_run parameter
- Show command details in dry-run mode for each step
- Use Eio.Path.mkdirs for recursive directory creation
- Integrate srcsetter as a library for direct invocation
- Fix sortal sync to skip PNG conversion when PNG exists

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

+501 -221
+14 -5
ocaml-bushel/bin/main.ml
··· 247 247 let doc = "Also upload to Typesense (remote sync)." in 248 248 Arg.(value & flag & info ["remote"] ~doc) 249 249 in 250 + let dry_run = 251 + let doc = "Show what commands would be run without executing them." in 252 + Arg.(value & flag & info ["dry-run"; "n"] ~doc) 253 + in 250 254 let only = 251 255 let doc = "Only run specific step (images, srcsetter, thumbs, faces, videos, typesense)." in 252 256 Arg.(value & opt (some string) None & info ["only"] ~docv:"STEP" ~doc) 253 257 in 254 - let run () config_file data_dir remote only = 258 + let run () config_file data_dir remote dry_run only = 255 259 match load_config config_file with 256 260 | Error e -> Printf.eprintf "Config error: %s\n" e; 1 257 261 | Ok config -> ··· 274 278 let fs = Eio.Stdenv.fs env in 275 279 let entries = Bushel_eio.Bushel_loader.load fs data_dir in 276 280 277 - Printf.printf "Running sync pipeline...\n"; 281 + Printf.printf "%s sync pipeline...\n" (if dry_run then "Dry-run" else "Running"); 278 282 List.iter (fun step -> 279 283 Printf.printf " - %s\n" (Bushel_sync.string_of_step step) 280 284 ) steps; 281 285 Printf.printf "\n"; 282 286 283 - let results = Bushel_sync.run ~env ~config ~steps ~entries in 287 + let results = Bushel_sync.run ~dry_run ~env ~config ~steps ~entries in 284 288 285 289 Printf.printf "\nResults:\n"; 286 290 List.iter (fun r -> ··· 288 292 Printf.printf " [%s] %s: %s\n" 289 293 status 290 294 (Bushel_sync.string_of_step r.step) 291 - r.message 295 + r.message; 296 + (* In dry-run mode, show the details (commands) *) 297 + if dry_run && r.Bushel_sync.details <> [] then begin 298 + List.iter (fun d -> Printf.printf " %s\n" d) r.Bushel_sync.details 299 + end 292 300 ) results; 293 301 294 302 let failures = List.filter (fun r -> not r.Bushel_sync.success) results in ··· 304 312 `P "4. $(b,faces) - Fetch contact face thumbnails from Immich"; 305 313 `P "5. $(b,videos) - Fetch video thumbnails from PeerTube"; 306 314 `P "6. $(b,typesense) - Upload to Typesense (with --remote)"; 315 + `P "Use $(b,--dry-run) to see what commands would be run without executing them."; 307 316 ] in 308 317 let info = Cmd.info "sync" ~doc ~man in 309 - Cmd.v info Term.(const run $ logging_t $ config_file $ data_dir $ remote $ only) 318 + Cmd.v info Term.(const run $ logging_t $ config_file $ data_dir $ remote $ dry_run $ only) 310 319 311 320 (** {1 Paper Add Command} *) 312 321
+191 -111
ocaml-bushel/lib_sync/bushel_sync.ml
··· 77 77 78 78 (** {1 Rsync Images} *) 79 79 80 - let sync_images ~proc_mgr config = 80 + let sync_images ~dry_run ~fs ~proc_mgr config = 81 81 Log.info (fun m -> m "Syncing images from remote..."); 82 - let cmd = Bushel_config.rsync_command config in 83 - Log.debug (fun m -> m "Running: %s" cmd); 84 - 85 - (* Ensure local directory exists *) 86 82 let local_dir = config.Bushel_config.local_source_dir in 87 - if not (Sys.file_exists local_dir) then begin 88 - Log.info (fun m -> m "Creating directory: %s" local_dir); 89 - Unix.mkdir local_dir 0o755 90 - end; 83 + let args = ["rsync"; "-avz"; 84 + Bushel_config.rsync_source config ^ "/"; 85 + local_dir ^ "/"] in 86 + let cmd = String.concat " " args in 91 87 92 - try 93 - let args = ["rsync"; "-avz"; 94 - Bushel_config.rsync_source config ^ "/"; 95 - local_dir ^ "/"] in 96 - Eio.Process.run proc_mgr args; 88 + if dry_run then begin 97 89 { step = Images; success = true; 98 - message = "Images synced from remote"; 99 - details = [] } 100 - with e -> 101 - { step = Images; success = false; 102 - message = Printf.sprintf "Rsync failed: %s" (Printexc.to_string e); 103 - details = [] } 90 + message = "Would run rsync"; 91 + details = [cmd] } 92 + end else begin 93 + Log.debug (fun m -> m "Running: %s" cmd); 94 + 95 + (* Ensure local directory exists (recursive) *) 96 + let local_path = Eio.Path.(fs / local_dir) in 97 + Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 local_path; 98 + 99 + try 100 + Eio.Process.run proc_mgr args; 101 + { step = Images; success = true; 102 + message = "Images synced from remote"; 103 + details = [] } 104 + with e -> 105 + { step = Images; success = false; 106 + message = Printf.sprintf "Rsync failed: %s" (Printexc.to_string e); 107 + details = [] } 108 + end 104 109 105 110 (** {1 Srcsetter} *) 106 111 107 - let run_srcsetter ~proc_mgr config = 112 + let run_srcsetter ~dry_run ~fs ~proc_mgr config = 108 113 Log.info (fun m -> m "Running srcsetter..."); 109 114 let src_dir = config.Bushel_config.local_source_dir in 110 115 let dst_dir = config.Bushel_config.local_output_dir in 111 116 112 - (* Ensure output directory exists *) 113 - if not (Sys.file_exists dst_dir) then begin 114 - Log.info (fun m -> m "Creating directory: %s" dst_dir); 115 - Unix.mkdir dst_dir 0o755 116 - end; 117 + if dry_run then begin 118 + { step = Srcsetter; success = true; 119 + message = "Would run srcsetter"; 120 + details = [Printf.sprintf "srcsetter %s %s" src_dir dst_dir] } 121 + end else begin 122 + (* Ensure output directory exists (recursive) *) 123 + let src_path = Eio.Path.(fs / src_dir) in 124 + let dst_path = Eio.Path.(fs / dst_dir) in 125 + Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 dst_path; 117 126 118 - try 119 - let args = ["srcsetter"; src_dir; dst_dir] in 120 - Eio.Process.run proc_mgr args; 121 - { step = Srcsetter; success = true; 122 - message = "Srcsetter completed"; 123 - details = [] } 124 - with e -> 125 - { step = Srcsetter; success = false; 126 - message = Printf.sprintf "Srcsetter failed: %s" (Printexc.to_string e); 127 - details = [] } 127 + try 128 + let entries = Srcsetter_cmd.run 129 + ~proc_mgr 130 + ~src_dir:src_path 131 + ~dst_dir:dst_path 132 + ~preserve:true 133 + () 134 + in 135 + { step = Srcsetter; success = true; 136 + message = Printf.sprintf "Srcsetter completed: %d images processed" 137 + (List.length entries); 138 + details = [] } 139 + with e -> 140 + { step = Srcsetter; success = false; 141 + message = Printf.sprintf "Srcsetter failed: %s" (Printexc.to_string e); 142 + details = [] } 143 + end 128 144 129 145 (** {1 Paper Thumbnails} *) 130 146 131 - let generate_paper_thumbnails ~proc_mgr config = 147 + let generate_paper_thumbnails ~dry_run ~fs ~proc_mgr config = 132 148 Log.info (fun m -> m "Generating paper thumbnails..."); 133 149 let pdfs_dir = config.Bushel_config.paper_pdfs_dir in 134 150 let output_dir = Bushel_config.paper_thumbs_dir config in ··· 139 155 message = "No PDFs directory"; 140 156 details = [] } 141 157 end else begin 142 - (* Ensure output directory exists *) 143 - if not (Sys.file_exists output_dir) then 144 - Unix.mkdir output_dir 0o755; 145 - 146 158 let pdfs = Sys.readdir pdfs_dir |> Array.to_list 147 159 |> List.filter (fun f -> Filename.check_suffix f ".pdf") in 148 160 149 - let results = List.map (fun pdf_file -> 150 - let slug = Filename.chop_extension pdf_file in 151 - let pdf_path = Filename.concat pdfs_dir pdf_file in 152 - let output_path = Filename.concat output_dir (slug ^ ".webp") in 153 - 154 - if Sys.file_exists output_path then begin 155 - Log.debug (fun m -> m "Skipping %s: thumbnail exists" slug); 156 - `Skipped slug 157 - end else begin 158 - Log.info (fun m -> m "Generating thumbnail for %s" slug); 159 - try 160 - (* ImageMagick command: render PDF at 600 DPI, crop top 50%, resize to 2048px *) 161 + if dry_run then begin 162 + let would_run = List.filter_map (fun pdf_file -> 163 + let slug = Filename.chop_extension pdf_file in 164 + let pdf_path = Filename.concat pdfs_dir pdf_file in 165 + let output_path = Filename.concat output_dir (slug ^ ".webp") in 166 + if Sys.file_exists output_path then None 167 + else begin 161 168 let args = [ 162 - "magick"; 163 - "-density"; "600"; 164 - "-quality"; "100"; 165 - pdf_path ^ "[0]"; (* First page only *) 166 - "-gravity"; "North"; 167 - "-crop"; "100%x50%+0+0"; 168 - "-resize"; "2048x"; 169 - output_path 169 + "magick"; "-density"; "600"; "-quality"; "100"; 170 + pdf_path ^ "[0]"; "-gravity"; "North"; 171 + "-crop"; "100%x50%+0+0"; "-resize"; "2048x"; output_path 170 172 ] in 171 - Eio.Process.run proc_mgr args; 172 - `Ok slug 173 - with e -> 174 - Log.err (fun m -> m "Failed to generate thumbnail for %s: %s" 175 - slug (Printexc.to_string e)); 176 - `Error slug 177 - end 178 - ) pdfs in 173 + Some (String.concat " " args) 174 + end 175 + ) pdfs in 176 + let skipped = List.length pdfs - List.length would_run in 177 + { step = Thumbs; success = true; 178 + message = Printf.sprintf "Would generate %d thumbnails (%d already exist)" 179 + (List.length would_run) skipped; 180 + details = would_run } 181 + end else begin 182 + (* Ensure output directory exists (recursive) *) 183 + let output_path = Eio.Path.(fs / output_dir) in 184 + Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 output_path; 179 185 180 - let ok_count = List.fold_left (fun acc r -> match r with `Ok _ -> acc + 1 | _ -> acc) 0 results in 181 - let skipped_count = List.fold_left (fun acc r -> match r with `Skipped _ -> acc + 1 | _ -> acc) 0 results in 182 - let error_count = List.fold_left (fun acc r -> match r with `Error _ -> acc + 1 | _ -> acc) 0 results in 186 + let results = List.map (fun pdf_file -> 187 + let slug = Filename.chop_extension pdf_file in 188 + let pdf_path = Filename.concat pdfs_dir pdf_file in 189 + let output_path = Filename.concat output_dir (slug ^ ".webp") in 183 190 184 - { step = Thumbs; success = error_count = 0; 185 - message = Printf.sprintf "%d generated, %d skipped, %d errors" 186 - ok_count skipped_count error_count; 187 - details = List.filter_map (fun r -> match r with `Error s -> Some s | _ -> None) results } 191 + if Sys.file_exists output_path then begin 192 + Log.debug (fun m -> m "Skipping %s: thumbnail exists" slug); 193 + `Skipped slug 194 + end else begin 195 + Log.info (fun m -> m "Generating thumbnail for %s" slug); 196 + try 197 + (* ImageMagick command: render PDF at 600 DPI, crop top 50%, resize to 2048px *) 198 + let args = [ 199 + "magick"; 200 + "-density"; "600"; 201 + "-quality"; "100"; 202 + pdf_path ^ "[0]"; (* First page only *) 203 + "-gravity"; "North"; 204 + "-crop"; "100%x50%+0+0"; 205 + "-resize"; "2048x"; 206 + output_path 207 + ] in 208 + Eio.Process.run proc_mgr args; 209 + `Ok slug 210 + with e -> 211 + Log.err (fun m -> m "Failed to generate thumbnail for %s: %s" 212 + slug (Printexc.to_string e)); 213 + `Error slug 214 + end 215 + ) pdfs in 216 + 217 + let ok_count = List.fold_left (fun acc r -> match r with `Ok _ -> acc + 1 | _ -> acc) 0 results in 218 + let skipped_count = List.fold_left (fun acc r -> match r with `Skipped _ -> acc + 1 | _ -> acc) 0 results in 219 + let error_count = List.fold_left (fun acc r -> match r with `Error _ -> acc + 1 | _ -> acc) 0 results in 220 + 221 + { step = Thumbs; success = error_count = 0; 222 + message = Printf.sprintf "%d generated, %d skipped, %d errors" 223 + ok_count skipped_count error_count; 224 + details = List.filter_map (fun r -> match r with `Error s -> Some s | _ -> None) results } 225 + end 188 226 end 189 227 190 228 (** {1 Contact Faces} *) 191 229 192 - let sync_faces ~proc_mgr config entries = 230 + let sync_faces ~dry_run ~proc_mgr config entries = 193 231 Log.info (fun m -> m "Syncing contact faces from Immich..."); 194 232 let output_dir = Bushel_config.contact_faces_dir config in 195 233 ··· 201 239 details = [e] } 202 240 | Ok api_key -> 203 241 let contacts = Bushel.Entry.contacts entries in 204 - let results = Bushel_immich.fetch_all_faces 205 - ~proc_mgr 206 - ~endpoint:config.immich_endpoint 207 - ~api_key 208 - ~output_dir 209 - contacts in 242 + if dry_run then begin 243 + let would_fetch = List.filter (fun c -> 244 + let handle = Sortal_schema.Contact.handle c in 245 + let output_path = Filename.concat output_dir (handle ^ ".jpg") in 246 + not (Sys.file_exists output_path) 247 + ) contacts in 248 + let skipped = List.length contacts - List.length would_fetch in 249 + { step = Faces; success = true; 250 + message = Printf.sprintf "Would fetch %d faces from Immich (%d already exist)" 251 + (List.length would_fetch) skipped; 252 + details = List.map (fun c -> 253 + Printf.sprintf "curl -H 'X-Api-Key: ...' %s/api/search/person?name=%s" 254 + config.immich_endpoint 255 + (Uri.pct_encode (Sortal_schema.Contact.name c)) 256 + ) (List.filteri (fun i _ -> i < 5) would_fetch) @ 257 + (if List.length would_fetch > 5 then ["...and more"] else []) } 258 + end else begin 259 + let results = Bushel_immich.fetch_all_faces 260 + ~proc_mgr 261 + ~endpoint:config.immich_endpoint 262 + ~api_key 263 + ~output_dir 264 + contacts in 210 265 211 - let ok_count = List.length (List.filter (fun (_, r) -> 212 - match r with Bushel_immich.Ok _ -> true | _ -> false) results) in 213 - let skipped_count = List.length (List.filter (fun (_, r) -> 214 - match r with Bushel_immich.Skipped _ -> true | _ -> false) results) in 215 - let error_count = List.length (List.filter (fun (_, r) -> 216 - match r with Bushel_immich.Error _ -> true | _ -> false) results) in 266 + let ok_count = List.length (List.filter (fun (_, r) -> 267 + match r with Bushel_immich.Ok _ -> true | _ -> false) results) in 268 + let skipped_count = List.length (List.filter (fun (_, r) -> 269 + match r with Bushel_immich.Skipped _ -> true | _ -> false) results) in 270 + let error_count = List.length (List.filter (fun (_, r) -> 271 + match r with Bushel_immich.Error _ -> true | _ -> false) results) in 217 272 218 - { step = Faces; success = true; 219 - message = Printf.sprintf "%d fetched, %d skipped, %d errors" 220 - ok_count skipped_count error_count; 221 - details = List.filter_map (fun (h, r) -> 222 - match r with Bushel_immich.Error e -> Some (h ^ ": " ^ e) | _ -> None 223 - ) results } 273 + { step = Faces; success = true; 274 + message = Printf.sprintf "%d fetched, %d skipped, %d errors" 275 + ok_count skipped_count error_count; 276 + details = List.filter_map (fun (h, r) -> 277 + match r with Bushel_immich.Error e -> Some (h ^ ": " ^ e) | _ -> None 278 + ) results } 279 + end 224 280 225 281 (** {1 Video Thumbnails} *) 226 282 227 - let sync_video_thumbnails ~proc_mgr config = 283 + let sync_video_thumbnails ~dry_run ~proc_mgr config = 228 284 Log.info (fun m -> m "Syncing video thumbnails from PeerTube..."); 229 285 let output_dir = Bushel_config.video_thumbs_dir config in 230 286 let videos_yml = Filename.concat config.data_dir "videos.yml" in 231 287 232 288 let index = Bushel_peertube.VideoIndex.load_file videos_yml in 233 - let count = List.length (Bushel_peertube.VideoIndex.to_list index) in 289 + let video_list = Bushel_peertube.VideoIndex.to_list index in 290 + let count = List.length video_list in 234 291 235 292 if count = 0 then begin 236 293 Log.info (fun m -> m "No videos in index"); 237 294 { step = Videos; success = true; 238 295 message = "No videos in index"; 239 296 details = [] } 297 + end else if dry_run then begin 298 + let would_fetch = List.filter (fun (uuid, _server) -> 299 + let output_path = Filename.concat output_dir (uuid ^ ".jpg") in 300 + not (Sys.file_exists output_path) 301 + ) video_list in 302 + let skipped = count - List.length would_fetch in 303 + { step = Videos; success = true; 304 + message = Printf.sprintf "Would fetch %d video thumbnails from PeerTube (%d already exist)" 305 + (List.length would_fetch) skipped; 306 + details = List.map (fun (uuid, server) -> 307 + Printf.sprintf "curl <server:%s>/api/v1/videos/%s -> %s.jpg" server uuid uuid 308 + ) (List.filteri (fun i _ -> i < 5) would_fetch) @ 309 + (if List.length would_fetch > 5 then ["...and more"] else []) } 240 310 end else begin 241 311 let results = Bushel_peertube.fetch_thumbnails_from_index 242 312 ~proc_mgr ··· 261 331 262 332 (** {1 Typesense Upload} *) 263 333 264 - let upload_typesense config _entries = 334 + let upload_typesense ~dry_run config _entries = 265 335 Log.info (fun m -> m "Uploading to Typesense..."); 266 336 267 337 match Bushel_config.typesense_api_key config with ··· 270 340 message = "Missing Typesense API key"; 271 341 details = [e] } 272 342 | Ok _api_key -> 273 - (* TODO: Implement actual Typesense upload using bushel-typesense *) 274 - { step = Typesense; success = true; 275 - message = "Typesense upload (not yet implemented)"; 276 - details = [] } 343 + if dry_run then 344 + { step = Typesense; success = true; 345 + message = "Would upload to Typesense"; 346 + details = ["POST to Typesense API (not yet implemented)"] } 347 + else 348 + (* TODO: Implement actual Typesense upload using bushel-typesense *) 349 + { step = Typesense; success = true; 350 + message = "Typesense upload (not yet implemented)"; 351 + details = [] } 277 352 278 353 (** {1 Run Pipeline} *) 279 354 280 - let run ~env ~config ~steps ~entries = 355 + let run ~dry_run ~env ~config ~steps ~entries = 281 356 let proc_mgr = Eio.Stdenv.process_mgr env in 357 + let fs = Eio.Stdenv.fs env in 282 358 283 359 let results = List.map (fun step -> 284 - Log.info (fun m -> m "Running step: %s" (string_of_step step)); 360 + Log.info (fun m -> m "%s step: %s" 361 + (if dry_run then "Dry-run" else "Running") 362 + (string_of_step step)); 285 363 match step with 286 - | Images -> sync_images ~proc_mgr config 287 - | Srcsetter -> run_srcsetter ~proc_mgr config 288 - | Thumbs -> generate_paper_thumbnails ~proc_mgr config 289 - | Faces -> sync_faces ~proc_mgr config entries 290 - | Videos -> sync_video_thumbnails ~proc_mgr config 291 - | Typesense -> upload_typesense config entries 364 + | Images -> sync_images ~dry_run ~fs ~proc_mgr config 365 + | Srcsetter -> run_srcsetter ~dry_run ~fs ~proc_mgr config 366 + | Thumbs -> generate_paper_thumbnails ~dry_run ~fs ~proc_mgr config 367 + | Faces -> sync_faces ~dry_run ~proc_mgr config entries 368 + | Videos -> sync_video_thumbnails ~dry_run ~proc_mgr config 369 + | Typesense -> upload_typesense ~dry_run config entries 292 370 ) steps in 293 371 294 372 (* Summary *) 295 373 let success_count = List.length (List.filter (fun r -> r.success) results) in 296 374 let total = List.length results in 297 - Log.info (fun m -> m "Sync complete: %d/%d steps succeeded" success_count total); 375 + Log.info (fun m -> m "%s complete: %d/%d steps succeeded" 376 + (if dry_run then "Dry-run" else "Sync") 377 + success_count total); 298 378 299 379 results
+2 -1
ocaml-bushel/lib_sync/dune
··· 14 14 ptime 15 15 logs 16 16 fmt 17 - sortal.schema)) 17 + sortal.schema 18 + srcsetter-cmd))
+5 -82
srcsetter/bin/srcsetter.ml
··· 15 15 PERFORMANCE OF THIS SOFTWARE. 16 16 *) 17 17 18 - module SC = Srcsetter_cmd 19 - 20 - let min_interval = Some (Mtime.Span.of_uint64_ns 1000L) 21 - 22 - let stage1 { SC.img_exts; src_dir; _ } = 23 - let filter f = List.exists (Filename.check_suffix ("." ^ f)) img_exts in 24 - let fs = SC.file_seq ~filter src_dir in 25 - let total = Seq.length fs in 26 - Format.printf "[1/3] Scanned %d images from %a.\n%!" total Eio.Path.pp src_dir; 27 - fs 28 - 29 - let stage2 ({ SC.max_fibers; dst_dir; _ } as cfg) fs = 30 - let display = 31 - Progress.Display.start 32 - ~config:(Progress.Config.v ~persistent:false ~min_interval ()) 33 - (SC.main_bar_heading "[2/3] Processing images..." (Seq.length fs)) 34 - in 35 - let [ _; main_rep ] = Progress.Display.reporters display in 36 - let ents = ref [] in 37 - SC.iter_seq_p ~max_fibers 38 - (fun src -> 39 - let ent = SC.process_file cfg (display, main_rep) src in 40 - ents := ent :: !ents) 41 - fs; 42 - Progress.Display.finalise display; 43 - Format.printf "[2/3] Processed %d images to %a.\n%!" (List.length !ents) 44 - Eio.Path.pp dst_dir; 45 - !ents 46 - 47 - let stage3 ({ SC.dst_dir; max_fibers; _ } as cfg) ents = 48 - let ents_seq = List.to_seq ents in 49 - let oents = ref [] in 50 - let display = 51 - Progress.Display.start 52 - ~config:(Progress.Config.v ~persistent:false ~min_interval ()) 53 - (SC.main_bar_heading "[3/3] Verifying images..." (List.length ents)) 54 - in 55 - let [ _; rep ] = Progress.Display.reporters display in 56 - SC.iter_seq_p ~max_fibers 57 - (fun ent -> 58 - let w, h = SC.dims cfg Eio.Path.(dst_dir / Srcsetter.name ent) in 59 - let variants = 60 - Srcsetter.MS.bindings ent.variants 61 - |> List.map (fun (k, _) -> (k, SC.dims cfg Eio.Path.(dst_dir / k))) 62 - |> Srcsetter.MS.of_list 63 - in 64 - rep 1; 65 - oents := { ent with Srcsetter.dims = (w, h); variants } :: !oents) 66 - ents_seq; 67 - Progress.Display.finalise display; 68 - Printf.printf "[3/3] Verified %d generated image sizes.\n%!" 69 - (List.length ents); 70 - !oents 71 - 72 18 let _ = 73 19 (* TODO cmdliner *) 74 20 Eio_main.run @@ fun env -> 75 21 Eio.Switch.run @@ fun _ -> 22 + let fs = Eio.Stdenv.fs env in 76 23 let path_env p = 77 - if String.starts_with ~prefix:"/" p then Eio.(Path.(Stdenv.fs env / p)) 78 - else Eio.(Path.(Stdenv.cwd env / p)) 24 + if String.starts_with ~prefix:"/" p then Eio.Path.(fs / p) 25 + else Eio.Path.(Eio.Stdenv.cwd env / p) 79 26 in 80 27 let src_dir = path_env Sys.argv.(1) in 81 28 let dst_dir = path_env Sys.argv.(2) in 82 29 let proc_mgr = Eio.Stdenv.process_mgr env in 83 - let idx_file = "index.json" in 84 - let img_widths = 85 - [ 320; 480; 640; 768; 1024; 1280; 1440; 1600; 1920; 2560; 3840 ] 86 - in 87 - let img_exts = [ "png"; "webp"; "jpeg"; "jpg"; "bmp"; "heic"; "gif" ] in 88 - let img_widths = List.sort (fun a b -> compare b a) img_widths in 89 - let max_fibers = 8 in 90 - let cfg = 91 - { 92 - Srcsetter_cmd.dummy = false; 93 - preserve = true; 94 - proc_mgr; 95 - src_dir; 96 - dst_dir; 97 - idx_file; 98 - img_widths; 99 - img_exts; 100 - max_fibers; 101 - } 102 - in 103 - let fs = stage1 cfg in 104 - let ents = stage2 cfg fs in 105 - let oents = stage3 cfg ents in 106 - let j = Srcsetter.list_to_json oents |> Result.get_ok in 107 - let idx = Eio.Path.(dst_dir / idx_file) in 108 - Eio.Path.save ~append:false ~create:(`Or_truncate 0o644) idx j 30 + let _entries = Srcsetter_cmd.run ~proc_mgr ~src_dir ~dst_dir () in 31 + ()
+1 -1
srcsetter/lib/dune
··· 2 2 (name srcsetter_cmd) 3 3 (public_name srcsetter-cmd) 4 4 (modules srcsetter_cmd) 5 - (libraries srcsetter eio fpath progress)) 5 + (libraries srcsetter eio fpath progress mtime)) 6 6 7 7 (library 8 8 (name srcsetter)
+166 -21
srcsetter/lib/srcsetter_cmd.ml
··· 101 101 let output = Process.parse_out proc_mgr Buf_read.take_all args in 102 102 Scanf.sscanf output "%d %d" (fun w h -> (w, h)) 103 103 104 + (** [try_dims cfg path] returns [Some (w, h)] if identify succeeds, [None] otherwise. *) 105 + let try_dims cfg path = 106 + try Some (dims cfg path) 107 + with _ -> None 108 + 109 + (** [file_size path] returns the size of the file in bytes. *) 110 + let file_size path = 111 + let stat = Path.stat ~follow:true path in 112 + Optint.Int63.to_int stat.size 113 + 114 + (** [is_valid_image cfg path] returns true if the file exists, has non-zero size, 115 + and identify can read its dimensions. *) 116 + let is_valid_image cfg path = 117 + Path.is_file path && 118 + file_size path > 0 && 119 + Option.is_some (try_dims cfg path) 120 + 121 + (** [width_from_variant_name name] extracts the width from a variant filename. 122 + 123 + Variant filenames have the form "path/name.WIDTH.webp". Returns [None] for 124 + base images (no width suffix). *) 125 + let width_from_variant_name name = 126 + let base = Filename.chop_extension name in (* remove .webp *) 127 + let parts = String.split_on_char '.' base in 128 + match List.rev parts with 129 + | last :: _ -> ( 130 + match int_of_string_opt last with 131 + | Some w -> Some w 132 + | None -> None) 133 + | [] -> None 134 + 104 135 (** [run cfg args] executes a shell command unless in dummy mode. *) 105 136 let run { dummy; proc_mgr; _ } args = 106 137 if not dummy then Process.run proc_mgr args ··· 166 197 let dst = Path.(dst_dir / dst_file) in 167 198 (src_file, dst_file, w, needs_conversion ~preserve dst) 168 199 169 - (** [calc_needed cfg ~img_widths ~w src] computes which conversions are needed. 170 - 171 - Returns [(base, variants)] where each is tagged with [`Exists] or [`Todo]. *) 172 - let calc_needed { src_dir; dst_dir; preserve; _ } ~img_widths ~w src = 173 - let check_dst fname tw = 174 - let dst = Path.(dst_dir / fname) in 175 - let ent = (src, dst, tw) in 176 - if preserve && Path.is_file dst then `Exists ent else `Todo ent 177 - in 178 - let file = relativize_path src_dir src in 179 - let base_name = Filename.chop_extension file in 180 - let base = check_dst (Printf.sprintf "%s.webp" base_name) w in 181 - let variants = 182 - List.filter_map 183 - (fun tw -> 184 - if tw <= w then Some (check_dst (Printf.sprintf "%s.%d.webp" base_name tw) tw) 185 - else None) 186 - img_widths 187 - in 188 - (base, variants) 189 - 190 200 (** {1 Progress Bar Rendering} *) 191 201 192 202 (** [main_bar total] creates a progress bar for [total] items. *) ··· 269 279 main_rep 1 270 280 end; 271 281 ent 282 + 283 + (** {1 Pipeline Execution} *) 284 + 285 + let min_interval = Some (Mtime.Span.of_uint64_ns 1000L) 286 + 287 + (** [stage1 cfg] scans for images in the source directory. 288 + 289 + Returns a sequence of file paths matching the configured extensions. *) 290 + let stage1 { img_exts; src_dir; _ } = 291 + let filter f = List.exists (Filename.check_suffix ("." ^ f)) img_exts in 292 + let fs = file_seq ~filter src_dir in 293 + let total = Seq.length fs in 294 + Format.printf "[1/3] Scanned %d images from %a.\n%!" total Path.pp src_dir; 295 + fs 296 + 297 + (** [stage2 cfg fs] processes images, converting to WebP at multiple sizes. 298 + 299 + @return List of {!Srcsetter.t} entries with placeholder dimensions. *) 300 + let stage2 ({ max_fibers; dst_dir; _ } as cfg) fs = 301 + let display = 302 + Progress.Display.start 303 + ~config:(Progress.Config.v ~persistent:false ~min_interval ()) 304 + (main_bar_heading "[2/3] Processing images..." (Seq.length fs)) 305 + in 306 + let [ _; main_rep ] = Progress.Display.reporters display in 307 + let ents = ref [] in 308 + iter_seq_p ~max_fibers 309 + (fun src -> 310 + let ent = process_file cfg (display, main_rep) src in 311 + ents := ent :: !ents) 312 + fs; 313 + Progress.Display.finalise display; 314 + Format.printf "[2/3] Processed %d images to %a.\n%!" (List.length !ents) 315 + Path.pp dst_dir; 316 + !ents 317 + 318 + (** [stage3 cfg ents] verifies generated images and records their dimensions. 319 + 320 + Regenerates any images that have zero length or fail identify validation. 321 + 322 + @return List of {!Srcsetter.t} entries with actual dimensions. *) 323 + let stage3 ({ src_dir; dst_dir; max_fibers; _ } as cfg) ents = 324 + let ents_seq = List.to_seq ents in 325 + let oents = ref [] in 326 + let regenerated = ref 0 in 327 + let display = 328 + Progress.Display.start 329 + ~config:(Progress.Config.v ~persistent:false ~min_interval ()) 330 + (main_bar_heading "[3/3] Verifying images..." (List.length ents)) 331 + in 332 + let [ _; rep ] = Progress.Display.reporters display in 333 + iter_seq_p ~max_fibers 334 + (fun ent -> 335 + let src_path = Path.(src_dir / Srcsetter.origin ent) in 336 + let orig_w, _ = dims cfg src_path in 337 + (* Verify and regenerate base image if needed *) 338 + let base_path = Path.(dst_dir / Srcsetter.name ent) in 339 + if not (is_valid_image cfg base_path) then begin 340 + incr regenerated; 341 + convert cfg (Srcsetter.origin ent, Srcsetter.name ent, orig_w) 342 + end; 343 + let w, h = dims cfg base_path in 344 + (* Verify and regenerate variants if needed *) 345 + let variants = 346 + Srcsetter.MS.bindings ent.variants 347 + |> List.map (fun (k, _) -> 348 + let variant_path = Path.(dst_dir / k) in 349 + if not (is_valid_image cfg variant_path) then begin 350 + incr regenerated; 351 + let target_w = Option.value (width_from_variant_name k) ~default:orig_w in 352 + convert cfg (Srcsetter.origin ent, k, target_w) 353 + end; 354 + (k, dims cfg variant_path)) 355 + |> Srcsetter.MS.of_list 356 + in 357 + rep 1; 358 + oents := { ent with Srcsetter.dims = (w, h); variants } :: !oents) 359 + ents_seq; 360 + Progress.Display.finalise display; 361 + if !regenerated > 0 then 362 + Printf.printf "[3/3] Verified %d images, regenerated %d invalid outputs.\n%!" 363 + (List.length ents) !regenerated 364 + else 365 + Printf.printf "[3/3] Verified %d generated image sizes.\n%!" 366 + (List.length ents); 367 + !oents 368 + 369 + (** [run ~proc_mgr ~src_dir ~dst_dir ()] runs the full srcsetter pipeline. 370 + 371 + Scans [src_dir] for images, converts them to WebP format at multiple 372 + responsive sizes, and writes an index file to [dst_dir]. 373 + 374 + @param proc_mgr Eio process manager for running ImageMagick 375 + @param src_dir Source directory containing original images 376 + @param dst_dir Destination directory for generated images 377 + @param idx_file Name of the index file (default ["index.json"]) 378 + @param img_widths List of target widths (default common responsive breakpoints) 379 + @param img_exts List of extensions to process (default common image formats) 380 + @param max_fibers Maximum concurrent operations (default 8) 381 + @param dummy When true, skip actual conversions (default false) 382 + @param preserve When true, skip existing files (default true) 383 + @return List of {!Srcsetter.t} entries describing generated images *) 384 + let run 385 + ~proc_mgr 386 + ~src_dir 387 + ~dst_dir 388 + ?(idx_file = "index.json") 389 + ?(img_widths = [ 320; 480; 640; 768; 1024; 1280; 1440; 1600; 1920; 2560; 3840 ]) 390 + ?(img_exts = [ "png"; "webp"; "jpeg"; "jpg"; "bmp"; "heic"; "gif" ]) 391 + ?(max_fibers = 8) 392 + ?(dummy = false) 393 + ?(preserve = true) 394 + () 395 + = 396 + let img_widths = List.sort (fun a b -> compare b a) img_widths in 397 + let cfg = 398 + { 399 + dummy; 400 + preserve; 401 + proc_mgr; 402 + src_dir; 403 + dst_dir; 404 + idx_file; 405 + img_widths; 406 + img_exts; 407 + max_fibers; 408 + } 409 + in 410 + let fs = stage1 cfg in 411 + let ents = stage2 cfg fs in 412 + let oents = stage3 cfg ents in 413 + let j = Srcsetter.list_to_json oents |> Result.get_ok in 414 + let idx = Path.(dst_dir / idx_file) in 415 + Path.save ~append:false ~create:(`Or_truncate 0o644) idx j; 416 + oents
+122
srcsetter/lib/srcsetter_cmd.mli
··· 1 + (* Copyright (c) 2024, Anil Madhavapeddy <anil@recoil.org> 2 + 3 + Permission to use, copy, modify, and/or distribute this software for 4 + any purpose with or without fee is hereby granted, provided that the 5 + above copyright notice and this permission notice appear in all 6 + copies. 7 + 8 + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL 9 + WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED 10 + WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE 11 + AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL 12 + DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA 13 + OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER 14 + TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 15 + PERFORMANCE OF THIS SOFTWARE. *) 16 + 17 + (** Command-line image processing operations for srcsetter. 18 + 19 + This module provides the core image processing pipeline including 20 + file discovery, image conversion, and progress reporting. 21 + 22 + {1 High-Level Pipeline} 23 + 24 + The simplest way to use this module is via {!run}, which executes 25 + the complete pipeline: 26 + 27 + {[ 28 + Srcsetter_cmd.run 29 + ~proc_mgr:(Eio.Stdenv.process_mgr env) 30 + ~src_dir:Eio.Path.(fs / "images/originals") 31 + ~dst_dir:Eio.Path.(fs / "images/output") 32 + () 33 + ]} 34 + 35 + {1 Configuration} *) 36 + 37 + (** Configuration for the image processing pipeline. *) 38 + type ('a, 'b) config = { 39 + dummy : bool; (** When true, skip actual image conversion (dry run) *) 40 + preserve : bool; (** When true, skip conversion if destination exists *) 41 + proc_mgr : 'a Eio.Process.mgr; (** Eio process manager for running ImageMagick *) 42 + src_dir : 'b Eio.Path.t; (** Source directory containing original images *) 43 + dst_dir : 'b Eio.Path.t; (** Destination directory for generated images *) 44 + img_widths : int list; (** List of target widths for responsive variants *) 45 + img_exts : string list; (** File extensions to process (e.g., ["jpg"; "png"]) *) 46 + idx_file : string; (** Name of the JSON index file to generate *) 47 + max_fibers : int; (** Maximum concurrent conversion operations *) 48 + } 49 + 50 + (** {1 File Operations} *) 51 + 52 + val file_seq : 53 + filter:(string -> bool) -> 54 + ([> Eio.Fs.dir_ty ] as 'a) Eio.Path.t -> 55 + 'a Eio.Path.t Seq.t 56 + (** [file_seq ~filter path] recursively enumerates files in [path]. 57 + 58 + Returns a sequence of file paths where [filter filename] is true. 59 + Directories are traversed depth-first. *) 60 + 61 + val iter_seq_p : ?max_fibers:int -> ('a -> unit) -> 'a Seq.t -> unit 62 + (** [iter_seq_p ?max_fibers fn seq] iterates [fn] over [seq] in parallel. 63 + 64 + @param max_fibers Optional limit on concurrent fibers. Must be positive. 65 + @raise Invalid_argument if [max_fibers] is not positive. *) 66 + 67 + (** {1 Image Operations} *) 68 + 69 + val dims : ('a, 'b) config -> 'b Eio.Path.t -> int * int 70 + (** [dims cfg path] returns the [(width, height)] dimensions of an image. 71 + 72 + Uses ImageMagick's [identify] command to read image metadata. *) 73 + 74 + val convert : ('a, 'b) config -> string * string * int -> unit 75 + (** [convert cfg (src, dst, size)] converts an image to WebP format. 76 + 77 + Creates the destination directory if needed, then uses ImageMagick 78 + to resize and convert the image with auto-orientation. 79 + 80 + @param src Source filename relative to [cfg.src_dir] 81 + @param dst Destination filename relative to [cfg.dst_dir] 82 + @param size Target width in pixels *) 83 + 84 + val convert_pdf : 85 + ('a, 'b) config -> 86 + size:string -> 87 + dst:'b Eio.Path.t -> 88 + src:'b Eio.Path.t -> 89 + unit 90 + (** [convert_pdf cfg ~size ~dst ~src] converts a PDF's first page to an image. 91 + 92 + Renders at 300 DPI, crops the top half, and resizes to the target width. *) 93 + 94 + (** {1 Pipeline Execution} *) 95 + 96 + val run : 97 + proc_mgr:'a Eio.Process.mgr -> 98 + src_dir:'b Eio.Path.t -> 99 + dst_dir:'b Eio.Path.t -> 100 + ?idx_file:string -> 101 + ?img_widths:int list -> 102 + ?img_exts:string list -> 103 + ?max_fibers:int -> 104 + ?dummy:bool -> 105 + ?preserve:bool -> 106 + unit -> 107 + Srcsetter.t list 108 + (** [run ~proc_mgr ~src_dir ~dst_dir ()] runs the full srcsetter pipeline. 109 + 110 + Scans [src_dir] for images, converts them to WebP format at multiple 111 + responsive sizes, and writes an index file to [dst_dir]. 112 + 113 + @param proc_mgr Eio process manager for running ImageMagick 114 + @param src_dir Source directory containing original images 115 + @param dst_dir Destination directory for generated images 116 + @param idx_file Name of the index file (default ["index.json"]) 117 + @param img_widths List of target widths (default common responsive breakpoints) 118 + @param img_exts List of extensions to process (default common image formats) 119 + @param max_fibers Maximum concurrent operations (default 8) 120 + @param dummy When true, skip actual conversions (default false) 121 + @param preserve When true, skip existing files (default true) 122 + @return List of {!Srcsetter.t} entries describing generated images *)