My aggregated monorepo of OCaml code, automaintained
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Squashed 'ocaml-bushel/' changes from f1814aa1..2553361e

2553361e Sync contact faces from Sortal instead of Immich
ee94c11c Output paper thumbnails to local_source_dir for srcsetter
5db7242f Add bushel images command using srcsetter index
45f8b414 Add dry-run mode to bushel sync command
52319af8 Replace bushel contacts with sortal

git-subtree-dir: ocaml-bushel
git-subtree-split: 2553361e0518b135041a115011c52bccf15e39f6

+408 -330
+105 -6
bin/main.ml
··· 184 184 | Error e -> Printf.eprintf "Config error: %s\n" e; 1 185 185 | Ok config -> 186 186 let data_dir = get_data_dir config data_dir in 187 - with_entries data_dir @@ fun _env entries -> 187 + Eio_main.run @@ fun env -> 188 + let fs = Eio.Stdenv.fs env in 189 + let entries = Bushel_eio.Bushel_loader.load fs data_dir in 188 190 let papers = List.length (Bushel.Entry.papers entries) in 189 191 let notes = List.length (Bushel.Entry.notes entries) in 190 192 let projects = List.length (Bushel.Entry.projects entries) in 191 193 let ideas = List.length (Bushel.Entry.ideas entries) in 192 194 let videos = List.length (Bushel.Entry.videos entries) in 193 195 let contacts = List.length (Bushel.Entry.contacts entries) in 196 + let images = List.length (Bushel_eio.Bushel_loader.load_images fs 197 + ~output_dir:config.Bushel_config.local_output_dir) in 194 198 Printf.printf "Bushel Statistics\n"; 195 199 Printf.printf "=================\n"; 196 200 Printf.printf "Papers: %4d\n" papers; ··· 199 203 Printf.printf "Ideas: %4d\n" ideas; 200 204 Printf.printf "Videos: %4d\n" videos; 201 205 Printf.printf "Contacts: %4d\n" contacts; 206 + Printf.printf "Images: %4d\n" images; 202 207 Printf.printf "-----------------\n"; 203 208 Printf.printf "Total: %4d\n" (papers + notes + projects + ideas + videos); 204 209 0 ··· 247 252 let doc = "Also upload to Typesense (remote sync)." in 248 253 Arg.(value & flag & info ["remote"] ~doc) 249 254 in 255 + let dry_run = 256 + let doc = "Show what commands would be run without executing them." in 257 + Arg.(value & flag & info ["dry-run"; "n"] ~doc) 258 + in 250 259 let only = 251 260 let doc = "Only run specific step (images, srcsetter, thumbs, faces, videos, typesense)." in 252 261 Arg.(value & opt (some string) None & info ["only"] ~docv:"STEP" ~doc) 253 262 in 254 - let run () config_file data_dir remote only = 263 + let run () config_file data_dir remote dry_run only = 255 264 match load_config config_file with 256 265 | Error e -> Printf.eprintf "Config error: %s\n" e; 1 257 266 | Ok config -> ··· 274 283 let fs = Eio.Stdenv.fs env in 275 284 let entries = Bushel_eio.Bushel_loader.load fs data_dir in 276 285 277 - Printf.printf "Running sync pipeline...\n"; 286 + Printf.printf "%s sync pipeline...\n" (if dry_run then "Dry-run" else "Running"); 278 287 List.iter (fun step -> 279 288 Printf.printf " - %s\n" (Bushel_sync.string_of_step step) 280 289 ) steps; 281 290 Printf.printf "\n"; 282 291 283 - let results = Bushel_sync.run ~env ~config ~steps ~entries in 292 + let results = Bushel_sync.run ~dry_run ~env ~config ~steps ~entries in 284 293 285 294 Printf.printf "\nResults:\n"; 286 295 List.iter (fun r -> ··· 288 297 Printf.printf " [%s] %s: %s\n" 289 298 status 290 299 (Bushel_sync.string_of_step r.step) 291 - r.message 300 + r.message; 301 + (* In dry-run mode, show the details (commands) *) 302 + if dry_run && r.Bushel_sync.details <> [] then begin 303 + List.iter (fun d -> Printf.printf " %s\n" d) r.Bushel_sync.details 304 + end 292 305 ) results; 293 306 294 307 let failures = List.filter (fun r -> not r.Bushel_sync.success) results in ··· 304 317 `P "4. $(b,faces) - Fetch contact face thumbnails from Immich"; 305 318 `P "5. $(b,videos) - Fetch video thumbnails from PeerTube"; 306 319 `P "6. $(b,typesense) - Upload to Typesense (with --remote)"; 320 + `P "Use $(b,--dry-run) to see what commands would be run without executing them."; 307 321 ] in 308 322 let info = Cmd.info "sync" ~doc ~man in 309 - Cmd.v info Term.(const run $ logging_t $ config_file $ data_dir $ remote $ only) 323 + Cmd.v info Term.(const run $ logging_t $ config_file $ data_dir $ remote $ dry_run $ only) 310 324 311 325 (** {1 Paper Add Command} *) 312 326 ··· 506 520 let info = Cmd.info "video" ~doc in 507 521 Cmd.v info Term.(const run $ logging_t $ config_file $ data_dir $ server $ channel) 508 522 523 + (** {1 Images Command} *) 524 + 525 + let images_cmd = 526 + let limit = 527 + let doc = "Maximum number of images to show." in 528 + Arg.(value & opt (some int) None & info ["n"; "limit"] ~docv:"N" ~doc) 529 + in 530 + let sort_by = 531 + let doc = "Sort by field (slug, width, height, variants). Default: slug." in 532 + Arg.(value & opt string "slug" & info ["s"; "sort"] ~docv:"FIELD" ~doc) 533 + in 534 + let run () config_file limit sort_by = 535 + match load_config config_file with 536 + | Error e -> Printf.eprintf "Config error: %s\n" e; 1 537 + | Ok config -> 538 + Eio_main.run @@ fun env -> 539 + let fs = Eio.Stdenv.fs env in 540 + let output_dir = config.Bushel_config.local_output_dir in 541 + let images = Bushel_eio.Bushel_loader.load_images fs ~output_dir in 542 + if images = [] then begin 543 + Printf.printf "No images found.\n"; 544 + Printf.printf "Run 'bushel sync' to generate image index.\n"; 545 + 0 546 + end else begin 547 + (* Sort *) 548 + let sorted = match sort_by with 549 + | "width" -> 550 + List.sort (fun a b -> 551 + let (wa, _) = Srcsetter.dims a in 552 + let (wb, _) = Srcsetter.dims b in 553 + compare wb wa (* largest first *) 554 + ) images 555 + | "height" -> 556 + List.sort (fun a b -> 557 + let (_, ha) = Srcsetter.dims a in 558 + let (_, hb) = Srcsetter.dims b in 559 + compare hb ha (* largest first *) 560 + ) images 561 + | "variants" -> 562 + List.sort (fun a b -> 563 + let va = Srcsetter.MS.cardinal (Srcsetter.variants a) in 564 + let vb = Srcsetter.MS.cardinal (Srcsetter.variants b) in 565 + compare vb va (* most variants first *) 566 + ) images 567 + | _ -> (* slug, default *) 568 + List.sort (fun a b -> 569 + String.compare (Srcsetter.slug a) (Srcsetter.slug b) 570 + ) images 571 + in 572 + (* Limit *) 573 + let limited = match limit with 574 + | None -> sorted 575 + | Some n -> List.filteri (fun i _ -> i < n) sorted 576 + in 577 + (* Build table *) 578 + let rows = List.map (fun img -> 579 + let (w, h) = Srcsetter.dims img in 580 + let num_variants = Srcsetter.MS.cardinal (Srcsetter.variants img) in 581 + [ Srcsetter.slug img 582 + ; Printf.sprintf "%dx%d" w h 583 + ; string_of_int num_variants 584 + ; Srcsetter.origin img 585 + ] 586 + ) limited in 587 + let table = Table.make 588 + ~headers:["SLUG"; "DIMS"; "VARIANTS"; "ORIGIN"] 589 + rows 590 + in 591 + Table.print table; 592 + Printf.printf "\nTotal: %d images\n" (List.length limited); 593 + 0 594 + end 595 + in 596 + let doc = "List images from the srcsetter index." in 597 + let man = [ 598 + `S Manpage.s_description; 599 + `P "Lists images that have been processed by srcsetter."; 600 + `P "Images are stored separately from other entries and are referenced \ 601 + by slug in markdown content using the :slug syntax."; 602 + `P "Run $(b,bushel sync) to process images and generate the index."; 603 + ] in 604 + let info = Cmd.info "images" ~doc ~man in 605 + Cmd.v info Term.(const run $ logging_t $ config_file $ limit $ sort_by) 606 + 509 607 (** {1 Config Command} *) 510 608 511 609 let config_cmd = ··· 573 671 Cmd.group info [ 574 672 init_cmd; 575 673 list_cmd; 674 + images_cmd; 576 675 stats_cmd; 577 676 show_cmd; 578 677 sync_cmd;
-4
lib/bushel.ml
··· 12 12 13 13 {1 Entry Types} 14 14 15 - - {!Contact} - People/researchers with social links 16 15 - {!Note} - Blog posts and research notes 17 16 - {!Paper} - Academic papers with BibTeX metadata 18 17 - {!Project} - Research projects ··· 44 43 *) 45 44 46 45 (** {1 Entry Types} *) 47 - 48 - module Contact = Bushel_contact 49 - (** Contact/person entries. *) 50 46 51 47 module Note = Bushel_note 52 48 (** Blog post and research note entries. *)
-154
lib/bushel_contact.ml
··· 1 - (*--------------------------------------------------------------------------- 2 - Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 - SPDX-License-Identifier: ISC 4 - ---------------------------------------------------------------------------*) 5 - 6 - (** Contact/person entry type for Bushel *) 7 - 8 - type t = { 9 - names : string list; 10 - handle : string; 11 - email : string option; 12 - icon : string option; 13 - github : string option; 14 - twitter : string option; 15 - bluesky : string option; 16 - mastodon : string option; 17 - orcid : string option; 18 - url : string option; 19 - atom : string list option; 20 - } 21 - 22 - type ts = t list 23 - 24 - (** {1 Constructors} *) 25 - 26 - let v ?email ?github ?twitter ?bluesky ?mastodon ?orcid ?icon ?url ?atom handle names = 27 - { names; handle; email; github; twitter; bluesky; mastodon; orcid; url; icon; atom } 28 - 29 - let make names email icon github twitter bluesky mastodon orcid url atom = 30 - v ?email ?github ?twitter ?bluesky ?mastodon ?orcid ?icon ?url ?atom "" names 31 - 32 - (** {1 Accessors} *) 33 - 34 - let names { names; _ } = names 35 - 36 - let name c = 37 - match c.names with 38 - | n :: _ -> n 39 - | [] -> failwith (Printf.sprintf "Contact with handle '%s' has empty names list" c.handle) 40 - 41 - let handle { handle; _ } = handle 42 - let email { email; _ } = email 43 - let icon { icon; _ } = icon 44 - let github { github; _ } = github 45 - let twitter { twitter; _ } = twitter 46 - let bluesky { bluesky; _ } = bluesky 47 - let mastodon { mastodon; _ } = mastodon 48 - let orcid { orcid; _ } = orcid 49 - let url { url; _ } = url 50 - let atom { atom; _ } = atom 51 - 52 - (** {1 Jsont Codec} *) 53 - 54 - let jsont : t Jsont.t = 55 - let open Jsont in 56 - let open Jsont.Object in 57 - let mem_opt f v ~enc = mem f v ~dec_absent:None ~enc_omit:Option.is_none ~enc in 58 - map ~kind:"Contact" make 59 - |> mem "names" (list string) ~dec_absent:[] ~enc:names 60 - |> mem_opt "email" (some string) ~enc:email 61 - |> mem_opt "icon" (some string) ~enc:icon 62 - |> mem_opt "github" (some string) ~enc:github 63 - |> mem_opt "twitter" (some string) ~enc:twitter 64 - |> mem_opt "bluesky" (some string) ~enc:bluesky 65 - |> mem_opt "mastodon" (some string) ~enc:mastodon 66 - |> mem_opt "orcid" (some string) ~enc:orcid 67 - |> mem_opt "url" (some string) ~enc:url 68 - |> mem_opt "atom" (some (list string)) ~enc:atom 69 - |> finish 70 - 71 - (** {1 Parsing} *) 72 - 73 - let of_frontmatter ~handle (fm : Frontmatter.t) : (t, string) result = 74 - match Frontmatter.decode jsont fm with 75 - | Ok c -> Ok { c with handle } 76 - | Error e -> Error e 77 - 78 - (** {1 Lookup Functions} *) 79 - 80 - let compare a b = String.compare a.handle b.handle 81 - 82 - let find_by_handle ts h = List.find_opt (fun { handle; _ } -> handle = h) ts 83 - 84 - let best_url c = 85 - match c.url with 86 - | Some _ as url -> url 87 - | None -> 88 - match c.github with 89 - | Some g -> Some ("https://github.com/" ^ g) 90 - | None -> Option.map (fun e -> "mailto:" ^ e) c.email 91 - 92 - (** Given a name, turn it lowercase and return the concatenation of the 93 - initials of all the words in the name and the full last name. *) 94 - let handle_of_name name = 95 - let name = String.lowercase_ascii name in 96 - let words = String.split_on_char ' ' name in 97 - let initials = String.concat "" (List.map (fun w -> String.sub w 0 1) words) in 98 - initials ^ List.hd (List.rev words) 99 - 100 - (** Fuzzy lookup for an author by name. *) 101 - let lookup_by_name ts a = 102 - let a = String.lowercase_ascii a in 103 - let rec aux acc = function 104 - | [] -> acc 105 - | t :: ts -> 106 - if List.exists (fun n -> String.lowercase_ascii n = a) t.names 107 - then aux (t :: acc) ts 108 - else aux acc ts 109 - in 110 - match aux [] ts with 111 - | [ a ] -> a 112 - | [] -> raise (Failure ("Contact not found: " ^ a)) 113 - | _ -> raise (Failure ("Ambiguous contact: " ^ a)) 114 - 115 - (** {1 Pretty Printing} *) 116 - 117 - let pp ppf c = 118 - let open Fmt in 119 - pf ppf "@[<v>"; 120 - pf ppf "%a: %a@," (styled `Bold string) "Type" (styled `Cyan string) "Contact"; 121 - pf ppf "%a: @%a@," (styled `Bold string) "Handle" string (handle c); 122 - pf ppf "%a: %a@," (styled `Bold string) "Name" string (name c); 123 - let ns = names c in 124 - if List.length ns > 1 then 125 - pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Aliases" (list ~sep:comma string) (List.tl ns); 126 - (match email c with 127 - | Some e -> pf ppf "%a: %a@," (styled `Bold string) "Email" string e 128 - | None -> ()); 129 - (match github c with 130 - | Some g -> pf ppf "%a: https://github.com/%a@," (styled `Bold string) "GitHub" string g 131 - | None -> ()); 132 - (match twitter c with 133 - | Some t -> pf ppf "%a: https://twitter.com/%a@," (styled `Bold string) "Twitter" string t 134 - | None -> ()); 135 - (match bluesky c with 136 - | Some b -> pf ppf "%a: %a@," (styled `Bold string) "Bluesky" string b 137 - | None -> ()); 138 - (match mastodon c with 139 - | Some m -> pf ppf "%a: %a@," (styled `Bold string) "Mastodon" string m 140 - | None -> ()); 141 - (match orcid c with 142 - | Some o -> pf ppf "%a: https://orcid.org/%a@," (styled `Bold string) "ORCID" string o 143 - | None -> ()); 144 - (match url c with 145 - | Some u -> pf ppf "%a: %a@," (styled `Bold string) "URL" string u 146 - | None -> ()); 147 - (match icon c with 148 - | Some i -> pf ppf "%a: %a@," (styled `Bold string) "Icon" string i 149 - | None -> ()); 150 - (match atom c with 151 - | Some atoms when atoms <> [] -> 152 - pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Atom Feeds" (list ~sep:comma string) atoms 153 - | _ -> ()); 154 - pf ppf "@]"
+9 -4
lib/bushel_entry.ml
··· 23 23 projects : Bushel_project.ts; 24 24 ideas : Bushel_idea.ts; 25 25 videos : Bushel_video.ts; 26 - contacts : Bushel_contact.ts; 26 + contacts : Sortal_schema.Contact.t list; 27 27 data_dir : string; 28 28 } 29 29 ··· 153 153 (** {1 Contact Lookups} *) 154 154 155 155 let lookup_by_name { contacts; _ } n = 156 - match Bushel_contact.lookup_by_name contacts n with 157 - | v -> Some v 158 - | exception _ -> None 156 + let name_lower = String.lowercase_ascii n in 157 + let matches = List.filter (fun c -> 158 + List.exists (fun name -> String.lowercase_ascii name = name_lower) 159 + (Sortal_schema.Contact.names c) 160 + ) contacts in 161 + match matches with 162 + | [contact] -> Some contact 163 + | _ -> None 159 164 160 165 (** {1 Tag Functions} *) 161 166
+3 -3
lib/bushel_entry.mli
··· 28 28 projects:Bushel_project.t list -> 29 29 ideas:Bushel_idea.t list -> 30 30 videos:Bushel_video.t list -> 31 - contacts:Bushel_contact.t list -> 31 + contacts:Sortal_schema.Contact.t list -> 32 32 data_dir:string -> 33 33 t 34 34 (** Create an entry collection from lists of each entry type. *) 35 35 36 36 (** {1 Accessors} *) 37 37 38 - val contacts : t -> Bushel_contact.ts 38 + val contacts : t -> Sortal_schema.Contact.t list 39 39 val videos : t -> Bushel_video.ts 40 40 val ideas : t -> Bushel_idea.ts 41 41 val papers : t -> Bushel_paper.ts ··· 111 111 112 112 (** {1 Contact Lookups} *) 113 113 114 - val lookup_by_name : t -> string -> Bushel_contact.t option 114 + val lookup_by_name : t -> string -> Sortal_schema.Contact.t option 115 115 (** [lookup_by_name entries name] finds a contact by name. *) 116 116 117 117 (** {1 Tag Functions} *)
+5 -5
lib/bushel_md.ml
··· 137 137 | Some () -> 138 138 let slug = Label.key l in 139 139 let s = strip_handle slug in 140 - (match Bushel_contact.find_by_handle (Bushel_entry.contacts entries) s with 140 + (match List.find_opt (fun c -> Sortal_schema.Contact.handle c = s) (Bushel_entry.contacts entries) with 141 141 | Some c -> 142 - let name = Bushel_contact.name c in 143 - (match Bushel_contact.best_url c with 142 + let name = Sortal_schema.Contact.name c in 143 + (match Sortal_schema.Contact.best_url c with 144 144 | Some dest -> 145 145 let txt = Inline.Text (name, meta) in 146 146 let ld = Link_definition.make ~dest:(dest, meta) () in ··· 370 370 | Some (url, _title) -> 371 371 let s = strip_handle url in 372 372 if is_contact_slug url then 373 - (match Bushel_contact.find_by_handle (Bushel_entry.contacts entries) s with 373 + (match List.find_opt (fun c -> Sortal_schema.Contact.handle c = s) (Bushel_entry.contacts entries) with 374 374 | None -> Hashtbl.replace broken_contacts url () 375 375 | Some _ -> ()) 376 376 else if is_bushel_slug url then ··· 386 386 | Some () -> 387 387 let slug = Label.key l in 388 388 let handle = strip_handle slug in 389 - (match Bushel_contact.find_by_handle (Bushel_entry.contacts entries) handle with 389 + (match List.find_opt (fun c -> Sortal_schema.Contact.handle c = handle) (Bushel_entry.contacts entries) with 390 390 | None -> Hashtbl.replace broken_contacts slug () 391 391 | Some _ -> ()); 392 392 Mapper.default
+2 -1
lib/dune
··· 10 10 re 11 11 uri 12 12 fmt 13 - yamlrw)) 13 + yamlrw 14 + sortal.schema))
+25 -12
lib_eio/bushel_loader.ml
··· 8 8 let src = Logs.Src.create "bushel.loader" ~doc:"Bushel loader" 9 9 module Log = (val Logs.src_log src : Logs.LOG) 10 10 11 + (** Load images from srcsetter index.json *) 12 + let load_images fs ~output_dir = 13 + let index_path = Filename.concat output_dir "index.json" in 14 + let path = Eio.Path.(fs / index_path) in 15 + try 16 + let content = Eio.Path.load path in 17 + match Srcsetter.list_of_json content with 18 + | Ok images -> 19 + Log.info (fun m -> m "Loaded %d images from %s" (List.length images) index_path); 20 + images 21 + | Error e -> 22 + Log.warn (fun m -> m "Failed to parse %s: %s" index_path e); 23 + [] 24 + with 25 + | Eio.Io (Eio.Fs.E (Eio.Fs.Not_found _), _) -> 26 + Log.info (fun m -> m "No image index found at %s" index_path); 27 + [] 28 + 11 29 (** List markdown files in a directory *) 12 30 let list_md_files fs dir = 13 31 let path = Eio.Path.(fs / dir) in ··· 38 56 None 39 57 ) files 40 58 41 - (** Load contacts from data/contacts/ *) 42 - let load_contacts fs base = 43 - map_category fs base "contacts" (fun fm -> 44 - let handle = 45 - match Frontmatter.fname fm with 46 - | Some fname -> Filename.basename fname |> Filename.chop_extension 47 - | None -> "" 48 - in 49 - Bushel.Contact.of_frontmatter ~handle fm 50 - ) 59 + (** Load contacts from Sortal XDG store *) 60 + let load_contacts fs _base = 61 + let store = Sortal.Store.create fs "sortal" in 62 + Sortal.Store.list store 51 63 52 64 (** Load projects from data/projects/ *) 53 65 let load_projects fs base = ··· 165 177 | None -> ()) 166 178 else if Bushel.Md.is_contact_slug link then 167 179 let handle = Bushel.Md.strip_handle link in 168 - (match Bushel.Contact.find_by_handle (Bushel.Entry.contacts entries) handle with 180 + let contacts = Bushel.Entry.contacts entries in 181 + (match List.find_opt (fun c -> Sortal_schema.Contact.handle c = handle) contacts with 169 182 | Some c -> 170 - add_internal_link source_slug (Bushel.Contact.handle c) `Contact 183 + add_internal_link source_slug (Sortal_schema.Contact.handle c) `Contact 171 184 | None -> ()) 172 185 else if Bushel.Md.is_tag_slug link || Bushel.Md.is_type_filter_slug link then 173 186 () (* Skip tag links *)
+3 -1
lib_eio/dune
··· 5 5 bushel 6 6 frontmatter-eio 7 7 eio 8 - logs)) 8 + logs 9 + sortal 10 + srcsetter))
+3 -3
lib_sync/bushel_immich.ml
··· 73 73 (** {1 Contact Face Fetching} *) 74 74 75 75 let fetch_face_for_contact ~proc_mgr ~endpoint ~api_key ~output_dir contact = 76 - let names = Bushel.Contact.names contact in 77 - let handle = Bushel.Contact.handle contact in 76 + let names = Sortal_schema.Contact.names contact in 77 + let handle = Sortal_schema.Contact.handle contact in 78 78 let output_path = Filename.concat output_dir (handle ^ ".jpg") in 79 79 80 80 (* Skip if already exists *) ··· 113 113 Unix.mkdir output_dir 0o755; 114 114 115 115 let results = List.map (fun contact -> 116 - let handle = Bushel.Contact.handle contact in 116 + let handle = Sortal_schema.Contact.handle contact in 117 117 let result = fetch_face_for_contact ~proc_mgr ~endpoint ~api_key ~output_dir contact in 118 118 (handle, result) 119 119 ) contacts in
+223 -120
lib_sync/bushel_sync.ml
··· 55 55 | "typesense" -> Some Typesense 56 56 | _ -> None 57 57 58 - let all_steps = [Images; Srcsetter; Thumbs; Faces; Videos] 58 + let all_steps = [Images; Thumbs; Faces; Srcsetter; Videos] 59 59 let all_steps_with_remote = all_steps @ [Typesense] 60 60 61 61 (** {1 Step Results} *) ··· 77 77 78 78 (** {1 Rsync Images} *) 79 79 80 - let sync_images ~proc_mgr config = 80 + let sync_images ~dry_run ~fs ~proc_mgr config = 81 81 Log.info (fun m -> m "Syncing images from remote..."); 82 - let cmd = Bushel_config.rsync_command config in 83 - Log.debug (fun m -> m "Running: %s" cmd); 84 - 85 - (* Ensure local directory exists *) 86 82 let local_dir = config.Bushel_config.local_source_dir in 87 - if not (Sys.file_exists local_dir) then begin 88 - Log.info (fun m -> m "Creating directory: %s" local_dir); 89 - Unix.mkdir local_dir 0o755 90 - end; 83 + let args = ["rsync"; "-avz"; 84 + Bushel_config.rsync_source config ^ "/"; 85 + local_dir ^ "/"] in 86 + let cmd = String.concat " " args in 91 87 92 - try 93 - let args = ["rsync"; "-avz"; 94 - Bushel_config.rsync_source config ^ "/"; 95 - local_dir ^ "/"] in 96 - Eio.Process.run proc_mgr args; 88 + if dry_run then begin 97 89 { step = Images; success = true; 98 - message = "Images synced from remote"; 99 - details = [] } 100 - with e -> 101 - { step = Images; success = false; 102 - message = Printf.sprintf "Rsync failed: %s" (Printexc.to_string e); 103 - details = [] } 90 + message = "Would run rsync"; 91 + details = [cmd] } 92 + end else begin 93 + Log.debug (fun m -> m "Running: %s" cmd); 94 + 95 + (* Ensure local directory exists (recursive) *) 96 + let local_path = Eio.Path.(fs / local_dir) in 97 + Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 local_path; 98 + 99 + try 100 + Eio.Process.run proc_mgr args; 101 + { step = Images; success = true; 102 + message = "Images synced from remote"; 103 + details = [] } 104 + with e -> 105 + { step = Images; success = false; 106 + message = Printf.sprintf "Rsync failed: %s" (Printexc.to_string e); 107 + details = [] } 108 + end 104 109 105 110 (** {1 Srcsetter} *) 106 111 107 - let run_srcsetter ~proc_mgr config = 112 + let run_srcsetter ~dry_run ~fs ~proc_mgr config = 108 113 Log.info (fun m -> m "Running srcsetter..."); 109 114 let src_dir = config.Bushel_config.local_source_dir in 110 115 let dst_dir = config.Bushel_config.local_output_dir in 111 116 112 - (* Ensure output directory exists *) 113 - if not (Sys.file_exists dst_dir) then begin 114 - Log.info (fun m -> m "Creating directory: %s" dst_dir); 115 - Unix.mkdir dst_dir 0o755 116 - end; 117 - 118 - try 119 - let args = ["srcsetter"; src_dir; dst_dir] in 120 - Eio.Process.run proc_mgr args; 117 + if dry_run then begin 121 118 { step = Srcsetter; success = true; 122 - message = "Srcsetter completed"; 123 - details = [] } 124 - with e -> 125 - { step = Srcsetter; success = false; 126 - message = Printf.sprintf "Srcsetter failed: %s" (Printexc.to_string e); 127 - details = [] } 119 + message = "Would run srcsetter"; 120 + details = [Printf.sprintf "srcsetter %s %s" src_dir dst_dir] } 121 + end else begin 122 + (* Ensure output directory exists (recursive) *) 123 + let src_path = Eio.Path.(fs / src_dir) in 124 + let dst_path = Eio.Path.(fs / dst_dir) in 125 + Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 dst_path; 126 + 127 + try 128 + let entries = Srcsetter_cmd.run 129 + ~proc_mgr 130 + ~src_dir:src_path 131 + ~dst_dir:dst_path 132 + ~preserve:true 133 + () 134 + in 135 + { step = Srcsetter; success = true; 136 + message = Printf.sprintf "Srcsetter completed: %d images processed" 137 + (List.length entries); 138 + details = [] } 139 + with e -> 140 + { step = Srcsetter; success = false; 141 + message = Printf.sprintf "Srcsetter failed: %s" (Printexc.to_string e); 142 + details = [] } 143 + end 128 144 129 145 (** {1 Paper Thumbnails} *) 130 146 131 - let generate_paper_thumbnails ~proc_mgr config = 147 + let generate_paper_thumbnails ~dry_run ~fs ~proc_mgr config = 132 148 Log.info (fun m -> m "Generating paper thumbnails..."); 133 149 let pdfs_dir = config.Bushel_config.paper_pdfs_dir in 134 - let output_dir = Bushel_config.paper_thumbs_dir config in 150 + (* Output to local_source_dir/papers/ so srcsetter processes them *) 151 + let output_dir = Filename.concat config.Bushel_config.local_source_dir "papers" in 135 152 136 153 if not (Sys.file_exists pdfs_dir) then begin 137 154 Log.warn (fun m -> m "PDFs directory does not exist: %s" pdfs_dir); ··· 139 156 message = "No PDFs directory"; 140 157 details = [] } 141 158 end else begin 142 - (* Ensure output directory exists *) 143 - if not (Sys.file_exists output_dir) then 144 - Unix.mkdir output_dir 0o755; 145 - 146 159 let pdfs = Sys.readdir pdfs_dir |> Array.to_list 147 160 |> List.filter (fun f -> Filename.check_suffix f ".pdf") in 148 161 149 - let results = List.map (fun pdf_file -> 150 - let slug = Filename.chop_extension pdf_file in 151 - let pdf_path = Filename.concat pdfs_dir pdf_file in 152 - let output_path = Filename.concat output_dir (slug ^ ".webp") in 153 - 154 - if Sys.file_exists output_path then begin 155 - Log.debug (fun m -> m "Skipping %s: thumbnail exists" slug); 156 - `Skipped slug 157 - end else begin 158 - Log.info (fun m -> m "Generating thumbnail for %s" slug); 159 - try 160 - (* ImageMagick command: render PDF at 600 DPI, crop top 50%, resize to 2048px *) 162 + if dry_run then begin 163 + let would_run = List.filter_map (fun pdf_file -> 164 + let slug = Filename.chop_extension pdf_file in 165 + let pdf_path = Filename.concat pdfs_dir pdf_file in 166 + (* Output as PNG - srcsetter will convert to webp *) 167 + let output_path = Filename.concat output_dir (slug ^ ".png") in 168 + if Sys.file_exists output_path then None 169 + else begin 161 170 let args = [ 162 - "magick"; 163 - "-density"; "600"; 164 - "-quality"; "100"; 165 - pdf_path ^ "[0]"; (* First page only *) 166 - "-gravity"; "North"; 167 - "-crop"; "100%x50%+0+0"; 168 - "-resize"; "2048x"; 169 - output_path 171 + "magick"; "-density"; "600"; "-quality"; "100"; 172 + pdf_path ^ "[0]"; "-gravity"; "North"; 173 + "-crop"; "100%x50%+0+0"; "-resize"; "2048x"; output_path 170 174 ] in 171 - Eio.Process.run proc_mgr args; 172 - `Ok slug 173 - with e -> 174 - Log.err (fun m -> m "Failed to generate thumbnail for %s: %s" 175 - slug (Printexc.to_string e)); 176 - `Error slug 177 - end 178 - ) pdfs in 175 + Some (String.concat " " args) 176 + end 177 + ) pdfs in 178 + let skipped = List.length pdfs - List.length would_run in 179 + { step = Thumbs; success = true; 180 + message = Printf.sprintf "Would generate %d thumbnails (%d already exist)" 181 + (List.length would_run) skipped; 182 + details = would_run } 183 + end else begin 184 + (* Ensure output directory exists (recursive) *) 185 + let output_path = Eio.Path.(fs / output_dir) in 186 + Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 output_path; 187 + 188 + let results = List.map (fun pdf_file -> 189 + let slug = Filename.chop_extension pdf_file in 190 + let pdf_path = Filename.concat pdfs_dir pdf_file in 191 + (* Output as PNG - srcsetter will convert to webp *) 192 + let output_path = Filename.concat output_dir (slug ^ ".png") in 193 + 194 + if Sys.file_exists output_path then begin 195 + Log.debug (fun m -> m "Skipping %s: thumbnail exists" slug); 196 + `Skipped slug 197 + end else begin 198 + Log.info (fun m -> m "Generating thumbnail for %s" slug); 199 + try 200 + (* ImageMagick command: render PDF at 600 DPI, crop top 50%, resize to 2048px *) 201 + let args = [ 202 + "magick"; 203 + "-density"; "600"; 204 + "-quality"; "100"; 205 + pdf_path ^ "[0]"; (* First page only *) 206 + "-gravity"; "North"; 207 + "-crop"; "100%x50%+0+0"; 208 + "-resize"; "2048x"; 209 + output_path 210 + ] in 211 + Eio.Process.run proc_mgr args; 212 + `Ok slug 213 + with e -> 214 + Log.err (fun m -> m "Failed to generate thumbnail for %s: %s" 215 + slug (Printexc.to_string e)); 216 + `Error slug 217 + end 218 + ) pdfs in 179 219 180 - let ok_count = List.fold_left (fun acc r -> match r with `Ok _ -> acc + 1 | _ -> acc) 0 results in 181 - let skipped_count = List.fold_left (fun acc r -> match r with `Skipped _ -> acc + 1 | _ -> acc) 0 results in 182 - let error_count = List.fold_left (fun acc r -> match r with `Error _ -> acc + 1 | _ -> acc) 0 results in 220 + let ok_count = List.fold_left (fun acc r -> match r with `Ok _ -> acc + 1 | _ -> acc) 0 results in 221 + let skipped_count = List.fold_left (fun acc r -> match r with `Skipped _ -> acc + 1 | _ -> acc) 0 results in 222 + let error_count = List.fold_left (fun acc r -> match r with `Error _ -> acc + 1 | _ -> acc) 0 results in 183 223 184 - { step = Thumbs; success = error_count = 0; 185 - message = Printf.sprintf "%d generated, %d skipped, %d errors" 186 - ok_count skipped_count error_count; 187 - details = List.filter_map (fun r -> match r with `Error s -> Some s | _ -> None) results } 224 + { step = Thumbs; success = error_count = 0; 225 + message = Printf.sprintf "%d generated, %d skipped, %d errors" 226 + ok_count skipped_count error_count; 227 + details = List.filter_map (fun r -> match r with `Error s -> Some s | _ -> None) results } 228 + end 188 229 end 189 230 190 231 (** {1 Contact Faces} *) 191 232 192 - let sync_faces ~proc_mgr config entries = 193 - Log.info (fun m -> m "Syncing contact faces from Immich..."); 194 - let output_dir = Bushel_config.contact_faces_dir config in 233 + let sync_faces ~dry_run ~fs config entries = 234 + Log.info (fun m -> m "Syncing contact faces from Sortal..."); 235 + (* Output to local_source_dir/faces/ so srcsetter processes them *) 236 + let output_dir = Filename.concat config.Bushel_config.local_source_dir "faces" in 237 + let contacts = Bushel.Entry.contacts entries in 195 238 196 - match Bushel_config.immich_api_key config with 197 - | Error e -> 198 - Log.warn (fun m -> m "Cannot read Immich API key: %s" e); 199 - { step = Faces; success = false; 200 - message = "Missing Immich API key"; 201 - details = [e] } 202 - | Ok api_key -> 203 - let contacts = Bushel.Entry.contacts entries in 204 - let results = Bushel_immich.fetch_all_faces 205 - ~proc_mgr 206 - ~endpoint:config.immich_endpoint 207 - ~api_key 208 - ~output_dir 209 - contacts in 239 + (* Load sortal store to get thumbnail paths *) 240 + let sortal_store = Sortal.Store.create fs "sortal" in 210 241 211 - let ok_count = List.length (List.filter (fun (_, r) -> 212 - match r with Bushel_immich.Ok _ -> true | _ -> false) results) in 213 - let skipped_count = List.length (List.filter (fun (_, r) -> 214 - match r with Bushel_immich.Skipped _ -> true | _ -> false) results) in 215 - let error_count = List.length (List.filter (fun (_, r) -> 216 - match r with Bushel_immich.Error _ -> true | _ -> false) results) in 242 + (* Find contacts with PNG thumbnails that need copying *) 243 + let contacts_with_thumbs = List.filter_map (fun c -> 244 + match Sortal.Store.png_thumbnail_path sortal_store c with 245 + | Some path -> Some (c, path) 246 + | None -> None 247 + ) contacts in 217 248 249 + if dry_run then begin 250 + let would_copy = List.filter (fun (c, _src_path) -> 251 + let handle = Sortal_schema.Contact.handle c in 252 + let output_path = Filename.concat output_dir (handle ^ ".png") in 253 + not (Sys.file_exists output_path) 254 + ) contacts_with_thumbs in 255 + let skipped = List.length contacts_with_thumbs - List.length would_copy in 256 + let no_thumb = List.length contacts - List.length contacts_with_thumbs in 218 257 { step = Faces; success = true; 219 - message = Printf.sprintf "%d fetched, %d skipped, %d errors" 220 - ok_count skipped_count error_count; 258 + message = Printf.sprintf "Would copy %d faces from Sortal (%d already exist, %d without thumbnails)" 259 + (List.length would_copy) skipped no_thumb; 260 + details = List.map (fun (c, src_path) -> 261 + let handle = Sortal_schema.Contact.handle c in 262 + Printf.sprintf "cp %s %s/%s.png" (Eio.Path.native_exn src_path) output_dir handle 263 + ) (List.filteri (fun i _ -> i < 5) would_copy) @ 264 + (if List.length would_copy > 5 then ["...and more"] else []) } 265 + end else begin 266 + (* Ensure output directory exists *) 267 + let output_path = Eio.Path.(fs / output_dir) in 268 + Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 output_path; 269 + 270 + let results = List.map (fun (c, src_path) -> 271 + let handle = Sortal_schema.Contact.handle c in 272 + let dst_path = Filename.concat output_dir (handle ^ ".png") in 273 + 274 + if Sys.file_exists dst_path then begin 275 + Log.debug (fun m -> m "Skipping %s: already exists" handle); 276 + (handle, `Skipped) 277 + end else begin 278 + Log.info (fun m -> m "Copying face for %s" handle); 279 + try 280 + let content = Eio.Path.load src_path in 281 + let oc = open_out_bin dst_path in 282 + output_string oc content; 283 + close_out oc; 284 + (handle, `Ok) 285 + with e -> 286 + Log.err (fun m -> m "Failed to copy face for %s: %s" handle (Printexc.to_string e)); 287 + (handle, `Error (Printexc.to_string e)) 288 + end 289 + ) contacts_with_thumbs in 290 + 291 + let ok_count = List.length (List.filter (fun (_, r) -> r = `Ok) results) in 292 + let skipped_count = List.length (List.filter (fun (_, r) -> r = `Skipped) results) in 293 + let error_count = List.length (List.filter (fun (_, r) -> match r with `Error _ -> true | _ -> false) results) in 294 + let no_thumb = List.length contacts - List.length contacts_with_thumbs in 295 + 296 + { step = Faces; success = error_count = 0; 297 + message = Printf.sprintf "%d copied, %d skipped, %d errors, %d without thumbnails" 298 + ok_count skipped_count error_count no_thumb; 221 299 details = List.filter_map (fun (h, r) -> 222 - match r with Bushel_immich.Error e -> Some (h ^ ": " ^ e) | _ -> None 300 + match r with `Error e -> Some (h ^ ": " ^ e) | _ -> None 223 301 ) results } 302 + end 224 303 225 304 (** {1 Video Thumbnails} *) 226 305 227 - let sync_video_thumbnails ~proc_mgr config = 306 + let sync_video_thumbnails ~dry_run ~proc_mgr config = 228 307 Log.info (fun m -> m "Syncing video thumbnails from PeerTube..."); 229 308 let output_dir = Bushel_config.video_thumbs_dir config in 230 309 let videos_yml = Filename.concat config.data_dir "videos.yml" in 231 310 232 311 let index = Bushel_peertube.VideoIndex.load_file videos_yml in 233 - let count = List.length (Bushel_peertube.VideoIndex.to_list index) in 312 + let video_list = Bushel_peertube.VideoIndex.to_list index in 313 + let count = List.length video_list in 234 314 235 315 if count = 0 then begin 236 316 Log.info (fun m -> m "No videos in index"); 237 317 { step = Videos; success = true; 238 318 message = "No videos in index"; 239 319 details = [] } 320 + end else if dry_run then begin 321 + let would_fetch = List.filter (fun (uuid, _server) -> 322 + let output_path = Filename.concat output_dir (uuid ^ ".jpg") in 323 + not (Sys.file_exists output_path) 324 + ) video_list in 325 + let skipped = count - List.length would_fetch in 326 + { step = Videos; success = true; 327 + message = Printf.sprintf "Would fetch %d video thumbnails from PeerTube (%d already exist)" 328 + (List.length would_fetch) skipped; 329 + details = List.map (fun (uuid, server) -> 330 + Printf.sprintf "curl <server:%s>/api/v1/videos/%s -> %s.jpg" server uuid uuid 331 + ) (List.filteri (fun i _ -> i < 5) would_fetch) @ 332 + (if List.length would_fetch > 5 then ["...and more"] else []) } 240 333 end else begin 241 334 let results = Bushel_peertube.fetch_thumbnails_from_index 242 335 ~proc_mgr ··· 261 354 262 355 (** {1 Typesense Upload} *) 263 356 264 - let upload_typesense config _entries = 357 + let upload_typesense ~dry_run config _entries = 265 358 Log.info (fun m -> m "Uploading to Typesense..."); 266 359 267 360 match Bushel_config.typesense_api_key config with ··· 270 363 message = "Missing Typesense API key"; 271 364 details = [e] } 272 365 | Ok _api_key -> 273 - (* TODO: Implement actual Typesense upload using bushel-typesense *) 274 - { step = Typesense; success = true; 275 - message = "Typesense upload (not yet implemented)"; 276 - details = [] } 366 + if dry_run then 367 + { step = Typesense; success = true; 368 + message = "Would upload to Typesense"; 369 + details = ["POST to Typesense API (not yet implemented)"] } 370 + else 371 + (* TODO: Implement actual Typesense upload using bushel-typesense *) 372 + { step = Typesense; success = true; 373 + message = "Typesense upload (not yet implemented)"; 374 + details = [] } 277 375 278 376 (** {1 Run Pipeline} *) 279 377 280 - let run ~env ~config ~steps ~entries = 378 + let run ~dry_run ~env ~config ~steps ~entries = 281 379 let proc_mgr = Eio.Stdenv.process_mgr env in 380 + let fs = Eio.Stdenv.fs env in 282 381 283 382 let results = List.map (fun step -> 284 - Log.info (fun m -> m "Running step: %s" (string_of_step step)); 383 + Log.info (fun m -> m "%s step: %s" 384 + (if dry_run then "Dry-run" else "Running") 385 + (string_of_step step)); 285 386 match step with 286 - | Images -> sync_images ~proc_mgr config 287 - | Srcsetter -> run_srcsetter ~proc_mgr config 288 - | Thumbs -> generate_paper_thumbnails ~proc_mgr config 289 - | Faces -> sync_faces ~proc_mgr config entries 290 - | Videos -> sync_video_thumbnails ~proc_mgr config 291 - | Typesense -> upload_typesense config entries 387 + | Images -> sync_images ~dry_run ~fs ~proc_mgr config 388 + | Srcsetter -> run_srcsetter ~dry_run ~fs ~proc_mgr config 389 + | Thumbs -> generate_paper_thumbnails ~dry_run ~fs ~proc_mgr config 390 + | Faces -> sync_faces ~dry_run ~fs config entries 391 + | Videos -> sync_video_thumbnails ~dry_run ~proc_mgr config 392 + | Typesense -> upload_typesense ~dry_run config entries 292 393 ) steps in 293 394 294 395 (* Summary *) 295 396 let success_count = List.length (List.filter (fun r -> r.success) results) in 296 397 let total = List.length results in 297 - Log.info (fun m -> m "Sync complete: %d/%d steps succeeded" success_count total); 398 + Log.info (fun m -> m "%s complete: %d/%d steps succeeded" 399 + (if dry_run then "Dry-run" else "Sync") 400 + success_count total); 298 401 299 402 results
+4 -1
lib_sync/dune
··· 13 13 uri 14 14 ptime 15 15 logs 16 - fmt)) 16 + fmt 17 + sortal.schema 18 + sortal 19 + srcsetter-cmd))
+23 -13
lib_typesense/bushel_typesense.ml
··· 265 265 (match Bushel.Video.paper v with Some p -> [("paper", `A [`String p])] | None -> []) @ 266 266 (match Bushel.Video.project v with Some p -> [("project", `A [`String p])] | None -> [])) 267 267 268 - let contact_to_document (c : Bushel.Contact.t) = 268 + let contact_to_document (c : Sortal_schema.Contact.t) = 269 + (* Extract atom feed URLs from Sortal feeds *) 270 + let atom_urls = match Sortal_schema.Contact.feeds c with 271 + | Some feeds -> 272 + List.filter_map (fun f -> 273 + if Sortal_schema.Feed.feed_type f = Sortal_schema.Feed.Atom 274 + then Some (Sortal_schema.Feed.url f) 275 + else None 276 + ) feeds 277 + | None -> [] 278 + in 269 279 `O ([ 270 - ("id", `String (Bushel.Contact.handle c)); 271 - ("handle", `String (Bushel.Contact.handle c)); 272 - ("name", `String (Bushel.Contact.name c)); 273 - ("names", `A (List.map (fun n -> `String n) (Bushel.Contact.names c))); 280 + ("id", `String (Sortal_schema.Contact.handle c)); 281 + ("handle", `String (Sortal_schema.Contact.handle c)); 282 + ("name", `String (Sortal_schema.Contact.name c)); 283 + ("names", `A (List.map (fun n -> `String n) (Sortal_schema.Contact.names c))); 274 284 ] @ 275 - (match Bushel.Contact.email c with Some e -> [("email", `A [`String e])] | None -> []) @ 276 - (match Bushel.Contact.github c with Some g -> [("github", `A [`String g])] | None -> []) @ 277 - (match Bushel.Contact.twitter c with Some t -> [("twitter", `A [`String t])] | None -> []) @ 278 - (match Bushel.Contact.bluesky c with Some b -> [("bluesky", `A [`String b])] | None -> []) @ 279 - (match Bushel.Contact.mastodon c with Some m -> [("mastodon", `A [`String m])] | None -> []) @ 280 - (match Bushel.Contact.orcid c with Some o -> [("orcid", `A [`String o])] | None -> []) @ 281 - (match Bushel.Contact.url c with Some u -> [("url", `A [`String u])] | None -> []) @ 282 - (match Bushel.Contact.atom c with Some a -> [("atom", `A (List.map (fun x -> `String x) a))] | None -> [])) 285 + (match Sortal_schema.Contact.current_email c with Some e -> [("email", `A [`String e])] | None -> []) @ 286 + (match Sortal_schema.Contact.github_handle c with Some g -> [("github", `A [`String g])] | None -> []) @ 287 + (match Sortal_schema.Contact.twitter_handle c with Some t -> [("twitter", `A [`String t])] | None -> []) @ 288 + (match Sortal_schema.Contact.bluesky_handle c with Some b -> [("bluesky", `A [`String b])] | None -> []) @ 289 + (match Sortal_schema.Contact.mastodon_handle c with Some m -> [("mastodon", `A [`String m])] | None -> []) @ 290 + (match Sortal_schema.Contact.orcid c with Some o -> [("orcid", `A [`String o])] | None -> []) @ 291 + (match Sortal_schema.Contact.current_url c with Some u -> [("url", `A [`String u])] | None -> []) @ 292 + (if atom_urls = [] then [] else [("atom", `A (List.map (fun x -> `String x) atom_urls))]))
+1 -1
lib_typesense/dune
··· 1 1 (library 2 2 (name bushel_typesense) 3 3 (public_name bushel.typesense) 4 - (libraries bushel jsont ptime)) 4 + (libraries bushel jsont ptime sortal.schema))