this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

remove

-8071
-9
stack/bushel/.gitignore
··· 1 - _build 2 - .*.swp 3 - **/.claude/settings.local.json 4 - .photos-api 5 - .karakeep-api 6 - KARAKEEP.md 7 - karakeep-src 8 - .DS_Store 9 - .openapi-key
-1
stack/bushel/.ocamlformat
··· 1 - profile=janestreet
-127
stack/bushel/bin/bushel_bibtex.ml
··· 1 - open Cmdliner 2 - open Printf 3 - 4 - (** TODO:claude Generate bibtex entry from paper data *) 5 - let generate_bibtex_entry paper = 6 - let open Bushel.Paper in 7 - (* Use slug as the bibtex key/label *) 8 - let bibkey = slug paper in 9 - let bibtype = try bibtype paper with _ -> "misc" in 10 - let title = try title paper with _ -> "Untitled" in 11 - let authors = 12 - let auth_list = try authors paper with _ -> [] in 13 - String.concat " and " auth_list 14 - in 15 - let year = try year paper with _ -> 0 in 16 - 17 - (* Build the bibtex entry *) 18 - let buf = Buffer.create 1024 in 19 - Buffer.add_string buf (sprintf "@%s{%s,\n" bibtype bibkey); 20 - Buffer.add_string buf (sprintf " title = {%s},\n" title); 21 - Buffer.add_string buf (sprintf " author = {%s},\n" authors); 22 - Buffer.add_string buf (sprintf " year = {%d}" year); 23 - 24 - (* Add optional fields *) 25 - (match String.lowercase_ascii bibtype with 26 - | "article" -> 27 - (try 28 - Buffer.add_string buf (sprintf ",\n journal = {%s}" (journal paper)) 29 - with _ -> ()); 30 - (match volume paper with 31 - | Some v -> Buffer.add_string buf (sprintf ",\n volume = {%s}" v) 32 - | None -> ()); 33 - (match issue paper with 34 - | Some i -> Buffer.add_string buf (sprintf ",\n number = {%s}" i) 35 - | None -> ()); 36 - (match pages paper with 37 - | "" -> () 38 - | p -> Buffer.add_string buf (sprintf ",\n pages = {%s}" p)) 39 - | "inproceedings" -> 40 - (try 41 - Buffer.add_string buf (sprintf ",\n booktitle = {%s}" (booktitle paper)) 42 - with _ -> ()); 43 - (match pages paper with 44 - | "" -> () 45 - | p -> Buffer.add_string buf (sprintf ",\n pages = {%s}" p)); 46 - (match publisher paper with 47 - | "" -> () 48 - | p -> Buffer.add_string buf (sprintf ",\n publisher = {%s}" p)) 49 - | "techreport" -> 50 - (try 51 - Buffer.add_string buf (sprintf ",\n institution = {%s}" (institution paper)) 52 - with _ -> ()); 53 - (match number paper with 54 - | Some n -> Buffer.add_string buf (sprintf ",\n number = {%s}" n) 55 - | None -> ()) 56 - | "book" -> 57 - (match publisher paper with 58 - | "" -> () 59 - | p -> Buffer.add_string buf (sprintf ",\n publisher = {%s}" p)); 60 - (try 61 - Buffer.add_string buf (sprintf ",\n isbn = {%s}" (isbn paper)) 62 - with _ -> ()) 63 - | _ -> ()); 64 - 65 - (* Add DOI if available *) 66 - (match doi paper with 67 - | Some d -> Buffer.add_string buf (sprintf ",\n doi = {%s}" d) 68 - | None -> ()); 69 - 70 - (* Add URL if available *) 71 - (match url paper with 72 - | Some u -> Buffer.add_string buf (sprintf ",\n url = {%s}" u) 73 - | None -> ()); 74 - 75 - Buffer.add_string buf "\n}\n"; 76 - Buffer.contents buf 77 - 78 - (** TODO:claude Main function to export bibtex for all papers *) 79 - let export_bibtex base_dir output_file latest_only _env _xdg _profile = 80 - (* Load all papers *) 81 - let bushel = Bushel.load base_dir in 82 - let papers = Bushel.Entry.papers bushel in 83 - 84 - (* Filter to only latest versions if requested *) 85 - let papers = 86 - if latest_only then 87 - List.filter (fun p -> p.Bushel.Paper.latest) papers 88 - else 89 - papers 90 - in 91 - 92 - (* Sort papers by year (most recent first) *) 93 - let papers = List.sort Bushel.Paper.compare papers in 94 - 95 - (* Generate bibtex for each paper *) 96 - let bibtex_entries = List.map generate_bibtex_entry papers in 97 - let bibtex_content = String.concat "\n" bibtex_entries in 98 - 99 - (* Output to file or stdout *) 100 - match output_file with 101 - | None -> 102 - print_string bibtex_content; 103 - 0 104 - | Some file -> 105 - let oc = open_out file in 106 - output_string oc bibtex_content; 107 - close_out oc; 108 - printf "Bibtex exported to %s (%d entries)\n" file (List.length papers); 109 - 0 110 - 111 - (** TODO:claude Command line arguments *) 112 - let output_file_arg = 113 - let doc = "Output file for bibtex (defaults to stdout)" in 114 - Arg.(value & opt (some string) None & info ["o"; "output"] ~docv:"FILE" ~doc) 115 - 116 - let latest_only_arg = 117 - let doc = "Export only the latest version of each paper" in 118 - Arg.(value & flag & info ["latest"] ~doc) 119 - 120 - (** TODO:claude Command term *) 121 - let term : (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Cmdliner.Term.t = 122 - Term.(const export_bibtex $ Bushel_common.base_dir $ output_file_arg $ latest_only_arg) 123 - 124 - let cmd = 125 - let doc = "Export bibtex for all papers" in 126 - let info = Cmd.info "bibtex" ~doc in 127 - Cmd.v info term
-67
stack/bushel/bin/bushel_common.ml
··· 1 - open Cmdliner 2 - 3 - (** TODO:claude Get default base directory from BUSHEL_DATA env variable or current directory *) 4 - let get_default_base_dir () = 5 - match Sys.getenv_opt "BUSHEL_DATA" with 6 - | Some dir -> dir 7 - | None -> "." 8 - 9 - (** TODO:claude Optional base directory term with BUSHEL_DATA env variable support *) 10 - let base_dir = 11 - let doc = "Base directory containing Bushel data (defaults to BUSHEL_DATA env var or current directory)" in 12 - Arg.(value & opt dir (get_default_base_dir ()) & info ["d"; "dir"] ~docv:"DIR" ~doc) 13 - 14 - (** TODO:claude Output directory as option *) 15 - let output_dir ~default = 16 - let doc = "Output directory for generated files" in 17 - Arg.(value & opt string default & info ["o"; "output"] ~docv:"DIR" ~doc) 18 - 19 - (** TODO:claude URL term with custom default *) 20 - let url_term ~default ~doc = 21 - Arg.(value & opt string default & info ["u"; "url"] ~docv:"URL" ~doc) 22 - 23 - (** TODO:claude API key file term *) 24 - let api_key_file ~default = 25 - let doc = "File containing API key" in 26 - Arg.(value & opt string default & info ["k"; "key-file"] ~docv:"FILE" ~doc) 27 - 28 - (** TODO:claude API key term *) 29 - let api_key = 30 - let doc = "API key for authentication" in 31 - Arg.(value & opt (some string) None & info ["api-key"] ~docv:"KEY" ~doc) 32 - 33 - (** TODO:claude Overwrite flag *) 34 - let overwrite = 35 - let doc = "Overwrite existing files" in 36 - Arg.(value & flag & info ["overwrite"] ~doc) 37 - 38 - (** TODO:claude Verbose flag *) 39 - let verbose = 40 - let doc = "Enable verbose output" in 41 - Arg.(value & flag & info ["v"; "verbose"] ~doc) 42 - 43 - (** TODO:claude File path term *) 44 - let file_term ~default ~doc = 45 - Arg.(value & opt string default & info ["f"; "file"] ~docv:"FILE" ~doc) 46 - 47 - (** TODO:claude Channel/handle term *) 48 - let channel ~default = 49 - let doc = "Channel or handle name" in 50 - Arg.(value & opt string default & info ["c"; "channel"] ~docv:"CHANNEL" ~doc) 51 - 52 - (** TODO:claude Optional handle term *) 53 - let handle_opt = 54 - let doc = "Process specific handle" in 55 - Arg.(value & opt (some string) None & info ["h"; "handle"] ~docv:"HANDLE" ~doc) 56 - 57 - (** TODO:claude Tag term for filtering *) 58 - let tag = 59 - let doc = "Tag to filter or apply" in 60 - Arg.(value & opt (some string) None & info ["t"; "tag"] ~docv:"TAG" ~doc) 61 - 62 - (** TODO:claude Limit term *) 63 - let limit = 64 - let doc = "Limit number of items to process" in 65 - Arg.(value & opt (some int) None & info ["l"; "limit"] ~docv:"N" ~doc) 66 - 67 - (* Note: Logging setup is now handled by eiocmd for all bushel binaries *)
-295
stack/bushel/bin/bushel_doi.ml
··· 1 - module ZT = Zotero_translation 2 - open Lwt.Infix 3 - module J = Ezjsonm 4 - open Cmdliner 5 - 6 - (* Extract all DOIs from notes by scanning for doi.org URLs *) 7 - let extract_dois_from_notes notes = 8 - let doi_url_pattern = Re.Perl.compile_pat "https?://(?:dx\\.)?doi\\.org/([^)\\s\"'>]+)" in 9 - let dois = ref [] in 10 - List.iter (fun note -> 11 - let body = Bushel.Note.body note in 12 - let matches = Re.all doi_url_pattern body in 13 - List.iter (fun group -> 14 - try 15 - let encoded_doi = Re.Group.get group 1 in 16 - let doi = Uri.pct_decode encoded_doi in 17 - if not (List.mem doi !dois) then 18 - dois := doi :: !dois 19 - with _ -> () 20 - ) matches 21 - ) notes; 22 - !dois 23 - 24 - (* Extract publisher URLs from notes (Elsevier, ScienceDirect, IEEE, Nature, ACM, Sage, UPenn, Springer, Taylor & Francis, OUP) *) 25 - let extract_publisher_urls_from_notes notes = 26 - (* Matches publisher URLs: linkinghub.elsevier.com, sciencedirect.com/science/article, ieeexplore.ieee.org, academic.oup.com, nature.com, journals.sagepub.com, garfield.library.upenn.edu, link.springer.com, tandfonline.com/doi, and dl.acm.org/doi/10.* URLs *) 27 - let publisher_pattern = Re.Perl.compile_pat "https?://(?:(?:www\\.)?(?:linkinghub\\.elsevier\\.com|(?:www\\.)?sciencedirect\\.com/science/article|ieeexplore\\.ieee\\.org|academic\\.oup\\.com|nature\\.com|journals\\.sagepub\\.com|garfield\\.library\\.upenn\\.edu|link\\.springer\\.com)/[^)\\s\"'>]+|(?:dl\\.acm\\.org|(?:www\\.)?tandfonline\\.com)/doi(?:/pdf)?/10\\.[^)\\s\"'>]+)" in 28 - let urls = ref [] in 29 - List.iter (fun note -> 30 - let body = Bushel.Note.body note in 31 - let matches = Re.all publisher_pattern body in 32 - List.iter (fun group -> 33 - try 34 - let url = Re.Group.get group 0 in 35 - if not (List.mem url !urls) then 36 - urls := url :: !urls 37 - with _ -> () 38 - ) matches 39 - ) notes; 40 - !urls 41 - 42 - (* Resolve a single DOI via Zotero and convert to doi_entry *) 43 - let resolve_doi zt ~verbose doi = 44 - Printf.printf "Resolving DOI: %s\n%!" doi; 45 - let doi_url = Printf.sprintf "https://doi.org/%s" doi in 46 - Lwt.catch 47 - (fun () -> 48 - ZT.json_of_doi zt ~slug:"temp" doi >>= fun json -> 49 - if verbose then begin 50 - Printf.printf " Raw Zotero response:\n%s\n%!" (J.value_to_string json) 51 - end; 52 - try 53 - let keys = J.get_dict (json :> J.value) in 54 - let title = J.find json ["title"] |> J.get_string in 55 - let authors = J.find json ["author"] |> J.get_list J.get_string in 56 - let year = J.find json ["year"] |> J.get_string |> int_of_string in 57 - let bibtype = J.find json ["bibtype"] |> J.get_string in 58 - let publisher = 59 - try 60 - (* Try journal first, then booktitle, then proceedingsTitle, then publisher *) 61 - match List.assoc_opt "journal" keys with 62 - | Some j -> J.get_string j 63 - | None -> 64 - match List.assoc_opt "booktitle" keys with 65 - | Some b -> J.get_string b 66 - | None -> 67 - match List.assoc_opt "proceedingsTitle" keys with 68 - | Some pt -> J.get_string pt 69 - | None -> 70 - match List.assoc_opt "publisher" keys with 71 - | Some p -> J.get_string p 72 - | None -> "" 73 - with _ -> "" 74 - in 75 - let entry = Bushel.Doi_entry.create_resolved ~doi ~title ~authors ~year ~bibtype ~publisher ~source_urls:[doi_url] () in 76 - Printf.printf " ✓ Resolved: %s (%d)\n%!" title year; 77 - Lwt.return entry 78 - with e -> 79 - Printf.eprintf " ✗ Failed to parse response for %s: %s\n%!" doi (Printexc.to_string e); 80 - Lwt.return (Bushel.Doi_entry.create_failed ~doi ~error:(Printexc.to_string e) ~source_urls:[doi_url] ()) 81 - ) 82 - (fun exn -> 83 - Printf.eprintf " ✗ Failed to resolve %s: %s\n%!" doi (Printexc.to_string exn); 84 - Lwt.return (Bushel.Doi_entry.create_failed ~doi ~error:(Printexc.to_string exn) ~source_urls:[doi_url] ()) 85 - ) 86 - 87 - (* Resolve a publisher URL via Zotero /web endpoint *) 88 - let resolve_url zt ~verbose url = 89 - Printf.printf "Resolving URL: %s\n%!" url; 90 - Lwt.catch 91 - (fun () -> 92 - (* Use Zotero's resolve_url which calls /web endpoint with the URL directly *) 93 - ZT.resolve_url zt url >>= function 94 - | Error (`Msg err) -> 95 - Printf.eprintf " ✗ Failed to resolve URL: %s\n%!" err; 96 - Lwt.return (Bushel.Doi_entry.create_failed ~doi:url ~error:err ~source_urls:[url] ()) 97 - | Ok json -> 98 - if verbose then begin 99 - Printf.printf " Raw Zotero response:\n%s\n%!" (J.value_to_string json) 100 - end; 101 - try 102 - (* Extract metadata from the JSON response *) 103 - let json_list = match json with 104 - | `A lst -> lst 105 - | single -> [single] 106 - in 107 - match json_list with 108 - | [] -> 109 - Printf.eprintf " ✗ Empty response\n%!"; 110 - Lwt.return (Bushel.Doi_entry.create_failed ~doi:url ~error:"Empty response" ~source_urls:[url] ()) 111 - | item :: _ -> 112 - (* Extract DOI if present, otherwise use URL *) 113 - let doi = try J.find item ["DOI"] |> J.get_string with _ -> 114 - try J.find item ["doi"] |> J.get_string with _ -> url 115 - in 116 - let title = try J.find item ["title"] |> J.get_string with _ -> 117 - "Unknown Title" 118 - in 119 - (* Extract authors from Zotero's "creators" field *) 120 - let authors = try 121 - J.find item ["creators"] |> J.get_list (fun creator_obj -> 122 - try 123 - let last_name = J.find creator_obj ["lastName"] |> J.get_string in 124 - let first_name = try J.find creator_obj ["firstName"] |> J.get_string with _ -> "" in 125 - if first_name = "" then last_name else first_name ^ " " ^ last_name 126 - with _ -> "Unknown Author" 127 - ) 128 - with _ -> [] 129 - in 130 - (* Extract year from Zotero's "date" field *) 131 - (* Handles both ISO format "2025-07" and text format "November 28, 2023" *) 132 - let year = try 133 - let date_str = J.find item ["date"] |> J.get_string in 134 - (* First try splitting on '-' for ISO dates like "2025-07" or "2024-11-04" *) 135 - let parts = String.split_on_char '-' date_str in 136 - match parts with 137 - | year_str :: _ when String.length year_str = 4 -> 138 - (try int_of_string year_str with _ -> 0) 139 - | _ -> 140 - (* Try splitting on space and comma for dates like "November 28, 2023" *) 141 - let space_parts = String.split_on_char ' ' date_str in 142 - let year_candidate = List.find_opt (fun s -> 143 - let s = String.trim (String.map (fun c -> if c = ',' then ' ' else c) s) in 144 - String.length s = 4 && String.for_all (function '0'..'9' -> true | _ -> false) s 145 - ) space_parts in 146 - (match year_candidate with 147 - | Some year_str -> int_of_string (String.trim year_str) 148 - | None -> 0) 149 - with _ -> 0 150 - in 151 - (* Extract type/bibtype from Zotero's "itemType" field *) 152 - let bibtype = try J.find item ["itemType"] |> J.get_string with _ -> "article" in 153 - (* Extract publisher/journal from Zotero's "publicationTitle" or "proceedingsTitle" field *) 154 - let publisher = try 155 - J.find item ["publicationTitle"] |> J.get_string 156 - with _ -> 157 - try J.find item ["proceedingsTitle"] |> J.get_string 158 - with _ -> "" 159 - in 160 - (* Include both the original URL and the DOI URL in source_urls *) 161 - let doi_url = if doi = url then [] else [Printf.sprintf "https://doi.org/%s" doi] in 162 - let source_urls = url :: doi_url in 163 - let entry = Bushel.Doi_entry.create_resolved ~doi ~title ~authors ~year ~bibtype ~publisher ~source_urls () in 164 - Printf.printf " ✓ Resolved: %s (%d) [DOI: %s]\n%!" title year doi; 165 - Lwt.return entry 166 - with e -> 167 - Printf.eprintf " ✗ Failed to parse response: %s\n%!" (Printexc.to_string e); 168 - Lwt.return (Bushel.Doi_entry.create_failed ~doi:url ~error:(Printexc.to_string e) ~source_urls:[url] ()) 169 - ) 170 - (fun exn -> 171 - Printf.eprintf " ✗ Failed to resolve %s: %s\n%!" url (Printexc.to_string exn); 172 - Lwt.return (Bushel.Doi_entry.create_failed ~doi:url ~error:(Printexc.to_string exn) ~source_urls:[url] ()) 173 - ) 174 - 175 - let run base_dir force verbose = 176 - Printf.printf "Loading bushel database...\n%!"; 177 - let entries = Bushel.load base_dir in 178 - let notes = Bushel.Entry.notes entries in 179 - 180 - Printf.printf "Scanning %d notes for DOI URLs...\n%!" (List.length notes); 181 - let found_dois = extract_dois_from_notes notes in 182 - Printf.printf "Found %d unique DOIs\n%!" (List.length found_dois); 183 - 184 - Printf.printf "Scanning %d notes for publisher URLs...\n%!" (List.length notes); 185 - let found_urls = extract_publisher_urls_from_notes notes in 186 - Printf.printf "Found %d unique publisher URLs\n%!" (List.length found_urls); 187 - 188 - let data_dir = Bushel.Entry.data_dir entries in 189 - let doi_yml_path = Filename.concat data_dir "doi.yml" in 190 - Printf.printf "Loading existing DOI cache from %s...\n%!" doi_yml_path; 191 - let existing_entries = Bushel.Doi_entry.load doi_yml_path in 192 - Printf.printf "Loaded %d cached DOI entries\n%!" (List.length existing_entries); 193 - 194 - (* Filter DOIs that need resolution *) 195 - let dois_to_resolve = 196 - List.filter (fun doi -> 197 - match Bushel.Doi_entry.find_by_doi_including_ignored existing_entries doi with 198 - | Some _ when not force -> 199 - Printf.printf "Skipping DOI %s (already cached)\n%!" doi; 200 - false 201 - | Some _ when force -> 202 - Printf.printf "Re-resolving DOI %s (--force)\n%!" doi; 203 - true 204 - | Some _ -> false (* Catch-all for Some case *) 205 - | None -> true 206 - ) found_dois 207 - in 208 - 209 - (* Filter URLs that need resolution *) 210 - let urls_to_resolve = 211 - List.filter (fun url -> 212 - match Bushel.Doi_entry.find_by_url_including_ignored existing_entries url with 213 - | Some _ when not force -> 214 - Printf.printf "Skipping URL %s (already cached)\n%!" url; 215 - false 216 - | Some _ when force -> 217 - Printf.printf "Re-resolving URL %s (--force)\n%!" url; 218 - true 219 - | Some _ -> false (* Catch-all for Some case *) 220 - | None -> true 221 - ) found_urls 222 - in 223 - 224 - if List.length dois_to_resolve = 0 && List.length urls_to_resolve = 0 then begin 225 - Printf.printf "No DOIs or URLs to resolve!\n%!"; 226 - 0 227 - end else begin 228 - Printf.printf "Resolving %d DOI(s) and %d URL(s)...\n%!" (List.length dois_to_resolve) (List.length urls_to_resolve); 229 - 230 - let zt = ZT.v "http://svr-avsm2-eeg-ce:1969" in 231 - 232 - (* Resolve all DOIs *) 233 - let resolved_doi_entries_lwt = 234 - Lwt_list.map_s (resolve_doi zt ~verbose) dois_to_resolve 235 - in 236 - 237 - (* Resolve all publisher URLs *) 238 - let resolved_url_entries_lwt = 239 - Lwt_list.map_s (resolve_url zt ~verbose) urls_to_resolve 240 - in 241 - 242 - let new_doi_entries = Lwt_main.run resolved_doi_entries_lwt in 243 - let new_url_entries = Lwt_main.run resolved_url_entries_lwt in 244 - let new_entries = new_doi_entries @ new_url_entries in 245 - 246 - (* Merge with existing entries, combining source_urls for entries with the same DOI *) 247 - let all_entries = 248 - if force then 249 - (* Replace existing entries with new ones - match by DOI *) 250 - let is_updated entry = 251 - List.exists (fun new_e -> 252 - new_e.Bushel.Doi_entry.doi = entry.Bushel.Doi_entry.doi 253 - ) new_entries 254 - in 255 - let kept_existing = List.filter (fun e -> not (is_updated e)) existing_entries in 256 - kept_existing @ new_entries 257 - else 258 - (* Merge new entries with existing ones, combining source_urls *) 259 - let merged = ref existing_entries in 260 - List.iter (fun new_entry -> 261 - match Bushel.Doi_entry.find_by_doi_including_ignored !merged new_entry.Bushel.Doi_entry.doi with 262 - | Some existing_entry -> 263 - (* DOI already exists - merge the entries by combining source_urls and preserving ignore flag *) 264 - let combined = Bushel.Doi_entry.merge_entries existing_entry new_entry in 265 - merged := combined :: (List.filter (fun e -> e.Bushel.Doi_entry.doi <> new_entry.Bushel.Doi_entry.doi) !merged) 266 - | None -> 267 - (* New DOI - add it *) 268 - merged := new_entry :: !merged 269 - ) new_entries; 270 - !merged 271 - in 272 - 273 - (* Save updated cache *) 274 - Printf.printf "Saving %d total entries to %s...\n%!" (List.length all_entries) doi_yml_path; 275 - Bushel.Doi_entry.save doi_yml_path all_entries; 276 - 277 - Printf.printf "Done!\n%!"; 278 - 0 279 - end 280 - 281 - let force_flag = 282 - let doc = "Force re-resolution of already cached DOIs" in 283 - Arg.(value & flag & info ["force"; "f"] ~doc) 284 - 285 - let verbose_flag = 286 - let doc = "Show raw Zotero API responses for debugging" in 287 - Arg.(value & flag & info ["verbose"; "v"] ~doc) 288 - 289 - let term = 290 - Term.(const run $ Bushel_common.base_dir $ force_flag $ verbose_flag) 291 - 292 - let cmd = 293 - let doc = "Resolve DOIs found in notes via Zotero Translation Server" in 294 - let info = Cmd.info "doi-resolve" ~doc in 295 - Cmd.v info term
-182
stack/bushel/bin/bushel_faces.ml
··· 1 - open Cmdliner 2 - open Lwt.Infix 3 - open Printf 4 - 5 - (* Type for person response *) 6 - type person = { 7 - id: string; 8 - name: string; 9 - thumbnailPath: string option; 10 - } 11 - 12 - (* Parse a person from JSON *) 13 - let parse_person json = 14 - let open Ezjsonm in 15 - let id = find json ["id"] |> get_string in 16 - let name = find json ["name"] |> get_string in 17 - let thumbnailPath = 18 - try Some (find json ["thumbnailPath"] |> get_string) 19 - with _ -> None 20 - in 21 - { id; name; thumbnailPath } 22 - 23 - (* Parse a list of people from JSON response *) 24 - let parse_people_response json = 25 - let open Ezjsonm in 26 - get_list parse_person json 27 - 28 - (* Read API key from file *) 29 - let read_api_key file = 30 - let ic = open_in file in 31 - let key = input_line ic in 32 - close_in ic; 33 - key 34 - 35 - (* Search for a person by name *) 36 - let search_person base_url api_key name = 37 - let open Cohttp_lwt_unix in 38 - let headers = Cohttp.Header.init_with "X-Api-Key" api_key in 39 - let encoded_name = Uri.pct_encode name in 40 - let url = Printf.sprintf "%s/api/search/person?name=%s" base_url encoded_name in 41 - 42 - Client.get ~headers (Uri.of_string url) >>= fun (resp, body) -> 43 - if resp.status = `OK then 44 - Cohttp_lwt.Body.to_string body >>= fun body_str -> 45 - let json = Ezjsonm.from_string body_str in 46 - Lwt.return (parse_people_response json) 47 - else 48 - let status_code = Cohttp.Code.code_of_status resp.status in 49 - Lwt.fail_with (Printf.sprintf "HTTP error: %d" status_code) 50 - 51 - (* Download thumbnail for a person *) 52 - let download_thumbnail base_url api_key person_id output_path = 53 - let open Cohttp_lwt_unix in 54 - let headers = Cohttp.Header.init_with "X-Api-Key" api_key in 55 - let url = Printf.sprintf "%s/api/people/%s/thumbnail" base_url person_id in 56 - 57 - Client.get ~headers (Uri.of_string url) >>= fun (resp, body) -> 58 - match resp.status with 59 - | `OK -> 60 - Cohttp_lwt.Body.to_string body >>= fun img_data -> 61 - (* Ensure output directory exists *) 62 - (try 63 - let dir = Filename.dirname output_path in 64 - if not (Sys.file_exists dir) then Unix.mkdir dir 0o755; 65 - Lwt.return_unit 66 - with _ -> Lwt.return_unit) >>= fun () -> 67 - Lwt_io.with_file ~mode:Lwt_io.output output_path 68 - (fun oc -> Lwt_io.write oc img_data) >>= fun () -> 69 - Lwt.return_ok output_path 70 - | _ -> 71 - let status_code = Cohttp.Code.code_of_status resp.status in 72 - Lwt.return_error (Printf.sprintf "HTTP error: %d" status_code) 73 - 74 - (* Get face for a single contact *) 75 - (* TODO:claude *) 76 - let get_face_for_contact base_url api_key output_dir contact = 77 - let names = Bushel.Contact.names contact in 78 - let handle = Bushel.Contact.handle contact in 79 - let output_path = Filename.concat output_dir (handle ^ ".jpg") in 80 - 81 - (* Skip if file already exists *) 82 - if Sys.file_exists output_path then 83 - Lwt.return (`Skipped (sprintf "Thumbnail for '%s' already exists at %s" (List.hd names) output_path)) 84 - else begin 85 - printf "Processing contact: %s (handle: %s)\n%!" (List.hd names) handle; 86 - 87 - (* Try each name in the list until we find a match *) 88 - let rec try_names = function 89 - | [] -> 90 - Lwt.return (`Error (sprintf "No person found with any name for contact '%s'" handle)) 91 - | name :: rest_names -> 92 - printf " Trying name: %s\n%!" name; 93 - search_person base_url api_key name >>= function 94 - | [] -> 95 - printf " No results for '%s', trying next name...\n%!" name; 96 - try_names rest_names 97 - | person :: _ -> 98 - printf " Found match for '%s'\n%!" name; 99 - download_thumbnail base_url api_key person.id output_path >>= function 100 - | Ok path -> 101 - Lwt.return (`Ok (sprintf "Saved thumbnail for '%s' to %s" name path)) 102 - | Error err -> 103 - Lwt.return (`Error (sprintf "Error for '%s': %s" name err)) 104 - in 105 - try_names names 106 - end 107 - 108 - (* Process all contacts or a specific one *) 109 - let process_contacts base_dir output_dir specific_handle api_key base_url = 110 - printf "Loading Bushel database from %s\n%!" base_dir; 111 - let db = Bushel.load base_dir in 112 - let contacts = Bushel.Entry.contacts db in 113 - printf "Found %d contacts\n%!" (List.length contacts); 114 - 115 - (* Ensure output directory exists *) 116 - if not (Sys.file_exists output_dir) then Unix.mkdir output_dir 0o755; 117 - 118 - (* Filter contacts based on specific_handle if provided *) 119 - let contacts_to_process = 120 - match specific_handle with 121 - | Some handle -> 122 - begin match Bushel.Contact.find_by_handle contacts handle with 123 - | Some contact -> [contact] 124 - | None -> 125 - eprintf "No contact found with handle '%s'\n%!" handle; 126 - [] 127 - end 128 - | None -> contacts 129 - in 130 - 131 - (* Process each contact *) 132 - let results = Lwt_main.run begin 133 - Lwt_list.map_s 134 - (fun contact -> 135 - get_face_for_contact base_url api_key output_dir contact >>= fun result -> 136 - Lwt.return (Bushel.Contact.handle contact, result)) 137 - contacts_to_process 138 - end in 139 - 140 - (* Print summary *) 141 - let ok_count = List.length (List.filter (fun (_, r) -> match r with `Ok _ -> true | _ -> false) results) in 142 - let error_count = List.length (List.filter (fun (_, r) -> match r with `Error _ -> true | _ -> false) results) in 143 - let skipped_count = List.length (List.filter (fun (_, r) -> match r with `Skipped _ -> true | _ -> false) results) in 144 - 145 - printf "\nSummary:\n"; 146 - printf " Successfully processed: %d\n" ok_count; 147 - printf " Errors: %d\n" error_count; 148 - printf " Skipped (already exist): %d\n" skipped_count; 149 - 150 - (* Print detailed results *) 151 - if error_count > 0 then begin 152 - printf "\nError details:\n"; 153 - List.iter (fun (handle, result) -> 154 - match result with 155 - | `Error msg -> printf " %s: %s\n" handle msg 156 - | _ -> ()) 157 - results; 158 - end; 159 - 160 - if ok_count > 0 || skipped_count > 0 then 0 else 1 161 - 162 - (* Command line interface *) 163 - 164 - (* Export the term for use in main bushel.ml *) 165 - let term = 166 - Term.( 167 - const (fun base_dir output_dir handle api_key_file base_url -> 168 - try 169 - let api_key = read_api_key api_key_file in 170 - process_contacts base_dir output_dir handle api_key base_url 171 - with e -> 172 - eprintf "Error: %s\n%!" (Printexc.to_string e); 173 - 1 174 - ) $ Bushel_common.base_dir $ Bushel_common.output_dir ~default:"." $ Bushel_common.handle_opt $ 175 - Bushel_common.api_key_file ~default:".photos-api" $ 176 - Bushel_common.url_term ~default:"https://photos.recoil.org" ~doc:"Base URL of the Immich instance") 177 - 178 - let cmd = 179 - let info = Cmd.info "faces" ~doc:"Retrieve face thumbnails for Bushel contacts from Immich" in 180 - Cmd.v info term 181 - 182 - (* Main entry point removed - accessed through bushel_main.ml *)
-77
stack/bushel/bin/bushel_ideas.ml
··· 1 - open Cmdliner 2 - 3 - (** TODO:claude List completed ideas as markdown bullet list *) 4 - let list_ideas_md base_dir = 5 - let ideas_dir = Printf.sprintf "%s/ideas" base_dir in 6 - let contacts_dir = Printf.sprintf "%s/contacts" base_dir in 7 - 8 - if not (Sys.file_exists ideas_dir) then ( 9 - Printf.eprintf "Ideas directory not found: %s\n" ideas_dir; 10 - 1 11 - ) else ( 12 - (* Load all contacts *) 13 - let contacts = 14 - if Sys.file_exists contacts_dir then 15 - Sys.readdir contacts_dir 16 - |> Array.to_list 17 - |> List.filter (String.ends_with ~suffix:".md") 18 - |> List.filter_map (fun contact_file -> 19 - let filepath = Filename.concat contacts_dir contact_file in 20 - try Some (Bushel.Contact.of_md filepath) 21 - with e -> 22 - Printf.eprintf "Error loading contact %s: %s\n" filepath (Printexc.to_string e); 23 - None 24 - ) 25 - else [] 26 - in 27 - 28 - let idea_files = Sys.readdir ideas_dir 29 - |> Array.to_list 30 - |> List.filter (String.ends_with ~suffix:".md") in 31 - let ideas = List.filter_map (fun idea_file -> 32 - let filepath = Filename.concat ideas_dir idea_file in 33 - try 34 - let idea = Bushel.Idea.of_md filepath in 35 - match Bushel.Idea.status idea with 36 - | Bushel.Idea.Completed -> Some idea 37 - | _ -> None 38 - with e -> 39 - Printf.eprintf "Error processing %s: %s\n" filepath (Printexc.to_string e); 40 - None 41 - ) idea_files in 42 - 43 - (* Sort by year descending *) 44 - let sorted_ideas = List.sort (fun a b -> 45 - compare (Bushel.Idea.year b) (Bushel.Idea.year a) 46 - ) ideas in 47 - 48 - (* Output as markdown bullet list *) 49 - List.iter (fun idea -> 50 - let student_names = 51 - Bushel.Idea.students idea 52 - |> List.filter_map (fun handle -> 53 - match Bushel.Contact.find_by_handle contacts handle with 54 - | Some contact -> Some (Bushel.Contact.name contact) 55 - | None -> 56 - Printf.eprintf "Warning: contact not found for handle %s\n" handle; 57 - Some handle 58 - ) 59 - |> String.concat ", " 60 - in 61 - let level_str = Bushel.Idea.level_to_string (Bushel.Idea.level idea) in 62 - Printf.printf "- %d: \"%s\", %s (%s)\n" 63 - (Bushel.Idea.year idea) 64 - (Bushel.Idea.title idea) 65 - student_names 66 - level_str 67 - ) sorted_ideas; 68 - 0 69 - ) 70 - 71 - let term = 72 - Term.(const list_ideas_md $ Bushel_common.base_dir) 73 - 74 - let cmd = 75 - let doc = "List completed ideas as markdown bullet list" in 76 - let info = Cmd.info "ideas-md" ~doc in 77 - Cmd.v info term
-227
stack/bushel/bin/bushel_info.ml
··· 1 - open Cmdliner 2 - open Bushel 3 - 4 - (** Determine the color for a note based on DOI and perma status *) 5 - let note_color n = 6 - match Note.doi n, Note.perma n with 7 - | None, false -> `Red (* No DOI, no perma - red (normal note) *) 8 - | None, true -> `Magenta (* Has perma but no DOI - magenta (needs DOI assignment) *) 9 - | Some _, true -> `Green (* Has DOI with perma:true - green (correct state) *) 10 - | Some _, false -> `Yellow (* Has DOI without perma:true - yellow (bug in metadata) *) 11 - 12 - (** TODO:claude List all slugs with their types *) 13 - let list_all_slugs entries ~notes_only = 14 - let all = Entry.all_entries entries in 15 - (* Filter for notes only if requested *) 16 - let filtered = if notes_only then 17 - List.filter (fun entry -> match entry with `Note _ -> true | _ -> false) all 18 - else all in 19 - (* Sort by slug for consistent output *) 20 - let sorted = List.sort (fun a b -> 21 - String.compare (Entry.slug a) (Entry.slug b) 22 - ) filtered in 23 - Fmt.pr "@[<v>"; 24 - Fmt.pr "%a@," (Fmt.styled `Bold Fmt.string) (if notes_only then "Available notes:" else "Available entries:"); 25 - Fmt.pr "@,"; 26 - List.iter (fun entry -> 27 - let slug = Entry.slug entry in 28 - let type_str = Entry.to_type_string entry in 29 - let title = Entry.title entry in 30 - (* Color code notes based on DOI/perma status *) 31 - match entry with 32 - | `Note n -> 33 - let color = note_color n in 34 - Fmt.pr " %a %a - %a@," 35 - (Fmt.styled color Fmt.string) slug 36 - (Fmt.styled `Faint Fmt.string) (Printf.sprintf "(%s)" type_str) 37 - Fmt.string title 38 - | _ -> 39 - Fmt.pr " %a %a - %a@," 40 - (Fmt.styled `Cyan Fmt.string) slug 41 - (Fmt.styled `Faint Fmt.string) (Printf.sprintf "(%s)" type_str) 42 - Fmt.string title 43 - ) sorted; 44 - Fmt.pr "@]@."; 45 - 0 46 - 47 - (** TODO:claude Main info command implementation *) 48 - let info_cmd base_dir notes_only slug_opt _env _xdg _profile = 49 - let entries = load base_dir in 50 - match slug_opt with 51 - | None -> 52 - list_all_slugs entries ~notes_only 53 - | Some slug -> 54 - (* Handle contact handles starting with @ *) 55 - if String.starts_with ~prefix:"@" slug then 56 - let handle = String.sub slug 1 (String.length slug - 1) in 57 - match Contact.find_by_handle (Entry.contacts entries) handle with 58 - | None -> 59 - Fmt.epr "Error: No contact found with handle '@%s'@." handle; 60 - 1 61 - | Some contact -> 62 - Contact.pp Fmt.stdout contact; 63 - (* Add thumbnail information for contact *) 64 - (match Entry.contact_thumbnail_slug contact with 65 - | Some thumb_slug -> 66 - Fmt.pr "@.@."; 67 - Fmt.pr "@[<v>%a: %s@," (Fmt.styled `Bold Fmt.string) "Thumbnail Slug" thumb_slug; 68 - (* Look up the image in srcsetter *) 69 - (match Entry.lookup_image entries thumb_slug with 70 - | Some img -> 71 - let thumbnail_url = Entry.smallest_webp_variant img in 72 - Fmt.pr "%a: %s@," (Fmt.styled `Bold Fmt.string) "Thumbnail URL" thumbnail_url; 73 - Fmt.pr "%a: %s@," (Fmt.styled `Bold Fmt.string) "Origin" (Srcsetter.origin img); 74 - let (w, h) = Srcsetter.dims img in 75 - Fmt.pr "%a: %dx%d@," (Fmt.styled `Bold Fmt.string) "Dimensions" w h; 76 - let variants = Srcsetter.variants img in 77 - if not (Srcsetter.MS.is_empty variants) then begin 78 - Fmt.pr "%a:@," (Fmt.styled `Bold Fmt.string) "Variants"; 79 - Srcsetter.MS.iter (fun name (vw, vh) -> 80 - Fmt.pr " %s: %dx%d@," name vw vh 81 - ) variants 82 - end; 83 - Fmt.pr "@]" 84 - | None -> 85 - Fmt.epr "Warning: Contact thumbnail image not in srcsetter: %s@." thumb_slug; 86 - Fmt.pr "@]"; 87 - ()) 88 - | None -> ()); 89 - (* Add Typesense JSON *) 90 - let doc = Typesense.contact_to_document contact in 91 - Fmt.pr "@.@."; 92 - Fmt.pr "%a:@," (Fmt.styled `Bold Fmt.string) "Typesense Document"; 93 - Fmt.pr "%s@," (Ezjsonm.value_to_string ~minify:false doc); 94 - (* Add backlinks information for contact *) 95 - let backlinks = Bushel.Link_graph.get_backlinks_for_slug handle in 96 - if backlinks <> [] then begin 97 - Fmt.pr "@.@."; 98 - Fmt.pr "%a (%d):@," (Fmt.styled `Bold Fmt.string) "Backlinks" (List.length backlinks); 99 - List.iter (fun source_slug -> 100 - match Entry.lookup entries source_slug with 101 - | Some source_entry -> 102 - let source_type = Entry.to_type_string source_entry in 103 - let source_title = Entry.title source_entry in 104 - Fmt.pr " %a %a - %a@," 105 - (Fmt.styled `Cyan Fmt.string) source_slug 106 - (Fmt.styled `Faint Fmt.string) (Printf.sprintf "(%s)" source_type) 107 - Fmt.string source_title 108 - | None -> 109 - Fmt.pr " %a %a@," 110 - (Fmt.styled `Cyan Fmt.string) source_slug 111 - (Fmt.styled `Red Fmt.string) "(not found)" 112 - ) backlinks 113 - end; 114 - Fmt.pr "@."; 115 - 0 116 - else 117 - (* Remove leading ':' if present, as slugs are stored without it *) 118 - let normalized_slug = 119 - if String.starts_with ~prefix:":" slug 120 - then String.sub slug 1 (String.length slug - 1) 121 - else slug 122 - in 123 - match Entry.lookup entries normalized_slug with 124 - | None -> 125 - Fmt.epr "Error: No entry found with slug '%s'@." slug; 126 - 1 127 - | Some entry -> 128 - (match entry with 129 - | `Paper p -> Paper.pp Fmt.stdout p 130 - | `Project p -> Project.pp Fmt.stdout p 131 - | `Idea i -> Idea.pp Fmt.stdout i 132 - | `Video v -> Video.pp Fmt.stdout v 133 - | `Note n -> Note.pp Fmt.stdout n); 134 - (* Add thumbnail information if available *) 135 - (match Entry.thumbnail_slug entries entry with 136 - | Some thumb_slug -> 137 - Fmt.pr "@.@."; 138 - Fmt.pr "@[<v>%a: %s@," (Fmt.styled `Bold Fmt.string) "Thumbnail Slug" thumb_slug; 139 - (* Look up the image in srcsetter *) 140 - (match Entry.lookup_image entries thumb_slug with 141 - | Some img -> 142 - let thumbnail_url = Entry.smallest_webp_variant img in 143 - Fmt.pr "%a: %s@," (Fmt.styled `Bold Fmt.string) "Thumbnail URL" thumbnail_url; 144 - Fmt.pr "%a: %s@," (Fmt.styled `Bold Fmt.string) "Origin" (Srcsetter.origin img); 145 - let (w, h) = Srcsetter.dims img in 146 - Fmt.pr "%a: %dx%d@," (Fmt.styled `Bold Fmt.string) "Dimensions" w h; 147 - let variants = Srcsetter.variants img in 148 - if not (Srcsetter.MS.is_empty variants) then begin 149 - Fmt.pr "%a:@," (Fmt.styled `Bold Fmt.string) "Variants"; 150 - Srcsetter.MS.iter (fun name (vw, vh) -> 151 - Fmt.pr " %s: %dx%d@," name vw vh 152 - ) variants 153 - end; 154 - Fmt.pr "@]" 155 - | None -> 156 - Fmt.epr "Warning: Thumbnail image not in srcsetter: %s@." thumb_slug; 157 - Fmt.pr "@]"; 158 - ()) 159 - | None -> ()); 160 - (* Add Typesense JSON *) 161 - let doc = match entry with 162 - | `Paper p -> Typesense.paper_to_document entries p 163 - | `Project p -> Typesense.project_to_document entries p 164 - | `Idea i -> Typesense.idea_to_document entries i 165 - | `Video v -> Typesense.video_to_document entries v 166 - | `Note n -> Typesense.note_to_document entries n 167 - in 168 - Fmt.pr "@.@."; 169 - Fmt.pr "%a:@," (Fmt.styled `Bold Fmt.string) "Typesense Document"; 170 - Fmt.pr "%s@," (Ezjsonm.value_to_string ~minify:false doc); 171 - (* Add backlinks information *) 172 - let backlinks = Bushel.Link_graph.get_backlinks_for_slug normalized_slug in 173 - if backlinks <> [] then begin 174 - Fmt.pr "@.@."; 175 - Fmt.pr "%a (%d):@," (Fmt.styled `Bold Fmt.string) "Backlinks" (List.length backlinks); 176 - List.iter (fun source_slug -> 177 - match Entry.lookup entries source_slug with 178 - | Some source_entry -> 179 - let source_type = Entry.to_type_string source_entry in 180 - let source_title = Entry.title source_entry in 181 - Fmt.pr " %a %a - %a@," 182 - (Fmt.styled `Cyan Fmt.string) source_slug 183 - (Fmt.styled `Faint Fmt.string) (Printf.sprintf "(%s)" source_type) 184 - Fmt.string source_title 185 - | None -> 186 - Fmt.pr " %a %a@," 187 - (Fmt.styled `Cyan Fmt.string) source_slug 188 - (Fmt.styled `Red Fmt.string) "(not found)" 189 - ) backlinks 190 - end; 191 - (* Add references information for notes *) 192 - (match entry with 193 - | `Note n -> 194 - let default_author = match Contact.find_by_handle (Entry.contacts entries) "avsm" with 195 - | Some c -> c 196 - | None -> failwith "Default author 'avsm' not found" 197 - in 198 - let references = Md.note_references entries default_author n in 199 - if references <> [] then begin 200 - Fmt.pr "@.@."; 201 - Fmt.pr "%a (%d):@," (Fmt.styled `Bold Fmt.string) "References" (List.length references); 202 - List.iter (fun (doi, citation, _is_paper) -> 203 - Fmt.pr " %a: %s@," 204 - (Fmt.styled `Cyan Fmt.string) doi 205 - citation 206 - ) references 207 - end 208 - | _ -> ()); 209 - Fmt.pr "@."; 210 - 0 211 - 212 - (** TODO:claude Command line interface definition *) 213 - let notes_only_flag = 214 - let doc = "Show only notes when listing entries" in 215 - Arg.(value & flag & info ["notes-only"; "n"] ~doc) 216 - 217 - let slug_arg = 218 - let doc = "The slug of the entry to display (with or without leading ':'), or contact handle (with '@' prefix). If not provided, lists all available slugs." in 219 - Arg.(value & pos 0 (some string) None & info [] ~docv:"SLUG" ~doc) 220 - 221 - let term : (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Cmdliner.Term.t = 222 - Term.(const info_cmd $ Bushel_common.base_dir $ notes_only_flag $ slug_arg) 223 - 224 - let cmd = 225 - let doc = "Display all information for a given slug" in 226 - let info = Cmd.info "info" ~doc in 227 - Cmd.v info term
-549
stack/bushel/bin/bushel_links.ml
··· 1 - open Cmdliner 2 - open Lwt.Infix 3 - 4 - (* Helper function for logging with proper flushing *) 5 - let log fmt = Fmt.kstr (fun s -> prerr_string s; flush stderr) fmt 6 - let log_verbose verbose fmt = 7 - if verbose then Fmt.kstr (fun s -> prerr_string s; flush stderr) fmt 8 - else Fmt.kstr (fun _ -> ()) fmt 9 - 10 - (* Initialize a new links.yml file or ensure it exists *) 11 - let init_links_file links_file = 12 - if Sys.file_exists links_file then 13 - print_endline (Fmt.str "Links file %s already exists" links_file) 14 - else begin 15 - (* Create an empty links file *) 16 - Bushel.Link.save_links_file links_file []; 17 - print_endline (Fmt.str "Created empty links file: %s" links_file) 18 - end; 19 - 0 20 - 21 - (* Update links.yml from Karakeep *) 22 - let update_from_karakeep base_url api_key_opt tag links_file download_assets = 23 - match api_key_opt with 24 - | None -> 25 - prerr_endline "Error: API key is required."; 26 - prerr_endline "Please provide one with --api-key or create a ~/.karakeep-api file."; 27 - 1 28 - | Some api_key -> 29 - let assets_dir = "data/assets" in 30 - 31 - (* Run the Lwt program *) 32 - Lwt_main.run ( 33 - print_endline (Fmt.str "Fetching links from %s with tag '%s'..." base_url tag); 34 - 35 - (* Prepare tag filter *) 36 - let filter_tags = if tag = "" then [] else [tag] in 37 - 38 - (* Fetch bookmarks from Karakeep with error handling *) 39 - Lwt.catch 40 - (fun () -> 41 - Karakeep.fetch_all_bookmarks ~api_key ~filter_tags base_url >>= fun bookmarks -> 42 - 43 - print_endline (Fmt.str "Retrieved %d bookmarks from Karakeep" (List.length bookmarks)); 44 - 45 - (* Read existing links if file exists *) 46 - let existing_links = Bushel.Link.load_links_file links_file in 47 - 48 - (* Convert bookmarks to bushel links *) 49 - let new_links = List.map (fun bookmark -> 50 - Karakeep.to_bushel_link ~base_url bookmark 51 - ) bookmarks in 52 - 53 - (* Merge with existing links - keep existing dates (karakeep dates may be unreliable) *) 54 - let merged_links = Bushel.Link.merge_links existing_links new_links in 55 - 56 - (* Save the updated links file *) 57 - Bushel.Link.save_links_file links_file merged_links; 58 - 59 - print_endline (Fmt.str "Updated %s with %d links" links_file (List.length merged_links)); 60 - 61 - (* Download assets if requested *) 62 - if download_assets then begin 63 - print_endline "Downloading assets for bookmarks..."; 64 - 65 - (* Ensure the assets directory exists *) 66 - (try Unix.mkdir assets_dir 0o755 with Unix.Unix_error (Unix.EEXIST, _, _) -> ()); 67 - 68 - (* Process each bookmark with assets *) 69 - Lwt_list.iter_s (fun bookmark -> 70 - (* Extract asset IDs from bookmark *) 71 - let assets = bookmark.Karakeep.assets in 72 - 73 - (* Skip if no assets *) 74 - if assets = [] then 75 - Lwt.return_unit 76 - else 77 - (* Process each asset *) 78 - Lwt_list.iter_s (fun (asset_id, asset_type) -> 79 - let asset_dir = Fmt.str "%s/%s" assets_dir asset_id in 80 - let asset_file = Fmt.str "%s/asset.bin" asset_dir in 81 - let meta_file = Fmt.str "%s/metadata.json" asset_dir in 82 - 83 - (* Skip if the asset already exists *) 84 - if Sys.file_exists asset_file then 85 - Lwt.return_unit 86 - else begin 87 - (* Create the asset directory *) 88 - (try Unix.mkdir asset_dir 0o755 with Unix.Unix_error (Unix.EEXIST, _, _) -> ()); 89 - 90 - (* Download the asset *) 91 - print_endline (Fmt.str "Downloading %s asset %s..." asset_type asset_id); 92 - Karakeep.fetch_asset ~api_key base_url asset_id >>= fun data -> 93 - 94 - (* Guess content type based on first bytes *) 95 - let content_type = 96 - if String.length data >= 4 && String.sub data 0 4 = "\x89PNG" then 97 - "image/png" 98 - else if String.length data >= 3 && String.sub data 0 3 = "\xFF\xD8\xFF" then 99 - "image/jpeg" 100 - else if String.length data >= 4 && String.sub data 0 4 = "%PDF" then 101 - "application/pdf" 102 - else 103 - "application/octet-stream" 104 - in 105 - 106 - (* Write the asset data *) 107 - Lwt_io.with_file ~mode:Lwt_io.Output asset_file (fun oc -> 108 - Lwt_io.write oc data 109 - ) >>= fun () -> 110 - 111 - (* Write metadata file *) 112 - let metadata = Fmt.str "{\n \"contentType\": \"%s\",\n \"assetType\": \"%s\"\n}" 113 - content_type asset_type in 114 - Lwt_io.with_file ~mode:Lwt_io.Output meta_file (fun oc -> 115 - Lwt_io.write oc metadata 116 - ) 117 - end 118 - ) assets 119 - ) bookmarks >>= fun () -> 120 - 121 - print_endline "Asset download completed."; 122 - Lwt.return 0 123 - end else 124 - Lwt.return 0 125 - ) 126 - (fun exn -> 127 - prerr_endline (Fmt.str "Error fetching bookmarks: %s" (Printexc.to_string exn)); 128 - Lwt.return 1 129 - ) 130 - ) 131 - 132 - (* Extract outgoing links from Bushel entries *) 133 - let update_from_bushel bushel_dir links_file include_domains exclude_domains = 134 - (* Parse domain filters if provided *) 135 - let include_domains_list = match include_domains with 136 - | None -> [] 137 - | Some s -> String.split_on_char ',' s |> List.map String.trim 138 - in 139 - 140 - let exclude_domains_list = match exclude_domains with 141 - | None -> [] 142 - | Some s -> String.split_on_char ',' s |> List.map String.trim 143 - in 144 - 145 - (* Show filter settings if any *) 146 - if include_domains_list <> [] then 147 - print_endline (Fmt.str "Including only domains: %s" (String.concat ", " include_domains_list)); 148 - 149 - if exclude_domains_list <> [] then 150 - print_endline (Fmt.str "Excluding domains: %s" (String.concat ", " exclude_domains_list)); 151 - 152 - (* Load all entries from the bushel directory *) 153 - let notes_dir = Filename.concat bushel_dir "data/notes" in 154 - 155 - (* Make sure the notes directory exists *) 156 - if not (Sys.file_exists notes_dir) then begin 157 - prerr_endline (Fmt.str "Error: Notes directory %s does not exist" notes_dir); 158 - exit 1 159 - end; 160 - 161 - (* Load all entries with fallback *) 162 - print_endline (Fmt.str "Loading entries from %s..." bushel_dir); 163 - 164 - let entries_data = Bushel.load bushel_dir in 165 - let all_entries = Bushel.Entry.all_entries entries_data in 166 - print_endline (Fmt.str "Loaded %d entries" (List.length all_entries)); 167 - 168 - (* Extract outgoing links from all entries *) 169 - print_endline "Extracting outgoing links..."; 170 - let extracted_links = ref [] in 171 - 172 - (* Process each entry *) 173 - List.iter (fun entry -> 174 - let entry_body = Bushel.Entry.body entry in 175 - let entry_slug = Bushel.Entry.slug entry in 176 - 177 - (* Skip empty bodies *) 178 - if entry_body <> "" then begin 179 - let links = Bushel.Entry.extract_external_links entry_body in 180 - if links <> [] then begin 181 - (* Add each link from this entry *) 182 - List.iter (fun url -> 183 - (* Try to extract domain from URL *) 184 - let domain = 185 - try 186 - let uri = Uri.of_string url in 187 - match Uri.host uri with 188 - | Some host -> host 189 - | None -> "unknown" 190 - with _ -> "unknown" 191 - in 192 - 193 - (* Filter by domain if filters are specified *) 194 - let include_by_domain = 195 - if include_domains_list <> [] then 196 - List.exists (fun filter -> 197 - domain = filter || String.ends_with ~suffix:filter domain 198 - ) include_domains_list 199 - else true 200 - in 201 - 202 - let exclude_by_domain = 203 - List.exists (fun filter -> 204 - domain = filter || String.ends_with ~suffix:filter domain 205 - ) exclude_domains_list 206 - in 207 - 208 - if include_by_domain && not exclude_by_domain then begin 209 - let date = Bushel.Entry.date entry in 210 - 211 - (* Extract tags from the entry *) 212 - let entry_tags = Bushel.Tags.tags_of_ent entries_data entry in 213 - let tag_strings = List.map Bushel.Tags.to_string entry_tags in 214 - 215 - let link = { 216 - Bushel.Link.url; 217 - date; 218 - description = ""; 219 - karakeep = None; 220 - bushel = Some { 221 - Bushel.Link.slugs = [entry_slug]; 222 - tags = tag_strings 223 - }; 224 - } in 225 - extracted_links := link :: !extracted_links 226 - end 227 - ) links 228 - end 229 - end 230 - ) all_entries; 231 - 232 - (* Load existing links *) 233 - let existing_links = Bushel.Link.load_links_file links_file in 234 - 235 - (* Merge with existing links - prefer bushel entry dates *) 236 - let merged_links = Bushel.Link.merge_links ~prefer_new_date:true existing_links !extracted_links in 237 - 238 - (* Save the updated links file *) 239 - Bushel.Link.save_links_file links_file merged_links; 240 - 241 - print_endline (Fmt.str "Added %d extracted links from Bushel to %s" 242 - (List.length !extracted_links) links_file); 243 - print_endline (Fmt.str "Total links in file: %d" (List.length merged_links)); 244 - 0 245 - 246 - (* Helper function to filter links that don't have karakeep data for a specific remote *) 247 - let filter_links_without_karakeep base_url links = 248 - List.filter (fun link -> 249 - match link.Bushel.Link.karakeep with 250 - | Some { remote_url; _ } when remote_url = base_url -> false 251 - | _ -> true 252 - ) links 253 - 254 - (* Helper function to apply limit to links if specified *) 255 - let apply_limit_to_links limit links = 256 - match limit with 257 - | Some n when n > 0 -> 258 - let rec take_n acc count = function 259 - | [] -> List.rev acc 260 - | _ when count = 0 -> List.rev acc 261 - | x :: xs -> take_n (x :: acc) (count - 1) xs 262 - in 263 - let limited = take_n [] n links in 264 - if List.length links > n then 265 - log "Limited to first %d links (out of %d available)\n" n (List.length links); 266 - limited 267 - | _ -> links 268 - 269 - (* Helper function to prepare tags for a link *) 270 - let prepare_tags_for_link tag link = 271 - let slug_tags = 272 - match link.Bushel.Link.bushel with 273 - | Some { slugs; _ } -> List.map (fun slug -> "bushel:" ^ slug) slugs 274 - | None -> [] 275 - in 276 - if tag = "" then slug_tags 277 - else tag :: slug_tags 278 - 279 - (* Helper function to create batches for parallel processing *) 280 - let create_batches max_concurrent links = 281 - let rec create_batches_aux links acc = 282 - match links with 283 - | [] -> List.rev acc 284 - | _ -> 285 - let batch, rest = 286 - if List.length links <= max_concurrent then 287 - links, [] 288 - else 289 - let rec take n lst batch = 290 - if n = 0 || lst = [] then List.rev batch, lst 291 - else take (n-1) (List.tl lst) (List.hd lst :: batch) 292 - in 293 - take max_concurrent links [] 294 - in 295 - create_batches_aux rest (batch :: acc) 296 - in 297 - create_batches_aux links [] 298 - 299 - (* Helper function to upload a single link to Karakeep *) 300 - let upload_single_link api_key base_url tag verbose updated_links link = 301 - let url = link.Bushel.Link.url in 302 - let title = 303 - if link.Bushel.Link.description <> "" then 304 - Some link.Bushel.Link.description 305 - else None 306 - in 307 - let tags = prepare_tags_for_link tag link in 308 - 309 - if verbose then begin 310 - log " Uploading: %s\n" url; 311 - if tags <> [] then 312 - log " Tags: %s\n" (String.concat ", " tags); 313 - if title <> None then 314 - log " Title: %s\n" (Option.get title); 315 - end else begin 316 - log "Uploading: %s\n" url; 317 - end; 318 - 319 - (* Create the bookmark with tags *) 320 - Lwt.catch 321 - (fun () -> 322 - Karakeep.create_bookmark 323 - ~api_key 324 - ~url 325 - ?title 326 - ~tags 327 - base_url 328 - >>= fun bookmark -> 329 - 330 - (* Create updated link with karakeep data *) 331 - let updated_link = { 332 - link with 333 - Bushel.Link.karakeep = 334 - Some { 335 - Bushel.Link.remote_url = base_url; 336 - id = bookmark.id; 337 - tags = bookmark.tags; 338 - metadata = []; (* Will be populated on next sync *) 339 - } 340 - } in 341 - updated_links := updated_link :: !updated_links; 342 - 343 - if verbose then 344 - log " ✓ Added to Karakeep with ID: %s\n" bookmark.id 345 - else 346 - log " - Added to Karakeep with ID: %s\n" bookmark.id; 347 - Lwt.return 1 (* Success *) 348 - ) 349 - (fun exn -> 350 - if verbose then 351 - log " ✗ Error uploading %s: %s\n" url (Printexc.to_string exn) 352 - else 353 - log " - Error uploading %s: %s\n" url (Printexc.to_string exn); 354 - Lwt.return 0 (* Failure *) 355 - ) 356 - 357 - (* Helper function to process a batch of links *) 358 - let process_batch api_key base_url tag verbose updated_links batch_num total_batches batch = 359 - log_verbose verbose "\nProcessing batch %d/%d (%d links)...\n" 360 - (batch_num + 1) total_batches (List.length batch); 361 - 362 - (* Process links in this batch concurrently *) 363 - Lwt_list.map_p (upload_single_link api_key base_url tag verbose updated_links) batch 364 - 365 - (* Helper function to update links file with new karakeep data *) 366 - let update_links_file links_file original_links updated_links = 367 - if !updated_links <> [] then begin 368 - (* Replace the updated links in the original list *) 369 - let final_links = 370 - List.map (fun link -> 371 - let url = link.Bushel.Link.url in 372 - let updated = List.find_opt (fun ul -> ul.Bushel.Link.url = url) !updated_links in 373 - match updated with 374 - | Some ul -> ul 375 - | None -> link 376 - ) original_links 377 - in 378 - 379 - (* Save the updated links file *) 380 - Bushel.Link.save_links_file links_file final_links; 381 - 382 - log "Updated %s with %d new karakeep_ids\n" 383 - links_file (List.length !updated_links); 384 - end 385 - 386 - (* Upload links to Karakeep that don't already have karakeep data *) 387 - let upload_to_karakeep base_url api_key_opt links_file tag max_concurrent delay_seconds limit verbose = 388 - match api_key_opt with 389 - | None -> 390 - log "Error: API key is required.\n"; 391 - log "Please provide one with --api-key or create a ~/.karakeep-api file.\n"; 392 - 1 393 - | Some api_key -> 394 - (* Load links from file *) 395 - log_verbose verbose "Loading links from %s...\n" links_file; 396 - let links = Bushel.Link.load_links_file links_file in 397 - log_verbose verbose "Loaded %d total links\n" (List.length links); 398 - 399 - (* Filter links that don't have karakeep data for this remote *) 400 - log_verbose verbose "Filtering links that don't have karakeep data for %s...\n" base_url; 401 - let filtered_links = filter_links_without_karakeep base_url links in 402 - log_verbose verbose "Found %d links without karakeep data\n" (List.length filtered_links); 403 - 404 - (* Apply limit if specified *) 405 - let links_to_upload = apply_limit_to_links limit filtered_links in 406 - 407 - if links_to_upload = [] then begin 408 - log "No links to upload to %s (all links already have karakeep data)\n" base_url; 409 - 0 410 - end else begin 411 - log "Found %d links to upload to %s\n" (List.length links_to_upload) base_url; 412 - 413 - (* Split links into batches for parallel processing *) 414 - let batches = create_batches max_concurrent links_to_upload in 415 - log_verbose verbose "Processing in %d batches of up to %d links each...\n" 416 - (List.length batches) max_concurrent; 417 - log_verbose verbose "Delay between batches: %.1f seconds\n" delay_seconds; 418 - 419 - (* Process batches and accumulate updated links *) 420 - let updated_links = ref [] in 421 - 422 - let result = Lwt_main.run ( 423 - Lwt.catch 424 - (fun () -> 425 - Lwt_list.fold_left_s (fun (total_count, batch_num) batch -> 426 - process_batch api_key base_url tag verbose updated_links 427 - batch_num (List.length batches) batch >>= fun results -> 428 - 429 - (* Count successes in this batch *) 430 - let batch_successes = List.fold_left (+) 0 results in 431 - let new_total = total_count + batch_successes in 432 - 433 - log_verbose verbose " Batch %d complete: %d/%d successful (Total: %d/%d)\n" 434 - (batch_num + 1) batch_successes (List.length batch) new_total (new_total + (List.length links_to_upload - new_total)); 435 - 436 - (* Add a delay before processing the next batch *) 437 - if batch_num + 1 < List.length batches then begin 438 - log_verbose verbose " Waiting %.1f seconds before next batch...\n" delay_seconds; 439 - Lwt_unix.sleep delay_seconds >>= fun () -> 440 - Lwt.return (new_total, batch_num + 1) 441 - end else 442 - Lwt.return (new_total, batch_num + 1) 443 - ) (0, 0) batches >>= fun (final_count, _) -> 444 - Lwt.return final_count 445 - ) 446 - (fun exn -> 447 - log "Error during upload operation: %s\n" (Printexc.to_string exn); 448 - Lwt.return 0 449 - ) 450 - ) in 451 - 452 - (* Update the links file with the new karakeep_ids *) 453 - update_links_file links_file links updated_links; 454 - 455 - log "Upload complete. %d/%d links uploaded successfully.\n" 456 - result (List.length links_to_upload); 457 - 458 - 0 459 - end 460 - 461 - (* Common arguments *) 462 - let links_file_arg = 463 - let doc = "Links YAML file. Defaults to links.yml." in 464 - Arg.(value & opt string "links.yml" & info ["file"; "f"] ~doc ~docv:"FILE") 465 - 466 - let base_url_arg = 467 - let doc = "Base URL of the Karakeep instance" in 468 - let default = "https://hoard.recoil.org" in 469 - Arg.(value & opt string default & info ["url"] ~doc ~docv:"URL") 470 - 471 - let api_key_arg = 472 - let doc = "API key for Karakeep authentication (ak1_<key_id>_<secret>)" in 473 - let get_api_key () = 474 - let home = try Sys.getenv "HOME" with Not_found -> "." in 475 - let key_path = Filename.concat home ".karakeep-api" in 476 - try 477 - let ic = open_in key_path in 478 - let key = input_line ic in 479 - close_in ic; 480 - Some (String.trim key) 481 - with _ -> None 482 - in 483 - Arg.(value & opt (some string) (get_api_key ()) & info ["api-key"] ~doc ~docv:"API_KEY") 484 - 485 - let tag_arg = 486 - let doc = "Tag to filter or apply to bookmarks" in 487 - Arg.(value & opt string "" & info ["tag"; "t"] ~doc ~docv:"TAG") 488 - 489 - let download_assets_arg = 490 - let doc = "Download assets (screenshots, etc.) from Karakeep" in 491 - Arg.(value & flag & info ["download-assets"; "d"] ~doc) 492 - 493 - let base_dir_arg = 494 - let doc = "Base directory of the Bushel project" in 495 - Arg.(value & opt string "." & info ["dir"; "d"] ~doc ~docv:"DIR") 496 - 497 - let include_domains_arg = 498 - let doc = "Only include links to these domains (comma-separated list)" in 499 - Arg.(value & opt (some string) None & info ["include"] ~doc ~docv:"DOMAINS") 500 - 501 - let exclude_domains_arg = 502 - let doc = "Exclude links to these domains (comma-separated list)" in 503 - Arg.(value & opt (some string) None & info ["exclude"] ~doc ~docv:"DOMAINS") 504 - 505 - let concurrent_arg = 506 - let doc = "Maximum number of concurrent uploads (default: 5)" in 507 - Arg.(value & opt int 5 & info ["concurrent"; "c"] ~doc ~docv:"NUM") 508 - 509 - let delay_arg = 510 - let doc = "Delay in seconds between batches (default: 1.0)" in 511 - Arg.(value & opt float 1.0 & info ["delay"] ~doc ~docv:"SECONDS") 512 - 513 - let limit_arg = 514 - let doc = "Limit number of links to upload (for testing)" in 515 - Arg.(value & opt (some int) None & info ["limit"; "l"] ~doc ~docv:"NUM") 516 - 517 - let verbose_arg = 518 - let doc = "Show detailed progress information during upload" in 519 - Arg.(value & flag & info ["verbose"; "v"] ~doc) 520 - 521 - (* Command definitions *) 522 - let init_cmd = 523 - let doc = "Initialize a new links.yml file" in 524 - let info = Cmd.info "init" ~doc in 525 - Cmd.v info Term.(const init_links_file $ links_file_arg) 526 - 527 - let karakeep_cmd = 528 - let doc = "Update links.yml with links from Karakeep" in 529 - let info = Cmd.info "karakeep" ~doc in 530 - Cmd.v info Term.(const update_from_karakeep $ base_url_arg $ api_key_arg $ tag_arg $ links_file_arg $ download_assets_arg) 531 - 532 - let bushel_cmd = 533 - let doc = "Update links.yml with outgoing links from Bushel entries" in 534 - let info = Cmd.info "bushel" ~doc in 535 - Cmd.v info Term.(const update_from_bushel $ base_dir_arg $ links_file_arg $ include_domains_arg $ exclude_domains_arg) 536 - 537 - let upload_cmd = 538 - let doc = "Upload links without karakeep data to Karakeep" in 539 - let info = Cmd.info "upload" ~doc in 540 - Cmd.v info Term.(const upload_to_karakeep $ base_url_arg $ api_key_arg $ links_file_arg $ tag_arg $ concurrent_arg $ delay_arg $ limit_arg $ verbose_arg) 541 - 542 - (* Export the term and cmd for use in main bushel.ml *) 543 - let cmd = 544 - let doc = "Manage links between Bushel and Karakeep" in 545 - let info = Cmd.info "links" ~doc in 546 - Cmd.group info [init_cmd; karakeep_cmd; bushel_cmd; upload_cmd] 547 - 548 - (* For standalone execution *) 549 - (* Main entry point removed - accessed through bushel_main.ml *)
-119
stack/bushel/bin/bushel_main.ml
··· 1 - open Cmdliner 2 - 3 - let version = "0.1.0" 4 - 5 - (* Import actual command implementations from submodules *) 6 - 7 - (* Obsidian command - no API keys needed *) 8 - let obsidian_cmd = 9 - let doc = "Convert Bushel entries to Obsidian format" in 10 - Eiocmd.run 11 - ~use_keyeio:false 12 - ~info:(Cmd.info "obsidian" ~version ~doc) 13 - ~app_name:"bushel" 14 - ~service:"bushel" 15 - Bushel_obsidian.term 16 - 17 - (* Paper classify command *) 18 - let paper_classify_cmd = Bushel_paper_classify.cmd 19 - 20 - (* Paper tex command *) 21 - let paper_tex_cmd = Bushel_paper_tex.cmd 22 - 23 - (* Thumbs command - no API keys needed *) 24 - let thumbs_cmd = 25 - let doc = "Generate thumbnails from paper PDFs" in 26 - Eiocmd.run 27 - ~use_keyeio:false 28 - ~info:(Cmd.info "thumbs" ~version ~doc) 29 - ~app_name:"bushel" 30 - ~service:"bushel" 31 - Bushel_thumbs.term 32 - 33 - (* Query command - needs Typesense API key *) 34 - let query_cmd = 35 - let doc = "Query Bushel collections using multisearch" in 36 - Eiocmd.run 37 - ~use_keyeio:true 38 - ~info:(Cmd.info "query" ~version ~doc) 39 - ~app_name:"bushel" 40 - ~service:"bushel" 41 - Bushel_search.term 42 - 43 - (* Bibtex command - no API keys needed *) 44 - let bibtex_cmd = 45 - let doc = "Export bibtex for all papers" in 46 - Eiocmd.run 47 - ~use_keyeio:false 48 - ~info:(Cmd.info "bibtex" ~version ~doc) 49 - ~app_name:"bushel" 50 - ~service:"bushel" 51 - Bushel_bibtex.term 52 - 53 - (* Ideas command *) 54 - let ideas_cmd = Bushel_ideas.cmd 55 - 56 - (* Info command - no API keys needed *) 57 - let info_cmd = 58 - let doc = "Display all information for a given slug" in 59 - Eiocmd.run 60 - ~use_keyeio:false 61 - ~info:(Cmd.info "info" ~version ~doc) 62 - ~app_name:"bushel" 63 - ~service:"bushel" 64 - Bushel_info.term 65 - 66 - (* Missing command - no API keys needed *) 67 - let missing_cmd = 68 - let doc = "Check for missing metadata in entries" in 69 - Eiocmd.run 70 - ~use_keyeio:false 71 - ~info:(Cmd.info "missing" ~version ~doc) 72 - ~app_name:"bushel" 73 - ~service:"bushel" 74 - Bushel_missing.term 75 - 76 - (* Note DOI command - no API keys needed *) 77 - let note_doi_cmd = 78 - let doc = "Assign DOIs to notes with perma:true" in 79 - Eiocmd.run 80 - ~use_keyeio:false 81 - ~info:(Cmd.info "note-doi" ~version ~doc) 82 - ~app_name:"bushel" 83 - ~service:"bushel" 84 - Bushel_note_doi.term 85 - 86 - (* Main command *) 87 - let bushel_cmd = 88 - let doc = "Bushel content management toolkit" in 89 - let sdocs = Manpage.s_common_options in 90 - let man = [ 91 - `S Manpage.s_description; 92 - `P "$(tname) is a unified command-line tool for managing various types of \ 93 - content in the Bushel system, including papers, videos, links, and more."; 94 - `P "$(tname) provides unified access to all Bushel functionality through \ 95 - integrated subcommands."; 96 - `S Manpage.s_commands; 97 - `S Manpage.s_common_options; 98 - `S "ENVIRONMENT"; 99 - `P "BUSHEL_CONFIG - Path to configuration file with default settings"; 100 - `S Manpage.s_authors; 101 - `P "Anil Madhavapeddy"; 102 - `S Manpage.s_bugs; 103 - `P "Report bugs at https://github.com/avsm/bushel/issues"; 104 - ] in 105 - let info = Cmd.info "bushel" ~version ~doc ~sdocs ~man in 106 - Cmd.group info [ 107 - bibtex_cmd; 108 - ideas_cmd; 109 - info_cmd; 110 - missing_cmd; 111 - note_doi_cmd; 112 - obsidian_cmd; 113 - paper_classify_cmd; 114 - paper_tex_cmd; 115 - query_cmd; 116 - thumbs_cmd; 117 - ] 118 - 119 - let () = exit (Cmd.eval' bushel_cmd)
-186
stack/bushel/bin/bushel_missing.ml
··· 1 - open Cmdliner 2 - open Bushel 3 - 4 - (** Check if an entry has a thumbnail *) 5 - let has_thumbnail entries entry = 6 - match Entry.thumbnail_slug entries entry with 7 - | Some _ -> true 8 - | None -> false 9 - 10 - (** Check if an entry has a synopsis or description *) 11 - let has_synopsis = function 12 - | `Paper p -> Paper.abstract p <> "" (* Papers have abstracts *) 13 - | `Note n -> Note.synopsis n <> None (* Notes have optional synopsis *) 14 - | `Idea _ -> true (* Ideas don't have synopsis field *) 15 - | `Project _ -> true (* Projects don't have synopsis field *) 16 - | `Video _ -> true (* Videos don't have synopsis field *) 17 - 18 - (** Check if an entry has tags *) 19 - let has_tags = function 20 - | `Paper p -> Paper.tags p <> [] 21 - | `Note n -> Note.tags n <> [] 22 - | `Idea i -> i.Idea.tags <> [] (* Access record field directly *) 23 - | `Project p -> Project.tags p <> [] 24 - | `Video v -> v.Video.tags <> [] (* Access record field directly *) 25 - 26 - (** Entry with broken references *) 27 - type entry_with_broken_refs = { 28 - entry : Entry.entry; 29 - broken_slugs : string list; 30 - broken_contacts : string list; 31 - } 32 - 33 - (** Find entries missing thumbnails *) 34 - let find_missing_thumbnails entries = 35 - let all = Entry.all_entries entries in 36 - List.filter (fun entry -> not (has_thumbnail entries entry)) all 37 - 38 - (** Find entries missing synopsis *) 39 - let find_missing_synopsis entries = 40 - let all = Entry.all_entries entries in 41 - List.filter (fun entry -> not (has_synopsis entry)) all 42 - 43 - (** Find entries missing tags *) 44 - let find_missing_tags entries = 45 - let all = Entry.all_entries entries in 46 - List.filter (fun entry -> not (has_tags entry)) all 47 - 48 - (** Find entries with broken slugs or contact handles *) 49 - let find_broken_references entries = 50 - let all = Entry.all_entries entries in 51 - List.filter_map (fun entry -> 52 - let body = Entry.body entry in 53 - let broken_slugs, broken_contacts = Md.validate_references entries body in 54 - if broken_slugs <> [] || broken_contacts <> [] then 55 - Some { entry; broken_slugs; broken_contacts } 56 - else 57 - None 58 - ) all 59 - 60 - (** Print a list of entries *) 61 - let print_entries title entries_list = 62 - if entries_list <> [] then begin 63 - Fmt.pr "@.%a (%d):@," (Fmt.styled `Bold Fmt.string) title (List.length entries_list); 64 - List.iter (fun entry -> 65 - let slug = Entry.slug entry in 66 - let type_str = Entry.to_type_string entry in 67 - let title = Entry.title entry in 68 - Fmt.pr " %a %a - %a@," 69 - (Fmt.styled `Cyan Fmt.string) slug 70 - (Fmt.styled `Faint Fmt.string) (Printf.sprintf "(%s)" type_str) 71 - Fmt.string title 72 - ) entries_list 73 - end 74 - 75 - (** Print entries with broken references *) 76 - let print_broken_references title entries_with_broken_refs = 77 - if entries_with_broken_refs <> [] then begin 78 - Fmt.pr "@.%a (%d):@," (Fmt.styled `Bold Fmt.string) title (List.length entries_with_broken_refs); 79 - List.iter (fun { entry; broken_slugs; broken_contacts } -> 80 - let slug = Entry.slug entry in 81 - let type_str = Entry.to_type_string entry in 82 - let entry_title = Entry.title entry in 83 - Fmt.pr " %a %a - %a@," 84 - (Fmt.styled `Cyan Fmt.string) slug 85 - (Fmt.styled `Faint Fmt.string) (Printf.sprintf "(%s)" type_str) 86 - Fmt.string entry_title; 87 - if broken_slugs <> [] then 88 - Fmt.pr " %a %a@," 89 - (Fmt.styled `Red Fmt.string) "Broken slugs:" 90 - (Fmt.list ~sep:Fmt.comma Fmt.string) broken_slugs; 91 - if broken_contacts <> [] then 92 - Fmt.pr " %a %a@," 93 - (Fmt.styled `Red Fmt.string) "Broken contacts:" 94 - (Fmt.list ~sep:Fmt.comma Fmt.string) broken_contacts; 95 - ) entries_with_broken_refs 96 - end 97 - 98 - (** Main missing command implementation *) 99 - let missing_cmd base_dir check_thumbnails check_synopsis check_tags check_refs _env _xdg _profile = 100 - let entries = load base_dir in 101 - 102 - let count = ref 0 in 103 - 104 - if check_thumbnails then begin 105 - let missing = find_missing_thumbnails entries in 106 - print_entries "Entries missing thumbnails" missing; 107 - count := !count + List.length missing 108 - end; 109 - 110 - if check_synopsis then begin 111 - let missing = find_missing_synopsis entries in 112 - print_entries "Entries missing synopsis" missing; 113 - count := !count + List.length missing 114 - end; 115 - 116 - if check_tags then begin 117 - let missing = find_missing_tags entries in 118 - print_entries "Entries missing tags" missing; 119 - count := !count + List.length missing 120 - end; 121 - 122 - if check_refs then begin 123 - let broken = find_broken_references entries in 124 - print_broken_references "Entries with broken references" broken; 125 - (* Count total number of broken references, not just entries *) 126 - let broken_count = List.fold_left (fun acc { broken_slugs; broken_contacts; _ } -> 127 - acc + List.length broken_slugs + List.length broken_contacts 128 - ) 0 broken in 129 - count := !count + broken_count 130 - end; 131 - 132 - if !count = 0 then 133 - Fmt.pr "@.No missing metadata or broken references found.@." 134 - else 135 - Fmt.pr "@.Total issues found: %d@." !count; 136 - 137 - 0 138 - 139 - (** Command line arguments *) 140 - let thumbnails_flag = 141 - let doc = "Check for entries missing thumbnails" in 142 - Arg.(value & flag & info ["thumbnails"; "t"] ~doc) 143 - 144 - let synopsis_flag = 145 - let doc = "Check for entries missing synopsis" in 146 - Arg.(value & flag & info ["synopsis"; "s"] ~doc) 147 - 148 - let tags_flag = 149 - let doc = "Check for entries missing tags" in 150 - Arg.(value & flag & info ["tags"; "g"] ~doc) 151 - 152 - let refs_flag = 153 - let doc = "Check for broken slugs and contact handles" in 154 - Arg.(value & flag & info ["refs"; "r"] ~doc) 155 - 156 - let term : (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Cmdliner.Term.t = 157 - Term.(const (fun base thumbnails synopsis tags refs env xdg profile -> 158 - (* If no flags specified, check everything *) 159 - let check_all = not (thumbnails || synopsis || tags || refs) in 160 - missing_cmd base 161 - (check_all || thumbnails) 162 - (check_all || synopsis) 163 - (check_all || tags) 164 - (check_all || refs) 165 - env xdg profile 166 - ) $ Bushel_common.base_dir $ thumbnails_flag $ synopsis_flag $ tags_flag $ refs_flag) 167 - 168 - let cmd = 169 - let doc = "List entries with missing metadata or broken references" in 170 - let man = [ 171 - `S Manpage.s_description; 172 - `P "This command scans all entries and reports any that are missing thumbnails, synopsis, tags, or have broken slugs/contact handles."; 173 - `P "By default, all checks are performed. Use flags to select specific checks."; 174 - `S Manpage.s_options; 175 - `S Manpage.s_examples; 176 - `P "Check for all issues:"; 177 - `Pre " $(mname) $(tname)"; 178 - `P "Check only for missing thumbnails:"; 179 - `Pre " $(mname) $(tname) --thumbnails"; 180 - `P "Check for missing synopsis and tags:"; 181 - `Pre " $(mname) $(tname) --synopsis --tags"; 182 - `P "Check only for broken references:"; 183 - `Pre " $(mname) $(tname) --refs"; 184 - ] in 185 - let info = Cmd.info "missing" ~doc ~man in 186 - Cmd.v info term
-131
stack/bushel/bin/bushel_note_doi.ml
··· 1 - open Cmdliner 2 - open Bushel 3 - 4 - (** Generate a roguedoi identifier using Crockford base32 encoding *) 5 - let generate_roguedoi () = 6 - Random.self_init (); 7 - (* Generate a 10-character roguedoi with checksum and split every 5 chars *) 8 - let id = Crockford.generate ~length:10 ~split_every:5 ~checksum:true () in 9 - Printf.sprintf "10.59999/%s" id 10 - 11 - (** Add DOI to a specific note's frontmatter if it doesn't already have one *) 12 - let add_doi_to_note note_path = 13 - let content = In_channel.with_open_bin note_path In_channel.input_all in 14 - (* Check if note already has a doi: field *) 15 - let has_doi = try 16 - let _ = String.index content 'd' in 17 - let re = Str.regexp "^doi:" in 18 - let lines = String.split_on_char '\n' content in 19 - List.exists (fun line -> Str.string_match re (String.trim line) 0) lines 20 - with Not_found -> false 21 - in 22 - if has_doi then begin 23 - Fmt.pr "%a: Note already has a DOI, skipping@." 24 - (Fmt.styled `Yellow Fmt.string) note_path; 25 - false 26 - end else begin 27 - let roguedoi = generate_roguedoi () in 28 - (* Parse the file to extract frontmatter *) 29 - match String.split_on_char '\n' content with 30 - | "---" :: rest -> 31 - (* Find the end of frontmatter *) 32 - let rec find_end_fm acc = function 33 - | [] -> None 34 - | "---" :: body_lines -> Some (List.rev acc, body_lines) 35 - | line :: lines -> find_end_fm (line :: acc) lines 36 - in 37 - (match find_end_fm [] rest with 38 - | Some (fm_lines, body_lines) -> 39 - (* Add doi field to frontmatter *) 40 - let new_fm = fm_lines @ [Printf.sprintf "doi: %s" roguedoi] in 41 - let new_content = 42 - String.concat "\n" (["---"] @ new_fm @ ["---"] @ body_lines) 43 - in 44 - Out_channel.with_open_bin note_path (fun oc -> 45 - Out_channel.output_string oc new_content 46 - ); 47 - Fmt.pr "%a: Added DOI %a@." 48 - (Fmt.styled `Green Fmt.string) note_path 49 - (Fmt.styled `Cyan Fmt.string) roguedoi; 50 - true 51 - | None -> 52 - Fmt.epr "%a: Could not parse frontmatter@." 53 - (Fmt.styled `Red Fmt.string) note_path; 54 - false) 55 - | _ -> 56 - Fmt.epr "%a: No frontmatter found@." 57 - (Fmt.styled `Red Fmt.string) note_path; 58 - false 59 - end 60 - 61 - (** Main command implementation *) 62 - let note_doi_cmd base_dir dry_run _env _xdg _profile = 63 - let entries = load base_dir in 64 - let notes = Entry.notes entries in 65 - 66 - (* Filter for perma notes without DOI *) 67 - let perma_notes = List.filter (fun n -> 68 - Note.perma n && Option.is_none (Note.doi n) 69 - ) notes in 70 - 71 - if perma_notes = [] then begin 72 - Fmt.pr "No permanent notes without DOI found.@."; 73 - 0 74 - end else begin 75 - Fmt.pr "@[<v>"; 76 - Fmt.pr "%a: Found %d permanent notes without DOI@.@." 77 - (Fmt.styled `Bold Fmt.string) "Info" 78 - (List.length perma_notes); 79 - 80 - let count = ref 0 in 81 - List.iter (fun note -> 82 - let slug = Note.slug note in 83 - let note_path = Printf.sprintf "%s/data/notes/%s.md" base_dir slug in 84 - Fmt.pr "Processing %a (%a)...@," 85 - (Fmt.styled `Cyan Fmt.string) slug 86 - (Fmt.styled `Faint Fmt.string) (Note.title note); 87 - 88 - if not dry_run then begin 89 - if add_doi_to_note note_path then 90 - incr count 91 - end else begin 92 - let roguedoi = generate_roguedoi () in 93 - Fmt.pr " Would add DOI: %a@," 94 - (Fmt.styled `Cyan Fmt.string) roguedoi; 95 - incr count 96 - end 97 - ) perma_notes; 98 - 99 - Fmt.pr "@."; 100 - if dry_run then 101 - Fmt.pr "%a: Would add DOI to %d notes (dry run)@." 102 - (Fmt.styled `Bold Fmt.string) "Summary" 103 - !count 104 - else 105 - Fmt.pr "%a: Added DOI to %d notes@." 106 - (Fmt.styled `Bold Fmt.string) "Summary" 107 - !count; 108 - Fmt.pr "@]@."; 109 - 0 110 - end 111 - 112 - (** Command line interface definition *) 113 - let dry_run_flag = 114 - let doc = "Show what would be done without making changes" in 115 - Arg.(value & flag & info ["n"; "dry-run"] ~doc) 116 - 117 - let term : (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Cmdliner.Term.t = 118 - Term.(const note_doi_cmd $ Bushel_common.base_dir $ dry_run_flag) 119 - 120 - let cmd = 121 - let doc = "Generate and add DOI identifiers to permanent notes" in 122 - let man = [ 123 - `S Manpage.s_description; 124 - `P "This command generates roguedoi identifiers using Crockford base32 encoding \ 125 - and adds them to the frontmatter of permanent notes (notes with perma: true) \ 126 - that don't already have a DOI."; 127 - `P "Roguedoi format: 10.59999/xxxxx-xxxxx where x is a Crockford base32 character."; 128 - `S Manpage.s_options; 129 - ] in 130 - let info = Cmd.info "note-doi" ~doc ~man in 131 - Cmd.v info term
-88
stack/bushel/bin/bushel_obsidian.ml
··· 1 - open Bushel 2 - 3 - let obsidian_links = 4 - let inline c = function 5 - | Md.Obsidian_link l -> 6 - Cmarkit_renderer.Context.string c l; 7 - true 8 - | _ -> false 9 - in 10 - Cmarkit_renderer.make ~inline () 11 - ;; 12 - 13 - let obsidian_of_doc doc = 14 - let default = Cmarkit_commonmark.renderer () in 15 - let r = Cmarkit_renderer.compose default obsidian_links in 16 - Cmarkit_renderer.doc_to_string r doc 17 - ;; 18 - 19 - let md_to_obsidian entries md = 20 - let open Cmarkit in 21 - Doc.of_string ~strict:false ~resolver:Md.with_bushel_links md 22 - |> Mapper.map_doc (Mapper.make ~inline:(Md.bushel_inline_mapper_to_obsidian entries) ()) 23 - |> obsidian_of_doc 24 - ;; 25 - 26 - let obsidian_output base output_dir = 27 - let e = load base in 28 - let all = Entry.all_entries e @ Entry.all_papers e in 29 - List.iter 30 - (fun ent -> 31 - let slug = 32 - match ent with 33 - | `Paper { Paper.latest; slug; ver; _ } when not latest -> 34 - Printf.sprintf "%s-%s" slug ver 35 - | _ -> Entry.slug ent 36 - in 37 - let fname = Filename.concat output_dir (slug ^ ".md") in 38 - let tags = 39 - Tags.tags_of_ent e ent 40 - |> List.filter_map (fun tag -> 41 - match tag with 42 - | `Slug _ -> None 43 - | `Set s -> Some (Printf.sprintf "\"#%s\"" s) 44 - | `Text s -> Some (Printf.sprintf "%s" s) 45 - | `Contact _ -> None 46 - | `Year y -> Some (Printf.sprintf "\"#y%d\"" y)) 47 - |> List.map (fun s -> "- " ^ s) 48 - |> String.concat "\n" 49 - in 50 - let links = 51 - Tags.tags_of_ent e ent 52 - |> List.filter_map (fun tag -> 53 - match tag with 54 - | `Slug s when s <> slug -> Some (Printf.sprintf "- \"[[%s]]\"" s) 55 - | `Contact c -> Some (Printf.sprintf "- \"[[@%s]]\"" c) 56 - | _ -> None) 57 - |> String.concat "\n" 58 - |> function 59 - | "" -> "" 60 - | s -> "linklist:\n" ^ s ^ "\n" 61 - in 62 - let body = Entry.body ent |> md_to_obsidian e in 63 - let buf = Printf.sprintf "---\ntags:\n%s\n%s---\n\n%s" tags links body in 64 - Out_channel.with_open_bin fname (fun oc -> output_string oc buf)) 65 - all; 66 - List.iter 67 - (fun contact -> 68 - let slug = Contact.handle contact in 69 - let fname = Filename.concat output_dir ("@" ^ slug ^ ".md") in 70 - let buf = String.concat "\n" (Contact.names contact) in 71 - Out_channel.with_open_bin fname (fun oc -> output_string oc buf)) 72 - (Entry.contacts e) 73 - ;; 74 - 75 - (* Export the term for use in main bushel.ml *) 76 - let term : (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Cmdliner.Term.t = 77 - Cmdliner.Term.( 78 - const (fun base_dir output_dir _env _xdg _profile -> obsidian_output base_dir output_dir; 0) $ 79 - Bushel_common.base_dir $ 80 - Bushel_common.output_dir ~default:"obsidian" 81 - ) 82 - 83 - let cmd = 84 - let doc = "Generate Obsidian-compatible markdown files" in 85 - let info = Cmdliner.Cmd.info "obsidian" ~doc in 86 - Cmdliner.Cmd.v info term 87 - 88 - (* Main entry point removed - accessed through bushel_main.ml *)
-74
stack/bushel/bin/bushel_paper.ml
··· 1 - module ZT = Zotero_translation 2 - open Lwt.Infix 3 - open Printf 4 - module J = Ezjsonm 5 - open Cmdliner 6 - 7 - 8 - let _authors b j = 9 - let keys = J.get_dict j in 10 - let authors = J.get_list J.get_string (List.assoc "author" keys) in 11 - let a = 12 - List.fold_left (fun acc a -> 13 - match Bushel.Entry.lookup_by_name b a with 14 - | Some c -> `String ("@" ^ (Bushel.Contact.handle c)) :: acc 15 - | None -> failwith (sprintf "author %s not found" a) 16 - ) [] authors 17 - in 18 - J.update j ["author"] (Some (`A a)) 19 - 20 - let of_doi zt ~base_dir ~slug ~version doi = 21 - ZT.json_of_doi zt ~slug doi >>= fun j -> 22 - let papers_dir = Printf.sprintf "%s/papers/%s" base_dir slug in 23 - (* Ensure papers directory exists *) 24 - (try Unix.mkdir papers_dir 0o755 with Unix.Unix_error (Unix.EEXIST, _, _) -> ()); 25 - 26 - (* Extract abstract from JSON data *) 27 - let abstract = try 28 - let keys = Ezjsonm.get_dict (j :> Ezjsonm.value) in 29 - match List.assoc_opt "abstract" keys with 30 - | Some abstract_json -> Some (Ezjsonm.get_string abstract_json) 31 - | None -> None 32 - with _ -> None in 33 - 34 - (* Remove abstract from frontmatter - it goes in body *) 35 - let keys = Ezjsonm.get_dict (j :> Ezjsonm.value) in 36 - let filtered_keys = List.filter (fun (k, _) -> k <> "abstract") keys in 37 - let json_without_abstract = `O filtered_keys in 38 - 39 - (* Use library function to generate YAML with abstract in body *) 40 - let content = Bushel.Paper.to_yaml ?abstract ~ver:version json_without_abstract in 41 - 42 - let filename = Printf.sprintf "%s.md" version in 43 - let filepath = Filename.concat papers_dir filename in 44 - let oc = open_out filepath in 45 - output_string oc content; 46 - close_out oc; 47 - Printf.printf "Created paper file: %s\n" filepath; 48 - Lwt.return () 49 - 50 - let slug_arg = 51 - let doc = "Slug for the entry." in 52 - Arg.(required & pos 0 (some string) None & info [] ~docv:"SLUG" ~doc) 53 - 54 - let version_arg = 55 - let doc = "Version of the entry." in 56 - Arg.(required & pos 1 (some string) None & info [] ~docv:"VERSION" ~doc) 57 - 58 - let doi_arg = 59 - let doc = "DOI of the entry." in 60 - Arg.(required & pos 2 (some string) None & info [] ~docv:"DOI" ~doc) 61 - 62 - (* Export the term for use in main bushel.ml *) 63 - let term = 64 - Term.(const (fun base slug version doi -> 65 - let zt = ZT.v "http://svr-avsm2-eeg-ce:1969" in 66 - Lwt_main.run @@ of_doi zt ~base_dir:base ~slug ~version doi; 0 67 - ) $ Bushel_common.base_dir $ slug_arg $ version_arg $ doi_arg) 68 - 69 - let cmd = 70 - let doc = "Generate paper entry from DOI" in 71 - let info = Cmd.info "paper" ~doc in 72 - Cmd.v info term 73 - 74 - (* Main entry point removed - accessed through bushel_main.ml *)
-57
stack/bushel/bin/bushel_paper_classify.ml
··· 1 - open Cmdliner 2 - 3 - (** TODO:claude Classify papers based on heuristics and update metadata *) 4 - let classify_papers base_dir overwrite = 5 - let papers_dir = Printf.sprintf "%s/papers" base_dir in 6 - if not (Sys.file_exists papers_dir) then ( 7 - Printf.eprintf "Papers directory not found: %s\n" papers_dir; 8 - 1 9 - ) else ( 10 - let paper_dirs = Sys.readdir papers_dir |> Array.to_list in 11 - List.iter (fun paper_slug -> 12 - let paper_path = Filename.concat papers_dir paper_slug in 13 - if Sys.is_directory paper_path then ( 14 - let versions = Sys.readdir paper_path |> Array.to_list 15 - |> List.filter (String.ends_with ~suffix:".md") in 16 - List.iter (fun version_file -> 17 - let filepath = Filename.concat paper_path version_file in 18 - let version = Filename.remove_extension version_file in 19 - try 20 - let paper = Bushel.Paper.of_md ~slug:paper_slug ~ver:version filepath in 21 - let predicted_class = Bushel.Paper.classification paper in 22 - let class_str = Bushel.Paper.string_of_classification predicted_class in 23 - Printf.printf "%s/%s: %s\n" paper_slug version class_str; 24 - 25 - (* Update the file if overwrite is enabled *) 26 - if overwrite then ( 27 - let json_data = Bushel.Paper.raw_json paper in 28 - let keys = Ezjsonm.get_dict json_data in 29 - let updated_keys = ("classification", `String class_str) :: 30 - (List.filter (fun (k, _) -> k <> "classification") keys) in 31 - let updated_json = `O updated_keys in 32 - let abstract = Some (Bushel.Paper.abstract paper) in 33 - let content = Bushel.Paper.to_yaml ?abstract ~ver:version updated_json in 34 - let oc = open_out filepath in 35 - output_string oc content; 36 - close_out oc; 37 - Printf.printf " Updated %s\n" filepath 38 - ) 39 - with e -> 40 - Printf.eprintf "Error processing %s: %s\n" filepath (Printexc.to_string e) 41 - ) versions 42 - ) 43 - ) paper_dirs; 44 - 0 45 - ) 46 - 47 - let overwrite_flag = 48 - let doc = "Update paper files with classification metadata" in 49 - Arg.(value & flag & info ["overwrite"] ~doc) 50 - 51 - let term = 52 - Term.(const classify_papers $ Bushel_common.base_dir $ overwrite_flag) 53 - 54 - let cmd = 55 - let doc = "Classify papers as full/short/preprint" in 56 - let info = Cmd.info "paper-classify" ~doc in 57 - Cmd.v info term
-325
stack/bushel/bin/bushel_paper_tex.ml
··· 1 - open Printf 2 - open Cmdliner 3 - 4 - (** TODO:claude Format author name for LaTeX with initials and full last name *) 5 - let format_author_name author = 6 - (* Split author name and convert to "F.M.~Lastname" format *) 7 - let parts = String.split_on_char ' ' author |> List.filter (fun s -> s <> "") in 8 - match List.rev parts with 9 - | [] -> "" 10 - | lastname :: rest_rev -> 11 - let firstname_parts = List.rev rest_rev in 12 - let initials = List.map (fun name -> 13 - if String.length name > 0 then String.sub name 0 1 ^ "." else "" 14 - ) firstname_parts in 15 - let initials_str = String.concat "" initials in 16 - if initials_str = "" then lastname 17 - else initials_str ^ "~" ^ lastname 18 - 19 - (** TODO:claude Format author name for LaTeX with underline for target author *) 20 - let format_author target_name author = 21 - let formatted = format_author_name author in 22 - (* Check if author contains target name substring for underlining *) 23 - if String.lowercase_ascii author |> fun s -> 24 - Re.execp (Re.Perl.compile_pat ~opts:[`Caseless] target_name) s 25 - then sprintf "\\underline{%s}" formatted 26 - else formatted 27 - 28 - (** TODO:claude Format authors list for LaTeX *) 29 - let format_authors target_name authors = 30 - match authors with 31 - | [] -> "" 32 - | [single] -> format_author target_name single 33 - | _ -> 34 - let formatted = List.map (format_author target_name) authors in 35 - String.concat ", " formatted 36 - 37 - (** TODO:claude Escape special LaTeX characters *) 38 - let escape_latex str = 39 - let replacements = [ 40 - ("&", "\\&"); 41 - ("%", "\\%"); 42 - ("$", "\\$"); 43 - ("#", "\\#"); 44 - ("_", "\\_"); 45 - ("{", "\\{"); 46 - ("}", "\\}"); 47 - ("~", "\\textasciitilde{}"); 48 - ("^", "\\textasciicircum{}"); 49 - ] in 50 - List.fold_left (fun s (from, to_) -> 51 - Re.replace_string (Re.compile (Re.str from)) ~by:to_ s 52 - ) str replacements 53 - 54 - (** TODO:claude Clean venue name by removing common prefixes and handling arXiv *) 55 - let clean_venue_name venue = 56 - (* Special handling for arXiv to avoid redundancy like "arXiv (arXiv:ID)" *) 57 - let venue_lower = String.lowercase_ascii venue in 58 - if Re.execp (Re.Perl.compile_pat ~opts:[`Caseless] "arxiv") venue_lower then 59 - if String.contains venue ':' then 60 - (* If it contains arXiv:ID format, just return the ID part *) 61 - let parts = String.split_on_char ':' venue in 62 - match parts with 63 - | _ :: id :: _ -> String.trim id 64 - | _ -> venue 65 - else venue 66 - else 67 - let prefixes = [ 68 - "in proceedings of the "; 69 - "proceedings of the "; 70 - "in proceedings of "; 71 - "proceedings of "; 72 - "in the "; 73 - "the "; 74 - ] in 75 - let rec remove_prefixes v = function 76 - | [] -> v 77 - | prefix :: rest -> 78 - if String.length v >= String.length prefix && 79 - String.sub (String.lowercase_ascii v) 0 (String.length prefix) = prefix 80 - then String.sub v (String.length prefix) (String.length v - String.length prefix) 81 - else remove_prefixes v rest 82 - in 83 - let cleaned = remove_prefixes venue prefixes in 84 - (* Capitalize first letter *) 85 - if String.length cleaned > 0 then 86 - String.mapi (fun i c -> if i = 0 then Char.uppercase_ascii c else c) cleaned 87 - else cleaned 88 - 89 - (** TODO:claude Format venue for LaTeX with volume/number details for full papers *) 90 - let format_venue paper = 91 - let open Bushel.Paper in 92 - let classification = classification paper in 93 - match bibtype paper with 94 - | "article" -> 95 - let journal_name = try journal paper |> clean_venue_name |> escape_latex with _ -> "Journal" in 96 - if classification = Full then ( 97 - let vol_info = 98 - let vol = volume paper in 99 - let num = issue paper in 100 - match vol, num with 101 - | Some v, Some n -> sprintf ", %s(%s)" v n 102 - | Some v, None -> sprintf ", vol. %s" v 103 - | None, Some n -> sprintf ", no. %s" n 104 - | None, None -> "" 105 - in 106 - sprintf "\\textit{%s%s}" journal_name vol_info 107 - ) else 108 - sprintf "\\textit{%s}" journal_name 109 - | "inproceedings" -> 110 - let conf_name = try booktitle paper |> clean_venue_name |> escape_latex with _ -> "Conference" in 111 - sprintf "\\textit{%s}" conf_name 112 - | "techreport" -> 113 - let inst = try institution paper |> escape_latex with _ -> "Institution" in 114 - sprintf "\\textit{Technical Report, %s}" inst 115 - | "phdthesis" -> 116 - let school = try institution paper |> escape_latex with _ -> "University" in 117 - sprintf "\\textit{PhD thesis, %s}" school 118 - | "mastersthesis" -> 119 - let school = try institution paper |> escape_latex with _ -> "University" in 120 - sprintf "\\textit{Master's thesis, %s}" school 121 - | "book" -> 122 - let publisher_str = try Bushel.Paper.publisher paper |> escape_latex with _ -> "" in 123 - let edition_str = try 124 - let json = Bushel.Paper.raw_json paper in 125 - let keys = Ezjsonm.get_dict json in 126 - List.assoc "edition" keys |> Ezjsonm.get_string |> escape_latex 127 - with _ -> "" in 128 - let isbn_str = try Bushel.Paper.isbn paper |> escape_latex with _ -> "" in 129 - let venue_info = 130 - let base = match publisher_str, edition_str with 131 - | pub, ed when pub <> "" && ed <> "" -> sprintf "%s, %s edition" pub ed 132 - | pub, _ when pub <> "" -> pub 133 - | _, ed when ed <> "" -> sprintf "%s edition" ed 134 - | _, _ -> "Book" 135 - in 136 - if isbn_str <> "" then 137 - sprintf "%s, ISBN %s" base isbn_str 138 - else 139 - base 140 - in 141 - sprintf "\\textit{%s}" venue_info 142 - | "misc" -> 143 - (* Try to get meaningful venue info for misc entries *) 144 - let journal_str = try Bushel.Paper.journal paper |> clean_venue_name |> escape_latex with _ -> "" in 145 - let booktitle_str = try Bushel.Paper.booktitle paper |> clean_venue_name |> escape_latex with _ -> "" in 146 - let publisher_str = try Bushel.Paper.publisher paper |> escape_latex with _ -> "" in 147 - if journal_str <> "" then 148 - sprintf "\\textit{%s}" journal_str 149 - else if booktitle_str <> "" then 150 - sprintf "\\textit{%s}" booktitle_str 151 - else if publisher_str <> "" then 152 - sprintf "\\textit{%s}" publisher_str 153 - else 154 - sprintf "\\textit{Preprint}" 155 - | "abstract" -> 156 - (* Handle conference abstracts *) 157 - let conf_name = try Bushel.Paper.booktitle paper |> clean_venue_name |> escape_latex with _ -> "" in 158 - let journal_str = try Bushel.Paper.journal paper |> clean_venue_name |> escape_latex with _ -> "" in 159 - if conf_name <> "" then 160 - sprintf "\\textit{%s (Abstract)}" conf_name 161 - else if journal_str <> "" then 162 - sprintf "\\textit{%s (Abstract)}" journal_str 163 - else 164 - sprintf "\\textit{Conference Abstract}" 165 - | _ -> 166 - (* Fallback for other types with special arXiv handling *) 167 - let journal_str = try Bushel.Paper.journal paper with _ -> "" in 168 - let publisher_str = try Bushel.Paper.publisher paper |> escape_latex with _ -> "" in 169 - 170 - (* Special handling for arXiv papers - skip venue, let note handle it *) 171 - if String.lowercase_ascii journal_str = "arxiv" then 172 - "" 173 - else if journal_str <> "" then 174 - sprintf "\\textit{%s}" (journal_str |> clean_venue_name |> escape_latex) 175 - else if publisher_str <> "" then 176 - sprintf "\\textit{%s}" publisher_str 177 - else 178 - sprintf "\\textit{Preprint}" 179 - 180 - (** TODO:claude Generate LaTeX PubItem for a paper *) 181 - let generate_latex_entry target_name paper = 182 - let open Bushel.Paper in 183 - let slug_str = slug paper in 184 - let title_str = title paper |> escape_latex in 185 - let authors_str = format_authors target_name (authors paper) in 186 - let venue_str = format_venue paper in 187 - let year_str = year paper |> string_of_int in 188 - let month_str = 189 - let (_, m, _) = date paper in 190 - sprintf "%02d" m 191 - in 192 - 193 - (* Check if paper is in the future *) 194 - let is_in_press = 195 - let paper_time = datetime paper in 196 - let now = Ptime_clock.now () in 197 - Ptime.compare paper_time now > 0 198 - in 199 - 200 - (* Add DOI or PDF link if available, but not for in-press papers unless they have explicit URL *) 201 - let title_with_link = 202 - if is_in_press then 203 - (* For in-press papers, only add link if there's an explicit URL field *) 204 - match Bushel.Paper.url paper with 205 - | Some u -> sprintf "\\href{%s}{%s}" u title_str 206 - | None -> title_str (* No link for in-press papers without explicit URL *) 207 - else 208 - (* For published papers, use DOI or URL or default PDF link *) 209 - match Bushel.Paper.doi paper with 210 - | Some doi -> sprintf "\\href{https://doi.org/%s}{%s}" doi title_str 211 - | None -> 212 - (* Check if there's a URL, otherwise default to PDF link *) 213 - let url = match Bushel.Paper.url paper with 214 - | Some u -> u 215 - | None -> sprintf "https://anil.recoil.org/papers/%s.pdf" slug_str 216 - in 217 - sprintf "\\href{%s}{%s}" url title_str 218 - in 219 - 220 - (* Add "(in press)" if paper is in the future *) 221 - let in_press_str = if is_in_press then " \\textit{(in press)}" else "" in 222 - 223 - (* Add note if present *) 224 - let note_str = match Bushel.Paper.note paper with 225 - | Some n -> sprintf " \\textit{(%s)}" (escape_latex n) 226 - | None -> "" 227 - in 228 - 229 - sprintf "\\BigGap\n\\PubItemLabeled{%s}\n{``%s,''\n%s,\n%s%s%s,\n\\DatestampYM{%s}{%s}.}\n" 230 - slug_str title_with_link authors_str venue_str in_press_str note_str year_str month_str 231 - 232 - (** TODO:claude Generate LaTeX output files for papers *) 233 - let generate_tex base_dir output_dir target_name = 234 - try 235 - let papers = Bushel.load_papers base_dir in 236 - let latest_papers = List.filter (fun p -> p.Bushel.Paper.latest) papers in 237 - 238 - (* Extract selected papers first *) 239 - let selected_papers = List.filter Bushel.Paper.selected latest_papers in 240 - 241 - (* Group remaining papers by classification, excluding selected ones *) 242 - let non_selected_papers = List.filter (fun p -> not (Bushel.Paper.selected p)) latest_papers in 243 - let full_papers = List.filter (fun p -> 244 - Bushel.Paper.classification p = Bushel.Paper.Full) non_selected_papers in 245 - let short_papers = List.filter (fun p -> 246 - Bushel.Paper.classification p = Bushel.Paper.Short) non_selected_papers in 247 - let preprint_papers = List.filter (fun p -> 248 - Bushel.Paper.classification p = Bushel.Paper.Preprint) non_selected_papers in 249 - 250 - (* Sort each group by date, newest first *) 251 - let sorted_full = List.sort Bushel.Paper.compare full_papers in 252 - let sorted_short = List.sort Bushel.Paper.compare short_papers in 253 - let sorted_preprint = List.sort Bushel.Paper.compare preprint_papers in 254 - let sorted_selected = List.sort Bushel.Paper.compare selected_papers in 255 - 256 - (* Ensure output directory exists *) 257 - (try Unix.mkdir output_dir 0o755 with Unix.Unix_error (Unix.EEXIST, _, _) -> ()); 258 - 259 - (* Write papers_full.tex *) 260 - let oc_full = open_out (Filename.concat output_dir "papers_full.tex") in 261 - List.iter (fun paper -> 262 - let latex = generate_latex_entry target_name paper in 263 - output_string oc_full latex; 264 - output_char oc_full '\n' 265 - ) sorted_full; 266 - close_out oc_full; 267 - Printf.printf "Generated %s/papers_full.tex with %d entries\n" output_dir (List.length sorted_full); 268 - 269 - (* Write papers_short.tex *) 270 - let oc_short = open_out (Filename.concat output_dir "papers_short.tex") in 271 - List.iter (fun paper -> 272 - let latex = generate_latex_entry target_name paper in 273 - output_string oc_short latex; 274 - output_char oc_short '\n' 275 - ) sorted_short; 276 - close_out oc_short; 277 - Printf.printf "Generated %s/papers_short.tex with %d entries\n" output_dir (List.length sorted_short); 278 - 279 - (* Write papers_preprint.tex *) 280 - let oc_preprint = open_out (Filename.concat output_dir "papers_preprint.tex") in 281 - List.iter (fun paper -> 282 - let latex = generate_latex_entry target_name paper in 283 - output_string oc_preprint latex; 284 - output_char oc_preprint '\n' 285 - ) sorted_preprint; 286 - close_out oc_preprint; 287 - Printf.printf "Generated %s/papers_preprint.tex with %d entries\n" output_dir (List.length sorted_preprint); 288 - 289 - (* Write papers_selected.tex *) 290 - let oc_selected = open_out (Filename.concat output_dir "papers_selected.tex") in 291 - List.iter (fun paper -> 292 - let latex = generate_latex_entry target_name paper in 293 - output_string oc_selected latex; 294 - output_char oc_selected '\n' 295 - ) sorted_selected; 296 - close_out oc_selected; 297 - Printf.printf "Generated %s/papers_selected.tex with %d entries\n" output_dir (List.length sorted_selected); 298 - 299 - (* Write paper_count.tex *) 300 - let total_count = List.length latest_papers in 301 - let oc_count = open_out (Filename.concat output_dir "paper_count.tex") in 302 - output_string oc_count (sprintf "\\setcounter{pubcounter}{%d}\n" total_count); 303 - close_out oc_count; 304 - Printf.printf "Generated %s/paper_count.tex with total count: %d\n" output_dir total_count; 305 - 306 - 0 307 - with e -> 308 - Printf.eprintf "Error loading papers: %s\n" (Printexc.to_string e); 309 - 1 310 - 311 - let output_dir_arg = 312 - let doc = "Output directory for generated LaTeX files" in 313 - Arg.(value & opt string "." & info ["output"; "o"] ~docv:"DIR" ~doc) 314 - 315 - let target_name_arg = 316 - let doc = "Name to underline in author list (e.g., 'Madhavapeddy')" in 317 - Arg.(value & opt string "Madhavapeddy" & info ["target"; "t"] ~docv:"NAME" ~doc) 318 - 319 - let term = 320 - Term.(const generate_tex $ Bushel_common.base_dir $ output_dir_arg $ target_name_arg) 321 - 322 - let cmd = 323 - let doc = "Generate LaTeX publication entries" in 324 - let info = Cmd.info "paper-tex" ~doc in 325 - Cmd.v info term
-48
stack/bushel/bin/bushel_search.ml
··· 1 - open Cmdliner 2 - 3 - (** Bushel search command for integration with main CLI *) 4 - 5 - let limit = 6 - let doc = "Maximum number of results to return" in 7 - Arg.(value & opt int 50 & info ["limit"; "l"] ~doc) 8 - 9 - let offset = 10 - let doc = "Number of results to skip (for pagination)" in 11 - Arg.(value & opt int 0 & info ["offset"; "o"] ~doc) 12 - 13 - let query_text = 14 - let doc = "Search query text" in 15 - Arg.(required & pos 0 (some string) None & info [] ~docv:"QUERY" ~doc) 16 - 17 - (** Search function using multisearch *) 18 - let search query_text limit offset env _xdg _profile = 19 - let config = Bushel.Typesense.load_config_from_files () in 20 - 21 - if config.api_key = "" then ( 22 - Printf.eprintf "Error: API key is required. Use TYPESENSE_API_KEY environment variable or create .typesense-key file.\n"; 23 - 1 24 - ) else ( 25 - Printf.printf "Searching Typesense at %s\n" config.endpoint; 26 - Printf.printf "Query: \"%s\"\n" query_text; 27 - Printf.printf "Limit: %d, Offset: %d\n\n" limit offset; 28 - 29 - Eio.Switch.run (fun sw -> 30 - let result = Bushel.Typesense.multisearch ~sw ~env config query_text ~limit:50 () in 31 - match result with 32 - | Ok multisearch_resp -> 33 - let combined_response = Bushel.Typesense.combine_multisearch_results multisearch_resp ~limit ~offset () in 34 - Printf.printf "Found %d results (%.2fms)\n\n" combined_response.total combined_response.query_time; 35 - 36 - List.iteri (fun i (hit : Bushel.Typesense.search_result) -> 37 - Printf.printf "%d. %s (score: %.2f)\n" (i + 1) (Bushel.Typesense.pp_search_result_oneline hit) hit.Bushel.Typesense.score 38 - ) combined_response.hits 39 - | Error err -> 40 - Format.eprintf "Search error: %a\n" Bushel.Typesense.pp_error err; 41 - exit 1 42 - ); 43 - 0 44 - ) 45 - 46 - (** Command line term for integration with eiocmd *) 47 - let term : (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Cmdliner.Term.t = 48 - Term.(const search $ query_text $ limit $ offset)
-70
stack/bushel/bin/bushel_thumbs.ml
··· 1 - open Printf 2 - open Cmdliner 3 - 4 - (** TODO:claude 5 - Helper module for ImageMagick operations *) 6 - module Imagemagick = struct 7 - (* Generate thumbnail from PDF *) 8 - let generate_thumbnail ~pdf_path ~size ~output_path = 9 - let cmd = 10 - sprintf "magick -density 600 -quality 100 %s[0] -gravity North -crop 100%%x50%%+0+0 -resize %s %s" 11 - pdf_path size output_path 12 - in 13 - eprintf "Running: %s\n%!" cmd; 14 - Sys.command cmd 15 - end 16 - 17 - (** TODO:claude 18 - Process a single paper to generate its thumbnail *) 19 - let process_paper base_dir output_dir paper = 20 - let slug = Bushel.Paper.slug paper in 21 - let pdf_path = sprintf "%s/static/papers/%s.pdf" base_dir slug in 22 - let thumbnail_path = sprintf "%s/%s.png" output_dir slug in 23 - 24 - (* Skip if thumbnail already exists *) 25 - if Sys.file_exists thumbnail_path then ( 26 - printf "Thumbnail already exists for %s, skipping\n%!" slug 27 - ) else if Sys.file_exists pdf_path then ( 28 - try 29 - let size = sprintf "2048x" in 30 - printf "Generating high-res thumbnail for %s (size: %s)\n%!" slug size; 31 - match Imagemagick.generate_thumbnail ~pdf_path ~size ~output_path:thumbnail_path with 32 - | 0 -> printf "Successfully generated thumbnail for %s\n%!" slug 33 - | n -> eprintf "Error generating thumbnail for %s (exit code: %d)\n%!" slug n 34 - with 35 - | e -> eprintf "Error processing paper %s: %s\n%!" slug (Printexc.to_string e) 36 - ) else ( 37 - eprintf "PDF file not found for paper: %s\n%!" slug 38 - ) 39 - 40 - (** TODO:claude 41 - Main function to process all papers in a directory *) 42 - let process_papers base_dir output_dir = 43 - (* Create output directory if it doesn't exist *) 44 - if not (Sys.file_exists output_dir) then ( 45 - printf "Creating output directory: %s\n%!" output_dir; 46 - Unix.mkdir output_dir 0o755 47 - ); 48 - 49 - (* Load Bushel entries and get papers *) 50 - printf "Loading papers from %s\n%!" base_dir; 51 - let e = Bushel.load base_dir in 52 - let papers = Bushel.Entry.papers e in 53 - 54 - (* Process each paper *) 55 - printf "Found %d papers\n%!" (List.length papers); 56 - List.iter (process_paper base_dir output_dir) papers 57 - 58 - (* Command line arguments are now imported from Bushel_common *) 59 - 60 - (* Export the term for use in main bushel.ml *) 61 - let term : (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Cmdliner.Term.t = 62 - Term.(const (fun base_dir output_dir _env _xdg _profile -> process_papers base_dir output_dir; 0) $ 63 - Bushel_common.base_dir $ Bushel_common.output_dir ~default:".") 64 - 65 - let cmd = 66 - let doc = "Generate thumbnails for paper PDFs" in 67 - let info = Cmd.info "thumbs" ~doc in 68 - Cmd.v info term 69 - 70 - (* Main entry point removed - accessed through bushel_main.ml *)
-188
stack/bushel/bin/bushel_typesense.ml
··· 1 - open Cmdliner 2 - 3 - (** Bushel Typesense binary with upload and query functionality *) 4 - 5 - let data_dir = 6 - let doc = "Directory containing bushel data files" in 7 - Arg.(value & opt string "." & info ["data-dir"; "d"] ~doc) 8 - 9 - (** Main upload function *) 10 - let upload data_dir openai_key env _xdg _profile = 11 - let config = Bushel.Typesense.load_config_from_files () in 12 - 13 - let config = { config with 14 - openai_key = if openai_key = "" then config.openai_key else openai_key 15 - } in 16 - 17 - if config.api_key = "" then ( 18 - Printf.eprintf "Error: API key is required. Use TYPESENSE_API_KEY environment variable or create .typesense-key file.\n"; 19 - 1 20 - ) else if config.openai_key = "" then ( 21 - Printf.eprintf "Error: OpenAI API key is required for embeddings. Use OPENAI_API_KEY environment variable or create .openrouter-api file.\n"; 22 - 1 23 - ) else ( 24 - Printf.printf "Loading bushel data from %s\n%!" data_dir; 25 - let entries = Bushel.load data_dir in 26 - 27 - Printf.printf "Uploading bushel data to Typesense at %s\n%!" config.endpoint; 28 - 29 - Eio.Switch.run (fun sw -> 30 - Bushel.Typesense.upload_all ~sw ~env config entries 31 - ); 32 - 0 33 - ) 34 - 35 - 36 - (** Query function *) 37 - let query query_text collection limit offset env _xdg _profile = 38 - let config = Bushel.Typesense.load_config_from_files () in 39 - 40 - if config.api_key = "" then ( 41 - Printf.eprintf "Error: API key is required. Use TYPESENSE_API_KEY environment variable or create .typesense-key file.\n"; 42 - 1 43 - ) else ( 44 - Printf.printf "Searching Typesense at %s\n%!" config.endpoint; 45 - Printf.printf "Query: \"%s\"\n%!" query_text; 46 - if collection <> "" then Printf.printf "Collection: %s\n%!" collection; 47 - Printf.printf "Limit: %d, Offset: %d\n\n%!" limit offset; 48 - 49 - Eio.Switch.run (fun sw -> 50 - let search_fn = if collection = "" then 51 - Bushel.Typesense.search_all ~sw ~env config query_text ~limit ~offset 52 - else 53 - Bushel.Typesense.search_collection ~sw ~env config collection query_text ~limit ~offset 54 - in 55 - let result = search_fn () in 56 - match result with 57 - | Ok response -> 58 - Printf.printf "Found %d results (%.2fms)\n\n%!" response.total response.query_time; 59 - List.iteri (fun i (hit : Bushel.Typesense.search_result) -> 60 - Printf.printf "%d. [%s] %s (score: %.2f)\n%!" (i + 1) hit.collection hit.title hit.score; 61 - if hit.content <> "" then Printf.printf " %s\n%!" hit.content; 62 - if hit.highlights <> [] then ( 63 - Printf.printf " Highlights:\n%!"; 64 - List.iter (fun (field, snippets) -> 65 - List.iter (fun snippet -> 66 - Printf.printf " %s: %s\n%!" field snippet 67 - ) snippets 68 - ) hit.highlights 69 - ); 70 - Printf.printf "\n%!" 71 - ) response.hits 72 - | Error err -> 73 - Format.eprintf "Search error: %a\n%!" Bushel.Typesense.pp_error err; 74 - exit 1 75 - ); 76 - 0 77 - ) 78 - 79 - (** List collections function *) 80 - let list env _xdg _profile = 81 - let config = Bushel.Typesense.load_config_from_files () in 82 - 83 - if config.api_key = "" then ( 84 - Printf.eprintf "Error: API key is required. Use TYPESENSE_API_KEY environment variable or create .typesense-key file.\n"; 85 - 1 86 - ) else ( 87 - Printf.printf "Listing collections at %s\n\n%!" config.endpoint; 88 - 89 - Eio.Switch.run (fun sw -> 90 - let result = Bushel.Typesense.list_collections ~sw ~env config in 91 - match result with 92 - | Ok collections -> 93 - Printf.printf "Collections:\n%!"; 94 - List.iter (fun (name, count) -> 95 - Printf.printf " %s (%d documents)\n%!" name count 96 - ) collections 97 - | Error err -> 98 - Format.eprintf "List error: %a\n%!" Bushel.Typesense.pp_error err; 99 - exit 1 100 - ); 101 - 0 102 - ) 103 - 104 - (** Command line arguments *) 105 - let openai_key = 106 - let doc = "OpenAI API key for embeddings" in 107 - Arg.(value & opt string "" & info ["openai-key"; "oa"] ~doc) 108 - 109 - let query_text = 110 - let doc = "Search query text" in 111 - Arg.(required & pos 0 (some string) None & info [] ~docv:"QUERY" ~doc) 112 - 113 - let collection = 114 - let doc = "Specific collection to search (contacts, papers, projects, notes, videos, ideas)" in 115 - Arg.(value & opt string "" & info ["collection"; "c"] ~doc) 116 - 117 - let limit = 118 - let doc = "Maximum number of results to return" in 119 - Arg.(value & opt int 10 & info ["limit"; "l"] ~doc) 120 - 121 - let offset = 122 - let doc = "Number of results to skip (for pagination)" in 123 - Arg.(value & opt int 0 & info ["offset"; "o"] ~doc) 124 - 125 - (** Query command *) 126 - let query_cmd = 127 - let doc = "Search bushel collections in Typesense" in 128 - let man = [ 129 - `S Manpage.s_description; 130 - `P "Search across all or specific bushel collections in Typesense."; 131 - `P "The API key can be read from .typesense-key file or TYPESENSE_API_KEY environment variable."; 132 - `S Manpage.s_examples; 133 - `P "Search all collections:"; 134 - `Pre " bushel-typesense query \"machine learning\""; 135 - `P "Search specific collection:"; 136 - `Pre " bushel-typesense query \"OCaml\" --collection papers"; 137 - `P "Search with pagination:"; 138 - `Pre " bushel-typesense query \"AI\" --limit 5 --offset 10"; 139 - ] in 140 - Eiocmd.run 141 - ~info:(Cmd.info "query" ~doc ~man) 142 - ~app_name:"bushel-typesense" 143 - ~service:"typesense" 144 - Term.(const query $ query_text $ collection $ limit $ offset) 145 - 146 - (** List command *) 147 - let list_cmd = 148 - let doc = "List all collections in Typesense" in 149 - let man = [ 150 - `S Manpage.s_description; 151 - `P "List all available collections and their document counts."; 152 - ] in 153 - Eiocmd.run 154 - ~info:(Cmd.info "list" ~doc ~man) 155 - ~app_name:"bushel-typesense" 156 - ~service:"typesense" 157 - Term.(const list) 158 - 159 - (** Upload command *) 160 - let upload_cmd = 161 - let doc = "Upload bushel collections to Typesense search engine" in 162 - let man = [ 163 - `S Manpage.s_description; 164 - `P "Upload all bushel object types (contacts, papers, projects, notes, videos, ideas) to a Typesense search engine instance."; 165 - `P "The API keys can be read from files or environment variables."; 166 - `S Manpage.s_examples; 167 - `P "Upload to Typesense instance:"; 168 - `Pre " bushel-typesense upload --data-dir /path/to/data"; 169 - ] in 170 - Eiocmd.run 171 - ~info:(Cmd.info "upload" ~doc ~man) 172 - ~app_name:"bushel-typesense" 173 - ~service:"typesense" 174 - Term.(const upload $ data_dir $ openai_key) 175 - 176 - (** Main command group *) 177 - let main_cmd = 178 - let doc = "Bushel Typesense client" in 179 - let man = [ 180 - `S Manpage.s_description; 181 - `P "Client for uploading to and querying Bushel collections in Typesense search engine."; 182 - `S Manpage.s_commands; 183 - `S Manpage.s_common_options; 184 - ] in 185 - let info = Cmd.info "bushel-typesense" ~doc ~man in 186 - Cmd.group info [upload_cmd; query_cmd; list_cmd] 187 - 188 - let () = exit (Cmd.eval' main_cmd)
-138
stack/bushel/bin/bushel_video.ml
··· 1 - [@@@warning "-26-27-32"] 2 - 3 - open Lwt.Infix 4 - open Cmdliner 5 - 6 - let setup_log style_renderer level = 7 - Fmt_tty.setup_std_outputs ?style_renderer (); 8 - Logs.set_level level; 9 - Logs.set_reporter (Logs_fmt.reporter ()); 10 - () 11 - 12 - let process_videos output_dir overwrite base_url channel fetch_thumbs thumbs_dir = 13 - Peertube.fetch_all_channel_videos base_url channel >>= fun all_videos -> 14 - Logs.info (fun f -> f "Total videos: %d" (List.length all_videos)); 15 - 16 - (* Create thumbnails directory if needed *) 17 - (if fetch_thumbs && not (Sys.file_exists thumbs_dir) then 18 - Unix.mkdir thumbs_dir 0o755); 19 - 20 - (* Process each video, fetching full details for complete descriptions *) 21 - Lwt_list.map_s (fun video -> 22 - (* Fetch complete video details to get full description *) 23 - Peertube.fetch_video_details base_url video.Peertube.uuid >>= fun full_video -> 24 - let (description, published_date, title, url, uuid, slug) = 25 - Peertube.to_bushel_video full_video 26 - in 27 - Logs.info (fun f -> f "Title: %s, URL: %s" title url); 28 - 29 - (* Download thumbnail if requested *) 30 - (if fetch_thumbs then 31 - let thumb_path = Filename.concat thumbs_dir (uuid ^ ".jpg") in 32 - Peertube.download_thumbnail base_url full_video thumb_path >>= fun result -> 33 - match result with 34 - | Ok () -> 35 - Logs.info (fun f -> f "Downloaded thumbnail for %s to %s" title thumb_path); 36 - Lwt.return_unit 37 - | Error (`Msg e) -> 38 - Logs.warn (fun f -> f "Failed to download thumbnail for %s: %s" title e); 39 - Lwt.return_unit 40 - else 41 - Lwt.return_unit) >>= fun () -> 42 - 43 - Lwt.return {Bushel.Video.description; published_date; title; url; uuid; slug; 44 - talk=false; paper=None; project=None; tags=full_video.tags} 45 - ) all_videos >>= fun vids -> 46 - 47 - (* Write video files *) 48 - Lwt_list.iter_s (fun video -> 49 - let file_path = Filename.concat output_dir (video.Bushel.Video.uuid ^ ".md") in 50 - let file_exists = Sys.file_exists file_path in 51 - 52 - if file_exists then 53 - try 54 - (* If file exists, load it to preserve specific fields *) 55 - let existing_video = Bushel.Video.of_md file_path in 56 - (* Create merged video with preserved fields *) 57 - let merged_video = { 58 - video with 59 - tags = existing_video.tags; (* Preserve existing tags *) 60 - paper = existing_video.paper; (* Preserve paper field *) 61 - project = existing_video.project; (* Preserve project field *) 62 - talk = existing_video.talk; (* Preserve talk field *) 63 - } in 64 - 65 - (* Write the merged video data *) 66 - if overwrite then 67 - match Bushel.Video.to_file output_dir merged_video with 68 - | Ok () -> 69 - Logs.info (fun f -> f "Updated video %s with preserved fields in %s" 70 - merged_video.Bushel.Video.title file_path); 71 - Lwt.return_unit 72 - | Error (`Msg e) -> 73 - Logs.err (fun f -> f "Failed to update video %s: %s" 74 - merged_video.Bushel.Video.title e); 75 - Lwt.return_unit 76 - else begin 77 - Logs.info (fun f -> f "Skipping existing video %s (use --overwrite to replace)" 78 - video.Bushel.Video.title); 79 - Lwt.return_unit 80 - end 81 - with _ -> 82 - (* If reading existing file fails, proceed with new data *) 83 - if overwrite then 84 - match Bushel.Video.to_file output_dir video with 85 - | Ok () -> 86 - Logs.info (fun f -> f "Wrote video %s to %s (existing file could not be read)" 87 - video.Bushel.Video.title file_path); 88 - Lwt.return_unit 89 - | Error (`Msg e) -> 90 - Logs.err (fun f -> f "Failed to write video %s: %s" 91 - video.Bushel.Video.title e); 92 - Lwt.return_unit 93 - else begin 94 - Logs.info (fun f -> f "Skipping existing video %s (use --overwrite to replace)" 95 - video.Bushel.Video.title); 96 - Lwt.return_unit 97 - end 98 - else 99 - (* If file doesn't exist, just write new data *) 100 - match Bushel.Video.to_file output_dir video with 101 - | Ok () -> 102 - Logs.info (fun f -> f "Wrote new video %s to %s" 103 - video.Bushel.Video.title file_path); 104 - Lwt.return_unit 105 - | Error (`Msg e) -> 106 - Logs.err (fun f -> f "Failed to write video %s: %s" 107 - video.Bushel.Video.title e); 108 - Lwt.return_unit 109 - ) vids 110 - 111 - (* Command line arguments are now imported from Bushel_common *) 112 - 113 - (* Export the term for use in main bushel.ml *) 114 - let term = 115 - let fetch_thumbs = 116 - let doc = "Download video thumbnails" in 117 - Arg.(value & flag & info ["fetch-thumbs"] ~doc) 118 - in 119 - let thumbs_dir = 120 - let doc = "Directory to save thumbnails (default: images/videos)" in 121 - Arg.(value & opt string "images/videos" & info ["thumbs-dir"] ~docv:"DIR" ~doc) 122 - in 123 - Term.(const (fun output_dir overwrite base_url channel fetch_thumbs thumbs_dir () -> 124 - Lwt_main.run (process_videos output_dir overwrite base_url channel fetch_thumbs thumbs_dir); 0) 125 - $ Bushel_common.output_dir ~default:"." $ 126 - Bushel_common.overwrite $ 127 - Bushel_common.url_term ~default:"https://crank.recoil.org" ~doc:"PeerTube base URL" $ 128 - Bushel_common.channel ~default:"anil" $ 129 - fetch_thumbs $ 130 - thumbs_dir $ 131 - Bushel_common.setup_term) 132 - 133 - let cmd = 134 - let doc = "Fetch and process videos from PeerTube" in 135 - let info = Cmd.info "video" ~doc in 136 - Cmd.v info term 137 - 138 - (* Main entry point removed - accessed through bushel_main.ml *)
-81
stack/bushel/bin/bushel_video_thumbs.ml
··· 1 - [@@@warning "-26-27-32"] 2 - 3 - open Lwt.Infix 4 - open Cmdliner 5 - 6 - let setup_log style_renderer level = 7 - Fmt_tty.setup_std_outputs ?style_renderer (); 8 - Logs.set_level level; 9 - Logs.set_reporter (Logs_fmt.reporter ()); 10 - () 11 - 12 - let process_video_thumbs videos_dir thumbs_dir base_url = 13 - (* Ensure thumbnail directory exists *) 14 - (if not (Sys.file_exists thumbs_dir) then 15 - Unix.mkdir thumbs_dir 0o755); 16 - 17 - (* Read all video markdown files *) 18 - let video_files = Sys.readdir videos_dir 19 - |> Array.to_list 20 - |> List.filter (fun f -> Filename.check_suffix f ".md") 21 - |> List.map (fun f -> Filename.concat videos_dir f) 22 - in 23 - 24 - Logs.info (fun f -> f "Found %d video files to process" (List.length video_files)); 25 - 26 - (* Process each video file *) 27 - Lwt_list.iter_s (fun video_file -> 28 - try 29 - (* Load existing video *) 30 - let video = Bushel.Video.of_md video_file in 31 - let uuid = video.Bushel.Video.uuid in 32 - 33 - Logs.info (fun f -> f "Processing video: %s (UUID: %s)" video.title uuid); 34 - 35 - (* Fetch video details from PeerTube to get thumbnail info *) 36 - Peertube.fetch_video_details base_url uuid >>= fun peertube_video -> 37 - 38 - (* Download thumbnail *) 39 - let thumb_path = Filename.concat thumbs_dir (uuid ^ ".jpg") in 40 - Peertube.download_thumbnail base_url peertube_video thumb_path >>= fun result -> 41 - 42 - match result with 43 - | Ok () -> 44 - Logs.info (fun f -> f "Downloaded thumbnail for %s to %s" video.title thumb_path); 45 - 46 - (* Update video file with thumbnail_url field *) 47 - (match Peertube.thumbnail_url base_url peertube_video with 48 - | Some url -> 49 - Logs.info (fun f -> f "Thumbnail URL: %s" url); 50 - Lwt.return_unit 51 - | None -> 52 - Logs.warn (fun f -> f "No thumbnail URL for video %s" video.title); 53 - Lwt.return_unit) 54 - | Error (`Msg e) -> 55 - Logs.err (fun f -> f "Failed to download thumbnail for %s: %s" video.title e); 56 - Lwt.return_unit 57 - with exn -> 58 - Logs.err (fun f -> f "Error processing %s: %s" video_file (Printexc.to_string exn)); 59 - Lwt.return_unit 60 - ) video_files 61 - 62 - let term = 63 - let videos_dir = 64 - let doc = "Directory containing video markdown files" in 65 - Arg.(value & opt string "data/videos" & info ["videos-dir"; "d"] ~docv:"DIR" ~doc) 66 - in 67 - let thumbs_dir = 68 - let doc = "Directory to save thumbnails" in 69 - Arg.(value & opt string "images/videos" & info ["thumbs-dir"; "t"] ~docv:"DIR" ~doc) 70 - in 71 - Term.(const (fun videos_dir thumbs_dir base_url () -> 72 - Lwt_main.run (process_video_thumbs videos_dir thumbs_dir base_url); 0) 73 - $ videos_dir $ 74 - thumbs_dir $ 75 - Bushel_common.url_term ~default:"https://crank.recoil.org" ~doc:"PeerTube base URL" $ 76 - Bushel_common.setup_term) 77 - 78 - let cmd = 79 - let doc = "Download thumbnails for existing videos and update metadata" in 80 - let info = Cmd.info "video-thumbs" ~doc in 81 - Cmd.v info term
-20
stack/bushel/bin/dune
··· 1 - (library 2 - (name bushel_common) 3 - (modules bushel_common) 4 - (libraries cmdliner)) 5 - 6 - (executable 7 - (name bushel_main) 8 - (public_name bushel) 9 - (package bushel) 10 - (modules bushel_main bushel_bibtex bushel_ideas bushel_info bushel_missing bushel_note_doi bushel_obsidian bushel_paper_classify bushel_paper_tex bushel_thumbs bushel_search) 11 - (flags (:standard -w -69)) 12 - (libraries bushel bushel_common cmdliner eio eio_main eiocmd yaml ezjsonm zotero-translation fmt cmarkit uri unix ptime.clock.os crockford)) 13 - 14 - (executable 15 - (name bushel_typesense) 16 - (public_name bushel-typesense) 17 - (package bushel) 18 - (modules bushel_typesense) 19 - (flags (:standard -w -69)) 20 - (libraries bushel bushel_common cmdliner eio eio_main eiocmd))
-51
stack/bushel/bushel.opam
··· 1 - # This file is generated by dune, edit dune-project instead 2 - opam-version: "2.0" 3 - synopsis: "A webring but not as oldskool" 4 - description: "This is all still a work in progress" 5 - maintainer: ["anil@recoil.org"] 6 - authors: ["Anil Madhavapeddy"] 7 - license: "ISC" 8 - homepage: "https://github.com/avsm/bushel" 9 - bug-reports: "https://github.com/avsm/bushel/issues" 10 - depends: [ 11 - "dune" {>= "3.17"} 12 - "ocaml" {>= "5.2.0"} 13 - "uri" 14 - "cmarkit" 15 - "ezjsonm" 16 - "ptime" 17 - "jsont" 18 - "bytesrw" 19 - "jekyll-format" 20 - "yaml" 21 - "eio" 22 - "eio_main" 23 - "requests" 24 - "fmt" 25 - "peertubee" 26 - "karakeep" 27 - "typesense-client" 28 - "cmdliner" 29 - "eiocmd" 30 - "xdge" 31 - "keyeio" 32 - "odoc" {with-doc} 33 - ] 34 - build: [ 35 - ["dune" "subst"] {dev} 36 - [ 37 - "dune" 38 - "build" 39 - "-p" 40 - name 41 - "-j" 42 - jobs 43 - "@install" 44 - "@runtest" {with-test} 45 - "@doc" {with-doc} 46 - ] 47 - ] 48 - dev-repo: "git+https://github.com/avsm/bushel.git" 49 - pin-depends: [ 50 - [ "zotero-translation.dev" "git+https://github.com/avsm/zotero-translation.git" ] 51 - ]
-3
stack/bushel/bushel.opam.template
··· 1 - pin-depends: [ 2 - [ "zotero-translation.dev" "git+https://github.com/avsm/zotero-translation.git" ] 3 - ]
-35
stack/bushel/dune-project
··· 1 - (lang dune 3.17) 2 - (name bushel) 3 - 4 - (source (github avsm/bushel)) 5 - (license ISC) 6 - (authors "Anil Madhavapeddy") 7 - (maintainers "anil@recoil.org") 8 - 9 - (generate_opam_files true) 10 - 11 - (package 12 - (name bushel) 13 - (synopsis "A webring but not as oldskool") 14 - (description "This is all still a work in progress") 15 - (depends 16 - (ocaml (>= "5.2.0")) 17 - uri 18 - cmarkit 19 - ezjsonm 20 - ptime 21 - jsont 22 - bytesrw 23 - jekyll-format 24 - yaml 25 - eio 26 - eio_main 27 - requests 28 - fmt 29 - peertubee 30 - karakeep 31 - typesense-client 32 - cmdliner 33 - eiocmd 34 - xdge 35 - keyeio))
-79
stack/bushel/lib/bushel.ml
··· 1 - module Contact = Contact 2 - module Idea = Idea 3 - module Note = Note 4 - module Paper = Paper 5 - module Project = Project 6 - module Video = Video 7 - module Tags = Tags 8 - module Link = Link 9 - module Entry = Entry 10 - module Util = Util 11 - module Srcsetter = Srcsetter 12 - module Md = Md 13 - module Typesense = Typesense 14 - module Link_graph = Link_graph 15 - module Description = Description 16 - module Doi_entry = Doi_entry 17 - 18 - let map_md base subdir fn = 19 - let dir = base ^ "/data/" ^ subdir in 20 - Sys.readdir dir 21 - |> Array.to_list 22 - |> List.filter (fun f -> Filename.check_suffix f ".md") 23 - |> List.map (fun e -> fn dir e) 24 - ;; 25 - 26 - let map_category base c fn = map_md base c (fun dir e -> fn @@ Filename.concat dir e) 27 - let dbg l = Printf.eprintf "loading %s\n%!" l 28 - 29 - let load_contacts base = dbg "contacts"; map_category base "contacts" Contact.of_md 30 - let load_projects base = dbg "projects"; map_category base "projects" Project.of_md 31 - let load_notes base = 32 - dbg "notes"; 33 - let notes_from_notes = map_category base "notes" Note.of_md in 34 - let notes_from_news = map_category base "news" Note.of_md in 35 - notes_from_notes @ notes_from_news 36 - let load_ideas base = dbg "ideas"; map_category base "ideas" Idea.of_md 37 - let load_videos base = dbg "videos"; map_category base "videos" Video.of_md 38 - 39 - let load_images base = 40 - Printf.eprintf "load images %s/images\n%!" base; 41 - try 42 - Srcsetter.list_of_json (Util.read_file (base ^ "/images/index.json")) |> Result.get_ok 43 - with 44 - | _ -> [] (* FIXME log *) 45 - ;; 46 - 47 - let load_papers base = 48 - Printf.eprintf "load papers %s/data/papers\n%!" base; 49 - Sys.readdir (base ^ "/data/papers") 50 - |> Array.to_list 51 - |> List.filter (fun slug -> Sys.is_directory (base ^ "/data/papers/" ^ slug)) 52 - |> List.map (fun slug -> 53 - Sys.readdir (base ^ "/data/papers/" ^ slug) 54 - |> Array.to_list 55 - |> List.filter (fun ver -> Filename.check_suffix ver ".md") 56 - |> List.map (fun ver -> 57 - let ver = Filename.chop_extension ver in 58 - Paper.of_md ~slug ~ver (base ^ "/data/papers/" ^ slug ^ "/" ^ ver ^ ".md"))) 59 - |> List.flatten 60 - |> Paper.tv 61 - ;; 62 - 63 - let load base = 64 - let images = load_images base in 65 - let papers = load_papers base in 66 - let contacts = load_contacts base in 67 - let projects = load_projects base in 68 - let notes = load_notes base in 69 - let ideas = load_ideas base in 70 - let videos = load_videos base in 71 - let entries = Entry.v ~images ~papers ~notes ~projects ~ideas ~videos ~contacts ~data_dir:(base ^ "/data") in 72 - (* Build link graph *) 73 - Printf.eprintf "Building link_graph...\n%!"; 74 - let graph = Link_graph.build_link_graph entries in 75 - Fmt.epr "%a@." Link_graph.pp_graph graph; 76 - Link_graph.set_graph graph; 77 - entries 78 - ;; 79 -
-27
stack/bushel/lib/bushel.mli
··· 1 - (** Bushel *) 2 - 3 - module Contact = Contact 4 - module Idea = Idea 5 - module Note = Note 6 - module Paper = Paper 7 - module Project = Project 8 - module Video = Video 9 - module Tags = Tags 10 - module Link = Link 11 - module Entry = Entry 12 - module Util = Util 13 - module Md = Md 14 - module Srcsetter = Srcsetter 15 - module Typesense = Typesense 16 - module Link_graph = Link_graph 17 - module Description = Description 18 - module Doi_entry = Doi_entry 19 - 20 - val load_contacts : string -> Contact.ts 21 - val load_projects : string -> Project.ts 22 - val load_notes : string -> Note.ts 23 - val load_ideas : string -> Idea.ts 24 - val load_videos : string -> Video.ts 25 - val load_images : string -> Srcsetter.ts 26 - val load_papers : string -> Paper.ts 27 - val load : string -> Entry.t
-172
stack/bushel/lib/contact.ml
··· 1 - type t = 2 - { names : string list 3 - ; handle : string 4 - ; email : string option 5 - ; icon : string option 6 - ; github : string option 7 - ; twitter : string option 8 - ; bluesky : string option 9 - ; mastodon : string option 10 - ; orcid : string option 11 - ; url : string option 12 - ; atom : string list option 13 - } 14 - 15 - type ts = t list 16 - 17 - let v ?email ?github ?twitter ?bluesky ?mastodon ?orcid ?icon ?url ?atom handle names = 18 - { names; handle; email; github; twitter; bluesky; mastodon; orcid; url; icon; atom } 19 - ;; 20 - 21 - let make names email icon github twitter bluesky mastodon orcid url atom = 22 - v ?email ?github ?twitter ?bluesky ?mastodon ?orcid ?icon ?url ?atom "" names 23 - ;; 24 - 25 - let names { names; _ } = names 26 - let name { names; _ } = List.hd names 27 - let handle { handle; _ } = handle 28 - let email { email; _ } = email 29 - let icon { icon; _ } = icon 30 - let github { github; _ } = github 31 - let twitter { twitter; _ } = twitter 32 - let bluesky { bluesky; _ } = bluesky 33 - let mastodon { mastodon; _ } = mastodon 34 - let orcid { orcid; _ } = orcid 35 - let url { url; _ } = url 36 - let atom { atom; _ } = atom 37 - 38 - let json_t = 39 - let open Jsont in 40 - let open Jsont.Object in 41 - let mem_opt f v ~enc = mem f v ~dec_absent:None ~enc_omit:Option.is_none ~enc in 42 - map ~kind:"Contact" make 43 - |> mem "names" (list string) ~dec_absent:[] ~enc:names 44 - |> mem_opt "email" (some string) ~enc:email 45 - |> mem_opt "icon" (some string) ~enc:icon 46 - |> mem_opt "github" (some string) ~enc:github 47 - |> mem_opt "twitter" (some string) ~enc:twitter 48 - |> mem_opt "bluesky" (some string) ~enc:bluesky 49 - |> mem_opt "mastodon" (some string) ~enc:mastodon 50 - |> mem_opt "orcid" (some string) ~enc:orcid 51 - |> mem_opt "url" (some string) ~enc:url 52 - |> mem_opt "atom" (some (list string)) ~enc:atom 53 - |> finish 54 - ;; 55 - 56 - let v = Jsont_bytesrw.decode_string (Jsont.list json_t) 57 - let compare a b = String.compare a.handle b.handle 58 - let find_by_handle ts h = List.find_opt (fun { handle; _ } -> handle = h) ts 59 - 60 - let best_url c = 61 - match c.url with 62 - | Some v -> Some v 63 - | None -> 64 - (match c.github with 65 - | Some v -> Some ("https://github.com/" ^ v) 66 - | None -> 67 - (match c.email with 68 - | Some v -> Some ("mailto:" ^ v) 69 - | None -> None)) 70 - ;; 71 - 72 - let of_md fname = 73 - (* TODO fix Jekyll_post to not error on no date *) 74 - let fname' = "2000-01-01-" ^ Filename.basename fname in 75 - let handle = Filename.basename fname |> Filename.chop_extension in 76 - match Jekyll_post.of_string ~fname:fname' (Util.read_file fname) with 77 - | Error (`Msg m) -> failwith ("contact_of_md: " ^ m) 78 - | Ok jp -> 79 - let fields = jp.Jekyll_post.fields |> Jekyll_format.fields_to_yaml in 80 - let c = Jsont_bytesrw.decode_string json_t (Ezjsonm.value_to_string fields) in 81 - (match c with 82 - | Error e -> failwith e 83 - | Ok c -> { c with handle }) 84 - ;; 85 - 86 - (* Given a name, turn it lowercase and return the concatenation of the 87 - initials of all the words in the name and the full last name. *) 88 - let handle_of_name name = 89 - let name = String.lowercase_ascii name in 90 - let words = String.split_on_char ' ' name in 91 - let initials = String.concat "" (List.map (fun w -> String.sub w 0 1) words) in 92 - initials ^ List.hd (List.rev words) 93 - ;; 94 - 95 - (* fuzzy lookup for an author. Strip out any non alpha numeric characters while 96 - searching for the name *) 97 - let lookup_by_name ts a = 98 - let a = String.lowercase_ascii a in 99 - let rec aux acc = function 100 - | [] -> acc 101 - | t :: ts -> 102 - if List.exists (fun n -> String.lowercase_ascii n = a) t.names 103 - then aux (t :: acc) ts 104 - else aux acc ts 105 - in 106 - match aux [] ts with 107 - | [ a ] -> a 108 - | [] -> raise (Failure ("contact.ml: author not found: " ^ a)) 109 - | _ -> raise (Failure ("ambiguous author: " ^ a)) 110 - ;; 111 - 112 - (* TODO:claude *) 113 - let typesense_schema = 114 - let open Ezjsonm in 115 - dict [ 116 - ("name", string "contacts"); 117 - ("fields", list (fun d -> dict d) [ 118 - [("name", string "id"); ("type", string "string")]; 119 - [("name", string "handle"); ("type", string "string")]; 120 - [("name", string "name"); ("type", string "string")]; 121 - [("name", string "names"); ("type", string "string[]"); ("optional", bool true)]; 122 - [("name", string "email"); ("type", string "string[]"); ("optional", bool true)]; 123 - [("name", string "icon"); ("type", string "string[]"); ("optional", bool true)]; 124 - [("name", string "github"); ("type", string "string[]"); ("optional", bool true)]; 125 - [("name", string "twitter"); ("type", string "string[]"); ("optional", bool true)]; 126 - [("name", string "bluesky"); ("type", string "string[]"); ("optional", bool true)]; 127 - [("name", string "mastodon"); ("type", string "string[]"); ("optional", bool true)]; 128 - [("name", string "orcid"); ("type", string "string[]"); ("optional", bool true)]; 129 - [("name", string "url"); ("type", string "string[]"); ("optional", bool true)]; 130 - [("name", string "atom"); ("type", string "string[]"); ("optional", bool true)]; 131 - ]); 132 - ] 133 - 134 - (** TODO:claude Pretty-print a contact with ANSI formatting *) 135 - let pp ppf c = 136 - let open Fmt in 137 - pf ppf "@[<v>"; 138 - pf ppf "%a: %a@," (styled `Bold string) "Type" (styled `Cyan string) "Contact"; 139 - pf ppf "%a: @%a@," (styled `Bold string) "Handle" string (handle c); 140 - pf ppf "%a: %a@," (styled `Bold string) "Name" string (name c); 141 - let ns = names c in 142 - if List.length ns > 1 then 143 - pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Aliases" (list ~sep:comma string) (List.tl ns); 144 - (match email c with 145 - | Some e -> pf ppf "%a: %a@," (styled `Bold string) "Email" string e 146 - | None -> ()); 147 - (match github c with 148 - | Some g -> pf ppf "%a: https://github.com/%a@," (styled `Bold string) "GitHub" string g 149 - | None -> ()); 150 - (match twitter c with 151 - | Some t -> pf ppf "%a: https://twitter.com/%a@," (styled `Bold string) "Twitter" string t 152 - | None -> ()); 153 - (match bluesky c with 154 - | Some b -> pf ppf "%a: %a@," (styled `Bold string) "Bluesky" string b 155 - | None -> ()); 156 - (match mastodon c with 157 - | Some m -> pf ppf "%a: %a@," (styled `Bold string) "Mastodon" string m 158 - | None -> ()); 159 - (match orcid c with 160 - | Some o -> pf ppf "%a: https://orcid.org/%a@," (styled `Bold string) "ORCID" string o 161 - | None -> ()); 162 - (match url c with 163 - | Some u -> pf ppf "%a: %a@," (styled `Bold string) "URL" string u 164 - | None -> ()); 165 - (match icon c with 166 - | Some i -> pf ppf "%a: %a@," (styled `Bold string) "Icon" string i 167 - | None -> ()); 168 - (match atom c with 169 - | Some atoms when atoms <> [] -> 170 - pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Atom Feeds" (list ~sep:comma string) atoms 171 - | _ -> ()); 172 - pf ppf "@]"
-25
stack/bushel/lib/contact.mli
··· 1 - type t 2 - type ts = t list 3 - 4 - val v : string -> (ts, string) result 5 - val names : t -> string list 6 - val name : t -> string 7 - val handle : t -> string 8 - val email : t -> string option 9 - val icon : t -> string option 10 - val github : t -> string option 11 - val twitter : t -> string option 12 - val bluesky : t -> string option 13 - val mastodon : t -> string option 14 - val orcid : t -> string option 15 - val url : t -> string option 16 - val atom : t -> string list option 17 - val best_url : t -> string option 18 - val find_by_handle : t list -> string -> t option 19 - val handle_of_name : string -> string 20 - val lookup_by_name : ts -> string -> t 21 - val json_t : t Jsont.t 22 - val compare : t -> t -> int 23 - val of_md : string -> t 24 - val typesense_schema : Ezjsonm.value 25 - val pp : Format.formatter -> t -> unit
-72
stack/bushel/lib/description.ml
··· 1 - (** Generate descriptive text for bushel entries *) 2 - 3 - (* Helper to format a date as "Month Year" *) 4 - let format_date date = 5 - let (year, month, _day) = date in 6 - let month_name = match month with 7 - | 1 -> "January" | 2 -> "February" | 3 -> "March" | 4 -> "April" 8 - | 5 -> "May" | 6 -> "June" | 7 -> "July" | 8 -> "August" 9 - | 9 -> "September" | 10 -> "October" | 11 -> "November" | 12 -> "December" 10 - | _ -> "" 11 - in 12 - Printf.sprintf "%s %d" month_name year 13 - 14 - (* Generate a descriptive sentence for a paper *) 15 - let paper_description (p : Paper.t) ~date_str = 16 - let venue = match String.lowercase_ascii (Paper.bibtype p) with 17 - | "inproceedings" -> Paper.booktitle p 18 - | "article" -> Paper.journal p 19 - | "book" -> 20 - let pub = Paper.publisher p in 21 - if pub = "" then "Book" else "Book by " ^ pub 22 - | "techreport" -> 23 - (try "Technical report at " ^ Paper.institution p 24 - with _ -> "Technical report") 25 - | "misc" -> 26 - let pub = Paper.publisher p in 27 - if pub = "" then "Working paper" else "Working paper at " ^ pub 28 - | _ -> "Publication" 29 - in 30 - Printf.sprintf "Paper in %s (%s)" venue date_str 31 - 32 - (* Generate a descriptive sentence for a note *) 33 - let note_description (n : Note.t) ~date_str ~lookup_fn = 34 - match Note.slug_ent n with 35 - | Some slug_ent -> 36 - (match lookup_fn slug_ent with 37 - | Some related_title -> 38 - Printf.sprintf "Note about %s (%s)" related_title date_str 39 - | None -> Printf.sprintf "Research note (%s)" date_str) 40 - | None -> Printf.sprintf "Research note (%s)" date_str 41 - 42 - (* Generate a descriptive sentence for an idea *) 43 - let idea_description (i : Idea.t) ~date_str = 44 - let status_str = String.lowercase_ascii (Idea.status_to_string (Idea.status i)) in 45 - let level_str = Idea.level_to_string (Idea.level i) in 46 - Printf.sprintf "Research idea (%s, %s level, %s)" status_str level_str date_str 47 - 48 - (* Generate a descriptive sentence for a video *) 49 - let video_description (v : Video.t) ~date_str ~lookup_fn = 50 - let video_type = if Video.talk v then "Talk video" else "Video" in 51 - let context = match Video.paper v with 52 - | Some paper_slug -> 53 - (match lookup_fn paper_slug with 54 - | Some title -> Printf.sprintf " about %s" title 55 - | None -> "") 56 - | None -> 57 - (match Video.project v with 58 - | Some project_slug -> 59 - (match lookup_fn project_slug with 60 - | Some title -> Printf.sprintf " about %s" title 61 - | None -> "") 62 - | None -> "") 63 - in 64 - Printf.sprintf "%s%s (%s)" video_type context date_str 65 - 66 - (* Generate a descriptive sentence for a project *) 67 - let project_description (pr : Project.t) = 68 - let end_str = match pr.Project.finish with 69 - | Some year -> string_of_int year 70 - | None -> "present" 71 - in 72 - Printf.sprintf "Project (%d–%s)" pr.Project.start end_str
-19
stack/bushel/lib/description.mli
··· 1 - (** Generate descriptive text for bushel entries *) 2 - 3 - (** Format a date as "Month Year" *) 4 - val format_date : int * int * int -> string 5 - 6 - (** Generate a descriptive sentence for a paper with date string *) 7 - val paper_description : Paper.t -> date_str:string -> string 8 - 9 - (** Generate a descriptive sentence for a note with date string and lookup function *) 10 - val note_description : Note.t -> date_str:string -> lookup_fn:(string -> string option) -> string 11 - 12 - (** Generate a descriptive sentence for an idea with date string *) 13 - val idea_description : Idea.t -> date_str:string -> string 14 - 15 - (** Generate a descriptive sentence for a video with date string and lookup function *) 16 - val video_description : Video.t -> date_str:string -> lookup_fn:(string -> string option) -> string 17 - 18 - (** Generate a descriptive sentence for a project *) 19 - val project_description : Project.t -> string
-147
stack/bushel/lib/doi_entry.ml
··· 1 - module J = Ezjsonm 2 - 3 - type status = 4 - | Resolved 5 - | Failed of string 6 - 7 - type t = { 8 - doi: string; 9 - title: string; 10 - authors: string list; 11 - year: int; 12 - bibtype: string; 13 - publisher: string; 14 - resolved_at: string; 15 - source_urls: string list; 16 - status: status; 17 - ignore: bool; 18 - } 19 - 20 - type ts = t list 21 - 22 - let create_resolved ~doi ~title ~authors ~year ~bibtype ~publisher ?(source_urls=[]) () = 23 - let resolved_at = 24 - let now = Ptime_clock.now () in 25 - let rfc3339 = Ptime.to_rfc3339 ~space:false ~frac_s:0 now in 26 - String.sub rfc3339 0 10 (* Extract YYYY-MM-DD *) 27 - in 28 - { doi; title; authors; year; bibtype; publisher; resolved_at; source_urls; status = Resolved; ignore = false } 29 - 30 - let create_failed ~doi ~error ?(source_urls=[]) () = 31 - let resolved_at = 32 - let now = Ptime_clock.now () in 33 - let rfc3339 = Ptime.to_rfc3339 ~space:false ~frac_s:0 now in 34 - String.sub rfc3339 0 10 (* Extract YYYY-MM-DD *) 35 - in 36 - { doi; title = ""; authors = []; year = 0; bibtype = ""; publisher = ""; 37 - resolved_at; source_urls; status = Failed error; ignore = false } 38 - 39 - let merge_entries old_entry new_entry = 40 - (* Combine source_urls, removing duplicates *) 41 - let combined_urls = 42 - List.sort_uniq String.compare (old_entry.source_urls @ new_entry.source_urls) 43 - in 44 - (* Use new_entry's data but with combined URLs and preserve ignore flag from old entry *) 45 - { new_entry with source_urls = combined_urls; ignore = old_entry.ignore } 46 - 47 - let to_yaml_value entry = 48 - let status_field = match entry.status with 49 - | Resolved -> [] 50 - | Failed err -> [("error", `String err)] 51 - in 52 - let source_urls_field = match entry.source_urls with 53 - | [] -> [] 54 - | urls -> [("source_urls", `A (List.map (fun url -> `String url) urls))] 55 - in 56 - let ignore_field = if entry.ignore then [("ignore", `Bool true)] else [] in 57 - let fields = [ 58 - ("doi", `String entry.doi); 59 - ("resolved_at", `String entry.resolved_at); 60 - ] @ status_field @ source_urls_field @ ignore_field in 61 - let fields = match entry.status with 62 - | Resolved -> 63 - fields @ [ 64 - ("title", `String entry.title); 65 - ("authors", `A (List.map (fun a -> `String a) entry.authors)); 66 - ("year", `Float (float_of_int entry.year)); 67 - ("bibtype", `String entry.bibtype); 68 - ("publisher", `String entry.publisher); 69 - ] 70 - | Failed _ -> fields 71 - in 72 - `O fields 73 - 74 - let of_yaml_value v = 75 - try 76 - let doi = J.find v ["doi"] |> J.get_string in 77 - let resolved_at = J.find v ["resolved_at"] |> J.get_string in 78 - (* Support both old source_url (single) and new source_urls (list) for backwards compatibility *) 79 - let source_urls = 80 - try 81 - J.find v ["source_urls"] |> J.get_list J.get_string 82 - with _ -> 83 - try 84 - let single_url = J.find v ["source_url"] |> J.get_string in 85 - [single_url] 86 - with _ -> [] 87 - in 88 - let ignore = try J.find v ["ignore"] |> J.get_bool with _ -> false in 89 - let error = try Some (J.find v ["error"] |> J.get_string) with _ -> None in 90 - match error with 91 - | Some err -> 92 - { doi; title = ""; authors = []; year = 0; bibtype = ""; publisher = ""; 93 - resolved_at; source_urls; status = Failed err; ignore } 94 - | None -> 95 - let title = J.find v ["title"] |> J.get_string in 96 - let authors = J.find v ["authors"] |> J.get_list J.get_string in 97 - let year = J.find v ["year"] |> J.get_float |> int_of_float in 98 - let bibtype = J.find v ["bibtype"] |> J.get_string in 99 - let publisher = J.find v ["publisher"] |> J.get_string in 100 - { doi; title; authors; year; bibtype; publisher; resolved_at; source_urls; status = Resolved; ignore } 101 - with e -> 102 - Printf.eprintf "Failed to parse DOI entry: %s\n%!" (Printexc.to_string e); 103 - failwith "Invalid DOI entry in YAML" 104 - 105 - let load path = 106 - if not (Sys.file_exists path) then 107 - [] 108 - else 109 - try 110 - let yaml_str = In_channel.with_open_text path In_channel.input_all in 111 - match Yaml.of_string yaml_str with 112 - | Ok (`A entries) -> List.map of_yaml_value entries 113 - | Ok _ -> [] 114 - | Error (`Msg e) -> 115 - Printf.eprintf "Failed to parse %s: %s\n%!" path e; 116 - [] 117 - with e -> 118 - Printf.eprintf "Failed to load %s: %s\n%!" path (Printexc.to_string e); 119 - [] 120 - 121 - let save path entries = 122 - let yaml_list = `A (List.map to_yaml_value entries) in 123 - let yaml_str = Yaml.to_string_exn yaml_list in 124 - Out_channel.with_open_text path (fun oc -> 125 - Out_channel.output_string oc yaml_str 126 - ) 127 - 128 - let to_map entries = 129 - let map = Hashtbl.create (List.length entries) in 130 - List.iter (fun entry -> Hashtbl.add map entry.doi entry) entries; 131 - map 132 - 133 - let find_by_doi entries doi = 134 - List.find_opt (fun entry -> not entry.ignore && entry.doi = doi) entries 135 - 136 - let find_by_url entries url = 137 - List.find_opt (fun entry -> 138 - not entry.ignore && List.mem url entry.source_urls 139 - ) entries 140 - 141 - let find_by_doi_including_ignored entries doi = 142 - List.find_opt (fun entry -> entry.doi = doi) entries 143 - 144 - let find_by_url_including_ignored entries url = 145 - List.find_opt (fun entry -> 146 - List.mem url entry.source_urls 147 - ) entries
-51
stack/bushel/lib/doi_entry.mli
··· 1 - (** DOI entries resolved from external sources via Zotero Translation Server *) 2 - 3 - type status = 4 - | Resolved (** Successfully resolved from Zotero *) 5 - | Failed of string (** Failed to resolve, with error message *) 6 - 7 - type t = { 8 - doi: string; 9 - title: string; 10 - authors: string list; 11 - year: int; 12 - bibtype: string; (** article, inproceedings, book, etc *) 13 - publisher: string; (** journal/conference/publisher name *) 14 - resolved_at: string; (** ISO date when resolved *) 15 - source_urls: string list; (** All URLs that resolve to this DOI (publisher links, doi.org URLs, etc) *) 16 - status: status; 17 - ignore: bool; (** If true, skip this entry when looking up references *) 18 - } 19 - 20 - type ts = t list 21 - 22 - (** Load DOI entries from YAML file *) 23 - val load : string -> ts 24 - 25 - (** Save DOI entries to YAML file *) 26 - val save : string -> ts -> unit 27 - 28 - (** Convert list to hashtable for fast lookup by DOI *) 29 - val to_map : ts -> (string, t) Hashtbl.t 30 - 31 - (** Find entry by DOI (excludes ignored entries) *) 32 - val find_by_doi : ts -> string -> t option 33 - 34 - (** Find entry by source URL (searches through all source_urls, excludes ignored entries) *) 35 - val find_by_url : ts -> string -> t option 36 - 37 - (** Find entry by DOI including ignored entries (for resolution checks) *) 38 - val find_by_doi_including_ignored : ts -> string -> t option 39 - 40 - (** Find entry by source URL including ignored entries (for resolution checks) *) 41 - val find_by_url_including_ignored : ts -> string -> t option 42 - 43 - (** Create a new resolved entry *) 44 - val create_resolved : doi:string -> title:string -> authors:string list -> 45 - year:int -> bibtype:string -> publisher:string -> ?source_urls:string list -> unit -> t 46 - 47 - (** Create a new failed entry *) 48 - val create_failed : doi:string -> error:string -> ?source_urls:string list -> unit -> t 49 - 50 - (** Merge two entries with the same DOI, combining their source_urls *) 51 - val merge_entries : t -> t -> t
-19
stack/bushel/lib/dune
··· 1 - (library 2 - (name bushel) 3 - (public_name bushel) 4 - (libraries 5 - cmarkit 6 - uri 7 - jsont 8 - jsont.bytesrw 9 - ezjsonm 10 - ptime 11 - yaml.unix 12 - jekyll-format 13 - eio 14 - requests 15 - fmt 16 - re 17 - ptime.clock 18 - ptime.clock.os 19 - typesense-client))
-449
stack/bushel/lib/entry.ml
··· 1 - type entry = 2 - [ `Paper of Paper.t 3 - | `Project of Project.t 4 - | `Idea of Idea.t 5 - | `Video of Video.t 6 - | `Note of Note.t 7 - ] 8 - 9 - type slugs = (string, entry) Hashtbl.t 10 - 11 - type t = 12 - { slugs : slugs 13 - ; papers : Paper.ts 14 - ; old_papers : Paper.ts 15 - ; notes : Note.ts 16 - ; projects : Project.ts 17 - ; ideas : Idea.ts 18 - ; videos : Video.ts 19 - ; contacts : Contact.ts 20 - ; images : Srcsetter.ts 21 - ; doi_entries : Doi_entry.ts 22 - ; data_dir : string 23 - } 24 - 25 - let contacts { contacts; _ } = contacts 26 - let videos { videos; _ } = videos 27 - let ideas { ideas; _ } = ideas 28 - let papers { papers; _ } = papers 29 - let notes { notes; _ } = notes 30 - let projects { projects; _ } = projects 31 - let images { images; _ } = images 32 - let doi_entries { doi_entries; _ } = doi_entries 33 - let data_dir { data_dir; _ } = data_dir 34 - 35 - let v ~papers ~notes ~projects ~ideas ~videos ~contacts ~images ~data_dir = 36 - let slugs : slugs = Hashtbl.create 42 in 37 - let papers, old_papers = List.partition (fun p -> p.Paper.latest) papers in 38 - List.iter (fun n -> Hashtbl.add slugs n.Note.slug (`Note n)) notes; 39 - List.iter (fun p -> Hashtbl.add slugs p.Project.slug (`Project p)) projects; 40 - List.iter (fun i -> Hashtbl.add slugs i.Idea.slug (`Idea i)) ideas; 41 - List.iter (fun v -> Hashtbl.add slugs v.Video.slug (`Video v)) videos; 42 - List.iter (fun p -> Hashtbl.add slugs p.Paper.slug (`Paper p)) papers; 43 - (* Load DOI entries from doi.yml *) 44 - let doi_yml_path = Filename.concat data_dir "doi.yml" in 45 - let doi_entries = Doi_entry.load doi_yml_path in 46 - { slugs; papers; old_papers; notes; projects; ideas; videos; images; contacts; doi_entries; data_dir } 47 - ;; 48 - 49 - let lookup { slugs; _ } slug = Hashtbl.find_opt slugs slug 50 - let lookup_exn { slugs; _ } slug = Hashtbl.find slugs slug 51 - 52 - let old_papers { old_papers; _ } = old_papers 53 - 54 - let sidebar = function 55 - | `Note { Note.sidebar = Some s; _ } -> Some s 56 - | _ -> None 57 - ;; 58 - 59 - let to_type_string = function 60 - | `Paper _ -> "paper" 61 - | `Note _ -> "note" 62 - | `Project _ -> "project" 63 - | `Idea _ -> "idea" 64 - | `Video _ -> "video" 65 - ;; 66 - 67 - let synopsis = function 68 - | `Note n -> Note.synopsis n 69 - | _ -> None 70 - ;; 71 - 72 - let slug = function 73 - | `Paper p -> p.Paper.slug 74 - | `Note n -> n.Note.slug 75 - | `Project p -> p.Project.slug 76 - | `Idea i -> i.Idea.slug 77 - | `Video v -> v.Video.slug 78 - ;; 79 - 80 - let title = function 81 - | `Paper p -> Paper.title p 82 - | `Note n -> Note.title n 83 - | `Project p -> Project.title p 84 - | `Idea i -> Idea.title i 85 - | `Video v -> Video.title v 86 - ;; 87 - 88 - let body = function 89 - | `Paper _ -> "" 90 - | `Note n -> Note.body n 91 - | `Project p -> Project.body p 92 - | `Idea i -> Idea.body i 93 - | `Video _ -> "" 94 - ;; 95 - 96 - let site_url = function 97 - | `Paper p -> "/papers/" ^ p.Paper.slug 98 - | `Note n -> "/notes/" ^ n.Note.slug 99 - | `Project p -> "/projects/" ^ p.Project.slug 100 - | `Idea i -> "/ideas/" ^ i.Idea.slug 101 - | `Video v -> "/videos/" ^ v.Video.slug 102 - ;; 103 - 104 - (** Extract external URLs from markdown content *) 105 - let extract_external_links md = 106 - let open Cmarkit in 107 - let urls = ref [] in 108 - 109 - let is_external_url url = 110 - (* XXX FIXME *) 111 - let is_bushel_slug = String.starts_with ~prefix:":" in 112 - let is_tag_slug = String.starts_with ~prefix:"##" in 113 - if is_bushel_slug url || is_tag_slug url then false 114 - else 115 - try 116 - let uri = Uri.of_string url in 117 - match Uri.scheme uri with 118 - | Some s when s = "http" || s = "https" -> true 119 - | Some _ -> true (* Any other scheme is considered external *) 120 - | None -> false (* Local references or relative paths *) 121 - with _ -> false 122 - in 123 - 124 - let inline_mapper _ = function 125 - | Inline.Link (lb, _) | Inline.Image (lb, _) -> 126 - let ref = Inline.Link.reference lb in 127 - (match ref with 128 - | `Inline (ld, _) -> 129 - (match Link_definition.dest ld with 130 - | Some (url, _) when is_external_url url -> 131 - urls := url :: !urls; 132 - Mapper.default 133 - | _ -> Mapper.default) 134 - | `Ref (_, _, l) -> 135 - (* Get the referenced label definition and extract URL if it exists *) 136 - let defs = Doc.defs (Doc.of_string ~strict:false md) in 137 - (match Label.Map.find_opt (Label.key l) defs with 138 - | Some (Link_definition.Def (ld, _)) -> 139 - (match Link_definition.dest ld with 140 - | Some (url, _) when is_external_url url -> 141 - urls := url :: !urls 142 - | _ -> ()) 143 - | _ -> ()); 144 - Mapper.default) 145 - | Inline.Autolink (autolink, _) -> 146 - let url = Inline.Autolink.link autolink |> fst in 147 - if not (Inline.Autolink.is_email autolink) && is_external_url url then 148 - urls := url :: !urls; 149 - Mapper.default 150 - | _ -> Mapper.default 151 - in 152 - 153 - let mapper = Mapper.make ~inline:inline_mapper () in 154 - let doc = Doc.of_string ~strict:false md in 155 - let _ = Mapper.map_doc mapper doc in 156 - List.sort_uniq String.compare !urls 157 - 158 - let outgoing_links e = extract_external_links (body e) 159 - 160 - let lookup_site_url t slug = 161 - match lookup t slug with 162 - | Some ent -> site_url ent 163 - | None -> "" 164 - 165 - let lookup_title t slug = 166 - match lookup t slug with 167 - | Some ent -> title ent 168 - | None -> "" 169 - 170 - 171 - let date (x : entry) = 172 - match x with 173 - | `Paper p -> Paper.date p 174 - | `Note n -> Note.date n 175 - | `Project p -> p.Project.start, 1, 1 176 - | `Idea i -> i.Idea.year, i.Idea.month, 1 177 - | `Video v -> Video.date v 178 - ;; 179 - 180 - let datetime v = date v |> Ptime.of_date |> Option.get 181 - 182 - let year x = 183 - match date x with 184 - | y, _, _ -> y 185 - ;; 186 - 187 - let is_index_entry = function 188 - | `Note { Note.index_page; _ } -> index_page 189 - | _ -> false 190 - ;; 191 - 192 - let notes_for_slug { notes; _ } slug = 193 - List.filter (fun n -> match Note.slug_ent n with Some s -> s = slug | None -> false) notes 194 - let all_entries { slugs; _ } = Hashtbl.fold (fun _ v acc -> v :: acc) slugs [] 195 - 196 - let all_papers { papers; old_papers; _ } = 197 - List.map (fun x -> `Paper x) (papers @ old_papers) 198 - ;; 199 - 200 - let compare a b = 201 - let datetime v = Option.get (Ptime.of_date v) in 202 - let da = datetime (date a) in 203 - let db = datetime (date b) in 204 - if da = db then compare (title a) (title b) else Ptime.compare da db 205 - ;; 206 - 207 - let lookup_by_name {contacts;_} n = 208 - match Contact.lookup_by_name contacts n with 209 - | v -> Some v 210 - | exception _ -> None 211 - 212 - (** Extract the first image URL from markdown text *) 213 - let extract_first_image md = 214 - let open Cmarkit in 215 - (* Don't use bushel link resolver to avoid circular dependency *) 216 - let doc = Doc.of_string md in 217 - let found_image = ref None in 218 - 219 - let find_image_in_inline _mapper = function 220 - | Inline.Image (img, _) -> 221 - (match Inline.Link.reference img with 222 - | `Inline (ld, _) -> 223 - (match Link_definition.dest ld with 224 - | Some (url, _) when !found_image = None -> 225 - found_image := Some url; 226 - Mapper.default 227 - | _ -> Mapper.default) 228 - | _ -> Mapper.default) 229 - | _ -> Mapper.default 230 - in 231 - 232 - let mapper = Mapper.make ~inline:find_image_in_inline () in 233 - let _ = Mapper.map_doc mapper doc in 234 - !found_image 235 - ;; 236 - 237 - (** Extract the first video slug from markdown text by looking for bushel video links *) 238 - let extract_first_video entries md = 239 - let open Cmarkit in 240 - let doc = Doc.of_string md in 241 - let found_video = ref None in 242 - 243 - let find_video_in_inline _mapper = function 244 - | Inline.Link (link, _) -> 245 - (match Inline.Link.reference link with 246 - | `Inline (ld, _) -> 247 - (match Link_definition.dest ld with 248 - | Some (url, _) when !found_video = None && String.starts_with ~prefix:":" url -> 249 - (* Check if this is a video slug *) 250 - let slug = String.sub url 1 (String.length url - 1) in 251 - (match lookup entries slug with 252 - | Some (`Video v) -> 253 - found_video := Some (Video.uuid v); 254 - Mapper.default 255 - | _ -> Mapper.default) 256 - | _ -> Mapper.default) 257 - | _ -> Mapper.default) 258 - | _ -> Mapper.default 259 - in 260 - 261 - let mapper = Mapper.make ~inline:find_video_in_inline () in 262 - let _ = Mapper.map_doc mapper doc in 263 - !found_video 264 - ;; 265 - 266 - (** Look up an image in the srcsetter list by slug *) 267 - let lookup_image { images; _ } slug = 268 - List.find_opt (fun img -> Srcsetter.slug img = slug) images 269 - 270 - (** Get the smallest webp variant from a srcsetter image - prefers size just above 480px *) 271 - let smallest_webp_variant img = 272 - let variants = Srcsetter.variants img in 273 - let webp_variants = 274 - Srcsetter.MS.bindings variants 275 - |> List.filter (fun (name, _) -> String.ends_with ~suffix:".webp" name) 276 - in 277 - match webp_variants with 278 - | [] -> 279 - (* No webp variants - use the name field which is always webp *) 280 - "/images/" ^ Srcsetter.name img 281 - | variants -> 282 - (* Prefer variants with width > 480px, choosing the smallest one above 480 *) 283 - let large_variants = List.filter (fun (_, (w, _)) -> w > 480) variants in 284 - let candidates = if large_variants = [] then variants else large_variants in 285 - (* Find the smallest variant from candidates *) 286 - let smallest = List.fold_left (fun acc (name, (w, h)) -> 287 - match acc with 288 - | None -> Some (name, w, h) 289 - | Some (_, min_w, _) when w < min_w -> Some (name, w, h) 290 - | _ -> acc 291 - ) None candidates in 292 - match smallest with 293 - | Some (name, _, _) -> "/images/" ^ name 294 - | None -> "/images/" ^ Srcsetter.name img 295 - 296 - (** Get thumbnail slug for a contact *) 297 - let contact_thumbnail_slug contact = 298 - (* Contact images use just the handle as slug *) 299 - Some (Contact.handle contact) 300 - 301 - (** Get thumbnail URL for a contact - resolved through srcsetter *) 302 - let contact_thumbnail entries contact = 303 - match contact_thumbnail_slug contact with 304 - | None -> None 305 - | Some thumb_slug -> 306 - match lookup_image entries thumb_slug with 307 - | Some img -> Some (smallest_webp_variant img) 308 - | None -> None (* Image not in srcsetter - thumbnails are optional *) 309 - 310 - (** Get thumbnail slug for an entry with fallbacks *) 311 - let rec thumbnail_slug entries = function 312 - | `Paper p -> 313 - (* Slug is just the paper slug, directory is in the origin path *) 314 - Some (Paper.slug p) 315 - 316 - | `Video v -> 317 - (* Videos use their UUID as the slug *) 318 - Some (Video.uuid v) 319 - 320 - | `Project p -> 321 - (* Project images use "project-{slug}" format *) 322 - Some (Printf.sprintf "project-%s" p.Project.slug) 323 - 324 - | `Idea i -> 325 - let is_active = match Idea.status i with 326 - | Idea.Available | Idea.Discussion | Idea.Ongoing -> true 327 - | Idea.Completed | Idea.Expired -> false 328 - in 329 - if is_active then 330 - (* Use first supervisor's face image *) 331 - let supervisors = Idea.supervisors i in 332 - match supervisors with 333 - | sup :: _ -> 334 - let handle = if String.length sup > 0 && sup.[0] = '@' 335 - then String.sub sup 1 (String.length sup - 1) 336 - else sup 337 - in 338 - (match Contact.find_by_handle (contacts entries) handle with 339 - | Some c -> 340 - (* Contact images use just the handle as slug *) 341 - Some (Contact.handle c) 342 - | None -> 343 - (* Fallback to project thumbnail *) 344 - let project_slug = Idea.project i in 345 - (match lookup entries project_slug with 346 - | Some p -> thumbnail_slug entries p 347 - | None -> None)) 348 - | [] -> 349 - (* No supervisors, use project thumbnail *) 350 - let project_slug = Idea.project i in 351 - (match lookup entries project_slug with 352 - | Some p -> thumbnail_slug entries p 353 - | None -> None) 354 - else 355 - (* Use project thumbnail for completed/expired ideas *) 356 - let project_slug = Idea.project i in 357 - (match lookup entries project_slug with 358 - | Some p -> thumbnail_slug entries p 359 - | None -> None) 360 - 361 - | `Note n -> 362 - (* Use titleimage if set, otherwise extract first image from body, then try video, otherwise use slug_ent's thumbnail *) 363 - (match Note.titleimage n with 364 - | Some slug -> 365 - (* Always treat titleimage as a bushel slug (without ':' prefix) *) 366 - Some slug 367 - | None -> 368 - (* Extract first image from markdown body *) 369 - match extract_first_image (Note.body n) with 370 - | Some url when String.starts_with ~prefix:":" url -> 371 - Some (String.sub url 1 (String.length url - 1)) 372 - | Some _ -> None 373 - | None -> 374 - (* Try extracting first video from markdown body *) 375 - match extract_first_video entries (Note.body n) with 376 - | Some video_uuid -> Some video_uuid 377 - | None -> 378 - (* Fallback to slug_ent's thumbnail if present *) 379 - match Note.slug_ent n with 380 - | Some slug_ent -> 381 - (match lookup entries slug_ent with 382 - | Some entry -> thumbnail_slug entries entry 383 - | None -> None) 384 - | None -> None) 385 - 386 - (** Get thumbnail URL for an entry with fallbacks - resolved through srcsetter *) 387 - let thumbnail entries entry = 388 - match thumbnail_slug entries entry with 389 - | None -> None 390 - | Some thumb_slug -> 391 - match lookup_image entries thumb_slug with 392 - | Some img -> Some (smallest_webp_variant img) 393 - | None -> 394 - (* For projects, fallback to supervisor faces if project image doesn't exist *) 395 - (match entry with 396 - | `Project p -> 397 - (* Find ideas for this project *) 398 - let project_ideas = List.filter (fun idea -> 399 - Idea.project idea = ":" ^ p.Project.slug 400 - ) (ideas entries) in 401 - (* Collect all unique supervisors from these ideas *) 402 - let all_supervisors = 403 - List.fold_left (fun acc idea -> 404 - List.fold_left (fun acc2 sup -> 405 - if List.mem sup acc2 then acc2 else sup :: acc2 406 - ) acc (Idea.supervisors idea) 407 - ) [] project_ideas 408 - in 409 - (* Split into avsm and others, preferring others first *) 410 - let (others, avsm) = List.partition (fun sup -> 411 - let handle = if String.length sup > 0 && sup.[0] = '@' 412 - then String.sub sup 1 (String.length sup - 1) 413 - else sup 414 - in 415 - handle <> "avsm" 416 - ) all_supervisors in 417 - (* Try supervisors in order: others first, then avsm *) 418 - let ordered_supervisors = others @ avsm in 419 - (* Try each supervisor's face image *) 420 - let rec try_supervisors = function 421 - | [] -> None 422 - | sup :: rest -> 423 - let handle = if String.length sup > 0 && sup.[0] = '@' 424 - then String.sub sup 1 (String.length sup - 1) 425 - else sup 426 - in 427 - (match Contact.find_by_handle (contacts entries) handle with 428 - | Some c -> 429 - (match lookup_image entries (Contact.handle c) with 430 - | Some img -> Some (smallest_webp_variant img) 431 - | None -> try_supervisors rest) 432 - | None -> try_supervisors rest) 433 - in 434 - try_supervisors ordered_supervisors 435 - | _ -> None) 436 - 437 - (** Get thumbnail URL for a note with slug_ent *) 438 - let thumbnail_note_with_ent entries note_item = 439 - (* Use linked entry's thumbnail if slug_ent is set *) 440 - match Note.slug_ent note_item with 441 - | Some slug_ent -> 442 - (match lookup entries (":" ^ slug_ent) with 443 - | Some entry -> thumbnail entries entry 444 - | None -> 445 - (* Fallback to extracting first image from note body *) 446 - extract_first_image (Note.body note_item)) 447 - | None -> 448 - (* No slug_ent, extract from note body *) 449 - extract_first_image (Note.body note_item)
-79
stack/bushel/lib/entry.mli
··· 1 - type entry = 2 - [ `Idea of Idea.t 3 - | `Note of Note.t 4 - | `Paper of Paper.t 5 - | `Project of Project.t 6 - | `Video of Video.t 7 - ] 8 - 9 - type slugs = (string, entry) Hashtbl.t 10 - type t 11 - 12 - val contacts : t -> Contact.ts 13 - val videos : t -> Video.ts 14 - val ideas : t -> Idea.ts 15 - val papers : t -> Paper.ts 16 - val notes : t -> Note.ts 17 - val projects : t -> Project.ts 18 - val images : t -> Srcsetter.ts 19 - val doi_entries : t -> Doi_entry.ts 20 - val data_dir : t -> string 21 - 22 - val v 23 - : papers:Paper.t list 24 - -> notes:Note.ts 25 - -> projects:Project.ts 26 - -> ideas:Idea.ts 27 - -> videos:Video.ts 28 - -> contacts:Contact.ts 29 - -> images:Srcsetter.ts 30 - -> data_dir:string 31 - -> t 32 - 33 - val lookup : t -> string -> entry option 34 - val lookup_exn : t -> string -> entry 35 - val lookup_site_url : t -> string -> string 36 - val lookup_title : t -> string -> string 37 - val lookup_by_name : t -> string -> Contact.t option 38 - val old_papers : t -> Paper.ts 39 - val sidebar : [> `Note of Note.t ] -> string option 40 - val to_type_string : entry -> string 41 - val slug : entry -> string 42 - val title : entry -> string 43 - val body : entry -> string 44 - val extract_external_links : string -> string list 45 - val outgoing_links : entry -> string list 46 - 47 - (* FIXME move to view *) 48 - val site_url : entry -> string 49 - val date : entry -> Ptime.date 50 - val datetime : entry -> Ptime.t 51 - val year : entry -> int 52 - val synopsis : entry -> string option 53 - 54 - val is_index_entry : entry -> bool 55 - val notes_for_slug : t -> string -> Note.t list 56 - val all_entries : t -> entry list 57 - val all_papers : t -> entry list 58 - val compare : entry -> entry -> int 59 - 60 - (** Look up an image in the srcsetter list by slug *) 61 - val lookup_image : t -> string -> Srcsetter.t option 62 - 63 - (** Get the smallest webp variant from a srcsetter image *) 64 - val smallest_webp_variant : Srcsetter.t -> string 65 - 66 - (** Get thumbnail slug for a contact *) 67 - val contact_thumbnail_slug : Contact.t -> string option 68 - 69 - (** Get thumbnail URL for a contact - resolved through srcsetter *) 70 - val contact_thumbnail : t -> Contact.t -> string option 71 - 72 - (** Get thumbnail slug for an entry with fallbacks *) 73 - val thumbnail_slug : t -> entry -> string option 74 - 75 - (** Get thumbnail URL for an entry with fallbacks - resolved through srcsetter *) 76 - val thumbnail : t -> entry -> string option 77 - 78 - (** Get thumbnail URL for a note with slug_ent *) 79 - val thumbnail_note_with_ent : t -> Note.t -> string option
-223
stack/bushel/lib/idea.ml
··· 1 - type level = 2 - | Any 3 - | PartII 4 - | MPhil 5 - | PhD 6 - | Postdoc 7 - 8 - let level_of_yaml = function 9 - | `String ("Any" | "any") -> Ok Any 10 - | `String ("PartII" | "partii") -> Ok PartII 11 - | `String ("MPhil" | "mphil") -> Ok MPhil 12 - | `String ("PhD" | "phd") -> Ok PhD 13 - | `String ("postdoc" | "Postdoc") -> Ok Postdoc 14 - | _ -> Error (`Msg "level_of_yaml") 15 - ;; 16 - 17 - let level_to_string = function 18 - | Any -> "Any" 19 - | PartII -> "PartII" 20 - | MPhil -> "MPhil" 21 - | PhD -> "PhD" 22 - | Postdoc -> "postdoctoral" 23 - ;; 24 - 25 - let level_to_tag = function 26 - | Any -> "idea-beginner" 27 - | PartII -> "idea-medium" 28 - | MPhil -> "idea-hard" 29 - | PhD -> "idea-phd" 30 - | Postdoc -> "idea-postdoc" 31 - ;; 32 - 33 - let level_to_yaml s = `String (level_to_string s) 34 - 35 - type status = 36 - | Available 37 - | Discussion 38 - | Ongoing 39 - | Completed 40 - | Expired 41 - 42 - let status_of_yaml = function 43 - | `String ("Available" | "available") -> Ok Available 44 - | `String ("Discussion" | "discussion") -> Ok Discussion 45 - | `String ("Ongoing" | "ongoing") -> Ok Ongoing 46 - | `String ("Completed" | "completed") -> Ok Completed 47 - | `String ("Expired" | "expired") -> Ok Expired 48 - | _ -> Error (`Msg "status_of_yaml") 49 - ;; 50 - 51 - let status_to_string = function 52 - | Available -> "Available" 53 - | Discussion -> "Discussion" 54 - | Ongoing -> "Ongoing" 55 - | Completed -> "Completed" 56 - | Expired -> "Expired" 57 - ;; 58 - 59 - let status_to_tag = function 60 - | Available -> "idea-available" 61 - | Discussion -> "idea-discuss" 62 - | Ongoing -> "idea-ongoing" 63 - | Completed -> "idea-done" 64 - | Expired -> "idea-expired" 65 - ;; 66 - 67 - let status_to_yaml s = `String (status_to_string s) 68 - 69 - type t = 70 - { slug : string 71 - ; title : string 72 - ; level : level 73 - ; project : string 74 - ; status : status 75 - ; month: int 76 - ; year : int 77 - ; supervisors : string list 78 - ; students : string list 79 - ; reading : string 80 - ; body : string 81 - ; url : string option 82 - ; tags : string list 83 - } 84 - 85 - type ts = t list 86 - 87 - let title i = i.title 88 - let supervisors i = i.supervisors 89 - let students i = i.students 90 - let reading i = i.reading 91 - let status i = i.status 92 - let level i = i.level 93 - let year i = i.year 94 - let body i = i.body 95 - let project i = i.project 96 - 97 - let compare a b = 98 - match compare a.status b.status with 99 - | 0 -> 100 - (match a.status with 101 - | Completed -> compare b.year a.year 102 - | _ -> 103 - (match compare a.level b.level with 104 - | 0 -> begin 105 - match compare b.year a.year with 106 - | 0 -> compare b.month a.month 107 - | n -> n 108 - end 109 - | n -> n)) 110 - | n -> n 111 - ;; 112 - 113 - let of_md fname = 114 - match Jekyll_post.of_string ~fname:(Filename.basename fname) (Util.read_file fname) with 115 - | Error _ -> failwith "TODO" 116 - | Ok jp -> 117 - let fields = jp.Jekyll_post.fields in 118 - let y = Jekyll_format.fields_to_yaml fields in 119 - let year, month, _ = jp.Jekyll_post.date |> Ptime.to_date in 120 - let body = jp.Jekyll_post.body in 121 - let string f = Yaml.Util.(find_exn f y |> Option.get |> to_string |> Result.get_ok) in 122 - let string' f d = 123 - try Yaml.Util.(find_exn f y |> Option.get |> to_string |> Result.get_ok) with 124 - | _ -> d 125 - in 126 - let to_list = function 127 - | `A l -> Ok l 128 - | _ -> Error (`Msg "to_list") 129 - in 130 - let strings f = 131 - try 132 - Yaml.Util.( 133 - find_exn f y 134 - |> Option.get 135 - |> to_list 136 - |> Result.get_ok 137 - |> List.map (fun x -> to_string x |> Result.get_ok)) 138 - with 139 - | _exn -> [] 140 - in 141 - let level = 142 - Yaml.Util.(find_exn "level" y |> Option.get |> level_of_yaml |> Result.get_ok) 143 - in 144 - let status = 145 - Yaml.Util.(find_exn "status" y |> Option.get |> status_of_yaml |> Result.get_ok) 146 - in 147 - let slug = jp.Jekyll_post.slug in 148 - { slug 149 - ; title = string "title" 150 - ; level 151 - ; project = string "project" 152 - ; status 153 - ; supervisors = strings "supervisors" 154 - ; students = strings "students" 155 - ; tags = strings "tags" 156 - ; reading = string' "reading" "" 157 - ; month 158 - ; year 159 - ; body 160 - ; url = None (* TODO *) 161 - } 162 - ;; 163 - 164 - let lookup ideas slug = List.find_opt (fun i -> i.slug = slug) ideas 165 - 166 - (* TODO:claude *) 167 - let typesense_schema = 168 - let open Ezjsonm in 169 - dict [ 170 - ("name", string "ideas"); 171 - ("fields", list (fun d -> dict d) [ 172 - [("name", string "id"); ("type", string "string")]; 173 - [("name", string "title"); ("type", string "string")]; 174 - [("name", string "description"); ("type", string "string")]; 175 - [("name", string "year"); ("type", string "int32")]; 176 - [("name", string "date"); ("type", string "string")]; 177 - [("name", string "date_timestamp"); ("type", string "int64")]; 178 - [("name", string "tags"); ("type", string "string[]"); ("facet", bool true)]; 179 - [("name", string "level"); ("type", string "string"); ("facet", bool true)]; 180 - [("name", string "status"); ("type", string "string"); ("facet", bool true)]; 181 - [("name", string "project"); ("type", string "string"); ("facet", bool true)]; 182 - [("name", string "supervisors"); ("type", string "string[]"); ("optional", bool true)]; 183 - [("name", string "body"); ("type", string "string"); ("optional", bool true)]; 184 - [("name", string "students"); ("type", string "string[]"); ("optional", bool true)]; 185 - [("name", string "reading"); ("type", string "string"); ("optional", bool true)]; 186 - [("name", string "url"); ("type", string "string"); ("optional", bool true)]; 187 - ]); 188 - ("default_sorting_field", string "date_timestamp"); 189 - ] 190 - 191 - (** TODO:claude Pretty-print an idea with ANSI formatting *) 192 - let pp ppf i = 193 - let open Fmt in 194 - pf ppf "@[<v>"; 195 - pf ppf "%a: %a@," (styled `Bold string) "Type" (styled `Cyan string) "Idea"; 196 - pf ppf "%a: %a@," (styled `Bold string) "Slug" string i.slug; 197 - pf ppf "%a: %a@," (styled `Bold string) "Title" string (title i); 198 - pf ppf "%a: %a@," (styled `Bold string) "Level" string (level_to_string (level i)); 199 - pf ppf "%a: %a@," (styled `Bold string) "Status" string (status_to_string (status i)); 200 - pf ppf "%a: %a@," (styled `Bold string) "Project" string (project i); 201 - pf ppf "%a: %04d-%02d@," (styled `Bold string) "Date" (year i) i.month; 202 - let sups = supervisors i in 203 - if sups <> [] then 204 - pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Supervisors" (list ~sep:comma string) sups; 205 - let studs = students i in 206 - if studs <> [] then 207 - pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Students" (list ~sep:comma string) studs; 208 - (match i.url with 209 - | Some url -> pf ppf "%a: %a@," (styled `Bold string) "URL" string url 210 - | None -> ()); 211 - let t = i.tags in 212 - if t <> [] then 213 - pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Tags" (list ~sep:comma string) t; 214 - let r = reading i in 215 - if r <> "" then begin 216 - pf ppf "@,"; 217 - pf ppf "%a:@," (styled `Bold string) "Reading"; 218 - pf ppf "%a@," string r; 219 - end; 220 - pf ppf "@,"; 221 - pf ppf "%a:@," (styled `Bold string) "Body"; 222 - pf ppf "%a@," string (body i); 223 - pf ppf "@]"
-55
stack/bushel/lib/idea.mli
··· 1 - type level = 2 - | Any 3 - | PartII 4 - | MPhil 5 - | PhD 6 - | Postdoc 7 - 8 - type status = 9 - | Available 10 - | Discussion 11 - | Ongoing 12 - | Completed 13 - | Expired 14 - 15 - val level_of_yaml : Ezjsonm.value -> (level, [> `Msg of string ]) result 16 - val level_to_string : level -> string 17 - val level_to_tag : level -> string 18 - val level_to_yaml : level -> Ezjsonm.value 19 - val status_of_yaml : Ezjsonm.value -> (status, [> `Msg of string ]) result 20 - val status_to_string : status -> string 21 - val status_to_tag : status -> string 22 - val status_to_yaml : status -> Ezjsonm.value 23 - 24 - type t = 25 - { slug : string 26 - ; title : string 27 - ; level : level 28 - ; project : string 29 - ; status : status 30 - ; month : int 31 - ; year : int 32 - ; supervisors : string list 33 - ; students : string list 34 - ; reading : string 35 - ; body : string 36 - ; url : string option 37 - ; tags : string list 38 - } 39 - 40 - type ts = t list 41 - 42 - val title : t -> string 43 - val supervisors : t -> string list 44 - val students : t -> string list 45 - val reading : t -> string 46 - val status : t -> status 47 - val level : t -> level 48 - val year : t -> int 49 - val body : t -> string 50 - val project : t -> string 51 - val compare : t -> t -> int 52 - val lookup : t list -> string -> t option 53 - val of_md : string -> t 54 - val typesense_schema : Ezjsonm.value 55 - val pp : Format.formatter -> t -> unit
-296
stack/bushel/lib/link.ml
··· 1 - type karakeep_data = { 2 - remote_url : string; 3 - id : string; 4 - tags : string list; 5 - metadata : (string * string) list; 6 - } 7 - 8 - type bushel_data = { 9 - slugs : string list; 10 - tags : string list; 11 - } 12 - 13 - type t = { 14 - url : string; 15 - date : Ptime.date; 16 - description : string; 17 - karakeep : karakeep_data option; 18 - bushel : bushel_data option; 19 - } 20 - 21 - type ts = t list 22 - 23 - let url { url; _ } = url 24 - let date { date; _ } = date 25 - let description { description; _ } = description 26 - let datetime v = Option.get @@ Ptime.of_date @@ date v 27 - let compare a b = Ptime.compare (datetime b) (datetime a) 28 - 29 - (* Convert YAML to Link.t *) 30 - let t_of_yaml = function 31 - | `O fields -> 32 - let url = 33 - match List.assoc_opt "url" fields with 34 - | Some (`String v) -> v 35 - | _ -> failwith "link: missing or invalid url" 36 - in 37 - let date = 38 - match List.assoc_opt "date" fields with 39 - | Some (`String v) -> begin 40 - try 41 - match Scanf.sscanf v "%04d-%02d-%02d" (fun y m d -> (y, m, d)) with 42 - | (y, m, d) -> (y, m, d) 43 - with _ -> 44 - (* Fall back to RFC3339 parsing for backward compatibility *) 45 - v |> Ptime.of_rfc3339 |> Result.get_ok |> fun (a, _, _) -> Ptime.to_date a 46 - end 47 - | _ -> failwith "link: missing or invalid date" 48 - in 49 - let description = 50 - match List.assoc_opt "description" fields with 51 - | Some (`String v) -> v 52 - | _ -> "" 53 - in 54 - let karakeep = 55 - match List.assoc_opt "karakeep" fields with 56 - | Some (`O k_fields) -> 57 - let remote_url = 58 - match List.assoc_opt "remote_url" k_fields with 59 - | Some (`String v) -> v 60 - | _ -> failwith "link: invalid karakeep.remote_url" 61 - in 62 - let id = 63 - match List.assoc_opt "id" k_fields with 64 - | Some (`String v) -> v 65 - | _ -> failwith "link: invalid karakeep.id" 66 - in 67 - let tags = 68 - match List.assoc_opt "tags" k_fields with 69 - | Some (`A tag_list) -> 70 - List.fold_left (fun acc tag -> 71 - match tag with 72 - | `String t -> t :: acc 73 - | _ -> acc 74 - ) [] tag_list 75 - |> List.rev 76 - | _ -> [] 77 - in 78 - let metadata = 79 - match List.assoc_opt "metadata" k_fields with 80 - | Some (`O meta_fields) -> 81 - List.fold_left (fun acc (k, v) -> 82 - match v with 83 - | `String value -> (k, value) :: acc 84 - | _ -> acc 85 - ) [] meta_fields 86 - | _ -> [] 87 - in 88 - Some { remote_url; id; tags; metadata } 89 - | _ -> None 90 - in 91 - let bushel = 92 - match List.assoc_opt "bushel" fields with 93 - | Some (`O b_fields) -> 94 - let slugs = 95 - match List.assoc_opt "slugs" b_fields with 96 - | Some (`A slug_list) -> 97 - List.fold_left (fun acc slug -> 98 - match slug with 99 - | `String s -> s :: acc 100 - | _ -> acc 101 - ) [] slug_list 102 - |> List.rev 103 - | _ -> [] 104 - in 105 - let tags = 106 - match List.assoc_opt "tags" b_fields with 107 - | Some (`A tag_list) -> 108 - List.fold_left (fun acc tag -> 109 - match tag with 110 - | `String t -> t :: acc 111 - | _ -> acc 112 - ) [] tag_list 113 - |> List.rev 114 - | _ -> [] 115 - in 116 - Some { slugs; tags } 117 - | _ -> None 118 - in 119 - { url; date; description; karakeep; bushel } 120 - | _ -> failwith "invalid yaml" 121 - 122 - (* Read file contents *) 123 - let read_file file = In_channel.(with_open_bin file input_all) 124 - 125 - (* Load links from a YAML file *) 126 - let of_md fname = 127 - match Yaml.of_string_exn (read_file fname) with 128 - | `A links -> 129 - List.map t_of_yaml links 130 - | `O _ as single_link -> 131 - [t_of_yaml single_link] 132 - | _ -> failwith "link_of_md: expected array or object" 133 - 134 - (* Convert Link.t to YAML *) 135 - let to_yaml t = 136 - let (year, month, day) = t.date in 137 - let date_str = Printf.sprintf "%04d-%02d-%02d" year month day in 138 - 139 - (* Create base fields *) 140 - let base_fields = [ 141 - ("url", `String t.url); 142 - ("date", `String date_str); 143 - ] @ 144 - (if t.description = "" then [] else [("description", `String t.description)]) 145 - in 146 - 147 - (* Add karakeep data if present *) 148 - let karakeep_fields = 149 - match t.karakeep with 150 - | Some { remote_url; id; tags; metadata } -> 151 - let karakeep_obj = [ 152 - ("remote_url", `String remote_url); 153 - ("id", `String id); 154 - ] in 155 - let karakeep_obj = 156 - if tags = [] then karakeep_obj 157 - else ("tags", `A (List.map (fun t -> `String t) tags)) :: karakeep_obj 158 - in 159 - let karakeep_obj = 160 - if metadata = [] then karakeep_obj 161 - else ("metadata", `O (List.map (fun (k, v) -> (k, `String v)) metadata)) :: karakeep_obj 162 - in 163 - [("karakeep", `O karakeep_obj)] 164 - | None -> [] 165 - in 166 - 167 - (* Add bushel data if present *) 168 - let bushel_fields = 169 - match t.bushel with 170 - | Some { slugs; tags } -> 171 - let bushel_obj = [] in 172 - let bushel_obj = 173 - if slugs = [] then bushel_obj 174 - else ("slugs", `A (List.map (fun s -> `String s) slugs)) :: bushel_obj 175 - in 176 - let bushel_obj = 177 - if tags = [] then bushel_obj 178 - else ("tags", `A (List.map (fun t -> `String t) tags)) :: bushel_obj 179 - in 180 - if bushel_obj = [] then [] else [("bushel", `O bushel_obj)] 181 - | None -> [] 182 - in 183 - 184 - `O (base_fields @ karakeep_fields @ bushel_fields) 185 - 186 - (* Write a link to a file in the output directory *) 187 - let to_file output_dir t = 188 - let filename = 189 - let (y, m, d) = t.date in 190 - let hash = Digest.string t.url |> Digest.to_hex in 191 - let short_hash = String.sub hash 0 8 in 192 - Printf.sprintf "%04d-%02d-%02d-%s.md" y m d short_hash 193 - in 194 - let file_path = Fpath.v (Filename.concat output_dir filename) in 195 - let yaml = to_yaml t in 196 - let yaml_str = Yaml.to_string_exn yaml in 197 - let content = "---\n" ^ yaml_str ^ "---\n" in 198 - Bos.OS.File.write file_path content 199 - 200 - (* Load links from a YAML file *) 201 - let load_links_file path = 202 - try 203 - let yaml_str = In_channel.(with_open_bin path input_all) in 204 - match Yaml.of_string_exn yaml_str with 205 - | `A links -> List.map t_of_yaml links 206 - | _ -> [] 207 - with _ -> [] 208 - 209 - (* Save links to a YAML file *) 210 - let save_links_file path links = 211 - try 212 - let yaml = `A (List.map to_yaml links) in 213 - let yaml_str = Yaml.to_string_exn ~len:4200000 yaml in 214 - let oc = open_out path in 215 - output_string oc yaml_str; 216 - close_out oc 217 - with e -> 218 - Printf.eprintf "Error saving links file: %s\n%!" (Printexc.to_string e); 219 - Printf.eprintf "Attempting to save with smaller length limit...\n%!"; 220 - let yaml = `A (List.map to_yaml links) in 221 - let yaml_str = Yaml.to_string_exn ~len:800000 yaml in 222 - let oc = open_out path in 223 - output_string oc yaml_str; 224 - close_out oc 225 - 226 - (* Merge two lists of links, combining metadata from duplicates *) 227 - let merge_links ?(prefer_new_date=false) existing new_links = 228 - let links_by_url = Hashtbl.create (List.length existing) in 229 - 230 - (* Add existing links to hashtable *) 231 - List.iter (fun link -> 232 - Hashtbl.replace links_by_url link.url link 233 - ) existing; 234 - 235 - (* Merge new links with existing ones *) 236 - List.iter (fun new_link -> 237 - match Hashtbl.find_opt links_by_url new_link.url with 238 - | None -> 239 - (* New link not in existing links *) 240 - Hashtbl.add links_by_url new_link.url new_link 241 - | Some old_link -> 242 - (* Merge link data, prefer newer data for fields *) 243 - let title = 244 - if new_link.description <> "" then new_link.description 245 - else old_link.description 246 - in 247 - 248 - (* Combine karakeep data (prefer new over old) *) 249 - let karakeep = 250 - match new_link.karakeep, old_link.karakeep with 251 - | Some new_k, Some old_k when new_k.remote_url = old_k.remote_url -> 252 - (* Same remote, merge the data *) 253 - let merged_metadata = 254 - let meta_tbl = Hashtbl.create (List.length old_k.metadata) in 255 - List.iter (fun (k, v) -> Hashtbl.replace meta_tbl k v) old_k.metadata; 256 - List.iter (fun (k, v) -> Hashtbl.replace meta_tbl k v) new_k.metadata; 257 - Hashtbl.fold (fun k v acc -> (k, v) :: acc) meta_tbl [] 258 - in 259 - let merged_tags = List.sort_uniq String.compare (old_k.tags @ new_k.tags) in 260 - Some { new_k with metadata = merged_metadata; tags = merged_tags } 261 - | Some new_k, _ -> Some new_k 262 - | None, old_k -> old_k 263 - in 264 - 265 - (* Combine bushel data *) 266 - let bushel = 267 - match new_link.bushel, old_link.bushel with 268 - | Some new_b, Some old_b -> 269 - (* Merge slugs and tags *) 270 - let merged_slugs = List.sort_uniq String.compare (old_b.slugs @ new_b.slugs) in 271 - let merged_tags = List.sort_uniq String.compare (old_b.tags @ new_b.tags) in 272 - Some { slugs = merged_slugs; tags = merged_tags } 273 - | Some new_b, _ -> Some new_b 274 - | None, old_b -> old_b 275 - in 276 - 277 - (* Combined link - prefer new date when requested (for bushel entries) *) 278 - let date = 279 - if prefer_new_date then new_link.date 280 - else if compare new_link old_link > 0 then new_link.date 281 - else old_link.date 282 - in 283 - let merged_link = { 284 - url = new_link.url; 285 - date; 286 - description = title; 287 - karakeep; 288 - bushel 289 - } in 290 - Hashtbl.replace links_by_url new_link.url merged_link 291 - ) new_links; 292 - 293 - (* Convert hashtable back to list and sort by date *) 294 - Hashtbl.to_seq_values links_by_url 295 - |> List.of_seq 296 - |> List.sort compare
-34
stack/bushel/lib/link.mli
··· 1 - type karakeep_data = { 2 - remote_url : string; 3 - id : string; 4 - tags : string list; 5 - metadata : (string * string) list; 6 - } 7 - 8 - type bushel_data = { 9 - slugs : string list; 10 - tags : string list; 11 - } 12 - 13 - type t = { 14 - url : string; 15 - date : Ptime.date; 16 - description : string; 17 - karakeep : karakeep_data option; 18 - bushel : bushel_data option; 19 - } 20 - 21 - type ts = t list 22 - 23 - val compare : t -> t -> int 24 - val url : t -> string 25 - val date : t -> Ptime.date 26 - val datetime : t -> Ptime.t 27 - val description : t -> string 28 - val of_md : string -> ts 29 - val to_yaml : t -> Yaml.value 30 - val t_of_yaml : Yaml.value -> t 31 - val to_file : string -> t -> (unit, [> `Msg of string]) result 32 - val load_links_file : string -> ts 33 - val save_links_file : string -> ts -> unit 34 - val merge_links : ?prefer_new_date:bool -> ts -> ts -> ts
-781
stack/bushel/lib/md.ml
··· 1 - (** Bushel mappers for our Markdown extensions and utilities 2 - 3 - This module provides mappers to convert Bushel markdown extensions to different 4 - output formats. There are two main mappers: 5 - 6 - 1. {!make_bushel_inline_mapper} - Full sidenote mode for the main website 7 - - Converts Bushel links to interactive sidenotes 8 - - Includes entry previews, contact info, footnotes 9 - - Used for the main site HTML rendering 10 - 11 - 2. {!make_bushel_link_only_mapper} - Plain HTML mode for feeds and simple output 12 - - Converts Bushel links to regular HTML <a> tags 13 - - Automatically cleans up link text that contains Bushel slugs 14 - - Used for Atom feeds, RSS, search indexing 15 - - Images need .webp extension added (handled by calling code) 16 - 17 - For plain text output (search, LLM), use {!markdown_to_plaintext}. 18 - *) 19 - 20 - (* Sidenote data types - reuse existing Bushel types *) 21 - type sidenote_data = 22 - | Contact_note of Contact.t * string (* contact data + trigger text *) 23 - | Paper_note of Paper.t * string 24 - | Idea_note of Idea.t * string 25 - | Note_note of Note.t * string 26 - | Project_note of Project.t * string 27 - | Video_note of Video.t * string 28 - | Footnote_note of string * Cmarkit.Block.t * string 29 - (* slug, block content, trigger text *) 30 - 31 - type Cmarkit.Inline.t += Side_note of sidenote_data 32 - 33 - let authorlink = Cmarkit.Meta.key () 34 - 35 - let make_authorlink label = 36 - let meta = Cmarkit.Meta.tag authorlink (Cmarkit.Label.meta label) in 37 - Cmarkit.Label.with_meta meta label 38 - ;; 39 - 40 - let sluglink = Cmarkit.Meta.key () 41 - 42 - let make_sluglink label = 43 - let meta = Cmarkit.Meta.tag sluglink (Cmarkit.Label.meta label) in 44 - Cmarkit.Label.with_meta meta label 45 - ;; 46 - 47 - let with_bushel_links = function 48 - | `Def _ as ctx -> Cmarkit.Label.default_resolver ctx 49 - | `Ref (_, _, (Some _ as def)) -> def 50 - | `Ref (_, ref, None) -> 51 - let txt = Cmarkit.Label.key ref in 52 - (match txt.[0] with 53 - | '@' -> Some (make_authorlink ref) 54 - | ':' -> Some (make_sluglink ref) 55 - | '#' -> if txt.[1] = '#' then Some (make_sluglink ref) else None 56 - | _ -> None) 57 - ;; 58 - 59 - let strip_handle s = 60 - if s.[0] = '@' || s.[0] = ':' 61 - then String.sub s 1 (String.length s - 1) 62 - else if s.[0] = '#' && s.[1] = '#' 63 - then String.sub s 2 (String.length s - 2) 64 - else s 65 - ;; 66 - 67 - (* FIXME use Tags *) 68 - let is_bushel_slug = String.starts_with ~prefix:":" 69 - let is_tag_slug link = 70 - String.starts_with ~prefix:"##" link && 71 - not (String.starts_with ~prefix:"###" link) 72 - 73 - let is_type_filter_slug = String.starts_with ~prefix:"###" 74 - let is_contact_slug = String.starts_with ~prefix:"@" 75 - 76 - let text_of_inline lb = 77 - let open Cmarkit in 78 - Inline.to_plain_text ~break_on_soft:false lb 79 - |> fun r -> String.concat "\n" (List.map (String.concat "") r) 80 - ;; 81 - 82 - let link_target_is_bushel ?slugs lb = 83 - let open Cmarkit in 84 - let ref = Inline.Link.reference lb in 85 - match ref with 86 - | `Inline (ld, _) -> 87 - let dest = Link_definition.dest ld in 88 - (match dest with 89 - | Some (url, _) when is_bushel_slug url -> 90 - (match slugs with 91 - | Some s -> Hashtbl.replace s url () 92 - | _ -> ()); 93 - Some (url, Inline.Link.text lb |> text_of_inline) 94 - | Some (url, _) when is_tag_slug url -> 95 - (* Return the tag URL unchanged - will be handled by renderer *) 96 - Some (url, Inline.Link.text lb |> text_of_inline) 97 - | Some (url, _) when is_contact_slug url -> 98 - Some (url, Inline.Link.text lb |> text_of_inline) 99 - | _ -> None) 100 - | _ -> None 101 - ;; 102 - 103 - let image_target_is_bushel lb = 104 - let open Cmarkit in 105 - let ref = Inline.Link.reference lb in 106 - match ref with 107 - | `Inline (ld, _) -> 108 - let dest = Link_definition.dest ld in 109 - (match dest with 110 - | Some (url, _) when is_bushel_slug url -> 111 - let alt = Link_definition.title ld in 112 - let dir = 113 - Inline.Link.text lb 114 - |> Inline.to_plain_text ~break_on_soft:false 115 - |> fun r -> String.concat "\n" (List.map (String.concat "") r) 116 - in 117 - Some (url, alt, dir) 118 - | _ -> None) 119 - | _ -> None 120 - ;; 121 - 122 - let rewrite_bushel_link_reference entries slug title meta = 123 - let open Cmarkit in 124 - let s = strip_handle slug in 125 - (* Check if it's a tag, contact, or entry *) 126 - if is_tag_slug slug then 127 - (* Tag link - keep the ## prefix in dest for renderer to detect *) 128 - let txt = Inline.Text (title, meta) in 129 - let ld = Link_definition.make ~dest:(slug, meta) () in 130 - let ll = `Inline (ld, meta) in 131 - let ld = Inline.Link.make txt ll in 132 - Mapper.ret (Inline.Link (ld, meta)) 133 - else if is_contact_slug slug then 134 - (* Contact sidenote *) 135 - match Contact.find_by_handle (Entry.contacts entries) s with 136 - | Some c -> 137 - let sidenote = Side_note (Contact_note (c, title)) in 138 - Mapper.ret sidenote 139 - | None -> 140 - (* Contact not found, fallback to regular link *) 141 - let txt = Inline.Text (title, meta) in 142 - let ld = Link_definition.make ~dest:("", meta) () in 143 - let ll = `Inline (ld, meta) in 144 - let ld = Inline.Link.make txt ll in 145 - Mapper.ret (Inline.Link (ld, meta)) 146 - else 147 - (* Check entry type and generate appropriate sidenote *) 148 - match Entry.lookup entries s with 149 - | Some (`Paper p) -> 150 - let sidenote = Side_note (Paper_note (p, title)) in 151 - Mapper.ret sidenote 152 - | Some (`Idea i) -> 153 - let sidenote = Side_note (Idea_note (i, title)) in 154 - Mapper.ret sidenote 155 - | Some (`Note n) -> 156 - let sidenote = Side_note (Note_note (n, title)) in 157 - Mapper.ret sidenote 158 - | Some (`Project p) -> 159 - let sidenote = Side_note (Project_note (p, title)) in 160 - Mapper.ret sidenote 161 - | Some (`Video v) -> 162 - let sidenote = Side_note (Video_note (v, title)) in 163 - Mapper.ret sidenote 164 - | None -> 165 - (* Entry not found, use regular link *) 166 - let dest = Entry.lookup_site_url entries s in 167 - let txt = Inline.Text (title, meta) in 168 - let ld = Link_definition.make ~dest:(dest, meta) () in 169 - let ll = `Inline (ld, meta) in 170 - let ld = Inline.Link.make txt ll in 171 - Mapper.ret (Inline.Link (ld, meta)) 172 - ;; 173 - 174 - let rewrite_bushel_image_reference entries url title dir meta = 175 - let open Cmarkit in 176 - let dest = 177 - match Entry.lookup entries (strip_handle url) with 178 - | Some ent -> Entry.site_url ent (* This is a video *) 179 - | None -> Printf.sprintf "/images/%s" (strip_handle url) 180 - in 181 - let txt = Inline.Text (dir, meta) in 182 - let ld = Link_definition.make ?title ~dest:(dest, meta) () in 183 - let ll = `Inline (ld, meta) in 184 - let ld = Inline.Link.make txt ll in 185 - let ent_il = Inline.Image (ld, meta) in 186 - Mapper.ret ent_il 187 - ;; 188 - 189 - type Cmarkit.Inline.t += Obsidian_link of string 190 - 191 - let rewrite_label_reference_to_obsidian lb meta = 192 - let open Cmarkit in 193 - match Inline.Link.referenced_label lb with 194 - | None -> Mapper.default 195 - | Some l -> 196 - let m = Label.meta l in 197 - (match Meta.find authorlink m with 198 - | Some () -> 199 - let slug = Label.key l in 200 - let target = Printf.sprintf "[[%s]]" slug in 201 - let txt = Obsidian_link target in 202 - Mapper.ret txt 203 - | None -> 204 - (match Meta.find sluglink m with 205 - | None -> Mapper.default 206 - | Some () -> 207 - let slug = Label.key l in 208 - if is_bushel_slug slug 209 - then ( 210 - let target = Printf.sprintf "[[%s]]" (strip_handle slug) in 211 - let txt = Obsidian_link target in 212 - Mapper.ret txt) 213 - else if is_tag_slug slug 214 - then ( 215 - let target = Printf.sprintf "#%s" (strip_handle slug) in 216 - let txt = Inline.Text (target, meta) in 217 - Mapper.ret txt) 218 - else Mapper.default)) 219 - ;; 220 - 221 - let make_bushel_link_only_mapper _defs entries = 222 - let open Cmarkit in 223 - fun _m -> 224 - function 225 - | Inline.Link (lb, meta) -> 226 - (* Convert Bushel link references to regular links (not sidenotes) *) 227 - (match link_target_is_bushel lb with 228 - | Some (url, title) -> 229 - let s = strip_handle url in 230 - let dest = Entry.lookup_site_url entries s in 231 - (* If title is itself a Bushel slug, use the entry title instead *) 232 - let link_text = 233 - if is_bushel_slug title then 234 - match Entry.lookup entries (strip_handle title) with 235 - | Some ent -> Entry.title ent 236 - | None -> title 237 - else title 238 - in 239 - let txt = Inline.Text (link_text, meta) in 240 - let ld = Link_definition.make ~dest:(dest, meta) () in 241 - let ll = `Inline (ld, meta) in 242 - let ld = Inline.Link.make txt ll in 243 - Mapper.ret (Inline.Link (ld, meta)) 244 - | None -> 245 - (match Inline.Link.referenced_label lb with 246 - | Some l -> 247 - let m = Label.meta l in 248 - (* Check for authorlink (contact) first *) 249 - (match Meta.find authorlink m with 250 - | Some () -> 251 - let slug = Label.key l in 252 - let s = strip_handle slug in 253 - (match Contact.find_by_handle (Entry.contacts entries) s with 254 - | Some c -> 255 - let name = Contact.name c in 256 - (match Contact.best_url c with 257 - | Some dest -> 258 - let txt = Inline.Text (name, meta) in 259 - let ld = Link_definition.make ~dest:(dest, meta) () in 260 - let ll = `Inline (ld, meta) in 261 - let ld = Inline.Link.make txt ll in 262 - Mapper.ret (Inline.Link (ld, meta)) 263 - | None -> 264 - (* No URL for contact, just use name as text *) 265 - let txt = Inline.Text (name, meta) in 266 - Mapper.ret txt) 267 - | None -> 268 - (* Contact not found, use title as fallback text *) 269 - let title = Inline.Link.text lb |> text_of_inline in 270 - let txt = Inline.Text (title, meta) in 271 - Mapper.ret txt) 272 - | None -> 273 - (* Check for sluglink *) 274 - (match Meta.find sluglink m with 275 - | Some () -> 276 - let slug = Label.key l in 277 - if is_bushel_slug slug || is_tag_slug slug || is_contact_slug slug 278 - then ( 279 - let s = strip_handle slug in 280 - let dest = Entry.lookup_site_url entries s in 281 - let title = Inline.Link.text lb |> text_of_inline in 282 - (* If link text is itself a Bushel slug, use the entry title instead *) 283 - let link_text = 284 - let trimmed = String.trim title in 285 - if is_bushel_slug trimmed then 286 - match Entry.lookup entries (strip_handle trimmed) with 287 - | Some ent -> Entry.title ent 288 - | None -> title 289 - else title 290 - in 291 - let txt = Inline.Text (link_text, meta) in 292 - let ld = Link_definition.make ~dest:(dest, meta) () in 293 - let ll = `Inline (ld, meta) in 294 - let ld = Inline.Link.make txt ll in 295 - Mapper.ret (Inline.Link (ld, meta))) 296 - else Mapper.default 297 - | None -> Mapper.default)) 298 - | None -> Mapper.default)) 299 - | _ -> Mapper.default 300 - ;; 301 - 302 - let rewrite_footnote_reference ?footnote_map entries defs lb _meta = 303 - let open Cmarkit in 304 - match Inline.Link.referenced_label lb with 305 - | None -> Mapper.default 306 - | Some l -> 307 - (match Inline.Link.reference_definition defs lb with 308 - | Some (Block.Footnote.Def (fn, _)) -> 309 - let label_key = Label.key l in 310 - let slug, trigger_text = 311 - match footnote_map with 312 - | Some fm -> 313 - (match Hashtbl.find_opt fm label_key with 314 - | Some (slug, text) -> (slug, text) 315 - | None -> 316 - let num = Hashtbl.length fm + 1 in 317 - let slug = Printf.sprintf "fn-%d" num in 318 - let text = Printf.sprintf "[%d]" num in 319 - Hashtbl.add fm label_key (slug, text); 320 - (slug, text)) 321 - | None -> 322 - (* No map provided, use label key as slug *) 323 - let slug = Printf.sprintf "fn-%s" (String.sub label_key 1 (String.length label_key - 1)) in 324 - let text = "[?]" in 325 - (slug, text) 326 - in 327 - (* Process the block to convert Bushel link references to regular links (not sidenotes) *) 328 - let block = Block.Footnote.block fn in 329 - let link_mapper = Mapper.make ~inline:(make_bushel_link_only_mapper defs entries) () in 330 - let processed_block = 331 - match Mapper.map_block link_mapper block with 332 - | Some b -> b 333 - | None -> block 334 - in 335 - let sidenote = Side_note (Footnote_note (slug, processed_block, trigger_text)) in 336 - Mapper.ret sidenote 337 - | _ -> Mapper.default) 338 - 339 - let rewrite_label_reference ?slugs entries lb meta = 340 - let open Cmarkit in 341 - match Inline.Link.referenced_label lb with 342 - | None -> Mapper.default 343 - | Some l -> 344 - let m = Label.meta l in 345 - (match Meta.find authorlink m with 346 - | Some () -> 347 - let slug = Label.key l in 348 - (match Contact.find_by_handle (Entry.contacts entries) (strip_handle slug) with 349 - | Some c -> 350 - let trigger_text = Contact.name c in 351 - let sidenote = Side_note (Contact_note (c, trigger_text)) in 352 - Mapper.ret sidenote 353 - | None -> 354 - (* Contact not found, fallback to text *) 355 - let txt = Inline.Text ("Unknown Person", meta) in 356 - Mapper.ret txt) 357 - | None -> 358 - (match Meta.find sluglink m with 359 - | None -> Mapper.default 360 - | Some () -> 361 - let slug = Label.key l in 362 - if is_bushel_slug slug 363 - then ( 364 - (match slugs with 365 - | Some s -> Hashtbl.replace s slug () 366 - | _ -> ()); 367 - let s = strip_handle slug in 368 - (* Check entry type and generate appropriate sidenote *) 369 - match Entry.lookup entries s with 370 - | Some (`Paper p) -> 371 - let trigger_text = Entry.lookup_title entries s in 372 - let sidenote = Side_note (Paper_note (p, trigger_text)) in 373 - Mapper.ret sidenote 374 - | Some (`Idea i) -> 375 - let trigger_text = Entry.lookup_title entries s in 376 - let sidenote = Side_note (Idea_note (i, trigger_text)) in 377 - Mapper.ret sidenote 378 - | Some (`Note n) -> 379 - let trigger_text = Entry.lookup_title entries s in 380 - let sidenote = Side_note (Note_note (n, trigger_text)) in 381 - Mapper.ret sidenote 382 - | Some (`Project p) -> 383 - let trigger_text = Entry.lookup_title entries s in 384 - let sidenote = Side_note (Project_note (p, trigger_text)) in 385 - Mapper.ret sidenote 386 - | Some (`Video v) -> 387 - let trigger_text = Entry.lookup_title entries s in 388 - let sidenote = Side_note (Video_note (v, trigger_text)) in 389 - Mapper.ret sidenote 390 - | None -> 391 - (* Entry not found, use regular link *) 392 - let target = Entry.lookup_title entries s in 393 - let dest = Entry.lookup_site_url entries s in 394 - let txt = Inline.Text (target, meta) in 395 - let ld = Link_definition.make ~dest:(dest, meta) () in 396 - let ll = `Inline (ld, meta) in 397 - let ld = Inline.Link.make txt ll in 398 - Mapper.ret (Inline.Link (ld, meta))) 399 - else if is_tag_slug slug 400 - then ( 401 - let sh = strip_handle slug in 402 - (* Use # as dest to prevent navigation, JavaScript will intercept *) 403 - let target, dest = sh, "#" in 404 - let txt = Inline.Text (target, meta) in 405 - let ld = Link_definition.make ~dest:(dest, meta) () in 406 - let ll = `Inline (ld, meta) in 407 - let ld = Inline.Link.make txt ll in 408 - let ent_il = Inline.Link (ld, meta) in 409 - Mapper.ret ent_il) 410 - else Mapper.default)) 411 - ;; 412 - 413 - let bushel_inline_mapper_to_obsidian entries _m = 414 - let open Cmarkit in 415 - function 416 - | Inline.Link (lb, meta) -> 417 - (match link_target_is_bushel lb with 418 - | None -> rewrite_label_reference_to_obsidian lb meta 419 - | Some (url, title) -> rewrite_bushel_link_reference entries url title meta) 420 - | Inline.Image (lb, meta) -> 421 - (match image_target_is_bushel lb with 422 - | None -> rewrite_label_reference_to_obsidian lb meta 423 - | Some (url, alt, dir) -> rewrite_bushel_image_reference entries url alt dir meta) 424 - | _ -> Mapper.default 425 - ;; 426 - 427 - let make_bushel_inline_mapper ?slugs ?footnote_map defs entries = 428 - let open Cmarkit in 429 - fun _m -> 430 - function 431 - | Inline.Link (lb, meta) -> 432 - (* First check if this is a footnote reference *) 433 - (match Inline.Link.referenced_label lb with 434 - | Some l when String.starts_with ~prefix:"^" (Label.key l) -> 435 - (* This is a footnote reference *) 436 - rewrite_footnote_reference ?footnote_map entries defs lb meta 437 - | _ -> 438 - (* Not a footnote, handle as bushel link *) 439 - (match link_target_is_bushel ?slugs lb with 440 - | None -> rewrite_label_reference ?slugs entries lb meta 441 - | Some (url, title) -> rewrite_bushel_link_reference entries url title meta)) 442 - | Inline.Image (lb, meta) -> 443 - (match image_target_is_bushel lb with 444 - | None -> rewrite_label_reference entries lb meta 445 - | Some (url, alt, dir) -> rewrite_bushel_image_reference entries url alt dir meta) 446 - | _ -> Mapper.default 447 - ;; 448 - 449 - let scan_for_slugs entries md = 450 - let open Cmarkit in 451 - let slugs = Hashtbl.create 7 in 452 - let doc = Doc.of_string ~strict:false ~resolver:with_bushel_links md in 453 - let defs = Doc.defs doc in 454 - let _ = 455 - Mapper.map_doc (Mapper.make ~inline:(make_bushel_inline_mapper ~slugs defs entries) ()) doc 456 - in 457 - Hashtbl.fold (fun k () a -> k :: a) slugs [] 458 - ;; 459 - 460 - (** Validation mapper that collects broken references *) 461 - let make_validation_mapper entries broken_slugs broken_contacts = 462 - let open Cmarkit in 463 - fun _m -> 464 - function 465 - | Inline.Link (lb, _meta) -> 466 - (* Check inline bushel links *) 467 - (match link_target_is_bushel lb with 468 - | Some (url, _title) -> 469 - let s = strip_handle url in 470 - if is_contact_slug url then 471 - (* Validate contact handle *) 472 - (match Contact.find_by_handle (Entry.contacts entries) s with 473 - | None -> Hashtbl.replace broken_contacts url () 474 - | Some _ -> ()) 475 - else if is_bushel_slug url then 476 - (* Validate entry slug *) 477 - (match Entry.lookup entries s with 478 - | None -> Hashtbl.replace broken_slugs url () 479 - | Some _ -> ()) 480 - else (); 481 - Mapper.default 482 - | None -> 483 - (* Check referenced label links *) 484 - (match Inline.Link.referenced_label lb with 485 - | Some l -> 486 - let m = Label.meta l in 487 - (* Check for contact reference *) 488 - (match Meta.find authorlink m with 489 - | Some () -> 490 - let slug = Label.key l in 491 - let handle = strip_handle slug in 492 - (match Contact.find_by_handle (Entry.contacts entries) handle with 493 - | None -> Hashtbl.replace broken_contacts slug () 494 - | Some _ -> ()); 495 - Mapper.default 496 - | None -> 497 - (* Check for entry slug reference *) 498 - (match Meta.find sluglink m with 499 - | None -> Mapper.default 500 - | Some () -> 501 - let slug = Label.key l in 502 - if is_bushel_slug slug then ( 503 - let s = strip_handle slug in 504 - match Entry.lookup entries s with 505 - | None -> Hashtbl.replace broken_slugs slug () 506 - | Some _ -> () 507 - ); 508 - Mapper.default)) 509 - | None -> Mapper.default)) 510 - | _ -> Mapper.default 511 - ;; 512 - 513 - (** Validate all bushel references in markdown and return broken ones *) 514 - let validate_references entries md = 515 - let open Cmarkit in 516 - let broken_slugs = Hashtbl.create 7 in 517 - let broken_contacts = Hashtbl.create 7 in 518 - let doc = Doc.of_string ~strict:false ~resolver:with_bushel_links md in 519 - let mapper = Mapper.make ~inline:(make_validation_mapper entries broken_slugs broken_contacts) () in 520 - let _ = Mapper.map_doc mapper doc in 521 - let slugs = Hashtbl.fold (fun k () a -> k :: a) broken_slugs [] in 522 - let contacts = Hashtbl.fold (fun k () a -> k :: a) broken_contacts [] in 523 - (slugs, contacts) 524 - ;; 525 - 526 - (** Extract the first image URL from markdown text *) 527 - let extract_first_image md = 528 - let open Cmarkit in 529 - (* Don't use bushel link resolver to avoid circular dependency with Entry *) 530 - let doc = Doc.of_string md in 531 - let found_image = ref None in 532 - 533 - let find_image_in_inline _mapper = function 534 - | Inline.Image (img, _) -> 535 - (match Inline.Link.reference img with 536 - | `Inline (ld, _) -> 537 - (match Link_definition.dest ld with 538 - | Some (url, _) when !found_image = None -> 539 - found_image := Some url; 540 - Mapper.default 541 - | _ -> Mapper.default) 542 - | _ -> Mapper.default) 543 - | _ -> Mapper.default 544 - in 545 - 546 - let mapper = Mapper.make ~inline:find_image_in_inline () in 547 - let _ = Mapper.map_doc mapper doc in 548 - !found_image 549 - ;; 550 - 551 - (** Convert markdown text to plain text, resolving bushel links to just their text *) 552 - let markdown_to_plaintext _entries text = 553 - let open Cmarkit in 554 - (* Parse markdown with bushel link resolver *) 555 - let doc = Doc.of_string ~resolver:with_bushel_links text in 556 - 557 - (* Convert document blocks to plain text *) 558 - let rec block_to_text = function 559 - | Block.Blank_line _ -> "" 560 - | Block.Thematic_break _ -> "\n---\n" 561 - | Block.Paragraph (p, _) -> 562 - let inline = Block.Paragraph.inline p in 563 - Inline.to_plain_text ~break_on_soft:false inline 564 - |> List.map (String.concat "") |> String.concat "\n" 565 - | Block.Heading (h, _) -> 566 - let inline = Block.Heading.inline h in 567 - Inline.to_plain_text ~break_on_soft:false inline 568 - |> List.map (String.concat "") |> String.concat "\n" 569 - | Block.Block_quote (bq, _) -> 570 - let blocks = Block.Block_quote.block bq in 571 - block_to_text blocks 572 - | Block.List (l, _) -> 573 - let items = Block.List'.items l in 574 - List.map (fun (item, _) -> 575 - let blocks = Block.List_item.block item in 576 - block_to_text blocks 577 - ) items |> String.concat "\n" 578 - | Block.Code_block (cb, _) -> 579 - let code = Block.Code_block.code cb in 580 - String.concat "\n" (List.map Block_line.to_string code) 581 - | Block.Html_block _ -> "" (* Skip HTML blocks for search *) 582 - | Block.Link_reference_definition _ -> "" 583 - | Block.Ext_footnote_definition _ -> "" 584 - | Block.Blocks (blocks, _) -> 585 - List.map block_to_text blocks |> String.concat "\n" 586 - | _ -> "" 587 - in 588 - let blocks = Doc.block doc in 589 - block_to_text blocks 590 - ;; 591 - 592 - (** Extract all links from markdown text, including from images *) 593 - let extract_all_links text = 594 - let open Cmarkit in 595 - let doc = Doc.of_string ~resolver:with_bushel_links text in 596 - let links = ref [] in 597 - 598 - let find_links_in_inline _mapper = function 599 - | Inline.Link (lb, _) | Inline.Image (lb, _) -> 600 - (* Check for inline link/image destination *) 601 - (match Inline.Link.reference lb with 602 - | `Inline (ld, _) -> 603 - (match Link_definition.dest ld with 604 - | Some (url, _) -> 605 - links := url :: !links; 606 - Mapper.default 607 - | None -> Mapper.default) 608 - | `Ref _ -> 609 - (* For reference-style links/images, check if it has a referenced label *) 610 - (match Inline.Link.referenced_label lb with 611 - | Some l -> 612 - let key = Label.key l in 613 - (* Check if it's a bushel-style link *) 614 - if String.length key > 0 && (key.[0] = ':' || key.[0] = '@' || 615 - (String.length key > 1 && key.[0] = '#' && key.[1] = '#')) then 616 - links := key :: !links; 617 - Mapper.default 618 - | None -> Mapper.default)) 619 - | _ -> Mapper.default 620 - in 621 - 622 - let mapper = Mapper.make ~inline:find_links_in_inline () in 623 - let _ = Mapper.map_doc mapper doc in 624 - 625 - (* Deduplicate *) 626 - let module StringSet = Set.Make(String) in 627 - StringSet.elements (StringSet.of_list !links) 628 - ;; 629 - 630 - (* Reference source type for CiTO annotations *) 631 - type reference_source = 632 - | Paper (* CitesAsSourceDocument *) 633 - | Note (* CitesAsRelated *) 634 - | External (* Cites *) 635 - 636 - (* Extract references (papers/notes with DOIs) from a note *) 637 - let note_references entries default_author note = 638 - let refs = ref [] in 639 - 640 - (* Helper to format author name: extract last name from full name *) 641 - let format_author_last name = 642 - let parts = String.split_on_char ' ' name in 643 - List.nth parts (List.length parts - 1) 644 - in 645 - 646 - (* Helper to format a citation *) 647 - let format_citation ~authors ~year ~title ~publisher = 648 - let author_str = match authors with 649 - | [] -> "" 650 - | [author] -> format_author_last author ^ " " 651 - | author :: _ -> (format_author_last author) ^ " et al " 652 - in 653 - let pub_str = match publisher with 654 - | None | Some "" -> "" 655 - | Some p -> p ^ ". " 656 - in 657 - Printf.sprintf "%s(%d). %s. %s" author_str year title pub_str 658 - in 659 - 660 - (* Check slug_ent if it exists *) 661 - (match Note.slug_ent note with 662 - | Some slug -> 663 - (match Entry.lookup entries slug with 664 - | Some (`Paper p) -> 665 - (match Paper.doi p with 666 - | Some doi -> 667 - let authors = Paper.authors p in 668 - let year = Paper.year p in 669 - let title = Paper.title p in 670 - let publisher = Some (Paper.publisher p) in 671 - let citation = format_citation ~authors ~year ~title ~publisher in 672 - refs := (doi, citation, Paper) :: !refs 673 - | None -> ()) 674 - | Some (`Note n) -> 675 - (match Note.doi n with 676 - | Some doi -> 677 - let authors = match Note.author n with 678 - | Some a -> [a] 679 - | None -> [Contact.name default_author] 680 - in 681 - let (year, _, _) = Note.date n in 682 - let title = Note.title n in 683 - let publisher = None in 684 - let citation = format_citation ~authors ~year ~title ~publisher in 685 - refs := (doi, citation, Note) :: !refs 686 - | None -> ()) 687 - | _ -> ()) 688 - | None -> ()); 689 - 690 - (* Scan body for bushel references *) 691 - let slugs = scan_for_slugs entries (Note.body note) in 692 - List.iter (fun slug -> 693 - (* Strip leading : or @ from slug before lookup *) 694 - let normalized_slug = strip_handle slug in 695 - match Entry.lookup entries normalized_slug with 696 - | Some (`Paper p) -> 697 - (match Paper.doi p with 698 - | Some doi -> 699 - let authors = Paper.authors p in 700 - let year = Paper.year p in 701 - let title = Paper.title p in 702 - let publisher = Some (Paper.publisher p) in 703 - let citation = format_citation ~authors ~year ~title ~publisher in 704 - (* Check if doi already exists in refs *) 705 - if not (List.exists (fun (d, _, _) -> d = doi) !refs) then 706 - refs := (doi, citation, Paper) :: !refs 707 - | None -> ()) 708 - | Some (`Note n) -> 709 - (match Note.doi n with 710 - | Some doi -> 711 - let authors = match Note.author n with 712 - | Some a -> [a] 713 - | None -> [Contact.name default_author] 714 - in 715 - let (year, _, _) = Note.date n in 716 - let title = Note.title n in 717 - let publisher = None in 718 - let citation = format_citation ~authors ~year ~title ~publisher in 719 - (* Check if doi already exists in refs *) 720 - if not (List.exists (fun (d, _, _) -> d = doi) !refs) then 721 - refs := (doi, citation, Note) :: !refs 722 - | None -> ()) 723 - | _ -> () 724 - ) slugs; 725 - 726 - (* Scan body for external DOI URLs and resolve from cache *) 727 - let body = Note.body note in 728 - let doi_url_pattern = Re.Perl.compile_pat "https?://(?:dx\\.)?doi\\.org/([^)\\s\"'>]+)" in 729 - let matches = Re.all doi_url_pattern body in 730 - let doi_entries = Entry.doi_entries entries in 731 - List.iter (fun group -> 732 - try 733 - let encoded_doi = Re.Group.get group 1 in 734 - (* URL decode the DOI *) 735 - let doi = Uri.pct_decode encoded_doi in 736 - (* Check if doi already exists in refs *) 737 - if not (List.exists (fun (d, _, _) -> d = doi) !refs) then 738 - (* Look up in DOI cache *) 739 - match Doi_entry.find_by_doi doi_entries doi with 740 - | Some doi_entry when doi_entry.status = Resolved -> 741 - let citation = format_citation 742 - ~authors:doi_entry.authors 743 - ~year:doi_entry.year 744 - ~title:doi_entry.title 745 - ~publisher:(Some doi_entry.publisher) 746 - in 747 - refs := (doi, citation, External) :: !refs 748 - | _ -> 749 - (* Not found in cache, add minimal citation with just the DOI *) 750 - refs := (doi, doi, External) :: !refs 751 - with _ -> () 752 - ) matches; 753 - 754 - (* Scan body for publisher URLs (Elsevier, ScienceDirect, IEEE, Nature, ACM, Sage, UPenn, Springer, Taylor & Francis, OUP) and resolve from cache *) 755 - let publisher_pattern = Re.Perl.compile_pat "https?://(?:(?:www\\.)?(?:linkinghub\\.elsevier\\.com|(?:www\\.)?sciencedirect\\.com/science/article|ieeexplore\\.ieee\\.org|academic\\.oup\\.com|nature\\.com|journals\\.sagepub\\.com|garfield\\.library\\.upenn\\.edu|link\\.springer\\.com)/[^)\\s\"'>]+|(?:dl\\.acm\\.org|(?:www\\.)?tandfonline\\.com)/doi(?:/pdf)?/10\\.[^)\\s\"'>]+)" in 756 - let publisher_matches = Re.all publisher_pattern body in 757 - List.iter (fun group -> 758 - try 759 - let url = Re.Group.get group 0 in 760 - (* Look up in DOI cache by source URL *) 761 - match Doi_entry.find_by_url doi_entries url with 762 - | Some doi_entry when doi_entry.status = Resolved -> 763 - let doi = doi_entry.doi in 764 - (* Check if this DOI already exists in refs *) 765 - if not (List.exists (fun (d, _, _) -> d = doi) !refs) then 766 - let citation = format_citation 767 - ~authors:doi_entry.authors 768 - ~year:doi_entry.year 769 - ~title:doi_entry.title 770 - ~publisher:(Some doi_entry.publisher) 771 - in 772 - refs := (doi, citation, External) :: !refs 773 - | _ -> 774 - (* Not found in cache, skip it *) 775 - () 776 - with _ -> () 777 - ) publisher_matches; 778 - 779 - List.rev !refs 780 - ;; 781 -
-73
stack/bushel/lib/md.mli
··· 1 - val make_bushel_inline_mapper 2 - : ?slugs:(string, unit) Hashtbl.t 3 - -> ?footnote_map:(string, string * string) Hashtbl.t 4 - -> Cmarkit.Label.defs 5 - -> Entry.t 6 - -> 'a 7 - -> Cmarkit.Inline.t 8 - -> Cmarkit.Inline.t Cmarkit.Mapper.result 9 - 10 - val make_bushel_link_only_mapper 11 - : Cmarkit.Label.defs 12 - -> Entry.t 13 - -> 'a 14 - -> Cmarkit.Inline.t 15 - -> Cmarkit.Inline.t Cmarkit.Mapper.result 16 - 17 - type Cmarkit.Inline.t += Obsidian_link of string 18 - 19 - type sidenote_data = 20 - | Contact_note of Contact.t * string 21 - | Paper_note of Paper.t * string 22 - | Idea_note of Idea.t * string 23 - | Note_note of Note.t * string 24 - | Project_note of Project.t * string 25 - | Video_note of Video.t * string 26 - | Footnote_note of string * Cmarkit.Block.t * string 27 - 28 - type Cmarkit.Inline.t += Side_note of sidenote_data 29 - 30 - val bushel_inline_mapper_to_obsidian 31 - : Entry.t 32 - -> 'a 33 - -> Cmarkit.Inline.t 34 - -> Cmarkit.Inline.t Cmarkit.Mapper.result 35 - 36 - val with_bushel_links 37 - : [< `Def of Cmarkit.Label.t option * Cmarkit.Label.t 38 - | `Ref of 'a * Cmarkit.Label.t * Cmarkit.Label.t option 39 - ] 40 - -> Cmarkit.Label.t option 41 - 42 - val scan_for_slugs : Entry.t -> string -> string list 43 - 44 - (** Validate all bushel references in markdown and return broken ones. 45 - Returns (broken_slugs, broken_contacts) where each list contains 46 - the full reference string (e.g., ":missing-slug", "@unknown-handle") *) 47 - val validate_references : Entry.t -> string -> string list * string list 48 - 49 - (** Extract the first image URL from markdown text *) 50 - val extract_first_image : string -> string option 51 - 52 - (** Convert markdown text to plain text, resolving bushel links to just their text *) 53 - val markdown_to_plaintext : 'a -> string -> string 54 - 55 - val is_bushel_slug : string -> bool 56 - val is_tag_slug : string -> bool 57 - val is_type_filter_slug : string -> bool 58 - val is_contact_slug : string -> bool 59 - val strip_handle : string -> string 60 - 61 - (** Extract all links from markdown text, including from images (internal and external) *) 62 - val extract_all_links : string -> string list 63 - 64 - (** Type indicating the source of a reference for CiTO annotation *) 65 - type reference_source = 66 - | Paper (** CitesAsSourceDocument *) 67 - | Note (** CitesAsRelated *) 68 - | External (** Cites *) 69 - 70 - (** Extract references (papers/notes with DOIs) from a note. 71 - Returns a list of (DOI, citation_string, reference_source) tuples. 72 - Citation format: "Last, First (Year). Title. Publisher. https://doi.org/the/doi" *) 73 - val note_references : Entry.t -> Contact.t -> Note.t -> (string * string * reference_source) list
-230
stack/bushel/lib/note.ml
··· 1 - type t = 2 - { title : string 3 - ; date : Ptime.date 4 - ; slug : string 5 - ; body : string 6 - ; tags : string list 7 - ; draft : bool 8 - ; updated : Ptime.date option 9 - ; sidebar : string option 10 - ; index_page : bool 11 - ; perma : bool (* Permanent article that will receive a DOI *) 12 - ; doi : string option (* DOI identifier for permanent articles *) 13 - ; synopsis: string option 14 - ; titleimage: string option 15 - ; via : (string * string) option 16 - ; slug_ent : string option (* Optional reference to another entry *) 17 - ; source : string option (* Optional source for news-style notes *) 18 - ; url : string option (* Optional external URL for news-style notes *) 19 - ; author : string option (* Optional author for news-style notes *) 20 - ; category : string option (* Optional category for news-style notes *) 21 - } 22 - 23 - type ts = t list 24 - 25 - let link { body; via; slug; _ } = 26 - match body, via with 27 - | "", Some (l, u) -> `Ext (l, u) 28 - | "", None -> failwith (slug ^ ": note external without via, via-url") 29 - | _, _ -> `Local slug 30 - ;; 31 - 32 - let origdate { date; _ } = Option.get @@ Ptime.of_date date 33 - 34 - let date { date; updated; _ } = 35 - match updated with 36 - | None -> date 37 - | Some v -> v 38 - ;; 39 - 40 - let datetime v = Option.get @@ Ptime.of_date @@ date v 41 - let compare a b = Ptime.compare (datetime b) (datetime a) 42 - let slug { slug; _ } = slug 43 - let body { body; _ } = body 44 - let title { title; _ } = title 45 - let tags { tags; _ } = tags 46 - let sidebar { sidebar; _ } = sidebar 47 - let synopsis { synopsis; _ } = synopsis 48 - let draft { draft; _ } = draft 49 - let perma { perma; _ } = perma 50 - let doi { doi; _ } = doi 51 - let titleimage { titleimage; _ } = titleimage 52 - let slug_ent { slug_ent; _ } = slug_ent 53 - let source { source; _ } = source 54 - let url { url; _ } = url 55 - let author { author; _ } = author 56 - let category { category; _ } = category 57 - let lookup slug notes = List.find (fun n -> n.slug = slug) notes 58 - let read_file file = In_channel.(with_open_bin file input_all) 59 - let words { body; _ } = Util.count_words body 60 - 61 - 62 - let of_md fname = 63 - (* TODO fix Jekyll_post to basename the fname all the time *) 64 - match Jekyll_post.of_string ~fname:(Filename.basename fname) (read_file fname) with 65 - | Error (`Msg m) -> failwith ("note_of_md: " ^ m) 66 - | Ok jp -> 67 - let fields = jp.Jekyll_post.fields in 68 - let { Jekyll_post.title; date; slug; body; _ } = jp in 69 - let date, _ = Ptime.to_date_time date in 70 - let index_page = 71 - match Jekyll_format.find "index_page" fields with 72 - | Some (`Bool v) -> v 73 - | _ -> false 74 - in 75 - let perma = 76 - match Jekyll_format.find "perma" fields with 77 - | Some (`Bool v) -> v 78 - | _ -> false 79 - in 80 - let updated = 81 - match Jekyll_format.find "updated" fields with 82 - | Some (`String v) -> Some (Jekyll_format.parse_date_exn v |> Ptime.to_date) 83 - | _ -> None 84 - in 85 - let draft = 86 - match Jekyll_format.find "draft" fields with 87 - | Some (`Bool v) -> v 88 - | _ -> false 89 - in 90 - let titleimage = 91 - match Jekyll_format.find "titleimage" fields with 92 - | Some (`String v) -> Some v 93 - | _ -> None 94 - in 95 - let synopsis = 96 - match Jekyll_format.find "synopsis" fields with 97 - | Some (`String v) -> Some v 98 - | _ -> None 99 - in 100 - let sidebar = 101 - try Some (read_file ("data/sidebar/" ^ Filename.basename fname)) with 102 - | _ -> None 103 - in 104 - let tags = 105 - match Jekyll_format.find "tags" fields with 106 - | Some (`A l) -> 107 - List.filter_map 108 - (function 109 - | `String s -> Some s 110 - | _ -> None) 111 - l 112 - | _ -> [] 113 - in 114 - let via = 115 - match Jekyll_format.find "via" fields, Jekyll_format.find "via-url" fields with 116 - | Some (`String a), Some (`String b) -> Some (a, b) 117 - | None, Some (`String b) -> Some ("", b) 118 - | _ -> None 119 - in 120 - let slug_ent = 121 - match Jekyll_format.find "slug_ent" fields with 122 - | Some (`String v) -> Some v 123 - | _ -> None 124 - in 125 - let source = 126 - match Jekyll_format.find "source" fields with 127 - | Some (`String v) -> Some v 128 - | _ -> None 129 - in 130 - let url = 131 - match Jekyll_format.find "url" fields with 132 - | Some (`String v) -> Some v 133 - | _ -> None 134 - in 135 - let author = 136 - match Jekyll_format.find "author" fields with 137 - | Some (`String v) -> Some v 138 - | _ -> None 139 - in 140 - let category = 141 - match Jekyll_format.find "category" fields with 142 - | Some (`String v) -> Some v 143 - | _ -> None 144 - in 145 - let doi = 146 - match Jekyll_format.find "doi" fields with 147 - | Some (`String v) -> Some v 148 - | _ -> None 149 - in 150 - { title; draft; date; slug; synopsis; titleimage; index_page; perma; doi; body; via; updated; tags; sidebar; slug_ent; source; url; author; category } 151 - 152 - (* TODO:claude *) 153 - let typesense_schema = 154 - let open Ezjsonm in 155 - dict [ 156 - ("name", string "notes"); 157 - ("fields", list (fun d -> dict d) [ 158 - [("name", string "id"); ("type", string "string")]; 159 - [("name", string "title"); ("type", string "string")]; 160 - [("name", string "content"); ("type", string "string")]; 161 - [("name", string "date"); ("type", string "string")]; 162 - [("name", string "date_timestamp"); ("type", string "int64")]; 163 - [("name", string "tags"); ("type", string "string[]"); ("facet", bool true)]; 164 - [("name", string "body"); ("type", string "string"); ("optional", bool true)]; 165 - [("name", string "draft"); ("type", string "bool")]; 166 - [("name", string "synopsis"); ("type", string "string[]"); ("optional", bool true)]; 167 - [("name", string "thumbnail_url"); ("type", string "string"); ("optional", bool true)]; 168 - [("name", string "type"); ("type", string "string"); ("facet", bool true); ("optional", bool true)]; 169 - [("name", string "status"); ("type", string "string"); ("facet", bool true); ("optional", bool true)]; 170 - [("name", string "related_papers"); ("type", string "string[]"); ("optional", bool true)]; 171 - [("name", string "related_projects"); ("type", string "string[]"); ("optional", bool true)]; 172 - [("name", string "related_contacts"); ("type", string "string[]"); ("optional", bool true)]; 173 - [("name", string "attachments"); ("type", string "string[]"); ("optional", bool true)]; 174 - [("name", string "source"); ("type", string "string"); ("facet", bool true); ("optional", bool true)]; 175 - [("name", string "url"); ("type", string "string"); ("optional", bool true)]; 176 - [("name", string "author"); ("type", string "string"); ("optional", bool true)]; 177 - [("name", string "category"); ("type", string "string"); ("facet", bool true); ("optional", bool true)]; 178 - [("name", string "slug_ent"); ("type", string "string"); ("optional", bool true)]; 179 - [("name", string "words"); ("type", string "int32"); ("optional", bool true)]; 180 - ]); 181 - ("default_sorting_field", string "date_timestamp"); 182 - ] 183 - 184 - (** TODO:claude Pretty-print a note with ANSI formatting *) 185 - let pp ppf n = 186 - let open Fmt in 187 - pf ppf "@[<v>"; 188 - pf ppf "%a: %a@," (styled `Bold string) "Type" (styled `Cyan string) "Note"; 189 - pf ppf "%a: %a@," (styled `Bold string) "Slug" string (slug n); 190 - pf ppf "%a: %a@," (styled `Bold string) "Title" string (title n); 191 - let (year, month, day) = date n in 192 - pf ppf "%a: %04d-%02d-%02d@," (styled `Bold string) "Date" year month day; 193 - (match n.updated with 194 - | Some (y, m, d) -> pf ppf "%a: %04d-%02d-%02d@," (styled `Bold string) "Updated" y m d 195 - | None -> ()); 196 - pf ppf "%a: %b@," (styled `Bold string) "Draft" (draft n); 197 - pf ppf "%a: %b@," (styled `Bold string) "Index Page" n.index_page; 198 - pf ppf "%a: %b@," (styled `Bold string) "Perma" (perma n); 199 - (match doi n with 200 - | Some d -> pf ppf "%a: %a@," (styled `Bold string) "DOI" string d 201 - | None -> ()); 202 - (match synopsis n with 203 - | Some syn -> pf ppf "%a: %a@," (styled `Bold string) "Synopsis" string syn 204 - | None -> ()); 205 - (match titleimage n with 206 - | Some img -> pf ppf "%a: %a@," (styled `Bold string) "Title Image" string img 207 - | None -> ()); 208 - (match n.via with 209 - | Some (label, url) -> 210 - if label <> "" then 211 - pf ppf "%a: %a (%a)@," (styled `Bold string) "Via" string label string url 212 - else 213 - pf ppf "%a: %a@," (styled `Bold string) "Via" string url 214 - | None -> ()); 215 - let t = tags n in 216 - if t <> [] then 217 - pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Tags" (list ~sep:comma string) t; 218 - (match sidebar n with 219 - | Some sb -> 220 - pf ppf "@,"; 221 - pf ppf "%a:@," (styled `Bold string) "Sidebar"; 222 - pf ppf "%a@," string sb 223 - | None -> ()); 224 - let bd = body n in 225 - if bd <> "" then begin 226 - pf ppf "@,"; 227 - pf ppf "%a:@," (styled `Bold string) "Body"; 228 - pf ppf "%a@," string bd; 229 - end; 230 - pf ppf "@]"
-49
stack/bushel/lib/note.mli
··· 1 - type t = 2 - { title : string 3 - ; date : Ptime.date 4 - ; slug : string 5 - ; body : string 6 - ; tags : string list 7 - ; draft : bool 8 - ; updated : Ptime.date option 9 - ; sidebar : string option 10 - ; index_page : bool 11 - ; perma : bool 12 - ; doi : string option 13 - ; synopsis: string option 14 - ; titleimage: string option 15 - ; via : (string * string) option 16 - ; slug_ent : string option 17 - ; source : string option 18 - ; url : string option 19 - ; author : string option 20 - ; category : string option 21 - } 22 - 23 - type ts = t list 24 - 25 - val link : t -> [> `Ext of string * string | `Local of string ] 26 - val origdate : t -> Ptime.t 27 - val date : t -> Ptime.date 28 - val datetime : t -> Ptime.t 29 - val compare : t -> t -> int 30 - val slug : t -> string 31 - val body : t -> string 32 - val title : t -> string 33 - val draft : t -> bool 34 - val perma : t -> bool 35 - val doi : t -> string option 36 - val synopsis : t -> string option 37 - val titleimage : t -> string option 38 - val slug_ent : t -> string option 39 - val source : t -> string option 40 - val url : t -> string option 41 - val author : t -> string option 42 - val category : t -> string option 43 - val tags : t -> string list 44 - val sidebar : t -> string option 45 - val lookup : string -> t list -> t 46 - val words : t -> int 47 - val of_md : string -> t 48 - val typesense_schema : Ezjsonm.value 49 - val pp : Format.formatter -> t -> unit
-373
stack/bushel/lib/paper.ml
··· 1 - module J = Ezjsonm 2 - 3 - type paper = Ezjsonm.value 4 - 5 - type t = 6 - { slug : string 7 - ; ver : string 8 - ; paper : paper 9 - ; abstract : string 10 - ; latest : bool 11 - } 12 - 13 - type ts = t list 14 - 15 - let key y k = J.find y [ k ] 16 - 17 - let slugs ts = 18 - List.fold_left (fun acc t -> if List.mem t.slug acc then acc else t.slug :: acc) [] ts 19 - ;; 20 - 21 - let slug { slug; _ } = slug 22 - let title { paper; _ } : string = key paper "title" |> J.get_string 23 - let authors { paper; _ } : string list = key paper "author" |> J.get_list J.get_string 24 - 25 - let project_slugs { paper; _ } : string list = 26 - try key paper "projects" |> J.get_list J.get_string with 27 - | _ -> [] 28 - ;; 29 - 30 - let slides { paper; _ } : string list = 31 - try key paper "slides" |> J.get_list J.get_string with 32 - | _ -> [] 33 - ;; 34 - 35 - let bibtype { paper; _ } : string = key paper "bibtype" |> J.get_string 36 - 37 - let journal { paper; _ } = 38 - try key paper "journal" |> J.get_string with 39 - | Not_found -> 40 - failwith 41 - (Printf.sprintf "no journal found for %s\n%!" (Ezjsonm.value_to_string paper)) 42 - ;; 43 - 44 - (** TODO:claude Helper to extract raw JSON *) 45 - let raw_json { paper; _ } = paper 46 - 47 - let doi { paper; _ } = 48 - try Some (key paper "doi" |> J.get_string) with 49 - | _ -> None 50 - ;; 51 - 52 - let volume { paper; _ } = 53 - try Some (key paper "volume" |> J.get_string) with 54 - | _ -> None 55 - ;; 56 - 57 - let video { paper; _ } = 58 - try Some (key paper "video" |> J.get_string) with 59 - | _ -> None 60 - ;; 61 - 62 - let issue { paper; _ } = 63 - try Some (key paper "number" |> J.get_string) with 64 - | _ -> None 65 - ;; 66 - 67 - let url { paper; _ } = 68 - try Some (key paper "url" |> J.get_string) with 69 - | _ -> None 70 - ;; 71 - 72 - let pages { paper; _ } = try key paper "pages" |> J.get_string with _ -> "" 73 - let abstract { abstract; _ } = abstract 74 - 75 - let institution { paper; _ } = 76 - try key paper "institution" |> J.get_string with 77 - | Not_found -> 78 - failwith 79 - (Printf.sprintf "no institution found for %s\n%!" (Ezjsonm.value_to_string paper)) 80 - ;; 81 - 82 - let number { paper; _ } = 83 - try Some (key paper "number" |> J.get_string) with 84 - | Not_found -> None 85 - ;; 86 - 87 - let editor { paper; _ } = key paper "editor" |> J.get_string 88 - let isbn { paper; _ } = key paper "isbn" |> J.get_string 89 - let bib { paper; _ } = key paper "bib" |> J.get_string 90 - let year { paper; _ } = key paper "year" |> J.get_string |> int_of_string 91 - 92 - let publisher { paper; _ } = 93 - try key paper "publisher" |> J.get_string with 94 - | Not_found -> "" 95 - ;; 96 - 97 - let booktitle { paper; _ } = 98 - let r = key paper "booktitle" |> J.get_string |> Bytes.of_string in 99 - Bytes.set r 0 (Char.lowercase_ascii (Bytes.get r 0)); 100 - String.of_bytes r 101 - ;; 102 - 103 - let date { paper; _ } = 104 - let m = 105 - try 106 - match String.lowercase_ascii (key paper "month" |> J.get_string) with 107 - | "jan" -> 1 108 - | "feb" -> 2 109 - | "mar" -> 3 110 - | "apr" -> 4 111 - | "may" -> 5 112 - | "jun" -> 6 113 - | "jul" -> 7 114 - | "aug" -> 8 115 - | "sep" -> 9 116 - | "oct" -> 10 117 - | "nov" -> 11 118 - | "dec" -> 12 119 - | _ -> 1 120 - with 121 - | Not_found -> 1 122 - in 123 - let y = 124 - try key paper "year" |> J.get_string |> int_of_string with 125 - | Not_found -> 126 - failwith (Printf.sprintf "no year found for %s" (Ezjsonm.value_to_string paper)) 127 - in 128 - y, m, 1 129 - ;; 130 - 131 - let datetime p = Option.get @@ Ptime.of_date @@ date p 132 - 133 - let compare p2 p1 = 134 - let d1 = 135 - Ptime.of_date 136 - (try date p1 with 137 - | _ -> 1977, 1, 1) 138 - |> Option.get 139 - in 140 - let d2 = 141 - Ptime.of_date 142 - (try date p2 with 143 - | _ -> 1977, 1, 1) 144 - |> Option.get 145 - in 146 - Ptime.compare d1 d2 147 - ;; 148 - 149 - let get_papers ~slug ts = 150 - List.filter (fun p -> p.slug = slug && p.latest <> true) ts |> List.sort compare 151 - ;; 152 - 153 - let read_file file = In_channel.(with_open_bin file input_all) 154 - 155 - let of_md ~slug ~ver fname = 156 - (* TODO fix Jekyll_post to not error on no date *) 157 - let fname' = "2000-01-01-" ^ Filename.basename fname in 158 - match Jekyll_post.of_string ~fname:fname' (read_file fname) with 159 - | Error (`Msg m) -> failwith ("paper_of_md: " ^ m) 160 - | Ok jp -> 161 - let fields = jp.Jekyll_post.fields |> Jekyll_format.fields_to_yaml in 162 - let { Jekyll_post.body; _ } = jp in 163 - { slug; ver; abstract = body; paper = fields; latest = false } 164 - ;; 165 - 166 - let tv (l : t list) = 167 - let h = Hashtbl.create 7 in 168 - List.iter 169 - (fun { slug; ver; _ } -> 170 - match Hashtbl.find_opt h slug with 171 - | None -> Hashtbl.add h slug [ ver ] 172 - | Some l -> 173 - let l = ver :: l in 174 - let l = List.sort Stdlib.compare l in 175 - Hashtbl.replace h slug l) 176 - l; 177 - List.map 178 - (fun p -> 179 - let latest = Hashtbl.find h p.slug |> List.rev |> List.hd in 180 - let latest = p.ver = latest in 181 - { p with latest }) 182 - l 183 - ;; 184 - 185 - let lookup ts slug = List.find_opt (fun t -> t.slug = slug && t.latest) ts 186 - 187 - let tag_of_bibtype bt = 188 - match String.lowercase_ascii bt with 189 - | "article" -> "journal" 190 - | "inproceedings" -> "conference" 191 - | "techreport" -> "report" 192 - | "misc" -> "preprint" 193 - | "book" -> "book" 194 - | x -> x 195 - ;; 196 - 197 - let tags { paper; _ } = 198 - let tags f = 199 - try key paper f |> J.get_list J.get_string with 200 - | _ -> [] 201 - in 202 - let core = tags "tags" in 203 - let extra = tags "keywords" in 204 - let projects = tags "projects" in 205 - let ty = [ key paper "bibtype" |> J.get_string |> tag_of_bibtype ] in 206 - List.flatten [ core; extra; ty; projects ] 207 - ;; 208 - 209 - let best_url p = 210 - if Sys.file_exists (Printf.sprintf "static/papers/%s.pdf" (slug p)) 211 - then Some (Printf.sprintf "/papers/%s.pdf" (slug p)) 212 - else url p 213 - ;; 214 - 215 - (** TODO:claude Classification types for papers *) 216 - type classification = Full | Short | Preprint 217 - 218 - let string_of_classification = function 219 - | Full -> "full" 220 - | Short -> "short" 221 - | Preprint -> "preprint" 222 - 223 - let classification_of_string = function 224 - | "full" -> Full 225 - | "short" -> Short 226 - | "preprint" -> Preprint 227 - | _ -> Full (* default to full if unknown *) 228 - 229 - (** TODO:claude Get classification from paper metadata, with fallback to heuristic *) 230 - let classification { paper; _ } = 231 - try 232 - key paper "classification" |> J.get_string |> classification_of_string 233 - with _ -> 234 - (* Fallback to heuristic classification based on venue/bibtype/title *) 235 - let bibtype = try key paper "bibtype" |> J.get_string with _ -> "" in 236 - let journal = try key paper "journal" |> J.get_string |> String.lowercase_ascii with _ -> "" in 237 - let booktitle = try key paper "booktitle" |> J.get_string |> String.lowercase_ascii with _ -> "" in 238 - let title_str = try key paper "title" |> J.get_string |> String.lowercase_ascii with _ -> "" in 239 - 240 - (* Helper function to check if text contains any of the patterns *) 241 - let contains_any text patterns = 242 - List.exists (fun pattern -> 243 - let regex = Re.Perl.compile_pat ~opts:[`Caseless] pattern in 244 - Re.execp regex text 245 - ) patterns 246 - in 247 - 248 - (* Check for preprint indicators *) 249 - let bibtype_lower = String.lowercase_ascii bibtype in 250 - if contains_any journal ["arxiv"] || contains_any booktitle ["arxiv"] || bibtype_lower = "misc" || bibtype_lower = "techreport" 251 - then Preprint 252 - (* Check for workshop/short paper indicators including in title *) 253 - else if contains_any journal ["workshop"; "wip"; "poster"; "demo"; "hotdep"; "short"] || 254 - contains_any booktitle ["workshop"; "wip"; "poster"; "demo"; "hotdep"; "short"] || 255 - contains_any title_str ["poster"] 256 - then Short 257 - (* Default to full paper (journal or conference) *) 258 - else Full 259 - 260 - (** TODO:claude Check if paper is marked as selected *) 261 - let selected { paper; _ } = 262 - try 263 - let keys = J.get_dict paper in 264 - match List.assoc_opt "selected" keys with 265 - | Some (`Bool true) -> true 266 - | Some (`String "true") -> true 267 - | _ -> false 268 - with _ -> false 269 - 270 - (** TODO:claude Get note field from paper metadata *) 271 - let note { paper; _ } = 272 - try 273 - let keys = J.get_dict paper in 274 - match List.assoc_opt "note" keys with 275 - | Some note_json -> Some (J.get_string note_json) 276 - | None -> None 277 - with _ -> None 278 - 279 - (* TODO:claude *) 280 - let to_yaml ?abstract ~ver:_ json_data = 281 - (* Don't add version - it's inferred from filename *) 282 - let frontmatter = Yaml.to_string_exn json_data in 283 - match abstract with 284 - | Some abs -> 285 - (* Trim leading/trailing whitespace and normalize blank lines *) 286 - let trimmed_abs = String.trim abs in 287 - let normalized_abs = 288 - (* Replace 3+ consecutive newlines with exactly 2 newlines *) 289 - Re.replace_string (Re.compile (Re.seq [Re.char '\n'; Re.char '\n'; Re.rep1 (Re.char '\n')])) ~by:"\n\n" trimmed_abs 290 - in 291 - if normalized_abs = "" then 292 - Printf.sprintf "---\n%s---\n" frontmatter 293 - else 294 - Printf.sprintf "---\n%s---\n\n%s\n" frontmatter normalized_abs 295 - | None -> Printf.sprintf "---\n%s---\n" frontmatter 296 - 297 - (* TODO:claude *) 298 - let typesense_schema = 299 - let open Ezjsonm in 300 - dict [ 301 - ("name", string "papers"); 302 - ("fields", list (fun d -> dict d) [ 303 - [("name", string "id"); ("type", string "string")]; 304 - [("name", string "title"); ("type", string "string")]; 305 - [("name", string "authors"); ("type", string "string[]")]; 306 - [("name", string "abstract"); ("type", string "string")]; 307 - [("name", string "date"); ("type", string "string")]; 308 - [("name", string "date_timestamp"); ("type", string "int64")]; 309 - [("name", string "tags"); ("type", string "string[]"); ("facet", bool true)]; 310 - [("name", string "doi"); ("type", string "string[]"); ("optional", bool true)]; 311 - [("name", string "arxiv_id"); ("type", string "string"); ("optional", bool true)]; 312 - [("name", string "pdf_url"); ("type", string "string[]"); ("optional", bool true)]; 313 - [("name", string "thumbnail_url"); ("type", string "string"); ("optional", bool true)]; 314 - [("name", string "journal"); ("type", string "string[]"); ("optional", bool true)]; 315 - [("name", string "related_projects"); ("type", string "string[]"); ("optional", bool true)]; 316 - [("name", string "related_talks"); ("type", string "string[]"); ("optional", bool true)]; 317 - ]); 318 - ("default_sorting_field", string "date_timestamp"); 319 - ] 320 - 321 - (** TODO:claude Pretty-print a paper with ANSI formatting *) 322 - let pp ppf p = 323 - let open Fmt in 324 - pf ppf "@[<v>"; 325 - pf ppf "%a: %a@," (styled `Bold string) "Type" (styled `Cyan string) "Paper"; 326 - pf ppf "%a: %a@," (styled `Bold string) "Slug" string (slug p); 327 - pf ppf "%a: %a@," (styled `Bold string) "Version" string p.ver; 328 - pf ppf "%a: %a@," (styled `Bold string) "Title" string (title p); 329 - pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Authors" (list ~sep:comma string) (authors p); 330 - pf ppf "%a: %a@," (styled `Bold string) "Year" int (year p); 331 - pf ppf "%a: %a@," (styled `Bold string) "Bibtype" string (bibtype p); 332 - (match doi p with 333 - | Some d -> pf ppf "%a: %a@," (styled `Bold string) "DOI" string d 334 - | None -> ()); 335 - (match url p with 336 - | Some u -> pf ppf "%a: %a@," (styled `Bold string) "URL" string u 337 - | None -> ()); 338 - (match video p with 339 - | Some v -> pf ppf "%a: %a@," (styled `Bold string) "Video" string v 340 - | None -> ()); 341 - let projs = project_slugs p in 342 - if projs <> [] then 343 - pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Projects" (list ~sep:comma string) projs; 344 - let sl = slides p in 345 - if sl <> [] then 346 - pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Slides" (list ~sep:comma string) sl; 347 - (match bibtype p with 348 - | "article" -> 349 - pf ppf "%a: %a@," (styled `Bold string) "Journal" string (journal p); 350 - (match volume p with 351 - | Some vol -> pf ppf "%a: %a@," (styled `Bold string) "Volume" string vol 352 - | None -> ()); 353 - (match issue p with 354 - | Some iss -> pf ppf "%a: %a@," (styled `Bold string) "Issue" string iss 355 - | None -> ()); 356 - let pgs = pages p in 357 - if pgs <> "" then 358 - pf ppf "%a: %a@," (styled `Bold string) "Pages" string pgs; 359 - | "inproceedings" -> 360 - pf ppf "%a: %a@," (styled `Bold string) "Booktitle" string (booktitle p); 361 - let pgs = pages p in 362 - if pgs <> "" then 363 - pf ppf "%a: %a@," (styled `Bold string) "Pages" string pgs; 364 - | "techreport" -> 365 - pf ppf "%a: %a@," (styled `Bold string) "Institution" string (institution p); 366 - (match number p with 367 - | Some num -> pf ppf "%a: %a@," (styled `Bold string) "Number" string num 368 - | None -> ()); 369 - | _ -> ()); 370 - pf ppf "@,"; 371 - pf ppf "%a:@," (styled `Bold string) "Abstract"; 372 - pf ppf "%a@," (styled `Faint string) (abstract p); 373 - pf ppf "@]"
-55
stack/bushel/lib/paper.mli
··· 1 - type paper 2 - 3 - type t = 4 - { slug : string 5 - ; ver : string 6 - ; paper : paper 7 - ; abstract : string 8 - ; latest : bool 9 - } 10 - 11 - type ts = t list 12 - 13 - val tv : t list -> ts 14 - val slug : t -> string 15 - val title : t -> string 16 - val authors : t -> string list 17 - val project_slugs : t -> string list 18 - val slides : t -> string list 19 - val bibtype : t -> string 20 - val journal : t -> string 21 - val raw_json : t -> Ezjsonm.value 22 - val doi : t -> string option 23 - val volume : t -> string option 24 - val video : t -> string option 25 - val issue : t -> string option 26 - val url : t -> string option 27 - val best_url : t -> string option 28 - val pages : t -> string 29 - val abstract : t -> string 30 - val institution : t -> string 31 - val number : t -> string option 32 - val editor : t -> string 33 - val isbn : t -> string 34 - val bib : t -> string 35 - val year : t -> int 36 - val publisher : t -> string 37 - val booktitle : t -> string 38 - val tags : t -> string list 39 - val date : t -> int * int * int 40 - val datetime : t -> Ptime.t 41 - val compare : t -> t -> int 42 - val get_papers : slug:string -> ts -> ts 43 - val slugs : ts -> string list 44 - val lookup : ts -> string -> t option 45 - val of_md : slug:string -> ver:string -> string -> t 46 - val to_yaml : ?abstract:string -> ver:string -> Ezjsonm.value -> string 47 - val typesense_schema : Ezjsonm.value 48 - 49 - type classification = Full | Short | Preprint 50 - val string_of_classification : classification -> string 51 - val classification_of_string : string -> classification 52 - val classification : t -> classification 53 - val selected : t -> bool 54 - val note : t -> string option 55 - val pp : Format.formatter -> t -> unit
-100
stack/bushel/lib/project.ml
··· 1 - type t = 2 - { slug : string 3 - ; title : string 4 - ; start : int (* year *) 5 - ; finish : int option 6 - ; tags : string list 7 - ; ideas : string 8 - ; body : string 9 - } 10 - 11 - type ts = t list 12 - 13 - let tags p = p.tags 14 - 15 - let compare a b = 16 - match compare a.start b.start with 17 - | 0 -> compare b.finish a.finish 18 - | n -> n 19 - ;; 20 - 21 - let title { title; _ } = title 22 - let body { body; _ } = body 23 - let ideas { ideas; _ } = ideas 24 - 25 - let of_md fname = 26 - match Jekyll_post.of_string ~fname (Util.read_file fname) with 27 - | Error (`Msg m) -> failwith ("Project.of_file: " ^ m) 28 - | Ok jp -> 29 - let fields = jp.Jekyll_post.fields in 30 - let { Jekyll_post.title; date; slug; body; _ } = jp in 31 - let (start, _, _), _ = Ptime.to_date_time date in 32 - let finish = 33 - match Jekyll_format.find "finish" fields with 34 - | Some (`String date) -> 35 - let date = Jekyll_format.parse_date_exn date in 36 - let (finish, _, _), _ = Ptime.to_date_time date in 37 - Some finish 38 - | _ -> None 39 - in 40 - let ideas = 41 - match Jekyll_format.find "ideas" fields with 42 - | Some (`String e) -> e 43 - | _ -> failwith ("no ideas key in " ^ fname) 44 - in 45 - let tags = 46 - match Jekyll_format.find "tags" fields with 47 - | Some (`A tags) -> List.map Yaml.Util.to_string_exn tags 48 - | _ -> [] 49 - in 50 - { slug; title; start; finish; ideas; tags; body } 51 - ;; 52 - 53 - let lookup projects slug = List.find_opt (fun p -> p.slug = slug) projects 54 - 55 - (* TODO:claude *) 56 - let typesense_schema = 57 - let open Ezjsonm in 58 - dict [ 59 - ("name", string "projects"); 60 - ("fields", list (fun d -> dict d) [ 61 - [("name", string "id"); ("type", string "string")]; 62 - [("name", string "title"); ("type", string "string")]; 63 - [("name", string "description"); ("type", string "string")]; 64 - [("name", string "start_year"); ("type", string "int32")]; 65 - [("name", string "finish_year"); ("type", string "int32"); ("optional", bool true)]; 66 - [("name", string "date"); ("type", string "string")]; 67 - [("name", string "date_timestamp"); ("type", string "int64")]; 68 - [("name", string "tags"); ("type", string "string[]"); ("facet", bool true)]; 69 - [("name", string "repository_url"); ("type", string "string"); ("optional", bool true)]; 70 - [("name", string "homepage_url"); ("type", string "string"); ("optional", bool true)]; 71 - [("name", string "languages"); ("type", string "string[]"); ("facet", bool true); ("optional", bool true)]; 72 - [("name", string "license"); ("type", string "string"); ("facet", bool true); ("optional", bool true)]; 73 - [("name", string "status"); ("type", string "string"); ("facet", bool true); ("optional", bool true)]; 74 - [("name", string "related_papers"); ("type", string "string[]"); ("optional", bool true)]; 75 - [("name", string "related_talks"); ("type", string "string[]"); ("optional", bool true)]; 76 - [("name", string "body"); ("type", string "string"); ("optional", bool true)]; 77 - [("name", string "ideas"); ("type", string "string"); ("optional", bool true)]; 78 - ]); 79 - ("default_sorting_field", string "date_timestamp"); 80 - ] 81 - 82 - (** TODO:claude Pretty-print a project with ANSI formatting *) 83 - let pp ppf p = 84 - let open Fmt in 85 - pf ppf "@[<v>"; 86 - pf ppf "%a: %a@," (styled `Bold string) "Type" (styled `Cyan string) "Project"; 87 - pf ppf "%a: %a@," (styled `Bold string) "Slug" string p.slug; 88 - pf ppf "%a: %a@," (styled `Bold string) "Title" string (title p); 89 - pf ppf "%a: %d@," (styled `Bold string) "Start" p.start; 90 - (match p.finish with 91 - | Some year -> pf ppf "%a: %d@," (styled `Bold string) "Finish" year 92 - | None -> pf ppf "%a: ongoing@," (styled `Bold string) "Finish"); 93 - let t = tags p in 94 - if t <> [] then 95 - pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Tags" (list ~sep:comma string) t; 96 - pf ppf "%a: %a@," (styled `Bold string) "Ideas" string (ideas p); 97 - pf ppf "@,"; 98 - pf ppf "%a:@," (styled `Bold string) "Body"; 99 - pf ppf "%a@," string (body p); 100 - pf ppf "@]"
-21
stack/bushel/lib/project.mli
··· 1 - type t = 2 - { slug : string 3 - ; title : string 4 - ; start : int 5 - ; finish : int option 6 - ; tags : string list 7 - ; ideas : string 8 - ; body : string 9 - } 10 - 11 - type ts = t list 12 - 13 - val title : t -> string 14 - val body : t -> string 15 - val ideas : t -> string 16 - val lookup : t list -> string -> t option 17 - val tags : t -> string list 18 - val compare : t -> t -> int 19 - val of_md : string -> t 20 - val typesense_schema : Ezjsonm.value 21 - val pp : Format.formatter -> t -> unit
-44
stack/bushel/lib/srcsetter.ml
··· 1 - module MS = Map.Make (String) 2 - 3 - type t = 4 - { name : string 5 - ; slug : string 6 - ; origin : string 7 - ; dims : int * int 8 - ; variants : (int * int) MS.t 9 - } 10 - 11 - type ts = t list 12 - 13 - let v name slug origin variants dims = { name; slug; origin; variants; dims } 14 - let slug { slug; _ } = slug 15 - let origin { origin; _ } = origin 16 - let name { name; _ } = name 17 - let dims { dims; _ } = dims 18 - let variants { variants; _ } = variants 19 - 20 - let dims_json_t = 21 - let open Jsont in 22 - let dec x y = x, y in 23 - let enc (w, h) = function 24 - | 0 -> w 25 - | _ -> h 26 - in 27 - t2 ~dec ~enc uint16 28 - ;; 29 - 30 - let json_t = 31 - let open Jsont in 32 - let open Jsont.Object in 33 - map ~kind:"Entry" v 34 - |> mem "name" string ~enc:name 35 - |> mem "slug" string ~enc:slug 36 - |> mem "origin" string ~enc:origin 37 - |> mem "variants" (as_string_map dims_json_t) ~enc:variants 38 - |> mem "dims" dims_json_t ~enc:dims 39 - |> finish 40 - ;; 41 - 42 - let list = Jsont.list json_t 43 - let list_to_json es = Jsont_bytesrw.encode_string list ~format:Jsont.Indent es 44 - let list_of_json = Jsont_bytesrw.decode_string list
-21
stack/bushel/lib/srcsetter.mli
··· 1 - module MS : Map.S with type key = string 2 - 3 - type t = 4 - { name : string 5 - ; slug : string 6 - ; origin : string 7 - ; dims : int * int 8 - ; variants : (int * int) MS.t 9 - } 10 - 11 - type ts = t list 12 - 13 - val origin : t -> string 14 - val slug : t -> string 15 - val name : t -> string 16 - val dims : t -> int * int 17 - val variants : t -> (int * int) MS.t 18 - val list_to_json : t list -> (string, string) result 19 - val list_of_json : string -> (t list, string) result 20 - val json_t : t Jsont.t 21 - val list : t list Jsont.t
-114
stack/bushel/lib/tags.ml
··· 1 - open Entry 2 - 3 - type t = 4 - [ `Slug of string (* :foo points to the specific slug foo *) 5 - | `Contact of string (* @foo points to contact foo *) 6 - | `Set of string (* #papers points to all Paper entries *) 7 - | `Text of string (* foo points to a free text "foo" *) 8 - | `Year of int (* a number between 1900--2100 is interpreted as a year *) 9 - ] 10 - 11 - let is_text = function 12 - | `Text _ -> true 13 - | _ -> false 14 - ;; 15 - 16 - let is_slug = function 17 - | `Slug _ -> true 18 - | _ -> false 19 - ;; 20 - 21 - let is_set = function 22 - | `Set _ -> true 23 - | _ -> false 24 - ;; 25 - 26 - let is_year = function 27 - | `Year _ -> true 28 - | _ -> false 29 - ;; 30 - 31 - let of_string s : t = 32 - if String.length s < 2 then invalid_arg ("Tag.of_string: " ^ s); 33 - match s.[0] with 34 - | ':' -> 35 - let slug = String.sub s 1 (String.length s - 1) in 36 - `Slug slug 37 - | '@' -> failwith "TODO add contacts to entries" 38 - | '#' -> 39 - let cl = String.sub s 1 (String.length s - 1) in 40 - `Set cl 41 - | _ -> 42 - (try 43 - let x = int_of_string s in 44 - if x > 1900 && x < 2100 then `Year x else `Text s 45 - with 46 - | _ -> `Text s) 47 - ;; 48 - 49 - let of_string_list l = List.map of_string l 50 - 51 - let to_string = function 52 - | `Slug t -> ":" ^ t 53 - | `Contact c -> "@" ^ c 54 - | `Set s -> "#" ^ s 55 - | `Text t -> t 56 - | `Year y -> string_of_int y 57 - ;; 58 - 59 - let to_raw_string = function 60 - | `Slug t -> t 61 - | `Contact c -> c 62 - | `Set s -> s 63 - | `Text t -> t 64 - | `Year y -> string_of_int y 65 - ;; 66 - 67 - let pp ppf t = Fmt.string ppf (to_string t) 68 - 69 - let tags_of_ent _entries ent : t list = 70 - match ent with 71 - | `Paper p -> of_string_list @@ Paper.tags p 72 - | `Video v -> of_string_list v.Video.tags 73 - | `Project p -> of_string_list @@ Project.tags p 74 - | `Note n -> of_string_list @@ Note.tags n 75 - | `Idea i -> of_string_list i.Idea.tags 76 - ;; 77 - 78 - let mentions tags = 79 - List.filter 80 - (function 81 - | `Contact _ | `Slug _ -> true 82 - | _ -> false) 83 - tags 84 - ;; 85 - 86 - let mention_entries entries tags = 87 - let lk t = 88 - try Some (lookup_exn entries t) 89 - with Not_found -> Printf.eprintf "mention_entries not found: %s\n%!" t; None 90 - in 91 - List.filter_map 92 - (function 93 - | `Slug t -> lk t 94 - | _ -> None) 95 - tags 96 - ;; 97 - 98 - let count_tags ?h fn vs = 99 - let h = 100 - match h with 101 - | Some h -> h 102 - | None -> Hashtbl.create 42 103 - in 104 - List.iter 105 - (fun ent -> 106 - List.iter 107 - (fun tag -> 108 - match Hashtbl.find_opt h tag with 109 - | Some num -> Hashtbl.replace h tag (num + 1) 110 - | None -> Hashtbl.add h tag 1) 111 - (fn ent)) 112 - vs; 113 - h 114 - ;;
-25
stack/bushel/lib/tags.mli
··· 1 - type t = 2 - [ `Contact of string 3 - | `Set of string 4 - | `Slug of string 5 - | `Text of string 6 - | `Year of int 7 - ] 8 - 9 - val is_text : t -> bool 10 - val is_set : t -> bool 11 - val is_slug : t -> bool 12 - val is_year : t -> bool 13 - val of_string : string -> t 14 - val to_string : t -> string 15 - val to_raw_string : t -> string 16 - val pp : Format.formatter -> t -> unit 17 - val mention_entries : Entry.t -> t list -> Entry.entry list 18 - val tags_of_ent : Entry.t -> Entry.entry -> t list 19 - val mentions : t list -> t list 20 - 21 - val count_tags 22 - : ?h:('a, int) Hashtbl.t 23 - -> ('b -> 'a list) 24 - -> 'b list 25 - -> ('a, int) Hashtbl.t
-527
stack/bushel/lib/typesense.ml
··· 1 - (** Typesense API client for Bushel *) 2 - 3 - type config = { 4 - endpoint : string; 5 - api_key : string; 6 - openai_key : string; 7 - } 8 - 9 - type error = 10 - | Http_error of int * string 11 - | Json_error of string 12 - | Connection_error of string 13 - 14 - let pp_error fmt = function 15 - | Http_error (code, msg) -> Fmt.pf fmt "HTTP %d: %s" code msg 16 - | Json_error msg -> Fmt.pf fmt "JSON error: %s" msg 17 - | Connection_error msg -> Fmt.pf fmt "Connection error: %s" msg 18 - 19 - (** Create authentication headers for Typesense API *) 20 - let auth_headers api_key = 21 - Requests.Headers.empty 22 - |> Requests.Headers.set "X-TYPESENSE-API-KEY" api_key 23 - |> Requests.Headers.set "Content-Type" "application/json" 24 - 25 - (** Make HTTP request to Typesense API *) 26 - let make_request ~sw ~env ?(meth=`GET) ?(body="") config path = 27 - let uri = Uri.of_string (config.endpoint ^ path) in 28 - let headers = auth_headers config.api_key in 29 - let body = if body = "" then None else Some (Requests.Body.of_string Requests.Mime.json body) in 30 - 31 - try 32 - let response = Requests.One.request ~sw 33 - ~clock:env#clock ~net:env#net 34 - ?body 35 - ~headers 36 - ~method_:meth 37 - (Uri.to_string uri) 38 - in 39 - 40 - let status = Requests.Response.status_code response in 41 - let body_flow = Requests.Response.body response in 42 - let body_str = Eio.Flow.read_all body_flow in 43 - 44 - if status >= 200 && status < 300 then 45 - Ok body_str 46 - else 47 - Error (Http_error (status, body_str)) 48 - with exn -> 49 - Error (Connection_error (Printexc.to_string exn)) 50 - 51 - (** Create a collection with given schema *) 52 - let create_collection ~sw ~env config (schema : Ezjsonm.value) = 53 - let body = Ezjsonm.value_to_string schema in 54 - make_request ~sw ~env ~meth:`POST ~body config "/collections" 55 - 56 - (** Check if collection exists *) 57 - let collection_exists ~sw ~env config name = 58 - let result = make_request ~sw ~env config ("/collections/" ^ name) in 59 - match result with 60 - | Ok _ -> true 61 - | Error (Http_error (404, _)) -> false 62 - | Error _ -> false 63 - 64 - (** Delete a collection *) 65 - let delete_collection ~sw ~env config name = 66 - make_request ~sw ~env ~meth:`DELETE config ("/collections/" ^ name) 67 - 68 - (** Upload documents to a collection in batch *) 69 - let upload_documents ~sw ~env config collection_name (documents : Ezjsonm.value list) = 70 - let jsonl_lines = List.map (fun doc -> Ezjsonm.value_to_string doc) documents in 71 - let body = String.concat "\n" jsonl_lines in 72 - make_request ~sw ~env ~meth:`POST ~body config 73 - (Printf.sprintf "/collections/%s/documents/import?action=upsert" collection_name) 74 - 75 - 76 - (** Convert Bushel objects to Typesense documents *) 77 - 78 - (** Helper function to truncate long strings for embedding *) 79 - let truncate_for_embedding ?(max_chars=20000) text = 80 - if String.length text <= max_chars then text 81 - else String.sub text 0 max_chars 82 - 83 - (** Helper function to convert Ptime to Unix timestamp *) 84 - let ptime_to_timestamp ptime = 85 - let span = Ptime.to_span ptime in 86 - let seconds = Ptime.Span.to_int_s span in 87 - match seconds with 88 - | Some s -> Int64.of_int s 89 - | None -> 0L 90 - 91 - (** Helper function to convert date tuple to Unix timestamp *) 92 - let date_to_timestamp (year, month, day) = 93 - match Ptime.of_date (year, month, day) with 94 - | Some ptime -> ptime_to_timestamp ptime 95 - | None -> 0L 96 - 97 - (** Resolve author handles to full names in a list *) 98 - let resolve_author_list contacts authors = 99 - List.map (fun author -> 100 - (* Strip '@' prefix if present *) 101 - let handle = 102 - if String.length author > 0 && author.[0] = '@' then 103 - String.sub author 1 (String.length author - 1) 104 - else 105 - author 106 - in 107 - (* Try to look up as a contact handle *) 108 - match Contact.find_by_handle contacts handle with 109 - | Some contact -> Contact.name contact 110 - | None -> author (* Keep original if not found *) 111 - ) authors 112 - 113 - let contact_to_document (contact : Contact.t) = 114 - let open Ezjsonm in 115 - let safe_string_list_from_opt = function 116 - | Some s -> [s] 117 - | None -> [] 118 - in 119 - dict [ 120 - ("id", string (Contact.handle contact)); 121 - ("handle", string (Contact.handle contact)); 122 - ("name", string (Contact.name contact)); 123 - ("names", list string (Contact.names contact)); 124 - ("email", list string (safe_string_list_from_opt (Contact.email contact))); 125 - ("icon", list string (safe_string_list_from_opt (Contact.icon contact))); 126 - ("github", list string (safe_string_list_from_opt (Contact.github contact))); 127 - ("twitter", list string (safe_string_list_from_opt (Contact.twitter contact))); 128 - ("url", list string (safe_string_list_from_opt (Contact.url contact))); 129 - ] 130 - 131 - let paper_to_document entries (paper : Paper.t) = 132 - let date_tuple = Paper.date paper in 133 - let contacts = Entry.contacts entries in 134 - 135 - (* Helper to extract string arrays from JSON, handling both single strings and arrays *) 136 - let extract_string_array_from_json json_field_name = 137 - try 138 - (* Access the raw JSON from the paper record *) 139 - let paper_json = Paper.raw_json paper in 140 - let value = Ezjsonm.get_dict paper_json |> List.assoc json_field_name in 141 - match value with 142 - | `String s -> [s] 143 - | `A l -> List.filter_map (function `String s -> Some s | _ -> None) l 144 - | _ -> [] 145 - with _ -> [] 146 - in 147 - 148 - (* Resolve author handles to full names *) 149 - let authors = resolve_author_list contacts (Paper.authors paper) in 150 - 151 - (* Convert abstract markdown to plain text *) 152 - let abstract = Md.markdown_to_plaintext entries (Paper.abstract paper) |> truncate_for_embedding in 153 - 154 - (* Extract publication metadata *) 155 - let bibtype = Paper.bibtype paper in 156 - let metadata = 157 - try 158 - match bibtype with 159 - | "article" -> Printf.sprintf "Journal: %s" (Paper.journal paper) 160 - | "inproceedings" -> Printf.sprintf "Proceedings: %s" (Paper.journal paper) 161 - | "misc" | "techreport" -> Printf.sprintf "Preprint: %s" (Paper.journal paper) 162 - | _ -> Printf.sprintf "%s: %s" (String.capitalize_ascii bibtype) (Paper.journal paper) 163 - with _ -> bibtype 164 - in 165 - 166 - (* Get bibtex from raw JSON *) 167 - let bibtex = 168 - try 169 - let paper_json = Paper.raw_json paper in 170 - Ezjsonm.get_dict paper_json 171 - |> List.assoc "bibtex" 172 - |> Ezjsonm.get_string 173 - with _ -> "" 174 - in 175 - 176 - let thumbnail_url = Entry.thumbnail entries (`Paper paper) in 177 - Ezjsonm.dict [ 178 - ("id", Ezjsonm.string (Paper.slug paper)); 179 - ("title", Ezjsonm.string (Paper.title paper)); 180 - ("authors", Ezjsonm.list Ezjsonm.string authors); 181 - ("abstract", Ezjsonm.string abstract); 182 - ("metadata", Ezjsonm.string metadata); 183 - ("bibtex", Ezjsonm.string bibtex); 184 - ("date", Ezjsonm.string (let y, m, d = date_tuple in Printf.sprintf "%04d-%02d-%02d" y m d)); 185 - ("date_timestamp", Ezjsonm.int64 (date_to_timestamp date_tuple)); 186 - ("tags", Ezjsonm.list Ezjsonm.string (Paper.tags paper)); 187 - ("doi", Ezjsonm.list Ezjsonm.string (extract_string_array_from_json "doi")); 188 - ("pdf_url", Ezjsonm.list Ezjsonm.string (extract_string_array_from_json "pdf_url")); 189 - ("journal", Ezjsonm.list Ezjsonm.string (extract_string_array_from_json "journal")); 190 - ("related_projects", Ezjsonm.list Ezjsonm.string (Paper.project_slugs paper)); 191 - ("thumbnail_url", Ezjsonm.string (Option.value ~default:"" thumbnail_url)); 192 - ] 193 - 194 - let project_to_document entries (project : Project.t) = 195 - let open Ezjsonm in 196 - (* Use January 1st of start year as the date for sorting *) 197 - let date_timestamp = date_to_timestamp (project.start, 1, 1) in 198 - 199 - (* Convert body markdown to plain text *) 200 - let description = Md.markdown_to_plaintext entries (Project.body project) |> truncate_for_embedding in 201 - 202 - let thumbnail_url = Entry.thumbnail entries (`Project project) in 203 - dict [ 204 - ("id", string project.slug); 205 - ("title", string (Project.title project)); 206 - ("description", string description); 207 - ("start", int project.start); 208 - ("finish", option int project.finish); 209 - ("start_year", int project.start); 210 - ("date", string (Printf.sprintf "%04d-01-01" project.start)); 211 - ("date_timestamp", int64 date_timestamp); 212 - ("tags", list string (Project.tags project)); 213 - ("thumbnail_url", string (Option.value ~default:"" thumbnail_url)); 214 - ] 215 - 216 - let video_to_document entries (video : Video.t) = 217 - let open Ezjsonm in 218 - let datetime = Video.datetime video in 219 - let safe_string_list_from_opt = function 220 - | Some s -> [s] 221 - | None -> [] 222 - in 223 - 224 - (* Convert body markdown to plain text *) 225 - let description = Md.markdown_to_plaintext entries (Video.body video) |> truncate_for_embedding in 226 - 227 - (* Resolve paper and project slugs to titles *) 228 - let paper_title = match Video.paper video with 229 - | Some slug -> 230 - (match Entry.lookup entries slug with 231 - | Some entry -> Some (Entry.title entry) 232 - | None -> Some slug) (* Fallback to slug if not found *) 233 - | None -> None 234 - in 235 - let project_title = match Video.project video with 236 - | Some slug -> 237 - (match Entry.lookup entries slug with 238 - | Some entry -> Some (Entry.title entry) 239 - | None -> Some slug) (* Fallback to slug if not found *) 240 - | None -> None 241 - in 242 - 243 - let thumbnail_url = Entry.thumbnail entries (`Video video) in 244 - dict [ 245 - ("id", string (Video.slug video)); 246 - ("title", string (Video.title video)); 247 - ("description", string description); 248 - ("published_date", string (Ptime.to_rfc3339 datetime)); 249 - ("date", string (Ptime.to_rfc3339 datetime)); 250 - ("date_timestamp", int64 (ptime_to_timestamp datetime)); 251 - ("url", string (Video.url video)); 252 - ("uuid", string (Video.uuid video)); 253 - ("is_talk", bool (Video.talk video)); 254 - ("paper", list string (safe_string_list_from_opt paper_title)); 255 - ("project", list string (safe_string_list_from_opt project_title)); 256 - ("tags", list string video.tags); 257 - ("thumbnail_url", string (Option.value ~default:"" thumbnail_url)); 258 - ] 259 - 260 - let note_to_document entries (note : Note.t) = 261 - let open Ezjsonm in 262 - let datetime = Note.datetime note in 263 - let safe_string_list_from_opt = function 264 - | Some s -> [s] 265 - | None -> [] 266 - in 267 - 268 - (* Convert body markdown to plain text *) 269 - let content = Md.markdown_to_plaintext entries (Note.body note) |> truncate_for_embedding in 270 - 271 - let thumbnail_url = Entry.thumbnail entries (`Note note) in 272 - let word_count = Note.words note in 273 - dict [ 274 - ("id", string (Note.slug note)); 275 - ("title", string (Note.title note)); 276 - ("date", string (Ptime.to_rfc3339 datetime)); 277 - ("date_timestamp", int64 (ptime_to_timestamp datetime)); 278 - ("content", string content); 279 - ("tags", list string (Note.tags note)); 280 - ("draft", bool (Note.draft note)); 281 - ("synopsis", list string (safe_string_list_from_opt (Note.synopsis note))); 282 - ("thumbnail_url", string (Option.value ~default:"" thumbnail_url)); 283 - ("words", int word_count); 284 - ] 285 - 286 - let idea_to_document entries (idea : Idea.t) = 287 - let open Ezjsonm in 288 - let contacts = Entry.contacts entries in 289 - (* Use January 1st of the year as the date for sorting *) 290 - let date_timestamp = date_to_timestamp (Idea.year idea, 1, 1) in 291 - 292 - (* Convert body markdown to plain text *) 293 - let description = Md.markdown_to_plaintext entries (Idea.body idea) |> truncate_for_embedding in 294 - 295 - (* Resolve supervisor and student handles to full names *) 296 - let supervisors = resolve_author_list contacts (Idea.supervisors idea) in 297 - let students = resolve_author_list contacts (Idea.students idea) in 298 - 299 - (* Resolve project slug to project title *) 300 - let project_title = 301 - match Entry.lookup entries (Idea.project idea) with 302 - | Some entry -> Entry.title entry 303 - | None -> Idea.project idea (* Fallback to slug if not found *) 304 - in 305 - 306 - let thumbnail_url = Entry.thumbnail entries (`Idea idea) in 307 - dict [ 308 - ("id", string idea.slug); 309 - ("title", string (Idea.title idea)); 310 - ("description", string description); 311 - ("level", string (Idea.level_to_string (Idea.level idea))); 312 - ("project", string project_title); 313 - ("status", string (Idea.status_to_string (Idea.status idea))); 314 - ("year", int (Idea.year idea)); 315 - ("date", string (Printf.sprintf "%04d-01-01" (Idea.year idea))); 316 - ("date_timestamp", int64 date_timestamp); 317 - ("supervisors", list string supervisors); 318 - ("students", list string students); 319 - ("tags", list string idea.tags); 320 - ("thumbnail_url", string (Option.value ~default:"" thumbnail_url)); 321 - ] 322 - 323 - (** Helper function to add embedding field to schema *) 324 - let add_embedding_field_to_schema schema config embedding_from_fields = 325 - let open Ezjsonm in 326 - let fields = get_dict schema |> List.assoc "fields" |> get_list (fun f -> f) in 327 - let embedding_field = dict [ 328 - ("name", string "embedding"); 329 - ("type", string "float[]"); 330 - ("embed", dict [ 331 - ("from", list string embedding_from_fields); 332 - ("model_config", dict [ 333 - ("model_name", string "openai/text-embedding-3-small"); 334 - ("api_key", string config.openai_key); 335 - ]); 336 - ]); 337 - ] in 338 - let updated_fields = fields @ [embedding_field] in 339 - let updated_schema = 340 - List.map (fun (k, v) -> 341 - if k = "fields" then (k, list (fun f -> f) updated_fields) 342 - else (k, v) 343 - ) (get_dict schema) 344 - in 345 - dict updated_schema 346 - 347 - (** Upload all bushel objects to their respective collections *) 348 - let upload_all ~sw ~env config entries = 349 - print_string "Uploading bushel data to Typesense\n"; 350 - 351 - let contacts = Entry.contacts entries in 352 - let papers = Entry.papers entries in 353 - let projects = Entry.projects entries in 354 - let notes = Entry.notes entries in 355 - let videos = Entry.videos entries in 356 - let ideas = Entry.ideas entries in 357 - 358 - let collections = [ 359 - ("contacts", add_embedding_field_to_schema Contact.typesense_schema config ["name"; "names"], (List.map contact_to_document contacts : Ezjsonm.value list)); 360 - ("papers", add_embedding_field_to_schema Paper.typesense_schema config ["title"; "abstract"; "authors"], (List.map (paper_to_document entries) papers : Ezjsonm.value list)); 361 - ("videos", add_embedding_field_to_schema Video.typesense_schema config ["title"; "description"], (List.map (video_to_document entries) videos : Ezjsonm.value list)); 362 - ("projects", add_embedding_field_to_schema Project.typesense_schema config ["title"; "description"; "tags"], (List.map (project_to_document entries) projects : Ezjsonm.value list)); 363 - ("notes", add_embedding_field_to_schema Note.typesense_schema config ["title"; "content"; "tags"], (List.map (note_to_document entries) notes : Ezjsonm.value list)); 364 - ("ideas", add_embedding_field_to_schema Idea.typesense_schema config ["title"; "description"; "tags"], (List.map (idea_to_document entries) ideas : Ezjsonm.value list)); 365 - ] in 366 - 367 - let upload_collection ((name, schema, documents) : string * Ezjsonm.value * Ezjsonm.value list) = 368 - Printf.printf "Processing collection: %s\n%!" name; 369 - let exists = collection_exists ~sw ~env config name in 370 - (if exists then ( 371 - Printf.printf "Collection %s exists, deleting...\n%!" name; 372 - let result = delete_collection ~sw ~env config name in 373 - match result with 374 - | Ok _ -> Printf.printf "Deleted collection %s\n%!" name 375 - | Error err -> 376 - let err_str = Fmt.str "%a" pp_error err in 377 - Printf.printf "Failed to delete collection %s: %s\n%!" name err_str 378 - )); 379 - Printf.printf "Creating collection %s with %d documents\n%!" name (List.length documents); 380 - let result = create_collection ~sw ~env config schema in 381 - match result with 382 - | Ok _ -> 383 - Printf.printf "Created collection %s\n%!" name; 384 - if documents = [] then 385 - Printf.printf "No documents to upload for %s\n%!" name 386 - else ( 387 - let result = upload_documents ~sw ~env config name documents in 388 - match result with 389 - | Ok response -> 390 - (* Count successes and failures *) 391 - let lines = String.split_on_char '\n' response in 392 - let successes = List.fold_left (fun acc line -> 393 - if String.contains line ':' && Str.string_match (Str.regexp ".*success.*true.*") line 0 then acc + 1 else acc) 0 lines in 394 - let failures = List.fold_left (fun acc line -> 395 - if String.contains line ':' && Str.string_match (Str.regexp ".*success.*false.*") line 0 then acc + 1 else acc) 0 lines in 396 - Printf.printf "Upload results for %s: %d successful, %d failed out of %d total\n%!" 397 - name successes failures (List.length documents); 398 - if failures > 0 then ( 399 - Printf.printf "Failed documents in %s:\n%!" name; 400 - let failed_lines = List.filter (fun line -> Str.string_match (Str.regexp ".*success.*false.*") line 0) lines in 401 - List.iter (fun line -> Printf.printf "%s\n%!" line) failed_lines 402 - ) 403 - | Error err -> 404 - let err_str = Fmt.str "%a" pp_error err in 405 - Printf.printf "Failed to upload documents to %s: %s\n%!" name err_str 406 - ) 407 - | Error err -> 408 - let err_str = Fmt.str "%a" pp_error err in 409 - Printf.printf "Failed to create collection %s: %s\n%!" name err_str 410 - in 411 - 412 - List.iter upload_collection collections 413 - 414 - (** Re-export search types from Typesense_client *) 415 - type search_result = Typesense_client.search_result = { 416 - id: string; 417 - title: string; 418 - content: string; 419 - score: float; 420 - collection: string; 421 - highlights: (string * string list) list; 422 - document: Ezjsonm.value; 423 - } 424 - 425 - type search_response = Typesense_client.search_response = { 426 - hits: search_result list; 427 - total: int; 428 - query_time: float; 429 - } 430 - 431 - (** Convert bushel config to client config *) 432 - let to_client_config (config : config) = 433 - Typesense_client.{ endpoint = config.endpoint; api_key = config.api_key } 434 - 435 - (** Search a single collection *) 436 - let search_collection ~sw ~env (config : config) collection_name query ?(limit=10) ?(offset=0) () = 437 - let client_config = to_client_config config in 438 - let requests_session = Requests.create ~sw env in 439 - let client = Typesense_client.create ~requests_session ~config:client_config in 440 - let result = Typesense_client.search_collection client collection_name query ~limit ~offset () in 441 - match result with 442 - | Ok response -> Ok response 443 - | Error (Typesense_client.Http_error (code, msg)) -> Error (Http_error (code, msg)) 444 - | Error (Typesense_client.Json_error msg) -> Error (Json_error msg) 445 - | Error (Typesense_client.Connection_error msg) -> Error (Connection_error msg) 446 - 447 - (** Search across all collections - use client multisearch *) 448 - let search_all ~sw ~env (config : config) query ?(limit=10) ?(offset=0) () = 449 - let client_config = to_client_config config in 450 - let requests_session = Requests.create ~sw env in 451 - let client = Typesense_client.create ~requests_session ~config:client_config in 452 - let result = Typesense_client.multisearch client query ~limit:50 () in 453 - match result with 454 - | Ok multisearch_resp -> 455 - let combined_response = Typesense_client.combine_multisearch_results multisearch_resp ~limit ~offset () in 456 - Ok combined_response 457 - | Error (Typesense_client.Http_error (code, msg)) -> Error (Http_error (code, msg)) 458 - | Error (Typesense_client.Json_error msg) -> Error (Json_error msg) 459 - | Error (Typesense_client.Connection_error msg) -> Error (Connection_error msg) 460 - 461 - (** List all collections *) 462 - let list_collections ~sw ~env (config : config) = 463 - let client_config = to_client_config config in 464 - let requests_session = Requests.create ~sw env in 465 - let client = Typesense_client.create ~requests_session ~config:client_config in 466 - let result = Typesense_client.list_collections client in 467 - match result with 468 - | Ok collections -> Ok collections 469 - | Error (Typesense_client.Http_error (code, msg)) -> Error (Http_error (code, msg)) 470 - | Error (Typesense_client.Json_error msg) -> Error (Json_error msg) 471 - | Error (Typesense_client.Connection_error msg) -> Error (Connection_error msg) 472 - 473 - (** Re-export multisearch types from Typesense_client *) 474 - type multisearch_response = Typesense_client.multisearch_response = { 475 - results: search_response list; 476 - } 477 - 478 - (** Perform multisearch across all collections *) 479 - let multisearch ~sw ~env (config : config) query ?(limit=10) () = 480 - let client_config = to_client_config config in 481 - let requests_session = Requests.create ~sw env in 482 - let client = Typesense_client.create ~requests_session ~config:client_config in 483 - let result = Typesense_client.multisearch client query ~limit () in 484 - match result with 485 - | Ok multisearch_resp -> Ok multisearch_resp 486 - | Error (Typesense_client.Http_error (code, msg)) -> Error (Http_error (code, msg)) 487 - | Error (Typesense_client.Json_error msg) -> Error (Json_error msg) 488 - | Error (Typesense_client.Connection_error msg) -> Error (Connection_error msg) 489 - 490 - (** Combine multisearch results into single result set *) 491 - let combine_multisearch_results (multisearch_resp : multisearch_response) ?(limit=10) ?(offset=0) () = 492 - Typesense_client.combine_multisearch_results multisearch_resp ~limit ~offset () 493 - 494 - (** Load configuration from files *) 495 - let load_config_from_files () = 496 - let read_file_if_exists filename = 497 - if Sys.file_exists filename then 498 - let ic = open_in filename in 499 - let content = really_input_string ic (in_channel_length ic) in 500 - close_in ic; 501 - Some (String.trim content) 502 - else None 503 - in 504 - 505 - let endpoint = match read_file_if_exists ".typesense-url" with 506 - | Some url -> url 507 - | None -> "http://localhost:8108" 508 - in 509 - 510 - let api_key = match read_file_if_exists ".typesense-key" with 511 - | Some key -> key 512 - | None -> 513 - try Sys.getenv "TYPESENSE_API_KEY" 514 - with Not_found -> "" 515 - in 516 - 517 - let openai_key = match read_file_if_exists ".openrouter-api" with 518 - | Some key -> key 519 - | None -> 520 - try Sys.getenv "OPENAI_API_KEY" 521 - with Not_found -> "" 522 - in 523 - 524 - { endpoint; api_key; openai_key } 525 - 526 - (** Re-export pretty printer from Typesense_client *) 527 - let pp_search_result_oneline = Typesense_client.pp_search_result_oneline
-168
stack/bushel/lib/typesense.mli
··· 1 - (** Typesense API client for Bushel 2 - 3 - This module provides an OCaml client for the Typesense search engine API. 4 - It handles collection management and document indexing for all Bushel object 5 - types including contacts, papers, projects, news, videos, notes, and ideas. 6 - 7 - Example usage: 8 - {[ 9 - let config = { endpoint = "https://search.example.com"; api_key = "xyz123"; openai_key = "sk-..." } in 10 - Eio_main.run (fun env -> 11 - Eio.Switch.run (fun sw -> 12 - Typesense.upload_all ~sw ~env config entries)) 13 - ]} 14 - *) 15 - 16 - (** Configuration for connecting to a Typesense server *) 17 - type config = { 18 - endpoint : string; (** Typesense server URL (e.g., "https://search.example.com") *) 19 - api_key : string; (** API key for authentication *) 20 - openai_key : string; (** OpenAI API key for embeddings *) 21 - } 22 - 23 - (** Possible errors that can occur during Typesense operations *) 24 - type error = 25 - | Http_error of int * string (** HTTP error with status code and message *) 26 - | Json_error of string (** JSON parsing or encoding error *) 27 - | Connection_error of string (** Network connection error *) 28 - 29 - (** Pretty-printer for error types *) 30 - val pp_error : Format.formatter -> error -> unit 31 - 32 - (** Create a collection with the given schema. 33 - The schema should follow Typesense's collection schema format. *) 34 - val create_collection : 35 - sw:Eio.Switch.t -> 36 - env:< clock: [> float Eio.Time.clock_ty ] Eio.Resource.t; net: [> [> `Generic ] Eio.Net.ty ] Eio.Resource.t; .. > -> 37 - config -> 38 - Ezjsonm.value -> 39 - (string, error) result 40 - 41 - (** Check if a collection exists by name. 42 - Returns true if the collection exists, false otherwise. *) 43 - val collection_exists : 44 - sw:Eio.Switch.t -> 45 - env:< clock: [> float Eio.Time.clock_ty ] Eio.Resource.t; net: [> [> `Generic ] Eio.Net.ty ] Eio.Resource.t; .. > -> 46 - config -> 47 - string -> 48 - bool 49 - 50 - (** Delete a collection by name. *) 51 - val delete_collection : 52 - sw:Eio.Switch.t -> 53 - env:< clock: [> float Eio.Time.clock_ty ] Eio.Resource.t; net: [> [> `Generic ] Eio.Net.ty ] Eio.Resource.t; .. > -> 54 - config -> 55 - string -> 56 - (string, error) result 57 - 58 - (** Upload documents to a collection in batch using JSONL format. 59 - More efficient than uploading documents one by one. *) 60 - val upload_documents : 61 - sw:Eio.Switch.t -> 62 - env:< clock: [> float Eio.Time.clock_ty ] Eio.Resource.t; net: [> [> `Generic ] Eio.Net.ty ] Eio.Resource.t; .. > -> 63 - config -> 64 - string -> 65 - Ezjsonm.value list -> 66 - (string, error) result 67 - 68 - (** Upload all bushel objects to Typesense. 69 - This function will: 70 - - Extract all bushel data types from the Entry.t 71 - - Create or recreate collections for each type 72 - - Upload all documents in batches 73 - - Report progress to stdout *) 74 - val upload_all : 75 - sw:Eio.Switch.t -> 76 - env:< clock: [> float Eio.Time.clock_ty ] Eio.Resource.t; net: [> [> `Generic ] Eio.Net.ty ] Eio.Resource.t; .. > -> 77 - config -> 78 - Entry.t -> 79 - unit 80 - 81 - (** Search result structure containing document information and relevance score *) 82 - type search_result = { 83 - id: string; (** Document ID *) 84 - title: string; (** Document title *) 85 - content: string; (** Document content/description *) 86 - score: float; (** Relevance score *) 87 - collection: string; (** Collection name *) 88 - highlights: (string * string list) list; (** Highlighted search terms by field *) 89 - document: Ezjsonm.value; (** Raw document for flexible field access *) 90 - } 91 - 92 - (** Search response containing results and metadata *) 93 - type search_response = { 94 - hits: search_result list; (** List of matching documents *) 95 - total: int; (** Total number of matches *) 96 - query_time: float; (** Query execution time in milliseconds *) 97 - } 98 - 99 - (** Search a specific collection. *) 100 - val search_collection : 101 - sw:Eio.Switch.t -> 102 - env:< clock: float Eio.Time.clock_ty Eio.Resource.t; fs: Eio.Fs.dir_ty Eio.Path.t; net: [`Generic | `Unix] Eio.Net.ty Eio.Resource.t; .. > -> 103 - config -> 104 - string -> 105 - string -> 106 - ?limit:int -> 107 - ?offset:int -> 108 - unit -> 109 - (search_response, error) result 110 - 111 - (** Search across all bushel collections. 112 - Results are sorted by relevance score and paginated. *) 113 - val search_all : 114 - sw:Eio.Switch.t -> 115 - env:< clock: float Eio.Time.clock_ty Eio.Resource.t; fs: Eio.Fs.dir_ty Eio.Path.t; net: [`Generic | `Unix] Eio.Net.ty Eio.Resource.t; .. > -> 116 - config -> 117 - string -> 118 - ?limit:int -> 119 - ?offset:int -> 120 - unit -> 121 - (search_response, error) result 122 - 123 - (** Multisearch response containing results from multiple collections *) 124 - type multisearch_response = { 125 - results: search_response list; (** Results from each collection *) 126 - } 127 - 128 - (** Perform multisearch across all collections using Typesense's multi_search endpoint. 129 - More efficient than individual searches as it's done in a single request. *) 130 - val multisearch : 131 - sw:Eio.Switch.t -> 132 - env:< clock: float Eio.Time.clock_ty Eio.Resource.t; fs: Eio.Fs.dir_ty Eio.Path.t; net: [`Generic | `Unix] Eio.Net.ty Eio.Resource.t; .. > -> 133 - config -> 134 - string -> 135 - ?limit:int -> 136 - unit -> 137 - (multisearch_response, error) result 138 - 139 - (** Combine multisearch results into a single result set. 140 - Results are sorted by relevance score and paginated. *) 141 - val combine_multisearch_results : multisearch_response -> ?limit:int -> ?offset:int -> unit -> search_response 142 - 143 - (** List all collections with document counts. 144 - Returns a list of (collection_name, document_count) pairs. *) 145 - val list_collections : 146 - sw:Eio.Switch.t -> 147 - env:< clock: float Eio.Time.clock_ty Eio.Resource.t; fs: Eio.Fs.dir_ty Eio.Path.t; net: [`Generic | `Unix] Eio.Net.ty Eio.Resource.t; .. > -> 148 - config -> 149 - ((string * int) list, error) result 150 - 151 - (** Load configuration from .typesense-url and .typesense-api files. 152 - Falls back to environment variables and defaults. 153 - TODO:claude *) 154 - val load_config_from_files : unit -> config 155 - 156 - (** Pretty-print a search result in a one-line format with relevant information. 157 - Shows different fields based on the collection type (papers, videos, etc.). 158 - TODO:claude *) 159 - val pp_search_result_oneline : search_result -> string 160 - 161 - (** Convert Bushel objects to Typesense documents *) 162 - 163 - val contact_to_document : Contact.t -> Ezjsonm.value 164 - val paper_to_document : Entry.t -> Paper.t -> Ezjsonm.value 165 - val project_to_document : Entry.t -> Project.t -> Ezjsonm.value 166 - val video_to_document : Entry.t -> Video.t -> Ezjsonm.value 167 - val note_to_document : Entry.t -> Note.t -> Ezjsonm.value 168 - val idea_to_document : Entry.t -> Idea.t -> Ezjsonm.value
-80
stack/bushel/lib/util.ml
··· 1 - let first_hunk s = 2 - let lines = String.split_on_char '\n' s in 3 - let rec aux acc = function 4 - | [] -> String.concat "\n" (List.rev acc) 5 - | "" :: "" :: _ -> String.concat "\n" (List.rev acc) 6 - | line :: rest -> aux (line :: acc) rest 7 - in 8 - aux [] lines 9 - ;; 10 - 11 - let first_and_last_hunks s = 12 - let lines = String.split_on_char '\n' s in 13 - let rec aux acc = function 14 - | [] -> String.concat "\n" (List.rev acc), "" 15 - | "" :: "" :: rest -> 16 - String.concat "\n" (List.rev acc), String.concat "\n" (List.rev rest) 17 - | line :: rest -> aux (line :: acc) rest 18 - in 19 - aux [] lines 20 - ;; 21 - 22 - (* Find all footnote definition lines in text *) 23 - let find_footnote_lines s = 24 - let lines = String.split_on_char '\n' s in 25 - let is_footnote_def line = 26 - String.length line > 3 && 27 - line.[0] = '[' && 28 - line.[1] = '^' && 29 - String.contains line ':' && 30 - let colon_pos = String.index line ':' in 31 - colon_pos > 2 && line.[colon_pos - 1] = ']' 32 - in 33 - let is_continuation line = 34 - String.length line > 0 && (line.[0] = ' ' || line.[0] = '\t') 35 - in 36 - let rec collect_footnotes acc in_footnote = function 37 - | [] -> List.rev acc 38 - | line :: rest -> 39 - if is_footnote_def line then 40 - collect_footnotes (line :: acc) true rest 41 - else if in_footnote && is_continuation line then 42 - collect_footnotes (line :: acc) true rest 43 - else 44 - collect_footnotes acc false rest 45 - in 46 - collect_footnotes [] false lines 47 - ;; 48 - 49 - (* Augment first hunk with footnote definitions from last hunk *) 50 - let first_hunk_with_footnotes s = 51 - let first, last = first_and_last_hunks s in 52 - let footnote_lines = find_footnote_lines last in 53 - if footnote_lines = [] then first 54 - else first ^ "\n\n" ^ String.concat "\n" footnote_lines 55 - ;; 56 - 57 - let count_words (text : string) : int = 58 - let len = String.length text in 59 - let rec count_words_helper (index : int) (in_word : bool) (count : int) : int = 60 - if index >= len 61 - then if in_word then count + 1 else count 62 - else ( 63 - let char = String.get text index in 64 - let is_whitespace = 65 - Char.equal char ' ' 66 - || Char.equal char '\t' 67 - || Char.equal char '\n' 68 - || Char.equal char '\r' 69 - in 70 - if is_whitespace 71 - then 72 - if in_word 73 - then count_words_helper (index + 1) false (count + 1) 74 - else count_words_helper (index + 1) false count 75 - else count_words_helper (index + 1) true count) 76 - in 77 - count_words_helper 0 false 0 78 - ;; 79 - 80 - let read_file file = In_channel.(with_open_bin file input_all)
-166
stack/bushel/lib/video.ml
··· 1 - type t = 2 - { slug : string 3 - ; title : string 4 - ; published_date : Ptime.t 5 - ; uuid : string 6 - ; description : string 7 - ; url : string 8 - ; talk : bool 9 - ; paper : string option 10 - ; project : string option 11 - ; tags : string list 12 - } 13 - 14 - type ts = t list 15 - 16 - let get_shadow fs k = 17 - match List.assoc_opt k fs with 18 - | Some v -> Some v 19 - | None -> List.assoc_opt ("_" ^ k) fs 20 - ;; 21 - 22 - let get_shadow_string fs k = 23 - match get_shadow fs k with 24 - | Some (`String v) -> v 25 - | _ -> failwith "invalid yaml" 26 - ;; 27 - 28 - let get_shadow_bool fs k = 29 - match get_shadow fs k with 30 - | Some (`Bool v) -> v 31 - | _ -> failwith "invalid yaml" 32 - ;; 33 - 34 - let compare a b = Ptime.compare b.published_date a.published_date 35 - let url v = v.url 36 - let body { description; _ } = description 37 - let title { title; _ } = title 38 - let uuid { uuid; _ } = uuid 39 - let paper { paper; _ } = paper 40 - let project { project; _ } = project 41 - let slug { slug; _ } = slug 42 - let date { published_date; _ } = published_date |> Ptime.to_date 43 - let datetime { published_date; _ } = published_date 44 - let talk { talk; _ } = talk 45 - 46 - let t_of_yaml ~description = function 47 - | `O fields -> 48 - let slug = get_shadow_string fields "uuid" in 49 - let title = get_shadow_string fields "title" in 50 - let published_date = 51 - get_shadow_string fields "published_date" 52 - |> Ptime.of_rfc3339 53 - |> Result.get_ok 54 - |> fun (a, _, _) -> a 55 - in 56 - let uuid = get_shadow_string fields "uuid" in 57 - let url = get_shadow_string fields "url" in 58 - let talk = 59 - try get_shadow_bool fields "talk" with 60 - | _ -> false 61 - in 62 - let tags = 63 - match List.assoc_opt "tags" fields with 64 - | Some l -> Ezjsonm.get_list Ezjsonm.get_string l 65 - | _ -> [] 66 - in 67 - let paper = 68 - try Some (get_shadow_string fields "paper") with 69 - | _ -> None 70 - in 71 - let project = 72 - try Some (get_shadow_string fields "project") with 73 - | _ -> None 74 - in 75 - { slug; title; tags; published_date; uuid; description; talk; paper; project; url } 76 - | _ -> failwith "invalid yaml" 77 - ;; 78 - 79 - let to_yaml t = 80 - `O [ 81 - ("title", `String t.title); 82 - ("description", `String t.description); 83 - ("url", `String t.url); 84 - ("uuid", `String t.uuid); 85 - ("slug", `String t.slug); 86 - ("published_date", `String (Ptime.to_rfc3339 t.published_date)); 87 - ("talk", `Bool t.talk); 88 - ("tags", `A (List.map (fun t -> `String t) t.tags)); 89 - ("paper", match t.paper with None -> `Null | Some p -> `String p); 90 - ("project", match t.project with None -> `Null | Some p -> `String p) 91 - ] 92 - 93 - let to_file output_dir t = 94 - let file_path = Fpath.v (Filename.concat output_dir (t.uuid ^ ".md")) in 95 - let yaml = to_yaml t in 96 - let yaml_str = Yaml.to_string_exn yaml in 97 - let content = "---\n" ^ yaml_str ^ "---\n" in 98 - Bos.OS.File.write file_path content 99 - ;; 100 - 101 - let of_md fname = 102 - (* TODO fix Jekyll_post to not error on no date *) 103 - let fname' = "2000-01-01-" ^ Filename.basename fname in 104 - match Jekyll_post.of_string ~fname:fname' (Util.read_file fname) with 105 - | Error (`Msg m) -> failwith ("paper_of_md: " ^ m) 106 - | Ok jp -> 107 - let fields = jp.Jekyll_post.fields |> Jekyll_format.fields_to_yaml in 108 - let { Jekyll_post.body; _ } = jp in 109 - t_of_yaml ~description:body fields 110 - ;; 111 - 112 - (* TODO:claude *) 113 - let typesense_schema = 114 - let open Ezjsonm in 115 - dict [ 116 - ("name", string "videos"); 117 - ("fields", list (fun d -> dict d) [ 118 - [("name", string "id"); ("type", string "string")]; 119 - [("name", string "title"); ("type", string "string")]; 120 - [("name", string "description"); ("type", string "string")]; 121 - [("name", string "published_date"); ("type", string "string")]; 122 - [("name", string "date"); ("type", string "string")]; 123 - [("name", string "date_timestamp"); ("type", string "int64")]; 124 - [("name", string "tags"); ("type", string "string[]"); ("facet", bool true)]; 125 - [("name", string "url"); ("type", string "string")]; 126 - [("name", string "uuid"); ("type", string "string")]; 127 - [("name", string "is_talk"); ("type", string "bool")]; 128 - [("name", string "paper"); ("type", string "string[]"); ("optional", bool true)]; 129 - [("name", string "project"); ("type", string "string[]"); ("optional", bool true)]; 130 - [("name", string "video_url"); ("type", string "string"); ("optional", bool true)]; 131 - [("name", string "embed_url"); ("type", string "string"); ("optional", bool true)]; 132 - [("name", string "duration"); ("type", string "int32"); ("optional", bool true)]; 133 - [("name", string "channel"); ("type", string "string"); ("facet", bool true); ("optional", bool true)]; 134 - [("name", string "platform"); ("type", string "string"); ("facet", bool true); ("optional", bool true)]; 135 - [("name", string "views"); ("type", string "int32"); ("optional", bool true)]; 136 - [("name", string "related_papers"); ("type", string "string[]"); ("optional", bool true)]; 137 - [("name", string "related_talks"); ("type", string "string[]"); ("optional", bool true)]; 138 - ]); 139 - ("default_sorting_field", string "date_timestamp"); 140 - ] 141 - 142 - (** TODO:claude Pretty-print a video with ANSI formatting *) 143 - let pp ppf v = 144 - let open Fmt in 145 - pf ppf "@[<v>"; 146 - pf ppf "%a: %a@," (styled `Bold string) "Type" (styled `Cyan string) "Video"; 147 - pf ppf "%a: %a@," (styled `Bold string) "Slug" string (slug v); 148 - pf ppf "%a: %a@," (styled `Bold string) "UUID" string (uuid v); 149 - pf ppf "%a: %a@," (styled `Bold string) "Title" string (title v); 150 - let (year, month, day) = date v in 151 - pf ppf "%a: %04d-%02d-%02d@," (styled `Bold string) "Date" year month day; 152 - pf ppf "%a: %a@," (styled `Bold string) "URL" string (url v); 153 - pf ppf "%a: %b@," (styled `Bold string) "Talk" (talk v); 154 - (match paper v with 155 - | Some p -> pf ppf "%a: %a@," (styled `Bold string) "Paper" string p 156 - | None -> ()); 157 - (match project v with 158 - | Some p -> pf ppf "%a: %a@," (styled `Bold string) "Project" string p 159 - | None -> ()); 160 - let t = v.tags in 161 - if t <> [] then 162 - pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Tags" (list ~sep:comma string) t; 163 - pf ppf "@,"; 164 - pf ppf "%a:@," (styled `Bold string) "Description"; 165 - pf ppf "%a@," string v.description; 166 - pf ppf "@]"
-32
stack/bushel/lib/video.mli
··· 1 - type t = 2 - { slug : string 3 - ; title : string 4 - ; published_date : Ptime.t 5 - ; uuid : string 6 - ; description : string 7 - ; url : string 8 - ; talk : bool 9 - ; paper : string option 10 - ; project : string option 11 - ; tags : string list 12 - } 13 - 14 - type ts = t list 15 - 16 - val compare : t -> t -> int 17 - val url : t -> string 18 - val body : t -> string 19 - val title : t -> string 20 - val uuid : t -> string 21 - val paper : t -> string option 22 - val project : t -> string option 23 - val slug : t -> string 24 - val date : t -> Ptime.date 25 - val datetime : t -> Ptime.t 26 - val talk : t -> bool 27 - val of_md : string -> t 28 - val t_of_yaml : description:string -> Yaml.value -> t 29 - val to_yaml : t -> Yaml.value 30 - val to_file : string -> t -> (unit, [> `Msg of string]) result 31 - val typesense_schema : Ezjsonm.value 32 - val pp : Format.formatter -> t -> unit