···11-open Cmdliner
22-open Printf
33-44-(** TODO:claude Generate bibtex entry from paper data *)
55-let generate_bibtex_entry paper =
66- let open Bushel.Paper in
77- (* Use slug as the bibtex key/label *)
88- let bibkey = slug paper in
99- let bibtype = try bibtype paper with _ -> "misc" in
1010- let title = try title paper with _ -> "Untitled" in
1111- let authors =
1212- let auth_list = try authors paper with _ -> [] in
1313- String.concat " and " auth_list
1414- in
1515- let year = try year paper with _ -> 0 in
1616-1717- (* Build the bibtex entry *)
1818- let buf = Buffer.create 1024 in
1919- Buffer.add_string buf (sprintf "@%s{%s,\n" bibtype bibkey);
2020- Buffer.add_string buf (sprintf " title = {%s},\n" title);
2121- Buffer.add_string buf (sprintf " author = {%s},\n" authors);
2222- Buffer.add_string buf (sprintf " year = {%d}" year);
2323-2424- (* Add optional fields *)
2525- (match String.lowercase_ascii bibtype with
2626- | "article" ->
2727- (try
2828- Buffer.add_string buf (sprintf ",\n journal = {%s}" (journal paper))
2929- with _ -> ());
3030- (match volume paper with
3131- | Some v -> Buffer.add_string buf (sprintf ",\n volume = {%s}" v)
3232- | None -> ());
3333- (match issue paper with
3434- | Some i -> Buffer.add_string buf (sprintf ",\n number = {%s}" i)
3535- | None -> ());
3636- (match pages paper with
3737- | "" -> ()
3838- | p -> Buffer.add_string buf (sprintf ",\n pages = {%s}" p))
3939- | "inproceedings" ->
4040- (try
4141- Buffer.add_string buf (sprintf ",\n booktitle = {%s}" (booktitle paper))
4242- with _ -> ());
4343- (match pages paper with
4444- | "" -> ()
4545- | p -> Buffer.add_string buf (sprintf ",\n pages = {%s}" p));
4646- (match publisher paper with
4747- | "" -> ()
4848- | p -> Buffer.add_string buf (sprintf ",\n publisher = {%s}" p))
4949- | "techreport" ->
5050- (try
5151- Buffer.add_string buf (sprintf ",\n institution = {%s}" (institution paper))
5252- with _ -> ());
5353- (match number paper with
5454- | Some n -> Buffer.add_string buf (sprintf ",\n number = {%s}" n)
5555- | None -> ())
5656- | "book" ->
5757- (match publisher paper with
5858- | "" -> ()
5959- | p -> Buffer.add_string buf (sprintf ",\n publisher = {%s}" p));
6060- (try
6161- Buffer.add_string buf (sprintf ",\n isbn = {%s}" (isbn paper))
6262- with _ -> ())
6363- | _ -> ());
6464-6565- (* Add DOI if available *)
6666- (match doi paper with
6767- | Some d -> Buffer.add_string buf (sprintf ",\n doi = {%s}" d)
6868- | None -> ());
6969-7070- (* Add URL if available *)
7171- (match url paper with
7272- | Some u -> Buffer.add_string buf (sprintf ",\n url = {%s}" u)
7373- | None -> ());
7474-7575- Buffer.add_string buf "\n}\n";
7676- Buffer.contents buf
7777-7878-(** TODO:claude Main function to export bibtex for all papers *)
7979-let export_bibtex base_dir output_file latest_only _env _xdg _profile =
8080- (* Load all papers *)
8181- let bushel = Bushel.load base_dir in
8282- let papers = Bushel.Entry.papers bushel in
8383-8484- (* Filter to only latest versions if requested *)
8585- let papers =
8686- if latest_only then
8787- List.filter (fun p -> p.Bushel.Paper.latest) papers
8888- else
8989- papers
9090- in
9191-9292- (* Sort papers by year (most recent first) *)
9393- let papers = List.sort Bushel.Paper.compare papers in
9494-9595- (* Generate bibtex for each paper *)
9696- let bibtex_entries = List.map generate_bibtex_entry papers in
9797- let bibtex_content = String.concat "\n" bibtex_entries in
9898-9999- (* Output to file or stdout *)
100100- match output_file with
101101- | None ->
102102- print_string bibtex_content;
103103- 0
104104- | Some file ->
105105- let oc = open_out file in
106106- output_string oc bibtex_content;
107107- close_out oc;
108108- printf "Bibtex exported to %s (%d entries)\n" file (List.length papers);
109109- 0
110110-111111-(** TODO:claude Command line arguments *)
112112-let output_file_arg =
113113- let doc = "Output file for bibtex (defaults to stdout)" in
114114- Arg.(value & opt (some string) None & info ["o"; "output"] ~docv:"FILE" ~doc)
115115-116116-let latest_only_arg =
117117- let doc = "Export only the latest version of each paper" in
118118- Arg.(value & flag & info ["latest"] ~doc)
119119-120120-(** TODO:claude Command term *)
121121-let term : (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Cmdliner.Term.t =
122122- Term.(const export_bibtex $ Bushel_common.base_dir $ output_file_arg $ latest_only_arg)
123123-124124-let cmd =
125125- let doc = "Export bibtex for all papers" in
126126- let info = Cmd.info "bibtex" ~doc in
127127- Cmd.v info term
-67
stack/bushel/bin/bushel_common.ml
···11-open Cmdliner
22-33-(** TODO:claude Get default base directory from BUSHEL_DATA env variable or current directory *)
44-let get_default_base_dir () =
55- match Sys.getenv_opt "BUSHEL_DATA" with
66- | Some dir -> dir
77- | None -> "."
88-99-(** TODO:claude Optional base directory term with BUSHEL_DATA env variable support *)
1010-let base_dir =
1111- let doc = "Base directory containing Bushel data (defaults to BUSHEL_DATA env var or current directory)" in
1212- Arg.(value & opt dir (get_default_base_dir ()) & info ["d"; "dir"] ~docv:"DIR" ~doc)
1313-1414-(** TODO:claude Output directory as option *)
1515-let output_dir ~default =
1616- let doc = "Output directory for generated files" in
1717- Arg.(value & opt string default & info ["o"; "output"] ~docv:"DIR" ~doc)
1818-1919-(** TODO:claude URL term with custom default *)
2020-let url_term ~default ~doc =
2121- Arg.(value & opt string default & info ["u"; "url"] ~docv:"URL" ~doc)
2222-2323-(** TODO:claude API key file term *)
2424-let api_key_file ~default =
2525- let doc = "File containing API key" in
2626- Arg.(value & opt string default & info ["k"; "key-file"] ~docv:"FILE" ~doc)
2727-2828-(** TODO:claude API key term *)
2929-let api_key =
3030- let doc = "API key for authentication" in
3131- Arg.(value & opt (some string) None & info ["api-key"] ~docv:"KEY" ~doc)
3232-3333-(** TODO:claude Overwrite flag *)
3434-let overwrite =
3535- let doc = "Overwrite existing files" in
3636- Arg.(value & flag & info ["overwrite"] ~doc)
3737-3838-(** TODO:claude Verbose flag *)
3939-let verbose =
4040- let doc = "Enable verbose output" in
4141- Arg.(value & flag & info ["v"; "verbose"] ~doc)
4242-4343-(** TODO:claude File path term *)
4444-let file_term ~default ~doc =
4545- Arg.(value & opt string default & info ["f"; "file"] ~docv:"FILE" ~doc)
4646-4747-(** TODO:claude Channel/handle term *)
4848-let channel ~default =
4949- let doc = "Channel or handle name" in
5050- Arg.(value & opt string default & info ["c"; "channel"] ~docv:"CHANNEL" ~doc)
5151-5252-(** TODO:claude Optional handle term *)
5353-let handle_opt =
5454- let doc = "Process specific handle" in
5555- Arg.(value & opt (some string) None & info ["h"; "handle"] ~docv:"HANDLE" ~doc)
5656-5757-(** TODO:claude Tag term for filtering *)
5858-let tag =
5959- let doc = "Tag to filter or apply" in
6060- Arg.(value & opt (some string) None & info ["t"; "tag"] ~docv:"TAG" ~doc)
6161-6262-(** TODO:claude Limit term *)
6363-let limit =
6464- let doc = "Limit number of items to process" in
6565- Arg.(value & opt (some int) None & info ["l"; "limit"] ~docv:"N" ~doc)
6666-6767-(* Note: Logging setup is now handled by eiocmd for all bushel binaries *)
-295
stack/bushel/bin/bushel_doi.ml
···11-module ZT = Zotero_translation
22-open Lwt.Infix
33-module J = Ezjsonm
44-open Cmdliner
55-66-(* Extract all DOIs from notes by scanning for doi.org URLs *)
77-let extract_dois_from_notes notes =
88- let doi_url_pattern = Re.Perl.compile_pat "https?://(?:dx\\.)?doi\\.org/([^)\\s\"'>]+)" in
99- let dois = ref [] in
1010- List.iter (fun note ->
1111- let body = Bushel.Note.body note in
1212- let matches = Re.all doi_url_pattern body in
1313- List.iter (fun group ->
1414- try
1515- let encoded_doi = Re.Group.get group 1 in
1616- let doi = Uri.pct_decode encoded_doi in
1717- if not (List.mem doi !dois) then
1818- dois := doi :: !dois
1919- with _ -> ()
2020- ) matches
2121- ) notes;
2222- !dois
2323-2424-(* Extract publisher URLs from notes (Elsevier, ScienceDirect, IEEE, Nature, ACM, Sage, UPenn, Springer, Taylor & Francis, OUP) *)
2525-let extract_publisher_urls_from_notes notes =
2626- (* Matches publisher URLs: linkinghub.elsevier.com, sciencedirect.com/science/article, ieeexplore.ieee.org, academic.oup.com, nature.com, journals.sagepub.com, garfield.library.upenn.edu, link.springer.com, tandfonline.com/doi, and dl.acm.org/doi/10.* URLs *)
2727- let publisher_pattern = Re.Perl.compile_pat "https?://(?:(?:www\\.)?(?:linkinghub\\.elsevier\\.com|(?:www\\.)?sciencedirect\\.com/science/article|ieeexplore\\.ieee\\.org|academic\\.oup\\.com|nature\\.com|journals\\.sagepub\\.com|garfield\\.library\\.upenn\\.edu|link\\.springer\\.com)/[^)\\s\"'>]+|(?:dl\\.acm\\.org|(?:www\\.)?tandfonline\\.com)/doi(?:/pdf)?/10\\.[^)\\s\"'>]+)" in
2828- let urls = ref [] in
2929- List.iter (fun note ->
3030- let body = Bushel.Note.body note in
3131- let matches = Re.all publisher_pattern body in
3232- List.iter (fun group ->
3333- try
3434- let url = Re.Group.get group 0 in
3535- if not (List.mem url !urls) then
3636- urls := url :: !urls
3737- with _ -> ()
3838- ) matches
3939- ) notes;
4040- !urls
4141-4242-(* Resolve a single DOI via Zotero and convert to doi_entry *)
4343-let resolve_doi zt ~verbose doi =
4444- Printf.printf "Resolving DOI: %s\n%!" doi;
4545- let doi_url = Printf.sprintf "https://doi.org/%s" doi in
4646- Lwt.catch
4747- (fun () ->
4848- ZT.json_of_doi zt ~slug:"temp" doi >>= fun json ->
4949- if verbose then begin
5050- Printf.printf " Raw Zotero response:\n%s\n%!" (J.value_to_string json)
5151- end;
5252- try
5353- let keys = J.get_dict (json :> J.value) in
5454- let title = J.find json ["title"] |> J.get_string in
5555- let authors = J.find json ["author"] |> J.get_list J.get_string in
5656- let year = J.find json ["year"] |> J.get_string |> int_of_string in
5757- let bibtype = J.find json ["bibtype"] |> J.get_string in
5858- let publisher =
5959- try
6060- (* Try journal first, then booktitle, then proceedingsTitle, then publisher *)
6161- match List.assoc_opt "journal" keys with
6262- | Some j -> J.get_string j
6363- | None ->
6464- match List.assoc_opt "booktitle" keys with
6565- | Some b -> J.get_string b
6666- | None ->
6767- match List.assoc_opt "proceedingsTitle" keys with
6868- | Some pt -> J.get_string pt
6969- | None ->
7070- match List.assoc_opt "publisher" keys with
7171- | Some p -> J.get_string p
7272- | None -> ""
7373- with _ -> ""
7474- in
7575- let entry = Bushel.Doi_entry.create_resolved ~doi ~title ~authors ~year ~bibtype ~publisher ~source_urls:[doi_url] () in
7676- Printf.printf " ✓ Resolved: %s (%d)\n%!" title year;
7777- Lwt.return entry
7878- with e ->
7979- Printf.eprintf " ✗ Failed to parse response for %s: %s\n%!" doi (Printexc.to_string e);
8080- Lwt.return (Bushel.Doi_entry.create_failed ~doi ~error:(Printexc.to_string e) ~source_urls:[doi_url] ())
8181- )
8282- (fun exn ->
8383- Printf.eprintf " ✗ Failed to resolve %s: %s\n%!" doi (Printexc.to_string exn);
8484- Lwt.return (Bushel.Doi_entry.create_failed ~doi ~error:(Printexc.to_string exn) ~source_urls:[doi_url] ())
8585- )
8686-8787-(* Resolve a publisher URL via Zotero /web endpoint *)
8888-let resolve_url zt ~verbose url =
8989- Printf.printf "Resolving URL: %s\n%!" url;
9090- Lwt.catch
9191- (fun () ->
9292- (* Use Zotero's resolve_url which calls /web endpoint with the URL directly *)
9393- ZT.resolve_url zt url >>= function
9494- | Error (`Msg err) ->
9595- Printf.eprintf " ✗ Failed to resolve URL: %s\n%!" err;
9696- Lwt.return (Bushel.Doi_entry.create_failed ~doi:url ~error:err ~source_urls:[url] ())
9797- | Ok json ->
9898- if verbose then begin
9999- Printf.printf " Raw Zotero response:\n%s\n%!" (J.value_to_string json)
100100- end;
101101- try
102102- (* Extract metadata from the JSON response *)
103103- let json_list = match json with
104104- | `A lst -> lst
105105- | single -> [single]
106106- in
107107- match json_list with
108108- | [] ->
109109- Printf.eprintf " ✗ Empty response\n%!";
110110- Lwt.return (Bushel.Doi_entry.create_failed ~doi:url ~error:"Empty response" ~source_urls:[url] ())
111111- | item :: _ ->
112112- (* Extract DOI if present, otherwise use URL *)
113113- let doi = try J.find item ["DOI"] |> J.get_string with _ ->
114114- try J.find item ["doi"] |> J.get_string with _ -> url
115115- in
116116- let title = try J.find item ["title"] |> J.get_string with _ ->
117117- "Unknown Title"
118118- in
119119- (* Extract authors from Zotero's "creators" field *)
120120- let authors = try
121121- J.find item ["creators"] |> J.get_list (fun creator_obj ->
122122- try
123123- let last_name = J.find creator_obj ["lastName"] |> J.get_string in
124124- let first_name = try J.find creator_obj ["firstName"] |> J.get_string with _ -> "" in
125125- if first_name = "" then last_name else first_name ^ " " ^ last_name
126126- with _ -> "Unknown Author"
127127- )
128128- with _ -> []
129129- in
130130- (* Extract year from Zotero's "date" field *)
131131- (* Handles both ISO format "2025-07" and text format "November 28, 2023" *)
132132- let year = try
133133- let date_str = J.find item ["date"] |> J.get_string in
134134- (* First try splitting on '-' for ISO dates like "2025-07" or "2024-11-04" *)
135135- let parts = String.split_on_char '-' date_str in
136136- match parts with
137137- | year_str :: _ when String.length year_str = 4 ->
138138- (try int_of_string year_str with _ -> 0)
139139- | _ ->
140140- (* Try splitting on space and comma for dates like "November 28, 2023" *)
141141- let space_parts = String.split_on_char ' ' date_str in
142142- let year_candidate = List.find_opt (fun s ->
143143- let s = String.trim (String.map (fun c -> if c = ',' then ' ' else c) s) in
144144- String.length s = 4 && String.for_all (function '0'..'9' -> true | _ -> false) s
145145- ) space_parts in
146146- (match year_candidate with
147147- | Some year_str -> int_of_string (String.trim year_str)
148148- | None -> 0)
149149- with _ -> 0
150150- in
151151- (* Extract type/bibtype from Zotero's "itemType" field *)
152152- let bibtype = try J.find item ["itemType"] |> J.get_string with _ -> "article" in
153153- (* Extract publisher/journal from Zotero's "publicationTitle" or "proceedingsTitle" field *)
154154- let publisher = try
155155- J.find item ["publicationTitle"] |> J.get_string
156156- with _ ->
157157- try J.find item ["proceedingsTitle"] |> J.get_string
158158- with _ -> ""
159159- in
160160- (* Include both the original URL and the DOI URL in source_urls *)
161161- let doi_url = if doi = url then [] else [Printf.sprintf "https://doi.org/%s" doi] in
162162- let source_urls = url :: doi_url in
163163- let entry = Bushel.Doi_entry.create_resolved ~doi ~title ~authors ~year ~bibtype ~publisher ~source_urls () in
164164- Printf.printf " ✓ Resolved: %s (%d) [DOI: %s]\n%!" title year doi;
165165- Lwt.return entry
166166- with e ->
167167- Printf.eprintf " ✗ Failed to parse response: %s\n%!" (Printexc.to_string e);
168168- Lwt.return (Bushel.Doi_entry.create_failed ~doi:url ~error:(Printexc.to_string e) ~source_urls:[url] ())
169169- )
170170- (fun exn ->
171171- Printf.eprintf " ✗ Failed to resolve %s: %s\n%!" url (Printexc.to_string exn);
172172- Lwt.return (Bushel.Doi_entry.create_failed ~doi:url ~error:(Printexc.to_string exn) ~source_urls:[url] ())
173173- )
174174-175175-let run base_dir force verbose =
176176- Printf.printf "Loading bushel database...\n%!";
177177- let entries = Bushel.load base_dir in
178178- let notes = Bushel.Entry.notes entries in
179179-180180- Printf.printf "Scanning %d notes for DOI URLs...\n%!" (List.length notes);
181181- let found_dois = extract_dois_from_notes notes in
182182- Printf.printf "Found %d unique DOIs\n%!" (List.length found_dois);
183183-184184- Printf.printf "Scanning %d notes for publisher URLs...\n%!" (List.length notes);
185185- let found_urls = extract_publisher_urls_from_notes notes in
186186- Printf.printf "Found %d unique publisher URLs\n%!" (List.length found_urls);
187187-188188- let data_dir = Bushel.Entry.data_dir entries in
189189- let doi_yml_path = Filename.concat data_dir "doi.yml" in
190190- Printf.printf "Loading existing DOI cache from %s...\n%!" doi_yml_path;
191191- let existing_entries = Bushel.Doi_entry.load doi_yml_path in
192192- Printf.printf "Loaded %d cached DOI entries\n%!" (List.length existing_entries);
193193-194194- (* Filter DOIs that need resolution *)
195195- let dois_to_resolve =
196196- List.filter (fun doi ->
197197- match Bushel.Doi_entry.find_by_doi_including_ignored existing_entries doi with
198198- | Some _ when not force ->
199199- Printf.printf "Skipping DOI %s (already cached)\n%!" doi;
200200- false
201201- | Some _ when force ->
202202- Printf.printf "Re-resolving DOI %s (--force)\n%!" doi;
203203- true
204204- | Some _ -> false (* Catch-all for Some case *)
205205- | None -> true
206206- ) found_dois
207207- in
208208-209209- (* Filter URLs that need resolution *)
210210- let urls_to_resolve =
211211- List.filter (fun url ->
212212- match Bushel.Doi_entry.find_by_url_including_ignored existing_entries url with
213213- | Some _ when not force ->
214214- Printf.printf "Skipping URL %s (already cached)\n%!" url;
215215- false
216216- | Some _ when force ->
217217- Printf.printf "Re-resolving URL %s (--force)\n%!" url;
218218- true
219219- | Some _ -> false (* Catch-all for Some case *)
220220- | None -> true
221221- ) found_urls
222222- in
223223-224224- if List.length dois_to_resolve = 0 && List.length urls_to_resolve = 0 then begin
225225- Printf.printf "No DOIs or URLs to resolve!\n%!";
226226- 0
227227- end else begin
228228- Printf.printf "Resolving %d DOI(s) and %d URL(s)...\n%!" (List.length dois_to_resolve) (List.length urls_to_resolve);
229229-230230- let zt = ZT.v "http://svr-avsm2-eeg-ce:1969" in
231231-232232- (* Resolve all DOIs *)
233233- let resolved_doi_entries_lwt =
234234- Lwt_list.map_s (resolve_doi zt ~verbose) dois_to_resolve
235235- in
236236-237237- (* Resolve all publisher URLs *)
238238- let resolved_url_entries_lwt =
239239- Lwt_list.map_s (resolve_url zt ~verbose) urls_to_resolve
240240- in
241241-242242- let new_doi_entries = Lwt_main.run resolved_doi_entries_lwt in
243243- let new_url_entries = Lwt_main.run resolved_url_entries_lwt in
244244- let new_entries = new_doi_entries @ new_url_entries in
245245-246246- (* Merge with existing entries, combining source_urls for entries with the same DOI *)
247247- let all_entries =
248248- if force then
249249- (* Replace existing entries with new ones - match by DOI *)
250250- let is_updated entry =
251251- List.exists (fun new_e ->
252252- new_e.Bushel.Doi_entry.doi = entry.Bushel.Doi_entry.doi
253253- ) new_entries
254254- in
255255- let kept_existing = List.filter (fun e -> not (is_updated e)) existing_entries in
256256- kept_existing @ new_entries
257257- else
258258- (* Merge new entries with existing ones, combining source_urls *)
259259- let merged = ref existing_entries in
260260- List.iter (fun new_entry ->
261261- match Bushel.Doi_entry.find_by_doi_including_ignored !merged new_entry.Bushel.Doi_entry.doi with
262262- | Some existing_entry ->
263263- (* DOI already exists - merge the entries by combining source_urls and preserving ignore flag *)
264264- let combined = Bushel.Doi_entry.merge_entries existing_entry new_entry in
265265- merged := combined :: (List.filter (fun e -> e.Bushel.Doi_entry.doi <> new_entry.Bushel.Doi_entry.doi) !merged)
266266- | None ->
267267- (* New DOI - add it *)
268268- merged := new_entry :: !merged
269269- ) new_entries;
270270- !merged
271271- in
272272-273273- (* Save updated cache *)
274274- Printf.printf "Saving %d total entries to %s...\n%!" (List.length all_entries) doi_yml_path;
275275- Bushel.Doi_entry.save doi_yml_path all_entries;
276276-277277- Printf.printf "Done!\n%!";
278278- 0
279279- end
280280-281281-let force_flag =
282282- let doc = "Force re-resolution of already cached DOIs" in
283283- Arg.(value & flag & info ["force"; "f"] ~doc)
284284-285285-let verbose_flag =
286286- let doc = "Show raw Zotero API responses for debugging" in
287287- Arg.(value & flag & info ["verbose"; "v"] ~doc)
288288-289289-let term =
290290- Term.(const run $ Bushel_common.base_dir $ force_flag $ verbose_flag)
291291-292292-let cmd =
293293- let doc = "Resolve DOIs found in notes via Zotero Translation Server" in
294294- let info = Cmd.info "doi-resolve" ~doc in
295295- Cmd.v info term
-182
stack/bushel/bin/bushel_faces.ml
···11-open Cmdliner
22-open Lwt.Infix
33-open Printf
44-55-(* Type for person response *)
66-type person = {
77- id: string;
88- name: string;
99- thumbnailPath: string option;
1010-}
1111-1212-(* Parse a person from JSON *)
1313-let parse_person json =
1414- let open Ezjsonm in
1515- let id = find json ["id"] |> get_string in
1616- let name = find json ["name"] |> get_string in
1717- let thumbnailPath =
1818- try Some (find json ["thumbnailPath"] |> get_string)
1919- with _ -> None
2020- in
2121- { id; name; thumbnailPath }
2222-2323-(* Parse a list of people from JSON response *)
2424-let parse_people_response json =
2525- let open Ezjsonm in
2626- get_list parse_person json
2727-2828-(* Read API key from file *)
2929-let read_api_key file =
3030- let ic = open_in file in
3131- let key = input_line ic in
3232- close_in ic;
3333- key
3434-3535-(* Search for a person by name *)
3636-let search_person base_url api_key name =
3737- let open Cohttp_lwt_unix in
3838- let headers = Cohttp.Header.init_with "X-Api-Key" api_key in
3939- let encoded_name = Uri.pct_encode name in
4040- let url = Printf.sprintf "%s/api/search/person?name=%s" base_url encoded_name in
4141-4242- Client.get ~headers (Uri.of_string url) >>= fun (resp, body) ->
4343- if resp.status = `OK then
4444- Cohttp_lwt.Body.to_string body >>= fun body_str ->
4545- let json = Ezjsonm.from_string body_str in
4646- Lwt.return (parse_people_response json)
4747- else
4848- let status_code = Cohttp.Code.code_of_status resp.status in
4949- Lwt.fail_with (Printf.sprintf "HTTP error: %d" status_code)
5050-5151-(* Download thumbnail for a person *)
5252-let download_thumbnail base_url api_key person_id output_path =
5353- let open Cohttp_lwt_unix in
5454- let headers = Cohttp.Header.init_with "X-Api-Key" api_key in
5555- let url = Printf.sprintf "%s/api/people/%s/thumbnail" base_url person_id in
5656-5757- Client.get ~headers (Uri.of_string url) >>= fun (resp, body) ->
5858- match resp.status with
5959- | `OK ->
6060- Cohttp_lwt.Body.to_string body >>= fun img_data ->
6161- (* Ensure output directory exists *)
6262- (try
6363- let dir = Filename.dirname output_path in
6464- if not (Sys.file_exists dir) then Unix.mkdir dir 0o755;
6565- Lwt.return_unit
6666- with _ -> Lwt.return_unit) >>= fun () ->
6767- Lwt_io.with_file ~mode:Lwt_io.output output_path
6868- (fun oc -> Lwt_io.write oc img_data) >>= fun () ->
6969- Lwt.return_ok output_path
7070- | _ ->
7171- let status_code = Cohttp.Code.code_of_status resp.status in
7272- Lwt.return_error (Printf.sprintf "HTTP error: %d" status_code)
7373-7474-(* Get face for a single contact *)
7575-(* TODO:claude *)
7676-let get_face_for_contact base_url api_key output_dir contact =
7777- let names = Bushel.Contact.names contact in
7878- let handle = Bushel.Contact.handle contact in
7979- let output_path = Filename.concat output_dir (handle ^ ".jpg") in
8080-8181- (* Skip if file already exists *)
8282- if Sys.file_exists output_path then
8383- Lwt.return (`Skipped (sprintf "Thumbnail for '%s' already exists at %s" (List.hd names) output_path))
8484- else begin
8585- printf "Processing contact: %s (handle: %s)\n%!" (List.hd names) handle;
8686-8787- (* Try each name in the list until we find a match *)
8888- let rec try_names = function
8989- | [] ->
9090- Lwt.return (`Error (sprintf "No person found with any name for contact '%s'" handle))
9191- | name :: rest_names ->
9292- printf " Trying name: %s\n%!" name;
9393- search_person base_url api_key name >>= function
9494- | [] ->
9595- printf " No results for '%s', trying next name...\n%!" name;
9696- try_names rest_names
9797- | person :: _ ->
9898- printf " Found match for '%s'\n%!" name;
9999- download_thumbnail base_url api_key person.id output_path >>= function
100100- | Ok path ->
101101- Lwt.return (`Ok (sprintf "Saved thumbnail for '%s' to %s" name path))
102102- | Error err ->
103103- Lwt.return (`Error (sprintf "Error for '%s': %s" name err))
104104- in
105105- try_names names
106106- end
107107-108108-(* Process all contacts or a specific one *)
109109-let process_contacts base_dir output_dir specific_handle api_key base_url =
110110- printf "Loading Bushel database from %s\n%!" base_dir;
111111- let db = Bushel.load base_dir in
112112- let contacts = Bushel.Entry.contacts db in
113113- printf "Found %d contacts\n%!" (List.length contacts);
114114-115115- (* Ensure output directory exists *)
116116- if not (Sys.file_exists output_dir) then Unix.mkdir output_dir 0o755;
117117-118118- (* Filter contacts based on specific_handle if provided *)
119119- let contacts_to_process =
120120- match specific_handle with
121121- | Some handle ->
122122- begin match Bushel.Contact.find_by_handle contacts handle with
123123- | Some contact -> [contact]
124124- | None ->
125125- eprintf "No contact found with handle '%s'\n%!" handle;
126126- []
127127- end
128128- | None -> contacts
129129- in
130130-131131- (* Process each contact *)
132132- let results = Lwt_main.run begin
133133- Lwt_list.map_s
134134- (fun contact ->
135135- get_face_for_contact base_url api_key output_dir contact >>= fun result ->
136136- Lwt.return (Bushel.Contact.handle contact, result))
137137- contacts_to_process
138138- end in
139139-140140- (* Print summary *)
141141- let ok_count = List.length (List.filter (fun (_, r) -> match r with `Ok _ -> true | _ -> false) results) in
142142- let error_count = List.length (List.filter (fun (_, r) -> match r with `Error _ -> true | _ -> false) results) in
143143- let skipped_count = List.length (List.filter (fun (_, r) -> match r with `Skipped _ -> true | _ -> false) results) in
144144-145145- printf "\nSummary:\n";
146146- printf " Successfully processed: %d\n" ok_count;
147147- printf " Errors: %d\n" error_count;
148148- printf " Skipped (already exist): %d\n" skipped_count;
149149-150150- (* Print detailed results *)
151151- if error_count > 0 then begin
152152- printf "\nError details:\n";
153153- List.iter (fun (handle, result) ->
154154- match result with
155155- | `Error msg -> printf " %s: %s\n" handle msg
156156- | _ -> ())
157157- results;
158158- end;
159159-160160- if ok_count > 0 || skipped_count > 0 then 0 else 1
161161-162162-(* Command line interface *)
163163-164164-(* Export the term for use in main bushel.ml *)
165165-let term =
166166- Term.(
167167- const (fun base_dir output_dir handle api_key_file base_url ->
168168- try
169169- let api_key = read_api_key api_key_file in
170170- process_contacts base_dir output_dir handle api_key base_url
171171- with e ->
172172- eprintf "Error: %s\n%!" (Printexc.to_string e);
173173- 1
174174- ) $ Bushel_common.base_dir $ Bushel_common.output_dir ~default:"." $ Bushel_common.handle_opt $
175175- Bushel_common.api_key_file ~default:".photos-api" $
176176- Bushel_common.url_term ~default:"https://photos.recoil.org" ~doc:"Base URL of the Immich instance")
177177-178178-let cmd =
179179- let info = Cmd.info "faces" ~doc:"Retrieve face thumbnails for Bushel contacts from Immich" in
180180- Cmd.v info term
181181-182182-(* Main entry point removed - accessed through bushel_main.ml *)
-77
stack/bushel/bin/bushel_ideas.ml
···11-open Cmdliner
22-33-(** TODO:claude List completed ideas as markdown bullet list *)
44-let list_ideas_md base_dir =
55- let ideas_dir = Printf.sprintf "%s/ideas" base_dir in
66- let contacts_dir = Printf.sprintf "%s/contacts" base_dir in
77-88- if not (Sys.file_exists ideas_dir) then (
99- Printf.eprintf "Ideas directory not found: %s\n" ideas_dir;
1010- 1
1111- ) else (
1212- (* Load all contacts *)
1313- let contacts =
1414- if Sys.file_exists contacts_dir then
1515- Sys.readdir contacts_dir
1616- |> Array.to_list
1717- |> List.filter (String.ends_with ~suffix:".md")
1818- |> List.filter_map (fun contact_file ->
1919- let filepath = Filename.concat contacts_dir contact_file in
2020- try Some (Bushel.Contact.of_md filepath)
2121- with e ->
2222- Printf.eprintf "Error loading contact %s: %s\n" filepath (Printexc.to_string e);
2323- None
2424- )
2525- else []
2626- in
2727-2828- let idea_files = Sys.readdir ideas_dir
2929- |> Array.to_list
3030- |> List.filter (String.ends_with ~suffix:".md") in
3131- let ideas = List.filter_map (fun idea_file ->
3232- let filepath = Filename.concat ideas_dir idea_file in
3333- try
3434- let idea = Bushel.Idea.of_md filepath in
3535- match Bushel.Idea.status idea with
3636- | Bushel.Idea.Completed -> Some idea
3737- | _ -> None
3838- with e ->
3939- Printf.eprintf "Error processing %s: %s\n" filepath (Printexc.to_string e);
4040- None
4141- ) idea_files in
4242-4343- (* Sort by year descending *)
4444- let sorted_ideas = List.sort (fun a b ->
4545- compare (Bushel.Idea.year b) (Bushel.Idea.year a)
4646- ) ideas in
4747-4848- (* Output as markdown bullet list *)
4949- List.iter (fun idea ->
5050- let student_names =
5151- Bushel.Idea.students idea
5252- |> List.filter_map (fun handle ->
5353- match Bushel.Contact.find_by_handle contacts handle with
5454- | Some contact -> Some (Bushel.Contact.name contact)
5555- | None ->
5656- Printf.eprintf "Warning: contact not found for handle %s\n" handle;
5757- Some handle
5858- )
5959- |> String.concat ", "
6060- in
6161- let level_str = Bushel.Idea.level_to_string (Bushel.Idea.level idea) in
6262- Printf.printf "- %d: \"%s\", %s (%s)\n"
6363- (Bushel.Idea.year idea)
6464- (Bushel.Idea.title idea)
6565- student_names
6666- level_str
6767- ) sorted_ideas;
6868- 0
6969- )
7070-7171-let term =
7272- Term.(const list_ideas_md $ Bushel_common.base_dir)
7373-7474-let cmd =
7575- let doc = "List completed ideas as markdown bullet list" in
7676- let info = Cmd.info "ideas-md" ~doc in
7777- Cmd.v info term
-227
stack/bushel/bin/bushel_info.ml
···11-open Cmdliner
22-open Bushel
33-44-(** Determine the color for a note based on DOI and perma status *)
55-let note_color n =
66- match Note.doi n, Note.perma n with
77- | None, false -> `Red (* No DOI, no perma - red (normal note) *)
88- | None, true -> `Magenta (* Has perma but no DOI - magenta (needs DOI assignment) *)
99- | Some _, true -> `Green (* Has DOI with perma:true - green (correct state) *)
1010- | Some _, false -> `Yellow (* Has DOI without perma:true - yellow (bug in metadata) *)
1111-1212-(** TODO:claude List all slugs with their types *)
1313-let list_all_slugs entries ~notes_only =
1414- let all = Entry.all_entries entries in
1515- (* Filter for notes only if requested *)
1616- let filtered = if notes_only then
1717- List.filter (fun entry -> match entry with `Note _ -> true | _ -> false) all
1818- else all in
1919- (* Sort by slug for consistent output *)
2020- let sorted = List.sort (fun a b ->
2121- String.compare (Entry.slug a) (Entry.slug b)
2222- ) filtered in
2323- Fmt.pr "@[<v>";
2424- Fmt.pr "%a@," (Fmt.styled `Bold Fmt.string) (if notes_only then "Available notes:" else "Available entries:");
2525- Fmt.pr "@,";
2626- List.iter (fun entry ->
2727- let slug = Entry.slug entry in
2828- let type_str = Entry.to_type_string entry in
2929- let title = Entry.title entry in
3030- (* Color code notes based on DOI/perma status *)
3131- match entry with
3232- | `Note n ->
3333- let color = note_color n in
3434- Fmt.pr " %a %a - %a@,"
3535- (Fmt.styled color Fmt.string) slug
3636- (Fmt.styled `Faint Fmt.string) (Printf.sprintf "(%s)" type_str)
3737- Fmt.string title
3838- | _ ->
3939- Fmt.pr " %a %a - %a@,"
4040- (Fmt.styled `Cyan Fmt.string) slug
4141- (Fmt.styled `Faint Fmt.string) (Printf.sprintf "(%s)" type_str)
4242- Fmt.string title
4343- ) sorted;
4444- Fmt.pr "@]@.";
4545- 0
4646-4747-(** TODO:claude Main info command implementation *)
4848-let info_cmd base_dir notes_only slug_opt _env _xdg _profile =
4949- let entries = load base_dir in
5050- match slug_opt with
5151- | None ->
5252- list_all_slugs entries ~notes_only
5353- | Some slug ->
5454- (* Handle contact handles starting with @ *)
5555- if String.starts_with ~prefix:"@" slug then
5656- let handle = String.sub slug 1 (String.length slug - 1) in
5757- match Contact.find_by_handle (Entry.contacts entries) handle with
5858- | None ->
5959- Fmt.epr "Error: No contact found with handle '@%s'@." handle;
6060- 1
6161- | Some contact ->
6262- Contact.pp Fmt.stdout contact;
6363- (* Add thumbnail information for contact *)
6464- (match Entry.contact_thumbnail_slug contact with
6565- | Some thumb_slug ->
6666- Fmt.pr "@.@.";
6767- Fmt.pr "@[<v>%a: %s@," (Fmt.styled `Bold Fmt.string) "Thumbnail Slug" thumb_slug;
6868- (* Look up the image in srcsetter *)
6969- (match Entry.lookup_image entries thumb_slug with
7070- | Some img ->
7171- let thumbnail_url = Entry.smallest_webp_variant img in
7272- Fmt.pr "%a: %s@," (Fmt.styled `Bold Fmt.string) "Thumbnail URL" thumbnail_url;
7373- Fmt.pr "%a: %s@," (Fmt.styled `Bold Fmt.string) "Origin" (Srcsetter.origin img);
7474- let (w, h) = Srcsetter.dims img in
7575- Fmt.pr "%a: %dx%d@," (Fmt.styled `Bold Fmt.string) "Dimensions" w h;
7676- let variants = Srcsetter.variants img in
7777- if not (Srcsetter.MS.is_empty variants) then begin
7878- Fmt.pr "%a:@," (Fmt.styled `Bold Fmt.string) "Variants";
7979- Srcsetter.MS.iter (fun name (vw, vh) ->
8080- Fmt.pr " %s: %dx%d@," name vw vh
8181- ) variants
8282- end;
8383- Fmt.pr "@]"
8484- | None ->
8585- Fmt.epr "Warning: Contact thumbnail image not in srcsetter: %s@." thumb_slug;
8686- Fmt.pr "@]";
8787- ())
8888- | None -> ());
8989- (* Add Typesense JSON *)
9090- let doc = Typesense.contact_to_document contact in
9191- Fmt.pr "@.@.";
9292- Fmt.pr "%a:@," (Fmt.styled `Bold Fmt.string) "Typesense Document";
9393- Fmt.pr "%s@," (Ezjsonm.value_to_string ~minify:false doc);
9494- (* Add backlinks information for contact *)
9595- let backlinks = Bushel.Link_graph.get_backlinks_for_slug handle in
9696- if backlinks <> [] then begin
9797- Fmt.pr "@.@.";
9898- Fmt.pr "%a (%d):@," (Fmt.styled `Bold Fmt.string) "Backlinks" (List.length backlinks);
9999- List.iter (fun source_slug ->
100100- match Entry.lookup entries source_slug with
101101- | Some source_entry ->
102102- let source_type = Entry.to_type_string source_entry in
103103- let source_title = Entry.title source_entry in
104104- Fmt.pr " %a %a - %a@,"
105105- (Fmt.styled `Cyan Fmt.string) source_slug
106106- (Fmt.styled `Faint Fmt.string) (Printf.sprintf "(%s)" source_type)
107107- Fmt.string source_title
108108- | None ->
109109- Fmt.pr " %a %a@,"
110110- (Fmt.styled `Cyan Fmt.string) source_slug
111111- (Fmt.styled `Red Fmt.string) "(not found)"
112112- ) backlinks
113113- end;
114114- Fmt.pr "@.";
115115- 0
116116- else
117117- (* Remove leading ':' if present, as slugs are stored without it *)
118118- let normalized_slug =
119119- if String.starts_with ~prefix:":" slug
120120- then String.sub slug 1 (String.length slug - 1)
121121- else slug
122122- in
123123- match Entry.lookup entries normalized_slug with
124124- | None ->
125125- Fmt.epr "Error: No entry found with slug '%s'@." slug;
126126- 1
127127- | Some entry ->
128128- (match entry with
129129- | `Paper p -> Paper.pp Fmt.stdout p
130130- | `Project p -> Project.pp Fmt.stdout p
131131- | `Idea i -> Idea.pp Fmt.stdout i
132132- | `Video v -> Video.pp Fmt.stdout v
133133- | `Note n -> Note.pp Fmt.stdout n);
134134- (* Add thumbnail information if available *)
135135- (match Entry.thumbnail_slug entries entry with
136136- | Some thumb_slug ->
137137- Fmt.pr "@.@.";
138138- Fmt.pr "@[<v>%a: %s@," (Fmt.styled `Bold Fmt.string) "Thumbnail Slug" thumb_slug;
139139- (* Look up the image in srcsetter *)
140140- (match Entry.lookup_image entries thumb_slug with
141141- | Some img ->
142142- let thumbnail_url = Entry.smallest_webp_variant img in
143143- Fmt.pr "%a: %s@," (Fmt.styled `Bold Fmt.string) "Thumbnail URL" thumbnail_url;
144144- Fmt.pr "%a: %s@," (Fmt.styled `Bold Fmt.string) "Origin" (Srcsetter.origin img);
145145- let (w, h) = Srcsetter.dims img in
146146- Fmt.pr "%a: %dx%d@," (Fmt.styled `Bold Fmt.string) "Dimensions" w h;
147147- let variants = Srcsetter.variants img in
148148- if not (Srcsetter.MS.is_empty variants) then begin
149149- Fmt.pr "%a:@," (Fmt.styled `Bold Fmt.string) "Variants";
150150- Srcsetter.MS.iter (fun name (vw, vh) ->
151151- Fmt.pr " %s: %dx%d@," name vw vh
152152- ) variants
153153- end;
154154- Fmt.pr "@]"
155155- | None ->
156156- Fmt.epr "Warning: Thumbnail image not in srcsetter: %s@." thumb_slug;
157157- Fmt.pr "@]";
158158- ())
159159- | None -> ());
160160- (* Add Typesense JSON *)
161161- let doc = match entry with
162162- | `Paper p -> Typesense.paper_to_document entries p
163163- | `Project p -> Typesense.project_to_document entries p
164164- | `Idea i -> Typesense.idea_to_document entries i
165165- | `Video v -> Typesense.video_to_document entries v
166166- | `Note n -> Typesense.note_to_document entries n
167167- in
168168- Fmt.pr "@.@.";
169169- Fmt.pr "%a:@," (Fmt.styled `Bold Fmt.string) "Typesense Document";
170170- Fmt.pr "%s@," (Ezjsonm.value_to_string ~minify:false doc);
171171- (* Add backlinks information *)
172172- let backlinks = Bushel.Link_graph.get_backlinks_for_slug normalized_slug in
173173- if backlinks <> [] then begin
174174- Fmt.pr "@.@.";
175175- Fmt.pr "%a (%d):@," (Fmt.styled `Bold Fmt.string) "Backlinks" (List.length backlinks);
176176- List.iter (fun source_slug ->
177177- match Entry.lookup entries source_slug with
178178- | Some source_entry ->
179179- let source_type = Entry.to_type_string source_entry in
180180- let source_title = Entry.title source_entry in
181181- Fmt.pr " %a %a - %a@,"
182182- (Fmt.styled `Cyan Fmt.string) source_slug
183183- (Fmt.styled `Faint Fmt.string) (Printf.sprintf "(%s)" source_type)
184184- Fmt.string source_title
185185- | None ->
186186- Fmt.pr " %a %a@,"
187187- (Fmt.styled `Cyan Fmt.string) source_slug
188188- (Fmt.styled `Red Fmt.string) "(not found)"
189189- ) backlinks
190190- end;
191191- (* Add references information for notes *)
192192- (match entry with
193193- | `Note n ->
194194- let default_author = match Contact.find_by_handle (Entry.contacts entries) "avsm" with
195195- | Some c -> c
196196- | None -> failwith "Default author 'avsm' not found"
197197- in
198198- let references = Md.note_references entries default_author n in
199199- if references <> [] then begin
200200- Fmt.pr "@.@.";
201201- Fmt.pr "%a (%d):@," (Fmt.styled `Bold Fmt.string) "References" (List.length references);
202202- List.iter (fun (doi, citation, _is_paper) ->
203203- Fmt.pr " %a: %s@,"
204204- (Fmt.styled `Cyan Fmt.string) doi
205205- citation
206206- ) references
207207- end
208208- | _ -> ());
209209- Fmt.pr "@.";
210210- 0
211211-212212-(** TODO:claude Command line interface definition *)
213213-let notes_only_flag =
214214- let doc = "Show only notes when listing entries" in
215215- Arg.(value & flag & info ["notes-only"; "n"] ~doc)
216216-217217-let slug_arg =
218218- let doc = "The slug of the entry to display (with or without leading ':'), or contact handle (with '@' prefix). If not provided, lists all available slugs." in
219219- Arg.(value & pos 0 (some string) None & info [] ~docv:"SLUG" ~doc)
220220-221221-let term : (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Cmdliner.Term.t =
222222- Term.(const info_cmd $ Bushel_common.base_dir $ notes_only_flag $ slug_arg)
223223-224224-let cmd =
225225- let doc = "Display all information for a given slug" in
226226- let info = Cmd.info "info" ~doc in
227227- Cmd.v info term
-549
stack/bushel/bin/bushel_links.ml
···11-open Cmdliner
22-open Lwt.Infix
33-44-(* Helper function for logging with proper flushing *)
55-let log fmt = Fmt.kstr (fun s -> prerr_string s; flush stderr) fmt
66-let log_verbose verbose fmt =
77- if verbose then Fmt.kstr (fun s -> prerr_string s; flush stderr) fmt
88- else Fmt.kstr (fun _ -> ()) fmt
99-1010-(* Initialize a new links.yml file or ensure it exists *)
1111-let init_links_file links_file =
1212- if Sys.file_exists links_file then
1313- print_endline (Fmt.str "Links file %s already exists" links_file)
1414- else begin
1515- (* Create an empty links file *)
1616- Bushel.Link.save_links_file links_file [];
1717- print_endline (Fmt.str "Created empty links file: %s" links_file)
1818- end;
1919- 0
2020-2121-(* Update links.yml from Karakeep *)
2222-let update_from_karakeep base_url api_key_opt tag links_file download_assets =
2323- match api_key_opt with
2424- | None ->
2525- prerr_endline "Error: API key is required.";
2626- prerr_endline "Please provide one with --api-key or create a ~/.karakeep-api file.";
2727- 1
2828- | Some api_key ->
2929- let assets_dir = "data/assets" in
3030-3131- (* Run the Lwt program *)
3232- Lwt_main.run (
3333- print_endline (Fmt.str "Fetching links from %s with tag '%s'..." base_url tag);
3434-3535- (* Prepare tag filter *)
3636- let filter_tags = if tag = "" then [] else [tag] in
3737-3838- (* Fetch bookmarks from Karakeep with error handling *)
3939- Lwt.catch
4040- (fun () ->
4141- Karakeep.fetch_all_bookmarks ~api_key ~filter_tags base_url >>= fun bookmarks ->
4242-4343- print_endline (Fmt.str "Retrieved %d bookmarks from Karakeep" (List.length bookmarks));
4444-4545- (* Read existing links if file exists *)
4646- let existing_links = Bushel.Link.load_links_file links_file in
4747-4848- (* Convert bookmarks to bushel links *)
4949- let new_links = List.map (fun bookmark ->
5050- Karakeep.to_bushel_link ~base_url bookmark
5151- ) bookmarks in
5252-5353- (* Merge with existing links - keep existing dates (karakeep dates may be unreliable) *)
5454- let merged_links = Bushel.Link.merge_links existing_links new_links in
5555-5656- (* Save the updated links file *)
5757- Bushel.Link.save_links_file links_file merged_links;
5858-5959- print_endline (Fmt.str "Updated %s with %d links" links_file (List.length merged_links));
6060-6161- (* Download assets if requested *)
6262- if download_assets then begin
6363- print_endline "Downloading assets for bookmarks...";
6464-6565- (* Ensure the assets directory exists *)
6666- (try Unix.mkdir assets_dir 0o755 with Unix.Unix_error (Unix.EEXIST, _, _) -> ());
6767-6868- (* Process each bookmark with assets *)
6969- Lwt_list.iter_s (fun bookmark ->
7070- (* Extract asset IDs from bookmark *)
7171- let assets = bookmark.Karakeep.assets in
7272-7373- (* Skip if no assets *)
7474- if assets = [] then
7575- Lwt.return_unit
7676- else
7777- (* Process each asset *)
7878- Lwt_list.iter_s (fun (asset_id, asset_type) ->
7979- let asset_dir = Fmt.str "%s/%s" assets_dir asset_id in
8080- let asset_file = Fmt.str "%s/asset.bin" asset_dir in
8181- let meta_file = Fmt.str "%s/metadata.json" asset_dir in
8282-8383- (* Skip if the asset already exists *)
8484- if Sys.file_exists asset_file then
8585- Lwt.return_unit
8686- else begin
8787- (* Create the asset directory *)
8888- (try Unix.mkdir asset_dir 0o755 with Unix.Unix_error (Unix.EEXIST, _, _) -> ());
8989-9090- (* Download the asset *)
9191- print_endline (Fmt.str "Downloading %s asset %s..." asset_type asset_id);
9292- Karakeep.fetch_asset ~api_key base_url asset_id >>= fun data ->
9393-9494- (* Guess content type based on first bytes *)
9595- let content_type =
9696- if String.length data >= 4 && String.sub data 0 4 = "\x89PNG" then
9797- "image/png"
9898- else if String.length data >= 3 && String.sub data 0 3 = "\xFF\xD8\xFF" then
9999- "image/jpeg"
100100- else if String.length data >= 4 && String.sub data 0 4 = "%PDF" then
101101- "application/pdf"
102102- else
103103- "application/octet-stream"
104104- in
105105-106106- (* Write the asset data *)
107107- Lwt_io.with_file ~mode:Lwt_io.Output asset_file (fun oc ->
108108- Lwt_io.write oc data
109109- ) >>= fun () ->
110110-111111- (* Write metadata file *)
112112- let metadata = Fmt.str "{\n \"contentType\": \"%s\",\n \"assetType\": \"%s\"\n}"
113113- content_type asset_type in
114114- Lwt_io.with_file ~mode:Lwt_io.Output meta_file (fun oc ->
115115- Lwt_io.write oc metadata
116116- )
117117- end
118118- ) assets
119119- ) bookmarks >>= fun () ->
120120-121121- print_endline "Asset download completed.";
122122- Lwt.return 0
123123- end else
124124- Lwt.return 0
125125- )
126126- (fun exn ->
127127- prerr_endline (Fmt.str "Error fetching bookmarks: %s" (Printexc.to_string exn));
128128- Lwt.return 1
129129- )
130130- )
131131-132132-(* Extract outgoing links from Bushel entries *)
133133-let update_from_bushel bushel_dir links_file include_domains exclude_domains =
134134- (* Parse domain filters if provided *)
135135- let include_domains_list = match include_domains with
136136- | None -> []
137137- | Some s -> String.split_on_char ',' s |> List.map String.trim
138138- in
139139-140140- let exclude_domains_list = match exclude_domains with
141141- | None -> []
142142- | Some s -> String.split_on_char ',' s |> List.map String.trim
143143- in
144144-145145- (* Show filter settings if any *)
146146- if include_domains_list <> [] then
147147- print_endline (Fmt.str "Including only domains: %s" (String.concat ", " include_domains_list));
148148-149149- if exclude_domains_list <> [] then
150150- print_endline (Fmt.str "Excluding domains: %s" (String.concat ", " exclude_domains_list));
151151-152152- (* Load all entries from the bushel directory *)
153153- let notes_dir = Filename.concat bushel_dir "data/notes" in
154154-155155- (* Make sure the notes directory exists *)
156156- if not (Sys.file_exists notes_dir) then begin
157157- prerr_endline (Fmt.str "Error: Notes directory %s does not exist" notes_dir);
158158- exit 1
159159- end;
160160-161161- (* Load all entries with fallback *)
162162- print_endline (Fmt.str "Loading entries from %s..." bushel_dir);
163163-164164- let entries_data = Bushel.load bushel_dir in
165165- let all_entries = Bushel.Entry.all_entries entries_data in
166166- print_endline (Fmt.str "Loaded %d entries" (List.length all_entries));
167167-168168- (* Extract outgoing links from all entries *)
169169- print_endline "Extracting outgoing links...";
170170- let extracted_links = ref [] in
171171-172172- (* Process each entry *)
173173- List.iter (fun entry ->
174174- let entry_body = Bushel.Entry.body entry in
175175- let entry_slug = Bushel.Entry.slug entry in
176176-177177- (* Skip empty bodies *)
178178- if entry_body <> "" then begin
179179- let links = Bushel.Entry.extract_external_links entry_body in
180180- if links <> [] then begin
181181- (* Add each link from this entry *)
182182- List.iter (fun url ->
183183- (* Try to extract domain from URL *)
184184- let domain =
185185- try
186186- let uri = Uri.of_string url in
187187- match Uri.host uri with
188188- | Some host -> host
189189- | None -> "unknown"
190190- with _ -> "unknown"
191191- in
192192-193193- (* Filter by domain if filters are specified *)
194194- let include_by_domain =
195195- if include_domains_list <> [] then
196196- List.exists (fun filter ->
197197- domain = filter || String.ends_with ~suffix:filter domain
198198- ) include_domains_list
199199- else true
200200- in
201201-202202- let exclude_by_domain =
203203- List.exists (fun filter ->
204204- domain = filter || String.ends_with ~suffix:filter domain
205205- ) exclude_domains_list
206206- in
207207-208208- if include_by_domain && not exclude_by_domain then begin
209209- let date = Bushel.Entry.date entry in
210210-211211- (* Extract tags from the entry *)
212212- let entry_tags = Bushel.Tags.tags_of_ent entries_data entry in
213213- let tag_strings = List.map Bushel.Tags.to_string entry_tags in
214214-215215- let link = {
216216- Bushel.Link.url;
217217- date;
218218- description = "";
219219- karakeep = None;
220220- bushel = Some {
221221- Bushel.Link.slugs = [entry_slug];
222222- tags = tag_strings
223223- };
224224- } in
225225- extracted_links := link :: !extracted_links
226226- end
227227- ) links
228228- end
229229- end
230230- ) all_entries;
231231-232232- (* Load existing links *)
233233- let existing_links = Bushel.Link.load_links_file links_file in
234234-235235- (* Merge with existing links - prefer bushel entry dates *)
236236- let merged_links = Bushel.Link.merge_links ~prefer_new_date:true existing_links !extracted_links in
237237-238238- (* Save the updated links file *)
239239- Bushel.Link.save_links_file links_file merged_links;
240240-241241- print_endline (Fmt.str "Added %d extracted links from Bushel to %s"
242242- (List.length !extracted_links) links_file);
243243- print_endline (Fmt.str "Total links in file: %d" (List.length merged_links));
244244- 0
245245-246246-(* Helper function to filter links that don't have karakeep data for a specific remote *)
247247-let filter_links_without_karakeep base_url links =
248248- List.filter (fun link ->
249249- match link.Bushel.Link.karakeep with
250250- | Some { remote_url; _ } when remote_url = base_url -> false
251251- | _ -> true
252252- ) links
253253-254254-(* Helper function to apply limit to links if specified *)
255255-let apply_limit_to_links limit links =
256256- match limit with
257257- | Some n when n > 0 ->
258258- let rec take_n acc count = function
259259- | [] -> List.rev acc
260260- | _ when count = 0 -> List.rev acc
261261- | x :: xs -> take_n (x :: acc) (count - 1) xs
262262- in
263263- let limited = take_n [] n links in
264264- if List.length links > n then
265265- log "Limited to first %d links (out of %d available)\n" n (List.length links);
266266- limited
267267- | _ -> links
268268-269269-(* Helper function to prepare tags for a link *)
270270-let prepare_tags_for_link tag link =
271271- let slug_tags =
272272- match link.Bushel.Link.bushel with
273273- | Some { slugs; _ } -> List.map (fun slug -> "bushel:" ^ slug) slugs
274274- | None -> []
275275- in
276276- if tag = "" then slug_tags
277277- else tag :: slug_tags
278278-279279-(* Helper function to create batches for parallel processing *)
280280-let create_batches max_concurrent links =
281281- let rec create_batches_aux links acc =
282282- match links with
283283- | [] -> List.rev acc
284284- | _ ->
285285- let batch, rest =
286286- if List.length links <= max_concurrent then
287287- links, []
288288- else
289289- let rec take n lst batch =
290290- if n = 0 || lst = [] then List.rev batch, lst
291291- else take (n-1) (List.tl lst) (List.hd lst :: batch)
292292- in
293293- take max_concurrent links []
294294- in
295295- create_batches_aux rest (batch :: acc)
296296- in
297297- create_batches_aux links []
298298-299299-(* Helper function to upload a single link to Karakeep *)
300300-let upload_single_link api_key base_url tag verbose updated_links link =
301301- let url = link.Bushel.Link.url in
302302- let title =
303303- if link.Bushel.Link.description <> "" then
304304- Some link.Bushel.Link.description
305305- else None
306306- in
307307- let tags = prepare_tags_for_link tag link in
308308-309309- if verbose then begin
310310- log " Uploading: %s\n" url;
311311- if tags <> [] then
312312- log " Tags: %s\n" (String.concat ", " tags);
313313- if title <> None then
314314- log " Title: %s\n" (Option.get title);
315315- end else begin
316316- log "Uploading: %s\n" url;
317317- end;
318318-319319- (* Create the bookmark with tags *)
320320- Lwt.catch
321321- (fun () ->
322322- Karakeep.create_bookmark
323323- ~api_key
324324- ~url
325325- ?title
326326- ~tags
327327- base_url
328328- >>= fun bookmark ->
329329-330330- (* Create updated link with karakeep data *)
331331- let updated_link = {
332332- link with
333333- Bushel.Link.karakeep =
334334- Some {
335335- Bushel.Link.remote_url = base_url;
336336- id = bookmark.id;
337337- tags = bookmark.tags;
338338- metadata = []; (* Will be populated on next sync *)
339339- }
340340- } in
341341- updated_links := updated_link :: !updated_links;
342342-343343- if verbose then
344344- log " ✓ Added to Karakeep with ID: %s\n" bookmark.id
345345- else
346346- log " - Added to Karakeep with ID: %s\n" bookmark.id;
347347- Lwt.return 1 (* Success *)
348348- )
349349- (fun exn ->
350350- if verbose then
351351- log " ✗ Error uploading %s: %s\n" url (Printexc.to_string exn)
352352- else
353353- log " - Error uploading %s: %s\n" url (Printexc.to_string exn);
354354- Lwt.return 0 (* Failure *)
355355- )
356356-357357-(* Helper function to process a batch of links *)
358358-let process_batch api_key base_url tag verbose updated_links batch_num total_batches batch =
359359- log_verbose verbose "\nProcessing batch %d/%d (%d links)...\n"
360360- (batch_num + 1) total_batches (List.length batch);
361361-362362- (* Process links in this batch concurrently *)
363363- Lwt_list.map_p (upload_single_link api_key base_url tag verbose updated_links) batch
364364-365365-(* Helper function to update links file with new karakeep data *)
366366-let update_links_file links_file original_links updated_links =
367367- if !updated_links <> [] then begin
368368- (* Replace the updated links in the original list *)
369369- let final_links =
370370- List.map (fun link ->
371371- let url = link.Bushel.Link.url in
372372- let updated = List.find_opt (fun ul -> ul.Bushel.Link.url = url) !updated_links in
373373- match updated with
374374- | Some ul -> ul
375375- | None -> link
376376- ) original_links
377377- in
378378-379379- (* Save the updated links file *)
380380- Bushel.Link.save_links_file links_file final_links;
381381-382382- log "Updated %s with %d new karakeep_ids\n"
383383- links_file (List.length !updated_links);
384384- end
385385-386386-(* Upload links to Karakeep that don't already have karakeep data *)
387387-let upload_to_karakeep base_url api_key_opt links_file tag max_concurrent delay_seconds limit verbose =
388388- match api_key_opt with
389389- | None ->
390390- log "Error: API key is required.\n";
391391- log "Please provide one with --api-key or create a ~/.karakeep-api file.\n";
392392- 1
393393- | Some api_key ->
394394- (* Load links from file *)
395395- log_verbose verbose "Loading links from %s...\n" links_file;
396396- let links = Bushel.Link.load_links_file links_file in
397397- log_verbose verbose "Loaded %d total links\n" (List.length links);
398398-399399- (* Filter links that don't have karakeep data for this remote *)
400400- log_verbose verbose "Filtering links that don't have karakeep data for %s...\n" base_url;
401401- let filtered_links = filter_links_without_karakeep base_url links in
402402- log_verbose verbose "Found %d links without karakeep data\n" (List.length filtered_links);
403403-404404- (* Apply limit if specified *)
405405- let links_to_upload = apply_limit_to_links limit filtered_links in
406406-407407- if links_to_upload = [] then begin
408408- log "No links to upload to %s (all links already have karakeep data)\n" base_url;
409409- 0
410410- end else begin
411411- log "Found %d links to upload to %s\n" (List.length links_to_upload) base_url;
412412-413413- (* Split links into batches for parallel processing *)
414414- let batches = create_batches max_concurrent links_to_upload in
415415- log_verbose verbose "Processing in %d batches of up to %d links each...\n"
416416- (List.length batches) max_concurrent;
417417- log_verbose verbose "Delay between batches: %.1f seconds\n" delay_seconds;
418418-419419- (* Process batches and accumulate updated links *)
420420- let updated_links = ref [] in
421421-422422- let result = Lwt_main.run (
423423- Lwt.catch
424424- (fun () ->
425425- Lwt_list.fold_left_s (fun (total_count, batch_num) batch ->
426426- process_batch api_key base_url tag verbose updated_links
427427- batch_num (List.length batches) batch >>= fun results ->
428428-429429- (* Count successes in this batch *)
430430- let batch_successes = List.fold_left (+) 0 results in
431431- let new_total = total_count + batch_successes in
432432-433433- log_verbose verbose " Batch %d complete: %d/%d successful (Total: %d/%d)\n"
434434- (batch_num + 1) batch_successes (List.length batch) new_total (new_total + (List.length links_to_upload - new_total));
435435-436436- (* Add a delay before processing the next batch *)
437437- if batch_num + 1 < List.length batches then begin
438438- log_verbose verbose " Waiting %.1f seconds before next batch...\n" delay_seconds;
439439- Lwt_unix.sleep delay_seconds >>= fun () ->
440440- Lwt.return (new_total, batch_num + 1)
441441- end else
442442- Lwt.return (new_total, batch_num + 1)
443443- ) (0, 0) batches >>= fun (final_count, _) ->
444444- Lwt.return final_count
445445- )
446446- (fun exn ->
447447- log "Error during upload operation: %s\n" (Printexc.to_string exn);
448448- Lwt.return 0
449449- )
450450- ) in
451451-452452- (* Update the links file with the new karakeep_ids *)
453453- update_links_file links_file links updated_links;
454454-455455- log "Upload complete. %d/%d links uploaded successfully.\n"
456456- result (List.length links_to_upload);
457457-458458- 0
459459- end
460460-461461-(* Common arguments *)
462462-let links_file_arg =
463463- let doc = "Links YAML file. Defaults to links.yml." in
464464- Arg.(value & opt string "links.yml" & info ["file"; "f"] ~doc ~docv:"FILE")
465465-466466-let base_url_arg =
467467- let doc = "Base URL of the Karakeep instance" in
468468- let default = "https://hoard.recoil.org" in
469469- Arg.(value & opt string default & info ["url"] ~doc ~docv:"URL")
470470-471471-let api_key_arg =
472472- let doc = "API key for Karakeep authentication (ak1_<key_id>_<secret>)" in
473473- let get_api_key () =
474474- let home = try Sys.getenv "HOME" with Not_found -> "." in
475475- let key_path = Filename.concat home ".karakeep-api" in
476476- try
477477- let ic = open_in key_path in
478478- let key = input_line ic in
479479- close_in ic;
480480- Some (String.trim key)
481481- with _ -> None
482482- in
483483- Arg.(value & opt (some string) (get_api_key ()) & info ["api-key"] ~doc ~docv:"API_KEY")
484484-485485-let tag_arg =
486486- let doc = "Tag to filter or apply to bookmarks" in
487487- Arg.(value & opt string "" & info ["tag"; "t"] ~doc ~docv:"TAG")
488488-489489-let download_assets_arg =
490490- let doc = "Download assets (screenshots, etc.) from Karakeep" in
491491- Arg.(value & flag & info ["download-assets"; "d"] ~doc)
492492-493493-let base_dir_arg =
494494- let doc = "Base directory of the Bushel project" in
495495- Arg.(value & opt string "." & info ["dir"; "d"] ~doc ~docv:"DIR")
496496-497497-let include_domains_arg =
498498- let doc = "Only include links to these domains (comma-separated list)" in
499499- Arg.(value & opt (some string) None & info ["include"] ~doc ~docv:"DOMAINS")
500500-501501-let exclude_domains_arg =
502502- let doc = "Exclude links to these domains (comma-separated list)" in
503503- Arg.(value & opt (some string) None & info ["exclude"] ~doc ~docv:"DOMAINS")
504504-505505-let concurrent_arg =
506506- let doc = "Maximum number of concurrent uploads (default: 5)" in
507507- Arg.(value & opt int 5 & info ["concurrent"; "c"] ~doc ~docv:"NUM")
508508-509509-let delay_arg =
510510- let doc = "Delay in seconds between batches (default: 1.0)" in
511511- Arg.(value & opt float 1.0 & info ["delay"] ~doc ~docv:"SECONDS")
512512-513513-let limit_arg =
514514- let doc = "Limit number of links to upload (for testing)" in
515515- Arg.(value & opt (some int) None & info ["limit"; "l"] ~doc ~docv:"NUM")
516516-517517-let verbose_arg =
518518- let doc = "Show detailed progress information during upload" in
519519- Arg.(value & flag & info ["verbose"; "v"] ~doc)
520520-521521-(* Command definitions *)
522522-let init_cmd =
523523- let doc = "Initialize a new links.yml file" in
524524- let info = Cmd.info "init" ~doc in
525525- Cmd.v info Term.(const init_links_file $ links_file_arg)
526526-527527-let karakeep_cmd =
528528- let doc = "Update links.yml with links from Karakeep" in
529529- let info = Cmd.info "karakeep" ~doc in
530530- Cmd.v info Term.(const update_from_karakeep $ base_url_arg $ api_key_arg $ tag_arg $ links_file_arg $ download_assets_arg)
531531-532532-let bushel_cmd =
533533- let doc = "Update links.yml with outgoing links from Bushel entries" in
534534- let info = Cmd.info "bushel" ~doc in
535535- Cmd.v info Term.(const update_from_bushel $ base_dir_arg $ links_file_arg $ include_domains_arg $ exclude_domains_arg)
536536-537537-let upload_cmd =
538538- let doc = "Upload links without karakeep data to Karakeep" in
539539- let info = Cmd.info "upload" ~doc in
540540- Cmd.v info Term.(const upload_to_karakeep $ base_url_arg $ api_key_arg $ links_file_arg $ tag_arg $ concurrent_arg $ delay_arg $ limit_arg $ verbose_arg)
541541-542542-(* Export the term and cmd for use in main bushel.ml *)
543543-let cmd =
544544- let doc = "Manage links between Bushel and Karakeep" in
545545- let info = Cmd.info "links" ~doc in
546546- Cmd.group info [init_cmd; karakeep_cmd; bushel_cmd; upload_cmd]
547547-548548-(* For standalone execution *)
549549-(* Main entry point removed - accessed through bushel_main.ml *)
-119
stack/bushel/bin/bushel_main.ml
···11-open Cmdliner
22-33-let version = "0.1.0"
44-55-(* Import actual command implementations from submodules *)
66-77-(* Obsidian command - no API keys needed *)
88-let obsidian_cmd =
99- let doc = "Convert Bushel entries to Obsidian format" in
1010- Eiocmd.run
1111- ~use_keyeio:false
1212- ~info:(Cmd.info "obsidian" ~version ~doc)
1313- ~app_name:"bushel"
1414- ~service:"bushel"
1515- Bushel_obsidian.term
1616-1717-(* Paper classify command *)
1818-let paper_classify_cmd = Bushel_paper_classify.cmd
1919-2020-(* Paper tex command *)
2121-let paper_tex_cmd = Bushel_paper_tex.cmd
2222-2323-(* Thumbs command - no API keys needed *)
2424-let thumbs_cmd =
2525- let doc = "Generate thumbnails from paper PDFs" in
2626- Eiocmd.run
2727- ~use_keyeio:false
2828- ~info:(Cmd.info "thumbs" ~version ~doc)
2929- ~app_name:"bushel"
3030- ~service:"bushel"
3131- Bushel_thumbs.term
3232-3333-(* Query command - needs Typesense API key *)
3434-let query_cmd =
3535- let doc = "Query Bushel collections using multisearch" in
3636- Eiocmd.run
3737- ~use_keyeio:true
3838- ~info:(Cmd.info "query" ~version ~doc)
3939- ~app_name:"bushel"
4040- ~service:"bushel"
4141- Bushel_search.term
4242-4343-(* Bibtex command - no API keys needed *)
4444-let bibtex_cmd =
4545- let doc = "Export bibtex for all papers" in
4646- Eiocmd.run
4747- ~use_keyeio:false
4848- ~info:(Cmd.info "bibtex" ~version ~doc)
4949- ~app_name:"bushel"
5050- ~service:"bushel"
5151- Bushel_bibtex.term
5252-5353-(* Ideas command *)
5454-let ideas_cmd = Bushel_ideas.cmd
5555-5656-(* Info command - no API keys needed *)
5757-let info_cmd =
5858- let doc = "Display all information for a given slug" in
5959- Eiocmd.run
6060- ~use_keyeio:false
6161- ~info:(Cmd.info "info" ~version ~doc)
6262- ~app_name:"bushel"
6363- ~service:"bushel"
6464- Bushel_info.term
6565-6666-(* Missing command - no API keys needed *)
6767-let missing_cmd =
6868- let doc = "Check for missing metadata in entries" in
6969- Eiocmd.run
7070- ~use_keyeio:false
7171- ~info:(Cmd.info "missing" ~version ~doc)
7272- ~app_name:"bushel"
7373- ~service:"bushel"
7474- Bushel_missing.term
7575-7676-(* Note DOI command - no API keys needed *)
7777-let note_doi_cmd =
7878- let doc = "Assign DOIs to notes with perma:true" in
7979- Eiocmd.run
8080- ~use_keyeio:false
8181- ~info:(Cmd.info "note-doi" ~version ~doc)
8282- ~app_name:"bushel"
8383- ~service:"bushel"
8484- Bushel_note_doi.term
8585-8686-(* Main command *)
8787-let bushel_cmd =
8888- let doc = "Bushel content management toolkit" in
8989- let sdocs = Manpage.s_common_options in
9090- let man = [
9191- `S Manpage.s_description;
9292- `P "$(tname) is a unified command-line tool for managing various types of \
9393- content in the Bushel system, including papers, videos, links, and more.";
9494- `P "$(tname) provides unified access to all Bushel functionality through \
9595- integrated subcommands.";
9696- `S Manpage.s_commands;
9797- `S Manpage.s_common_options;
9898- `S "ENVIRONMENT";
9999- `P "BUSHEL_CONFIG - Path to configuration file with default settings";
100100- `S Manpage.s_authors;
101101- `P "Anil Madhavapeddy";
102102- `S Manpage.s_bugs;
103103- `P "Report bugs at https://github.com/avsm/bushel/issues";
104104- ] in
105105- let info = Cmd.info "bushel" ~version ~doc ~sdocs ~man in
106106- Cmd.group info [
107107- bibtex_cmd;
108108- ideas_cmd;
109109- info_cmd;
110110- missing_cmd;
111111- note_doi_cmd;
112112- obsidian_cmd;
113113- paper_classify_cmd;
114114- paper_tex_cmd;
115115- query_cmd;
116116- thumbs_cmd;
117117- ]
118118-119119-let () = exit (Cmd.eval' bushel_cmd)
-186
stack/bushel/bin/bushel_missing.ml
···11-open Cmdliner
22-open Bushel
33-44-(** Check if an entry has a thumbnail *)
55-let has_thumbnail entries entry =
66- match Entry.thumbnail_slug entries entry with
77- | Some _ -> true
88- | None -> false
99-1010-(** Check if an entry has a synopsis or description *)
1111-let has_synopsis = function
1212- | `Paper p -> Paper.abstract p <> "" (* Papers have abstracts *)
1313- | `Note n -> Note.synopsis n <> None (* Notes have optional synopsis *)
1414- | `Idea _ -> true (* Ideas don't have synopsis field *)
1515- | `Project _ -> true (* Projects don't have synopsis field *)
1616- | `Video _ -> true (* Videos don't have synopsis field *)
1717-1818-(** Check if an entry has tags *)
1919-let has_tags = function
2020- | `Paper p -> Paper.tags p <> []
2121- | `Note n -> Note.tags n <> []
2222- | `Idea i -> i.Idea.tags <> [] (* Access record field directly *)
2323- | `Project p -> Project.tags p <> []
2424- | `Video v -> v.Video.tags <> [] (* Access record field directly *)
2525-2626-(** Entry with broken references *)
2727-type entry_with_broken_refs = {
2828- entry : Entry.entry;
2929- broken_slugs : string list;
3030- broken_contacts : string list;
3131-}
3232-3333-(** Find entries missing thumbnails *)
3434-let find_missing_thumbnails entries =
3535- let all = Entry.all_entries entries in
3636- List.filter (fun entry -> not (has_thumbnail entries entry)) all
3737-3838-(** Find entries missing synopsis *)
3939-let find_missing_synopsis entries =
4040- let all = Entry.all_entries entries in
4141- List.filter (fun entry -> not (has_synopsis entry)) all
4242-4343-(** Find entries missing tags *)
4444-let find_missing_tags entries =
4545- let all = Entry.all_entries entries in
4646- List.filter (fun entry -> not (has_tags entry)) all
4747-4848-(** Find entries with broken slugs or contact handles *)
4949-let find_broken_references entries =
5050- let all = Entry.all_entries entries in
5151- List.filter_map (fun entry ->
5252- let body = Entry.body entry in
5353- let broken_slugs, broken_contacts = Md.validate_references entries body in
5454- if broken_slugs <> [] || broken_contacts <> [] then
5555- Some { entry; broken_slugs; broken_contacts }
5656- else
5757- None
5858- ) all
5959-6060-(** Print a list of entries *)
6161-let print_entries title entries_list =
6262- if entries_list <> [] then begin
6363- Fmt.pr "@.%a (%d):@," (Fmt.styled `Bold Fmt.string) title (List.length entries_list);
6464- List.iter (fun entry ->
6565- let slug = Entry.slug entry in
6666- let type_str = Entry.to_type_string entry in
6767- let title = Entry.title entry in
6868- Fmt.pr " %a %a - %a@,"
6969- (Fmt.styled `Cyan Fmt.string) slug
7070- (Fmt.styled `Faint Fmt.string) (Printf.sprintf "(%s)" type_str)
7171- Fmt.string title
7272- ) entries_list
7373- end
7474-7575-(** Print entries with broken references *)
7676-let print_broken_references title entries_with_broken_refs =
7777- if entries_with_broken_refs <> [] then begin
7878- Fmt.pr "@.%a (%d):@," (Fmt.styled `Bold Fmt.string) title (List.length entries_with_broken_refs);
7979- List.iter (fun { entry; broken_slugs; broken_contacts } ->
8080- let slug = Entry.slug entry in
8181- let type_str = Entry.to_type_string entry in
8282- let entry_title = Entry.title entry in
8383- Fmt.pr " %a %a - %a@,"
8484- (Fmt.styled `Cyan Fmt.string) slug
8585- (Fmt.styled `Faint Fmt.string) (Printf.sprintf "(%s)" type_str)
8686- Fmt.string entry_title;
8787- if broken_slugs <> [] then
8888- Fmt.pr " %a %a@,"
8989- (Fmt.styled `Red Fmt.string) "Broken slugs:"
9090- (Fmt.list ~sep:Fmt.comma Fmt.string) broken_slugs;
9191- if broken_contacts <> [] then
9292- Fmt.pr " %a %a@,"
9393- (Fmt.styled `Red Fmt.string) "Broken contacts:"
9494- (Fmt.list ~sep:Fmt.comma Fmt.string) broken_contacts;
9595- ) entries_with_broken_refs
9696- end
9797-9898-(** Main missing command implementation *)
9999-let missing_cmd base_dir check_thumbnails check_synopsis check_tags check_refs _env _xdg _profile =
100100- let entries = load base_dir in
101101-102102- let count = ref 0 in
103103-104104- if check_thumbnails then begin
105105- let missing = find_missing_thumbnails entries in
106106- print_entries "Entries missing thumbnails" missing;
107107- count := !count + List.length missing
108108- end;
109109-110110- if check_synopsis then begin
111111- let missing = find_missing_synopsis entries in
112112- print_entries "Entries missing synopsis" missing;
113113- count := !count + List.length missing
114114- end;
115115-116116- if check_tags then begin
117117- let missing = find_missing_tags entries in
118118- print_entries "Entries missing tags" missing;
119119- count := !count + List.length missing
120120- end;
121121-122122- if check_refs then begin
123123- let broken = find_broken_references entries in
124124- print_broken_references "Entries with broken references" broken;
125125- (* Count total number of broken references, not just entries *)
126126- let broken_count = List.fold_left (fun acc { broken_slugs; broken_contacts; _ } ->
127127- acc + List.length broken_slugs + List.length broken_contacts
128128- ) 0 broken in
129129- count := !count + broken_count
130130- end;
131131-132132- if !count = 0 then
133133- Fmt.pr "@.No missing metadata or broken references found.@."
134134- else
135135- Fmt.pr "@.Total issues found: %d@." !count;
136136-137137- 0
138138-139139-(** Command line arguments *)
140140-let thumbnails_flag =
141141- let doc = "Check for entries missing thumbnails" in
142142- Arg.(value & flag & info ["thumbnails"; "t"] ~doc)
143143-144144-let synopsis_flag =
145145- let doc = "Check for entries missing synopsis" in
146146- Arg.(value & flag & info ["synopsis"; "s"] ~doc)
147147-148148-let tags_flag =
149149- let doc = "Check for entries missing tags" in
150150- Arg.(value & flag & info ["tags"; "g"] ~doc)
151151-152152-let refs_flag =
153153- let doc = "Check for broken slugs and contact handles" in
154154- Arg.(value & flag & info ["refs"; "r"] ~doc)
155155-156156-let term : (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Cmdliner.Term.t =
157157- Term.(const (fun base thumbnails synopsis tags refs env xdg profile ->
158158- (* If no flags specified, check everything *)
159159- let check_all = not (thumbnails || synopsis || tags || refs) in
160160- missing_cmd base
161161- (check_all || thumbnails)
162162- (check_all || synopsis)
163163- (check_all || tags)
164164- (check_all || refs)
165165- env xdg profile
166166- ) $ Bushel_common.base_dir $ thumbnails_flag $ synopsis_flag $ tags_flag $ refs_flag)
167167-168168-let cmd =
169169- let doc = "List entries with missing metadata or broken references" in
170170- let man = [
171171- `S Manpage.s_description;
172172- `P "This command scans all entries and reports any that are missing thumbnails, synopsis, tags, or have broken slugs/contact handles.";
173173- `P "By default, all checks are performed. Use flags to select specific checks.";
174174- `S Manpage.s_options;
175175- `S Manpage.s_examples;
176176- `P "Check for all issues:";
177177- `Pre " $(mname) $(tname)";
178178- `P "Check only for missing thumbnails:";
179179- `Pre " $(mname) $(tname) --thumbnails";
180180- `P "Check for missing synopsis and tags:";
181181- `Pre " $(mname) $(tname) --synopsis --tags";
182182- `P "Check only for broken references:";
183183- `Pre " $(mname) $(tname) --refs";
184184- ] in
185185- let info = Cmd.info "missing" ~doc ~man in
186186- Cmd.v info term
-131
stack/bushel/bin/bushel_note_doi.ml
···11-open Cmdliner
22-open Bushel
33-44-(** Generate a roguedoi identifier using Crockford base32 encoding *)
55-let generate_roguedoi () =
66- Random.self_init ();
77- (* Generate a 10-character roguedoi with checksum and split every 5 chars *)
88- let id = Crockford.generate ~length:10 ~split_every:5 ~checksum:true () in
99- Printf.sprintf "10.59999/%s" id
1010-1111-(** Add DOI to a specific note's frontmatter if it doesn't already have one *)
1212-let add_doi_to_note note_path =
1313- let content = In_channel.with_open_bin note_path In_channel.input_all in
1414- (* Check if note already has a doi: field *)
1515- let has_doi = try
1616- let _ = String.index content 'd' in
1717- let re = Str.regexp "^doi:" in
1818- let lines = String.split_on_char '\n' content in
1919- List.exists (fun line -> Str.string_match re (String.trim line) 0) lines
2020- with Not_found -> false
2121- in
2222- if has_doi then begin
2323- Fmt.pr "%a: Note already has a DOI, skipping@."
2424- (Fmt.styled `Yellow Fmt.string) note_path;
2525- false
2626- end else begin
2727- let roguedoi = generate_roguedoi () in
2828- (* Parse the file to extract frontmatter *)
2929- match String.split_on_char '\n' content with
3030- | "---" :: rest ->
3131- (* Find the end of frontmatter *)
3232- let rec find_end_fm acc = function
3333- | [] -> None
3434- | "---" :: body_lines -> Some (List.rev acc, body_lines)
3535- | line :: lines -> find_end_fm (line :: acc) lines
3636- in
3737- (match find_end_fm [] rest with
3838- | Some (fm_lines, body_lines) ->
3939- (* Add doi field to frontmatter *)
4040- let new_fm = fm_lines @ [Printf.sprintf "doi: %s" roguedoi] in
4141- let new_content =
4242- String.concat "\n" (["---"] @ new_fm @ ["---"] @ body_lines)
4343- in
4444- Out_channel.with_open_bin note_path (fun oc ->
4545- Out_channel.output_string oc new_content
4646- );
4747- Fmt.pr "%a: Added DOI %a@."
4848- (Fmt.styled `Green Fmt.string) note_path
4949- (Fmt.styled `Cyan Fmt.string) roguedoi;
5050- true
5151- | None ->
5252- Fmt.epr "%a: Could not parse frontmatter@."
5353- (Fmt.styled `Red Fmt.string) note_path;
5454- false)
5555- | _ ->
5656- Fmt.epr "%a: No frontmatter found@."
5757- (Fmt.styled `Red Fmt.string) note_path;
5858- false
5959- end
6060-6161-(** Main command implementation *)
6262-let note_doi_cmd base_dir dry_run _env _xdg _profile =
6363- let entries = load base_dir in
6464- let notes = Entry.notes entries in
6565-6666- (* Filter for perma notes without DOI *)
6767- let perma_notes = List.filter (fun n ->
6868- Note.perma n && Option.is_none (Note.doi n)
6969- ) notes in
7070-7171- if perma_notes = [] then begin
7272- Fmt.pr "No permanent notes without DOI found.@.";
7373- 0
7474- end else begin
7575- Fmt.pr "@[<v>";
7676- Fmt.pr "%a: Found %d permanent notes without DOI@.@."
7777- (Fmt.styled `Bold Fmt.string) "Info"
7878- (List.length perma_notes);
7979-8080- let count = ref 0 in
8181- List.iter (fun note ->
8282- let slug = Note.slug note in
8383- let note_path = Printf.sprintf "%s/data/notes/%s.md" base_dir slug in
8484- Fmt.pr "Processing %a (%a)...@,"
8585- (Fmt.styled `Cyan Fmt.string) slug
8686- (Fmt.styled `Faint Fmt.string) (Note.title note);
8787-8888- if not dry_run then begin
8989- if add_doi_to_note note_path then
9090- incr count
9191- end else begin
9292- let roguedoi = generate_roguedoi () in
9393- Fmt.pr " Would add DOI: %a@,"
9494- (Fmt.styled `Cyan Fmt.string) roguedoi;
9595- incr count
9696- end
9797- ) perma_notes;
9898-9999- Fmt.pr "@.";
100100- if dry_run then
101101- Fmt.pr "%a: Would add DOI to %d notes (dry run)@."
102102- (Fmt.styled `Bold Fmt.string) "Summary"
103103- !count
104104- else
105105- Fmt.pr "%a: Added DOI to %d notes@."
106106- (Fmt.styled `Bold Fmt.string) "Summary"
107107- !count;
108108- Fmt.pr "@]@.";
109109- 0
110110- end
111111-112112-(** Command line interface definition *)
113113-let dry_run_flag =
114114- let doc = "Show what would be done without making changes" in
115115- Arg.(value & flag & info ["n"; "dry-run"] ~doc)
116116-117117-let term : (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Cmdliner.Term.t =
118118- Term.(const note_doi_cmd $ Bushel_common.base_dir $ dry_run_flag)
119119-120120-let cmd =
121121- let doc = "Generate and add DOI identifiers to permanent notes" in
122122- let man = [
123123- `S Manpage.s_description;
124124- `P "This command generates roguedoi identifiers using Crockford base32 encoding \
125125- and adds them to the frontmatter of permanent notes (notes with perma: true) \
126126- that don't already have a DOI.";
127127- `P "Roguedoi format: 10.59999/xxxxx-xxxxx where x is a Crockford base32 character.";
128128- `S Manpage.s_options;
129129- ] in
130130- let info = Cmd.info "note-doi" ~doc ~man in
131131- Cmd.v info term
-88
stack/bushel/bin/bushel_obsidian.ml
···11-open Bushel
22-33-let obsidian_links =
44- let inline c = function
55- | Md.Obsidian_link l ->
66- Cmarkit_renderer.Context.string c l;
77- true
88- | _ -> false
99- in
1010- Cmarkit_renderer.make ~inline ()
1111-;;
1212-1313-let obsidian_of_doc doc =
1414- let default = Cmarkit_commonmark.renderer () in
1515- let r = Cmarkit_renderer.compose default obsidian_links in
1616- Cmarkit_renderer.doc_to_string r doc
1717-;;
1818-1919-let md_to_obsidian entries md =
2020- let open Cmarkit in
2121- Doc.of_string ~strict:false ~resolver:Md.with_bushel_links md
2222- |> Mapper.map_doc (Mapper.make ~inline:(Md.bushel_inline_mapper_to_obsidian entries) ())
2323- |> obsidian_of_doc
2424-;;
2525-2626-let obsidian_output base output_dir =
2727- let e = load base in
2828- let all = Entry.all_entries e @ Entry.all_papers e in
2929- List.iter
3030- (fun ent ->
3131- let slug =
3232- match ent with
3333- | `Paper { Paper.latest; slug; ver; _ } when not latest ->
3434- Printf.sprintf "%s-%s" slug ver
3535- | _ -> Entry.slug ent
3636- in
3737- let fname = Filename.concat output_dir (slug ^ ".md") in
3838- let tags =
3939- Tags.tags_of_ent e ent
4040- |> List.filter_map (fun tag ->
4141- match tag with
4242- | `Slug _ -> None
4343- | `Set s -> Some (Printf.sprintf "\"#%s\"" s)
4444- | `Text s -> Some (Printf.sprintf "%s" s)
4545- | `Contact _ -> None
4646- | `Year y -> Some (Printf.sprintf "\"#y%d\"" y))
4747- |> List.map (fun s -> "- " ^ s)
4848- |> String.concat "\n"
4949- in
5050- let links =
5151- Tags.tags_of_ent e ent
5252- |> List.filter_map (fun tag ->
5353- match tag with
5454- | `Slug s when s <> slug -> Some (Printf.sprintf "- \"[[%s]]\"" s)
5555- | `Contact c -> Some (Printf.sprintf "- \"[[@%s]]\"" c)
5656- | _ -> None)
5757- |> String.concat "\n"
5858- |> function
5959- | "" -> ""
6060- | s -> "linklist:\n" ^ s ^ "\n"
6161- in
6262- let body = Entry.body ent |> md_to_obsidian e in
6363- let buf = Printf.sprintf "---\ntags:\n%s\n%s---\n\n%s" tags links body in
6464- Out_channel.with_open_bin fname (fun oc -> output_string oc buf))
6565- all;
6666- List.iter
6767- (fun contact ->
6868- let slug = Contact.handle contact in
6969- let fname = Filename.concat output_dir ("@" ^ slug ^ ".md") in
7070- let buf = String.concat "\n" (Contact.names contact) in
7171- Out_channel.with_open_bin fname (fun oc -> output_string oc buf))
7272- (Entry.contacts e)
7373-;;
7474-7575-(* Export the term for use in main bushel.ml *)
7676-let term : (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Cmdliner.Term.t =
7777- Cmdliner.Term.(
7878- const (fun base_dir output_dir _env _xdg _profile -> obsidian_output base_dir output_dir; 0) $
7979- Bushel_common.base_dir $
8080- Bushel_common.output_dir ~default:"obsidian"
8181- )
8282-8383-let cmd =
8484- let doc = "Generate Obsidian-compatible markdown files" in
8585- let info = Cmdliner.Cmd.info "obsidian" ~doc in
8686- Cmdliner.Cmd.v info term
8787-8888-(* Main entry point removed - accessed through bushel_main.ml *)
-74
stack/bushel/bin/bushel_paper.ml
···11-module ZT = Zotero_translation
22-open Lwt.Infix
33-open Printf
44-module J = Ezjsonm
55-open Cmdliner
66-77-88-let _authors b j =
99- let keys = J.get_dict j in
1010- let authors = J.get_list J.get_string (List.assoc "author" keys) in
1111- let a =
1212- List.fold_left (fun acc a ->
1313- match Bushel.Entry.lookup_by_name b a with
1414- | Some c -> `String ("@" ^ (Bushel.Contact.handle c)) :: acc
1515- | None -> failwith (sprintf "author %s not found" a)
1616- ) [] authors
1717- in
1818- J.update j ["author"] (Some (`A a))
1919-2020-let of_doi zt ~base_dir ~slug ~version doi =
2121- ZT.json_of_doi zt ~slug doi >>= fun j ->
2222- let papers_dir = Printf.sprintf "%s/papers/%s" base_dir slug in
2323- (* Ensure papers directory exists *)
2424- (try Unix.mkdir papers_dir 0o755 with Unix.Unix_error (Unix.EEXIST, _, _) -> ());
2525-2626- (* Extract abstract from JSON data *)
2727- let abstract = try
2828- let keys = Ezjsonm.get_dict (j :> Ezjsonm.value) in
2929- match List.assoc_opt "abstract" keys with
3030- | Some abstract_json -> Some (Ezjsonm.get_string abstract_json)
3131- | None -> None
3232- with _ -> None in
3333-3434- (* Remove abstract from frontmatter - it goes in body *)
3535- let keys = Ezjsonm.get_dict (j :> Ezjsonm.value) in
3636- let filtered_keys = List.filter (fun (k, _) -> k <> "abstract") keys in
3737- let json_without_abstract = `O filtered_keys in
3838-3939- (* Use library function to generate YAML with abstract in body *)
4040- let content = Bushel.Paper.to_yaml ?abstract ~ver:version json_without_abstract in
4141-4242- let filename = Printf.sprintf "%s.md" version in
4343- let filepath = Filename.concat papers_dir filename in
4444- let oc = open_out filepath in
4545- output_string oc content;
4646- close_out oc;
4747- Printf.printf "Created paper file: %s\n" filepath;
4848- Lwt.return ()
4949-5050-let slug_arg =
5151- let doc = "Slug for the entry." in
5252- Arg.(required & pos 0 (some string) None & info [] ~docv:"SLUG" ~doc)
5353-5454-let version_arg =
5555- let doc = "Version of the entry." in
5656- Arg.(required & pos 1 (some string) None & info [] ~docv:"VERSION" ~doc)
5757-5858-let doi_arg =
5959- let doc = "DOI of the entry." in
6060- Arg.(required & pos 2 (some string) None & info [] ~docv:"DOI" ~doc)
6161-6262-(* Export the term for use in main bushel.ml *)
6363-let term =
6464- Term.(const (fun base slug version doi ->
6565- let zt = ZT.v "http://svr-avsm2-eeg-ce:1969" in
6666- Lwt_main.run @@ of_doi zt ~base_dir:base ~slug ~version doi; 0
6767- ) $ Bushel_common.base_dir $ slug_arg $ version_arg $ doi_arg)
6868-6969-let cmd =
7070- let doc = "Generate paper entry from DOI" in
7171- let info = Cmd.info "paper" ~doc in
7272- Cmd.v info term
7373-7474-(* Main entry point removed - accessed through bushel_main.ml *)
-57
stack/bushel/bin/bushel_paper_classify.ml
···11-open Cmdliner
22-33-(** TODO:claude Classify papers based on heuristics and update metadata *)
44-let classify_papers base_dir overwrite =
55- let papers_dir = Printf.sprintf "%s/papers" base_dir in
66- if not (Sys.file_exists papers_dir) then (
77- Printf.eprintf "Papers directory not found: %s\n" papers_dir;
88- 1
99- ) else (
1010- let paper_dirs = Sys.readdir papers_dir |> Array.to_list in
1111- List.iter (fun paper_slug ->
1212- let paper_path = Filename.concat papers_dir paper_slug in
1313- if Sys.is_directory paper_path then (
1414- let versions = Sys.readdir paper_path |> Array.to_list
1515- |> List.filter (String.ends_with ~suffix:".md") in
1616- List.iter (fun version_file ->
1717- let filepath = Filename.concat paper_path version_file in
1818- let version = Filename.remove_extension version_file in
1919- try
2020- let paper = Bushel.Paper.of_md ~slug:paper_slug ~ver:version filepath in
2121- let predicted_class = Bushel.Paper.classification paper in
2222- let class_str = Bushel.Paper.string_of_classification predicted_class in
2323- Printf.printf "%s/%s: %s\n" paper_slug version class_str;
2424-2525- (* Update the file if overwrite is enabled *)
2626- if overwrite then (
2727- let json_data = Bushel.Paper.raw_json paper in
2828- let keys = Ezjsonm.get_dict json_data in
2929- let updated_keys = ("classification", `String class_str) ::
3030- (List.filter (fun (k, _) -> k <> "classification") keys) in
3131- let updated_json = `O updated_keys in
3232- let abstract = Some (Bushel.Paper.abstract paper) in
3333- let content = Bushel.Paper.to_yaml ?abstract ~ver:version updated_json in
3434- let oc = open_out filepath in
3535- output_string oc content;
3636- close_out oc;
3737- Printf.printf " Updated %s\n" filepath
3838- )
3939- with e ->
4040- Printf.eprintf "Error processing %s: %s\n" filepath (Printexc.to_string e)
4141- ) versions
4242- )
4343- ) paper_dirs;
4444- 0
4545- )
4646-4747-let overwrite_flag =
4848- let doc = "Update paper files with classification metadata" in
4949- Arg.(value & flag & info ["overwrite"] ~doc)
5050-5151-let term =
5252- Term.(const classify_papers $ Bushel_common.base_dir $ overwrite_flag)
5353-5454-let cmd =
5555- let doc = "Classify papers as full/short/preprint" in
5656- let info = Cmd.info "paper-classify" ~doc in
5757- Cmd.v info term
-325
stack/bushel/bin/bushel_paper_tex.ml
···11-open Printf
22-open Cmdliner
33-44-(** TODO:claude Format author name for LaTeX with initials and full last name *)
55-let format_author_name author =
66- (* Split author name and convert to "F.M.~Lastname" format *)
77- let parts = String.split_on_char ' ' author |> List.filter (fun s -> s <> "") in
88- match List.rev parts with
99- | [] -> ""
1010- | lastname :: rest_rev ->
1111- let firstname_parts = List.rev rest_rev in
1212- let initials = List.map (fun name ->
1313- if String.length name > 0 then String.sub name 0 1 ^ "." else ""
1414- ) firstname_parts in
1515- let initials_str = String.concat "" initials in
1616- if initials_str = "" then lastname
1717- else initials_str ^ "~" ^ lastname
1818-1919-(** TODO:claude Format author name for LaTeX with underline for target author *)
2020-let format_author target_name author =
2121- let formatted = format_author_name author in
2222- (* Check if author contains target name substring for underlining *)
2323- if String.lowercase_ascii author |> fun s ->
2424- Re.execp (Re.Perl.compile_pat ~opts:[`Caseless] target_name) s
2525- then sprintf "\\underline{%s}" formatted
2626- else formatted
2727-2828-(** TODO:claude Format authors list for LaTeX *)
2929-let format_authors target_name authors =
3030- match authors with
3131- | [] -> ""
3232- | [single] -> format_author target_name single
3333- | _ ->
3434- let formatted = List.map (format_author target_name) authors in
3535- String.concat ", " formatted
3636-3737-(** TODO:claude Escape special LaTeX characters *)
3838-let escape_latex str =
3939- let replacements = [
4040- ("&", "\\&");
4141- ("%", "\\%");
4242- ("$", "\\$");
4343- ("#", "\\#");
4444- ("_", "\\_");
4545- ("{", "\\{");
4646- ("}", "\\}");
4747- ("~", "\\textasciitilde{}");
4848- ("^", "\\textasciicircum{}");
4949- ] in
5050- List.fold_left (fun s (from, to_) ->
5151- Re.replace_string (Re.compile (Re.str from)) ~by:to_ s
5252- ) str replacements
5353-5454-(** TODO:claude Clean venue name by removing common prefixes and handling arXiv *)
5555-let clean_venue_name venue =
5656- (* Special handling for arXiv to avoid redundancy like "arXiv (arXiv:ID)" *)
5757- let venue_lower = String.lowercase_ascii venue in
5858- if Re.execp (Re.Perl.compile_pat ~opts:[`Caseless] "arxiv") venue_lower then
5959- if String.contains venue ':' then
6060- (* If it contains arXiv:ID format, just return the ID part *)
6161- let parts = String.split_on_char ':' venue in
6262- match parts with
6363- | _ :: id :: _ -> String.trim id
6464- | _ -> venue
6565- else venue
6666- else
6767- let prefixes = [
6868- "in proceedings of the ";
6969- "proceedings of the ";
7070- "in proceedings of ";
7171- "proceedings of ";
7272- "in the ";
7373- "the ";
7474- ] in
7575- let rec remove_prefixes v = function
7676- | [] -> v
7777- | prefix :: rest ->
7878- if String.length v >= String.length prefix &&
7979- String.sub (String.lowercase_ascii v) 0 (String.length prefix) = prefix
8080- then String.sub v (String.length prefix) (String.length v - String.length prefix)
8181- else remove_prefixes v rest
8282- in
8383- let cleaned = remove_prefixes venue prefixes in
8484- (* Capitalize first letter *)
8585- if String.length cleaned > 0 then
8686- String.mapi (fun i c -> if i = 0 then Char.uppercase_ascii c else c) cleaned
8787- else cleaned
8888-8989-(** TODO:claude Format venue for LaTeX with volume/number details for full papers *)
9090-let format_venue paper =
9191- let open Bushel.Paper in
9292- let classification = classification paper in
9393- match bibtype paper with
9494- | "article" ->
9595- let journal_name = try journal paper |> clean_venue_name |> escape_latex with _ -> "Journal" in
9696- if classification = Full then (
9797- let vol_info =
9898- let vol = volume paper in
9999- let num = issue paper in
100100- match vol, num with
101101- | Some v, Some n -> sprintf ", %s(%s)" v n
102102- | Some v, None -> sprintf ", vol. %s" v
103103- | None, Some n -> sprintf ", no. %s" n
104104- | None, None -> ""
105105- in
106106- sprintf "\\textit{%s%s}" journal_name vol_info
107107- ) else
108108- sprintf "\\textit{%s}" journal_name
109109- | "inproceedings" ->
110110- let conf_name = try booktitle paper |> clean_venue_name |> escape_latex with _ -> "Conference" in
111111- sprintf "\\textit{%s}" conf_name
112112- | "techreport" ->
113113- let inst = try institution paper |> escape_latex with _ -> "Institution" in
114114- sprintf "\\textit{Technical Report, %s}" inst
115115- | "phdthesis" ->
116116- let school = try institution paper |> escape_latex with _ -> "University" in
117117- sprintf "\\textit{PhD thesis, %s}" school
118118- | "mastersthesis" ->
119119- let school = try institution paper |> escape_latex with _ -> "University" in
120120- sprintf "\\textit{Master's thesis, %s}" school
121121- | "book" ->
122122- let publisher_str = try Bushel.Paper.publisher paper |> escape_latex with _ -> "" in
123123- let edition_str = try
124124- let json = Bushel.Paper.raw_json paper in
125125- let keys = Ezjsonm.get_dict json in
126126- List.assoc "edition" keys |> Ezjsonm.get_string |> escape_latex
127127- with _ -> "" in
128128- let isbn_str = try Bushel.Paper.isbn paper |> escape_latex with _ -> "" in
129129- let venue_info =
130130- let base = match publisher_str, edition_str with
131131- | pub, ed when pub <> "" && ed <> "" -> sprintf "%s, %s edition" pub ed
132132- | pub, _ when pub <> "" -> pub
133133- | _, ed when ed <> "" -> sprintf "%s edition" ed
134134- | _, _ -> "Book"
135135- in
136136- if isbn_str <> "" then
137137- sprintf "%s, ISBN %s" base isbn_str
138138- else
139139- base
140140- in
141141- sprintf "\\textit{%s}" venue_info
142142- | "misc" ->
143143- (* Try to get meaningful venue info for misc entries *)
144144- let journal_str = try Bushel.Paper.journal paper |> clean_venue_name |> escape_latex with _ -> "" in
145145- let booktitle_str = try Bushel.Paper.booktitle paper |> clean_venue_name |> escape_latex with _ -> "" in
146146- let publisher_str = try Bushel.Paper.publisher paper |> escape_latex with _ -> "" in
147147- if journal_str <> "" then
148148- sprintf "\\textit{%s}" journal_str
149149- else if booktitle_str <> "" then
150150- sprintf "\\textit{%s}" booktitle_str
151151- else if publisher_str <> "" then
152152- sprintf "\\textit{%s}" publisher_str
153153- else
154154- sprintf "\\textit{Preprint}"
155155- | "abstract" ->
156156- (* Handle conference abstracts *)
157157- let conf_name = try Bushel.Paper.booktitle paper |> clean_venue_name |> escape_latex with _ -> "" in
158158- let journal_str = try Bushel.Paper.journal paper |> clean_venue_name |> escape_latex with _ -> "" in
159159- if conf_name <> "" then
160160- sprintf "\\textit{%s (Abstract)}" conf_name
161161- else if journal_str <> "" then
162162- sprintf "\\textit{%s (Abstract)}" journal_str
163163- else
164164- sprintf "\\textit{Conference Abstract}"
165165- | _ ->
166166- (* Fallback for other types with special arXiv handling *)
167167- let journal_str = try Bushel.Paper.journal paper with _ -> "" in
168168- let publisher_str = try Bushel.Paper.publisher paper |> escape_latex with _ -> "" in
169169-170170- (* Special handling for arXiv papers - skip venue, let note handle it *)
171171- if String.lowercase_ascii journal_str = "arxiv" then
172172- ""
173173- else if journal_str <> "" then
174174- sprintf "\\textit{%s}" (journal_str |> clean_venue_name |> escape_latex)
175175- else if publisher_str <> "" then
176176- sprintf "\\textit{%s}" publisher_str
177177- else
178178- sprintf "\\textit{Preprint}"
179179-180180-(** TODO:claude Generate LaTeX PubItem for a paper *)
181181-let generate_latex_entry target_name paper =
182182- let open Bushel.Paper in
183183- let slug_str = slug paper in
184184- let title_str = title paper |> escape_latex in
185185- let authors_str = format_authors target_name (authors paper) in
186186- let venue_str = format_venue paper in
187187- let year_str = year paper |> string_of_int in
188188- let month_str =
189189- let (_, m, _) = date paper in
190190- sprintf "%02d" m
191191- in
192192-193193- (* Check if paper is in the future *)
194194- let is_in_press =
195195- let paper_time = datetime paper in
196196- let now = Ptime_clock.now () in
197197- Ptime.compare paper_time now > 0
198198- in
199199-200200- (* Add DOI or PDF link if available, but not for in-press papers unless they have explicit URL *)
201201- let title_with_link =
202202- if is_in_press then
203203- (* For in-press papers, only add link if there's an explicit URL field *)
204204- match Bushel.Paper.url paper with
205205- | Some u -> sprintf "\\href{%s}{%s}" u title_str
206206- | None -> title_str (* No link for in-press papers without explicit URL *)
207207- else
208208- (* For published papers, use DOI or URL or default PDF link *)
209209- match Bushel.Paper.doi paper with
210210- | Some doi -> sprintf "\\href{https://doi.org/%s}{%s}" doi title_str
211211- | None ->
212212- (* Check if there's a URL, otherwise default to PDF link *)
213213- let url = match Bushel.Paper.url paper with
214214- | Some u -> u
215215- | None -> sprintf "https://anil.recoil.org/papers/%s.pdf" slug_str
216216- in
217217- sprintf "\\href{%s}{%s}" url title_str
218218- in
219219-220220- (* Add "(in press)" if paper is in the future *)
221221- let in_press_str = if is_in_press then " \\textit{(in press)}" else "" in
222222-223223- (* Add note if present *)
224224- let note_str = match Bushel.Paper.note paper with
225225- | Some n -> sprintf " \\textit{(%s)}" (escape_latex n)
226226- | None -> ""
227227- in
228228-229229- sprintf "\\BigGap\n\\PubItemLabeled{%s}\n{``%s,''\n%s,\n%s%s%s,\n\\DatestampYM{%s}{%s}.}\n"
230230- slug_str title_with_link authors_str venue_str in_press_str note_str year_str month_str
231231-232232-(** TODO:claude Generate LaTeX output files for papers *)
233233-let generate_tex base_dir output_dir target_name =
234234- try
235235- let papers = Bushel.load_papers base_dir in
236236- let latest_papers = List.filter (fun p -> p.Bushel.Paper.latest) papers in
237237-238238- (* Extract selected papers first *)
239239- let selected_papers = List.filter Bushel.Paper.selected latest_papers in
240240-241241- (* Group remaining papers by classification, excluding selected ones *)
242242- let non_selected_papers = List.filter (fun p -> not (Bushel.Paper.selected p)) latest_papers in
243243- let full_papers = List.filter (fun p ->
244244- Bushel.Paper.classification p = Bushel.Paper.Full) non_selected_papers in
245245- let short_papers = List.filter (fun p ->
246246- Bushel.Paper.classification p = Bushel.Paper.Short) non_selected_papers in
247247- let preprint_papers = List.filter (fun p ->
248248- Bushel.Paper.classification p = Bushel.Paper.Preprint) non_selected_papers in
249249-250250- (* Sort each group by date, newest first *)
251251- let sorted_full = List.sort Bushel.Paper.compare full_papers in
252252- let sorted_short = List.sort Bushel.Paper.compare short_papers in
253253- let sorted_preprint = List.sort Bushel.Paper.compare preprint_papers in
254254- let sorted_selected = List.sort Bushel.Paper.compare selected_papers in
255255-256256- (* Ensure output directory exists *)
257257- (try Unix.mkdir output_dir 0o755 with Unix.Unix_error (Unix.EEXIST, _, _) -> ());
258258-259259- (* Write papers_full.tex *)
260260- let oc_full = open_out (Filename.concat output_dir "papers_full.tex") in
261261- List.iter (fun paper ->
262262- let latex = generate_latex_entry target_name paper in
263263- output_string oc_full latex;
264264- output_char oc_full '\n'
265265- ) sorted_full;
266266- close_out oc_full;
267267- Printf.printf "Generated %s/papers_full.tex with %d entries\n" output_dir (List.length sorted_full);
268268-269269- (* Write papers_short.tex *)
270270- let oc_short = open_out (Filename.concat output_dir "papers_short.tex") in
271271- List.iter (fun paper ->
272272- let latex = generate_latex_entry target_name paper in
273273- output_string oc_short latex;
274274- output_char oc_short '\n'
275275- ) sorted_short;
276276- close_out oc_short;
277277- Printf.printf "Generated %s/papers_short.tex with %d entries\n" output_dir (List.length sorted_short);
278278-279279- (* Write papers_preprint.tex *)
280280- let oc_preprint = open_out (Filename.concat output_dir "papers_preprint.tex") in
281281- List.iter (fun paper ->
282282- let latex = generate_latex_entry target_name paper in
283283- output_string oc_preprint latex;
284284- output_char oc_preprint '\n'
285285- ) sorted_preprint;
286286- close_out oc_preprint;
287287- Printf.printf "Generated %s/papers_preprint.tex with %d entries\n" output_dir (List.length sorted_preprint);
288288-289289- (* Write papers_selected.tex *)
290290- let oc_selected = open_out (Filename.concat output_dir "papers_selected.tex") in
291291- List.iter (fun paper ->
292292- let latex = generate_latex_entry target_name paper in
293293- output_string oc_selected latex;
294294- output_char oc_selected '\n'
295295- ) sorted_selected;
296296- close_out oc_selected;
297297- Printf.printf "Generated %s/papers_selected.tex with %d entries\n" output_dir (List.length sorted_selected);
298298-299299- (* Write paper_count.tex *)
300300- let total_count = List.length latest_papers in
301301- let oc_count = open_out (Filename.concat output_dir "paper_count.tex") in
302302- output_string oc_count (sprintf "\\setcounter{pubcounter}{%d}\n" total_count);
303303- close_out oc_count;
304304- Printf.printf "Generated %s/paper_count.tex with total count: %d\n" output_dir total_count;
305305-306306- 0
307307- with e ->
308308- Printf.eprintf "Error loading papers: %s\n" (Printexc.to_string e);
309309- 1
310310-311311-let output_dir_arg =
312312- let doc = "Output directory for generated LaTeX files" in
313313- Arg.(value & opt string "." & info ["output"; "o"] ~docv:"DIR" ~doc)
314314-315315-let target_name_arg =
316316- let doc = "Name to underline in author list (e.g., 'Madhavapeddy')" in
317317- Arg.(value & opt string "Madhavapeddy" & info ["target"; "t"] ~docv:"NAME" ~doc)
318318-319319-let term =
320320- Term.(const generate_tex $ Bushel_common.base_dir $ output_dir_arg $ target_name_arg)
321321-322322-let cmd =
323323- let doc = "Generate LaTeX publication entries" in
324324- let info = Cmd.info "paper-tex" ~doc in
325325- Cmd.v info term
-48
stack/bushel/bin/bushel_search.ml
···11-open Cmdliner
22-33-(** Bushel search command for integration with main CLI *)
44-55-let limit =
66- let doc = "Maximum number of results to return" in
77- Arg.(value & opt int 50 & info ["limit"; "l"] ~doc)
88-99-let offset =
1010- let doc = "Number of results to skip (for pagination)" in
1111- Arg.(value & opt int 0 & info ["offset"; "o"] ~doc)
1212-1313-let query_text =
1414- let doc = "Search query text" in
1515- Arg.(required & pos 0 (some string) None & info [] ~docv:"QUERY" ~doc)
1616-1717-(** Search function using multisearch *)
1818-let search query_text limit offset env _xdg _profile =
1919- let config = Bushel.Typesense.load_config_from_files () in
2020-2121- if config.api_key = "" then (
2222- Printf.eprintf "Error: API key is required. Use TYPESENSE_API_KEY environment variable or create .typesense-key file.\n";
2323- 1
2424- ) else (
2525- Printf.printf "Searching Typesense at %s\n" config.endpoint;
2626- Printf.printf "Query: \"%s\"\n" query_text;
2727- Printf.printf "Limit: %d, Offset: %d\n\n" limit offset;
2828-2929- Eio.Switch.run (fun sw ->
3030- let result = Bushel.Typesense.multisearch ~sw ~env config query_text ~limit:50 () in
3131- match result with
3232- | Ok multisearch_resp ->
3333- let combined_response = Bushel.Typesense.combine_multisearch_results multisearch_resp ~limit ~offset () in
3434- Printf.printf "Found %d results (%.2fms)\n\n" combined_response.total combined_response.query_time;
3535-3636- List.iteri (fun i (hit : Bushel.Typesense.search_result) ->
3737- Printf.printf "%d. %s (score: %.2f)\n" (i + 1) (Bushel.Typesense.pp_search_result_oneline hit) hit.Bushel.Typesense.score
3838- ) combined_response.hits
3939- | Error err ->
4040- Format.eprintf "Search error: %a\n" Bushel.Typesense.pp_error err;
4141- exit 1
4242- );
4343- 0
4444- )
4545-4646-(** Command line term for integration with eiocmd *)
4747-let term : (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Cmdliner.Term.t =
4848- Term.(const search $ query_text $ limit $ offset)
-70
stack/bushel/bin/bushel_thumbs.ml
···11-open Printf
22-open Cmdliner
33-44-(** TODO:claude
55- Helper module for ImageMagick operations *)
66-module Imagemagick = struct
77- (* Generate thumbnail from PDF *)
88- let generate_thumbnail ~pdf_path ~size ~output_path =
99- let cmd =
1010- sprintf "magick -density 600 -quality 100 %s[0] -gravity North -crop 100%%x50%%+0+0 -resize %s %s"
1111- pdf_path size output_path
1212- in
1313- eprintf "Running: %s\n%!" cmd;
1414- Sys.command cmd
1515-end
1616-1717-(** TODO:claude
1818- Process a single paper to generate its thumbnail *)
1919-let process_paper base_dir output_dir paper =
2020- let slug = Bushel.Paper.slug paper in
2121- let pdf_path = sprintf "%s/static/papers/%s.pdf" base_dir slug in
2222- let thumbnail_path = sprintf "%s/%s.png" output_dir slug in
2323-2424- (* Skip if thumbnail already exists *)
2525- if Sys.file_exists thumbnail_path then (
2626- printf "Thumbnail already exists for %s, skipping\n%!" slug
2727- ) else if Sys.file_exists pdf_path then (
2828- try
2929- let size = sprintf "2048x" in
3030- printf "Generating high-res thumbnail for %s (size: %s)\n%!" slug size;
3131- match Imagemagick.generate_thumbnail ~pdf_path ~size ~output_path:thumbnail_path with
3232- | 0 -> printf "Successfully generated thumbnail for %s\n%!" slug
3333- | n -> eprintf "Error generating thumbnail for %s (exit code: %d)\n%!" slug n
3434- with
3535- | e -> eprintf "Error processing paper %s: %s\n%!" slug (Printexc.to_string e)
3636- ) else (
3737- eprintf "PDF file not found for paper: %s\n%!" slug
3838- )
3939-4040-(** TODO:claude
4141- Main function to process all papers in a directory *)
4242-let process_papers base_dir output_dir =
4343- (* Create output directory if it doesn't exist *)
4444- if not (Sys.file_exists output_dir) then (
4545- printf "Creating output directory: %s\n%!" output_dir;
4646- Unix.mkdir output_dir 0o755
4747- );
4848-4949- (* Load Bushel entries and get papers *)
5050- printf "Loading papers from %s\n%!" base_dir;
5151- let e = Bushel.load base_dir in
5252- let papers = Bushel.Entry.papers e in
5353-5454- (* Process each paper *)
5555- printf "Found %d papers\n%!" (List.length papers);
5656- List.iter (process_paper base_dir output_dir) papers
5757-5858-(* Command line arguments are now imported from Bushel_common *)
5959-6060-(* Export the term for use in main bushel.ml *)
6161-let term : (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Cmdliner.Term.t =
6262- Term.(const (fun base_dir output_dir _env _xdg _profile -> process_papers base_dir output_dir; 0) $
6363- Bushel_common.base_dir $ Bushel_common.output_dir ~default:".")
6464-6565-let cmd =
6666- let doc = "Generate thumbnails for paper PDFs" in
6767- let info = Cmd.info "thumbs" ~doc in
6868- Cmd.v info term
6969-7070-(* Main entry point removed - accessed through bushel_main.ml *)
-188
stack/bushel/bin/bushel_typesense.ml
···11-open Cmdliner
22-33-(** Bushel Typesense binary with upload and query functionality *)
44-55-let data_dir =
66- let doc = "Directory containing bushel data files" in
77- Arg.(value & opt string "." & info ["data-dir"; "d"] ~doc)
88-99-(** Main upload function *)
1010-let upload data_dir openai_key env _xdg _profile =
1111- let config = Bushel.Typesense.load_config_from_files () in
1212-1313- let config = { config with
1414- openai_key = if openai_key = "" then config.openai_key else openai_key
1515- } in
1616-1717- if config.api_key = "" then (
1818- Printf.eprintf "Error: API key is required. Use TYPESENSE_API_KEY environment variable or create .typesense-key file.\n";
1919- 1
2020- ) else if config.openai_key = "" then (
2121- Printf.eprintf "Error: OpenAI API key is required for embeddings. Use OPENAI_API_KEY environment variable or create .openrouter-api file.\n";
2222- 1
2323- ) else (
2424- Printf.printf "Loading bushel data from %s\n%!" data_dir;
2525- let entries = Bushel.load data_dir in
2626-2727- Printf.printf "Uploading bushel data to Typesense at %s\n%!" config.endpoint;
2828-2929- Eio.Switch.run (fun sw ->
3030- Bushel.Typesense.upload_all ~sw ~env config entries
3131- );
3232- 0
3333- )
3434-3535-3636-(** Query function *)
3737-let query query_text collection limit offset env _xdg _profile =
3838- let config = Bushel.Typesense.load_config_from_files () in
3939-4040- if config.api_key = "" then (
4141- Printf.eprintf "Error: API key is required. Use TYPESENSE_API_KEY environment variable or create .typesense-key file.\n";
4242- 1
4343- ) else (
4444- Printf.printf "Searching Typesense at %s\n%!" config.endpoint;
4545- Printf.printf "Query: \"%s\"\n%!" query_text;
4646- if collection <> "" then Printf.printf "Collection: %s\n%!" collection;
4747- Printf.printf "Limit: %d, Offset: %d\n\n%!" limit offset;
4848-4949- Eio.Switch.run (fun sw ->
5050- let search_fn = if collection = "" then
5151- Bushel.Typesense.search_all ~sw ~env config query_text ~limit ~offset
5252- else
5353- Bushel.Typesense.search_collection ~sw ~env config collection query_text ~limit ~offset
5454- in
5555- let result = search_fn () in
5656- match result with
5757- | Ok response ->
5858- Printf.printf "Found %d results (%.2fms)\n\n%!" response.total response.query_time;
5959- List.iteri (fun i (hit : Bushel.Typesense.search_result) ->
6060- Printf.printf "%d. [%s] %s (score: %.2f)\n%!" (i + 1) hit.collection hit.title hit.score;
6161- if hit.content <> "" then Printf.printf " %s\n%!" hit.content;
6262- if hit.highlights <> [] then (
6363- Printf.printf " Highlights:\n%!";
6464- List.iter (fun (field, snippets) ->
6565- List.iter (fun snippet ->
6666- Printf.printf " %s: %s\n%!" field snippet
6767- ) snippets
6868- ) hit.highlights
6969- );
7070- Printf.printf "\n%!"
7171- ) response.hits
7272- | Error err ->
7373- Format.eprintf "Search error: %a\n%!" Bushel.Typesense.pp_error err;
7474- exit 1
7575- );
7676- 0
7777- )
7878-7979-(** List collections function *)
8080-let list env _xdg _profile =
8181- let config = Bushel.Typesense.load_config_from_files () in
8282-8383- if config.api_key = "" then (
8484- Printf.eprintf "Error: API key is required. Use TYPESENSE_API_KEY environment variable or create .typesense-key file.\n";
8585- 1
8686- ) else (
8787- Printf.printf "Listing collections at %s\n\n%!" config.endpoint;
8888-8989- Eio.Switch.run (fun sw ->
9090- let result = Bushel.Typesense.list_collections ~sw ~env config in
9191- match result with
9292- | Ok collections ->
9393- Printf.printf "Collections:\n%!";
9494- List.iter (fun (name, count) ->
9595- Printf.printf " %s (%d documents)\n%!" name count
9696- ) collections
9797- | Error err ->
9898- Format.eprintf "List error: %a\n%!" Bushel.Typesense.pp_error err;
9999- exit 1
100100- );
101101- 0
102102- )
103103-104104-(** Command line arguments *)
105105-let openai_key =
106106- let doc = "OpenAI API key for embeddings" in
107107- Arg.(value & opt string "" & info ["openai-key"; "oa"] ~doc)
108108-109109-let query_text =
110110- let doc = "Search query text" in
111111- Arg.(required & pos 0 (some string) None & info [] ~docv:"QUERY" ~doc)
112112-113113-let collection =
114114- let doc = "Specific collection to search (contacts, papers, projects, notes, videos, ideas)" in
115115- Arg.(value & opt string "" & info ["collection"; "c"] ~doc)
116116-117117-let limit =
118118- let doc = "Maximum number of results to return" in
119119- Arg.(value & opt int 10 & info ["limit"; "l"] ~doc)
120120-121121-let offset =
122122- let doc = "Number of results to skip (for pagination)" in
123123- Arg.(value & opt int 0 & info ["offset"; "o"] ~doc)
124124-125125-(** Query command *)
126126-let query_cmd =
127127- let doc = "Search bushel collections in Typesense" in
128128- let man = [
129129- `S Manpage.s_description;
130130- `P "Search across all or specific bushel collections in Typesense.";
131131- `P "The API key can be read from .typesense-key file or TYPESENSE_API_KEY environment variable.";
132132- `S Manpage.s_examples;
133133- `P "Search all collections:";
134134- `Pre " bushel-typesense query \"machine learning\"";
135135- `P "Search specific collection:";
136136- `Pre " bushel-typesense query \"OCaml\" --collection papers";
137137- `P "Search with pagination:";
138138- `Pre " bushel-typesense query \"AI\" --limit 5 --offset 10";
139139- ] in
140140- Eiocmd.run
141141- ~info:(Cmd.info "query" ~doc ~man)
142142- ~app_name:"bushel-typesense"
143143- ~service:"typesense"
144144- Term.(const query $ query_text $ collection $ limit $ offset)
145145-146146-(** List command *)
147147-let list_cmd =
148148- let doc = "List all collections in Typesense" in
149149- let man = [
150150- `S Manpage.s_description;
151151- `P "List all available collections and their document counts.";
152152- ] in
153153- Eiocmd.run
154154- ~info:(Cmd.info "list" ~doc ~man)
155155- ~app_name:"bushel-typesense"
156156- ~service:"typesense"
157157- Term.(const list)
158158-159159-(** Upload command *)
160160-let upload_cmd =
161161- let doc = "Upload bushel collections to Typesense search engine" in
162162- let man = [
163163- `S Manpage.s_description;
164164- `P "Upload all bushel object types (contacts, papers, projects, notes, videos, ideas) to a Typesense search engine instance.";
165165- `P "The API keys can be read from files or environment variables.";
166166- `S Manpage.s_examples;
167167- `P "Upload to Typesense instance:";
168168- `Pre " bushel-typesense upload --data-dir /path/to/data";
169169- ] in
170170- Eiocmd.run
171171- ~info:(Cmd.info "upload" ~doc ~man)
172172- ~app_name:"bushel-typesense"
173173- ~service:"typesense"
174174- Term.(const upload $ data_dir $ openai_key)
175175-176176-(** Main command group *)
177177-let main_cmd =
178178- let doc = "Bushel Typesense client" in
179179- let man = [
180180- `S Manpage.s_description;
181181- `P "Client for uploading to and querying Bushel collections in Typesense search engine.";
182182- `S Manpage.s_commands;
183183- `S Manpage.s_common_options;
184184- ] in
185185- let info = Cmd.info "bushel-typesense" ~doc ~man in
186186- Cmd.group info [upload_cmd; query_cmd; list_cmd]
187187-188188-let () = exit (Cmd.eval' main_cmd)
-138
stack/bushel/bin/bushel_video.ml
···11-[@@@warning "-26-27-32"]
22-33-open Lwt.Infix
44-open Cmdliner
55-66-let setup_log style_renderer level =
77- Fmt_tty.setup_std_outputs ?style_renderer ();
88- Logs.set_level level;
99- Logs.set_reporter (Logs_fmt.reporter ());
1010- ()
1111-1212-let process_videos output_dir overwrite base_url channel fetch_thumbs thumbs_dir =
1313- Peertube.fetch_all_channel_videos base_url channel >>= fun all_videos ->
1414- Logs.info (fun f -> f "Total videos: %d" (List.length all_videos));
1515-1616- (* Create thumbnails directory if needed *)
1717- (if fetch_thumbs && not (Sys.file_exists thumbs_dir) then
1818- Unix.mkdir thumbs_dir 0o755);
1919-2020- (* Process each video, fetching full details for complete descriptions *)
2121- Lwt_list.map_s (fun video ->
2222- (* Fetch complete video details to get full description *)
2323- Peertube.fetch_video_details base_url video.Peertube.uuid >>= fun full_video ->
2424- let (description, published_date, title, url, uuid, slug) =
2525- Peertube.to_bushel_video full_video
2626- in
2727- Logs.info (fun f -> f "Title: %s, URL: %s" title url);
2828-2929- (* Download thumbnail if requested *)
3030- (if fetch_thumbs then
3131- let thumb_path = Filename.concat thumbs_dir (uuid ^ ".jpg") in
3232- Peertube.download_thumbnail base_url full_video thumb_path >>= fun result ->
3333- match result with
3434- | Ok () ->
3535- Logs.info (fun f -> f "Downloaded thumbnail for %s to %s" title thumb_path);
3636- Lwt.return_unit
3737- | Error (`Msg e) ->
3838- Logs.warn (fun f -> f "Failed to download thumbnail for %s: %s" title e);
3939- Lwt.return_unit
4040- else
4141- Lwt.return_unit) >>= fun () ->
4242-4343- Lwt.return {Bushel.Video.description; published_date; title; url; uuid; slug;
4444- talk=false; paper=None; project=None; tags=full_video.tags}
4545- ) all_videos >>= fun vids ->
4646-4747- (* Write video files *)
4848- Lwt_list.iter_s (fun video ->
4949- let file_path = Filename.concat output_dir (video.Bushel.Video.uuid ^ ".md") in
5050- let file_exists = Sys.file_exists file_path in
5151-5252- if file_exists then
5353- try
5454- (* If file exists, load it to preserve specific fields *)
5555- let existing_video = Bushel.Video.of_md file_path in
5656- (* Create merged video with preserved fields *)
5757- let merged_video = {
5858- video with
5959- tags = existing_video.tags; (* Preserve existing tags *)
6060- paper = existing_video.paper; (* Preserve paper field *)
6161- project = existing_video.project; (* Preserve project field *)
6262- talk = existing_video.talk; (* Preserve talk field *)
6363- } in
6464-6565- (* Write the merged video data *)
6666- if overwrite then
6767- match Bushel.Video.to_file output_dir merged_video with
6868- | Ok () ->
6969- Logs.info (fun f -> f "Updated video %s with preserved fields in %s"
7070- merged_video.Bushel.Video.title file_path);
7171- Lwt.return_unit
7272- | Error (`Msg e) ->
7373- Logs.err (fun f -> f "Failed to update video %s: %s"
7474- merged_video.Bushel.Video.title e);
7575- Lwt.return_unit
7676- else begin
7777- Logs.info (fun f -> f "Skipping existing video %s (use --overwrite to replace)"
7878- video.Bushel.Video.title);
7979- Lwt.return_unit
8080- end
8181- with _ ->
8282- (* If reading existing file fails, proceed with new data *)
8383- if overwrite then
8484- match Bushel.Video.to_file output_dir video with
8585- | Ok () ->
8686- Logs.info (fun f -> f "Wrote video %s to %s (existing file could not be read)"
8787- video.Bushel.Video.title file_path);
8888- Lwt.return_unit
8989- | Error (`Msg e) ->
9090- Logs.err (fun f -> f "Failed to write video %s: %s"
9191- video.Bushel.Video.title e);
9292- Lwt.return_unit
9393- else begin
9494- Logs.info (fun f -> f "Skipping existing video %s (use --overwrite to replace)"
9595- video.Bushel.Video.title);
9696- Lwt.return_unit
9797- end
9898- else
9999- (* If file doesn't exist, just write new data *)
100100- match Bushel.Video.to_file output_dir video with
101101- | Ok () ->
102102- Logs.info (fun f -> f "Wrote new video %s to %s"
103103- video.Bushel.Video.title file_path);
104104- Lwt.return_unit
105105- | Error (`Msg e) ->
106106- Logs.err (fun f -> f "Failed to write video %s: %s"
107107- video.Bushel.Video.title e);
108108- Lwt.return_unit
109109- ) vids
110110-111111-(* Command line arguments are now imported from Bushel_common *)
112112-113113-(* Export the term for use in main bushel.ml *)
114114-let term =
115115- let fetch_thumbs =
116116- let doc = "Download video thumbnails" in
117117- Arg.(value & flag & info ["fetch-thumbs"] ~doc)
118118- in
119119- let thumbs_dir =
120120- let doc = "Directory to save thumbnails (default: images/videos)" in
121121- Arg.(value & opt string "images/videos" & info ["thumbs-dir"] ~docv:"DIR" ~doc)
122122- in
123123- Term.(const (fun output_dir overwrite base_url channel fetch_thumbs thumbs_dir () ->
124124- Lwt_main.run (process_videos output_dir overwrite base_url channel fetch_thumbs thumbs_dir); 0)
125125- $ Bushel_common.output_dir ~default:"." $
126126- Bushel_common.overwrite $
127127- Bushel_common.url_term ~default:"https://crank.recoil.org" ~doc:"PeerTube base URL" $
128128- Bushel_common.channel ~default:"anil" $
129129- fetch_thumbs $
130130- thumbs_dir $
131131- Bushel_common.setup_term)
132132-133133-let cmd =
134134- let doc = "Fetch and process videos from PeerTube" in
135135- let info = Cmd.info "video" ~doc in
136136- Cmd.v info term
137137-138138-(* Main entry point removed - accessed through bushel_main.ml *)
-81
stack/bushel/bin/bushel_video_thumbs.ml
···11-[@@@warning "-26-27-32"]
22-33-open Lwt.Infix
44-open Cmdliner
55-66-let setup_log style_renderer level =
77- Fmt_tty.setup_std_outputs ?style_renderer ();
88- Logs.set_level level;
99- Logs.set_reporter (Logs_fmt.reporter ());
1010- ()
1111-1212-let process_video_thumbs videos_dir thumbs_dir base_url =
1313- (* Ensure thumbnail directory exists *)
1414- (if not (Sys.file_exists thumbs_dir) then
1515- Unix.mkdir thumbs_dir 0o755);
1616-1717- (* Read all video markdown files *)
1818- let video_files = Sys.readdir videos_dir
1919- |> Array.to_list
2020- |> List.filter (fun f -> Filename.check_suffix f ".md")
2121- |> List.map (fun f -> Filename.concat videos_dir f)
2222- in
2323-2424- Logs.info (fun f -> f "Found %d video files to process" (List.length video_files));
2525-2626- (* Process each video file *)
2727- Lwt_list.iter_s (fun video_file ->
2828- try
2929- (* Load existing video *)
3030- let video = Bushel.Video.of_md video_file in
3131- let uuid = video.Bushel.Video.uuid in
3232-3333- Logs.info (fun f -> f "Processing video: %s (UUID: %s)" video.title uuid);
3434-3535- (* Fetch video details from PeerTube to get thumbnail info *)
3636- Peertube.fetch_video_details base_url uuid >>= fun peertube_video ->
3737-3838- (* Download thumbnail *)
3939- let thumb_path = Filename.concat thumbs_dir (uuid ^ ".jpg") in
4040- Peertube.download_thumbnail base_url peertube_video thumb_path >>= fun result ->
4141-4242- match result with
4343- | Ok () ->
4444- Logs.info (fun f -> f "Downloaded thumbnail for %s to %s" video.title thumb_path);
4545-4646- (* Update video file with thumbnail_url field *)
4747- (match Peertube.thumbnail_url base_url peertube_video with
4848- | Some url ->
4949- Logs.info (fun f -> f "Thumbnail URL: %s" url);
5050- Lwt.return_unit
5151- | None ->
5252- Logs.warn (fun f -> f "No thumbnail URL for video %s" video.title);
5353- Lwt.return_unit)
5454- | Error (`Msg e) ->
5555- Logs.err (fun f -> f "Failed to download thumbnail for %s: %s" video.title e);
5656- Lwt.return_unit
5757- with exn ->
5858- Logs.err (fun f -> f "Error processing %s: %s" video_file (Printexc.to_string exn));
5959- Lwt.return_unit
6060- ) video_files
6161-6262-let term =
6363- let videos_dir =
6464- let doc = "Directory containing video markdown files" in
6565- Arg.(value & opt string "data/videos" & info ["videos-dir"; "d"] ~docv:"DIR" ~doc)
6666- in
6767- let thumbs_dir =
6868- let doc = "Directory to save thumbnails" in
6969- Arg.(value & opt string "images/videos" & info ["thumbs-dir"; "t"] ~docv:"DIR" ~doc)
7070- in
7171- Term.(const (fun videos_dir thumbs_dir base_url () ->
7272- Lwt_main.run (process_video_thumbs videos_dir thumbs_dir base_url); 0)
7373- $ videos_dir $
7474- thumbs_dir $
7575- Bushel_common.url_term ~default:"https://crank.recoil.org" ~doc:"PeerTube base URL" $
7676- Bushel_common.setup_term)
7777-7878-let cmd =
7979- let doc = "Download thumbnails for existing videos and update metadata" in
8080- let info = Cmd.info "video-thumbs" ~doc in
8181- Cmd.v info term
···11-type t =
22- { names : string list
33- ; handle : string
44- ; email : string option
55- ; icon : string option
66- ; github : string option
77- ; twitter : string option
88- ; bluesky : string option
99- ; mastodon : string option
1010- ; orcid : string option
1111- ; url : string option
1212- ; atom : string list option
1313- }
1414-1515-type ts = t list
1616-1717-let v ?email ?github ?twitter ?bluesky ?mastodon ?orcid ?icon ?url ?atom handle names =
1818- { names; handle; email; github; twitter; bluesky; mastodon; orcid; url; icon; atom }
1919-;;
2020-2121-let make names email icon github twitter bluesky mastodon orcid url atom =
2222- v ?email ?github ?twitter ?bluesky ?mastodon ?orcid ?icon ?url ?atom "" names
2323-;;
2424-2525-let names { names; _ } = names
2626-let name { names; _ } = List.hd names
2727-let handle { handle; _ } = handle
2828-let email { email; _ } = email
2929-let icon { icon; _ } = icon
3030-let github { github; _ } = github
3131-let twitter { twitter; _ } = twitter
3232-let bluesky { bluesky; _ } = bluesky
3333-let mastodon { mastodon; _ } = mastodon
3434-let orcid { orcid; _ } = orcid
3535-let url { url; _ } = url
3636-let atom { atom; _ } = atom
3737-3838-let json_t =
3939- let open Jsont in
4040- let open Jsont.Object in
4141- let mem_opt f v ~enc = mem f v ~dec_absent:None ~enc_omit:Option.is_none ~enc in
4242- map ~kind:"Contact" make
4343- |> mem "names" (list string) ~dec_absent:[] ~enc:names
4444- |> mem_opt "email" (some string) ~enc:email
4545- |> mem_opt "icon" (some string) ~enc:icon
4646- |> mem_opt "github" (some string) ~enc:github
4747- |> mem_opt "twitter" (some string) ~enc:twitter
4848- |> mem_opt "bluesky" (some string) ~enc:bluesky
4949- |> mem_opt "mastodon" (some string) ~enc:mastodon
5050- |> mem_opt "orcid" (some string) ~enc:orcid
5151- |> mem_opt "url" (some string) ~enc:url
5252- |> mem_opt "atom" (some (list string)) ~enc:atom
5353- |> finish
5454-;;
5555-5656-let v = Jsont_bytesrw.decode_string (Jsont.list json_t)
5757-let compare a b = String.compare a.handle b.handle
5858-let find_by_handle ts h = List.find_opt (fun { handle; _ } -> handle = h) ts
5959-6060-let best_url c =
6161- match c.url with
6262- | Some v -> Some v
6363- | None ->
6464- (match c.github with
6565- | Some v -> Some ("https://github.com/" ^ v)
6666- | None ->
6767- (match c.email with
6868- | Some v -> Some ("mailto:" ^ v)
6969- | None -> None))
7070-;;
7171-7272-let of_md fname =
7373- (* TODO fix Jekyll_post to not error on no date *)
7474- let fname' = "2000-01-01-" ^ Filename.basename fname in
7575- let handle = Filename.basename fname |> Filename.chop_extension in
7676- match Jekyll_post.of_string ~fname:fname' (Util.read_file fname) with
7777- | Error (`Msg m) -> failwith ("contact_of_md: " ^ m)
7878- | Ok jp ->
7979- let fields = jp.Jekyll_post.fields |> Jekyll_format.fields_to_yaml in
8080- let c = Jsont_bytesrw.decode_string json_t (Ezjsonm.value_to_string fields) in
8181- (match c with
8282- | Error e -> failwith e
8383- | Ok c -> { c with handle })
8484-;;
8585-8686-(* Given a name, turn it lowercase and return the concatenation of the
8787-initials of all the words in the name and the full last name. *)
8888-let handle_of_name name =
8989- let name = String.lowercase_ascii name in
9090- let words = String.split_on_char ' ' name in
9191- let initials = String.concat "" (List.map (fun w -> String.sub w 0 1) words) in
9292- initials ^ List.hd (List.rev words)
9393-;;
9494-9595-(* fuzzy lookup for an author. Strip out any non alpha numeric characters while
9696- searching for the name *)
9797-let lookup_by_name ts a =
9898- let a = String.lowercase_ascii a in
9999- let rec aux acc = function
100100- | [] -> acc
101101- | t :: ts ->
102102- if List.exists (fun n -> String.lowercase_ascii n = a) t.names
103103- then aux (t :: acc) ts
104104- else aux acc ts
105105- in
106106- match aux [] ts with
107107- | [ a ] -> a
108108- | [] -> raise (Failure ("contact.ml: author not found: " ^ a))
109109- | _ -> raise (Failure ("ambiguous author: " ^ a))
110110-;;
111111-112112-(* TODO:claude *)
113113-let typesense_schema =
114114- let open Ezjsonm in
115115- dict [
116116- ("name", string "contacts");
117117- ("fields", list (fun d -> dict d) [
118118- [("name", string "id"); ("type", string "string")];
119119- [("name", string "handle"); ("type", string "string")];
120120- [("name", string "name"); ("type", string "string")];
121121- [("name", string "names"); ("type", string "string[]"); ("optional", bool true)];
122122- [("name", string "email"); ("type", string "string[]"); ("optional", bool true)];
123123- [("name", string "icon"); ("type", string "string[]"); ("optional", bool true)];
124124- [("name", string "github"); ("type", string "string[]"); ("optional", bool true)];
125125- [("name", string "twitter"); ("type", string "string[]"); ("optional", bool true)];
126126- [("name", string "bluesky"); ("type", string "string[]"); ("optional", bool true)];
127127- [("name", string "mastodon"); ("type", string "string[]"); ("optional", bool true)];
128128- [("name", string "orcid"); ("type", string "string[]"); ("optional", bool true)];
129129- [("name", string "url"); ("type", string "string[]"); ("optional", bool true)];
130130- [("name", string "atom"); ("type", string "string[]"); ("optional", bool true)];
131131- ]);
132132- ]
133133-134134-(** TODO:claude Pretty-print a contact with ANSI formatting *)
135135-let pp ppf c =
136136- let open Fmt in
137137- pf ppf "@[<v>";
138138- pf ppf "%a: %a@," (styled `Bold string) "Type" (styled `Cyan string) "Contact";
139139- pf ppf "%a: @%a@," (styled `Bold string) "Handle" string (handle c);
140140- pf ppf "%a: %a@," (styled `Bold string) "Name" string (name c);
141141- let ns = names c in
142142- if List.length ns > 1 then
143143- pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Aliases" (list ~sep:comma string) (List.tl ns);
144144- (match email c with
145145- | Some e -> pf ppf "%a: %a@," (styled `Bold string) "Email" string e
146146- | None -> ());
147147- (match github c with
148148- | Some g -> pf ppf "%a: https://github.com/%a@," (styled `Bold string) "GitHub" string g
149149- | None -> ());
150150- (match twitter c with
151151- | Some t -> pf ppf "%a: https://twitter.com/%a@," (styled `Bold string) "Twitter" string t
152152- | None -> ());
153153- (match bluesky c with
154154- | Some b -> pf ppf "%a: %a@," (styled `Bold string) "Bluesky" string b
155155- | None -> ());
156156- (match mastodon c with
157157- | Some m -> pf ppf "%a: %a@," (styled `Bold string) "Mastodon" string m
158158- | None -> ());
159159- (match orcid c with
160160- | Some o -> pf ppf "%a: https://orcid.org/%a@," (styled `Bold string) "ORCID" string o
161161- | None -> ());
162162- (match url c with
163163- | Some u -> pf ppf "%a: %a@," (styled `Bold string) "URL" string u
164164- | None -> ());
165165- (match icon c with
166166- | Some i -> pf ppf "%a: %a@," (styled `Bold string) "Icon" string i
167167- | None -> ());
168168- (match atom c with
169169- | Some atoms when atoms <> [] ->
170170- pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Atom Feeds" (list ~sep:comma string) atoms
171171- | _ -> ());
172172- pf ppf "@]"
-25
stack/bushel/lib/contact.mli
···11-type t
22-type ts = t list
33-44-val v : string -> (ts, string) result
55-val names : t -> string list
66-val name : t -> string
77-val handle : t -> string
88-val email : t -> string option
99-val icon : t -> string option
1010-val github : t -> string option
1111-val twitter : t -> string option
1212-val bluesky : t -> string option
1313-val mastodon : t -> string option
1414-val orcid : t -> string option
1515-val url : t -> string option
1616-val atom : t -> string list option
1717-val best_url : t -> string option
1818-val find_by_handle : t list -> string -> t option
1919-val handle_of_name : string -> string
2020-val lookup_by_name : ts -> string -> t
2121-val json_t : t Jsont.t
2222-val compare : t -> t -> int
2323-val of_md : string -> t
2424-val typesense_schema : Ezjsonm.value
2525-val pp : Format.formatter -> t -> unit
-72
stack/bushel/lib/description.ml
···11-(** Generate descriptive text for bushel entries *)
22-33-(* Helper to format a date as "Month Year" *)
44-let format_date date =
55- let (year, month, _day) = date in
66- let month_name = match month with
77- | 1 -> "January" | 2 -> "February" | 3 -> "March" | 4 -> "April"
88- | 5 -> "May" | 6 -> "June" | 7 -> "July" | 8 -> "August"
99- | 9 -> "September" | 10 -> "October" | 11 -> "November" | 12 -> "December"
1010- | _ -> ""
1111- in
1212- Printf.sprintf "%s %d" month_name year
1313-1414-(* Generate a descriptive sentence for a paper *)
1515-let paper_description (p : Paper.t) ~date_str =
1616- let venue = match String.lowercase_ascii (Paper.bibtype p) with
1717- | "inproceedings" -> Paper.booktitle p
1818- | "article" -> Paper.journal p
1919- | "book" ->
2020- let pub = Paper.publisher p in
2121- if pub = "" then "Book" else "Book by " ^ pub
2222- | "techreport" ->
2323- (try "Technical report at " ^ Paper.institution p
2424- with _ -> "Technical report")
2525- | "misc" ->
2626- let pub = Paper.publisher p in
2727- if pub = "" then "Working paper" else "Working paper at " ^ pub
2828- | _ -> "Publication"
2929- in
3030- Printf.sprintf "Paper in %s (%s)" venue date_str
3131-3232-(* Generate a descriptive sentence for a note *)
3333-let note_description (n : Note.t) ~date_str ~lookup_fn =
3434- match Note.slug_ent n with
3535- | Some slug_ent ->
3636- (match lookup_fn slug_ent with
3737- | Some related_title ->
3838- Printf.sprintf "Note about %s (%s)" related_title date_str
3939- | None -> Printf.sprintf "Research note (%s)" date_str)
4040- | None -> Printf.sprintf "Research note (%s)" date_str
4141-4242-(* Generate a descriptive sentence for an idea *)
4343-let idea_description (i : Idea.t) ~date_str =
4444- let status_str = String.lowercase_ascii (Idea.status_to_string (Idea.status i)) in
4545- let level_str = Idea.level_to_string (Idea.level i) in
4646- Printf.sprintf "Research idea (%s, %s level, %s)" status_str level_str date_str
4747-4848-(* Generate a descriptive sentence for a video *)
4949-let video_description (v : Video.t) ~date_str ~lookup_fn =
5050- let video_type = if Video.talk v then "Talk video" else "Video" in
5151- let context = match Video.paper v with
5252- | Some paper_slug ->
5353- (match lookup_fn paper_slug with
5454- | Some title -> Printf.sprintf " about %s" title
5555- | None -> "")
5656- | None ->
5757- (match Video.project v with
5858- | Some project_slug ->
5959- (match lookup_fn project_slug with
6060- | Some title -> Printf.sprintf " about %s" title
6161- | None -> "")
6262- | None -> "")
6363- in
6464- Printf.sprintf "%s%s (%s)" video_type context date_str
6565-6666-(* Generate a descriptive sentence for a project *)
6767-let project_description (pr : Project.t) =
6868- let end_str = match pr.Project.finish with
6969- | Some year -> string_of_int year
7070- | None -> "present"
7171- in
7272- Printf.sprintf "Project (%d–%s)" pr.Project.start end_str
-19
stack/bushel/lib/description.mli
···11-(** Generate descriptive text for bushel entries *)
22-33-(** Format a date as "Month Year" *)
44-val format_date : int * int * int -> string
55-66-(** Generate a descriptive sentence for a paper with date string *)
77-val paper_description : Paper.t -> date_str:string -> string
88-99-(** Generate a descriptive sentence for a note with date string and lookup function *)
1010-val note_description : Note.t -> date_str:string -> lookup_fn:(string -> string option) -> string
1111-1212-(** Generate a descriptive sentence for an idea with date string *)
1313-val idea_description : Idea.t -> date_str:string -> string
1414-1515-(** Generate a descriptive sentence for a video with date string and lookup function *)
1616-val video_description : Video.t -> date_str:string -> lookup_fn:(string -> string option) -> string
1717-1818-(** Generate a descriptive sentence for a project *)
1919-val project_description : Project.t -> string
-147
stack/bushel/lib/doi_entry.ml
···11-module J = Ezjsonm
22-33-type status =
44- | Resolved
55- | Failed of string
66-77-type t = {
88- doi: string;
99- title: string;
1010- authors: string list;
1111- year: int;
1212- bibtype: string;
1313- publisher: string;
1414- resolved_at: string;
1515- source_urls: string list;
1616- status: status;
1717- ignore: bool;
1818-}
1919-2020-type ts = t list
2121-2222-let create_resolved ~doi ~title ~authors ~year ~bibtype ~publisher ?(source_urls=[]) () =
2323- let resolved_at =
2424- let now = Ptime_clock.now () in
2525- let rfc3339 = Ptime.to_rfc3339 ~space:false ~frac_s:0 now in
2626- String.sub rfc3339 0 10 (* Extract YYYY-MM-DD *)
2727- in
2828- { doi; title; authors; year; bibtype; publisher; resolved_at; source_urls; status = Resolved; ignore = false }
2929-3030-let create_failed ~doi ~error ?(source_urls=[]) () =
3131- let resolved_at =
3232- let now = Ptime_clock.now () in
3333- let rfc3339 = Ptime.to_rfc3339 ~space:false ~frac_s:0 now in
3434- String.sub rfc3339 0 10 (* Extract YYYY-MM-DD *)
3535- in
3636- { doi; title = ""; authors = []; year = 0; bibtype = ""; publisher = "";
3737- resolved_at; source_urls; status = Failed error; ignore = false }
3838-3939-let merge_entries old_entry new_entry =
4040- (* Combine source_urls, removing duplicates *)
4141- let combined_urls =
4242- List.sort_uniq String.compare (old_entry.source_urls @ new_entry.source_urls)
4343- in
4444- (* Use new_entry's data but with combined URLs and preserve ignore flag from old entry *)
4545- { new_entry with source_urls = combined_urls; ignore = old_entry.ignore }
4646-4747-let to_yaml_value entry =
4848- let status_field = match entry.status with
4949- | Resolved -> []
5050- | Failed err -> [("error", `String err)]
5151- in
5252- let source_urls_field = match entry.source_urls with
5353- | [] -> []
5454- | urls -> [("source_urls", `A (List.map (fun url -> `String url) urls))]
5555- in
5656- let ignore_field = if entry.ignore then [("ignore", `Bool true)] else [] in
5757- let fields = [
5858- ("doi", `String entry.doi);
5959- ("resolved_at", `String entry.resolved_at);
6060- ] @ status_field @ source_urls_field @ ignore_field in
6161- let fields = match entry.status with
6262- | Resolved ->
6363- fields @ [
6464- ("title", `String entry.title);
6565- ("authors", `A (List.map (fun a -> `String a) entry.authors));
6666- ("year", `Float (float_of_int entry.year));
6767- ("bibtype", `String entry.bibtype);
6868- ("publisher", `String entry.publisher);
6969- ]
7070- | Failed _ -> fields
7171- in
7272- `O fields
7373-7474-let of_yaml_value v =
7575- try
7676- let doi = J.find v ["doi"] |> J.get_string in
7777- let resolved_at = J.find v ["resolved_at"] |> J.get_string in
7878- (* Support both old source_url (single) and new source_urls (list) for backwards compatibility *)
7979- let source_urls =
8080- try
8181- J.find v ["source_urls"] |> J.get_list J.get_string
8282- with _ ->
8383- try
8484- let single_url = J.find v ["source_url"] |> J.get_string in
8585- [single_url]
8686- with _ -> []
8787- in
8888- let ignore = try J.find v ["ignore"] |> J.get_bool with _ -> false in
8989- let error = try Some (J.find v ["error"] |> J.get_string) with _ -> None in
9090- match error with
9191- | Some err ->
9292- { doi; title = ""; authors = []; year = 0; bibtype = ""; publisher = "";
9393- resolved_at; source_urls; status = Failed err; ignore }
9494- | None ->
9595- let title = J.find v ["title"] |> J.get_string in
9696- let authors = J.find v ["authors"] |> J.get_list J.get_string in
9797- let year = J.find v ["year"] |> J.get_float |> int_of_float in
9898- let bibtype = J.find v ["bibtype"] |> J.get_string in
9999- let publisher = J.find v ["publisher"] |> J.get_string in
100100- { doi; title; authors; year; bibtype; publisher; resolved_at; source_urls; status = Resolved; ignore }
101101- with e ->
102102- Printf.eprintf "Failed to parse DOI entry: %s\n%!" (Printexc.to_string e);
103103- failwith "Invalid DOI entry in YAML"
104104-105105-let load path =
106106- if not (Sys.file_exists path) then
107107- []
108108- else
109109- try
110110- let yaml_str = In_channel.with_open_text path In_channel.input_all in
111111- match Yaml.of_string yaml_str with
112112- | Ok (`A entries) -> List.map of_yaml_value entries
113113- | Ok _ -> []
114114- | Error (`Msg e) ->
115115- Printf.eprintf "Failed to parse %s: %s\n%!" path e;
116116- []
117117- with e ->
118118- Printf.eprintf "Failed to load %s: %s\n%!" path (Printexc.to_string e);
119119- []
120120-121121-let save path entries =
122122- let yaml_list = `A (List.map to_yaml_value entries) in
123123- let yaml_str = Yaml.to_string_exn yaml_list in
124124- Out_channel.with_open_text path (fun oc ->
125125- Out_channel.output_string oc yaml_str
126126- )
127127-128128-let to_map entries =
129129- let map = Hashtbl.create (List.length entries) in
130130- List.iter (fun entry -> Hashtbl.add map entry.doi entry) entries;
131131- map
132132-133133-let find_by_doi entries doi =
134134- List.find_opt (fun entry -> not entry.ignore && entry.doi = doi) entries
135135-136136-let find_by_url entries url =
137137- List.find_opt (fun entry ->
138138- not entry.ignore && List.mem url entry.source_urls
139139- ) entries
140140-141141-let find_by_doi_including_ignored entries doi =
142142- List.find_opt (fun entry -> entry.doi = doi) entries
143143-144144-let find_by_url_including_ignored entries url =
145145- List.find_opt (fun entry ->
146146- List.mem url entry.source_urls
147147- ) entries
-51
stack/bushel/lib/doi_entry.mli
···11-(** DOI entries resolved from external sources via Zotero Translation Server *)
22-33-type status =
44- | Resolved (** Successfully resolved from Zotero *)
55- | Failed of string (** Failed to resolve, with error message *)
66-77-type t = {
88- doi: string;
99- title: string;
1010- authors: string list;
1111- year: int;
1212- bibtype: string; (** article, inproceedings, book, etc *)
1313- publisher: string; (** journal/conference/publisher name *)
1414- resolved_at: string; (** ISO date when resolved *)
1515- source_urls: string list; (** All URLs that resolve to this DOI (publisher links, doi.org URLs, etc) *)
1616- status: status;
1717- ignore: bool; (** If true, skip this entry when looking up references *)
1818-}
1919-2020-type ts = t list
2121-2222-(** Load DOI entries from YAML file *)
2323-val load : string -> ts
2424-2525-(** Save DOI entries to YAML file *)
2626-val save : string -> ts -> unit
2727-2828-(** Convert list to hashtable for fast lookup by DOI *)
2929-val to_map : ts -> (string, t) Hashtbl.t
3030-3131-(** Find entry by DOI (excludes ignored entries) *)
3232-val find_by_doi : ts -> string -> t option
3333-3434-(** Find entry by source URL (searches through all source_urls, excludes ignored entries) *)
3535-val find_by_url : ts -> string -> t option
3636-3737-(** Find entry by DOI including ignored entries (for resolution checks) *)
3838-val find_by_doi_including_ignored : ts -> string -> t option
3939-4040-(** Find entry by source URL including ignored entries (for resolution checks) *)
4141-val find_by_url_including_ignored : ts -> string -> t option
4242-4343-(** Create a new resolved entry *)
4444-val create_resolved : doi:string -> title:string -> authors:string list ->
4545- year:int -> bibtype:string -> publisher:string -> ?source_urls:string list -> unit -> t
4646-4747-(** Create a new failed entry *)
4848-val create_failed : doi:string -> error:string -> ?source_urls:string list -> unit -> t
4949-5050-(** Merge two entries with the same DOI, combining their source_urls *)
5151-val merge_entries : t -> t -> t
···11-type entry =
22- [ `Paper of Paper.t
33- | `Project of Project.t
44- | `Idea of Idea.t
55- | `Video of Video.t
66- | `Note of Note.t
77- ]
88-99-type slugs = (string, entry) Hashtbl.t
1010-1111-type t =
1212- { slugs : slugs
1313- ; papers : Paper.ts
1414- ; old_papers : Paper.ts
1515- ; notes : Note.ts
1616- ; projects : Project.ts
1717- ; ideas : Idea.ts
1818- ; videos : Video.ts
1919- ; contacts : Contact.ts
2020- ; images : Srcsetter.ts
2121- ; doi_entries : Doi_entry.ts
2222- ; data_dir : string
2323- }
2424-2525-let contacts { contacts; _ } = contacts
2626-let videos { videos; _ } = videos
2727-let ideas { ideas; _ } = ideas
2828-let papers { papers; _ } = papers
2929-let notes { notes; _ } = notes
3030-let projects { projects; _ } = projects
3131-let images { images; _ } = images
3232-let doi_entries { doi_entries; _ } = doi_entries
3333-let data_dir { data_dir; _ } = data_dir
3434-3535-let v ~papers ~notes ~projects ~ideas ~videos ~contacts ~images ~data_dir =
3636- let slugs : slugs = Hashtbl.create 42 in
3737- let papers, old_papers = List.partition (fun p -> p.Paper.latest) papers in
3838- List.iter (fun n -> Hashtbl.add slugs n.Note.slug (`Note n)) notes;
3939- List.iter (fun p -> Hashtbl.add slugs p.Project.slug (`Project p)) projects;
4040- List.iter (fun i -> Hashtbl.add slugs i.Idea.slug (`Idea i)) ideas;
4141- List.iter (fun v -> Hashtbl.add slugs v.Video.slug (`Video v)) videos;
4242- List.iter (fun p -> Hashtbl.add slugs p.Paper.slug (`Paper p)) papers;
4343- (* Load DOI entries from doi.yml *)
4444- let doi_yml_path = Filename.concat data_dir "doi.yml" in
4545- let doi_entries = Doi_entry.load doi_yml_path in
4646- { slugs; papers; old_papers; notes; projects; ideas; videos; images; contacts; doi_entries; data_dir }
4747-;;
4848-4949-let lookup { slugs; _ } slug = Hashtbl.find_opt slugs slug
5050-let lookup_exn { slugs; _ } slug = Hashtbl.find slugs slug
5151-5252-let old_papers { old_papers; _ } = old_papers
5353-5454-let sidebar = function
5555- | `Note { Note.sidebar = Some s; _ } -> Some s
5656- | _ -> None
5757-;;
5858-5959-let to_type_string = function
6060- | `Paper _ -> "paper"
6161- | `Note _ -> "note"
6262- | `Project _ -> "project"
6363- | `Idea _ -> "idea"
6464- | `Video _ -> "video"
6565-;;
6666-6767-let synopsis = function
6868- | `Note n -> Note.synopsis n
6969- | _ -> None
7070-;;
7171-7272-let slug = function
7373- | `Paper p -> p.Paper.slug
7474- | `Note n -> n.Note.slug
7575- | `Project p -> p.Project.slug
7676- | `Idea i -> i.Idea.slug
7777- | `Video v -> v.Video.slug
7878-;;
7979-8080-let title = function
8181- | `Paper p -> Paper.title p
8282- | `Note n -> Note.title n
8383- | `Project p -> Project.title p
8484- | `Idea i -> Idea.title i
8585- | `Video v -> Video.title v
8686-;;
8787-8888-let body = function
8989- | `Paper _ -> ""
9090- | `Note n -> Note.body n
9191- | `Project p -> Project.body p
9292- | `Idea i -> Idea.body i
9393- | `Video _ -> ""
9494-;;
9595-9696-let site_url = function
9797- | `Paper p -> "/papers/" ^ p.Paper.slug
9898- | `Note n -> "/notes/" ^ n.Note.slug
9999- | `Project p -> "/projects/" ^ p.Project.slug
100100- | `Idea i -> "/ideas/" ^ i.Idea.slug
101101- | `Video v -> "/videos/" ^ v.Video.slug
102102-;;
103103-104104-(** Extract external URLs from markdown content *)
105105-let extract_external_links md =
106106- let open Cmarkit in
107107- let urls = ref [] in
108108-109109- let is_external_url url =
110110- (* XXX FIXME *)
111111- let is_bushel_slug = String.starts_with ~prefix:":" in
112112- let is_tag_slug = String.starts_with ~prefix:"##" in
113113- if is_bushel_slug url || is_tag_slug url then false
114114- else
115115- try
116116- let uri = Uri.of_string url in
117117- match Uri.scheme uri with
118118- | Some s when s = "http" || s = "https" -> true
119119- | Some _ -> true (* Any other scheme is considered external *)
120120- | None -> false (* Local references or relative paths *)
121121- with _ -> false
122122- in
123123-124124- let inline_mapper _ = function
125125- | Inline.Link (lb, _) | Inline.Image (lb, _) ->
126126- let ref = Inline.Link.reference lb in
127127- (match ref with
128128- | `Inline (ld, _) ->
129129- (match Link_definition.dest ld with
130130- | Some (url, _) when is_external_url url ->
131131- urls := url :: !urls;
132132- Mapper.default
133133- | _ -> Mapper.default)
134134- | `Ref (_, _, l) ->
135135- (* Get the referenced label definition and extract URL if it exists *)
136136- let defs = Doc.defs (Doc.of_string ~strict:false md) in
137137- (match Label.Map.find_opt (Label.key l) defs with
138138- | Some (Link_definition.Def (ld, _)) ->
139139- (match Link_definition.dest ld with
140140- | Some (url, _) when is_external_url url ->
141141- urls := url :: !urls
142142- | _ -> ())
143143- | _ -> ());
144144- Mapper.default)
145145- | Inline.Autolink (autolink, _) ->
146146- let url = Inline.Autolink.link autolink |> fst in
147147- if not (Inline.Autolink.is_email autolink) && is_external_url url then
148148- urls := url :: !urls;
149149- Mapper.default
150150- | _ -> Mapper.default
151151- in
152152-153153- let mapper = Mapper.make ~inline:inline_mapper () in
154154- let doc = Doc.of_string ~strict:false md in
155155- let _ = Mapper.map_doc mapper doc in
156156- List.sort_uniq String.compare !urls
157157-158158-let outgoing_links e = extract_external_links (body e)
159159-160160-let lookup_site_url t slug =
161161- match lookup t slug with
162162- | Some ent -> site_url ent
163163- | None -> ""
164164-165165-let lookup_title t slug =
166166- match lookup t slug with
167167- | Some ent -> title ent
168168- | None -> ""
169169-170170-171171-let date (x : entry) =
172172- match x with
173173- | `Paper p -> Paper.date p
174174- | `Note n -> Note.date n
175175- | `Project p -> p.Project.start, 1, 1
176176- | `Idea i -> i.Idea.year, i.Idea.month, 1
177177- | `Video v -> Video.date v
178178-;;
179179-180180-let datetime v = date v |> Ptime.of_date |> Option.get
181181-182182-let year x =
183183- match date x with
184184- | y, _, _ -> y
185185-;;
186186-187187-let is_index_entry = function
188188- | `Note { Note.index_page; _ } -> index_page
189189- | _ -> false
190190-;;
191191-192192-let notes_for_slug { notes; _ } slug =
193193- List.filter (fun n -> match Note.slug_ent n with Some s -> s = slug | None -> false) notes
194194-let all_entries { slugs; _ } = Hashtbl.fold (fun _ v acc -> v :: acc) slugs []
195195-196196-let all_papers { papers; old_papers; _ } =
197197- List.map (fun x -> `Paper x) (papers @ old_papers)
198198-;;
199199-200200-let compare a b =
201201- let datetime v = Option.get (Ptime.of_date v) in
202202- let da = datetime (date a) in
203203- let db = datetime (date b) in
204204- if da = db then compare (title a) (title b) else Ptime.compare da db
205205-;;
206206-207207-let lookup_by_name {contacts;_} n =
208208- match Contact.lookup_by_name contacts n with
209209- | v -> Some v
210210- | exception _ -> None
211211-212212-(** Extract the first image URL from markdown text *)
213213-let extract_first_image md =
214214- let open Cmarkit in
215215- (* Don't use bushel link resolver to avoid circular dependency *)
216216- let doc = Doc.of_string md in
217217- let found_image = ref None in
218218-219219- let find_image_in_inline _mapper = function
220220- | Inline.Image (img, _) ->
221221- (match Inline.Link.reference img with
222222- | `Inline (ld, _) ->
223223- (match Link_definition.dest ld with
224224- | Some (url, _) when !found_image = None ->
225225- found_image := Some url;
226226- Mapper.default
227227- | _ -> Mapper.default)
228228- | _ -> Mapper.default)
229229- | _ -> Mapper.default
230230- in
231231-232232- let mapper = Mapper.make ~inline:find_image_in_inline () in
233233- let _ = Mapper.map_doc mapper doc in
234234- !found_image
235235-;;
236236-237237-(** Extract the first video slug from markdown text by looking for bushel video links *)
238238-let extract_first_video entries md =
239239- let open Cmarkit in
240240- let doc = Doc.of_string md in
241241- let found_video = ref None in
242242-243243- let find_video_in_inline _mapper = function
244244- | Inline.Link (link, _) ->
245245- (match Inline.Link.reference link with
246246- | `Inline (ld, _) ->
247247- (match Link_definition.dest ld with
248248- | Some (url, _) when !found_video = None && String.starts_with ~prefix:":" url ->
249249- (* Check if this is a video slug *)
250250- let slug = String.sub url 1 (String.length url - 1) in
251251- (match lookup entries slug with
252252- | Some (`Video v) ->
253253- found_video := Some (Video.uuid v);
254254- Mapper.default
255255- | _ -> Mapper.default)
256256- | _ -> Mapper.default)
257257- | _ -> Mapper.default)
258258- | _ -> Mapper.default
259259- in
260260-261261- let mapper = Mapper.make ~inline:find_video_in_inline () in
262262- let _ = Mapper.map_doc mapper doc in
263263- !found_video
264264-;;
265265-266266-(** Look up an image in the srcsetter list by slug *)
267267-let lookup_image { images; _ } slug =
268268- List.find_opt (fun img -> Srcsetter.slug img = slug) images
269269-270270-(** Get the smallest webp variant from a srcsetter image - prefers size just above 480px *)
271271-let smallest_webp_variant img =
272272- let variants = Srcsetter.variants img in
273273- let webp_variants =
274274- Srcsetter.MS.bindings variants
275275- |> List.filter (fun (name, _) -> String.ends_with ~suffix:".webp" name)
276276- in
277277- match webp_variants with
278278- | [] ->
279279- (* No webp variants - use the name field which is always webp *)
280280- "/images/" ^ Srcsetter.name img
281281- | variants ->
282282- (* Prefer variants with width > 480px, choosing the smallest one above 480 *)
283283- let large_variants = List.filter (fun (_, (w, _)) -> w > 480) variants in
284284- let candidates = if large_variants = [] then variants else large_variants in
285285- (* Find the smallest variant from candidates *)
286286- let smallest = List.fold_left (fun acc (name, (w, h)) ->
287287- match acc with
288288- | None -> Some (name, w, h)
289289- | Some (_, min_w, _) when w < min_w -> Some (name, w, h)
290290- | _ -> acc
291291- ) None candidates in
292292- match smallest with
293293- | Some (name, _, _) -> "/images/" ^ name
294294- | None -> "/images/" ^ Srcsetter.name img
295295-296296-(** Get thumbnail slug for a contact *)
297297-let contact_thumbnail_slug contact =
298298- (* Contact images use just the handle as slug *)
299299- Some (Contact.handle contact)
300300-301301-(** Get thumbnail URL for a contact - resolved through srcsetter *)
302302-let contact_thumbnail entries contact =
303303- match contact_thumbnail_slug contact with
304304- | None -> None
305305- | Some thumb_slug ->
306306- match lookup_image entries thumb_slug with
307307- | Some img -> Some (smallest_webp_variant img)
308308- | None -> None (* Image not in srcsetter - thumbnails are optional *)
309309-310310-(** Get thumbnail slug for an entry with fallbacks *)
311311-let rec thumbnail_slug entries = function
312312- | `Paper p ->
313313- (* Slug is just the paper slug, directory is in the origin path *)
314314- Some (Paper.slug p)
315315-316316- | `Video v ->
317317- (* Videos use their UUID as the slug *)
318318- Some (Video.uuid v)
319319-320320- | `Project p ->
321321- (* Project images use "project-{slug}" format *)
322322- Some (Printf.sprintf "project-%s" p.Project.slug)
323323-324324- | `Idea i ->
325325- let is_active = match Idea.status i with
326326- | Idea.Available | Idea.Discussion | Idea.Ongoing -> true
327327- | Idea.Completed | Idea.Expired -> false
328328- in
329329- if is_active then
330330- (* Use first supervisor's face image *)
331331- let supervisors = Idea.supervisors i in
332332- match supervisors with
333333- | sup :: _ ->
334334- let handle = if String.length sup > 0 && sup.[0] = '@'
335335- then String.sub sup 1 (String.length sup - 1)
336336- else sup
337337- in
338338- (match Contact.find_by_handle (contacts entries) handle with
339339- | Some c ->
340340- (* Contact images use just the handle as slug *)
341341- Some (Contact.handle c)
342342- | None ->
343343- (* Fallback to project thumbnail *)
344344- let project_slug = Idea.project i in
345345- (match lookup entries project_slug with
346346- | Some p -> thumbnail_slug entries p
347347- | None -> None))
348348- | [] ->
349349- (* No supervisors, use project thumbnail *)
350350- let project_slug = Idea.project i in
351351- (match lookup entries project_slug with
352352- | Some p -> thumbnail_slug entries p
353353- | None -> None)
354354- else
355355- (* Use project thumbnail for completed/expired ideas *)
356356- let project_slug = Idea.project i in
357357- (match lookup entries project_slug with
358358- | Some p -> thumbnail_slug entries p
359359- | None -> None)
360360-361361- | `Note n ->
362362- (* Use titleimage if set, otherwise extract first image from body, then try video, otherwise use slug_ent's thumbnail *)
363363- (match Note.titleimage n with
364364- | Some slug ->
365365- (* Always treat titleimage as a bushel slug (without ':' prefix) *)
366366- Some slug
367367- | None ->
368368- (* Extract first image from markdown body *)
369369- match extract_first_image (Note.body n) with
370370- | Some url when String.starts_with ~prefix:":" url ->
371371- Some (String.sub url 1 (String.length url - 1))
372372- | Some _ -> None
373373- | None ->
374374- (* Try extracting first video from markdown body *)
375375- match extract_first_video entries (Note.body n) with
376376- | Some video_uuid -> Some video_uuid
377377- | None ->
378378- (* Fallback to slug_ent's thumbnail if present *)
379379- match Note.slug_ent n with
380380- | Some slug_ent ->
381381- (match lookup entries slug_ent with
382382- | Some entry -> thumbnail_slug entries entry
383383- | None -> None)
384384- | None -> None)
385385-386386-(** Get thumbnail URL for an entry with fallbacks - resolved through srcsetter *)
387387-let thumbnail entries entry =
388388- match thumbnail_slug entries entry with
389389- | None -> None
390390- | Some thumb_slug ->
391391- match lookup_image entries thumb_slug with
392392- | Some img -> Some (smallest_webp_variant img)
393393- | None ->
394394- (* For projects, fallback to supervisor faces if project image doesn't exist *)
395395- (match entry with
396396- | `Project p ->
397397- (* Find ideas for this project *)
398398- let project_ideas = List.filter (fun idea ->
399399- Idea.project idea = ":" ^ p.Project.slug
400400- ) (ideas entries) in
401401- (* Collect all unique supervisors from these ideas *)
402402- let all_supervisors =
403403- List.fold_left (fun acc idea ->
404404- List.fold_left (fun acc2 sup ->
405405- if List.mem sup acc2 then acc2 else sup :: acc2
406406- ) acc (Idea.supervisors idea)
407407- ) [] project_ideas
408408- in
409409- (* Split into avsm and others, preferring others first *)
410410- let (others, avsm) = List.partition (fun sup ->
411411- let handle = if String.length sup > 0 && sup.[0] = '@'
412412- then String.sub sup 1 (String.length sup - 1)
413413- else sup
414414- in
415415- handle <> "avsm"
416416- ) all_supervisors in
417417- (* Try supervisors in order: others first, then avsm *)
418418- let ordered_supervisors = others @ avsm in
419419- (* Try each supervisor's face image *)
420420- let rec try_supervisors = function
421421- | [] -> None
422422- | sup :: rest ->
423423- let handle = if String.length sup > 0 && sup.[0] = '@'
424424- then String.sub sup 1 (String.length sup - 1)
425425- else sup
426426- in
427427- (match Contact.find_by_handle (contacts entries) handle with
428428- | Some c ->
429429- (match lookup_image entries (Contact.handle c) with
430430- | Some img -> Some (smallest_webp_variant img)
431431- | None -> try_supervisors rest)
432432- | None -> try_supervisors rest)
433433- in
434434- try_supervisors ordered_supervisors
435435- | _ -> None)
436436-437437-(** Get thumbnail URL for a note with slug_ent *)
438438-let thumbnail_note_with_ent entries note_item =
439439- (* Use linked entry's thumbnail if slug_ent is set *)
440440- match Note.slug_ent note_item with
441441- | Some slug_ent ->
442442- (match lookup entries (":" ^ slug_ent) with
443443- | Some entry -> thumbnail entries entry
444444- | None ->
445445- (* Fallback to extracting first image from note body *)
446446- extract_first_image (Note.body note_item))
447447- | None ->
448448- (* No slug_ent, extract from note body *)
449449- extract_first_image (Note.body note_item)
-79
stack/bushel/lib/entry.mli
···11-type entry =
22- [ `Idea of Idea.t
33- | `Note of Note.t
44- | `Paper of Paper.t
55- | `Project of Project.t
66- | `Video of Video.t
77- ]
88-99-type slugs = (string, entry) Hashtbl.t
1010-type t
1111-1212-val contacts : t -> Contact.ts
1313-val videos : t -> Video.ts
1414-val ideas : t -> Idea.ts
1515-val papers : t -> Paper.ts
1616-val notes : t -> Note.ts
1717-val projects : t -> Project.ts
1818-val images : t -> Srcsetter.ts
1919-val doi_entries : t -> Doi_entry.ts
2020-val data_dir : t -> string
2121-2222-val v
2323- : papers:Paper.t list
2424- -> notes:Note.ts
2525- -> projects:Project.ts
2626- -> ideas:Idea.ts
2727- -> videos:Video.ts
2828- -> contacts:Contact.ts
2929- -> images:Srcsetter.ts
3030- -> data_dir:string
3131- -> t
3232-3333-val lookup : t -> string -> entry option
3434-val lookup_exn : t -> string -> entry
3535-val lookup_site_url : t -> string -> string
3636-val lookup_title : t -> string -> string
3737-val lookup_by_name : t -> string -> Contact.t option
3838-val old_papers : t -> Paper.ts
3939-val sidebar : [> `Note of Note.t ] -> string option
4040-val to_type_string : entry -> string
4141-val slug : entry -> string
4242-val title : entry -> string
4343-val body : entry -> string
4444-val extract_external_links : string -> string list
4545-val outgoing_links : entry -> string list
4646-4747-(* FIXME move to view *)
4848-val site_url : entry -> string
4949-val date : entry -> Ptime.date
5050-val datetime : entry -> Ptime.t
5151-val year : entry -> int
5252-val synopsis : entry -> string option
5353-5454-val is_index_entry : entry -> bool
5555-val notes_for_slug : t -> string -> Note.t list
5656-val all_entries : t -> entry list
5757-val all_papers : t -> entry list
5858-val compare : entry -> entry -> int
5959-6060-(** Look up an image in the srcsetter list by slug *)
6161-val lookup_image : t -> string -> Srcsetter.t option
6262-6363-(** Get the smallest webp variant from a srcsetter image *)
6464-val smallest_webp_variant : Srcsetter.t -> string
6565-6666-(** Get thumbnail slug for a contact *)
6767-val contact_thumbnail_slug : Contact.t -> string option
6868-6969-(** Get thumbnail URL for a contact - resolved through srcsetter *)
7070-val contact_thumbnail : t -> Contact.t -> string option
7171-7272-(** Get thumbnail slug for an entry with fallbacks *)
7373-val thumbnail_slug : t -> entry -> string option
7474-7575-(** Get thumbnail URL for an entry with fallbacks - resolved through srcsetter *)
7676-val thumbnail : t -> entry -> string option
7777-7878-(** Get thumbnail URL for a note with slug_ent *)
7979-val thumbnail_note_with_ent : t -> Note.t -> string option
-223
stack/bushel/lib/idea.ml
···11-type level =
22- | Any
33- | PartII
44- | MPhil
55- | PhD
66- | Postdoc
77-88-let level_of_yaml = function
99- | `String ("Any" | "any") -> Ok Any
1010- | `String ("PartII" | "partii") -> Ok PartII
1111- | `String ("MPhil" | "mphil") -> Ok MPhil
1212- | `String ("PhD" | "phd") -> Ok PhD
1313- | `String ("postdoc" | "Postdoc") -> Ok Postdoc
1414- | _ -> Error (`Msg "level_of_yaml")
1515-;;
1616-1717-let level_to_string = function
1818- | Any -> "Any"
1919- | PartII -> "PartII"
2020- | MPhil -> "MPhil"
2121- | PhD -> "PhD"
2222- | Postdoc -> "postdoctoral"
2323-;;
2424-2525-let level_to_tag = function
2626- | Any -> "idea-beginner"
2727- | PartII -> "idea-medium"
2828- | MPhil -> "idea-hard"
2929- | PhD -> "idea-phd"
3030- | Postdoc -> "idea-postdoc"
3131-;;
3232-3333-let level_to_yaml s = `String (level_to_string s)
3434-3535-type status =
3636- | Available
3737- | Discussion
3838- | Ongoing
3939- | Completed
4040- | Expired
4141-4242-let status_of_yaml = function
4343- | `String ("Available" | "available") -> Ok Available
4444- | `String ("Discussion" | "discussion") -> Ok Discussion
4545- | `String ("Ongoing" | "ongoing") -> Ok Ongoing
4646- | `String ("Completed" | "completed") -> Ok Completed
4747- | `String ("Expired" | "expired") -> Ok Expired
4848- | _ -> Error (`Msg "status_of_yaml")
4949-;;
5050-5151-let status_to_string = function
5252- | Available -> "Available"
5353- | Discussion -> "Discussion"
5454- | Ongoing -> "Ongoing"
5555- | Completed -> "Completed"
5656- | Expired -> "Expired"
5757-;;
5858-5959-let status_to_tag = function
6060- | Available -> "idea-available"
6161- | Discussion -> "idea-discuss"
6262- | Ongoing -> "idea-ongoing"
6363- | Completed -> "idea-done"
6464- | Expired -> "idea-expired"
6565-;;
6666-6767-let status_to_yaml s = `String (status_to_string s)
6868-6969-type t =
7070- { slug : string
7171- ; title : string
7272- ; level : level
7373- ; project : string
7474- ; status : status
7575- ; month: int
7676- ; year : int
7777- ; supervisors : string list
7878- ; students : string list
7979- ; reading : string
8080- ; body : string
8181- ; url : string option
8282- ; tags : string list
8383- }
8484-8585-type ts = t list
8686-8787-let title i = i.title
8888-let supervisors i = i.supervisors
8989-let students i = i.students
9090-let reading i = i.reading
9191-let status i = i.status
9292-let level i = i.level
9393-let year i = i.year
9494-let body i = i.body
9595-let project i = i.project
9696-9797-let compare a b =
9898- match compare a.status b.status with
9999- | 0 ->
100100- (match a.status with
101101- | Completed -> compare b.year a.year
102102- | _ ->
103103- (match compare a.level b.level with
104104- | 0 -> begin
105105- match compare b.year a.year with
106106- | 0 -> compare b.month a.month
107107- | n -> n
108108- end
109109- | n -> n))
110110- | n -> n
111111-;;
112112-113113-let of_md fname =
114114- match Jekyll_post.of_string ~fname:(Filename.basename fname) (Util.read_file fname) with
115115- | Error _ -> failwith "TODO"
116116- | Ok jp ->
117117- let fields = jp.Jekyll_post.fields in
118118- let y = Jekyll_format.fields_to_yaml fields in
119119- let year, month, _ = jp.Jekyll_post.date |> Ptime.to_date in
120120- let body = jp.Jekyll_post.body in
121121- let string f = Yaml.Util.(find_exn f y |> Option.get |> to_string |> Result.get_ok) in
122122- let string' f d =
123123- try Yaml.Util.(find_exn f y |> Option.get |> to_string |> Result.get_ok) with
124124- | _ -> d
125125- in
126126- let to_list = function
127127- | `A l -> Ok l
128128- | _ -> Error (`Msg "to_list")
129129- in
130130- let strings f =
131131- try
132132- Yaml.Util.(
133133- find_exn f y
134134- |> Option.get
135135- |> to_list
136136- |> Result.get_ok
137137- |> List.map (fun x -> to_string x |> Result.get_ok))
138138- with
139139- | _exn -> []
140140- in
141141- let level =
142142- Yaml.Util.(find_exn "level" y |> Option.get |> level_of_yaml |> Result.get_ok)
143143- in
144144- let status =
145145- Yaml.Util.(find_exn "status" y |> Option.get |> status_of_yaml |> Result.get_ok)
146146- in
147147- let slug = jp.Jekyll_post.slug in
148148- { slug
149149- ; title = string "title"
150150- ; level
151151- ; project = string "project"
152152- ; status
153153- ; supervisors = strings "supervisors"
154154- ; students = strings "students"
155155- ; tags = strings "tags"
156156- ; reading = string' "reading" ""
157157- ; month
158158- ; year
159159- ; body
160160- ; url = None (* TODO *)
161161- }
162162-;;
163163-164164-let lookup ideas slug = List.find_opt (fun i -> i.slug = slug) ideas
165165-166166-(* TODO:claude *)
167167-let typesense_schema =
168168- let open Ezjsonm in
169169- dict [
170170- ("name", string "ideas");
171171- ("fields", list (fun d -> dict d) [
172172- [("name", string "id"); ("type", string "string")];
173173- [("name", string "title"); ("type", string "string")];
174174- [("name", string "description"); ("type", string "string")];
175175- [("name", string "year"); ("type", string "int32")];
176176- [("name", string "date"); ("type", string "string")];
177177- [("name", string "date_timestamp"); ("type", string "int64")];
178178- [("name", string "tags"); ("type", string "string[]"); ("facet", bool true)];
179179- [("name", string "level"); ("type", string "string"); ("facet", bool true)];
180180- [("name", string "status"); ("type", string "string"); ("facet", bool true)];
181181- [("name", string "project"); ("type", string "string"); ("facet", bool true)];
182182- [("name", string "supervisors"); ("type", string "string[]"); ("optional", bool true)];
183183- [("name", string "body"); ("type", string "string"); ("optional", bool true)];
184184- [("name", string "students"); ("type", string "string[]"); ("optional", bool true)];
185185- [("name", string "reading"); ("type", string "string"); ("optional", bool true)];
186186- [("name", string "url"); ("type", string "string"); ("optional", bool true)];
187187- ]);
188188- ("default_sorting_field", string "date_timestamp");
189189- ]
190190-191191-(** TODO:claude Pretty-print an idea with ANSI formatting *)
192192-let pp ppf i =
193193- let open Fmt in
194194- pf ppf "@[<v>";
195195- pf ppf "%a: %a@," (styled `Bold string) "Type" (styled `Cyan string) "Idea";
196196- pf ppf "%a: %a@," (styled `Bold string) "Slug" string i.slug;
197197- pf ppf "%a: %a@," (styled `Bold string) "Title" string (title i);
198198- pf ppf "%a: %a@," (styled `Bold string) "Level" string (level_to_string (level i));
199199- pf ppf "%a: %a@," (styled `Bold string) "Status" string (status_to_string (status i));
200200- pf ppf "%a: %a@," (styled `Bold string) "Project" string (project i);
201201- pf ppf "%a: %04d-%02d@," (styled `Bold string) "Date" (year i) i.month;
202202- let sups = supervisors i in
203203- if sups <> [] then
204204- pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Supervisors" (list ~sep:comma string) sups;
205205- let studs = students i in
206206- if studs <> [] then
207207- pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Students" (list ~sep:comma string) studs;
208208- (match i.url with
209209- | Some url -> pf ppf "%a: %a@," (styled `Bold string) "URL" string url
210210- | None -> ());
211211- let t = i.tags in
212212- if t <> [] then
213213- pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Tags" (list ~sep:comma string) t;
214214- let r = reading i in
215215- if r <> "" then begin
216216- pf ppf "@,";
217217- pf ppf "%a:@," (styled `Bold string) "Reading";
218218- pf ppf "%a@," string r;
219219- end;
220220- pf ppf "@,";
221221- pf ppf "%a:@," (styled `Bold string) "Body";
222222- pf ppf "%a@," string (body i);
223223- pf ppf "@]"
-55
stack/bushel/lib/idea.mli
···11-type level =
22- | Any
33- | PartII
44- | MPhil
55- | PhD
66- | Postdoc
77-88-type status =
99- | Available
1010- | Discussion
1111- | Ongoing
1212- | Completed
1313- | Expired
1414-1515-val level_of_yaml : Ezjsonm.value -> (level, [> `Msg of string ]) result
1616-val level_to_string : level -> string
1717-val level_to_tag : level -> string
1818-val level_to_yaml : level -> Ezjsonm.value
1919-val status_of_yaml : Ezjsonm.value -> (status, [> `Msg of string ]) result
2020-val status_to_string : status -> string
2121-val status_to_tag : status -> string
2222-val status_to_yaml : status -> Ezjsonm.value
2323-2424-type t =
2525- { slug : string
2626- ; title : string
2727- ; level : level
2828- ; project : string
2929- ; status : status
3030- ; month : int
3131- ; year : int
3232- ; supervisors : string list
3333- ; students : string list
3434- ; reading : string
3535- ; body : string
3636- ; url : string option
3737- ; tags : string list
3838- }
3939-4040-type ts = t list
4141-4242-val title : t -> string
4343-val supervisors : t -> string list
4444-val students : t -> string list
4545-val reading : t -> string
4646-val status : t -> status
4747-val level : t -> level
4848-val year : t -> int
4949-val body : t -> string
5050-val project : t -> string
5151-val compare : t -> t -> int
5252-val lookup : t list -> string -> t option
5353-val of_md : string -> t
5454-val typesense_schema : Ezjsonm.value
5555-val pp : Format.formatter -> t -> unit
-296
stack/bushel/lib/link.ml
···11-type karakeep_data = {
22- remote_url : string;
33- id : string;
44- tags : string list;
55- metadata : (string * string) list;
66-}
77-88-type bushel_data = {
99- slugs : string list;
1010- tags : string list;
1111-}
1212-1313-type t = {
1414- url : string;
1515- date : Ptime.date;
1616- description : string;
1717- karakeep : karakeep_data option;
1818- bushel : bushel_data option;
1919-}
2020-2121-type ts = t list
2222-2323-let url { url; _ } = url
2424-let date { date; _ } = date
2525-let description { description; _ } = description
2626-let datetime v = Option.get @@ Ptime.of_date @@ date v
2727-let compare a b = Ptime.compare (datetime b) (datetime a)
2828-2929-(* Convert YAML to Link.t *)
3030-let t_of_yaml = function
3131- | `O fields ->
3232- let url =
3333- match List.assoc_opt "url" fields with
3434- | Some (`String v) -> v
3535- | _ -> failwith "link: missing or invalid url"
3636- in
3737- let date =
3838- match List.assoc_opt "date" fields with
3939- | Some (`String v) -> begin
4040- try
4141- match Scanf.sscanf v "%04d-%02d-%02d" (fun y m d -> (y, m, d)) with
4242- | (y, m, d) -> (y, m, d)
4343- with _ ->
4444- (* Fall back to RFC3339 parsing for backward compatibility *)
4545- v |> Ptime.of_rfc3339 |> Result.get_ok |> fun (a, _, _) -> Ptime.to_date a
4646- end
4747- | _ -> failwith "link: missing or invalid date"
4848- in
4949- let description =
5050- match List.assoc_opt "description" fields with
5151- | Some (`String v) -> v
5252- | _ -> ""
5353- in
5454- let karakeep =
5555- match List.assoc_opt "karakeep" fields with
5656- | Some (`O k_fields) ->
5757- let remote_url =
5858- match List.assoc_opt "remote_url" k_fields with
5959- | Some (`String v) -> v
6060- | _ -> failwith "link: invalid karakeep.remote_url"
6161- in
6262- let id =
6363- match List.assoc_opt "id" k_fields with
6464- | Some (`String v) -> v
6565- | _ -> failwith "link: invalid karakeep.id"
6666- in
6767- let tags =
6868- match List.assoc_opt "tags" k_fields with
6969- | Some (`A tag_list) ->
7070- List.fold_left (fun acc tag ->
7171- match tag with
7272- | `String t -> t :: acc
7373- | _ -> acc
7474- ) [] tag_list
7575- |> List.rev
7676- | _ -> []
7777- in
7878- let metadata =
7979- match List.assoc_opt "metadata" k_fields with
8080- | Some (`O meta_fields) ->
8181- List.fold_left (fun acc (k, v) ->
8282- match v with
8383- | `String value -> (k, value) :: acc
8484- | _ -> acc
8585- ) [] meta_fields
8686- | _ -> []
8787- in
8888- Some { remote_url; id; tags; metadata }
8989- | _ -> None
9090- in
9191- let bushel =
9292- match List.assoc_opt "bushel" fields with
9393- | Some (`O b_fields) ->
9494- let slugs =
9595- match List.assoc_opt "slugs" b_fields with
9696- | Some (`A slug_list) ->
9797- List.fold_left (fun acc slug ->
9898- match slug with
9999- | `String s -> s :: acc
100100- | _ -> acc
101101- ) [] slug_list
102102- |> List.rev
103103- | _ -> []
104104- in
105105- let tags =
106106- match List.assoc_opt "tags" b_fields with
107107- | Some (`A tag_list) ->
108108- List.fold_left (fun acc tag ->
109109- match tag with
110110- | `String t -> t :: acc
111111- | _ -> acc
112112- ) [] tag_list
113113- |> List.rev
114114- | _ -> []
115115- in
116116- Some { slugs; tags }
117117- | _ -> None
118118- in
119119- { url; date; description; karakeep; bushel }
120120- | _ -> failwith "invalid yaml"
121121-122122-(* Read file contents *)
123123-let read_file file = In_channel.(with_open_bin file input_all)
124124-125125-(* Load links from a YAML file *)
126126-let of_md fname =
127127- match Yaml.of_string_exn (read_file fname) with
128128- | `A links ->
129129- List.map t_of_yaml links
130130- | `O _ as single_link ->
131131- [t_of_yaml single_link]
132132- | _ -> failwith "link_of_md: expected array or object"
133133-134134-(* Convert Link.t to YAML *)
135135-let to_yaml t =
136136- let (year, month, day) = t.date in
137137- let date_str = Printf.sprintf "%04d-%02d-%02d" year month day in
138138-139139- (* Create base fields *)
140140- let base_fields = [
141141- ("url", `String t.url);
142142- ("date", `String date_str);
143143- ] @
144144- (if t.description = "" then [] else [("description", `String t.description)])
145145- in
146146-147147- (* Add karakeep data if present *)
148148- let karakeep_fields =
149149- match t.karakeep with
150150- | Some { remote_url; id; tags; metadata } ->
151151- let karakeep_obj = [
152152- ("remote_url", `String remote_url);
153153- ("id", `String id);
154154- ] in
155155- let karakeep_obj =
156156- if tags = [] then karakeep_obj
157157- else ("tags", `A (List.map (fun t -> `String t) tags)) :: karakeep_obj
158158- in
159159- let karakeep_obj =
160160- if metadata = [] then karakeep_obj
161161- else ("metadata", `O (List.map (fun (k, v) -> (k, `String v)) metadata)) :: karakeep_obj
162162- in
163163- [("karakeep", `O karakeep_obj)]
164164- | None -> []
165165- in
166166-167167- (* Add bushel data if present *)
168168- let bushel_fields =
169169- match t.bushel with
170170- | Some { slugs; tags } ->
171171- let bushel_obj = [] in
172172- let bushel_obj =
173173- if slugs = [] then bushel_obj
174174- else ("slugs", `A (List.map (fun s -> `String s) slugs)) :: bushel_obj
175175- in
176176- let bushel_obj =
177177- if tags = [] then bushel_obj
178178- else ("tags", `A (List.map (fun t -> `String t) tags)) :: bushel_obj
179179- in
180180- if bushel_obj = [] then [] else [("bushel", `O bushel_obj)]
181181- | None -> []
182182- in
183183-184184- `O (base_fields @ karakeep_fields @ bushel_fields)
185185-186186-(* Write a link to a file in the output directory *)
187187-let to_file output_dir t =
188188- let filename =
189189- let (y, m, d) = t.date in
190190- let hash = Digest.string t.url |> Digest.to_hex in
191191- let short_hash = String.sub hash 0 8 in
192192- Printf.sprintf "%04d-%02d-%02d-%s.md" y m d short_hash
193193- in
194194- let file_path = Fpath.v (Filename.concat output_dir filename) in
195195- let yaml = to_yaml t in
196196- let yaml_str = Yaml.to_string_exn yaml in
197197- let content = "---\n" ^ yaml_str ^ "---\n" in
198198- Bos.OS.File.write file_path content
199199-200200-(* Load links from a YAML file *)
201201-let load_links_file path =
202202- try
203203- let yaml_str = In_channel.(with_open_bin path input_all) in
204204- match Yaml.of_string_exn yaml_str with
205205- | `A links -> List.map t_of_yaml links
206206- | _ -> []
207207- with _ -> []
208208-209209-(* Save links to a YAML file *)
210210-let save_links_file path links =
211211- try
212212- let yaml = `A (List.map to_yaml links) in
213213- let yaml_str = Yaml.to_string_exn ~len:4200000 yaml in
214214- let oc = open_out path in
215215- output_string oc yaml_str;
216216- close_out oc
217217- with e ->
218218- Printf.eprintf "Error saving links file: %s\n%!" (Printexc.to_string e);
219219- Printf.eprintf "Attempting to save with smaller length limit...\n%!";
220220- let yaml = `A (List.map to_yaml links) in
221221- let yaml_str = Yaml.to_string_exn ~len:800000 yaml in
222222- let oc = open_out path in
223223- output_string oc yaml_str;
224224- close_out oc
225225-226226-(* Merge two lists of links, combining metadata from duplicates *)
227227-let merge_links ?(prefer_new_date=false) existing new_links =
228228- let links_by_url = Hashtbl.create (List.length existing) in
229229-230230- (* Add existing links to hashtable *)
231231- List.iter (fun link ->
232232- Hashtbl.replace links_by_url link.url link
233233- ) existing;
234234-235235- (* Merge new links with existing ones *)
236236- List.iter (fun new_link ->
237237- match Hashtbl.find_opt links_by_url new_link.url with
238238- | None ->
239239- (* New link not in existing links *)
240240- Hashtbl.add links_by_url new_link.url new_link
241241- | Some old_link ->
242242- (* Merge link data, prefer newer data for fields *)
243243- let title =
244244- if new_link.description <> "" then new_link.description
245245- else old_link.description
246246- in
247247-248248- (* Combine karakeep data (prefer new over old) *)
249249- let karakeep =
250250- match new_link.karakeep, old_link.karakeep with
251251- | Some new_k, Some old_k when new_k.remote_url = old_k.remote_url ->
252252- (* Same remote, merge the data *)
253253- let merged_metadata =
254254- let meta_tbl = Hashtbl.create (List.length old_k.metadata) in
255255- List.iter (fun (k, v) -> Hashtbl.replace meta_tbl k v) old_k.metadata;
256256- List.iter (fun (k, v) -> Hashtbl.replace meta_tbl k v) new_k.metadata;
257257- Hashtbl.fold (fun k v acc -> (k, v) :: acc) meta_tbl []
258258- in
259259- let merged_tags = List.sort_uniq String.compare (old_k.tags @ new_k.tags) in
260260- Some { new_k with metadata = merged_metadata; tags = merged_tags }
261261- | Some new_k, _ -> Some new_k
262262- | None, old_k -> old_k
263263- in
264264-265265- (* Combine bushel data *)
266266- let bushel =
267267- match new_link.bushel, old_link.bushel with
268268- | Some new_b, Some old_b ->
269269- (* Merge slugs and tags *)
270270- let merged_slugs = List.sort_uniq String.compare (old_b.slugs @ new_b.slugs) in
271271- let merged_tags = List.sort_uniq String.compare (old_b.tags @ new_b.tags) in
272272- Some { slugs = merged_slugs; tags = merged_tags }
273273- | Some new_b, _ -> Some new_b
274274- | None, old_b -> old_b
275275- in
276276-277277- (* Combined link - prefer new date when requested (for bushel entries) *)
278278- let date =
279279- if prefer_new_date then new_link.date
280280- else if compare new_link old_link > 0 then new_link.date
281281- else old_link.date
282282- in
283283- let merged_link = {
284284- url = new_link.url;
285285- date;
286286- description = title;
287287- karakeep;
288288- bushel
289289- } in
290290- Hashtbl.replace links_by_url new_link.url merged_link
291291- ) new_links;
292292-293293- (* Convert hashtable back to list and sort by date *)
294294- Hashtbl.to_seq_values links_by_url
295295- |> List.of_seq
296296- |> List.sort compare
-34
stack/bushel/lib/link.mli
···11-type karakeep_data = {
22- remote_url : string;
33- id : string;
44- tags : string list;
55- metadata : (string * string) list;
66-}
77-88-type bushel_data = {
99- slugs : string list;
1010- tags : string list;
1111-}
1212-1313-type t = {
1414- url : string;
1515- date : Ptime.date;
1616- description : string;
1717- karakeep : karakeep_data option;
1818- bushel : bushel_data option;
1919-}
2020-2121-type ts = t list
2222-2323-val compare : t -> t -> int
2424-val url : t -> string
2525-val date : t -> Ptime.date
2626-val datetime : t -> Ptime.t
2727-val description : t -> string
2828-val of_md : string -> ts
2929-val to_yaml : t -> Yaml.value
3030-val t_of_yaml : Yaml.value -> t
3131-val to_file : string -> t -> (unit, [> `Msg of string]) result
3232-val load_links_file : string -> ts
3333-val save_links_file : string -> ts -> unit
3434-val merge_links : ?prefer_new_date:bool -> ts -> ts -> ts
-317
stack/bushel/lib/link_graph.ml
···11-module StringSet = Set.Make(String)
22-33-type entry_type = [ `Paper | `Project | `Note | `Idea | `Video | `Contact ]
44-55-type internal_link = {
66- source: string;
77- target: string;
88- target_type: entry_type;
99-}
1010-1111-type external_link = {
1212- source: string;
1313- domain: string;
1414- url: string;
1515-}
1616-1717-type link_graph = {
1818- (* All links *)
1919- mutable internal_links: internal_link list;
2020- mutable external_links: external_link list;
2121-2222- (* Indices for efficient queries *)
2323- outbound: (string, StringSet.t) Hashtbl.t;
2424- backlinks: (string, StringSet.t) Hashtbl.t;
2525- external_by_entry: (string, StringSet.t) Hashtbl.t;
2626- external_by_domain: (string, StringSet.t) Hashtbl.t; (* domain -> source slugs *)
2727-}
2828-2929-let empty_graph () = {
3030- internal_links = [];
3131- external_links = [];
3232- outbound = Hashtbl.create 256;
3333- backlinks = Hashtbl.create 256;
3434- external_by_entry = Hashtbl.create 256;
3535- external_by_domain = Hashtbl.create 64;
3636-}
3737-3838-(* Global storage for the link graph *)
3939-let current_graph : link_graph option ref = ref None
4040-4141-let set_graph graph = current_graph := Some graph
4242-let get_graph () = !current_graph
4343-4444-let entry_type_to_string = function
4545- | `Paper -> "paper"
4646- | `Project -> "project"
4747- | `Note -> "note"
4848- | `Idea -> "idea"
4949- | `Video -> "video"
5050- | `Contact -> "contact"
5151-5252-(* Query functions *)
5353-5454-let get_outbound graph slug =
5555- try StringSet.elements (Hashtbl.find graph.outbound slug)
5656- with Not_found -> []
5757-5858-let get_backlinks graph slug =
5959- try StringSet.elements (Hashtbl.find graph.backlinks slug)
6060- with Not_found -> []
6161-6262-let get_external_links graph slug =
6363- try StringSet.elements (Hashtbl.find graph.external_by_entry slug)
6464- with Not_found -> []
6565-6666-let get_entries_linking_to_domain graph domain =
6767- try StringSet.elements (Hashtbl.find graph.external_by_domain domain)
6868- with Not_found -> []
6969-7070-(* Query functions that use the global graph *)
7171-7272-let get_backlinks_for_slug slug =
7373- match !current_graph with
7474- | None -> []
7575- | Some graph -> get_backlinks graph slug
7676-7777-let get_outbound_for_slug slug =
7878- match !current_graph with
7979- | None -> []
8080- | Some graph -> get_outbound graph slug
8181-8282-let get_external_links_for_slug slug =
8383- match !current_graph with
8484- | None -> []
8585- | Some graph -> get_external_links graph slug
8686-8787-(* Pretty printing *)
8888-8989-let pp_internal_link ppf (link : internal_link) =
9090- Fmt.pf ppf "%s -> %s (%s)"
9191- link.source
9292- link.target
9393- (entry_type_to_string link.target_type)
9494-9595-let pp_external_link ppf (link : external_link) =
9696- Fmt.pf ppf "%s -> %s (%s)"
9797- link.source
9898- link.domain
9999- link.url
100100-101101-let pp_graph ppf graph =
102102- Fmt.pf ppf "@[<v>Internal links: %d@,External links: %d@,Entries with outbound: %d@,Entries with backlinks: %d@]"
103103- (List.length graph.internal_links)
104104- (List.length graph.external_links)
105105- (Hashtbl.length graph.outbound)
106106- (Hashtbl.length graph.backlinks)
107107-108108-let entry_type_of_entry = function
109109- | `Paper _ -> `Paper
110110- | `Project _ -> `Project
111111- | `Note _ -> `Note
112112- | `Idea _ -> `Idea
113113- | `Video _ -> `Video
114114- | `Contact _ -> `Contact
115115-116116-let extract_domain url =
117117- try
118118- let uri = Uri.of_string url in
119119- match Uri.host uri with
120120- | Some host -> host
121121- | None -> "unknown"
122122- with _ -> "unknown"
123123-124124-let add_to_set_hashtbl tbl key value =
125125- let current =
126126- try Hashtbl.find tbl key
127127- with Not_found -> StringSet.empty
128128- in
129129- Hashtbl.replace tbl key (StringSet.add value current)
130130-131131-let build_link_graph entries =
132132- let graph = empty_graph () in
133133-134134- (* Helper to add internal link *)
135135- let add_internal_link source target target_type =
136136- let link = { source; target; target_type } in
137137- graph.internal_links <- link :: graph.internal_links;
138138- add_to_set_hashtbl graph.outbound source target;
139139- add_to_set_hashtbl graph.backlinks target source
140140- in
141141-142142- (* Helper to add external link *)
143143- let add_external_link source url =
144144- let domain = extract_domain url in
145145- let link = { source; domain; url } in
146146- graph.external_links <- link :: graph.external_links;
147147- add_to_set_hashtbl graph.external_by_entry source url;
148148- add_to_set_hashtbl graph.external_by_domain domain source
149149- in
150150-151151- (* Process each entry *)
152152- let process_entry entry =
153153- let source_slug = Entry.slug entry in
154154-155155- (* Get all links from this entry's markdown content *)
156156- let md_content = Entry.body entry in
157157- let all_links = Md.extract_all_links md_content in
158158-159159- List.iter (fun link ->
160160- if Md.is_bushel_slug link then
161161- (* Internal bushel link *)
162162- let target_slug = Md.strip_handle link in
163163- match Entry.lookup entries target_slug with
164164- | Some target_entry ->
165165- let target_type = entry_type_of_entry target_entry in
166166- add_internal_link source_slug target_slug target_type
167167- | None -> ()
168168- else if Md.is_contact_slug link then
169169- (* Contact link *)
170170- let handle = Md.strip_handle link in
171171- match Contact.find_by_handle (Entry.contacts entries) handle with
172172- | Some c ->
173173- let target_slug = Contact.handle c in
174174- add_internal_link source_slug target_slug `Contact
175175- | None -> ()
176176- else if Md.is_tag_slug link then
177177- (* Skip tag links *)
178178- ()
179179- else if Md.is_type_filter_slug link then
180180- (* Skip type filter links *)
181181- ()
182182- else if String.starts_with ~prefix:"http://" link ||
183183- String.starts_with ~prefix:"https://" link then
184184- (* External link *)
185185- add_external_link source_slug link
186186- else
187187- (* Skip other links (relative paths, etc) *)
188188- ()
189189- ) all_links
190190- in
191191-192192- (* Process all entries *)
193193- List.iter process_entry (Entry.all_entries entries);
194194-195195- (* Process slug_ent references from notes *)
196196- let process_note_slug_ent note =
197197- match Note.slug_ent note with
198198- | Some target_slug ->
199199- let source_slug = Note.slug note in
200200- (* Look up the target entry by slug *)
201201- (match Entry.lookup entries target_slug with
202202- | Some target_entry ->
203203- let target_type = entry_type_of_entry target_entry in
204204- add_internal_link source_slug target_slug target_type
205205- | None -> ())
206206- | None -> ()
207207- in
208208- List.iter process_note_slug_ent (Entry.notes entries);
209209-210210- (* Process projects: field from papers *)
211211- let process_paper_projects paper =
212212- let source_slug = Paper.slug paper in
213213- let project_slugs = Paper.project_slugs paper in
214214- List.iter (fun project_slug ->
215215- (* Verify the project exists *)
216216- match Entry.lookup entries project_slug with
217217- | Some (`Project _) ->
218218- add_internal_link source_slug project_slug `Project
219219- | _ -> ()
220220- ) project_slugs
221221- in
222222- List.iter process_paper_projects (Entry.papers entries);
223223-224224- (* Deduplicate links *)
225225- let module LinkSet = Set.Make(struct
226226- type t = internal_link
227227- let compare (a : internal_link) (b : internal_link) =
228228- match String.compare a.source b.source with
229229- | 0 -> String.compare a.target b.target
230230- | n -> n
231231- end) in
232232-233233- let module ExtLinkSet = Set.Make(struct
234234- type t = external_link
235235- let compare (a : external_link) (b : external_link) =
236236- match String.compare a.source b.source with
237237- | 0 -> String.compare a.url b.url
238238- | n -> n
239239- end) in
240240-241241- graph.internal_links <- LinkSet.elements (LinkSet.of_list graph.internal_links);
242242- graph.external_links <- ExtLinkSet.elements (ExtLinkSet.of_list graph.external_links);
243243-244244- graph
245245-246246-(* Export for visualization *)
247247-248248-let to_json graph entries =
249249- (* Build nodes from entries *)
250250- let entry_nodes = List.map (fun entry ->
251251- let slug = Entry.slug entry in
252252- let title = Entry.title entry in
253253- let entry_type = entry_type_of_entry entry in
254254- `O [
255255- ("id", `String slug);
256256- ("title", `String title);
257257- ("type", `String (entry_type_to_string entry_type));
258258- ("group", `String "entry");
259259- ]
260260- ) (Entry.all_entries entries) in
261261-262262- (* Build nodes from contacts *)
263263- let contact_nodes = List.map (fun contact ->
264264- let handle = Contact.handle contact in
265265- let name = Contact.name contact in
266266- `O [
267267- ("id", `String handle);
268268- ("title", `String name);
269269- ("type", `String "contact");
270270- ("group", `String "entry");
271271- ]
272272- ) (Entry.contacts entries) in
273273-274274- (* Build domain nodes from external links *)
275275- let domain_map = Hashtbl.create 64 in
276276- List.iter (fun link ->
277277- if not (Hashtbl.mem domain_map link.domain) then
278278- Hashtbl.add domain_map link.domain ()
279279- ) graph.external_links;
280280-281281- let domain_nodes = Hashtbl.fold (fun domain () acc ->
282282- (`O [
283283- ("id", `String ("domain:" ^ domain));
284284- ("title", `String domain);
285285- ("type", `String "domain");
286286- ("group", `String "domain");
287287- ]) :: acc
288288- ) domain_map [] in
289289-290290- let all_nodes = entry_nodes @ contact_nodes @ domain_nodes in
291291-292292- (* Build internal links *)
293293- let internal_links_json = List.map (fun (link : internal_link) ->
294294- `O [
295295- ("source", `String link.source);
296296- ("target", `String link.target);
297297- ("type", `String "internal");
298298- ]
299299- ) graph.internal_links in
300300-301301- (* Build external links (entry -> domain) *)
302302- let external_links_json = List.map (fun (link : external_link) ->
303303- `O [
304304- ("source", `String link.source);
305305- ("target", `String ("domain:" ^ link.domain));
306306- ("type", `String "external");
307307- ]
308308- ) graph.external_links in
309309-310310- let all_links = internal_links_json @ external_links_json in
311311-312312- let json = `O [
313313- ("nodes", `A all_nodes);
314314- ("links", `A all_links);
315315- ] in
316316-317317- Ezjsonm.to_string json
-781
stack/bushel/lib/md.ml
···11-(** Bushel mappers for our Markdown extensions and utilities
22-33- This module provides mappers to convert Bushel markdown extensions to different
44- output formats. There are two main mappers:
55-66- 1. {!make_bushel_inline_mapper} - Full sidenote mode for the main website
77- - Converts Bushel links to interactive sidenotes
88- - Includes entry previews, contact info, footnotes
99- - Used for the main site HTML rendering
1010-1111- 2. {!make_bushel_link_only_mapper} - Plain HTML mode for feeds and simple output
1212- - Converts Bushel links to regular HTML <a> tags
1313- - Automatically cleans up link text that contains Bushel slugs
1414- - Used for Atom feeds, RSS, search indexing
1515- - Images need .webp extension added (handled by calling code)
1616-1717- For plain text output (search, LLM), use {!markdown_to_plaintext}.
1818-*)
1919-2020-(* Sidenote data types - reuse existing Bushel types *)
2121-type sidenote_data =
2222- | Contact_note of Contact.t * string (* contact data + trigger text *)
2323- | Paper_note of Paper.t * string
2424- | Idea_note of Idea.t * string
2525- | Note_note of Note.t * string
2626- | Project_note of Project.t * string
2727- | Video_note of Video.t * string
2828- | Footnote_note of string * Cmarkit.Block.t * string
2929- (* slug, block content, trigger text *)
3030-3131-type Cmarkit.Inline.t += Side_note of sidenote_data
3232-3333-let authorlink = Cmarkit.Meta.key ()
3434-3535-let make_authorlink label =
3636- let meta = Cmarkit.Meta.tag authorlink (Cmarkit.Label.meta label) in
3737- Cmarkit.Label.with_meta meta label
3838-;;
3939-4040-let sluglink = Cmarkit.Meta.key ()
4141-4242-let make_sluglink label =
4343- let meta = Cmarkit.Meta.tag sluglink (Cmarkit.Label.meta label) in
4444- Cmarkit.Label.with_meta meta label
4545-;;
4646-4747-let with_bushel_links = function
4848- | `Def _ as ctx -> Cmarkit.Label.default_resolver ctx
4949- | `Ref (_, _, (Some _ as def)) -> def
5050- | `Ref (_, ref, None) ->
5151- let txt = Cmarkit.Label.key ref in
5252- (match txt.[0] with
5353- | '@' -> Some (make_authorlink ref)
5454- | ':' -> Some (make_sluglink ref)
5555- | '#' -> if txt.[1] = '#' then Some (make_sluglink ref) else None
5656- | _ -> None)
5757-;;
5858-5959-let strip_handle s =
6060- if s.[0] = '@' || s.[0] = ':'
6161- then String.sub s 1 (String.length s - 1)
6262- else if s.[0] = '#' && s.[1] = '#'
6363- then String.sub s 2 (String.length s - 2)
6464- else s
6565-;;
6666-6767-(* FIXME use Tags *)
6868-let is_bushel_slug = String.starts_with ~prefix:":"
6969-let is_tag_slug link =
7070- String.starts_with ~prefix:"##" link &&
7171- not (String.starts_with ~prefix:"###" link)
7272-7373-let is_type_filter_slug = String.starts_with ~prefix:"###"
7474-let is_contact_slug = String.starts_with ~prefix:"@"
7575-7676-let text_of_inline lb =
7777- let open Cmarkit in
7878- Inline.to_plain_text ~break_on_soft:false lb
7979- |> fun r -> String.concat "\n" (List.map (String.concat "") r)
8080-;;
8181-8282-let link_target_is_bushel ?slugs lb =
8383- let open Cmarkit in
8484- let ref = Inline.Link.reference lb in
8585- match ref with
8686- | `Inline (ld, _) ->
8787- let dest = Link_definition.dest ld in
8888- (match dest with
8989- | Some (url, _) when is_bushel_slug url ->
9090- (match slugs with
9191- | Some s -> Hashtbl.replace s url ()
9292- | _ -> ());
9393- Some (url, Inline.Link.text lb |> text_of_inline)
9494- | Some (url, _) when is_tag_slug url ->
9595- (* Return the tag URL unchanged - will be handled by renderer *)
9696- Some (url, Inline.Link.text lb |> text_of_inline)
9797- | Some (url, _) when is_contact_slug url ->
9898- Some (url, Inline.Link.text lb |> text_of_inline)
9999- | _ -> None)
100100- | _ -> None
101101-;;
102102-103103-let image_target_is_bushel lb =
104104- let open Cmarkit in
105105- let ref = Inline.Link.reference lb in
106106- match ref with
107107- | `Inline (ld, _) ->
108108- let dest = Link_definition.dest ld in
109109- (match dest with
110110- | Some (url, _) when is_bushel_slug url ->
111111- let alt = Link_definition.title ld in
112112- let dir =
113113- Inline.Link.text lb
114114- |> Inline.to_plain_text ~break_on_soft:false
115115- |> fun r -> String.concat "\n" (List.map (String.concat "") r)
116116- in
117117- Some (url, alt, dir)
118118- | _ -> None)
119119- | _ -> None
120120-;;
121121-122122-let rewrite_bushel_link_reference entries slug title meta =
123123- let open Cmarkit in
124124- let s = strip_handle slug in
125125- (* Check if it's a tag, contact, or entry *)
126126- if is_tag_slug slug then
127127- (* Tag link - keep the ## prefix in dest for renderer to detect *)
128128- let txt = Inline.Text (title, meta) in
129129- let ld = Link_definition.make ~dest:(slug, meta) () in
130130- let ll = `Inline (ld, meta) in
131131- let ld = Inline.Link.make txt ll in
132132- Mapper.ret (Inline.Link (ld, meta))
133133- else if is_contact_slug slug then
134134- (* Contact sidenote *)
135135- match Contact.find_by_handle (Entry.contacts entries) s with
136136- | Some c ->
137137- let sidenote = Side_note (Contact_note (c, title)) in
138138- Mapper.ret sidenote
139139- | None ->
140140- (* Contact not found, fallback to regular link *)
141141- let txt = Inline.Text (title, meta) in
142142- let ld = Link_definition.make ~dest:("", meta) () in
143143- let ll = `Inline (ld, meta) in
144144- let ld = Inline.Link.make txt ll in
145145- Mapper.ret (Inline.Link (ld, meta))
146146- else
147147- (* Check entry type and generate appropriate sidenote *)
148148- match Entry.lookup entries s with
149149- | Some (`Paper p) ->
150150- let sidenote = Side_note (Paper_note (p, title)) in
151151- Mapper.ret sidenote
152152- | Some (`Idea i) ->
153153- let sidenote = Side_note (Idea_note (i, title)) in
154154- Mapper.ret sidenote
155155- | Some (`Note n) ->
156156- let sidenote = Side_note (Note_note (n, title)) in
157157- Mapper.ret sidenote
158158- | Some (`Project p) ->
159159- let sidenote = Side_note (Project_note (p, title)) in
160160- Mapper.ret sidenote
161161- | Some (`Video v) ->
162162- let sidenote = Side_note (Video_note (v, title)) in
163163- Mapper.ret sidenote
164164- | None ->
165165- (* Entry not found, use regular link *)
166166- let dest = Entry.lookup_site_url entries s in
167167- let txt = Inline.Text (title, meta) in
168168- let ld = Link_definition.make ~dest:(dest, meta) () in
169169- let ll = `Inline (ld, meta) in
170170- let ld = Inline.Link.make txt ll in
171171- Mapper.ret (Inline.Link (ld, meta))
172172-;;
173173-174174-let rewrite_bushel_image_reference entries url title dir meta =
175175- let open Cmarkit in
176176- let dest =
177177- match Entry.lookup entries (strip_handle url) with
178178- | Some ent -> Entry.site_url ent (* This is a video *)
179179- | None -> Printf.sprintf "/images/%s" (strip_handle url)
180180- in
181181- let txt = Inline.Text (dir, meta) in
182182- let ld = Link_definition.make ?title ~dest:(dest, meta) () in
183183- let ll = `Inline (ld, meta) in
184184- let ld = Inline.Link.make txt ll in
185185- let ent_il = Inline.Image (ld, meta) in
186186- Mapper.ret ent_il
187187-;;
188188-189189-type Cmarkit.Inline.t += Obsidian_link of string
190190-191191-let rewrite_label_reference_to_obsidian lb meta =
192192- let open Cmarkit in
193193- match Inline.Link.referenced_label lb with
194194- | None -> Mapper.default
195195- | Some l ->
196196- let m = Label.meta l in
197197- (match Meta.find authorlink m with
198198- | Some () ->
199199- let slug = Label.key l in
200200- let target = Printf.sprintf "[[%s]]" slug in
201201- let txt = Obsidian_link target in
202202- Mapper.ret txt
203203- | None ->
204204- (match Meta.find sluglink m with
205205- | None -> Mapper.default
206206- | Some () ->
207207- let slug = Label.key l in
208208- if is_bushel_slug slug
209209- then (
210210- let target = Printf.sprintf "[[%s]]" (strip_handle slug) in
211211- let txt = Obsidian_link target in
212212- Mapper.ret txt)
213213- else if is_tag_slug slug
214214- then (
215215- let target = Printf.sprintf "#%s" (strip_handle slug) in
216216- let txt = Inline.Text (target, meta) in
217217- Mapper.ret txt)
218218- else Mapper.default))
219219-;;
220220-221221-let make_bushel_link_only_mapper _defs entries =
222222- let open Cmarkit in
223223- fun _m ->
224224- function
225225- | Inline.Link (lb, meta) ->
226226- (* Convert Bushel link references to regular links (not sidenotes) *)
227227- (match link_target_is_bushel lb with
228228- | Some (url, title) ->
229229- let s = strip_handle url in
230230- let dest = Entry.lookup_site_url entries s in
231231- (* If title is itself a Bushel slug, use the entry title instead *)
232232- let link_text =
233233- if is_bushel_slug title then
234234- match Entry.lookup entries (strip_handle title) with
235235- | Some ent -> Entry.title ent
236236- | None -> title
237237- else title
238238- in
239239- let txt = Inline.Text (link_text, meta) in
240240- let ld = Link_definition.make ~dest:(dest, meta) () in
241241- let ll = `Inline (ld, meta) in
242242- let ld = Inline.Link.make txt ll in
243243- Mapper.ret (Inline.Link (ld, meta))
244244- | None ->
245245- (match Inline.Link.referenced_label lb with
246246- | Some l ->
247247- let m = Label.meta l in
248248- (* Check for authorlink (contact) first *)
249249- (match Meta.find authorlink m with
250250- | Some () ->
251251- let slug = Label.key l in
252252- let s = strip_handle slug in
253253- (match Contact.find_by_handle (Entry.contacts entries) s with
254254- | Some c ->
255255- let name = Contact.name c in
256256- (match Contact.best_url c with
257257- | Some dest ->
258258- let txt = Inline.Text (name, meta) in
259259- let ld = Link_definition.make ~dest:(dest, meta) () in
260260- let ll = `Inline (ld, meta) in
261261- let ld = Inline.Link.make txt ll in
262262- Mapper.ret (Inline.Link (ld, meta))
263263- | None ->
264264- (* No URL for contact, just use name as text *)
265265- let txt = Inline.Text (name, meta) in
266266- Mapper.ret txt)
267267- | None ->
268268- (* Contact not found, use title as fallback text *)
269269- let title = Inline.Link.text lb |> text_of_inline in
270270- let txt = Inline.Text (title, meta) in
271271- Mapper.ret txt)
272272- | None ->
273273- (* Check for sluglink *)
274274- (match Meta.find sluglink m with
275275- | Some () ->
276276- let slug = Label.key l in
277277- if is_bushel_slug slug || is_tag_slug slug || is_contact_slug slug
278278- then (
279279- let s = strip_handle slug in
280280- let dest = Entry.lookup_site_url entries s in
281281- let title = Inline.Link.text lb |> text_of_inline in
282282- (* If link text is itself a Bushel slug, use the entry title instead *)
283283- let link_text =
284284- let trimmed = String.trim title in
285285- if is_bushel_slug trimmed then
286286- match Entry.lookup entries (strip_handle trimmed) with
287287- | Some ent -> Entry.title ent
288288- | None -> title
289289- else title
290290- in
291291- let txt = Inline.Text (link_text, meta) in
292292- let ld = Link_definition.make ~dest:(dest, meta) () in
293293- let ll = `Inline (ld, meta) in
294294- let ld = Inline.Link.make txt ll in
295295- Mapper.ret (Inline.Link (ld, meta)))
296296- else Mapper.default
297297- | None -> Mapper.default))
298298- | None -> Mapper.default))
299299- | _ -> Mapper.default
300300-;;
301301-302302-let rewrite_footnote_reference ?footnote_map entries defs lb _meta =
303303- let open Cmarkit in
304304- match Inline.Link.referenced_label lb with
305305- | None -> Mapper.default
306306- | Some l ->
307307- (match Inline.Link.reference_definition defs lb with
308308- | Some (Block.Footnote.Def (fn, _)) ->
309309- let label_key = Label.key l in
310310- let slug, trigger_text =
311311- match footnote_map with
312312- | Some fm ->
313313- (match Hashtbl.find_opt fm label_key with
314314- | Some (slug, text) -> (slug, text)
315315- | None ->
316316- let num = Hashtbl.length fm + 1 in
317317- let slug = Printf.sprintf "fn-%d" num in
318318- let text = Printf.sprintf "[%d]" num in
319319- Hashtbl.add fm label_key (slug, text);
320320- (slug, text))
321321- | None ->
322322- (* No map provided, use label key as slug *)
323323- let slug = Printf.sprintf "fn-%s" (String.sub label_key 1 (String.length label_key - 1)) in
324324- let text = "[?]" in
325325- (slug, text)
326326- in
327327- (* Process the block to convert Bushel link references to regular links (not sidenotes) *)
328328- let block = Block.Footnote.block fn in
329329- let link_mapper = Mapper.make ~inline:(make_bushel_link_only_mapper defs entries) () in
330330- let processed_block =
331331- match Mapper.map_block link_mapper block with
332332- | Some b -> b
333333- | None -> block
334334- in
335335- let sidenote = Side_note (Footnote_note (slug, processed_block, trigger_text)) in
336336- Mapper.ret sidenote
337337- | _ -> Mapper.default)
338338-339339-let rewrite_label_reference ?slugs entries lb meta =
340340- let open Cmarkit in
341341- match Inline.Link.referenced_label lb with
342342- | None -> Mapper.default
343343- | Some l ->
344344- let m = Label.meta l in
345345- (match Meta.find authorlink m with
346346- | Some () ->
347347- let slug = Label.key l in
348348- (match Contact.find_by_handle (Entry.contacts entries) (strip_handle slug) with
349349- | Some c ->
350350- let trigger_text = Contact.name c in
351351- let sidenote = Side_note (Contact_note (c, trigger_text)) in
352352- Mapper.ret sidenote
353353- | None ->
354354- (* Contact not found, fallback to text *)
355355- let txt = Inline.Text ("Unknown Person", meta) in
356356- Mapper.ret txt)
357357- | None ->
358358- (match Meta.find sluglink m with
359359- | None -> Mapper.default
360360- | Some () ->
361361- let slug = Label.key l in
362362- if is_bushel_slug slug
363363- then (
364364- (match slugs with
365365- | Some s -> Hashtbl.replace s slug ()
366366- | _ -> ());
367367- let s = strip_handle slug in
368368- (* Check entry type and generate appropriate sidenote *)
369369- match Entry.lookup entries s with
370370- | Some (`Paper p) ->
371371- let trigger_text = Entry.lookup_title entries s in
372372- let sidenote = Side_note (Paper_note (p, trigger_text)) in
373373- Mapper.ret sidenote
374374- | Some (`Idea i) ->
375375- let trigger_text = Entry.lookup_title entries s in
376376- let sidenote = Side_note (Idea_note (i, trigger_text)) in
377377- Mapper.ret sidenote
378378- | Some (`Note n) ->
379379- let trigger_text = Entry.lookup_title entries s in
380380- let sidenote = Side_note (Note_note (n, trigger_text)) in
381381- Mapper.ret sidenote
382382- | Some (`Project p) ->
383383- let trigger_text = Entry.lookup_title entries s in
384384- let sidenote = Side_note (Project_note (p, trigger_text)) in
385385- Mapper.ret sidenote
386386- | Some (`Video v) ->
387387- let trigger_text = Entry.lookup_title entries s in
388388- let sidenote = Side_note (Video_note (v, trigger_text)) in
389389- Mapper.ret sidenote
390390- | None ->
391391- (* Entry not found, use regular link *)
392392- let target = Entry.lookup_title entries s in
393393- let dest = Entry.lookup_site_url entries s in
394394- let txt = Inline.Text (target, meta) in
395395- let ld = Link_definition.make ~dest:(dest, meta) () in
396396- let ll = `Inline (ld, meta) in
397397- let ld = Inline.Link.make txt ll in
398398- Mapper.ret (Inline.Link (ld, meta)))
399399- else if is_tag_slug slug
400400- then (
401401- let sh = strip_handle slug in
402402- (* Use # as dest to prevent navigation, JavaScript will intercept *)
403403- let target, dest = sh, "#" in
404404- let txt = Inline.Text (target, meta) in
405405- let ld = Link_definition.make ~dest:(dest, meta) () in
406406- let ll = `Inline (ld, meta) in
407407- let ld = Inline.Link.make txt ll in
408408- let ent_il = Inline.Link (ld, meta) in
409409- Mapper.ret ent_il)
410410- else Mapper.default))
411411-;;
412412-413413-let bushel_inline_mapper_to_obsidian entries _m =
414414- let open Cmarkit in
415415- function
416416- | Inline.Link (lb, meta) ->
417417- (match link_target_is_bushel lb with
418418- | None -> rewrite_label_reference_to_obsidian lb meta
419419- | Some (url, title) -> rewrite_bushel_link_reference entries url title meta)
420420- | Inline.Image (lb, meta) ->
421421- (match image_target_is_bushel lb with
422422- | None -> rewrite_label_reference_to_obsidian lb meta
423423- | Some (url, alt, dir) -> rewrite_bushel_image_reference entries url alt dir meta)
424424- | _ -> Mapper.default
425425-;;
426426-427427-let make_bushel_inline_mapper ?slugs ?footnote_map defs entries =
428428- let open Cmarkit in
429429- fun _m ->
430430- function
431431- | Inline.Link (lb, meta) ->
432432- (* First check if this is a footnote reference *)
433433- (match Inline.Link.referenced_label lb with
434434- | Some l when String.starts_with ~prefix:"^" (Label.key l) ->
435435- (* This is a footnote reference *)
436436- rewrite_footnote_reference ?footnote_map entries defs lb meta
437437- | _ ->
438438- (* Not a footnote, handle as bushel link *)
439439- (match link_target_is_bushel ?slugs lb with
440440- | None -> rewrite_label_reference ?slugs entries lb meta
441441- | Some (url, title) -> rewrite_bushel_link_reference entries url title meta))
442442- | Inline.Image (lb, meta) ->
443443- (match image_target_is_bushel lb with
444444- | None -> rewrite_label_reference entries lb meta
445445- | Some (url, alt, dir) -> rewrite_bushel_image_reference entries url alt dir meta)
446446- | _ -> Mapper.default
447447-;;
448448-449449-let scan_for_slugs entries md =
450450- let open Cmarkit in
451451- let slugs = Hashtbl.create 7 in
452452- let doc = Doc.of_string ~strict:false ~resolver:with_bushel_links md in
453453- let defs = Doc.defs doc in
454454- let _ =
455455- Mapper.map_doc (Mapper.make ~inline:(make_bushel_inline_mapper ~slugs defs entries) ()) doc
456456- in
457457- Hashtbl.fold (fun k () a -> k :: a) slugs []
458458-;;
459459-460460-(** Validation mapper that collects broken references *)
461461-let make_validation_mapper entries broken_slugs broken_contacts =
462462- let open Cmarkit in
463463- fun _m ->
464464- function
465465- | Inline.Link (lb, _meta) ->
466466- (* Check inline bushel links *)
467467- (match link_target_is_bushel lb with
468468- | Some (url, _title) ->
469469- let s = strip_handle url in
470470- if is_contact_slug url then
471471- (* Validate contact handle *)
472472- (match Contact.find_by_handle (Entry.contacts entries) s with
473473- | None -> Hashtbl.replace broken_contacts url ()
474474- | Some _ -> ())
475475- else if is_bushel_slug url then
476476- (* Validate entry slug *)
477477- (match Entry.lookup entries s with
478478- | None -> Hashtbl.replace broken_slugs url ()
479479- | Some _ -> ())
480480- else ();
481481- Mapper.default
482482- | None ->
483483- (* Check referenced label links *)
484484- (match Inline.Link.referenced_label lb with
485485- | Some l ->
486486- let m = Label.meta l in
487487- (* Check for contact reference *)
488488- (match Meta.find authorlink m with
489489- | Some () ->
490490- let slug = Label.key l in
491491- let handle = strip_handle slug in
492492- (match Contact.find_by_handle (Entry.contacts entries) handle with
493493- | None -> Hashtbl.replace broken_contacts slug ()
494494- | Some _ -> ());
495495- Mapper.default
496496- | None ->
497497- (* Check for entry slug reference *)
498498- (match Meta.find sluglink m with
499499- | None -> Mapper.default
500500- | Some () ->
501501- let slug = Label.key l in
502502- if is_bushel_slug slug then (
503503- let s = strip_handle slug in
504504- match Entry.lookup entries s with
505505- | None -> Hashtbl.replace broken_slugs slug ()
506506- | Some _ -> ()
507507- );
508508- Mapper.default))
509509- | None -> Mapper.default))
510510- | _ -> Mapper.default
511511-;;
512512-513513-(** Validate all bushel references in markdown and return broken ones *)
514514-let validate_references entries md =
515515- let open Cmarkit in
516516- let broken_slugs = Hashtbl.create 7 in
517517- let broken_contacts = Hashtbl.create 7 in
518518- let doc = Doc.of_string ~strict:false ~resolver:with_bushel_links md in
519519- let mapper = Mapper.make ~inline:(make_validation_mapper entries broken_slugs broken_contacts) () in
520520- let _ = Mapper.map_doc mapper doc in
521521- let slugs = Hashtbl.fold (fun k () a -> k :: a) broken_slugs [] in
522522- let contacts = Hashtbl.fold (fun k () a -> k :: a) broken_contacts [] in
523523- (slugs, contacts)
524524-;;
525525-526526-(** Extract the first image URL from markdown text *)
527527-let extract_first_image md =
528528- let open Cmarkit in
529529- (* Don't use bushel link resolver to avoid circular dependency with Entry *)
530530- let doc = Doc.of_string md in
531531- let found_image = ref None in
532532-533533- let find_image_in_inline _mapper = function
534534- | Inline.Image (img, _) ->
535535- (match Inline.Link.reference img with
536536- | `Inline (ld, _) ->
537537- (match Link_definition.dest ld with
538538- | Some (url, _) when !found_image = None ->
539539- found_image := Some url;
540540- Mapper.default
541541- | _ -> Mapper.default)
542542- | _ -> Mapper.default)
543543- | _ -> Mapper.default
544544- in
545545-546546- let mapper = Mapper.make ~inline:find_image_in_inline () in
547547- let _ = Mapper.map_doc mapper doc in
548548- !found_image
549549-;;
550550-551551-(** Convert markdown text to plain text, resolving bushel links to just their text *)
552552-let markdown_to_plaintext _entries text =
553553- let open Cmarkit in
554554- (* Parse markdown with bushel link resolver *)
555555- let doc = Doc.of_string ~resolver:with_bushel_links text in
556556-557557- (* Convert document blocks to plain text *)
558558- let rec block_to_text = function
559559- | Block.Blank_line _ -> ""
560560- | Block.Thematic_break _ -> "\n---\n"
561561- | Block.Paragraph (p, _) ->
562562- let inline = Block.Paragraph.inline p in
563563- Inline.to_plain_text ~break_on_soft:false inline
564564- |> List.map (String.concat "") |> String.concat "\n"
565565- | Block.Heading (h, _) ->
566566- let inline = Block.Heading.inline h in
567567- Inline.to_plain_text ~break_on_soft:false inline
568568- |> List.map (String.concat "") |> String.concat "\n"
569569- | Block.Block_quote (bq, _) ->
570570- let blocks = Block.Block_quote.block bq in
571571- block_to_text blocks
572572- | Block.List (l, _) ->
573573- let items = Block.List'.items l in
574574- List.map (fun (item, _) ->
575575- let blocks = Block.List_item.block item in
576576- block_to_text blocks
577577- ) items |> String.concat "\n"
578578- | Block.Code_block (cb, _) ->
579579- let code = Block.Code_block.code cb in
580580- String.concat "\n" (List.map Block_line.to_string code)
581581- | Block.Html_block _ -> "" (* Skip HTML blocks for search *)
582582- | Block.Link_reference_definition _ -> ""
583583- | Block.Ext_footnote_definition _ -> ""
584584- | Block.Blocks (blocks, _) ->
585585- List.map block_to_text blocks |> String.concat "\n"
586586- | _ -> ""
587587- in
588588- let blocks = Doc.block doc in
589589- block_to_text blocks
590590-;;
591591-592592-(** Extract all links from markdown text, including from images *)
593593-let extract_all_links text =
594594- let open Cmarkit in
595595- let doc = Doc.of_string ~resolver:with_bushel_links text in
596596- let links = ref [] in
597597-598598- let find_links_in_inline _mapper = function
599599- | Inline.Link (lb, _) | Inline.Image (lb, _) ->
600600- (* Check for inline link/image destination *)
601601- (match Inline.Link.reference lb with
602602- | `Inline (ld, _) ->
603603- (match Link_definition.dest ld with
604604- | Some (url, _) ->
605605- links := url :: !links;
606606- Mapper.default
607607- | None -> Mapper.default)
608608- | `Ref _ ->
609609- (* For reference-style links/images, check if it has a referenced label *)
610610- (match Inline.Link.referenced_label lb with
611611- | Some l ->
612612- let key = Label.key l in
613613- (* Check if it's a bushel-style link *)
614614- if String.length key > 0 && (key.[0] = ':' || key.[0] = '@' ||
615615- (String.length key > 1 && key.[0] = '#' && key.[1] = '#')) then
616616- links := key :: !links;
617617- Mapper.default
618618- | None -> Mapper.default))
619619- | _ -> Mapper.default
620620- in
621621-622622- let mapper = Mapper.make ~inline:find_links_in_inline () in
623623- let _ = Mapper.map_doc mapper doc in
624624-625625- (* Deduplicate *)
626626- let module StringSet = Set.Make(String) in
627627- StringSet.elements (StringSet.of_list !links)
628628-;;
629629-630630-(* Reference source type for CiTO annotations *)
631631-type reference_source =
632632- | Paper (* CitesAsSourceDocument *)
633633- | Note (* CitesAsRelated *)
634634- | External (* Cites *)
635635-636636-(* Extract references (papers/notes with DOIs) from a note *)
637637-let note_references entries default_author note =
638638- let refs = ref [] in
639639-640640- (* Helper to format author name: extract last name from full name *)
641641- let format_author_last name =
642642- let parts = String.split_on_char ' ' name in
643643- List.nth parts (List.length parts - 1)
644644- in
645645-646646- (* Helper to format a citation *)
647647- let format_citation ~authors ~year ~title ~publisher =
648648- let author_str = match authors with
649649- | [] -> ""
650650- | [author] -> format_author_last author ^ " "
651651- | author :: _ -> (format_author_last author) ^ " et al "
652652- in
653653- let pub_str = match publisher with
654654- | None | Some "" -> ""
655655- | Some p -> p ^ ". "
656656- in
657657- Printf.sprintf "%s(%d). %s. %s" author_str year title pub_str
658658- in
659659-660660- (* Check slug_ent if it exists *)
661661- (match Note.slug_ent note with
662662- | Some slug ->
663663- (match Entry.lookup entries slug with
664664- | Some (`Paper p) ->
665665- (match Paper.doi p with
666666- | Some doi ->
667667- let authors = Paper.authors p in
668668- let year = Paper.year p in
669669- let title = Paper.title p in
670670- let publisher = Some (Paper.publisher p) in
671671- let citation = format_citation ~authors ~year ~title ~publisher in
672672- refs := (doi, citation, Paper) :: !refs
673673- | None -> ())
674674- | Some (`Note n) ->
675675- (match Note.doi n with
676676- | Some doi ->
677677- let authors = match Note.author n with
678678- | Some a -> [a]
679679- | None -> [Contact.name default_author]
680680- in
681681- let (year, _, _) = Note.date n in
682682- let title = Note.title n in
683683- let publisher = None in
684684- let citation = format_citation ~authors ~year ~title ~publisher in
685685- refs := (doi, citation, Note) :: !refs
686686- | None -> ())
687687- | _ -> ())
688688- | None -> ());
689689-690690- (* Scan body for bushel references *)
691691- let slugs = scan_for_slugs entries (Note.body note) in
692692- List.iter (fun slug ->
693693- (* Strip leading : or @ from slug before lookup *)
694694- let normalized_slug = strip_handle slug in
695695- match Entry.lookup entries normalized_slug with
696696- | Some (`Paper p) ->
697697- (match Paper.doi p with
698698- | Some doi ->
699699- let authors = Paper.authors p in
700700- let year = Paper.year p in
701701- let title = Paper.title p in
702702- let publisher = Some (Paper.publisher p) in
703703- let citation = format_citation ~authors ~year ~title ~publisher in
704704- (* Check if doi already exists in refs *)
705705- if not (List.exists (fun (d, _, _) -> d = doi) !refs) then
706706- refs := (doi, citation, Paper) :: !refs
707707- | None -> ())
708708- | Some (`Note n) ->
709709- (match Note.doi n with
710710- | Some doi ->
711711- let authors = match Note.author n with
712712- | Some a -> [a]
713713- | None -> [Contact.name default_author]
714714- in
715715- let (year, _, _) = Note.date n in
716716- let title = Note.title n in
717717- let publisher = None in
718718- let citation = format_citation ~authors ~year ~title ~publisher in
719719- (* Check if doi already exists in refs *)
720720- if not (List.exists (fun (d, _, _) -> d = doi) !refs) then
721721- refs := (doi, citation, Note) :: !refs
722722- | None -> ())
723723- | _ -> ()
724724- ) slugs;
725725-726726- (* Scan body for external DOI URLs and resolve from cache *)
727727- let body = Note.body note in
728728- let doi_url_pattern = Re.Perl.compile_pat "https?://(?:dx\\.)?doi\\.org/([^)\\s\"'>]+)" in
729729- let matches = Re.all doi_url_pattern body in
730730- let doi_entries = Entry.doi_entries entries in
731731- List.iter (fun group ->
732732- try
733733- let encoded_doi = Re.Group.get group 1 in
734734- (* URL decode the DOI *)
735735- let doi = Uri.pct_decode encoded_doi in
736736- (* Check if doi already exists in refs *)
737737- if not (List.exists (fun (d, _, _) -> d = doi) !refs) then
738738- (* Look up in DOI cache *)
739739- match Doi_entry.find_by_doi doi_entries doi with
740740- | Some doi_entry when doi_entry.status = Resolved ->
741741- let citation = format_citation
742742- ~authors:doi_entry.authors
743743- ~year:doi_entry.year
744744- ~title:doi_entry.title
745745- ~publisher:(Some doi_entry.publisher)
746746- in
747747- refs := (doi, citation, External) :: !refs
748748- | _ ->
749749- (* Not found in cache, add minimal citation with just the DOI *)
750750- refs := (doi, doi, External) :: !refs
751751- with _ -> ()
752752- ) matches;
753753-754754- (* Scan body for publisher URLs (Elsevier, ScienceDirect, IEEE, Nature, ACM, Sage, UPenn, Springer, Taylor & Francis, OUP) and resolve from cache *)
755755- let publisher_pattern = Re.Perl.compile_pat "https?://(?:(?:www\\.)?(?:linkinghub\\.elsevier\\.com|(?:www\\.)?sciencedirect\\.com/science/article|ieeexplore\\.ieee\\.org|academic\\.oup\\.com|nature\\.com|journals\\.sagepub\\.com|garfield\\.library\\.upenn\\.edu|link\\.springer\\.com)/[^)\\s\"'>]+|(?:dl\\.acm\\.org|(?:www\\.)?tandfonline\\.com)/doi(?:/pdf)?/10\\.[^)\\s\"'>]+)" in
756756- let publisher_matches = Re.all publisher_pattern body in
757757- List.iter (fun group ->
758758- try
759759- let url = Re.Group.get group 0 in
760760- (* Look up in DOI cache by source URL *)
761761- match Doi_entry.find_by_url doi_entries url with
762762- | Some doi_entry when doi_entry.status = Resolved ->
763763- let doi = doi_entry.doi in
764764- (* Check if this DOI already exists in refs *)
765765- if not (List.exists (fun (d, _, _) -> d = doi) !refs) then
766766- let citation = format_citation
767767- ~authors:doi_entry.authors
768768- ~year:doi_entry.year
769769- ~title:doi_entry.title
770770- ~publisher:(Some doi_entry.publisher)
771771- in
772772- refs := (doi, citation, External) :: !refs
773773- | _ ->
774774- (* Not found in cache, skip it *)
775775- ()
776776- with _ -> ()
777777- ) publisher_matches;
778778-779779- List.rev !refs
780780-;;
781781-
-73
stack/bushel/lib/md.mli
···11-val make_bushel_inline_mapper
22- : ?slugs:(string, unit) Hashtbl.t
33- -> ?footnote_map:(string, string * string) Hashtbl.t
44- -> Cmarkit.Label.defs
55- -> Entry.t
66- -> 'a
77- -> Cmarkit.Inline.t
88- -> Cmarkit.Inline.t Cmarkit.Mapper.result
99-1010-val make_bushel_link_only_mapper
1111- : Cmarkit.Label.defs
1212- -> Entry.t
1313- -> 'a
1414- -> Cmarkit.Inline.t
1515- -> Cmarkit.Inline.t Cmarkit.Mapper.result
1616-1717-type Cmarkit.Inline.t += Obsidian_link of string
1818-1919-type sidenote_data =
2020- | Contact_note of Contact.t * string
2121- | Paper_note of Paper.t * string
2222- | Idea_note of Idea.t * string
2323- | Note_note of Note.t * string
2424- | Project_note of Project.t * string
2525- | Video_note of Video.t * string
2626- | Footnote_note of string * Cmarkit.Block.t * string
2727-2828-type Cmarkit.Inline.t += Side_note of sidenote_data
2929-3030-val bushel_inline_mapper_to_obsidian
3131- : Entry.t
3232- -> 'a
3333- -> Cmarkit.Inline.t
3434- -> Cmarkit.Inline.t Cmarkit.Mapper.result
3535-3636-val with_bushel_links
3737- : [< `Def of Cmarkit.Label.t option * Cmarkit.Label.t
3838- | `Ref of 'a * Cmarkit.Label.t * Cmarkit.Label.t option
3939- ]
4040- -> Cmarkit.Label.t option
4141-4242-val scan_for_slugs : Entry.t -> string -> string list
4343-4444-(** Validate all bushel references in markdown and return broken ones.
4545- Returns (broken_slugs, broken_contacts) where each list contains
4646- the full reference string (e.g., ":missing-slug", "@unknown-handle") *)
4747-val validate_references : Entry.t -> string -> string list * string list
4848-4949-(** Extract the first image URL from markdown text *)
5050-val extract_first_image : string -> string option
5151-5252-(** Convert markdown text to plain text, resolving bushel links to just their text *)
5353-val markdown_to_plaintext : 'a -> string -> string
5454-5555-val is_bushel_slug : string -> bool
5656-val is_tag_slug : string -> bool
5757-val is_type_filter_slug : string -> bool
5858-val is_contact_slug : string -> bool
5959-val strip_handle : string -> string
6060-6161-(** Extract all links from markdown text, including from images (internal and external) *)
6262-val extract_all_links : string -> string list
6363-6464-(** Type indicating the source of a reference for CiTO annotation *)
6565-type reference_source =
6666- | Paper (** CitesAsSourceDocument *)
6767- | Note (** CitesAsRelated *)
6868- | External (** Cites *)
6969-7070-(** Extract references (papers/notes with DOIs) from a note.
7171- Returns a list of (DOI, citation_string, reference_source) tuples.
7272- Citation format: "Last, First (Year). Title. Publisher. https://doi.org/the/doi" *)
7373-val note_references : Entry.t -> Contact.t -> Note.t -> (string * string * reference_source) list
-230
stack/bushel/lib/note.ml
···11-type t =
22- { title : string
33- ; date : Ptime.date
44- ; slug : string
55- ; body : string
66- ; tags : string list
77- ; draft : bool
88- ; updated : Ptime.date option
99- ; sidebar : string option
1010- ; index_page : bool
1111- ; perma : bool (* Permanent article that will receive a DOI *)
1212- ; doi : string option (* DOI identifier for permanent articles *)
1313- ; synopsis: string option
1414- ; titleimage: string option
1515- ; via : (string * string) option
1616- ; slug_ent : string option (* Optional reference to another entry *)
1717- ; source : string option (* Optional source for news-style notes *)
1818- ; url : string option (* Optional external URL for news-style notes *)
1919- ; author : string option (* Optional author for news-style notes *)
2020- ; category : string option (* Optional category for news-style notes *)
2121- }
2222-2323-type ts = t list
2424-2525-let link { body; via; slug; _ } =
2626- match body, via with
2727- | "", Some (l, u) -> `Ext (l, u)
2828- | "", None -> failwith (slug ^ ": note external without via, via-url")
2929- | _, _ -> `Local slug
3030-;;
3131-3232-let origdate { date; _ } = Option.get @@ Ptime.of_date date
3333-3434-let date { date; updated; _ } =
3535- match updated with
3636- | None -> date
3737- | Some v -> v
3838-;;
3939-4040-let datetime v = Option.get @@ Ptime.of_date @@ date v
4141-let compare a b = Ptime.compare (datetime b) (datetime a)
4242-let slug { slug; _ } = slug
4343-let body { body; _ } = body
4444-let title { title; _ } = title
4545-let tags { tags; _ } = tags
4646-let sidebar { sidebar; _ } = sidebar
4747-let synopsis { synopsis; _ } = synopsis
4848-let draft { draft; _ } = draft
4949-let perma { perma; _ } = perma
5050-let doi { doi; _ } = doi
5151-let titleimage { titleimage; _ } = titleimage
5252-let slug_ent { slug_ent; _ } = slug_ent
5353-let source { source; _ } = source
5454-let url { url; _ } = url
5555-let author { author; _ } = author
5656-let category { category; _ } = category
5757-let lookup slug notes = List.find (fun n -> n.slug = slug) notes
5858-let read_file file = In_channel.(with_open_bin file input_all)
5959-let words { body; _ } = Util.count_words body
6060-6161-6262-let of_md fname =
6363- (* TODO fix Jekyll_post to basename the fname all the time *)
6464- match Jekyll_post.of_string ~fname:(Filename.basename fname) (read_file fname) with
6565- | Error (`Msg m) -> failwith ("note_of_md: " ^ m)
6666- | Ok jp ->
6767- let fields = jp.Jekyll_post.fields in
6868- let { Jekyll_post.title; date; slug; body; _ } = jp in
6969- let date, _ = Ptime.to_date_time date in
7070- let index_page =
7171- match Jekyll_format.find "index_page" fields with
7272- | Some (`Bool v) -> v
7373- | _ -> false
7474- in
7575- let perma =
7676- match Jekyll_format.find "perma" fields with
7777- | Some (`Bool v) -> v
7878- | _ -> false
7979- in
8080- let updated =
8181- match Jekyll_format.find "updated" fields with
8282- | Some (`String v) -> Some (Jekyll_format.parse_date_exn v |> Ptime.to_date)
8383- | _ -> None
8484- in
8585- let draft =
8686- match Jekyll_format.find "draft" fields with
8787- | Some (`Bool v) -> v
8888- | _ -> false
8989- in
9090- let titleimage =
9191- match Jekyll_format.find "titleimage" fields with
9292- | Some (`String v) -> Some v
9393- | _ -> None
9494- in
9595- let synopsis =
9696- match Jekyll_format.find "synopsis" fields with
9797- | Some (`String v) -> Some v
9898- | _ -> None
9999- in
100100- let sidebar =
101101- try Some (read_file ("data/sidebar/" ^ Filename.basename fname)) with
102102- | _ -> None
103103- in
104104- let tags =
105105- match Jekyll_format.find "tags" fields with
106106- | Some (`A l) ->
107107- List.filter_map
108108- (function
109109- | `String s -> Some s
110110- | _ -> None)
111111- l
112112- | _ -> []
113113- in
114114- let via =
115115- match Jekyll_format.find "via" fields, Jekyll_format.find "via-url" fields with
116116- | Some (`String a), Some (`String b) -> Some (a, b)
117117- | None, Some (`String b) -> Some ("", b)
118118- | _ -> None
119119- in
120120- let slug_ent =
121121- match Jekyll_format.find "slug_ent" fields with
122122- | Some (`String v) -> Some v
123123- | _ -> None
124124- in
125125- let source =
126126- match Jekyll_format.find "source" fields with
127127- | Some (`String v) -> Some v
128128- | _ -> None
129129- in
130130- let url =
131131- match Jekyll_format.find "url" fields with
132132- | Some (`String v) -> Some v
133133- | _ -> None
134134- in
135135- let author =
136136- match Jekyll_format.find "author" fields with
137137- | Some (`String v) -> Some v
138138- | _ -> None
139139- in
140140- let category =
141141- match Jekyll_format.find "category" fields with
142142- | Some (`String v) -> Some v
143143- | _ -> None
144144- in
145145- let doi =
146146- match Jekyll_format.find "doi" fields with
147147- | Some (`String v) -> Some v
148148- | _ -> None
149149- in
150150- { title; draft; date; slug; synopsis; titleimage; index_page; perma; doi; body; via; updated; tags; sidebar; slug_ent; source; url; author; category }
151151-152152-(* TODO:claude *)
153153-let typesense_schema =
154154- let open Ezjsonm in
155155- dict [
156156- ("name", string "notes");
157157- ("fields", list (fun d -> dict d) [
158158- [("name", string "id"); ("type", string "string")];
159159- [("name", string "title"); ("type", string "string")];
160160- [("name", string "content"); ("type", string "string")];
161161- [("name", string "date"); ("type", string "string")];
162162- [("name", string "date_timestamp"); ("type", string "int64")];
163163- [("name", string "tags"); ("type", string "string[]"); ("facet", bool true)];
164164- [("name", string "body"); ("type", string "string"); ("optional", bool true)];
165165- [("name", string "draft"); ("type", string "bool")];
166166- [("name", string "synopsis"); ("type", string "string[]"); ("optional", bool true)];
167167- [("name", string "thumbnail_url"); ("type", string "string"); ("optional", bool true)];
168168- [("name", string "type"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];
169169- [("name", string "status"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];
170170- [("name", string "related_papers"); ("type", string "string[]"); ("optional", bool true)];
171171- [("name", string "related_projects"); ("type", string "string[]"); ("optional", bool true)];
172172- [("name", string "related_contacts"); ("type", string "string[]"); ("optional", bool true)];
173173- [("name", string "attachments"); ("type", string "string[]"); ("optional", bool true)];
174174- [("name", string "source"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];
175175- [("name", string "url"); ("type", string "string"); ("optional", bool true)];
176176- [("name", string "author"); ("type", string "string"); ("optional", bool true)];
177177- [("name", string "category"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];
178178- [("name", string "slug_ent"); ("type", string "string"); ("optional", bool true)];
179179- [("name", string "words"); ("type", string "int32"); ("optional", bool true)];
180180- ]);
181181- ("default_sorting_field", string "date_timestamp");
182182- ]
183183-184184-(** TODO:claude Pretty-print a note with ANSI formatting *)
185185-let pp ppf n =
186186- let open Fmt in
187187- pf ppf "@[<v>";
188188- pf ppf "%a: %a@," (styled `Bold string) "Type" (styled `Cyan string) "Note";
189189- pf ppf "%a: %a@," (styled `Bold string) "Slug" string (slug n);
190190- pf ppf "%a: %a@," (styled `Bold string) "Title" string (title n);
191191- let (year, month, day) = date n in
192192- pf ppf "%a: %04d-%02d-%02d@," (styled `Bold string) "Date" year month day;
193193- (match n.updated with
194194- | Some (y, m, d) -> pf ppf "%a: %04d-%02d-%02d@," (styled `Bold string) "Updated" y m d
195195- | None -> ());
196196- pf ppf "%a: %b@," (styled `Bold string) "Draft" (draft n);
197197- pf ppf "%a: %b@," (styled `Bold string) "Index Page" n.index_page;
198198- pf ppf "%a: %b@," (styled `Bold string) "Perma" (perma n);
199199- (match doi n with
200200- | Some d -> pf ppf "%a: %a@," (styled `Bold string) "DOI" string d
201201- | None -> ());
202202- (match synopsis n with
203203- | Some syn -> pf ppf "%a: %a@," (styled `Bold string) "Synopsis" string syn
204204- | None -> ());
205205- (match titleimage n with
206206- | Some img -> pf ppf "%a: %a@," (styled `Bold string) "Title Image" string img
207207- | None -> ());
208208- (match n.via with
209209- | Some (label, url) ->
210210- if label <> "" then
211211- pf ppf "%a: %a (%a)@," (styled `Bold string) "Via" string label string url
212212- else
213213- pf ppf "%a: %a@," (styled `Bold string) "Via" string url
214214- | None -> ());
215215- let t = tags n in
216216- if t <> [] then
217217- pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Tags" (list ~sep:comma string) t;
218218- (match sidebar n with
219219- | Some sb ->
220220- pf ppf "@,";
221221- pf ppf "%a:@," (styled `Bold string) "Sidebar";
222222- pf ppf "%a@," string sb
223223- | None -> ());
224224- let bd = body n in
225225- if bd <> "" then begin
226226- pf ppf "@,";
227227- pf ppf "%a:@," (styled `Bold string) "Body";
228228- pf ppf "%a@," string bd;
229229- end;
230230- pf ppf "@]"
-49
stack/bushel/lib/note.mli
···11-type t =
22- { title : string
33- ; date : Ptime.date
44- ; slug : string
55- ; body : string
66- ; tags : string list
77- ; draft : bool
88- ; updated : Ptime.date option
99- ; sidebar : string option
1010- ; index_page : bool
1111- ; perma : bool
1212- ; doi : string option
1313- ; synopsis: string option
1414- ; titleimage: string option
1515- ; via : (string * string) option
1616- ; slug_ent : string option
1717- ; source : string option
1818- ; url : string option
1919- ; author : string option
2020- ; category : string option
2121- }
2222-2323-type ts = t list
2424-2525-val link : t -> [> `Ext of string * string | `Local of string ]
2626-val origdate : t -> Ptime.t
2727-val date : t -> Ptime.date
2828-val datetime : t -> Ptime.t
2929-val compare : t -> t -> int
3030-val slug : t -> string
3131-val body : t -> string
3232-val title : t -> string
3333-val draft : t -> bool
3434-val perma : t -> bool
3535-val doi : t -> string option
3636-val synopsis : t -> string option
3737-val titleimage : t -> string option
3838-val slug_ent : t -> string option
3939-val source : t -> string option
4040-val url : t -> string option
4141-val author : t -> string option
4242-val category : t -> string option
4343-val tags : t -> string list
4444-val sidebar : t -> string option
4545-val lookup : string -> t list -> t
4646-val words : t -> int
4747-val of_md : string -> t
4848-val typesense_schema : Ezjsonm.value
4949-val pp : Format.formatter -> t -> unit
-373
stack/bushel/lib/paper.ml
···11-module J = Ezjsonm
22-33-type paper = Ezjsonm.value
44-55-type t =
66- { slug : string
77- ; ver : string
88- ; paper : paper
99- ; abstract : string
1010- ; latest : bool
1111- }
1212-1313-type ts = t list
1414-1515-let key y k = J.find y [ k ]
1616-1717-let slugs ts =
1818- List.fold_left (fun acc t -> if List.mem t.slug acc then acc else t.slug :: acc) [] ts
1919-;;
2020-2121-let slug { slug; _ } = slug
2222-let title { paper; _ } : string = key paper "title" |> J.get_string
2323-let authors { paper; _ } : string list = key paper "author" |> J.get_list J.get_string
2424-2525-let project_slugs { paper; _ } : string list =
2626- try key paper "projects" |> J.get_list J.get_string with
2727- | _ -> []
2828-;;
2929-3030-let slides { paper; _ } : string list =
3131- try key paper "slides" |> J.get_list J.get_string with
3232- | _ -> []
3333-;;
3434-3535-let bibtype { paper; _ } : string = key paper "bibtype" |> J.get_string
3636-3737-let journal { paper; _ } =
3838- try key paper "journal" |> J.get_string with
3939- | Not_found ->
4040- failwith
4141- (Printf.sprintf "no journal found for %s\n%!" (Ezjsonm.value_to_string paper))
4242-;;
4343-4444-(** TODO:claude Helper to extract raw JSON *)
4545-let raw_json { paper; _ } = paper
4646-4747-let doi { paper; _ } =
4848- try Some (key paper "doi" |> J.get_string) with
4949- | _ -> None
5050-;;
5151-5252-let volume { paper; _ } =
5353- try Some (key paper "volume" |> J.get_string) with
5454- | _ -> None
5555-;;
5656-5757-let video { paper; _ } =
5858- try Some (key paper "video" |> J.get_string) with
5959- | _ -> None
6060-;;
6161-6262-let issue { paper; _ } =
6363- try Some (key paper "number" |> J.get_string) with
6464- | _ -> None
6565-;;
6666-6767-let url { paper; _ } =
6868- try Some (key paper "url" |> J.get_string) with
6969- | _ -> None
7070-;;
7171-7272-let pages { paper; _ } = try key paper "pages" |> J.get_string with _ -> ""
7373-let abstract { abstract; _ } = abstract
7474-7575-let institution { paper; _ } =
7676- try key paper "institution" |> J.get_string with
7777- | Not_found ->
7878- failwith
7979- (Printf.sprintf "no institution found for %s\n%!" (Ezjsonm.value_to_string paper))
8080-;;
8181-8282-let number { paper; _ } =
8383- try Some (key paper "number" |> J.get_string) with
8484- | Not_found -> None
8585-;;
8686-8787-let editor { paper; _ } = key paper "editor" |> J.get_string
8888-let isbn { paper; _ } = key paper "isbn" |> J.get_string
8989-let bib { paper; _ } = key paper "bib" |> J.get_string
9090-let year { paper; _ } = key paper "year" |> J.get_string |> int_of_string
9191-9292-let publisher { paper; _ } =
9393- try key paper "publisher" |> J.get_string with
9494- | Not_found -> ""
9595-;;
9696-9797-let booktitle { paper; _ } =
9898- let r = key paper "booktitle" |> J.get_string |> Bytes.of_string in
9999- Bytes.set r 0 (Char.lowercase_ascii (Bytes.get r 0));
100100- String.of_bytes r
101101-;;
102102-103103-let date { paper; _ } =
104104- let m =
105105- try
106106- match String.lowercase_ascii (key paper "month" |> J.get_string) with
107107- | "jan" -> 1
108108- | "feb" -> 2
109109- | "mar" -> 3
110110- | "apr" -> 4
111111- | "may" -> 5
112112- | "jun" -> 6
113113- | "jul" -> 7
114114- | "aug" -> 8
115115- | "sep" -> 9
116116- | "oct" -> 10
117117- | "nov" -> 11
118118- | "dec" -> 12
119119- | _ -> 1
120120- with
121121- | Not_found -> 1
122122- in
123123- let y =
124124- try key paper "year" |> J.get_string |> int_of_string with
125125- | Not_found ->
126126- failwith (Printf.sprintf "no year found for %s" (Ezjsonm.value_to_string paper))
127127- in
128128- y, m, 1
129129-;;
130130-131131-let datetime p = Option.get @@ Ptime.of_date @@ date p
132132-133133-let compare p2 p1 =
134134- let d1 =
135135- Ptime.of_date
136136- (try date p1 with
137137- | _ -> 1977, 1, 1)
138138- |> Option.get
139139- in
140140- let d2 =
141141- Ptime.of_date
142142- (try date p2 with
143143- | _ -> 1977, 1, 1)
144144- |> Option.get
145145- in
146146- Ptime.compare d1 d2
147147-;;
148148-149149-let get_papers ~slug ts =
150150- List.filter (fun p -> p.slug = slug && p.latest <> true) ts |> List.sort compare
151151-;;
152152-153153-let read_file file = In_channel.(with_open_bin file input_all)
154154-155155-let of_md ~slug ~ver fname =
156156- (* TODO fix Jekyll_post to not error on no date *)
157157- let fname' = "2000-01-01-" ^ Filename.basename fname in
158158- match Jekyll_post.of_string ~fname:fname' (read_file fname) with
159159- | Error (`Msg m) -> failwith ("paper_of_md: " ^ m)
160160- | Ok jp ->
161161- let fields = jp.Jekyll_post.fields |> Jekyll_format.fields_to_yaml in
162162- let { Jekyll_post.body; _ } = jp in
163163- { slug; ver; abstract = body; paper = fields; latest = false }
164164-;;
165165-166166-let tv (l : t list) =
167167- let h = Hashtbl.create 7 in
168168- List.iter
169169- (fun { slug; ver; _ } ->
170170- match Hashtbl.find_opt h slug with
171171- | None -> Hashtbl.add h slug [ ver ]
172172- | Some l ->
173173- let l = ver :: l in
174174- let l = List.sort Stdlib.compare l in
175175- Hashtbl.replace h slug l)
176176- l;
177177- List.map
178178- (fun p ->
179179- let latest = Hashtbl.find h p.slug |> List.rev |> List.hd in
180180- let latest = p.ver = latest in
181181- { p with latest })
182182- l
183183-;;
184184-185185-let lookup ts slug = List.find_opt (fun t -> t.slug = slug && t.latest) ts
186186-187187-let tag_of_bibtype bt =
188188- match String.lowercase_ascii bt with
189189- | "article" -> "journal"
190190- | "inproceedings" -> "conference"
191191- | "techreport" -> "report"
192192- | "misc" -> "preprint"
193193- | "book" -> "book"
194194- | x -> x
195195-;;
196196-197197-let tags { paper; _ } =
198198- let tags f =
199199- try key paper f |> J.get_list J.get_string with
200200- | _ -> []
201201- in
202202- let core = tags "tags" in
203203- let extra = tags "keywords" in
204204- let projects = tags "projects" in
205205- let ty = [ key paper "bibtype" |> J.get_string |> tag_of_bibtype ] in
206206- List.flatten [ core; extra; ty; projects ]
207207-;;
208208-209209-let best_url p =
210210- if Sys.file_exists (Printf.sprintf "static/papers/%s.pdf" (slug p))
211211- then Some (Printf.sprintf "/papers/%s.pdf" (slug p))
212212- else url p
213213-;;
214214-215215-(** TODO:claude Classification types for papers *)
216216-type classification = Full | Short | Preprint
217217-218218-let string_of_classification = function
219219- | Full -> "full"
220220- | Short -> "short"
221221- | Preprint -> "preprint"
222222-223223-let classification_of_string = function
224224- | "full" -> Full
225225- | "short" -> Short
226226- | "preprint" -> Preprint
227227- | _ -> Full (* default to full if unknown *)
228228-229229-(** TODO:claude Get classification from paper metadata, with fallback to heuristic *)
230230-let classification { paper; _ } =
231231- try
232232- key paper "classification" |> J.get_string |> classification_of_string
233233- with _ ->
234234- (* Fallback to heuristic classification based on venue/bibtype/title *)
235235- let bibtype = try key paper "bibtype" |> J.get_string with _ -> "" in
236236- let journal = try key paper "journal" |> J.get_string |> String.lowercase_ascii with _ -> "" in
237237- let booktitle = try key paper "booktitle" |> J.get_string |> String.lowercase_ascii with _ -> "" in
238238- let title_str = try key paper "title" |> J.get_string |> String.lowercase_ascii with _ -> "" in
239239-240240- (* Helper function to check if text contains any of the patterns *)
241241- let contains_any text patterns =
242242- List.exists (fun pattern ->
243243- let regex = Re.Perl.compile_pat ~opts:[`Caseless] pattern in
244244- Re.execp regex text
245245- ) patterns
246246- in
247247-248248- (* Check for preprint indicators *)
249249- let bibtype_lower = String.lowercase_ascii bibtype in
250250- if contains_any journal ["arxiv"] || contains_any booktitle ["arxiv"] || bibtype_lower = "misc" || bibtype_lower = "techreport"
251251- then Preprint
252252- (* Check for workshop/short paper indicators including in title *)
253253- else if contains_any journal ["workshop"; "wip"; "poster"; "demo"; "hotdep"; "short"] ||
254254- contains_any booktitle ["workshop"; "wip"; "poster"; "demo"; "hotdep"; "short"] ||
255255- contains_any title_str ["poster"]
256256- then Short
257257- (* Default to full paper (journal or conference) *)
258258- else Full
259259-260260-(** TODO:claude Check if paper is marked as selected *)
261261-let selected { paper; _ } =
262262- try
263263- let keys = J.get_dict paper in
264264- match List.assoc_opt "selected" keys with
265265- | Some (`Bool true) -> true
266266- | Some (`String "true") -> true
267267- | _ -> false
268268- with _ -> false
269269-270270-(** TODO:claude Get note field from paper metadata *)
271271-let note { paper; _ } =
272272- try
273273- let keys = J.get_dict paper in
274274- match List.assoc_opt "note" keys with
275275- | Some note_json -> Some (J.get_string note_json)
276276- | None -> None
277277- with _ -> None
278278-279279-(* TODO:claude *)
280280-let to_yaml ?abstract ~ver:_ json_data =
281281- (* Don't add version - it's inferred from filename *)
282282- let frontmatter = Yaml.to_string_exn json_data in
283283- match abstract with
284284- | Some abs ->
285285- (* Trim leading/trailing whitespace and normalize blank lines *)
286286- let trimmed_abs = String.trim abs in
287287- let normalized_abs =
288288- (* Replace 3+ consecutive newlines with exactly 2 newlines *)
289289- Re.replace_string (Re.compile (Re.seq [Re.char '\n'; Re.char '\n'; Re.rep1 (Re.char '\n')])) ~by:"\n\n" trimmed_abs
290290- in
291291- if normalized_abs = "" then
292292- Printf.sprintf "---\n%s---\n" frontmatter
293293- else
294294- Printf.sprintf "---\n%s---\n\n%s\n" frontmatter normalized_abs
295295- | None -> Printf.sprintf "---\n%s---\n" frontmatter
296296-297297-(* TODO:claude *)
298298-let typesense_schema =
299299- let open Ezjsonm in
300300- dict [
301301- ("name", string "papers");
302302- ("fields", list (fun d -> dict d) [
303303- [("name", string "id"); ("type", string "string")];
304304- [("name", string "title"); ("type", string "string")];
305305- [("name", string "authors"); ("type", string "string[]")];
306306- [("name", string "abstract"); ("type", string "string")];
307307- [("name", string "date"); ("type", string "string")];
308308- [("name", string "date_timestamp"); ("type", string "int64")];
309309- [("name", string "tags"); ("type", string "string[]"); ("facet", bool true)];
310310- [("name", string "doi"); ("type", string "string[]"); ("optional", bool true)];
311311- [("name", string "arxiv_id"); ("type", string "string"); ("optional", bool true)];
312312- [("name", string "pdf_url"); ("type", string "string[]"); ("optional", bool true)];
313313- [("name", string "thumbnail_url"); ("type", string "string"); ("optional", bool true)];
314314- [("name", string "journal"); ("type", string "string[]"); ("optional", bool true)];
315315- [("name", string "related_projects"); ("type", string "string[]"); ("optional", bool true)];
316316- [("name", string "related_talks"); ("type", string "string[]"); ("optional", bool true)];
317317- ]);
318318- ("default_sorting_field", string "date_timestamp");
319319- ]
320320-321321-(** TODO:claude Pretty-print a paper with ANSI formatting *)
322322-let pp ppf p =
323323- let open Fmt in
324324- pf ppf "@[<v>";
325325- pf ppf "%a: %a@," (styled `Bold string) "Type" (styled `Cyan string) "Paper";
326326- pf ppf "%a: %a@," (styled `Bold string) "Slug" string (slug p);
327327- pf ppf "%a: %a@," (styled `Bold string) "Version" string p.ver;
328328- pf ppf "%a: %a@," (styled `Bold string) "Title" string (title p);
329329- pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Authors" (list ~sep:comma string) (authors p);
330330- pf ppf "%a: %a@," (styled `Bold string) "Year" int (year p);
331331- pf ppf "%a: %a@," (styled `Bold string) "Bibtype" string (bibtype p);
332332- (match doi p with
333333- | Some d -> pf ppf "%a: %a@," (styled `Bold string) "DOI" string d
334334- | None -> ());
335335- (match url p with
336336- | Some u -> pf ppf "%a: %a@," (styled `Bold string) "URL" string u
337337- | None -> ());
338338- (match video p with
339339- | Some v -> pf ppf "%a: %a@," (styled `Bold string) "Video" string v
340340- | None -> ());
341341- let projs = project_slugs p in
342342- if projs <> [] then
343343- pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Projects" (list ~sep:comma string) projs;
344344- let sl = slides p in
345345- if sl <> [] then
346346- pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Slides" (list ~sep:comma string) sl;
347347- (match bibtype p with
348348- | "article" ->
349349- pf ppf "%a: %a@," (styled `Bold string) "Journal" string (journal p);
350350- (match volume p with
351351- | Some vol -> pf ppf "%a: %a@," (styled `Bold string) "Volume" string vol
352352- | None -> ());
353353- (match issue p with
354354- | Some iss -> pf ppf "%a: %a@," (styled `Bold string) "Issue" string iss
355355- | None -> ());
356356- let pgs = pages p in
357357- if pgs <> "" then
358358- pf ppf "%a: %a@," (styled `Bold string) "Pages" string pgs;
359359- | "inproceedings" ->
360360- pf ppf "%a: %a@," (styled `Bold string) "Booktitle" string (booktitle p);
361361- let pgs = pages p in
362362- if pgs <> "" then
363363- pf ppf "%a: %a@," (styled `Bold string) "Pages" string pgs;
364364- | "techreport" ->
365365- pf ppf "%a: %a@," (styled `Bold string) "Institution" string (institution p);
366366- (match number p with
367367- | Some num -> pf ppf "%a: %a@," (styled `Bold string) "Number" string num
368368- | None -> ());
369369- | _ -> ());
370370- pf ppf "@,";
371371- pf ppf "%a:@," (styled `Bold string) "Abstract";
372372- pf ppf "%a@," (styled `Faint string) (abstract p);
373373- pf ppf "@]"
-55
stack/bushel/lib/paper.mli
···11-type paper
22-33-type t =
44- { slug : string
55- ; ver : string
66- ; paper : paper
77- ; abstract : string
88- ; latest : bool
99- }
1010-1111-type ts = t list
1212-1313-val tv : t list -> ts
1414-val slug : t -> string
1515-val title : t -> string
1616-val authors : t -> string list
1717-val project_slugs : t -> string list
1818-val slides : t -> string list
1919-val bibtype : t -> string
2020-val journal : t -> string
2121-val raw_json : t -> Ezjsonm.value
2222-val doi : t -> string option
2323-val volume : t -> string option
2424-val video : t -> string option
2525-val issue : t -> string option
2626-val url : t -> string option
2727-val best_url : t -> string option
2828-val pages : t -> string
2929-val abstract : t -> string
3030-val institution : t -> string
3131-val number : t -> string option
3232-val editor : t -> string
3333-val isbn : t -> string
3434-val bib : t -> string
3535-val year : t -> int
3636-val publisher : t -> string
3737-val booktitle : t -> string
3838-val tags : t -> string list
3939-val date : t -> int * int * int
4040-val datetime : t -> Ptime.t
4141-val compare : t -> t -> int
4242-val get_papers : slug:string -> ts -> ts
4343-val slugs : ts -> string list
4444-val lookup : ts -> string -> t option
4545-val of_md : slug:string -> ver:string -> string -> t
4646-val to_yaml : ?abstract:string -> ver:string -> Ezjsonm.value -> string
4747-val typesense_schema : Ezjsonm.value
4848-4949-type classification = Full | Short | Preprint
5050-val string_of_classification : classification -> string
5151-val classification_of_string : string -> classification
5252-val classification : t -> classification
5353-val selected : t -> bool
5454-val note : t -> string option
5555-val pp : Format.formatter -> t -> unit
-100
stack/bushel/lib/project.ml
···11-type t =
22- { slug : string
33- ; title : string
44- ; start : int (* year *)
55- ; finish : int option
66- ; tags : string list
77- ; ideas : string
88- ; body : string
99- }
1010-1111-type ts = t list
1212-1313-let tags p = p.tags
1414-1515-let compare a b =
1616- match compare a.start b.start with
1717- | 0 -> compare b.finish a.finish
1818- | n -> n
1919-;;
2020-2121-let title { title; _ } = title
2222-let body { body; _ } = body
2323-let ideas { ideas; _ } = ideas
2424-2525-let of_md fname =
2626- match Jekyll_post.of_string ~fname (Util.read_file fname) with
2727- | Error (`Msg m) -> failwith ("Project.of_file: " ^ m)
2828- | Ok jp ->
2929- let fields = jp.Jekyll_post.fields in
3030- let { Jekyll_post.title; date; slug; body; _ } = jp in
3131- let (start, _, _), _ = Ptime.to_date_time date in
3232- let finish =
3333- match Jekyll_format.find "finish" fields with
3434- | Some (`String date) ->
3535- let date = Jekyll_format.parse_date_exn date in
3636- let (finish, _, _), _ = Ptime.to_date_time date in
3737- Some finish
3838- | _ -> None
3939- in
4040- let ideas =
4141- match Jekyll_format.find "ideas" fields with
4242- | Some (`String e) -> e
4343- | _ -> failwith ("no ideas key in " ^ fname)
4444- in
4545- let tags =
4646- match Jekyll_format.find "tags" fields with
4747- | Some (`A tags) -> List.map Yaml.Util.to_string_exn tags
4848- | _ -> []
4949- in
5050- { slug; title; start; finish; ideas; tags; body }
5151-;;
5252-5353-let lookup projects slug = List.find_opt (fun p -> p.slug = slug) projects
5454-5555-(* TODO:claude *)
5656-let typesense_schema =
5757- let open Ezjsonm in
5858- dict [
5959- ("name", string "projects");
6060- ("fields", list (fun d -> dict d) [
6161- [("name", string "id"); ("type", string "string")];
6262- [("name", string "title"); ("type", string "string")];
6363- [("name", string "description"); ("type", string "string")];
6464- [("name", string "start_year"); ("type", string "int32")];
6565- [("name", string "finish_year"); ("type", string "int32"); ("optional", bool true)];
6666- [("name", string "date"); ("type", string "string")];
6767- [("name", string "date_timestamp"); ("type", string "int64")];
6868- [("name", string "tags"); ("type", string "string[]"); ("facet", bool true)];
6969- [("name", string "repository_url"); ("type", string "string"); ("optional", bool true)];
7070- [("name", string "homepage_url"); ("type", string "string"); ("optional", bool true)];
7171- [("name", string "languages"); ("type", string "string[]"); ("facet", bool true); ("optional", bool true)];
7272- [("name", string "license"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];
7373- [("name", string "status"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];
7474- [("name", string "related_papers"); ("type", string "string[]"); ("optional", bool true)];
7575- [("name", string "related_talks"); ("type", string "string[]"); ("optional", bool true)];
7676- [("name", string "body"); ("type", string "string"); ("optional", bool true)];
7777- [("name", string "ideas"); ("type", string "string"); ("optional", bool true)];
7878- ]);
7979- ("default_sorting_field", string "date_timestamp");
8080- ]
8181-8282-(** TODO:claude Pretty-print a project with ANSI formatting *)
8383-let pp ppf p =
8484- let open Fmt in
8585- pf ppf "@[<v>";
8686- pf ppf "%a: %a@," (styled `Bold string) "Type" (styled `Cyan string) "Project";
8787- pf ppf "%a: %a@," (styled `Bold string) "Slug" string p.slug;
8888- pf ppf "%a: %a@," (styled `Bold string) "Title" string (title p);
8989- pf ppf "%a: %d@," (styled `Bold string) "Start" p.start;
9090- (match p.finish with
9191- | Some year -> pf ppf "%a: %d@," (styled `Bold string) "Finish" year
9292- | None -> pf ppf "%a: ongoing@," (styled `Bold string) "Finish");
9393- let t = tags p in
9494- if t <> [] then
9595- pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Tags" (list ~sep:comma string) t;
9696- pf ppf "%a: %a@," (styled `Bold string) "Ideas" string (ideas p);
9797- pf ppf "@,";
9898- pf ppf "%a:@," (styled `Bold string) "Body";
9999- pf ppf "%a@," string (body p);
100100- pf ppf "@]"
-21
stack/bushel/lib/project.mli
···11-type t =
22- { slug : string
33- ; title : string
44- ; start : int
55- ; finish : int option
66- ; tags : string list
77- ; ideas : string
88- ; body : string
99- }
1010-1111-type ts = t list
1212-1313-val title : t -> string
1414-val body : t -> string
1515-val ideas : t -> string
1616-val lookup : t list -> string -> t option
1717-val tags : t -> string list
1818-val compare : t -> t -> int
1919-val of_md : string -> t
2020-val typesense_schema : Ezjsonm.value
2121-val pp : Format.formatter -> t -> unit
-44
stack/bushel/lib/srcsetter.ml
···11-module MS = Map.Make (String)
22-33-type t =
44- { name : string
55- ; slug : string
66- ; origin : string
77- ; dims : int * int
88- ; variants : (int * int) MS.t
99- }
1010-1111-type ts = t list
1212-1313-let v name slug origin variants dims = { name; slug; origin; variants; dims }
1414-let slug { slug; _ } = slug
1515-let origin { origin; _ } = origin
1616-let name { name; _ } = name
1717-let dims { dims; _ } = dims
1818-let variants { variants; _ } = variants
1919-2020-let dims_json_t =
2121- let open Jsont in
2222- let dec x y = x, y in
2323- let enc (w, h) = function
2424- | 0 -> w
2525- | _ -> h
2626- in
2727- t2 ~dec ~enc uint16
2828-;;
2929-3030-let json_t =
3131- let open Jsont in
3232- let open Jsont.Object in
3333- map ~kind:"Entry" v
3434- |> mem "name" string ~enc:name
3535- |> mem "slug" string ~enc:slug
3636- |> mem "origin" string ~enc:origin
3737- |> mem "variants" (as_string_map dims_json_t) ~enc:variants
3838- |> mem "dims" dims_json_t ~enc:dims
3939- |> finish
4040-;;
4141-4242-let list = Jsont.list json_t
4343-let list_to_json es = Jsont_bytesrw.encode_string list ~format:Jsont.Indent es
4444-let list_of_json = Jsont_bytesrw.decode_string list
-21
stack/bushel/lib/srcsetter.mli
···11-module MS : Map.S with type key = string
22-33-type t =
44- { name : string
55- ; slug : string
66- ; origin : string
77- ; dims : int * int
88- ; variants : (int * int) MS.t
99- }
1010-1111-type ts = t list
1212-1313-val origin : t -> string
1414-val slug : t -> string
1515-val name : t -> string
1616-val dims : t -> int * int
1717-val variants : t -> (int * int) MS.t
1818-val list_to_json : t list -> (string, string) result
1919-val list_of_json : string -> (t list, string) result
2020-val json_t : t Jsont.t
2121-val list : t list Jsont.t
-114
stack/bushel/lib/tags.ml
···11-open Entry
22-33-type t =
44- [ `Slug of string (* :foo points to the specific slug foo *)
55- | `Contact of string (* @foo points to contact foo *)
66- | `Set of string (* #papers points to all Paper entries *)
77- | `Text of string (* foo points to a free text "foo" *)
88- | `Year of int (* a number between 1900--2100 is interpreted as a year *)
99- ]
1010-1111-let is_text = function
1212- | `Text _ -> true
1313- | _ -> false
1414-;;
1515-1616-let is_slug = function
1717- | `Slug _ -> true
1818- | _ -> false
1919-;;
2020-2121-let is_set = function
2222- | `Set _ -> true
2323- | _ -> false
2424-;;
2525-2626-let is_year = function
2727- | `Year _ -> true
2828- | _ -> false
2929-;;
3030-3131-let of_string s : t =
3232- if String.length s < 2 then invalid_arg ("Tag.of_string: " ^ s);
3333- match s.[0] with
3434- | ':' ->
3535- let slug = String.sub s 1 (String.length s - 1) in
3636- `Slug slug
3737- | '@' -> failwith "TODO add contacts to entries"
3838- | '#' ->
3939- let cl = String.sub s 1 (String.length s - 1) in
4040- `Set cl
4141- | _ ->
4242- (try
4343- let x = int_of_string s in
4444- if x > 1900 && x < 2100 then `Year x else `Text s
4545- with
4646- | _ -> `Text s)
4747-;;
4848-4949-let of_string_list l = List.map of_string l
5050-5151-let to_string = function
5252- | `Slug t -> ":" ^ t
5353- | `Contact c -> "@" ^ c
5454- | `Set s -> "#" ^ s
5555- | `Text t -> t
5656- | `Year y -> string_of_int y
5757-;;
5858-5959-let to_raw_string = function
6060- | `Slug t -> t
6161- | `Contact c -> c
6262- | `Set s -> s
6363- | `Text t -> t
6464- | `Year y -> string_of_int y
6565-;;
6666-6767-let pp ppf t = Fmt.string ppf (to_string t)
6868-6969-let tags_of_ent _entries ent : t list =
7070- match ent with
7171- | `Paper p -> of_string_list @@ Paper.tags p
7272- | `Video v -> of_string_list v.Video.tags
7373- | `Project p -> of_string_list @@ Project.tags p
7474- | `Note n -> of_string_list @@ Note.tags n
7575- | `Idea i -> of_string_list i.Idea.tags
7676-;;
7777-7878-let mentions tags =
7979- List.filter
8080- (function
8181- | `Contact _ | `Slug _ -> true
8282- | _ -> false)
8383- tags
8484-;;
8585-8686-let mention_entries entries tags =
8787- let lk t =
8888- try Some (lookup_exn entries t)
8989- with Not_found -> Printf.eprintf "mention_entries not found: %s\n%!" t; None
9090- in
9191- List.filter_map
9292- (function
9393- | `Slug t -> lk t
9494- | _ -> None)
9595- tags
9696-;;
9797-9898-let count_tags ?h fn vs =
9999- let h =
100100- match h with
101101- | Some h -> h
102102- | None -> Hashtbl.create 42
103103- in
104104- List.iter
105105- (fun ent ->
106106- List.iter
107107- (fun tag ->
108108- match Hashtbl.find_opt h tag with
109109- | Some num -> Hashtbl.replace h tag (num + 1)
110110- | None -> Hashtbl.add h tag 1)
111111- (fn ent))
112112- vs;
113113- h
114114-;;
-25
stack/bushel/lib/tags.mli
···11-type t =
22- [ `Contact of string
33- | `Set of string
44- | `Slug of string
55- | `Text of string
66- | `Year of int
77- ]
88-99-val is_text : t -> bool
1010-val is_set : t -> bool
1111-val is_slug : t -> bool
1212-val is_year : t -> bool
1313-val of_string : string -> t
1414-val to_string : t -> string
1515-val to_raw_string : t -> string
1616-val pp : Format.formatter -> t -> unit
1717-val mention_entries : Entry.t -> t list -> Entry.entry list
1818-val tags_of_ent : Entry.t -> Entry.entry -> t list
1919-val mentions : t list -> t list
2020-2121-val count_tags
2222- : ?h:('a, int) Hashtbl.t
2323- -> ('b -> 'a list)
2424- -> 'b list
2525- -> ('a, int) Hashtbl.t
-527
stack/bushel/lib/typesense.ml
···11-(** Typesense API client for Bushel *)
22-33-type config = {
44- endpoint : string;
55- api_key : string;
66- openai_key : string;
77-}
88-99-type error =
1010- | Http_error of int * string
1111- | Json_error of string
1212- | Connection_error of string
1313-1414-let pp_error fmt = function
1515- | Http_error (code, msg) -> Fmt.pf fmt "HTTP %d: %s" code msg
1616- | Json_error msg -> Fmt.pf fmt "JSON error: %s" msg
1717- | Connection_error msg -> Fmt.pf fmt "Connection error: %s" msg
1818-1919-(** Create authentication headers for Typesense API *)
2020-let auth_headers api_key =
2121- Requests.Headers.empty
2222- |> Requests.Headers.set "X-TYPESENSE-API-KEY" api_key
2323- |> Requests.Headers.set "Content-Type" "application/json"
2424-2525-(** Make HTTP request to Typesense API *)
2626-let make_request ~sw ~env ?(meth=`GET) ?(body="") config path =
2727- let uri = Uri.of_string (config.endpoint ^ path) in
2828- let headers = auth_headers config.api_key in
2929- let body = if body = "" then None else Some (Requests.Body.of_string Requests.Mime.json body) in
3030-3131- try
3232- let response = Requests.One.request ~sw
3333- ~clock:env#clock ~net:env#net
3434- ?body
3535- ~headers
3636- ~method_:meth
3737- (Uri.to_string uri)
3838- in
3939-4040- let status = Requests.Response.status_code response in
4141- let body_flow = Requests.Response.body response in
4242- let body_str = Eio.Flow.read_all body_flow in
4343-4444- if status >= 200 && status < 300 then
4545- Ok body_str
4646- else
4747- Error (Http_error (status, body_str))
4848- with exn ->
4949- Error (Connection_error (Printexc.to_string exn))
5050-5151-(** Create a collection with given schema *)
5252-let create_collection ~sw ~env config (schema : Ezjsonm.value) =
5353- let body = Ezjsonm.value_to_string schema in
5454- make_request ~sw ~env ~meth:`POST ~body config "/collections"
5555-5656-(** Check if collection exists *)
5757-let collection_exists ~sw ~env config name =
5858- let result = make_request ~sw ~env config ("/collections/" ^ name) in
5959- match result with
6060- | Ok _ -> true
6161- | Error (Http_error (404, _)) -> false
6262- | Error _ -> false
6363-6464-(** Delete a collection *)
6565-let delete_collection ~sw ~env config name =
6666- make_request ~sw ~env ~meth:`DELETE config ("/collections/" ^ name)
6767-6868-(** Upload documents to a collection in batch *)
6969-let upload_documents ~sw ~env config collection_name (documents : Ezjsonm.value list) =
7070- let jsonl_lines = List.map (fun doc -> Ezjsonm.value_to_string doc) documents in
7171- let body = String.concat "\n" jsonl_lines in
7272- make_request ~sw ~env ~meth:`POST ~body config
7373- (Printf.sprintf "/collections/%s/documents/import?action=upsert" collection_name)
7474-7575-7676-(** Convert Bushel objects to Typesense documents *)
7777-7878-(** Helper function to truncate long strings for embedding *)
7979-let truncate_for_embedding ?(max_chars=20000) text =
8080- if String.length text <= max_chars then text
8181- else String.sub text 0 max_chars
8282-8383-(** Helper function to convert Ptime to Unix timestamp *)
8484-let ptime_to_timestamp ptime =
8585- let span = Ptime.to_span ptime in
8686- let seconds = Ptime.Span.to_int_s span in
8787- match seconds with
8888- | Some s -> Int64.of_int s
8989- | None -> 0L
9090-9191-(** Helper function to convert date tuple to Unix timestamp *)
9292-let date_to_timestamp (year, month, day) =
9393- match Ptime.of_date (year, month, day) with
9494- | Some ptime -> ptime_to_timestamp ptime
9595- | None -> 0L
9696-9797-(** Resolve author handles to full names in a list *)
9898-let resolve_author_list contacts authors =
9999- List.map (fun author ->
100100- (* Strip '@' prefix if present *)
101101- let handle =
102102- if String.length author > 0 && author.[0] = '@' then
103103- String.sub author 1 (String.length author - 1)
104104- else
105105- author
106106- in
107107- (* Try to look up as a contact handle *)
108108- match Contact.find_by_handle contacts handle with
109109- | Some contact -> Contact.name contact
110110- | None -> author (* Keep original if not found *)
111111- ) authors
112112-113113-let contact_to_document (contact : Contact.t) =
114114- let open Ezjsonm in
115115- let safe_string_list_from_opt = function
116116- | Some s -> [s]
117117- | None -> []
118118- in
119119- dict [
120120- ("id", string (Contact.handle contact));
121121- ("handle", string (Contact.handle contact));
122122- ("name", string (Contact.name contact));
123123- ("names", list string (Contact.names contact));
124124- ("email", list string (safe_string_list_from_opt (Contact.email contact)));
125125- ("icon", list string (safe_string_list_from_opt (Contact.icon contact)));
126126- ("github", list string (safe_string_list_from_opt (Contact.github contact)));
127127- ("twitter", list string (safe_string_list_from_opt (Contact.twitter contact)));
128128- ("url", list string (safe_string_list_from_opt (Contact.url contact)));
129129- ]
130130-131131-let paper_to_document entries (paper : Paper.t) =
132132- let date_tuple = Paper.date paper in
133133- let contacts = Entry.contacts entries in
134134-135135- (* Helper to extract string arrays from JSON, handling both single strings and arrays *)
136136- let extract_string_array_from_json json_field_name =
137137- try
138138- (* Access the raw JSON from the paper record *)
139139- let paper_json = Paper.raw_json paper in
140140- let value = Ezjsonm.get_dict paper_json |> List.assoc json_field_name in
141141- match value with
142142- | `String s -> [s]
143143- | `A l -> List.filter_map (function `String s -> Some s | _ -> None) l
144144- | _ -> []
145145- with _ -> []
146146- in
147147-148148- (* Resolve author handles to full names *)
149149- let authors = resolve_author_list contacts (Paper.authors paper) in
150150-151151- (* Convert abstract markdown to plain text *)
152152- let abstract = Md.markdown_to_plaintext entries (Paper.abstract paper) |> truncate_for_embedding in
153153-154154- (* Extract publication metadata *)
155155- let bibtype = Paper.bibtype paper in
156156- let metadata =
157157- try
158158- match bibtype with
159159- | "article" -> Printf.sprintf "Journal: %s" (Paper.journal paper)
160160- | "inproceedings" -> Printf.sprintf "Proceedings: %s" (Paper.journal paper)
161161- | "misc" | "techreport" -> Printf.sprintf "Preprint: %s" (Paper.journal paper)
162162- | _ -> Printf.sprintf "%s: %s" (String.capitalize_ascii bibtype) (Paper.journal paper)
163163- with _ -> bibtype
164164- in
165165-166166- (* Get bibtex from raw JSON *)
167167- let bibtex =
168168- try
169169- let paper_json = Paper.raw_json paper in
170170- Ezjsonm.get_dict paper_json
171171- |> List.assoc "bibtex"
172172- |> Ezjsonm.get_string
173173- with _ -> ""
174174- in
175175-176176- let thumbnail_url = Entry.thumbnail entries (`Paper paper) in
177177- Ezjsonm.dict [
178178- ("id", Ezjsonm.string (Paper.slug paper));
179179- ("title", Ezjsonm.string (Paper.title paper));
180180- ("authors", Ezjsonm.list Ezjsonm.string authors);
181181- ("abstract", Ezjsonm.string abstract);
182182- ("metadata", Ezjsonm.string metadata);
183183- ("bibtex", Ezjsonm.string bibtex);
184184- ("date", Ezjsonm.string (let y, m, d = date_tuple in Printf.sprintf "%04d-%02d-%02d" y m d));
185185- ("date_timestamp", Ezjsonm.int64 (date_to_timestamp date_tuple));
186186- ("tags", Ezjsonm.list Ezjsonm.string (Paper.tags paper));
187187- ("doi", Ezjsonm.list Ezjsonm.string (extract_string_array_from_json "doi"));
188188- ("pdf_url", Ezjsonm.list Ezjsonm.string (extract_string_array_from_json "pdf_url"));
189189- ("journal", Ezjsonm.list Ezjsonm.string (extract_string_array_from_json "journal"));
190190- ("related_projects", Ezjsonm.list Ezjsonm.string (Paper.project_slugs paper));
191191- ("thumbnail_url", Ezjsonm.string (Option.value ~default:"" thumbnail_url));
192192- ]
193193-194194-let project_to_document entries (project : Project.t) =
195195- let open Ezjsonm in
196196- (* Use January 1st of start year as the date for sorting *)
197197- let date_timestamp = date_to_timestamp (project.start, 1, 1) in
198198-199199- (* Convert body markdown to plain text *)
200200- let description = Md.markdown_to_plaintext entries (Project.body project) |> truncate_for_embedding in
201201-202202- let thumbnail_url = Entry.thumbnail entries (`Project project) in
203203- dict [
204204- ("id", string project.slug);
205205- ("title", string (Project.title project));
206206- ("description", string description);
207207- ("start", int project.start);
208208- ("finish", option int project.finish);
209209- ("start_year", int project.start);
210210- ("date", string (Printf.sprintf "%04d-01-01" project.start));
211211- ("date_timestamp", int64 date_timestamp);
212212- ("tags", list string (Project.tags project));
213213- ("thumbnail_url", string (Option.value ~default:"" thumbnail_url));
214214- ]
215215-216216-let video_to_document entries (video : Video.t) =
217217- let open Ezjsonm in
218218- let datetime = Video.datetime video in
219219- let safe_string_list_from_opt = function
220220- | Some s -> [s]
221221- | None -> []
222222- in
223223-224224- (* Convert body markdown to plain text *)
225225- let description = Md.markdown_to_plaintext entries (Video.body video) |> truncate_for_embedding in
226226-227227- (* Resolve paper and project slugs to titles *)
228228- let paper_title = match Video.paper video with
229229- | Some slug ->
230230- (match Entry.lookup entries slug with
231231- | Some entry -> Some (Entry.title entry)
232232- | None -> Some slug) (* Fallback to slug if not found *)
233233- | None -> None
234234- in
235235- let project_title = match Video.project video with
236236- | Some slug ->
237237- (match Entry.lookup entries slug with
238238- | Some entry -> Some (Entry.title entry)
239239- | None -> Some slug) (* Fallback to slug if not found *)
240240- | None -> None
241241- in
242242-243243- let thumbnail_url = Entry.thumbnail entries (`Video video) in
244244- dict [
245245- ("id", string (Video.slug video));
246246- ("title", string (Video.title video));
247247- ("description", string description);
248248- ("published_date", string (Ptime.to_rfc3339 datetime));
249249- ("date", string (Ptime.to_rfc3339 datetime));
250250- ("date_timestamp", int64 (ptime_to_timestamp datetime));
251251- ("url", string (Video.url video));
252252- ("uuid", string (Video.uuid video));
253253- ("is_talk", bool (Video.talk video));
254254- ("paper", list string (safe_string_list_from_opt paper_title));
255255- ("project", list string (safe_string_list_from_opt project_title));
256256- ("tags", list string video.tags);
257257- ("thumbnail_url", string (Option.value ~default:"" thumbnail_url));
258258- ]
259259-260260-let note_to_document entries (note : Note.t) =
261261- let open Ezjsonm in
262262- let datetime = Note.datetime note in
263263- let safe_string_list_from_opt = function
264264- | Some s -> [s]
265265- | None -> []
266266- in
267267-268268- (* Convert body markdown to plain text *)
269269- let content = Md.markdown_to_plaintext entries (Note.body note) |> truncate_for_embedding in
270270-271271- let thumbnail_url = Entry.thumbnail entries (`Note note) in
272272- let word_count = Note.words note in
273273- dict [
274274- ("id", string (Note.slug note));
275275- ("title", string (Note.title note));
276276- ("date", string (Ptime.to_rfc3339 datetime));
277277- ("date_timestamp", int64 (ptime_to_timestamp datetime));
278278- ("content", string content);
279279- ("tags", list string (Note.tags note));
280280- ("draft", bool (Note.draft note));
281281- ("synopsis", list string (safe_string_list_from_opt (Note.synopsis note)));
282282- ("thumbnail_url", string (Option.value ~default:"" thumbnail_url));
283283- ("words", int word_count);
284284- ]
285285-286286-let idea_to_document entries (idea : Idea.t) =
287287- let open Ezjsonm in
288288- let contacts = Entry.contacts entries in
289289- (* Use January 1st of the year as the date for sorting *)
290290- let date_timestamp = date_to_timestamp (Idea.year idea, 1, 1) in
291291-292292- (* Convert body markdown to plain text *)
293293- let description = Md.markdown_to_plaintext entries (Idea.body idea) |> truncate_for_embedding in
294294-295295- (* Resolve supervisor and student handles to full names *)
296296- let supervisors = resolve_author_list contacts (Idea.supervisors idea) in
297297- let students = resolve_author_list contacts (Idea.students idea) in
298298-299299- (* Resolve project slug to project title *)
300300- let project_title =
301301- match Entry.lookup entries (Idea.project idea) with
302302- | Some entry -> Entry.title entry
303303- | None -> Idea.project idea (* Fallback to slug if not found *)
304304- in
305305-306306- let thumbnail_url = Entry.thumbnail entries (`Idea idea) in
307307- dict [
308308- ("id", string idea.slug);
309309- ("title", string (Idea.title idea));
310310- ("description", string description);
311311- ("level", string (Idea.level_to_string (Idea.level idea)));
312312- ("project", string project_title);
313313- ("status", string (Idea.status_to_string (Idea.status idea)));
314314- ("year", int (Idea.year idea));
315315- ("date", string (Printf.sprintf "%04d-01-01" (Idea.year idea)));
316316- ("date_timestamp", int64 date_timestamp);
317317- ("supervisors", list string supervisors);
318318- ("students", list string students);
319319- ("tags", list string idea.tags);
320320- ("thumbnail_url", string (Option.value ~default:"" thumbnail_url));
321321- ]
322322-323323-(** Helper function to add embedding field to schema *)
324324-let add_embedding_field_to_schema schema config embedding_from_fields =
325325- let open Ezjsonm in
326326- let fields = get_dict schema |> List.assoc "fields" |> get_list (fun f -> f) in
327327- let embedding_field = dict [
328328- ("name", string "embedding");
329329- ("type", string "float[]");
330330- ("embed", dict [
331331- ("from", list string embedding_from_fields);
332332- ("model_config", dict [
333333- ("model_name", string "openai/text-embedding-3-small");
334334- ("api_key", string config.openai_key);
335335- ]);
336336- ]);
337337- ] in
338338- let updated_fields = fields @ [embedding_field] in
339339- let updated_schema =
340340- List.map (fun (k, v) ->
341341- if k = "fields" then (k, list (fun f -> f) updated_fields)
342342- else (k, v)
343343- ) (get_dict schema)
344344- in
345345- dict updated_schema
346346-347347-(** Upload all bushel objects to their respective collections *)
348348-let upload_all ~sw ~env config entries =
349349- print_string "Uploading bushel data to Typesense\n";
350350-351351- let contacts = Entry.contacts entries in
352352- let papers = Entry.papers entries in
353353- let projects = Entry.projects entries in
354354- let notes = Entry.notes entries in
355355- let videos = Entry.videos entries in
356356- let ideas = Entry.ideas entries in
357357-358358- let collections = [
359359- ("contacts", add_embedding_field_to_schema Contact.typesense_schema config ["name"; "names"], (List.map contact_to_document contacts : Ezjsonm.value list));
360360- ("papers", add_embedding_field_to_schema Paper.typesense_schema config ["title"; "abstract"; "authors"], (List.map (paper_to_document entries) papers : Ezjsonm.value list));
361361- ("videos", add_embedding_field_to_schema Video.typesense_schema config ["title"; "description"], (List.map (video_to_document entries) videos : Ezjsonm.value list));
362362- ("projects", add_embedding_field_to_schema Project.typesense_schema config ["title"; "description"; "tags"], (List.map (project_to_document entries) projects : Ezjsonm.value list));
363363- ("notes", add_embedding_field_to_schema Note.typesense_schema config ["title"; "content"; "tags"], (List.map (note_to_document entries) notes : Ezjsonm.value list));
364364- ("ideas", add_embedding_field_to_schema Idea.typesense_schema config ["title"; "description"; "tags"], (List.map (idea_to_document entries) ideas : Ezjsonm.value list));
365365- ] in
366366-367367- let upload_collection ((name, schema, documents) : string * Ezjsonm.value * Ezjsonm.value list) =
368368- Printf.printf "Processing collection: %s\n%!" name;
369369- let exists = collection_exists ~sw ~env config name in
370370- (if exists then (
371371- Printf.printf "Collection %s exists, deleting...\n%!" name;
372372- let result = delete_collection ~sw ~env config name in
373373- match result with
374374- | Ok _ -> Printf.printf "Deleted collection %s\n%!" name
375375- | Error err ->
376376- let err_str = Fmt.str "%a" pp_error err in
377377- Printf.printf "Failed to delete collection %s: %s\n%!" name err_str
378378- ));
379379- Printf.printf "Creating collection %s with %d documents\n%!" name (List.length documents);
380380- let result = create_collection ~sw ~env config schema in
381381- match result with
382382- | Ok _ ->
383383- Printf.printf "Created collection %s\n%!" name;
384384- if documents = [] then
385385- Printf.printf "No documents to upload for %s\n%!" name
386386- else (
387387- let result = upload_documents ~sw ~env config name documents in
388388- match result with
389389- | Ok response ->
390390- (* Count successes and failures *)
391391- let lines = String.split_on_char '\n' response in
392392- let successes = List.fold_left (fun acc line ->
393393- if String.contains line ':' && Str.string_match (Str.regexp ".*success.*true.*") line 0 then acc + 1 else acc) 0 lines in
394394- let failures = List.fold_left (fun acc line ->
395395- if String.contains line ':' && Str.string_match (Str.regexp ".*success.*false.*") line 0 then acc + 1 else acc) 0 lines in
396396- Printf.printf "Upload results for %s: %d successful, %d failed out of %d total\n%!"
397397- name successes failures (List.length documents);
398398- if failures > 0 then (
399399- Printf.printf "Failed documents in %s:\n%!" name;
400400- let failed_lines = List.filter (fun line -> Str.string_match (Str.regexp ".*success.*false.*") line 0) lines in
401401- List.iter (fun line -> Printf.printf "%s\n%!" line) failed_lines
402402- )
403403- | Error err ->
404404- let err_str = Fmt.str "%a" pp_error err in
405405- Printf.printf "Failed to upload documents to %s: %s\n%!" name err_str
406406- )
407407- | Error err ->
408408- let err_str = Fmt.str "%a" pp_error err in
409409- Printf.printf "Failed to create collection %s: %s\n%!" name err_str
410410- in
411411-412412- List.iter upload_collection collections
413413-414414-(** Re-export search types from Typesense_client *)
415415-type search_result = Typesense_client.search_result = {
416416- id: string;
417417- title: string;
418418- content: string;
419419- score: float;
420420- collection: string;
421421- highlights: (string * string list) list;
422422- document: Ezjsonm.value;
423423-}
424424-425425-type search_response = Typesense_client.search_response = {
426426- hits: search_result list;
427427- total: int;
428428- query_time: float;
429429-}
430430-431431-(** Convert bushel config to client config *)
432432-let to_client_config (config : config) =
433433- Typesense_client.{ endpoint = config.endpoint; api_key = config.api_key }
434434-435435-(** Search a single collection *)
436436-let search_collection ~sw ~env (config : config) collection_name query ?(limit=10) ?(offset=0) () =
437437- let client_config = to_client_config config in
438438- let requests_session = Requests.create ~sw env in
439439- let client = Typesense_client.create ~requests_session ~config:client_config in
440440- let result = Typesense_client.search_collection client collection_name query ~limit ~offset () in
441441- match result with
442442- | Ok response -> Ok response
443443- | Error (Typesense_client.Http_error (code, msg)) -> Error (Http_error (code, msg))
444444- | Error (Typesense_client.Json_error msg) -> Error (Json_error msg)
445445- | Error (Typesense_client.Connection_error msg) -> Error (Connection_error msg)
446446-447447-(** Search across all collections - use client multisearch *)
448448-let search_all ~sw ~env (config : config) query ?(limit=10) ?(offset=0) () =
449449- let client_config = to_client_config config in
450450- let requests_session = Requests.create ~sw env in
451451- let client = Typesense_client.create ~requests_session ~config:client_config in
452452- let result = Typesense_client.multisearch client query ~limit:50 () in
453453- match result with
454454- | Ok multisearch_resp ->
455455- let combined_response = Typesense_client.combine_multisearch_results multisearch_resp ~limit ~offset () in
456456- Ok combined_response
457457- | Error (Typesense_client.Http_error (code, msg)) -> Error (Http_error (code, msg))
458458- | Error (Typesense_client.Json_error msg) -> Error (Json_error msg)
459459- | Error (Typesense_client.Connection_error msg) -> Error (Connection_error msg)
460460-461461-(** List all collections *)
462462-let list_collections ~sw ~env (config : config) =
463463- let client_config = to_client_config config in
464464- let requests_session = Requests.create ~sw env in
465465- let client = Typesense_client.create ~requests_session ~config:client_config in
466466- let result = Typesense_client.list_collections client in
467467- match result with
468468- | Ok collections -> Ok collections
469469- | Error (Typesense_client.Http_error (code, msg)) -> Error (Http_error (code, msg))
470470- | Error (Typesense_client.Json_error msg) -> Error (Json_error msg)
471471- | Error (Typesense_client.Connection_error msg) -> Error (Connection_error msg)
472472-473473-(** Re-export multisearch types from Typesense_client *)
474474-type multisearch_response = Typesense_client.multisearch_response = {
475475- results: search_response list;
476476-}
477477-478478-(** Perform multisearch across all collections *)
479479-let multisearch ~sw ~env (config : config) query ?(limit=10) () =
480480- let client_config = to_client_config config in
481481- let requests_session = Requests.create ~sw env in
482482- let client = Typesense_client.create ~requests_session ~config:client_config in
483483- let result = Typesense_client.multisearch client query ~limit () in
484484- match result with
485485- | Ok multisearch_resp -> Ok multisearch_resp
486486- | Error (Typesense_client.Http_error (code, msg)) -> Error (Http_error (code, msg))
487487- | Error (Typesense_client.Json_error msg) -> Error (Json_error msg)
488488- | Error (Typesense_client.Connection_error msg) -> Error (Connection_error msg)
489489-490490-(** Combine multisearch results into single result set *)
491491-let combine_multisearch_results (multisearch_resp : multisearch_response) ?(limit=10) ?(offset=0) () =
492492- Typesense_client.combine_multisearch_results multisearch_resp ~limit ~offset ()
493493-494494-(** Load configuration from files *)
495495-let load_config_from_files () =
496496- let read_file_if_exists filename =
497497- if Sys.file_exists filename then
498498- let ic = open_in filename in
499499- let content = really_input_string ic (in_channel_length ic) in
500500- close_in ic;
501501- Some (String.trim content)
502502- else None
503503- in
504504-505505- let endpoint = match read_file_if_exists ".typesense-url" with
506506- | Some url -> url
507507- | None -> "http://localhost:8108"
508508- in
509509-510510- let api_key = match read_file_if_exists ".typesense-key" with
511511- | Some key -> key
512512- | None ->
513513- try Sys.getenv "TYPESENSE_API_KEY"
514514- with Not_found -> ""
515515- in
516516-517517- let openai_key = match read_file_if_exists ".openrouter-api" with
518518- | Some key -> key
519519- | None ->
520520- try Sys.getenv "OPENAI_API_KEY"
521521- with Not_found -> ""
522522- in
523523-524524- { endpoint; api_key; openai_key }
525525-526526-(** Re-export pretty printer from Typesense_client *)
527527-let pp_search_result_oneline = Typesense_client.pp_search_result_oneline
-168
stack/bushel/lib/typesense.mli
···11-(** Typesense API client for Bushel
22-33- This module provides an OCaml client for the Typesense search engine API.
44- It handles collection management and document indexing for all Bushel object
55- types including contacts, papers, projects, news, videos, notes, and ideas.
66-77- Example usage:
88- {[
99- let config = { endpoint = "https://search.example.com"; api_key = "xyz123"; openai_key = "sk-..." } in
1010- Eio_main.run (fun env ->
1111- Eio.Switch.run (fun sw ->
1212- Typesense.upload_all ~sw ~env config entries))
1313- ]}
1414-*)
1515-1616-(** Configuration for connecting to a Typesense server *)
1717-type config = {
1818- endpoint : string; (** Typesense server URL (e.g., "https://search.example.com") *)
1919- api_key : string; (** API key for authentication *)
2020- openai_key : string; (** OpenAI API key for embeddings *)
2121-}
2222-2323-(** Possible errors that can occur during Typesense operations *)
2424-type error =
2525- | Http_error of int * string (** HTTP error with status code and message *)
2626- | Json_error of string (** JSON parsing or encoding error *)
2727- | Connection_error of string (** Network connection error *)
2828-2929-(** Pretty-printer for error types *)
3030-val pp_error : Format.formatter -> error -> unit
3131-3232-(** Create a collection with the given schema.
3333- The schema should follow Typesense's collection schema format. *)
3434-val create_collection :
3535- sw:Eio.Switch.t ->
3636- env:< clock: [> float Eio.Time.clock_ty ] Eio.Resource.t; net: [> [> `Generic ] Eio.Net.ty ] Eio.Resource.t; .. > ->
3737- config ->
3838- Ezjsonm.value ->
3939- (string, error) result
4040-4141-(** Check if a collection exists by name.
4242- Returns true if the collection exists, false otherwise. *)
4343-val collection_exists :
4444- sw:Eio.Switch.t ->
4545- env:< clock: [> float Eio.Time.clock_ty ] Eio.Resource.t; net: [> [> `Generic ] Eio.Net.ty ] Eio.Resource.t; .. > ->
4646- config ->
4747- string ->
4848- bool
4949-5050-(** Delete a collection by name. *)
5151-val delete_collection :
5252- sw:Eio.Switch.t ->
5353- env:< clock: [> float Eio.Time.clock_ty ] Eio.Resource.t; net: [> [> `Generic ] Eio.Net.ty ] Eio.Resource.t; .. > ->
5454- config ->
5555- string ->
5656- (string, error) result
5757-5858-(** Upload documents to a collection in batch using JSONL format.
5959- More efficient than uploading documents one by one. *)
6060-val upload_documents :
6161- sw:Eio.Switch.t ->
6262- env:< clock: [> float Eio.Time.clock_ty ] Eio.Resource.t; net: [> [> `Generic ] Eio.Net.ty ] Eio.Resource.t; .. > ->
6363- config ->
6464- string ->
6565- Ezjsonm.value list ->
6666- (string, error) result
6767-6868-(** Upload all bushel objects to Typesense.
6969- This function will:
7070- - Extract all bushel data types from the Entry.t
7171- - Create or recreate collections for each type
7272- - Upload all documents in batches
7373- - Report progress to stdout *)
7474-val upload_all :
7575- sw:Eio.Switch.t ->
7676- env:< clock: [> float Eio.Time.clock_ty ] Eio.Resource.t; net: [> [> `Generic ] Eio.Net.ty ] Eio.Resource.t; .. > ->
7777- config ->
7878- Entry.t ->
7979- unit
8080-8181-(** Search result structure containing document information and relevance score *)
8282-type search_result = {
8383- id: string; (** Document ID *)
8484- title: string; (** Document title *)
8585- content: string; (** Document content/description *)
8686- score: float; (** Relevance score *)
8787- collection: string; (** Collection name *)
8888- highlights: (string * string list) list; (** Highlighted search terms by field *)
8989- document: Ezjsonm.value; (** Raw document for flexible field access *)
9090-}
9191-9292-(** Search response containing results and metadata *)
9393-type search_response = {
9494- hits: search_result list; (** List of matching documents *)
9595- total: int; (** Total number of matches *)
9696- query_time: float; (** Query execution time in milliseconds *)
9797-}
9898-9999-(** Search a specific collection. *)
100100-val search_collection :
101101- sw:Eio.Switch.t ->
102102- env:< clock: float Eio.Time.clock_ty Eio.Resource.t; fs: Eio.Fs.dir_ty Eio.Path.t; net: [`Generic | `Unix] Eio.Net.ty Eio.Resource.t; .. > ->
103103- config ->
104104- string ->
105105- string ->
106106- ?limit:int ->
107107- ?offset:int ->
108108- unit ->
109109- (search_response, error) result
110110-111111-(** Search across all bushel collections.
112112- Results are sorted by relevance score and paginated. *)
113113-val search_all :
114114- sw:Eio.Switch.t ->
115115- env:< clock: float Eio.Time.clock_ty Eio.Resource.t; fs: Eio.Fs.dir_ty Eio.Path.t; net: [`Generic | `Unix] Eio.Net.ty Eio.Resource.t; .. > ->
116116- config ->
117117- string ->
118118- ?limit:int ->
119119- ?offset:int ->
120120- unit ->
121121- (search_response, error) result
122122-123123-(** Multisearch response containing results from multiple collections *)
124124-type multisearch_response = {
125125- results: search_response list; (** Results from each collection *)
126126-}
127127-128128-(** Perform multisearch across all collections using Typesense's multi_search endpoint.
129129- More efficient than individual searches as it's done in a single request. *)
130130-val multisearch :
131131- sw:Eio.Switch.t ->
132132- env:< clock: float Eio.Time.clock_ty Eio.Resource.t; fs: Eio.Fs.dir_ty Eio.Path.t; net: [`Generic | `Unix] Eio.Net.ty Eio.Resource.t; .. > ->
133133- config ->
134134- string ->
135135- ?limit:int ->
136136- unit ->
137137- (multisearch_response, error) result
138138-139139-(** Combine multisearch results into a single result set.
140140- Results are sorted by relevance score and paginated. *)
141141-val combine_multisearch_results : multisearch_response -> ?limit:int -> ?offset:int -> unit -> search_response
142142-143143-(** List all collections with document counts.
144144- Returns a list of (collection_name, document_count) pairs. *)
145145-val list_collections :
146146- sw:Eio.Switch.t ->
147147- env:< clock: float Eio.Time.clock_ty Eio.Resource.t; fs: Eio.Fs.dir_ty Eio.Path.t; net: [`Generic | `Unix] Eio.Net.ty Eio.Resource.t; .. > ->
148148- config ->
149149- ((string * int) list, error) result
150150-151151-(** Load configuration from .typesense-url and .typesense-api files.
152152- Falls back to environment variables and defaults.
153153- TODO:claude *)
154154-val load_config_from_files : unit -> config
155155-156156-(** Pretty-print a search result in a one-line format with relevant information.
157157- Shows different fields based on the collection type (papers, videos, etc.).
158158- TODO:claude *)
159159-val pp_search_result_oneline : search_result -> string
160160-161161-(** Convert Bushel objects to Typesense documents *)
162162-163163-val contact_to_document : Contact.t -> Ezjsonm.value
164164-val paper_to_document : Entry.t -> Paper.t -> Ezjsonm.value
165165-val project_to_document : Entry.t -> Project.t -> Ezjsonm.value
166166-val video_to_document : Entry.t -> Video.t -> Ezjsonm.value
167167-val note_to_document : Entry.t -> Note.t -> Ezjsonm.value
168168-val idea_to_document : Entry.t -> Idea.t -> Ezjsonm.value
-80
stack/bushel/lib/util.ml
···11-let first_hunk s =
22- let lines = String.split_on_char '\n' s in
33- let rec aux acc = function
44- | [] -> String.concat "\n" (List.rev acc)
55- | "" :: "" :: _ -> String.concat "\n" (List.rev acc)
66- | line :: rest -> aux (line :: acc) rest
77- in
88- aux [] lines
99-;;
1010-1111-let first_and_last_hunks s =
1212- let lines = String.split_on_char '\n' s in
1313- let rec aux acc = function
1414- | [] -> String.concat "\n" (List.rev acc), ""
1515- | "" :: "" :: rest ->
1616- String.concat "\n" (List.rev acc), String.concat "\n" (List.rev rest)
1717- | line :: rest -> aux (line :: acc) rest
1818- in
1919- aux [] lines
2020-;;
2121-2222-(* Find all footnote definition lines in text *)
2323-let find_footnote_lines s =
2424- let lines = String.split_on_char '\n' s in
2525- let is_footnote_def line =
2626- String.length line > 3 &&
2727- line.[0] = '[' &&
2828- line.[1] = '^' &&
2929- String.contains line ':' &&
3030- let colon_pos = String.index line ':' in
3131- colon_pos > 2 && line.[colon_pos - 1] = ']'
3232- in
3333- let is_continuation line =
3434- String.length line > 0 && (line.[0] = ' ' || line.[0] = '\t')
3535- in
3636- let rec collect_footnotes acc in_footnote = function
3737- | [] -> List.rev acc
3838- | line :: rest ->
3939- if is_footnote_def line then
4040- collect_footnotes (line :: acc) true rest
4141- else if in_footnote && is_continuation line then
4242- collect_footnotes (line :: acc) true rest
4343- else
4444- collect_footnotes acc false rest
4545- in
4646- collect_footnotes [] false lines
4747-;;
4848-4949-(* Augment first hunk with footnote definitions from last hunk *)
5050-let first_hunk_with_footnotes s =
5151- let first, last = first_and_last_hunks s in
5252- let footnote_lines = find_footnote_lines last in
5353- if footnote_lines = [] then first
5454- else first ^ "\n\n" ^ String.concat "\n" footnote_lines
5555-;;
5656-5757-let count_words (text : string) : int =
5858- let len = String.length text in
5959- let rec count_words_helper (index : int) (in_word : bool) (count : int) : int =
6060- if index >= len
6161- then if in_word then count + 1 else count
6262- else (
6363- let char = String.get text index in
6464- let is_whitespace =
6565- Char.equal char ' '
6666- || Char.equal char '\t'
6767- || Char.equal char '\n'
6868- || Char.equal char '\r'
6969- in
7070- if is_whitespace
7171- then
7272- if in_word
7373- then count_words_helper (index + 1) false (count + 1)
7474- else count_words_helper (index + 1) false count
7575- else count_words_helper (index + 1) true count)
7676- in
7777- count_words_helper 0 false 0
7878-;;
7979-8080-let read_file file = In_channel.(with_open_bin file input_all)
-166
stack/bushel/lib/video.ml
···11-type t =
22- { slug : string
33- ; title : string
44- ; published_date : Ptime.t
55- ; uuid : string
66- ; description : string
77- ; url : string
88- ; talk : bool
99- ; paper : string option
1010- ; project : string option
1111- ; tags : string list
1212- }
1313-1414-type ts = t list
1515-1616-let get_shadow fs k =
1717- match List.assoc_opt k fs with
1818- | Some v -> Some v
1919- | None -> List.assoc_opt ("_" ^ k) fs
2020-;;
2121-2222-let get_shadow_string fs k =
2323- match get_shadow fs k with
2424- | Some (`String v) -> v
2525- | _ -> failwith "invalid yaml"
2626-;;
2727-2828-let get_shadow_bool fs k =
2929- match get_shadow fs k with
3030- | Some (`Bool v) -> v
3131- | _ -> failwith "invalid yaml"
3232-;;
3333-3434-let compare a b = Ptime.compare b.published_date a.published_date
3535-let url v = v.url
3636-let body { description; _ } = description
3737-let title { title; _ } = title
3838-let uuid { uuid; _ } = uuid
3939-let paper { paper; _ } = paper
4040-let project { project; _ } = project
4141-let slug { slug; _ } = slug
4242-let date { published_date; _ } = published_date |> Ptime.to_date
4343-let datetime { published_date; _ } = published_date
4444-let talk { talk; _ } = talk
4545-4646-let t_of_yaml ~description = function
4747- | `O fields ->
4848- let slug = get_shadow_string fields "uuid" in
4949- let title = get_shadow_string fields "title" in
5050- let published_date =
5151- get_shadow_string fields "published_date"
5252- |> Ptime.of_rfc3339
5353- |> Result.get_ok
5454- |> fun (a, _, _) -> a
5555- in
5656- let uuid = get_shadow_string fields "uuid" in
5757- let url = get_shadow_string fields "url" in
5858- let talk =
5959- try get_shadow_bool fields "talk" with
6060- | _ -> false
6161- in
6262- let tags =
6363- match List.assoc_opt "tags" fields with
6464- | Some l -> Ezjsonm.get_list Ezjsonm.get_string l
6565- | _ -> []
6666- in
6767- let paper =
6868- try Some (get_shadow_string fields "paper") with
6969- | _ -> None
7070- in
7171- let project =
7272- try Some (get_shadow_string fields "project") with
7373- | _ -> None
7474- in
7575- { slug; title; tags; published_date; uuid; description; talk; paper; project; url }
7676- | _ -> failwith "invalid yaml"
7777-;;
7878-7979-let to_yaml t =
8080- `O [
8181- ("title", `String t.title);
8282- ("description", `String t.description);
8383- ("url", `String t.url);
8484- ("uuid", `String t.uuid);
8585- ("slug", `String t.slug);
8686- ("published_date", `String (Ptime.to_rfc3339 t.published_date));
8787- ("talk", `Bool t.talk);
8888- ("tags", `A (List.map (fun t -> `String t) t.tags));
8989- ("paper", match t.paper with None -> `Null | Some p -> `String p);
9090- ("project", match t.project with None -> `Null | Some p -> `String p)
9191- ]
9292-9393-let to_file output_dir t =
9494- let file_path = Fpath.v (Filename.concat output_dir (t.uuid ^ ".md")) in
9595- let yaml = to_yaml t in
9696- let yaml_str = Yaml.to_string_exn yaml in
9797- let content = "---\n" ^ yaml_str ^ "---\n" in
9898- Bos.OS.File.write file_path content
9999-;;
100100-101101-let of_md fname =
102102- (* TODO fix Jekyll_post to not error on no date *)
103103- let fname' = "2000-01-01-" ^ Filename.basename fname in
104104- match Jekyll_post.of_string ~fname:fname' (Util.read_file fname) with
105105- | Error (`Msg m) -> failwith ("paper_of_md: " ^ m)
106106- | Ok jp ->
107107- let fields = jp.Jekyll_post.fields |> Jekyll_format.fields_to_yaml in
108108- let { Jekyll_post.body; _ } = jp in
109109- t_of_yaml ~description:body fields
110110-;;
111111-112112-(* TODO:claude *)
113113-let typesense_schema =
114114- let open Ezjsonm in
115115- dict [
116116- ("name", string "videos");
117117- ("fields", list (fun d -> dict d) [
118118- [("name", string "id"); ("type", string "string")];
119119- [("name", string "title"); ("type", string "string")];
120120- [("name", string "description"); ("type", string "string")];
121121- [("name", string "published_date"); ("type", string "string")];
122122- [("name", string "date"); ("type", string "string")];
123123- [("name", string "date_timestamp"); ("type", string "int64")];
124124- [("name", string "tags"); ("type", string "string[]"); ("facet", bool true)];
125125- [("name", string "url"); ("type", string "string")];
126126- [("name", string "uuid"); ("type", string "string")];
127127- [("name", string "is_talk"); ("type", string "bool")];
128128- [("name", string "paper"); ("type", string "string[]"); ("optional", bool true)];
129129- [("name", string "project"); ("type", string "string[]"); ("optional", bool true)];
130130- [("name", string "video_url"); ("type", string "string"); ("optional", bool true)];
131131- [("name", string "embed_url"); ("type", string "string"); ("optional", bool true)];
132132- [("name", string "duration"); ("type", string "int32"); ("optional", bool true)];
133133- [("name", string "channel"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];
134134- [("name", string "platform"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];
135135- [("name", string "views"); ("type", string "int32"); ("optional", bool true)];
136136- [("name", string "related_papers"); ("type", string "string[]"); ("optional", bool true)];
137137- [("name", string "related_talks"); ("type", string "string[]"); ("optional", bool true)];
138138- ]);
139139- ("default_sorting_field", string "date_timestamp");
140140- ]
141141-142142-(** TODO:claude Pretty-print a video with ANSI formatting *)
143143-let pp ppf v =
144144- let open Fmt in
145145- pf ppf "@[<v>";
146146- pf ppf "%a: %a@," (styled `Bold string) "Type" (styled `Cyan string) "Video";
147147- pf ppf "%a: %a@," (styled `Bold string) "Slug" string (slug v);
148148- pf ppf "%a: %a@," (styled `Bold string) "UUID" string (uuid v);
149149- pf ppf "%a: %a@," (styled `Bold string) "Title" string (title v);
150150- let (year, month, day) = date v in
151151- pf ppf "%a: %04d-%02d-%02d@," (styled `Bold string) "Date" year month day;
152152- pf ppf "%a: %a@," (styled `Bold string) "URL" string (url v);
153153- pf ppf "%a: %b@," (styled `Bold string) "Talk" (talk v);
154154- (match paper v with
155155- | Some p -> pf ppf "%a: %a@," (styled `Bold string) "Paper" string p
156156- | None -> ());
157157- (match project v with
158158- | Some p -> pf ppf "%a: %a@," (styled `Bold string) "Project" string p
159159- | None -> ());
160160- let t = v.tags in
161161- if t <> [] then
162162- pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Tags" (list ~sep:comma string) t;
163163- pf ppf "@,";
164164- pf ppf "%a:@," (styled `Bold string) "Description";
165165- pf ppf "%a@," string v.description;
166166- pf ppf "@]"
-32
stack/bushel/lib/video.mli
···11-type t =
22- { slug : string
33- ; title : string
44- ; published_date : Ptime.t
55- ; uuid : string
66- ; description : string
77- ; url : string
88- ; talk : bool
99- ; paper : string option
1010- ; project : string option
1111- ; tags : string list
1212- }
1313-1414-type ts = t list
1515-1616-val compare : t -> t -> int
1717-val url : t -> string
1818-val body : t -> string
1919-val title : t -> string
2020-val uuid : t -> string
2121-val paper : t -> string option
2222-val project : t -> string option
2323-val slug : t -> string
2424-val date : t -> Ptime.date
2525-val datetime : t -> Ptime.t
2626-val talk : t -> bool
2727-val of_md : string -> t
2828-val t_of_yaml : description:string -> Yaml.value -> t
2929-val to_yaml : t -> Yaml.value
3030-val to_file : string -> t -> (unit, [> `Msg of string]) result
3131-val typesense_schema : Ezjsonm.value
3232-val pp : Format.formatter -> t -> unit