···11-(* Logging setup *)
22-let src = Logs.Src.create "river-cli" ~doc:"River CLI application"
33-module Log = (val Logs.src_log src : Logs.LOG)
44-55-(* Types *)
66-type user = {
77- username : string;
88- fullname : string;
99- email : string option;
1010- feeds : River.source list;
1111- last_synced : string option;
1212-}
1313-1414-type state = {
1515- xdg : Xdge.t;
1616-}
1717-1818-(* State directory management *)
1919-module State = struct
2020- let users_dir state = Eio.Path.(Xdge.state_dir state.xdg / "users")
2121- let feeds_dir state = Eio.Path.(Xdge.state_dir state.xdg / "feeds")
2222- let user_feeds_dir state = Eio.Path.(feeds_dir state / "user")
2323-2424- let user_file state username =
2525- Eio.Path.(users_dir state / (username ^ ".json"))
2626-2727- let user_feed_file state username =
2828- Eio.Path.(user_feeds_dir state / (username ^ ".xml"))
2929-3030- let ensure_directories state =
3131- let dirs = [
3232- users_dir state;
3333- feeds_dir state;
3434- user_feeds_dir state;
3535- ] in
3636- List.iter (fun dir ->
3737- try Eio.Path.mkdir ~perm:0o755 dir
3838- with Eio.Io (Eio.Fs.E (Already_exists _), _) -> ()
3939- ) dirs
4040-4141- (* JSON codecs for user data *)
4242-4343- (* Codec for River.source (feed) *)
4444- let source_jsont =
4545- let make name url = { River.name; url } in
4646- Jsont.Object.map ~kind:"Source" make
4747- |> Jsont.Object.mem "name" Jsont.string ~enc:(fun s -> s.River.name)
4848- |> Jsont.Object.mem "url" Jsont.string ~enc:(fun s -> s.River.url)
4949- |> Jsont.Object.finish
5050-5151- (* Codec for user *)
5252- let user_jsont =
5353- let make username fullname email feeds last_synced =
5454- { username; fullname; email; feeds; last_synced }
5555- in
5656- Jsont.Object.map ~kind:"User" make
5757- |> Jsont.Object.mem "username" Jsont.string ~enc:(fun u -> u.username)
5858- |> Jsont.Object.mem "fullname" Jsont.string ~enc:(fun u -> u.fullname)
5959- |> Jsont.Object.opt_mem "email" Jsont.string ~enc:(fun u -> u.email)
6060- |> Jsont.Object.mem "feeds" (Jsont.list source_jsont) ~enc:(fun u -> u.feeds)
6161- |> Jsont.Object.opt_mem "last_synced" Jsont.string ~enc:(fun u -> u.last_synced)
6262- |> Jsont.Object.finish
6363-6464- let user_of_string s =
6565- match Jsont_bytesrw.decode_string' user_jsont s with
6666- | Ok user -> Some user
6767- | Error err ->
6868- Log.err (fun m -> m "Failed to parse user JSON: %s" (Jsont.Error.to_string err));
6969- None
7070-7171- let user_to_string user =
7272- match Jsont_bytesrw.encode_string' ~format:Jsont.Indent user_jsont user with
7373- | Ok s -> s
7474- | Error err -> failwith ("Failed to encode user: " ^ Jsont.Error.to_string err)
7575-7676- let load_user state username =
7777- let file = user_file state username in
7878- try
7979- let content = Eio.Path.load file in
8080- user_of_string content
8181- with
8282- | Eio.Io (Eio.Fs.E (Not_found _), _) -> None
8383- | e ->
8484- Log.err (fun m -> m "Error loading user %s: %s" username (Printexc.to_string e));
8585- None
8686-8787- let save_user state user =
8888- let file = user_file state user.username in
8989- let json = user_to_string user in
9090- Eio.Path.save ~create:(`Or_truncate 0o644) file json
9191-9292- let list_users state =
9393- try
9494- Eio.Path.read_dir (users_dir state)
9595- |> List.filter_map (fun name ->
9696- if Filename.check_suffix name ".json" then
9797- Some (Filename.chop_suffix name ".json")
9898- else None
9999- )
100100- with _ -> []
101101-102102- let load_existing_posts state username =
103103- let file = user_feed_file state username in
104104- try
105105- let content = Eio.Path.load file in
106106- (* Parse existing Atom feed *)
107107- let input = Xmlm.make_input (`String (0, content)) in
108108- let feed = Syndic.Atom.parse input in
109109- feed.Syndic.Atom.entries
110110- with
111111- | Eio.Io (Eio.Fs.E (Not_found _), _) -> []
112112- | e ->
113113- Log.err (fun m -> m "Error loading existing posts for %s: %s"
114114- username (Printexc.to_string e));
115115- []
116116-117117- let save_atom_feed state username entries =
118118- let file = user_feed_file state username in
119119- let feed : Syndic.Atom.feed = {
120120- id = Uri.of_string ("urn:river:user:" ^ username);
121121- title = Syndic.Atom.Text username;
122122- updated = Ptime.of_float_s (Unix.time ()) |> Option.get;
123123- entries;
124124- authors = [];
125125- categories = [];
126126- contributors = [];
127127- generator = Some {
128128- Syndic.Atom.version = Some "1.0";
129129- uri = None;
130130- content = "River Feed Aggregator";
131131- };
132132- icon = None;
133133- links = [];
134134- logo = None;
135135- rights = None;
136136- subtitle = None;
137137- } in
138138- let output = Buffer.create 1024 in
139139- Syndic.Atom.output feed (`Buffer output);
140140- Eio.Path.save ~create:(`Or_truncate 0o644) file (Buffer.contents output)
141141-end
142142-143143-(* User management commands *)
144144-module User = struct
145145- let add state ~username ~fullname ~email =
146146- match State.load_user state username with
147147- | Some _ ->
148148- Log.err (fun m -> m "User %s already exists" username);
149149- 1
150150- | None ->
151151- let user = { username; fullname; email; feeds = []; last_synced = None } in
152152- State.save_user state user;
153153- Log.info (fun m -> m "User %s created" username);
154154- 0
155155-156156- let remove state ~username =
157157- match State.load_user state username with
158158- | None ->
159159- Log.err (fun m -> m "User %s not found" username);
160160- 1
161161- | Some _ ->
162162- (* Remove user file and feed file *)
163163- let user_file = State.user_file state username in
164164- let feed_file = State.user_feed_file state username in
165165- (try Eio.Path.unlink user_file with _ -> ());
166166- (try Eio.Path.unlink feed_file with _ -> ());
167167- Log.info (fun m -> m "User %s removed" username);
168168- 0
169169-170170- let list state =
171171- let users = State.list_users state in
172172- if users = [] then
173173- Printf.printf "No users found\n"
174174- else begin
175175- Printf.printf "Users:\n";
176176- List.iter (fun username ->
177177- match State.load_user state username with
178178- | Some user ->
179179- let email_str = match user.email with
180180- | Some e -> " <" ^ e ^ ">"
181181- | None -> ""
182182- in
183183- Printf.printf " %s (%s%s) - %d feeds\n"
184184- username user.fullname email_str (List.length user.feeds)
185185- | None -> ()
186186- ) users
187187- end;
188188- 0
189189-190190- let add_feed state ~username ~name ~url =
191191- match State.load_user state username with
192192- | None ->
193193- Log.err (fun m -> m "User %s not found" username);
194194- 1
195195- | Some user ->
196196- let feed = { River.name; url } in
197197- if List.exists (fun f -> f.River.url = url) user.feeds then begin
198198- Log.err (fun m -> m "Feed %s already exists for user %s" url username);
199199- 1
200200- end else begin
201201- let user = { user with feeds = feed :: user.feeds } in
202202- State.save_user state user;
203203- Log.info (fun m -> m "Feed %s added to user %s" name username);
204204- 0
205205- end
206206-207207- let remove_feed state ~username ~url =
208208- match State.load_user state username with
209209- | None ->
210210- Log.err (fun m -> m "User %s not found" username);
211211- 1
212212- | Some user ->
213213- let feeds = List.filter (fun f -> f.River.url <> url) user.feeds in
214214- if List.length feeds = List.length user.feeds then begin
215215- Log.err (fun m -> m "Feed %s not found for user %s" url username);
216216- 1
217217- end else begin
218218- let user = { user with feeds } in
219219- State.save_user state user;
220220- Log.info (fun m -> m "Feed removed from user %s" username);
221221- 0
222222- end
223223-224224- let show state ~username =
225225- match State.load_user state username with
226226- | None ->
227227- Log.err (fun m -> m "User %s not found" username);
228228- 1
229229- | Some user ->
230230- Printf.printf "Username: %s\n" user.username;
231231- Printf.printf "Full name: %s\n" user.fullname;
232232- Printf.printf "Email: %s\n"
233233- (Option.value user.email ~default:"(none)");
234234- Printf.printf "Last synced: %s\n"
235235- (Option.value user.last_synced ~default:"never");
236236- Printf.printf "Feeds (%d):\n" (List.length user.feeds);
237237- List.iter (fun feed ->
238238- Printf.printf " - %s: %s\n" feed.River.name feed.River.url
239239- ) user.feeds;
240240- 0
241241-end
242242-243243-(* Sync command *)
244244-module Sync = struct
245245- let merge_entries ~existing ~new_entries =
246246- (* Create a map of new entry IDs for efficient lookup and updates *)
247247- let module UriMap = Map.Make(Uri) in
248248- let new_entries_map =
249249- List.fold_left (fun acc (entry : Syndic.Atom.entry) ->
250250- UriMap.add entry.id entry acc
251251- ) UriMap.empty new_entries
252252- in
253253-254254- (* Update existing entries with new ones if IDs match, otherwise keep existing *)
255255- let updated_existing =
256256- List.filter_map (fun (entry : Syndic.Atom.entry) ->
257257- if UriMap.mem entry.id new_entries_map then
258258- None (* Will be replaced by new entry *)
259259- else
260260- Some entry (* Keep existing entry *)
261261- ) existing
262262- in
263263-264264- (* Combine new entries with non-replaced existing entries *)
265265- let combined = new_entries @ updated_existing in
266266- List.sort (fun (a : Syndic.Atom.entry) (b : Syndic.Atom.entry) ->
267267- Ptime.compare b.updated a.updated
268268- ) combined
269269-270270- let sync_user session state ~username =
271271- match State.load_user state username with
272272- | None ->
273273- Log.err (fun m -> m "User %s not found" username);
274274- 1
275275- | Some user when user.feeds = [] ->
276276- Log.info (fun m -> m "No feeds configured for user %s" username);
277277- 0
278278- | Some user ->
279279- Log.info (fun m -> m "Syncing feeds for user %s..." username);
280280-281281- (* Fetch all feeds concurrently using the session *)
282282- let fetched_feeds =
283283- Eio.Fiber.List.filter_map (fun source ->
284284- try
285285- Log.info (fun m -> m " Fetching %s (%s)..." source.River.name source.River.url);
286286- Some (River.fetch session source)
287287- with e ->
288288- Log.err (fun m -> m " Failed to fetch %s: %s"
289289- source.River.name (Printexc.to_string e));
290290- None
291291- ) user.feeds
292292- in
293293-294294- if fetched_feeds = [] then begin
295295- Log.err (fun m -> m "No feeds successfully fetched");
296296- 1
297297- end else begin
298298- (* Get posts from fetched feeds *)
299299- let posts = River.posts fetched_feeds in
300300- Log.info (fun m -> m " Found %d new posts" (List.length posts));
301301-302302- (* Convert to Atom entries *)
303303- let new_entries = River.create_atom_entries posts in
304304-305305- (* Load existing entries *)
306306- let existing = State.load_existing_posts state username in
307307- Log.info (fun m -> m " Found %d existing posts" (List.length existing));
308308-309309- (* Merge entries *)
310310- let merged = merge_entries ~existing ~new_entries in
311311- Log.info (fun m -> m " Total posts after merge: %d" (List.length merged));
312312-313313- (* Save updated feed *)
314314- State.save_atom_feed state username merged;
315315-316316- (* Update last_synced timestamp *)
317317- let now =
318318- let open Unix in
319319- let tm = gmtime (time ()) in
320320- Printf.sprintf "%04d-%02d-%02dT%02d:%02d:%02dZ"
321321- (tm.tm_year + 1900) (tm.tm_mon + 1) tm.tm_mday
322322- tm.tm_hour tm.tm_min tm.tm_sec
323323- in
324324- let user = { user with last_synced = Some now } in
325325- State.save_user state user;
326326-327327- Log.info (fun m -> m "Sync completed for user %s" username);
328328- 0
329329- end
330330-331331- let sync_all session state =
332332- let users = State.list_users state in
333333- if users = [] then begin
334334- Log.info (fun m -> m "No users to sync");
335335- 0
336336- end else begin
337337- Log.info (fun m -> m "Syncing %d users concurrently..." (List.length users));
338338-339339- let results =
340340- Eio.Fiber.List.map (fun username ->
341341- let result = sync_user session state ~username in
342342- Log.debug (fun m -> m "Completed sync for user");
343343- result
344344- ) users
345345- in
346346- let failures = List.filter ((<>) 0) results in
347347- if failures = [] then begin
348348- Log.info (fun m -> m "All users synced successfully");
349349- 0
350350- end else begin
351351- Log.err (fun m -> m "Failed to sync %d users" (List.length failures));
352352- 1
353353- end
354354- end
355355-end
356356-357357-(* Post listing commands *)
358358-module Post = struct
359359- let format_date ptime =
360360- let open Ptime in
361361- let (y, m, d), _ = to_date_time ptime in
362362- Printf.sprintf "%02d/%02d/%04d" d m y
363363-364364- let format_text_construct : Syndic.Atom.text_construct -> string = function
365365- | Syndic.Atom.Text s -> s
366366- | Syndic.Atom.Html (_, s) -> s
367367- | Syndic.Atom.Xhtml (_, _) -> "<xhtml content>"
368368-369369- let get_content_length (entry : Syndic.Atom.entry) =
370370- match entry.content with
371371- | Some (Syndic.Atom.Text s) -> String.length s
372372- | Some (Syndic.Atom.Html (_, s)) -> String.length s
373373- | Some (Syndic.Atom.Xhtml (_, _)) -> 0 (* Could calculate but complex *)
374374- | Some (Syndic.Atom.Mime _) -> 0
375375- | Some (Syndic.Atom.Src _) -> 0
376376- | None -> (
377377- match entry.summary with
378378- | Some (Syndic.Atom.Text s) -> String.length s
379379- | Some (Syndic.Atom.Html (_, s)) -> String.length s
380380- | Some (Syndic.Atom.Xhtml (_, _)) -> 0
381381- | None -> 0)
382382-383383- let list state ~username_opt ~limit =
384384- match username_opt with
385385- | Some username ->
386386- (* List posts for a specific user *)
387387- (match State.load_user state username with
388388- | None ->
389389- Log.err (fun m -> m "User %s not found" username);
390390- 1
391391- | Some user ->
392392- let entries = State.load_existing_posts state username in
393393- if entries = [] then begin
394394- Fmt.pr "%a@." Fmt.(styled `Yellow string)
395395- ("No posts found for user " ^ username);
396396- Fmt.pr "%a@." Fmt.(styled `Faint string)
397397- ("(Run 'river-cli sync " ^ username ^ "' to fetch posts)");
398398- 0
399399- end else begin
400400- let to_show = match limit with
401401- | Some n -> List.filteri (fun i _ -> i < n) entries
402402- | None -> entries
403403- in
404404- Fmt.pr "%a@."
405405- Fmt.(styled `Bold string)
406406- (Printf.sprintf "Posts for %s (%d total, showing %d):"
407407- user.fullname (List.length entries) (List.length to_show));
408408-409409- List.iteri (fun i (entry : Syndic.Atom.entry) ->
410410- (* Use user's full name for all entries *)
411411- let author_name = user.fullname in
412412- let content_len = get_content_length entry in
413413- let entry_id = Uri.to_string entry.id in
414414- Fmt.pr "%a %a@."
415415- Fmt.(styled `Cyan string) (Printf.sprintf "[%d]" (i + 1))
416416- Fmt.(styled (`Fg `Blue) string) (format_text_construct entry.title);
417417- Fmt.pr " %a %a@."
418418- Fmt.(styled `Faint string) "ID:"
419419- Fmt.(styled `Faint string) entry_id;
420420- Fmt.pr " %a - %a - %a chars@."
421421- Fmt.(styled `Green string) author_name
422422- Fmt.(styled `Magenta string) (format_date entry.updated)
423423- Fmt.(styled `Yellow string) (string_of_int content_len)
424424- ) to_show;
425425- 0
426426- end)
427427- | None ->
428428- (* List posts from all users *)
429429- let users = State.list_users state in
430430- if users = [] then begin
431431- Fmt.pr "%a@." Fmt.(styled `Yellow string)
432432- "No users found";
433433- Fmt.pr "%a@." Fmt.(styled `Faint string)
434434- "(Run 'river-cli user add' to create a user)";
435435- 0
436436- end else begin
437437- (* Load user data to get full names *)
438438- let user_map =
439439- List.fold_left (fun acc username ->
440440- match State.load_user state username with
441441- | Some user -> (username, user) :: acc
442442- | None -> acc
443443- ) [] users
444444- in
445445-446446- (* Collect all entries from all users with username tag *)
447447- let all_entries =
448448- List.concat_map (fun username ->
449449- let entries = State.load_existing_posts state username in
450450- List.map (fun entry -> (username, entry)) entries
451451- ) users
452452- in
453453-454454- if all_entries = [] then begin
455455- Fmt.pr "%a@." Fmt.(styled `Yellow string)
456456- "No posts found for any users";
457457- Fmt.pr "%a@." Fmt.(styled `Faint string)
458458- "(Run 'river-cli sync' to fetch posts)";
459459- 0
460460- end else begin
461461- (* Sort by date (newest first) *)
462462- let sorted = List.sort (fun (_, a : string * Syndic.Atom.entry) (_, b) ->
463463- Ptime.compare b.updated a.updated
464464- ) all_entries in
465465-466466- let to_show = match limit with
467467- | Some n -> List.filteri (fun i _ -> i < n) sorted
468468- | None -> sorted
469469- in
470470-471471- Fmt.pr "%a@."
472472- Fmt.(styled `Bold string)
473473- (Printf.sprintf "Posts from all users (%d total, showing %d):"
474474- (List.length all_entries) (List.length to_show));
475475-476476- List.iteri (fun i (username, entry : string * Syndic.Atom.entry) ->
477477- (* Use user's full name instead of feed author *)
478478- let author_name =
479479- match List.assoc_opt username user_map with
480480- | Some user -> user.fullname
481481- | None ->
482482- (* Fallback to entry author if user not found *)
483483- let (author, _) = entry.authors in
484484- String.trim author.name
485485- in
486486- let content_len = get_content_length entry in
487487- let entry_id = Uri.to_string entry.id in
488488- (* Shorten ID for display if it's too long *)
489489- Fmt.pr "%a %a@."
490490- Fmt.(styled `Cyan string) (Printf.sprintf "[%d]" (i + 1))
491491- Fmt.(styled (`Fg `Blue) string) (format_text_construct entry.title);
492492- Fmt.pr " %a %a@."
493493- Fmt.(styled `Faint string) "ID:"
494494- Fmt.(styled `Faint string) entry_id;
495495- Fmt.pr " %a - %a - %a chars@."
496496- Fmt.(styled `Green string) author_name
497497- Fmt.(styled `Magenta string) (format_date entry.updated)
498498- Fmt.(styled `Yellow string) (string_of_int content_len)
499499- ) to_show;
500500- 0
501501- end
502502- end
503503-end
504504-505505-(* Cmdliner interface *)
506506-open Cmdliner
507507-508508-let username_arg =
509509- let doc = "Username" in
510510- Arg.(required & pos 0 (some string) None & info [] ~docv:"USERNAME" ~doc)
511511-512512-let fullname_arg =
513513- let doc = "Full name of the user" in
514514- Arg.(required & opt (some string) None & info ["name"; "n"] ~doc)
515515-516516-let email_arg =
517517- let doc = "Email address of the user (optional)" in
518518- Arg.(value & opt (some string) None & info ["email"; "e"] ~doc)
519519-520520-let feed_name_arg =
521521- let doc = "Feed name/label" in
522522- Arg.(required & opt (some string) None & info ["name"; "n"] ~doc)
523523-524524-let feed_url_arg =
525525- let doc = "Feed URL" in
526526- Arg.(required & opt (some string) None & info ["url"; "u"] ~doc)
527527-528528-(* Note: eiocmd handles all logging setup automatically via Logs_cli *)
529529-530530-(* User commands - these don't need network, just filesystem access via Xdge *)
531531-let user_add_cmd =
532532- let doc = "Add a new user" in
533533- Eiocmd.run
534534- ~use_keyeio:false
535535- ~info:(Cmd.info "add" ~doc)
536536- ~app_name:"river"
537537- ~service:"river"
538538- Term.(const (fun username fullname email _env xdg _profile ->
539539- let state = { xdg } in
540540- State.ensure_directories state;
541541- User.add state ~username ~fullname ~email
542542- ) $ username_arg $ fullname_arg $ email_arg)
543543-544544-let user_remove_cmd =
545545- let doc = "Remove a user" in
546546- Eiocmd.run
547547- ~use_keyeio:false
548548- ~info:(Cmd.info "remove" ~doc)
549549- ~app_name:"river"
550550- ~service:"river"
551551- Term.(const (fun username _env xdg _profile ->
552552- let state = { xdg } in
553553- User.remove state ~username
554554- ) $ username_arg)
555555-556556-let user_list_cmd =
557557- let doc = "List all users" in
558558- Eiocmd.run
559559- ~use_keyeio:false
560560- ~info:(Cmd.info "list" ~doc)
561561- ~app_name:"river"
562562- ~service:"river"
563563- Term.(const (fun _env xdg _profile ->
564564- let state = { xdg } in
565565- User.list state
566566- ))
567567-568568-let user_show_cmd =
569569- let doc = "Show user details" in
570570- Eiocmd.run
571571- ~use_keyeio:false
572572- ~info:(Cmd.info "show" ~doc)
573573- ~app_name:"river"
574574- ~service:"river"
575575- Term.(const (fun username _env xdg _profile ->
576576- let state = { xdg } in
577577- User.show state ~username
578578- ) $ username_arg)
579579-580580-let user_add_feed_cmd =
581581- let doc = "Add a feed to a user" in
582582- Eiocmd.run
583583- ~use_keyeio:false
584584- ~info:(Cmd.info "add-feed" ~doc)
585585- ~app_name:"river"
586586- ~service:"river"
587587- Term.(const (fun username name url _env xdg _profile ->
588588- let state = { xdg } in
589589- User.add_feed state ~username ~name ~url
590590- ) $ username_arg $ feed_name_arg $ feed_url_arg)
591591-592592-let user_remove_feed_cmd =
593593- let doc = "Remove a feed from a user" in
594594- Eiocmd.run
595595- ~use_keyeio:false
596596- ~info:(Cmd.info "remove-feed" ~doc)
597597- ~app_name:"river"
598598- ~service:"river"
599599- Term.(const (fun username url _env xdg _profile ->
600600- let state = { xdg } in
601601- User.remove_feed state ~username ~url
602602- ) $ username_arg $ feed_url_arg)
603603-604604-let user_cmd =
605605- let doc = "Manage users" in
606606- let info = Cmd.info "user" ~doc in
607607- Cmd.group info [
608608- user_add_cmd;
609609- user_remove_cmd;
610610- user_list_cmd;
611611- user_show_cmd;
612612- user_add_feed_cmd;
613613- user_remove_feed_cmd;
614614- ]
615615-616616-(* Sync command - needs Eio environment for HTTP requests *)
617617-let sync_cmd =
618618- let doc = "Sync feeds for users" in
619619- let username_opt =
620620- let doc = "Sync specific user (omit to sync all)" in
621621- Arg.(value & pos 0 (some string) None & info [] ~docv:"USERNAME" ~doc)
622622- in
623623- Eiocmd.run
624624- ~use_keyeio:false
625625- ~info:(Cmd.info "sync" ~doc)
626626- ~app_name:"river"
627627- ~service:"river"
628628- Term.(const (fun username_opt env xdg _profile ->
629629- let state = { xdg } in
630630- State.ensure_directories state;
631631-632632- (* Use River.with_session for resource management *)
633633- River.with_session env @@ fun session ->
634634- match username_opt with
635635- | Some username -> Sync.sync_user session state ~username
636636- | None -> Sync.sync_all session state
637637- ) $ username_opt)
638638-639639-(* List command - doesn't need network, just reads local files *)
640640-let list_cmd =
641641- let doc = "List recent posts (from all users by default, or specify a user)" in
642642- let username_opt_arg =
643643- let doc = "Username (optional - defaults to all users)" in
644644- Arg.(value & pos 0 (some string) None & info [] ~docv:"USERNAME" ~doc)
645645- in
646646- let limit_arg =
647647- let doc = "Limit number of posts to display (default: all)" in
648648- Arg.(value & opt (some int) None & info ["limit"; "n"] ~doc)
649649- in
650650- Eiocmd.run
651651- ~use_keyeio:false
652652- ~info:(Cmd.info "list" ~doc)
653653- ~app_name:"river"
654654- ~service:"river"
655655- Term.(const (fun username_opt limit _env xdg _profile ->
656656- let state = { xdg } in
657657- Post.list state ~username_opt ~limit
658658- ) $ username_opt_arg $ limit_arg)
659659-660660-(* Info command - show detailed post information *)
661661-let info_cmd =
662662- let doc = "Display detailed information about a post by ID" in
663663- let id_arg =
664664- let doc = "Exact post ID to display" in
665665- Arg.(required & pos 0 (some string) None & info [] ~docv:"ID" ~doc)
666666- in
667667- let verbose_flag =
668668- let doc = "Show full content and all metadata" in
669669- Arg.(value & flag & info ["full"; "f"] ~doc)
670670- in
671671- Eiocmd.run
672672- ~use_keyeio:false
673673- ~info:(Cmd.info "info" ~doc)
674674- ~app_name:"river"
675675- ~service:"river"
676676- Term.(const (fun id verbose _env xdg _profile ->
677677- let state = { xdg } in
678678- let users = State.list_users state in
679679-680680- (* Load all entries from all users *)
681681- let all_entries =
682682- List.concat_map (fun username ->
683683- let entries = State.load_existing_posts state username in
684684- List.map (fun entry -> (username, entry)) entries
685685- ) users
686686- in
687687-688688- (* Find entry with matching ID *)
689689- let entry_opt = List.find_opt (fun (_username, entry : string * Syndic.Atom.entry) ->
690690- Uri.to_string entry.id = id
691691- ) all_entries in
692692-693693- match entry_opt with
694694- | None ->
695695- Fmt.pr "%a@." Fmt.(styled `Red string) (Printf.sprintf "No post found with ID: %s" id);
696696- Fmt.pr "%a@." Fmt.(styled `Faint string) "Hint: Use 'river-cli list' to see available posts and their IDs";
697697- 1
698698- | Some (username, entry) ->
699699- (* Get user info for author name *)
700700- let user_opt = State.load_user state username in
701701- let author_name = match user_opt with
702702- | Some user -> user.fullname
703703- | None ->
704704- let (author, _) = entry.authors in
705705- String.trim author.name
706706- in
707707-708708- (* Print header *)
709709- Fmt.pr "@.";
710710- Fmt.pr "%a@." Fmt.(styled `Bold string)
711711- (String.make 70 '=');
712712- Fmt.pr " %a@." Fmt.(styled `Bold (styled (`Fg `Blue) string))
713713- (Post.format_text_construct entry.title);
714714- Fmt.pr "%a@.@." Fmt.(styled `Bold string)
715715- (String.make 70 '=');
716716-717717- (* Basic metadata *)
718718- Fmt.pr "%a %s@." Fmt.(styled `Cyan string) "ID: " (Uri.to_string entry.id);
719719-720720- (* Links *)
721721- let links = entry.links in
722722- (match links with
723723- | [] -> ()
724724- | link :: _ ->
725725- Fmt.pr "%a %s@." Fmt.(styled `Cyan string) "URL: " (Uri.to_string link.href));
726726-727727- Fmt.pr "%a %s@." Fmt.(styled `Cyan string) "Author: " author_name;
728728-729729- Fmt.pr "%a %s@." Fmt.(styled `Cyan string) "Updated: " (Ptime.to_rfc3339 entry.updated);
730730-731731- (* Summary *)
732732- (match entry.summary with
733733- | Some summary ->
734734- Fmt.pr "@.%a@." Fmt.(styled (`Fg `Yellow) string) "Summary:";
735735- let summary_text = Post.format_text_construct summary in
736736- Fmt.pr " %s@." summary_text
737737- | None -> ());
738738-739739- (* Content *)
740740- (match entry.content with
741741- | Some content ->
742742- let content_html = match content with
743743- | Syndic.Atom.Text s -> s
744744- | Syndic.Atom.Html (_, s) -> s
745745- | Syndic.Atom.Xhtml (_, nodes) ->
746746- String.concat "" (List.map Syndic.XML.to_string nodes)
747747- | Syndic.Atom.Mime _ -> "(MIME content)"
748748- | Syndic.Atom.Src _ -> "(External content)"
749749- in
750750-751751- (* Extract outgoing links *)
752752- let links = Markdown_converter.extract_links content_html in
753753-754754- (* Convert to markdown *)
755755- let content_markdown = Markdown_converter.to_markdown content_html in
756756-757757- Fmt.pr "@.%a@." Fmt.(styled (`Fg `Green) string) "Content:";
758758- if verbose then
759759- Fmt.pr "%s@." content_markdown
760760- else begin
761761- let preview =
762762- if String.length content_markdown > 500 then
763763- String.sub content_markdown 0 500 ^ "..."
764764- else
765765- content_markdown
766766- in
767767- Fmt.pr "%s@." preview;
768768- if String.length content_markdown > 500 then
769769- Fmt.pr "@.%a@." Fmt.(styled `Faint string) "(Use --full to see full content)"
770770- end;
771771-772772- (* Display outgoing links *)
773773- if links <> [] then begin
774774- Fmt.pr "@.%a (%d)@." Fmt.(styled (`Fg `Cyan) string) "Outgoing Links:" (List.length links);
775775- List.iteri (fun i (href, text) ->
776776- let link_text = if text = "" then "(no text)" else text in
777777- Fmt.pr " %a %s@."
778778- Fmt.(styled `Faint string) (Printf.sprintf "[%d]" (i + 1))
779779- (Uri.to_string (Uri.of_string href));
780780- if text <> "" && String.length text < 80 then
781781- Fmt.pr " %a %s@." Fmt.(styled `Faint string) "→" link_text
782782- ) links
783783- end
784784- | None -> ());
785785-786786- Fmt.pr "@.";
787787- 0
788788- ) $ id_arg $ verbose_flag)
789789-790790-let main_cmd =
791791- let doc = "River feed management CLI" in
792792- let info = Cmd.info "river-cli" ~version:"1.0" ~doc in
793793- Cmd.group info [user_cmd; sync_cmd; list_cmd; info_cmd]
794794-795795-let () = exit (Cmd.eval' main_cmd)
11+(* Ultra-thin binary that delegates all command handling to River_cmd *)
22+let () = exit (Cmdliner.Cmd.eval' River_cmd.main_cmd)
···11+(** River.Cmd - Cmdliner terms for River CLI
22+33+ This module provides cmdliner terms that are thin wrappers around
44+ the River library functions. All business logic resides in the
55+ main River module. *)
66+77+open Cmdliner
88+99+(* Logging setup *)
1010+let src = Logs.Src.create "river-cli" ~doc:"River CLI application"
1111+module Log = (val Logs.src_log src : Logs.LOG)
1212+1313+(* User management commands *)
1414+module User = struct
1515+ let add state ~username ~fullname ~email =
1616+ let user = River.User.make ~username ~fullname ?email () in
1717+ match River.State.create_user state user with
1818+ | Ok () ->
1919+ Log.info (fun m -> m "User %s created" username);
2020+ 0
2121+ | Error err ->
2222+ Log.err (fun m -> m "%s" err);
2323+ 1
2424+2525+ let remove state ~username =
2626+ match River.State.delete_user state ~username with
2727+ | Ok () ->
2828+ Log.info (fun m -> m "User %s removed" username);
2929+ 0
3030+ | Error err ->
3131+ Log.err (fun m -> m "%s" err);
3232+ 1
3333+3434+ let list state =
3535+ let users = River.State.list_users state in
3636+ if users = [] then
3737+ Printf.printf "No users found\n"
3838+ else begin
3939+ Printf.printf "Users:\n";
4040+ List.iter (fun username ->
4141+ match River.State.get_user state ~username with
4242+ | Some user ->
4343+ let email_str = match River.User.email user with
4444+ | Some e -> " <" ^ e ^ ">"
4545+ | None -> ""
4646+ in
4747+ Printf.printf " %s (%s%s) - %d feeds\n"
4848+ username (River.User.fullname user) email_str
4949+ (List.length (River.User.feeds user))
5050+ | None -> ()
5151+ ) users
5252+ end;
5353+ 0
5454+5555+ let add_feed state ~username ~name ~url =
5656+ match River.State.get_user state ~username with
5757+ | None ->
5858+ Log.err (fun m -> m "User %s not found" username);
5959+ 1
6060+ | Some user ->
6161+ let source = River.Source.make ~name ~url in
6262+ let user = River.User.add_feed user source in
6363+ (match River.State.update_user state user with
6464+ | Ok () ->
6565+ Log.info (fun m -> m "Feed %s added to user %s" name username);
6666+ 0
6767+ | Error err ->
6868+ Log.err (fun m -> m "%s" err);
6969+ 1)
7070+7171+ let remove_feed state ~username ~url =
7272+ match River.State.get_user state ~username with
7373+ | None ->
7474+ Log.err (fun m -> m "User %s not found" username);
7575+ 1
7676+ | Some user ->
7777+ let user = River.User.remove_feed user ~url in
7878+ (match River.State.update_user state user with
7979+ | Ok () ->
8080+ Log.info (fun m -> m "Feed removed from user %s" username);
8181+ 0
8282+ | Error err ->
8383+ Log.err (fun m -> m "%s" err);
8484+ 1)
8585+8686+ let show state ~username =
8787+ match River.State.get_user state ~username with
8888+ | None ->
8989+ Log.err (fun m -> m "User %s not found" username);
9090+ 1
9191+ | Some user ->
9292+ Printf.printf "Username: %s\n" (River.User.username user);
9393+ Printf.printf "Full name: %s\n" (River.User.fullname user);
9494+ Printf.printf "Email: %s\n"
9595+ (Option.value (River.User.email user) ~default:"(none)");
9696+ Printf.printf "Last synced: %s\n"
9797+ (Option.value (River.User.last_synced user) ~default:"never");
9898+ let feeds = River.User.feeds user in
9999+ Printf.printf "Feeds (%d):\n" (List.length feeds);
100100+ List.iter (fun feed ->
101101+ Printf.printf " - %s: %s\n"
102102+ (River.Source.name feed) (River.Source.url feed)
103103+ ) feeds;
104104+ 0
105105+end
106106+107107+(* Sync command *)
108108+module Sync = struct
109109+ let sync_user env state ~username =
110110+ match River.State.sync_user env state ~username with
111111+ | Ok () ->
112112+ Log.info (fun m -> m "Sync completed for user %s" username);
113113+ 0
114114+ | Error err ->
115115+ Log.err (fun m -> m "Sync failed: %s" err);
116116+ 1
117117+118118+ let sync_all env state =
119119+ match River.State.sync_all env state with
120120+ | Ok (success, fail) ->
121121+ Log.info (fun m -> m "Synced %d users (%d failed)" success fail);
122122+ if fail = 0 then 0 else 1
123123+ | Error err ->
124124+ Log.err (fun m -> m "Sync failed: %s" err);
125125+ 1
126126+end
127127+128128+(* Post listing commands *)
129129+module Post = struct
130130+ let format_date ptime =
131131+ let open Ptime in
132132+ let (y, m, d), _ = to_date_time ptime in
133133+ Printf.sprintf "%02d/%02d/%04d" d m y
134134+135135+ let format_text_construct : Syndic.Atom.text_construct -> string = function
136136+ | Syndic.Atom.Text s -> s
137137+ | Syndic.Atom.Html (_, s) -> s
138138+ | Syndic.Atom.Xhtml (_, _) -> "<xhtml content>"
139139+140140+ let get_content_text (entry : Syndic.Atom.entry) =
141141+ match entry.content with
142142+ | Some (Syndic.Atom.Text s) -> Some s
143143+ | Some (Syndic.Atom.Html (_, s)) -> Some s
144144+ | Some (Syndic.Atom.Xhtml (_, _)) -> Some "<xhtml content>"
145145+ | Some (Syndic.Atom.Mime _) -> Some "<mime content>"
146146+ | Some (Syndic.Atom.Src _) -> Some "<external content>"
147147+ | None -> None
148148+149149+ let truncate_string s max_len =
150150+ if String.length s <= max_len then s
151151+ else String.sub s 0 max_len ^ "..."
152152+153153+ let list state ~username_opt ~limit ~metadata =
154154+ match username_opt with
155155+ | Some username ->
156156+ (* List posts for a specific user *)
157157+ (match River.State.get_user state ~username with
158158+ | None ->
159159+ Log.err (fun m -> m "User %s not found" username);
160160+ 1
161161+ | Some user ->
162162+ let entries = River.State.get_user_posts state ~username ?limit () in
163163+ if entries = [] then begin
164164+ Fmt.pr "%a@." Fmt.(styled `Yellow string)
165165+ ("No posts found for user " ^ username);
166166+ Fmt.pr "%a@." Fmt.(styled `Faint string)
167167+ ("(Run 'river-cli sync " ^ username ^ "' to fetch posts)");
168168+ 0
169169+ end else begin
170170+ Fmt.pr "@.%a@.@."
171171+ Fmt.(styled `Bold (styled (`Fg `Cyan) string))
172172+ (Printf.sprintf "Posts for %s (%d total)"
173173+ (River.User.fullname user) (List.length entries));
174174+175175+ List.iter (fun (entry : Syndic.Atom.entry) ->
176176+ let entry_id = Uri.to_string entry.id in
177177+178178+ (* Title and ID on separate lines for clarity *)
179179+ Fmt.pr "%a@."
180180+ Fmt.(styled `Bold (styled (`Fg `Blue) string))
181181+ (format_text_construct entry.title);
182182+ Fmt.pr " %a %a@."
183183+ Fmt.(styled `Faint string) "ID:"
184184+ Fmt.(styled (`Fg `Magenta) string) entry_id;
185185+186186+ if metadata then begin
187187+ (* Show all metadata *)
188188+ Fmt.pr " %a %a@."
189189+ Fmt.(styled `Faint string) "Author:"
190190+ Fmt.(styled `Green string) (River.User.fullname user);
191191+ Fmt.pr " %a %a@."
192192+ Fmt.(styled `Faint string) "Updated:"
193193+ Fmt.(styled `Yellow string) (format_date entry.updated);
194194+195195+ (* Summary if present *)
196196+ (match entry.summary with
197197+ | Some summary ->
198198+ let summary_text = format_text_construct summary in
199199+ Fmt.pr " %a %a@."
200200+ Fmt.(styled `Faint string) "Summary:"
201201+ Fmt.string (truncate_string summary_text 150)
202202+ | None -> ());
203203+204204+ (* Content (truncated) *)
205205+ (match get_content_text entry with
206206+ | Some content ->
207207+ let clean = Str.global_replace (Str.regexp "<[^>]*>") "" content in
208208+ let clean = Str.global_replace (Str.regexp "[ \t\n\r]+") " " clean in
209209+ Fmt.pr " %a %a@."
210210+ Fmt.(styled `Faint string) "Content:"
211211+ Fmt.string (truncate_string (String.trim clean) 200)
212212+ | None -> ());
213213+214214+ (* Links *)
215215+ (match entry.links with
216216+ | [] -> ()
217217+ | links ->
218218+ Fmt.pr " %a@." Fmt.(styled `Faint string) "Links:";
219219+ List.iter (fun link ->
220220+ Fmt.pr " %a@." Fmt.(styled (`Fg `Cyan) string)
221221+ (Uri.to_string link.Syndic.Atom.href)
222222+ ) links);
223223+224224+ (* Tags/Categories *)
225225+ (match entry.categories with
226226+ | [] -> ()
227227+ | categories ->
228228+ Fmt.pr " %a %a@." Fmt.(styled `Faint string) "Tags:"
229229+ Fmt.(list ~sep:comma (styled (`Fg `Yellow) string))
230230+ (List.map (fun c -> c.Syndic.Atom.term) categories));
231231+ end else begin
232232+ (* Compact view: just author and date *)
233233+ Fmt.pr " %a %a %a %a@."
234234+ Fmt.(styled `Faint string) "By"
235235+ Fmt.(styled `Green string) (River.User.fullname user)
236236+ Fmt.(styled `Faint string) "on"
237237+ Fmt.(styled `Yellow string) (format_date entry.updated);
238238+ end;
239239+ Fmt.pr "@."
240240+ ) entries;
241241+ 0
242242+ end)
243243+ | None ->
244244+ (* List posts from all users *)
245245+ let all_posts = River.State.get_all_posts state ?limit () in
246246+ if all_posts = [] then begin
247247+ Fmt.pr "%a@." Fmt.(styled `Yellow string)
248248+ "No posts found for any users";
249249+ Fmt.pr "%a@." Fmt.(styled `Faint string)
250250+ "(Run 'river-cli sync' to fetch posts)";
251251+ 0
252252+ end else begin
253253+ Fmt.pr "@.%a@.@."
254254+ Fmt.(styled `Bold (styled (`Fg `Cyan) string))
255255+ (Printf.sprintf "Posts from all users (%d total)"
256256+ (List.length all_posts));
257257+258258+ List.iter (fun (username, entry : string * Syndic.Atom.entry) ->
259259+ let author_name =
260260+ match River.State.get_user state ~username with
261261+ | Some user -> River.User.fullname user
262262+ | None ->
263263+ let (author, _) = entry.authors in
264264+ String.trim author.name
265265+ in
266266+ let entry_id = Uri.to_string entry.id in
267267+268268+ (* Title and ID on separate lines for clarity *)
269269+ Fmt.pr "%a@."
270270+ Fmt.(styled `Bold (styled (`Fg `Blue) string))
271271+ (format_text_construct entry.title);
272272+ Fmt.pr " %a %a@."
273273+ Fmt.(styled `Faint string) "ID:"
274274+ Fmt.(styled (`Fg `Magenta) string) entry_id;
275275+276276+ if metadata then begin
277277+ (* Show all metadata *)
278278+ Fmt.pr " %a %a@."
279279+ Fmt.(styled `Faint string) "Author:"
280280+ Fmt.(styled `Green string) author_name;
281281+ Fmt.pr " %a %a@."
282282+ Fmt.(styled `Faint string) "Updated:"
283283+ Fmt.(styled `Yellow string) (format_date entry.updated);
284284+285285+ (* Summary if present *)
286286+ (match entry.summary with
287287+ | Some summary ->
288288+ let summary_text = format_text_construct summary in
289289+ Fmt.pr " %a %a@."
290290+ Fmt.(styled `Faint string) "Summary:"
291291+ Fmt.string (truncate_string summary_text 150)
292292+ | None -> ());
293293+294294+ (* Content (truncated) *)
295295+ (match get_content_text entry with
296296+ | Some content ->
297297+ let clean = Str.global_replace (Str.regexp "<[^>]*>") "" content in
298298+ let clean = Str.global_replace (Str.regexp "[ \t\n\r]+") " " clean in
299299+ Fmt.pr " %a %a@."
300300+ Fmt.(styled `Faint string) "Content:"
301301+ Fmt.string (truncate_string (String.trim clean) 200)
302302+ | None -> ());
303303+304304+ (* Links *)
305305+ (match entry.links with
306306+ | [] -> ()
307307+ | links ->
308308+ Fmt.pr " %a@." Fmt.(styled `Faint string) "Links:";
309309+ List.iter (fun link ->
310310+ Fmt.pr " %a@." Fmt.(styled (`Fg `Cyan) string)
311311+ (Uri.to_string link.Syndic.Atom.href)
312312+ ) links);
313313+314314+ (* Tags/Categories *)
315315+ (match entry.categories with
316316+ | [] -> ()
317317+ | categories ->
318318+ Fmt.pr " %a %a@." Fmt.(styled `Faint string) "Tags:"
319319+ Fmt.(list ~sep:comma (styled (`Fg `Yellow) string))
320320+ (List.map (fun c -> c.Syndic.Atom.term) categories));
321321+ end else begin
322322+ (* Compact view: just author and date *)
323323+ Fmt.pr " %a %a %a %a@."
324324+ Fmt.(styled `Faint string) "By"
325325+ Fmt.(styled `Green string) author_name
326326+ Fmt.(styled `Faint string) "on"
327327+ Fmt.(styled `Yellow string) (format_date entry.updated);
328328+ end;
329329+ Fmt.pr "@."
330330+ ) all_posts;
331331+ 0
332332+ end
333333+334334+ let info state ~post_id ~verbose =
335335+ (* Find the post by ID across all users *)
336336+ let all_posts = River.State.get_all_posts state () in
337337+ match List.find_opt (fun (_, entry : string * Syndic.Atom.entry) ->
338338+ Uri.to_string entry.id = post_id
339339+ ) all_posts with
340340+ | None ->
341341+ Log.err (fun m -> m "Post with ID %s not found" post_id);
342342+ 1
343343+ | Some (username, entry) ->
344344+ (* Display post information *)
345345+ Fmt.pr "@.";
346346+ Fmt.pr "%a@." Fmt.(styled `Bold string) (String.make 70 '=');
347347+ Fmt.pr " %a@." Fmt.(styled `Bold (styled (`Fg `Blue) string))
348348+ (format_text_construct entry.title);
349349+ Fmt.pr "%a@.@." Fmt.(styled `Bold string) (String.make 70 '=');
350350+351351+ (* Author and date *)
352352+ let author_name =
353353+ match River.State.get_user state ~username with
354354+ | Some user -> River.User.fullname user
355355+ | None ->
356356+ let (author, _) = entry.authors in
357357+ String.trim author.name
358358+ in
359359+ Fmt.pr "%a %a@." Fmt.(styled `Cyan string) "Author:"
360360+ Fmt.(styled `Green string) author_name;
361361+ Fmt.pr "%a %a@." Fmt.(styled `Cyan string) "Published:"
362362+ Fmt.(styled `Magenta string) (format_date entry.updated);
363363+ Fmt.pr "%a %a@.@." Fmt.(styled `Cyan string) "ID:"
364364+ Fmt.(styled `Faint string) post_id;
365365+366366+ (* Summary if present *)
367367+ (match entry.summary with
368368+ | Some summary ->
369369+ Fmt.pr "%a@." Fmt.(styled (`Fg `Yellow) string) "Summary:";
370370+ Fmt.pr "%s@.@." (format_text_construct summary)
371371+ | None -> ());
372372+373373+ (* Content *)
374374+ (match entry.content with
375375+ | Some content ->
376376+ let content_str = match content with
377377+ | Syndic.Atom.Text s -> s
378378+ | Syndic.Atom.Html (_, s) -> s
379379+ | Syndic.Atom.Xhtml (_, _) -> "<xhtml content>"
380380+ | Syndic.Atom.Mime _ -> "<mime content>"
381381+ | Syndic.Atom.Src _ -> "<external content>"
382382+ in
383383+ Fmt.pr "%a@." Fmt.(styled (`Fg `Yellow) string) "Content:";
384384+ if verbose then begin
385385+ (* In verbose mode, attempt to convert HTML to markdown *)
386386+ let markdown = try
387387+ (* Simple HTML to markdown conversion - just strip tags for now *)
388388+ let re = Str.regexp "<[^>]*>" in
389389+ Str.global_replace re "" content_str
390390+ with _ -> content_str
391391+ in
392392+ Fmt.pr "%s@.@." markdown
393393+ end else begin
394394+ (* Non-verbose mode: show truncated content *)
395395+ let max_len = 500 in
396396+ if String.length content_str > max_len then
397397+ Fmt.pr "%s...@.@." (String.sub content_str 0 max_len)
398398+ else
399399+ Fmt.pr "%s@.@." content_str
400400+ end
401401+ | None -> ());
402402+403403+ (* Links *)
404404+ (match entry.links with
405405+ | [] -> ()
406406+ | links ->
407407+ Fmt.pr "%a@." Fmt.(styled `Cyan string) "Links:";
408408+ List.iter (fun link ->
409409+ Fmt.pr " - %s@." (Uri.to_string link.Syndic.Atom.href)
410410+ ) links;
411411+ Fmt.pr "@.");
412412+413413+ (* Categories/Tags if verbose *)
414414+ if verbose then begin
415415+ match entry.categories with
416416+ | [] -> ()
417417+ | categories ->
418418+ Fmt.pr "%a@." Fmt.(styled `Cyan string) "Tags:";
419419+ List.iter (fun cat ->
420420+ Fmt.pr " - %s@." cat.Syndic.Atom.term
421421+ ) categories;
422422+ Fmt.pr "@."
423423+ end;
424424+425425+ 0
426426+end
427427+428428+(* Cmdliner argument definitions *)
429429+let username_arg =
430430+ let doc = "Username" in
431431+ Arg.(required & pos 0 (some string) None & info [] ~docv:"USERNAME" ~doc)
432432+433433+let fullname_arg =
434434+ let doc = "Full name of the user" in
435435+ Arg.(required & opt (some string) None & info ["name"; "n"] ~doc)
436436+437437+let email_arg =
438438+ let doc = "Email address of the user (optional)" in
439439+ Arg.(value & opt (some string) None & info ["email"; "e"] ~doc)
440440+441441+let feed_name_arg =
442442+ let doc = "Feed name/label" in
443443+ Arg.(required & opt (some string) None & info ["name"; "n"] ~doc)
444444+445445+let feed_url_arg =
446446+ let doc = "Feed URL" in
447447+ Arg.(required & opt (some string) None & info ["url"; "u"] ~doc)
448448+449449+(* User commands - these don't need network, just filesystem access *)
450450+let user_add =
451451+ Term.(const (fun username fullname email env _xdg _profile ->
452452+ let state = River.State.create env ~app_name:"river" in
453453+ User.add state ~username ~fullname ~email
454454+ ) $ username_arg $ fullname_arg $ email_arg)
455455+456456+let user_remove =
457457+ Term.(const (fun username env _xdg _profile ->
458458+ let state = River.State.create env ~app_name:"river" in
459459+ User.remove state ~username
460460+ ) $ username_arg)
461461+462462+let user_list =
463463+ Term.(const (fun env _xdg _profile ->
464464+ let state = River.State.create env ~app_name:"river" in
465465+ User.list state
466466+ ))
467467+468468+let user_show =
469469+ Term.(const (fun username env _xdg _profile ->
470470+ let state = River.State.create env ~app_name:"river" in
471471+ User.show state ~username
472472+ ) $ username_arg)
473473+474474+let user_add_feed =
475475+ Term.(const (fun username name url env _xdg _profile ->
476476+ let state = River.State.create env ~app_name:"river" in
477477+ User.add_feed state ~username ~name ~url
478478+ ) $ username_arg $ feed_name_arg $ feed_url_arg)
479479+480480+let user_remove_feed =
481481+ Term.(const (fun username url env _xdg _profile ->
482482+ let state = River.State.create env ~app_name:"river" in
483483+ User.remove_feed state ~username ~url
484484+ ) $ username_arg $ feed_url_arg)
485485+486486+let user_cmd =
487487+ let doc = "Manage users" in
488488+ let info = Cmd.info "user" ~doc in
489489+ let user_add_cmd =
490490+ Eiocmd.run
491491+ ~use_keyeio:false
492492+ ~info:(Cmd.info "add" ~doc:"Add a new user")
493493+ ~app_name:"river"
494494+ ~service:"river"
495495+ user_add
496496+ in
497497+ let user_remove_cmd =
498498+ Eiocmd.run
499499+ ~use_keyeio:false
500500+ ~info:(Cmd.info "remove" ~doc:"Remove a user")
501501+ ~app_name:"river"
502502+ ~service:"river"
503503+ user_remove
504504+ in
505505+ let user_list_cmd =
506506+ Eiocmd.run
507507+ ~use_keyeio:false
508508+ ~info:(Cmd.info "list" ~doc:"List all users")
509509+ ~app_name:"river"
510510+ ~service:"river"
511511+ user_list
512512+ in
513513+ let user_show_cmd =
514514+ Eiocmd.run
515515+ ~use_keyeio:false
516516+ ~info:(Cmd.info "show" ~doc:"Show user details")
517517+ ~app_name:"river"
518518+ ~service:"river"
519519+ user_show
520520+ in
521521+ let user_add_feed_cmd =
522522+ Eiocmd.run
523523+ ~use_keyeio:false
524524+ ~info:(Cmd.info "add-feed" ~doc:"Add a feed to a user")
525525+ ~app_name:"river"
526526+ ~service:"river"
527527+ user_add_feed
528528+ in
529529+ let user_remove_feed_cmd =
530530+ Eiocmd.run
531531+ ~use_keyeio:false
532532+ ~info:(Cmd.info "remove-feed" ~doc:"Remove a feed from a user")
533533+ ~app_name:"river"
534534+ ~service:"river"
535535+ user_remove_feed
536536+ in
537537+ Cmd.group info [
538538+ user_add_cmd;
539539+ user_remove_cmd;
540540+ user_list_cmd;
541541+ user_show_cmd;
542542+ user_add_feed_cmd;
543543+ user_remove_feed_cmd;
544544+ ]
545545+546546+(* Sync command - needs Eio environment for HTTP requests *)
547547+let sync =
548548+ let username_opt =
549549+ let doc = "Sync specific user (omit to sync all)" in
550550+ Arg.(value & pos 0 (some string) None & info [] ~docv:"USERNAME" ~doc)
551551+ in
552552+ Term.(const (fun username_opt env _xdg _profile ->
553553+ let state = River.State.create env ~app_name:"river" in
554554+ match username_opt with
555555+ | Some username -> Sync.sync_user env state ~username
556556+ | None -> Sync.sync_all env state
557557+ ) $ username_opt)
558558+559559+(* List command - doesn't need network, just reads local files *)
560560+let list =
561561+ let username_opt_arg =
562562+ let doc = "Username (optional - defaults to all users)" in
563563+ Arg.(value & pos 0 (some string) None & info [] ~docv:"USERNAME" ~doc)
564564+ in
565565+ let limit_arg =
566566+ let doc = "Limit number of posts to display (default: all)" in
567567+ Arg.(value & opt (some int) None & info ["limit"; "n"] ~doc)
568568+ in
569569+ let metadata_arg =
570570+ let doc = "Show all metadata (author, date, summary, content preview, links, tags)" in
571571+ Arg.(value & flag & info ["metadata"; "m"] ~doc)
572572+ in
573573+ Term.(const (fun username_opt limit metadata env _xdg _profile ->
574574+ let state = River.State.create env ~app_name:"river" in
575575+ Post.list state ~username_opt ~limit ~metadata
576576+ ) $ username_opt_arg $ limit_arg $ metadata_arg)
577577+578578+(* Info command - show detailed post information *)
579579+let info =
580580+ let post_id_arg =
581581+ let doc = "Post ID (URI)" in
582582+ Arg.(required & pos 0 (some string) None & info [] ~docv:"POST_ID" ~doc)
583583+ in
584584+ let full_arg =
585585+ let doc = "Show full content without truncation" in
586586+ Arg.(value & flag & info ["full"] ~doc)
587587+ in
588588+ Term.(const (fun post_id full env _xdg _profile ->
589589+ let state = River.State.create env ~app_name:"river" in
590590+ Post.info state ~post_id ~verbose:full
591591+ ) $ post_id_arg $ full_arg)
592592+593593+(* Merge command - export merged feed *)
594594+let merge =
595595+ let format_arg =
596596+ let doc = "Output format: atom or jsonfeed" in
597597+ Arg.(value & opt string "atom" & info ["format"; "f"] ~doc)
598598+ in
599599+ let title_arg =
600600+ let doc = "Feed title" in
601601+ Arg.(value & opt string "River Merged Feed" & info ["title"; "t"] ~doc)
602602+ in
603603+ let limit_arg =
604604+ let doc = "Maximum number of entries to include (default: all)" in
605605+ Arg.(value & opt (some int) None & info ["limit"; "n"] ~doc)
606606+ in
607607+ Term.(const (fun format title limit env _xdg _profile ->
608608+ let state = River.State.create env ~app_name:"river" in
609609+ let format_type = match String.lowercase_ascii format with
610610+ | "jsonfeed" | "json" -> `Jsonfeed
611611+ | _ -> `Atom
612612+ in
613613+ match River.State.export_merged_feed state ~title ~format:format_type ?limit () with
614614+ | Ok output ->
615615+ print_endline output;
616616+ 0
617617+ | Error err ->
618618+ Log.err (fun m -> m "Failed to export merged feed: %s" err);
619619+ 1
620620+ ) $ format_arg $ title_arg $ limit_arg)
621621+622622+(* Quality command - analyze feed quality *)
623623+let quality =
624624+ let username_arg =
625625+ let doc = "Username to analyze" in
626626+ Arg.(required & pos 0 (some string) None & info [] ~docv:"USERNAME" ~doc)
627627+ in
628628+ Term.(const (fun username env _xdg _profile ->
629629+ let state = River.State.create env ~app_name:"river" in
630630+ match River.State.analyze_user_quality state ~username with
631631+ | Error err ->
632632+ Log.err (fun m -> m "%s" err);
633633+ 1
634634+ | Ok metrics ->
635635+ (* Display quality metrics *)
636636+ Fmt.pr "@.";
637637+ Fmt.pr "%a@." Fmt.(styled `Bold string)
638638+ (String.make 70 '=');
639639+ Fmt.pr " %a %s@." Fmt.(styled `Bold (styled (`Fg `Blue) string))
640640+ "User Quality Analysis:" username;
641641+ Fmt.pr "%a@.@." Fmt.(styled `Bold string)
642642+ (String.make 70 '=');
643643+644644+ (* Overall quality score *)
645645+ let score = River.Quality.quality_score metrics in
646646+ let score_color = match score with
647647+ | s when s >= 80.0 -> `Green
648648+ | s when s >= 60.0 -> `Yellow
649649+ | s when s >= 40.0 -> `Magenta
650650+ | _ -> `Red
651651+ in
652652+ Fmt.pr "%a %.1f/100.0@.@."
653653+ Fmt.(styled (`Fg score_color) (styled `Bold string))
654654+ "Overall Quality Score:"
655655+ score;
656656+657657+ (* Entry statistics *)
658658+ Fmt.pr "%a@." Fmt.(styled `Cyan string) "Entry Statistics:";
659659+ Fmt.pr " Total entries: %d@." (River.Quality.total_entries metrics);
660660+ Fmt.pr "@.";
661661+662662+ (* Completeness metrics *)
663663+ Fmt.pr "%a@." Fmt.(styled `Cyan string) "Completeness:";
664664+ let total = River.Quality.total_entries metrics in
665665+ let pct entries =
666666+ float_of_int entries /. float_of_int total *. 100.0
667667+ in
668668+ Fmt.pr " Entries with content: %3d/%d (%5.1f%%)@."
669669+ (River.Quality.entries_with_content metrics)
670670+ total
671671+ (pct (River.Quality.entries_with_content metrics));
672672+ Fmt.pr " Entries with dates: %3d/%d (%5.1f%%)@."
673673+ (River.Quality.entries_with_date metrics)
674674+ total
675675+ (pct (River.Quality.entries_with_date metrics));
676676+ Fmt.pr " Entries with authors: %3d/%d (%5.1f%%)@."
677677+ (River.Quality.entries_with_author metrics)
678678+ total
679679+ (pct (River.Quality.entries_with_author metrics));
680680+ Fmt.pr " Entries with summaries:%3d/%d (%5.1f%%)@."
681681+ (River.Quality.entries_with_summary metrics)
682682+ total
683683+ (pct (River.Quality.entries_with_summary metrics));
684684+ Fmt.pr " Entries with tags: %3d/%d (%5.1f%%)@."
685685+ (River.Quality.entries_with_tags metrics)
686686+ total
687687+ (pct (River.Quality.entries_with_tags metrics));
688688+ Fmt.pr "@.";
689689+690690+ (* Content statistics *)
691691+ if River.Quality.entries_with_content metrics > 0 then begin
692692+ Fmt.pr "%a@." Fmt.(styled `Cyan string) "Content Statistics:";
693693+ Fmt.pr " Average length: %.0f characters@."
694694+ (River.Quality.avg_content_length metrics);
695695+ Fmt.pr " Min length: %d characters@."
696696+ (River.Quality.min_content_length metrics);
697697+ Fmt.pr " Max length: %d characters@."
698698+ (River.Quality.max_content_length metrics);
699699+ Fmt.pr "@."
700700+ end;
701701+702702+ (* Posting frequency *)
703703+ (match River.Quality.posting_frequency_days metrics with
704704+ | Some freq ->
705705+ Fmt.pr "%a@." Fmt.(styled `Cyan string) "Posting Frequency:";
706706+ Fmt.pr " Average: %.1f days between posts@." freq;
707707+ let posts_per_week = 7.0 /. freq in
708708+ Fmt.pr " (~%.1f posts per week)@." posts_per_week;
709709+ Fmt.pr "@."
710710+ | None ->
711711+ Fmt.pr "%a@.@." Fmt.(styled `Faint string)
712712+ "Not enough data to calculate posting frequency");
713713+714714+ Fmt.pr "@.";
715715+ 0
716716+ ) $ username_arg)
717717+718718+let main_cmd =
719719+ let doc = "River feed management CLI" in
720720+ let main_info = Cmd.info "river-cli" ~version:"1.0" ~doc in
721721+ let sync_cmd =
722722+ Eiocmd.run
723723+ ~use_keyeio:false
724724+ ~info:(Cmd.info "sync" ~doc:"Sync feeds for users")
725725+ ~app_name:"river"
726726+ ~service:"river"
727727+ sync
728728+ in
729729+ let list_cmd =
730730+ Eiocmd.run
731731+ ~use_keyeio:false
732732+ ~info:(Cmd.info "list" ~doc:"List recent posts (from all users by default, or specify a user)")
733733+ ~app_name:"river"
734734+ ~service:"river"
735735+ list
736736+ in
737737+ let info_cmd =
738738+ Eiocmd.run
739739+ ~use_keyeio:false
740740+ ~info:(Cmd.info "info" ~doc:"Show detailed post information")
741741+ ~app_name:"river"
742742+ ~service:"river"
743743+ info
744744+ in
745745+ let merge_cmd =
746746+ Eiocmd.run
747747+ ~use_keyeio:false
748748+ ~info:(Cmd.info "merge" ~doc:"Export a merged feed combining all users' feeds")
749749+ ~app_name:"river"
750750+ ~service:"river"
751751+ merge
752752+ in
753753+ let quality_cmd =
754754+ Eiocmd.run
755755+ ~use_keyeio:false
756756+ ~info:(Cmd.info "quality" ~doc:"Analyze feed quality metrics for a user")
757757+ ~app_name:"river"
758758+ ~service:"river"
759759+ quality
760760+ in
761761+ Cmd.group main_info [user_cmd; sync_cmd; list_cmd; info_cmd; merge_cmd; quality_cmd]
+115
stack/river/cmd/river_cmd.mli
···11+(** River.Cmd - Cmdliner terms for River CLI
22+33+ This module provides cmdliner terms that are thin wrappers around
44+ the River library functions. All business logic resides in the
55+ main River module. *)
66+77+(** {1 Cmdliner Terms}
88+99+ These terms can be used to build command-line interfaces using
1010+ Cmdliner and Eiocmd. They handle argument parsing and call into
1111+ the River library functions. *)
1212+1313+open Cmdliner
1414+1515+(** {2 User Management Commands} *)
1616+1717+val user_add :
1818+ (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Term.t
1919+(** [user_add] command term for adding a new user.
2020+2121+ Reads: username, fullname, email from command-line arguments.
2222+ Calls: [River.State.create_user] *)
2323+2424+val user_remove :
2525+ (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Term.t
2626+(** [user_remove] command term for removing a user.
2727+2828+ Reads: username from command-line arguments.
2929+ Calls: [River.State.delete_user] *)
3030+3131+val user_list :
3232+ (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Term.t
3333+(** [user_list] command term for listing all users.
3434+3535+ Calls: [River.State.list_users] *)
3636+3737+val user_show :
3838+ (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Term.t
3939+(** [user_show] command term for showing user details.
4040+4141+ Reads: username from command-line arguments.
4242+ Calls: [River.State.get_user] *)
4343+4444+val user_add_feed :
4545+ (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Term.t
4646+(** [user_add_feed] command term for adding a feed to a user.
4747+4848+ Reads: username, name, url from command-line arguments.
4949+ Calls: [River.State.get_user], [River.User.add_feed], [River.State.update_user] *)
5050+5151+val user_remove_feed :
5252+ (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Term.t
5353+(** [user_remove_feed] command term for removing a feed from a user.
5454+5555+ Reads: username, url from command-line arguments.
5656+ Calls: [River.State.get_user], [River.User.remove_feed], [River.State.update_user] *)
5757+5858+val user_cmd : int Cmd.t
5959+(** [user_cmd] is the user management command group containing all user subcommands. *)
6060+6161+(** {2 Feed Sync Commands} *)
6262+6363+val sync :
6464+ (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Term.t
6565+(** [sync] command term for syncing feeds.
6666+6767+ Reads: optional username from command-line arguments.
6868+ Calls: [River.State.sync_user] or [River.State.sync_all] *)
6969+7070+(** {2 Post Listing Commands} *)
7171+7272+val list :
7373+ (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Term.t
7474+(** [list] command term for listing posts with enhanced formatting.
7575+7676+ Features:
7777+ - Pretty-printed output with colors using Fmt
7878+ - Clear ID display (never truncated) for each post
7979+ - Compact view (default): shows title, ID, author, and date
8080+ - Metadata view (--metadata/-m): shows all post metadata including summary,
8181+ content preview (truncated), links, and tags
8282+8383+ Reads: optional username, optional limit, --metadata flag from command-line arguments.
8484+ Calls: [River.State.get_user_posts] or [River.State.get_all_posts] *)
8585+8686+val info :
8787+ (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Term.t
8888+(** [info] command term for showing detailed post information.
8989+9090+ Reads: post ID, --full flag from command-line arguments.
9191+ Uses Logs for informational output (controlled by -v/--verbose from Eiocmd).
9292+ Calls: [River.State.get_all_posts] *)
9393+9494+(** {2 Feed Export Commands} *)
9595+9696+val merge :
9797+ (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Term.t
9898+(** [merge] command term for exporting merged feed.
9999+100100+ Reads: format (atom|jsonfeed), title, limit from command-line arguments.
101101+ Calls: [River.State.export_merged_feed] *)
102102+103103+(** {2 Quality Analysis Commands} *)
104104+105105+val quality :
106106+ (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Term.t
107107+(** [quality] command term for analyzing feed quality.
108108+109109+ Reads: username from command-line arguments.
110110+ Calls: [River.State.analyze_user_quality] *)
111111+112112+(** {2 Main Command} *)
113113+114114+val main_cmd : int Cmd.t
115115+(** [main_cmd] is the main command group containing all River CLI commands. *)
+6-10
stack/river/example/aggregate_feeds.ml
···11let sources =
22- River.
32 [
44- { name = "KC Sivaramakrishnan"; url = "http://kcsrk.info/atom-ocaml.xml" };
55- {
66- name = "Amir Chaudhry";
77- url = "http://amirchaudhry.com/tags/ocamllabs-atom.xml";
88- };
33+ River.Source.make ~name:"KC Sivaramakrishnan" ~url:"http://kcsrk.info/atom-ocaml.xml";
44+ River.Source.make ~name:"Amir Chaudhry" ~url:"http://amirchaudhry.com/tags/ocamllabs-atom.xml";
95 ]
106117let main env =
128 (* Use River.with_session for proper resource management *)
1313- River.with_session env @@ fun session ->
1414- let feeds = List.map (River.fetch session) sources in
1515- let posts = River.posts feeds in
1616- let entries = River.create_atom_entries posts in
99+ River.Session.with_session env @@ fun session ->
1010+ let feeds = List.map (River.Feed.fetch session) sources in
1111+ let posts = River.Post.of_feeds feeds in
1212+ let entries = River.Format.Atom.entries_of_posts posts in
1713 let feed =
1814 let authors = [ Syndic.Atom.author "OCaml Blog" ] in
1915 let id = Uri.of_string "https://ocaml.org/atom.xml" in
-45
stack/river/lib/client.ml
···11-(*
22- * Copyright (c) 2014, OCaml.org project
33- * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
44- *
55- * Permission to use, copy, modify, and distribute this software for any
66- * purpose with or without fee is hereby granted, provided that the above
77- * copyright notice and this permission notice appear in all copies.
88- *
99- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
1010- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1111- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
1212- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
1313- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
1414- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
1515- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1616- *)
1717-1818-(* River HTTP client using Requests *)
1919-2020-let src = Logs.Src.create "river.client" ~doc:"River HTTP client"
2121-module Log = (val Logs.src_log src : Logs.LOG)
2222-2323-type t = {
2424- session : Requests.t;
2525-}
2626-2727-let create ~sw (env : _ ) =
2828- Log.info (fun m -> m "Creating River client");
2929- let session = Requests.create ~sw
3030- ~default_headers:(Requests.Headers.of_list [
3131- ("User-Agent", "OCaml-River/1.0");
3232- ])
3333- ~follow_redirects:true
3434- ~max_redirects:5
3535- ~verify_tls:true
3636- env
3737- in
3838- { session }
3939-4040-let with_client (env : _) f =
4141- Eio.Switch.run @@ fun sw ->
4242- let client = create ~sw env in
4343- f client
4444-4545-let session t = t.session
-57
stack/river/lib/client.mli
···11-(*
22- * Copyright (c) 2014, OCaml.org project
33- * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
44- *
55- * Permission to use, copy, modify, and distribute this software for any
66- * purpose with or without fee is hereby granted, provided that the above
77- * copyright notice and this permission notice appear in all copies.
88- *
99- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
1010- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1111- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
1212- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
1313- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
1414- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
1515- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1616- *)
1717-1818-(** River HTTP client using Requests library.
1919-2020- This module provides a session-based HTTP client for fetching RSS/Atom feeds.
2121- The client manages a Requests session with appropriate defaults for feed fetching. *)
2222-2323-(** The type of a River HTTP client *)
2424-type t
2525-2626-(** [create ~sw env] creates a new River client with a Requests session.
2727-2828- The session is configured with:
2929- - User-Agent: "OCaml-River/1.0"
3030- - Automatic redirect following (max 5 redirects)
3131- - TLS verification enabled
3232-3333- @param sw The switch for resource management
3434- @param env The Eio environment providing network and time resources *)
3535-val create :
3636- sw:Eio.Switch.t ->
3737- < clock : float Eio.Time.clock_ty Eio.Resource.t;
3838- fs : Eio.Fs.dir_ty Eio.Path.t;
3939- net : [ `Generic | `Unix ] Eio.Net.ty Eio.Resource.t; .. > ->
4040- t
4141-4242-(** [with_client env f] creates a client and automatically manages its lifecycle.
4343-4444- This is the recommended way to use the client as it ensures proper cleanup.
4545-4646- @param env The Eio environment
4747- @param f The function to run with the client *)
4848-val with_client :
4949- < clock : float Eio.Time.clock_ty Eio.Resource.t;
5050- fs : Eio.Fs.dir_ty Eio.Path.t;
5151- net : [ `Generic | `Unix ] Eio.Net.ty Eio.Resource.t; .. > ->
5252- (t -> 'a) -> 'a
5353-5454-(** [session t] returns the underlying Requests session.
5555-5656- This is used internally by River's HTTP functions. *)
5757-val session : t -> Requests.t
···11-(*
22- * Copyright (c) 2014, OCaml.org project
33- * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
44- *
55- * Permission to use, copy, modify, and distribute this software for any
66- * purpose with or without fee is hereby granted, provided that the above
77- * copyright notice and this permission notice appear in all copies.
88- *
99- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
1010- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1111- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
1212- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
1313- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
1414- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
1515- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1616- *)
1717-1818-let src = Logs.Src.create "river.feed" ~doc:"River feed parsing"
1919-module Log = (val Logs.src_log src : Logs.LOG)
2020-2121-type source = { name : string; url : string }
2222-type content = Atom of Syndic.Atom.feed | Rss2 of Syndic.Rss2.channel | Json of Jsonfeed.t
2323-2424-let string_of_feed = function Atom _ -> "Atom" | Rss2 _ -> "Rss2" | Json _ -> "JSONFeed"
2525-2626-type t = { name : string; title : string; url : string; content : content }
2727-2828-let classify_feed ~xmlbase (body : string) =
2929- Log.debug (fun m -> m "Attempting to parse feed (%d bytes)" (String.length body));
3030-3131- (* Quick check - does it look like JSON? *)
3232- let looks_like_json =
3333- String.length body > 0 &&
3434- let first_char = String.get body 0 in
3535- first_char = '{' || first_char = '['
3636- in
3737-3838- if looks_like_json then (
3939- (* Try JSONFeed first *)
4040- Log.debug (fun m -> m "Body looks like JSON, trying JSONFeed parser");
4141- match Jsonfeed.of_string body with
4242- | Ok jsonfeed ->
4343- Log.debug (fun m -> m "Successfully parsed as JSONFeed");
4444- Json jsonfeed
4545- | Error err ->
4646- Log.debug (fun m -> m "Not a JSONFeed: %s" (Jsont.Error.to_string err));
4747- (* Fall through to XML parsing *)
4848- failwith "Not a valid JSONFeed"
4949- ) else (
5050- (* Try XML formats *)
5151- try
5252- let feed = Atom (Syndic.Atom.parse ~xmlbase (Xmlm.make_input (`String (0, body)))) in
5353- Log.debug (fun m -> m "Successfully parsed as Atom feed");
5454- feed
5555- with
5656- | Syndic.Atom.Error.Error (pos, msg) -> (
5757- Log.debug (fun m -> m "Not an Atom feed: %s at position (%d, %d)"
5858- msg (fst pos) (snd pos));
5959- try
6060- let feed = Rss2 (Syndic.Rss2.parse ~xmlbase (Xmlm.make_input (`String (0, body)))) in
6161- Log.debug (fun m -> m "Successfully parsed as RSS2 feed");
6262- feed
6363- with Syndic.Rss2.Error.Error (pos, msg) ->
6464- Log.err (fun m -> m "Failed to parse as RSS2: %s at position (%d, %d)"
6565- msg (fst pos) (snd pos));
6666- failwith "Neither Atom nor RSS2 feed")
6767- | Not_found as e ->
6868- Log.err (fun m -> m "Not_found exception during Atom feed parsing");
6969- Log.err (fun m -> m "Backtrace:\n%s" (Printexc.get_backtrace ()));
7070- raise e
7171- | e ->
7272- Log.err (fun m -> m "Unexpected exception during feed parsing: %s"
7373- (Printexc.to_string e));
7474- Log.err (fun m -> m "Backtrace:\n%s" (Printexc.get_backtrace ()));
7575- raise e
7676- )
7777-7878-let fetch client (source : source) =
7979- Log.info (fun m -> m "Fetching feed '%s' from %s" source.name source.url);
8080-8181- let xmlbase = Uri.of_string @@ source.url in
8282-8383- (* Use Requests_json_api.get_result for clean Result-based error handling *)
8484- let session = Client.session client in
8585- let response =
8686- match Requests_json_api.get_result session source.url with
8787- | Ok body ->
8888- Log.info (fun m -> m "Successfully fetched %s (%d bytes)" source.url (String.length body));
8989- body
9090- | Error (status, msg) ->
9191- Log.err (fun m -> m "Failed to fetch feed '%s': HTTP %d - %s" source.name status msg);
9292- failwith (Printf.sprintf "HTTP %d: %s" status msg)
9393- in
9494-9595- let content = classify_feed ~xmlbase response in
9696- let title =
9797- match content with
9898- | Atom atom -> Util.string_of_text_construct atom.Syndic.Atom.title
9999- | Rss2 ch -> ch.Syndic.Rss2.title
100100- | Json jsonfeed -> Jsonfeed.title jsonfeed
101101- in
102102-103103- Log.info (fun m -> m "Successfully fetched %s feed '%s' (title: '%s')"
104104- (string_of_feed content) source.name title);
105105-106106- { name = source.name; title; content; url = source.url }
-309
stack/river/lib/markdown_converter.ml
···11-(** HTML to Markdown converter using Lambda Soup *)
22-33-(** Extract all links from HTML content *)
44-let extract_links html_str =
55- try
66- let soup = Soup.parse html_str in
77- let links = Soup.select "a[href]" soup in
88- Soup.fold (fun acc link ->
99- match Soup.attribute "href" link with
1010- | Some href ->
1111- let text = Soup.texts link |> String.concat "" |> String.trim in
1212- (href, text) :: acc
1313- | None -> acc
1414- ) [] links
1515- |> List.rev
1616- with _ -> []
1717-1818-(** Check if string contains any whitespace *)
1919-let has_whitespace s =
2020- try
2121- let _ = Str.search_forward (Str.regexp "[ \t\n\r]") s 0 in
2222- true
2323- with Not_found -> false
2424-2525-(** Clean up excessive newlines and normalize spacing *)
2626-let cleanup_markdown s =
2727- (* Normalize line endings *)
2828- let s = Str.global_replace (Str.regexp "\r\n") "\n" s in
2929-3030- (* Remove trailing whitespace from each line *)
3131- let lines = String.split_on_char '\n' s in
3232- let lines = List.map (fun line ->
3333- (* Trim trailing spaces but preserve leading spaces for indentation *)
3434- let len = String.length line in
3535- let rec find_last_non_space i =
3636- if i < 0 then -1
3737- else if line.[i] = ' ' || line.[i] = '\t' then find_last_non_space (i - 1)
3838- else i
3939- in
4040- let last = find_last_non_space (len - 1) in
4141- if last < 0 then ""
4242- else String.sub line 0 (last + 1)
4343- ) lines in
4444-4545- (* Join back and collapse excessive blank lines *)
4646- let s = String.concat "\n" lines in
4747-4848- (* Replace 3+ consecutive newlines with just 2 *)
4949- let s = Str.global_replace (Str.regexp "\n\n\n+") "\n\n" s in
5050-5151- (* Trim leading and trailing whitespace *)
5252- String.trim s
5353-5454-(** Convert HTML to Markdown using state-based whitespace handling *)
5555-let html_to_markdown html_str =
5656- try
5757- let soup = Soup.parse html_str in
5858- let buffer = Buffer.create 256 in
5959-6060- (* State: track if we need to insert a space before next text *)
6161- let need_space = ref false in
6262-6363- (* Get last character in buffer, if any *)
6464- let last_char () =
6565- let len = Buffer.length buffer in
6666- if len = 0 then None
6767- else Some (Buffer.nth buffer (len - 1))
6868- in
6969-7070- (* Add text with proper spacing *)
7171- let add_text text =
7272- let trimmed = String.trim text in
7373- if trimmed <> "" then begin
7474- (* Check if text starts with punctuation that shouldn't have space before it *)
7575- let starts_with_punctuation =
7676- String.length trimmed > 0 &&
7777- (match trimmed.[0] with
7878- | ',' | '.' | ';' | ':' | '!' | '?' | ')' | ']' | '}' -> true
7979- | _ -> false)
8080- in
8181-8282- (* Add space if needed, unless we're before punctuation *)
8383- if !need_space && not starts_with_punctuation then begin
8484- match last_char () with
8585- | Some (' ' | '\n') -> ()
8686- | _ -> Buffer.add_char buffer ' '
8787- end;
8888- Buffer.add_string buffer trimmed;
8989- need_space := false
9090- end
9191- in
9292-9393- (* Mark that we need space before next text (for inline elements) *)
9494- let mark_space_needed () =
9595- need_space := has_whitespace (Buffer.contents buffer) || Buffer.length buffer > 0
9696- in
9797-9898- (* Process header with ID/anchor handling *)
9999- let process_header level elem =
100100- need_space := false;
101101-102102- (* Check if header contains a link with an ID fragment *)
103103- let link_opt = Soup.select_one "a[href]" elem in
104104- let anchor_id = match link_opt with
105105- | Some link ->
106106- (match Soup.attribute "href" link with
107107- | Some href ->
108108- (* Extract fragment from URL *)
109109- let uri = Uri.of_string href in
110110- Uri.fragment uri
111111- | None -> None)
112112- | None -> None
113113- in
114114-115115- (* Add anchor if we found an ID *)
116116- (match anchor_id with
117117- | Some id when id <> "" ->
118118- Buffer.add_string buffer (Printf.sprintf "\n<a name=\"%s\"></a>\n" id)
119119- | _ -> ());
120120-121121- (* Add the header marker *)
122122- let marker = String.make level '#' in
123123- Buffer.add_string buffer ("\n" ^ marker ^ " ");
124124-125125- (* Get text content, excluding link tags *)
126126- let text = Soup.texts elem |> String.concat " " |> String.trim in
127127- Buffer.add_string buffer text;
128128-129129- Buffer.add_string buffer "\n\n";
130130- need_space := false
131131- in
132132-133133- let rec process_node node =
134134- match Soup.element node with
135135- | Some elem ->
136136- let tag = Soup.name elem in
137137- (match tag with
138138- (* Block elements - reset space tracking *)
139139- | "h1" -> process_header 1 elem
140140- | "h2" -> process_header 2 elem
141141- | "h3" -> process_header 3 elem
142142- | "h4" -> process_header 4 elem
143143- | "h5" -> process_header 5 elem
144144- | "h6" -> process_header 6 elem
145145- | "p" ->
146146- need_space := false;
147147- Soup.children elem |> Soup.iter process_node;
148148- Buffer.add_string buffer "\n\n";
149149- need_space := false
150150- | "br" ->
151151- Buffer.add_string buffer "\n";
152152- need_space := false
153153- (* Inline elements - preserve space tracking *)
154154- | "strong" | "b" ->
155155- (* Add space before if needed *)
156156- if !need_space then begin
157157- match last_char () with
158158- | Some (' ' | '\n') -> ()
159159- | _ -> Buffer.add_char buffer ' '
160160- end;
161161- Buffer.add_string buffer "**";
162162- need_space := false;
163163- Soup.children elem |> Soup.iter process_node;
164164- Buffer.add_string buffer "**";
165165- mark_space_needed ()
166166- | "em" | "i" ->
167167- (* Add space before if needed *)
168168- if !need_space then begin
169169- match last_char () with
170170- | Some (' ' | '\n') -> ()
171171- | _ -> Buffer.add_char buffer ' '
172172- end;
173173- Buffer.add_string buffer "*";
174174- need_space := false;
175175- Soup.children elem |> Soup.iter process_node;
176176- Buffer.add_string buffer "*";
177177- mark_space_needed ()
178178- | "code" ->
179179- (* Add space before if needed *)
180180- if !need_space then begin
181181- match last_char () with
182182- | Some (' ' | '\n') -> ()
183183- | _ -> Buffer.add_char buffer ' '
184184- end;
185185- Buffer.add_string buffer "`";
186186- need_space := false;
187187- Soup.children elem |> Soup.iter process_node;
188188- Buffer.add_string buffer "`";
189189- mark_space_needed ()
190190- | "pre" ->
191191- need_space := false;
192192- Buffer.add_string buffer "\n```\n";
193193- Soup.children elem |> Soup.iter process_node;
194194- Buffer.add_string buffer "\n```\n\n";
195195- need_space := false
196196- | "a" ->
197197- let text = Soup.texts elem |> String.concat " " |> String.trim in
198198- let href = Soup.attribute "href" elem in
199199- (match href with
200200- | Some href ->
201201- (* Add space before link if needed *)
202202- if !need_space then begin
203203- match last_char () with
204204- | Some (' ' | '\n') -> ()
205205- | _ -> Buffer.add_char buffer ' '
206206- end;
207207- need_space := false;
208208-209209- (* Add the link markdown *)
210210- if text = "" then
211211- Buffer.add_string buffer (Printf.sprintf "<%s>" href)
212212- else
213213- Buffer.add_string buffer (Printf.sprintf "[%s](%s)" text href);
214214-215215- (* Mark that space may be needed after link *)
216216- mark_space_needed ()
217217- | None ->
218218- add_text text)
219219- | "ul" | "ol" ->
220220- need_space := false;
221221- Buffer.add_string buffer "\n";
222222- let is_ordered = tag = "ol" in
223223- let items = Soup.children elem |> Soup.to_list in
224224- List.iteri (fun i item ->
225225- match Soup.element item with
226226- | Some li when Soup.name li = "li" ->
227227- need_space := false;
228228- if is_ordered then
229229- Buffer.add_string buffer (Printf.sprintf "%d. " (i + 1))
230230- else
231231- Buffer.add_string buffer "- ";
232232- Soup.children li |> Soup.iter process_node;
233233- Buffer.add_string buffer "\n"
234234- | _ -> ()
235235- ) items;
236236- Buffer.add_string buffer "\n";
237237- need_space := false
238238- | "blockquote" ->
239239- need_space := false;
240240- Buffer.add_string buffer "\n> ";
241241- Soup.children elem |> Soup.iter process_node;
242242- Buffer.add_string buffer "\n\n";
243243- need_space := false
244244- | "img" ->
245245- (* Add space before if needed *)
246246- if !need_space then begin
247247- match last_char () with
248248- | Some (' ' | '\n') -> ()
249249- | _ -> Buffer.add_char buffer ' '
250250- end;
251251- let alt = Soup.attribute "alt" elem |> Option.value ~default:"" in
252252- let src = Soup.attribute "src" elem |> Option.value ~default:"" in
253253- Buffer.add_string buffer (Printf.sprintf "" alt src);
254254- need_space := false;
255255- mark_space_needed ()
256256- | "hr" ->
257257- need_space := false;
258258- Buffer.add_string buffer "\n---\n\n";
259259- need_space := false
260260- (* Strip these tags but keep content *)
261261- | "div" | "span" | "article" | "section" | "header" | "footer"
262262- | "main" | "nav" | "aside" | "figure" | "figcaption" | "details" | "summary" ->
263263- Soup.children elem |> Soup.iter process_node
264264- (* Ignore script, style, etc *)
265265- | "script" | "style" | "noscript" -> ()
266266- (* Default: just process children *)
267267- | _ ->
268268- Soup.children elem |> Soup.iter process_node)
269269- | None ->
270270- (* Text node - handle whitespace properly *)
271271- match Soup.leaf_text node with
272272- | Some text ->
273273- (* If text is only whitespace, mark that we need space *)
274274- let trimmed = String.trim text in
275275- if trimmed = "" then begin
276276- if has_whitespace text then
277277- need_space := true
278278- end else begin
279279- (* Text has content - check if it had leading/trailing whitespace *)
280280- let had_leading_ws = has_whitespace text &&
281281- (String.length text > 0 &&
282282- (text.[0] = ' ' || text.[0] = '\t' || text.[0] = '\n' || text.[0] = '\r')) in
283283-284284- (* If had leading whitespace, mark we need space *)
285285- if had_leading_ws then need_space := true;
286286-287287- (* Add the text content *)
288288- add_text trimmed;
289289-290290- (* If had trailing whitespace, mark we need space for next *)
291291- let had_trailing_ws = has_whitespace text &&
292292- (String.length text > 0 &&
293293- let last = text.[String.length text - 1] in
294294- last = ' ' || last = '\t' || last = '\n' || last = '\r') in
295295- if had_trailing_ws then need_space := true
296296- end
297297- | None -> ()
298298- in
299299-300300- Soup.children soup |> Soup.iter process_node;
301301-302302- (* Clean up the result *)
303303- let result = Buffer.contents buffer in
304304- cleanup_markdown result
305305- with _ -> html_str
306306-307307-(** Convert HTML content to clean Markdown *)
308308-let to_markdown html_str =
309309- html_to_markdown html_str
-7
stack/river/lib/markdown_converter.mli
···11-(** HTML to Markdown converter *)
22-33-(** Extract all links from HTML content as (href, anchor_text) pairs *)
44-val extract_links : string -> (string * string) list
55-66-(** Convert HTML content to clean Markdown format *)
77-val to_markdown : string -> string
-80
stack/river/lib/meta.ml
···11-(*
22- * Copyright (c) 2014, OCaml.org project
33- * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
44- *
55- * Permission to use, copy, modify, and distribute this software for any
66- * purpose with or without fee is hereby granted, provided that the above
77- * copyright notice and this permission notice appear in all copies.
88- *
99- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
1010- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1111- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
1212- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
1313- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
1414- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
1515- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1616- *)
1717-1818-(** This module determines an image to be used as preview of a website.
1919-2020- It does this by following the same logic Google+ and other websites use, and
2121- described in this article:
2222- https://www.raymondcamden.com/2011/07/26/How-are-Facebook-and-Google-creating-link-previews *)
2323-2424-let og_image html =
2525- let open Soup in
2626- let soup = parse html in
2727- try soup $ "meta[property=og:image]" |> R.attribute "content" |> Option.some
2828- with Failure _ -> None
2929-3030-let image_src html =
3131- let open Soup in
3232- let soup = parse html in
3333- try soup $ "link[rel=\"image_src\"]" |> R.attribute "href" |> Option.some
3434- with Failure _ -> None
3535-3636-let twitter_image html =
3737- let open Soup in
3838- let soup = parse html in
3939- try
4040- soup $ "meta[name=\"twitter:image\"]" |> R.attribute "content"
4141- |> Option.some
4242- with Failure _ -> None
4343-4444-let og_description html =
4545- let open Soup in
4646- let soup = parse html in
4747- try
4848- soup $ "meta[property=og:description]" |> R.attribute "content"
4949- |> Option.some
5050- with Failure _ -> None
5151-5252-let description html =
5353- let open Soup in
5454- let soup = parse html in
5555- try
5656- soup $ "meta[property=description]" |> R.attribute "content" |> Option.some
5757- with Failure _ -> None
5858-5959-let preview_image html =
6060- let preview_image =
6161- match og_image html with
6262- | None -> (
6363- match image_src html with
6464- | None -> twitter_image html
6565- | Some x -> Some x)
6666- | Some x -> Some x
6767- in
6868- match Option.map String.trim preview_image with
6969- | Some "" -> None
7070- | Some x -> Some x
7171- | None -> None
7272-7373-let description html =
7474- let preview_image =
7575- match og_description html with None -> description html | Some x -> Some x
7676- in
7777- match Option.map String.trim preview_image with
7878- | Some "" -> None
7979- | Some x -> Some x
8080- | None -> None
-449
stack/river/lib/post.ml
···11-(*
22- * Copyright (c) 2014, OCaml.org project
33- * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
44- *
55- * Permission to use, copy, modify, and distribute this software for any
66- * purpose with or without fee is hereby granted, provided that the above
77- * copyright notice and this permission notice appear in all copies.
88- *
99- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
1010- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1111- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
1212- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
1313- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
1414- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
1515- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1616- *)
1717-1818-let src = Logs.Src.create "river.post" ~doc:"River post processing"
1919-module Log = (val Logs.src_log src : Logs.LOG)
2020-2121-type t = {
2222- id : string;
2323- title : string;
2424- link : Uri.t option;
2525- date : Syndic.Date.t option;
2626- feed : Feed.t;
2727- author : string;
2828- email : string;
2929- content : Soup.soup Soup.node;
3030- mutable link_response : (string, string) result option;
3131- tags : string list;
3232- summary : string option;
3333-}
3434-3535-(** Generate a stable, unique ID from available data *)
3636-let generate_id ?guid ?link ?title ?date ~feed_url () =
3737- match guid with
3838- | Some id when id <> "" ->
3939- (* Use explicit ID/GUID if available *)
4040- id
4141- | _ ->
4242- match link with
4343- | Some uri when Uri.to_string uri <> "" ->
4444- (* Use permalink as ID (stable and unique) *)
4545- Uri.to_string uri
4646- | _ ->
4747- (* Fallback: hash of feed_url + title + date *)
4848- let title_str = Option.value title ~default:"" in
4949- let date_str =
5050- match date with
5151- | Some d -> Ptime.to_rfc3339 d
5252- | None -> ""
5353- in
5454- let composite = Printf.sprintf "%s|%s|%s" feed_url title_str date_str in
5555- (* Use SHA256 for stable hashing *)
5656- Digest.string composite |> Digest.to_hex
5757-5858-let post_id post = post.id
5959-6060-let resolve_links_attr ~xmlbase attr el =
6161- Soup.R.attribute attr el
6262- |> Uri.of_string
6363- |> Syndic.XML.resolve ~xmlbase
6464- |> Uri.to_string
6565- |> fun value -> Soup.set_attribute attr value el
6666-6767-(* Things that posts should not contain *)
6868-let undesired_tags = [ "style"; "script" ]
6969-let undesired_attr = [ "id" ]
7070-7171-let html_of_text ?xmlbase s =
7272- let soup = Soup.parse s in
7373- let ($$) = Soup.($$) in
7474- soup $$ "a[href]" |> Soup.iter (resolve_links_attr ~xmlbase "href");
7575- soup $$ "img[src]" |> Soup.iter (resolve_links_attr ~xmlbase "src");
7676- undesired_tags |> List.iter (fun tag -> soup $$ tag |> Soup.iter Soup.delete);
7777- soup $$ "*" |> Soup.iter (fun el ->
7878- undesired_attr |> List.iter (fun attr -> Soup.delete_attribute attr el));
7979- soup
8080-8181-(* Do not trust sites using XML for HTML content. Convert to string and parse
8282- back. (Does not always fix bad HTML unfortunately.) *)
8383-let html_of_syndic =
8484- let ns_prefix _ = Some "" in
8585- fun ?xmlbase h ->
8686- html_of_text ?xmlbase
8787- (String.concat "" (List.map (Syndic.XML.to_string ~ns_prefix) h))
8888-8989-let string_of_option = function None -> "" | Some s -> s
9090-9191-(* Email on the forge contain the name in parenthesis *)
9292-let forge_name_re = Str.regexp ".*(\\([^()]*\\))"
9393-9494-let post_compare p1 p2 =
9595- (* Most recent posts first. Posts with no date are always last *)
9696- match (p1.date, p2.date) with
9797- | Some d1, Some d2 -> Syndic.Date.compare d2 d1
9898- | None, Some _ -> 1
9999- | Some _, None -> -1
100100- | None, None -> 1
101101-102102-let rec remove n l =
103103- if n <= 0 then l else match l with [] -> [] | _ :: tl -> remove (n - 1) tl
104104-105105-let rec take n = function
106106- | [] -> []
107107- | e :: tl -> if n > 0 then e :: take (n - 1) tl else []
108108-109109-(* Blog feed
110110- ***********************************************************************)
111111-112112-let post_of_atom ~(feed : Feed.t) (e : Syndic.Atom.entry) =
113113- Log.debug (fun m -> m "Processing Atom entry: %s"
114114- (Util.string_of_text_construct e.title));
115115-116116- let link =
117117- try
118118- Some
119119- (List.find (fun l -> l.Syndic.Atom.rel = Syndic.Atom.Alternate) e.links)
120120- .href
121121- with Not_found -> (
122122- Log.debug (fun m -> m "No alternate link found, trying fallback");
123123- match e.links with
124124- | l :: _ -> Some l.href
125125- | [] -> (
126126- match Uri.scheme e.id with
127127- | Some "http" -> Some e.id
128128- | Some "https" -> Some e.id
129129- | _ -> None))
130130- in
131131- let date =
132132- match e.published with Some _ -> e.published | None -> Some e.updated
133133- in
134134- let content =
135135- match e.content with
136136- | Some (Text s) -> html_of_text s
137137- | Some (Html (xmlbase, s)) -> html_of_text ?xmlbase s
138138- | Some (Xhtml (xmlbase, h)) -> html_of_syndic ?xmlbase h
139139- | Some (Mime _) | Some (Src _) | None -> (
140140- match e.summary with
141141- | Some (Text s) -> html_of_text s
142142- | Some (Html (xmlbase, s)) -> html_of_text ?xmlbase s
143143- | Some (Xhtml (xmlbase, h)) -> html_of_syndic ?xmlbase h
144144- | None -> Soup.parse "")
145145- in
146146- let is_valid_author_name name =
147147- (* Filter out empty strings and placeholder values like "Unknown" *)
148148- let trimmed = String.trim name in
149149- trimmed <> "" && trimmed <> "Unknown"
150150- in
151151- let author_name =
152152- (* Fallback chain for author:
153153- 1. Entry author (if present, not empty, and not "Unknown")
154154- 2. Feed-level author (from Atom feed metadata)
155155- 3. Feed title (from Atom feed metadata)
156156- 4. Source name (manually entered feed name) *)
157157- try
158158- let author, _ = e.authors in
159159- let trimmed = String.trim author.name in
160160- if is_valid_author_name author.name then trimmed
161161- else raise Not_found (* Try feed-level author *)
162162- with Not_found -> (
163163- match feed.content with
164164- | Feed.Atom atom_feed -> (
165165- (* Try feed-level authors *)
166166- match atom_feed.Syndic.Atom.authors with
167167- | author :: _ when is_valid_author_name author.name ->
168168- String.trim author.name
169169- | _ ->
170170- (* Use feed title *)
171171- Util.string_of_text_construct atom_feed.Syndic.Atom.title)
172172- | Feed.Rss2 _ | Feed.Json _ ->
173173- (* For RSS2 and JSONFeed, use the feed name which is the source name *)
174174- feed.name)
175175- in
176176- (* Extract tags from Atom categories *)
177177- let tags =
178178- List.map (fun cat -> cat.Syndic.Atom.term) e.categories
179179- in
180180- (* Extract summary - convert from text_construct to string *)
181181- let summary =
182182- match e.summary with
183183- | Some s -> Some (Util.string_of_text_construct s)
184184- | None -> None
185185- in
186186- (* Generate unique ID *)
187187- let guid = Uri.to_string e.id in
188188- let title_str = Util.string_of_text_construct e.title in
189189- let id =
190190- generate_id ~guid ?link ~title:title_str ?date ~feed_url:feed.url ()
191191- in
192192- {
193193- id;
194194- title = title_str;
195195- link;
196196- date;
197197- feed;
198198- author = author_name;
199199- email = "";
200200- content;
201201- link_response = None;
202202- tags;
203203- summary;
204204- }
205205-206206-let post_of_rss2 ~(feed : Feed.t) it =
207207- let title, content =
208208- match it.Syndic.Rss2.story with
209209- | All (t, xmlbase, d) -> (
210210- ( t,
211211- match it.content with
212212- | _, "" -> html_of_text ?xmlbase d
213213- | xmlbase, c -> html_of_text ?xmlbase c ))
214214- | Title t ->
215215- let xmlbase, c = it.content in
216216- (t, html_of_text ?xmlbase c)
217217- | Description (xmlbase, d) -> (
218218- ( "",
219219- match it.content with
220220- | _, "" -> html_of_text ?xmlbase d
221221- | xmlbase, c -> html_of_text ?xmlbase c ))
222222- in
223223- (* Note: it.link is of type Uri.t option in Syndic *)
224224- let link =
225225- match (it.guid, it.link) with
226226- | Some u, _ when u.permalink -> Some u.data
227227- | _, Some _ -> it.link
228228- | Some u, _ ->
229229- (* Sometimes the guid is indicated with isPermaLink="false" but is
230230- nonetheless the only URL we get (e.g. ocamlpro). *)
231231- Some u.data
232232- | None, None -> None
233233- in
234234- (* Extract GUID string for ID generation *)
235235- let guid_str =
236236- match it.guid with
237237- | Some u -> Some (Uri.to_string u.data)
238238- | None -> None
239239- in
240240- (* RSS2 doesn't have a categories field exposed, use empty list *)
241241- let tags = [] in
242242- (* RSS2 doesn't have a separate summary field, so leave it empty *)
243243- let summary = None in
244244- (* Generate unique ID *)
245245- let id =
246246- generate_id ?guid:guid_str ?link ~title ?date:it.pubDate ~feed_url:feed.url ()
247247- in
248248- {
249249- id;
250250- title;
251251- link;
252252- feed;
253253- author = feed.name;
254254- email = string_of_option it.author;
255255- content;
256256- date = it.pubDate;
257257- link_response = None;
258258- tags;
259259- summary;
260260- }
261261-262262-let post_of_jsonfeed_item ~(feed : Feed.t) (item : Jsonfeed.Item.t) =
263263- Log.debug (fun m -> m "Processing JSONFeed item: %s"
264264- (Option.value (Jsonfeed.Item.title item) ~default:"Untitled"));
265265-266266- (* Extract content - prefer HTML, fall back to text *)
267267- let content =
268268- match Jsonfeed.Item.content item with
269269- | `Html html -> html_of_text html
270270- | `Text text -> html_of_text text
271271- | `Both (html, _text) -> html_of_text html
272272- in
273273-274274- (* Extract author - use first author if multiple *)
275275- let author_name, author_email =
276276- match Jsonfeed.Item.authors item with
277277- | Some (first :: _) ->
278278- let name = Jsonfeed.Author.name first |> Option.value ~default:"" in
279279- (* JSONFeed authors don't typically have email *)
280280- (name, "")
281281- | _ ->
282282- (* Fall back to feed-level authors or feed title *)
283283- (match feed.content with
284284- | Feed.Json jsonfeed ->
285285- (match Jsonfeed.authors jsonfeed with
286286- | Some (first :: _) ->
287287- let name = Jsonfeed.Author.name first |> Option.value ~default:feed.title in
288288- (name, "")
289289- | _ -> (feed.title, ""))
290290- | _ -> (feed.title, ""))
291291- in
292292-293293- (* Link - use url field *)
294294- let link =
295295- Jsonfeed.Item.url item
296296- |> Option.map Uri.of_string
297297- in
298298-299299- (* Date *)
300300- let date = Jsonfeed.Item.date_published item in
301301-302302- (* Summary *)
303303- let summary = Jsonfeed.Item.summary item in
304304-305305- (* Tags *)
306306- let tags =
307307- Jsonfeed.Item.tags item
308308- |> Option.value ~default:[]
309309- in
310310-311311- (* Generate unique ID - JSONFeed items always have an id field (required) *)
312312- let guid = Jsonfeed.Item.id item in
313313- let title_str = Jsonfeed.Item.title item |> Option.value ~default:"Untitled" in
314314- let id =
315315- generate_id ~guid ?link ~title:title_str ?date ~feed_url:feed.url ()
316316- in
317317-318318- {
319319- id;
320320- title = title_str;
321321- link;
322322- date;
323323- feed;
324324- author = author_name;
325325- email = author_email;
326326- content;
327327- link_response = None;
328328- tags;
329329- summary;
330330- }
331331-332332-let posts_of_feed c =
333333- match c.Feed.content with
334334- | Feed.Atom f ->
335335- let posts = List.map (post_of_atom ~feed:c) f.Syndic.Atom.entries in
336336- Log.debug (fun m -> m "Extracted %d posts from Atom feed '%s'"
337337- (List.length posts) c.Feed.name);
338338- posts
339339- | Feed.Rss2 ch ->
340340- let posts = List.map (post_of_rss2 ~feed:c) ch.Syndic.Rss2.items in
341341- Log.debug (fun m -> m "Extracted %d posts from RSS2 feed '%s'"
342342- (List.length posts) c.Feed.name);
343343- posts
344344- | Feed.Json jsonfeed ->
345345- let items = Jsonfeed.items jsonfeed in
346346- let posts = List.map (post_of_jsonfeed_item ~feed:c) items in
347347- Log.debug (fun m -> m "Extracted %d posts from JSONFeed '%s'"
348348- (List.length posts) c.Feed.name);
349349- posts
350350-351351-let mk_entry post =
352352- let content = Syndic.Atom.Html (None, Soup.to_string post.content) in
353353- let contributors =
354354- [ Syndic.Atom.author ~uri:(Uri.of_string post.feed.url) post.feed.name ]
355355- in
356356- let links =
357357- match post.link with
358358- | Some l -> [ Syndic.Atom.link ~rel:Syndic.Atom.Alternate l ]
359359- | None -> []
360360- in
361361- (* TODO: include source *)
362362- let id =
363363- match post.link with
364364- | Some l -> l
365365- | None -> Uri.of_string (Digest.to_hex (Digest.string post.title))
366366- in
367367- let authors = (Syndic.Atom.author ~email:post.email post.author, []) in
368368- let title : Syndic.Atom.text_construct = Syndic.Atom.Text post.title in
369369- let updated =
370370- match post.date with
371371- (* Atom entry requires a date but RSS2 does not. So if a date
372372- * is not available, just capture the current date. *)
373373- | None -> Ptime.of_float_s (Unix.gettimeofday ()) |> Option.get
374374- | Some d -> d
375375- in
376376- Syndic.Atom.entry ~content ~contributors ~links ~id ~authors ~title ~updated
377377- ()
378378-379379-let mk_entries posts = List.map mk_entry posts
380380-381381-let mk_jsonfeed_item post =
382382- (* Convert HTML content back to string *)
383383- let html = Soup.to_string post.content in
384384- let content = `Html html in
385385-386386- (* Create author *)
387387- let authors =
388388- if post.author <> "" then
389389- let author = Jsonfeed.Author.create ~name:post.author () in
390390- Some [author]
391391- else
392392- None
393393- in
394394-395395- (* Create item *)
396396- Jsonfeed.Item.create
397397- ~id:post.id
398398- ~content
399399- ?url:(Option.map Uri.to_string post.link)
400400- ~title:post.title
401401- ?summary:post.summary
402402- ?date_published:post.date
403403- ?authors
404404- ~tags:post.tags
405405- ()
406406-407407-let mk_jsonfeed_items posts = List.map mk_jsonfeed_item posts
408408-409409-let get_posts ?n ?(ofs = 0) planet_feeds =
410410- Log.info (fun m -> m "Processing %d feeds for posts" (List.length planet_feeds));
411411-412412- let posts = List.concat @@ List.map posts_of_feed planet_feeds in
413413- Log.debug (fun m -> m "Total posts collected: %d" (List.length posts));
414414-415415- let posts = List.sort post_compare posts in
416416- Log.debug (fun m -> m "Posts sorted by date (most recent first)");
417417-418418- let posts = remove ofs posts in
419419- let result =
420420- match n with
421421- | None ->
422422- Log.debug (fun m -> m "Returning all %d posts (offset=%d)"
423423- (List.length posts) ofs);
424424- posts
425425- | Some n ->
426426- let limited = take n posts in
427427- Log.debug (fun m -> m "Returning %d posts (requested=%d, offset=%d)"
428428- (List.length limited) n ofs);
429429- limited
430430- in
431431- result
432432-433433-(* Fetch the link response and cache it. *)
434434-(* TODO: This requires environment for HTTP access
435435-let fetch_link env t =
436436- match (t.link, t.link_response) with
437437- | None, _ -> None
438438- | Some _, Some (Ok x) -> Some x
439439- | Some _, Some (Error _) -> None
440440- | Some link, None -> (
441441- try
442442- let response = Http.get env (Uri.to_string link) in
443443- t.link_response <- Some (Ok response);
444444- Some response
445445- with _exn ->
446446- t.link_response <- Some (Error "");
447447- None)
448448-*)
449449-let fetch_link _ = None
+1683-67
stack/river/lib/river.ml
···1515 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1616 *)
17171818+(** River RSS/Atom/JSONFeed aggregator library *)
1919+1820let src = Logs.Src.create "river" ~doc:"River RSS/Atom aggregator"
1921module Log = (val Logs.src_log src : Logs.LOG)
20222121-(* Keep Client module internal *)
2222-module Internal_client = Client
2323+(** {1 Internal Utilities} *)
2424+2525+module Text_extract = struct
2626+ open Syndic
2727+2828+ (* Remove all tags *)
2929+ let rec syndic_to_buffer b = function
3030+ | XML.Node (_, _, subs) -> List.iter (syndic_to_buffer b) subs
3131+ | XML.Data (_, d) -> Buffer.add_string b d
3232+3333+ let syndic_to_string x =
3434+ let b = Buffer.create 1024 in
3535+ List.iter (syndic_to_buffer b) x;
3636+ Buffer.contents b
3737+3838+ let string_of_text_construct : Atom.text_construct -> string = function
3939+ | Atom.Text s | Atom.Html (_, s) -> s
4040+ | Atom.Xhtml (_, x) -> syndic_to_string x
4141+end
4242+4343+module Html_meta = struct
4444+ [@@@warning "-32"] (* Suppress unused value warnings for internal utilities *)
4545+4646+ (** This module determines an image to be used as preview of a website.
4747+4848+ It does this by following the same logic Google+ and other websites use, and
4949+ described in this article:
5050+ https://www.raymondcamden.com/2011/07/26/How-are-Facebook-and-Google-creating-link-previews *)
5151+5252+ let og_image html =
5353+ let open Soup in
5454+ let soup = parse html in
5555+ try soup $ "meta[property=og:image]" |> R.attribute "content" |> Option.some
5656+ with Failure _ -> None
5757+5858+ let image_src html =
5959+ let open Soup in
6060+ let soup = parse html in
6161+ try soup $ "link[rel=\"image_src\"]" |> R.attribute "href" |> Option.some
6262+ with Failure _ -> None
6363+6464+ let twitter_image html =
6565+ let open Soup in
6666+ let soup = parse html in
6767+ try
6868+ soup $ "meta[name=\"twitter:image\"]" |> R.attribute "content"
6969+ |> Option.some
7070+ with Failure _ -> None
7171+7272+ let og_description html =
7373+ let open Soup in
7474+ let soup = parse html in
7575+ try
7676+ soup $ "meta[property=og:description]" |> R.attribute "content"
7777+ |> Option.some
7878+ with Failure _ -> None
7979+8080+ let description html =
8181+ let open Soup in
8282+ let soup = parse html in
8383+ try
8484+ soup $ "meta[property=description]" |> R.attribute "content" |> Option.some
8585+ with Failure _ -> None
8686+8787+ let preview_image html =
8888+ let preview_image =
8989+ match og_image html with
9090+ | None -> (
9191+ match image_src html with
9292+ | None -> twitter_image html
9393+ | Some x -> Some x)
9494+ | Some x -> Some x
9595+ in
9696+ match Option.map String.trim preview_image with
9797+ | Some "" -> None
9898+ | Some x -> Some x
9999+ | None -> None
100100+101101+ let description html =
102102+ let preview_image =
103103+ match og_description html with None -> description html | Some x -> Some x
104104+ in
105105+ match Option.map String.trim preview_image with
106106+ | Some "" -> None
107107+ | Some x -> Some x
108108+ | None -> None
109109+end
110110+111111+module Html_markdown = struct
112112+ [@@@warning "-32"] (* Suppress unused value warnings for internal utilities *)
113113+114114+ (** HTML to Markdown converter using Lambda Soup *)
115115+116116+ (** Extract all links from HTML content *)
117117+ let extract_links html_str =
118118+ try
119119+ let soup = Soup.parse html_str in
120120+ let links = Soup.select "a[href]" soup in
121121+ Soup.fold (fun acc link ->
122122+ match Soup.attribute "href" link with
123123+ | Some href ->
124124+ let text = Soup.texts link |> String.concat "" |> String.trim in
125125+ (href, text) :: acc
126126+ | None -> acc
127127+ ) [] links
128128+ |> List.rev
129129+ with _ -> []
130130+131131+ (** Check if string contains any whitespace *)
132132+ let has_whitespace s =
133133+ try
134134+ let _ = Str.search_forward (Str.regexp "[ \t\n\r]") s 0 in
135135+ true
136136+ with Not_found -> false
137137+138138+ (** Clean up excessive newlines and normalize spacing *)
139139+ let cleanup_markdown s =
140140+ (* Normalize line endings *)
141141+ let s = Str.global_replace (Str.regexp "\r\n") "\n" s in
142142+143143+ (* Remove trailing whitespace from each line *)
144144+ let lines = String.split_on_char '\n' s in
145145+ let lines = List.map (fun line ->
146146+ (* Trim trailing spaces but preserve leading spaces for indentation *)
147147+ let len = String.length line in
148148+ let rec find_last_non_space i =
149149+ if i < 0 then -1
150150+ else if line.[i] = ' ' || line.[i] = '\t' then find_last_non_space (i - 1)
151151+ else i
152152+ in
153153+ let last = find_last_non_space (len - 1) in
154154+ if last < 0 then ""
155155+ else String.sub line 0 (last + 1)
156156+ ) lines in
157157+158158+ (* Join back and collapse excessive blank lines *)
159159+ let s = String.concat "\n" lines in
160160+161161+ (* Replace 3+ consecutive newlines with just 2 *)
162162+ let s = Str.global_replace (Str.regexp "\n\n\n+") "\n\n" s in
163163+164164+ (* Trim leading and trailing whitespace *)
165165+ String.trim s
166166+167167+ (** Convert HTML to Markdown using state-based whitespace handling *)
168168+ let html_to_markdown html_str =
169169+ try
170170+ let soup = Soup.parse html_str in
171171+ let buffer = Buffer.create 256 in
172172+173173+ (* State: track if we need to insert a space before next text *)
174174+ let need_space = ref false in
175175+176176+ (* Get last character in buffer, if any *)
177177+ let last_char () =
178178+ let len = Buffer.length buffer in
179179+ if len = 0 then None
180180+ else Some (Buffer.nth buffer (len - 1))
181181+ in
182182+183183+ (* Add text with proper spacing *)
184184+ let add_text text =
185185+ let trimmed = String.trim text in
186186+ if trimmed <> "" then begin
187187+ (* Check if text starts with punctuation that shouldn't have space before it *)
188188+ let starts_with_punctuation =
189189+ String.length trimmed > 0 &&
190190+ (match trimmed.[0] with
191191+ | ',' | '.' | ';' | ':' | '!' | '?' | ')' | ']' | '}' -> true
192192+ | _ -> false)
193193+ in
194194+195195+ (* Add space if needed, unless we're before punctuation *)
196196+ if !need_space && not starts_with_punctuation then begin
197197+ match last_char () with
198198+ | Some (' ' | '\n') -> ()
199199+ | _ -> Buffer.add_char buffer ' '
200200+ end;
201201+ Buffer.add_string buffer trimmed;
202202+ need_space := false
203203+ end
204204+ in
205205+206206+ (* Mark that we need space before next text (for inline elements) *)
207207+ let mark_space_needed () =
208208+ need_space := has_whitespace (Buffer.contents buffer) || Buffer.length buffer > 0
209209+ in
210210+211211+ (* Process header with ID/anchor handling *)
212212+ let process_header level elem =
213213+ need_space := false;
214214+215215+ (* Check if header contains a link with an ID fragment *)
216216+ let link_opt = Soup.select_one "a[href]" elem in
217217+ let anchor_id = match link_opt with
218218+ | Some link ->
219219+ (match Soup.attribute "href" link with
220220+ | Some href ->
221221+ (* Extract fragment from URL *)
222222+ let uri = Uri.of_string href in
223223+ Uri.fragment uri
224224+ | None -> None)
225225+ | None -> None
226226+ in
227227+228228+ (* Add anchor if we found an ID *)
229229+ (match anchor_id with
230230+ | Some id when id <> "" ->
231231+ Buffer.add_string buffer (Printf.sprintf "\n<a name=\"%s\"></a>\n" id)
232232+ | _ -> ());
233233+234234+ (* Add the header marker *)
235235+ let marker = String.make level '#' in
236236+ Buffer.add_string buffer ("\n" ^ marker ^ " ");
237237+238238+ (* Get text content, excluding link tags *)
239239+ let text = Soup.texts elem |> String.concat " " |> String.trim in
240240+ Buffer.add_string buffer text;
241241+242242+ Buffer.add_string buffer "\n\n";
243243+ need_space := false
244244+ in
245245+246246+ let rec process_node node =
247247+ match Soup.element node with
248248+ | Some elem ->
249249+ let tag = Soup.name elem in
250250+ (match tag with
251251+ (* Block elements - reset space tracking *)
252252+ | "h1" -> process_header 1 elem
253253+ | "h2" -> process_header 2 elem
254254+ | "h3" -> process_header 3 elem
255255+ | "h4" -> process_header 4 elem
256256+ | "h5" -> process_header 5 elem
257257+ | "h6" -> process_header 6 elem
258258+ | "p" ->
259259+ need_space := false;
260260+ Soup.children elem |> Soup.iter process_node;
261261+ Buffer.add_string buffer "\n\n";
262262+ need_space := false
263263+ | "br" ->
264264+ Buffer.add_string buffer "\n";
265265+ need_space := false
266266+ (* Inline elements - preserve space tracking *)
267267+ | "strong" | "b" ->
268268+ (* Add space before if needed *)
269269+ if !need_space then begin
270270+ match last_char () with
271271+ | Some (' ' | '\n') -> ()
272272+ | _ -> Buffer.add_char buffer ' '
273273+ end;
274274+ Buffer.add_string buffer "**";
275275+ need_space := false;
276276+ Soup.children elem |> Soup.iter process_node;
277277+ Buffer.add_string buffer "**";
278278+ mark_space_needed ()
279279+ | "em" | "i" ->
280280+ (* Add space before if needed *)
281281+ if !need_space then begin
282282+ match last_char () with
283283+ | Some (' ' | '\n') -> ()
284284+ | _ -> Buffer.add_char buffer ' '
285285+ end;
286286+ Buffer.add_string buffer "*";
287287+ need_space := false;
288288+ Soup.children elem |> Soup.iter process_node;
289289+ Buffer.add_string buffer "*";
290290+ mark_space_needed ()
291291+ | "code" ->
292292+ (* Add space before if needed *)
293293+ if !need_space then begin
294294+ match last_char () with
295295+ | Some (' ' | '\n') -> ()
296296+ | _ -> Buffer.add_char buffer ' '
297297+ end;
298298+ Buffer.add_string buffer "`";
299299+ need_space := false;
300300+ Soup.children elem |> Soup.iter process_node;
301301+ Buffer.add_string buffer "`";
302302+ mark_space_needed ()
303303+ | "pre" ->
304304+ need_space := false;
305305+ Buffer.add_string buffer "\n```\n";
306306+ Soup.children elem |> Soup.iter process_node;
307307+ Buffer.add_string buffer "\n```\n\n";
308308+ need_space := false
309309+ | "a" ->
310310+ let text = Soup.texts elem |> String.concat " " |> String.trim in
311311+ let href = Soup.attribute "href" elem in
312312+ (match href with
313313+ | Some href ->
314314+ (* Add space before link if needed *)
315315+ if !need_space then begin
316316+ match last_char () with
317317+ | Some (' ' | '\n') -> ()
318318+ | _ -> Buffer.add_char buffer ' '
319319+ end;
320320+ need_space := false;
321321+322322+ (* Add the link markdown *)
323323+ if text = "" then
324324+ Buffer.add_string buffer (Printf.sprintf "<%s>" href)
325325+ else
326326+ Buffer.add_string buffer (Printf.sprintf "[%s](%s)" text href);
327327+328328+ (* Mark that space may be needed after link *)
329329+ mark_space_needed ()
330330+ | None ->
331331+ add_text text)
332332+ | "ul" | "ol" ->
333333+ need_space := false;
334334+ Buffer.add_string buffer "\n";
335335+ let is_ordered = tag = "ol" in
336336+ let items = Soup.children elem |> Soup.to_list in
337337+ List.iteri (fun i item ->
338338+ match Soup.element item with
339339+ | Some li when Soup.name li = "li" ->
340340+ need_space := false;
341341+ if is_ordered then
342342+ Buffer.add_string buffer (Printf.sprintf "%d. " (i + 1))
343343+ else
344344+ Buffer.add_string buffer "- ";
345345+ Soup.children li |> Soup.iter process_node;
346346+ Buffer.add_string buffer "\n"
347347+ | _ -> ()
348348+ ) items;
349349+ Buffer.add_string buffer "\n";
350350+ need_space := false
351351+ | "blockquote" ->
352352+ need_space := false;
353353+ Buffer.add_string buffer "\n> ";
354354+ Soup.children elem |> Soup.iter process_node;
355355+ Buffer.add_string buffer "\n\n";
356356+ need_space := false
357357+ | "img" ->
358358+ (* Add space before if needed *)
359359+ if !need_space then begin
360360+ match last_char () with
361361+ | Some (' ' | '\n') -> ()
362362+ | _ -> Buffer.add_char buffer ' '
363363+ end;
364364+ let alt = Soup.attribute "alt" elem |> Option.value ~default:"" in
365365+ let src = Soup.attribute "src" elem |> Option.value ~default:"" in
366366+ Buffer.add_string buffer (Printf.sprintf "" alt src);
367367+ need_space := false;
368368+ mark_space_needed ()
369369+ | "hr" ->
370370+ need_space := false;
371371+ Buffer.add_string buffer "\n---\n\n";
372372+ need_space := false
373373+ (* Strip these tags but keep content *)
374374+ | "div" | "span" | "article" | "section" | "header" | "footer"
375375+ | "main" | "nav" | "aside" | "figure" | "figcaption" | "details" | "summary" ->
376376+ Soup.children elem |> Soup.iter process_node
377377+ (* Ignore script, style, etc *)
378378+ | "script" | "style" | "noscript" -> ()
379379+ (* Default: just process children *)
380380+ | _ ->
381381+ Soup.children elem |> Soup.iter process_node)
382382+ | None ->
383383+ (* Text node - handle whitespace properly *)
384384+ match Soup.leaf_text node with
385385+ | Some text ->
386386+ (* If text is only whitespace, mark that we need space *)
387387+ let trimmed = String.trim text in
388388+ if trimmed = "" then begin
389389+ if has_whitespace text then
390390+ need_space := true
391391+ end else begin
392392+ (* Text has content - check if it had leading/trailing whitespace *)
393393+ let had_leading_ws = has_whitespace text &&
394394+ (String.length text > 0 &&
395395+ (text.[0] = ' ' || text.[0] = '\t' || text.[0] = '\n' || text.[0] = '\r')) in
396396+397397+ (* If had leading whitespace, mark we need space *)
398398+ if had_leading_ws then need_space := true;
399399+400400+ (* Add the text content *)
401401+ add_text trimmed;
402402+403403+ (* If had trailing whitespace, mark we need space for next *)
404404+ let had_trailing_ws = has_whitespace text &&
405405+ (String.length text > 0 &&
406406+ let last = text.[String.length text - 1] in
407407+ last = ' ' || last = '\t' || last = '\n' || last = '\r') in
408408+ if had_trailing_ws then need_space := true
409409+ end
410410+ | None -> ()
411411+ in
412412+413413+ Soup.children soup |> Soup.iter process_node;
414414+415415+ (* Clean up the result *)
416416+ let result = Buffer.contents buffer in
417417+ cleanup_markdown result
418418+ with _ -> html_str
419419+420420+ (** Convert HTML content to clean Markdown *)
421421+ let to_markdown html_str =
422422+ html_to_markdown html_str
423423+end
424424+425425+(** {1 Feed Sources} *)
426426+427427+module Source = struct
428428+ type t = {
429429+ name : string;
430430+ url : string;
431431+ }
432432+433433+ let make ~name ~url = { name; url }
434434+435435+ let name t = t.name
436436+ let url t = t.url
437437+438438+ let jsont =
439439+ let make name url = { name; url } in
440440+ Jsont.Object.map ~kind:"Source" make
441441+ |> Jsont.Object.mem "name" Jsont.string ~enc:(fun s -> s.name)
442442+ |> Jsont.Object.mem "url" Jsont.string ~enc:(fun s -> s.url)
443443+ |> Jsont.Object.finish
444444+end
445445+446446+(** {1 HTTP Session Management} *)
447447+448448+module Session = struct
449449+ type t = {
450450+ session : (float Eio.Time.clock_ty Eio.Resource.t,
451451+ [`Generic | `Unix] Eio.Net.ty Eio.Resource.t) Requests.t;
452452+ }
453453+454454+ let init ~sw env =
455455+ Log.info (fun m -> m "Initializing River session");
456456+ let session = Requests.create ~sw
457457+ ~default_headers:(Requests.Headers.of_list [
458458+ ("User-Agent", "OCaml-River/1.0");
459459+ ])
460460+ ~follow_redirects:true
461461+ ~max_redirects:5
462462+ ~verify_tls:true
463463+ env
464464+ in
465465+ { session }
466466+467467+ let with_session env f =
468468+ Log.info (fun m -> m "Creating River session");
469469+ Eio.Switch.run @@ fun sw ->
470470+ let client = init ~sw env in
471471+ f client
472472+473473+ let get_requests_session t = t.session
474474+end
475475+476476+(** {1 Feeds and Posts} *)
477477+478478+module Feed = struct
479479+ type feed_content =
480480+ | Atom of Syndic.Atom.feed
481481+ | Rss2 of Syndic.Rss2.channel
482482+ | Json of Jsonfeed.t
483483+484484+ type t = {
485485+ source : Source.t;
486486+ title : string;
487487+ content : feed_content;
488488+ }
489489+490490+ let string_of_feed = function
491491+ | Atom _ -> "Atom"
492492+ | Rss2 _ -> "Rss2"
493493+ | Json _ -> "JSONFeed"
494494+495495+ let classify_feed ~xmlbase (body : string) =
496496+ Log.debug (fun m -> m "Attempting to parse feed (%d bytes)" (String.length body));
497497+498498+ (* Quick check - does it look like JSON? *)
499499+ let looks_like_json =
500500+ String.length body > 0 &&
501501+ let first_char = String.get body 0 in
502502+ first_char = '{' || first_char = '['
503503+ in
504504+505505+ if looks_like_json then (
506506+ (* Try JSONFeed first *)
507507+ Log.debug (fun m -> m "Body looks like JSON, trying JSONFeed parser");
508508+ match Jsonfeed.of_string body with
509509+ | Ok jsonfeed ->
510510+ Log.debug (fun m -> m "Successfully parsed as JSONFeed");
511511+ Json jsonfeed
512512+ | Error err ->
513513+ Log.debug (fun m -> m "Not a JSONFeed: %s" (Jsont.Error.to_string err));
514514+ (* Fall through to XML parsing *)
515515+ failwith "Not a valid JSONFeed"
516516+ ) else (
517517+ (* Try XML formats *)
518518+ try
519519+ let feed = Atom (Syndic.Atom.parse ~xmlbase (Xmlm.make_input (`String (0, body)))) in
520520+ Log.debug (fun m -> m "Successfully parsed as Atom feed");
521521+ feed
522522+ with
523523+ | Syndic.Atom.Error.Error (pos, msg) -> (
524524+ Log.debug (fun m -> m "Not an Atom feed: %s at position (%d, %d)"
525525+ msg (fst pos) (snd pos));
526526+ try
527527+ let feed = Rss2 (Syndic.Rss2.parse ~xmlbase (Xmlm.make_input (`String (0, body)))) in
528528+ Log.debug (fun m -> m "Successfully parsed as RSS2 feed");
529529+ feed
530530+ with Syndic.Rss2.Error.Error (pos, msg) ->
531531+ Log.err (fun m -> m "Failed to parse as RSS2: %s at position (%d, %d)"
532532+ msg (fst pos) (snd pos));
533533+ failwith "Neither Atom nor RSS2 feed")
534534+ | Not_found as e ->
535535+ Log.err (fun m -> m "Not_found exception during Atom feed parsing");
536536+ Log.err (fun m -> m "Backtrace:\n%s" (Printexc.get_backtrace ()));
537537+ raise e
538538+ | e ->
539539+ Log.err (fun m -> m "Unexpected exception during feed parsing: %s"
540540+ (Printexc.to_string e));
541541+ Log.err (fun m -> m "Backtrace:\n%s" (Printexc.get_backtrace ()));
542542+ raise e
543543+ )
544544+545545+ let fetch session source =
546546+ Log.info (fun m -> m "Fetching feed: %s" (Source.name source));
547547+548548+ let xmlbase = Uri.of_string (Source.url source) in
549549+550550+ (* Use Requests_json_api.get_result for clean Result-based error handling *)
551551+ let requests_session = Session.get_requests_session session in
552552+ let response =
553553+ match Requests_json_api.get_result requests_session (Source.url source) with
554554+ | Ok body ->
555555+ Log.info (fun m -> m "Successfully fetched %s (%d bytes)"
556556+ (Source.url source) (String.length body));
557557+ body
558558+ | Error (status, msg) ->
559559+ Log.err (fun m -> m "Failed to fetch feed '%s': HTTP %d - %s"
560560+ (Source.name source) status msg);
561561+ failwith (Printf.sprintf "HTTP %d: %s" status msg)
562562+ in
563563+564564+ let content = classify_feed ~xmlbase response in
565565+ let title =
566566+ match content with
567567+ | Atom atom -> Text_extract.string_of_text_construct atom.Syndic.Atom.title
568568+ | Rss2 ch -> ch.Syndic.Rss2.title
569569+ | Json jsonfeed -> Jsonfeed.title jsonfeed
570570+ in
571571+572572+ Log.info (fun m -> m "Successfully fetched %s feed '%s' (title: '%s')"
573573+ (string_of_feed content) (Source.name source) title);
574574+575575+ { source; title; content }
576576+577577+ let source t = t.source
578578+end
579579+580580+(** {1 Posts} *)
235812424-(* Abstract session type *)
2525-type session = Client.t
582582+module Post = struct
583583+ type t = {
584584+ id : string;
585585+ title : string;
586586+ link : Uri.t option;
587587+ date : Syndic.Date.t option;
588588+ feed : Feed.t;
589589+ author : string;
590590+ email : string;
591591+ content : Soup.soup Soup.node;
592592+ mutable link_response : (string, string) result option;
593593+ tags : string list;
594594+ summary : string option;
595595+ }
265962727-type source = Feed.source = { name : string; url : string }
2828-type feed = Feed.t
2929-type post = Post.t
597597+ (** Generate a stable, unique ID from available data *)
598598+ let generate_id ?guid ?link ?title ?date ~feed_url () =
599599+ match guid with
600600+ | Some id when id <> "" ->
601601+ (* Use explicit ID/GUID if available *)
602602+ id
603603+ | _ ->
604604+ match link with
605605+ | Some uri when Uri.to_string uri <> "" ->
606606+ (* Use permalink as ID (stable and unique) *)
607607+ Uri.to_string uri
608608+ | _ ->
609609+ (* Fallback: hash of feed_url + title + date *)
610610+ let title_str = Option.value title ~default:"" in
611611+ let date_str =
612612+ match date with
613613+ | Some d -> Ptime.to_rfc3339 d
614614+ | None -> ""
615615+ in
616616+ let composite = Printf.sprintf "%s|%s|%s" feed_url title_str date_str in
617617+ (* Use SHA256 for stable hashing *)
618618+ Digest.string composite |> Digest.to_hex
306193131-(* Session management *)
3232-let init ~sw env =
3333- Log.info (fun m -> m "Initializing River session");
3434- Internal_client.create ~sw env
620620+ let resolve_links_attr ~xmlbase attr el =
621621+ Soup.R.attribute attr el
622622+ |> Uri.of_string
623623+ |> Syndic.XML.resolve ~xmlbase
624624+ |> Uri.to_string
625625+ |> fun value -> Soup.set_attribute attr value el
356263636-let with_session env f =
3737- Log.info (fun m -> m "Creating River session");
3838- Internal_client.with_client env f
627627+ (* Things that posts should not contain *)
628628+ let undesired_tags = [ "style"; "script" ]
629629+ let undesired_attr = [ "id" ]
396304040-(* Feed operations *)
4141-let fetch session source =
4242- Log.info (fun m -> m "Fetching feed: %s" source.name);
4343- Feed.fetch session source
631631+ let html_of_text ?xmlbase s =
632632+ let soup = Soup.parse s in
633633+ let ($$) = Soup.($$) in
634634+ soup $$ "a[href]" |> Soup.iter (resolve_links_attr ~xmlbase "href");
635635+ soup $$ "img[src]" |> Soup.iter (resolve_links_attr ~xmlbase "src");
636636+ undesired_tags |> List.iter (fun tag -> soup $$ tag |> Soup.iter Soup.delete);
637637+ soup $$ "*" |> Soup.iter (fun el ->
638638+ undesired_attr |> List.iter (fun attr -> Soup.delete_attribute attr el));
639639+ soup
446404545-let name feed = feed.Feed.name
4646-let url feed = feed.Feed.url
641641+ (* Do not trust sites using XML for HTML content. Convert to string and parse
642642+ back. (Does not always fix bad HTML unfortunately.) *)
643643+ let html_of_syndic =
644644+ let ns_prefix _ = Some "" in
645645+ fun ?xmlbase h ->
646646+ html_of_text ?xmlbase
647647+ (String.concat "" (List.map (Syndic.XML.to_string ~ns_prefix) h))
476484848-let posts feeds =
4949- Log.info (fun m -> m "Aggregating posts from %d feed(s)" (List.length feeds));
5050- let result = Post.get_posts feeds in
5151- Log.info (fun m -> m "Aggregated %d posts total" (List.length result));
5252- result
649649+ let string_of_option = function None -> "" | Some s -> s
536505454-let title post = post.Post.title
5555-let link post = post.Post.link
5656-let date post = post.Post.date
5757-let feed post = post.Post.feed
5858-let author post = post.Post.author
5959-let email post = post.Post.email
6060-let content post = Soup.to_string post.Post.content
6161-let id post = post.Post.id
6262-let tags post = post.Post.tags
6363-let summary post = post.Post.summary
651651+ let post_compare p1 p2 =
652652+ (* Most recent posts first. Posts with no date are always last *)
653653+ match (p1.date, p2.date) with
654654+ | Some d1, Some d2 -> Syndic.Date.compare d2 d1
655655+ | None, Some _ -> 1
656656+ | Some _, None -> -1
657657+ | None, None -> 1
646586565-let meta_description _post =
6666- (* TODO: This requires environment for HTTP access *)
6767- Log.debug (fun m -> m "meta_description not implemented (requires environment)");
6868- None
659659+ let rec remove n l =
660660+ if n <= 0 then l else match l with [] -> [] | _ :: tl -> remove (n - 1) tl
696617070-let seo_image _post =
7171- (* TODO: This requires environment for HTTP access *)
7272- Log.debug (fun m -> m "seo_image not implemented (requires environment)");
7373- None
662662+ let rec take n = function
663663+ | [] -> []
664664+ | e :: tl -> if n > 0 then e :: take (n - 1) tl else []
746657575-let create_atom_entries posts =
7676- Log.info (fun m -> m "Creating Atom entries for %d posts" (List.length posts));
7777- Post.mk_entries posts
666666+ let post_of_atom ~(feed : Feed.t) (e : Syndic.Atom.entry) =
667667+ Log.debug (fun m -> m "Processing Atom entry: %s"
668668+ (Text_extract.string_of_text_construct e.title));
786697979-(* JSONFeed support *)
8080-let create_jsonfeed_items posts =
8181- Log.info (fun m -> m "Creating JSONFeed items for %d posts" (List.length posts));
8282- Post.mk_jsonfeed_items posts
670670+ let link =
671671+ try
672672+ Some
673673+ (List.find (fun l -> l.Syndic.Atom.rel = Syndic.Atom.Alternate) e.links)
674674+ .href
675675+ with Not_found -> (
676676+ Log.debug (fun m -> m "No alternate link found, trying fallback");
677677+ match e.links with
678678+ | l :: _ -> Some l.href
679679+ | [] -> (
680680+ match Uri.scheme e.id with
681681+ | Some "http" -> Some e.id
682682+ | Some "https" -> Some e.id
683683+ | _ -> None))
684684+ in
685685+ let date =
686686+ match e.published with Some _ -> e.published | None -> Some e.updated
687687+ in
688688+ let content =
689689+ match e.content with
690690+ | Some (Text s) -> html_of_text s
691691+ | Some (Html (xmlbase, s)) -> html_of_text ?xmlbase s
692692+ | Some (Xhtml (xmlbase, h)) -> html_of_syndic ?xmlbase h
693693+ | Some (Mime _) | Some (Src _) | None -> (
694694+ match e.summary with
695695+ | Some (Text s) -> html_of_text s
696696+ | Some (Html (xmlbase, s)) -> html_of_text ?xmlbase s
697697+ | Some (Xhtml (xmlbase, h)) -> html_of_syndic ?xmlbase h
698698+ | None -> Soup.parse "")
699699+ in
700700+ let is_valid_author_name name =
701701+ (* Filter out empty strings and placeholder values like "Unknown" *)
702702+ let trimmed = String.trim name in
703703+ trimmed <> "" && trimmed <> "Unknown"
704704+ in
705705+ let author_name =
706706+ (* Fallback chain for author:
707707+ 1. Entry author (if present, not empty, and not "Unknown")
708708+ 2. Feed-level author (from Atom feed metadata)
709709+ 3. Feed title (from Atom feed metadata)
710710+ 4. Source name (manually entered feed name) *)
711711+ try
712712+ let author, _ = e.authors in
713713+ let trimmed = String.trim author.name in
714714+ if is_valid_author_name author.name then trimmed
715715+ else raise Not_found (* Try feed-level author *)
716716+ with Not_found -> (
717717+ match feed.content with
718718+ | Feed.Atom atom_feed -> (
719719+ (* Try feed-level authors *)
720720+ match atom_feed.Syndic.Atom.authors with
721721+ | author :: _ when is_valid_author_name author.name ->
722722+ String.trim author.name
723723+ | _ ->
724724+ (* Use feed title *)
725725+ Text_extract.string_of_text_construct atom_feed.Syndic.Atom.title)
726726+ | Feed.Rss2 _ | Feed.Json _ ->
727727+ (* For RSS2 and JSONFeed, use the source name *)
728728+ Source.name feed.source)
729729+ in
730730+ (* Extract tags from Atom categories *)
731731+ let tags =
732732+ List.map (fun cat -> cat.Syndic.Atom.term) e.categories
733733+ in
734734+ (* Extract summary - convert from text_construct to string *)
735735+ let summary =
736736+ match e.summary with
737737+ | Some s -> Some (Text_extract.string_of_text_construct s)
738738+ | None -> None
739739+ in
740740+ (* Generate unique ID *)
741741+ let guid = Uri.to_string e.id in
742742+ let title_str = Text_extract.string_of_text_construct e.title in
743743+ let id =
744744+ generate_id ~guid ?link ~title:title_str ?date
745745+ ~feed_url:(Source.url feed.source) ()
746746+ in
747747+ {
748748+ id;
749749+ title = title_str;
750750+ link;
751751+ date;
752752+ feed;
753753+ author = author_name;
754754+ email = "";
755755+ content;
756756+ link_response = None;
757757+ tags;
758758+ summary;
759759+ }
837608484-let create_jsonfeed ~title ?home_page_url ?feed_url ?description ?icon ?favicon posts =
8585- Log.info (fun m -> m "Creating JSONFeed with %d posts" (List.length posts));
8686- let items = create_jsonfeed_items posts in
8787- Jsonfeed.create ~title ?home_page_url ?feed_url ?description ?icon ?favicon ~items ()
761761+ let post_of_rss2 ~(feed : Feed.t) it =
762762+ let title, content =
763763+ match it.Syndic.Rss2.story with
764764+ | All (t, xmlbase, d) -> (
765765+ ( t,
766766+ match it.content with
767767+ | _, "" -> html_of_text ?xmlbase d
768768+ | xmlbase, c -> html_of_text ?xmlbase c ))
769769+ | Title t ->
770770+ let xmlbase, c = it.content in
771771+ (t, html_of_text ?xmlbase c)
772772+ | Description (xmlbase, d) -> (
773773+ ( "",
774774+ match it.content with
775775+ | _, "" -> html_of_text ?xmlbase d
776776+ | xmlbase, c -> html_of_text ?xmlbase c ))
777777+ in
778778+ (* Note: it.link is of type Uri.t option in Syndic *)
779779+ let link =
780780+ match (it.guid, it.link) with
781781+ | Some u, _ when u.permalink -> Some u.data
782782+ | _, Some _ -> it.link
783783+ | Some u, _ ->
784784+ (* Sometimes the guid is indicated with isPermaLink="false" but is
785785+ nonetheless the only URL we get (e.g. ocamlpro). *)
786786+ Some u.data
787787+ | None, None -> None
788788+ in
789789+ (* Extract GUID string for ID generation *)
790790+ let guid_str =
791791+ match it.guid with
792792+ | Some u -> Some (Uri.to_string u.data)
793793+ | None -> None
794794+ in
795795+ (* RSS2 doesn't have a categories field exposed, use empty list *)
796796+ let tags = [] in
797797+ (* RSS2 doesn't have a separate summary field, so leave it empty *)
798798+ let summary = None in
799799+ (* Generate unique ID *)
800800+ let id =
801801+ generate_id ?guid:guid_str ?link ~title ?date:it.pubDate
802802+ ~feed_url:(Source.url feed.source) ()
803803+ in
804804+ {
805805+ id;
806806+ title;
807807+ link;
808808+ feed;
809809+ author = Source.name feed.source;
810810+ email = string_of_option it.author;
811811+ content;
812812+ date = it.pubDate;
813813+ link_response = None;
814814+ tags;
815815+ summary;
816816+ }
888178989-let jsonfeed_to_string ?(minify = false) jsonfeed =
9090- match Jsonfeed.to_string ~minify jsonfeed with
9191- | Ok s -> Ok s
9292- | Error err -> Error (Jsont.Error.to_string err)
818818+ let post_of_jsonfeed_item ~(feed : Feed.t) (item : Jsonfeed.Item.t) =
819819+ Log.debug (fun m -> m "Processing JSONFeed item: %s"
820820+ (Option.value (Jsonfeed.Item.title item) ~default:"Untitled"));
938219494-type feed_content =
9595- | Atom of Syndic.Atom.feed
9696- | Rss2 of Syndic.Rss2.channel
9797- | JSONFeed of Jsonfeed.t
822822+ (* Extract content - prefer HTML, fall back to text *)
823823+ let content =
824824+ match Jsonfeed.Item.content item with
825825+ | `Html html -> html_of_text html
826826+ | `Text text -> html_of_text text
827827+ | `Both (html, _text) -> html_of_text html
828828+ in
988299999-let feed_content feed =
100100- match feed.Feed.content with
101101- | Feed.Atom f -> Atom f
102102- | Feed.Rss2 ch -> Rss2 ch
103103- | Feed.Json jf -> JSONFeed jf830830+ (* Extract author - use first author if multiple *)
831831+ let author_name, author_email =
832832+ match Jsonfeed.Item.authors item with
833833+ | Some (first :: _) ->
834834+ let name = Jsonfeed.Author.name first |> Option.value ~default:"" in
835835+ (* JSONFeed authors don't typically have email *)
836836+ (name, "")
837837+ | _ ->
838838+ (* Fall back to feed-level authors or feed title *)
839839+ (match feed.content with
840840+ | Feed.Json jsonfeed ->
841841+ (match Jsonfeed.authors jsonfeed with
842842+ | Some (first :: _) ->
843843+ let name = Jsonfeed.Author.name first |> Option.value ~default:feed.title in
844844+ (name, "")
845845+ | _ -> (feed.title, ""))
846846+ | _ -> (feed.title, ""))
847847+ in
848848+849849+ (* Link - use url field *)
850850+ let link =
851851+ Jsonfeed.Item.url item
852852+ |> Option.map Uri.of_string
853853+ in
854854+855855+ (* Date *)
856856+ let date = Jsonfeed.Item.date_published item in
857857+858858+ (* Summary *)
859859+ let summary = Jsonfeed.Item.summary item in
860860+861861+ (* Tags *)
862862+ let tags =
863863+ Jsonfeed.Item.tags item
864864+ |> Option.value ~default:[]
865865+ in
866866+867867+ (* Generate unique ID - JSONFeed items always have an id field (required) *)
868868+ let guid = Jsonfeed.Item.id item in
869869+ let title_str = Jsonfeed.Item.title item |> Option.value ~default:"Untitled" in
870870+ let id =
871871+ generate_id ~guid ?link ~title:title_str ?date
872872+ ~feed_url:(Source.url feed.source) ()
873873+ in
874874+875875+ {
876876+ id;
877877+ title = title_str;
878878+ link;
879879+ date;
880880+ feed;
881881+ author = author_name;
882882+ email = author_email;
883883+ content;
884884+ link_response = None;
885885+ tags;
886886+ summary;
887887+ }
888888+889889+ let posts_of_feed c =
890890+ match c.Feed.content with
891891+ | Feed.Atom f ->
892892+ let posts = List.map (post_of_atom ~feed:c) f.Syndic.Atom.entries in
893893+ Log.debug (fun m -> m "Extracted %d posts from Atom feed '%s'"
894894+ (List.length posts) (Source.name c.source));
895895+ posts
896896+ | Feed.Rss2 ch ->
897897+ let posts = List.map (post_of_rss2 ~feed:c) ch.Syndic.Rss2.items in
898898+ Log.debug (fun m -> m "Extracted %d posts from RSS2 feed '%s'"
899899+ (List.length posts) (Source.name c.source));
900900+ posts
901901+ | Feed.Json jsonfeed ->
902902+ let items = Jsonfeed.items jsonfeed in
903903+ let posts = List.map (post_of_jsonfeed_item ~feed:c) items in
904904+ Log.debug (fun m -> m "Extracted %d posts from JSONFeed '%s'"
905905+ (List.length posts) (Source.name c.source));
906906+ posts
907907+908908+ let get_posts ?n ?(ofs = 0) planet_feeds =
909909+ Log.info (fun m -> m "Processing %d feeds for posts" (List.length planet_feeds));
910910+911911+ let posts = List.concat @@ List.map posts_of_feed planet_feeds in
912912+ Log.debug (fun m -> m "Total posts collected: %d" (List.length posts));
913913+914914+ let posts = List.sort post_compare posts in
915915+ Log.debug (fun m -> m "Posts sorted by date (most recent first)");
916916+917917+ let posts = remove ofs posts in
918918+ let result =
919919+ match n with
920920+ | None ->
921921+ Log.debug (fun m -> m "Returning all %d posts (offset=%d)"
922922+ (List.length posts) ofs);
923923+ posts
924924+ | Some n ->
925925+ let limited = take n posts in
926926+ Log.debug (fun m -> m "Returning %d posts (requested=%d, offset=%d)"
927927+ (List.length limited) n ofs);
928928+ limited
929929+ in
930930+ result
931931+932932+ let of_feeds feeds = get_posts feeds
933933+934934+ let feed t = t.feed
935935+ let title t = t.title
936936+ let link t = t.link
937937+ let date t = t.date
938938+ let author t = t.author
939939+ let email t = t.email
940940+ let content t = Soup.to_string t.content
941941+ let id t = t.id
942942+ let tags t = t.tags
943943+ let summary t = t.summary
944944+945945+ let meta_description _t =
946946+ (* TODO: This requires environment for HTTP access *)
947947+ Log.debug (fun m -> m "meta_description not implemented (requires environment)");
948948+ None
949949+950950+ let seo_image _t =
951951+ (* TODO: This requires environment for HTTP access *)
952952+ Log.debug (fun m -> m "seo_image not implemented (requires environment)");
953953+ None
954954+end
955955+956956+(** {1 Format Conversion and Export} *)
957957+958958+module Format = struct
959959+ module Atom = struct
960960+ let entry_of_post post =
961961+ let content = Syndic.Atom.Html (None, Post.content post) in
962962+ let contributors =
963963+ [ Syndic.Atom.author ~uri:(Uri.of_string (Source.url (Feed.source (Post.feed post))))
964964+ (Source.name (Feed.source (Post.feed post))) ]
965965+ in
966966+ let links =
967967+ match Post.link post with
968968+ | Some l -> [ Syndic.Atom.link ~rel:Syndic.Atom.Alternate l ]
969969+ | None -> []
970970+ in
971971+ let id =
972972+ match Post.link post with
973973+ | Some l -> l
974974+ | None -> Uri.of_string (Digest.to_hex (Digest.string (Post.title post)))
975975+ in
976976+ let authors = (Syndic.Atom.author ~email:(Post.email post) (Post.author post), []) in
977977+ let title : Syndic.Atom.text_construct = Syndic.Atom.Text (Post.title post) in
978978+ let updated =
979979+ match Post.date post with
980980+ (* Atom entry requires a date but RSS2 does not. So if a date
981981+ * is not available, just capture the current date. *)
982982+ | None -> Ptime.of_float_s (Unix.gettimeofday ()) |> Option.get
983983+ | Some d -> d
984984+ in
985985+ Syndic.Atom.entry ~content ~contributors ~links ~id ~authors ~title ~updated
986986+ ()
987987+988988+ let entries_of_posts posts = List.map entry_of_post posts
989989+990990+ let feed_of_entries ~title ?id ?(authors = []) entries =
991991+ let feed_id = match id with
992992+ | Some i -> Uri.of_string i
993993+ | None -> Uri.of_string "urn:river:merged"
994994+ in
995995+ let feed_authors = List.map (fun (name, email) ->
996996+ match email with
997997+ | Some e -> Syndic.Atom.author ~email:e name
998998+ | None -> Syndic.Atom.author name
999999+ ) authors in
10001000+ {
10011001+ Syndic.Atom.id = feed_id;
10021002+ title = Syndic.Atom.Text title;
10031003+ updated = Ptime.of_float_s (Unix.time ()) |> Option.get;
10041004+ entries;
10051005+ authors = feed_authors;
10061006+ categories = [];
10071007+ contributors = [];
10081008+ generator = Some {
10091009+ Syndic.Atom.version = Some "1.0";
10101010+ uri = None;
10111011+ content = "River Feed Aggregator";
10121012+ };
10131013+ icon = None;
10141014+ links = [];
10151015+ logo = None;
10161016+ rights = None;
10171017+ subtitle = None;
10181018+ }
10191019+10201020+ let to_string feed =
10211021+ let output = Buffer.create 4096 in
10221022+ Syndic.Atom.output feed (`Buffer output);
10231023+ Buffer.contents output
10241024+ end
10251025+10261026+ module Rss2 = struct
10271027+ let of_feed feed =
10281028+ match feed.Feed.content with
10291029+ | Feed.Rss2 ch -> Some ch
10301030+ | _ -> None
10311031+ end
10321032+10331033+ module Jsonfeed = struct
10341034+ let item_of_post post =
10351035+ (* Convert HTML content back to string *)
10361036+ let html = Post.content post in
10371037+ let content = `Html html in
10381038+10391039+ (* Create author *)
10401040+ let authors =
10411041+ if Post.author post <> "" then
10421042+ let author = Jsonfeed.Author.create ~name:(Post.author post) () in
10431043+ Some [author]
10441044+ else
10451045+ None
10461046+ in
10471047+10481048+ (* Create item *)
10491049+ Jsonfeed.Item.create
10501050+ ~id:(Post.id post)
10511051+ ~content
10521052+ ?url:(Option.map Uri.to_string (Post.link post))
10531053+ ~title:(Post.title post)
10541054+ ?summary:(Post.summary post)
10551055+ ?date_published:(Post.date post)
10561056+ ?authors
10571057+ ~tags:(Post.tags post)
10581058+ ()
10591059+10601060+ let items_of_posts posts = List.map item_of_post posts
10611061+10621062+ let feed_of_items ~title ?home_page_url ?feed_url ?description ?icon ?favicon items =
10631063+ Jsonfeed.create ~title ?home_page_url ?feed_url ?description ?icon ?favicon ~items ()
10641064+10651065+ let feed_of_posts ~title ?home_page_url ?feed_url ?description ?icon ?favicon posts =
10661066+ let items = items_of_posts posts in
10671067+ feed_of_items ~title ?home_page_url ?feed_url ?description ?icon ?favicon items
10681068+10691069+ let to_string ?(minify = false) jsonfeed =
10701070+ match Jsonfeed.to_string ~minify jsonfeed with
10711071+ | Ok s -> Ok s
10721072+ | Error err -> Error (Jsont.Error.to_string err)
10731073+10741074+ let of_feed feed =
10751075+ match feed.Feed.content with
10761076+ | Feed.Json jf -> Some jf
10771077+ | _ -> None
10781078+ end
10791079+end
10801080+10811081+(** {1 User Management} *)
10821082+10831083+module User = struct
10841084+ type t = {
10851085+ username : string;
10861086+ fullname : string;
10871087+ email : string option;
10881088+ feeds : Source.t list;
10891089+ last_synced : string option;
10901090+ }
10911091+10921092+ let make ~username ~fullname ?email ?(feeds = []) ?last_synced () =
10931093+ { username; fullname; email; feeds; last_synced }
10941094+10951095+ let username t = t.username
10961096+ let fullname t = t.fullname
10971097+ let email t = t.email
10981098+ let feeds t = t.feeds
10991099+ let last_synced t = t.last_synced
11001100+11011101+ let add_feed t source =
11021102+ { t with feeds = source :: t.feeds }
11031103+11041104+ let remove_feed t ~url =
11051105+ let feeds = List.filter (fun s -> Source.url s <> url) t.feeds in
11061106+ { t with feeds }
11071107+11081108+ let set_last_synced t timestamp =
11091109+ { t with last_synced = Some timestamp }
11101110+11111111+ let jsont =
11121112+ let make username fullname email feeds last_synced =
11131113+ { username; fullname; email; feeds; last_synced }
11141114+ in
11151115+ Jsont.Object.map ~kind:"User" make
11161116+ |> Jsont.Object.mem "username" Jsont.string ~enc:(fun u -> u.username)
11171117+ |> Jsont.Object.mem "fullname" Jsont.string ~enc:(fun u -> u.fullname)
11181118+ |> Jsont.Object.opt_mem "email" Jsont.string ~enc:(fun u -> u.email)
11191119+ |> Jsont.Object.mem "feeds" (Jsont.list Source.jsont) ~enc:(fun u -> u.feeds)
11201120+ |> Jsont.Object.opt_mem "last_synced" Jsont.string ~enc:(fun u -> u.last_synced)
11211121+ |> Jsont.Object.finish
11221122+end
11231123+11241124+(** {1 Feed Quality Analysis} *)
11251125+11261126+module Quality = struct
11271127+ type t = {
11281128+ total_entries : int;
11291129+ entries_with_summary : int;
11301130+ entries_with_author : int;
11311131+ entries_with_date : int;
11321132+ entries_with_content : int;
11331133+ entries_with_tags : int;
11341134+ avg_content_length : float;
11351135+ min_content_length : int;
11361136+ max_content_length : int;
11371137+ posting_frequency_days : float option;
11381138+ quality_score : float;
11391139+ }
11401140+11411141+ let make ~total_entries ~entries_with_summary ~entries_with_author
11421142+ ~entries_with_date ~entries_with_content ~entries_with_tags
11431143+ ~avg_content_length ~min_content_length ~max_content_length
11441144+ ~posting_frequency_days ~quality_score =
11451145+ {
11461146+ total_entries;
11471147+ entries_with_summary;
11481148+ entries_with_author;
11491149+ entries_with_date;
11501150+ entries_with_content;
11511151+ entries_with_tags;
11521152+ avg_content_length;
11531153+ min_content_length;
11541154+ max_content_length;
11551155+ posting_frequency_days;
11561156+ quality_score;
11571157+ }
11581158+11591159+ let total_entries t = t.total_entries
11601160+ let entries_with_summary t = t.entries_with_summary
11611161+ let entries_with_author t = t.entries_with_author
11621162+ let entries_with_date t = t.entries_with_date
11631163+ let entries_with_content t = t.entries_with_content
11641164+ let entries_with_tags t = t.entries_with_tags
11651165+ let avg_content_length t = t.avg_content_length
11661166+ let min_content_length t = t.min_content_length
11671167+ let max_content_length t = t.max_content_length
11681168+ let posting_frequency_days t = t.posting_frequency_days
11691169+ let quality_score t = t.quality_score
11701170+11711171+ (** Get content length from an Atom entry *)
11721172+ let get_content_length (entry : Syndic.Atom.entry) =
11731173+ match entry.content with
11741174+ | Some (Syndic.Atom.Text s) -> String.length s
11751175+ | Some (Syndic.Atom.Html (_, s)) -> String.length s
11761176+ | Some (Syndic.Atom.Xhtml (_, _)) -> 0 (* Could calculate but complex *)
11771177+ | Some (Syndic.Atom.Mime _) -> 0
11781178+ | Some (Syndic.Atom.Src _) -> 0
11791179+ | None -> (
11801180+ match entry.summary with
11811181+ | Some (Syndic.Atom.Text s) -> String.length s
11821182+ | Some (Syndic.Atom.Html (_, s)) -> String.length s
11831183+ | Some (Syndic.Atom.Xhtml (_, _)) -> 0
11841184+ | None -> 0)
11851185+11861186+ (** Check if entry has non-empty summary *)
11871187+ let has_summary (entry : Syndic.Atom.entry) =
11881188+ match entry.summary with
11891189+ | Some (Syndic.Atom.Text s) when String.trim s <> "" -> true
11901190+ | Some (Syndic.Atom.Html (_, s)) when String.trim s <> "" -> true
11911191+ | Some (Syndic.Atom.Xhtml (_, _)) -> true
11921192+ | _ -> false
11931193+11941194+ (** Check if entry has author *)
11951195+ let has_author (entry : Syndic.Atom.entry) =
11961196+ let (author, _) = entry.authors in
11971197+ String.trim author.name <> ""
11981198+11991199+ (** Check if entry has content *)
12001200+ let has_content (entry : Syndic.Atom.entry) =
12011201+ get_content_length entry > 0
12021202+12031203+ (** Check if entry has tags/categories *)
12041204+ let has_tags (entry : Syndic.Atom.entry) =
12051205+ entry.categories <> []
12061206+12071207+ (** Calculate quality score from metrics *)
12081208+ let calculate_quality_score t =
12091209+ let total = float_of_int t.total_entries in
12101210+ if total = 0.0 then 0.0
12111211+ else
12121212+ let summary_pct = float_of_int t.entries_with_summary /. total *. 100.0 in
12131213+ let author_pct = float_of_int t.entries_with_author /. total *. 100.0 in
12141214+ let date_pct = float_of_int t.entries_with_date /. total *. 100.0 in
12151215+ let content_pct = float_of_int t.entries_with_content /. total *. 100.0 in
12161216+ let tags_pct = float_of_int t.entries_with_tags /. total *. 100.0 in
12171217+12181218+ (* Weighted average: content and dates are most important *)
12191219+ let score =
12201220+ (content_pct *. 0.30) +.
12211221+ (date_pct *. 0.25) +.
12221222+ (author_pct *. 0.20) +.
12231223+ (summary_pct *. 0.15) +.
12241224+ (tags_pct *. 0.10)
12251225+ in
12261226+ score
12271227+12281228+ let analyze entries =
12291229+ if entries = [] then
12301230+ failwith "No entries to analyze"
12311231+ else
12321232+ let total_entries = List.length entries in
12331233+12341234+ let entries_with_summary = ref 0 in
12351235+ let entries_with_author = ref 0 in
12361236+ let entries_with_date = ref total_entries in (* All Atom entries have updated *)
12371237+ let entries_with_content = ref 0 in
12381238+ let entries_with_tags = ref 0 in
12391239+ let content_lengths = ref [] in
12401240+ let dates = ref [] in
12411241+12421242+ List.iter (fun (entry : Syndic.Atom.entry) ->
12431243+ if has_summary entry then incr entries_with_summary;
12441244+ if has_author entry then incr entries_with_author;
12451245+ if has_content entry then begin
12461246+ incr entries_with_content;
12471247+ content_lengths := get_content_length entry :: !content_lengths
12481248+ end;
12491249+ if has_tags entry then incr entries_with_tags;
12501250+ dates := entry.updated :: !dates
12511251+ ) entries;
12521252+12531253+ (* Calculate content statistics *)
12541254+ let avg_content_length, min_content_length, max_content_length =
12551255+ if !content_lengths = [] then
12561256+ (0.0, 0, 0)
12571257+ else
12581258+ let sorted = List.sort compare !content_lengths in
12591259+ let sum = List.fold_left (+) 0 sorted in
12601260+ let avg = float_of_int sum /. float_of_int (List.length sorted) in
12611261+ let min_len = List.hd sorted in
12621262+ let max_len = List.hd (List.rev sorted) in
12631263+ (avg, min_len, max_len)
12641264+ in
12651265+12661266+ (* Calculate posting frequency *)
12671267+ let posting_frequency_days =
12681268+ if List.length !dates < 2 then
12691269+ None
12701270+ else
12711271+ try
12721272+ let timestamps = List.map Ptime.to_float_s !dates in
12731273+ let sorted_timestamps = List.sort compare timestamps in
12741274+ let first = List.hd sorted_timestamps in
12751275+ let last = List.hd (List.rev sorted_timestamps) in
12761276+ let total_days = (last -. first) /. 86400.0 in
12771277+ let num_intervals = float_of_int (List.length sorted_timestamps - 1) in
12781278+ Some (total_days /. num_intervals)
12791279+ with _ -> None
12801280+ in
12811281+12821282+ (* Create metrics record (without quality_score first) *)
12831283+ let metrics = {
12841284+ total_entries;
12851285+ entries_with_summary = !entries_with_summary;
12861286+ entries_with_author = !entries_with_author;
12871287+ entries_with_date = !entries_with_date;
12881288+ entries_with_content = !entries_with_content;
12891289+ entries_with_tags = !entries_with_tags;
12901290+ avg_content_length;
12911291+ min_content_length;
12921292+ max_content_length;
12931293+ posting_frequency_days;
12941294+ quality_score = 0.0; (* Placeholder *)
12951295+ } in
12961296+12971297+ (* Calculate quality score *)
12981298+ let quality_score = calculate_quality_score metrics in
12991299+ { metrics with quality_score }
13001300+end
13011301+13021302+(** {1 State Management} *)
13031303+13041304+module State = struct
13051305+ type t = {
13061306+ xdg : Xdge.t;
13071307+ }
13081308+13091309+ module Paths = struct
13101310+ (** Get the users directory path *)
13111311+ let users_dir state = Eio.Path.(Xdge.state_dir state.xdg / "users")
13121312+13131313+ (** Get the feeds directory path *)
13141314+ let feeds_dir state = Eio.Path.(Xdge.state_dir state.xdg / "feeds")
13151315+13161316+ (** Get the user feeds directory path *)
13171317+ let user_feeds_dir state = Eio.Path.(feeds_dir state / "user")
13181318+13191319+ (** Get the path to a user's JSON file *)
13201320+ let user_file state username =
13211321+ Eio.Path.(users_dir state / (username ^ ".json"))
13221322+13231323+ (** Get the path to a user's Atom feed file *)
13241324+ let user_feed_file state username =
13251325+ Eio.Path.(user_feeds_dir state / (username ^ ".xml"))
13261326+13271327+ (** Ensure all necessary directories exist *)
13281328+ let ensure_directories state =
13291329+ let dirs = [
13301330+ users_dir state;
13311331+ feeds_dir state;
13321332+ user_feeds_dir state;
13331333+ ] in
13341334+ List.iter (fun dir ->
13351335+ try Eio.Path.mkdir ~perm:0o755 dir
13361336+ with Eio.Io (Eio.Fs.E (Already_exists _), _) -> ()
13371337+ ) dirs
13381338+ end
13391339+13401340+ module Json = struct
13411341+ (** Decode a user from JSON string *)
13421342+ let user_of_string s =
13431343+ match Jsont_bytesrw.decode_string' User.jsont s with
13441344+ | Ok user -> Some user
13451345+ | Error err ->
13461346+ Log.err (fun m -> m "Failed to parse user JSON: %s" (Jsont.Error.to_string err));
13471347+ None
13481348+13491349+ (** Encode a user to JSON string *)
13501350+ let user_to_string user =
13511351+ match Jsont_bytesrw.encode_string' ~format:Jsont.Indent User.jsont user with
13521352+ | Ok s -> s
13531353+ | Error err -> failwith ("Failed to encode user: " ^ Jsont.Error.to_string err)
13541354+ end
13551355+13561356+ module Storage = struct
13571357+ (** Load a user from disk *)
13581358+ let load_user state username =
13591359+ let file = Paths.user_file state username in
13601360+ try
13611361+ let content = Eio.Path.load file in
13621362+ Json.user_of_string content
13631363+ with
13641364+ | Eio.Io (Eio.Fs.E (Not_found _), _) -> None
13651365+ | e ->
13661366+ Log.err (fun m -> m "Error loading user %s: %s" username (Printexc.to_string e));
13671367+ None
13681368+13691369+ (** Save a user to disk *)
13701370+ let save_user state user =
13711371+ let file = Paths.user_file state (User.username user) in
13721372+ let json = Json.user_to_string user in
13731373+ Eio.Path.save ~create:(`Or_truncate 0o644) file json
13741374+13751375+ (** List all usernames *)
13761376+ let list_users state =
13771377+ try
13781378+ Eio.Path.read_dir (Paths.users_dir state)
13791379+ |> List.filter_map (fun name ->
13801380+ if Filename.check_suffix name ".json" then
13811381+ Some (Filename.chop_suffix name ".json")
13821382+ else None
13831383+ )
13841384+ with _ -> []
13851385+13861386+ (** Load existing Atom entries for a user *)
13871387+ let load_existing_posts state username =
13881388+ let file = Paths.user_feed_file state username in
13891389+ try
13901390+ let content = Eio.Path.load file in
13911391+ (* Parse existing Atom feed *)
13921392+ let input = Xmlm.make_input (`String (0, content)) in
13931393+ let feed = Syndic.Atom.parse input in
13941394+ feed.Syndic.Atom.entries
13951395+ with
13961396+ | Eio.Io (Eio.Fs.E (Not_found _), _) -> []
13971397+ | e ->
13981398+ Log.err (fun m -> m "Error loading existing posts for %s: %s"
13991399+ username (Printexc.to_string e));
14001400+ []
14011401+14021402+ (** Save Atom entries for a user *)
14031403+ let save_atom_feed state username entries =
14041404+ let file = Paths.user_feed_file state username in
14051405+ let feed = Format.Atom.feed_of_entries ~title:username entries in
14061406+ let xml = Format.Atom.to_string feed in
14071407+ Eio.Path.save ~create:(`Or_truncate 0o644) file xml
14081408+14091409+ (** Delete a user and their feed file *)
14101410+ let delete_user state username =
14111411+ let user_file = Paths.user_file state username in
14121412+ let feed_file = Paths.user_feed_file state username in
14131413+ (try Eio.Path.unlink user_file with _ -> ());
14141414+ (try Eio.Path.unlink feed_file with _ -> ())
14151415+ end
14161416+14171417+ module Sync = struct
14181418+ (** Merge new entries with existing ones, updating matching IDs *)
14191419+ let merge_entries ~existing ~new_entries =
14201420+ (* Create a map of new entry IDs for efficient lookup and updates *)
14211421+ let module UriMap = Map.Make(Uri) in
14221422+ let new_entries_map =
14231423+ List.fold_left (fun acc (entry : Syndic.Atom.entry) ->
14241424+ UriMap.add entry.id entry acc
14251425+ ) UriMap.empty new_entries
14261426+ in
14271427+14281428+ (* Update existing entries with new ones if IDs match, otherwise keep existing *)
14291429+ let updated_existing =
14301430+ List.filter_map (fun (entry : Syndic.Atom.entry) ->
14311431+ if UriMap.mem entry.id new_entries_map then
14321432+ None (* Will be replaced by new entry *)
14331433+ else
14341434+ Some entry (* Keep existing entry *)
14351435+ ) existing
14361436+ in
14371437+14381438+ (* Combine new entries with non-replaced existing entries *)
14391439+ let combined = new_entries @ updated_existing in
14401440+ List.sort (fun (a : Syndic.Atom.entry) (b : Syndic.Atom.entry) ->
14411441+ Ptime.compare b.updated a.updated
14421442+ ) combined
14431443+14441444+ (** Get current timestamp in ISO 8601 format *)
14451445+ let current_timestamp () =
14461446+ let open Unix in
14471447+ let tm = gmtime (time ()) in
14481448+ Printf.sprintf "%04d-%02d-%02dT%02d:%02d:%02dZ"
14491449+ (tm.tm_year + 1900) (tm.tm_mon + 1) tm.tm_mday
14501450+ tm.tm_hour tm.tm_min tm.tm_sec
14511451+14521452+ (** Sync feeds for a single user *)
14531453+ let sync_user session state ~username =
14541454+ match Storage.load_user state username with
14551455+ | None ->
14561456+ Error (Printf.sprintf "User %s not found" username)
14571457+ | Some user when User.feeds user = [] ->
14581458+ Log.info (fun m -> m "No feeds configured for user %s" username);
14591459+ Ok ()
14601460+ | Some user ->
14611461+ Log.info (fun m -> m "Syncing feeds for user %s..." username);
14621462+14631463+ (* Fetch all feeds concurrently *)
14641464+ let fetched_feeds =
14651465+ Eio.Fiber.List.filter_map (fun source ->
14661466+ try
14671467+ Log.info (fun m -> m " Fetching %s (%s)..."
14681468+ (Source.name source) (Source.url source));
14691469+ Some (Feed.fetch session source)
14701470+ with e ->
14711471+ Log.err (fun m -> m " Failed to fetch %s: %s"
14721472+ (Source.name source) (Printexc.to_string e));
14731473+ None
14741474+ ) (User.feeds user)
14751475+ in
14761476+14771477+ if fetched_feeds = [] then begin
14781478+ Error "No feeds successfully fetched"
14791479+ end else begin
14801480+ (* Get posts from fetched feeds *)
14811481+ let posts = Post.of_feeds fetched_feeds in
14821482+ Log.info (fun m -> m " Found %d new posts" (List.length posts));
14831483+14841484+ (* Convert to Atom entries *)
14851485+ let new_entries = Format.Atom.entries_of_posts posts in
14861486+14871487+ (* Load existing entries *)
14881488+ let existing = Storage.load_existing_posts state username in
14891489+ Log.info (fun m -> m " Found %d existing posts" (List.length existing));
14901490+14911491+ (* Merge entries *)
14921492+ let merged = merge_entries ~existing ~new_entries in
14931493+ Log.info (fun m -> m " Total posts after merge: %d" (List.length merged));
14941494+14951495+ (* Save updated feed *)
14961496+ Storage.save_atom_feed state username merged;
14971497+14981498+ (* Update last_synced timestamp *)
14991499+ let now = current_timestamp () in
15001500+ let user = User.set_last_synced user now in
15011501+ Storage.save_user state user;
15021502+15031503+ Log.info (fun m -> m "Sync completed for user %s" username);
15041504+ Ok ()
15051505+ end
15061506+ end
15071507+15081508+ module Export = struct
15091509+ (** Convert Atom entry to JSONFeed item *)
15101510+ let atom_entry_to_jsonfeed_item (entry : Syndic.Atom.entry) =
15111511+ (* Extract ID *)
15121512+ let id = Uri.to_string entry.id in
15131513+15141514+ (* Extract title *)
15151515+ let title =
15161516+ match entry.title with
15171517+ | Syndic.Atom.Text s -> Some s
15181518+ | Syndic.Atom.Html (_, s) -> Some s
15191519+ | Syndic.Atom.Xhtml (_, _) -> Some "Untitled"
15201520+ in
15211521+15221522+ (* Extract URL *)
15231523+ let url =
15241524+ match entry.links with
15251525+ | link :: _ -> Some (Uri.to_string link.href)
15261526+ | [] -> None
15271527+ in
15281528+15291529+ (* Extract content *)
15301530+ let content =
15311531+ match entry.content with
15321532+ | Some (Syndic.Atom.Text s) -> `Text s
15331533+ | Some (Syndic.Atom.Html (_, s)) -> `Html s
15341534+ | Some (Syndic.Atom.Xhtml (_, nodes)) ->
15351535+ let html = String.concat "" (List.map Syndic.XML.to_string nodes) in
15361536+ `Html html
15371537+ | Some (Syndic.Atom.Mime _) | Some (Syndic.Atom.Src _) | None ->
15381538+ `Text ""
15391539+ in
15401540+15411541+ (* Extract summary *)
15421542+ let summary =
15431543+ match entry.summary with
15441544+ | Some (Syndic.Atom.Text s) when String.trim s <> "" -> Some s
15451545+ | Some (Syndic.Atom.Html (_, s)) when String.trim s <> "" -> Some s
15461546+ | _ -> None
15471547+ in
15481548+15491549+ (* Extract authors *)
15501550+ let authors =
15511551+ let (author, contributors) = entry.authors in
15521552+ let author_list = author :: contributors in
15531553+ let jsonfeed_authors = List.filter_map (fun (a : Syndic.Atom.author) ->
15541554+ let name = String.trim a.name in
15551555+ if name = "" then None
15561556+ else Some (Jsonfeed.Author.create ~name ())
15571557+ ) author_list in
15581558+ if jsonfeed_authors = [] then None else Some jsonfeed_authors
15591559+ in
15601560+15611561+ (* Extract tags *)
15621562+ let tags =
15631563+ match entry.categories with
15641564+ | [] -> None
15651565+ | cats ->
15661566+ let tag_list = List.map (fun (c : Syndic.Atom.category) ->
15671567+ match c.label with
15681568+ | Some lbl -> lbl
15691569+ | None -> c.term
15701570+ ) cats in
15711571+ if tag_list = [] then None else Some tag_list
15721572+ in
15731573+15741574+ (* Create JSONFeed item *)
15751575+ Jsonfeed.Item.create
15761576+ ~id
15771577+ ~content
15781578+ ?title
15791579+ ?url
15801580+ ?summary
15811581+ ?authors
15821582+ ?tags
15831583+ ~date_published:entry.updated
15841584+ ()
15851585+15861586+ (** Export entries as JSONFeed *)
15871587+ let export_jsonfeed ~title entries =
15881588+ let items = List.map atom_entry_to_jsonfeed_item entries in
15891589+ let feed = Jsonfeed.create ~title ~items () in
15901590+ match Jsonfeed.to_string ~minify:false feed with
15911591+ | Ok json -> Ok json
15921592+ | Error err -> Error (Printf.sprintf "Failed to serialize JSON Feed: %s" (Jsont.Error.to_string err))
15931593+ end
15941594+15951595+ let create env ~app_name =
15961596+ let xdg = Xdge.create env#fs app_name in
15971597+ let state = { xdg } in
15981598+ Paths.ensure_directories state;
15991599+ state
16001600+16011601+ let create_user state user =
16021602+ match Storage.load_user state (User.username user) with
16031603+ | Some _ ->
16041604+ Error (Printf.sprintf "User %s already exists" (User.username user))
16051605+ | None ->
16061606+ Storage.save_user state user;
16071607+ Log.info (fun m -> m "User %s created" (User.username user));
16081608+ Ok ()
16091609+16101610+ let delete_user state ~username =
16111611+ match Storage.load_user state username with
16121612+ | None ->
16131613+ Error (Printf.sprintf "User %s not found" username)
16141614+ | Some _ ->
16151615+ Storage.delete_user state username;
16161616+ Log.info (fun m -> m "User %s deleted" username);
16171617+ Ok ()
16181618+16191619+ let get_user state ~username =
16201620+ Storage.load_user state username
16211621+16221622+ let update_user state user =
16231623+ match Storage.load_user state (User.username user) with
16241624+ | None ->
16251625+ Error (Printf.sprintf "User %s not found" (User.username user))
16261626+ | Some _ ->
16271627+ Storage.save_user state user;
16281628+ Log.info (fun m -> m "User %s updated" (User.username user));
16291629+ Ok ()
16301630+16311631+ let list_users state =
16321632+ Storage.list_users state
16331633+16341634+ let sync_user env state ~username =
16351635+ Session.with_session env @@ fun session ->
16361636+ Sync.sync_user session state ~username
16371637+16381638+ let sync_all env state =
16391639+ let users = Storage.list_users state in
16401640+ if users = [] then begin
16411641+ Log.info (fun m -> m "No users to sync");
16421642+ Ok (0, 0)
16431643+ end else begin
16441644+ Log.info (fun m -> m "Syncing %d users concurrently..." (List.length users));
16451645+16461646+ Session.with_session env @@ fun session ->
16471647+ let results =
16481648+ Eio.Fiber.List.map (fun username ->
16491649+ match Sync.sync_user session state ~username with
16501650+ | Ok () -> true
16511651+ | Error err ->
16521652+ Log.err (fun m -> m "Failed to sync user %s: %s" username err);
16531653+ false
16541654+ ) users
16551655+ in
16561656+ let success_count = List.length (List.filter (fun x -> x) results) in
16571657+ let fail_count = List.length users - success_count in
16581658+16591659+ if fail_count = 0 then
16601660+ Log.info (fun m -> m "All users synced successfully");
16611661+16621662+ Ok (success_count, fail_count)
16631663+ end
16641664+16651665+ let get_user_posts state ~username ?limit () =
16661666+ let entries = Storage.load_existing_posts state username in
16671667+ match limit with
16681668+ | None -> entries
16691669+ | Some n -> List.filteri (fun i _ -> i < n) entries
16701670+16711671+ let get_all_posts state ?limit () =
16721672+ let users = Storage.list_users state in
16731673+16741674+ (* Collect all entries from all users with username tag *)
16751675+ let all_entries =
16761676+ List.concat_map (fun username ->
16771677+ let entries = Storage.load_existing_posts state username in
16781678+ List.map (fun entry -> (username, entry)) entries
16791679+ ) users
16801680+ in
16811681+16821682+ (* Sort by date (newest first) *)
16831683+ let sorted = List.sort (fun (_, a : string * Syndic.Atom.entry) (_, b) ->
16841684+ Ptime.compare b.updated a.updated
16851685+ ) all_entries in
16861686+16871687+ match limit with
16881688+ | None -> sorted
16891689+ | Some n -> List.filteri (fun i _ -> i < n) sorted
16901690+16911691+ let export_merged_feed state ~title ~format ?limit () =
16921692+ let all_posts = get_all_posts state ?limit () in
16931693+ let entries = List.map snd all_posts in
16941694+16951695+ match format with
16961696+ | `Atom ->
16971697+ let xml = Format.Atom.to_string (Format.Atom.feed_of_entries ~title entries) in
16981698+ Ok xml
16991699+ | `Jsonfeed ->
17001700+ if entries = [] then
17011701+ (* Empty JSONFeed *)
17021702+ let feed = Jsonfeed.create ~title ~items:[] () in
17031703+ match Jsonfeed.to_string ~minify:false feed with
17041704+ | Ok json -> Ok json
17051705+ | Error err -> Error (Printf.sprintf "Failed to serialize JSON Feed: %s" (Jsont.Error.to_string err))
17061706+ else
17071707+ Export.export_jsonfeed ~title entries
17081708+17091709+ let analyze_user_quality state ~username =
17101710+ match Storage.load_user state username with
17111711+ | None ->
17121712+ Error (Printf.sprintf "User %s not found" username)
17131713+ | Some _ ->
17141714+ let entries = Storage.load_existing_posts state username in
17151715+ if entries = [] then
17161716+ Error "No entries to analyze"
17171717+ else
17181718+ Ok (Quality.analyze entries)
17191719+end
+376-107
stack/river/lib/river.mli
···1515 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1616 *)
17171818-(** River RSS/Atom feed aggregator *)
1818+(** River RSS/Atom/JSONFeed aggregator library *)
1919+2020+(** {1 Feed Sources} *)
2121+2222+module Source : sig
2323+ type t
2424+ (** A feed source with name and URL. *)
2525+2626+ val make : name:string -> url:string -> t
2727+ (** [make ~name ~url] creates a new feed source. *)
2828+2929+ val name : t -> string
3030+ (** [name source] returns the feed name/label. *)
3131+3232+ val url : t -> string
3333+ (** [url source] returns the feed URL. *)
3434+3535+ val jsont : t Jsont.t
3636+ (** JSON codec for sources. *)
3737+end
3838+3939+(** {1 HTTP Session Management} *)
4040+4141+module Session : sig
4242+ type t
4343+ (** An abstract HTTP session for fetching feeds.
4444+4545+ The session manages HTTP connections and is tied to an Eio switch
4646+ for proper resource cleanup. *)
4747+4848+ val init :
4949+ sw:Eio.Switch.t ->
5050+ < clock : float Eio.Time.clock_ty Eio.Resource.t;
5151+ fs : Eio.Fs.dir_ty Eio.Path.t;
5252+ net : [ `Generic | `Unix ] Eio.Net.ty Eio.Resource.t; .. > ->
5353+ t
5454+ (** [init ~sw env] creates a new HTTP session.
5555+5656+ The session is configured with appropriate defaults for fetching feeds:
5757+ - User-Agent: "OCaml-River/1.0"
5858+ - Automatic redirect following (max 5 redirects)
5959+ - TLS verification enabled
6060+6161+ @param sw The switch for resource management
6262+ @param env The Eio environment *)
6363+6464+ val with_session :
6565+ < clock : float Eio.Time.clock_ty Eio.Resource.t;
6666+ fs : Eio.Fs.dir_ty Eio.Path.t;
6767+ net : [ `Generic | `Unix ] Eio.Net.ty Eio.Resource.t; .. > ->
6868+ (t -> 'a) -> 'a
6969+ (** [with_session env f] creates a session and automatically manages its lifecycle.
7070+7171+ This is the recommended way to use River as it ensures proper cleanup.
7272+7373+ @param env The Eio environment
7474+ @param f The function to run with the session *)
7575+end
7676+7777+(** {1 Feeds and Posts} *)
7878+7979+module Feed : sig
8080+ type t
8181+ (** An Atom, RSS2, or JSON Feed. *)
8282+8383+ val fetch : Session.t -> Source.t -> t
8484+ (** [fetch session source] fetches and parses a feed from the given source.
8585+8686+ @param session The HTTP session
8787+ @param source The feed source to fetch
8888+ @raise Failure if the feed cannot be fetched or parsed *)
8989+9090+ val source : t -> Source.t
9191+ (** [source feed] returns the source this feed was fetched from. *)
9292+end
9393+9494+module Post : sig
9595+ type t
9696+ (** A post from a feed. *)
9797+9898+ val of_feeds : Feed.t list -> t list
9999+ (** [of_feeds feeds] extracts and deduplicates posts from the given feeds.
100100+101101+ Posts are deduplicated by ID. *)
102102+103103+ val feed : t -> Feed.t
104104+ (** [feed post] returns the feed this post originated from. *)
105105+106106+ val title : t -> string
107107+ (** [title post] returns the post title. *)
108108+109109+ val link : t -> Uri.t option
110110+ (** [link post] returns the post link. *)
111111+112112+ val date : t -> Syndic.Date.t option
113113+ (** [date post] returns the post date. *)
114114+115115+ val author : t -> string
116116+ (** [author post] returns the post author name. *)
117117+118118+ val email : t -> string
119119+ (** [email post] returns the post author email. *)
120120+121121+ val content : t -> string
122122+ (** [content post] returns the post content. *)
123123+124124+ val id : t -> string
125125+ (** [id post] returns the unique identifier of the post. *)
126126+127127+ val tags : t -> string list
128128+ (** [tags post] returns the list of tags associated with the post. *)
129129+130130+ val summary : t -> string option
131131+ (** [summary post] returns the summary/excerpt of the post, if available. *)
132132+133133+ val meta_description : t -> string option
134134+ (** [meta_description post] returns the meta description from the origin site.
135135+136136+ To get the meta description, we fetch the content of [link post] and look
137137+ for an HTML meta tag with name "description" or "og:description". *)
191382020-(** {1 Session Management} *)
139139+ val seo_image : t -> string option
140140+ (** [seo_image post] returns the social media image URL.
141141+142142+ To get the SEO image, we fetch the content of [link post] and look for an
143143+ HTML meta tag with name "og:image" or "twitter:image". *)
144144+end
145145+146146+(** {1 Format Conversion and Export} *)
147147+148148+module Format : sig
149149+ (** Feed format conversion and export. *)
150150+151151+ module Atom : sig
152152+ (** Atom 1.0 format support. *)
153153+154154+ val entry_of_post : Post.t -> Syndic.Atom.entry
155155+ (** [entry_of_post post] converts a post to an Atom entry. *)
156156+157157+ val entries_of_posts : Post.t list -> Syndic.Atom.entry list
158158+ (** [entries_of_posts posts] converts posts to Atom entries. *)
159159+160160+ val feed_of_entries :
161161+ title:string ->
162162+ ?id:string ->
163163+ ?authors:(string * string option) list ->
164164+ Syndic.Atom.entry list ->
165165+ Syndic.Atom.feed
166166+ (** [feed_of_entries ~title entries] creates an Atom feed from entries.
167167+168168+ @param title The feed title
169169+ @param id Optional feed ID (default: "urn:river:merged")
170170+ @param authors Optional list of (name, email) tuples *)
171171+172172+ val to_string : Syndic.Atom.feed -> string
173173+ (** [to_string feed] serializes an Atom feed to XML string. *)
174174+ end
175175+176176+ module Rss2 : sig
177177+ (** RSS 2.0 format support. *)
178178+179179+ val of_feed : Feed.t -> Syndic.Rss2.channel option
180180+ (** [of_feed feed] extracts RSS2 channel if the feed is RSS2 format.
181181+182182+ Returns None if the feed is not RSS2. *)
183183+ end
184184+185185+ module Jsonfeed : sig
186186+ (** JSON Feed 1.1 format support. *)
187187+188188+ val item_of_post : Post.t -> Jsonfeed.Item.t
189189+ (** [item_of_post post] converts a post to a JSONFeed item. *)
190190+191191+ val items_of_posts : Post.t list -> Jsonfeed.Item.t list
192192+ (** [items_of_posts posts] converts posts to JSONFeed items. *)
193193+194194+ val feed_of_items :
195195+ title:string ->
196196+ ?home_page_url:string ->
197197+ ?feed_url:string ->
198198+ ?description:string ->
199199+ ?icon:string ->
200200+ ?favicon:string ->
201201+ Jsonfeed.Item.t list ->
202202+ Jsonfeed.t
203203+ (** [feed_of_items ~title items] creates a JSONFeed from items.
204204+205205+ @param title The feed title (required)
206206+ @param home_page_url The URL of the website the feed represents
207207+ @param feed_url The URL of the feed itself
208208+ @param description A description of the feed
209209+ @param icon URL of an icon for the feed (512x512 recommended)
210210+ @param favicon URL of a favicon for the feed (64x64 recommended) *)
211211+212212+ val feed_of_posts :
213213+ title:string ->
214214+ ?home_page_url:string ->
215215+ ?feed_url:string ->
216216+ ?description:string ->
217217+ ?icon:string ->
218218+ ?favicon:string ->
219219+ Post.t list ->
220220+ Jsonfeed.t
221221+ (** [feed_of_posts ~title posts] creates a JSONFeed from posts.
222222+223223+ Convenience function that combines [items_of_posts] and [feed_of_items]. *)
224224+225225+ val to_string : ?minify:bool -> Jsonfeed.t -> (string, string) result
226226+ (** [to_string ?minify feed] serializes a JSONFeed to JSON string.
227227+228228+ @param minify If true, output compact JSON; if false, pretty-print (default: false) *)
229229+230230+ val of_feed : Feed.t -> Jsonfeed.t option
231231+ (** [of_feed feed] extracts JSONFeed if the feed is JSONFeed format.
232232+233233+ Returns None if the feed is not JSONFeed. *)
234234+ end
235235+end
236236+237237+(** {1 User Management} *)
238238+239239+module User : sig
240240+ type t
241241+ (** User configuration with feed subscriptions. *)
242242+243243+ val make :
244244+ username:string ->
245245+ fullname:string ->
246246+ ?email:string ->
247247+ ?feeds:Source.t list ->
248248+ ?last_synced:string ->
249249+ unit ->
250250+ t
251251+ (** [make ~username ~fullname ()] creates a new user.
252252+253253+ @param username Unique username identifier
254254+ @param fullname User's display name
255255+ @param email Optional email address
256256+ @param feeds Optional list of feed sources (default: [])
257257+ @param last_synced Optional ISO 8601 timestamp of last sync *)
258258+259259+ val username : t -> string
260260+ (** [username user] returns the username. *)
212612222-type session
2323-(** An abstract River session for fetching feeds.
262262+ val fullname : t -> string
263263+ (** [fullname user] returns the full name. *)
242642525- The session manages HTTP connections and is tied to an Eio switch
2626- for proper resource cleanup. *)
265265+ val email : t -> string option
266266+ (** [email user] returns the email address if set. *)
272672828-val init :
2929- sw:Eio.Switch.t ->
3030- < clock : float Eio.Time.clock_ty Eio.Resource.t;
3131- fs : Eio.Fs.dir_ty Eio.Path.t;
3232- net : [ `Generic | `Unix ] Eio.Net.ty Eio.Resource.t; .. > ->
3333- session
3434-(** [init ~sw env] creates a new River session.
268268+ val feeds : t -> Source.t list
269269+ (** [feeds user] returns the list of subscribed feeds. *)
352703636- The session is configured with appropriate defaults for fetching feeds:
3737- - User-Agent: "OCaml-River/1.0"
3838- - Automatic redirect following (max 5 redirects)
3939- - TLS verification enabled
271271+ val last_synced : t -> string option
272272+ (** [last_synced user] returns the last sync timestamp if set. *)
402734141- @param sw The switch for resource management
4242- @param env The Eio environment *)
274274+ val add_feed : t -> Source.t -> t
275275+ (** [add_feed user source] returns a new user with the feed added. *)
432764444-val with_session :
4545- < clock : float Eio.Time.clock_ty Eio.Resource.t;
4646- fs : Eio.Fs.dir_ty Eio.Path.t;
4747- net : [ `Generic | `Unix ] Eio.Net.ty Eio.Resource.t; .. > ->
4848- (session -> 'a) -> 'a
4949-(** [with_session env f] creates a session and automatically manages its lifecycle.
277277+ val remove_feed : t -> url:string -> t
278278+ (** [remove_feed user ~url] returns a new user with the feed removed by URL. *)
502795151- This is the recommended way to use River as it ensures proper cleanup.
280280+ val set_last_synced : t -> string -> t
281281+ (** [set_last_synced user timestamp] returns a new user with updated sync time. *)
522825353- @param env The Eio environment
5454- @param f The function to run with the session *)
283283+ val jsont : t Jsont.t
284284+ (** JSON codec for users. *)
285285+end
552865656-(** {1 Feed Sources and Fetching} *)
287287+(** {1 Feed Quality Analysis} *)
572885858-type source = { name : string; url : string }
5959-(** The source of a feed. *)
289289+module Quality : sig
290290+ type t
291291+ (** Quality metrics for a feed or user's aggregated feed. *)
602926161-type feed
6262-(** An Atom, RSS2, or JSON Feed. *)
293293+ val make :
294294+ total_entries:int ->
295295+ entries_with_summary:int ->
296296+ entries_with_author:int ->
297297+ entries_with_date:int ->
298298+ entries_with_content:int ->
299299+ entries_with_tags:int ->
300300+ avg_content_length:float ->
301301+ min_content_length:int ->
302302+ max_content_length:int ->
303303+ posting_frequency_days:float option ->
304304+ quality_score:float ->
305305+ t
306306+ (** [make ~total_entries ...] creates quality metrics. *)
633076464-type post
6565-(** A post from a feed. *)
308308+ val total_entries : t -> int
309309+ val entries_with_summary : t -> int
310310+ val entries_with_author : t -> int
311311+ val entries_with_date : t -> int
312312+ val entries_with_content : t -> int
313313+ val entries_with_tags : t -> int
314314+ val avg_content_length : t -> float
315315+ val min_content_length : t -> int
316316+ val max_content_length : t -> int
317317+ val posting_frequency_days : t -> float option
318318+ val quality_score : t -> float
319319+ (** Accessors for quality metrics. *)
663206767-val fetch : session -> source -> feed
6868-(** [fetch session source] returns an Atom or RSS feed from a source.
321321+ val analyze : Syndic.Atom.entry list -> t
322322+ (** [analyze entries] computes quality metrics from Atom entries.
693237070- @param session The River session
7171- @param source The feed source to fetch *)
324324+ The quality score is a weighted average of:
325325+ - Content completeness (40%)
326326+ - Metadata completeness (30%)
327327+ - Content richness (30%) *)
328328+end
723297373-val name : feed -> string
7474-(** [name feed] is the name of the feed source passed to [fetch]. *)
330330+(** {1 State Management} *)
753317676-val url : feed -> string
7777-(** [url feed] is the url of the feed source passed to [fetch]. *)
332332+module State : sig
333333+ type t
334334+ (** State handle for managing user data and feeds on disk. *)
783357979-val posts : feed list -> post list
8080-(** [posts feeds] is the list of deduplicated posts of the given feeds. *)
336336+ val create :
337337+ < fs : Eio.Fs.dir_ty Eio.Path.t; .. > ->
338338+ app_name:string ->
339339+ t
340340+ (** [create env ~app_name] creates a state handle using XDG directories.
813418282-val feed : post -> feed
8383-(** [feed post] is the feed the post originates from. *)
342342+ Data is stored in:
343343+ - Users: $XDG_STATE_HOME/[app_name]/users/
344344+ - Feeds: $XDG_STATE_HOME/[app_name]/feeds/user/
843458585-val title : post -> string
8686-(** [title post] is the title of the post. *)
346346+ @param env The Eio environment with filesystem access
347347+ @param app_name Application name for XDG paths *)
873488888-val link : post -> Uri.t option
8989-(** [link post] is the link of the post. *)
349349+ (** {2 User Operations} *)
903509191-val date : post -> Syndic.Date.t option
9292-(** [date post] is the date of the post. *)
351351+ val create_user : t -> User.t -> (unit, string) result
352352+ (** [create_user state user] creates a new user.
933539494-val author : post -> string
9595-(** [author post] is the author of the post. *)
354354+ Returns [Error] if the user already exists. *)
963559797-val email : post -> string
9898-(** [email post] is the email of the post. *)
356356+ val delete_user : t -> username:string -> (unit, string) result
357357+ (** [delete_user state ~username] deletes a user and their feed data. *)
99358100100-val content : post -> string
101101-(** [content post] is the content of the post. *)
359359+ val get_user : t -> username:string -> User.t option
360360+ (** [get_user state ~username] retrieves a user by username. *)
102361103103-val id : post -> string
104104-(** [id post] is the unique identifier of the post. *)
362362+ val update_user : t -> User.t -> (unit, string) result
363363+ (** [update_user state user] saves updated user configuration. *)
105364106106-val tags : post -> string list
107107-(** [tags post] is the list of tags associated with the post. *)
365365+ val list_users : t -> string list
366366+ (** [list_users state] returns all usernames. *)
108367109109-val summary : post -> string option
110110-(** [summary post] is the summary/excerpt of the post, if available. *)
368368+ (** {2 Feed Operations} *)
111369112112-val meta_description : post -> string option
113113-(** [meta_description post] is the meta description of the post on the origin
114114- site.
370370+ val sync_user :
371371+ < clock : float Eio.Time.clock_ty Eio.Resource.t;
372372+ fs : Eio.Fs.dir_ty Eio.Path.t;
373373+ net : [ `Generic | `Unix ] Eio.Net.ty Eio.Resource.t; .. > ->
374374+ t ->
375375+ username:string ->
376376+ (unit, string) result
377377+ (** [sync_user env state ~username] fetches all feeds for the user and stores merged result.
115378116116- To get the meta description, we make get the content of [link post] and look
117117- for an HTML meta tag with the name "description" or "og:description".*)
379379+ Posts are fetched concurrently and merged with existing posts.
380380+ The result is stored as an Atom feed. *)
118381119119-val seo_image : post -> string option
120120-(** [seo_image post] is the image to be used by social networks and links to the
121121- post.
382382+ val sync_all :
383383+ < clock : float Eio.Time.clock_ty Eio.Resource.t;
384384+ fs : Eio.Fs.dir_ty Eio.Path.t;
385385+ net : [ `Generic | `Unix ] Eio.Net.ty Eio.Resource.t; .. > ->
386386+ t ->
387387+ (int * int, string) result
388388+ (** [sync_all env state] syncs all users concurrently.
122389123123- To get the seo image, we make get the content of [link post] and look for an
124124- HTML meta tag with the name "og:image" or "twitter:image". *)
390390+ Returns [Ok (success_count, fail_count)]. *)
125391126126-val create_atom_entries : post list -> Syndic.Atom.entry list
127127-(** [create_atom_feed posts] creates a list of atom entries, which can then be
128128- used to create an atom feed that is an aggregate of the posts. *)
392392+ val get_user_posts :
393393+ t ->
394394+ username:string ->
395395+ ?limit:int ->
396396+ unit ->
397397+ Syndic.Atom.entry list
398398+ (** [get_user_posts state ~username ()] retrieves stored posts for a user.
129399130130-(** {1 JSON Feed Support} *)
400400+ @param limit Optional maximum number of posts to return *)
131401132132-val create_jsonfeed_items : post list -> Jsonfeed.Item.t list
133133-(** [create_jsonfeed_items posts] creates a list of JSONFeed items from posts. *)
402402+ val get_all_posts :
403403+ t ->
404404+ ?limit:int ->
405405+ unit ->
406406+ (string * Syndic.Atom.entry) list
407407+ (** [get_all_posts state ()] retrieves posts from all users, sorted by date.
134408135135-val create_jsonfeed :
136136- title:string ->
137137- ?home_page_url:string ->
138138- ?feed_url:string ->
139139- ?description:string ->
140140- ?icon:string ->
141141- ?favicon:string ->
142142- post list ->
143143- Jsonfeed.t
144144-(** [create_jsonfeed ~title ?home_page_url ?feed_url ?description posts]
145145- creates a complete JSONFeed from the given posts.
409409+ Returns list of (username, entry) tuples.
410410+ @param limit Optional maximum number of posts to return *)
146411147147- @param title The feed title (required)
148148- @param home_page_url The URL of the website the feed represents
149149- @param feed_url The URL of the feed itself
150150- @param description A description of the feed
151151- @param icon URL of an icon for the feed (512x512 recommended)
152152- @param favicon URL of a favicon for the feed (64x64 recommended)
153153- @param posts The posts to include in the feed *)
412412+ (** {2 Export} *)
154413155155-val jsonfeed_to_string : ?minify:bool -> Jsonfeed.t -> (string, string) result
156156-(** [jsonfeed_to_string ?minify jsonfeed] serializes a JSONFeed to a string.
414414+ val export_merged_feed :
415415+ t ->
416416+ title:string ->
417417+ format:[ `Atom | `Jsonfeed ] ->
418418+ ?limit:int ->
419419+ unit ->
420420+ (string, string) result
421421+ (** [export_merged_feed state ~title ~format ()] exports a merged feed of all users.
157422158158- @param minify If true, output compact JSON; if false, pretty-print (default: false) *)
423423+ @param title Feed title
424424+ @param format Output format
425425+ @param limit Optional maximum number of entries *)
159426160160-type feed_content = Atom of Syndic.Atom.feed | Rss2 of Syndic.Rss2.channel | JSONFeed of Jsonfeed.t
161161-(** The native format of a feed. *)
427427+ (** {2 Analysis} *)
162428163163-val feed_content : feed -> feed_content
164164-(** [feed_content feed] returns the feed in its native format (Atom, RSS2, or JSONFeed).
165165- This allows access to format-specific features like JSONFeed attachments. *)
429429+ val analyze_user_quality :
430430+ t ->
431431+ username:string ->
432432+ (Quality.t, string) result
433433+ (** [analyze_user_quality state ~username] analyzes quality metrics for a user's feed. *)
434434+end
-656
stack/river/lib/river_store.ml
···11-(*
22- * Persistent storage for Atom feed entries using Cacheio and Jsonfeed
33- *)
44-55-let src = Logs.Src.create "river.store" ~doc:"River persistent storage"
66-module Log = (val Logs.src_log src : Logs.LOG)
77-88-(* Types *)
99-1010-(* Storage metadata that extends Jsonfeed.Item via unknown fields *)
1111-type storage_meta = {
1212- feed_url : string;
1313- feed_name : string;
1414- feed_title : string;
1515- stored_at : Ptime.t;
1616-}
1717-1818-(* A stored entry is a Jsonfeed.Item.t with storage metadata in unknown fields *)
1919-type stored_entry = {
2020- item : Jsonfeed.Item.t;
2121- meta : storage_meta;
2222-}
2323-2424-(* Stored entry accessors *)
2525-let entry_item entry = entry.item
2626-let entry_feed_url entry = entry.meta.feed_url
2727-let entry_feed_name entry = entry.meta.feed_name
2828-let entry_feed_title entry = entry.meta.feed_title
2929-let entry_stored_at entry = entry.meta.stored_at
3030-3131-type feed_info = {
3232- url : string;
3333- name : string;
3434- title : string;
3535- last_updated : Ptime.t;
3636- entry_count : int;
3737-}
3838-3939-type t = {
4040- cache : Cacheio.t;
4141- base_dir : Eio.Fs.dir_ty Eio.Path.t;
4242-}
4343-4444-(* Helper functions *)
4545-4646-let make_feed_key feed_url =
4747- (* Use SHA256 hash of feed URL as directory name for safety *)
4848- let hash = Digest.string feed_url |> Digest.to_hex in
4949- "feeds/" ^ hash
5050-5151-let make_entry_key feed_url atom_id =
5252- (* Store entry under feed directory with atom_id hash *)
5353- let feed_key = make_feed_key feed_url in
5454- let entry_hash = Digest.string atom_id |> Digest.to_hex in
5555- feed_key ^ "/entries/" ^ entry_hash
5656-5757-let make_feed_meta_key feed_url =
5858- let feed_key = make_feed_key feed_url in
5959- feed_key ^ "/meta.json"
6060-6161-(* JSON serialization using Jsonfeed and Jsont *)
6262-6363-(* Storage metadata codec - stores feed info and storage timestamp *)
6464-let storage_meta_jsont : storage_meta Jsont.t =
6565- Jsont.Object.(
6666- map ~kind:"StorageMeta" (fun feed_url feed_name feed_title stored_at : storage_meta ->
6767- { feed_url; feed_name; feed_title; stored_at })
6868- |> mem "x_river_feed_url" Jsont.string ~enc:(fun m -> m.feed_url)
6969- |> mem "x_river_feed_name" Jsont.string ~enc:(fun m -> m.feed_name)
7070- |> mem "x_river_feed_title" Jsont.string ~enc:(fun m -> m.feed_title)
7171- |> mem "x_river_stored_at" Jsonfeed.Rfc3339.jsont ~enc:(fun m -> m.stored_at)
7272- |> finish
7373- )
7474-7575-(* Codec for feed_info *)
7676-let feed_meta_jsont : feed_info Jsont.t =
7777- Jsont.Object.(
7878- map ~kind:"FeedInfo" (fun url name title last_updated entry_count : feed_info ->
7979- { url; name; title; last_updated; entry_count })
8080- |> mem "url" Jsont.string ~enc:(fun (m : feed_info) -> m.url)
8181- |> mem "name" Jsont.string ~enc:(fun m -> m.name)
8282- |> mem "title" Jsont.string ~enc:(fun m -> m.title)
8383- |> mem "last_updated" Jsonfeed.Rfc3339.jsont ~enc:(fun m -> m.last_updated)
8484- |> mem "entry_count" Jsont.int ~enc:(fun m -> m.entry_count)
8585- |> finish
8686- )
8787-8888-(* Helper to create item with storage metadata in unknown fields *)
8989-let merge_storage_meta item meta =
9090- let meta_json = Jsont_bytesrw.encode_string' storage_meta_jsont meta
9191- |> Result.get_ok in
9292- let meta_unknown = Jsont_bytesrw.decode_string' Jsont.json meta_json
9393- |> Result.get_ok in
9494- Jsonfeed.Item.create
9595- ~id:(Jsonfeed.Item.id item)
9696- ~content:(Jsonfeed.Item.content item)
9797- ?url:(Jsonfeed.Item.url item)
9898- ?external_url:(Jsonfeed.Item.external_url item)
9999- ?title:(Jsonfeed.Item.title item)
100100- ?summary:(Jsonfeed.Item.summary item)
101101- ?image:(Jsonfeed.Item.image item)
102102- ?banner_image:(Jsonfeed.Item.banner_image item)
103103- ?date_published:(Jsonfeed.Item.date_published item)
104104- ?date_modified:(Jsonfeed.Item.date_modified item)
105105- ?authors:(Jsonfeed.Item.authors item)
106106- ?tags:(Jsonfeed.Item.tags item)
107107- ?language:(Jsonfeed.Item.language item)
108108- ?attachments:(Jsonfeed.Item.attachments item)
109109- ?references:(Jsonfeed.Item.references item)
110110- ~unknown:meta_unknown
111111- ()
112112-113113-(* Helper to extract storage metadata from item's unknown fields *)
114114-let extract_storage_meta item =
115115- let unknown = Jsonfeed.Item.unknown item in
116116- let meta_str = Jsont_bytesrw.encode_string' Jsont.json unknown |> Result.get_ok in
117117- match Jsont_bytesrw.decode_string' storage_meta_jsont meta_str with
118118- | Ok meta -> meta
119119- | Error e -> failwith ("Missing storage metadata: " ^ Jsont.Error.to_string e)
120120-121121-(* Stored entry codec - just wraps Jsonfeed.Item.jsont *)
122122-let stored_entry_jsont : stored_entry Jsont.t =
123123- let kind = "StoredEntry" in
124124- let of_string s =
125125- match Jsont_bytesrw.decode_string' Jsonfeed.Item.jsont s with
126126- | Ok item -> Ok { item; meta = extract_storage_meta item }
127127- | Error e -> Error (Jsont.Error.to_string e)
128128- in
129129- let enc entry =
130130- let item_with_meta = merge_storage_meta entry.item entry.meta in
131131- match Jsont_bytesrw.encode_string' Jsonfeed.Item.jsont item_with_meta with
132132- | Ok s -> s
133133- | Error e -> failwith ("Failed to encode: " ^ Jsont.Error.to_string e)
134134- in
135135- Jsont.of_of_string ~kind of_string ~enc
136136-137137-(* Encode/decode functions *)
138138-let entry_to_string entry =
139139- match Jsont_bytesrw.encode_string' stored_entry_jsont entry with
140140- | Ok s -> s
141141- | Error err -> failwith ("Failed to encode entry: " ^ Jsont.Error.to_string err)
142142-143143-let entry_of_string s =
144144- match Jsont_bytesrw.decode_string' stored_entry_jsont s with
145145- | Ok entry -> entry
146146- | Error err -> failwith ("Failed to parse entry: " ^ Jsont.Error.to_string err)
147147-148148-let feed_meta_to_string meta =
149149- match Jsont_bytesrw.encode_string' feed_meta_jsont meta with
150150- | Ok s -> s
151151- | Error err -> failwith ("Failed to encode feed metadata: " ^ Jsont.Error.to_string err)
152152-153153-let feed_meta_of_string s =
154154- match Jsont_bytesrw.decode_string' feed_meta_jsont s with
155155- | Ok meta -> meta
156156- | Error err -> failwith ("Failed to parse feed metadata: " ^ Jsont.Error.to_string err)
157157-158158-(* Store creation *)
159159-160160-let create ~base_dir =
161161- let cache_dir = Eio.Path.(base_dir / "river_store") in
162162- (try
163163- Eio.Path.mkdir ~perm:0o755 cache_dir
164164- with Eio.Io (Eio.Fs.E (Already_exists _), _) -> ());
165165- let cache = Cacheio.create ~base_dir:cache_dir in
166166- Log.info (fun m -> m "Created River store at %a" Eio.Path.pp cache_dir);
167167- { cache; base_dir = cache_dir }
168168-169169-let create_with_xdge xdge =
170170- let cache = Cacheio.create_with_xdge xdge in
171171- let base_dir = Eio.Path.( / ) (Xdge.cache_dir xdge) "river_store" in
172172- Log.info (fun m -> m "Created River store with XDG at %a" Eio.Path.pp base_dir);
173173- { cache; base_dir }
174174-175175-(* Convert Post.t to Jsonfeed.Item.t *)
176176-let item_of_post ~feed_url ~feed_name ~feed_title (post : Post.t) =
177177- let content =
178178- let html = Soup.to_string post.content in
179179- `Html html
180180- in
181181- let url = Option.map Uri.to_string post.link in
182182- let authors =
183183- if post.author = "" then None
184184- else Some [Jsonfeed.Author.create ~name:post.author ()]
185185- in
186186- let tags = if post.tags = [] then None else Some post.tags in
187187- let item = Jsonfeed.Item.create
188188- ~id:post.id
189189- ~content
190190- ?url
191191- ?title:(if post.title = "" then None else Some post.title)
192192- ?summary:post.summary
193193- ?date_published:post.date
194194- ?date_modified:post.date
195195- ?authors
196196- ?tags
197197- ()
198198- in
199199- let meta = {
200200- feed_url;
201201- feed_name;
202202- feed_title;
203203- stored_at = Ptime.of_float_s (Unix.gettimeofday ()) |> Option.get;
204204- } in
205205- { item; meta }
206206-207207-(* Convert Syndic.Atom.entry to Jsonfeed.Item.t *)
208208-let item_of_atom ~feed_url ~feed_name ~feed_title (atom_entry : Syndic.Atom.entry) =
209209- let atom_id = Uri.to_string atom_entry.id in
210210- let date_modified = atom_entry.updated in
211211- let date_published = match atom_entry.published with
212212- | Some p -> Some p
213213- | None -> Some atom_entry.updated
214214- in
215215- (* Extract content *)
216216- let content_html = match atom_entry.content with
217217- | Some (Syndic.Atom.Text s) -> Some s
218218- | Some (Syndic.Atom.Html (_, s)) -> Some s
219219- | Some (Syndic.Atom.Xhtml (_, nodes)) ->
220220- let ns_prefix _ = Some "" in
221221- Some (String.concat "" (List.map (Syndic.XML.to_string ~ns_prefix) nodes))
222222- | Some (Syndic.Atom.Mime _) | Some (Syndic.Atom.Src _) | None -> None
223223- in
224224- let content_text = match atom_entry.summary with
225225- | Some s -> Some (Util.string_of_text_construct s)
226226- | None -> None
227227- in
228228- let content = match content_html, content_text with
229229- | Some h, Some t -> `Both (h, t)
230230- | Some h, None -> `Html h
231231- | None, Some t -> `Text t
232232- | None, None -> `Text "" (* Fallback *)
233233- in
234234- let url = try
235235- Some (Uri.to_string (List.find (fun l -> l.Syndic.Atom.rel = Syndic.Atom.Alternate) atom_entry.links).href)
236236- with Not_found ->
237237- match atom_entry.links with
238238- | l :: _ -> Some (Uri.to_string l.href)
239239- | [] -> None
240240- in
241241- let tags =
242242- let cat_tags = List.map (fun cat -> cat.Syndic.Atom.term) atom_entry.categories in
243243- if cat_tags = [] then None else Some cat_tags
244244- in
245245- let summary = match atom_entry.summary with
246246- | Some s -> Some (Util.string_of_text_construct s)
247247- | None -> None
248248- in
249249- let item = Jsonfeed.Item.create
250250- ~id:atom_id
251251- ~content
252252- ?url
253253- ~title:(Util.string_of_text_construct atom_entry.title)
254254- ?summary
255255- ?date_published
256256- ~date_modified
257257- ?tags
258258- ()
259259- in
260260- let meta = {
261261- feed_url;
262262- feed_name;
263263- feed_title;
264264- stored_at = Ptime.of_float_s (Unix.gettimeofday ()) |> Option.get;
265265- } in
266266- { item; meta }
267267-268268-(* Feed metadata management *)
269269-let update_feed_meta store ~feed_url ~feed_name ~feed_title ~sw:_ =
270270- let key = make_feed_meta_key feed_url in
271271- let meta = {
272272- url = feed_url;
273273- name = feed_name;
274274- title = feed_title;
275275- last_updated = Ptime.of_float_s (Unix.gettimeofday ()) |> Option.get;
276276- entry_count = 0;
277277- } in
278278- let json_str = feed_meta_to_string meta in
279279- let source = Eio.Flow.string_source json_str in
280280- Cacheio.put store.cache ~key ~source ~ttl:None ();
281281- Log.debug (fun m -> m "Updated feed metadata for %s" feed_url)
282282-283283-let get_feed_meta store ~feed_url ~sw =
284284- let key = make_feed_meta_key feed_url in
285285- match Cacheio.get store.cache ~key ~sw with
286286- | None -> None
287287- | Some source ->
288288- try
289289- let json_str = Eio.Buf_read.(parse_exn take_all) source ~max_size:Int.max_int in
290290- Some (feed_meta_of_string json_str)
291291- with e ->
292292- Log.err (fun m -> m "Failed to parse feed metadata: %s" (Printexc.to_string e));
293293- None
294294-295295-(* Entry storage *)
296296-297297-let store_entry store ~feed_url ~feed_name ~feed_title ~post ~sw =
298298- let entry = item_of_post ~feed_url ~feed_name ~feed_title post in
299299- let key = make_entry_key feed_url (Jsonfeed.Item.id entry.item) in
300300- let json_str = entry_to_string entry in
301301- let source = Eio.Flow.string_source json_str in
302302- Cacheio.put store.cache ~key ~source ~ttl:None ();
303303- Log.debug (fun m -> m "Stored entry %s for feed %s" (Jsonfeed.Item.id entry.item) feed_url);
304304- (* Update feed metadata *)
305305- update_feed_meta store ~feed_url ~feed_name ~feed_title ~sw
306306-307307-let store_posts store ~feed_url ~feed_name ~feed_title ~posts ~sw =
308308- Log.info (fun m -> m "Storing %d posts for feed %s" (List.length posts) feed_url);
309309- List.iter (fun post ->
310310- store_entry store ~feed_url ~feed_name ~feed_title ~post ~sw
311311- ) posts;
312312- Log.info (fun m -> m "Stored %d entries for feed %s" (List.length posts) feed_url)
313313-314314-let store_atom_entries store ~feed_url ~feed_name ~feed_title ~entries ~sw =
315315- Log.info (fun m -> m "Storing %d Atom entries for feed %s" (List.length entries) feed_url);
316316- List.iter (fun atom_entry ->
317317- let entry = item_of_atom ~feed_url ~feed_name ~feed_title atom_entry in
318318- let key = make_entry_key feed_url (Jsonfeed.Item.id entry.item) in
319319- let json_str = entry_to_string entry in
320320- let source = Eio.Flow.string_source json_str in
321321- Cacheio.put store.cache ~key ~source ~ttl:None ();
322322- Log.debug (fun m -> m "Stored Atom entry %s" (Jsonfeed.Item.id entry.item));
323323- ) entries;
324324- update_feed_meta store ~feed_url ~feed_name ~feed_title ~sw;
325325- Log.info (fun m -> m "Stored %d Atom entries for feed %s" (List.length entries) feed_url)
326326-327327-(* Entry retrieval *)
328328-329329-let get_entry store ~feed_url ~atom_id ~sw =
330330- let key = make_entry_key feed_url atom_id in
331331- match Cacheio.get store.cache ~key ~sw with
332332- | None -> None
333333- | Some source ->
334334- try
335335- let json_str = Eio.Buf_read.(parse_exn take_all) source ~max_size:Int.max_int in
336336- Some (entry_of_string json_str)
337337- with e ->
338338- Log.err (fun m -> m "Failed to parse entry: %s" (Printexc.to_string e));
339339- None
340340-341341-let list_entries store ~feed_url =
342342- let feed_key = make_feed_key feed_url in
343343- let prefix = feed_key ^ "/entries/" in
344344- let entries = Cacheio.scan store.cache in
345345- let feed_entries = List.filter_map (fun (cache_entry : Cacheio.Entry.t) ->
346346- let key = Cacheio.Entry.key cache_entry in
347347- if String.starts_with ~prefix key then
348348- Eio.Switch.run @@ fun sw ->
349349- match Cacheio.get store.cache ~key ~sw with
350350- | None -> None
351351- | Some source ->
352352- try
353353- let json_str = Eio.Buf_read.(parse_exn take_all) source ~max_size:Int.max_int in
354354- Some (entry_of_string json_str)
355355- with e ->
356356- Log.err (fun m -> m "Failed to parse entry from scan: %s" (Printexc.to_string e));
357357- None
358358- else None
359359- ) entries in
360360- (* Sort by date_modified, newest first *)
361361- List.sort (fun a b ->
362362- let time_a = Jsonfeed.Item.date_modified a.item |> Option.value ~default:a.meta.stored_at in
363363- let time_b = Jsonfeed.Item.date_modified b.item |> Option.value ~default:b.meta.stored_at in
364364- Ptime.compare time_b time_a
365365- ) feed_entries
366366-367367-let list_entries_filtered store ~feed_url ?since ?until ?limit ?(sort=`Updated) () =
368368- let entries = list_entries store ~feed_url in
369369- (* Filter by time *)
370370- let entries = match since with
371371- | None -> entries
372372- | Some t -> List.filter (fun e ->
373373- let time = Jsonfeed.Item.date_modified e.item |> Option.value ~default:e.meta.stored_at in
374374- Ptime.is_later time ~than:t || Ptime.equal time t) entries
375375- in
376376- let entries = match until with
377377- | None -> entries
378378- | Some t -> List.filter (fun e ->
379379- let time = Jsonfeed.Item.date_modified e.item |> Option.value ~default:e.meta.stored_at in
380380- Ptime.is_earlier time ~than:t || Ptime.equal time t) entries
381381- in
382382- (* Sort *)
383383- let entries = match sort with
384384- | `Published -> List.sort (fun a b ->
385385- let pa = Jsonfeed.Item.date_published a.item in
386386- let pb = Jsonfeed.Item.date_published b.item in
387387- match pa, pb with
388388- | Some ta, Some tb -> Ptime.compare tb ta
389389- | None, Some _ -> 1
390390- | Some _, None -> -1
391391- | None, None ->
392392- let ta = Jsonfeed.Item.date_modified a.item |> Option.value ~default:a.meta.stored_at in
393393- let tb = Jsonfeed.Item.date_modified b.item |> Option.value ~default:b.meta.stored_at in
394394- Ptime.compare tb ta
395395- ) entries
396396- | `Updated -> List.sort (fun a b ->
397397- let ta = Jsonfeed.Item.date_modified a.item |> Option.value ~default:a.meta.stored_at in
398398- let tb = Jsonfeed.Item.date_modified b.item |> Option.value ~default:b.meta.stored_at in
399399- Ptime.compare tb ta
400400- ) entries
401401- | `Stored -> List.sort (fun a b -> Ptime.compare b.meta.stored_at a.meta.stored_at) entries
402402- in
403403- (* Limit *)
404404- match limit with
405405- | None -> entries
406406- | Some n -> List.filteri (fun i _ -> i < n) entries
407407-408408-let exists_entry store ~feed_url ~atom_id =
409409- let key = make_entry_key feed_url atom_id in
410410- Cacheio.exists store.cache ~key
411411-412412-let get_recent_entries store ?(limit=50) () =
413413- let entries = Cacheio.scan store.cache in
414414- let all_entries = List.filter_map (fun (cache_entry : Cacheio.Entry.t) ->
415415- let key = Cacheio.Entry.key cache_entry in
416416- if String.contains key '/' &&
417417- String.ends_with ~suffix:"entries/" (String.sub key 0 (String.rindex key '/') ^ "/") then
418418- Eio.Switch.run @@ fun sw ->
419419- match Cacheio.get store.cache ~key ~sw with
420420- | None -> None
421421- | Some source ->
422422- try
423423- let json_str = Eio.Buf_read.(parse_exn take_all) source ~max_size:Int.max_int in
424424- Some (entry_of_string json_str)
425425- with e ->
426426- Log.err (fun m -> m "Failed to parse entry: %s" (Printexc.to_string e));
427427- None
428428- else None
429429- ) entries in
430430- let sorted = List.sort (fun a b ->
431431- let ta = Jsonfeed.Item.date_modified a.item |> Option.value ~default:a.meta.stored_at in
432432- let tb = Jsonfeed.Item.date_modified b.item |> Option.value ~default:b.meta.stored_at in
433433- Ptime.compare tb ta
434434- ) all_entries in
435435- List.filteri (fun i _ -> i < limit) sorted
436436-437437-let find_entry_by_id store ~id =
438438- Log.debug (fun m -> m "Searching for entry with ID: %s" id);
439439- let entries = Cacheio.scan store.cache in
440440- let matching_entry = List.find_map (fun (cache_entry : Cacheio.Entry.t) ->
441441- let key = Cacheio.Entry.key cache_entry in
442442- if String.contains key '/' &&
443443- String.ends_with ~suffix:"entries/" (String.sub key 0 (String.rindex key '/') ^ "/") then
444444- Eio.Switch.run @@ fun sw ->
445445- match Cacheio.get store.cache ~key ~sw with
446446- | None -> None
447447- | Some source ->
448448- (try
449449- let json_str = Eio.Buf_read.(parse_exn take_all) source ~max_size:Int.max_int in
450450- let entry = entry_of_string json_str in
451451- (* Exact ID match only *)
452452- if Jsonfeed.Item.id entry.item = id then
453453- Some entry
454454- else
455455- None
456456- with e ->
457457- Log.err (fun m -> m "Failed to parse entry: %s" (Printexc.to_string e));
458458- None)
459459- else None
460460- ) entries in
461461- (match matching_entry with
462462- | Some e -> Log.debug (fun m -> m "Found entry: %s"
463463- (Jsonfeed.Item.title e.item |> Option.value ~default:"(no title)"))
464464- | None -> Log.debug (fun m -> m "No entry found with ID: %s" id));
465465- matching_entry
466466-467467-(* Entry management *)
468468-469469-let delete_entry store ~feed_url ~atom_id =
470470- let key = make_entry_key feed_url atom_id in
471471- Cacheio.delete store.cache ~key;
472472- Log.info (fun m -> m "Deleted entry %s from feed %s" atom_id feed_url)
473473-474474-let delete_feed store ~feed_url =
475475- let feed_key = make_feed_key feed_url in
476476- let prefix = feed_key ^ "/" in
477477- let entries = Cacheio.scan store.cache in
478478- let count = ref 0 in
479479- List.iter (fun (cache_entry : Cacheio.Entry.t) ->
480480- let key = Cacheio.Entry.key cache_entry in
481481- if String.starts_with ~prefix key then begin
482482- Cacheio.delete store.cache ~key;
483483- incr count
484484- end
485485- ) entries;
486486- Log.info (fun m -> m "Deleted feed %s (%d entries)" feed_url !count)
487487-488488-let prune_entries store ~feed_url ~keep =
489489- let entries = list_entries store ~feed_url in
490490- let to_delete = List.filteri (fun i _ -> i >= keep) entries in
491491- List.iter (fun entry ->
492492- delete_entry store ~feed_url ~atom_id:(Jsonfeed.Item.id entry.item)
493493- ) to_delete;
494494- let deleted = List.length to_delete in
495495- Log.info (fun m -> m "Pruned %d entries from feed %s (kept %d)" deleted feed_url keep);
496496- deleted
497497-498498-let prune_old_entries store ~feed_url ~older_than =
499499- let entries = list_entries store ~feed_url in
500500- let to_delete = List.filter (fun e ->
501501- let time = Jsonfeed.Item.date_modified e.item |> Option.value ~default:e.meta.stored_at in
502502- Ptime.is_earlier time ~than:older_than
503503- ) entries in
504504- List.iter (fun entry ->
505505- delete_entry store ~feed_url ~atom_id:(Jsonfeed.Item.id entry.item)
506506- ) to_delete;
507507- let deleted = List.length to_delete in
508508- Log.info (fun m -> m "Pruned %d old entries from feed %s" deleted feed_url);
509509- deleted
510510-511511-(* Feed information *)
512512-513513-let list_feeds store =
514514- let feed_entries = Cacheio.scan store.cache in
515515- let feed_metas = List.filter_map (fun (cache_entry : Cacheio.Entry.t) ->
516516- let key = Cacheio.Entry.key cache_entry in
517517- if String.ends_with ~suffix:"/meta.json" key then
518518- Eio.Switch.run @@ fun sw ->
519519- match Cacheio.get store.cache ~key ~sw with
520520- | None -> None
521521- | Some source ->
522522- try
523523- let json_str = Eio.Buf_read.(parse_exn take_all) source ~max_size:Int.max_int in
524524- Some (feed_meta_of_string json_str)
525525- with e ->
526526- Log.err (fun m -> m "Failed to parse feed metadata: %s" (Printexc.to_string e));
527527- None
528528- else None
529529- ) feed_entries in
530530- (* Count entries for each feed *)
531531- List.map (fun meta ->
532532- let entries = list_entries store ~feed_url:meta.url in
533533- { meta with entry_count = List.length entries }
534534- ) feed_metas
535535-536536-let get_feed_info store ~feed_url =
537537- Eio.Switch.run @@ fun sw ->
538538- match get_feed_meta store ~feed_url ~sw with
539539- | None -> None
540540- | Some meta ->
541541- let entries = list_entries store ~feed_url in
542542- Some { meta with entry_count = List.length entries }
543543-544544-let stats store =
545545- Cacheio.stats store.cache
546546-547547-(* Maintenance *)
548548-549549-let expire store =
550550- Cacheio.expire store.cache
551551-552552-let compact _store =
553553- (* TODO: Implement compaction logic *)
554554- Log.info (fun m -> m "Compaction not yet implemented")
555555-556556-(* Export/Import *)
557557-558558-let export_to_atom store ~feed_url ?title ?limit () =
559559- let entries = match limit with
560560- | None -> list_entries store ~feed_url
561561- | Some n -> list_entries_filtered store ~feed_url ~limit:n ()
562562- in
563563- let atom_entries = List.map (fun entry ->
564564- let item = entry.item in
565565- let id = Uri.of_string (Jsonfeed.Item.id item) in
566566- let entry_title : Syndic.Atom.text_construct =
567567- Syndic.Atom.Text (Jsonfeed.Item.title item |> Option.value ~default:"(no title)") in
568568- let links = match Jsonfeed.Item.url item with
569569- | Some url_str -> [Syndic.Atom.link ~rel:Syndic.Atom.Alternate (Uri.of_string url_str)]
570570- | None -> []
571571- in
572572- let content_str = match Jsonfeed.Item.content item with
573573- | `Html h -> h
574574- | `Text t -> t
575575- | `Both (h, _) -> h
576576- in
577577- let entry_content : Syndic.Atom.content = Syndic.Atom.Html (None, content_str) in
578578- let author_name = match Jsonfeed.Item.authors item with
579579- | Some (a :: _) -> Jsonfeed.Author.name a |> Option.value ~default:entry.meta.feed_name
580580- | _ -> entry.meta.feed_name
581581- in
582582- let author = Syndic.Atom.author author_name in
583583- let authors = (author, []) in
584584- let updated = Jsonfeed.Item.date_modified item |> Option.value ~default:entry.meta.stored_at in
585585- Syndic.Atom.entry ~id ~title:entry_title ~updated
586586- ?published:(Jsonfeed.Item.date_published item)
587587- ~links ~content:entry_content ~authors ()
588588- ) entries in
589589- let feed_title : Syndic.Atom.text_construct = match title with
590590- | Some t -> Syndic.Atom.Text t
591591- | None -> Syndic.Atom.Text ("Archive: " ^ feed_url)
592592- in
593593- let feed_id = Uri.of_string ("urn:river:archive:" ^ (Digest.string feed_url |> Digest.to_hex)) in
594594- let feed_updated = match entries with
595595- | [] -> Ptime.of_float_s (Unix.gettimeofday ()) |> Option.get
596596- | e :: _ -> Jsonfeed.Item.date_modified e.item |> Option.value ~default:e.meta.stored_at
597597- in
598598- {
599599- Syndic.Atom.id = feed_id;
600600- title = feed_title;
601601- updated = feed_updated;
602602- entries = atom_entries;
603603- authors = [];
604604- categories = [];
605605- contributors = [];
606606- generator = Some {
607607- Syndic.Atom.version = Some "1.0";
608608- uri = None;
609609- content = "River Store";
610610- };
611611- icon = None;
612612- links = [];
613613- logo = None;
614614- rights = None;
615615- subtitle = None;
616616- }
617617-618618-let import_from_atom store ~feed_url ~feed_name ~feed ~sw =
619619- let entries = feed.Syndic.Atom.entries in
620620- store_atom_entries store ~feed_url ~feed_name ~feed_title:(Util.string_of_text_construct feed.title) ~entries ~sw;
621621- List.length entries
622622-623623-(* Pretty printing *)
624624-625625-let pp_entry fmt entry =
626626- let item = entry.item in
627627- Format.fprintf fmt "@[<v 2>Entry:@,";
628628- Format.fprintf fmt "ID: %s@," (Jsonfeed.Item.id item);
629629- Format.fprintf fmt "Title: %s@," (Jsonfeed.Item.title item |> Option.value ~default:"(no title)");
630630- Format.fprintf fmt "URL: %s@," (Jsonfeed.Item.url item |> Option.value ~default:"(none)");
631631- (match Jsonfeed.Item.date_published item with
632632- | Some t -> Format.fprintf fmt "Published: %s@," (Ptime.to_rfc3339 t)
633633- | None -> ());
634634- (match Jsonfeed.Item.date_modified item with
635635- | Some t -> Format.fprintf fmt "Modified: %s@," (Ptime.to_rfc3339 t)
636636- | None -> ());
637637- Format.fprintf fmt "Feed: %s (%s)@," entry.meta.feed_name entry.meta.feed_url;
638638- Format.fprintf fmt "Stored: %s@]" (Ptime.to_rfc3339 entry.meta.stored_at)
639639-640640-let pp_feed_info fmt info =
641641- Format.fprintf fmt "@[<v 2>Feed:@,";
642642- Format.fprintf fmt "Name: %s@," info.name;
643643- Format.fprintf fmt "Title: %s@," info.title;
644644- Format.fprintf fmt "URL: %s@," info.url;
645645- Format.fprintf fmt "Last updated: %s@," (Ptime.to_rfc3339 info.last_updated);
646646- Format.fprintf fmt "Entries: %d@]" info.entry_count
647647-648648-let pp fmt store =
649649- let feeds = list_feeds store in
650650- Format.fprintf fmt "@[<v 2>River Store:@,";
651651- Format.fprintf fmt "Base dir: %a@," Eio.Path.pp store.base_dir;
652652- Format.fprintf fmt "Feeds: %d@," (List.length feeds);
653653- List.iter (fun feed ->
654654- Format.fprintf fmt " - %s: %d entries@," feed.name feed.entry_count
655655- ) feeds;
656656- Format.fprintf fmt "@]"
-218
stack/river/lib/river_store.mli
···11-(** Persistent storage for Atom feed entries using Cacheio
22-33- River_store provides a persistent, per-feed storage system for Atom entries,
44- enabling long-term archival of feed items that may have expired upstream.
55-66- {2 Key Features}
77-88- - {b Per-feed storage}: Each feed's entries stored independently
99- - {b Atom ID keying}: Entries keyed by their unique Atom ID
1010- - {b URL resolution}: Resolves all URLs relative to feed base URI
1111- - {b Persistent caching}: Built on Cacheio for reliable file storage
1212- - {b Entry management}: List, update, delete, and prune operations
1313- - {b Metadata tracking}: Stores feed source, timestamps, and relationships
1414-1515- {2 Usage Example}
1616-1717- {[
1818- let store = River_store.create ~base_dir:store_dir in
1919-2020- (* Store entries from a feed *)
2121- Eio.Switch.run @@ fun sw ->
2222- let feed = River.fetch env source in
2323- let posts = River.posts [feed] in
2424- River_store.store_posts store ~feed_url:source.url ~posts ~sw ();
2525-2626- (* List entries for a feed *)
2727- let entries = River_store.list_entries store ~feed_url:source.url in
2828- List.iter (fun entry ->
2929- Printf.printf "%s: %s\n" entry.atom_id entry.title
3030- ) entries;
3131-3232- (* Get a specific entry *)
3333- match River_store.get_entry store ~feed_url:source.url ~atom_id:"..." ~sw with
3434- | Some entry -> (* Use entry *)
3535- | None -> (* Not found *)
3636- ]} *)
3737-3838-(** {1 Core Types} *)
3939-4040-(** Abstract type representing the store *)
4141-type t
4242-4343-(** Stored entry - combines Jsonfeed.Item with storage metadata *)
4444-type stored_entry
4545-4646-(** {2 Stored Entry Accessors} *)
4747-4848-val entry_item : stored_entry -> Jsonfeed.Item.t
4949-(** Get the underlying Jsonfeed Item *)
5050-5151-val entry_feed_url : stored_entry -> string
5252-(** Get the source feed URL *)
5353-5454-val entry_feed_name : stored_entry -> string
5555-(** Get the source feed name *)
5656-5757-val entry_feed_title : stored_entry -> string
5858-(** Get the source feed title *)
5959-6060-val entry_stored_at : stored_entry -> Ptime.t
6161-(** Get the storage timestamp *)
6262-6363-(** Feed metadata *)
6464-type feed_info = {
6565- url : string;
6666- (** Feed URL *)
6767-6868- name : string;
6969- (** Feed name/label *)
7070-7171- title : string;
7272- (** Feed title from metadata *)
7373-7474- last_updated : Ptime.t;
7575- (** Last time feed was synced *)
7676-7777- entry_count : int;
7878- (** Number of stored entries *)
7979-}
8080-8181-(** {1 Store Creation} *)
8282-8383-(** Create a store at the specified base directory *)
8484-val create : base_dir:Eio.Fs.dir_ty Eio.Path.t -> t
8585-8686-(** Create a store using an Xdge context for XDG-compliant paths *)
8787-val create_with_xdge : Xdge.t -> t
8888-8989-(** {1 Entry Storage} *)
9090-9191-(** Store a single post entry from a feed *)
9292-val store_entry :
9393- t ->
9494- feed_url:string ->
9595- feed_name:string ->
9696- feed_title:string ->
9797- post:Post.t ->
9898- sw:Eio.Switch.t ->
9999- unit
100100-101101-(** Store multiple posts from a feed *)
102102-val store_posts :
103103- t ->
104104- feed_url:string ->
105105- feed_name:string ->
106106- feed_title:string ->
107107- posts:Post.t list ->
108108- sw:Eio.Switch.t ->
109109- unit
110110-111111-(** Store entries directly from Syndic.Atom.entry list *)
112112-val store_atom_entries :
113113- t ->
114114- feed_url:string ->
115115- feed_name:string ->
116116- feed_title:string ->
117117- entries:Syndic.Atom.entry list ->
118118- sw:Eio.Switch.t ->
119119- unit
120120-121121-(** {1 Entry Retrieval} *)
122122-123123-(** Get a specific entry by Atom ID *)
124124-val get_entry :
125125- t ->
126126- feed_url:string ->
127127- atom_id:string ->
128128- sw:Eio.Switch.t ->
129129- stored_entry option
130130-131131-(** List all entries for a feed *)
132132-val list_entries : t -> feed_url:string -> stored_entry list
133133-134134-(** List entries with filtering and sorting options *)
135135-val list_entries_filtered :
136136- t ->
137137- feed_url:string ->
138138- ?since:Ptime.t ->
139139- ?until:Ptime.t ->
140140- ?limit:int ->
141141- ?sort:[`Published | `Updated | `Stored] ->
142142- unit ->
143143- stored_entry list
144144-145145-(** Check if an entry exists *)
146146-val exists_entry : t -> feed_url:string -> atom_id:string -> bool
147147-148148-(** Get the most recent entries across all feeds *)
149149-val get_recent_entries : t -> ?limit:int -> unit -> stored_entry list
150150-151151-(** Find an entry by ID across all feeds (searches by atom_id) *)
152152-val find_entry_by_id : t -> id:string -> stored_entry option
153153-154154-(** {1 Entry Management} *)
155155-156156-(** Delete a specific entry *)
157157-val delete_entry : t -> feed_url:string -> atom_id:string -> unit
158158-159159-(** Delete all entries for a feed *)
160160-val delete_feed : t -> feed_url:string -> unit
161161-162162-(** Prune old entries (keep most recent N per feed) *)
163163-val prune_entries : t -> feed_url:string -> keep:int -> int
164164-(** Returns number of entries deleted *)
165165-166166-(** Prune entries older than a given time *)
167167-val prune_old_entries : t -> feed_url:string -> older_than:Ptime.t -> int
168168-(** Returns number of entries deleted *)
169169-170170-(** {1 Feed Information} *)
171171-172172-(** List all feeds that have stored entries *)
173173-val list_feeds : t -> feed_info list
174174-175175-(** Get information about a specific feed *)
176176-val get_feed_info : t -> feed_url:string -> feed_info option
177177-178178-(** Get statistics about the store *)
179179-val stats : t -> Cacheio.Stats.t
180180-181181-(** {1 Maintenance} *)
182182-183183-(** Clean up expired entries (respects TTL if set) *)
184184-val expire : t -> int
185185-(** Returns number of entries expired *)
186186-187187-(** Compact storage (remove duplicate/orphaned data) *)
188188-val compact : t -> unit
189189-190190-(** Export entries to an Atom feed *)
191191-val export_to_atom :
192192- t ->
193193- feed_url:string ->
194194- ?title:string ->
195195- ?limit:int ->
196196- unit ->
197197- Syndic.Atom.feed
198198-199199-(** Import entries from an Atom feed *)
200200-val import_from_atom :
201201- t ->
202202- feed_url:string ->
203203- feed_name:string ->
204204- feed:Syndic.Atom.feed ->
205205- sw:Eio.Switch.t ->
206206- int
207207-(** Returns number of entries imported *)
208208-209209-(** {1 Pretty Printing} *)
210210-211211-(** Pretty printer for stored entries *)
212212-val pp_entry : Format.formatter -> stored_entry -> unit
213213-214214-(** Pretty printer for feed info *)
215215-val pp_feed_info : Format.formatter -> feed_info -> unit
216216-217217-(** Pretty printer for the store *)
218218-val pp : Format.formatter -> t -> unit
-33
stack/river/lib/util.ml
···11-(*
22- * Copyright (c) 2014, OCaml.org project
33- * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
44- *
55- * Permission to use, copy, modify, and distribute this software for any
66- * purpose with or without fee is hereby granted, provided that the above
77- * copyright notice and this permission notice appear in all copies.
88- *
99- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
1010- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1111- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
1212- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
1313- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
1414- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
1515- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1616- *)
1717-1818-open Syndic
1919-2020-(* Remove all tags *)
2121-let rec syndic_to_buffer b = function
2222- | XML.Node (_, _, subs) -> List.iter (syndic_to_buffer b) subs
2323- | XML.Data (_, d) -> Buffer.add_string b d
2424-2525-let syndic_to_string x =
2626- let b = Buffer.create 1024 in
2727- List.iter (syndic_to_buffer b) x;
2828- Buffer.contents b
2929-3030-let string_of_text_construct : Atom.text_construct -> string = function
3131- (* FIXME: we probably would like to parse the HTML and remove the tags *)
3232- | Atom.Text s | Atom.Html (_, s) -> s
3333- | Atom.Xhtml (_, x) -> syndic_to_string x
+9-10
stack/river/test/test_eio_river.ml
···11(* Test the Eio-based River library *)
2233let test_sources =
44- River.
54 [
66- { name = "OCaml Planet"; url = "https://ocaml.org/feed.xml" };
55+ River.Source.make ~name:"OCaml Planet" ~url:"https://ocaml.org/feed.xml";
76 ]
8798let main env =
109 Printf.printf "Testing River library with Eio and Requests...\n";
11101212- (* Use River.with_session for proper resource management *)
1313- River.with_session env @@ fun session ->
1111+ (* Use River.Session.with_session for proper resource management *)
1212+ River.Session.with_session env @@ fun session ->
1413 (* Test fetching feeds *)
1514 let feeds =
1615 try
1717- List.map (River.fetch session) test_sources
1616+ List.map (River.Feed.fetch session) test_sources
1817 with
1918 | Failure msg ->
2019 Printf.printf "Error: %s\n" msg;
···2827 Printf.printf "Successfully fetched %d feed(s)\n" (List.length feeds);
29283029 (* Get posts from feeds *)
3131- let posts = River.posts feeds in
3030+ let posts = River.Post.of_feeds feeds in
3231 Printf.printf "Found %d posts\n" (List.length posts);
33323433 (* Show first 3 posts *)
···40394140 List.iteri (fun i post ->
4241 Printf.printf "\nPost %d:\n" (i + 1);
4343- Printf.printf " Title: %s\n" (River.title post);
4444- Printf.printf " Author: %s\n" (River.author post);
4242+ Printf.printf " Title: %s\n" (River.Post.title post);
4343+ Printf.printf " Author: %s\n" (River.Post.author post);
4544 Printf.printf " Date: %s\n"
4646- (match River.date post with
4545+ (match River.Post.date post with
4746 | Some _ -> "Date available" (* Syndic.Date doesn't have to_string *)
4847 | None -> "N/A");
4948 Printf.printf " Link: %s\n"
5050- (match River.link post with
4949+ (match River.Post.link post with
5150 | Some uri -> Uri.to_string uri
5251 | None -> "N/A")
5352 ) first_posts
+6-7
stack/river/test/test_logging.ml
···1818 Printf.printf "---\n\n"
19192020let test_sources =
2121- River.
2221 [
2323- { name = "Test Feed"; url = "https://example.com/feed.xml" };
2222+ River.Source.make ~name:"Test Feed" ~url:"https://example.com/feed.xml";
2423 ]
25242625let main env =
2726 (* Test with logging *)
2827 Printf.printf "Testing River library with logging...\n\n";
29283030- (* Use River.with_session for proper resource management *)
3131- River.with_session env @@ fun session ->
2929+ (* Use River.Session.with_session for proper resource management *)
3030+ River.Session.with_session env @@ fun session ->
3231 (* Demonstrate fetching with logging *)
3332 let feeds =
3433 try
3535- List.map (River.fetch session) test_sources
3434+ List.map (River.Feed.fetch session) test_sources
3635 with
3736 | Failure msg ->
3837 Printf.printf "Expected error (for demo): %s\n" msg;
···44434544 if feeds <> [] then begin
4645 (* This would show post aggregation logs *)
4747- let posts = River.posts feeds in
4646+ let posts = River.Post.of_feeds feeds in
4847 Printf.printf "\nFound %d posts\n" (List.length posts);
49485049 (* This would show Atom entry creation logs *)
5151- let _entries = River.create_atom_entries posts in
5050+ let _entries = River.Format.Atom.entries_of_posts posts in
5251 Printf.printf "Created Atom entries\n"
5352 end
5453