this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

sync

+2964 -2959
+1 -1
stack/river/bin/dune
··· 1 1 (executable 2 2 (public_name river-cli) 3 3 (name river_cli) 4 - (libraries river cmdliner jsont jsont.bytesrw fmt fmt.tty fmt.cli eio eio_main eiocmd unix ptime syndic xdge)) 4 + (libraries river river_cmd cmdliner jsont jsont.bytesrw fmt fmt.tty fmt.cli eio eio_main eiocmd unix ptime syndic xdge))
+2 -795
stack/river/bin/river_cli.ml
··· 1 - (* Logging setup *) 2 - let src = Logs.Src.create "river-cli" ~doc:"River CLI application" 3 - module Log = (val Logs.src_log src : Logs.LOG) 4 - 5 - (* Types *) 6 - type user = { 7 - username : string; 8 - fullname : string; 9 - email : string option; 10 - feeds : River.source list; 11 - last_synced : string option; 12 - } 13 - 14 - type state = { 15 - xdg : Xdge.t; 16 - } 17 - 18 - (* State directory management *) 19 - module State = struct 20 - let users_dir state = Eio.Path.(Xdge.state_dir state.xdg / "users") 21 - let feeds_dir state = Eio.Path.(Xdge.state_dir state.xdg / "feeds") 22 - let user_feeds_dir state = Eio.Path.(feeds_dir state / "user") 23 - 24 - let user_file state username = 25 - Eio.Path.(users_dir state / (username ^ ".json")) 26 - 27 - let user_feed_file state username = 28 - Eio.Path.(user_feeds_dir state / (username ^ ".xml")) 29 - 30 - let ensure_directories state = 31 - let dirs = [ 32 - users_dir state; 33 - feeds_dir state; 34 - user_feeds_dir state; 35 - ] in 36 - List.iter (fun dir -> 37 - try Eio.Path.mkdir ~perm:0o755 dir 38 - with Eio.Io (Eio.Fs.E (Already_exists _), _) -> () 39 - ) dirs 40 - 41 - (* JSON codecs for user data *) 42 - 43 - (* Codec for River.source (feed) *) 44 - let source_jsont = 45 - let make name url = { River.name; url } in 46 - Jsont.Object.map ~kind:"Source" make 47 - |> Jsont.Object.mem "name" Jsont.string ~enc:(fun s -> s.River.name) 48 - |> Jsont.Object.mem "url" Jsont.string ~enc:(fun s -> s.River.url) 49 - |> Jsont.Object.finish 50 - 51 - (* Codec for user *) 52 - let user_jsont = 53 - let make username fullname email feeds last_synced = 54 - { username; fullname; email; feeds; last_synced } 55 - in 56 - Jsont.Object.map ~kind:"User" make 57 - |> Jsont.Object.mem "username" Jsont.string ~enc:(fun u -> u.username) 58 - |> Jsont.Object.mem "fullname" Jsont.string ~enc:(fun u -> u.fullname) 59 - |> Jsont.Object.opt_mem "email" Jsont.string ~enc:(fun u -> u.email) 60 - |> Jsont.Object.mem "feeds" (Jsont.list source_jsont) ~enc:(fun u -> u.feeds) 61 - |> Jsont.Object.opt_mem "last_synced" Jsont.string ~enc:(fun u -> u.last_synced) 62 - |> Jsont.Object.finish 63 - 64 - let user_of_string s = 65 - match Jsont_bytesrw.decode_string' user_jsont s with 66 - | Ok user -> Some user 67 - | Error err -> 68 - Log.err (fun m -> m "Failed to parse user JSON: %s" (Jsont.Error.to_string err)); 69 - None 70 - 71 - let user_to_string user = 72 - match Jsont_bytesrw.encode_string' ~format:Jsont.Indent user_jsont user with 73 - | Ok s -> s 74 - | Error err -> failwith ("Failed to encode user: " ^ Jsont.Error.to_string err) 75 - 76 - let load_user state username = 77 - let file = user_file state username in 78 - try 79 - let content = Eio.Path.load file in 80 - user_of_string content 81 - with 82 - | Eio.Io (Eio.Fs.E (Not_found _), _) -> None 83 - | e -> 84 - Log.err (fun m -> m "Error loading user %s: %s" username (Printexc.to_string e)); 85 - None 86 - 87 - let save_user state user = 88 - let file = user_file state user.username in 89 - let json = user_to_string user in 90 - Eio.Path.save ~create:(`Or_truncate 0o644) file json 91 - 92 - let list_users state = 93 - try 94 - Eio.Path.read_dir (users_dir state) 95 - |> List.filter_map (fun name -> 96 - if Filename.check_suffix name ".json" then 97 - Some (Filename.chop_suffix name ".json") 98 - else None 99 - ) 100 - with _ -> [] 101 - 102 - let load_existing_posts state username = 103 - let file = user_feed_file state username in 104 - try 105 - let content = Eio.Path.load file in 106 - (* Parse existing Atom feed *) 107 - let input = Xmlm.make_input (`String (0, content)) in 108 - let feed = Syndic.Atom.parse input in 109 - feed.Syndic.Atom.entries 110 - with 111 - | Eio.Io (Eio.Fs.E (Not_found _), _) -> [] 112 - | e -> 113 - Log.err (fun m -> m "Error loading existing posts for %s: %s" 114 - username (Printexc.to_string e)); 115 - [] 116 - 117 - let save_atom_feed state username entries = 118 - let file = user_feed_file state username in 119 - let feed : Syndic.Atom.feed = { 120 - id = Uri.of_string ("urn:river:user:" ^ username); 121 - title = Syndic.Atom.Text username; 122 - updated = Ptime.of_float_s (Unix.time ()) |> Option.get; 123 - entries; 124 - authors = []; 125 - categories = []; 126 - contributors = []; 127 - generator = Some { 128 - Syndic.Atom.version = Some "1.0"; 129 - uri = None; 130 - content = "River Feed Aggregator"; 131 - }; 132 - icon = None; 133 - links = []; 134 - logo = None; 135 - rights = None; 136 - subtitle = None; 137 - } in 138 - let output = Buffer.create 1024 in 139 - Syndic.Atom.output feed (`Buffer output); 140 - Eio.Path.save ~create:(`Or_truncate 0o644) file (Buffer.contents output) 141 - end 142 - 143 - (* User management commands *) 144 - module User = struct 145 - let add state ~username ~fullname ~email = 146 - match State.load_user state username with 147 - | Some _ -> 148 - Log.err (fun m -> m "User %s already exists" username); 149 - 1 150 - | None -> 151 - let user = { username; fullname; email; feeds = []; last_synced = None } in 152 - State.save_user state user; 153 - Log.info (fun m -> m "User %s created" username); 154 - 0 155 - 156 - let remove state ~username = 157 - match State.load_user state username with 158 - | None -> 159 - Log.err (fun m -> m "User %s not found" username); 160 - 1 161 - | Some _ -> 162 - (* Remove user file and feed file *) 163 - let user_file = State.user_file state username in 164 - let feed_file = State.user_feed_file state username in 165 - (try Eio.Path.unlink user_file with _ -> ()); 166 - (try Eio.Path.unlink feed_file with _ -> ()); 167 - Log.info (fun m -> m "User %s removed" username); 168 - 0 169 - 170 - let list state = 171 - let users = State.list_users state in 172 - if users = [] then 173 - Printf.printf "No users found\n" 174 - else begin 175 - Printf.printf "Users:\n"; 176 - List.iter (fun username -> 177 - match State.load_user state username with 178 - | Some user -> 179 - let email_str = match user.email with 180 - | Some e -> " <" ^ e ^ ">" 181 - | None -> "" 182 - in 183 - Printf.printf " %s (%s%s) - %d feeds\n" 184 - username user.fullname email_str (List.length user.feeds) 185 - | None -> () 186 - ) users 187 - end; 188 - 0 189 - 190 - let add_feed state ~username ~name ~url = 191 - match State.load_user state username with 192 - | None -> 193 - Log.err (fun m -> m "User %s not found" username); 194 - 1 195 - | Some user -> 196 - let feed = { River.name; url } in 197 - if List.exists (fun f -> f.River.url = url) user.feeds then begin 198 - Log.err (fun m -> m "Feed %s already exists for user %s" url username); 199 - 1 200 - end else begin 201 - let user = { user with feeds = feed :: user.feeds } in 202 - State.save_user state user; 203 - Log.info (fun m -> m "Feed %s added to user %s" name username); 204 - 0 205 - end 206 - 207 - let remove_feed state ~username ~url = 208 - match State.load_user state username with 209 - | None -> 210 - Log.err (fun m -> m "User %s not found" username); 211 - 1 212 - | Some user -> 213 - let feeds = List.filter (fun f -> f.River.url <> url) user.feeds in 214 - if List.length feeds = List.length user.feeds then begin 215 - Log.err (fun m -> m "Feed %s not found for user %s" url username); 216 - 1 217 - end else begin 218 - let user = { user with feeds } in 219 - State.save_user state user; 220 - Log.info (fun m -> m "Feed removed from user %s" username); 221 - 0 222 - end 223 - 224 - let show state ~username = 225 - match State.load_user state username with 226 - | None -> 227 - Log.err (fun m -> m "User %s not found" username); 228 - 1 229 - | Some user -> 230 - Printf.printf "Username: %s\n" user.username; 231 - Printf.printf "Full name: %s\n" user.fullname; 232 - Printf.printf "Email: %s\n" 233 - (Option.value user.email ~default:"(none)"); 234 - Printf.printf "Last synced: %s\n" 235 - (Option.value user.last_synced ~default:"never"); 236 - Printf.printf "Feeds (%d):\n" (List.length user.feeds); 237 - List.iter (fun feed -> 238 - Printf.printf " - %s: %s\n" feed.River.name feed.River.url 239 - ) user.feeds; 240 - 0 241 - end 242 - 243 - (* Sync command *) 244 - module Sync = struct 245 - let merge_entries ~existing ~new_entries = 246 - (* Create a map of new entry IDs for efficient lookup and updates *) 247 - let module UriMap = Map.Make(Uri) in 248 - let new_entries_map = 249 - List.fold_left (fun acc (entry : Syndic.Atom.entry) -> 250 - UriMap.add entry.id entry acc 251 - ) UriMap.empty new_entries 252 - in 253 - 254 - (* Update existing entries with new ones if IDs match, otherwise keep existing *) 255 - let updated_existing = 256 - List.filter_map (fun (entry : Syndic.Atom.entry) -> 257 - if UriMap.mem entry.id new_entries_map then 258 - None (* Will be replaced by new entry *) 259 - else 260 - Some entry (* Keep existing entry *) 261 - ) existing 262 - in 263 - 264 - (* Combine new entries with non-replaced existing entries *) 265 - let combined = new_entries @ updated_existing in 266 - List.sort (fun (a : Syndic.Atom.entry) (b : Syndic.Atom.entry) -> 267 - Ptime.compare b.updated a.updated 268 - ) combined 269 - 270 - let sync_user session state ~username = 271 - match State.load_user state username with 272 - | None -> 273 - Log.err (fun m -> m "User %s not found" username); 274 - 1 275 - | Some user when user.feeds = [] -> 276 - Log.info (fun m -> m "No feeds configured for user %s" username); 277 - 0 278 - | Some user -> 279 - Log.info (fun m -> m "Syncing feeds for user %s..." username); 280 - 281 - (* Fetch all feeds concurrently using the session *) 282 - let fetched_feeds = 283 - Eio.Fiber.List.filter_map (fun source -> 284 - try 285 - Log.info (fun m -> m " Fetching %s (%s)..." source.River.name source.River.url); 286 - Some (River.fetch session source) 287 - with e -> 288 - Log.err (fun m -> m " Failed to fetch %s: %s" 289 - source.River.name (Printexc.to_string e)); 290 - None 291 - ) user.feeds 292 - in 293 - 294 - if fetched_feeds = [] then begin 295 - Log.err (fun m -> m "No feeds successfully fetched"); 296 - 1 297 - end else begin 298 - (* Get posts from fetched feeds *) 299 - let posts = River.posts fetched_feeds in 300 - Log.info (fun m -> m " Found %d new posts" (List.length posts)); 301 - 302 - (* Convert to Atom entries *) 303 - let new_entries = River.create_atom_entries posts in 304 - 305 - (* Load existing entries *) 306 - let existing = State.load_existing_posts state username in 307 - Log.info (fun m -> m " Found %d existing posts" (List.length existing)); 308 - 309 - (* Merge entries *) 310 - let merged = merge_entries ~existing ~new_entries in 311 - Log.info (fun m -> m " Total posts after merge: %d" (List.length merged)); 312 - 313 - (* Save updated feed *) 314 - State.save_atom_feed state username merged; 315 - 316 - (* Update last_synced timestamp *) 317 - let now = 318 - let open Unix in 319 - let tm = gmtime (time ()) in 320 - Printf.sprintf "%04d-%02d-%02dT%02d:%02d:%02dZ" 321 - (tm.tm_year + 1900) (tm.tm_mon + 1) tm.tm_mday 322 - tm.tm_hour tm.tm_min tm.tm_sec 323 - in 324 - let user = { user with last_synced = Some now } in 325 - State.save_user state user; 326 - 327 - Log.info (fun m -> m "Sync completed for user %s" username); 328 - 0 329 - end 330 - 331 - let sync_all session state = 332 - let users = State.list_users state in 333 - if users = [] then begin 334 - Log.info (fun m -> m "No users to sync"); 335 - 0 336 - end else begin 337 - Log.info (fun m -> m "Syncing %d users concurrently..." (List.length users)); 338 - 339 - let results = 340 - Eio.Fiber.List.map (fun username -> 341 - let result = sync_user session state ~username in 342 - Log.debug (fun m -> m "Completed sync for user"); 343 - result 344 - ) users 345 - in 346 - let failures = List.filter ((<>) 0) results in 347 - if failures = [] then begin 348 - Log.info (fun m -> m "All users synced successfully"); 349 - 0 350 - end else begin 351 - Log.err (fun m -> m "Failed to sync %d users" (List.length failures)); 352 - 1 353 - end 354 - end 355 - end 356 - 357 - (* Post listing commands *) 358 - module Post = struct 359 - let format_date ptime = 360 - let open Ptime in 361 - let (y, m, d), _ = to_date_time ptime in 362 - Printf.sprintf "%02d/%02d/%04d" d m y 363 - 364 - let format_text_construct : Syndic.Atom.text_construct -> string = function 365 - | Syndic.Atom.Text s -> s 366 - | Syndic.Atom.Html (_, s) -> s 367 - | Syndic.Atom.Xhtml (_, _) -> "<xhtml content>" 368 - 369 - let get_content_length (entry : Syndic.Atom.entry) = 370 - match entry.content with 371 - | Some (Syndic.Atom.Text s) -> String.length s 372 - | Some (Syndic.Atom.Html (_, s)) -> String.length s 373 - | Some (Syndic.Atom.Xhtml (_, _)) -> 0 (* Could calculate but complex *) 374 - | Some (Syndic.Atom.Mime _) -> 0 375 - | Some (Syndic.Atom.Src _) -> 0 376 - | None -> ( 377 - match entry.summary with 378 - | Some (Syndic.Atom.Text s) -> String.length s 379 - | Some (Syndic.Atom.Html (_, s)) -> String.length s 380 - | Some (Syndic.Atom.Xhtml (_, _)) -> 0 381 - | None -> 0) 382 - 383 - let list state ~username_opt ~limit = 384 - match username_opt with 385 - | Some username -> 386 - (* List posts for a specific user *) 387 - (match State.load_user state username with 388 - | None -> 389 - Log.err (fun m -> m "User %s not found" username); 390 - 1 391 - | Some user -> 392 - let entries = State.load_existing_posts state username in 393 - if entries = [] then begin 394 - Fmt.pr "%a@." Fmt.(styled `Yellow string) 395 - ("No posts found for user " ^ username); 396 - Fmt.pr "%a@." Fmt.(styled `Faint string) 397 - ("(Run 'river-cli sync " ^ username ^ "' to fetch posts)"); 398 - 0 399 - end else begin 400 - let to_show = match limit with 401 - | Some n -> List.filteri (fun i _ -> i < n) entries 402 - | None -> entries 403 - in 404 - Fmt.pr "%a@." 405 - Fmt.(styled `Bold string) 406 - (Printf.sprintf "Posts for %s (%d total, showing %d):" 407 - user.fullname (List.length entries) (List.length to_show)); 408 - 409 - List.iteri (fun i (entry : Syndic.Atom.entry) -> 410 - (* Use user's full name for all entries *) 411 - let author_name = user.fullname in 412 - let content_len = get_content_length entry in 413 - let entry_id = Uri.to_string entry.id in 414 - Fmt.pr "%a %a@." 415 - Fmt.(styled `Cyan string) (Printf.sprintf "[%d]" (i + 1)) 416 - Fmt.(styled (`Fg `Blue) string) (format_text_construct entry.title); 417 - Fmt.pr " %a %a@." 418 - Fmt.(styled `Faint string) "ID:" 419 - Fmt.(styled `Faint string) entry_id; 420 - Fmt.pr " %a - %a - %a chars@." 421 - Fmt.(styled `Green string) author_name 422 - Fmt.(styled `Magenta string) (format_date entry.updated) 423 - Fmt.(styled `Yellow string) (string_of_int content_len) 424 - ) to_show; 425 - 0 426 - end) 427 - | None -> 428 - (* List posts from all users *) 429 - let users = State.list_users state in 430 - if users = [] then begin 431 - Fmt.pr "%a@." Fmt.(styled `Yellow string) 432 - "No users found"; 433 - Fmt.pr "%a@." Fmt.(styled `Faint string) 434 - "(Run 'river-cli user add' to create a user)"; 435 - 0 436 - end else begin 437 - (* Load user data to get full names *) 438 - let user_map = 439 - List.fold_left (fun acc username -> 440 - match State.load_user state username with 441 - | Some user -> (username, user) :: acc 442 - | None -> acc 443 - ) [] users 444 - in 445 - 446 - (* Collect all entries from all users with username tag *) 447 - let all_entries = 448 - List.concat_map (fun username -> 449 - let entries = State.load_existing_posts state username in 450 - List.map (fun entry -> (username, entry)) entries 451 - ) users 452 - in 453 - 454 - if all_entries = [] then begin 455 - Fmt.pr "%a@." Fmt.(styled `Yellow string) 456 - "No posts found for any users"; 457 - Fmt.pr "%a@." Fmt.(styled `Faint string) 458 - "(Run 'river-cli sync' to fetch posts)"; 459 - 0 460 - end else begin 461 - (* Sort by date (newest first) *) 462 - let sorted = List.sort (fun (_, a : string * Syndic.Atom.entry) (_, b) -> 463 - Ptime.compare b.updated a.updated 464 - ) all_entries in 465 - 466 - let to_show = match limit with 467 - | Some n -> List.filteri (fun i _ -> i < n) sorted 468 - | None -> sorted 469 - in 470 - 471 - Fmt.pr "%a@." 472 - Fmt.(styled `Bold string) 473 - (Printf.sprintf "Posts from all users (%d total, showing %d):" 474 - (List.length all_entries) (List.length to_show)); 475 - 476 - List.iteri (fun i (username, entry : string * Syndic.Atom.entry) -> 477 - (* Use user's full name instead of feed author *) 478 - let author_name = 479 - match List.assoc_opt username user_map with 480 - | Some user -> user.fullname 481 - | None -> 482 - (* Fallback to entry author if user not found *) 483 - let (author, _) = entry.authors in 484 - String.trim author.name 485 - in 486 - let content_len = get_content_length entry in 487 - let entry_id = Uri.to_string entry.id in 488 - (* Shorten ID for display if it's too long *) 489 - Fmt.pr "%a %a@." 490 - Fmt.(styled `Cyan string) (Printf.sprintf "[%d]" (i + 1)) 491 - Fmt.(styled (`Fg `Blue) string) (format_text_construct entry.title); 492 - Fmt.pr " %a %a@." 493 - Fmt.(styled `Faint string) "ID:" 494 - Fmt.(styled `Faint string) entry_id; 495 - Fmt.pr " %a - %a - %a chars@." 496 - Fmt.(styled `Green string) author_name 497 - Fmt.(styled `Magenta string) (format_date entry.updated) 498 - Fmt.(styled `Yellow string) (string_of_int content_len) 499 - ) to_show; 500 - 0 501 - end 502 - end 503 - end 504 - 505 - (* Cmdliner interface *) 506 - open Cmdliner 507 - 508 - let username_arg = 509 - let doc = "Username" in 510 - Arg.(required & pos 0 (some string) None & info [] ~docv:"USERNAME" ~doc) 511 - 512 - let fullname_arg = 513 - let doc = "Full name of the user" in 514 - Arg.(required & opt (some string) None & info ["name"; "n"] ~doc) 515 - 516 - let email_arg = 517 - let doc = "Email address of the user (optional)" in 518 - Arg.(value & opt (some string) None & info ["email"; "e"] ~doc) 519 - 520 - let feed_name_arg = 521 - let doc = "Feed name/label" in 522 - Arg.(required & opt (some string) None & info ["name"; "n"] ~doc) 523 - 524 - let feed_url_arg = 525 - let doc = "Feed URL" in 526 - Arg.(required & opt (some string) None & info ["url"; "u"] ~doc) 527 - 528 - (* Note: eiocmd handles all logging setup automatically via Logs_cli *) 529 - 530 - (* User commands - these don't need network, just filesystem access via Xdge *) 531 - let user_add_cmd = 532 - let doc = "Add a new user" in 533 - Eiocmd.run 534 - ~use_keyeio:false 535 - ~info:(Cmd.info "add" ~doc) 536 - ~app_name:"river" 537 - ~service:"river" 538 - Term.(const (fun username fullname email _env xdg _profile -> 539 - let state = { xdg } in 540 - State.ensure_directories state; 541 - User.add state ~username ~fullname ~email 542 - ) $ username_arg $ fullname_arg $ email_arg) 543 - 544 - let user_remove_cmd = 545 - let doc = "Remove a user" in 546 - Eiocmd.run 547 - ~use_keyeio:false 548 - ~info:(Cmd.info "remove" ~doc) 549 - ~app_name:"river" 550 - ~service:"river" 551 - Term.(const (fun username _env xdg _profile -> 552 - let state = { xdg } in 553 - User.remove state ~username 554 - ) $ username_arg) 555 - 556 - let user_list_cmd = 557 - let doc = "List all users" in 558 - Eiocmd.run 559 - ~use_keyeio:false 560 - ~info:(Cmd.info "list" ~doc) 561 - ~app_name:"river" 562 - ~service:"river" 563 - Term.(const (fun _env xdg _profile -> 564 - let state = { xdg } in 565 - User.list state 566 - )) 567 - 568 - let user_show_cmd = 569 - let doc = "Show user details" in 570 - Eiocmd.run 571 - ~use_keyeio:false 572 - ~info:(Cmd.info "show" ~doc) 573 - ~app_name:"river" 574 - ~service:"river" 575 - Term.(const (fun username _env xdg _profile -> 576 - let state = { xdg } in 577 - User.show state ~username 578 - ) $ username_arg) 579 - 580 - let user_add_feed_cmd = 581 - let doc = "Add a feed to a user" in 582 - Eiocmd.run 583 - ~use_keyeio:false 584 - ~info:(Cmd.info "add-feed" ~doc) 585 - ~app_name:"river" 586 - ~service:"river" 587 - Term.(const (fun username name url _env xdg _profile -> 588 - let state = { xdg } in 589 - User.add_feed state ~username ~name ~url 590 - ) $ username_arg $ feed_name_arg $ feed_url_arg) 591 - 592 - let user_remove_feed_cmd = 593 - let doc = "Remove a feed from a user" in 594 - Eiocmd.run 595 - ~use_keyeio:false 596 - ~info:(Cmd.info "remove-feed" ~doc) 597 - ~app_name:"river" 598 - ~service:"river" 599 - Term.(const (fun username url _env xdg _profile -> 600 - let state = { xdg } in 601 - User.remove_feed state ~username ~url 602 - ) $ username_arg $ feed_url_arg) 603 - 604 - let user_cmd = 605 - let doc = "Manage users" in 606 - let info = Cmd.info "user" ~doc in 607 - Cmd.group info [ 608 - user_add_cmd; 609 - user_remove_cmd; 610 - user_list_cmd; 611 - user_show_cmd; 612 - user_add_feed_cmd; 613 - user_remove_feed_cmd; 614 - ] 615 - 616 - (* Sync command - needs Eio environment for HTTP requests *) 617 - let sync_cmd = 618 - let doc = "Sync feeds for users" in 619 - let username_opt = 620 - let doc = "Sync specific user (omit to sync all)" in 621 - Arg.(value & pos 0 (some string) None & info [] ~docv:"USERNAME" ~doc) 622 - in 623 - Eiocmd.run 624 - ~use_keyeio:false 625 - ~info:(Cmd.info "sync" ~doc) 626 - ~app_name:"river" 627 - ~service:"river" 628 - Term.(const (fun username_opt env xdg _profile -> 629 - let state = { xdg } in 630 - State.ensure_directories state; 631 - 632 - (* Use River.with_session for resource management *) 633 - River.with_session env @@ fun session -> 634 - match username_opt with 635 - | Some username -> Sync.sync_user session state ~username 636 - | None -> Sync.sync_all session state 637 - ) $ username_opt) 638 - 639 - (* List command - doesn't need network, just reads local files *) 640 - let list_cmd = 641 - let doc = "List recent posts (from all users by default, or specify a user)" in 642 - let username_opt_arg = 643 - let doc = "Username (optional - defaults to all users)" in 644 - Arg.(value & pos 0 (some string) None & info [] ~docv:"USERNAME" ~doc) 645 - in 646 - let limit_arg = 647 - let doc = "Limit number of posts to display (default: all)" in 648 - Arg.(value & opt (some int) None & info ["limit"; "n"] ~doc) 649 - in 650 - Eiocmd.run 651 - ~use_keyeio:false 652 - ~info:(Cmd.info "list" ~doc) 653 - ~app_name:"river" 654 - ~service:"river" 655 - Term.(const (fun username_opt limit _env xdg _profile -> 656 - let state = { xdg } in 657 - Post.list state ~username_opt ~limit 658 - ) $ username_opt_arg $ limit_arg) 659 - 660 - (* Info command - show detailed post information *) 661 - let info_cmd = 662 - let doc = "Display detailed information about a post by ID" in 663 - let id_arg = 664 - let doc = "Exact post ID to display" in 665 - Arg.(required & pos 0 (some string) None & info [] ~docv:"ID" ~doc) 666 - in 667 - let verbose_flag = 668 - let doc = "Show full content and all metadata" in 669 - Arg.(value & flag & info ["full"; "f"] ~doc) 670 - in 671 - Eiocmd.run 672 - ~use_keyeio:false 673 - ~info:(Cmd.info "info" ~doc) 674 - ~app_name:"river" 675 - ~service:"river" 676 - Term.(const (fun id verbose _env xdg _profile -> 677 - let state = { xdg } in 678 - let users = State.list_users state in 679 - 680 - (* Load all entries from all users *) 681 - let all_entries = 682 - List.concat_map (fun username -> 683 - let entries = State.load_existing_posts state username in 684 - List.map (fun entry -> (username, entry)) entries 685 - ) users 686 - in 687 - 688 - (* Find entry with matching ID *) 689 - let entry_opt = List.find_opt (fun (_username, entry : string * Syndic.Atom.entry) -> 690 - Uri.to_string entry.id = id 691 - ) all_entries in 692 - 693 - match entry_opt with 694 - | None -> 695 - Fmt.pr "%a@." Fmt.(styled `Red string) (Printf.sprintf "No post found with ID: %s" id); 696 - Fmt.pr "%a@." Fmt.(styled `Faint string) "Hint: Use 'river-cli list' to see available posts and their IDs"; 697 - 1 698 - | Some (username, entry) -> 699 - (* Get user info for author name *) 700 - let user_opt = State.load_user state username in 701 - let author_name = match user_opt with 702 - | Some user -> user.fullname 703 - | None -> 704 - let (author, _) = entry.authors in 705 - String.trim author.name 706 - in 707 - 708 - (* Print header *) 709 - Fmt.pr "@."; 710 - Fmt.pr "%a@." Fmt.(styled `Bold string) 711 - (String.make 70 '='); 712 - Fmt.pr " %a@." Fmt.(styled `Bold (styled (`Fg `Blue) string)) 713 - (Post.format_text_construct entry.title); 714 - Fmt.pr "%a@.@." Fmt.(styled `Bold string) 715 - (String.make 70 '='); 716 - 717 - (* Basic metadata *) 718 - Fmt.pr "%a %s@." Fmt.(styled `Cyan string) "ID: " (Uri.to_string entry.id); 719 - 720 - (* Links *) 721 - let links = entry.links in 722 - (match links with 723 - | [] -> () 724 - | link :: _ -> 725 - Fmt.pr "%a %s@." Fmt.(styled `Cyan string) "URL: " (Uri.to_string link.href)); 726 - 727 - Fmt.pr "%a %s@." Fmt.(styled `Cyan string) "Author: " author_name; 728 - 729 - Fmt.pr "%a %s@." Fmt.(styled `Cyan string) "Updated: " (Ptime.to_rfc3339 entry.updated); 730 - 731 - (* Summary *) 732 - (match entry.summary with 733 - | Some summary -> 734 - Fmt.pr "@.%a@." Fmt.(styled (`Fg `Yellow) string) "Summary:"; 735 - let summary_text = Post.format_text_construct summary in 736 - Fmt.pr " %s@." summary_text 737 - | None -> ()); 738 - 739 - (* Content *) 740 - (match entry.content with 741 - | Some content -> 742 - let content_html = match content with 743 - | Syndic.Atom.Text s -> s 744 - | Syndic.Atom.Html (_, s) -> s 745 - | Syndic.Atom.Xhtml (_, nodes) -> 746 - String.concat "" (List.map Syndic.XML.to_string nodes) 747 - | Syndic.Atom.Mime _ -> "(MIME content)" 748 - | Syndic.Atom.Src _ -> "(External content)" 749 - in 750 - 751 - (* Extract outgoing links *) 752 - let links = Markdown_converter.extract_links content_html in 753 - 754 - (* Convert to markdown *) 755 - let content_markdown = Markdown_converter.to_markdown content_html in 756 - 757 - Fmt.pr "@.%a@." Fmt.(styled (`Fg `Green) string) "Content:"; 758 - if verbose then 759 - Fmt.pr "%s@." content_markdown 760 - else begin 761 - let preview = 762 - if String.length content_markdown > 500 then 763 - String.sub content_markdown 0 500 ^ "..." 764 - else 765 - content_markdown 766 - in 767 - Fmt.pr "%s@." preview; 768 - if String.length content_markdown > 500 then 769 - Fmt.pr "@.%a@." Fmt.(styled `Faint string) "(Use --full to see full content)" 770 - end; 771 - 772 - (* Display outgoing links *) 773 - if links <> [] then begin 774 - Fmt.pr "@.%a (%d)@." Fmt.(styled (`Fg `Cyan) string) "Outgoing Links:" (List.length links); 775 - List.iteri (fun i (href, text) -> 776 - let link_text = if text = "" then "(no text)" else text in 777 - Fmt.pr " %a %s@." 778 - Fmt.(styled `Faint string) (Printf.sprintf "[%d]" (i + 1)) 779 - (Uri.to_string (Uri.of_string href)); 780 - if text <> "" && String.length text < 80 then 781 - Fmt.pr " %a %s@." Fmt.(styled `Faint string) "→" link_text 782 - ) links 783 - end 784 - | None -> ()); 785 - 786 - Fmt.pr "@."; 787 - 0 788 - ) $ id_arg $ verbose_flag) 789 - 790 - let main_cmd = 791 - let doc = "River feed management CLI" in 792 - let info = Cmd.info "river-cli" ~version:"1.0" ~doc in 793 - Cmd.group info [user_cmd; sync_cmd; list_cmd; info_cmd] 794 - 795 - let () = exit (Cmd.eval' main_cmd) 1 + (* Ultra-thin binary that delegates all command handling to River_cmd *) 2 + let () = exit (Cmdliner.Cmd.eval' River_cmd.main_cmd)
+4
stack/river/cmd/dune
··· 1 + (library 2 + (name river_cmd) 3 + (public_name river.cmd) 4 + (libraries river cmdliner eiocmd fmt logs))
+761
stack/river/cmd/river_cmd.ml
··· 1 + (** River.Cmd - Cmdliner terms for River CLI 2 + 3 + This module provides cmdliner terms that are thin wrappers around 4 + the River library functions. All business logic resides in the 5 + main River module. *) 6 + 7 + open Cmdliner 8 + 9 + (* Logging setup *) 10 + let src = Logs.Src.create "river-cli" ~doc:"River CLI application" 11 + module Log = (val Logs.src_log src : Logs.LOG) 12 + 13 + (* User management commands *) 14 + module User = struct 15 + let add state ~username ~fullname ~email = 16 + let user = River.User.make ~username ~fullname ?email () in 17 + match River.State.create_user state user with 18 + | Ok () -> 19 + Log.info (fun m -> m "User %s created" username); 20 + 0 21 + | Error err -> 22 + Log.err (fun m -> m "%s" err); 23 + 1 24 + 25 + let remove state ~username = 26 + match River.State.delete_user state ~username with 27 + | Ok () -> 28 + Log.info (fun m -> m "User %s removed" username); 29 + 0 30 + | Error err -> 31 + Log.err (fun m -> m "%s" err); 32 + 1 33 + 34 + let list state = 35 + let users = River.State.list_users state in 36 + if users = [] then 37 + Printf.printf "No users found\n" 38 + else begin 39 + Printf.printf "Users:\n"; 40 + List.iter (fun username -> 41 + match River.State.get_user state ~username with 42 + | Some user -> 43 + let email_str = match River.User.email user with 44 + | Some e -> " <" ^ e ^ ">" 45 + | None -> "" 46 + in 47 + Printf.printf " %s (%s%s) - %d feeds\n" 48 + username (River.User.fullname user) email_str 49 + (List.length (River.User.feeds user)) 50 + | None -> () 51 + ) users 52 + end; 53 + 0 54 + 55 + let add_feed state ~username ~name ~url = 56 + match River.State.get_user state ~username with 57 + | None -> 58 + Log.err (fun m -> m "User %s not found" username); 59 + 1 60 + | Some user -> 61 + let source = River.Source.make ~name ~url in 62 + let user = River.User.add_feed user source in 63 + (match River.State.update_user state user with 64 + | Ok () -> 65 + Log.info (fun m -> m "Feed %s added to user %s" name username); 66 + 0 67 + | Error err -> 68 + Log.err (fun m -> m "%s" err); 69 + 1) 70 + 71 + let remove_feed state ~username ~url = 72 + match River.State.get_user state ~username with 73 + | None -> 74 + Log.err (fun m -> m "User %s not found" username); 75 + 1 76 + | Some user -> 77 + let user = River.User.remove_feed user ~url in 78 + (match River.State.update_user state user with 79 + | Ok () -> 80 + Log.info (fun m -> m "Feed removed from user %s" username); 81 + 0 82 + | Error err -> 83 + Log.err (fun m -> m "%s" err); 84 + 1) 85 + 86 + let show state ~username = 87 + match River.State.get_user state ~username with 88 + | None -> 89 + Log.err (fun m -> m "User %s not found" username); 90 + 1 91 + | Some user -> 92 + Printf.printf "Username: %s\n" (River.User.username user); 93 + Printf.printf "Full name: %s\n" (River.User.fullname user); 94 + Printf.printf "Email: %s\n" 95 + (Option.value (River.User.email user) ~default:"(none)"); 96 + Printf.printf "Last synced: %s\n" 97 + (Option.value (River.User.last_synced user) ~default:"never"); 98 + let feeds = River.User.feeds user in 99 + Printf.printf "Feeds (%d):\n" (List.length feeds); 100 + List.iter (fun feed -> 101 + Printf.printf " - %s: %s\n" 102 + (River.Source.name feed) (River.Source.url feed) 103 + ) feeds; 104 + 0 105 + end 106 + 107 + (* Sync command *) 108 + module Sync = struct 109 + let sync_user env state ~username = 110 + match River.State.sync_user env state ~username with 111 + | Ok () -> 112 + Log.info (fun m -> m "Sync completed for user %s" username); 113 + 0 114 + | Error err -> 115 + Log.err (fun m -> m "Sync failed: %s" err); 116 + 1 117 + 118 + let sync_all env state = 119 + match River.State.sync_all env state with 120 + | Ok (success, fail) -> 121 + Log.info (fun m -> m "Synced %d users (%d failed)" success fail); 122 + if fail = 0 then 0 else 1 123 + | Error err -> 124 + Log.err (fun m -> m "Sync failed: %s" err); 125 + 1 126 + end 127 + 128 + (* Post listing commands *) 129 + module Post = struct 130 + let format_date ptime = 131 + let open Ptime in 132 + let (y, m, d), _ = to_date_time ptime in 133 + Printf.sprintf "%02d/%02d/%04d" d m y 134 + 135 + let format_text_construct : Syndic.Atom.text_construct -> string = function 136 + | Syndic.Atom.Text s -> s 137 + | Syndic.Atom.Html (_, s) -> s 138 + | Syndic.Atom.Xhtml (_, _) -> "<xhtml content>" 139 + 140 + let get_content_text (entry : Syndic.Atom.entry) = 141 + match entry.content with 142 + | Some (Syndic.Atom.Text s) -> Some s 143 + | Some (Syndic.Atom.Html (_, s)) -> Some s 144 + | Some (Syndic.Atom.Xhtml (_, _)) -> Some "<xhtml content>" 145 + | Some (Syndic.Atom.Mime _) -> Some "<mime content>" 146 + | Some (Syndic.Atom.Src _) -> Some "<external content>" 147 + | None -> None 148 + 149 + let truncate_string s max_len = 150 + if String.length s <= max_len then s 151 + else String.sub s 0 max_len ^ "..." 152 + 153 + let list state ~username_opt ~limit ~metadata = 154 + match username_opt with 155 + | Some username -> 156 + (* List posts for a specific user *) 157 + (match River.State.get_user state ~username with 158 + | None -> 159 + Log.err (fun m -> m "User %s not found" username); 160 + 1 161 + | Some user -> 162 + let entries = River.State.get_user_posts state ~username ?limit () in 163 + if entries = [] then begin 164 + Fmt.pr "%a@." Fmt.(styled `Yellow string) 165 + ("No posts found for user " ^ username); 166 + Fmt.pr "%a@." Fmt.(styled `Faint string) 167 + ("(Run 'river-cli sync " ^ username ^ "' to fetch posts)"); 168 + 0 169 + end else begin 170 + Fmt.pr "@.%a@.@." 171 + Fmt.(styled `Bold (styled (`Fg `Cyan) string)) 172 + (Printf.sprintf "Posts for %s (%d total)" 173 + (River.User.fullname user) (List.length entries)); 174 + 175 + List.iter (fun (entry : Syndic.Atom.entry) -> 176 + let entry_id = Uri.to_string entry.id in 177 + 178 + (* Title and ID on separate lines for clarity *) 179 + Fmt.pr "%a@." 180 + Fmt.(styled `Bold (styled (`Fg `Blue) string)) 181 + (format_text_construct entry.title); 182 + Fmt.pr " %a %a@." 183 + Fmt.(styled `Faint string) "ID:" 184 + Fmt.(styled (`Fg `Magenta) string) entry_id; 185 + 186 + if metadata then begin 187 + (* Show all metadata *) 188 + Fmt.pr " %a %a@." 189 + Fmt.(styled `Faint string) "Author:" 190 + Fmt.(styled `Green string) (River.User.fullname user); 191 + Fmt.pr " %a %a@." 192 + Fmt.(styled `Faint string) "Updated:" 193 + Fmt.(styled `Yellow string) (format_date entry.updated); 194 + 195 + (* Summary if present *) 196 + (match entry.summary with 197 + | Some summary -> 198 + let summary_text = format_text_construct summary in 199 + Fmt.pr " %a %a@." 200 + Fmt.(styled `Faint string) "Summary:" 201 + Fmt.string (truncate_string summary_text 150) 202 + | None -> ()); 203 + 204 + (* Content (truncated) *) 205 + (match get_content_text entry with 206 + | Some content -> 207 + let clean = Str.global_replace (Str.regexp "<[^>]*>") "" content in 208 + let clean = Str.global_replace (Str.regexp "[ \t\n\r]+") " " clean in 209 + Fmt.pr " %a %a@." 210 + Fmt.(styled `Faint string) "Content:" 211 + Fmt.string (truncate_string (String.trim clean) 200) 212 + | None -> ()); 213 + 214 + (* Links *) 215 + (match entry.links with 216 + | [] -> () 217 + | links -> 218 + Fmt.pr " %a@." Fmt.(styled `Faint string) "Links:"; 219 + List.iter (fun link -> 220 + Fmt.pr " %a@." Fmt.(styled (`Fg `Cyan) string) 221 + (Uri.to_string link.Syndic.Atom.href) 222 + ) links); 223 + 224 + (* Tags/Categories *) 225 + (match entry.categories with 226 + | [] -> () 227 + | categories -> 228 + Fmt.pr " %a %a@." Fmt.(styled `Faint string) "Tags:" 229 + Fmt.(list ~sep:comma (styled (`Fg `Yellow) string)) 230 + (List.map (fun c -> c.Syndic.Atom.term) categories)); 231 + end else begin 232 + (* Compact view: just author and date *) 233 + Fmt.pr " %a %a %a %a@." 234 + Fmt.(styled `Faint string) "By" 235 + Fmt.(styled `Green string) (River.User.fullname user) 236 + Fmt.(styled `Faint string) "on" 237 + Fmt.(styled `Yellow string) (format_date entry.updated); 238 + end; 239 + Fmt.pr "@." 240 + ) entries; 241 + 0 242 + end) 243 + | None -> 244 + (* List posts from all users *) 245 + let all_posts = River.State.get_all_posts state ?limit () in 246 + if all_posts = [] then begin 247 + Fmt.pr "%a@." Fmt.(styled `Yellow string) 248 + "No posts found for any users"; 249 + Fmt.pr "%a@." Fmt.(styled `Faint string) 250 + "(Run 'river-cli sync' to fetch posts)"; 251 + 0 252 + end else begin 253 + Fmt.pr "@.%a@.@." 254 + Fmt.(styled `Bold (styled (`Fg `Cyan) string)) 255 + (Printf.sprintf "Posts from all users (%d total)" 256 + (List.length all_posts)); 257 + 258 + List.iter (fun (username, entry : string * Syndic.Atom.entry) -> 259 + let author_name = 260 + match River.State.get_user state ~username with 261 + | Some user -> River.User.fullname user 262 + | None -> 263 + let (author, _) = entry.authors in 264 + String.trim author.name 265 + in 266 + let entry_id = Uri.to_string entry.id in 267 + 268 + (* Title and ID on separate lines for clarity *) 269 + Fmt.pr "%a@." 270 + Fmt.(styled `Bold (styled (`Fg `Blue) string)) 271 + (format_text_construct entry.title); 272 + Fmt.pr " %a %a@." 273 + Fmt.(styled `Faint string) "ID:" 274 + Fmt.(styled (`Fg `Magenta) string) entry_id; 275 + 276 + if metadata then begin 277 + (* Show all metadata *) 278 + Fmt.pr " %a %a@." 279 + Fmt.(styled `Faint string) "Author:" 280 + Fmt.(styled `Green string) author_name; 281 + Fmt.pr " %a %a@." 282 + Fmt.(styled `Faint string) "Updated:" 283 + Fmt.(styled `Yellow string) (format_date entry.updated); 284 + 285 + (* Summary if present *) 286 + (match entry.summary with 287 + | Some summary -> 288 + let summary_text = format_text_construct summary in 289 + Fmt.pr " %a %a@." 290 + Fmt.(styled `Faint string) "Summary:" 291 + Fmt.string (truncate_string summary_text 150) 292 + | None -> ()); 293 + 294 + (* Content (truncated) *) 295 + (match get_content_text entry with 296 + | Some content -> 297 + let clean = Str.global_replace (Str.regexp "<[^>]*>") "" content in 298 + let clean = Str.global_replace (Str.regexp "[ \t\n\r]+") " " clean in 299 + Fmt.pr " %a %a@." 300 + Fmt.(styled `Faint string) "Content:" 301 + Fmt.string (truncate_string (String.trim clean) 200) 302 + | None -> ()); 303 + 304 + (* Links *) 305 + (match entry.links with 306 + | [] -> () 307 + | links -> 308 + Fmt.pr " %a@." Fmt.(styled `Faint string) "Links:"; 309 + List.iter (fun link -> 310 + Fmt.pr " %a@." Fmt.(styled (`Fg `Cyan) string) 311 + (Uri.to_string link.Syndic.Atom.href) 312 + ) links); 313 + 314 + (* Tags/Categories *) 315 + (match entry.categories with 316 + | [] -> () 317 + | categories -> 318 + Fmt.pr " %a %a@." Fmt.(styled `Faint string) "Tags:" 319 + Fmt.(list ~sep:comma (styled (`Fg `Yellow) string)) 320 + (List.map (fun c -> c.Syndic.Atom.term) categories)); 321 + end else begin 322 + (* Compact view: just author and date *) 323 + Fmt.pr " %a %a %a %a@." 324 + Fmt.(styled `Faint string) "By" 325 + Fmt.(styled `Green string) author_name 326 + Fmt.(styled `Faint string) "on" 327 + Fmt.(styled `Yellow string) (format_date entry.updated); 328 + end; 329 + Fmt.pr "@." 330 + ) all_posts; 331 + 0 332 + end 333 + 334 + let info state ~post_id ~verbose = 335 + (* Find the post by ID across all users *) 336 + let all_posts = River.State.get_all_posts state () in 337 + match List.find_opt (fun (_, entry : string * Syndic.Atom.entry) -> 338 + Uri.to_string entry.id = post_id 339 + ) all_posts with 340 + | None -> 341 + Log.err (fun m -> m "Post with ID %s not found" post_id); 342 + 1 343 + | Some (username, entry) -> 344 + (* Display post information *) 345 + Fmt.pr "@."; 346 + Fmt.pr "%a@." Fmt.(styled `Bold string) (String.make 70 '='); 347 + Fmt.pr " %a@." Fmt.(styled `Bold (styled (`Fg `Blue) string)) 348 + (format_text_construct entry.title); 349 + Fmt.pr "%a@.@." Fmt.(styled `Bold string) (String.make 70 '='); 350 + 351 + (* Author and date *) 352 + let author_name = 353 + match River.State.get_user state ~username with 354 + | Some user -> River.User.fullname user 355 + | None -> 356 + let (author, _) = entry.authors in 357 + String.trim author.name 358 + in 359 + Fmt.pr "%a %a@." Fmt.(styled `Cyan string) "Author:" 360 + Fmt.(styled `Green string) author_name; 361 + Fmt.pr "%a %a@." Fmt.(styled `Cyan string) "Published:" 362 + Fmt.(styled `Magenta string) (format_date entry.updated); 363 + Fmt.pr "%a %a@.@." Fmt.(styled `Cyan string) "ID:" 364 + Fmt.(styled `Faint string) post_id; 365 + 366 + (* Summary if present *) 367 + (match entry.summary with 368 + | Some summary -> 369 + Fmt.pr "%a@." Fmt.(styled (`Fg `Yellow) string) "Summary:"; 370 + Fmt.pr "%s@.@." (format_text_construct summary) 371 + | None -> ()); 372 + 373 + (* Content *) 374 + (match entry.content with 375 + | Some content -> 376 + let content_str = match content with 377 + | Syndic.Atom.Text s -> s 378 + | Syndic.Atom.Html (_, s) -> s 379 + | Syndic.Atom.Xhtml (_, _) -> "<xhtml content>" 380 + | Syndic.Atom.Mime _ -> "<mime content>" 381 + | Syndic.Atom.Src _ -> "<external content>" 382 + in 383 + Fmt.pr "%a@." Fmt.(styled (`Fg `Yellow) string) "Content:"; 384 + if verbose then begin 385 + (* In verbose mode, attempt to convert HTML to markdown *) 386 + let markdown = try 387 + (* Simple HTML to markdown conversion - just strip tags for now *) 388 + let re = Str.regexp "<[^>]*>" in 389 + Str.global_replace re "" content_str 390 + with _ -> content_str 391 + in 392 + Fmt.pr "%s@.@." markdown 393 + end else begin 394 + (* Non-verbose mode: show truncated content *) 395 + let max_len = 500 in 396 + if String.length content_str > max_len then 397 + Fmt.pr "%s...@.@." (String.sub content_str 0 max_len) 398 + else 399 + Fmt.pr "%s@.@." content_str 400 + end 401 + | None -> ()); 402 + 403 + (* Links *) 404 + (match entry.links with 405 + | [] -> () 406 + | links -> 407 + Fmt.pr "%a@." Fmt.(styled `Cyan string) "Links:"; 408 + List.iter (fun link -> 409 + Fmt.pr " - %s@." (Uri.to_string link.Syndic.Atom.href) 410 + ) links; 411 + Fmt.pr "@."); 412 + 413 + (* Categories/Tags if verbose *) 414 + if verbose then begin 415 + match entry.categories with 416 + | [] -> () 417 + | categories -> 418 + Fmt.pr "%a@." Fmt.(styled `Cyan string) "Tags:"; 419 + List.iter (fun cat -> 420 + Fmt.pr " - %s@." cat.Syndic.Atom.term 421 + ) categories; 422 + Fmt.pr "@." 423 + end; 424 + 425 + 0 426 + end 427 + 428 + (* Cmdliner argument definitions *) 429 + let username_arg = 430 + let doc = "Username" in 431 + Arg.(required & pos 0 (some string) None & info [] ~docv:"USERNAME" ~doc) 432 + 433 + let fullname_arg = 434 + let doc = "Full name of the user" in 435 + Arg.(required & opt (some string) None & info ["name"; "n"] ~doc) 436 + 437 + let email_arg = 438 + let doc = "Email address of the user (optional)" in 439 + Arg.(value & opt (some string) None & info ["email"; "e"] ~doc) 440 + 441 + let feed_name_arg = 442 + let doc = "Feed name/label" in 443 + Arg.(required & opt (some string) None & info ["name"; "n"] ~doc) 444 + 445 + let feed_url_arg = 446 + let doc = "Feed URL" in 447 + Arg.(required & opt (some string) None & info ["url"; "u"] ~doc) 448 + 449 + (* User commands - these don't need network, just filesystem access *) 450 + let user_add = 451 + Term.(const (fun username fullname email env _xdg _profile -> 452 + let state = River.State.create env ~app_name:"river" in 453 + User.add state ~username ~fullname ~email 454 + ) $ username_arg $ fullname_arg $ email_arg) 455 + 456 + let user_remove = 457 + Term.(const (fun username env _xdg _profile -> 458 + let state = River.State.create env ~app_name:"river" in 459 + User.remove state ~username 460 + ) $ username_arg) 461 + 462 + let user_list = 463 + Term.(const (fun env _xdg _profile -> 464 + let state = River.State.create env ~app_name:"river" in 465 + User.list state 466 + )) 467 + 468 + let user_show = 469 + Term.(const (fun username env _xdg _profile -> 470 + let state = River.State.create env ~app_name:"river" in 471 + User.show state ~username 472 + ) $ username_arg) 473 + 474 + let user_add_feed = 475 + Term.(const (fun username name url env _xdg _profile -> 476 + let state = River.State.create env ~app_name:"river" in 477 + User.add_feed state ~username ~name ~url 478 + ) $ username_arg $ feed_name_arg $ feed_url_arg) 479 + 480 + let user_remove_feed = 481 + Term.(const (fun username url env _xdg _profile -> 482 + let state = River.State.create env ~app_name:"river" in 483 + User.remove_feed state ~username ~url 484 + ) $ username_arg $ feed_url_arg) 485 + 486 + let user_cmd = 487 + let doc = "Manage users" in 488 + let info = Cmd.info "user" ~doc in 489 + let user_add_cmd = 490 + Eiocmd.run 491 + ~use_keyeio:false 492 + ~info:(Cmd.info "add" ~doc:"Add a new user") 493 + ~app_name:"river" 494 + ~service:"river" 495 + user_add 496 + in 497 + let user_remove_cmd = 498 + Eiocmd.run 499 + ~use_keyeio:false 500 + ~info:(Cmd.info "remove" ~doc:"Remove a user") 501 + ~app_name:"river" 502 + ~service:"river" 503 + user_remove 504 + in 505 + let user_list_cmd = 506 + Eiocmd.run 507 + ~use_keyeio:false 508 + ~info:(Cmd.info "list" ~doc:"List all users") 509 + ~app_name:"river" 510 + ~service:"river" 511 + user_list 512 + in 513 + let user_show_cmd = 514 + Eiocmd.run 515 + ~use_keyeio:false 516 + ~info:(Cmd.info "show" ~doc:"Show user details") 517 + ~app_name:"river" 518 + ~service:"river" 519 + user_show 520 + in 521 + let user_add_feed_cmd = 522 + Eiocmd.run 523 + ~use_keyeio:false 524 + ~info:(Cmd.info "add-feed" ~doc:"Add a feed to a user") 525 + ~app_name:"river" 526 + ~service:"river" 527 + user_add_feed 528 + in 529 + let user_remove_feed_cmd = 530 + Eiocmd.run 531 + ~use_keyeio:false 532 + ~info:(Cmd.info "remove-feed" ~doc:"Remove a feed from a user") 533 + ~app_name:"river" 534 + ~service:"river" 535 + user_remove_feed 536 + in 537 + Cmd.group info [ 538 + user_add_cmd; 539 + user_remove_cmd; 540 + user_list_cmd; 541 + user_show_cmd; 542 + user_add_feed_cmd; 543 + user_remove_feed_cmd; 544 + ] 545 + 546 + (* Sync command - needs Eio environment for HTTP requests *) 547 + let sync = 548 + let username_opt = 549 + let doc = "Sync specific user (omit to sync all)" in 550 + Arg.(value & pos 0 (some string) None & info [] ~docv:"USERNAME" ~doc) 551 + in 552 + Term.(const (fun username_opt env _xdg _profile -> 553 + let state = River.State.create env ~app_name:"river" in 554 + match username_opt with 555 + | Some username -> Sync.sync_user env state ~username 556 + | None -> Sync.sync_all env state 557 + ) $ username_opt) 558 + 559 + (* List command - doesn't need network, just reads local files *) 560 + let list = 561 + let username_opt_arg = 562 + let doc = "Username (optional - defaults to all users)" in 563 + Arg.(value & pos 0 (some string) None & info [] ~docv:"USERNAME" ~doc) 564 + in 565 + let limit_arg = 566 + let doc = "Limit number of posts to display (default: all)" in 567 + Arg.(value & opt (some int) None & info ["limit"; "n"] ~doc) 568 + in 569 + let metadata_arg = 570 + let doc = "Show all metadata (author, date, summary, content preview, links, tags)" in 571 + Arg.(value & flag & info ["metadata"; "m"] ~doc) 572 + in 573 + Term.(const (fun username_opt limit metadata env _xdg _profile -> 574 + let state = River.State.create env ~app_name:"river" in 575 + Post.list state ~username_opt ~limit ~metadata 576 + ) $ username_opt_arg $ limit_arg $ metadata_arg) 577 + 578 + (* Info command - show detailed post information *) 579 + let info = 580 + let post_id_arg = 581 + let doc = "Post ID (URI)" in 582 + Arg.(required & pos 0 (some string) None & info [] ~docv:"POST_ID" ~doc) 583 + in 584 + let full_arg = 585 + let doc = "Show full content without truncation" in 586 + Arg.(value & flag & info ["full"] ~doc) 587 + in 588 + Term.(const (fun post_id full env _xdg _profile -> 589 + let state = River.State.create env ~app_name:"river" in 590 + Post.info state ~post_id ~verbose:full 591 + ) $ post_id_arg $ full_arg) 592 + 593 + (* Merge command - export merged feed *) 594 + let merge = 595 + let format_arg = 596 + let doc = "Output format: atom or jsonfeed" in 597 + Arg.(value & opt string "atom" & info ["format"; "f"] ~doc) 598 + in 599 + let title_arg = 600 + let doc = "Feed title" in 601 + Arg.(value & opt string "River Merged Feed" & info ["title"; "t"] ~doc) 602 + in 603 + let limit_arg = 604 + let doc = "Maximum number of entries to include (default: all)" in 605 + Arg.(value & opt (some int) None & info ["limit"; "n"] ~doc) 606 + in 607 + Term.(const (fun format title limit env _xdg _profile -> 608 + let state = River.State.create env ~app_name:"river" in 609 + let format_type = match String.lowercase_ascii format with 610 + | "jsonfeed" | "json" -> `Jsonfeed 611 + | _ -> `Atom 612 + in 613 + match River.State.export_merged_feed state ~title ~format:format_type ?limit () with 614 + | Ok output -> 615 + print_endline output; 616 + 0 617 + | Error err -> 618 + Log.err (fun m -> m "Failed to export merged feed: %s" err); 619 + 1 620 + ) $ format_arg $ title_arg $ limit_arg) 621 + 622 + (* Quality command - analyze feed quality *) 623 + let quality = 624 + let username_arg = 625 + let doc = "Username to analyze" in 626 + Arg.(required & pos 0 (some string) None & info [] ~docv:"USERNAME" ~doc) 627 + in 628 + Term.(const (fun username env _xdg _profile -> 629 + let state = River.State.create env ~app_name:"river" in 630 + match River.State.analyze_user_quality state ~username with 631 + | Error err -> 632 + Log.err (fun m -> m "%s" err); 633 + 1 634 + | Ok metrics -> 635 + (* Display quality metrics *) 636 + Fmt.pr "@."; 637 + Fmt.pr "%a@." Fmt.(styled `Bold string) 638 + (String.make 70 '='); 639 + Fmt.pr " %a %s@." Fmt.(styled `Bold (styled (`Fg `Blue) string)) 640 + "User Quality Analysis:" username; 641 + Fmt.pr "%a@.@." Fmt.(styled `Bold string) 642 + (String.make 70 '='); 643 + 644 + (* Overall quality score *) 645 + let score = River.Quality.quality_score metrics in 646 + let score_color = match score with 647 + | s when s >= 80.0 -> `Green 648 + | s when s >= 60.0 -> `Yellow 649 + | s when s >= 40.0 -> `Magenta 650 + | _ -> `Red 651 + in 652 + Fmt.pr "%a %.1f/100.0@.@." 653 + Fmt.(styled (`Fg score_color) (styled `Bold string)) 654 + "Overall Quality Score:" 655 + score; 656 + 657 + (* Entry statistics *) 658 + Fmt.pr "%a@." Fmt.(styled `Cyan string) "Entry Statistics:"; 659 + Fmt.pr " Total entries: %d@." (River.Quality.total_entries metrics); 660 + Fmt.pr "@."; 661 + 662 + (* Completeness metrics *) 663 + Fmt.pr "%a@." Fmt.(styled `Cyan string) "Completeness:"; 664 + let total = River.Quality.total_entries metrics in 665 + let pct entries = 666 + float_of_int entries /. float_of_int total *. 100.0 667 + in 668 + Fmt.pr " Entries with content: %3d/%d (%5.1f%%)@." 669 + (River.Quality.entries_with_content metrics) 670 + total 671 + (pct (River.Quality.entries_with_content metrics)); 672 + Fmt.pr " Entries with dates: %3d/%d (%5.1f%%)@." 673 + (River.Quality.entries_with_date metrics) 674 + total 675 + (pct (River.Quality.entries_with_date metrics)); 676 + Fmt.pr " Entries with authors: %3d/%d (%5.1f%%)@." 677 + (River.Quality.entries_with_author metrics) 678 + total 679 + (pct (River.Quality.entries_with_author metrics)); 680 + Fmt.pr " Entries with summaries:%3d/%d (%5.1f%%)@." 681 + (River.Quality.entries_with_summary metrics) 682 + total 683 + (pct (River.Quality.entries_with_summary metrics)); 684 + Fmt.pr " Entries with tags: %3d/%d (%5.1f%%)@." 685 + (River.Quality.entries_with_tags metrics) 686 + total 687 + (pct (River.Quality.entries_with_tags metrics)); 688 + Fmt.pr "@."; 689 + 690 + (* Content statistics *) 691 + if River.Quality.entries_with_content metrics > 0 then begin 692 + Fmt.pr "%a@." Fmt.(styled `Cyan string) "Content Statistics:"; 693 + Fmt.pr " Average length: %.0f characters@." 694 + (River.Quality.avg_content_length metrics); 695 + Fmt.pr " Min length: %d characters@." 696 + (River.Quality.min_content_length metrics); 697 + Fmt.pr " Max length: %d characters@." 698 + (River.Quality.max_content_length metrics); 699 + Fmt.pr "@." 700 + end; 701 + 702 + (* Posting frequency *) 703 + (match River.Quality.posting_frequency_days metrics with 704 + | Some freq -> 705 + Fmt.pr "%a@." Fmt.(styled `Cyan string) "Posting Frequency:"; 706 + Fmt.pr " Average: %.1f days between posts@." freq; 707 + let posts_per_week = 7.0 /. freq in 708 + Fmt.pr " (~%.1f posts per week)@." posts_per_week; 709 + Fmt.pr "@." 710 + | None -> 711 + Fmt.pr "%a@.@." Fmt.(styled `Faint string) 712 + "Not enough data to calculate posting frequency"); 713 + 714 + Fmt.pr "@."; 715 + 0 716 + ) $ username_arg) 717 + 718 + let main_cmd = 719 + let doc = "River feed management CLI" in 720 + let main_info = Cmd.info "river-cli" ~version:"1.0" ~doc in 721 + let sync_cmd = 722 + Eiocmd.run 723 + ~use_keyeio:false 724 + ~info:(Cmd.info "sync" ~doc:"Sync feeds for users") 725 + ~app_name:"river" 726 + ~service:"river" 727 + sync 728 + in 729 + let list_cmd = 730 + Eiocmd.run 731 + ~use_keyeio:false 732 + ~info:(Cmd.info "list" ~doc:"List recent posts (from all users by default, or specify a user)") 733 + ~app_name:"river" 734 + ~service:"river" 735 + list 736 + in 737 + let info_cmd = 738 + Eiocmd.run 739 + ~use_keyeio:false 740 + ~info:(Cmd.info "info" ~doc:"Show detailed post information") 741 + ~app_name:"river" 742 + ~service:"river" 743 + info 744 + in 745 + let merge_cmd = 746 + Eiocmd.run 747 + ~use_keyeio:false 748 + ~info:(Cmd.info "merge" ~doc:"Export a merged feed combining all users' feeds") 749 + ~app_name:"river" 750 + ~service:"river" 751 + merge 752 + in 753 + let quality_cmd = 754 + Eiocmd.run 755 + ~use_keyeio:false 756 + ~info:(Cmd.info "quality" ~doc:"Analyze feed quality metrics for a user") 757 + ~app_name:"river" 758 + ~service:"river" 759 + quality 760 + in 761 + Cmd.group main_info [user_cmd; sync_cmd; list_cmd; info_cmd; merge_cmd; quality_cmd]
+115
stack/river/cmd/river_cmd.mli
··· 1 + (** River.Cmd - Cmdliner terms for River CLI 2 + 3 + This module provides cmdliner terms that are thin wrappers around 4 + the River library functions. All business logic resides in the 5 + main River module. *) 6 + 7 + (** {1 Cmdliner Terms} 8 + 9 + These terms can be used to build command-line interfaces using 10 + Cmdliner and Eiocmd. They handle argument parsing and call into 11 + the River library functions. *) 12 + 13 + open Cmdliner 14 + 15 + (** {2 User Management Commands} *) 16 + 17 + val user_add : 18 + (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Term.t 19 + (** [user_add] command term for adding a new user. 20 + 21 + Reads: username, fullname, email from command-line arguments. 22 + Calls: [River.State.create_user] *) 23 + 24 + val user_remove : 25 + (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Term.t 26 + (** [user_remove] command term for removing a user. 27 + 28 + Reads: username from command-line arguments. 29 + Calls: [River.State.delete_user] *) 30 + 31 + val user_list : 32 + (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Term.t 33 + (** [user_list] command term for listing all users. 34 + 35 + Calls: [River.State.list_users] *) 36 + 37 + val user_show : 38 + (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Term.t 39 + (** [user_show] command term for showing user details. 40 + 41 + Reads: username from command-line arguments. 42 + Calls: [River.State.get_user] *) 43 + 44 + val user_add_feed : 45 + (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Term.t 46 + (** [user_add_feed] command term for adding a feed to a user. 47 + 48 + Reads: username, name, url from command-line arguments. 49 + Calls: [River.State.get_user], [River.User.add_feed], [River.State.update_user] *) 50 + 51 + val user_remove_feed : 52 + (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Term.t 53 + (** [user_remove_feed] command term for removing a feed from a user. 54 + 55 + Reads: username, url from command-line arguments. 56 + Calls: [River.State.get_user], [River.User.remove_feed], [River.State.update_user] *) 57 + 58 + val user_cmd : int Cmd.t 59 + (** [user_cmd] is the user management command group containing all user subcommands. *) 60 + 61 + (** {2 Feed Sync Commands} *) 62 + 63 + val sync : 64 + (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Term.t 65 + (** [sync] command term for syncing feeds. 66 + 67 + Reads: optional username from command-line arguments. 68 + Calls: [River.State.sync_user] or [River.State.sync_all] *) 69 + 70 + (** {2 Post Listing Commands} *) 71 + 72 + val list : 73 + (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Term.t 74 + (** [list] command term for listing posts with enhanced formatting. 75 + 76 + Features: 77 + - Pretty-printed output with colors using Fmt 78 + - Clear ID display (never truncated) for each post 79 + - Compact view (default): shows title, ID, author, and date 80 + - Metadata view (--metadata/-m): shows all post metadata including summary, 81 + content preview (truncated), links, and tags 82 + 83 + Reads: optional username, optional limit, --metadata flag from command-line arguments. 84 + Calls: [River.State.get_user_posts] or [River.State.get_all_posts] *) 85 + 86 + val info : 87 + (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Term.t 88 + (** [info] command term for showing detailed post information. 89 + 90 + Reads: post ID, --full flag from command-line arguments. 91 + Uses Logs for informational output (controlled by -v/--verbose from Eiocmd). 92 + Calls: [River.State.get_all_posts] *) 93 + 94 + (** {2 Feed Export Commands} *) 95 + 96 + val merge : 97 + (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Term.t 98 + (** [merge] command term for exporting merged feed. 99 + 100 + Reads: format (atom|jsonfeed), title, limit from command-line arguments. 101 + Calls: [River.State.export_merged_feed] *) 102 + 103 + (** {2 Quality Analysis Commands} *) 104 + 105 + val quality : 106 + (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Term.t 107 + (** [quality] command term for analyzing feed quality. 108 + 109 + Reads: username from command-line arguments. 110 + Calls: [River.State.analyze_user_quality] *) 111 + 112 + (** {2 Main Command} *) 113 + 114 + val main_cmd : int Cmd.t 115 + (** [main_cmd] is the main command group containing all River CLI commands. *)
+6 -10
stack/river/example/aggregate_feeds.ml
··· 1 1 let sources = 2 - River. 3 2 [ 4 - { name = "KC Sivaramakrishnan"; url = "http://kcsrk.info/atom-ocaml.xml" }; 5 - { 6 - name = "Amir Chaudhry"; 7 - url = "http://amirchaudhry.com/tags/ocamllabs-atom.xml"; 8 - }; 3 + River.Source.make ~name:"KC Sivaramakrishnan" ~url:"http://kcsrk.info/atom-ocaml.xml"; 4 + River.Source.make ~name:"Amir Chaudhry" ~url:"http://amirchaudhry.com/tags/ocamllabs-atom.xml"; 9 5 ] 10 6 11 7 let main env = 12 8 (* Use River.with_session for proper resource management *) 13 - River.with_session env @@ fun session -> 14 - let feeds = List.map (River.fetch session) sources in 15 - let posts = River.posts feeds in 16 - let entries = River.create_atom_entries posts in 9 + River.Session.with_session env @@ fun session -> 10 + let feeds = List.map (River.Feed.fetch session) sources in 11 + let posts = River.Post.of_feeds feeds in 12 + let entries = River.Format.Atom.entries_of_posts posts in 17 13 let feed = 18 14 let authors = [ Syndic.Atom.author "OCaml Blog" ] in 19 15 let id = Uri.of_string "https://ocaml.org/atom.xml" in
-45
stack/river/lib/client.ml
··· 1 - (* 2 - * Copyright (c) 2014, OCaml.org project 3 - * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk> 4 - * 5 - * Permission to use, copy, modify, and distribute this software for any 6 - * purpose with or without fee is hereby granted, provided that the above 7 - * copyright notice and this permission notice appear in all copies. 8 - * 9 - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 - *) 17 - 18 - (* River HTTP client using Requests *) 19 - 20 - let src = Logs.Src.create "river.client" ~doc:"River HTTP client" 21 - module Log = (val Logs.src_log src : Logs.LOG) 22 - 23 - type t = { 24 - session : Requests.t; 25 - } 26 - 27 - let create ~sw (env : _ ) = 28 - Log.info (fun m -> m "Creating River client"); 29 - let session = Requests.create ~sw 30 - ~default_headers:(Requests.Headers.of_list [ 31 - ("User-Agent", "OCaml-River/1.0"); 32 - ]) 33 - ~follow_redirects:true 34 - ~max_redirects:5 35 - ~verify_tls:true 36 - env 37 - in 38 - { session } 39 - 40 - let with_client (env : _) f = 41 - Eio.Switch.run @@ fun sw -> 42 - let client = create ~sw env in 43 - f client 44 - 45 - let session t = t.session
-57
stack/river/lib/client.mli
··· 1 - (* 2 - * Copyright (c) 2014, OCaml.org project 3 - * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk> 4 - * 5 - * Permission to use, copy, modify, and distribute this software for any 6 - * purpose with or without fee is hereby granted, provided that the above 7 - * copyright notice and this permission notice appear in all copies. 8 - * 9 - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 - *) 17 - 18 - (** River HTTP client using Requests library. 19 - 20 - This module provides a session-based HTTP client for fetching RSS/Atom feeds. 21 - The client manages a Requests session with appropriate defaults for feed fetching. *) 22 - 23 - (** The type of a River HTTP client *) 24 - type t 25 - 26 - (** [create ~sw env] creates a new River client with a Requests session. 27 - 28 - The session is configured with: 29 - - User-Agent: "OCaml-River/1.0" 30 - - Automatic redirect following (max 5 redirects) 31 - - TLS verification enabled 32 - 33 - @param sw The switch for resource management 34 - @param env The Eio environment providing network and time resources *) 35 - val create : 36 - sw:Eio.Switch.t -> 37 - < clock : float Eio.Time.clock_ty Eio.Resource.t; 38 - fs : Eio.Fs.dir_ty Eio.Path.t; 39 - net : [ `Generic | `Unix ] Eio.Net.ty Eio.Resource.t; .. > -> 40 - t 41 - 42 - (** [with_client env f] creates a client and automatically manages its lifecycle. 43 - 44 - This is the recommended way to use the client as it ensures proper cleanup. 45 - 46 - @param env The Eio environment 47 - @param f The function to run with the client *) 48 - val with_client : 49 - < clock : float Eio.Time.clock_ty Eio.Resource.t; 50 - fs : Eio.Fs.dir_ty Eio.Path.t; 51 - net : [ `Generic | `Unix ] Eio.Net.ty Eio.Resource.t; .. > -> 52 - (t -> 'a) -> 'a 53 - 54 - (** [session t] returns the underlying Requests session. 55 - 56 - This is used internally by River's HTTP functions. *) 57 - val session : t -> Requests.t
+1 -2
stack/river/lib/dune
··· 1 1 (library 2 2 (name river) 3 3 (public_name river) 4 - (wrapped false) 5 - (libraries eio eio_main requests requests_json_api logs str syndic lambdasoup uri ptime jsonfeed jsont jsont.bytesrw cacheio xdge)) 4 + (libraries eio eio_main requests requests_json_api logs str syndic lambdasoup uri ptime jsonfeed jsont jsont.bytesrw xdge cmdliner eiocmd fmt))
-106
stack/river/lib/feed.ml
··· 1 - (* 2 - * Copyright (c) 2014, OCaml.org project 3 - * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk> 4 - * 5 - * Permission to use, copy, modify, and distribute this software for any 6 - * purpose with or without fee is hereby granted, provided that the above 7 - * copyright notice and this permission notice appear in all copies. 8 - * 9 - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 - *) 17 - 18 - let src = Logs.Src.create "river.feed" ~doc:"River feed parsing" 19 - module Log = (val Logs.src_log src : Logs.LOG) 20 - 21 - type source = { name : string; url : string } 22 - type content = Atom of Syndic.Atom.feed | Rss2 of Syndic.Rss2.channel | Json of Jsonfeed.t 23 - 24 - let string_of_feed = function Atom _ -> "Atom" | Rss2 _ -> "Rss2" | Json _ -> "JSONFeed" 25 - 26 - type t = { name : string; title : string; url : string; content : content } 27 - 28 - let classify_feed ~xmlbase (body : string) = 29 - Log.debug (fun m -> m "Attempting to parse feed (%d bytes)" (String.length body)); 30 - 31 - (* Quick check - does it look like JSON? *) 32 - let looks_like_json = 33 - String.length body > 0 && 34 - let first_char = String.get body 0 in 35 - first_char = '{' || first_char = '[' 36 - in 37 - 38 - if looks_like_json then ( 39 - (* Try JSONFeed first *) 40 - Log.debug (fun m -> m "Body looks like JSON, trying JSONFeed parser"); 41 - match Jsonfeed.of_string body with 42 - | Ok jsonfeed -> 43 - Log.debug (fun m -> m "Successfully parsed as JSONFeed"); 44 - Json jsonfeed 45 - | Error err -> 46 - Log.debug (fun m -> m "Not a JSONFeed: %s" (Jsont.Error.to_string err)); 47 - (* Fall through to XML parsing *) 48 - failwith "Not a valid JSONFeed" 49 - ) else ( 50 - (* Try XML formats *) 51 - try 52 - let feed = Atom (Syndic.Atom.parse ~xmlbase (Xmlm.make_input (`String (0, body)))) in 53 - Log.debug (fun m -> m "Successfully parsed as Atom feed"); 54 - feed 55 - with 56 - | Syndic.Atom.Error.Error (pos, msg) -> ( 57 - Log.debug (fun m -> m "Not an Atom feed: %s at position (%d, %d)" 58 - msg (fst pos) (snd pos)); 59 - try 60 - let feed = Rss2 (Syndic.Rss2.parse ~xmlbase (Xmlm.make_input (`String (0, body)))) in 61 - Log.debug (fun m -> m "Successfully parsed as RSS2 feed"); 62 - feed 63 - with Syndic.Rss2.Error.Error (pos, msg) -> 64 - Log.err (fun m -> m "Failed to parse as RSS2: %s at position (%d, %d)" 65 - msg (fst pos) (snd pos)); 66 - failwith "Neither Atom nor RSS2 feed") 67 - | Not_found as e -> 68 - Log.err (fun m -> m "Not_found exception during Atom feed parsing"); 69 - Log.err (fun m -> m "Backtrace:\n%s" (Printexc.get_backtrace ())); 70 - raise e 71 - | e -> 72 - Log.err (fun m -> m "Unexpected exception during feed parsing: %s" 73 - (Printexc.to_string e)); 74 - Log.err (fun m -> m "Backtrace:\n%s" (Printexc.get_backtrace ())); 75 - raise e 76 - ) 77 - 78 - let fetch client (source : source) = 79 - Log.info (fun m -> m "Fetching feed '%s' from %s" source.name source.url); 80 - 81 - let xmlbase = Uri.of_string @@ source.url in 82 - 83 - (* Use Requests_json_api.get_result for clean Result-based error handling *) 84 - let session = Client.session client in 85 - let response = 86 - match Requests_json_api.get_result session source.url with 87 - | Ok body -> 88 - Log.info (fun m -> m "Successfully fetched %s (%d bytes)" source.url (String.length body)); 89 - body 90 - | Error (status, msg) -> 91 - Log.err (fun m -> m "Failed to fetch feed '%s': HTTP %d - %s" source.name status msg); 92 - failwith (Printf.sprintf "HTTP %d: %s" status msg) 93 - in 94 - 95 - let content = classify_feed ~xmlbase response in 96 - let title = 97 - match content with 98 - | Atom atom -> Util.string_of_text_construct atom.Syndic.Atom.title 99 - | Rss2 ch -> ch.Syndic.Rss2.title 100 - | Json jsonfeed -> Jsonfeed.title jsonfeed 101 - in 102 - 103 - Log.info (fun m -> m "Successfully fetched %s feed '%s' (title: '%s')" 104 - (string_of_feed content) source.name title); 105 - 106 - { name = source.name; title; content; url = source.url }
-309
stack/river/lib/markdown_converter.ml
··· 1 - (** HTML to Markdown converter using Lambda Soup *) 2 - 3 - (** Extract all links from HTML content *) 4 - let extract_links html_str = 5 - try 6 - let soup = Soup.parse html_str in 7 - let links = Soup.select "a[href]" soup in 8 - Soup.fold (fun acc link -> 9 - match Soup.attribute "href" link with 10 - | Some href -> 11 - let text = Soup.texts link |> String.concat "" |> String.trim in 12 - (href, text) :: acc 13 - | None -> acc 14 - ) [] links 15 - |> List.rev 16 - with _ -> [] 17 - 18 - (** Check if string contains any whitespace *) 19 - let has_whitespace s = 20 - try 21 - let _ = Str.search_forward (Str.regexp "[ \t\n\r]") s 0 in 22 - true 23 - with Not_found -> false 24 - 25 - (** Clean up excessive newlines and normalize spacing *) 26 - let cleanup_markdown s = 27 - (* Normalize line endings *) 28 - let s = Str.global_replace (Str.regexp "\r\n") "\n" s in 29 - 30 - (* Remove trailing whitespace from each line *) 31 - let lines = String.split_on_char '\n' s in 32 - let lines = List.map (fun line -> 33 - (* Trim trailing spaces but preserve leading spaces for indentation *) 34 - let len = String.length line in 35 - let rec find_last_non_space i = 36 - if i < 0 then -1 37 - else if line.[i] = ' ' || line.[i] = '\t' then find_last_non_space (i - 1) 38 - else i 39 - in 40 - let last = find_last_non_space (len - 1) in 41 - if last < 0 then "" 42 - else String.sub line 0 (last + 1) 43 - ) lines in 44 - 45 - (* Join back and collapse excessive blank lines *) 46 - let s = String.concat "\n" lines in 47 - 48 - (* Replace 3+ consecutive newlines with just 2 *) 49 - let s = Str.global_replace (Str.regexp "\n\n\n+") "\n\n" s in 50 - 51 - (* Trim leading and trailing whitespace *) 52 - String.trim s 53 - 54 - (** Convert HTML to Markdown using state-based whitespace handling *) 55 - let html_to_markdown html_str = 56 - try 57 - let soup = Soup.parse html_str in 58 - let buffer = Buffer.create 256 in 59 - 60 - (* State: track if we need to insert a space before next text *) 61 - let need_space = ref false in 62 - 63 - (* Get last character in buffer, if any *) 64 - let last_char () = 65 - let len = Buffer.length buffer in 66 - if len = 0 then None 67 - else Some (Buffer.nth buffer (len - 1)) 68 - in 69 - 70 - (* Add text with proper spacing *) 71 - let add_text text = 72 - let trimmed = String.trim text in 73 - if trimmed <> "" then begin 74 - (* Check if text starts with punctuation that shouldn't have space before it *) 75 - let starts_with_punctuation = 76 - String.length trimmed > 0 && 77 - (match trimmed.[0] with 78 - | ',' | '.' | ';' | ':' | '!' | '?' | ')' | ']' | '}' -> true 79 - | _ -> false) 80 - in 81 - 82 - (* Add space if needed, unless we're before punctuation *) 83 - if !need_space && not starts_with_punctuation then begin 84 - match last_char () with 85 - | Some (' ' | '\n') -> () 86 - | _ -> Buffer.add_char buffer ' ' 87 - end; 88 - Buffer.add_string buffer trimmed; 89 - need_space := false 90 - end 91 - in 92 - 93 - (* Mark that we need space before next text (for inline elements) *) 94 - let mark_space_needed () = 95 - need_space := has_whitespace (Buffer.contents buffer) || Buffer.length buffer > 0 96 - in 97 - 98 - (* Process header with ID/anchor handling *) 99 - let process_header level elem = 100 - need_space := false; 101 - 102 - (* Check if header contains a link with an ID fragment *) 103 - let link_opt = Soup.select_one "a[href]" elem in 104 - let anchor_id = match link_opt with 105 - | Some link -> 106 - (match Soup.attribute "href" link with 107 - | Some href -> 108 - (* Extract fragment from URL *) 109 - let uri = Uri.of_string href in 110 - Uri.fragment uri 111 - | None -> None) 112 - | None -> None 113 - in 114 - 115 - (* Add anchor if we found an ID *) 116 - (match anchor_id with 117 - | Some id when id <> "" -> 118 - Buffer.add_string buffer (Printf.sprintf "\n<a name=\"%s\"></a>\n" id) 119 - | _ -> ()); 120 - 121 - (* Add the header marker *) 122 - let marker = String.make level '#' in 123 - Buffer.add_string buffer ("\n" ^ marker ^ " "); 124 - 125 - (* Get text content, excluding link tags *) 126 - let text = Soup.texts elem |> String.concat " " |> String.trim in 127 - Buffer.add_string buffer text; 128 - 129 - Buffer.add_string buffer "\n\n"; 130 - need_space := false 131 - in 132 - 133 - let rec process_node node = 134 - match Soup.element node with 135 - | Some elem -> 136 - let tag = Soup.name elem in 137 - (match tag with 138 - (* Block elements - reset space tracking *) 139 - | "h1" -> process_header 1 elem 140 - | "h2" -> process_header 2 elem 141 - | "h3" -> process_header 3 elem 142 - | "h4" -> process_header 4 elem 143 - | "h5" -> process_header 5 elem 144 - | "h6" -> process_header 6 elem 145 - | "p" -> 146 - need_space := false; 147 - Soup.children elem |> Soup.iter process_node; 148 - Buffer.add_string buffer "\n\n"; 149 - need_space := false 150 - | "br" -> 151 - Buffer.add_string buffer "\n"; 152 - need_space := false 153 - (* Inline elements - preserve space tracking *) 154 - | "strong" | "b" -> 155 - (* Add space before if needed *) 156 - if !need_space then begin 157 - match last_char () with 158 - | Some (' ' | '\n') -> () 159 - | _ -> Buffer.add_char buffer ' ' 160 - end; 161 - Buffer.add_string buffer "**"; 162 - need_space := false; 163 - Soup.children elem |> Soup.iter process_node; 164 - Buffer.add_string buffer "**"; 165 - mark_space_needed () 166 - | "em" | "i" -> 167 - (* Add space before if needed *) 168 - if !need_space then begin 169 - match last_char () with 170 - | Some (' ' | '\n') -> () 171 - | _ -> Buffer.add_char buffer ' ' 172 - end; 173 - Buffer.add_string buffer "*"; 174 - need_space := false; 175 - Soup.children elem |> Soup.iter process_node; 176 - Buffer.add_string buffer "*"; 177 - mark_space_needed () 178 - | "code" -> 179 - (* Add space before if needed *) 180 - if !need_space then begin 181 - match last_char () with 182 - | Some (' ' | '\n') -> () 183 - | _ -> Buffer.add_char buffer ' ' 184 - end; 185 - Buffer.add_string buffer "`"; 186 - need_space := false; 187 - Soup.children elem |> Soup.iter process_node; 188 - Buffer.add_string buffer "`"; 189 - mark_space_needed () 190 - | "pre" -> 191 - need_space := false; 192 - Buffer.add_string buffer "\n```\n"; 193 - Soup.children elem |> Soup.iter process_node; 194 - Buffer.add_string buffer "\n```\n\n"; 195 - need_space := false 196 - | "a" -> 197 - let text = Soup.texts elem |> String.concat " " |> String.trim in 198 - let href = Soup.attribute "href" elem in 199 - (match href with 200 - | Some href -> 201 - (* Add space before link if needed *) 202 - if !need_space then begin 203 - match last_char () with 204 - | Some (' ' | '\n') -> () 205 - | _ -> Buffer.add_char buffer ' ' 206 - end; 207 - need_space := false; 208 - 209 - (* Add the link markdown *) 210 - if text = "" then 211 - Buffer.add_string buffer (Printf.sprintf "<%s>" href) 212 - else 213 - Buffer.add_string buffer (Printf.sprintf "[%s](%s)" text href); 214 - 215 - (* Mark that space may be needed after link *) 216 - mark_space_needed () 217 - | None -> 218 - add_text text) 219 - | "ul" | "ol" -> 220 - need_space := false; 221 - Buffer.add_string buffer "\n"; 222 - let is_ordered = tag = "ol" in 223 - let items = Soup.children elem |> Soup.to_list in 224 - List.iteri (fun i item -> 225 - match Soup.element item with 226 - | Some li when Soup.name li = "li" -> 227 - need_space := false; 228 - if is_ordered then 229 - Buffer.add_string buffer (Printf.sprintf "%d. " (i + 1)) 230 - else 231 - Buffer.add_string buffer "- "; 232 - Soup.children li |> Soup.iter process_node; 233 - Buffer.add_string buffer "\n" 234 - | _ -> () 235 - ) items; 236 - Buffer.add_string buffer "\n"; 237 - need_space := false 238 - | "blockquote" -> 239 - need_space := false; 240 - Buffer.add_string buffer "\n> "; 241 - Soup.children elem |> Soup.iter process_node; 242 - Buffer.add_string buffer "\n\n"; 243 - need_space := false 244 - | "img" -> 245 - (* Add space before if needed *) 246 - if !need_space then begin 247 - match last_char () with 248 - | Some (' ' | '\n') -> () 249 - | _ -> Buffer.add_char buffer ' ' 250 - end; 251 - let alt = Soup.attribute "alt" elem |> Option.value ~default:"" in 252 - let src = Soup.attribute "src" elem |> Option.value ~default:"" in 253 - Buffer.add_string buffer (Printf.sprintf "![%s](%s)" alt src); 254 - need_space := false; 255 - mark_space_needed () 256 - | "hr" -> 257 - need_space := false; 258 - Buffer.add_string buffer "\n---\n\n"; 259 - need_space := false 260 - (* Strip these tags but keep content *) 261 - | "div" | "span" | "article" | "section" | "header" | "footer" 262 - | "main" | "nav" | "aside" | "figure" | "figcaption" | "details" | "summary" -> 263 - Soup.children elem |> Soup.iter process_node 264 - (* Ignore script, style, etc *) 265 - | "script" | "style" | "noscript" -> () 266 - (* Default: just process children *) 267 - | _ -> 268 - Soup.children elem |> Soup.iter process_node) 269 - | None -> 270 - (* Text node - handle whitespace properly *) 271 - match Soup.leaf_text node with 272 - | Some text -> 273 - (* If text is only whitespace, mark that we need space *) 274 - let trimmed = String.trim text in 275 - if trimmed = "" then begin 276 - if has_whitespace text then 277 - need_space := true 278 - end else begin 279 - (* Text has content - check if it had leading/trailing whitespace *) 280 - let had_leading_ws = has_whitespace text && 281 - (String.length text > 0 && 282 - (text.[0] = ' ' || text.[0] = '\t' || text.[0] = '\n' || text.[0] = '\r')) in 283 - 284 - (* If had leading whitespace, mark we need space *) 285 - if had_leading_ws then need_space := true; 286 - 287 - (* Add the text content *) 288 - add_text trimmed; 289 - 290 - (* If had trailing whitespace, mark we need space for next *) 291 - let had_trailing_ws = has_whitespace text && 292 - (String.length text > 0 && 293 - let last = text.[String.length text - 1] in 294 - last = ' ' || last = '\t' || last = '\n' || last = '\r') in 295 - if had_trailing_ws then need_space := true 296 - end 297 - | None -> () 298 - in 299 - 300 - Soup.children soup |> Soup.iter process_node; 301 - 302 - (* Clean up the result *) 303 - let result = Buffer.contents buffer in 304 - cleanup_markdown result 305 - with _ -> html_str 306 - 307 - (** Convert HTML content to clean Markdown *) 308 - let to_markdown html_str = 309 - html_to_markdown html_str
-7
stack/river/lib/markdown_converter.mli
··· 1 - (** HTML to Markdown converter *) 2 - 3 - (** Extract all links from HTML content as (href, anchor_text) pairs *) 4 - val extract_links : string -> (string * string) list 5 - 6 - (** Convert HTML content to clean Markdown format *) 7 - val to_markdown : string -> string
-80
stack/river/lib/meta.ml
··· 1 - (* 2 - * Copyright (c) 2014, OCaml.org project 3 - * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk> 4 - * 5 - * Permission to use, copy, modify, and distribute this software for any 6 - * purpose with or without fee is hereby granted, provided that the above 7 - * copyright notice and this permission notice appear in all copies. 8 - * 9 - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 - *) 17 - 18 - (** This module determines an image to be used as preview of a website. 19 - 20 - It does this by following the same logic Google+ and other websites use, and 21 - described in this article: 22 - https://www.raymondcamden.com/2011/07/26/How-are-Facebook-and-Google-creating-link-previews *) 23 - 24 - let og_image html = 25 - let open Soup in 26 - let soup = parse html in 27 - try soup $ "meta[property=og:image]" |> R.attribute "content" |> Option.some 28 - with Failure _ -> None 29 - 30 - let image_src html = 31 - let open Soup in 32 - let soup = parse html in 33 - try soup $ "link[rel=\"image_src\"]" |> R.attribute "href" |> Option.some 34 - with Failure _ -> None 35 - 36 - let twitter_image html = 37 - let open Soup in 38 - let soup = parse html in 39 - try 40 - soup $ "meta[name=\"twitter:image\"]" |> R.attribute "content" 41 - |> Option.some 42 - with Failure _ -> None 43 - 44 - let og_description html = 45 - let open Soup in 46 - let soup = parse html in 47 - try 48 - soup $ "meta[property=og:description]" |> R.attribute "content" 49 - |> Option.some 50 - with Failure _ -> None 51 - 52 - let description html = 53 - let open Soup in 54 - let soup = parse html in 55 - try 56 - soup $ "meta[property=description]" |> R.attribute "content" |> Option.some 57 - with Failure _ -> None 58 - 59 - let preview_image html = 60 - let preview_image = 61 - match og_image html with 62 - | None -> ( 63 - match image_src html with 64 - | None -> twitter_image html 65 - | Some x -> Some x) 66 - | Some x -> Some x 67 - in 68 - match Option.map String.trim preview_image with 69 - | Some "" -> None 70 - | Some x -> Some x 71 - | None -> None 72 - 73 - let description html = 74 - let preview_image = 75 - match og_description html with None -> description html | Some x -> Some x 76 - in 77 - match Option.map String.trim preview_image with 78 - | Some "" -> None 79 - | Some x -> Some x 80 - | None -> None
-449
stack/river/lib/post.ml
··· 1 - (* 2 - * Copyright (c) 2014, OCaml.org project 3 - * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk> 4 - * 5 - * Permission to use, copy, modify, and distribute this software for any 6 - * purpose with or without fee is hereby granted, provided that the above 7 - * copyright notice and this permission notice appear in all copies. 8 - * 9 - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 - *) 17 - 18 - let src = Logs.Src.create "river.post" ~doc:"River post processing" 19 - module Log = (val Logs.src_log src : Logs.LOG) 20 - 21 - type t = { 22 - id : string; 23 - title : string; 24 - link : Uri.t option; 25 - date : Syndic.Date.t option; 26 - feed : Feed.t; 27 - author : string; 28 - email : string; 29 - content : Soup.soup Soup.node; 30 - mutable link_response : (string, string) result option; 31 - tags : string list; 32 - summary : string option; 33 - } 34 - 35 - (** Generate a stable, unique ID from available data *) 36 - let generate_id ?guid ?link ?title ?date ~feed_url () = 37 - match guid with 38 - | Some id when id <> "" -> 39 - (* Use explicit ID/GUID if available *) 40 - id 41 - | _ -> 42 - match link with 43 - | Some uri when Uri.to_string uri <> "" -> 44 - (* Use permalink as ID (stable and unique) *) 45 - Uri.to_string uri 46 - | _ -> 47 - (* Fallback: hash of feed_url + title + date *) 48 - let title_str = Option.value title ~default:"" in 49 - let date_str = 50 - match date with 51 - | Some d -> Ptime.to_rfc3339 d 52 - | None -> "" 53 - in 54 - let composite = Printf.sprintf "%s|%s|%s" feed_url title_str date_str in 55 - (* Use SHA256 for stable hashing *) 56 - Digest.string composite |> Digest.to_hex 57 - 58 - let post_id post = post.id 59 - 60 - let resolve_links_attr ~xmlbase attr el = 61 - Soup.R.attribute attr el 62 - |> Uri.of_string 63 - |> Syndic.XML.resolve ~xmlbase 64 - |> Uri.to_string 65 - |> fun value -> Soup.set_attribute attr value el 66 - 67 - (* Things that posts should not contain *) 68 - let undesired_tags = [ "style"; "script" ] 69 - let undesired_attr = [ "id" ] 70 - 71 - let html_of_text ?xmlbase s = 72 - let soup = Soup.parse s in 73 - let ($$) = Soup.($$) in 74 - soup $$ "a[href]" |> Soup.iter (resolve_links_attr ~xmlbase "href"); 75 - soup $$ "img[src]" |> Soup.iter (resolve_links_attr ~xmlbase "src"); 76 - undesired_tags |> List.iter (fun tag -> soup $$ tag |> Soup.iter Soup.delete); 77 - soup $$ "*" |> Soup.iter (fun el -> 78 - undesired_attr |> List.iter (fun attr -> Soup.delete_attribute attr el)); 79 - soup 80 - 81 - (* Do not trust sites using XML for HTML content. Convert to string and parse 82 - back. (Does not always fix bad HTML unfortunately.) *) 83 - let html_of_syndic = 84 - let ns_prefix _ = Some "" in 85 - fun ?xmlbase h -> 86 - html_of_text ?xmlbase 87 - (String.concat "" (List.map (Syndic.XML.to_string ~ns_prefix) h)) 88 - 89 - let string_of_option = function None -> "" | Some s -> s 90 - 91 - (* Email on the forge contain the name in parenthesis *) 92 - let forge_name_re = Str.regexp ".*(\\([^()]*\\))" 93 - 94 - let post_compare p1 p2 = 95 - (* Most recent posts first. Posts with no date are always last *) 96 - match (p1.date, p2.date) with 97 - | Some d1, Some d2 -> Syndic.Date.compare d2 d1 98 - | None, Some _ -> 1 99 - | Some _, None -> -1 100 - | None, None -> 1 101 - 102 - let rec remove n l = 103 - if n <= 0 then l else match l with [] -> [] | _ :: tl -> remove (n - 1) tl 104 - 105 - let rec take n = function 106 - | [] -> [] 107 - | e :: tl -> if n > 0 then e :: take (n - 1) tl else [] 108 - 109 - (* Blog feed 110 - ***********************************************************************) 111 - 112 - let post_of_atom ~(feed : Feed.t) (e : Syndic.Atom.entry) = 113 - Log.debug (fun m -> m "Processing Atom entry: %s" 114 - (Util.string_of_text_construct e.title)); 115 - 116 - let link = 117 - try 118 - Some 119 - (List.find (fun l -> l.Syndic.Atom.rel = Syndic.Atom.Alternate) e.links) 120 - .href 121 - with Not_found -> ( 122 - Log.debug (fun m -> m "No alternate link found, trying fallback"); 123 - match e.links with 124 - | l :: _ -> Some l.href 125 - | [] -> ( 126 - match Uri.scheme e.id with 127 - | Some "http" -> Some e.id 128 - | Some "https" -> Some e.id 129 - | _ -> None)) 130 - in 131 - let date = 132 - match e.published with Some _ -> e.published | None -> Some e.updated 133 - in 134 - let content = 135 - match e.content with 136 - | Some (Text s) -> html_of_text s 137 - | Some (Html (xmlbase, s)) -> html_of_text ?xmlbase s 138 - | Some (Xhtml (xmlbase, h)) -> html_of_syndic ?xmlbase h 139 - | Some (Mime _) | Some (Src _) | None -> ( 140 - match e.summary with 141 - | Some (Text s) -> html_of_text s 142 - | Some (Html (xmlbase, s)) -> html_of_text ?xmlbase s 143 - | Some (Xhtml (xmlbase, h)) -> html_of_syndic ?xmlbase h 144 - | None -> Soup.parse "") 145 - in 146 - let is_valid_author_name name = 147 - (* Filter out empty strings and placeholder values like "Unknown" *) 148 - let trimmed = String.trim name in 149 - trimmed <> "" && trimmed <> "Unknown" 150 - in 151 - let author_name = 152 - (* Fallback chain for author: 153 - 1. Entry author (if present, not empty, and not "Unknown") 154 - 2. Feed-level author (from Atom feed metadata) 155 - 3. Feed title (from Atom feed metadata) 156 - 4. Source name (manually entered feed name) *) 157 - try 158 - let author, _ = e.authors in 159 - let trimmed = String.trim author.name in 160 - if is_valid_author_name author.name then trimmed 161 - else raise Not_found (* Try feed-level author *) 162 - with Not_found -> ( 163 - match feed.content with 164 - | Feed.Atom atom_feed -> ( 165 - (* Try feed-level authors *) 166 - match atom_feed.Syndic.Atom.authors with 167 - | author :: _ when is_valid_author_name author.name -> 168 - String.trim author.name 169 - | _ -> 170 - (* Use feed title *) 171 - Util.string_of_text_construct atom_feed.Syndic.Atom.title) 172 - | Feed.Rss2 _ | Feed.Json _ -> 173 - (* For RSS2 and JSONFeed, use the feed name which is the source name *) 174 - feed.name) 175 - in 176 - (* Extract tags from Atom categories *) 177 - let tags = 178 - List.map (fun cat -> cat.Syndic.Atom.term) e.categories 179 - in 180 - (* Extract summary - convert from text_construct to string *) 181 - let summary = 182 - match e.summary with 183 - | Some s -> Some (Util.string_of_text_construct s) 184 - | None -> None 185 - in 186 - (* Generate unique ID *) 187 - let guid = Uri.to_string e.id in 188 - let title_str = Util.string_of_text_construct e.title in 189 - let id = 190 - generate_id ~guid ?link ~title:title_str ?date ~feed_url:feed.url () 191 - in 192 - { 193 - id; 194 - title = title_str; 195 - link; 196 - date; 197 - feed; 198 - author = author_name; 199 - email = ""; 200 - content; 201 - link_response = None; 202 - tags; 203 - summary; 204 - } 205 - 206 - let post_of_rss2 ~(feed : Feed.t) it = 207 - let title, content = 208 - match it.Syndic.Rss2.story with 209 - | All (t, xmlbase, d) -> ( 210 - ( t, 211 - match it.content with 212 - | _, "" -> html_of_text ?xmlbase d 213 - | xmlbase, c -> html_of_text ?xmlbase c )) 214 - | Title t -> 215 - let xmlbase, c = it.content in 216 - (t, html_of_text ?xmlbase c) 217 - | Description (xmlbase, d) -> ( 218 - ( "", 219 - match it.content with 220 - | _, "" -> html_of_text ?xmlbase d 221 - | xmlbase, c -> html_of_text ?xmlbase c )) 222 - in 223 - (* Note: it.link is of type Uri.t option in Syndic *) 224 - let link = 225 - match (it.guid, it.link) with 226 - | Some u, _ when u.permalink -> Some u.data 227 - | _, Some _ -> it.link 228 - | Some u, _ -> 229 - (* Sometimes the guid is indicated with isPermaLink="false" but is 230 - nonetheless the only URL we get (e.g. ocamlpro). *) 231 - Some u.data 232 - | None, None -> None 233 - in 234 - (* Extract GUID string for ID generation *) 235 - let guid_str = 236 - match it.guid with 237 - | Some u -> Some (Uri.to_string u.data) 238 - | None -> None 239 - in 240 - (* RSS2 doesn't have a categories field exposed, use empty list *) 241 - let tags = [] in 242 - (* RSS2 doesn't have a separate summary field, so leave it empty *) 243 - let summary = None in 244 - (* Generate unique ID *) 245 - let id = 246 - generate_id ?guid:guid_str ?link ~title ?date:it.pubDate ~feed_url:feed.url () 247 - in 248 - { 249 - id; 250 - title; 251 - link; 252 - feed; 253 - author = feed.name; 254 - email = string_of_option it.author; 255 - content; 256 - date = it.pubDate; 257 - link_response = None; 258 - tags; 259 - summary; 260 - } 261 - 262 - let post_of_jsonfeed_item ~(feed : Feed.t) (item : Jsonfeed.Item.t) = 263 - Log.debug (fun m -> m "Processing JSONFeed item: %s" 264 - (Option.value (Jsonfeed.Item.title item) ~default:"Untitled")); 265 - 266 - (* Extract content - prefer HTML, fall back to text *) 267 - let content = 268 - match Jsonfeed.Item.content item with 269 - | `Html html -> html_of_text html 270 - | `Text text -> html_of_text text 271 - | `Both (html, _text) -> html_of_text html 272 - in 273 - 274 - (* Extract author - use first author if multiple *) 275 - let author_name, author_email = 276 - match Jsonfeed.Item.authors item with 277 - | Some (first :: _) -> 278 - let name = Jsonfeed.Author.name first |> Option.value ~default:"" in 279 - (* JSONFeed authors don't typically have email *) 280 - (name, "") 281 - | _ -> 282 - (* Fall back to feed-level authors or feed title *) 283 - (match feed.content with 284 - | Feed.Json jsonfeed -> 285 - (match Jsonfeed.authors jsonfeed with 286 - | Some (first :: _) -> 287 - let name = Jsonfeed.Author.name first |> Option.value ~default:feed.title in 288 - (name, "") 289 - | _ -> (feed.title, "")) 290 - | _ -> (feed.title, "")) 291 - in 292 - 293 - (* Link - use url field *) 294 - let link = 295 - Jsonfeed.Item.url item 296 - |> Option.map Uri.of_string 297 - in 298 - 299 - (* Date *) 300 - let date = Jsonfeed.Item.date_published item in 301 - 302 - (* Summary *) 303 - let summary = Jsonfeed.Item.summary item in 304 - 305 - (* Tags *) 306 - let tags = 307 - Jsonfeed.Item.tags item 308 - |> Option.value ~default:[] 309 - in 310 - 311 - (* Generate unique ID - JSONFeed items always have an id field (required) *) 312 - let guid = Jsonfeed.Item.id item in 313 - let title_str = Jsonfeed.Item.title item |> Option.value ~default:"Untitled" in 314 - let id = 315 - generate_id ~guid ?link ~title:title_str ?date ~feed_url:feed.url () 316 - in 317 - 318 - { 319 - id; 320 - title = title_str; 321 - link; 322 - date; 323 - feed; 324 - author = author_name; 325 - email = author_email; 326 - content; 327 - link_response = None; 328 - tags; 329 - summary; 330 - } 331 - 332 - let posts_of_feed c = 333 - match c.Feed.content with 334 - | Feed.Atom f -> 335 - let posts = List.map (post_of_atom ~feed:c) f.Syndic.Atom.entries in 336 - Log.debug (fun m -> m "Extracted %d posts from Atom feed '%s'" 337 - (List.length posts) c.Feed.name); 338 - posts 339 - | Feed.Rss2 ch -> 340 - let posts = List.map (post_of_rss2 ~feed:c) ch.Syndic.Rss2.items in 341 - Log.debug (fun m -> m "Extracted %d posts from RSS2 feed '%s'" 342 - (List.length posts) c.Feed.name); 343 - posts 344 - | Feed.Json jsonfeed -> 345 - let items = Jsonfeed.items jsonfeed in 346 - let posts = List.map (post_of_jsonfeed_item ~feed:c) items in 347 - Log.debug (fun m -> m "Extracted %d posts from JSONFeed '%s'" 348 - (List.length posts) c.Feed.name); 349 - posts 350 - 351 - let mk_entry post = 352 - let content = Syndic.Atom.Html (None, Soup.to_string post.content) in 353 - let contributors = 354 - [ Syndic.Atom.author ~uri:(Uri.of_string post.feed.url) post.feed.name ] 355 - in 356 - let links = 357 - match post.link with 358 - | Some l -> [ Syndic.Atom.link ~rel:Syndic.Atom.Alternate l ] 359 - | None -> [] 360 - in 361 - (* TODO: include source *) 362 - let id = 363 - match post.link with 364 - | Some l -> l 365 - | None -> Uri.of_string (Digest.to_hex (Digest.string post.title)) 366 - in 367 - let authors = (Syndic.Atom.author ~email:post.email post.author, []) in 368 - let title : Syndic.Atom.text_construct = Syndic.Atom.Text post.title in 369 - let updated = 370 - match post.date with 371 - (* Atom entry requires a date but RSS2 does not. So if a date 372 - * is not available, just capture the current date. *) 373 - | None -> Ptime.of_float_s (Unix.gettimeofday ()) |> Option.get 374 - | Some d -> d 375 - in 376 - Syndic.Atom.entry ~content ~contributors ~links ~id ~authors ~title ~updated 377 - () 378 - 379 - let mk_entries posts = List.map mk_entry posts 380 - 381 - let mk_jsonfeed_item post = 382 - (* Convert HTML content back to string *) 383 - let html = Soup.to_string post.content in 384 - let content = `Html html in 385 - 386 - (* Create author *) 387 - let authors = 388 - if post.author <> "" then 389 - let author = Jsonfeed.Author.create ~name:post.author () in 390 - Some [author] 391 - else 392 - None 393 - in 394 - 395 - (* Create item *) 396 - Jsonfeed.Item.create 397 - ~id:post.id 398 - ~content 399 - ?url:(Option.map Uri.to_string post.link) 400 - ~title:post.title 401 - ?summary:post.summary 402 - ?date_published:post.date 403 - ?authors 404 - ~tags:post.tags 405 - () 406 - 407 - let mk_jsonfeed_items posts = List.map mk_jsonfeed_item posts 408 - 409 - let get_posts ?n ?(ofs = 0) planet_feeds = 410 - Log.info (fun m -> m "Processing %d feeds for posts" (List.length planet_feeds)); 411 - 412 - let posts = List.concat @@ List.map posts_of_feed planet_feeds in 413 - Log.debug (fun m -> m "Total posts collected: %d" (List.length posts)); 414 - 415 - let posts = List.sort post_compare posts in 416 - Log.debug (fun m -> m "Posts sorted by date (most recent first)"); 417 - 418 - let posts = remove ofs posts in 419 - let result = 420 - match n with 421 - | None -> 422 - Log.debug (fun m -> m "Returning all %d posts (offset=%d)" 423 - (List.length posts) ofs); 424 - posts 425 - | Some n -> 426 - let limited = take n posts in 427 - Log.debug (fun m -> m "Returning %d posts (requested=%d, offset=%d)" 428 - (List.length limited) n ofs); 429 - limited 430 - in 431 - result 432 - 433 - (* Fetch the link response and cache it. *) 434 - (* TODO: This requires environment for HTTP access 435 - let fetch_link env t = 436 - match (t.link, t.link_response) with 437 - | None, _ -> None 438 - | Some _, Some (Ok x) -> Some x 439 - | Some _, Some (Error _) -> None 440 - | Some link, None -> ( 441 - try 442 - let response = Http.get env (Uri.to_string link) in 443 - t.link_response <- Some (Ok response); 444 - Some response 445 - with _exn -> 446 - t.link_response <- Some (Error ""); 447 - None) 448 - *) 449 - let fetch_link _ = None
+1683 -67
stack/river/lib/river.ml
··· 15 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 16 *) 17 17 18 + (** River RSS/Atom/JSONFeed aggregator library *) 19 + 18 20 let src = Logs.Src.create "river" ~doc:"River RSS/Atom aggregator" 19 21 module Log = (val Logs.src_log src : Logs.LOG) 20 22 21 - (* Keep Client module internal *) 22 - module Internal_client = Client 23 + (** {1 Internal Utilities} *) 24 + 25 + module Text_extract = struct 26 + open Syndic 27 + 28 + (* Remove all tags *) 29 + let rec syndic_to_buffer b = function 30 + | XML.Node (_, _, subs) -> List.iter (syndic_to_buffer b) subs 31 + | XML.Data (_, d) -> Buffer.add_string b d 32 + 33 + let syndic_to_string x = 34 + let b = Buffer.create 1024 in 35 + List.iter (syndic_to_buffer b) x; 36 + Buffer.contents b 37 + 38 + let string_of_text_construct : Atom.text_construct -> string = function 39 + | Atom.Text s | Atom.Html (_, s) -> s 40 + | Atom.Xhtml (_, x) -> syndic_to_string x 41 + end 42 + 43 + module Html_meta = struct 44 + [@@@warning "-32"] (* Suppress unused value warnings for internal utilities *) 45 + 46 + (** This module determines an image to be used as preview of a website. 47 + 48 + It does this by following the same logic Google+ and other websites use, and 49 + described in this article: 50 + https://www.raymondcamden.com/2011/07/26/How-are-Facebook-and-Google-creating-link-previews *) 51 + 52 + let og_image html = 53 + let open Soup in 54 + let soup = parse html in 55 + try soup $ "meta[property=og:image]" |> R.attribute "content" |> Option.some 56 + with Failure _ -> None 57 + 58 + let image_src html = 59 + let open Soup in 60 + let soup = parse html in 61 + try soup $ "link[rel=\"image_src\"]" |> R.attribute "href" |> Option.some 62 + with Failure _ -> None 63 + 64 + let twitter_image html = 65 + let open Soup in 66 + let soup = parse html in 67 + try 68 + soup $ "meta[name=\"twitter:image\"]" |> R.attribute "content" 69 + |> Option.some 70 + with Failure _ -> None 71 + 72 + let og_description html = 73 + let open Soup in 74 + let soup = parse html in 75 + try 76 + soup $ "meta[property=og:description]" |> R.attribute "content" 77 + |> Option.some 78 + with Failure _ -> None 79 + 80 + let description html = 81 + let open Soup in 82 + let soup = parse html in 83 + try 84 + soup $ "meta[property=description]" |> R.attribute "content" |> Option.some 85 + with Failure _ -> None 86 + 87 + let preview_image html = 88 + let preview_image = 89 + match og_image html with 90 + | None -> ( 91 + match image_src html with 92 + | None -> twitter_image html 93 + | Some x -> Some x) 94 + | Some x -> Some x 95 + in 96 + match Option.map String.trim preview_image with 97 + | Some "" -> None 98 + | Some x -> Some x 99 + | None -> None 100 + 101 + let description html = 102 + let preview_image = 103 + match og_description html with None -> description html | Some x -> Some x 104 + in 105 + match Option.map String.trim preview_image with 106 + | Some "" -> None 107 + | Some x -> Some x 108 + | None -> None 109 + end 110 + 111 + module Html_markdown = struct 112 + [@@@warning "-32"] (* Suppress unused value warnings for internal utilities *) 113 + 114 + (** HTML to Markdown converter using Lambda Soup *) 115 + 116 + (** Extract all links from HTML content *) 117 + let extract_links html_str = 118 + try 119 + let soup = Soup.parse html_str in 120 + let links = Soup.select "a[href]" soup in 121 + Soup.fold (fun acc link -> 122 + match Soup.attribute "href" link with 123 + | Some href -> 124 + let text = Soup.texts link |> String.concat "" |> String.trim in 125 + (href, text) :: acc 126 + | None -> acc 127 + ) [] links 128 + |> List.rev 129 + with _ -> [] 130 + 131 + (** Check if string contains any whitespace *) 132 + let has_whitespace s = 133 + try 134 + let _ = Str.search_forward (Str.regexp "[ \t\n\r]") s 0 in 135 + true 136 + with Not_found -> false 137 + 138 + (** Clean up excessive newlines and normalize spacing *) 139 + let cleanup_markdown s = 140 + (* Normalize line endings *) 141 + let s = Str.global_replace (Str.regexp "\r\n") "\n" s in 142 + 143 + (* Remove trailing whitespace from each line *) 144 + let lines = String.split_on_char '\n' s in 145 + let lines = List.map (fun line -> 146 + (* Trim trailing spaces but preserve leading spaces for indentation *) 147 + let len = String.length line in 148 + let rec find_last_non_space i = 149 + if i < 0 then -1 150 + else if line.[i] = ' ' || line.[i] = '\t' then find_last_non_space (i - 1) 151 + else i 152 + in 153 + let last = find_last_non_space (len - 1) in 154 + if last < 0 then "" 155 + else String.sub line 0 (last + 1) 156 + ) lines in 157 + 158 + (* Join back and collapse excessive blank lines *) 159 + let s = String.concat "\n" lines in 160 + 161 + (* Replace 3+ consecutive newlines with just 2 *) 162 + let s = Str.global_replace (Str.regexp "\n\n\n+") "\n\n" s in 163 + 164 + (* Trim leading and trailing whitespace *) 165 + String.trim s 166 + 167 + (** Convert HTML to Markdown using state-based whitespace handling *) 168 + let html_to_markdown html_str = 169 + try 170 + let soup = Soup.parse html_str in 171 + let buffer = Buffer.create 256 in 172 + 173 + (* State: track if we need to insert a space before next text *) 174 + let need_space = ref false in 175 + 176 + (* Get last character in buffer, if any *) 177 + let last_char () = 178 + let len = Buffer.length buffer in 179 + if len = 0 then None 180 + else Some (Buffer.nth buffer (len - 1)) 181 + in 182 + 183 + (* Add text with proper spacing *) 184 + let add_text text = 185 + let trimmed = String.trim text in 186 + if trimmed <> "" then begin 187 + (* Check if text starts with punctuation that shouldn't have space before it *) 188 + let starts_with_punctuation = 189 + String.length trimmed > 0 && 190 + (match trimmed.[0] with 191 + | ',' | '.' | ';' | ':' | '!' | '?' | ')' | ']' | '}' -> true 192 + | _ -> false) 193 + in 194 + 195 + (* Add space if needed, unless we're before punctuation *) 196 + if !need_space && not starts_with_punctuation then begin 197 + match last_char () with 198 + | Some (' ' | '\n') -> () 199 + | _ -> Buffer.add_char buffer ' ' 200 + end; 201 + Buffer.add_string buffer trimmed; 202 + need_space := false 203 + end 204 + in 205 + 206 + (* Mark that we need space before next text (for inline elements) *) 207 + let mark_space_needed () = 208 + need_space := has_whitespace (Buffer.contents buffer) || Buffer.length buffer > 0 209 + in 210 + 211 + (* Process header with ID/anchor handling *) 212 + let process_header level elem = 213 + need_space := false; 214 + 215 + (* Check if header contains a link with an ID fragment *) 216 + let link_opt = Soup.select_one "a[href]" elem in 217 + let anchor_id = match link_opt with 218 + | Some link -> 219 + (match Soup.attribute "href" link with 220 + | Some href -> 221 + (* Extract fragment from URL *) 222 + let uri = Uri.of_string href in 223 + Uri.fragment uri 224 + | None -> None) 225 + | None -> None 226 + in 227 + 228 + (* Add anchor if we found an ID *) 229 + (match anchor_id with 230 + | Some id when id <> "" -> 231 + Buffer.add_string buffer (Printf.sprintf "\n<a name=\"%s\"></a>\n" id) 232 + | _ -> ()); 233 + 234 + (* Add the header marker *) 235 + let marker = String.make level '#' in 236 + Buffer.add_string buffer ("\n" ^ marker ^ " "); 237 + 238 + (* Get text content, excluding link tags *) 239 + let text = Soup.texts elem |> String.concat " " |> String.trim in 240 + Buffer.add_string buffer text; 241 + 242 + Buffer.add_string buffer "\n\n"; 243 + need_space := false 244 + in 245 + 246 + let rec process_node node = 247 + match Soup.element node with 248 + | Some elem -> 249 + let tag = Soup.name elem in 250 + (match tag with 251 + (* Block elements - reset space tracking *) 252 + | "h1" -> process_header 1 elem 253 + | "h2" -> process_header 2 elem 254 + | "h3" -> process_header 3 elem 255 + | "h4" -> process_header 4 elem 256 + | "h5" -> process_header 5 elem 257 + | "h6" -> process_header 6 elem 258 + | "p" -> 259 + need_space := false; 260 + Soup.children elem |> Soup.iter process_node; 261 + Buffer.add_string buffer "\n\n"; 262 + need_space := false 263 + | "br" -> 264 + Buffer.add_string buffer "\n"; 265 + need_space := false 266 + (* Inline elements - preserve space tracking *) 267 + | "strong" | "b" -> 268 + (* Add space before if needed *) 269 + if !need_space then begin 270 + match last_char () with 271 + | Some (' ' | '\n') -> () 272 + | _ -> Buffer.add_char buffer ' ' 273 + end; 274 + Buffer.add_string buffer "**"; 275 + need_space := false; 276 + Soup.children elem |> Soup.iter process_node; 277 + Buffer.add_string buffer "**"; 278 + mark_space_needed () 279 + | "em" | "i" -> 280 + (* Add space before if needed *) 281 + if !need_space then begin 282 + match last_char () with 283 + | Some (' ' | '\n') -> () 284 + | _ -> Buffer.add_char buffer ' ' 285 + end; 286 + Buffer.add_string buffer "*"; 287 + need_space := false; 288 + Soup.children elem |> Soup.iter process_node; 289 + Buffer.add_string buffer "*"; 290 + mark_space_needed () 291 + | "code" -> 292 + (* Add space before if needed *) 293 + if !need_space then begin 294 + match last_char () with 295 + | Some (' ' | '\n') -> () 296 + | _ -> Buffer.add_char buffer ' ' 297 + end; 298 + Buffer.add_string buffer "`"; 299 + need_space := false; 300 + Soup.children elem |> Soup.iter process_node; 301 + Buffer.add_string buffer "`"; 302 + mark_space_needed () 303 + | "pre" -> 304 + need_space := false; 305 + Buffer.add_string buffer "\n```\n"; 306 + Soup.children elem |> Soup.iter process_node; 307 + Buffer.add_string buffer "\n```\n\n"; 308 + need_space := false 309 + | "a" -> 310 + let text = Soup.texts elem |> String.concat " " |> String.trim in 311 + let href = Soup.attribute "href" elem in 312 + (match href with 313 + | Some href -> 314 + (* Add space before link if needed *) 315 + if !need_space then begin 316 + match last_char () with 317 + | Some (' ' | '\n') -> () 318 + | _ -> Buffer.add_char buffer ' ' 319 + end; 320 + need_space := false; 321 + 322 + (* Add the link markdown *) 323 + if text = "" then 324 + Buffer.add_string buffer (Printf.sprintf "<%s>" href) 325 + else 326 + Buffer.add_string buffer (Printf.sprintf "[%s](%s)" text href); 327 + 328 + (* Mark that space may be needed after link *) 329 + mark_space_needed () 330 + | None -> 331 + add_text text) 332 + | "ul" | "ol" -> 333 + need_space := false; 334 + Buffer.add_string buffer "\n"; 335 + let is_ordered = tag = "ol" in 336 + let items = Soup.children elem |> Soup.to_list in 337 + List.iteri (fun i item -> 338 + match Soup.element item with 339 + | Some li when Soup.name li = "li" -> 340 + need_space := false; 341 + if is_ordered then 342 + Buffer.add_string buffer (Printf.sprintf "%d. " (i + 1)) 343 + else 344 + Buffer.add_string buffer "- "; 345 + Soup.children li |> Soup.iter process_node; 346 + Buffer.add_string buffer "\n" 347 + | _ -> () 348 + ) items; 349 + Buffer.add_string buffer "\n"; 350 + need_space := false 351 + | "blockquote" -> 352 + need_space := false; 353 + Buffer.add_string buffer "\n> "; 354 + Soup.children elem |> Soup.iter process_node; 355 + Buffer.add_string buffer "\n\n"; 356 + need_space := false 357 + | "img" -> 358 + (* Add space before if needed *) 359 + if !need_space then begin 360 + match last_char () with 361 + | Some (' ' | '\n') -> () 362 + | _ -> Buffer.add_char buffer ' ' 363 + end; 364 + let alt = Soup.attribute "alt" elem |> Option.value ~default:"" in 365 + let src = Soup.attribute "src" elem |> Option.value ~default:"" in 366 + Buffer.add_string buffer (Printf.sprintf "![%s](%s)" alt src); 367 + need_space := false; 368 + mark_space_needed () 369 + | "hr" -> 370 + need_space := false; 371 + Buffer.add_string buffer "\n---\n\n"; 372 + need_space := false 373 + (* Strip these tags but keep content *) 374 + | "div" | "span" | "article" | "section" | "header" | "footer" 375 + | "main" | "nav" | "aside" | "figure" | "figcaption" | "details" | "summary" -> 376 + Soup.children elem |> Soup.iter process_node 377 + (* Ignore script, style, etc *) 378 + | "script" | "style" | "noscript" -> () 379 + (* Default: just process children *) 380 + | _ -> 381 + Soup.children elem |> Soup.iter process_node) 382 + | None -> 383 + (* Text node - handle whitespace properly *) 384 + match Soup.leaf_text node with 385 + | Some text -> 386 + (* If text is only whitespace, mark that we need space *) 387 + let trimmed = String.trim text in 388 + if trimmed = "" then begin 389 + if has_whitespace text then 390 + need_space := true 391 + end else begin 392 + (* Text has content - check if it had leading/trailing whitespace *) 393 + let had_leading_ws = has_whitespace text && 394 + (String.length text > 0 && 395 + (text.[0] = ' ' || text.[0] = '\t' || text.[0] = '\n' || text.[0] = '\r')) in 396 + 397 + (* If had leading whitespace, mark we need space *) 398 + if had_leading_ws then need_space := true; 399 + 400 + (* Add the text content *) 401 + add_text trimmed; 402 + 403 + (* If had trailing whitespace, mark we need space for next *) 404 + let had_trailing_ws = has_whitespace text && 405 + (String.length text > 0 && 406 + let last = text.[String.length text - 1] in 407 + last = ' ' || last = '\t' || last = '\n' || last = '\r') in 408 + if had_trailing_ws then need_space := true 409 + end 410 + | None -> () 411 + in 412 + 413 + Soup.children soup |> Soup.iter process_node; 414 + 415 + (* Clean up the result *) 416 + let result = Buffer.contents buffer in 417 + cleanup_markdown result 418 + with _ -> html_str 419 + 420 + (** Convert HTML content to clean Markdown *) 421 + let to_markdown html_str = 422 + html_to_markdown html_str 423 + end 424 + 425 + (** {1 Feed Sources} *) 426 + 427 + module Source = struct 428 + type t = { 429 + name : string; 430 + url : string; 431 + } 432 + 433 + let make ~name ~url = { name; url } 434 + 435 + let name t = t.name 436 + let url t = t.url 437 + 438 + let jsont = 439 + let make name url = { name; url } in 440 + Jsont.Object.map ~kind:"Source" make 441 + |> Jsont.Object.mem "name" Jsont.string ~enc:(fun s -> s.name) 442 + |> Jsont.Object.mem "url" Jsont.string ~enc:(fun s -> s.url) 443 + |> Jsont.Object.finish 444 + end 445 + 446 + (** {1 HTTP Session Management} *) 447 + 448 + module Session = struct 449 + type t = { 450 + session : (float Eio.Time.clock_ty Eio.Resource.t, 451 + [`Generic | `Unix] Eio.Net.ty Eio.Resource.t) Requests.t; 452 + } 453 + 454 + let init ~sw env = 455 + Log.info (fun m -> m "Initializing River session"); 456 + let session = Requests.create ~sw 457 + ~default_headers:(Requests.Headers.of_list [ 458 + ("User-Agent", "OCaml-River/1.0"); 459 + ]) 460 + ~follow_redirects:true 461 + ~max_redirects:5 462 + ~verify_tls:true 463 + env 464 + in 465 + { session } 466 + 467 + let with_session env f = 468 + Log.info (fun m -> m "Creating River session"); 469 + Eio.Switch.run @@ fun sw -> 470 + let client = init ~sw env in 471 + f client 472 + 473 + let get_requests_session t = t.session 474 + end 475 + 476 + (** {1 Feeds and Posts} *) 477 + 478 + module Feed = struct 479 + type feed_content = 480 + | Atom of Syndic.Atom.feed 481 + | Rss2 of Syndic.Rss2.channel 482 + | Json of Jsonfeed.t 483 + 484 + type t = { 485 + source : Source.t; 486 + title : string; 487 + content : feed_content; 488 + } 489 + 490 + let string_of_feed = function 491 + | Atom _ -> "Atom" 492 + | Rss2 _ -> "Rss2" 493 + | Json _ -> "JSONFeed" 494 + 495 + let classify_feed ~xmlbase (body : string) = 496 + Log.debug (fun m -> m "Attempting to parse feed (%d bytes)" (String.length body)); 497 + 498 + (* Quick check - does it look like JSON? *) 499 + let looks_like_json = 500 + String.length body > 0 && 501 + let first_char = String.get body 0 in 502 + first_char = '{' || first_char = '[' 503 + in 504 + 505 + if looks_like_json then ( 506 + (* Try JSONFeed first *) 507 + Log.debug (fun m -> m "Body looks like JSON, trying JSONFeed parser"); 508 + match Jsonfeed.of_string body with 509 + | Ok jsonfeed -> 510 + Log.debug (fun m -> m "Successfully parsed as JSONFeed"); 511 + Json jsonfeed 512 + | Error err -> 513 + Log.debug (fun m -> m "Not a JSONFeed: %s" (Jsont.Error.to_string err)); 514 + (* Fall through to XML parsing *) 515 + failwith "Not a valid JSONFeed" 516 + ) else ( 517 + (* Try XML formats *) 518 + try 519 + let feed = Atom (Syndic.Atom.parse ~xmlbase (Xmlm.make_input (`String (0, body)))) in 520 + Log.debug (fun m -> m "Successfully parsed as Atom feed"); 521 + feed 522 + with 523 + | Syndic.Atom.Error.Error (pos, msg) -> ( 524 + Log.debug (fun m -> m "Not an Atom feed: %s at position (%d, %d)" 525 + msg (fst pos) (snd pos)); 526 + try 527 + let feed = Rss2 (Syndic.Rss2.parse ~xmlbase (Xmlm.make_input (`String (0, body)))) in 528 + Log.debug (fun m -> m "Successfully parsed as RSS2 feed"); 529 + feed 530 + with Syndic.Rss2.Error.Error (pos, msg) -> 531 + Log.err (fun m -> m "Failed to parse as RSS2: %s at position (%d, %d)" 532 + msg (fst pos) (snd pos)); 533 + failwith "Neither Atom nor RSS2 feed") 534 + | Not_found as e -> 535 + Log.err (fun m -> m "Not_found exception during Atom feed parsing"); 536 + Log.err (fun m -> m "Backtrace:\n%s" (Printexc.get_backtrace ())); 537 + raise e 538 + | e -> 539 + Log.err (fun m -> m "Unexpected exception during feed parsing: %s" 540 + (Printexc.to_string e)); 541 + Log.err (fun m -> m "Backtrace:\n%s" (Printexc.get_backtrace ())); 542 + raise e 543 + ) 544 + 545 + let fetch session source = 546 + Log.info (fun m -> m "Fetching feed: %s" (Source.name source)); 547 + 548 + let xmlbase = Uri.of_string (Source.url source) in 549 + 550 + (* Use Requests_json_api.get_result for clean Result-based error handling *) 551 + let requests_session = Session.get_requests_session session in 552 + let response = 553 + match Requests_json_api.get_result requests_session (Source.url source) with 554 + | Ok body -> 555 + Log.info (fun m -> m "Successfully fetched %s (%d bytes)" 556 + (Source.url source) (String.length body)); 557 + body 558 + | Error (status, msg) -> 559 + Log.err (fun m -> m "Failed to fetch feed '%s': HTTP %d - %s" 560 + (Source.name source) status msg); 561 + failwith (Printf.sprintf "HTTP %d: %s" status msg) 562 + in 563 + 564 + let content = classify_feed ~xmlbase response in 565 + let title = 566 + match content with 567 + | Atom atom -> Text_extract.string_of_text_construct atom.Syndic.Atom.title 568 + | Rss2 ch -> ch.Syndic.Rss2.title 569 + | Json jsonfeed -> Jsonfeed.title jsonfeed 570 + in 571 + 572 + Log.info (fun m -> m "Successfully fetched %s feed '%s' (title: '%s')" 573 + (string_of_feed content) (Source.name source) title); 574 + 575 + { source; title; content } 576 + 577 + let source t = t.source 578 + end 579 + 580 + (** {1 Posts} *) 23 581 24 - (* Abstract session type *) 25 - type session = Client.t 582 + module Post = struct 583 + type t = { 584 + id : string; 585 + title : string; 586 + link : Uri.t option; 587 + date : Syndic.Date.t option; 588 + feed : Feed.t; 589 + author : string; 590 + email : string; 591 + content : Soup.soup Soup.node; 592 + mutable link_response : (string, string) result option; 593 + tags : string list; 594 + summary : string option; 595 + } 26 596 27 - type source = Feed.source = { name : string; url : string } 28 - type feed = Feed.t 29 - type post = Post.t 597 + (** Generate a stable, unique ID from available data *) 598 + let generate_id ?guid ?link ?title ?date ~feed_url () = 599 + match guid with 600 + | Some id when id <> "" -> 601 + (* Use explicit ID/GUID if available *) 602 + id 603 + | _ -> 604 + match link with 605 + | Some uri when Uri.to_string uri <> "" -> 606 + (* Use permalink as ID (stable and unique) *) 607 + Uri.to_string uri 608 + | _ -> 609 + (* Fallback: hash of feed_url + title + date *) 610 + let title_str = Option.value title ~default:"" in 611 + let date_str = 612 + match date with 613 + | Some d -> Ptime.to_rfc3339 d 614 + | None -> "" 615 + in 616 + let composite = Printf.sprintf "%s|%s|%s" feed_url title_str date_str in 617 + (* Use SHA256 for stable hashing *) 618 + Digest.string composite |> Digest.to_hex 30 619 31 - (* Session management *) 32 - let init ~sw env = 33 - Log.info (fun m -> m "Initializing River session"); 34 - Internal_client.create ~sw env 620 + let resolve_links_attr ~xmlbase attr el = 621 + Soup.R.attribute attr el 622 + |> Uri.of_string 623 + |> Syndic.XML.resolve ~xmlbase 624 + |> Uri.to_string 625 + |> fun value -> Soup.set_attribute attr value el 35 626 36 - let with_session env f = 37 - Log.info (fun m -> m "Creating River session"); 38 - Internal_client.with_client env f 627 + (* Things that posts should not contain *) 628 + let undesired_tags = [ "style"; "script" ] 629 + let undesired_attr = [ "id" ] 39 630 40 - (* Feed operations *) 41 - let fetch session source = 42 - Log.info (fun m -> m "Fetching feed: %s" source.name); 43 - Feed.fetch session source 631 + let html_of_text ?xmlbase s = 632 + let soup = Soup.parse s in 633 + let ($$) = Soup.($$) in 634 + soup $$ "a[href]" |> Soup.iter (resolve_links_attr ~xmlbase "href"); 635 + soup $$ "img[src]" |> Soup.iter (resolve_links_attr ~xmlbase "src"); 636 + undesired_tags |> List.iter (fun tag -> soup $$ tag |> Soup.iter Soup.delete); 637 + soup $$ "*" |> Soup.iter (fun el -> 638 + undesired_attr |> List.iter (fun attr -> Soup.delete_attribute attr el)); 639 + soup 44 640 45 - let name feed = feed.Feed.name 46 - let url feed = feed.Feed.url 641 + (* Do not trust sites using XML for HTML content. Convert to string and parse 642 + back. (Does not always fix bad HTML unfortunately.) *) 643 + let html_of_syndic = 644 + let ns_prefix _ = Some "" in 645 + fun ?xmlbase h -> 646 + html_of_text ?xmlbase 647 + (String.concat "" (List.map (Syndic.XML.to_string ~ns_prefix) h)) 47 648 48 - let posts feeds = 49 - Log.info (fun m -> m "Aggregating posts from %d feed(s)" (List.length feeds)); 50 - let result = Post.get_posts feeds in 51 - Log.info (fun m -> m "Aggregated %d posts total" (List.length result)); 52 - result 649 + let string_of_option = function None -> "" | Some s -> s 53 650 54 - let title post = post.Post.title 55 - let link post = post.Post.link 56 - let date post = post.Post.date 57 - let feed post = post.Post.feed 58 - let author post = post.Post.author 59 - let email post = post.Post.email 60 - let content post = Soup.to_string post.Post.content 61 - let id post = post.Post.id 62 - let tags post = post.Post.tags 63 - let summary post = post.Post.summary 651 + let post_compare p1 p2 = 652 + (* Most recent posts first. Posts with no date are always last *) 653 + match (p1.date, p2.date) with 654 + | Some d1, Some d2 -> Syndic.Date.compare d2 d1 655 + | None, Some _ -> 1 656 + | Some _, None -> -1 657 + | None, None -> 1 64 658 65 - let meta_description _post = 66 - (* TODO: This requires environment for HTTP access *) 67 - Log.debug (fun m -> m "meta_description not implemented (requires environment)"); 68 - None 659 + let rec remove n l = 660 + if n <= 0 then l else match l with [] -> [] | _ :: tl -> remove (n - 1) tl 69 661 70 - let seo_image _post = 71 - (* TODO: This requires environment for HTTP access *) 72 - Log.debug (fun m -> m "seo_image not implemented (requires environment)"); 73 - None 662 + let rec take n = function 663 + | [] -> [] 664 + | e :: tl -> if n > 0 then e :: take (n - 1) tl else [] 74 665 75 - let create_atom_entries posts = 76 - Log.info (fun m -> m "Creating Atom entries for %d posts" (List.length posts)); 77 - Post.mk_entries posts 666 + let post_of_atom ~(feed : Feed.t) (e : Syndic.Atom.entry) = 667 + Log.debug (fun m -> m "Processing Atom entry: %s" 668 + (Text_extract.string_of_text_construct e.title)); 78 669 79 - (* JSONFeed support *) 80 - let create_jsonfeed_items posts = 81 - Log.info (fun m -> m "Creating JSONFeed items for %d posts" (List.length posts)); 82 - Post.mk_jsonfeed_items posts 670 + let link = 671 + try 672 + Some 673 + (List.find (fun l -> l.Syndic.Atom.rel = Syndic.Atom.Alternate) e.links) 674 + .href 675 + with Not_found -> ( 676 + Log.debug (fun m -> m "No alternate link found, trying fallback"); 677 + match e.links with 678 + | l :: _ -> Some l.href 679 + | [] -> ( 680 + match Uri.scheme e.id with 681 + | Some "http" -> Some e.id 682 + | Some "https" -> Some e.id 683 + | _ -> None)) 684 + in 685 + let date = 686 + match e.published with Some _ -> e.published | None -> Some e.updated 687 + in 688 + let content = 689 + match e.content with 690 + | Some (Text s) -> html_of_text s 691 + | Some (Html (xmlbase, s)) -> html_of_text ?xmlbase s 692 + | Some (Xhtml (xmlbase, h)) -> html_of_syndic ?xmlbase h 693 + | Some (Mime _) | Some (Src _) | None -> ( 694 + match e.summary with 695 + | Some (Text s) -> html_of_text s 696 + | Some (Html (xmlbase, s)) -> html_of_text ?xmlbase s 697 + | Some (Xhtml (xmlbase, h)) -> html_of_syndic ?xmlbase h 698 + | None -> Soup.parse "") 699 + in 700 + let is_valid_author_name name = 701 + (* Filter out empty strings and placeholder values like "Unknown" *) 702 + let trimmed = String.trim name in 703 + trimmed <> "" && trimmed <> "Unknown" 704 + in 705 + let author_name = 706 + (* Fallback chain for author: 707 + 1. Entry author (if present, not empty, and not "Unknown") 708 + 2. Feed-level author (from Atom feed metadata) 709 + 3. Feed title (from Atom feed metadata) 710 + 4. Source name (manually entered feed name) *) 711 + try 712 + let author, _ = e.authors in 713 + let trimmed = String.trim author.name in 714 + if is_valid_author_name author.name then trimmed 715 + else raise Not_found (* Try feed-level author *) 716 + with Not_found -> ( 717 + match feed.content with 718 + | Feed.Atom atom_feed -> ( 719 + (* Try feed-level authors *) 720 + match atom_feed.Syndic.Atom.authors with 721 + | author :: _ when is_valid_author_name author.name -> 722 + String.trim author.name 723 + | _ -> 724 + (* Use feed title *) 725 + Text_extract.string_of_text_construct atom_feed.Syndic.Atom.title) 726 + | Feed.Rss2 _ | Feed.Json _ -> 727 + (* For RSS2 and JSONFeed, use the source name *) 728 + Source.name feed.source) 729 + in 730 + (* Extract tags from Atom categories *) 731 + let tags = 732 + List.map (fun cat -> cat.Syndic.Atom.term) e.categories 733 + in 734 + (* Extract summary - convert from text_construct to string *) 735 + let summary = 736 + match e.summary with 737 + | Some s -> Some (Text_extract.string_of_text_construct s) 738 + | None -> None 739 + in 740 + (* Generate unique ID *) 741 + let guid = Uri.to_string e.id in 742 + let title_str = Text_extract.string_of_text_construct e.title in 743 + let id = 744 + generate_id ~guid ?link ~title:title_str ?date 745 + ~feed_url:(Source.url feed.source) () 746 + in 747 + { 748 + id; 749 + title = title_str; 750 + link; 751 + date; 752 + feed; 753 + author = author_name; 754 + email = ""; 755 + content; 756 + link_response = None; 757 + tags; 758 + summary; 759 + } 83 760 84 - let create_jsonfeed ~title ?home_page_url ?feed_url ?description ?icon ?favicon posts = 85 - Log.info (fun m -> m "Creating JSONFeed with %d posts" (List.length posts)); 86 - let items = create_jsonfeed_items posts in 87 - Jsonfeed.create ~title ?home_page_url ?feed_url ?description ?icon ?favicon ~items () 761 + let post_of_rss2 ~(feed : Feed.t) it = 762 + let title, content = 763 + match it.Syndic.Rss2.story with 764 + | All (t, xmlbase, d) -> ( 765 + ( t, 766 + match it.content with 767 + | _, "" -> html_of_text ?xmlbase d 768 + | xmlbase, c -> html_of_text ?xmlbase c )) 769 + | Title t -> 770 + let xmlbase, c = it.content in 771 + (t, html_of_text ?xmlbase c) 772 + | Description (xmlbase, d) -> ( 773 + ( "", 774 + match it.content with 775 + | _, "" -> html_of_text ?xmlbase d 776 + | xmlbase, c -> html_of_text ?xmlbase c )) 777 + in 778 + (* Note: it.link is of type Uri.t option in Syndic *) 779 + let link = 780 + match (it.guid, it.link) with 781 + | Some u, _ when u.permalink -> Some u.data 782 + | _, Some _ -> it.link 783 + | Some u, _ -> 784 + (* Sometimes the guid is indicated with isPermaLink="false" but is 785 + nonetheless the only URL we get (e.g. ocamlpro). *) 786 + Some u.data 787 + | None, None -> None 788 + in 789 + (* Extract GUID string for ID generation *) 790 + let guid_str = 791 + match it.guid with 792 + | Some u -> Some (Uri.to_string u.data) 793 + | None -> None 794 + in 795 + (* RSS2 doesn't have a categories field exposed, use empty list *) 796 + let tags = [] in 797 + (* RSS2 doesn't have a separate summary field, so leave it empty *) 798 + let summary = None in 799 + (* Generate unique ID *) 800 + let id = 801 + generate_id ?guid:guid_str ?link ~title ?date:it.pubDate 802 + ~feed_url:(Source.url feed.source) () 803 + in 804 + { 805 + id; 806 + title; 807 + link; 808 + feed; 809 + author = Source.name feed.source; 810 + email = string_of_option it.author; 811 + content; 812 + date = it.pubDate; 813 + link_response = None; 814 + tags; 815 + summary; 816 + } 88 817 89 - let jsonfeed_to_string ?(minify = false) jsonfeed = 90 - match Jsonfeed.to_string ~minify jsonfeed with 91 - | Ok s -> Ok s 92 - | Error err -> Error (Jsont.Error.to_string err) 818 + let post_of_jsonfeed_item ~(feed : Feed.t) (item : Jsonfeed.Item.t) = 819 + Log.debug (fun m -> m "Processing JSONFeed item: %s" 820 + (Option.value (Jsonfeed.Item.title item) ~default:"Untitled")); 93 821 94 - type feed_content = 95 - | Atom of Syndic.Atom.feed 96 - | Rss2 of Syndic.Rss2.channel 97 - | JSONFeed of Jsonfeed.t 822 + (* Extract content - prefer HTML, fall back to text *) 823 + let content = 824 + match Jsonfeed.Item.content item with 825 + | `Html html -> html_of_text html 826 + | `Text text -> html_of_text text 827 + | `Both (html, _text) -> html_of_text html 828 + in 98 829 99 - let feed_content feed = 100 - match feed.Feed.content with 101 - | Feed.Atom f -> Atom f 102 - | Feed.Rss2 ch -> Rss2 ch 103 - | Feed.Json jf -> JSONFeed jf 830 + (* Extract author - use first author if multiple *) 831 + let author_name, author_email = 832 + match Jsonfeed.Item.authors item with 833 + | Some (first :: _) -> 834 + let name = Jsonfeed.Author.name first |> Option.value ~default:"" in 835 + (* JSONFeed authors don't typically have email *) 836 + (name, "") 837 + | _ -> 838 + (* Fall back to feed-level authors or feed title *) 839 + (match feed.content with 840 + | Feed.Json jsonfeed -> 841 + (match Jsonfeed.authors jsonfeed with 842 + | Some (first :: _) -> 843 + let name = Jsonfeed.Author.name first |> Option.value ~default:feed.title in 844 + (name, "") 845 + | _ -> (feed.title, "")) 846 + | _ -> (feed.title, "")) 847 + in 848 + 849 + (* Link - use url field *) 850 + let link = 851 + Jsonfeed.Item.url item 852 + |> Option.map Uri.of_string 853 + in 854 + 855 + (* Date *) 856 + let date = Jsonfeed.Item.date_published item in 857 + 858 + (* Summary *) 859 + let summary = Jsonfeed.Item.summary item in 860 + 861 + (* Tags *) 862 + let tags = 863 + Jsonfeed.Item.tags item 864 + |> Option.value ~default:[] 865 + in 866 + 867 + (* Generate unique ID - JSONFeed items always have an id field (required) *) 868 + let guid = Jsonfeed.Item.id item in 869 + let title_str = Jsonfeed.Item.title item |> Option.value ~default:"Untitled" in 870 + let id = 871 + generate_id ~guid ?link ~title:title_str ?date 872 + ~feed_url:(Source.url feed.source) () 873 + in 874 + 875 + { 876 + id; 877 + title = title_str; 878 + link; 879 + date; 880 + feed; 881 + author = author_name; 882 + email = author_email; 883 + content; 884 + link_response = None; 885 + tags; 886 + summary; 887 + } 888 + 889 + let posts_of_feed c = 890 + match c.Feed.content with 891 + | Feed.Atom f -> 892 + let posts = List.map (post_of_atom ~feed:c) f.Syndic.Atom.entries in 893 + Log.debug (fun m -> m "Extracted %d posts from Atom feed '%s'" 894 + (List.length posts) (Source.name c.source)); 895 + posts 896 + | Feed.Rss2 ch -> 897 + let posts = List.map (post_of_rss2 ~feed:c) ch.Syndic.Rss2.items in 898 + Log.debug (fun m -> m "Extracted %d posts from RSS2 feed '%s'" 899 + (List.length posts) (Source.name c.source)); 900 + posts 901 + | Feed.Json jsonfeed -> 902 + let items = Jsonfeed.items jsonfeed in 903 + let posts = List.map (post_of_jsonfeed_item ~feed:c) items in 904 + Log.debug (fun m -> m "Extracted %d posts from JSONFeed '%s'" 905 + (List.length posts) (Source.name c.source)); 906 + posts 907 + 908 + let get_posts ?n ?(ofs = 0) planet_feeds = 909 + Log.info (fun m -> m "Processing %d feeds for posts" (List.length planet_feeds)); 910 + 911 + let posts = List.concat @@ List.map posts_of_feed planet_feeds in 912 + Log.debug (fun m -> m "Total posts collected: %d" (List.length posts)); 913 + 914 + let posts = List.sort post_compare posts in 915 + Log.debug (fun m -> m "Posts sorted by date (most recent first)"); 916 + 917 + let posts = remove ofs posts in 918 + let result = 919 + match n with 920 + | None -> 921 + Log.debug (fun m -> m "Returning all %d posts (offset=%d)" 922 + (List.length posts) ofs); 923 + posts 924 + | Some n -> 925 + let limited = take n posts in 926 + Log.debug (fun m -> m "Returning %d posts (requested=%d, offset=%d)" 927 + (List.length limited) n ofs); 928 + limited 929 + in 930 + result 931 + 932 + let of_feeds feeds = get_posts feeds 933 + 934 + let feed t = t.feed 935 + let title t = t.title 936 + let link t = t.link 937 + let date t = t.date 938 + let author t = t.author 939 + let email t = t.email 940 + let content t = Soup.to_string t.content 941 + let id t = t.id 942 + let tags t = t.tags 943 + let summary t = t.summary 944 + 945 + let meta_description _t = 946 + (* TODO: This requires environment for HTTP access *) 947 + Log.debug (fun m -> m "meta_description not implemented (requires environment)"); 948 + None 949 + 950 + let seo_image _t = 951 + (* TODO: This requires environment for HTTP access *) 952 + Log.debug (fun m -> m "seo_image not implemented (requires environment)"); 953 + None 954 + end 955 + 956 + (** {1 Format Conversion and Export} *) 957 + 958 + module Format = struct 959 + module Atom = struct 960 + let entry_of_post post = 961 + let content = Syndic.Atom.Html (None, Post.content post) in 962 + let contributors = 963 + [ Syndic.Atom.author ~uri:(Uri.of_string (Source.url (Feed.source (Post.feed post)))) 964 + (Source.name (Feed.source (Post.feed post))) ] 965 + in 966 + let links = 967 + match Post.link post with 968 + | Some l -> [ Syndic.Atom.link ~rel:Syndic.Atom.Alternate l ] 969 + | None -> [] 970 + in 971 + let id = 972 + match Post.link post with 973 + | Some l -> l 974 + | None -> Uri.of_string (Digest.to_hex (Digest.string (Post.title post))) 975 + in 976 + let authors = (Syndic.Atom.author ~email:(Post.email post) (Post.author post), []) in 977 + let title : Syndic.Atom.text_construct = Syndic.Atom.Text (Post.title post) in 978 + let updated = 979 + match Post.date post with 980 + (* Atom entry requires a date but RSS2 does not. So if a date 981 + * is not available, just capture the current date. *) 982 + | None -> Ptime.of_float_s (Unix.gettimeofday ()) |> Option.get 983 + | Some d -> d 984 + in 985 + Syndic.Atom.entry ~content ~contributors ~links ~id ~authors ~title ~updated 986 + () 987 + 988 + let entries_of_posts posts = List.map entry_of_post posts 989 + 990 + let feed_of_entries ~title ?id ?(authors = []) entries = 991 + let feed_id = match id with 992 + | Some i -> Uri.of_string i 993 + | None -> Uri.of_string "urn:river:merged" 994 + in 995 + let feed_authors = List.map (fun (name, email) -> 996 + match email with 997 + | Some e -> Syndic.Atom.author ~email:e name 998 + | None -> Syndic.Atom.author name 999 + ) authors in 1000 + { 1001 + Syndic.Atom.id = feed_id; 1002 + title = Syndic.Atom.Text title; 1003 + updated = Ptime.of_float_s (Unix.time ()) |> Option.get; 1004 + entries; 1005 + authors = feed_authors; 1006 + categories = []; 1007 + contributors = []; 1008 + generator = Some { 1009 + Syndic.Atom.version = Some "1.0"; 1010 + uri = None; 1011 + content = "River Feed Aggregator"; 1012 + }; 1013 + icon = None; 1014 + links = []; 1015 + logo = None; 1016 + rights = None; 1017 + subtitle = None; 1018 + } 1019 + 1020 + let to_string feed = 1021 + let output = Buffer.create 4096 in 1022 + Syndic.Atom.output feed (`Buffer output); 1023 + Buffer.contents output 1024 + end 1025 + 1026 + module Rss2 = struct 1027 + let of_feed feed = 1028 + match feed.Feed.content with 1029 + | Feed.Rss2 ch -> Some ch 1030 + | _ -> None 1031 + end 1032 + 1033 + module Jsonfeed = struct 1034 + let item_of_post post = 1035 + (* Convert HTML content back to string *) 1036 + let html = Post.content post in 1037 + let content = `Html html in 1038 + 1039 + (* Create author *) 1040 + let authors = 1041 + if Post.author post <> "" then 1042 + let author = Jsonfeed.Author.create ~name:(Post.author post) () in 1043 + Some [author] 1044 + else 1045 + None 1046 + in 1047 + 1048 + (* Create item *) 1049 + Jsonfeed.Item.create 1050 + ~id:(Post.id post) 1051 + ~content 1052 + ?url:(Option.map Uri.to_string (Post.link post)) 1053 + ~title:(Post.title post) 1054 + ?summary:(Post.summary post) 1055 + ?date_published:(Post.date post) 1056 + ?authors 1057 + ~tags:(Post.tags post) 1058 + () 1059 + 1060 + let items_of_posts posts = List.map item_of_post posts 1061 + 1062 + let feed_of_items ~title ?home_page_url ?feed_url ?description ?icon ?favicon items = 1063 + Jsonfeed.create ~title ?home_page_url ?feed_url ?description ?icon ?favicon ~items () 1064 + 1065 + let feed_of_posts ~title ?home_page_url ?feed_url ?description ?icon ?favicon posts = 1066 + let items = items_of_posts posts in 1067 + feed_of_items ~title ?home_page_url ?feed_url ?description ?icon ?favicon items 1068 + 1069 + let to_string ?(minify = false) jsonfeed = 1070 + match Jsonfeed.to_string ~minify jsonfeed with 1071 + | Ok s -> Ok s 1072 + | Error err -> Error (Jsont.Error.to_string err) 1073 + 1074 + let of_feed feed = 1075 + match feed.Feed.content with 1076 + | Feed.Json jf -> Some jf 1077 + | _ -> None 1078 + end 1079 + end 1080 + 1081 + (** {1 User Management} *) 1082 + 1083 + module User = struct 1084 + type t = { 1085 + username : string; 1086 + fullname : string; 1087 + email : string option; 1088 + feeds : Source.t list; 1089 + last_synced : string option; 1090 + } 1091 + 1092 + let make ~username ~fullname ?email ?(feeds = []) ?last_synced () = 1093 + { username; fullname; email; feeds; last_synced } 1094 + 1095 + let username t = t.username 1096 + let fullname t = t.fullname 1097 + let email t = t.email 1098 + let feeds t = t.feeds 1099 + let last_synced t = t.last_synced 1100 + 1101 + let add_feed t source = 1102 + { t with feeds = source :: t.feeds } 1103 + 1104 + let remove_feed t ~url = 1105 + let feeds = List.filter (fun s -> Source.url s <> url) t.feeds in 1106 + { t with feeds } 1107 + 1108 + let set_last_synced t timestamp = 1109 + { t with last_synced = Some timestamp } 1110 + 1111 + let jsont = 1112 + let make username fullname email feeds last_synced = 1113 + { username; fullname; email; feeds; last_synced } 1114 + in 1115 + Jsont.Object.map ~kind:"User" make 1116 + |> Jsont.Object.mem "username" Jsont.string ~enc:(fun u -> u.username) 1117 + |> Jsont.Object.mem "fullname" Jsont.string ~enc:(fun u -> u.fullname) 1118 + |> Jsont.Object.opt_mem "email" Jsont.string ~enc:(fun u -> u.email) 1119 + |> Jsont.Object.mem "feeds" (Jsont.list Source.jsont) ~enc:(fun u -> u.feeds) 1120 + |> Jsont.Object.opt_mem "last_synced" Jsont.string ~enc:(fun u -> u.last_synced) 1121 + |> Jsont.Object.finish 1122 + end 1123 + 1124 + (** {1 Feed Quality Analysis} *) 1125 + 1126 + module Quality = struct 1127 + type t = { 1128 + total_entries : int; 1129 + entries_with_summary : int; 1130 + entries_with_author : int; 1131 + entries_with_date : int; 1132 + entries_with_content : int; 1133 + entries_with_tags : int; 1134 + avg_content_length : float; 1135 + min_content_length : int; 1136 + max_content_length : int; 1137 + posting_frequency_days : float option; 1138 + quality_score : float; 1139 + } 1140 + 1141 + let make ~total_entries ~entries_with_summary ~entries_with_author 1142 + ~entries_with_date ~entries_with_content ~entries_with_tags 1143 + ~avg_content_length ~min_content_length ~max_content_length 1144 + ~posting_frequency_days ~quality_score = 1145 + { 1146 + total_entries; 1147 + entries_with_summary; 1148 + entries_with_author; 1149 + entries_with_date; 1150 + entries_with_content; 1151 + entries_with_tags; 1152 + avg_content_length; 1153 + min_content_length; 1154 + max_content_length; 1155 + posting_frequency_days; 1156 + quality_score; 1157 + } 1158 + 1159 + let total_entries t = t.total_entries 1160 + let entries_with_summary t = t.entries_with_summary 1161 + let entries_with_author t = t.entries_with_author 1162 + let entries_with_date t = t.entries_with_date 1163 + let entries_with_content t = t.entries_with_content 1164 + let entries_with_tags t = t.entries_with_tags 1165 + let avg_content_length t = t.avg_content_length 1166 + let min_content_length t = t.min_content_length 1167 + let max_content_length t = t.max_content_length 1168 + let posting_frequency_days t = t.posting_frequency_days 1169 + let quality_score t = t.quality_score 1170 + 1171 + (** Get content length from an Atom entry *) 1172 + let get_content_length (entry : Syndic.Atom.entry) = 1173 + match entry.content with 1174 + | Some (Syndic.Atom.Text s) -> String.length s 1175 + | Some (Syndic.Atom.Html (_, s)) -> String.length s 1176 + | Some (Syndic.Atom.Xhtml (_, _)) -> 0 (* Could calculate but complex *) 1177 + | Some (Syndic.Atom.Mime _) -> 0 1178 + | Some (Syndic.Atom.Src _) -> 0 1179 + | None -> ( 1180 + match entry.summary with 1181 + | Some (Syndic.Atom.Text s) -> String.length s 1182 + | Some (Syndic.Atom.Html (_, s)) -> String.length s 1183 + | Some (Syndic.Atom.Xhtml (_, _)) -> 0 1184 + | None -> 0) 1185 + 1186 + (** Check if entry has non-empty summary *) 1187 + let has_summary (entry : Syndic.Atom.entry) = 1188 + match entry.summary with 1189 + | Some (Syndic.Atom.Text s) when String.trim s <> "" -> true 1190 + | Some (Syndic.Atom.Html (_, s)) when String.trim s <> "" -> true 1191 + | Some (Syndic.Atom.Xhtml (_, _)) -> true 1192 + | _ -> false 1193 + 1194 + (** Check if entry has author *) 1195 + let has_author (entry : Syndic.Atom.entry) = 1196 + let (author, _) = entry.authors in 1197 + String.trim author.name <> "" 1198 + 1199 + (** Check if entry has content *) 1200 + let has_content (entry : Syndic.Atom.entry) = 1201 + get_content_length entry > 0 1202 + 1203 + (** Check if entry has tags/categories *) 1204 + let has_tags (entry : Syndic.Atom.entry) = 1205 + entry.categories <> [] 1206 + 1207 + (** Calculate quality score from metrics *) 1208 + let calculate_quality_score t = 1209 + let total = float_of_int t.total_entries in 1210 + if total = 0.0 then 0.0 1211 + else 1212 + let summary_pct = float_of_int t.entries_with_summary /. total *. 100.0 in 1213 + let author_pct = float_of_int t.entries_with_author /. total *. 100.0 in 1214 + let date_pct = float_of_int t.entries_with_date /. total *. 100.0 in 1215 + let content_pct = float_of_int t.entries_with_content /. total *. 100.0 in 1216 + let tags_pct = float_of_int t.entries_with_tags /. total *. 100.0 in 1217 + 1218 + (* Weighted average: content and dates are most important *) 1219 + let score = 1220 + (content_pct *. 0.30) +. 1221 + (date_pct *. 0.25) +. 1222 + (author_pct *. 0.20) +. 1223 + (summary_pct *. 0.15) +. 1224 + (tags_pct *. 0.10) 1225 + in 1226 + score 1227 + 1228 + let analyze entries = 1229 + if entries = [] then 1230 + failwith "No entries to analyze" 1231 + else 1232 + let total_entries = List.length entries in 1233 + 1234 + let entries_with_summary = ref 0 in 1235 + let entries_with_author = ref 0 in 1236 + let entries_with_date = ref total_entries in (* All Atom entries have updated *) 1237 + let entries_with_content = ref 0 in 1238 + let entries_with_tags = ref 0 in 1239 + let content_lengths = ref [] in 1240 + let dates = ref [] in 1241 + 1242 + List.iter (fun (entry : Syndic.Atom.entry) -> 1243 + if has_summary entry then incr entries_with_summary; 1244 + if has_author entry then incr entries_with_author; 1245 + if has_content entry then begin 1246 + incr entries_with_content; 1247 + content_lengths := get_content_length entry :: !content_lengths 1248 + end; 1249 + if has_tags entry then incr entries_with_tags; 1250 + dates := entry.updated :: !dates 1251 + ) entries; 1252 + 1253 + (* Calculate content statistics *) 1254 + let avg_content_length, min_content_length, max_content_length = 1255 + if !content_lengths = [] then 1256 + (0.0, 0, 0) 1257 + else 1258 + let sorted = List.sort compare !content_lengths in 1259 + let sum = List.fold_left (+) 0 sorted in 1260 + let avg = float_of_int sum /. float_of_int (List.length sorted) in 1261 + let min_len = List.hd sorted in 1262 + let max_len = List.hd (List.rev sorted) in 1263 + (avg, min_len, max_len) 1264 + in 1265 + 1266 + (* Calculate posting frequency *) 1267 + let posting_frequency_days = 1268 + if List.length !dates < 2 then 1269 + None 1270 + else 1271 + try 1272 + let timestamps = List.map Ptime.to_float_s !dates in 1273 + let sorted_timestamps = List.sort compare timestamps in 1274 + let first = List.hd sorted_timestamps in 1275 + let last = List.hd (List.rev sorted_timestamps) in 1276 + let total_days = (last -. first) /. 86400.0 in 1277 + let num_intervals = float_of_int (List.length sorted_timestamps - 1) in 1278 + Some (total_days /. num_intervals) 1279 + with _ -> None 1280 + in 1281 + 1282 + (* Create metrics record (without quality_score first) *) 1283 + let metrics = { 1284 + total_entries; 1285 + entries_with_summary = !entries_with_summary; 1286 + entries_with_author = !entries_with_author; 1287 + entries_with_date = !entries_with_date; 1288 + entries_with_content = !entries_with_content; 1289 + entries_with_tags = !entries_with_tags; 1290 + avg_content_length; 1291 + min_content_length; 1292 + max_content_length; 1293 + posting_frequency_days; 1294 + quality_score = 0.0; (* Placeholder *) 1295 + } in 1296 + 1297 + (* Calculate quality score *) 1298 + let quality_score = calculate_quality_score metrics in 1299 + { metrics with quality_score } 1300 + end 1301 + 1302 + (** {1 State Management} *) 1303 + 1304 + module State = struct 1305 + type t = { 1306 + xdg : Xdge.t; 1307 + } 1308 + 1309 + module Paths = struct 1310 + (** Get the users directory path *) 1311 + let users_dir state = Eio.Path.(Xdge.state_dir state.xdg / "users") 1312 + 1313 + (** Get the feeds directory path *) 1314 + let feeds_dir state = Eio.Path.(Xdge.state_dir state.xdg / "feeds") 1315 + 1316 + (** Get the user feeds directory path *) 1317 + let user_feeds_dir state = Eio.Path.(feeds_dir state / "user") 1318 + 1319 + (** Get the path to a user's JSON file *) 1320 + let user_file state username = 1321 + Eio.Path.(users_dir state / (username ^ ".json")) 1322 + 1323 + (** Get the path to a user's Atom feed file *) 1324 + let user_feed_file state username = 1325 + Eio.Path.(user_feeds_dir state / (username ^ ".xml")) 1326 + 1327 + (** Ensure all necessary directories exist *) 1328 + let ensure_directories state = 1329 + let dirs = [ 1330 + users_dir state; 1331 + feeds_dir state; 1332 + user_feeds_dir state; 1333 + ] in 1334 + List.iter (fun dir -> 1335 + try Eio.Path.mkdir ~perm:0o755 dir 1336 + with Eio.Io (Eio.Fs.E (Already_exists _), _) -> () 1337 + ) dirs 1338 + end 1339 + 1340 + module Json = struct 1341 + (** Decode a user from JSON string *) 1342 + let user_of_string s = 1343 + match Jsont_bytesrw.decode_string' User.jsont s with 1344 + | Ok user -> Some user 1345 + | Error err -> 1346 + Log.err (fun m -> m "Failed to parse user JSON: %s" (Jsont.Error.to_string err)); 1347 + None 1348 + 1349 + (** Encode a user to JSON string *) 1350 + let user_to_string user = 1351 + match Jsont_bytesrw.encode_string' ~format:Jsont.Indent User.jsont user with 1352 + | Ok s -> s 1353 + | Error err -> failwith ("Failed to encode user: " ^ Jsont.Error.to_string err) 1354 + end 1355 + 1356 + module Storage = struct 1357 + (** Load a user from disk *) 1358 + let load_user state username = 1359 + let file = Paths.user_file state username in 1360 + try 1361 + let content = Eio.Path.load file in 1362 + Json.user_of_string content 1363 + with 1364 + | Eio.Io (Eio.Fs.E (Not_found _), _) -> None 1365 + | e -> 1366 + Log.err (fun m -> m "Error loading user %s: %s" username (Printexc.to_string e)); 1367 + None 1368 + 1369 + (** Save a user to disk *) 1370 + let save_user state user = 1371 + let file = Paths.user_file state (User.username user) in 1372 + let json = Json.user_to_string user in 1373 + Eio.Path.save ~create:(`Or_truncate 0o644) file json 1374 + 1375 + (** List all usernames *) 1376 + let list_users state = 1377 + try 1378 + Eio.Path.read_dir (Paths.users_dir state) 1379 + |> List.filter_map (fun name -> 1380 + if Filename.check_suffix name ".json" then 1381 + Some (Filename.chop_suffix name ".json") 1382 + else None 1383 + ) 1384 + with _ -> [] 1385 + 1386 + (** Load existing Atom entries for a user *) 1387 + let load_existing_posts state username = 1388 + let file = Paths.user_feed_file state username in 1389 + try 1390 + let content = Eio.Path.load file in 1391 + (* Parse existing Atom feed *) 1392 + let input = Xmlm.make_input (`String (0, content)) in 1393 + let feed = Syndic.Atom.parse input in 1394 + feed.Syndic.Atom.entries 1395 + with 1396 + | Eio.Io (Eio.Fs.E (Not_found _), _) -> [] 1397 + | e -> 1398 + Log.err (fun m -> m "Error loading existing posts for %s: %s" 1399 + username (Printexc.to_string e)); 1400 + [] 1401 + 1402 + (** Save Atom entries for a user *) 1403 + let save_atom_feed state username entries = 1404 + let file = Paths.user_feed_file state username in 1405 + let feed = Format.Atom.feed_of_entries ~title:username entries in 1406 + let xml = Format.Atom.to_string feed in 1407 + Eio.Path.save ~create:(`Or_truncate 0o644) file xml 1408 + 1409 + (** Delete a user and their feed file *) 1410 + let delete_user state username = 1411 + let user_file = Paths.user_file state username in 1412 + let feed_file = Paths.user_feed_file state username in 1413 + (try Eio.Path.unlink user_file with _ -> ()); 1414 + (try Eio.Path.unlink feed_file with _ -> ()) 1415 + end 1416 + 1417 + module Sync = struct 1418 + (** Merge new entries with existing ones, updating matching IDs *) 1419 + let merge_entries ~existing ~new_entries = 1420 + (* Create a map of new entry IDs for efficient lookup and updates *) 1421 + let module UriMap = Map.Make(Uri) in 1422 + let new_entries_map = 1423 + List.fold_left (fun acc (entry : Syndic.Atom.entry) -> 1424 + UriMap.add entry.id entry acc 1425 + ) UriMap.empty new_entries 1426 + in 1427 + 1428 + (* Update existing entries with new ones if IDs match, otherwise keep existing *) 1429 + let updated_existing = 1430 + List.filter_map (fun (entry : Syndic.Atom.entry) -> 1431 + if UriMap.mem entry.id new_entries_map then 1432 + None (* Will be replaced by new entry *) 1433 + else 1434 + Some entry (* Keep existing entry *) 1435 + ) existing 1436 + in 1437 + 1438 + (* Combine new entries with non-replaced existing entries *) 1439 + let combined = new_entries @ updated_existing in 1440 + List.sort (fun (a : Syndic.Atom.entry) (b : Syndic.Atom.entry) -> 1441 + Ptime.compare b.updated a.updated 1442 + ) combined 1443 + 1444 + (** Get current timestamp in ISO 8601 format *) 1445 + let current_timestamp () = 1446 + let open Unix in 1447 + let tm = gmtime (time ()) in 1448 + Printf.sprintf "%04d-%02d-%02dT%02d:%02d:%02dZ" 1449 + (tm.tm_year + 1900) (tm.tm_mon + 1) tm.tm_mday 1450 + tm.tm_hour tm.tm_min tm.tm_sec 1451 + 1452 + (** Sync feeds for a single user *) 1453 + let sync_user session state ~username = 1454 + match Storage.load_user state username with 1455 + | None -> 1456 + Error (Printf.sprintf "User %s not found" username) 1457 + | Some user when User.feeds user = [] -> 1458 + Log.info (fun m -> m "No feeds configured for user %s" username); 1459 + Ok () 1460 + | Some user -> 1461 + Log.info (fun m -> m "Syncing feeds for user %s..." username); 1462 + 1463 + (* Fetch all feeds concurrently *) 1464 + let fetched_feeds = 1465 + Eio.Fiber.List.filter_map (fun source -> 1466 + try 1467 + Log.info (fun m -> m " Fetching %s (%s)..." 1468 + (Source.name source) (Source.url source)); 1469 + Some (Feed.fetch session source) 1470 + with e -> 1471 + Log.err (fun m -> m " Failed to fetch %s: %s" 1472 + (Source.name source) (Printexc.to_string e)); 1473 + None 1474 + ) (User.feeds user) 1475 + in 1476 + 1477 + if fetched_feeds = [] then begin 1478 + Error "No feeds successfully fetched" 1479 + end else begin 1480 + (* Get posts from fetched feeds *) 1481 + let posts = Post.of_feeds fetched_feeds in 1482 + Log.info (fun m -> m " Found %d new posts" (List.length posts)); 1483 + 1484 + (* Convert to Atom entries *) 1485 + let new_entries = Format.Atom.entries_of_posts posts in 1486 + 1487 + (* Load existing entries *) 1488 + let existing = Storage.load_existing_posts state username in 1489 + Log.info (fun m -> m " Found %d existing posts" (List.length existing)); 1490 + 1491 + (* Merge entries *) 1492 + let merged = merge_entries ~existing ~new_entries in 1493 + Log.info (fun m -> m " Total posts after merge: %d" (List.length merged)); 1494 + 1495 + (* Save updated feed *) 1496 + Storage.save_atom_feed state username merged; 1497 + 1498 + (* Update last_synced timestamp *) 1499 + let now = current_timestamp () in 1500 + let user = User.set_last_synced user now in 1501 + Storage.save_user state user; 1502 + 1503 + Log.info (fun m -> m "Sync completed for user %s" username); 1504 + Ok () 1505 + end 1506 + end 1507 + 1508 + module Export = struct 1509 + (** Convert Atom entry to JSONFeed item *) 1510 + let atom_entry_to_jsonfeed_item (entry : Syndic.Atom.entry) = 1511 + (* Extract ID *) 1512 + let id = Uri.to_string entry.id in 1513 + 1514 + (* Extract title *) 1515 + let title = 1516 + match entry.title with 1517 + | Syndic.Atom.Text s -> Some s 1518 + | Syndic.Atom.Html (_, s) -> Some s 1519 + | Syndic.Atom.Xhtml (_, _) -> Some "Untitled" 1520 + in 1521 + 1522 + (* Extract URL *) 1523 + let url = 1524 + match entry.links with 1525 + | link :: _ -> Some (Uri.to_string link.href) 1526 + | [] -> None 1527 + in 1528 + 1529 + (* Extract content *) 1530 + let content = 1531 + match entry.content with 1532 + | Some (Syndic.Atom.Text s) -> `Text s 1533 + | Some (Syndic.Atom.Html (_, s)) -> `Html s 1534 + | Some (Syndic.Atom.Xhtml (_, nodes)) -> 1535 + let html = String.concat "" (List.map Syndic.XML.to_string nodes) in 1536 + `Html html 1537 + | Some (Syndic.Atom.Mime _) | Some (Syndic.Atom.Src _) | None -> 1538 + `Text "" 1539 + in 1540 + 1541 + (* Extract summary *) 1542 + let summary = 1543 + match entry.summary with 1544 + | Some (Syndic.Atom.Text s) when String.trim s <> "" -> Some s 1545 + | Some (Syndic.Atom.Html (_, s)) when String.trim s <> "" -> Some s 1546 + | _ -> None 1547 + in 1548 + 1549 + (* Extract authors *) 1550 + let authors = 1551 + let (author, contributors) = entry.authors in 1552 + let author_list = author :: contributors in 1553 + let jsonfeed_authors = List.filter_map (fun (a : Syndic.Atom.author) -> 1554 + let name = String.trim a.name in 1555 + if name = "" then None 1556 + else Some (Jsonfeed.Author.create ~name ()) 1557 + ) author_list in 1558 + if jsonfeed_authors = [] then None else Some jsonfeed_authors 1559 + in 1560 + 1561 + (* Extract tags *) 1562 + let tags = 1563 + match entry.categories with 1564 + | [] -> None 1565 + | cats -> 1566 + let tag_list = List.map (fun (c : Syndic.Atom.category) -> 1567 + match c.label with 1568 + | Some lbl -> lbl 1569 + | None -> c.term 1570 + ) cats in 1571 + if tag_list = [] then None else Some tag_list 1572 + in 1573 + 1574 + (* Create JSONFeed item *) 1575 + Jsonfeed.Item.create 1576 + ~id 1577 + ~content 1578 + ?title 1579 + ?url 1580 + ?summary 1581 + ?authors 1582 + ?tags 1583 + ~date_published:entry.updated 1584 + () 1585 + 1586 + (** Export entries as JSONFeed *) 1587 + let export_jsonfeed ~title entries = 1588 + let items = List.map atom_entry_to_jsonfeed_item entries in 1589 + let feed = Jsonfeed.create ~title ~items () in 1590 + match Jsonfeed.to_string ~minify:false feed with 1591 + | Ok json -> Ok json 1592 + | Error err -> Error (Printf.sprintf "Failed to serialize JSON Feed: %s" (Jsont.Error.to_string err)) 1593 + end 1594 + 1595 + let create env ~app_name = 1596 + let xdg = Xdge.create env#fs app_name in 1597 + let state = { xdg } in 1598 + Paths.ensure_directories state; 1599 + state 1600 + 1601 + let create_user state user = 1602 + match Storage.load_user state (User.username user) with 1603 + | Some _ -> 1604 + Error (Printf.sprintf "User %s already exists" (User.username user)) 1605 + | None -> 1606 + Storage.save_user state user; 1607 + Log.info (fun m -> m "User %s created" (User.username user)); 1608 + Ok () 1609 + 1610 + let delete_user state ~username = 1611 + match Storage.load_user state username with 1612 + | None -> 1613 + Error (Printf.sprintf "User %s not found" username) 1614 + | Some _ -> 1615 + Storage.delete_user state username; 1616 + Log.info (fun m -> m "User %s deleted" username); 1617 + Ok () 1618 + 1619 + let get_user state ~username = 1620 + Storage.load_user state username 1621 + 1622 + let update_user state user = 1623 + match Storage.load_user state (User.username user) with 1624 + | None -> 1625 + Error (Printf.sprintf "User %s not found" (User.username user)) 1626 + | Some _ -> 1627 + Storage.save_user state user; 1628 + Log.info (fun m -> m "User %s updated" (User.username user)); 1629 + Ok () 1630 + 1631 + let list_users state = 1632 + Storage.list_users state 1633 + 1634 + let sync_user env state ~username = 1635 + Session.with_session env @@ fun session -> 1636 + Sync.sync_user session state ~username 1637 + 1638 + let sync_all env state = 1639 + let users = Storage.list_users state in 1640 + if users = [] then begin 1641 + Log.info (fun m -> m "No users to sync"); 1642 + Ok (0, 0) 1643 + end else begin 1644 + Log.info (fun m -> m "Syncing %d users concurrently..." (List.length users)); 1645 + 1646 + Session.with_session env @@ fun session -> 1647 + let results = 1648 + Eio.Fiber.List.map (fun username -> 1649 + match Sync.sync_user session state ~username with 1650 + | Ok () -> true 1651 + | Error err -> 1652 + Log.err (fun m -> m "Failed to sync user %s: %s" username err); 1653 + false 1654 + ) users 1655 + in 1656 + let success_count = List.length (List.filter (fun x -> x) results) in 1657 + let fail_count = List.length users - success_count in 1658 + 1659 + if fail_count = 0 then 1660 + Log.info (fun m -> m "All users synced successfully"); 1661 + 1662 + Ok (success_count, fail_count) 1663 + end 1664 + 1665 + let get_user_posts state ~username ?limit () = 1666 + let entries = Storage.load_existing_posts state username in 1667 + match limit with 1668 + | None -> entries 1669 + | Some n -> List.filteri (fun i _ -> i < n) entries 1670 + 1671 + let get_all_posts state ?limit () = 1672 + let users = Storage.list_users state in 1673 + 1674 + (* Collect all entries from all users with username tag *) 1675 + let all_entries = 1676 + List.concat_map (fun username -> 1677 + let entries = Storage.load_existing_posts state username in 1678 + List.map (fun entry -> (username, entry)) entries 1679 + ) users 1680 + in 1681 + 1682 + (* Sort by date (newest first) *) 1683 + let sorted = List.sort (fun (_, a : string * Syndic.Atom.entry) (_, b) -> 1684 + Ptime.compare b.updated a.updated 1685 + ) all_entries in 1686 + 1687 + match limit with 1688 + | None -> sorted 1689 + | Some n -> List.filteri (fun i _ -> i < n) sorted 1690 + 1691 + let export_merged_feed state ~title ~format ?limit () = 1692 + let all_posts = get_all_posts state ?limit () in 1693 + let entries = List.map snd all_posts in 1694 + 1695 + match format with 1696 + | `Atom -> 1697 + let xml = Format.Atom.to_string (Format.Atom.feed_of_entries ~title entries) in 1698 + Ok xml 1699 + | `Jsonfeed -> 1700 + if entries = [] then 1701 + (* Empty JSONFeed *) 1702 + let feed = Jsonfeed.create ~title ~items:[] () in 1703 + match Jsonfeed.to_string ~minify:false feed with 1704 + | Ok json -> Ok json 1705 + | Error err -> Error (Printf.sprintf "Failed to serialize JSON Feed: %s" (Jsont.Error.to_string err)) 1706 + else 1707 + Export.export_jsonfeed ~title entries 1708 + 1709 + let analyze_user_quality state ~username = 1710 + match Storage.load_user state username with 1711 + | None -> 1712 + Error (Printf.sprintf "User %s not found" username) 1713 + | Some _ -> 1714 + let entries = Storage.load_existing_posts state username in 1715 + if entries = [] then 1716 + Error "No entries to analyze" 1717 + else 1718 + Ok (Quality.analyze entries) 1719 + end
+376 -107
stack/river/lib/river.mli
··· 15 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 16 *) 17 17 18 - (** River RSS/Atom feed aggregator *) 18 + (** River RSS/Atom/JSONFeed aggregator library *) 19 + 20 + (** {1 Feed Sources} *) 21 + 22 + module Source : sig 23 + type t 24 + (** A feed source with name and URL. *) 25 + 26 + val make : name:string -> url:string -> t 27 + (** [make ~name ~url] creates a new feed source. *) 28 + 29 + val name : t -> string 30 + (** [name source] returns the feed name/label. *) 31 + 32 + val url : t -> string 33 + (** [url source] returns the feed URL. *) 34 + 35 + val jsont : t Jsont.t 36 + (** JSON codec for sources. *) 37 + end 38 + 39 + (** {1 HTTP Session Management} *) 40 + 41 + module Session : sig 42 + type t 43 + (** An abstract HTTP session for fetching feeds. 44 + 45 + The session manages HTTP connections and is tied to an Eio switch 46 + for proper resource cleanup. *) 47 + 48 + val init : 49 + sw:Eio.Switch.t -> 50 + < clock : float Eio.Time.clock_ty Eio.Resource.t; 51 + fs : Eio.Fs.dir_ty Eio.Path.t; 52 + net : [ `Generic | `Unix ] Eio.Net.ty Eio.Resource.t; .. > -> 53 + t 54 + (** [init ~sw env] creates a new HTTP session. 55 + 56 + The session is configured with appropriate defaults for fetching feeds: 57 + - User-Agent: "OCaml-River/1.0" 58 + - Automatic redirect following (max 5 redirects) 59 + - TLS verification enabled 60 + 61 + @param sw The switch for resource management 62 + @param env The Eio environment *) 63 + 64 + val with_session : 65 + < clock : float Eio.Time.clock_ty Eio.Resource.t; 66 + fs : Eio.Fs.dir_ty Eio.Path.t; 67 + net : [ `Generic | `Unix ] Eio.Net.ty Eio.Resource.t; .. > -> 68 + (t -> 'a) -> 'a 69 + (** [with_session env f] creates a session and automatically manages its lifecycle. 70 + 71 + This is the recommended way to use River as it ensures proper cleanup. 72 + 73 + @param env The Eio environment 74 + @param f The function to run with the session *) 75 + end 76 + 77 + (** {1 Feeds and Posts} *) 78 + 79 + module Feed : sig 80 + type t 81 + (** An Atom, RSS2, or JSON Feed. *) 82 + 83 + val fetch : Session.t -> Source.t -> t 84 + (** [fetch session source] fetches and parses a feed from the given source. 85 + 86 + @param session The HTTP session 87 + @param source The feed source to fetch 88 + @raise Failure if the feed cannot be fetched or parsed *) 89 + 90 + val source : t -> Source.t 91 + (** [source feed] returns the source this feed was fetched from. *) 92 + end 93 + 94 + module Post : sig 95 + type t 96 + (** A post from a feed. *) 97 + 98 + val of_feeds : Feed.t list -> t list 99 + (** [of_feeds feeds] extracts and deduplicates posts from the given feeds. 100 + 101 + Posts are deduplicated by ID. *) 102 + 103 + val feed : t -> Feed.t 104 + (** [feed post] returns the feed this post originated from. *) 105 + 106 + val title : t -> string 107 + (** [title post] returns the post title. *) 108 + 109 + val link : t -> Uri.t option 110 + (** [link post] returns the post link. *) 111 + 112 + val date : t -> Syndic.Date.t option 113 + (** [date post] returns the post date. *) 114 + 115 + val author : t -> string 116 + (** [author post] returns the post author name. *) 117 + 118 + val email : t -> string 119 + (** [email post] returns the post author email. *) 120 + 121 + val content : t -> string 122 + (** [content post] returns the post content. *) 123 + 124 + val id : t -> string 125 + (** [id post] returns the unique identifier of the post. *) 126 + 127 + val tags : t -> string list 128 + (** [tags post] returns the list of tags associated with the post. *) 129 + 130 + val summary : t -> string option 131 + (** [summary post] returns the summary/excerpt of the post, if available. *) 132 + 133 + val meta_description : t -> string option 134 + (** [meta_description post] returns the meta description from the origin site. 135 + 136 + To get the meta description, we fetch the content of [link post] and look 137 + for an HTML meta tag with name "description" or "og:description". *) 19 138 20 - (** {1 Session Management} *) 139 + val seo_image : t -> string option 140 + (** [seo_image post] returns the social media image URL. 141 + 142 + To get the SEO image, we fetch the content of [link post] and look for an 143 + HTML meta tag with name "og:image" or "twitter:image". *) 144 + end 145 + 146 + (** {1 Format Conversion and Export} *) 147 + 148 + module Format : sig 149 + (** Feed format conversion and export. *) 150 + 151 + module Atom : sig 152 + (** Atom 1.0 format support. *) 153 + 154 + val entry_of_post : Post.t -> Syndic.Atom.entry 155 + (** [entry_of_post post] converts a post to an Atom entry. *) 156 + 157 + val entries_of_posts : Post.t list -> Syndic.Atom.entry list 158 + (** [entries_of_posts posts] converts posts to Atom entries. *) 159 + 160 + val feed_of_entries : 161 + title:string -> 162 + ?id:string -> 163 + ?authors:(string * string option) list -> 164 + Syndic.Atom.entry list -> 165 + Syndic.Atom.feed 166 + (** [feed_of_entries ~title entries] creates an Atom feed from entries. 167 + 168 + @param title The feed title 169 + @param id Optional feed ID (default: "urn:river:merged") 170 + @param authors Optional list of (name, email) tuples *) 171 + 172 + val to_string : Syndic.Atom.feed -> string 173 + (** [to_string feed] serializes an Atom feed to XML string. *) 174 + end 175 + 176 + module Rss2 : sig 177 + (** RSS 2.0 format support. *) 178 + 179 + val of_feed : Feed.t -> Syndic.Rss2.channel option 180 + (** [of_feed feed] extracts RSS2 channel if the feed is RSS2 format. 181 + 182 + Returns None if the feed is not RSS2. *) 183 + end 184 + 185 + module Jsonfeed : sig 186 + (** JSON Feed 1.1 format support. *) 187 + 188 + val item_of_post : Post.t -> Jsonfeed.Item.t 189 + (** [item_of_post post] converts a post to a JSONFeed item. *) 190 + 191 + val items_of_posts : Post.t list -> Jsonfeed.Item.t list 192 + (** [items_of_posts posts] converts posts to JSONFeed items. *) 193 + 194 + val feed_of_items : 195 + title:string -> 196 + ?home_page_url:string -> 197 + ?feed_url:string -> 198 + ?description:string -> 199 + ?icon:string -> 200 + ?favicon:string -> 201 + Jsonfeed.Item.t list -> 202 + Jsonfeed.t 203 + (** [feed_of_items ~title items] creates a JSONFeed from items. 204 + 205 + @param title The feed title (required) 206 + @param home_page_url The URL of the website the feed represents 207 + @param feed_url The URL of the feed itself 208 + @param description A description of the feed 209 + @param icon URL of an icon for the feed (512x512 recommended) 210 + @param favicon URL of a favicon for the feed (64x64 recommended) *) 211 + 212 + val feed_of_posts : 213 + title:string -> 214 + ?home_page_url:string -> 215 + ?feed_url:string -> 216 + ?description:string -> 217 + ?icon:string -> 218 + ?favicon:string -> 219 + Post.t list -> 220 + Jsonfeed.t 221 + (** [feed_of_posts ~title posts] creates a JSONFeed from posts. 222 + 223 + Convenience function that combines [items_of_posts] and [feed_of_items]. *) 224 + 225 + val to_string : ?minify:bool -> Jsonfeed.t -> (string, string) result 226 + (** [to_string ?minify feed] serializes a JSONFeed to JSON string. 227 + 228 + @param minify If true, output compact JSON; if false, pretty-print (default: false) *) 229 + 230 + val of_feed : Feed.t -> Jsonfeed.t option 231 + (** [of_feed feed] extracts JSONFeed if the feed is JSONFeed format. 232 + 233 + Returns None if the feed is not JSONFeed. *) 234 + end 235 + end 236 + 237 + (** {1 User Management} *) 238 + 239 + module User : sig 240 + type t 241 + (** User configuration with feed subscriptions. *) 242 + 243 + val make : 244 + username:string -> 245 + fullname:string -> 246 + ?email:string -> 247 + ?feeds:Source.t list -> 248 + ?last_synced:string -> 249 + unit -> 250 + t 251 + (** [make ~username ~fullname ()] creates a new user. 252 + 253 + @param username Unique username identifier 254 + @param fullname User's display name 255 + @param email Optional email address 256 + @param feeds Optional list of feed sources (default: []) 257 + @param last_synced Optional ISO 8601 timestamp of last sync *) 258 + 259 + val username : t -> string 260 + (** [username user] returns the username. *) 21 261 22 - type session 23 - (** An abstract River session for fetching feeds. 262 + val fullname : t -> string 263 + (** [fullname user] returns the full name. *) 24 264 25 - The session manages HTTP connections and is tied to an Eio switch 26 - for proper resource cleanup. *) 265 + val email : t -> string option 266 + (** [email user] returns the email address if set. *) 27 267 28 - val init : 29 - sw:Eio.Switch.t -> 30 - < clock : float Eio.Time.clock_ty Eio.Resource.t; 31 - fs : Eio.Fs.dir_ty Eio.Path.t; 32 - net : [ `Generic | `Unix ] Eio.Net.ty Eio.Resource.t; .. > -> 33 - session 34 - (** [init ~sw env] creates a new River session. 268 + val feeds : t -> Source.t list 269 + (** [feeds user] returns the list of subscribed feeds. *) 35 270 36 - The session is configured with appropriate defaults for fetching feeds: 37 - - User-Agent: "OCaml-River/1.0" 38 - - Automatic redirect following (max 5 redirects) 39 - - TLS verification enabled 271 + val last_synced : t -> string option 272 + (** [last_synced user] returns the last sync timestamp if set. *) 40 273 41 - @param sw The switch for resource management 42 - @param env The Eio environment *) 274 + val add_feed : t -> Source.t -> t 275 + (** [add_feed user source] returns a new user with the feed added. *) 43 276 44 - val with_session : 45 - < clock : float Eio.Time.clock_ty Eio.Resource.t; 46 - fs : Eio.Fs.dir_ty Eio.Path.t; 47 - net : [ `Generic | `Unix ] Eio.Net.ty Eio.Resource.t; .. > -> 48 - (session -> 'a) -> 'a 49 - (** [with_session env f] creates a session and automatically manages its lifecycle. 277 + val remove_feed : t -> url:string -> t 278 + (** [remove_feed user ~url] returns a new user with the feed removed by URL. *) 50 279 51 - This is the recommended way to use River as it ensures proper cleanup. 280 + val set_last_synced : t -> string -> t 281 + (** [set_last_synced user timestamp] returns a new user with updated sync time. *) 52 282 53 - @param env The Eio environment 54 - @param f The function to run with the session *) 283 + val jsont : t Jsont.t 284 + (** JSON codec for users. *) 285 + end 55 286 56 - (** {1 Feed Sources and Fetching} *) 287 + (** {1 Feed Quality Analysis} *) 57 288 58 - type source = { name : string; url : string } 59 - (** The source of a feed. *) 289 + module Quality : sig 290 + type t 291 + (** Quality metrics for a feed or user's aggregated feed. *) 60 292 61 - type feed 62 - (** An Atom, RSS2, or JSON Feed. *) 293 + val make : 294 + total_entries:int -> 295 + entries_with_summary:int -> 296 + entries_with_author:int -> 297 + entries_with_date:int -> 298 + entries_with_content:int -> 299 + entries_with_tags:int -> 300 + avg_content_length:float -> 301 + min_content_length:int -> 302 + max_content_length:int -> 303 + posting_frequency_days:float option -> 304 + quality_score:float -> 305 + t 306 + (** [make ~total_entries ...] creates quality metrics. *) 63 307 64 - type post 65 - (** A post from a feed. *) 308 + val total_entries : t -> int 309 + val entries_with_summary : t -> int 310 + val entries_with_author : t -> int 311 + val entries_with_date : t -> int 312 + val entries_with_content : t -> int 313 + val entries_with_tags : t -> int 314 + val avg_content_length : t -> float 315 + val min_content_length : t -> int 316 + val max_content_length : t -> int 317 + val posting_frequency_days : t -> float option 318 + val quality_score : t -> float 319 + (** Accessors for quality metrics. *) 66 320 67 - val fetch : session -> source -> feed 68 - (** [fetch session source] returns an Atom or RSS feed from a source. 321 + val analyze : Syndic.Atom.entry list -> t 322 + (** [analyze entries] computes quality metrics from Atom entries. 69 323 70 - @param session The River session 71 - @param source The feed source to fetch *) 324 + The quality score is a weighted average of: 325 + - Content completeness (40%) 326 + - Metadata completeness (30%) 327 + - Content richness (30%) *) 328 + end 72 329 73 - val name : feed -> string 74 - (** [name feed] is the name of the feed source passed to [fetch]. *) 330 + (** {1 State Management} *) 75 331 76 - val url : feed -> string 77 - (** [url feed] is the url of the feed source passed to [fetch]. *) 332 + module State : sig 333 + type t 334 + (** State handle for managing user data and feeds on disk. *) 78 335 79 - val posts : feed list -> post list 80 - (** [posts feeds] is the list of deduplicated posts of the given feeds. *) 336 + val create : 337 + < fs : Eio.Fs.dir_ty Eio.Path.t; .. > -> 338 + app_name:string -> 339 + t 340 + (** [create env ~app_name] creates a state handle using XDG directories. 81 341 82 - val feed : post -> feed 83 - (** [feed post] is the feed the post originates from. *) 342 + Data is stored in: 343 + - Users: $XDG_STATE_HOME/[app_name]/users/ 344 + - Feeds: $XDG_STATE_HOME/[app_name]/feeds/user/ 84 345 85 - val title : post -> string 86 - (** [title post] is the title of the post. *) 346 + @param env The Eio environment with filesystem access 347 + @param app_name Application name for XDG paths *) 87 348 88 - val link : post -> Uri.t option 89 - (** [link post] is the link of the post. *) 349 + (** {2 User Operations} *) 90 350 91 - val date : post -> Syndic.Date.t option 92 - (** [date post] is the date of the post. *) 351 + val create_user : t -> User.t -> (unit, string) result 352 + (** [create_user state user] creates a new user. 93 353 94 - val author : post -> string 95 - (** [author post] is the author of the post. *) 354 + Returns [Error] if the user already exists. *) 96 355 97 - val email : post -> string 98 - (** [email post] is the email of the post. *) 356 + val delete_user : t -> username:string -> (unit, string) result 357 + (** [delete_user state ~username] deletes a user and their feed data. *) 99 358 100 - val content : post -> string 101 - (** [content post] is the content of the post. *) 359 + val get_user : t -> username:string -> User.t option 360 + (** [get_user state ~username] retrieves a user by username. *) 102 361 103 - val id : post -> string 104 - (** [id post] is the unique identifier of the post. *) 362 + val update_user : t -> User.t -> (unit, string) result 363 + (** [update_user state user] saves updated user configuration. *) 105 364 106 - val tags : post -> string list 107 - (** [tags post] is the list of tags associated with the post. *) 365 + val list_users : t -> string list 366 + (** [list_users state] returns all usernames. *) 108 367 109 - val summary : post -> string option 110 - (** [summary post] is the summary/excerpt of the post, if available. *) 368 + (** {2 Feed Operations} *) 111 369 112 - val meta_description : post -> string option 113 - (** [meta_description post] is the meta description of the post on the origin 114 - site. 370 + val sync_user : 371 + < clock : float Eio.Time.clock_ty Eio.Resource.t; 372 + fs : Eio.Fs.dir_ty Eio.Path.t; 373 + net : [ `Generic | `Unix ] Eio.Net.ty Eio.Resource.t; .. > -> 374 + t -> 375 + username:string -> 376 + (unit, string) result 377 + (** [sync_user env state ~username] fetches all feeds for the user and stores merged result. 115 378 116 - To get the meta description, we make get the content of [link post] and look 117 - for an HTML meta tag with the name "description" or "og:description".*) 379 + Posts are fetched concurrently and merged with existing posts. 380 + The result is stored as an Atom feed. *) 118 381 119 - val seo_image : post -> string option 120 - (** [seo_image post] is the image to be used by social networks and links to the 121 - post. 382 + val sync_all : 383 + < clock : float Eio.Time.clock_ty Eio.Resource.t; 384 + fs : Eio.Fs.dir_ty Eio.Path.t; 385 + net : [ `Generic | `Unix ] Eio.Net.ty Eio.Resource.t; .. > -> 386 + t -> 387 + (int * int, string) result 388 + (** [sync_all env state] syncs all users concurrently. 122 389 123 - To get the seo image, we make get the content of [link post] and look for an 124 - HTML meta tag with the name "og:image" or "twitter:image". *) 390 + Returns [Ok (success_count, fail_count)]. *) 125 391 126 - val create_atom_entries : post list -> Syndic.Atom.entry list 127 - (** [create_atom_feed posts] creates a list of atom entries, which can then be 128 - used to create an atom feed that is an aggregate of the posts. *) 392 + val get_user_posts : 393 + t -> 394 + username:string -> 395 + ?limit:int -> 396 + unit -> 397 + Syndic.Atom.entry list 398 + (** [get_user_posts state ~username ()] retrieves stored posts for a user. 129 399 130 - (** {1 JSON Feed Support} *) 400 + @param limit Optional maximum number of posts to return *) 131 401 132 - val create_jsonfeed_items : post list -> Jsonfeed.Item.t list 133 - (** [create_jsonfeed_items posts] creates a list of JSONFeed items from posts. *) 402 + val get_all_posts : 403 + t -> 404 + ?limit:int -> 405 + unit -> 406 + (string * Syndic.Atom.entry) list 407 + (** [get_all_posts state ()] retrieves posts from all users, sorted by date. 134 408 135 - val create_jsonfeed : 136 - title:string -> 137 - ?home_page_url:string -> 138 - ?feed_url:string -> 139 - ?description:string -> 140 - ?icon:string -> 141 - ?favicon:string -> 142 - post list -> 143 - Jsonfeed.t 144 - (** [create_jsonfeed ~title ?home_page_url ?feed_url ?description posts] 145 - creates a complete JSONFeed from the given posts. 409 + Returns list of (username, entry) tuples. 410 + @param limit Optional maximum number of posts to return *) 146 411 147 - @param title The feed title (required) 148 - @param home_page_url The URL of the website the feed represents 149 - @param feed_url The URL of the feed itself 150 - @param description A description of the feed 151 - @param icon URL of an icon for the feed (512x512 recommended) 152 - @param favicon URL of a favicon for the feed (64x64 recommended) 153 - @param posts The posts to include in the feed *) 412 + (** {2 Export} *) 154 413 155 - val jsonfeed_to_string : ?minify:bool -> Jsonfeed.t -> (string, string) result 156 - (** [jsonfeed_to_string ?minify jsonfeed] serializes a JSONFeed to a string. 414 + val export_merged_feed : 415 + t -> 416 + title:string -> 417 + format:[ `Atom | `Jsonfeed ] -> 418 + ?limit:int -> 419 + unit -> 420 + (string, string) result 421 + (** [export_merged_feed state ~title ~format ()] exports a merged feed of all users. 157 422 158 - @param minify If true, output compact JSON; if false, pretty-print (default: false) *) 423 + @param title Feed title 424 + @param format Output format 425 + @param limit Optional maximum number of entries *) 159 426 160 - type feed_content = Atom of Syndic.Atom.feed | Rss2 of Syndic.Rss2.channel | JSONFeed of Jsonfeed.t 161 - (** The native format of a feed. *) 427 + (** {2 Analysis} *) 162 428 163 - val feed_content : feed -> feed_content 164 - (** [feed_content feed] returns the feed in its native format (Atom, RSS2, or JSONFeed). 165 - This allows access to format-specific features like JSONFeed attachments. *) 429 + val analyze_user_quality : 430 + t -> 431 + username:string -> 432 + (Quality.t, string) result 433 + (** [analyze_user_quality state ~username] analyzes quality metrics for a user's feed. *) 434 + end
-656
stack/river/lib/river_store.ml
··· 1 - (* 2 - * Persistent storage for Atom feed entries using Cacheio and Jsonfeed 3 - *) 4 - 5 - let src = Logs.Src.create "river.store" ~doc:"River persistent storage" 6 - module Log = (val Logs.src_log src : Logs.LOG) 7 - 8 - (* Types *) 9 - 10 - (* Storage metadata that extends Jsonfeed.Item via unknown fields *) 11 - type storage_meta = { 12 - feed_url : string; 13 - feed_name : string; 14 - feed_title : string; 15 - stored_at : Ptime.t; 16 - } 17 - 18 - (* A stored entry is a Jsonfeed.Item.t with storage metadata in unknown fields *) 19 - type stored_entry = { 20 - item : Jsonfeed.Item.t; 21 - meta : storage_meta; 22 - } 23 - 24 - (* Stored entry accessors *) 25 - let entry_item entry = entry.item 26 - let entry_feed_url entry = entry.meta.feed_url 27 - let entry_feed_name entry = entry.meta.feed_name 28 - let entry_feed_title entry = entry.meta.feed_title 29 - let entry_stored_at entry = entry.meta.stored_at 30 - 31 - type feed_info = { 32 - url : string; 33 - name : string; 34 - title : string; 35 - last_updated : Ptime.t; 36 - entry_count : int; 37 - } 38 - 39 - type t = { 40 - cache : Cacheio.t; 41 - base_dir : Eio.Fs.dir_ty Eio.Path.t; 42 - } 43 - 44 - (* Helper functions *) 45 - 46 - let make_feed_key feed_url = 47 - (* Use SHA256 hash of feed URL as directory name for safety *) 48 - let hash = Digest.string feed_url |> Digest.to_hex in 49 - "feeds/" ^ hash 50 - 51 - let make_entry_key feed_url atom_id = 52 - (* Store entry under feed directory with atom_id hash *) 53 - let feed_key = make_feed_key feed_url in 54 - let entry_hash = Digest.string atom_id |> Digest.to_hex in 55 - feed_key ^ "/entries/" ^ entry_hash 56 - 57 - let make_feed_meta_key feed_url = 58 - let feed_key = make_feed_key feed_url in 59 - feed_key ^ "/meta.json" 60 - 61 - (* JSON serialization using Jsonfeed and Jsont *) 62 - 63 - (* Storage metadata codec - stores feed info and storage timestamp *) 64 - let storage_meta_jsont : storage_meta Jsont.t = 65 - Jsont.Object.( 66 - map ~kind:"StorageMeta" (fun feed_url feed_name feed_title stored_at : storage_meta -> 67 - { feed_url; feed_name; feed_title; stored_at }) 68 - |> mem "x_river_feed_url" Jsont.string ~enc:(fun m -> m.feed_url) 69 - |> mem "x_river_feed_name" Jsont.string ~enc:(fun m -> m.feed_name) 70 - |> mem "x_river_feed_title" Jsont.string ~enc:(fun m -> m.feed_title) 71 - |> mem "x_river_stored_at" Jsonfeed.Rfc3339.jsont ~enc:(fun m -> m.stored_at) 72 - |> finish 73 - ) 74 - 75 - (* Codec for feed_info *) 76 - let feed_meta_jsont : feed_info Jsont.t = 77 - Jsont.Object.( 78 - map ~kind:"FeedInfo" (fun url name title last_updated entry_count : feed_info -> 79 - { url; name; title; last_updated; entry_count }) 80 - |> mem "url" Jsont.string ~enc:(fun (m : feed_info) -> m.url) 81 - |> mem "name" Jsont.string ~enc:(fun m -> m.name) 82 - |> mem "title" Jsont.string ~enc:(fun m -> m.title) 83 - |> mem "last_updated" Jsonfeed.Rfc3339.jsont ~enc:(fun m -> m.last_updated) 84 - |> mem "entry_count" Jsont.int ~enc:(fun m -> m.entry_count) 85 - |> finish 86 - ) 87 - 88 - (* Helper to create item with storage metadata in unknown fields *) 89 - let merge_storage_meta item meta = 90 - let meta_json = Jsont_bytesrw.encode_string' storage_meta_jsont meta 91 - |> Result.get_ok in 92 - let meta_unknown = Jsont_bytesrw.decode_string' Jsont.json meta_json 93 - |> Result.get_ok in 94 - Jsonfeed.Item.create 95 - ~id:(Jsonfeed.Item.id item) 96 - ~content:(Jsonfeed.Item.content item) 97 - ?url:(Jsonfeed.Item.url item) 98 - ?external_url:(Jsonfeed.Item.external_url item) 99 - ?title:(Jsonfeed.Item.title item) 100 - ?summary:(Jsonfeed.Item.summary item) 101 - ?image:(Jsonfeed.Item.image item) 102 - ?banner_image:(Jsonfeed.Item.banner_image item) 103 - ?date_published:(Jsonfeed.Item.date_published item) 104 - ?date_modified:(Jsonfeed.Item.date_modified item) 105 - ?authors:(Jsonfeed.Item.authors item) 106 - ?tags:(Jsonfeed.Item.tags item) 107 - ?language:(Jsonfeed.Item.language item) 108 - ?attachments:(Jsonfeed.Item.attachments item) 109 - ?references:(Jsonfeed.Item.references item) 110 - ~unknown:meta_unknown 111 - () 112 - 113 - (* Helper to extract storage metadata from item's unknown fields *) 114 - let extract_storage_meta item = 115 - let unknown = Jsonfeed.Item.unknown item in 116 - let meta_str = Jsont_bytesrw.encode_string' Jsont.json unknown |> Result.get_ok in 117 - match Jsont_bytesrw.decode_string' storage_meta_jsont meta_str with 118 - | Ok meta -> meta 119 - | Error e -> failwith ("Missing storage metadata: " ^ Jsont.Error.to_string e) 120 - 121 - (* Stored entry codec - just wraps Jsonfeed.Item.jsont *) 122 - let stored_entry_jsont : stored_entry Jsont.t = 123 - let kind = "StoredEntry" in 124 - let of_string s = 125 - match Jsont_bytesrw.decode_string' Jsonfeed.Item.jsont s with 126 - | Ok item -> Ok { item; meta = extract_storage_meta item } 127 - | Error e -> Error (Jsont.Error.to_string e) 128 - in 129 - let enc entry = 130 - let item_with_meta = merge_storage_meta entry.item entry.meta in 131 - match Jsont_bytesrw.encode_string' Jsonfeed.Item.jsont item_with_meta with 132 - | Ok s -> s 133 - | Error e -> failwith ("Failed to encode: " ^ Jsont.Error.to_string e) 134 - in 135 - Jsont.of_of_string ~kind of_string ~enc 136 - 137 - (* Encode/decode functions *) 138 - let entry_to_string entry = 139 - match Jsont_bytesrw.encode_string' stored_entry_jsont entry with 140 - | Ok s -> s 141 - | Error err -> failwith ("Failed to encode entry: " ^ Jsont.Error.to_string err) 142 - 143 - let entry_of_string s = 144 - match Jsont_bytesrw.decode_string' stored_entry_jsont s with 145 - | Ok entry -> entry 146 - | Error err -> failwith ("Failed to parse entry: " ^ Jsont.Error.to_string err) 147 - 148 - let feed_meta_to_string meta = 149 - match Jsont_bytesrw.encode_string' feed_meta_jsont meta with 150 - | Ok s -> s 151 - | Error err -> failwith ("Failed to encode feed metadata: " ^ Jsont.Error.to_string err) 152 - 153 - let feed_meta_of_string s = 154 - match Jsont_bytesrw.decode_string' feed_meta_jsont s with 155 - | Ok meta -> meta 156 - | Error err -> failwith ("Failed to parse feed metadata: " ^ Jsont.Error.to_string err) 157 - 158 - (* Store creation *) 159 - 160 - let create ~base_dir = 161 - let cache_dir = Eio.Path.(base_dir / "river_store") in 162 - (try 163 - Eio.Path.mkdir ~perm:0o755 cache_dir 164 - with Eio.Io (Eio.Fs.E (Already_exists _), _) -> ()); 165 - let cache = Cacheio.create ~base_dir:cache_dir in 166 - Log.info (fun m -> m "Created River store at %a" Eio.Path.pp cache_dir); 167 - { cache; base_dir = cache_dir } 168 - 169 - let create_with_xdge xdge = 170 - let cache = Cacheio.create_with_xdge xdge in 171 - let base_dir = Eio.Path.( / ) (Xdge.cache_dir xdge) "river_store" in 172 - Log.info (fun m -> m "Created River store with XDG at %a" Eio.Path.pp base_dir); 173 - { cache; base_dir } 174 - 175 - (* Convert Post.t to Jsonfeed.Item.t *) 176 - let item_of_post ~feed_url ~feed_name ~feed_title (post : Post.t) = 177 - let content = 178 - let html = Soup.to_string post.content in 179 - `Html html 180 - in 181 - let url = Option.map Uri.to_string post.link in 182 - let authors = 183 - if post.author = "" then None 184 - else Some [Jsonfeed.Author.create ~name:post.author ()] 185 - in 186 - let tags = if post.tags = [] then None else Some post.tags in 187 - let item = Jsonfeed.Item.create 188 - ~id:post.id 189 - ~content 190 - ?url 191 - ?title:(if post.title = "" then None else Some post.title) 192 - ?summary:post.summary 193 - ?date_published:post.date 194 - ?date_modified:post.date 195 - ?authors 196 - ?tags 197 - () 198 - in 199 - let meta = { 200 - feed_url; 201 - feed_name; 202 - feed_title; 203 - stored_at = Ptime.of_float_s (Unix.gettimeofday ()) |> Option.get; 204 - } in 205 - { item; meta } 206 - 207 - (* Convert Syndic.Atom.entry to Jsonfeed.Item.t *) 208 - let item_of_atom ~feed_url ~feed_name ~feed_title (atom_entry : Syndic.Atom.entry) = 209 - let atom_id = Uri.to_string atom_entry.id in 210 - let date_modified = atom_entry.updated in 211 - let date_published = match atom_entry.published with 212 - | Some p -> Some p 213 - | None -> Some atom_entry.updated 214 - in 215 - (* Extract content *) 216 - let content_html = match atom_entry.content with 217 - | Some (Syndic.Atom.Text s) -> Some s 218 - | Some (Syndic.Atom.Html (_, s)) -> Some s 219 - | Some (Syndic.Atom.Xhtml (_, nodes)) -> 220 - let ns_prefix _ = Some "" in 221 - Some (String.concat "" (List.map (Syndic.XML.to_string ~ns_prefix) nodes)) 222 - | Some (Syndic.Atom.Mime _) | Some (Syndic.Atom.Src _) | None -> None 223 - in 224 - let content_text = match atom_entry.summary with 225 - | Some s -> Some (Util.string_of_text_construct s) 226 - | None -> None 227 - in 228 - let content = match content_html, content_text with 229 - | Some h, Some t -> `Both (h, t) 230 - | Some h, None -> `Html h 231 - | None, Some t -> `Text t 232 - | None, None -> `Text "" (* Fallback *) 233 - in 234 - let url = try 235 - Some (Uri.to_string (List.find (fun l -> l.Syndic.Atom.rel = Syndic.Atom.Alternate) atom_entry.links).href) 236 - with Not_found -> 237 - match atom_entry.links with 238 - | l :: _ -> Some (Uri.to_string l.href) 239 - | [] -> None 240 - in 241 - let tags = 242 - let cat_tags = List.map (fun cat -> cat.Syndic.Atom.term) atom_entry.categories in 243 - if cat_tags = [] then None else Some cat_tags 244 - in 245 - let summary = match atom_entry.summary with 246 - | Some s -> Some (Util.string_of_text_construct s) 247 - | None -> None 248 - in 249 - let item = Jsonfeed.Item.create 250 - ~id:atom_id 251 - ~content 252 - ?url 253 - ~title:(Util.string_of_text_construct atom_entry.title) 254 - ?summary 255 - ?date_published 256 - ~date_modified 257 - ?tags 258 - () 259 - in 260 - let meta = { 261 - feed_url; 262 - feed_name; 263 - feed_title; 264 - stored_at = Ptime.of_float_s (Unix.gettimeofday ()) |> Option.get; 265 - } in 266 - { item; meta } 267 - 268 - (* Feed metadata management *) 269 - let update_feed_meta store ~feed_url ~feed_name ~feed_title ~sw:_ = 270 - let key = make_feed_meta_key feed_url in 271 - let meta = { 272 - url = feed_url; 273 - name = feed_name; 274 - title = feed_title; 275 - last_updated = Ptime.of_float_s (Unix.gettimeofday ()) |> Option.get; 276 - entry_count = 0; 277 - } in 278 - let json_str = feed_meta_to_string meta in 279 - let source = Eio.Flow.string_source json_str in 280 - Cacheio.put store.cache ~key ~source ~ttl:None (); 281 - Log.debug (fun m -> m "Updated feed metadata for %s" feed_url) 282 - 283 - let get_feed_meta store ~feed_url ~sw = 284 - let key = make_feed_meta_key feed_url in 285 - match Cacheio.get store.cache ~key ~sw with 286 - | None -> None 287 - | Some source -> 288 - try 289 - let json_str = Eio.Buf_read.(parse_exn take_all) source ~max_size:Int.max_int in 290 - Some (feed_meta_of_string json_str) 291 - with e -> 292 - Log.err (fun m -> m "Failed to parse feed metadata: %s" (Printexc.to_string e)); 293 - None 294 - 295 - (* Entry storage *) 296 - 297 - let store_entry store ~feed_url ~feed_name ~feed_title ~post ~sw = 298 - let entry = item_of_post ~feed_url ~feed_name ~feed_title post in 299 - let key = make_entry_key feed_url (Jsonfeed.Item.id entry.item) in 300 - let json_str = entry_to_string entry in 301 - let source = Eio.Flow.string_source json_str in 302 - Cacheio.put store.cache ~key ~source ~ttl:None (); 303 - Log.debug (fun m -> m "Stored entry %s for feed %s" (Jsonfeed.Item.id entry.item) feed_url); 304 - (* Update feed metadata *) 305 - update_feed_meta store ~feed_url ~feed_name ~feed_title ~sw 306 - 307 - let store_posts store ~feed_url ~feed_name ~feed_title ~posts ~sw = 308 - Log.info (fun m -> m "Storing %d posts for feed %s" (List.length posts) feed_url); 309 - List.iter (fun post -> 310 - store_entry store ~feed_url ~feed_name ~feed_title ~post ~sw 311 - ) posts; 312 - Log.info (fun m -> m "Stored %d entries for feed %s" (List.length posts) feed_url) 313 - 314 - let store_atom_entries store ~feed_url ~feed_name ~feed_title ~entries ~sw = 315 - Log.info (fun m -> m "Storing %d Atom entries for feed %s" (List.length entries) feed_url); 316 - List.iter (fun atom_entry -> 317 - let entry = item_of_atom ~feed_url ~feed_name ~feed_title atom_entry in 318 - let key = make_entry_key feed_url (Jsonfeed.Item.id entry.item) in 319 - let json_str = entry_to_string entry in 320 - let source = Eio.Flow.string_source json_str in 321 - Cacheio.put store.cache ~key ~source ~ttl:None (); 322 - Log.debug (fun m -> m "Stored Atom entry %s" (Jsonfeed.Item.id entry.item)); 323 - ) entries; 324 - update_feed_meta store ~feed_url ~feed_name ~feed_title ~sw; 325 - Log.info (fun m -> m "Stored %d Atom entries for feed %s" (List.length entries) feed_url) 326 - 327 - (* Entry retrieval *) 328 - 329 - let get_entry store ~feed_url ~atom_id ~sw = 330 - let key = make_entry_key feed_url atom_id in 331 - match Cacheio.get store.cache ~key ~sw with 332 - | None -> None 333 - | Some source -> 334 - try 335 - let json_str = Eio.Buf_read.(parse_exn take_all) source ~max_size:Int.max_int in 336 - Some (entry_of_string json_str) 337 - with e -> 338 - Log.err (fun m -> m "Failed to parse entry: %s" (Printexc.to_string e)); 339 - None 340 - 341 - let list_entries store ~feed_url = 342 - let feed_key = make_feed_key feed_url in 343 - let prefix = feed_key ^ "/entries/" in 344 - let entries = Cacheio.scan store.cache in 345 - let feed_entries = List.filter_map (fun (cache_entry : Cacheio.Entry.t) -> 346 - let key = Cacheio.Entry.key cache_entry in 347 - if String.starts_with ~prefix key then 348 - Eio.Switch.run @@ fun sw -> 349 - match Cacheio.get store.cache ~key ~sw with 350 - | None -> None 351 - | Some source -> 352 - try 353 - let json_str = Eio.Buf_read.(parse_exn take_all) source ~max_size:Int.max_int in 354 - Some (entry_of_string json_str) 355 - with e -> 356 - Log.err (fun m -> m "Failed to parse entry from scan: %s" (Printexc.to_string e)); 357 - None 358 - else None 359 - ) entries in 360 - (* Sort by date_modified, newest first *) 361 - List.sort (fun a b -> 362 - let time_a = Jsonfeed.Item.date_modified a.item |> Option.value ~default:a.meta.stored_at in 363 - let time_b = Jsonfeed.Item.date_modified b.item |> Option.value ~default:b.meta.stored_at in 364 - Ptime.compare time_b time_a 365 - ) feed_entries 366 - 367 - let list_entries_filtered store ~feed_url ?since ?until ?limit ?(sort=`Updated) () = 368 - let entries = list_entries store ~feed_url in 369 - (* Filter by time *) 370 - let entries = match since with 371 - | None -> entries 372 - | Some t -> List.filter (fun e -> 373 - let time = Jsonfeed.Item.date_modified e.item |> Option.value ~default:e.meta.stored_at in 374 - Ptime.is_later time ~than:t || Ptime.equal time t) entries 375 - in 376 - let entries = match until with 377 - | None -> entries 378 - | Some t -> List.filter (fun e -> 379 - let time = Jsonfeed.Item.date_modified e.item |> Option.value ~default:e.meta.stored_at in 380 - Ptime.is_earlier time ~than:t || Ptime.equal time t) entries 381 - in 382 - (* Sort *) 383 - let entries = match sort with 384 - | `Published -> List.sort (fun a b -> 385 - let pa = Jsonfeed.Item.date_published a.item in 386 - let pb = Jsonfeed.Item.date_published b.item in 387 - match pa, pb with 388 - | Some ta, Some tb -> Ptime.compare tb ta 389 - | None, Some _ -> 1 390 - | Some _, None -> -1 391 - | None, None -> 392 - let ta = Jsonfeed.Item.date_modified a.item |> Option.value ~default:a.meta.stored_at in 393 - let tb = Jsonfeed.Item.date_modified b.item |> Option.value ~default:b.meta.stored_at in 394 - Ptime.compare tb ta 395 - ) entries 396 - | `Updated -> List.sort (fun a b -> 397 - let ta = Jsonfeed.Item.date_modified a.item |> Option.value ~default:a.meta.stored_at in 398 - let tb = Jsonfeed.Item.date_modified b.item |> Option.value ~default:b.meta.stored_at in 399 - Ptime.compare tb ta 400 - ) entries 401 - | `Stored -> List.sort (fun a b -> Ptime.compare b.meta.stored_at a.meta.stored_at) entries 402 - in 403 - (* Limit *) 404 - match limit with 405 - | None -> entries 406 - | Some n -> List.filteri (fun i _ -> i < n) entries 407 - 408 - let exists_entry store ~feed_url ~atom_id = 409 - let key = make_entry_key feed_url atom_id in 410 - Cacheio.exists store.cache ~key 411 - 412 - let get_recent_entries store ?(limit=50) () = 413 - let entries = Cacheio.scan store.cache in 414 - let all_entries = List.filter_map (fun (cache_entry : Cacheio.Entry.t) -> 415 - let key = Cacheio.Entry.key cache_entry in 416 - if String.contains key '/' && 417 - String.ends_with ~suffix:"entries/" (String.sub key 0 (String.rindex key '/') ^ "/") then 418 - Eio.Switch.run @@ fun sw -> 419 - match Cacheio.get store.cache ~key ~sw with 420 - | None -> None 421 - | Some source -> 422 - try 423 - let json_str = Eio.Buf_read.(parse_exn take_all) source ~max_size:Int.max_int in 424 - Some (entry_of_string json_str) 425 - with e -> 426 - Log.err (fun m -> m "Failed to parse entry: %s" (Printexc.to_string e)); 427 - None 428 - else None 429 - ) entries in 430 - let sorted = List.sort (fun a b -> 431 - let ta = Jsonfeed.Item.date_modified a.item |> Option.value ~default:a.meta.stored_at in 432 - let tb = Jsonfeed.Item.date_modified b.item |> Option.value ~default:b.meta.stored_at in 433 - Ptime.compare tb ta 434 - ) all_entries in 435 - List.filteri (fun i _ -> i < limit) sorted 436 - 437 - let find_entry_by_id store ~id = 438 - Log.debug (fun m -> m "Searching for entry with ID: %s" id); 439 - let entries = Cacheio.scan store.cache in 440 - let matching_entry = List.find_map (fun (cache_entry : Cacheio.Entry.t) -> 441 - let key = Cacheio.Entry.key cache_entry in 442 - if String.contains key '/' && 443 - String.ends_with ~suffix:"entries/" (String.sub key 0 (String.rindex key '/') ^ "/") then 444 - Eio.Switch.run @@ fun sw -> 445 - match Cacheio.get store.cache ~key ~sw with 446 - | None -> None 447 - | Some source -> 448 - (try 449 - let json_str = Eio.Buf_read.(parse_exn take_all) source ~max_size:Int.max_int in 450 - let entry = entry_of_string json_str in 451 - (* Exact ID match only *) 452 - if Jsonfeed.Item.id entry.item = id then 453 - Some entry 454 - else 455 - None 456 - with e -> 457 - Log.err (fun m -> m "Failed to parse entry: %s" (Printexc.to_string e)); 458 - None) 459 - else None 460 - ) entries in 461 - (match matching_entry with 462 - | Some e -> Log.debug (fun m -> m "Found entry: %s" 463 - (Jsonfeed.Item.title e.item |> Option.value ~default:"(no title)")) 464 - | None -> Log.debug (fun m -> m "No entry found with ID: %s" id)); 465 - matching_entry 466 - 467 - (* Entry management *) 468 - 469 - let delete_entry store ~feed_url ~atom_id = 470 - let key = make_entry_key feed_url atom_id in 471 - Cacheio.delete store.cache ~key; 472 - Log.info (fun m -> m "Deleted entry %s from feed %s" atom_id feed_url) 473 - 474 - let delete_feed store ~feed_url = 475 - let feed_key = make_feed_key feed_url in 476 - let prefix = feed_key ^ "/" in 477 - let entries = Cacheio.scan store.cache in 478 - let count = ref 0 in 479 - List.iter (fun (cache_entry : Cacheio.Entry.t) -> 480 - let key = Cacheio.Entry.key cache_entry in 481 - if String.starts_with ~prefix key then begin 482 - Cacheio.delete store.cache ~key; 483 - incr count 484 - end 485 - ) entries; 486 - Log.info (fun m -> m "Deleted feed %s (%d entries)" feed_url !count) 487 - 488 - let prune_entries store ~feed_url ~keep = 489 - let entries = list_entries store ~feed_url in 490 - let to_delete = List.filteri (fun i _ -> i >= keep) entries in 491 - List.iter (fun entry -> 492 - delete_entry store ~feed_url ~atom_id:(Jsonfeed.Item.id entry.item) 493 - ) to_delete; 494 - let deleted = List.length to_delete in 495 - Log.info (fun m -> m "Pruned %d entries from feed %s (kept %d)" deleted feed_url keep); 496 - deleted 497 - 498 - let prune_old_entries store ~feed_url ~older_than = 499 - let entries = list_entries store ~feed_url in 500 - let to_delete = List.filter (fun e -> 501 - let time = Jsonfeed.Item.date_modified e.item |> Option.value ~default:e.meta.stored_at in 502 - Ptime.is_earlier time ~than:older_than 503 - ) entries in 504 - List.iter (fun entry -> 505 - delete_entry store ~feed_url ~atom_id:(Jsonfeed.Item.id entry.item) 506 - ) to_delete; 507 - let deleted = List.length to_delete in 508 - Log.info (fun m -> m "Pruned %d old entries from feed %s" deleted feed_url); 509 - deleted 510 - 511 - (* Feed information *) 512 - 513 - let list_feeds store = 514 - let feed_entries = Cacheio.scan store.cache in 515 - let feed_metas = List.filter_map (fun (cache_entry : Cacheio.Entry.t) -> 516 - let key = Cacheio.Entry.key cache_entry in 517 - if String.ends_with ~suffix:"/meta.json" key then 518 - Eio.Switch.run @@ fun sw -> 519 - match Cacheio.get store.cache ~key ~sw with 520 - | None -> None 521 - | Some source -> 522 - try 523 - let json_str = Eio.Buf_read.(parse_exn take_all) source ~max_size:Int.max_int in 524 - Some (feed_meta_of_string json_str) 525 - with e -> 526 - Log.err (fun m -> m "Failed to parse feed metadata: %s" (Printexc.to_string e)); 527 - None 528 - else None 529 - ) feed_entries in 530 - (* Count entries for each feed *) 531 - List.map (fun meta -> 532 - let entries = list_entries store ~feed_url:meta.url in 533 - { meta with entry_count = List.length entries } 534 - ) feed_metas 535 - 536 - let get_feed_info store ~feed_url = 537 - Eio.Switch.run @@ fun sw -> 538 - match get_feed_meta store ~feed_url ~sw with 539 - | None -> None 540 - | Some meta -> 541 - let entries = list_entries store ~feed_url in 542 - Some { meta with entry_count = List.length entries } 543 - 544 - let stats store = 545 - Cacheio.stats store.cache 546 - 547 - (* Maintenance *) 548 - 549 - let expire store = 550 - Cacheio.expire store.cache 551 - 552 - let compact _store = 553 - (* TODO: Implement compaction logic *) 554 - Log.info (fun m -> m "Compaction not yet implemented") 555 - 556 - (* Export/Import *) 557 - 558 - let export_to_atom store ~feed_url ?title ?limit () = 559 - let entries = match limit with 560 - | None -> list_entries store ~feed_url 561 - | Some n -> list_entries_filtered store ~feed_url ~limit:n () 562 - in 563 - let atom_entries = List.map (fun entry -> 564 - let item = entry.item in 565 - let id = Uri.of_string (Jsonfeed.Item.id item) in 566 - let entry_title : Syndic.Atom.text_construct = 567 - Syndic.Atom.Text (Jsonfeed.Item.title item |> Option.value ~default:"(no title)") in 568 - let links = match Jsonfeed.Item.url item with 569 - | Some url_str -> [Syndic.Atom.link ~rel:Syndic.Atom.Alternate (Uri.of_string url_str)] 570 - | None -> [] 571 - in 572 - let content_str = match Jsonfeed.Item.content item with 573 - | `Html h -> h 574 - | `Text t -> t 575 - | `Both (h, _) -> h 576 - in 577 - let entry_content : Syndic.Atom.content = Syndic.Atom.Html (None, content_str) in 578 - let author_name = match Jsonfeed.Item.authors item with 579 - | Some (a :: _) -> Jsonfeed.Author.name a |> Option.value ~default:entry.meta.feed_name 580 - | _ -> entry.meta.feed_name 581 - in 582 - let author = Syndic.Atom.author author_name in 583 - let authors = (author, []) in 584 - let updated = Jsonfeed.Item.date_modified item |> Option.value ~default:entry.meta.stored_at in 585 - Syndic.Atom.entry ~id ~title:entry_title ~updated 586 - ?published:(Jsonfeed.Item.date_published item) 587 - ~links ~content:entry_content ~authors () 588 - ) entries in 589 - let feed_title : Syndic.Atom.text_construct = match title with 590 - | Some t -> Syndic.Atom.Text t 591 - | None -> Syndic.Atom.Text ("Archive: " ^ feed_url) 592 - in 593 - let feed_id = Uri.of_string ("urn:river:archive:" ^ (Digest.string feed_url |> Digest.to_hex)) in 594 - let feed_updated = match entries with 595 - | [] -> Ptime.of_float_s (Unix.gettimeofday ()) |> Option.get 596 - | e :: _ -> Jsonfeed.Item.date_modified e.item |> Option.value ~default:e.meta.stored_at 597 - in 598 - { 599 - Syndic.Atom.id = feed_id; 600 - title = feed_title; 601 - updated = feed_updated; 602 - entries = atom_entries; 603 - authors = []; 604 - categories = []; 605 - contributors = []; 606 - generator = Some { 607 - Syndic.Atom.version = Some "1.0"; 608 - uri = None; 609 - content = "River Store"; 610 - }; 611 - icon = None; 612 - links = []; 613 - logo = None; 614 - rights = None; 615 - subtitle = None; 616 - } 617 - 618 - let import_from_atom store ~feed_url ~feed_name ~feed ~sw = 619 - let entries = feed.Syndic.Atom.entries in 620 - store_atom_entries store ~feed_url ~feed_name ~feed_title:(Util.string_of_text_construct feed.title) ~entries ~sw; 621 - List.length entries 622 - 623 - (* Pretty printing *) 624 - 625 - let pp_entry fmt entry = 626 - let item = entry.item in 627 - Format.fprintf fmt "@[<v 2>Entry:@,"; 628 - Format.fprintf fmt "ID: %s@," (Jsonfeed.Item.id item); 629 - Format.fprintf fmt "Title: %s@," (Jsonfeed.Item.title item |> Option.value ~default:"(no title)"); 630 - Format.fprintf fmt "URL: %s@," (Jsonfeed.Item.url item |> Option.value ~default:"(none)"); 631 - (match Jsonfeed.Item.date_published item with 632 - | Some t -> Format.fprintf fmt "Published: %s@," (Ptime.to_rfc3339 t) 633 - | None -> ()); 634 - (match Jsonfeed.Item.date_modified item with 635 - | Some t -> Format.fprintf fmt "Modified: %s@," (Ptime.to_rfc3339 t) 636 - | None -> ()); 637 - Format.fprintf fmt "Feed: %s (%s)@," entry.meta.feed_name entry.meta.feed_url; 638 - Format.fprintf fmt "Stored: %s@]" (Ptime.to_rfc3339 entry.meta.stored_at) 639 - 640 - let pp_feed_info fmt info = 641 - Format.fprintf fmt "@[<v 2>Feed:@,"; 642 - Format.fprintf fmt "Name: %s@," info.name; 643 - Format.fprintf fmt "Title: %s@," info.title; 644 - Format.fprintf fmt "URL: %s@," info.url; 645 - Format.fprintf fmt "Last updated: %s@," (Ptime.to_rfc3339 info.last_updated); 646 - Format.fprintf fmt "Entries: %d@]" info.entry_count 647 - 648 - let pp fmt store = 649 - let feeds = list_feeds store in 650 - Format.fprintf fmt "@[<v 2>River Store:@,"; 651 - Format.fprintf fmt "Base dir: %a@," Eio.Path.pp store.base_dir; 652 - Format.fprintf fmt "Feeds: %d@," (List.length feeds); 653 - List.iter (fun feed -> 654 - Format.fprintf fmt " - %s: %d entries@," feed.name feed.entry_count 655 - ) feeds; 656 - Format.fprintf fmt "@]"
-218
stack/river/lib/river_store.mli
··· 1 - (** Persistent storage for Atom feed entries using Cacheio 2 - 3 - River_store provides a persistent, per-feed storage system for Atom entries, 4 - enabling long-term archival of feed items that may have expired upstream. 5 - 6 - {2 Key Features} 7 - 8 - - {b Per-feed storage}: Each feed's entries stored independently 9 - - {b Atom ID keying}: Entries keyed by their unique Atom ID 10 - - {b URL resolution}: Resolves all URLs relative to feed base URI 11 - - {b Persistent caching}: Built on Cacheio for reliable file storage 12 - - {b Entry management}: List, update, delete, and prune operations 13 - - {b Metadata tracking}: Stores feed source, timestamps, and relationships 14 - 15 - {2 Usage Example} 16 - 17 - {[ 18 - let store = River_store.create ~base_dir:store_dir in 19 - 20 - (* Store entries from a feed *) 21 - Eio.Switch.run @@ fun sw -> 22 - let feed = River.fetch env source in 23 - let posts = River.posts [feed] in 24 - River_store.store_posts store ~feed_url:source.url ~posts ~sw (); 25 - 26 - (* List entries for a feed *) 27 - let entries = River_store.list_entries store ~feed_url:source.url in 28 - List.iter (fun entry -> 29 - Printf.printf "%s: %s\n" entry.atom_id entry.title 30 - ) entries; 31 - 32 - (* Get a specific entry *) 33 - match River_store.get_entry store ~feed_url:source.url ~atom_id:"..." ~sw with 34 - | Some entry -> (* Use entry *) 35 - | None -> (* Not found *) 36 - ]} *) 37 - 38 - (** {1 Core Types} *) 39 - 40 - (** Abstract type representing the store *) 41 - type t 42 - 43 - (** Stored entry - combines Jsonfeed.Item with storage metadata *) 44 - type stored_entry 45 - 46 - (** {2 Stored Entry Accessors} *) 47 - 48 - val entry_item : stored_entry -> Jsonfeed.Item.t 49 - (** Get the underlying Jsonfeed Item *) 50 - 51 - val entry_feed_url : stored_entry -> string 52 - (** Get the source feed URL *) 53 - 54 - val entry_feed_name : stored_entry -> string 55 - (** Get the source feed name *) 56 - 57 - val entry_feed_title : stored_entry -> string 58 - (** Get the source feed title *) 59 - 60 - val entry_stored_at : stored_entry -> Ptime.t 61 - (** Get the storage timestamp *) 62 - 63 - (** Feed metadata *) 64 - type feed_info = { 65 - url : string; 66 - (** Feed URL *) 67 - 68 - name : string; 69 - (** Feed name/label *) 70 - 71 - title : string; 72 - (** Feed title from metadata *) 73 - 74 - last_updated : Ptime.t; 75 - (** Last time feed was synced *) 76 - 77 - entry_count : int; 78 - (** Number of stored entries *) 79 - } 80 - 81 - (** {1 Store Creation} *) 82 - 83 - (** Create a store at the specified base directory *) 84 - val create : base_dir:Eio.Fs.dir_ty Eio.Path.t -> t 85 - 86 - (** Create a store using an Xdge context for XDG-compliant paths *) 87 - val create_with_xdge : Xdge.t -> t 88 - 89 - (** {1 Entry Storage} *) 90 - 91 - (** Store a single post entry from a feed *) 92 - val store_entry : 93 - t -> 94 - feed_url:string -> 95 - feed_name:string -> 96 - feed_title:string -> 97 - post:Post.t -> 98 - sw:Eio.Switch.t -> 99 - unit 100 - 101 - (** Store multiple posts from a feed *) 102 - val store_posts : 103 - t -> 104 - feed_url:string -> 105 - feed_name:string -> 106 - feed_title:string -> 107 - posts:Post.t list -> 108 - sw:Eio.Switch.t -> 109 - unit 110 - 111 - (** Store entries directly from Syndic.Atom.entry list *) 112 - val store_atom_entries : 113 - t -> 114 - feed_url:string -> 115 - feed_name:string -> 116 - feed_title:string -> 117 - entries:Syndic.Atom.entry list -> 118 - sw:Eio.Switch.t -> 119 - unit 120 - 121 - (** {1 Entry Retrieval} *) 122 - 123 - (** Get a specific entry by Atom ID *) 124 - val get_entry : 125 - t -> 126 - feed_url:string -> 127 - atom_id:string -> 128 - sw:Eio.Switch.t -> 129 - stored_entry option 130 - 131 - (** List all entries for a feed *) 132 - val list_entries : t -> feed_url:string -> stored_entry list 133 - 134 - (** List entries with filtering and sorting options *) 135 - val list_entries_filtered : 136 - t -> 137 - feed_url:string -> 138 - ?since:Ptime.t -> 139 - ?until:Ptime.t -> 140 - ?limit:int -> 141 - ?sort:[`Published | `Updated | `Stored] -> 142 - unit -> 143 - stored_entry list 144 - 145 - (** Check if an entry exists *) 146 - val exists_entry : t -> feed_url:string -> atom_id:string -> bool 147 - 148 - (** Get the most recent entries across all feeds *) 149 - val get_recent_entries : t -> ?limit:int -> unit -> stored_entry list 150 - 151 - (** Find an entry by ID across all feeds (searches by atom_id) *) 152 - val find_entry_by_id : t -> id:string -> stored_entry option 153 - 154 - (** {1 Entry Management} *) 155 - 156 - (** Delete a specific entry *) 157 - val delete_entry : t -> feed_url:string -> atom_id:string -> unit 158 - 159 - (** Delete all entries for a feed *) 160 - val delete_feed : t -> feed_url:string -> unit 161 - 162 - (** Prune old entries (keep most recent N per feed) *) 163 - val prune_entries : t -> feed_url:string -> keep:int -> int 164 - (** Returns number of entries deleted *) 165 - 166 - (** Prune entries older than a given time *) 167 - val prune_old_entries : t -> feed_url:string -> older_than:Ptime.t -> int 168 - (** Returns number of entries deleted *) 169 - 170 - (** {1 Feed Information} *) 171 - 172 - (** List all feeds that have stored entries *) 173 - val list_feeds : t -> feed_info list 174 - 175 - (** Get information about a specific feed *) 176 - val get_feed_info : t -> feed_url:string -> feed_info option 177 - 178 - (** Get statistics about the store *) 179 - val stats : t -> Cacheio.Stats.t 180 - 181 - (** {1 Maintenance} *) 182 - 183 - (** Clean up expired entries (respects TTL if set) *) 184 - val expire : t -> int 185 - (** Returns number of entries expired *) 186 - 187 - (** Compact storage (remove duplicate/orphaned data) *) 188 - val compact : t -> unit 189 - 190 - (** Export entries to an Atom feed *) 191 - val export_to_atom : 192 - t -> 193 - feed_url:string -> 194 - ?title:string -> 195 - ?limit:int -> 196 - unit -> 197 - Syndic.Atom.feed 198 - 199 - (** Import entries from an Atom feed *) 200 - val import_from_atom : 201 - t -> 202 - feed_url:string -> 203 - feed_name:string -> 204 - feed:Syndic.Atom.feed -> 205 - sw:Eio.Switch.t -> 206 - int 207 - (** Returns number of entries imported *) 208 - 209 - (** {1 Pretty Printing} *) 210 - 211 - (** Pretty printer for stored entries *) 212 - val pp_entry : Format.formatter -> stored_entry -> unit 213 - 214 - (** Pretty printer for feed info *) 215 - val pp_feed_info : Format.formatter -> feed_info -> unit 216 - 217 - (** Pretty printer for the store *) 218 - val pp : Format.formatter -> t -> unit
-33
stack/river/lib/util.ml
··· 1 - (* 2 - * Copyright (c) 2014, OCaml.org project 3 - * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk> 4 - * 5 - * Permission to use, copy, modify, and distribute this software for any 6 - * purpose with or without fee is hereby granted, provided that the above 7 - * copyright notice and this permission notice appear in all copies. 8 - * 9 - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 - *) 17 - 18 - open Syndic 19 - 20 - (* Remove all tags *) 21 - let rec syndic_to_buffer b = function 22 - | XML.Node (_, _, subs) -> List.iter (syndic_to_buffer b) subs 23 - | XML.Data (_, d) -> Buffer.add_string b d 24 - 25 - let syndic_to_string x = 26 - let b = Buffer.create 1024 in 27 - List.iter (syndic_to_buffer b) x; 28 - Buffer.contents b 29 - 30 - let string_of_text_construct : Atom.text_construct -> string = function 31 - (* FIXME: we probably would like to parse the HTML and remove the tags *) 32 - | Atom.Text s | Atom.Html (_, s) -> s 33 - | Atom.Xhtml (_, x) -> syndic_to_string x
+9 -10
stack/river/test/test_eio_river.ml
··· 1 1 (* Test the Eio-based River library *) 2 2 3 3 let test_sources = 4 - River. 5 4 [ 6 - { name = "OCaml Planet"; url = "https://ocaml.org/feed.xml" }; 5 + River.Source.make ~name:"OCaml Planet" ~url:"https://ocaml.org/feed.xml"; 7 6 ] 8 7 9 8 let main env = 10 9 Printf.printf "Testing River library with Eio and Requests...\n"; 11 10 12 - (* Use River.with_session for proper resource management *) 13 - River.with_session env @@ fun session -> 11 + (* Use River.Session.with_session for proper resource management *) 12 + River.Session.with_session env @@ fun session -> 14 13 (* Test fetching feeds *) 15 14 let feeds = 16 15 try 17 - List.map (River.fetch session) test_sources 16 + List.map (River.Feed.fetch session) test_sources 18 17 with 19 18 | Failure msg -> 20 19 Printf.printf "Error: %s\n" msg; ··· 28 27 Printf.printf "Successfully fetched %d feed(s)\n" (List.length feeds); 29 28 30 29 (* Get posts from feeds *) 31 - let posts = River.posts feeds in 30 + let posts = River.Post.of_feeds feeds in 32 31 Printf.printf "Found %d posts\n" (List.length posts); 33 32 34 33 (* Show first 3 posts *) ··· 40 39 41 40 List.iteri (fun i post -> 42 41 Printf.printf "\nPost %d:\n" (i + 1); 43 - Printf.printf " Title: %s\n" (River.title post); 44 - Printf.printf " Author: %s\n" (River.author post); 42 + Printf.printf " Title: %s\n" (River.Post.title post); 43 + Printf.printf " Author: %s\n" (River.Post.author post); 45 44 Printf.printf " Date: %s\n" 46 - (match River.date post with 45 + (match River.Post.date post with 47 46 | Some _ -> "Date available" (* Syndic.Date doesn't have to_string *) 48 47 | None -> "N/A"); 49 48 Printf.printf " Link: %s\n" 50 - (match River.link post with 49 + (match River.Post.link post with 51 50 | Some uri -> Uri.to_string uri 52 51 | None -> "N/A") 53 52 ) first_posts
+6 -7
stack/river/test/test_logging.ml
··· 18 18 Printf.printf "---\n\n" 19 19 20 20 let test_sources = 21 - River. 22 21 [ 23 - { name = "Test Feed"; url = "https://example.com/feed.xml" }; 22 + River.Source.make ~name:"Test Feed" ~url:"https://example.com/feed.xml"; 24 23 ] 25 24 26 25 let main env = 27 26 (* Test with logging *) 28 27 Printf.printf "Testing River library with logging...\n\n"; 29 28 30 - (* Use River.with_session for proper resource management *) 31 - River.with_session env @@ fun session -> 29 + (* Use River.Session.with_session for proper resource management *) 30 + River.Session.with_session env @@ fun session -> 32 31 (* Demonstrate fetching with logging *) 33 32 let feeds = 34 33 try 35 - List.map (River.fetch session) test_sources 34 + List.map (River.Feed.fetch session) test_sources 36 35 with 37 36 | Failure msg -> 38 37 Printf.printf "Expected error (for demo): %s\n" msg; ··· 44 43 45 44 if feeds <> [] then begin 46 45 (* This would show post aggregation logs *) 47 - let posts = River.posts feeds in 46 + let posts = River.Post.of_feeds feeds in 48 47 Printf.printf "\nFound %d posts\n" (List.length posts); 49 48 50 49 (* This would show Atom entry creation logs *) 51 - let _entries = River.create_atom_entries posts in 50 + let _entries = River.Format.Atom.entries_of_posts posts in 52 51 Printf.printf "Created Atom entries\n" 53 52 end 54 53