this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

river

+1327 -13
+35 -1
stack/river/cmd/river_cmd.ml
··· 580 580 Log.err (fun m -> m "Failed to export merged feed: %s" err); 581 581 1 582 582 ) $ format_arg $ title_arg $ limit_arg) 583 + 584 + let html = 585 + let output_dir_arg = 586 + let doc = "Output directory for HTML site" in 587 + Arg.(required & pos 0 (some string) None & info [] ~docv:"OUTPUT_DIR" ~doc) 588 + in 589 + let title_arg = 590 + let doc = "Site title" in 591 + Arg.(value & opt string "River Feed" & info ["title"; "t"] ~doc) 592 + in 593 + let posts_per_page_arg = 594 + let doc = "Number of posts per page (default: 25)" in 595 + Arg.(value & opt int 25 & info ["posts-per-page"; "p"] ~doc) 596 + in 597 + Term.(const (fun output_dir_str title posts_per_page env _xdg _profile -> 598 + let state = River.State.create env ~app_name:"river" in 599 + let output_dir = Eio.Path.(env#fs / output_dir_str) in 600 + match River.State.export_html_site state ~output_dir ~title ~posts_per_page () with 601 + | Ok () -> 602 + Log.info (fun m -> m "HTML site generated in %s" output_dir_str); 603 + 0 604 + | Error err -> 605 + Log.err (fun m -> m "Failed to generate HTML site: %s" err); 606 + 1 607 + ) $ output_dir_arg $ title_arg $ posts_per_page_arg) 608 + 583 609 let main_cmd = 584 610 let doc = "River feed management CLI" in 585 611 let main_info = Cmd.info "river-cli" ~version:"1.0" ~doc in ··· 615 641 ~service:"river" 616 642 merge 617 643 in 618 - Cmd.group main_info [user_cmd; sync_cmd; list_cmd; info_cmd; merge_cmd] 644 + let html_cmd = 645 + Eiocmd.run 646 + ~use_keyeio:false 647 + ~info:(Cmd.info "html" ~doc:"Generate a static HTML site from all feeds") 648 + ~app_name:"river" 649 + ~service:"river" 650 + html 651 + in 652 + Cmd.group main_info [user_cmd; sync_cmd; list_cmd; info_cmd; merge_cmd; html_cmd]
+1 -1
stack/river/lib/dune
··· 1 1 (library 2 2 (name river) 3 3 (public_name river) 4 - (libraries eio eio_main requests requests_json_api logs str syndic lambdasoup uri ptime jsonfeed jsont jsont.bytesrw xdge cmdliner eiocmd fmt sortal)) 4 + (libraries eio eio_main requests requests_json_api logs str syndic lambdasoup uri ptime jsonfeed jsont jsont.bytesrw xdge cmdliner eiocmd fmt sortal cmarkit))
+10 -3
stack/river/lib/feed.ml
··· 54 54 Log.debug (fun m -> m "Successfully parsed as JSONFeed"); 55 55 Json jsonfeed 56 56 | Error err -> 57 - Log.debug (fun m -> m "Not a JSONFeed: %s" (Jsont.Error.to_string err)); 57 + let err_str = Jsont.Error.to_string err in 58 + Log.debug (fun m -> m "Not a JSONFeed: %s" err_str); 58 59 (* Fall through to XML parsing *) 59 - failwith "Not a valid JSONFeed" 60 + failwith (Printf.sprintf "Not a valid JSONFeed: %s" err_str) 60 61 ) else ( 61 62 (* Try XML formats *) 62 63 try ··· 110 111 failwith (Printf.sprintf "HTTP %d: %s" status truncated_msg) 111 112 in 112 113 113 - let content = classify_feed ~xmlbase response in 114 + let content = 115 + try classify_feed ~xmlbase response 116 + with Failure msg -> 117 + Log.err (fun m -> m "Failed to parse feed '%s' (%s): %s" 118 + (Source.name source) (Source.url source) msg); 119 + raise (Failure msg) 120 + in 114 121 let title = 115 122 match content with 116 123 | Atom atom -> Text_extract.string_of_text_construct atom.Syndic.Atom.title
+653 -1
stack/river/lib/format.ml
··· 43 43 | None -> Ptime.of_float_s (Unix.gettimeofday ()) |> Option.get 44 44 | Some d -> d 45 45 in 46 - Syndic.Atom.entry ~content ~contributors ~links ~id ~authors ~title ~updated 46 + let categories = 47 + List.map (fun tag -> Syndic.Atom.category tag) (Post.tags post) 48 + in 49 + Syndic.Atom.entry ~content ~contributors ~links ~id ~authors ~title ~updated ~categories 47 50 () 48 51 49 52 let entries_of_posts posts = List.map entry_of_post posts ··· 137 140 | Feed.Json jf -> Some jf 138 141 | _ -> None 139 142 end 143 + 144 + module Html = struct 145 + (** HTML static site generation. *) 146 + 147 + let css = {| 148 + * { margin: 0; padding: 0; box-sizing: border-box; } 149 + 150 + body { 151 + font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Helvetica, Arial, sans-serif; 152 + line-height: 1.5; 153 + color: #333; 154 + background: #fff; 155 + max-width: 800px; 156 + margin: 0 auto; 157 + padding: 15px; 158 + } 159 + 160 + header { 161 + border-bottom: 1px solid #e1e4e8; 162 + padding-bottom: 10px; 163 + margin-bottom: 20px; 164 + } 165 + 166 + header h1 { 167 + font-size: 22px; 168 + font-weight: 600; 169 + margin-bottom: 6px; 170 + } 171 + 172 + header h1 a { 173 + color: #333; 174 + text-decoration: none; 175 + } 176 + 177 + nav { 178 + font-size: 13px; 179 + } 180 + 181 + nav a { 182 + color: #586069; 183 + text-decoration: none; 184 + margin-right: 12px; 185 + } 186 + 187 + nav a:hover { 188 + color: #0366d6; 189 + } 190 + 191 + .post { 192 + margin-bottom: 25px; 193 + padding-bottom: 20px; 194 + border-bottom: 1px solid #e1e4e8; 195 + } 196 + 197 + .post:last-child { 198 + border-bottom: none; 199 + } 200 + 201 + .post-title { 202 + font-size: 18px; 203 + font-weight: 600; 204 + margin-bottom: 5px; 205 + line-height: 1.3; 206 + } 207 + 208 + .post-title a { 209 + color: #0366d6; 210 + text-decoration: none; 211 + } 212 + 213 + .post-title a:hover { 214 + text-decoration: underline; 215 + } 216 + 217 + .post-meta { 218 + font-size: 12px; 219 
+ color: #586069; 220 + margin-bottom: 8px; 221 + display: flex; 222 + align-items: center; 223 + gap: 8px; 224 + } 225 + 226 + .post-meta a { 227 + color: #586069; 228 + text-decoration: none; 229 + } 230 + 231 + .post-meta a:hover { 232 + color: #0366d6; 233 + } 234 + 235 + .author-thumbnail { 236 + width: 24px; 237 + height: 24px; 238 + border-radius: 50%; 239 + object-fit: cover; 240 + } 241 + 242 + .post-meta-text { 243 + flex: 1; 244 + } 245 + 246 + .post-excerpt { 247 + font-size: 14px; 248 + color: #24292e; 249 + line-height: 1.5; 250 + } 251 + 252 + .post-excerpt p { 253 + margin-bottom: 8px; 254 + } 255 + 256 + .post-excerpt ul, .post-excerpt ol { 257 + margin-left: 20px; 258 + margin-bottom: 8px; 259 + } 260 + 261 + .post-excerpt li { 262 + margin-bottom: 3px; 263 + } 264 + 265 + .post-excerpt code { 266 + background: #f6f8fa; 267 + padding: 2px 4px; 268 + border-radius: 3px; 269 + font-size: 13px; 270 + } 271 + 272 + .post-excerpt img { 273 + float: right; 274 + width: 35%; 275 + max-width: 300px; 276 + margin: 0 0 10px 15px; 277 + border-radius: 4px; 278 + cursor: pointer; 279 + transition: opacity 0.2s; 280 + } 281 + 282 + .post-excerpt img:hover { 283 + opacity: 0.9; 284 + } 285 + 286 + @media (max-width: 600px) { 287 + .post-excerpt img { 288 + float: none; 289 + width: 100%; 290 + max-width: 100%; 291 + margin: 10px 0; 292 + } 293 + } 294 + 295 + .lightbox { 296 + display: none; 297 + position: fixed; 298 + top: 0; 299 + left: 0; 300 + width: 100%; 301 + height: 100%; 302 + background: rgba(0, 0, 0, 0.9); 303 + z-index: 1000; 304 + cursor: pointer; 305 + align-items: center; 306 + justify-content: center; 307 + } 308 + 309 + .lightbox.active { 310 + display: flex; 311 + } 312 + 313 + .lightbox img { 314 + max-width: 95%; 315 + max-height: 95%; 316 + object-fit: contain; 317 + } 318 + 319 + .post-full-content { 320 + display: none; 321 + font-size: 14px; 322 + color: #24292e; 323 + line-height: 1.5; 324 + margin-top: 10px; 325 + } 326 + 327 + 
.post-full-content.active { 328 + display: block; 329 + } 330 + 331 + .post-full-content p { 332 + margin-bottom: 10px; 333 + } 334 + 335 + .post-full-content ul, .post-full-content ol { 336 + margin-left: 20px; 337 + margin-bottom: 10px; 338 + } 339 + 340 + .post-full-content li { 341 + margin-bottom: 4px; 342 + } 343 + 344 + .post-full-content h1, .post-full-content h2, .post-full-content h3 { 345 + margin-top: 15px; 346 + margin-bottom: 8px; 347 + } 348 + 349 + .post-full-content h1 { 350 + font-size: 18px; 351 + font-weight: 600; 352 + } 353 + 354 + .post-full-content h2 { 355 + font-size: 16px; 356 + font-weight: 600; 357 + } 358 + 359 + .post-full-content h3 { 360 + font-size: 15px; 361 + font-weight: 600; 362 + } 363 + 364 + .post-full-content code { 365 + background: #f6f8fa; 366 + padding: 2px 4px; 367 + border-radius: 3px; 368 + font-size: 13px; 369 + } 370 + 371 + .post-full-content pre { 372 + background: #f6f8fa; 373 + padding: 10px; 374 + border-radius: 4px; 375 + overflow-x: auto; 376 + margin-bottom: 10px; 377 + } 378 + 379 + .post-full-content pre code { 380 + background: none; 381 + padding: 0; 382 + } 383 + 384 + .post-full-content blockquote { 385 + border-left: 3px solid #e1e4e8; 386 + padding-left: 12px; 387 + margin: 10px 0; 388 + color: #586069; 389 + } 390 + 391 + .post-full-content img { 392 + max-width: 100%; 393 + height: auto; 394 + margin: 10px 0; 395 + border-radius: 4px; 396 + } 397 + 398 + .read-more { 399 + display: inline-block; 400 + color: #0366d6; 401 + font-size: 13px; 402 + cursor: pointer; 403 + text-decoration: none; 404 + margin-top: 8px; 405 + padding: 4px 8px; 406 + border: 1px solid #e1e4e8; 407 + border-radius: 3px; 408 + background: #f6f8fa; 409 + transition: background 0.2s; 410 + } 411 + 412 + .read-more:hover { 413 + background: #e1e4e8; 414 + } 415 + 416 + .read-more::after { 417 + content: ' ▼'; 418 + font-size: 10px; 419 + } 420 + 421 + .read-more.active::after { 422 + content: ' ▲'; 423 + } 424 + 425 + 
.post-tags { 426 + margin-top: 8px; 427 + font-size: 11px; 428 + clear: both; 429 + } 430 + 431 + .post-tags a { 432 + display: inline-block; 433 + background: #f1f8ff; 434 + color: #0366d6; 435 + padding: 2px 6px; 436 + border-radius: 3px; 437 + text-decoration: none; 438 + margin-right: 4px; 439 + margin-bottom: 4px; 440 + } 441 + 442 + .post-tags a:hover { 443 + background: #dbedff; 444 + } 445 + 446 + .pagination { 447 + margin-top: 30px; 448 + padding-top: 15px; 449 + border-top: 1px solid #e1e4e8; 450 + text-align: center; 451 + font-size: 13px; 452 + } 453 + 454 + .pagination a { 455 + color: #0366d6; 456 + text-decoration: none; 457 + margin: 0 8px; 458 + } 459 + 460 + .pagination a:hover { 461 + text-decoration: underline; 462 + } 463 + 464 + .pagination .current { 465 + color: #24292e; 466 + font-weight: 600; 467 + } 468 + 469 + .link-item { 470 + margin-bottom: 15px; 471 + padding-bottom: 12px; 472 + border-bottom: 1px solid #e1e4e8; 473 + } 474 + 475 + .link-item:last-child { 476 + border-bottom: none; 477 + } 478 + 479 + .link-url { 480 + font-size: 14px; 481 + margin-bottom: 3px; 482 + } 483 + 484 + .link-url a { 485 + color: #0366d6; 486 + text-decoration: none; 487 + word-break: break-all; 488 + } 489 + 490 + .link-url a:hover { 491 + text-decoration: underline; 492 + } 493 + 494 + .link-meta { 495 + font-size: 11px; 496 + color: #586069; 497 + } 498 + 499 + .link-meta a { 500 + color: #586069; 501 + text-decoration: none; 502 + } 503 + 504 + .link-meta a:hover { 505 + color: #0366d6; 506 + } 507 + 508 + .author-list, .category-list { 509 + list-style: none; 510 + } 511 + 512 + .author-list li, .category-list li { 513 + margin-bottom: 12px; 514 + padding-bottom: 12px; 515 + border-bottom: 1px solid #e1e4e8; 516 + } 517 + 518 + .author-list li:last-child, .category-list li:last-child { 519 + border-bottom: none; 520 + } 521 + 522 + .author-list a, .category-list a { 523 + color: #0366d6; 524 + text-decoration: none; 525 + font-size: 15px; 526 + } 527 
+ 528 + .author-list a:hover, .category-list a:hover { 529 + text-decoration: underline; 530 + } 531 + 532 + .count { 533 + color: #586069; 534 + font-size: 12px; 535 + margin-left: 6px; 536 + } 537 + 538 + footer { 539 + margin-top: 40px; 540 + padding-top: 15px; 541 + border-top: 1px solid #e1e4e8; 542 + text-align: center; 543 + font-size: 11px; 544 + color: #586069; 545 + } 546 + |} 547 + 548 + let html_escape s = 549 + let buf = Buffer.create (String.length s) in 550 + String.iter (function 551 + | '<' -> Buffer.add_string buf "&lt;" 552 + | '>' -> Buffer.add_string buf "&gt;" 553 + | '&' -> Buffer.add_string buf "&amp;" 554 + | '"' -> Buffer.add_string buf "&quot;" 555 + | '\'' -> Buffer.add_string buf "&#39;" 556 + | c -> Buffer.add_char buf c 557 + ) s; 558 + Buffer.contents buf 559 + 560 + let format_date date = 561 + let open Unix in 562 + let tm = gmtime (Ptime.to_float_s date) in 563 + let months = [|"January"; "February"; "March"; "April"; "May"; "June"; 564 + "July"; "August"; "September"; "October"; "November"; "December"|] in 565 + Printf.sprintf "%s %d, %d" months.(tm.tm_mon) tm.tm_mday (1900 + tm.tm_year) 566 + 567 + let page_template ~title ~nav_current content = 568 + Printf.sprintf {|<!DOCTYPE html> 569 + <html lang="en"> 570 + <head> 571 + <meta charset="UTF-8"> 572 + <meta name="viewport" content="width=device-width, initial-scale=1.0"> 573 + <title>%s</title> 574 + <style>%s</style> 575 + </head> 576 + <body> 577 + <header> 578 + <h1><a href="index.html">River Feed</a></h1> 579 + <nav> 580 + <a href="index.html"%s>Posts</a> 581 + <a href="authors/index.html"%s>Authors</a> 582 + <a href="categories/index.html"%s>Categories</a> 583 + <a href="links.html"%s>Links</a> 584 + </nav> 585 + </header> 586 + <main> 587 + %s 588 + </main> 589 + <footer> 590 + Generated by River Feed Aggregator 591 + </footer> 592 + <div class="lightbox" id="lightbox"> 593 + <img id="lightbox-img" src="" alt=""> 594 + </div> 595 + <script> 596 + (function() { 597 + 
const lightbox = document.getElementById('lightbox'); 598 + const lightboxImg = document.getElementById('lightbox-img'); 599 + 600 + // Add click handler to all images in excerpts and full content 601 + document.addEventListener('click', function(e) { 602 + if (e.target.tagName === 'IMG' && (e.target.closest('.post-excerpt') || e.target.closest('.post-full-content'))) { 603 + e.preventDefault(); 604 + lightboxImg.src = e.target.src; 605 + lightboxImg.alt = e.target.alt; 606 + lightbox.classList.add('active'); 607 + } 608 + }); 609 + 610 + // Close lightbox on click 611 + lightbox.addEventListener('click', function() { 612 + lightbox.classList.remove('active'); 613 + lightboxImg.src = ''; 614 + }); 615 + 616 + // Close on escape key 617 + document.addEventListener('keydown', function(e) { 618 + if (e.key === 'Escape' && lightbox.classList.contains('active')) { 619 + lightbox.classList.remove('active'); 620 + lightboxImg.src = ''; 621 + } 622 + }); 623 + 624 + // Read more toggle 625 + document.addEventListener('click', function(e) { 626 + if (e.target.classList.contains('read-more')) { 627 + e.preventDefault(); 628 + const post = e.target.closest('.post'); 629 + const fullContent = post.querySelector('.post-full-content'); 630 + const excerpt = post.querySelector('.post-excerpt'); 631 + 632 + if (fullContent.classList.contains('active')) { 633 + fullContent.classList.remove('active'); 634 + excerpt.style.display = 'block'; 635 + e.target.textContent = 'Read more'; 636 + e.target.classList.remove('active'); 637 + } else { 638 + fullContent.classList.add('active'); 639 + excerpt.style.display = 'none'; 640 + e.target.textContent = 'Show less'; 641 + e.target.classList.add('active'); 642 + } 643 + } 644 + }); 645 + })(); 646 + </script> 647 + </body> 648 + </html>|} 649 + (html_escape title) 650 + css 651 + (if nav_current = "posts" then " class=\"current\"" else "") 652 + (if nav_current = "authors" then " class=\"current\"" else "") 653 + (if nav_current = 
"categories" then " class=\"current\"" else "") 654 + (if nav_current = "links" then " class=\"current\"" else "") 655 + content 656 + 657 + let pagination_html ~current_page ~total_pages ~base_path = 658 + if total_pages <= 1 then "" 659 + else 660 + let prev = if current_page > 1 then 661 + let prev_page = current_page - 1 in 662 + let href = if prev_page = 1 then base_path ^ "index.html" 663 + else Printf.sprintf "%spage-%d.html" base_path prev_page in 664 + Printf.sprintf {|<a href="%s">← Previous</a>|} href 665 + else "" 666 + in 667 + let next = if current_page < total_pages then 668 + Printf.sprintf {|<a href="%spage-%d.html">Next →</a>|} base_path (current_page + 1) 669 + else "" 670 + in 671 + let pages = 672 + let buf = Buffer.create 256 in 673 + for i = 1 to total_pages do 674 + if i = current_page then 675 + Buffer.add_string buf (Printf.sprintf {| <span class="current">%d</span>|} i) 676 + else 677 + let href = if i = 1 then base_path ^ "index.html" 678 + else Printf.sprintf "%spage-%d.html" base_path i in 679 + Buffer.add_string buf (Printf.sprintf {| <a href="%s">%d</a>|} href i) 680 + done; 681 + Buffer.contents buf 682 + in 683 + Printf.sprintf {|<div class="pagination">%s%s%s</div>|} prev pages next 684 + 685 + let full_content_from_html html_content = 686 + (* Convert HTML to markdown then to clean HTML using Cmarkit *) 687 + let markdown = Html_markdown.html_to_markdown html_content in 688 + let doc = Cmarkit.Doc.of_string markdown in 689 + Cmarkit_html.of_doc ~safe:true doc 690 + 691 + let post_excerpt_from_html html_content ~max_length = 692 + (* Convert HTML to markdown for excerpt *) 693 + let markdown = Html_markdown.html_to_markdown html_content in 694 + (* Find paragraph break after max_length *) 695 + let excerpt_md = 696 + if String.length markdown <= max_length then markdown 697 + else 698 + (* Look for double newline (paragraph break) after max_length *) 699 + let start_search = min max_length (String.length markdown - 1) in 700 + let 
rec find_para_break pos = 701 + if pos >= String.length markdown - 1 then 702 + String.length markdown 703 + else if pos < String.length markdown - 1 && 704 + markdown.[pos] = '\n' && markdown.[pos + 1] = '\n' then 705 + pos 706 + else 707 + find_para_break (pos + 1) 708 + in 709 + let break_pos = find_para_break start_search in 710 + let truncated = String.sub markdown 0 break_pos in 711 + if break_pos < String.length markdown then 712 + truncated ^ "..." 713 + else 714 + truncated 715 + in 716 + (* Convert markdown back to HTML using Cmarkit with custom renderer *) 717 + let doc = Cmarkit.Doc.of_string excerpt_md in 718 + 719 + (* Custom renderer that makes headings smaller and inline *) 720 + let inline_headings = 721 + let block c = function 722 + | Cmarkit.Block.Heading (h, _) -> 723 + let level = Cmarkit.Block.Heading.level h in 724 + let inline = Cmarkit.Block.Heading.inline h in 725 + (* Render heading as a strong tag with smaller font *) 726 + let style = match level with 727 + | 1 -> "font-size: 15px; font-weight: 600;" 728 + | 2 -> "font-size: 14px; font-weight: 600;" 729 + | _ -> "font-size: 14px; font-weight: 500;" 730 + in 731 + Cmarkit_renderer.Context.string c (Printf.sprintf "<strong style=\"%s\">" style); 732 + Cmarkit_renderer.Context.inline c inline; 733 + Cmarkit_renderer.Context.string c "</strong> "; 734 + true 735 + | _ -> false 736 + in 737 + Cmarkit_renderer.make ~block () 738 + in 739 + 740 + let renderer = Cmarkit_renderer.compose (Cmarkit_html.renderer ~safe:true ()) inline_headings in 741 + Cmarkit_renderer.doc_to_string renderer doc 742 + 743 + let render_post_html ~post ~author_username = 744 + let title = Post.title post in 745 + let author = Post.author post in 746 + let date_str = match Post.date post with 747 + | Some d -> format_date d 748 + | None -> "No date" 749 + in 750 + let link_html = match Post.link post with 751 + | Some uri -> 752 + Printf.sprintf {|<a href="%s">%s</a>|} 753 + (html_escape (Uri.to_string uri)) 754 + 
(html_escape title) 755 + | None -> html_escape title 756 + in 757 + let excerpt = post_excerpt_from_html (Post.content post) ~max_length:300 in 758 + let tags_html = 759 + match Post.tags post with 760 + | [] -> "" 761 + | tags -> 762 + let tag_links = List.map (fun tag -> 763 + Printf.sprintf {|<a href="../categories/%s.html">%s</a>|} 764 + (html_escape tag) (html_escape tag) 765 + ) tags in 766 + Printf.sprintf {|<div class="post-tags">%s</div>|} 767 + (String.concat "" tag_links) 768 + in 769 + Printf.sprintf {|<article class="post"> 770 + <h2 class="post-title">%s</h2> 771 + <div class="post-meta"> 772 + By <a href="../authors/%s.html">%s</a> on %s 773 + </div> 774 + <div class="post-excerpt"> 775 + %s 776 + </div> 777 + %s 778 + </article>|} 779 + link_html 780 + (html_escape author_username) 781 + (html_escape author) 782 + date_str 783 + excerpt 784 + tags_html 785 + 786 + let render_posts_page ~title ~posts ~current_page ~total_pages ~base_path ~nav_current = 787 + let posts_html = String.concat "\n" posts in 788 + let pagination = pagination_html ~current_page ~total_pages ~base_path in 789 + let content = posts_html ^ "\n" ^ pagination in 790 + page_template ~title ~nav_current content 791 + end
+54
stack/river/lib/format.mli
··· 101 101 102 102 Returns None if the feed is not JSONFeed. *) 103 103 end 104 + 105 + module Html : sig 106 + (** HTML static site generation. *) 107 + 108 + val format_date : Ptime.t -> string 109 + (** [format_date date] formats a date in human-readable format (e.g., "November 23, 2025"). *) 110 + 111 + val html_escape : string -> string 112 + (** [html_escape s] escapes HTML special characters in string. *) 113 + 114 + val full_content_from_html : string -> string 115 + (** [full_content_from_html html_content] converts HTML content to clean markdown-derived HTML. 116 + 117 + @param html_content The HTML content to convert *) 118 + 119 + val post_excerpt_from_html : string -> max_length:int -> string 120 + (** [post_excerpt_from_html html_content ~max_length] generates an excerpt from HTML content. 121 + 122 + Converts HTML to markdown, truncates to max_length, and converts back to simple HTML. 123 + 124 + @param html_content The HTML content to excerpt 125 + @param max_length Maximum length of the excerpt in characters *) 126 + 127 + val render_post_html : post:Post.t -> author_username:string -> string 128 + (** [render_post_html ~post ~author_username] renders a single post as HTML. 129 + 130 + @param post The post to render 131 + @param author_username The username of the author (for linking) *) 132 + 133 + val render_posts_page : 134 + title:string -> 135 + posts:string list -> 136 + current_page:int -> 137 + total_pages:int -> 138 + base_path:string -> 139 + nav_current:string -> 140 + string 141 + (** [render_posts_page ~title ~posts ~current_page ~total_pages ~base_path ~nav_current] 142 + renders a complete HTML page with posts and pagination. 
143 + 144 + @param title Page title 145 + @param posts List of pre-rendered post HTML strings 146 + @param current_page Current page number (1-indexed) 147 + @param total_pages Total number of pages 148 + @param base_path Base path for pagination links (e.g., "" for root, "authors/" for author pages) 149 + @param nav_current Which nav item is current ("posts", "authors", "categories", "links") *) 150 + 151 + val page_template : title:string -> nav_current:string -> string -> string 152 + (** [page_template ~title ~nav_current content] wraps content in the HTML page template. 153 + 154 + @param title Page title 155 + @param nav_current Which nav item is current 156 + @param content The main content HTML *) 157 + end
+9 -2
stack/river/lib/html_markdown.ml
··· 29 29 Soup.fold (fun acc link -> 30 30 match Soup.attribute "href" link with 31 31 | Some href -> 32 - let text = Soup.texts link |> String.concat "" |> String.trim in 33 - (href, text) :: acc 32 + (* Filter out local anchors and only include absolute external URLs *) 33 + let uri = Uri.of_string href in 34 + let is_absolute = Uri.scheme uri <> None in 35 + let is_local_anchor = String.starts_with ~prefix:"#" href in 36 + if is_absolute && not is_local_anchor then 37 + let text = Soup.texts link |> String.concat "" |> String.trim in 38 + (href, text) :: acc 39 + else 40 + acc 34 41 | None -> acc 35 42 ) [] links 36 43 |> List.rev
+19
stack/river/lib/river.mli
··· 419 419 @param format Output format 420 420 @param limit Optional maximum number of entries *) 421 421 422 + val export_html_site : 423 + t -> 424 + output_dir:Eio.Fs.dir_ty Eio.Path.t -> 425 + title:string -> 426 + ?posts_per_page:int -> 427 + unit -> 428 + (unit, string) result 429 + (** [export_html_site state ~output_dir ~title ()] exports a static HTML site. 430 + 431 + Generates a complete static site with: 432 + - Paginated post listings 433 + - Author index and individual author pages 434 + - Category index and individual category pages 435 + - Links page showing all outgoing links from posts 436 + 437 + @param output_dir Directory to write HTML files to 438 + @param title Site title 439 + @param posts_per_page Number of posts per page (default: 25) *) 440 + 422 441 (** {2 Analysis} *) 423 442 424 443 val analyze_user_quality :
+527 -5
stack/river/lib/state.ml
··· 207 207 let fetched_feeds = 208 208 Eio.Fiber.List.filter_map (fun source -> 209 209 try 210 - Log.info (fun m -> m " Fetching %s (%s)..." 211 - (Source.name source) (Source.url source)); 210 + Log.info (fun m -> m " [%s] Fetching %s (%s)..." 211 + username (Source.name source) (Source.url source)); 212 212 Some (Feed.fetch session source) 213 213 with e -> 214 - Log.err (fun m -> m " Failed to fetch %s: %s" 215 - (Source.name source) (Printexc.to_string e)); 214 + Log.err (fun m -> m " [%s] Failed to fetch %s: %s" 215 + username (Source.name source) (Printexc.to_string e)); 216 216 None 217 217 ) (User.feeds user) 218 218 in ··· 414 414 415 415 let export_merged_feed state ~title ~format ?limit () = 416 416 let all_posts = get_all_posts state ?limit () in 417 - let entries = List.map snd all_posts in 417 + 418 + (* Rewrite author metadata from Sortal user info *) 419 + let rewrite_entry_author username (entry : Syndic.Atom.entry) = 420 + match Storage.get_user state username with 421 + | None -> entry 422 + | Some user -> 423 + (* Get user's full name and email from Sortal *) 424 + let fullname = User.fullname user in 425 + let email = User.email user in 426 + let username = User.username user in 427 + 428 + (* Create new author with Sortal information *) 429 + let new_author = 430 + match email with 431 + | Some email_addr -> 432 + Syndic.Atom.author ~email:email_addr ~uri:(Uri.of_string ("https://" ^ username)) fullname 433 + | None -> 434 + Syndic.Atom.author ~uri:(Uri.of_string ("https://" ^ username)) fullname 435 + in 436 + 437 + (* Update entry with new author, keeping existing contributors *) 438 + let _, other_authors = entry.authors in 439 + { entry with authors = (new_author, other_authors) } 440 + in 441 + 442 + let entries = List.map (fun (username, entry) -> 443 + rewrite_entry_author username entry 444 + ) all_posts in 418 445 419 446 match format with 420 447 | `Atom -> ··· 429 456 | Error err -> Error (Printf.sprintf "Failed to serialize JSON 
Feed: %s" (Jsont.Error.to_string err)) 430 457 else 431 458 Export.export_jsonfeed ~title entries 459 + 460 + let export_html_site state ~output_dir ~title ?(posts_per_page = 25) () = 461 + try 462 + Log.info (fun m -> m "=== Starting HTML site generation ==="); 463 + Log.info (fun m -> m "Output directory: %s" (Eio.Path.native_exn output_dir)); 464 + Log.info (fun m -> m "Site title: %s" title); 465 + Log.info (fun m -> m "Posts per page: %d" posts_per_page); 466 + 467 + (* Sanitize a string for use in filenames - replace unsafe characters *) 468 + let sanitize_filename s = 469 + let buf = Buffer.create (String.length s) in 470 + String.iter (fun c -> 471 + match c with 472 + | '/' | '\\' | ':' | '*' | '?' | '"' | '<' | '>' | '|' -> Buffer.add_char buf '-' 473 + | ' ' -> Buffer.add_char buf '-' 474 + | c -> Buffer.add_char buf c 475 + ) s; 476 + Buffer.contents buf 477 + in 478 + 479 + (* Create directory structure *) 480 + Log.info (fun m -> m "Creating directory structure"); 481 + let mkdir_if_not_exists dir = 482 + try Eio.Path.mkdir ~perm:0o755 dir 483 + with Eio.Io (Eio.Fs.E (Already_exists _), _) -> () 484 + in 485 + mkdir_if_not_exists output_dir; 486 + mkdir_if_not_exists Eio.Path.(output_dir / "authors"); 487 + mkdir_if_not_exists Eio.Path.(output_dir / "categories"); 488 + mkdir_if_not_exists Eio.Path.(output_dir / "thumbnails"); 489 + Log.info (fun m -> m "Directory structure created"); 490 + 491 + (* Helper to get and copy author thumbnail *) 492 + let get_author_thumbnail username = 493 + Log.debug (fun m -> m "Looking up thumbnail for username: %s" username); 494 + match Sortal.lookup state.sortal username with 495 + | Some contact -> 496 + Log.debug (fun m -> m " Found Sortal contact for %s: %s" username (Sortal.Contact.name contact)); 497 + (match Sortal.thumbnail_path state.sortal contact with 498 + | Some src_path -> 499 + Log.info (fun m -> m " Copying thumbnail for %s from: %s" username (Eio.Path.native_exn src_path)); 500 + (* Copy thumbnail 
to output directory *) 501 + let filename = Filename.basename (Eio.Path.native_exn src_path) in 502 + let dest_path = Eio.Path.(output_dir / "thumbnails" / filename) in 503 + (try 504 + Log.debug (fun m -> m " Source path: %s" (Eio.Path.native_exn src_path)); 505 + Log.debug (fun m -> m " Destination path: %s" (Eio.Path.native_exn dest_path)); 506 + let content = Eio.Path.load src_path in 507 + Eio.Path.save ~create:(`Or_truncate 0o644) dest_path content; 508 + Log.info (fun m -> m " Successfully copied thumbnail to: thumbnails/%s" filename); 509 + Some ("thumbnails/" ^ filename) 510 + with e -> 511 + Log.warn (fun m -> m " Failed to copy thumbnail for %s: %s" username (Printexc.to_string e)); 512 + None) 513 + | None -> 514 + Log.debug (fun m -> m " No thumbnail set for %s" username); 515 + None) 516 + | None -> 517 + Log.warn (fun m -> m " No Sortal contact found for username: %s" username); 518 + None 519 + in 520 + 521 + (* Helper to convert Atom entry to a simple record for HTML generation *) 522 + let entry_to_html_data username (entry : Syndic.Atom.entry) = 523 + let title = Text_extract.string_of_text_construct entry.title in 524 + let link = List.find_opt (fun (l : Syndic.Atom.link) -> 525 + l.rel = Syndic.Atom.Alternate 526 + ) entry.links in 527 + let link_uri = match link with 528 + | Some l -> Some l.href 529 + | None -> if List.length entry.links > 0 then Some (List.hd entry.links).href else None 530 + in 531 + let content_html = match entry.content with 532 + | Some (Syndic.Atom.Text s) -> s 533 + | Some (Syndic.Atom.Html (_, s)) -> s 534 + | Some (Syndic.Atom.Xhtml (_, nodes)) -> 535 + String.concat "" (List.map Syndic.XML.to_string nodes) 536 + | Some (Syndic.Atom.Mime _) | Some (Syndic.Atom.Src _) | None -> "" 537 + in 538 + let author, _ = entry.authors in 539 + let tags = List.map (fun (c : Syndic.Atom.category) -> c.term) entry.categories in 540 + (username, title, author.name, entry.updated, link_uri, content_html, tags) 541 + in 542 + 543 + 
(* Get all posts *) 544 + Log.info (fun m -> m "Retrieving all posts from state"); 545 + let all_posts = get_all_posts state () in 546 + let html_data = List.map (fun (username, entry) -> 547 + entry_to_html_data username entry 548 + ) all_posts in 549 + 550 + let unique_users = List.sort_uniq String.compare (List.map (fun (u, _, _, _, _, _, _) -> u) html_data) in 551 + Log.info (fun m -> m "Retrieved %d posts from %d users" (List.length html_data) (List.length unique_users)); 552 + Log.info (fun m -> m "Users: %s" (String.concat ", " unique_users)); 553 + 554 + (* Generate main index pages with pagination *) 555 + let total_posts = List.length html_data in 556 + let total_pages = (total_posts + posts_per_page - 1) / posts_per_page in 557 + Log.info (fun m -> m "Generating main index: %d posts across %d pages" total_posts total_pages); 558 + 559 + for page = 1 to total_pages do 560 + Log.info (fun m -> m " Generating index page %d/%d" page total_pages); 561 + let start_idx = (page - 1) * posts_per_page in 562 + let page_posts = List.filteri (fun i _ -> 563 + i >= start_idx && i < start_idx + posts_per_page 564 + ) html_data in 565 + 566 + let post_htmls = List.map (fun (username, title, author, date, link, content, tags) -> 567 + Log.debug (fun m -> m " Processing post: %s by %s (@%s)" title author username); 568 + (* Create a temporary Post-like structure for rendering *) 569 + (* We'll need to adapt this since we're working with Atom entries *) 570 + let post_html = 571 + let date_str = Format.Html.format_date date in 572 + let link_html = match link with 573 + | Some uri -> 574 + Printf.sprintf {|<a href="%s">%s</a>|} 575 + (Format.Html.html_escape (Uri.to_string uri)) 576 + (Format.Html.html_escape title) 577 + | None -> Format.Html.html_escape title 578 + in 579 + let excerpt = Format.Html.post_excerpt_from_html content ~max_length:300 in 580 + let full_content = Format.Html.full_content_from_html content in 581 + let tags_html = 582 + match tags with 583 + | 
[] -> "" 584 + | _ -> 585 + let tag_links = List.map (fun tag -> 586 + Printf.sprintf {|<a href="categories/%s.html">%s</a>|} 587 + (Format.Html.html_escape (sanitize_filename tag)) (Format.Html.html_escape tag) 588 + ) tags in 589 + Printf.sprintf {|<div class="post-tags">%s</div>|} 590 + (String.concat "" tag_links) 591 + in 592 + let thumbnail_html = match get_author_thumbnail username with 593 + | Some thumb_path -> 594 + Printf.sprintf {|<img src="%s" alt="%s" class="author-thumbnail">|} 595 + (Format.Html.html_escape thumb_path) 596 + (Format.Html.html_escape author) 597 + | None -> "" 598 + in 599 + Printf.sprintf {|<article class="post"> 600 + <h2 class="post-title">%s</h2> 601 + <div class="post-meta"> 602 + %s<div class="post-meta-text">By <a href="authors/%s.html">%s</a> on %s</div> 603 + </div> 604 + <div class="post-excerpt"> 605 + %s 606 + </div> 607 + <div class="post-full-content"> 608 + %s 609 + </div> 610 + <a href="#" class="read-more">Read more</a> 611 + %s 612 + </article>|} 613 + link_html 614 + thumbnail_html 615 + (Format.Html.html_escape (sanitize_filename username)) 616 + (Format.Html.html_escape author) 617 + date_str 618 + excerpt 619 + full_content 620 + tags_html 621 + in 622 + post_html 623 + ) page_posts in 624 + 625 + let page_html = Format.Html.render_posts_page 626 + ~title 627 + ~posts:post_htmls 628 + ~current_page:page 629 + ~total_pages 630 + ~base_path:"" 631 + ~nav_current:"posts" 632 + in 633 + 634 + let filename = if page = 1 then "index.html" 635 + else Printf.sprintf "page-%d.html" page in 636 + Eio.Path.save ~create:(`Or_truncate 0o644) 637 + Eio.Path.(output_dir / filename) 638 + page_html 639 + done; 640 + 641 + (* Generate author index *) 642 + Log.info (fun m -> m "Generating author index and pages"); 643 + let authors_map = Hashtbl.create 32 in 644 + List.iter (fun (username, _, author, _, _, _, _) -> 645 + let count = match Hashtbl.find_opt authors_map username with 646 + | Some (_, c) -> c + 1 647 + | None -> 1 
648 + in 649 + Hashtbl.replace authors_map username (author, count) 650 + ) html_data; 651 + 652 + let authors_list = Hashtbl.fold (fun username (author, count) acc -> 653 + (username, author, count) :: acc 654 + ) authors_map [] |> List.sort (fun (_, a1, _) (_, a2, _) -> String.compare a1 a2) in 655 + 656 + Log.info (fun m -> m "Found %d authors" (List.length authors_list)); 657 + 658 + let authors_index_content = 659 + let items = List.map (fun (username, author, count) -> 660 + Printf.sprintf {|<li><a href="%s.html">%s</a><span class="count">%d post%s</span></li>|} 661 + (Format.Html.html_escape (sanitize_filename username)) 662 + (Format.Html.html_escape author) 663 + count 664 + (if count = 1 then "" else "s") 665 + ) authors_list in 666 + Printf.sprintf "<ul class=\"author-list\">\n%s\n</ul>" 667 + (String.concat "\n" items) 668 + in 669 + 670 + let authors_index_html = Format.Html.page_template 671 + ~title:(title ^ " - Authors") 672 + ~nav_current:"authors" 673 + authors_index_content 674 + in 675 + Eio.Path.save ~create:(`Or_truncate 0o644) 676 + Eio.Path.(output_dir / "authors" / "index.html") 677 + authors_index_html; 678 + 679 + (* Generate individual author pages *) 680 + Hashtbl.iter (fun username (author, _) -> 681 + let author_posts = List.filter (fun (u, _, _, _, _, _, _) -> u = username) html_data in 682 + let author_total = List.length author_posts in 683 + let author_pages = (author_total + posts_per_page - 1) / posts_per_page in 684 + Log.info (fun m -> m " Author: %s (@%s) - %d posts, %d pages" author username author_total author_pages); 685 + 686 + for page = 1 to author_pages do 687 + let start_idx = (page - 1) * posts_per_page in 688 + let page_posts = List.filteri (fun i _ -> 689 + i >= start_idx && i < start_idx + posts_per_page 690 + ) author_posts in 691 + 692 + let post_htmls = List.map (fun (_username, title, author, date, link, content, tags) -> 693 + let date_str = Format.Html.format_date date in 694 + let link_html = match link 
with 695 + | Some uri -> 696 + Printf.sprintf {|<a href="%s">%s</a>|} 697 + (Format.Html.html_escape (Uri.to_string uri)) 698 + (Format.Html.html_escape title) 699 + | None -> Format.Html.html_escape title 700 + in 701 + let excerpt = Format.Html.post_excerpt_from_html content ~max_length:300 in 702 + let full_content = Format.Html.full_content_from_html content in 703 + let tags_html = 704 + match tags with 705 + | [] -> "" 706 + | _ -> 707 + let tag_links = List.map (fun tag -> 708 + Printf.sprintf {|<a href="../categories/%s.html">%s</a>|} 709 + (Format.Html.html_escape (sanitize_filename tag)) (Format.Html.html_escape tag) 710 + ) tags in 711 + Printf.sprintf {|<div class="post-tags">%s</div>|} 712 + (String.concat "" tag_links) 713 + in 714 + Printf.sprintf {|<article class="post"> 715 + <h2 class="post-title">%s</h2> 716 + <div class="post-meta"> 717 + By %s on %s 718 + </div> 719 + <div class="post-excerpt"> 720 + %s 721 + </div> 722 + <div class="post-full-content"> 723 + %s 724 + </div> 725 + <a href="#" class="read-more">Read more</a> 726 + %s 727 + </article>|} 728 + link_html 729 + (Format.Html.html_escape author) 730 + date_str 731 + excerpt 732 + full_content 733 + tags_html 734 + ) page_posts in 735 + 736 + let page_html = Format.Html.render_posts_page 737 + ~title:(author ^ " - " ^ title) 738 + ~posts:post_htmls 739 + ~current_page:page 740 + ~total_pages:author_pages 741 + ~base_path:(sanitize_filename username ^ "-") 742 + ~nav_current:"authors" 743 + in 744 + 745 + let safe_username = sanitize_filename username in 746 + let filename = if page = 1 then safe_username ^ ".html" 747 + else Printf.sprintf "%s-%d.html" safe_username page in 748 + Eio.Path.save ~create:(`Or_truncate 0o644) 749 + Eio.Path.(output_dir / "authors" / filename) 750 + page_html 751 + done 752 + ) authors_map; 753 + 754 + (* Generate category index and pages *) 755 + Log.info (fun m -> m "Generating category index and pages"); 756 + let categories_map = Hashtbl.create 32 in 
757 + List.iter (fun (_, _, _, _, _, _, tags) -> 758 + List.iter (fun tag -> 759 + let count = match Hashtbl.find_opt categories_map tag with 760 + | Some c -> c + 1 761 + | None -> 1 762 + in 763 + Hashtbl.replace categories_map tag count 764 + ) tags 765 + ) html_data; 766 + 767 + let categories_list = Hashtbl.fold (fun tag count acc -> 768 + (tag, count) :: acc 769 + ) categories_map [] |> List.sort (fun (t1, _) (t2, _) -> String.compare t1 t2) in 770 + 771 + Log.info (fun m -> m "Found %d categories" (List.length categories_list)); 772 + 773 + let categories_index_content = 774 + let items = List.map (fun (tag, count) -> 775 + Printf.sprintf {|<li><a href="%s.html">%s</a><span class="count">%d post%s</span></li>|} 776 + (Format.Html.html_escape (sanitize_filename tag)) 777 + (Format.Html.html_escape tag) 778 + count 779 + (if count = 1 then "" else "s") 780 + ) categories_list in 781 + Printf.sprintf "<ul class=\"category-list\">\n%s\n</ul>" 782 + (String.concat "\n" items) 783 + in 784 + 785 + let categories_index_html = Format.Html.page_template 786 + ~title:(title ^ " - Categories") 787 + ~nav_current:"categories" 788 + categories_index_content 789 + in 790 + Eio.Path.save ~create:(`Or_truncate 0o644) 791 + Eio.Path.(output_dir / "categories" / "index.html") 792 + categories_index_html; 793 + 794 + (* Generate individual category pages *) 795 + List.iter (fun (tag, count) -> 796 + let tag_posts = List.filter (fun (_, _, _, _, _, _, tags) -> 797 + List.mem tag tags 798 + ) html_data in 799 + 800 + let tag_total = List.length tag_posts in 801 + let tag_pages = (tag_total + posts_per_page - 1) / posts_per_page in 802 + Log.info (fun m -> m " Category: %s - %d posts, %d pages" tag count tag_pages); 803 + 804 + for page = 1 to tag_pages do 805 + let start_idx = (page - 1) * posts_per_page in 806 + let page_posts = List.filteri (fun i _ -> 807 + i >= start_idx && i < start_idx + posts_per_page 808 + ) tag_posts in 809 + 810 + let post_htmls = List.map (fun 
(username, title, author, date, link, content, tags) -> 811 + let date_str = Format.Html.format_date date in 812 + let link_html = match link with 813 + | Some uri -> 814 + Printf.sprintf {|<a href="%s">%s</a>|} 815 + (Format.Html.html_escape (Uri.to_string uri)) 816 + (Format.Html.html_escape title) 817 + | None -> Format.Html.html_escape title 818 + in 819 + let excerpt = Format.Html.post_excerpt_from_html content ~max_length:300 in 820 + let full_content = Format.Html.full_content_from_html content in 821 + let tags_html = 822 + match tags with 823 + | [] -> "" 824 + | _ -> 825 + let tag_links = List.map (fun t -> 826 + Printf.sprintf {|<a href="%s.html">%s</a>|} 827 + (Format.Html.html_escape (sanitize_filename t)) (Format.Html.html_escape t) 828 + ) tags in 829 + Printf.sprintf {|<div class="post-tags">%s</div>|} 830 + (String.concat "" tag_links) 831 + in 832 + Printf.sprintf {|<article class="post"> 833 + <h2 class="post-title">%s</h2> 834 + <div class="post-meta"> 835 + By <a href="../authors/%s.html">%s</a> on %s 836 + </div> 837 + <div class="post-excerpt"> 838 + %s 839 + </div> 840 + <div class="post-full-content"> 841 + %s 842 + </div> 843 + <a href="#" class="read-more">Read more</a> 844 + %s 845 + </article>|} 846 + link_html 847 + (Format.Html.html_escape (sanitize_filename username)) 848 + (Format.Html.html_escape author) 849 + date_str 850 + excerpt 851 + full_content 852 + tags_html 853 + ) page_posts in 854 + 855 + let page_html = Format.Html.render_posts_page 856 + ~title:(tag ^ " - " ^ title) 857 + ~posts:post_htmls 858 + ~current_page:page 859 + ~total_pages:tag_pages 860 + ~base_path:(sanitize_filename tag ^ "-") 861 + ~nav_current:"categories" 862 + in 863 + 864 + let safe_tag = sanitize_filename tag in 865 + let filename = if page = 1 then safe_tag ^ ".html" 866 + else Printf.sprintf "%s-%d.html" safe_tag page in 867 + Eio.Path.save ~create:(`Or_truncate 0o644) 868 + Eio.Path.(output_dir / "categories" / filename) 869 + page_html 870 + done 
871 + ) categories_list; 872 + 873 + (* Generate links page *) 874 + Log.info (fun m -> m "Generating links page"); 875 + let all_links = List.concat_map (fun (username, title, author, date, post_link, content, _) -> 876 + let links = Html_markdown.extract_links content in 877 + List.map (fun (href, link_text) -> 878 + (href, link_text, username, author, title, post_link, date) 879 + ) links 880 + ) html_data in 881 + 882 + Log.info (fun m -> m " Extracted %d total links from all posts" (List.length all_links)); 883 + 884 + (* Group by URL and track most recent post date *) 885 + let links_map = Hashtbl.create 256 in 886 + List.iter (fun (href, link_text, username, author, post_title, post_link, date) -> 887 + let existing = Hashtbl.find_opt links_map href in 888 + let new_entry = (link_text, username, author, post_title, post_link, date) in 889 + match existing with 890 + | None -> Hashtbl.add links_map href [new_entry] 891 + | Some entries -> 892 + (* Add to list, will sort by date later *) 893 + Hashtbl.replace links_map href (new_entry :: entries) 894 + ) all_links; 895 + 896 + (* Sort links by most recent post date *) 897 + let sorted_links = Hashtbl.fold (fun href entries acc -> 898 + (* Get the most recent entry for this URL *) 899 + let sorted_entries = List.sort (fun (_, _, _, _, _, d1) (_, _, _, _, _, d2) -> 900 + Ptime.compare d2 d1 901 + ) entries in 902 + let most_recent = List.hd sorted_entries in 903 + (href, most_recent, entries) :: acc 904 + ) links_map [] |> List.sort (fun (_, (_, _, _, _, _, d1), _) (_, (_, _, _, _, _, d2), _) -> 905 + Ptime.compare d2 d1 906 + ) in 907 + 908 + Log.info (fun m -> m " Deduplicated to %d unique links" (List.length sorted_links)); 909 + 910 + let links_content = 911 + let items = List.map (fun (href, (link_text, username, author, post_title, post_link, date), all_entries) -> 912 + let date_str = Format.Html.format_date date in 913 + let display_text = if link_text = "" || link_text = href then href else link_text in 
914 + let post_link_html = match post_link with 915 + | Some uri -> 916 + Printf.sprintf {|<a href="%s">%s</a>|} 917 + (Format.Html.html_escape (Uri.to_string uri)) 918 + (Format.Html.html_escape post_title) 919 + | None -> Format.Html.html_escape post_title 920 + in 921 + let count_str = if List.length all_entries > 1 then 922 + Printf.sprintf " (mentioned in %d posts)" (List.length all_entries) 923 + else "" 924 + in 925 + Printf.sprintf {|<div class="link-item"> 926 + <div class="link-url"><a href="%s">%s</a></div> 927 + <div class="link-meta">From %s by <a href="authors/%s.html">%s</a> on %s%s</div> 928 + </div>|} 929 + (Format.Html.html_escape href) 930 + (Format.Html.html_escape display_text) 931 + post_link_html 932 + (Format.Html.html_escape (sanitize_filename username)) 933 + (Format.Html.html_escape author) 934 + date_str 935 + count_str 936 + ) sorted_links in 937 + String.concat "\n" items 938 + in 939 + 940 + let links_html = Format.Html.page_template 941 + ~title:(title ^ " - Links") 942 + ~nav_current:"links" 943 + links_content 944 + in 945 + Eio.Path.save ~create:(`Or_truncate 0o644) 946 + Eio.Path.(output_dir / "links.html") 947 + links_html; 948 + 949 + Log.info (fun m -> m "HTML site generated successfully in %s" 950 + (Eio.Path.native_exn output_dir)); 951 + Ok () 952 + with e -> 953 + Error (Printf.sprintf "Failed to generate HTML site: %s" (Printexc.to_string e)) 432 954 433 955 let analyze_user_quality state ~username = 434 956 match Storage.get_user state username with
+19
stack/river/lib/state.mli
··· 117 117 @param format Output format 118 118 @param limit Optional maximum number of entries *) 119 119 120 + val export_html_site : 121 + t -> 122 + output_dir:Eio.Fs.dir_ty Eio.Path.t -> 123 + title:string -> 124 + ?posts_per_page:int -> 125 + unit -> 126 + (unit, string) result 127 + (** [export_html_site state ~output_dir ~title ()] exports a static HTML site. 128 + 129 + Generates a complete static site with: 130 + - Paginated post listings ([index.html] for the first page, then [page-N.html]) 131 + - Author index ([authors/index.html]) and individual author pages ([authors/NAME.html], then [authors/NAME-N.html]) 132 + - Category index ([categories/index.html]) and individual category pages ([categories/TAG.html], then [categories/TAG-N.html]) 133 + - Links page ([links.html]) showing all outgoing links from posts, grouped by URL and ordered by most recent post date 134 + 135 + @param output_dir Directory to write HTML files to 136 + @param title Site title 137 + @param posts_per_page Number of posts per page (default: 25) @return [Ok ()] once the site has been written; an exception raised during generation is caught and returned as [Error msg], where [msg] is prefixed with [Failed to generate HTML site: ] *) 138 + 120 139 (** {2 Analysis} *) 121 140 122 141 val analyze_user_quality :