···580580 Log.err (fun m -> m "Failed to export merged feed: %s" err);
581581 1
582582 ) $ format_arg $ title_arg $ limit_arg)
(** Cmdliner term for the [html] subcommand.

    Command line:
    - positional [OUTPUT_DIR] (required): directory the site is written to,
      resolved against [env#fs];
    - [--title]/[-t] (default ["River Feed"]): site title;
    - [--posts-per-page]/[-p] (default [25]): page size for listings.

    The term evaluates to a process exit code: [0] when
    [River.State.export_html_site] returns [Ok ()], [1] when it returns
    [Error _] or when [--posts-per-page] is smaller than 1. The two trailing
    arguments of the runner are accepted and ignored. *)
let html =
  let output_dir_arg =
    let doc = "Output directory for HTML site" in
    Arg.(required & pos 0 (some string) None & info [] ~docv:"OUTPUT_DIR" ~doc)
  in
  let title_arg =
    let doc = "Site title" in
    Arg.(value & opt string "River Feed" & info ["title"; "t"] ~doc)
  in
  let posts_per_page_arg =
    let doc = "Number of posts per page (default: 25)" in
    Arg.(value & opt int 25 & info ["posts-per-page"; "p"] ~doc)
  in
  let run output_dir_str title posts_per_page env _xdg _profile =
    if posts_per_page < 1 then begin
      (* The exporter divides by the page size to compute the page count:
         zero would raise Division_by_zero and a negative value would produce
         no pages at all, so reject both before doing any work. *)
      Log.err (fun m ->
          m "Invalid posts-per-page value %d: must be at least 1" posts_per_page);
      1
    end
    else begin
      let state = River.State.create env ~app_name:"river" in
      let output_dir = Eio.Path.(env#fs / output_dir_str) in
      match
        River.State.export_html_site state ~output_dir ~title ~posts_per_page ()
      with
      | Ok () ->
        Log.info (fun m -> m "HTML site generated in %s" output_dir_str);
        0
      | Error err ->
        Log.err (fun m -> m "Failed to generate HTML site: %s" err);
        1
    end
  in
  Term.(const run $ output_dir_arg $ title_arg $ posts_per_page_arg)
608608+583609let main_cmd =
584610 let doc = "River feed management CLI" in
585611 let main_info = Cmd.info "river-cli" ~version:"1.0" ~doc in
···615641 ~service:"river"
616642 merge
617643 in
618618- Cmd.group main_info [user_cmd; sync_cmd; list_cmd; info_cmd; merge_cmd]
644644+ let html_cmd =
645645+ Eiocmd.run
646646+ ~use_keyeio:false
647647+ ~info:(Cmd.info "html" ~doc:"Generate a static HTML site from all feeds")
648648+ ~app_name:"river"
649649+ ~service:"river"
650650+ html
651651+ in
652652+ Cmd.group main_info [user_cmd; sync_cmd; list_cmd; info_cmd; merge_cmd; html_cmd]
···5454 Log.debug (fun m -> m "Successfully parsed as JSONFeed");
5555 Json jsonfeed
5656 | Error err ->
5757- Log.debug (fun m -> m "Not a JSONFeed: %s" (Jsont.Error.to_string err));
5757+ let err_str = Jsont.Error.to_string err in
5858+ Log.debug (fun m -> m "Not a JSONFeed: %s" err_str);
5859 (* Fall through to XML parsing *)
5959- failwith "Not a valid JSONFeed"
6060+ failwith (Printf.sprintf "Not a valid JSONFeed: %s" err_str)
6061 ) else (
6162 (* Try XML formats *)
6263 try
···110111 failwith (Printf.sprintf "HTTP %d: %s" status truncated_msg)
111112 in
112113113113- let content = classify_feed ~xmlbase response in
114114+ let content =
115115+ try classify_feed ~xmlbase response
116116+ with Failure msg ->
117117+ Log.err (fun m -> m "Failed to parse feed '%s' (%s): %s"
118118+ (Source.name source) (Source.url source) msg);
119119+ raise (Failure msg)
120120+ in
114121 let title =
115122 match content with
116123 | Atom atom -> Text_extract.string_of_text_construct atom.Syndic.Atom.title
+653-1
stack/river/lib/format.ml
···4343 | None -> Ptime.of_float_s (Unix.gettimeofday ()) |> Option.get
4444 | Some d -> d
4545 in
4646- Syndic.Atom.entry ~content ~contributors ~links ~id ~authors ~title ~updated
4646+ let categories =
4747+ List.map (fun tag -> Syndic.Atom.category tag) (Post.tags post)
4848+ in
4949+ Syndic.Atom.entry ~content ~contributors ~links ~id ~authors ~title ~updated ~categories
4750 ()
48514952 let entries_of_posts posts = List.map entry_of_post posts
···137140 | Feed.Json jf -> Some jf
138141 | _ -> None
139142end
143143+144144+module Html = struct
145145+ (** HTML static site generation. *)
146146+147147+ let css = {|
148148+* { margin: 0; padding: 0; box-sizing: border-box; }
149149+150150+body {
151151+ font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Helvetica, Arial, sans-serif;
152152+ line-height: 1.5;
153153+ color: #333;
154154+ background: #fff;
155155+ max-width: 800px;
156156+ margin: 0 auto;
157157+ padding: 15px;
158158+}
159159+160160+header {
161161+ border-bottom: 1px solid #e1e4e8;
162162+ padding-bottom: 10px;
163163+ margin-bottom: 20px;
164164+}
165165+166166+header h1 {
167167+ font-size: 22px;
168168+ font-weight: 600;
169169+ margin-bottom: 6px;
170170+}
171171+172172+header h1 a {
173173+ color: #333;
174174+ text-decoration: none;
175175+}
176176+177177+nav {
178178+ font-size: 13px;
179179+}
180180+181181+nav a {
182182+ color: #586069;
183183+ text-decoration: none;
184184+ margin-right: 12px;
185185+}
186186+187187+nav a:hover {
188188+ color: #0366d6;
189189+}
190190+191191+.post {
192192+ margin-bottom: 25px;
193193+ padding-bottom: 20px;
194194+ border-bottom: 1px solid #e1e4e8;
195195+}
196196+197197+.post:last-child {
198198+ border-bottom: none;
199199+}
200200+201201+.post-title {
202202+ font-size: 18px;
203203+ font-weight: 600;
204204+ margin-bottom: 5px;
205205+ line-height: 1.3;
206206+}
207207+208208+.post-title a {
209209+ color: #0366d6;
210210+ text-decoration: none;
211211+}
212212+213213+.post-title a:hover {
214214+ text-decoration: underline;
215215+}
216216+217217+.post-meta {
218218+ font-size: 12px;
219219+ color: #586069;
220220+ margin-bottom: 8px;
221221+ display: flex;
222222+ align-items: center;
223223+ gap: 8px;
224224+}
225225+226226+.post-meta a {
227227+ color: #586069;
228228+ text-decoration: none;
229229+}
230230+231231+.post-meta a:hover {
232232+ color: #0366d6;
233233+}
234234+235235+.author-thumbnail {
236236+ width: 24px;
237237+ height: 24px;
238238+ border-radius: 50%;
239239+ object-fit: cover;
240240+}
241241+242242+.post-meta-text {
243243+ flex: 1;
244244+}
245245+246246+.post-excerpt {
247247+ font-size: 14px;
248248+ color: #24292e;
249249+ line-height: 1.5;
250250+}
251251+252252+.post-excerpt p {
253253+ margin-bottom: 8px;
254254+}
255255+256256+.post-excerpt ul, .post-excerpt ol {
257257+ margin-left: 20px;
258258+ margin-bottom: 8px;
259259+}
260260+261261+.post-excerpt li {
262262+ margin-bottom: 3px;
263263+}
264264+265265+.post-excerpt code {
266266+ background: #f6f8fa;
267267+ padding: 2px 4px;
268268+ border-radius: 3px;
269269+ font-size: 13px;
270270+}
271271+272272+.post-excerpt img {
273273+ float: right;
274274+ width: 35%;
275275+ max-width: 300px;
276276+ margin: 0 0 10px 15px;
277277+ border-radius: 4px;
278278+ cursor: pointer;
279279+ transition: opacity 0.2s;
280280+}
281281+282282+.post-excerpt img:hover {
283283+ opacity: 0.9;
284284+}
285285+286286+@media (max-width: 600px) {
287287+ .post-excerpt img {
288288+ float: none;
289289+ width: 100%;
290290+ max-width: 100%;
291291+ margin: 10px 0;
292292+ }
293293+}
294294+295295+.lightbox {
296296+ display: none;
297297+ position: fixed;
298298+ top: 0;
299299+ left: 0;
300300+ width: 100%;
301301+ height: 100%;
302302+ background: rgba(0, 0, 0, 0.9);
303303+ z-index: 1000;
304304+ cursor: pointer;
305305+ align-items: center;
306306+ justify-content: center;
307307+}
308308+309309+.lightbox.active {
310310+ display: flex;
311311+}
312312+313313+.lightbox img {
314314+ max-width: 95%;
315315+ max-height: 95%;
316316+ object-fit: contain;
317317+}
318318+319319+.post-full-content {
320320+ display: none;
321321+ font-size: 14px;
322322+ color: #24292e;
323323+ line-height: 1.5;
324324+ margin-top: 10px;
325325+}
326326+327327+.post-full-content.active {
328328+ display: block;
329329+}
330330+331331+.post-full-content p {
332332+ margin-bottom: 10px;
333333+}
334334+335335+.post-full-content ul, .post-full-content ol {
336336+ margin-left: 20px;
337337+ margin-bottom: 10px;
338338+}
339339+340340+.post-full-content li {
341341+ margin-bottom: 4px;
342342+}
343343+344344+.post-full-content h1, .post-full-content h2, .post-full-content h3 {
345345+ margin-top: 15px;
346346+ margin-bottom: 8px;
347347+}
348348+349349+.post-full-content h1 {
350350+ font-size: 18px;
351351+ font-weight: 600;
352352+}
353353+354354+.post-full-content h2 {
355355+ font-size: 16px;
356356+ font-weight: 600;
357357+}
358358+359359+.post-full-content h3 {
360360+ font-size: 15px;
361361+ font-weight: 600;
362362+}
363363+364364+.post-full-content code {
365365+ background: #f6f8fa;
366366+ padding: 2px 4px;
367367+ border-radius: 3px;
368368+ font-size: 13px;
369369+}
370370+371371+.post-full-content pre {
372372+ background: #f6f8fa;
373373+ padding: 10px;
374374+ border-radius: 4px;
375375+ overflow-x: auto;
376376+ margin-bottom: 10px;
377377+}
378378+379379+.post-full-content pre code {
380380+ background: none;
381381+ padding: 0;
382382+}
383383+384384+.post-full-content blockquote {
385385+ border-left: 3px solid #e1e4e8;
386386+ padding-left: 12px;
387387+ margin: 10px 0;
388388+ color: #586069;
389389+}
390390+391391+.post-full-content img {
392392+ max-width: 100%;
393393+ height: auto;
394394+ margin: 10px 0;
395395+ border-radius: 4px;
396396+}
397397+398398+.read-more {
399399+ display: inline-block;
400400+ color: #0366d6;
401401+ font-size: 13px;
402402+ cursor: pointer;
403403+ text-decoration: none;
404404+ margin-top: 8px;
405405+ padding: 4px 8px;
406406+ border: 1px solid #e1e4e8;
407407+ border-radius: 3px;
408408+ background: #f6f8fa;
409409+ transition: background 0.2s;
410410+}
411411+412412+.read-more:hover {
413413+ background: #e1e4e8;
414414+}
415415+416416+.read-more::after {
417417+ content: ' ▼';
418418+ font-size: 10px;
419419+}
420420+421421+.read-more.active::after {
422422+ content: ' ▲';
423423+}
424424+425425+.post-tags {
426426+ margin-top: 8px;
427427+ font-size: 11px;
428428+ clear: both;
429429+}
430430+431431+.post-tags a {
432432+ display: inline-block;
433433+ background: #f1f8ff;
434434+ color: #0366d6;
435435+ padding: 2px 6px;
436436+ border-radius: 3px;
437437+ text-decoration: none;
438438+ margin-right: 4px;
439439+ margin-bottom: 4px;
440440+}
441441+442442+.post-tags a:hover {
443443+ background: #dbedff;
444444+}
445445+446446+.pagination {
447447+ margin-top: 30px;
448448+ padding-top: 15px;
449449+ border-top: 1px solid #e1e4e8;
450450+ text-align: center;
451451+ font-size: 13px;
452452+}
453453+454454+.pagination a {
455455+ color: #0366d6;
456456+ text-decoration: none;
457457+ margin: 0 8px;
458458+}
459459+460460+.pagination a:hover {
461461+ text-decoration: underline;
462462+}
463463+464464+.pagination .current {
465465+ color: #24292e;
466466+ font-weight: 600;
467467+}
468468+469469+.link-item {
470470+ margin-bottom: 15px;
471471+ padding-bottom: 12px;
472472+ border-bottom: 1px solid #e1e4e8;
473473+}
474474+475475+.link-item:last-child {
476476+ border-bottom: none;
477477+}
478478+479479+.link-url {
480480+ font-size: 14px;
481481+ margin-bottom: 3px;
482482+}
483483+484484+.link-url a {
485485+ color: #0366d6;
486486+ text-decoration: none;
487487+ word-break: break-all;
488488+}
489489+490490+.link-url a:hover {
491491+ text-decoration: underline;
492492+}
493493+494494+.link-meta {
495495+ font-size: 11px;
496496+ color: #586069;
497497+}
498498+499499+.link-meta a {
500500+ color: #586069;
501501+ text-decoration: none;
502502+}
503503+504504+.link-meta a:hover {
505505+ color: #0366d6;
506506+}
507507+508508+.author-list, .category-list {
509509+ list-style: none;
510510+}
511511+512512+.author-list li, .category-list li {
513513+ margin-bottom: 12px;
514514+ padding-bottom: 12px;
515515+ border-bottom: 1px solid #e1e4e8;
516516+}
517517+518518+.author-list li:last-child, .category-list li:last-child {
519519+ border-bottom: none;
520520+}
521521+522522+.author-list a, .category-list a {
523523+ color: #0366d6;
524524+ text-decoration: none;
525525+ font-size: 15px;
526526+}
527527+528528+.author-list a:hover, .category-list a:hover {
529529+ text-decoration: underline;
530530+}
531531+532532+.count {
533533+ color: #586069;
534534+ font-size: 12px;
535535+ margin-left: 6px;
536536+}
537537+538538+footer {
539539+ margin-top: 40px;
540540+ padding-top: 15px;
541541+ border-top: 1px solid #e1e4e8;
542542+ text-align: center;
543543+ font-size: 11px;
544544+ color: #586069;
545545+}
546546+|}
(** [html_escape s] returns a copy of [s] that is safe to place in HTML text
    and in quoted attribute values.

    The five significant characters are replaced by entity references:
    less-than, greater-than, ampersand, double quote and single quote become
    [&lt;], [&gt;], [&amp;], [&quot;] and [&#39;]. Every other byte is copied
    unchanged, so multi-byte UTF-8 sequences pass through intact.

    The function is not idempotent: escaping an already escaped string
    escapes its ampersands again. *)
let html_escape s =
  (* The input length is only a lower bound on the output size; the buffer
     grows on demand when entities are emitted. *)
  let buf = Buffer.create (String.length s) in
  String.iter
    (function
      | '<' -> Buffer.add_string buf "&lt;"
      | '>' -> Buffer.add_string buf "&gt;"
      | '&' -> Buffer.add_string buf "&amp;"
      | '"' -> Buffer.add_string buf "&quot;"
      | '\'' -> Buffer.add_string buf "&#39;"
      | c -> Buffer.add_char buf c)
    s;
  Buffer.contents buf
(** [format_date date] renders [date] as an English long-form date of the
    shape [Month D, YYYY], using the broken-down UTC time obtained from
    [Unix.gmtime]. The day of month is not zero-padded. *)
let format_date date =
  let month_names =
    [| "January"; "February"; "March"; "April"; "May"; "June";
       "July"; "August"; "September"; "October"; "November"; "December" |]
  in
  let { Unix.tm_mon; tm_mday; tm_year; _ } =
    Unix.gmtime (Ptime.to_float_s date)
  in
  (* tm_mon is zero-based and tm_year counts years since 1900. *)
  Printf.sprintf "%s %d, %d" month_names.(tm_mon) tm_mday (tm_year + 1900)
(** [page_template ~title ~nav_current content] builds a complete HTML
    document around [content] with a single [Printf.sprintf] call.

    - [title] is passed through [html_escape] and placed in the title element
      of the head. The heading in the page header is the fixed text
      River Feed and does not depend on [title].
    - [css] is inlined verbatim in a style element.
    - [nav_current] selects which navigation link receives the [current]
      class; the values compared against are the strings posts, authors,
      categories and links. Any other value leaves all four links unmarked.
    - [content] is inserted inside the main element as is, without escaping,
      so callers must supply markup that is already safe.

    The page also embeds a script providing an image lightbox for images
    inside post excerpts and full content, and a read-more toggle that swaps
    the excerpt and the full content of a post.

    NOTE(review): the navigation and header hrefs are relative (index.html,
    authors/index.html, categories/index.html, links.html); they presumably
    only resolve correctly for pages written at the site root. Confirm how
    pages in subdirectories are linked. *)
566566+567567+ let page_template ~title ~nav_current content =
568568+ Printf.sprintf {|<!DOCTYPE html>
569569+<html lang="en">
570570+<head>
571571+ <meta charset="UTF-8">
572572+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
573573+ <title>%s</title>
574574+ <style>%s</style>
575575+</head>
576576+<body>
577577+ <header>
578578+ <h1><a href="index.html">River Feed</a></h1>
579579+ <nav>
580580+ <a href="index.html"%s>Posts</a>
581581+ <a href="authors/index.html"%s>Authors</a>
582582+ <a href="categories/index.html"%s>Categories</a>
583583+ <a href="links.html"%s>Links</a>
584584+ </nav>
585585+ </header>
586586+ <main>
587587+%s
588588+ </main>
589589+ <footer>
590590+ Generated by River Feed Aggregator
591591+ </footer>
592592+ <div class="lightbox" id="lightbox">
593593+ <img id="lightbox-img" src="" alt="">
594594+ </div>
595595+ <script>
596596+ (function() {
597597+ const lightbox = document.getElementById('lightbox');
598598+ const lightboxImg = document.getElementById('lightbox-img');
599599+600600+ // Add click handler to all images in excerpts and full content
601601+ document.addEventListener('click', function(e) {
602602+ if (e.target.tagName === 'IMG' && (e.target.closest('.post-excerpt') || e.target.closest('.post-full-content'))) {
603603+ e.preventDefault();
604604+ lightboxImg.src = e.target.src;
605605+ lightboxImg.alt = e.target.alt;
606606+ lightbox.classList.add('active');
607607+ }
608608+ });
609609+610610+ // Close lightbox on click
611611+ lightbox.addEventListener('click', function() {
612612+ lightbox.classList.remove('active');
613613+ lightboxImg.src = '';
614614+ });
615615+616616+ // Close on escape key
617617+ document.addEventListener('keydown', function(e) {
618618+ if (e.key === 'Escape' && lightbox.classList.contains('active')) {
619619+ lightbox.classList.remove('active');
620620+ lightboxImg.src = '';
621621+ }
622622+ });
623623+624624+ // Read more toggle
625625+ document.addEventListener('click', function(e) {
626626+ if (e.target.classList.contains('read-more')) {
627627+ e.preventDefault();
628628+ const post = e.target.closest('.post');
629629+ const fullContent = post.querySelector('.post-full-content');
630630+ const excerpt = post.querySelector('.post-excerpt');
631631+632632+ if (fullContent.classList.contains('active')) {
633633+ fullContent.classList.remove('active');
634634+ excerpt.style.display = 'block';
635635+ e.target.textContent = 'Read more';
636636+ e.target.classList.remove('active');
637637+ } else {
638638+ fullContent.classList.add('active');
639639+ excerpt.style.display = 'none';
640640+ e.target.textContent = 'Show less';
641641+ e.target.classList.add('active');
642642+ }
643643+ }
644644+ });
645645+ })();
646646+ </script>
647647+</body>
648648+</html>|}
(* The arguments below are consumed in the order the placeholders appear in
   the template: title, stylesheet, the four navigation class attributes
   (posts, authors, categories, links), then the main content. *)
649649+ (html_escape title)
650650+ css
651651+ (if nav_current = "posts" then " class=\"current\"" else "")
652652+ (if nav_current = "authors" then " class=\"current\"" else "")
653653+ (if nav_current = "categories" then " class=\"current\"" else "")
654654+ (if nav_current = "links" then " class=\"current\"" else "")
655655+ content
(** [pagination_html ~current_page ~total_pages ~base_path] renders the
    pagination bar for a listing.

    Returns the empty string when there is at most one page. Otherwise the
    result is a single div of class [pagination] holding, in order: an
    optional Previous link, one entry per page from 1 to [total_pages] (the
    current page as a span of class [current], the others as links) and an
    optional Next link.

    Page 1 lives at [base_path ^ "index.html"]; page N at
    [base_path ^ "page-N.html"]. [base_path] is concatenated as given. *)
let pagination_html ~current_page ~total_pages ~base_path =
  (* Target of a numbered page: the first page is the index file. *)
  let page_href n =
    if n = 1 then base_path ^ "index.html"
    else Printf.sprintf "%spage-%d.html" base_path n
  in
  if total_pages <= 1 then ""
  else begin
    let prev_link =
      if current_page > 1 then
        Printf.sprintf {|<a href="%s">← Previous</a>|}
          (page_href (current_page - 1))
      else ""
    in
    let next_link =
      if current_page < total_pages then
        Printf.sprintf {|<a href="%spage-%d.html">Next →</a>|} base_path
          (current_page + 1)
      else ""
    in
    let numbered =
      List.init total_pages (fun idx ->
          let n = idx + 1 in
          if n = current_page then
            Printf.sprintf {| <span class="current">%d</span>|} n
          else Printf.sprintf {| <a href="%s">%d</a>|} (page_href n) n)
      |> String.concat ""
    in
    Printf.sprintf {|<div class="pagination">%s%s%s</div>|} prev_link numbered
      next_link
  end
(** [full_content_from_html html_content] normalises feed-supplied HTML by a
    round trip through Markdown: the input is converted with
    [Html_markdown.html_to_markdown], parsed by [Cmarkit.Doc.of_string] and
    rendered back to HTML with [Cmarkit_html.of_doc ~safe:true]. *)
let full_content_from_html html_content =
  Cmarkit_html.of_doc ~safe:true
    (Cmarkit.Doc.of_string (Html_markdown.html_to_markdown html_content))
(** [post_excerpt_from_html html_content ~max_length] produces a short HTML
    excerpt of [html_content].

    The content is first converted to Markdown with
    [Html_markdown.html_to_markdown]. If that Markdown is longer than
    [max_length] bytes, it is cut at the first paragraph break (two
    consecutive newlines) found at or after offset [max_length] and ["..."]
    is appended; when no such break exists the whole text is kept. The
    excerpt can therefore be longer than [max_length]: the value is a
    threshold, not a hard limit. Cutting only ever happens on a newline, so
    multi-byte characters are never split.

    The Markdown excerpt is rendered back to HTML with the safe Cmarkit HTML
    renderer, except that headings are emitted as inline [strong] elements
    with a small font instead of heading elements.

    @param html_content The HTML content to excerpt
    @param max_length Offset, in bytes of the intermediate Markdown, after
    which the excerpt may be cut. Negative values are treated as [0]. *)
let post_excerpt_from_html html_content ~max_length =
  let markdown = Html_markdown.html_to_markdown html_content in
  let len = String.length markdown in
  (* A negative threshold would index the string out of bounds below. *)
  let threshold = max 0 max_length in
  let excerpt_md =
    if len <= threshold then markdown
    else begin
      (* Index of the first "\n\n" at or after [pos], or [len] if none. *)
      let rec find_para_break pos =
        if pos >= len - 1 then len
        else if markdown.[pos] = '\n' && markdown.[pos + 1] = '\n' then pos
        else find_para_break (pos + 1)
      in
      let break_pos = find_para_break threshold in
      if break_pos < len then String.sub markdown 0 break_pos ^ "..."
      else markdown
    end
  in
  let doc = Cmarkit.Doc.of_string excerpt_md in
  (* Block renderer that overrides headings only: each heading becomes an
     inline strong element so excerpts stay visually compact. Returning
     [false] hands every other block to the renderer it is composed with. *)
  let inline_headings =
    let block c = function
      | Cmarkit.Block.Heading (h, _) ->
        let level = Cmarkit.Block.Heading.level h in
        let inline = Cmarkit.Block.Heading.inline h in
        let style =
          match level with
          | 1 -> "font-size: 15px; font-weight: 600;"
          | 2 -> "font-size: 14px; font-weight: 600;"
          | _ -> "font-size: 14px; font-weight: 500;"
        in
        Cmarkit_renderer.Context.string c
          (Printf.sprintf "<strong style=\"%s\">" style);
        Cmarkit_renderer.Context.inline c inline;
        Cmarkit_renderer.Context.string c "</strong> ";
        true
      | _ -> false
    in
    Cmarkit_renderer.make ~block ()
  in
  let renderer =
    Cmarkit_renderer.compose (Cmarkit_html.renderer ~safe:true ()) inline_headings
  in
  Cmarkit_renderer.doc_to_string renderer doc
(** [render_post_html ~post ~author_username] renders one post as an article
    element of class post.

    The fragment contains, in order:
    - the post title, wrapped in a link to [Post.link post] when a link is
      present;
    - a byline linking to the author page named after [author_username] and
      showing [Post.author post], followed by the date from [format_date], or
      the text No date when the post has none;
    - an excerpt built by [post_excerpt_from_html] with [~max_length:300];
    - one link per tag of [Post.tags post], omitted entirely when the post
      has no tags.

    Title, author name, username, link URI and tags are passed through
    [html_escape]; the excerpt is inserted as returned by
    [post_excerpt_from_html].

    NOTE(review): author and category hrefs start with a parent-directory
    prefix, so this fragment presumably targets pages located one level below
    the site root. Confirm against the caller.

    NOTE(review): the category href uses the raw tag, HTML-escaped only. The
    export code in state.ml runs tags through a filename sanitiser before
    building the equivalent link, so tags containing characters such as a
    slash or a space may not match the generated file names. Confirm.

    @param post The post to render
    @param author_username The username of the author (for linking) *)
742742+743743+ let render_post_html ~post ~author_username =
744744+ let title = Post.title post in
745745+ let author = Post.author post in
746746+ let date_str = match Post.date post with
747747+ | Some d -> format_date d
748748+ | None -> "No date"
749749+ in
750750+ let link_html = match Post.link post with
751751+ | Some uri ->
752752+ Printf.sprintf {|<a href="%s">%s</a>|}
753753+ (html_escape (Uri.to_string uri))
754754+ (html_escape title)
755755+ | None -> html_escape title
756756+ in
757757+ let excerpt = post_excerpt_from_html (Post.content post) ~max_length:300 in
758758+ let tags_html =
759759+ match Post.tags post with
760760+ | [] -> ""
761761+ | tags ->
762762+ let tag_links = List.map (fun tag ->
763763+ Printf.sprintf {|<a href="../categories/%s.html">%s</a>|}
764764+ (html_escape tag) (html_escape tag)
765765+ ) tags in
766766+ Printf.sprintf {|<div class="post-tags">%s</div>|}
767767+ (String.concat "" tag_links)
768768+ in
769769+ Printf.sprintf {|<article class="post">
770770+ <h2 class="post-title">%s</h2>
771771+ <div class="post-meta">
772772+ By <a href="../authors/%s.html">%s</a> on %s
773773+ </div>
774774+ <div class="post-excerpt">
775775+%s
776776+ </div>
777777+%s
778778+</article>|}
779779+ link_html
780780+ (html_escape author_username)
781781+ (html_escape author)
782782+ date_str
783783+ excerpt
784784+ tags_html
(** [render_posts_page ~title ~posts ~current_page ~total_pages ~base_path
    ~nav_current] assembles a full listing page.

    [posts] are pre-rendered HTML fragments; they are joined with newlines,
    followed by a newline and the bar produced by [pagination_html], and the
    result is wrapped by [page_template] using [title] and [nav_current]. *)
let render_posts_page ~title ~posts ~current_page ~total_pages ~base_path ~nav_current =
  let listing = String.concat "\n" posts in
  let pager = pagination_html ~current_page ~total_pages ~base_path in
  let body = String.concat "\n" [ listing; pager ] in
  page_template ~title ~nav_current body
791791+end
+54
stack/river/lib/format.mli
···101101102102 Returns None if the feed is not JSONFeed. *)
103103end
104104+105105+module Html : sig
106106+ (** HTML static site generation. *)
107107+108108+ val format_date : Ptime.t -> string
109109+ (** [format_date date] formats a date in human-readable format (e.g., "November 23, 2025"). *)
110110+111111+ val html_escape : string -> string
112112+ (** [html_escape s] escapes HTML special characters in string. *)
113113+114114+ val full_content_from_html : string -> string
115115+ (** [full_content_from_html html_content] converts HTML content to clean markdown-derived HTML.
116116+117117+ @param html_content The HTML content to convert *)
118118+119119+ val post_excerpt_from_html : string -> max_length:int -> string
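  (** [post_excerpt_from_html html_content ~max_length] generates an excerpt from HTML content.

      Converts the HTML to markdown. If the markdown is longer than [max_length], it is cut
      at the first paragraph break found at or after offset [max_length] and "..." is
      appended; if there is no such break the whole text is kept. The excerpt may therefore
      be longer than [max_length]. The result is converted back to simple HTML, with
      headings rendered inline.

      @param html_content The HTML content to excerpt
      @param max_length Offset in the intermediate markdown after which the excerpt may be cut *)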
126126+127127+ val render_post_html : post:Post.t -> author_username:string -> string
128128+ (** [render_post_html ~post ~author_username] renders a single post as HTML.
129129+130130+ @param post The post to render
131131+ @param author_username The username of the author (for linking) *)
132132+133133+ val render_posts_page :
134134+ title:string ->
135135+ posts:string list ->
136136+ current_page:int ->
137137+ total_pages:int ->
138138+ base_path:string ->
139139+ nav_current:string ->
140140+ string
141141+ (** [render_posts_page ~title ~posts ~current_page ~total_pages ~base_path ~nav_current]
142142+ renders a complete HTML page with posts and pagination.
143143+144144+ @param title Page title
145145+ @param posts List of pre-rendered post HTML strings
146146+ @param current_page Current page number (1-indexed)
147147+ @param total_pages Total number of pages
148148+ @param base_path Base path for pagination links (e.g., "" for root, "authors/" for author pages)
149149+ @param nav_current Which nav item is current ("posts", "authors", "categories", "links") *)
150150+151151+ val page_template : title:string -> nav_current:string -> string -> string
152152+ (** [page_template ~title ~nav_current content] wraps content in the HTML page template.
153153+154154+ @param title Page title
155155+ @param nav_current Which nav item is current
156156+ @param content The main content HTML *)
157157+end
+9-2
stack/river/lib/html_markdown.ml
···2929 Soup.fold (fun acc link ->
3030 match Soup.attribute "href" link with
3131 | Some href ->
3232- let text = Soup.texts link |> String.concat "" |> String.trim in
3333- (href, text) :: acc
3232+ (* Filter out local anchors and only include absolute external URLs *)
3333+ let uri = Uri.of_string href in
3434+ let is_absolute = Uri.scheme uri <> None in
3535+ let is_local_anchor = String.starts_with ~prefix:"#" href in
3636+ if is_absolute && not is_local_anchor then
3737+ let text = Soup.texts link |> String.concat "" |> String.trim in
3838+ (href, text) :: acc
3939+ else
4040+ acc
3441 | None -> acc
3542 ) [] links
3643 |> List.rev
+19
stack/river/lib/river.mli
···419419 @param format Output format
420420 @param limit Optional maximum number of entries *)
421421422422+ val export_html_site :
423423+ t ->
424424+ output_dir:Eio.Fs.dir_ty Eio.Path.t ->
425425+ title:string ->
426426+ ?posts_per_page:int ->
427427+ unit ->
428428+ (unit, string) result
429429+ (** [export_html_site state ~output_dir ~title ()] exports a static HTML site.
430430+431431+ Generates a complete static site with:
432432+ - Paginated post listings
433433+ - Author index and individual author pages
434434+ - Category index and individual category pages
435435+ - Links page showing all outgoing links from posts
436436+437437+ @param output_dir Directory to write HTML files to
438438+ @param title Site title
439439+ @param posts_per_page Number of posts per page (default: 25) *)
440440+422441 (** {2 Analysis} *)
423442424443 val analyze_user_quality :
+527-5
stack/river/lib/state.ml
···207207 let fetched_feeds =
208208 Eio.Fiber.List.filter_map (fun source ->
209209 try
210210- Log.info (fun m -> m " Fetching %s (%s)..."
211211- (Source.name source) (Source.url source));
210210+ Log.info (fun m -> m " [%s] Fetching %s (%s)..."
211211+ username (Source.name source) (Source.url source));
212212 Some (Feed.fetch session source)
213213 with e ->
214214- Log.err (fun m -> m " Failed to fetch %s: %s"
215215- (Source.name source) (Printexc.to_string e));
214214+ Log.err (fun m -> m " [%s] Failed to fetch %s: %s"
215215+ username (Source.name source) (Printexc.to_string e));
216216 None
217217 ) (User.feeds user)
218218 in
···414414415415let export_merged_feed state ~title ~format ?limit () =
416416 let all_posts = get_all_posts state ?limit () in
417417- let entries = List.map snd all_posts in
417417+418418+ (* Rewrite author metadata from Sortal user info *)
419419+ let rewrite_entry_author username (entry : Syndic.Atom.entry) =
420420+ match Storage.get_user state username with
421421+ | None -> entry
422422+ | Some user ->
423423+ (* Get user's full name and email from Sortal *)
424424+ let fullname = User.fullname user in
425425+ let email = User.email user in
426426+ let username = User.username user in
427427+428428+ (* Create new author with Sortal information *)
429429+ let new_author =
430430+ match email with
431431+ | Some email_addr ->
432432+ Syndic.Atom.author ~email:email_addr ~uri:(Uri.of_string ("https://" ^ username)) fullname
433433+ | None ->
434434+ Syndic.Atom.author ~uri:(Uri.of_string ("https://" ^ username)) fullname
435435+ in
436436+437437+ (* Update entry with new author, keeping existing contributors *)
438438+ let _, other_authors = entry.authors in
439439+ { entry with authors = (new_author, other_authors) }
440440+ in
441441+442442+ let entries = List.map (fun (username, entry) ->
443443+ rewrite_entry_author username entry
444444+ ) all_posts in
418445419446 match format with
420447 | `Atom ->
···429456 | Error err -> Error (Printf.sprintf "Failed to serialize JSON Feed: %s" (Jsont.Error.to_string err))
430457 else
431458 Export.export_jsonfeed ~title entries
459459+460460+let export_html_site state ~output_dir ~title ?(posts_per_page = 25) () =
461461+ try
462462+ Log.info (fun m -> m "=== Starting HTML site generation ===");
463463+ Log.info (fun m -> m "Output directory: %s" (Eio.Path.native_exn output_dir));
464464+ Log.info (fun m -> m "Site title: %s" title);
465465+ Log.info (fun m -> m "Posts per page: %d" posts_per_page);
466466+467467+ (* Sanitize a string for use in filenames - replace unsafe characters *)
468468+ let sanitize_filename s =
469469+ let buf = Buffer.create (String.length s) in
470470+ String.iter (fun c ->
471471+ match c with
472472+ | '/' | '\\' | ':' | '*' | '?' | '"' | '<' | '>' | '|' -> Buffer.add_char buf '-'
473473+ | ' ' -> Buffer.add_char buf '-'
474474+ | c -> Buffer.add_char buf c
475475+ ) s;
476476+ Buffer.contents buf
477477+ in
478478+479479+ (* Create directory structure *)
480480+ Log.info (fun m -> m "Creating directory structure");
481481+ let mkdir_if_not_exists dir =
482482+ try Eio.Path.mkdir ~perm:0o755 dir
483483+ with Eio.Io (Eio.Fs.E (Already_exists _), _) -> ()
484484+ in
485485+ mkdir_if_not_exists output_dir;
486486+ mkdir_if_not_exists Eio.Path.(output_dir / "authors");
487487+ mkdir_if_not_exists Eio.Path.(output_dir / "categories");
488488+ mkdir_if_not_exists Eio.Path.(output_dir / "thumbnails");
489489+ Log.info (fun m -> m "Directory structure created");
490490+491491+ (* Helper to get and copy author thumbnail *)
492492+ let get_author_thumbnail username =
493493+ Log.debug (fun m -> m "Looking up thumbnail for username: %s" username);
494494+ match Sortal.lookup state.sortal username with
495495+ | Some contact ->
496496+ Log.debug (fun m -> m " Found Sortal contact for %s: %s" username (Sortal.Contact.name contact));
497497+ (match Sortal.thumbnail_path state.sortal contact with
498498+ | Some src_path ->
499499+ Log.info (fun m -> m " Copying thumbnail for %s from: %s" username (Eio.Path.native_exn src_path));
500500+ (* Copy thumbnail to output directory *)
501501+ let filename = Filename.basename (Eio.Path.native_exn src_path) in
502502+ let dest_path = Eio.Path.(output_dir / "thumbnails" / filename) in
503503+ (try
504504+ Log.debug (fun m -> m " Source path: %s" (Eio.Path.native_exn src_path));
505505+ Log.debug (fun m -> m " Destination path: %s" (Eio.Path.native_exn dest_path));
506506+ let content = Eio.Path.load src_path in
507507+ Eio.Path.save ~create:(`Or_truncate 0o644) dest_path content;
508508+ Log.info (fun m -> m " Successfully copied thumbnail to: thumbnails/%s" filename);
509509+ Some ("thumbnails/" ^ filename)
510510+ with e ->
511511+ Log.warn (fun m -> m " Failed to copy thumbnail for %s: %s" username (Printexc.to_string e));
512512+ None)
513513+ | None ->
514514+ Log.debug (fun m -> m " No thumbnail set for %s" username);
515515+ None)
516516+ | None ->
517517+ Log.warn (fun m -> m " No Sortal contact found for username: %s" username);
518518+ None
519519+ in
520520+521521+ (* Helper to convert Atom entry to a simple record for HTML generation *)
522522+ let entry_to_html_data username (entry : Syndic.Atom.entry) =
523523+ let title = Text_extract.string_of_text_construct entry.title in
524524+ let link = List.find_opt (fun (l : Syndic.Atom.link) ->
525525+ l.rel = Syndic.Atom.Alternate
526526+ ) entry.links in
527527+ let link_uri = match link with
528528+ | Some l -> Some l.href
529529+ | None -> if List.length entry.links > 0 then Some (List.hd entry.links).href else None
530530+ in
531531+ let content_html = match entry.content with
532532+ | Some (Syndic.Atom.Text s) -> s
533533+ | Some (Syndic.Atom.Html (_, s)) -> s
534534+ | Some (Syndic.Atom.Xhtml (_, nodes)) ->
535535+ String.concat "" (List.map Syndic.XML.to_string nodes)
536536+ | Some (Syndic.Atom.Mime _) | Some (Syndic.Atom.Src _) | None -> ""
537537+ in
538538+ let author, _ = entry.authors in
539539+ let tags = List.map (fun (c : Syndic.Atom.category) -> c.term) entry.categories in
540540+ (username, title, author.name, entry.updated, link_uri, content_html, tags)
541541+ in
542542+543543+ (* Get all posts *)
544544+ Log.info (fun m -> m "Retrieving all posts from state");
545545+ let all_posts = get_all_posts state () in
546546+ let html_data = List.map (fun (username, entry) ->
547547+ entry_to_html_data username entry
548548+ ) all_posts in
549549+550550+ let unique_users = List.sort_uniq String.compare (List.map (fun (u, _, _, _, _, _, _) -> u) html_data) in
551551+ Log.info (fun m -> m "Retrieved %d posts from %d users" (List.length html_data) (List.length unique_users));
552552+ Log.info (fun m -> m "Users: %s" (String.concat ", " unique_users));
(* Generate main index pages with pagination.

   A non-positive page size would make the ceiling division below either
   raise [Division_by_zero] (size 0) or produce a non-positive page count
   that silently skips every page (negative size), so it is rejected up
   front. The exception is reported through this function's trailing
   [with e -> Error ...] handler like any other failure. *)
if posts_per_page <= 0 then
  invalid_arg
    (Printf.sprintf "posts_per_page must be positive (got %d)" posts_per_page);
let total_posts = List.length html_data in
(* Ceiling division, clamped to at least one page: with zero posts the loop
   below must still run once so that [index.html] is written. The author,
   category and links pages are produced even for an empty site, so omitting
   the index would leave the generated site without its entry page. *)
let total_pages =
  max 1 ((total_posts + posts_per_page - 1) / posts_per_page)
in
Log.info (fun m -> m "Generating main index: %d posts across %d pages" total_posts total_pages);
558558+559559+ for page = 1 to total_pages do
560560+ Log.info (fun m -> m " Generating index page %d/%d" page total_pages);
561561+ let start_idx = (page - 1) * posts_per_page in
562562+ let page_posts = List.filteri (fun i _ ->
563563+ i >= start_idx && i < start_idx + posts_per_page
564564+ ) html_data in
565565+566566+ let post_htmls = List.map (fun (username, title, author, date, link, content, tags) ->
567567+ Log.debug (fun m -> m " Processing post: %s by %s (@%s)" title author username);
568568+ (* Create a temporary Post-like structure for rendering *)
569569+ (* We'll need to adapt this since we're working with Atom entries *)
570570+ let post_html =
571571+ let date_str = Format.Html.format_date date in
572572+ let link_html = match link with
573573+ | Some uri ->
574574+ Printf.sprintf {|<a href="%s">%s</a>|}
575575+ (Format.Html.html_escape (Uri.to_string uri))
576576+ (Format.Html.html_escape title)
577577+ | None -> Format.Html.html_escape title
578578+ in
579579+ let excerpt = Format.Html.post_excerpt_from_html content ~max_length:300 in
580580+ let full_content = Format.Html.full_content_from_html content in
581581+ let tags_html =
582582+ match tags with
583583+ | [] -> ""
584584+ | _ ->
585585+ let tag_links = List.map (fun tag ->
586586+ Printf.sprintf {|<a href="categories/%s.html">%s</a>|}
587587+ (Format.Html.html_escape (sanitize_filename tag)) (Format.Html.html_escape tag)
588588+ ) tags in
589589+ Printf.sprintf {|<div class="post-tags">%s</div>|}
590590+ (String.concat "" tag_links)
591591+ in
592592+ let thumbnail_html = match get_author_thumbnail username with
593593+ | Some thumb_path ->
594594+ Printf.sprintf {|<img src="%s" alt="%s" class="author-thumbnail">|}
595595+ (Format.Html.html_escape thumb_path)
596596+ (Format.Html.html_escape author)
597597+ | None -> ""
598598+ in
599599+ Printf.sprintf {|<article class="post">
600600+ <h2 class="post-title">%s</h2>
601601+ <div class="post-meta">
602602+ %s<div class="post-meta-text">By <a href="authors/%s.html">%s</a> on %s</div>
603603+ </div>
604604+ <div class="post-excerpt">
605605+%s
606606+ </div>
607607+ <div class="post-full-content">
608608+%s
609609+ </div>
610610+ <a href="#" class="read-more">Read more</a>
611611+%s
612612+</article>|}
613613+ link_html
614614+ thumbnail_html
615615+ (Format.Html.html_escape (sanitize_filename username))
616616+ (Format.Html.html_escape author)
617617+ date_str
618618+ excerpt
619619+ full_content
620620+ tags_html
621621+ in
622622+ post_html
623623+ ) page_posts in
624624+625625+ let page_html = Format.Html.render_posts_page
626626+ ~title
627627+ ~posts:post_htmls
628628+ ~current_page:page
629629+ ~total_pages
630630+ ~base_path:""
631631+ ~nav_current:"posts"
632632+ in
633633+634634+ let filename = if page = 1 then "index.html"
635635+ else Printf.sprintf "page-%d.html" page in
636636+ Eio.Path.save ~create:(`Or_truncate 0o644)
637637+ Eio.Path.(output_dir / filename)
638638+ page_html
639639+ done;
640640+641641+ (* Generate author index *)
642642+ Log.info (fun m -> m "Generating author index and pages");
643643+ let authors_map = Hashtbl.create 32 in
644644+ List.iter (fun (username, _, author, _, _, _, _) ->
645645+ let count = match Hashtbl.find_opt authors_map username with
646646+ | Some (_, c) -> c + 1
647647+ | None -> 1
648648+ in
649649+ Hashtbl.replace authors_map username (author, count)
650650+ ) html_data;
651651+652652+ let authors_list = Hashtbl.fold (fun username (author, count) acc ->
653653+ (username, author, count) :: acc
654654+ ) authors_map [] |> List.sort (fun (_, a1, _) (_, a2, _) -> String.compare a1 a2) in
655655+656656+ Log.info (fun m -> m "Found %d authors" (List.length authors_list));
657657+658658+ let authors_index_content =
659659+ let items = List.map (fun (username, author, count) ->
660660+ Printf.sprintf {|<li><a href="%s.html">%s</a><span class="count">%d post%s</span></li>|}
661661+ (Format.Html.html_escape (sanitize_filename username))
662662+ (Format.Html.html_escape author)
663663+ count
664664+ (if count = 1 then "" else "s")
665665+ ) authors_list in
666666+ Printf.sprintf "<ul class=\"author-list\">\n%s\n</ul>"
667667+ (String.concat "\n" items)
668668+ in
669669+670670+ let authors_index_html = Format.Html.page_template
671671+ ~title:(title ^ " - Authors")
672672+ ~nav_current:"authors"
673673+ authors_index_content
674674+ in
675675+ Eio.Path.save ~create:(`Or_truncate 0o644)
676676+ Eio.Path.(output_dir / "authors" / "index.html")
677677+ authors_index_html;
678678+679679+ (* Generate individual author pages *)
680680+ Hashtbl.iter (fun username (author, _) ->
681681+ let author_posts = List.filter (fun (u, _, _, _, _, _, _) -> u = username) html_data in
682682+ let author_total = List.length author_posts in
683683+ let author_pages = (author_total + posts_per_page - 1) / posts_per_page in
684684+ Log.info (fun m -> m " Author: %s (@%s) - %d posts, %d pages" author username author_total author_pages);
685685+686686+ for page = 1 to author_pages do
687687+ let start_idx = (page - 1) * posts_per_page in
688688+ let page_posts = List.filteri (fun i _ ->
689689+ i >= start_idx && i < start_idx + posts_per_page
690690+ ) author_posts in
691691+692692+ let post_htmls = List.map (fun (_username, title, author, date, link, content, tags) ->
693693+ let date_str = Format.Html.format_date date in
694694+ let link_html = match link with
695695+ | Some uri ->
696696+ Printf.sprintf {|<a href="%s">%s</a>|}
697697+ (Format.Html.html_escape (Uri.to_string uri))
698698+ (Format.Html.html_escape title)
699699+ | None -> Format.Html.html_escape title
700700+ in
701701+ let excerpt = Format.Html.post_excerpt_from_html content ~max_length:300 in
702702+ let full_content = Format.Html.full_content_from_html content in
703703+ let tags_html =
704704+ match tags with
705705+ | [] -> ""
706706+ | _ ->
707707+ let tag_links = List.map (fun tag ->
708708+ Printf.sprintf {|<a href="../categories/%s.html">%s</a>|}
709709+ (Format.Html.html_escape (sanitize_filename tag)) (Format.Html.html_escape tag)
710710+ ) tags in
711711+ Printf.sprintf {|<div class="post-tags">%s</div>|}
712712+ (String.concat "" tag_links)
713713+ in
714714+ Printf.sprintf {|<article class="post">
715715+ <h2 class="post-title">%s</h2>
716716+ <div class="post-meta">
717717+ By %s on %s
718718+ </div>
719719+ <div class="post-excerpt">
720720+%s
721721+ </div>
722722+ <div class="post-full-content">
723723+%s
724724+ </div>
725725+ <a href="#" class="read-more">Read more</a>
726726+%s
727727+</article>|}
728728+ link_html
729729+ (Format.Html.html_escape author)
730730+ date_str
731731+ excerpt
732732+ full_content
733733+ tags_html
734734+ ) page_posts in
735735+736736+ let page_html = Format.Html.render_posts_page
737737+ ~title:(author ^ " - " ^ title)
738738+ ~posts:post_htmls
739739+ ~current_page:page
740740+ ~total_pages:author_pages
741741+ ~base_path:(sanitize_filename username ^ "-")
742742+ ~nav_current:"authors"
743743+ in
744744+745745+ let safe_username = sanitize_filename username in
746746+ let filename = if page = 1 then safe_username ^ ".html"
747747+ else Printf.sprintf "%s-%d.html" safe_username page in
748748+ Eio.Path.save ~create:(`Or_truncate 0o644)
749749+ Eio.Path.(output_dir / "authors" / filename)
750750+ page_html
751751+ done
752752+ ) authors_map;
753753+754754+ (* Generate category index and pages *)
755755+ Log.info (fun m -> m "Generating category index and pages");
756756+ let categories_map = Hashtbl.create 32 in
757757+ List.iter (fun (_, _, _, _, _, _, tags) ->
758758+ List.iter (fun tag ->
759759+ let count = match Hashtbl.find_opt categories_map tag with
760760+ | Some c -> c + 1
761761+ | None -> 1
762762+ in
763763+ Hashtbl.replace categories_map tag count
764764+ ) tags
765765+ ) html_data;
766766+767767+ let categories_list = Hashtbl.fold (fun tag count acc ->
768768+ (tag, count) :: acc
769769+ ) categories_map [] |> List.sort (fun (t1, _) (t2, _) -> String.compare t1 t2) in
770770+771771+ Log.info (fun m -> m "Found %d categories" (List.length categories_list));
772772+773773+ let categories_index_content =
774774+ let items = List.map (fun (tag, count) ->
775775+ Printf.sprintf {|<li><a href="%s.html">%s</a><span class="count">%d post%s</span></li>|}
776776+ (Format.Html.html_escape (sanitize_filename tag))
777777+ (Format.Html.html_escape tag)
778778+ count
779779+ (if count = 1 then "" else "s")
780780+ ) categories_list in
781781+ Printf.sprintf "<ul class=\"category-list\">\n%s\n</ul>"
782782+ (String.concat "\n" items)
783783+ in
784784+785785+ let categories_index_html = Format.Html.page_template
786786+ ~title:(title ^ " - Categories")
787787+ ~nav_current:"categories"
788788+ categories_index_content
789789+ in
790790+ Eio.Path.save ~create:(`Or_truncate 0o644)
791791+ Eio.Path.(output_dir / "categories" / "index.html")
792792+ categories_index_html;
793793+794794+ (* Generate individual category pages *)
795795+ List.iter (fun (tag, count) ->
796796+ let tag_posts = List.filter (fun (_, _, _, _, _, _, tags) ->
797797+ List.mem tag tags
798798+ ) html_data in
799799+800800+ let tag_total = List.length tag_posts in
801801+ let tag_pages = (tag_total + posts_per_page - 1) / posts_per_page in
802802+ Log.info (fun m -> m " Category: %s - %d posts, %d pages" tag count tag_pages);
803803+804804+ for page = 1 to tag_pages do
805805+ let start_idx = (page - 1) * posts_per_page in
806806+ let page_posts = List.filteri (fun i _ ->
807807+ i >= start_idx && i < start_idx + posts_per_page
808808+ ) tag_posts in
809809+810810+ let post_htmls = List.map (fun (username, title, author, date, link, content, tags) ->
811811+ let date_str = Format.Html.format_date date in
812812+ let link_html = match link with
813813+ | Some uri ->
814814+ Printf.sprintf {|<a href="%s">%s</a>|}
815815+ (Format.Html.html_escape (Uri.to_string uri))
816816+ (Format.Html.html_escape title)
817817+ | None -> Format.Html.html_escape title
818818+ in
819819+ let excerpt = Format.Html.post_excerpt_from_html content ~max_length:300 in
820820+ let full_content = Format.Html.full_content_from_html content in
821821+ let tags_html =
822822+ match tags with
823823+ | [] -> ""
824824+ | _ ->
825825+ let tag_links = List.map (fun t ->
826826+ Printf.sprintf {|<a href="%s.html">%s</a>|}
827827+ (Format.Html.html_escape (sanitize_filename t)) (Format.Html.html_escape t)
828828+ ) tags in
829829+ Printf.sprintf {|<div class="post-tags">%s</div>|}
830830+ (String.concat "" tag_links)
831831+ in
832832+ Printf.sprintf {|<article class="post">
833833+ <h2 class="post-title">%s</h2>
834834+ <div class="post-meta">
835835+ By <a href="../authors/%s.html">%s</a> on %s
836836+ </div>
837837+ <div class="post-excerpt">
838838+%s
839839+ </div>
840840+ <div class="post-full-content">
841841+%s
842842+ </div>
843843+ <a href="#" class="read-more">Read more</a>
844844+%s
845845+</article>|}
846846+ link_html
847847+ (Format.Html.html_escape (sanitize_filename username))
848848+ (Format.Html.html_escape author)
849849+ date_str
850850+ excerpt
851851+ full_content
852852+ tags_html
853853+ ) page_posts in
854854+855855+ let page_html = Format.Html.render_posts_page
856856+ ~title:(tag ^ " - " ^ title)
857857+ ~posts:post_htmls
858858+ ~current_page:page
859859+ ~total_pages:tag_pages
860860+ ~base_path:(sanitize_filename tag ^ "-")
861861+ ~nav_current:"categories"
862862+ in
863863+864864+ let safe_tag = sanitize_filename tag in
865865+ let filename = if page = 1 then safe_tag ^ ".html"
866866+ else Printf.sprintf "%s-%d.html" safe_tag page in
867867+ Eio.Path.save ~create:(`Or_truncate 0o644)
868868+ Eio.Path.(output_dir / "categories" / filename)
869869+ page_html
870870+ done
871871+ ) categories_list;
872872+873873+ (* Generate links page *)
874874+ Log.info (fun m -> m "Generating links page");
875875+ let all_links = List.concat_map (fun (username, title, author, date, post_link, content, _) ->
876876+ let links = Html_markdown.extract_links content in
877877+ List.map (fun (href, link_text) ->
878878+ (href, link_text, username, author, title, post_link, date)
879879+ ) links
880880+ ) html_data in
881881+882882+ Log.info (fun m -> m " Extracted %d total links from all posts" (List.length all_links));
883883+884884+ (* Group by URL and track most recent post date *)
885885+ let links_map = Hashtbl.create 256 in
886886+ List.iter (fun (href, link_text, username, author, post_title, post_link, date) ->
887887+ let existing = Hashtbl.find_opt links_map href in
888888+ let new_entry = (link_text, username, author, post_title, post_link, date) in
889889+ match existing with
890890+ | None -> Hashtbl.add links_map href [new_entry]
891891+ | Some entries ->
892892+ (* Add to list, will sort by date later *)
893893+ Hashtbl.replace links_map href (new_entry :: entries)
894894+ ) all_links;
895895+896896+ (* Sort links by most recent post date *)
897897+ let sorted_links = Hashtbl.fold (fun href entries acc ->
898898+ (* Get the most recent entry for this URL *)
899899+ let sorted_entries = List.sort (fun (_, _, _, _, _, d1) (_, _, _, _, _, d2) ->
900900+ Ptime.compare d2 d1
901901+ ) entries in
902902+ let most_recent = List.hd sorted_entries in
903903+ (href, most_recent, entries) :: acc
904904+ ) links_map [] |> List.sort (fun (_, (_, _, _, _, _, d1), _) (_, (_, _, _, _, _, d2), _) ->
905905+ Ptime.compare d2 d1
906906+ ) in
907907+908908+ Log.info (fun m -> m " Deduplicated to %d unique links" (List.length sorted_links));
909909+910910+ let links_content =
911911+ let items = List.map (fun (href, (link_text, username, author, post_title, post_link, date), all_entries) ->
912912+ let date_str = Format.Html.format_date date in
913913+ let display_text = if link_text = "" || link_text = href then href else link_text in
914914+ let post_link_html = match post_link with
915915+ | Some uri ->
916916+ Printf.sprintf {|<a href="%s">%s</a>|}
917917+ (Format.Html.html_escape (Uri.to_string uri))
918918+ (Format.Html.html_escape post_title)
919919+ | None -> Format.Html.html_escape post_title
920920+ in
921921+ let count_str = if List.length all_entries > 1 then
922922+ Printf.sprintf " (mentioned in %d posts)" (List.length all_entries)
923923+ else ""
924924+ in
925925+ Printf.sprintf {|<div class="link-item">
926926+ <div class="link-url"><a href="%s">%s</a></div>
927927+ <div class="link-meta">From %s by <a href="authors/%s.html">%s</a> on %s%s</div>
928928+</div>|}
929929+ (Format.Html.html_escape href)
930930+ (Format.Html.html_escape display_text)
931931+ post_link_html
932932+ (Format.Html.html_escape (sanitize_filename username))
933933+ (Format.Html.html_escape author)
934934+ date_str
935935+ count_str
936936+ ) sorted_links in
937937+ String.concat "\n" items
938938+ in
939939+940940+ let links_html = Format.Html.page_template
941941+ ~title:(title ^ " - Links")
942942+ ~nav_current:"links"
943943+ links_content
944944+ in
945945+ Eio.Path.save ~create:(`Or_truncate 0o644)
946946+ Eio.Path.(output_dir / "links.html")
947947+ links_html;
948948+949949+ Log.info (fun m -> m "HTML site generated successfully in %s"
950950+ (Eio.Path.native_exn output_dir));
951951+ Ok ()
952952+ with e ->
953953+ Error (Printf.sprintf "Failed to generate HTML site: %s" (Printexc.to_string e))
432954433955let analyze_user_quality state ~username =
434956 match Storage.get_user state username with
+19
stack/river/lib/state.mli
···117117 @param format Output format
118118 @param limit Optional maximum number of entries *)
119119120120+val export_html_site :
121121+ t ->
122122+ output_dir:Eio.Fs.dir_ty Eio.Path.t ->
123123+ title:string ->
124124+ ?posts_per_page:int ->
125125+ unit ->
126126+ (unit, string) result
127127+(** [export_html_site state ~output_dir ~title ()] exports a static HTML site.

    Generates a complete static site under [output_dir] with:
    - Paginated post listings ([index.html] for the first page, then
      [page-N.html] for later pages)
    - Author index ([authors/index.html]) and individual, paginated author
      pages
    - Category index ([categories/index.html]) and individual, paginated
      category pages
    - Links page ([links.html]) showing all outgoing links from posts,
      deduplicated by URL and ordered by the date of the most recent post
      that mentions each link

    @param output_dir Directory to write HTML files to
    @param title Site title
    @param posts_per_page Number of posts per page (default: 25)
    @return [Ok ()] once the pages have been written. Exceptions raised
      while generating the pages are caught and reported as [Error msg],
      where [msg] starts with ["Failed to generate HTML site: "]. *)
138138+120139(** {2 Analysis} *)
121140122141val analyze_user_quality :