this repo has no description
3
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: add html parsing

+108 -8
+3 -8
src/features/services/check_hn.ts
··· 11 11 and, 12 12 isNull, 13 13 lt, 14 - gte, 15 14 notInArray, 16 15 not, 17 16 count, 18 17 inArray, 19 18 } from "drizzle-orm"; 20 - import { 21 - getNewStories, 22 - getItems, 23 - type Story, 24 - getTopStories, 25 - } from "../../libs/hackernews"; 19 + import { getItems, type Story, getTopStories } from "../../libs/hackernews"; 26 20 import { addDays } from "../../libs/time"; 27 21 import type { AnyMessageBlock } from "slack-edge"; 28 22 import { sqlite } from "../../libs/db"; 23 + import { htmlToSlackMarkdown } from "../../libs/slackmd"; 29 24 30 25 // Constants 31 26 const TOP_STORIES_LIMIT = 30; // Front page is considered the top 30 stories ··· 756 751 type: "section", 757 752 text: { 758 753 type: "mrkdwn", 759 - text: `*<${story.url || `https://news.ycombinator.com/item?id=${story.id}`}|${story.title}>*\n${story.text || ""}`, 754 + text: `*<${story.url || `https://news.ycombinator.com/item?id=${story.id}`}|${story.title}>*\n${htmlToSlackMarkdown(story.text || "")}`, 760 755 }, 761 756 }, 762 757 {
+105
src/libs/slackmd.ts
/**
 * Private-use code points that temporarily stand in for the angle brackets of
 * generated Slack links, so the final "strip leftover HTML tags" pass cannot
 * mistake `<url|text>` for an HTML tag and delete it.
 */
const LINK_OPEN = "\uE000";
const LINK_CLOSE = "\uE001";

/**
 * Decodes the HTML character references Slack does not understand.
 *
 * Slack mrkdwn only recognises `&amp;`, `&lt;` and `&gt;`, so those three are
 * left untouched. Numeric references (`&#x27;`, `&#39;`) and a few common
 * named ones are decoded; a numeric reference that decodes to `&`, `<` or `>`
 * is rewritten to the matching Slack escape instead of the raw character.
 *
 * @param text Text that may contain HTML character references
 * @returns The text with references decoded in a single pass (no double decoding)
 */
function decodeHtmlEntities(text: string): string {
  const named: Record<string, string> = { quot: '"', apos: "'", nbsp: " " };
  const slackEscapes: Record<string, string> = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
  };

  return text.replace(
    /&(#x[0-9a-f]+|#\d+|quot|apos|nbsp);/gi,
    (match: string, body: string) => {
      const reference = body.toLowerCase();
      if (!reference.startsWith("#")) {
        return named[reference] ?? match;
      }

      const codePoint = reference.startsWith("#x")
        ? Number.parseInt(reference.slice(2), 16)
        : Number.parseInt(reference.slice(1), 10);

      // Leave out-of-range references as-is rather than throwing a RangeError.
      if (!Number.isInteger(codePoint) || codePoint <= 0 || codePoint > 0x10ffff) {
        return match;
      }

      const decoded = String.fromCodePoint(codePoint);
      return slackEscapes[decoded] ?? decoded;
    },
  );
}

/**
 * Converts an HTML fragment to Slack mrkdwn.
 *
 * Uses regular expressions instead of DOM manipulation so it runs in plain
 * Node.js environments. Supported: paragraphs (including unclosed `<p>`
 * separators), line breaks, bold, italic, inline code, preformatted blocks,
 * blockquotes, ordered/unordered lists, links, headings and horizontal rules.
 * Images and any other tags are removed. Character references other than
 * `&amp;`, `&lt;` and `&gt;` are decoded.
 *
 * @param html The HTML string to convert
 * @returns The converted Slack mrkdwn string, trimmed of surrounding whitespace
 */
export function htmlToSlackMarkdown(html: string): string {
  // Drop any pre-existing placeholder characters so they cannot be confused
  // with the link markers inserted below.
  let markdown = html.trim().replace(/[\uE000\uE001]/g, "");

  // Preformatted text is handled before inline <code> so that
  // <pre><code>…</code></pre> becomes one fenced block without stray backticks.
  markdown = markdown.replace(
    /<pre>(?:<code>)?(.*?)(?:<\/code>)?<\/pre>/gs,
    "```\n$1\n```\n",
  );

  // Paragraphs: every opening or closing <p> is a paragraph break. This also
  // covers sources that use a bare <p> as a separator and never close it.
  markdown = markdown.replace(/<\/?p(?:\s[^>]*)?>/gi, "\n\n");

  // Line breaks
  markdown = markdown.replace(/<br\s*\/?>/gi, "\n");

  // Bold text (closing tag must match the opening tag)
  markdown = markdown.replace(/<(strong|b)>(.*?)<\/\1>/gs, "*$2*");

  // Italic text (closing tag must match the opening tag)
  markdown = markdown.replace(/<(em|i)>(.*?)<\/\1>/gs, "_$2_");

  // Inline code
  markdown = markdown.replace(/<code>(.*?)<\/code>/gs, "`$1`");

  // Blockquotes: prefix every line of the quoted content
  markdown = markdown.replace(
    /<blockquote>(.*?)<\/blockquote>/gs,
    (_match: string, content: string) =>
      content
        .split("\n")
        .map((line) => "> " + line)
        .join("\n") + "\n",
  );

  // Lists: a stack tracks the kind and running number of each open list, so
  // ordered lists are numbered and numbering survives a nested list.
  const listStack: { ordered: boolean; counter: number }[] = [];
  markdown = markdown.replace(
    /<(\/?)(ul|ol|li)(?:\s[^>]*)?>/gi,
    (_match: string, slash: string, tag: string) => {
      const name = tag.toLowerCase();
      const isClosing = slash === "/";

      if (name === "li") {
        if (isClosing) {
          return "\n";
        }
        const current = listStack[listStack.length - 1];
        if (current?.ordered) {
          current.counter += 1;
          return `${current.counter}. `;
        }
        return "• ";
      }

      if (isClosing) {
        listStack.pop();
        return "\n";
      }

      // A list opened inside another list starts on its own line.
      const isNested = listStack.length > 0;
      listStack.push({ ordered: name === "ol", counter: 0 });
      return isNested ? "\n" : "";
    },
  );

  // Links: emitted with placeholder brackets so the tag cleanup below keeps them.
  markdown = markdown.replace(
    /<a\s+(?:[^>]*?\s+)?href="([^"]*)"(?:\s+[^>]*)?>(.*?)<\/a>/gs,
    (_match: string, href: string, text: string) =>
      text === ""
        ? `${LINK_OPEN}${href}${LINK_CLOSE}`
        : `${LINK_OPEN}${href}|${text}${LINK_CLOSE}`,
  );

  // Headings (h1-h6) become bold lines
  markdown = markdown.replace(/<h([1-6])>(.*?)<\/h\1>/gs, "*$2*\n\n");

  // Horizontal rules
  markdown = markdown.replace(/<hr\s*\/?>/gi, "---\n");

  // Remove all image tags
  markdown = markdown.replace(/<img\s+[^>]*>/g, "");

  // Clean up any remaining HTML tags
  markdown = markdown.replace(/<[^>]+>/g, "");

  // Restore the real angle brackets of the generated Slack links
  markdown = markdown.split(LINK_OPEN).join("<").split(LINK_CLOSE).join(">");

  // Decode character references (e.g. &#x27; and &#x2F;) that Slack would
  // otherwise show literally, including inside link URLs.
  markdown = decodeHtmlEntities(markdown);

  // Fix multiple line breaks
  markdown = markdown.replace(/\n{3,}/g, "\n\n");

  return markdown.trim();
}