Sync articles marked toread in kipclip to Crosspoint Reader (Xteink X4)
5
fork

Configure Feed

Select the types of activity you want to include in your feed.

at main 94 lines 3.0 kB view raw
import { Readability } from "npm:@mozilla/readability@0.5.0";
import { parseHTML } from "npm:linkedom@0.18.9";
import { extractImageUrls, replaceImageUrls, sanitizeForXhtml } from "./html-sanitizer.ts";
import { type ProcessedImage, processImage } from "./image-processor.ts";

/** Abort the article request if it has not finished within this many milliseconds. */
const FETCH_TIMEOUT_MS = 15_000;

/**
 * Maximum number of characters (UTF-16 code units, not bytes) of HTML that is
 * handed on for parsing; longer pages are truncated.
 */
const MAX_HTML_CHARS = 512_000;

/** Result of fetching and cleaning one article. */
export interface ProcessedArticle {
  /** Article title, falling back to the caller-supplied title or the URL hostname. */
  title: string;
  content: string; // sanitized XHTML with local image paths
  excerpt?: string;
  byline?: string;
  /** Images that were successfully processed; empty when images were not requested. */
  images: ProcessedImage[];
}

/**
 * Fetch and clean an article URL using Mozilla Readability.
 *
 * @param url - Address of the article to download.
 * @param fallbackTitle - Title to use when Readability does not report one.
 * @param options - `includeImages` (default `false`): when true, image URLs
 *   found in the content are run through `processImage` and rewritten to
 *   `images/<filename>`; when false, `sanitizeForXhtml` is asked to strip
 *   remote images.
 * @returns The processed article, or `null` when the page could not be
 *   fetched or Readability produced no content.
 */
export async function processArticle(
  url: string,
  fallbackTitle?: string,
  options?: { includeImages?: boolean },
): Promise<ProcessedArticle | null> {
  const html = await fetchArticleHtml(url);
  if (!html) return null;

  const { document } = parseHTML(html);
  // Set documentURI for Readability's URL resolution
  Object.defineProperty(document, "documentURI", { value: url });
  const article = new Readability(document).parse();

  if (!article || !article.content) return null;

  const includeImages = options?.includeImages ?? false;

  // Sanitize HTML to valid XHTML (fix <picture>, void elements, unescaped &)
  let content = sanitizeForXhtml(article.content, {
    stripRemoteImages: !includeImages,
  });

  // Optionally extract, download, and process images (grayscale + dither for e-ink)
  const images: ProcessedImage[] = [];
  if (includeImages) {
    const imageUrls = extractImageUrls(content);
    const urlToPath = new Map<string, string>();

    // Images are handled one at a time; the position in the list is passed
    // to processImage along with the URL.
    for (let i = 0; i < imageUrls.length; i++) {
      const imageUrl = imageUrls[i];
      const processed = await processImage(imageUrl, i);
      if (processed) {
        images.push(processed);
        urlToPath.set(imageUrl, `images/${processed.filename}`);
      }
    }

    // Replace remote URLs with local EPUB paths
    if (urlToPath.size > 0) {
      content = replaceImageUrls(content, urlToPath);
    }
  }

  return {
    // `||` is intentional: an empty-string title should fall through too.
    title: article.title || fallbackTitle || new URL(url).hostname,
    content,
    excerpt: article.excerpt || undefined,
    byline: article.byline || undefined,
    images,
  };
}

/**
 * Download the HTML of `url`.
 *
 * @returns The response text, truncated to `MAX_HTML_CHARS` characters, or
 *   `null` when the request fails, times out, returns a non-2xx status, or
 *   does not declare an HTML content type.
 */
async function fetchArticleHtml(url: string): Promise<string | null> {
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);

  try {
    const res = await fetch(url, {
      signal: controller.signal,
      headers: {
        "User-Agent": "Mozilla/5.0 (compatible; KipclipSync/1.0; +https://kipclip.com)",
        Accept: "text/html,application/xhtml+xml",
      },
    });

    if (!res.ok) {
      // Release the connection instead of leaving the body unread.
      await res.body?.cancel();
      return null;
    }

    // Media types are case-insensitive; "application/xhtml+xml" also contains "html".
    const contentType = (res.headers.get("content-type") ?? "").toLowerCase();
    if (!contentType.includes("html")) {
      await res.body?.cancel();
      return null;
    }

    const text = await res.text();
    // Cap the page size to avoid processing enormous pages
    return text.length > MAX_HTML_CHARS ? text.slice(0, MAX_HTML_CHARS) : text;
  } catch {
    // Network errors, aborts (timeout) and body-read failures all mean "no article".
    return null;
  } finally {
    clearTimeout(timeout);
  }
}