feat: new Reddit thread extraction with unsupported UI (not LLM) · ellioth.co/summarizer-extension@583c944

+92 -5

popup/popup.js

·································

+5 -3

scripts/background.js

··· 303 303 } 304 304 305 305 // Readability must load before content.js (content.js is not bundled with it). 306 + // reddit-new.js registers window.__webaiTryRedditNew for new-Reddit thread extraction. 306 307 await chrome.scripting.executeScript({ 307 308 target: { tabId }, 308 - files: ["scripts/Readability.js", "scripts/content.js"], 309 + files: ["scripts/Readability.js", "scripts/reddit-new.js", "scripts/content.js"], 309 310 }); 310 311 311 312 const response = await chrome.tabs.sendMessage(tabId, { action: "extract" }); 312 - if (!response || !response.content) { 313 + if (!response) { 313 314 return { content: "", wasTruncated: false }; 314 315 } 315 316 316 317 return { 317 - content: response.content, 318 + content: response.content ?? "", 318 319 wasTruncated: Boolean(response.wasTruncated), 319 320 extractionSource: response.extractionSource ?? "unknown", 321 + unsupportedReason: response.unsupportedReason ?? null, 320 322 }; 321 323 } 322 324

+24

scripts/content.js

··· 522 522 ); 523 523 } 524 524 525 + function tryRedditShredditExtract() { 526 + if (typeof window.__webaiTryRedditNew !== "function") { 527 + return null; 528 + } 529 + try { 530 + return window.__webaiTryRedditNew(MAX_LENGTH); 531 + } catch (err) { 532 + console.error("[Summarizer] Reddit extract error:", err); 533 + return null; 534 + } 535 + } 536 + 525 537 function extractContent() { 538 + const redditResult = tryRedditShredditExtract(); 539 + if (redditResult) { 540 + logExtractionDebug(redditResult.extractionSource, redditResult.text); 541 + return { 542 + text: redditResult.text, 543 + wasTruncated: Boolean(redditResult.wasTruncated), 544 + extractionSource: redditResult.extractionSource, 545 + unsupportedReason: redditResult.unsupportedReason, 546 + }; 547 + } 548 + 526 549 try { 527 550 const readability = extractWithReadability(); 528 551 const rbLen = readability.text.trim().length; ··· 628 651 content: result.text, 629 652 wasTruncated: result.wasTruncated, 630 653 extractionSource: result.extractionSource, 654 + unsupportedReason: result.unsupportedReason, 631 655 }); 632 656 } 633 657 return true;

+195

scripts/reddit-new.js

// New Reddit (shreddit) discussion-thread extraction for content scripts.
// Loaded before scripts/content.js. Exposes window.__webaiTryRedditNew(maxLength).

(function () {
  "use strict";

  /**
   * Hostnames that are treated as "new Reddit" front-ends.
   * @type {Set<string>}
   */
  const NEW_REDDIT_HOSTS = new Set([
    "www.reddit.com",
    "reddit.com",
    "new.reddit.com",
    "m.reddit.com",
    "np.reddit.com",
  ]);

  // Paths of the form /r/<sub>/comments/<id>... and /comments/<id>...
  const DISCUSSION_PATH_SUB = /^\/r\/[^/]+\/comments\/[^/]+/;
  const DISCUSSION_PATH_SHORT = /^\/comments\/[^/]+/;

  // Upper bound on the number of comment blocks written to the output.
  const MAX_COMMENTS = 90;

  // Indentation is capped so deeply nested replies stay readable.
  const MAX_INDENT_DEPTH = 8;

  /**
   * @param {string} host
   * @returns {boolean} true when `host` (case-insensitive) is in NEW_REDDIT_HOSTS.
   */
  function isNewRedditHost(host) {
    return NEW_REDDIT_HOSTS.has(String(host || "").toLowerCase());
  }

  /**
   * @param {string} pathname
   * @returns {boolean} true when the path matches one of the discussion patterns.
   */
  function isDiscussionPath(pathname) {
    const p = pathname || "";
    return DISCUSSION_PATH_SUB.test(p) || DISCUSSION_PATH_SHORT.test(p);
  }

  /**
   * Collapses every whitespace run (including newlines) to one space and trims.
   * Non-string or empty input yields "".
   * @param {unknown} s
   * @returns {string}
   */
  function cleanText(s) {
    if (!s || typeof s !== "string") return "";
    return s.replace(/\s+/g, " ").trim();
  }

  /**
   * @param {string} text Already-cleaned comment text.
   * @returns {boolean} true for empty/one-character bodies and for
   *   "more replies" placeholder text.
   */
  function shouldSkipCommentBody(text) {
    if (!text || text.length < 2) return true;
    return /^(\d+\s*)?(more replies|more reply)\.?$/i.test(text);
  }

  /**
   * Builds the result returned for Reddit pages this extractor does not handle.
   * @param {string} reason
   */
  function unsupportedResult(reason) {
    return {
      text: "",
      wasTruncated: false,
      extractionSource: "reddit-unsupported",
      unsupportedReason: reason,
    };
  }

  /**
   * Extracts the post and its comments from a new-Reddit thread page.
   *
   * Returns null when the page is not a Reddit host this script knows, when
   * location cannot be read, or when no shreddit-post element for the comments
   * page is present. Returns a "reddit-unsupported" result for old.reddit.com
   * and for new-Reddit pages that are not discussion threads.
   *
   * @param {number} maxLength Maximum length of the accumulated text.
   * @returns {{ text: string, wasTruncated: boolean, extractionSource: string, unsupportedReason?: string } | null}
   */
  function tryRedditNew(maxLength) {
    let host = "";
    try {
      host = String(window.location.hostname || "").toLowerCase();
    } catch {
      return null;
    }

    if (!host || !host.includes("reddit.com")) {
      return null;
    }

    if (host === "old.reddit.com") {
      return unsupportedResult("old-reddit");
    }

    if (!isNewRedditHost(host)) {
      return null;
    }

    let path = "";
    try {
      path = window.location.pathname || "";
    } catch {
      return null;
    }

    if (!isDiscussionPath(path)) {
      return unsupportedResult("non-discussion");
    }

    const post = document.querySelector(
      'shreddit-post[view-context="CommentsPage"]',
    );
    if (!post) {
      return null;
    }

    const postId = post.getAttribute("id") || "";
    const title =
      cleanText(post.getAttribute("post-title") || "") ||
      cleanText(post.querySelector('h1[slot="title"]')?.textContent || "");
    const sub = cleanText(post.getAttribute("subreddit-prefixed-name") || "");
    const author = cleanText(post.getAttribute("author") || "");
    const score = cleanText(post.getAttribute("score") || "");
    const commentCount = cleanText(post.getAttribute("comment-count") || "");

    // Prefer the body element addressed by the post id; fall back to a
    // suffix match inside the post element.
    let bodyText = "";
    if (postId) {
      const bodyEl = document.getElementById(`${postId}-post-rtjson-content`);
      bodyText = cleanText(bodyEl?.textContent || "");
    }
    if (!bodyText) {
      const fallback = post.querySelector(
        "shreddit-post-text-body [id$='-post-rtjson-content']",
      );
      bodyText = cleanText(fallback?.textContent || "");
    }

    const tree = document.querySelector("shreddit-comment-tree#comment-tree");
    const commentEls = tree
      ? Array.from(tree.querySelectorAll("shreddit-comment"))
      : [];

    let out = "";
    let wasTruncated = false;

    /**
     * Appends `s` to the output while respecting maxLength.
     * @returns {boolean} false once the limit has been hit (nothing more is
     *   appended after that point).
     */
    function add(s) {
      if (wasTruncated) return false;
      const chunk = s == null ? "" : String(s);
      if (out.length + chunk.length <= maxLength) {
        out += chunk;
        return true;
      }
      wasTruncated = true;
      const room = maxLength - out.length;
      if (room > 20) {
        // Reserve one character for the ellipsis so the total length never
        // exceeds maxLength.
        out += chunk.slice(0, room - 1).trimEnd() + "…";
      }
      return false;
    }

    const headerLines = [
      "Reddit thread (new Reddit)",
      sub && `Subreddit: ${sub}`,
      title && `Title: ${title}`,
      author && `Author: u/${author}`,
      score && `Post score: ${score}`,
      commentCount && `Comment count (thread): ${commentCount}`,
      `URL: ${window.location.href}`,
    ].filter(Boolean);

    add(`${headerLines.join("\n")}\n`);
    add("\n--- Post ---\n");
    add(
      bodyText ||
        "(No post body text — link/media post or content not loaded.)",
    );
    add("\n\n--- Comments (in page order) ---\n");

    let written = 0;
    for (const el of commentEls) {
      if (written >= MAX_COMMENTS) break;

      // Clamp to >= 0: a negative depth attribute would otherwise make
      // String.prototype.repeat throw and abort the whole extraction.
      const depth = Math.max(
        0,
        Number.parseInt(el.getAttribute("depth") || "0", 10) || 0,
      );
      const cAuthor = cleanText(el.getAttribute("author") || "");
      const cScore = cleanText(el.getAttribute("score") || "");

      const bodyEl =
        el.querySelector("[id$='-comment-rtjson-content']") ||
        el.querySelector('[slot="comment"]');
      const cText = cleanText(bodyEl?.textContent || "");
      if (shouldSkipCommentBody(cText)) continue;

      written += 1;
      const indent = " ".repeat(Math.min(depth, MAX_INDENT_DEPTH));
      // cText is single-line after cleanText, so only one indent is needed.
      const block =
        `\n[${written}] u/${cAuthor || "?"} · score ${cScore || "?"} · depth ${depth}\n` +
        `${indent}${cText}\n`;

      if (!add(block)) {
        break;
      }
    }

    if (commentEls.length === 0) {
      add("(No comments found in the DOM — they may still be loading.)");
    }

    return {
      text: out.trim(),
      wasTruncated,
      extractionSource: "reddit-thread",
    };
  }

  // In a page/content-script context this is always `window`; the fallback
  // only lets the script load where no window global exists.
  const registry = typeof window !== "undefined" ? window : globalThis;
  registry.__webaiTryRedditNew = tryRedditNew;
})();

Configure Feed

Configure Feed