A browser extension that lets you summarize any webpage and ask questions using AI.
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: new Reddit thread extraction with unsupported UI (not LLM)

+316 -8
+92 -5
popup/popup.js
··· 13 13 let contentWasTruncated = false; // Track if content was truncated during extraction 14 14 /** TEMP: remove after testing — "readability" | "legacy" | "unknown" */ 15 15 let contentExtractionSource = "unknown"; 16 + /** When set, page text must not be sent to the model; user sees Reddit-specific UI instead. */ 17 + let extractionUnsupportedReason = null; // "old-reddit" | "non-discussion" 16 18 17 19 // Streaming state 18 20 let streamingChatContent = ""; ··· 479 481 480 482 // If we have cached content for this tab, restore it 481 483 if (cachedContent && cachedContent.url === currentTabUrl) { 484 + if (cachedContent.unsupportedReason) { 485 + extractionUnsupportedReason = cachedContent.unsupportedReason; 486 + currentPageContent = ""; 487 + contentWasTruncated = false; 488 + contentExtractionSource = 489 + cachedContent.extractionSource || "reddit-unsupported"; 490 + isExtracting = false; 491 + summarizeBtn.disabled = false; 492 + setSummarizeLabel("Quick Summary"); 493 + showRedditUnsupportedError(extractionUnsupportedReason); 494 + return; 495 + } 496 + 497 + extractionUnsupportedReason = null; 482 498 currentPageContent = cachedContent.content; 483 499 contentWasTruncated = cachedContent.wasTruncated || false; 484 500 contentExtractionSource = cachedContent.extractionSource || "unknown"; ··· 532 548 // No cache, extract fresh content 533 549 await extractPageContent(); 534 550 isExtracting = false; 551 + 552 + if (extractionUnsupportedReason) { 553 + showRedditUnsupportedError(extractionUnsupportedReason); 554 + return; 555 + } 556 + 535 557 summarizeBtn.disabled = false; 536 558 setSummarizeLabel("Quick Summary"); 537 559 ··· 562 584 chatHistory = []; 563 585 generatedSuggestions = []; 564 586 contentWasTruncated = false; 587 + extractionUnsupportedReason = null; 565 588 resultContainer.innerHTML = ""; 566 589 resultContainer.classList.add("hidden"); 567 590 initialState.classList.remove("hidden"); ··· 582 605 showToast("Still loading page 
content…"); 583 606 return; 584 607 } 608 + if (extractionUnsupportedReason) { 609 + showToast(redditUnsupportedShortLine(extractionUnsupportedReason)); 610 + return; 611 + } 585 612 if (!currentPageContent) { 586 613 showToast("Could not extract page content. Try refreshing."); 587 614 return; ··· 608 635 currentPageContent = ""; 609 636 contentWasTruncated = false; 610 637 contentExtractionSource = "unknown"; 638 + extractionUnsupportedReason = null; 611 639 return; 612 640 } 613 641 ··· 616 644 tabId: currentTabId, 617 645 }); 618 646 619 - if (response && response.success && response.content) { 620 - currentPageContent = response.content; 647 + if (response && response.success) { 648 + contentExtractionSource = response.extractionSource || "unknown"; 621 649 contentWasTruncated = response.wasTruncated || false; 622 - contentExtractionSource = response.extractionSource || "unknown"; 623 - // Cache the content with truncation info 624 - if (currentTabId) { 650 + 651 + if ( 652 + response.extractionSource === "reddit-unsupported" && 653 + response.unsupportedReason 654 + ) { 655 + currentPageContent = ""; 656 + extractionUnsupportedReason = response.unsupportedReason; 657 + if (currentTabId) { 658 + await chrome.storage.session.set({ 659 + [CONTENT_CACHE_PREFIX + currentTabId]: { 660 + content: "", 661 + wasTruncated: false, 662 + url: currentTabUrl, 663 + extractionSource: contentExtractionSource, 664 + unsupportedReason: extractionUnsupportedReason, 665 + }, 666 + }); 667 + } 668 + return; 669 + } 670 + 671 + extractionUnsupportedReason = null; 672 + currentPageContent = response.content || ""; 673 + if (currentTabId && currentPageContent) { 625 674 await chrome.storage.session.set({ 626 675 [CONTENT_CACHE_PREFIX + currentTabId]: { 627 676 content: currentPageContent, ··· 631 680 }, 632 681 }); 633 682 } 683 + } else { 684 + extractionUnsupportedReason = null; 634 685 } 635 686 } catch (error) { 636 687 console.error("Error extracting content:", error); 637 688 
currentPageContent = ""; 638 689 contentWasTruncated = false; 639 690 contentExtractionSource = "unknown"; 691 + extractionUnsupportedReason = null; 640 692 } 641 693 } 642 694 ··· 736 788 if (isLoading) return; 737 789 if (isExtracting) { 738 790 showToast("Still loading page content…"); 791 + return; 792 + } 793 + if (extractionUnsupportedReason) { 794 + showToast(redditUnsupportedShortLine(extractionUnsupportedReason)); 739 795 return; 740 796 } 741 797 if (!currentPageContent) { ··· 1014 1070 }); 1015 1071 1016 1072 async function generateQuickSummary() { 1073 + if (extractionUnsupportedReason) { 1074 + showRedditUnsupportedError(extractionUnsupportedReason); 1075 + return; 1076 + } 1077 + 1017 1078 bumpSuggestionsTask(); 1018 1079 setLoading(true); 1019 1080 currentSummaryMode = "quick"; ··· 1640 1701 </div> 1641 1702 `; 1642 1703 } 1704 + 1705 + const REDDIT_UNSUPPORTED_COPY = { 1706 + "old-reddit": { 1707 + title: "Old Reddit isn’t supported for thread extraction", 1708 + message: 1709 + "Open the same post on www.reddit.com (new Reddit), then open Summarizer again.", 1710 + }, 1711 + "non-discussion": { 1712 + title: "This Reddit page can’t be summarized with focused extraction yet", 1713 + message: 1714 + "Only new-Reddit discussion threads are supported (URLs containing /comments/). Feeds, profiles, and other Reddit pages aren’t covered yet.", 1715 + }, 1716 + }; 1717 + 1718 + function redditUnsupportedShortLine(reason) { 1719 + if (reason === "old-reddit") { 1720 + return "Open this thread on www.reddit.com to summarize it."; 1721 + } 1722 + return "Open a post’s comment thread on new Reddit to summarize it."; 1723 + } 1724 + 1725 + function showRedditUnsupportedError(reason) { 1726 + const copy = 1727 + REDDIT_UNSUPPORTED_COPY[reason] || REDDIT_UNSUPPORTED_COPY["non-discussion"]; 1728 + showUnsupportedExtractionError(copy.title, copy.message); 1729 + }
+5 -3
scripts/background.js
··· 303 303 } 304 304 305 305 // Readability must load before content.js (content.js is not bundled with it). 306 + // reddit-new.js registers window.__webaiTryRedditNew for new-Reddit thread extraction. 306 307 await chrome.scripting.executeScript({ 307 308 target: { tabId }, 308 - files: ["scripts/Readability.js", "scripts/content.js"], 309 + files: ["scripts/Readability.js", "scripts/reddit-new.js", "scripts/content.js"], 309 310 }); 310 311 311 312 const response = await chrome.tabs.sendMessage(tabId, { action: "extract" }); 312 - if (!response || !response.content) { 313 + if (!response) { 313 314 return { content: "", wasTruncated: false }; 314 315 } 315 316 316 317 return { 317 - content: response.content, 318 + content: response.content ?? "", 318 319 wasTruncated: Boolean(response.wasTruncated), 319 320 extractionSource: response.extractionSource ?? "unknown", 321 + unsupportedReason: response.unsupportedReason ?? null, 320 322 }; 321 323 } 322 324
+24
scripts/content.js
··· 522 522 ); 523 523 } 524 524 525 + function tryRedditShredditExtract() { 526 + if (typeof window.__webaiTryRedditNew !== "function") { 527 + return null; 528 + } 529 + try { 530 + return window.__webaiTryRedditNew(MAX_LENGTH); 531 + } catch (err) { 532 + console.error("[Summarizer] Reddit extract error:", err); 533 + return null; 534 + } 535 + } 536 + 525 537 function extractContent() { 538 + const redditResult = tryRedditShredditExtract(); 539 + if (redditResult) { 540 + logExtractionDebug(redditResult.extractionSource, redditResult.text); 541 + return { 542 + text: redditResult.text, 543 + wasTruncated: Boolean(redditResult.wasTruncated), 544 + extractionSource: redditResult.extractionSource, 545 + unsupportedReason: redditResult.unsupportedReason, 546 + }; 547 + } 548 + 526 549 try { 527 550 const readability = extractWithReadability(); 528 551 const rbLen = readability.text.trim().length; ··· 628 651 content: result.text, 629 652 wasTruncated: result.wasTruncated, 630 653 extractionSource: result.extractionSource, 654 + unsupportedReason: result.unsupportedReason, 631 655 }); 632 656 } 633 657 return true;
+195
scripts/reddit-new.js
// New Reddit (shreddit) discussion-thread extraction for content scripts.
// Loaded before scripts/content.js. Exposes window.__webaiTryRedditNew(maxLength).

(function () {
  "use strict";

  /**
   * Hostnames that are handled as new Reddit.
   * @type {Set<string>}
   */
  const NEW_REDDIT_HOSTS = new Set([
    "www.reddit.com",
    "reddit.com",
    "new.reddit.com",
    "m.reddit.com",
    "np.reddit.com",
  ]);

  // Thread URLs: /r/<sub>/comments/<id>… or the short /comments/<id>… form.
  const DISCUSSION_PATH_SUB = /^\/r\/[^/]+\/comments\/[^/]+/;
  const DISCUSSION_PATH_SHORT = /^\/comments\/[^/]+/;

  /** Upper bound on the number of comments written to the output. */
  const MAX_COMMENTS = 90;

  /** Nesting levels beyond this one add no further indentation. */
  const MAX_INDENT_DEPTH = 8;

  /** A partial chunk is only appended when more than this many characters remain. */
  const MIN_PARTIAL_ROOM = 20;

  /**
   * @param {string} host
   * @returns {boolean} true when `host` is one of NEW_REDDIT_HOSTS (case-insensitive).
   */
  function isNewRedditHost(host) {
    return NEW_REDDIT_HOSTS.has(String(host || "").toLowerCase());
  }

  /**
   * @param {string} pathname
   * @returns {boolean} true when the path starts like a comment-thread URL.
   */
  function isDiscussionPath(pathname) {
    const p = pathname || "";
    return DISCUSSION_PATH_SUB.test(p) || DISCUSSION_PATH_SHORT.test(p);
  }

  /**
   * Collapses every whitespace run (including newlines) to one space and trims.
   * @param {unknown} s
   * @returns {string} "" for non-string or empty input.
   */
  function cleanText(s) {
    if (!s || typeof s !== "string") {
      return "";
    }
    return s.replace(/\s+/g, " ").trim();
  }

  /**
   * @param {string} text Already-cleaned comment body.
   * @returns {boolean} true for empty/one-character bodies and "N more replies" placeholders.
   */
  function shouldSkipCommentBody(text) {
    if (!text || text.length < 2) {
      return true;
    }
    return /^(\d+\s*)?(more replies|more reply)\.?$/i.test(text);
  }

  /**
   * Builds the result returned for Reddit pages that are recognised but not extracted.
   * @param {string} reason "old-reddit" | "non-discussion"
   */
  function unsupportedResult(reason) {
    return {
      text: "",
      wasTruncated: false,
      extractionSource: "reddit-unsupported",
      unsupportedReason: reason,
    };
  }

  /**
   * Append-only text buffer that never grows beyond `limit` characters.
   * Once a chunk does not fit, the buffer is flagged truncated and ignores later adds.
   * @param {number} limit
   */
  function createBoundedBuffer(limit) {
    let text = "";
    let truncated = false;

    return {
      /**
       * @param {unknown} s
       * @returns {boolean} true only when the whole chunk was appended.
       */
      add(s) {
        if (truncated) {
          return false;
        }
        const chunk = s == null ? "" : String(s);
        if (text.length + chunk.length <= limit) {
          text += chunk;
          return true;
        }
        truncated = true;
        const room = limit - text.length;
        if (room > MIN_PARTIAL_ROOM) {
          // Reserve one character for the ellipsis so the limit is never exceeded.
          text += `${chunk.slice(0, room - 1).trimEnd()}…`;
        }
        return false;
      },
      /** Flags the buffer as truncated without appending anything. */
      markTruncated() {
        truncated = true;
      },
      get text() {
        return text;
      },
      get wasTruncated() {
        return truncated;
      },
    };
  }

  /**
   * Reads the post body text, first by the `<postId>-post-rtjson-content` id,
   * then via the body element nested in the post.
   * @param {Element} post
   * @returns {string} cleaned body text, or "" when none is found.
   */
  function readPostBody(post) {
    const postId = post.getAttribute("id") || "";
    if (postId) {
      const byId = document.getElementById(`${postId}-post-rtjson-content`);
      const text = cleanText(byId?.textContent || "");
      if (text) {
        return text;
      }
    }
    const fallback = post.querySelector(
      "shreddit-post-text-body [id$='-post-rtjson-content']",
    );
    return cleanText(fallback?.textContent || "");
  }

  /**
   * Builds the metadata header from the post element's attributes.
   * Lines whose value is empty are left out; the URL line is always present.
   * @param {Element} post
   * @returns {string}
   */
  function buildHeader(post) {
    const title =
      cleanText(post.getAttribute("post-title") || "") ||
      cleanText(post.querySelector('h1[slot="title"]')?.textContent || "");
    const sub = cleanText(post.getAttribute("subreddit-prefixed-name") || "");
    const author = cleanText(post.getAttribute("author") || "");
    const score = cleanText(post.getAttribute("score") || "");
    const commentCount = cleanText(post.getAttribute("comment-count") || "");

    return [
      "Reddit thread (new Reddit)\n",
      sub ? `Subreddit: ${sub}\n` : "",
      title ? `Title: ${title}\n` : "",
      author ? `Author: u/${author}\n` : "",
      score ? `Post score: ${score}\n` : "",
      commentCount ? `Comment count (thread): ${commentCount}\n` : "",
      `URL: ${window.location.href}\n`,
    ].join("");
  }

  /**
   * @param {Element} el A `shreddit-comment` element.
   * @returns {string} cleaned comment body ("" when no body element is found).
   */
  function readCommentBody(el) {
    const bodyEl =
      el.querySelector("[id$='-comment-rtjson-content']") ||
      el.querySelector('[slot="comment"]');
    return cleanText(bodyEl?.textContent || "");
  }

  /**
   * Formats one comment as a numbered block.
   * @param {Element} el
   * @param {number} index 1-based position among the emitted comments.
   * @param {string} body Cleaned (single-line) comment text.
   * @returns {string}
   */
  function formatComment(el, index, body) {
    // Clamp at 0: a negative depth would make String.prototype.repeat throw.
    const depth = Math.max(
      0,
      Number.parseInt(el.getAttribute("depth") || "0", 10) || 0,
    );
    const author = cleanText(el.getAttribute("author") || "");
    const score = cleanText(el.getAttribute("score") || "");
    const indent = " ".repeat(Math.min(depth, MAX_INDENT_DEPTH));

    // `body` has no newlines left after cleanText, so a single indent is enough.
    return (
      `\n[${index}] u/${author || "?"} · score ${score || "?"} · depth ${depth}\n` +
      `${indent}${body}\n`
    );
  }

  /**
   * Extracts the post and its comments from a new-Reddit thread page.
   *
   * @param {number} maxLength Maximum number of characters in the returned text.
   * @returns {{ text: string, wasTruncated: boolean, extractionSource: string, unsupportedReason?: string } | null}
   *   - `null` when the page is not a handled Reddit host or no
   *     `shreddit-post` comments-page element is present (caller falls back);
   *   - a "reddit-unsupported" result for old.reddit.com and for non-thread paths;
   *   - otherwise a "reddit-thread" result. `wasTruncated` is true when the text
   *     hit `maxLength` or when comments beyond MAX_COMMENTS were left out.
   */
  function tryRedditNew(maxLength) {
    let host = "";
    try {
      host = String(window.location.hostname || "").toLowerCase();
    } catch (e) {
      return null;
    }

    if (!host || !host.includes("reddit.com")) {
      return null;
    }
    if (host === "old.reddit.com") {
      return unsupportedResult("old-reddit");
    }
    if (!isNewRedditHost(host)) {
      return null;
    }

    let path = "";
    try {
      path = window.location.pathname || "";
    } catch (e2) {
      return null;
    }

    if (!isDiscussionPath(path)) {
      return unsupportedResult("non-discussion");
    }

    const post = document.querySelector(
      'shreddit-post[view-context="CommentsPage"]',
    );
    if (!post) {
      return null;
    }

    const bodyText = readPostBody(post);
    const tree = document.querySelector("shreddit-comment-tree#comment-tree");
    const commentEls = tree
      ? Array.from(tree.querySelectorAll("shreddit-comment"))
      : [];

    const buffer = createBoundedBuffer(maxLength);

    buffer.add(buildHeader(post));
    buffer.add("\n--- Post ---\n");
    buffer.add(
      bodyText ||
        "(No post body text — link/media post or content not loaded.)",
    );
    buffer.add("\n\n--- Comments (in page order) ---\n");

    let emitted = 0;
    for (const el of commentEls) {
      const body = readCommentBody(el);
      if (shouldSkipCommentBody(body)) {
        continue;
      }
      if (emitted >= MAX_COMMENTS) {
        // A real comment is being dropped because of the cap: report it.
        buffer.markTruncated();
        break;
      }
      emitted += 1;
      if (!buffer.add(formatComment(el, emitted, body))) {
        break;
      }
    }

    if (commentEls.length === 0) {
      buffer.add("(No comments found in the DOM — they may still be loading.)");
    }

    return {
      text: buffer.text.trim(),
      wasTruncated: buffer.wasTruncated,
      extractionSource: "reddit-thread",
    };
  }

  // Content scripts always have `window`; the fallback only keeps this file
  // loadable where no window exists (e.g. unit tests).
  const root = typeof window !== "undefined" ? window : globalThis;
  root.__webaiTryRedditNew = tryRedditNew;
})();