A browser extension that lets you summarize any webpage and ask questions using AI.
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: Ollama keep_alive setting, stream stop, and API hardening

- Options: configurable model keep-alive (Ollama native) with labeled presets
- Popup: the chat Send button becomes Stop while a reply is in progress; stopping also aborts a request that has not started streaming yet
- Background: separate per-tab abort slots for streaming and non-streaming requests, batched stream chunks, `Connection: close` request header
- Config: stream caps, suggestion token limits; pass keepAlive through all Ollama calls

+670 -154
+46 -1
options/options.html
··· 234 234 </p> 235 235 </div> 236 236 237 + <div class="form-group" id="keep-alive-group"> 238 + <label for="keep-alive">Model keep-alive (Ollama)</label> 239 + <select id="keep-alive"> 240 + <option value=""> 241 + Ollama default — don’t override; server picks keep-alive 242 + </option> 243 + <option value="3s"> 244 + 3s — lowest RAM; enough for summary then AI suggestion 245 + chips if you’re quick 246 + </option> 247 + <option value="5s"> 248 + 5s — same idea as 3s, extra slack if the model or disk 249 + is slow between those calls 250 + </option> 251 + <option value="10s"> 252 + 10s — summary + suggestions with time to read before 253 + clicking a follow-up 254 + </option> 255 + <option value="15s"> 256 + 15s — relaxed pace between summary, suggestions, and 257 + your first chat question 258 + </option> 259 + <option value="30s"> 260 + 30s — browse the summary a bit before asking more 261 + </option> 262 + <option value="1m">1 minute</option> 263 + <option value="5m">5 minutes</option> 264 + <option value="15m">15 minutes</option> 265 + </select> 266 + <p class="help"> 267 + How long Ollama keeps the model loaded after a request. 268 + Shorter values free RAM sooner; the next call may wait 269 + while the model loads again. 270 + <strong>Only applies to Ollama native API.</strong> 271 + </p> 272 + </div> 273 + 237 274 <div class="form-group"> 238 275 <label for="auto-summarize"> 239 276 <input type="checkbox" id="auto-summarize" /> ··· 261 298 > 262 299 </p> 263 300 </div> 301 + 302 + <p class="help" style="margin-bottom: 12px"> 303 + Settings save automatically when you change them. You can 304 + still use Save for an explicit confirmation. 
305 + </p> 264 306 265 307 <div class="buttons"> 266 308 <button type="submit" class="btn-primary">Save</button> ··· 292 334 </li> 293 335 <li>Pull a model: <code>ollama pull gemma3:1b</code></li> 294 336 <li>Run: <code>OLLAMA_ORIGINS=* ollama serve</code></li> 295 - <li>Keep default settings and click "Test connection"</li> 337 + <li> 338 + Keep default settings and use 339 + <strong>Test connection</strong> (no need to click Save) 340 + </li> 296 341 </ol> 297 342 </div> 298 343
+89 -11
options/options.js
··· 14 14 const apiKeyInput = document.getElementById("api-key"); 15 15 const disableThinkingInput = document.getElementById("disable-thinking"); 16 16 const thinkingModeGroup = document.getElementById("thinking-mode-group"); 17 + const keepAliveGroup = document.getElementById("keep-alive-group"); 18 + const keepAliveInput = document.getElementById("keep-alive"); 17 19 const autoSummarizeInput = document.getElementById("auto-summarize"); 18 20 const statusDiv = document.getElementById("status"); 19 21 const testBtn = document.getElementById("test-connection"); 20 22 const resetBtn = document.getElementById("reset-defaults"); 23 + 24 + let settingsHydrating = false; 25 + let autoSaveTimer = null; 26 + const AUTO_SAVE_DEBOUNCE_MS = 450; 21 27 22 28 // Build defaultSettings from centralized CONFIG 23 29 const defaultSettings = { ··· 29 35 apiKey: CONFIG.API.KEY, 30 36 disableThinking: CONFIG.API.DISABLE_THINKING, 31 37 autoSummarize: CONFIG.API.AUTO_SUMMARIZE, 38 + keepAlive: CONFIG.API.KEEP_ALIVE, 32 39 }; 33 40 34 41 // Use accent presets from CONFIG ··· 46 53 if (apiModeInput.value === "ollama") { 47 54 apiBaseUrlInput.placeholder = "http://localhost:11434"; 48 55 thinkingModeGroup.style.display = "block"; 56 + keepAliveGroup.style.display = "block"; 49 57 } else { 50 58 apiBaseUrlInput.placeholder = "http://localhost:11434/v1"; 51 59 thinkingModeGroup.style.display = "none"; 60 + keepAliveGroup.style.display = "none"; 52 61 } 62 + scheduleAutoSaveImmediate(); 53 63 }); 54 64 55 65 accentSwatchButtons.forEach((btn) => { ··· 57 67 setSelectedAccentPreset(btn.dataset.accentPreset); 58 68 const color = resolveAccentColor(); 59 69 if (color) applyAccentColor(color); 70 + scheduleAutoSaveImmediate(); 60 71 }); 61 72 }); 62 73 ··· 65 76 if (accentPresetInput.value === "custom" && color) { 66 77 applyAccentColor(color); 67 78 } 79 + scheduleAutoSaveDebounced(); 68 80 }); 69 81 82 + [apiBaseUrlInput, modelInput, apiKeyInput].forEach((el) => { 83 + 
el.addEventListener("input", () => scheduleAutoSaveDebounced()); 84 + }); 85 + 86 + disableThinkingInput.addEventListener("change", () => 87 + scheduleAutoSaveImmediate(), 88 + ); 89 + autoSummarizeInput.addEventListener("change", () => 90 + scheduleAutoSaveImmediate(), 91 + ); 92 + keepAliveInput.addEventListener("change", () => scheduleAutoSaveImmediate()); 93 + 70 94 themeBtn.addEventListener("click", async () => { 71 95 const idx = THEMES.indexOf(currentTheme); 72 96 currentTheme = THEMES[(idx + 1) % THEMES.length]; ··· 82 106 } 83 107 }); 84 108 85 - // Save settings 86 - form.addEventListener("submit", async (e) => { 87 - e.preventDefault(); 88 - 89 - const settings = { 109 + function collectSettingsFromForm() { 110 + return { 90 111 apiMode: apiModeInput.value, 91 112 apiBaseUrl: apiBaseUrlInput.value.trim() || CONFIG.API.BASE_URL, 92 113 model: modelInput.value.trim() || CONFIG.API.MODEL, ··· 95 116 apiKey: apiKeyInput.value.trim(), 96 117 disableThinking: disableThinkingInput.checked, 97 118 autoSummarize: autoSummarizeInput.checked, 119 + keepAlive: keepAliveInput.value, 98 120 }; 121 + } 122 + 123 + function scheduleAutoSaveDebounced() { 124 + if (settingsHydrating) return; 125 + clearTimeout(autoSaveTimer); 126 + autoSaveTimer = setTimeout(() => { 127 + autoSaveTimer = null; 128 + void persistSettings({ auto: true }); 129 + }, AUTO_SAVE_DEBOUNCE_MS); 130 + } 131 + 132 + function scheduleAutoSaveImmediate() { 133 + if (settingsHydrating) return; 134 + clearTimeout(autoSaveTimer); 135 + autoSaveTimer = null; 136 + void persistSettings({ auto: true }); 137 + } 138 + 139 + /** 140 + * @param {{ auto?: boolean, successMessage?: string }} opts 141 + * auto: true = silent skip when custom accent incomplete; short "Saved" toast 142 + */ 143 + async function persistSettings(opts = {}) { 144 + const { auto = false, successMessage } = opts; 145 + if (settingsHydrating) return; 146 + 147 + const settings = collectSettingsFromForm(); 99 148 100 149 if 
(!settings.accentColor) { 150 + if (auto) return; 101 151 showStatus( 102 152 "❌ Custom accent color must be a valid hex code (e.g. #F15B2F).", 103 153 "error", ··· 107 157 108 158 try { 109 159 await chrome.storage.sync.set(settings); 110 - showStatus("✅ Settings saved successfully!", "success"); 160 + const msg = 161 + successMessage ?? 162 + (auto ? "Saved" : "✅ Settings saved successfully!"); 163 + showStatus(msg, "success", auto ? 2500 : undefined); 111 164 } catch (error) { 112 165 showStatus("❌ Error saving settings: " + error.message, "error"); 113 166 } 167 + } 168 + 169 + // Save settings (explicit submit — same as auto-save) 170 + form.addEventListener("submit", async (e) => { 171 + e.preventDefault(); 172 + clearTimeout(autoSaveTimer); 173 + autoSaveTimer = null; 174 + await persistSettings({ auto: false }); 114 175 }); 115 176 116 177 // Test connection ··· 121 182 model: modelInput.value.trim() || CONFIG.API.MODEL, 122 183 apiKey: apiKeyInput.value.trim(), 123 184 disableThinking: disableThinkingInput.checked, 185 + keepAlive: keepAliveInput.value, 124 186 }; 125 187 126 188 showStatus("🔄 Testing connection...", "loading"); ··· 139 201 ], 140 202 apiMode: settings.apiMode, 141 203 disableThinking: settings.disableThinking, 204 + keepAlive: settings.apiMode === "ollama" ? 
settings.keepAlive : "", 142 205 }, 143 206 }); 144 207 ··· 180 243 }); 181 244 182 245 // Reset to defaults 183 - resetBtn.addEventListener("click", () => { 246 + resetBtn.addEventListener("click", async () => { 247 + settingsHydrating = true; 184 248 apiModeInput.value = CONFIG.API.MODE; 185 249 apiBaseUrlInput.value = CONFIG.API.BASE_URL; 186 250 modelInput.value = CONFIG.API.MODEL; ··· 190 254 apiKeyInput.value = CONFIG.API.KEY; 191 255 disableThinkingInput.checked = CONFIG.API.DISABLE_THINKING; 192 256 autoSummarizeInput.checked = false; 193 - // Show/hide thinking mode group based on API mode 257 + keepAliveInput.value = CONFIG.API.KEEP_ALIVE; 194 258 thinkingModeGroup.style.display = 195 259 CONFIG.API.MODE === "ollama" ? "block" : "none"; 196 - showStatus("Settings reset to defaults. Click Save to apply.", "success"); 260 + keepAliveGroup.style.display = 261 + CONFIG.API.MODE === "ollama" ? "block" : "none"; 262 + settingsHydrating = false; 263 + await persistSettings({ 264 + auto: false, 265 + successMessage: "Defaults restored and saved.", 266 + }); 197 267 }); 198 268 199 269 // Keyboard shortcuts link ··· 234 304 235 305 // Load settings 236 306 async function loadSettings() { 307 + settingsHydrating = true; 237 308 try { 238 309 const settings = await chrome.storage.sync.get(defaultSettings); 239 310 ··· 250 321 apiKeyInput.value = settings.apiKey; 251 322 disableThinkingInput.checked = settings.disableThinking; 252 323 autoSummarizeInput.checked = settings.autoSummarize || false; 324 + keepAliveInput.value = 325 + settings.keepAlive != null ? settings.keepAlive : CONFIG.API.KEEP_ALIVE; 253 326 // Show/hide thinking mode group based on API mode 254 327 thinkingModeGroup.style.display = 255 328 settings.apiMode === "ollama" ? "block" : "none"; 329 + keepAliveGroup.style.display = 330 + settings.apiMode === "ollama" ? 
"block" : "none"; 256 331 } catch (error) { 257 332 showStatus("Error loading settings: " + error.message, "error"); 333 + } finally { 334 + settingsHydrating = false; 258 335 } 259 336 } 260 337 ··· 348 425 } 349 426 350 427 // Show status message 351 - function showStatus(message, type) { 428 + function showStatus(message, type, dismissMs) { 352 429 // Replace newlines with <br> for display 353 430 statusDiv.innerHTML = message.replace(/\n/g, "<br>"); 354 431 statusDiv.className = "status " + type; 355 432 356 433 if (type !== "loading") { 434 + const ms = dismissMs ?? 8000; 357 435 setTimeout(() => { 358 436 statusDiv.className = "status"; 359 - }, 8000); 437 + }, ms); 360 438 } 361 439 }
+8
popup/popup.css
··· 925 925 cursor: not-allowed; 926 926 } 927 927 928 + .chat-send-btn.stop-streaming { 929 + background: var(--icon-btn-hover); 930 + } 931 + 932 + .chat-send-btn.stop-streaming:hover { 933 + background: var(--text-em); 934 + } 935 + 928 936 /* Summary action buttons container */ 929 937 .summary-actions { 930 938 display: flex;
+137 -28
popup/popup.js
··· 17 17 let currentStreamTarget = null; // "chat" or "summary" 18 18 let currentStreamElement = null; 19 19 20 + let chatSendDefaultHTML = null; 21 + 22 + function requestCancelStream() { 23 + if (currentTabId != null) { 24 + chrome.runtime.sendMessage({ 25 + action: "cancelStream", 26 + tabId: currentTabId, 27 + }); 28 + } 29 + } 30 + 31 + function setChatSendStopUI() { 32 + chatSendBtn.disabled = false; 33 + chatSendBtn.classList.add("stop-streaming"); 34 + chatSendBtn.title = "Stop generating"; 35 + chatSendBtn.setAttribute("aria-label", "Stop generating"); 36 + if (!chatSendDefaultHTML) { 37 + chatSendDefaultHTML = chatSendBtn.innerHTML; 38 + } 39 + chatSendBtn.innerHTML = 40 + '<svg width="14" height="14" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true"><rect x="6" y="6" width="12" height="12" rx="1"/></svg>'; 41 + } 42 + 43 + function clearChatSendStopUI() { 44 + chatSendBtn.classList.remove("stop-streaming"); 45 + chatSendBtn.title = "Send"; 46 + chatSendBtn.setAttribute("aria-label", "Send"); 47 + if (chatSendDefaultHTML) { 48 + chatSendBtn.innerHTML = chatSendDefaultHTML; 49 + } 50 + chatSendBtn.disabled = false; 51 + } 52 + 53 + /** Bumped on each new chat reply or when user stops; ignores stale async work. */ 54 + let chatReplyGeneration = 0; 55 + 56 + function removePendingChatAssistantBubble() { 57 + resultContainer 58 + .querySelector('.chat-message.assistant[data-chat-pending="1"]') 59 + ?.remove(); 60 + } 61 + 62 + /** 63 + * Stop the current chat reply: aborts the network request (even before streaming starts) 64 + * and cleans up UI when nothing is streaming yet. 
65 + */ 66 + function stopChatReply() { 67 + chatReplyGeneration++; 68 + requestCancelStream(); 69 + 70 + if (currentStreamTarget === "chat") { 71 + return; 72 + } 73 + 74 + isChatLoading = false; 75 + clearChatSendStopUI(); 76 + removePendingChatAssistantBubble(); 77 + currentStreamElement = null; 78 + streamingChatContent = ""; 79 + chatInput.focus(); 80 + } 81 + 20 82 const resultContainer = document.getElementById("result"); 21 83 const initialState = document.getElementById("initial-state"); 22 84 const summarizeBtn = document.getElementById("summarize-btn"); ··· 60 122 disableThinking: CONFIG.API.DISABLE_THINKING, 61 123 accentColor: CONFIG.ACCENTS.DEFAULT_COLOR, 62 124 autoSummarize: CONFIG.API.AUTO_SUMMARIZE, 125 + keepAlive: CONFIG.API.KEEP_ALIVE, 63 126 }; 64 127 65 128 async function getApiSettings() { ··· 162 225 return true; 163 226 } 164 227 if (message.action === "streamDone") { 165 - handleStreamDone(message.error); 228 + handleStreamDone(message); 166 229 return true; 167 230 } 168 231 return false; ··· 180 243 } 181 244 } 182 245 183 - function handleStreamDone(error) { 246 + function handleStreamDone(message) { 247 + const error = message?.error; 248 + const cancelled = Boolean(message?.cancelled); 249 + const truncated = Boolean(message?.truncated); 250 + 184 251 const streamTarget = currentStreamTarget; 185 252 const streamElement = currentStreamElement; 186 253 currentStreamTarget = null; 187 254 currentStreamElement = null; 188 255 189 - if (!streamTarget) return; // Nothing was streaming 256 + if (!streamTarget) return; 257 + 258 + if (streamTarget === "chat") { 259 + clearChatSendStopUI(); 260 + } 261 + 262 + if (truncated && !error) { 263 + showToast("Output was limited to avoid runaway generation."); 264 + } else if (cancelled && !error) { 265 + showToast("Stopped."); 266 + } 190 267 191 268 if (error) { 192 269 showToast("Error: " + withOllamaFetchHint(error)); 193 270 if (streamElement) { 194 271 streamElement.innerHTML = `<div 
class="error-message">${escapeHtml(withOllamaFetchHint(error))}</div>`; 195 272 } 196 - // Still finalize to reset states 197 273 if (streamTarget === "chat") { 198 274 finalizeChatStream(error); 199 275 } else if (streamTarget === "summary") { ··· 222 298 } 223 299 // Render markdown during streaming for consistent formatting 224 300 lastAssistantMsg.innerHTML = renderMarkdown(content); 225 - // Scroll to bottom 226 - const contentContainer = document.querySelector(".content-container"); 227 - contentContainer.scrollTop = contentContainer.scrollHeight; 228 301 } 229 302 } 230 303 ··· 258 331 // Render markdown content 259 332 htmlContent += renderMarkdown(content); 260 333 currentStreamElement.innerHTML = htmlContent; 261 - 262 - // Scroll to show new content 263 - const contentContainer = document.querySelector(".content-container"); 264 - contentContainer.scrollTop = contentContainer.scrollHeight; 265 334 } 266 335 } 267 336 ··· 716 785 const message = chatInput.value.trim(); 717 786 if (!message || isChatLoading || !currentPageContent) return; 718 787 788 + const gen = ++chatReplyGeneration; 789 + 719 790 // Add user message to history 720 791 chatHistory.push({ role: "user", content: message }); 721 792 chatInput.value = ""; 722 793 renderChatMessages(); 723 794 724 - // Show loading state 725 795 isChatLoading = true; 726 - chatSendBtn.disabled = true; 796 + setChatSendStopUI(); 727 797 728 798 // Find chat section or create it 729 799 let chatSection = resultContainer.querySelector(".chat-section"); ··· 738 808 739 809 const loadingEl = document.createElement("div"); 740 810 loadingEl.className = "chat-message assistant loading"; 811 + loadingEl.dataset.chatPending = "1"; 741 812 loadingEl.innerHTML = '<div class="chat-spinner"></div>'; 742 813 chatSection.appendChild(loadingEl); 743 814 ··· 748 819 try { 749 820 const settings = await getApiSettings(); 750 821 822 + if (gen !== chatReplyGeneration) { 823 + return; 824 + } 825 + 751 826 // Increased context 
limit for LLM (was 6000, now 12000) 752 827 const pageContentForLLM = currentPageContent.substring(0, 12000); 753 828 const summaryContent = ··· 789 864 messages: apiMessages, 790 865 apiMode: settings.apiMode, 791 866 disableThinking: settings.disableThinking, 867 + keepAlive: settings.keepAlive, 792 868 }, 793 869 }); 794 870 871 + delete loadingEl.dataset.chatPending; 872 + 795 873 // The response will come via onMessage listener 796 874 } catch (error) { 797 - // Remove loading indicator 875 + if (gen !== chatReplyGeneration) { 876 + return; 877 + } 878 + 798 879 loadingEl.remove(); 799 880 console.error("Chat Error:", error); 800 881 801 - // Show error in chat 802 882 const errorEl = document.createElement("div"); 803 883 errorEl.className = "chat-message assistant error"; 804 884 errorEl.textContent = "Error: " + withOllamaFetchHint(error.message); 805 885 chatSection.appendChild(errorEl); 806 886 807 - // Scroll to show error 808 887 const contentContainer = document.querySelector(".content-container"); 809 888 contentContainer.scrollTop = contentContainer.scrollHeight; 810 889 811 890 isChatLoading = false; 812 - chatSendBtn.disabled = false; 891 + clearChatSendStopUI(); 813 892 chatInput.focus(); 814 893 currentStreamTarget = null; 815 894 currentStreamElement = null; ··· 819 898 // Called when streaming is done (success or error) 820 899 function finalizeChatStream(error) { 821 900 isChatLoading = false; 822 - chatSendBtn.disabled = false; 823 901 chatInput.focus(); 824 902 825 903 if (error) { 826 - showToast("Error: " + withOllamaFetchHint(error)); 904 + return; 905 + } 906 + 907 + if (!streamingChatContent.trim()) { 908 + const chatSection = resultContainer.querySelector(".chat-section"); 909 + const assistantMsgs = chatSection?.querySelectorAll( 910 + ".chat-message.assistant", 911 + ); 912 + assistantMsgs?.[assistantMsgs.length - 1]?.remove(); 827 913 return; 828 914 } 829 915 ··· 847 933 } 848 934 849 935 // Chat event listeners 850 - 
chatSendBtn.addEventListener("click", sendChatMessage); 936 + chatSendBtn.addEventListener("click", () => { 937 + if (isChatLoading) { 938 + stopChatReply(); 939 + return; 940 + } 941 + sendChatMessage(); 942 + }); 851 943 chatInput.addEventListener("keypress", (e) => { 852 944 if (e.key === "Enter" && !e.shiftKey) { 853 945 e.preventDefault(); 946 + if (isChatLoading) { 947 + return; 948 + } 854 949 sendChatMessage(); 855 950 } 856 951 }); ··· 917 1012 messages: apiMessages, 918 1013 apiMode: settings.apiMode, 919 1014 disableThinking: settings.disableThinking, 1015 + keepAlive: settings.keepAlive, 920 1016 }, 921 1017 }); 922 1018 923 - // The response will come via postMessage listener 1019 + // The response will come via onMessage listener 924 1020 } catch (error) { 925 1021 console.error("API Error:", error); 926 1022 resultContainer.innerHTML = `<div class="error-message">${escapeHtml(withOllamaFetchHint(error.message))}</div>`; ··· 931 1027 function finalizeSummaryStream(error) { 932 1028 if (error) { 933 1029 setLoading(false); 934 - if (currentStreamElement) { 935 - currentStreamElement.innerHTML = `<div class="error-message">${escapeHtml(withOllamaFetchHint(error))}</div>`; 936 - } 1030 + return; 1031 + } 1032 + 1033 + if (!streamingSummaryContent.trim()) { 1034 + setLoading(false); 1035 + showToast("No summary was generated."); 1036 + resultContainer.innerHTML = ""; 1037 + resultContainer.classList.add("hidden"); 1038 + initialState.classList.remove("hidden"); 1039 + footer.classList.remove("hidden"); 937 1040 return; 938 1041 } 939 1042 ··· 1001 1104 const response = await chrome.runtime.sendMessage({ 1002 1105 action: "chat", 1003 1106 data: { 1107 + tabId: currentTabId, 1004 1108 apiBaseUrl: settings.apiBaseUrl, 1005 1109 model: settings.model, 1006 1110 apiKey: settings.apiKey, 1007 1111 messages: apiMessages, 1008 1112 apiMode: settings.apiMode, 1009 1113 disableThinking: settings.disableThinking, 1114 + maxOutputTokens: 
CONFIG.API.SUGGESTIONS_MAX_TOKENS, 1115 + keepAlive: settings.keepAlive, 1010 1116 }, 1011 1117 }); 1012 1118 ··· 1016 1122 throw new Error(response?.error || "Failed to generate suggestions"); 1017 1123 } 1018 1124 1019 - const content = 1125 + const rawContent = 1020 1126 response.data.choices?.[0]?.message?.content || 1021 1127 response.data.response || 1022 1128 response.data.message?.content || 1023 1129 ""; 1024 1130 1025 - console.log("Raw suggestions response:", content); 1131 + const content = rawContent.slice(0, CONFIG.API.SUGGESTIONS_MAX_PARSE_CHARS); 1026 1132 1027 1133 // Parse suggestions from the response (one per line) 1028 1134 // More lenient parsing - accept lines with questions or just meaningful content ··· 1118 1224 const response = await chrome.runtime.sendMessage({ 1119 1225 action: "chat", 1120 1226 data: { 1227 + tabId: currentTabId, 1121 1228 apiBaseUrl: settings.apiBaseUrl, 1122 1229 model: settings.model, 1123 1230 apiKey: settings.apiKey, 1124 1231 messages: apiMessages, 1125 1232 apiMode: settings.apiMode, 1126 1233 disableThinking: settings.disableThinking, 1234 + maxOutputTokens: CONFIG.API.SUGGESTIONS_MAX_TOKENS, 1235 + keepAlive: settings.keepAlive, 1127 1236 }, 1128 1237 }); 1129 1238 ··· 1133 1242 throw new Error(response?.error || "Failed to generate suggestions"); 1134 1243 } 1135 1244 1136 - const content = 1245 + const rawContent = 1137 1246 response.data.choices?.[0]?.message?.content || 1138 1247 response.data.response || 1139 1248 response.data.message?.content || 1140 1249 ""; 1141 1250 1142 - console.log("Chat suggestions response:", content); 1251 + const content = rawContent.slice(0, CONFIG.API.SUGGESTIONS_MAX_PARSE_CHARS); 1143 1252 1144 1253 // Parse suggestions from the response 1145 1254 generatedSuggestions = content
+379 -114
scripts/background.js
··· 13 13 const CONTENT_CACHE_PREFIX = CONFIG.CACHE.CONTENT; 14 14 const CHAT_CACHE_PREFIX = CONFIG.CACHE.CHAT; 15 15 16 + /** 17 + * Per-tab inflight Ollama/API state: one slot for streaming, one for non-stream (chat/suggestions). 18 + * - New stream aborts everything (user superseded or regenerate). 19 + * - New non-stream only aborts prior non-stream, never an active stream (avoids races with post-stream work). 20 + */ 21 + const inflightByTab = new Map(); 22 + 23 + function abortAllInflightForTab(tabId) { 24 + if (tabId == null) return; 25 + const e = inflightByTab.get(tabId); 26 + if (!e) return; 27 + if (e.stream) e.stream.abort(); 28 + if (e.other) e.other.abort(); 29 + inflightByTab.delete(tabId); 30 + } 31 + 32 + function takeStreamSlot(tabId) { 33 + if (tabId == null) return new AbortController(); 34 + abortAllInflightForTab(tabId); 35 + const c = new AbortController(); 36 + inflightByTab.set(tabId, { stream: c }); 37 + return c; 38 + } 39 + 40 + function releaseStreamSlot(tabId, controller) { 41 + if (tabId == null) return; 42 + const e = inflightByTab.get(tabId); 43 + if (e && e.stream === controller) { 44 + if (e.other) { 45 + inflightByTab.set(tabId, { other: e.other }); 46 + } else { 47 + inflightByTab.delete(tabId); 48 + } 49 + } 50 + } 51 + 52 + function takeOtherSlot(tabId) { 53 + if (tabId == null) return new AbortController(); 54 + const e = inflightByTab.get(tabId) || {}; 55 + if (e.other) e.other.abort(); 56 + const c = new AbortController(); 57 + inflightByTab.set(tabId, { ...e, other: c }); 58 + return c; 59 + } 60 + 61 + function releaseOtherSlot(tabId, controller) { 62 + if (tabId == null) return; 63 + const e = inflightByTab.get(tabId); 64 + if (e && e.other === controller) { 65 + if (e.stream) { 66 + inflightByTab.set(tabId, { stream: e.stream }); 67 + } else { 68 + inflightByTab.delete(tabId); 69 + } 70 + } 71 + } 72 + 73 + /** Coalesce tiny stream tokens into fewer runtime messages (easier on SW + popup; less backpressure). 
*/ 74 + const CHUNK_BATCH_MAX_CHARS = 512; 75 + const CHUNK_BATCH_MS = 20; 76 + 77 + let streamChunkBuffer = ""; 78 + let streamChunkTimer = null; 79 + 80 + function resetStreamChunkBatching() { 81 + if (streamChunkTimer) { 82 + clearTimeout(streamChunkTimer); 83 + streamChunkTimer = null; 84 + } 85 + streamChunkBuffer = ""; 86 + } 87 + 88 + function flushStreamChunks() { 89 + streamChunkTimer = null; 90 + if (!streamChunkBuffer) return; 91 + const chunk = streamChunkBuffer; 92 + streamChunkBuffer = ""; 93 + chrome.runtime 94 + .sendMessage({ action: "streamChunk", chunk, done: false }) 95 + .catch(() => {}); 96 + } 97 + 98 + function queueStreamChunk(piece) { 99 + streamChunkBuffer += piece; 100 + if (streamChunkBuffer.length >= CHUNK_BATCH_MAX_CHARS) { 101 + if (streamChunkTimer) { 102 + clearTimeout(streamChunkTimer); 103 + streamChunkTimer = null; 104 + } 105 + flushStreamChunks(); 106 + } else if (!streamChunkTimer) { 107 + streamChunkTimer = setTimeout(flushStreamChunks, CHUNK_BATCH_MS); 108 + } 109 + } 110 + 111 + function finalizeStreamChunkBatching() { 112 + if (streamChunkTimer) { 113 + clearTimeout(streamChunkTimer); 114 + streamChunkTimer = null; 115 + } 116 + flushStreamChunks(); 117 + } 118 + 119 + /** Ollama /api/generate keep_alive — null means omit so the server uses its default. 
*/ 120 + function normalizeOllamaKeepAlive(keepAlive) { 121 + if (keepAlive == null) return null; 122 + const s = String(keepAlive).trim(); 123 + return s || null; 124 + } 125 + 126 + function attachOllamaKeepAlive(requestBody, keepAlive) { 127 + const v = normalizeOllamaKeepAlive(keepAlive); 128 + if (v) requestBody.keep_alive = v; 129 + } 130 + 16 131 // ── Prompt templates from CONFIG ───────────────────────────────────────── 17 132 const OLLAMA_CONTEXT_TEMPLATE = CONFIG.OLLAMA.CONTEXT_TEMPLATE; 18 133 const OLLAMA_SINGLE_MESSAGE_TEMPLATE = CONFIG.OLLAMA.SINGLE_MESSAGE_TEMPLATE; ··· 29 144 apiKey: CONFIG.API.KEY, 30 145 disableThinking: CONFIG.API.DISABLE_THINKING, 31 146 autoSummarize: CONFIG.API.AUTO_SUMMARIZE, 147 + keepAlive: CONFIG.API.KEEP_ALIVE, 32 148 }); 33 149 } 34 150 }); ··· 145 261 const { tabId } = request; 146 262 handleStreamChatRequest(request.data, tabId).catch((error) => { 147 263 console.error("Stream chat error:", error); 148 - chrome.tabs.sendMessage(tabId, { 149 - action: "streamDone", 150 - error: error.message, 151 - }); 264 + sendStreamDoneToExtension({ error: error.message }); 152 265 }); 153 - return false; // We handle the response ourselves via sendMessage to tab 266 + return false; // Popup receives streamChunk/streamDone via runtime messages 267 + } 268 + 269 + if (request.action === "cancelStream") { 270 + const tabId = request.tabId; 271 + if (tabId != null) { 272 + abortAllInflightForTab(tabId); 273 + } 274 + return false; 154 275 } 155 276 156 277 if (request.action === "testOllama") { ··· 210 331 } 211 332 212 333 async function handleChatRequest(data) { 213 - const { apiBaseUrl, model, apiKey, messages, apiMode, disableThinking } = 214 - data; 334 + const { 335 + tabId, 336 + apiBaseUrl, 337 + model, 338 + apiKey, 339 + messages, 340 + apiMode, 341 + disableThinking, 342 + maxOutputTokens, 343 + keepAlive, 344 + } = data; 215 345 216 - let useNativeOllama = apiMode === "ollama"; 346 + const tokenCap = 347 + typeof 
maxOutputTokens === "number" && maxOutputTokens > 0 348 + ? maxOutputTokens 349 + : CONFIG.API.MAX_TOKENS; 217 350 218 - if (useNativeOllama) { 219 - return await callOllamaNative(apiBaseUrl, model, messages, disableThinking); 220 - } else { 221 - return await callOpenAICompatible(apiBaseUrl, model, apiKey, messages); 351 + const controller = takeOtherSlot(tabId); 352 + const signal = controller.signal; 353 + 354 + try { 355 + const useNativeOllama = apiMode === "ollama"; 356 + 357 + if (useNativeOllama) { 358 + return await callOllamaNative( 359 + apiBaseUrl, 360 + model, 361 + messages, 362 + disableThinking, 363 + tokenCap, 364 + signal, 365 + keepAlive, 366 + ); 367 + } else { 368 + return await callOpenAICompatible( 369 + apiBaseUrl, 370 + model, 371 + apiKey, 372 + messages, 373 + tokenCap, 374 + signal, 375 + ); 376 + } 377 + } catch (error) { 378 + if (error.name === "AbortError" || signal.aborted) { 379 + throw new Error("Request cancelled"); 380 + } 381 + throw error; 382 + } finally { 383 + releaseOtherSlot(tabId, controller); 222 384 } 223 385 } 224 386 225 387 async function handleStreamChatRequest(data, tabId) { 226 - const { apiBaseUrl, model, apiKey, messages, apiMode, disableThinking } = 227 - data; 388 + const { 389 + apiBaseUrl, 390 + model, 391 + apiKey, 392 + messages, 393 + apiMode, 394 + disableThinking, 395 + keepAlive, 396 + } = data; 228 397 229 - let useNativeOllama = apiMode === "ollama"; 398 + const controller = takeStreamSlot(tabId); 399 + const signal = controller.signal; 400 + 401 + try { 402 + const useNativeOllama = apiMode === "ollama"; 230 403 231 - if (useNativeOllama) { 232 - await callOllamaNativeStream( 233 - apiBaseUrl, 234 - model, 235 - messages, 236 - disableThinking, 237 - tabId, 238 - ); 239 - } else { 240 - await callOpenAICompatibleStream( 241 - apiBaseUrl, 242 - model, 243 - apiKey, 244 - messages, 245 - tabId, 246 - ); 404 + if (useNativeOllama) { 405 + await callOllamaNativeStream( 406 + apiBaseUrl, 407 + model, 
408 + messages, 409 + disableThinking, 410 + signal, 411 + keepAlive, 412 + ); 413 + } else { 414 + await callOpenAICompatibleStream( 415 + apiBaseUrl, 416 + model, 417 + apiKey, 418 + messages, 419 + signal, 420 + ); 421 + } 422 + } finally { 423 + releaseStreamSlot(tabId, controller); 247 424 } 248 425 } 249 426 250 - async function callOllamaNative(baseUrl, model, messages, disableThinking) { 427 + async function callOllamaNative( 428 + baseUrl, 429 + model, 430 + messages, 431 + disableThinking, 432 + maxTokens = CONFIG.API.MAX_TOKENS, 433 + signal, 434 + keepAlive, 435 + ) { 251 436 // Merge all system messages into one so none are dropped 252 437 const systemMsgs = messages.filter((m) => m.role === "system"); 253 438 const systemContent = systemMsgs.map((m) => m.content).join("\n\n"); ··· 279 464 stream: false, 280 465 options: { 281 466 temperature: CONFIG.API.TEMPERATURE, 282 - num_predict: CONFIG.API.MAX_TOKENS, 467 + num_predict: maxTokens, 283 468 }, 284 469 }; 285 470 ··· 289 474 requestBody.think = false; 290 475 } 291 476 292 - const response = await fetch(url, { 477 + attachOllamaKeepAlive(requestBody, keepAlive); 478 + 479 + const fetchOpts = { 293 480 method: "POST", 294 481 headers: { 295 482 "Content-Type": "application/json", 483 + Connection: "close", 296 484 }, 297 485 body: JSON.stringify(requestBody), 298 - }); 486 + }; 487 + if (signal) fetchOpts.signal = signal; 488 + 489 + const response = await fetch(url, fetchOpts); 299 490 300 491 if (!response.ok) { 301 492 const text = await response.text(); ··· 329 520 }; 330 521 } 331 522 332 - async function callOpenAICompatible(baseUrl, model, apiKey, messages) { 523 + async function callOpenAICompatible( 524 + baseUrl, 525 + model, 526 + apiKey, 527 + messages, 528 + maxTokens = CONFIG.API.MAX_TOKENS, 529 + signal, 530 + ) { 333 531 let url = baseUrl.replace(/\/$/, ""); 334 532 335 533 if (!url.includes("/v1")) { ··· 338 536 339 537 url = url + "/chat/completions"; 340 538 341 - const response = 
await fetch(url, { 539 + const fetchOpts = { 342 540 method: "POST", 343 541 headers: { 344 542 "Content-Type": "application/json", ··· 348 546 model: model, 349 547 messages: messages, 350 548 stream: false, 351 - max_tokens: CONFIG.API.MAX_TOKENS, 549 + max_tokens: maxTokens, 352 550 }), 353 - }); 551 + }; 552 + if (signal) fetchOpts.signal = signal; 553 + 554 + const response = await fetch(url, fetchOpts); 354 555 355 556 if (!response.ok) { 356 557 const text = await response.text(); ··· 382 583 return await response.json(); 383 584 } 384 585 586 + function sendStreamDoneToExtension(payload = {}) { 587 + if (payload.error) { 588 + resetStreamChunkBatching(); 589 + } else { 590 + finalizeStreamChunkBatching(); 591 + } 592 + chrome.runtime.sendMessage({ action: "streamDone", ...payload }).catch(() => {}); 593 + } 594 + 385 595 async function callOllamaNativeStream( 386 596 baseUrl, 387 597 model, 388 598 messages, 389 599 disableThinking, 390 - tabId, 600 + signal, 601 + keepAlive, 391 602 ) { 392 603 const systemMsgs = messages.filter((m) => m.role === "system"); 393 604 const systemContent = systemMsgs.map((m) => m.content).join("\n\n"); ··· 425 636 requestBody.think = false; 426 637 } 427 638 639 + attachOllamaKeepAlive(requestBody, keepAlive); 640 + 641 + resetStreamChunkBatching(); 642 + let hitMaxChars = false; 643 + 428 644 try { 429 645 const response = await fetch(url, { 430 646 method: "POST", 431 647 headers: { 432 648 "Content-Type": "application/json", 649 + Connection: "close", 433 650 }, 434 651 body: JSON.stringify(requestBody), 652 + signal, 435 653 }); 436 654 437 655 if (!response.ok) { ··· 452 670 } 453 671 454 672 const reader = response.body.getReader(); 455 - const decoder = new TextDecoder(); 456 - let buffer = ""; 673 + try { 674 + const decoder = new TextDecoder(); 675 + let buffer = ""; 676 + let streamedChars = 0; 677 + const maxChars = CONFIG.API.STREAM_MAX_OUTPUT_CHARS; 678 + 679 + while (true) { 680 + let readResult; 681 + try { 682 
+ readResult = await reader.read(); 683 + } catch (readErr) { 684 + if (readErr.name === "AbortError" || signal.aborted) { 685 + sendStreamDoneToExtension({ cancelled: true }); 686 + return; 687 + } 688 + throw readErr; 689 + } 457 690 458 - while (true) { 459 - const { done, value } = await reader.read(); 460 - if (done) break; 691 + const { done, value } = readResult; 692 + if (done) break; 461 693 462 - buffer += decoder.decode(value, { stream: true }); 463 - const lines = buffer.split("\n"); 464 - buffer = lines.pop() || ""; 694 + buffer += decoder.decode(value, { stream: true }); 695 + const lines = buffer.split("\n"); 696 + buffer = lines.pop() || ""; 465 697 466 - for (const line of lines) { 467 - if (line.trim()) { 468 - try { 469 - const json = JSON.parse(line); 470 - if (json.response) { 471 - chrome.runtime 472 - .sendMessage({ 473 - action: "streamChunk", 474 - chunk: json.response, 475 - done: false, 476 - }) 477 - .catch(() => {}); 698 + for (const line of lines) { 699 + if (line.trim()) { 700 + try { 701 + const json = JSON.parse(line); 702 + if (json.response) { 703 + const piece = json.response; 704 + if (streamedChars + piece.length > maxChars) { 705 + hitMaxChars = true; 706 + reader.cancel().catch(() => {}); 707 + break; 708 + } 709 + streamedChars += piece.length; 710 + queueStreamChunk(piece); 711 + } 712 + } catch (e) { 713 + // Skip invalid JSON lines 478 714 } 479 - } catch (e) { 480 - // Skip invalid JSON lines 481 715 } 482 716 } 717 + 718 + if (hitMaxChars) break; 483 719 } 484 - } 485 720 486 - // Streaming complete - send done message 487 - chrome.runtime 488 - .sendMessage({ 489 - action: "streamDone", 490 - }) 491 - .catch(() => {}); 721 + if (hitMaxChars) { 722 + sendStreamDoneToExtension({ truncated: true }); 723 + } else { 724 + sendStreamDoneToExtension(); 725 + } 726 + } finally { 727 + try { 728 + await reader.cancel(); 729 + } catch (e) { 730 + /* stream may already be closed */ 731 + } 732 + } 492 733 } catch (error) { 493 - 
chrome.runtime 494 - .sendMessage({ 495 - action: "streamDone", 496 - error: error.message, 497 - }) 498 - .catch(() => {}); 734 + if (error.name === "AbortError" || signal.aborted) { 735 + sendStreamDoneToExtension({ cancelled: true }); 736 + return; 737 + } 738 + sendStreamDoneToExtension({ error: error.message }); 499 739 } 500 740 } 501 741 ··· 504 744 model, 505 745 apiKey, 506 746 messages, 507 - tabId, 747 + signal, 508 748 ) { 509 749 let url = baseUrl.replace(/\/$/, ""); 510 750 ··· 514 754 515 755 url = url + "/chat/completions"; 516 756 757 + resetStreamChunkBatching(); 758 + let hitMaxChars = false; 759 + 517 760 try { 518 761 const response = await fetch(url, { 519 762 method: "POST", ··· 527 770 stream: true, 528 771 max_tokens: CONFIG.API.MAX_TOKENS, 529 772 }), 773 + signal, 530 774 }); 531 775 532 776 if (!response.ok) { ··· 556 800 } 557 801 558 802 const reader = response.body.getReader(); 559 - const decoder = new TextDecoder(); 560 - let buffer = ""; 803 + try { 804 + const decoder = new TextDecoder(); 805 + let buffer = ""; 806 + let streamedChars = 0; 807 + const maxChars = CONFIG.API.STREAM_MAX_OUTPUT_CHARS; 561 808 562 - while (true) { 563 - const { done, value } = await reader.read(); 564 - if (done) break; 809 + while (true) { 810 + let readResult; 811 + try { 812 + readResult = await reader.read(); 813 + } catch (readErr) { 814 + if (readErr.name === "AbortError" || signal.aborted) { 815 + sendStreamDoneToExtension({ cancelled: true }); 816 + return; 817 + } 818 + throw readErr; 819 + } 820 + 821 + const { done, value } = readResult; 822 + if (done) break; 565 823 566 - buffer += decoder.decode(value, { stream: true }); 567 - const lines = buffer.split("\n"); 568 - buffer = lines.pop() || ""; 824 + buffer += decoder.decode(value, { stream: true }); 825 + const lines = buffer.split("\n"); 826 + buffer = lines.pop() || ""; 569 827 570 - for (const line of lines) { 571 - if (line.trim() && line.startsWith("data: ")) { 572 - const data = 
line.slice(6); 573 - if (data === "[DONE]") continue; 574 - try { 575 - const json = JSON.parse(data); 576 - const content = json.choices?.[0]?.delta?.content; 577 - if (content) { 578 - chrome.runtime 579 - .sendMessage({ 580 - action: "streamChunk", 581 - chunk: content, 582 - done: false, 583 - }) 584 - .catch(() => {}); 828 + for (const line of lines) { 829 + if (line.trim() && line.startsWith("data: ")) { 830 + const data = line.slice(6); 831 + if (data === "[DONE]") continue; 832 + try { 833 + const json = JSON.parse(data); 834 + const content = json.choices?.[0]?.delta?.content; 835 + if (content) { 836 + if (streamedChars + content.length > maxChars) { 837 + hitMaxChars = true; 838 + reader.cancel().catch(() => {}); 839 + break; 840 + } 841 + streamedChars += content.length; 842 + queueStreamChunk(content); 843 + } 844 + } catch (e) { 845 + // Skip invalid JSON lines 585 846 } 586 - } catch (e) { 587 - // Skip invalid JSON lines 588 847 } 589 848 } 849 + 850 + if (hitMaxChars) break; 590 851 } 591 - } 592 852 593 - // Streaming complete - send done message 594 - chrome.runtime 595 - .sendMessage({ 596 - action: "streamDone", 597 - }) 598 - .catch(() => {}); 853 + if (hitMaxChars) { 854 + sendStreamDoneToExtension({ truncated: true }); 855 + } else { 856 + sendStreamDoneToExtension(); 857 + } 858 + } finally { 859 + try { 860 + await reader.cancel(); 861 + } catch (e) { 862 + /* stream may already be closed */ 863 + } 864 + } 599 865 } catch (error) { 600 - chrome.runtime 601 - .sendMessage({ 602 - action: "streamDone", 603 - error: error.message, 604 - }) 605 - .catch(() => {}); 866 + if (error.name === "AbortError" || signal.aborted) { 867 + sendStreamDoneToExtension({ cancelled: true }); 868 + return; 869 + } 870 + sendStreamDoneToExtension({ error: error.message }); 606 871 } 607 872 }
+11
scripts/config.js
··· 22 22 DISABLE_THINKING: true, 23 23 AUTO_SUMMARIZE: false, 24 24 MAX_TOKENS: 2048, 25 + /** Hard cap on streamed assistant text (chars) to stop runaway output if the model misbehaves. */ 26 + STREAM_MAX_OUTPUT_CHARS: 16000, 27 + /** Non-streaming follow-up suggestion calls use a small budget so nonsense stays short. */ 28 + SUGGESTIONS_MAX_TOKENS: 256, 29 + /** Only parse suggestions from the first N chars of the model reply. */ 30 + SUGGESTIONS_MAX_PARSE_CHARS: 1000, 25 31 TEMPERATURE: 0.7, 26 32 TIMEOUT_MS: 30000, 33 + /** 34 + * Ollama only: how long the model stays loaded after each request (e.g. "0", "15s", "1m"). 35 + * Must match an option value in settings; "0" = unload immediately. 36 + */ 37 + KEEP_ALIVE: "0", 27 38 }, 28 39 29 40 // Content extraction settings