feat: Ollama keep_alive setting, stream stop, and API hardening · ellioth.co/summarizer-extension@dff3545

+46 -1

options/options.html

··· 234 234 </p> 235 235 </div> 236 236 237 + <div class="form-group" id="keep-alive-group"> 238 + <label for="keep-alive">Model keep-alive (Ollama)</label> 239 + <select id="keep-alive"> 240 + <option value=""> 241 + Ollama default — don’t override; server picks keep-alive 242 + </option> 243 + <option value="3s"> 244 + 3s — lowest RAM; enough for summary then AI suggestion 245 + chips if you’re quick 246 + </option> 247 + <option value="5s"> 248 + 5s — same idea as 3s, extra slack if the model or disk 249 + is slow between those calls 250 + </option> 251 + <option value="10s"> 252 + 10s — summary + suggestions with time to read before 253 + clicking a follow-up 254 + </option> 255 + <option value="15s"> 256 + 15s — relaxed pace between summary, suggestions, and 257 + your first chat question 258 + </option> 259 + <option value="30s"> 260 + 30s — browse the summary a bit before asking more 261 + </option> 262 + <option value="1m">1 minute</option> 263 + <option value="5m">5 minutes</option> 264 + <option value="15m">15 minutes</option> 265 + </select> 266 + <p class="help"> 267 + How long Ollama keeps the model loaded after a request. 268 + Shorter values free RAM sooner; the next call may wait 269 + while the model loads again. 270 + <strong>Only applies to Ollama native API.</strong> 271 + </p> 272 + </div> 273 + 237 274 <div class="form-group"> 238 275 <label for="auto-summarize"> 239 276 <input type="checkbox" id="auto-summarize" /> ··· 261 298 > 262 299 </p> 263 300 </div> 301 + 302 + <p class="help" style="margin-bottom: 12px"> 303 + Settings save automatically when you change them. You can 304 + still use Save for an explicit confirmation. 305 + </p> 264 306 265 307 <div class="buttons"> 266 308 <button type="submit" class="btn-primary">Save</button> ··· 292 334 </li> 293 335 <li>Pull a model: <code>ollama pull gemma3:1b</code></li> 294 336 <li>Run: <code>OLLAMA_ORIGINS=* ollama serve</code></li> 295 - <li>Keep default settings and click "Test connection"</li> 337 + <li> 338 + Keep default settings and use 339 + <strong>Test connection</strong> (no need to click Save) 340 + </li> 296 341 </ol> 297 342 </div> 298 343

+89 -11

options/options.js

··· 14 14 const apiKeyInput = document.getElementById("api-key"); 15 15 const disableThinkingInput = document.getElementById("disable-thinking"); 16 16 const thinkingModeGroup = document.getElementById("thinking-mode-group"); 17 + const keepAliveGroup = document.getElementById("keep-alive-group"); 18 + const keepAliveInput = document.getElementById("keep-alive"); 17 19 const autoSummarizeInput = document.getElementById("auto-summarize"); 18 20 const statusDiv = document.getElementById("status"); 19 21 const testBtn = document.getElementById("test-connection"); 20 22 const resetBtn = document.getElementById("reset-defaults"); 23 + 24 + let settingsHydrating = false; 25 + let autoSaveTimer = null; 26 + const AUTO_SAVE_DEBOUNCE_MS = 450; 21 27 22 28 // Build defaultSettings from centralized CONFIG 23 29 const defaultSettings = { ··· 29 35 apiKey: CONFIG.API.KEY, 30 36 disableThinking: CONFIG.API.DISABLE_THINKING, 31 37 autoSummarize: CONFIG.API.AUTO_SUMMARIZE, 38 + keepAlive: CONFIG.API.KEEP_ALIVE, 32 39 }; 33 40 34 41 // Use accent presets from CONFIG ··· 46 53 if (apiModeInput.value === "ollama") { 47 54 apiBaseUrlInput.placeholder = "http://localhost:11434"; 48 55 thinkingModeGroup.style.display = "block"; 56 + keepAliveGroup.style.display = "block"; 49 57 } else { 50 58 apiBaseUrlInput.placeholder = "http://localhost:11434/v1"; 51 59 thinkingModeGroup.style.display = "none"; 60 + keepAliveGroup.style.display = "none"; 52 61 } 62 + scheduleAutoSaveImmediate(); 53 63 }); 54 64 55 65 accentSwatchButtons.forEach((btn) => { ··· 57 67 setSelectedAccentPreset(btn.dataset.accentPreset); 58 68 const color = resolveAccentColor(); 59 69 if (color) applyAccentColor(color); 70 + scheduleAutoSaveImmediate(); 60 71 }); 61 72 }); 62 73 ··· 65 76 if (accentPresetInput.value === "custom" && color) { 66 77 applyAccentColor(color); 67 78 } 79 + scheduleAutoSaveDebounced(); 68 80 }); 69 81 82 + [apiBaseUrlInput, modelInput, apiKeyInput].forEach((el) => { 83 + el.addEventListener("input", () => scheduleAutoSaveDebounced()); 84 + }); 85 + 86 + disableThinkingInput.addEventListener("change", () => 87 + scheduleAutoSaveImmediate(), 88 + ); 89 + autoSummarizeInput.addEventListener("change", () => 90 + scheduleAutoSaveImmediate(), 91 + ); 92 + keepAliveInput.addEventListener("change", () => scheduleAutoSaveImmediate()); 93 + 70 94 themeBtn.addEventListener("click", async () => { 71 95 const idx = THEMES.indexOf(currentTheme); 72 96 currentTheme = THEMES[(idx + 1) % THEMES.length]; ··· 82 106 } 83 107 }); 84 108 85 - // Save settings 86 - form.addEventListener("submit", async (e) => { 87 - e.preventDefault(); 88 - 89 - const settings = { 109 + function collectSettingsFromForm() { 110 + return { 90 111 apiMode: apiModeInput.value, 91 112 apiBaseUrl: apiBaseUrlInput.value.trim() || CONFIG.API.BASE_URL, 92 113 model: modelInput.value.trim() || CONFIG.API.MODEL, ··· 95 116 apiKey: apiKeyInput.value.trim(), 96 117 disableThinking: disableThinkingInput.checked, 97 118 autoSummarize: autoSummarizeInput.checked, 119 + keepAlive: keepAliveInput.value, 98 120 }; 121 + } 122 + 123 + function scheduleAutoSaveDebounced() { 124 + if (settingsHydrating) return; 125 + clearTimeout(autoSaveTimer); 126 + autoSaveTimer = setTimeout(() => { 127 + autoSaveTimer = null; 128 + void persistSettings({ auto: true }); 129 + }, AUTO_SAVE_DEBOUNCE_MS); 130 + } 131 + 132 + function scheduleAutoSaveImmediate() { 133 + if (settingsHydrating) return; 134 + clearTimeout(autoSaveTimer); 135 + autoSaveTimer = null; 136 + void persistSettings({ auto: true }); 137 + } 138 + 139 + /** 140 + * @param {{ auto?: boolean, successMessage?: string }} opts 141 + * auto: true = silent skip when custom accent incomplete; short "Saved" toast 142 + */ 143 + async function persistSettings(opts = {}) { 144 + const { auto = false, successMessage } = opts; 145 + if (settingsHydrating) return; 146 + 147 + const settings = collectSettingsFromForm(); 99 148 100 149 if (!settings.accentColor) { 150 + if (auto) return; 101 151 showStatus( 102 152 "❌ Custom accent color must be a valid hex code (e.g. #F15B2F).", 103 153 "error", ··· 107 157 108 158 try { 109 159 await chrome.storage.sync.set(settings); 110 - showStatus("✅ Settings saved successfully!", "success"); 160 + const msg = 161 + successMessage ?? 162 + (auto ? "Saved" : "✅ Settings saved successfully!"); 163 + showStatus(msg, "success", auto ? 2500 : undefined); 111 164 } catch (error) { 112 165 showStatus("❌ Error saving settings: " + error.message, "error"); 113 166 } 167 + } 168 + 169 + // Save settings (explicit submit — same as auto-save) 170 + form.addEventListener("submit", async (e) => { 171 + e.preventDefault(); 172 + clearTimeout(autoSaveTimer); 173 + autoSaveTimer = null; 174 + await persistSettings({ auto: false }); 114 175 }); 115 176 116 177 // Test connection ··· 121 182 model: modelInput.value.trim() || CONFIG.API.MODEL, 122 183 apiKey: apiKeyInput.value.trim(), 123 184 disableThinking: disableThinkingInput.checked, 185 + keepAlive: keepAliveInput.value, 124 186 }; 125 187 126 188 showStatus("🔄 Testing connection...", "loading"); ··· 139 201 ], 140 202 apiMode: settings.apiMode, 141 203 disableThinking: settings.disableThinking, 204 + keepAlive: settings.apiMode === "ollama" ? settings.keepAlive : "", 142 205 }, 143 206 }); 144 207 ··· 180 243 }); 181 244 182 245 // Reset to defaults 183 - resetBtn.addEventListener("click", () => { 246 + resetBtn.addEventListener("click", async () => { 247 + settingsHydrating = true; 184 248 apiModeInput.value = CONFIG.API.MODE; 185 249 apiBaseUrlInput.value = CONFIG.API.BASE_URL; 186 250 modelInput.value = CONFIG.API.MODEL; ··· 190 254 apiKeyInput.value = CONFIG.API.KEY; 191 255 disableThinkingInput.checked = CONFIG.API.DISABLE_THINKING; 192 256 autoSummarizeInput.checked = false; 193 - // Show/hide thinking mode group based on API mode 257 + keepAliveInput.value = CONFIG.API.KEEP_ALIVE; 194 258 thinkingModeGroup.style.display = 195 259 CONFIG.API.MODE === "ollama" ? "block" : "none"; 196 - showStatus("Settings reset to defaults. Click Save to apply.", "success"); 260 + keepAliveGroup.style.display = 261 + CONFIG.API.MODE === "ollama" ? "block" : "none"; 262 + settingsHydrating = false; 263 + await persistSettings({ 264 + auto: false, 265 + successMessage: "Defaults restored and saved.", 266 + }); 197 267 }); 198 268 199 269 // Keyboard shortcuts link ··· 234 304 235 305 // Load settings 236 306 async function loadSettings() { 307 + settingsHydrating = true; 237 308 try { 238 309 const settings = await chrome.storage.sync.get(defaultSettings); 239 310 ··· 250 321 apiKeyInput.value = settings.apiKey; 251 322 disableThinkingInput.checked = settings.disableThinking; 252 323 autoSummarizeInput.checked = settings.autoSummarize || false; 324 + keepAliveInput.value = 325 + settings.keepAlive != null ? settings.keepAlive : CONFIG.API.KEEP_ALIVE; 253 326 // Show/hide thinking mode group based on API mode 254 327 thinkingModeGroup.style.display = 255 328 settings.apiMode === "ollama" ? "block" : "none"; 329 + keepAliveGroup.style.display = 330 + settings.apiMode === "ollama" ? "block" : "none"; 256 331 } catch (error) { 257 332 showStatus("Error loading settings: " + error.message, "error"); 333 + } finally { 334 + settingsHydrating = false; 258 335 } 259 336 } 260 337 ··· 348 425 } 349 426 350 427 // Show status message 351 - function showStatus(message, type) { 428 + function showStatus(message, type, dismissMs) { 352 429 // Replace newlines with <br> for display 353 430 statusDiv.innerHTML = message.replace(/\n/g, "<br>"); 354 431 statusDiv.className = "status " + type; 355 432 356 433 if (type !== "loading") { 434 + const ms = dismissMs ?? 8000; 357 435 setTimeout(() => { 358 436 statusDiv.className = "status"; 359 - }, 8000); 437 + }, ms); 360 438 } 361 439 }

+8

popup/popup.css

···

+137 -28

popup/popup.js

······················································

+379 -114

scripts/background.js

··· 13 13 const CONTENT_CACHE_PREFIX = CONFIG.CACHE.CONTENT; 14 14 const CHAT_CACHE_PREFIX = CONFIG.CACHE.CHAT; 15 15 16 + /** 17 + * Per-tab inflight Ollama/API state: one slot for streaming, one for non-stream (chat/suggestions). 18 + * - New stream aborts everything (user superseded or regenerate). 19 + * - New non-stream only aborts prior non-stream, never an active stream (avoids races with post-stream work). 20 + */ 21 + const inflightByTab = new Map(); 22 + 23 + function abortAllInflightForTab(tabId) { 24 + if (tabId == null) return; 25 + const e = inflightByTab.get(tabId); 26 + if (!e) return; 27 + if (e.stream) e.stream.abort(); 28 + if (e.other) e.other.abort(); 29 + inflightByTab.delete(tabId); 30 + } 31 + 32 + function takeStreamSlot(tabId) { 33 + if (tabId == null) return new AbortController(); 34 + abortAllInflightForTab(tabId); 35 + const c = new AbortController(); 36 + inflightByTab.set(tabId, { stream: c }); 37 + return c; 38 + } 39 + 40 + function releaseStreamSlot(tabId, controller) { 41 + if (tabId == null) return; 42 + const e = inflightByTab.get(tabId); 43 + if (e && e.stream === controller) { 44 + if (e.other) { 45 + inflightByTab.set(tabId, { other: e.other }); 46 + } else { 47 + inflightByTab.delete(tabId); 48 + } 49 + } 50 + } 51 + 52 + function takeOtherSlot(tabId) { 53 + if (tabId == null) return new AbortController(); 54 + const e = inflightByTab.get(tabId) || {}; 55 + if (e.other) e.other.abort(); 56 + const c = new AbortController(); 57 + inflightByTab.set(tabId, { ...e, other: c }); 58 + return c; 59 + } 60 + 61 + function releaseOtherSlot(tabId, controller) { 62 + if (tabId == null) return; 63 + const e = inflightByTab.get(tabId); 64 + if (e && e.other === controller) { 65 + if (e.stream) { 66 + inflightByTab.set(tabId, { stream: e.stream }); 67 + } else { 68 + inflightByTab.delete(tabId); 69 + } 70 + } 71 + } 72 + 73 + /** Coalesce tiny stream tokens into fewer runtime messages (easier on SW + popup; less backpressure). */ 74 + const CHUNK_BATCH_MAX_CHARS = 512; 75 + const CHUNK_BATCH_MS = 20; 76 + 77 + let streamChunkBuffer = ""; 78 + let streamChunkTimer = null; 79 + 80 + function resetStreamChunkBatching() { 81 + if (streamChunkTimer) { 82 + clearTimeout(streamChunkTimer); 83 + streamChunkTimer = null; 84 + } 85 + streamChunkBuffer = ""; 86 + } 87 + 88 + function flushStreamChunks() { 89 + streamChunkTimer = null; 90 + if (!streamChunkBuffer) return; 91 + const chunk = streamChunkBuffer; 92 + streamChunkBuffer = ""; 93 + chrome.runtime 94 + .sendMessage({ action: "streamChunk", chunk, done: false }) 95 + .catch(() => {}); 96 + } 97 + 98 + function queueStreamChunk(piece) { 99 + streamChunkBuffer += piece; 100 + if (streamChunkBuffer.length >= CHUNK_BATCH_MAX_CHARS) { 101 + if (streamChunkTimer) { 102 + clearTimeout(streamChunkTimer); 103 + streamChunkTimer = null; 104 + } 105 + flushStreamChunks(); 106 + } else if (!streamChunkTimer) { 107 + streamChunkTimer = setTimeout(flushStreamChunks, CHUNK_BATCH_MS); 108 + } 109 + } 110 + 111 + function finalizeStreamChunkBatching() { 112 + if (streamChunkTimer) { 113 + clearTimeout(streamChunkTimer); 114 + streamChunkTimer = null; 115 + } 116 + flushStreamChunks(); 117 + } 118 + 119 + /** Ollama /api/generate keep_alive — null means omit so the server uses its default. */ 120 + function normalizeOllamaKeepAlive(keepAlive) { 121 + if (keepAlive == null) return null; 122 + const s = String(keepAlive).trim(); 123 + return s || null; 124 + } 125 + 126 + function attachOllamaKeepAlive(requestBody, keepAlive) { 127 + const v = normalizeOllamaKeepAlive(keepAlive); 128 + if (v) requestBody.keep_alive = v; 129 + } 130 + 16 131 // ── Prompt templates from CONFIG ───────────────────────────────────────── 17 132 const OLLAMA_CONTEXT_TEMPLATE = CONFIG.OLLAMA.CONTEXT_TEMPLATE; 18 133 const OLLAMA_SINGLE_MESSAGE_TEMPLATE = CONFIG.OLLAMA.SINGLE_MESSAGE_TEMPLATE; ··· 29 144 apiKey: CONFIG.API.KEY, 30 145 disableThinking: CONFIG.API.DISABLE_THINKING, 31 146 autoSummarize: CONFIG.API.AUTO_SUMMARIZE, 147 + keepAlive: CONFIG.API.KEEP_ALIVE, 32 148 }); 33 149 } 34 150 }); ··· 145 261 const { tabId } = request; 146 262 handleStreamChatRequest(request.data, tabId).catch((error) => { 147 263 console.error("Stream chat error:", error); 148 - chrome.tabs.sendMessage(tabId, { 149 - action: "streamDone", 150 - error: error.message, 151 - }); 264 + sendStreamDoneToExtension({ error: error.message }); 152 265 }); 153 - return false; // We handle the response ourselves via sendMessage to tab 266 + return false; // Popup receives streamChunk/streamDone via runtime messages 267 + } 268 + 269 + if (request.action === "cancelStream") { 270 + const tabId = request.tabId; 271 + if (tabId != null) { 272 + abortAllInflightForTab(tabId); 273 + } 274 + return false; 154 275 } 155 276 156 277 if (request.action === "testOllama") { ··· 210 331 } 211 332 212 333 async function handleChatRequest(data) { 213 - const { apiBaseUrl, model, apiKey, messages, apiMode, disableThinking } = 214 - data; 334 + const { 335 + tabId, 336 + apiBaseUrl, 337 + model, 338 + apiKey, 339 + messages, 340 + apiMode, 341 + disableThinking, 342 + maxOutputTokens, 343 + keepAlive, 344 + } = data; 215 345 216 - let useNativeOllama = apiMode === "ollama"; 346 + const tokenCap = 347 + typeof maxOutputTokens === "number" && maxOutputTokens > 0 348 + ? maxOutputTokens 349 + : CONFIG.API.MAX_TOKENS; 217 350 218 - if (useNativeOllama) { 219 - return await callOllamaNative(apiBaseUrl, model, messages, disableThinking); 220 - } else { 221 - return await callOpenAICompatible(apiBaseUrl, model, apiKey, messages); 351 + const controller = takeOtherSlot(tabId); 352 + const signal = controller.signal; 353 + 354 + try { 355 + const useNativeOllama = apiMode === "ollama"; 356 + 357 + if (useNativeOllama) { 358 + return await callOllamaNative( 359 + apiBaseUrl, 360 + model, 361 + messages, 362 + disableThinking, 363 + tokenCap, 364 + signal, 365 + keepAlive, 366 + ); 367 + } else { 368 + return await callOpenAICompatible( 369 + apiBaseUrl, 370 + model, 371 + apiKey, 372 + messages, 373 + tokenCap, 374 + signal, 375 + ); 376 + } 377 + } catch (error) { 378 + if (error.name === "AbortError" || signal.aborted) { 379 + throw new Error("Request cancelled"); 380 + } 381 + throw error; 382 + } finally { 383 + releaseOtherSlot(tabId, controller); 222 384 } 223 385 } 224 386 225 387 async function handleStreamChatRequest(data, tabId) { 226 - const { apiBaseUrl, model, apiKey, messages, apiMode, disableThinking } = 227 - data; 388 + const { 389 + apiBaseUrl, 390 + model, 391 + apiKey, 392 + messages, 393 + apiMode, 394 + disableThinking, 395 + keepAlive, 396 + } = data; 228 397 229 - let useNativeOllama = apiMode === "ollama"; 398 + const controller = takeStreamSlot(tabId); 399 + const signal = controller.signal; 400 + 401 + try { 402 + const useNativeOllama = apiMode === "ollama"; 230 403 231 - if (useNativeOllama) { 232 - await callOllamaNativeStream( 233 - apiBaseUrl, 234 - model, 235 - messages, 236 - disableThinking, 237 - tabId, 238 - ); 239 - } else { 240 - await callOpenAICompatibleStream( 241 - apiBaseUrl, 242 - model, 243 - apiKey, 244 - messages, 245 - tabId, 246 - ); 404 + if (useNativeOllama) { 405 + await callOllamaNativeStream( 406 + apiBaseUrl, 407 + model, 408 + messages, 409 + disableThinking, 410 + signal, 411 + keepAlive, 412 + ); 413 + } else { 414 + await callOpenAICompatibleStream( 415 + apiBaseUrl, 416 + model, 417 + apiKey, 418 + messages, 419 + signal, 420 + ); 421 + } 422 + } finally { 423 + releaseStreamSlot(tabId, controller); 247 424 } 248 425 } 249 426 250 - async function callOllamaNative(baseUrl, model, messages, disableThinking) { 427 + async function callOllamaNative( 428 + baseUrl, 429 + model, 430 + messages, 431 + disableThinking, 432 + maxTokens = CONFIG.API.MAX_TOKENS, 433 + signal, 434 + keepAlive, 435 + ) { 251 436 // Merge all system messages into one so none are dropped 252 437 const systemMsgs = messages.filter((m) => m.role === "system"); 253 438 const systemContent = systemMsgs.map((m) => m.content).join("\n\n"); ··· 279 464 stream: false, 280 465 options: { 281 466 temperature: CONFIG.API.TEMPERATURE, 282 - num_predict: CONFIG.API.MAX_TOKENS, 467 + num_predict: maxTokens, 283 468 }, 284 469 }; 285 470 ··· 289 474 requestBody.think = false; 290 475 } 291 476 292 - const response = await fetch(url, { 477 + attachOllamaKeepAlive(requestBody, keepAlive); 478 + 479 + const fetchOpts = { 293 480 method: "POST", 294 481 headers: { 295 482 "Content-Type": "application/json", 483 + Connection: "close", 296 484 }, 297 485 body: JSON.stringify(requestBody), 298 - }); 486 + }; 487 + if (signal) fetchOpts.signal = signal; 488 + 489 + const response = await fetch(url, fetchOpts); 299 490 300 491 if (!response.ok) { 301 492 const text = await response.text(); ··· 329 520 }; 330 521 } 331 522 332 - async function callOpenAICompatible(baseUrl, model, apiKey, messages) { 523 + async function callOpenAICompatible( 524 + baseUrl, 525 + model, 526 + apiKey, 527 + messages, 528 + maxTokens = CONFIG.API.MAX_TOKENS, 529 + signal, 530 + ) { 333 531 let url = baseUrl.replace(/\/$/, ""); 334 532 335 533 if (!url.includes("/v1")) { ··· 338 536 339 537 url = url + "/chat/completions"; 340 538 341 - const response = await fetch(url, { 539 + const fetchOpts = { 342 540 method: "POST", 343 541 headers: { 344 542 "Content-Type": "application/json", ··· 348 546 model: model, 349 547 messages: messages, 350 548 stream: false, 351 - max_tokens: CONFIG.API.MAX_TOKENS, 549 + max_tokens: maxTokens, 352 550 }), 353 - }); 551 + }; 552 + if (signal) fetchOpts.signal = signal; 553 + 554 + const response = await fetch(url, fetchOpts); 354 555 355 556 if (!response.ok) { 356 557 const text = await response.text(); ··· 382 583 return await response.json(); 383 584 } 384 585 586 + function sendStreamDoneToExtension(payload = {}) { 587 + if (payload.error) { 588 + resetStreamChunkBatching(); 589 + } else { 590 + finalizeStreamChunkBatching(); 591 + } 592 + chrome.runtime.sendMessage({ action: "streamDone", ...payload }).catch(() => {}); 593 + } 594 + 385 595 async function callOllamaNativeStream( 386 596 baseUrl, 387 597 model, 388 598 messages, 389 599 disableThinking, 390 - tabId, 600 + signal, 601 + keepAlive, 391 602 ) { 392 603 const systemMsgs = messages.filter((m) => m.role === "system"); 393 604 const systemContent = systemMsgs.map((m) => m.content).join("\n\n"); ··· 425 636 requestBody.think = false; 426 637 } 427 638 639 + attachOllamaKeepAlive(requestBody, keepAlive); 640 + 641 + resetStreamChunkBatching(); 642 + let hitMaxChars = false; 643 + 428 644 try { 429 645 const response = await fetch(url, { 430 646 method: "POST", 431 647 headers: { 432 648 "Content-Type": "application/json", 649 + Connection: "close", 433 650 }, 434 651 body: JSON.stringify(requestBody), 652 + signal, 435 653 }); 436 654 437 655 if (!response.ok) { ··· 452 670 } 453 671 454 672 const reader = response.body.getReader(); 455 - const decoder = new TextDecoder(); 456 - let buffer = ""; 673 + try { 674 + const decoder = new TextDecoder(); 675 + let buffer = ""; 676 + let streamedChars = 0; 677 + const maxChars = CONFIG.API.STREAM_MAX_OUTPUT_CHARS; 678 + 679 + while (true) { 680 + let readResult; 681 + try { 682 + readResult = await reader.read(); 683 + } catch (readErr) { 684 + if (readErr.name === "AbortError" || signal.aborted) { 685 + sendStreamDoneToExtension({ cancelled: true }); 686 + return; 687 + } 688 + throw readErr; 689 + } 457 690 458 - while (true) { 459 - const { done, value } = await reader.read(); 460 - if (done) break; 691 + const { done, value } = readResult; 692 + if (done) break; 461 693 462 - buffer += decoder.decode(value, { stream: true }); 463 - const lines = buffer.split("\n"); 464 - buffer = lines.pop() || ""; 694 + buffer += decoder.decode(value, { stream: true }); 695 + const lines = buffer.split("\n"); 696 + buffer = lines.pop() || ""; 465 697 466 - for (const line of lines) { 467 - if (line.trim()) { 468 - try { 469 - const json = JSON.parse(line); 470 - if (json.response) { 471 - chrome.runtime 472 - .sendMessage({ 473 - action: "streamChunk", 474 - chunk: json.response, 475 - done: false, 476 - }) 477 - .catch(() => {}); 698 + for (const line of lines) { 699 + if (line.trim()) { 700 + try { 701 + const json = JSON.parse(line); 702 + if (json.response) { 703 + const piece = json.response; 704 + if (streamedChars + piece.length > maxChars) { 705 + hitMaxChars = true; 706 + reader.cancel().catch(() => {}); 707 + break; 708 + } 709 + streamedChars += piece.length; 710 + queueStreamChunk(piece); 711 + } 712 + } catch (e) { 713 + // Skip invalid JSON lines 478 714 } 479 - } catch (e) { 480 - // Skip invalid JSON lines 481 715 } 482 716 } 717 + 718 + if (hitMaxChars) break; 483 719 } 484 - } 485 720 486 - // Streaming complete - send done message 487 - chrome.runtime 488 - .sendMessage({ 489 - action: "streamDone", 490 - }) 491 - .catch(() => {}); 721 + if (hitMaxChars) { 722 + sendStreamDoneToExtension({ truncated: true }); 723 + } else { 724 + sendStreamDoneToExtension(); 725 + } 726 + } finally { 727 + try { 728 + await reader.cancel(); 729 + } catch (e) { 730 + /* stream may already be closed */ 731 + } 732 + } 492 733 } catch (error) { 493 - chrome.runtime 494 - .sendMessage({ 495 - action: "streamDone", 496 - error: error.message, 497 - }) 498 - .catch(() => {}); 734 + if (error.name === "AbortError" || signal.aborted) { 735 + sendStreamDoneToExtension({ cancelled: true }); 736 + return; 737 + } 738 + sendStreamDoneToExtension({ error: error.message }); 499 739 } 500 740 } 501 741 ··· 504 744 model, 505 745 apiKey, 506 746 messages, 507 - tabId, 747 + signal, 508 748 ) { 509 749 let url = baseUrl.replace(/\/$/, ""); 510 750 ··· 514 754 515 755 url = url + "/chat/completions"; 516 756 757 + resetStreamChunkBatching(); 758 + let hitMaxChars = false; 759 + 517 760 try { 518 761 const response = await fetch(url, { 519 762 method: "POST", ··· 527 770 stream: true, 528 771 max_tokens: CONFIG.API.MAX_TOKENS, 529 772 }), 773 + signal, 530 774 }); 531 775 532 776 if (!response.ok) { ··· 556 800 } 557 801 558 802 const reader = response.body.getReader(); 559 - const decoder = new TextDecoder(); 560 - let buffer = ""; 803 + try { 804 + const decoder = new TextDecoder(); 805 + let buffer = ""; 806 + let streamedChars = 0; 807 + const maxChars = CONFIG.API.STREAM_MAX_OUTPUT_CHARS; 561 808 562 - while (true) { 563 - const { done, value } = await reader.read(); 564 - if (done) break; 809 + while (true) { 810 + let readResult; 811 + try { 812 + readResult = await reader.read(); 813 + } catch (readErr) { 814 + if (readErr.name === "AbortError" || signal.aborted) { 815 + sendStreamDoneToExtension({ cancelled: true }); 816 + return; 817 + } 818 + throw readErr; 819 + } 820 + 821 + const { done, value } = readResult; 822 + if (done) break; 565 823 566 - buffer += decoder.decode(value, { stream: true }); 567 - const lines = buffer.split("\n"); 568 - buffer = lines.pop() || ""; 824 + buffer += decoder.decode(value, { stream: true }); 825 + const lines = buffer.split("\n"); 826 + buffer = lines.pop() || ""; 569 827 570 - for (const line of lines) { 571 - if (line.trim() && line.startsWith("data: ")) { 572 - const data = line.slice(6); 573 - if (data === "[DONE]") continue; 574 - try { 575 - const json = JSON.parse(data); 576 - const content = json.choices?.[0]?.delta?.content; 577 - if (content) { 578 - chrome.runtime 579 - .sendMessage({ 580 - action: "streamChunk", 581 - chunk: content, 582 - done: false, 583 - }) 584 - .catch(() => {}); 828 + for (const line of lines) { 829 + if (line.trim() && line.startsWith("data: ")) { 830 + const data = line.slice(6); 831 + if (data === "[DONE]") continue; 832 + try { 833 + const json = JSON.parse(data); 834 + const content = json.choices?.[0]?.delta?.content; 835 + if (content) { 836 + if (streamedChars + content.length > maxChars) { 837 + hitMaxChars = true; 838 + reader.cancel().catch(() => {}); 839 + break; 840 + } 841 + streamedChars += content.length; 842 + queueStreamChunk(content); 843 + } 844 + } catch (e) { 845 + // Skip invalid JSON lines 585 846 } 586 - } catch (e) { 587 - // Skip invalid JSON lines 588 847 } 589 848 } 849 + 850 + if (hitMaxChars) break; 590 851 } 591 - } 592 852 593 - // Streaming complete - send done message 594 - chrome.runtime 595 - .sendMessage({ 596 - action: "streamDone", 597 - }) 598 - .catch(() => {}); 853 + if (hitMaxChars) { 854 + sendStreamDoneToExtension({ truncated: true }); 855 + } else { 856 + sendStreamDoneToExtension(); 857 + } 858 + } finally { 859 + try { 860 + await reader.cancel(); 861 + } catch (e) { 862 + /* stream may already be closed */ 863 + } 864 + } 599 865 } catch (error) { 600 - chrome.runtime 601 - .sendMessage({ 602 - action: "streamDone", 603 - error: error.message, 604 - }) 605 - .catch(() => {}); 866 + if (error.name === "AbortError" || signal.aborted) { 867 + sendStreamDoneToExtension({ cancelled: true }); 868 + return; 869 + } 870 + sendStreamDoneToExtension({ error: error.message }); 606 871 } 607 872 }

+11

scripts/config.js

··· 22 22 DISABLE_THINKING: true, 23 23 AUTO_SUMMARIZE: false, 24 24 MAX_TOKENS: 2048, 25 + /** Hard cap on streamed assistant text (chars) to stop runaway output if the model misbehaves. */ 26 + STREAM_MAX_OUTPUT_CHARS: 16000, 27 + /** Non-streaming follow-up suggestion calls use a small budget so nonsense stays short. */ 28 + SUGGESTIONS_MAX_TOKENS: 256, 29 + /** Only parse suggestions from the first N chars of the model reply. */ 30 + SUGGESTIONS_MAX_PARSE_CHARS: 1000, 25 31 TEMPERATURE: 0.7, 26 32 TIMEOUT_MS: 30000, 33 + /** 34 + * Ollama only: how long the model stays loaded after each request (e.g. "0", "15s", "1m"). 35 + * Must match an option value in settings; "0" = unload immediately. 36 + */ 37 + KEEP_ALIVE: "0", 27 38 }, 28 39 29 40 // Content extraction settings

Configure Feed

Configure Feed