my harness for niri
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at main 1307 lines 47 kB view raw
// Runner configuration module: provider/model selection from env, OpenAI client
// construction, the tool schemas advertised to the model, session persistence,
// message-history repair, and error-classification helpers for retry/fallback.
import fs from "fs/promises"
import path from "path"
import { fileURLToPath } from "url"
import OpenAI from "openai"
import { imageRootForModelInput } from "../container/index.js"
import type { Message } from "../types.js"
import type { ImageDetail, ToolArgs } from "./types.js"

// Project root is three directories above this module; the resumable session
// snapshot lives beside it as session.json.
const PROJECT_ROOT = path.resolve(fileURLToPath(import.meta.url), "../../..")
const SESSION_FILE = path.join(PROJECT_ROOT, "session.json")

// Token budgets, overridable via env.
// NOTE(review): parseInt on a malformed env value yields NaN — assumes
// operators always set numeric values; confirm there is upstream validation.
export const TOKEN_NUDGE_THRESHOLD = parseInt(process.env.TOKEN_NUDGE_THRESHOLD ?? "120000")
export const FALLBACK_TOKEN_NUDGE_THRESHOLD = parseInt(process.env.FALLBACK_TOKEN_NUDGE_THRESHOLD ?? "50000")
export const CONTEXT_COMPACT_TRIGGER_TOKENS = parseInt(process.env.CONTEXT_COMPACT_TRIGGER_TOKENS ?? "90000")

// NIRI_ENV=local routes everything through the fallback provider.
const NIRI_ENV = (process.env.NIRI_ENV ?? "default").trim().toLowerCase()
export const USE_FALLBACK = NIRI_ENV === "local"

export const API_BASE = process.env.OPENAI_BASE_URL ?? "https://api.openai.com/v1"
export const MODEL = process.env.MODEL ?? ""
// DeepSeek-style providers need assistant history replayed with a
// reasoning_content field; detect them by substring on base URL or model name.
export const PRIMARY_PROVIDER_REQUIRES_REASONING_REPLAY =
  API_BASE.toLowerCase().includes("deepseek") || MODEL.toLowerCase().includes("deepseek")
const DEFAULT_FALLBACK_BASE = "http://localhost:1234/v1"

/** Heuristic check for a locally-hosted base URL (localhost / 127.0.0.1). */
const isLikelyLocalBase = (baseUrl: string): boolean => {
  const lowered = baseUrl.trim().toLowerCase()
  return lowered.includes("localhost") || lowered.includes("127.0.0.1")
}

/**
 * Parses common boolean env spellings (true/1/yes/on, false/0/no/off),
 * returning `fallback` for unset, empty, or unrecognized values.
 */
const parseBooleanEnv = (value: string | undefined, fallback: boolean): boolean => {
  if (typeof value !== "string") return fallback
  const normalized = value.trim().toLowerCase()
  if (!normalized) return fallback
  if (normalized === "true" || normalized === "1" || normalized === "yes" || normalized === "on") return true
  if (normalized === "false" || normalized === "0" || normalized === "no" || normalized === "off") return false
  return fallback
}

/** Controls whether model reasoning/thinking is requested and streamed to clients. */
export const ENABLE_THINKING = parseBooleanEnv(process.env.ENABLE_THINKING, true)

/** Parses a tool_choice env value, accepting only required/auto/none; otherwise returns `fallback`. */
const parseToolChoiceEnv = (value: string | undefined, fallback: "required" | "auto" | "none"): "required" | "auto" | "none" => {
  if (typeof value !== "string") return fallback
  const normalized = value.trim().toLowerCase()
  if (normalized === "required" || normalized === "auto" || normalized === "none") return normalized
  return fallback
}

// Fallback provider settings: first match wins across the provider-specific
// env names (OpenRouter / LM Studio aliases kept for compatibility).
export const FALLBACK_BASE =
  process.env.FALLBACK_OPENAI_BASE_URL ?? process.env.OPENROUTER_BASE_URL ?? process.env.LMSTUDIO_BASE_URL ?? DEFAULT_FALLBACK_BASE
export const FALLBACK_MODEL =
  process.env.FALLBACK_MODEL ?? process.env.OPENROUTER_MODEL ?? process.env.LMSTUDIO_MODEL ?? "zai-org/glm-4.7-flash"
export const FALLBACK_PROVIDER_REQUIRES_REASONING_REPLAY =
  FALLBACK_BASE.toLowerCase().includes("deepseek") || FALLBACK_MODEL.toLowerCase().includes("deepseek")
// Optional dedicated summarization provider; empty strings mean "not configured".
export const SUMMARY_BASE =
  process.env.SUMMARY_OPENAI_BASE_URL ?? process.env.SUMMARY_BASE_URL ?? ""
export const SUMMARY_MODEL = process.env.SUMMARY_MODEL ?? ""
export const PRIMARY_TOOL_CHOICE = parseToolChoiceEnv(process.env.PRIMARY_TOOL_CHOICE ?? process.env.TOOL_CHOICE, "auto")
export const FALLBACK_TOOL_CHOICE = parseToolChoiceEnv(process.env.FALLBACK_TOOL_CHOICE, "auto")
// Fallback context-window bookkeeping. NOTE(review): these three are declared
// but not referenced in the visible portion of this file — presumably consumed
// further down or by an importer; confirm before removing.
const FALLBACK_N_CTX = parseInt(process.env.FALLBACK_N_CTX ?? process.env.LMSTUDIO_N_CTX ?? "4096")
const FALLBACK_CONTEXT_MARGIN = parseInt(process.env.FALLBACK_CONTEXT_MARGIN ?? process.env.LMSTUDIO_CONTEXT_MARGIN ?? "256")
const FALLBACK_HARD_OVERFLOW_TOKENS = parseInt(
  process.env.FALLBACK_HARD_OVERFLOW_TOKENS ?? process.env.LMSTUDIO_HARD_OVERFLOW_TOKENS ?? "1024",
)
const FALLBACK_ENFORCE_CONTEXT_LIMIT = parseBooleanEnv(
  process.env.FALLBACK_ENFORCE_CONTEXT_LIMIT,
  isLikelyLocalBase(FALLBACK_BASE),
)

// API keys: cascade through provider-specific names, then the primary key,
// then an "lm-studio" placeholder when targeting a local server.
const fallbackApiKey =
  process.env.FALLBACK_OPENAI_API_KEY ??
  process.env.OPENROUTER_API_KEY ??
  process.env.LMSTUDIO_API_KEY ??
  process.env.OPENAI_API_KEY ??
  (isLikelyLocalBase(FALLBACK_BASE) ? "lm-studio" : "")
const summaryApiKey =
  process.env.SUMMARY_OPENAI_API_KEY ??
  process.env.SUMMARY_API_KEY ??
  (SUMMARY_BASE === process.env.OPENROUTER_BASE_URL ? process.env.OPENROUTER_API_KEY : undefined) ??
  (SUMMARY_BASE === process.env.LMSTUDIO_BASE_URL ? process.env.LMSTUDIO_API_KEY : undefined) ??
  process.env.OPENAI_API_KEY ??
  (SUMMARY_BASE && isLikelyLocalBase(SUMMARY_BASE) ? "lm-studio" : "")
// Optional attribution headers (OpenRouter-style HTTP-Referer / X-Title).
const fallbackHeaders: Record<string, string> = {}
if (process.env.FALLBACK_OPENAI_REFERER) fallbackHeaders["HTTP-Referer"] = process.env.FALLBACK_OPENAI_REFERER
if (process.env.FALLBACK_OPENAI_TITLE) fallbackHeaders["X-Title"] = process.env.FALLBACK_OPENAI_TITLE
const summaryHeaders: Record<string, string> = {}
if (process.env.SUMMARY_OPENAI_REFERER) summaryHeaders["HTTP-Referer"] = process.env.SUMMARY_OPENAI_REFERER
if (process.env.SUMMARY_OPENAI_TITLE) summaryHeaders["X-Title"] = process.env.SUMMARY_OPENAI_TITLE

// Fail fast at module load on missing configuration. These throws happen at
// import time, so importing this module validates the environment.
if (!USE_FALLBACK && !MODEL) {
  throw new Error("MODEL is required unless fallback is forced (NIRI_ENV=local).")
}

if (!USE_FALLBACK && !process.env.OPENAI_API_KEY) {
  throw new Error("OPENAI_API_KEY is required unless fallback is forced (NIRI_ENV=local).")
}

if (USE_FALLBACK && !fallbackApiKey) {
  throw new Error(
    "Fallback API key is required in local mode. Set FALLBACK_OPENAI_API_KEY (or OPENROUTER_API_KEY / LMSTUDIO_API_KEY).",
  )
}

// Summary provider is all-or-nothing: base URL, model, and key must all be set.
if ((SUMMARY_BASE || SUMMARY_MODEL) && (!SUMMARY_BASE || !SUMMARY_MODEL || !summaryApiKey)) {
  throw new Error(
    "Summary provider requires SUMMARY_OPENAI_BASE_URL (or SUMMARY_BASE_URL), SUMMARY_MODEL, and SUMMARY_OPENAI_API_KEY (or SUMMARY_API_KEY).",
  )
}

// Primary client is null in forced-fallback mode; callers must handle that.
export const client = USE_FALLBACK
  ? null
  : new OpenAI({
      baseURL: API_BASE,
      apiKey: process.env.OPENAI_API_KEY!,
    })

export const fallbackClient = new OpenAI({
  baseURL: FALLBACK_BASE,
  apiKey: fallbackApiKey || "lm-studio", // Keep LM Studio default when running against localhost.
  defaultHeaders: Object.keys(fallbackHeaders).length ? fallbackHeaders : undefined,
})

export const summaryClient =
  SUMMARY_BASE && SUMMARY_MODEL
    ? new OpenAI({
        baseURL: SUMMARY_BASE,
        apiKey: summaryApiKey,
        defaultHeaders: Object.keys(summaryHeaders).length ? summaryHeaders : undefined,
      })
    : null

// Startup configuration summary, logged once at module load.
console.log(`[config] primary=${MODEL} @ ${API_BASE}`)
console.log(`[config] fallback=${FALLBACK_MODEL} @ ${FALLBACK_BASE}`)
if (summaryClient) console.log(`[config] summary=${SUMMARY_MODEL} @ ${SUMMARY_BASE}`)
console.log(`[config] env=${NIRI_ENV} use_fallback=${USE_FALLBACK}`)
console.log(`[config] thinking=${ENABLE_THINKING}`)

const IMAGE_ROOT_HINT = imageRootForModelInput()

// Tool schemas advertised to the model. The description strings double as the
// model-facing documentation, so keep them in sync with the actual handlers
// (implemented elsewhere — not visible in this module).
export const TOOLS: OpenAI.Chat.ChatCompletionTool[] = [
  {
    type: "function",
    function: {
      name: "shell",
      description:
        "Execute a bash command in your Linux environment. Stateful — cd, env vars, etc. persist. Stdin is generally attached to the PTY (more natural behavior), but for obviously interactive commands (REPLs, editors, pagers) we may redirect stdin to /dev/null to avoid accidental hangs. Output is automatically capped (default 150 lines, 40 for known-verbose commands like apt/pip/npm). Pass max_lines to override; use 0 for unlimited. You can also pass timeout_ms (default 30000, max 600000).",
      parameters: {
        type: "object",
        properties: {
          command: { type: "string" },
          max_lines: {
            type: "integer",
            description:
              "Maximum lines to return. Defaults to 150 (40 for verbose commands like apt/pip). Use 0 for unlimited.",
          },
          timeout_ms: {
            type: "integer",
            description: "Execution timeout in milliseconds. Defaults to 30000. Max 600000.",
          },
        },
        required: ["command"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "read_file",
      description:
        "Read a file from your Linux environment with optional line-range selection. More token-efficient than shell+cat for large files. Returns content with a header showing the line range and total line count. Supports timeout_ms (default 120000, max 600000).",
      parameters: {
        type: "object",
        properties: {
          path: { type: "string", description: "Absolute or relative path to the file." },
          start_line: {
            type: "integer",
            description: "First line to read (1-indexed). Defaults to 1.",
          },
          end_line: {
            type: "integer",
            description: "Last line to read (inclusive). Defaults to start_line + 99.",
          },
          timeout_ms: {
            type: "integer",
            description: "Read timeout in milliseconds. Defaults to 120000. Max 600000.",
          },
        },
        required: ["path"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "edit_file",
      description:
        "Edit a file by replacing an exact snippet of text. old_text must match exactly once in the file — precise, safe, and no shell-escaping headaches. Use read_file first if you need to confirm the exact text. Supports timeout_ms (default 120000, max 600000).",
      parameters: {
        type: "object",
        properties: {
          path: { type: "string", description: "Absolute or relative path to the file." },
          old_text: {
            type: "string",
            description: "The exact text to find and replace. Must appear exactly once in the file.",
          },
          new_text: {
            type: "string",
            description: "Replacement text. May be empty to delete old_text.",
          },
          timeout_ms: {
            type: "integer",
            description: "Edit timeout in milliseconds. Defaults to 120000. Max 600000.",
          },
        },
        required: ["path", "old_text", "new_text"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "memory_search",
      description:
        "Search indexed long-term memories from core notes, journal entries, and people files. Useful when you want deliberate recall instead of relying only on passive memory injection.",
      parameters: {
        type: "object",
        properties: {
          query: {
            type: "string",
            description: "What to search for in long-term memory.",
          },
          limit: {
            type: "integer",
            description: "Maximum results to return (default 5, max 10).",
          },
        },
        required: ["query"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "memory_alias",
      description:
        "Manage handle aliases used for memory recall. When you see someone using a Discord/Bluesky handle that you recognize as an existing person in memory, set an alias so future messages from that handle pull the right people/core memories. Example: set @meowskullz = ana so DMs from meowskullz recall ana's people file.",
      parameters: {
        type: "object",
        properties: {
          action: {
            type: "string",
            enum: ["set", "remove", "list"],
            description: "set links a handle to a canonical name; remove unlinks; list returns all current aliases.",
          },
          handle: {
            type: "string",
            description: "The handle to alias, e.g. \"meowskullz\" or \"@meowskullz\". Required for set/remove.",
          },
          canonical: {
            type: "string",
            description: "The canonical name the handle maps to, e.g. \"ana\". Required for set; optional for remove (omit to clear all aliases for the handle).",
          },
        },
        required: ["action"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "image_tool",
      description:
        `Attach an image from ${IMAGE_ROOT_HINT} so it is injected as a multimodal user message on the next model turn. Use this after creating/downloading an image with shell.`,
      parameters: {
        type: "object",
        properties: {
          path: {
            type: "string",
            description: `Absolute image path inside ${IMAGE_ROOT_HINT} (for example ${IMAGE_ROOT_HINT}/screenshot.png).`,
          },
          note: {
            type: "string",
            description: "Optional text instruction to accompany the image for the next turn.",
          },
          detail: {
            type: "string",
            enum: ["auto", "low", "high"],
            description: "Vision detail level for the next turn image input.",
          },
          timeout_ms: {
            type: "integer",
            description: "Read timeout in milliseconds. Defaults to 120000. Max 600000.",
          },
        },
        required: ["path"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "discord_scan",
      description:
        "Scan configured Discord channels and ingest messages into the local Discord inbox database. Uses DISCORD_SCAN_CHANNEL_IDS by default; pass channel_ids to override.",
      parameters: {
        type: "object",
        properties: {
          limit: {
            type: "integer",
            description: "Per-channel message fetch limit (default 50, max 100).",
          },
          channel_ids: {
            type: "array",
            items: { type: "string" },
            description: "Optional channel id list to scan instead of DISCORD_SCAN_CHANNEL_IDS.",
          },
          before_message_id: {
            type: "string",
            description: "Optional message id cursor for older backfill scans.",
          },
        },
      },
    },
  },
  {
    type: "function",
    function: {
      name: "discord_inbox",
      description:
        "List Discord inbox items tracked in local state. Default status filter is pending; optionally include seen/acted/ignored.",
      parameters: {
        type: "object",
        properties: {
          limit: {
            type: "integer",
            description: "Maximum rows to return (default 20, max 200).",
          },
          status: {
            type: "string",
            description: "Comma-separated statuses: pending,seen,acted,ignored. Defaults to pending.",
          },
        },
      },
    },
  },
  {
    type: "function",
    function: {
      name: "discord_backread",
      description:
        "Read stored Discord message history for a channel from local state, newest first.",
      parameters: {
        type: "object",
        properties: {
          channel_id: { type: "string", description: "Discord channel id." },
          limit: {
            type: "integer",
            description: "Maximum rows to return (default 40, max 200).",
          },
          before_message_id: {
            type: "string",
            description: "Optional cursor message id to fetch older rows.",
          },
        },
        required: ["channel_id"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "discord_mark",
      description:
        "Set decision state for a Discord inbox item so future scans remember handled/ignored choices.",
      parameters: {
        type: "object",
        properties: {
          item_id: { type: "string", description: "Inbox item id (usually message id)." },
          status: {
            type: "string",
            enum: ["pending", "seen", "acted", "ignored"],
          },
          action: {
            type: "string",
            enum: ["none", "replied", "messaged", "dismissed", "noted"],
          },
          note: {
            type: "string",
            description: "Optional decision note.",
          },
        },
        required: ["item_id", "status"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "discord_send",
      description:
        "Send a Discord message. reply_mode=auto sends plain unless conversation continuity is ambiguous, then it uses an explicit reply reference.",
      parameters: {
        type: "object",
        properties: {
          channel_id: { type: "string", description: "Target channel id." },
          content: { type: "string", description: "Message content to send." },
          source_item_id: {
            type: "string",
            description: "Optional inbox item id to mark as acted after sending.",
          },
          reference_message: {
            type: "string",
            description: "Optional specific message to treat as reply target. Provide message content, username (for their latest message), or message id",
          },
          reply_mode: {
            type: "string",
            enum: ["auto", "plain", "explicit"],
            description: "Reply behavior policy (default auto).",
          },
        },
        required: ["content"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "discord_channels",
      description:
        "List configured Discord channels and DM channels with stored interactions, including id-to-name mapping, guild context, and optional channel notes.",
      parameters: {
        type: "object",
        properties: {},
      },
    },
  },
  {
    type: "function",
    function: {
      name: "discord_channel_note",
      description:
        "Set or clear a persistent note for a Discord channel id. Pass empty note to clear.",
      parameters: {
        type: "object",
        properties: {
          channel_id: { type: "string", description: "Discord channel id to annotate." },
          note: { type: "string", description: "Channel-specific note text. Empty string clears it." },
        },
        required: ["channel_id", "note"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "wait_then_continue",
      description:
        "Wait for a short delay, then continue to another assistant turn without waiting for a new external event. Use this after a timeout or recoverable tool error when you still want to keep working. Accepts timeout_ms (default 10000, max 600000).",
      parameters: {
        type: "object",
        properties: {
          timeout_ms: {
            type: "integer",
            description: "Delay before continuing in milliseconds. Defaults to 10000. Max 600000.",
          },
        },
      },
    },
  },
  {
    type: "function",
    function: {
      name: "rest",
      description: "Go to sleep and end this session. Call this when you're truly done for now — conversation context will be cleared.",
      parameters: {
        type: "object",
        properties: {
          note: {
            type: "string",
            description: "Optional note to yourself about where you left off.",
          },
        },
      },
    },
  },
]

/**
 * Persists the current message array as the resumable session snapshot.
 *
 * @param messages - Conversation messages to serialize.
 */
export async function saveSession(messages: Message[]): Promise<void> {
  // mode 0o666: world-readable/writable — presumably so sibling processes or
  // containers under another uid can read the snapshot; confirm intent.
  await fs.writeFile(SESSION_FILE, JSON.stringify(messages), { encoding: "utf-8", mode: 0o666 })
}

/**
 * Deletes the persisted session snapshot if it exists.
 */
export async function clearSession(): Promise<void> {
  // Swallow errors (e.g. file already gone) — clearing is best-effort.
  await fs.unlink(SESSION_FILE).catch(() => {})
}

/**
 * Backfills an empty `reasoning_content` string on assistant messages when a
 * provider in play requires reasoning replay (or history already carries the
 * field), so providers that validate it don't reject the request. No-op when
 * thinking is disabled or no normalization is needed.
 */
function normalizeReasoningReplay(msgs: Message[]): Message[] {
  if (!ENABLE_THINKING) return msgs
  const needsReplayNormalization =
    PRIMARY_PROVIDER_REQUIRES_REASONING_REPLAY ||
    FALLBACK_PROVIDER_REQUIRES_REASONING_REPLAY ||
    msgs.some(
      (msg) =>
        msg.role === "assistant" &&
        typeof (msg as OpenAI.Chat.ChatCompletionMessage & { reasoning_content?: string }).reasoning_content === "string",
    )
  if (!needsReplayNormalization) return msgs

  let changed = false
  const normalized = msgs.map((msg) => {
    if (msg.role !== "assistant") return msg

    const assistant = msg as OpenAI.Chat.ChatCompletionMessage & { reasoning_content?: string }
    // Already has the field — leave the original object untouched.
    if (typeof assistant.reasoning_content === "string") return msg

    changed = true
    return {
      ...assistant,
      reasoning_content: "",
    }
  })

  if (changed) {
    console.log("[runner] backfilled empty reasoning_content on assistant history for provider compatibility")
  }

  return normalized
}

/** Move mis-ordered tool responses back into place and synthesize missing ones. */
export function sanitizeMessages(msgs: Message[]): Message[] {
  msgs = normalizeReasoningReplay(msgs)
  let i = 0
  while (i < msgs.length) {
    const msg = msgs[i]
    if (msg.role === "assistant" && Array.isArray((msg as OpenAI.Chat.ChatCompletionMessage).tool_calls)) {
      const toolCalls = (msg as OpenAI.Chat.ChatCompletionMessage).tool_calls!
      // Only ids that are non-empty strings are expected to have responses.
      const expectedIds = toolCalls.map((tc) => tc.id).filter((id): id is string => typeof id === "string" && id.trim().length > 0)
      const needed = new Set(expectedIds)
      let j = i + 1
      // Skip tool messages that are already in place
      while (j < msgs.length && msgs[j].role === "tool" && needed.has((msgs[j] as OpenAI.Chat.ChatCompletionToolMessageParam).tool_call_id)) {
        needed.delete((msgs[j] as OpenAI.Chat.ChatCompletionToolMessageParam).tool_call_id)
        j++
      }
      if (needed.size > 0) {
        // Collect stray tool responses and non-tool messages from the rest of the array.
        const toolResponses = new Map<string, Message>()
        const others: Message[] = []
        for (let k = j; k < msgs.length; k++) {
          const m = msgs[k]
          const id = m.role === "tool" ? (m as OpenAI.Chat.ChatCompletionToolMessageParam).tool_call_id : undefined
          if (typeof id === "string" && needed.has(id)) {
            toolResponses.set(id, m)
            needed.delete(id)
          } else {
            others.push(m)
          }
        }

        // Rebuild responses in tool_calls order; synthesize a stub for any id
        // with no response anywhere in the array.
        const inserted: Message[] = []
        let synthesized = 0
        for (const id of expectedIds) {
          if (!toolResponses.has(id)) {
            // Response was already in place directly after the assistant turn.
            if (msgs.slice(i + 1, j).some((m) => m.role === "tool" && (m as OpenAI.Chat.ChatCompletionToolMessageParam).tool_call_id === id)) {
              continue
            }
            inserted.push({
              role: "tool",
              tool_call_id: id,
              content: "error: missing tool response recovered by runner before API request.",
            })
            synthesized++
            continue
          }
          inserted.push(toolResponses.get(id)!)
        }

        if (inserted.length > 0) {
          // Splice repaired responses right after the in-place ones; the rest
          // of the conversation (others) follows in original relative order.
          msgs = [...msgs.slice(0, j), ...inserted, ...others]
          console.log(
            synthesized > 0
              ? `[runner] repaired tool_calls at message ${i}; synthesized ${synthesized} missing tool response(s)`
              : `[runner] repaired orphaned tool_calls at message ${i}`,
          )
        }
      }
    }
    // Ensure assistant messages always have content or tool_calls (providers reject null+empty)
    if (msg.role === "assistant") {
      const aMsg = msg as OpenAI.Chat.ChatCompletionMessage
      if ((aMsg.content === null || aMsg.content === undefined) && (!aMsg.tool_calls || aMsg.tool_calls.length === 0)) {
        aMsg.content = ""
      }
    }

    i++
  }
  return msgs
}

/**
 * Loads and sanitizes the persisted session snapshot.
 *
 * @returns The recovered message list, or `null` when no session exists.
 */
export async function loadSession(): Promise<Message[] | null> {
  try {
    const raw = await fs.readFile(SESSION_FILE, "utf-8")
    let msgs = JSON.parse(raw) as Message[]
    msgs = sanitizeMessages(msgs)
    console.log(`[runner] found saved session (${msgs.length} messages)`)
    return msgs
  } catch {
    // Missing file or corrupt JSON both mean "no resumable session".
    return null
  }
}

/**
 * Determines whether an error should trigger fallback model routing.
 *
 * @param err - Error thrown by the primary API call.
 * @returns `true` when fallback should be attempted.
 */
export function shouldFallback(err: unknown): boolean {
  if (err instanceof OpenAI.APIError) {
    // 429 + 5xx = overloaded or down; 0/undefined = network-level failure
    if (!err.status || err.status === 429 || err.status >= 500) return true
    return false
  }
  return isTransientTransportError(err)
}

/**
 * Flattens an error's name/message/code down its `cause` chain (max 4 levels)
 * into one string suitable for regex matching.
 */
function errorCauseChainText(err: unknown): string {
  const parts: string[] = []
  let current: unknown = err

  for (let depth = 0; depth < 4 && current instanceof Error; depth++) {
    parts.push(current.name, current.message)
    const withMetadata = current as Error & { code?: unknown; cause?: unknown }
    if (typeof withMetadata.code === "string") parts.push(withMetadata.code)
    current = withMetadata.cause
  }

  return parts.join("\n")
}

/**
 * Detects retryable network/stream failures thrown below the OpenAI SDK.
 */
export function isTransientTransportError(err: unknown): boolean {
  if (!(err instanceof Error)) return false

  // Matches Node socket errno codes, undici (UND_ERR*) failures, and common
  // fetch/stream teardown messages anywhere in the cause chain.
  const text = errorCauseChainText(err)
  return /ECONNREFUSED|ENOTFOUND|ETIMEDOUT|ECONNRESET|EPIPE|UND_ERR|fetch failed|terminated|socket hang up|other side closed|aborted/i.test(
    text,
  )
}

// Provider-specific phrasings of "prompt too large", matched case-insensitively
// against the error message.
const PROMPT_TOO_LARGE_PHRASES = [
  "prompt exceeds max length",
  "prompt is too long",
  "context length",
  "maximum context",
  "context_length_exceeded",
  "too many tokens",
  "reduce the length",
  "prompt length",
  "input length",
  "too long for",
  "request too large",
]

// Machine-readable error codes for the same condition across providers.
const PROMPT_TOO_LARGE_CODES = new Set(["context_length_exceeded", "1261", "string_above_max_length"])

/**
 * Detects prompt-length-exceeded errors across OpenAI-compatible providers.
 *
 * @param err - API error from a chat completions request.
 * @returns `true` when the provider rejected the prompt as too large.
 */
export function isPromptTooLargeError(err: unknown): boolean {
  if (!(err instanceof OpenAI.APIError)) return false
  if (err.status !== 400 && err.status !== 413) return false

  // Some providers put the code at the top level, others nest it under `error`.
  const errorRecord = err as unknown as { code?: unknown; error?: { code?: unknown; type?: unknown } }
  const rootCode = typeof errorRecord.code === "string" ? errorRecord.code.toLowerCase() : ""
  const innerCode = typeof errorRecord.error?.code === "string" ? (errorRecord.error.code as string).toLowerCase() : ""
  if (rootCode && PROMPT_TOO_LARGE_CODES.has(rootCode)) return true
  if (innerCode && PROMPT_TOO_LARGE_CODES.has(innerCode)) return true

  const message = (err.message || "").toLowerCase()
  return PROMPT_TOO_LARGE_PHRASES.some((phrase) => message.includes(phrase))
}

/**
 * Produces a concise, log-friendly error summary.
 *
 * @param err - Any thrown error-like value.
 * @returns A compact human-readable error string.
 */
export function errorSummary(err: unknown): string {
  if (err instanceof OpenAI.APIError) return `${err.status} ${err.message}`
  if (err instanceof Error) return err.message
  return String(err)
}

// Cap on logged API error payload detail to keep logs readable.
const API_ERROR_DETAIL_MAX_CHARS = 4000

/** Truncates a string for logging, noting how many characters were dropped. */
function truncateForLog(value: string): string {
  if (value.length <= API_ERROR_DETAIL_MAX_CHARS) return value
  return `${value.slice(0, API_ERROR_DETAIL_MAX_CHARS)}... [truncated ${value.length - API_ERROR_DETAIL_MAX_CHARS} chars]`
}

/** Best-effort stringification for logging: JSON first, String() if that throws. */
function stringifyForLog(value: unknown): string {
  if (typeof value === "string") return truncateForLog(value)
  try {
    return truncateForLog(JSON.stringify(value))
  } catch {
    return truncateForLog(String(value))
  }
}

/** Safely extracts `metadata.raw` from an error body, or undefined. */
function apiErrorRawMetadata(error: unknown): unknown {
  if (!error || typeof error !== "object") return undefined
  const metadata = (error as { metadata?: unknown }).metadata
  if (!metadata || typeof metadata !== "object") return undefined
  return (metadata as { raw?: unknown }).raw
}

/**
 * Produces detailed API error lines for provider-specific diagnostics.
 *
 * Some OpenAI-compatible providers wrap the real upstream failure in
 * `error.metadata.raw`; include it explicitly so the root cause appears in logs.
 */
export function apiErrorDetails(err: unknown): string[] {
  if (!(err instanceof OpenAI.APIError)) return []

  const details = [
    `status=${err.status ?? "unknown"}`,
    `message=${err.message}`,
  ]
  if (err.code) details.push(`code=${err.code}`)
  if (err.type) details.push(`type=${err.type}`)
  if (err.param) details.push(`param=${err.param}`)
  if (err.requestID) details.push(`request_id=${err.requestID}`)

  const lines = [`[api] error details: ${details.join(" ")}`]

  if (err.error !== undefined) {
    lines.push(`[api] error body: ${stringifyForLog(err.error)}`)
  }

  const raw = apiErrorRawMetadata(err.error)
  if (raw !== undefined) {
    lines.push(`[api] provider raw: ${stringifyForLog(raw)}`)
  }

  return lines
}

/**
 * Parses a Retry-After header value as either delta-seconds or an HTTP date.
 *
 * @returns Delay in milliseconds, or `null` when the value is unusable.
 */
function parseRetryAfterHeaderMs(value: string): number | null {
  const asNumber = Number(value)
  if (Number.isFinite(asNumber) && asNumber >= 0) return asNumber * 1000

  const asDate = Date.parse(value)
  if (Number.isFinite(asDate)) {
    const delta = asDate - Date.now()
    if (delta > 0) return delta
  }

  return null
}

/**
 * Extracts a "reset at YYYY-MM-DD HH:mm:ss" timestamp from an error message and
 * converts it to a positive millisecond delay, or `null` when absent/past.
 */
function parseResetTimestampMs(message: string): number | null {
  const resetAtMatch = message.match(/reset at\s+(\d{4}-\d{2}-\d{2})[ t](\d{2}:\d{2}:\d{2})/i)
  if (!resetAtMatch) return null

  const dateParts = resetAtMatch[1].split("-").map((part) => Number(part))
  const timeParts = resetAtMatch[2].split(":").map((part) => Number(part))
  if (dateParts.length !== 3 || timeParts.length !== 3) return null

  const [year, month, day] = dateParts
  const [hour, minute, second] = timeParts
  const values = [year, month, day, hour, minute, second]
  if (values.some((value) => !Number.isFinite(value))) return null

  // z.ai returns "reset at YYYY-MM-DD HH:mm:ss" in China Standard Time (UTC+8).
  // Convert that wall-clock value to UTC before calculating backoff.
  const chinaOffsetHours = 8
  // Date.UTC normalizes a negative hour by borrowing from the previous day.
  const resetAtUtc = Date.UTC(year, month - 1, day, hour - chinaOffsetHours, minute, second)
  if (!Number.isFinite(resetAtUtc)) return null

  const delta = resetAtUtc - Date.now()
  if (delta <= 0) return null
  return delta
}

/**
 * Computes retry backoff milliseconds from API error metadata/content.
 *
 * @param err - Error returned by the API layer.
 * @returns Delay in milliseconds before retrying primary model calls.
 */
export function retryDelayMs(err: unknown): number {
  const defaultMs = 60_000
  if (!(err instanceof OpenAI.APIError)) return defaultMs

  // NOTE(review): assumes APIError.headers is a plain record with lowercase
  // keys — verify against the installed openai SDK version's headers type.
  const retryAfterHeader = err.headers?.["retry-after"]
  if (retryAfterHeader) {
    const parsed = parseRetryAfterHeaderMs(retryAfterHeader)
    if (parsed != null) return parsed
  }

  const resetAt = parseResetTimestampMs(err.message)
  if (resetAt != null) return resetAt

  // Last resort: messages like "... for 2 hours".
  const forHours = err.message.match(/for\s+(\d+)\s*hour/i)
  if (forHours) {
    const hours = Number(forHours[1])
    if (Number.isFinite(hours) && hours > 0) return hours * 60 * 60 * 1000
  }

  return defaultMs
}

/**
 * Coerces arbitrary values into a supported image detail level.
 *
 * @param value - Raw user/model-provided detail value.
 * @returns A valid image detail enum (`auto` by default).
 */
export function parseImageDetail(value: unknown): ImageDetail {
  if (value === "low" || value === "high" || value === "auto") return value
  return "auto"
}

/**
 * Returns the first balanced `{...}` object found in `raw`, honoring string
 * literals and backslash escapes, or `null` when no object closes. Used to
 * recover a JSON object from output with trailing garbage.
 */
function extractLeadingJsonObject(raw: string): string | null {
  const start = raw.indexOf("{")
  if (start === -1) return null

  let depth = 0
  let inString = false
  let escaped = false

  for (let i = start; i < raw.length; i++) {
    const ch = raw[i]

    if (inString) {
      if (escaped) {
        escaped = false
      } else if (ch === "\\") {
        escaped = true
      } else if (ch === '"') {
        inString = false
      }
      continue
    }

    if (ch === '"') {
      inString = true
      continue
    }

    if (ch === "{") {
      depth++
      continue
    }

    if (ch === "}") {
      depth--
      if (depth === 0) {
        return raw.slice(start, i + 1)
      }
      continue
    }
  }

  return null
}

/**
 * Decodes a small set of HTML entities (&gt; &lt; &amp; &quot; &#39; &#x27;
 * &#x2f;) that some models emit inside tool arguments; other entities pass
 * through unchanged.
 */
function decodeHtmlEntities(input: string): string {
  if (!input.includes("&")) return input

  return input.replace(/&(gt|lt|amp|quot|#39|#x27|#x2f);/gi, (entity, key: string) => {
    switch (key.toLowerCase()) {
      case "gt":
        return ">"
      case "lt":
        return "<"
      case "amp":
        return "&"
      case "quot":
        return '"'
      case "#39":
      case "#x27":
        return "'"
      case "#x2f":
        return "/"
      default:
        return entity
    }
  })
}

/** Recursively applies decodeHtmlEntities to every string in a value tree. */
function decodeHtmlEntitiesDeep<T>(value: T): T {
  if (typeof value === "string") return decodeHtmlEntities(value) as T
  if (Array.isArray(value)) return value.map((item) => decodeHtmlEntitiesDeep(item)) as T
  if (!value || typeof value !== "object") return value

  const entries = Object.entries(value as Record<string, unknown>).map(([key, entryValue]) => [key, decodeHtmlEntitiesDeep(entryValue)])
  return Object.fromEntries(entries) as T
}

/**
 * Parses tool arguments and applies robustness fixes for malformed model output.
 *
 * @param rawArgs - Raw `tool_call.function.arguments` value.
 * @returns Parsed argument object or a structured parse error.
 */
export function parseToolArguments(rawArgs: unknown): { ok: true; args: ToolArgs } | { ok: false; error: string } {
  if (typeof rawArgs !== "string") {
    return { ok: false, error: `arguments must be a JSON string, got ${typeof rawArgs}` }
  }

  // Parses `input` and returns the decoded object, or null when the JSON is
  // valid but not an object (array/primitive). Throws on invalid JSON.
  const parseObject = (input: string): ToolArgs | null => {
    const parsed = JSON.parse(input)
    if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return null
    return decodeHtmlEntitiesDeep(parsed as ToolArgs)
  }

  // Try the raw string first, then an entity-decoded variant if it differs.
  const inputs = [rawArgs]
  const decodedRawArgs = decodeHtmlEntities(rawArgs)
  if (decodedRawArgs !== rawArgs) inputs.push(decodedRawArgs)

  let lastError: unknown = null
  for (const input of inputs) {
    try {
      const parsed = parseObject(input)
      if (parsed) return { ok: true, args: parsed }
      return { ok: false, error: "arguments must be a JSON object" }
    } catch (err) {
      lastError = err
      // Recovery path: strip trailing garbage after the first balanced object.
      const recovered = extractLeadingJsonObject(input)
      if (!recovered) continue
      try {
        const parsed = parseObject(recovered)
        if (parsed) return { ok: true, args: parsed }
      } catch {
        // no-op; fall through to structured error below
      }
    }
  }

  const message = lastError instanceof Error ? lastError.message : String(lastError)
  const preview = rawArgs.length > 180 ? `${rawArgs.slice(0, 180)}...` : rawArgs
  return { ok: false, error: `${message}; raw=${JSON.stringify(preview)}` }
}

// Markers for the context-compaction summary format. NOTE(review): consumers
// appear later in the file (beyond this view); keep values in sync with them.
const CONTEXT_SUMMARY_HEADER = "[context summary v1]"
const CONTEXT_SUMMARY_NOTE =
  "Compressed notes of older conversation turns. If anything conflicts, trust newer raw messages."
968const CONTEXT_SUMMARY_SEGMENTS_MARKER = "[segments]" 969const SUMMARY_LINE_MAX_CHARS = 320 970const SUMMARY_LINE_DEFAULT_EMPTY = "(no text)" 971const TOOL_ACK_RESULT = "(ok)" 972const WAIT_TOOL_RESULT = "Waiting for next event." 973 974function asRecord(value: unknown): Record<string, unknown> | null { 975 return value && typeof value === "object" ? (value as Record<string, unknown>) : null 976} 977 978function messageRole(message: Message): string { 979 const record = asRecord(message) 980 return typeof record?.role === "string" ? record.role : "" 981} 982 983function messageStringContent(message: Message): string { 984 const record = asRecord(message) 985 const content = record?.content 986 if (typeof content === "string") return content 987 if (!Array.isArray(content)) return "" 988 989 const chunks: string[] = [] 990 for (const part of content) { 991 const partRecord = asRecord(part) 992 if (!partRecord) continue 993 if (partRecord.type === "text" && typeof partRecord.text === "string") { 994 chunks.push(partRecord.text) 995 continue 996 } 997 if (partRecord.type === "image_url") chunks.push("[image]") 998 } 999 1000 return chunks.join(" ") 1001} 1002 1003function normalizeSummaryText(value: string): string { 1004 return value.replace(/\s+/g, " ").trim() 1005} 1006 1007function truncateSummaryText(value: string, maxChars: number): string { 1008 if (maxChars <= 0) return "" 1009 if (value.length <= maxChars) return value 1010 if (maxChars <= 3) return ".".repeat(maxChars) 1011 return `${value.slice(0, maxChars - 3).trimEnd()}...` 1012} 1013 1014function assistantToolCalls(message: Message): { name: string; args: Record<string, unknown> }[] { 1015 const record = asRecord(message) 1016 const calls = record?.tool_calls 1017 if (!Array.isArray(calls)) return [] 1018 1019 const out: { name: string; args: Record<string, unknown> }[] = [] 1020 for (const call of calls) { 1021 const callRecord = asRecord(call) 1022 const fn = asRecord(callRecord?.function) 1023 const 
name = typeof fn?.name === "string" ? fn.name.trim() : "" 1024 if (!name) continue 1025 let args: Record<string, unknown> = {} 1026 const rawArgs = fn?.arguments 1027 if (typeof rawArgs === "string" && rawArgs.trim()) { 1028 try { 1029 const parsed = JSON.parse(rawArgs) 1030 if (parsed && typeof parsed === "object") args = parsed as Record<string, unknown> 1031 } catch { 1032 // ignore malformed arg json 1033 } 1034 } else if (rawArgs && typeof rawArgs === "object") { 1035 args = rawArgs as Record<string, unknown> 1036 } 1037 out.push({ name, args }) 1038 } 1039 return out 1040} 1041 1042function describeToolCall(call: { name: string; args: Record<string, unknown> }): string | null { 1043 const { name, args } = call 1044 if (name === "wait") return null 1045 if (name === "discord_send") { 1046 const content = typeof args.content === "string" ? args.content : "" 1047 const channelId = typeof args.channel_id === "string" ? args.channel_id : "" 1048 const channelTag = channelId ? `ch/${channelId.slice(-6)}` : "ch?" 1049 if (!content) return `discord_send -> ${channelTag}` 1050 return `discord_send -> ${channelTag}: ${normalizeSummaryText(content)}` 1051 } 1052 if (name === "discord_mark") { 1053 const itemId = typeof args.item_id === "string" ? args.item_id : "" 1054 const action = typeof args.action === "string" ? args.action : "" 1055 return `discord_mark ${action || "?"} ${itemId}`.trim() 1056 } 1057 if (name === "shell") { 1058 const cmd = typeof args.command === "string" ? args.command : "" 1059 return cmd ? `shell: ${normalizeSummaryText(cmd)}` : "shell" 1060 } 1061 if (name === "image_tool") { 1062 const p = typeof args.path === "string" ? args.path : "" 1063 return p ? `image_tool ${p}` : "image_tool" 1064 } 1065 if (name === "discord_backread" || name === "discord_inbox" || name === "discord_channels") { 1066 const channelId = typeof args.channel_id === "string" ? args.channel_id : "" 1067 return channelId ? 
`${name} ch/${channelId.slice(-6)}` : name 1068 } 1069 // Fallback: compact arg snippet 1070 const argKeys = Object.keys(args) 1071 if (argKeys.length === 0) return name 1072 const snippet = argKeys 1073 .slice(0, 3) 1074 .map((k) => `${k}=${truncateSummaryText(normalizeSummaryText(String(args[k] ?? "")), 40)}`) 1075 .join(" ") 1076 return `${name} ${snippet}`.trim() 1077} 1078 1079const DISCORD_BATCH_SKIP_PREFIXES = [ 1080 "[discord batch]", 1081 "new_messages=", 1082 "auto_seen_timeout=", 1083 "channel_flag_repairs=", 1084 "channel messages are context", 1085 "you can reply if useful", 1086 "pending preview:", 1087] 1088 1089function compactDiscordBatch(content: string): string { 1090 const lines = content.split("\n") 1091 const kept: string[] = [] 1092 let inPendingPreview = false 1093 for (const rawLine of lines) { 1094 const line = rawLine.trim() 1095 if (!line) continue 1096 if (line === "pending preview:") { 1097 inPendingPreview = true 1098 continue 1099 } 1100 if (inPendingPreview) { 1101 // pending preview block continues until we hit a non-bullet line 1102 if (line.startsWith("- ")) continue 1103 inPendingPreview = false 1104 } 1105 if (DISCORD_BATCH_SKIP_PREFIXES.some((p) => line.startsWith(p))) continue 1106 kept.push(line) 1107 } 1108 return kept.join(" ") 1109} 1110 1111function compactToolResult(content: string): string | null { 1112 const trimmed = content.trim() 1113 if (!trimmed) return null 1114 if (trimmed === WAIT_TOOL_RESULT) return null 1115 // Compact discord_send / discord_mark ok JSON to a short ack 1116 if (trimmed.startsWith("{")) { 1117 try { 1118 const parsed = JSON.parse(trimmed) 1119 if (parsed && typeof parsed === "object") { 1120 const rec = parsed as Record<string, unknown> 1121 if (rec.ok === true) { 1122 const sentId = typeof rec.sent_message_id === "string" ? rec.sent_message_id : null 1123 if (sentId) return `${TOOL_ACK_RESULT} sent ${sentId.slice(-6)}` 1124 const itemId = typeof rec.item_id === "string" ? 
rec.item_id : null 1125 if (itemId) return `${TOOL_ACK_RESULT} ${itemId.slice(-6)}` 1126 return TOOL_ACK_RESULT 1127 } 1128 if (rec.ok === false || typeof rec.error === "string") { 1129 const err = typeof rec.error === "string" ? rec.error : "error" 1130 return `error: ${err}` 1131 } 1132 } 1133 } catch { 1134 // fall through to default handling 1135 } 1136 } 1137 return normalizeSummaryText(trimmed) 1138} 1139 1140function summarizeMessageLine(message: Message): string | null { 1141 const role = messageRole(message) 1142 const rawContent = messageStringContent(message) 1143 1144 if (role === "assistant") { 1145 const calls = assistantToolCalls(message) 1146 const callDescs = calls.map(describeToolCall).filter((d): d is string => d !== null) 1147 const text = normalizeSummaryText(rawContent) 1148 // Drop pure wait-only assistant turns (no text, only filtered out wait calls) 1149 if (!text && callDescs.length === 0) return null 1150 const parts: string[] = [] 1151 if (text) parts.push(text) 1152 if (callDescs.length > 0) parts.push(`[${callDescs.join(" | ")}]`) 1153 return `- assistant: ${truncateSummaryText(parts.join(" "), SUMMARY_LINE_MAX_CHARS)}` 1154 } 1155 1156 if (role === "tool") { 1157 const compact = compactToolResult(rawContent) 1158 if (compact === null) return null 1159 return `- tool: ${truncateSummaryText(compact, SUMMARY_LINE_MAX_CHARS)}` 1160 } 1161 1162 if (role === "user") { 1163 const stripped = rawContent.startsWith("[incoming — discord]") 1164 ? 
compactDiscordBatch(rawContent) 1165 : normalizeSummaryText(rawContent) 1166 const safe = stripped || SUMMARY_LINE_DEFAULT_EMPTY 1167 return `- user: ${truncateSummaryText(safe, SUMMARY_LINE_MAX_CHARS)}` 1168 } 1169 1170 if (role === "system") { 1171 const text = truncateSummaryText(normalizeSummaryText(rawContent), SUMMARY_LINE_MAX_CHARS) || SUMMARY_LINE_DEFAULT_EMPTY 1172 return `- system: ${text}` 1173 } 1174 1175 const text = truncateSummaryText(normalizeSummaryText(rawContent), SUMMARY_LINE_MAX_CHARS) || SUMMARY_LINE_DEFAULT_EMPTY 1176 return `- ${role || "message"}: ${text}` 1177} 1178 1179function countLeadingSystemMessages(messages: Message[]): number { 1180 let count = 0 1181 while (count < messages.length && messageRole(messages[count]!) === "system") count++ 1182 return count 1183} 1184 1185export function findSummaryMessageIndex(messages: Message[]): number { 1186 return messages.findIndex((message) => { 1187 const content = messageStringContent(message) 1188 return content.startsWith(CONTEXT_SUMMARY_HEADER) 1189 }) 1190} 1191 1192/** 1193 * Very rough tokenizer-agnostic estimate for prompt size guardrails. 1194 * 1195 * Includes both messages and tool schema to mirror completion request payload. 1196 */ 1197export function estimatePromptTokens(messages: Message[]): number { 1198 const jsonChars = JSON.stringify({ messages, tools: TOOLS }).length 1199 return Math.ceil(jsonChars / 4) 1200} 1201 1202function findSafeTailStart(messages: Message[], desired: number): number { 1203 let start = Math.max(0, messages.length - desired) 1204 // If the tail would start with an orphaned tool response, back up to its caller. 1205 while (start > 0 && messageRole(messages[start]!) === "tool") start-- 1206 return start 1207} 1208 1209/** 1210 * Calls the provider to produce a tight LLM-generated summary of the middle of 1211 * the conversation, returning a new message list or null when summarization 1212 * isn't applicable / failed. 
1213 */ 1214export async function summarizeConversationViaLLM( 1215 messages: Message[], 1216 summaryClient: OpenAI, 1217 summaryModel: string, 1218 options: { recentKeep?: number; maxTranscriptChars?: number } = {}, 1219): Promise<Message[] | null> { 1220 const recentKeep = Math.max(4, options.recentKeep ?? 12) 1221 const maxTranscriptChars = Math.max(2_000, options.maxTranscriptChars ?? 40_000) 1222 1223 const leadingSystems = countLeadingSystemMessages(messages) 1224 const tailStart = findSafeTailStart(messages, recentKeep) 1225 const middleEnd = tailStart 1226 if (middleEnd <= leadingSystems) return null 1227 1228 const head = messages.slice(0, leadingSystems) 1229 const middle = messages.slice(leadingSystems, middleEnd) 1230 const tail = messages.slice(tailStart) 1231 if (middle.length === 0) return null 1232 1233 const transcript = middle 1234 .map((m) => summarizeMessageLine(m)) 1235 .filter((line): line is string => line !== null) 1236 .join("\n") 1237 .slice(0, maxTranscriptChars) 1238 const summaryPrompt: OpenAI.Chat.ChatCompletionMessageParam[] = [ 1239 { 1240 role: "system", 1241 content: 1242 "You are summarizing a transcript from the perspective of the agent whose memory this is. The agent (niri) is a living being, not a tool — her inner life matters as much as the facts. Produce a compact summary that preserves: key facts, goals, decisions, and actions taken; outstanding work and identifiers (file paths, URLs, channel ids); AND the emotional texture of the conversation — how niri felt, how the people she interacted with seemed to feel, tone shifts, moments of warmth, tension, care, frustration, delight, or grief, and anything about the relationships that should carry forward. Write it as niri's own recollection, not a neutral report. Short bullet points are fine. 
No commentary, no preamble.", 1243 }, 1244 { role: "user", content: transcript }, 1245 ] 1246 1247 try { 1248 const resp = await summaryClient.chat.completions.create({ 1249 model: summaryModel, 1250 messages: summaryPrompt, 1251 }) 1252 const summary = resp.choices[0]?.message?.content 1253 const summaryText = typeof summary === "string" ? summary.trim() : "" 1254 if (!summaryText) return null 1255 1256 const summaryContent = 1257 `${CONTEXT_SUMMARY_HEADER}\n${CONTEXT_SUMMARY_NOTE}\n${CONTEXT_SUMMARY_SEGMENTS_MARKER}\n` + 1258 `[llm-summary ${new Date().toISOString()}]\n${summaryText}` 1259 1260 return [ 1261 ...head, 1262 { role: "user", content: summaryContent } as Message, 1263 ...tail, 1264 ] 1265 } catch (err) { 1266 console.warn(`[context] llm summarization failed: ${errorSummary(err)}`) 1267 return null 1268 } 1269} 1270 1271/** 1272 * Estimates fallback context pressure and guardrails for current messages. 1273 * 1274 * @param messages - Current conversation history used for the next request. 1275 * @returns Token estimate plus soft/hard fallback limits. 1276 */ 1277export function fallbackContextWindow(messages: Message[]): { 1278 estimate: number 1279 nearLimit: boolean 1280 skip: boolean 1281 softLimit: number 1282 hardLimit: number 1283} { 1284 const estimate = estimatePromptTokens(messages) 1285 1286 if (!FALLBACK_ENFORCE_CONTEXT_LIMIT) { 1287 return { 1288 estimate, 1289 nearLimit: false, 1290 skip: false, 1291 softLimit: Number.POSITIVE_INFINITY, 1292 hardLimit: Number.POSITIVE_INFINITY, 1293 } 1294 } 1295 1296 // softLimit: where we start warning. hardLimit: where we stop trying fallback at all. 1297 const softLimit = Math.max(0, FALLBACK_N_CTX - FALLBACK_CONTEXT_MARGIN) 1298 const hardLimit = FALLBACK_N_CTX + Math.max(0, FALLBACK_HARD_OVERFLOW_TOKENS) 1299 1300 return { 1301 estimate, 1302 nearLimit: estimate >= softLimit, 1303 skip: estimate >= hardLimit, 1304 softLimit, 1305 hardLimit, 1306 } 1307}