my harness for niri
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at master 1039 lines 36 kB view raw
1import fs from "fs/promises" 2import path from "path" 3import { fileURLToPath } from "url" 4import OpenAI from "openai" 5import { imageRootForModelInput } from "../container/index.js" 6import type { Message } from "../types.js" 7import type { ImageDetail, ToolArgs } from "./types.js" 8 9const PROJECT_ROOT = path.resolve(fileURLToPath(import.meta.url), "../../..") 10const SESSION_FILE = path.join(PROJECT_ROOT, "session.json") 11 12export const TOKEN_NUDGE_THRESHOLD = parseInt(process.env.TOKEN_NUDGE_THRESHOLD ?? "120000") 13export const FALLBACK_TOKEN_NUDGE_THRESHOLD = parseInt(process.env.FALLBACK_TOKEN_NUDGE_THRESHOLD ?? "50000") 14export const CONTEXT_COMPACT_TARGET_TOKENS = parseInt(process.env.CONTEXT_COMPACT_TARGET_TOKENS ?? "65000") 15export const CONTEXT_COMPACT_TRIGGER_TOKENS = parseInt(process.env.CONTEXT_COMPACT_TRIGGER_TOKENS ?? "90000") 16export const CONTEXT_COMPACT_RECENT_MESSAGES = parseInt(process.env.CONTEXT_COMPACT_RECENT_MESSAGES ?? "80") 17export const CONTEXT_COMPACT_CHUNK_MESSAGES = parseInt(process.env.CONTEXT_COMPACT_CHUNK_MESSAGES ?? "32") 18export const CONTEXT_COMPACT_SUMMARY_MAX_CHARS = parseInt(process.env.CONTEXT_COMPACT_SUMMARY_MAX_CHARS ?? "16000") 19 20const NIRI_ENV = (process.env.NIRI_ENV ?? "default").trim().toLowerCase() 21export const USE_FALLBACK = NIRI_ENV === "local" 22 23export const API_BASE = process.env.OPENAI_BASE_URL ?? "https://api.openai.com/v1" 24export const MODEL = process.env.MODEL ?? 
"" 25const DEFAULT_FALLBACK_BASE = "http://localhost:1234/v1" 26const isLikelyLocalBase = (baseUrl: string): boolean => { 27 const lowered = baseUrl.trim().toLowerCase() 28 return lowered.includes("localhost") || lowered.includes("127.0.0.1") 29} 30const parseBooleanEnv = (value: string | undefined, fallback: boolean): boolean => { 31 if (typeof value !== "string") return fallback 32 const normalized = value.trim().toLowerCase() 33 if (!normalized) return fallback 34 if (normalized === "true" || normalized === "1" || normalized === "yes" || normalized === "on") return true 35 if (normalized === "false" || normalized === "0" || normalized === "no" || normalized === "off") return false 36 return fallback 37} 38const parseToolChoiceEnv = (value: string | undefined, fallback: "required" | "auto" | "none"): "required" | "auto" | "none" => { 39 if (typeof value !== "string") return fallback 40 const normalized = value.trim().toLowerCase() 41 if (normalized === "required" || normalized === "auto" || normalized === "none") return normalized 42 return fallback 43} 44 45export const FALLBACK_BASE = 46 process.env.FALLBACK_OPENAI_BASE_URL ?? process.env.OPENROUTER_BASE_URL ?? process.env.LMSTUDIO_BASE_URL ?? DEFAULT_FALLBACK_BASE 47export const FALLBACK_MODEL = 48 process.env.FALLBACK_MODEL ?? process.env.OPENROUTER_MODEL ?? process.env.LMSTUDIO_MODEL ?? "zai-org/glm-4.7-flash" 49export const FALLBACK_TOOL_CHOICE = parseToolChoiceEnv(process.env.FALLBACK_TOOL_CHOICE, "required") 50const FALLBACK_N_CTX = parseInt(process.env.FALLBACK_N_CTX ?? process.env.LMSTUDIO_N_CTX ?? "4096") 51const FALLBACK_CONTEXT_MARGIN = parseInt(process.env.FALLBACK_CONTEXT_MARGIN ?? process.env.LMSTUDIO_CONTEXT_MARGIN ?? "256") 52const FALLBACK_HARD_OVERFLOW_TOKENS = parseInt( 53 process.env.FALLBACK_HARD_OVERFLOW_TOKENS ?? process.env.LMSTUDIO_HARD_OVERFLOW_TOKENS ?? 
"1024", 54) 55const FALLBACK_ENFORCE_CONTEXT_LIMIT = parseBooleanEnv( 56 process.env.FALLBACK_ENFORCE_CONTEXT_LIMIT, 57 isLikelyLocalBase(FALLBACK_BASE), 58) 59 60const fallbackApiKey = 61 process.env.FALLBACK_OPENAI_API_KEY ?? 62 process.env.OPENROUTER_API_KEY ?? 63 process.env.LMSTUDIO_API_KEY ?? 64 process.env.OPENAI_API_KEY ?? 65 (isLikelyLocalBase(FALLBACK_BASE) ? "lm-studio" : "") 66const fallbackHeaders: Record<string, string> = {} 67if (process.env.FALLBACK_OPENAI_REFERER) fallbackHeaders["HTTP-Referer"] = process.env.FALLBACK_OPENAI_REFERER 68if (process.env.FALLBACK_OPENAI_TITLE) fallbackHeaders["X-Title"] = process.env.FALLBACK_OPENAI_TITLE 69 70if (!USE_FALLBACK && !MODEL) { 71 throw new Error("MODEL is required unless fallback is forced (NIRI_ENV=local).") 72} 73 74if (!USE_FALLBACK && !process.env.OPENAI_API_KEY) { 75 throw new Error("OPENAI_API_KEY is required unless fallback is forced (NIRI_ENV=local).") 76} 77 78if (USE_FALLBACK && !fallbackApiKey) { 79 throw new Error( 80 "Fallback API key is required in local mode. Set FALLBACK_OPENAI_API_KEY (or OPENROUTER_API_KEY / LMSTUDIO_API_KEY).", 81 ) 82} 83 84export const client = USE_FALLBACK 85 ? null 86 : new OpenAI({ 87 baseURL: API_BASE, 88 apiKey: process.env.OPENAI_API_KEY!, 89 }) 90 91export const fallbackClient = new OpenAI({ 92 baseURL: FALLBACK_BASE, 93 apiKey: fallbackApiKey || "lm-studio", // Keep LM Studio default when running against localhost. 94 defaultHeaders: Object.keys(fallbackHeaders).length ? fallbackHeaders : undefined, 95}) 96 97console.log(`[config] primary=${MODEL} @ ${API_BASE}`) 98console.log(`[config] fallback=${FALLBACK_MODEL} @ ${FALLBACK_BASE}`) 99console.log(`[config] env=${NIRI_ENV} use_fallback=${USE_FALLBACK}`) 100 101const IMAGE_ROOT_HINT = imageRootForModelInput() 102 103export const TOOLS: OpenAI.Chat.ChatCompletionTool[] = [ 104 { 105 type: "function", 106 function: { 107 name: "shell", 108 description: 109 "Execute a bash command in your Linux environment. 
Stateful — cd, env vars, etc. persist. Output is automatically capped (default 150 lines, 40 for known-verbose commands like apt/pip/npm). Pass max_lines to override; use 0 for unlimited. You can also pass timeout_ms (default 30000, max 600000).", 110 parameters: { 111 type: "object", 112 properties: { 113 command: { type: "string" }, 114 max_lines: { 115 type: "integer", 116 description: 117 "Maximum lines to return. Defaults to 150 (40 for verbose commands like apt/pip). Use 0 for unlimited.", 118 }, 119 timeout_ms: { 120 type: "integer", 121 description: "Execution timeout in milliseconds. Defaults to 30000. Max 600000.", 122 }, 123 }, 124 required: ["command"], 125 }, 126 }, 127 }, 128 { 129 type: "function", 130 function: { 131 name: "read_file", 132 description: 133 "Read a file from your Linux environment with optional line-range selection. More token-efficient than shell+cat for large files. Returns content with a header showing the line range and total line count. Supports timeout_ms (default 120000, max 600000).", 134 parameters: { 135 type: "object", 136 properties: { 137 path: { type: "string", description: "Absolute or relative path to the file." }, 138 start_line: { 139 type: "integer", 140 description: "First line to read (1-indexed). Defaults to 1.", 141 }, 142 end_line: { 143 type: "integer", 144 description: "Last line to read (inclusive). Defaults to start_line + 99.", 145 }, 146 timeout_ms: { 147 type: "integer", 148 description: "Read timeout in milliseconds. Defaults to 120000. Max 600000.", 149 }, 150 }, 151 required: ["path"], 152 }, 153 }, 154 }, 155 { 156 type: "function", 157 function: { 158 name: "edit_file", 159 description: 160 "Edit a file by replacing an exact snippet of text. old_text must match exactly once in the file — precise, safe, and no shell-escaping headaches. Use read_file first if you need to confirm the exact text. 
Supports timeout_ms (default 120000, max 600000).", 161 parameters: { 162 type: "object", 163 properties: { 164 path: { type: "string", description: "Absolute or relative path to the file." }, 165 old_text: { 166 type: "string", 167 description: "The exact text to find and replace. Must appear exactly once in the file.", 168 }, 169 new_text: { 170 type: "string", 171 description: "Replacement text. May be empty to delete old_text.", 172 }, 173 timeout_ms: { 174 type: "integer", 175 description: "Edit timeout in milliseconds. Defaults to 120000. Max 600000.", 176 }, 177 }, 178 required: ["path", "old_text", "new_text"], 179 }, 180 }, 181 }, 182 { 183 type: "function", 184 function: { 185 name: "image_tool", 186 description: 187 `Attach an image from ${IMAGE_ROOT_HINT} so it is injected as a multimodal user message on the next model turn. Use this after creating/downloading an image with shell.`, 188 parameters: { 189 type: "object", 190 properties: { 191 path: { 192 type: "string", 193 description: `Absolute image path inside ${IMAGE_ROOT_HINT} (for example ${IMAGE_ROOT_HINT}/screenshot.png).`, 194 }, 195 note: { 196 type: "string", 197 description: "Optional text instruction to accompany the image for the next turn.", 198 }, 199 detail: { 200 type: "string", 201 enum: ["auto", "low", "high"], 202 description: "Vision detail level for the next turn image input.", 203 }, 204 timeout_ms: { 205 type: "integer", 206 description: "Read timeout in milliseconds. Defaults to 120000. Max 600000.", 207 }, 208 }, 209 required: ["path"], 210 }, 211 }, 212 }, 213 { 214 type: "function", 215 function: { 216 name: "discord_scan", 217 description: 218 "Scan configured Discord channels and ingest messages into the local Discord inbox database. 
Uses DISCORD_SCAN_CHANNEL_IDS by default; pass channel_ids to override.", 219 parameters: { 220 type: "object", 221 properties: { 222 limit: { 223 type: "integer", 224 description: "Per-channel message fetch limit (default 50, max 100).", 225 }, 226 channel_ids: { 227 type: "array", 228 items: { type: "string" }, 229 description: "Optional channel id list to scan instead of DISCORD_SCAN_CHANNEL_IDS.", 230 }, 231 before_message_id: { 232 type: "string", 233 description: "Optional message id cursor for older backfill scans.", 234 }, 235 }, 236 }, 237 }, 238 }, 239 { 240 type: "function", 241 function: { 242 name: "discord_inbox", 243 description: 244 "List Discord inbox items tracked in local state. Default status filter is pending; optionally include seen/acted/ignored.", 245 parameters: { 246 type: "object", 247 properties: { 248 limit: { 249 type: "integer", 250 description: "Maximum rows to return (default 20, max 200).", 251 }, 252 status: { 253 type: "string", 254 description: "Comma-separated statuses: pending,seen,acted,ignored. Defaults to pending.", 255 }, 256 }, 257 }, 258 }, 259 }, 260 { 261 type: "function", 262 function: { 263 name: "discord_backread", 264 description: 265 "Read stored Discord message history for a channel from local state, newest first.", 266 parameters: { 267 type: "object", 268 properties: { 269 channel_id: { type: "string", description: "Discord channel id." 
}, 270 limit: { 271 type: "integer", 272 description: "Maximum rows to return (default 40, max 200).", 273 }, 274 before_message_id: { 275 type: "string", 276 description: "Optional cursor message id to fetch older rows.", 277 }, 278 }, 279 required: ["channel_id"], 280 }, 281 }, 282 }, 283 { 284 type: "function", 285 function: { 286 name: "discord_mark", 287 description: 288 "Set decision state for a Discord inbox item so future scans remember handled/ignored choices.", 289 parameters: { 290 type: "object", 291 properties: { 292 item_id: { type: "string", description: "Inbox item id (usually message id)." }, 293 status: { 294 type: "string", 295 enum: ["pending", "seen", "acted", "ignored"], 296 }, 297 action: { 298 type: "string", 299 enum: ["none", "replied", "messaged", "dismissed", "noted"], 300 }, 301 note: { 302 type: "string", 303 description: "Optional decision note.", 304 }, 305 }, 306 required: ["item_id", "status"], 307 }, 308 }, 309 }, 310 { 311 type: "function", 312 function: { 313 name: "discord_send", 314 description: 315 "Send a Discord message. reply_mode=auto sends plain unless conversation continuity is ambiguous, then it uses an explicit reply reference.", 316 parameters: { 317 type: "object", 318 properties: { 319 channel_id: { type: "string", description: "Target channel id." }, 320 content: { type: "string", description: "Message content to send." 
}, 321 source_item_id: { 322 type: "string", 323 description: "Optional inbox item id to mark as acted after sending.", 324 }, 325 reference_message_id: { 326 type: "string", 327 description: "Optional specific message id to treat as reply target.", 328 }, 329 reply_mode: { 330 type: "string", 331 enum: ["auto", "plain", "explicit"], 332 description: "Reply behavior policy (default auto).", 333 }, 334 }, 335 required: ["content"], 336 }, 337 }, 338 }, 339 { 340 type: "function", 341 function: { 342 name: "discord_channels", 343 description: 344 "List known Discord channels with id-to-name mapping, guild context, and optional channel notes.", 345 parameters: { 346 type: "object", 347 properties: { 348 include_unconfigured: { 349 type: "boolean", 350 description: "When true (default), include channels seen in history even if not in DISCORD_SCAN_CHANNEL_IDS.", 351 }, 352 }, 353 }, 354 }, 355 }, 356 { 357 type: "function", 358 function: { 359 name: "discord_channel_note", 360 description: 361 "Set or clear a persistent note for a Discord channel id. Pass empty note to clear.", 362 parameters: { 363 type: "object", 364 properties: { 365 channel_id: { type: "string", description: "Discord channel id to annotate." }, 366 note: { type: "string", description: "Channel-specific note text. Empty string clears it." }, 367 }, 368 required: ["channel_id", "note"], 369 }, 370 }, 371 }, 372 { 373 type: "function", 374 function: { 375 name: "wait", 376 description: "Pause and wait for the next incoming message or event. Use this when you've finished what you're doing and want to hear back before continuing.", 377 parameters: { 378 type: "object", 379 properties: {}, 380 }, 381 }, 382 }, 383 { 384 type: "function", 385 function: { 386 name: "rest", 387 description: "Go to sleep and end this session. 
Call this when you're truly done for now — conversation context will be cleared.", 388 parameters: { 389 type: "object", 390 properties: { 391 note: { 392 type: "string", 393 description: "Optional note to yourself about where you left off.", 394 }, 395 }, 396 }, 397 }, 398 }, 399] 400 401/** 402 * Persists the current message array as the resumable session snapshot. 403 * 404 * @param messages - Conversation messages to serialize. 405 */ 406export async function saveSession(messages: Message[]): Promise<void> { 407 await fs.writeFile(SESSION_FILE, JSON.stringify(messages), { encoding: "utf-8", mode: 0o666 }) 408} 409 410/** 411 * Deletes the persisted session snapshot if it exists. 412 */ 413export async function clearSession(): Promise<void> { 414 await fs.unlink(SESSION_FILE).catch(() => {}) 415} 416 417/** Move any mis-ordered tool responses back to immediately after their assistant message. */ 418function sanitizeMessages(msgs: Message[]): Message[] { 419 let i = 0 420 while (i < msgs.length) { 421 const msg = msgs[i] 422 if (msg.role === "assistant" && Array.isArray((msg as OpenAI.Chat.ChatCompletionMessage).tool_calls)) { 423 const toolCalls = (msg as OpenAI.Chat.ChatCompletionMessage).tool_calls! 
424 const needed = new Set(toolCalls.map((tc) => tc.id)) 425 let j = i + 1 426 // Skip tool messages that are already in place 427 while (j < msgs.length && msgs[j].role === "tool" && needed.has((msgs[j] as OpenAI.Chat.ChatCompletionToolMessageParam).tool_call_id)) { 428 needed.delete((msgs[j] as OpenAI.Chat.ChatCompletionToolMessageParam).tool_call_id) 429 j++ 430 } 431 if (needed.size > 0) { 432 // Collect stray tool responses and non-tool messages from the rest of the array 433 const toolResponses: Message[] = [] 434 const others: Message[] = [] 435 for (let k = j; k < msgs.length; k++) { 436 const m = msgs[k] 437 if (m.role === "tool" && needed.has((m as OpenAI.Chat.ChatCompletionToolMessageParam).tool_call_id)) { 438 toolResponses.push(m) 439 needed.delete((m as OpenAI.Chat.ChatCompletionToolMessageParam).tool_call_id) 440 } else { 441 others.push(m) 442 } 443 if (needed.size === 0) { 444 const rest = msgs.slice(k + 1) 445 msgs = [...msgs.slice(0, j), ...toolResponses, ...others, ...rest] 446 console.log(`[runner] repaired orphaned tool_calls at message ${i}`) 447 break 448 } 449 } 450 } 451 } 452 i++ 453 } 454 return msgs 455} 456 457/** 458 * Loads and sanitizes the persisted session snapshot. 459 * 460 * @returns The recovered message list, or `null` when no session exists. 461 */ 462export async function loadSession(): Promise<Message[] | null> { 463 try { 464 const raw = await fs.readFile(SESSION_FILE, "utf-8") 465 let msgs = JSON.parse(raw) as Message[] 466 msgs = sanitizeMessages(msgs) 467 console.log(`[runner] found saved session (${msgs.length} messages)`) 468 return msgs 469 } catch { 470 return null 471 } 472} 473 474/** 475 * Determines whether an error should trigger fallback model routing. 476 * 477 * @param err - Error thrown by the primary API call. 478 * @returns `true` when fallback should be attempted. 
479 */ 480export function shouldFallback(err: unknown): boolean { 481 if (err instanceof OpenAI.APIError) { 482 // 429 + 5xx = overloaded or down; 0/undefined = network-level failure 483 return !err.status || err.status === 429 || err.status >= 500 484 } 485 // Node fetch errors (ECONNREFUSED, ENOTFOUND, ETIMEDOUT…) 486 if (err instanceof Error) { 487 return /ECONNREFUSED|ENOTFOUND|ETIMEDOUT|ECONNRESET|fetch failed/i.test(err.message) 488 } 489 return false 490} 491 492/** 493 * Produces a concise, log-friendly error summary. 494 * 495 * @param err - Any thrown error-like value. 496 * @returns A compact human-readable error string. 497 */ 498export function errorSummary(err: unknown): string { 499 if (err instanceof OpenAI.APIError) return `${err.status} ${err.message}` 500 if (err instanceof Error) return err.message 501 return String(err) 502} 503 504function parseRetryAfterHeaderMs(value: string): number | null { 505 const asNumber = Number(value) 506 if (Number.isFinite(asNumber) && asNumber >= 0) return asNumber * 1000 507 508 const asDate = Date.parse(value) 509 if (Number.isFinite(asDate)) { 510 const delta = asDate - Date.now() 511 if (delta > 0) return delta 512 } 513 514 return null 515} 516 517function parseResetTimestampMs(message: string): number | null { 518 const resetAtMatch = message.match(/reset at\s+(\d{4}-\d{2}-\d{2})[ t](\d{2}:\d{2}:\d{2})/i) 519 if (!resetAtMatch) return null 520 521 const dateParts = resetAtMatch[1].split("-").map((part) => Number(part)) 522 const timeParts = resetAtMatch[2].split(":").map((part) => Number(part)) 523 if (dateParts.length !== 3 || timeParts.length !== 3) return null 524 525 const [year, month, day] = dateParts 526 const [hour, minute, second] = timeParts 527 const values = [year, month, day, hour, minute, second] 528 if (values.some((value) => !Number.isFinite(value))) return null 529 530 // z.ai returns "reset at YYYY-MM-DD HH:mm:ss" in China Standard Time (UTC+8). 
531 // Convert that wall-clock value to UTC before calculating backoff. 532 const chinaOffsetHours = 8 533 const resetAtUtc = Date.UTC(year, month - 1, day, hour - chinaOffsetHours, minute, second) 534 if (!Number.isFinite(resetAtUtc)) return null 535 536 const delta = resetAtUtc - Date.now() 537 if (delta <= 0) return null 538 return delta 539} 540 541/** 542 * Computes retry backoff milliseconds from API error metadata/content. 543 * 544 * @param err - Error returned by the API layer. 545 * @returns Delay in milliseconds before retrying primary model calls. 546 */ 547export function retryDelayMs(err: unknown): number { 548 const defaultMs = 60_000 549 if (!(err instanceof OpenAI.APIError)) return defaultMs 550 551 const retryAfterHeader = err.headers?.["retry-after"] 552 if (retryAfterHeader) { 553 const parsed = parseRetryAfterHeaderMs(retryAfterHeader) 554 if (parsed != null) return parsed 555 } 556 557 const resetAt = parseResetTimestampMs(err.message) 558 if (resetAt != null) return resetAt 559 560 const forHours = err.message.match(/for\s+(\d+)\s*hour/i) 561 if (forHours) { 562 const hours = Number(forHours[1]) 563 if (Number.isFinite(hours) && hours > 0) return hours * 60 * 60 * 1000 564 } 565 566 return defaultMs 567} 568 569/** 570 * Coerces arbitrary values into a supported image detail level. 571 * 572 * @param value - Raw user/model-provided detail value. 573 * @returns A valid image detail enum (`auto` by default). 
574 */ 575export function parseImageDetail(value: unknown): ImageDetail { 576 if (value === "low" || value === "high" || value === "auto") return value 577 return "auto" 578} 579 580function extractLeadingJsonObject(raw: string): string | null { 581 const start = raw.indexOf("{") 582 if (start === -1) return null 583 584 let depth = 0 585 let inString = false 586 let escaped = false 587 588 for (let i = start; i < raw.length; i++) { 589 const ch = raw[i] 590 591 if (inString) { 592 if (escaped) { 593 escaped = false 594 } else if (ch === "\\") { 595 escaped = true 596 } else if (ch === '"') { 597 inString = false 598 } 599 continue 600 } 601 602 if (ch === '"') { 603 inString = true 604 continue 605 } 606 607 if (ch === "{") { 608 depth++ 609 continue 610 } 611 612 if (ch === "}") { 613 depth-- 614 if (depth === 0) { 615 return raw.slice(start, i + 1) 616 } 617 continue 618 } 619 } 620 621 return null 622} 623 624function decodeHtmlEntities(input: string): string { 625 if (!input.includes("&")) return input 626 627 return input.replace(/&(gt|lt|amp|quot|#39|#x27|#x2f);/gi, (entity, key: string) => { 628 switch (key.toLowerCase()) { 629 case "gt": 630 return ">" 631 case "lt": 632 return "<" 633 case "amp": 634 return "&" 635 case "quot": 636 return '"' 637 case "#39": 638 case "#x27": 639 return "'" 640 case "#x2f": 641 return "/" 642 default: 643 return entity 644 } 645 }) 646} 647 648function decodeHtmlEntitiesDeep<T>(value: T): T { 649 if (typeof value === "string") return decodeHtmlEntities(value) as T 650 if (Array.isArray(value)) return value.map((item) => decodeHtmlEntitiesDeep(item)) as T 651 if (!value || typeof value !== "object") return value 652 653 const entries = Object.entries(value as Record<string, unknown>).map(([key, entryValue]) => [key, decodeHtmlEntitiesDeep(entryValue)]) 654 return Object.fromEntries(entries) as T 655} 656 657/** 658 * Parses tool arguments and applies robustness fixes for malformed model output. 
659 * 660 * @param rawArgs - Raw `tool_call.function.arguments` value. 661 * @returns Parsed argument object or a structured parse error. 662 */ 663export function parseToolArguments(rawArgs: unknown): { ok: true; args: ToolArgs } | { ok: false; error: string } { 664 if (typeof rawArgs !== "string") { 665 return { ok: false, error: `arguments must be a JSON string, got ${typeof rawArgs}` } 666 } 667 668 const parseObject = (input: string): ToolArgs | null => { 669 const parsed = JSON.parse(input) 670 if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return null 671 return decodeHtmlEntitiesDeep(parsed as ToolArgs) 672 } 673 674 const inputs = [rawArgs] 675 const decodedRawArgs = decodeHtmlEntities(rawArgs) 676 if (decodedRawArgs !== rawArgs) inputs.push(decodedRawArgs) 677 678 let lastError: unknown = null 679 for (const input of inputs) { 680 try { 681 const parsed = parseObject(input) 682 if (parsed) return { ok: true, args: parsed } 683 return { ok: false, error: "arguments must be a JSON object" } 684 } catch (err) { 685 lastError = err 686 const recovered = extractLeadingJsonObject(input) 687 if (!recovered) continue 688 try { 689 const parsed = parseObject(recovered) 690 if (parsed) return { ok: true, args: parsed } 691 } catch { 692 // no-op; fall through to structured error below 693 } 694 } 695 } 696 697 const message = lastError instanceof Error ? lastError.message : String(lastError) 698 const preview = rawArgs.length > 180 ? `${rawArgs.slice(0, 180)}...` : rawArgs 699 return { ok: false, error: `${message}; raw=${JSON.stringify(preview)}` } 700} 701 702const CONTEXT_SUMMARY_HEADER = "[context summary v1]" 703const CONTEXT_SUMMARY_NOTE = 704 "Compressed notes of older conversation turns. If anything conflicts, trust newer raw messages." 
705const CONTEXT_SUMMARY_SEGMENTS_MARKER = "[segments]" 706const CONTEXT_SUMMARY_DELIMITER = "\n\n===\n\n" 707const SUMMARY_LINE_MAX_CHARS = 180 708const SUMMARY_LINE_DEFAULT_EMPTY = "(no text)" 709 710function asRecord(value: unknown): Record<string, unknown> | null { 711 return value && typeof value === "object" ? (value as Record<string, unknown>) : null 712} 713 714function messageRole(message: Message): string { 715 const record = asRecord(message) 716 return typeof record?.role === "string" ? record.role : "" 717} 718 719function messageStringContent(message: Message): string { 720 const record = asRecord(message) 721 const content = record?.content 722 if (typeof content === "string") return content 723 if (!Array.isArray(content)) return "" 724 725 const chunks: string[] = [] 726 for (const part of content) { 727 const partRecord = asRecord(part) 728 if (!partRecord) continue 729 if (partRecord.type === "text" && typeof partRecord.text === "string") { 730 chunks.push(partRecord.text) 731 continue 732 } 733 if (partRecord.type === "image_url") chunks.push("[image]") 734 } 735 736 return chunks.join(" ") 737} 738 739function normalizeSummaryText(value: string): string { 740 return value.replace(/\s+/g, " ").trim() 741} 742 743function truncateSummaryText(value: string, maxChars: number): string { 744 if (maxChars <= 0) return "" 745 if (value.length <= maxChars) return value 746 if (maxChars <= 3) return ".".repeat(maxChars) 747 return `${value.slice(0, maxChars - 3).trimEnd()}...` 748} 749 750function assistantToolNames(message: Message): string[] { 751 const record = asRecord(message) 752 const calls = record?.tool_calls 753 if (!Array.isArray(calls)) return [] 754 755 const names: string[] = [] 756 for (const call of calls) { 757 const callRecord = asRecord(call) 758 const fn = asRecord(callRecord?.function) 759 if (typeof fn?.name === "string" && fn.name.trim()) names.push(fn.name.trim()) 760 } 761 return names 762} 763 764function 
assistantToolCallIds(message: Message): Set<string> { 765 const ids = new Set<string>() 766 const record = asRecord(message) 767 const calls = record?.tool_calls 768 if (!Array.isArray(calls)) return ids 769 770 for (const call of calls) { 771 const callRecord = asRecord(call) 772 if (typeof callRecord?.id === "string" && callRecord.id.trim()) ids.add(callRecord.id.trim()) 773 } 774 return ids 775} 776 777function toolCallId(message: Message): string | null { 778 const record = asRecord(message) 779 return typeof record?.tool_call_id === "string" && record.tool_call_id.trim() ? record.tool_call_id.trim() : null 780} 781 782function summarizeMessageLine(message: Message): string { 783 const role = messageRole(message) 784 const content = truncateSummaryText(normalizeSummaryText(messageStringContent(message)), SUMMARY_LINE_MAX_CHARS) 785 const safeContent = content || SUMMARY_LINE_DEFAULT_EMPTY 786 787 if (role === "assistant") { 788 const toolNames = assistantToolNames(message) 789 if (toolNames.length > 0 && content) return `- assistant: ${safeContent} | tools: ${toolNames.join(", ")}` 790 if (toolNames.length > 0) return `- assistant: tools: ${toolNames.join(", ")}` 791 return `- assistant: ${safeContent}` 792 } 793 if (role === "tool") { 794 const id = toolCallId(message) ?? "unknown" 795 return `- tool(${id}): ${safeContent}` 796 } 797 if (role === "user") return `- user: ${safeContent}` 798 if (role === "system") return `- system: ${safeContent}` 799 return `- ${role || "message"}: ${safeContent}` 800} 801 802function buildCompactionSegment(messages: Message[]): string { 803 const lines = messages.map((message) => summarizeMessageLine(message)) 804 const summaryLines = lines.length > 0 ? 
lines.join("\n") : `- ${SUMMARY_LINE_DEFAULT_EMPTY}` 805 return `[${new Date().toISOString()}] compacted ${messages.length} messages\n${summaryLines}` 806} 807 808function buildSummaryMessageContent(segments: string[]): string { 809 const body = segments.join(CONTEXT_SUMMARY_DELIMITER) 810 return `${CONTEXT_SUMMARY_HEADER}\n${CONTEXT_SUMMARY_NOTE}\n${CONTEXT_SUMMARY_SEGMENTS_MARKER}\n${body}` 811} 812 813function parseSummarySegments(content: string): string[] { 814 if (!content.startsWith(CONTEXT_SUMMARY_HEADER)) return [] 815 816 const marker = `\n${CONTEXT_SUMMARY_SEGMENTS_MARKER}\n` 817 const markerIndex = content.indexOf(marker) 818 if (markerIndex === -1) return [] 819 820 const body = content.slice(markerIndex + marker.length).trim() 821 if (!body) return [] 822 823 return body 824 .split(CONTEXT_SUMMARY_DELIMITER) 825 .map((segment) => segment.trim()) 826 .filter((segment) => segment.length > 0) 827} 828 829function trimSummarySegments(segments: string[], maxChars: number): string[] { 830 const safeMaxChars = Math.max(1024, maxChars) 831 const next = [...segments] 832 833 while (next.length > 1 && buildSummaryMessageContent(next).length > safeMaxChars) { 834 next.shift() 835 } 836 837 if (next.length === 0) return next 838 839 const current = buildSummaryMessageContent(next) 840 if (current.length <= safeMaxChars) return next 841 842 const fixedPrefix = buildSummaryMessageContent([]).length 843 const available = Math.max(0, safeMaxChars - fixedPrefix) 844 next[0] = truncateSummaryText(next[0]!, available) 845 return next 846} 847 848function countLeadingSystemMessages(messages: Message[]): number { 849 let count = 0 850 while (count < messages.length && messageRole(messages[count]!) 
=== "system") count++ 851 return count 852} 853 854function findSummaryMessageIndex(messages: Message[]): number { 855 return messages.findIndex((message) => { 856 const content = messageStringContent(message) 857 return content.startsWith(CONTEXT_SUMMARY_HEADER) 858 }) 859} 860 861/** 862 * Very rough tokenizer-agnostic estimate for prompt size guardrails. 863 * 864 * Includes both messages and tool schema to mirror completion request payload. 865 */ 866export function estimatePromptTokens(messages: Message[]): number { 867 const jsonChars = JSON.stringify({ messages, tools: TOOLS }).length 868 return Math.ceil(jsonChars / 4) 869} 870 871export type ContextCompactionResult = { 872 compacted: boolean 873 messages: Message[] 874 estimateBefore: number 875 estimateAfter: number 876 messagesRemoved: number 877 chunks: number 878} 879 880function normalizedObservedPromptTokens(value: number | undefined): number | null { 881 if (!Number.isFinite(value)) return null 882 const tokens = Math.ceil(value as number) 883 return tokens > 0 ? tokens : null 884} 885 886/** 887 * Applies rolling context compaction when estimated prompt size exceeds threshold. 888 * 889 * Keeps leading bootstrap system messages and recent raw turns, while replacing 890 * older slices with a durable summary message. 891 */ 892export function maybeCompactConversation(messages: Message[], observedPromptTokens?: number): ContextCompactionResult { 893 const estimateBefore = estimatePromptTokens(messages) 894 const observedBefore = normalizedObservedPromptTokens(observedPromptTokens) 895 // Calibrate the rough chars/4 heuristic with real API prompt usage when available. 896 const estimateScale = observedBefore ? 
/**
 * Applies rolling context compaction when estimated prompt size exceeds threshold.
 *
 * Keeps leading bootstrap system messages and recent raw turns, while replacing
 * older slices with a durable summary message.
 *
 * @param messages - Current conversation history (not mutated; a copy is edited).
 * @param observedPromptTokens - Optional real prompt-token usage from the API,
 *   used to calibrate the rough chars/4 estimate.
 * @returns Result describing the (possibly) compacted message list and token estimates.
 */
export function maybeCompactConversation(messages: Message[], observedPromptTokens?: number): ContextCompactionResult {
  const estimateBefore = estimatePromptTokens(messages)
  const observedBefore = normalizedObservedPromptTokens(observedPromptTokens)
  // Calibrate the rough chars/4 heuristic with real API prompt usage when available.
  // Scale is clamped to >= 1 so calibration can only make the estimate more conservative.
  const estimateScale = observedBefore ? Math.max(1, observedBefore / Math.max(1, estimateBefore)) : 1
  const effectiveBefore = Math.ceil(estimateBefore * estimateScale)

  // Below the trigger: return unchanged (but report the calibrated estimate).
  if (effectiveBefore < CONTEXT_COMPACT_TRIGGER_TOKENS) {
    return {
      compacted: false,
      messages,
      estimateBefore: effectiveBefore,
      estimateAfter: effectiveBefore,
      messagesRemoved: 0,
      chunks: 0,
    }
  }

  const chunkSize = Math.max(1, CONTEXT_COMPACT_CHUNK_MESSAGES)
  const minRecentMessages = Math.max(1, CONTEXT_COMPACT_RECENT_MESSAGES)
  let next = [...messages]
  let summaryIndex = findSummaryMessageIndex(next)
  let summaryInserted = false
  let summarySegments: string[] = []

  if (summaryIndex >= 0) {
    // Reuse the existing summary message and extend its segment list.
    const existingContent = messageStringContent(next[summaryIndex]!)
    summarySegments = parseSummarySegments(existingContent)
  } else {
    // No summary yet: insert an empty one right after the leading system
    // messages (or after the first message when there are none).
    const baseLayerEnd = (() => {
      const leadingSystems = countLeadingSystemMessages(next)
      if (leadingSystems > 0) return leadingSystems
      return next.length > 0 ? 1 : 0
    })()
    summaryIndex = Math.min(baseLayerEnd, next.length)
    next.splice(summaryIndex, 0, {
      role: "user",
      content: buildSummaryMessageContent([]),
    })
    summaryInserted = true
  }

  let estimateAfter = estimatePromptTokens(next)
  let effectiveAfter = Math.ceil(estimateAfter * estimateScale)
  let messagesRemoved = 0
  let chunks = 0

  // Compact chunk-by-chunk until the target is reached or nothing compactable remains.
  while (effectiveAfter > CONTEXT_COMPACT_TARGET_TOKENS) {
    const compactStart = summaryIndex + 1
    // The most recent minRecentMessages messages are never compacted.
    const protectedTailStart = Math.max(compactStart, next.length - minRecentMessages)
    if (protectedTailStart <= compactStart) break

    let compactEnd = Math.min(protectedTailStart, compactStart + chunkSize)
    if (compactEnd <= compactStart) break

    // Never split an assistant turn from its tool responses: absorb any tool
    // messages sitting at the chunk boundary.
    while (compactEnd < protectedTailStart && messageRole(next[compactEnd]!) === "tool") {
      compactEnd++
    }

    // If the last message in the chunk is an assistant turn with tool_calls,
    // extend the chunk until all of its tool responses are included too.
    const unresolvedToolCalls = assistantToolCallIds(next[compactEnd - 1]!)
    if (unresolvedToolCalls.size > 0) {
      let scan = compactEnd
      while (scan < protectedTailStart && messageRole(next[scan]!) === "tool") {
        const id = toolCallId(next[scan]!)
        if (id) unresolvedToolCalls.delete(id)
        scan++
        if (unresolvedToolCalls.size === 0) break
      }
      compactEnd = scan
    }

    // The extension may have landed on another boundary tool run; absorb it as well.
    while (compactEnd < protectedTailStart && messageRole(next[compactEnd]!) === "tool") {
      compactEnd++
    }

    if (compactEnd <= compactStart) break

    const removed = next.slice(compactStart, compactEnd)
    if (removed.length === 0) break

    // Fold the removed slice into the summary, keeping the summary under its char budget.
    summarySegments.push(buildCompactionSegment(removed))
    summarySegments = trimSummarySegments(summarySegments, CONTEXT_COMPACT_SUMMARY_MAX_CHARS)

    next[summaryIndex] = {
      role: "user",
      content: buildSummaryMessageContent(summarySegments),
    }
    next.splice(compactStart, removed.length)

    messagesRemoved += removed.length
    chunks += 1
    estimateAfter = estimatePromptTokens(next)
    effectiveAfter = Math.ceil(estimateAfter * estimateScale)
  }

  // Nothing was compacted and we added the placeholder summary: take it back out.
  if (messagesRemoved === 0 && summaryInserted) {
    next.splice(summaryIndex, 1)
    estimateAfter = estimatePromptTokens(next)
    effectiveAfter = Math.ceil(estimateAfter * estimateScale)
  }

  return {
    compacted: messagesRemoved > 0,
    messages: next,
    estimateBefore: effectiveBefore,
    estimateAfter: effectiveAfter,
    messagesRemoved,
    chunks,
  }
}

/**
 * Estimates fallback context pressure and guardrails for current messages.
 *
 * @param messages - Current conversation history used for the next request.
 * @returns Token estimate plus soft/hard fallback limits.
 */
export function fallbackContextWindow(messages: Message[]): {
  estimate: number
  nearLimit: boolean
  skip: boolean
  softLimit: number
  hardLimit: number
} {
  const estimate = estimatePromptTokens(messages)

  // When enforcement is off (typically non-local fallbacks), report unlimited headroom.
  if (!FALLBACK_ENFORCE_CONTEXT_LIMIT) {
    return {
      estimate,
      nearLimit: false,
      skip: false,
      softLimit: Number.POSITIVE_INFINITY,
      hardLimit: Number.POSITIVE_INFINITY,
    }
  }

  // softLimit: where we start warning. hardLimit: where we stop trying fallback at all.
  const softLimit = Math.max(0, FALLBACK_N_CTX - FALLBACK_CONTEXT_MARGIN)
  const hardLimit = FALLBACK_N_CTX + Math.max(0, FALLBACK_HARD_OVERFLOW_TOKENS)

  return {
    estimate,
    nearLimit: estimate >= softLimit,
    skip: estimate >= hardLimit,
    softLimit,
    hardLimit,
  }
}