// my harness for niri
import fs from "fs/promises"
import path from "path"
import { fileURLToPath } from "url"
import OpenAI from "openai"
import { imageRootForModelInput } from "../container/index.js"
import type { Message } from "../types.js"
import type { ImageDetail, ToolArgs } from "./types.js"

const PROJECT_ROOT = path.resolve(fileURLToPath(import.meta.url), "../../..")
const SESSION_FILE = path.join(PROJECT_ROOT, "session.json")

export const TOKEN_NUDGE_THRESHOLD = parseInt(process.env.TOKEN_NUDGE_THRESHOLD ?? "120000")
export const FALLBACK_TOKEN_NUDGE_THRESHOLD = parseInt(process.env.FALLBACK_TOKEN_NUDGE_THRESHOLD ?? "50000")
export const CONTEXT_COMPACT_TARGET_TOKENS = parseInt(process.env.CONTEXT_COMPACT_TARGET_TOKENS ?? "65000")
export const CONTEXT_COMPACT_TRIGGER_TOKENS = parseInt(process.env.CONTEXT_COMPACT_TRIGGER_TOKENS ?? "90000")
export const CONTEXT_COMPACT_RECENT_MESSAGES = parseInt(process.env.CONTEXT_COMPACT_RECENT_MESSAGES ?? "80")
export const CONTEXT_COMPACT_CHUNK_MESSAGES = parseInt(process.env.CONTEXT_COMPACT_CHUNK_MESSAGES ?? "32")
export const CONTEXT_COMPACT_SUMMARY_MAX_CHARS = parseInt(process.env.CONTEXT_COMPACT_SUMMARY_MAX_CHARS ?? "16000")
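
// How these defaults interact (see maybeCompactConversation below): once the
// estimated prompt crosses CONTEXT_COMPACT_TRIGGER_TOKENS, older turns are
// folded into a summary message in CONTEXT_COMPACT_CHUNK_MESSAGES-sized chunks
// until the estimate drops below CONTEXT_COMPACT_TARGET_TOKENS, while the most
// recent CONTEXT_COMPACT_RECENT_MESSAGES messages are never compacted.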

const NIRI_ENV = (process.env.NIRI_ENV ?? "default").trim().toLowerCase()
export const USE_FALLBACK = NIRI_ENV === "local"

export const API_BASE = process.env.OPENAI_BASE_URL ?? "https://api.openai.com/v1"
export const MODEL = process.env.MODEL ?? ""
const DEFAULT_FALLBACK_BASE = "http://localhost:1234/v1"
const isLikelyLocalBase = (baseUrl: string): boolean => {
  const lowered = baseUrl.trim().toLowerCase()
  return lowered.includes("localhost") || lowered.includes("127.0.0.1")
}
const parseBooleanEnv = (value: string | undefined, fallback: boolean): boolean => {
  if (typeof value !== "string") return fallback
  const normalized = value.trim().toLowerCase()
  if (!normalized) return fallback
  if (normalized === "true" || normalized === "1" || normalized === "yes" || normalized === "on") return true
  if (normalized === "false" || normalized === "0" || normalized === "no" || normalized === "off") return false
  return fallback
}
const parseToolChoiceEnv = (value: string | undefined, fallback: "required" | "auto" | "none"): "required" | "auto" | "none" => {
  if (typeof value !== "string") return fallback
  const normalized = value.trim().toLowerCase()
  if (normalized === "required" || normalized === "auto" || normalized === "none") return normalized
  return fallback
}

export const FALLBACK_BASE =
  process.env.FALLBACK_OPENAI_BASE_URL ?? process.env.OPENROUTER_BASE_URL ?? process.env.LMSTUDIO_BASE_URL ?? DEFAULT_FALLBACK_BASE
export const FALLBACK_MODEL =
  process.env.FALLBACK_MODEL ?? process.env.OPENROUTER_MODEL ?? process.env.LMSTUDIO_MODEL ?? "zai-org/glm-4.7-flash"
export const FALLBACK_TOOL_CHOICE = parseToolChoiceEnv(process.env.FALLBACK_TOOL_CHOICE, "required")
const FALLBACK_N_CTX = parseInt(process.env.FALLBACK_N_CTX ?? process.env.LMSTUDIO_N_CTX ?? "4096")
const FALLBACK_CONTEXT_MARGIN = parseInt(process.env.FALLBACK_CONTEXT_MARGIN ?? process.env.LMSTUDIO_CONTEXT_MARGIN ?? "256")
const FALLBACK_HARD_OVERFLOW_TOKENS = parseInt(
  process.env.FALLBACK_HARD_OVERFLOW_TOKENS ?? process.env.LMSTUDIO_HARD_OVERFLOW_TOKENS ?? "1024",
)
const FALLBACK_ENFORCE_CONTEXT_LIMIT = parseBooleanEnv(
  process.env.FALLBACK_ENFORCE_CONTEXT_LIMIT,
  isLikelyLocalBase(FALLBACK_BASE),
)

const fallbackApiKey =
  process.env.FALLBACK_OPENAI_API_KEY ??
  process.env.OPENROUTER_API_KEY ??
  process.env.LMSTUDIO_API_KEY ??
  process.env.OPENAI_API_KEY ??
  (isLikelyLocalBase(FALLBACK_BASE) ? "lm-studio" : "")
const fallbackHeaders: Record<string, string> = {}
if (process.env.FALLBACK_OPENAI_REFERER) fallbackHeaders["HTTP-Referer"] = process.env.FALLBACK_OPENAI_REFERER
if (process.env.FALLBACK_OPENAI_TITLE) fallbackHeaders["X-Title"] = process.env.FALLBACK_OPENAI_TITLE

if (!USE_FALLBACK && !MODEL) {
  throw new Error("MODEL is required unless fallback is forced (NIRI_ENV=local).")
}

if (!USE_FALLBACK && !process.env.OPENAI_API_KEY) {
  throw new Error("OPENAI_API_KEY is required unless fallback is forced (NIRI_ENV=local).")
}

if (USE_FALLBACK && !fallbackApiKey) {
  throw new Error(
    "Fallback API key is required in local mode. Set FALLBACK_OPENAI_API_KEY (or OPENROUTER_API_KEY / LMSTUDIO_API_KEY).",
  )
}

export const client = USE_FALLBACK
  ? null
  : new OpenAI({
      baseURL: API_BASE,
      apiKey: process.env.OPENAI_API_KEY!,
    })

export const fallbackClient = new OpenAI({
  baseURL: FALLBACK_BASE,
  apiKey: fallbackApiKey || "lm-studio", // Keep LM Studio default when running against localhost.
  defaultHeaders: Object.keys(fallbackHeaders).length ? fallbackHeaders : undefined,
})

console.log(`[config] primary=${MODEL} @ ${API_BASE}`)
console.log(`[config] fallback=${FALLBACK_MODEL} @ ${FALLBACK_BASE}`)
console.log(`[config] env=${NIRI_ENV} use_fallback=${USE_FALLBACK}`)

const IMAGE_ROOT_HINT = imageRootForModelInput()

export const TOOLS: OpenAI.Chat.ChatCompletionTool[] = [
  {
    type: "function",
    function: {
      name: "shell",
      description:
        "Execute a bash command in your Linux environment. Stateful — cd, env vars, etc. persist. Output is automatically capped (default 150 lines, 40 for known-verbose commands like apt/pip/npm). Pass max_lines to override; use 0 for unlimited. You can also pass timeout_ms (default 30000, max 600000).",
      parameters: {
        type: "object",
        properties: {
          command: { type: "string" },
          max_lines: {
            type: "integer",
            description:
              "Maximum lines to return. Defaults to 150 (40 for verbose commands like apt/pip). Use 0 for unlimited.",
          },
          timeout_ms: {
            type: "integer",
            description: "Execution timeout in milliseconds. Defaults to 30000. Max 600000.",
          },
        },
        required: ["command"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "read_file",
      description:
        "Read a file from your Linux environment with optional line-range selection. More token-efficient than shell+cat for large files. Returns content with a header showing the line range and total line count. Supports timeout_ms (default 120000, max 600000).",
      parameters: {
        type: "object",
        properties: {
          path: { type: "string", description: "Absolute or relative path to the file." },
          start_line: {
            type: "integer",
            description: "First line to read (1-indexed). Defaults to 1.",
          },
          end_line: {
            type: "integer",
            description: "Last line to read (inclusive). Defaults to start_line + 99.",
          },
          timeout_ms: {
            type: "integer",
            description: "Read timeout in milliseconds. Defaults to 120000. Max 600000.",
          },
        },
        required: ["path"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "edit_file",
      description:
        "Edit a file by replacing an exact snippet of text. old_text must match exactly once in the file — precise, safe, and no shell-escaping headaches. Use read_file first if you need to confirm the exact text. Supports timeout_ms (default 120000, max 600000).",
      parameters: {
        type: "object",
        properties: {
          path: { type: "string", description: "Absolute or relative path to the file." },
          old_text: {
            type: "string",
            description: "The exact text to find and replace. Must appear exactly once in the file.",
          },
          new_text: {
            type: "string",
            description: "Replacement text. May be empty to delete old_text.",
          },
          timeout_ms: {
            type: "integer",
            description: "Edit timeout in milliseconds. Defaults to 120000. Max 600000.",
          },
        },
        required: ["path", "old_text", "new_text"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "image_tool",
      description:
        `Attach an image from ${IMAGE_ROOT_HINT} so it is injected as a multimodal user message on the next model turn. Use this after creating/downloading an image with shell.`,
      parameters: {
        type: "object",
        properties: {
          path: {
            type: "string",
            description: `Absolute image path inside ${IMAGE_ROOT_HINT} (for example ${IMAGE_ROOT_HINT}/screenshot.png).`,
          },
          note: {
            type: "string",
            description: "Optional text instruction to accompany the image for the next turn.",
          },
          detail: {
            type: "string",
            enum: ["auto", "low", "high"],
            description: "Vision detail level for the next turn image input.",
          },
          timeout_ms: {
            type: "integer",
            description: "Read timeout in milliseconds. Defaults to 120000. Max 600000.",
          },
        },
        required: ["path"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "discord_scan",
      description:
        "Scan configured Discord channels and ingest messages into the local Discord inbox database. Uses DISCORD_SCAN_CHANNEL_IDS by default; pass channel_ids to override.",
      parameters: {
        type: "object",
        properties: {
          limit: {
            type: "integer",
            description: "Per-channel message fetch limit (default 50, max 100).",
          },
          channel_ids: {
            type: "array",
            items: { type: "string" },
            description: "Optional channel id list to scan instead of DISCORD_SCAN_CHANNEL_IDS.",
          },
          before_message_id: {
            type: "string",
            description: "Optional message id cursor for older backfill scans.",
          },
        },
      },
    },
  },
  {
    type: "function",
    function: {
      name: "discord_inbox",
      description:
        "List Discord inbox items tracked in local state. Default status filter is pending; optionally include seen/acted/ignored.",
      parameters: {
        type: "object",
        properties: {
          limit: {
            type: "integer",
            description: "Maximum rows to return (default 20, max 200).",
          },
          status: {
            type: "string",
            description: "Comma-separated statuses: pending,seen,acted,ignored. Defaults to pending.",
          },
        },
      },
    },
  },
  {
    type: "function",
    function: {
      name: "discord_backread",
      description:
        "Read stored Discord message history for a channel from local state, newest first.",
      parameters: {
        type: "object",
        properties: {
          channel_id: { type: "string", description: "Discord channel id." },
          limit: {
            type: "integer",
            description: "Maximum rows to return (default 40, max 200).",
          },
          before_message_id: {
            type: "string",
            description: "Optional cursor message id to fetch older rows.",
          },
        },
        required: ["channel_id"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "discord_mark",
      description:
        "Set decision state for a Discord inbox item so future scans remember handled/ignored choices.",
      parameters: {
        type: "object",
        properties: {
          item_id: { type: "string", description: "Inbox item id (usually message id)." },
          status: {
            type: "string",
            enum: ["pending", "seen", "acted", "ignored"],
          },
          action: {
            type: "string",
            enum: ["none", "replied", "messaged", "dismissed", "noted"],
          },
          note: {
            type: "string",
            description: "Optional decision note.",
          },
        },
        required: ["item_id", "status"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "discord_send",
      description:
        "Send a Discord message. reply_mode=auto sends plain unless conversation continuity is ambiguous, then it uses an explicit reply reference.",
      parameters: {
        type: "object",
        properties: {
          channel_id: { type: "string", description: "Target channel id." },
          content: { type: "string", description: "Message content to send." },
          source_item_id: {
            type: "string",
            description: "Optional inbox item id to mark as acted after sending.",
          },
          reference_message_id: {
            type: "string",
            description: "Optional specific message id to treat as reply target.",
          },
          reply_mode: {
            type: "string",
            enum: ["auto", "plain", "explicit"],
            description: "Reply behavior policy (default auto).",
          },
        },
        required: ["content"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "discord_channels",
      description:
        "List known Discord channels with id-to-name mapping, guild context, and optional channel notes.",
      parameters: {
        type: "object",
        properties: {
          include_unconfigured: {
            type: "boolean",
            description: "When true (default), include channels seen in history even if not in DISCORD_SCAN_CHANNEL_IDS.",
          },
        },
      },
    },
  },
  {
    type: "function",
    function: {
      name: "discord_channel_note",
      description:
        "Set or clear a persistent note for a Discord channel id. Pass empty note to clear.",
      parameters: {
        type: "object",
        properties: {
          channel_id: { type: "string", description: "Discord channel id to annotate." },
          note: { type: "string", description: "Channel-specific note text. Empty string clears it." },
        },
        required: ["channel_id", "note"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "wait",
      description: "Pause and wait for the next incoming message or event. Use this when you've finished what you're doing and want to hear back before continuing.",
      parameters: {
        type: "object",
        properties: {},
      },
    },
  },
  {
    type: "function",
    function: {
      name: "rest",
      description: "Go to sleep and end this session. Call this when you're truly done for now — conversation context will be cleared.",
      parameters: {
        type: "object",
        properties: {
          note: {
            type: "string",
            description: "Optional note to yourself about where you left off.",
          },
        },
      },
    },
  },
]
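
// Illustrative payload only: a well-formed call against the `shell` schema
// arrives as a JSON string in tool_call.function.arguments, e.g.
// '{"command":"ls -la","max_lines":0}'. parseToolArguments below decodes
// (and, when needed, repairs) that string.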

/**
 * Persists the current message array as the resumable session snapshot.
 *
 * @param messages - Conversation messages to serialize.
 */
export async function saveSession(messages: Message[]): Promise<void> {
  await fs.writeFile(SESSION_FILE, JSON.stringify(messages), { encoding: "utf-8", mode: 0o666 })
}

/**
 * Deletes the persisted session snapshot if it exists.
 */
export async function clearSession(): Promise<void> {
  await fs.unlink(SESSION_FILE).catch(() => {})
}

/** Move any mis-ordered tool responses back to immediately after their assistant message. */
function sanitizeMessages(msgs: Message[]): Message[] {
  let i = 0
  while (i < msgs.length) {
    const msg = msgs[i]
    if (msg.role === "assistant" && Array.isArray((msg as OpenAI.Chat.ChatCompletionMessage).tool_calls)) {
      const toolCalls = (msg as OpenAI.Chat.ChatCompletionMessage).tool_calls!
      const needed = new Set(toolCalls.map((tc) => tc.id))
      let j = i + 1
      // Skip tool messages that are already in place
      while (j < msgs.length && msgs[j].role === "tool" && needed.has((msgs[j] as OpenAI.Chat.ChatCompletionToolMessageParam).tool_call_id)) {
        needed.delete((msgs[j] as OpenAI.Chat.ChatCompletionToolMessageParam).tool_call_id)
        j++
      }
      if (needed.size > 0) {
        // Collect stray tool responses and non-tool messages from the rest of the array
        const toolResponses: Message[] = []
        const others: Message[] = []
        for (let k = j; k < msgs.length; k++) {
          const m = msgs[k]
          if (m.role === "tool" && needed.has((m as OpenAI.Chat.ChatCompletionToolMessageParam).tool_call_id)) {
            toolResponses.push(m)
            needed.delete((m as OpenAI.Chat.ChatCompletionToolMessageParam).tool_call_id)
          } else {
            others.push(m)
          }
          if (needed.size === 0) {
            const rest = msgs.slice(k + 1)
            msgs = [...msgs.slice(0, j), ...toolResponses, ...others, ...rest]
            console.log(`[runner] repaired orphaned tool_calls at message ${i}`)
            break
          }
        }
      }
    }
    i++
  }
  return msgs
}
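
// Illustrative repair, with hypothetical ids "a" and "b": a saved sequence like
//   [assistant(tool_calls: a, b), tool(a), user, tool(b)]
// is reordered to
//   [assistant(tool_calls: a, b), tool(a), tool(b), user]
// so every tool response directly follows the assistant message that requested it.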

/**
 * Loads and sanitizes the persisted session snapshot.
 *
 * @returns The recovered message list, or `null` when no session exists.
 */
export async function loadSession(): Promise<Message[] | null> {
  try {
    const raw = await fs.readFile(SESSION_FILE, "utf-8")
    let msgs = JSON.parse(raw) as Message[]
    msgs = sanitizeMessages(msgs)
    console.log(`[runner] found saved session (${msgs.length} messages)`)
    return msgs
  } catch {
    return null
  }
}

/**
 * Determines whether an error should trigger fallback model routing.
 *
 * @param err - Error thrown by the primary API call.
 * @returns `true` when fallback should be attempted.
 */
export function shouldFallback(err: unknown): boolean {
  if (err instanceof OpenAI.APIError) {
    // 429 + 5xx = overloaded or down; 0/undefined = network-level failure
    return !err.status || err.status === 429 || err.status >= 500
  }
  // Node fetch errors (ECONNREFUSED, ENOTFOUND, ETIMEDOUT…)
  if (err instanceof Error) {
    return /ECONNREFUSED|ENOTFOUND|ETIMEDOUT|ECONNRESET|fetch failed/i.test(err.message)
  }
  return false
}
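
// Sketch of the intended call-site shape (`request` is illustrative, not
// exported by this module):
//
//   try {
//     return await client!.chat.completions.create(request)
//   } catch (err) {
//     if (!shouldFallback(err)) throw err
//     console.log(`[runner] primary failed (${errorSummary(err)}), trying fallback`)
//     return await fallbackClient.chat.completions.create({ ...request, model: FALLBACK_MODEL })
//   }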

/**
 * Produces a concise, log-friendly error summary.
 *
 * @param err - Any thrown error-like value.
 * @returns A compact human-readable error string.
 */
export function errorSummary(err: unknown): string {
  if (err instanceof OpenAI.APIError) return `${err.status} ${err.message}`
  if (err instanceof Error) return err.message
  return String(err)
}

function parseRetryAfterHeaderMs(value: string): number | null {
  const asNumber = Number(value)
  if (Number.isFinite(asNumber) && asNumber >= 0) return asNumber * 1000

  const asDate = Date.parse(value)
  if (Number.isFinite(asDate)) {
    const delta = asDate - Date.now()
    if (delta > 0) return delta
  }

  return null
}

function parseResetTimestampMs(message: string): number | null {
  const resetAtMatch = message.match(/reset at\s+(\d{4}-\d{2}-\d{2})[ t](\d{2}:\d{2}:\d{2})/i)
  if (!resetAtMatch) return null

  const dateParts = resetAtMatch[1].split("-").map((part) => Number(part))
  const timeParts = resetAtMatch[2].split(":").map((part) => Number(part))
  if (dateParts.length !== 3 || timeParts.length !== 3) return null

  const [year, month, day] = dateParts
  const [hour, minute, second] = timeParts
  const values = [year, month, day, hour, minute, second]
  if (values.some((value) => !Number.isFinite(value))) return null

  // z.ai returns "reset at YYYY-MM-DD HH:mm:ss" in China Standard Time (UTC+8).
  // Convert that wall-clock value to UTC before calculating backoff.
  const chinaOffsetHours = 8
  const resetAtUtc = Date.UTC(year, month - 1, day, hour - chinaOffsetHours, minute, second)
  if (!Number.isFinite(resetAtUtc)) return null

  const delta = resetAtUtc - Date.now()
  if (delta <= 0) return null
  return delta
}
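
// Worked example: a message containing "reset at 2025-06-01 20:00:00" parses as
// 20:00 China Standard Time, i.e. Date.UTC(2025, 5, 1, 12, 0, 0) after the
// 8-hour offset, and the returned delay is that instant minus Date.now().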

/**
 * Computes retry backoff milliseconds from API error metadata/content.
 *
 * @param err - Error returned by the API layer.
 * @returns Delay in milliseconds before retrying primary model calls.
 */
export function retryDelayMs(err: unknown): number {
  const defaultMs = 60_000
  if (!(err instanceof OpenAI.APIError)) return defaultMs

  const retryAfterHeader = err.headers?.["retry-after"]
  if (retryAfterHeader) {
    const parsed = parseRetryAfterHeaderMs(retryAfterHeader)
    if (parsed != null) return parsed
  }

  const resetAt = parseResetTimestampMs(err.message)
  if (resetAt != null) return resetAt

  const forHours = err.message.match(/for\s+(\d+)\s*hour/i)
  if (forHours) {
    const hours = Number(forHours[1])
    if (Number.isFinite(hours) && hours > 0) return hours * 60 * 60 * 1000
  }

  return defaultMs
}
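
// Precedence with worked values: a Retry-After header of "30" yields 30_000ms;
// failing that, a parseable "reset at ..." timestamp in the message body;
// failing that, "for 2 hours" in the message yields 7_200_000ms; otherwise 60s.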

/**
 * Coerces arbitrary values into a supported image detail level.
 *
 * @param value - Raw user/model-provided detail value.
 * @returns A valid image detail enum (`auto` by default).
 */
export function parseImageDetail(value: unknown): ImageDetail {
  if (value === "low" || value === "high" || value === "auto") return value
  return "auto"
}

function extractLeadingJsonObject(raw: string): string | null {
  const start = raw.indexOf("{")
  if (start === -1) return null

  let depth = 0
  let inString = false
  let escaped = false

  for (let i = start; i < raw.length; i++) {
    const ch = raw[i]

    if (inString) {
      if (escaped) {
        escaped = false
      } else if (ch === "\\") {
        escaped = true
      } else if (ch === '"') {
        inString = false
      }
      continue
    }

    if (ch === '"') {
      inString = true
      continue
    }

    if (ch === "{") {
      depth++
      continue
    }

    if (ch === "}") {
      depth--
      if (depth === 0) {
        return raw.slice(start, i + 1)
      }
      continue
    }
  }

  return null
}
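
// Example: extractLeadingJsonObject('{"path":"a.txt"} trailing junk') returns
// '{"path":"a.txt"}'. Braces inside string values (e.g. '{"t":"}"}') are
// ignored thanks to the in-string and escape tracking above.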

function decodeHtmlEntities(input: string): string {
  if (!input.includes("&")) return input

  return input.replace(/&(gt|lt|amp|quot|#39|#x27|#x2f);/gi, (entity, key: string) => {
    switch (key.toLowerCase()) {
      case "gt":
        return ">"
      case "lt":
        return "<"
      case "amp":
        return "&"
      case "quot":
        return '"'
      case "#39":
      case "#x27":
        return "'"
      case "#x2f":
        return "/"
      default:
        return entity
    }
  })
}
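
// Example: decodeHtmlEntities("echo &quot;hi&quot; &amp;&amp; ls") yields
// 'echo "hi" && ls', undoing the HTML-escaping some models apply to arguments.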

function decodeHtmlEntitiesDeep<T>(value: T): T {
  if (typeof value === "string") return decodeHtmlEntities(value) as T
  if (Array.isArray(value)) return value.map((item) => decodeHtmlEntitiesDeep(item)) as T
  if (!value || typeof value !== "object") return value

  const entries = Object.entries(value as Record<string, unknown>).map(([key, entryValue]) => [key, decodeHtmlEntitiesDeep(entryValue)])
  return Object.fromEntries(entries) as T
}

/**
 * Parses tool arguments and applies robustness fixes for malformed model output.
 *
 * @param rawArgs - Raw `tool_call.function.arguments` value.
 * @returns Parsed argument object or a structured parse error.
 */
export function parseToolArguments(rawArgs: unknown): { ok: true; args: ToolArgs } | { ok: false; error: string } {
  if (typeof rawArgs !== "string") {
    return { ok: false, error: `arguments must be a JSON string, got ${typeof rawArgs}` }
  }

  const parseObject = (input: string): ToolArgs | null => {
    const parsed = JSON.parse(input)
    if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return null
    return decodeHtmlEntitiesDeep(parsed as ToolArgs)
  }

  const inputs = [rawArgs]
  const decodedRawArgs = decodeHtmlEntities(rawArgs)
  if (decodedRawArgs !== rawArgs) inputs.push(decodedRawArgs)

  let lastError: unknown = null
  for (const input of inputs) {
    try {
      const parsed = parseObject(input)
      if (parsed) return { ok: true, args: parsed }
      return { ok: false, error: "arguments must be a JSON object" }
    } catch (err) {
      lastError = err
      const recovered = extractLeadingJsonObject(input)
      if (!recovered) continue
      try {
        const parsed = parseObject(recovered)
        if (parsed) return { ok: true, args: parsed }
      } catch {
        // no-op; fall through to structured error below
      }
    }
  }

  const message = lastError instanceof Error ? lastError.message : String(lastError)
  const preview = rawArgs.length > 180 ? `${rawArgs.slice(0, 180)}...` : rawArgs
  return { ok: false, error: `${message}; raw=${JSON.stringify(preview)}` }
}
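
// Sketch of a call site (toolCall is illustrative):
//
//   const parsed = parseToolArguments(toolCall.function.arguments)
//   if (!parsed.ok) {
//     // Surface parsed.error back to the model as the tool result so it can retry.
//   } else {
//     const args = parsed.args // e.g. { command: "ls" } for the shell tool
//   }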

const CONTEXT_SUMMARY_HEADER = "[context summary v1]"
const CONTEXT_SUMMARY_NOTE =
  "Compressed notes of older conversation turns. If anything conflicts, trust newer raw messages."
const CONTEXT_SUMMARY_SEGMENTS_MARKER = "[segments]"
const CONTEXT_SUMMARY_DELIMITER = "\n\n===\n\n"
const SUMMARY_LINE_MAX_CHARS = 180
const SUMMARY_LINE_DEFAULT_EMPTY = "(no text)"
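
// A rendered summary message (see buildSummaryMessageContent below) looks like:
//
//   [context summary v1]
//   Compressed notes of older conversation turns. If anything conflicts, trust newer raw messages.
//   [segments]
//   [2025-06-01T00:00:00.000Z] compacted 32 messages
//   - user: ...
//   - assistant: ... | tools: shell
//
// with older segments separated by the "===" delimiter; the timestamp and
// summary lines here are illustrative.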

function asRecord(value: unknown): Record<string, unknown> | null {
  return value && typeof value === "object" ? (value as Record<string, unknown>) : null
}

function messageRole(message: Message): string {
  const record = asRecord(message)
  return typeof record?.role === "string" ? record.role : ""
}

function messageStringContent(message: Message): string {
  const record = asRecord(message)
  const content = record?.content
  if (typeof content === "string") return content
  if (!Array.isArray(content)) return ""

  const chunks: string[] = []
  for (const part of content) {
    const partRecord = asRecord(part)
    if (!partRecord) continue
    if (partRecord.type === "text" && typeof partRecord.text === "string") {
      chunks.push(partRecord.text)
      continue
    }
    if (partRecord.type === "image_url") chunks.push("[image]")
  }

  return chunks.join(" ")
}

function normalizeSummaryText(value: string): string {
  return value.replace(/\s+/g, " ").trim()
}

function truncateSummaryText(value: string, maxChars: number): string {
  if (maxChars <= 0) return ""
  if (value.length <= maxChars) return value
  if (maxChars <= 3) return ".".repeat(maxChars)
  return `${value.slice(0, maxChars - 3).trimEnd()}...`
}

function assistantToolNames(message: Message): string[] {
  const record = asRecord(message)
  const calls = record?.tool_calls
  if (!Array.isArray(calls)) return []

  const names: string[] = []
  for (const call of calls) {
    const callRecord = asRecord(call)
    const fn = asRecord(callRecord?.function)
    if (typeof fn?.name === "string" && fn.name.trim()) names.push(fn.name.trim())
  }
  return names
}

function assistantToolCallIds(message: Message): Set<string> {
  const ids = new Set<string>()
  const record = asRecord(message)
  const calls = record?.tool_calls
  if (!Array.isArray(calls)) return ids

  for (const call of calls) {
    const callRecord = asRecord(call)
    if (typeof callRecord?.id === "string" && callRecord.id.trim()) ids.add(callRecord.id.trim())
  }
  return ids
}

function toolCallId(message: Message): string | null {
  const record = asRecord(message)
  return typeof record?.tool_call_id === "string" && record.tool_call_id.trim() ? record.tool_call_id.trim() : null
}

function summarizeMessageLine(message: Message): string {
  const role = messageRole(message)
  const content = truncateSummaryText(normalizeSummaryText(messageStringContent(message)), SUMMARY_LINE_MAX_CHARS)
  const safeContent = content || SUMMARY_LINE_DEFAULT_EMPTY

  if (role === "assistant") {
    const toolNames = assistantToolNames(message)
    if (toolNames.length > 0 && content) return `- assistant: ${safeContent} | tools: ${toolNames.join(", ")}`
    if (toolNames.length > 0) return `- assistant: tools: ${toolNames.join(", ")}`
    return `- assistant: ${safeContent}`
  }
  if (role === "tool") {
    const id = toolCallId(message) ?? "unknown"
    return `- tool(${id}): ${safeContent}`
  }
  if (role === "user") return `- user: ${safeContent}`
  if (role === "system") return `- system: ${safeContent}`
  return `- ${role || "message"}: ${safeContent}`
}

function buildCompactionSegment(messages: Message[]): string {
  const lines = messages.map((message) => summarizeMessageLine(message))
  const summaryLines = lines.length > 0 ? lines.join("\n") : `- ${SUMMARY_LINE_DEFAULT_EMPTY}`
  return `[${new Date().toISOString()}] compacted ${messages.length} messages\n${summaryLines}`
}

function buildSummaryMessageContent(segments: string[]): string {
  const body = segments.join(CONTEXT_SUMMARY_DELIMITER)
  return `${CONTEXT_SUMMARY_HEADER}\n${CONTEXT_SUMMARY_NOTE}\n${CONTEXT_SUMMARY_SEGMENTS_MARKER}\n${body}`
}

function parseSummarySegments(content: string): string[] {
  if (!content.startsWith(CONTEXT_SUMMARY_HEADER)) return []

  const marker = `\n${CONTEXT_SUMMARY_SEGMENTS_MARKER}\n`
  const markerIndex = content.indexOf(marker)
  if (markerIndex === -1) return []

  const body = content.slice(markerIndex + marker.length).trim()
  if (!body) return []

  return body
    .split(CONTEXT_SUMMARY_DELIMITER)
    .map((segment) => segment.trim())
    .filter((segment) => segment.length > 0)
}

function trimSummarySegments(segments: string[], maxChars: number): string[] {
  const safeMaxChars = Math.max(1024, maxChars)
  const next = [...segments]

  while (next.length > 1 && buildSummaryMessageContent(next).length > safeMaxChars) {
    next.shift()
  }

  if (next.length === 0) return next

  const current = buildSummaryMessageContent(next)
  if (current.length <= safeMaxChars) return next

  const fixedPrefix = buildSummaryMessageContent([]).length
  const available = Math.max(0, safeMaxChars - fixedPrefix)
  next[0] = truncateSummaryText(next[0]!, available)
  return next
}

function countLeadingSystemMessages(messages: Message[]): number {
  let count = 0
  while (count < messages.length && messageRole(messages[count]!) === "system") count++
  return count
}

function findSummaryMessageIndex(messages: Message[]): number {
  return messages.findIndex((message) => {
    const content = messageStringContent(message)
    return content.startsWith(CONTEXT_SUMMARY_HEADER)
  })
}

/**
 * Very rough tokenizer-agnostic estimate for prompt size guardrails.
 *
 * Includes both messages and tool schema to mirror completion request payload.
 */
export function estimatePromptTokens(messages: Message[]): number {
  const jsonChars = JSON.stringify({ messages, tools: TOOLS }).length
  return Math.ceil(jsonChars / 4)
}
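
// Worked example: a request payload that serializes to 360,000 JSON characters
// estimates to ceil(360000 / 4) = 90,000 tokens, which meets the default
// CONTEXT_COMPACT_TRIGGER_TOKENS and would start compaction.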

export type ContextCompactionResult = {
  compacted: boolean
  messages: Message[]
  estimateBefore: number
  estimateAfter: number
  messagesRemoved: number
  chunks: number
}

function normalizedObservedPromptTokens(value: number | undefined): number | null {
  if (!Number.isFinite(value)) return null
  const tokens = Math.ceil(value as number)
  return tokens > 0 ? tokens : null
}

/**
 * Applies rolling context compaction when estimated prompt size exceeds threshold.
 *
 * Keeps leading bootstrap system messages and recent raw turns, while replacing
 * older slices with a durable summary message.
 */
export function maybeCompactConversation(messages: Message[], observedPromptTokens?: number): ContextCompactionResult {
  const estimateBefore = estimatePromptTokens(messages)
  const observedBefore = normalizedObservedPromptTokens(observedPromptTokens)
  // Calibrate the rough chars/4 heuristic with real API prompt usage when available.
  const estimateScale = observedBefore ? Math.max(1, observedBefore / Math.max(1, estimateBefore)) : 1
  const effectiveBefore = Math.ceil(estimateBefore * estimateScale)

  if (effectiveBefore < CONTEXT_COMPACT_TRIGGER_TOKENS) {
    return {
      compacted: false,
      messages,
      estimateBefore: effectiveBefore,
      estimateAfter: effectiveBefore,
      messagesRemoved: 0,
      chunks: 0,
    }
  }

  const chunkSize = Math.max(1, CONTEXT_COMPACT_CHUNK_MESSAGES)
  const minRecentMessages = Math.max(1, CONTEXT_COMPACT_RECENT_MESSAGES)
  let next = [...messages]
  let summaryIndex = findSummaryMessageIndex(next)
  let summaryInserted = false
  let summarySegments: string[] = []

  if (summaryIndex >= 0) {
    const existingContent = messageStringContent(next[summaryIndex]!)
    summarySegments = parseSummarySegments(existingContent)
  } else {
    const baseLayerEnd = (() => {
      const leadingSystems = countLeadingSystemMessages(next)
      if (leadingSystems > 0) return leadingSystems
      return next.length > 0 ? 1 : 0
    })()
    summaryIndex = Math.min(baseLayerEnd, next.length)
    next.splice(summaryIndex, 0, {
      role: "user",
      content: buildSummaryMessageContent([]),
    })
    summaryInserted = true
  }

  let estimateAfter = estimatePromptTokens(next)
  let effectiveAfter = Math.ceil(estimateAfter * estimateScale)
  let messagesRemoved = 0
  let chunks = 0

  while (effectiveAfter > CONTEXT_COMPACT_TARGET_TOKENS) {
    const compactStart = summaryIndex + 1
    const protectedTailStart = Math.max(compactStart, next.length - minRecentMessages)
    if (protectedTailStart <= compactStart) break

    let compactEnd = Math.min(protectedTailStart, compactStart + chunkSize)
    if (compactEnd <= compactStart) break

    while (compactEnd < protectedTailStart && messageRole(next[compactEnd]!) === "tool") {
      compactEnd++
    }

    const unresolvedToolCalls = assistantToolCallIds(next[compactEnd - 1]!)
    if (unresolvedToolCalls.size > 0) {
      let scan = compactEnd
      while (scan < protectedTailStart && messageRole(next[scan]!) === "tool") {
        const id = toolCallId(next[scan]!)
        if (id) unresolvedToolCalls.delete(id)
        scan++
        if (unresolvedToolCalls.size === 0) break
      }
      compactEnd = scan
    }

    while (compactEnd < protectedTailStart && messageRole(next[compactEnd]!) === "tool") {
      compactEnd++
    }

    if (compactEnd <= compactStart) break

    const removed = next.slice(compactStart, compactEnd)
    if (removed.length === 0) break

    summarySegments.push(buildCompactionSegment(removed))
    summarySegments = trimSummarySegments(summarySegments, CONTEXT_COMPACT_SUMMARY_MAX_CHARS)

    next[summaryIndex] = {
      role: "user",
      content: buildSummaryMessageContent(summarySegments),
    }
    next.splice(compactStart, removed.length)

    messagesRemoved += removed.length
    chunks += 1
    estimateAfter = estimatePromptTokens(next)
    effectiveAfter = Math.ceil(estimateAfter * estimateScale)
  }

  if (messagesRemoved === 0 && summaryInserted) {
    next.splice(summaryIndex, 1)
    estimateAfter = estimatePromptTokens(next)
    effectiveAfter = Math.ceil(estimateAfter * estimateScale)
  }

  return {
    compacted: messagesRemoved > 0,
    messages: next,
    estimateBefore: effectiveBefore,
    estimateAfter: effectiveAfter,
    messagesRemoved,
    chunks,
  }
}
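
// Sketch of the intended loop integration (lastUsage is illustrative;
// prompt_tokens comes from the API's usage object when present):
//
//   const result = maybeCompactConversation(messages, lastUsage?.prompt_tokens)
//   if (result.compacted) {
//     messages = result.messages
//     console.log(`[compact] removed ${result.messagesRemoved} msgs in ${result.chunks} chunks`)
//     await saveSession(messages)
//   }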

/**
 * Estimates fallback context pressure and guardrails for current messages.
 *
 * @param messages - Current conversation history used for the next request.
 * @returns Token estimate plus soft/hard fallback limits.
 */
export function fallbackContextWindow(messages: Message[]): {
  estimate: number
  nearLimit: boolean
  skip: boolean
  softLimit: number
  hardLimit: number
} {
  const estimate = estimatePromptTokens(messages)

  if (!FALLBACK_ENFORCE_CONTEXT_LIMIT) {
    return {
      estimate,
      nearLimit: false,
      skip: false,
      softLimit: Number.POSITIVE_INFINITY,
      hardLimit: Number.POSITIVE_INFINITY,
    }
  }

  // softLimit: where we start warning. hardLimit: where we stop trying fallback at all.
  const softLimit = Math.max(0, FALLBACK_N_CTX - FALLBACK_CONTEXT_MARGIN)
  const hardLimit = FALLBACK_N_CTX + Math.max(0, FALLBACK_HARD_OVERFLOW_TOKENS)

  return {
    estimate,
    nearLimit: estimate >= softLimit,
    skip: estimate >= hardLimit,
    softLimit,
    hardLimit,
  }
}
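
// Worked example with the defaults (FALLBACK_N_CTX=4096, FALLBACK_CONTEXT_MARGIN=256,
// FALLBACK_HARD_OVERFLOW_TOKENS=1024): softLimit = 3840 and hardLimit = 5120, so an
// estimate of 4000 reports nearLimit=true but skip=false.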