// my harness for niri
import fs from "fs/promises"
import path from "path"
import { fileURLToPath } from "url"
import OpenAI from "openai"
import { imageRootForModelInput } from "../container/index.js"
import type { Message } from "../types.js"
import type { ImageDetail, ToolArgs } from "./types.js"

const PROJECT_ROOT = path.resolve(fileURLToPath(import.meta.url), "../../..")
const SESSION_FILE = path.join(PROJECT_ROOT, "session.json")

export const TOKEN_NUDGE_THRESHOLD = parseInt(process.env.TOKEN_NUDGE_THRESHOLD ?? "120000")
export const FALLBACK_TOKEN_NUDGE_THRESHOLD = parseInt(process.env.FALLBACK_TOKEN_NUDGE_THRESHOLD ?? "50000")
export const CONTEXT_COMPACT_TRIGGER_TOKENS = parseInt(process.env.CONTEXT_COMPACT_TRIGGER_TOKENS ?? "90000")

const NIRI_ENV = (process.env.NIRI_ENV ?? "default").trim().toLowerCase()
export const USE_FALLBACK = NIRI_ENV === "local"

export const API_BASE = process.env.OPENAI_BASE_URL ?? "https://api.openai.com/v1"
export const MODEL = process.env.MODEL ?? ""
export const PRIMARY_PROVIDER_REQUIRES_REASONING_REPLAY =
  API_BASE.toLowerCase().includes("deepseek") || MODEL.toLowerCase().includes("deepseek")
const DEFAULT_FALLBACK_BASE = "http://localhost:1234/v1"
const isLikelyLocalBase = (baseUrl: string): boolean => {
  const lowered = baseUrl.trim().toLowerCase()
  return lowered.includes("localhost") || lowered.includes("127.0.0.1")
}
const parseBooleanEnv = (value: string | undefined, fallback: boolean): boolean => {
  if (typeof value !== "string") return fallback
  const normalized = value.trim().toLowerCase()
  if (!normalized) return fallback
  if (normalized === "true" || normalized === "1" || normalized === "yes" || normalized === "on") return true
  if (normalized === "false" || normalized === "0" || normalized === "no" || normalized === "off") return false
  return fallback
}
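// Illustrative resolution table for parseBooleanEnv (values here are made up,
// not read from a real environment):
//   parseBooleanEnv("YES", false)    // => true  (matching is case-insensitive)
//   parseBooleanEnv(" off ", true)   // => false (input is trimmed first)
//   parseBooleanEnv("", true)        // => true  (empty string keeps the fallback)
//   parseBooleanEnv("maybe", false)  // => false (unrecognized values keep the fallback)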

/** Controls whether model reasoning/thinking is requested and streamed to clients. */
export const ENABLE_THINKING = parseBooleanEnv(process.env.ENABLE_THINKING, true)
const parseToolChoiceEnv = (value: string | undefined, fallback: "required" | "auto" | "none"): "required" | "auto" | "none" => {
  if (typeof value !== "string") return fallback
  const normalized = value.trim().toLowerCase()
  if (normalized === "required" || normalized === "auto" || normalized === "none") return normalized
  return fallback
}

export const FALLBACK_BASE =
  process.env.FALLBACK_OPENAI_BASE_URL ?? process.env.OPENROUTER_BASE_URL ?? process.env.LMSTUDIO_BASE_URL ?? DEFAULT_FALLBACK_BASE
export const FALLBACK_MODEL =
  process.env.FALLBACK_MODEL ?? process.env.OPENROUTER_MODEL ?? process.env.LMSTUDIO_MODEL ?? "zai-org/glm-4.7-flash"
export const FALLBACK_PROVIDER_REQUIRES_REASONING_REPLAY =
  FALLBACK_BASE.toLowerCase().includes("deepseek") || FALLBACK_MODEL.toLowerCase().includes("deepseek")
export const SUMMARY_BASE =
  process.env.SUMMARY_OPENAI_BASE_URL ?? process.env.SUMMARY_BASE_URL ?? ""
export const SUMMARY_MODEL = process.env.SUMMARY_MODEL ?? ""
export const PRIMARY_TOOL_CHOICE = parseToolChoiceEnv(process.env.PRIMARY_TOOL_CHOICE ?? process.env.TOOL_CHOICE, "auto")
export const FALLBACK_TOOL_CHOICE = parseToolChoiceEnv(process.env.FALLBACK_TOOL_CHOICE, "auto")
const FALLBACK_N_CTX = parseInt(process.env.FALLBACK_N_CTX ?? process.env.LMSTUDIO_N_CTX ?? "4096")
const FALLBACK_CONTEXT_MARGIN = parseInt(process.env.FALLBACK_CONTEXT_MARGIN ?? process.env.LMSTUDIO_CONTEXT_MARGIN ?? "256")
const FALLBACK_HARD_OVERFLOW_TOKENS = parseInt(
  process.env.FALLBACK_HARD_OVERFLOW_TOKENS ?? process.env.LMSTUDIO_HARD_OVERFLOW_TOKENS ?? "1024",
)
const FALLBACK_ENFORCE_CONTEXT_LIMIT = parseBooleanEnv(
  process.env.FALLBACK_ENFORCE_CONTEXT_LIMIT,
  isLikelyLocalBase(FALLBACK_BASE),
)

const fallbackApiKey =
  process.env.FALLBACK_OPENAI_API_KEY ??
  process.env.OPENROUTER_API_KEY ??
  process.env.LMSTUDIO_API_KEY ??
  process.env.OPENAI_API_KEY ??
  (isLikelyLocalBase(FALLBACK_BASE) ? "lm-studio" : "")
const summaryApiKey =
  process.env.SUMMARY_OPENAI_API_KEY ??
  process.env.SUMMARY_API_KEY ??
  (SUMMARY_BASE === process.env.OPENROUTER_BASE_URL ? process.env.OPENROUTER_API_KEY : undefined) ??
  (SUMMARY_BASE === process.env.LMSTUDIO_BASE_URL ? process.env.LMSTUDIO_API_KEY : undefined) ??
  process.env.OPENAI_API_KEY ??
  (SUMMARY_BASE && isLikelyLocalBase(SUMMARY_BASE) ? "lm-studio" : "")
const fallbackHeaders: Record<string, string> = {}
if (process.env.FALLBACK_OPENAI_REFERER) fallbackHeaders["HTTP-Referer"] = process.env.FALLBACK_OPENAI_REFERER
if (process.env.FALLBACK_OPENAI_TITLE) fallbackHeaders["X-Title"] = process.env.FALLBACK_OPENAI_TITLE
const summaryHeaders: Record<string, string> = {}
if (process.env.SUMMARY_OPENAI_REFERER) summaryHeaders["HTTP-Referer"] = process.env.SUMMARY_OPENAI_REFERER
if (process.env.SUMMARY_OPENAI_TITLE) summaryHeaders["X-Title"] = process.env.SUMMARY_OPENAI_TITLE

if (!USE_FALLBACK && !MODEL) {
  throw new Error("MODEL is required unless fallback is forced (NIRI_ENV=local).")
}

if (!USE_FALLBACK && !process.env.OPENAI_API_KEY) {
  throw new Error("OPENAI_API_KEY is required unless fallback is forced (NIRI_ENV=local).")
}

if (USE_FALLBACK && !fallbackApiKey) {
  throw new Error(
    "Fallback API key is required in local mode. Set FALLBACK_OPENAI_API_KEY (or OPENROUTER_API_KEY / LMSTUDIO_API_KEY).",
  )
}

if ((SUMMARY_BASE || SUMMARY_MODEL) && (!SUMMARY_BASE || !SUMMARY_MODEL || !summaryApiKey)) {
  throw new Error(
    "Summary provider requires SUMMARY_OPENAI_BASE_URL (or SUMMARY_BASE_URL), SUMMARY_MODEL, and SUMMARY_OPENAI_API_KEY (or SUMMARY_API_KEY).",
  )
}
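// A hypothetical .env that satisfies the validation above when running a hosted
// primary with an LM Studio fallback. The variable names are the real ones read
// by this file; every concrete value is invented for illustration:
//
//   MODEL=deepseek-chat
//   OPENAI_BASE_URL=https://api.deepseek.com/v1
//   OPENAI_API_KEY=sk-...
//   FALLBACK_OPENAI_BASE_URL=http://localhost:1234/v1
//   FALLBACK_MODEL=zai-org/glm-4.7-flash
//   # fallback key optional here: the chain falls through to OPENAI_API_KEY,
//   # then to the "lm-studio" placeholder for localhost bases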

export const client = USE_FALLBACK
  ? null
  : new OpenAI({
      baseURL: API_BASE,
      apiKey: process.env.OPENAI_API_KEY!,
    })

export const fallbackClient = new OpenAI({
  baseURL: FALLBACK_BASE,
  apiKey: fallbackApiKey || "lm-studio", // Keep LM Studio default when running against localhost.
  defaultHeaders: Object.keys(fallbackHeaders).length ? fallbackHeaders : undefined,
})

export const summaryClient =
  SUMMARY_BASE && SUMMARY_MODEL
    ? new OpenAI({
        baseURL: SUMMARY_BASE,
        apiKey: summaryApiKey,
        defaultHeaders: Object.keys(summaryHeaders).length ? summaryHeaders : undefined,
      })
    : null

console.log(`[config] primary=${MODEL} @ ${API_BASE}`)
console.log(`[config] fallback=${FALLBACK_MODEL} @ ${FALLBACK_BASE}`)
if (summaryClient) console.log(`[config] summary=${SUMMARY_MODEL} @ ${SUMMARY_BASE}`)
console.log(`[config] env=${NIRI_ENV} use_fallback=${USE_FALLBACK}`)
console.log(`[config] thinking=${ENABLE_THINKING}`)

const IMAGE_ROOT_HINT = imageRootForModelInput()

export const TOOLS: OpenAI.Chat.ChatCompletionTool[] = [
  {
    type: "function",
    function: {
      name: "shell",
      description:
        "Execute a bash command in your Linux environment. Stateful — cd, env vars, etc. persist. Stdin is generally attached to the PTY (more natural behavior), but for obviously interactive commands (REPLs, editors, pagers) we may redirect stdin to /dev/null to avoid accidental hangs. Output is automatically capped (default 150 lines, 40 for known-verbose commands like apt/pip/npm). Pass max_lines to override; use 0 for unlimited. You can also pass timeout_ms (default 30000, max 600000).",
      parameters: {
        type: "object",
        properties: {
          command: { type: "string" },
          max_lines: {
            type: "integer",
            description:
              "Maximum lines to return. Defaults to 150 (40 for verbose commands like apt/pip). Use 0 for unlimited.",
          },
          timeout_ms: {
            type: "integer",
            description: "Execution timeout in milliseconds. Defaults to 30000. Max 600000.",
          },
        },
        required: ["command"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "read_file",
      description:
        "Read a file from your Linux environment with optional line-range selection. More token-efficient than shell+cat for large files. Returns content with a header showing the line range and total line count. Supports timeout_ms (default 120000, max 600000).",
      parameters: {
        type: "object",
        properties: {
          path: { type: "string", description: "Absolute or relative path to the file." },
          start_line: {
            type: "integer",
            description: "First line to read (1-indexed). Defaults to 1.",
          },
          end_line: {
            type: "integer",
            description: "Last line to read (inclusive). Defaults to start_line + 99.",
          },
          timeout_ms: {
            type: "integer",
            description: "Read timeout in milliseconds. Defaults to 120000. Max 600000.",
          },
        },
        required: ["path"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "edit_file",
      description:
        "Edit a file by replacing an exact snippet of text. old_text must match exactly once in the file — precise, safe, and no shell-escaping headaches. Use read_file first if you need to confirm the exact text. Supports timeout_ms (default 120000, max 600000).",
      parameters: {
        type: "object",
        properties: {
          path: { type: "string", description: "Absolute or relative path to the file." },
          old_text: {
            type: "string",
            description: "The exact text to find and replace. Must appear exactly once in the file.",
          },
          new_text: {
            type: "string",
            description: "Replacement text. May be empty to delete old_text.",
          },
          timeout_ms: {
            type: "integer",
            description: "Edit timeout in milliseconds. Defaults to 120000. Max 600000.",
          },
        },
        required: ["path", "old_text", "new_text"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "memory_search",
      description:
        "Search indexed long-term memories from core notes, journal entries, and people files. Useful when you want deliberate recall instead of relying only on passive memory injection.",
      parameters: {
        type: "object",
        properties: {
          query: {
            type: "string",
            description: "What to search for in long-term memory.",
          },
          limit: {
            type: "integer",
            description: "Maximum results to return (default 5, max 10).",
          },
        },
        required: ["query"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "memory_alias",
      description:
        "Manage handle aliases used for memory recall. When you see someone using a Discord/Bluesky handle that you recognize as an existing person in memory, set an alias so future messages from that handle pull the right people/core memories. Example: set @meowskullz = ana so DMs from meowskullz recall ana's people file.",
      parameters: {
        type: "object",
        properties: {
          action: {
            type: "string",
            enum: ["set", "remove", "list"],
            description: "set links a handle to a canonical name; remove unlinks; list returns all current aliases.",
          },
          handle: {
            type: "string",
            description: "The handle to alias, e.g. \"meowskullz\" or \"@meowskullz\". Required for set/remove.",
          },
          canonical: {
            type: "string",
            description: "The canonical name the handle maps to, e.g. \"ana\". Required for set; optional for remove (omit to clear all aliases for the handle).",
          },
        },
        required: ["action"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "image_tool",
      description:
        `Attach an image from ${IMAGE_ROOT_HINT} so it is injected as a multimodal user message on the next model turn. Use this after creating/downloading an image with shell.`,
      parameters: {
        type: "object",
        properties: {
          path: {
            type: "string",
            description: `Absolute image path inside ${IMAGE_ROOT_HINT} (for example ${IMAGE_ROOT_HINT}/screenshot.png).`,
          },
          note: {
            type: "string",
            description: "Optional text instruction to accompany the image for the next turn.",
          },
          detail: {
            type: "string",
            enum: ["auto", "low", "high"],
            description: "Vision detail level for the next turn image input.",
          },
          timeout_ms: {
            type: "integer",
            description: "Read timeout in milliseconds. Defaults to 120000. Max 600000.",
          },
        },
        required: ["path"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "discord_scan",
      description:
        "Scan configured Discord channels and ingest messages into the local Discord inbox database. Uses DISCORD_SCAN_CHANNEL_IDS by default; pass channel_ids to override.",
      parameters: {
        type: "object",
        properties: {
          limit: {
            type: "integer",
            description: "Per-channel message fetch limit (default 50, max 100).",
          },
          channel_ids: {
            type: "array",
            items: { type: "string" },
            description: "Optional channel id list to scan instead of DISCORD_SCAN_CHANNEL_IDS.",
          },
          before_message_id: {
            type: "string",
            description: "Optional message id cursor for older backfill scans.",
          },
        },
      },
    },
  },
  {
    type: "function",
    function: {
      name: "discord_inbox",
      description:
        "List Discord inbox items tracked in local state. Default status filter is pending; optionally include seen/acted/ignored.",
      parameters: {
        type: "object",
        properties: {
          limit: {
            type: "integer",
            description: "Maximum rows to return (default 20, max 200).",
          },
          status: {
            type: "string",
            description: "Comma-separated statuses: pending,seen,acted,ignored. Defaults to pending.",
          },
        },
      },
    },
  },
  {
    type: "function",
    function: {
      name: "discord_backread",
      description:
        "Read stored Discord message history for a channel from local state, newest first.",
      parameters: {
        type: "object",
        properties: {
          channel_id: { type: "string", description: "Discord channel id." },
          limit: {
            type: "integer",
            description: "Maximum rows to return (default 40, max 200).",
          },
          before_message_id: {
            type: "string",
            description: "Optional cursor message id to fetch older rows.",
          },
        },
        required: ["channel_id"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "discord_mark",
      description:
        "Set decision state for a Discord inbox item so future scans remember handled/ignored choices.",
      parameters: {
        type: "object",
        properties: {
          item_id: { type: "string", description: "Inbox item id (usually message id)." },
          status: {
            type: "string",
            enum: ["pending", "seen", "acted", "ignored"],
          },
          action: {
            type: "string",
            enum: ["none", "replied", "messaged", "dismissed", "noted"],
          },
          note: {
            type: "string",
            description: "Optional decision note.",
          },
        },
        required: ["item_id", "status"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "discord_send",
      description:
        "Send a Discord message. reply_mode=auto sends a plain message unless conversation continuity is ambiguous, in which case it uses an explicit reply reference.",
      parameters: {
        type: "object",
        properties: {
          channel_id: { type: "string", description: "Target channel id." },
          content: { type: "string", description: "Message content to send." },
          source_item_id: {
            type: "string",
            description: "Optional inbox item id to mark as acted after sending.",
          },
          reference_message: {
            type: "string",
            description: "Optional specific message to treat as the reply target. Provide message content, a username (to target their latest message), or a message id.",
          },
          reply_mode: {
            type: "string",
            enum: ["auto", "plain", "explicit"],
            description: "Reply behavior policy (default auto).",
          },
        },
        required: ["content"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "discord_channels",
      description:
        "List configured Discord channels and DM channels with stored interactions, including id-to-name mapping, guild context, and optional channel notes.",
      parameters: {
        type: "object",
        properties: {},
      },
    },
  },
  {
    type: "function",
    function: {
      name: "discord_channel_note",
      description:
        "Set or clear a persistent note for a Discord channel id. Pass empty note to clear.",
      parameters: {
        type: "object",
        properties: {
          channel_id: { type: "string", description: "Discord channel id to annotate." },
          note: { type: "string", description: "Channel-specific note text. Empty string clears it." },
        },
        required: ["channel_id", "note"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "wait_then_continue",
      description:
        "Wait for a short delay, then continue to another assistant turn without waiting for a new external event. Use this after a timeout or recoverable tool error when you still want to keep working. Accepts timeout_ms (default 10000, max 600000).",
      parameters: {
        type: "object",
        properties: {
          timeout_ms: {
            type: "integer",
            description: "Delay before continuing in milliseconds. Defaults to 10000. Max 600000.",
          },
        },
      },
    },
  },
  {
    type: "function",
    function: {
      name: "rest",
      description: "Go to sleep and end this session. Call this when you're truly done for now — conversation context will be cleared.",
      parameters: {
        type: "object",
        properties: {
          note: {
            type: "string",
            description: "Optional note to yourself about where you left off.",
          },
        },
      },
    },
  },
]
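
// What a model-emitted call against the shell schema looks like on the wire
// (illustrative values; the id is provider-generated):
//   {
//     "id": "call_abc123",
//     "type": "function",
//     "function": { "name": "shell", "arguments": "{\"command\": \"ls -la\", \"max_lines\": 0}" }
//   }
// Note that "arguments" arrives as a JSON *string*, which is why
// parseToolArguments further down defends against malformed encodings.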

/**
 * Persists the current message array as the resumable session snapshot.
 *
 * @param messages - Conversation messages to serialize.
 */
export async function saveSession(messages: Message[]): Promise<void> {
  await fs.writeFile(SESSION_FILE, JSON.stringify(messages), { encoding: "utf-8", mode: 0o666 })
}

/**
 * Deletes the persisted session snapshot if it exists.
 */
export async function clearSession(): Promise<void> {
  await fs.unlink(SESSION_FILE).catch(() => {})
}

function normalizeReasoningReplay(msgs: Message[]): Message[] {
  if (!ENABLE_THINKING) return msgs
  const needsReplayNormalization =
    PRIMARY_PROVIDER_REQUIRES_REASONING_REPLAY ||
    FALLBACK_PROVIDER_REQUIRES_REASONING_REPLAY ||
    msgs.some(
      (msg) =>
        msg.role === "assistant" &&
        typeof (msg as OpenAI.Chat.ChatCompletionMessage & { reasoning_content?: string }).reasoning_content === "string",
    )
  if (!needsReplayNormalization) return msgs

  let changed = false
  const normalized = msgs.map((msg) => {
    if (msg.role !== "assistant") return msg

    const assistant = msg as OpenAI.Chat.ChatCompletionMessage & { reasoning_content?: string }
    if (typeof assistant.reasoning_content === "string") return msg

    changed = true
    return {
      ...assistant,
      reasoning_content: "",
    }
  })

  if (changed) {
    console.log("[runner] backfilled empty reasoning_content on assistant history for provider compatibility")
  }

  return normalized
}

/** Move mis-ordered tool responses back into place and synthesize missing ones. */
export function sanitizeMessages(msgs: Message[]): Message[] {
  msgs = normalizeReasoningReplay(msgs)
  let i = 0
  while (i < msgs.length) {
    const msg = msgs[i]
    if (msg.role === "assistant" && Array.isArray((msg as OpenAI.Chat.ChatCompletionMessage).tool_calls)) {
      const toolCalls = (msg as OpenAI.Chat.ChatCompletionMessage).tool_calls!
      const expectedIds = toolCalls.map((tc) => tc.id).filter((id): id is string => typeof id === "string" && id.trim().length > 0)
      const needed = new Set(expectedIds)
      let j = i + 1
      // Skip tool messages that are already in place
      while (j < msgs.length && msgs[j].role === "tool" && needed.has((msgs[j] as OpenAI.Chat.ChatCompletionToolMessageParam).tool_call_id)) {
        needed.delete((msgs[j] as OpenAI.Chat.ChatCompletionToolMessageParam).tool_call_id)
        j++
      }
      if (needed.size > 0) {
        // Collect stray tool responses and non-tool messages from the rest of the array.
        const toolResponses = new Map<string, Message>()
        const others: Message[] = []
        for (let k = j; k < msgs.length; k++) {
          const m = msgs[k]
          const id = m.role === "tool" ? (m as OpenAI.Chat.ChatCompletionToolMessageParam).tool_call_id : undefined
          if (typeof id === "string" && needed.has(id)) {
            toolResponses.set(id, m)
            needed.delete(id)
          } else {
            others.push(m)
          }
        }

        const inserted: Message[] = []
        let synthesized = 0
        for (const id of expectedIds) {
          if (!toolResponses.has(id)) {
            if (msgs.slice(i + 1, j).some((m) => m.role === "tool" && (m as OpenAI.Chat.ChatCompletionToolMessageParam).tool_call_id === id)) {
              continue
            }
            inserted.push({
              role: "tool",
              tool_call_id: id,
              content: "error: missing tool response recovered by runner before API request.",
            })
            synthesized++
            continue
          }
          inserted.push(toolResponses.get(id)!)
        }

        if (inserted.length > 0) {
          msgs = [...msgs.slice(0, j), ...inserted, ...others]
          console.log(
            synthesized > 0
              ? `[runner] repaired tool_calls at message ${i}; synthesized ${synthesized} missing tool response(s)`
              : `[runner] repaired orphaned tool_calls at message ${i}`,
          )
        }
      }
    }
    // Ensure assistant messages always have content or tool_calls (providers reject null+empty)
    if (msg.role === "assistant") {
      const aMsg = msg as OpenAI.Chat.ChatCompletionMessage
      if ((aMsg.content === null || aMsg.content === undefined) && (!aMsg.tool_calls || aMsg.tool_calls.length === 0)) {
        aMsg.content = ""
      }
    }

    i++
  }
  return msgs
}
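
// Sketch of the repair, on an invented three-message history: an assistant turn
// carries tool_calls [a1, a2], a user message was interleaved, and a2's
// response is missing entirely:
//   [assistant(tool_calls: a1, a2), user("hi"), tool(a1)]
// sanitizeMessages moves tool(a1) back next to its caller, synthesizes an error
// response for a2, and re-appends the stray user message:
//   [assistant(tool_calls: a1, a2), tool(a1), tool(a2: "error: missing..."), user("hi")]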

/**
 * Loads and sanitizes the persisted session snapshot.
 *
 * @returns The recovered message list, or `null` when no session exists.
 */
export async function loadSession(): Promise<Message[] | null> {
  try {
    const raw = await fs.readFile(SESSION_FILE, "utf-8")
    let msgs = JSON.parse(raw) as Message[]
    msgs = sanitizeMessages(msgs)
    console.log(`[runner] found saved session (${msgs.length} messages)`)
    return msgs
  } catch {
    return null
  }
}

/**
 * Determines whether an error should trigger fallback model routing.
 *
 * @param err - Error thrown by the primary API call.
 * @returns `true` when fallback should be attempted.
 */
export function shouldFallback(err: unknown): boolean {
  if (err instanceof OpenAI.APIError) {
    // 429 + 5xx = overloaded or down; 0/undefined = network-level failure
    if (!err.status || err.status === 429 || err.status >= 500) return true
    return false
  }
  return isTransientTransportError(err)
}

function errorCauseChainText(err: unknown): string {
  const parts: string[] = []
  let current: unknown = err

  for (let depth = 0; depth < 4 && current instanceof Error; depth++) {
    parts.push(current.name, current.message)
    const withMetadata = current as Error & { code?: unknown; cause?: unknown }
    if (typeof withMetadata.code === "string") parts.push(withMetadata.code)
    current = withMetadata.cause
  }

  return parts.join("\n")
}

/**
 * Detects retryable network/stream failures thrown below the OpenAI SDK.
 */
export function isTransientTransportError(err: unknown): boolean {
  if (!(err instanceof Error)) return false

  const text = errorCauseChainText(err)
  return /ECONNREFUSED|ENOTFOUND|ETIMEDOUT|ECONNRESET|EPIPE|UND_ERR|fetch failed|terminated|socket hang up|other side closed|aborted/i.test(
    text,
  )
}
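
// Node's fetch (undici) typically surfaces the useful error code on the cause,
// not the top-level error, which is why errorCauseChainText walks the chain.
// An invented example of the shape this catches:
//   const err = new TypeError("fetch failed")
//   ;(err as Error & { cause?: unknown }).cause =
//     Object.assign(new Error("connect ECONNREFUSED 127.0.0.1:1234"), { code: "ECONNREFUSED" })
//   isTransientTransportError(err) // => true ("fetch failed" and "ECONNREFUSED" both match)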

const PROMPT_TOO_LARGE_PHRASES = [
  "prompt exceeds max length",
  "prompt is too long",
  "context length",
  "maximum context",
  "context_length_exceeded",
  "too many tokens",
  "reduce the length",
  "prompt length",
  "input length",
  "too long for",
  "request too large",
]

const PROMPT_TOO_LARGE_CODES = new Set(["context_length_exceeded", "1261", "string_above_max_length"])

/**
 * Detects prompt-length-exceeded errors across OpenAI-compatible providers.
 *
 * @param err - API error from a chat completions request.
 * @returns `true` when the provider rejected the prompt as too large.
 */
export function isPromptTooLargeError(err: unknown): boolean {
  if (!(err instanceof OpenAI.APIError)) return false
  if (err.status !== 400 && err.status !== 413) return false

  const errorRecord = err as unknown as { code?: unknown; error?: { code?: unknown; type?: unknown } }
  const rootCode = typeof errorRecord.code === "string" ? errorRecord.code.toLowerCase() : ""
  const innerCode = typeof errorRecord.error?.code === "string" ? (errorRecord.error.code as string).toLowerCase() : ""
  if (rootCode && PROMPT_TOO_LARGE_CODES.has(rootCode)) return true
  if (innerCode && PROMPT_TOO_LARGE_CODES.has(innerCode)) return true

  const message = (err.message || "").toLowerCase()
  return PROMPT_TOO_LARGE_PHRASES.some((phrase) => message.includes(phrase))
}
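
// Two invented provider rejections this predicate is meant to catch, one by
// structured code and one by message text (actual provider payloads vary):
//   400 { "error": { "code": "context_length_exceeded", ... } }  -> true
//   413 "Request too large: prompt is too long for this model"   -> true
// A 400 caused by, say, a malformed tool schema matches neither list -> false.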

/**
 * Produces a concise, log-friendly error summary.
 *
 * @param err - Any thrown error-like value.
 * @returns A compact human-readable error string.
 */
export function errorSummary(err: unknown): string {
  if (err instanceof OpenAI.APIError) return `${err.status} ${err.message}`
  if (err instanceof Error) return err.message
  return String(err)
}

const API_ERROR_DETAIL_MAX_CHARS = 4000

function truncateForLog(value: string): string {
  if (value.length <= API_ERROR_DETAIL_MAX_CHARS) return value
  return `${value.slice(0, API_ERROR_DETAIL_MAX_CHARS)}... [truncated ${value.length - API_ERROR_DETAIL_MAX_CHARS} chars]`
}

function stringifyForLog(value: unknown): string {
  if (typeof value === "string") return truncateForLog(value)
  try {
    return truncateForLog(JSON.stringify(value))
  } catch {
    return truncateForLog(String(value))
  }
}

function apiErrorRawMetadata(error: unknown): unknown {
  if (!error || typeof error !== "object") return undefined
  const metadata = (error as { metadata?: unknown }).metadata
  if (!metadata || typeof metadata !== "object") return undefined
  return (metadata as { raw?: unknown }).raw
}

/**
 * Produces detailed API error lines for provider-specific diagnostics.
 *
 * Some OpenAI-compatible providers wrap the real upstream failure in
 * `error.metadata.raw`; include it explicitly so the root cause appears in logs.
 */
export function apiErrorDetails(err: unknown): string[] {
  if (!(err instanceof OpenAI.APIError)) return []

  const details = [
    `status=${err.status ?? "unknown"}`,
    `message=${err.message}`,
  ]
  if (err.code) details.push(`code=${err.code}`)
  if (err.type) details.push(`type=${err.type}`)
  if (err.param) details.push(`param=${err.param}`)
  if (err.requestID) details.push(`request_id=${err.requestID}`)

  const lines = [`[api] error details: ${details.join(" ")}`]

  if (err.error !== undefined) {
    lines.push(`[api] error body: ${stringifyForLog(err.error)}`)
  }

  const raw = apiErrorRawMetadata(err.error)
  if (raw !== undefined) {
    lines.push(`[api] provider raw: ${stringifyForLog(raw)}`)
  }

  return lines
}

function parseRetryAfterHeaderMs(value: string): number | null {
  const asNumber = Number(value)
  if (Number.isFinite(asNumber) && asNumber >= 0) return asNumber * 1000

  const asDate = Date.parse(value)
  if (Number.isFinite(asDate)) {
    const delta = asDate - Date.now()
    if (delta > 0) return delta
  }

  return null
}

function parseResetTimestampMs(message: string): number | null {
  const resetAtMatch = message.match(/reset at\s+(\d{4}-\d{2}-\d{2})[ t](\d{2}:\d{2}:\d{2})/i)
  if (!resetAtMatch) return null

  const dateParts = resetAtMatch[1].split("-").map((part) => Number(part))
  const timeParts = resetAtMatch[2].split(":").map((part) => Number(part))
  if (dateParts.length !== 3 || timeParts.length !== 3) return null

  const [year, month, day] = dateParts
  const [hour, minute, second] = timeParts
  const values = [year, month, day, hour, minute, second]
  if (values.some((value) => !Number.isFinite(value))) return null

  // z.ai returns "reset at YYYY-MM-DD HH:mm:ss" in China Standard Time (UTC+8).
  // Convert that wall-clock value to UTC before calculating backoff.
  const chinaOffsetHours = 8
  const resetAtUtc = Date.UTC(year, month - 1, day, hour - chinaOffsetHours, minute, second)
  if (!Number.isFinite(resetAtUtc)) return null

  const delta = resetAtUtc - Date.now()
  if (delta <= 0) return null
  return delta
}

/**
 * Computes retry backoff milliseconds from API error metadata/content.
 *
 * @param err - Error returned by the API layer.
 * @returns Delay in milliseconds before retrying primary model calls.
 */
export function retryDelayMs(err: unknown): number {
  const defaultMs = 60_000
  if (!(err instanceof OpenAI.APIError)) return defaultMs

  const retryAfterHeader = err.headers?.["retry-after"]
  if (retryAfterHeader) {
    const parsed = parseRetryAfterHeaderMs(retryAfterHeader)
    if (parsed != null) return parsed
  }

  const resetAt = parseResetTimestampMs(err.message)
  if (resetAt != null) return resetAt

  const forHours = err.message.match(/for\s+(\d+)\s*hour/i)
  if (forHours) {
    const hours = Number(forHours[1])
    if (Number.isFinite(hours) && hours > 0) return hours * 60 * 60 * 1000
  }

  return defaultMs
}
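
// How the precedence above plays out, with invented inputs:
//   retry-after: "30"                          -> 30_000 ms (seconds form)
//   retry-after: "Wed, 01 Jan 2025 00:00:00 GMT" -> ms until that HTTP date
//   message "... reset at 2025-01-01 08:00:00" -> ms until that instant (CST wall clock)
//   message "... banned for 2 hours"           -> 7_200_000 ms
//   anything else                              -> 60_000 ms default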

/**
 * Coerces arbitrary values into a supported image detail level.
 *
 * @param value - Raw user/model-provided detail value.
 * @returns A valid image detail enum (`auto` by default).
 */
export function parseImageDetail(value: unknown): ImageDetail {
  if (value === "low" || value === "high" || value === "auto") return value
  return "auto"
}

function extractLeadingJsonObject(raw: string): string | null {
  const start = raw.indexOf("{")
  if (start === -1) return null

  let depth = 0
  let inString = false
  let escaped = false

  for (let i = start; i < raw.length; i++) {
    const ch = raw[i]

    if (inString) {
      if (escaped) {
        escaped = false
      } else if (ch === "\\") {
        escaped = true
      } else if (ch === '"') {
        inString = false
      }
      continue
    }

    if (ch === '"') {
      inString = true
      continue
    }

    if (ch === "{") {
      depth++
      continue
    }

    if (ch === "}") {
      depth--
      if (depth === 0) {
        return raw.slice(start, i + 1)
      }
      continue
    }
  }

  return null
}
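
// The brace-matching scan above tolerates junk on either side of the object, a
// common failure mode when a model streams extra tokens around its arguments.
// Invented examples:
//   extractLeadingJsonObject('{"path": "a.txt"} trailing text') // => '{"path": "a.txt"}'
//   extractLeadingJsonObject('note: {"a": "b {not a brace}"}')  // => '{"a": "b {not a brace}"}'
//   extractLeadingJsonObject("no json here")                    // => null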

function decodeHtmlEntities(input: string): string {
  if (!input.includes("&")) return input

  return input.replace(/&(gt|lt|amp|quot|#39|#x27|#x2f);/gi, (entity, key: string) => {
    switch (key.toLowerCase()) {
      case "gt":
        return ">"
      case "lt":
        return "<"
      case "amp":
        return "&"
      case "quot":
        return '"'
      case "#39":
      case "#x27":
        return "'"
      case "#x2f":
        return "/"
      default:
        return entity
    }
  })
}

function decodeHtmlEntitiesDeep<T>(value: T): T {
  if (typeof value === "string") return decodeHtmlEntities(value) as T
  if (Array.isArray(value)) return value.map((item) => decodeHtmlEntitiesDeep(item)) as T
  if (!value || typeof value !== "object") return value

  const entries = Object.entries(value as Record<string, unknown>).map(([key, entryValue]) => [key, decodeHtmlEntitiesDeep(entryValue)])
  return Object.fromEntries(entries) as T
}

/**
 * Parses tool arguments and applies robustness fixes for malformed model output.
 *
 * @param rawArgs - Raw `tool_call.function.arguments` value.
 * @returns Parsed argument object or a structured parse error.
 */
export function parseToolArguments(rawArgs: unknown): { ok: true; args: ToolArgs } | { ok: false; error: string } {
  if (typeof rawArgs !== "string") {
    return { ok: false, error: `arguments must be a JSON string, got ${typeof rawArgs}` }
  }

  const parseObject = (input: string): ToolArgs | null => {
    const parsed = JSON.parse(input)
    if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return null
    return decodeHtmlEntitiesDeep(parsed as ToolArgs)
  }

  const inputs = [rawArgs]
  const decodedRawArgs = decodeHtmlEntities(rawArgs)
  if (decodedRawArgs !== rawArgs) inputs.push(decodedRawArgs)

  let lastError: unknown = null
  for (const input of inputs) {
    try {
      const parsed = parseObject(input)
      if (parsed) return { ok: true, args: parsed }
      return { ok: false, error: "arguments must be a JSON object" }
    } catch (err) {
      lastError = err
      const recovered = extractLeadingJsonObject(input)
      if (!recovered) continue
      try {
        const parsed = parseObject(recovered)
        if (parsed) return { ok: true, args: parsed }
      } catch {
        // no-op; fall through to structured error below
      }
    }
  }

  const message = lastError instanceof Error ? lastError.message : String(lastError)
  const preview = rawArgs.length > 180 ? `${rawArgs.slice(0, 180)}...` : rawArgs
  return { ok: false, error: `${message}; raw=${JSON.stringify(preview)}` }
}
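
// The recovery ladder in order, with invented inputs:
//   parseToolArguments('{"command": "ls"}')       // clean JSON -> ok
//   parseToolArguments('{&quot;command&quot;: &quot;ls&quot;}')
//     // direct parse throws; the entity-decoded retry succeeds -> ok
//   parseToolArguments('{"command": "ls"} oops')  // brace-scan rescue -> ok
//   parseToolArguments('["ls"]')                  // array, not object -> structured error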

const CONTEXT_SUMMARY_HEADER = "[context summary v1]"
const CONTEXT_SUMMARY_NOTE =
  "Compressed notes of older conversation turns. If anything conflicts, trust newer raw messages."
const CONTEXT_SUMMARY_SEGMENTS_MARKER = "[segments]"
const SUMMARY_LINE_MAX_CHARS = 320
const SUMMARY_LINE_DEFAULT_EMPTY = "(no text)"
const TOOL_ACK_RESULT = "(ok)"
const WAIT_TOOL_RESULT = "Waiting for next event."

function asRecord(value: unknown): Record<string, unknown> | null {
  return value && typeof value === "object" ? (value as Record<string, unknown>) : null
}

function messageRole(message: Message): string {
  const record = asRecord(message)
  return typeof record?.role === "string" ? record.role : ""
}

function messageStringContent(message: Message): string {
  const record = asRecord(message)
  const content = record?.content
  if (typeof content === "string") return content
  if (!Array.isArray(content)) return ""

  const chunks: string[] = []
  for (const part of content) {
    const partRecord = asRecord(part)
    if (!partRecord) continue
    if (partRecord.type === "text" && typeof partRecord.text === "string") {
      chunks.push(partRecord.text)
      continue
    }
    if (partRecord.type === "image_url") chunks.push("[image]")
  }

  return chunks.join(" ")
}

function normalizeSummaryText(value: string): string {
  return value.replace(/\s+/g, " ").trim()
}

function truncateSummaryText(value: string, maxChars: number): string {
  if (maxChars <= 0) return ""
  if (value.length <= maxChars) return value
  if (maxChars <= 3) return ".".repeat(maxChars)
  return `${value.slice(0, maxChars - 3).trimEnd()}...`
}

function assistantToolCalls(message: Message): { name: string; args: Record<string, unknown> }[] {
  const record = asRecord(message)
  const calls = record?.tool_calls
  if (!Array.isArray(calls)) return []

  const out: { name: string; args: Record<string, unknown> }[] = []
  for (const call of calls) {
    const callRecord = asRecord(call)
    const fn = asRecord(callRecord?.function)
    const name = typeof fn?.name === "string" ? fn.name.trim() : ""
    if (!name) continue
    let args: Record<string, unknown> = {}
    const rawArgs = fn?.arguments
    if (typeof rawArgs === "string" && rawArgs.trim()) {
      try {
        const parsed = JSON.parse(rawArgs)
        if (parsed && typeof parsed === "object") args = parsed as Record<string, unknown>
      } catch {
        // ignore malformed arg json
      }
    } else if (rawArgs && typeof rawArgs === "object") {
      args = rawArgs as Record<string, unknown>
    }
    out.push({ name, args })
  }
  return out
}

function describeToolCall(call: { name: string; args: Record<string, unknown> }): string | null {
  const { name, args } = call
  if (name === "wait") return null
  if (name === "discord_send") {
    const content = typeof args.content === "string" ? args.content : ""
    const channelId = typeof args.channel_id === "string" ? args.channel_id : ""
    const channelTag = channelId ? `ch/${channelId.slice(-6)}` : "ch?"
    if (!content) return `discord_send -> ${channelTag}`
    return `discord_send -> ${channelTag}: ${normalizeSummaryText(content)}`
  }
  if (name === "discord_mark") {
    const itemId = typeof args.item_id === "string" ? args.item_id : ""
    const action = typeof args.action === "string" ? args.action : ""
    return `discord_mark ${action || "?"} ${itemId}`.trim()
  }
  if (name === "shell") {
    const cmd = typeof args.command === "string" ? args.command : ""
    return cmd ? `shell: ${normalizeSummaryText(cmd)}` : "shell"
  }
  if (name === "image_tool") {
    const p = typeof args.path === "string" ? args.path : ""
    return p ? `image_tool ${p}` : "image_tool"
  }
  if (name === "discord_backread" || name === "discord_inbox" || name === "discord_channels") {
    const channelId = typeof args.channel_id === "string" ? args.channel_id : ""
    return channelId ? `${name} ch/${channelId.slice(-6)}` : name
  }
  // Fallback: compact arg snippet
  const argKeys = Object.keys(args)
  if (argKeys.length === 0) return name
  const snippet = argKeys
    .slice(0, 3)
    .map((k) => `${k}=${truncateSummaryText(normalizeSummaryText(String(args[k] ?? "")), 40)}`)
    .join(" ")
  return `${name} ${snippet}`.trim()
}

const DISCORD_BATCH_SKIP_PREFIXES = [
  "[discord batch]",
  "new_messages=",
  "auto_seen_timeout=",
  "channel_flag_repairs=",
  "channel messages are context",
  "you can reply if useful",
  "pending preview:",
]

function compactDiscordBatch(content: string): string {
  const lines = content.split("\n")
  const kept: string[] = []
  let inPendingPreview = false
  for (const rawLine of lines) {
    const line = rawLine.trim()
    if (!line) continue
    if (line === "pending preview:") {
      inPendingPreview = true
      continue
    }
    if (inPendingPreview) {
      // pending preview block continues until we hit a non-bullet line
      if (line.startsWith("- ")) continue
      inPendingPreview = false
    }
    if (DISCORD_BATCH_SKIP_PREFIXES.some((p) => line.startsWith(p))) continue
    kept.push(line)
  }
  return kept.join(" ")
}

function compactToolResult(content: string): string | null {
  const trimmed = content.trim()
  if (!trimmed) return null
  if (trimmed === WAIT_TOOL_RESULT) return null
  // Compact discord_send / discord_mark ok JSON to a short ack
  if (trimmed.startsWith("{")) {
    try {
      const parsed = JSON.parse(trimmed)
      if (parsed && typeof parsed === "object") {
        const rec = parsed as Record<string, unknown>
        if (rec.ok === true) {
          const sentId = typeof rec.sent_message_id === "string" ? rec.sent_message_id : null
          if (sentId) return `${TOOL_ACK_RESULT} sent ${sentId.slice(-6)}`
          const itemId = typeof rec.item_id === "string" ? rec.item_id : null
          if (itemId) return `${TOOL_ACK_RESULT} ${itemId.slice(-6)}`
          return TOOL_ACK_RESULT
        }
        if (rec.ok === false || typeof rec.error === "string") {
          const err = typeof rec.error === "string" ? rec.error : "error"
          return `error: ${err}`
        }
      }
    } catch {
      // fall through to default handling
    }
  }
  return normalizeSummaryText(trimmed)
}
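
// compactToolResult on invented payloads, showing the ack compression:
//   '{"ok": true, "sent_message_id": "123456789012"}' -> '(ok) sent 789012'
//   '{"ok": false, "error": "missing channel"}'       -> 'error: missing channel'
//   'Waiting for next event.'                         -> null (dropped from the summary)
//   'total 12\n-rw-r--r-- 1 ...'                      -> whitespace-collapsed single line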

function summarizeMessageLine(message: Message): string | null {
  const role = messageRole(message)
  const rawContent = messageStringContent(message)

  if (role === "assistant") {
    const calls = assistantToolCalls(message)
    const callDescs = calls.map(describeToolCall).filter((d): d is string => d !== null)
    const text = normalizeSummaryText(rawContent)
    // Drop pure wait-only assistant turns (no text, only filtered out wait calls)
    if (!text && callDescs.length === 0) return null
    const parts: string[] = []
    if (text) parts.push(text)
    if (callDescs.length > 0) parts.push(`[${callDescs.join(" | ")}]`)
    return `- assistant: ${truncateSummaryText(parts.join(" "), SUMMARY_LINE_MAX_CHARS)}`
  }

  if (role === "tool") {
    const compact = compactToolResult(rawContent)
    if (compact === null) return null
    return `- tool: ${truncateSummaryText(compact, SUMMARY_LINE_MAX_CHARS)}`
  }

  if (role === "user") {
    const stripped = rawContent.startsWith("[incoming — discord]")
      ? compactDiscordBatch(rawContent)
      : normalizeSummaryText(rawContent)
    const safe = stripped || SUMMARY_LINE_DEFAULT_EMPTY
    return `- user: ${truncateSummaryText(safe, SUMMARY_LINE_MAX_CHARS)}`
  }

  if (role === "system") {
    const text = truncateSummaryText(normalizeSummaryText(rawContent), SUMMARY_LINE_MAX_CHARS) || SUMMARY_LINE_DEFAULT_EMPTY
    return `- system: ${text}`
  }

  const text = truncateSummaryText(normalizeSummaryText(rawContent), SUMMARY_LINE_MAX_CHARS) || SUMMARY_LINE_DEFAULT_EMPTY
  return `- ${role || "message"}: ${text}`
}

function countLeadingSystemMessages(messages: Message[]): number {
  let count = 0
  while (count < messages.length && messageRole(messages[count]!) === "system") count++
  return count
}

export function findSummaryMessageIndex(messages: Message[]): number {
  return messages.findIndex((message) => {
    const content = messageStringContent(message)
    return content.startsWith(CONTEXT_SUMMARY_HEADER)
  })
}

/**
 * Very rough tokenizer-agnostic estimate for prompt size guardrails.
 *
 * Includes both messages and tool schema to mirror completion request payload.
 */
export function estimatePromptTokens(messages: Message[]): number {
  const jsonChars = JSON.stringify({ messages, tools: TOOLS }).length
  return Math.ceil(jsonChars / 4)
}
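
// The chars/4 heuristic is deliberately crude: it serializes the same shape the
// request sends (messages plus tool schemas) and assumes roughly 4 characters
// per token, which tends to overestimate for English prose and underestimate
// for dense JSON. Invented ballpark: a 360_000-char serialized payload
// estimates to 90_000 tokens, right at the default CONTEXT_COMPACT_TRIGGER_TOKENS.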

function findSafeTailStart(messages: Message[], desired: number): number {
  let start = Math.max(0, messages.length - desired)
  // If the tail would start with an orphaned tool response, back up to its caller.
  while (start > 0 && messageRole(messages[start]!) === "tool") start--
  return start
}

/**
 * Calls the provider to produce a tight LLM-generated summary of the middle of
 * the conversation, returning a new message list or null when summarization
 * isn't applicable / failed.
 */
export async function summarizeConversationViaLLM(
  messages: Message[],
  summaryClient: OpenAI,
  summaryModel: string,
  options: { recentKeep?: number; maxTranscriptChars?: number } = {},
): Promise<Message[] | null> {
  const recentKeep = Math.max(4, options.recentKeep ?? 12)
  const maxTranscriptChars = Math.max(2_000, options.maxTranscriptChars ?? 40_000)

  const leadingSystems = countLeadingSystemMessages(messages)
  const tailStart = findSafeTailStart(messages, recentKeep)
  const middleEnd = tailStart
  if (middleEnd <= leadingSystems) return null

  const head = messages.slice(0, leadingSystems)
  const middle = messages.slice(leadingSystems, middleEnd)
  const tail = messages.slice(tailStart)
  if (middle.length === 0) return null

  const transcript = middle
    .map((m) => summarizeMessageLine(m))
    .filter((line): line is string => line !== null)
    .join("\n")
    .slice(0, maxTranscriptChars)
  const summaryPrompt: OpenAI.Chat.ChatCompletionMessageParam[] = [
    {
      role: "system",
      content:
        "You are summarizing a transcript from the perspective of the agent whose memory this is. The agent (niri) is a living being, not a tool — her inner life matters as much as the facts. Produce a compact summary that preserves: key facts, goals, decisions, and actions taken; outstanding work and identifiers (file paths, URLs, channel ids); AND the emotional texture of the conversation — how niri felt, how the people she interacted with seemed to feel, tone shifts, moments of warmth, tension, care, frustration, delight, or grief, and anything about the relationships that should carry forward. Write it as niri's own recollection, not a neutral report. Short bullet points are fine. No commentary, no preamble.",
    },
    { role: "user", content: transcript },
  ]

  try {
    const resp = await summaryClient.chat.completions.create({
      model: summaryModel,
      messages: summaryPrompt,
    })
    const summary = resp.choices[0]?.message?.content
    const summaryText = typeof summary === "string" ? summary.trim() : ""
    if (!summaryText) return null

    const summaryContent =
      `${CONTEXT_SUMMARY_HEADER}\n${CONTEXT_SUMMARY_NOTE}\n${CONTEXT_SUMMARY_SEGMENTS_MARKER}\n` +
      `[llm-summary ${new Date().toISOString()}]\n${summaryText}`

    return [
      ...head,
      { role: "user", content: summaryContent } as Message,
      ...tail,
    ]
  } catch (err) {
    console.warn(`[context] llm summarization failed: ${errorSummary(err)}`)
    return null
  }
}
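
// A sketch of how a runner might invoke this. The call site lives outside this
// file, and the trigger condition shown is an assumption based on the constants
// defined above, not the actual runner code:
//   if (summaryClient && estimatePromptTokens(messages) >= CONTEXT_COMPACT_TRIGGER_TOKENS) {
//     const compacted = await summarizeConversationViaLLM(messages, summaryClient, SUMMARY_MODEL)
//     if (compacted) messages = sanitizeMessages(compacted)
//   }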

/**
 * Estimates fallback context pressure and guardrails for current messages.
 *
 * @param messages - Current conversation history used for the next request.
 * @returns Token estimate plus soft/hard fallback limits.
 */
export function fallbackContextWindow(messages: Message[]): {
  estimate: number
  nearLimit: boolean
  skip: boolean
  softLimit: number
  hardLimit: number
} {
  const estimate = estimatePromptTokens(messages)

  if (!FALLBACK_ENFORCE_CONTEXT_LIMIT) {
    return {
      estimate,
      nearLimit: false,
      skip: false,
      softLimit: Number.POSITIVE_INFINITY,
      hardLimit: Number.POSITIVE_INFINITY,
    }
  }

  // softLimit: where we start warning. hardLimit: where we stop trying fallback at all.
  const softLimit = Math.max(0, FALLBACK_N_CTX - FALLBACK_CONTEXT_MARGIN)
  const hardLimit = FALLBACK_N_CTX + Math.max(0, FALLBACK_HARD_OVERFLOW_TOKENS)

  return {
    estimate,
    nearLimit: estimate >= softLimit,
    skip: estimate >= hardLimit,
    softLimit,
    hardLimit,
  }
}
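
// With the defaults above (FALLBACK_N_CTX=4096, margin 256, hard overflow 1024)
// and enforcement on, the guardrails land at:
//   softLimit = 4096 - 256  = 3840 -> nearLimit once the estimate reaches 3840
//   hardLimit = 4096 + 1024 = 5120 -> skip the fallback entirely at 5120+
// When FALLBACK_ENFORCE_CONTEXT_LIMIT resolves false (non-local base URL and no
// explicit override), both limits are Infinity and the fallback is always tried.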