// my harness for niri
import fs from "fs/promises"
import path from "path"
import { fileURLToPath } from "url"
import OpenAI from "openai"
import { imageRootForModelInput } from "../container/index.js"
import type { Message } from "../types.js"
import type { ImageDetail, ToolArgs } from "./types.js"

const PROJECT_ROOT = path.resolve(fileURLToPath(import.meta.url), "../../..")
const SESSION_FILE = path.join(PROJECT_ROOT, "session.json")

export const TOKEN_NUDGE_THRESHOLD = parseInt(process.env.TOKEN_NUDGE_THRESHOLD ?? "120000")
export const FALLBACK_TOKEN_NUDGE_THRESHOLD = parseInt(process.env.FALLBACK_TOKEN_NUDGE_THRESHOLD ?? "50000")
export const CONTEXT_COMPACT_TARGET_TOKENS = parseInt(process.env.CONTEXT_COMPACT_TARGET_TOKENS ?? "65000")
export const CONTEXT_COMPACT_TRIGGER_TOKENS = parseInt(process.env.CONTEXT_COMPACT_TRIGGER_TOKENS ?? "90000")
export const CONTEXT_COMPACT_RECENT_MESSAGES = parseInt(process.env.CONTEXT_COMPACT_RECENT_MESSAGES ?? "80")
export const CONTEXT_COMPACT_CHUNK_MESSAGES = parseInt(process.env.CONTEXT_COMPACT_CHUNK_MESSAGES ?? "32")
export const CONTEXT_COMPACT_SUMMARY_MAX_CHARS = parseInt(process.env.CONTEXT_COMPACT_SUMMARY_MAX_CHARS ?? "16000")
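
// How these defaults interact (see maybeCompactConversation below): once the
// estimated prompt crosses CONTEXT_COMPACT_TRIGGER_TOKENS, older turns are
// folded into a summary message in CONTEXT_COMPACT_CHUNK_MESSAGES-sized chunks
// until the estimate drops below CONTEXT_COMPACT_TARGET_TOKENS, while the most
// recent CONTEXT_COMPACT_RECENT_MESSAGES messages are never compacted.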

const NIRI_ENV = (process.env.NIRI_ENV ?? "default").trim().toLowerCase()
export const USE_FALLBACK = NIRI_ENV === "local"

export const API_BASE = process.env.OPENAI_BASE_URL ?? "https://api.openai.com/v1"
export const MODEL = process.env.MODEL ?? ""
const DEFAULT_FALLBACK_BASE = "http://localhost:1234/v1"
const isLikelyLocalBase = (baseUrl: string): boolean => {
  const lowered = baseUrl.trim().toLowerCase()
  return lowered.includes("localhost") || lowered.includes("127.0.0.1")
}
const parseBooleanEnv = (value: string | undefined, fallback: boolean): boolean => {
  if (typeof value !== "string") return fallback
  const normalized = value.trim().toLowerCase()
  if (!normalized) return fallback
  if (normalized === "true" || normalized === "1" || normalized === "yes" || normalized === "on") return true
  if (normalized === "false" || normalized === "0" || normalized === "no" || normalized === "off") return false
  return fallback
}
const parseToolChoiceEnv = (value: string | undefined, fallback: "required" | "auto" | "none"): "required" | "auto" | "none" => {
  if (typeof value !== "string") return fallback
  const normalized = value.trim().toLowerCase()
  if (normalized === "required" || normalized === "auto" || normalized === "none") return normalized
  return fallback
}

export const FALLBACK_BASE =
  process.env.FALLBACK_OPENAI_BASE_URL ?? process.env.OPENROUTER_BASE_URL ?? process.env.LMSTUDIO_BASE_URL ?? DEFAULT_FALLBACK_BASE
export const FALLBACK_MODEL =
  process.env.FALLBACK_MODEL ?? process.env.OPENROUTER_MODEL ?? process.env.LMSTUDIO_MODEL ?? "zai-org/glm-4.7-flash"
export const FALLBACK_TOOL_CHOICE = parseToolChoiceEnv(process.env.FALLBACK_TOOL_CHOICE, "required")
const FALLBACK_N_CTX = parseInt(process.env.FALLBACK_N_CTX ?? process.env.LMSTUDIO_N_CTX ?? "4096")
const FALLBACK_CONTEXT_MARGIN = parseInt(process.env.FALLBACK_CONTEXT_MARGIN ?? process.env.LMSTUDIO_CONTEXT_MARGIN ?? "256")
const FALLBACK_HARD_OVERFLOW_TOKENS = parseInt(
  process.env.FALLBACK_HARD_OVERFLOW_TOKENS ?? process.env.LMSTUDIO_HARD_OVERFLOW_TOKENS ?? "1024",
)
const FALLBACK_ENFORCE_CONTEXT_LIMIT = parseBooleanEnv(
  process.env.FALLBACK_ENFORCE_CONTEXT_LIMIT,
  isLikelyLocalBase(FALLBACK_BASE),
)

const fallbackApiKey =
  process.env.FALLBACK_OPENAI_API_KEY ??
  process.env.OPENROUTER_API_KEY ??
  process.env.LMSTUDIO_API_KEY ??
  process.env.OPENAI_API_KEY ??
  (isLikelyLocalBase(FALLBACK_BASE) ? "lm-studio" : "")
const fallbackHeaders: Record<string, string> = {}
if (process.env.FALLBACK_OPENAI_REFERER) fallbackHeaders["HTTP-Referer"] = process.env.FALLBACK_OPENAI_REFERER
if (process.env.FALLBACK_OPENAI_TITLE) fallbackHeaders["X-Title"] = process.env.FALLBACK_OPENAI_TITLE

if (!USE_FALLBACK && !MODEL) {
  throw new Error("MODEL is required unless fallback is forced (NIRI_ENV=local).")
}

if (!USE_FALLBACK && !process.env.OPENAI_API_KEY) {
  throw new Error("OPENAI_API_KEY is required unless fallback is forced (NIRI_ENV=local).")
}

if (USE_FALLBACK && !fallbackApiKey) {
  throw new Error(
    "Fallback API key is required in local mode. Set FALLBACK_OPENAI_API_KEY (or OPENROUTER_API_KEY / LMSTUDIO_API_KEY).",
  )
}

export const client = USE_FALLBACK
  ? null
  : new OpenAI({
      baseURL: API_BASE,
      apiKey: process.env.OPENAI_API_KEY!,
    })

export const fallbackClient = new OpenAI({
  baseURL: FALLBACK_BASE,
  apiKey: fallbackApiKey || "lm-studio", // Keep LM Studio default when running against localhost.
  defaultHeaders: Object.keys(fallbackHeaders).length ? fallbackHeaders : undefined,
})

console.log(`[config] primary=${MODEL} @ ${API_BASE}`)
console.log(`[config] fallback=${FALLBACK_MODEL} @ ${FALLBACK_BASE}`)
console.log(`[config] env=${NIRI_ENV} use_fallback=${USE_FALLBACK}`)

const IMAGE_ROOT_HINT = imageRootForModelInput()

export const TOOLS: OpenAI.Chat.ChatCompletionTool[] = [
  {
    type: "function",
    function: {
      name: "shell",
      description:
        "Execute a bash command in your Linux environment. Stateful — cd, env vars, etc. persist. Output is automatically capped (default 150 lines, 40 for known-verbose commands like apt/pip/npm). Pass max_lines to override; use 0 for unlimited. You can also pass timeout_ms (default 30000, max 600000).",
      parameters: {
        type: "object",
        properties: {
          command: { type: "string" },
          max_lines: {
            type: "integer",
            description:
              "Maximum lines to return. Defaults to 150 (40 for verbose commands like apt/pip). Use 0 for unlimited.",
          },
          timeout_ms: {
            type: "integer",
            description: "Execution timeout in milliseconds. Defaults to 30000. Max 600000.",
          },
        },
        required: ["command"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "read_file",
      description:
        "Read a file from your Linux environment with optional line-range selection. More token-efficient than shell+cat for large files. Returns content with a header showing the line range and total line count. Supports timeout_ms (default 120000, max 600000).",
      parameters: {
        type: "object",
        properties: {
          path: { type: "string", description: "Absolute or relative path to the file." },
          start_line: {
            type: "integer",
            description: "First line to read (1-indexed). Defaults to 1.",
          },
          end_line: {
            type: "integer",
            description: "Last line to read (inclusive). Defaults to start_line + 99.",
          },
          timeout_ms: {
            type: "integer",
            description: "Read timeout in milliseconds. Defaults to 120000. Max 600000.",
          },
        },
        required: ["path"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "edit_file",
      description:
        "Edit a file by replacing an exact snippet of text. old_text must match exactly once in the file — precise, safe, and no shell-escaping headaches. Use read_file first if you need to confirm the exact text. Supports timeout_ms (default 120000, max 600000).",
      parameters: {
        type: "object",
        properties: {
          path: { type: "string", description: "Absolute or relative path to the file." },
          old_text: {
            type: "string",
            description: "The exact text to find and replace. Must appear exactly once in the file.",
          },
          new_text: {
            type: "string",
            description: "Replacement text. May be empty to delete old_text.",
          },
          timeout_ms: {
            type: "integer",
            description: "Edit timeout in milliseconds. Defaults to 120000. Max 600000.",
          },
        },
        required: ["path", "old_text", "new_text"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "image_tool",
      description:
        `Attach an image from ${IMAGE_ROOT_HINT} so it is injected as a multimodal user message on the next model turn. Use this after creating/downloading an image with shell.`,
      parameters: {
        type: "object",
        properties: {
          path: {
            type: "string",
            description: `Absolute image path inside ${IMAGE_ROOT_HINT} (for example ${IMAGE_ROOT_HINT}/screenshot.png).`,
          },
          note: {
            type: "string",
            description: "Optional text instruction to accompany the image for the next turn.",
          },
          detail: {
            type: "string",
            enum: ["auto", "low", "high"],
            description: "Vision detail level for the next turn image input.",
          },
          timeout_ms: {
            type: "integer",
            description: "Read timeout in milliseconds. Defaults to 120000. Max 600000.",
          },
        },
        required: ["path"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "discord_scan",
      description:
        "Scan configured Discord channels and ingest messages into the local Discord inbox database. Uses DISCORD_SCAN_CHANNEL_IDS by default; pass channel_ids to override.",
      parameters: {
        type: "object",
        properties: {
          limit: {
            type: "integer",
            description: "Per-channel message fetch limit (default 50, max 100).",
          },
          channel_ids: {
            type: "array",
            items: { type: "string" },
            description: "Optional channel id list to scan instead of DISCORD_SCAN_CHANNEL_IDS.",
          },
          before_message_id: {
            type: "string",
            description: "Optional message id cursor for older backfill scans.",
          },
        },
      },
    },
  },
  {
    type: "function",
    function: {
      name: "discord_inbox",
      description:
        "List Discord inbox items tracked in local state. Default status filter is pending; optionally include seen/acted/ignored.",
      parameters: {
        type: "object",
        properties: {
          limit: {
            type: "integer",
            description: "Maximum rows to return (default 20, max 200).",
          },
          status: {
            type: "string",
            description: "Comma-separated statuses: pending,seen,acted,ignored. Defaults to pending.",
          },
        },
      },
    },
  },
  {
    type: "function",
    function: {
      name: "discord_backread",
      description:
        "Read stored Discord message history for a channel from local state, newest first.",
      parameters: {
        type: "object",
        properties: {
          channel_id: { type: "string", description: "Discord channel id." },
          limit: {
            type: "integer",
            description: "Maximum rows to return (default 40, max 200).",
          },
          before_message_id: {
            type: "string",
            description: "Optional cursor message id to fetch older rows.",
          },
        },
        required: ["channel_id"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "discord_mark",
      description:
        "Set decision state for a Discord inbox item so future scans remember handled/ignored choices.",
      parameters: {
        type: "object",
        properties: {
          item_id: { type: "string", description: "Inbox item id (usually message id)." },
          status: {
            type: "string",
            enum: ["pending", "seen", "acted", "ignored"],
          },
          action: {
            type: "string",
            enum: ["none", "replied", "messaged", "dismissed", "noted"],
          },
          note: {
            type: "string",
            description: "Optional decision note.",
          },
        },
        required: ["item_id", "status"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "discord_send",
      description:
        "Send a Discord message. reply_mode=auto sends plain unless conversation continuity is ambiguous, then it uses an explicit reply reference.",
      parameters: {
        type: "object",
        properties: {
          channel_id: { type: "string", description: "Target channel id." },
          content: { type: "string", description: "Message content to send." },
          source_item_id: {
            type: "string",
            description: "Optional inbox item id to mark as acted after sending.",
          },
          reference_message_id: {
            type: "string",
            description: "Optional specific message id to treat as reply target.",
          },
          reply_mode: {
            type: "string",
            enum: ["auto", "plain", "explicit"],
            description: "Reply behavior policy (default auto).",
          },
        },
        required: ["content"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "discord_channels",
      description:
        "List known Discord channels with id-to-name mapping, guild context, and optional channel notes.",
      parameters: {
        type: "object",
        properties: {
          include_unconfigured: {
            type: "boolean",
            description: "When true (default), include channels seen in history even if not in DISCORD_SCAN_CHANNEL_IDS.",
          },
        },
      },
    },
  },
  {
    type: "function",
    function: {
      name: "discord_channel_note",
      description:
        "Set or clear a persistent note for a Discord channel id. Pass empty note to clear.",
      parameters: {
        type: "object",
        properties: {
          channel_id: { type: "string", description: "Discord channel id to annotate." },
          note: { type: "string", description: "Channel-specific note text. Empty string clears it." },
        },
        required: ["channel_id", "note"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "wait",
      description: "Pause and wait for the next incoming message or event. Use this when you've finished what you're doing and want to hear back before continuing.",
      parameters: {
        type: "object",
        properties: {},
      },
    },
  },
  {
    type: "function",
    function: {
      name: "rest",
      description: "Go to sleep and end this session. Call this when you're truly done for now — conversation context will be cleared.",
      parameters: {
        type: "object",
        properties: {
          note: {
            type: "string",
            description: "Optional note to yourself about where you left off.",
          },
        },
      },
    },
  },
]
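
// Illustrative payload only: a well-formed call against the `shell` schema
// arrives as a JSON string in tool_call.function.arguments, e.g.
// '{"command":"ls -la","max_lines":0}'. parseToolArguments below decodes
// (and, when needed, repairs) that string.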

/**
 * Persists the current message array as the resumable session snapshot.
 *
 * @param messages - Conversation messages to serialize.
 */
export async function saveSession(messages: Message[]): Promise<void> {
  await fs.writeFile(SESSION_FILE, JSON.stringify(messages), { encoding: "utf-8", mode: 0o666 })
}

/**
 * Deletes the persisted session snapshot if it exists.
 */
export async function clearSession(): Promise<void> {
  await fs.unlink(SESSION_FILE).catch(() => {})
}

/** Move any mis-ordered tool responses back to immediately after their assistant message. */
function sanitizeMessages(msgs: Message[]): Message[] {
  let i = 0
  while (i < msgs.length) {
    const msg = msgs[i]
    if (msg.role === "assistant" && Array.isArray((msg as OpenAI.Chat.ChatCompletionMessage).tool_calls)) {
      const toolCalls = (msg as OpenAI.Chat.ChatCompletionMessage).tool_calls!
      const needed = new Set(toolCalls.map((tc) => tc.id))
      let j = i + 1
      // Skip tool messages that are already in place
      while (j < msgs.length && msgs[j].role === "tool" && needed.has((msgs[j] as OpenAI.Chat.ChatCompletionToolMessageParam).tool_call_id)) {
        needed.delete((msgs[j] as OpenAI.Chat.ChatCompletionToolMessageParam).tool_call_id)
        j++
      }
      if (needed.size > 0) {
        // Collect stray tool responses and non-tool messages from the rest of the array
        const toolResponses: Message[] = []
        const others: Message[] = []
        for (let k = j; k < msgs.length; k++) {
          const m = msgs[k]
          if (m.role === "tool" && needed.has((m as OpenAI.Chat.ChatCompletionToolMessageParam).tool_call_id)) {
            toolResponses.push(m)
            needed.delete((m as OpenAI.Chat.ChatCompletionToolMessageParam).tool_call_id)
          } else {
            others.push(m)
          }
          if (needed.size === 0) {
            const rest = msgs.slice(k + 1)
            msgs = [...msgs.slice(0, j), ...toolResponses, ...others, ...rest]
            console.log(`[runner] repaired orphaned tool_calls at message ${i}`)
            break
          }
        }
      }
    }
    i++
  }
  return msgs
}
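
// Illustrative repair, with hypothetical ids "a" and "b": a saved sequence like
//   [assistant(tool_calls: a, b), tool(a), user, tool(b)]
// is reordered to
//   [assistant(tool_calls: a, b), tool(a), tool(b), user]
// so every tool response directly follows the assistant message that requested it.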

/**
 * Loads and sanitizes the persisted session snapshot.
 *
 * @returns The recovered message list, or `null` when no session exists.
 */
export async function loadSession(): Promise<Message[] | null> {
  try {
    const raw = await fs.readFile(SESSION_FILE, "utf-8")
    let msgs = JSON.parse(raw) as Message[]
    msgs = sanitizeMessages(msgs)
    console.log(`[runner] found saved session (${msgs.length} messages)`)
    return msgs
  } catch {
    return null
  }
}

/**
 * Determines whether an error should trigger fallback model routing.
 *
 * @param err - Error thrown by the primary API call.
 * @returns `true` when fallback should be attempted.
 */
export function shouldFallback(err: unknown): boolean {
  if (err instanceof OpenAI.APIError) {
    // 429 + 5xx = overloaded or down; 0/undefined = network-level failure
    return !err.status || err.status === 429 || err.status >= 500
  }
  // Node fetch errors (ECONNREFUSED, ENOTFOUND, ETIMEDOUT…)
  if (err instanceof Error) {
    return /ECONNREFUSED|ENOTFOUND|ETIMEDOUT|ECONNRESET|fetch failed/i.test(err.message)
  }
  return false
}
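
// Sketch of the intended call-site shape (`request` is illustrative, not
// exported by this module):
//
//   try {
//     return await client!.chat.completions.create(request)
//   } catch (err) {
//     if (!shouldFallback(err)) throw err
//     console.log(`[runner] primary failed (${errorSummary(err)}), trying fallback`)
//     return await fallbackClient.chat.completions.create({ ...request, model: FALLBACK_MODEL })
//   }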

/**
 * Produces a concise, log-friendly error summary.
 *
 * @param err - Any thrown error-like value.
 * @returns A compact human-readable error string.
 */
export function errorSummary(err: unknown): string {
  if (err instanceof OpenAI.APIError) return `${err.status} ${err.message}`
  if (err instanceof Error) return err.message
  return String(err)
}

function parseRetryAfterHeaderMs(value: string): number | null {
  const asNumber = Number(value)
  if (Number.isFinite(asNumber) && asNumber >= 0) return asNumber * 1000

  const asDate = Date.parse(value)
  if (Number.isFinite(asDate)) {
    const delta = asDate - Date.now()
    if (delta > 0) return delta
  }

  return null
}

function parseResetTimestampMs(message: string): number | null {
  const resetAtMatch = message.match(/reset at\s+(\d{4}-\d{2}-\d{2})[ t](\d{2}:\d{2}:\d{2})/i)
  if (!resetAtMatch) return null

  const dateParts = resetAtMatch[1].split("-").map((part) => Number(part))
  const timeParts = resetAtMatch[2].split(":").map((part) => Number(part))
  if (dateParts.length !== 3 || timeParts.length !== 3) return null

  const [year, month, day] = dateParts
  const [hour, minute, second] = timeParts
  const values = [year, month, day, hour, minute, second]
  if (values.some((value) => !Number.isFinite(value))) return null

  // z.ai returns "reset at YYYY-MM-DD HH:mm:ss" in China Standard Time (UTC+8).
  // Convert that wall-clock value to UTC before calculating backoff.
  const chinaOffsetHours = 8
  const resetAtUtc = Date.UTC(year, month - 1, day, hour - chinaOffsetHours, minute, second)
  if (!Number.isFinite(resetAtUtc)) return null

  const delta = resetAtUtc - Date.now()
  if (delta <= 0) return null
  return delta
}
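
// Worked example: a message containing "reset at 2025-06-01 20:00:00" parses as
// 20:00 China Standard Time, i.e. Date.UTC(2025, 5, 1, 12, 0, 0) after the
// 8-hour offset, and the returned delay is that instant minus Date.now().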

/**
 * Computes retry backoff milliseconds from API error metadata/content.
 *
 * @param err - Error returned by the API layer.
 * @returns Delay in milliseconds before retrying primary model calls.
 */
export function retryDelayMs(err: unknown): number {
  const defaultMs = 60_000
  if (!(err instanceof OpenAI.APIError)) return defaultMs

  const retryAfterHeader = err.headers?.["retry-after"]
  if (retryAfterHeader) {
    const parsed = parseRetryAfterHeaderMs(retryAfterHeader)
    if (parsed != null) return parsed
  }

  const resetAt = parseResetTimestampMs(err.message)
  if (resetAt != null) return resetAt

  const forHours = err.message.match(/for\s+(\d+)\s*hour/i)
  if (forHours) {
    const hours = Number(forHours[1])
    if (Number.isFinite(hours) && hours > 0) return hours * 60 * 60 * 1000
  }

  return defaultMs
}
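
// Precedence with worked values: a Retry-After header of "30" yields 30_000ms;
// failing that, a parseable "reset at ..." timestamp in the message body;
// failing that, "for 2 hours" in the message yields 7_200_000ms; otherwise 60s.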

/**
 * Coerces arbitrary values into a supported image detail level.
 *
 * @param value - Raw user/model-provided detail value.
 * @returns A valid image detail enum (`auto` by default).
 */
export function parseImageDetail(value: unknown): ImageDetail {
  if (value === "low" || value === "high" || value === "auto") return value
  return "auto"
}

function extractLeadingJsonObject(raw: string): string | null {
  const start = raw.indexOf("{")
  if (start === -1) return null

  let depth = 0
  let inString = false
  let escaped = false

  for (let i = start; i < raw.length; i++) {
    const ch = raw[i]

    if (inString) {
      if (escaped) {
        escaped = false
      } else if (ch === "\\") {
        escaped = true
      } else if (ch === '"') {
        inString = false
      }
      continue
    }

    if (ch === '"') {
      inString = true
      continue
    }

    if (ch === "{") {
      depth++
      continue
    }

    if (ch === "}") {
      depth--
      if (depth === 0) {
        return raw.slice(start, i + 1)
      }
      continue
    }
  }

  return null
}
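
// Example: extractLeadingJsonObject('{"path":"a.txt"} trailing junk') returns
// '{"path":"a.txt"}'. Braces inside string values (e.g. '{"t":"}"}') are
// ignored thanks to the in-string and escape tracking above.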

function decodeHtmlEntities(input: string): string {
  if (!input.includes("&")) return input

  return input.replace(/&(gt|lt|amp|quot|#39|#x27|#x2f);/gi, (entity, key: string) => {
    switch (key.toLowerCase()) {
      case "gt":
        return ">"
      case "lt":
        return "<"
      case "amp":
        return "&"
      case "quot":
        return '"'
      case "#39":
      case "#x27":
        return "'"
      case "#x2f":
        return "/"
      default:
        return entity
    }
  })
}
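
// Example: decodeHtmlEntities("echo &quot;hi&quot; &amp;&amp; ls") yields
// 'echo "hi" && ls', undoing the HTML-escaping some models apply to arguments.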

function decodeHtmlEntitiesDeep<T>(value: T): T {
  if (typeof value === "string") return decodeHtmlEntities(value) as T
  if (Array.isArray(value)) return value.map((item) => decodeHtmlEntitiesDeep(item)) as T
  if (!value || typeof value !== "object") return value

  const entries = Object.entries(value as Record<string, unknown>).map(([key, entryValue]) => [key, decodeHtmlEntitiesDeep(entryValue)])
  return Object.fromEntries(entries) as T
}

/**
 * Parses tool arguments and applies robustness fixes for malformed model output.
 *
 * @param rawArgs - Raw `tool_call.function.arguments` value.
 * @returns Parsed argument object or a structured parse error.
 */
export function parseToolArguments(rawArgs: unknown): { ok: true; args: ToolArgs } | { ok: false; error: string } {
  if (typeof rawArgs !== "string") {
    return { ok: false, error: `arguments must be a JSON string, got ${typeof rawArgs}` }
  }

  const parseObject = (input: string): ToolArgs | null => {
    const parsed = JSON.parse(input)
    if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return null
    return decodeHtmlEntitiesDeep(parsed as ToolArgs)
  }

  const inputs = [rawArgs]
  const decodedRawArgs = decodeHtmlEntities(rawArgs)
  if (decodedRawArgs !== rawArgs) inputs.push(decodedRawArgs)

  let lastError: unknown = null
  for (const input of inputs) {
    try {
      const parsed = parseObject(input)
      if (parsed) return { ok: true, args: parsed }
      return { ok: false, error: "arguments must be a JSON object" }
    } catch (err) {
      lastError = err
      const recovered = extractLeadingJsonObject(input)
      if (!recovered) continue
      try {
        const parsed = parseObject(recovered)
        if (parsed) return { ok: true, args: parsed }
      } catch {
        // no-op; fall through to structured error below
      }
    }
  }

  const message = lastError instanceof Error ? lastError.message : String(lastError)
  const preview = rawArgs.length > 180 ? `${rawArgs.slice(0, 180)}...` : rawArgs
  return { ok: false, error: `${message}; raw=${JSON.stringify(preview)}` }
}
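
// Sketch of a call site (toolCall is illustrative):
//
//   const parsed = parseToolArguments(toolCall.function.arguments)
//   if (!parsed.ok) {
//     // Surface parsed.error back to the model as the tool result so it can retry.
//   } else {
//     const args = parsed.args // e.g. { command: "ls" } for the shell tool
//   }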

const CONTEXT_SUMMARY_HEADER = "[context summary v1]"
const CONTEXT_SUMMARY_NOTE =
  "Compressed notes of older conversation turns. If anything conflicts, trust newer raw messages."
const CONTEXT_SUMMARY_SEGMENTS_MARKER = "[segments]"
const CONTEXT_SUMMARY_DELIMITER = "\n\n===\n\n"
const SUMMARY_LINE_MAX_CHARS = 180
const SUMMARY_LINE_DEFAULT_EMPTY = "(no text)"
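
// A rendered summary message (see buildSummaryMessageContent below) looks like:
//
//   [context summary v1]
//   Compressed notes of older conversation turns. If anything conflicts, trust newer raw messages.
//   [segments]
//   [2025-06-01T00:00:00.000Z] compacted 32 messages
//   - user: ...
//   - assistant: ... | tools: shell
//
// with older segments separated by the "===" delimiter; the timestamp and
// summary lines here are illustrative.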

function asRecord(value: unknown): Record<string, unknown> | null {
  return value && typeof value === "object" ? (value as Record<string, unknown>) : null
}

function messageRole(message: Message): string {
  const record = asRecord(message)
  return typeof record?.role === "string" ? record.role : ""
}

function messageStringContent(message: Message): string {
  const record = asRecord(message)
  const content = record?.content
  if (typeof content === "string") return content
  if (!Array.isArray(content)) return ""

  const chunks: string[] = []
  for (const part of content) {
    const partRecord = asRecord(part)
    if (!partRecord) continue
    if (partRecord.type === "text" && typeof partRecord.text === "string") {
      chunks.push(partRecord.text)
      continue
    }
    if (partRecord.type === "image_url") chunks.push("[image]")
  }

  return chunks.join(" ")
}

function normalizeSummaryText(value: string): string {
  return value.replace(/\s+/g, " ").trim()
}

function truncateSummaryText(value: string, maxChars: number): string {
  if (maxChars <= 0) return ""
  if (value.length <= maxChars) return value
  if (maxChars <= 3) return ".".repeat(maxChars)
  return `${value.slice(0, maxChars - 3).trimEnd()}...`
}

function assistantToolNames(message: Message): string[] {
  const record = asRecord(message)
  const calls = record?.tool_calls
  if (!Array.isArray(calls)) return []

  const names: string[] = []
  for (const call of calls) {
    const callRecord = asRecord(call)
    const fn = asRecord(callRecord?.function)
    if (typeof fn?.name === "string" && fn.name.trim()) names.push(fn.name.trim())
  }
  return names
}

function assistantToolCallIds(message: Message): Set<string> {
  const ids = new Set<string>()
  const record = asRecord(message)
  const calls = record?.tool_calls
  if (!Array.isArray(calls)) return ids

  for (const call of calls) {
    const callRecord = asRecord(call)
    if (typeof callRecord?.id === "string" && callRecord.id.trim()) ids.add(callRecord.id.trim())
  }
  return ids
}

function toolCallId(message: Message): string | null {
  const record = asRecord(message)
  return typeof record?.tool_call_id === "string" && record.tool_call_id.trim() ? record.tool_call_id.trim() : null
}

function summarizeMessageLine(message: Message): string {
  const role = messageRole(message)
  const content = truncateSummaryText(normalizeSummaryText(messageStringContent(message)), SUMMARY_LINE_MAX_CHARS)
  const safeContent = content || SUMMARY_LINE_DEFAULT_EMPTY

  if (role === "assistant") {
    const toolNames = assistantToolNames(message)
    if (toolNames.length > 0 && content) return `- assistant: ${safeContent} | tools: ${toolNames.join(", ")}`
    if (toolNames.length > 0) return `- assistant: tools: ${toolNames.join(", ")}`
    return `- assistant: ${safeContent}`
  }
  if (role === "tool") {
    const id = toolCallId(message) ?? "unknown"
    return `- tool(${id}): ${safeContent}`
  }
  if (role === "user") return `- user: ${safeContent}`
  if (role === "system") return `- system: ${safeContent}`
  return `- ${role || "message"}: ${safeContent}`
}

function buildCompactionSegment(messages: Message[]): string {
  const lines = messages.map((message) => summarizeMessageLine(message))
  const summaryLines = lines.length > 0 ? lines.join("\n") : `- ${SUMMARY_LINE_DEFAULT_EMPTY}`
  return `[${new Date().toISOString()}] compacted ${messages.length} messages\n${summaryLines}`
}

function buildSummaryMessageContent(segments: string[]): string {
  const body = segments.join(CONTEXT_SUMMARY_DELIMITER)
  return `${CONTEXT_SUMMARY_HEADER}\n${CONTEXT_SUMMARY_NOTE}\n${CONTEXT_SUMMARY_SEGMENTS_MARKER}\n${body}`
}

function parseSummarySegments(content: string): string[] {
  if (!content.startsWith(CONTEXT_SUMMARY_HEADER)) return []

  const marker = `\n${CONTEXT_SUMMARY_SEGMENTS_MARKER}\n`
  const markerIndex = content.indexOf(marker)
  if (markerIndex === -1) return []

  const body = content.slice(markerIndex + marker.length).trim()
  if (!body) return []

  return body
    .split(CONTEXT_SUMMARY_DELIMITER)
    .map((segment) => segment.trim())
    .filter((segment) => segment.length > 0)
}

function trimSummarySegments(segments: string[], maxChars: number): string[] {
  const safeMaxChars = Math.max(1024, maxChars)
  const next = [...segments]

  while (next.length > 1 && buildSummaryMessageContent(next).length > safeMaxChars) {
    next.shift()
  }

  if (next.length === 0) return next

  const current = buildSummaryMessageContent(next)
  if (current.length <= safeMaxChars) return next

  const fixedPrefix = buildSummaryMessageContent([]).length
  const available = Math.max(0, safeMaxChars - fixedPrefix)
  next[0] = truncateSummaryText(next[0]!, available)
  return next
}

function countLeadingSystemMessages(messages: Message[]): number {
  let count = 0
  while (count < messages.length && messageRole(messages[count]!) === "system") count++
  return count
}

function findSummaryMessageIndex(messages: Message[]): number {
  return messages.findIndex((message) => {
    const content = messageStringContent(message)
    return content.startsWith(CONTEXT_SUMMARY_HEADER)
  })
}

/**
 * Very rough tokenizer-agnostic estimate for prompt size guardrails.
 *
 * Includes both messages and tool schema to mirror completion request payload.
 */
export function estimatePromptTokens(messages: Message[]): number {
  const jsonChars = JSON.stringify({ messages, tools: TOOLS }).length
  return Math.ceil(jsonChars / 4)
}
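
// Worked example: a request payload that serializes to 360,000 JSON characters
// estimates to ceil(360000 / 4) = 90,000 tokens, which meets the default
// CONTEXT_COMPACT_TRIGGER_TOKENS and would start compaction.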

export type ContextCompactionResult = {
  compacted: boolean
  messages: Message[]
  estimateBefore: number
  estimateAfter: number
  messagesRemoved: number
  chunks: number
}

function normalizedObservedPromptTokens(value: number | undefined): number | null {
  if (!Number.isFinite(value)) return null
  const tokens = Math.ceil(value as number)
  return tokens > 0 ? tokens : null
}

/**
 * Applies rolling context compaction when estimated prompt size exceeds threshold.
 *
 * Keeps leading bootstrap system messages and recent raw turns, while replacing
 * older slices with a durable summary message.
 */
export function maybeCompactConversation(messages: Message[], observedPromptTokens?: number): ContextCompactionResult {
  const estimateBefore = estimatePromptTokens(messages)
  const observedBefore = normalizedObservedPromptTokens(observedPromptTokens)
  // Calibrate the rough chars/4 heuristic with real API prompt usage when available.
  const estimateScale = observedBefore ? Math.max(1, observedBefore / Math.max(1, estimateBefore)) : 1
  const effectiveBefore = Math.ceil(estimateBefore * estimateScale)

  if (effectiveBefore < CONTEXT_COMPACT_TRIGGER_TOKENS) {
    return {
      compacted: false,
      messages,
      estimateBefore: effectiveBefore,
      estimateAfter: effectiveBefore,
      messagesRemoved: 0,
      chunks: 0,
    }
  }

  const chunkSize = Math.max(1, CONTEXT_COMPACT_CHUNK_MESSAGES)
  const minRecentMessages = Math.max(1, CONTEXT_COMPACT_RECENT_MESSAGES)
  let next = [...messages]
  let summaryIndex = findSummaryMessageIndex(next)
  let summaryInserted = false
  let summarySegments: string[] = []

  if (summaryIndex >= 0) {
    const existingContent = messageStringContent(next[summaryIndex]!)
    summarySegments = parseSummarySegments(existingContent)
  } else {
    const baseLayerEnd = (() => {
      const leadingSystems = countLeadingSystemMessages(next)
      if (leadingSystems > 0) return leadingSystems
      return next.length > 0 ? 1 : 0
    })()
    summaryIndex = Math.min(baseLayerEnd, next.length)
    next.splice(summaryIndex, 0, {
      role: "user",
      content: buildSummaryMessageContent([]),
    })
    summaryInserted = true
  }

  let estimateAfter = estimatePromptTokens(next)
  let effectiveAfter = Math.ceil(estimateAfter * estimateScale)
  let messagesRemoved = 0
  let chunks = 0

  while (effectiveAfter > CONTEXT_COMPACT_TARGET_TOKENS) {
    const compactStart = summaryIndex + 1
    const protectedTailStart = Math.max(compactStart, next.length - minRecentMessages)
    if (protectedTailStart <= compactStart) break

    let compactEnd = Math.min(protectedTailStart, compactStart + chunkSize)
    if (compactEnd <= compactStart) break

    while (compactEnd < protectedTailStart && messageRole(next[compactEnd]!) === "tool") {
      compactEnd++
    }

    const unresolvedToolCalls = assistantToolCallIds(next[compactEnd - 1]!)
    if (unresolvedToolCalls.size > 0) {
      let scan = compactEnd
      while (scan < protectedTailStart && messageRole(next[scan]!) === "tool") {
        const id = toolCallId(next[scan]!)
        if (id) unresolvedToolCalls.delete(id)
        scan++
        if (unresolvedToolCalls.size === 0) break
      }
      compactEnd = scan
    }

    while (compactEnd < protectedTailStart && messageRole(next[compactEnd]!) === "tool") {
      compactEnd++
    }

    if (compactEnd <= compactStart) break

    const removed = next.slice(compactStart, compactEnd)
    if (removed.length === 0) break

    summarySegments.push(buildCompactionSegment(removed))
    summarySegments = trimSummarySegments(summarySegments, CONTEXT_COMPACT_SUMMARY_MAX_CHARS)

    next[summaryIndex] = {
      role: "user",
      content: buildSummaryMessageContent(summarySegments),
    }
    next.splice(compactStart, removed.length)

    messagesRemoved += removed.length
    chunks += 1
    estimateAfter = estimatePromptTokens(next)
    effectiveAfter = Math.ceil(estimateAfter * estimateScale)
  }

  if (messagesRemoved === 0 && summaryInserted) {
    next.splice(summaryIndex, 1)
    estimateAfter = estimatePromptTokens(next)
    effectiveAfter = Math.ceil(estimateAfter * estimateScale)
  }

  return {
    compacted: messagesRemoved > 0,
    messages: next,
    estimateBefore: effectiveBefore,
    estimateAfter: effectiveAfter,
    messagesRemoved,
    chunks,
  }
}
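
// Sketch of the intended loop integration (lastUsage is illustrative;
// prompt_tokens comes from the API's usage object when present):
//
//   const result = maybeCompactConversation(messages, lastUsage?.prompt_tokens)
//   if (result.compacted) {
//     messages = result.messages
//     console.log(`[compact] removed ${result.messagesRemoved} msgs in ${result.chunks} chunks`)
//     await saveSession(messages)
//   }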

/**
 * Estimates fallback context pressure and guardrails for current messages.
 *
 * @param messages - Current conversation history used for the next request.
 * @returns Token estimate plus soft/hard fallback limits.
 */
export function fallbackContextWindow(messages: Message[]): {
  estimate: number
  nearLimit: boolean
  skip: boolean
  softLimit: number
  hardLimit: number
} {
  const estimate = estimatePromptTokens(messages)

  if (!FALLBACK_ENFORCE_CONTEXT_LIMIT) {
    return {
      estimate,
      nearLimit: false,
      skip: false,
      softLimit: Number.POSITIVE_INFINITY,
      hardLimit: Number.POSITIVE_INFINITY,
    }
  }

  // softLimit: where we start warning. hardLimit: where we stop trying fallback at all.
  const softLimit = Math.max(0, FALLBACK_N_CTX - FALLBACK_CONTEXT_MARGIN)
  const hardLimit = FALLBACK_N_CTX + Math.max(0, FALLBACK_HARD_OVERFLOW_TOKENS)

  return {
    estimate,
    nearLimit: estimate >= softLimit,
    skip: estimate >= hardLimit,
    softLimit,
    hardLimit,
  }
}
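
// Worked example with the defaults (FALLBACK_N_CTX=4096, FALLBACK_CONTEXT_MARGIN=256,
// FALLBACK_HARD_OVERFLOW_TOKENS=1024): softLimit = 3840 and hardLimit = 5120, so an
// estimate of 4000 reports nearLimit=true but skip=false.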