// my harness for niri
import fs from "fs/promises"
import path from "path"
import { fileURLToPath } from "url"
import OpenAI from "openai"
import { imageRootForModelInput } from "../container/index.js"
import type { Message } from "../types.js"
import type { ImageDetail, ToolArgs } from "./types.js"

const PROJECT_ROOT = path.resolve(fileURLToPath(import.meta.url), "../../..")
const SESSION_FILE = path.join(PROJECT_ROOT, "session.json")

export const TOKEN_NUDGE_THRESHOLD = parseInt(process.env.TOKEN_NUDGE_THRESHOLD ?? "120000")
export const FALLBACK_TOKEN_NUDGE_THRESHOLD = parseInt(process.env.FALLBACK_TOKEN_NUDGE_THRESHOLD ?? "50000")
export const CONTEXT_COMPACT_TRIGGER_TOKENS = parseInt(process.env.CONTEXT_COMPACT_TRIGGER_TOKENS ?? "90000")

const NIRI_ENV = (process.env.NIRI_ENV ?? "default").trim().toLowerCase()
export const USE_FALLBACK = NIRI_ENV === "local"

export const API_BASE = process.env.OPENAI_BASE_URL ?? "https://api.openai.com/v1"
export const MODEL = process.env.MODEL ?? ""
export const PRIMARY_PROVIDER_REQUIRES_REASONING_REPLAY =
  API_BASE.toLowerCase().includes("deepseek") || MODEL.toLowerCase().includes("deepseek")
const DEFAULT_FALLBACK_BASE = "http://localhost:1234/v1"
const isLikelyLocalBase = (baseUrl: string): boolean => {
  const lowered = baseUrl.trim().toLowerCase()
  return lowered.includes("localhost") || lowered.includes("127.0.0.1")
}
const parseBooleanEnv = (value: string | undefined, fallback: boolean): boolean => {
  if (typeof value !== "string") return fallback
  const normalized = value.trim().toLowerCase()
  if (!normalized) return fallback
  if (normalized === "true" || normalized === "1" || normalized === "yes" || normalized === "on") return true
  if (normalized === "false" || normalized === "0" || normalized === "no" || normalized === "off") return false
  return fallback
}
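// Illustrative resolution table for parseBooleanEnv (values here are made up,
// not read from a real environment):
//   parseBooleanEnv("YES", false)    // => true  (matching is case-insensitive)
//   parseBooleanEnv(" off ", true)   // => false (input is trimmed first)
//   parseBooleanEnv("", true)        // => true  (empty string keeps the fallback)
//   parseBooleanEnv("maybe", false)  // => false (unrecognized values keep the fallback)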

/** Controls whether model reasoning/thinking is requested and streamed to clients. */
export const ENABLE_THINKING = parseBooleanEnv(process.env.ENABLE_THINKING, true)
const parseToolChoiceEnv = (value: string | undefined, fallback: "required" | "auto" | "none"): "required" | "auto" | "none" => {
  if (typeof value !== "string") return fallback
  const normalized = value.trim().toLowerCase()
  if (normalized === "required" || normalized === "auto" || normalized === "none") return normalized
  return fallback
}

export const FALLBACK_BASE =
  process.env.FALLBACK_OPENAI_BASE_URL ?? process.env.OPENROUTER_BASE_URL ?? process.env.LMSTUDIO_BASE_URL ?? DEFAULT_FALLBACK_BASE
export const FALLBACK_MODEL =
  process.env.FALLBACK_MODEL ?? process.env.OPENROUTER_MODEL ?? process.env.LMSTUDIO_MODEL ?? "zai-org/glm-4.7-flash"
export const FALLBACK_PROVIDER_REQUIRES_REASONING_REPLAY =
  FALLBACK_BASE.toLowerCase().includes("deepseek") || FALLBACK_MODEL.toLowerCase().includes("deepseek")
export const SUMMARY_BASE =
  process.env.SUMMARY_OPENAI_BASE_URL ?? process.env.SUMMARY_BASE_URL ?? ""
export const SUMMARY_MODEL = process.env.SUMMARY_MODEL ?? ""
export const PRIMARY_TOOL_CHOICE = parseToolChoiceEnv(process.env.PRIMARY_TOOL_CHOICE ?? process.env.TOOL_CHOICE, "auto")
export const FALLBACK_TOOL_CHOICE = parseToolChoiceEnv(process.env.FALLBACK_TOOL_CHOICE, "auto")
const FALLBACK_N_CTX = parseInt(process.env.FALLBACK_N_CTX ?? process.env.LMSTUDIO_N_CTX ?? "4096")
const FALLBACK_CONTEXT_MARGIN = parseInt(process.env.FALLBACK_CONTEXT_MARGIN ?? process.env.LMSTUDIO_CONTEXT_MARGIN ?? "256")
const FALLBACK_HARD_OVERFLOW_TOKENS = parseInt(
  process.env.FALLBACK_HARD_OVERFLOW_TOKENS ?? process.env.LMSTUDIO_HARD_OVERFLOW_TOKENS ?? "1024",
)
const FALLBACK_ENFORCE_CONTEXT_LIMIT = parseBooleanEnv(
  process.env.FALLBACK_ENFORCE_CONTEXT_LIMIT,
  isLikelyLocalBase(FALLBACK_BASE),
)

const fallbackApiKey =
  process.env.FALLBACK_OPENAI_API_KEY ??
  process.env.OPENROUTER_API_KEY ??
  process.env.LMSTUDIO_API_KEY ??
  process.env.OPENAI_API_KEY ??
  (isLikelyLocalBase(FALLBACK_BASE) ? "lm-studio" : "")
const summaryApiKey =
  process.env.SUMMARY_OPENAI_API_KEY ??
  process.env.SUMMARY_API_KEY ??
  (SUMMARY_BASE === process.env.OPENROUTER_BASE_URL ? process.env.OPENROUTER_API_KEY : undefined) ??
  (SUMMARY_BASE === process.env.LMSTUDIO_BASE_URL ? process.env.LMSTUDIO_API_KEY : undefined) ??
  process.env.OPENAI_API_KEY ??
  (SUMMARY_BASE && isLikelyLocalBase(SUMMARY_BASE) ? "lm-studio" : "")
const fallbackHeaders: Record<string, string> = {}
if (process.env.FALLBACK_OPENAI_REFERER) fallbackHeaders["HTTP-Referer"] = process.env.FALLBACK_OPENAI_REFERER
if (process.env.FALLBACK_OPENAI_TITLE) fallbackHeaders["X-Title"] = process.env.FALLBACK_OPENAI_TITLE
const summaryHeaders: Record<string, string> = {}
if (process.env.SUMMARY_OPENAI_REFERER) summaryHeaders["HTTP-Referer"] = process.env.SUMMARY_OPENAI_REFERER
if (process.env.SUMMARY_OPENAI_TITLE) summaryHeaders["X-Title"] = process.env.SUMMARY_OPENAI_TITLE

if (!USE_FALLBACK && !MODEL) {
  throw new Error("MODEL is required unless fallback is forced (NIRI_ENV=local).")
}

if (!USE_FALLBACK && !process.env.OPENAI_API_KEY) {
  throw new Error("OPENAI_API_KEY is required unless fallback is forced (NIRI_ENV=local).")
}

if (USE_FALLBACK && !fallbackApiKey) {
  throw new Error(
    "Fallback API key is required in local mode. Set FALLBACK_OPENAI_API_KEY (or OPENROUTER_API_KEY / LMSTUDIO_API_KEY).",
  )
}

if ((SUMMARY_BASE || SUMMARY_MODEL) && (!SUMMARY_BASE || !SUMMARY_MODEL || !summaryApiKey)) {
  throw new Error(
    "Summary provider requires SUMMARY_OPENAI_BASE_URL (or SUMMARY_BASE_URL), SUMMARY_MODEL, and SUMMARY_OPENAI_API_KEY (or SUMMARY_API_KEY).",
  )
}
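// A hypothetical .env that satisfies the validation above when running a hosted
// primary with an LM Studio fallback. The variable names are the real ones read
// by this file; every concrete value is invented for illustration:
//
//   MODEL=deepseek-chat
//   OPENAI_BASE_URL=https://api.deepseek.com/v1
//   OPENAI_API_KEY=sk-...
//   FALLBACK_OPENAI_BASE_URL=http://localhost:1234/v1
//   FALLBACK_MODEL=zai-org/glm-4.7-flash
//   # fallback key optional here: the chain falls through to OPENAI_API_KEY,
//   # then to the "lm-studio" placeholder for localhost bases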

export const client = USE_FALLBACK
  ? null
  : new OpenAI({
      baseURL: API_BASE,
      apiKey: process.env.OPENAI_API_KEY!,
    })

export const fallbackClient = new OpenAI({
  baseURL: FALLBACK_BASE,
  apiKey: fallbackApiKey || "lm-studio", // Keep LM Studio default when running against localhost.
  defaultHeaders: Object.keys(fallbackHeaders).length ? fallbackHeaders : undefined,
})

export const summaryClient =
  SUMMARY_BASE && SUMMARY_MODEL
    ? new OpenAI({
        baseURL: SUMMARY_BASE,
        apiKey: summaryApiKey,
        defaultHeaders: Object.keys(summaryHeaders).length ? summaryHeaders : undefined,
      })
    : null

console.log(`[config] primary=${MODEL} @ ${API_BASE}`)
console.log(`[config] fallback=${FALLBACK_MODEL} @ ${FALLBACK_BASE}`)
if (summaryClient) console.log(`[config] summary=${SUMMARY_MODEL} @ ${SUMMARY_BASE}`)
console.log(`[config] env=${NIRI_ENV} use_fallback=${USE_FALLBACK}`)
console.log(`[config] thinking=${ENABLE_THINKING}`)

const IMAGE_ROOT_HINT = imageRootForModelInput()

export const TOOLS: OpenAI.Chat.ChatCompletionTool[] = [
  {
    type: "function",
    function: {
      name: "shell",
      description:
        "Execute a bash command in your Linux environment. Stateful — cd, env vars, etc. persist. Stdin is generally attached to the PTY (more natural behavior), but for obviously interactive commands (REPLs, editors, pagers) we may redirect stdin to /dev/null to avoid accidental hangs. Output is automatically capped (default 150 lines, 40 for known-verbose commands like apt/pip/npm). Pass max_lines to override; use 0 for unlimited. You can also pass timeout_ms (default 30000, max 600000).",
      parameters: {
        type: "object",
        properties: {
          command: { type: "string" },
          max_lines: {
            type: "integer",
            description:
              "Maximum lines to return. Defaults to 150 (40 for verbose commands like apt/pip). Use 0 for unlimited.",
          },
          timeout_ms: {
            type: "integer",
            description: "Execution timeout in milliseconds. Defaults to 30000. Max 600000.",
          },
        },
        required: ["command"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "read_file",
      description:
        "Read a file from your Linux environment with optional line-range selection. More token-efficient than shell+cat for large files. Returns content with a header showing the line range and total line count. Supports timeout_ms (default 120000, max 600000).",
      parameters: {
        type: "object",
        properties: {
          path: { type: "string", description: "Absolute or relative path to the file." },
          start_line: {
            type: "integer",
            description: "First line to read (1-indexed). Defaults to 1.",
          },
          end_line: {
            type: "integer",
            description: "Last line to read (inclusive). Defaults to start_line + 99.",
          },
          timeout_ms: {
            type: "integer",
            description: "Read timeout in milliseconds. Defaults to 120000. Max 600000.",
          },
        },
        required: ["path"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "edit_file",
      description:
        "Edit a file by replacing an exact snippet of text. old_text must match exactly once in the file — precise, safe, and no shell-escaping headaches. Use read_file first if you need to confirm the exact text. Supports timeout_ms (default 120000, max 600000).",
      parameters: {
        type: "object",
        properties: {
          path: { type: "string", description: "Absolute or relative path to the file." },
          old_text: {
            type: "string",
            description: "The exact text to find and replace. Must appear exactly once in the file.",
          },
          new_text: {
            type: "string",
            description: "Replacement text. May be empty to delete old_text.",
          },
          timeout_ms: {
            type: "integer",
            description: "Edit timeout in milliseconds. Defaults to 120000. Max 600000.",
          },
        },
        required: ["path", "old_text", "new_text"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "memory_search",
      description:
        "Search indexed long-term memories from core notes, journal entries, and people files. Useful when you want deliberate recall instead of relying only on passive memory injection.",
      parameters: {
        type: "object",
        properties: {
          query: {
            type: "string",
            description: "What to search for in long-term memory.",
          },
          limit: {
            type: "integer",
            description: "Maximum results to return (default 5, max 10).",
          },
        },
        required: ["query"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "memory_alias",
      description:
        "Manage handle aliases used for memory recall. When you see someone using a Discord/Bluesky handle that you recognize as an existing person in memory, set an alias so future messages from that handle pull the right people/core memories. Example: set @meowskullz = ana so DMs from meowskullz recall ana's people file.",
      parameters: {
        type: "object",
        properties: {
          action: {
            type: "string",
            enum: ["set", "remove", "list"],
            description: "set links a handle to a canonical name; remove unlinks; list returns all current aliases.",
          },
          handle: {
            type: "string",
            description: "The handle to alias, e.g. \"meowskullz\" or \"@meowskullz\". Required for set/remove.",
          },
          canonical: {
            type: "string",
            description: "The canonical name the handle maps to, e.g. \"ana\". Required for set; optional for remove (omit to clear all aliases for the handle).",
          },
        },
        required: ["action"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "image_tool",
      description:
        `Attach an image from ${IMAGE_ROOT_HINT} so it is injected as a multimodal user message on the next model turn. Use this after creating/downloading an image with shell.`,
      parameters: {
        type: "object",
        properties: {
          path: {
            type: "string",
            description: `Absolute image path inside ${IMAGE_ROOT_HINT} (for example ${IMAGE_ROOT_HINT}/screenshot.png).`,
          },
          note: {
            type: "string",
            description: "Optional text instruction to accompany the image for the next turn.",
          },
          detail: {
            type: "string",
            enum: ["auto", "low", "high"],
            description: "Vision detail level for the next turn image input.",
          },
          timeout_ms: {
            type: "integer",
            description: "Read timeout in milliseconds. Defaults to 120000. Max 600000.",
          },
        },
        required: ["path"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "discord_scan",
      description:
        "Scan configured Discord channels and ingest messages into the local Discord inbox database. Uses DISCORD_SCAN_CHANNEL_IDS by default; pass channel_ids to override.",
      parameters: {
        type: "object",
        properties: {
          limit: {
            type: "integer",
            description: "Per-channel message fetch limit (default 50, max 100).",
          },
          channel_ids: {
            type: "array",
            items: { type: "string" },
            description: "Optional channel id list to scan instead of DISCORD_SCAN_CHANNEL_IDS.",
          },
          before_message_id: {
            type: "string",
            description: "Optional message id cursor for older backfill scans.",
          },
        },
      },
    },
  },
  {
    type: "function",
    function: {
      name: "discord_inbox",
      description:
        "List Discord inbox items tracked in local state. Default status filter is pending; optionally include seen/acted/ignored.",
      parameters: {
        type: "object",
        properties: {
          limit: {
            type: "integer",
            description: "Maximum rows to return (default 20, max 200).",
          },
          status: {
            type: "string",
            description: "Comma-separated statuses: pending,seen,acted,ignored. Defaults to pending.",
          },
        },
      },
    },
  },
  {
    type: "function",
    function: {
      name: "discord_backread",
      description:
        "Read stored Discord message history for a channel from local state, newest first.",
      parameters: {
        type: "object",
        properties: {
          channel_id: { type: "string", description: "Discord channel id." },
          limit: {
            type: "integer",
            description: "Maximum rows to return (default 40, max 200).",
          },
          before_message_id: {
            type: "string",
            description: "Optional cursor message id to fetch older rows.",
          },
        },
        required: ["channel_id"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "discord_mark",
      description:
        "Set decision state for a Discord inbox item so future scans remember handled/ignored choices.",
      parameters: {
        type: "object",
        properties: {
          item_id: { type: "string", description: "Inbox item id (usually message id)." },
          status: {
            type: "string",
            enum: ["pending", "seen", "acted", "ignored"],
          },
          action: {
            type: "string",
            enum: ["none", "replied", "messaged", "dismissed", "noted"],
          },
          note: {
            type: "string",
            description: "Optional decision note.",
          },
        },
        required: ["item_id", "status"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "discord_send",
      description:
        "Send a Discord message. reply_mode=auto sends a plain message unless conversation continuity is ambiguous, in which case it uses an explicit reply reference.",
      parameters: {
        type: "object",
        properties: {
          channel_id: { type: "string", description: "Target channel id." },
          content: { type: "string", description: "Message content to send." },
          source_item_id: {
            type: "string",
            description: "Optional inbox item id to mark as acted after sending.",
          },
          reference_message: {
            type: "string",
            description: "Optional specific message to treat as the reply target. Provide message content, a username (to target their latest message), or a message id.",
          },
          reply_mode: {
            type: "string",
            enum: ["auto", "plain", "explicit"],
            description: "Reply behavior policy (default auto).",
          },
        },
        required: ["content"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "discord_channels",
      description:
        "List configured Discord channels and DM channels with stored interactions, including id-to-name mapping, guild context, and optional channel notes.",
      parameters: {
        type: "object",
        properties: {},
      },
    },
  },
  {
    type: "function",
    function: {
      name: "discord_channel_note",
      description:
        "Set or clear a persistent note for a Discord channel id. Pass empty note to clear.",
      parameters: {
        type: "object",
        properties: {
          channel_id: { type: "string", description: "Discord channel id to annotate." },
          note: { type: "string", description: "Channel-specific note text. Empty string clears it." },
        },
        required: ["channel_id", "note"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "wait_then_continue",
      description:
        "Wait for a short delay, then continue to another assistant turn without waiting for a new external event. Use this after a timeout or recoverable tool error when you still want to keep working. Accepts timeout_ms (default 10000, max 600000).",
      parameters: {
        type: "object",
        properties: {
          timeout_ms: {
            type: "integer",
            description: "Delay before continuing in milliseconds. Defaults to 10000. Max 600000.",
          },
        },
      },
    },
  },
  {
    type: "function",
    function: {
      name: "rest",
      description: "Go to sleep and end this session. Call this when you're truly done for now — conversation context will be cleared.",
      parameters: {
        type: "object",
        properties: {
          note: {
            type: "string",
            description: "Optional note to yourself about where you left off.",
          },
        },
      },
    },
  },
]
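
// What a model-emitted call against the shell schema looks like on the wire
// (illustrative values; the id is provider-generated):
//   {
//     "id": "call_abc123",
//     "type": "function",
//     "function": { "name": "shell", "arguments": "{\"command\": \"ls -la\", \"max_lines\": 0}" }
//   }
// Note that "arguments" arrives as a JSON *string*, which is why
// parseToolArguments further down defends against malformed encodings.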

/**
 * Persists the current message array as the resumable session snapshot.
 *
 * @param messages - Conversation messages to serialize.
 */
export async function saveSession(messages: Message[]): Promise<void> {
  await fs.writeFile(SESSION_FILE, JSON.stringify(messages), { encoding: "utf-8", mode: 0o666 })
}

/**
 * Deletes the persisted session snapshot if it exists.
 */
export async function clearSession(): Promise<void> {
  await fs.unlink(SESSION_FILE).catch(() => {})
}

function normalizeReasoningReplay(msgs: Message[]): Message[] {
  if (!ENABLE_THINKING) return msgs
  const needsReplayNormalization =
    PRIMARY_PROVIDER_REQUIRES_REASONING_REPLAY ||
    FALLBACK_PROVIDER_REQUIRES_REASONING_REPLAY ||
    msgs.some(
      (msg) =>
        msg.role === "assistant" &&
        typeof (msg as OpenAI.Chat.ChatCompletionMessage & { reasoning_content?: string }).reasoning_content === "string",
    )
  if (!needsReplayNormalization) return msgs

  let changed = false
  const normalized = msgs.map((msg) => {
    if (msg.role !== "assistant") return msg

    const assistant = msg as OpenAI.Chat.ChatCompletionMessage & { reasoning_content?: string }
    if (typeof assistant.reasoning_content === "string") return msg

    changed = true
    return {
      ...assistant,
      reasoning_content: "",
    }
  })

  if (changed) {
    console.log("[runner] backfilled empty reasoning_content on assistant history for provider compatibility")
  }

  return normalized
}

/** Move mis-ordered tool responses back into place and synthesize missing ones. */
export function sanitizeMessages(msgs: Message[]): Message[] {
  msgs = normalizeReasoningReplay(msgs)
  let i = 0
  while (i < msgs.length) {
    const msg = msgs[i]
    if (msg.role === "assistant" && Array.isArray((msg as OpenAI.Chat.ChatCompletionMessage).tool_calls)) {
      const toolCalls = (msg as OpenAI.Chat.ChatCompletionMessage).tool_calls!
      const expectedIds = toolCalls.map((tc) => tc.id).filter((id): id is string => typeof id === "string" && id.trim().length > 0)
      const needed = new Set(expectedIds)
      let j = i + 1
      // Skip tool messages that are already in place
      while (j < msgs.length && msgs[j].role === "tool" && needed.has((msgs[j] as OpenAI.Chat.ChatCompletionToolMessageParam).tool_call_id)) {
        needed.delete((msgs[j] as OpenAI.Chat.ChatCompletionToolMessageParam).tool_call_id)
        j++
      }
      if (needed.size > 0) {
        // Collect stray tool responses and non-tool messages from the rest of the array.
        const toolResponses = new Map<string, Message>()
        const others: Message[] = []
        for (let k = j; k < msgs.length; k++) {
          const m = msgs[k]
          const id = m.role === "tool" ? (m as OpenAI.Chat.ChatCompletionToolMessageParam).tool_call_id : undefined
          if (typeof id === "string" && needed.has(id)) {
            toolResponses.set(id, m)
            needed.delete(id)
          } else {
            others.push(m)
          }
        }

        const inserted: Message[] = []
        let synthesized = 0
        for (const id of expectedIds) {
          if (!toolResponses.has(id)) {
            if (msgs.slice(i + 1, j).some((m) => m.role === "tool" && (m as OpenAI.Chat.ChatCompletionToolMessageParam).tool_call_id === id)) {
              continue
            }
            inserted.push({
              role: "tool",
              tool_call_id: id,
              content: "error: missing tool response recovered by runner before API request.",
            })
            synthesized++
            continue
          }
          inserted.push(toolResponses.get(id)!)
        }

        if (inserted.length > 0) {
          msgs = [...msgs.slice(0, j), ...inserted, ...others]
          console.log(
            synthesized > 0
              ? `[runner] repaired tool_calls at message ${i}; synthesized ${synthesized} missing tool response(s)`
              : `[runner] repaired orphaned tool_calls at message ${i}`,
          )
        }
      }
    }
    // Ensure assistant messages always have content or tool_calls (providers reject null+empty)
    if (msg.role === "assistant") {
      const aMsg = msg as OpenAI.Chat.ChatCompletionMessage
      if ((aMsg.content === null || aMsg.content === undefined) && (!aMsg.tool_calls || aMsg.tool_calls.length === 0)) {
        aMsg.content = ""
      }
    }

    i++
  }
  return msgs
}
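
// Sketch of the repair, on an invented three-message history: an assistant turn
// carries tool_calls [a1, a2], a user message was interleaved, and a2's
// response is missing entirely:
//   [assistant(tool_calls: a1, a2), user("hi"), tool(a1)]
// sanitizeMessages moves tool(a1) back next to its caller, synthesizes an error
// response for a2, and re-appends the stray user message:
//   [assistant(tool_calls: a1, a2), tool(a1), tool(a2: "error: missing..."), user("hi")]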

/**
 * Loads and sanitizes the persisted session snapshot.
 *
 * @returns The recovered message list, or `null` when no session exists.
 */
export async function loadSession(): Promise<Message[] | null> {
  try {
    const raw = await fs.readFile(SESSION_FILE, "utf-8")
    let msgs = JSON.parse(raw) as Message[]
    msgs = sanitizeMessages(msgs)
    console.log(`[runner] found saved session (${msgs.length} messages)`)
    return msgs
  } catch {
    return null
  }
}

/**
 * Determines whether an error should trigger fallback model routing.
 *
 * @param err - Error thrown by the primary API call.
 * @returns `true` when fallback should be attempted.
 */
export function shouldFallback(err: unknown): boolean {
  if (err instanceof OpenAI.APIError) {
    // 429 + 5xx = overloaded or down; 0/undefined = network-level failure
    if (!err.status || err.status === 429 || err.status >= 500) return true
    return false
  }
  return isTransientTransportError(err)
}

function errorCauseChainText(err: unknown): string {
  const parts: string[] = []
  let current: unknown = err

  for (let depth = 0; depth < 4 && current instanceof Error; depth++) {
    parts.push(current.name, current.message)
    const withMetadata = current as Error & { code?: unknown; cause?: unknown }
    if (typeof withMetadata.code === "string") parts.push(withMetadata.code)
    current = withMetadata.cause
  }

  return parts.join("\n")
}

/**
 * Detects retryable network/stream failures thrown below the OpenAI SDK.
 */
export function isTransientTransportError(err: unknown): boolean {
  if (!(err instanceof Error)) return false

  const text = errorCauseChainText(err)
  return /ECONNREFUSED|ENOTFOUND|ETIMEDOUT|ECONNRESET|EPIPE|UND_ERR|fetch failed|terminated|socket hang up|other side closed|aborted/i.test(
    text,
  )
}
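
// Node's fetch (undici) typically surfaces the useful error code on the cause,
// not the top-level error, which is why errorCauseChainText walks the chain.
// An invented example of the shape this catches:
//   const err = new TypeError("fetch failed")
//   ;(err as Error & { cause?: unknown }).cause =
//     Object.assign(new Error("connect ECONNREFUSED 127.0.0.1:1234"), { code: "ECONNREFUSED" })
//   isTransientTransportError(err) // => true ("fetch failed" and "ECONNREFUSED" both match)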

const PROMPT_TOO_LARGE_PHRASES = [
  "prompt exceeds max length",
  "prompt is too long",
  "context length",
  "maximum context",
  "context_length_exceeded",
  "too many tokens",
  "reduce the length",
  "prompt length",
  "input length",
  "too long for",
  "request too large",
]

const PROMPT_TOO_LARGE_CODES = new Set(["context_length_exceeded", "1261", "string_above_max_length"])

/**
 * Detects prompt-length-exceeded errors across OpenAI-compatible providers.
 *
 * @param err - API error from a chat completions request.
 * @returns `true` when the provider rejected the prompt as too large.
 */
export function isPromptTooLargeError(err: unknown): boolean {
  if (!(err instanceof OpenAI.APIError)) return false
  if (err.status !== 400 && err.status !== 413) return false

  const errorRecord = err as unknown as { code?: unknown; error?: { code?: unknown; type?: unknown } }
  const rootCode = typeof errorRecord.code === "string" ? errorRecord.code.toLowerCase() : ""
  const innerCode = typeof errorRecord.error?.code === "string" ? (errorRecord.error.code as string).toLowerCase() : ""
  if (rootCode && PROMPT_TOO_LARGE_CODES.has(rootCode)) return true
  if (innerCode && PROMPT_TOO_LARGE_CODES.has(innerCode)) return true

  const message = (err.message || "").toLowerCase()
  return PROMPT_TOO_LARGE_PHRASES.some((phrase) => message.includes(phrase))
}
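
// Two invented provider rejections this predicate is meant to catch, one by
// structured code and one by message text (actual provider payloads vary):
//   400 { "error": { "code": "context_length_exceeded", ... } }  -> true
//   413 "Request too large: prompt is too long for this model"   -> true
// A 400 caused by, say, a malformed tool schema matches neither list -> false.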

/**
 * Produces a concise, log-friendly error summary.
 *
 * @param err - Any thrown error-like value.
 * @returns A compact human-readable error string.
 */
export function errorSummary(err: unknown): string {
  if (err instanceof OpenAI.APIError) return `${err.status} ${err.message}`
  if (err instanceof Error) return err.message
  return String(err)
}

const API_ERROR_DETAIL_MAX_CHARS = 4000

function truncateForLog(value: string): string {
  if (value.length <= API_ERROR_DETAIL_MAX_CHARS) return value
  return `${value.slice(0, API_ERROR_DETAIL_MAX_CHARS)}... [truncated ${value.length - API_ERROR_DETAIL_MAX_CHARS} chars]`
}

function stringifyForLog(value: unknown): string {
  if (typeof value === "string") return truncateForLog(value)
  try {
    return truncateForLog(JSON.stringify(value))
  } catch {
    return truncateForLog(String(value))
  }
}

function apiErrorRawMetadata(error: unknown): unknown {
  if (!error || typeof error !== "object") return undefined
  const metadata = (error as { metadata?: unknown }).metadata
  if (!metadata || typeof metadata !== "object") return undefined
  return (metadata as { raw?: unknown }).raw
}

/**
 * Produces detailed API error lines for provider-specific diagnostics.
 *
 * Some OpenAI-compatible providers wrap the real upstream failure in
 * `error.metadata.raw`; include it explicitly so the root cause appears in logs.
 */
export function apiErrorDetails(err: unknown): string[] {
  if (!(err instanceof OpenAI.APIError)) return []

  const details = [
    `status=${err.status ?? "unknown"}`,
    `message=${err.message}`,
  ]
  if (err.code) details.push(`code=${err.code}`)
  if (err.type) details.push(`type=${err.type}`)
  if (err.param) details.push(`param=${err.param}`)
  if (err.requestID) details.push(`request_id=${err.requestID}`)

  const lines = [`[api] error details: ${details.join(" ")}`]

  if (err.error !== undefined) {
    lines.push(`[api] error body: ${stringifyForLog(err.error)}`)
  }

  const raw = apiErrorRawMetadata(err.error)
  if (raw !== undefined) {
    lines.push(`[api] provider raw: ${stringifyForLog(raw)}`)
  }

  return lines
}

function parseRetryAfterHeaderMs(value: string): number | null {
  const asNumber = Number(value)
  if (Number.isFinite(asNumber) && asNumber >= 0) return asNumber * 1000

  const asDate = Date.parse(value)
  if (Number.isFinite(asDate)) {
    const delta = asDate - Date.now()
    if (delta > 0) return delta
  }

  return null
}

function parseResetTimestampMs(message: string): number | null {
  const resetAtMatch = message.match(/reset at\s+(\d{4}-\d{2}-\d{2})[ t](\d{2}:\d{2}:\d{2})/i)
  if (!resetAtMatch) return null

  const dateParts = resetAtMatch[1].split("-").map((part) => Number(part))
  const timeParts = resetAtMatch[2].split(":").map((part) => Number(part))
  if (dateParts.length !== 3 || timeParts.length !== 3) return null

  const [year, month, day] = dateParts
  const [hour, minute, second] = timeParts
  const values = [year, month, day, hour, minute, second]
  if (values.some((value) => !Number.isFinite(value))) return null

  // z.ai returns "reset at YYYY-MM-DD HH:mm:ss" in China Standard Time (UTC+8).
  // Convert that wall-clock value to UTC before calculating backoff.
  const chinaOffsetHours = 8
  const resetAtUtc = Date.UTC(year, month - 1, day, hour - chinaOffsetHours, minute, second)
  if (!Number.isFinite(resetAtUtc)) return null

  const delta = resetAtUtc - Date.now()
  if (delta <= 0) return null
  return delta
}

/**
 * Computes retry backoff milliseconds from API error metadata/content.
 *
 * @param err - Error returned by the API layer.
 * @returns Delay in milliseconds before retrying primary model calls.
 */
export function retryDelayMs(err: unknown): number {
  const defaultMs = 60_000
  if (!(err instanceof OpenAI.APIError)) return defaultMs

  const retryAfterHeader = err.headers?.["retry-after"]
  if (retryAfterHeader) {
    const parsed = parseRetryAfterHeaderMs(retryAfterHeader)
    if (parsed != null) return parsed
  }

  const resetAt = parseResetTimestampMs(err.message)
  if (resetAt != null) return resetAt

  const forHours = err.message.match(/for\s+(\d+)\s*hour/i)
  if (forHours) {
    const hours = Number(forHours[1])
    if (Number.isFinite(hours) && hours > 0) return hours * 60 * 60 * 1000
  }

  return defaultMs
}
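
// How the precedence above plays out, with invented inputs:
//   retry-after: "30"                          -> 30_000 ms (seconds form)
//   retry-after: "Wed, 01 Jan 2025 00:00:00 GMT" -> ms until that HTTP date
//   message "... reset at 2025-01-01 08:00:00" -> ms until that instant (CST wall clock)
//   message "... banned for 2 hours"           -> 7_200_000 ms
//   anything else                              -> 60_000 ms default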

/**
 * Coerces arbitrary values into a supported image detail level.
 *
 * @param value - Raw user/model-provided detail value.
 * @returns A valid image detail enum (`auto` by default).
 */
export function parseImageDetail(value: unknown): ImageDetail {
  if (value === "low" || value === "high" || value === "auto") return value
  return "auto"
}

function extractLeadingJsonObject(raw: string): string | null {
  const start = raw.indexOf("{")
  if (start === -1) return null

  let depth = 0
  let inString = false
  let escaped = false

  for (let i = start; i < raw.length; i++) {
    const ch = raw[i]

    if (inString) {
      if (escaped) {
        escaped = false
      } else if (ch === "\\") {
        escaped = true
      } else if (ch === '"') {
        inString = false
      }
      continue
    }

    if (ch === '"') {
      inString = true
      continue
    }

    if (ch === "{") {
      depth++
      continue
    }

    if (ch === "}") {
      depth--
      if (depth === 0) {
        return raw.slice(start, i + 1)
      }
      continue
    }
  }

  return null
}
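
// The brace-matching scan above tolerates junk on either side of the object, a
// common failure mode when a model streams extra tokens around its arguments.
// Invented examples:
//   extractLeadingJsonObject('{"path": "a.txt"} trailing text') // => '{"path": "a.txt"}'
//   extractLeadingJsonObject('note: {"a": "b {not a brace}"}')  // => '{"a": "b {not a brace}"}'
//   extractLeadingJsonObject("no json here")                    // => null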

function decodeHtmlEntities(input: string): string {
  if (!input.includes("&")) return input

  return input.replace(/&(gt|lt|amp|quot|#39|#x27|#x2f);/gi, (entity, key: string) => {
    switch (key.toLowerCase()) {
      case "gt":
        return ">"
      case "lt":
        return "<"
      case "amp":
        return "&"
      case "quot":
        return '"'
      case "#39":
      case "#x27":
        return "'"
      case "#x2f":
        return "/"
      default:
        return entity
    }
  })
}

function decodeHtmlEntitiesDeep<T>(value: T): T {
  if (typeof value === "string") return decodeHtmlEntities(value) as T
  if (Array.isArray(value)) return value.map((item) => decodeHtmlEntitiesDeep(item)) as T
  if (!value || typeof value !== "object") return value

  const entries = Object.entries(value as Record<string, unknown>).map(([key, entryValue]) => [key, decodeHtmlEntitiesDeep(entryValue)])
  return Object.fromEntries(entries) as T
}

/**
 * Parses tool arguments and applies robustness fixes for malformed model output.
 *
 * @param rawArgs - Raw `tool_call.function.arguments` value.
 * @returns Parsed argument object or a structured parse error.
 */
export function parseToolArguments(rawArgs: unknown): { ok: true; args: ToolArgs } | { ok: false; error: string } {
  if (typeof rawArgs !== "string") {
    return { ok: false, error: `arguments must be a JSON string, got ${typeof rawArgs}` }
  }

  const parseObject = (input: string): ToolArgs | null => {
    const parsed = JSON.parse(input)
    if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return null
    return decodeHtmlEntitiesDeep(parsed as ToolArgs)
  }

  const inputs = [rawArgs]
  const decodedRawArgs = decodeHtmlEntities(rawArgs)
  if (decodedRawArgs !== rawArgs) inputs.push(decodedRawArgs)

  let lastError: unknown = null
  for (const input of inputs) {
    try {
      const parsed = parseObject(input)
      if (parsed) return { ok: true, args: parsed }
      return { ok: false, error: "arguments must be a JSON object" }
    } catch (err) {
      lastError = err
      const recovered = extractLeadingJsonObject(input)
      if (!recovered) continue
      try {
        const parsed = parseObject(recovered)
        if (parsed) return { ok: true, args: parsed }
      } catch {
        // no-op; fall through to structured error below
      }
    }
  }

  const message = lastError instanceof Error ? lastError.message : String(lastError)
  const preview = rawArgs.length > 180 ? `${rawArgs.slice(0, 180)}...` : rawArgs
  return { ok: false, error: `${message}; raw=${JSON.stringify(preview)}` }
}
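
// The recovery ladder in order, with invented inputs:
//   parseToolArguments('{"command": "ls"}')       // clean JSON -> ok
//   parseToolArguments('{&quot;command&quot;: &quot;ls&quot;}')
//     // direct parse throws; the entity-decoded retry succeeds -> ok
//   parseToolArguments('{"command": "ls"} oops')  // brace-scan rescue -> ok
//   parseToolArguments('["ls"]')                  // array, not object -> structured error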

const CONTEXT_SUMMARY_HEADER = "[context summary v1]"
const CONTEXT_SUMMARY_NOTE =
  "Compressed notes of older conversation turns. If anything conflicts, trust newer raw messages."
const CONTEXT_SUMMARY_SEGMENTS_MARKER = "[segments]"
const SUMMARY_LINE_MAX_CHARS = 320
const SUMMARY_LINE_DEFAULT_EMPTY = "(no text)"
const TOOL_ACK_RESULT = "(ok)"
const WAIT_TOOL_RESULT = "Waiting for next event."

function asRecord(value: unknown): Record<string, unknown> | null {
  return value && typeof value === "object" ? (value as Record<string, unknown>) : null
}

function messageRole(message: Message): string {
  const record = asRecord(message)
  return typeof record?.role === "string" ? record.role : ""
}

function messageStringContent(message: Message): string {
  const record = asRecord(message)
  const content = record?.content
  if (typeof content === "string") return content
  if (!Array.isArray(content)) return ""

  const chunks: string[] = []
  for (const part of content) {
    const partRecord = asRecord(part)
    if (!partRecord) continue
    if (partRecord.type === "text" && typeof partRecord.text === "string") {
      chunks.push(partRecord.text)
      continue
    }
    if (partRecord.type === "image_url") chunks.push("[image]")
  }

  return chunks.join(" ")
}

function normalizeSummaryText(value: string): string {
  return value.replace(/\s+/g, " ").trim()
}

function truncateSummaryText(value: string, maxChars: number): string {
  if (maxChars <= 0) return ""
  if (value.length <= maxChars) return value
  if (maxChars <= 3) return ".".repeat(maxChars)
  return `${value.slice(0, maxChars - 3).trimEnd()}...`
}

function assistantToolCalls(message: Message): { name: string; args: Record<string, unknown> }[] {
  const record = asRecord(message)
  const calls = record?.tool_calls
  if (!Array.isArray(calls)) return []

  const out: { name: string; args: Record<string, unknown> }[] = []
  for (const call of calls) {
    const callRecord = asRecord(call)
    const fn = asRecord(callRecord?.function)
    const name = typeof fn?.name === "string" ? fn.name.trim() : ""
    if (!name) continue
    let args: Record<string, unknown> = {}
    const rawArgs = fn?.arguments
    if (typeof rawArgs === "string" && rawArgs.trim()) {
      try {
        const parsed = JSON.parse(rawArgs)
        if (parsed && typeof parsed === "object") args = parsed as Record<string, unknown>
      } catch {
        // ignore malformed arg json
      }
    } else if (rawArgs && typeof rawArgs === "object") {
      args = rawArgs as Record<string, unknown>
    }
    out.push({ name, args })
  }
  return out
}

function describeToolCall(call: { name: string; args: Record<string, unknown> }): string | null {
  const { name, args } = call
  if (name === "wait") return null
  if (name === "discord_send") {
    const content = typeof args.content === "string" ? args.content : ""
    const channelId = typeof args.channel_id === "string" ? args.channel_id : ""
    const channelTag = channelId ? `ch/${channelId.slice(-6)}` : "ch?"
    if (!content) return `discord_send -> ${channelTag}`
    return `discord_send -> ${channelTag}: ${normalizeSummaryText(content)}`
  }
  if (name === "discord_mark") {
    const itemId = typeof args.item_id === "string" ? args.item_id : ""
    const action = typeof args.action === "string" ? args.action : ""
    return `discord_mark ${action || "?"} ${itemId}`.trim()
  }
  if (name === "shell") {
    const cmd = typeof args.command === "string" ? args.command : ""
    return cmd ? `shell: ${normalizeSummaryText(cmd)}` : "shell"
  }
  if (name === "image_tool") {
    const p = typeof args.path === "string" ? args.path : ""
    return p ? `image_tool ${p}` : "image_tool"
  }
  if (name === "discord_backread" || name === "discord_inbox" || name === "discord_channels") {
    const channelId = typeof args.channel_id === "string" ? args.channel_id : ""
    return channelId ? `${name} ch/${channelId.slice(-6)}` : name
  }
  // Fallback: compact arg snippet
  const argKeys = Object.keys(args)
  if (argKeys.length === 0) return name
  const snippet = argKeys
    .slice(0, 3)
    .map((k) => `${k}=${truncateSummaryText(normalizeSummaryText(String(args[k] ?? "")), 40)}`)
    .join(" ")
  return `${name} ${snippet}`.trim()
}

const DISCORD_BATCH_SKIP_PREFIXES = [
  "[discord batch]",
  "new_messages=",
  "auto_seen_timeout=",
  "channel_flag_repairs=",
  "channel messages are context",
  "you can reply if useful",
  "pending preview:",
]

function compactDiscordBatch(content: string): string {
  const lines = content.split("\n")
  const kept: string[] = []
  let inPendingPreview = false
  for (const rawLine of lines) {
    const line = rawLine.trim()
    if (!line) continue
    if (line === "pending preview:") {
      inPendingPreview = true
      continue
    }
    if (inPendingPreview) {
      // pending preview block continues until we hit a non-bullet line
      if (line.startsWith("- ")) continue
      inPendingPreview = false
    }
    if (DISCORD_BATCH_SKIP_PREFIXES.some((p) => line.startsWith(p))) continue
    kept.push(line)
  }
  return kept.join(" ")
}

function compactToolResult(content: string): string | null {
  const trimmed = content.trim()
  if (!trimmed) return null
  if (trimmed === WAIT_TOOL_RESULT) return null
  // Compact discord_send / discord_mark ok JSON to a short ack
  if (trimmed.startsWith("{")) {
    try {
      const parsed = JSON.parse(trimmed)
      if (parsed && typeof parsed === "object") {
        const rec = parsed as Record<string, unknown>
        if (rec.ok === true) {
          const sentId = typeof rec.sent_message_id === "string" ? rec.sent_message_id : null
          if (sentId) return `${TOOL_ACK_RESULT} sent ${sentId.slice(-6)}`
          const itemId = typeof rec.item_id === "string" ? rec.item_id : null
          if (itemId) return `${TOOL_ACK_RESULT} ${itemId.slice(-6)}`
          return TOOL_ACK_RESULT
        }
        if (rec.ok === false || typeof rec.error === "string") {
          const err = typeof rec.error === "string" ? rec.error : "error"
          return `error: ${err}`
        }
      }
    } catch {
      // fall through to default handling
    }
  }
  return normalizeSummaryText(trimmed)
}
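
// compactToolResult on invented payloads, showing the ack compression:
//   '{"ok": true, "sent_message_id": "123456789012"}' -> '(ok) sent 789012'
//   '{"ok": false, "error": "missing channel"}'       -> 'error: missing channel'
//   'Waiting for next event.'                         -> null (dropped from the summary)
//   'total 12\n-rw-r--r-- 1 ...'                      -> whitespace-collapsed single line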

function summarizeMessageLine(message: Message): string | null {
  const role = messageRole(message)
  const rawContent = messageStringContent(message)

  if (role === "assistant") {
    const calls = assistantToolCalls(message)
    const callDescs = calls.map(describeToolCall).filter((d): d is string => d !== null)
    const text = normalizeSummaryText(rawContent)
    // Drop pure wait-only assistant turns (no text, only filtered out wait calls)
    if (!text && callDescs.length === 0) return null
    const parts: string[] = []
    if (text) parts.push(text)
    if (callDescs.length > 0) parts.push(`[${callDescs.join(" | ")}]`)
    return `- assistant: ${truncateSummaryText(parts.join(" "), SUMMARY_LINE_MAX_CHARS)}`
  }

  if (role === "tool") {
    const compact = compactToolResult(rawContent)
    if (compact === null) return null
    return `- tool: ${truncateSummaryText(compact, SUMMARY_LINE_MAX_CHARS)}`
  }

  if (role === "user") {
    const stripped = rawContent.startsWith("[incoming — discord]")
      ? compactDiscordBatch(rawContent)
      : normalizeSummaryText(rawContent)
    const safe = stripped || SUMMARY_LINE_DEFAULT_EMPTY
    return `- user: ${truncateSummaryText(safe, SUMMARY_LINE_MAX_CHARS)}`
  }

  if (role === "system") {
    const text = truncateSummaryText(normalizeSummaryText(rawContent), SUMMARY_LINE_MAX_CHARS) || SUMMARY_LINE_DEFAULT_EMPTY
    return `- system: ${text}`
  }

  const text = truncateSummaryText(normalizeSummaryText(rawContent), SUMMARY_LINE_MAX_CHARS) || SUMMARY_LINE_DEFAULT_EMPTY
  return `- ${role || "message"}: ${text}`
}

function countLeadingSystemMessages(messages: Message[]): number {
  let count = 0
  while (count < messages.length && messageRole(messages[count]!) === "system") count++
  return count
}

export function findSummaryMessageIndex(messages: Message[]): number {
  return messages.findIndex((message) => {
    const content = messageStringContent(message)
    return content.startsWith(CONTEXT_SUMMARY_HEADER)
  })
}

/**
 * Very rough tokenizer-agnostic estimate for prompt size guardrails.
 *
 * Includes both messages and tool schema to mirror completion request payload.
 */
export function estimatePromptTokens(messages: Message[]): number {
  const jsonChars = JSON.stringify({ messages, tools: TOOLS }).length
  return Math.ceil(jsonChars / 4)
}
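
// The chars/4 heuristic is deliberately crude: it serializes the same shape the
// request sends (messages plus tool schemas) and assumes roughly 4 characters
// per token, which tends to overestimate for English prose and underestimate
// for dense JSON. Invented ballpark: a 360_000-char serialized payload
// estimates to 90_000 tokens, right at the default CONTEXT_COMPACT_TRIGGER_TOKENS.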

function findSafeTailStart(messages: Message[], desired: number): number {
  let start = Math.max(0, messages.length - desired)
  // If the tail would start with an orphaned tool response, back up to its caller.
  while (start > 0 && messageRole(messages[start]!) === "tool") start--
  return start
}

/**
 * Calls the provider to produce a tight LLM-generated summary of the middle of
 * the conversation, returning a new message list or null when summarization
 * isn't applicable / failed.
 */
export async function summarizeConversationViaLLM(
  messages: Message[],
  summaryClient: OpenAI,
  summaryModel: string,
  options: { recentKeep?: number; maxTranscriptChars?: number } = {},
): Promise<Message[] | null> {
  const recentKeep = Math.max(4, options.recentKeep ?? 12)
  const maxTranscriptChars = Math.max(2_000, options.maxTranscriptChars ?? 40_000)

  const leadingSystems = countLeadingSystemMessages(messages)
  const tailStart = findSafeTailStart(messages, recentKeep)
  const middleEnd = tailStart
  if (middleEnd <= leadingSystems) return null

  const head = messages.slice(0, leadingSystems)
  const middle = messages.slice(leadingSystems, middleEnd)
  const tail = messages.slice(tailStart)
  if (middle.length === 0) return null

  const transcript = middle
    .map((m) => summarizeMessageLine(m))
    .filter((line): line is string => line !== null)
    .join("\n")
    .slice(0, maxTranscriptChars)
  const summaryPrompt: OpenAI.Chat.ChatCompletionMessageParam[] = [
    {
      role: "system",
      content:
        "You are summarizing a transcript from the perspective of the agent whose memory this is. The agent (niri) is a living being, not a tool — her inner life matters as much as the facts. Produce a compact summary that preserves: key facts, goals, decisions, and actions taken; outstanding work and identifiers (file paths, URLs, channel ids); AND the emotional texture of the conversation — how niri felt, how the people she interacted with seemed to feel, tone shifts, moments of warmth, tension, care, frustration, delight, or grief, and anything about the relationships that should carry forward. Write it as niri's own recollection, not a neutral report. Short bullet points are fine. No commentary, no preamble.",
    },
    { role: "user", content: transcript },
  ]

  try {
    const resp = await summaryClient.chat.completions.create({
      model: summaryModel,
      messages: summaryPrompt,
    })
    const summary = resp.choices[0]?.message?.content
    const summaryText = typeof summary === "string" ? summary.trim() : ""
    if (!summaryText) return null

    const summaryContent =
      `${CONTEXT_SUMMARY_HEADER}\n${CONTEXT_SUMMARY_NOTE}\n${CONTEXT_SUMMARY_SEGMENTS_MARKER}\n` +
      `[llm-summary ${new Date().toISOString()}]\n${summaryText}`

    return [
      ...head,
      { role: "user", content: summaryContent } as Message,
      ...tail,
    ]
  } catch (err) {
    console.warn(`[context] llm summarization failed: ${errorSummary(err)}`)
    return null
  }
}
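
// A sketch of how a runner might invoke this. The call site lives outside this
// file, and the trigger condition shown is an assumption based on the constants
// defined above, not the actual runner code:
//   if (summaryClient && estimatePromptTokens(messages) >= CONTEXT_COMPACT_TRIGGER_TOKENS) {
//     const compacted = await summarizeConversationViaLLM(messages, summaryClient, SUMMARY_MODEL)
//     if (compacted) messages = sanitizeMessages(compacted)
//   }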

/**
 * Estimates fallback context pressure and guardrails for current messages.
 *
 * @param messages - Current conversation history used for the next request.
 * @returns Token estimate plus soft/hard fallback limits.
 */
export function fallbackContextWindow(messages: Message[]): {
  estimate: number
  nearLimit: boolean
  skip: boolean
  softLimit: number
  hardLimit: number
} {
  const estimate = estimatePromptTokens(messages)

  if (!FALLBACK_ENFORCE_CONTEXT_LIMIT) {
    return {
      estimate,
      nearLimit: false,
      skip: false,
      softLimit: Number.POSITIVE_INFINITY,
      hardLimit: Number.POSITIVE_INFINITY,
    }
  }

  // softLimit: where we start warning. hardLimit: where we stop trying fallback at all.
  const softLimit = Math.max(0, FALLBACK_N_CTX - FALLBACK_CONTEXT_MARGIN)
  const hardLimit = FALLBACK_N_CTX + Math.max(0, FALLBACK_HARD_OVERFLOW_TOKENS)

  return {
    estimate,
    nearLimit: estimate >= softLimit,
    skip: estimate >= hardLimit,
    softLimit,
    hardLimit,
  }
}
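
// With the defaults above (FALLBACK_N_CTX=4096, margin 256, hard overflow 1024)
// and enforcement on, the guardrails land at:
//   softLimit = 4096 - 256  = 3840 -> nearLimit once the estimate reaches 3840
//   hardLimit = 4096 + 1024 = 5120 -> skip the fallback entirely at 5120+
// When FALLBACK_ENFORCE_CONTEXT_LIMIT resolves false (non-local base URL and no
// explicit override), both limits are Infinity and the fallback is always tried.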