my harness for niri
import OpenAI from "openai"
import { logMessage } from "../db.js"
import { buildCompletionMessages, rememberRecalledMemoryChunks } from "../memory.js"
import { recordMetric } from "../metrics.js"
import { emit } from "../stream.js"
import type { LoopState } from "./types.js"
import {
  API_BASE,
  ENABLE_THINKING,
  FALLBACK_BASE,
  FALLBACK_MODEL,
  FALLBACK_TOOL_CHOICE,
  MODEL,
  PRIMARY_TOOL_CHOICE,
  SUMMARY_MODEL,
  TOOLS,
  USE_FALLBACK,
  apiErrorDetails,
  client,
  errorSummary,
  estimatePromptTokens,
  fallbackClient,
  fallbackContextWindow,
  findSummaryMessageIndex,
  isPromptTooLargeError,
  retryDelayMs,
  sanitizeMessages,
  shouldFallback,
  summaryClient,
  summarizeConversationViaLLM,
} from "./util.js"
import { assistantContentText } from "./loop-content.js"
import type { CompletionRequest, CompletionTurnResult, ToolCallAssembly } from "./loop-shared.js"

/**
 * Resolves the configured summary client/model pair.
 *
 * @returns Active summary provider config.
 */
export function configuredSummaryProvider(): { client: OpenAI | null; model: string } {
  if (summaryClient && SUMMARY_MODEL) return { client: summaryClient, model: SUMMARY_MODEL }
  return {
    client: USE_FALLBACK ? fallbackClient : client,
    model: USE_FALLBACK ? FALLBACK_MODEL : MODEL,
  }
}
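// Illustrative usage (a sketch; `messages` is a hypothetical conversation, and
// this module itself only calls the provider from recoverFromPromptTooLarge below):
//
//   const provider = configuredSummaryProvider()
//   if (provider.client) {
//     const compacted = await summarizeConversationViaLLM(messages, provider.client, provider.model)
//   }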

function logApiError(err: unknown, context: string): void {
  if (!(err instanceof OpenAI.APIError)) return
  console.error(`[api] ${err.status} ${err.message} - ${context}`)
  for (const line of apiErrorDetails(err)) console.error(line)
}

/**
 * Appends an assistant message to state and persists it to conversation logs.
 *
 * @param convId - Active conversation id.
 * @param state - Mutable loop state.
 * @param msg - Assistant message to append.
 */
export function addAssistantMessage(convId: number, state: LoopState, msg: OpenAI.Chat.ChatCompletionMessage): void {
  state.conversation.push(msg)
  logMessage(convId, msg.role, msg.content ?? "", msg.tool_calls ?? undefined)
}

function recordPromptResponse(request: CompletionRequest, result: CompletionTurnResult, promptMetricId: number | null): void {
  recordMetric({
    type: "prompt_response",
    promptMetricId: promptMetricId ?? undefined,
    model: request.model,
    toolChoice: request.tool_choice,
    messages: request.messages,
    response: result.message,
    usage: result.usage,
  })
}

/**
 * Applies token usage from a completion response to loop state counters.
 *
 * @param state - Mutable loop state.
 * @param usage - Completion usage payload (if provided by the API).
 */
export function applyUsage(state: LoopState, usage: OpenAI.Completions.CompletionUsage | undefined): void {
  if (!usage) return
  state.tokenCount += usage.total_tokens
  if (usage.prompt_tokens) state.contextSize = usage.prompt_tokens
  console.log(`[tokens] +${usage.total_tokens} total=${state.tokenCount}`)
  recordMetric({ type: "usage", usage })
}

/**
 * Emits model reasoning text when exposed by the provider.
 *
 * Supports both `reasoning_content` and `<think>...</think>` wrappers.
 *
 * @param msg - Assistant message to inspect for reasoning traces.
 */
export function emitThinking(msg: OpenAI.Chat.ChatCompletionMessage): void {
  const rawMsg = msg as unknown as Record<string, unknown>
  let thinkingText: string | null = null

  if (typeof rawMsg.reasoning_content === "string" && rawMsg.reasoning_content.trim()) {
    thinkingText = rawMsg.reasoning_content.trim()
  } else if (typeof msg.content === "string") {
    const match = msg.content.match(/^<think>([\s\S]*?)<\/think>\s*/i)
    if (match) {
      thinkingText = match[1]!.trim()
      ;(msg as unknown as Record<string, unknown>).content = msg.content.slice(match[0].length)
    }
  }

  if (thinkingText) emit({ type: "thinking", text: thinkingText })
}
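// Illustrative example of the `<think>` handling above (hypothetical values):
//
//   const msg = { role: "assistant", content: "<think>plan the reply</think>Hello!", refusal: null } as OpenAI.Chat.ChatCompletionMessage
//   emitThinking(msg)
//   // emits { type: "thinking", text: "plan the reply" } and leaves msg.content === "Hello!"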

function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms))
}

function formatRetryAt(retryAfterMs: number): string {
  const retryAt = new Date(Date.now() + retryAfterMs)
  const local = retryAt.toLocaleString(undefined, {
    hour12: false,
    timeZoneName: "short",
  })
  return `${local} (${retryAt.toISOString()})`
}

function apiErrorSearchText(err: { message: string; error?: unknown }): string {
  const parts = [err.message]
  if (err.error !== undefined) {
    try {
      parts.push(JSON.stringify(err.error))
    } catch {
      parts.push(String(err.error))
    }
  }
  return parts.join("\n")
}

function shouldRetryWithAutoToolChoice(err: unknown): boolean {
  if (!(err instanceof OpenAI.APIError)) return false
  const text = apiErrorSearchText(err)
  return /no endpoints found that support the provided 'tool_choice' value|does not support this tool_choice/i.test(text)
}

function shouldRetryWithoutReasoningForTools(err: unknown): boolean {
  if (!(err instanceof OpenAI.APIError)) return false
  return /function call should not be used with prefix/i.test(apiErrorSearchText(err))
}

function toolCompatibleReasoningExtras(
  request?: Pick<CompletionRequest, "provider" | "chat_template_kwargs">,
): Partial<CompletionRequest> {
  return {
    include_reasoning: false,
    reasoning: { enabled: false, exclude: true, effort: "none" },
    provider: { ...request?.provider, require_parameters: true },
  }
}

function prefixModeToolCallExtras(request?: Pick<CompletionRequest, "provider" | "chat_template_kwargs">): Partial<CompletionRequest> {
  return {
    ...toolCompatibleReasoningExtras(request),
    enable_thinking: false,
    chat_template_kwargs: {
      ...request?.chat_template_kwargs,
      enable_thinking: false,
    },
  }
}

function disableReasoningForToolCalls(request: CompletionRequest): CompletionRequest {
  return {
    ...request,
    ...prefixModeToolCallExtras(request),
  }
}

function configuredThinkingRequestExtras(
  request?: Pick<CompletionRequest, "chat_template_kwargs">,
): Partial<CompletionRequest> {
  if (ENABLE_THINKING) return {}
  return {
    include_reasoning: false,
    reasoning: { enabled: false, exclude: true, effort: "none" },
    enable_thinking: false,
    chat_template_kwargs: {
      ...request?.chat_template_kwargs,
      enable_thinking: false,
    },
  }
}

function openRouterToolRequestExtras(baseUrl: string): Partial<CompletionRequest> {
  if (!baseUrl.includes("openrouter.ai")) return {}
  return toolCompatibleReasoningExtras()
}

function shouldRetryWithoutStreamUsage(err: unknown): boolean {
  if (!(err instanceof OpenAI.APIError)) return false
  if (err.status !== 400) return false
  return /stream_options|include_usage/i.test(err.message)
}

function coerceReasoningToolArgument(rawValue: string): unknown {
  const value = rawValue.trim()
  if (!value) return ""
  if (/^true$/i.test(value)) return true
  if (/^false$/i.test(value)) return false
  if (/^null$/i.test(value)) return null

  if (/^-?\d+(?:\.\d+)?$/.test(value)) {
    const parsed = Number(value)
    if (Number.isFinite(parsed)) return parsed
  }

  if (
    (value.startsWith("{") && value.endsWith("}")) ||
    (value.startsWith("[") && value.endsWith("]")) ||
    (value.startsWith('"') && value.endsWith('"'))
  ) {
    try {
      return JSON.parse(value)
    } catch {
      // keep raw string fallback
    }
  }

  return value
}
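// Coercion examples (illustrative): raw parameter text is mapped onto
// JSON-ish values before the tool call is re-serialized:
//
//   coerceReasoningToolArgument("true")        // => true
//   coerceReasoningToolArgument("42")          // => 42
//   coerceReasoningToolArgument('{"a": 1}')    // => { a: 1 }
//   coerceReasoningToolArgument("plain text")  // => "plain text"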

function parseReasoningToolCallBlock(rawBlock: string): ToolCallAssembly | null {
  const functionMatch = rawBlock.match(/<function(?:=|\s+name\s*=\s*["']?)([^>"'\s/]+)["']?\s*>/i)
  if (!functionMatch || functionMatch.index === undefined) return null

  const functionName = functionMatch[1]?.trim()
  if (!functionName) return null

  const functionBodyStart = functionMatch.index + functionMatch[0].length
  const functionBodyEnd = rawBlock.indexOf("</function>", functionBodyStart)
  if (functionBodyEnd < 0) return null

  const functionBody = rawBlock.slice(functionBodyStart, functionBodyEnd)
  const args: Record<string, unknown> = {}

  const parameterRegex = /<parameter(?:=|\s+name\s*=\s*["']?)([^>"'\s/]+)["']?\s*>([\s\S]*?)<\/parameter>/gi
  for (const match of functionBody.matchAll(parameterRegex)) {
    const key = match[1]?.trim()
    if (!key) continue
    args[key] = coerceReasoningToolArgument(match[2] ?? "")
  }

  return {
    id: "",
    type: "function",
    function: {
      name: functionName,
      arguments: JSON.stringify(args),
    },
  }
}

function drainReasoningToolCallBlocks(buffer: string): { blocks: string[]; remainder: string } {
  const blocks: string[] = []
  let remaining = buffer

  while (true) {
    const openMatch = remaining.match(/<tool_call(?:\s[^>]*)?>/i)
    if (!openMatch || openMatch.index === undefined) {
      const partialStart = remaining.lastIndexOf("<tool_call")
      return {
        blocks,
        remainder: partialStart >= 0 ? remaining.slice(partialStart) : "",
      }
    }

    const openStart = openMatch.index
    const openEnd = openStart + openMatch[0].length
    const closeStart = remaining.indexOf("</tool_call>", openEnd)
    if (closeStart < 0) {
      return {
        blocks,
        remainder: remaining.slice(openStart),
      }
    }

    blocks.push(remaining.slice(openEnd, closeStart))
    remaining = remaining.slice(closeStart + "</tool_call>".length)
  }
}
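// Illustrative input for the two helpers above (read_file/path are hypothetical
// tool names): a reasoning stream containing
//
//   <tool_call><function=read_file><parameter=path>notes.md</parameter></function></tool_call>
//
// drains as a single block, and parseReasoningToolCallBlock turns it into
// { type: "function", function: { name: "read_file", arguments: '{"path":"notes.md"}' } }
// with the id filled in later by the caller.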

async function consumeCompletionStream(
  stream: AsyncIterable<OpenAI.Chat.ChatCompletionChunk>,
): Promise<CompletionTurnResult> {
  const contentParts: string[] = []
  const streamedToolCalls = new Map<number, ToolCallAssembly>()
  const reasoningToolCalls: ToolCallAssembly[] = []
  let reasoningToolBuffer = ""

  let usage: OpenAI.Completions.CompletionUsage | undefined
  let emittedText = false
  let emittedThinking = false
  const reasoningParts: string[] = []

  for await (const chunk of stream) {
    if (chunk.usage) usage = chunk.usage

    const choice = chunk.choices[0]
    if (!choice) continue

    const delta = choice.delta as OpenAI.Chat.ChatCompletionChunk.Choice.Delta & {
      reasoning_content?: string
    }

    if (typeof delta.reasoning_content === "string" && delta.reasoning_content.length > 0) {
      if (ENABLE_THINKING) reasoningParts.push(delta.reasoning_content)

      reasoningToolBuffer += delta.reasoning_content
      const { blocks, remainder } = drainReasoningToolCallBlocks(reasoningToolBuffer)
      reasoningToolBuffer = remainder

      for (const block of blocks) {
        const parsedCall = parseReasoningToolCallBlock(block)
        if (!parsedCall) continue
        parsedCall.id = `call_reasoning_${reasoningToolCalls.length}`
        reasoningToolCalls.push(parsedCall)
      }
    }

    if (typeof delta.content === "string" && delta.content.length > 0) {
      if (ENABLE_THINKING && !emittedThinking && reasoningParts.length > 0) {
        emit({ type: "thinking", text: reasoningParts.join("") })
        emittedThinking = true
      }
      contentParts.push(delta.content)
      emit({ type: "text", text: delta.content })
      emittedText = true
    }

    if (!Array.isArray(delta.tool_calls)) continue

    for (const partial of delta.tool_calls) {
      const index = partial.index ?? 0
      const existing = streamedToolCalls.get(index) ?? {
        id: partial.id ?? `call_${index}`,
        type: "function" as const,
        function: { name: "", arguments: "" },
      }

      if (partial.id) existing.id = partial.id
      if (partial.type === "function") existing.type = "function"
      if (partial.function?.name) existing.function.name += partial.function.name
      if (partial.function?.arguments) existing.function.arguments += partial.function.arguments
      streamedToolCalls.set(index, existing)
    }
  }

  if (streamedToolCalls.size === 0) {
    const trailingReasoningCall = parseReasoningToolCallBlock(reasoningToolBuffer)
    if (trailingReasoningCall) {
      trailingReasoningCall.id = `call_reasoning_${reasoningToolCalls.length}`
      reasoningToolCalls.push(trailingReasoningCall)
    }
  }

  const finalToolCalls =
    streamedToolCalls.size > 0
      ? [...streamedToolCalls.entries()]
          .sort((a, b) => a[0] - b[0])
          .map(([, toolCall]) => toolCall)
      : reasoningToolCalls

  const message: OpenAI.Chat.ChatCompletionMessage = {
    role: "assistant",
    content: contentParts.length > 0 ? contentParts.join("") : null,
    refusal: null,
    ...(finalToolCalls.length > 0
      ? {
          tool_calls: finalToolCalls,
        }
      : {}),
  }

  if (reasoningParts.length > 0) {
    ;(message as OpenAI.Chat.ChatCompletionMessage & { reasoning_content?: string }).reasoning_content =
      reasoningParts.join("")
  }

  return {
    message,
    usage,
    emittedText,
    emittedThinking,
    bufferedThinking: reasoningParts.join(""),
  }
}

async function createStreamedCompletion(
  apiClient: OpenAI,
  request: CompletionRequest,
): Promise<CompletionTurnResult> {
  const streamedRequest = {
    ...request,
    stream: true,
    stream_options: { include_usage: true },
  } as const

  const promptMetricId = recordMetric({ type: "prompt", messages: request.messages })

  try {
    const stream = await apiClient.chat.completions.create(streamedRequest)
    const result = await consumeCompletionStream(stream as AsyncIterable<OpenAI.Chat.ChatCompletionChunk>)
    recordPromptResponse(request, result, promptMetricId)
    return result
  } catch (err) {
    if (shouldRetryWithoutStreamUsage(err)) {
      const stream = await apiClient.chat.completions.create({
        ...request,
        stream: true,
      } as const)
      const result = await consumeCompletionStream(stream as AsyncIterable<OpenAI.Chat.ChatCompletionChunk>)
      recordPromptResponse(request, result, promptMetricId)
      return result
    }
    throw err
  }
}
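// Illustrative tool-call assembly (hypothetical chunk contents): two streamed
// deltas for the same index,
//
//   { index: 0, id: "call_1", function: { name: "read_", arguments: '{"pa' } }
//   { index: 0, function: { name: "file", arguments: 'th":"notes.md"}' } }
//
// accumulate in consumeCompletionStream into one assembled call:
//
//   { id: "call_1", type: "function", function: { name: "read_file", arguments: '{"path":"notes.md"}' } }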

async function createFallbackCompletion(messages: OpenAI.Chat.ChatCompletionMessageParam[]): Promise<CompletionTurnResult> {
  const request: CompletionRequest = {
    model: FALLBACK_MODEL,
    messages,
    tools: TOOLS,
    tool_choice: FALLBACK_TOOL_CHOICE,
    ...openRouterToolRequestExtras(FALLBACK_BASE),
    ...configuredThinkingRequestExtras(),
  }

  let currentRequest = request
  let retriedAutoToolChoice = false
  let retriedWithoutReasoning = false
  while (true) {
    try {
      return await createStreamedCompletion(fallbackClient, currentRequest)
    } catch (err) {
      if (currentRequest.tool_choice !== "auto" && !retriedAutoToolChoice && shouldRetryWithAutoToolChoice(err)) {
        retriedAutoToolChoice = true
        console.warn(
          `[fallback] provider rejected tool_choice=${currentRequest.tool_choice}; retrying with tool_choice=auto`,
        )
        currentRequest = {
          ...currentRequest,
          tool_choice: "auto",
        }
        continue
      }
      if (!retriedWithoutReasoning && shouldRetryWithoutReasoningForTools(err)) {
        retriedWithoutReasoning = true
        console.warn("[fallback] provider rejected function calling in reasoning/prefix mode; retrying fallback with tool-compatible reasoning disabled")
        currentRequest = disableReasoningForToolCalls(currentRequest)
        continue
      }
      throw err
    }
  }
}

async function createPrimaryCompletion(messages: OpenAI.Chat.ChatCompletionMessageParam[]): Promise<CompletionTurnResult> {
  const request: CompletionRequest = {
    model: MODEL,
    messages,
    tools: TOOLS,
    tool_choice: PRIMARY_TOOL_CHOICE,
    ...openRouterToolRequestExtras(API_BASE),
    ...configuredThinkingRequestExtras(),
  }

  let currentRequest = request
  let retriedAutoToolChoice = false
  let retriedWithoutReasoning = false
  while (true) {
    try {
      return await createStreamedCompletion(client!, currentRequest)
    } catch (err) {
      if (currentRequest.tool_choice !== "auto" && !retriedAutoToolChoice && shouldRetryWithAutoToolChoice(err)) {
        retriedAutoToolChoice = true
        console.warn(`[api] provider rejected tool_choice=${currentRequest.tool_choice}; retrying primary with tool_choice=auto`)
        currentRequest = {
          ...currentRequest,
          tool_choice: "auto",
        }
        continue
      }
      if (!retriedWithoutReasoning && shouldRetryWithoutReasoningForTools(err)) {
        retriedWithoutReasoning = true
        console.warn("[api] provider rejected function calling in reasoning/prefix mode; retrying primary with tool-compatible reasoning disabled")
        currentRequest = disableReasoningForToolCalls(currentRequest)
        continue
      }
      throw err
    }
  }
}

function logPromptSizeDebug(state: LoopState, err: unknown, label: string): void {
  const messageCount = state.conversation.length
  const roleCounts = state.conversation.reduce<Record<string, number>>((acc, m) => {
    const role = (m as { role?: string }).role ?? "unknown"
    acc[role] = (acc[role] ?? 0) + 1
    return acc
  }, {})
  const estimate = estimatePromptTokens(state.conversation)
  const charLength = JSON.stringify(state.conversation).length
  const summary = err instanceof OpenAI.APIError ? `${err.status} ${err.message}` : errorSummary(err)
  console.warn(
    `[api] ${label}: ${summary} - messages=${messageCount} est_tokens=${estimate} chars=${charLength} roles=${JSON.stringify(roleCounts)} observedPromptTokens=${state.contextSize}`,
  )
}

async function recoverFromPromptTooLarge(state: LoopState, attempt: number): Promise<boolean> {
  const beforeCount = state.conversation.length
  const beforeEstimate = estimatePromptTokens(state.conversation)

  const summaryProvider = configuredSummaryProvider()
  if (!summaryProvider.client || !summaryProvider.model) {
    console.warn(`[context] recovery: no summary client available; cannot llm-summarize`)
    return false
  }

  console.warn(`[context] recovery: attempting llm summarization via ${summaryProvider.model} (attempt=${attempt + 1})`)
  const summarized = await summarizeConversationViaLLM(state.conversation, summaryProvider.client, summaryProvider.model)
  if (!summarized) {
    console.warn(`[context] recovery: llm summarization returned no changes`)
    return false
  }

  const afterEstimate = estimatePromptTokens(summarized)
  if (afterEstimate >= beforeEstimate) {
    console.warn(`[context] recovery: llm summary not smaller (${beforeEstimate} -> ${afterEstimate}); keeping original`)
    return false
  }

  state.conversation = summarized
  state.contextSize = afterEstimate

  const summaryIdx = findSummaryMessageIndex(state.conversation)
  const summary = summaryIdx >= 0 ? (state.conversation[summaryIdx]?.content as string) : undefined

  console.warn(
    `[context] recovery: llm-summarized conversation (${beforeCount} -> ${summarized.length} msgs, ${beforeEstimate} -> ${afterEstimate} tokens)`,
  )

  recordMetric({
    type: "compaction",
    before: beforeEstimate,
    after: afterEstimate,
    method: "force-llm",
    summary,
  })
  return true
}

/**
 * Fetches the next assistant completion, including fallback and backoff behavior.
 *
 * @param state - Mutable loop state containing current conversation/context.
 * @param baseConversation - Optional alternate base conversation for retries.
 * @returns The next chat completion response.
 * @throws If the primary request fails with a non-fallback error condition.
 */
export async function fetchCompletion(
  state: LoopState,
  baseConversation: OpenAI.Chat.ChatCompletionMessageParam[] = state.conversation,
): Promise<CompletionTurnResult> {
  let promptTooLargeAttempts = 0
  while (true) {
    if (baseConversation === state.conversation) {
      state.conversation = sanitizeMessages(state.conversation)
      baseConversation = state.conversation
    } else {
      baseConversation = sanitizeMessages(baseConversation)
    }

    const requestContext = await buildCompletionMessages(
      baseConversation,
      state.memoryRecallCooldowns,
      state.memoryRecallTurn,
    )
    const requestMessages = requestContext.messages

    if (USE_FALLBACK) {
      const fallbackWindow = fallbackContextWindow(requestMessages)
      if (fallbackWindow.nearLimit) {
        console.warn(
          `[fallback] prompt estimate ${fallbackWindow.estimate} nearing fallback limit ${fallbackWindow.softLimit} (${FALLBACK_MODEL})`,
        )
      }

      try {
        const completion = await createFallbackCompletion(requestMessages)
        state.memoryRecallCooldowns = rememberRecalledMemoryChunks(
          state.memoryRecallCooldowns,
          requestContext.recalledChunkIds,
          state.memoryRecallTurn,
        )
        return completion
      } catch (fallbackErr) {
        if (isPromptTooLargeError(fallbackErr) && promptTooLargeAttempts < 2) {
          logPromptSizeDebug(state, fallbackErr, `fallback rejected prompt (attempt ${promptTooLargeAttempts + 1}/2)`)
          const recovered = await recoverFromPromptTooLarge(state, promptTooLargeAttempts)
          promptTooLargeAttempts++
          if (recovered) continue
        }
        if (shouldFallback(fallbackErr)) {
          const retryAfter = retryDelayMs(fallbackErr)
          console.warn(
            `[fallback] transient failure (${errorSummary(fallbackErr)}); retrying after ${Math.ceil(retryAfter / 1000)}s`,
          )
          console.log(
            `[runner] backing off ${Math.ceil(retryAfter / 1000)}s (until ${formatRetryAt(retryAfter)}) before retrying fallback...`,
          )
          await sleep(retryAfter)
          continue
        }
        logApiError(fallbackErr, `model=${FALLBACK_MODEL} api=${FALLBACK_BASE}`)
        throw fallbackErr
      }
    }

    try {
      const completion = await createPrimaryCompletion(requestMessages)
      state.memoryRecallCooldowns = rememberRecalledMemoryChunks(
        state.memoryRecallCooldowns,
        requestContext.recalledChunkIds,
        state.memoryRecallTurn,
      )
      return completion
    } catch (primaryErr) {
      if (isPromptTooLargeError(primaryErr) && promptTooLargeAttempts < 2) {
        logPromptSizeDebug(state, primaryErr, `primary rejected prompt (attempt ${promptTooLargeAttempts + 1}/2)`)
        const recovered = await recoverFromPromptTooLarge(state, promptTooLargeAttempts)
        promptTooLargeAttempts++
        if (recovered) continue
        logApiError(primaryErr, `model=${MODEL} api=${API_BASE}`)
        throw primaryErr
      }

      if (!shouldFallback(primaryErr)) {
        logApiError(primaryErr, `model=${MODEL} api=${API_BASE}`)
        throw primaryErr
      }

      const fallbackWindow = fallbackContextWindow(requestMessages)
      if (fallbackWindow.skip) {
        console.warn(
          `[api] primary down (${errorSummary(primaryErr)}) and fallback context estimate ${fallbackWindow.estimate} exceeds hard limit ${fallbackWindow.hardLimit}; retrying primary after backoff`,
        )
        const retryAfter = retryDelayMs(primaryErr)
        console.log(
          `[runner] backing off ${Math.ceil(retryAfter / 1000)}s (until ${formatRetryAt(retryAfter)}) before retrying primary...`,
        )
        await sleep(retryAfter)
        continue
      }

      if (fallbackWindow.nearLimit) {
        console.warn(
          `[fallback] prompt estimate ${fallbackWindow.estimate} nearing fallback limit ${fallbackWindow.softLimit} (${FALLBACK_MODEL})`,
        )
      }

      console.warn(`[api] primary down (${errorSummary(primaryErr)}) - switching to fallback`)
      try {
        const completion = await createFallbackCompletion(requestMessages)
        state.memoryRecallCooldowns = rememberRecalledMemoryChunks(
          state.memoryRecallCooldowns,
          requestContext.recalledChunkIds,
          state.memoryRecallTurn,
        )
        return completion
      } catch (fallbackErr) {
        if (isPromptTooLargeError(fallbackErr) && promptTooLargeAttempts < 2) {
          logPromptSizeDebug(state, fallbackErr, `fallback rejected prompt during failover (attempt ${promptTooLargeAttempts + 1}/2)`)
          const recovered = await recoverFromPromptTooLarge(state, promptTooLargeAttempts)
          promptTooLargeAttempts++
          if (recovered) continue
        }
        console.warn(
          `[api] fallback failed (${errorSummary(fallbackErr)}) after primary failure (${errorSummary(primaryErr)}); retrying primary after backoff`,
        )
        const retryAfter = retryDelayMs(primaryErr)
        console.log(
          `[runner] backing off ${Math.ceil(retryAfter / 1000)}s (until ${formatRetryAt(retryAfter)}) before retrying primary...`,
        )
        await sleep(retryAfter)
      }
    }
  }
}
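// Illustrative call site (a sketch; the real runner lives outside this module
// and `convId`/`state` are assumed to be initialized there):
//
//   const result = await fetchCompletion(state)
//   applyUsage(state, result.usage)
//   if (!result.emittedThinking) emitThinking(result.message)
//   addAssistantMessage(convId, state, result.message)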

export const __completionTest = {
  consumeCompletionStream,
}
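// Minimal sketch of exercising the test-only export above with synthetic
// chunks (not a real provider stream):
//
//   async function* chunks() {
//     yield { choices: [{ delta: { content: "hi" } }] } as unknown as OpenAI.Chat.ChatCompletionChunk
//   }
//   const result = await __completionTest.consumeCompletionStream(chunks())
//   // result.message.content === "hi", result.emittedText === true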