my harness for niri
import OpenAI from "openai"
import { logMessage } from "../db.js"
import { buildCompletionMessages, rememberRecalledMemoryChunks } from "../memory.js"
import { recordMetric } from "../metrics.js"
import { emit } from "../stream.js"
import type { LoopState } from "./types.js"
import {
  API_BASE,
  ENABLE_THINKING,
  FALLBACK_BASE,
  FALLBACK_MODEL,
  FALLBACK_TOOL_CHOICE,
  MODEL,
  PRIMARY_TOOL_CHOICE,
  SUMMARY_MODEL,
  TOOLS,
  USE_FALLBACK,
  apiErrorDetails,
  client,
  errorSummary,
  estimatePromptTokens,
  fallbackClient,
  fallbackContextWindow,
  findSummaryMessageIndex,
  isPromptTooLargeError,
  retryDelayMs,
  sanitizeMessages,
  shouldFallback,
  summaryClient,
  summarizeConversationViaLLM,
} from "./util.js"
import { assistantContentText } from "./loop-content.js"
import type { CompletionRequest, CompletionTurnResult, ToolCallAssembly } from "./loop-shared.js"

/**
 * Resolves the configured summary client/model pair.
 *
 * @returns Active summary provider config.
 */
export function configuredSummaryProvider(): { client: OpenAI | null; model: string } {
  if (summaryClient && SUMMARY_MODEL) return { client: summaryClient, model: SUMMARY_MODEL }
  return {
    client: USE_FALLBACK ? fallbackClient : client,
    model: USE_FALLBACK ? FALLBACK_MODEL : MODEL,
  }
}
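
// Usage sketch (mirrors recoverFromPromptTooLarge further below; not executed here):
//
//   const summaryProvider = configuredSummaryProvider()
//   if (summaryProvider.client && summaryProvider.model) {
//     await summarizeConversationViaLLM(state.conversation, summaryProvider.client, summaryProvider.model)
//   }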

// Logs OpenAI API errors with request context; non-API errors are ignored here.
function logApiError(err: unknown, context: string): void {
  if (!(err instanceof OpenAI.APIError)) return
  console.error(`[api] ${err.status} ${err.message} - ${context}`)
  for (const line of apiErrorDetails(err)) console.error(line)
}

/**
 * Appends an assistant message to state and persists it to conversation logs.
 *
 * @param convId - Active conversation id.
 * @param state - Mutable loop state.
 * @param msg - Assistant message to append.
 */
export function addAssistantMessage(convId: number, state: LoopState, msg: OpenAI.Chat.ChatCompletionMessage): void {
  state.conversation.push(msg)
  logMessage(convId, msg.role, msg.content ?? "", msg.tool_calls ?? undefined)
}

// Records a prompt/response pair against the originating prompt metric, if any.
function recordPromptResponse(request: CompletionRequest, result: CompletionTurnResult, promptMetricId: number | null): void {
  recordMetric({
    type: "prompt_response",
    promptMetricId: promptMetricId ?? undefined,
    model: request.model,
    toolChoice: request.tool_choice,
    messages: request.messages,
    response: result.message,
    usage: result.usage,
  })
}

/**
 * Applies token usage from a completion response to loop state counters.
 *
 * @param state - Mutable loop state.
 * @param usage - Completion usage payload (if provided by the API).
 */
export function applyUsage(state: LoopState, usage: OpenAI.Completions.CompletionUsage | undefined): void {
  if (!usage) return
  state.tokenCount += usage.total_tokens
  if (usage.prompt_tokens) state.contextSize = usage.prompt_tokens
  console.log(`[tokens] +${usage.total_tokens} total=${state.tokenCount}`)
  recordMetric({ type: "usage", usage })
}

/**
 * Emits model reasoning text when exposed by the provider.
 *
 * Supports both `reasoning_content` and `<think>...</think>` wrappers.
 *
 * @param msg - Assistant message to inspect for reasoning traces.
 */
export function emitThinking(msg: OpenAI.Chat.ChatCompletionMessage): void {
  const rawMsg = msg as unknown as Record<string, unknown>
  let thinkingText: string | null = null

  if (typeof rawMsg.reasoning_content === "string" && rawMsg.reasoning_content.trim()) {
    thinkingText = rawMsg.reasoning_content.trim()
  } else if (typeof msg.content === "string") {
    const match = msg.content.match(/^<think>([\s\S]*?)<\/think>\s*/i)
    if (match) {
      thinkingText = match[1]!.trim()
      ;(msg as unknown as Record<string, unknown>).content = msg.content.slice(match[0].length)
    }
  }

  if (thinkingText) emit({ type: "thinking", text: thinkingText })
}
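
// Illustrative sketch (not executed): a message whose content arrives wrapped in
// <think> tags. emitThinking strips the wrapper in place and emits the inner text
// as a "thinking" event; the remaining content is what downstream consumers see.
//
//   const msg = { role: "assistant", content: "<think>plan the reply</think>Hello!", refusal: null }
//   emitThinking(msg as OpenAI.Chat.ChatCompletionMessage)
//   // emit({ type: "thinking", text: "plan the reply" }) fires, and msg.content === "Hello!"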

function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms))
}

function formatRetryAt(retryAfterMs: number): string {
  const retryAt = new Date(Date.now() + retryAfterMs)
  const local = retryAt.toLocaleString(undefined, {
    hour12: false,
    timeZoneName: "short",
  })
  return `${local} (${retryAt.toISOString()})`
}

// Combines the error message with its structured error payload for regex matching.
function apiErrorSearchText(err: { message: string; error?: unknown }): string {
  const parts = [err.message]
  if (err.error !== undefined) {
    try {
      parts.push(JSON.stringify(err.error))
    } catch {
      parts.push(String(err.error))
    }
  }
  return parts.join("\n")
}

function shouldRetryWithAutoToolChoice(err: unknown): boolean {
  if (!(err instanceof OpenAI.APIError)) return false
  const text = apiErrorSearchText(err)
  return /no endpoints found that support the provided 'tool_choice' value|does not support this tool_choice/i.test(text)
}

function shouldRetryWithoutReasoningForTools(err: unknown): boolean {
  if (!(err instanceof OpenAI.APIError)) return false
  return /function call should not be used with prefix/i.test(apiErrorSearchText(err))
}

// Request extras that keep tool calling usable on providers whose reasoning/prefix
// mode conflicts with function calls.
function toolCompatibleReasoningExtras(
  request?: Pick<CompletionRequest, "provider" | "chat_template_kwargs">,
): Partial<CompletionRequest> {
  return {
    include_reasoning: false,
    reasoning: { enabled: false, exclude: true, effort: "none" },
    provider: { ...request?.provider, require_parameters: true },
  }
}

// Same as above, plus template-level flags for providers that gate thinking via
// chat_template_kwargs.
function prefixModeToolCallExtras(request?: Pick<CompletionRequest, "provider" | "chat_template_kwargs">): Partial<CompletionRequest> {
  return {
    ...toolCompatibleReasoningExtras(request),
    enable_thinking: false,
    chat_template_kwargs: {
      ...request?.chat_template_kwargs,
      enable_thinking: false,
    },
  }
}

function disableReasoningForToolCalls(request: CompletionRequest): CompletionRequest {
  return {
    ...request,
    ...prefixModeToolCallExtras(request),
  }
}
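
// Illustrative sketch (not executed): the extras merged into a request when a provider
// rejects function calling in reasoning/prefix mode. Field values mirror
// prefixModeToolCallExtras above; the base request shown is hypothetical.
//
//   disableReasoningForToolCalls({ model: MODEL, messages, tools: TOOLS, tool_choice: "auto" })
//   // => { ...original request,
//   //      include_reasoning: false,
//   //      reasoning: { enabled: false, exclude: true, effort: "none" },
//   //      provider: { require_parameters: true },
//   //      enable_thinking: false,
//   //      chat_template_kwargs: { enable_thinking: false } }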

// When thinking is disabled via config, these extras ask the provider to skip
// reasoning output entirely.
function configuredThinkingRequestExtras(
  request?: Pick<CompletionRequest, "chat_template_kwargs">,
): Partial<CompletionRequest> {
  if (ENABLE_THINKING) return {}
  return {
    include_reasoning: false,
    reasoning: { enabled: false, exclude: true, effort: "none" },
    enable_thinking: false,
    chat_template_kwargs: {
      ...request?.chat_template_kwargs,
      enable_thinking: false,
    },
  }
}

function openRouterToolRequestExtras(baseUrl: string): Partial<CompletionRequest> {
  if (!baseUrl.includes("openrouter.ai")) return {}
  return toolCompatibleReasoningExtras()
}

function shouldRetryWithoutStreamUsage(err: unknown): boolean {
  if (!(err instanceof OpenAI.APIError)) return false
  if (err.status !== 400) return false
  return /stream_options|include_usage/i.test(err.message)
}

// Best-effort conversion of a <parameter> body (always streamed as text) into a typed
// JSON value: booleans, null, numbers, and JSON literals are parsed; anything else
// stays a raw string.
function coerceReasoningToolArgument(rawValue: string): unknown {
  const value = rawValue.trim()
  if (!value) return ""
  if (/^true$/i.test(value)) return true
  if (/^false$/i.test(value)) return false
  if (/^null$/i.test(value)) return null

  if (/^-?\d+(?:\.\d+)?$/.test(value)) {
    const parsed = Number(value)
    if (Number.isFinite(parsed)) return parsed
  }

  if (
    (value.startsWith("{") && value.endsWith("}")) ||
    (value.startsWith("[") && value.endsWith("]")) ||
    (value.startsWith('"') && value.endsWith('"'))
  ) {
    try {
      return JSON.parse(value)
    } catch {
      // keep raw string fallback
    }
  }

  return value
}
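
// Worked examples (not executed) of the coercion rules above; values are hypothetical:
//
//   coerceReasoningToolArgument("true")            // => true
//   coerceReasoningToolArgument("42.5")            // => 42.5
//   coerceReasoningToolArgument('{"city":"Oslo"}') // => { city: "Oslo" }
//   coerceReasoningToolArgument("not json {")      // => "not json {" (raw string kept)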

// Parses one <tool_call> block emitted inside reasoning text into an OpenAI-style
// tool call. Returns null when the block is malformed or incomplete.
function parseReasoningToolCallBlock(rawBlock: string): ToolCallAssembly | null {
  const functionMatch = rawBlock.match(/<function(?:=|\s+name\s*=\s*["']?)([^>"'\s/]+)["']?\s*>/i)
  if (!functionMatch || functionMatch.index === undefined) return null

  const functionName = functionMatch[1]?.trim()
  if (!functionName) return null

  const functionBodyStart = functionMatch.index + functionMatch[0].length
  const functionBodyEnd = rawBlock.indexOf("</function>", functionBodyStart)
  if (functionBodyEnd < 0) return null

  const functionBody = rawBlock.slice(functionBodyStart, functionBodyEnd)
  const args: Record<string, unknown> = {}

  const parameterRegex = /<parameter(?:=|\s+name\s*=\s*["']?)([^>"'\s/]+)["']?\s*>([\s\S]*?)<\/parameter>/gi
  for (const match of functionBody.matchAll(parameterRegex)) {
    const key = match[1]?.trim()
    if (!key) continue
    args[key] = coerceReasoningToolArgument(match[2] ?? "")
  }

  return {
    id: "",
    type: "function",
    function: {
      name: functionName,
      arguments: JSON.stringify(args),
    },
  }
}
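
// Illustrative sketch (not executed): parsing a block in the <function>/<parameter>
// convention this parser targets. The tool name and parameter are hypothetical.
//
//   parseReasoningToolCallBlock("<function=get_weather><parameter=city>Oslo</parameter></function>")
//   // => { id: "", type: "function",
//   //      function: { name: "get_weather", arguments: '{"city":"Oslo"}' } }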

// Extracts every complete <tool_call>...</tool_call> block from the streaming buffer
// and returns the unconsumed tail (a partially received block) as the new remainder.
function drainReasoningToolCallBlocks(buffer: string): { blocks: string[]; remainder: string } {
  const blocks: string[] = []
  let remaining = buffer

  while (true) {
    const openMatch = remaining.match(/<tool_call(?:\s[^>]*)?>/i)
    if (!openMatch || openMatch.index === undefined) {
      const partialStart = remaining.lastIndexOf("<tool_call")
      return {
        blocks,
        remainder: partialStart >= 0 ? remaining.slice(partialStart) : "",
      }
    }

    const openStart = openMatch.index
    const openEnd = openStart + openMatch[0].length
    const closeStart = remaining.indexOf("</tool_call>", openEnd)
    if (closeStart < 0) {
      return {
        blocks,
        remainder: remaining.slice(openStart),
      }
    }

    blocks.push(remaining.slice(openEnd, closeStart))
    remaining = remaining.slice(closeStart + "</tool_call>".length)
  }
}
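
// Illustrative sketch (not executed): one complete block is drained, and the trailing
// partial tag is kept so the next delta can finish it. Content is hypothetical.
//
//   drainReasoningToolCallBlocks(
//     "<tool_call><function=get_weather></function></tool_call><tool_call",
//   )
//   // => { blocks: ["<function=get_weather></function>"], remainder: "<tool_call" }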

// Consumes a streamed chat completion: forwards text deltas to the event stream,
// assembles structured tool calls, and also recovers tool calls that some providers
// emit as <tool_call> blocks inside reasoning_content.
async function consumeCompletionStream(
  stream: AsyncIterable<OpenAI.Chat.ChatCompletionChunk>,
): Promise<CompletionTurnResult> {
  const contentParts: string[] = []
  const streamedToolCalls = new Map<number, ToolCallAssembly>()
  const reasoningToolCalls: ToolCallAssembly[] = []
  let reasoningToolBuffer = ""

  let usage: OpenAI.Completions.CompletionUsage | undefined
  let emittedText = false
  let emittedThinking = false
  const reasoningParts: string[] = []

  for await (const chunk of stream) {
    if (chunk.usage) usage = chunk.usage

    const choice = chunk.choices[0]
    if (!choice) continue

    const delta = choice.delta as OpenAI.Chat.ChatCompletionChunk.Choice.Delta & {
      reasoning_content?: string
    }

    if (typeof delta.reasoning_content === "string" && delta.reasoning_content.length > 0) {
      if (ENABLE_THINKING) reasoningParts.push(delta.reasoning_content)

      reasoningToolBuffer += delta.reasoning_content
      const { blocks, remainder } = drainReasoningToolCallBlocks(reasoningToolBuffer)
      reasoningToolBuffer = remainder

      for (const block of blocks) {
        const parsedCall = parseReasoningToolCallBlock(block)
        if (!parsedCall) continue
        parsedCall.id = `call_reasoning_${reasoningToolCalls.length}`
        reasoningToolCalls.push(parsedCall)
      }
    }

    if (typeof delta.content === "string" && delta.content.length > 0) {
      if (ENABLE_THINKING && !emittedThinking && reasoningParts.length > 0) {
        emit({ type: "thinking", text: reasoningParts.join("") })
        emittedThinking = true
      }
      contentParts.push(delta.content)
      emit({ type: "text", text: delta.content })
      emittedText = true
    }

    if (!Array.isArray(delta.tool_calls)) continue

    for (const partial of delta.tool_calls) {
      const index = partial.index ?? 0
      const existing = streamedToolCalls.get(index) ?? {
        id: partial.id ?? `call_${index}`,
        type: "function" as const,
        function: { name: "", arguments: "" },
      }

      if (partial.id) existing.id = partial.id
      if (partial.type === "function") existing.type = "function"
      if (partial.function?.name) existing.function.name += partial.function.name
      if (partial.function?.arguments) existing.function.arguments += partial.function.arguments
      streamedToolCalls.set(index, existing)
    }
  }

  if (streamedToolCalls.size === 0) {
    const trailingReasoningCall = parseReasoningToolCallBlock(reasoningToolBuffer)
    if (trailingReasoningCall) {
      trailingReasoningCall.id = `call_reasoning_${reasoningToolCalls.length}`
      reasoningToolCalls.push(trailingReasoningCall)
    }
  }

  const finalToolCalls =
    streamedToolCalls.size > 0
      ? [...streamedToolCalls.entries()]
          .sort((a, b) => a[0] - b[0])
          .map(([, toolCall]) => toolCall)
      : reasoningToolCalls

  const message: OpenAI.Chat.ChatCompletionMessage = {
    role: "assistant",
    content: contentParts.length > 0 ? contentParts.join("") : null,
    refusal: null,
    ...(finalToolCalls.length > 0
      ? {
          tool_calls: finalToolCalls,
        }
      : {}),
  }

  if (reasoningParts.length > 0) {
    ;(message as OpenAI.Chat.ChatCompletionMessage & { reasoning_content?: string }).reasoning_content =
      reasoningParts.join("")
  }

  return {
    message,
    usage,
    emittedText,
    emittedThinking,
    bufferedThinking: reasoningParts.join(""),
  }
}

// Issues a streaming completion request, recording prompt/response metrics. If the
// provider rejects stream_options.include_usage, the request is retried once without it.
async function createStreamedCompletion(
  apiClient: OpenAI,
  request: CompletionRequest,
): Promise<CompletionTurnResult> {
  const streamedRequest = {
    ...request,
    stream: true,
    stream_options: { include_usage: true },
  } as const

  const promptMetricId = recordMetric({ type: "prompt", messages: request.messages })

  try {
    const stream = await apiClient.chat.completions.create(streamedRequest)
    const result = await consumeCompletionStream(stream as AsyncIterable<OpenAI.Chat.ChatCompletionChunk>)
    recordPromptResponse(request, result, promptMetricId)
    return result
  } catch (err) {
    if (shouldRetryWithoutStreamUsage(err)) {
      const stream = await apiClient.chat.completions.create({
        ...request,
        stream: true,
      } as const)
      const result = await consumeCompletionStream(stream as AsyncIterable<OpenAI.Chat.ChatCompletionChunk>)
      recordPromptResponse(request, result, promptMetricId)
      return result
    }
    throw err
  }
}

// Runs a completion against the fallback provider, downgrading tool_choice or
// disabling reasoning (once each) if the provider rejects the stricter request.
async function createFallbackCompletion(messages: OpenAI.Chat.ChatCompletionMessageParam[]): Promise<CompletionTurnResult> {
  const request: CompletionRequest = {
    model: FALLBACK_MODEL,
    messages,
    tools: TOOLS,
    tool_choice: FALLBACK_TOOL_CHOICE,
    ...openRouterToolRequestExtras(FALLBACK_BASE),
    ...configuredThinkingRequestExtras(),
  }

  let currentRequest = request
  let retriedAutoToolChoice = false
  let retriedWithoutReasoning = false
  while (true) {
    try {
      return await createStreamedCompletion(fallbackClient, currentRequest)
    } catch (err) {
      if (currentRequest.tool_choice !== "auto" && !retriedAutoToolChoice && shouldRetryWithAutoToolChoice(err)) {
        retriedAutoToolChoice = true
        console.warn(
          `[fallback] provider rejected tool_choice=${currentRequest.tool_choice}; retrying with tool_choice=auto`,
        )
        currentRequest = {
          ...currentRequest,
          tool_choice: "auto",
        }
        continue
      }
      if (!retriedWithoutReasoning && shouldRetryWithoutReasoningForTools(err)) {
        retriedWithoutReasoning = true
        console.warn("[fallback] provider rejected function calling in reasoning/prefix mode; retrying fallback with tool-compatible reasoning disabled")
        currentRequest = disableReasoningForToolCalls(currentRequest)
        continue
      }
      throw err
    }
  }
}

// Runs a completion against the primary provider with the same one-shot retry
// downgrades as the fallback path.
async function createPrimaryCompletion(messages: OpenAI.Chat.ChatCompletionMessageParam[]): Promise<CompletionTurnResult> {
  const request: CompletionRequest = {
    model: MODEL,
    messages,
    tools: TOOLS,
    tool_choice: PRIMARY_TOOL_CHOICE,
    ...openRouterToolRequestExtras(API_BASE),
    ...configuredThinkingRequestExtras(),
  }

  let currentRequest = request
  let retriedAutoToolChoice = false
  let retriedWithoutReasoning = false
  while (true) {
    try {
      return await createStreamedCompletion(client!, currentRequest)
    } catch (err) {
      if (currentRequest.tool_choice !== "auto" && !retriedAutoToolChoice && shouldRetryWithAutoToolChoice(err)) {
        retriedAutoToolChoice = true
        console.warn(`[api] provider rejected tool_choice=${currentRequest.tool_choice}; retrying primary with tool_choice=auto`)
        currentRequest = {
          ...currentRequest,
          tool_choice: "auto",
        }
        continue
      }
      if (!retriedWithoutReasoning && shouldRetryWithoutReasoningForTools(err)) {
        retriedWithoutReasoning = true
        console.warn("[api] provider rejected function calling in reasoning/prefix mode; retrying primary with tool-compatible reasoning disabled")
        currentRequest = disableReasoningForToolCalls(currentRequest)
        continue
      }
      throw err
    }
  }
}
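
// Illustrative sketch (not executed): the kinds of provider errors that trigger the
// one-shot downgrades above. The messages mirror the regexes in
// shouldRetryWithAutoToolChoice / shouldRetryWithoutReasoningForTools; the exact
// status codes and payload shapes are assumptions.
//
//   // APIError: "No endpoints found that support the provided 'tool_choice' value"
//   //   -> retried once with tool_choice: "auto"
//   // APIError: "function call should not be used with prefix"
//   //   -> retried once via disableReasoningForToolCalls()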

// Logs prompt-size diagnostics (message counts by role, token estimate, raw size)
// when a provider rejects the prompt as too large.
function logPromptSizeDebug(state: LoopState, err: unknown, label: string): void {
  const messageCount = state.conversation.length
  const roleCounts = state.conversation.reduce<Record<string, number>>((acc, m) => {
    const role = (m as { role?: string }).role ?? "unknown"
    acc[role] = (acc[role] ?? 0) + 1
    return acc
  }, {})
  const estimate = estimatePromptTokens(state.conversation)
  const charLength = JSON.stringify(state.conversation).length
  const summary = err instanceof OpenAI.APIError ? `${err.status} ${err.message}` : errorSummary(err)
  console.warn(
    `[api] ${label}: ${summary} - messages=${messageCount} est_tokens=${estimate} chars=${charLength} roles=${JSON.stringify(roleCounts)} observedPromptTokens=${state.contextSize}`,
  )
}

// Attempts to shrink the conversation via LLM summarization after a prompt-too-large
// rejection. Returns true only when the summarized conversation is actually smaller.
async function recoverFromPromptTooLarge(state: LoopState, attempt: number): Promise<boolean> {
  const beforeCount = state.conversation.length
  const beforeEstimate = estimatePromptTokens(state.conversation)

  const summaryProvider = configuredSummaryProvider()
  if (!summaryProvider.client || !summaryProvider.model) {
    console.warn(`[context] recovery: no summary client available; cannot llm-summarize`)
    return false
  }

  console.warn(`[context] recovery: attempting llm summarization via ${summaryProvider.model} (attempt=${attempt + 1})`)
  const summarized = await summarizeConversationViaLLM(state.conversation, summaryProvider.client, summaryProvider.model)
  if (!summarized) {
    console.warn(`[context] recovery: llm summarization returned no changes`)
    return false
  }

  const afterEstimate = estimatePromptTokens(summarized)
  if (afterEstimate >= beforeEstimate) {
    console.warn(`[context] recovery: llm summary not smaller (${beforeEstimate} -> ${afterEstimate}); keeping original`)
    return false
  }

  state.conversation = summarized
  state.contextSize = afterEstimate

  const summaryIdx = findSummaryMessageIndex(state.conversation)
  const summary = summaryIdx >= 0 ? (state.conversation[summaryIdx]?.content as string) : undefined

  console.warn(
    `[context] recovery: llm-summarized conversation (${beforeCount} -> ${summarized.length} msgs, ${beforeEstimate} -> ${afterEstimate} tokens)`,
  )

  recordMetric({
    type: "compaction",
    before: beforeEstimate,
    after: afterEstimate,
    method: "force-llm",
    summary,
  })
  return true
}

/**
 * Fetches the next assistant completion, including fallback and backoff behavior.
 *
 * @param state - Mutable loop state containing the current conversation/context.
 * @param baseConversation - Optional alternate base conversation for retries.
 * @returns The next completion turn result.
 * @throws If the primary request fails with a non-fallback error condition.
 */
export async function fetchCompletion(
  state: LoopState,
  baseConversation: OpenAI.Chat.ChatCompletionMessageParam[] = state.conversation,
): Promise<CompletionTurnResult> {
  let promptTooLargeAttempts = 0
  while (true) {
    if (baseConversation === state.conversation) {
      state.conversation = sanitizeMessages(state.conversation)
      baseConversation = state.conversation
    } else {
      baseConversation = sanitizeMessages(baseConversation)
    }

    const requestContext = await buildCompletionMessages(
      baseConversation,
      state.memoryRecallCooldowns,
      state.memoryRecallTurn,
    )
    const requestMessages = requestContext.messages

    if (USE_FALLBACK) {
      const fallbackWindow = fallbackContextWindow(requestMessages)
      if (fallbackWindow.nearLimit) {
        console.warn(
          `[fallback] prompt estimate ${fallbackWindow.estimate} nearing fallback limit ${fallbackWindow.softLimit} (${FALLBACK_MODEL})`,
        )
      }

      try {
        const completion = await createFallbackCompletion(requestMessages)
        state.memoryRecallCooldowns = rememberRecalledMemoryChunks(
          state.memoryRecallCooldowns,
          requestContext.recalledChunkIds,
          state.memoryRecallTurn,
        )
        return completion
      } catch (fallbackErr) {
        if (isPromptTooLargeError(fallbackErr) && promptTooLargeAttempts < 2) {
          logPromptSizeDebug(state, fallbackErr, `fallback rejected prompt (attempt ${promptTooLargeAttempts + 1}/2)`)
          const recovered = await recoverFromPromptTooLarge(state, promptTooLargeAttempts)
          promptTooLargeAttempts++
          if (recovered) continue
        }
        if (shouldFallback(fallbackErr)) {
          const retryAfter = retryDelayMs(fallbackErr)
          console.warn(
            `[fallback] transient failure (${errorSummary(fallbackErr)}); retrying after ${Math.ceil(retryAfter / 1000)}s`,
          )
          console.log(
            `[runner] backing off ${Math.ceil(retryAfter / 1000)}s (until ${formatRetryAt(retryAfter)}) before retrying fallback...`,
          )
          await sleep(retryAfter)
          continue
        }
        logApiError(fallbackErr, `model=${FALLBACK_MODEL} api=${FALLBACK_BASE}`)
        throw fallbackErr
      }
    }

    try {
      const completion = await createPrimaryCompletion(requestMessages)
      state.memoryRecallCooldowns = rememberRecalledMemoryChunks(
        state.memoryRecallCooldowns,
        requestContext.recalledChunkIds,
        state.memoryRecallTurn,
      )
      return completion
    } catch (primaryErr) {
      if (isPromptTooLargeError(primaryErr) && promptTooLargeAttempts < 2) {
        logPromptSizeDebug(state, primaryErr, `primary rejected prompt (attempt ${promptTooLargeAttempts + 1}/2)`)
        const recovered = await recoverFromPromptTooLarge(state, promptTooLargeAttempts)
        promptTooLargeAttempts++
        if (recovered) continue
        logApiError(primaryErr, `model=${MODEL} api=${API_BASE}`)
        throw primaryErr
      }

      if (!shouldFallback(primaryErr)) {
        logApiError(primaryErr, `model=${MODEL} api=${API_BASE}`)
        throw primaryErr
      }

      const fallbackWindow = fallbackContextWindow(requestMessages)
      if (fallbackWindow.skip) {
        console.warn(
          `[api] primary down (${errorSummary(primaryErr)}) and fallback context estimate ${fallbackWindow.estimate} exceeds hard limit ${fallbackWindow.hardLimit}; retrying primary after backoff`,
        )
        const retryAfter = retryDelayMs(primaryErr)
        console.log(
          `[runner] backing off ${Math.ceil(retryAfter / 1000)}s (until ${formatRetryAt(retryAfter)}) before retrying primary...`,
        )
        await sleep(retryAfter)
        continue
      }

      if (fallbackWindow.nearLimit) {
        console.warn(
          `[fallback] prompt estimate ${fallbackWindow.estimate} nearing fallback limit ${fallbackWindow.softLimit} (${FALLBACK_MODEL})`,
        )
      }

      console.warn(`[api] primary down (${errorSummary(primaryErr)}) - switching to fallback`)
      try {
        const completion = await createFallbackCompletion(requestMessages)
        state.memoryRecallCooldowns = rememberRecalledMemoryChunks(
          state.memoryRecallCooldowns,
          requestContext.recalledChunkIds,
          state.memoryRecallTurn,
        )
        return completion
      } catch (fallbackErr) {
        if (isPromptTooLargeError(fallbackErr) && promptTooLargeAttempts < 2) {
          logPromptSizeDebug(state, fallbackErr, `fallback rejected prompt during failover (attempt ${promptTooLargeAttempts + 1}/2)`)
          const recovered = await recoverFromPromptTooLarge(state, promptTooLargeAttempts)
          promptTooLargeAttempts++
          if (recovered) continue
        }
        console.warn(
          `[api] fallback failed (${errorSummary(fallbackErr)}) after primary failure (${errorSummary(primaryErr)}); retrying primary after backoff`,
        )
        const retryAfter = retryDelayMs(primaryErr)
        console.log(
          `[runner] backing off ${Math.ceil(retryAfter / 1000)}s (until ${formatRetryAt(retryAfter)}) before retrying primary...`,
        )
        await sleep(retryAfter)
      }
    }
  }
}
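
// Usage sketch (hypothetical caller, not part of this module): one turn of the agent
// loop. convId and state are assumed to come from the surrounding runner.
//
//   const result = await fetchCompletion(state)
//   applyUsage(state, result.usage)
//   emitThinking(result.message)
//   addAssistantMessage(convId, state, result.message)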

// Exposed for unit tests only.
export const __completionTest = {
  consumeCompletionStream,
}