fix: handle streaming cache usage · lyric.tngl.sh/mistermorph@765cfaf

+4

assets/config/config.example.yaml

··· 34 34 # - Go duration strings such as "5m", "1h", "24h" 35 35 # The runtime maps this to provider-supported cache buckets automatically. 36 36 cache_ttl: "short" 37 + # Optional prefix added to generated prompt_cache_key values for providers that support it. 38 + # Change this value to force a new cache group during testing. 39 + cache_key_prefix: "" 37 40 # Per-LLM HTTP request timeout (0 uses provider default). 38 41 request_timeout: "90s" 39 42 # Optional default temperature. If empty/unset, do not call uniai.WithTemperature(...). ··· 67 70 # cheap: 68 71 # model: "gpt-4.1-mini" 69 72 # cache_ttl: "long" 73 + # cache_key_prefix: "cheap-test" 70 74 # reasoning: 71 75 # provider: xai 72 76 # model: "grok-4.1-fast-reasoning"

+2

docs/configuration.md

··· 255 255 - `MISTER_MORPH_LLM_ENDPOINT` 256 256 - `MISTER_MORPH_LLM_MODEL` 257 257 - `MISTER_MORPH_LLM_API_KEY` 258 + - `MISTER_MORPH_LLM_CACHE_KEY_PREFIX` 258 259 - `MISTER_MORPH_LLM_REQUEST_TIMEOUT` 259 260 - `MISTER_MORPH_LOGGING_LEVEL` 260 261 - `MISTER_MORPH_LOGGING_FORMAT` ··· 281 282 - Azure uses `llm.azure.deployment`. 282 283 - Bedrock uses `llm.bedrock.*`. 283 284 - `llm.cache_ttl` controls cache intent across providers. Supported values are `off`, `short`, `long`, and Go duration strings such as `5m`, `1h`, and `24h`. The runtime maps this to each provider's supported cache buckets. 285 + - `llm.cache_key_prefix` is optional and defaults to empty. For providers that support `prompt_cache_key`, the runtime prepends it to the generated key so changing the value forces a new cache group. 284 286 - `llm.tools_emulation_mode` controls tool-call emulation for models without native tool calling. 285 287 - `llm.profiles` defines named profile overrides. 286 288 - `llm.routes` routes semantic purposes such as `main_loop`, `addressing`, `heartbeat`, `plan_create`, and `memory_draft`.

+1

internal/configdefaults/defaults.go

··· 18 18 v.SetDefault("llm.model", "") 19 19 v.SetDefault("llm.api_key", "") 20 20 v.SetDefault("llm.cache_ttl", "short") 21 + v.SetDefault("llm.cache_key_prefix", "") 21 22 v.SetDefault("llm.request_timeout", 90*time.Second) 22 23 v.SetDefault("llm.tools_emulation_mode", "off") 23 24 v.SetDefault("llm.cloudflare.account_id", "")

+3

internal/llmutil/llmutil.go

··· 26 26 Model string `config:"llm.model"` 27 27 Headers map[string]string 28 28 CacheTTL string `config:"llm.cache_ttl"` 29 + CacheKeyPrefix string `config:"llm.cache_key_prefix"` 29 30 AzureDeployment string `config:"llm.azure.deployment"` 30 31 RequestTimeoutRaw string `config:"llm.request_timeout"` 31 32 ToolsEmulationMode string `config:"llm.tools_emulation_mode"` ··· 56 57 Model: strings.TrimSpace(r.GetString("llm.model")), 57 58 Headers: loadStringMapKeyFromReader(r, "llm.headers"), 58 59 CacheTTL: strings.TrimSpace(r.GetString("llm.cache_ttl")), 60 + CacheKeyPrefix: strings.TrimSpace(r.GetString("llm.cache_key_prefix")), 59 61 AzureDeployment: strings.TrimSpace(r.GetString("llm.azure.deployment")), 60 62 RequestTimeoutRaw: strings.TrimSpace(r.GetString("llm.request_timeout")), 61 63 ToolsEmulationMode: strings.TrimSpace(r.GetString("llm.tools_emulation_mode")), ··· 172 174 Pricing: pricing, 173 175 RequestTimeout: cfg.RequestTimeout, 174 176 CacheTTL: strings.TrimSpace(values.CacheTTL), 177 + CacheKeyPrefix: strings.TrimSpace(values.CacheKeyPrefix), 175 178 ToolsEmulationMode: toolsEmulationMode, 176 179 Temperature: temperature, 177 180 ReasoningEffort: reasoningEffort,

+21 -8

internal/llmutil/llmutil_test.go

··· 568 568 v.Set("llm.api_key", "base-key") 569 569 v.Set("llm.model", "gpt-5.2") 570 570 v.Set("llm.cache_ttl", "short") 571 + v.Set("llm.cache_key_prefix", "base-cache") 571 572 v.Set("llm.request_timeout", "90s") 572 573 v.Set("llm.profiles", map[string]any{ 573 574 "cheap": map[string]any{ 574 - "model": "gpt-4.1-mini", 575 - "temperature": "0.2", 576 - "cache_ttl": "long", 575 + "model": "gpt-4.1-mini", 576 + "temperature": "0.2", 577 + "cache_ttl": "long", 578 + "cache_key_prefix": "cheap-cache", 577 579 }, 578 580 "reasoning": map[string]any{ 579 581 "provider": "xai", ··· 602 604 if values.CacheTTL != "short" { 603 605 t.Fatalf("cache_ttl = %q, want short", values.CacheTTL) 604 606 } 607 + if values.CacheKeyPrefix != "base-cache" { 608 + t.Fatalf("cache_key_prefix = %q, want base-cache", values.CacheKeyPrefix) 609 + } 605 610 if values.Profiles["cheap"].CacheTTL != "long" { 606 611 t.Fatalf("cheap cache_ttl = %q, want long", values.Profiles["cheap"].CacheTTL) 612 + } 613 + if values.Profiles["cheap"].CacheKeyPrefix != "cheap-cache" { 614 + t.Fatalf("cheap cache_key_prefix = %q, want cheap-cache", values.Profiles["cheap"].CacheKeyPrefix) 607 615 } 608 616 if values.Profiles["reasoning"].ReasoningEffortRaw != "high" { 609 617 t.Fatalf("reasoning effort = %q, want high", values.Profiles["reasoning"].ReasoningEffortRaw) ··· 627 635 628 636 func TestResolveProfile_AppliesCacheTTLOverrides(t *testing.T) { 629 637 values := RuntimeValues{ 630 - Provider: "openai_resp", 631 - Model: "gpt-5.2", 632 - CacheTTL: "short", 638 + Provider: "openai_resp", 639 + Model: "gpt-5.2", 640 + CacheTTL: "short", 641 + CacheKeyPrefix: "base-cache", 633 642 Profiles: map[string]ProfileConfig{ 634 643 "cheap": { 635 - Model: "gpt-4.1-mini", 636 - CacheTTL: "long", 644 + Model: "gpt-4.1-mini", 645 + CacheTTL: "long", 646 + CacheKeyPrefix: "cheap-cache", 637 647 }, 638 648 }, 639 649 } ··· 644 654 } 645 655 if resolved.Values.CacheTTL != "long" { 646 656 t.Fatalf("resolved cache_ttl = %q, want long", resolved.Values.CacheTTL) 657 + } 658 + if resolved.Values.CacheKeyPrefix != "cheap-cache" { 659 + t.Fatalf("resolved cache_key_prefix = %q, want cheap-cache", resolved.Values.CacheKeyPrefix) 647 660 } 648 661 if resolved.ClientConfig.Model != "gpt-4.1-mini" { 649 662 t.Fatalf("resolved model = %q, want gpt-4.1-mini", resolved.ClientConfig.Model)

+3

internal/llmutil/routes.go

··· 26 26 Model string `mapstructure:"model"` 27 27 Headers map[string]string `mapstructure:"headers"` 28 28 CacheTTL string `mapstructure:"cache_ttl"` 29 + CacheKeyPrefix string `mapstructure:"cache_key_prefix"` 29 30 RequestTimeoutRaw string `mapstructure:"request_timeout"` 30 31 ToolsEmulationMode string `mapstructure:"tools_emulation_mode"` 31 32 TemperatureRaw string `mapstructure:"temperature"` ··· 298 299 cfg.Model = strings.TrimSpace(cfg.Model) 299 300 cfg.Headers = cloneStringMap(cfg.Headers) 300 301 cfg.CacheTTL = strings.TrimSpace(cfg.CacheTTL) 302 + cfg.CacheKeyPrefix = strings.TrimSpace(cfg.CacheKeyPrefix) 301 303 cfg.RequestTimeoutRaw = strings.TrimSpace(cfg.RequestTimeoutRaw) 302 304 cfg.ToolsEmulationMode = strings.TrimSpace(cfg.ToolsEmulationMode) 303 305 cfg.TemperatureRaw = strings.TrimSpace(cfg.TemperatureRaw) ··· 392 394 applyStringOverride(&out.Model, override.Model) 393 395 out.Headers = mergeStringMaps(out.Headers, override.Headers) 394 396 applyStringOverride(&out.CacheTTL, override.CacheTTL) 397 + applyStringOverride(&out.CacheKeyPrefix, override.CacheKeyPrefix) 395 398 applyStringOverride(&out.RequestTimeoutRaw, override.RequestTimeoutRaw) 396 399 applyStringOverride(&out.ToolsEmulationMode, override.ToolsEmulationMode) 397 400 applyStringOverride(&out.TemperatureRaw, override.TemperatureRaw)

+141 -45

providers/uniai/client.go

··· 6 6 "encoding/base64" 7 7 "encoding/json" 8 8 "fmt" 9 + "reflect" 9 10 "strconv" 10 11 "strings" 11 12 "time" ··· 29 30 ReasoningEffort string 30 31 ReasoningBudget *int 31 32 CacheTTL string 33 + CacheKeyPrefix string 32 34 33 35 ToolsEmulationMode string 34 36 AzureAPIKey string ··· 54 56 reasoningEffort string 55 57 reasoningBudget *int 56 58 cacheTTL string 59 + cacheKeyPrefix string 57 60 toolsEmulationMode uniaiapi.ToolsEmulationMode 58 61 client *uniaiapi.Client 59 62 } ··· 112 115 reasoningEffort: strings.ToLower(strings.TrimSpace(cfg.ReasoningEffort)), 113 116 reasoningBudget: cloneInt(cfg.ReasoningBudget), 114 117 cacheTTL: strings.TrimSpace(cfg.CacheTTL), 118 + cacheKeyPrefix: strings.TrimSpace(cfg.CacheKeyPrefix), 115 119 toolsEmulationMode: normalizeToolsEmulationMode(cfg.ToolsEmulationMode), 116 120 client: uniaiapi.New(uCfg), 117 121 } ··· 124 128 ctx, cancel = context.WithTimeout(ctx, c.requestTimeout) 125 129 defer cancel() 126 130 } 127 - opts := buildChatOptions(req, c.provider, c.model, c.cacheTTL, req.ForceJSON, c.toolsEmulationMode, c.temperature, c.reasoningEffort, c.reasoningBudget) 131 + opts := buildChatOptions(req, c.provider, c.model, c.cacheTTL, c.cacheKeyPrefix, req.ForceJSON, c.toolsEmulationMode, c.temperature, c.reasoningEffort, c.reasoningBudget) 128 132 resp, err := c.client.Chat(ctx, opts...) 129 133 if err != nil { 130 134 c.emitChatError(req.DebugFn, err, req.ForceJSON, 1) 131 135 } 132 136 if err != nil && req.ForceJSON && shouldRetryWithoutResponseFormat(err) { 133 - opts = buildChatOptions(req, c.provider, c.model, c.cacheTTL, false, c.toolsEmulationMode, c.temperature, c.reasoningEffort, c.reasoningBudget) 137 + opts = buildChatOptions(req, c.provider, c.model, c.cacheTTL, c.cacheKeyPrefix, false, c.toolsEmulationMode, c.temperature, c.reasoningEffort, c.reasoningBudget) 134 138 resp, err = c.client.Chat(ctx, opts...) 135 139 if err != nil { 136 140 c.emitChatError(req.DebugFn, err, false, 2) ··· 168 172 return strings.EqualFold(strings.TrimSpace(provider), "gemini") 169 173 } 170 174 171 - func buildChatOptions(req llm.Request, provider string, defaultModel string, cacheTTL string, forceJSON bool, toolsEmulationMode uniaiapi.ToolsEmulationMode, defaultTemperature *float64, defaultReasoningEffort string, defaultReasoningBudget *int) []uniaiapi.ChatOption { 175 + func buildChatOptions(req llm.Request, provider string, defaultModel string, cacheTTL string, cacheKeyPrefix string, forceJSON bool, toolsEmulationMode uniaiapi.ToolsEmulationMode, defaultTemperature *float64, defaultReasoningEffort string, defaultReasoningBudget *int) []uniaiapi.ChatOption { 172 176 req = adaptRequestForProvider(req, provider) 173 177 msgs := make([]uniaiapi.Message, len(req.Messages)) 174 178 for i, m := range req.Messages { ··· 261 265 opts = append(opts, uniaiapi.WithReasoningBudgetTokens(*defaultReasoningBudget)) 262 266 } 263 267 264 - applyPromptCacheOptions(provider, firstNonEmpty(req.Model, defaultModel), cacheTTL, req, openAIOptions, azureOptions) 268 + applyPromptCacheOptions(provider, firstNonEmpty(req.Model, defaultModel), cacheTTL, cacheKeyPrefix, req, openAIOptions, azureOptions) 265 269 if forceJSON && len(req.Tools) == 0 { 266 270 openAIOptions["response_format"] = "json_object" 267 271 if strings.EqualFold(strings.TrimSpace(provider), "azure") { ··· 294 298 } 295 299 if ev.Usage != nil { 296 300 usage := toLLMUsage(*ev.Usage) 301 + if enriched, changed := enrichUsageFromOpenAICompatibleRaw(usage, streamEventRaw(ev)); changed { 302 + usage = enriched 303 + } 297 304 streamEvent.Usage = &usage 298 305 } 299 306 return req.OnStream(streamEvent) ··· 376 383 RawJSON() string 377 384 } 378 385 386 + type openAICompatibleUsagePayload struct { 387 + CachedTokens *int `json:"cached_tokens"` 388 + CacheReadInputTokens *int `json:"cache_read_input_tokens"` 389 + CacheCreationInputTokens *int `json:"cache_creation_input_tokens"` 390 + CacheCreation map[string]int `json:"cache_creation"` 391 + PromptTokensDetails struct { 392 + CachedTokens *int `json:"cached_tokens"` 393 + CacheReadInputTokens *int `json:"cache_read_input_tokens"` 394 + CacheCreationInputTokens *int `json:"cache_creation_input_tokens"` 395 + CacheCreation map[string]int `json:"cache_creation"` 396 + } `json:"prompt_tokens_details"` 397 + } 398 + 379 399 func enrichUsageFromOpenAICompatibleRaw(usage llm.Usage, raw any) (llm.Usage, bool) { 380 - rawJSON := rawJSONFromOpenAICompatibleRaw(raw) 381 - if strings.TrimSpace(rawJSON) == "" { 382 - return usage, false 400 + changed := false 401 + for _, rawJSON := range rawJSONCandidatesFromOpenAICompatibleRaw(raw) { 402 + payload, ok := parseOpenAICompatibleUsagePayload(rawJSON) 403 + if !ok { 404 + continue 405 + } 406 + var payloadChanged bool 407 + usage, payloadChanged = applyOpenAICompatibleUsagePayload(usage, payload) 408 + changed = changed || payloadChanged 383 409 } 410 + return usage, changed 411 + } 384 412 385 - var payload struct { 386 - Usage struct { 387 - CachedTokens *int `json:"cached_tokens"` 388 - CacheReadInputTokens *int `json:"cache_read_input_tokens"` 389 - CacheCreationInputTokens *int `json:"cache_creation_input_tokens"` 390 - CacheCreation map[string]int `json:"cache_creation"` 391 - PromptTokensDetails struct { 392 - CachedTokens *int `json:"cached_tokens"` 393 - CacheReadInputTokens *int `json:"cache_read_input_tokens"` 394 - CacheCreationInputTokens *int `json:"cache_creation_input_tokens"` 395 - CacheCreation map[string]int `json:"cache_creation"` 396 - } `json:"prompt_tokens_details"` 397 - } `json:"usage"` 398 - } 399 - if err := json.Unmarshal([]byte(rawJSON), &payload); err != nil { 400 - return usage, false 401 - } 402 - 413 + func applyOpenAICompatibleUsagePayload(usage llm.Usage, payload openAICompatibleUsagePayload) (llm.Usage, bool) { 403 414 changed := false 404 415 if cached := firstPositiveInt( 405 - payload.Usage.PromptTokensDetails.CacheReadInputTokens, 406 - payload.Usage.PromptTokensDetails.CachedTokens, 407 - payload.Usage.CacheReadInputTokens, 408 - payload.Usage.CachedTokens, 416 + payload.PromptTokensDetails.CacheReadInputTokens, 417 + payload.PromptTokensDetails.CachedTokens, 418 + payload.CacheReadInputTokens, 419 + payload.CachedTokens, 409 420 ); cached > 0 && usage.Cache.CachedInputTokens != cached { 410 421 usage.Cache.CachedInputTokens = cached 411 422 changed = true 412 423 } 413 424 if created := firstPositiveInt( 414 - payload.Usage.PromptTokensDetails.CacheCreationInputTokens, 415 - payload.Usage.CacheCreationInputTokens, 425 + payload.PromptTokensDetails.CacheCreationInputTokens, 426 + payload.CacheCreationInputTokens, 416 427 ); created > 0 && usage.Cache.CacheCreationInputTokens != created { 417 428 usage.Cache.CacheCreationInputTokens = created 418 429 changed = true 419 430 } 420 431 var detailChanged bool 421 - usage.Cache.Details, detailChanged = mergePositiveCacheDetails(usage.Cache.Details, payload.Usage.PromptTokensDetails.CacheCreation) 432 + usage.Cache.Details, detailChanged = mergePositiveCacheDetails(usage.Cache.Details, payload.PromptTokensDetails.CacheCreation) 422 433 changed = changed || detailChanged 423 - usage.Cache.Details, detailChanged = mergePositiveCacheDetails(usage.Cache.Details, payload.Usage.CacheCreation) 434 + usage.Cache.Details, detailChanged = mergePositiveCacheDetails(usage.Cache.Details, payload.CacheCreation) 424 435 changed = changed || detailChanged 425 436 return usage, changed 426 437 } 427 438 428 - func rawJSONFromOpenAICompatibleRaw(raw any) string { 439 + func parseOpenAICompatibleUsagePayload(rawJSON string) (openAICompatibleUsagePayload, bool) { 440 + rawJSON = strings.TrimSpace(rawJSON) 441 + if rawJSON == "" { 442 + return openAICompatibleUsagePayload{}, false 443 + } 444 + var response struct { 445 + Usage openAICompatibleUsagePayload `json:"usage"` 446 + } 447 + if err := json.Unmarshal([]byte(rawJSON), &response); err == nil && response.Usage.hasCacheUsage() { 448 + return response.Usage, true 449 + } 450 + var usage openAICompatibleUsagePayload 451 + if err := json.Unmarshal([]byte(rawJSON), &usage); err != nil || !usage.hasCacheUsage() { 452 + return openAICompatibleUsagePayload{}, false 453 + } 454 + return usage, true 455 + } 456 + 457 + func (p openAICompatibleUsagePayload) hasCacheUsage() bool { 458 + return p.CachedTokens != nil || 459 + p.CacheReadInputTokens != nil || 460 + p.CacheCreationInputTokens != nil || 461 + len(p.CacheCreation) > 0 || 462 + p.PromptTokensDetails.CachedTokens != nil || 463 + p.PromptTokensDetails.CacheReadInputTokens != nil || 464 + p.PromptTokensDetails.CacheCreationInputTokens != nil || 465 + len(p.PromptTokensDetails.CacheCreation) > 0 466 + } 467 + 468 + func rawJSONCandidatesFromOpenAICompatibleRaw(raw any) []string { 429 469 if raw == nil { 430 - return "" 470 + return nil 431 471 } 472 + var out []string 473 + out = append(out, rawJSONCandidatesFromSequence(raw)...) 432 474 if v, ok := raw.(rawJSONProvider); ok { 433 - return strings.TrimSpace(v.RawJSON()) 475 + if rawJSON := strings.TrimSpace(v.RawJSON()); rawJSON != "" { 476 + out = append(out, rawJSON) 477 + } 478 + } 479 + if len(out) == 0 { 480 + b, err := json.Marshal(raw) 481 + if err == nil { 482 + if rawJSON := strings.TrimSpace(string(b)); rawJSON != "" { 483 + out = append(out, rawJSON) 484 + } 485 + } 486 + } 487 + return out 488 + } 489 + 490 + func rawJSONCandidatesFromSequence(raw any) []string { 491 + v := reflect.ValueOf(raw) 492 + for v.IsValid() && v.Kind() == reflect.Pointer { 493 + if v.IsNil() { 494 + return nil 495 + } 496 + v = v.Elem() 497 + } 498 + if !v.IsValid() || (v.Kind() != reflect.Slice && v.Kind() != reflect.Array) { 499 + return nil 500 + } 501 + if v.Type().Elem().Kind() == reflect.Uint8 { 502 + return nil 434 503 } 435 - b, err := json.Marshal(raw) 436 - if err != nil { 437 - return "" 504 + out := make([]string, 0, v.Len()) 505 + for i := v.Len() - 1; i >= 0; i-- { 506 + elem := v.Index(i) 507 + if !elem.CanInterface() { 508 + continue 509 + } 510 + out = append(out, rawJSONCandidatesFromOpenAICompatibleRaw(elem.Interface())...) 438 511 } 439 - return strings.TrimSpace(string(b)) 512 + return out 513 + } 514 + 515 + func streamEventRaw(event any) any { 516 + v := reflect.ValueOf(event) 517 + for v.IsValid() && v.Kind() == reflect.Pointer { 518 + if v.IsNil() { 519 + return nil 520 + } 521 + v = v.Elem() 522 + } 523 + if !v.IsValid() || v.Kind() != reflect.Struct { 524 + return nil 525 + } 526 + field := v.FieldByName("Raw") 527 + if !field.IsValid() || !field.CanInterface() { 528 + return nil 529 + } 530 + return field.Interface() 440 531 } 441 532 442 533 func firstPositiveInt(values ...*int) int { ··· 584 675 return out 585 676 } 586 677 587 - func applyPromptCacheOptions(provider, model, cacheTTL string, req llm.Request, openAIOptions, azureOptions structs.JSONMap) { 678 + func applyPromptCacheOptions(provider, model, cacheTTL, cacheKeyPrefix string, req llm.Request, openAIOptions, azureOptions structs.JSONMap) { 588 679 retention := promptCacheRetentionForProvider(provider, cacheTTL) 589 - key := derivedPromptCacheKey(provider, model, req) 680 + key := derivedPromptCacheKey(provider, model, cacheKeyPrefix, req) 590 681 if key == "" && retention == "" { 591 682 return 592 683 } ··· 794 885 return "1h" 795 886 } 796 887 797 - func derivedPromptCacheKey(provider, model string, req llm.Request) string { 888 + func derivedPromptCacheKey(provider, model, cacheKeyPrefix string, req llm.Request) string { 798 889 switch strings.ToLower(strings.TrimSpace(provider)) { 799 890 case "openai", "openai_resp", "azure": 800 891 default: 801 892 return "" 802 893 } 894 + cacheKeyPrefix = strings.TrimSpace(cacheKeyPrefix) 803 895 804 896 stable := promptCacheStablePayload{ 805 897 Model: strings.TrimSpace(model), ··· 826 918 }) 827 919 } 828 920 if len(stable.Messages) == 0 && len(stable.Tools) == 0 { 829 - return "" 921 + return cacheKeyPrefix 830 922 } 831 923 data, err := json.Marshal(stable) 832 924 if err != nil { 833 925 return "" 834 926 } 835 927 sum := sha256.Sum256(data) 836 - return "mm-" + base64.RawURLEncoding.EncodeToString(sum[:12]) 928 + key := "mm-" + base64.RawURLEncoding.EncodeToString(sum[:12]) 929 + if cacheKeyPrefix == "" { 930 + return key 931 + } 932 + return cacheKeyPrefix + "-" + key 837 933 } 838 934 839 935 type promptCacheStablePayload struct {

+134 -26

providers/uniai/client_test.go

··· 1 1 package uniai 2 2 3 3 import ( 4 + "encoding/json" 4 5 "math" 5 6 "reflect" 7 + "strings" 6 8 "testing" 7 9 10 + openai "github.com/openai/openai-go/v3" 8 11 "github.com/quailyquaily/mistermorph/llm" 9 12 uniaiapi "github.com/quailyquaily/uniai" 10 13 uniaichat "github.com/quailyquaily/uniai/chat" ··· 19 22 20 23 opts := append( 21 24 []uniaiapi.ChatOption{uniaiapi.WithMessages(uniaiapi.User("old"))}, 22 - buildChatOptions(req, "", "", "", false, uniaiapi.ToolsEmulationOff, nil, "", nil)..., 25 + buildChatOptions(req, "", "", "", "", false, uniaiapi.ToolsEmulationOff, nil, "", nil)..., 23 26 ) 24 27 25 28 built, err := uniaichat.BuildRequest(opts...) ··· 46 49 }, 47 50 } 48 51 49 - opts := buildChatOptions(req, "", "", "", false, uniaiapi.ToolsEmulationOff, nil, "", nil) 52 + opts := buildChatOptions(req, "", "", "", "", false, uniaiapi.ToolsEmulationOff, nil, "", nil) 50 53 built, err := uniaichat.BuildRequest(opts...) 51 54 if err != nil { 52 55 t.Fatalf("build request: %v", err) ··· 72 75 }, 73 76 } 74 77 75 - opts := buildChatOptions(req, "", "", "", false, uniaiapi.ToolsEmulationOff, nil, "", nil) 78 + opts := buildChatOptions(req, "", "", "", "", false, uniaiapi.ToolsEmulationOff, nil, "", nil) 76 79 built, err := uniaichat.BuildRequest(opts...) 77 80 if err != nil { 78 81 t.Fatalf("build request: %v", err) ··· 100 103 }, 101 104 } 102 105 103 - opts := buildChatOptions(req, "", "", "", false, uniaiapi.ToolsEmulationOff, nil, "", nil) 106 + opts := buildChatOptions(req, "", "", "", "", false, uniaiapi.ToolsEmulationOff, nil, "", nil) 104 107 built, err := uniaichat.BuildRequest(opts...) 105 108 if err != nil { 106 109 t.Fatalf("build request: %v", err) ··· 143 146 return nil 144 147 }, 145 148 } 146 - opts := buildChatOptions(req, "", "", "", false, uniaiapi.ToolsEmulationOff, nil, "", nil) 149 + opts := buildChatOptions(req, "", "", "", "", false, uniaiapi.ToolsEmulationOff, nil, "", nil) 147 150 148 151 built, err := uniaichat.BuildRequest(opts...) 149 152 if err != nil { ··· 266 269 } 267 270 } 268 271 272 + func TestEnrichUsageFromOpenAICompatibleRawReadsUsageObject(t *testing.T) { 273 + usage := llm.Usage{ 274 + InputTokens: 2648, 275 + OutputTokens: 38, 276 + TotalTokens: 2686, 277 + } 278 + raw := testRawJSON(`{ 279 + "prompt_tokens": 2648, 280 + "completion_tokens": 38, 281 + "total_tokens": 2686, 282 + "prompt_tokens_details": { 283 + "cache_read_input_tokens": 2390, 284 + "cache_creation_input_tokens": 255 285 + } 286 + }`) 287 + 288 + got, changed := enrichUsageFromOpenAICompatibleRaw(usage, raw) 289 + if !changed { 290 + t.Fatalf("changed = false, want true") 291 + } 292 + if got.Cache.CachedInputTokens != 2390 { 293 + t.Fatalf("cached_input_tokens = %d, want 2390", got.Cache.CachedInputTokens) 294 + } 295 + if got.Cache.CacheCreationInputTokens != 255 { 296 + t.Fatalf("cache_creation_input_tokens = %d, want 255", got.Cache.CacheCreationInputTokens) 297 + } 298 + } 299 + 300 + func TestEnrichUsageFromOpenAICompatibleRawReadsLastStreamChunk(t *testing.T) { 301 + usage := llm.Usage{ 302 + InputTokens: 2648, 303 + OutputTokens: 38, 304 + TotalTokens: 2686, 305 + } 306 + rawChunks := []testRawJSON{ 307 + testRawJSON(`{"choices":[{"delta":{"content":"hello"}}]}`), 308 + testRawJSON(`{"usage":{"prompt_tokens_details":{"cache_read_input_tokens":2390,"cache_creation_input_tokens":255}}}`), 309 + } 310 + 311 + got, changed := enrichUsageFromOpenAICompatibleRaw(usage, rawChunks) 312 + if !changed { 313 + t.Fatalf("changed = false, want true") 314 + } 315 + if got.Cache.CachedInputTokens != 2390 { 316 + t.Fatalf("cached_input_tokens = %d, want 2390", got.Cache.CachedInputTokens) 317 + } 318 + if got.Cache.CacheCreationInputTokens != 255 { 319 + t.Fatalf("cache_creation_input_tokens = %d, want 255", got.Cache.CacheCreationInputTokens) 320 + } 321 + } 322 + 323 + func TestEnrichUsageFromOpenAICompatibleRawReadsOpenAIStreamChunks(t *testing.T) { 324 + usage := llm.Usage{ 325 + InputTokens: 2648, 326 + OutputTokens: 38, 327 + TotalTokens: 2686, 328 + } 329 + chunks := make([]openai.ChatCompletionChunk, 0, 2) 330 + for _, raw := range []string{ 331 + `{"id":"chatcmpl-1","object":"chat.completion.chunk","created":1,"model":"anthropic/claude-sonnet-4-6","choices":[{"index":0,"delta":{"content":"hello"}}]}`, 332 + `{"id":"chatcmpl-1","object":"chat.completion.chunk","created":1,"model":"anthropic/claude-sonnet-4-6","choices":[],"usage":{"prompt_tokens":2648,"completion_tokens":38,"total_tokens":2686,"prompt_tokens_details":{"cached_tokens":2390,"cache_read_input_tokens":2390,"cache_creation_input_tokens":255}}}`, 333 + } { 334 + var chunk openai.ChatCompletionChunk 335 + if err := json.Unmarshal([]byte(raw), &chunk); err != nil { 336 + t.Fatalf("unmarshal chunk: %v", err) 337 + } 338 + chunks = append(chunks, chunk) 339 + } 340 + 341 + got, changed := enrichUsageFromOpenAICompatibleRaw(usage, chunks) 342 + if !changed { 343 + t.Fatalf("changed = false, want true") 344 + } 345 + if got.Cache.CachedInputTokens != 2390 { 346 + t.Fatalf("cached_input_tokens = %d, want 2390", got.Cache.CachedInputTokens) 347 + } 348 + if got.Cache.CacheCreationInputTokens != 255 { 349 + t.Fatalf("cache_creation_input_tokens = %d, want 255", got.Cache.CacheCreationInputTokens) 350 + } 351 + } 352 + 269 353 func TestRecalculateUsageCostIncludesCacheCreation(t *testing.T) { 270 354 cachedInputRate := 0.30 271 355 cacheCreationRate := 3.75 ··· 308 392 Messages: []llm.Message{{Role: "user", Content: "hello"}}, 309 393 OnStream: func(llm.StreamEvent) error { return nil }, 310 394 } 311 - opts := buildChatOptions(req, "gemini", "", "", false, uniaiapi.ToolsEmulationOff, nil, "", nil) 395 + opts := buildChatOptions(req, "gemini", "", "", "", false, uniaiapi.ToolsEmulationOff, nil, "", nil) 312 396 313 397 built, err := uniaichat.BuildRequest(opts...) 314 398 if err != nil { ··· 324 408 Messages: []llm.Message{{Role: "user", Content: "hello"}}, 325 409 OnStream: func(llm.StreamEvent) error { return nil }, 326 410 } 327 - opts := buildChatOptions(req, "anthropic", "", "", false, uniaiapi.ToolsEmulationOff, nil, "", nil) 411 + opts := buildChatOptions(req, "anthropic", "", "", "", false, uniaiapi.ToolsEmulationOff, nil, "", nil) 328 412 329 413 built, err := uniaichat.BuildRequest(opts...) 330 414 if err != nil { ··· 340 424 Messages: []llm.Message{{Role: "user", Content: "hello"}}, 341 425 OnStream: func(llm.StreamEvent) error { return nil }, 342 426 } 343 - opts := buildChatOptions(req, "cloudflare", "", "", false, uniaiapi.ToolsEmulationOff, nil, "", nil) 427 + opts := buildChatOptions(req, "cloudflare", "", "", "", false, uniaiapi.ToolsEmulationOff, nil, "", nil) 344 428 345 429 built, err := uniaichat.BuildRequest(opts...) 346 430 if err != nil { ··· 360 444 gotPayload = payload 361 445 }, 362 446 } 363 - opts := buildChatOptions(req, "", "", "", false, uniaiapi.ToolsEmulationOff, nil, "", nil) 447 + opts := buildChatOptions(req, "", "", "", "", false, uniaiapi.ToolsEmulationOff, nil, "", nil) 364 448 365 449 built, err := uniaichat.BuildRequest(opts...) 366 450 if err != nil { ··· 379 463 req := llm.Request{ 380 464 Messages: []llm.Message{{Role: "user", Content: "hello"}}, 381 465 } 382 - opts := buildChatOptions(req, "", "", "", true, uniaiapi.ToolsEmulationOff, nil, "", nil) 466 + opts := buildChatOptions(req, "", "", "", "", true, uniaiapi.ToolsEmulationOff, nil, "", nil) 383 467 384 468 built, err := uniaichat.BuildRequest(opts...) 385 469 if err != nil { ··· 402 486 ParametersJSON: `{"type":"object","properties":{},"additionalProperties":false}`, 403 487 }}, 404 488 } 405 - opts := buildChatOptions(req, "", "", "", true, uniaiapi.ToolsEmulationOff, nil, "", nil) 489 + opts := buildChatOptions(req, "", "", "", "", true, uniaiapi.ToolsEmulationOff, nil, "", nil) 406 490 407 491 built, err := uniaichat.BuildRequest(opts...) 408 492 if err != nil { ··· 426 510 {Role: "user", Content: "hello"}, 427 511 }, 428 512 } 429 - opts := buildChatOptions(req, "openai_resp", "gpt-5.4", "short", true, uniaiapi.ToolsEmulationOff, nil, "", nil) 513 + opts := buildChatOptions(req, "openai_resp", "gpt-5.4", "short", "cache-test", true, uniaiapi.ToolsEmulationOff, nil, "", nil) 430 514 431 515 built, err := uniaichat.BuildRequest(opts...) 432 516 if err != nil { ··· 435 519 if built.Options.OpenAI == nil { 436 520 t.Fatal("expected openai options to be set") 437 521 } 438 - if got := built.Options.OpenAI["prompt_cache_key"]; got == "" || got == nil { 439 - t.Fatalf("prompt_cache_key = %#v, want non-empty derived key", got) 522 + if got, _ := built.Options.OpenAI["prompt_cache_key"].(string); !strings.HasPrefix(got, "cache-test-mm-") { 523 + t.Fatalf("prompt_cache_key = %#v, want cache-test-prefixed derived key", got) 440 524 } 441 525 if got := built.Options.OpenAI["prompt_cache_retention"]; got != "in_memory" { 442 526 t.Fatalf("prompt_cache_retention = %#v, want in_memory", got) ··· 446 530 } 447 531 } 448 532 533 + func TestBuildChatOptionsUsesPromptCacheKeyPrefixWithoutStablePayload(t *testing.T) { 534 + req := llm.Request{ 535 + Messages: []llm.Message{ 536 + {Role: "user", Content: "hello"}, 537 + }, 538 + } 539 + opts := buildChatOptions(req, "openai_resp", "gpt-5.4", "short", "manual-test", false, uniaiapi.ToolsEmulationOff, nil, "", nil) 540 + 541 + built, err := uniaichat.BuildRequest(opts...) 542 + if err != nil { 543 + t.Fatalf("build request: %v", err) 544 + } 545 + if built.Options.OpenAI == nil { 546 + t.Fatal("expected openai options to be set") 547 + } 548 + if got := built.Options.OpenAI["prompt_cache_key"]; got != "manual-test" { 549 + t.Fatalf("prompt_cache_key = %#v, want manual-test", got) 550 + } 551 + } 552 + 449 553 func TestBuildChatOptionsMapsPromptCacheOptionsForAzure(t *testing.T) { 450 554 req := llm.Request{ 451 555 Messages: []llm.Message{ ··· 453 557 {Role: "user", Content: "hello"}, 454 558 }, 455 559 } 456 - opts := buildChatOptions(req, "azure", "gpt-5.4", "long", true, uniaiapi.ToolsEmulationOff, nil, "", nil) 560 + opts := buildChatOptions(req, "azure", "gpt-5.4", "long", "", true, uniaiapi.ToolsEmulationOff, nil, "", nil) 457 561 458 562 built, err := uniaichat.BuildRequest(opts...) 459 563 if err != nil { ··· 477 581 req := llm.Request{ 478 582 Messages: []llm.Message{{Role: "user", Content: "hello"}}, 479 583 } 480 - opts := buildChatOptions(req, "", "", "", false, uniaiapi.ToolsEmulationOff, nil, "", nil) 584 + opts := buildChatOptions(req, "", "", "", "", false, uniaiapi.ToolsEmulationOff, nil, "", nil) 481 585 built, err := uniaichat.BuildRequest(opts...) 482 586 if err != nil { 483 587 t.Fatalf("build request: %v", err) ··· 493 597 } 494 598 temperature := 0.4 495 599 reasoningBudget := 8192 496 - opts := buildChatOptions(req, "", "", "", false, uniaiapi.ToolsEmulationOff, &temperature, "high", &reasoningBudget) 600 + opts := buildChatOptions(req, "", "", "", "", false, uniaiapi.ToolsEmulationOff, &temperature, "high", &reasoningBudget) 497 601 built, err := uniaichat.BuildRequest(opts...) 498 602 if err != nil { 499 603 t.Fatalf("build request: %v", err) ··· 514 618 Messages: []llm.Message{{Role: "user", Content: "hello"}}, 515 619 } 516 620 reasoningBudget := 8192 517 - opts := buildChatOptions(req, "openai_resp", "", "", false, uniaiapi.ToolsEmulationOff, nil, "high", &reasoningBudget) 621 + opts := buildChatOptions(req, "openai_resp", "", "", "", false, uniaiapi.ToolsEmulationOff, nil, "high", &reasoningBudget) 518 622 built, err := uniaichat.BuildRequest(opts...) 519 623 if err != nil { 520 624 t.Fatalf("build request: %v", err) ··· 533 637 Parameters: map[string]any{"temperature": 0.1}, 534 638 } 535 639 temperature := 0.4 536 - opts := buildChatOptions(req, "", "", "", false, uniaiapi.ToolsEmulationOff, &temperature, "", nil) 640 + opts := buildChatOptions(req, "", "", "", "", false, uniaiapi.ToolsEmulationOff, &temperature, "", nil) 537 641 built, err := uniaichat.BuildRequest(opts...) 538 642 if err != nil { 539 643 t.Fatalf("build request: %v", err) ··· 567 671 CacheControl: &llm.CacheControl{TTL: "1h"}, 568 672 }}, 569 673 } 570 - opts := buildChatOptions(req, "anthropic", "", "", false, uniaiapi.ToolsEmulationOff, nil, "", nil) 674 + opts := buildChatOptions(req, "anthropic", "", "", "", false, uniaiapi.ToolsEmulationOff, nil, "", nil) 571 675 572 676 built, err := uniaichat.BuildRequest(opts...) 573 677 if err != nil { ··· 602 706 }}, 603 707 } 604 708 605 - opts := buildChatOptions(req, "anthropic", "", "", false, uniaiapi.ToolsEmulationOff, nil, "", nil) 709 + opts := buildChatOptions(req, "anthropic", "", "", "", false, uniaiapi.ToolsEmulationOff, nil, "", nil) 606 710 built, err := uniaichat.BuildRequest(opts...) 607 711 if err != nil { 608 712 t.Fatalf("build request: %v", err) ··· 633 737 }}, 634 738 } 635 739 636 - opts := buildChatOptions(req, "openai_resp", "", "", false, uniaiapi.ToolsEmulationOff, nil, "", nil) 740 + opts := buildChatOptions(req, "openai_resp", "", "", "", false, uniaiapi.ToolsEmulationOff, nil, "", nil) 637 741 built, err := uniaichat.BuildRequest(opts...) 638 742 if err != nil { 639 743 t.Fatalf("build request: %v", err) ··· 674 778 }}, 675 779 } 676 780 677 - opts := buildChatOptions(req, "bedrock", "", "", false, uniaiapi.ToolsEmulationOff, nil, "", nil) 781 + opts := buildChatOptions(req, "bedrock", "", "", "", false, uniaiapi.ToolsEmulationOff, nil, "", nil) 678 782 built, err := uniaichat.BuildRequest(opts...) 679 783 if err != nil { 680 784 t.Fatalf("build request: %v", err) ··· 692 796 693 797 func TestNewStoresCacheTTLDefault(t *testing.T) { 694 798 client := New(Config{ 695 - Provider: "openai_resp", 696 - Model: "gpt-5.2", 697 - CacheTTL: "long", 799 + Provider: "openai_resp", 800 + Model: "gpt-5.2", 801 + CacheTTL: "long", 802 + CacheKeyPrefix: "test-prefix", 698 803 }) 699 804 700 805 if client.cacheTTL != "long" { 701 806 t.Fatalf("cacheTTL = %q, want long", client.cacheTTL) 807 + } 808 + if client.cacheKeyPrefix != "test-prefix" { 809 + t.Fatalf("cacheKeyPrefix = %q, want test-prefix", client.cacheKeyPrefix) 702 810 } 703 811 } 704 812

Configure Feed

Configure Feed