Normalize token usage once at the provider boundary

+8 -29

docs/PROVIDERS.md

··· 198 198 199 199 **Usage dict format:** 200 200 201 - Providers should build a normalized usage dict from their response: 202 - ```python 203 - usage_dict = { 204 - "input_tokens": 1500, # Required 205 - "output_tokens": 500, # Required 206 - "total_tokens": 2000, # Required 207 - "cached_tokens": 800, # Optional: cache hits 208 - "reasoning_tokens": 200, # Optional: thinking/reasoning tokens 209 - } 210 - ``` 201 + Providers normalize usage into the unified schema defined by `USAGE_KEYS` in `think/providers/shared.py`. Each provider's `_extract_usage()` is responsible for mapping API-specific field names to these canonical keys. `log_token_usage()` passes through these keys when present and non-zero — it does **not** re-map provider-specific field names; it only computes `total_tokens` when missing and accepts the legacy `cached_input_tokens` alias. 211 202 212 - Provider-specific extraction examples: 213 203 ```python 214 - # OpenAI / OpenAI-compatible 215 204 usage_dict = { 216 - "input_tokens": response.usage.prompt_tokens, 217 - "output_tokens": response.usage.completion_tokens, 218 - "total_tokens": response.usage.total_tokens, 219 - } 220 - 221 - # Anthropic 222 - usage_dict = { 223 - "input_tokens": response.usage.input_tokens, 224 - "output_tokens": response.usage.output_tokens, 225 - "total_tokens": response.usage.input_tokens + response.usage.output_tokens, 226 - } 227 - 228 - # Google 229 - usage_dict = { 230 - "input_tokens": response.usage_metadata.prompt_token_count, 231 - "output_tokens": response.usage_metadata.candidates_token_count, 232 - "total_tokens": response.usage_metadata.total_token_count, 205 + "input_tokens": 1500, # Required 206 + "output_tokens": 500, # Required 207 + "total_tokens": 2000, # Required (computed if missing) 208 + "cached_tokens": 800, # Optional: cache hits 209 + "reasoning_tokens": 200, # Optional: thinking/reasoning tokens 210 + "cache_creation_tokens": 100, # Optional: cache creation cost 211 + "requests": 1, # Optional: request count 233 212 } 234 213 ``` 235 214

+54

tests/test_models.py

··· 688 688 entry = json.loads(log_file.read_text().strip()) 689 689 assert entry["usage"]["cached_tokens"] == 800 690 690 assert entry["usage"]["total_tokens"] == 1200 691 + 692 + 693 + def test_log_token_usage_passes_through_reasoning_tokens(tmp_path, monkeypatch): 694 + """reasoning_tokens from provider-normalized usage are preserved in log.""" 695 + import json 696 + 697 + from think.models import log_token_usage 698 + 699 + monkeypatch.setenv("JOURNAL_PATH", str(tmp_path)) 700 + 701 + # Normalized usage from Google provider (the bug: reasoning_tokens were dropped) 702 + log_token_usage( 703 + model="gemini-3-flash-preview", 704 + usage={ 705 + "input_tokens": 13319, 706 + "output_tokens": 969, 707 + "total_tokens": 37878, 708 + "reasoning_tokens": 23590, 709 + }, 710 + context="test", 711 + ) 712 + 713 + log_file = tmp_path / "tokens" / (__import__("time").strftime("%Y%m%d") + ".jsonl") 714 + entry = json.loads(log_file.read_text().strip()) 715 + assert entry["usage"]["reasoning_tokens"] == 23590 716 + assert entry["usage"]["total_tokens"] == 37878 717 + assert entry["usage"]["input_tokens"] == 13319 718 + assert entry["usage"]["output_tokens"] == 969 719 + 720 + 721 + def test_log_token_usage_passes_through_cache_creation_tokens(tmp_path, monkeypatch): 722 + """cache_creation_tokens from Anthropic provider are preserved in log.""" 723 + import json 724 + 725 + from think.models import log_token_usage 726 + 727 + monkeypatch.setenv("JOURNAL_PATH", str(tmp_path)) 728 + 729 + log_token_usage( 730 + model="claude-sonnet-4-5", 731 + usage={ 732 + "input_tokens": 5000, 733 + "output_tokens": 1000, 734 + "total_tokens": 6000, 735 + "cached_tokens": 3000, 736 + "cache_creation_tokens": 2000, 737 + }, 738 + context="test", 739 + ) 740 + 741 + log_file = tmp_path / "tokens" / (__import__("time").strftime("%Y%m%d") + ".jsonl") 742 + entry = json.loads(log_file.read_text().strip()) 743 + assert entry["usage"]["cache_creation_tokens"] == 2000 744 + assert 
entry["usage"]["cached_tokens"] == 3000

+19 -78

think/models.py

··· 521 521 ) -> None: 522 522 """Log token usage to journal with unified schema. 523 523 524 + Providers normalize usage into the unified schema (see USAGE_KEYS in 525 + shared.py) before returning GenerateResult. This function passes 526 + through those known keys, computes total_tokens when missing, and 527 + handles a few legacy field aliases from CLI backends. 528 + 524 529 Parameters 525 530 ---------- 526 531 model : str 527 532 Model name (e.g., "gpt-5", "gemini-2.5-flash") 528 - usage : dict or response object 529 - Usage data in provider-specific format, OR a Gemini response object. 530 - Dict formats supported: 531 - - OpenAI format: {input_tokens, output_tokens, total_tokens, 532 - details: {input: {cached_tokens}, output: {reasoning_tokens}}} 533 - - Gemini format: {prompt_token_count, candidates_token_count, 534 - cached_content_token_count, thoughts_token_count, total_token_count} 535 - - Unified format: {input_tokens, output_tokens, total_tokens, 536 - cached_tokens, reasoning_tokens, requests} 537 - Response objects: Gemini GenerateContentResponse with usage_metadata attribute 533 + usage : dict 534 + Normalized usage dict with keys from USAGE_KEYS. 538 535 context : str, optional 539 536 Context string (e.g., "module.function:123" or "muse.system.default"). 540 537 If None, auto-detects from call stack. ··· 544 541 type : str, optional 545 542 Token entry type (e.g., "generate", "cogitate"). 
546 543 """ 544 + from think.providers.shared import USAGE_KEYS 545 + 547 546 try: 548 547 journal = get_journal() 549 548 550 - # Extract from Gemini response object if needed 551 - if hasattr(usage, "usage_metadata"): 552 - try: 553 - metadata = usage.usage_metadata 554 - usage = { 555 - "prompt_token_count": getattr(metadata, "prompt_token_count", 0), 556 - "candidates_token_count": getattr( 557 - metadata, "candidates_token_count", 0 558 - ), 559 - "cached_content_token_count": getattr( 560 - metadata, "cached_content_token_count", 0 561 - ), 562 - "thoughts_token_count": getattr( 563 - metadata, "thoughts_token_count", 0 564 - ), 565 - "total_token_count": getattr(metadata, "total_token_count", 0), 566 - } 567 - except Exception: 568 - return # Can't extract, fail silently 569 - 570 549 # Auto-detect calling context if not provided 571 550 if context is None: 572 551 frame = inspect.currentframe() ··· 589 568 590 569 context = f"{module_name}.{func_name}:{line_num}" 591 570 592 - # Normalize usage data to unified schema 571 + # Pass through known keys from the already-normalized usage dict. 
593 572 normalized_usage: Dict[str, int] = {} 573 + for key in USAGE_KEYS: 574 + val = usage.get(key) 575 + if val: 576 + normalized_usage[key] = val 594 577 595 - # Handle OpenAI format with nested details 596 - if "input_tokens" in usage or "output_tokens" in usage: 597 - normalized_usage["input_tokens"] = usage.get("input_tokens", 0) 598 - normalized_usage["output_tokens"] = usage.get("output_tokens", 0) 599 - normalized_usage["total_tokens"] = usage.get("total_tokens", 0) 600 - 601 - # Extract nested details 602 - details = usage.get("details", {}) 603 - if details: 604 - input_details = details.get("input", {}) 605 - if input_details and input_details.get("cached_tokens"): 606 - normalized_usage["cached_tokens"] = input_details["cached_tokens"] 607 - 608 - output_details = details.get("output", {}) 609 - if output_details and output_details.get("reasoning_tokens"): 610 - normalized_usage["reasoning_tokens"] = output_details[ 611 - "reasoning_tokens" 612 - ] 613 - 614 - # Optional requests field for OpenAI 615 - if "requests" in usage and usage["requests"] is not None: 616 - normalized_usage["requests"] = usage["requests"] 617 - 618 - # Pass through cache fields from various providers 619 - if usage.get("cached_tokens"): 620 - normalized_usage["cached_tokens"] = usage["cached_tokens"] 621 - if usage.get("cached_input_tokens"): 622 - normalized_usage["cached_tokens"] = usage["cached_input_tokens"] 623 - if usage.get("cache_creation_tokens"): 624 - normalized_usage["cache_creation_tokens"] = usage[ 625 - "cache_creation_tokens" 626 - ] 627 - 628 - # Handle Gemini format 629 - elif "prompt_token_count" in usage or "candidates_token_count" in usage: 630 - normalized_usage["input_tokens"] = usage.get("prompt_token_count", 0) 631 - normalized_usage["output_tokens"] = usage.get("candidates_token_count", 0) 632 - normalized_usage["total_tokens"] = usage.get("total_token_count", 0) 633 - 634 - if usage.get("cached_content_token_count"): 635 - 
normalized_usage["cached_tokens"] = usage["cached_content_token_count"] 636 - if usage.get("thoughts_token_count"): 637 - normalized_usage["reasoning_tokens"] = usage["thoughts_token_count"] 638 - 639 - # Already in unified format 640 - else: 641 - normalized_usage = {k: v for k, v in usage.items() if isinstance(v, int)} 578 + # Legacy alias: some CLI backends emit cached_input_tokens 579 + if not normalized_usage.get("cached_tokens") and usage.get( 580 + "cached_input_tokens" 581 + ): 582 + normalized_usage["cached_tokens"] = usage["cached_input_tokens"] 642 583 643 584 # Compute total_tokens from parts when missing (e.g. Codex CLI omits it) 644 585 if not normalized_usage.get("total_tokens"):

+13 -6

think/providers/anthropic.py

··· 197 197 result_meta["cost_usd"] = event.get("total_cost_usd") 198 198 usage = event.get("usage") 199 199 if usage: 200 - result_meta["usage"] = { 201 - "input_tokens": usage.get("input_tokens") or 0, 202 - "output_tokens": usage.get("output_tokens") or 0, 203 - "total_tokens": ( 204 - (usage.get("input_tokens") or 0) + (usage.get("output_tokens") or 0) 205 - ), 200 + input_tokens = usage.get("input_tokens") or 0 201 + output_tokens = usage.get("output_tokens") or 0 202 + usage_dict: dict[str, Any] = { 203 + "input_tokens": input_tokens, 204 + "output_tokens": output_tokens, 205 + "total_tokens": input_tokens + output_tokens, 206 206 } 207 + cache_creation = usage.get("cache_creation_input_tokens") 208 + if cache_creation: 209 + usage_dict["cache_creation_tokens"] = cache_creation 210 + cache_read = usage.get("cache_read_input_tokens") 211 + if cache_read: 212 + usage_dict["cached_tokens"] = cache_read 213 + result_meta["usage"] = usage_dict 207 214 208 215 return None 209 216

+11 -3

think/providers/google.py

··· 549 549 if event_type == "result": 550 550 stats = event.get("stats") or {} 551 551 if usage_out is not None and stats: 552 + input_tokens = stats.get("input_tokens", 0) 553 + output_tokens = stats.get("output_tokens", 0) 554 + total_tokens = stats.get("total_tokens", 0) 552 555 usage_out.update( 553 556 { 554 - "input_tokens": stats.get("input_tokens", 0), 555 - "output_tokens": stats.get("output_tokens", 0), 556 - "total_tokens": stats.get("total_tokens", 0), 557 + "input_tokens": input_tokens, 558 + "output_tokens": output_tokens, 559 + "total_tokens": total_tokens, 557 560 } 558 561 ) 559 562 if stats.get("cached"): 560 563 usage_out["cached_tokens"] = stats["cached"] 564 + # CLI doesn't break out thinking tokens, but they're the 565 + # difference between total and input+output. 566 + reasoning = total_tokens - input_tokens - output_tokens 567 + if reasoning > 0: 568 + usage_out["reasoning_tokens"] = reasoning 561 569 return None 562 570 563 571 # Unknown event type — log and skip

+13 -1

think/providers/openai.py

··· 325 325 if not response.usage: 326 326 return None 327 327 328 - return { 328 + usage: dict[str, int] = { 329 329 "input_tokens": response.usage.prompt_tokens, 330 330 "output_tokens": response.usage.completion_tokens, 331 331 "total_tokens": response.usage.total_tokens, 332 332 } 333 + # Extract optional detail fields 334 + prompt_details = getattr(response.usage, "prompt_tokens_details", None) 335 + if prompt_details: 336 + cached = getattr(prompt_details, "cached_tokens", 0) 337 + if cached: 338 + usage["cached_tokens"] = cached 339 + completion_details = getattr(response.usage, "completion_tokens_details", None) 340 + if completion_details: 341 + reasoning = getattr(completion_details, "reasoning_tokens", 0) 342 + if reasoning: 343 + usage["reasoning_tokens"] = reasoning 344 + return usage 333 345 334 346 335 347 def run_generate(

+19

think/providers/shared.py

··· 133 133 134 134 135 135 # --------------------------------------------------------------------------- 136 + # Usage Schema 137 + # --------------------------------------------------------------------------- 138 + 139 + # Canonical keys for the normalized usage dict returned by all providers. 140 + # log_token_usage() passes through exactly these keys (when present and non-zero). 141 + USAGE_KEYS = frozenset( 142 + { 143 + "input_tokens", 144 + "output_tokens", 145 + "total_tokens", 146 + "cached_tokens", 147 + "reasoning_tokens", 148 + "cache_creation_tokens", 149 + "requests", 150 + } 151 + ) 152 + 153 + # --------------------------------------------------------------------------- 136 154 # GenerateResult 137 155 # --------------------------------------------------------------------------- 138 156 ··· 222 240 "GenerateResult", 223 241 "JSONEventCallback", 224 242 "ThinkingEvent", 243 + "USAGE_KEYS", 225 244 "safe_raw", 226 245 ]

Configure Feed

Configure Feed