personal memory agent
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

providers+logs: cap Gemini output tokens at 65536, fix sol health logs chronicle path

- Lower default `thinking_budget` in `think/talents.py` from `8192 * 3` to `8192 * 2` (16384); new default total (16384 + 49152) equals Gemini's 65536 cap.
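- Clamp the outgoing `max_output_tokens` (requested output tokens plus thinking budget) in `think/providers/google.py::_build_generate_config` to `<= GEMINI_MAX_OUTPUT_TOKENS` (65536), reducing the thinking budget to fit and emitting a WARNING log; unit-tested exactly at the cap and above it.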
- Drop `thinking_budget` / `max_output_tokens` frontmatter overrides from `talent/sense.md` so it inherits the new defaults.
- Fix `think/logs_cli.py::get_today_health_dir` to read from `journal/chronicle/<day>/health/` (missed during the 173c1773 chronicle rename); updated `tests/test_logs_cli.py::make_journal` fixture to match. Regenerated `tests/baselines/api/stats/stats.json` after the sense.md change.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

+71 -11
-2
talent/sense.md
··· 7 7 "schedule": "segment", 8 8 "priority": 5, 9 9 "tier": 3, 10 - "thinking_budget": 4096, 11 - "max_output_tokens": 4096, 12 10 "output": "json", 13 11 "schema": "sense.schema.json", 14 12 "load": {"transcripts": true, "percepts": true, "talents": false}
-2
tests/baselines/api/stats/stats.json
··· 244 244 "talents": false, 245 245 "transcripts": true 246 246 }, 247 - "max_output_tokens": 4096, 248 247 "mtime": 0, 249 248 "output": "json", 250 249 "path": "<PROJECT>/talent/sense.md", ··· 252 251 "schedule": "segment", 253 252 "schema": "sense.schema.json", 254 253 "source": "system", 255 - "thinking_budget": 4096, 256 254 "tier": 3, 257 255 "title": "Segment Sense", 258 256 "type": "generate"
+2 -2
tests/test_logs_cli.py
··· 10 10 11 11 def make_journal(tmp_path, day, services, supervisor_lines=None): 12 12 """Create a synthetic journal with health logs.""" 13 - health_dir = tmp_path / day / "health" 13 + health_dir = tmp_path / "chronicle" / day / "health" 14 14 health_dir.mkdir(parents=True) 15 15 16 16 for name, lines in services.items(): ··· 25 25 26 26 for name in services: 27 27 journal_sym = journal_health / f"{name}.log" 28 - journal_sym.symlink_to(f"../{day}/health/ref_{name}.log") 28 + journal_sym.symlink_to(f"../chronicle/{day}/health/ref_{name}.log") 29 29 30 30 if supervisor_lines is not None: 31 31 sup = journal_health / "supervisor.log"
+52
tests/test_providers_google.py
··· 1 + # SPDX-License-Identifier: AGPL-3.0-only 2 + # Copyright (c) 2026 sol pbc 3 + 4 + import logging 5 + 6 + 7 + def test_build_generate_config_passes_through_at_cap(caplog): 8 + from think.providers.google import GEMINI_MAX_OUTPUT_TOKENS, _build_generate_config 9 + 10 + with caplog.at_level(logging.WARNING, logger="think.providers.google"): 11 + config = _build_generate_config( 12 + temperature=0.3, 13 + max_output_tokens=49152, 14 + system_instruction=None, 15 + json_output=False, 16 + thinking_budget=16384, 17 + ) 18 + 19 + warnings = [ 20 + record 21 + for record in caplog.records 22 + if record.name == "think.providers.google" and record.levelno == logging.WARNING 23 + ] 24 + assert config.max_output_tokens == GEMINI_MAX_OUTPUT_TOKENS 25 + assert config.thinking_config.thinking_budget == 16384 26 + assert warnings == [] 27 + 28 + 29 + def test_build_generate_config_clamps_and_warns_once(caplog): 30 + from think.providers.google import GEMINI_MAX_OUTPUT_TOKENS, _build_generate_config 31 + 32 + with caplog.at_level(logging.WARNING, logger="think.providers.google"): 33 + config = _build_generate_config( 34 + temperature=0.3, 35 + max_output_tokens=49152, 36 + system_instruction=None, 37 + json_output=False, 38 + thinking_budget=24576, 39 + ) 40 + 41 + warnings = [ 42 + record 43 + for record in caplog.records 44 + if record.name == "think.providers.google" and record.levelno == logging.WARNING 45 + ] 46 + assert config.max_output_tokens <= GEMINI_MAX_OUTPUT_TOKENS 47 + assert config.max_output_tokens == GEMINI_MAX_OUTPUT_TOKENS 48 + assert config.thinking_config.thinking_budget == 16384 49 + assert len(warnings) == 1 50 + assert "max_output_tokens=49152" in warnings[0].message 51 + assert "thinking_budget=24576" in warnings[0].message 52 + assert "clamped_thinking_budget=16384" in warnings[0].message
+2 -4
think/logs_cli.py
··· 23 23 from pathlib import Path 24 24 from typing import NamedTuple 25 25 26 - from think.utils import get_journal, setup_cli 26 + from think.utils import day_path, get_journal, setup_cli 27 27 28 28 _DIM = "\033[2m" 29 29 _RESET = "\033[0m" ··· 108 108 109 109 110 110 def get_today_health_dir() -> Path | None: 111 - journal = Path(os.path.expanduser(get_journal())) 112 - today = datetime.now().strftime("%Y%m%d") 113 - health_dir = journal / today / "health" 111 + health_dir = day_path(create=False) / "health" 114 112 return health_dir if health_dir.is_dir() else None 115 113 116 114
+14
think/providers/google.py
··· 56 56 safe_raw, 57 57 ) 58 58 59 + GEMINI_MAX_OUTPUT_TOKENS = 65536 59 60 _DEFAULT_MAX_TOKENS = 8192 60 61 _DEFAULT_MODEL = GEMINI_FLASH 61 62 ··· 244 245 """ 245 246 # Compute total tokens: output + thinking budget 246 247 total_tokens = max_output_tokens + (thinking_budget or 0) 248 + if total_tokens > GEMINI_MAX_OUTPUT_TOKENS: 249 + clamped_max_output = min(max_output_tokens, GEMINI_MAX_OUTPUT_TOKENS) 250 + clamped_thinking = max(0, GEMINI_MAX_OUTPUT_TOKENS - clamped_max_output) 251 + logging.getLogger(__name__).warning( 252 + "Clamping Gemini token budget: max_output_tokens=%s thinking_budget=%s " 253 + "clamped_max_output_tokens=%s clamped_thinking_budget=%s", 254 + max_output_tokens, 255 + thinking_budget, 256 + clamped_max_output, 257 + clamped_thinking, 258 + ) 259 + thinking_budget = clamped_thinking 260 + total_tokens = clamped_max_output + clamped_thinking 247 261 248 262 config_args: dict[str, Any] = { 249 263 "temperature": temperature,
+1 -1
think/talents.py
··· 938 938 output_format = config.get("output") 939 939 940 940 # Get generation parameters from config (set in frontmatter) 941 - thinking_budget = config.get("thinking_budget") or 8192 * 3 941 + thinking_budget = config.get("thinking_budget") or 8192 * 2 942 942 max_output_tokens = config.get("max_output_tokens") or 8192 * 6 943 943 is_json_output = output_format == "json" 944 944