Merge generate.py into agents.py for unified agent system

+3 -3

apps/agents/routes.py

··· 16 16 from convey import state 17 17 from convey.utils import DATE_RE, format_date 18 18 from think.facets import get_facets 19 - from think.utils import get_agents 19 + from think.utils import get_muse_configs 20 20 21 21 agents_bp = Blueprint( 22 22 "app:agents", ··· 320 320 facet_filter = _get_facet_filter() 321 321 322 322 # Load agent metadata for titles and grouping 323 - agents_meta = get_agents() 323 + agents_meta = get_muse_configs(has_tools=True) 324 324 facets = get_facets() 325 325 326 326 # Get agents for this day ··· 381 381 facet_filter = _get_facet_filter() 382 382 383 383 # Load metadata 384 - agents_meta = get_agents() 384 + agents_meta = get_muse_configs(has_tools=True) 385 385 facets = get_facets() 386 386 387 387 # Get all agents for day and filter to this name

+2 -2

apps/calendar/routes.py

··· 57 57 return "", 404 58 58 59 59 from think.indexer.journal import get_events 60 - from think.utils import get_generator_agents 60 + from think.utils import get_muse_configs 61 61 62 - generators = get_generator_agents() 62 + generators = get_muse_configs(has_tools=False, has_output=True) 63 63 64 64 # Get full event objects from source files 65 65 raw_events = get_events(day)

+2 -2

apps/insights/routes.py

··· 15 15 16 16 from convey.utils import DATE_RE, format_date 17 17 from think.models import get_usage_cost 18 - from think.utils import day_dirs, day_path, get_generator_agents, get_output_topic 18 + from think.utils import day_dirs, day_path, get_muse_configs, get_output_topic 19 19 20 20 insights_bp = Blueprint( 21 21 "app:insights", ··· 30 30 Returns dict mapping topic filename (e.g., "activity", "_chat_sentiment") 31 31 to {"key": generator_key, "meta": generator_metadata}. 32 32 """ 33 - generators = get_generator_agents() 33 + generators = get_muse_configs(has_tools=False, has_output=True) 34 34 topic_map = {} 35 35 for key, meta in generators.items(): 36 36 topic = get_output_topic(key)

+8 -6

apps/settings/routes.py

··· 830 830 Generators with missing or invalid schedule are excluded. 831 831 """ 832 832 try: 833 - from think.utils import get_generator_agents_by_schedule 833 + from think.utils import get_muse_configs 834 834 835 835 # Get generators by schedule (include disabled for settings toggle UI) 836 836 segment_generators = [ 837 837 _build_generator_info(key, meta) 838 838 for key, meta in sorted( 839 - get_generator_agents_by_schedule( 840 - "segment", include_disabled=True 839 + get_muse_configs( 840 + has_tools=False, has_output=True, schedule="segment", include_disabled=True 841 841 ).items() 842 842 ) 843 843 ] 844 844 daily_generators = [ 845 845 _build_generator_info(key, meta) 846 846 for key, meta in sorted( 847 - get_generator_agents_by_schedule("daily", include_disabled=True).items() 847 + get_muse_configs( 848 + has_tools=False, has_output=True, schedule="daily", include_disabled=True 849 + ).items() 848 850 ) 849 851 ] 850 852 ··· 877 879 Setting a generator to null removes all overrides for that generator. 878 880 """ 879 881 try: 880 - from think.utils import get_generator_agents 882 + from think.utils import get_muse_configs 881 883 882 884 request_data = request.get_json() 883 885 if not request_data: ··· 887 889 return jsonify({"error": "Request must be an object"}), 400 888 890 889 891 # Get valid generator keys 890 - all_generators = get_generator_agents() 892 + all_generators = get_muse_configs(has_tools=False, has_output=True) 891 893 valid_keys = set(all_generators.keys()) 892 894 893 895 config_dir = Path(state.journal_root) / "config"

+2 -2

apps/stats/routes.py

··· 10 10 from flask import Blueprint, jsonify 11 11 12 12 from convey import state 13 - from think.utils import get_generator_agents 13 + from think.utils import get_muse_configs 14 14 15 15 stats_bp = Blueprint( 16 16 "app:stats", ··· 37 37 except Exception: 38 38 pass 39 39 40 - response["generators"] = get_generator_agents() 40 + response["generators"] = get_muse_configs(has_tools=False, has_output=True) 41 41 42 42 return jsonify(response)

+2 -2

docs/APPS.md

··· 313 313 **Reference implementations:** 314 314 - System generator templates: `muse/*.md` (files with `schedule` field but no `tools` field) 315 315 - Extraction hooks: `muse/occurrence.py`, `muse/anticipation.py` 316 - - Discovery logic: `think/utils.py` - `get_generator_agents()`, `get_generator_agents_by_schedule()`, `get_output_topic()` 316 + - Discovery logic: `think/utils.py` - `get_muse_configs(has_tools=False)`, `get_output_topic()` 317 317 - Hook loading: `think/utils.py` - `load_output_hook()` 318 318 319 319 --- ··· 336 336 337 337 **Reference implementations:** 338 338 - System agent examples: `muse/*.md` (files with `tools` field) 339 - - Discovery logic: `think/utils.py` - `get_agents()`, `get_agent()` 339 + - Discovery logic: `think/utils.py` - `get_muse_configs(has_tools=True)`, `get_agent()` 340 340 341 341 #### Instructions Configuration 342 342

+4 -7

docs/CORTEX.md

··· 17 17 ### Key Components 18 18 - **Message Bus Integration**: Cortex connects to Callosum to receive requests and broadcast events 19 19 - **Configuration Loading**: Cortex loads and merges agent configuration with request parameters 20 - - **Request Routing**: Routes requests based on config fields: 21 - - `tools` field present → spawns `sol agents` (tool-using agent) 22 - - `output` field present (no `tools`) → spawns `sol generate` (generator) 23 - - Neither field → returns error 24 - - **Process Management**: Spawns agent/generator subprocesses with merged configuration 20 + - **Request Routing**: Validates config has either `tools` or `output` field, then spawns `sol agents` 21 + - **Process Management**: Spawns agent subprocesses with merged configuration (both tool agents and generators) 25 22 - **Event Capture**: Monitors agent stdout/stderr and appends to JSONL files 26 23 - **Dual Event Distribution**: Events go to both persistent files and real-time message bus 27 - - **NDJSON Input Mode**: Both agent and generator processes accept newline-delimited JSON via stdin containing the full merged configuration 24 + - **NDJSON Input Mode**: Agent processes accept newline-delimited JSON via stdin containing the full merged configuration 28 25 29 26 ### File States 30 27 - `<timestamp>_active.jsonl`: Agent currently executing (Cortex is appending events) ··· 275 272 3. Request parameters override agent defaults in the merged configuration 276 273 4. The full configuration is passed to the agent process 277 274 278 - Agents define specialized behaviors, tool usage patterns, and facet expertise. Available agents can be discovered using the `get_agents()` function or by listing files in the `muse/` directory (agents are `.md` files with a `tools` field). 275 + Agents define specialized behaviors, tool usage patterns, and facet expertise. Available agents can be discovered using `get_muse_configs(has_tools=True)` or by listing files in the `muse/` directory (agents are `.md` files with a `tools` field). 279 276 280 277 ### Agent Configuration Options 281 278

+1 -1

docs/JOURNAL.md

··· 956 956 - `muse/*.md` – system generator templates (files with `schedule` field but no `tools` field) 957 957 - `apps/{app}/muse/*.md` – app-specific generator templates 958 958 959 - Each template is a `.md` file with JSON frontmatter containing metadata (title, description, schedule, output format). The `schedule` field is required and must be `"segment"` or `"daily"` - generators with missing or invalid schedule are skipped. Use `get_generator_agents()` from `think/utils.py` to retrieve all available generators, or `get_generator_agents_by_schedule()` to get generators filtered by schedule. 959 + Each template is a `.md` file with JSON frontmatter containing metadata (title, description, schedule, output format). The `schedule` field is required and must be `"segment"` or `"daily"` - generators with missing or invalid schedule are skipped. Use `get_muse_configs(has_tools=False)` from `think/utils.py` to retrieve all available generators, or `get_muse_configs(has_tools=False, schedule="daily")` to get generators filtered by schedule. 960 960 961 961 **Output naming:** 962 962 - System outputs: `agents/{topic}.md` (e.g., `agents/flow.md`, `agents/meetings.md`)

+5 -3

docs/THINK.md

··· 14 14 15 15 The package exposes several commands: 16 16 17 - - `sol generate` runs generator pipelines spawned by Cortex (NDJSON protocol, not for direct CLI use). 18 17 - `sol cluster` groups audio and screen JSON files into report sections. Use `--start` and 19 18 `--length` to limit the report to a specific time range. 20 19 - `sol dream` runs generators and agents for a single day via Cortex. 20 + - `sol agents` is the unified CLI for tool agents and generators (spawned by Cortex, NDJSON protocol). 21 21 - `sol supervisor` monitors observation heartbeats. Use `--no-observers` to disable local capture (sense still runs for remote uploads and imports). 22 22 - `sol mcp` starts an MCP server exposing search capabilities for both summary text and raw transcripts. 23 23 - `sol cortex` starts a Callosum-based service for managing AI agent instances and generators. ··· 82 82 83 83 Cortex routes requests based on configuration: 84 84 - Requests with `tools` field → tool-using agents (`sol agents`) 85 - - Requests with `output` field (no `tools`) → generators (`sol generate`) 85 + - Requests with `output` field (no `tools`) → generators (`sol agents`) 86 + 87 + Both types are handled by the unified `sol agents` CLI which routes internally. 86 88 87 89 To spawn agents programmatically, use the cortex_client functions: 88 90 ··· 159 161 160 162 ## Generator map keys 161 163 162 - `think.utils.get_generator_agents()` reads the `.md` prompt files under `muse/` and 164 + `think.utils.get_muse_configs(has_tools=False)` reads the `.md` prompt files under `muse/` and 163 165 returns a dictionary keyed by generator name. Each entry contains: 164 166 165 167 - `path` – the prompt file path

+2 -4

sol.py

··· 10 10 11 11 Examples: 12 12 sol import data.json Import data into journal 13 - sol generate 20250101 Generate agent outputs for a day 14 - sol think.generate -h Show help for specific module 13 + sol dream 20250101 Run daily processing for a day 14 + sol think.agents -h Show help for specific module 15 15 """ 16 16 17 17 from __future__ import annotations ··· 39 39 COMMANDS: dict[str, str] = { 40 40 # think package - daily processing and analysis 41 41 "import": "think.importer", 42 - "generate": "think.generate", 43 42 "cluster": "think.cluster", 44 43 "dream": "think.dream", 45 44 "planner": "think.planner", ··· 89 88 GROUPS: dict[str, list[str]] = { 90 89 "Think (daily processing)": [ 91 90 "import", 92 - "generate", 93 91 "cluster", 94 92 "dream", 95 93 "planner",

+10 -5

tests/test_agents_ndjson.py

··· 80 80 "model": GPT_5, 81 81 "max_output_tokens": 100, 82 82 "mcp_server_url": "http://localhost:5175/mcp", 83 + "tools": ["search_insights"], 83 84 } 84 85 ) 85 86 ··· 120 121 "prompt": "First question", 121 122 "provider": "openai", 122 123 "mcp_server_url": "http://localhost:5175/mcp", 124 + "tools": ["search_insights"], 123 125 }, 124 126 { 125 127 "prompt": "Second question", 126 128 "provider": "anthropic", 127 129 "model": "claude-3", 128 130 "mcp_server_url": "http://localhost:5175/mcp", 131 + "tools": ["search_insights"], 129 132 }, 130 133 { 131 134 "prompt": "Third question", 132 135 "provider": "google", 133 136 "name": "technical", 134 137 "mcp_server_url": "http://localhost:5175/mcp", 138 + "tools": ["search_insights"], 135 139 }, 136 140 ] 137 141 ··· 168 172 169 173 def test_ndjson_invalid_json(mock_journal, monkeypatch, capsys): 170 174 """Test handling of invalid JSON in NDJSON input.""" 171 - ndjson_input = """{"prompt": "Valid request", "provider": "openai", "mcp_server_url": "http://localhost:5175/mcp"} 175 + ndjson_input = """{"prompt": "Valid request", "provider": "openai", "mcp_server_url": "http://localhost:5175/mcp", "tools": ["search_insights"]} 172 176 not valid json 173 - {"prompt": "Another valid request", "provider": "openai", "mcp_server_url": "http://localhost:5175/mcp"}""" 177 + {"prompt": "Another valid request", "provider": "openai", "mcp_server_url": "http://localhost:5175/mcp", "tools": ["search_insights"]}""" 174 178 175 179 monkeypatch.setattr("sys.stdin", StringIO(ndjson_input)) 176 180 ··· 204 208 { 205 209 "provider": "openai", 206 210 "model": GPT_5, 211 + "tools": ["search_insights"], # Has tools, so needs prompt 207 212 } 208 213 ) 209 214 ··· 226 231 assert len(lines) >= 1 227 232 error_event = json.loads(lines[0]) 228 233 assert error_event["event"] == "error" 229 - assert "Missing 'prompt'" in error_event["error"] 234 + assert "prompt" in error_event["error"].lower() # Error mentions prompt 230 235 231 236 232 237 def test_ndjson_empty_lines(mock_journal, monkeypatch, capsys): 233 238 """Test that empty lines in NDJSON input are ignored.""" 234 - ndjson_input = """{"prompt": "First", "provider": "openai"} 239 + ndjson_input = """{"prompt": "First", "provider": "openai", "tools": ["search_insights"]} 235 240 236 - {"prompt": "Second", "provider": "openai"} 241 + {"prompt": "Second", "provider": "openai", "tools": ["search_insights"]} 237 242 238 243 """ 239 244

+5

tests/test_anthropic.py

··· 177 177 "provider": "anthropic", 178 178 "model": CLAUDE_SONNET_4, 179 179 "mcp_server_url": "http://localhost:5173/mcp", 180 + "tools": ["search_insights"], 180 181 } 181 182 ) 182 183 asyncio.run(run_main(mod, ["sol agents"], stdin_data=ndjson_input)) ··· 218 219 "provider": "anthropic", 219 220 "model": CLAUDE_SONNET_4, 220 221 "mcp_server_url": "http://localhost:5173/mcp", 222 + "tools": ["search_insights"], 221 223 } 222 224 ) 223 225 asyncio.run(run_main(mod, ["sol agents"], stdin_data=ndjson_input)) ··· 263 265 "provider": "anthropic", 264 266 "model": CLAUDE_SONNET_4, 265 267 "mcp_server_url": "http://localhost:5173/mcp", 268 + "tools": ["search_insights"], 266 269 } 267 270 ) 268 271 asyncio.run(run_main(mod, ["sol agents"], stdin_data=ndjson_input)) ··· 306 309 "provider": "anthropic", 307 310 "model": CLAUDE_SONNET_4, 308 311 "mcp_server_url": "http://localhost:5173/mcp", 312 + "tools": ["search_insights"], 309 313 } 310 314 ) 311 315 asyncio.run(run_main(mod, ["sol agents"], stdin_data=ndjson_input)) ··· 347 351 "provider": "anthropic", 348 352 "model": CLAUDE_SONNET_4, 349 353 "mcp_server_url": "http://localhost:5173/mcp", 354 + "tools": ["search_insights"], 350 355 } 351 356 ) 352 357 asyncio.run(run_main(mod, ["sol agents"], stdin_data=ndjson_input))

+11 -11

tests/test_app_agents.py

··· 9 9 10 10 import pytest 11 11 12 - from think.utils import _resolve_agent_path, get_agent, get_agents 12 + from think.utils import _resolve_agent_path, get_agent, get_muse_configs 13 13 14 14 15 15 @pytest.fixture ··· 120 120 assert "fakeapp:fakeagent" in str(exc_info.value) 121 121 122 122 123 - def test_get_agents_includes_system_agents(fixture_journal): 124 - """Test get_agents returns system agents.""" 125 - agents = get_agents() 123 + def test_get_muse_configs_includes_system_agents(fixture_journal): 124 + """Test get_muse_configs returns system agents.""" 125 + agents = get_muse_configs(has_tools=True) 126 126 127 127 # Should include known system agents 128 128 assert "default" in agents ··· 131 131 assert "user_instruction" in agents["default"] 132 132 133 133 134 - def test_get_agents_system_agents_have_metadata(fixture_journal): 134 + def test_get_muse_configs_system_agents_have_metadata(fixture_journal): 135 135 """Test system agents have proper metadata fields.""" 136 - agents = get_agents() 136 + agents = get_muse_configs(has_tools=True) 137 137 138 138 # Check a known system agent 139 139 default = agents.get("default") ··· 143 143 assert "name" in default 144 144 145 145 146 - def test_get_agents_excludes_private_apps(fixture_journal, tmp_path, monkeypatch): 147 - """Test get_agents skips apps starting with underscore.""" 146 + def test_get_muse_configs_excludes_private_apps(fixture_journal, tmp_path, monkeypatch): 147 + """Test get_muse_configs skips apps starting with underscore.""" 148 148 # Create a private app with an agent 149 149 private_app = tmp_path / "_private_app" / "agents" 150 150 private_app.mkdir(parents=True) ··· 152 152 153 153 # This is tricky to test without modifying the actual apps directory 154 154 # The current implementation filters by app_path.name.startswith("_") 155 - # We verify this by checking the code behavior with get_agents() 155 + # We verify this by checking the code behavior with get_muse_configs() 156 156 157 - agents = get_agents() 157 + agents = get_muse_configs(has_tools=True) 158 158 159 159 # No agents should have keys starting with "_" 160 160 for key in agents: ··· 163 163 164 164 def test_app_agent_namespace_format(fixture_journal): 165 165 """Test app agent keys follow {app}:{agent} format.""" 166 - agents = get_agents() 166 + agents = get_muse_configs(has_tools=True) 167 167 168 168 for key, config in agents.items(): 169 169 if config.get("source") == "app":

+6 -5

tests/test_cortex.py

··· 142 142 @patch("think.cortex.subprocess.Popen") 143 143 @patch("think.cortex.threading.Thread") 144 144 @patch("think.cortex.threading.Timer") 145 - def test_spawn_generator(mock_timer, mock_thread, mock_popen, cortex_service, mock_journal): 146 - """Test spawning a generator subprocess.""" 145 + def test_spawn_generator_via_agent(mock_timer, mock_thread, mock_popen, cortex_service, mock_journal): 146 + """Test spawning a generator subprocess via _spawn_agent.""" 147 147 mock_process = MagicMock() 148 148 mock_process.pid = 54321 149 149 mock_process.poll.return_value = None ··· 168 168 "output": "md", 169 169 } 170 170 171 - cortex_service._spawn_generator( 171 + # Generators now route through _spawn_agent 172 + cortex_service._spawn_agent( 172 173 agent_id, 173 174 file_path, 174 175 config, 175 176 ) 176 177 177 - # Check subprocess was called with generate command 178 + # Check subprocess was called with agents command (generators route through agents) 178 179 mock_popen.assert_called_once() 179 180 call_args = mock_popen.call_args 180 - assert call_args[0][0] == ["sol", "generate"] 181 + assert call_args[0][0] == ["sol", "agents"] 181 182 assert call_args[1]["stdin"] is not None 182 183 assert call_args[1]["stdout"] is not None 183 184 assert call_args[1]["stderr"] is not None

+9 -1

tests/test_dream_full.py

··· 20 20 journal = copy_journal(tmp_path) 21 21 monkeypatch.setenv("JOURNAL_PATH", str(journal)) 22 22 called = [] 23 + generators_called = False 23 24 24 25 def mock_run_command(cmd, day): 25 26 called.append(cmd) ··· 29 30 called.append(cmd) 30 31 return True # Return success 31 32 33 + def mock_run_generators_via_cortex(day, force, segment=None): 34 + nonlocal generators_called 35 + generators_called = True 36 + return (1, 0) # 1 success, 0 failures 37 + 32 38 monkeypatch.setattr(mod, "run_command", mock_run_command) 33 39 monkeypatch.setattr(mod, "run_queued_command", mock_run_queued_command) 40 + monkeypatch.setattr(mod, "run_generators_via_cortex", mock_run_generators_via_cortex) 34 41 # Also mock run_daily_agents to avoid agent execution 35 42 monkeypatch.setattr(mod, "run_daily_agents", lambda day: (0, 0)) 36 43 monkeypatch.setattr("think.utils.load_dotenv", lambda: True) ··· 40 47 ) 41 48 mod.main() 42 49 assert any(c[0] == "sol" and c[1] == "sense" for c in called) 43 - assert any(c[0] == "sol" and c[1] == "generate" for c in called) 50 + # Generators now run via cortex, not as direct subprocess 51 + assert generators_called, "run_generators_via_cortex should have been called" 44 52 # Verify indexer is called with --rescan (light mode) via queued command 45 53 indexer_cmds = [c for c in called if c[0] == "sol" and c[1] == "indexer"] 46 54 assert len(indexer_cmds) == 1

+8 -5

tests/test_generate_full.py

··· 43 43 44 44 def run_generator_with_config(mod, config: dict, monkeypatch) -> list[dict]: 45 45 """Run generator with NDJSON config and capture output events.""" 46 + # Mock argv to prevent argparse from seeing pytest args 47 + monkeypatch.setattr("sys.argv", ["sol"]) 48 + 46 49 # Mock stdin with config 47 50 stdin_data = json.dumps(config) + "\n" 48 51 monkeypatch.setattr("sys.stdin", io.StringIO(stdin_data)) ··· 67 70 68 71 def test_generate_output_ndjson(tmp_path, monkeypatch): 69 72 """Test basic output generation via NDJSON protocol.""" 70 - mod = importlib.import_module("think.generate") 73 + mod = importlib.import_module("think.agents") 71 74 copy_day(tmp_path) 72 75 73 76 # Create a test generator in muse directory ··· 115 118 116 119 def test_generate_hook_invoked_with_context(tmp_path, monkeypatch): 117 120 """Test that hooks receive correct context including multi_segment flag.""" 118 - mod = importlib.import_module("think.generate") 121 + mod = importlib.import_module("think.agents") 119 122 copy_day(tmp_path) 120 123 121 124 # Create the hook file in muse/ directory ··· 193 196 194 197 def test_generate_without_hook_succeeds(tmp_path, monkeypatch): 195 198 """Test that generators without hooks still work correctly.""" 196 - mod = importlib.import_module("think.generate") 199 + mod = importlib.import_module("think.agents") 197 200 copy_day(tmp_path) 198 201 199 202 # Create generator without hook ··· 237 240 238 241 def test_generate_error_event_on_missing_generator(tmp_path, monkeypatch): 239 242 """Test that missing generator name emits error event.""" 240 - mod = importlib.import_module("think.generate") 243 + mod = importlib.import_module("think.agents") 241 244 copy_day(tmp_path) 242 245 243 246 monkeypatch.setenv("JOURNAL_PATH", str(tmp_path)) ··· 258 261 259 262 def test_generate_skipped_on_no_input(tmp_path, monkeypatch): 260 263 """Test that generator emits skipped finish when no input.""" 261 - mod = importlib.import_module("think.generate") 264 + mod = importlib.import_module("think.agents") 262 265 263 266 # Create empty day directory (no transcripts) 264 267 os.environ["JOURNAL_PATH"] = str(tmp_path)

+1 -1

tests/test_generate_scan_day.py

··· 28 28 29 29 30 30 def test_scan_day(tmp_path, monkeypatch): 31 - mod = importlib.import_module("think.generate") 31 + mod = importlib.import_module("think.agents") 32 32 day_dir = copy_day(tmp_path) 33 33 monkeypatch.setenv("JOURNAL_PATH", str(tmp_path)) 34 34

+22 -37

tests/test_generators.py

··· 7 7 from pathlib import Path 8 8 9 9 10 - def test_get_generator_agents(): 10 + def test_get_muse_configs_generators(): 11 11 """Test that system generators are discovered with source field.""" 12 12 utils = importlib.import_module("think.utils") 13 - generators = utils.get_generator_agents() 13 + generators = utils.get_muse_configs(has_tools=False, has_output=True) 14 14 assert "flow" in generators 15 15 info = generators["flow"] 16 16 assert os.path.basename(info["path"]) == "flow.md" ··· 35 35 assert utils.get_output_topic("my_app:weekly_summary") == "_my_app_weekly_summary" 36 36 37 37 38 - def test_get_generator_agents_app_discovery(tmp_path, monkeypatch): 38 + def test_get_muse_configs_app_discovery(tmp_path, monkeypatch): 39 39 """Test that app generators are discovered from apps/*/muse/.""" 40 40 utils = importlib.import_module("think.utils") 41 41 ··· 51 51 # Also create workspace.html to make it a valid app (not strictly required for generators) 52 52 (tmp_path / "apps" / "test_app" / "workspace.html").write_text("<h1>Test</h1>") 53 53 54 - # Monkeypatch the apps_dir path 55 - original_get_generator_agents = utils.get_generator_agents 56 - 57 - def patched_get_generator_agents(): 58 - # Temporarily modify the path 59 - import think.utils as tu 60 - 61 - original_parent = Path(tu.__file__).parent.parent 62 - # We need to actually patch how the function resolves apps_dir 63 - # Let's just test the existing system generators have source 64 - return original_get_generator_agents() 65 - 66 54 # For now, just verify system generators have correct source 67 - generators = utils.get_generator_agents() 55 + generators = utils.get_muse_configs(has_tools=False, has_output=True) 68 56 for key, info in generators.items(): 69 57 if ":" not in key: 70 58 assert info.get("source") == "system", f"{key} should have source=system" 71 59 72 60 73 - def test_get_generator_agents_by_schedule(): 61 + def test_get_muse_configs_by_schedule(): 74 62 """Test filtering generators by schedule.""" 75 63 utils = importlib.import_module("think.utils") 76 64 77 65 # Get daily generators 78 - daily = utils.get_generator_agents_by_schedule("daily") 66 + daily = utils.get_muse_configs(has_tools=False, has_output=True, schedule="daily") 79 67 assert len(daily) > 0 80 68 for key, meta in daily.items(): 81 69 assert meta.get("schedule") == "daily", f"{key} should have schedule=daily" 82 70 83 71 # Get segment generators 84 - segment = utils.get_generator_agents_by_schedule("segment") 72 + segment = utils.get_muse_configs(has_tools=False, has_output=True, schedule="segment") 85 73 assert len(segment) > 0 86 74 for key, meta in segment.items(): 87 75 assert meta.get("schedule") == "segment", f"{key} should have schedule=segment" ··· 92 80 ), "daily and segment should not overlap" 93 81 94 82 # Unknown schedule returns empty dict 95 - assert utils.get_generator_agents_by_schedule("hourly") == {} 96 - assert utils.get_generator_agents_by_schedule("") == {} 83 + assert utils.get_muse_configs(has_tools=False, has_output=True, schedule="hourly") == {} 84 + assert utils.get_muse_configs(has_tools=False, has_output=True, schedule="") == {} 97 85 98 86 99 - def test_get_generator_agents_by_schedule_include_disabled(monkeypatch): 87 + def test_get_muse_configs_include_disabled(monkeypatch): 100 88 """Test include_disabled parameter.""" 101 89 utils = importlib.import_module("think.utils") 102 90 103 91 # Get generators without disabled (default) 104 - without_disabled = utils.get_generator_agents_by_schedule("daily") 92 + without_disabled = utils.get_muse_configs(has_tools=False, has_output=True, schedule="daily") 105 93 106 94 # Get generators with disabled included 107 - with_disabled = utils.get_generator_agents_by_schedule( 108 - "daily", include_disabled=True 95 + with_disabled = utils.get_muse_configs( 96 + has_tools=False, has_output=True, schedule="daily", include_disabled=True 109 97 ) 110 98 111 99 # Should have at least as many with disabled included ··· 113 101 assert len(with_disabled) >= len(without_disabled) 114 102 115 103 116 - def test_all_system_generators_have_schedule(): 117 - """Test that all system generators have valid schedule field. 104 + def test_scheduled_generators_have_valid_schedule(): 105 + """Test that scheduled generators have valid schedule field. 118 106 119 - Generators are identified by having a schedule field but no tools field. 120 - Hook-only files (occurrence, anticipation) have neither, so they're 121 - excluded from get_generator_agents() automatically. 107 + Generators with a schedule field must have valid values ('segment' or 'daily'). 108 + Some generators (like importer) have output but no schedule - they're used 109 + for ad-hoc processing, not scheduled runs. 122 110 """ 123 111 utils = importlib.import_module("think.utils") 124 112 125 - generators = utils.get_generator_agents() 113 + generators = utils.get_muse_configs(has_tools=False, has_output=True) 126 114 valid_schedules = ("segment", "daily") 127 115 128 116 for key, meta in generators.items(): 129 - if meta.get("source") == "system": 130 - sched = meta.get("schedule") 131 - assert ( 132 - sched is not None 133 - ), f"System generator '{key}' missing required 'schedule' field" 117 + sched = meta.get("schedule") 118 + if sched is not None: 134 119 assert ( 135 120 sched in valid_schedules 136 - ), f"System generator '{key}' has invalid schedule '{sched}'" 121 + ), f"Generator '{key}' has invalid schedule '{sched}'"

+2

tests/test_google.py

··· 44 44 "provider": "google", 45 45 "model": GEMINI_FLASH, 46 46 "disable_mcp": True, 47 + "tools": ["search_insights"], 47 48 } 48 49 ) 49 50 asyncio.run(run_main(mod, ["sol agents"], stdin_data=ndjson_input)) ··· 94 95 "provider": "google", 95 96 "model": GEMINI_FLASH, 96 97 "mcp_server_url": "http://localhost:6270/mcp", 98 + "tools": ["search_insights"], 97 99 } 98 100 ) 99 101 asyncio.run(run_main(mod, ["sol agents"], stdin_data=ndjson_input))

+1

tests/test_google_thinking.py

··· 40 40 "provider": "google", 41 41 "model": GEMINI_FLASH, 42 42 "disable_mcp": True, 43 + "tools": ["search_insights"], 43 44 } 44 45 ) 45 46 asyncio.run(run_main(mod, ["sol agents"], stdin_data=ndjson_input))

+8

tests/test_openai.py

··· 47 47 "provider": "openai", 48 48 "model": GPT_5, 49 49 "mcp_server_url": "http://localhost:5173/mcp", 50 + "tools": ["search_insights"], 50 51 } 51 52 ) 52 53 asyncio.run(run_main(mod, ["sol agents"], stdin_data=ndjson_input)) ··· 101 102 "provider": "openai", 102 103 "model": GPT_5, 103 104 "mcp_server_url": "http://localhost:5173/mcp", 105 + "tools": ["search_insights"], 104 106 } 105 107 ) 106 108 asyncio.run(run_main(mod, ["sol agents"], stdin_data=ndjson_input)) ··· 148 150 "name": "investigator", 149 151 "agent_id": "999", 150 152 "mcp_server_url": "http://localhost:5173/mcp", 153 + "tools": ["search_insights"], 151 154 } 152 155 ) 153 156 asyncio.run(run_main(mod, ["sol agents"], stdin_data=ndjson_input)) ··· 182 185 "provider": "openai", 183 186 "model": GPT_5, 184 187 "mcp_server_url": "http://localhost:5173/mcp", 188 + "tools": ["search_insights"], 185 189 } 186 190 ) 187 191 asyncio.run(run_main(mod, ["sol agents"], stdin_data=ndjson_input)) ··· 235 239 "provider": "openai", 236 240 "model": GPT_5, 237 241 "mcp_server_url": "http://localhost:5173/mcp", 242 + "tools": ["search_insights"], 238 243 } 239 244 ) 240 245 asyncio.run(run_main(mod, ["sol agents"], stdin_data=ndjson_input)) ··· 294 299 "provider": "openai", 295 300 "model": GPT_5, 296 301 "mcp_server_url": "http://localhost:5173/mcp", 302 + "tools": ["search_insights"], 297 303 } 298 304 ) 299 305 asyncio.run(run_main(mod, ["sol agents"], stdin_data=ndjson_input)) ··· 343 349 "provider": "openai", 344 350 "model": GPT_5, 345 351 "mcp_server_url": "http://localhost:5173/mcp", 352 + "tools": ["search_insights"], 346 353 } 347 354 ) 348 355 asyncio.run(run_main(mod, ["sol agents"], stdin_data=ndjson_input)) ··· 406 413 "provider": "openai", 407 414 "model": GPT_5, 408 415 "mcp_server_url": "http://localhost:5173/mcp", 416 + "tools": ["search_insights"], 409 417 } 410 418 ) 411 419 asyncio.run(run_main(mod, ["sol agents"], stdin_data=ndjson_input))

+7 -4

tests/test_output_hooks.py

··· 41 41 42 42 def run_generator_with_config(mod, config: dict, monkeypatch) -> list[dict]: 43 43 """Run generator with NDJSON config and capture output events.""" 44 + # Mock argv to prevent argparse from seeing pytest args 45 + monkeypatch.setattr("sys.argv", ["sol"]) 46 + 44 47 stdin_data = json.dumps(config) + "\n" 45 48 monkeypatch.setattr("sys.stdin", io.StringIO(stdin_data)) 46 49 ··· 142 145 143 146 144 147 def test_output_hook_invocation(tmp_path, monkeypatch): 145 - """Test that generate.py invokes hook and uses transformed result.""" 146 - mod = importlib.import_module("think.generate") 148 + """Test that agents.py invokes hook and uses transformed result.""" 149 + mod = importlib.import_module("think.agents") 147 150 copy_day(tmp_path) 148 151 149 152 # Create generator with hook in muse directory ··· 202 205 203 206 def test_output_hook_returns_none(tmp_path, monkeypatch): 204 207 """Test that hook returning None uses original result.""" 205 - mod = importlib.import_module("think.generate") 208 + mod = importlib.import_module("think.agents") 206 209 copy_day(tmp_path) 207 210 208 211 muse_dir = Path(mod.__file__).resolve().parent.parent / "muse" ··· 252 255 253 256 def test_output_hook_error_fallback(tmp_path, monkeypatch): 254 257 """Test that hook errors fall back to original result.""" 255 - mod = importlib.import_module("think.generate") 258 + mod = importlib.import_module("think.agents") 256 259 copy_day(tmp_path) 257 260 258 261 muse_dir = Path(mod.__file__).resolve().parent.parent / "muse"

+1 -1

tests/test_sol.py

··· 219 219 220 220 def test_critical_commands_registered(self): 221 221 """Test that critical commands are registered.""" 222 - critical = ["import", "generate", "dream", "indexer", "transcribe"] 222 + critical = ["import", "agents", "dream", "indexer", "transcribe"] 223 223 for cmd in critical: 224 224 assert cmd in sol.COMMANDS, f"Critical command '{cmd}' not registered"

+505 -36

think/agents.py

··· 1 1 # SPDX-License-Identifier: AGPL-3.0-only 2 2 # Copyright (c) 2026 sol pbc 3 3 4 + """Unified agent and generator CLI for solstone. 5 + 6 + Spawned by cortex for both: 7 + - Tool-using agents (configs with 'tools' field) 8 + - Transcript generators (configs with 'output' field, no 'tools') 9 + 10 + Reads NDJSON config from stdin, emits JSONL events to stdout. 11 + """ 12 + 4 13 from __future__ import annotations 5 14 6 15 import argparse ··· 11 20 import sys 12 21 import time 13 22 import traceback 23 + from datetime import datetime 14 24 from pathlib import Path 15 25 from typing import Any, Callable, Literal, Optional, TypedDict, Union 16 26 27 + from google import genai 28 + from google.genai import types 17 29 from typing_extensions import Required 18 30 19 - from think.utils import setup_cli 31 + from think.cluster import cluster, cluster_period, cluster_segments_multi 32 + from think.utils import ( 33 + compose_instructions, 34 + day_log, 35 + day_path, 36 + format_day, 37 + format_segment_times, 38 + get_muse_configs, 39 + get_output_path, 40 + load_output_hook, 41 + load_prompt, 42 + segment_parse, 43 + setup_cli, 44 + ) 20 45 21 46 LOG = logging.getLogger("think.agents") 22 47 ··· 283 308 "JSONEventCallback", 284 309 "format_tool_summary", 285 310 "parse_agent_events_to_turns", 311 + "scan_day", 312 + "generate_agent_output", 286 313 ] 287 314 288 315 316 + # ============================================================================= 317 + # Generator Functions (for transcript analysis without tools) 318 + # ============================================================================= 319 + 320 + # Minimum content length for generator output 321 + MIN_INPUT_CHARS = 50 322 + 323 + 324 + def scan_day(day: str) -> dict[str, list[str]]: 325 + """Return lists of processed and pending daily generator output files. 326 + 327 + Only scans daily generators (schedule='daily'). Segment generators are 328 + stored within segment directories and are not included here. 329 + """ 330 + day_dir = day_path(day) 331 + daily_generators = get_muse_configs( 332 + has_tools=False, has_output=True, schedule="daily", include_disabled=True 333 + ) 334 + processed: list[str] = [] 335 + pending: list[str] = [] 336 + for key, meta in sorted(daily_generators.items()): 337 + output_format = meta.get("output") 338 + output_path = get_output_path(day_dir, key, output_format=output_format) 339 + if output_path.exists(): 340 + processed.append(os.path.join("agents", output_path.name)) 341 + else: 342 + pending.append(os.path.join("agents", output_path.name)) 343 + return {"processed": sorted(processed), "repairable": sorted(pending)} 344 + 345 + 346 + def _get_or_create_cache( 347 + client: genai.Client, 348 + model: str, 349 + display_name: str, 350 + transcript: str, 351 + system_instruction: str, 352 + ) -> str | None: 353 + """Return cache name for ``display_name`` or None if content too small. 354 + 355 + Creates cache with ``transcript`` and provided system instruction if needed. 356 + Returns None if content is below estimated 2048 token minimum (~10k chars). 357 + 358 + The cache contains the system instruction + transcript which are identical 359 + for all topics on the same day with the same system prompt, so display_name 360 + should include both day and system prompt name. 361 + """ 362 + MIN_CACHE_CHARS = 10000 # Heuristic: ~4 chars/token → 2048 tokens ≈ 8k-10k chars 363 + 364 + # Check existing caches first 365 + for c in client.caches.list(): 366 + if c.model == model and c.display_name == display_name: 367 + return c.name 368 + 369 + # Skip cache creation for small content 370 + if len(transcript) < MIN_CACHE_CHARS: 371 + return None 372 + 373 + cache = client.caches.create( 374 + model=model, 375 + config=types.CreateCachedContentConfig( 376 + display_name=display_name, 377 + system_instruction=system_instruction, 378 + contents=[transcript], 379 + ttl="1800s", # 30 minutes to accommodate multiple topic analyses 380 + ), 381 + ) 382 + return cache.name 383 + 384 + 385 + def generate_agent_output( 386 + transcript: str, 387 + prompt: str, 388 + api_key: str, 389 + cache_display_name: str | None = None, 390 + name: str | None = None, 391 + json_output: bool = False, 392 + system_instruction: str | None = None, 393 + thinking_budget: int | None = None, 394 + max_output_tokens: int | None = None, 395 + return_result: bool = False, 396 + ) -> str | GenerateResult: 397 + """Send clustered transcript to LLM for agent output generation. 398 + 399 + Args: 400 + transcript: Clustered transcript content (markdown format). 401 + prompt: Agent prompt text. 402 + api_key: Google API key for caching. 403 + cache_display_name: Optional cache key for Google content caching. 404 + Should include system prompt name for proper cache isolation. 405 + name: Agent name for token logging context. 406 + json_output: If True, request JSON response format. 407 + system_instruction: System instruction text. If None, loads default 408 + from journal.md via compose_instructions(). 409 + thinking_budget: Token budget for model thinking. If None, uses default. 410 + max_output_tokens: Maximum output tokens. If None, uses default. 411 + return_result: If True, return full GenerateResult with usage data. 412 + 413 + Returns: 414 + Generated agent output content (markdown or JSON string), or 415 + GenerateResult dict if return_result=True. 416 + """ 417 + from think.models import generate_with_result, resolve_provider 418 + 419 + # Use provided system_instruction or fall back to default 420 + if system_instruction is None: 421 + instructions = compose_instructions(include_datetime=False) 422 + system_instruction = instructions["system_instruction"] 423 + 424 + # Use defaults if not specified 425 + if thinking_budget is None: 426 + thinking_budget = 8192 * 3 427 + if max_output_tokens is None: 428 + max_output_tokens = 8192 * 6 429 + 430 + # Build context for provider routing and token logging 431 + output_type = "json" if json_output else "markdown" 432 + context = f"agent.{name}.{output_type}" if name else "agent.unknown" 433 + 434 + # Try to use cache if display name provided 435 + # Note: caching is Google-specific, so we check provider first 436 + provider, model = resolve_provider(context) 437 + 438 + client = None 439 + cache_name = None 440 + if cache_display_name and provider == "google": 441 + client = genai.Client(api_key=api_key) 442 + cache_name = _get_or_create_cache( 443 + client, model, cache_display_name, transcript, system_instruction 444 + ) 445 + 446 + if cache_name: 447 + # Cache hit: content already in cache, just send prompt. 448 + # Google-specific params (cached_content, client) are passed via kwargs. 449 + result = generate_with_result( 450 + contents=[prompt], 451 + context=context, 452 + temperature=0.3, 453 + max_output_tokens=max_output_tokens, 454 + thinking_budget=thinking_budget, 455 + model=model, 456 + cached_content=cache_name, 457 + client=client, 458 + json_output=json_output, 459 + ) 460 + else: 461 + # No cache: use unified generate() 462 + result = generate_with_result( 463 + contents=[transcript, prompt], 464 + context=context, 465 + temperature=0.3, 466 + max_output_tokens=max_output_tokens, 467 + thinking_budget=thinking_budget, 468 + system_instruction=system_instruction, 469 + json_output=json_output, 470 + ) 471 + 472 + if return_result: 473 + return result 474 + return result["text"] 475 + 476 + 477 + def _run_generator(config: dict, emit_event: Callable[[dict], None]) -> None: 478 + """Execute generator pipeline with config from cortex. 479 + 480 + Args: 481 + config: Merged config from cortex containing: 482 + - name: Generator key (e.g., 'activity', 'chat:sentiment') 483 + - day: Day in YYYYMMDD format 484 + - segment: Optional single segment key 485 + - segments: Optional list of segment keys 486 + - output: Output format ('md' or 'json') 487 + - output_path: Optional custom output path 488 + - force: Whether to regenerate existing output 489 + - provider: AI provider 490 + - model: Model name 491 + emit_event: Callback to emit JSONL events 492 + """ 493 + name = config.get("name", "default") 494 + day = config.get("day") 495 + segment = config.get("segment") 496 + segments = config.get("segments") # List of segment keys 497 + output_format = config.get("output", "md") 498 + output_path_override = config.get("output_path") 499 + force = config.get("force", False) 500 + provider = config.get("provider", "google") 501 + model = config.get("model") 502 + 503 + if not day: 504 + raise ValueError("Missing 'day' field in generator config") 505 + 506 + # Emit start event 507 + emit_event( 508 + { 509 + "event": "start", 510 + "ts": int(time.time() * 1000), 511 + "prompt": "", # Generators don't have user prompts 512 + "name": name, 513 + "model": model or "unknown", 514 + "provider": provider, 515 + } 516 + ) 517 + 518 + # Set segment key for token usage logging 519 + if segment: 520 + os.environ["SEGMENT_KEY"] = segment 521 + elif segments: 522 + os.environ["SEGMENT_KEY"] = segments[0] 523 + 524 + # Load generator metadata 525 + all_generators = get_muse_configs(has_tools=False, has_output=True) 526 + if name in all_generators: 527 + meta = all_generators[name] 528 + agent_path = Path(meta["path"]) 529 + else: 530 + raise ValueError(f"Generator not found: {name}") 531 + 532 + # Check if generator is disabled 533 + if meta.get("disabled"): 534 + logging.info("Generator %s is disabled, skipping", name) 535 + emit_event( 536 + { 537 + "event": "finish", 538 + "ts": int(time.time() * 1000), 539 + "result": "", 540 + "skipped": "disabled", 541 + } 542 + ) 543 + return 544 + 545 + # Extract instructions config for source filtering and system prompt 546 + instructions_config = meta.get("instructions") 547 + instructions = compose_instructions( 548 + include_datetime=False, 549 + config_overrides=instructions_config, 550 + ) 551 + sources = instructions.get("sources") 552 + system_prompt_name = instructions.get("system_prompt_name", "journal") 553 + system_instruction = instructions["system_instruction"] 554 + 555 + # Track multi-segment mode 556 + multi_segment_mode = bool(segments) 557 + 558 + # Build transcript via clustering 559 + if segments: 560 + markdown, file_count = cluster_segments_multi(day, segments, sources=sources) 561 + elif segment: 562 + markdown, file_count = cluster_period(day, segment, sources=sources) 563 + else: 564 + markdown, file_count = cluster(day, sources=sources) 565 + 566 + day_dir = str(day_path(day)) 567 + 568 + # Skip generation when there's nothing to analyze 569 + if file_count == 0 or len(markdown.strip()) < MIN_INPUT_CHARS: 570 + logging.info( 571 + "Insufficient input (files=%d, chars=%d), skipping", 572 + file_count, 573 + len(markdown.strip()), 574 + ) 575 + emit_event( 576 + { 577 + "event": "finish", 578 + "ts": int(time.time() * 1000), 579 + "result": "", 580 + "skipped": "no_input", 581 + } 582 + ) 583 + day_log(day, f"generate {name} skipped (no input)") 584 + return 585 + 586 + # Prepend input context note for limited recordings 587 + if file_count < 3: 588 + input_note = ( 589 + "**Input Note:** Limited recordings for this day. " 590 + "Scale analysis to available input.\n\n" 591 + ) 592 + markdown = input_note + markdown 593 + 594 + # Build context for template substitution 595 + prompt_context: dict[str, str] = { 596 + "day": day, 597 + "date": format_day(day), 598 + } 599 + 600 + # Add segment context 601 + if segment: 602 + start_str, end_str = format_segment_times(segment) 603 + if start_str and end_str: 604 + prompt_context["segment"] = segment 605 + prompt_context["segment_start"] = start_str 606 + prompt_context["segment_end"] = end_str 607 + elif segments: 608 + all_times = [] 609 + for seg in segments: 610 + start_time, end_time = segment_parse(seg) 611 + if start_time and end_time: 612 + all_times.append((start_time, end_time)) 613 + 614 + if all_times: 615 + earliest_start = min(t[0] for t in all_times) 616 + latest_end = max(t[1] for t in all_times) 617 + start_str = ( 618 + datetime.combine(datetime.today(), earliest_start) 619 + .strftime("%I:%M %p") 620 + .lstrip("0") 621 + ) 622 + end_str = ( 623 + datetime.combine(datetime.today(), latest_end) 624 + .strftime("%I:%M %p") 625 + .lstrip("0") 626 + ) 627 + prompt_context["segment_start"] = start_str 628 + prompt_context["segment_end"] = end_str 629 + 630 + # Load prompt 631 + agent_prompt = load_prompt( 632 + agent_path.stem, base_dir=agent_path.parent, context=prompt_context 633 + ) 634 + prompt = agent_prompt.text 635 + 636 + # Determine output path 637 + is_json_output = output_format == "json" 638 + if output_path_override: 639 + output_path = Path(output_path_override) 640 + else: 641 + output_path = get_output_path( 642 + day_dir, name, segment=segment, output_format=output_format 643 + ) 644 + 645 + # Check if output exists (force check happens in cortex, but we handle it here too) 646 + output_exists = output_path.exists() and output_path.stat().st_size > 0 647 + 648 + # Determine cache settings 649 + if multi_segment_mode: 650 + cache_display_name = None 651 + elif segment: 652 + cache_display_name = f"{system_prompt_name}_{day}_{segment}" 653 + else: 654 + cache_display_name = f"{system_prompt_name}_{day}" 655 + 656 + # Extract generation parameters from metadata 657 + meta_thinking_budget = meta.get("thinking_budget") 658 + meta_max_output_tokens = meta.get("max_output_tokens") 659 + 660 + # Get API key 661 + api_key = os.getenv("GOOGLE_API_KEY", "") 662 + 663 + usage_data = None 664 + 665 + if output_exists and not force: 666 + # Load existing content (no LLM call) 667 + logging.info("Output exists, loading: %s", output_path) 668 + with open(output_path, "r") as f: 669 + result = f.read() 670 + else: 671 + # Generate new content 672 + if output_exists and force: 673 + logging.info("Force regenerating: %s", output_path) 674 + 675 + gen_result = generate_agent_output( 676 + markdown, 677 + prompt, 678 + api_key, 679 + cache_display_name=cache_display_name, 680 + name=name, 681 + json_output=is_json_output, 682 + system_instruction=system_instruction, 683 + thinking_budget=meta_thinking_budget, 684 + max_output_tokens=meta_max_output_tokens, 685 + return_result=True, 686 + ) 687 + result = gen_result["text"] 688 + usage_data = gen_result.get("usage") 689 + 690 + # Run post-processing hook if present 691 + if meta.get("hook_path"): 692 + hook_path = meta["hook_path"] 693 + try: 694 + hook_process = load_output_hook(hook_path) 695 + hook_context = { 696 + "day": day, 697 + "segment": segment, 698 + "multi_segment": multi_segment_mode, 699 + "name": name, 700 + "output_path": str(output_path), 701 + "meta": dict(meta), 702 + "transcript": markdown, 703 + } 704 + hook_result = hook_process(result, hook_context) 705 + if hook_result is not None: 706 + result = hook_result 707 + logging.info("Hook %s transformed result", hook_path) 708 + except Exception as exc: 709 + logging.error("Hook %s failed: %s", hook_path, exc) 710 + 711 + # Emit finish event with result (cortex handles file writing) 712 + finish_event = { 713 + "event": "finish", 714 + "ts": int(time.time() * 1000), 715 + "result": result, 716 + } 717 + if usage_data: 718 + finish_event["usage"] = usage_data 719 + 720 + emit_event(finish_event) 721 + 722 + # Log completion 723 + msg = f"generate {name} ok" 724 + if force: 725 + msg += " --force" 726 + day_log(day, msg) 727 + 728 + 729 + # ============================================================================= 730 + # Main Entry Point 731 + # ============================================================================= 732 + 733 + 289 734 async def main_async() -> None: 290 - """NDJSON-based CLI for agent backends.""" 735 + """NDJSON-based CLI for agents and generators. 291 736 737 + Routes based on config: 738 + - 'tools' field present -> tool-using agent (via provider) 739 + - 'output' field present (no 'tools') -> generator (transcript analysis) 740 + """ 292 741 parser = argparse.ArgumentParser( 293 742 description="solstone Agent CLI - Accepts NDJSON input via stdin" 294 743 ) ··· 317 766 # Parse NDJSON line - this is the complete merged config from Cortex 318 767 config = json.loads(line) 319 768 320 - # Validate prompt exists 321 - prompt = config.get("prompt") 322 - if not prompt: 323 - emit_event( 324 - { 325 - "event": "error", 326 - "error": "Missing 'prompt' field in NDJSON input", 327 - "ts": int(time.time() * 1000), 328 - } 329 - ) 330 - continue 769 + # Route based on config type 770 + has_tools = bool(config.get("tools")) 771 + has_output = bool(config.get("output")) 772 + 773 + if has_output and not has_tools: 774 + # Generator: transcript analysis without tools 775 + app_logger.debug(f"Processing generator: {config.get('name')}") 776 + _run_generator(config, emit_event) 777 + 778 + elif has_tools: 779 + # Tool-using agent: validate prompt exists 780 + prompt = config.get("prompt") 781 + if not prompt: 782 + emit_event( 783 + { 784 + "event": "error", 785 + "error": "Missing 'prompt' field for tool agent", 786 + "ts": int(time.time() * 1000), 787 + } 788 + ) 789 + continue 331 790 332 - # Extract provider to route to correct module 333 - from .providers import PROVIDER_REGISTRY, get_provider_module 791 + # Extract provider to route to correct module 792 + from .providers import PROVIDER_REGISTRY, get_provider_module 334 793 335 - provider = config.get("provider", "google") 794 + provider = config.get("provider", "google") 336 795 337 - # Set OpenAI key if needed 338 - if provider == "openai": 339 - api_key = os.getenv("OPENAI_API_KEY", "") 340 - if api_key: 341 - from agents import set_default_openai_key 796 + # Set OpenAI key if needed 797 + if provider == "openai": 798 + api_key = os.getenv("OPENAI_API_KEY", "") 799 + if api_key: 800 + from agents import set_default_openai_key 801 + 802 + set_default_openai_key(api_key) 803 + 804 + app_logger.debug(f"Processing agent: provider={provider}") 342 805 343 - set_default_openai_key(api_key) 806 + # Route to appropriate provider module 807 + if provider in PROVIDER_REGISTRY: 808 + provider_mod = get_provider_module(provider) 809 + else: 810 + # Explicit error for unknown providers 811 + valid = ", ".join(sorted(PROVIDER_REGISTRY.keys())) 812 + raise ValueError( 813 + f"Unknown provider: {provider!r}. Valid providers: {valid}" 814 + ) 344 815 345 - app_logger.debug(f"Processing request: provider={provider}") 816 + # Pass complete config to provider 817 + await provider_mod.run_agent( 818 + config=config, 819 + on_event=emit_event, 820 + ) 346 821 347 - # Route to appropriate provider module 348 - if provider in PROVIDER_REGISTRY: 349 - provider_mod = get_provider_module(provider) 350 822 else: 351 - # Explicit error for unknown providers 352 - valid = ", ".join(sorted(PROVIDER_REGISTRY.keys())) 353 - raise ValueError( 354 - f"Unknown provider: {provider!r}. Valid providers: {valid}" 823 + # Neither tools nor output - invalid config 824 + emit_event( 825 + { 826 + "event": "error", 827 + "error": "Invalid config: must have 'tools' or 'output' field", 828 + "ts": int(time.time() * 1000), 829 + } 355 830 ) 356 - 357 - # Pass complete config to provider 358 - await provider_mod.run_agent( 359 - config=config, 360 - on_event=emit_event, 361 - ) 362 831 363 832 except json.JSONDecodeError as e: 364 833 emit_event(

+27 -42

think/cortex.py

··· 355 355 else: 356 356 self.agent_handoffs.pop(agent_id, None) 357 357 358 - # Route based on config type: 359 - # - tools present -> agent (sol agents) 360 - # - output present (no tools) -> generator (sol generate) 361 - # - neither -> error 358 + # Validate config has either tools or output 362 359 has_tools = bool(config.get("tools")) 363 360 has_output = bool(config.get("output")) 364 361 362 + if not has_tools and not has_output: 363 + self.logger.error( 364 + f"Invalid agent config for {agent_id}: " 365 + "must have 'tools' or 'output' field" 366 + ) 367 + self._write_error_and_complete( 368 + file_path, 369 + "Invalid agent config: must have 'tools' or 'output' field", 370 + ) 371 + return 372 + 373 + # For tool agents: validate prompt and expand tool packs 365 374 if has_tools: 366 - # Agents require a prompt (generators don't - they use transcripts) 367 375 prompt = config.get("prompt") 368 376 if not prompt: 369 377 self.logger.error(f"Empty prompt in agent request {agent_id}") ··· 397 405 398 406 config["tools"] = expanded 399 407 400 - # Spawn the agent process with the merged config 401 - self._spawn_agent(agent_id, file_path, config) 402 - 403 - elif has_output: 404 - # Generator: has output format but no tools 405 - self._spawn_generator(agent_id, file_path, config) 406 - 407 - else: 408 - # Neither tools nor output - invalid config 409 - self.logger.error( 410 - f"Invalid agent config for {agent_id}: " 411 - "must have 'tools' or 'output' field" 412 - ) 413 - self._write_error_and_complete( 414 - file_path, 415 - "Invalid agent config: must have 'tools' or 'output' field", 416 - ) 408 + # Spawn agent process (handles both tool agents and generators) 409 + self._spawn_agent(agent_id, file_path, config) 417 410 418 411 except json.JSONDecodeError as e: 419 412 self.logger.error(f"Invalid JSON in request file {file_path}: {e}") ··· 428 421 file_path: Path, 429 422 config: Dict[str, Any], 430 423 ) -> None: 431 - """Spawn an agent subprocess and monitor its output using the merged config.""" 432 - if self.mcp_server_url and not config.get("disable_mcp", False): 433 - config.setdefault("mcp_server_url", self.mcp_server_url) 434 - self._spawn_subprocess(agent_id, file_path, config, ["sol", "agents"], "agent") 424 + """Spawn an agent subprocess and monitor its output. 435 425 436 - def _spawn_generator( 437 - self, 438 - agent_id: str, 439 - file_path: Path, 440 - config: Dict[str, Any], 441 - ) -> None: 442 - """Spawn a generator subprocess and monitor its output. 443 - 444 - Generators are like agents but process transcripts instead of using tools. 445 - They have 'output' field (format) but no 'tools' field. 426 + Handles both tool-using agents and generators - routing is done by 427 + the agents CLI based on config (tools vs output field). 446 428 """ 447 - self._spawn_subprocess( 448 - agent_id, file_path, config, ["sol", "generate"], "generator" 449 - ) 429 + # Only inject MCP server URL for tool agents (not generators) 430 + has_tools = bool(config.get("tools")) 431 + if has_tools and self.mcp_server_url and not config.get("disable_mcp", False): 432 + config.setdefault("mcp_server_url", self.mcp_server_url) 433 + 434 + self._spawn_subprocess(agent_id, file_path, config, ["sol", "agents"], "agent") 450 435 451 436 def _spawn_subprocess( 452 437 self, ··· 456 441 cmd: list[str], 457 442 process_type: str, 458 443 ) -> None: 459 - """Spawn a subprocess (agent or generator) and monitor its output. 444 + """Spawn a subprocess and monitor its output. 460 445 461 446 Args: 462 447 agent_id: Unique identifier for this process 463 448 file_path: Path to the JSONL log file 464 449 config: Configuration dict to pass via NDJSON stdin 465 - cmd: Command to run (e.g., ["sol", "agents"] or ["sol", "generate"]) 466 - process_type: Label for logging ("agent" or "generator") 450 + cmd: Command to run (e.g., ["sol", "agents"]) 451 + process_type: Label for logging ("agent") 467 452 """ 468 453 try: 469 454 # Store the config for later use - thread safe

+4 -5

think/dream.py

··· 16 16 day_input_summary, 17 17 day_log, 18 18 day_path, 19 - get_agents, 20 - get_generator_agents_by_schedule, 21 19 get_journal, 20 + get_muse_configs, 22 21 setup_cli, 23 22 ) 24 23 ··· 190 189 from think.cortex_client import get_agent_end_state 191 190 192 191 target_schedule = "segment" if segment else "daily" 193 - generators = get_generator_agents_by_schedule(target_schedule) 192 + generators = get_muse_configs(has_tools=False, has_output=True, schedule=target_schedule) 194 193 195 194 if not generators: 196 195 logging.info("No generators found for schedule: %s", target_schedule) ··· 311 310 if not check_callosum_available(): 312 311 logging.warning("Callosum socket not found - agents may fail to spawn") 313 312 314 - agents = get_agents() 313 + agents = get_muse_configs(has_tools=True) 315 314 316 315 # Group agents by priority 317 316 priority_groups: dict[int, list[tuple[str, dict]]] = {} ··· 469 468 Returns: 470 469 Number of agents spawned 471 470 """ 472 - agents = get_agents() 471 + agents = get_muse_configs(has_tools=True) 473 472 spawned = 0 474 473 475 474 for agent_name, config in agents.items():

-510

think/generate.py

··· 1 - # SPDX-License-Identifier: AGPL-3.0-only 2 - # Copyright (c) 2026 sol pbc 3 - 4 - """Generator pipeline for transcript analysis. 5 - 6 - Spawned by cortex when a request has 'output' field (no 'tools'). 7 - Reads NDJSON config from stdin, emits JSONL events to stdout. 8 - """ 9 - 10 - import json 11 - import logging 12 - import os 13 - import sys 14 - import time 15 - from collections.abc import Callable 16 - from datetime import datetime 17 - from pathlib import Path 18 - 19 - from google import genai 20 - from google.genai import types 21 - 22 - from think.agents import GenerateResult, JSONEventWriter 23 - from think.cluster import cluster, cluster_period, cluster_segments_multi 24 - from think.utils import ( 25 - compose_instructions, 26 - day_log, 27 - day_path, 28 - format_day, 29 - format_segment_times, 30 - get_generator_agents, 31 - get_output_path, 32 - load_output_hook, 33 - load_prompt, 34 - segment_parse, 35 - ) 36 - 37 - 38 - def scan_day(day: str) -> dict[str, list[str]]: 39 - """Return lists of processed and pending daily generator output files. 40 - 41 - Only scans daily generators (schedule='daily'). Segment generators are 42 - stored within segment directories and are not included here. 43 - """ 44 - from think.utils import get_generator_agents_by_schedule 45 - 46 - day_dir = day_path(day) 47 - daily_generators = get_generator_agents_by_schedule("daily", include_disabled=True) 48 - processed: list[str] = [] 49 - pending: list[str] = [] 50 - for key, meta in sorted(daily_generators.items()): 51 - output_format = meta.get("output") 52 - output_path = get_output_path(day_dir, key, output_format=output_format) 53 - if output_path.exists(): 54 - processed.append(os.path.join("agents", output_path.name)) 55 - else: 56 - pending.append(os.path.join("agents", output_path.name)) 57 - return {"processed": sorted(processed), "repairable": sorted(pending)} 58 - 59 - 60 - def _get_or_create_cache( 61 - client: genai.Client, 62 - model: str, 63 - display_name: str, 64 - transcript: str, 65 - system_instruction: str, 66 - ) -> str | None: 67 - """Return cache name for ``display_name`` or None if content too small. 68 - 69 - Creates cache with ``transcript`` and provided system instruction if needed. 70 - Returns None if content is below estimated 2048 token minimum (~10k chars). 71 - 72 - The cache contains the system instruction + transcript which are identical 73 - for all topics on the same day with the same system prompt, so display_name 74 - should include both day and system prompt name.""" 75 - 76 - MIN_CACHE_CHARS = 10000 # Heuristic: ~4 chars/token → 2048 tokens ≈ 8k-10k chars 77 - 78 - # Check existing caches first 79 - for c in client.caches.list(): 80 - if c.model == model and c.display_name == display_name: 81 - return c.name 82 - 83 - # Skip cache creation for small content 84 - if len(transcript) < MIN_CACHE_CHARS: 85 - return None 86 - 87 - cache = client.caches.create( 88 - model=model, 89 - config=types.CreateCachedContentConfig( 90 - display_name=display_name, 91 - system_instruction=system_instruction, 92 - contents=[transcript], 93 - ttl="1800s", # 30 minutes to accommodate multiple topic analyses 94 - ), 95 - ) 96 - return cache.name 97 - 98 - 99 - def generate_agent_output( 100 - transcript: str, 101 - prompt: str, 102 - api_key: str, 103 - cache_display_name: str | None = None, 104 - name: str | None = None, 105 - json_output: bool = False, 106 - system_instruction: str | None = None, 107 - thinking_budget: int | None = None, 108 - max_output_tokens: int | None = None, 109 - return_result: bool = False, 110 - ) -> str | GenerateResult: 111 - """Send clustered transcript to LLM for agent output generation. 112 - 113 - Args: 114 - transcript: Clustered transcript content (markdown format). 115 - prompt: Agent prompt text. 116 - api_key: Google API key for caching. 117 - cache_display_name: Optional cache key for Google content caching. 118 - Should include system prompt name for proper cache isolation. 119 - name: Agent name for token logging context. 120 - json_output: If True, request JSON response format. 121 - system_instruction: System instruction text. If None, loads default 122 - from journal.md via compose_instructions(). 123 - thinking_budget: Token budget for model thinking. If None, uses default. 124 - max_output_tokens: Maximum output tokens. If None, uses default. 125 - return_result: If True, return full GenerateResult with usage data. 126 - 127 - Returns: 128 - Generated agent output content (markdown or JSON string), or 129 - GenerateResult dict if return_result=True. 130 - """ 131 - from think.models import generate_with_result, resolve_provider 132 - 133 - # Use provided system_instruction or fall back to default 134 - if system_instruction is None: 135 - instructions = compose_instructions(include_datetime=False) 136 - system_instruction = instructions["system_instruction"] 137 - 138 - # Use defaults if not specified 139 - if thinking_budget is None: 140 - thinking_budget = 8192 * 3 141 - if max_output_tokens is None: 142 - max_output_tokens = 8192 * 6 143 - 144 - # Build context for provider routing and token logging 145 - output_type = "json" if json_output else "markdown" 146 - context = f"agent.{name}.{output_type}" if name else "agent.unknown" 147 - 148 - # Try to use cache if display name provided 149 - # Note: caching is Google-specific, so we check provider first 150 - provider, model = resolve_provider(context) 151 - 152 - client = None 153 - cache_name = None 154 - if cache_display_name and provider == "google": 155 - client = genai.Client(api_key=api_key) 156 - cache_name = _get_or_create_cache( 157 - client, model, cache_display_name, transcript, system_instruction 158 - ) 159 - 160 - if cache_name: 161 - # Cache hit: content already in cache, just send prompt. 162 - # Google-specific params (cached_content, client) are passed via kwargs. 163 - result = generate_with_result( 164 - contents=[prompt], 165 - context=context, 166 - temperature=0.3, 167 - max_output_tokens=max_output_tokens, 168 - thinking_budget=thinking_budget, 169 - model=model, 170 - cached_content=cache_name, 171 - client=client, 172 - json_output=json_output, 173 - ) 174 - else: 175 - # No cache: use unified generate() 176 - result = generate_with_result( 177 - contents=[transcript, prompt], 178 - context=context, 179 - temperature=0.3, 180 - max_output_tokens=max_output_tokens, 181 - thinking_budget=thinking_budget, 182 - system_instruction=system_instruction, 183 - json_output=json_output, 184 - ) 185 - 186 - if return_result: 187 - return result 188 - return result["text"] 189 - 190 - 191 - # Minimum content length for insight generation 192 - MIN_INPUT_CHARS = 50 193 - 194 - 195 - def _run_generator(config: dict, emit_event: Callable[[dict], None]) -> None: 196 - """Execute generator pipeline with config from cortex. 197 - 198 - Args: 199 - config: Merged config from cortex containing: 200 - - name: Generator key (e.g., 'activity', 'chat:sentiment') 201 - - day: Day in YYYYMMDD format 202 - - segment: Optional single segment key 203 - - segments: Optional list of segment keys 204 - - output: Output format ('md' or 'json') 205 - - output_path: Optional custom output path 206 - - force: Whether to regenerate existing output 207 - - provider: AI provider 208 - - model: Model name 209 - emit_event: Callback to emit JSONL events 210 - """ 211 - name = config.get("name", "default") 212 - day = config.get("day") 213 - segment = config.get("segment") 214 - segments = config.get("segments") # List of segment keys 215 - output_format = config.get("output", "md") 216 - output_path_override = config.get("output_path") 217 - force = config.get("force", False) 218 - provider = config.get("provider", "google") 219 - model = config.get("model") 220 - 221 - if not day: 222 - raise ValueError("Missing 'day' field in generator config") 223 - 224 - # Emit start event 225 - emit_event( 226 - { 227 - "event": "start", 228 - "ts": int(time.time() * 1000), 229 - "prompt": "", # Generators don't have user prompts 230 - "name": name, 231 - "model": model or "unknown", 232 - "provider": provider, 233 - } 234 - ) 235 - 236 - # Set segment key for token usage logging 237 - if segment: 238 - os.environ["SEGMENT_KEY"] = segment 239 - elif segments: 240 - os.environ["SEGMENT_KEY"] = segments[0] 241 - 242 - # Load generator metadata 243 - all_generators = get_generator_agents() 244 - if name in all_generators: 245 - meta = all_generators[name] 246 - agent_path = Path(meta["path"]) 247 - else: 248 - raise ValueError(f"Generator not found: {name}") 249 - 250 - # Check if generator is disabled 251 - if meta.get("disabled"): 252 - logging.info("Generator %s is disabled, skipping", name) 253 - emit_event( 254 - { 255 - "event": "finish", 256 - "ts": int(time.time() * 1000), 257 - "result": "", 258 - "skipped": "disabled", 259 - } 260 - ) 261 - return 262 - 263 - # Extract instructions config for source filtering and system prompt 264 - instructions_config = meta.get("instructions") 265 - instructions = compose_instructions( 266 - include_datetime=False, 267 - config_overrides=instructions_config, 268 - ) 269 - sources = instructions.get("sources") 270 - system_prompt_name = instructions.get("system_prompt_name", "journal") 271 - system_instruction = instructions["system_instruction"] 272 - 273 - # Track multi-segment mode 274 - multi_segment_mode = bool(segments) 275 - 276 - # Build transcript via clustering 277 - if segments: 278 - markdown, file_count = cluster_segments_multi(day, segments, sources=sources) 279 - elif segment: 280 - markdown, file_count = cluster_period(day, segment, sources=sources) 281 - else: 282 - markdown, file_count = cluster(day, sources=sources) 283 - 284 - day_dir = str(day_path(day)) 285 - 286 - # Skip generation when there's nothing to analyze 287 - if file_count == 0 or len(markdown.strip()) < MIN_INPUT_CHARS: 288 - logging.info( 289 - "Insufficient input (files=%d, chars=%d), skipping", 290 - file_count, 291 - len(markdown.strip()), 292 - ) 293 - emit_event( 294 - { 295 - "event": "finish", 296 - "ts": int(time.time() * 1000), 297 - "result": "", 298 - "skipped": "no_input", 299 - } 300 - ) 301 - day_log(day, f"generate {name} skipped (no input)") 302 - return 303 - 304 - # Prepend input context note for limited recordings 305 - if file_count < 3: 306 - input_note = ( 307 - "**Input Note:** Limited recordings for this day. " 308 - "Scale analysis to available input.\n\n" 309 - ) 310 - markdown = input_note + markdown 311 - 312 - # Build context for template substitution 313 - prompt_context: dict[str, str] = { 314 - "day": day, 315 - "date": format_day(day), 316 - } 317 - 318 - # Add segment context 319 - if segment: 320 - start_str, end_str = format_segment_times(segment) 321 - if start_str and end_str: 322 - prompt_context["segment"] = segment 323 - prompt_context["segment_start"] = start_str 324 - prompt_context["segment_end"] = end_str 325 - elif segments: 326 - all_times = [] 327 - for seg in segments: 328 - start_time, end_time = segment_parse(seg) 329 - if start_time and end_time: 330 - all_times.append((start_time, end_time)) 331 - 332 - if all_times: 333 - earliest_start = min(t[0] for t in all_times) 334 - latest_end = max(t[1] for t in all_times) 335 - start_str = ( 336 - datetime.combine(datetime.today(), earliest_start) 337 - .strftime("%I:%M %p") 338 - .lstrip("0") 339 - ) 340 - end_str = ( 341 - datetime.combine(datetime.today(), latest_end) 342 - .strftime("%I:%M %p") 343 - .lstrip("0") 344 - ) 345 - prompt_context["segment_start"] = start_str 346 - prompt_context["segment_end"] = end_str 347 - 348 - # Load prompt 349 - agent_prompt = load_prompt( 350 - agent_path.stem, base_dir=agent_path.parent, context=prompt_context 351 - ) 352 - prompt = agent_prompt.text 353 - 354 - # Determine output path 355 - is_json_output = output_format == "json" 356 - if output_path_override: 357 - output_path = Path(output_path_override) 358 - else: 359 - output_path = get_output_path( 360 - day_dir, name, segment=segment, output_format=output_format 361 - ) 362 - 363 - # Check if output exists (force check happens in cortex, but we handle it here too) 364 - output_exists = output_path.exists() and output_path.stat().st_size > 0 365 - 366 - # Determine cache settings 367 - if multi_segment_mode: 368 - cache_display_name = None 369 - elif segment: 370 - cache_display_name = f"{system_prompt_name}_{day}_{segment}" 371 - else: 372 - cache_display_name = f"{system_prompt_name}_{day}" 373 - 374 - # Extract generation parameters from metadata 375 - meta_thinking_budget = meta.get("thinking_budget") 376 - meta_max_output_tokens = meta.get("max_output_tokens") 377 - 378 - # Get API key 379 - api_key = os.getenv("GOOGLE_API_KEY", "") 380 - 381 - usage_data = None 382 - 383 - if output_exists and not force: 384 - # Load existing content (no LLM call) 385 - logging.info("Output exists, loading: %s", output_path) 386 - with open(output_path, "r") as f: 387 - result = f.read() 388 - else: 389 - # Generate new content 390 - if output_exists and force: 391 - logging.info("Force regenerating: %s", output_path) 392 - 393 - gen_result = generate_agent_output( 394 - markdown, 395 - prompt, 396 - api_key, 397 - cache_display_name=cache_display_name, 398 - name=name, 399 - json_output=is_json_output, 400 - system_instruction=system_instruction, 401 - thinking_budget=meta_thinking_budget, 402 - max_output_tokens=meta_max_output_tokens, 403 - return_result=True, 404 - ) 405 - result = gen_result["text"] 406 - usage_data = gen_result.get("usage") 407 - 408 - # Run post-processing hook if present 409 - if meta.get("hook_path"): 410 - hook_path = meta["hook_path"] 411 - try: 412 - hook_process = load_output_hook(hook_path) 413 - hook_context = { 414 - "day": day, 415 - "segment": segment, 416 - "multi_segment": multi_segment_mode, 417 - "name": name, 418 - "output_path": str(output_path), 419 - "meta": dict(meta), 420 - "transcript": markdown, 421 - } 422 - hook_result = hook_process(result, hook_context) 423 - if hook_result is not None: 424 - result = hook_result 425 - logging.info("Hook %s transformed result", hook_path) 426 - except Exception as exc: 427 - logging.error("Hook %s failed: %s", hook_path, exc) 428 - 429 - # Emit finish event with result (cortex handles file writing) 430 - finish_event = { 431 - "event": "finish", 432 - "ts": int(time.time() * 1000), 433 - "result": result, 434 - } 435 - if usage_data: 436 - finish_event["usage"] = usage_data 437 - 438 - emit_event(finish_event) 439 - 440 - # Log completion 441 - msg = f"generate {name} ok" 442 - if force: 443 - msg += " --force" 444 - day_log(day, msg) 445 - 446 - 447 - def main() -> None: 448 - """NDJSON-based CLI for generator pipeline. 449 - 450 - Reads config from stdin, emits JSONL events to stdout. 451 - Spawned by cortex when request has 'output' field (no 'tools'). 452 - """ 453 - import traceback 454 - 455 - # Configure basic logging (no argparse needed for NDJSON mode) 456 - logging.basicConfig(level=logging.INFO) 457 - 458 - # Always write to stdout only 459 - event_writer = JSONEventWriter(None) 460 - 461 - def emit_event(data: dict) -> None: 462 - if "ts" not in data: 463 - data["ts"] = int(time.time() * 1000) 464 - event_writer.emit(data) 465 - 466 - try: 467 - # NDJSON input mode from stdin 468 - for line in sys.stdin: 469 - line = line.strip() 470 - if not line: 471 - continue 472 - 473 - try: 474 - config = json.loads(line) 475 - 476 - _run_generator(config, emit_event) 477 - 478 - except json.JSONDecodeError as e: 479 - emit_event( 480 - { 481 - "event": "error", 482 - "error": f"Invalid JSON: {str(e)}", 483 - "ts": int(time.time() * 1000), 484 - } 485 - ) 486 - except Exception as e: 487 - emit_event( 488 - { 489 - "event": "error", 490 - "error": str(e), 491 - "trace": traceback.format_exc(), 492 - "ts": int(time.time() * 1000), 493 - } 494 - ) 495 - 496 - except Exception as exc: 497 - emit_event( 498 - { 499 - "event": "error", 500 - "error": str(exc), 501 - "trace": traceback.format_exc(), 502 - } 503 - ) 504 - raise 505 - finally: 506 - event_writer.close() 507 - 508 - 509 - if __name__ == "__main__": 510 - main()

+1 -1

think/journal_stats.py

··· 12 12 13 13 from observe.sense import scan_day as sense_scan_day 14 14 from observe.utils import VIDEO_EXTENSIONS, load_analysis_frames 15 - from think.generate import scan_day as generate_scan_day 15 + from think.agents import scan_day as generate_scan_day 16 16 from think.utils import day_dirs, get_journal, setup_cli 17 17 18 18 logger = logging.getLogger(__name__)

+92 -122

think/utils.py

··· 900 900 return process_func 901 901 902 902 903 - def get_generator_agents() -> dict[str, dict[str, object]]: 904 - """Return available generator agents with metadata and config overrides. 903 + def get_muse_configs( 904 + *, 905 + has_tools: bool | None = None, 906 + has_output: bool | None = None, 907 + schedule: str | None = None, 908 + include_disabled: bool = False, 909 + ) -> dict[str, dict[str, Any]]: 910 + """Load muse configs from system and app directories. 905 911 906 - Scans both system generators (muse/) and app generators (apps/*/muse/). 907 - Generators are identified by having a "schedule" field but no "tools" field 908 - in frontmatter (tool agents have tools, generators don't). 909 - 910 - Each key is the generator name: 911 - - System: "activity", "meetings" 912 - - App: "app:topic" (e.g., "chat:sentiment") 912 + Unified function for loading both tool-using agents and generators from 913 + muse/*.md files. Filters based on presence of tools/output fields. 913 914 914 - The value contains the ``path`` to the ``.md`` file, the ``color`` 915 - from the frontmatter, the file ``mtime``, a ``source`` field 916 - ("system" or "app"), and any keys loaded from the JSON frontmatter. 915 + Args: 916 + has_tools: If True, only configs with "tools" field (agents). 917 + If False, only configs without "tools" field. 918 + If None, no filtering on tools presence. 919 + has_output: If True, only configs with "output" field (generators). 920 + If False, only configs without "output" field. 921 + If None, no filtering on output presence. 922 + schedule: If provided, only configs where schedule matches this value 923 + (e.g., "segment", "daily"). 924 + include_disabled: If True, include configs with disabled=True. 925 + Default False (for processing pipelines). 917 926 918 - Journal config overrides (from config/journal.json "agents" section) 919 - are merged in, allowing ``disabled`` and ``extract`` to be 920 - overridden per generator. 927 + Returns: 928 + Dictionary mapping config keys to their metadata including: 929 + - path: Path to the .md file 930 + - source: "system" or "app" 931 + - app: App name (only for app configs) 932 + - All fields from frontmatter 921 933 """ 922 - generators: dict[str, dict[str, object]] = {} 934 + configs: dict[str, dict[str, Any]] = {} 935 + 936 + def matches_filter(info: dict) -> bool: 937 + """Check if config matches the filter criteria.""" 938 + # Check has_tools filter 939 + if has_tools is True and "tools" not in info: 940 + return False 941 + if has_tools is False and "tools" in info: 942 + return False 923 943 924 - # System generators from muse/ (have "schedule" but no "tools") 944 + # Check has_output filter 945 + if has_output is True and "output" not in info: 946 + return False 947 + if has_output is False and "output" in info: 948 + return False 949 + 950 + # Check specific schedule value 951 + if schedule is not None and info.get("schedule") != schedule: 952 + return False 953 + 954 + # Check disabled status 955 + if not include_disabled and info.get("disabled", False): 956 + return False 957 + 958 + return True 959 + 960 + # System configs from muse/ 925 961 if MUSE_DIR.is_dir(): 926 962 for md_path in sorted(MUSE_DIR.glob("*.md")): 927 963 name = md_path.stem 928 964 info = _load_prompt_metadata(md_path) 929 - # Generators have schedule but no tools (tool agents have tools) 930 - if "tools" in info or "schedule" not in info: 965 + 966 + if not matches_filter(info): 931 967 continue 968 + 932 969 info["source"] = "system" 933 - generators[name] = info 970 + 971 + # For tool agents, load full config via get_agent() 972 + if "tools" in info: 973 + try: 974 + config = get_agent(name) 975 + config["title"] = config.get("title", name) 976 + config["source"] = "system" 977 + configs[name] = config 978 + except Exception: 979 + pass # Skip configs that can't be loaded 980 + else: 981 + configs[name] = info 934 982 935 - # App generators from apps/*/muse/ 983 + # App configs from apps/*/muse/ 936 984 apps_dir = Path(__file__).parent.parent / "apps" 937 985 if apps_dir.is_dir(): 938 986 for app_path in sorted(apps_dir.iterdir()): ··· 943 991 continue 944 992 app_name = app_path.name 945 993 for md_path in sorted(app_muse_dir.glob("*.md")): 994 + item_name = md_path.stem 946 995 info = _load_prompt_metadata(md_path) 947 - # Generators have schedule but no tools (tool agents have tools) 948 - if "tools" in info or "schedule" not in info: 996 + 997 + if not matches_filter(info): 949 998 continue 950 - topic = md_path.stem 951 - key = f"{app_name}:{topic}" 999 + 1000 + key = f"{app_name}:{item_name}" 952 1001 info["source"] = "app" 953 1002 info["app"] = app_name 954 - generators[key] = info 1003 + 1004 + # For tool agents, load full config via get_agent() 1005 + if "tools" in info: 1006 + try: 1007 + config = get_agent(key) 1008 + config["title"] = config.get("title", item_name) 1009 + config["source"] = "app" 1010 + config["app"] = app_name 1011 + configs[key] = config 1012 + except Exception: 1013 + pass # Skip configs that can't be loaded 1014 + else: 1015 + configs[key] = info 955 1016 956 - # Merge journal config overrides 1017 + # Merge journal config overrides (applies to generators) 957 1018 overrides = get_config().get("agents", {}) 958 1019 for key, override in overrides.items(): 959 - if key in generators and isinstance(override, dict): 960 - # Only merge known override fields 1020 + if key in configs and isinstance(override, dict): 961 1021 if "disabled" in override: 962 - generators[key]["disabled"] = override["disabled"] 1022 + configs[key]["disabled"] = override["disabled"] 963 1023 if "extract" in override: 964 - generators[key]["extract"] = override["extract"] 1024 + configs[key]["extract"] = override["extract"] 965 1025 966 - return generators 967 - 968 - 969 - def get_generator_agents_by_schedule( 970 - schedule: str, 971 - *, 972 - include_disabled: bool = False, 973 - ) -> dict[str, dict[str, object]]: 974 - """Return generator agents matching the given schedule. 975 - 976 - Args: 977 - schedule: Target schedule (e.g., "segment" or "daily"). 978 - include_disabled: If True, include disabled generators (for settings UI). 979 - Default False (for processing pipelines). 980 - 981 - Returns: 982 - Dict of generator_key -> metadata for generators where schedule matches. 983 - """ 984 - all_generators = get_generator_agents() 985 - result: dict[str, dict[str, object]] = {} 986 - 987 - for key, meta in all_generators.items(): 988 - if not include_disabled and meta.get("disabled", False): 989 - continue 990 - if meta.get("schedule") == schedule: 991 - result[key] = meta 992 - 993 - return result 1026 + return configs 994 1027 995 1028 996 1029 def _resolve_agent_path(name: str) -> tuple[Path, str]: ··· 1355 1388 mime = "application/octet-stream" 1356 1389 1357 1390 return rel, mime, meta 1358 - 1359 - 1360 - def get_agents() -> dict[str, dict[str, Any]]: 1361 - """Load agent metadata from system and app directories. 1362 - 1363 - Scans both system agents (muse/) and app agents (apps/*/muse/). 1364 - Agents are identified by having a "tools" field in frontmatter 1365 - (generators have schedule but no tools). 1366 - System agents use simple keys like "default", while app agents are 1367 - namespaced as "app:agent" (e.g., "chat:helper"). 1368 - 1369 - Returns: 1370 - Dictionary mapping agent keys to their metadata including: 1371 - - title: Display title for the agent 1372 - - source: "system" or "app" 1373 - - app: App name (only for app agents) 1374 - - All configuration fields from get_agent() 1375 - """ 1376 - agents = {} 1377 - 1378 - # System agents from muse/ (identified by having "tools" field) 1379 - if MUSE_DIR.exists(): 1380 - for md_path in sorted(MUSE_DIR.glob("*.md")): 1381 - agent_id = md_path.stem 1382 - try: 1383 - # Quick check: load frontmatter to filter out generators 1384 - post = frontmatter.load(md_path) 1385 - if not post.metadata or "tools" not in post.metadata: 1386 - continue # This is an insight or hook, not an agent 1387 - config = get_agent(agent_id) 1388 - config["title"] = config.get("title", agent_id) 1389 - config["source"] = "system" 1390 - agents[agent_id] = config 1391 - except Exception: 1392 - pass # Skip agents that can't be loaded 1393 - 1394 - # App agents from apps/*/muse/ 1395 - apps_dir = Path(__file__).parent.parent / "apps" 1396 - if apps_dir.is_dir(): 1397 - for app_path in sorted(apps_dir.iterdir()): 1398 - if not app_path.is_dir() or app_path.name.startswith("_"): 1399 - continue 1400 - muse_dir = app_path / "muse" 1401 - if not muse_dir.is_dir(): 1402 - continue 1403 - app_name = app_path.name 1404 - for md_path in sorted(muse_dir.glob("*.md")): 1405 - agent_name = md_path.stem 1406 - try: 1407 - # Quick check: load frontmatter to filter out generators 1408 - post = frontmatter.load(md_path) 1409 - if not post.metadata or "tools" not in post.metadata: 1410 - continue # This is an insight or hook, not an agent 1411 - key = f"{app_name}:{agent_name}" 1412 - config = get_agent(key) 1413 - config["title"] = config.get("title", agent_name) 1414 - config["source"] = "app" 1415 - config["app"] = app_name 1416 - agents[key] = config 1417 - except Exception: 1418 - pass # Skip agents that can't be loaded 1419 - 1420 - return agents

Configure Feed

Configure Feed