Add pre-hook framework for modifying inputs before LLM calls

+27 -13

docs/APPS.md

··· 275 275 276 276 **Event extraction via hooks:** To extract structured events from generator output, use the `hook` field: 277 277 278 - - `"hook": "occurrence"` - Extracts past events to `facets/{facet}/events/{day}.jsonl` 279 - - `"hook": "anticipation"` - Extracts future scheduled events 278 + - `"hook": {"post": "occurrence"}` - Extracts past events to `facets/{facet}/events/{day}.jsonl` 279 + - `"hook": {"post": "anticipation"}` - Extracts future scheduled events 280 280 281 281 The `occurrences` field (optional string) provides topic-specific extraction guidance when using the occurrence hook. Example: 282 282 ··· 284 284 { 285 285 "title": "Meeting Summary", 286 286 "schedule": "daily", 287 - "hook": "occurrence", 287 + "hook": {"post": "occurrence"}, 288 288 "occurrences": "Each meeting should generate an occurrence with start and end times, participants, and summary." 289 289 } 290 290 ``` ··· 293 293 294 294 **Template variables:** Generator prompts can use template variables like `$name`, `$preferred`, `$daily_preamble`, and context variables like `$day` and `$date`. See [PROMPT_TEMPLATES.md](PROMPT_TEMPLATES.md) for the complete template system documentation. 295 295 296 - **Custom hooks:** Generators also support custom `.py` hooks for transforming output programmatically: 296 + **Custom hooks:** Both generators and tool-using agents support custom `.py` hooks for transforming inputs and outputs programmatically. Hooks support both pre-processing (before LLM call) and post-processing (after LLM call): 297 297 298 - - Create `{topic}.py` alongside `{topic}.md` for co-located hooks 299 - - Or use `"hook": "my_hook"` to reference `muse/my_hook.py` 300 - - Hook must define a `process(result, context)` function 298 + **Hook configuration:** 299 + - Use `"hook": {"pre": "my_hook"}` for pre-processing hooks 300 + - Use `"hook": {"post": "my_hook"}` for post-processing hooks 301 + - Use both together: `"hook": {"pre": "prep", "post": "process"}` 302 + - Resolution: `"name"` → `muse/{name}.py`, `"app:name"` → `apps/{app}/muse/{name}.py`, or explicit path 303 + 304 + **Pre-hooks** (`pre_process`): Modify inputs before the LLM call 305 + - `context` is a `PreHookContext` with: `name`, `agent_id`, `provider`, `model`, `prompt`, `system_instruction`, `user_instruction`, `extra_context`, `output_format`, `meta`, and for generators: `day`, `segment`, `span`, `transcript`, `output_path` 306 + - Return a dict of modified fields to merge back (e.g., `{"prompt": "modified"}`) 307 + - Return `None` for no changes 308 + 309 + **Post-hooks** (`post_process`): Transform output after the LLM call 301 310 - `result` is the LLM output (markdown or JSON string) 302 - - `context` dict contains: `day`, `segment`, `span`, `name`, `output_path`, `meta`, `transcript` 311 + - `context` is a `HookContext` with: `name`, `agent_id`, `provider`, `model`, `prompt`, `output_format`, `meta`, and for generators: `day`, `segment`, `span`, `transcript`, `output_path` 303 312 - Return modified string, or `None` to use original result 304 - - Hook errors are logged but don't crash the pipeline (falls back to original) 313 + 314 + Hook errors are logged but don't crash the pipeline (graceful degradation). 305 315 306 316 ```python 307 - # muse/my_generator.py 308 - def process(result: str, context: dict) -> str | None: 309 - # Transform, validate, or emit side effects 317 + # muse/my_hook.py 318 + def pre_process(context: dict) -> dict | None: 319 + # Modify inputs before LLM call 320 + return {"prompt": context["prompt"] + "\n\nBe concise."} 321 + 322 + def post_process(result: str, context: dict) -> str | None: 323 + # Transform output after LLM call 310 324 return result + "\n\n## Generated by hook" 311 325 ``` 312 326 ··· 314 328 - System generator templates: `muse/*.md` (files with `schedule` field but no `tools` field) 315 329 - Extraction hooks: `muse/occurrence.py`, `muse/anticipation.py` 316 330 - Discovery logic: `think/utils.py` - `get_muse_configs(has_tools=False)`, `get_output_topic()` 317 - - Hook loading: `think/utils.py` - `load_output_hook()` 331 + - Hook loading: `think/agents.py` - `load_pre_hook()`, `load_post_hook()` 318 332 319 333 --- 320 334

+10 -16

tests/test_generate_full.py

··· 125 125 muse_dir = Path(mod.__file__).resolve().parent.parent / "muse" 126 126 hook_file = muse_dir / "test_hook.py" 127 127 hook_file.write_text(""" 128 - def process(result, context): 128 + def post_process(result, context): 129 129 import json 130 130 from pathlib import Path 131 131 # Write context to file for test verification ··· 144 144 return None 145 145 """) 146 146 147 - # Create generator with hook 147 + # Create generator with hook (new format) 148 148 test_generator = muse_dir / "hooked_gen.md" 149 149 test_generator.write_text( 150 - '{\n "title": "Hooked",\n "schedule": "daily",\n "output": "md",\n "hook": "test_hook"\n}\n\nTest prompt' 150 + '{\n "title": "Hooked",\n "schedule": "daily",\n "output": "md",\n "hook": {"post": "test_hook"}\n}\n\nTest prompt' 151 151 ) 152 152 153 153 try: ··· 300 300 301 301 302 302 def test_named_hook_resolution(tmp_path, monkeypatch): 303 - """Test that named hooks are resolved from muse/{hook}.py.""" 304 - utils = importlib.import_module("think.utils") 305 - 306 - # Create generator with named hook 307 - generator_file = tmp_path / "test_generator.md" 308 - generator_file.write_text( 309 - '{\n "title": "Test",\n "hook": "occurrence"\n}\n\nTest prompt' 310 - ) 303 + """Test that named hooks are resolved via load_post_hook.""" 304 + agents = importlib.import_module("think.agents") 311 305 312 - meta = utils._load_prompt_metadata(generator_file) 306 + # Config with named hook (new format) 307 + config = {"hook": {"post": "occurrence"}} 308 + hook_fn = agents.load_post_hook(config) 313 309 314 - # Should resolve to muse/occurrence.py 315 - assert "hook_path" in meta 316 - assert meta["hook_path"].endswith("occurrence.py") 317 - assert "muse/occurrence.py" in meta["hook_path"].replace("\\", "/") 310 + # Should resolve to muse/occurrence.py and be callable 311 + assert callable(hook_fn)

+10 -3

tests/test_muse_cli.py

··· 65 65 """Property tags show output, tools, hook.""" 66 66 assert _property_tags({"output": "md"}) == "output:md" 67 67 assert _property_tags({"tools": "journal, todo"}) == "tools:journal, todo" 68 - assert _property_tags({"hook": "occurrence"}) == "hook:occurrence" 68 + 69 + # New dict-based hook format 70 + assert _property_tags({"hook": {"post": "occurrence"}}) == "hook:post=occurrence" 71 + assert _property_tags({"hook": {"pre": "prep"}}) == "hook:pre=prep" 72 + assert ( 73 + _property_tags({"hook": {"pre": "prep", "post": "process"}}) 74 + == "hook:pre=prep,post=process" 75 + ) 69 76 70 - tags = _property_tags({"output": "md", "hook": "occurrence"}) 77 + tags = _property_tags({"output": "md", "hook": {"post": "occurrence"}}) 71 78 assert "output:md" in tags 72 - assert "hook:occurrence" in tags 79 + assert "hook:post=occurrence" in tags 73 80 74 81 assert _property_tags({}) == "" 75 82 assert "disabled" in _property_tags({"disabled": True})

+405 -69

tests/test_output_hooks.py

··· 4 4 """Tests for the generator output hooks system. 5 5 6 6 Tests cover: 7 - - Hook loading and validation 7 + - Hook loading and validation via load_post_hook 8 8 - Hook invocation via NDJSON protocol 9 9 - Hook error handling 10 10 """ ··· 62 62 return events 63 63 64 64 65 - def test_load_output_hook_success(tmp_path): 66 - """Test loading a valid hook with process function.""" 67 - utils = importlib.import_module("think.utils") 65 + def test_load_post_hook_success(tmp_path): 66 + """Test loading a valid hook with post_process function.""" 67 + agents = importlib.import_module("think.agents") 68 68 69 69 hook_file = tmp_path / "test_hook.py" 70 70 hook_file.write_text(""" 71 - def process(result, context): 71 + def post_process(result, context): 72 72 return result + "\\n\\n## Added by hook" 73 73 """) 74 74 75 - process_func = utils.load_output_hook(hook_file) 76 - assert callable(process_func) 75 + # Config with explicit path 76 + config = {"hook": {"post": str(hook_file)}} 77 + hook_fn = agents.load_post_hook(config) 78 + assert callable(hook_fn) 77 79 78 80 # Test the hook transforms content 79 - output = process_func("Original", {"day": "20240101"}) 81 + output = hook_fn("Original", {"day": "20240101"}) 80 82 assert output == "Original\n\n## Added by hook" 81 83 82 84 83 - def test_load_output_hook_missing_process(tmp_path): 84 - """Test that hook without process function raises ValueError.""" 85 - utils = importlib.import_module("think.utils") 85 + def test_load_post_hook_missing_post_process(tmp_path): 86 + """Test that hook without post_process function raises ValueError.""" 87 + agents = importlib.import_module("think.agents") 86 88 87 89 hook_file = tmp_path / "bad_hook.py" 88 90 hook_file.write_text(""" ··· 90 92 pass 91 93 """) 92 94 95 + config = {"hook": {"post": str(hook_file)}} 93 96 try: 94 - utils.load_output_hook(hook_file) 97 + agents.load_post_hook(config) 95 98 assert False, "Should have raised ValueError" 96 99 except ValueError as e: 97 - assert "must define a 'process' function" in str(e) 100 + assert "must define a 'post_process' function" in str(e) 98 101 99 102 100 - def test_load_output_hook_process_not_callable(tmp_path): 101 - """Test that hook with non-callable process raises ValueError.""" 102 - utils = importlib.import_module("think.utils") 103 + def test_load_post_hook_not_callable(tmp_path): 104 + """Test that hook with non-callable post_process raises ValueError.""" 105 + agents = importlib.import_module("think.agents") 103 106 104 107 hook_file = tmp_path / "bad_hook.py" 105 108 hook_file.write_text(""" 106 - process = "not a function" 109 + post_process = "not a function" 107 110 """) 108 111 112 + config = {"hook": {"post": str(hook_file)}} 109 113 try: 110 - utils.load_output_hook(hook_file) 114 + agents.load_post_hook(config) 111 115 assert False, "Should have raised ValueError" 112 116 except ValueError as e: 113 - assert "'process' must be callable" in str(e) 117 + assert "'post_process' must be callable" in str(e) 114 118 115 119 116 - def test_prompt_metadata_includes_hook_path(tmp_path): 117 - """Test that _load_prompt_metadata detects .py hook file.""" 118 - utils = importlib.import_module("think.utils") 120 + def test_load_post_hook_no_hook_config(): 121 + """Test that missing hook config returns None.""" 122 + agents = importlib.import_module("think.agents") 119 123 120 - # Create prompt file with frontmatter 121 - md_file = tmp_path / "test_generator.md" 122 - md_file.write_text('{\n "title": "Test",\n "color": "#ff0000"\n}\n\nTest prompt') 124 + assert agents.load_post_hook({}) is None 125 + assert agents.load_post_hook({"hook": {}}) is None 126 + assert agents.load_post_hook({"hook": {"pre": "something"}}) is None 127 + 128 + 129 + def test_load_post_hook_named_resolution(): 130 + """Test that named hooks resolve to muse/{name}.py.""" 131 + agents = importlib.import_module("think.agents") 132 + 133 + # occurrence.py exists in muse/ 134 + config = {"hook": {"post": "occurrence"}} 135 + hook_fn = agents.load_post_hook(config) 136 + assert callable(hook_fn) 123 137 124 - hook_file = tmp_path / "test_generator.py" 125 - hook_file.write_text("def process(r, c): return r") 126 138 127 - meta = utils._load_prompt_metadata(md_file) 139 + def test_load_post_hook_file_not_found(tmp_path): 140 + """Test that nonexistent hook file raises ImportError.""" 141 + agents = importlib.import_module("think.agents") 128 142 129 - assert meta["path"] == str(md_file) 130 - assert meta["hook_path"] == str(hook_file) 131 - assert meta["title"] == "Test" 143 + config = {"hook": {"post": str(tmp_path / "nonexistent.py")}} 144 + try: 145 + agents.load_post_hook(config) 146 + assert False, "Should have raised ImportError" 147 + except ImportError as e: 148 + assert "not found" in str(e) 132 149 133 150 134 - def test_prompt_metadata_no_hook(tmp_path): 135 - """Test that _load_prompt_metadata works without hook file.""" 151 + def test_prompt_metadata_no_hook_path(tmp_path): 152 + """Test that _load_prompt_metadata no longer sets hook_path.""" 136 153 utils = importlib.import_module("think.utils") 137 154 138 155 md_file = tmp_path / "test_generator.md" 139 - md_file.write_text("Test prompt") 156 + md_file.write_text( 157 + '{\n "title": "Test",\n "hook": {"post": "entities"}\n}\n\nTest prompt' 158 + ) 159 + 160 + # Create a co-located .py file 161 + hook_file = tmp_path / "test_generator.py" 162 + hook_file.write_text("def post_process(r, c): return r") 140 163 141 164 meta = utils._load_prompt_metadata(md_file) 142 165 166 + # hook_path should no longer be set (hooks are loaded via load_post_hook) 167 + assert "hook_path" not in meta 143 168 assert meta["path"] == str(md_file) 144 - assert "hook_path" not in meta 169 + assert meta["title"] == "Test" 145 170 146 171 147 172 def test_output_hook_invocation(tmp_path, monkeypatch): ··· 154 179 155 180 prompt_file = muse_dir / "hooked_test.md" 156 181 prompt_file.write_text( 157 - '{\n "title": "Hooked",\n "schedule": "daily",\n "output": "md"\n}\n\nTest prompt' 182 + '{\n "title": "Hooked",\n "schedule": "daily",\n "output": "md",\n "hook": {"post": "hooked_test"}\n}\n\nTest prompt' 158 183 ) 159 184 160 185 hook_file = muse_dir / "hooked_test.py" 161 186 hook_file.write_text(""" 162 - def process(result, context): 187 + def post_process(result, context): 163 188 # Verify context has expected fields 164 189 assert "day" in context 165 190 assert "transcript" in context ··· 212 237 213 238 prompt_file = muse_dir / "noop_test.md" 214 239 prompt_file.write_text( 215 - '{\n "title": "Noop",\n "schedule": "daily",\n "output": "md"\n}\n\nTest prompt' 240 + '{\n "title": "Noop",\n "schedule": "daily",\n "output": "md",\n "hook": {"post": "noop_test"}\n}\n\nTest prompt' 216 241 ) 217 242 218 243 hook_file = muse_dir / "noop_test.py" 219 244 hook_file.write_text(""" 220 - def process(result, context): 245 + def post_process(result, context): 221 246 return None # Signal to use original 222 247 """) 223 248 ··· 262 287 263 288 prompt_file = muse_dir / "broken_test.md" 264 289 prompt_file.write_text( 265 - '{\n "title": "Broken",\n "schedule": "daily",\n "output": "md"\n}\n\nTest prompt' 290 + '{\n "title": "Broken",\n "schedule": "daily",\n "output": "md",\n "hook": {"post": "broken_test"}\n}\n\nTest prompt' 266 291 ) 267 292 268 293 hook_file = muse_dir / "broken_test.py" 269 294 hook_file.write_text(""" 270 - def process(result, context): 295 + def post_process(result, context): 271 296 raise RuntimeError("Hook exploded!") 272 297 """) 273 298 ··· 304 329 prompt_file.unlink() 305 330 306 331 307 - def test_named_hook_resolution_takes_precedence(tmp_path): 308 - """Test that named hooks via 'hook' field take precedence over co-located .py files.""" 309 - utils = importlib.import_module("think.utils") 332 + def test_build_hook_context(): 333 + """Test that build_hook_context creates correct context.""" 334 + agents = importlib.import_module("think.agents") 335 + 336 + config = { 337 + "name": "test_gen", 338 + "agent_id": "123456", 339 + "provider": "google", 340 + "model": "gemini-2.0-flash", 341 + "prompt": "test prompt", 342 + "output": "md", 343 + "day": "20240101", 344 + "segment": "120000_3600", 345 + } 310 346 311 - # Create prompt file with named hook 312 - md_file = tmp_path / "test_generator.md" 313 - md_file.write_text( 314 - '{\n "title": "Test",\n "hook": "occurrence"\n}\n\nTest prompt' 347 + context = agents.build_hook_context( 348 + config, 349 + transcript="test transcript", 350 + output_path="/tmp/test.md", 351 + span=False, 315 352 ) 316 353 317 - # Also create a co-located .py file that would normally be picked up 318 - colocated_hook = tmp_path / "test_generator.py" 319 - colocated_hook.write_text("def process(r, c): return 'colocated'") 354 + assert context["name"] == "test_gen" 355 + assert context["agent_id"] == "123456" 356 + assert context["provider"] == "google" 357 + assert context["model"] == "gemini-2.0-flash" 358 + assert context["prompt"] == "test prompt" 359 + assert context["output_format"] == "md" 360 + assert context["day"] == "20240101" 361 + assert context["segment"] == "120000_3600" 362 + assert context["transcript"] == "test transcript" 363 + assert context["output_path"] == "/tmp/test.md" 364 + assert context["span"] is False 365 + assert context["meta"] == config 366 + 367 + 368 + def test_run_post_hook_transforms_result(): 369 + """Test that run_post_hook applies transformation.""" 370 + agents = importlib.import_module("think.agents") 371 + 372 + def hook(result, context): 373 + return result.upper() 374 + 375 + context = agents.build_hook_context({"name": "test"}) 376 + output = agents.run_post_hook("hello world", context, hook) 320 377 321 - meta = utils._load_prompt_metadata(md_file) 378 + assert output == "HELLO WORLD" 322 379 323 - # Should resolve to named hook, not co-located 324 - assert "hook_path" in meta 325 - assert meta["hook_path"].endswith("occurrence.py") 326 - assert "muse/occurrence.py" in meta["hook_path"].replace("\\", "/") 327 380 381 + def test_run_post_hook_none_keeps_original(): 382 + """Test that run_post_hook keeps original when hook returns None.""" 383 + agents = importlib.import_module("think.agents") 328 384 329 - def test_named_hook_nonexistent_falls_through(tmp_path): 330 - """Test that nonexistent named hooks fall back to co-located .py files.""" 331 - utils = importlib.import_module("think.utils") 385 + def hook(result, context): 386 + return None 332 387 333 - # Create prompt file with nonexistent named hook 334 - md_file = tmp_path / "test_generator.md" 335 - md_file.write_text( 336 - '{\n "title": "Test",\n "hook": "nonexistent_hook_xyz"\n}\n\nTest prompt' 388 + context = agents.build_hook_context({"name": "test"}) 389 + output = agents.run_post_hook("original", context, hook) 390 + 391 + assert output == "original" 392 + 393 + 394 + def test_run_post_hook_error_keeps_original(): 395 + """Test that run_post_hook keeps original on error.""" 396 + agents = importlib.import_module("think.agents") 397 + 398 + def hook(result, context): 399 + raise RuntimeError("boom") 400 + 401 + context = agents.build_hook_context({"name": "test"}) 402 + output = agents.run_post_hook("original", context, hook) 403 + 404 + assert output == "original" 405 + 406 + 407 + # ============================================================================= 408 + # Pre-hook Tests 409 + # ============================================================================= 410 + 411 + 412 + def test_load_pre_hook_success(tmp_path): 413 + """Test loading a valid hook with pre_process function.""" 414 + agents = importlib.import_module("think.agents") 415 + 416 + hook_file = tmp_path / "test_pre_hook.py" 417 + hook_file.write_text(""" 418 + def pre_process(context): 419 + return {"prompt": context["prompt"] + " [modified]"} 420 + """) 421 + 422 + config = {"hook": {"pre": str(hook_file)}} 423 + hook_fn = agents.load_pre_hook(config) 424 + assert callable(hook_fn) 425 + 426 + # Test the hook returns modifications 427 + result = hook_fn({"prompt": "original"}) 428 + assert result == {"prompt": "original [modified]"} 429 + 430 + 431 + def test_load_pre_hook_missing_pre_process(tmp_path): 432 + """Test that hook without pre_process function raises ValueError.""" 433 + agents = importlib.import_module("think.agents") 434 + 435 + hook_file = tmp_path / "bad_hook.py" 436 + hook_file.write_text(""" 437 + def other_function(): 438 + pass 439 + """) 440 + 441 + config = {"hook": {"pre": str(hook_file)}} 442 + try: 443 + agents.load_pre_hook(config) 444 + assert False, "Should have raised ValueError" 445 + except ValueError as e: 446 + assert "must define a 'pre_process' function" in str(e) 447 + 448 + 449 + def test_load_pre_hook_not_callable(tmp_path): 450 + """Test that hook with non-callable pre_process raises ValueError.""" 451 + agents = importlib.import_module("think.agents") 452 + 453 + hook_file = tmp_path / "bad_hook.py" 454 + hook_file.write_text(""" 455 + pre_process = "not a function" 456 + """) 457 + 458 + config = {"hook": {"pre": str(hook_file)}} 459 + try: 460 + agents.load_pre_hook(config) 461 + assert False, "Should have raised ValueError" 462 + except ValueError as e: 463 + assert "'pre_process' must be callable" in str(e) 464 + 465 + 466 + def test_load_pre_hook_no_hook_config(): 467 + """Test that missing hook config returns None.""" 468 + agents = importlib.import_module("think.agents") 469 + 470 + assert agents.load_pre_hook({}) is None 471 + assert agents.load_pre_hook({"hook": {}}) is None 472 + assert agents.load_pre_hook({"hook": {"post": "something"}}) is None 473 + 474 + 475 + def test_load_pre_hook_file_not_found(tmp_path): 476 + """Test that nonexistent hook file raises ImportError.""" 477 + agents = importlib.import_module("think.agents") 478 + 479 + config = {"hook": {"pre": str(tmp_path / "nonexistent.py")}} 480 + try: 481 + agents.load_pre_hook(config) 482 + assert False, "Should have raised ImportError" 483 + except ImportError as e: 484 + assert "not found" in str(e) 485 + 486 + 487 + def test_build_pre_hook_context(): 488 + """Test that build_pre_hook_context creates correct context.""" 489 + agents = importlib.import_module("think.agents") 490 + 491 + config = { 492 + "name": "test_gen", 493 + "agent_id": "123456", 494 + "provider": "google", 495 + "model": "gemini-2.0-flash", 496 + "prompt": "test prompt", 497 + "system_instruction": "be helpful", 498 + "user_instruction": "answer questions", 499 + "extra_context": "extra info", 500 + "output": "md", 501 + "day": "20240101", 502 + "segment": "120000_3600", 503 + } 504 + 505 + context = agents.build_pre_hook_context( 506 + config, 507 + transcript="test transcript", 508 + output_path="/tmp/test.md", 509 + span=False, 337 510 ) 338 511 339 - # Create a co-located .py file 340 - colocated_hook = tmp_path / "test_generator.py" 341 - colocated_hook.write_text("def process(r, c): return 'colocated'") 512 + assert context["name"] == "test_gen" 513 + assert context["agent_id"] == "123456" 514 + assert context["provider"] == "google" 515 + assert context["model"] == "gemini-2.0-flash" 516 + assert context["prompt"] == "test prompt" 517 + assert context["system_instruction"] == "be helpful" 518 + assert context["user_instruction"] == "answer questions" 519 + assert context["extra_context"] == "extra info" 520 + assert context["output_format"] == "md" 521 + assert context["day"] == "20240101" 522 + assert context["segment"] == "120000_3600" 523 + assert context["transcript"] == "test transcript" 524 + assert context["output_path"] == "/tmp/test.md" 525 + assert context["span"] is False 526 + assert context["meta"] == config 527 + 528 + 529 + def test_run_pre_hook_returns_modifications(): 530 + """Test that run_pre_hook returns modifications dict.""" 531 + agents = importlib.import_module("think.agents") 532 + 533 + def hook(context): 534 + return {"prompt": "modified prompt", "transcript": "modified transcript"} 535 + 536 + context = agents.build_pre_hook_context({"name": "test", "prompt": "original"}) 537 + result = agents.run_pre_hook(context, hook) 538 + 539 + assert result == {"prompt": "modified prompt", "transcript": "modified transcript"} 540 + 541 + 542 + def test_run_pre_hook_none_returns_none(): 543 + """Test that run_pre_hook returns None when hook returns None.""" 544 + agents = importlib.import_module("think.agents") 545 + 546 + def hook(context): 547 + return None 548 + 549 + context = agents.build_pre_hook_context({"name": "test"}) 550 + result = agents.run_pre_hook(context, hook) 551 + 552 + assert result is None 553 + 554 + 555 + def test_run_pre_hook_error_returns_none(): 556 + """Test that run_pre_hook returns None on error.""" 557 + agents = importlib.import_module("think.agents") 558 + 559 + def hook(context): 560 + raise RuntimeError("boom") 561 + 562 + context = agents.build_pre_hook_context({"name": "test"}) 563 + result = agents.run_pre_hook(context, hook) 564 + 565 + assert result is None 566 + 567 + 568 + def test_pre_hook_invocation(tmp_path, monkeypatch): 569 + """Test that agents.py invokes pre-hook and uses modified inputs.""" 570 + mod = importlib.import_module("think.agents") 571 + copy_day(tmp_path) 572 + 573 + muse_dir = Path(mod.__file__).resolve().parent.parent / "muse" 574 + 575 + prompt_file = muse_dir / "prehooked_test.md" 576 + prompt_file.write_text( 577 + '{\n "title": "Prehooked",\n "schedule": "daily",\n "output": "md",\n "hook": {"pre": "prehooked_test"}\n}\n\nOriginal prompt' 578 + ) 579 + 580 + hook_file = muse_dir / "prehooked_test.py" 581 + hook_file.write_text(""" 582 + def pre_process(context): 583 + # Verify context has expected fields 584 + assert "transcript" in context 585 + assert "prompt" in context 586 + assert "system_instruction" in context 587 + # Modify the prompt 588 + return {"prompt": context["prompt"] + " [pre-processed]"} 589 + """) 590 + 591 + try: 592 + # Track what generate_agent_output receives 593 + received_args = {} 594 + 595 + def mock_generate(*args, **kwargs): 596 + received_args["transcript"] = args[0] 597 + received_args["prompt"] = args[1] 598 + return MOCK_RESULT if kwargs.get("return_result") else MOCK_RESULT["text"] 599 + 600 + monkeypatch.setattr(mod, "generate_agent_output", mock_generate) 601 + monkeypatch.setenv("GOOGLE_API_KEY", "x") 602 + monkeypatch.setenv("JOURNAL_PATH", str(tmp_path)) 603 + 604 + config = { 605 + "name": "prehooked_test", 606 + "day": "20240101", 607 + "output": "md", 608 + "provider": "google", 609 + "model": "gemini-2.0-flash", 610 + } 611 + 612 + events = run_generator_with_config(mod, config, monkeypatch) 613 + 614 + # Verify pre-hook modified the prompt 615 + assert "[pre-processed]" in received_args["prompt"] 342 616 343 - meta = utils._load_prompt_metadata(md_file) 617 + # Verify generator still completed successfully 618 + finish_events = [e for e in events if e["event"] == "finish"] 619 + assert len(finish_events) == 1 344 620 345 - # Named hook doesn't exist, so no hook_path should be set 346 - assert "hook_path" not in meta 621 + finally: 622 + if hook_file.exists(): 623 + hook_file.unlink() 624 + if prompt_file.exists(): 625 + prompt_file.unlink() 626 + 627 + 628 + def test_both_pre_and_post_hooks(tmp_path, monkeypatch): 629 + """Test that both pre and post hooks can be configured together.""" 630 + mod = importlib.import_module("think.agents") 631 + copy_day(tmp_path) 632 + 633 + muse_dir = Path(mod.__file__).resolve().parent.parent / "muse" 634 + 635 + prompt_file = muse_dir / "both_hooks_test.md" 636 + prompt_file.write_text( 637 + '{\n "title": "Both Hooks",\n "schedule": "daily",\n "output": "md",\n "hook": {"pre": "both_hooks_test", "post": "both_hooks_test"}\n}\n\nOriginal prompt' 638 + ) 639 + 640 + hook_file = muse_dir / "both_hooks_test.py" 641 + hook_file.write_text(""" 642 + def pre_process(context): 643 + return {"prompt": context["prompt"] + " [pre]"} 644 + 645 + def post_process(result, context): 646 + return result + "\\n\\n[post]" 647 + """) 648 + 649 + try: 650 + received_args = {} 651 + 652 + def mock_generate(*args, **kwargs): 653 + received_args["prompt"] = args[1] 654 + return MOCK_RESULT if kwargs.get("return_result") else MOCK_RESULT["text"] 655 + 656 + monkeypatch.setattr(mod, "generate_agent_output", mock_generate) 657 + monkeypatch.setenv("GOOGLE_API_KEY", "x") 658 + monkeypatch.setenv("JOURNAL_PATH", str(tmp_path)) 659 + 660 + config = { 661 + "name": "both_hooks_test", 662 + "day": "20240101", 663 + "output": "md", 664 + "provider": "google", 665 + "model": "gemini-2.0-flash", 666 + } 667 + 668 + events = run_generator_with_config(mod, config, monkeypatch) 669 + 670 + # Verify pre-hook modified the prompt 671 + assert "[pre]" in received_args["prompt"] 672 + 673 + # Verify post-hook modified the result 674 + finish_events = [e for e in events if e["event"] == "finish"] 675 + assert len(finish_events) == 1 676 + assert "[post]" in finish_events[0]["result"] 677 + 678 + finally: 679 + if hook_file.exists(): 680 + hook_file.unlink() 681 + if prompt_file.exists(): 682 + prompt_file.unlink()

+163 -34

think/agents.py

··· 328 328 meta: dict # Full frontmatter/config 329 329 330 330 331 + class PreHookContext(TypedDict, total=False): 332 + """Context passed to pre-processing hook functions. 333 + 334 + Pre-hooks receive all inputs before the LLM call and can modify them. 335 + Returns a dict of modified fields to merge back. 336 + """ 337 + 338 + # Identity 339 + name: str # Agent/generator name 340 + agent_id: str # Unique agent ID 341 + provider: str # google/anthropic/openai 342 + model: str # Model used 343 + 344 + # Temporal (generators) 345 + day: str # YYYYMMDD 346 + segment: str # Segment key 347 + span: bool # True if span mode 348 + 349 + # Modifiable inputs 350 + prompt: str # User prompt (can modify) 351 + system_instruction: str # System prompt (can modify) 352 + user_instruction: str # User instruction (agents, can modify) 353 + extra_context: str # Extra context (agents, can modify) 354 + transcript: str # Clustered transcript (generators, can modify) 355 + 356 + # Output settings 357 + output_path: str # Where result will be written 358 + output_format: str # 'md' or 'json' 359 + 360 + # Full config (read-only reference) 361 + meta: dict # Full frontmatter/config 362 + 363 + 331 364 # MUSE_DIR for hook resolution 332 365 _MUSE_DIR = Path(__file__).parent.parent / "muse" 333 366 334 367 335 - def load_post_hook(config: dict) -> Callable[[str, HookContext], str | None] | None: 336 - """Load post-processing hook from config if defined. 368 + def _resolve_hook_path(hook_name: str) -> Path: 369 + """Resolve hook name to file path. 337 370 338 - Hook config format: {"hook": {"post": "name"}} 339 371 Resolution: 340 372 - Named: "name" -> muse/{name}.py 341 373 - App-qualified: "app:name" -> apps/{app}/muse/{name}.py 342 374 - Explicit path: "path/to/hook.py" -> direct path 375 + """ 376 + if "/" in hook_name or hook_name.endswith(".py"): 377 + return Path(hook_name) 378 + elif ":" in hook_name: 379 + app, name = hook_name.split(":", 1) 380 + return Path(__file__).parent.parent / "apps" / app / "muse" / f"{name}.py" 381 + else: 382 + return _MUSE_DIR / f"{hook_name}.py" 383 + 384 + 385 + def _load_hook_function(config: dict, key: str, func_name: str) -> Callable | None: 386 + """Load a hook function from config. 343 387 344 388 Args: 345 389 config: Agent/generator config dict 390 + key: Hook key in config ("pre" or "post") 391 + func_name: Function name to load ("pre_process" or "post_process") 346 392 347 393 Returns: 348 - The post_process function from the hook module, or None if no hook. 394 + The hook function, or None if no hook configured. 349 395 350 396 Raises: 351 - ValueError: If hook file doesn't define post_process function. 397 + ValueError: If hook file doesn't define the required function. 352 398 ImportError: If hook file cannot be loaded. 353 399 """ 354 400 import importlib.util ··· 357 403 if not hook_config or not isinstance(hook_config, dict): 358 404 return None 359 405 360 - post_hook_name = hook_config.get("post") 361 - if not post_hook_name: 406 + hook_name = hook_config.get(key) 407 + if not hook_name: 362 408 return None 363 409 364 - # Resolve hook path 365 - if "/" in post_hook_name or post_hook_name.endswith(".py"): 366 - # Explicit path 367 - hook_path = Path(post_hook_name) 368 - elif ":" in post_hook_name: 369 - # App-qualified: "app:name" -> apps/{app}/muse/{name}.py 370 - app, name = post_hook_name.split(":", 1) 371 - hook_path = Path(__file__).parent.parent / "apps" / app / "muse" / f"{name}.py" 372 - else: 373 - # Named hook: muse/{name}.py 374 - hook_path = _MUSE_DIR / f"{post_hook_name}.py" 410 + hook_path = _resolve_hook_path(hook_name) 375 411 376 412 if not hook_path.exists(): 377 413 raise ImportError(f"Hook file not found: {hook_path}") 378 414 379 415 spec = importlib.util.spec_from_file_location( 380 - f"post_hook_{hook_path.stem}", hook_path 416 + f"{key}_hook_{hook_path.stem}", hook_path 381 417 ) 382 418 if spec is None or spec.loader is None: 383 419 raise ImportError(f"Cannot load hook from {hook_path}") ··· 385 421 module = importlib.util.module_from_spec(spec) 386 422 spec.loader.exec_module(module) 387 423 388 - if not hasattr(module, "post_process"): 389 - raise ValueError(f"Hook {hook_path} must define a 'post_process' function") 424 + if not hasattr(module, func_name): 425 + raise ValueError(f"Hook {hook_path} must define a '{func_name}' function") 390 426 391 - process_func = getattr(module, "post_process") 427 + process_func = getattr(module, func_name) 392 428 if not callable(process_func): 393 - raise ValueError(f"Hook {hook_path} 'post_process' must be callable") 429 + raise ValueError(f"Hook {hook_path} '{func_name}' must be callable") 394 430 395 431 return process_func 396 432 397 433 398 - def build_hook_context(config: dict, **extras: Any) -> HookContext: 399 - """Build unified HookContext from config and extra values. 434 + def load_post_hook(config: dict) -> Callable[[str, HookContext], str | None] | None: 435 + """Load post-processing hook from config if defined. 400 436 401 - Args: 402 - config: Agent/generator config dict 403 - **extras: Additional context values (transcript, output_path, etc.) 437 + Hook config format: {"hook": {"post": "name"}} 438 + """ 439 + return _load_hook_function(config, "post", "post_process") 404 440 405 - Returns: 406 - HookContext with all available fields populated. 441 + 442 + def load_pre_hook(config: dict) -> Callable[[PreHookContext], dict | None] | None: 443 + """Load pre-processing hook from config if defined. 444 + 445 + Hook config format: {"hook": {"pre": "name"}} 407 446 """ 408 - context: HookContext = { 447 + return _load_hook_function(config, "pre", "pre_process") 448 + 449 + 450 + def _build_base_context(config: dict) -> dict: 451 + """Build common context fields shared by pre and post hooks.""" 452 + context = { 409 453 "name": config.get("name", ""), 410 454 "agent_id": config.get("agent_id", ""), 411 455 "provider": config.get("provider", ""), ··· 421 465 if "segment" in config: 422 466 context["segment"] = config["segment"] 423 467 468 + return context 469 + 470 + 471 + def build_pre_hook_context(config: dict, **extras: Any) -> PreHookContext: 472 + """Build PreHookContext from config and extra values.""" 473 + context: PreHookContext = _build_base_context(config) 474 + 475 + # Add pre-hook specific fields 476 + context["system_instruction"] = config.get("system_instruction", "") 477 + context["user_instruction"] = config.get("user_instruction", "") 478 + context["extra_context"] = config.get("extra_context", "") 479 + 424 480 # Merge extras (transcript, output_path, span, etc.) 425 481 context.update(extras) 426 482 427 483 return context 428 484 429 485 486 + def build_hook_context(config: dict, **extras: Any) -> HookContext: 487 + """Build HookContext from config and extra values.""" 488 + context: HookContext = _build_base_context(config) 489 + 490 + # Merge extras (transcript, output_path, span, etc.) 491 + context.update(extras) 492 + 493 + return context 494 + 495 + 496 + def run_pre_hook( 497 + context: PreHookContext, 498 + hook_fn: Callable[[PreHookContext], dict | None], 499 + ) -> dict | None: 500 + """Execute pre-processing hook and return modifications dict. 501 + 502 + Hook errors are logged and return None (graceful degradation). 503 + """ 504 + try: 505 + modifications = hook_fn(context) 506 + if modifications is not None: 507 + logging.info( 508 + "Pre-hook returned modifications: %s", list(modifications.keys()) 509 + ) 510 + return modifications 511 + except Exception as exc: 512 + logging.error("Pre-hook failed: %s", exc) 513 + 514 + return None 515 + 516 + 430 517 def run_post_hook( 431 518 result: str, 432 519 context: HookContext, ··· 464 551 "GenerateResult", 465 552 "Event", 466 553 "HookContext", 554 + "PreHookContext", 467 555 "JSONEventWriter", 468 556 "JSONEventCallback", 469 557 "format_tool_summary", 470 558 "parse_agent_events_to_turns", 471 559 "load_post_hook", 560 + "load_pre_hook", 472 561 "build_hook_context", 562 + "build_pre_hook_context", 473 563 "run_post_hook", 564 + "run_pre_hook", 474 565 "scan_day", 475 566 "generate_agent_output", 476 567 ] ··· 603 694 if cache_display_name and provider == "google": 604 695 client = genai.Client( 605 696 api_key=api_key, 606 - http_options=types.HttpOptions( 607 - retry_options=types.HttpRetryOptions() 608 - ), 697 + http_options=types.HttpOptions(retry_options=types.HttpRetryOptions()), 609 698 ) 610 699 cache_name = _get_or_create_cache( 611 700 client, model, cache_display_name, transcript, system_instruction ··· 840 929 if output_exists and force: 841 930 logging.info("Force regenerating: %s", output_path) 842 931 932 + # Run pre-processing hook if present (before LLM call) 933 + pre_hook = load_pre_hook(meta) 934 + if pre_hook: 935 + pre_context = build_pre_hook_context( 936 + meta, 937 + name=name, 938 + day=day, 939 + segment=segment, 940 + span=span_mode, 941 + output_path=str(output_path), 942 + transcript=markdown, 943 + prompt=prompt, 944 + system_instruction=system_instruction, 945 + ) 946 + modifications = run_pre_hook(pre_context, pre_hook) 947 + if modifications: 948 + # Apply modifications to inputs 949 + markdown = modifications.get("transcript", markdown) 950 + prompt = modifications.get("prompt", prompt) 951 + system_instruction = modifications.get( 952 + "system_instruction", system_instruction 953 + ) 954 + 843 955 gen_result = generate_agent_output( 844 956 markdown, 845 957 prompt, ··· 860 972 if post_hook: 861 973 hook_context = build_hook_context( 862 974 meta, 975 + name=name, 863 976 day=day, 864 977 segment=segment, 865 978 span=span_mode, ··· 972 1085 raise ValueError( 973 1086 f"Unknown provider: {provider!r}. Valid providers: {valid}" 974 1087 ) 1088 + 1089 + # Load pre hook if configured (before LLM call) 1090 + pre_hook = load_pre_hook(config) 1091 + if pre_hook: 1092 + pre_context = build_pre_hook_context(config) 1093 + modifications = run_pre_hook(pre_context, pre_hook) 1094 + if modifications: 1095 + # Apply modifications to config 1096 + for key in ( 1097 + "prompt", 1098 + "system_instruction", 1099 + "user_instruction", 1100 + "extra_context", 1101 + ): 1102 + if key in modifications: 1103 + config[key] = modifications[key] 975 1104 976 1105 # Load post hook if configured 977 1106 post_hook = load_post_hook(config)

+18 -6

think/muse_cli.py

··· 36 36 _PROJECT_ROOT = Path(__file__).parent.parent 37 37 38 38 # Internal bookkeeping keys to exclude from JSONL output 39 - _INTERNAL_KEYS = frozenset({"path", "mtime", "hook_path"}) 39 + _INTERNAL_KEYS = frozenset({"path", "mtime"}) 40 40 41 41 42 42 def _relative_path(abs_path: str) -> str: ··· 83 83 tags.append(f"tools:{tools}") 84 84 85 85 if info.get("hook"): 86 - tags.append(f"hook:{info['hook']}") 86 + hook = info["hook"] 87 + if isinstance(hook, dict): 88 + # Format as "hook:pre=name,post=name" 89 + parts = [] 90 + if hook.get("pre"): 91 + parts.append(f"pre={hook['pre']}") 92 + if hook.get("post"): 93 + parts.append(f"post={hook['post']}") 94 + tags.append(f"hook:{','.join(parts)}") 95 + else: 96 + tags.append(f"hook:{hook}") 87 97 88 98 if info.get("disabled"): 89 99 tags.append("disabled") ··· 226 236 "hook", 227 237 "color", 228 238 ] 229 - skip_keys = {"path", "mtime", "hook_path"} 239 + skip_keys = {"path", "mtime"} 230 240 231 241 label_width = 14 232 242 ··· 237 247 # Truncate long descriptions for readability 238 248 if key == "description" and len(val_str) > 72: 239 249 val_str = val_str[:72] + "..." 240 - # Show hook path inline 241 - if key == "hook" and info.get("hook_path"): 242 - val_str += f" \u2192 {_relative_path(str(info['hook_path']))}" 250 + # Format hook config nicely 251 + if key == "hook" and isinstance(value, dict): 252 + post_hook = value.get("post", "") 253 + if post_hook: 254 + val_str = f"post: {post_hook}" 243 255 print(f" {key + ':':<{label_width}} {val_str}") 244 256 245 257 printed: set[str] = set()

Configure Feed

Configure Feed