personal memory agent
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Split think/outputs.py into markdown.py and hooks.py

Separate the two distinct concerns that were combined in outputs.py:

- think/markdown.py: Semantic markdown chunking utilities (AST parsing,
chunking by headers/lists/tables, rendering back to markdown)

- think/hooks.py: Shared utilities for output extraction hooks
(should_skip_extraction, write_events_jsonl, compute_output_source)

Update imports in:
- think/formatters.py (registry + inline import)
- muse/anticipation.py, muse/occurrence.py (hook utilities)
- tests/test_formatters.py (7 test imports)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

+163 -162
+2 -2
muse/anticipation.py
··· 13 13 from pathlib import Path 14 14 15 15 from think.facets import facet_summaries 16 - from think.models import generate 17 - from think.outputs import ( 16 + from think.hooks import ( 18 17 compute_output_source, 19 18 should_skip_extraction, 20 19 write_events_jsonl, 21 20 ) 21 + from think.models import generate 22 22 from think.utils import get_output_topic, load_prompt 23 23 24 24
+2 -2
muse/occurrence.py
··· 13 13 from pathlib import Path 14 14 15 15 from think.facets import facet_summaries 16 - from think.models import generate 17 - from think.outputs import ( 16 + from think.hooks import ( 18 17 compute_output_source, 19 18 should_skip_extraction, 20 19 write_events_jsonl, 21 20 ) 21 + from think.models import generate 22 22 from think.utils import get_output_topic, load_prompt 23 23 24 24
+7 -7
tests/test_formatters.py
··· 1292 1292 1293 1293 def test_format_markdown_basic(self): 1294 1294 """Test basic markdown formatting.""" 1295 - from think.outputs import format_markdown 1295 + from think.markdown import format_markdown 1296 1296 1297 1297 text = "# Hello\n\nThis is a paragraph.\n" 1298 1298 chunks, meta = format_markdown(text) ··· 1304 1304 1305 1305 def test_format_markdown_multiple_chunks(self): 1306 1306 """Test that lists are split into multiple chunks.""" 1307 - from think.outputs import format_markdown 1307 + from think.markdown import format_markdown 1308 1308 1309 1309 text = "# List\n\n- Item one\n- Item two\n- Item three\n" 1310 1310 chunks, meta = format_markdown(text) ··· 1315 1315 1316 1316 def test_format_markdown_no_timestamp(self): 1317 1317 """Test that markdown chunks don't have timestamp key.""" 1318 - from think.outputs import format_markdown 1318 + from think.markdown import format_markdown 1319 1319 1320 1320 text = "# Test\n\nSome content.\n" 1321 1321 chunks, meta = format_markdown(text) ··· 1326 1326 1327 1327 def test_format_markdown_preserves_headers(self): 1328 1328 """Test that each chunk includes its header context.""" 1329 - from think.outputs import format_markdown 1329 + from think.markdown import format_markdown 1330 1330 1331 1331 text = "# Top\n\n## Section\n\nParagraph content.\n" 1332 1332 chunks, meta = format_markdown(text) ··· 1338 1338 1339 1339 def test_format_markdown_definition_list(self): 1340 1340 """Test that definition lists stay as single chunk.""" 1341 - from think.outputs import format_markdown 1341 + from think.markdown import format_markdown 1342 1342 1343 1343 text = "# Info\n\n- **Name:** Alice\n- **Role:** Engineer\n" 1344 1344 chunks, meta = format_markdown(text) ··· 1350 1350 1351 1351 def test_format_markdown_table_rows(self): 1352 1352 """Test that table rows become separate chunks.""" 1353 - from think.outputs import format_markdown 1353 + from think.markdown import format_markdown 1354 1354 1355 1355 text = """# 
Data 1356 1356 ··· 1369 1369 1370 1370 def test_format_markdown_code_block(self): 1371 1371 """Test that code blocks become chunks.""" 1372 - from think.outputs import format_markdown 1372 + from think.markdown import format_markdown 1373 1373 1374 1374 text = "# Code\n\n```python\nprint('hello')\n```\n" 1375 1375 chunks, meta = format_markdown(text)
+2 -2
think/formatters.py
··· 127 127 "*/*_audio.jsonl": ("observe.hear", "format_audio"), 128 128 "*/audio.jsonl": ("observe.hear", "format_audio"), 129 129 # Markdown formatter (semantic chunking) 130 - "**/*.md": ("think.outputs", "format_markdown"), 130 + "**/*.md": ("think.markdown", "format_markdown"), 131 131 } 132 132 133 133 ··· 456 456 # For summary format on markdown files, get raw chunks with metadata 457 457 raw_chunks = None 458 458 if args.format == "summary" and args.file.endswith(".md"): 459 - from think.outputs import chunk_markdown 459 + from think.markdown import chunk_markdown 460 460 461 461 text = load_markdown(args.file) 462 462 raw_chunks = chunk_markdown(text)
+150
think/hooks.py
··· 1 + # SPDX-License-Identifier: AGPL-3.0-only 2 + # Copyright (c) 2026 sol pbc 3 + 4 + """Shared utilities for output extraction hooks. 5 + 6 + This module provides common functions used by extraction hooks like 7 + occurrence.py and anticipation.py in the muse/ directory. 8 + """ 9 + 10 + import json 11 + import os 12 + from pathlib import Path 13 + 14 + # Minimum content length for meaningful event extraction 15 + MIN_EXTRACTION_CHARS = 50 16 + 17 + 18 + def should_skip_extraction(result: str, context: dict) -> str | None: 19 + """Check if extraction should be skipped and return reason, or None to proceed. 20 + 21 + Args: 22 + result: The generated output markdown content. 23 + context: Hook context dict with meta and multi_segment. 24 + 25 + Returns: 26 + Skip reason string if extraction should be skipped, None otherwise. 27 + """ 28 + meta = context.get("meta", {}) 29 + 30 + # Skip if extraction disabled via journal config 31 + if meta.get("extract") is False: 32 + return "extraction disabled via journal config" 33 + 34 + # Skip for JSON output (output IS the structured data) 35 + if meta.get("output") == "json": 36 + return "JSON output (already structured)" 37 + 38 + # Skip in multi-segment mode 39 + if context.get("multi_segment"): 40 + return "multi-segment mode" 41 + 42 + # Skip for minimal content 43 + if len(result.strip()) < MIN_EXTRACTION_CHARS: 44 + return f"minimal content ({len(result.strip())} chars < {MIN_EXTRACTION_CHARS})" 45 + 46 + return None 47 + 48 + 49 + def write_events_jsonl( 50 + events: list[dict], 51 + topic: str, 52 + occurred: bool, 53 + source_output: str, 54 + capture_day: str, 55 + ) -> list[Path]: 56 + """Write events to facet-based JSONL files. 57 + 58 + Groups events by facet and writes each to the appropriate file: 59 + facets/{facet}/events/{event_day}.jsonl 60 + 61 + Args: 62 + events: List of event dictionaries from extraction. 63 + topic: Source generator topic (e.g., "meetings", "schedule"). 
64 + occurred: True for occurrences, False for anticipations. 65 + source_output: Relative path to source output file. 66 + capture_day: Day the output was captured (YYYYMMDD). 67 + 68 + Returns: 69 + List of paths to written JSONL files. 70 + """ 71 + from think.utils import get_journal 72 + 73 + journal = get_journal() 74 + 75 + # Group events by (facet, event_day) 76 + grouped: dict[tuple[str, str], list[dict]] = {} 77 + 78 + for event in events: 79 + facet = event.get("facet", "") 80 + if not facet: 81 + continue # Skip events without facet 82 + 83 + # Determine the event day 84 + if occurred: 85 + # Occurrences use capture day 86 + event_day = capture_day 87 + else: 88 + # Anticipations use their scheduled date 89 + event_date = event.get("date", "") 90 + # Convert YYYY-MM-DD to YYYYMMDD 91 + event_day = event_date.replace("-", "") if event_date else capture_day 92 + 93 + if not event_day: 94 + continue 95 + 96 + key = (facet, event_day) 97 + if key not in grouped: 98 + grouped[key] = [] 99 + 100 + # Enrich event with metadata 101 + enriched = dict(event) 102 + enriched["topic"] = topic 103 + enriched["occurred"] = occurred 104 + enriched["source"] = source_output 105 + 106 + grouped[key].append(enriched) 107 + 108 + # Write each group to its JSONL file 109 + written_paths: list[Path] = [] 110 + 111 + for (facet, event_day), facet_events in grouped.items(): 112 + events_dir = Path(journal) / "facets" / facet / "events" 113 + events_dir.mkdir(parents=True, exist_ok=True) 114 + 115 + jsonl_path = events_dir / f"{event_day}.jsonl" 116 + with open(jsonl_path, "a", encoding="utf-8") as f: 117 + for event in facet_events: 118 + f.write(json.dumps(event, ensure_ascii=False) + "\n") 119 + 120 + written_paths.append(jsonl_path) 121 + 122 + return written_paths 123 + 124 + 125 + def compute_output_source(context: dict) -> str: 126 + """Compute relative source output path from hook context. 
127 + 128 + Args: 129 + context: Hook context dict with day, segment, name, output_path. 130 + 131 + Returns: 132 + Relative path like "20240101/agents/meetings.md". 133 + """ 134 + from think.utils import get_journal, get_output_topic 135 + 136 + day = context.get("day", "") 137 + output_path = context.get("output_path", "") 138 + name = context.get("name", "unknown") 139 + journal = get_journal() 140 + 141 + try: 142 + return os.path.relpath(output_path, journal) 143 + except ValueError: 144 + segment = context.get("segment") 145 + topic = get_output_topic(name) 146 + return os.path.join( 147 + day, 148 + "agents" if not segment else segment, 149 + f"{topic}.md", 150 + )
-149
think/outputs.py → think/markdown.py
··· 354 354 raw_chunks = chunk_markdown(text) 355 355 chunks = [{"markdown": render_chunk(c)} for c in raw_chunks] 356 356 return chunks, {} 357 - 358 - 359 - # --------------------------------------------------------------------------- 360 - # Extraction Hook Utilities 361 - # --------------------------------------------------------------------------- 362 - # Shared utilities for output extraction hooks (occurrence.py, anticipation.py) 363 - 364 - import json 365 - import logging 366 - import os 367 - from pathlib import Path 368 - 369 - # Minimum content length for meaningful event extraction 370 - MIN_EXTRACTION_CHARS = 50 371 - 372 - 373 - def should_skip_extraction(result: str, context: dict) -> str | None: 374 - """Check if extraction should be skipped and return reason, or None to proceed. 375 - 376 - Args: 377 - result: The generated output markdown content. 378 - context: Hook context dict with meta and multi_segment. 379 - 380 - Returns: 381 - Skip reason string if extraction should be skipped, None otherwise. 382 - """ 383 - meta = context.get("meta", {}) 384 - 385 - # Skip if extraction disabled via journal config 386 - if meta.get("extract") is False: 387 - return "extraction disabled via journal config" 388 - 389 - # Skip for JSON output (output IS the structured data) 390 - if meta.get("output") == "json": 391 - return "JSON output (already structured)" 392 - 393 - # Skip in multi-segment mode 394 - if context.get("multi_segment"): 395 - return "multi-segment mode" 396 - 397 - # Skip for minimal content 398 - if len(result.strip()) < MIN_EXTRACTION_CHARS: 399 - return f"minimal content ({len(result.strip())} chars < {MIN_EXTRACTION_CHARS})" 400 - 401 - return None 402 - 403 - 404 - def write_events_jsonl( 405 - events: list[dict], 406 - topic: str, 407 - occurred: bool, 408 - source_output: str, 409 - capture_day: str, 410 - ) -> list[Path]: 411 - """Write events to facet-based JSONL files. 
412 - 413 - Groups events by facet and writes each to the appropriate file: 414 - facets/{facet}/events/{event_day}.jsonl 415 - 416 - Args: 417 - events: List of event dictionaries from extraction. 418 - topic: Source generator topic (e.g., "meetings", "schedule"). 419 - occurred: True for occurrences, False for anticipations. 420 - source_output: Relative path to source output file. 421 - capture_day: Day the output was captured (YYYYMMDD). 422 - 423 - Returns: 424 - List of paths to written JSONL files. 425 - """ 426 - from think.utils import get_journal 427 - 428 - journal = get_journal() 429 - 430 - # Group events by (facet, event_day) 431 - grouped: dict[tuple[str, str], list[dict]] = {} 432 - 433 - for event in events: 434 - facet = event.get("facet", "") 435 - if not facet: 436 - continue # Skip events without facet 437 - 438 - # Determine the event day 439 - if occurred: 440 - # Occurrences use capture day 441 - event_day = capture_day 442 - else: 443 - # Anticipations use their scheduled date 444 - event_date = event.get("date", "") 445 - # Convert YYYY-MM-DD to YYYYMMDD 446 - event_day = event_date.replace("-", "") if event_date else capture_day 447 - 448 - if not event_day: 449 - continue 450 - 451 - key = (facet, event_day) 452 - if key not in grouped: 453 - grouped[key] = [] 454 - 455 - # Enrich event with metadata 456 - enriched = dict(event) 457 - enriched["topic"] = topic 458 - enriched["occurred"] = occurred 459 - enriched["source"] = source_output 460 - 461 - grouped[key].append(enriched) 462 - 463 - # Write each group to its JSONL file 464 - written_paths: list[Path] = [] 465 - 466 - for (facet, event_day), facet_events in grouped.items(): 467 - events_dir = Path(journal) / "facets" / facet / "events" 468 - events_dir.mkdir(parents=True, exist_ok=True) 469 - 470 - jsonl_path = events_dir / f"{event_day}.jsonl" 471 - with open(jsonl_path, "a", encoding="utf-8") as f: 472 - for event in facet_events: 473 - f.write(json.dumps(event, 
ensure_ascii=False) + "\n") 474 - 475 - written_paths.append(jsonl_path) 476 - 477 - return written_paths 478 - 479 - 480 - def compute_output_source(context: dict) -> str: 481 - """Compute relative source output path from hook context. 482 - 483 - Args: 484 - context: Hook context dict with day, segment, name, output_path. 485 - 486 - Returns: 487 - Relative path like "20240101/agents/meetings.md". 488 - """ 489 - from think.utils import get_journal, get_output_topic 490 - 491 - day = context.get("day", "") 492 - output_path = context.get("output_path", "") 493 - name = context.get("name", "unknown") 494 - journal = get_journal() 495 - 496 - try: 497 - return os.path.relpath(output_path, journal) 498 - except ValueError: 499 - segment = context.get("segment") 500 - topic = get_output_topic(name) 501 - return os.path.join( 502 - day, 503 - "agents" if not segment else segment, 504 - f"{topic}.md", 505 - )