Add selective agent filtering for sources configuration

+150

tests/test_cluster.py

# (diff context: closing assertions of the preceding test; its start is not
# part of this excerpt)
#     assert "100000_300" in str(exc_info.value)
#     assert "not found" in str(exc_info.value)


def test_cluster_with_agent_filter_dict(tmp_path, monkeypatch):
    """A dict-valued "agents" source loads only the agents it enables."""
    monkeypatch.setenv("JOURNAL_PATH", str(tmp_path))
    day_dir = day_path("20240101")

    mod = importlib.import_module("think.cluster")

    # One segment holding an audio transcript plus three agent outputs.
    segment = day_dir / "120000_300"
    segment.mkdir()
    fixtures = {
        "audio.jsonl": '{}\n{"text": "hello"}\n',
        "entities.md": "Entity extraction results",
        "meetings.md": "Meeting summary results",
        "flow.md": "Flow analysis results",
    }
    for name, text in fixtures.items():
        (segment / name).write_text(text)

    # Only "entities" is switched on in the filter.
    sources = {"audio": True, "screen": False, "agents": {"entities": True}}
    result, counts = mod.cluster("20240101", sources=sources)

    assert counts["audio"] == 1
    assert counts["agents"] == 1  # Only entities should be counted
    assert "Entity extraction results" in result
    for excluded in ("Meeting summary results", "Flow analysis results"):
        assert excluded not in result


def test_cluster_with_agent_filter_multiple(tmp_path, monkeypatch):
    """A filter dict may enable several agents at once."""
    monkeypatch.setenv("JOURNAL_PATH", str(tmp_path))
    day_dir = day_path("20240101")

    mod = importlib.import_module("think.cluster")

    # One segment holding an audio transcript plus three agent outputs.
    segment = day_dir / "120000_300"
    segment.mkdir()
    fixtures = {
        "audio.jsonl": '{}\n{"text": "hello"}\n',
        "entities.md": "Entity extraction results",
        "meetings.md": "Meeting summary results",
        "flow.md": "Flow analysis results",
    }
    for name, text in fixtures.items():
        (segment / name).write_text(text)

    # entities (True) and meetings ("required") are in; flow (False) is out.
    agent_filter = {"entities": True, "meetings": "required", "flow": False}
    result, counts = mod.cluster(
        "20240101",
        sources={"audio": True, "screen": False, "agents": agent_filter},
    )

    assert counts["audio"] == 1
    assert counts["agents"] == 2  # entities + meetings
    for included in ("Entity extraction results", "Meeting summary results"):
        assert included in result
    assert "Flow analysis results" not in result


def test_cluster_with_agent_filter_app_namespaced(tmp_path, monkeypatch):
    """Filter keys in "app:topic" form select "_app_topic.md" outputs."""
    monkeypatch.setenv("JOURNAL_PATH", str(tmp_path))
    day_dir = day_path("20240101")

    mod = importlib.import_module("think.cluster")

    # App agent output naming: "app:topic" -> "_app_topic.md"
    segment = day_dir / "120000_300"
    segment.mkdir()
    fixtures = {
        "audio.jsonl": '{}\n{"text": "hello"}\n',
        "entities.md": "System entity results",
        "_todos_review.md": "Todos review results",
    }
    for name, text in fixtures.items():
        (segment / name).write_text(text)

    # Disable the system agent, enable the app-namespaced one.
    agent_filter = {"entities": False, "todos:review": True}
    result, counts = mod.cluster(
        "20240101",
        sources={"audio": True, "screen": False, "agents": agent_filter},
    )

    assert counts["audio"] == 1
    assert counts["agents"] == 1  # Only todos:review
    assert "System entity results" not in result
    assert "Todos review results" in result


def test_cluster_with_empty_agent_filter(tmp_path, monkeypatch):
    """An empty filter dict loads no agent outputs at all."""
    monkeypatch.setenv("JOURNAL_PATH", str(tmp_path))
    day_dir = day_path("20240101")

    mod = importlib.import_module("think.cluster")

    segment = day_dir / "120000_300"
    segment.mkdir()
    fixtures = {
        "audio.jsonl": '{}\n{"text": "hello"}\n',
        "entities.md": "Entity extraction results",
    }
    for name, text in fixtures.items():
        (segment / name).write_text(text)

    # Empty dict should mean no agents
    sources = {"audio": True, "screen": False, "agents": {}}
    result, counts = mod.cluster("20240101", sources=sources)

    assert counts["audio"] == 1
    assert counts["agents"] == 0
    assert "Entity extraction results" not in result


def test_filename_to_agent_key():
    """Filename stems map back to the agent keys used in filter dicts."""
    from think.cluster import _filename_to_agent_key

    expected_keys = {
        # System agents keep their name.
        "entities": "entities",
        "flow": "flow",
        # App-namespaced agents: "_app_topic" -> "app:topic".
        "_todos_review": "todos:review",
        "_entities_observer": "entities:observer",
        # Edge case: no second component, so the stem is returned as-is.
        "_app": "_app",
    }
    for stem, key in expected_keys.items():
        assert _filename_to_agent_key(stem) == key


def test_agent_matches_filter():
    """Exercise _agent_matches_filter for None, empty and populated filters."""
    from think.cluster import _agent_matches_filter

    stems = ("entities", "_todos_review")

    # None filter means all agents
    for stem in stems:
        assert _agent_matches_filter(stem, None) is True

    # Empty dict means no agents
    for stem in stems:
        assert _agent_matches_filter(stem, {}) is False

    # Specific filtering
    filter_dict = {"entities": True, "meetings": False, "todos:review": "required"}
    outcomes = [
        ("entities", True),
        ("meetings", False),
        ("_todos_review", True),
        ("flow", False),  # Not in filter
    ]
    for stem, wanted in outcomes:
        assert _agent_matches_filter(stem, filter_dict) is wanted

+97

tests/test_think_utils.py

# (diff context: closing lines of the preceding test; its start is not part
# of this excerpt)
#     # Now it should exist
#     assert health_dir.exists()
#     assert (health_dir / "new_service.port").read_text() == "9999"


# =============================================================================
# source_is_enabled / source_is_required / get_agent_filter tests
# =============================================================================


def test_source_is_enabled_bool():
    """Plain booleans map straight through source_is_enabled."""
    from think.utils import source_is_enabled

    for flag in (True, False):
        assert source_is_enabled(flag) is flag


def test_source_is_enabled_required_string():
    """The "required" marker counts as enabled."""
    from think.utils import source_is_enabled

    assert source_is_enabled("required") is True


def test_source_is_enabled_dict():
    """A dict is enabled when at least one entry is True or "required"."""
    from think.utils import source_is_enabled

    cases = [
        # Dict with at least one True value -> enabled
        ({"entities": True, "meetings": False}, True),
        # Dict with at least one "required" value -> enabled
        ({"entities": "required", "meetings": False}, True),
        # Dict with all False values -> disabled
        ({"entities": False, "meetings": False}, False),
        # Empty dict -> disabled
        ({}, False),
    ]
    for config, wanted in cases:
        assert source_is_enabled(config) is wanted


def test_source_is_required_bool():
    """Booleans are never treated as required."""
    from think.utils import source_is_required

    for flag in (True, False):
        assert source_is_required(flag) is False


def test_source_is_required_string():
    """Only the "required" marker makes a scalar source required."""
    from think.utils import source_is_required

    assert source_is_required("required") is True


def test_source_is_required_dict():
    """A dict is required when any entry is marked "required"."""
    from think.utils import source_is_required

    cases = [
        # Dict with at least one "required" value -> required
        ({"entities": "required", "meetings": False}, True),
        # Dict with no "required" values -> not required
        ({"entities": True, "meetings": False}, False),
        # Empty dict -> not required
        ({}, False),
    ]
    for config, wanted in cases:
        assert source_is_required(config) is wanted


def test_get_agent_filter_bool():
    """True yields None (all agents); False yields an empty filter."""
    from think.utils import get_agent_filter

    # True -> None (all agents)
    assert get_agent_filter(True) is None

    # False -> empty dict (no agents)
    assert get_agent_filter(False) == {}


def test_get_agent_filter_required_string():
    """The "required" marker also selects all agents (None filter)."""
    from think.utils import get_agent_filter

    # "required" -> None (all agents, required)
    assert get_agent_filter("required") is None


def test_get_agent_filter_dict():
    """Dict configs are handed back unchanged as the filter."""
    from think.utils import get_agent_filter

    # Dict -> returned as-is for filtering
    filter_dict = {"entities": True, "meetings": "required", "flow": False}
    assert get_agent_filter(filter_dict) == filter_dict

    # Empty dict -> empty dict (no agents)
    assert get_agent_filter({}) == {}

+18 -1

think/agents.py

··· 35 35 day_path, 36 36 format_day, 37 37 format_segment_times, 38 + get_agent_filter, 38 39 get_muse_configs, 39 40 get_output_path, 40 41 load_prompt, ··· 710 711 os.environ["SEGMENT_KEY"] = span[0] 711 712 712 713 # Convert sources for clustering 713 - cluster_sources = {k: source_is_enabled(v) for k, v in sources.items()} 714 + # For audio/screen: use source_is_enabled to get bool 715 + # For agents: pass through dict for selective filtering, or use source_is_enabled 716 + cluster_sources: dict = {} 717 + for k, v in sources.items(): 718 + if k == "agents": 719 + agent_filter = get_agent_filter(v) 720 + if agent_filter is None: 721 + # All agents (True or "required") 722 + cluster_sources[k] = source_is_enabled(v) 723 + elif not agent_filter: 724 + # No agents (False or empty dict) 725 + cluster_sources[k] = False 726 + else: 727 + # Selective filtering - pass dict through 728 + cluster_sources[k] = agent_filter 729 + else: 730 + cluster_sources[k] = source_is_enabled(v) 714 731 715 732 # Build transcript via clustering 716 733 if span:

+77 -13

think/cluster.py

··· 22 22 return base 23 23 24 24 25 + def _filename_to_agent_key(filename: str) -> str: 26 + """Convert output filename stem to agent key. 27 + 28 + Reverse of get_output_topic(): converts filesystem names back to agent keys. 29 + 30 + Args: 31 + filename: Filename stem (e.g., "entities" or "_todos_review") 32 + 33 + Returns: 34 + Agent key (e.g., "entities" or "todos:review") 35 + """ 36 + if filename.startswith("_"): 37 + # App agent: "_app_topic" -> "app:topic" 38 + parts = filename[1:].split("_", 1) 39 + if len(parts) == 2: 40 + return f"{parts[0]}:{parts[1]}" 41 + return filename 42 + 43 + 44 + def _agent_matches_filter( 45 + filename: str, agent_filter: Dict[str, bool | str] | None 46 + ) -> bool: 47 + """Check if an agent output file matches the filter. 48 + 49 + Args: 50 + filename: Filename stem (e.g., "entities" or "_todos_review") 51 + agent_filter: Dict mapping agent keys to bool/"required", or None for all 52 + 53 + Returns: 54 + True if the file should be included 55 + """ 56 + if agent_filter is None: 57 + # None means include all agents 58 + return True 59 + 60 + if not agent_filter: 61 + # Empty dict means no agents 62 + return False 63 + 64 + agent_key = _filename_to_agent_key(filename) 65 + 66 + # Check if this agent is enabled in the filter 67 + if agent_key in agent_filter: 68 + value = agent_filter[agent_key] 69 + return value is True or value == "required" 70 + 71 + return False 72 + 73 + 25 74 def _process_segment( 26 75 segment_path: Path, 27 76 date_str: str, 28 77 audio: bool, 29 78 screen: bool, 30 - agents: bool, 79 + agents: bool | Dict[str, bool | str], 31 80 ) -> List[Dict[str, Any]]: 32 81 """Process a single segment directory and return entries. 
33 82 ··· 36 85 date_str: Date in YYYYMMDD format 37 86 audio: Whether to load audio transcripts 38 87 screen: Whether to load raw screen data from *screen.jsonl files 39 - agents: Whether to load agent output summaries from *.md files 88 + agents: Whether to load agent output summaries from *.md files. 89 + Can be bool (all/none) or dict for selective filtering 90 + (e.g., {"entities": True, "meetings": "required"}). 40 91 41 92 Returns: 42 93 List of entry dicts with timestamp, segment_key, prefix, content, name, etc. ··· 107 158 file=sys.stderr, 108 159 ) 109 160 110 - # Process agent output summaries from all *.md files 161 + # Process agent output summaries from *.md files (with optional filtering) 111 162 if agents: 163 + # Convert bool to filter: True -> None (all), False handled by outer if 164 + agent_filter = ( 165 + None if agents is True else agents if isinstance(agents, dict) else None 166 + ) 167 + 112 168 for md_file in sorted(segment_path.glob("*.md")): 113 169 if not md_file.is_file(): 114 170 continue 171 + 172 + # Check if this agent matches the filter 173 + if not _agent_matches_filter(md_file.stem, agent_filter): 174 + continue 175 + 115 176 try: 116 177 content = md_file.read_text() 117 178 if content.strip(): ··· 137 198 138 199 139 200 def _load_entries( 140 - day_dir: str, audio: bool, screen: bool, agents: bool 201 + day_dir: str, audio: bool, screen: bool, agents: bool | Dict[str, bool | str] 141 202 ) -> List[Dict[str, Any]]: 142 203 """Load all transcript entries from a day directory.""" 143 204 from think.utils import segment_parse ··· 377 438 378 439 def cluster( 379 440 day: str, 380 - sources: Dict[str, bool | str], 441 + sources: Dict[str, bool | str | Dict], 381 442 ) -> Tuple[str, Dict[str, int]]: 382 443 """Return Markdown summary for one day's JSON files and counts by source. 383 444 384 445 Args: 385 446 day: Day in YYYYMMDD format 386 447 sources: Dict with keys "audio", "screen", "agents". 
387 - Values can be bool or "required" string (see source_is_enabled). 448 + Values can be bool, "required" string, or dict (for agents). 449 + The "agents" source can be a dict for selective filtering, 450 + e.g., {"entities": True, "meetings": "required"}. 388 451 389 452 Returns: 390 453 Tuple of (markdown, source_counts) where source_counts is a dict ··· 417 480 def cluster_period( 418 481 day: str, 419 482 segment: str, 420 - sources: Dict[str, bool | str], 483 + sources: Dict[str, bool | str | Dict], 421 484 ) -> Tuple[str, Dict[str, int]]: 422 485 """Return Markdown summary for one segment's JSON files and counts by source. 423 486 ··· 425 488 day: Day in YYYYMMDD format 426 489 segment: Segment key in HHMMSS_LEN format (e.g., "163045_300") 427 490 sources: Dict with keys "audio", "screen", "agents". 428 - Values can be bool or "required" string (see source_is_enabled). 491 + Values can be bool, "required" string, or dict (for agents). 429 492 430 493 Returns: 431 494 Tuple of (markdown, source_counts) where source_counts is a dict ··· 454 517 455 518 456 519 def _load_entries_from_segment( 457 - segment_dir: str, audio: bool, screen: bool, agents: bool 520 + segment_dir: str, audio: bool, screen: bool, agents: bool | Dict[str, bool | str] 458 521 ) -> List[Dict[str, Any]]: 459 522 """Load entries from a single segment directory. 460 523 ··· 477 540 def cluster_span( 478 541 day: str, 479 542 span: List[str], 480 - sources: Dict[str, bool | str], 543 + sources: Dict[str, bool | str | Dict], 481 544 ) -> Tuple[str, Dict[str, int]]: 482 545 """Return Markdown summary for a span of segments and counts by source. 483 546 ··· 490 553 day: Day in YYYYMMDD format 491 554 span: List of segment keys in HHMMSS_LEN format (e.g., ["163045_300", "170000_600"]) 492 555 sources: Dict with keys "audio", "screen", "agents". 493 - Values can be bool or "required" string (see source_is_enabled). 556 + Values can be bool, "required" string, or dict (for agents). 
494 557 495 558 Returns: 496 559 Tuple of (markdown, source_counts) where source_counts is a dict ··· 551 614 day: str, 552 615 start: str, 553 616 end: str, 554 - sources: Dict[str, bool], 617 + sources: Dict[str, bool | str | Dict], 555 618 ) -> str: 556 619 """Return markdown for ``day`` limited to ``start``-``end`` (HHMMSS). 557 620 ··· 562 625 day: Day in YYYYMMDD format 563 626 start: Start time in HHMMSS format 564 627 end: End time in HHMMSS format 565 - sources: Dict with keys "audio", "screen", "agents" (all bool). 628 + sources: Dict with keys "audio", "screen", "agents". 629 + Values can be bool, "required" string, or dict (for agents). 566 630 """ 567 631 day_dir = str(day_path(day)) 568 632 date_str = _date_str(day_dir)

+47 -6

think/utils.py

··· 1116 1116 Optional dict from .json "instructions" key. Supported keys: 1117 1117 - "system": prompt name for system instruction (default: "journal") 1118 1118 - "facets": "none" | "short" | "detailed" (default: "short") 1119 - - "sources": {"audio": bool, "screen": bool, "agents": bool} 1119 + - "sources": {"audio": bool, "screen": bool, "agents": bool|dict} 1120 + The "agents" source can be: 1121 + - bool: True (all agents), False (no agents) 1122 + - "required": all agents, fail if none found 1123 + - dict: selective filtering, e.g., {"entities": true, "meetings": "required"} 1120 1124 1121 1125 Returns 1122 1126 ------- ··· 1126 1130 - system_prompt_name: str - name of system prompt (for cache keys) 1127 1131 - user_instruction: str | None - loaded from user_prompt if provided 1128 1132 - extra_context: str | None - facets + datetime 1129 - - sources: dict - {"audio": bool, "screen": bool, "agents": bool} 1133 + - sources: dict - {"audio": bool, "screen": bool, "agents": bool|dict} 1130 1134 """ 1131 1135 # Merge defaults with overrides 1132 1136 cfg = _merge_instructions_config(_DEFAULT_INSTRUCTIONS, config_overrides) ··· 1193 1197 return result 1194 1198 1195 1199 1196 - def source_is_enabled(value: bool | str) -> bool: 1200 + def source_is_enabled(value: bool | str | dict) -> bool: 1197 1201 """Check if a source should be loaded based on its config value. 1198 1202 1199 1203 Sources can be configured as: 1200 1204 - False: don't load 1201 1205 - True: load if available 1202 1206 - "required": load (and generation will fail if none found) 1207 + - dict: for agents source, selective loading (e.g., {"entities": true}) 1203 1208 1204 1209 Both True and "required" mean the source should be loaded. 1210 + A non-empty dict means the source should be loaded (with filtering). 
1205 1211 1206 1212 Args: 1207 - value: The source config value (bool or "required" string) 1213 + value: The source config value (bool, "required" string, or dict for agents) 1208 1214 1209 1215 Returns: 1210 1216 True if the source should be loaded, False otherwise. 1211 1217 """ 1218 + if isinstance(value, dict): 1219 + # Dict means selective loading - enabled if any agent is enabled 1220 + return any(v is True or v == "required" for v in value.values()) 1212 1221 return value is True or value == "required" 1213 1222 1214 1223 1215 - def source_is_required(value: bool | str) -> bool: 1224 + def source_is_required(value: bool | str | dict) -> bool: 1216 1225 """Check if a source must have content for generation to proceed. 1217 1226 1218 1227 Args: 1219 - value: The source config value (bool or "required" string) 1228 + value: The source config value (bool, "required" string, or dict for agents) 1220 1229 1221 1230 Returns: 1222 1231 True if the source is required (generation should skip if no content). 1232 + For dict values, returns True if any agent is marked "required". 1223 1233 """ 1234 + if isinstance(value, dict): 1235 + return any(v == "required" for v in value.values()) 1224 1236 return value == "required" 1237 + 1238 + 1239 + def get_agent_filter(value: bool | str | dict) -> dict[str, bool | str] | None: 1240 + """Extract agent filter from sources config. 1241 + 1242 + When agents source is a dict, returns it as filter mapping agent names 1243 + to their enabled/required status. When agents source is bool or "required", 1244 + returns None to indicate all agents should be loaded. 1245 + 1246 + Args: 1247 + value: The agents source config value 1248 + 1249 + Returns: 1250 + Dict mapping agent names to bool/"required", or None for all agents. 1251 + Returns empty dict if value is False (no agents). 
1252 + 1253 + Examples: 1254 + >>> get_agent_filter(True) 1255 + None # All agents 1256 + >>> get_agent_filter(False) 1257 + {} # No agents 1258 + >>> get_agent_filter({"entities": True, "meetings": "required"}) 1259 + {"entities": True, "meetings": "required"} 1260 + """ 1261 + if isinstance(value, dict): 1262 + return value 1263 + if value is False: 1264 + return {} # No agents 1265 + return None # All agents (True or "required") 1225 1266 1226 1267 1227 1268 def get_agent(name: str = "default", facet: str | None = None) -> dict:

Configure Feed

Configure Feed