personal memory agent
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'hopper-layn7pdo-dream-efficiency'

+1136 -72
+5 -3
muse/entities.md
··· 4 4 "title": "Entity Extraction", 5 5 "description": "Extracts people, companies, projects, and tools from segment content", 6 6 "color": "#2e7d32", 7 - "schedule": "segment", 7 + "schedule": "activity", 8 + "activities": ["*"], 8 9 "priority": 10, 9 10 "hook": {"post": "entities"}, 10 11 "thinking_budget": 4096, ··· 12 13 "output": "md", 13 14 "instructions": { 14 15 "sources": {"transcripts": true, "percepts": true, "agents": false}, 15 - "facets": false 16 + "facets": false, 17 + "activity": true 16 18 } 17 19 18 20 } 19 21 20 - $segment_preamble 22 + $activity_preamble 21 23 22 24 Extract named entities and descriptions from the given segment transcription document. 23 25
+16 -12
muse/entities.py
··· 5 5 6 6 This hook is invoked via "hook": {"post": "entities"} in generator frontmatter. 7 7 It parses the markdown entity list and writes deduplicated entities 8 - to a JSONL file in the segment directory. 8 + to a JSONL file next to the agent output. 9 9 """ 10 10 11 11 import json ··· 65 65 66 66 67 67 def post_process(result: str, context: dict) -> str | None: 68 - """Parse entity list and write to segment JSONL file. 68 + """Parse entity list and write to an adjacent JSONL file. 69 69 70 70 Args: 71 71 result: The generated output content (markdown entity list). ··· 75 75 Returns: 76 76 None - this hook does not modify the output result. 77 77 """ 78 - segment = context.get("segment") 79 - if not segment: 80 - logging.warning("entities hook requires segment mode") 81 - return None 82 - 83 78 # Parse entities from result 84 79 entities = [] 85 80 unparsed = [] ··· 94 89 unparsed.append(line) 95 90 96 91 if unparsed: 97 - print(f"Warning: {len(unparsed)} unparsed entity lines:") 92 + logging.warning("entities hook: %d unparsed entity lines", len(unparsed)) 98 93 for line in unparsed: 99 - print(f" {line}") 94 + logging.warning("entities hook: unparsed line: %s", line) 100 95 101 96 if not entities: 102 97 logging.info("entities hook: no entities extracted") ··· 119 114 ) 120 115 121 116 # Write entities.jsonl alongside the agent output in the agents/ directory 122 - output_path = Path(context.get("output_path", "")) 117 + output_path_value = context.get("output_path") 118 + if not output_path_value: 119 + logging.error("entities hook: missing output_path in context") 120 + return None 121 + 122 + output_path = Path(output_path_value) 123 123 agents_dir = output_path.parent 124 124 jsonl_path = agents_dir / "entities.jsonl" 125 125 126 126 # Write JSONL file 127 127 try: 128 128 jsonl_path.parent.mkdir(parents=True, exist_ok=True) 129 - with open(jsonl_path, "w") as f: 129 + with open(jsonl_path, "w", encoding="utf-8") as f: 130 130 for entity in unique_entities: 131 131 f.write(json.dumps(entity) + "\n") 132 - print(f"Entities written to: {jsonl_path} ({len(unique_entities)} entities)") 132 + logging.info( 133 + "entities hook: wrote %d entities to %s", 134 + len(unique_entities), 135 + jsonl_path, 136 + ) 133 137 except Exception as e: 134 138 logging.error("entities hook: failed to write JSONL: %s", e) 135 139
+1 -1
tests/baselines/api/agents/agents-day.json
··· 105 105 "description": "Extracts people, companies, projects, and tools from segment content", 106 106 "multi_facet": false, 107 107 "output_format": "md", 108 - "schedule": "segment", 108 + "schedule": "activity", 109 109 "source": "system", 110 110 "title": "Entity Extraction", 111 111 "type": "generate"
-11
tests/baselines/api/entities/journal-entities.json
··· 25 25 { 26 26 "aka": [], 27 27 "blocked": false, 28 - "facets": [], 29 - "id": "person_a", 30 - "is_principal": false, 31 - "last_active_ts": 0, 32 - "name": "Person A", 33 - "total_observation_count": 0, 34 - "type": "Person" 35 - }, 36 - { 37 - "aka": [], 38 - "blocked": false, 39 28 "facets": [ 40 29 { 41 30 "attached_at": null,
+1 -1
tests/baselines/api/search/day-results.json
··· 14 14 "id": "20260304/agents/knowledge_graph.md:7", 15 15 "idx": 7, 16 16 "path": "20260304/agents/knowledge_graph.md", 17 - "score": -2.6, 17 + "score": -2.0, 18 18 "stream": null, 19 19 "text": "# Part 1: Entity Extraction and Relationship Mapping\n\n## Relationship Mapping\n\n| Source Name | Target Name | Relationship Type | Context |\n| :--- | :--- | :--- | :--- |\n| **Romeo Montague** | **Juliet Capulet** | `met-at-conference` | First <strong>meeting</strong> at Denver Tech Summit keynote. |\n" 20 20 }
+34 -34
tests/baselines/api/search/search.json
··· 85 85 "id": "20260306/default/093000_300/agents/audio.md:0", 86 86 "idx": 0, 87 87 "path": "20260306/default/093000_300/agents/audio.md", 88 - "score": -2.6, 88 + "score": -1.7, 89 89 "stream": "default", 90 90 "text": "# Audio Summary Morning standup. Benvolio noticed <strong>Romeo</strong>'s late-night GitHub activity and pressed him about API gateway commits. <strong>Romeo</strong> deflected, calling it a personal mesh routing prototype. Mercutio covered for him. Balthasar reported progress on the mesh routing fallback PR with an edge case for <strong>Romeo</strong> to review. Benvolio scheduled..." 91 91 }, ··· 101 101 "id": "facets/montague/entities/20260306.jsonl:0", 102 102 "idx": 0, 103 103 "path": "facets/montague/entities/20260306.jsonl", 104 - "score": -3.2, 104 + "score": -2.1, 105 105 "stream": null, 106 106 "text": "### Person: <strong>Romeo</strong> Montague\n\n\nContinued Verona Platform development\n\n" 107 107 }, ··· 117 117 "id": "facets/montague/entities/20260306.jsonl:3", 118 118 "idx": 3, 119 119 "path": "facets/montague/entities/20260306.jsonl", 120 - "score": -3.1, 120 + "score": -2.0, 121 121 "stream": null, 122 122 "text": "### Person: Balthasar Davi\n\n\nReviewed mesh routing PR with <strong>Romeo</strong>\n\n" 123 123 }, ··· 133 133 "id": "facets/montague/entities/20260306.jsonl:4", 134 134 "idx": 4, 135 135 "path": "facets/montague/entities/20260306.jsonl", 136 - "score": -3.2, 136 + "score": -2.0, 137 137 "stream": null, 138 138 "text": "### Person: Mercutio Escalus\n\n\nCovered for <strong>Romeo</strong> during standup\n\n" 139 139 }, ··· 149 149 "id": "20260306/default/093000_300/agents/screen.md:0", 150 150 "idx": 0, 151 151 "path": "20260306/default/093000_300/agents/screen.md", 152 - "score": -2.8, 152 + "score": -1.8, 153 153 "stream": "default", 154 154 "text": "# Screen Summary\n\nSlack standup channel. Benvolio questioning <strong>Romeo</strong> about late-night commits.\n" 155 155 } ··· 174 174 "id": "facets/verona/logs/20260309.jsonl:1", 175 175 "idx": 1, 176 176 "path": "facets/verona/logs/20260309.jsonl", 177 - "score": -2.4, 177 + "score": -1.6, 178 178 "stream": null, 179 179 "text": "### Deploy Complete by <strong>romeo</strong>_montague\n\n**Source:** deploy | **Time:** 13:45:00\n\n**Parameters:**\n- service: verona-gateway\n- version: 0.9.0\n" 180 180 }, ··· 190 190 "id": "20260309/default/090000_300/agents/audio.md:0", 191 191 "idx": 0, 192 192 "path": "20260309/default/090000_300/agents/audio.md", 193 - "score": -2.3, 193 + "score": -1.5, 194 194 "stream": "default", 195 195 "text": "# Audio Summary\n\n<strong>Romeo</strong> confessed the project to Benvolio and asked for infrastructure help. Benvolio agreed to spin up a Kubernetes staging cluster.\n" 196 196 }, ··· 206 206 "id": "facets/montague/entities/20260309.jsonl:0", 207 207 "idx": 0, 208 208 "path": "facets/montague/entities/20260309.jsonl", 209 - "score": -3.1, 209 + "score": -2.0, 210 210 "stream": null, 211 211 "text": "### Person: <strong>Romeo</strong> Montague\n\n\nConfessed project to Benvolio, preparing demo\n\n" 212 212 }, ··· 222 222 "id": "facets/montague/calendar/20260309.jsonl:0", 223 223 "idx": 0, 224 224 "path": "facets/montague/calendar/20260309.jsonl", 225 - "score": -2.6, 225 + "score": -1.7, 226 226 "stream": null, 227 227 "text": "### Event: Team Standup\n\n\n**Time Occurred:** 09:00 - 09:30\n**Participants:** <strong>Romeo</strong> Montague, Benvolio Montague\n\nDaily sync\n" 228 228 }, ··· 238 238 "id": "facets/verona/calendar/20260309.jsonl:0", 239 239 "idx": 0, 240 240 "path": "facets/verona/calendar/20260309.jsonl", 241 - "score": -2.3, 241 + "score": -1.5, 242 242 "stream": null, 243 243 "text": "### Event: Demo Sprint\n\n\n**Time Occurred:** 09:00 - 21:00\n**Participants:** <strong>Romeo</strong> Montague, Juliet Capulet, Benvolio Montague\n\nFull day board presentation preparation\n" 244 244 } ··· 263 263 "id": "20260307/default/100000_300/agents/audio.md:0", 264 264 "idx": 0, 265 265 "path": "20260307/default/100000_300/agents/audio.md", 266 - "score": -3.1, 266 + "score": -2.0, 267 267 "stream": "default", 268 268 "text": "# Audio Summary\n\nHeated confrontation. Tybalt Capulet accused <strong>Romeo</strong> of stealing Capulet IP. Mercutio defended <strong>Romeo</strong> and had his Capulet consulting contract terminated by Tybalt.\n" 269 269 }, ··· 279 279 "id": "20260307/default/150000_300/agents/audio.md:0", 280 280 "idx": 0, 281 281 "path": "20260307/default/150000_300/agents/audio.md", 282 - "score": -3.3, 282 + "score": -2.2, 283 283 "stream": "default", 284 284 "text": "# Audio Summary\n\nEmergency meeting at Montague Tech. Benvolio questioned <strong>Romeo</strong> about the secret project. <strong>Romeo</strong> clarified no company IP was shared. Team discussed legal exposure. <strong>Romeo</strong> proposed Professor Lawrence as mediator.\n" 285 285 }, ··· 295 295 "id": "facets/montague/entities/20260307.jsonl:0", 296 296 "idx": 0, 297 297 "path": "facets/montague/entities/20260307.jsonl", 298 - "score": -3.1, 298 + "score": -2.0, 299 299 "stream": null, 300 300 "text": "### Person: <strong>Romeo</strong> Montague\n\n\nConfronted by Tybalt, called emergency meeting\n\n" 301 301 }, ··· 311 311 "id": "facets/montague/calendar/20260307.jsonl:0", 312 312 "idx": 0, 313 313 "path": "facets/montague/calendar/20260307.jsonl", 314 - "score": -2.4, 314 + "score": -1.5, 315 315 "stream": null, 316 316 "text": "### Event: Emergency Team Meeting\n\n\n**Time Occurred:** 15:00 - 16:00\n**Participants:** <strong>Romeo</strong> Montague, Benvolio Montague\n\nCrisis response to Capulet situation\n" 317 317 }, ··· 327 327 "id": "facets/montague/events/20260307.jsonl:0", 328 328 "idx": 0, 329 329 "path": "facets/montague/events/20260307.jsonl", 330 - "score": -2.9, 330 + "score": -1.9, 331 331 "stream": null, 332 332 "text": "### Meeting: Confrontation with Tybalt\n\n\n**Time Occurred:** 10:00 - 10:30\n**Participants:** <strong>Romeo</strong> Montague, Tybalt Capulet, Mercutio Escalus\n\nTybalt accused <strong>Romeo</strong> of IP theft\n\nMercutio fired from Capulet contract\n" 333 333 } ··· 352 352 "id": "facets/montague/entities/20260308.jsonl:0", 353 353 "idx": 0, 354 354 "path": "facets/montague/entities/20260308.jsonl", 355 - "score": -3.1, 355 + "score": -2.0, 356 356 "stream": null, 357 357 "text": "### Person: <strong>Romeo</strong> Montague\n\n\nUnder board pressure, planning board presentation\n\n" 358 358 }, ··· 368 368 "id": "facets/verona/events/20260308.jsonl:0", 369 369 "idx": 0, 370 370 "path": "facets/verona/events/20260308.jsonl", 371 - "score": -2.1, 371 + "score": -1.3, 372 372 "stream": null, 373 373 "text": "### Meeting: Strategy Call with Professor Lawrence\n\n\n**Time Occurred:** 10:00 - 11:00\n**Participants:** <strong>Romeo</strong> Montague, Juliet Capulet, Friar Lawrence\n\nJoint venture strategy planning\n\nProposed board presentation strategy\n" 374 374 }, ··· 384 384 "id": "20260308/agents/knowledge_graph.md:2", 385 385 "idx": 2, 386 386 "path": "20260308/agents/knowledge_graph.md", 387 - "score": -2.0, 387 + "score": -1.3, 388 388 "stream": null, 389 389 "text": "# Part 1: Entity Extraction and Relationship Mapping ## Entity Profiles | Entity Name | Entity Type | First Appearance | Total Engagement | Context | | :--- | :--- | :--- | :--- | :--- | | **<strong>Romeo</strong> Montague** | Person | 10:00 | High | Under board pressure,..." 390 390 }, ··· 400 400 "id": "20260308/agents/meetings.md:0", 401 401 "idx": 0, 402 402 "path": "20260308/agents/meetings.md", 403 - "score": -2.9, 403 + "score": -1.9, 404 404 "stream": null, 405 405 "text": "# Meetings\n\n- 10:00 Strategy Call with Professor Lawrence, <strong>Romeo</strong>, and Juliet\n" 406 406 } ··· 425 425 "id": "facets/verona/logs/20260305.jsonl:0", 426 426 "idx": 0, 427 427 "path": "facets/verona/logs/20260305.jsonl", 428 - "score": -2.5, 428 + "score": -1.6, 429 429 "stream": null, 430 430 "text": "### Repo Created by <strong>romeo</strong>_montague\n\n**Source:** github | **Time:** 22:05:00\n\n**Parameters:**\n- repo: balcony-app\n- visibility: private\n" 431 431 }, ··· 441 441 "id": "20260305/default/090000_300/agents/audio.md:0", 442 442 "idx": 0, 443 443 "path": "20260305/default/090000_300/agents/audio.md", 444 - "score": -2.9, 444 + "score": -1.9, 445 445 "stream": "default", 446 446 "text": "# Audio Summary\n\nMorning standup at Montague Tech. Benvolio reported CI pipeline is green. <strong>Romeo</strong> mentioned wanting to explore ideas from the conference. Mercutio teased about <strong>Romeo</strong> meeting someone.\n" 447 447 }, ··· 457 457 "id": "facets/montague/entities/20260305.jsonl:0", 458 458 "idx": 0, 459 459 "path": "facets/montague/entities/20260305.jsonl", 460 - "score": -3.1, 460 + "score": -2.0, 461 461 "stream": null, 462 462 "text": "### Person: <strong>Romeo</strong> Montague\n\n\nStarted Balcony App prototype with Juliet\n\n" 463 463 }, ··· 473 473 "id": "facets/verona/entities/20260305.jsonl:0", 474 474 "idx": 0, 475 475 "path": "facets/verona/entities/20260305.jsonl", 476 - "score": -3.1, 476 + "score": -2.0, 477 477 "stream": null, 478 478 "text": "### Person: <strong>Romeo</strong> Montague\n\n\nSet up private repo for collaboration\n\n" 479 479 }, ··· 489 489 "id": "facets/montague/events/20260305.jsonl:0", 490 490 "idx": 0, 491 491 "path": "facets/montague/events/20260305.jsonl", 492 - "score": -3.1, 492 + "score": -2.0, 493 493 "stream": null, 494 494 "text": "### Meeting: Montague Tech Daily Standup\n\n\n**Time Occurred:** 09:00 - 09:30\n**Participants:** <strong>Romeo</strong> Montague, Benvolio Montague, Mercutio Escalus\n\nTeam standup\n\n<strong>Romeo</strong> mentioned conference ideas\n" 495 495 } ··· 514 514 "id": "facets/montague/entities/20260310.jsonl:0", 515 515 "idx": 0, 516 516 "path": "facets/montague/entities/20260310.jsonl", 517 - "score": -2.9, 517 + "score": -1.9, 518 518 "stream": null, 519 519 "text": "### Person: <strong>Romeo</strong> Montague\n\n\nNamed co-lead of Verona Platform joint venture\n\n" 520 520 }, ··· 530 530 "id": "facets/verona/entities/20260310.jsonl:0", 531 531 "idx": 0, 532 532 "path": "facets/verona/entities/20260310.jsonl", 533 - "score": -3.0, 533 + "score": -1.9, 534 534 "stream": null, 535 535 "text": "### Person: <strong>Romeo</strong> Montague\n\n\nNamed co-lead of approved joint venture\n\n" 536 536 }, ··· 546 546 "id": "facets/montague/calendar/20260310.jsonl:0", 547 547 "idx": 0, 548 548 "path": "facets/montague/calendar/20260310.jsonl", 549 - "score": -2.3, 549 + "score": -1.5, 550 550 "stream": null, 551 551 "text": "### Event: Joint Board Meeting\n\n\n**Time Occurred:** 10:00 - 12:00\n**Participants:** <strong>Romeo</strong> Montague, Benvolio Montague\n\nQuarterly review with Verona Platform presentation\n" 552 552 }, ··· 562 562 "id": "facets/verona/calendar/20260310.jsonl:0", 563 563 "idx": 0, 564 564 "path": "facets/verona/calendar/20260310.jsonl", 565 - "score": -2.3, 565 + "score": -1.5, 566 566 "stream": null, 567 567 "text": "### Event: Board Presentation\n\n\n**Time Occurred:** 10:00 - 12:00\n**Participants:** <strong>Romeo</strong> Montague, Juliet Capulet, Friar Lawrence\n\nVerona Platform joint venture pitch\n" 568 568 }, ··· 578 578 "id": "20260310/agents/meetings.md:0", 579 579 "idx": 0, 580 580 "path": "20260310/agents/meetings.md", 581 - "score": -3.0, 581 + "score": -1.9, 582 582 "stream": null, 583 583 "text": "# Meetings\n\n- 08:30 Pre-Board Meeting Prep (<strong>Romeo</strong>, Juliet, Benvolio)\n" 584 584 } ··· 603 603 "id": "20260304/default/180000_300/agents/audio.md:0", 604 604 "idx": 0, 605 605 "path": "20260304/default/180000_300/agents/audio.md", 606 - "score": -2.9, 606 + "score": -1.9, 607 607 "stream": "default", 608 608 "text": "# Audio Summary\n\nEvening mixer at Denver Tech Summit. <strong>Romeo</strong> and Juliet had their first extended conversation about combining their API approaches. Mercutio tried to pull <strong>Romeo</strong> away to karaoke.\n" 609 609 }, ··· 619 619 "id": "facets/capulet/entities/20260304.jsonl:1", 620 620 "idx": 1, 621 621 "path": "facets/capulet/entities/20260304.jsonl", 622 - "score": -3.2, 622 + "score": -2.1, 623 623 "stream": null, 624 624 "text": "### Person: Tybalt Capulet\n\n\nConfronted <strong>Romeo</strong> at hackathon\n\n" 625 625 }, ··· 635 635 "id": "facets/montague/entities/20260304.jsonl:0", 636 636 "idx": 0, 637 637 "path": "facets/montague/entities/20260304.jsonl", 638 - "score": -3.0, 638 + "score": -1.9, 639 639 "stream": null, 640 640 "text": "### Person: <strong>Romeo</strong> Montague\n\n\nAttended Denver Tech Summit, met Juliet Capulet\n\n" 641 641 }, ··· 651 651 "id": "facets/capulet/events/20260304.jsonl:1", 652 652 "idx": 1, 653 653 "path": "facets/capulet/events/20260304.jsonl", 654 - "score": -3.2, 654 + "score": -2.1, 655 655 "stream": null, 656 656 "text": "### Social: Conference Mixer\n\n\n**Time Occurred:** 18:00 - 20:00\n**Participants:** Juliet Capulet, <strong>Romeo</strong> Montague\n\nNetworking event\n\nJuliet and <strong>Romeo</strong> exchanged Signal contacts\n" 657 657 }, ··· 667 667 "id": "facets/montague/events/20260304.jsonl:1", 668 668 "idx": 1, 669 669 "path": "facets/montague/events/20260304.jsonl", 670 - "score": -3.1, 670 + "score": -2.0, 671 671 "stream": null, 672 672 "text": "### Hackathon: Hackathon - API Bridge Challenge\n\n\n**Time Occurred:** 14:00 - 18:00\n**Participants:** <strong>Romeo</strong> Montague, Mercutio Escalus\n\nBuilt API bridge prototype\n\nTybalt confronted <strong>Romeo</strong>\n" 673 673 }
-10
tests/baselines/api/settings/generators.json
··· 174 174 }, 175 175 { 176 176 "app": null, 177 - "description": "Extracts people, companies, projects, and tools from segment content", 178 - "disabled": false, 179 - "extract": null, 180 - "has_extraction": false, 181 - "key": "entities", 182 - "source": "system", 183 - "title": "Entity Extraction" 184 - }, 185 - { 186 - "app": null, 187 177 "description": "Identifies who said what in each transcript segment. Layers 1-3 (owner, structural, acoustic) run computationally via hook; Layer 4 uses contextual LLM analysis for remaining unmatched sentences.", 188 178 "disabled": false, 189 179 "extract": null,
+176
tests/test_dream_preflight.py
··· 1 + # SPDX-License-Identifier: AGPL-3.0-only 2 + # Copyright (c) 2026 sol pbc 3 + 4 + """Tests for dream preflight skip evaluation.""" 5 + 6 + from __future__ import annotations 7 + 8 + import json 9 + 10 + import pytest 11 + 12 + 13 + @pytest.fixture 14 + def segment_dir(tmp_path, monkeypatch): 15 + journal = tmp_path / "journal" 16 + seg_dir = journal / "20240115" / "default" / "120000_300" 17 + seg_dir.mkdir(parents=True) 18 + (seg_dir / "agents").mkdir() 19 + monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(journal)) 20 + return seg_dir 21 + 22 + 23 + class TestShouldSkipPreflight: 24 + def test_daily_mode_never_skips(self): 25 + from think.dream import _should_skip_preflight 26 + 27 + assert _should_skip_preflight("observation", day="20240115", segment=None, stream=None) == ( 28 + False, 29 + None, 30 + ) 31 + 32 + def test_firstday_checkin_not_complete(self, monkeypatch): 33 + from think import awareness 34 + from think.dream import _should_skip_preflight 35 + 36 + monkeypatch.setattr(awareness, "get_onboarding", lambda: {"status": "observing"}) 37 + assert _should_skip_preflight( 38 + "firstday_checkin", day="20240115", segment="120000_300", stream="default" 39 + ) == (True, "preflight:not_complete") 40 + 41 + def test_firstday_checkin_already_sent(self, monkeypatch): 42 + from think import awareness 43 + from think.dream import _should_skip_preflight 44 + 45 + monkeypatch.setattr( 46 + awareness, 47 + "get_onboarding", 48 + lambda: {"status": "complete", "firstday_checkin_sent": "20260402T10:00:00"}, 49 + ) 50 + assert _should_skip_preflight( 51 + "firstday_checkin", day="20240115", segment="120000_300", stream="default" 52 + ) == (True, "preflight:already_sent") 53 + 54 + def test_observation_not_observing(self, monkeypatch): 55 + from think import awareness 56 + from think.dream import _should_skip_preflight 57 + 58 + monkeypatch.setattr(awareness, "get_onboarding", lambda: {"status": "complete"}) 59 + assert _should_skip_preflight( 60 + "observation", day="20240115", segment="120000_300", stream="default" 61 + ) == (True, "preflight:not_observing") 62 + 63 + def test_speaker_attribution_requires_embeddings(self, segment_dir): 64 + from think.dream import _should_skip_preflight 65 + 66 + assert _should_skip_preflight( 67 + "speaker_attribution", 68 + day="20240115", 69 + segment="120000_300", 70 + stream="default", 71 + ) == (True, "preflight:no_embeddings") 72 + 73 + (segment_dir / "audio.npz").write_bytes(b"x") 74 + assert _should_skip_preflight( 75 + "speaker_attribution", 76 + day="20240115", 77 + segment="120000_300", 78 + stream="default", 79 + ) == (False, None) 80 + 81 + def test_speakers_requires_transcripts(self, segment_dir): 82 + from think.dream import _should_skip_preflight 83 + 84 + assert _should_skip_preflight( 85 + "speakers", 86 + day="20240115", 87 + segment="120000_300", 88 + stream="default", 89 + ) == (True, "preflight:no_transcripts") 90 + 91 + def test_speakers_skips_single_speaker(self, segment_dir): 92 + from think.dream import _should_skip_preflight 93 + 94 + (segment_dir / "audio.jsonl").write_text( 95 + "\n".join( 96 + [ 97 + json.dumps({"raw": "audio.flac"}), 98 + json.dumps({"start": "00:00:01", "speaker": 1, "text": "hello"}), 99 + json.dumps({"start": "00:00:02", "speaker": 1, "text": "again"}), 100 + ] 101 + ) 102 + + "\n", 103 + encoding="utf-8", 104 + ) 105 + assert _should_skip_preflight( 106 + "speakers", 107 + day="20240115", 108 + segment="120000_300", 109 + stream="default", 110 + ) == (True, "preflight:single_speaker") 111 + 112 + def test_speakers_runs_for_multiple_speakers(self, segment_dir): 113 + from think.dream import _should_skip_preflight 114 + 115 + (segment_dir / "audio.jsonl").write_text( 116 + "\n".join( 117 + [ 118 + json.dumps({"raw": "audio.flac"}), 119 + json.dumps({"start": "00:00:01", "speaker": 1, "text": "hello"}), 120 + json.dumps({"start": "00:00:02", "speaker": 2, "text": "hi"}), 121 + ] 122 + ) 123 + + "\n", 124 + encoding="utf-8", 125 + ) 126 + assert _should_skip_preflight( 127 + "speakers", 128 + day="20240115", 129 + segment="120000_300", 130 + stream="default", 131 + ) == (False, None) 132 + 133 + def test_activities_requires_previous_segment(self, segment_dir): 134 + from think.dream import _should_skip_preflight 135 + 136 + assert _should_skip_preflight( 137 + "activities", 138 + day="20240115", 139 + segment="120000_300", 140 + stream="default", 141 + ) == (True, "preflight:no_previous_segment") 142 + 143 + def test_activities_requires_previous_activity_state(self, tmp_path, monkeypatch): 144 + from think.dream import _should_skip_preflight 145 + 146 + journal = tmp_path / "journal" 147 + prev_dir = journal / "20240115" / "default" / "110000_300" 148 + curr_dir = journal / "20240115" / "default" / "120000_300" 149 + prev_dir.mkdir(parents=True) 150 + curr_dir.mkdir(parents=True) 151 + monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(journal)) 152 + 153 + assert _should_skip_preflight( 154 + "activities", 155 + day="20240115", 156 + segment="120000_300", 157 + stream="default", 158 + ) == (True, "preflight:no_previous_activity_state") 159 + 160 + def test_activities_runs_with_previous_activity_state(self, tmp_path, monkeypatch): 161 + from think.dream import _should_skip_preflight 162 + 163 + journal = tmp_path / "journal" 164 + prev_dir = journal / "20240115" / "default" / "110000_300" / "agents" / "work" 165 + curr_dir = journal / "20240115" / "default" / "120000_300" 166 + prev_dir.mkdir(parents=True) 167 + curr_dir.mkdir(parents=True) 168 + (prev_dir / "activity_state.json").write_text("[]", encoding="utf-8") 169 + monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(journal)) 170 + 171 + assert _should_skip_preflight( 172 + "activities", 173 + day="20240115", 174 + segment="120000_300", 175 + stream="default", 176 + ) == (False, None)
+308
tests/test_dream_segment.py
··· 150 150 monkeypatch.setattr(dream, "get_enabled_facets", mock_get_enabled_facets) 151 151 monkeypatch.setattr(dream, "get_active_facets", mock_get_active_facets) 152 152 monkeypatch.setattr(dream, "run_queued_command", mock_run_queued_command) 153 + monkeypatch.setattr(dream, "_classify_segment_density", lambda *args: "active") 153 154 154 155 success, failed, failed_names = dream.run_prompts_by_priority( 155 156 "20240115", "120000_300", refresh=False, verbose=False ··· 208 209 monkeypatch.setattr(dream, "wait_for_agents", mock_wait_for_agents) 209 210 monkeypatch.setattr(dream, "get_muse_configs", mock_get_muse_configs) 210 211 monkeypatch.setattr(dream, "get_enabled_facets", mock_get_enabled_facets) 212 + monkeypatch.setattr(dream, "_classify_segment_density", lambda *args: "active") 211 213 212 214 success, failed, failed_names = dream.run_prompts_by_priority( 213 215 "20240115", "120000_300", refresh=False, verbose=False ··· 259 261 monkeypatch.setattr(dream, "wait_for_agents", mock_wait_for_agents) 260 262 monkeypatch.setattr(dream, "get_muse_configs", mock_get_muse_configs) 261 263 monkeypatch.setattr(dream, "get_enabled_facets", mock_get_enabled_facets) 264 + monkeypatch.setattr(dream, "_classify_segment_density", lambda *args: "active") 262 265 263 266 success, failed, failed_names = dream.run_prompts_by_priority( 264 267 "20240115", "120000_300", refresh=False, verbose=False ··· 311 314 monkeypatch.setattr(dream, "get_enabled_facets", mock_get_enabled_facets) 312 315 monkeypatch.setattr(dream, "get_active_facets", mock_get_active_facets) 313 316 monkeypatch.setattr(dream, "run_queued_command", mock_run_queued_command) 317 + monkeypatch.setattr(dream, "_classify_segment_density", lambda *args: "active") 314 318 315 319 dream.run_prompts_by_priority( 316 320 "20240115", "120000_300", refresh=False, verbose=False, stream="default" ··· 411 415 monkeypatch.setattr(dream, "get_muse_configs", mock_get_muse_configs) 412 416 monkeypatch.setattr(dream, "get_enabled_facets", mock_get_enabled_facets) 413 417 monkeypatch.setattr(dream, "get_active_facets", mock_get_active_facets) 418 + monkeypatch.setattr(dream, "_classify_segment_density", lambda *args: "active") 414 419 415 420 success, failed, failed_names = dream.run_prompts_by_priority( 416 421 "20240115", "120000_300", refresh=False, verbose=False ··· 592 597 monkeypatch.setattr( 593 598 dream, "run_queued_command", lambda cmd, day, timeout=60: True 594 599 ) 600 + monkeypatch.setattr(dream, "_classify_segment_density", lambda *args: "active") 595 601 596 602 dream.run_prompts_by_priority( 597 603 "20240115", ··· 602 608 ) 603 609 604 610 assert wait_calls == [None] 611 + 612 + def test_pulse_counter_runs_every_sixth_segment(self, segment_dir, monkeypatch): 613 + """Pulse is skipped five times, then runs on the sixth segment.""" 614 + from think import dream 615 + 616 + spawned = [] 617 + 618 + def mock_cortex_request(prompt, name, config=None): 619 + spawned.append(name) 620 + return f"agent-{name}" 621 + 622 + def mock_wait_for_agents(agent_ids, timeout=600): 623 + return ({aid: "finish" for aid in agent_ids}, []) 624 + 625 + def mock_get_muse_configs(schedule=None, **kwargs): 626 + return { 627 + "pulse": { 628 + "priority": 99, 629 + "type": "cogitate", 630 + "schedule": "segment", 631 + }, 632 + } 633 + 634 + monkeypatch.setattr(dream, "cortex_request", mock_cortex_request) 635 + monkeypatch.setattr(dream, "wait_for_agents", mock_wait_for_agents) 636 + monkeypatch.setattr(dream, "get_muse_configs", mock_get_muse_configs) 637 + monkeypatch.setattr(dream, "get_enabled_facets", lambda: {}) 638 + monkeypatch.setattr(dream, "get_active_facets", lambda day: set()) 639 + monkeypatch.setattr(dream, "_callosum", None) 640 + 641 + for _ in range(5): 642 + dream.run_prompts_by_priority( 643 + "20240115", "120000_300", refresh=False, verbose=False 644 + ) 645 + 646 + assert spawned == [] 647 + 648 + dream.run_prompts_by_priority( 649 + "20240115", "120000_300", refresh=False, verbose=False 650 + ) 651 + assert spawned == ["pulse"] 652 + 653 + def test_pulse_counter_resets_on_activity_change(self, segment_dir, monkeypatch): 654 + """Pulse runs immediately when activity_state changes.""" 655 + from think import dream 656 + 657 + current_dir = segment_dir.parent / "120500_300" 658 + (current_dir / "agents").mkdir(parents=True) 659 + (current_dir / "agents" / "facets.json").write_text( 660 + json.dumps([{"facet": "work", "activity": "Coding", "level": "high"}]) 661 + ) 662 + 663 + spawned = [] 664 + 665 + def mock_cortex_request(prompt, name, config=None): 666 + spawned.append(name) 667 + return f"agent-{name}" 668 + 669 + def mock_wait_for_agents(agent_ids, timeout=600): 670 + return ({aid: "finish" for aid in agent_ids}, []) 671 + 672 + def mock_get_muse_configs(schedule=None, **kwargs): 673 + return { 674 + "activity_state": { 675 + "priority": 95, 676 + "type": "generate", 677 + "multi_facet": True, 678 + "output": "json", 679 + "schedule": "segment", 680 + }, 681 + "pulse": { 682 + "priority": 99, 683 + "type": "cogitate", 684 + "schedule": "segment", 685 + }, 686 + } 687 + 688 + monkeypatch.setattr(dream, "cortex_request", mock_cortex_request) 689 + monkeypatch.setattr(dream, "wait_for_agents", mock_wait_for_agents) 690 + monkeypatch.setattr(dream, "get_muse_configs", mock_get_muse_configs) 691 + monkeypatch.setattr(dream, "get_enabled_facets", lambda: {"work": {"title": "Work"}}) 692 + monkeypatch.setattr(dream, "load_segment_facets", lambda *args, **kwargs: ["work"]) 693 + monkeypatch.setattr(dream, "_detect_activity_state_change", lambda *args, **kwargs: True) 694 + monkeypatch.setattr(dream, "_callosum", None) 695 + 696 + dream.run_prompts_by_priority( 697 + "20240115", "120000_300", refresh=False, verbose=False 698 + ) 699 + 700 + assert spawned == ["activity_state", "pulse"] 701 + 702 + def test_activity_state_carry_forward_writes_output(self, segment_dir, monkeypatch): 703 + """Cached activity_state is written directly when classification matches.""" 704 + from think import callosum, dream 705 + 706 + current_dir = segment_dir.parent / "120500_300" 707 + (current_dir / "agents").mkdir(parents=True) 708 + (current_dir / "agents" / "facets.json").write_text( 709 + json.dumps([{"facet": "work", "activity": "Coding", "level": "high"}]) 710 + ) 711 + 712 + sent = [] 713 + monkeypatch.setattr( 714 + callosum, 715 + "callosum_send", 716 + lambda tract, event, **kwargs: sent.append((tract, event, kwargs)), 717 + ) 718 + 719 + cache = { 720 + "default:work": { 721 + "state": [ 722 + { 723 + "id": "coding_120000_300", 724 + "activity": "coding", 725 + "state": "active", 726 + "since": "120000_300", 727 + "description": "Coding", 728 + "level": "high", 729 + } 730 + ], 731 + "facet_classification": "Coding", 732 + "segment": "120000_300", 733 + "carry_count": 1, 734 + "updated_at": 1, 735 + } 736 + } 737 + 738 + carried = dream._try_carry_forward_activity_state( 739 + day="20240115", 740 + segment="120500_300", 741 + stream="default", 742 + facet="work", 743 + cache=cache, 744 + ) 745 + 746 + assert carried is True 747 + output_path = ( 748 + segment_dir.parent / "120500_300" / "agents" / "work" / "activity_state.json" 749 + ) 750 + assert output_path.exists() 751 + assert sent[0][0:2] == ("activity", "live") 752 + assert cache["default:work"]["carry_count"] == 2 753 + 754 + def test_activity_state_carry_forward_requires_matching_classification( 755 + self, segment_dir 756 + ): 757 + from think import dream 758 + 759 + current_dir = segment_dir.parent / "120500_300" 760 + (current_dir / "agents").mkdir(parents=True) 761 + (current_dir / "agents" / "facets.json").write_text( 762 + json.dumps([{"facet": "work", "activity": "Email", "level": "high"}]) 763 + ) 764 + 765 + cache = { 766 + "default:work": { 767 + "state": [], 768 + "facet_classification": "Coding", 769 + "segment": "120000_300", 770 + "carry_count": 1, 771 + "updated_at": 1, 772 + } 773 + } 774 + 775 + assert ( 776 + dream._try_carry_forward_activity_state( 777 + day="20240115", 778 + segment="120500_300", 779 + stream="default", 780 + facet="work", 781 + cache=cache, 782 + ) 783 + is False 784 + ) 785 + 786 + def test_activity_state_carry_forward_respects_gap(self, segment_dir): 787 + from think import dream 788 + 789 + current_dir = segment_dir.parent / "120500_300" 790 + (current_dir / "agents").mkdir(parents=True) 791 + (current_dir / "agents" / "facets.json").write_text( 792 + json.dumps([{"facet": "work", "activity": "Coding", "level": "high"}]) 793 + ) 794 + 795 + cache = { 796 + "default:work": { 797 + "state": [], 798 + "facet_classification": "Coding", 799 + "segment": "100000_300", 800 + "carry_count": 1, 801 + "updated_at": 1, 802 + } 803 + } 804 + 805 + assert ( 806 + dream._try_carry_forward_activity_state( 807 + day="20240115", 808 + segment="120500_300", 809 + stream="default", 810 + facet="work", 811 + cache=cache, 812 + ) 813 + is False 814 + ) 815 + 816 + def test_activity_state_carry_forward_respects_carry_limit(self, segment_dir): 817 + from think import dream 818 + 819 + current_dir = segment_dir.parent / "120500_300" 820 + (current_dir / "agents").mkdir(parents=True) 821 + (current_dir / "agents" / "facets.json").write_text( 822 + json.dumps([{"facet": "work", "activity": "Coding", "level": "high"}]) 823 + ) 824 + 825 + cache = { 826 + "default:work": { 827 + "state": [], 828 + "facet_classification": "Coding", 829 + "segment": "120000_300", 830 + "carry_count": 6, 831 + "updated_at": 1, 832 + } 833 + } 834 + 835 + assert ( 836 + dream._try_carry_forward_activity_state( 837 + day="20240115", 838 + segment="120500_300", 839 + stream="default", 840 + facet="work", 841 + cache=cache, 842 + ) 843 + is False 844 + ) 845 + 846 + def test_activity_state_carry_forward_preserves_incremented_cache( 847 + self, segment_dir, monkeypatch 848 + ): 849 + from think import dream 850 + 851 + current_segment = "120500_300" 852 + current_dir = segment_dir.parent / current_segment 853 + (current_dir / "agents").mkdir(parents=True) 854 + (current_dir / "agents" / "facets.json").write_text( 855 + json.dumps([{"facet": "work", "activity": "Coding", "level": "high"}]), 856 + encoding="utf-8", 857 + ) 858 + 859 + cache_path = dream._activity_state_cache_path("20240115") 860 + cache_path.parent.mkdir(parents=True, exist_ok=True) 861 + cache_path.write_text( 862 + json.dumps( 863 + { 864 + "default:work": { 865 + "state": [ 866 + { 867 + "id": "coding_120000_300", 868 + "activity": "coding", 869 + "state": "active", 870 + "since": "120000_300", 871 + "description": "Coding", 872 + "level": "high", 873 + } 874 + ], 875 + "facet_classification": "Coding", 876 + "segment": "120000_300", 877 + "carry_count": 1, 878 + "updated_at": 1, 879 + } 880 + } 881 + ), 882 + encoding="utf-8", 883 + ) 884 + 885 + def mock_wait_for_agents(agent_ids, timeout=600): 886 + return ({aid: "finish" for aid in agent_ids}, []) 887 + 888 + def mock_get_muse_configs(schedule=None, **kwargs): 889 + return { 890 + "activity_state": { 891 + "priority": 95, 892 + "type": "generate", 893 + "multi_facet": True, 894 + "output": "json", 895 + "schedule": "segment", 896 + }, 897 + } 898 + 899 + monkeypatch.setattr(dream, "cortex_request", lambda *args, **kwargs: None) 900 + monkeypatch.setattr(dream, "wait_for_agents", mock_wait_for_agents) 901 + monkeypatch.setattr(dream, "get_muse_configs", mock_get_muse_configs) 902 + monkeypatch.setattr(dream, "get_enabled_facets", lambda: {"work": {"title": "Work"}}) 903 + monkeypatch.setattr(dream, "load_segment_facets", lambda *args, **kwargs: ["work"]) 904 + monkeypatch.setattr(dream, "_callosum", None) 905 + 906 + dream.run_prompts_by_priority( 907 + "20240115", current_segment, refresh=False, verbose=False 908 + ) 909 + 910 + cache = json.loads(cache_path.read_text(encoding="utf-8")) 911 + assert cache["default:work"]["carry_count"] == 2 912 + assert cache["default:work"]["segment"] == current_segment
+55
tests/test_entities_hook.py
··· 1 + # SPDX-License-Identifier: AGPL-3.0-only 2 + # Copyright (c) 2026 sol pbc 3 + 4 + """Tests for entity hook behavior across schedules.""" 5 + 6 + from __future__ import annotations 7 + 8 + import json 9 + from pathlib import Path 10 + 11 + 12 + def test_entities_post_process_writes_without_segment(tmp_path): 13 + from muse.entities import post_process 14 + 15 + output_path = ( 16 + tmp_path 17 + / "facets" 18 + / "work" 19 + / "activities" 20 + / "20240115" 21 + / "coding_120000_300" 22 + / "entities.md" 23 + ) 24 + result = "* Person: Alice Smith - Mentioned in the meeting\n" 25 + 26 + post_process(result, {"output_path": str(output_path)}) 27 + 28 + entities_path = output_path.parent / "entities.jsonl" 29 + assert entities_path.exists() 30 + rows = [json.loads(line) for line in entities_path.read_text(encoding="utf-8").splitlines()] 31 + assert rows == [ 32 + { 33 + "type": "Person", 34 + "name": "Alice Smith", 35 + "description": "Mentioned in the meeting", 36 + } 37 + ] 38 + 39 + 40 + def test_entities_post_process_requires_output_path(caplog): 41 + from muse.entities import post_process 42 + 43 + post_process("* Person: Alice Smith - Mentioned in the meeting\n", {}) 44 + 45 + assert "missing output_path" in caplog.text 46 + 47 + 48 + def test_entities_muse_is_activity_scheduled(): 49 + from think.muse import get_muse_configs 50 + 51 + segment_prompts = get_muse_configs(schedule="segment") 52 + activity_prompts = get_muse_configs(schedule="activity") 53 + 54 + assert "entities" not in segment_prompts 55 + assert activity_prompts["entities"]["activities"] == ["*"]
+73
tests/test_segment_gate.py
··· 1 + # SPDX-License-Identifier: AGPL-3.0-only 2 + # Copyright (c) 2026 sol pbc 3 + 4 + """Tests for segment density classification.""" 5 + 6 + from __future__ import annotations 7 + 8 + import json 9 + 10 + 11 + def test_missing_segment_dir_is_active(tmp_path, monkeypatch): 12 + from think.dream import _classify_segment_density 13 + 14 + monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path / "journal")) 15 + assert _classify_segment_density("20240115", "120000_300", "default") == "active" 16 + 17 + 18 + def test_idle_segment(tmp_path, monkeypatch): 19 + from think.dream import _classify_segment_density 20 + 21 + seg_dir = tmp_path / "journal" / "20240115" / "default" / "120000_300" 22 + seg_dir.mkdir(parents=True) 23 + (seg_dir / "audio.jsonl").write_text(json.dumps({"raw": "audio.flac"}) + "\n") 24 + 25 + monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path / "journal")) 26 + assert _classify_segment_density("20240115", "120000_300", "default") == "idle" 27 + 28 + 29 + def test_low_change_segment(tmp_path, monkeypatch): 30 + from think.dream import _classify_segment_density 31 + 32 + seg_dir = tmp_path / "journal" / "20240115" / "default" / "120000_300" 33 + seg_dir.mkdir(parents=True) 34 + audio_lines = [json.dumps({"raw": "audio.flac"})] 35 + audio_lines.extend( 36 + json.dumps({"start": f"00:00:0{i}", "text": "line"}) for i in range(1, 9) 37 + ) 38 + (seg_dir / "audio.jsonl").write_text("\n".join(audio_lines) + "\n") 39 + screen_lines = [json.dumps({"raw": "screen.webm"})] 40 + screen_lines.extend( 41 + json.dumps({"timestamp": i, "analysis": {"visual_description": "screen"}}) 42 + for i in range(3) 43 + ) 44 + (seg_dir / "screen.jsonl").write_text("\n".join(screen_lines) + "\n") 45 + 46 + monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path / "journal")) 47 + assert _classify_segment_density("20240115", "120000_300", "default") == "low_change" 48 + 49 + 50 + def test_active_segment_with_imported_md(tmp_path, monkeypatch): 51 + from think.dream import _classify_segment_density 52 + 53 + seg_dir = tmp_path / "journal" / "20240115" / "default" / "120000_300" 54 + seg_dir.mkdir(parents=True) 55 + (seg_dir / "imported.md").write_text("\n".join(f"line {i}" for i in range(12)) + "\n") 56 + 57 + monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path / "journal")) 58 + assert _classify_segment_density("20240115", "120000_300", "default") == "active" 59 + 60 + 61 + def test_tmux_screen_has_no_header(tmp_path, monkeypatch): 62 + from think.dream import _classify_segment_density 63 + 64 + seg_dir = tmp_path / "journal" / "20240115" / "default" / "120000_300" 65 + seg_dir.mkdir(parents=True) 66 + screen_lines = [ 67 + json.dumps({"frame_id": 1, "timestamp": 0, "analysis": {"primary": "tmux"}}), 68 + json.dumps({"frame_id": 2, "timestamp": 1, "analysis": {"primary": "tmux"}}), 69 + ] 70 + (seg_dir / "tmux_0_screen.jsonl").write_text("\n".join(screen_lines) + "\n") 71 + 72 + monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path / "journal")) 73 + assert _classify_segment_density("20240115", "120000_300", "default") == "low_change"
+467
think/dream.py
··· 10 10 11 11 import argparse 12 12 import fnmatch 13 + import json 13 14 import logging 14 15 import sys 15 16 import threading ··· 304 305 return (success, failed, failed_names) 305 306 306 307 308 + def _segment_dir(day: str, segment: str, stream: str | None) -> Path: 309 + """Return the expected segment directory without creating it.""" 310 + return day_path(day) / (stream or "default") / segment 311 + 312 + 313 + def _resolve_segment_dir( 314 + day: str, 315 + segment: str, 316 + stream: str | None, 317 + ) -> Path | None: 318 + """Resolve a segment directory, searching across streams when needed.""" 319 + if stream: 320 + path = _segment_dir(day, segment, stream) 321 + return path if path.is_dir() else None 322 + 323 + for seg_stream, seg_key, seg_path in iter_segments(day): 324 + if seg_key == segment: 325 + return seg_path 326 + return None 327 + 328 + 329 + def _load_json_file(path: Path, default: object) -> object: 330 + """Load JSON from a file, returning the provided default on failure.""" 331 + if not path.exists(): 332 + return default 333 + try: 334 + return json.loads(path.read_text(encoding="utf-8")) 335 + except (json.JSONDecodeError, OSError): 336 + return default 337 + 338 + 339 + def _write_json_atomic(path: Path, data: object) -> None: 340 + """Atomically write JSON data to a file.""" 341 + path.parent.mkdir(parents=True, exist_ok=True) 342 + tmp = path.with_suffix(f"{path.suffix}.tmp") 343 + tmp.write_text(json.dumps(data), encoding="utf-8") 344 + tmp.replace(path) 345 + 346 + 347 + def _count_nonempty_lines(path: Path) -> list[str]: 348 + """Return non-empty lines from a text file.""" 349 + try: 350 + return [line for line in path.read_text(encoding="utf-8").splitlines() if line] 351 + except OSError: 352 + return [] 353 + 354 + 355 + def _load_segment_facet_rows( 356 + day: str, 357 + segment: str, 358 + stream: str | None, 359 + ) -> list[dict]: 360 + """Load raw facets.json rows for a segment.""" 361 + seg_dir = _resolve_segment_dir(day, segment, stream) 362 + if not seg_dir: 363 + return [] 364 + 365 + facets_path = seg_dir / "agents" / "facets.json" 366 + data = _load_json_file(facets_path, []) 367 + return data if isinstance(data, list) else [] 368 + 369 + 370 + def _has_audio_embeddings(seg_dir: Path) -> bool: 371 + """Return True when a segment has audio embedding files.""" 372 + for npz_path in seg_dir.glob("*.npz"): 373 + if npz_path.stem == "audio" or npz_path.stem.endswith("_audio"): 374 + return True 375 + return False 376 + 377 + 378 + def _should_skip_preflight( 379 + prompt_name: str, 380 + *, 381 + day: str, 382 + segment: str | None, 383 + stream: str | None, 384 + ) -> tuple[bool, str | None]: 385 + """Return whether a prompt can be skipped before sending a cortex request.""" 386 + if not segment: 387 + return (False, None) 388 + 389 + if prompt_name == "firstday_checkin": 390 + from think.awareness import get_onboarding 391 + 392 + onboarding = get_onboarding() 393 + if onboarding.get("status") != "complete": 394 + return (True, "preflight:not_complete") 395 + if onboarding.get("firstday_checkin_sent"): 396 + return (True, "preflight:already_sent") 397 + return (False, None) 398 + 399 + if prompt_name == "observation": 400 + from think.awareness import get_onboarding 401 + 402 + onboarding = get_onboarding() 403 + if onboarding.get("status") != "observing": 404 + return (True, "preflight:not_observing") 405 + return (False, None) 406 + 407 + seg_dir = _resolve_segment_dir(day, segment, stream) 408 + if seg_dir is None: 409 + return (False, None) 410 + 411 + if prompt_name == "speaker_attribution": 412 + if not _has_audio_embeddings(seg_dir): 413 + return (True, "preflight:no_embeddings") 414 + return (False, None) 415 + 416 + if prompt_name == "speakers": 417 + transcript_files = sorted(seg_dir.glob("audio.jsonl")) 418 + transcript_files.extend(sorted(seg_dir.glob("*_audio.jsonl"))) 419 + transcript_files.extend(sorted(seg_dir.glob("*_transcript.jsonl"))) 420 + if not transcript_files: 421 + return (True, "preflight:no_transcripts") 422 + 423 + speakers: set[str] = set() 424 + for transcript_path in transcript_files: 425 + lines = _count_nonempty_lines(transcript_path) 426 + for line in lines[1:]: 427 + try: 428 + entry = json.loads(line) 429 + except json.JSONDecodeError: 430 + continue 431 + speaker = entry.get("speaker") 432 + if speaker is not None: 433 + speakers.add(str(speaker)) 434 + if len(speakers) < 2: 435 + return (True, "preflight:single_speaker") 436 + return (False, None) 437 + 438 + if prompt_name == "activities": 439 + from muse.activity_state import find_previous_segment 440 + 441 + previous_segment = find_previous_segment(day, segment, stream=stream) 442 + if not previous_segment: 443 + return (True, "preflight:no_previous_segment") 444 + 445 + prev_dir = _resolve_segment_dir(day, previous_segment, stream) 446 + if prev_dir is None: 447 + return (True, "preflight:no_previous_activity_state") 448 + 449 + agents_dir = prev_dir / "agents" 450 + for facet_dir in sorted(agents_dir.iterdir()) if agents_dir.is_dir() else []: 451 + if facet_dir.is_dir() and (facet_dir / "activity_state.json").exists(): 452 + return (False, None) 453 + return (True, "preflight:no_previous_activity_state") 454 + 455 + return (False, None) 456 + 457 + 458 + def _classify_segment_density( 459 + day: str, 460 + segment: str, 461 + stream: str | None, 462 + ) -> str: 463 + """Classify segment content density as 'idle', 'low_change', or 'active'.""" 464 + seg_dir = _segment_dir(day, segment, stream) 465 + if not seg_dir.exists(): 466 + return "active" 467 + 468 + transcript_lines = 0 469 + transcript_files = sorted(seg_dir.glob("audio.jsonl")) 470 + transcript_files.extend(sorted(seg_dir.glob("*_audio.jsonl"))) 471 + transcript_files.extend(sorted(seg_dir.glob("*_transcript.jsonl"))) 472 + for transcript_path in transcript_files: 473 + lines = _count_nonempty_lines(transcript_path) 474 + transcript_lines += max(0, len(lines) - 1) 475 + 476 + imported_md = seg_dir / "imported.md" 477 + if imported_md.exists(): 478 + transcript_lines += len(_count_nonempty_lines(imported_md)) 479 + 480 + screen_frames = 0 481 + screen_files = sorted(seg_dir.glob("screen.jsonl")) 482 + screen_files.extend(sorted(seg_dir.glob("*_screen.jsonl"))) 483 + for screen_path in screen_files: 484 + lines = _count_nonempty_lines(screen_path) 485 + if not lines: 486 + continue 487 + subtract_header = False 488 + try: 489 + first_entry = json.loads(lines[0]) 490 + subtract_header = isinstance(first_entry, dict) and "raw" in first_entry 491 + except json.JSONDecodeError: 492 + subtract_header = False 493 + screen_frames += max(0, len(lines) - 1 if subtract_header else len(lines)) 494 + 495 + if transcript_lines < 3 and screen_frames < 2: 496 + return "idle" 497 + if transcript_lines < 10 and screen_frames < 5: 498 + return "low_change" 499 + return "active" 500 + 501 + 502 + def _activity_state_cache_path(day: str) -> Path: 503 + return day_path(day) / "health" / "activity_state_cache.json" 504 + 505 + 506 + def _load_activity_state_cache(day: str) -> dict: 507 + data = _load_json_file(_activity_state_cache_path(day), {}) 508 + return data if isinstance(data, dict) else {} 509 + 510 + 511 + def _save_activity_state_cache(day: str, data: dict) -> None: 512 + _write_json_atomic(_activity_state_cache_path(day), data) 513 + 514 + 515 + def _emit_activity_live_entries( 516 + *, 517 + day: str, 518 + segment: str, 519 + facet: str, 520 + entries: list[dict], 521 + ) -> None: 522 + """Emit activity.live events for active entries.""" 523 + from think.callosum import callosum_send 524 + 525 + for entry in entries: 526 + if entry.get("state") != "active": 527 + continue 528 + try: 529 + callosum_send( 530 + "activity", 531 + "live", 532 + facet=facet, 533 + day=day, 534 + segment=segment, 535 + id=entry["id"], 536 + activity=entry["activity"], 537 + since=entry["since"], 538 + description=entry.get("description", ""), 539 + level=entry.get("level", "medium"), 540 + active_entities=entry.get("active_entities", []), 541 + ) 542 + except Exception as exc: 543 + logging.warning( 544 + "Failed to emit carried activity.live for %s/%s: %s", 545 + facet, 546 + entry.get("id", "unknown"), 547 + exc, 548 + ) 549 + 550 + 551 + def _try_carry_forward_activity_state( 552 + *, 553 + day: str, 554 + segment: str, 555 + stream: str | None, 556 + facet: str, 557 + cache: dict, 558 + ) -> bool: 559 + """Try to carry forward cached activity_state for a facet.""" 560 + cache_key = f"{stream or 'default'}:{facet}" 561 + entry = cache.get(cache_key) 562 + if not isinstance(entry, dict): 563 + return False 564 + 565 + if int(entry.get("carry_count", 0)) >= 6: 566 + return False 567 + 568 + cached_segment = entry.get("segment", "") 569 + if not cached_segment: 570 + return False 571 + 572 + current_start, _current_end = segment_parse(segment) 573 + _cached_start, cached_end = segment_parse(cached_segment) 574 + if current_start is None or cached_end is None: 575 + return False 576 + 577 + current_dt = datetime.combine(date.today(), current_start) 578 + cached_end_dt = datetime.combine(date.today(), cached_end) 579 + gap = current_dt - cached_end_dt 580 + if gap < timedelta(0) or gap > timedelta(minutes=10): 581 + return False 582 + 583 + facet_rows = _load_segment_facet_rows(day, segment, stream) 584 + facet_row = next((row for row in facet_rows if row.get("facet") == facet), None) 585 + if not facet_row: 586 + return False 587 + if facet_row.get("activity", "") != entry.get("facet_classification", ""): 588 + return False 589 + 590 + state = entry.get("state") 591 + if not isinstance(state, list): 592 + return False 593 + 594 + seg_dir = _segment_dir(day, segment, stream) 595 + output_path = seg_dir / "agents" / facet / "activity_state.json" 596 + _write_json_atomic(output_path, state) 597 + _emit_activity_live_entries(day=day, segment=segment, facet=facet, entries=state) 598 + 599 + entry["carry_count"] = int(entry.get("carry_count", 0)) + 1 600 + entry["segment"] = segment 601 + entry["updated_at"] = int(time.time()) 602 + cache[cache_key] = entry 603 + return True 604 + 605 + 606 + def _refresh_activity_state_cache( 607 + *, 608 + day: str, 609 + segment: str, 610 + stream: str | None, 611 + facets: list[str], 612 + cache: dict, 613 + ) -> None: 614 + """Refresh activity_state cache entries from newly-written output files.""" 615 + if not facets: 616 + return 617 + 618 + facet_rows = _load_segment_facet_rows(day, segment, stream) 619 + facet_activity = { 620 + row.get("facet"): row.get("activity", "") 621 + for row in facet_rows 622 + if isinstance(row, dict) and row.get("facet") 623 + } 624 + seg_dir = _segment_dir(day, segment, stream) 625 + 626 + for facet in facets: 627 + state_path = seg_dir / "agents" / facet / "activity_state.json" 628 + data = _load_json_file(state_path, None) 629 + if not isinstance(data, list): 630 + continue 631 + 632 + cache[f"{stream or 'default'}:{facet}"] = { 633 + "state": data, 634 + "facet_classification": facet_activity.get(facet, ""), 635 + "segment": segment, 636 + "carry_count": 0, 637 + "updated_at": int(time.time()), 638 + } 639 + 640 + 641 + def _pulse_counter_path(day: str) -> Path: 642 + return day_path(day) / "health" / "pulse_counter.json" 643 + 644 + 645 + def _load_pulse_counter(day: str) -> dict: 646 + data = _load_json_file(_pulse_counter_path(day), {"count": 0, "last_segment": ""}) 647 + return data if isinstance(data, dict) else {"count": 0, "last_segment": ""} 648 + 649 + 650 + def _save_pulse_counter(day: str, data: dict) -> None: 651 + _write_json_atomic(_pulse_counter_path(day), data) 652 + 653 + 654 + def _active_activity_keys_for_segment( 655 + day: str, 656 + segment: str, 657 + stream: str | None, 658 + ) -> set[tuple[str, str, str]]: 659 + """Return active (facet, activity, since) tuples for a segment.""" 660 + seg_dir = _resolve_segment_dir(day, segment, stream) 661 + if not seg_dir: 662 + return set() 663 + 664 + keys: set[tuple[str, str, str]] = set() 665 + agents_dir = seg_dir / "agents" 666 + if not agents_dir.is_dir(): 667 + return keys 668 + 669 + for facet_dir in sorted(agents_dir.iterdir()): 670 + if not facet_dir.is_dir(): 671 + continue 672 + state_path = facet_dir / "activity_state.json" 673 + data = _load_json_file(state_path, None) 674 + if not isinstance(data, list): 675 + continue 676 + for item in data: 677 + if item.get("state") == "active": 678 + keys.add( 679 + ( 680 + facet_dir.name, 681 + str(item.get("activity", "")), 682 + str(item.get("since", "")), 683 + ) 684 + ) 685 + return keys 686 + 687 + 688 + def _detect_activity_state_change( 689 + day: str, 690 + segment: str, 691 + stream: str | None, 692 + ) -> bool: 693 + """Check whether activity_state changed for this segment.""" 694 + from muse.activity_state import find_previous_segment 695 + 696 + previous_segment = find_previous_segment(day, segment, stream=stream) 697 + if not previous_segment: 698 + return True 699 + 700 + current_keys = _active_activity_keys_for_segment(day, segment, stream) 701 + previous_keys = _active_activity_keys_for_segment(day, previous_segment, stream) 702 + return current_keys != previous_keys 703 + 704 + 307 705 def run_prompts_by_priority( 308 706 day: str, 309 707 segment: str | None, ··· 347 745 priority = config["priority"] # Required field, validated by get_muse_configs 348 746 priority_groups.setdefault(priority, []).append((name, config)) 349 747 748 + segment_density = "active" 749 + if segment: 750 + segment_density = _classify_segment_density(day, segment, stream) 751 + if segment_density != "active": 752 + logging.info("Segment %s classified as %s", segment, segment_density) 753 + 754 + activity_changed = False 755 + as_cache = _load_activity_state_cache(day) if segment else {} 756 + 350 757 # Pre-compute shared data for multi-facet prompts 351 758 day_formatted = iso_date(day) 352 759 input_summary = day_input_summary(day) ··· 392 799 _update_status(current_group_priority=priority) 393 800 logging.info(f"Starting priority {priority} ({len(prompts_list)} prompts)") 394 801 802 + if priority == 10 and segment_density != "active": 803 + logging.info( 804 + "Skipping priority-10 group (%d agents): segment %s is %s", 805 + len(prompts_list), 806 + segment, 807 + segment_density, 808 + ) 809 + continue 810 + 395 811 emit( 396 812 "group_started", 397 813 mode=target_schedule, ··· 406 822 if segment: 407 823 raw_facets = load_segment_facets(day, segment, stream=stream) 408 824 active_facets = set(f for f in raw_facets if f in enabled_facets) 825 + activity_state_llm_facets: set[str] = set() 409 826 410 827 spawned: list[ 411 828 tuple[str, str, dict, str | None] ··· 425 842 ) 426 843 continue 427 844 845 + skip, skip_reason = _should_skip_preflight( 846 + prompt_name, 847 + day=day, 848 + segment=segment, 849 + stream=stream, 850 + ) 851 + if skip: 852 + logging.info("Skipping %s: %s", prompt_name, skip_reason) 853 + continue 854 + 428 855 # Skip pulse when sol/pulse.md is already current for this segment 429 856 if prompt_name == "pulse" and segment and not refresh: 430 857 try: ··· 442 869 except Exception: 443 870 pass 444 871 872 + if not activity_changed: 873 + pulse_counter = _load_pulse_counter(day) 874 + if pulse_counter.get("count", 0) < 5: 875 + pulse_counter["count"] = int(pulse_counter.get("count", 0)) + 1 876 + pulse_counter["last_segment"] = segment 877 + _save_pulse_counter(day, pulse_counter) 878 + logging.info( 879 + "Skipping pulse: counter %d/6 for %s", 880 + pulse_counter["count"], 881 + segment, 882 + ) 883 + continue 884 + _save_pulse_counter(day, {"count": 0, "last_segment": segment}) 885 + else: 886 + _save_pulse_counter(day, {"count": 0, "last_segment": segment}) 887 + 445 888 try: 446 889 if config.get("multi_facet"): 447 890 always_run = config.get("always", False) ··· 455 898 continue 456 899 457 900 logging.info(f"Spawning {prompt_name} for facet: {facet_name}") 901 + if prompt_name == "activity_state" and segment: 902 + if not refresh and _try_carry_forward_activity_state( 903 + day=day, 904 + segment=segment, 905 + stream=stream, 906 + facet=facet_name, 907 + cache=as_cache, 908 + ): 909 + logging.info( 910 + "Carry-forward activity_state for %s", facet_name 911 + ) 912 + continue 913 + activity_state_llm_facets.add(facet_name) 458 914 459 915 # Always pass day for instructions.day context 460 916 request_config: dict = {"facet": facet_name, "day": day} ··· 614 1070 + group_failed, 615 1071 current_agents=[], 616 1072 ) 1073 + 1074 + if priority == 95 and segment: 1075 + _refresh_activity_state_cache( 1076 + day=day, 1077 + segment=segment, 1078 + stream=stream, 1079 + facets=sorted(activity_state_llm_facets), 1080 + cache=as_cache, 1081 + ) 1082 + _save_activity_state_cache(day, as_cache) 1083 + activity_changed = _detect_activity_state_change(day, segment, stream) 617 1084 618 1085 total_success += group_success 619 1086 total_failed += group_failed