Merge branch 'hopper-layn7pdo-dream-efficiency'

+5 -3

muse/entities.md

··· 4 4 "title": "Entity Extraction", 5 5 "description": "Extracts people, companies, projects, and tools from segment content", 6 6 "color": "#2e7d32", 7 - "schedule": "segment", 7 + "schedule": "activity", 8 + "activities": ["*"], 8 9 "priority": 10, 9 10 "hook": {"post": "entities"}, 10 11 "thinking_budget": 4096, ··· 12 13 "output": "md", 13 14 "instructions": { 14 15 "sources": {"transcripts": true, "percepts": true, "agents": false}, 15 - "facets": false 16 + "facets": false, 17 + "activity": true 16 18 } 17 19 18 20 } 19 21 20 - $segment_preamble 22 + $activity_preamble 21 23 22 24 Extract named entities and descriptions from the given segment transcription document. 23 25

+16 -12

muse/entities.py

··· 5 5 6 6 This hook is invoked via "hook": {"post": "entities"} in generator frontmatter. 7 7 It parses the markdown entity list and writes deduplicated entities 8 - to a JSONL file in the segment directory. 8 + to a JSONL file next to the agent output. 9 9 """ 10 10 11 11 import json ··· 65 65 66 66 67 67 def post_process(result: str, context: dict) -> str | None: 68 - """Parse entity list and write to segment JSONL file. 68 + """Parse entity list and write to an adjacent JSONL file. 69 69 70 70 Args: 71 71 result: The generated output content (markdown entity list). ··· 75 75 Returns: 76 76 None - this hook does not modify the output result. 77 77 """ 78 - segment = context.get("segment") 79 - if not segment: 80 - logging.warning("entities hook requires segment mode") 81 - return None 82 - 83 78 # Parse entities from result 84 79 entities = [] 85 80 unparsed = [] ··· 94 89 unparsed.append(line) 95 90 96 91 if unparsed: 97 - print(f"Warning: {len(unparsed)} unparsed entity lines:") 92 + logging.warning("entities hook: %d unparsed entity lines", len(unparsed)) 98 93 for line in unparsed: 99 - print(f" {line}") 94 + logging.warning("entities hook: unparsed line: %s", line) 100 95 101 96 if not entities: 102 97 logging.info("entities hook: no entities extracted") ··· 119 114 ) 120 115 121 116 # Write entities.jsonl alongside the agent output in the agents/ directory 122 - output_path = Path(context.get("output_path", "")) 117 + output_path_value = context.get("output_path") 118 + if not output_path_value: 119 + logging.error("entities hook: missing output_path in context") 120 + return None 121 + 122 + output_path = Path(output_path_value) 123 123 agents_dir = output_path.parent 124 124 jsonl_path = agents_dir / "entities.jsonl" 125 125 126 126 # Write JSONL file 127 127 try: 128 128 jsonl_path.parent.mkdir(parents=True, exist_ok=True) 129 - with open(jsonl_path, "w") as f: 129 + with open(jsonl_path, "w", encoding="utf-8") as f: 130 130 for entity in unique_entities: 131 131 f.write(json.dumps(entity) + "\n") 132 - print(f"Entities written to: {jsonl_path} ({len(unique_entities)} entities)") 132 + logging.info( 133 + "entities hook: wrote %d entities to %s", 134 + len(unique_entities), 135 + jsonl_path, 136 + ) 133 137 except Exception as e: 134 138 logging.error("entities hook: failed to write JSONL: %s", e) 135 139

+1 -1

tests/baselines/api/agents/agents-day.json

··· 105 105 "description": "Extracts people, companies, projects, and tools from segment content", 106 106 "multi_facet": false, 107 107 "output_format": "md", 108 - "schedule": "segment", 108 + "schedule": "activity", 109 109 "source": "system", 110 110 "title": "Entity Extraction", 111 111 "type": "generate"

-11

tests/baselines/api/entities/journal-entities.json

··· 25 25 { 26 26 "aka": [], 27 27 "blocked": false, 28 - "facets": [], 29 - "id": "person_a", 30 - "is_principal": false, 31 - "last_active_ts": 0, 32 - "name": "Person A", 33 - "total_observation_count": 0, 34 - "type": "Person" 35 - }, 36 - { 37 - "aka": [], 38 - "blocked": false, 39 28 "facets": [ 40 29 { 41 30 "attached_at": null,

+1 -1

tests/baselines/api/search/day-results.json

··· 14 14 "id": "20260304/agents/knowledge_graph.md:7", 15 15 "idx": 7, 16 16 "path": "20260304/agents/knowledge_graph.md", 17 - "score": -2.6, 17 + "score": -2.0, 18 18 "stream": null, 19 19 "text": "# Part 1: Entity Extraction and Relationship Mapping\n\n## Relationship Mapping\n\n| Source Name | Target Name | Relationship Type | Context |\n| :--- | :--- | :--- | :--- |\n| **Romeo Montague** | **Juliet Capulet** | `met-at-conference` | First meeting at Denver Tech Summit keynote. |\n" 20 20 }

+34 -34

tests/baselines/api/search/search.json

··· 85 85 "id": "20260306/default/093000_300/agents/audio.md:0", 86 86 "idx": 0, 87 87 "path": "20260306/default/093000_300/agents/audio.md", 88 - "score": -2.6, 88 + "score": -1.7, 89 89 "stream": "default", 90 90 "text": "# Audio Summary Morning standup. Benvolio noticed Romeo's late-night GitHub activity and pressed him about API gateway commits. Romeo deflected, calling it a personal mesh routing prototype. Mercutio covered for him. Balthasar reported progress on the mesh routing fallback PR with an edge case for Romeo to review. Benvolio scheduled..." 91 91 }, ··· 101 101 "id": "facets/montague/entities/20260306.jsonl:0", 102 102 "idx": 0, 103 103 "path": "facets/montague/entities/20260306.jsonl", 104 - "score": -3.2, 104 + "score": -2.1, 105 105 "stream": null, 106 106 "text": "### Person: Romeo Montague\n\n\nContinued Verona Platform development\n\n" 107 107 }, ··· 117 117 "id": "facets/montague/entities/20260306.jsonl:3", 118 118 "idx": 3, 119 119 "path": "facets/montague/entities/20260306.jsonl", 120 - "score": -3.1, 120 + "score": -2.0, 121 121 "stream": null, 122 122 "text": "### Person: Balthasar Davi\n\n\nReviewed mesh routing PR with Romeo\n\n" 123 123 }, ··· 133 133 "id": "facets/montague/entities/20260306.jsonl:4", 134 134 "idx": 4, 135 135 "path": "facets/montague/entities/20260306.jsonl", 136 - "score": -3.2, 136 + "score": -2.0, 137 137 "stream": null, 138 138 "text": "### Person: Mercutio Escalus\n\n\nCovered for Romeo during standup\n\n" 139 139 }, ··· 149 149 "id": "20260306/default/093000_300/agents/screen.md:0", 150 150 "idx": 0, 151 151 "path": "20260306/default/093000_300/agents/screen.md", 152 - "score": -2.8, 152 + "score": -1.8, 153 153 "stream": "default", 154 154 "text": "# Screen Summary\n\nSlack standup channel. Benvolio questioning Romeo about late-night commits.\n" 155 155 } ··· 174 174 "id": "facets/verona/logs/20260309.jsonl:1", 175 175 "idx": 1, 176 176 "path": "facets/verona/logs/20260309.jsonl", 177 - "score": -2.4, 177 + "score": -1.6, 178 178 "stream": null, 179 179 "text": "### Deploy Complete by romeo_montague\n\n**Source:** deploy | **Time:** 13:45:00\n\n**Parameters:**\n- service: verona-gateway\n- version: 0.9.0\n" 180 180 }, ··· 190 190 "id": "20260309/default/090000_300/agents/audio.md:0", 191 191 "idx": 0, 192 192 "path": "20260309/default/090000_300/agents/audio.md", 193 - "score": -2.3, 193 + "score": -1.5, 194 194 "stream": "default", 195 195 "text": "# Audio Summary\n\nRomeo confessed the project to Benvolio and asked for infrastructure help. Benvolio agreed to spin up a Kubernetes staging cluster.\n" 196 196 }, ··· 206 206 "id": "facets/montague/entities/20260309.jsonl:0", 207 207 "idx": 0, 208 208 "path": "facets/montague/entities/20260309.jsonl", 209 - "score": -3.1, 209 + "score": -2.0, 210 210 "stream": null, 211 211 "text": "### Person: Romeo Montague\n\n\nConfessed project to Benvolio, preparing demo\n\n" 212 212 }, ··· 222 222 "id": "facets/montague/calendar/20260309.jsonl:0", 223 223 "idx": 0, 224 224 "path": "facets/montague/calendar/20260309.jsonl", 225 - "score": -2.6, 225 + "score": -1.7, 226 226 "stream": null, 227 227 "text": "### Event: Team Standup\n\n\n**Time Occurred:** 09:00 - 09:30\n**Participants:** Romeo Montague, Benvolio Montague\n\nDaily sync\n" 228 228 }, ··· 238 238 "id": "facets/verona/calendar/20260309.jsonl:0", 239 239 "idx": 0, 240 240 "path": "facets/verona/calendar/20260309.jsonl", 241 - "score": -2.3, 241 + "score": -1.5, 242 242 "stream": null, 243 243 "text": "### Event: Demo Sprint\n\n\n**Time Occurred:** 09:00 - 21:00\n**Participants:** Romeo Montague, Juliet Capulet, Benvolio Montague\n\nFull day board presentation preparation\n" 244 244 } ··· 263 263 "id": "20260307/default/100000_300/agents/audio.md:0", 264 264 "idx": 0, 265 265 "path": "20260307/default/100000_300/agents/audio.md", 266 - "score": -3.1, 266 + "score": -2.0, 267 267 "stream": "default", 268 268 "text": "# Audio Summary\n\nHeated confrontation. Tybalt Capulet accused Romeo of stealing Capulet IP. Mercutio defended Romeo and had his Capulet consulting contract terminated by Tybalt.\n" 269 269 }, ··· 279 279 "id": "20260307/default/150000_300/agents/audio.md:0", 280 280 "idx": 0, 281 281 "path": "20260307/default/150000_300/agents/audio.md", 282 - "score": -3.3, 282 + "score": -2.2, 283 283 "stream": "default", 284 284 "text": "# Audio Summary\n\nEmergency meeting at Montague Tech. Benvolio questioned Romeo about the secret project. Romeo clarified no company IP was shared. Team discussed legal exposure. Romeo proposed Professor Lawrence as mediator.\n" 285 285 }, ··· 295 295 "id": "facets/montague/entities/20260307.jsonl:0", 296 296 "idx": 0, 297 297 "path": "facets/montague/entities/20260307.jsonl", 298 - "score": -3.1, 298 + "score": -2.0, 299 299 "stream": null, 300 300 "text": "### Person: Romeo Montague\n\n\nConfronted by Tybalt, called emergency meeting\n\n" 301 301 }, ··· 311 311 "id": "facets/montague/calendar/20260307.jsonl:0", 312 312 "idx": 0, 313 313 "path": "facets/montague/calendar/20260307.jsonl", 314 - "score": -2.4, 314 + "score": -1.5, 315 315 "stream": null, 316 316 "text": "### Event: Emergency Team Meeting\n\n\n**Time Occurred:** 15:00 - 16:00\n**Participants:** Romeo Montague, Benvolio Montague\n\nCrisis response to Capulet situation\n" 317 317 }, ··· 327 327 "id": "facets/montague/events/20260307.jsonl:0", 328 328 "idx": 0, 329 329 "path": "facets/montague/events/20260307.jsonl", 330 - "score": -2.9, 330 + "score": -1.9, 331 331 "stream": null, 332 332 "text": "### Meeting: Confrontation with Tybalt\n\n\n**Time Occurred:** 10:00 - 10:30\n**Participants:** Romeo Montague, Tybalt Capulet, Mercutio Escalus\n\nTybalt accused Romeo of IP theft\n\nMercutio fired from Capulet contract\n" 333 333 } ··· 352 352 "id": "facets/montague/entities/20260308.jsonl:0", 353 353 "idx": 0, 354 354 "path": "facets/montague/entities/20260308.jsonl", 355 - "score": -3.1, 355 + "score": -2.0, 356 356 "stream": null, 357 357 "text": "### Person: Romeo Montague\n\n\nUnder board pressure, planning board presentation\n\n" 358 358 }, ··· 368 368 "id": "facets/verona/events/20260308.jsonl:0", 369 369 "idx": 0, 370 370 "path": "facets/verona/events/20260308.jsonl", 371 - "score": -2.1, 371 + "score": -1.3, 372 372 "stream": null, 373 373 "text": "### Meeting: Strategy Call with Professor Lawrence\n\n\n**Time Occurred:** 10:00 - 11:00\n**Participants:** Romeo Montague, Juliet Capulet, Friar Lawrence\n\nJoint venture strategy planning\n\nProposed board presentation strategy\n" 374 374 }, ··· 384 384 "id": "20260308/agents/knowledge_graph.md:2", 385 385 "idx": 2, 386 386 "path": "20260308/agents/knowledge_graph.md", 387 - "score": -2.0, 387 + "score": -1.3, 388 388 "stream": null, 389 389 "text": "# Part 1: Entity Extraction and Relationship Mapping ## Entity Profiles | Entity Name | Entity Type | First Appearance | Total Engagement | Context | | :--- | :--- | :--- | :--- | :--- | | **Romeo Montague** | Person | 10:00 | High | Under board pressure,..." 390 390 }, ··· 400 400 "id": "20260308/agents/meetings.md:0", 401 401 "idx": 0, 402 402 "path": "20260308/agents/meetings.md", 403 - "score": -2.9, 403 + "score": -1.9, 404 404 "stream": null, 405 405 "text": "# Meetings\n\n- 10:00 Strategy Call with Professor Lawrence, Romeo, and Juliet\n" 406 406 } ··· 425 425 "id": "facets/verona/logs/20260305.jsonl:0", 426 426 "idx": 0, 427 427 "path": "facets/verona/logs/20260305.jsonl", 428 - "score": -2.5, 428 + "score": -1.6, 429 429 "stream": null, 430 430 "text": "### Repo Created by romeo_montague\n\n**Source:** github | **Time:** 22:05:00\n\n**Parameters:**\n- repo: balcony-app\n- visibility: private\n" 431 431 }, ··· 441 441 "id": "20260305/default/090000_300/agents/audio.md:0", 442 442 "idx": 0, 443 443 "path": "20260305/default/090000_300/agents/audio.md", 444 - "score": -2.9, 444 + "score": -1.9, 445 445 "stream": "default", 446 446 "text": "# Audio Summary\n\nMorning standup at Montague Tech. Benvolio reported CI pipeline is green. Romeo mentioned wanting to explore ideas from the conference. Mercutio teased about Romeo meeting someone.\n" 447 447 }, ··· 457 457 "id": "facets/montague/entities/20260305.jsonl:0", 458 458 "idx": 0, 459 459 "path": "facets/montague/entities/20260305.jsonl", 460 - "score": -3.1, 460 + "score": -2.0, 461 461 "stream": null, 462 462 "text": "### Person: Romeo Montague\n\n\nStarted Balcony App prototype with Juliet\n\n" 463 463 }, ··· 473 473 "id": "facets/verona/entities/20260305.jsonl:0", 474 474 "idx": 0, 475 475 "path": "facets/verona/entities/20260305.jsonl", 476 - "score": -3.1, 476 + "score": -2.0, 477 477 "stream": null, 478 478 "text": "### Person: Romeo Montague\n\n\nSet up private repo for collaboration\n\n" 479 479 }, ··· 489 489 "id": "facets/montague/events/20260305.jsonl:0", 490 490 "idx": 0, 491 491 "path": "facets/montague/events/20260305.jsonl", 492 - "score": -3.1, 492 + "score": -2.0, 493 493 "stream": null, 494 494 "text": "### Meeting: Montague Tech Daily Standup\n\n\n**Time Occurred:** 09:00 - 09:30\n**Participants:** Romeo Montague, Benvolio Montague, Mercutio Escalus\n\nTeam standup\n\nRomeo mentioned conference ideas\n" 495 495 } ··· 514 514 "id": "facets/montague/entities/20260310.jsonl:0", 515 515 "idx": 0, 516 516 "path": "facets/montague/entities/20260310.jsonl", 517 - "score": -2.9, 517 + "score": -1.9, 518 518 "stream": null, 519 519 "text": "### Person: Romeo Montague\n\n\nNamed co-lead of Verona Platform joint venture\n\n" 520 520 }, ··· 530 530 "id": "facets/verona/entities/20260310.jsonl:0", 531 531 "idx": 0, 532 532 "path": "facets/verona/entities/20260310.jsonl", 533 - "score": -3.0, 533 + "score": -1.9, 534 534 "stream": null, 535 535 "text": "### Person: Romeo Montague\n\n\nNamed co-lead of approved joint venture\n\n" 536 536 }, ··· 546 546 "id": "facets/montague/calendar/20260310.jsonl:0", 547 547 "idx": 0, 548 548 "path": "facets/montague/calendar/20260310.jsonl", 549 - "score": -2.3, 549 + "score": -1.5, 550 550 "stream": null, 551 551 "text": "### Event: Joint Board Meeting\n\n\n**Time Occurred:** 10:00 - 12:00\n**Participants:** Romeo Montague, Benvolio Montague\n\nQuarterly review with Verona Platform presentation\n" 552 552 }, ··· 562 562 "id": "facets/verona/calendar/20260310.jsonl:0", 563 563 "idx": 0, 564 564 "path": "facets/verona/calendar/20260310.jsonl", 565 - "score": -2.3, 565 + "score": -1.5, 566 566 "stream": null, 567 567 "text": "### Event: Board Presentation\n\n\n**Time Occurred:** 10:00 - 12:00\n**Participants:** Romeo Montague, Juliet Capulet, Friar Lawrence\n\nVerona Platform joint venture pitch\n" 568 568 }, ··· 578 578 "id": "20260310/agents/meetings.md:0", 579 579 "idx": 0, 580 580 "path": "20260310/agents/meetings.md", 581 - "score": -3.0, 581 + "score": -1.9, 582 582 "stream": null, 583 583 "text": "# Meetings\n\n- 08:30 Pre-Board Meeting Prep (Romeo, Juliet, Benvolio)\n" 584 584 } ··· 603 603 "id": "20260304/default/180000_300/agents/audio.md:0", 604 604 "idx": 0, 605 605 "path": "20260304/default/180000_300/agents/audio.md", 606 - "score": -2.9, 606 + "score": -1.9, 607 607 "stream": "default", 608 608 "text": "# Audio Summary\n\nEvening mixer at Denver Tech Summit. Romeo and Juliet had their first extended conversation about combining their API approaches. Mercutio tried to pull Romeo away to karaoke.\n" 609 609 }, ··· 619 619 "id": "facets/capulet/entities/20260304.jsonl:1", 620 620 "idx": 1, 621 621 "path": "facets/capulet/entities/20260304.jsonl", 622 - "score": -3.2, 622 + "score": -2.1, 623 623 "stream": null, 624 624 "text": "### Person: Tybalt Capulet\n\n\nConfronted Romeo at hackathon\n\n" 625 625 }, ··· 635 635 "id": "facets/montague/entities/20260304.jsonl:0", 636 636 "idx": 0, 637 637 "path": "facets/montague/entities/20260304.jsonl", 638 - "score": -3.0, 638 + "score": -1.9, 639 639 "stream": null, 640 640 "text": "### Person: Romeo Montague\n\n\nAttended Denver Tech Summit, met Juliet Capulet\n\n" 641 641 }, ··· 651 651 "id": "facets/capulet/events/20260304.jsonl:1", 652 652 "idx": 1, 653 653 "path": "facets/capulet/events/20260304.jsonl", 654 - "score": -3.2, 654 + "score": -2.1, 655 655 "stream": null, 656 656 "text": "### Social: Conference Mixer\n\n\n**Time Occurred:** 18:00 - 20:00\n**Participants:** Juliet Capulet, Romeo Montague\n\nNetworking event\n\nJuliet and Romeo exchanged Signal contacts\n" 657 657 }, ··· 667 667 "id": "facets/montague/events/20260304.jsonl:1", 668 668 "idx": 1, 669 669 "path": "facets/montague/events/20260304.jsonl", 670 - "score": -3.1, 670 + "score": -2.0, 671 671 "stream": null, 672 672 "text": "### Hackathon: Hackathon - API Bridge Challenge\n\n\n**Time Occurred:** 14:00 - 18:00\n**Participants:** Romeo Montague, Mercutio Escalus\n\nBuilt API bridge prototype\n\nTybalt confronted Romeo\n" 673 673 }

-10

tests/baselines/api/settings/generators.json

··· 174 174 }, 175 175 { 176 176 "app": null, 177 - "description": "Extracts people, companies, projects, and tools from segment content", 178 - "disabled": false, 179 - "extract": null, 180 - "has_extraction": false, 181 - "key": "entities", 182 - "source": "system", 183 - "title": "Entity Extraction" 184 - }, 185 - { 186 - "app": null, 187 177 "description": "Identifies who said what in each transcript segment. Layers 1-3 (owner, structural, acoustic) run computationally via hook; Layer 4 uses contextual LLM analysis for remaining unmatched sentences.", 188 178 "disabled": false, 189 179 "extract": null,

+176

tests/test_dream_preflight.py

··· 1 + # SPDX-License-Identifier: AGPL-3.0-only 2 + # Copyright (c) 2026 sol pbc 3 + 4 + """Tests for dream preflight skip evaluation.""" 5 + 6 + from __future__ import annotations 7 + 8 + import json 9 + 10 + import pytest 11 + 12 + 13 + @pytest.fixture 14 + def segment_dir(tmp_path, monkeypatch): 15 + journal = tmp_path / "journal" 16 + seg_dir = journal / "20240115" / "default" / "120000_300" 17 + seg_dir.mkdir(parents=True) 18 + (seg_dir / "agents").mkdir() 19 + monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(journal)) 20 + return seg_dir 21 + 22 + 23 + class TestShouldSkipPreflight: 24 + def test_daily_mode_never_skips(self): 25 + from think.dream import _should_skip_preflight 26 + 27 + assert _should_skip_preflight("observation", day="20240115", segment=None, stream=None) == ( 28 + False, 29 + None, 30 + ) 31 + 32 + def test_firstday_checkin_not_complete(self, monkeypatch): 33 + from think import awareness 34 + from think.dream import _should_skip_preflight 35 + 36 + monkeypatch.setattr(awareness, "get_onboarding", lambda: {"status": "observing"}) 37 + assert _should_skip_preflight( 38 + "firstday_checkin", day="20240115", segment="120000_300", stream="default" 39 + ) == (True, "preflight:not_complete") 40 + 41 + def test_firstday_checkin_already_sent(self, monkeypatch): 42 + from think import awareness 43 + from think.dream import _should_skip_preflight 44 + 45 + monkeypatch.setattr( 46 + awareness, 47 + "get_onboarding", 48 + lambda: {"status": "complete", "firstday_checkin_sent": "20260402T10:00:00"}, 49 + ) 50 + assert _should_skip_preflight( 51 + "firstday_checkin", day="20240115", segment="120000_300", stream="default" 52 + ) == (True, "preflight:already_sent") 53 + 54 + def test_observation_not_observing(self, monkeypatch): 55 + from think import awareness 56 + from think.dream import _should_skip_preflight 57 + 58 + monkeypatch.setattr(awareness, "get_onboarding", lambda: {"status": "complete"}) 59 + assert _should_skip_preflight( 60 + "observation", day="20240115", segment="120000_300", stream="default" 61 + ) == (True, "preflight:not_observing") 62 + 63 + def test_speaker_attribution_requires_embeddings(self, segment_dir): 64 + from think.dream import _should_skip_preflight 65 + 66 + assert _should_skip_preflight( 67 + "speaker_attribution", 68 + day="20240115", 69 + segment="120000_300", 70 + stream="default", 71 + ) == (True, "preflight:no_embeddings") 72 + 73 + (segment_dir / "audio.npz").write_bytes(b"x") 74 + assert _should_skip_preflight( 75 + "speaker_attribution", 76 + day="20240115", 77 + segment="120000_300", 78 + stream="default", 79 + ) == (False, None) 80 + 81 + def test_speakers_requires_transcripts(self, segment_dir): 82 + from think.dream import _should_skip_preflight 83 + 84 + assert _should_skip_preflight( 85 + "speakers", 86 + day="20240115", 87 + segment="120000_300", 88 + stream="default", 89 + ) == (True, "preflight:no_transcripts") 90 + 91 + def test_speakers_skips_single_speaker(self, segment_dir): 92 + from think.dream import _should_skip_preflight 93 + 94 + (segment_dir / "audio.jsonl").write_text( 95 + "\n".join( 96 + [ 97 + json.dumps({"raw": "audio.flac"}), 98 + json.dumps({"start": "00:00:01", "speaker": 1, "text": "hello"}), 99 + json.dumps({"start": "00:00:02", "speaker": 1, "text": "again"}), 100 + ] 101 + ) 102 + + "\n", 103 + encoding="utf-8", 104 + ) 105 + assert _should_skip_preflight( 106 + "speakers", 107 + day="20240115", 108 + segment="120000_300", 109 + stream="default", 110 + ) == (True, "preflight:single_speaker") 111 + 112 + def test_speakers_runs_for_multiple_speakers(self, segment_dir): 113 + from think.dream import _should_skip_preflight 114 + 115 + (segment_dir / "audio.jsonl").write_text( 116 + "\n".join( 117 + [ 118 + json.dumps({"raw": "audio.flac"}), 119 + json.dumps({"start": "00:00:01", "speaker": 1, "text": "hello"}), 120 + json.dumps({"start": "00:00:02", "speaker": 2, "text": "hi"}), 121 + ] 122 + ) 123 + + "\n", 124 + encoding="utf-8", 125 + ) 126 + assert _should_skip_preflight( 127 + "speakers", 128 + day="20240115", 129 + segment="120000_300", 130 + stream="default", 131 + ) == (False, None) 132 + 133 + def test_activities_requires_previous_segment(self, segment_dir): 134 + from think.dream import _should_skip_preflight 135 + 136 + assert _should_skip_preflight( 137 + "activities", 138 + day="20240115", 139 + segment="120000_300", 140 + stream="default", 141 + ) == (True, "preflight:no_previous_segment") 142 + 143 + def test_activities_requires_previous_activity_state(self, tmp_path, monkeypatch): 144 + from think.dream import _should_skip_preflight 145 + 146 + journal = tmp_path / "journal" 147 + prev_dir = journal / "20240115" / "default" / "110000_300" 148 + curr_dir = journal / "20240115" / "default" / "120000_300" 149 + prev_dir.mkdir(parents=True) 150 + curr_dir.mkdir(parents=True) 151 + monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(journal)) 152 + 153 + assert _should_skip_preflight( 154 + "activities", 155 + day="20240115", 156 + segment="120000_300", 157 + stream="default", 158 + ) == (True, "preflight:no_previous_activity_state") 159 + 160 + def test_activities_runs_with_previous_activity_state(self, tmp_path, monkeypatch): 161 + from think.dream import _should_skip_preflight 162 + 163 + journal = tmp_path / "journal" 164 + prev_dir = journal / "20240115" / "default" / "110000_300" / "agents" / "work" 165 + curr_dir = journal / "20240115" / "default" / "120000_300" 166 + prev_dir.mkdir(parents=True) 167 + curr_dir.mkdir(parents=True) 168 + (prev_dir / "activity_state.json").write_text("[]", encoding="utf-8") 169 + monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(journal)) 170 + 171 + assert _should_skip_preflight( 172 + "activities", 173 + day="20240115", 174 + segment="120000_300", 175 + stream="default", 176 + ) == (False, None)

+308

tests/test_dream_segment.py

··· 150 150 monkeypatch.setattr(dream, "get_enabled_facets", mock_get_enabled_facets) 151 151 monkeypatch.setattr(dream, "get_active_facets", mock_get_active_facets) 152 152 monkeypatch.setattr(dream, "run_queued_command", mock_run_queued_command) 153 + monkeypatch.setattr(dream, "_classify_segment_density", lambda *args: "active") 153 154 154 155 success, failed, failed_names = dream.run_prompts_by_priority( 155 156 "20240115", "120000_300", refresh=False, verbose=False ··· 208 209 monkeypatch.setattr(dream, "wait_for_agents", mock_wait_for_agents) 209 210 monkeypatch.setattr(dream, "get_muse_configs", mock_get_muse_configs) 210 211 monkeypatch.setattr(dream, "get_enabled_facets", mock_get_enabled_facets) 212 + monkeypatch.setattr(dream, "_classify_segment_density", lambda *args: "active") 211 213 212 214 success, failed, failed_names = dream.run_prompts_by_priority( 213 215 "20240115", "120000_300", refresh=False, verbose=False ··· 259 261 monkeypatch.setattr(dream, "wait_for_agents", mock_wait_for_agents) 260 262 monkeypatch.setattr(dream, "get_muse_configs", mock_get_muse_configs) 261 263 monkeypatch.setattr(dream, "get_enabled_facets", mock_get_enabled_facets) 264 + monkeypatch.setattr(dream, "_classify_segment_density", lambda *args: "active") 262 265 263 266 success, failed, failed_names = dream.run_prompts_by_priority( 264 267 "20240115", "120000_300", refresh=False, verbose=False ··· 311 314 monkeypatch.setattr(dream, "get_enabled_facets", mock_get_enabled_facets) 312 315 monkeypatch.setattr(dream, "get_active_facets", mock_get_active_facets) 313 316 monkeypatch.setattr(dream, "run_queued_command", mock_run_queued_command) 317 + monkeypatch.setattr(dream, "_classify_segment_density", lambda *args: "active") 314 318 315 319 dream.run_prompts_by_priority( 316 320 "20240115", "120000_300", refresh=False, verbose=False, stream="default" ··· 411 415 monkeypatch.setattr(dream, "get_muse_configs", mock_get_muse_configs) 412 416 monkeypatch.setattr(dream, "get_enabled_facets", mock_get_enabled_facets) 413 417 monkeypatch.setattr(dream, "get_active_facets", mock_get_active_facets) 418 + monkeypatch.setattr(dream, "_classify_segment_density", lambda *args: "active") 414 419 415 420 success, failed, failed_names = dream.run_prompts_by_priority( 416 421 "20240115", "120000_300", refresh=False, verbose=False ··· 592 597 monkeypatch.setattr( 593 598 dream, "run_queued_command", lambda cmd, day, timeout=60: True 594 599 ) 600 + monkeypatch.setattr(dream, "_classify_segment_density", lambda *args: "active") 595 601 596 602 dream.run_prompts_by_priority( 597 603 "20240115", ··· 602 608 ) 603 609 604 610 assert wait_calls == [None] 611 + 612 + def test_pulse_counter_runs_every_sixth_segment(self, segment_dir, monkeypatch): 613 + """Pulse is skipped five times, then runs on the sixth segment.""" 614 + from think import dream 615 + 616 + spawned = [] 617 + 618 + def mock_cortex_request(prompt, name, config=None): 619 + spawned.append(name) 620 + return f"agent-{name}" 621 + 622 + def mock_wait_for_agents(agent_ids, timeout=600): 623 + return ({aid: "finish" for aid in agent_ids}, []) 624 + 625 + def mock_get_muse_configs(schedule=None, **kwargs): 626 + return { 627 + "pulse": { 628 + "priority": 99, 629 + "type": "cogitate", 630 + "schedule": "segment", 631 + }, 632 + } 633 + 634 + monkeypatch.setattr(dream, "cortex_request", mock_cortex_request) 635 + monkeypatch.setattr(dream, "wait_for_agents", mock_wait_for_agents) 636 + monkeypatch.setattr(dream, "get_muse_configs", mock_get_muse_configs) 637 + monkeypatch.setattr(dream, "get_enabled_facets", lambda: {}) 638 + monkeypatch.setattr(dream, "get_active_facets", lambda day: set()) 639 + monkeypatch.setattr(dream, "_callosum", None) 640 + 641 + for _ in range(5): 642 + dream.run_prompts_by_priority( 643 + "20240115", "120000_300", refresh=False, verbose=False 644 + ) 645 + 646 + assert spawned == [] 647 + 648 + dream.run_prompts_by_priority( 649 + "20240115", "120000_300", refresh=False, verbose=False 650 + ) 651 + assert spawned == ["pulse"] 652 + 653 + def test_pulse_counter_resets_on_activity_change(self, segment_dir, monkeypatch): 654 + """Pulse runs immediately when activity_state changes.""" 655 + from think import dream 656 + 657 + current_dir = segment_dir.parent / "120500_300" 658 + (current_dir / "agents").mkdir(parents=True) 659 + (current_dir / "agents" / "facets.json").write_text( 660 + json.dumps([{"facet": "work", "activity": "Coding", "level": "high"}]) 661 + ) 662 + 663 + spawned = [] 664 + 665 + def mock_cortex_request(prompt, name, config=None): 666 + spawned.append(name) 667 + return f"agent-{name}" 668 + 669 + def mock_wait_for_agents(agent_ids, timeout=600): 670 + return ({aid: "finish" for aid in agent_ids}, []) 671 + 672 + def mock_get_muse_configs(schedule=None, **kwargs): 673 + return { 674 + "activity_state": { 675 + "priority": 95, 676 + "type": "generate", 677 + "multi_facet": True, 678 + "output": "json", 679 + "schedule": "segment", 680 + }, 681 + "pulse": { 682 + "priority": 99, 683 + "type": "cogitate", 684 + "schedule": "segment", 685 + }, 686 + } 687 + 688 + monkeypatch.setattr(dream, "cortex_request", mock_cortex_request) 689 + monkeypatch.setattr(dream, "wait_for_agents", mock_wait_for_agents) 690 + monkeypatch.setattr(dream, "get_muse_configs", mock_get_muse_configs) 691 + monkeypatch.setattr(dream, "get_enabled_facets", lambda: {"work": {"title": "Work"}}) 692 + monkeypatch.setattr(dream, "load_segment_facets", lambda *args, **kwargs: ["work"]) 693 + monkeypatch.setattr(dream, "_detect_activity_state_change", lambda *args, **kwargs: True) 694 + monkeypatch.setattr(dream, "_callosum", None) 695 + 696 + dream.run_prompts_by_priority( 697 + "20240115", "120000_300", refresh=False, verbose=False 698 + ) 699 + 700 + assert spawned == ["activity_state", "pulse"] 701 + 702 + def test_activity_state_carry_forward_writes_output(self, segment_dir, monkeypatch): 703 + """Cached activity_state is written directly when classification matches.""" 704 + from think import callosum, dream 705 + 706 + current_dir = segment_dir.parent / "120500_300" 707 + (current_dir / "agents").mkdir(parents=True) 708 + (current_dir / "agents" / "facets.json").write_text( 709 + json.dumps([{"facet": "work", "activity": "Coding", "level": "high"}]) 710 + ) 711 + 712 + sent = [] 713 + monkeypatch.setattr( 714 + callosum, 715 + "callosum_send", 716 + lambda tract, event, **kwargs: sent.append((tract, event, kwargs)), 717 + ) 718 + 719 + cache = { 720 + "default:work": { 721 + "state": [ 722 + { 723 + "id": "coding_120000_300", 724 + "activity": "coding", 725 + "state": "active", 726 + "since": "120000_300", 727 + "description": "Coding", 728 + "level": "high", 729 + } 730 + ], 731 + "facet_classification": "Coding", 732 + "segment": "120000_300", 733 + "carry_count": 1, 734 + "updated_at": 1, 735 + } 736 + } 737 + 738 + carried = dream._try_carry_forward_activity_state( 739 + day="20240115", 740 + segment="120500_300", 741 + stream="default", 742 + facet="work", 743 + cache=cache, 744 + ) 745 + 746 + assert carried is True 747 + output_path = ( 748 + segment_dir.parent / "120500_300" / "agents" / "work" / "activity_state.json" 749 + ) 750 + assert output_path.exists() 751 + assert sent[0][0:2] == ("activity", "live") 752 + assert cache["default:work"]["carry_count"] == 2 753 + 754 + def test_activity_state_carry_forward_requires_matching_classification( 755 + self, segment_dir 756 + ): 757 + from think import dream 758 + 759 + current_dir = segment_dir.parent / "120500_300" 760 + (current_dir / "agents").mkdir(parents=True) 761 + (current_dir / "agents" / "facets.json").write_text( 762 + json.dumps([{"facet": "work", "activity": "Email", "level": "high"}]) 763 + ) 764 + 765 + cache = { 766 + "default:work": { 767 + "state": [], 768 + "facet_classification": "Coding", 769 + "segment": "120000_300", 770 + "carry_count": 1, 771 + "updated_at": 1, 772 + } 773 + } 774 + 775 + assert ( 776 + dream._try_carry_forward_activity_state( 777 + day="20240115", 778 + segment="120500_300", 779 + stream="default", 780 + facet="work", 781 + cache=cache, 782 + ) 783 + is False 784 + ) 785 + 786 + def test_activity_state_carry_forward_respects_gap(self, segment_dir): 787 + from think import dream 788 + 789 + current_dir = segment_dir.parent / "120500_300" 790 + (current_dir / "agents").mkdir(parents=True) 791 + (current_dir / "agents" / "facets.json").write_text( 792 + json.dumps([{"facet": "work", "activity": "Coding", "level": "high"}]) 793 + ) 794 + 795 + cache = { 796 + "default:work": { 797 + "state": [], 798 + "facet_classification": "Coding", 799 + "segment": "100000_300", 800 + "carry_count": 1, 801 + "updated_at": 1, 802 + } 803 + } 804 + 805 + assert ( 806 + dream._try_carry_forward_activity_state( 807 + day="20240115", 808 + segment="120500_300", 809 + stream="default", 810 + facet="work", 811 + cache=cache, 812 + ) 813 + is False 814 + ) 815 + 816 + def test_activity_state_carry_forward_respects_carry_limit(self, segment_dir): 817 + from think import dream 818 + 819 + current_dir = segment_dir.parent / "120500_300" 820 + (current_dir / "agents").mkdir(parents=True) 821 + (current_dir / "agents" / "facets.json").write_text( 822 + json.dumps([{"facet": "work", "activity": "Coding", "level": "high"}]) 823 + ) 824 + 825 + cache = { 826 + "default:work": { 827 + "state": [], 828 + "facet_classification": "Coding", 829 + "segment": "120000_300", 830 + "carry_count": 6, 831 + "updated_at": 1, 832 + } 833 + } 834 + 835 + assert ( 836 + dream._try_carry_forward_activity_state( 837 + day="20240115", 838 + segment="120500_300", 839 + stream="default", 840 + facet="work", 841 + cache=cache, 842 + ) 843 + is False 844 + ) 845 + 846 + def test_activity_state_carry_forward_preserves_incremented_cache( 847 + self, segment_dir, monkeypatch 848 + ): 849 + from think import dream 850 + 851 + current_segment = "120500_300" 852 + current_dir = segment_dir.parent / current_segment 853 + (current_dir / "agents").mkdir(parents=True) 854 + (current_dir / "agents" / "facets.json").write_text( 855 + json.dumps([{"facet": "work", "activity": "Coding", "level": "high"}]), 856 + encoding="utf-8", 857 + ) 858 + 859 + cache_path = dream._activity_state_cache_path("20240115") 860 + cache_path.parent.mkdir(parents=True, exist_ok=True) 861 + cache_path.write_text( 862 + json.dumps( 863 + { 864 + "default:work": { 865 + "state": [ 866 + { 867 + "id": "coding_120000_300", 868 + "activity": "coding", 869 + "state": "active", 870 + "since": "120000_300", 871 + "description": "Coding", 872 + "level": "high", 873 + } 874 + ], 875 + "facet_classification": "Coding", 876 + "segment": "120000_300", 877 + "carry_count": 1, 878 + "updated_at": 1, 879 + } 880 + } 881 + ), 882 + encoding="utf-8", 883 + ) 884 + 885 + def mock_wait_for_agents(agent_ids, timeout=600): 886 + return ({aid: "finish" for aid in agent_ids}, []) 887 + 888 + def mock_get_muse_configs(schedule=None, **kwargs): 889 + return { 890 + "activity_state": { 891 + "priority": 95, 892 + "type": "generate", 893 + "multi_facet": True, 894 + "output": "json", 895 + "schedule": "segment", 896 + }, 897 + } 898 + 899 + monkeypatch.setattr(dream, "cortex_request", lambda *args, **kwargs: None) 900 + monkeypatch.setattr(dream, "wait_for_agents", mock_wait_for_agents) 901 + monkeypatch.setattr(dream, "get_muse_configs", mock_get_muse_configs) 902 + monkeypatch.setattr(dream, "get_enabled_facets", lambda: {"work": {"title": "Work"}}) 903 + monkeypatch.setattr(dream, "load_segment_facets", lambda *args, **kwargs: ["work"]) 904 + monkeypatch.setattr(dream, "_callosum", None) 905 + 906 + dream.run_prompts_by_priority( 907 + "20240115", current_segment, refresh=False, verbose=False 908 + ) 909 + 910 + cache = json.loads(cache_path.read_text(encoding="utf-8")) 911 + assert cache["default:work"]["carry_count"] == 2 912 + assert cache["default:work"]["segment"] == current_segment

+55

tests/test_entities_hook.py

··· 1 + # SPDX-License-Identifier: AGPL-3.0-only 2 + # Copyright (c) 2026 sol pbc 3 + 4 + """Tests for entity hook behavior across schedules.""" 5 + 6 + from __future__ import annotations 7 + 8 + import json 9 + from pathlib import Path 10 + 11 + 12 + def test_entities_post_process_writes_without_segment(tmp_path): 13 + from muse.entities import post_process 14 + 15 + output_path = ( 16 + tmp_path 17 + / "facets" 18 + / "work" 19 + / "activities" 20 + / "20240115" 21 + / "coding_120000_300" 22 + / "entities.md" 23 + ) 24 + result = "* Person: Alice Smith - Mentioned in the meeting\n" 25 + 26 + post_process(result, {"output_path": str(output_path)}) 27 + 28 + entities_path = output_path.parent / "entities.jsonl" 29 + assert entities_path.exists() 30 + rows = [json.loads(line) for line in entities_path.read_text(encoding="utf-8").splitlines()] 31 + assert rows == [ 32 + { 33 + "type": "Person", 34 + "name": "Alice Smith", 35 + "description": "Mentioned in the meeting", 36 + } 37 + ] 38 + 39 + 40 + def test_entities_post_process_requires_output_path(caplog): 41 + from muse.entities import post_process 42 + 43 + post_process("* Person: Alice Smith - Mentioned in the meeting\n", {}) 44 + 45 + assert "missing output_path" in caplog.text 46 + 47 + 48 + def test_entities_muse_is_activity_scheduled(): 49 + from think.muse import get_muse_configs 50 + 51 + segment_prompts = get_muse_configs(schedule="segment") 52 + activity_prompts = get_muse_configs(schedule="activity") 53 + 54 + assert "entities" not in segment_prompts 55 + assert activity_prompts["entities"]["activities"] == ["*"]

+73

tests/test_segment_gate.py

··· 1 + # SPDX-License-Identifier: AGPL-3.0-only 2 + # Copyright (c) 2026 sol pbc 3 + 4 + """Tests for segment density classification.""" 5 + 6 + from __future__ import annotations 7 + 8 + import json 9 + 10 + 11 + def test_missing_segment_dir_is_active(tmp_path, monkeypatch): 12 + from think.dream import _classify_segment_density 13 + 14 + monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path / "journal")) 15 + assert _classify_segment_density("20240115", "120000_300", "default") == "active" 16 + 17 + 18 + def test_idle_segment(tmp_path, monkeypatch): 19 + from think.dream import _classify_segment_density 20 + 21 + seg_dir = tmp_path / "journal" / "20240115" / "default" / "120000_300" 22 + seg_dir.mkdir(parents=True) 23 + (seg_dir / "audio.jsonl").write_text(json.dumps({"raw": "audio.flac"}) + "\n") 24 + 25 + monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path / "journal")) 26 + assert _classify_segment_density("20240115", "120000_300", "default") == "idle" 27 + 28 + 29 + def test_low_change_segment(tmp_path, monkeypatch): 30 + from think.dream import _classify_segment_density 31 + 32 + seg_dir = tmp_path / "journal" / "20240115" / "default" / "120000_300" 33 + seg_dir.mkdir(parents=True) 34 + audio_lines = [json.dumps({"raw": "audio.flac"})] 35 + audio_lines.extend( 36 + json.dumps({"start": f"00:00:0{i}", "text": "line"}) for i in range(1, 9) 37 + ) 38 + (seg_dir / "audio.jsonl").write_text("\n".join(audio_lines) + "\n") 39 + screen_lines = [json.dumps({"raw": "screen.webm"})] 40 + screen_lines.extend( 41 + json.dumps({"timestamp": i, "analysis": {"visual_description": "screen"}}) 42 + for i in range(3) 43 + ) 44 + (seg_dir / "screen.jsonl").write_text("\n".join(screen_lines) + "\n") 45 + 46 + monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path / "journal")) 47 + assert _classify_segment_density("20240115", "120000_300", "default") == "low_change" 48 + 49 + 50 + def test_active_segment_with_imported_md(tmp_path, monkeypatch): 51 + from think.dream import _classify_segment_density 52 + 53 + seg_dir = tmp_path / "journal" / "20240115" / "default" / "120000_300" 54 + seg_dir.mkdir(parents=True) 55 + (seg_dir / "imported.md").write_text("\n".join(f"line {i}" for i in range(12)) + "\n") 56 + 57 + monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path / "journal")) 58 + assert _classify_segment_density("20240115", "120000_300", "default") == "active" 59 + 60 + 61 + def test_tmux_screen_has_no_header(tmp_path, monkeypatch): 62 + from think.dream import _classify_segment_density 63 + 64 + seg_dir = tmp_path / "journal" / "20240115" / "default" / "120000_300" 65 + seg_dir.mkdir(parents=True) 66 + screen_lines = [ 67 + json.dumps({"frame_id": 1, "timestamp": 0, "analysis": {"primary": "tmux"}}), 68 + json.dumps({"frame_id": 2, "timestamp": 1, "analysis": {"primary": "tmux"}}), 69 + ] 70 + (seg_dir / "tmux_0_screen.jsonl").write_text("\n".join(screen_lines) + "\n") 71 + 72 + monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path / "journal")) 73 + assert _classify_segment_density("20240115", "120000_300", "default") == "low_change"

+467

think/dream.py

··· 10 10 11 11 import argparse 12 12 import fnmatch 13 + import json 13 14 import logging 14 15 import sys 15 16 import threading ··· 304 305 return (success, failed, failed_names) 305 306 306 307 308 + def _segment_dir(day: str, segment: str, stream: str | None) -> Path: 309 + """Return the expected segment directory without creating it.""" 310 + return day_path(day) / (stream or "default") / segment 311 + 312 + 313 + def _resolve_segment_dir( 314 + day: str, 315 + segment: str, 316 + stream: str | None, 317 + ) -> Path | None: 318 + """Resolve a segment directory, searching across streams when needed.""" 319 + if stream: 320 + path = _segment_dir(day, segment, stream) 321 + return path if path.is_dir() else None 322 + 323 + for seg_stream, seg_key, seg_path in iter_segments(day): 324 + if seg_key == segment: 325 + return seg_path 326 + return None 327 + 328 + 329 + def _load_json_file(path: Path, default: object) -> object: 330 + """Load JSON from a file, returning the provided default on failure.""" 331 + if not path.exists(): 332 + return default 333 + try: 334 + return json.loads(path.read_text(encoding="utf-8")) 335 + except (json.JSONDecodeError, OSError): 336 + return default 337 + 338 + 339 + def _write_json_atomic(path: Path, data: object) -> None: 340 + """Atomically write JSON data to a file.""" 341 + path.parent.mkdir(parents=True, exist_ok=True) 342 + tmp = path.with_suffix(f"{path.suffix}.tmp") 343 + tmp.write_text(json.dumps(data), encoding="utf-8") 344 + tmp.replace(path) 345 + 346 + 347 + def _count_nonempty_lines(path: Path) -> list[str]: 348 + """Return non-empty lines from a text file.""" 349 + try: 350 + return [line for line in path.read_text(encoding="utf-8").splitlines() if line] 351 + except OSError: 352 + return [] 353 + 354 + 355 + def _load_segment_facet_rows( 356 + day: str, 357 + segment: str, 358 + stream: str | None, 359 + ) -> list[dict]: 360 + """Load raw facets.json rows for a segment.""" 361 + seg_dir = _resolve_segment_dir(day, segment, stream) 362 + if not seg_dir: 363 + return [] 364 + 365 + facets_path = seg_dir / "agents" / "facets.json" 366 + data = _load_json_file(facets_path, []) 367 + return data if isinstance(data, list) else [] 368 + 369 + 370 + def _has_audio_embeddings(seg_dir: Path) -> bool: 371 + """Return True when a segment has audio embedding files.""" 372 + for npz_path in seg_dir.glob("*.npz"): 373 + if npz_path.stem == "audio" or npz_path.stem.endswith("_audio"): 374 + return True 375 + return False 376 + 377 + 378 + def _should_skip_preflight( 379 + prompt_name: str, 380 + *, 381 + day: str, 382 + segment: str | None, 383 + stream: str | None, 384 + ) -> tuple[bool, str | None]: 385 + """Return whether a prompt can be skipped before sending a cortex request.""" 386 + if not segment: 387 + return (False, None) 388 + 389 + if prompt_name == "firstday_checkin": 390 + from think.awareness import get_onboarding 391 + 392 + onboarding = get_onboarding() 393 + if onboarding.get("status") != "complete": 394 + return (True, "preflight:not_complete") 395 + if onboarding.get("firstday_checkin_sent"): 396 + return (True, "preflight:already_sent") 397 + return (False, None) 398 + 399 + if prompt_name == "observation": 400 + from think.awareness import get_onboarding 401 + 402 + onboarding = get_onboarding() 403 + if onboarding.get("status") != "observing": 404 + return (True, "preflight:not_observing") 405 + return (False, None) 406 + 407 + seg_dir = _resolve_segment_dir(day, segment, stream) 408 + if seg_dir is None: 409 + return (False, None) 410 + 411 + if prompt_name == "speaker_attribution": 412 + if not _has_audio_embeddings(seg_dir): 413 + return (True, "preflight:no_embeddings") 414 + return (False, None) 415 + 416 + if prompt_name == "speakers": 417 + transcript_files = sorted(seg_dir.glob("audio.jsonl")) 418 + transcript_files.extend(sorted(seg_dir.glob("*_audio.jsonl"))) 419 + transcript_files.extend(sorted(seg_dir.glob("*_transcript.jsonl"))) 420 + if not transcript_files: 421 + return (True, "preflight:no_transcripts") 422 + 423 + speakers: set[str] = set() 424 + for transcript_path in transcript_files: 425 + lines = _count_nonempty_lines(transcript_path) 426 + for line in lines[1:]: 427 + try: 428 + entry = json.loads(line) 429 + except json.JSONDecodeError: 430 + continue 431 + speaker = entry.get("speaker") 432 + if speaker is not None: 433 + speakers.add(str(speaker)) 434 + if len(speakers) < 2: 435 + return (True, "preflight:single_speaker") 436 + return (False, None) 437 + 438 + if prompt_name == "activities": 439 + from muse.activity_state import find_previous_segment 440 + 441 + previous_segment = find_previous_segment(day, segment, stream=stream) 442 + if not previous_segment: 443 + return (True, "preflight:no_previous_segment") 444 + 445 + prev_dir = _resolve_segment_dir(day, previous_segment, stream) 446 + if prev_dir is None: 447 + return (True, "preflight:no_previous_activity_state") 448 + 449 + agents_dir = prev_dir / "agents" 450 + for facet_dir in sorted(agents_dir.iterdir()) if agents_dir.is_dir() else []: 451 + if facet_dir.is_dir() and (facet_dir / "activity_state.json").exists(): 452 + return (False, None) 453 + return (True, "preflight:no_previous_activity_state") 454 + 455 + return (False, None) 456 + 457 + 458 + def _classify_segment_density( 459 + day: str, 460 + segment: str, 461 + stream: str | None, 462 + ) -> str: 463 + """Classify segment content density as 'idle', 'low_change', or 'active'.""" 464 + seg_dir = _segment_dir(day, segment, stream) 465 + if not seg_dir.exists(): 466 + return "active" 467 + 468 + transcript_lines = 0 469 + transcript_files = sorted(seg_dir.glob("audio.jsonl")) 470 + transcript_files.extend(sorted(seg_dir.glob("*_audio.jsonl"))) 471 + transcript_files.extend(sorted(seg_dir.glob("*_transcript.jsonl"))) 472 + for transcript_path in transcript_files: 473 + lines = _count_nonempty_lines(transcript_path) 474 + transcript_lines += max(0, len(lines) - 1) 475 + 476 + imported_md = seg_dir / "imported.md" 477 + if imported_md.exists(): 478 + transcript_lines += len(_count_nonempty_lines(imported_md)) 479 + 480 + screen_frames = 0 481 + screen_files = sorted(seg_dir.glob("screen.jsonl")) 482 + screen_files.extend(sorted(seg_dir.glob("*_screen.jsonl"))) 483 + for screen_path in screen_files: 484 + lines = _count_nonempty_lines(screen_path) 485 + if not lines: 486 + continue 487 + subtract_header = False 488 + try: 489 + first_entry = json.loads(lines[0]) 490 + subtract_header = isinstance(first_entry, dict) and "raw" in first_entry 491 + except json.JSONDecodeError: 492 + subtract_header = False 493 + screen_frames += max(0, len(lines) - 1 if subtract_header else len(lines)) 494 + 495 + if transcript_lines < 3 and screen_frames < 2: 496 + return "idle" 497 + if transcript_lines < 10 and screen_frames < 5: 498 + return "low_change" 499 + return "active" 500 + 501 + 502 + def _activity_state_cache_path(day: str) -> Path: 503 + return day_path(day) / "health" / "activity_state_cache.json" 504 + 505 + 506 + def _load_activity_state_cache(day: str) -> dict: 507 + data = _load_json_file(_activity_state_cache_path(day), {}) 508 + return data if isinstance(data, dict) else {} 509 + 510 + 511 + def _save_activity_state_cache(day: str, data: dict) -> None: 512 + _write_json_atomic(_activity_state_cache_path(day), data) 513 + 514 + 515 + def _emit_activity_live_entries( 516 + *, 517 + day: str, 518 + segment: str, 519 + facet: str, 520 + entries: list[dict], 521 + ) -> None: 522 + """Emit activity.live events for active entries.""" 523 + from think.callosum import callosum_send 524 + 525 + for entry in entries: 526 + if entry.get("state") != "active": 527 + continue 528 + try: 529 + callosum_send( 530 + "activity", 531 + "live", 532 + facet=facet, 533 + day=day, 534 + segment=segment, 535 + id=entry["id"], 536 + activity=entry["activity"], 537 + since=entry["since"], 538 + description=entry.get("description", ""), 539 + level=entry.get("level", "medium"), 540 + active_entities=entry.get("active_entities", []), 541 + ) 542 + except Exception as exc: 543 + logging.warning( 544 + "Failed to emit carried activity.live for %s/%s: %s", 545 + facet, 546 + entry.get("id", "unknown"), 547 + exc, 548 + ) 549 + 550 + 551 + def _try_carry_forward_activity_state( 552 + *, 553 + day: str, 554 + segment: str, 555 + stream: str | None, 556 + facet: str, 557 + cache: dict, 558 + ) -> bool: 559 + """Try to carry forward cached activity_state for a facet.""" 560 + cache_key = f"{stream or 'default'}:{facet}" 561 + entry = cache.get(cache_key) 562 + if not isinstance(entry, dict): 563 + return False 564 + 565 + if int(entry.get("carry_count", 0)) >= 6: 566 + return False 567 + 568 + cached_segment = entry.get("segment", "") 569 + if not cached_segment: 570 + return False 571 + 572 + current_start, _current_end = segment_parse(segment) 573 + _cached_start, cached_end = segment_parse(cached_segment) 574 + if current_start is None or cached_end is None: 575 + return False 576 + 577 + current_dt = datetime.combine(date.today(), current_start) 578 + cached_end_dt = datetime.combine(date.today(), cached_end) 579 + gap = current_dt - cached_end_dt 580 + if gap < timedelta(0) or gap > timedelta(minutes=10): 581 + return False 582 + 583 + facet_rows = _load_segment_facet_rows(day, segment, stream) 584 + facet_row = next((row for row in facet_rows if row.get("facet") == facet), None) 585 + if not facet_row: 586 + return False 587 + if facet_row.get("activity", "") != entry.get("facet_classification", ""): 588 + return False 589 + 590 + state = entry.get("state") 591 + if not isinstance(state, list): 592 + return False 593 + 594 + seg_dir = _segment_dir(day, segment, stream) 595 + output_path = seg_dir / "agents" / facet / "activity_state.json" 596 + _write_json_atomic(output_path, state) 597 + _emit_activity_live_entries(day=day, segment=segment, facet=facet, entries=state) 598 + 599 + entry["carry_count"] = int(entry.get("carry_count", 0)) + 1 600 + entry["segment"] = segment 601 + entry["updated_at"] = int(time.time()) 602 + cache[cache_key] = entry 603 + return True 604 + 605 + 606 + def _refresh_activity_state_cache( 607 + *, 608 + day: str, 609 + segment: str, 610 + stream: str | None, 611 + facets: list[str], 612 + cache: dict, 613 + ) -> None: 614 + """Refresh activity_state cache entries from newly-written output files.""" 615 + if not facets: 616 + return 617 + 618 + facet_rows = _load_segment_facet_rows(day, segment, stream) 619 + facet_activity = { 620 + row.get("facet"): row.get("activity", "") 621 + for row in facet_rows 622 + if isinstance(row, dict) and row.get("facet") 623 + } 624 + seg_dir = _segment_dir(day, segment, stream) 625 + 626 + for facet in facets: 627 + state_path = seg_dir / "agents" / facet / "activity_state.json" 628 + data = _load_json_file(state_path, None) 629 + if not isinstance(data, list): 630 + continue 631 + 632 + cache[f"{stream or 'default'}:{facet}"] = { 633 + "state": data, 634 + "facet_classification": facet_activity.get(facet, ""), 635 + "segment": segment, 636 + "carry_count": 0, 637 + "updated_at": int(time.time()), 638 + } 639 + 640 + 641 + def _pulse_counter_path(day: str) -> Path: 642 + return day_path(day) / "health" / "pulse_counter.json" 643 + 644 + 645 + def _load_pulse_counter(day: str) -> dict: 646 + data = _load_json_file(_pulse_counter_path(day), {"count": 0, "last_segment": ""}) 647 + return data if isinstance(data, dict) else {"count": 0, "last_segment": ""} 648 + 649 + 650 + def _save_pulse_counter(day: str, data: dict) -> None: 651 + _write_json_atomic(_pulse_counter_path(day), data) 652 + 653 + 654 + def _active_activity_keys_for_segment( 655 + day: str, 656 + segment: str, 657 + stream: str | None, 658 + ) -> set[tuple[str, str, str]]: 659 + """Return active (facet, activity, since) tuples for a segment.""" 660 + seg_dir = _resolve_segment_dir(day, segment, stream) 661 + if not seg_dir: 662 + return set() 663 + 664 + keys: set[tuple[str, str, str]] = set() 665 + agents_dir = seg_dir / "agents" 666 + if not agents_dir.is_dir(): 667 + return keys 668 + 669 + for facet_dir in sorted(agents_dir.iterdir()): 670 + if not facet_dir.is_dir(): 671 + continue 672 + state_path = facet_dir / "activity_state.json" 673 + data = _load_json_file(state_path, None) 674 + if not isinstance(data, list): 675 + continue 676 + for item in data: 677 + if item.get("state") == "active": 678 + keys.add( 679 + ( 680 + facet_dir.name, 681 + str(item.get("activity", "")), 682 + str(item.get("since", "")), 683 + ) 684 + ) 685 + return keys 686 + 687 + 688 + def _detect_activity_state_change( 689 + day: str, 690 + segment: str, 691 + stream: str | None, 692 + ) -> bool: 693 + """Check whether activity_state changed for this segment.""" 694 + from muse.activity_state import find_previous_segment 695 + 696 + previous_segment = find_previous_segment(day, segment, stream=stream) 697 + if not previous_segment: 698 + return True 699 + 700 + current_keys = _active_activity_keys_for_segment(day, segment, stream) 701 + previous_keys = _active_activity_keys_for_segment(day, previous_segment, stream) 702 + return current_keys != previous_keys 703 + 704 + 307 705 def run_prompts_by_priority( 308 706 day: str, 309 707 segment: str | None, ··· 347 745 priority = config["priority"] # Required field, validated by get_muse_configs 348 746 priority_groups.setdefault(priority, []).append((name, config)) 349 747 748 + segment_density = "active" 749 + if segment: 750 + segment_density = _classify_segment_density(day, segment, stream) 751 + if segment_density != "active": 752 + logging.info("Segment %s classified as %s", segment, segment_density) 753 + 754 + activity_changed = False 755 + as_cache = _load_activity_state_cache(day) if segment else {} 756 + 350 757 # Pre-compute shared data for multi-facet prompts 351 758 day_formatted = iso_date(day) 352 759 input_summary = day_input_summary(day) ··· 392 799 _update_status(current_group_priority=priority) 393 800 logging.info(f"Starting priority {priority} ({len(prompts_list)} prompts)") 394 801 802 + if priority == 10 and segment_density != "active": 803 + logging.info( 804 + "Skipping priority-10 group (%d agents): segment %s is %s", 805 + len(prompts_list), 806 + segment, 807 + segment_density, 808 + ) 809 + continue 810 + 395 811 emit( 396 812 "group_started", 397 813 mode=target_schedule, ··· 406 822 if segment: 407 823 raw_facets = load_segment_facets(day, segment, stream=stream) 408 824 active_facets = set(f for f in raw_facets if f in enabled_facets) 825 + activity_state_llm_facets: set[str] = set() 409 826 410 827 spawned: list[ 411 828 tuple[str, str, dict, str | None] ··· 425 842 ) 426 843 continue 427 844 845 + skip, skip_reason = _should_skip_preflight( 846 + prompt_name, 847 + day=day, 848 + segment=segment, 849 + stream=stream, 850 + ) 851 + if skip: 852 + logging.info("Skipping %s: %s", prompt_name, skip_reason) 853 + continue 854 + 428 855 # Skip pulse when sol/pulse.md is already current for this segment 429 856 if prompt_name == "pulse" and segment and not refresh: 430 857 try: ··· 442 869 except Exception: 443 870 pass 444 871 872 + if not activity_changed: 873 + pulse_counter = _load_pulse_counter(day) 874 + if pulse_counter.get("count", 0) < 5: 875 + pulse_counter["count"] = int(pulse_counter.get("count", 0)) + 1 876 + pulse_counter["last_segment"] = segment 877 + _save_pulse_counter(day, pulse_counter) 878 + logging.info( 879 + "Skipping pulse: counter %d/6 for %s", 880 + pulse_counter["count"], 881 + segment, 882 + ) 883 + continue 884 + _save_pulse_counter(day, {"count": 0, "last_segment": segment}) 885 + else: 886 + _save_pulse_counter(day, {"count": 0, "last_segment": segment}) 887 + 445 888 try: 446 889 if config.get("multi_facet"): 447 890 always_run = config.get("always", False) ··· 455 898 continue 456 899 457 900 logging.info(f"Spawning {prompt_name} for facet: {facet_name}") 901 + if prompt_name == "activity_state" and segment: 902 + if not refresh and _try_carry_forward_activity_state( 903 + day=day, 904 + segment=segment, 905 + stream=stream, 906 + facet=facet_name, 907 + cache=as_cache, 908 + ): 909 + logging.info( 910 + "Carry-forward activity_state for %s", facet_name 911 + ) 912 + continue 913 + activity_state_llm_facets.add(facet_name) 458 914 459 915 # Always pass day for instructions.day context 460 916 request_config: dict = {"facet": facet_name, "day": day} ··· 614 1070 + group_failed, 615 1071 current_agents=[], 616 1072 ) 1073 + 1074 + if priority == 95 and segment: 1075 + _refresh_activity_state_cache( 1076 + day=day, 1077 + segment=segment, 1078 + stream=stream, 1079 + facets=sorted(activity_state_llm_facets), 1080 + cache=as_cache, 1081 + ) 1082 + _save_activity_state_cache(day, as_cache) 1083 + activity_changed = _detect_activity_state_change(day, segment, stream) 617 1084 618 1085 total_success += group_success 619 1086 total_failed += group_failed

Configure Feed

Configure Feed