activity_state cold-start fix: use the full default vocabulary for unconfigured facets, mark meeting as always_on, and validate activity IDs in the post-hook

+31 -15

muse/activity_state.py

··· 529 529 logger.warning("activity_state output is not an array") 530 530 return None 531 531 532 + # Extract facet from output path 533 + facet = _extract_facet_from_output_path(output_path) 534 + 535 + # Validate activity IDs against configured vocabulary 536 + if facet: 537 + from think.activities import get_facet_activities 538 + 539 + valid_ids = {a["id"] for a in get_facet_activities(facet)} 540 + original_count = len(items) 541 + items = [item for item in items if item.get("activity", "") in valid_ids] 542 + dropped = original_count - len(items) 543 + if dropped: 544 + logger.warning( 545 + "Dropped %d activity entries with unrecognized activity IDs for facet %s", 546 + dropped, 547 + facet, 548 + ) 549 + 532 550 # Load previous state for since resolution 533 551 prev_active: list[dict] = [] 534 552 prev_ended: list[dict] = [] 535 - if day: 536 - facet = _extract_facet_from_output_path(output_path) 537 - if facet: 538 - previous_segment = find_previous_segment(day, segment, stream=stream) 539 - if previous_segment: 540 - prev_state, _ = load_previous_state( 541 - day, previous_segment, facet, stream=stream 542 - ) 543 - if prev_state: 544 - prev_active = [ 545 - item for item in prev_state if item.get("state") == "active" 546 - ] 547 - prev_ended = [ 548 - item for item in prev_state if item.get("state") == "ended" 549 - ] 553 + if day and facet: 554 + previous_segment = find_previous_segment(day, segment, stream=stream) 555 + if previous_segment: 556 + prev_state, _ = load_previous_state( 557 + day, previous_segment, facet, stream=stream 558 + ) 559 + if prev_state: 560 + prev_active = [ 561 + item for item in prev_state if item.get("state") == "active" 562 + ] 563 + prev_ended = [ 564 + item for item in prev_state if item.get("state") == "ended" 565 + ] 550 566 551 567 # Track which previous items have been claimed to avoid double-matching 552 568 claimed: set[int] = set()

+69 -3

tests/test_activities.py

··· 105 105 def test_facet_activities_empty(): 106 106 """Test loading activities from a facet with no activities file. 107 107 108 - Even with no activities.jsonl, always-on defaults are included. 108 + With no activities.jsonl, all defaults are returned as the vocabulary. 109 109 """ 110 110 from think.activities import DEFAULT_ACTIVITIES, get_facet_activities 111 111 112 112 activities = get_facet_activities("personal") 113 113 assert isinstance(activities, list) 114 114 115 - # Should contain exactly the always-on defaults 115 + # Should contain all defaults (full vocabulary for unconfigured facets) 116 + all_default_ids = {a["id"] for a in DEFAULT_ACTIVITIES} 117 + assert {a["id"] for a in activities} == all_default_ids 118 + 119 + 120 + def test_meeting_is_always_on(): 121 + """Test that meeting is marked always_on in DEFAULT_ACTIVITIES.""" 122 + from think.activities import DEFAULT_ACTIVITIES 123 + 116 124 always_on_ids = {a["id"] for a in DEFAULT_ACTIVITIES if a.get("always_on")} 117 - assert {a["id"] for a in activities} == always_on_ids 125 + assert "meeting" in always_on_ids 126 + assert "email" in always_on_ids 127 + assert "messaging" in always_on_ids 128 + 129 + 130 + def test_unconfigured_facet_returns_all_defaults(): 131 + """Test that a facet with no activities.jsonl gets all 16 defaults.""" 132 + from think.activities import DEFAULT_ACTIVITIES, get_facet_activities 133 + 134 + with tempfile.TemporaryDirectory() as tmpdir: 135 + original_path = os.environ.get("JOURNAL_PATH") 136 + os.environ["JOURNAL_PATH"] = tmpdir 137 + 138 + facet_path = Path(tmpdir) / "facets" / "new_facet" 139 + facet_path.mkdir(parents=True) 140 + 141 + try: 142 + activities = get_facet_activities("new_facet") 143 + assert len(activities) == len(DEFAULT_ACTIVITIES) 144 + 145 + activity_ids = {a["id"] for a in activities} 146 + default_ids = {a["id"] for a in DEFAULT_ACTIVITIES} 147 + assert activity_ids == default_ids 148 + 149 + # All should be marked as not custom 150 + for 
activity in activities: 151 + assert activity.get("custom") is False 152 + 153 + finally: 154 + if original_path: 155 + os.environ["JOURNAL_PATH"] = original_path 156 + 157 + 158 + def test_configured_facet_includes_meeting_always_on(): 159 + """Test that a facet with explicit activities auto-includes meeting.""" 160 + from think.activities import get_facet_activities, save_facet_activities 161 + 162 + with tempfile.TemporaryDirectory() as tmpdir: 163 + original_path = os.environ.get("JOURNAL_PATH") 164 + os.environ["JOURNAL_PATH"] = tmpdir 165 + 166 + facet_path = Path(tmpdir) / "facets" / "work" 167 + facet_path.mkdir(parents=True) 168 + 169 + try: 170 + # Save only coding — meeting, email, messaging should auto-include 171 + save_facet_activities("work", [{"id": "coding"}]) 172 + 173 + activities = get_facet_activities("work") 174 + activity_ids = {a["id"] for a in activities} 175 + 176 + assert "coding" in activity_ids 177 + assert "meeting" in activity_ids 178 + assert "email" in activity_ids 179 + assert "messaging" in activity_ids 180 + 181 + finally: 182 + if original_path: 183 + os.environ["JOURNAL_PATH"] = original_path 118 184 119 185 120 186 def test_facet_activities_roundtrip():

+204

tests/test_activity_state.py

··· 1392 1392 items = json.loads(result) 1393 1393 assert len(items) == 1 1394 1394 assert items[0]["id"] == "coding_143000_300" 1395 + 1396 + 1397 + class TestActivityIdValidation: 1398 + """Tests for post-hook activity ID validation against configured vocabulary.""" 1399 + 1400 + def test_drops_unrecognized_activity_ids(self): 1401 + """Post-hook drops LLM output entries with activity IDs not in config.""" 1402 + from unittest.mock import patch 1403 + 1404 + from muse.activity_state import post_process 1405 + 1406 + with tempfile.TemporaryDirectory() as tmpdir: 1407 + original_path = os.environ.get("JOURNAL_PATH") 1408 + os.environ["JOURNAL_PATH"] = tmpdir 1409 + 1410 + try: 1411 + # Create facet with only coding and meeting configured 1412 + facet_dir = Path(tmpdir) / "facets" / "work" / "activities" 1413 + facet_dir.mkdir(parents=True) 1414 + (facet_dir / "activities.jsonl").write_text( 1415 + '{"id": "coding"}\n{"id": "meeting"}' 1416 + ) 1417 + 1418 + llm_output = json.dumps( 1419 + [ 1420 + { 1421 + "activity": "coding", 1422 + "state": "new", 1423 + "description": "Writing code", 1424 + "level": "high", 1425 + }, 1426 + { 1427 + "activity": "Technical Work", 1428 + "state": "new", 1429 + "description": "Hallucinated from facet name", 1430 + "level": "medium", 1431 + }, 1432 + ] 1433 + ) 1434 + 1435 + context = { 1436 + "segment": "143000_300", 1437 + "output_path": f"{tmpdir}/20260130/143000_300/agents/work/activity_state.json", 1438 + } 1439 + 1440 + with patch("muse.activity_state.callosum_send"): 1441 + result = post_process(llm_output, context) 1442 + 1443 + items = json.loads(result) 1444 + assert len(items) == 1 1445 + assert items[0]["activity"] == "coding" 1446 + 1447 + finally: 1448 + if original_path: 1449 + os.environ["JOURNAL_PATH"] = original_path 1450 + 1451 + def test_logs_warning_on_dropped_activities(self, caplog): 1452 + """Post-hook logs a warning when dropping unrecognized activity IDs.""" 1453 + import logging 1454 + from unittest.mock 
import patch 1455 + 1456 + from muse.activity_state import post_process 1457 + 1458 + with tempfile.TemporaryDirectory() as tmpdir: 1459 + original_path = os.environ.get("JOURNAL_PATH") 1460 + os.environ["JOURNAL_PATH"] = tmpdir 1461 + 1462 + try: 1463 + facet_dir = Path(tmpdir) / "facets" / "work" / "activities" 1464 + facet_dir.mkdir(parents=True) 1465 + (facet_dir / "activities.jsonl").write_text('{"id": "coding"}') 1466 + 1467 + llm_output = json.dumps( 1468 + [ 1469 + { 1470 + "activity": "meetings", 1471 + "state": "new", 1472 + "description": "Hallucinated", 1473 + "level": "medium", 1474 + }, 1475 + ] 1476 + ) 1477 + 1478 + context = { 1479 + "segment": "143000_300", 1480 + "output_path": f"{tmpdir}/20260130/143000_300/agents/work/activity_state.json", 1481 + } 1482 + 1483 + with caplog.at_level(logging.WARNING, logger="muse.activity_state"): 1484 + with patch("muse.activity_state.callosum_send"): 1485 + post_process(llm_output, context) 1486 + 1487 + assert "Dropped 1 activity entries" in caplog.text 1488 + assert "work" in caplog.text 1489 + 1490 + finally: 1491 + if original_path: 1492 + os.environ["JOURNAL_PATH"] = original_path 1493 + 1494 + def test_valid_activity_ids_pass_through(self): 1495 + """Post-hook preserves entries with valid activity IDs.""" 1496 + from unittest.mock import patch 1497 + 1498 + from muse.activity_state import post_process 1499 + 1500 + with tempfile.TemporaryDirectory() as tmpdir: 1501 + original_path = os.environ.get("JOURNAL_PATH") 1502 + os.environ["JOURNAL_PATH"] = tmpdir 1503 + 1504 + try: 1505 + facet_dir = Path(tmpdir) / "facets" / "work" / "activities" 1506 + facet_dir.mkdir(parents=True) 1507 + (facet_dir / "activities.jsonl").write_text( 1508 + '{"id": "coding"}\n{"id": "meeting"}' 1509 + ) 1510 + 1511 + llm_output = json.dumps( 1512 + [ 1513 + { 1514 + "activity": "coding", 1515 + "state": "new", 1516 + "description": "Writing code", 1517 + "level": "high", 1518 + }, 1519 + { 1520 + "activity": "meeting", 1521 + 
"state": "new", 1522 + "description": "Standup", 1523 + "level": "medium", 1524 + }, 1525 + { 1526 + "activity": "email", 1527 + "state": "new", 1528 + "description": "Checking inbox", 1529 + "level": "low", 1530 + }, 1531 + ] 1532 + ) 1533 + 1534 + context = { 1535 + "segment": "143000_300", 1536 + "output_path": f"{tmpdir}/20260130/143000_300/agents/work/activity_state.json", 1537 + } 1538 + 1539 + with patch("muse.activity_state.callosum_send"): 1540 + result = post_process(llm_output, context) 1541 + 1542 + items = json.loads(result) 1543 + # All three are valid: coding + meeting explicit, email always_on 1544 + assert len(items) == 3 1545 + activity_ids = {item["activity"] for item in items} 1546 + assert activity_ids == {"coding", "meeting", "email"} 1547 + 1548 + finally: 1549 + if original_path: 1550 + os.environ["JOURNAL_PATH"] = original_path 1551 + 1552 + def test_unconfigured_facet_allows_all_defaults(self): 1553 + """Post-hook allows all default activity IDs for unconfigured facets.""" 1554 + from unittest.mock import patch 1555 + 1556 + from muse.activity_state import post_process 1557 + 1558 + with tempfile.TemporaryDirectory() as tmpdir: 1559 + original_path = os.environ.get("JOURNAL_PATH") 1560 + os.environ["JOURNAL_PATH"] = tmpdir 1561 + 1562 + try: 1563 + # Create facet dir but no activities.jsonl 1564 + facet_dir = Path(tmpdir) / "facets" / "new_facet" 1565 + facet_dir.mkdir(parents=True) 1566 + 1567 + llm_output = json.dumps( 1568 + [ 1569 + { 1570 + "activity": "meeting", 1571 + "state": "new", 1572 + "description": "Team sync", 1573 + "level": "high", 1574 + }, 1575 + { 1576 + "activity": "coding", 1577 + "state": "new", 1578 + "description": "Writing code", 1579 + "level": "medium", 1580 + }, 1581 + ] 1582 + ) 1583 + 1584 + context = { 1585 + "segment": "143000_300", 1586 + "output_path": f"{tmpdir}/20260130/143000_300/agents/new_facet/activity_state.json", 1587 + } 1588 + 1589 + with patch("muse.activity_state.callosum_send"): 1590 + result 
= post_process(llm_output, context) 1591 + 1592 + items = json.loads(result) 1593 + # Both are valid defaults 1594 + assert len(items) == 2 1595 + 1596 + finally: 1597 + if original_path: 1598 + os.environ["JOURNAL_PATH"] = original_path

+17 -6

think/activities.py

··· 34 34 "name": "Meetings", 35 35 "description": "Video calls, in-person meetings, and conferences", 36 36 "icon": "📅", 37 + "always_on": True, 37 38 "instructions": ( 38 39 "Levels: high=actively speaking/presenting, medium=listening attentively," 39 40 " low=muted or multitasking during call." ··· 298 299 # Load facet-specific activities 299 300 facet_activities = _load_activities_jsonl(facet) 300 301 302 + # If no explicit activities configured, use all defaults as the vocabulary 303 + if not facet_activities: 304 + result = [] 305 + for default in DEFAULT_ACTIVITIES: 306 + activity = dict(default) 307 + activity["custom"] = False 308 + activity.setdefault("priority", "normal") 309 + result.append(activity) 310 + return result 311 + 301 312 seen_ids: set[str] = set() 302 313 result = [] 303 314 for fa in facet_activities: ··· 469 480 Returns: 470 481 The added activity dict 471 482 """ 472 - # Check if already attached 473 - existing = get_facet_activities(facet) 474 - for activity in existing: 475 - if activity.get("id") == activity_id: 476 - # Already attached - return existing 477 - return activity 483 + # Check if already explicitly attached (in JSONL, not just defaults) 484 + existing_raw = _load_activities_jsonl(facet) 485 + for entry in existing_raw: 486 + if entry.get("id") == activity_id: 487 + # Already attached - return full activity with defaults merged 488 + return get_activity_by_id(facet, activity_id) or entry 478 489 479 490 # Build new activity entry 480 491 defaults_by_id = {a["id"]: a for a in DEFAULT_ACTIVITIES}

Configure Feed

Configure Feed