personal memory agent
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

activity_state cold-start fix: default vocabulary + always_on meeting + post-hook validation

Three changes from CPO design opinion (activity-state-cold-start-design):

1. Mark meeting as always_on in DEFAULT_ACTIVITIES — meetings are as
universal as email and messaging, so they are auto-included for every facet.

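2. Return all 16 defaults when no activities.jsonl exists (or it loads
as empty) — this gives unconfigured facets the full detection vocabulary
out of the box. Once a user configures explicit activities, those take
over, with the always_on defaults (email, messaging, meeting) still
auto-included.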

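3. Validate activity IDs in post_process() — drops LLM output entries
whose activity field doesn't match the facet's configured vocabulary,
preventing hallucinated activity types (e.g., a facet name used as an
activity ID) from polluting data. Logs a warning with the number of
dropped entries and the facet whenever anything is dropped. Validation
only runs when a facet can be extracted from the output path.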

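Also fixes add_activity_to_facet to check the raw JSONL (not the merged
defaults) when detecting duplicates. Because an unconfigured facet now
reports every default as present, the old check would have treated any
default activity as already attached and returned without writing it;
checking the raw file keeps adding activities working correctly with
the cold-start default vocabulary.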

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

+321 -24
+31 -15
muse/activity_state.py
··· 529 529 logger.warning("activity_state output is not an array") 530 530 return None 531 531 532 + # Extract facet from output path 533 + facet = _extract_facet_from_output_path(output_path) 534 + 535 + # Validate activity IDs against configured vocabulary 536 + if facet: 537 + from think.activities import get_facet_activities 538 + 539 + valid_ids = {a["id"] for a in get_facet_activities(facet)} 540 + original_count = len(items) 541 + items = [item for item in items if item.get("activity", "") in valid_ids] 542 + dropped = original_count - len(items) 543 + if dropped: 544 + logger.warning( 545 + "Dropped %d activity entries with unrecognized activity IDs for facet %s", 546 + dropped, 547 + facet, 548 + ) 549 + 532 550 # Load previous state for since resolution 533 551 prev_active: list[dict] = [] 534 552 prev_ended: list[dict] = [] 535 - if day: 536 - facet = _extract_facet_from_output_path(output_path) 537 - if facet: 538 - previous_segment = find_previous_segment(day, segment, stream=stream) 539 - if previous_segment: 540 - prev_state, _ = load_previous_state( 541 - day, previous_segment, facet, stream=stream 542 - ) 543 - if prev_state: 544 - prev_active = [ 545 - item for item in prev_state if item.get("state") == "active" 546 - ] 547 - prev_ended = [ 548 - item for item in prev_state if item.get("state") == "ended" 549 - ] 553 + if day and facet: 554 + previous_segment = find_previous_segment(day, segment, stream=stream) 555 + if previous_segment: 556 + prev_state, _ = load_previous_state( 557 + day, previous_segment, facet, stream=stream 558 + ) 559 + if prev_state: 560 + prev_active = [ 561 + item for item in prev_state if item.get("state") == "active" 562 + ] 563 + prev_ended = [ 564 + item for item in prev_state if item.get("state") == "ended" 565 + ] 550 566 551 567 # Track which previous items have been claimed to avoid double-matching 552 568 claimed: set[int] = set()
+69 -3
tests/test_activities.py
··· 105 105 def test_facet_activities_empty(): 106 106 """Test loading activities from a facet with no activities file. 107 107 108 - Even with no activities.jsonl, always-on defaults are included. 108 + With no activities.jsonl, all defaults are returned as the vocabulary. 109 109 """ 110 110 from think.activities import DEFAULT_ACTIVITIES, get_facet_activities 111 111 112 112 activities = get_facet_activities("personal") 113 113 assert isinstance(activities, list) 114 114 115 - # Should contain exactly the always-on defaults 115 + # Should contain all defaults (full vocabulary for unconfigured facets) 116 + all_default_ids = {a["id"] for a in DEFAULT_ACTIVITIES} 117 + assert {a["id"] for a in activities} == all_default_ids 118 + 119 + 120 + def test_meeting_is_always_on(): 121 + """Test that meeting is marked always_on in DEFAULT_ACTIVITIES.""" 122 + from think.activities import DEFAULT_ACTIVITIES 123 + 116 124 always_on_ids = {a["id"] for a in DEFAULT_ACTIVITIES if a.get("always_on")} 117 - assert {a["id"] for a in activities} == always_on_ids 125 + assert "meeting" in always_on_ids 126 + assert "email" in always_on_ids 127 + assert "messaging" in always_on_ids 128 + 129 + 130 + def test_unconfigured_facet_returns_all_defaults(): 131 + """Test that a facet with no activities.jsonl gets all 16 defaults.""" 132 + from think.activities import DEFAULT_ACTIVITIES, get_facet_activities 133 + 134 + with tempfile.TemporaryDirectory() as tmpdir: 135 + original_path = os.environ.get("JOURNAL_PATH") 136 + os.environ["JOURNAL_PATH"] = tmpdir 137 + 138 + facet_path = Path(tmpdir) / "facets" / "new_facet" 139 + facet_path.mkdir(parents=True) 140 + 141 + try: 142 + activities = get_facet_activities("new_facet") 143 + assert len(activities) == len(DEFAULT_ACTIVITIES) 144 + 145 + activity_ids = {a["id"] for a in activities} 146 + default_ids = {a["id"] for a in DEFAULT_ACTIVITIES} 147 + assert activity_ids == default_ids 148 + 149 + # All should be marked as not custom 150 + for 
activity in activities: 151 + assert activity.get("custom") is False 152 + 153 + finally: 154 + if original_path: 155 + os.environ["JOURNAL_PATH"] = original_path 156 + 157 + 158 + def test_configured_facet_includes_meeting_always_on(): 159 + """Test that a facet with explicit activities auto-includes meeting.""" 160 + from think.activities import get_facet_activities, save_facet_activities 161 + 162 + with tempfile.TemporaryDirectory() as tmpdir: 163 + original_path = os.environ.get("JOURNAL_PATH") 164 + os.environ["JOURNAL_PATH"] = tmpdir 165 + 166 + facet_path = Path(tmpdir) / "facets" / "work" 167 + facet_path.mkdir(parents=True) 168 + 169 + try: 170 + # Save only coding — meeting, email, messaging should auto-include 171 + save_facet_activities("work", [{"id": "coding"}]) 172 + 173 + activities = get_facet_activities("work") 174 + activity_ids = {a["id"] for a in activities} 175 + 176 + assert "coding" in activity_ids 177 + assert "meeting" in activity_ids 178 + assert "email" in activity_ids 179 + assert "messaging" in activity_ids 180 + 181 + finally: 182 + if original_path: 183 + os.environ["JOURNAL_PATH"] = original_path 118 184 119 185 120 186 def test_facet_activities_roundtrip():
+204
tests/test_activity_state.py
··· 1392 1392 items = json.loads(result) 1393 1393 assert len(items) == 1 1394 1394 assert items[0]["id"] == "coding_143000_300" 1395 + 1396 + 1397 + class TestActivityIdValidation: 1398 + """Tests for post-hook activity ID validation against configured vocabulary.""" 1399 + 1400 + def test_drops_unrecognized_activity_ids(self): 1401 + """Post-hook drops LLM output entries with activity IDs not in config.""" 1402 + from unittest.mock import patch 1403 + 1404 + from muse.activity_state import post_process 1405 + 1406 + with tempfile.TemporaryDirectory() as tmpdir: 1407 + original_path = os.environ.get("JOURNAL_PATH") 1408 + os.environ["JOURNAL_PATH"] = tmpdir 1409 + 1410 + try: 1411 + # Create facet with only coding and meeting configured 1412 + facet_dir = Path(tmpdir) / "facets" / "work" / "activities" 1413 + facet_dir.mkdir(parents=True) 1414 + (facet_dir / "activities.jsonl").write_text( 1415 + '{"id": "coding"}\n{"id": "meeting"}' 1416 + ) 1417 + 1418 + llm_output = json.dumps( 1419 + [ 1420 + { 1421 + "activity": "coding", 1422 + "state": "new", 1423 + "description": "Writing code", 1424 + "level": "high", 1425 + }, 1426 + { 1427 + "activity": "Technical Work", 1428 + "state": "new", 1429 + "description": "Hallucinated from facet name", 1430 + "level": "medium", 1431 + }, 1432 + ] 1433 + ) 1434 + 1435 + context = { 1436 + "segment": "143000_300", 1437 + "output_path": f"{tmpdir}/20260130/143000_300/agents/work/activity_state.json", 1438 + } 1439 + 1440 + with patch("muse.activity_state.callosum_send"): 1441 + result = post_process(llm_output, context) 1442 + 1443 + items = json.loads(result) 1444 + assert len(items) == 1 1445 + assert items[0]["activity"] == "coding" 1446 + 1447 + finally: 1448 + if original_path: 1449 + os.environ["JOURNAL_PATH"] = original_path 1450 + 1451 + def test_logs_warning_on_dropped_activities(self, caplog): 1452 + """Post-hook logs a warning when dropping unrecognized activity IDs.""" 1453 + import logging 1454 + from unittest.mock 
import patch 1455 + 1456 + from muse.activity_state import post_process 1457 + 1458 + with tempfile.TemporaryDirectory() as tmpdir: 1459 + original_path = os.environ.get("JOURNAL_PATH") 1460 + os.environ["JOURNAL_PATH"] = tmpdir 1461 + 1462 + try: 1463 + facet_dir = Path(tmpdir) / "facets" / "work" / "activities" 1464 + facet_dir.mkdir(parents=True) 1465 + (facet_dir / "activities.jsonl").write_text('{"id": "coding"}') 1466 + 1467 + llm_output = json.dumps( 1468 + [ 1469 + { 1470 + "activity": "meetings", 1471 + "state": "new", 1472 + "description": "Hallucinated", 1473 + "level": "medium", 1474 + }, 1475 + ] 1476 + ) 1477 + 1478 + context = { 1479 + "segment": "143000_300", 1480 + "output_path": f"{tmpdir}/20260130/143000_300/agents/work/activity_state.json", 1481 + } 1482 + 1483 + with caplog.at_level(logging.WARNING, logger="muse.activity_state"): 1484 + with patch("muse.activity_state.callosum_send"): 1485 + post_process(llm_output, context) 1486 + 1487 + assert "Dropped 1 activity entries" in caplog.text 1488 + assert "work" in caplog.text 1489 + 1490 + finally: 1491 + if original_path: 1492 + os.environ["JOURNAL_PATH"] = original_path 1493 + 1494 + def test_valid_activity_ids_pass_through(self): 1495 + """Post-hook preserves entries with valid activity IDs.""" 1496 + from unittest.mock import patch 1497 + 1498 + from muse.activity_state import post_process 1499 + 1500 + with tempfile.TemporaryDirectory() as tmpdir: 1501 + original_path = os.environ.get("JOURNAL_PATH") 1502 + os.environ["JOURNAL_PATH"] = tmpdir 1503 + 1504 + try: 1505 + facet_dir = Path(tmpdir) / "facets" / "work" / "activities" 1506 + facet_dir.mkdir(parents=True) 1507 + (facet_dir / "activities.jsonl").write_text( 1508 + '{"id": "coding"}\n{"id": "meeting"}' 1509 + ) 1510 + 1511 + llm_output = json.dumps( 1512 + [ 1513 + { 1514 + "activity": "coding", 1515 + "state": "new", 1516 + "description": "Writing code", 1517 + "level": "high", 1518 + }, 1519 + { 1520 + "activity": "meeting", 1521 + 
"state": "new", 1522 + "description": "Standup", 1523 + "level": "medium", 1524 + }, 1525 + { 1526 + "activity": "email", 1527 + "state": "new", 1528 + "description": "Checking inbox", 1529 + "level": "low", 1530 + }, 1531 + ] 1532 + ) 1533 + 1534 + context = { 1535 + "segment": "143000_300", 1536 + "output_path": f"{tmpdir}/20260130/143000_300/agents/work/activity_state.json", 1537 + } 1538 + 1539 + with patch("muse.activity_state.callosum_send"): 1540 + result = post_process(llm_output, context) 1541 + 1542 + items = json.loads(result) 1543 + # All three are valid: coding + meeting explicit, email always_on 1544 + assert len(items) == 3 1545 + activity_ids = {item["activity"] for item in items} 1546 + assert activity_ids == {"coding", "meeting", "email"} 1547 + 1548 + finally: 1549 + if original_path: 1550 + os.environ["JOURNAL_PATH"] = original_path 1551 + 1552 + def test_unconfigured_facet_allows_all_defaults(self): 1553 + """Post-hook allows all default activity IDs for unconfigured facets.""" 1554 + from unittest.mock import patch 1555 + 1556 + from muse.activity_state import post_process 1557 + 1558 + with tempfile.TemporaryDirectory() as tmpdir: 1559 + original_path = os.environ.get("JOURNAL_PATH") 1560 + os.environ["JOURNAL_PATH"] = tmpdir 1561 + 1562 + try: 1563 + # Create facet dir but no activities.jsonl 1564 + facet_dir = Path(tmpdir) / "facets" / "new_facet" 1565 + facet_dir.mkdir(parents=True) 1566 + 1567 + llm_output = json.dumps( 1568 + [ 1569 + { 1570 + "activity": "meeting", 1571 + "state": "new", 1572 + "description": "Team sync", 1573 + "level": "high", 1574 + }, 1575 + { 1576 + "activity": "coding", 1577 + "state": "new", 1578 + "description": "Writing code", 1579 + "level": "medium", 1580 + }, 1581 + ] 1582 + ) 1583 + 1584 + context = { 1585 + "segment": "143000_300", 1586 + "output_path": f"{tmpdir}/20260130/143000_300/agents/new_facet/activity_state.json", 1587 + } 1588 + 1589 + with patch("muse.activity_state.callosum_send"): 1590 + result 
= post_process(llm_output, context) 1591 + 1592 + items = json.loads(result) 1593 + # Both are valid defaults 1594 + assert len(items) == 2 1595 + 1596 + finally: 1597 + if original_path: 1598 + os.environ["JOURNAL_PATH"] = original_path
+17 -6
think/activities.py
··· 34 34 "name": "Meetings", 35 35 "description": "Video calls, in-person meetings, and conferences", 36 36 "icon": "📅", 37 + "always_on": True, 37 38 "instructions": ( 38 39 "Levels: high=actively speaking/presenting, medium=listening attentively," 39 40 " low=muted or multitasking during call." ··· 298 299 # Load facet-specific activities 299 300 facet_activities = _load_activities_jsonl(facet) 300 301 302 + # If no explicit activities configured, use all defaults as the vocabulary 303 + if not facet_activities: 304 + result = [] 305 + for default in DEFAULT_ACTIVITIES: 306 + activity = dict(default) 307 + activity["custom"] = False 308 + activity.setdefault("priority", "normal") 309 + result.append(activity) 310 + return result 311 + 301 312 seen_ids: set[str] = set() 302 313 result = [] 303 314 for fa in facet_activities: ··· 469 480 Returns: 470 481 The added activity dict 471 482 """ 472 - # Check if already attached 473 - existing = get_facet_activities(facet) 474 - for activity in existing: 475 - if activity.get("id") == activity_id: 476 - # Already attached - return existing 477 - return activity 483 + # Check if already explicitly attached (in JSONL, not just defaults) 484 + existing_raw = _load_activities_jsonl(facet) 485 + for entry in existing_raw: 486 + if entry.get("id") == activity_id: 487 + # Already attached - return full activity with defaults merged 488 + return get_activity_by_id(facet, activity_id) or entry 478 489 479 490 # Build new activity entry 480 491 defaults_by_id = {a["id"]: a for a in DEFAULT_ACTIVITIES}