activity_state: drop redundant ended re-reports, promote mis-tagged ended activities to active

+1 -1

muse/activity_state.md

··· 57 57 58 58 1. **Only detect configured activities** — Ignore activity that doesn't match the facet's list 59 59 2. **Active vs. visible** — Only report an activity if the user is actively interacting with it during this segment. An application merely visible on screen but unchanged is NOT active. Look for evidence of interaction: typing, clicking, new content, spoken discussion. 60 - 3. **Report endings** — If a previously active activity is no longer happening, always report it as `"ended"` so it can be tracked 60 + 3. **Report endings** — If an activity listed as **active** in the Previous State is no longer happening, report it as `"ended"`. Only report endings for activities that were active — do not re-report activities that already ended previously. 61 61 4. **Same-type transitions** — If a meeting ends and a different meeting starts, report both: the old one as `"ended"` and the new one as `"new"` 62 62 5. **Update descriptions** — As activities continue, refine the description with new context 63 63 6. **Empty is valid** — `[]` is correct when no activities are detected

+68 -12

muse/activity_state.py

··· 12 12 1. Stamps `since` (segment key) from tooling — never from LLM 13 13 2. Normalizes `state` from LLM values (continuing/new) to stored values (active) 14 14 3. Matches continuing/ended activities to previous state via activity type + fuzzy description 15 + 4. Drops redundant ended re-reports; promotes unmatched ended with novel descriptions to active 15 16 """ 16 17 17 18 import json ··· 395 396 return matches[0] 396 397 397 398 399 + def _is_redundant_ended( 400 + activity_id: str, 401 + description: str, 402 + prev_ended: list[dict], 403 + ) -> bool: 404 + """Check if an ended activity is a redundant re-report. 405 + 406 + Returns True if a matching activity already ended in the previous segment 407 + (same type with similar or empty description), meaning this is just the 408 + LLM re-reporting an ending that was already recorded. 409 + """ 410 + ended_same_type = [e for e in prev_ended if e.get("activity") == activity_id] 411 + if not ended_same_type: 412 + return False 413 + 414 + # If only one match and description is close enough, it's redundant 415 + if not description: 416 + return True 417 + 418 + try: 419 + from rapidfuzz import fuzz 420 + 421 + for prev in ended_same_type: 422 + prev_desc = prev.get("description", "") 423 + if not prev_desc: 424 + return True 425 + if fuzz.token_sort_ratio(description, prev_desc) >= 70: 426 + return True 427 + except ImportError: 428 + # Without fuzzy matching, fall back to exact substring check 429 + for prev in ended_same_type: 430 + prev_desc = prev.get("description", "") 431 + if not prev_desc: 432 + return True 433 + if description.lower() in prev_desc.lower() or prev_desc.lower() in description.lower(): 434 + return True 435 + 436 + return False 437 + 438 + 398 439 def post_process(result: str, context: dict) -> str | None: 399 440 """Resolve timing metadata on LLM activity state output. 
400 441 ··· 431 472 432 473 # Load previous state for since resolution 433 474 prev_active: list[dict] = [] 475 + prev_ended: list[dict] = [] 434 476 if day: 435 477 facet = _extract_facet_from_output_path(output_path) 436 478 if facet: ··· 440 482 if prev_state: 441 483 prev_active = [ 442 484 item for item in prev_state if item.get("state") == "active" 485 + ] 486 + prev_ended = [ 487 + item for item in prev_state if item.get("state") == "ended" 443 488 ] 444 489 445 490 # Track which previous items have been claimed to avoid double-matching ··· 479 524 if result: 480 525 idx, matched = result 481 526 claimed.add(idx) 482 - since = matched.get("since", segment) 483 - else: 484 - since = segment 485 - 486 - resolved.append( 487 - { 488 - "activity": activity_id, 489 - "state": "ended", 490 - "since": since, 491 - "description": description, 492 - } 493 - ) 527 + resolved.append( 528 + { 529 + "activity": activity_id, 530 + "state": "ended", 531 + "since": matched.get("since", segment), 532 + "description": description, 533 + } 534 + ) 535 + elif description and not _is_redundant_ended( 536 + activity_id, description, prev_ended 537 + ): 538 + # No active match but has a novel description — likely 539 + # a real activity the LLM mis-tagged as ended; treat as new 540 + resolved.append( 541 + { 542 + "activity": activity_id, 543 + "state": "active", 544 + "since": segment, 545 + "description": description, 546 + "level": item.get("level", "medium"), 547 + } 548 + ) 549 + # else: redundant re-report of already ended activity — drop 494 550 495 551 else: 496 552 # "new" or any unrecognized state — stamp current segment

+152 -8

tests/test_activity_state.py

··· 556 556 if original_path: 557 557 os.environ["JOURNAL_PATH"] = original_path 558 558 559 - def test_no_previous_state_all_new(self): 559 + def test_no_previous_state_continuing_becomes_new(self): 560 560 from muse.activity_state import post_process 561 561 562 562 llm_output = json.dumps( ··· 567 567 "description": "Writing code", 568 568 "level": "high", 569 569 }, 570 + ] 571 + ) 572 + 573 + # No day/output_path — no previous state available 574 + result = post_process(llm_output, {"segment": "143000_300"}) 575 + items = json.loads(result) 576 + 577 + # "continuing" with no match falls back to current segment 578 + assert items[0]["since"] == "143000_300" 579 + assert items[0]["state"] == "active" 580 + 581 + def test_unmatched_ended_with_novel_description_becomes_active(self): 582 + """Ended activity with no previous active match but novel description 583 + is treated as a new active activity (LLM mis-tagged).""" 584 + from muse.activity_state import post_process 585 + 586 + llm_output = json.dumps( 587 + [ 570 588 { 571 589 "activity": "meeting", 572 590 "state": "ended", 573 - "description": "Standup ended", 591 + "description": "Quick sync about deployment", 592 + "level": "medium", 574 593 }, 575 594 ] 576 595 ) 577 596 578 - # No day/output_path — no previous state available 597 + # No previous state — no active match, no ended match 579 598 result = post_process(llm_output, {"segment": "143000_300"}) 580 599 items = json.loads(result) 581 600 582 - # "continuing" with no match falls back to current segment 601 + assert len(items) == 1 602 + assert items[0]["state"] == "active" 583 603 assert items[0]["since"] == "143000_300" 584 - assert items[0]["state"] == "active" 604 + assert items[0]["level"] == "medium" 585 605 586 - # "ended" with no match also uses current segment 587 - assert items[1]["since"] == "143000_300" 588 - assert items[1]["state"] == "ended" 606 + def test_unmatched_ended_with_empty_description_dropped(self): 607 + """Ended activity 
with no previous active match and no description 608 + is dropped as redundant.""" 609 + from muse.activity_state import post_process 610 + 611 + llm_output = json.dumps( 612 + [ 613 + { 614 + "activity": "email", 615 + "state": "ended", 616 + "description": "", 617 + }, 618 + ] 619 + ) 620 + 621 + result = post_process(llm_output, {"segment": "143000_300"}) 622 + items = json.loads(result) 623 + assert len(items) == 0 624 + 625 + def test_unmatched_ended_matching_prev_ended_dropped(self): 626 + """Ended activity that matches a previously ended activity is dropped.""" 627 + from muse.activity_state import post_process 628 + 629 + with tempfile.TemporaryDirectory() as tmpdir: 630 + original_path = os.environ.get("JOURNAL_PATH") 631 + os.environ["JOURNAL_PATH"] = tmpdir 632 + 633 + try: 634 + day_dir = Path(tmpdir) / "20260130" 635 + day_dir.mkdir() 636 + 637 + # Previous segment — email already ended 638 + prev_dir = day_dir / "100000_300" 639 + prev_dir.mkdir() 640 + prev_state = [ 641 + { 642 + "activity": "email", 643 + "state": "ended", 644 + "since": "090000_300", 645 + "description": "Replied to boss", 646 + } 647 + ] 648 + (prev_dir / "activity_state_work.json").write_text( 649 + json.dumps(prev_state) 650 + ) 651 + 652 + (day_dir / "100500_300").mkdir() 653 + 654 + llm_output = json.dumps( 655 + [ 656 + { 657 + "activity": "email", 658 + "state": "ended", 659 + "description": "Replied to boss", 660 + } 661 + ] 662 + ) 663 + 664 + context = { 665 + "day": "20260130", 666 + "segment": "100500_300", 667 + "output_path": f"{tmpdir}/20260130/100500_300/activity_state_work.json", 668 + } 669 + 670 + result = post_process(llm_output, context) 671 + items = json.loads(result) 672 + assert len(items) == 0 # Dropped as redundant 673 + 674 + finally: 675 + if original_path: 676 + os.environ["JOURNAL_PATH"] = original_path 677 + 678 + def test_unmatched_ended_novel_desc_with_prev_ended_becomes_active(self): 679 + """Ended activity with novel description (different from 
prev ended) 680 + is promoted to active.""" 681 + from muse.activity_state import post_process 682 + 683 + with tempfile.TemporaryDirectory() as tmpdir: 684 + original_path = os.environ.get("JOURNAL_PATH") 685 + os.environ["JOURNAL_PATH"] = tmpdir 686 + 687 + try: 688 + day_dir = Path(tmpdir) / "20260130" 689 + day_dir.mkdir() 690 + 691 + # Previous segment — email ended with different description 692 + prev_dir = day_dir / "100000_300" 693 + prev_dir.mkdir() 694 + prev_state = [ 695 + { 696 + "activity": "email", 697 + "state": "ended", 698 + "since": "090000_300", 699 + "description": "Replied to boss", 700 + } 701 + ] 702 + (prev_dir / "activity_state_work.json").write_text( 703 + json.dumps(prev_state) 704 + ) 705 + 706 + (day_dir / "100500_300").mkdir() 707 + 708 + llm_output = json.dumps( 709 + [ 710 + { 711 + "activity": "email", 712 + "state": "ended", 713 + "description": "Composing proposal to new client", 714 + } 715 + ] 716 + ) 717 + 718 + context = { 719 + "day": "20260130", 720 + "segment": "100500_300", 721 + "output_path": f"{tmpdir}/20260130/100500_300/activity_state_work.json", 722 + } 723 + 724 + result = post_process(llm_output, context) 725 + items = json.loads(result) 726 + assert len(items) == 1 727 + assert items[0]["state"] == "active" 728 + assert items[0]["since"] == "100500_300" 729 + 730 + finally: 731 + if original_path: 732 + os.environ["JOURNAL_PATH"] = original_path 589 733 590 734 def test_empty_array_passthrough(self): 591 735 from muse.activity_state import post_process

Configure Feed

Configure Feed