feat: entity enrichment observations from Granola importer

+208

tests/test_importer_granola.py

··· 587 587 assert "Total:" in captured.out 588 588 assert "Available to import:" in captured.out 589 589 assert "Q1 Planning" in captured.out # title shown in available list 590 + 591 + 592 + # --------------------------------------------------------------------------- 593 + # Entity enrichment observations 594 + # --------------------------------------------------------------------------- 595 + 596 + ENRICHED_TRANSCRIPT = dedent("""\ 597 + --- 598 + doc_id: doc_enriched_001 599 + source: granola 600 + created_at: "2025-10-28T15:04:05Z" 601 + remote_updated_at: "2025-10-29T01:23:45Z" 602 + title: Enriched Meeting 603 + duration_seconds: 600 604 + generator: muesli 1.0 605 + --- 606 + 607 + # Enriched Meeting 608 + 609 + ## Participants 610 + 611 + - **Alice Smith**, Engineering Manager, Acme Corp, (alice@acme.com) 612 + - **Bob Jones**, (bob@jones.io) 613 + - **Jane Doe**, CTO, StartupCo, linkedin.com/in/janedoe, (jane@startup.co) 614 + - **Carlos Garcia**, Designer, (carlos@example.com) 615 + - **Eve Wong**, (eve@megacorp.com) 616 + 617 + --- 618 + 619 + **Alice Smith (15:04:12):** Welcome everyone. 620 + **Bob Jones (15:04:19):** Thanks for setting this up. 621 + **Jane Doe (15:04:35):** Happy to be here. 622 + **Carlos Garcia (15:05:01):** Same here. 623 + **Eve Wong (15:05:10):** Let's get started. 
624 + """) 625 + 626 + 627 + def test_observations_created_on_import(tmp_path, monkeypatch): 628 + """Observations are created for participants with enrichment data.""" 629 + from think.entities.observations import load_observations 630 + from think.importers.granola import GranolaBackend 631 + 632 + monkeypatch.setenv("JOURNAL_PATH", str(tmp_path)) 633 + 634 + muesli_dir = tmp_path / "muesli" 635 + _write_transcript(muesli_dir, "2025-10-28_enriched.md", ENRICHED_TRANSCRIPT) 636 + 637 + GranolaBackend().sync(tmp_path, source_path=muesli_dir, dry_run=False) 638 + 639 + # Alice: title + company 640 + alice_obs = load_observations("import.granola", "Alice Smith") 641 + alice_contents = [o["content"] for o in alice_obs] 642 + assert "Engineering Manager at Acme Corp (via Granola, 2025-10-28)" in alice_contents 643 + 644 + # Bob: no title, no company, no linkedin — no observations 645 + bob_obs = load_observations("import.granola", "Bob Jones") 646 + assert len(bob_obs) == 0 647 + 648 + # Jane: title + company + linkedin 649 + jane_obs = load_observations("import.granola", "Jane Doe") 650 + jane_contents = [o["content"] for o in jane_obs] 651 + assert "CTO at StartupCo (via Granola, 2025-10-28)" in jane_contents 652 + assert "LinkedIn: linkedin.com/in/janedoe (via Granola, 2025-10-28)" in jane_contents 653 + 654 + # Carlos: title only (Designer, no company) 655 + carlos_obs = load_observations("import.granola", "Carlos Garcia") 656 + carlos_contents = [o["content"] for o in carlos_obs] 657 + assert "Designer (via Granola, 2025-10-28)" in carlos_contents 658 + 659 + # Eve: no title, no company, no linkedin — no observations 660 + eve_obs = load_observations("import.granola", "Eve Wong") 661 + assert len(eve_obs) == 0 662 + 663 + 664 + def test_observations_not_duplicated_on_reimport(tmp_path, monkeypatch): 665 + """Re-importing the same transcript does not duplicate observations.""" 666 + from think.entities.observations import load_observations 667 + from 
think.importers.granola import GranolaBackend 668 + 669 + monkeypatch.setenv("JOURNAL_PATH", str(tmp_path)) 670 + 671 + muesli_dir = tmp_path / "muesli" 672 + _write_transcript(muesli_dir, "2025-10-28_enriched.md", ENRICHED_TRANSCRIPT) 673 + 674 + backend = GranolaBackend() 675 + 676 + # First import 677 + backend.sync(tmp_path, source_path=muesli_dir, dry_run=False) 678 + alice_obs_1 = load_observations("import.granola", "Alice Smith") 679 + assert len(alice_obs_1) == 1 680 + 681 + # Second import (force to re-import) 682 + backend.sync(tmp_path, source_path=muesli_dir, dry_run=False, force=True) 683 + alice_obs_2 = load_observations("import.granola", "Alice Smith") 684 + assert len(alice_obs_2) == 1 # still just one, not duplicated 685 + 686 + jane_obs = load_observations("import.granola", "Jane Doe") 687 + assert len(jane_obs) == 2 # title+company and linkedin, still two 688 + 689 + 690 + def test_observations_source_day(tmp_path, monkeypatch): 691 + """Observation source_day matches the segment day, not today.""" 692 + from think.entities.observations import load_observations 693 + from think.importers.granola import GranolaBackend 694 + 695 + monkeypatch.setenv("JOURNAL_PATH", str(tmp_path)) 696 + 697 + muesli_dir = tmp_path / "muesli" 698 + _write_transcript(muesli_dir, "2025-10-28_enriched.md", ENRICHED_TRANSCRIPT) 699 + 700 + GranolaBackend().sync(tmp_path, source_path=muesli_dir, dry_run=False) 701 + 702 + alice_obs = load_observations("import.granola", "Alice Smith") 703 + assert len(alice_obs) == 1 704 + assert alice_obs[0]["source_day"] == "20251028" 705 + 706 + 707 + def test_seed_entities_without_observations(tmp_path, monkeypatch): 708 + """seed_entities() works unchanged when no observations are provided.""" 709 + from think.importers.shared import seed_entities 710 + 711 + monkeypatch.setenv("JOURNAL_PATH", str(tmp_path)) 712 + 713 + entities = [ 714 + {"name": "Test Person", "type": "Person", "email": "test@example.com"}, 715 + ] 716 + result = 
seed_entities("test.facet", "20251028", entities) 717 + assert len(result) == 1 718 + assert result[0]["name"] == "Test Person" 719 + 720 + 721 + def test_seed_entities_observation_formatting(tmp_path, monkeypatch): 722 + """seed_entities() creates observations with correct formatting for all field combos.""" 723 + from think.entities.observations import load_observations 724 + from think.importers.shared import seed_entities 725 + 726 + monkeypatch.setenv("JOURNAL_PATH", str(tmp_path)) 727 + 728 + entities = [ 729 + # title + company 730 + { 731 + "name": "Person A", 732 + "type": "Person", 733 + "observations": ["VP Engineering at BigCo (via Granola, 2025-10-28)"], 734 + }, 735 + # title only 736 + { 737 + "name": "Person B", 738 + "type": "Person", 739 + "observations": ["Designer (via Granola, 2025-10-28)"], 740 + }, 741 + # company only 742 + { 743 + "name": "Person C", 744 + "type": "Person", 745 + "observations": ["Works at MegaCorp (via Granola, 2025-10-28)"], 746 + }, 747 + # linkedin 748 + { 749 + "name": "Person D", 750 + "type": "Person", 751 + "observations": ["LinkedIn: linkedin.com/in/persond (via Granola, 2025-10-28)"], 752 + }, 753 + ] 754 + seed_entities("test.facet", "20251028", entities) 755 + 756 + a_obs = load_observations("test.facet", "Person A") 757 + assert len(a_obs) == 1 758 + assert a_obs[0]["content"] == "VP Engineering at BigCo (via Granola, 2025-10-28)" 759 + assert a_obs[0]["source_day"] == "20251028" 760 + 761 + b_obs = load_observations("test.facet", "Person B") 762 + assert len(b_obs) == 1 763 + assert b_obs[0]["content"] == "Designer (via Granola, 2025-10-28)" 764 + 765 + c_obs = load_observations("test.facet", "Person C") 766 + assert len(c_obs) == 1 767 + assert c_obs[0]["content"] == "Works at MegaCorp (via Granola, 2025-10-28)" 768 + 769 + d_obs = load_observations("test.facet", "Person D") 770 + assert len(d_obs) == 1 771 + assert d_obs[0]["content"] == "LinkedIn: linkedin.com/in/persond (via Granola, 2025-10-28)" 772 + 773 
def test_seed_entities_observation_dedup(tmp_path, monkeypatch):
    """Seeding the same observation twice leaves exactly one stored copy."""
    from think.entities.observations import load_observations
    from think.importers.shared import seed_entities

    monkeypatch.setenv("JOURNAL_PATH", str(tmp_path))

    facet, day, person = "test.facet", "20251028", "Dedup Person"
    payload = [
        {
            "name": person,
            "type": "Person",
            "observations": ["CTO at Acme (via Granola, 2025-10-28)"],
        },
    ]

    # Pass 1 creates the observation; pass 2 replays the identical payload
    # and must not append a second copy.
    for _attempt in range(2):
        seed_entities(facet, day, payload)
        stored = load_observations(facet, person)
        assert len(stored) == 1

+32 -5

think/importers/granola.py

··· 261 261 participants = _parse_participants(body) 262 262 if participants and segments: 263 263 first_day = segments[0][0] 264 - entity_dicts = [ 265 - {"name": p["name"], "type": "Person", "email": p["email"]} 266 - for p in participants 267 - if p["name"] 268 - ] 264 + # Format day for observation attribution: YYYYMMDD -> YYYY-MM-DD 265 + obs_date = f"{first_day[:4]}-{first_day[4:6]}-{first_day[6:8]}" 266 + entity_dicts = [] 267 + for p in participants: 268 + if not p["name"]: 269 + continue 270 + d: dict[str, Any] = { 271 + "name": p["name"], 272 + "type": "Person", 273 + "email": p["email"], 274 + } 275 + observations: list[str] = [] 276 + title = p.get("title", "") 277 + company = p.get("company", "") 278 + linkedin = p.get("linkedin", "") 279 + if title and company: 280 + observations.append( 281 + f"{title} at {company} (via Granola, {obs_date})" 282 + ) 283 + elif title: 284 + observations.append(f"{title} (via Granola, {obs_date})") 285 + elif company: 286 + observations.append( 287 + f"Works at {company} (via Granola, {obs_date})" 288 + ) 289 + if linkedin: 290 + observations.append( 291 + f"LinkedIn: linkedin.com/in/{linkedin} (via Granola, {obs_date})" 292 + ) 293 + if observations: 294 + d["observations"] = observations 295 + entity_dicts.append(d) 269 296 if entity_dicts: 270 297 try: 271 298 seeded = seed_entities("import.granola", first_day, entity_dicts)

+15 -1

think/importers/shared.py

··· 659 659 """Seed entities from structured imports. 660 660 661 661 Each dict should have: name (required), type (default "Person"), 662 - email (optional), context (optional). 662 + email (optional), context (optional), observations (optional list of strings). 663 663 664 664 Matches by email first, then name. Creates new entities for non-matches. 665 + If observations are provided, adds them via add_observation() with dedup. 665 666 666 667 Args: 667 668 facet: Facet name for entity context ··· 678 679 save_journal_entity, 679 680 ) 680 681 from think.entities.matching import find_entity_by_email, find_matching_entity 682 + from think.entities.observations import add_observation, load_observations 681 683 682 684 # Load all journal entities for matching 683 685 all_entities = load_all_journal_entities() ··· 711 713 matched["emails"] = sorted(existing_emails | {email.lower()}) 712 714 save_journal_entity(matched) 713 715 resolved.append(matched) 716 + resolved_name = matched.get("name", name) 714 717 else: 715 718 # Create new entity 716 719 eid = entity_slug(name) ··· 723 726 ) 724 727 entity_list.append(new_entity) # Add to list for future matches 725 728 resolved.append(new_entity) 729 + resolved_name = new_entity.get("name", name) 730 + 731 + # Add observations if provided, with dedup 732 + observations = ent.get("observations", []) 733 + if observations: 734 + existing_obs = load_observations(facet, resolved_name) 735 + existing_contents = {o["content"] for o in existing_obs} 736 + for obs_content in observations: 737 + if obs_content not in existing_contents: 738 + add_observation(facet, resolved_name, obs_content, source_day=day) 739 + existing_contents.add(obs_content) 726 740 727 741 return resolved

Configure Feed

Configure Feed