refactor(entities): relocate seed_entities to think/entities/seeding.py

-5

scripts/check_layer_hygiene.py

··· 109 109 # 110 110 # Audit ref: vpe/workspace/solstone-layer-violations-audit.md (extro repo). 111 111 ALLOWLIST: dict[str, str] = { 112 - # TODO(V2): seed_entities() creates entities from importer shared code. 113 - # Indirect writes go through save_journal_entity(), so the direct-write 114 - # grep does not flag the file today. Keep the entry so the file is 115 - # named alongside V1 as a known audit target; remove after Bundle A. 116 - "think/importers/shared.py": "V2", 117 112 # TODO(import-resolve-facet): apps/import/call.py's `resolve-facet` 118 113 # command uses a read-verb name ("resolve_*" per L3) but writes to 119 114 # journal/facets and unlinks staged files. Not in the audit's V1-V14,

+3 -3

tests/test_importer_granola.py

··· 720 720 721 721 def test_seed_entities_without_observations(tmp_path, monkeypatch): 722 722 """seed_entities() works unchanged when no observations are provided.""" 723 - from think.importers.shared import seed_entities 723 + from think.entities.seeding import seed_entities 724 724 725 725 monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path)) 726 726 ··· 735 735 def test_seed_entities_observation_formatting(tmp_path, monkeypatch): 736 736 """seed_entities() creates observations with correct formatting for all field combos.""" 737 737 from think.entities.observations import load_observations 738 - from think.importers.shared import seed_entities 738 + from think.entities.seeding import seed_entities 739 739 740 740 monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path)) 741 741 ··· 793 793 def test_seed_entities_observation_dedup(tmp_path, monkeypatch): 794 794 """seed_entities() does not duplicate observations on re-call.""" 795 795 from think.entities.observations import load_observations 796 - from think.importers.shared import seed_entities 796 + from think.entities.seeding import seed_entities 797 797 798 798 monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path)) 799 799

+106

think/entities/seeding.py

# SPDX-License-Identifier: AGPL-3.0-only
# Copyright (c) 2026 sol pbc

"""Entity seeding functions.

This module handles seeding entities from structured imports:
- seed_entities: Match or create entities and add optional observations
"""

from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from think.entities.core import EntityDict


def _clean_text(value: object) -> str:
    """Return *value* without surrounding whitespace, or "" if not a string."""
    return value.strip() if isinstance(value, str) else ""


def seed_entities(
    facet: str,
    day: str,
    entities: list[dict],
) -> list[EntityDict]:
    """Seed entities from structured imports.

    Each dict should have: name (required), type (default "Person"),
    email (optional), observations (optional list of strings; a single
    string is treated as a one-item list). A ``context`` key may be present
    in the input but is not read by this function.

    Matches by email first, then name. Creates new entities for non-matches.
    If observations are provided, adds them via add_observation() with dedup
    against the observations already stored for the resolved entity name.

    Items whose name is missing, ``None``, not a string, or blank are skipped.

    Args:
        facet: Facet name for entity context
        day: Day string YYYYMMDD, passed to add_observation() as source_day
        entities: List of entity dicts to seed

    Returns:
        List of resolved/created entity dicts, one per usable input item
        (the same entity appears more than once if several items resolve
        to it).
    """
    # Imported lazily, as in the original implementation.
    # NOTE(review): presumably this avoids an import cycle - confirm before
    # hoisting these to module level.
    from think.entities.core import entity_slug
    from think.entities.journal import (
        create_journal_entity,
        load_all_journal_entities,
        load_journal_entity,
        save_journal_entity,
    )
    from think.entities.matching import find_entity_by_email, find_matching_entity
    from think.entities.observations import add_observation, load_observations

    # Load all journal entities once; entities created below are appended so
    # later items in the same batch can match them.
    all_entities = load_all_journal_entities()
    entity_list = list(all_entities.values())

    resolved: list[EntityDict] = []

    for ent in entities:
        # dict.get's default only applies to a *missing* key, so an explicit
        # None (or non-string) must be normalised before calling str methods.
        name = _clean_text(ent.get("name"))
        if not name:
            continue

        entity_type = ent.get("type") or "Person"
        email = _clean_text(ent.get("email"))
        email_key = email.lower()

        matched = None

        # Try email match first
        if email:
            matched = find_entity_by_email(email, entity_list)

        # Fall back to name match
        if not matched:
            matched = find_matching_entity(name, entity_list)

        if matched:
            # Merge email into existing entity if new
            if email:
                # "emails" may be absent or None; treat both as empty.
                existing_emails = {e.lower() for e in matched.get("emails") or []}
                if email_key not in existing_emails:
                    matched["emails"] = sorted(existing_emails | {email_key})
                    save_journal_entity(matched)
            resolved.append(matched)
            resolved_name = matched.get("name", name)
        else:
            # Reuse an entity already stored under this slug, else create one.
            # NOTE(review): when the slug lookup hits, the incoming email is
            # not merged into that entity (unchanged from the original) -
            # confirm whether that is intended.
            eid = entity_slug(name)
            new_entity = load_journal_entity(eid) or create_journal_entity(
                entity_id=eid,
                name=name,
                entity_type=entity_type,
                emails=[email_key] if email else None,
            )
            entity_list.append(new_entity)  # Add to list for future matches
            resolved.append(new_entity)
            resolved_name = new_entity.get("name", name)

        # Add observations if provided, with dedup
        observations = ent.get("observations") or []
        if isinstance(observations, str):
            # A bare string would otherwise be iterated character by character.
            observations = [observations]
        if observations:
            existing_obs = load_observations(facet, resolved_name)
            existing_contents = {o.get("content") for o in existing_obs}
            for obs_content in observations:
                if obs_content not in existing_contents:
                    add_observation(facet, resolved_name, obs_content, source_day=day)
                    # Track additions so duplicates within this call are skipped.
                    existing_contents.add(obs_content)

    return resolved

+2 -1

think/importers/documents.py

··· 24 24 except ImportError: # pragma: no cover - optional dependency 25 25 pytesseract = None 26 26 27 + from think.entities.seeding import seed_entities 27 28 from think.importers.file_importer import ImportPreview, ImportResult 28 - from think.importers.shared import seed_entities, write_content_manifest 29 + from think.importers.shared import write_content_manifest 29 30 from think.models import generate 30 31 from think.utils import day_path 31 32

+1 -1

think/importers/granola.py

··· 20 20 21 21 import frontmatter 22 22 23 + from think.entities.seeding import seed_entities 23 24 from think.importers.shared import ( 24 25 _window_messages, 25 - seed_entities, 26 26 write_segment, 27 27 ) 28 28 from think.importers.sync import load_sync_state, save_sync_state

+1 -1

think/importers/ics.py

··· 9 9 from pathlib import Path 10 10 from typing import Any, Callable 11 11 12 + from think.entities.seeding import seed_entities 12 13 from think.importers.file_importer import ImportPreview, ImportResult 13 14 from think.importers.shared import ( 14 15 map_items_to_segments, 15 - seed_entities, 16 16 window_items, 17 17 write_content_manifest, 18 18 write_markdown_segments,

+1 -1

think/importers/kindle.py

··· 9 9 from pathlib import Path 10 10 from typing import Callable 11 11 12 + from think.entities.seeding import seed_entities 12 13 from think.importers.file_importer import ImportPreview, ImportResult 13 14 from think.importers.shared import ( 14 15 map_items_to_segments, 15 - seed_entities, 16 16 window_items, 17 17 write_content_manifest, 18 18 write_markdown_segments,

+1 -1

think/importers/obsidian.py

··· 11 11 from pathlib import Path 12 12 from typing import Any, Callable 13 13 14 + from think.entities.seeding import seed_entities 14 15 from think.importers.file_importer import ImportPreview, ImportResult 15 16 from think.importers.shared import ( 16 17 map_items_to_segments, 17 - seed_entities, 18 18 window_items, 19 19 write_content_manifest, 20 20 write_markdown_segments,

+1 -95

think/importers/shared.py

··· 10 10 import os 11 11 import shutil 12 12 from pathlib import Path 13 - from typing import TYPE_CHECKING, Any, Callable 13 + from typing import Any, Callable 14 14 15 15 from media import MIME_TYPES 16 16 from think.importers.utils import save_import_file, write_import_metadata 17 17 from think.utils import day_path, get_journal, now_ms 18 - 19 - if TYPE_CHECKING: 20 - from think.entities.core import EntityDict 21 18 22 19 logger = logging.getLogger(__name__) 23 20 ··· 642 639 result.append((window_day, seg_key)) 643 640 644 641 return result 645 - 646 - 647 - def seed_entities( 648 - facet: str, 649 - day: str, 650 - entities: list[dict], 651 - ) -> list[EntityDict]: 652 - """Seed entities from structured imports. 653 - 654 - Each dict should have: name (required), type (default "Person"), 655 - email (optional), context (optional), observations (optional list of strings). 656 - 657 - Matches by email first, then name. Creates new entities for non-matches. 658 - If observations are provided, adds them via add_observation() with dedup. 
659 - 660 - Args: 661 - facet: Facet name for entity context 662 - day: Day string YYYYMMDD for activity tracking 663 - entities: List of entity dicts to seed 664 - 665 - Returns: 666 - List of resolved/created entity dicts 667 - """ 668 - from think.entities.core import entity_slug 669 - from think.entities.journal import ( 670 - create_journal_entity, 671 - load_all_journal_entities, 672 - load_journal_entity, 673 - save_journal_entity, 674 - ) 675 - from think.entities.matching import find_entity_by_email, find_matching_entity 676 - from think.entities.observations import add_observation, load_observations 677 - 678 - # Load all journal entities for matching 679 - all_entities = load_all_journal_entities() 680 - entity_list = list(all_entities.values()) 681 - 682 - resolved: list[EntityDict] = [] 683 - 684 - for ent in entities: 685 - name = ent.get("name", "").strip() 686 - if not name: 687 - continue 688 - 689 - entity_type = ent.get("type", "Person") 690 - email = ent.get("email", "") 691 - 692 - matched = None 693 - 694 - # Try email match first 695 - if email: 696 - matched = find_entity_by_email(email, entity_list) 697 - 698 - # Fall back to name match 699 - if not matched: 700 - matched = find_matching_entity(name, entity_list) 701 - 702 - if matched: 703 - # Merge email into existing entity if new 704 - if email: 705 - existing_emails = set(e.lower() for e in matched.get("emails", [])) 706 - if email.lower() not in existing_emails: 707 - matched["emails"] = sorted(existing_emails | {email.lower()}) 708 - save_journal_entity(matched) 709 - resolved.append(matched) 710 - resolved_name = matched.get("name", name) 711 - else: 712 - # Create new entity 713 - eid = entity_slug(name) 714 - emails = [email.lower()] if email else None 715 - new_entity = load_journal_entity(eid) or create_journal_entity( 716 - entity_id=eid, 717 - name=name, 718 - entity_type=entity_type, 719 - emails=emails, 720 - ) 721 - entity_list.append(new_entity) # Add to list for future 
matches 722 - resolved.append(new_entity) 723 - resolved_name = new_entity.get("name", name) 724 - 725 - # Add observations if provided, with dedup 726 - observations = ent.get("observations", []) 727 - if observations: 728 - existing_obs = load_observations(facet, resolved_name) 729 - existing_contents = {o["content"] for o in existing_obs} 730 - for obs_content in observations: 731 - if obs_content not in existing_contents: 732 - add_observation(facet, resolved_name, obs_content, source_day=day) 733 - existing_contents.add(obs_content) 734 - 735 - return resolved

Configure Feed

Configure Feed