fix(sense): tighten contamination guard + clamp attendee roles outside meetings

+37

talent/participation.py

··· 7 7 import logging 8 8 9 9 from think.activities import update_record_fields 10 + from think.cluster import _find_segment_dir 10 11 from think.entities.loading import load_entities 11 12 from think.entities.matching import find_matching_entity 12 13 13 14 logger = logging.getLogger(__name__) 15 + 16 + 17 + def _segment_meeting_detected(day: str, segment_key: str) -> bool: 18 + """Return True iff a segment's sense.json reports meeting_detected=True.""" 19 + seg_dir = _find_segment_dir(day, segment_key, stream=None) 20 + if seg_dir is None: 21 + return False 22 + 23 + sense_path = seg_dir / "talents" / "sense.json" 24 + try: 25 + data = json.loads(sense_path.read_text()) 26 + except (FileNotFoundError, OSError, ValueError): 27 + return False 28 + 29 + return bool(data.get("meeting_detected")) 30 + 31 + 32 + def _any_activity_segment_meeting_detected(day: str, segments: list[str]) -> bool: 33 + """Return True when any contributing segment is marked as a meeting.""" 34 + return any(_segment_meeting_detected(day, segment) for segment in segments) 14 35 15 36 16 37 def post_process(result: str, context: dict) -> str | None: ··· 58 79 match = find_matching_entity(resolved_entry.get("name", ""), entities_list) 59 80 resolved_entry["entity_id"] = match.get("id") if match else None 60 81 resolved_entries.append(resolved_entry) 82 + 83 + segments = activity.get("segments") or [] 84 + if segments and not _any_activity_segment_meeting_detected(day, segments): 85 + clamped_count = 0 86 + for entry in resolved_entries: 87 + if entry.get("role") == "attendee": 88 + entry["role"] = "mentioned" 89 + clamped_count += 1 90 + if clamped_count: 91 + logger.warning( 92 + "participation hook: clamped %d attendee entries to mentioned on activity %s (facet=%s day=%s); no contributing sense segment had meeting_detected=true", 93 + clamped_count, 94 + record_id, 95 + facet, 96 + day, 97 + ) 61 98 62 99 payload = {"participation": resolved_entries} 63 100 participation_confidence = data.get("participation_confidence")

+1 -1

talent/sense.md

··· 86 86 - **attendee**: The entity was directly participating in the live interaction during this segment. Use only for people who were actively present in the meeting or call. 87 87 - **mentioned**: The entity was referenced, quoted, shown on screen, or otherwise relevant, but was not directly participating. 88 88 89 - Contamination guard: tool or product names visible on screen must be `source: screen` and `role: mentioned`, never `attendee`. Video-conference app names such as Google Meet or Zoom are platform/tool entities, not attendees. People quoted or referenced in transcripts are `role: mentioned` unless they were actively speaking as participants in the live meeting. 89 + Contamination guard: tool or product names visible on screen must be `source: screen` and `role: mentioned`, never `attendee`. Video-conference app names such as Google Meet or Zoom are platform/tool entities, not attendees. `role: attendee` requires `meeting_detected: true` for this same segment; when `meeting_detected: false`, every Person must be `role: mentioned` even if they spoke, were quoted, or were referenced in the transcript. 90 90 91 91 #### source 92 92 - **voice**: Use when the entity is identified from spoken audio content.

+208 -23

tests/test_sense_contamination_guard.py

··· 1 1 # SPDX-License-Identifier: AGPL-3.0-only 2 2 # Copyright (c) 2026 sol pbc 3 3 4 - from pathlib import Path 4 + import json 5 + import logging 6 + 7 + 8 + def _write_detected_entities(tmp_path, facet: str, day: str, rows: list[dict]) -> None: 9 + entities_path = tmp_path / "facets" / facet / "entities" / f"{day}.jsonl" 10 + entities_path.parent.mkdir(parents=True, exist_ok=True) 11 + entities_path.write_text( 12 + "".join(json.dumps(row, ensure_ascii=False) + "\n" for row in rows), 13 + encoding="utf-8", 14 + ) 15 + 16 + 17 + def _write_sense_json( 18 + tmp_path, 19 + day: str, 20 + stream: str, 21 + segment_key: str, 22 + payload: dict | None, 23 + ) -> None: 24 + talents_dir = tmp_path / "chronicle" / day / stream / segment_key / "talents" 25 + talents_dir.mkdir(parents=True, exist_ok=True) 26 + if payload is not None: 27 + (talents_dir / "sense.json").write_text(json.dumps(payload), encoding="utf-8") 28 + 29 + 30 + def _activity_record(segments: list[str]) -> dict: 31 + return { 32 + "id": "meeting_090000_300", 33 + "activity": "meeting", 34 + "segments": segments, 35 + "level_avg": 1.0, 36 + "description": "Team sync", 37 + "active_entities": ["Guest Speaker"], 38 + "created_at": 1, 39 + } 40 + 41 + 42 + def _participation_result(role: str) -> str: 43 + return json.dumps( 44 + { 45 + "participation": [ 46 + { 47 + "name": "Guest Speaker", 48 + "role": role, 49 + "source": "voice", 50 + "confidence": 0.98, 51 + "context": "Spoke during the session", 52 + "entity_id": None, 53 + } 54 + ] 55 + } 56 + ) 57 + 58 + 59 + def test_participation_clamps_attendees_when_all_segments_are_non_meetings( 60 + tmp_path, monkeypatch, caplog 61 + ): 62 + from talent.participation import post_process 63 + from think.activities import append_activity_record, load_activity_records 64 + 65 + facet = "work" 66 + day = "20260418" 67 + stream = "default" 68 + segments = ["090000_300", "090500_300"] 69 + monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path)) 70 + 71 + _write_detected_entities( 72 + tmp_path, 73 + facet, 74 + day, 75 + [{"id": "guest_speaker", "type": "Person", "name": "Guest Speaker"}], 76 + ) 77 + for segment_key in segments: 78 + _write_sense_json( 79 + tmp_path, day, stream, segment_key, {"meeting_detected": False} 80 + ) 81 + 82 + activity = _activity_record(segments) 83 + append_activity_record(facet, day, activity) 84 + 85 + with caplog.at_level(logging.WARNING, logger="talent.participation"): 86 + post_process( 87 + _participation_result("attendee"), 88 + {"activity": activity, "facet": facet, "day": day}, 89 + ) 90 + 91 + record = load_activity_records(facet, day)[0] 92 + assert record["participation"][0]["role"] == "mentioned" 93 + warnings = [r for r in caplog.records if r.levelno == logging.WARNING] 94 + assert len(warnings) == 1 95 + assert ( 96 + warnings[0].getMessage() 97 + == "participation hook: clamped 1 attendee entries to mentioned on activity meeting_090000_300 (facet=work day=20260418); no contributing sense segment had meeting_detected=true" 98 + ) 99 + 100 + 101 + def test_participation_preserves_attendees_when_any_segment_is_meeting( 102 + tmp_path, monkeypatch, caplog 103 + ): 104 + from talent.participation import post_process 105 + from think.activities import append_activity_record, load_activity_records 106 + 107 + facet = "work" 108 + day = "20260418" 109 + stream = "default" 110 + segments = ["090000_300", "090500_300"] 111 + monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path)) 112 + 113 + _write_detected_entities( 114 + tmp_path, 115 + facet, 116 + day, 117 + [{"id": "guest_speaker", "type": "Person", "name": "Guest Speaker"}], 118 + ) 119 + _write_sense_json(tmp_path, day, stream, segments[0], {"meeting_detected": False}) 120 + _write_sense_json(tmp_path, day, stream, segments[1], {"meeting_detected": True}) 121 + 122 + activity = _activity_record(segments) 123 + append_activity_record(facet, day, activity) 124 + 125 + with caplog.at_level(logging.WARNING, logger="talent.participation"): 126 + post_process( 127 + _participation_result("attendee"), 128 + {"activity": activity, "facet": facet, "day": day}, 129 + ) 130 + 131 + record = load_activity_records(facet, day)[0] 132 + assert record["participation"][0]["role"] == "attendee" 133 + warnings = [r for r in caplog.records if r.levelno == logging.WARNING] 134 + assert warnings == [] 135 + 5 136 6 - import frontmatter 137 + def test_participation_clamp_is_idempotent_on_second_pass( 138 + tmp_path, monkeypatch, caplog 139 + ): 140 + from talent.participation import post_process 141 + from think.activities import append_activity_record, load_activity_records 142 + 143 + facet = "work" 144 + day = "20260418" 145 + stream = "default" 146 + segments = ["090000_300", "090500_300"] 147 + monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path)) 7 148 8 - SENSE_PATH = Path(__file__).resolve().parents[1] / "talent" / "sense.md" 149 + _write_detected_entities( 150 + tmp_path, 151 + facet, 152 + day, 153 + [{"id": "guest_speaker", "type": "Person", "name": "Guest Speaker"}], 154 + ) 155 + for segment_key in segments: 156 + _write_sense_json( 157 + tmp_path, day, stream, segment_key, {"meeting_detected": False} 158 + ) 159 + 160 + activity = _activity_record(segments) 161 + append_activity_record(facet, day, activity) 162 + 163 + with caplog.at_level(logging.WARNING, logger="talent.participation"): 164 + post_process( 165 + _participation_result("attendee"), 166 + {"activity": activity, "facet": facet, "day": day}, 167 + ) 168 + first_warning_count = len( 169 + [r for r in caplog.records if r.levelno == logging.WARNING] 170 + ) 171 + assert first_warning_count == 1 172 + 173 + record = load_activity_records(facet, day)[0] 174 + second_result = json.dumps({"participation": record["participation"]}) 175 + 176 + with caplog.at_level(logging.WARNING, logger="talent.participation"): 177 + post_process( 178 + second_result, 179 + {"activity": record, "facet": facet, "day": day}, 180 + ) 9 181 182 + updated = load_activity_records(facet, day)[0] 183 + assert updated["participation"] == record["participation"] 184 + warnings = [r for r in caplog.records if r.levelno == logging.WARNING] 185 + assert len(warnings) == 1 10 186 11 - def _role_section() -> str: 12 - content = frontmatter.load(SENSE_PATH).content 13 - start = content.index("#### role") 14 - end = content.index("#### source", start) 15 - return content[start:end] 16 187 188 + def test_participation_treats_missing_sense_json_as_non_meeting( 189 + tmp_path, monkeypatch, caplog 190 + ): 191 + from talent.participation import post_process 192 + from think.activities import append_activity_record, load_activity_records 17 193 18 - def test_sense_role_section_contains_contamination_guard(): 19 - role_section = _role_section() 194 + facet = "work" 195 + day = "20260418" 196 + stream = "default" 197 + segments = ["090000_300", "090500_300"] 198 + monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path)) 20 199 21 - assert "tool or product names visible on screen" in role_section 22 - assert "`source: screen`" in role_section 23 - assert "`role: mentioned`" in role_section 24 - assert "Google Meet" in role_section 25 - assert "Zoom" in role_section 26 - assert "quoted or referenced in transcripts" in role_section 27 - assert "actively speaking as participants" in role_section 200 + _write_detected_entities( 201 + tmp_path, 202 + facet, 203 + day, 204 + [{"id": "guest_speaker", "type": "Person", "name": "Guest Speaker"}], 205 + ) 206 + _write_sense_json(tmp_path, day, stream, segments[0], None) 207 + _write_sense_json(tmp_path, day, stream, segments[1], {"meeting_detected": False}) 28 208 209 + activity = _activity_record(segments) 210 + append_activity_record(facet, day, activity) 29 211 30 - def test_sense_role_section_has_screen_and_mentioned_guidance_for_tools_and_apps(): 31 - role_section = _role_section() 212 + with caplog.at_level(logging.WARNING, logger="talent.participation"): 213 + post_process( 214 + _participation_result("attendee"), 215 + {"activity": activity, "facet": facet, "day": day}, 216 + ) 32 217 33 - assert "screen" in role_section 34 - assert "mentioned" in role_section 35 - assert "tool" in role_section 36 - assert "Video-conference app names" in role_section 218 + record = load_activity_records(facet, day)[0] 219 + assert record["participation"][0]["role"] == "mentioned" 220 + warnings = [r for r in caplog.records if r.levelno == logging.WARNING] 221 + assert len(warnings) == 1

Configure Feed

Configure Feed