personal memory agent
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

talents/entity_observer: schema-constrain + list-shape observations

Migrate entity_observer to an external JSON Schema (Draft 2020-12) file
and flip the observations payload from a dynamic-key dict
({slug: [...]}) to a typed list of groups ([{entity_id, items: [...]}]).
Typed lists validate under OpenAI strict mode, whereas a dynamic-key
dict described with patternProperties does not (founder decision #4,
2026-04-19 audit).

Clean break: the post-hook has no dict-of-lists fallback; the schema,
prompt, and hook all flip together. The stats baseline is updated because
the new schema field now surfaces for this talent (matching the
speaker_attribution pattern). Hook tests are rewritten for the new shape,
and a new schema test file covers schema validation plus positive and
negative payloads.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

+340 -67
+11 -7
apps/entities/talent/entity_observer.md
··· 10 10 "multi_facet": true, 11 11 "group": "Entities", 12 12 "output": "json", 13 + "schema": "entity_observer.schema.json", 13 14 "thinking_budget": 2048, 14 15 "hook": {"pre": "entities:entity_observer", "post": "entities:entity_observer"}, 15 16 "load": {"transcripts": false, "percepts": false, "talents": false} ··· 86 87 87 88 ```json 88 89 { 89 - "observations": { 90 - "entity_slug": [ 91 - {"content": "The durable observation text", "reasoning": "Why this qualifies (1 sentence)"} 92 - ] 93 - }, 90 + "observations": [ 91 + { 92 + "entity_id": "alice_johnson", 93 + "items": [ 94 + {"content": "The durable observation text", "reasoning": "Why this qualifies (1 sentence)"} 95 + ] 96 + } 97 + ], 94 98 "skipped": ["entity_ids_examined_but_no_new_observations"], 95 99 "summary": "Observed X entities, Y new observations total." 96 100 } 97 101 ``` 98 102 99 103 Rules: 100 - - Use the entity_id (slug) from the context as the key 104 + - Use the entity_id (slug) from the context as the `entity_id` field 101 105 - One fact per observation — no compound sentences 102 106 - Check for semantic duplicates against the existing observations shown in context 103 107 - If existing observations are already rich, zero new observations is valid and correct 104 108 - The `reasoning` field is for audit only 105 109 - Include ALL examined entities in either `observations` or `skipped` 106 - - Empty observations dict is valid when nothing new is found 110 + - Empty observations list is valid when nothing new is found
+13 -4
apps/entities/talent/entity_observer.py
··· 48 48 return None 49 49 50 50 observations = data.get("observations") 51 - if not isinstance(observations, dict) or not observations: 51 + if not isinstance(observations, list): 52 + logger.warning("entity_observer: observations is not a list") 53 + return None 54 + if not observations: 52 55 return None 53 56 54 57 valid_entity_ids = { 55 58 entity.get("id") for entity in load_entities(facet) if entity.get("id") 56 59 } 57 60 58 - for entity_id, items in observations.items(): 61 + for entry in observations: 62 + if not isinstance(entry, dict): 63 + logger.debug("Skipping non-dict observation entry: %r", entry) 64 + continue 65 + entity_id = entry.get("entity_id") 66 + items = entry.get("items") 67 + if not isinstance(entity_id, str) or not isinstance(items, list): 68 + logger.debug("Skipping malformed observation entry: %r", entry) 69 + continue 59 70 if entity_id not in valid_entity_ids: 60 71 logger.debug("Skipping unrecognized entity_id: %s", entity_id) 61 - continue 62 - if not isinstance(items, list): 63 72 continue 64 73 65 74 existing = {
+51
apps/entities/talent/entity_observer.schema.json
··· 1 + { 2 + "$schema": "https://json-schema.org/draft/2020-12/schema", 3 + "type": "object", 4 + "additionalProperties": false, 5 + "required": ["observations", "skipped", "summary"], 6 + "properties": { 7 + "observations": { 8 + "type": "array", 9 + "items": { 10 + "type": "object", 11 + "additionalProperties": false, 12 + "required": ["entity_id", "items"], 13 + "properties": { 14 + "entity_id": { 15 + "type": "string", 16 + "minLength": 1 17 + }, 18 + "items": { 19 + "type": "array", 20 + "items": { 21 + "type": "object", 22 + "additionalProperties": false, 23 + "required": ["content", "reasoning"], 24 + "properties": { 25 + "content": { 26 + "type": "string", 27 + "minLength": 1 28 + }, 29 + "reasoning": { 30 + "type": "string", 31 + "minLength": 1 32 + } 33 + } 34 + } 35 + } 36 + } 37 + } 38 + }, 39 + "skipped": { 40 + "type": "array", 41 + "items": { 42 + "type": "string", 43 + "minLength": 1 44 + } 45 + }, 46 + "summary": { 47 + "type": "string", 48 + "minLength": 1 49 + } 50 + } 51 + }
+1
tests/baselines/api/stats/stats.json
··· 115 115 "path": "<PROJECT>/apps/entities/talent/entity_observer.md", 116 116 "priority": 57, 117 117 "schedule": "daily", 118 + "schema": "entity_observer.schema.json", 118 119 "source": "app", 119 120 "thinking_budget": 2048, 120 121 "tier": 2,
+103 -56
tests/test_entity_observer_context.py
··· 4 4 from __future__ import annotations 5 5 6 6 import json 7 + import logging 7 8 import os 8 9 from pathlib import Path 9 10 ··· 186 187 result = post_process( 187 188 json.dumps( 188 189 { 189 - "observations": { 190 - "alice_johnson": [ 191 - { 192 - "content": "Prefers morning meetings", 193 - "reasoning": "Durable preference", 194 - } 195 - ] 196 - }, 190 + "observations": [ 191 + { 192 + "entity_id": "alice_johnson", 193 + "items": [ 194 + { 195 + "content": "Prefers morning meetings", 196 + "reasoning": "Durable preference", 197 + } 198 + ], 199 + } 200 + ], 197 201 "skipped": [], 198 202 "summary": "1 entity, 1 observation", 199 203 } ··· 206 210 assert [obs["content"] for obs in observations] == ["Prefers morning meetings"] 207 211 208 212 209 - def test_post_process_filters_unrecognized_entity(tmp_path): 213 + def test_post_process_filters_unrecognized_entity(tmp_path, caplog): 210 214 _set_journal(str(tmp_path)) 211 215 facet = "work" 212 216 _attach_entity(tmp_path, facet, "alice_johnson", "Alice Johnson") 213 217 214 - result = post_process( 215 - json.dumps( 216 - { 217 - "observations": { 218 - "alice_johnson": [{"content": "Prefers morning meetings"}], 219 - "unknown_entity": [{"content": "Should be ignored"}], 220 - }, 221 - "skipped": [], 222 - "summary": "2 entities, 1 persisted observation", 223 - } 224 - ), 225 - {"facet": facet, "day": "20260304"}, 226 - ) 218 + with caplog.at_level(logging.DEBUG): 219 + result = post_process( 220 + json.dumps( 221 + { 222 + "observations": [ 223 + { 224 + "entity_id": "unknown_entity", 225 + "items": [ 226 + { 227 + "content": "Should be ignored", 228 + "reasoning": "Unknown entity", 229 + } 230 + ], 231 + } 232 + ], 233 + "skipped": ["alice_johnson"], 234 + "summary": "1 entity skipped", 235 + } 236 + ), 237 + {"facet": facet, "day": "20260304"}, 238 + ) 227 239 228 240 assert result is None 229 - observations = load_observations(facet, "alice_johnson") 230 - assert [obs["content"] for obs in 
observations] == ["Prefers morning meetings"] 241 + assert load_observations(facet, "alice_johnson") == [] 231 242 assert load_observations(facet, "unknown_entity") == [] 243 + assert "Skipping unrecognized entity_id: unknown_entity" in caplog.text 232 244 233 245 234 246 def test_post_process_skips_empty_content(tmp_path): ··· 239 251 post_process( 240 252 json.dumps( 241 253 { 242 - "observations": { 243 - "alice_johnson": [ 244 - {"content": "", "reasoning": "empty"}, 245 - {"content": " ", "reasoning": "whitespace"}, 246 - {"content": "Valid observation", "reasoning": "ok"}, 247 - ] 248 - }, 254 + "observations": [ 255 + { 256 + "entity_id": "alice_johnson", 257 + "items": [{"content": "", "reasoning": "empty"}], 258 + } 259 + ], 249 260 "skipped": [], 250 - "summary": "", 261 + "summary": "No valid observations", 251 262 } 252 263 ), 253 264 {"facet": facet, "day": "20260304"}, 254 265 ) 255 266 256 - observations = load_observations(facet, "alice_johnson") 257 - assert len(observations) == 1 258 - assert observations[0]["content"] == "Valid observation" 267 + assert load_observations(facet, "alice_johnson") == [] 259 268 260 269 261 - def test_post_process_skips_non_list_items(tmp_path): 270 + def test_post_process_skips_non_list_group_items(tmp_path, caplog): 262 271 _set_journal(str(tmp_path)) 263 272 facet = "work" 264 273 _attach_entity(tmp_path, facet, "alice_johnson", "Alice Johnson") 265 274 266 - post_process( 267 - json.dumps( 268 - { 269 - "observations": {"alice_johnson": "not a list"}, 270 - "skipped": [], 271 - "summary": "", 272 - } 273 - ), 274 - {"facet": facet, "day": "20260304"}, 275 - ) 275 + with caplog.at_level(logging.DEBUG): 276 + post_process( 277 + json.dumps( 278 + { 279 + "observations": [ 280 + { 281 + "entity_id": "alice_johnson", 282 + "items": "not a list", 283 + } 284 + ], 285 + "skipped": [], 286 + "summary": "Malformed items", 287 + } 288 + ), 289 + {"facet": facet, "day": "20260304"}, 290 + ) 291 + 292 + assert 
load_observations(facet, "alice_johnson") == [] 293 + assert "Skipping malformed observation entry" in caplog.text 294 + 295 + 296 + def test_post_process_skips_group_missing_entity_id(tmp_path, caplog): 297 + _set_journal(str(tmp_path)) 298 + facet = "work" 299 + _attach_entity(tmp_path, facet, "alice_johnson", "Alice Johnson") 300 + 301 + with caplog.at_level(logging.DEBUG): 302 + post_process( 303 + json.dumps( 304 + { 305 + "observations": [ 306 + { 307 + "items": [{"content": "x", "reasoning": "y"}], 308 + } 309 + ], 310 + "skipped": [], 311 + "summary": "Missing entity id", 312 + } 313 + ), 314 + {"facet": facet, "day": "20260304"}, 315 + ) 276 316 277 317 assert load_observations(facet, "alice_johnson") == [] 318 + assert "Skipping malformed observation entry" in caplog.text 278 319 279 320 280 321 def test_post_process_deduplicates_existing(tmp_path): ··· 289 330 post_process( 290 331 json.dumps( 291 332 { 292 - "observations": { 293 - "alice_johnson": [ 294 - {"content": "Prefers morning meetings", "reasoning": "dupe"}, 295 - { 296 - "content": "Expert in distributed systems", 297 - "reasoning": "new", 298 - }, 299 - ] 300 - }, 333 + "observations": [ 334 + { 335 + "entity_id": "alice_johnson", 336 + "items": [ 337 + { 338 + "content": "Prefers morning meetings", 339 + "reasoning": "dupe", 340 + }, 341 + { 342 + "content": "Expert in distributed systems", 343 + "reasoning": "new", 344 + }, 345 + ], 346 + } 347 + ], 301 348 "skipped": [], 302 - "summary": "", 349 + "summary": "One duplicate, one new observation", 303 350 } 304 351 ), 305 352 {"facet": facet, "day": "20260304"},
+161
tests/test_entity_observer_schema.py
··· 1 + # SPDX-License-Identifier: AGPL-3.0-only 2 + # Copyright (c) 2026 sol pbc 3 + 4 + import json 5 + from pathlib import Path 6 + 7 + from jsonschema import Draft202012Validator 8 + 9 + from think.talent import get_talent 10 + 11 + SCHEMA_PATH = ( 12 + Path(__file__).parent.parent 13 + / "apps" 14 + / "entities" 15 + / "talent" 16 + / "entity_observer.schema.json" 17 + ) 18 + 19 + 20 + def _load_schema() -> dict: 21 + return json.loads(SCHEMA_PATH.read_text(encoding="utf-8")) 22 + 23 + 24 + def test_schema_is_valid_draft_2020_12(): 25 + Draft202012Validator.check_schema(_load_schema()) 26 + 27 + 28 + def test_talent_exposes_json_schema(): 29 + assert get_talent("entities:entity_observer")["json_schema"] == _load_schema() 30 + 31 + 32 + def test_valid_payload_empty_observations(): 33 + validator = Draft202012Validator(_load_schema()) 34 + 35 + assert validator.is_valid( 36 + {"observations": [], "skipped": [], "summary": "nothing today"} 37 + ) 38 + 39 + 40 + def test_valid_payload_single_group_single_item(): 41 + validator = Draft202012Validator(_load_schema()) 42 + 43 + assert validator.is_valid( 44 + { 45 + "observations": [ 46 + { 47 + "entity_id": "alice_johnson", 48 + "items": [ 49 + { 50 + "content": "Prefers async communication", 51 + "reasoning": "Durable working style preference.", 52 + } 53 + ], 54 + } 55 + ], 56 + "skipped": [], 57 + "summary": "one observation", 58 + } 59 + ) 60 + 61 + 62 + def test_valid_payload_multi_group(): 63 + validator = Draft202012Validator(_load_schema()) 64 + 65 + assert validator.is_valid( 66 + { 67 + "observations": [ 68 + { 69 + "entity_id": "alice_johnson", 70 + "items": [ 71 + { 72 + "content": "Prefers async communication", 73 + "reasoning": "Durable working style preference.", 74 + }, 75 + { 76 + "content": "Works Pacific time hours", 77 + "reasoning": "Stable schedule pattern.", 78 + }, 79 + ], 80 + }, 81 + { 82 + "entity_id": "verona_platform", 83 + "items": [ 84 + { 85 + "content": "Uses event sourcing in core 
workflows", 86 + "reasoning": "Architectural constraint.", 87 + } 88 + ], 89 + }, 90 + ], 91 + "skipped": ["bob_smith"], 92 + "summary": "three observations across two entities", 93 + } 94 + ) 95 + 96 + 97 + def test_invalid_missing_top_required(): 98 + validator = Draft202012Validator(_load_schema()) 99 + 100 + assert not validator.is_valid({"observations": [], "skipped": []}) 101 + 102 + 103 + def test_invalid_missing_group_entity_id(): 104 + validator = Draft202012Validator(_load_schema()) 105 + 106 + assert not validator.is_valid( 107 + { 108 + "observations": [ 109 + { 110 + "items": [ 111 + { 112 + "content": "Prefers async communication", 113 + "reasoning": "Durable working style preference.", 114 + } 115 + ] 116 + } 117 + ], 118 + "skipped": [], 119 + "summary": "missing entity id", 120 + } 121 + ) 122 + 123 + 124 + def test_invalid_empty_content(): 125 + validator = Draft202012Validator(_load_schema()) 126 + 127 + assert not validator.is_valid( 128 + { 129 + "observations": [ 130 + { 131 + "entity_id": "alice_johnson", 132 + "items": [{"content": "", "reasoning": "Durable preference."}], 133 + } 134 + ], 135 + "skipped": [], 136 + "summary": "invalid content", 137 + } 138 + ) 139 + 140 + 141 + def test_invalid_extra_property_on_group(): 142 + validator = Draft202012Validator(_load_schema()) 143 + 144 + assert not validator.is_valid( 145 + { 146 + "observations": [ 147 + { 148 + "entity_id": "alice_johnson", 149 + "items": [ 150 + { 151 + "content": "Prefers async communication", 152 + "reasoning": "Durable working style preference.", 153 + } 154 + ], 155 + "extra": True, 156 + } 157 + ], 158 + "skipped": [], 159 + "summary": "extra property", 160 + } 161 + )