observe/enrich: schema-constrain + drop bare-list fallback

Lock the enrichment output to the documented wrapper shape
{statements, topics, setting, warning} by passing a Draft 2020-12
JSON Schema on the generate() call, and drop the bare-list fallback
that was silently wrapping drifted Gemini output with empty metadata.

Four changes:
- observe/enrich.schema.json (new) — wrapper schema with
additionalProperties: false on both the root object and each
statement item, and no minLength constraints (empty strings are
permitted on corrected/emotion/topics/setting/warning per the prompt
contract).
- observe/enrich.py — load the schema once at module level as
_SCHEMA; pass json_schema=_SCHEMA into generate(); delete the
bare-list branch so non-dict responses fall through to the existing
isinstance(dict) guard and return None.
- tests/test_enrich_schema.py (new) — schema validity, accept/reject
matrix, and generate() wiring test, mirroring
tests/test_extract_schema.py.
- tests/test_enrich.py — rewrite test_bare_list_response_wrapped as
test_bare_list_response_returns_none.

Follows the direct-caller schema-constrain pattern established by
observe/extract.py (03bf274d) and think/detect_created.py (b58bd862).
Provider-side json_schema plumbing is already in place; no
think/models.py or provider changes required.

Jer Miller 3 weeks ago 357f5cc2 03bf274d

+181 -14

4 changed files

expand all

observe

enrich.py

enrich.schema.json

tests

test_enrich.py

test_enrich_schema.py

+5 -6

observe/enrich.py

··· 27 27 28 28 logger = logging.getLogger(__name__) 29 29 30 + _SCHEMA = json.loads( 31 + (Path(__file__).parent / "enrich.schema.json").read_text(encoding="utf-8") 32 + ) 33 + 30 34 31 35 def _statement_to_flac_bytes( 32 36 wav: np.ndarray, start: float, end: float, sample_rate: int ··· 112 116 max_output_tokens=16384, 113 117 thinking_budget=4096, 114 118 json_output=True, 119 + json_schema=_SCHEMA, 115 120 ) 116 121 117 122 result = json.loads(response_text) 118 123 logger.info(f" Enrichment complete in {time.perf_counter() - t0:.2f}s") 119 - 120 - # Normalize response — Gemini sometimes returns a bare list of 121 - # statement dicts instead of the expected wrapper object. 122 - if isinstance(result, list): 123 - logger.warning("Enrichment returned bare list, wrapping as statements") 124 - result = {"statements": result, "topics": "", "setting": "", "warning": ""} 125 124 126 125 if not isinstance(result, dict): 127 126 logger.warning(f"Enrichment returned unexpected type: {type(result)}")

+23

observe/enrich.schema.json

··· 1 + { 2 + "$schema": "https://json-schema.org/draft/2020-12/schema", 3 + "type": "object", 4 + "additionalProperties": false, 5 + "required": ["statements", "topics", "setting", "warning"], 6 + "properties": { 7 + "statements": { 8 + "type": "array", 9 + "items": { 10 + "type": "object", 11 + "additionalProperties": false, 12 + "required": ["corrected", "emotion"], 13 + "properties": { 14 + "corrected": {"type": "string"}, 15 + "emotion": {"type": "string"} 16 + } 17 + } 18 + }, 19 + "topics": {"type": "string"}, 20 + "setting": {"type": "string"}, 21 + "warning": {"type": "string"} 22 + } 23 + }

+3 -8

tests/test_enrich.py

··· 134 134 assert result is None 135 135 136 136 @patch("observe.enrich.generate") 137 - def test_bare_list_response_wrapped(self, mock_generate): 138 - """Should wrap bare list response as statements with empty metadata.""" 137 + def test_bare_list_response_returns_none(self, mock_generate): 138 + """Should return None when response is a bare list (schema rejection).""" 139 139 from observe.enrich import enrich_transcript 140 140 141 141 wav = np.zeros(16000 * 10, dtype=np.float32) ··· 150 150 151 151 result = enrich_transcript(wav, 16000, statements) 152 152 153 - assert result is not None 154 - assert result["statements"] == [ 155 - {"corrected": "Hello world.", "emotion": "calm"} 156 - ] 157 - assert result["topics"] == "" 158 - assert result["setting"] == "" 153 + assert result is None 159 154 160 155 def test_returns_none_for_empty_statements(self): 161 156 """Should return None for empty statement list."""

+150

tests/test_enrich_schema.py

··· 1 + # SPDX-License-Identifier: AGPL-3.0-only 2 + # Copyright (c) 2026 sol pbc 3 + 4 + import importlib 5 + import json 6 + from pathlib import Path 7 + 8 + from jsonschema import Draft202012Validator 9 + 10 + enrich_mod = importlib.import_module("observe.enrich") 11 + 12 + _SCHEMA = json.loads( 13 + (Path(__file__).resolve().parents[1] / "observe" / "enrich.schema.json").read_text( 14 + encoding="utf-8" 15 + ) 16 + ) 17 + 18 + 19 + def test_enrich_schema_file_is_valid_draft_2020_12(): 20 + Draft202012Validator.check_schema(_SCHEMA) 21 + 22 + 23 + def test_enrich_schema_accepts_and_rejects_expected_values(): 24 + validator = Draft202012Validator(_SCHEMA) 25 + 26 + assert validator.is_valid( 27 + { 28 + "statements": [{"corrected": "Hello world.", "emotion": "calm"}], 29 + "topics": "", 30 + "setting": "", 31 + "warning": "", 32 + } 33 + ) 34 + assert validator.is_valid( 35 + { 36 + "statements": [], 37 + "topics": "planning, testing", 38 + "setting": "work", 39 + "warning": "", 40 + } 41 + ) 42 + assert validator.is_valid( 43 + { 44 + "statements": [], 45 + "topics": "", 46 + "setting": "", 47 + "warning": "", 48 + } 49 + ) 50 + assert not validator.is_valid([{"corrected": "x", "emotion": "y"}]) 51 + assert not validator.is_valid( 52 + { 53 + "statements": [{"corrected": "Hello world.", "emotion": "calm"}], 54 + "setting": "", 55 + "warning": "", 56 + } 57 + ) 58 + assert not validator.is_valid( 59 + { 60 + "statements": [{"corrected": "Hello world.", "emotion": "calm"}], 61 + "topics": "", 62 + "warning": "", 63 + } 64 + ) 65 + assert not validator.is_valid( 66 + { 67 + "statements": [{"corrected": "Hello world.", "emotion": "calm"}], 68 + "topics": "", 69 + "setting": "", 70 + } 71 + ) 72 + assert not validator.is_valid( 73 + { 74 + "topics": "", 75 + "setting": "", 76 + "warning": "", 77 + } 78 + ) 79 + assert not validator.is_valid( 80 + { 81 + "statements": [{"corrected": "Hello world.", "emotion": "calm"}], 82 + "topics": "", 83 + "setting": "", 84 + 
"warning": "", 85 + "extra": "nope", 86 + } 87 + ) 88 + assert not validator.is_valid( 89 + { 90 + "statements": [{"emotion": "calm"}], 91 + "topics": "", 92 + "setting": "", 93 + "warning": "", 94 + } 95 + ) 96 + assert not validator.is_valid( 97 + { 98 + "statements": [{"corrected": "Hello world."}], 99 + "topics": "", 100 + "setting": "", 101 + "warning": "", 102 + } 103 + ) 104 + assert not validator.is_valid( 105 + { 106 + "statements": [ 107 + { 108 + "corrected": "Hello world.", 109 + "emotion": "calm", 110 + "extra": "nope", 111 + } 112 + ], 113 + "topics": "", 114 + "setting": "", 115 + "warning": "", 116 + } 117 + ) 118 + 119 + 120 + def test_enrich_transcript_passes_schema_to_generate(monkeypatch): 121 + captured = {} 122 + 123 + def fake_generate(**kwargs): 124 + captured.update(kwargs) 125 + return json.dumps( 126 + { 127 + "statements": [{"corrected": "Hello world.", "emotion": "neutral"}], 128 + "topics": "", 129 + "setting": "", 130 + "warning": "", 131 + } 132 + ) 133 + 134 + monkeypatch.setattr(enrich_mod, "generate", fake_generate) 135 + 136 + import numpy as np 137 + 138 + result = enrich_mod.enrich_transcript( 139 + np.zeros(16000, dtype=np.float32), 140 + 16000, 141 + [{"id": 1, "start": 0.0, "end": 1.0, "text": "Hello world."}], 142 + ) 143 + 144 + assert captured["json_schema"] is enrich_mod._SCHEMA 145 + assert result == { 146 + "statements": [{"corrected": "Hello world.", "emotion": "neutral"}], 147 + "topics": "", 148 + "setting": "", 149 + "warning": "", 150 + }

Configure Feed

Configure Feed