observe/extract: schema-constrain frame-id selection

This is the first structured-outputs migration in the observe pipeline, using the same direct-caller schema pattern as think/detect_created.py. It adds tests/test_extract_schema.py, which covers schema validity, accept/reject behavior, and the wiring of the schema through to think.models.generate.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Jer Miller 1 month ago 03bf274d 4283ae47

+70

3 changed files

expand all

observe

extract.py

extract.schema.json

tests

test_extract_schema.py

observe/extract.py

··· 22 22 23 23 logger = logging.getLogger(__name__) 24 24 25 + _SCHEMA = json.loads( 26 + (Path(__file__).parent / "extract.schema.json").read_text(encoding="utf-8") 27 + ) 28 + 25 29 # Default maximum frames to extract content from 26 30 DEFAULT_MAX_EXTRACTIONS = 20 27 31 ··· 244 248 context="observe.extract.selection", 245 249 system_instruction=prompt_content.text, 246 250 json_output=True, 251 + json_schema=_SCHEMA, 247 252 thinking_budget=4096, 248 253 max_output_tokens=1024, 249 254 temperature=0.3,

observe/extract.schema.json

··· 1 + { 2 + "$schema": "https://json-schema.org/draft/2020-12/schema", 3 + "type": "array", 4 + "items": {"type": "integer", "minimum": 0} 5 + }

+60

tests/test_extract_schema.py

# SPDX-License-Identifier: AGPL-3.0-only
# Copyright (c) 2026 sol pbc

"""Tests for the JSON schema used by observe.extract frame-id selection.

Three things are checked: the schema file is itself a valid Draft 2020-12
schema, it accepts/rejects a set of representative payloads, and
``_ai_select_frames`` forwards the module-level schema object to
``think.models.generate``.
"""

import importlib
import json
from pathlib import Path

from jsonschema import Draft202012Validator

import think.models as models

extract_mod = importlib.import_module("observe.extract")

# Read the schema from disk independently of observe.extract so the file
# itself is what gets validated, not whatever the module holds in memory.
_SCHEMA_PATH = Path(__file__).resolve().parents[1] / "observe" / "extract.schema.json"
_SCHEMA = json.loads(_SCHEMA_PATH.read_text(encoding="utf-8"))


def test_extract_schema_file_is_valid_draft_2020_12():
    """The schema document must pass Draft 2020-12 meta-schema validation."""
    Draft202012Validator.check_schema(_SCHEMA)


def test_extract_schema_accepts_and_rejects_expected_values():
    """Arrays of non-negative integers validate; every other sample does not."""
    checker = Draft202012Validator(_SCHEMA)

    should_pass = (
        [],
        [1, 15, 42, 89],
        [1, 0],
    )
    should_fail = (
        ["1"],  # string element
        [-1],  # below the schema's minimum of 0
        [1.5],  # non-integer number
        42,  # bare scalar instead of an array
        {"ids": [1]},  # object wrapper instead of an array
        [[1, 2]],  # nested array element
    )

    for payload in should_pass:
        assert checker.is_valid(payload)
    for payload in should_fail:
        assert not checker.is_valid(payload)


def test_ai_select_frames_passes_schema_to_generate(monkeypatch):
    """``_ai_select_frames`` hands ``extract_mod._SCHEMA`` to ``generate``.

    ``think.models.generate`` is replaced with a stub that records the keyword
    arguments it receives and returns the JSON text ``"[1]"``.
    """
    seen_kwargs = {}

    def fake_generate(**kwargs):
        seen_kwargs.update(kwargs)
        return "[1]"

    monkeypatch.setattr(models, "generate", fake_generate)

    single_frame = {
        "frame_id": 1,
        "timestamp": 1.0,
        "analysis": {"primary": "code"},
    }
    category_map = {"code": {"description": "Code editors"}}

    selected = extract_mod._ai_select_frames(
        [single_frame],
        max_extractions=5,
        categories=category_map,
    )

    # Identity check: the exact module-level schema object must be forwarded.
    assert seen_kwargs["json_schema"] is extract_mod._SCHEMA
    assert selected == [1]

Configure Feed

Configure Feed