personal memory agent
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

observe/describe: schema-constrain frame categorization; thread json_schema through think/batch

- Thread a `json_schema: Optional[dict]` parameter through think/batch.py, from `BatchRequest` through `Batch.create()` into the `agenerate(...)` call.
- Add observe/describe.schema.json with a static 9-category Draft 2020-12 enum, wire it into the Phase 1 categorization batch call, and leave response parsing unchanged; tests enforce manual enum sync with the category registry.
- Add schema validity, accept/reject, wiring, and filesystem sync-guard coverage for observe/describe, plus a batch passthrough test for json_schema.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

+174
+5
observe/describe.py
···
175 175   # Build categorization prompt from template
176 176   CATEGORIZATION_PROMPT = _build_categorization_prompt()
177 177
    178 + _SCHEMA = json.loads(
    179 +     (Path(__file__).parent / "describe.schema.json").read_text(encoding="utf-8")
    180 + )
    181 +
178 182
179 183   class VideoProcessor:
180 184       """Process per-monitor screencast videos and detect significant frame changes."""
···
477 481           model=frame_model,
478 482           system_instruction=system_instruction,
479 483           json_output=True,
    484 +         json_schema=_SCHEMA,
480 485           temperature=0.7,
481 486           max_output_tokens=1024,
482 487           thinking_budget=1024,
+13
observe/describe.schema.json
···
 1 + {
 2 +   "$schema": "https://json-schema.org/draft/2020-12/schema",
 3 +   "$comment": "Frame categorization schema for observe/describe.py. The enums in `primary` and `secondary` MUST match the filenames under observe/categories/*.md. Enforced by tests/test_observe_describe_schema.py::test_category_enum_matches_registry. If you add/remove/rename a category file, update both enums here.",
 4 +   "type": "object",
 5 +   "additionalProperties": false,
 6 +   "required": ["visual_description", "primary", "secondary", "overlap"],
 7 +   "properties": {
 8 +     "visual_description": {"type": "string", "minLength": 1},
 9 +     "primary": {"type": "string", "enum": ["browsing", "code", "gaming", "media", "meeting", "messaging", "productivity", "reading", "terminal"]},
10 +     "secondary": {"type": "string", "enum": ["browsing", "code", "gaming", "media", "meeting", "messaging", "productivity", "reading", "terminal", "none"]},
11 +     "overlap": {"type": "boolean"}
12 +   }
13 + }
+24
tests/test_batch.py
···
465 465       # Verify client was passed through
466 466       call_kwargs = mock_agenerate.call_args[1]
467 467       assert call_kwargs["client"] is mock_client
    468 +
    469 +
    470 + @pytest.mark.asyncio
    471 + @patch("think.batch.agenerate", new_callable=AsyncMock)
    472 + async def test_batch_passes_json_schema_to_agenerate(mock_agenerate):
    473 +     """Test that json_schema is passed through to agenerate."""
    474 +     mock_agenerate.return_value = "Response"
    475 +
    476 +     batch = Batch(max_concurrent=5)
    477 +     req = batch.create(
    478 +         contents="Test prompt",
    479 +         context="test.context",
    480 +         json_schema={"type": "object"},
    481 +     )
    482 +     batch.add(req)
    483 +
    484 +     results = []
    485 +     async for completed_req in batch.drain_batch():
    486 +         results.append(completed_req)
    487 +
    488 +     assert len(results) == 1
    489 +
    490 +     call_kwargs = mock_agenerate.call_args[1]
    491 +     assert call_kwargs["json_schema"] == {"type": "object"}
+127
tests/test_observe_describe_schema.py
···
  1 + # SPDX-License-Identifier: AGPL-3.0-only
  2 + # Copyright (c) 2026 sol pbc
  3 +
  4 + from pathlib import Path
  5 + from unittest.mock import AsyncMock, patch
  6 +
  7 + import pytest
  8 + from jsonschema import Draft202012Validator
  9 +
 10 + from observe import describe as describe_mod
 11 + from think.batch import Batch
 12 +
 13 + _SCHEMA = describe_mod._SCHEMA
 14 +
 15 +
 16 + def test_describe_schema_file_is_valid_draft_2020_12():
 17 +     Draft202012Validator.check_schema(_SCHEMA)
 18 +
 19 +
 20 + def test_describe_schema_accepts_and_rejects_expected_values():
 21 +     validator = Draft202012Validator(_SCHEMA)
 22 +
 23 +     assert validator.is_valid(
 24 +         {
 25 +             "visual_description": "A browser window with multiple open tabs.",
 26 +             "primary": "browsing",
 27 +             "secondary": "reading",
 28 +             "overlap": True,
 29 +         }
 30 +     )
 31 +     assert validator.is_valid(
 32 +         {
 33 +             "visual_description": "A code editor with a terminal pane.",
 34 +             "primary": "code",
 35 +             "secondary": "none",
 36 +             "overlap": False,
 37 +         }
 38 +     )
 39 +     assert not validator.is_valid(
 40 +         {
 41 +             "visual_description": "A dashboard view.",
 42 +             "primary": "unknown",
 43 +             "secondary": "none",
 44 +             "overlap": False,
 45 +         }
 46 +     )
 47 +     assert not validator.is_valid(
 48 +         {
 49 +             "visual_description": "A dashboard view.",
 50 +             "primary": "productivity",
 51 +             "secondary": "unknown",
 52 +             "overlap": False,
 53 +         }
 54 +     )
 55 +     assert not validator.is_valid(
 56 +         {
 57 +             "visual_description": "A dashboard view.",
 58 +             "secondary": "none",
 59 +             "overlap": False,
 60 +         }
 61 +     )
 62 +     assert not validator.is_valid(
 63 +         {
 64 +             "visual_description": "A dashboard view.",
 65 +             "primary": "productivity",
 66 +             "secondary": "none",
 67 +         }
 68 +     )
 69 +     assert not validator.is_valid(
 70 +         {
 71 +             "visual_description": "A dashboard view.",
 72 +             "primary": "productivity",
 73 +             "secondary": "none",
 74 +             "overlap": False,
 75 +             "confidence": 0.9,
 76 +         }
 77 +     )
 78 +     assert not validator.is_valid(
 79 +         {
 80 +             "visual_description": "A dashboard view.",
 81 +             "primary": "productivity",
 82 +             "secondary": "none",
 83 +             "overlap": "yes",
 84 +         }
 85 +     )
 86 +     assert not validator.is_valid(
 87 +         {
 88 +             "visual_description": "",
 89 +             "primary": "productivity",
 90 +             "secondary": "none",
 91 +             "overlap": False,
 92 +         }
 93 +     )
 94 +
 95 +
 96 + @pytest.mark.asyncio
 97 + @patch("think.batch.agenerate", new_callable=AsyncMock)
 98 + async def test_describe_batch_call_passes_schema(mock_agenerate):
 99 +     mock_agenerate.return_value = (
100 +         '{"visual_description":"A code editor is visible.","primary":"code",'
101 +         '"secondary":"none","overlap":false}'
102 +     )
103 +
104 +     batch = Batch(max_concurrent=1)
105 +     req = batch.create(
106 +         contents="Analyze this screenshot frame from a screencast recording.",
107 +         context="observe.describe.frame",
108 +         json_output=True,
109 +         json_schema=_SCHEMA,
110 +     )
111 +     batch.add(req)
112 +
113 +     results = []
114 +     async for completed_req in batch.drain_batch():
115 +         results.append(completed_req)
116 +
117 +     assert len(results) == 1
118 +     assert mock_agenerate.call_args.kwargs["json_schema"] is describe_mod._SCHEMA
119 +
120 +
121 + def test_category_enum_matches_registry():
122 +     categories_dir = Path(describe_mod.__file__).resolve().parent / "categories"
123 +     on_disk = {p.stem for p in categories_dir.glob("*.md")}
124 +
125 +     assert set(_SCHEMA["properties"]["primary"]["enum"]) == on_disk
126 +     assert set(_SCHEMA["properties"]["secondary"]["enum"]) - {"none"} == on_disk
127 +     assert "none" in _SCHEMA["properties"]["secondary"]["enum"]
+5
think/batch.py
···
 52  52           max_output_tokens: int = 8192 * 2,
 53  53           system_instruction: Optional[str] = None,
 54  54           json_output: bool = False,
     55 +         json_schema: Optional[dict] = None,
 55  56           thinking_budget: Optional[int] = None,
 56  57           timeout_s: Optional[float] = None,
 57  58       ):
···
 62  63           self.max_output_tokens = max_output_tokens
 63  64           self.system_instruction = system_instruction
 64  65           self.json_output = json_output
     66 +         self.json_schema = json_schema
 65  67           self.thinking_budget = thinking_budget
 66  68           self.timeout_s = timeout_s
 67  69
···
124 126           max_output_tokens: int = 8192 * 2,
125 127           system_instruction: Optional[str] = None,
126 128           json_output: bool = False,
    129 +         json_schema: Optional[dict] = None,
127 130           thinking_budget: Optional[int] = None,
128 131           timeout_s: Optional[float] = None,
129 132       ) -> BatchRequest:
···
155 158             max_output_tokens=max_output_tokens,
156 159             system_instruction=system_instruction,
157 160             json_output=json_output,
    161 +           json_schema=json_schema,
158 162             thinking_budget=thinking_budget,
159 163             timeout_s=timeout_s,
160 164         )
···
261 265             max_output_tokens=request.max_output_tokens,
262 266             system_instruction=request.system_instruction,
263 267             json_output=request.json_output,
    268 +           json_schema=request.json_schema,
264 269             thinking_budget=request.thinking_budget,
265 270             timeout_s=request.timeout_s,
266 271             **kwargs,