observe/describe: schema-constrain meeting extraction; drop legacy participant fallback

+2

observe/categories/meeting.md

··· 48 48 - **formatted_text**: Complete text extraction from the presented screen/slide, formatted in markdown. Preserve structure with headings, bullets, code blocks, etc. 49 49 50 50 Focus on accuracy. If information isn't visible or is unclear, use "unknown" or null. 51 + 52 + Return the JSON object with each participant as an object containing name, status, and video; do not use bare name strings.

+12 -9

observe/categories/meeting.py

··· 6 6 Renders meeting analysis JSON to rich markdown with participants and screen share. 7 7 """ 8 8 9 + import logging 9 10 from typing import Any 11 + 12 + logger = logging.getLogger(__name__) 10 13 11 14 12 15 def format(content: Any, context: dict) -> str: ··· 34 37 if participants: 35 38 lines.append("**Participants:**") 36 39 for p in participants: 37 - # Handle both dict format (new) and string format (legacy) 38 - if isinstance(p, dict): 39 - name = p.get("name", "Unknown") 40 - status = p.get("status", "unknown") 41 - video = "📹" if p.get("video") else "🔇" 42 - lines.append(f"- {video} {name} ({status})") 43 - else: 44 - # Legacy: participant is just a name string 45 - lines.append(f"- {p}") 40 + if not isinstance(p, dict): 41 + logger.warning( 42 + "meeting formatter: skipping non-dict participant: %r", p 43 + ) 44 + continue 45 + name = p.get("name", "Unknown") 46 + status = p.get("status", "unknown") 47 + video = "📹" if p.get("video") else "🔇" 48 + lines.append(f"- {video} {name} ({status})") 46 49 lines.append("") 47 50 48 51 # Screen share

+56

observe/categories/meeting.schema.json

··· 1 + { 2 + "$schema": "https://json-schema.org/draft/2020-12/schema", 3 + "$comment": "Meeting extraction output contract. Source of truth for the shape is observe/categories/meeting.md.", 4 + "type": "object", 5 + "additionalProperties": false, 6 + "required": ["platform", "participants", "screen_share"], 7 + "properties": { 8 + "platform": { 9 + "type": "string", 10 + "enum": ["zoom", "meet", "teams", "slack", "discord", "webex", "other"] 11 + }, 12 + "participants": { 13 + "type": "array", 14 + "items": { 15 + "type": "object", 16 + "additionalProperties": false, 17 + "required": ["name", "status", "video"], 18 + "properties": { 19 + "name": {"type": "string", "minLength": 1}, 20 + "status": { 21 + "type": "string", 22 + "enum": ["speaking", "muted", "active", "presenting", "unknown"] 23 + }, 24 + "video": {"type": "boolean"}, 25 + "box_2d": { 26 + "type": "array", 27 + "items": {"type": "integer", "minimum": 0}, 28 + "minItems": 4, 29 + "maxItems": 4 30 + } 31 + } 32 + } 33 + }, 34 + "screen_share": { 35 + "oneOf": [ 36 + {"type": "null"}, 37 + { 38 + "type": "object", 39 + "additionalProperties": false, 40 + "required": ["box_2d", "presenter", "description", "formatted_text"], 41 + "properties": { 42 + "box_2d": { 43 + "type": "array", 44 + "items": {"type": "integer", "minimum": 0}, 45 + "minItems": 4, 46 + "maxItems": 4 47 + }, 48 + "presenter": {"type": ["string", "null"]}, 49 + "description": {"type": "string"}, 50 + "formatted_text": {"type": "string"} 51 + } 52 + } 53 + ] 54 + } 55 + } 56 + }

+8 -1

observe/describe.py

··· 111 111 if prompt_content.text.strip(): 112 112 metadata["prompt"] = prompt_content.text 113 113 114 + schema_path = md_path.with_suffix(".schema.json") 115 + if schema_path.exists(): 116 + metadata["json_schema"] = json.loads(schema_path.read_text("utf-8")) 117 + 114 118 categories[category] = metadata 115 119 extractable = "prompt" in metadata 116 120 logger.debug(f"Loaded category: {category} (extractable={extractable})") ··· 723 727 else: 724 728 # Create new request for secondary extraction 725 729 extract_req = batch.create( 726 - contents=[], context=cat_meta["context"] 730 + contents=[], 731 + context=cat_meta["context"], 732 + json_schema=cat_meta.get("json_schema"), 727 733 ) 728 734 extract_req.frame_id = req.frame_id 729 735 extract_req.timestamp = req.timestamp ··· 752 758 model=cat_model, 753 759 system_instruction=cat_meta["prompt"] + redact_instruction, 754 760 json_output=is_json, 761 + json_schema=cat_meta.get("json_schema"), 755 762 max_output_tokens=10240 if is_json else 8192, 756 763 thinking_budget=6144 if is_json else 4096, 757 764 context=cat_meta["context"],

+8 -2

tests/test_formatters.py

··· 324 324 "timestamp": 0, 325 325 "analysis": {"primary": "meeting"}, 326 326 "content": { 327 - "meeting": {"participants": ["Alice", "Bob"]}, 327 + "meeting": { 328 + "participants": [ 329 + {"name": "Alice", "status": "active", "video": True}, 330 + {"name": "Bob", "status": "muted", "video": False}, 331 + ] 332 + }, 328 333 }, 329 334 } 330 335 ] ··· 333 338 334 339 # New meeting formatter uses "**Meeting** (platform)" format 335 340 assert "**Meeting**" in chunks[0]["markdown"] 336 - assert "Alice" in chunks[0]["markdown"] 341 + assert "📹 Alice (active)" in chunks[0]["markdown"] 342 + assert "🔇 Bob (muted)" in chunks[0]["markdown"] 337 343 338 344 def test_format_screen_extracts_metadata(self): 339 345 """Test that metadata line is extracted and not treated as a frame."""

+168

tests/test_meeting_schema.py

··· 1 + # SPDX-License-Identifier: AGPL-3.0-only 2 + # Copyright (c) 2026 sol pbc 3 + 4 + import json 5 + from pathlib import Path 6 + from unittest.mock import AsyncMock, patch 7 + 8 + import pytest 9 + from jsonschema import Draft202012Validator 10 + 11 + from observe import describe as describe_mod 12 + from observe.categories import meeting as meeting_mod 13 + from think.batch import Batch 14 + 15 + 16 + def _load_schema() -> dict: 17 + return json.loads( 18 + ( 19 + Path(describe_mod.__file__).resolve().parent 20 + / "categories" 21 + / "meeting.schema.json" 22 + ).read_text(encoding="utf-8") 23 + ) 24 + 25 + 26 + def test_meeting_schema_file_is_valid_draft_2020_12(): 27 + Draft202012Validator.check_schema(_load_schema()) 28 + 29 + 30 + def test_meeting_schema_accepts_and_rejects_expected_values(): 31 + validator = Draft202012Validator(_load_schema()) 32 + 33 + assert validator.is_valid( 34 + { 35 + "platform": "zoom", 36 + "participants": [ 37 + {"name": "Alice", "status": "active", "video": True}, 38 + ], 39 + "screen_share": None, 40 + } 41 + ) 42 + assert validator.is_valid( 43 + { 44 + "platform": "teams", 45 + "participants": [ 46 + { 47 + "name": "Bob", 48 + "status": "presenting", 49 + "video": True, 50 + "box_2d": [0, 10, 20, 30], 51 + }, 52 + ], 53 + "screen_share": { 54 + "box_2d": [40, 50, 60, 70], 55 + "presenter": "Bob", 56 + "description": "Showing a roadmap deck.", 57 + "formatted_text": "# Roadmap", 58 + }, 59 + } 60 + ) 61 + assert not validator.is_valid( 62 + { 63 + "platform": "hangouts", 64 + "participants": [ 65 + {"name": "Alice", "status": "active", "video": True}, 66 + ], 67 + "screen_share": None, 68 + } 69 + ) 70 + assert not validator.is_valid( 71 + { 72 + "platform": "zoom", 73 + "participants": [ 74 + {"name": "Alice", "status": "talking", "video": True}, 75 + ], 76 + "screen_share": None, 77 + } 78 + ) 79 + assert not validator.is_valid( 80 + { 81 + "platform": "zoom", 82 + "participants": ["Alice"], 83 + "screen_share": None, 84 
+ } 85 + ) 86 + assert not validator.is_valid( 87 + { 88 + "platform": "zoom", 89 + "participants": [ 90 + {"name": "Alice", "status": "active", "video": True}, 91 + ], 92 + "screen_share": None, 93 + "extra": True, 94 + } 95 + ) 96 + assert not validator.is_valid( 97 + { 98 + "platform": "zoom", 99 + "participants": [ 100 + {"status": "active", "video": True}, 101 + ], 102 + "screen_share": None, 103 + } 104 + ) 105 + assert not validator.is_valid( 106 + { 107 + "platform": "zoom", 108 + "participants": [ 109 + {"name": "", "status": "active", "video": True}, 110 + ], 111 + "screen_share": None, 112 + } 113 + ) 114 + 115 + 116 + def test_discover_categories_attaches_meeting_schema(): 117 + expected = _load_schema() 118 + 119 + assert describe_mod.CATEGORIES["meeting"]["json_schema"] == expected 120 + assert [ 121 + name 122 + for name, meta in describe_mod.CATEGORIES.items() 123 + if name != "meeting" and "json_schema" in meta 124 + ] == [] 125 + 126 + 127 + @pytest.mark.asyncio 128 + @patch("think.batch.agenerate", new_callable=AsyncMock) 129 + async def test_meeting_extract_batch_call_passes_schema(mock_agenerate): 130 + mock_agenerate.return_value = ( 131 + '{"platform":"zoom","participants":[{"name":"Alice","status":"active",' 132 + '"video":true}],"screen_share":null}' 133 + ) 134 + 135 + cat_meta = describe_mod.CATEGORIES["meeting"] 136 + batch = Batch(max_concurrent=1) 137 + req = batch.create( 138 + contents="Analyze this meeting screenshot.", 139 + context=cat_meta["context"], 140 + json_schema=cat_meta["json_schema"], 141 + ) 142 + batch.add(req) 143 + 144 + results = [] 145 + async for completed_req in batch.drain_batch(): 146 + results.append(completed_req) 147 + 148 + assert len(results) == 1 149 + assert mock_agenerate.call_args.kwargs["json_schema"] == _load_schema() 150 + 151 + 152 + def test_meeting_formatter_skips_non_dict_participant(caplog): 153 + with caplog.at_level("WARNING", logger="observe.categories.meeting"): 154 + result = 
meeting_mod.format( 155 + { 156 + "platform": "zoom", 157 + "participants": [ 158 + "Alice", 159 + {"name": "Bob", "status": "active", "video": False}, 160 + ], 161 + "screen_share": None, 162 + }, 163 + {}, 164 + ) 165 + 166 + assert "🔇 Bob (active)" in result 167 + assert "Alice" not in result 168 + assert "skipping non-dict participant" in caplog.text

Configure Feed

Configure Feed