personal memory agent
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Add speakers app for voiceprint management

New Convey app for managing speaker voiceprints and identity matching:
- Scan segments for speaker embeddings and match against known entities
- Assign voiceprints to existing entities or create new ones
- Uses cosine similarity with a 0.4 threshold for speaker matching
- Averages multiple voiceprints per entity for improved accuracy
- Includes audio playback for verification

Tests are bundled following the APPS.md pattern, with self-contained fixtures.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

+1370
+6
apps/speakers/app.json
··· 1 + { 2 + "icon": "🎙️", 3 + "label": "Speakers", 4 + "date_nav": true, 5 + "facets": true 6 + }
+1
apps/speakers/app_bar.html
··· 1 + {% include 'date_nav.html' %}
+440
apps/speakers/routes.py
··· 1 + """Speaker voiceprint management app.""" 2 + 3 + from __future__ import annotations 4 + 5 + import logging 6 + import os 7 + import re 8 + import time 9 + from datetime import date 10 + from pathlib import Path 11 + from typing import Any 12 + 13 + import numpy as np 14 + from flask import ( 15 + Blueprint, 16 + jsonify, 17 + redirect, 18 + render_template, 19 + request, 20 + send_file, 21 + url_for, 22 + ) 23 + 24 + from convey import state 25 + from convey.utils import DATE_RE, error_response, format_date, success_response 26 + from think.entities import ( 27 + ensure_entity_folder, 28 + entity_folder_path, 29 + load_entities, 30 + save_entities, 31 + ) 32 + from think.utils import day_dirs, day_path 33 + from think.utils import segment_key as validate_segment_key 34 + from think.utils import segment_parse 35 + 36 + logger = logging.getLogger(__name__) 37 + 38 + speakers_bp = Blueprint( 39 + "app:speakers", 40 + __name__, 41 + url_prefix="/app/speakers", 42 + ) 43 + 44 + 45 + def _normalize_embedding(emb: np.ndarray) -> np.ndarray | None: 46 + """L2-normalize an embedding vector. Returns None if norm is zero.""" 47 + emb = emb.astype(np.float32) 48 + norm = np.linalg.norm(emb) 49 + if norm > 0: 50 + return emb / norm 51 + return None 52 + 53 + 54 + def _scan_segment_embeddings(day: str) -> list[dict]: 55 + """Scan a day for segments with speaker embeddings. 
56 + 57 + Returns list of segment info dicts with keys: 58 + - key: segment directory name (HHMMSS_LEN) 59 + - start: formatted start time (HH:MM) 60 + - end: formatted end time (HH:MM) 61 + - duration: duration in seconds 62 + - speakers: list of speaker labels 63 + """ 64 + day_dir = day_path(day) 65 + if not day_dir.is_dir(): 66 + return [] 67 + 68 + segments = [] 69 + for item in sorted(os.listdir(day_dir)): 70 + item_path = day_dir / item 71 + if not item_path.is_dir(): 72 + continue 73 + 74 + # Validate segment key format 75 + parsed = segment_parse(item) 76 + if parsed[0] is None: 77 + continue 78 + 79 + start_time, end_time = parsed 80 + 81 + # Check for audio embeddings subdir 82 + audio_dir = item_path / "audio" 83 + if not audio_dir.is_dir(): 84 + continue 85 + 86 + # Find speaker embedding files 87 + npz_files = list(audio_dir.glob("*.npz")) 88 + if not npz_files: 89 + continue 90 + 91 + speakers = [f.stem for f in npz_files] 92 + 93 + # Calculate duration from start and end times 94 + start_seconds = ( 95 + start_time.hour * 3600 + start_time.minute * 60 + start_time.second 96 + ) 97 + end_seconds = end_time.hour * 3600 + end_time.minute * 60 + end_time.second 98 + duration = end_seconds - start_seconds 99 + 100 + segments.append( 101 + { 102 + "key": item, 103 + "start": f"{start_time.hour:02d}:{start_time.minute:02d}", 104 + "end": f"{end_time.hour:02d}:{end_time.minute:02d}", 105 + "duration": duration, 106 + "speakers": speakers, 107 + } 108 + ) 109 + 110 + return segments 111 + 112 + 113 + def _load_segment_speaker_embedding( 114 + day: str, segment_key: str, speaker: str 115 + ) -> np.ndarray | None: 116 + """Load a speaker's embedding from a segment.""" 117 + emb_path = day_path(day) / segment_key / "audio" / f"{speaker}.npz" 118 + if not emb_path.exists(): 119 + return None 120 + 121 + data = np.load(emb_path) 122 + return _normalize_embedding(data["embedding"]) 123 + 124 + 125 + def _scan_entity_voiceprints(facet: str) -> dict[str, 
np.ndarray]: 126 + """Scan entities in a facet for voiceprints. 127 + 128 + Returns dict mapping entity name to averaged embedding. 129 + Each entity may have multiple voiceprint files (day_segment.npz). 130 + """ 131 + try: 132 + entities = load_entities(facet) 133 + except RuntimeError: 134 + return {} 135 + 136 + voiceprints = {} 137 + for entity in entities: 138 + name = entity.get("name", "") 139 + if not name: 140 + continue 141 + 142 + try: 143 + folder = entity_folder_path(facet, name) 144 + except (RuntimeError, ValueError): 145 + continue 146 + 147 + if not folder.is_dir(): 148 + continue 149 + 150 + # Find all voiceprint files (pattern: YYYYMMDD_HHMMSS_LEN.npz) 151 + npz_files = list(folder.glob("*_*.npz")) 152 + if not npz_files: 153 + continue 154 + 155 + # Load and average all voiceprints 156 + embeddings = [] 157 + for npz_path in npz_files: 158 + try: 159 + data = np.load(npz_path) 160 + emb = _normalize_embedding(data["embedding"]) 161 + if emb is not None: 162 + embeddings.append(emb) 163 + except Exception as e: 164 + logger.warning("Failed to load voiceprint %s: %s", npz_path, e) 165 + continue 166 + 167 + if embeddings: 168 + # Average all embeddings and re-normalize 169 + avg_emb = _normalize_embedding(np.mean(embeddings, axis=0)) 170 + if avg_emb is not None: 171 + voiceprints[name] = avg_emb 172 + 173 + return voiceprints 174 + 175 + 176 + def _compute_matches( 177 + segment_emb: np.ndarray, known_embs: dict[str, np.ndarray] 178 + ) -> dict[str, float]: 179 + """Compute cosine similarity between segment embedding and known voiceprints.""" 180 + if not known_embs: 181 + return {} 182 + 183 + matches = {} 184 + for name, known_emb in known_embs.items(): 185 + # Cosine similarity via dot product (both are L2-normalized) 186 + score = float(np.dot(segment_emb, known_emb)) 187 + if score >= 0.4: # Only include matches above threshold 188 + matches[name] = round(score, 4) 189 + 190 + return matches 191 + 192 + 193 + def _save_voiceprint_to_entity( 
194 + facet: str, entity_name: str, day: str, segment_key: str, embedding: np.ndarray 195 + ) -> Path: 196 + """Save a voiceprint embedding to an entity's folder.""" 197 + folder = ensure_entity_folder(facet, entity_name) 198 + filename = f"{day}_{segment_key}.npz" 199 + emb_path = folder / filename 200 + np.savez_compressed(emb_path, embedding=embedding) 201 + return emb_path 202 + 203 + 204 + @speakers_bp.route("/") 205 + def index() -> Any: 206 + """Redirect to today's view.""" 207 + today = date.today().strftime("%Y%m%d") 208 + return redirect(url_for("app:speakers.speakers_day", day=today)) 209 + 210 + 211 + @speakers_bp.route("/<day>") 212 + def speakers_day(day: str) -> str: 213 + """Render speaker management view for a specific day.""" 214 + if not re.fullmatch(DATE_RE.pattern, day): 215 + return "", 404 216 + 217 + title = format_date(day) 218 + return render_template("app.html", title=title) 219 + 220 + 221 + @speakers_bp.route("/api/stats/<month>") 222 + def api_stats(month: str) -> Any: 223 + """Return segment counts for each day in a month. 224 + 225 + Used by calendar heatmap to show days with speaker embeddings. 
226 + """ 227 + if not re.fullmatch(r"\d{6}", month): 228 + return error_response("Invalid month format, expected YYYYMM", 400) 229 + 230 + stats: dict[str, int] = {} 231 + 232 + for day_name in day_dirs().keys(): 233 + if not day_name.startswith(month): 234 + continue 235 + 236 + segments = _scan_segment_embeddings(day_name) 237 + if segments: 238 + stats[day_name] = len(segments) 239 + 240 + return jsonify(stats) 241 + 242 + 243 + @speakers_bp.route("/api/segments/<day>") 244 + def api_segments(day: str) -> Any: 245 + """Return segments with speaker embeddings for a day.""" 246 + if not re.fullmatch(DATE_RE.pattern, day): 247 + return error_response("Invalid day format", 400) 248 + 249 + segments = _scan_segment_embeddings(day) 250 + return jsonify({"segments": segments}) 251 + 252 + 253 + @speakers_bp.route("/api/segment/<day>/<segment_key>") 254 + def api_segment_detail(day: str, segment_key: str) -> Any: 255 + """Return segment detail with speaker match results.""" 256 + if not re.fullmatch(DATE_RE.pattern, day): 257 + return error_response("Invalid day format", 400) 258 + 259 + if not validate_segment_key(segment_key): 260 + return error_response("Invalid segment key", 400) 261 + 262 + # Get selected facet from cookie 263 + selected_facet = request.cookies.get("selected_facet") 264 + if not selected_facet: 265 + return error_response("No facet selected", 400) 266 + 267 + # Load ALL entities in the facet for dropdown 268 + try: 269 + all_entities = load_entities(selected_facet) 270 + all_entity_names = [e.get("name") for e in all_entities if e.get("name")] 271 + except RuntimeError: 272 + all_entity_names = [] 273 + 274 + # Load known voiceprints for matching 275 + known_voiceprints = _scan_entity_voiceprints(selected_facet) 276 + 277 + # Load segment speaker embeddings 278 + audio_dir = day_path(day) / segment_key / "audio" 279 + if not audio_dir.is_dir(): 280 + return error_response("Segment has no speaker embeddings", 404) 281 + 282 + speakers = [] 283 + 
for npz_path in sorted(audio_dir.glob("*.npz")): 284 + speaker_label = npz_path.stem 285 + emb = _load_segment_speaker_embedding(day, segment_key, speaker_label) 286 + if emb is None: 287 + continue 288 + 289 + matches = _compute_matches(emb, known_voiceprints) 290 + speakers.append( 291 + { 292 + "label": speaker_label, 293 + "matches": matches, 294 + } 295 + ) 296 + 297 + # Get audio file URL if available 298 + audio_file = None 299 + segment_dir = day_path(day) / segment_key 300 + audio_files = list(segment_dir.glob("*audio.flac")) 301 + if audio_files: 302 + rel_path = f"{segment_key}/{audio_files[0].name}" 303 + audio_file = ( 304 + f"/app/speakers/api/serve_audio/{day}/{rel_path.replace('/', '__')}" 305 + ) 306 + 307 + return jsonify( 308 + { 309 + "speakers": speakers, 310 + "all_entities": all_entity_names, 311 + "audio_file": audio_file, 312 + "facet": selected_facet, 313 + } 314 + ) 315 + 316 + 317 + @speakers_bp.route("/api/serve_audio/<day>/<path:encoded_path>") 318 + def serve_audio(day: str, encoded_path: str) -> Any: 319 + """Serve audio files for playback.""" 320 + if not re.fullmatch(DATE_RE.pattern, day): 321 + return "", 404 322 + 323 + try: 324 + rel_path = encoded_path.replace("__", "/") 325 + full_path = os.path.join(state.journal_root, day, rel_path) 326 + 327 + day_dir = str(day_path(day)) 328 + if not os.path.commonpath([full_path, day_dir]) == day_dir: 329 + return "", 403 330 + 331 + if not os.path.isfile(full_path): 332 + return "", 404 333 + 334 + return send_file(full_path) 335 + 336 + except Exception as e: 337 + logger.warning("Error serving audio %s/%s: %s", day, encoded_path, e) 338 + return "", 404 339 + 340 + 341 + @speakers_bp.route("/api/save-voiceprint", methods=["POST"]) 342 + def api_save_voiceprint() -> Any: 343 + """Save a voiceprint from a segment speaker to an existing entity.""" 344 + data = request.get_json() 345 + if not data: 346 + return error_response("No data provided", 400) 347 + 348 + facet = data.get("facet") 
349 + entity_name = data.get("entity_name") 350 + day = data.get("day") 351 + segment_key = data.get("segment_key") 352 + speaker_label = data.get("speaker_label") 353 + 354 + if not all([facet, entity_name, day, segment_key, speaker_label]): 355 + return error_response("Missing required fields", 400) 356 + 357 + # Validate day and segment_key formats 358 + if not re.fullmatch(DATE_RE.pattern, day): 359 + return error_response("Invalid day format", 400) 360 + if not validate_segment_key(segment_key): 361 + return error_response("Invalid segment key", 400) 362 + 363 + # Validate entity exists 364 + entities = load_entities(facet) 365 + entity_names = [e.get("name") for e in entities] 366 + if entity_name not in entity_names: 367 + return error_response( 368 + f"Entity '{entity_name}' not found in facet '{facet}'", 404 369 + ) 370 + 371 + # Load speaker embedding 372 + emb = _load_segment_speaker_embedding(day, segment_key, speaker_label) 373 + if emb is None: 374 + return error_response("Speaker embedding not found", 404) 375 + 376 + # Save voiceprint 377 + try: 378 + emb_path = _save_voiceprint_to_entity(facet, entity_name, day, segment_key, emb) 379 + return success_response({"path": str(emb_path)}) 380 + except Exception as e: 381 + logger.exception("Failed to save voiceprint for %s", entity_name) 382 + return error_response(f"Failed to save voiceprint: {e}", 500) 383 + 384 + 385 + @speakers_bp.route("/api/create-entity-voiceprint", methods=["POST"]) 386 + def api_create_entity_voiceprint() -> Any: 387 + """Create a new entity with a voiceprint.""" 388 + data = request.get_json() 389 + if not data: 390 + return error_response("No data provided", 400) 391 + 392 + facet = data.get("facet") 393 + entity_type = data.get("type", "Person") 394 + entity_name = data.get("name") 395 + entity_description = data.get("description", "") 396 + day = data.get("day") 397 + segment_key = data.get("segment_key") 398 + speaker_label = data.get("speaker_label") 399 + 400 + if not 
all([facet, entity_name, day, segment_key, speaker_label]): 401 + return error_response("Missing required fields", 400) 402 + 403 + # Validate day and segment_key formats 404 + if not re.fullmatch(DATE_RE.pattern, day): 405 + return error_response("Invalid day format", 400) 406 + if not validate_segment_key(segment_key): 407 + return error_response("Invalid segment key", 400) 408 + 409 + # Check entity doesn't already exist 410 + entities = load_entities(facet, include_detached=True) 411 + entity_names = [e.get("name") for e in entities] 412 + if entity_name in entity_names: 413 + return error_response( 414 + f"Entity '{entity_name}' already exists in facet '{facet}'", 409 415 + ) 416 + 417 + # Load speaker embedding 418 + emb = _load_segment_speaker_embedding(day, segment_key, speaker_label) 419 + if emb is None: 420 + return error_response("Speaker embedding not found", 404) 421 + 422 + # Create new entity 423 + new_entity = { 424 + "type": entity_type, 425 + "name": entity_name, 426 + "description": entity_description, 427 + "attached_at": int(time.time() * 1000), 428 + } 429 + entities.append(new_entity) 430 + save_entities(facet, entities) 431 + 432 + # Save voiceprint 433 + try: 434 + emb_path = _save_voiceprint_to_entity(facet, entity_name, day, segment_key, emb) 435 + return success_response( 436 + {"entity": new_entity, "voiceprint_path": str(emb_path)} 437 + ) 438 + except Exception as e: 439 + logger.exception("Failed to save voiceprint for new entity %s", entity_name) 440 + return error_response(f"Failed to save voiceprint: {e}", 500)
apps/speakers/tests/__init__.py

This is a binary file and will not be displayed.

+99
apps/speakers/tests/conftest.py
··· 1 + """Self-contained fixtures for speakers app tests. 2 + 3 + These fixtures are fully standalone and only depend on pytest builtins. 4 + No shared dependencies from the root conftest.py are required. 5 + """ 6 + 7 + from __future__ import annotations 8 + 9 + import json 10 + from pathlib import Path 11 + 12 + import numpy as np 13 + import pytest 14 + 15 + from think.entities import normalize_entity_name 16 + 17 + 18 + @pytest.fixture 19 + def speakers_env(tmp_path, monkeypatch): 20 + """Create a temporary journal environment for speaker tests. 21 + 22 + Provides helpers to create: 23 + - Day directories with segment speaker embeddings 24 + - Facets with entities and voiceprints 25 + 26 + Usage: 27 + def test_example(speakers_env): 28 + env = speakers_env() 29 + env.create_segment("20240101", "143022_300", ["Speaker 1", "Speaker 2"]) 30 + env.create_entity("test", "Alice Test") 31 + # Now JOURNAL_PATH is set and data exists 32 + """ 33 + 34 + class SpeakersEnv: 35 + def __init__(self, journal_path: Path): 36 + self.journal = journal_path 37 + monkeypatch.setenv("JOURNAL_PATH", str(journal_path)) 38 + 39 + def create_segment( 40 + self, day: str, segment_key: str, speakers: list[str] 41 + ) -> Path: 42 + """Create a segment with speaker embedding files.""" 43 + audio_dir = self.journal / day / segment_key / "audio" 44 + audio_dir.mkdir(parents=True, exist_ok=True) 45 + 46 + for speaker in speakers: 47 + emb = np.random.randn(256).astype(np.float32) 48 + emb = emb / np.linalg.norm(emb) 49 + np.savez_compressed(audio_dir / f"{speaker}.npz", embedding=emb) 50 + 51 + return audio_dir 52 + 53 + def create_embedding(self, vector: list[float] | None = None) -> np.ndarray: 54 + """Create a normalized 256-dim embedding.""" 55 + if vector is None: 56 + emb = np.random.randn(256).astype(np.float32) 57 + else: 58 + emb = np.array(vector + [0.0] * (256 - len(vector)), dtype=np.float32) 59 + return emb / np.linalg.norm(emb) 60 + 61 + def create_entity( 62 + self, 63 + 
facet: str, 64 + name: str, 65 + voiceprints: list[tuple[str, str]] | None = None, 66 + ) -> Path: 67 + """Create an entity with optional voiceprint files. 68 + 69 + Args: 70 + facet: Facet name 71 + name: Entity name 72 + voiceprints: Optional list of (day, segment_key) tuples for voiceprints 73 + """ 74 + facet_dir = self.journal / "facets" / facet 75 + facet_dir.mkdir(parents=True, exist_ok=True) 76 + 77 + # Create entities.jsonl 78 + entities_file = facet_dir / "entities.jsonl" 79 + entity_data = {"type": "Person", "name": name, "description": "Test entity"} 80 + with open(entities_file, "a", encoding="utf-8") as f: 81 + f.write(json.dumps(entity_data) + "\n") 82 + 83 + # Create entity folder with voiceprints if specified 84 + if voiceprints: 85 + entity_dir = facet_dir / "entities" / normalize_entity_name(name) 86 + entity_dir.mkdir(parents=True, exist_ok=True) 87 + 88 + for day, segment_key in voiceprints: 89 + emb = self.create_embedding() 90 + np.savez_compressed( 91 + entity_dir / f"{day}_{segment_key}.npz", embedding=emb 92 + ) 93 + 94 + return facet_dir 95 + 96 + def _create(): 97 + return SpeakersEnv(tmp_path) 98 + 99 + return _create
+148
apps/speakers/tests/test_routes.py
"""Tests for speakers app."""

import numpy as np


def _vec(*leading: float) -> np.ndarray:
    """Return a 256-dim float32 vector starting with *leading*, zero-padded."""
    out = np.zeros(256, dtype=np.float32)
    out[: len(leading)] = leading
    return out


def _unit(*leading: float) -> np.ndarray:
    """Return ``_vec(*leading)`` scaled to unit length."""
    vec = _vec(*leading)
    return vec / np.linalg.norm(vec)


def test_scan_segment_embeddings_empty(speakers_env):
    """Test scanning when no embeddings exist."""
    from apps.speakers.routes import _scan_segment_embeddings

    env = speakers_env()

    # Create a day dir but no segments
    (env.journal / "20240101").mkdir()

    assert _scan_segment_embeddings("20240101") == []


def test_scan_segment_embeddings_with_data(speakers_env):
    """Test scanning when embeddings exist."""
    from apps.speakers.routes import _scan_segment_embeddings

    env = speakers_env()
    env.create_segment("20240101", "143022_300", ["Speaker 1", "Speaker 2"])

    segments = _scan_segment_embeddings("20240101")
    assert len(segments) == 1

    segment = segments[0]
    assert segment["key"] == "143022_300"
    assert segment["start"] == "14:30"
    assert segment["end"] == "14:35"
    assert segment["duration"] == 300
    assert set(segment["speakers"]) == {"Speaker 1", "Speaker 2"}


def test_load_segment_speaker_embedding(speakers_env):
    """Test loading a speaker embedding."""
    from apps.speakers.routes import _load_segment_speaker_embedding

    env = speakers_env()

    # Create segment with specific embedding
    audio_dir = env.journal / "20240101" / "143022_300" / "audio"
    audio_dir.mkdir(parents=True)
    np.savez_compressed(audio_dir / "Speaker 1.npz", embedding=_vec(1.0))

    loaded = _load_segment_speaker_embedding("20240101", "143022_300", "Speaker 1")
    assert loaded is not None
    assert loaded.shape == (256,)
    assert np.isclose(np.linalg.norm(loaded), 1.0)


def test_compute_matches():
    """Test matching algorithm."""
    from apps.speakers.routes import _compute_matches

    seg_emb = _vec(1.0)

    # Known voiceprints with varying similarity to the segment embedding
    known = {
        "Alice": _unit(1.0),  # Perfect match
        "Bob": _unit(0.7, 0.7),  # Partial match
        "Charlie": _unit(0.0, 1.0),  # No match
    }

    matches = _compute_matches(seg_emb, known)

    # Alice should have perfect match (1.0)
    assert "Alice" in matches
    assert matches["Alice"] >= 0.99

    # Bob should have partial match (~0.7)
    assert "Bob" in matches
    assert 0.65 <= matches["Bob"] <= 0.75

    # Charlie should be below threshold (0.4), not included
    assert "Charlie" not in matches

    # No known voiceprints means no matches
    assert _compute_matches(seg_emb, {}) == {}


def test_scan_entity_voiceprints_averaging(speakers_env):
    """Test that multiple voiceprints are averaged."""
    from apps.speakers.routes import _scan_entity_voiceprints
    from think.entities import normalize_entity_name

    env = speakers_env()

    # Create facet with entity
    facet_dir = env.journal / "facets" / "test"
    facet_dir.mkdir(parents=True)

    # Create entities.jsonl
    (facet_dir / "entities.jsonl").write_text(
        '{"type": "Person", "name": "Alice Test", "description": "Test"}\n'
    )

    # Create entity folder with multiple voiceprints
    entity_dir = facet_dir / "entities" / normalize_entity_name("Alice Test")
    entity_dir.mkdir(parents=True)

    # Create two voiceprints
    emb1 = _vec(1.0)
    emb2 = _unit(0.8, 0.6)

    np.savez_compressed(entity_dir / "20240101_120000_300.npz", embedding=emb1)
    np.savez_compressed(entity_dir / "20240102_130000_300.npz", embedding=emb2)

    voiceprints = _scan_entity_voiceprints("test")

    assert "Alice Test" in voiceprints
    avg_emb = voiceprints["Alice Test"]
    assert avg_emb.shape == (256,)
    assert np.isclose(np.linalg.norm(avg_emb), 1.0)  # Should be normalized

    # The result must be the re-normalized mean of both voiceprints,
    # not merely one of them.
    expected = (emb1 + emb2) / 2
    expected = expected / np.linalg.norm(expected)
    assert np.allclose(avg_emb, expected, atol=1e-5)


def test_save_voiceprint_to_entity(speakers_env):
    """Test saving voiceprint to entity folder."""
    from apps.speakers.routes import _save_voiceprint_to_entity

    env = speakers_env()

    # Create facet
    (env.journal / "facets" / "test").mkdir(parents=True)

    path = _save_voiceprint_to_entity(
        "test", "John Doe", "20240101", "143022_300", _vec(1.0)
    )

    assert path.exists()
    assert path.name == "20240101_143022_300.npz"
    assert "john_doe" in str(path.parent)

    # Verify content
    data = np.load(path)
    assert "embedding" in data
    assert data["embedding"].shape == (256,)
+676
apps/speakers/workspace.html
··· 1 + {# Speaker voiceprint management #} 2 + 3 + <style> 4 + /* Speakers app styles - all classes prefixed with .spk- */ 5 + 6 + .spk-wrap { 7 + max-width: 1200px; 8 + margin: 0 auto; 9 + padding: 16px 24px; 10 + } 11 + 12 + .spk-layout { 13 + display: grid; 14 + grid-template-columns: 280px 1fr; 15 + gap: 24px; 16 + min-height: 400px; 17 + } 18 + 19 + /* Segment list panel */ 20 + .spk-segments { 21 + background: #fff; 22 + border-radius: 12px; 23 + border: 1px solid #e5e7eb; 24 + overflow: hidden; 25 + } 26 + 27 + .spk-segments-header { 28 + padding: 12px 16px; 29 + border-bottom: 1px solid #e5e7eb; 30 + font-weight: 600; 31 + font-size: 14px; 32 + color: #374151; 33 + } 34 + 35 + .spk-segments-list { 36 + max-height: 500px; 37 + overflow-y: auto; 38 + } 39 + 40 + .spk-segment { 41 + padding: 12px 16px; 42 + border-bottom: 1px solid #f3f4f6; 43 + cursor: pointer; 44 + transition: background 0.15s; 45 + } 46 + 47 + .spk-segment:hover { 48 + background: #f9fafb; 49 + } 50 + 51 + .spk-segment.active { 52 + background: #eff6ff; 53 + border-left: 3px solid #3b82f6; 54 + } 55 + 56 + .spk-segment-time { 57 + font-weight: 500; 58 + font-size: 14px; 59 + color: #1f2937; 60 + } 61 + 62 + .spk-segment-meta { 63 + font-size: 12px; 64 + color: #6b7280; 65 + margin-top: 4px; 66 + } 67 + 68 + .spk-segment-speakers { 69 + display: flex; 70 + gap: 6px; 71 + margin-top: 6px; 72 + flex-wrap: wrap; 73 + } 74 + 75 + .spk-speaker-badge { 76 + display: inline-block; 77 + padding: 2px 8px; 78 + background: #e5e7eb; 79 + border-radius: 12px; 80 + font-size: 11px; 81 + color: #4b5563; 82 + } 83 + 84 + .spk-empty { 85 + padding: 24px; 86 + text-align: center; 87 + color: #9ca3af; 88 + font-size: 14px; 89 + } 90 + 91 + /* Detail panel */ 92 + .spk-detail { 93 + background: #fff; 94 + border-radius: 12px; 95 + border: 1px solid #e5e7eb; 96 + padding: 20px; 97 + } 98 + 99 + .spk-detail-header { 100 + display: flex; 101 + align-items: center; 102 + justify-content: space-between; 103 + 
margin-bottom: 16px; 104 + } 105 + 106 + .spk-detail-title { 107 + font-size: 18px; 108 + font-weight: 600; 109 + color: #1f2937; 110 + } 111 + 112 + .spk-audio-player { 113 + margin-bottom: 20px; 114 + } 115 + 116 + .spk-audio-player audio { 117 + width: 100%; 118 + } 119 + 120 + /* Match table */ 121 + .spk-matches { 122 + margin-top: 16px; 123 + } 124 + 125 + .spk-matches-title { 126 + font-size: 14px; 127 + font-weight: 600; 128 + color: #374151; 129 + margin-bottom: 12px; 130 + } 131 + 132 + .spk-match-table { 133 + width: 100%; 134 + border-collapse: collapse; 135 + font-size: 13px; 136 + } 137 + 138 + .spk-match-table th, 139 + .spk-match-table td { 140 + padding: 10px 12px; 141 + text-align: left; 142 + border-bottom: 1px solid #e5e7eb; 143 + } 144 + 145 + .spk-match-table th { 146 + background: #f9fafb; 147 + font-weight: 500; 148 + color: #6b7280; 149 + } 150 + 151 + .spk-match-table td:first-child { 152 + font-weight: 500; 153 + } 154 + 155 + .spk-score { 156 + display: inline-block; 157 + padding: 2px 8px; 158 + border-radius: 4px; 159 + font-family: monospace; 160 + font-size: 12px; 161 + } 162 + 163 + .spk-score-high { 164 + background: #dcfce7; 165 + color: #166534; 166 + } 167 + 168 + .spk-score-med { 169 + background: #fef9c3; 170 + color: #854d0e; 171 + } 172 + 173 + .spk-score-low { 174 + background: #f3f4f6; 175 + color: #6b7280; 176 + } 177 + 178 + .spk-no-match { 179 + color: #9ca3af; 180 + font-style: italic; 181 + } 182 + 183 + /* Actions */ 184 + .spk-actions { 185 + display: flex; 186 + gap: 8px; 187 + align-items: center; 188 + } 189 + 190 + .spk-select { 191 + padding: 6px 12px; 192 + border: 1px solid #d1d5db; 193 + border-radius: 6px; 194 + font-size: 13px; 195 + background: #fff; 196 + min-width: 150px; 197 + } 198 + 199 + .spk-btn { 200 + padding: 6px 14px; 201 + border: none; 202 + border-radius: 6px; 203 + font-size: 13px; 204 + cursor: pointer; 205 + transition: background 0.15s; 206 + } 207 + 208 + .spk-btn-primary { 209 + 
background: #3b82f6; 210 + color: #fff; 211 + } 212 + 213 + .spk-btn-primary:hover { 214 + background: #2563eb; 215 + } 216 + 217 + .spk-btn-primary:disabled { 218 + background: #93c5fd; 219 + cursor: not-allowed; 220 + } 221 + 222 + .spk-btn-secondary { 223 + background: #f3f4f6; 224 + color: #374151; 225 + } 226 + 227 + .spk-btn-secondary:hover { 228 + background: #e5e7eb; 229 + } 230 + 231 + /* Create entity form */ 232 + .spk-create-form { 233 + margin-top: 16px; 234 + padding: 16px; 235 + background: #f9fafb; 236 + border-radius: 8px; 237 + border: 1px solid #e5e7eb; 238 + } 239 + 240 + .spk-form-row { 241 + margin-bottom: 12px; 242 + } 243 + 244 + .spk-form-row label { 245 + display: block; 246 + font-size: 12px; 247 + font-weight: 500; 248 + color: #6b7280; 249 + margin-bottom: 4px; 250 + } 251 + 252 + .spk-form-row input, 253 + .spk-form-row select { 254 + width: 100%; 255 + padding: 8px 12px; 256 + border: 1px solid #d1d5db; 257 + border-radius: 6px; 258 + font-size: 13px; 259 + } 260 + 261 + .spk-form-actions { 262 + display: flex; 263 + gap: 8px; 264 + margin-top: 12px; 265 + } 266 + 267 + /* Status messages */ 268 + .spk-status { 269 + padding: 8px 12px; 270 + border-radius: 6px; 271 + font-size: 13px; 272 + margin-top: 12px; 273 + } 274 + 275 + .spk-status-success { 276 + background: #dcfce7; 277 + color: #166534; 278 + } 279 + 280 + .spk-status-error { 281 + background: #fee2e2; 282 + color: #991b1b; 283 + } 284 + 285 + /* Facet indicator */ 286 + .spk-facet-badge { 287 + display: inline-block; 288 + padding: 4px 10px; 289 + background: var(--facet-bg, #e5e7eb); 290 + border: 1px solid var(--facet-color, #d1d5db); 291 + border-radius: 16px; 292 + font-size: 12px; 293 + color: var(--facet-color, #6b7280); 294 + } 295 + </style> 296 + 297 + <div class="spk-wrap"> 298 + <div class="spk-layout"> 299 + <!-- Segment list --> 300 + <div class="spk-segments"> 301 + <div class="spk-segments-header"> 302 + Segments with Speakers 303 + </div> 304 + <div 
class="spk-segments-list" id="spkSegmentList">
                <div class="spk-empty">Loading...</div>
            </div>
        </div>

        <!-- Detail panel -->
        <div class="spk-detail" id="spkDetail">
            <div class="spk-empty">
                Select a segment to view speaker matches
            </div>
        </div>
    </div>
</div>

<script>
(() => {
    // Day being viewed, injected by the template. The tojson filter emits a
    // correctly quoted JavaScript literal, so the value is not wrapped in
    // quotes by hand and cannot break out of the string.
    const day = {{ day|tojson }};
    const segmentList = document.getElementById('spkSegmentList');
    const detailPanel = document.getElementById('spkDetail');

    // Segments returned by the API for this day, and the one currently shown.
    let segments = [];
    let selectedSegment = null;

    // Incremented for every detail request so that a slow response for a
    // previously selected segment cannot overwrite the current panel.
    let detailRequestId = 0;

    // Load segments on page load
    loadSegments();

    // Listen for facet changes
    window.addEventListener('facet.switch', () => {
        if (selectedSegment) {
            loadSegmentDetail(selectedSegment);
        }
    });

    /**
     * Build a speakers API URL, percent-encoding every path component.
     * @param {...*} parts - path components appended after /app/speakers/api/
     * @returns {string}
     */
    function apiPath(...parts) {
        return '/app/speakers/api/' + parts.map(p => encodeURIComponent(String(p))).join('/');
    }

    /** Fetch the segment list for the current day and render it. */
    function loadSegments() {
        fetch(apiPath('segments', day))
            .then(r => r.json())
            .then(data => {
                segments = data.segments || [];
                renderSegmentList();
            })
            .catch(() => {
                segmentList.innerHTML = '<div class="spk-empty">Failed to load segments</div>';
            });
    }

    /** Render the left-hand segment list and wire up click selection. */
    function renderSegmentList() {
        if (segments.length === 0) {
            segmentList.innerHTML = '<div class="spk-empty">No segments with speaker embeddings found for this day</div>';
            return;
        }

        // Every server-supplied value is escaped before being placed in markup.
        segmentList.innerHTML = segments.map(seg => `
            <div class="spk-segment${selectedSegment?.key === seg.key ? ' active' : ''}" data-key="${escapeHtml(seg.key)}">
                <div class="spk-segment-time">${escapeHtml(seg.start)} - ${escapeHtml(seg.end)}</div>
                <div class="spk-segment-meta">${formatDuration(seg.duration)}</div>
                <div class="spk-segment-speakers">
                    ${(seg.speakers || []).map(s => `<span class="spk-speaker-badge">${escapeHtml(s)}</span>`).join('')}
                </div>
            </div>
        `).join('');

        // Add click handlers
        segmentList.querySelectorAll('.spk-segment').forEach(el => {
            el.addEventListener('click', () => {
                // dataset values are always strings, so compare as strings.
                const seg = segments.find(s => String(s.key) === el.dataset.key);
                if (seg) selectSegment(seg);
            });
        });
    }

    /** Mark a segment as selected, refresh the list highlight and load its details. */
    function selectSegment(seg) {
        selectedSegment = seg;
        renderSegmentList();
        loadSegmentDetail(seg);
    }

    /**
     * Fetch speaker matches for a segment and render the detail panel.
     * Responses that arrive after a newer request was issued are ignored.
     */
    function loadSegmentDetail(seg) {
        const requestId = ++detailRequestId;
        detailPanel.innerHTML = '<div class="spk-empty">Loading...</div>';

        fetch(apiPath('segment', day, seg.key))
            .then(r => r.json())
            .then(data => {
                if (requestId !== detailRequestId) return; // superseded by a newer request
                if (data.error) {
                    detailPanel.innerHTML = `<div class="spk-empty">${escapeHtml(data.error)}</div>`;
                    return;
                }
                renderSegmentDetail(seg, data);
            })
            .catch(() => {
                if (requestId !== detailRequestId) return;
                detailPanel.innerHTML = '<div class="spk-empty">Failed to load segment details</div>';
            });
    }

    /**
     * Reload the detail panel only if the given segment is still the one on
     * screen, so a delayed refresh never clobbers a different selection.
     */
    function reloadIfStillSelected(segmentKey) {
        if (selectedSegment && selectedSegment.key === segmentKey) {
            loadSegmentDetail(selectedSegment);
        }
    }

    /** Render header, audio player and one card per detected speaker. */
    function renderSegmentDetail(seg, data) {
        // Default the collections so a response without them renders as empty.
        const { speakers = [], all_entities = [], audio_file, facet } = data;

        let html = `
            <div class="spk-detail-header">
                <div class="spk-detail-title">${escapeHtml(seg.start)} - ${escapeHtml(seg.end)}</div>
                <span class="spk-facet-badge">${escapeHtml(facet)}</span>
            </div>
        `;

        // Audio player
        if (audio_file) {
            html += `
                <div class="spk-audio-player">
                    <audio controls preload="metadata">
                        <source src="${escapeHtml(audio_file)}" type="audio/flac">
                    </audio>
                </div>
            `;
        }

        // Speakers and matches
        if (speakers.length === 0) {
            html += '<div class="spk-empty">No speaker embeddings found</div>';
        } else {
            html += '<div class="spk-matches">';
            html += '<div class="spk-matches-title">Speaker Matches</div>';

            speakers.forEach((speaker, idx) => {
                html += renderSpeakerCard(speaker, all_entities, facet, seg, idx);
            });

            html += '</div>';
        }

        detailPanel.innerHTML = html;

        // Add event handlers
        setupEventHandlers(seg, facet);
    }

    /** Markup for the entity dropdown plus Assign button; `selected` preselects an entity. */
    function renderAssignControls(allEntities, selected) {
        const options = allEntities
            .map(e => `<option value="${escapeHtml(e)}"${e === selected ? ' selected' : ''}>${escapeHtml(e)}</option>`)
            .join('');
        return `
            <div class="spk-actions">
                <select class="spk-select spk-entity-select">
                    <option value="">Assign to entity...</option>
                    ${options}
                    <option value="__new__">+ Create new entity</option>
                </select>
                <button class="spk-btn spk-btn-primary spk-assign-btn" disabled>Assign</button>
            </div>
        `;
    }

    /**
     * Build the card for one speaker: a table of candidate entities sorted by
     * descending match score, with assignment controls on the first row.
     */
    function renderSpeakerCard(speaker, allEntities, facet, seg, idx) {
        const matches = Object.entries(speaker.matches || {})
            .sort((a, b) => b[1] - a[1]);

        let html = `
            <div class="spk-speaker-card" data-speaker="${escapeHtml(speaker.label)}" data-idx="${idx}">
                <table class="spk-match-table">
                    <thead>
                        <tr>
                            <th>${escapeHtml(speaker.label)}</th>
                            <th>Match Score</th>
                            <th>Action</th>
                        </tr>
                    </thead>
                    <tbody>
        `;

        if (matches.length === 0) {
            html += `
                <tr>
                    <td colspan="2"><span class="spk-no-match">No matches above threshold</span></td>
                    <td>${renderAssignControls(allEntities, null)}</td>
                </tr>
            `;
        } else {
            matches.forEach((match, i) => {
                const [entityName, score] = match;
                const scoreClass = score >= 0.7 ? 'high' : score >= 0.5 ? 'med' : 'low';
                const scorePct = Math.round(score * 100);

                html += `
                    <tr>
                        <td>${escapeHtml(entityName)}</td>
                        <td><span class="spk-score spk-score-${scoreClass}">${scorePct}%</span></td>
                        <td>
                            ${i === 0 ? renderAssignControls(allEntities, entityName) : ''}
                        </td>
                    </tr>
                `;
            });
        }

        html += `
                    </tbody>
                </table>
                <div class="spk-create-form-container" style="display: none;"></div>
                <div class="spk-status-container"></div>
            </div>
        `;

        return html;
    }

    /** Attach select/assign handlers to every speaker card in the detail panel. */
    function setupEventHandlers(seg, facet) {
        // Entity select change
        detailPanel.querySelectorAll('.spk-entity-select').forEach(select => {
            const card = select.closest('.spk-speaker-card');
            const assignBtn = card.querySelector('.spk-assign-btn');
            const formContainer = card.querySelector('.spk-create-form-container');

            // The top match is preselected at render time; enable Assign for it
            // so the suggestion can be confirmed with a single click.
            assignBtn.disabled = !select.value || select.value === '__new__';

            select.addEventListener('change', () => {
                if (select.value === '__new__') {
                    assignBtn.disabled = true;
                    formContainer.style.display = 'block';
                    formContainer.innerHTML = renderCreateForm();
                    setupCreateFormHandlers(card, seg, facet);
                } else {
                    formContainer.style.display = 'none';
                    formContainer.innerHTML = '';
                    assignBtn.disabled = !select.value;
                }
            });
        });

        // Assign button click
        detailPanel.querySelectorAll('.spk-assign-btn').forEach(btn => {
            btn.addEventListener('click', () => {
                const card = btn.closest('.spk-speaker-card');
                const speakerLabel = card.dataset.speaker;
                const entityName = card.querySelector('.spk-entity-select').value;

                if (!entityName || entityName === '__new__') return;

                saveVoiceprint(facet, entityName, seg.key, speakerLabel, card);
            });
        });
    }

    /** Static markup for the inline "create new entity" form. */
    function renderCreateForm() {
        return `
            <div class="spk-create-form">
                <div class="spk-form-row">
                    <label>Entity Name</label>
                    <input type="text" class="spk-new-name" placeholder="e.g., John Smith">
                </div>
                <div class="spk-form-row">
                    <label>Type</label>
                    <select class="spk-new-type">
                        <option value="Person">Person</option>
                        <option value="Contact">Contact</option>
                    </select>
                </div>
                <div class="spk-form-row">
                    <label>Description (optional)</label>
                    <input type="text" class="spk-new-desc" placeholder="e.g., Colleague from work">
                </div>
                <div class="spk-form-actions">
                    <button class="spk-btn spk-btn-secondary spk-cancel-create">Cancel</button>
                    <button class="spk-btn spk-btn-primary spk-confirm-create">Create &amp; Save Voiceprint</button>
                </div>
            </div>
        `;
    }

    /** Wire the Cancel / Create buttons of a card's inline create form. */
    function setupCreateFormHandlers(card, seg, facet) {
        const formContainer = card.querySelector('.spk-create-form-container');
        const select = card.querySelector('.spk-entity-select');
        const speakerLabel = card.dataset.speaker;

        formContainer.querySelector('.spk-cancel-create').addEventListener('click', () => {
            formContainer.style.display = 'none';
            formContainer.innerHTML = '';
            select.value = '';
        });

        formContainer.querySelector('.spk-confirm-create').addEventListener('click', () => {
            const name = formContainer.querySelector('.spk-new-name').value.trim();
            const type = formContainer.querySelector('.spk-new-type').value;
            const desc = formContainer.querySelector('.spk-new-desc').value.trim();

            if (!name) {
                showStatus(card, 'Please enter an entity name', 'error');
                return;
            }

            createEntityWithVoiceprint(facet, type, name, desc, seg.key, speakerLabel, card);
        });
    }

    /** POST a voiceprint assignment for an existing entity and report the result on the card. */
    function saveVoiceprint(facet, entityName, segmentKey, speakerLabel, card) {
        fetch('/app/speakers/api/save-voiceprint', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({
                facet,
                entity_name: entityName,
                day,
                segment_key: segmentKey,
                speaker_label: speakerLabel,
            }),
        })
            .then(r => r.json())
            .then(data => {
                if (data.error) {
                    showStatus(card, data.error, 'error');
                } else {
                    showStatus(card, `Voiceprint saved to ${entityName}`, 'success');
                    // Reload to refresh matches, unless the user has moved on.
                    setTimeout(() => reloadIfStillSelected(segmentKey), 1500);
                }
            })
            .catch(() => {
                showStatus(card, 'Failed to save voiceprint', 'error');
            });
    }

    /** POST a new entity together with its first voiceprint and report the result on the card. */
    function createEntityWithVoiceprint(facet, type, name, description, segmentKey, speakerLabel, card) {
        fetch('/app/speakers/api/create-entity-voiceprint', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({
                facet,
                type,
                name,
                description,
                day,
                segment_key: segmentKey,
                speaker_label: speakerLabel,
            }),
        })
            .then(r => r.json())
            .then(data => {
                if (data.error) {
                    showStatus(card, data.error, 'error');
                } else {
                    showStatus(card, `Created entity "${name}" with voiceprint`, 'success');
                    // Hide form and reload
                    const formContainer = card.querySelector('.spk-create-form-container');
                    formContainer.style.display = 'none';
                    formContainer.innerHTML = '';
                    setTimeout(() => reloadIfStillSelected(segmentKey), 1500);
                }
            })
            .catch(() => {
                showStatus(card, 'Failed to create entity', 'error');
            });
    }

    /**
     * Show a transient status message on a card.
     * @param {Element} card - speaker card element
     * @param {string} message - text to display (escaped here)
     * @param {string} type - CSS suffix, e.g. 'error' or 'success'
     */
    function showStatus(card, message, type) {
        const container = card.querySelector('.spk-status-container');
        container.innerHTML = `<div class="spk-status spk-status-${type}">${escapeHtml(message)}</div>`;
        const shown = container.firstElementChild;
        setTimeout(() => {
            // Only clear our own message; a newer one may have replaced it.
            if (shown && shown.parentNode === container) {
                container.innerHTML = '';
            }
        }, 3000);
    }

    /**
     * Format a duration in seconds as "Ns", "Mm" or "Mm Ns".
     * Fractional input is rounded to whole seconds; missing or invalid
     * input is treated as zero.
     */
    function formatDuration(seconds) {
        const total = Math.max(0, Math.round(Number(seconds) || 0));
        const mins = Math.floor(total / 60);
        const secs = total % 60;
        if (mins === 0) return `${secs}s`;
        return secs > 0 ? `${mins}m ${secs}s` : `${mins}m`;
    }

    /**
     * Escape a value for safe use in HTML text and quoted attribute values.
     * null/undefined become an empty string; other values are stringified.
     */
    function escapeHtml(str) {
        if (str === null || str === undefined) return '';
        return String(str)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;')
            .replace(/'/g, '&#39;');
    }
})();
</script>