Add speakers app for voiceprint management · solpbc.org/solstone@030f8aa

+6

apps/speakers/app.json

··· 1 + { 2 + "icon": "🎙️", 3 + "label": "Speakers", 4 + "date_nav": true, 5 + "facets": true 6 + }

+1

apps/speakers/app_bar.html

··· 1 + {% include 'date_nav.html' %}

+440

apps/speakers/routes.py

··· 1 + """Speaker voiceprint management app.""" 2 + 3 + from __future__ import annotations 4 + 5 + import logging 6 + import os 7 + import re 8 + import time 9 + from datetime import date 10 + from pathlib import Path 11 + from typing import Any 12 + 13 + import numpy as np 14 + from flask import ( 15 + Blueprint, 16 + jsonify, 17 + redirect, 18 + render_template, 19 + request, 20 + send_file, 21 + url_for, 22 + ) 23 + 24 + from convey import state 25 + from convey.utils import DATE_RE, error_response, format_date, success_response 26 + from think.entities import ( 27 + ensure_entity_folder, 28 + entity_folder_path, 29 + load_entities, 30 + save_entities, 31 + ) 32 + from think.utils import day_dirs, day_path 33 + from think.utils import segment_key as validate_segment_key 34 + from think.utils import segment_parse 35 + 36 + logger = logging.getLogger(__name__) 37 + 38 + speakers_bp = Blueprint( 39 + "app:speakers", 40 + __name__, 41 + url_prefix="/app/speakers", 42 + ) 43 + 44 + 45 + def _normalize_embedding(emb: np.ndarray) -> np.ndarray | None: 46 + """L2-normalize an embedding vector. Returns None if norm is zero.""" 47 + emb = emb.astype(np.float32) 48 + norm = np.linalg.norm(emb) 49 + if norm > 0: 50 + return emb / norm 51 + return None 52 + 53 + 54 + def _scan_segment_embeddings(day: str) -> list[dict]: 55 + """Scan a day for segments with speaker embeddings. 
56 + 57 + Returns list of segment info dicts with keys: 58 + - key: segment directory name (HHMMSS_LEN) 59 + - start: formatted start time (HH:MM) 60 + - end: formatted end time (HH:MM) 61 + - duration: duration in seconds 62 + - speakers: list of speaker labels 63 + """ 64 + day_dir = day_path(day) 65 + if not day_dir.is_dir(): 66 + return [] 67 + 68 + segments = [] 69 + for item in sorted(os.listdir(day_dir)): 70 + item_path = day_dir / item 71 + if not item_path.is_dir(): 72 + continue 73 + 74 + # Validate segment key format 75 + parsed = segment_parse(item) 76 + if parsed[0] is None: 77 + continue 78 + 79 + start_time, end_time = parsed 80 + 81 + # Check for audio embeddings subdir 82 + audio_dir = item_path / "audio" 83 + if not audio_dir.is_dir(): 84 + continue 85 + 86 + # Find speaker embedding files 87 + npz_files = list(audio_dir.glob("*.npz")) 88 + if not npz_files: 89 + continue 90 + 91 + speakers = [f.stem for f in npz_files] 92 + 93 + # Calculate duration from start and end times 94 + start_seconds = ( 95 + start_time.hour * 3600 + start_time.minute * 60 + start_time.second 96 + ) 97 + end_seconds = end_time.hour * 3600 + end_time.minute * 60 + end_time.second 98 + duration = end_seconds - start_seconds 99 + 100 + segments.append( 101 + { 102 + "key": item, 103 + "start": f"{start_time.hour:02d}:{start_time.minute:02d}", 104 + "end": f"{end_time.hour:02d}:{end_time.minute:02d}", 105 + "duration": duration, 106 + "speakers": speakers, 107 + } 108 + ) 109 + 110 + return segments 111 + 112 + 113 + def _load_segment_speaker_embedding( 114 + day: str, segment_key: str, speaker: str 115 + ) -> np.ndarray | None: 116 + """Load a speaker's embedding from a segment.""" 117 + emb_path = day_path(day) / segment_key / "audio" / f"{speaker}.npz" 118 + if not emb_path.exists(): 119 + return None 120 + 121 + data = np.load(emb_path) 122 + return _normalize_embedding(data["embedding"]) 123 + 124 + 125 + def _scan_entity_voiceprints(facet: str) -> dict[str, 
np.ndarray]: 126 + """Scan entities in a facet for voiceprints. 127 + 128 + Returns dict mapping entity name to averaged embedding. 129 + Each entity may have multiple voiceprint files (day_segment.npz). 130 + """ 131 + try: 132 + entities = load_entities(facet) 133 + except RuntimeError: 134 + return {} 135 + 136 + voiceprints = {} 137 + for entity in entities: 138 + name = entity.get("name", "") 139 + if not name: 140 + continue 141 + 142 + try: 143 + folder = entity_folder_path(facet, name) 144 + except (RuntimeError, ValueError): 145 + continue 146 + 147 + if not folder.is_dir(): 148 + continue 149 + 150 + # Find all voiceprint files (pattern: YYYYMMDD_HHMMSS_LEN.npz) 151 + npz_files = list(folder.glob("*_*.npz")) 152 + if not npz_files: 153 + continue 154 + 155 + # Load and average all voiceprints 156 + embeddings = [] 157 + for npz_path in npz_files: 158 + try: 159 + data = np.load(npz_path) 160 + emb = _normalize_embedding(data["embedding"]) 161 + if emb is not None: 162 + embeddings.append(emb) 163 + except Exception as e: 164 + logger.warning("Failed to load voiceprint %s: %s", npz_path, e) 165 + continue 166 + 167 + if embeddings: 168 + # Average all embeddings and re-normalize 169 + avg_emb = _normalize_embedding(np.mean(embeddings, axis=0)) 170 + if avg_emb is not None: 171 + voiceprints[name] = avg_emb 172 + 173 + return voiceprints 174 + 175 + 176 + def _compute_matches( 177 + segment_emb: np.ndarray, known_embs: dict[str, np.ndarray] 178 + ) -> dict[str, float]: 179 + """Compute cosine similarity between segment embedding and known voiceprints.""" 180 + if not known_embs: 181 + return {} 182 + 183 + matches = {} 184 + for name, known_emb in known_embs.items(): 185 + # Cosine similarity via dot product (both are L2-normalized) 186 + score = float(np.dot(segment_emb, known_emb)) 187 + if score >= 0.4: # Only include matches above threshold 188 + matches[name] = round(score, 4) 189 + 190 + return matches 191 + 192 + 193 + def _save_voiceprint_to_entity( 
194 + facet: str, entity_name: str, day: str, segment_key: str, embedding: np.ndarray 195 + ) -> Path: 196 + """Save a voiceprint embedding to an entity's folder.""" 197 + folder = ensure_entity_folder(facet, entity_name) 198 + filename = f"{day}_{segment_key}.npz" 199 + emb_path = folder / filename 200 + np.savez_compressed(emb_path, embedding=embedding) 201 + return emb_path 202 + 203 + 204 + @speakers_bp.route("/") 205 + def index() -> Any: 206 + """Redirect to today's view.""" 207 + today = date.today().strftime("%Y%m%d") 208 + return redirect(url_for("app:speakers.speakers_day", day=today)) 209 + 210 + 211 + @speakers_bp.route("/<day>") 212 + def speakers_day(day: str) -> str: 213 + """Render speaker management view for a specific day.""" 214 + if not re.fullmatch(DATE_RE.pattern, day): 215 + return "", 404 216 + 217 + title = format_date(day) 218 + return render_template("app.html", title=title) 219 + 220 + 221 + @speakers_bp.route("/api/stats/<month>") 222 + def api_stats(month: str) -> Any: 223 + """Return segment counts for each day in a month. 224 + 225 + Used by calendar heatmap to show days with speaker embeddings. 
226 + """ 227 + if not re.fullmatch(r"\d{6}", month): 228 + return error_response("Invalid month format, expected YYYYMM", 400) 229 + 230 + stats: dict[str, int] = {} 231 + 232 + for day_name in day_dirs().keys(): 233 + if not day_name.startswith(month): 234 + continue 235 + 236 + segments = _scan_segment_embeddings(day_name) 237 + if segments: 238 + stats[day_name] = len(segments) 239 + 240 + return jsonify(stats) 241 + 242 + 243 + @speakers_bp.route("/api/segments/<day>") 244 + def api_segments(day: str) -> Any: 245 + """Return segments with speaker embeddings for a day.""" 246 + if not re.fullmatch(DATE_RE.pattern, day): 247 + return error_response("Invalid day format", 400) 248 + 249 + segments = _scan_segment_embeddings(day) 250 + return jsonify({"segments": segments}) 251 + 252 + 253 + @speakers_bp.route("/api/segment/<day>/<segment_key>") 254 + def api_segment_detail(day: str, segment_key: str) -> Any: 255 + """Return segment detail with speaker match results.""" 256 + if not re.fullmatch(DATE_RE.pattern, day): 257 + return error_response("Invalid day format", 400) 258 + 259 + if not validate_segment_key(segment_key): 260 + return error_response("Invalid segment key", 400) 261 + 262 + # Get selected facet from cookie 263 + selected_facet = request.cookies.get("selected_facet") 264 + if not selected_facet: 265 + return error_response("No facet selected", 400) 266 + 267 + # Load ALL entities in the facet for dropdown 268 + try: 269 + all_entities = load_entities(selected_facet) 270 + all_entity_names = [e.get("name") for e in all_entities if e.get("name")] 271 + except RuntimeError: 272 + all_entity_names = [] 273 + 274 + # Load known voiceprints for matching 275 + known_voiceprints = _scan_entity_voiceprints(selected_facet) 276 + 277 + # Load segment speaker embeddings 278 + audio_dir = day_path(day) / segment_key / "audio" 279 + if not audio_dir.is_dir(): 280 + return error_response("Segment has no speaker embeddings", 404) 281 + 282 + speakers = [] 283 + 
for npz_path in sorted(audio_dir.glob("*.npz")): 284 + speaker_label = npz_path.stem 285 + emb = _load_segment_speaker_embedding(day, segment_key, speaker_label) 286 + if emb is None: 287 + continue 288 + 289 + matches = _compute_matches(emb, known_voiceprints) 290 + speakers.append( 291 + { 292 + "label": speaker_label, 293 + "matches": matches, 294 + } 295 + ) 296 + 297 + # Get audio file URL if available 298 + audio_file = None 299 + segment_dir = day_path(day) / segment_key 300 + audio_files = list(segment_dir.glob("*audio.flac")) 301 + if audio_files: 302 + rel_path = f"{segment_key}/{audio_files[0].name}" 303 + audio_file = ( 304 + f"/app/speakers/api/serve_audio/{day}/{rel_path.replace('/', '__')}" 305 + ) 306 + 307 + return jsonify( 308 + { 309 + "speakers": speakers, 310 + "all_entities": all_entity_names, 311 + "audio_file": audio_file, 312 + "facet": selected_facet, 313 + } 314 + ) 315 + 316 + 317 + @speakers_bp.route("/api/serve_audio/<day>/<path:encoded_path>") 318 + def serve_audio(day: str, encoded_path: str) -> Any: 319 + """Serve audio files for playback.""" 320 + if not re.fullmatch(DATE_RE.pattern, day): 321 + return "", 404 322 + 323 + try: 324 + rel_path = encoded_path.replace("__", "/") 325 + full_path = os.path.join(state.journal_root, day, rel_path) 326 + 327 + day_dir = str(day_path(day)) 328 + if not os.path.commonpath([full_path, day_dir]) == day_dir: 329 + return "", 403 330 + 331 + if not os.path.isfile(full_path): 332 + return "", 404 333 + 334 + return send_file(full_path) 335 + 336 + except Exception as e: 337 + logger.warning("Error serving audio %s/%s: %s", day, encoded_path, e) 338 + return "", 404 339 + 340 + 341 + @speakers_bp.route("/api/save-voiceprint", methods=["POST"]) 342 + def api_save_voiceprint() -> Any: 343 + """Save a voiceprint from a segment speaker to an existing entity.""" 344 + data = request.get_json() 345 + if not data: 346 + return error_response("No data provided", 400) 347 + 348 + facet = data.get("facet") 
349 + entity_name = data.get("entity_name") 350 + day = data.get("day") 351 + segment_key = data.get("segment_key") 352 + speaker_label = data.get("speaker_label") 353 + 354 + if not all([facet, entity_name, day, segment_key, speaker_label]): 355 + return error_response("Missing required fields", 400) 356 + 357 + # Validate day and segment_key formats 358 + if not re.fullmatch(DATE_RE.pattern, day): 359 + return error_response("Invalid day format", 400) 360 + if not validate_segment_key(segment_key): 361 + return error_response("Invalid segment key", 400) 362 + 363 + # Validate entity exists 364 + entities = load_entities(facet) 365 + entity_names = [e.get("name") for e in entities] 366 + if entity_name not in entity_names: 367 + return error_response( 368 + f"Entity '{entity_name}' not found in facet '{facet}'", 404 369 + ) 370 + 371 + # Load speaker embedding 372 + emb = _load_segment_speaker_embedding(day, segment_key, speaker_label) 373 + if emb is None: 374 + return error_response("Speaker embedding not found", 404) 375 + 376 + # Save voiceprint 377 + try: 378 + emb_path = _save_voiceprint_to_entity(facet, entity_name, day, segment_key, emb) 379 + return success_response({"path": str(emb_path)}) 380 + except Exception as e: 381 + logger.exception("Failed to save voiceprint for %s", entity_name) 382 + return error_response(f"Failed to save voiceprint: {e}", 500) 383 + 384 + 385 + @speakers_bp.route("/api/create-entity-voiceprint", methods=["POST"]) 386 + def api_create_entity_voiceprint() -> Any: 387 + """Create a new entity with a voiceprint.""" 388 + data = request.get_json() 389 + if not data: 390 + return error_response("No data provided", 400) 391 + 392 + facet = data.get("facet") 393 + entity_type = data.get("type", "Person") 394 + entity_name = data.get("name") 395 + entity_description = data.get("description", "") 396 + day = data.get("day") 397 + segment_key = data.get("segment_key") 398 + speaker_label = data.get("speaker_label") 399 + 400 + if not 
all([facet, entity_name, day, segment_key, speaker_label]): 401 + return error_response("Missing required fields", 400) 402 + 403 + # Validate day and segment_key formats 404 + if not re.fullmatch(DATE_RE.pattern, day): 405 + return error_response("Invalid day format", 400) 406 + if not validate_segment_key(segment_key): 407 + return error_response("Invalid segment key", 400) 408 + 409 + # Check entity doesn't already exist 410 + entities = load_entities(facet, include_detached=True) 411 + entity_names = [e.get("name") for e in entities] 412 + if entity_name in entity_names: 413 + return error_response( 414 + f"Entity '{entity_name}' already exists in facet '{facet}'", 409 415 + ) 416 + 417 + # Load speaker embedding 418 + emb = _load_segment_speaker_embedding(day, segment_key, speaker_label) 419 + if emb is None: 420 + return error_response("Speaker embedding not found", 404) 421 + 422 + # Create new entity 423 + new_entity = { 424 + "type": entity_type, 425 + "name": entity_name, 426 + "description": entity_description, 427 + "attached_at": int(time.time() * 1000), 428 + } 429 + entities.append(new_entity) 430 + save_entities(facet, entities) 431 + 432 + # Save voiceprint 433 + try: 434 + emb_path = _save_voiceprint_to_entity(facet, entity_name, day, segment_key, emb) 435 + return success_response( 436 + {"entity": new_entity, "voiceprint_path": str(emb_path)} 437 + ) 438 + except Exception as e: 439 + logger.exception("Failed to save voiceprint for new entity %s", entity_name) 440 + return error_response(f"Failed to save voiceprint: {e}", 500)

apps/speakers/tests/__init__.py

This is a binary file and will not be displayed.

+99

apps/speakers/tests/conftest.py

··· 1 + """Self-contained fixtures for speakers app tests. 2 + 3 + These fixtures are fully standalone and only depend on pytest builtins. 4 + No shared dependencies from the root conftest.py are required. 5 + """ 6 + 7 + from __future__ import annotations 8 + 9 + import json 10 + from pathlib import Path 11 + 12 + import numpy as np 13 + import pytest 14 + 15 + from think.entities import normalize_entity_name 16 + 17 + 18 + @pytest.fixture 19 + def speakers_env(tmp_path, monkeypatch): 20 + """Create a temporary journal environment for speaker tests. 21 + 22 + Provides helpers to create: 23 + - Day directories with segment speaker embeddings 24 + - Facets with entities and voiceprints 25 + 26 + Usage: 27 + def test_example(speakers_env): 28 + env = speakers_env() 29 + env.create_segment("20240101", "143022_300", ["Speaker 1", "Speaker 2"]) 30 + env.create_entity("test", "Alice Test") 31 + # Now JOURNAL_PATH is set and data exists 32 + """ 33 + 34 + class SpeakersEnv: 35 + def __init__(self, journal_path: Path): 36 + self.journal = journal_path 37 + monkeypatch.setenv("JOURNAL_PATH", str(journal_path)) 38 + 39 + def create_segment( 40 + self, day: str, segment_key: str, speakers: list[str] 41 + ) -> Path: 42 + """Create a segment with speaker embedding files.""" 43 + audio_dir = self.journal / day / segment_key / "audio" 44 + audio_dir.mkdir(parents=True, exist_ok=True) 45 + 46 + for speaker in speakers: 47 + emb = np.random.randn(256).astype(np.float32) 48 + emb = emb / np.linalg.norm(emb) 49 + np.savez_compressed(audio_dir / f"{speaker}.npz", embedding=emb) 50 + 51 + return audio_dir 52 + 53 + def create_embedding(self, vector: list[float] | None = None) -> np.ndarray: 54 + """Create a normalized 256-dim embedding.""" 55 + if vector is None: 56 + emb = np.random.randn(256).astype(np.float32) 57 + else: 58 + emb = np.array(vector + [0.0] * (256 - len(vector)), dtype=np.float32) 59 + return emb / np.linalg.norm(emb) 60 + 61 + def create_entity( 62 + self, 63 + 
facet: str, 64 + name: str, 65 + voiceprints: list[tuple[str, str]] | None = None, 66 + ) -> Path: 67 + """Create an entity with optional voiceprint files. 68 + 69 + Args: 70 + facet: Facet name 71 + name: Entity name 72 + voiceprints: Optional list of (day, segment_key) tuples for voiceprints 73 + """ 74 + facet_dir = self.journal / "facets" / facet 75 + facet_dir.mkdir(parents=True, exist_ok=True) 76 + 77 + # Create entities.jsonl 78 + entities_file = facet_dir / "entities.jsonl" 79 + entity_data = {"type": "Person", "name": name, "description": "Test entity"} 80 + with open(entities_file, "a", encoding="utf-8") as f: 81 + f.write(json.dumps(entity_data) + "\n") 82 + 83 + # Create entity folder with voiceprints if specified 84 + if voiceprints: 85 + entity_dir = facet_dir / "entities" / normalize_entity_name(name) 86 + entity_dir.mkdir(parents=True, exist_ok=True) 87 + 88 + for day, segment_key in voiceprints: 89 + emb = self.create_embedding() 90 + np.savez_compressed( 91 + entity_dir / f"{day}_{segment_key}.npz", embedding=emb 92 + ) 93 + 94 + return facet_dir 95 + 96 + def _create(): 97 + return SpeakersEnv(tmp_path) 98 + 99 + return _create

+148

apps/speakers/tests/test_routes.py

"""Tests for speakers app.

The routes module is imported inside each test so that the ``speakers_env``
fixture has set ``JOURNAL_PATH`` before any journal path is resolved.
"""

import numpy as np


def _unit(values):
    """Build a 256-dim float32 unit vector from leading *values*."""
    vec = np.array(list(values) + [0.0] * (256 - len(values)), dtype=np.float32)
    return vec / np.linalg.norm(vec)


def test_normalize_embedding_zero_vector():
    """A zero vector cannot be normalized and yields None."""
    from apps.speakers.routes import _normalize_embedding

    assert _normalize_embedding(np.zeros(256, dtype=np.float32)) is None


def test_scan_segment_embeddings_empty(speakers_env):
    """Test scanning when no embeddings exist."""
    from apps.speakers.routes import _scan_segment_embeddings

    env = speakers_env()

    # Create a day dir but no segments
    day_dir = env.journal / "20240101"
    day_dir.mkdir()

    assert _scan_segment_embeddings("20240101") == []


def test_scan_segment_embeddings_with_data(speakers_env):
    """Test scanning when embeddings exist."""
    from apps.speakers.routes import _scan_segment_embeddings

    env = speakers_env()
    env.create_segment("20240101", "143022_300", ["Speaker 1", "Speaker 2"])

    segments = _scan_segment_embeddings("20240101")
    assert len(segments) == 1

    segment = segments[0]
    assert segment["key"] == "143022_300"
    assert segment["start"] == "14:30"
    assert segment["end"] == "14:35"
    assert segment["duration"] == 300
    assert set(segment["speakers"]) == {"Speaker 1", "Speaker 2"}


def test_load_segment_speaker_embedding(speakers_env):
    """Test loading a speaker embedding."""
    from apps.speakers.routes import _load_segment_speaker_embedding

    env = speakers_env()

    # Create segment with specific embedding
    audio_dir = env.journal / "20240101" / "143022_300" / "audio"
    audio_dir.mkdir(parents=True)
    emb = _unit([1.0, 0.0, 0.0])
    np.savez_compressed(audio_dir / "Speaker 1.npz", embedding=emb)

    loaded = _load_segment_speaker_embedding("20240101", "143022_300", "Speaker 1")
    assert loaded is not None
    assert loaded.shape == (256,)
    assert np.isclose(np.linalg.norm(loaded), 1.0)
    # The stored vector is already unit length, so it must come back unchanged
    assert np.allclose(loaded, emb)


def test_load_segment_speaker_embedding_missing(speakers_env):
    """A speaker without an embedding file yields None."""
    from apps.speakers.routes import _load_segment_speaker_embedding

    env = speakers_env()
    env.create_segment("20240101", "143022_300", ["Speaker 1"])

    assert (
        _load_segment_speaker_embedding("20240101", "143022_300", "Speaker 9") is None
    )


def test_compute_matches():
    """Test matching algorithm."""
    from apps.speakers.routes import _compute_matches

    seg_emb = _unit([1.0, 0.0, 0.0])

    # Known voiceprints with varying similarity to seg_emb
    known = {
        "Alice": _unit([1.0, 0.0, 0.0]),  # Perfect match
        "Bob": _unit([0.7, 0.7, 0.0]),  # Partial match (cos 45 deg ~ 0.707)
        "Charlie": _unit([0.0, 1.0, 0.0]),  # Orthogonal, no match
    }

    matches = _compute_matches(seg_emb, known)

    # Alice should have perfect match (1.0)
    assert "Alice" in matches
    assert matches["Alice"] >= 0.99

    # Bob should have partial match (~0.7)
    assert "Bob" in matches
    assert 0.65 <= matches["Bob"] <= 0.75

    # Charlie should be below threshold (0.4), not included
    assert "Charlie" not in matches


def test_compute_matches_no_known_voiceprints():
    """Matching against an empty set of voiceprints yields no matches."""
    from apps.speakers.routes import _compute_matches

    assert _compute_matches(_unit([1.0]), {}) == {}


def test_scan_entity_voiceprints_averaging(speakers_env):
    """Test that multiple voiceprints are averaged."""
    from apps.speakers.routes import _scan_entity_voiceprints
    from think.entities import normalize_entity_name

    env = speakers_env()

    # Create facet with entity
    facet_dir = env.journal / "facets" / "test"
    facet_dir.mkdir(parents=True)

    # Create entities.jsonl
    (facet_dir / "entities.jsonl").write_text(
        '{"type": "Person", "name": "Alice Test", "description": "Test"}\n'
    )

    # Create entity folder with multiple voiceprints
    entity_dir = facet_dir / "entities" / normalize_entity_name("Alice Test")
    entity_dir.mkdir(parents=True)

    # Create two voiceprints
    emb1 = _unit([1.0, 0.0, 0.0])
    emb2 = _unit([0.8, 0.6, 0.0])

    np.savez_compressed(entity_dir / "20240101_120000_300.npz", embedding=emb1)
    np.savez_compressed(entity_dir / "20240102_130000_300.npz", embedding=emb2)

    voiceprints = _scan_entity_voiceprints("test")

    assert "Alice Test" in voiceprints
    avg_emb = voiceprints["Alice Test"]
    assert avg_emb.shape == (256,)
    assert np.isclose(np.linalg.norm(avg_emb), 1.0)  # Should be normalized

    # Checking the direction too: a norm-only check would also pass if just
    # one of the two voiceprints had been used.
    expected = (emb1 + emb2) / 2
    expected = expected / np.linalg.norm(expected)
    assert np.allclose(avg_emb, expected, atol=1e-5)


def test_save_voiceprint_to_entity(speakers_env):
    """Test saving voiceprint to entity folder."""
    from apps.speakers.routes import _save_voiceprint_to_entity

    env = speakers_env()

    # Create facet
    facet_dir = env.journal / "facets" / "test"
    facet_dir.mkdir(parents=True)

    emb = _unit([1.0, 0.0, 0.0])

    path = _save_voiceprint_to_entity("test", "John Doe", "20240101", "143022_300", emb)

    assert path.exists()
    assert path.name == "20240101_143022_300.npz"
    assert "john_doe" in str(path.parent)

    # Verify content; context manager closes the archive's file handle
    with np.load(path) as data:
        assert "embedding" in data
        stored = data["embedding"]
    assert stored.shape == (256,)
    assert np.array_equal(stored, emb)

+676

apps/speakers/workspace.html

··· 1 + {# Speaker voiceprint management #} 2 + 3 + <style> 4 + /* Speakers app styles - all classes prefixed with .spk- */ 5 + 6 + .spk-wrap { 7 + max-width: 1200px; 8 + margin: 0 auto; 9 + padding: 16px 24px; 10 + } 11 + 12 + .spk-layout { 13 + display: grid; 14 + grid-template-columns: 280px 1fr; 15 + gap: 24px; 16 + min-height: 400px; 17 + } 18 + 19 + /* Segment list panel */ 20 + .spk-segments { 21 + background: #fff; 22 + border-radius: 12px; 23 + border: 1px solid #e5e7eb; 24 + overflow: hidden; 25 + } 26 + 27 + .spk-segments-header { 28 + padding: 12px 16px; 29 + border-bottom: 1px solid #e5e7eb; 30 + font-weight: 600; 31 + font-size: 14px; 32 + color: #374151; 33 + } 34 + 35 + .spk-segments-list { 36 + max-height: 500px; 37 + overflow-y: auto; 38 + } 39 + 40 + .spk-segment { 41 + padding: 12px 16px; 42 + border-bottom: 1px solid #f3f4f6; 43 + cursor: pointer; 44 + transition: background 0.15s; 45 + } 46 + 47 + .spk-segment:hover { 48 + background: #f9fafb; 49 + } 50 + 51 + .spk-segment.active { 52 + background: #eff6ff; 53 + border-left: 3px solid #3b82f6; 54 + } 55 + 56 + .spk-segment-time { 57 + font-weight: 500; 58 + font-size: 14px; 59 + color: #1f2937; 60 + } 61 + 62 + .spk-segment-meta { 63 + font-size: 12px; 64 + color: #6b7280; 65 + margin-top: 4px; 66 + } 67 + 68 + .spk-segment-speakers { 69 + display: flex; 70 + gap: 6px; 71 + margin-top: 6px; 72 + flex-wrap: wrap; 73 + } 74 + 75 + .spk-speaker-badge { 76 + display: inline-block; 77 + padding: 2px 8px; 78 + background: #e5e7eb; 79 + border-radius: 12px; 80 + font-size: 11px; 81 + color: #4b5563; 82 + } 83 + 84 + .spk-empty { 85 + padding: 24px; 86 + text-align: center; 87 + color: #9ca3af; 88 + font-size: 14px; 89 + } 90 + 91 + /* Detail panel */ 92 + .spk-detail { 93 + background: #fff; 94 + border-radius: 12px; 95 + border: 1px solid #e5e7eb; 96 + padding: 20px; 97 + } 98 + 99 + .spk-detail-header { 100 + display: flex; 101 + align-items: center; 102 + justify-content: space-between; 103 + 
margin-bottom: 16px; 104 + } 105 + 106 + .spk-detail-title { 107 + font-size: 18px; 108 + font-weight: 600; 109 + color: #1f2937; 110 + } 111 + 112 + .spk-audio-player { 113 + margin-bottom: 20px; 114 + } 115 + 116 + .spk-audio-player audio { 117 + width: 100%; 118 + } 119 + 120 + /* Match table */ 121 + .spk-matches { 122 + margin-top: 16px; 123 + } 124 + 125 + .spk-matches-title { 126 + font-size: 14px; 127 + font-weight: 600; 128 + color: #374151; 129 + margin-bottom: 12px; 130 + } 131 + 132 + .spk-match-table { 133 + width: 100%; 134 + border-collapse: collapse; 135 + font-size: 13px; 136 + } 137 + 138 + .spk-match-table th, 139 + .spk-match-table td { 140 + padding: 10px 12px; 141 + text-align: left; 142 + border-bottom: 1px solid #e5e7eb; 143 + } 144 + 145 + .spk-match-table th { 146 + background: #f9fafb; 147 + font-weight: 500; 148 + color: #6b7280; 149 + } 150 + 151 + .spk-match-table td:first-child { 152 + font-weight: 500; 153 + } 154 + 155 + .spk-score { 156 + display: inline-block; 157 + padding: 2px 8px; 158 + border-radius: 4px; 159 + font-family: monospace; 160 + font-size: 12px; 161 + } 162 + 163 + .spk-score-high { 164 + background: #dcfce7; 165 + color: #166534; 166 + } 167 + 168 + .spk-score-med { 169 + background: #fef9c3; 170 + color: #854d0e; 171 + } 172 + 173 + .spk-score-low { 174 + background: #f3f4f6; 175 + color: #6b7280; 176 + } 177 + 178 + .spk-no-match { 179 + color: #9ca3af; 180 + font-style: italic; 181 + } 182 + 183 + /* Actions */ 184 + .spk-actions { 185 + display: flex; 186 + gap: 8px; 187 + align-items: center; 188 + } 189 + 190 + .spk-select { 191 + padding: 6px 12px; 192 + border: 1px solid #d1d5db; 193 + border-radius: 6px; 194 + font-size: 13px; 195 + background: #fff; 196 + min-width: 150px; 197 + } 198 + 199 + .spk-btn { 200 + padding: 6px 14px; 201 + border: none; 202 + border-radius: 6px; 203 + font-size: 13px; 204 + cursor: pointer; 205 + transition: background 0.15s; 206 + } 207 + 208 + .spk-btn-primary { 209 + 
background: #3b82f6; 210 + color: #fff; 211 + } 212 + 213 + .spk-btn-primary:hover { 214 + background: #2563eb; 215 + } 216 + 217 + .spk-btn-primary:disabled { 218 + background: #93c5fd; 219 + cursor: not-allowed; 220 + } 221 + 222 + .spk-btn-secondary { 223 + background: #f3f4f6; 224 + color: #374151; 225 + } 226 + 227 + .spk-btn-secondary:hover { 228 + background: #e5e7eb; 229 + } 230 + 231 + /* Create entity form */ 232 + .spk-create-form { 233 + margin-top: 16px; 234 + padding: 16px; 235 + background: #f9fafb; 236 + border-radius: 8px; 237 + border: 1px solid #e5e7eb; 238 + } 239 + 240 + .spk-form-row { 241 + margin-bottom: 12px; 242 + } 243 + 244 + .spk-form-row label { 245 + display: block; 246 + font-size: 12px; 247 + font-weight: 500; 248 + color: #6b7280; 249 + margin-bottom: 4px; 250 + } 251 + 252 + .spk-form-row input, 253 + .spk-form-row select { 254 + width: 100%; 255 + padding: 8px 12px; 256 + border: 1px solid #d1d5db; 257 + border-radius: 6px; 258 + font-size: 13px; 259 + } 260 + 261 + .spk-form-actions { 262 + display: flex; 263 + gap: 8px; 264 + margin-top: 12px; 265 + } 266 + 267 + /* Status messages */ 268 + .spk-status { 269 + padding: 8px 12px; 270 + border-radius: 6px; 271 + font-size: 13px; 272 + margin-top: 12px; 273 + } 274 + 275 + .spk-status-success { 276 + background: #dcfce7; 277 + color: #166534; 278 + } 279 + 280 + .spk-status-error { 281 + background: #fee2e2; 282 + color: #991b1b; 283 + } 284 + 285 + /* Facet indicator */ 286 + .spk-facet-badge { 287 + display: inline-block; 288 + padding: 4px 10px; 289 + background: var(--facet-bg, #e5e7eb); 290 + border: 1px solid var(--facet-color, #d1d5db); 291 + border-radius: 16px; 292 + font-size: 12px; 293 + color: var(--facet-color, #6b7280); 294 + } 295 + </style> 296 + 297 + <div class="spk-wrap"> 298 + <div class="spk-layout"> 299 +  300 + <div class="spk-segments"> 301 + <div class="spk-segments-header"> 302 + Segments with Speakers 303 + </div> 304 + <div class="spk-segments-list" 
id="spkSegmentList">
      <div class="spk-empty">Loading...</div>
    </div>
  </div>

  <!-- Detail panel: populated by renderSegmentDetail() once a segment is selected -->
  <div class="spk-detail" id="spkDetail">
    <div class="spk-empty">
      Select a segment to view speaker matches
    </div>
  </div>
</div>
</div>

<script>
/*
 * Speakers workspace client script.
 *
 * Loads the list of segments that have speaker embeddings for the current
 * day, shows per-speaker voiceprint matches for the selected segment, and
 * lets the user attach a speaker's voiceprint to an existing entity or to a
 * newly created one.
 *
 * Everything is rendered through innerHTML, so every server- or
 * user-supplied value MUST pass through escapeHtml() before interpolation.
 */
(() => {
  // Day being viewed, substituted by the template engine at render time.
  // NOTE(review): presumably validated against a date pattern by the route
  // before rendering -- confirm, since it is interpolated into a JS string.
  const day = '{{ day }}';
  const segmentList = document.getElementById('spkSegmentList');
  const detailPanel = document.getElementById('spkDetail');

  // Sentinel <option> value meaning "create a new entity".
  const NEW_ENTITY_VALUE = '__new__';
  // How long a status message stays visible.
  const STATUS_TIMEOUT_MS = 3000;
  // Delay before refreshing matches after a successful save, so the
  // success message is readable before the panel is re-rendered.
  const RELOAD_DELAY_MS = 1500;

  let segments = [];
  let selectedSegment = null;
  // Incremented for every detail request; a response is only rendered if it
  // belongs to the most recent request (guards against out-of-order replies).
  let detailRequestSeq = 0;
  // Pending "clear status" timers, keyed by status container element.
  const statusTimers = new WeakMap();

  /**
   * Escape a value for safe interpolation into HTML text or a quoted
   * attribute. Uses numeric character references so the escaping survives
   * any tooling that decodes named entities in this source file.
   * Returns '' for null/undefined.
   */
  function escapeHtml(str) {
    if (str === null || str === undefined) return '';
    return String(str).replace(/[&<>"']/g, ch => `&#${ch.charCodeAt(0)};`);
  }

  /** Format a duration in seconds as "Ns", "Nm" or "Nm Ns". */
  function formatDuration(seconds) {
    const mins = Math.floor(seconds / 60);
    const secs = seconds % 60;
    if (mins === 0) return `${secs}s`;
    return secs > 0 ? `${mins}m ${secs}s` : `${mins}m`;
  }

  /** Show a transient status message ('error' | 'success') inside a card. */
  function showStatus(card, message, type) {
    const container = card.querySelector('.spk-status-container');
    container.innerHTML = `<div class="spk-status spk-status-${type}">${escapeHtml(message)}</div>`;
    // Cancel the previous timer so it cannot wipe this newer message early.
    clearTimeout(statusTimers.get(container));
    statusTimers.set(
      container,
      setTimeout(() => { container.innerHTML = ''; }, STATUS_TIMEOUT_MS)
    );
  }

  /** Fetch the day's segments and render the list (or an error/empty state). */
  function loadSegments() {
    fetch(`/app/speakers/api/segments/${day}`)
      .then(r => r.json())
      .then(data => {
        if (data.error) {
          segmentList.innerHTML = `<div class="spk-empty">${escapeHtml(data.error)}</div>`;
          return;
        }
        segments = data.segments || [];
        renderSegmentList();
      })
      .catch(() => {
        segmentList.innerHTML = '<div class="spk-empty">Failed to load segments</div>';
      });
  }

  /** Render the segment list, highlighting the currently selected segment. */
  function renderSegmentList() {
    if (segments.length === 0) {
      segmentList.innerHTML = '<div class="spk-empty">No segments with speaker embeddings found for this day</div>';
      return;
    }

    segmentList.innerHTML = segments.map(seg => `
      <div class="spk-segment${selectedSegment?.key === seg.key ? ' active' : ''}" data-key="${escapeHtml(seg.key)}">
        <div class="spk-segment-time">${escapeHtml(seg.start)} - ${escapeHtml(seg.end)}</div>
        <div class="spk-segment-meta">${formatDuration(seg.duration)}</div>
        <div class="spk-segment-speakers">
          ${(seg.speakers || []).map(s => `<span class="spk-speaker-badge">${escapeHtml(s)}</span>`).join('')}
        </div>
      </div>
    `).join('');
  }

  /** Mark a segment as selected and load its speaker matches. */
  function selectSegment(seg) {
    selectedSegment = seg;
    renderSegmentList();
    loadSegmentDetail(seg);
  }

  /** Fetch and render speaker matches for one segment. */
  function loadSegmentDetail(seg) {
    const requestId = ++detailRequestSeq;
    detailPanel.innerHTML = '<div class="spk-empty">Loading...</div>';

    fetch(`/app/speakers/api/segment/${day}/${encodeURIComponent(seg.key)}`)
      .then(r => r.json())
      .then(data => {
        // A newer request was issued while this one was in flight.
        if (requestId !== detailRequestSeq) return;
        if (data.error) {
          detailPanel.innerHTML = `<div class="spk-empty">${escapeHtml(data.error)}</div>`;
          return;
        }
        renderSegmentDetail(seg, data);
      })
      .catch(() => {
        if (requestId !== detailRequestSeq) return;
        detailPanel.innerHTML = '<div class="spk-empty">Failed to load segment details</div>';
      });
  }

  /** Re-fetch the detail panel for whatever segment is currently selected. */
  function reloadSelectedSegment() {
    if (selectedSegment) loadSegmentDetail(selectedSegment);
  }

  /** Render the detail panel: header, optional audio player, speaker cards. */
  function renderSegmentDetail(seg, data) {
    const { speakers = [], all_entities: allEntities = [], audio_file: audioFile, facet } = data;

    let html = `
      <div class="spk-detail-header">
        <div class="spk-detail-title">${escapeHtml(seg.start)} - ${escapeHtml(seg.end)}</div>
        <span class="spk-facet-badge">${escapeHtml(facet)}</span>
      </div>
    `;

    // Audio player
    if (audioFile) {
      html += `
        <div class="spk-audio-player">
          <audio controls preload="metadata">
            <source src="${escapeHtml(audioFile)}" type="audio/flac">
          </audio>
        </div>
      `;
    }

    // Speakers and matches
    if (speakers.length === 0) {
      html += '<div class="spk-empty">No speaker embeddings found</div>';
    } else {
      html += '<div class="spk-matches">';
      html += '<div class="spk-matches-title">Speaker Matches</div>';
      html += speakers
        .map((speaker, idx) => renderSpeakerCard(speaker, allEntities, facet, seg, idx))
        .join('');
      html += '</div>';
    }

    detailPanel.innerHTML = html;
    setupEventHandlers(seg, facet);
  }

  /**
   * Render the entity dropdown plus Assign button shown once per card.
   * `preselected` is the entity name to pre-select, or null for none.
   * The button is emitted disabled; setupEventHandlers() syncs it with the
   * dropdown's actual value after the markup is inserted.
   */
  function renderAssignControls(allEntities, preselected) {
    const options = allEntities.map(name => {
      const safe = escapeHtml(name);
      const selected = name === preselected ? ' selected' : '';
      return `<option value="${safe}"${selected}>${safe}</option>`;
    }).join('');

    return `
      <div class="spk-actions">
        <select class="spk-select spk-entity-select">
          <option value="">Assign to entity...</option>
          ${options}
          <option value="${NEW_ENTITY_VALUE}">+ Create new entity</option>
        </select>
        <button class="spk-btn spk-btn-primary spk-assign-btn" disabled>Assign</button>
      </div>
    `;
  }

  /**
   * Render one speaker card: a table of entity matches sorted by descending
   * score, with the assign controls on the first row (or on a single
   * "no matches" row).
   */
  function renderSpeakerCard(speaker, allEntities, facet, seg, idx) {
    const matches = Object.entries(speaker.matches || {})
      .sort((a, b) => b[1] - a[1]);

    let rows;
    if (matches.length === 0) {
      rows = `
        <tr>
          <td colspan="2"><span class="spk-no-match">No matches above threshold</span></td>
          <td>${renderAssignControls(allEntities, null)}</td>
        </tr>
      `;
    } else {
      rows = matches.map(([entityName, score], i) => {
        const scoreClass = score >= 0.7 ? 'high' : score >= 0.5 ? 'med' : 'low';
        const scorePct = Math.round(score * 100);
        // Only the best match carries the controls, pre-selected to it.
        const controls = i === 0 ? renderAssignControls(allEntities, entityName) : '';
        return `
          <tr>
            <td>${escapeHtml(entityName)}</td>
            <td><span class="spk-score spk-score-${scoreClass}">${scorePct}%</span></td>
            <td>${controls}</td>
          </tr>
        `;
      }).join('');
    }

    return `
      <div class="spk-speaker-card" data-speaker="${escapeHtml(speaker.label)}" data-idx="${idx}">
        <table class="spk-match-table">
          <thead>
            <tr>
              <th>${escapeHtml(speaker.label)}</th>
              <th>Match Score</th>
              <th>Action</th>
            </tr>
          </thead>
          <tbody>
            ${rows}
          </tbody>
        </table>
        <div class="spk-create-form-container" style="display: none;"></div>
        <div class="spk-status-container"></div>
      </div>
    `;
  }

  /** Wire up the dropdowns and Assign buttons of the freshly rendered cards. */
  function setupEventHandlers(seg, facet) {
    detailPanel.querySelectorAll('.spk-entity-select').forEach(select => {
      const card = select.closest('.spk-speaker-card');
      const assignBtn = card.querySelector('.spk-assign-btn');
      const formContainer = card.querySelector('.spk-create-form-container');

      // Enable Assign right away when a match was pre-selected; otherwise
      // the suggested entity could not be confirmed without re-selecting it.
      assignBtn.disabled = !select.value || select.value === NEW_ENTITY_VALUE;

      select.addEventListener('change', () => {
        if (select.value === NEW_ENTITY_VALUE) {
          assignBtn.disabled = true;
          formContainer.style.display = 'block';
          formContainer.innerHTML = renderCreateForm();
          setupCreateFormHandlers(card, seg, facet);
        } else {
          formContainer.style.display = 'none';
          formContainer.innerHTML = '';
          assignBtn.disabled = !select.value;
        }
      });
    });

    detailPanel.querySelectorAll('.spk-assign-btn').forEach(btn => {
      btn.addEventListener('click', () => {
        const card = btn.closest('.spk-speaker-card');
        const speakerLabel = card.dataset.speaker;
        const entityName = card.querySelector('.spk-entity-select').value;

        if (!entityName || entityName === NEW_ENTITY_VALUE) return;

        saveVoiceprint(facet, entityName, seg.key, speakerLabel, card);
      });
    });
  }

  /** Markup for the inline "create new entity" form (static, no user data). */
  function renderCreateForm() {
    return `
      <div class="spk-create-form">
        <div class="spk-form-row">
          <label>Entity Name</label>
          <input type="text" class="spk-new-name" placeholder="e.g., John Smith">
        </div>
        <div class="spk-form-row">
          <label>Type</label>
          <select class="spk-new-type">
            <option value="Person">Person</option>
            <option value="Contact">Contact</option>
          </select>
        </div>
        <div class="spk-form-row">
          <label>Description (optional)</label>
          <input type="text" class="spk-new-desc" placeholder="e.g., Colleague from work">
        </div>
        <div class="spk-form-actions">
          <button class="spk-btn spk-btn-secondary spk-cancel-create">Cancel</button>
          <button class="spk-btn spk-btn-primary spk-confirm-create">Create &amp; Save Voiceprint</button>
        </div>
      </div>
    `;
  }

  /** Wire up Cancel / Create on the inline create form of one card. */
  function setupCreateFormHandlers(card, seg, facet) {
    const formContainer = card.querySelector('.spk-create-form-container');
    const select = card.querySelector('.spk-entity-select');
    const speakerLabel = card.dataset.speaker;

    formContainer.querySelector('.spk-cancel-create').addEventListener('click', () => {
      formContainer.style.display = 'none';
      formContainer.innerHTML = '';
      select.value = '';
    });

    formContainer.querySelector('.spk-confirm-create').addEventListener('click', () => {
      const name = formContainer.querySelector('.spk-new-name').value.trim();
      const type = formContainer.querySelector('.spk-new-type').value;
      const desc = formContainer.querySelector('.spk-new-desc').value.trim();

      if (!name) {
        showStatus(card, 'Please enter an entity name', 'error');
        return;
      }

      createEntityWithVoiceprint(facet, type, name, desc, seg.key, speakerLabel, card);
    });
  }

  /** POST a JSON payload and resolve with the parsed JSON response body. */
  function postJson(url, payload) {
    return fetch(url, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(payload),
    }).then(r => r.json());
  }

  /** Save a speaker's voiceprint from this segment to an existing entity. */
  function saveVoiceprint(facet, entityName, segmentKey, speakerLabel, card) {
    postJson('/app/speakers/api/save-voiceprint', {
      facet,
      entity_name: entityName,
      day,
      segment_key: segmentKey,
      speaker_label: speakerLabel,
    })
      .then(data => {
        if (data.error) {
          showStatus(card, data.error, 'error');
          return;
        }
        showStatus(card, `Voiceprint saved to ${entityName}`, 'success');
        // Reload to refresh matches
        setTimeout(reloadSelectedSegment, RELOAD_DELAY_MS);
      })
      .catch(() => {
        showStatus(card, 'Failed to save voiceprint', 'error');
      });
  }

  /** Create a new entity and attach this speaker's voiceprint to it. */
  function createEntityWithVoiceprint(facet, type, name, description, segmentKey, speakerLabel, card) {
    postJson('/app/speakers/api/create-entity-voiceprint', {
      facet,
      type,
      name,
      description,
      day,
      segment_key: segmentKey,
      speaker_label: speakerLabel,
    })
      .then(data => {
        if (data.error) {
          showStatus(card, data.error, 'error');
          return;
        }
        showStatus(card, `Created entity "${name}" with voiceprint`, 'success');
        // Hide form and reload
        const formContainer = card.querySelector('.spk-create-form-container');
        formContainer.style.display = 'none';
        formContainer.innerHTML = '';
        setTimeout(reloadSelectedSegment, RELOAD_DELAY_MS);
      })
      .catch(() => {
        showStatus(card, 'Failed to create entity', 'error');
      });
  }

  // One delegated click handler for the list: it survives re-renders, so
  // handlers do not need to be re-attached after every innerHTML update.
  segmentList.addEventListener('click', event => {
    const el = event.target.closest('.spk-segment');
    if (!el || !segmentList.contains(el)) return;
    const seg = segments.find(s => s.key === el.dataset.key);
    if (seg) selectSegment(seg);
  });

  // Matches are shown for the active facet, so refresh the open segment
  // whenever the facet changes.
  window.addEventListener('facet.switch', reloadSelectedSegment);

  // Load segments on page load
  loadSegments();
})();
</script>

Configure Feed

Configure Feed