personal memory agent

apps/speakers: collapse owner bootstrap UX, add manual tag path

The owner bootstrap flow had split surfaces and no fast path from
explicit self-tagging, which left the UI in two weak states and forced
users to wait for HDBSCAN even after they had already attested enough
of their own speech.

This keeps clustering for candidate discovery, but lets validated
principal self-tags build the confirmed centroid directly and provide
a provisional contamination guard before confirmation exists.
It also collapses the low-signal owner banner into one diagnostics-led
path so the UI and status surfaces explain what is missing.

Lode: 4kverujw

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

+1462 -89
+2
apps/speakers/encoder_config.py
```diff
···
 OWNER_BOOTSTRAP_MIN_STMTS: int = 30
 OWNER_BOOTSTRAP_MIN_MEDIAN_DURATION_S: float = 1.5
 OWNER_BOOTSTRAP_MIN_INTRA_COSINE_P25: float = 0.30
+# Smallest manual-tag set that meaningfully constrains the contamination centroid; below this the no-op default holds.
+OWNER_BOOTSTRAP_PROVISIONAL_GUARD_MIN_TAGS: int = 5

 NOISY_FLYWHEEL_OVERLAP_MAX: float = 0.10
 OVERLAP_DETECTOR_ID: str = "pyannote-segmentation-3.0-onnx"
```
+532 -58
apps/speakers/owner.py
```diff
···
     OWNER_BOOTSTRAP_MIN_INTRA_COSINE_P25,
     OWNER_BOOTSTRAP_MIN_MEDIAN_DURATION_S,
     OWNER_BOOTSTRAP_MIN_STMTS,
+    OWNER_BOOTSTRAP_PROVISIONAL_GUARD_MIN_TAGS,
     OWNER_THRESHOLD,
 )
 from think.awareness import update_state
-from think.entities.journal import get_journal_principal, journal_entity_memory_path
+from think.entities.journal import (
+    ensure_journal_entity_memory,
+    get_journal_principal,
+    journal_entity_memory_path,
+)
+from think.entities.voiceprints import load_entity_voiceprints_file
 from think.utils import day_dirs, get_journal, segment_path

 logger = logging.getLogger(__name__)

 MAX_EMBEDDINGS = 30000
+LOW_QUALITY_REASON_TOO_FEW_STMTS = "too_few_stmts"
+LOW_QUALITY_REASON_MEDIAN_DURATION_TOO_SHORT = "median_duration_too_short"
+LOW_QUALITY_REASON_CLUSTER_TOO_DIFFUSE = "cluster_too_diffuse"
+MANUAL_OWNER_METHODS = frozenset({"user_assigned", "user_corrected", "user_confirmed"})
+_PROVISIONAL_GUARD_CACHE: dict[str, tuple[int, int, np.ndarray]] | None = None


 def _mark_no_cluster(segment_count: int) -> None:
···


 def _mark_low_quality(
-    reason: str, observed: float, threshold: float, segment_count: int
+    reason: str,
+    observed: float,
+    threshold: float,
+    segment_count: int,
+    *,
+    source: str,
 ) -> None:
     """Record that detection found a cluster, but it failed quality gates."""
     update_state(
         "voiceprint",
         {
             "status": "low_quality",
+            "source": source,
             "low_quality_reason": reason,
             "observed_value": float(observed),
             "threshold_value": float(threshold),
···
     threshold: float,
     segment_count: int,
     embeddings_count: int,
+    *,
+    source: str,
 ) -> dict[str, Any]:
     """Record and return a locked low-quality owner detection result."""
-    _mark_low_quality(reason, observed, threshold, segment_count)
+    _mark_low_quality(
+        reason,
+        observed,
+        threshold,
+        segment_count,
+        source=source,
+    )
     return {
         "status": "low_quality",
+        "source": source,
         "recommendation": "low_quality",
         "segments_available": int(segment_count),
         "embeddings_available": int(embeddings_count),
···
     return awareness_dir / "owner_candidate.npz"


+def _principal_id_or_none() -> str | None:
+    """Return the current journal principal entity id, if one exists."""
+    principal = get_journal_principal()
+    if principal is None:
+        return None
+    return str(principal["id"])
+
+
 def _iso_now() -> str:
     """Return a timestamp string for persisted metadata."""
     return datetime.now().isoformat()
···
     return durations


-def count_segments_with_embeddings() -> int:
-    """Count all journal segments that contain audio embedding files."""
-    _, _, scan_segment_embeddings = _routes_helpers()
+def _load_manual_tag_rows(principal_id: str) -> list[dict[str, Any]]:
+    """Return validated owner-attested voiceprint rows for the principal."""
+    result = load_entity_voiceprints_file(principal_id)
+    if result is None:
+        return []
+
+    _embeddings, metadata_rows = result
+    latest_rows: dict[tuple[str, str, str, int], tuple[int, int, dict[str, Any]]] = {}
+    for index, raw_row in enumerate(metadata_rows):
+        day = raw_row.get("day")
+        segment_key = raw_row.get("segment_key")
+        source = raw_row.get("source")
+        sentence_id = raw_row.get("sentence_id")
+        if not isinstance(day, str) or not isinstance(segment_key, str):
+            continue
+        if not isinstance(source, str):
+            continue
+        try:
+            sentence_id_int = int(sentence_id)
+        except (TypeError, ValueError):
+            continue
+        added_at = raw_row.get("added_at")
+        try:
+            added_at_int = int(added_at)
+        except (TypeError, ValueError):
+            added_at_int = -1
+        dedupe_key = (day, segment_key, source, sentence_id_int)
+        current = latest_rows.get(dedupe_key)
+        if current is not None and (added_at_int, index) <= (current[0], current[1]):
+            continue
+        normalized = dict(raw_row)
+        normalized["day"] = day
+        normalized["segment_key"] = segment_key
+        normalized["source"] = source
+        normalized["sentence_id"] = sentence_id_int
+        latest_rows[dedupe_key] = (added_at_int, index, normalized)
+
+    chronicle_root = Path(get_journal()) / "chronicle"
+    rows: list[dict[str, Any]] = []
+    labels_cache: dict[Path, dict[str, Any] | None] = {}
+    overlap_cache: dict[Path, float] = {}
+    for _added_at, _index, row in sorted(
+        latest_rows.values(),
+        key=lambda item: (
+            item[2]["day"],
+            str(item[2].get("stream") or ""),
+            item[2]["segment_key"],
+            item[2]["source"],
+            item[2]["sentence_id"],
+        ),
+    ):
+        day = row["day"]
+        stream = row.get("stream")
+        segment_key = row["segment_key"]
+        source = row["source"]
+        sentence_id = row["sentence_id"]
+
+        segment_dir: Path | None = None
+        if isinstance(stream, str) and stream:
+            candidate = chronicle_root / day / stream / segment_key
+            if candidate.is_dir():
+                segment_dir = candidate
+        else:
+            matches = [
+                candidate
+                for candidate in (chronicle_root / day).glob(f"*/{segment_key}")
+                if candidate.is_dir()
+            ]
+            if len(matches) == 1:
+                segment_dir = matches[0]
+                row["stream"] = matches[0].parent.name
+                stream = row["stream"]
+            else:
+                if len(matches) > 1:
+                    logger.info(
+                        "owner manual bootstrap skip: ambiguous segment for %s/%s/%s",
+                        day,
+                        segment_key,
+                        source,
+                    )
+                continue
+
+        if segment_dir is None:
+            continue
+
+        labels_path = segment_dir / "talents" / "speaker_labels.json"
+        if labels_path not in labels_cache:
+            if not labels_path.is_file():
+                labels_cache[labels_path] = None
+            else:
+                try:
+                    with open(labels_path, encoding="utf-8") as f:
+                        labels_cache[labels_path] = json.load(f)
+                except (json.JSONDecodeError, OSError):
+                    labels_cache[labels_path] = None
+        labels_data = labels_cache[labels_path]
+        if not isinstance(labels_data, dict):
+            continue
+
+        label_match = None
+        for label in labels_data.get("labels", []):
+            try:
+                label_sentence_id = int(label.get("sentence_id", -1))
+            except (TypeError, ValueError):
+                continue
+            if label_sentence_id != sentence_id:
+                continue
+            label_match = label
+            break
+        if label_match is None:
+            continue
+        if label_match.get("speaker") != principal_id:
+            continue
+        if label_match.get("method") not in MANUAL_OWNER_METHODS:
+            continue
+
+        jsonl_path = segment_dir / f"{source}.jsonl"
+        overlap = overlap_cache.setdefault(
+            jsonl_path,
+            _read_segment_overlap_fraction(jsonl_path),
+        )
+        if overlap >= NOISY_FLYWHEEL_OVERLAP_MAX:
+            logger.info(
+                "owner manual bootstrap skip: overlap=%.3f at %s/%s/%s",
+                overlap,
+                day,
+                segment_key,
+                source,
+            )
+            continue

-    total = 0
+        rows.append(
+            {
+                "day": day,
+                "stream": stream,
+                "segment_key": segment_key,
+                "source": source,
+                "sentence_id": sentence_id,
+                "segment_dir": segment_dir,
+                "jsonl_path": jsonl_path,
+            }
+        )
+
+    return rows
+
+
+def count_manual_tag_embeddings(principal_id: str) -> int:
+    """Count validated owner manual-tag rows for the principal."""
+    return len(_load_manual_tag_rows(principal_id))
+
+
+def load_manual_tag_stats(principal_id: str) -> dict[str, int]:
+    """Return aggregate counts for validated owner manual-tag rows."""
+    rows = _load_manual_tag_rows(principal_id)
+    streams = {row["stream"] for row in rows if row.get("stream")}
+    return {
+        "manual_tags_count": len(rows),
+        "streams_represented": len(streams),
+    }
+
+
+def load_owner_embedding_inventory() -> dict[str, int]:
+    """Return journal-wide segment and embedding availability for owner bootstrap."""
+    load_embeddings_file, _, scan_segment_embeddings = _routes_helpers()
+
+    segment_count = 0
+    embeddings_count = 0
+    overlap_cache: dict[Path, float] = {}
     for day in day_dirs().keys():
-        total += len(scan_segment_embeddings(day))
-    return total
+        for segment in scan_segment_embeddings(day):
+            segment_count += 1
+            segment_dir = segment_path(day, segment["key"], segment["stream"])
+            for source in segment["sources"]:
+                jsonl_path = segment_dir / f"{source}.jsonl"
+                overlap = overlap_cache.setdefault(
+                    jsonl_path,
+                    _read_segment_overlap_fraction(jsonl_path),
+                )
+                if overlap > NOISY_FLYWHEEL_OVERLAP_MAX:
+                    continue
+                emb_data = load_embeddings_file(segment_dir / f"{source}.npz")
+                if emb_data is None:
+                    continue
+                embeddings_count += int(len(emb_data[0]))
+
+    return {
+        "segments_available": segment_count,
+        "embeddings_available": embeddings_count,
+    }
+
+
+def load_owner_bootstrap_diagnostics(
+    principal_id: str | None,
+) -> dict[str, int | bool]:
+    """Return counts that drive owner bootstrap diagnostics surfaces."""
+    inventory = load_owner_embedding_inventory()
+    manual_stats = (
+        load_manual_tag_stats(principal_id)
+        if principal_id is not None
+        else {"manual_tags_count": 0, "streams_represented": 0}
+    )
+    manual_tags_count = int(manual_stats["manual_tags_count"])
+    return {
+        "manual_tags_count": manual_tags_count,
+        "segments_available": int(inventory["segments_available"]),
+        "embeddings_available": int(inventory["embeddings_available"]),
+        "streams_represented": int(manual_stats["streams_represented"]),
+        "can_build_from_tags": manual_tags_count >= OWNER_BOOTSTRAP_MIN_STMTS,
+    }
+
+
+def clear_owner_provisional_cache(principal_id: str | None = None) -> None:
+    """Clear the cached provisional owner centroid for one principal or all."""
+    global _PROVISIONAL_GUARD_CACHE
+
+    if _PROVISIONAL_GUARD_CACHE is None:
+        return
+    if principal_id is None:
+        _PROVISIONAL_GUARD_CACHE = None
+        return
+    _PROVISIONAL_GUARD_CACHE.pop(principal_id, None)
+    if not _PROVISIONAL_GUARD_CACHE:
+        _PROVISIONAL_GUARD_CACHE = None
+
+
+def _write_owner_centroid(
+    principal_id: str, centroid: np.ndarray, cluster_size: int
+) -> Path:
+    """Write owner_centroid.npz with the canonical schema."""
+    owner_path = ensure_journal_entity_memory(principal_id) / "owner_centroid.npz"
+    np.savez_compressed(
+        owner_path,
+        centroid=np.asarray(centroid, dtype=np.float32).reshape(-1),
+        cluster_size=np.array(cluster_size, dtype=np.int32),
+        threshold=np.array(OWNER_THRESHOLD, dtype=np.float32),
+        version=np.array(_iso_now()),
+    )
+    return owner_path
+
+
+def _apply_owner_quality_gates(
+    cluster_embeddings: np.ndarray,
+    cluster_durations: list[float],
+    segment_count: int,
+    embeddings_count: int,
+    source: str,
+) -> dict[str, Any] | None:
+    """Return a low-quality payload when a gate fails, or None when all pass."""
+    cluster_size = int(cluster_embeddings.shape[0])
+    if cluster_size < OWNER_BOOTSTRAP_MIN_STMTS:
+        return _bail_low_quality(
+            LOW_QUALITY_REASON_TOO_FEW_STMTS,
+            observed=cluster_size,
+            threshold=OWNER_BOOTSTRAP_MIN_STMTS,
+            segment_count=segment_count,
+            embeddings_count=embeddings_count,
+            source=source,
+        )
+
+    median_duration = (
+        0.0 if not cluster_durations else float(np.median(cluster_durations))
+    )
+    if median_duration < OWNER_BOOTSTRAP_MIN_MEDIAN_DURATION_S:
+        return _bail_low_quality(
+            LOW_QUALITY_REASON_MEDIAN_DURATION_TOO_SHORT,
+            observed=median_duration,
+            threshold=OWNER_BOOTSTRAP_MIN_MEDIAN_DURATION_S,
+            segment_count=segment_count,
+            embeddings_count=embeddings_count,
+            source=source,
+        )
+
+    intra_cosines = _pairwise_cosines(cluster_embeddings)
+    intra_p25 = (
+        0.0 if intra_cosines.size == 0 else float(np.percentile(intra_cosines, 25))
+    )
+    if intra_p25 < OWNER_BOOTSTRAP_MIN_INTRA_COSINE_P25:
+        return _bail_low_quality(
+            LOW_QUALITY_REASON_CLUSTER_TOO_DIFFUSE,
+            observed=intra_p25,
+            threshold=OWNER_BOOTSTRAP_MIN_INTRA_COSINE_P25,
+            segment_count=segment_count,
+            embeddings_count=embeddings_count,
+            source=source,
+        )
+
+    return None
+
+
+def _collect_manual_tag_embeddings(
+    principal_id: str,
+) -> tuple[np.ndarray, list[dict[str, Any]]]:
+    """Load validated owner-tag embeddings and provenance for the principal."""
+    load_embeddings_file, _, _ = _routes_helpers()
+
+    rows = _load_manual_tag_rows(principal_id)
+    if not rows:
+        return np.empty((0, 256), dtype=np.float32), []
+
+    embeddings_cache: dict[
+        Path, tuple[np.ndarray, np.ndarray, np.ndarray | None] | None
+    ] = {}
+    fallback_cache: dict[Path, dict[int, float | None]] = {}
+    embedding_rows: list[np.ndarray] = []
+    provenance: list[dict[str, Any]] = []
+    for row in rows:
+        npz_path = row["segment_dir"] / f"{row['source']}.npz"
+        emb_data = embeddings_cache.setdefault(npz_path, load_embeddings_file(npz_path))
+        if emb_data is None:
+            continue
+        embeddings, statement_ids, durations_data = emb_data
+        sentence_id = row["sentence_id"]
+        matched_index = None
+        for idx, statement_id in enumerate(statement_ids):
+            if int(statement_id) == sentence_id:
+                matched_index = idx
+                break
+        if matched_index is None:
+            continue
+
+        duration_s: float | None
+        if durations_data is not None:
+            duration_s = float(durations_data[matched_index])
+        else:
+            fallback_durations = fallback_cache.setdefault(
+                row["jsonl_path"],
+                _fallback_statement_durations(row["jsonl_path"]),
+            )
+            duration_s = fallback_durations.get(sentence_id)
+
+        embedding_rows.append(np.asarray(embeddings[matched_index], dtype=np.float32))
+        provenance.append(
+            {
+                "day": row["day"],
+                "stream": row["stream"],
+                "segment_key": row["segment_key"],
+                "source": row["source"],
+                "sentence_id": sentence_id,
+                "duration_s": duration_s,
+            }
+        )
+
+    if not embedding_rows:
+        return np.empty((0, 256), dtype=np.float32), []
+    return np.vstack(embedding_rows).astype(np.float32, copy=False), provenance
+
+
+def load_owner_provisional_centroid(principal_id: str) -> np.ndarray | None:
+    """Load or rebuild a cached provisional owner centroid from manual tags."""
+    global _PROVISIONAL_GUARD_CACHE
+
+    centroid_path = journal_entity_memory_path(principal_id) / "owner_centroid.npz"
+    if centroid_path.exists():
+        clear_owner_provisional_cache(principal_id)
+        return None
+
+    voiceprints_path = journal_entity_memory_path(principal_id) / "voiceprints.npz"
+    if not voiceprints_path.exists():
+        clear_owner_provisional_cache(principal_id)
+        return None
+
+    try:
+        mtime_ns = voiceprints_path.stat().st_mtime_ns
+    except OSError:
+        clear_owner_provisional_cache(principal_id)
+        return None
+
+    rows = _load_manual_tag_rows(principal_id)
+    manual_tag_count = len(rows)
+    if manual_tag_count < OWNER_BOOTSTRAP_PROVISIONAL_GUARD_MIN_TAGS:
+        clear_owner_provisional_cache(principal_id)
+        return None
+
+    if _PROVISIONAL_GUARD_CACHE is not None:
+        cached = _PROVISIONAL_GUARD_CACHE.get(principal_id)
+        if (
+            cached is not None
+            and cached[0] == mtime_ns
+            and cached[1] == manual_tag_count
+        ):
+            return cached[2]
+
+    embeddings, _provenance = _collect_manual_tag_embeddings(principal_id)
+    embeddings_count = int(embeddings.shape[0])
+    if embeddings_count < OWNER_BOOTSTRAP_PROVISIONAL_GUARD_MIN_TAGS:
+        clear_owner_provisional_cache(principal_id)
+        return None
+
+    _load_embeddings_file, normalize_embedding, _scan_segment_embeddings = (
+        _routes_helpers()
+    )
+    centroid = normalize_embedding(np.mean(embeddings, axis=0))
+    if centroid is None:
+        clear_owner_provisional_cache(principal_id)
+        return None
+
+    if _PROVISIONAL_GUARD_CACHE is None:
+        _PROVISIONAL_GUARD_CACHE = {}
+    _PROVISIONAL_GUARD_CACHE[principal_id] = (mtime_ns, manual_tag_count, centroid)
+    return centroid
+
+
+def count_segments_with_embeddings() -> int:
+    """Count all journal segments that contain audio embedding files."""
+    return load_owner_embedding_inventory()["segments_available"]


 def _subsample_embeddings(
···
     }

     cluster_embeddings = embeddings_matrix[cluster_indices]
-    cluster_size = int(len(cluster_indices))
     embeddings_count = int(embeddings_matrix.shape[0])
-
-    if cluster_size < OWNER_BOOTSTRAP_MIN_STMTS:
-        return _bail_low_quality(
-            "too_few_stmts",
-            observed=cluster_size,
-            threshold=OWNER_BOOTSTRAP_MIN_STMTS,
-            segment_count=segment_count,
-            embeddings_count=embeddings_count,
-        )
-
     cluster_durations = [
-        provenance[int(i)]["duration_s"]
+        float(provenance[int(i)]["duration_s"])
         for i in cluster_indices
        if provenance[int(i)].get("duration_s") is not None
     ]
-    if not cluster_durations:
-        median_duration = 0.0
-    else:
-        median_duration = float(np.median(cluster_durations))
-    if median_duration < OWNER_BOOTSTRAP_MIN_MEDIAN_DURATION_S:
-        return _bail_low_quality(
-            "median_duration_too_short",
-            observed=median_duration,
-            threshold=OWNER_BOOTSTRAP_MIN_MEDIAN_DURATION_S,
-            segment_count=segment_count,
-            embeddings_count=embeddings_count,
-        )
-
-    intra_cosines = _pairwise_cosines(cluster_embeddings)
-    if intra_cosines.size == 0:
-        intra_p25 = 0.0
-    else:
-        intra_p25 = float(np.percentile(intra_cosines, 25))
-    if intra_p25 < OWNER_BOOTSTRAP_MIN_INTRA_COSINE_P25:
-        return _bail_low_quality(
-            "cluster_too_diffuse",
-            observed=intra_p25,
-            threshold=OWNER_BOOTSTRAP_MIN_INTRA_COSINE_P25,
-            segment_count=segment_count,
-            embeddings_count=embeddings_count,
-        )
+    low_quality = _apply_owner_quality_gates(
+        cluster_embeddings,
+        cluster_durations,
+        segment_count,
+        embeddings_count,
+        source="hdbscan",
+    )
+    if low_quality is not None:
+        return low_quality

     centroid = normalize_embedding(np.mean(cluster_embeddings, axis=0))
     if centroid is None:
···

     cluster_streams = {provenance[int(i)]["stream"] for i in cluster_indices}
     streams_represented = len(cluster_streams)
+    cluster_size = int(cluster_embeddings.shape[0])
     recommendation = "ready" if streams_represented > 1 else "single_stream"
     similarities = np.dot(cluster_embeddings, centroid)
     sorted_cluster_positions = np.argsort(similarities)[::-1]
···
     from think.entities.core import get_identity_names
     from think.entities.journal import (
         create_journal_entity,
-        ensure_journal_entity_memory,
         load_journal_entity,
     )
···
         data = np.load(candidate_path, allow_pickle=False)
         centroid = data["centroid"]
         cluster_size = int(np.asarray(data["cluster_size"]).item())
-        threshold = float(np.asarray(data["threshold"]).item())
-        version = str(np.asarray(data["version"]).item())
     except Exception as e:
         logger.warning("Failed to load owner candidate %s: %s", candidate_path, e)
         return {"error": "No candidate available"}
···
         entity_type="Person",
     )

-    owner_path = ensure_journal_entity_memory(principal["id"]) / "owner_centroid.npz"
-    np.savez_compressed(
-        owner_path,
-        centroid=np.asarray(centroid, dtype=np.float32).reshape(-1),
-        cluster_size=np.array(cluster_size, dtype=np.int32),
-        threshold=np.array(threshold, dtype=np.float32),
-        version=np.array(version),
-    )
+    _write_owner_centroid(principal["id"], np.asarray(centroid), cluster_size)
+    clear_owner_provisional_cache(principal["id"])
     candidate_path.unlink(missing_ok=True)

     update_state(
···
         "status": "confirmed",
         "principal_id": principal["id"],
         "cluster_size": cluster_size,
+    }
+
+
+def bootstrap_owner_from_manual_tags() -> dict[str, Any]:
+    """Promote validated principal manual tags into a confirmed owner centroid."""
+    _, normalize_embedding, _ = _routes_helpers()
+
+    principal_id = _principal_id_or_none()
+    if principal_id is None:
+        return {"error": "No principal entity found"}
+
+    centroid_path = journal_entity_memory_path(principal_id) / "owner_centroid.npz"
+    if centroid_path.exists():
+        clear_owner_provisional_cache(principal_id)
+        cluster_size = None
+        try:
+            with np.load(centroid_path, allow_pickle=False) as data:
+                cluster_size = int(np.asarray(data["cluster_size"]).item())
+        except Exception as exc:
+            logger.warning(
+                "Failed to read owner centroid metadata %s: %s",
+                centroid_path,
+                exc,
+            )
+        return {
+            "status": "confirmed",
+            "principal_id": principal_id,
+            "cluster_size": cluster_size,
+        }
+
+    embeddings, provenance = _collect_manual_tag_embeddings(principal_id)
+    segment_count = len(
+        {
+            (record["day"], record["stream"], record["segment_key"])
+            for record in provenance
+            if record.get("stream")
+        }
+    )
+    embeddings_count = int(embeddings.shape[0])
+    durations = [
+        float(record["duration_s"])
+        for record in provenance
+        if record.get("duration_s") is not None
+    ]
+    low_quality = _apply_owner_quality_gates(
+        embeddings,
+        durations,
+        segment_count,
+        embeddings_count,
+        source="manual_tags",
+    )
+    if low_quality is not None:
+        return low_quality
+
+    centroid = normalize_embedding(np.mean(embeddings, axis=0))
+    if centroid is None:
+        return _bail_low_quality(
+            LOW_QUALITY_REASON_CLUSTER_TOO_DIFFUSE,
+            observed=0.0,
+            threshold=OWNER_BOOTSTRAP_MIN_INTRA_COSINE_P25,
+            segment_count=segment_count,
+            embeddings_count=embeddings_count,
+            source="manual_tags",
+        )
+
+    _write_owner_centroid(principal_id, centroid, embeddings_count)
+    clear_owner_provisional_cache(principal_id)
+    update_state(
+        "voiceprint",
+        {
+            "status": "confirmed",
+            "cluster_size": embeddings_count,
+            "confirmed_at": _iso_now(),
+        },
+    )
+    return {
+        "status": "confirmed",
+        "principal_id": principal_id,
+        "cluster_size": embeddings_count,
     }
```
+85 -15
apps/speakers/routes.py
```diff
···
 )

 from apps.speakers.discovery import discover_unknown_speakers, identify_cluster
+from apps.speakers.encoder_config import OWNER_THRESHOLD
 from apps.speakers.owner import (
+    bootstrap_owner_from_manual_tags,
     classify_sentences,
     confirm_owner_candidate,
-    count_segments_with_embeddings,
     detect_owner_candidate,
+    load_owner_bootstrap_diagnostics,
+    load_owner_provisional_centroid,
     reject_owner_candidate,
 )
 from apps.utils import log_app_action
···
     from apps.speakers.owner import load_owner_centroid

     centroid_data = load_owner_centroid()
-    if centroid_data is None:
-        return False
-    owner_centroid, owner_threshold = centroid_data
+    if centroid_data is not None:
+        owner_centroid, owner_threshold = centroid_data
+    else:
+        principal_id = _principal_id_or_none()
+        if principal_id is None:
+            return False
+        owner_centroid = load_owner_provisional_centroid(principal_id)
+        if owner_centroid is None:
+            return False
+        owner_threshold = OWNER_THRESHOLD
     score = float(np.dot(embedding, owner_centroid))
     return score >= owner_threshold


+def _principal_id_or_none() -> str | None:
+    """Return the current journal principal id if one exists."""
+    principal = get_journal_principal()
+    if principal is None:
+        return None
+    return str(principal["id"])
+
+
+def _owner_bootstrap_status_fields() -> dict[str, Any]:
+    """Return shared owner bootstrap diagnostics for status surfaces."""
+    diagnostics = load_owner_bootstrap_diagnostics(_principal_id_or_none())
+    return {
+        **diagnostics,
+        "segments_with_embeddings": diagnostics["segments_available"],
+    }
+
+
+def _maybe_bootstrap_owner_from_attestation(
+    principal_id: str | None, speaker_id: str | None
+) -> None:
+    """Refresh manual owner bootstrap state after a principal attestation."""
+    if principal_id is None or speaker_id != principal_id:
+        return
+    try:
+        result = bootstrap_owner_from_manual_tags()
+        if "error" in result:
+            logger.warning(
+                "owner manual bootstrap failed after attestation: %s",
+                result["error"],
+            )
+    except Exception:
+        logger.exception("owner manual bootstrap failed after attestation")
+
+
 def _resolve_entity_display(
     entity_id: str,
     entity_cache: dict,
···
     if emb is None:
         return error_response("Sentence embedding not found", 404)

-    principal = get_journal_principal()
-    principal_id = principal["id"] if principal else None
+    principal_id = _principal_id_or_none()
     if speaker != principal_id and _check_owner_contamination(emb):
         return error_response("Embedding too similar to owner voice — cannot save", 400)
···
             "speaker": speaker,
         },
     )
+    _maybe_bootstrap_owner_from_attestation(principal_id, speaker)

     return success_response({"status": "confirmed", "speaker": speaker})
···
     if emb is None:
         return error_response("Sentence embedding not found", 404)

-    principal = get_journal_principal()
-    principal_id = principal["id"] if principal else None
+    principal_id = _principal_id_or_none()
     if new_speaker != principal_id and _check_owner_contamination(emb):
         return error_response("Embedding too similar to owner voice — cannot save", 400)
···
             "voiceprints_removed": voiceprints_removed,
         },
     )
+    _maybe_bootstrap_owner_from_attestation(principal_id, new_speaker)

     return success_response(
         {
···
     if emb is None:
         return error_response("Sentence embedding not found", 404)

-    principal = get_journal_principal()
-    principal_id = principal["id"] if principal else None
+    principal_id = _principal_id_or_none()
     if speaker != principal_id and _check_owner_contamination(emb):
         return error_response("Embedding too similar to owner voice — cannot save", 400)
···
             "speaker": speaker,
         },
     )
+    _maybe_bootstrap_owner_from_attestation(principal_id, speaker)

     return success_response({"status": "assigned", "speaker": speaker})
···
     """Return the current owner voiceprint confirmation state."""
     voiceprint = get_current().get("voiceprint", {})
     status = voiceprint.get("status", "none")
+    diagnostics = _owner_bootstrap_status_fields()

     if status == "confirmed":
         return jsonify({"status": "confirmed"})
···
     return jsonify(
         {
             "status": "low_quality",
+            "source": voiceprint.get("source", "hdbscan"),
             "low_quality_reason": voiceprint.get("low_quality_reason", ""),
             "observed_value": voiceprint.get("observed_value", 0.0),
             "threshold_value": voiceprint.get("threshold_value", 0.0),
+            **diagnostics,
         }
     )
···
         return jsonify({"status": "no_cluster"})

     if status in {"none", "rejected"}:
-        seg_count = count_segments_with_embeddings()
-        if seg_count > 0:
+        if diagnostics["segments_available"] > 0:
             return jsonify(
                 {
                     "status": "needs_detection",
-                    "segments_with_embeddings": seg_count,
+                    **diagnostics,
                 }
             )
-        return jsonify({"status": "none", "segments_with_embeddings": seg_count})
+        return jsonify(
+            {
+                "status": "none",
+                **diagnostics,
+            }
+        )

-    return jsonify({"status": "none"})
+    return jsonify({"status": "none", **diagnostics})


 @speakers_bp.route("/api/owner/detect", methods=["POST"])
 def api_owner_detect() -> Any:
     """Run owner voice candidate detection."""
     result = detect_owner_candidate()
+    return jsonify(result)
+
+
+@speakers_bp.route("/api/owner/build-from-tags", methods=["POST"])
+def api_owner_build_from_tags() -> Any:
+    """Build a confirmed owner centroid directly from validated manual tags."""
+    result = bootstrap_owner_from_manual_tags()
+    if "error" in result:
+        return error_response(result["error"], 400)
+    if result.get("status") == "confirmed":
+        log_app_action(
+            app="speakers",
+            facet=None,
+            action="owner_voiceprint_build_from_tags",
+            params={
+                "principal_id": result["principal_id"],
+                "cluster_size": result.get("cluster_size"),
+            },
+        )
     return jsonify(result)
```
+12 -1
apps/speakers/status.py
```diff
···


 def _owner_section() -> dict[str, Any]:
-    from apps.speakers.owner import load_owner_centroid
+    from apps.speakers.owner import (
+        load_owner_bootstrap_diagnostics,
+        load_owner_centroid,
+    )
+    from think.entities.journal import get_journal_principal

     voiceprint = get_current().get("voiceprint", {})
     status = voiceprint.get("status", "none")
     result: dict[str, Any] = {"status": status}
+    principal = get_journal_principal()
+    principal_id = str(principal["id"]) if principal else None
+    diagnostics = load_owner_bootstrap_diagnostics(principal_id)

     if status == "candidate":
         result["cluster_size"] = voiceprint.get("cluster_size")
···
         result["streams_represented"] = voiceprint.get("streams_represented")
         result["recommendation"] = voiceprint.get("recommendation")
     elif status == "low_quality":
+        result["source"] = voiceprint.get("source", "hdbscan")
         result["low_quality_reason"] = voiceprint.get("low_quality_reason", "")
         result["observed_value"] = voiceprint.get("observed_value", 0.0)
         result["threshold_value"] = voiceprint.get("threshold_value", 0.0)
         result["segments_checked"] = voiceprint.get("segments_checked", 0)
         result["attempted_at"] = voiceprint.get("attempted_at", "")
+        result.update(diagnostics)
     elif status == "no_cluster":
         result["segments_checked"] = voiceprint.get("segments_checked")
         result["attempted_at"] = voiceprint.get("attempted_at")
+    elif status in {"none", "rejected"}:
+        result.update(diagnostics)

     result["centroid_saved"] = load_owner_centroid() is not None
     return result
```
+3
apps/speakers/tests/conftest.py
··· 60 60 import think.utils 61 61 62 62 think.utils._journal_path_cache = None 63 + from apps.speakers.owner import clear_owner_provisional_cache 64 + 65 + clear_owner_provisional_cache() 63 66 64 67 def _segment_dirs( 65 68 self,
+5
apps/speakers/tests/test_encoder_config.py
··· 15 15 assert encoder_config.OWNER_BOOTSTRAP_MIN_STMTS == 30 16 16 assert encoder_config.OWNER_BOOTSTRAP_MIN_MEDIAN_DURATION_S == 1.5 17 17 assert encoder_config.OWNER_BOOTSTRAP_MIN_INTRA_COSINE_P25 == 0.30 18 + assert encoder_config.OWNER_BOOTSTRAP_PROVISIONAL_GUARD_MIN_TAGS == 5 18 19 assert encoder_config.NOISY_FLYWHEEL_OVERLAP_MAX == 0.10 19 20 assert encoder_config.OVERLAP_DETECTOR_ID == MAIN_OVERLAP_DETECTOR_ID 20 21 assert encoder_config.OVERLAP_DETECTOR_SHA256 == PYANNOTE_OVERLAP_MODEL_SHA256 ··· 36 37 owner.OWNER_BOOTSTRAP_MIN_INTRA_COSINE_P25 37 38 is encoder_config.OWNER_BOOTSTRAP_MIN_INTRA_COSINE_P25 38 39 ) 40 + assert ( 41 + owner.OWNER_BOOTSTRAP_PROVISIONAL_GUARD_MIN_TAGS 42 + is encoder_config.OWNER_BOOTSTRAP_PROVISIONAL_GUARD_MIN_TAGS 43 + )
+357 -1
apps/speakers/tests/test_owner.py
··· 102 102 return journal / "awareness" / "owner_candidate.npz" 103 103 104 104 105 + def _normalize_rows(embeddings: np.ndarray) -> np.ndarray: 106 + norms = np.linalg.norm(embeddings, axis=1, keepdims=True) 107 + return embeddings / np.where(norms == 0, 1.0, norms) 108 + 109 + 110 + def _save_manual_owner_tags( 111 + env, 112 + principal_id: str, 113 + day: str, 114 + segment_key: str, 115 + embeddings: np.ndarray, 116 + *, 117 + source: str = "audio", 118 + method: str = "user_assigned", 119 + durations_s: np.ndarray | None = None, 120 + overlap_fraction: float = 0.0, 121 + ) -> Path: 122 + from apps.speakers.routes import _save_voiceprint 123 + 124 + normalized_embeddings = _normalize_rows(np.asarray(embeddings, dtype=np.float32)) 125 + segment_dir = _write_segment( 126 + env.journal, 127 + day, 128 + "test", 129 + segment_key, 130 + source, 131 + normalized_embeddings, 132 + durations_s=durations_s, 133 + ) 134 + env.create_speaker_labels( 135 + day, 136 + segment_key, 137 + [ 138 + { 139 + "sentence_id": idx, 140 + "speaker": principal_id, 141 + "confidence": "high", 142 + "method": method, 143 + } 144 + for idx in range(1, len(normalized_embeddings) + 1) 145 + ], 146 + ) 147 + _rewrite_segment_header( 148 + segment_dir, 149 + source, 150 + overlap_fraction=overlap_fraction, 151 + overlap_detector=OVERLAP_DETECTOR_ID, 152 + ) 153 + for idx, embedding in enumerate(normalized_embeddings, start=1): 154 + _save_voiceprint( 155 + principal_id, 156 + embedding, 157 + day, 158 + segment_key, 159 + source, 160 + idx, 161 + stream="test", 162 + ) 163 + return segment_dir 164 + 165 + 105 166 def test_count_segments_with_embeddings(speakers_env): 106 167 from apps.speakers.owner import count_segments_with_embeddings 107 168 ··· 386 447 assert result["cluster_size"] == 60 387 448 388 449 450 + def test_bootstrap_owner_from_manual_tags_confirms(speakers_env): 451 + from apps.speakers.encoder_config import OWNER_THRESHOLD 452 + from apps.speakers.owner import 
bootstrap_owner_from_manual_tags 453 + 454 + env = speakers_env() 455 + principal_dir = env.create_entity("Self Person", is_principal=True) 456 + principal_id = "self_person" 457 + rng = np.random.default_rng(4) 458 + base = np.zeros((10, 256), dtype=np.float32) 459 + base[:, 0] = 1.0 460 + durations = np.full(10, 2.4, dtype=np.float32) 461 + for idx in range(3): 462 + embeddings = base + rng.normal(scale=0.01, size=(10, 256)).astype(np.float32) 463 + _save_manual_owner_tags( 464 + env, 465 + principal_id, 466 + "20240101", 467 + f"{9 + idx:02d}0000_300", 468 + embeddings, 469 + durations_s=durations, 470 + ) 471 + 472 + result = bootstrap_owner_from_manual_tags() 473 + 474 + owner_path = principal_dir / "owner_centroid.npz" 475 + assert result["status"] == "confirmed" 476 + assert result["principal_id"] == principal_id 477 + assert result["cluster_size"] == 30 478 + assert owner_path.exists() 479 + with np.load(owner_path, allow_pickle=False) as data: 480 + assert set(data.files) == {"centroid", "cluster_size", "threshold", "version"} 481 + centroid = data["centroid"] 482 + cluster_size = int(np.asarray(data["cluster_size"]).item()) 483 + threshold = float(np.asarray(data["threshold"]).item()) 484 + version = str(np.asarray(data["version"]).item()) 485 + assert cluster_size == 30 486 + assert np.isclose(np.linalg.norm(centroid), 1.0) 487 + assert np.isclose(threshold, OWNER_THRESHOLD) 488 + assert version 489 + assert get_current()["voiceprint"]["status"] == "confirmed" 490 + 491 + 492 + def test_bootstrap_owner_from_manual_tags_too_few_stmts(speakers_env): 493 + from apps.speakers.owner import ( 494 + LOW_QUALITY_REASON_TOO_FEW_STMTS, 495 + bootstrap_owner_from_manual_tags, 496 + ) 497 + 498 + env = speakers_env() 499 + env.create_entity("Self Person", is_principal=True) 500 + embeddings = np.zeros((10, 256), dtype=np.float32) 501 + embeddings[:, 0] = 1.0 502 + _save_manual_owner_tags( 503 + env, 504 + "self_person", 505 + "20240101", 506 + "090000_300", 507 + 
embeddings, 508 + durations_s=np.full(10, 2.0, dtype=np.float32), 509 + ) 510 + 511 + result = bootstrap_owner_from_manual_tags() 512 + 513 + assert result["status"] == "low_quality" 514 + assert result["source"] == "manual_tags" 515 + assert result["low_quality_reason"] == LOW_QUALITY_REASON_TOO_FEW_STMTS 516 + assert get_current()["voiceprint"]["source"] == "manual_tags" 517 + 518 + 519 + def test_bootstrap_owner_from_manual_tags_short_durations(speakers_env): 520 + from apps.speakers.owner import ( 521 + LOW_QUALITY_REASON_MEDIAN_DURATION_TOO_SHORT, 522 + bootstrap_owner_from_manual_tags, 523 + ) 524 + 525 + env = speakers_env() 526 + env.create_entity("Self Person", is_principal=True) 527 + base = np.zeros((10, 256), dtype=np.float32) 528 + base[:, 0] = 1.0 529 + for idx in range(3): 530 + _save_manual_owner_tags( 531 + env, 532 + "self_person", 533 + "20240101", 534 + f"{9 + idx:02d}0000_300", 535 + base, 536 + durations_s=np.full(10, 0.3, dtype=np.float32), 537 + ) 538 + 539 + result = bootstrap_owner_from_manual_tags() 540 + 541 + assert result["status"] == "low_quality" 542 + assert result["source"] == "manual_tags" 543 + assert result["low_quality_reason"] == LOW_QUALITY_REASON_MEDIAN_DURATION_TOO_SHORT 544 + 545 + 546 + def test_bootstrap_owner_from_manual_tags_diffuse_cluster(speakers_env): 547 + from apps.speakers.owner import ( 548 + LOW_QUALITY_REASON_CLUSTER_TOO_DIFFUSE, 549 + bootstrap_owner_from_manual_tags, 550 + ) 551 + 552 + env = speakers_env() 553 + env.create_entity("Self Person", is_principal=True) 554 + rng = np.random.default_rng(9) 555 + for idx in range(3): 556 + _save_manual_owner_tags( 557 + env, 558 + "self_person", 559 + "20240101", 560 + f"{9 + idx:02d}0000_300", 561 + _noise_embeddings(10, rng), 562 + durations_s=np.full(10, 2.0, dtype=np.float32), 563 + ) 564 + 565 + result = bootstrap_owner_from_manual_tags() 566 + 567 + assert result["status"] == "low_quality" 568 + assert result["source"] == "manual_tags" 569 + assert 
result["low_quality_reason"] == LOW_QUALITY_REASON_CLUSTER_TOO_DIFFUSE 570 + 571 + 572 + def test_manual_tag_overlap_guard_excludes_rows(speakers_env): 573 + from apps.speakers.owner import ( 574 + LOW_QUALITY_REASON_TOO_FEW_STMTS, 575 + bootstrap_owner_from_manual_tags, 576 + count_manual_tag_embeddings, 577 + ) 578 + 579 + env = speakers_env() 580 + env.create_entity("Self Person", is_principal=True) 581 + embeddings = np.zeros((5, 256), dtype=np.float32) 582 + embeddings[:, 0] = 1.0 583 + _save_manual_owner_tags( 584 + env, 585 + "self_person", 586 + "20240101", 587 + "090000_300", 588 + embeddings, 589 + durations_s=np.full(5, 2.0, dtype=np.float32), 590 + overlap_fraction=0.0, 591 + ) 592 + _save_manual_owner_tags( 593 + env, 594 + "self_person", 595 + "20240101", 596 + "100000_300", 597 + embeddings, 598 + durations_s=np.full(5, 2.0, dtype=np.float32), 599 + overlap_fraction=0.20, 600 + ) 601 + 602 + assert count_manual_tag_embeddings("self_person") == 5 603 + result = bootstrap_owner_from_manual_tags() 604 + assert result["low_quality_reason"] == LOW_QUALITY_REASON_TOO_FEW_STMTS 605 + 606 + 607 + def test_owner_centroid_schema_parity_between_confirm_and_manual_build(speakers_env): 608 + from apps.speakers.encoder_config import OWNER_THRESHOLD 609 + from apps.speakers.owner import ( 610 + bootstrap_owner_from_manual_tags, 611 + clear_owner_provisional_cache, 612 + confirm_owner_candidate, 613 + ) 614 + 615 + env = speakers_env() 616 + principal_dir = env.create_entity("Self Person", is_principal=True) 617 + candidate_path = _candidate_path(env.journal) 618 + candidate_path.parent.mkdir(parents=True, exist_ok=True) 619 + centroid = _normalized(np.array([1.0] + [0.0] * 255, dtype=np.float32)) 620 + np.savez_compressed( 621 + candidate_path, 622 + centroid=centroid, 623 + cluster_size=np.array(40, dtype=np.int32), 624 + threshold=np.array(OWNER_THRESHOLD, dtype=np.float32), 625 + version=np.array("2026-03-19T12:00:00"), 626 + ) 627 + 628 + 
confirm_owner_candidate() 629 + owner_path = principal_dir / "owner_centroid.npz" 630 + with np.load(owner_path, allow_pickle=False) as data: 631 + confirmed_keys = set(data.files) 632 + 633 + owner_path.unlink() 634 + clear_owner_provisional_cache("self_person") 635 + update_state("voiceprint", {"status": "none"}) 636 + 637 + base = np.zeros((10, 256), dtype=np.float32) 638 + base[:, 0] = 1.0 639 + for idx in range(3): 640 + _save_manual_owner_tags( 641 + env, 642 + "self_person", 643 + "20240101", 644 + f"{9 + idx:02d}0000_300", 645 + base, 646 + durations_s=np.full(10, 2.0, dtype=np.float32), 647 + ) 648 + 649 + bootstrap_owner_from_manual_tags() 650 + with np.load(owner_path, allow_pickle=False) as data: 651 + manual_keys = set(data.files) 652 + 653 + assert ( 654 + confirmed_keys 655 + == manual_keys 656 + == { 657 + "centroid", 658 + "cluster_size", 659 + "threshold", 660 + "version", 661 + } 662 + ) 663 + 664 + 665 + def test_bootstrap_owner_from_manual_tags_is_idempotent(speakers_env): 666 + from apps.speakers.owner import bootstrap_owner_from_manual_tags 667 + 668 + env = speakers_env() 669 + env.create_entity("Self Person", is_principal=True) 670 + base = np.zeros((10, 256), dtype=np.float32) 671 + base[:, 0] = 1.0 672 + for idx in range(3): 673 + _save_manual_owner_tags( 674 + env, 675 + "self_person", 676 + "20240101", 677 + f"{9 + idx:02d}0000_300", 678 + base, 679 + durations_s=np.full(10, 2.1, dtype=np.float32), 680 + ) 681 + 682 + first = bootstrap_owner_from_manual_tags() 683 + state_before = dict(get_current()["voiceprint"]) 684 + second = bootstrap_owner_from_manual_tags() 685 + 686 + assert first["status"] == "confirmed" 687 + assert second["status"] == "confirmed" 688 + assert second["cluster_size"] == first["cluster_size"] 689 + assert dict(get_current()["voiceprint"]) == state_before 690 + 691 + 389 692 def test_load_owner_centroid_no_principal(speakers_env): 390 693 from apps.speakers.owner import load_owner_centroid 391 694 ··· 478 781 
response = client.get("/app/speakers/api/owner/status") 479 782 480 783 assert response.status_code == 200 481 - assert response.get_json() == {"status": "none", "segments_with_embeddings": 0} 784 + assert response.get_json() == { 785 + "status": "none", 786 + "manual_tags_count": 0, 787 + "segments_available": 0, 788 + "segments_with_embeddings": 0, 789 + "embeddings_available": 0, 790 + "streams_represented": 0, 791 + "can_build_from_tags": False, 792 + } 482 793 483 794 484 795 def test_api_owner_status_needs_detection(speakers_env): ··· 500 811 assert response.status_code == 200 501 812 assert data["status"] == "needs_detection" 502 813 assert data["segments_with_embeddings"] == 50 814 + assert data["segments_available"] == 50 815 + assert data["embeddings_available"] == 250 816 + assert data["manual_tags_count"] == 0 817 + assert data["streams_represented"] == 0 818 + assert data["can_build_from_tags"] is False 819 + 820 + 821 + def test_api_owner_status_manual_tags_count(speakers_env): 822 + from apps.speakers.routes import speakers_bp 823 + 824 + env = speakers_env() 825 + env.create_entity("Self Person", is_principal=True) 826 + embeddings = np.zeros((7, 256), dtype=np.float32) 827 + embeddings[:, 0] = 1.0 828 + _save_manual_owner_tags( 829 + env, 830 + "self_person", 831 + "20240101", 832 + "090000_300", 833 + embeddings, 834 + durations_s=np.full(7, 2.0, dtype=np.float32), 835 + ) 836 + 837 + app = Flask(__name__) 838 + app.register_blueprint(speakers_bp) 839 + 840 + with app.test_client() as client: 841 + response = client.get("/app/speakers/api/owner/status") 842 + 843 + data = response.get_json() 844 + assert response.status_code == 200 845 + assert data["status"] == "needs_detection" 846 + assert data["manual_tags_count"] == 7 847 + assert data["segments_available"] == 1 848 + assert data["segments_with_embeddings"] == 1 849 + assert data["embeddings_available"] == 7 850 + assert data["streams_represented"] == 1 851 + assert 
data["can_build_from_tags"] is False 503 852 504 853 505 854 def test_api_owner_status_candidate(speakers_env): ··· 548 897 assert response.status_code == 200 549 898 assert response.get_json() == { 550 899 "status": "low_quality", 900 + "source": "hdbscan", 551 901 "low_quality_reason": "too_few_stmts", 552 902 "observed_value": 5, 553 903 "threshold_value": 30, 904 + "manual_tags_count": 0, 905 + "segments_available": 0, 906 + "segments_with_embeddings": 0, 907 + "embeddings_available": 0, 908 + "streams_represented": 0, 909 + "can_build_from_tags": False, 554 910 } 555 911 556 912
+164
apps/speakers/tests/test_routes.py
··· 23 23 ] 24 24 25 25 26 + def _save_principal_manual_tags( 27 + env, 28 + principal_id: str, 29 + count: int, 30 + *, 31 + day: str = "20240101", 32 + segment_key: str = "143022_300", 33 + source: str = "mic_audio", 34 + embeddings: np.ndarray | None = None, 35 + ) -> np.ndarray: 36 + from apps.speakers.routes import _save_voiceprint 37 + 38 + if embeddings is None: 39 + embeddings = np.zeros((count, 256), dtype=np.float32) 40 + embeddings[:, 0] = 1.0 41 + env.create_segment( 42 + day, 43 + segment_key, 44 + [source], 45 + num_sentences=count, 46 + embeddings=embeddings, 47 + ) 48 + env.create_speaker_labels( 49 + day, 50 + segment_key, 51 + [ 52 + { 53 + "sentence_id": idx, 54 + "speaker": principal_id, 55 + "confidence": "high", 56 + "method": "user_assigned", 57 + } 58 + for idx in range(1, count + 1) 59 + ], 60 + ) 61 + for idx, embedding in enumerate(embeddings, start=1): 62 + _save_voiceprint( 63 + principal_id, 64 + embedding, 65 + day, 66 + segment_key, 67 + source, 68 + idx, 69 + stream="test", 70 + ) 71 + return embeddings 72 + 73 + 26 74 def test_normalize_embedding(): 27 75 """Test L2 normalization of embeddings.""" 28 76 from apps.speakers.routes import _normalize_embedding ··· 281 329 meta2 = json.loads(data["metadata"][1]) 282 330 assert meta1["day"] == "20240101" 283 331 assert meta2["day"] == "20240102" 332 + 333 + 334 + def test_check_owner_contamination_uses_provisional_centroid(speakers_env): 335 + from apps.speakers.routes import _check_owner_contamination 336 + 337 + env = speakers_env() 338 + env.create_entity("Self Person", is_principal=True) 339 + _save_principal_manual_tags(env, "self_person", 5) 340 + 341 + similar = np.zeros(256, dtype=np.float32) 342 + similar[0] = 1.0 343 + dissimilar = np.zeros(256, dtype=np.float32) 344 + dissimilar[1] = 1.0 345 + 346 + assert _check_owner_contamination(similar) is True 347 + assert _check_owner_contamination(dissimilar) is False 348 + 349 + 350 + def 
test_check_owner_contamination_below_provisional_min_tags(speakers_env): 351 + from apps.speakers.routes import _check_owner_contamination 352 + 353 + env = speakers_env() 354 + env.create_entity("Self Person", is_principal=True) 355 + _save_principal_manual_tags(env, "self_person", 4) 356 + 357 + similar = np.zeros(256, dtype=np.float32) 358 + similar[0] = 1.0 359 + dissimilar = np.zeros(256, dtype=np.float32) 360 + dissimilar[1] = 1.0 361 + 362 + assert _check_owner_contamination(similar) is False 363 + assert _check_owner_contamination(dissimilar) is False 364 + 365 + 366 + def test_check_owner_contamination_invalidates_cached_provisional_count(speakers_env): 367 + from apps.speakers.routes import _check_owner_contamination 368 + 369 + env = speakers_env() 370 + env.create_entity("Self Person", is_principal=True) 371 + _save_principal_manual_tags(env, "self_person", 5) 372 + 373 + similar = np.zeros(256, dtype=np.float32) 374 + similar[0] = 1.0 375 + 376 + assert _check_owner_contamination(similar) is True 377 + 378 + labels_path = ( 379 + env.journal 380 + / "chronicle" 381 + / "20240101" 382 + / "test" 383 + / "143022_300" 384 + / "talents" 385 + / "speaker_labels.json" 386 + ) 387 + labels = json.loads(labels_path.read_text(encoding="utf-8")) 388 + labels["labels"][0]["speaker"] = "other_person" 389 + labels_path.write_text(json.dumps(labels, indent=2), encoding="utf-8") 390 + 391 + assert _check_owner_contamination(similar) is False 392 + 393 + 394 + def test_check_owner_contamination_prefers_confirmed_centroid(speakers_env): 395 + from apps.speakers.encoder_config import OWNER_THRESHOLD 396 + from apps.speakers.routes import _check_owner_contamination 397 + 398 + env = speakers_env() 399 + principal_dir = env.create_entity("Self Person", is_principal=True) 400 + _save_principal_manual_tags(env, "self_person", 5) 401 + 402 + similar = np.zeros(256, dtype=np.float32) 403 + similar[0] = 1.0 404 + confirmed = np.zeros(256, dtype=np.float32) 405 + confirmed[1] = 
1.0 406 + 407 + assert _check_owner_contamination(similar) is True 408 + 409 + np.savez_compressed( 410 + principal_dir / "owner_centroid.npz", 411 + centroid=confirmed, 412 + cluster_size=np.array(30, dtype=np.int32), 413 + threshold=np.array(OWNER_THRESHOLD, dtype=np.float32), 414 + version=np.array("2026-04-25T12:00:00"), 415 + ) 416 + 417 + assert _check_owner_contamination(similar) is False 418 + assert _check_owner_contamination(confirmed) is True 419 + 420 + 421 + def test_api_owner_build_from_tags(speakers_env): 422 + from apps.speakers.routes import speakers_bp 423 + 424 + env = speakers_env() 425 + principal_dir = env.create_entity("Self Person", is_principal=True) 426 + for idx in range(3): 427 + _save_principal_manual_tags( 428 + env, 429 + "self_person", 430 + 10, 431 + day="20240101", 432 + segment_key=f"{9 + idx:02d}0000_300", 433 + source="audio", 434 + ) 435 + 436 + app = Flask(__name__) 437 + app.register_blueprint(speakers_bp) 438 + 439 + with app.test_client() as client: 440 + resp = client.post("/app/speakers/api/owner/build-from-tags") 441 + 442 + data = resp.get_json() 443 + assert resp.status_code == 200 444 + assert data["status"] == "confirmed" 445 + assert data["principal_id"] == "self_person" 446 + assert data["cluster_size"] == 30 447 + assert (principal_dir / "owner_centroid.npz").exists() 284 448 285 449 286 450 def test_load_embeddings_file(speakers_env):
+50
apps/speakers/tests/test_status.py
··· 5 5 6 6 from __future__ import annotations 7 7 8 + import numpy as np 9 + 10 + 11 + def _save_principal_manual_tags(env, principal_id: str, count: int) -> None: 12 + from apps.speakers.routes import _save_voiceprint 13 + 14 + embeddings = np.zeros((count, 256), dtype=np.float32) 15 + embeddings[:, 0] = 1.0 16 + env.create_segment("20240101", "090000_300", ["audio"], embeddings=embeddings) 17 + env.create_speaker_labels( 18 + "20240101", 19 + "090000_300", 20 + [ 21 + { 22 + "sentence_id": idx, 23 + "speaker": principal_id, 24 + "confidence": "high", 25 + "method": "user_assigned", 26 + } 27 + for idx in range(1, count + 1) 28 + ], 29 + ) 30 + for idx, embedding in enumerate(embeddings, start=1): 31 + _save_voiceprint( 32 + principal_id, 33 + embedding, 34 + "20240101", 35 + "090000_300", 36 + "audio", 37 + idx, 38 + stream="test", 39 + ) 40 + 8 41 9 42 def test_status_all_sections(speakers_env): 10 43 from apps.speakers.status import get_speakers_status ··· 26 59 result = get_speakers_status(section="owner") 27 60 assert "status" in result 28 61 assert "centroid_saved" in result 62 + 63 + 64 + def test_status_owner_includes_bootstrap_diagnostics(speakers_env): 65 + from apps.speakers.status import get_speakers_status 66 + 67 + env = speakers_env() 68 + env.create_entity("Self Person", is_principal=True) 69 + _save_principal_manual_tags(env, "self_person", 7) 70 + 71 + result = get_speakers_status(section="owner") 72 + 73 + assert result["status"] == "none" 74 + assert result["manual_tags_count"] == 7 75 + assert result["segments_available"] == 1 76 + assert result["embeddings_available"] == 7 77 + assert result["streams_represented"] == 1 78 + assert result["can_build_from_tags"] is False 29 79 30 80 31 81 def test_status_unknown_section(speakers_env):
+149 -14
apps/speakers/workspace.html
··· 646 646 border-color: #3b82f6; 647 647 } 648 648 649 + .spk-owner-diagnostics { 650 + border: 1px solid #bfdbfe; 651 + border-radius: 10px; 652 + background: rgba(255, 255, 255, 0.72); 653 + } 654 + 655 + .spk-owner-diagnostics-summary { 656 + cursor: pointer; 657 + padding: 10px 12px; 658 + font-size: 13px; 659 + font-weight: 600; 660 + color: #1d4ed8; 661 + } 662 + 663 + .spk-owner-diagnostics-summary::-webkit-details-marker { 664 + display: none; 665 + } 666 + 667 + .spk-owner-diagnostics-body { 668 + padding: 0 12px 12px; 669 + display: flex; 670 + flex-direction: column; 671 + gap: 8px; 672 + } 673 + 674 + .spk-owner-diagnostics-line { 675 + font-size: 12px; 676 + color: #334155; 677 + line-height: 1.5; 678 + } 679 + 680 + .spk-owner-toast { 681 + margin-top: 6px; 682 + } 683 + 649 684 .spk-discovery-banner { 650 685 display: none; 651 686 margin-bottom: 8px; ··· 792 827 793 828 /* Banner buttons need darker ring for contrast on colored backgrounds */ 794 829 .spk-owner-btn:focus-visible, 795 - .spk-discovery-btn:focus-visible { 830 + .spk-discovery-btn:focus-visible, 831 + .spk-owner-diagnostics-summary:focus-visible { 796 832 outline: 2px solid #1d4ed8; 797 833 outline-offset: 2px; 798 834 } ··· 1177 1213 let playingSentenceId = null; 1178 1214 let detailExpanded = false; 1179 1215 let ownerDetectionInFlight = false; 1216 + const OWNER_HELP_TOAST_KEY = 'speakers-owner-help-toast'; 1217 + const OWNER_HELP_TOAST_MESSAGE = 'Click on segments and tag your own statements with the assign button. 
Solstone needs ~30 longer ones to lock in your voice.'; 1180 1218 1181 1219 checkOwnerStatus(); 1182 1220 loadSegments(); ··· 1287 1325 function checkOwnerStatus() { 1288 1326 window.apiJson('/app/speakers/api/owner/status') 1289 1327 .then(data => { 1290 - if (data.status === 'needs_detection') { 1328 + if ( 1329 + data.status === 'needs_detection' 1330 + || data.status === 'low_quality' 1331 + || (data.status === 'none' && data.segments_with_embeddings > 0) 1332 + ) { 1291 1333 hideDiscovery(); 1292 - renderOwnerBanner('Analyzing voice patterns...'); 1293 - if (!ownerDetectionInFlight) { 1334 + renderOwnerNotReady(data); 1335 + if (data.status === 'needs_detection' && !ownerDetectionInFlight) { 1294 1336 ownerDetectionInFlight = true; 1295 1337 window.apiJson('/app/speakers/api/owner/detect', { method: 'POST' }) 1296 1338 .then(() => { ··· 1310 1352 if (data.status === 'no_cluster') { 1311 1353 hideDiscovery(); 1312 1354 hideOwnerBanner(); 1313 - return; 1314 - } 1315 - if (data.status === 'low_quality') { 1316 - hideDiscovery(); 1317 - renderOwnerLowQuality(data); 1318 1355 return; 1319 1356 } 1320 1357 if (data.status === 'candidate') { ··· 1386 1423 attachAudioErrorListeners(ownerBanner); 1387 1424 } 1388 1425 1389 - function renderOwnerLowQuality(data) { 1426 + function formatOwnerMetric(value) { 1427 + if (typeof value !== 'number' || Number.isNaN(value)) return 'n/a'; 1428 + if (Number.isInteger(value)) return String(value); 1429 + return value.toFixed(2); 1430 + } 1431 + 1432 + function todayDayKey() { 1433 + const now = new Date(); 1434 + const year = String(now.getFullYear()); 1435 + const month = String(now.getMonth() + 1).padStart(2, '0'); 1436 + const datePart = String(now.getDate()).padStart(2, '0'); 1437 + return `${year}${month}${datePart}`; 1438 + } 1439 + 1440 + function principalEntityId() { 1441 + const principal = reviewEntities.find(entity => entity.is_principal); 1442 + return principal ? 
principal.entity_id : null; 1443 + } 1444 + 1445 + function showOwnerGuideToast(message = OWNER_HELP_TOAST_MESSAGE, type = 'info') { 1446 + const container = document.getElementById('spkOwnerGuideToast'); 1447 + if (!container) return; 1448 + container.innerHTML = `<div class="spk-status spk-status-${type}">${escapeHtml(message)}</div>`; 1449 + } 1450 + 1451 + function maybeShowOwnerGuideToast() { 1452 + if (sessionStorage.getItem(OWNER_HELP_TOAST_KEY) !== '1') return; 1453 + sessionStorage.removeItem(OWNER_HELP_TOAST_KEY); 1454 + showOwnerGuideToast(); 1455 + } 1456 + 1457 + function navigateToOwnerHelp() { 1458 + const today = todayDayKey(); 1459 + if (day !== today) { 1460 + sessionStorage.setItem(OWNER_HELP_TOAST_KEY, '1'); 1461 + window.location.href = `/app/speakers/${today}`; 1462 + return; 1463 + } 1464 + showOwnerGuideToast(); 1465 + } 1466 + 1467 + function renderOwnerNotReady(data) { 1390 1468 ownerBanner.style.display = 'block'; 1391 1469 ownerBanner.innerHTML = ` 1392 1470 <div class="spk-owner-panel"> 1393 - <p class="spk-owner-low-quality-message"> 1394 - Solstone is still learning your voice. Once it has at least 30 longer recordings of you speaking, it will offer a confirmation prompt. 1395 - </p> 1471 + <div class="spk-owner-title">Solstone is still learning your voice</div> 1472 + <div class="spk-owner-copy"> 1473 + We're tagging audio segments to recognize you. Help Solstone learn faster by tagging your own statements. 
1474 + </div> 1475 + <div class="spk-owner-actions"> 1476 + <button class="spk-owner-btn spk-owner-btn-confirm" id="spkOwnerHelp"> 1477 + Help Solstone learn faster 1478 + </button> 1479 + </div> 1480 + <div class="spk-owner-toast" id="spkOwnerGuideToast"></div> 1481 + <details class="spk-owner-diagnostics"> 1482 + <summary class="spk-owner-diagnostics-summary">Why not yet?</summary> 1483 + <div class="spk-owner-diagnostics-body"> 1484 + <div class="spk-owner-diagnostics-line">Source: ${escapeHtml(data.source || 'auto')}</div> 1485 + ${data.low_quality_reason ? ` 1486 + <div class="spk-owner-diagnostics-line"> 1487 + Gate: ${escapeHtml(data.low_quality_reason)} — observed ${escapeHtml(formatOwnerMetric(data.observed_value))}, threshold ${escapeHtml(formatOwnerMetric(data.threshold_value))} 1488 + </div> 1489 + ` : ''} 1490 + <div class="spk-owner-diagnostics-line">Manual tags: ${escapeHtml(String(data.manual_tags_count || 0))}</div> 1491 + <div class="spk-owner-diagnostics-line">Segments with audio: ${escapeHtml(String(data.segments_available || 0))}</div> 1492 + <div class="spk-owner-diagnostics-line">Embeddings: ${escapeHtml(String(data.embeddings_available || 0))}</div> 1493 + ${data.can_build_from_tags === true ?
` 1494 + <div class="spk-owner-actions"> 1495 + <button class="spk-owner-btn spk-owner-btn-reject" id="spkOwnerBuildFromTags"> 1496 + Build from manual tags 1497 + </button> 1498 + </div> 1499 + ` : ''} 1500 + </div> 1501 + </details> 1396 1502 </div> 1397 1503 `; 1504 + 1505 + document.getElementById('spkOwnerHelp')?.addEventListener('click', navigateToOwnerHelp); 1506 + document.getElementById('spkOwnerBuildFromTags')?.addEventListener('click', event => { 1507 + const btn = event.currentTarget; 1508 + btn.disabled = true; 1509 + window.apiJson('/app/speakers/api/owner/build-from-tags', { method: 'POST' }) 1510 + .then(() => { 1511 + checkOwnerStatus(); 1512 + }) 1513 + .catch((err) => { 1514 + btn.disabled = false; 1515 + showOwnerGuideToast(`Couldn't build from manual tags — ${resolveSpeakerError(err)}`, 'error'); 1516 + window.logError(err, { context: 'speakers: owner build from tags failed' }); 1517 + }); 1518 + }); 1519 + maybeShowOwnerGuideToast(); 1398 1520 } 1399 1521 1400 1522 function submitOwnerChoice(path) { ··· 2061 2183 2062 2184 async function confirmAttribution(sentenceId) { 2063 2185 try { 2186 + const sentence = currentSentences.find(s => s.id === sentenceId); 2187 + const isPrincipalAttestation = Boolean(sentence?.is_owner); 2064 2188 const data = await window.apiJson('/app/speakers/api/confirm-attribution', { 2065 2189 method: 'POST', 2066 2190 headers: { 'Content-Type': 'application/json' }, ··· 2081 2205 return; 2082 2206 } 2083 2207 2084 - const sentence = currentSentences.find(s => s.id === sentenceId); 2085 2208 if (sentence) { 2086 2209 sentence.confidence = 'high'; 2087 2210 sentence.method = 'user_confirmed'; ··· 2091 2214 renderReviewList(); 2092 2215 showStatusBySentence(sentenceId, 'Attribution confirmed', 'success'); 2093 2216 loadSegments(); 2217 + if (isPrincipalAttestation) { 2218 + checkOwnerStatus(); 2219 + } 2094 2220 } catch (err) { 2095 2221 showStatusBySentence(sentenceId, resolveSpeakerError(err), 'error'); 2096 2222 } ··· 
2120 2246 } 2121 2247 loadReview(selectedSegment, selectedSource); 2122 2248 loadSegments(); 2249 + triggerOwnerStatusIfPrincipal(newSpeaker); 2123 2250 } catch (err) { 2124 2251 showStatusBySentence(sentenceId, resolveSpeakerError(err), 'error'); 2125 2252 } ··· 2149 2276 } 2150 2277 loadReview(selectedSegment, selectedSource); 2151 2278 loadSegments(); 2279 + triggerOwnerStatusIfPrincipal(speaker); 2152 2280 } catch (err) { 2153 2281 showStatusBySentence(sentenceId, resolveSpeakerError(err), 'error'); 2282 + } 2283 + } 2284 + 2285 + function triggerOwnerStatusIfPrincipal(speakerId) { 2286 + if (!speakerId) return; 2287 + if (speakerId === principalEntityId()) { 2288 + checkOwnerStatus(); 2154 2289 } 2155 2290 } 2156 2291
+103
records/decisions/260425-vpe-owner-bootstrap-ux-collapse-and-manual-tag-accelerant.md
··· 1 + # 260425 owner bootstrap UX collapse and manual-tag accelerant 2 + 3 + ## 1. Summary 4 + 5 + This lode keeps `apps/speakers/owner.py:233-476` as the HDBSCAN-only owner-detection path and adds a sibling manual rebuild path driven by explicit owner attestations. The manual path does not trust `voiceprints.npz` alone: it walks the principal's voiceprint rows, cross-checks each row against the segment's `talents/speaker_labels.json`, accepts only `method in {user_assigned, user_corrected, user_confirmed}` with `speaker == principal_id`, then reloads the embedding and duration from the segment NPZ. The manual path reuses the same three quality gates and writes `owner_centroid.npz` with the exact current schema: `centroid`, `cluster_size`, `threshold`, `version`. Owner-contamination guarding gains an in-memory provisional centroid cache only; there is no new journal file and no wipe-target change. 6 + 7 + ## 2. Files touched 8 + 9 + | Path | Planned change | 10 + |---|---| 11 + | `apps/speakers/encoder_config.py` | Add locked constant `OWNER_BOOTSTRAP_PROVISIONAL_GUARD_MIN_TAGS = 5` at the end of the locked block, with a one-line rationale comment. | 12 + | `apps/speakers/owner.py` | Add manual-tag collection, shared quality-gate helper, provisional-guard cache helpers, and the sibling manual build function. Factor centroid persistence so the manual and confirmed paths write the same NPZ schema. | 13 + | `apps/speakers/routes.py` | Add `POST /api/owner/build-from-tags`, switch contamination reads to confirmed-or-provisional owner guard data, and enrich owner-status payloads for the collapsed banner. | 14 + | `apps/speakers/workspace.html` | Collapse owner states into one banner shell, add expandable diagnostics plus manual-build CTA, and refresh owner status after principal manual attestations. | 15 + | `apps/speakers/status.py` | Mirror the richer owner-state fields in the status surface so CLI/admin diagnostics stay aligned with the UI. 
| 16 + | `apps/speakers/tests/conftest.py` | Clear the provisional owner-cache between tests so route- and owner-level cache assertions stay isolated. | 17 + | `apps/speakers/tests/test_encoder_config.py` | Assert the new locked constant and owner import contract. | 18 + | `apps/speakers/tests/test_owner.py` | Cover manual-tag collection, shared gate behavior, manual-build success/low-quality cases, status payload branches, and schema parity with `confirm_owner_candidate()`. | 19 + | `apps/speakers/tests/test_routes.py` | Cover provisional contamination on manual attribution routes and the new build-from-tags endpoint behavior. | 20 + | `apps/speakers/tests/test_status.py` | Cover the expanded owner diagnostic fields. | 21 + 22 + ## 3. Decisions 23 + 24 + ### 3.1 API shape 25 + 26 + Decision: add a new endpoint, `POST /app/speakers/api/owner/build-from-tags`. 27 + 28 + Why: `detect_owner_candidate()` is currently the HDBSCAN entry point, and keeping that contract intact preserves both the implementation boundary and the current tests. A query/body switch on `/api/owner/detect` would mix two different acquisition strategies into one route, add invalid state combinations, and make the status flow harder to reason about. A separate endpoint also gives the owner-banner CTA a clear target and keeps action logging explicit (`owner_voiceprint_build_from_tags` versus auto-detect). This route can return either the canonical `low_quality` payload with `source: "manual_tags"` or the canonical `confirmed` payload. The new route is easy to gate, test, and remove independently if the manual accelerant changes later. 29 + 30 + Files touched: `apps/speakers/routes.py`, `apps/speakers/owner.py`, `apps/speakers/tests/test_owner.py`, `apps/speakers/tests/test_routes.py`. 31 + 32 + ### 3.2 Function placement in `owner.py` 33 + 34 + Decision: add a sibling function, `bootstrap_owner_from_manual_tags()`, rather than branching inside `detect_owner_candidate()`. 
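Both acquisition paths share the small gate helper named in this record. A minimal illustrative version is sketched below — the thresholds mirror the locked `encoder_config` values in this diff and the payload keys copy the canonical low-quality shape from the tests, but the exact cohesion computation (p25 cosine against the mean centroid) and the helper body are assumptions, not the real module:

```python
# Sketch of the shared quality-gate helper (illustrative, not the real code).
from __future__ import annotations

from typing import Any

import numpy as np

# Locked thresholds, mirrored from apps/speakers/encoder_config.py.
OWNER_BOOTSTRAP_MIN_STMTS = 30
OWNER_BOOTSTRAP_MIN_MEDIAN_DURATION_S = 1.5
OWNER_BOOTSTRAP_MIN_INTRA_COSINE_P25 = 0.30


def apply_owner_quality_gates(
    cluster_embeddings: np.ndarray,
    durations: list[float],
    segment_count: int,
    embeddings_count: int,
    source: str,
) -> dict[str, Any] | None:
    """Return the canonical low-quality dict, or None when all gates pass.

    embeddings_count is carried into awareness state by the real module; it
    is accepted here only to match the signature quoted in this record.
    """

    def low_quality(reason: str, observed: float, threshold: float) -> dict[str, Any]:
        return {
            "status": "low_quality",
            "source": source,
            "low_quality_reason": reason,
            "observed_value": float(observed),
            "threshold_value": float(threshold),
            "segments_checked": segment_count,
        }

    # Gate 1: enough attested statements.
    if len(cluster_embeddings) < OWNER_BOOTSTRAP_MIN_STMTS:
        return low_quality(
            "too_few_stmts", len(cluster_embeddings), OWNER_BOOTSTRAP_MIN_STMTS
        )
    # Gate 2: statements long enough to carry speaker signal.
    median_duration = float(np.median(durations))
    if median_duration < OWNER_BOOTSTRAP_MIN_MEDIAN_DURATION_S:
        return low_quality(
            "median_duration_too_short",
            median_duration,
            OWNER_BOOTSTRAP_MIN_MEDIAN_DURATION_S,
        )
    # Gate 3 (assumed form): 25th percentile of cosine similarity to the
    # normalized mean centroid must clear the diffuseness floor.
    normed = cluster_embeddings / np.linalg.norm(
        cluster_embeddings, axis=1, keepdims=True
    )
    centroid = normed.mean(axis=0)
    centroid /= np.linalg.norm(centroid)
    p25 = float(np.percentile(normed @ centroid, 25))
    if p25 < OWNER_BOOTSTRAP_MIN_INTRA_COSINE_P25:
        return low_quality(
            "cluster_too_diffuse", p25, OWNER_BOOTSTRAP_MIN_INTRA_COSINE_P25
        )
    return None
```

Returning `None` on success keeps the caller's branch trivial: either path forwards the dict straight into the low-quality state update, or proceeds to write the centroid.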
Why: the manual path diverges immediately from the HDBSCAN path. It is principal-specific, begins from persisted voiceprint rows, cross-checks labels, and never clusters. Forcing both flows through `detect_owner_candidate(source=...)` would create a misleading API and make the function harder to test and maintain. The two paths only need to share small private helpers: `_collect_manual_tag_embeddings(...)`, `_apply_owner_quality_gates(cluster_embeddings: np.ndarray, durations: list[float], segment_count: int, embeddings_count: int, source: str) -> dict | None`, and a centroid writer/state updater. The gate helper returns the canonical low-quality dict, including `source`, or `None` when all three gates pass. This keeps the public mental model simple: `detect_owner_candidate()` finds a candidate; `bootstrap_owner_from_manual_tags()` promotes sufficiently attested manual tags into a confirmed centroid.

Files touched: `apps/speakers/owner.py`, `apps/speakers/routes.py`, `apps/speakers/tests/test_owner.py`.

### 3.3 Manual-tag set materialization

Decision: use `_collect_manual_tag_embeddings(principal_id) -> tuple[np.ndarray, list[dict[str, Any]]]` in `apps/speakers/owner.py`, backed by label cross-reference rather than voiceprint metadata alone.

Why: the prep findings rule out trusting `voiceprints.npz` as “owner-attested.” The helper should load the principal's `voiceprints.npz`, dedupe rows by `(day, segment_key, source, sentence_id)`, resolve the segment directory from `stream` when present, and fall back to a `chronicle/<day>/*/<segment_key>` scan for historical rows that lack `stream`. For each row it should load `talents/speaker_labels.json`, confirm `speaker == principal_id` and `method in {user_assigned, user_corrected, user_confirmed}`, then reload the segment NPZ through `_load_embeddings_file()` to recover the exact embedding and `durations_s`.
If `durations_s` is missing, it should reuse `_fallback_statement_durations()` so the manual path behaves like the HDBSCAN path on legacy segments. On success, `bootstrap_owner_from_manual_tags()` writes the same `owner_centroid.npz` schema as `confirm_owner_candidate()` and reuses `cluster_size` for the validated manual-tag count so downstream readers do not need a parallel schema. The returned provenance rows should carry `day`, `stream`, `segment_key`, `source`, `sentence_id`, and `duration_s`, which is enough for diagnostics and for the shared quality-gate helper.

Files touched: `apps/speakers/owner.py`, `apps/speakers/tests/test_owner.py`.

### 3.4 Provisional contamination strategy

Decision: add a module-level in-memory provisional guard cache in `apps/speakers/owner.py`, keyed by the principal voiceprints file `mtime_ns` plus the validated manual-tag count.

Why: `_check_owner_contamination` is cold enough that an in-memory cache is sufficient and much simpler than a persisted artifact. Add `OWNER_BOOTSTRAP_PROVISIONAL_GUARD_MIN_TAGS = 5` in `encoder_config.py`; when no confirmed `owner_centroid.npz` exists and at least five validated manual owner tags are available, compute a normalized mean centroid and cache `(mtime_ns, manual_tag_count, centroid, threshold)`. When a confirmed centroid exists, clear the provisional cache and always use the on-disk confirmed vector. Also clear the cache when the principal disappears, `voiceprints.npz` disappears, or the validated manual-tag count drops below the minimum. No file is written, nothing is added to `wipe.py`, and process-restart loss is acceptable because rebuild cost is low and the call surface is narrow.

Files touched: `apps/speakers/encoder_config.py`, `apps/speakers/owner.py`, `apps/speakers/routes.py`, `apps/speakers/tests/test_encoder_config.py`, `apps/speakers/tests/test_routes.py`.
### 3.5 Diagnostics affordance in `workspace.html`

Decision: use an expand toggle inside the owner banner, not a title tooltip.

Why: the low-signal state now needs more than a one-line explanation. The owner banner should render one shared panel shell with summary copy, an optional “Build from tagged samples” button, and a toggle such as “Why not yet?” that reveals a compact diagnostics block. The expanded block should show the gate name, observed value, threshold value, source (`hdbscan` or `manual_tags`), and three counters: validated manual tags, segments with audio, and embeddings available. `api_owner_status` should expose those fields on the low-signal branches together with `can_build_from_tags`, so the client does not need to recompute eligibility. This is richer than a `title=` attribute, keeps the information accessible on mobile, and matches the existing inline status styling pattern already used in the workspace. The collapsed state stays lightweight while still giving jer the detailed affordance needed for debugging or support.

Files touched: `apps/speakers/workspace.html`, `apps/speakers/routes.py`, `apps/speakers/tests/test_owner.py`.

### 3.6 Voiceprint metadata discriminator

Decision: do a pure labels cross-reference; do not add a `method` field to voiceprint metadata.

Why: `speaker_labels.json` is already the source of truth for manual versus automatic attribution, while `voiceprints.npz` is intentionally a generic embedding store shared by manual review, bootstrap, discovery, attribution accumulation, and merge flows. Adding `method` only to `_save_voiceprint()` would create a partial schema that says nothing about rows written by `bootstrap.py`, `discovery.py`, or `attribution.py`, which is worse than having no provenance flag at all. Skipping the schema change also avoids migrating historical journals and keeps merge/idempotency behavior unchanged.
If the product later needs first-class voiceprint provenance, that should be a separate full-schema lode that updates every writer and reader together.

Files touched: `apps/speakers/owner.py` only.

### 3.7 Status re-fetch trigger on `confirmAttribution`

Decision: use existing client data, not a new server response field, and centralize the refresh logic in `workspace.html`.

Why: `api_review()` already tells the client whether a sentence belongs to the principal (`sentence.is_owner`) and whether an entity is principal (`all_entities[].is_principal`). `confirmAttribution()` does not change speaker identity, so it can decide locally whether the success path just produced an owner attestation and call `checkOwnerStatus()` when true. The same small helper should be reused by `correctAttribution()` and `assignAttribution()` when the chosen entity is principal, because `loadReview()` does not itself hit `/api/owner/status`. This avoids unnecessary route churn, keeps the principal lookup on the client where the data already exists, and ensures the owner banner updates immediately after any principal manual tag is created.

Files touched: `apps/speakers/workspace.html`.

## 4. Implementation sequence

1. Add the locked constant and the new `owner.py` helpers: `_collect_manual_tag_embeddings(...)`, `_apply_owner_quality_gates(...)`, the provisional guard loader/cache, and shared centroid persistence/state update.
2. Add the new route and status-payload fields in `apps/speakers/routes.py`, then mirror the owner diagnostics in `apps/speakers/status.py`.
3. Refactor `workspace.html` owner rendering into one banner shell, add the diagnostics toggle and manual-build CTA, and add principal-attestation status refresh hooks.
4. Add or adjust tests in `test_owner.py`, `test_routes.py`, `test_discovery.py`, `test_status.py`, and `test_encoder_config.py`.

## 5. Risks and open questions

- Historical principal voiceprint rows without `stream` metadata will require a segment-directory fallback scan. The design treats missing or ambiguous rows as non-qualifying and skips them rather than guessing.
- The provisional guard minimum (`5`) is intentionally lower than the full confirmation gate (`30`). The UI copy must make it clear that provisional contamination protection and confirmed centroid promotion are different thresholds.
- `apps/speakers/owner.py:541-608` currently writes awareness with direct `update_state(...)` calls, not a dedicated state helper. Implementation can either keep that style or add a tiny private wrapper, but both the manual and confirmed paths must record `status: "confirmed"` consistently and persist the same NPZ schema.
- HDBSCAN and manual paths can race on `owner_centroid.npz`; both produce gate-passing centroids, so correctness is unaffected. First write wins; the second is a no-op via the short-circuit in `bootstrap_owner_from_manual_tags()`.
- Reverse contamination, where a user mistakenly assigns their own identity to a non-owner sentence, is unguarded by design and out of scope for this lode.

## 6. Audit-time correction

During audit, the provisional contamination cache was found to invalidate only on the `voiceprints.npz` mtime. Because correcting a principal manual label away does not rewrite `voiceprints.npz`, a stale cached centroid could keep blocking non-owner saves. Fixed by keying the cache on `(mtime_ns, validated_manual_tag_count)` and adding a regression test in `apps/speakers/tests/test_routes.py` for the label-only removal path.

## 7. Handoff items

- **Spec roll-forward.** `cpo/specs/shipped/speaker-attribution-wespeaker.md` lives outside this worktree. The shipped-lodes row referencing this lode (id `4kverujw`, branch `hopper-4kverujw-owner-bootstrap-ux`) needs to be added on the CPO side from this decision record.
- **Visual states.** Sandbox capture was skipped: a venv-level ONNX runtime issue made `make sandbox` painful during implementation. Screenshots of the four owner-banner states (no embeddings, manual progress, HDBSCAN candidate, confirmed/hidden) should be captured opportunistically post-merge.
- **Test reason-string consolidation.** A handful of older HDBSCAN/status assertions in `test_owner.py` still hardcode `low_quality_reason` strings rather than importing the centralized constants. Cleanup only; not blocking.
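The reason-string consolidation is mechanical. A sketch of the target shape, with the constant redefined locally for illustration (in the real tests it would be imported from `owner.py`, which already exports it per the diff above):

```python
# Stand-in for: from apps.speakers.owner import LOW_QUALITY_REASON_TOO_FEW_STMTS
LOW_QUALITY_REASON_TOO_FEW_STMTS = "too_few_stmts"


def check_low_quality_reason(payload: dict) -> None:
    # Before: assert payload["low_quality_reason"] == "too_few_stmts"
    # After: the assertion tracks the constant, so a renamed reason string
    # fails loudly at the import site instead of silently in N test bodies.
    assert payload["low_quality_reason"] == LOW_QUALITY_REASON_TOO_FEW_STMTS
```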