···44"""CLI interface for speaker voiceprint management.
5566Provides:
77- sol call speakers status [--section SECTION]
77+ sol call speakers status [section]
88 sol call speakers bootstrap [--dry-run] [--json]
99 sol call speakers resolve-names [--dry-run] [--json]
1010- sol call speakers attribute-segment <day> <stream> <segment>
1010+ sol call speakers attribute-segment <day> <stream> <segment> [--json]
1111 sol call speakers backfill [--dry-run] [--json]
1212 sol call speakers discover [--json]
1313- sol call speakers identify <cluster_id> <name> [--entity-id ID]
1414- sol call speakers merge-names <alias> <canonical>
1513"""
16141715from __future__ import annotations
···27252826@app.command("status")
2927def status(
3030- section: str = typer.Option(
2828+ section: str | None = typer.Argument(
3129 None,
3232- "--section",
3333- help="Return only one section: embeddings, owner, speakers, clusters, imports, attribution.",
3030+ help=(
3131+ "Section to show (embeddings, owner, speakers, clusters, imports, "
3232+ "attribution). Omit for all."
3333+ ),
3434 ),
3535) -> None:
3636- """Return the full speaker ID state model as JSON.
3737-3838- Aggregates embedding coverage, owner centroid status, known speakers,
3939- candidate clusters, import signals, and attribution coverage into a
4040- single dashboard view. All data is read from disk — no new computations.
4141- """
4242- import json
3636+ """Show speaker subsystem status as JSON."""
3737+ import json as json_mod
43384444- from apps.speakers.status import get_status
3939+ from apps.speakers.status import get_speakers_status
45404646- result = get_status(section=section)
4747-4848- if "error" in result:
4949- typer.echo(json.dumps(result, indent=2), err=True)
5050- raise typer.Exit(1)
5151-5252- typer.echo(json.dumps(result, indent=2))
4141+ result = get_speakers_status(section=section)
4242+ typer.echo(json_mod.dumps(result, indent=2, default=str))
534354445545@app.command("bootstrap")
···5848 False, "--dry-run", help="Show what would be saved without saving."
5949 ),
6050 json_output: bool = typer.Option(
6161- False, "--json", help="Output results as JSON."
5151+ False, "--json", help="Output full result as JSON."
6252 ),
6353) -> None:
6454 """Bootstrap voiceprints from single-speaker segments.
···6858 speaker. Saves them as voiceprints using the owner centroid for
6959 owner subtraction.
7060 """
7171- import json as json_mod
7272-7361 from apps.speakers.bootstrap import bootstrap_voiceprints
74627563 if dry_run and not json_output:
···77657866 if not json_output:
7967 typer.echo("Bootstrapping voiceprints from single-speaker segments...")
8080-8168 stats = bootstrap_voiceprints(dry_run=dry_run)
82698383- if json_output:
8484- typer.echo(json_mod.dumps(stats, indent=2))
8585- if "error" in stats:
8686- raise typer.Exit(1)
8787- return
8888-8970 if "error" in stats:
9071 typer.echo(f"Error: {stats['error']}", err=True)
9172 raise typer.Exit(1)
7373+ if json_output:
7474+ import json as json_mod
7575+7676+ typer.echo(json_mod.dumps(stats, indent=2, default=str))
7777+ return
92789379 typer.echo(f"\nSegments scanned: {stats['segments_scanned']}")
9480 typer.echo(f"Single-speaker segments: {stats['single_speaker_segments']}")
···122108 False, "--dry-run", help="Show merges without applying them."
123109 ),
124110 json_output: bool = typer.Option(
125125- False, "--json", help="Output results as JSON."
111111+ False, "--json", help="Output full result as JSON."
126112 ),
127113) -> None:
128114 """Resolve speaker name variants using voiceprint similarity.
···132118 (short name is first word of full name) are auto-merged by adding the
133119 short name as an aka on the canonical entity.
134120 """
135135- import json as json_mod
136136-137121 from apps.speakers.bootstrap import resolve_name_variants
138122139123 if dry_run and not json_output:
···141125142126 if not json_output:
143127 typer.echo("Resolving speaker name variants...")
144144-145128 stats = resolve_name_variants(dry_run=dry_run)
146129147130 if json_output:
148148- typer.echo(json_mod.dumps(stats, indent=2))
131131+ import json as json_mod
132132+133133+ typer.echo(json_mod.dumps(stats, indent=2, default=str))
149134 return
150135151136 typer.echo(f"\nEntities with voiceprints: {stats['entities_with_voiceprints']}")
···252237 False, "--dry-run", help="Enumerate segments without processing."
253238 ),
254239 json_output: bool = typer.Option(
255255- False, "--json", help="Output results as JSON."
240240+ False, "--json", help="Output full result as JSON."
256241 ),
257242) -> None:
258243 """Run speaker attribution across all segments with embeddings.
···260245 Processes segments oldest-first for progressive voiceprint building.
261246 Skips segments that already have speaker_labels.json (safe to re-run).
262247 """
263263- import json as json_mod
264248 import time
265249266250 from apps.speakers.attribution import backfill_segments
···287271288272 stats = backfill_segments(
289273 dry_run=dry_run,
290290- progress_callback=None if (dry_run or json_output) else on_progress,
274274+ progress_callback=None if dry_run or json_output else on_progress,
291275 )
292276293277 elapsed = time.monotonic() - start
294278295279 if json_output:
296296- stats["elapsed_seconds"] = round(elapsed, 1)
297297- typer.echo(json_mod.dumps(stats, indent=2))
280280+ import json as json_mod
281281+282282+ typer.echo(json_mod.dumps(stats, indent=2, default=str))
298283 return
299284300285 typer.echo("\n")
···325310@app.command()
326311def discover(
327312 json_output: bool = typer.Option(
328328- False, "--json", help="Output results as JSON."
313313+ False, "--json", help="Output full result as JSON."
329314 ),
330315) -> None:
331316 """Discover recurring unknown speakers across segments."""
···334319 from apps.speakers.discovery import discover_unknown_speakers
335320336321 result = discover_unknown_speakers()
337337-338322 if json_output:
339339- typer.echo(json_mod.dumps(result, indent=2))
323323+ typer.echo(json_mod.dumps(result, indent=2, default=str))
340324 return
341341-342325 clusters = result.get("clusters", [])
343326344327 if not clusters:
···358341 f"sid={sample['sentence_id']}: {text_preview}"
359342 )
360343 typer.echo()
361361-362362-363363-@app.command()
364364-def identify(
365365- cluster_id: int = typer.Argument(..., help="Cluster ID from discovery output."),
366366- name: str = typer.Argument(..., help="Speaker name to assign."),
367367- entity_id: str | None = typer.Option(
368368- None, "--entity-id", help="Link to existing entity instead of name matching."
369369- ),
370370-) -> None:
371371- """Name an unknown speaker cluster from discovery.
372372-373373- Creates or matches a speaker entity and saves the cluster's embeddings
374374- as voiceprints. Updates speaker labels in all affected segments.
375375- Returns JSON.
376376- """
377377- import json as json_mod
378378-379379- from apps.speakers.discovery import identify_cluster
380380-381381- result = identify_cluster(cluster_id, name, entity_id=entity_id)
382382-383383- if "error" in result:
384384- typer.echo(json_mod.dumps({"error": result["error"]}, indent=2), err=True)
385385- raise typer.Exit(1)
386386-387387- typer.echo(json_mod.dumps(result, indent=2))
388388-389389-390390-@app.command("merge-names")
391391-def merge_names_cmd(
392392- alias: str = typer.Argument(..., help="Alias/variant name to merge from."),
393393- canonical: str = typer.Argument(..., help="Canonical/full name to merge into."),
394394-) -> None:
395395- """Merge a speaker name variant into a canonical entity.
396396-397397- Adds the alias as an aka on the canonical entity and merges voiceprint
398398- embeddings with deduplication. Returns JSON.
399399- """
400400- import json as json_mod
401401-402402- from apps.speakers.bootstrap import merge_names
403403-404404- result = merge_names(alias, canonical)
405405-406406- if "error" in result:
407407- typer.echo(json_mod.dumps({"error": result["error"]}, indent=2), err=True)
408408- raise typer.Exit(1)
409409-410410- typer.echo(json_mod.dumps(result, indent=2))