···226226227227228228def _scan_segment_embeddings(day: str) -> list[dict]:
229229- """Scan a day for segments with embeddings and 1+ speakers.
229229+ """Scan a day for segments with audio embeddings.
230230231231- Only includes segments that have:
232232- 1. Audio embedding NPZ files
233233- 2. A speakers.json file with 1 or more speaker names
231231+ Only includes segments that have audio embedding NPZ files.
232232+ Segments with a speakers.json file will include speaker names;
233233+ segments without speakers.json will have an empty speakers list.
234234235235 Returns list of segment info dicts with keys:
236236 - key: segment directory name (HHMMSS_LEN)
···263263 if not sources:
264264 continue
265265266266- # Load speakers.json - require at least one speaker
266266+ # Load speakers.json (may be empty if not yet processed)
267267 speakers = _load_segment_speakers(s_path)
268268- if not speakers:
269269- continue
270268271269 # Calculate duration from start and end times
272270 duration = _time_to_seconds(end_time) - _time_to_seconds(start_time)
···469467def api_stats(month: str) -> Any:
470468 """Return segment counts for each day in a month.
471469472472- Used by calendar heatmap to show days with speaker segments.
470470+ Used by calendar heatmap to show days with embedding segments.
473471 """
474472 if not re.fullmatch(r"\d{6}", month):
475473 return error_response("Invalid month format, expected YYYYMM", 400)
···489487490488@speakers_bp.route("/api/segments/<day>")
491489def api_segments(day: str) -> Any:
492492- """Return segments with embeddings and 1+ speakers for a day."""
490490+ """Return segments with audio embeddings for a day."""
493491 if not DATE_RE.fullmatch(day):
494492 return error_response("Invalid day format", 400)
495493···513511 segment_dir = get_segment_path(day, segment_key, stream)
514512 speakers = _load_segment_speakers(segment_dir)
515513 if not speakers:
516516- return error_response("No speakers found for segment", 404)
514514+ return jsonify({"matched": [], "unmatched": []})
517515518516 # Load all journal entities for matching
519517 journal_entities = load_all_journal_entities()
+31-5
apps/speakers/tests/test_routes.py
···426426 assert speakers == []
427427428428429429-def test_scan_segment_embeddings_requires_speakers(speakers_env):
430430- """Test that segments without speakers.json are filtered out."""
429429+def test_scan_segment_embeddings_without_speakers(speakers_env):
430430+ """Test that segments without speakers.json are included with empty speakers."""
431431 from apps.speakers.routes import _scan_segment_embeddings
432432433433 env = speakers_env()
···435435 env.create_segment("20240101", "143022_300", ["mic_audio"])
436436437437 segments = _scan_segment_embeddings("20240101")
438438- assert segments == []
438438+ assert len(segments) == 1
439439+ assert segments[0]["key"] == "143022_300"
440440+ assert segments[0]["speakers"] == []
441441+ assert segments[0]["speaker_count"] == 0
439442440443441444def test_scan_segment_embeddings_single_speaker(speakers_env):
···453456454457455458def test_scan_segment_embeddings_empty_speakers(speakers_env):
456456- """Test that segments with 0 speakers are filtered out."""
459459+ """Test that segments with empty speakers.json are included."""
457460 from apps.speakers.routes import _scan_segment_embeddings
458461459462 env = speakers_env()
···461464 env.create_speakers_json("20240101", "143022_300", []) # No speakers
462465463466 segments = _scan_segment_embeddings("20240101")
464464- assert segments == []
467467+ assert len(segments) == 1
468468+ assert segments[0]["speakers"] == []
469469+ assert segments[0]["speaker_count"] == 0
465470466471467472def test_scan_segment_embeddings_includes_speaker_data(speakers_env):
···477482 assert len(segments) == 1
478483 assert segments[0]["speakers"] == ["Alice", "Bob"]
479484 assert segments[0]["speaker_count"] == 2
485485+486486+487487+def test_api_speakers_empty_when_no_speakers_json(speakers_env):
488488+ """Test /api/speakers/ returns empty matched/unmatched when no speakers.json."""
489489+ from flask import Flask
490490+491491+ from apps.speakers.routes import speakers_bp
492492+493493+ env = speakers_env()
494494+ env.create_segment("20240101", "143022_300", ["mic_audio"])
495495+ # No speakers.json created
496496+497497+ app = Flask(__name__)
498498+ app.register_blueprint(speakers_bp)
499499+500500+ with app.test_client() as client:
501501+ response = client.get("/app/speakers/api/speakers/20240101/test/143022_300")
502502+ assert response.status_code == 200
503503+ data = response.get_json()
504504+ assert data["matched"] == []
505505+ assert data["unmatched"] == []
480506481507482508def test_get_journal_principal(speakers_env):