···66from __future__ import annotations
7788import json
99+import logging
910import os
1011import re
1112import shutil
···3536from think.utils import day_dirs, day_path, segment_path
3637from think.utils import segment_key as validate_segment_key
37383838-# Regex for HHMMSS time format validation
3939-TIME_RE = re.compile(r"\d{6}")
3939+logger = logging.getLogger(__name__)
4040+4141+# Regex for YYYYMM month format validation
4242+MONTH_RE = re.compile(r"\d{6}")
40434144transcripts_bp = Blueprint(
4245 "app:transcripts",
···4952def index() -> Any:
5053 """Redirect to the most recent day with segments, falling back to today."""
5154 today = date.today().strftime("%Y%m%d")
5252- if cluster_segments(today):
5353- return redirect(url_for("app:transcripts.transcripts_day", day=today))
5455 for day in sorted(day_dirs().keys(), reverse=True):
5556 if cluster_segments(day):
5657 return redirect(url_for("app:transcripts.transcripts_day", day=day))
···6162def transcripts_day(day: str) -> str:
6263 """Render transcript viewer for a specific day."""
6364 if not DATE_RE.fullmatch(day):
6464- return "", 404
6565+ return error_response("Day not found", 404)
65666667 title = format_date(day)
6768···7273def transcript_ranges(day: str) -> Any:
7374 """Return available transcript ranges for a day."""
7475 if not DATE_RE.fullmatch(day):
7575- return "", 404
7676+ return error_response("Day not found", 404)
76777778 audio_ranges, screen_ranges = cluster_scan(day)
7879 return jsonify({"audio": audio_ranges, "screen": screen_ranges})
···8586 Returns list of segments with their content types for the segment selector UI.
8687 """
8788 if not DATE_RE.fullmatch(day):
8888- return "", 404
8989+ return error_response("Day not found", 404)
89909091 segments = cluster_segments(day)
9192 return jsonify({"segments": segments})
···9596def transcript_day_data(day: str) -> Any:
9697 """Return combined ranges and segments for a day in a single response."""
9798 if not DATE_RE.fullmatch(day):
9898- return "", 404
9999+ return error_response("Day not found", 404)
99100100101 audio_ranges, screen_ranges, segments = scan_day(day)
101102 return jsonify({"audio": audio_ranges, "screen": screen_ranges, "segments": segments})
···105106def serve_file(day: str, encoded_path: str) -> Any:
106107 """Serve actual media files for embedding."""
107108 if not DATE_RE.fullmatch(day):
108108- return "", 404
109109+ return error_response("Day not found", 404)
109110110111 try:
111112 rel_path = encoded_path.replace("__", "/")
···113114114115 day_dir = str(day_path(day, create=False))
115116 if not os.path.commonpath([full_path, day_dir]) == day_dir:
116116- return "", 403
117117+ return error_response("Invalid file path", 403)
117118118119 if not os.path.isfile(full_path):
119119- return "", 404
120120+ return error_response("File not found", 404)
120121121122 return send_file(full_path)
122123123124 except Exception:
124124- return "", 404
125125+ return error_response("Failed to serve file", 404)
125126126127127128@transcripts_bp.route("/api/stats/<month>")
···135136 JSON dict mapping day (YYYYMMDD) to transcript range count.
136137 Transcripts app is not facet-aware, so returns simple {day: count} mapping.
137138 """
138138- if not TIME_RE.fullmatch(month):
139139- return jsonify({"error": "Invalid month format, expected YYYYMM"}), 400
139139+ if not MONTH_RE.fullmatch(month):
140140+ return error_response("Invalid month format", 400)
140141141142 stats: dict[str, int] = {}
142143···203204 - media_sizes: dict with audio/screen byte counts for raw media files
204205 """
205206 if not DATE_RE.fullmatch(day):
206206- return "", 404
207207+ return error_response("Invalid day format", 404)
207208208209 if not validate_segment_key(segment_key):
209209- return "", 404
210210+ return error_response("Invalid segment key format", 404)
210211211212 segment_dir = str(segment_path(day, segment_key, stream))
212213 if not os.path.isdir(segment_dir):
213213- return "", 404
214214+ return error_response("Segment directory not found", 404)
214215215216 chunks: list[dict] = []
216217 audio_file_url = None
···218219 media_sizes: dict[str, int] = {"audio": 0, "screen": 0}
219220 has_raw_reference = False
220221 has_raw_file = False
222222+ warnings = 0
221223222224 # Load speaker labels if available.
223225 speaker_labels_path = Path(segment_dir) / "agents" / "speaker_labels.json"
···306308 if speaker_label:
307309 chunk_data["speaker_label"] = speaker_label
308310 chunks.append(chunk_data)
309309- except Exception:
311311+ except Exception as e:
312312+ logger.warning("Failed to parse audio segment %s: %s", audio_path, e)
313313+ warnings += 1
310314 continue
311315312316 # Process screen files and collect video URLs for client-side decoding
···396400 "basic": is_basic,
397401 }
398402 )
399399- except Exception:
403403+ except Exception as e:
404404+ logger.warning("Failed to parse screen segment %s: %s", screen_path, e)
405405+ warnings += 1
400406 continue
401407402408 # Sort all chunks by timestamp
···427433 "cost": cost_data["cost"],
428434 "media_sizes": media_sizes,
429435 "media_purged": media_purged,
436436+ "warnings": warnings,
430437 }
431438 )
432439
-2
convey/root.py
···117117def app_today() -> Any:
118118 """Redirect /app/today to the most recent day with journal data."""
119119 today = date.today().strftime("%Y%m%d")
120120- if cluster_segments(today):
121121- return redirect(url_for("app:transcripts.transcripts_day", day=today))
122120 for day in sorted(day_dirs().keys(), reverse=True):
123121 if cluster_segments(day):
124122 return redirect(url_for("app:transcripts.transcripts_day", day=day))