transcripts(routes): cache api_stats on (month, day_dirs_mtime)

Month-picker clicks were re-scanning every matching transcript day on each request.
Cache api_stats results by moving the scan into _stats_for_month, wrapped in
lru_cache(maxsize=64) and keyed on the month plus the maximum mtime observed anywhere
under the matching day directories, so repeat requests for unchanged months reuse the
prior result. Any create, delete, or modify under a matching day dir that advances
that maximum mtime changes the key and forces a cache miss; FileNotFoundError races
during the rglob walk are skipped silently.

Jer Miller 2 weeks ago a5ecb0ae d08e5544

+49 -11

1 changed file

expand all

apps

transcripts

routes.py

+49 -11

apps/transcripts/routes.py

@@ -5,6 +5,7 @@
 
 from __future__ import annotations
 
+import functools
 import json
 import logging
 import os
@@ -46,6 +47,45 @@
     __name__,
     url_prefix="/app/transcripts",
 )
+
+
+def _day_max_mtime(path: str) -> float:
+    """Return the latest mtime under a day directory, skipping delete races."""
+    day_dir = Path(path)
+    try:
+        max_mtime = day_dir.stat().st_mtime
+    except FileNotFoundError:
+        return 0.0
+
+    try:
+        for child in day_dir.rglob("*"):
+            try:
+                child_mtime = child.stat().st_mtime
+            except FileNotFoundError:
+                continue
+            if child_mtime > max_mtime:
+                max_mtime = child_mtime
+    except FileNotFoundError:
+        return max_mtime
+    return max_mtime
+
+
+@functools.lru_cache(maxsize=64)
+def _stats_for_month(month: str, mtime_key: float) -> dict[str, int]:
+    """Return cached transcript range counts for a month."""
+    del mtime_key
+
+    stats: dict[str, int] = {}
+    for day_name in day_dirs().keys():
+        if not day_name.startswith(month):
+            continue
+
+        audio_ranges, screen_ranges = cluster_scan(day_name)
+        total_ranges = len(audio_ranges) + len(screen_ranges)
+        if total_ranges > 0:
+            stats[day_name] = total_ranges
+
+    return stats
 
 
 @transcripts_bp.route("/")
@@ -143,18 +183,16 @@
     if not MONTH_RE.fullmatch(month):
         return error_response("Invalid month format", 400)
 
-    stats: dict[str, int] = {}
-
-    for day_name in day_dirs().keys():
-        if not day_name.startswith(month):
-            continue
-
-        audio_ranges, screen_ranges = cluster_scan(day_name)
-        total_ranges = len(audio_ranges) + len(screen_ranges)
-        if total_ranges > 0:
-            stats[day_name] = total_ranges
+    matching = [
+        (day_name, path)
+        for day_name, path in day_dirs().items()
+        if day_name.startswith(month)
+    ]
+    if not matching:
+        return jsonify({})
 
-    return jsonify(stats)
+    mtime_key = max(_day_max_mtime(path) for _, path in matching)
+    return jsonify(_stats_for_month(month, mtime_key))
 
 
 def _load_jsonl(path: str) -> list[dict]:

Configure Feed

Configure Feed