personal memory agent
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

transcripts(routes): harden serve_file — range support, drop __ encoding, loud logs

Three scoped changes to apps/transcripts/routes.py:

- (B) serve_file now calls send_file with conditional=True so HTTP Range
requests work; audio/video seeking stops re-downloading from byte 0.
- (C) Remove the __↔/ path-encoding scheme. Flask's <path:> converter
already accepts '/'. Both the encoder (two sites in segment_content)
and decoder (serve_file) are removed in the same commit — no
backwards-compat acceptance of the __ form. workspace.html needs no
change: URLs are built server-side.
- (G) Stop silently swallowing exceptions. serve_file's try/except is
narrowed to (OSError, ValueError) around path validation only;
send_file now runs outside the try so werkzeug's own 404/permission
errors surface naturally. The two inner except blocks in
segment_content (audio parse, screen parse) now log with
exc_info=True for full tracebacks.

Adds apps/transcripts/tests/test_serve_file.py pinning the path-traversal
rejection (any non-200 response) and the malformed-day 404 behavior via the
Flask test client.

The parallel __ encoding and broad except Exception at
apps/speakers/routes.py:1230 have the same issues but are intentionally
deferred as out of scope for this change.

+50 -15
+21 -15
apps/transcripts/routes.py
··· 104 104 ) 105 105 106 106 107 - @transcripts_bp.route("/api/serve_file/<day>/<path:encoded_path>") 108 - def serve_file(day: str, encoded_path: str) -> Any: 107 + @transcripts_bp.route("/api/serve_file/<day>/<path:rel_path>") 108 + def serve_file(day: str, rel_path: str) -> Any: 109 109 """Serve actual media files for embedding.""" 110 110 if not DATE_RE.fullmatch(day): 111 111 return error_response("Day not found", 404) 112 112 113 113 try: 114 - rel_path = encoded_path.replace("__", "/") 115 114 full_path = os.path.join(state.journal_root, day, rel_path) 116 - 117 115 day_dir = str(day_path(day, create=False)) 118 116 if not os.path.commonpath([full_path, day_dir]) == day_dir: 119 117 return error_response("Invalid file path", 403) 120 - 121 118 if not os.path.isfile(full_path): 122 119 return error_response("File not found", 404) 123 - 124 - return send_file(full_path) 125 - 126 - except Exception: 120 + except (OSError, ValueError): 121 + logger.warning( 122 + "serve_file path validation failed for %s/%s", 123 + day, 124 + rel_path, 125 + exc_info=True, 126 + ) 127 127 return error_response("Failed to serve file", 404) 128 + 129 + return send_file(full_path, conditional=True) 128 130 129 131 130 132 @transcripts_bp.route("/api/stats/<month>") ··· 282 284 if os.path.isfile(audio_full): 283 285 has_raw_file = True 284 286 rel_path = f"{stream}/{segment_key}/{raw_audio}" 285 - audio_file_url = f"/app/transcripts/api/serve_file/{day}/{rel_path.replace('/', '__')}" 287 + audio_file_url = f"/app/transcripts/api/serve_file/{day}/{rel_path}" 286 288 media_sizes["audio"] += os.path.getsize(audio_full) 287 289 288 290 for chunk in formatted_chunks: ··· 310 312 if speaker_label: 311 313 chunk_data["speaker_label"] = speaker_label 312 314 chunks.append(chunk_data) 313 - except Exception as e: 314 - logger.warning("Failed to parse audio segment %s: %s", audio_path, e) 315 + except Exception: 316 + logger.warning( 317 + "Failed to parse audio segment %s", audio_path, 
exc_info=True 318 + ) 315 319 warnings += 1 316 320 continue 317 321 ··· 344 348 has_raw_file = True 345 349 rel_path = f"{stream}/{segment_key}/{raw_video}" 346 350 video_files[filename] = ( 347 - f"/app/transcripts/api/serve_file/{day}/{rel_path.replace('/', '__')}" 351 + f"/app/transcripts/api/serve_file/{day}/{rel_path}" 348 352 ) 349 353 media_sizes["screen"] += os.path.getsize(video_full) 350 354 ··· 402 406 "basic": is_basic, 403 407 } 404 408 ) 405 - except Exception as e: 406 - logger.warning("Failed to parse screen segment %s: %s", screen_path, e) 409 + except Exception: 410 + logger.warning( 411 + "Failed to parse screen segment %s", screen_path, exc_info=True 412 + ) 407 413 warnings += 1 408 414 continue 409 415
+29
apps/transcripts/tests/test_serve_file.py
··· 1 + # SPDX-License-Identifier: AGPL-3.0-only 2 + # Copyright (c) 2026 sol pbc 3 + 4 + from pathlib import Path 5 + 6 + import pytest 7 + 8 + from convey import create_app 9 + 10 + 11 + @pytest.fixture 12 + def client(): 13 + journal = Path(__file__).resolve().parents[3] / "tests" / "fixtures" / "journal" 14 + app = create_app(str(journal)) 15 + return app.test_client() 16 + 17 + 18 + def test_serve_file_path_traversal_returns_non_200(client): 19 + response = client.get( 20 + "/app/transcripts/api/serve_file/20240101/../../../etc/passwd" 21 + ) 22 + 23 + assert response.status_code != 200 24 + 25 + 26 + def test_serve_file_malformed_day_returns_404(client): 27 + response = client.get("/app/transcripts/api/serve_file/notadate/foo") 28 + 29 + assert response.status_code == 404