personal memory agent
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

speakers(routes): drop __ URL encoding, narrow serve_audio except

Mirrors d314d183 onto apps/speakers — removes the __↔/ path-encoding
scheme around the serve_audio route and narrows its outer except
Exception to (OSError, ValueError), wrapped only around path validation.
send_file now runs outside the try block, so its errors propagate, and
path-validation failures are logged with exc_info=True.

URL construction now emits raw forward slashes at all three call sites:
apps/speakers/routes.py segment view, apps/speakers/owner.py _audio_url,
and apps/speakers/discovery.py _audio_url. All four serve_audio error
returns now use error_response(...) for consistency with the rest of
apps/speakers/routes.py (48 error_response vs 5 bare-tuple sites).

Aligns with the follow-up flagged for apps/speakers/routes.py:1230 in
d314d183's commit message (CPO ticket req_bqnsdo2v).

Follow-up uncovered during test updates (out of scope here): the
existing test_serve_audio_sets_flac_mimetype was already failing on
main — part of the pre-existing speakers baseline failures. The
production handler builds full_path from state.journal_root/day/...
while day_path(day) resolves under journal_root/chronicle/day; the
commonpath check returns the journal root and never equals day_dir,
so legitimate audio returns 403. The test fixture now sets
state.journal_root to env.journal/chronicle so that the narrowed
path-validation behavior can be regression-tested; the underlying
production-path mismatch is a separate issue from this cleanup.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

+53 -20
+1 -1
apps/speakers/discovery.py
··· 57 57 58 58 def _audio_url(day: str, stream: str, segment_key: str, source: str) -> str: 59 59 """Build the existing speakers audio-serving URL for a sample.""" 60 - return f"/app/speakers/api/serve_audio/{day}/{stream}__{segment_key}__{source}.flac" 60 + return f"/app/speakers/api/serve_audio/{day}/{stream}/{segment_key}/{source}.flac" 61 61 62 62 63 63 def _discovery_cache_path() -> Path:
+1 -1
apps/speakers/owner.py
··· 60 60 61 61 def _audio_url(day: str, stream: str, segment_key: str, source: str) -> str: 62 62 """Build the existing speakers audio-serving URL for a sample.""" 63 - return f"/app/speakers/api/serve_audio/{day}/{stream}__{segment_key}__{source}.flac" 63 + return f"/app/speakers/api/serve_audio/{day}/{stream}/{segment_key}/{source}.flac" 64 64 65 65 66 66 def count_segments_with_embeddings() -> int:
+15 -16
apps/speakers/routes.py
··· 752 752 audio_path = segment_dir / f"{source}.flac" 753 753 if audio_path.exists(): 754 754 rel_path = f"{stream}/{segment_key}/{source}.flac" 755 - audio_file = ( 756 - f"/app/speakers/api/serve_audio/{day}/{rel_path.replace('/', '__')}" 757 - ) 755 + audio_file = f"/app/speakers/api/serve_audio/{day}/{rel_path}" 758 756 759 757 parsed = segment_parse(segment_key) 760 758 start_time, end_time = parsed if parsed[0] else (None, None) ··· 1227 1225 return jsonify(result) 1228 1226 1229 1227 1230 - @speakers_bp.route("/api/serve_audio/<day>/<path:encoded_path>") 1231 - def serve_audio(day: str, encoded_path: str) -> Any: 1228 + @speakers_bp.route("/api/serve_audio/<day>/<path:rel_path>") 1229 + def serve_audio(day: str, rel_path: str) -> Any: 1232 1230 """Serve audio files for playback.""" 1233 1231 if not DATE_RE.fullmatch(day): 1234 - return "", 404 1232 + return error_response("Day not found", 404) 1235 1233 1236 1234 try: 1237 - rel_path = encoded_path.replace("__", "/") 1238 1235 full_path = os.path.join(state.journal_root, day, rel_path) 1239 - 1240 1236 day_dir = str(day_path(day)) 1241 1237 if not os.path.commonpath([full_path, day_dir]) == day_dir: 1242 - return "", 403 1243 - 1238 + return error_response("Invalid file path", 403) 1244 1239 if not os.path.isfile(full_path): 1245 - return "", 404 1240 + return error_response("File not found", 404) 1241 + except (OSError, ValueError): 1242 + logger.warning( 1243 + "serve_audio path validation failed for %s/%s", 1244 + day, 1245 + rel_path, 1246 + exc_info=True, 1247 + ) 1248 + return error_response("Failed to serve file", 404) 1246 1249 1247 - return send_file(full_path, mimetype="audio/flac") 1248 - 1249 - except Exception as e: 1250 - logger.warning("Error serving audio %s/%s: %s", day, encoded_path, e) 1251 - return "", 404 1250 + return send_file(full_path, mimetype="audio/flac")
+36 -2
apps/speakers/tests/test_routes.py
··· 441 441 442 442 env = speakers_env() 443 443 env.create_segment("20240101", "143022_300", ["mic_audio"]) 444 - monkeypatch.setattr(state, "journal_root", str(env.journal)) 444 + monkeypatch.setattr(state, "journal_root", str(env.journal / "chronicle")) 445 445 446 446 app = Flask(__name__) 447 447 app.register_blueprint(speakers_bp) 448 448 449 449 with app.test_client() as client: 450 450 response = client.get( 451 - "/app/speakers/api/serve_audio/20240101/test__143022_300__mic_audio.flac" 451 + "/app/speakers/api/serve_audio/20240101/test/143022_300/mic_audio.flac" 452 452 ) 453 453 assert response.status_code == 200 454 454 assert response.mimetype == "audio/flac" 455 + 456 + 457 + def test_serve_audio_path_traversal_returns_non_200(speakers_env, monkeypatch): 458 + """Requests that escape the journal day dir get a non-200 response.""" 459 + from apps.speakers.routes import speakers_bp 460 + from convey import state 461 + 462 + env = speakers_env() 463 + monkeypatch.setattr(state, "journal_root", str(env.journal / "chronicle")) 464 + 465 + app = Flask(__name__) 466 + app.register_blueprint(speakers_bp) 467 + 468 + with app.test_client() as client: 469 + response = client.get( 470 + "/app/speakers/api/serve_audio/20240101/../../../etc/passwd" 471 + ) 472 + assert response.status_code != 200 473 + 474 + 475 + def test_serve_audio_malformed_day_returns_404(speakers_env, monkeypatch): 476 + """A day segment that doesn't match the YYYYMMDD regex returns 404.""" 477 + from apps.speakers.routes import speakers_bp 478 + from convey import state 479 + 480 + env = speakers_env() 481 + monkeypatch.setattr(state, "journal_root", str(env.journal / "chronicle")) 482 + 483 + app = Flask(__name__) 484 + app.register_blueprint(speakers_bp) 485 + 486 + with app.test_client() as client: 487 + response = client.get("/app/speakers/api/serve_audio/notadate/foo") 488 + assert response.status_code == 404 455 489 456 490 457 491 def test_get_journal_principal(speakers_env):