personal memory agent
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Remove raw header from tmux JSONL and add defensive validation

Tmux captures have no source media file, so the header's `raw` field, which
pointed to the JSONL file itself, was semantically incorrect. Tmux JSONL files
now contain only frame entries, with no header line.

- Remove header from write_captures_jsonl() in observe/tmux/capture.py
- Add AUDIO_EXTENSIONS constant alongside VIDEO_EXTENSIONS
- Validate the `raw` field's file extension before building media URLs in the calendar and transcripts apps
- Add missing MIME types (.ogg, .m4a, .mp4, .mov) to get_raw_file()
- Update docstrings to reflect the changes

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

+23 -10
+5 -1
apps/calendar/routes.py
··· 13 13 14 14 from convey import state 15 15 from convey.utils import DATE_RE, format_date 16 + from observe.utils import VIDEO_EXTENSIONS 16 17 from think.utils import day_path 17 18 18 19 calendar_bp = Blueprint( ··· 278 279 # Extract raw video path from header (first item if it only has "raw" key) 279 280 raw_video_path = None 280 281 if all_frames and "raw" in all_frames[0] and "frame_id" not in all_frames[0]: 281 - raw_video_path = all_frames[0].get("raw") 282 + raw_path = all_frames[0].get("raw") 283 + # Validate raw points to a video file (skip if not, e.g. tmux) 284 + if raw_path and raw_path.endswith(VIDEO_EXTENSIONS): 285 + raw_video_path = raw_path 282 286 283 287 # Decode and cache all frames from the video 284 288 cache_key = (day, timestamp, filename)
+5 -2
apps/transcripts/routes.py
··· 24 24 from convey.utils import DATE_RE, format_date 25 25 from observe.hear import format_audio 26 26 from observe.screen import format_screen 27 + from observe.utils import AUDIO_EXTENSIONS, VIDEO_EXTENSIONS 27 28 from think.cluster import cluster_scan, cluster_segments 28 29 from think.utils import day_dirs, day_path 29 30 from think.utils import segment_key as validate_segment_key ··· 207 208 raw_audio = entry["raw"] 208 209 break 209 210 210 - if raw_audio: 211 + # Validate raw points to an audio file (skip if not) 212 + if raw_audio and raw_audio.endswith(AUDIO_EXTENSIONS): 211 213 rel_path = f"{segment_key}/{raw_audio}" 212 214 audio_file_url = f"/app/transcripts/api/serve_file/{day}/{rel_path.replace('/', '__')}" 213 215 ··· 252 254 raw_video = entry["raw"] 253 255 break 254 256 255 - if raw_video: 257 + # Validate raw points to a video file (skip if not, e.g. tmux) 258 + if raw_video and raw_video.endswith(VIDEO_EXTENSIONS): 256 259 video_path = os.path.join(segment_dir, raw_video) 257 260 if os.path.isfile(video_path): 258 261 rel_path = f"{segment_key}/{raw_video}"
+3 -6
observe/tmux/capture.py
··· 358 358 """Write tmux captures to JSONL files, grouped by session. 359 359 360 360 Creates one file per session: tmux_{session}_screen.jsonl 361 - Format matches screen.jsonl for unified formatting/indexing. 361 + Frame entries match screen.jsonl format for unified formatting/indexing. 362 + No header line since tmux captures have no raw media file. 362 363 363 364 Args: 364 365 captures: List of capture dicts from result_to_dict() ··· 389 390 output_path = segment_dir / filename 390 391 391 392 with open(output_path, "w") as f: 392 - # Header matching screen.jsonl format 393 - header = {"raw": filename} 394 - f.write(json.dumps(header) + "\n") 395 - 396 - # Write each capture 393 + # No header - tmux captures have no raw media file 397 394 for capture in session_captures: 398 395 f.write(json.dumps(capture) + "\n") 399 396
+2 -1
observe/utils.py
··· 1 1 # SPDX-License-Identifier: AGPL-3.0-only 2 2 # Copyright (c) 2026 sol pbc 3 3 4 - """Utilities for working with screencasts and video files.""" 4 + """Utilities for working with media files (audio and video).""" 5 5 6 6 import json 7 7 import logging ··· 11 11 logger = logging.getLogger(__name__) 12 12 13 13 VIDEO_EXTENSIONS = (".webm", ".mp4", ".mov") 14 + AUDIO_EXTENSIONS = (".flac", ".ogg", ".m4a") 14 15 15 16 16 17 def extract_descriptive_suffix(filename: str) -> str:
+8
think/utils.py
··· 833 833 # Determine MIME type from raw file extension 834 834 if rel.endswith(".flac"): 835 835 mime = "audio/flac" 836 + elif rel.endswith(".ogg"): 837 + mime = "audio/ogg" 838 + elif rel.endswith(".m4a"): 839 + mime = "audio/mp4" 836 840 elif rel.endswith(".png"): 837 841 mime = "image/png" 838 842 elif rel.endswith(".webm"): 839 843 mime = "video/webm" 844 + elif rel.endswith(".mp4"): 845 + mime = "video/mp4" 846 + elif rel.endswith(".mov"): 847 + mime = "video/quicktime" 840 848 else: 841 849 # Default fallback for unknown types 842 850 mime = "application/octet-stream"