Add segment key tracking to token usage logging

+7 -21

observe/describe.py

··· 25 25 import av 26 26 from PIL import Image, ImageChops, ImageStat 27 27 28 + from observe.utils import segment_and_suffix 28 29 from think.callosum import callosum_send 29 30 from think.utils import setup_cli 30 31 ··· 36 37 37 38 DESCRIBE = "describe" # Initial categorization 38 39 CATEGORY = "category" # Category-specific follow-up 39 - 40 - 41 - def _segment_and_suffix(media_path: Path) -> tuple[str, str]: 42 - """Return segment key and descriptive suffix for a media path.""" 43 - from observe.utils import extract_descriptive_suffix 44 - from think.utils import segment_key 45 - 46 - segment = segment_key(media_path.stem) 47 - if segment is None: 48 - raise ValueError( 49 - f"Invalid video filename: {media_path.stem} (must be HHMMSS_LEN format)" 50 - ) 51 - try: 52 - suffix = extract_descriptive_suffix(media_path.stem) 53 - except ValueError as exc: 54 - raise ValueError( 55 - f"Invalid video filename: {media_path.stem} (must be HHMMSS_LEN format)" 56 - ) from exc 57 - return segment, suffix 58 40 59 41 60 42 def _discover_categories() -> dict[str, dict]: ··· 357 339 358 340 def _move_to_segment(self, media_path: Path) -> Path: 359 341 """Move media file to its segment and return new path.""" 360 - segment, suffix = _segment_and_suffix(media_path) 342 + segment, suffix = segment_and_suffix(media_path) 361 343 segment_dir = media_path.parent / segment 362 344 try: 363 345 segment_dir.mkdir(exist_ok=True) ··· 771 753 if not args.frames_only: 772 754 # Extract segment and suffix for output naming 773 755 try: 774 - segment, suffix = _segment_and_suffix(video_path) 756 + segment, suffix = segment_and_suffix(video_path) 775 757 except ValueError as exc: 776 758 parser.error(str(exc)) 759 + 760 + # Set segment key for token usage logging 761 + os.environ["SEGMENT_KEY"] = segment 762 + 777 763 segment_dir = video_path.parent / segment 778 764 segment_dir.mkdir(exist_ok=True) 779 765 # Output JSONL matches input filename pattern (e.g., center_DP-3_screen.jsonl)

+23 -27

observe/transcribe.py

··· 19 19 20 20 from observe.diarize import DiarizationError, diarize, save_speaker_embeddings 21 21 from observe.hear import SAMPLE_RATE 22 - from observe.utils import extract_descriptive_suffix 22 + from observe.utils import ( 23 + extract_descriptive_suffix, 24 + get_segment_key, 25 + segment_and_suffix, 26 + ) 23 27 from think.callosum import callosum_send 24 28 from think.entities import load_entity_names 25 29 from think.models import GEMINI_FLASH 26 - from think.utils import ( 27 - PromptNotFoundError, 28 - load_prompt, 29 - segment_key, 30 - setup_cli, 31 - ) 30 + from think.utils import PromptNotFoundError, load_prompt, setup_cli 32 31 33 32 # Constants 34 33 MODEL = GEMINI_FLASH ··· 160 159 161 160 def _segment_info(self, audio_path: Path) -> tuple[Path, str, bool]: 162 161 """Return segment directory, descriptive suffix, and whether already in segment.""" 163 - parent_segment = segment_key(audio_path.parent.name) 164 - if parent_segment: 165 - return audio_path.parent, audio_path.stem, True 166 - 167 - segment = segment_key(audio_path.stem) 168 - if segment is None: 169 - raise ValueError(f"Invalid audio filename: {audio_path.stem}") 170 - segment_dir = audio_path.parent / segment 171 - suffix = extract_descriptive_suffix(audio_path.stem) 172 - return segment_dir, suffix, False 162 + segment, suffix = segment_and_suffix(audio_path) 163 + in_segment = get_segment_key(audio_path.parent) is not None 164 + if in_segment: 165 + return audio_path.parent, suffix, True 166 + return audio_path.parent / segment, suffix, False 173 167 174 168 def _move_to_segment(self, audio_path: Path) -> Path: 175 169 """Move audio file to its segment and return new path.""" ··· 279 273 data = data.mean(axis=1) 280 274 281 275 # Extract date and time based on path structure 282 - # Day root: YYYYMMDD/HHMMSS_LEN_audio.flac -> parent=YYYYMMDD, stem=HHMMSS_... 
283 - # Segment dir: YYYYMMDD/HHMMSS_LEN/audio.flac -> grandparent=YYYYMMDD, parent=HHMMSS_LEN 284 - parent_segment = segment_key(raw_path.parent.name) 285 - if parent_segment: 286 - # Segment dir: extract from grandparent and parent 276 + segment = get_segment_key(raw_path) 277 + time_part = ( 278 + segment.split("_")[0] if segment else raw_path.stem.split("_")[0] 279 + ) 280 + # Day dir is parent or grandparent depending on whether file is in segment 281 + if get_segment_key(raw_path.parent) is not None: 287 282 day_str = raw_path.parent.parent.name 288 - time_part = raw_path.parent.name.split("_")[0] 289 283 else: 290 - # Day root: extract from parent and filename 291 284 day_str = raw_path.parent.name 292 - time_part = raw_path.stem.split("_")[0] 293 285 294 286 base_dt = datetime.datetime.strptime( 295 287 f"{day_str}_{time_part}", "%Y%m%d_%H%M%S" ··· 488 480 """ 489 481 start_time = time.time() 490 482 483 + # Set segment key for token usage logging 484 + segment = get_segment_key(raw_path) 485 + if segment: 486 + os.environ["SEGMENT_KEY"] = segment 487 + 491 488 # Skip if already processed (unless redo mode) 492 489 json_path = self._get_json_path(raw_path) 493 490 if not redo and json_path.exists(): ··· 585 582 586 583 # Validate --redo requires file to be in segment directory 587 584 if args.redo: 588 - parent_segment = segment_key(audio_path.parent.name) 589 - if not parent_segment: 585 + if get_segment_key(audio_path.parent) is None: 590 586 parser.error( 591 587 f"--redo requires audio file to be in a segment directory (HHMMSS_LEN/), " 592 588 f"but parent is: {audio_path.parent.name}"

+85

observe/utils.py

# (diff context) observe/utils.py: the preceding definition ends with `return "raw"`.


def get_segment_key(media_path: Path) -> str | None:
    """
    Extract segment key from a media file path.

    Checks the parent directory name first (for files already in segment
    dirs), then falls back to the filename stem (for files in day root).

    Parameters
    ----------
    media_path : Path
        Path to media file (audio or video)

    Returns
    -------
    str or None
        Segment key in HHMMSS_LEN format, or None if not found

    Examples
    --------
    >>> get_segment_key(Path("/journal/20250101/143022_300/audio.flac"))
    '143022_300'
    >>> get_segment_key(Path("/journal/20250101/143022_300_audio.flac"))
    '143022_300'
    >>> get_segment_key(Path("/journal/20250101/random.txt")) is None
    True
    """
    from think.utils import segment_key

    # Parent directory wins (file already moved into its segment dir);
    # otherwise the key is looked up in the filename stem (file in day root).
    return segment_key(media_path.parent.name) or segment_key(media_path.stem)


def segment_and_suffix(media_path: Path) -> tuple[str, str]:
    """
    Extract segment key and descriptive suffix from a media file path.

    Handles both files in day root (YYYYMMDD/HHMMSS_LEN_suffix.ext) and
    files already in segment directories (YYYYMMDD/HHMMSS_LEN/suffix.ext).

    Parameters
    ----------
    media_path : Path
        Path to media file (audio or video)

    Returns
    -------
    tuple[str, str]
        (segment_key, suffix) - e.g., ("143022_300", "audio")

    Raises
    ------
    ValueError
        If the path doesn't contain a valid segment key, or if the
        descriptive suffix cannot be extracted from the filename stem.

    Examples
    --------
    >>> segment_and_suffix(Path("/journal/20250101/143022_300_audio.flac"))
    ('143022_300', 'audio')
    >>> segment_and_suffix(Path("/journal/20250101/143022_300/audio.flac"))
    ('143022_300', 'audio')
    """
    from think.utils import segment_key

    # File already lives in a segment dir: the directory name is the segment
    # and the whole stem is the descriptive suffix.
    parent_segment = segment_key(media_path.parent.name)
    if parent_segment:
        return parent_segment, media_path.stem

    # File is in day root: both parts come from the filename stem.
    stem = media_path.stem
    invalid = f"Invalid media filename: {stem} (must contain HHMMSS_LEN)"

    segment = segment_key(stem)
    if segment is None:
        raise ValueError(invalid)

    # Re-raise suffix-extraction failures with the same message as the
    # missing-segment case so callers that surface str(exc) to the user
    # (e.g. via parser.error) report one consistent error; the original
    # exception stays attached as __cause__.
    try:
        suffix = extract_descriptive_suffix(stem)
    except ValueError as exc:
        raise ValueError(invalid) from exc
    return segment, suffix


# (diff context) next definition in the file:
# def parse_screen_filename(filename: str) -> tuple[str, str]

+4

think/insight.py

··· 318 318 ) 319 319 args = setup_cli(parser) 320 320 321 + # Set segment key for token usage logging 322 + if args.segment: 323 + os.environ["SEGMENT_KEY"] = args.segment 324 + 321 325 # Resolve insight key or path to metadata 322 326 all_insights = get_insights() 323 327 topic_arg = args.topic

+5

think/models.py

··· 267 267 "usage": normalized_usage, 268 268 } 269 269 270 + # Add segment from env if available (set by observe/transcribe, observe/describe, think/insight) 271 + segment = os.getenv("SEGMENT_KEY") 272 + if segment: 273 + token_data["segment"] = segment 274 + 270 275 # Save to journal/tokens/<YYYYMMDD>.jsonl (one file per day) 271 276 tokens_dir = Path(journal) / "tokens" 272 277 tokens_dir.mkdir(exist_ok=True)

+3 -2

think/supervisor.py

··· 993 993 994 994 # Spawn agents configured for segment schedule 995 995 agents = get_agents() 996 - for persona_id, config in agents.items(): 997 - if config.get("schedule") == "segment": 996 + for persona_id, agent_config in agents.items(): 997 + if agent_config.get("schedule") == "segment": 998 998 try: 999 999 cortex_request( 1000 1000 prompt=f"Processing segment {segment} from {day}. Use available tools to analyze this specific recording window.", 1001 1001 persona=persona_id, 1002 + config={"env": {"SEGMENT_KEY": segment}}, 1002 1003 ) 1003 1004 logging.info(f"Spawned segment agent: {persona_id}") 1004 1005 except Exception as e:

Configure Feed

Configure Feed