Fix tilde expansion in sol import and add text dry-run segmentation

Expand ~ in the media path with os.path.expanduser() to fix the
FileNotFoundError raised when the path is shell-quoted (the shell does
not expand a quoted ~, so the literal tilde reaches the importer). Add an
LLM segmentation preview to the text/PDF dry-run output, paralleling the
audio segment/key display. Update the help text to reflect audio/text
(not video) support.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Jer Miller 2 months ago a2df8e7d 8433aca1

+46 -2

1 changed file

expand all

think

importers

cli.py

+46 -2

think/importers/cli.py

··· 161 161 global _callosum, _message_queue, _import_id, _current_stage, _start_time 162 162 global _stage_start_time, _stages_run, _status_thread, _status_running 163 163 164 - parser = argparse.ArgumentParser(description="Chunk a media file into the journal") 165 - parser.add_argument("media", nargs="?", help="Path to video or audio file") 164 + parser = argparse.ArgumentParser(description="Import a media file into the journal") 165 + parser.add_argument("media", nargs="?", help="Path to audio or text file") 166 166 parser.add_argument( 167 167 "--timestamp", help="Timestamp YYYYMMDD_HHMMSS for journal entry" 168 168 ) ··· 237 237 238 238 if not args.media: 239 239 parser.error("the following arguments are required: media") 240 + 241 + args.media = os.path.expanduser(args.media) 240 242 241 243 # Track detection result for metadata 242 244 detection_result = None ··· 306 308 307 309 ext = os.path.splitext(args.media)[1].lower() 308 310 if ext in {".txt", ".md", ".pdf"}: 311 + from think.detect_transcript import detect_transcript_segment 312 + from think.importers.text import _time_to_seconds 313 + 309 314 text = _read_transcript(args.media) 310 315 chars = len(text) 311 316 lines = text.count("\n") + (1 if text and not text.endswith("\n") else 0) 312 317 print() 313 318 print(f" Content: {chars:,} characters, {lines:,} lines") 319 + 320 + # Run segmentation to preview what would be created 321 + start_time = base_dt.strftime("%H:%M:%S") 322 + if args.verbose: 323 + print() 324 + print(" Segmenting transcript...") 325 + segments = detect_transcript_segment(text, start_time) 326 + if segments: 327 + keys = [] 328 + for idx, (start_at, seg_text) in enumerate(segments): 329 + time_part = start_at.replace(":", "") 330 + start_seconds = _time_to_seconds(start_at) 331 + if idx + 1 < len(segments): 332 + next_start_at, _ = segments[idx + 1] 333 + duration = _time_to_seconds(next_start_at) - start_seconds 334 + else: 335 + duration = 5 336 + duration = max(1, duration) 
337 + key = f"{time_part}_{duration}" 338 + keys.append(key) 339 + 340 + print(f" Segments: {len(segments)}") 341 + print(f" Keys: {', '.join(keys)}") 342 + 343 + if args.verbose: 344 + print() 345 + for idx, (key, (start_at, seg_text)) in enumerate( 346 + zip(keys, segments), 1 347 + ): 348 + seg_lines = seg_text.count("\n") + ( 349 + 1 if seg_text and not seg_text.endswith("\n") else 0 350 + ) 351 + print( 352 + f" Segment {idx}: {key} " 353 + f"({seg_lines:,} lines, {len(seg_text):,} chars)" 354 + ) 355 + else: 356 + print() 357 + print(" Segments: segmentation failed") 314 358 else: 315 359 duration = _get_audio_duration(args.media) 316 360 if duration is not None:

Configure Feed

Configure Feed