Add sol call transcripts CLI and remove sol cluster

+2

apps/transcripts/__init__.py

+137

apps/transcripts/call.py

# SPDX-License-Identifier: AGPL-3.0-only
# Copyright (c) 2026 sol pbc

"""CLI commands for transcript browsing.

Provides human-friendly CLI access to transcript operations, paralleling the
MCP tools in ``think/resources/transcripts.py`` but optimized for terminal use.

Auto-discovered by ``think.call`` and mounted as ``sol call transcripts ...``.
"""

from datetime import datetime, timedelta
from typing import NoReturn

import typer

from think.cluster import (
    cluster,
    cluster_period,
    cluster_range,
    cluster_scan,
    cluster_segments,
)
from think.utils import day_dirs

app = typer.Typer(help="Transcript browsing.")

# Last representable HHMMSS value within a single day; used to clamp ranges
# that would otherwise run past midnight.
_END_OF_DAY = "235959"
_MINUTES_PER_DAY = 24 * 60


def _fail(message: str) -> NoReturn:
    """Print ``Error: <message>`` to stderr and exit with status 1."""
    typer.echo(f"Error: {message}", err=True)
    raise typer.Exit(1)


def _echo_ranges(label: str, ranges) -> None:
    """Print a labelled list of ``(start, end)`` pairs, or ``(none)`` if empty."""
    typer.echo(f"{label}:")
    if not ranges:
        typer.echo(" (none)")
        return
    for start, end in ranges:
        typer.echo(f" {start} - {end}")


def _range_end(start: str, length: int) -> str:
    """Return the HHMMSS end time for a range of *length* minutes from *start*.

    Raises:
        ValueError: if *start* is not a six-digit, valid HHMMSS time.
    """
    # strptime alone is too lenient: it accepts non-padded input such as
    # "1234", which would then be passed on un-normalised.
    if len(start) != 6 or not (start.isascii() and start.isdigit()):
        raise ValueError(f"invalid HHMMSS time: {start!r}")
    start_dt = datetime.strptime(start, "%H%M%S")

    # Capping at one day keeps the arithmetic free of OverflowError while
    # still guaranteeing that any over-long range is detected below.
    end_dt = start_dt + timedelta(minutes=min(length, _MINUTES_PER_DAY))
    if end_dt.date() != start_dt.date():
        # A wrapped end time would be earlier than the start; clamp instead.
        # NOTE(review): assumes cluster_range treats the end as a same-day
        # HHMMSS bound - confirm whether it is inclusive.
        return _END_OF_DAY
    return end_dt.strftime("%H%M%S")


# NOTE: Typer renders each command's docstring as its --help text, so the
# command docstrings below are intentionally kept to a single line.


@app.command("scan")
def scan(day: str = typer.Argument(help="Day (YYYYMMDD).")) -> None:
    """List transcript coverage ranges for a day."""
    audio_ranges, screen_ranges = cluster_scan(day)
    _echo_ranges("Audio", audio_ranges)
    _echo_ranges("Screen", screen_ranges)


@app.command("segments")
def segments(day: str = typer.Argument(help="Day (YYYYMMDD).")) -> None:
    """List recording segments for a day."""
    segment_list = cluster_segments(day)
    if not segment_list:
        typer.echo("No segments.")
        return

    for segment in segment_list:
        key = segment.get("key", "")
        start = segment.get("start", "")
        end = segment.get("end", "")
        types = ", ".join(segment.get("types", []))
        typer.echo(f"{key} {start} - {end} [{types}]")


@app.command("read")
def read(
    day: str = typer.Argument(help="Day (YYYYMMDD)."),
    start: str | None = typer.Option(None, "--start", help="Start time (HHMMSS)."),
    length: int | None = typer.Option(None, "--length", help="Length in minutes."),
    segment: str | None = typer.Option(
        None, "--segment", help="Segment key (HHMMSS_LEN)."
    ),
    full: bool = typer.Option(
        False, "--full", help="Include audio, screen, and agents."
    ),
    raw: bool = typer.Option(False, "--raw", help="Include audio and screen only."),
    audio: bool = typer.Option(False, "--audio", help="Include audio transcripts."),
    screen: bool = typer.Option(False, "--screen", help="Include screen transcripts."),
    agents: bool = typer.Option(False, "--agents", help="Include agent outputs."),
) -> None:
    """Read transcript content for a day, segment, or time range."""
    # --- Source selection -------------------------------------------------
    any_source_flag = audio or screen or agents

    if full and raw:
        _fail("Cannot use --full and --raw together.")
    if (full or raw) and any_source_flag:
        _fail("Cannot mix --full/--raw with individual source flags.")

    if full:
        sources: dict[str, bool] = {"audio": True, "screen": True, "agents": True}
    elif raw:
        sources = {"audio": True, "screen": True, "agents": False}
    elif any_source_flag:
        sources = {"audio": audio, "screen": screen, "agents": agents}
    else:
        # Default view: audio plus agent outputs, no raw screen data.
        sources = {"audio": True, "screen": False, "agents": True}

    # --- Scope selection --------------------------------------------------
    # Use explicit None checks throughout so that an empty-string option is
    # treated the same way by every validation below.
    has_start = start is not None
    has_length = length is not None

    if segment is not None and (has_start or has_length):
        _fail("Cannot mix --segment with --start/--length.")
    if has_start != has_length:
        _fail("--start and --length must be used together.")

    if start is not None and length is not None:
        if length <= 0:
            _fail("--length must be a positive number of minutes.")
        try:
            end = _range_end(start, length)
        except ValueError:
            # Report bad input like every other validation error instead of
            # letting the parse failure surface as a traceback.
            _fail("--start must be a valid time in HHMMSS format.")
        markdown = cluster_range(day, start, end, sources)
    elif segment is not None:
        markdown, _counts = cluster_period(day, segment, sources)
    else:
        markdown, _counts = cluster(day, sources)

    typer.echo(markdown)


@app.command("stats")
def stats(month: str = typer.Argument(help="Month (YYYYMM).")) -> None:
    """Show daily transcript coverage counts for a month."""
    days = sorted(day for day in day_dirs() if day.startswith(month))

    days_with_data = 0
    for day in days:
        audio_ranges, screen_ranges = cluster_scan(day)
        # Days with no coverage of either kind are not listed or counted.
        if audio_ranges or screen_ranges:
            days_with_data += 1
            typer.echo(f"{day} audio:{len(audio_ranges)} screen:{len(screen_ranges)}")

    if not days_with_data:
        typer.echo(f"No data for {month}.")
        return

    typer.echo("")
    typer.echo(f"Total: {days_with_data} days with data")

+2

apps/transcripts/tests/__init__.py

+14

apps/transcripts/tests/conftest.py

# SPDX-License-Identifier: AGPL-3.0-only
# Copyright (c) 2026 sol pbc

"""Fixtures for transcripts app tests."""

import os

import pytest


@pytest.fixture(autouse=True)
def _journal_env(monkeypatch):
    """Set JOURNAL_PATH to the fixture journal for every test in this package."""
    # The path is resolved against the current working directory, so the
    # fixture journal is looked up at ``<cwd>/fixtures/journal``.
    fixture_journal = os.path.join(os.getcwd(), "fixtures", "journal")
    monkeypatch.setenv("JOURNAL_PATH", fixture_journal)

+105

apps/transcripts/tests/test_call.py

# SPDX-License-Identifier: AGPL-3.0-only
# Copyright (c) 2026 sol pbc

"""Tests for transcripts CLI commands (sol call transcripts ...)."""

from typer.testing import CliRunner

from think.call import call_app

runner = CliRunner()


class TestScan:
    """``sol call transcripts scan``."""

    def test_scan_day(self):
        result = runner.invoke(call_app, ["transcripts", "scan", "20240101"])
        assert result.exit_code == 0
        assert "Audio:" in result.output
        assert "Screen:" in result.output

    def test_scan_empty_day(self):
        result = runner.invoke(call_app, ["transcripts", "scan", "20990101"])
        assert result.exit_code == 0
        assert "(none)" in result.output


class TestSegments:
    """``sol call transcripts segments``."""

    def test_segments_day(self):
        result = runner.invoke(call_app, ["transcripts", "segments", "20240101"])
        assert result.exit_code == 0
        assert "123456_300" in result.output

    def test_segments_empty(self):
        result = runner.invoke(call_app, ["transcripts", "segments", "20990101"])
        assert result.exit_code == 0
        assert "No segments" in result.output


class TestRead:
    """``sol call transcripts read``, including option validation errors."""

    def test_read_default(self):
        result = runner.invoke(call_app, ["transcripts", "read", "20240101"])
        assert result.exit_code == 0
        assert "## " in result.output

    def test_read_full(self):
        result = runner.invoke(call_app, ["transcripts", "read", "20240101", "--full"])
        assert result.exit_code == 0

    def test_read_raw(self):
        result = runner.invoke(call_app, ["transcripts", "read", "20240101", "--raw"])
        assert result.exit_code == 0

    def test_read_segment(self):
        result = runner.invoke(
            call_app, ["transcripts", "read", "20240101", "--segment", "123456_300"]
        )
        assert result.exit_code == 0

    def test_read_range(self):
        result = runner.invoke(
            call_app,
            ["transcripts", "read", "20240101", "--start", "123456", "--length", "5"],
        )
        assert result.exit_code == 0

    def test_read_full_and_raw_error(self):
        result = runner.invoke(
            call_app, ["transcripts", "read", "20240101", "--full", "--raw"]
        )
        assert result.exit_code == 1
        assert "Cannot use --full and --raw" in result.output

    def test_read_full_with_source_flag_error(self):
        result = runner.invoke(
            call_app, ["transcripts", "read", "20240101", "--full", "--audio"]
        )
        assert result.exit_code == 1
        assert "Cannot mix --full/--raw with individual source flags" in result.output

    def test_read_start_without_length(self):
        result = runner.invoke(
            call_app, ["transcripts", "read", "20240101", "--start", "123456"]
        )
        assert result.exit_code == 1
        assert "--start and --length must be used together" in result.output

    def test_read_segment_with_start(self):
        result = runner.invoke(
            call_app,
            [
                "transcripts",
                "read",
                "20240101",
                "--segment",
                "123456_300",
                "--start",
                "123456",
            ],
        )
        assert result.exit_code == 1
        # Pin the message so this cannot pass via the unrelated
        # "--start and --length must be used together" error.
        assert "Cannot mix --segment with --start/--length" in result.output


class TestStats:
    """``sol call transcripts stats``."""

    def test_stats_month(self):
        result = runner.invoke(call_app, ["transcripts", "stats", "202401"])
        assert result.exit_code == 0
        assert "20240101" in result.output
        assert "Total: 1 days with data" in result.output

    def test_stats_empty(self):
        result = runner.invoke(call_app, ["transcripts", "stats", "209901"])
        assert result.exit_code == 0
        assert "No data" in result.output

+3 -4

docs/THINK.md

··· 14 14 15 15 The package exposes several commands: 16 16 17 - - `sol cluster` groups audio and screen JSON files into report sections. Use `--start` and 18 - `--length` to limit the report to a specific time range. 17 + - `sol call transcripts read` groups audio and screen transcripts into report sections. Use `--start` and 18 + `--length` to limit the report to a specific time range. See `sol call transcripts --help` for additional commands. 19 19 - `sol dream` runs generators and agents for a single day via Cortex. 20 20 - `sol agents` is the unified CLI for tool agents and generators (spawned by Cortex, NDJSON protocol). 21 21 - `sol supervisor` monitors observation heartbeats. Use `--no-observers` to disable local capture (sense still runs for remote uploads and imports). ··· 24 24 - `sol muse` lists available agents and generators with their configuration. Use `sol muse <name>` to see details, and `sol muse <name> --prompt` to see the fully composed prompt that would be sent to the LLM. 25 25 26 26 ```bash 27 - sol cluster YYYYMMDD [--start HHMMSS --length MINUTES] 27 + sol call transcripts read YYYYMMDD [--start HHMMSS --length MINUTES] 28 28 sol dream [--day YYYYMMDD] [--segment HHMMSS_LEN] [--force] [--run NAME] 29 29 sol supervisor [--no-observers] 30 30 sol mcp [--transport http] [--port PORT] [--path PATH] ··· 271 271 - [CORTEX.md](CORTEX.md) - Full API, event schemas, request format 272 272 - [CALLOSUM.md](CALLOSUM.md) - Message bus protocol 273 273 - [THINK.md](THINK.md) - Cortex usage examples 274 -

-2

sol.py

··· 39 39 COMMANDS: dict[str, str] = { 40 40 # think package - daily processing and analysis 41 41 "import": "think.importer", 42 - "cluster": "think.cluster", 43 42 "dream": "think.dream", 44 43 "planner": "think.planner", 45 44 "indexer": "think.indexer", ··· 90 89 GROUPS: dict[str, list[str]] = { 91 90 "Think (daily processing)": [ 92 91 "import", 93 - "cluster", 94 92 "dream", 95 93 "planner", 96 94 "indexer",

+11 -11

tests/test_cluster_full.py

··· 40 40 assert "### audio summary" in md 41 41 42 42 43 - def test_cluster_cli(tmp_path, monkeypatch, capsys): 43 + def test_cluster_default_sources(tmp_path, monkeypatch): 44 44 mod = importlib.import_module("think.cluster") 45 45 copy_day(tmp_path) 46 46 monkeypatch.setenv("JOURNAL_PATH", str(tmp_path)) 47 - monkeypatch.setattr("sys.argv", ["cluster", "20240101"]) 48 - mod.main() 49 - out = capsys.readouterr().out 47 + out, _counts = mod.cluster( 48 + "20240101", sources={"audio": True, "screen": False, "agents": True} 49 + ) 50 50 # Now uses insight format: "### {stem} summary" 51 51 assert "### screen summary" in out 52 52 53 53 54 - def test_cluster_cli_range(tmp_path, monkeypatch, capsys): 54 + def test_cluster_range_raw_screen(tmp_path, monkeypatch): 55 55 mod = importlib.import_module("think.cluster") 56 56 copy_day(tmp_path) 57 57 monkeypatch.setenv("JOURNAL_PATH", str(tmp_path)) 58 - monkeypatch.setattr( 59 - "sys.argv", 60 - ["cluster", "20240101", "--start", "123456", "--length", "1"], 58 + out = mod.cluster_range( 59 + "20240101", 60 + "123456", 61 + "123556", 62 + sources={"audio": True, "screen": True, "agents": False}, 61 63 ) 62 - mod.main() 63 - out = capsys.readouterr().out 64 - # CLI --start/--length uses raw screen data (screen=True) 64 + # Range mode with screen=True uses raw screen data. 65 65 assert "### Screen Activity" in out 66 66 assert "IDE with auth.py open" in out

+1 -48

think/cluster.py

··· 1 1 # SPDX-License-Identifier: AGPL-3.0-only 2 2 # Copyright (c) 2026 sol pbc 3 3 4 - import argparse 5 4 import os 6 5 import re 7 6 import sys ··· 12 11 13 12 from observe.screen import format_screen_text 14 13 15 - from .utils import day_path, setup_cli 14 + from .utils import day_path 16 15 17 16 18 17 def _date_str(day_dir: str) -> str: ··· 647 646 ] 648 647 groups = _group_entries(entries) 649 648 return _groups_to_markdown(groups) 650 - 651 - 652 - def main(): 653 - parser = argparse.ArgumentParser( 654 - description="Generate a Markdown report for a day's JSON files grouped by recording segments." 655 - ) 656 - parser.add_argument( 657 - "day", 658 - help="Day in YYYYMMDD format", 659 - ) 660 - parser.add_argument( 661 - "--start", 662 - metavar="HHMMSS", 663 - help="Start time for range (HHMMSS)", 664 - ) 665 - parser.add_argument( 666 - "--length", 667 - type=int, 668 - help="Length of range in minutes", 669 - ) 670 - 671 - args = setup_cli(parser) 672 - 673 - if args.start and args.length is not None: 674 - start_dt = datetime.strptime(args.start, "%H%M%S") 675 - end_dt = start_dt + timedelta(minutes=args.length) 676 - # CLI range view: show raw data (audio + screen, no summaries) 677 - markdown = cluster_range( 678 - args.day, 679 - args.start, 680 - end_dt.strftime("%H%M%S"), 681 - sources={"audio": True, "screen": True, "agents": False}, 682 - ) 683 - print(markdown) 684 - elif args.start or args.length is not None: 685 - parser.error("--start and --length must be used together") 686 - else: 687 - # CLI default: show audio + agent summaries (daily view) 688 - markdown, _counts = cluster( 689 - args.day, sources={"audio": True, "screen": False, "agents": True} 690 - ) 691 - print(markdown) 692 - 693 - 694 - if __name__ == "__main__": 695 - main()

Configure Feed

Configure Feed