···2727- `sol supervisor` - Run supervisor services.
2828- `sol detect-created` - Detect newly created content artifacts.
2929- `sol top` - Show runtime/service activity status.
3030-- `sol logs` - View service health logs.
3030+- `sol health` - Show service health status. Use `sol health logs` to view service logs.
3131- `sol callosum` - Interact with Callosum message bus tooling.
3232- `sol streams` - Manage or inspect stream-related state.
3333- `sol journal-stats` - Show journal statistics.
···11+# SPDX-License-Identifier: AGPL-3.0-only
22+# Copyright (c) 2026 sol pbc
33+44+"""CLI for service health status and logs.
55+66+Usage:
77+ sol health Show current service health status
88+ sol health logs View service health logs
99+"""
1010+1111+from __future__ import annotations
1212+1313+import argparse
1414+import sys
1515+import threading
1616+from datetime import timedelta
1717+from pathlib import Path
1818+from typing import Any
1919+2020+from think.callosum import CallosumConnection
2121+from think.utils import get_journal, setup_cli
2222+2323+STATUS_TIMEOUT = 10
def format_uptime(seconds: int) -> str:
    """Render a duration in seconds as a compact string such as ``1d 2h 3m``.

    Durations under one minute are shown as seconds (``"42s"``). Longer
    durations list only the non-zero day, hour, and minute components,
    separated by spaces; any leftover seconds are dropped.
    """
    if seconds < 60:
        return f"{seconds}s"

    span = timedelta(seconds=seconds)
    # timedelta keeps whole days separately from the sub-day remainder
    # (span.seconds), so the remainder only needs splitting into h/m.
    whole_hours, leftover = divmod(span.seconds, 3600)
    components = (
        (span.days, "d"),
        (whole_hours, "h"),
        (leftover // 60, "m"),
    )
    return " ".join(f"{amount}{unit}" for amount, unit in components if amount)
def print_status(status: dict[str, Any]) -> None:
    """Write a plain-text summary of a supervisor status message to stdout.

    Sections are printed in this order: running services, crashed
    services (only when there are any), active tasks together with
    non-empty queues, and finally the heartbeat state. Missing fields
    fall back to ``"?"`` / ``0`` placeholders.
    """
    print("Services:")
    for svc in status.get("services", []):
        svc_name = svc.get("name", "?")
        svc_pid = svc.get("pid", "?")
        # A missing or None uptime is reported as zero seconds.
        up_secs = int(svc.get("uptime_seconds", 0) or 0)
        print(f" {svc_name:16} pid {svc_pid} uptime {format_uptime(up_secs)}")

    crashed_services = status.get("crashed") or []
    if crashed_services:
        print()
        print("Crashed:")
        for entry in crashed_services:
            crashed_name = entry.get("name", "?")
            restarts = entry.get("restart_attempts", 0)
            print(f" {crashed_name:16} {restarts} restart attempts")

    print()
    running = status.get("tasks") or []
    queue_sizes = status.get("queues") or {}
    # Only queues that currently hold something are worth listing.
    backlog = [
        (queue, depth) for queue, depth in sorted(queue_sizes.items()) if depth
    ]

    if not running and not backlog:
        print("Tasks: none")
    else:
        print("Tasks:")
        for job in running:
            job_name = job.get("name", "?")
            elapsed = job.get("duration_seconds", 0)
            print(f" {job_name:16} {elapsed}s")
        for queue, depth in backlog:
            print(f" queued {queue:9} {depth}")

    stale_names = status.get("stale_heartbeats") or []
    if not stale_names:
        print("Heartbeat: ok")
        return

    # The stale report is set off from the task section by a blank line.
    print()
    print(f"Heartbeat: STALE ({', '.join(stale_names)})")
def health_check() -> int:
    """Wait for one supervisor status message on Callosum and print it.

    Returns:
        0 when a status message arrived and was printed; 1 when the
        callosum socket file does not exist or no status message arrived
        within ``STATUS_TIMEOUT`` seconds (the reason goes to stderr).
    """
    socket_file = Path(get_journal()) / "health" / "callosum.sock"
    if not socket_file.exists():
        print(
            f"Cannot connect: callosum socket not found at {socket_file}",
            file=sys.stderr,
        )
        return 1

    arrived = threading.Event()
    received: list[dict[str, Any]] = []

    def on_message(message: dict[str, Any]) -> None:
        # Ignore everything except supervisor status events.
        if message.get("tract") != "supervisor" or message.get("event") != "status":
            return
        received.append(message)
        arrived.set()

    connection = CallosumConnection(socket_path=socket_file)
    connection.start(callback=on_message)
    try:
        arrived_in_time = arrived.wait(timeout=STATUS_TIMEOUT)
    finally:
        # Always stop the connection, whether or not a status arrived.
        connection.stop()

    if arrived_in_time:
        # Print the most recent status seen before the connection stopped.
        print_status(received[-1])
        return 0

    print(
        f"Timed out waiting for supervisor status ({STATUS_TIMEOUT:g}s)",
        file=sys.stderr,
    )
    return 1
def main() -> None:
    """Entry point for ``sol health``.

    When the first argument is ``logs``, the remaining arguments are handed
    to the logs CLI. Otherwise the parser is set up and the process exits
    with the result of :func:`health_check`.
    """
    cli_args = sys.argv[1:]
    if cli_args[:1] == ["logs"]:
        # Drop the "logs" token and rewrite argv[0] before delegating.
        # NOTE(review): presumably so the logs CLI's own argument parsing
        # sees only its options and reports the full command name — confirm.
        sys.argv = ["sol health logs", *cli_args[1:]]
        from think.logs_cli import main as logs_main

        logs_main()
        return

    help_text = (
        "Show service health status.\n\n"
        "Subcommands:\n"
        " logs View service health logs (sol health logs -h for details)"
    )
    health_parser = argparse.ArgumentParser(
        prog="sol health",
        description=help_text,
        # Keep the hand-written line breaks in the description as-is.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    # setup_cli comes from think.utils; what it does with the parser is not
    # visible from this file.
    setup_cli(health_parser)
    sys.exit(health_check())
150150+151151+152152+if __name__ == "__main__":
153153+ main()
+6-6
think/logs_cli.py
···44"""CLI for viewing service health logs.
5566Usage:
77- sol logs Show last 5 lines from each service
88- sol logs -c 20 Show last 20 lines from each service
99- sol logs -f Follow all logs for new output
1010- sol logs --since 30m Lines from last 30 minutes
1111- sol logs --service observer Only show observer logs
1212- sol logs --grep "error" Lines matching regex "error"
77+ sol health logs Show last 5 lines from each service
88+ sol health logs -c 20 Show last 20 lines from each service
99+ sol health logs -f Follow all logs for new output
1010+ sol health logs --since 30m Lines from last 30 minutes
1111+ sol health logs --service observer Only show observer logs
1212+ sol health logs --grep "error" Lines matching regex "error"
1313"""
14141515from __future__ import annotations