personal memory agent
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'hopper-dvw2lfip-sol-health-command'

+274 -9
+1 -1
muse/help.md
··· 27 27 - `sol supervisor` - Run supervisor services. 28 28 - `sol detect-created` - Detect newly created content artifacts. 29 29 - `sol top` - Show runtime/service activity status. 30 - - `sol logs` - View service health logs. 30 + - `sol health` - Show service health status. Use `sol health logs` to view service logs. 31 31 - `sol callosum` - Interact with Callosum message bus tooling. 32 32 - `sol streams` - Manage or inspect stream-related state. 33 33 - `sol journal-stats` - Show journal statistics.
+2 -2
sol.py
··· 46 46 "schedule": "think.scheduler", 47 47 "detect-created": "think.detect_created", 48 48 "top": "think.top", 49 - "logs": "think.logs_cli", 49 + "health": "think.health_cli", 50 50 "callosum": "think.callosum", 51 51 "notify": "think.notify_cli", 52 52 "streams": "think.streams", ··· 99 99 "supervisor", 100 100 "schedule", 101 101 "top", 102 - "logs", 102 + "health", 103 103 "callosum", 104 104 "notify", 105 105 ],
+112
tests/test_health_cli.py
··· 1 + # SPDX-License-Identifier: AGPL-3.0-only 2 + # Copyright (c) 2026 sol pbc 3 + 4 + from __future__ import annotations 5 + 6 + import sys 7 + from unittest.mock import patch 8 + 9 + import pytest 10 + 11 + from think.health_cli import health_check, main, print_status 12 + 13 + 14 + def test_health_check_no_socket(tmp_path, monkeypatch, capsys): 15 + monkeypatch.setenv("JOURNAL_PATH", str(tmp_path)) 16 + 17 + result = health_check() 18 + 19 + captured = capsys.readouterr() 20 + assert result == 1 21 + assert "callosum socket not found" in captured.err 22 + 23 + 24 + def test_health_check_prints_status(capsys): 25 + status = { 26 + "services": [ 27 + {"name": "supervisor", "pid": 1001, "uptime_seconds": 65}, 28 + {"name": "observer", "pid": 2002, "uptime_seconds": 5}, 29 + ], 30 + "crashed": [{"name": "sync", "restart_attempts": 2}], 31 + "tasks": [{"name": "dream", "duration_seconds": 12}], 32 + "queues": {"indexer": 3, "planner": 0}, 33 + "stale_heartbeats": [], 34 + } 35 + 36 + print_status(status) 37 + 38 + output = capsys.readouterr().out 39 + assert "Services:" in output 40 + assert "supervisor" in output 41 + assert "pid 1001" in output 42 + assert "observer" in output 43 + assert "Crashed:" in output 44 + assert "sync" in output 45 + assert "Tasks:" in output 46 + assert "dream" in output 47 + assert "queued indexer" in output 48 + assert "Heartbeat: ok" in output 49 + 50 + 51 + def test_health_check_timeout(tmp_path, monkeypatch, capsys): 52 + monkeypatch.setenv("JOURNAL_PATH", str(tmp_path)) 53 + sock = tmp_path / "health" / "callosum.sock" 54 + sock.parent.mkdir(parents=True) 55 + sock.touch() 56 + monkeypatch.setattr("think.health_cli.STATUS_TIMEOUT", 0.1) 57 + 58 + with patch("think.health_cli.CallosumConnection") as mock_conn_cls: 59 + mock_conn = mock_conn_cls.return_value 60 + mock_conn.start.return_value = None 61 + mock_conn.stop.return_value = None 62 + 63 + result = health_check() 64 + 65 + captured = capsys.readouterr() 66 + assert result == 1 67 + assert "Timed out waiting for supervisor status" in captured.err 68 + mock_conn.stop.assert_called_once() 69 + 70 + 71 + def test_health_check_receives_status(tmp_path, monkeypatch, capsys): 72 + monkeypatch.setenv("JOURNAL_PATH", str(tmp_path)) 73 + sock = tmp_path / "health" / "callosum.sock" 74 + sock.parent.mkdir(parents=True) 75 + sock.touch() 76 + 77 + with patch("think.health_cli.CallosumConnection") as mock_conn_cls: 78 + mock_conn = mock_conn_cls.return_value 79 + 80 + def _start(*, callback): 81 + callback( 82 + { 83 + "tract": "supervisor", 84 + "event": "status", 85 + "services": [ 86 + {"name": "supervisor", "pid": 111, "uptime_seconds": 120} 87 + ], 88 + "tasks": [], 89 + "queues": {}, 90 + "stale_heartbeats": [], 91 + } 92 + ) 93 + 94 + mock_conn.start.side_effect = _start 95 + mock_conn.stop.return_value = None 96 + 97 + result = health_check() 98 + 99 + captured = capsys.readouterr() 100 + assert result == 0 101 + assert "Services:" in captured.out 102 + assert "supervisor" in captured.out 103 + 104 + 105 + def test_main_routes_to_logs(monkeypatch): 106 + monkeypatch.setattr(sys, "argv", ["sol health", "logs", "--help"]) 107 + 108 + with patch("think.logs_cli.main", side_effect=SystemExit(0)) as mock_logs_main: 109 + with pytest.raises(SystemExit): 110 + main() 111 + 112 + mock_logs_main.assert_called_once()
+153
think/health_cli.py
··· 1 + # SPDX-License-Identifier: AGPL-3.0-only 2 + # Copyright (c) 2026 sol pbc 3 + 4 + """CLI for service health status and logs. 5 + 6 + Usage: 7 + sol health Show current service health status 8 + sol health logs View service health logs 9 + """ 10 + 11 + from __future__ import annotations 12 + 13 + import argparse 14 + import sys 15 + import threading 16 + from datetime import timedelta 17 + from pathlib import Path 18 + from typing import Any 19 + 20 + from think.callosum import CallosumConnection 21 + from think.utils import get_journal, setup_cli 22 + 23 + STATUS_TIMEOUT = 10 24 + 25 + 26 + def format_uptime(seconds: int) -> str: 27 + """Format uptime in human-readable format.""" 28 + if seconds < 60: 29 + return f"{seconds}s" 30 + 31 + delta = timedelta(seconds=seconds) 32 + parts = [] 33 + if delta.days: 34 + parts.append(f"{delta.days}d") 35 + 36 + hours = delta.seconds // 3600 37 + if hours: 38 + parts.append(f"{hours}h") 39 + 40 + mins = (delta.seconds % 3600) // 60 41 + if mins: 42 + parts.append(f"{mins}m") 43 + 44 + return " ".join(parts) 45 + 46 + 47 + def print_status(status: dict[str, Any]) -> None: 48 + """Print supervisor status in a human-readable format.""" 49 + print("Services:") 50 + for service in status.get("services", []): 51 + name = service.get("name", "?") 52 + pid = service.get("pid", "?") 53 + uptime_seconds = int(service.get("uptime_seconds", 0) or 0) 54 + print(f" {name:16} pid {pid} uptime {format_uptime(uptime_seconds)}") 55 + 56 + crashed = status.get("crashed") or [] 57 + if crashed: 58 + print() 59 + print("Crashed:") 60 + for service in crashed: 61 + name = service.get("name", "?") 62 + attempts = service.get("restart_attempts", 0) 63 + print(f" {name:16} {attempts} restart attempts") 64 + 65 + print() 66 + tasks = status.get("tasks") or [] 67 + queues = status.get("queues") or {} 68 + non_zero_queues = [(name, count) for name, count in sorted(queues.items()) if count] 69 + 70 + if tasks: 71 + print("Tasks:") 72 + for task in tasks: 73 + name = task.get("name", "?") 74 + duration = task.get("duration_seconds", 0) 75 + print(f" {name:16} {duration}s") 76 + for name, count in non_zero_queues: 77 + print(f" queued {name:9} {count}") 78 + elif non_zero_queues: 79 + print("Tasks:") 80 + for name, count in non_zero_queues: 81 + print(f" queued {name:9} {count}") 82 + else: 83 + print("Tasks: none") 84 + 85 + stale = status.get("stale_heartbeats") or [] 86 + if stale: 87 + print() 88 + print(f"Heartbeat: STALE ({', '.join(stale)})") 89 + else: 90 + print("Heartbeat: ok") 91 + 92 + 93 + def health_check() -> int: 94 + """Request and print one-shot supervisor status.""" 95 + sock_path = Path(get_journal()) / "health" / "callosum.sock" 96 + if not sock_path.exists(): 97 + print( 98 + f"Cannot connect: callosum socket not found at {sock_path}", 99 + file=sys.stderr, 100 + ) 101 + return 1 102 + 103 + status_event = threading.Event() 104 + status_holder: dict[str, dict[str, Any]] = {} 105 + 106 + def callback(msg: dict[str, Any]) -> None: 107 + if msg.get("tract") == "supervisor" and msg.get("event") == "status": 108 + status_holder["data"] = msg 109 + status_event.set() 110 + 111 + conn = CallosumConnection(socket_path=sock_path) 112 + conn.start(callback=callback) 113 + try: 114 + got_status = status_event.wait(timeout=STATUS_TIMEOUT) 115 + finally: 116 + conn.stop() 117 + 118 + if not got_status: 119 + print( 120 + f"Timed out waiting for supervisor status ({STATUS_TIMEOUT:g}s)", 121 + file=sys.stderr, 122 + ) 123 + return 1 124 + 125 + print_status(status_holder["data"]) 126 + return 0 127 + 128 + 129 + def main() -> None: 130 + """Entry point for ``sol health``.""" 131 + args = sys.argv[1:] 132 + if args and args[0] == "logs": 133 + sys.argv = ["sol health logs"] + args[1:] 134 + from think.logs_cli import main as logs_main 135 + 136 + logs_main() 137 + return 138 + 139 + parser = argparse.ArgumentParser( 140 + prog="sol health", 141 + description=( 142 + "Show service health status.\n\n" 143 + "Subcommands:\n" 144 + " logs View service health logs (sol health logs -h for details)" 145 + ), 146 + formatter_class=argparse.RawDescriptionHelpFormatter, 147 + ) 148 + setup_cli(parser) 149 + sys.exit(health_check()) 150 + 151 + 152 + if __name__ == "__main__": 153 + main()
+6 -6
think/logs_cli.py
··· 4 4 """CLI for viewing service health logs. 5 5 6 6 Usage: 7 - sol logs Show last 5 lines from each service 8 - sol logs -c 20 Show last 20 lines from each service 9 - sol logs -f Follow all logs for new output 10 - sol logs --since 30m Lines from last 30 minutes 11 - sol logs --service observer Only show observer logs 12 - sol logs --grep "error" Lines matching regex "error" 7 + sol health logs Show last 5 lines from each service 8 + sol health logs -c 20 Show last 20 lines from each service 9 + sol health logs -f Follow all logs for new output 10 + sol health logs --since 30m Lines from last 30 minutes 11 + sol health logs --service observer Only show observer logs 12 + sol health logs --grep "error" Lines matching regex "error" 13 13 """ 14 14 15 15 from __future__ import annotations