A loose federation of distributed, typed datasets
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat(cli): add atdata CLI with local infrastructure and diagnose commands

Implement CLI entry point providing:
- `atdata local up/down/status` - Docker-based Redis + MinIO management
- `atdata diagnose` - Redis health check (persistence, memory policy, connectivity)
- `atdata version` - Version display

The local infrastructure uses docker-compose with preconfigured settings
for atdata development (AOF persistence, noeviction policy).

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

+658
+213
src/atdata/cli/__init__.py
··· 1 + """Command-line interface for atdata. 2 + 3 + This module provides CLI commands for managing local development infrastructure 4 + and diagnosing configuration issues. 5 + 6 + Commands: 7 + atdata local up Start Redis and MinIO containers for local development 8 + atdata local down Stop local development containers 9 + atdata diagnose Check Redis configuration and connectivity 10 + atdata version Show version information 11 + 12 + Example: 13 + $ atdata local up 14 + Starting Redis on port 6379... 15 + Starting MinIO on port 9000... 16 + Local infrastructure ready. 17 + 18 + $ atdata diagnose 19 + Checking Redis configuration... 20 + ✓ Redis connected 21 + ✓ Persistence enabled (AOF) 22 + ✓ Memory policy: noeviction 23 + """ 24 + 25 + import argparse 26 + import sys 27 + from typing import Sequence 28 + 29 + 30 + def main(argv: Sequence[str] | None = None) -> int: 31 + """Main entry point for the atdata CLI. 32 + 33 + Args: 34 + argv: Command-line arguments. If None, uses sys.argv[1:]. 35 + 36 + Returns: 37 + Exit code (0 for success, non-zero for errors). 
38 + """ 39 + parser = argparse.ArgumentParser( 40 + prog="atdata", 41 + description="A loose federation of distributed, typed datasets", 42 + formatter_class=argparse.RawDescriptionHelpFormatter, 43 + ) 44 + parser.add_argument( 45 + "--version", "-v", 46 + action="store_true", 47 + help="Show version information", 48 + ) 49 + 50 + subparsers = parser.add_subparsers(dest="command", help="Available commands") 51 + 52 + # 'local' command group 53 + local_parser = subparsers.add_parser( 54 + "local", 55 + help="Manage local development infrastructure", 56 + ) 57 + local_subparsers = local_parser.add_subparsers( 58 + dest="local_command", 59 + help="Local infrastructure commands", 60 + ) 61 + 62 + # 'local up' command 63 + up_parser = local_subparsers.add_parser( 64 + "up", 65 + help="Start Redis and MinIO containers", 66 + ) 67 + up_parser.add_argument( 68 + "--redis-port", 69 + type=int, 70 + default=6379, 71 + help="Redis port (default: 6379)", 72 + ) 73 + up_parser.add_argument( 74 + "--minio-port", 75 + type=int, 76 + default=9000, 77 + help="MinIO API port (default: 9000)", 78 + ) 79 + up_parser.add_argument( 80 + "--minio-console-port", 81 + type=int, 82 + default=9001, 83 + help="MinIO console port (default: 9001)", 84 + ) 85 + up_parser.add_argument( 86 + "--detach", "-d", 87 + action="store_true", 88 + default=True, 89 + help="Run containers in detached mode (default: True)", 90 + ) 91 + 92 + # 'local down' command 93 + down_parser = local_subparsers.add_parser( 94 + "down", 95 + help="Stop local development containers", 96 + ) 97 + down_parser.add_argument( 98 + "--volumes", "-v", 99 + action="store_true", 100 + help="Also remove volumes (deletes all data)", 101 + ) 102 + 103 + # 'local status' command 104 + local_subparsers.add_parser( 105 + "status", 106 + help="Show status of local infrastructure", 107 + ) 108 + 109 + # 'diagnose' command 110 + diagnose_parser = subparsers.add_parser( 111 + "diagnose", 112 + help="Diagnose Redis configuration and 
connectivity", 113 + ) 114 + diagnose_parser.add_argument( 115 + "--host", 116 + default="localhost", 117 + help="Redis host (default: localhost)", 118 + ) 119 + diagnose_parser.add_argument( 120 + "--port", 121 + type=int, 122 + default=6379, 123 + help="Redis port (default: 6379)", 124 + ) 125 + 126 + # 'version' command (alternative to --version flag) 127 + subparsers.add_parser( 128 + "version", 129 + help="Show version information", 130 + ) 131 + 132 + args = parser.parse_args(argv) 133 + 134 + # Handle --version flag 135 + if args.version or args.command == "version": 136 + return _cmd_version() 137 + 138 + # Handle 'local' commands 139 + if args.command == "local": 140 + if args.local_command == "up": 141 + return _cmd_local_up( 142 + redis_port=args.redis_port, 143 + minio_port=args.minio_port, 144 + minio_console_port=args.minio_console_port, 145 + detach=args.detach, 146 + ) 147 + elif args.local_command == "down": 148 + return _cmd_local_down(remove_volumes=args.volumes) 149 + elif args.local_command == "status": 150 + return _cmd_local_status() 151 + else: 152 + local_parser.print_help() 153 + return 1 154 + 155 + # Handle 'diagnose' command 156 + if args.command == "diagnose": 157 + return _cmd_diagnose(host=args.host, port=args.port) 158 + 159 + # No command given 160 + parser.print_help() 161 + return 0 162 + 163 + 164 + def _cmd_version() -> int: 165 + """Show version information.""" 166 + try: 167 + from atdata import __version__ 168 + version = __version__ 169 + except ImportError: 170 + # Fallback to package metadata 171 + from importlib.metadata import version as pkg_version 172 + version = pkg_version("atdata") 173 + 174 + print(f"atdata {version}") 175 + return 0 176 + 177 + 178 + def _cmd_local_up( 179 + redis_port: int, 180 + minio_port: int, 181 + minio_console_port: int, 182 + detach: bool, 183 + ) -> int: 184 + """Start local development infrastructure.""" 185 + from .local import local_up 186 + return local_up( 187 + 
redis_port=redis_port, 188 + minio_port=minio_port, 189 + minio_console_port=minio_console_port, 190 + detach=detach, 191 + ) 192 + 193 + 194 + def _cmd_local_down(remove_volumes: bool) -> int: 195 + """Stop local development infrastructure.""" 196 + from .local import local_down 197 + return local_down(remove_volumes=remove_volumes) 198 + 199 + 200 + def _cmd_local_status() -> int: 201 + """Show status of local infrastructure.""" 202 + from .local import local_status 203 + return local_status() 204 + 205 + 206 + def _cmd_diagnose(host: str, port: int) -> int: 207 + """Diagnose Redis configuration.""" 208 + from .diagnose import diagnose_redis 209 + return diagnose_redis(host=host, port=port) 210 + 211 + 212 + if __name__ == "__main__": 213 + sys.exit(main())
+165
src/atdata/cli/diagnose.py
"""Diagnostic tools for atdata infrastructure.

This module provides commands to diagnose configuration issues with Redis
and other infrastructure components.
"""

import sys
from typing import Any

# Eviction policies treated as safe: ones that won't evict index data.
_SAFE_EVICTION_POLICIES = frozenset(
    {"noeviction", "volatile-lru", "volatile-lfu", "volatile-ttl", "volatile-random"}
)


def _print_status(label: str, ok: bool, detail: str = "") -> None:
    """Print a status line with checkmark or X.

    Args:
        label: Name of the check.
        ok: Whether the check passed; selects the leading symbol.
        detail: Optional text appended after ``": "``.
    """
    symbol = "✓" if ok else "✗"
    line = f"{symbol} {label}"
    if detail:
        line += f": {detail}"
    print(line)


def _run_checks(client: Any) -> bool:
    """Run all post-connection checks and print one status line per check.

    Args:
        client: A connected redis client (only ``info``, ``config_get`` and
            ``scan_iter`` are used).

    Returns:
        True if any check that counts as an issue failed. The version, AOF
        and memory-policy checks count; RDB, max memory, memory usage and the
        key count are informational only.
    """
    issues_found = False

    # Check Redis version
    try:
        info = client.info()
        _print_status("Version", True, info.get("redis_version", "unknown"))
    except Exception as e:
        _print_status("Version", False, str(e))
        issues_found = True

    # Check persistence - AOF
    try:
        aof_enabled = client.config_get("appendonly").get("appendonly", "no")
        aof_ok = aof_enabled == "yes"
        _print_status(
            "AOF Persistence",
            aof_ok,
            "enabled" if aof_ok else "DISABLED - data may be lost on restart!",
        )
        if not aof_ok:
            issues_found = True
    except Exception as e:
        _print_status("AOF Persistence", False, f"check failed: {e}")
        issues_found = True

    # Check persistence - RDB.
    # Reported but never counted as an issue: a missing AOF is already flagged
    # above, and RDB being off is acceptable while AOF is on.
    try:
        save_config = client.config_get("save").get("save", "")
        rdb_ok = bool(save_config and save_config.strip())
        _print_status(
            "RDB Persistence",
            rdb_ok,
            f"configured ({save_config})" if rdb_ok else "DISABLED",
        )
    except Exception as e:
        _print_status("RDB Persistence", False, f"check failed: {e}")

    # Check memory policy
    try:
        policy = client.config_get("maxmemory-policy").get("maxmemory-policy", "unknown")
        if policy in _SAFE_EVICTION_POLICIES:
            _print_status("Memory Policy", True, policy)
        else:
            _print_status(
                "Memory Policy",
                False,
                f"{policy} - may evict index data! Use 'noeviction' or 'volatile-*'",
            )
            issues_found = True
    except Exception as e:
        _print_status("Memory Policy", False, f"check failed: {e}")
        issues_found = True

    # Check maxmemory setting (0 means no limit is configured)
    try:
        maxmemory_bytes = int(client.config_get("maxmemory").get("maxmemory", "0"))
        if maxmemory_bytes == 0:
            _print_status("Max Memory", True, "unlimited")
        else:
            maxmemory_mb = maxmemory_bytes / (1024 * 1024)
            _print_status("Max Memory", True, f"{maxmemory_mb:.0f} MB")
    except Exception as e:
        _print_status("Max Memory", False, f"check failed: {e}")

    # Check current memory usage
    try:
        memory_info = client.info("memory")
        used_memory = memory_info.get("used_memory_human", "unknown")
        peak_memory = memory_info.get("used_memory_peak_human", "unknown")
        _print_status("Memory Usage", True, f"{used_memory} (peak: {peak_memory})")
    except Exception as e:
        _print_status("Memory Usage", False, f"check failed: {e}")

    # Check number of atdata keys
    try:
        dataset_count = sum(
            1 for _ in client.scan_iter(match="LocalDatasetEntry:*", count=100)
        )
        schema_count = sum(
            1 for _ in client.scan_iter(match="LocalSchema:*", count=100)
        )
        _print_status(
            "atdata Keys",
            True,
            f"{dataset_count} datasets, {schema_count} schemas",
        )
    except Exception as e:
        _print_status("atdata Keys", False, f"check failed: {e}")

    return issues_found


def diagnose_redis(host: str = "localhost", port: int = 6379) -> int:
    """Diagnose Redis configuration and connectivity.

    Checks for common issues that can cause data loss:
    - Connection issues
    - Persistence settings (AOF/RDB)
    - Memory eviction policy
    - Memory usage

    Args:
        host: Redis host (default: localhost)
        port: Redis port (default: 6379)

    Returns:
        Exit code (0 if all checks pass, 1 if any issues found, the redis
        package is missing, or the server cannot be reached)
    """
    print(f"Diagnosing Redis at {host}:{port}...")
    print()

    try:
        from redis import Redis
    except ImportError:
        print("Error: redis package not installed", file=sys.stderr)
        return 1

    client = None
    try:
        # Try to connect
        try:
            client = Redis(host=host, port=port, socket_connect_timeout=5)
            client.ping()
            _print_status("Connection", True, "connected")
        except Exception as e:
            _print_status("Connection", False, str(e))
            print()
            print("Cannot connect to Redis. Make sure Redis is running:")
            print("  atdata local up")
            return 1

        issues_found = _run_checks(client)
    finally:
        # Release the connection on every exit path, including failures.
        if client is not None:
            client.close()

    print()

    if issues_found:
        print("Issues found! Recommended configuration:")
        print()
        print("  # In redis.conf or via CONFIG SET:")
        print("  appendonly yes")
        print("  maxmemory-policy noeviction")
        print()
        print("  # Or use atdata's preconfigured local setup:")
        print("  atdata local up")
        return 1

    print("All checks passed. Redis is properly configured for atdata.")
    return 0
+280
src/atdata/cli/local.py
"""Local infrastructure management for atdata.

This module provides commands to start and stop local development infrastructure:
- Redis: For index storage and metadata
- MinIO: S3-compatible object storage for dataset files

The infrastructure runs in Docker containers managed via docker-compose or
direct docker commands.
"""

import shutil
import subprocess
import sys
import time
from pathlib import Path
from textwrap import dedent

# Container names for tracking
REDIS_CONTAINER = "atdata-redis"
MINIO_CONTAINER = "atdata-minio"

# Docker compose configuration. Placeholders are filled by _get_compose_file().
# NOTE(review): the MinIO healthcheck shells out to `curl`; confirm that the
# `minio/minio:latest` image still ships curl, otherwise the container will be
# reported unhealthy even though it is serving.
COMPOSE_TEMPLATE = dedent("""\
version: '3.8'

services:
  redis:
    image: redis:7-alpine
    container_name: {redis_container}
    ports:
      - "{redis_port}:6379"
    volumes:
      - atdata-redis-data:/data
    command: redis-server --appendonly yes --maxmemory-policy noeviction
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 5s
      timeout: 3s
      retries: 3

  minio:
    image: minio/minio:latest
    container_name: {minio_container}
    ports:
      - "{minio_port}:9000"
      - "{minio_console_port}:9001"
    volumes:
      - atdata-minio-data:/data
    environment:
      MINIO_ROOT_USER: minioadmin
      MINIO_ROOT_PASSWORD: minioadmin
    command: server /data --console-address ":9001"
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
      interval: 5s
      timeout: 3s
      retries: 3

volumes:
  atdata-redis-data:
  atdata-minio-data:
""")


def _check_docker() -> bool:
    """Check if Docker is available and running.

    Prints a message to stderr describing the problem when it is not.

    Returns:
        True if the ``docker`` executable is on PATH and ``docker info``
        succeeds within 10 seconds, False otherwise.
    """
    if not shutil.which("docker"):
        print("Error: Docker is not installed or not in PATH", file=sys.stderr)
        return False

    try:
        result = subprocess.run(
            ["docker", "info"],
            capture_output=True,
            text=True,
            timeout=10,
        )
    except subprocess.TimeoutExpired:
        print("Error: Docker daemon not responding", file=sys.stderr)
        return False
    except Exception as e:
        print(f"Error checking Docker: {e}", file=sys.stderr)
        return False

    if result.returncode != 0:
        print("Error: Docker daemon is not running", file=sys.stderr)
        return False

    return True


def _get_compose_file(
    redis_port: int,
    minio_port: int,
    minio_console_port: int,
) -> str:
    """Generate docker-compose configuration for the given host ports."""
    return COMPOSE_TEMPLATE.format(
        redis_container=REDIS_CONTAINER,
        minio_container=MINIO_CONTAINER,
        redis_port=redis_port,
        minio_port=minio_port,
        minio_console_port=minio_console_port,
    )


def _container_running(name: str) -> bool:
    """Check if a container is running.

    Any failure to query Docker (missing binary, timeout, unknown container)
    is reported as "not running".
    """
    try:
        result = subprocess.run(
            ["docker", "inspect", "-f", "{{.State.Running}}", name],
            capture_output=True,
            text=True,
            timeout=5,
        )
    except Exception:
        return False
    return result.returncode == 0 and result.stdout.strip() == "true"


def _compose_base_cmd() -> list[str]:
    """Return the compose invocation to use, preferring v2 over v1.

    Raises:
        RuntimeError: If Docker, or both compose variants, are unavailable.
    """
    if not shutil.which("docker"):
        raise RuntimeError("Docker not found")

    # Prefer 'docker compose' (v2) over 'docker-compose' (v1)
    check = subprocess.run(
        ["docker", "compose", "version"],
        capture_output=True,
        timeout=5,
    )
    if check.returncode == 0:
        return ["docker", "compose"]
    if shutil.which("docker-compose"):
        return ["docker-compose"]
    raise RuntimeError("Neither 'docker compose' nor 'docker-compose' available")


def _run_compose(
    compose_content: str,
    command: list[str],
    *,
    capture_output: bool = False,
) -> subprocess.CompletedProcess:
    """Run a docker-compose command with the given configuration.

    The configuration is written to ``~/.atdata/docker-compose.yml``
    (overwriting any previous file) and passed to compose via ``-f``.

    Args:
        compose_content: Full text of the compose file.
        command: Compose sub-command and its arguments, e.g. ``["up", "-d"]``.
        capture_output: Capture stdout/stderr instead of inheriting them.

    Returns:
        The completed compose process.

    Raises:
        RuntimeError: If no usable compose command is available.
    """
    compose_dir = Path.home() / ".atdata"
    compose_dir.mkdir(parents=True, exist_ok=True)
    compose_file = compose_dir / "docker-compose.yml"
    compose_file.write_text(compose_content)

    full_cmd = _compose_base_cmd() + ["-f", str(compose_file)] + command

    return subprocess.run(
        full_cmd,
        capture_output=capture_output,
        text=True,
    )


def local_up(
    redis_port: int = 6379,
    minio_port: int = 9000,
    minio_console_port: int = 9001,
    detach: bool = True,
) -> int:
    """Start local development infrastructure.

    Args:
        redis_port: Port for Redis (default: 6379)
        minio_port: Port for MinIO API (default: 9000)
        minio_console_port: Port for MinIO console (default: 9001)
        detach: Run in background (default: True). When False, the call
            blocks until compose exits and no connection summary is printed.

    Returns:
        Exit code (0 for success)
    """
    if not _check_docker():
        return 1

    print("Starting atdata local infrastructure...")

    compose_content = _get_compose_file(redis_port, minio_port, minio_console_port)
    command = ["up"]
    if detach:
        command.append("-d")

    try:
        result = _run_compose(compose_content, command)
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        return 1

    if result.returncode != 0:
        print("Error: Failed to start containers", file=sys.stderr)
        return result.returncode

    if not detach:
        # A foreground `up` only returns once the containers have exited, so
        # announcing that the infrastructure "started" would be misleading.
        return 0

    # Wait a moment for containers to be healthy
    time.sleep(2)

    # Show status
    print()
    print("Local infrastructure started:")
    print(f"  Redis:         localhost:{redis_port}")
    print(f"  MinIO API:     http://localhost:{minio_port}")
    print(f"  MinIO Console: http://localhost:{minio_console_port}")
    print()
    print("MinIO credentials: minioadmin / minioadmin")
    print()
    print("Example usage:")
    print("  from atdata.local import Index, S3DataStore")
    print("  ")
    print("  store = S3DataStore.from_credentials({")
    print(f"      'AWS_ENDPOINT': 'http://localhost:{minio_port}',")
    print("      'AWS_ACCESS_KEY_ID': 'minioadmin',")
    print("      'AWS_SECRET_ACCESS_KEY': 'minioadmin',")
    print("  }, bucket='datasets')")
    print("  index = Index(data_store=store)")

    return 0


def local_down(remove_volumes: bool = False) -> int:
    """Stop local development infrastructure.

    Args:
        remove_volumes: Also remove data volumes (default: False)

    Returns:
        Exit code (0 for success)
    """
    if not _check_docker():
        return 1

    print("Stopping atdata local infrastructure...")

    # Use default ports for compose file (actual ports don't matter for down)
    compose_content = _get_compose_file(6379, 9000, 9001)
    command = ["down"]
    if remove_volumes:
        command.append("-v")
        print("Warning: This will delete all local data!")

    try:
        result = _run_compose(compose_content, command)
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        return 1

    if result.returncode != 0:
        print("Error: Failed to stop containers", file=sys.stderr)
        return result.returncode

    print("Local infrastructure stopped.")
    return 0


def local_status() -> int:
    """Show status of local infrastructure.

    Returns:
        Exit code (0 for success, 1 if Docker is unavailable)
    """
    if not _check_docker():
        return 1

    redis_running = _container_running(REDIS_CONTAINER)
    minio_running = _container_running(MINIO_CONTAINER)

    print("atdata local infrastructure status:")
    print()
    print(f"  Redis ({REDIS_CONTAINER}): {'running' if redis_running else 'stopped'}")
    print(f"  MinIO ({MINIO_CONTAINER}): {'running' if minio_running else 'stopped'}")

    print()
    if redis_running or minio_running:
        print("To stop: atdata local down")
    else:
        print("To start: atdata local up")

    return 0