personal memory agent
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Add session readiness pre-flight to observer startup

The observer was crash-looping after a reboot, before the desktop session was
ready (no PulseAudio, no DBus, no display). Now:

- check_session_ready() verifies DISPLAY or WAYLAND_DISPLAY, DBUS_SESSION_BUS_ADDRESS,
and `pactl info` (only when pactl is installed) before setup
- Exit code 75 (EX_TEMPFAIL) signals "not ready" to supervisor
- Supervisor uses a fixed 15s retry delay for tempfail exits (vs 0/1/5s backoff)
and, when every exited runner returned tempfail, skips noisy error-level
logging and desktop notifications
- Audio detection makes up to 3 attempts, 5s apart, to allow for device initialization
- Hardened detect.py: wrapped sc.default_speaker() and
sc.all_microphones() in try/except

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

+92 -16
+14 -6
observe/detect.py
··· 1 1 # SPDX-License-Identifier: AGPL-3.0-only 2 2 # Copyright (c) 2026 sol pbc 3 3 4 + import logging 4 5 import threading 5 6 6 7 import numpy as np 7 8 import soundcard as sc 9 + 10 + logger = logging.getLogger(__name__) 8 11 9 12 10 13 def input_detect(duration=0.4, sample_rate=44100): 11 14 t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False) 12 15 tone = 0.5 * np.sin(2 * np.pi * 18000 * t) # ultrasonic 13 16 14 - devices = sc.all_microphones(include_loopback=True) 17 + try: 18 + devices = sc.all_microphones(include_loopback=True) 19 + except Exception: 20 + logger.warning("Failed to enumerate audio devices") 21 + return None, None 15 22 if not devices: 16 - print("No matching devices found:") 17 - for mic in devices: 18 - print(mic) 23 + logger.warning("No audio devices found") 19 24 return None, None 20 25 21 26 results = {} ··· 33 38 34 39 def play_tone(): 35 40 barrier.wait() 36 - sp = sc.default_speaker() 37 - sp.play(tone, samplerate=sample_rate) 41 + try: 42 + sp = sc.default_speaker() 43 + sp.play(tone, samplerate=sample_rate) 44 + except Exception: 45 + logger.warning("No default speaker available for tone detection") 38 46 39 47 threads = [] 40 48 for mic in devices:
+57 -2
observe/linux/observer.py
··· 21 21 import logging 22 22 import os 23 23 import platform 24 + import shutil 24 25 import signal 25 26 import socket 27 + import subprocess 26 28 import sys 27 29 import time 28 30 from pathlib import Path ··· 59 61 TMUX_ACTIVITY_THRESHOLD = 5 # seconds - fixed window for activity detection 60 62 61 63 64 + # Exit codes 65 + EXIT_TEMPFAIL = 75 # EX_TEMPFAIL: session not ready, retry later 66 + 62 67 # Capture modes 63 68 MODE_IDLE = "idle" 64 69 MODE_SCREENCAST = "screencast" 65 70 MODE_TMUX = "tmux" 71 + 72 + # Audio detection retry 73 + DETECT_RETRIES = 3 74 + DETECT_RETRY_DELAY = 5 # seconds 66 75 67 76 68 77 class Observer: ··· 140 149 141 150 async def setup(self): 142 151 """Initialize audio devices and DBus connection.""" 143 - # Detect and start audio recorder 144 - if not self.audio_recorder.detect(): 152 + # Detect audio devices with retry (devices may still be initializing) 153 + detected = False 154 + for attempt in range(DETECT_RETRIES): 155 + if self.audio_recorder.detect(): 156 + detected = True 157 + break 158 + if attempt < DETECT_RETRIES - 1: 159 + logger.info( 160 + "Audio detection attempt %d/%d failed, retrying in %ds", 161 + attempt + 1, 162 + DETECT_RETRIES, 163 + DETECT_RETRY_DELAY, 164 + ) 165 + await asyncio.sleep(DETECT_RETRY_DELAY) 166 + if not detected: 145 167 logger.error("Failed to detect audio devices") 146 168 return False 147 169 ··· 807 829 logger.info("Callosum connection stopped") 808 830 809 831 832 + def check_session_ready() -> str | None: 833 + """Check if the desktop session is ready for observation. 834 + 835 + Returns None if ready, or a description of what's missing. 
836 + """ 837 + # Display server 838 + if not os.environ.get("DISPLAY") and not os.environ.get("WAYLAND_DISPLAY"): 839 + return "no display server (DISPLAY/WAYLAND_DISPLAY not set)" 840 + 841 + # DBus session bus 842 + if not os.environ.get("DBUS_SESSION_BUS_ADDRESS"): 843 + return "no DBus session bus (DBUS_SESSION_BUS_ADDRESS not set)" 844 + 845 + # PulseAudio / PipeWire audio 846 + pactl = shutil.which("pactl") 847 + if pactl: 848 + try: 849 + subprocess.run( 850 + [pactl, "info"], 851 + capture_output=True, 852 + timeout=5, 853 + ).check_returncode() 854 + except (subprocess.CalledProcessError, subprocess.TimeoutExpired): 855 + return "audio server not responding (pactl info failed)" 856 + return None 857 + 858 + 810 859 async def async_main(args): 811 860 """Async entry point.""" 861 + # Pre-flight: check session prerequisites before attempting setup 862 + not_ready = check_session_ready() 863 + if not_ready: 864 + logger.warning("Session not ready: %s", not_ready) 865 + return EXIT_TEMPFAIL 866 + 812 867 observer = Observer( 813 868 interval=args.interval, 814 869 )
+21 -8
think/supervisor.py
··· 36 36 DEFAULT_THRESHOLD = 60 37 37 CHECK_INTERVAL = 30 38 38 MAX_UPDATED_CATCHUP = 4 39 + EXIT_TEMPFAIL = 75 # EX_TEMPFAIL: service prerequisites not ready 40 + TEMPFAIL_DELAY = 15 # seconds to wait before retrying a tempfail exit 39 41 40 42 # Global shutdown flag 41 43 shutdown_requested = False ··· 933 935 return 934 936 935 937 exited_names = [managed.name for managed in exited] 936 - msg = f"Runner process exited: {', '.join(sorted(exited_names))}" 937 - logging.error(msg) 938 938 exit_key = ("runner_exit", tuple(sorted(exited_names))) 939 939 940 - await alert_mgr.alert_if_ready(exit_key, msg) 940 + # Check if all exits are tempfail (session not ready) 941 + all_tempfail = all(m.process.returncode == EXIT_TEMPFAIL for m in exited) 942 + 943 + if all_tempfail: 944 + logging.info("Runner waiting for session: %s", ", ".join(sorted(exited_names))) 945 + else: 946 + msg = f"Runner process exited: {', '.join(sorted(exited_names))}" 947 + logging.error(msg) 948 + await alert_mgr.alert_if_ready(exit_key, msg) 941 949 942 950 for managed in exited: 943 951 # Clear any pending restart request for this service 944 952 _restart_requests.pop(managed.name, None) 945 953 946 954 returncode = managed.process.returncode 955 + is_tempfail = returncode == EXIT_TEMPFAIL 947 956 logging.info("%s exited with code %s", managed.name, returncode) 948 957 949 958 # Emit stopped event ··· 967 976 968 977 # Handle restart if needed 969 978 if managed.restart and not shutdown_requested: 970 - policy = _get_restart_policy(managed.name) 971 - uptime = time.time() - policy.last_start if policy.last_start else 0 972 - if uptime >= 60: 973 - policy.reset_attempts() 974 - delay = policy.next_delay() 979 + # Tempfail: use fixed longer delay, don't burn through backoff 980 + if is_tempfail: 981 + delay = TEMPFAIL_DELAY 982 + else: 983 + policy = _get_restart_policy(managed.name) 984 + uptime = time.time() - policy.last_start if policy.last_start else 0 985 + if uptime >= 60: 986 + 
policy.reset_attempts() 987 + delay = policy.next_delay() 975 988 if delay: 976 989 logging.info("Waiting %ss before restarting %s", delay, managed.name) 977 990 for _ in range(delay):