add logfire observability with graceful degradation

+1 -1

pyproject.toml

··· 10 10 "atproto@git+https://github.com/MarshalX/atproto.git@refs/pull/605/head", 11 11 "fastapi", 12 12 "fastmcp>=0.8.0", 13 - "logfire[fastapi]", 13 + "logfire[anthropic,fastapi,openai,pydantic-ai]", 14 14 "openai", 15 15 "pydantic-ai", 16 16 "pydantic-settings",

+4 -2

src/bot/config.py

··· 7 7 8 8 9 9 class LogfireSettings(BaseSettings): 10 - model_config = SettingsConfigDict(env_prefix="LOGFIRE_", extra="ignore", env_file=".env") 10 + model_config = SettingsConfigDict( 11 + env_prefix="LOGFIRE_", extra="ignore", env_file=".env" 12 + ) 11 13 12 - token: str | None = None 14 + write_token: str | None = None 13 15 environment: str | None = None 14 16 send_to_logfire: Literal["if-token-present"] | None = "if-token-present" 15 17

+18 -2

src/bot/logging_config.py

··· 23 23 logging.getLogger("httpcore").setLevel(logging.WARNING) 24 24 25 25 # SDK debug loggers dump full request bodies (embeddings, prompts) 26 - for name in ["anthropic._base_client", "openai._base_client", "turbopuffer._base_client"]: 26 + for name in [ 27 + "anthropic._base_client", 28 + "openai._base_client", 29 + "turbopuffer._base_client", 30 + ]: 27 31 logging.getLogger(name).setLevel(logging.WARNING) 28 32 29 33 # MCP protocol chatter (session init, tool listings, SSE messages) 30 - for name in ["mcp", "mcp.client", "mcp.client.session", "mcp.client.streamable_http", "pydantic_ai.mcp"]: 34 + for name in [ 35 + "mcp", 36 + "mcp.client", 37 + "mcp.client.session", 38 + "mcp.client.streamable_http", 39 + "pydantic_ai.mcp", 40 + ]: 31 41 logging.getLogger(name).setLevel(logging.WARNING) 42 + 43 + # uvicorn access logs are redundant with fastapi spans 44 + logging.getLogger("uvicorn.access").setLevel(logging.WARNING) 45 + 46 + # asyncio selector noise (KqueueSelector, EpollSelector) 47 + logging.getLogger("asyncio").setLevel(logging.WARNING) 32 48 33 49 34 50 def _clear_uvicorn_handlers() -> None:

+26 -4

src/bot/main.py

··· 26 26 logfire.configure( 27 27 send_to_logfire=settings.logfire.send_to_logfire, 28 28 environment=settings.logfire.environment, 29 - token=settings.logfire.token, 29 + token=settings.logfire.write_token, 30 30 console=logfire.ConsoleOptions( 31 31 min_log_level="debug" if settings.debug else "info", 32 32 ), 33 33 ) 34 + 35 + # instrument the interesting stuff — skip httpx (poll noise) since 36 + # anthropic/openai integrations already trace their own HTTP calls. 37 + # each call is wrapped individually so a missing dep degrades to a no-op. 38 + for _instrument in ( 39 + logfire.instrument_pydantic_ai, 40 + logfire.instrument_anthropic, 41 + logfire.instrument_openai, 42 + ): 43 + try: 44 + _instrument() 45 + except Exception as _e: 46 + logger.warning(f"logfire instrumentation failed ({_instrument.__name__}): {_e}") 34 47 35 48 36 49 @asynccontextmanager ··· 79 92 ), 80 93 ) 81 94 82 - logfire.instrument_fastapi(app) 95 + try: 96 + logfire.instrument_fastapi(app, excluded_urls="/health") 97 + except Exception as _e: 98 + logger.warning(f"logfire fastapi instrumentation failed: {_e}") 83 99 84 100 85 101 NAV_HTML = '<nav><a href="/">phi</a><a href="/status">status</a><a href="/memory">memory</a></nav>' ··· 151 167 @app.get("/health") 152 168 async def health(): 153 169 """Health check endpoint.""" 154 - return {"status": "healthy", "polling_active": bot_status.polling_active, "paused": bot_status.paused} 170 + return { 171 + "status": "healthy", 172 + "polling_active": bot_status.polling_active, 173 + "paused": bot_status.paused, 174 + } 155 175 156 176 157 177 def _check_control_token(request: Request): ··· 256 276 return JSONResponse(data) 257 277 except Exception as e: 258 278 logger.warning(f"memory graph failed: {e}") 259 - return JSONResponse({"nodes": [], "edges": [], "error": str(e)}, status_code=500) 279 + return JSONResponse( 280 + {"nodes": [], "edges": [], "error": str(e)}, status_code=500 281 + ) 260 282 261 283 262 284 @app.get("/memory", response_class=HTMLResponse)

+38 -27

src/bot/services/message_handler.py

··· 2 2 3 3 import logging 4 4 5 + import logfire 5 6 from atproto_client import models 6 7 from limits import parse as parse_limit 7 8 from limits.storage import MemoryStorage ··· 35 36 logger.warning(f"rate limited @{author_handle}") 36 37 return 37 38 38 - try: 39 - if reason in ("mention", "reply", "quote"): 40 - await self._handle_post(notification) 41 - elif reason in ("like", "repost"): 42 - await self._handle_engagement(notification) 43 - elif reason == "follow": 44 - await self._handle_follow(notification) 45 - else: 46 - logger.debug(f"notification type '{reason}' from @{author_handle}") 47 - except Exception as e: 48 - logger.exception(f"notification handling error: {e}") 49 - bot_status.record_error() 39 + with logfire.span( 40 + "handle notification", 41 + reason=reason, 42 + author=author_handle, 43 + ): 44 + try: 45 + if reason in ("mention", "reply", "quote"): 46 + await self._handle_post(notification) 47 + elif reason in ("like", "repost"): 48 + await self._handle_engagement(notification) 49 + elif reason == "follow": 50 + await self._handle_follow(notification) 51 + else: 52 + logger.debug(f"notification type '{reason}' from @{author_handle}") 53 + except Exception as e: 54 + logger.exception(f"notification handling error: {e}") 55 + bot_status.record_error() 50 56 51 57 async def _handle_engagement(self, notification): 52 58 """Process a like or repost — someone engaged with phi's content.""" ··· 82 88 root_ref = post.record.reply.root 83 89 else: 84 90 root_ref = parent_ref 85 - reply_ref = models.AppBskyFeedPost.ReplyRef(parent=parent_ref, root=root_ref) 91 + reply_ref = models.AppBskyFeedPost.ReplyRef( 92 + parent=parent_ref, root=root_ref 93 + ) 86 94 await self.client.create_post(response.text, reply_to=reply_ref) 87 95 bot_status.record_response() 88 - logger.info(f"replied on {reason} from @{author_handle}: {response.text[:80]}") 96 + logger.info( 97 + f"replied on {reason} from @{author_handle}: {response.text[:80]}" 98 + ) 89 99 else: 90 100 logger.info(f"{response.action} on {reason} from @{author_handle}") 91 101 bot_status.record_response() ··· 199 209 200 210 async def daily_reflection(self): 201 211 """Generate and post a daily reflection if phi has something to say.""" 202 - try: 203 - response = await self.agent.process_reflection() 204 - except Exception as e: 205 - logger.exception(f"daily reflection failed: {e}") 206 - return 207 - 208 - if response.action in ("reply", "post") and response.text: 212 + with logfire.span("daily reflection"): 209 213 try: 210 - await self.client.create_post(response.text) 211 - bot_status.record_response() 212 - logger.info(f"daily reflection posted: {response.text[:80]}") 214 + response = await self.agent.process_reflection() 213 215 except Exception as e: 214 - logger.exception(f"failed to post daily reflection: {e}") 215 - else: 216 - logger.info(f"daily reflection: nothing to say ({response.reason})") 216 + logger.exception(f"daily reflection failed: {e}") 217 + return 218 + 219 + if response.action in ("reply", "post") and response.text: 220 + try: 221 + await self.client.create_post(response.text) 222 + bot_status.record_response() 223 + logger.info(f"daily reflection posted: {response.text[:80]}") 224 + except Exception as e: 225 + logger.exception(f"failed to post daily reflection: {e}") 226 + else: 227 + logger.info(f"daily reflection: nothing to say ({response.reason})")

+30

tests/test_config.py

··· 1 1 """Test configuration loading""" 2 2 3 + from unittest.mock import patch 4 + 3 5 from bot.config import settings 4 6 5 7 ··· 8 10 assert settings.bluesky_service == "https://bsky.social" 9 11 assert settings.bot_name == "phi" 10 12 assert settings.notification_poll_interval == 10 13 + 14 + 15 + def test_logfire_instrumentation_degrades_gracefully(): 16 + """Regression: a broken logfire instrumentation must not crash the app.""" 17 + import importlib 18 + 19 + import bot.main as main_mod 20 + 21 + # Simulate instrument_pydantic_ai raising (e.g. missing otel dep) 22 + with patch.object( 23 + main_mod.logfire, 24 + "instrument_pydantic_ai", 25 + side_effect=ImportError("no module 'opentelemetry.instrumentation.httpx'"), 26 + ): 27 + # Re-running the instrumentation loop should not raise 28 + for _instrument in ( 29 + main_mod.logfire.instrument_pydantic_ai, 30 + main_mod.logfire.instrument_anthropic, 31 + main_mod.logfire.instrument_openai, 32 + ): 33 + try: 34 + _instrument() 35 + except Exception: 36 + pass # this is what the production code does 37 + 38 + # App should still be importable and functional 39 + importlib.reload(main_mod) 40 + assert main_mod.app is not None

+1 -1

uv.lock

··· 213 213 { name = "atproto", git = "https://github.com/MarshalX/atproto.git?rev=refs%2Fpull%2F605%2Fhead" }, 214 214 { name = "fastapi" }, 215 215 { name = "fastmcp", specifier = ">=0.8.0" }, 216 - { name = "logfire", extras = ["fastapi"] }, 216 + { name = "logfire", extras = ["anthropic", "fastapi", "openai", "pydantic-ai"] }, 217 217 { name = "openai" }, 218 218 { name = "pydantic-ai" }, 219 219 { name = "pydantic-settings" },

Configure Feed

Configure Feed