a digital entity named phi that roams bsky
phi.zzstoatzz.io
1"""Test that proves tools are actually being used by the agent"""
2
3import os
4from unittest.mock import patch
5
6import pytest
7from pydantic import BaseModel, Field
8from pydantic_ai import Agent, RunContext
9from pydantic_ai.exceptions import ModelHTTPError
10
11from bot.config import settings
12
13
# Structured output type handed to each test agent as its `output_type`.
# Documented with comments instead of a class docstring so the schema
# generated from this model stays exactly as it was.
class Response(BaseModel):
    # The agent's textual answer; the tests read it as `result.output.text`.
    text: str = Field(
        description="Response text",
    )
16
17
class TestToolUsage:
    """Live-model tests that check an agent really invokes its registered tools.

    Each test builds an Agent on ``MODEL`` with ``Response`` as its output type,
    registers a tool that records its own invocations, runs one or more
    prompts, and asserts on what was recorded. A test is skipped when no
    Anthropic API key is configured or when a run raises ``ModelHTTPError``.
    """

    # Model identifier shared by every agent built in this class.
    MODEL = "anthropic:claude-haiku-4-5"

    def setup_method(self):
        """Set up API key for tests"""
        if settings.anthropic_api_key:
            os.environ["ANTHROPIC_API_KEY"] = settings.anthropic_api_key

    @staticmethod
    def _require_api_key() -> None:
        """Skip the calling test when no Anthropic API key is configured."""
        if not settings.anthropic_api_key:
            pytest.skip("No Anthropic API key configured")

    @classmethod
    def _make_agent(cls, system_prompt: str):
        """Build an agent on ``MODEL`` that returns a ``Response``."""
        return Agent(
            cls.MODEL,
            system_prompt=system_prompt,
            output_type=Response,
        )

    @staticmethod
    async def _run_or_skip(agent, prompt: str):
        """Run ``prompt`` on ``agent``; skip the test on ``ModelHTTPError``.

        Only the model call sits inside the ``try`` so that assertion
        failures in the tests are never mistaken for an API outage.
        """
        try:
            return await agent.run(prompt)
        except ModelHTTPError:
            pytest.skip("Anthropic API unavailable")

    @pytest.mark.asyncio
    async def test_agent_uses_tools(self):
        """Test that the agent actually calls tools when appropriate"""
        self._require_api_key()

        # Track tool calls
        tool_calls: list[str] = []

        agent = self._make_agent(
            "You are a helpful assistant. Use tools when asked."
        )

        # Register a simple tool
        @agent.tool
        async def get_current_time(ctx: RunContext[None]) -> str:
            """Get the current time"""
            tool_calls.append("get_current_time")
            return "The current time is 3:14 PM"

        # Test 1: Query that should NOT use the tool
        await self._run_or_skip(agent, "What is 2 + 2?")
        assert not tool_calls, "Tool was called for simple math question"

        # Test 2: Query that SHOULD use the tool
        result = await self._run_or_skip(agent, "What time is it?")
        assert len(tool_calls) == 1, (
            f"Tool was not called for time question. Calls: {tool_calls}"
        )
        assert tool_calls[0] == "get_current_time"
        assert "3:14" in result.output.text, (
            f"Tool result not in response: {result.output.text}"
        )

    @pytest.mark.asyncio
    async def test_search_tool_usage(self):
        """Test that search tool is called for appropriate queries"""
        self._require_api_key()

        tool_calls: list[dict] = []

        agent = self._make_agent(
            "You help answer questions. Use search for current events."
        )

        @agent.tool
        async def search_web(ctx: RunContext[None], query: str) -> str:
            """Search the web for information"""
            tool_calls.append({"tool": "search_web", "query": query})
            return f"Search results for '{query}': Latest news about {query}"

        # Should NOT search for simple math
        await self._run_or_skip(agent, "What is 2 + 2?")
        assert not tool_calls, f"Searched for basic math. Calls: {tool_calls}"

        # SHOULD search for current events
        result = await self._run_or_skip(agent, "What happened in tech news today?")
        assert len(tool_calls) > 0, (
            f"Did not search for current news. Response: {result.output.text}"
        )
        first_call = tool_calls[0]
        assert first_call["tool"] == "search_web"
        query_lower = first_call["query"].lower()
        assert "tech" in query_lower or "news" in query_lower

    @pytest.mark.asyncio
    async def test_multiple_tool_calls(self):
        """Test that agent can call tools multiple times in one request"""
        self._require_api_key()

        calls: list[str] = []

        agent = self._make_agent("You are a helpful assistant.")

        @agent.tool
        async def search_web(ctx: RunContext[None], query: str) -> str:
            """Search for information"""
            calls.append(f"search: {query}")
            return f"Info about {query}"

        # Ask for multiple things that need searching
        await self._run_or_skip(
            agent, "Search for information about Python and also about Rust"
        )

        assert len(calls) >= 2, f"Expected multiple searches, got {len(calls)}: {calls}"
        # Compare case-insensitively: the model chooses the query casing.
        lowered = [call.lower() for call in calls]
        assert any("python" in call for call in lowered), f"No Python search in: {calls}"
        assert any("rust" in call for call in lowered), f"No Rust search in: {calls}"
128
129
class TestPhiAgentToolRegistration:
    """Check PhiAgent's wiring by constructing it and inspecting the result.

    The tests here only build a PhiAgent (with the atproto bot client patched
    out) and look at its attributes; no prompt is run by the tests themselves.
    """

    def setup_method(self):
        """Export the configured Anthropic key to the environment, if any."""
        api_key = settings.anthropic_api_key
        if api_key:
            os.environ["ANTHROPIC_API_KEY"] = api_key

    @staticmethod
    def _skip_without_key():
        """Skip the calling test when ANTHROPIC_API_KEY is unset or empty."""
        if not os.environ.get("ANTHROPIC_API_KEY"):
            pytest.skip("No Anthropic API key configured")

    def test_graze_tools_registered(self):
        """Every expected tool name is present in the agent's function toolset."""
        self._skip_without_key()

        # Checked in this order so the first failure reported is unchanged.
        expected_tools = (
            "create_feed",
            "list_feeds",
            "read_timeline",
            "read_feed",
            "follow_user",
            "get_own_posts",
        )

        # bot_client is replaced by a mock while PhiAgent is imported, built
        # and inspected -- presumably to avoid a real Bluesky session.
        with patch("bot.core.atproto_client.bot_client"):
            from bot.agent import PhiAgent

            phi = PhiAgent()
            tool_names = set()
            for tool in phi.agent._function_toolset.tools.values():
                tool_names.add(tool.name)
            for expected in expected_tools:
                assert expected in tool_names, f"{expected} not in {tool_names}"

    def test_graze_client_instantiated(self):
        """PhiAgent holds a graze client whose handle matches the configured one."""
        self._skip_without_key()

        with patch("bot.core.atproto_client.bot_client"):
            from bot.agent import PhiAgent

            phi = PhiAgent()
            graze = phi.graze_client
            assert graze is not None
            assert graze._handle == settings.bluesky_handle