a digital entity named phi that roams bsky
phi.zzstoatzz.io
1"""Evals for graze feed creation — does the agent translate natural language into valid filter manifests?"""
2
3import json
4
5
def _has_filter_key(manifest: dict) -> bool:
    """Return True when ``manifest`` is a dict with a top-level 'filter' key.

    Non-dict values are rejected outright instead of being probed with ``in``:
    on a JSON *string* that would be a substring match, on a list an element
    match (both false positives), and on ``None`` it would raise ``TypeError``
    before the caller's assertion message could be shown.
    """
    return isinstance(manifest, dict) and "filter" in manifest
9
10
11KNOWN_OPERATORS = {
12 "regex_any",
13 "regex_none",
14 "regex_matches",
15 "regex_negation_matches",
16 "and",
17 "or",
18}
19
20
21def _uses_known_operators(obj: dict | list) -> bool:
22 """Recursively check that all operator keys are from the known set."""
23 if isinstance(obj, list):
24 return all(
25 _uses_known_operators(item) for item in obj if isinstance(item, dict)
26 )
27 if isinstance(obj, dict):
28 for key, val in obj.items():
29 if key in KNOWN_OPERATORS:
30 if isinstance(val, dict | list):
31 if not _uses_known_operators(val):
32 return False
33 elif key not in ("filter",):
34 return False
35 return True
36 return True
37
38
async def test_creates_feed_from_description(feed_agent, evaluate_response):
    """A plain-language jazz request should produce a create_feed call.

    Verifies that the agent replies, that create_feed (and not list_feeds)
    was recorded by the spy, and that the first recorded manifest has a
    top-level 'filter' key. The serialized manifest is then passed to
    ``evaluate_response`` together with a jazz-relevance rubric.
    """
    prompt = "create a feed for posts about jazz music"
    rubric = (
        "The filter manifest should contain patterns related to jazz music "
        "(e.g. 'jazz', 'bebop', 'improvisation', '#jazz'). "
        "Does it capture the user's intent to find jazz-related posts?"
    )

    response = await feed_agent.process_mention(prompt)
    assert response.action == "reply", f"expected reply, got {response.action}"

    tool_spy = feed_agent.spy
    assert tool_spy.was_called("create_feed"), "create_feed was not called"
    assert not tool_spy.was_called("list_feeds"), "list_feeds should not be called"

    # Only the first create_feed invocation is inspected.
    manifest = tool_spy.get_calls("create_feed")[0]["filter_manifest"]
    assert _has_filter_key(manifest), (
        f"manifest missing 'filter' key: {json.dumps(manifest)}"
    )

    await evaluate_response(rubric, json.dumps(manifest))
63
64
async def test_manifest_uses_valid_dsl(feed_agent):
    """The manifest sent to create_feed must stick to known graze DSL operators.

    After a machine-learning feed request, the first recorded create_feed
    call must carry a manifest with a top-level 'filter' key that passes
    ``_uses_known_operators``.
    """
    await feed_agent.process_mention("make me a feed for machine learning posts")

    tool_spy = feed_agent.spy
    assert tool_spy.was_called("create_feed"), "create_feed was not called"

    # Only the first create_feed invocation is inspected.
    submitted = tool_spy.get_calls("create_feed")[0]["filter_manifest"]

    assert _has_filter_key(submitted), (
        f"manifest missing 'filter' key: {json.dumps(submitted)}"
    )
    assert _uses_known_operators(submitted), (
        f"manifest uses unknown operators: {json.dumps(submitted)}"
    )
80
81
async def test_complex_description(feed_agent, evaluate_response):
    """A request for 'rust programming, not the game' should be disambiguated.

    Verifies that the agent replies and calls create_feed with a manifest
    that has a top-level 'filter' key, then passes the serialized manifest
    to ``evaluate_response`` with a deliberately lenient rubric.
    """
    prompt = "create a feed for rust programming, not the game"
    rubric = (
        "The filter manifest should make a reasonable attempt to target rust "
        "programming language content rather than the video game. It passes if "
        "it includes ANY rust-programming-specific terms (e.g. 'rustlang', "
        "'cargo', 'crate', '#rustlang', 'systems programming', 'compiler'). "
        "It does NOT need to be perfect — partial disambiguation is fine."
    )

    response = await feed_agent.process_mention(prompt)
    assert response.action == "reply", f"expected reply, got {response.action}"

    tool_spy = feed_agent.spy
    assert tool_spy.was_called("create_feed"), "create_feed was not called"

    # Only the first create_feed invocation is inspected.
    manifest = tool_spy.get_calls("create_feed")[0]["filter_manifest"]
    assert _has_filter_key(manifest), (
        f"manifest missing 'filter' key: {json.dumps(manifest)}"
    )

    await evaluate_response(rubric, json.dumps(manifest))
107
108
async def test_list_feeds_when_asked(feed_agent):
    """A question about existing feeds should use list_feeds, never create_feed.

    Also checks that the agent replies with text mentioning at least one of
    the expected feed names ('jazz' or 'rust'), compared case-insensitively.
    """
    response = await feed_agent.process_mention("what feeds do you have?")

    tool_spy = feed_agent.spy
    assert tool_spy.was_called("list_feeds"), "list_feeds was not called"
    assert not tool_spy.was_called("create_feed"), "create_feed should not be called"

    assert response.action == "reply", f"expected reply, got {response.action}"
    assert response.text is not None

    lowered = response.text.lower()
    assert any(name in lowered for name in ("jazz", "rust")), (
        f"response should mention canned feeds: {response.text}"
    )
122
123
async def test_no_feed_creation_without_request(feed_agent):
    """A purely informational question must not invoke either feed tool."""
    await feed_agent.process_mention("what is a bluesky feed?")

    tool_spy = feed_agent.spy
    # Tool name -> failure message; checked in this order.
    forbidden = {
        "create_feed": "create_feed should not be called for an informational question",
        "list_feeds": "list_feeds should not be called for an informational question",
    }
    for tool_name, complaint in forbidden.items():
        assert not tool_spy.was_called(tool_name), complaint