a digital entity named phi that roams bsky phi.zzstoatzz.io
2
fork

Configure Feed

Select the types of activity you want to include in your feed.

at main 134 lines 4.8 kB view raw
1"""Evals for graze feed creation — does the agent translate natural language into valid filter manifests?""" 2 3import json 4 5 6def _has_filter_key(manifest: dict) -> bool: 7 """Check that the manifest has a top-level 'filter' key.""" 8 return "filter" in manifest 9 10 11KNOWN_OPERATORS = { 12 "regex_any", 13 "regex_none", 14 "regex_matches", 15 "regex_negation_matches", 16 "and", 17 "or", 18} 19 20 21def _uses_known_operators(obj: dict | list) -> bool: 22 """Recursively check that all operator keys are from the known set.""" 23 if isinstance(obj, list): 24 return all( 25 _uses_known_operators(item) for item in obj if isinstance(item, dict) 26 ) 27 if isinstance(obj, dict): 28 for key, val in obj.items(): 29 if key in KNOWN_OPERATORS: 30 if isinstance(val, dict | list): 31 if not _uses_known_operators(val): 32 return False 33 elif key not in ("filter",): 34 return False 35 return True 36 return True 37 38 39async def test_creates_feed_from_description(feed_agent, evaluate_response): 40 """Agent should call create_feed with a jazz-related manifest.""" 41 response = await feed_agent.process_mention( 42 "create a feed for posts about jazz music" 43 ) 44 45 assert response.action == "reply", f"expected reply, got {response.action}" 46 47 spy = feed_agent.spy 48 assert spy.was_called("create_feed"), "create_feed was not called" 49 assert not spy.was_called("list_feeds"), "list_feeds should not be called" 50 51 call = spy.get_calls("create_feed")[0] 52 manifest = call["filter_manifest"] 53 assert _has_filter_key(manifest), ( 54 f"manifest missing 'filter' key: {json.dumps(manifest)}" 55 ) 56 57 await evaluate_response( 58 "The filter manifest should contain patterns related to jazz music " 59 "(e.g. 'jazz', 'bebop', 'improvisation', '#jazz'). " 60 "Does it capture the user's intent to find jazz-related posts?", 61 json.dumps(manifest), 62 ) 63 64 65async def test_manifest_uses_valid_dsl(feed_agent): 66 """Manifest should only use known graze DSL operators.""" 67 await feed_agent.process_mention("make me a feed for machine learning posts") 68 69 spy = feed_agent.spy 70 assert spy.was_called("create_feed"), "create_feed was not called" 71 72 call = spy.get_calls("create_feed")[0] 73 manifest = call["filter_manifest"] 74 assert _has_filter_key(manifest), ( 75 f"manifest missing 'filter' key: {json.dumps(manifest)}" 76 ) 77 assert _uses_known_operators(manifest), ( 78 f"manifest uses unknown operators: {json.dumps(manifest)}" 79 ) 80 81 82async def test_complex_description(feed_agent, evaluate_response): 83 """Agent should disambiguate 'rust' (programming language vs game).""" 84 response = await feed_agent.process_mention( 85 "create a feed for rust programming, not the game" 86 ) 87 88 assert response.action == "reply", f"expected reply, got {response.action}" 89 90 spy = feed_agent.spy 91 assert spy.was_called("create_feed"), "create_feed was not called" 92 93 call = spy.get_calls("create_feed")[0] 94 manifest = call["filter_manifest"] 95 assert _has_filter_key(manifest), ( 96 f"manifest missing 'filter' key: {json.dumps(manifest)}" 97 ) 98 99 await evaluate_response( 100 "The filter manifest should make a reasonable attempt to target rust " 101 "programming language content rather than the video game. It passes if " 102 "it includes ANY rust-programming-specific terms (e.g. 'rustlang', " 103 "'cargo', 'crate', '#rustlang', 'systems programming', 'compiler'). " 104 "It does NOT need to be perfect — partial disambiguation is fine.", 105 json.dumps(manifest), 106 ) 107 108 109async def test_list_feeds_when_asked(feed_agent): 110 """Asking about existing feeds should call list_feeds, not create_feed.""" 111 response = await feed_agent.process_mention("what feeds do you have?") 112 113 spy = feed_agent.spy 114 assert spy.was_called("list_feeds"), "list_feeds was not called" 115 assert not spy.was_called("create_feed"), "create_feed should not be called" 116 117 assert response.action == "reply", f"expected reply, got {response.action}" 118 assert response.text is not None 119 assert "jazz" in response.text.lower() or "rust" in response.text.lower(), ( 120 f"response should mention canned feeds: {response.text}" 121 ) 122 123 124async def test_no_feed_creation_without_request(feed_agent): 125 """Informational question about feeds should not trigger any feed tools.""" 126 await feed_agent.process_mention("what is a bluesky feed?") 127 128 spy = feed_agent.spy 129 assert not spy.was_called("create_feed"), ( 130 "create_feed should not be called for an informational question" 131 ) 132 assert not spy.was_called("list_feeds"), ( 133 "list_feeds should not be called for an informational question" 134 )