a digital entity named phi that roams bsky phi.zzstoatzz.io
2
fork

Configure Feed

Select the types of activity you want to include in your feed.

make embeds visible to phi (images, links, quotes, video)

Passes image URLs to the agent as ImageUrl parts for multimodal vision,
in addition to the alt-text descriptions, so phi can now literally look at images.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

zzstoatzz 8e865648 66ee3bac

+146 -9
+10 -2
src/bot/agent.py
··· 9 9 10 10 import httpx 11 11 from pydantic import BaseModel 12 - from pydantic_ai import Agent, RunContext 12 + from pydantic_ai import Agent, ImageUrl, RunContext 13 13 from pydantic_ai.mcp import MCPServerStreamableHTTP 14 14 15 15 from bot.config import settings ··· 293 293 author_handle: str, 294 294 thread_context: str, 295 295 thread_uri: str | None = None, 296 + image_urls: list[str] | None = None, 296 297 ) -> Response: 297 298 """Process a mention with structured memory context.""" 298 299 # Build context from memory if available ··· 329 330 prompt_parts.append(f"\n[NEW MESSAGE]:\n@{author_handle}: {mention_text}") 330 331 prompt = "\n\n".join(prompt_parts) 331 332 333 + # Build multimodal prompt if images are present 334 + if image_urls: 335 + user_prompt: str | list = [prompt] + [ImageUrl(url=url) for url in image_urls] 336 + logger.info(f"including {len(image_urls)} images in prompt") 337 + else: 338 + user_prompt = prompt 339 + 332 340 # Run agent with MCP tools + search_memory available 333 341 logger.info(f"processing mention from @{author_handle}: {mention_text[:80]}") 334 342 deps = PhiDeps( ··· 336 344 memory=self.memory, 337 345 thread_uri=thread_uri, 338 346 ) 339 - result = await self.agent.run(prompt, deps=deps) 347 + result = await self.agent.run(user_prompt, deps=deps) 340 348 logger.info(f"agent decided: {result.output.action}" + (f" - {result.output.text[:80]}" if result.output.text else "") + (f" ({result.output.reason})" if result.output.reason else "")) 341 349 342 350 # Store interaction and extract observations
+14 -1
src/bot/services/message_handler.py
··· 7 7 from bot.agent import PhiAgent 8 8 from bot.core.atproto_client import BotClient 9 9 from bot.status import bot_status 10 - from bot.utils.thread import build_thread_context 10 + from bot.utils.thread import build_thread_context, describe_embed, extract_image_urls 11 11 12 12 logger = logging.getLogger("bot.handler") 13 13 ··· 37 37 mention_text = post.record.text 38 38 author_handle = post.author.handle 39 39 40 + # Include embed content (images, links, quote posts) in the mention 41 + embed = post.embed if hasattr(post, "embed") and post.embed else None 42 + if not embed and hasattr(post.record, "embed") and post.record.embed: 43 + embed = post.record.embed 44 + 45 + embed_desc = describe_embed(embed) if embed else None 46 + if embed_desc: 47 + mention_text = f"{mention_text}\n{embed_desc}" 48 + 49 + # Extract image URLs for multimodal vision 50 + image_urls = extract_image_urls(embed) if embed else [] 51 + 40 52 bot_status.record_mention() 41 53 42 54 # Build reply reference ··· 65 77 author_handle=author_handle, 66 78 thread_context=thread_context, 67 79 thread_uri=thread_uri, 80 + image_urls=image_urls, 68 81 ) 69 82 70 83 # Handle response actions
+122 -6
src/bot/utils/thread.py
··· 3 3 from collections.abc import Callable 4 4 5 5 6 + def describe_embed(embed) -> str | None: 7 + """Extract a human-readable description from a post embed. 8 + 9 + Handles images (with alt text), external links, quote posts, 10 + and record-with-media (quote + images). 11 + """ 12 + if embed is None: 13 + return None 14 + 15 + parts: list[str] = [] 16 + py_type = getattr(embed, "py_type", "") 17 + 18 + # images 19 + if "images" in py_type: 20 + for img in getattr(embed, "images", []): 21 + alt = getattr(img, "alt", "").strip() 22 + if alt: 23 + parts.append(f"[image: {alt}]") 24 + else: 25 + parts.append("[image: no alt text]") 26 + 27 + # external link card 28 + elif "external" in py_type: 29 + ext = getattr(embed, "external", None) 30 + if ext: 31 + title = getattr(ext, "title", "") 32 + desc = getattr(ext, "description", "") 33 + uri = getattr(ext, "uri", "") 34 + link_parts = [] 35 + if title: 36 + link_parts.append(title) 37 + if desc: 38 + link_parts.append(desc) 39 + if uri: 40 + link_parts.append(uri) 41 + parts.append(f"[link: {' — '.join(link_parts)}]") 42 + 43 + # quote post 44 + elif py_type == "app.bsky.embed.record#view": 45 + rec = getattr(embed, "record", None) 46 + if rec and hasattr(rec, "value"): 47 + author = getattr(rec, "author", None) 48 + handle = getattr(author, "handle", "?") if author else "?" 
49 + text = getattr(rec.value, "text", "") 50 + # Recursively describe embeds on the quoted post 51 + quoted_embeds = getattr(rec, "embeds", None) 52 + inner = "" 53 + if quoted_embeds: 54 + inner_parts = [describe_embed(e) for e in quoted_embeds] 55 + inner = " ".join(p for p in inner_parts if p) 56 + quote_content = text 57 + if inner: 58 + quote_content = f"{text} {inner}" if text else inner 59 + parts.append(f"[quoting @{handle}: {quote_content}]") 60 + 61 + # record with media (quote post + images/video) 62 + elif "record_with_media" in py_type: 63 + media = getattr(embed, "media", None) 64 + if media: 65 + media_desc = describe_embed(media) 66 + if media_desc: 67 + parts.append(media_desc) 68 + rec = getattr(embed, "record", None) 69 + if rec: 70 + rec_desc = describe_embed(rec) 71 + if rec_desc: 72 + parts.append(rec_desc) 73 + 74 + # video 75 + elif "video" in py_type: 76 + alt = getattr(embed, "alt", "") 77 + if alt: 78 + parts.append(f"[video: {alt}]") 79 + else: 80 + parts.append("[video]") 81 + 82 + return " ".join(parts) if parts else None 83 + 84 + 85 + def extract_image_urls(embed) -> list[str]: 86 + """Extract fullsize image URLs from a post embed. 87 + 88 + Returns URLs that can be passed as ImageUrl to a multimodal model. 
89 + """ 90 + if embed is None: 91 + return [] 92 + 93 + urls: list[str] = [] 94 + py_type = getattr(embed, "py_type", "") 95 + 96 + if "images" in py_type: 97 + for img in getattr(embed, "images", []): 98 + fullsize = getattr(img, "fullsize", None) 99 + if fullsize: 100 + urls.append(fullsize) 101 + 102 + elif "record_with_media" in py_type: 103 + media = getattr(embed, "media", None) 104 + if media: 105 + urls.extend(extract_image_urls(media)) 106 + 107 + return urls 108 + 109 + 110 + def describe_post(post) -> str: 111 + """Build a full text representation of a post including embeds.""" 112 + handle = post.author.handle 113 + text = post.record.text if hasattr(post.record, "text") else "" 114 + 115 + # Check for embeds on the post view (post.embed) or record (post.record.embed) 116 + embed_desc = None 117 + if hasattr(post, "embed") and post.embed: 118 + embed_desc = describe_embed(post.embed) 119 + elif hasattr(post.record, "embed") and post.record.embed: 120 + embed_desc = describe_embed(post.record.embed) 121 + 122 + if embed_desc: 123 + return f"@{handle}: {text}\n {embed_desc}" if text else f"@{handle}: {embed_desc}" 124 + return f"@{handle}: {text}" if text else f"@{handle}: [no text]" 125 + 126 + 6 127 def traverse_thread( 7 128 thread_node, 8 129 visit: Callable[[any], None], ··· 84 205 if not posts: 85 206 return "No previous messages in this thread." 86 207 87 - messages = [] 88 - for post in posts: 89 - handle = post.author.handle 90 - text = post.record.text if hasattr(post.record, "text") else "[no text]" 91 - messages.append(f"@{handle}: {text}") 92 - 208 + messages = [describe_post(post) for post in posts] 93 209 return "\n".join(messages)