make embeds visible to phi (images, links, quotes, video)

+10 -2

src/bot/agent.py

··· 9 9 10 10 import httpx 11 11 from pydantic import BaseModel 12 - from pydantic_ai import Agent, RunContext 12 + from pydantic_ai import Agent, ImageUrl, RunContext 13 13 from pydantic_ai.mcp import MCPServerStreamableHTTP 14 14 15 15 from bot.config import settings ··· 293 293 author_handle: str, 294 294 thread_context: str, 295 295 thread_uri: str | None = None, 296 + image_urls: list[str] | None = None, 296 297 ) -> Response: 297 298 """Process a mention with structured memory context.""" 298 299 # Build context from memory if available ··· 329 330 prompt_parts.append(f"\n[NEW MESSAGE]:\n@{author_handle}: {mention_text}") 330 331 prompt = "\n\n".join(prompt_parts) 331 332 333 + # Build multimodal prompt if images are present 334 + if image_urls: 335 + user_prompt: str | list = [prompt] + [ImageUrl(url=url) for url in image_urls] 336 + logger.info(f"including {len(image_urls)} images in prompt") 337 + else: 338 + user_prompt = prompt 339 + 332 340 # Run agent with MCP tools + search_memory available 333 341 logger.info(f"processing mention from @{author_handle}: {mention_text[:80]}") 334 342 deps = PhiDeps( ··· 336 344 memory=self.memory, 337 345 thread_uri=thread_uri, 338 346 ) 339 - result = await self.agent.run(prompt, deps=deps) 347 + result = await self.agent.run(user_prompt, deps=deps) 340 348 logger.info(f"agent decided: {result.output.action}" + (f" - {result.output.text[:80]}" if result.output.text else "") + (f" ({result.output.reason})" if result.output.reason else "")) 341 349 342 350 # Store interaction and extract observations

+14 -1

src/bot/services/message_handler.py

··· 7 7 from bot.agent import PhiAgent 8 8 from bot.core.atproto_client import BotClient 9 9 from bot.status import bot_status 10 - from bot.utils.thread import build_thread_context 10 + from bot.utils.thread import build_thread_context, describe_embed, extract_image_urls 11 11 12 12 logger = logging.getLogger("bot.handler") 13 13 ··· 37 37 mention_text = post.record.text 38 38 author_handle = post.author.handle 39 39 40 + # Include embed content (images, links, quote posts) in the mention 41 + embed = post.embed if hasattr(post, "embed") and post.embed else None 42 + if not embed and hasattr(post.record, "embed") and post.record.embed: 43 + embed = post.record.embed 44 + 45 + embed_desc = describe_embed(embed) if embed else None 46 + if embed_desc: 47 + mention_text = f"{mention_text}\n{embed_desc}" 48 + 49 + # Extract image URLs for multimodal vision 50 + image_urls = extract_image_urls(embed) if embed else [] 51 + 40 52 bot_status.record_mention() 41 53 42 54 # Build reply reference ··· 65 77 author_handle=author_handle, 66 78 thread_context=thread_context, 67 79 thread_uri=thread_uri, 80 + image_urls=image_urls, 68 81 ) 69 82 70 83 # Handle response actions

+122 -6

src/bot/utils/thread.py

··· 3 3 from collections.abc import Callable 4 4 5 5 6 + def describe_embed(embed) -> str | None: 7 + """Extract a human-readable description from a post embed. 8 + 9 + Handles images (with alt text), external links, quote posts, 10 + and record-with-media (quote + images). 11 + """ 12 + if embed is None: 13 + return None 14 + 15 + parts: list[str] = [] 16 + py_type = getattr(embed, "py_type", "") 17 + 18 + # images 19 + if "images" in py_type: 20 + for img in getattr(embed, "images", []): 21 + alt = getattr(img, "alt", "").strip() 22 + if alt: 23 + parts.append(f"[image: {alt}]") 24 + else: 25 + parts.append("[image: no alt text]") 26 + 27 + # external link card 28 + elif "external" in py_type: 29 + ext = getattr(embed, "external", None) 30 + if ext: 31 + title = getattr(ext, "title", "") 32 + desc = getattr(ext, "description", "") 33 + uri = getattr(ext, "uri", "") 34 + link_parts = [] 35 + if title: 36 + link_parts.append(title) 37 + if desc: 38 + link_parts.append(desc) 39 + if uri: 40 + link_parts.append(uri) 41 + parts.append(f"[link: {' — '.join(link_parts)}]") 42 + 43 + # quote post 44 + elif py_type == "app.bsky.embed.record#view": 45 + rec = getattr(embed, "record", None) 46 + if rec and hasattr(rec, "value"): 47 + author = getattr(rec, "author", None) 48 + handle = getattr(author, "handle", "?") if author else "?" 49 + text = getattr(rec.value, "text", "") 50 + # Recursively describe embeds on the quoted post 51 + quoted_embeds = getattr(rec, "embeds", None) 52 + inner = "" 53 + if quoted_embeds: 54 + inner_parts = [describe_embed(e) for e in quoted_embeds] 55 + inner = " ".join(p for p in inner_parts if p) 56 + quote_content = text 57 + if inner: 58 + quote_content = f"{text} {inner}" if text else inner 59 + parts.append(f"[quoting @{handle}: {quote_content}]") 60 + 61 + # record with media (quote post + images/video) 62 + elif "record_with_media" in py_type: 63 + media = getattr(embed, "media", None) 64 + if media: 65 + media_desc = describe_embed(media) 66 + if media_desc: 67 + parts.append(media_desc) 68 + rec = getattr(embed, "record", None) 69 + if rec: 70 + rec_desc = describe_embed(rec) 71 + if rec_desc: 72 + parts.append(rec_desc) 73 + 74 + # video 75 + elif "video" in py_type: 76 + alt = getattr(embed, "alt", "") 77 + if alt: 78 + parts.append(f"[video: {alt}]") 79 + else: 80 + parts.append("[video]") 81 + 82 + return " ".join(parts) if parts else None 83 + 84 + 85 + def extract_image_urls(embed) -> list[str]: 86 + """Extract fullsize image URLs from a post embed. 87 + 88 + Returns URLs that can be passed as ImageUrl to a multimodal model. 89 + """ 90 + if embed is None: 91 + return [] 92 + 93 + urls: list[str] = [] 94 + py_type = getattr(embed, "py_type", "") 95 + 96 + if "images" in py_type: 97 + for img in getattr(embed, "images", []): 98 + fullsize = getattr(img, "fullsize", None) 99 + if fullsize: 100 + urls.append(fullsize) 101 + 102 + elif "record_with_media" in py_type: 103 + media = getattr(embed, "media", None) 104 + if media: 105 + urls.extend(extract_image_urls(media)) 106 + 107 + return urls 108 + 109 + 110 + def describe_post(post) -> str: 111 + """Build a full text representation of a post including embeds.""" 112 + handle = post.author.handle 113 + text = post.record.text if hasattr(post.record, "text") else "" 114 + 115 + # Check for embeds on the post view (post.embed) or record (post.record.embed) 116 + embed_desc = None 117 + if hasattr(post, "embed") and post.embed: 118 + embed_desc = describe_embed(post.embed) 119 + elif hasattr(post.record, "embed") and post.record.embed: 120 + embed_desc = describe_embed(post.record.embed) 121 + 122 + if embed_desc: 123 + return f"@{handle}: {text}\n {embed_desc}" if text else f"@{handle}: {embed_desc}" 124 + return f"@{handle}: {text}" if text else f"@{handle}: [no text]" 125 + 126 + 6 127 def traverse_thread( 7 128 thread_node, 8 129 visit: Callable[[any], None], ··· 84 205 if not posts: 85 206 return "No previous messages in this thread." 86 207 87 - messages = [] 88 - for post in posts: 89 - handle = post.author.handle 90 - text = post.record.text if hasattr(post.record, "text") else "[no text]" 91 - messages.append(f"@{handle}: {text}") 92 - 208 + messages = [describe_post(post) for post in posts] 93 209 return "\n".join(messages)

Configure Feed

Configure Feed