···991010import httpx
1111from pydantic import BaseModel
1212-from pydantic_ai import Agent, RunContext
1212+from pydantic_ai import Agent, ImageUrl, RunContext
1313from pydantic_ai.mcp import MCPServerStreamableHTTP
14141515from bot.config import settings
···293293 author_handle: str,
294294 thread_context: str,
295295 thread_uri: str | None = None,
296296+ image_urls: list[str] | None = None,
296297 ) -> Response:
297298 """Process a mention with structured memory context."""
298299 # Build context from memory if available
···329330 prompt_parts.append(f"\n[NEW MESSAGE]:\n@{author_handle}: {mention_text}")
330331 prompt = "\n\n".join(prompt_parts)
331332333333+ # Build multimodal prompt if images are present
334334+ if image_urls:
335335+ user_prompt: str | list = [prompt] + [ImageUrl(url=url) for url in image_urls]
336336+ logger.info(f"including {len(image_urls)} images in prompt")
337337+ else:
338338+ user_prompt = prompt
339339+332340 # Run agent with MCP tools + search_memory available
333341 logger.info(f"processing mention from @{author_handle}: {mention_text[:80]}")
334342 deps = PhiDeps(
···336344 memory=self.memory,
337345 thread_uri=thread_uri,
338346 )
339339- result = await self.agent.run(prompt, deps=deps)
347347+ result = await self.agent.run(user_prompt, deps=deps)
340348 logger.info(f"agent decided: {result.output.action}" + (f" - {result.output.text[:80]}" if result.output.text else "") + (f" ({result.output.reason})" if result.output.reason else ""))
341349342350 # Store interaction and extract observations
+14-1
src/bot/services/message_handler.py
···77from bot.agent import PhiAgent
88from bot.core.atproto_client import BotClient
99from bot.status import bot_status
1010-from bot.utils.thread import build_thread_context
1010+from bot.utils.thread import build_thread_context, describe_embed, extract_image_urls
11111212logger = logging.getLogger("bot.handler")
1313···3737 mention_text = post.record.text
3838 author_handle = post.author.handle
39394040+ # Include embed content (images, links, quote posts) in the mention
4141+ embed = post.embed if hasattr(post, "embed") and post.embed else None
4242+ if not embed and hasattr(post.record, "embed") and post.record.embed:
4343+ embed = post.record.embed
4444+4545+ embed_desc = describe_embed(embed) if embed else None
4646+ if embed_desc:
4747+ mention_text = f"{mention_text}\n{embed_desc}"
4848+4949+ # Extract image URLs for multimodal vision
5050+ image_urls = extract_image_urls(embed) if embed else []
5151+4052 bot_status.record_mention()
41534254 # Build reply reference
···6577 author_handle=author_handle,
6678 thread_context=thread_context,
6779 thread_uri=thread_uri,
8080+ image_urls=image_urls,
6881 )
69827083 # Handle response actions
+122-6
src/bot/utils/thread.py
···33from collections.abc import Callable
445566+def describe_embed(embed) -> str | None:
77+ """Extract a human-readable description from a post embed.
88+99+ Handles images (with alt text), external links, quote posts,
1010+ and record-with-media (quote + images).
1111+ """
1212+ if embed is None:
1313+ return None
1414+1515+ parts: list[str] = []
1616+ py_type = getattr(embed, "py_type", "")
1717+1818+ # images
1919+ if "images" in py_type:
2020+ for img in getattr(embed, "images", []):
2121+ alt = getattr(img, "alt", "").strip()
2222+ if alt:
2323+ parts.append(f"[image: {alt}]")
2424+ else:
2525+ parts.append("[image: no alt text]")
2626+2727+ # external link card
2828+ elif "external" in py_type:
2929+ ext = getattr(embed, "external", None)
3030+ if ext:
3131+ title = getattr(ext, "title", "")
3232+ desc = getattr(ext, "description", "")
3333+ uri = getattr(ext, "uri", "")
3434+ link_parts = []
3535+ if title:
3636+ link_parts.append(title)
3737+ if desc:
3838+ link_parts.append(desc)
3939+ if uri:
4040+ link_parts.append(uri)
4141+ parts.append(f"[link: {' — '.join(link_parts)}]")
4242+4343+ # quote post
4444+ elif py_type == "app.bsky.embed.record#view":
4545+ rec = getattr(embed, "record", None)
4646+ if rec and hasattr(rec, "value"):
4747+ author = getattr(rec, "author", None)
4848+ handle = getattr(author, "handle", "?") if author else "?"
4949+ text = getattr(rec.value, "text", "")
5050+ # Recursively describe embeds on the quoted post
5151+ quoted_embeds = getattr(rec, "embeds", None)
5252+ inner = ""
5353+ if quoted_embeds:
5454+ inner_parts = [describe_embed(e) for e in quoted_embeds]
5555+ inner = " ".join(p for p in inner_parts if p)
5656+ quote_content = text
5757+ if inner:
5858+ quote_content = f"{text} {inner}" if text else inner
5959+ parts.append(f"[quoting @{handle}: {quote_content}]")
6060+6161+ # record with media (quote post + images/video)
6262+ elif "record_with_media" in py_type:
6363+ media = getattr(embed, "media", None)
6464+ if media:
6565+ media_desc = describe_embed(media)
6666+ if media_desc:
6767+ parts.append(media_desc)
6868+ rec = getattr(embed, "record", None)
6969+ if rec:
7070+ rec_desc = describe_embed(rec)
7171+ if rec_desc:
7272+ parts.append(rec_desc)
7373+7474+ # video
7575+ elif "video" in py_type:
7676+ alt = getattr(embed, "alt", "")
7777+ if alt:
7878+ parts.append(f"[video: {alt}]")
7979+ else:
8080+ parts.append("[video]")
8181+8282+ return " ".join(parts) if parts else None
def extract_image_urls(embed) -> list[str]:
    """Extract fullsize image URLs from a post embed.

    Collects the ``fullsize`` attribute of every image on an images embed,
    and recurses into the ``media`` of a record-with-media embed.

    Args:
        embed: An object exposing ``py_type`` and either ``images`` or
            ``media`` (read via ``getattr``), or ``None``.

    Returns:
        URLs that can be passed as ImageUrl to a multimodal model; an empty
        list when the embed carries no images with a ``fullsize`` value.
    """
    if embed is None:
        return []

    # Tolerate a py_type attribute that is present but None.
    py_type = getattr(embed, "py_type", "") or ""

    if "images" in py_type:
        candidates = (
            getattr(img, "fullsize", None)
            for img in getattr(embed, "images", None) or []
        )
        return [url for url in candidates if url]

    # Lexicon type ids are camelCase ("app.bsky.embed.recordWithMedia#view"),
    # so a literal "record_with_media" test never matches them. Normalise
    # first; the snake_case spelling is still accepted.
    if "recordwithmedia" in py_type.replace("_", "").lower():
        media = getattr(embed, "media", None)
        if media:
            return extract_image_urls(media)

    return []
def describe_post(post) -> str:
    """Render a post as ``@handle: text``, appending any embed description.

    The embed on the post view (``post.embed``) is preferred; the embed on
    the record (``post.record.embed``) is used only when the view has none.
    A post with neither text nor a describable embed renders as
    ``@handle: [no text]``.
    """
    author = post.author.handle
    record = post.record
    body = record.text if hasattr(record, "text") else ""

    # Pick the embed source: view first, record as fallback.
    if hasattr(post, "embed") and post.embed:
        summary = describe_embed(post.embed)
    elif hasattr(record, "embed") and record.embed:
        summary = describe_embed(record.embed)
    else:
        summary = None

    if not summary:
        return f"@{author}: {body}" if body else f"@{author}: [no text]"
    if body:
        return f"@{author}: {body}\n {summary}"
    return f"@{author}: {summary}"
125125+126126+6127def traverse_thread(
7128 thread_node,
8129 visit: Callable[[any], None],
···84205 if not posts:
85206 return "No previous messages in this thread."
862078787- messages = []
8888- for post in posts:
8989- handle = post.author.handle
9090- text = post.record.text if hasattr(post.record, "text") else "[no text]"
9191- messages.append(f"@{handle}: {text}")
9292-208208+ messages = [describe_post(post) for post in posts]
93209 return "\n".join(messages)