An Akkoma/Mastodon compatible API bridge that translates Mastodon/Akkoma client API requests into ATProto XRPC calls.
4
fork

Configure Feed

Select the types of activity you want to include in your feed.

refactorslop

fizzAI 5da4cf2d 7f5ff85b

+859 -829
-829
app/convert.py
··· 1 - """Convert Bluesky / AT Protocol data structures to Mastodon API format.""" 2 - 3 - from __future__ import annotations 4 - 5 - import hashlib 6 - import re 7 - import time 8 - from collections import OrderedDict 9 - from datetime import datetime, timezone 10 - from html import escape as html_escape 11 - from typing import Any 12 - 13 - from .atproto import Session, at_uri_to_web_url, encode_id, parse_at_uri 14 - 15 - # --------------------------------------------------------------------------- 16 - # Profile cache – enriches basic profiles (from post authors) with full data 17 - # --------------------------------------------------------------------------- 18 - # akkoma-fe caches the first account object it sees for a user and may never 19 - # re-fetch the full profile. Bluesky post views only include 20 - # ``profileViewBasic`` (no bio, no counts). We keep a lightweight LRU cache 21 - # of the "extra" fields so that every account object we emit is complete. 22 - 23 - _PROFILE_CACHE_MAX = 2000 24 - _PROFILE_CACHE_TTL = 600 # 10 minutes 25 - 26 - 27 - class _ProfileCache: 28 - """LRU cache mapping DID → enrichment dict (description, counts, banner).""" 29 - 30 - def __init__(self, max_size: int = _PROFILE_CACHE_MAX, ttl: int = _PROFILE_CACHE_TTL): 31 - self._cache: OrderedDict[str, tuple[dict[str, Any], float]] = OrderedDict() 32 - self._max_size = max_size 33 - self._ttl = ttl 34 - 35 - def store(self, did: str, profile: dict[str, Any]) -> None: 36 - """Cache enrichment fields from a *full* profile response.""" 37 - enrichment = { 38 - "description": profile.get("description") or "", 39 - "followersCount": profile.get("followersCount", 0), 40 - "followsCount": profile.get("followsCount", 0), 41 - "postsCount": profile.get("postsCount", 0), 42 - "banner": profile.get("banner") or "", 43 - } 44 - if did in self._cache: 45 - del self._cache[did] 46 - if len(self._cache) >= self._max_size: 47 - self._cache.popitem(last=False) 48 - self._cache[did] = (enrichment, 
time.time()) 49 - 50 - def get(self, did: str) -> dict[str, Any] | None: 51 - """Return cached enrichment dict, or *None* if missing / expired.""" 52 - if did not in self._cache: 53 - return None 54 - data, ts = self._cache[did] 55 - if time.time() - ts > self._ttl: 56 - del self._cache[did] 57 - return None 58 - self._cache.move_to_end(did) 59 - return data 60 - 61 - 62 - _profile_cache = _ProfileCache() 63 - 64 - 65 - def cache_profile(profile: dict[str, Any]) -> None: 66 - """Store a full Bluesky profile in the enrichment cache. 67 - 68 - Call this whenever a ``profileViewDetailed`` is fetched (e.g. getProfile). 69 - """ 70 - did = profile.get("did", "") 71 - if did: 72 - _profile_cache.store(did, profile) 73 - 74 - 75 - def collect_uncached_dids(items: list[dict[str, Any]]) -> list[str]: 76 - """Collect unique, uncached author DIDs from feed items or post views. 77 - 78 - Works with both ``feedViewPost`` items (containing a ``post`` key) and 79 - bare ``postView`` dicts. Returns only DIDs not already in the cache so 80 - callers can batch-fetch full profiles via ``getProfiles``. 
81 - """ 82 - seen: set[str] = set() 83 - dids: list[str] = [] 84 - for item in items: 85 - post = item.get("post", item) 86 - did = post.get("author", {}).get("did", "") 87 - if did and did not in seen and _profile_cache.get(did) is None: 88 - seen.add(did) 89 - dids.append(did) 90 - # Also check repost reason author 91 - reason = item.get("reason") 92 - if reason: 93 - rdid = reason.get("by", {}).get("did", "") 94 - if rdid and rdid not in seen and _profile_cache.get(rdid) is None: 95 - seen.add(rdid) 96 - dids.append(rdid) 97 - return dids 98 - 99 - 100 - def _enrich_profile(profile: dict[str, Any]) -> dict[str, Any]: 101 - """Return *profile* with missing fields filled from the cache.""" 102 - did = profile.get("did", "") 103 - if not did: 104 - return profile 105 - # If the profile already has counts, it's a full profile – cache it 106 - if profile.get("followersCount") is not None: 107 - _profile_cache.store(did, profile) 108 - return profile 109 - # Otherwise try to enrich from cache 110 - cached = _profile_cache.get(did) 111 - if cached is None: 112 - return profile 113 - enriched = dict(profile) 114 - enriched.setdefault("description", cached["description"]) 115 - enriched.setdefault("followersCount", cached["followersCount"]) 116 - enriched.setdefault("followsCount", cached["followsCount"]) 117 - enriched.setdefault("postsCount", cached["postsCount"]) 118 - if not enriched.get("banner"): 119 - enriched["banner"] = cached["banner"] 120 - return enriched 121 - 122 - # --------------------------------------------------------------------------- 123 - # Rich text: facets → HTML 124 - # --------------------------------------------------------------------------- 125 - 126 - _NEWLINE_SPLIT = re.compile(r"\n{2,}") 127 - 128 - 129 - def facets_to_html(text: str, facets: list[dict[str, Any]] | None = None) -> str: 130 - """Convert Bluesky post text + facets into Mastodon-style HTML content.""" 131 - if not text: 132 - return "" 133 - if not facets: 134 - return 
_wrap_paragraphs(html_escape(text, quote=False)) 135 - 136 - text_bytes = text.encode("utf-8") 137 - sorted_facets = sorted(facets, key=lambda f: f["index"]["byteStart"]) 138 - 139 - parts: list[str] = [] 140 - cursor = 0 141 - 142 - for facet in sorted_facets: 143 - start = facet["index"]["byteStart"] 144 - end = facet["index"]["byteEnd"] 145 - 146 - # Text before this facet 147 - if start > cursor: 148 - parts.append(html_escape(text_bytes[cursor:start].decode("utf-8"), quote=False)) 149 - 150 - facet_text = html_escape(text_bytes[start:end].decode("utf-8"), quote=False) 151 - 152 - for feature in facet.get("features", []): 153 - ftype = feature.get("$type", "") 154 - if ftype == "app.bsky.richtext.facet#link": 155 - uri = html_escape(feature["uri"]) 156 - parts.append( 157 - f'<a href="{uri}" rel="nofollow noopener noreferrer" target="_blank">{facet_text}</a>' 158 - ) 159 - break 160 - elif ftype == "app.bsky.richtext.facet#mention": 161 - did = feature.get("did", "") 162 - parts.append( 163 - f'<span class="h-card">' 164 - f'<a href="https://bsky.app/profile/{did}" class="u-url mention">' 165 - f"{facet_text}</a></span>" 166 - ) 167 - break 168 - elif ftype == "app.bsky.richtext.facet#tag": 169 - tag = feature.get("tag", facet_text.lstrip("#")) 170 - parts.append( 171 - f'<a href="https://bsky.app/hashtag/{tag}" class="mention hashtag" ' 172 - f'rel="tag">#<span>{tag}</span></a>' 173 - ) 174 - break 175 - else: 176 - parts.append(facet_text) 177 - 178 - cursor = end 179 - 180 - # Remaining text 181 - if cursor < len(text_bytes): 182 - parts.append(html_escape(text_bytes[cursor:].decode("utf-8"), quote=False)) 183 - 184 - return _wrap_paragraphs("".join(parts)) 185 - 186 - 187 - def _wrap_paragraphs(html: str) -> str: 188 - """Wrap text in ``<p>`` tags, converting line breaks.""" 189 - paragraphs = _NEWLINE_SPLIT.split(html) 190 - if len(paragraphs) > 1: 191 - return "".join(f"<p>{p.replace(chr(10), '<br/>')}</p>" for p in paragraphs if p) 192 - return 
f"<p>{html.replace(chr(10), '<br/>')}</p>" 193 - 194 - 195 - # --------------------------------------------------------------------------- 196 - # Facet detection for outgoing posts (text → facets) 197 - # --------------------------------------------------------------------------- 198 - 199 - URL_RE = re.compile( 200 - r"https?://[^\s<>\[\]()\"',;!?]*[^\s<>\[\]()\"',;!?.:]" 201 - ) 202 - MENTION_RE = re.compile( 203 - r"(?<!\w)@([a-zA-Z0-9]([a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])?" 204 - r"(\.[a-zA-Z0-9]([a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])?)+)" 205 - ) 206 - TAG_RE = re.compile(r"(?<!\w)#(\w+)", re.UNICODE) 207 - 208 - 209 - async def detect_facets( 210 - text: str, 211 - resolve_handle=None, 212 - ) -> list[dict[str, Any]]: 213 - """Detect links, @mentions, and #hashtags and return Bluesky facets.""" 214 - text_bytes = text.encode("utf-8") 215 - facets: list[dict[str, Any]] = [] 216 - 217 - for m in URL_RE.finditer(text): 218 - bs = len(text[: m.start()].encode("utf-8")) 219 - be = len(text[: m.end()].encode("utf-8")) 220 - facets.append( 221 - { 222 - "index": {"byteStart": bs, "byteEnd": be}, 223 - "features": [ 224 - {"$type": "app.bsky.richtext.facet#link", "uri": m.group()} 225 - ], 226 - } 227 - ) 228 - 229 - for m in MENTION_RE.finditer(text): 230 - handle = m.group(1) 231 - did = None 232 - if resolve_handle: 233 - try: 234 - did = await resolve_handle(handle) 235 - except Exception: 236 - continue 237 - if did: 238 - bs = len(text[: m.start()].encode("utf-8")) 239 - be = len(text[: m.end()].encode("utf-8")) 240 - facets.append( 241 - { 242 - "index": {"byteStart": bs, "byteEnd": be}, 243 - "features": [ 244 - {"$type": "app.bsky.richtext.facet#mention", "did": did} 245 - ], 246 - } 247 - ) 248 - 249 - for m in TAG_RE.finditer(text): 250 - tag = m.group(1) 251 - bs = len(text[: m.start()].encode("utf-8")) 252 - be = len(text[: m.end()].encode("utf-8")) 253 - facets.append( 254 - { 255 - "index": {"byteStart": bs, "byteEnd": be}, 256 - "features": [ 257 - {"$type": 
"app.bsky.richtext.facet#tag", "tag": tag} 258 - ], 259 - } 260 - ) 261 - 262 - return facets 263 - 264 - 265 - # --------------------------------------------------------------------------- 266 - # Profile → Mastodon Account 267 - # --------------------------------------------------------------------------- 268 - 269 - _DEFAULT_AVATAR = "https://bsky.app/static/default-avatar.png" 270 - _DEFAULT_HEADER = "" 271 - 272 - 273 - def convert_account( 274 - profile: dict[str, Any], 275 - *, 276 - is_self: bool = False, 277 - ) -> dict[str, Any]: 278 - """Convert a Bluesky profile (``profileViewBasic`` / ``profileViewDetailed``) 279 - to a Mastodon Account object with Akkoma/Pleroma extensions.""" 280 - # Enrich basic profiles with cached full-profile data (bio, counts, banner) 281 - profile = _enrich_profile(profile) 282 - 283 - did = profile.get("did", "") 284 - handle = profile.get("handle", "") 285 - display_name = profile.get("displayName") or handle 286 - # Use `or ""` to handle both missing key AND explicit None value 287 - description = profile.get("description") or "" 288 - avatar = profile.get("avatar") or _DEFAULT_AVATAR 289 - banner = profile.get("banner") or _DEFAULT_HEADER 290 - created = profile.get("createdAt") or profile.get("indexedAt") or "1970-01-01T00:00:00.000Z" 291 - 292 - acct = handle if is_self else handle 293 - url = f"https://bsky.app/profile/{handle}" 294 - 295 - account: dict[str, Any] = { 296 - "id": did, 297 - "username": handle, 298 - "acct": acct, 299 - "display_name": display_name, 300 - "locked": False, 301 - "bot": False, 302 - "discoverable": True, 303 - "group": False, 304 - "created_at": created, 305 - "note": facets_to_html(description), 306 - "url": url, 307 - "uri": f"at://{did}", 308 - "avatar": avatar, 309 - "avatar_static": avatar, 310 - "header": banner, 311 - "header_static": banner, 312 - "followers_count": profile.get("followersCount", 0), 313 - "following_count": profile.get("followsCount", 0), 314 - "statuses_count": 
profile.get("postsCount", 0), 315 - "last_status_at": None, 316 - "emojis": [], 317 - "fields": [], 318 - "fqn": f"{handle}@bsky.social" if "." not in handle else handle, 319 - # ── Akkoma / Pleroma extensions ── 320 - "pleroma": { 321 - "ap_id": url, 322 - "background_image": None, 323 - "confirmation_pending": False, 324 - "tags": [], 325 - "is_admin": False, 326 - "is_moderator": False, 327 - "hide_favorites": True, 328 - "hide_followers": False, 329 - "hide_follows": False, 330 - "hide_followers_count": False, 331 - "hide_follows_count": False, 332 - "relationship": {}, 333 - "skip_thread_containment": False, 334 - "deactivated": False, 335 - "allow_following_move": True, 336 - "unread_conversation_count": 0, 337 - "unread_notifications_count": 0, 338 - "notification_settings": { 339 - "block_from_strangers": False, 340 - "hide_notification_contents": False, 341 - }, 342 - "favicon": None, 343 - "accepts_chat_messages": False, 344 - }, 345 - "akkoma": { 346 - "instance": None, 347 - "status_ttl_days": None, 348 - "permit_followback": False, 349 - }, 350 - } 351 - 352 - # If this is the user's own account, add settings_store to pleroma 353 - if is_self: 354 - account["pleroma"]["settings_store"] = {} 355 - 356 - return account 357 - 358 - 359 - # --------------------------------------------------------------------------- 360 - # Relationship helper 361 - # --------------------------------------------------------------------------- 362 - 363 - 364 - def convert_relationship( 365 - did: str, 366 - viewer: dict[str, Any] | None = None, 367 - ) -> dict[str, Any]: 368 - """Build a Mastodon Relationship object from a Bluesky viewer dict.""" 369 - v = viewer or {} 370 - return { 371 - "id": did, 372 - "following": bool(v.get("following")), 373 - "showing_reblogs": True, 374 - "notifying": False, 375 - "languages": None, 376 - "followed_by": bool(v.get("followedBy")), 377 - "blocking": bool(v.get("blocking")), 378 - "blocked_by": bool(v.get("blockedBy")), 379 - 
"muting": bool(v.get("muted")), 380 - "muting_notifications": False, 381 - "requested": False, 382 - "requested_by": False, 383 - "domain_blocking": False, 384 - "endorsed": False, 385 - "note": "", 386 - } 387 - 388 - 389 - # --------------------------------------------------------------------------- 390 - # Embed → media_attachments + card 391 - # --------------------------------------------------------------------------- 392 - 393 - 394 - def _convert_images(embed: dict[str, Any]) -> list[dict[str, Any]]: 395 - """Extract media attachments from an ``images#view`` embed.""" 396 - attachments = [] 397 - for idx, img in enumerate(embed.get("images", [])): 398 - attachments.append( 399 - { 400 - "id": str(idx), 401 - "type": "image", 402 - "url": img.get("fullsize", img.get("thumb", "")), 403 - "preview_url": img.get("thumb", img.get("fullsize", "")), 404 - "remote_url": None, 405 - "text_url": None, 406 - "meta": { 407 - "original": { 408 - "width": img.get("aspectRatio", {}).get("width", 0), 409 - "height": img.get("aspectRatio", {}).get("height", 0), 410 - } 411 - }, 412 - "description": img.get("alt", ""), 413 - "blurhash": None, 414 - } 415 - ) 416 - return attachments 417 - 418 - 419 - def _convert_video(embed: dict[str, Any]) -> list[dict[str, Any]]: 420 - """Extract a video attachment from a ``video#view`` embed.""" 421 - playlist = embed.get("playlist", "") 422 - thumb = embed.get("thumbnail", "") 423 - return [ 424 - { 425 - "id": "video", 426 - "type": "video", 427 - "url": playlist, 428 - "preview_url": thumb, 429 - "remote_url": None, 430 - "text_url": None, 431 - "meta": { 432 - "original": { 433 - "width": embed.get("aspectRatio", {}).get("width", 0), 434 - "height": embed.get("aspectRatio", {}).get("height", 0), 435 - } 436 - }, 437 - "description": embed.get("alt", ""), 438 - "blurhash": None, 439 - } 440 - ] 441 - 442 - 443 - def _convert_external_card(embed: dict[str, Any]) -> dict[str, Any] | None: 444 - """Convert an ``external#view`` embed to a 
Mastodon card.""" 445 - ext = embed.get("external") 446 - if not ext: 447 - return None 448 - return { 449 - "url": ext.get("uri", ""), 450 - "title": ext.get("title", ""), 451 - "description": ext.get("description", ""), 452 - "type": "link", 453 - "image": ext.get("thumb", ""), 454 - "author_name": "", 455 - "author_url": "", 456 - "provider_name": "", 457 - "provider_url": "", 458 - "html": "", 459 - "width": 0, 460 - "height": 0, 461 - "embed_url": "", 462 - "blurhash": "", 463 - } 464 - 465 - 466 - def _extract_embed( 467 - embed: dict[str, Any] | None, 468 - ) -> tuple[list[dict], dict | None, str, dict[str, Any] | None]: 469 - """Return ``(media_attachments, card, extra_html, quote_post)`` from a resolved embed. 470 - 471 - The ``quote_post`` is the raw quoted post view data for later conversion. 472 - """ 473 - if not embed: 474 - return [], None, "", None 475 - 476 - etype = embed.get("$type", "") 477 - media: list[dict[str, Any]] = [] 478 - card: dict[str, Any] | None = None 479 - extra_html = "" 480 - quote_post: dict[str, Any] | None = None 481 - 482 - if etype == "app.bsky.embed.images#view": 483 - media = _convert_images(embed) 484 - elif etype == "app.bsky.embed.video#view": 485 - media = _convert_video(embed) 486 - elif etype == "app.bsky.embed.external#view": 487 - card = _convert_external_card(embed) 488 - elif etype == "app.bsky.embed.record#view": 489 - record = embed.get("record", {}) 490 - # Check if this is a quoted post (not a not-found or blocked post) 491 - if record.get("$type") == "app.bsky.embed.record#viewRecord": 492 - # Build a pseudo-postView from the record view 493 - quote_post = _record_view_to_post_view(record) 494 - elif etype == "app.bsky.embed.recordWithMedia#view": 495 - inner_media = embed.get("media", {}) 496 - inner_type = inner_media.get("$type", "") 497 - if inner_type == "app.bsky.embed.images#view": 498 - media = _convert_images(inner_media) 499 - elif inner_type == "app.bsky.embed.video#view": 500 - media = 
_convert_video(inner_media) 501 - # Also capture the quoted record 502 - record_embed = embed.get("record", {}) 503 - if record_embed: 504 - _, _, qt_html, qt_post = _extract_embed( 505 - {"$type": "app.bsky.embed.record#view", **record_embed} 506 - ) 507 - extra_html = qt_html 508 - quote_post = qt_post 509 - 510 - return media, card, extra_html, quote_post 511 - 512 - 513 - def _record_view_to_post_view(record: dict[str, Any]) -> dict[str, Any]: 514 - """Convert a ``record#viewRecord`` (from embed) to a pseudo ``postView``. 515 - 516 - This allows quoted posts to be converted via ``convert_status``. 517 - """ 518 - # Extract the record data 519 - rec_value = record.get("value", {}) 520 - 521 - return { 522 - "uri": record.get("uri", ""), 523 - "cid": record.get("cid", ""), 524 - "author": record.get("author", {}), 525 - "record": rec_value, 526 - "embed": record.get("embeds", [None])[0] if record.get("embeds") else None, 527 - "replyCount": record.get("replyCount", 0), 528 - "repostCount": record.get("repostCount", 0), 529 - "likeCount": record.get("likeCount", 0), 530 - "indexedAt": record.get("indexedAt", ""), 531 - "viewer": {}, # No viewer data for embedded posts 532 - "labels": record.get("labels", []), 533 - } 534 - 535 - 536 - # --------------------------------------------------------------------------- 537 - # Post → Mastodon Status 538 - # --------------------------------------------------------------------------- 539 - 540 - 541 - def convert_status( 542 - post_view: dict[str, Any], 543 - *, 544 - session: Session | None = None, 545 - ) -> dict[str, Any]: 546 - """Convert a Bluesky post (``postView``) to a Mastodon Status object.""" 547 - uri = post_view.get("uri", "") 548 - record = post_view.get("record", {}) 549 - author = post_view.get("author", {}) 550 - viewer = post_view.get("viewer", {}) 551 - embed = post_view.get("embed") 552 - 553 - text = record.get("text", "") 554 - facets = record.get("facets") 555 - created_at = record.get("createdAt") or 
post_view.get("indexedAt", "") 556 - reply_ref = record.get("reply") 557 - langs = record.get("langs", []) 558 - language = langs[0] if langs else None 559 - 560 - # Content HTML 561 - content_html = facets_to_html(text, facets) 562 - media_attachments, card, extra_html, quote_post_data = _extract_embed(embed) 563 - if extra_html: 564 - content_html += extra_html 565 - 566 - # Convert quoted post if present 567 - quote_status = None 568 - if quote_post_data: 569 - quote_status = convert_status(quote_post_data, session=session) 570 - 571 - # Reply info + conversation ID 572 - in_reply_to_id = None 573 - in_reply_to_account_id = None 574 - in_reply_to_account_acct = None 575 - # The conversation root is the thread root URI; for standalone posts it's the post itself 576 - conversation_root = uri 577 - if reply_ref: 578 - parent_uri = reply_ref.get("parent", {}).get("uri", "") 579 - root_uri = reply_ref.get("root", {}).get("uri", "") 580 - if root_uri: 581 - conversation_root = root_uri 582 - if parent_uri: 583 - in_reply_to_id = encode_id(parent_uri) 584 - try: 585 - repo, _, _ = parse_at_uri(parent_uri) 586 - in_reply_to_account_id = repo 587 - in_reply_to_account_acct = repo 588 - except ValueError: 589 - pass 590 - 591 - # Generate a stable numeric-ish conversation ID from the root URI 592 - conversation_id = int(hashlib.sha256(conversation_root.encode()).hexdigest()[:12], 16) 593 - 594 - is_self = session is not None and author.get("did") == session.did 595 - 596 - # Mentions from facets 597 - mentions = [] 598 - if facets: 599 - for f in facets: 600 - for feat in f.get("features", []): 601 - if feat.get("$type") == "app.bsky.richtext.facet#mention": 602 - mentions.append( 603 - { 604 - "id": feat["did"], 605 - "username": feat.get("did", ""), 606 - "url": f"https://bsky.app/profile/{feat['did']}", 607 - "acct": feat.get("did", ""), 608 - } 609 - ) 610 - 611 - # Tags from facets 612 - tags = [] 613 - if facets: 614 - for f in facets: 615 - for feat in 
f.get("features", []): 616 - if feat.get("$type") == "app.bsky.richtext.facet#tag": 617 - tag = feat.get("tag", "") 618 - tags.append( 619 - { 620 - "name": tag, 621 - "url": f"https://bsky.app/hashtag/{tag}", 622 - } 623 - ) 624 - 625 - # Determine CW / sensitive 626 - labels = post_view.get("labels", []) 627 - sensitive = any( 628 - lbl.get("val") in ("nsfw", "porn", "sexual", "nudity", "graphic-media") 629 - for lbl in labels 630 - ) 631 - spoiler_text = "" 632 - if sensitive: 633 - spoiler_text = "Sensitive content" 634 - 635 - thread_muted = bool(viewer.get("threadMuted")) 636 - 637 - return { 638 - "id": encode_id(uri), 639 - "created_at": created_at, 640 - "in_reply_to_id": in_reply_to_id, 641 - "in_reply_to_account_id": in_reply_to_account_id, 642 - "sensitive": sensitive, 643 - "spoiler_text": spoiler_text, 644 - "visibility": "public", 645 - "language": language, 646 - "uri": uri, 647 - "url": at_uri_to_web_url(uri), 648 - "replies_count": post_view.get("replyCount", 0), 649 - "reblogs_count": post_view.get("repostCount", 0), 650 - "favourites_count": post_view.get("likeCount", 0), 651 - "favourited": bool(viewer.get("like")), 652 - "reblogged": bool(viewer.get("repost")), 653 - "muted": thread_muted, 654 - "bookmarked": False, 655 - "pinned": False, 656 - "text": text, 657 - "content": content_html, 658 - "reblog": None, 659 - "application": {"name": "Bluesky", "website": "https://bsky.app"}, 660 - "account": convert_account(author, is_self=is_self), 661 - "media_attachments": media_attachments, 662 - "mentions": mentions, 663 - "tags": tags, 664 - "emojis": [], 665 - "card": card, 666 - "poll": None, 667 - "emoji_reactions": [], 668 - # ── Akkoma / Pleroma extensions ── 669 - "pleroma": { 670 - "local": False, 671 - "conversation_id": conversation_id, 672 - "direct_conversation_id": None, 673 - "in_reply_to_account_acct": in_reply_to_account_acct, 674 - "content": {"text/plain": text}, 675 - "spoiler_text": {"text/plain": spoiler_text}, 676 - 
"expires_at": None, 677 - "thread_muted": thread_muted, 678 - "emoji_reactions": [], 679 - "parent_visible": True, 680 - "pinned_at": None, 681 - }, 682 - # ── Akkoma quote post extension ── 683 - "quote": quote_status, 684 - } 685 - 686 - 687 - # --------------------------------------------------------------------------- 688 - # Feed view (with possible repost reason) → Mastodon Status 689 - # --------------------------------------------------------------------------- 690 - 691 - 692 - def _iso_to_tid_int(iso_dt: str, extra: str = "") -> str: 693 - """Convert an ISO 8601 timestamp to a TID-scale numeric string. 694 - 695 - TIDs encode microseconds-since-epoch in the upper 53 bits of a 64-bit 696 - value (shifted left by 10). We replicate that scale here so that the 697 - resulting numeric string sorts chronologically alongside real TID-based 698 - IDs produced by :func:`encode_id`. 699 - 700 - ``extra`` is hashed to fill the lower 10 bits, providing uniqueness when 701 - multiple reposts share the same second-level timestamp. 
702 - """ 703 - try: 704 - dt = datetime.fromisoformat(iso_dt.replace("Z", "+00:00")) 705 - except (ValueError, AttributeError): 706 - dt = datetime.now(timezone.utc) 707 - us = int(dt.timestamp() * 1_000_000) 708 - # Lower 10 bits from a hash of extra data to avoid collisions 709 - low = int(hashlib.sha256(extra.encode()).hexdigest()[:4], 16) & 0x3FF 710 - return str((us << 10) | low) 711 - 712 - 713 - def convert_feed_item( 714 - item: dict[str, Any], 715 - *, 716 - session: Session | None = None, 717 - ) -> dict[str, Any]: 718 - """Convert a ``feedViewPost`` (which may be a repost) to a Status.""" 719 - post_data = item.get("post", item) 720 - reason = item.get("reason") 721 - 722 - status = convert_status(post_data, session=session) 723 - 724 - if reason and reason.get("$type") == "app.bsky.feed.defs#reasonRepost": 725 - reblogger = reason.get("by", {}) 726 - repost_time = reason.get("indexedAt", status["created_at"]) 727 - 728 - # Generate a chronologically-sortable numeric ID from the repost 729 - # timestamp so that Mastodon/Akkoma clients order it correctly 730 - # alongside regular TID-based post IDs. 
731 - extra = f"{reblogger.get('did', '')}:{post_data.get('uri', '')}" 732 - wrapper_id = _iso_to_tid_int(repost_time, extra) 733 - 734 - is_self_reblog = session is not None and reblogger.get("did") == session.did 735 - wrapper = { 736 - **status, 737 - "id": wrapper_id, 738 - "created_at": repost_time, 739 - "account": convert_account(reblogger, is_self=is_self_reblog), 740 - "reblog": status, 741 - "content": "", 742 - "text": "", 743 - "media_attachments": [], 744 - "mentions": [], 745 - "tags": [], 746 - "card": None, 747 - } 748 - wrapper["reblogged"] = True 749 - return wrapper 750 - 751 - return status 752 - 753 - 754 - # --------------------------------------------------------------------------- 755 - # Notification → Mastodon Notification 756 - # --------------------------------------------------------------------------- 757 - 758 - _REASON_MAP = { 759 - "like": "favourite", 760 - "repost": "reblog", 761 - "follow": "follow", 762 - "mention": "mention", 763 - "reply": "mention", 764 - "quote": "mention", 765 - } 766 - 767 - 768 - def convert_notification( 769 - notif: dict[str, Any], 770 - *, 771 - posts_by_uri: dict[str, dict] | None = None, 772 - session: Session | None = None, 773 - ) -> dict[str, Any] | None: 774 - """Convert a Bluesky notification to a Mastodon Notification 775 - with Akkoma/Pleroma extensions.""" 776 - reason = notif.get("reason", "") 777 - masto_type = _REASON_MAP.get(reason) 778 - if not masto_type: 779 - return None 780 - 781 - author = notif.get("author", {}) 782 - indexed_at = notif.get("indexedAt", "") 783 - is_read = notif.get("isRead", False) 784 - 785 - result: dict[str, Any] = { 786 - "id": encode_id(notif.get("uri") or indexed_at), 787 - "type": masto_type, 788 - "created_at": indexed_at, 789 - "account": convert_account(author), 790 - # ── Akkoma / Pleroma extension ── 791 - "pleroma": { 792 - "is_seen": is_read, 793 - }, 794 - } 795 - 796 - # Attach the relevant status where applicable 797 - if masto_type in 
("favourite", "reblog"): 798 - subject_uri = notif.get("reasonSubject", "") 799 - if subject_uri and posts_by_uri and subject_uri in posts_by_uri: 800 - result["status"] = convert_status( 801 - posts_by_uri[subject_uri], session=session 802 - ) 803 - else: 804 - result["status"] = None 805 - elif masto_type == "mention": 806 - # The notification record itself is the post 807 - record = notif.get("record", {}) 808 - if record.get("$type") == "app.bsky.feed.post": 809 - # Build a minimal post_view 810 - pseudo_post = { 811 - "uri": notif.get("uri", ""), 812 - "cid": notif.get("cid", ""), 813 - "author": author, 814 - "record": record, 815 - "embed": None, 816 - "replyCount": 0, 817 - "repostCount": 0, 818 - "likeCount": 0, 819 - "indexedAt": indexed_at, 820 - "viewer": {}, 821 - "labels": notif.get("labels", []), 822 - } 823 - result["status"] = convert_status(pseudo_post, session=session) 824 - else: 825 - result["status"] = None 826 - elif masto_type == "follow": 827 - result["status"] = None 828 - 829 - return result
+28
app/convert/__init__.py
··· 1 + """Convert Bluesky / AT Protocol data structures to Mastodon API format.""" 2 + 3 + from .profile_cache import cache_profile, collect_uncached_dids, _enrich_profile, _profile_cache 4 + from .richtext import facets_to_html, detect_facets, URL_RE, MENTION_RE, TAG_RE 5 + from .account import convert_account 6 + from .relationship import convert_relationship 7 + from .status import convert_status 8 + from .feed import convert_feed_item, _iso_to_tid_int 9 + from .notification import convert_notification 10 + 11 + __all__ = [ 12 + "cache_profile", 13 + "collect_uncached_dids", 14 + "facets_to_html", 15 + "detect_facets", 16 + "convert_account", 17 + "convert_relationship", 18 + "convert_status", 19 + "convert_feed_item", 20 + "convert_notification", 21 + # Private symbols exported for backward compatibility with tests 22 + "_enrich_profile", 23 + "_profile_cache", 24 + "_iso_to_tid_int", 25 + "URL_RE", 26 + "MENTION_RE", 27 + "TAG_RE", 28 + ]
+97
app/convert/account.py
··· 1 + """Convert Bluesky profiles to Mastodon Account objects.""" 2 + 3 + from __future__ import annotations 4 + 5 + from typing import Any 6 + 7 + from .profile_cache import _enrich_profile 8 + from .richtext import facets_to_html 9 + 10 + _DEFAULT_AVATAR = "https://bsky.app/static/default-avatar.png" 11 + _DEFAULT_HEADER = "" 12 + 13 + 14 + def convert_account( 15 + profile: dict[str, Any], 16 + *, 17 + is_self: bool = False, 18 + ) -> dict[str, Any]: 19 + """Convert a Bluesky profile (``profileViewBasic`` / ``profileViewDetailed``) 20 + to a Mastodon Account object with Akkoma/Pleroma extensions.""" 21 + # Enrich basic profiles with cached full-profile data (bio, counts, banner) 22 + profile = _enrich_profile(profile) 23 + 24 + did = profile.get("did", "") 25 + handle = profile.get("handle", "") 26 + display_name = profile.get("displayName") or handle 27 + # Use `or ""` to handle both missing key AND explicit None value 28 + description = profile.get("description") or "" 29 + avatar = profile.get("avatar") or _DEFAULT_AVATAR 30 + banner = profile.get("banner") or _DEFAULT_HEADER 31 + created = profile.get("createdAt") or profile.get("indexedAt") or "1970-01-01T00:00:00.000Z" 32 + 33 + acct = handle if is_self else handle 34 + url = f"https://bsky.app/profile/{handle}" 35 + 36 + account: dict[str, Any] = { 37 + "id": did, 38 + "username": handle, 39 + "acct": acct, 40 + "display_name": display_name, 41 + "locked": False, 42 + "bot": False, 43 + "discoverable": True, 44 + "group": False, 45 + "created_at": created, 46 + "note": facets_to_html(description), 47 + "url": url, 48 + "uri": f"at://{did}", 49 + "avatar": avatar, 50 + "avatar_static": avatar, 51 + "header": banner, 52 + "header_static": banner, 53 + "followers_count": profile.get("followersCount", 0), 54 + "following_count": profile.get("followsCount", 0), 55 + "statuses_count": profile.get("postsCount", 0), 56 + "last_status_at": None, 57 + "emojis": [], 58 + "fields": [], 59 + "fqn": 
f"{handle}@bsky.social" if "." not in handle else handle, 60 + # ── Akkoma / Pleroma extensions ── 61 + "pleroma": { 62 + "ap_id": url, 63 + "background_image": None, 64 + "confirmation_pending": False, 65 + "tags": [], 66 + "is_admin": False, 67 + "is_moderator": False, 68 + "hide_favorites": True, 69 + "hide_followers": False, 70 + "hide_follows": False, 71 + "hide_followers_count": False, 72 + "hide_follows_count": False, 73 + "relationship": {}, 74 + "skip_thread_containment": False, 75 + "deactivated": False, 76 + "allow_following_move": True, 77 + "unread_conversation_count": 0, 78 + "unread_notifications_count": 0, 79 + "notification_settings": { 80 + "block_from_strangers": False, 81 + "hide_notification_contents": False, 82 + }, 83 + "favicon": None, 84 + "accepts_chat_messages": False, 85 + }, 86 + "akkoma": { 87 + "instance": None, 88 + "status_ttl_days": None, 89 + "permit_followback": False, 90 + }, 91 + } 92 + 93 + # If this is the user's own account, add settings_store to pleroma 94 + if is_self: 95 + account["pleroma"]["settings_store"] = {} 96 + 97 + return account
+147
app/convert/embed.py
"""Convert Bluesky embeds to Mastodon media attachments and cards."""

from __future__ import annotations

from typing import Any

_IMAGES_VIEW = "app.bsky.embed.images#view"
_VIDEO_VIEW = "app.bsky.embed.video#view"
_EXTERNAL_VIEW = "app.bsky.embed.external#view"
_RECORD_VIEW = "app.bsky.embed.record#view"
_RECORD_WITH_MEDIA_VIEW = "app.bsky.embed.recordWithMedia#view"
_VIEW_RECORD = "app.bsky.embed.record#viewRecord"


def _original_meta(view: dict[str, Any]) -> dict[str, Any]:
    """Build the attachment ``meta`` dict from a view's optional ``aspectRatio``.

    A missing or explicit-``None`` aspect ratio yields 0x0 instead of raising.
    """
    ratio = view.get("aspectRatio") or {}
    return {
        "original": {
            "width": ratio.get("width", 0),
            "height": ratio.get("height", 0),
        }
    }


def _convert_images(embed: dict[str, Any]) -> list[dict[str, Any]]:
    """Extract media attachments from an ``images#view`` embed.

    The attachment ``id`` is the image's position within the embed.
    """
    return [
        {
            "id": str(idx),
            "type": "image",
            # Fall back to whichever of the two URLs is present.
            "url": img.get("fullsize", img.get("thumb", "")),
            "preview_url": img.get("thumb", img.get("fullsize", "")),
            "remote_url": None,
            "text_url": None,
            "meta": _original_meta(img),
            "description": img.get("alt", ""),
            "blurhash": None,
        }
        for idx, img in enumerate(embed.get("images") or [])
    ]


def _convert_video(embed: dict[str, Any]) -> list[dict[str, Any]]:
    """Extract a single video attachment from a ``video#view`` embed."""
    return [
        {
            "id": "video",
            "type": "video",
            "url": embed.get("playlist", ""),
            "preview_url": embed.get("thumbnail", ""),
            "remote_url": None,
            "text_url": None,
            "meta": _original_meta(embed),
            "description": embed.get("alt", ""),
            "blurhash": None,
        }
    ]


def _convert_external_card(embed: dict[str, Any]) -> dict[str, Any] | None:
    """Convert an ``external#view`` embed to a Mastodon card.

    Returns ``None`` when the embed carries no ``external`` payload.
    """
    ext = embed.get("external")
    if not ext:
        return None
    return {
        "url": ext.get("uri", ""),
        "title": ext.get("title", ""),
        "description": ext.get("description", ""),
        "type": "link",
        "image": ext.get("thumb", ""),
        "author_name": "",
        "author_url": "",
        "provider_name": "",
        "provider_url": "",
        "html": "",
        "width": 0,
        "height": 0,
        "embed_url": "",
        "blurhash": "",
    }


def _record_view_to_post_view(record: dict[str, Any]) -> dict[str, Any]:
    """Convert a ``record#viewRecord`` (from embed) to a pseudo ``postView``.

    This allows quoted posts to be converted via ``convert_status``.
    Only the first entry of ``embeds`` is carried over as the post's embed.
    """
    embeds = record.get("embeds")
    return {
        "uri": record.get("uri", ""),
        "cid": record.get("cid", ""),
        "author": record.get("author", {}),
        "record": record.get("value", {}),
        "embed": embeds[0] if embeds else None,
        "replyCount": record.get("replyCount", 0),
        "repostCount": record.get("repostCount", 0),
        "likeCount": record.get("likeCount", 0),
        "indexedAt": record.get("indexedAt", ""),
        "viewer": {},  # No viewer data for embedded posts
        "labels": record.get("labels", []),
    }


def _convert_media_view(
    view: dict[str, Any],
) -> tuple[list[dict[str, Any]], dict[str, Any] | None]:
    """Return ``(media_attachments, card)`` for an images/video/external view.

    Unknown view types yield ``([], None)``.
    """
    vtype = view.get("$type", "")
    if vtype == _IMAGES_VIEW:
        return _convert_images(view), None
    if vtype == _VIDEO_VIEW:
        return _convert_video(view), None
    if vtype == _EXTERNAL_VIEW:
        return [], _convert_external_card(view)
    return [], None


def _extract_embed(
    embed: dict[str, Any] | None,
) -> tuple[list[dict], dict | None, str, dict[str, Any] | None]:
    """Return ``(media_attachments, card, extra_html, quote_post)`` from a resolved embed.

    The ``quote_post`` is the raw quoted post view data for later conversion;
    it is ``None`` unless the embedded record is a ``record#viewRecord``
    (i.e. not a not-found or blocked post).
    """
    if not embed:
        return [], None, "", None

    etype = embed.get("$type", "")

    if etype == _RECORD_VIEW:
        record = embed.get("record") or {}
        quote_post = None
        # Check if this is a quoted post (not a not-found or blocked post)
        if record.get("$type") == _VIEW_RECORD:
            quote_post = _record_view_to_post_view(record)
        return [], None, "", quote_post

    if etype == _RECORD_WITH_MEDIA_VIEW:
        # The media half may be images, video, or an external link; the
        # latter becomes a card rather than an attachment.
        media, card = _convert_media_view(embed.get("media") or {})
        extra_html = ""
        quote_post = None
        record_embed = embed.get("record") or {}
        if record_embed:
            # Re-enter as a plain record view to capture the quoted record.
            _, _, extra_html, quote_post = _extract_embed(
                {"$type": _RECORD_VIEW, **record_embed}
            )
        return media, card, extra_html, quote_post

    media, card = _convert_media_view(embed)
    return media, card, "", None
+73
app/convert/feed.py
"""Convert Bluesky feed items to Mastodon Status objects (including reposts)."""

from __future__ import annotations

import hashlib
from datetime import datetime, timedelta, timezone
from typing import Any

from ..atproto import Session
from .account import convert_account
from .status import convert_status

_EPOCH = datetime(1970, 1, 1, tzinfo=timezone.utc)
_ONE_MICROSECOND = timedelta(microseconds=1)


def _iso_to_tid_int(iso_dt: str, extra: str = "") -> str:
    """Convert an ISO 8601 timestamp to a TID-scale numeric string.

    TIDs encode microseconds-since-epoch in the upper 53 bits of a 64-bit
    value (shifted left by 10).  We replicate that scale here so that the
    resulting numeric string sorts chronologically alongside real TID-based
    IDs produced by :func:`encode_id`.

    ``extra`` is hashed to fill the lower 10 bits, providing uniqueness when
    multiple reposts share the same second-level timestamp.

    A timestamp that cannot be parsed falls back to the current UTC time.
    A timestamp without an offset is interpreted as UTC, so the result does
    not depend on the server's local time zone.
    """
    try:
        dt = datetime.fromisoformat(iso_dt.replace("Z", "+00:00"))
    except (ValueError, AttributeError):
        dt = datetime.now(timezone.utc)
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)
    # Integer timedelta division is exact; a float multiply can be off by 1µs.
    us = (dt - _EPOCH) // _ONE_MICROSECOND
    # Lower 10 bits from a hash of extra data to avoid collisions
    low = int(hashlib.sha256(extra.encode()).hexdigest()[:4], 16) & 0x3FF
    return str((us << 10) | low)


def convert_feed_item(
    item: dict[str, Any],
    *,
    session: Session | None = None,
) -> dict[str, Any]:
    """Convert a ``feedViewPost`` (which may be a repost) to a Status.

    For a plain post the converted status is returned as-is.  When the item
    carries a ``reasonRepost`` reason, a wrapper status is returned instead:
    it is attributed to the reposter, has its own synthetic ID and timestamp,
    carries the original status under ``reblog``, and has its own content
    fields blanked.

    Args:
        item: A ``feedViewPost``; a bare post view is also accepted because
            ``post`` defaults to the item itself.
        session: Forwarded to ``convert_status`` and used to detect whether
            the reposter is the logged-in user.
    """
    post_data = item.get("post", item)
    reason = item.get("reason")

    status = convert_status(post_data, session=session)

    if not (reason and reason.get("$type") == "app.bsky.feed.defs#reasonRepost"):
        return status

    reblogger = reason.get("by", {})
    repost_time = reason.get("indexedAt", status["created_at"])

    # Generate a chronologically-sortable numeric ID from the repost
    # timestamp so that Mastodon/Akkoma clients order it correctly
    # alongside regular TID-based post IDs.
    extra = f"{reblogger.get('did', '')}:{post_data.get('uri', '')}"
    wrapper_id = _iso_to_tid_int(repost_time, extra)

    is_self_reblog = session is not None and reblogger.get("did") == session.did
    return {
        **status,
        "id": wrapper_id,
        "created_at": repost_time,
        "account": convert_account(reblogger, is_self=is_self_reblog),
        "reblog": status,
        "content": "",
        "text": "",
        "media_attachments": [],
        "mentions": [],
        "tags": [],
        "card": None,
        # NOTE(review): always True on the wrapper, regardless of whether the
        # viewer reposted — confirm this is what clients expect.
        "reblogged": True,
    }
+82
app/convert/notification.py
"""Convert Bluesky notifications to Mastodon Notification objects."""

from __future__ import annotations

from typing import Any

from ..atproto import Session, encode_id
from .account import convert_account
from .status import convert_status

_REASON_MAP = {
    "like": "favourite",
    "repost": "reblog",
    "follow": "follow",
    "mention": "mention",
    "reply": "mention",
    "quote": "mention",
}


def convert_notification(
    notif: dict[str, Any],
    *,
    posts_by_uri: dict[str, dict] | None = None,
    session: Session | None = None,
) -> dict[str, Any] | None:
    """Translate one Bluesky notification into a Mastodon Notification.

    The Bluesky ``reason`` is mapped through ``_REASON_MAP``; reasons that
    have no mapping produce ``None``.  The result carries the
    Akkoma/Pleroma ``pleroma.is_seen`` extension.

    Args:
        notif: The Bluesky notification dict.
        posts_by_uri: Optional lookup of post views keyed by AT URI, used to
            attach the liked / reposted status.
        session: Forwarded to ``convert_status``.

    Returns:
        The notification dict, or ``None`` for an unmapped reason.
    """
    kind = _REASON_MAP.get(notif.get("reason", ""))
    if not kind:
        return None

    sender = notif.get("author", {})
    when = notif.get("indexedAt", "")

    notification: dict[str, Any] = {
        "id": encode_id(notif.get("uri") or when),
        "type": kind,
        "created_at": when,
        "account": convert_account(sender),
        # ── Akkoma / Pleroma extension ──
        "pleroma": {
            "is_seen": notif.get("isRead", False),
        },
    }

    if kind == "follow":
        notification["status"] = None
    elif kind == "mention":
        # The notification record itself is the post
        body = notif.get("record", {})
        if body.get("$type") != "app.bsky.feed.post":
            notification["status"] = None
        else:
            # Build a minimal post_view around the record
            notification["status"] = convert_status(
                {
                    "uri": notif.get("uri", ""),
                    "cid": notif.get("cid", ""),
                    "author": sender,
                    "record": body,
                    "embed": None,
                    "replyCount": 0,
                    "repostCount": 0,
                    "likeCount": 0,
                    "indexedAt": when,
                    "viewer": {},
                    "labels": notif.get("labels", []),
                },
                session=session,
            )
    elif kind in ("favourite", "reblog"):
        # Attach the liked / reposted post when the caller supplied it
        target = notif.get("reasonSubject", "")
        known = bool(target and posts_by_uri and target in posts_by_uri)
        notification["status"] = (
            convert_status(posts_by_uri[target], session=session) if known else None
        )

    return notification
+106
app/convert/profile_cache.py
"""Profile cache for enriching basic Bluesky profiles with full data."""

from __future__ import annotations

import time
from collections import OrderedDict
from typing import Any

_PROFILE_CACHE_MAX = 2000
_PROFILE_CACHE_TTL = 600  # 10 minutes

# Fields copied from a full profile; banner is handled separately because an
# empty string also counts as "missing" for it.
_ENRICHED_FIELDS = ("description", "followersCount", "followsCount", "postsCount")


class _ProfileCache:
    """LRU cache mapping DID → enrichment dict (description, counts, banner).

    Entries expire ``ttl`` seconds after they were stored.  Age is measured
    with ``time.monotonic`` so that wall-clock adjustments neither expire
    entries early nor keep them alive indefinitely.
    """

    def __init__(self, max_size: int = _PROFILE_CACHE_MAX, ttl: int = _PROFILE_CACHE_TTL):
        self._cache: OrderedDict[str, tuple[dict[str, Any], float]] = OrderedDict()
        self._max_size = max_size
        self._ttl = ttl

    def store(self, did: str, profile: dict[str, Any]) -> None:
        """Cache enrichment fields from a *full* profile response.

        Re-storing a DID refreshes its timestamp and makes it the most
        recently used entry.  The oldest entries are evicted once the cache
        exceeds ``max_size``; a non-positive ``max_size`` keeps nothing.
        """
        enrichment = {
            "description": profile.get("description") or "",
            # `or 0` so an explicit None is never cached as a count.
            "followersCount": profile.get("followersCount") or 0,
            "followsCount": profile.get("followsCount") or 0,
            "postsCount": profile.get("postsCount") or 0,
            "banner": profile.get("banner") or "",
        }
        self._cache.pop(did, None)
        self._cache[did] = (enrichment, time.monotonic())
        limit = max(self._max_size, 0)
        while len(self._cache) > limit:
            self._cache.popitem(last=False)

    def get(self, did: str) -> dict[str, Any] | None:
        """Return cached enrichment dict, or *None* if missing / expired.

        A hit marks the entry as most recently used; an expired entry is
        removed.
        """
        entry = self._cache.get(did)
        if entry is None:
            return None
        data, stored_at = entry
        if time.monotonic() - stored_at > self._ttl:
            del self._cache[did]
            return None
        self._cache.move_to_end(did)
        return data


_profile_cache = _ProfileCache()


def cache_profile(profile: dict[str, Any]) -> None:
    """Store a full Bluesky profile in the enrichment cache.

    Call this whenever a ``profileViewDetailed`` is fetched (e.g. getProfile).
    Profiles without a ``did`` are ignored.
    """
    did = profile.get("did", "")
    if did:
        _profile_cache.store(did, profile)


def collect_uncached_dids(items: list[dict[str, Any]]) -> list[str]:
    """Collect unique, uncached author DIDs from feed items or post views.

    Works with both ``feedViewPost`` items (containing a ``post`` key) and
    bare ``postView`` dicts.  Returns only DIDs not already in the cache so
    callers can batch-fetch full profiles via ``getProfiles``.  The result
    preserves first-seen order.
    """
    seen: set[str] = set()
    dids: list[str] = []

    def consider(did: str) -> None:
        # The cache is only consulted for DIDs not already collected.
        if did and did not in seen and _profile_cache.get(did) is None:
            seen.add(did)
            dids.append(did)

    for item in items:
        post = item.get("post", item)
        consider((post.get("author") or {}).get("did", ""))
        # Also check repost reason author
        reason = item.get("reason")
        if reason:
            consider((reason.get("by") or {}).get("did", ""))
    return dids


def _enrich_profile(profile: dict[str, Any]) -> dict[str, Any]:
    """Return *profile* with missing fields filled from the cache.

    A profile that already carries ``followersCount`` is treated as full: it
    is cached and returned unchanged.  Otherwise a copy is returned with
    description, counts and banner filled in from the cache when available.
    A field that is absent or explicitly ``None`` counts as missing.
    """
    did = profile.get("did", "")
    if not did:
        return profile
    # If the profile already has counts, it's a full profile – cache it
    if profile.get("followersCount") is not None:
        _profile_cache.store(did, profile)
        return profile
    # Otherwise try to enrich from cache
    cached = _profile_cache.get(did)
    if cached is None:
        return profile
    enriched = dict(profile)
    for key in _ENRICHED_FIELDS:
        if enriched.get(key) is None:
            enriched[key] = cached[key]
    if not enriched.get("banner"):
        enriched["banner"] = cached["banner"]
    return enriched
+30
app/convert/relationship.py
"""Convert Bluesky relationship data to Mastodon Relationship objects."""

from __future__ import annotations

from typing import Any


def convert_relationship(
    did: str,
    viewer: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Build a Mastodon Relationship object from a Bluesky viewer dict.

    Args:
        did: DID of the other account; emitted as the relationship ``id``.
        viewer: The profile's ``viewer`` state.  ``None`` or an empty dict
            yields a relationship with every flag false.

    Returns:
        A Relationship dict.  The flags derived from ``viewer`` are coerced
        to ``bool``; the remaining fields are fixed values.
    """
    state = viewer if viewer else {}

    def flag(key: str) -> bool:
        # Viewer values may be URIs, booleans or absent; only truthiness matters.
        return bool(state.get(key))

    relationship: dict[str, Any] = {"id": did}
    relationship["following"] = flag("following")
    relationship["showing_reblogs"] = True
    relationship["notifying"] = False
    relationship["languages"] = None
    relationship["followed_by"] = flag("followedBy")
    relationship["blocking"] = flag("blocking")
    relationship["blocked_by"] = flag("blockedBy")
    relationship["muting"] = flag("muted")
    relationship["muting_notifications"] = False
    relationship["requested"] = False
    relationship["requested_by"] = False
    relationship["domain_blocking"] = False
    relationship["endorsed"] = False
    relationship["note"] = ""
    return relationship
+139
app/convert/richtext.py
"""Rich text conversion between Bluesky facets and HTML."""

from __future__ import annotations

import re
from html import escape as html_escape
from typing import Any

_NEWLINE_SPLIT = re.compile(r"\n{2,}")
URL_RE = re.compile(
    r"https?://[^\s<>\[\]()\"',;!?]*[^\s<>\[\]()\"',;!?.:]"
)
MENTION_RE = re.compile(
    r"(?<!\w)@([a-zA-Z0-9]([a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])?"
    r"(\.[a-zA-Z0-9]([a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])?)+)"
)
TAG_RE = re.compile(r"(?<!\w)#(\w+)", re.UNICODE)

_LINK_FEATURE = "app.bsky.richtext.facet#link"
_MENTION_FEATURE = "app.bsky.richtext.facet#mention"
_TAG_FEATURE = "app.bsky.richtext.facet#tag"


def _escape_bytes(chunk: bytes) -> str:
    """Decode a UTF-8 slice and HTML-escape it for use as element text.

    Facet offsets come from remote data and may cut a code point in half;
    such bytes are replaced rather than raising ``UnicodeDecodeError``.
    """
    return html_escape(chunk.decode("utf-8", errors="replace"), quote=False)


def _facet_span(facet: Any, limit: int) -> tuple[int, int] | None:
    """Return the facet's ``(byteStart, byteEnd)`` or ``None`` if unusable.

    A span is usable when both bounds are integers and
    ``0 <= byteStart < byteEnd <= limit``.
    """
    index = facet.get("index") if isinstance(facet, dict) else None
    if not isinstance(index, dict):
        return None
    start, end = index.get("byteStart"), index.get("byteEnd")
    if not isinstance(start, int) or not isinstance(end, int):
        return None
    if not 0 <= start < end <= limit:
        return None
    return start, end


def _render_facet(facet: dict[str, Any], chunk: bytes) -> str:
    """Render one facet's text as HTML using its first recognised feature.

    Every remote-supplied value placed into markup (URI, DID, tag) is
    HTML-escaped.  A facet with no recognised feature renders as plain
    escaped text.
    """
    label = _escape_bytes(chunk)
    for feature in facet.get("features") or []:
        if not isinstance(feature, dict):
            continue
        ftype = feature.get("$type", "")
        if ftype == _LINK_FEATURE:
            uri = feature.get("uri")
            if not isinstance(uri, str) or not uri:
                continue
            # NOTE(review): the URI scheme is not restricted here; confirm
            # whether non-http(s) schemes should be rendered as links.
            return (
                f'<a href="{html_escape(uri)}" rel="nofollow noopener noreferrer" '
                f'target="_blank">{label}</a>'
            )
        if ftype == _MENTION_FEATURE:
            did = html_escape(str(feature.get("did", "")))
            return (
                f'<span class="h-card">'
                f'<a href="https://bsky.app/profile/{did}" class="u-url mention">'
                f"{label}</a></span>"
            )
        if ftype == _TAG_FEATURE:
            raw_tag = feature.get("tag")
            if not isinstance(raw_tag, str):
                # Fall back to the facet's own text without the leading '#'.
                raw_tag = chunk.decode("utf-8", errors="replace").lstrip("#")
            tag = html_escape(raw_tag)
            return (
                f'<a href="https://bsky.app/hashtag/{tag}" class="mention hashtag" '
                f'rel="tag">#<span>{tag}</span></a>'
            )
    return label


def facets_to_html(text: str, facets: list[dict[str, Any]] | None = None) -> str:
    """Convert Bluesky post text + facets into Mastodon-style HTML content.

    Args:
        text: The raw post text.  Empty text yields ``""``.
        facets: Bluesky facets whose ``index`` offsets are UTF-8 *byte*
            positions into ``text``.  Facets with a missing or out-of-range
            index, and facets overlapping an earlier one, are ignored so
            that malformed remote data cannot raise or duplicate text.

    Returns:
        HTML wrapped in ``<p>`` elements, with all text escaped.
    """
    if not text:
        return ""
    if not facets:
        return _wrap_paragraphs(html_escape(text, quote=False))

    text_bytes = text.encode("utf-8")
    located: list[tuple[tuple[int, int], dict[str, Any]]] = []
    for facet in facets:
        span = _facet_span(facet, len(text_bytes))
        if span is not None:
            located.append((span, facet))
    located.sort(key=lambda item: item[0])

    parts: list[str] = []
    cursor = 0
    for (start, end), facet in located:
        if start < cursor:
            # Overlaps the previous facet; rendering it would repeat text.
            continue
        if start > cursor:
            parts.append(_escape_bytes(text_bytes[cursor:start]))
        parts.append(_render_facet(facet, text_bytes[start:end]))
        cursor = end

    # Remaining text
    if cursor < len(text_bytes):
        parts.append(_escape_bytes(text_bytes[cursor:]))

    return _wrap_paragraphs("".join(parts))


def _wrap_paragraphs(html: str) -> str:
    """Wrap text in ``<p>`` tags, converting line breaks.

    Runs of two or more newlines separate paragraphs; a single newline
    becomes ``<br/>``.
    """
    paragraphs = _NEWLINE_SPLIT.split(html)
    if len(paragraphs) > 1:
        return "".join(f"<p>{p.replace(chr(10), '<br/>')}</p>" for p in paragraphs if p)
    return f"<p>{html.replace(chr(10), '<br/>')}</p>"


def _make_facet(text: str, match: re.Match[str], feature: dict[str, Any]) -> dict[str, Any]:
    """Build a facet for ``match``, converting char offsets to UTF-8 byte offsets."""
    return {
        "index": {
            "byteStart": len(text[: match.start()].encode("utf-8")),
            "byteEnd": len(text[: match.end()].encode("utf-8")),
        },
        "features": [feature],
    }


async def detect_facets(
    text: str,
    resolve_handle=None,
) -> list[dict[str, Any]]:
    """Detect links, @mentions, and #hashtags and return Bluesky facets.

    Args:
        text: The post text to scan.
        resolve_handle: Optional async callable mapping a handle to a DID.
            Mentions are only emitted when it is supplied and returns a
            truthy DID; a resolver that raises skips that mention.

    Returns:
        Facets in the order links, mentions, tags.  ``@`` and ``#`` that
        occur inside a detected URL (e.g. ``/@user.name`` or ``/#fragment``)
        are not turned into facets, so emitted facets never overlap a link.
    """
    facets: list[dict[str, Any]] = []
    link_spans: list[tuple[int, int]] = []

    for m in URL_RE.finditer(text):
        link_spans.append(m.span())
        facets.append(_make_facet(text, m, {"$type": _LINK_FEATURE, "uri": m.group()}))

    def inside_link(match: re.Match[str]) -> bool:
        return any(start <= match.start() < end for start, end in link_spans)

    for m in MENTION_RE.finditer(text):
        if resolve_handle is None or inside_link(m):
            continue
        try:
            did = await resolve_handle(m.group(1))
        except Exception:
            # Best effort: an unresolvable handle is left as plain text.
            continue
        if did:
            facets.append(_make_facet(text, m, {"$type": _MENTION_FEATURE, "did": did}))

    for m in TAG_RE.finditer(text):
        if inside_link(m):
            continue
        facets.append(_make_facet(text, m, {"$type": _TAG_FEATURE, "tag": m.group(1)}))

    return facets
+157
app/convert/status.py
"""Convert Bluesky posts to Mastodon Status objects."""

from __future__ import annotations

import hashlib
from typing import Any

from ..atproto import Session, at_uri_to_web_url, encode_id, parse_at_uri
from .account import convert_account
from .embed import _extract_embed
from .richtext import facets_to_html

# Label values that mark a post as sensitive.
_SENSITIVE_LABELS = frozenset({"nsfw", "porn", "sexual", "nudity", "graphic-media"})


def _collect_mentions_and_tags(
    facets: list[dict[str, Any]] | None,
) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
    """Return ``(mentions, tags)`` built from a record's facets in one pass.

    Mention features without a ``did`` are skipped instead of raising.
    Only the DID is available on a facet, so it is used for ``username``
    and ``acct`` as well as ``id``.
    """
    mentions: list[dict[str, Any]] = []
    tags: list[dict[str, Any]] = []
    for facet in facets or []:
        for feat in facet.get("features", []):
            ftype = feat.get("$type")
            if ftype == "app.bsky.richtext.facet#mention":
                did = feat.get("did")
                if not did:
                    continue
                mentions.append(
                    {
                        "id": did,
                        "username": did,
                        "url": f"https://bsky.app/profile/{did}",
                        "acct": did,
                    }
                )
            elif ftype == "app.bsky.richtext.facet#tag":
                tag = feat.get("tag", "")
                tags.append(
                    {
                        "name": tag,
                        "url": f"https://bsky.app/hashtag/{tag}",
                    }
                )
    return mentions, tags


def convert_status(
    post_view: dict[str, Any],
    *,
    session: Session | None = None,
) -> dict[str, Any]:
    """Convert a Bluesky post (``postView``) to a Mastodon Status object.

    Args:
        post_view: The Bluesky post view.  Missing keys fall back to empty
            values rather than raising.
        session: When given, used to mark the author as the logged-in user;
            also forwarded when converting a quoted post.

    Returns:
        A Status dict including the ``pleroma`` extension object and the
        Akkoma ``quote`` field (the converted quoted post, or ``None``).
    """
    uri = post_view.get("uri", "")
    record = post_view.get("record", {})
    author = post_view.get("author", {})
    viewer = post_view.get("viewer", {})
    embed = post_view.get("embed")

    text = record.get("text", "")
    facets = record.get("facets")
    created_at = record.get("createdAt") or post_view.get("indexedAt", "")
    reply_ref = record.get("reply")
    langs = record.get("langs", [])
    language = langs[0] if langs else None

    # Content HTML
    content_html = facets_to_html(text, facets)
    media_attachments, card, extra_html, quote_post_data = _extract_embed(embed)
    if extra_html:
        content_html += extra_html

    # Convert quoted post if present
    quote_status = None
    if quote_post_data:
        quote_status = convert_status(quote_post_data, session=session)

    # Reply info + conversation ID
    in_reply_to_id = None
    in_reply_to_account_id = None
    in_reply_to_account_acct = None
    # The conversation root is the thread root URI; for standalone posts it's the post itself
    conversation_root = uri
    if reply_ref:
        parent_uri = reply_ref.get("parent", {}).get("uri", "")
        root_uri = reply_ref.get("root", {}).get("uri", "")
        if root_uri:
            conversation_root = root_uri
        if parent_uri:
            in_reply_to_id = encode_id(parent_uri)
            try:
                repo, _, _ = parse_at_uri(parent_uri)
            except ValueError:
                # Unparseable parent URI: leave the account fields unset.
                pass
            else:
                in_reply_to_account_id = repo
                in_reply_to_account_acct = repo

    # Generate a stable numeric-ish conversation ID from the root URI
    conversation_id = int(hashlib.sha256(conversation_root.encode()).hexdigest()[:12], 16)

    is_self = session is not None and author.get("did") == session.did

    mentions, tags = _collect_mentions_and_tags(facets)

    # Determine CW / sensitive (`or []` tolerates an explicit None)
    labels = post_view.get("labels") or []
    sensitive = any(lbl.get("val") in _SENSITIVE_LABELS for lbl in labels)
    spoiler_text = "Sensitive content" if sensitive else ""

    thread_muted = bool(viewer.get("threadMuted"))

    return {
        "id": encode_id(uri),
        "created_at": created_at,
        "in_reply_to_id": in_reply_to_id,
        "in_reply_to_account_id": in_reply_to_account_id,
        "sensitive": sensitive,
        "spoiler_text": spoiler_text,
        "visibility": "public",
        "language": language,
        "uri": uri,
        "url": at_uri_to_web_url(uri),
        "replies_count": post_view.get("replyCount", 0),
        "reblogs_count": post_view.get("repostCount", 0),
        "favourites_count": post_view.get("likeCount", 0),
        "favourited": bool(viewer.get("like")),
        "reblogged": bool(viewer.get("repost")),
        "muted": thread_muted,
        "bookmarked": False,
        "pinned": False,
        "text": text,
        "content": content_html,
        "reblog": None,
        "application": {"name": "Bluesky", "website": "https://bsky.app"},
        "account": convert_account(author, is_self=is_self),
        "media_attachments": media_attachments,
        "mentions": mentions,
        "tags": tags,
        "emojis": [],
        "card": card,
        "poll": None,
        "emoji_reactions": [],
        # ── Akkoma / Pleroma extensions ──
        "pleroma": {
            "local": False,
            "conversation_id": conversation_id,
            "direct_conversation_id": None,
            "in_reply_to_account_acct": in_reply_to_account_acct,
            "content": {"text/plain": text},
            "spoiler_text": {"text/plain": spoiler_text},
            "expires_at": None,
            "thread_muted": thread_muted,
            "emoji_reactions": [],
            "parent_visible": True,
            "pinned_at": None,
        },
        # ── Akkoma quote post extension ──
        "quote": quote_status,
    }