See the best posts from any Bluesky account
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Store and render Bluesky rich text facets for clickable links and mentions

Bluesky annotates post text with "facets" — byte-range metadata identifying
links, mentions, and hashtags. Previously we stored only plain text, so bare
domain URLs (e.g. atmosphereconf-tv.btao.org) were never linkified. Now we
store facets from both ingest paths (backfill + Jetstream), persist them as
JSON in ClickHouse, and use UTF-8 byte slicing at render time to wrap the
correct text spans in <a> tags with rel="ugc nofollow noopener". Posts
indexed before this change fall back to a regex that linkifies bare http:// and https:// URLs.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

+190 -5
+70 -1
app/controllers/profile_controller.ts
··· 3 3 import logger from '@adonisjs/core/services/logger' 4 4 import { HandleResolver, InvalidHandleError, HandleNotFoundError } from '#services/handle_resolver' 5 5 import { AtprotoClient, BlueskyRateLimitedError } from '#lib/atproto/index' 6 + import type { Facet, FacetLink, FacetMention } from '#lib/atproto/index' 6 7 import { ClickHouseStore } from '#lib/clickhouse/index' 7 8 import { runBackfillStream, type SseWriter } from '#lib/backfill_stream' 8 9 import User from '#models/user' ··· 309 310 ...p, 310 311 embed: p.embed, 311 312 bskyUrl: atUriToBskyUrl(p.postUri), 312 - postTextSafe: escapeHtml(p.postText).replace(/\n/g, '<br>'), 313 + postTextSafe: renderRichText(p.postText, p.facets).replace(/\n/g, '<br>'), 313 314 })) 314 315 315 316 // 6. Render profile page with Cache-Control ··· 438 439 /** 439 440 * Escapes HTML special characters to prevent XSS in template safe() blocks. 440 441 */ 442 + /** 443 + * Render post text with rich text facets into safe HTML. 444 + * 445 + * Bluesky facets use UTF-8 byte offsets. We convert the text to a UTF-8 byte 446 + * array, slice by facet byte ranges, then decode each segment back to a string 447 + * for HTML escaping and wrapping. 448 + * 449 + * Falls back to plain escaped text if no facets are present. 
450 + */ 451 + function renderRichText(text: string, facets: Facet[]): string { 452 + if (facets.length === 0) { 453 + // Fallback for posts indexed before facets were stored: linkify https:// URLs via regex 454 + return escapeHtml(text).replace( 455 + /https?:\/\/[^\s<"']+/g, 456 + (url) => 457 + `<a href="${url}" target="_blank" rel="ugc nofollow noopener" class="text-blue-600 hover:underline">${url}</a>` 458 + ) 459 + } 460 + 461 + const encoder = new TextEncoder() 462 + const decoder = new TextDecoder() 463 + const bytes = encoder.encode(text) 464 + 465 + const parts: string[] = [] 466 + let cursor = 0 467 + 468 + for (const facet of facets) { 469 + const { byteStart, byteEnd } = facet.index 470 + if (byteStart > bytes.length || byteEnd > bytes.length) continue 471 + 472 + // Text before this facet 473 + if (byteStart > cursor) { 474 + parts.push(escapeHtml(decoder.decode(bytes.slice(cursor, byteStart)))) 475 + } 476 + 477 + const facetText = escapeHtml(decoder.decode(bytes.slice(byteStart, byteEnd))) 478 + 479 + // Find the first link feature (most important for clickability) 480 + const linkFeature = facet.features.find( 481 + (f): f is FacetLink => f.$type === 'app.bsky.richtext.facet#link' 482 + ) 483 + const mentionFeature = facet.features.find( 484 + (f): f is FacetMention => f.$type === 'app.bsky.richtext.facet#mention' 485 + ) 486 + 487 + if (linkFeature) { 488 + parts.push( 489 + `<a href="${escapeHtml(linkFeature.uri)}" target="_blank" rel="ugc nofollow noopener" class="text-blue-600 hover:underline">${facetText}</a>` 490 + ) 491 + } else if (mentionFeature) { 492 + parts.push( 493 + `<a href="https://bsky.app/profile/${escapeHtml(mentionFeature.did)}" target="_blank" rel="ugc nofollow noopener" class="text-blue-600 hover:underline">${facetText}</a>` 494 + ) 495 + } else { 496 + parts.push(facetText) 497 + } 498 + 499 + cursor = byteEnd 500 + } 501 + 502 + // Remaining text after the last facet 503 + if (cursor < bytes.length) { 504 + 
parts.push(escapeHtml(decoder.decode(bytes.slice(cursor)))) 505 + } 506 + 507 + return parts.join('') 508 + } 509 + 441 510 function escapeHtml(text: string): string { 442 511 return text 443 512 .replace(/&/g, '&amp;')
+6
app/lib/atproto/index.ts
··· 2 2 export { parseAtUri } from './parsers/at_uri.js' 3 3 export { parseJetstreamEvent, parsePostEmbed } from './parsers/jetstream.js' 4 4 export { parseGetAuthorFeedResponse } from './parsers/get_author_feed.js' 5 + export { parseFacets } from './parsers/facets.js' 5 6 6 7 // Types 7 8 export type { ··· 17 18 ImagesEmbed, 18 19 VideoEmbed, 19 20 ExternalEmbed, 21 + Facet, 22 + FacetFeature, 23 + FacetLink, 24 + FacetMention, 25 + FacetTag, 20 26 } from './types.js' 21 27 22 28 // Client wrapper
+65
app/lib/atproto/parsers/facets.ts
import type { Facet, FacetFeature } from '../types.js'

/**
 * Link facets are only kept when their URI is an absolute http(s) URL.
 *
 * Facet records are untrusted user input and the URI is later emitted into an
 * `href` attribute, so schemes such as `javascript:` or `data:` must never be
 * stored. Matching is case-insensitive because URI schemes are.
 */
const HTTP_URI_PATTERN = /^https?:\/\//i

/** Narrow an unknown value to a non-null, non-array object. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value)
}

/**
 * Convert one raw facet feature into a typed FacetFeature.
 *
 * @returns The typed feature, or null when the feature type is unknown, its
 *          payload field is not a string, or (for links) the URI is not http(s).
 */
function parseFeature(feat: unknown): FacetFeature | null {
  if (!isRecord(feat)) return null

  const type = feat['$type']

  if (type === 'app.bsky.richtext.facet#link') {
    const uri = feat['uri']
    if (typeof uri === 'string' && HTTP_URI_PATTERN.test(uri)) {
      return { $type: 'app.bsky.richtext.facet#link', uri }
    }
    return null
  }

  if (type === 'app.bsky.richtext.facet#mention') {
    const did = feat['did']
    return typeof did === 'string' ? { $type: 'app.bsky.richtext.facet#mention', did } : null
  }

  if (type === 'app.bsky.richtext.facet#tag') {
    const tag = feat['tag']
    return typeof tag === 'string' ? { $type: 'app.bsky.richtext.facet#tag', tag } : null
  }

  return null
}

/**
 * Parse the `facets` array from a Bluesky post record into typed Facet objects.
 * Discards malformed facets silently (best-effort rendering).
 *
 * A facet is kept only when:
 * - `index.byteStart` / `index.byteEnd` are safe integers with
 *   `0 <= byteStart < byteEnd` (NaN, Infinity and fractional offsets are
 *   rejected because they cannot address a byte range), and
 * - at least one feature survives validation (see `parseFeature`).
 *
 * @param record - The raw post record (Jetstream rawRecord or AppView record object)
 * @returns Array of valid, non-overlapping facets, sorted by byteStart ascending.
 *          When two facets overlap, the one that starts first wins.
 */
export function parseFacets(record: unknown): Facet[] {
  if (!isRecord(record)) return []

  const raw = record['facets']
  if (!Array.isArray(raw)) return []

  const facets: Facet[] = []

  for (const item of raw) {
    if (!isRecord(item)) continue

    const index = item['index']
    if (!isRecord(index)) continue

    const byteStart = index['byteStart']
    const byteEnd = index['byteEnd']
    if (typeof byteStart !== 'number' || typeof byteEnd !== 'number') continue
    // typeof 'number' alone lets NaN, ±Infinity and fractions through.
    if (!Number.isSafeInteger(byteStart) || !Number.isSafeInteger(byteEnd)) continue
    if (byteStart < 0 || byteEnd <= byteStart) continue

    const rawFeatures = item['features']
    if (!Array.isArray(rawFeatures) || rawFeatures.length === 0) continue

    const features: FacetFeature[] = []
    for (const feat of rawFeatures) {
      const parsed = parseFeature(feat)
      if (parsed !== null) features.push(parsed)
    }

    if (features.length > 0) {
      facets.push({ index: { byteStart, byteEnd }, features })
    }
  }

  // Sort by byteStart ascending, then drop any facet that starts inside the
  // previously accepted one so consumers can walk the ranges with one cursor.
  facets.sort((a, b) => a.index.byteStart - b.index.byteStart)
  const result: Facet[] = []
  let lastEnd = 0
  for (const facet of facets) {
    if (facet.index.byteStart < lastEnd) continue // overlapping, skip
    result.push(facet)
    lastEnd = facet.index.byteEnd
  }

  return result
}
+2
app/lib/atproto/parsers/get_author_feed.ts
··· 1 1 import type { ExternalEmbed, ImagesEmbed, PostEmbed, PostSnapshot, VideoEmbed } from '../types.js' 2 + import { parseFacets } from './facets.js' 2 3 3 4 function isObject(v: unknown): v is Record<string, unknown> { 4 5 return typeof v === 'object' && v !== null && !Array.isArray(v) ··· 229 230 snapshotQuotes, 230 231 snapshotTakenAt, 231 232 embed: parsedEmbed, 233 + facets: parseFacets(record), 232 234 }) 233 235 }) 234 236
+28
app/lib/atproto/types.ts
··· 162 162 snapshotTakenAt: Date 163 163 /** Optional embed parsed from the post's record (images/video/external). Null = no embed. */ 164 164 embed: PostEmbed | null 165 + /** Rich text facets from the post record. Empty array = no facets. */ 166 + facets: Facet[] 167 + } 168 + 169 + // --------------------------------------------------------------------------- 170 + // Rich text facets 171 + // --------------------------------------------------------------------------- 172 + 173 + export interface Facet { 174 + index: { byteStart: number; byteEnd: number } 175 + features: FacetFeature[] 176 + } 177 + 178 + export type FacetFeature = FacetLink | FacetMention | FacetTag 179 + 180 + export interface FacetLink { 181 + $type: 'app.bsky.richtext.facet#link' 182 + uri: string 183 + } 184 + 185 + export interface FacetMention { 186 + $type: 'app.bsky.richtext.facet#mention' 187 + did: string 188 + } 189 + 190 + export interface FacetTag { 191 + $type: 'app.bsky.richtext.facet#tag' 192 + tag: string 165 193 } 166 194 167 195 // ---------------------------------------------------------------------------
+8 -2
app/lib/clickhouse/store.ts
··· 1 1 import { createClient } from '@clickhouse/client' 2 2 import type { ClickHouseClient } from '@clickhouse/client' 3 - import type { PostEmbed, PostSnapshot } from '#lib/atproto/index' 3 + import type { Facet, PostEmbed, PostSnapshot } from '#lib/atproto/index' 4 4 import type { 5 5 ClickHouseConfig, 6 6 EngagementEventRow, ··· 20 20 likes: string // aggregates come back as strings in ClickHouse JSON 21 21 reposts: string 22 22 embed_json: string // empty string = no embed 23 + facets_json: string // empty string = no facets 23 24 } 24 25 25 26 // --------------------------------------------------------------------------- ··· 36 37 s.post_text, 37 38 s.post_created_at, 38 39 s.embed_json, 40 + s.facets_json, 39 41 s.snapshot_likes 40 42 + countIf(e.kind = 'like' AND e.event_created_at > s.snapshot_taken_at) 41 43 AS likes, ··· 54 56 AND s.is_deleted = 0` 55 57 56 58 const GROUP_BY = ` 57 - GROUP BY s.post_uri, s.post_text, s.post_created_at, s.embed_json, 59 + GROUP BY s.post_uri, s.post_text, s.post_created_at, s.embed_json, s.facets_json, 58 60 s.snapshot_likes, s.snapshot_reposts, s.snapshot_taken_at` 59 61 60 62 const LIMIT = ` ··· 189 191 reposts: Number(row.reposts), 190 192 // Empty string is the canonical "no embed" sentinel — don't JSON.parse(''). 191 193 embed: row.embed_json ? (JSON.parse(row.embed_json) as PostEmbed) : null, 194 + facets: row.facets_json ? (JSON.parse(row.facets_json) as Facet[]) : [], 192 195 })) 193 196 } 194 197 ··· 248 251 snapshot_taken_at: dateToClickHouseStr(s.snapshotTakenAt), 249 252 is_deleted: 0, 250 253 embed_json: s.embed ? JSON.stringify(s.embed) : '', 254 + facets_json: s.facets.length > 0 ? 
JSON.stringify(s.facets) : '', 251 255 })) 252 256 253 257 try { ··· 327 331 snapshot_taken_at: now, 328 332 is_deleted: 1, 329 333 embed_json: '', 334 + facets_json: '', 330 335 }, 331 336 ], 332 337 format: 'JSONEachRow', ··· 369 374 snapshot_quotes, 370 375 now64(6), 371 376 toUInt8(1), 377 + '', 372 378 '' 373 379 FROM post_snapshots FINAL 374 380 WHERE post_author_did = {authorDid:String}
+3 -1
app/lib/clickhouse/types.ts
··· 1 1 /** 2 2 * Public types for the ClickHouse package. 3 3 */ 4 - import type { PostEmbed } from '#lib/atproto/index' 4 + import type { Facet, PostEmbed } from '#lib/atproto/index' 5 5 6 6 // --------------------------------------------------------------------------- 7 7 // Query types ··· 24 24 reposts: number 25 25 /** Parsed embed (images/video/external) or null if the post had no embed. */ 26 26 embed: PostEmbed | null 27 + /** Rich text facets for rendering links/mentions/tags. */ 28 + facets: Facet[] 27 29 } 28 30 29 31 // ---------------------------------------------------------------------------
+2 -1
app/services/jetstream_consumer.ts
··· 1 1 import type { EngagementEventRow } from '#lib/clickhouse/index' 2 2 import type { ClickHouseStore } from '#lib/clickhouse/index' 3 3 import type { PostSnapshot } from '#lib/clickhouse/index' 4 - import { parseJetstreamEvent, parseAtUri, parsePostEmbed } from '#lib/atproto/index' 4 + import { parseJetstreamEvent, parseAtUri, parsePostEmbed, parseFacets } from '#lib/atproto/index' 5 5 import type { PostEvent } from '#lib/atproto/index' 6 6 7 7 // --------------------------------------------------------------------------- ··· 379 379 snapshotQuotes: 0, 380 380 snapshotTakenAt: this.deps.now(), 381 381 embed: embedResult.embed, 382 + facets: parseFacets(event.rawRecord), 382 383 }) 383 384 384 385 this.advancePendingCursor(timeUs)
+1
database/clickhouse/004_add_facets_to_post_snapshots.sql
··· 1 + ALTER TABLE post_snapshots ADD COLUMN IF NOT EXISTS facets_json String DEFAULT ''
+4
tests/functional/profile_controller.spec.ts
··· 303 303 snapshotQuotes: 0, 304 304 snapshotTakenAt: new Date('2024-01-15T12:00:00Z'), 305 305 embed: null, 306 + facets: [], 306 307 }, 307 308 ]) 308 309 ··· 342 343 snapshotQuotes: 0, 343 344 snapshotTakenAt: new Date(), 344 345 embed: null, 346 + facets: [], 345 347 }, 346 348 { 347 349 postUri: 'at://did:plc:test002/app.bsky.feed.post/old', ··· 353 355 snapshotQuotes: 0, 354 356 snapshotTakenAt: new Date('2020-01-01T00:00:00Z'), 355 357 embed: null, 358 + facets: [], 356 359 }, 357 360 ]) 358 361 ··· 428 431 snapshotQuotes: 0, 429 432 snapshotTakenAt: new Date(), 430 433 embed: null, 434 + facets: [], 431 435 }, 432 436 ]) 433 437
+1
tests/unit/clickhouse_store_fixtures.ts
··· 36 36 snapshotQuotes: overrides.snapshotQuotes ?? 0, 37 37 snapshotTakenAt: overrides.snapshotTakenAt ?? new Date('2025-01-15T13:00:00Z'), 38 38 embed: overrides.embed ?? null, 39 + facets: [], 39 40 } 40 41 } 41 42