Ionosphere.tv
3
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: wider search for discussion content — VOD sites, blogs, OG metadata

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

+360
+360
scripts/fetch-discussion.mjs
/**
 * Fetch wider conference discussion content from Bluesky and store in SQLite.
 *
 * Phases:
 *   1. VOD domain searches — one search per entry in VOD_DOMAINS
 *   2. Blog/recap queries — keyword searches for writeups and reflections
 *   3. Top conference posts — high-engagement posts sorted by top
 *   4. Classify and enrich — blog/video/post classification, URL extraction, talk matching
 *   5. OG titles — fetch og:title for blog posts with external URLs
 *   6. Profile backfill — fetch profiles for new author DIDs
 *   7. Backfill talk_rkey — update existing mentions that have talk_uri but no talk_rkey
 *
 * Usage:
 *   BOT_PASSWORD=xxx node scripts/fetch-discussion.mjs
 */

import { createRequire } from 'module';
import { fileURLToPath } from 'url';
import { dirname, join } from 'path';

// Third-party packages are resolved relative to the appview package. The file
// URL is passed directly (not `.pathname`) so paths containing spaces or
// Windows drive letters resolve correctly.
const require = createRequire(
  new URL('../apps/ionosphere-appview/package.json', import.meta.url)
);
const { BskyAgent } = require('@atproto/api');
const Database = require('better-sqlite3');

const __dirname = dirname(fileURLToPath(import.meta.url));
const DB_PATH = join(__dirname, '..', 'apps', 'data', 'ionosphere.sqlite');

// Lower bound passed as `since` to every search in phases 1-3.
const SEARCH_SINCE = '2026-03-25T00:00:00Z';

const agent = new BskyAgent({ service: 'https://bsky.social' });

/** Resolve after `ms` milliseconds. */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

// ── VOD domains ────────────────────────────────────────────────────

const VOD_DOMAINS = [
  'stream.place', 'vods.sky.boo', 'vod.atverkackt.de', 'ionosphere.tv',
  'atmosphereconf-vods.wisp.place', 'rpg.actor', 'vod.j4ck.xyz',
  'atmosphere-vods.j4ck.xyz', 'atmosphereconf-tv.btao.org',
  'stream-bsky.pages.dev', 'sites.wisp.place', 'vods.ajbird.net',
  'streamhut.wisp.place', 'conf-vods.wisp.place', 'aetheros.computer',
  'atmo.rsvp', 'atmosphereconf.org', 'youtube.com',
];

// ── Blog/recap queries ─────────────────────────────────────────────

const BLOG_QUERIES = [
  'atmosphereconf recap',
  'atmosphereconf wrote',
  'atmosphereconf writeup',
  'atmosphereconf takeaway',
  'atmosphereconf reflection',
  'atmosphereconf blog',
  'atmosphere conference wrote',
  'atmosphere conference recap',
];

// ── Helpers ────────────────────────────────────────────────────────

/**
 * True when `hostname` is `domain` itself or one of its subdomains.
 * A plain `endsWith(domain)` would also accept e.g. "notyoutube.com".
 */
function hostMatches(hostname, domain) {
  return hostname === domain || hostname.endsWith(`.${domain}`);
}

/** Hostname of `link`, or null when `link` is not a parseable absolute URL. */
function linkHostname(link) {
  try {
    return new URL(link).hostname.toLowerCase();
  } catch {
    return null;
  }
}

/** True when `link` points at one of the VOD_DOMAINS (or a subdomain of one). */
function isVodLink(link) {
  const host = linkHostname(link);
  return host !== null && VOD_DOMAINS.some((d) => hostMatches(host, d));
}

/** True when `link` points at bsky.app (or a subdomain of it). */
function isBskyAppLink(link) {
  const host = linkHostname(link);
  return host !== null && hostMatches(host, 'bsky.app');
}

/**
 * Collect the URIs a post links to: every facet feature carrying a `uri`,
 * followed by the external link-card URI (`record.embed.external.uri`) when
 * present. Order is preserved and duplicates are dropped.
 *
 * @param {object} post - post view as returned by searchPosts
 * @returns {string[]}
 */
function extractLinks(post) {
  const fromFacets = (post.record?.facets || [])
    .flatMap((facet) => facet.features || [])
    .filter((feature) => feature.uri)
    .map((feature) => feature.uri);
  const cardUri = post.record?.embed?.external?.uri;
  const all = cardUri ? [...fromFacets, cardUri] : fromFacets;
  return [...new Set(all)];
}

/**
 * Classify a post as 'video', 'blog' or 'post'.
 *
 * @param {object} post - post view as returned by searchPosts
 * @param {string|null} searchDomain - VOD domain whose search found the post, if any
 * @returns {'video'|'blog'|'post'}
 */
function classifyPost(post, searchDomain) {
  // Found through a VOD-domain search: it is a video.
  if (searchDomain && VOD_DOMAINS.includes(searchDomain)) return 'video';

  const links = extractLinks(post);
  if (links.some(isVodLink)) return 'video';

  // Blog wording only counts when the post also links somewhere that is not a VOD site.
  const text = (post.record?.text || '').toLowerCase();
  const blogWords = ['wrote', 'recap', 'writeup', 'blog', 'reflection'];
  const soundsLikeBlog = blogWords.some((word) => text.includes(word));
  if (soundsLikeBlog && links.some((link) => !isVodLink(link))) return 'blog';

  return 'post';
}

/**
 * Pick the link that best represents the post for its content type, falling
 * back to the first link, or null when the post has no links.
 *
 * @param {object} post - post view as returned by searchPosts
 * @param {'video'|'blog'|'post'} contentType
 * @returns {string|null}
 */
function extractPrimaryUrl(post, contentType) {
  const links = extractLinks(post);
  const first = links[0] || null;
  if (contentType === 'video') {
    return links.find(isVodLink) || first;
  }
  if (contentType === 'blog') {
    return links.find((link) => !isVodLink(link) && !isBskyAppLink(link)) || first;
  }
  return first;
}

/**
 * Return the talk rkey embedded in an ionosphere.tv/talks/<rkey> URL when that
 * rkey is a key of `talksByRkey`; otherwise null.
 */
function matchTalkByUrl(url, talksByRkey) {
  if (!url) return null;
  const match = url.match(/ionosphere\.tv\/talks\/([^/?#]+)/);
  if (match && talksByRkey.has(match[1])) return match[1];
  return null;
}

/**
 * Match a post to a talk through an @handle written in its text. Only
 * handles that map to exactly one talk are accepted.
 *
 * @param {object} post - post view as returned by searchPosts
 * @param {Map<string, string[]>} speakerHandleToTalks - lowercase handle → talk rkeys
 * @returns {string|null} talk rkey, or null when nothing matches unambiguously
 */
function matchTalkBySpeaker(post, speakerHandleToTalks) {
  const text = post.record?.text || '';
  for (const [, raw] of text.matchAll(/@([\w.-]+)/g)) {
    // "@alice.example.com." at the end of a sentence captures the trailing
    // dot; strip trailing punctuation and normalise case before the lookup.
    const handle = raw.replace(/[.-]+$/, '').toLowerCase();
    const talks = speakerHandleToTalks.get(handle);
    if (talks?.length === 1) return talks[0]; // unambiguous match
  }
  return null;
}

const NAMED_ENTITIES = new Map([
  ['amp', '&'], ['lt', '<'], ['gt', '>'], ['quot', '"'], ['apos', "'"], ['nbsp', '\u00a0'],
]);

/**
 * Decode numeric and the most common named HTML entities in one pass (so
 * "&amp;lt;" becomes "&lt;", not "<"). Unknown entities are left untouched.
 */
function decodeHtmlEntities(text) {
  return text.replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, (whole, body) => {
    if (body[0] === '#') {
      const isHex = body[1] === 'x' || body[1] === 'X';
      const code = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
      const valid = Number.isInteger(code) && code > 0 && code <= 0x10ffff;
      return valid ? String.fromCodePoint(code) : whole;
    }
    return NAMED_ENTITIES.get(body.toLowerCase()) ?? whole;
  });
}

/**
 * Extract a page title from HTML: the first non-empty og:title meta tag,
 * otherwise the <title> element. Attribute order inside the meta tag does not
 * matter, and a quote character of the other kind inside the value is kept.
 *
 * @param {string} html
 * @returns {string|null}
 */
function extractTitleFromHtml(html) {
  for (const [tag] of html.matchAll(/<meta\b[^>]*>/gi)) {
    if (!/\sproperty\s*=\s*(["'])og:title\1/i.test(tag)) continue;
    const content = tag.match(/\scontent\s*=\s*(?:"([^"]*)"|'([^']*)')/i);
    const value = decodeHtmlEntities(content?.[1] ?? content?.[2] ?? '').trim();
    if (value) return value;
  }
  const titleMatch = html.match(/<title[^>]*>([^<]+)<\/title>/i);
  const title = titleMatch ? decodeHtmlEntities(titleMatch[1]).trim() : '';
  return title || null;
}

/**
 * Fetch `url` and return its og:title (or <title>); null on any failure,
 * non-2xx response, or when the request takes longer than 5 seconds.
 *
 * @param {string} url
 * @returns {Promise<string|null>}
 */
async function fetchOgTitle(url) {
  const controller = new AbortController();
  // The timer stays armed while the body is read, so a slow body is aborted too.
  const timeout = setTimeout(() => controller.abort(), 5000);
  try {
    const res = await fetch(url, {
      signal: controller.signal,
      headers: { 'User-Agent': 'ionosphere.tv/1.0' },
      redirect: 'follow',
    });
    if (!res.ok) return null;
    return extractTitleFromHtml(await res.text());
  } catch {
    // Best effort: a missing title is not an error for the caller.
    return null;
  } finally {
    clearTimeout(timeout);
  }
}

/**
 * Run one searchPosts call and add unseen posts to `allPosts`, keyed by URI.
 * The first search that finds a post decides its `searchDomain`. Failures are
 * logged and skipped; the 200 ms pause happens either way.
 *
 * @param {Map<string, {post: object, searchDomain: string|null}>} allPosts
 * @param {object} params - searchPosts parameters (q, limit, optional domain)
 * @param {string|null} searchDomain - recorded with each newly found post
 * @param {string} label - prefix for the log line
 */
async function collectSearchResults(allPosts, params, searchDomain, label) {
  try {
    const res = await agent.app.bsky.feed.searchPosts({
      since: SEARCH_SINCE,
      sort: 'top',
      ...params,
    });
    const posts = res.data?.posts || [];
    for (const p of posts) {
      if (!allPosts.has(p.uri)) allPosts.set(p.uri, { post: p, searchDomain });
    }
    if (posts.length > 0) console.log(` ${label}: ${posts.length} posts`);
  } catch (err) {
    console.warn(` ${label}: search failed (${err?.message ?? err})`);
  } finally {
    await sleep(200);
  }
}

/**
 * Add a column to `mentions` unless it already exists. Only the
 * "duplicate column name" error is ignored; anything else is rethrown.
 */
function ensureMentionsColumn(db, columnDef) {
  try {
    db.exec(`ALTER TABLE mentions ADD COLUMN ${columnDef}`);
  } catch (err) {
    if (!/duplicate column name/i.test(String(err?.message))) throw err;
  }
}

// ── Main ───────────────────────────────────────────────────────────

async function main() {
  console.log('=== Fetch Discussion Content ===\n');

  const password = process.env.BOT_PASSWORD;
  if (!password) {
    throw new Error('BOT_PASSWORD environment variable is required');
  }
  await agent.login({ identifier: 'ionosphere.tv', password });
  console.log('Authenticated\n');

  const db = new Database(DB_PATH);
  try {
    // Ensure new columns exist (idempotent)
    ensureMentionsColumn(db, "content_type TEXT DEFAULT 'post'");
    ensureMentionsColumn(db, 'external_url TEXT');
    ensureMentionsColumn(db, 'og_title TEXT');
    ensureMentionsColumn(db, 'talk_rkey TEXT');

    // On conflict, og_title and talk_rkey are only replaced by non-NULL
    // values: this script always inserts og_title as NULL, so overwriting
    // would discard the titles fetched in phase 5 on every run. The title is
    // reset when the external URL changed, because it belonged to the old URL.
    // Right-hand sides refer to the row as it was before the update.
    const upsert = db.prepare(`
      INSERT INTO mentions (uri, talk_uri, author_did, author_handle, text, created_at,
        talk_offset_ms, byte_position, likes, reposts, replies, parent_uri,
        mention_type, indexed_at, content_type, external_url, og_title, talk_rkey)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
      ON CONFLICT(uri) DO UPDATE SET
        likes=excluded.likes, reposts=excluded.reposts, replies=excluded.replies,
        content_type=excluded.content_type, external_url=excluded.external_url,
        og_title=CASE
          WHEN excluded.external_url IS mentions.external_url
            THEN COALESCE(excluded.og_title, mentions.og_title)
          ELSE excluded.og_title
        END,
        talk_rkey=COALESCE(excluded.talk_rkey, mentions.talk_rkey),
        indexed_at=excluded.indexed_at
    `);

    // Load talk data for matching
    const talks = db.prepare(
      'SELECT DISTINCT rkey, title, uri FROM talks WHERE starts_at IS NOT NULL'
    ).all();
    const talksByRkey = new Map(talks.map((t) => [t.rkey, t]));

    const speakerTalks = db.prepare(`
      SELECT s.handle, t.rkey
      FROM speakers s
      JOIN talk_speakers ts ON ts.speaker_uri = s.uri
      JOIN talks t ON t.uri = ts.talk_uri
      WHERE s.handle IS NOT NULL
    `).all();
    // Keys are lowercased to match the normalisation in matchTalkBySpeaker.
    const speakerHandleToTalks = new Map();
    for (const { handle, rkey } of speakerTalks) {
      const key = handle.toLowerCase();
      if (!speakerHandleToTalks.has(key)) speakerHandleToTalks.set(key, []);
      speakerHandleToTalks.get(key).push(rkey);
    }

    const allPosts = new Map();

    // Phase 1: VOD domain searches
    console.log('--- Phase 1: VOD domains ---');
    for (const domain of VOD_DOMAINS) {
      await collectSearchResults(
        allPosts,
        { q: 'atmosphere OR atmosphereconf', domain, limit: 100 },
        domain,
        domain
      );
    }

    // Phase 2: Blog/recap queries
    console.log('\n--- Phase 2: Blog/recap queries ---');
    for (const q of BLOG_QUERIES) {
      await collectSearchResults(allPosts, { q, limit: 50 }, null, `"${q}"`);
    }

    // Phase 3: Top conference posts (sorted by engagement)
    console.log('\n--- Phase 3: Top conference posts ---');
    for (const q of ['atmosphereconf', 'atmosphere conf', '#atmosphereconf', '#ATmosphere']) {
      await collectSearchResults(allPosts, { q, limit: 100 }, null, `"${q}"`);
    }

    console.log(`\nTotal unique posts: ${allPosts.size}`);

    // Phase 4: Classify, extract URLs, match talks
    console.log('\n--- Phase 4: Classify and enrich ---');
    let blogCount = 0;
    let videoCount = 0;
    let postCount = 0;
    const now = new Date().toISOString();

    const batchInsert = db.transaction((items) => {
      for (const item of items) {
        upsert.run(...item);
      }
    });

    const rows = [];
    for (const { post: p, searchDomain } of allPosts.values()) {
      const contentType = classifyPost(p, searchDomain);
      const externalUrl = extractPrimaryUrl(p, contentType);
      const talkRkey = matchTalkByUrl(externalUrl, talksByRkey)
        ?? matchTalkBySpeaker(p, speakerHandleToTalks);
      const talkUri = talkRkey ? (talksByRkey.get(talkRkey)?.uri || null) : null;

      if (contentType === 'blog') blogCount++;
      else if (contentType === 'video') videoCount++;
      else postCount++;

      rows.push([
        p.uri, talkUri, p.author.did, p.author.handle,
        // Missing record fields are bound as NULL rather than undefined.
        p.record?.text ?? null, p.record?.createdAt ?? null,
        null, null, // talk_offset_ms, byte_position
        p.likeCount || 0, p.repostCount || 0, p.replyCount || 0,
        null, // parent_uri
        'discussion', now,
        contentType, externalUrl, null, talkRkey,
      ]);
    }

    batchInsert(rows);
    console.log(` Posts: ${postCount}, Blog posts: ${blogCount}, Videos: ${videoCount}`);

    // Phase 5: Fetch OG titles for blog posts
    console.log('\n--- Phase 5: OG titles ---');
    const blogRows = db.prepare(
      "SELECT uri, external_url FROM mentions WHERE content_type = 'blog' AND external_url IS NOT NULL AND og_title IS NULL"
    ).all();

    let ogFetched = 0;
    const updateOg = db.prepare('UPDATE mentions SET og_title = ? WHERE uri = ?');
    for (const row of blogRows) {
      const title = await fetchOgTitle(row.external_url);
      if (title) {
        updateOg.run(title, row.uri);
        ogFetched++;
        console.log(` ${row.external_url} → ${title}`);
      }
      await sleep(100);
    }
    console.log(` OG titles fetched: ${ogFetched}/${blogRows.length}`);

    // Phase 6: Backfill profiles
    console.log('\n--- Phase 6: Profile backfill ---');
    const missing = db.prepare(`
      SELECT DISTINCT m.author_did FROM mentions m
      LEFT JOIN profiles p ON m.author_did = p.did WHERE p.did IS NULL
    `).all();

    const profileUpsert = db.prepare(
      'INSERT OR REPLACE INTO profiles (did, handle, display_name, avatar_url, fetched_at) VALUES (?, ?, ?, ?, ?)'
    );
    let profilesFetched = 0;
    for (const { author_did: did } of missing) {
      try {
        const res = await fetch(
          `https://public.api.bsky.app/xrpc/app.bsky.actor.getProfile?actor=${encodeURIComponent(did)}`
        );
        if (res.ok) {
          const data = await res.json();
          profileUpsert.run(did, data.handle || null, data.displayName || null, data.avatar || null, now);
          profilesFetched++;
        }
      } catch (err) {
        // One unreachable profile must not abort the backfill.
        console.warn(` ${did}: profile fetch failed (${err?.message ?? err})`);
      }
      await sleep(50);
    }
    console.log(` New profiles: ${profilesFetched}`);

    // Phase 7: Backfill talk_rkey on existing mentions
    console.log('\n--- Phase 7: Backfill talk_rkey on existing mentions ---');
    const updated = db.prepare(`
      UPDATE mentions SET talk_rkey = (
        SELECT t.rkey FROM talks t WHERE t.uri = mentions.talk_uri LIMIT 1
      ) WHERE talk_uri IS NOT NULL AND talk_rkey IS NULL
    `).run();
    console.log(` Updated ${updated.changes} existing mentions with talk_rkey`);

    // Summary
    const stats = db.prepare(`
      SELECT content_type, COUNT(*) as c FROM mentions
      WHERE content_type IS NOT NULL GROUP BY content_type
    `).all();
    console.log('\n=== DONE ===');
    for (const s of stats) console.log(` ${s.content_type}: ${s.c}`);
    console.log(` Total: ${db.prepare('SELECT COUNT(*) as c FROM mentions').get().c}`);
  } finally {
    // Release the database handle even when a phase throws.
    db.close();
  }
}

main().catch((err) => {
  console.error(err);
  // Signal failure to the caller (cron, CI) instead of exiting with status 0.
  process.exitCode = 1;
});