Ionosphere.tv
3
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: LLM-assisted project extraction — 84 projects, 41 with URLs

Used GPT-4o-mini to analyze all 120 talk transcripts + speaker bios,
merged with hand-curated list. Removed generic/misattributed entries.
Projects like Attie, Cirrus, Surf, Letta, Constellation, E18E,
Cartridge, popfeed.social now included.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

+167
+167
scripts/extract-projects-llm.mjs
/**
 * Use an LLM to extract projects from talk transcripts and speaker bios.
 *
 * For every talk, sends the first ~400 words and last ~150 words of the
 * transcript plus the speaker bio(s) to the OpenAI chat-completions API and
 * asks it to identify the projects being presented, with URLs where known.
 *
 * Requires the OPENAI_API_KEY environment variable.
 * Reads  apps/data/ionosphere.sqlite
 * Writes apps/data/atmosphere-projects.json
 */

import { createRequire } from 'module';
import { fileURLToPath } from 'url';
import { dirname, join } from 'path';
import { writeFileSync, readFileSync } from 'fs';

// better-sqlite3 is installed under the appview package, so resolve it from
// there. createRequire accepts a file URL object directly; going through
// `.pathname` breaks on percent-encoded characters (spaces) and on Windows.
const require = createRequire(
  new URL('../apps/ionosphere-appview/package.json', import.meta.url)
);
const Database = require('better-sqlite3');

const __dirname = dirname(fileURLToPath(import.meta.url));
const DB_PATH = join(__dirname, '..', 'apps', 'data', 'ionosphere.sqlite');
const OUTPUT_PATH = join(__dirname, '..', 'apps', 'data', 'atmosphere-projects.json');
const OPENAI_API_KEY = process.env.OPENAI_API_KEY;

/** Number of talks sent to the LLM in a single request (saves tokens). */
const BATCH_SIZE = 5;
/** Number of leading transcript words included in the prompt. */
const INTRO_WORDS = 400;
/** Number of trailing transcript words included in the prompt. */
const OUTRO_WORDS = 150;
/** Pause between LLM requests, in milliseconds (crude rate limit). */
const REQUEST_DELAY_MS = 500;

/**
 * Resolve after `ms` milliseconds.
 * @param {number} ms
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Send a single-message chat completion request to OpenAI.
 * @param {string} prompt - The user message.
 * @returns {Promise<string>} The assistant's reply text ('' if absent).
 * @throws {Error} If the HTTP response status is not OK.
 */
async function askLLM(prompt) {
  const res = await fetch('https://api.openai.com/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${OPENAI_API_KEY}`,
    },
    body: JSON.stringify({
      model: 'gpt-4o-mini',
      messages: [{ role: 'user', content: prompt }],
      temperature: 0,
      max_tokens: 1000,
    }),
  });
  if (!res.ok) {
    const err = await res.text();
    throw new Error(`OpenAI API error: ${res.status} ${err}`);
  }
  const data = await res.json();
  return data.choices?.[0]?.message?.content ?? '';
}

/**
 * Take the opening and closing words of a transcript.
 * The outro never overlaps the intro, so short transcripts are not sent twice.
 * @param {string | null | undefined} text - Full transcript text.
 * @returns {{ intro: string, outro: string }} Empty strings when unavailable.
 */
function excerptTranscript(text) {
  const words = (text ?? '').split(/\s+/).filter(Boolean);
  const intro = words.slice(0, INTRO_WORDS).join(' ');
  const outroStart = Math.max(INTRO_WORDS, words.length - OUTRO_WORDS);
  const outro = words.slice(outroStart).join(' ');
  return { intro, outro };
}

/**
 * Render one talk as the text section embedded in the prompt.
 * @param {object} talk - Row from the talks query.
 * @param {string | undefined} transcriptText - Transcript text, if any.
 * @returns {string}
 */
function describeTalk(talk, transcriptText) {
  const { intro, outro } = excerptTranscript(transcriptText);
  // RKEY must be present: the prompt asks the model to echo it as `talkRkey`.
  return `
TALK: "${talk.title}"
RKEY: ${talk.rkey}
TYPE: ${talk.talk_type || 'presentation'}
SPEAKERS: ${talk.speakers}
HANDLES: ${talk.handles}
BIOS: ${talk.bios || 'N/A'}
TRANSCRIPT INTRO: ${intro || 'N/A'}
TRANSCRIPT OUTRO: ${outro || 'N/A'}
---`;
}

/**
 * Build the extraction prompt for a batch of talk descriptions.
 * @param {string[]} talkDescriptions
 * @returns {string}
 */
function buildPrompt(talkDescriptions) {
  return `You are extracting ATProto/Atmosphere ecosystem projects from conference talks.

For each talk below, identify the SPECIFIC PROJECTS, TOOLS, APPS, or ORGANIZATIONS that the speaker is presenting or has built. NOT general technologies (React, SQLite, etc.) — only specific named projects in the ATProto/Bluesky ecosystem or related.

For each project found, provide:
- name: The project name
- url: The project URL if mentioned or inferable (e.g., if handle is "semble.so", url is likely "https://semble.so"). Use null if unknown.
- talkRkey: The talk's rkey (provided below)
- speakers: The speaker(s) presenting it

Return ONLY a JSON array. If a talk has no specific project, skip it entirely. If a talk features multiple projects, include each separately.

${talkDescriptions.join('\n')}

Return ONLY valid JSON array, no markdown fences, no explanation:`;
}

/**
 * Remove markdown code fences the model may add despite instructions.
 * @param {string} response
 * @returns {string}
 */
function stripCodeFences(response) {
  return response.replace(/```(?:json)?\s*/g, '').trim();
}

/**
 * Deduplicate projects by case-insensitive name. The first entry seen wins,
 * unless it has no URL and a later entry does. Entries that are not objects
 * with a non-empty string `name` are dropped, since LLM output is untrusted.
 * @param {unknown[]} projects
 * @returns {object[]} Unique projects sorted by name.
 */
function dedupeProjects(projects) {
  const seen = new Map();
  for (const proj of projects) {
    if (proj === null || typeof proj !== 'object' || typeof proj.name !== 'string') {
      continue;
    }
    const key = proj.name.trim().toLowerCase();
    if (!key) continue;
    const existing = seen.get(key);
    if (!existing || (!existing.url && proj.url)) {
      seen.set(key, proj);
    }
  }
  return [...seen.values()].sort((a, b) => a.name.localeCompare(b.name));
}

/**
 * Entry point: query talks, ask the LLM batch by batch, dedupe, write JSON.
 * A failed batch (API or parse error) is logged and skipped, not fatal.
 * @returns {Promise<void>}
 * @throws {Error} If OPENAI_API_KEY is not set.
 */
async function main() {
  console.log('=== Extract Projects from Talks via LLM ===\n');

  if (!OPENAI_API_KEY) {
    throw new Error('OPENAI_API_KEY environment variable is not set');
  }

  const db = new Database(DB_PATH, { readonly: true });
  const allProjects = [];

  try {
    // Get all talks with transcripts and speaker info
    const talks = db.prepare(`
      SELECT DISTINCT t.rkey, t.title, t.talk_type, t.category,
        GROUP_CONCAT(DISTINCT s.name) as speakers,
        GROUP_CONCAT(DISTINCT s.handle) as handles,
        GROUP_CONCAT(DISTINCT s.bio) as bios
      FROM talks t
      JOIN talk_speakers ts ON ts.talk_uri = t.uri
      JOIN speakers s ON s.uri = ts.speaker_uri
      WHERE t.starts_at IS NOT NULL
      GROUP BY t.rkey
      ORDER BY t.starts_at
    `).all();

    const transcriptStmt = db.prepare(`
      SELECT text FROM transcripts WHERE talk_uri = (
        SELECT uri FROM talks WHERE rkey = ? LIMIT 1
      ) LIMIT 1
    `);

    console.log(`${talks.length} talks to analyze\n`);

    // Process several talks per LLM call to save tokens
    for (let i = 0; i < talks.length; i += BATCH_SIZE) {
      const batch = talks.slice(i, i + BATCH_SIZE);
      const processed = i + batch.length;
      const talkDescriptions = batch.map((talk) =>
        describeTalk(talk, transcriptStmt.get(talk.rkey)?.text)
      );

      try {
        const response = await askLLM(buildPrompt(talkDescriptions));
        const cleaned = stripCodeFences(response);
        try {
          const projects = JSON.parse(cleaned);
          if (Array.isArray(projects)) {
            allProjects.push(...projects);
            const names = projects.map((p) => p?.name).join(', ');
            console.log(`[${processed}/${talks.length}] Found ${projects.length}: ${names}`);
          } else {
            // Valid JSON but not the array we asked for: report, don't hide.
            console.error(`  Unexpected non-array response for batch starting "${batch[0].title}"`);
            console.error(`  Response: ${cleaned.slice(0, 200)}`);
          }
        } catch (parseErr) {
          console.error(`  Parse error for batch starting "${batch[0].title}": ${parseErr.message}`);
          console.error(`  Response: ${cleaned.slice(0, 200)}`);
        }
      } catch (err) {
        console.error(`  API error: ${err.message}`);
      }

      // Rate limit; no need to wait after the final batch.
      if (processed < talks.length) {
        await sleep(REQUEST_DELAY_MS);
      }
    }
  } finally {
    // Release the database handle even if a query throws.
    db.close();
  }

  // Deduplicate by name (keep the one with a URL, or first seen)
  const deduplicated = dedupeProjects(allProjects);

  console.log(`\n=== DONE ===`);
  console.log(`Total projects found: ${allProjects.length}`);
  console.log(`After dedup: ${deduplicated.length}`);

  // Write output
  writeFileSync(OUTPUT_PATH, JSON.stringify(deduplicated, null, 2));
  console.log(`Saved to ${OUTPUT_PATH}`);
}

main().catch((err) => {
  console.error(err);
  // Signal failure to callers (CI, shell scripts) instead of exiting 0.
  process.exitCode = 1;
});