atmo.rsvp
3
fork

Configure Feed

Select the types of activity you want to include in your feed.

at 2953d729cb2b4e5a89bd7e01fa616da57e208475 174 lines 5.1 kB view raw
1/** 2 * Step 4: Use Claude to generate chapters and summaries from transcripts. 3 * Output: data/output/<rkey>.json 4 * 5 * Requires: ANTHROPIC_API_KEY env var, npm install anthropic 6 */ 7 8import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs'; 9import { 10 TRANSCRIPTS_DIR, 11 OUTPUT_DIR, 12 EVENTS_FILE, 13 type VodEvent, 14 type TranscriptData, 15 type VodOutput, 16 type Chapter 17} from './config.js'; 18 19const args = process.argv.slice(2); 20const only = args.find((a) => a.startsWith('--only='))?.split('=')[1]; 21const skipSummary = args.includes('--skip-summary'); 22 23async function getAnthropicClient() { 24 const { default: Anthropic } = await import('anthropic'); 25 return new Anthropic(); 26} 27 28function formatTranscriptForPrompt(transcript: TranscriptData): string { 29 return transcript.segments 30 .map((seg) => { 31 const time = formatTime(seg.start); 32 const speaker = seg.speaker ? `[${seg.speaker}] ` : ''; 33 return `[${time}] ${speaker}${seg.text.trim()}`; 34 }) 35 .join('\n'); 36} 37 38function formatTime(seconds: number): string { 39 const h = Math.floor(seconds / 3600); 40 const m = Math.floor((seconds % 3600) / 60); 41 const s = Math.floor(seconds % 60); 42 return h > 0 ? `${h}:${String(m).padStart(2, '0')}:${String(s).padStart(2, '0')}` : `${m}:${String(s).padStart(2, '0')}`; 43} 44 45async function generateChaptersAndSummary( 46 event: VodEvent, 47 transcript: TranscriptData, 48 client: Awaited<ReturnType<typeof getAnthropicClient>> 49): Promise<{ chapters: Chapter[]; summary: string }> { 50 const formattedTranscript = formatTranscriptForPrompt(transcript); 51 52 const prompt = `You are analyzing a conference talk transcript from AtmosphereConf, a conference about the AT Protocol / Bluesky ecosystem. 53 54Talk: "${event.name}" 55Speakers: ${event.speakers.join(', ') || 'Unknown'} 56Description: ${event.description || 'No description provided'} 57 58Here is the timestamped transcript: 59 60${formattedTranscript} 61 62Please provide: 63 641. **Chapters**: Break the talk into logical chapters/sections. Each chapter should have a start timestamp (in seconds), end timestamp (in seconds), and a short descriptive title. Aim for 3-8 chapters depending on talk length. 65 662. **Summary**: A concise summary of the talk (2-4 paragraphs) covering the key points, arguments, and takeaways. 67 68Respond in this exact JSON format (no markdown, just raw JSON): 69{ 70 "chapters": [ 71 {"start": 0, "end": 120, "title": "Introduction"}, 72 {"start": 120, "end": 450, "title": "..."} 73 ], 74 "summary": "..." 75}`; 76 77 const response = await client.messages.create({ 78 model: 'claude-sonnet-4-20250514', 79 max_tokens: 4096, 80 messages: [{ role: 'user', content: prompt }] 81 }); 82 83 const text = response.content 84 .filter((c): c is { type: 'text'; text: string } => c.type === 'text') 85 .map((c) => c.text) 86 .join(''); 87 88 try { 89 return JSON.parse(text); 90 } catch { 91 // Try to extract JSON from markdown code blocks 92 const jsonMatch = text.match(/```(?:json)?\s*([\s\S]*?)\s*```/); 93 if (jsonMatch) { 94 return JSON.parse(jsonMatch[1]); 95 } 96 throw new Error(`Failed to parse Claude response as JSON: ${text.slice(0, 200)}`); 97 } 98} 99 100async function processEvent( 101 event: VodEvent, 102 client: Awaited<ReturnType<typeof getAnthropicClient>> 103): Promise<boolean> { 104 const outFile = `${OUTPUT_DIR}/${event.rkey}.json`; 105 if (existsSync(outFile)) { 106 console.log(` ✓ already processed: ${event.rkey}`); 107 return true; 108 } 109 110 const transcriptFile = `${TRANSCRIPTS_DIR}/${event.rkey}.json`; 111 if (!existsSync(transcriptFile)) { 112 console.log(` ✗ no transcript: ${event.rkey}`); 113 return false; 114 } 115 116 const transcript: TranscriptData = JSON.parse(readFileSync(transcriptFile, 'utf-8')); 117 118 let chapters: Chapter[] = []; 119 let summary = ''; 120 121 if (!skipSummary) { 122 try { 123 console.log(` ⏳ generating chapters/summary: ${event.rkey} (${event.name})...`); 124 const result = await generateChaptersAndSummary(event, transcript, client); 125 chapters = result.chapters; 126 summary = result.summary; 127 console.log(` ✓ generated: ${event.rkey}`); 128 } catch (err) { 129 console.error(` ✗ Claude failed for ${event.rkey}: ${(err as Error).message}`); 130 } 131 } 132 133 const output: VodOutput = { 134 rkey: event.rkey, 135 name: event.name, 136 speakers: event.speakers, 137 description: event.description, 138 transcript, 139 chapters, 140 summary 141 }; 142 143 writeFileSync(outFile, JSON.stringify(output, null, 2)); 144 return true; 145} 146 147async function main() { 148 const events: VodEvent[] = JSON.parse(readFileSync(EVENTS_FILE, 'utf-8')); 149 mkdirSync(OUTPUT_DIR, { recursive: true }); 150 151 const toProcess = only ? events.filter((e) => e.rkey === only) : events; 152 console.log(`Processing ${toProcess.length} transcripts...\n`); 153 154 let client: Awaited<ReturnType<typeof getAnthropicClient>> | null = null; 155 if (!skipSummary) { 156 if (!process.env.ANTHROPIC_API_KEY) { 157 console.log('⚠️ No ANTHROPIC_API_KEY set — chapters/summaries will be skipped.\n'); 158 } else { 159 client = await getAnthropicClient(); 160 } 161 } 162 163 let success = 0; 164 let failed = 0; 165 166 for (const event of toProcess) { 167 if (await processEvent(event, client!)) success++; 168 else failed++; 169 } 170 171 console.log(`\nDone: ${success} processed, ${failed} failed`); 172} 173 174main();