atmo.rsvp
1/**
2 * Step 4: Use Claude to generate chapters and summaries from transcripts.
3 * Output: data/output/<rkey>.json
4 *
5 * Requires: ANTHROPIC_API_KEY env var, npm install anthropic
6 */
7
8import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
9import {
10 TRANSCRIPTS_DIR,
11 OUTPUT_DIR,
12 EVENTS_FILE,
13 type VodEvent,
14 type TranscriptData,
15 type VodOutput,
16 type Chapter
17} from './config.js';
18
// Command-line flags: everything after the node binary and the script path.
const args = process.argv.slice(2);

// `--only=<rkey>` limits the run to a single event. The value is the text
// between the first and second `=` of the first matching argument.
const onlyFlag = args.find((arg) => arg.startsWith('--only='));
const only = onlyFlag?.split('=')[1];

// `--skip-summary` turns off the Claude call; outputs are still written.
const skipSummary = args.some((arg) => arg === '--skip-summary');
22
/**
 * Lazily loads the Anthropic SDK and returns a client built with no explicit
 * options. The dynamic import means the package is only resolved when this
 * function is actually called.
 *
 * NOTE(review): the official TypeScript SDK is published on npm as
 * `@anthropic-ai/sdk`; confirm that the `anthropic` specifier resolves to the
 * intended package in this project.
 */
async function getAnthropicClient() {
  const sdk = await import('anthropic');
  const AnthropicClient = sdk.default;
  return new AnthropicClient();
}
27
/**
 * Renders a transcript as one line per segment for inclusion in the prompt:
 * `[<time>] [<speaker>] <text>`, with the speaker tag omitted when the
 * segment has no (truthy) speaker. Segment text is trimmed; lines are joined
 * with newlines.
 */
function formatTranscriptForPrompt(transcript: TranscriptData): string {
  const lines = transcript.segments.map((segment) => {
    const stamp = `[${formatTime(segment.start)}]`;
    const body = segment.text.trim();
    if (segment.speaker) {
      return `${stamp} [${segment.speaker}] ${body}`;
    }
    return `${stamp} ${body}`;
  });
  return lines.join('\n');
}
37
/**
 * Formats a duration in seconds as `m:ss`, or `h:mm:ss` once it reaches a
 * full hour. Fractional seconds are floored; minutes and seconds are
 * zero-padded to two digits when they are not the leading field.
 */
function formatTime(seconds: number): string {
  const pad2 = (value: number): string => String(value).padStart(2, '0');

  const hours = Math.floor(seconds / 3600);
  const minutes = Math.floor((seconds % 3600) / 60);
  const secs = Math.floor(seconds % 60);

  if (hours > 0) {
    return [hours, pad2(minutes), pad2(secs)].join(':');
  }
  return `${minutes}:${pad2(secs)}`;
}
44
/**
 * Checks that a parsed JSON value carries the chapter fields the prompt asks
 * for: numeric `start` and `end`, and a string `title`.
 *
 * NOTE(review): `Chapter` is declared in ./config.js; this assumes it matches
 * the `{ start, end, title }` shape shown in the prompt — confirm.
 */
function isChapterShape(value: unknown): value is Chapter {
  if (typeof value !== 'object' || value === null) return false;
  const candidate = value as Record<string, unknown>;
  return (
    typeof candidate.start === 'number' &&
    typeof candidate.end === 'number' &&
    typeof candidate.title === 'string'
  );
}

/**
 * Lists the substrings of a model reply that might be the JSON payload, in
 * the order they should be tried: the whole reply, the contents of the first
 * markdown code fence, and the span from the first `{` to the last `}`.
 */
function jsonCandidates(text: string): string[] {
  const candidates = [text];

  const fenced = text.match(/```(?:json)?\s*([\s\S]*?)\s*```/)?.[1];
  if (fenced !== undefined) candidates.push(fenced);

  const open = text.indexOf('{');
  const close = text.lastIndexOf('}');
  if (open !== -1 && close > open) candidates.push(text.slice(open, close + 1));

  return candidates;
}

/**
 * Parses and validates the model reply.
 *
 * @throws Error if no candidate parses as a JSON object, or if the first
 *   object that parses does not have a `chapters` array of chapter-shaped
 *   items and a string `summary`.
 */
function parseChaptersAndSummary(text: string): { chapters: Chapter[]; summary: string } {
  for (const candidate of jsonCandidates(text)) {
    let parsed: unknown;
    try {
      parsed = JSON.parse(candidate);
    } catch {
      continue; // not valid JSON on its own; try the next extraction
    }
    if (typeof parsed !== 'object' || parsed === null) continue;

    const { chapters, summary } = parsed as Record<string, unknown>;
    if (!Array.isArray(chapters) || !chapters.every(isChapterShape) || typeof summary !== 'string') {
      throw new Error(`Claude response JSON has an unexpected shape: ${candidate.slice(0, 200)}`);
    }
    return { chapters, summary };
  }
  throw new Error(`Failed to parse Claude response as JSON: ${text.slice(0, 200)}`);
}

/**
 * Asks Claude to split a talk into chapters and summarise it.
 *
 * Builds a prompt from the event metadata and the timestamped transcript,
 * sends it as a single user message, concatenates the text blocks of the
 * reply and parses them as JSON.
 *
 * @param event - Talk metadata; `name`, `speakers` and `description` are
 *   interpolated into the prompt.
 * @param transcript - Transcript whose segments are rendered into the prompt.
 * @param client - Anthropic client used to send the request.
 * @returns The validated `chapters` and `summary` from the reply.
 * @throws Error if the reply stopped because it hit the token limit, cannot
 *   be parsed as JSON, or does not have the requested shape. Errors raised by
 *   the client call itself propagate unchanged.
 */
async function generateChaptersAndSummary(
  event: VodEvent,
  transcript: TranscriptData,
  client: Awaited<ReturnType<typeof getAnthropicClient>>
): Promise<{ chapters: Chapter[]; summary: string }> {
  const formattedTranscript = formatTranscriptForPrompt(transcript);
  const maxTokens = 4096;

  const prompt = `You are analyzing a conference talk transcript from AtmosphereConf, a conference about the AT Protocol / Bluesky ecosystem.

Talk: "${event.name}"
Speakers: ${event.speakers.join(', ') || 'Unknown'}
Description: ${event.description || 'No description provided'}

Here is the timestamped transcript:

${formattedTranscript}

Please provide:

1. **Chapters**: Break the talk into logical chapters/sections. Each chapter should have a start timestamp (in seconds), end timestamp (in seconds), and a short descriptive title. Aim for 3-8 chapters depending on talk length.

2. **Summary**: A concise summary of the talk (2-4 paragraphs) covering the key points, arguments, and takeaways.

Respond in this exact JSON format (no markdown, just raw JSON):
{
  "chapters": [
    {"start": 0, "end": 120, "title": "Introduction"},
    {"start": 120, "end": 450, "title": "..."}
  ],
  "summary": "..."
}`;

  const response = await client.messages.create({
    model: 'claude-sonnet-4-20250514',
    max_tokens: maxTokens,
    messages: [{ role: 'user', content: prompt }]
  });

  // A reply cut off at the token limit cannot contain complete JSON; report
  // that directly rather than as an opaque parse failure.
  if (response.stop_reason === 'max_tokens') {
    throw new Error(`Claude response was truncated at max_tokens (${maxTokens}); JSON is incomplete`);
  }

  // Only text blocks carry the answer; any other block types are ignored.
  const text = response.content
    .filter((c): c is { type: 'text'; text: string } => c.type === 'text')
    .map((c) => c.text)
    .join('');

  return parseChaptersAndSummary(text);
}
99
/**
 * Produces `<OUTPUT_DIR>/<rkey>.json` for one event.
 *
 * Skips events whose output file already exists. Otherwise loads the
 * transcript from `<TRANSCRIPTS_DIR>/<rkey>.json`, optionally asks Claude for
 * chapters and a summary, and writes the combined record.
 *
 * @param event - The event to process.
 * @param client - Anthropic client, or `null` when none is available (for
 *   example no API key); generation is then skipped and the output is written
 *   with empty chapters and summary.
 * @returns `true` if the output exists or was written; `false` if the
 *   transcript file is missing or cannot be read/parsed.
 */
async function processEvent(
  event: VodEvent,
  client: Awaited<ReturnType<typeof getAnthropicClient>> | null
): Promise<boolean> {
  const outFile = `${OUTPUT_DIR}/${event.rkey}.json`;
  if (existsSync(outFile)) {
    console.log(` ✓ already processed: ${event.rkey}`);
    return true;
  }

  const transcriptFile = `${TRANSCRIPTS_DIR}/${event.rkey}.json`;
  if (!existsSync(transcriptFile)) {
    console.log(` ✗ no transcript: ${event.rkey}`);
    return false;
  }

  // A corrupt or unreadable transcript counts as a failure for this event
  // only; it must not abort the rest of the batch.
  let transcript: TranscriptData;
  try {
    transcript = JSON.parse(readFileSync(transcriptFile, 'utf-8'));
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    console.error(` ✗ unreadable transcript for ${event.rkey}: ${reason}`);
    return false;
  }

  let chapters: Chapter[] = [];
  let summary = '';

  // Callers may hand over a null client (e.g. behind a non-null assertion)
  // when no API key is configured, so check it at runtime as well.
  if (!skipSummary && client) {
    try {
      console.log(` ⏳ generating chapters/summary: ${event.rkey} (${event.name})...`);
      const result = await generateChaptersAndSummary(event, transcript, client);
      chapters = result.chapters;
      summary = result.summary;
      console.log(` ✓ generated: ${event.rkey}`);
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      console.error(` ✗ Claude failed for ${event.rkey}: ${reason}`);
      // NOTE(review): the output is still written below with empty
      // chapters/summary, and the existsSync check above will then skip this
      // event on later runs. Confirm that this best-effort behaviour is intended.
    }
  }

  const output: VodOutput = {
    rkey: event.rkey,
    name: event.name,
    speakers: event.speakers,
    description: event.description,
    transcript,
    chapters,
    summary
  };

  writeFileSync(outFile, JSON.stringify(output, null, 2));
  return true;
}
146
/**
 * Entry point: loads the event list, ensures the output directory exists,
 * narrows the list to the `--only` event when that flag is set, and processes
 * the events one after another, printing a processed/failed tally at the end.
 *
 * A client is created only when summaries are enabled and ANTHROPIC_API_KEY
 * is set; otherwise `client` stays null and is still handed to processEvent
 * through a non-null assertion.
 */
async function main() {
  const events: VodEvent[] = JSON.parse(readFileSync(EVENTS_FILE, 'utf-8'));
  mkdirSync(OUTPUT_DIR, { recursive: true });

  let toProcess = events;
  if (only) {
    toProcess = events.filter((candidate) => candidate.rkey === only);
  }
  console.log(`Processing ${toProcess.length} transcripts...\n`);

  let client: Awaited<ReturnType<typeof getAnthropicClient>> | null = null;
  if (!skipSummary) {
    if (process.env.ANTHROPIC_API_KEY) {
      client = await getAnthropicClient();
    } else {
      console.log('⚠️ No ANTHROPIC_API_KEY set — chapters/summaries will be skipped.\n');
    }
  }

  const tally = { success: 0, failed: 0 };

  // Events are handled sequentially, one request at a time.
  for (const event of toProcess) {
    const ok = await processEvent(event, client!);
    if (ok) {
      tally.success += 1;
    } else {
      tally.failed += 1;
    }
  }

  console.log(`\nDone: ${tally.success} processed, ${tally.failed} failed`);
}
173
// Run the script. Handle a rejected promise explicitly so a fatal error is
// logged and the process exits non-zero regardless of the runtime's default
// unhandled-rejection behaviour.
main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});