Monorepo for Aesthetic.Computer aesthetic.computer
4
fork

Configure Feed

Select the types of activity you want to include in your feed.

say: add Jeffrey PVC voice provider + browsable utterance cache

New provider: `say:jeffrey hello` hits the backend with Jeffrey's
Professional Voice Clone (ElevenLabs voice dYNGZ848Oo6DtNBoeqgh, same
one used in the LACMA 2026 grant video). `say:jeffrey:scream` routes
to the screaming voice settings.

Cache layout changes (all still under art.aesthetic.computer/tts-cache/):
- Jeffrey utterances land in their own subfolder
`tts-cache/jeffrey/<sha256>.mp3` so they're easy to list and audit.
- Every cached object now carries the original text + provider + voice
+ scream flag + timestamp as S3 user metadata, so browsing a HeadObject
response tells you exactly what was said and when. Values are truncated to
1800 characters, coerced to ASCII, and newline-stripped to satisfy S3 header rules.

Frontend (say.mjs):
- `jeffrey` added as a colon option (alongside google/openai/eleven)
- Provider indicator now renders in magenta when jeffrey is active
- Help line updated; README call-out documents the cache structure

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

+91 -11
+69 -5
system/netlify/functions/say.js
··· 29 29 const CACHE_PREFIX = "tts-cache/"; 30 30 31 31 // Generate cache key from provider + voice + text + instructions 32 + // Custom voice clones (jeffrey) get their own subfolder so all utterances 33 + // from that voice are easy to list / browse on the CDN. 32 34 function getCacheKey(provider, voiceId, text, instructions) { 33 35 const parts = `${provider}:${voiceId}:${text}${instructions ? `:${instructions}` : ""}`; 34 36 const hash = crypto.createHash("sha256").update(parts).digest("hex"); 35 - return `${CACHE_PREFIX}${hash}.mp3`; 37 + const subfolder = provider === "jeffrey" ? "jeffrey/" : ""; 38 + return `${CACHE_PREFIX}${subfolder}${hash}.mp3`; 36 39 } 37 40 38 41 // Check if cached audio exists, return CDN URL if so ··· 49 52 } 50 53 } 51 54 52 - // Save audio to cache 53 - async function saveToCache(key, audioBuffer) { 55 + // Save audio to cache. Optional `metadata` is persisted as S3 user metadata 56 + // so we can browse utterances later (e.g. HeadObject → x-amz-meta-text). 57 + async function saveToCache(key, audioBuffer, metadata = {}) { 54 58 try { 59 + // S3 user metadata must be ASCII and each header is usually capped 60 + // around 2 KB; trim text + encode non-ASCII defensively. 61 + const cleanMeta = {}; 62 + for (const [k, v] of Object.entries(metadata)) { 63 + if (v == null) continue; 64 + const str = String(v).slice(0, 1800); 65 + // Keep values ASCII-safe (S3 rejects high-unicode metadata headers). 
66 + cleanMeta[k] = Buffer.from(str, "utf8").toString("ascii").replace(/[\r\n]/g, " "); 67 + } 68 + 55 69 await s3.send(new PutObjectCommand({ 56 70 Bucket: BUCKET, 57 71 Key: key, ··· 59 73 ContentType: "audio/mpeg", 60 74 ACL: "public-read", 61 75 CacheControl: "public, max-age=31536000", // 1 year (audio doesn't change) 76 + Metadata: cleanMeta, 62 77 })); 63 78 console.log(`✅ Cached TTS: ${CDN_URL}/${key}`); 64 79 return `${CDN_URL}/${key}`; ··· 147 162 }; 148 163 } 149 164 165 + // ── Jeffrey: Professional Voice Clone (PVC) ────────────────────────── 166 + // Trained on multiple public lectures/talks by @jeffrey. Same voice 167 + // used in the LACMA 2026 grant pitch video. 168 + // Usage from the piece: `say:jeffrey hello world` 169 + const JEFFREY_VOICE_ID = "dYNGZ848Oo6DtNBoeqgh"; 170 + 171 + async function generateJeffrey(text, scream) { 172 + // Calmer, more natural delivery than the premade "scream" preset. 173 + // Same knobs as the grant-video pipeline for homogeneity. 174 + const voiceSettings = scream 175 + ? 
{ stability: 0.2, similarity_boost: 0.85, style: 0.9, use_speaker_boost: true } 176 + : { stability: 0.65, similarity_boost: 0.9, style: 0.15, use_speaker_boost: true }; 177 + 178 + const response = await fetch( 179 + `https://api.elevenlabs.io/v1/text-to-speech/${JEFFREY_VOICE_ID}`, 180 + { 181 + method: "POST", 182 + headers: { 183 + "xi-api-key": process.env.ELEVENLABS_API_KEY, 184 + "Content-Type": "application/json", 185 + }, 186 + body: JSON.stringify({ 187 + text, 188 + model_id: "eleven_multilingual_v2", 189 + voice_settings: voiceSettings, 190 + }), 191 + }, 192 + ); 193 + 194 + if (!response.ok) { 195 + const err = await response.text(); 196 + throw new Error(`ElevenLabs (Jeffrey) API error ${response.status}: ${err}`); 197 + } 198 + 199 + return { 200 + buffer: Buffer.from(await response.arrayBuffer()), 201 + voiceId: "jeffrey-pvc", 202 + }; 203 + } 204 + 150 205 // Generate audio with Google Cloud TTS 151 206 async function generateGoogle(text, gender, set, isSSML) { 152 207 // Fetch GCP key from URL ··· 271 326 result = await generateGoogle(text, gender, set, isSSML); 272 327 } else if (provider === "eleven") { 273 328 result = await generateElevenLabs(text, gender, set, scream); 329 + } else if (provider === "jeffrey") { 330 + result = await generateJeffrey(text, scream); 274 331 } else { 275 332 result = await generateOpenAI(text, gender, set, instructions); 276 333 } ··· 287 344 288 345 console.log(`🗣️ Generated with ${provider}: ${voiceId}`); 289 346 290 - // Cache for next time 291 - const cdnUrl = await saveToCache(cacheKey, audioBuffer); 347 + // Cache for next time. Attach the original text + voice as S3 metadata 348 + // so individual objects are self-describing when you browse them. 349 + const cdnUrl = await saveToCache(cacheKey, audioBuffer, { 350 + text, 351 + provider, 352 + voice: voiceId, 353 + scream: scream ? "1" : "0", 354 + ts: new Date().toISOString(), 355 + }); 292 356 293 357 if (cdnUrl) { 294 358 return {
+22 -6
system/public/aesthetic.computer/disks/say.mjs
··· 2 2 // A simple test piece for the TTS API. 3 3 // Type a word or phrase after `say` to hear it spoken. 4 4 5 - /* #region 📚 README 5 + /* #region 📚 README 6 6 Usage: say hello 7 7 say how are you today 8 8 say:male hi there 9 9 say:female good morning 10 10 say:google hello (use Google TTS) 11 11 say:google:female hi there 12 + say:jeffrey hello (Jeffrey PVC — @jeffrey's voice clone) 13 + say:jeffrey:scream AHHH (screaming variant) 14 + 15 + All utterances are cached to the art.aesthetic.computer CDN under 16 + `tts-cache/` (jeffrey gets its own subfolder `tts-cache/jeffrey/`). 17 + Each cached MP3 has the original text + voice stamped as S3 metadata, 18 + so you can rehydrate the catalog later by listing the bucket. 12 19 #endregion */ 13 20 14 21 let text = ""; ··· 34 41 if (part === "google") provider = "google"; 35 42 else if (part === "openai") provider = "openai"; 36 43 else if (part === "eleven") provider = "eleven"; 44 + else if (part === "jeffrey") provider = "jeffrey"; 37 45 else if (part === "male") gender = "male"; 38 46 else if (part === "female") gender = "female"; 39 47 else if (part === "scream") { 40 48 scream = true; 41 49 if (provider === "openai") { 42 50 instructions = "Deliver this as a blood-curdling scream. Shriek at the absolute top of your lungs with your voice cracking. Pure primal rage. Do NOT speak normally — only scream, raw and unhinged."; 43 - } else if (provider !== "eleven") { 44 - provider = "eleven"; // Default scream to ElevenLabs 51 + } else if (provider !== "eleven" && provider !== "jeffrey") { 52 + provider = "eleven"; // Default scream to ElevenLabs premade voices 45 53 } 46 54 } 47 55 } ··· 56 64 // Note: Top-left corner is reserved for prompt HUD label 57 65 58 66 // Provider indicator (below HUD area) 59 - const providerColor = scream ? "red" : provider === "eleven" ? "orange" : provider === "google" ? "cyan" : "lime"; 67 + const providerColor = scream 68 + ? "red" 69 + : provider === "jeffrey" 70 + ? 
"magenta" 71 + : provider === "eleven" 72 + ? "orange" 73 + : provider === "google" 74 + ? "cyan" 75 + : "lime"; 60 76 const providerLabel = scream ? `[${provider} SCREAM]` : `[${provider}]`; 61 77 ink(providerColor).write(providerLabel, { x: 6, y: 18 }); 62 - 78 + 63 79 // Instructions 64 80 ink("gray").write("say <words>", { x: 6, y: 32 }); 65 - ink("gray").write("say:google or say:male", { x: 6, y: 44 }); 81 + ink("gray").write("say:jeffrey · say:google · say:male", { x: 6, y: 44 }); 66 82 67 83 // Current text 68 84 if (text) {