src/lib/transcript.ts at main · chaosgreml.in/understory

chaosgreml.in / understory
fork
The AtmosphereConf talks your skyline missed
fork
understory / src / lib / transcript.ts
at main 115 lines 3.4 kB view raw
wrap content
Bryan Guffey feat: add talk page with video, transcript, and search 3w ago
673c4169
  1import type { Utterance, TranscriptSegment } from "./types";
  2
  3/**
  4 * Strip trailing punctuation for word comparison.
  5 */
  6function normalize(s: string): string {
  7  return s
  8    .replace(/[.,!?;:'")\]]+$/, "")
  9    .replace(/^['"(\[]+/, "")
 10    .toLowerCase();
 11}
 12
 13/**
 14 * Split utterances into sentence-level segments with interpolated timestamps.
 15 * Walks the words array sequentially, matching tokens to derive accurate timing.
 16 */
 17export function splitUtterances(
 18  utterances: Utterance[],
 19): TranscriptSegment[] {
 20  const segments: TranscriptSegment[] = [];
 21
 22  if (!utterances || utterances.length === 0) return segments;
 23
 24  for (let uIdx = 0; uIdx < utterances.length; uIdx++) {
 25    const utterance = utterances[uIdx];
 26    const sentences = utterance.text.split(/(?<=[.!?])\s+/).filter(Boolean);
 27
 28    if (sentences.length === 0) continue;
 29
 30    // If only one sentence or no words, treat entire utterance as one segment
 31    if (
 32      sentences.length === 1 ||
 33      !utterance.words ||
 34      utterance.words.length === 0
 35    ) {
 36      segments.push({
 37        id: `u${uIdx}-s0`,
 38        speaker: utterance.speaker,
 39        text: utterance.text,
 40        startMs: utterance.start,
 41        endMs: utterance.end,
 42      });
 43      continue;
 44    }
 45
 46    let wordPtr = 0;
 47    const words = utterance.words;
 48
 49    for (let sIdx = 0; sIdx < sentences.length; sIdx++) {
 50      const sentence = sentences[sIdx];
 51      const tokens = sentence.split(/\s+/).filter(Boolean);
 52
 53      // Try to match tokens to words sequentially
 54      const startWord = wordPtr;
 55
 56      for (const token of tokens) {
 57        if (wordPtr >= words.length) break;
 58        const normalizedToken = normalize(token);
 59        const normalizedWord = normalize(words[wordPtr].text);
 60
 61        if (
 62          normalizedToken === normalizedWord ||
 63          normalizedWord.startsWith(normalizedToken) ||
 64          normalizedToken.startsWith(normalizedWord)
 65        ) {
 66          wordPtr++;
 67        } else {
 68          // Skip ahead up to 2 words to handle minor mismatches
 69          let found = false;
 70          for (
 71            let skip = 1;
 72            skip <= 2 && wordPtr + skip < words.length;
 73            skip++
 74          ) {
 75            if (normalize(words[wordPtr + skip].text) === normalizedToken) {
 76              wordPtr += skip + 1;
 77              found = true;
 78              break;
 79            }
 80          }
 81          if (!found) {
 82            wordPtr++;
 83          }
 84        }
 85      }
 86
 87      // Derive timestamps from matched words
 88      let startMs: number;
 89      let endMs: number;
 90
 91      if (startWord < words.length && wordPtr > startWord) {
 92        startMs = words[startWord].start;
 93        endMs = words[Math.min(wordPtr - 1, words.length - 1)].end;
 94      } else {
 95        // Fallback: interpolate proportionally
 96        const charStart = sentences.slice(0, sIdx).join(" ").length;
 97        const charEnd = charStart + sentence.length;
 98        const totalChars = utterance.text.length;
 99        const duration = utterance.end - utterance.start;
100        startMs = utterance.start + (charStart / totalChars) * duration;
101        endMs = utterance.start + (charEnd / totalChars) * duration;
102      }
103
104      segments.push({
105        id: `u${uIdx}-s${sIdx}`,
106        speaker: utterance.speaker,
107        text: sentence,
108        startMs: Math.round(startMs),
109        endMs: Math.round(endMs),
110      });
111    }
112  }
113
114  return segments;
115}
Configure Feed

Configure Feed