The AtmosphereConf talks your skyline missed
1import type { Utterance, TranscriptSegment } from "./types";
2
/**
 * Canonicalize a token so two spellings of the same word compare equal.
 *
 * A trailing run of closing punctuation (`. , ! ? ; : ' " ) ]`) is removed
 * first, then a leading run of opening quotes/brackets (`' " ( [`), and the
 * remainder is lower-cased. Punctuation inside the token is left untouched.
 *
 * @param s - Raw token or word text.
 * @returns The stripped, lower-cased form; may be the empty string when the
 *   input consists solely of strippable punctuation.
 */
function normalize(s: string): string {
  const withoutTrailing = s.replace(/[.,!?;:'")\]]+$/, "");
  const withoutLeading = withoutTrailing.replace(/^['"(\[]+/, "");
  return withoutLeading.toLowerCase();
}
12
/**
 * Advance a cursor through `words` past the words spoken for one sentence.
 *
 * Each token is compared (after `normalize`) with the word under the cursor:
 * - a punctuation-only token (normalizes to "") consumes a word only when that
 *   word is punctuation-only too; otherwise the token is skipped;
 * - a token matches when either normalized string is a prefix of the other and
 *   both are non-empty;
 * - on a mismatch, up to two words ahead are checked for an exact match and the
 *   cursor jumps past the match; if none is found, one word is consumed anyway.
 *
 * @param tokens - Whitespace-separated tokens of the sentence.
 * @param words - Word entries of the utterance, in order.
 * @param from - Index of the first word not yet consumed.
 * @returns The index of the first word not consumed by this sentence
 *   (never greater than `words.length`).
 */
function consumeSentenceWords(
  tokens: readonly string[],
  words: ReadonlyArray<{ text: string }>,
  from: number,
): number {
  let wordPtr = from;

  for (const token of tokens) {
    if (wordPtr >= words.length) break;

    const normalizedToken = normalize(token);
    const normalizedWord = normalize(words[wordPtr].text);

    // `x.startsWith("")` is always true, so an empty side must never take
    // part in prefix matching. A punctuation-only token (e.g. "...") carries
    // no spoken word: pair it with a punctuation-only word if one is present,
    // otherwise skip it without consuming anything.
    if (normalizedToken === "") {
      if (normalizedWord === "") wordPtr++;
      continue;
    }

    // Equality is covered by the prefix test once both sides are non-empty.
    if (
      normalizedWord !== "" &&
      (normalizedWord.startsWith(normalizedToken) ||
        normalizedToken.startsWith(normalizedWord))
    ) {
      wordPtr++;
      continue;
    }

    // Skip ahead up to 2 words to handle minor mismatches; with no match,
    // still consume one word so the cursor keeps moving.
    let advance = 1;
    for (let skip = 1; skip <= 2 && wordPtr + skip < words.length; skip++) {
      if (normalize(words[wordPtr + skip].text) === normalizedToken) {
        advance = skip + 1;
        break;
      }
    }
    wordPtr += advance;
  }

  return wordPtr;
}

/**
 * Split utterances into sentence-level segments with per-sentence timestamps.
 *
 * Each utterance's text is split after `.`, `!` or `?` followed by whitespace.
 * An utterance with a single sentence, or without word entries, is emitted as
 * one segment carrying the utterance's own text, start and end. Otherwise the
 * word entries are walked in order and each sentence takes the start of its
 * first consumed word and the end of its last consumed word. A sentence that
 * consumes no words (for example, the word list ran out) is timed by linear
 * interpolation over its character offsets within the utterance text.
 *
 * Segment ids have the form `u<utteranceIndex>-s<sentenceIndex>`. Timestamps of
 * multi-sentence utterances are rounded to integers.
 *
 * @param utterances - Utterances to split; a nullish or empty list yields `[]`.
 * @returns Segments in utterance order, then sentence order.
 */
export function splitUtterances(
  utterances: Utterance[],
): TranscriptSegment[] {
  const segments: TranscriptSegment[] = [];

  if (!utterances || utterances.length === 0) return segments;

  for (let uIdx = 0; uIdx < utterances.length; uIdx++) {
    const utterance = utterances[uIdx];
    const sentences = utterance.text.split(/(?<=[.!?])\s+/).filter(Boolean);

    if (sentences.length === 0) continue;

    const words = utterance.words;

    // If only one sentence or no words, treat entire utterance as one segment
    if (sentences.length === 1 || !words || words.length === 0) {
      segments.push({
        id: `u${uIdx}-s0`,
        speaker: utterance.speaker,
        text: utterance.text,
        startMs: utterance.start,
        endMs: utterance.end,
      });
      continue;
    }

    // Non-zero here: at least one non-empty sentence was split out of the text.
    const totalChars = utterance.text.length;
    const duration = utterance.end - utterance.start;

    let wordPtr = 0;
    let searchFrom = 0;

    for (let sIdx = 0; sIdx < sentences.length; sIdx++) {
      const sentence = sentences[sIdx];

      // Track the sentence's real offsets in the utterance text so the
      // interpolation fallback accounts for the separating whitespace
      // (however much there is) and the last sentence can reach the end.
      const located = utterance.text.indexOf(sentence, searchFrom);
      const charStart = located === -1 ? searchFrom : located;
      const charEnd = Math.min(charStart + sentence.length, totalChars);
      searchFrom = charEnd;

      const tokens = sentence.split(/\s+/).filter(Boolean);
      const startWord = wordPtr;
      wordPtr = consumeSentenceWords(tokens, words, wordPtr);

      let startMs: number;
      let endMs: number;

      if (wordPtr > startWord) {
        // Derive timestamps from the words this sentence consumed.
        startMs = words[startWord].start;
        endMs = words[wordPtr - 1].end;
      } else {
        // Fallback: interpolate proportionally to character position.
        startMs = utterance.start + (charStart / totalChars) * duration;
        endMs = utterance.start + (charEnd / totalChars) * duration;
      }

      segments.push({
        id: `u${uIdx}-s${sIdx}`,
        speaker: utterance.speaker,
        text: sentence,
        startMs: Math.round(startMs),
        endMs: Math.round(endMs),
      });
    }
  }

  return segments;
}