Full document, spreadsheet, slideshow, and diagram tooling
1/**
2 * Document Diff — pure utility module for comparing TipTap JSON documents.
3 *
4 * No DOM dependencies. Uses a word-level LCS (Longest Common Subsequence)
5 * algorithm to produce inline diffs showing additions and deletions.
6 */
7
8// --- Types ---
9
10export interface DiffBlock {
11 type: 'equal' | 'insert' | 'delete';
12 content: string;
13}
14
15// eslint-disable-next-line @typescript-eslint/no-explicit-any
16type TipTapNode = Record<string, any>;
17
18// --- Text extraction ---
19
20/**
21 * Recursively extract plain text from a TipTap JSON document.
22 * Block-level nodes (paragraphs, headings, list items) are separated by newlines.
23 */
24export function extractText(doc: TipTapNode | null | undefined): string {
25 if (!doc) return '';
26
27 const blocks: string[] = [];
28
29 function walk(node: TipTapNode): void {
30 // Text leaf node
31 if (node.type === 'text' && typeof node.text === 'string') {
32 // Append to the last block (or create one)
33 if (blocks.length === 0) blocks.push('');
34 blocks[blocks.length - 1] += node.text;
35 return;
36 }
37
38 // Block-level nodes that should start a new text block
39 const blockTypes = new Set([
40 'paragraph', 'heading', 'listItem', 'taskItem',
41 'codeBlock', 'blockquote',
42 ]);
43
44 if (blockTypes.has(node.type) && blocks.length > 0 && blocks[blocks.length - 1] !== '') {
45 blocks.push('');
46 }
47
48 // Recurse into children
49 if (Array.isArray(node.content)) {
50 for (const child of node.content) {
51 walk(child);
52 }
53 }
54 }
55
56 walk(doc);
57
58 // Filter out empty trailing blocks and join with newlines
59 return blocks.filter(b => b !== '').join('\n');
60}
61
62// --- LCS-based word diff ---
63
64/**
65 * Tokenize text into words, splitting on whitespace.
66 */
67function tokenize(text: string): string[] {
68 if (!text.trim()) return [];
69 return text.split(/\s+/);
70}
71
72/**
73 * Compute the LCS (Longest Common Subsequence) table using standard O(nm) DP.
74 * Returns the DP table for backtracking.
75 */
76function lcsTable(a: string[], b: string[]): number[][] {
77 const m = a.length;
78 const n = b.length;
79
80 // Create (m+1) x (n+1) table initialized to 0
81 const dp: number[][] = [];
82 for (let i = 0; i <= m; i++) {
83 dp[i] = new Array(n + 1).fill(0);
84 }
85
86 for (let i = 1; i <= m; i++) {
87 for (let j = 1; j <= n; j++) {
88 if (a[i - 1] === b[j - 1]) {
89 dp[i]![j] = dp[i - 1]![j - 1]! + 1;
90 } else {
91 dp[i]![j] = Math.max(dp[i - 1]![j]!, dp[i]![j - 1]!);
92 }
93 }
94 }
95
96 return dp;
97}
98
99/**
100 * Backtrack through the LCS table to produce a sequence of diff operations.
101 * Returns raw (ungrouped) operations: 'equal', 'delete', 'insert'.
102 */
103function backtrack(
104 dp: number[][],
105 a: string[],
106 b: string[],
107): Array<{ type: 'equal' | 'insert' | 'delete'; word: string }> {
108 const ops: Array<{ type: 'equal' | 'insert' | 'delete'; word: string }> = [];
109 let i = a.length;
110 let j = b.length;
111
112 while (i > 0 || j > 0) {
113 if (i > 0 && j > 0 && a[i - 1] === b[j - 1]) {
114 ops.push({ type: 'equal', word: a[i - 1]! });
115 i--;
116 j--;
117 } else if (j > 0 && (i === 0 || dp[i]![j - 1]! >= dp[i - 1]![j]!)) {
118 ops.push({ type: 'insert', word: b[j - 1]! });
119 j--;
120 } else {
121 ops.push({ type: 'delete', word: a[i - 1]! });
122 i--;
123 }
124 }
125
126 return ops.reverse();
127}
128
129/**
130 * Group consecutive same-type operations into DiffBlocks.
131 * Consecutive tokens of the same type are joined with spaces.
132 */
133function groupOps(
134 ops: Array<{ type: 'equal' | 'insert' | 'delete'; word: string }>,
135): DiffBlock[] {
136 if (ops.length === 0) return [];
137
138 const blocks: DiffBlock[] = [];
139 let currentType = ops[0]!.type;
140 let currentWords: string[] = [ops[0]!.word];
141
142 for (let i = 1; i < ops.length; i++) {
143 const op = ops[i]!;
144 if (op.type === currentType) {
145 currentWords.push(op.word);
146 } else {
147 blocks.push({ type: currentType, content: currentWords.join(' ') });
148 currentType = op.type;
149 currentWords = [op.word];
150 }
151 }
152
153 blocks.push({ type: currentType, content: currentWords.join(' ') });
154 return blocks;
155}
156
157/**
158 * Compare two plain text strings at word level and produce diff blocks.
159 */
160export function diffWords(textA: string, textB: string): DiffBlock[] {
161 const wordsA = tokenize(textA);
162 const wordsB = tokenize(textB);
163
164 if (wordsA.length === 0 && wordsB.length === 0) return [];
165
166 if (wordsA.length === 0) {
167 return [{ type: 'insert', content: wordsB.join(' ') }];
168 }
169
170 if (wordsB.length === 0) {
171 return [{ type: 'delete', content: wordsA.join(' ') }];
172 }
173
174 const dp = lcsTable(wordsA, wordsB);
175 const ops = backtrack(dp, wordsA, wordsB);
176 return groupOps(ops);
177}
178
179// --- Document-level diff ---
180
181/**
182 * Compare two TipTap JSON documents and produce diff blocks.
183 * Extracts text from each document, then runs word-level diff.
184 */
185export function diffDocuments(
186 docA: TipTapNode | null | undefined,
187 docB: TipTapNode | null | undefined,
188): DiffBlock[] {
189 const textA = extractText(docA);
190 const textB = extractText(docB);
191 return diffWords(textA, textB);
192}
193
194// --- HTML rendering ---
195
196/**
197 * Escape HTML special characters to prevent XSS.
198 */
199function escapeHtml(str: string): string {
200 return str
201 .replace(/&/g, '&')
202 .replace(/</g, '<')
203 .replace(/>/g, '>')
204 .replace(/"/g, '"')
205 .replace(/'/g, ''');
206}
207
208/**
209 * Render diff blocks as styled HTML string.
210 *
211 * Each block is wrapped in a <span> with the appropriate CSS class:
212 * - .diff-equal — unchanged text
213 * - .diff-insert — added text (green)
214 * - .diff-delete — removed text (red, strikethrough)
215 *
216 * Newlines within content are converted to <br> tags.
217 */
218export function renderDiffHtml(blocks: DiffBlock[]): string {
219 if (blocks.length === 0) return '';
220
221 return blocks.map(block => {
222 const escaped = escapeHtml(block.content).replace(/\n/g, '<br>');
223 return `<span class="diff-${block.type}">${escaped}</span>`;
224 }).join(' ');
225}