/**
 * Document Diff — pure utility module for comparing TipTap JSON documents.
 *
 * No DOM dependencies. Uses a word-level LCS (Longest Common Subsequence)
 * algorithm to produce inline diffs showing additions and deletions.
 */

// --- Types ---

/** A run of consecutive words that share the same diff status. */
export interface DiffBlock {
  type: 'equal' | 'insert' | 'delete';
  /** The words of the run, joined with single spaces. */
  content: string;
}

// eslint-disable-next-line @typescript-eslint/no-explicit-any
type TipTapNode = Record<string, any>;

/** A single word-level diff operation, before grouping into DiffBlocks. */
type DiffOp = { type: DiffBlock['type']; word: string };

// --- Text extraction ---

/** Node types that start a new text block during extraction. */
const BLOCK_NODE_TYPES: ReadonlySet<string> = new Set([
  'paragraph',
  'heading',
  'listItem',
  'taskItem',
  'codeBlock',
  'blockquote',
]);

/**
 * Recursively extract plain text from a TipTap JSON document.
 * Block-level nodes (paragraphs, headings, list items, ...) are separated
 * by newlines; empty blocks are dropped.
 *
 * @param doc - Root node of the document; `null`/`undefined` yields `''`.
 * @returns The concatenated text, one line per non-empty block.
 */
export function extractText(doc: TipTapNode | null | undefined): string {
  if (!doc) return '';

  const blocks: string[] = [];

  function walk(node: TipTapNode): void {
    // Tolerate malformed children (null / primitives) instead of throwing.
    if (node === null || typeof node !== 'object') return;

    // Text leaf: append to the current block, creating the first one if needed.
    if (node.type === 'text' && typeof node.text === 'string') {
      if (blocks.length === 0) blocks.push('');
      blocks[blocks.length - 1] += node.text;
      return;
    }

    // Open a new block only when the current one already holds text, so
    // nested block nodes (e.g. a paragraph inside a listItem) don't stack
    // up empty entries.
    if (
      BLOCK_NODE_TYPES.has(node.type) &&
      blocks.length > 0 &&
      blocks[blocks.length - 1] !== ''
    ) {
      blocks.push('');
    }

    if (Array.isArray(node.content)) {
      for (const child of node.content) {
        walk(child);
      }
    }
  }

  walk(doc);

  // A trailing block may have been opened but never filled; drop empties.
  return blocks.filter(b => b !== '').join('\n');
}

// --- LCS-based word diff ---

/**
 * Tokenize text into words, splitting on runs of whitespace.
 * The text is trimmed first so leading/trailing whitespace does not
 * produce empty-string tokens.
 */
function tokenize(text: string): string[] {
  const trimmed = text.trim();
  if (trimmed === '') return [];
  return trimmed.split(/\s+/);
}

/**
 * Compute the LCS (Longest Common Subsequence) table using standard O(nm) DP.
 *
 * @returns A (a.length + 1) x (b.length + 1) table where `dp[i][j]` is the
 *   LCS length of the first `i` words of `a` and the first `j` words of `b`.
 */
function lcsTable(a: string[], b: string[]): number[][] {
  const m = a.length;
  const n = b.length;

  const dp: number[][] = Array.from({ length: m + 1 }, () =>
    new Array<number>(n + 1).fill(0),
  );

  for (let i = 1; i <= m; i++) {
    const prev = dp[i - 1]!;
    const cur = dp[i]!;
    for (let j = 1; j <= n; j++) {
      cur[j] =
        a[i - 1] === b[j - 1]
          ? prev[j - 1]! + 1
          : Math.max(prev[j]!, cur[j - 1]!);
    }
  }

  return dp;
}

/**
 * Backtrack through the LCS table to produce a sequence of diff operations.
 * Returns raw (ungrouped) operations: 'equal', 'delete', 'insert'.
 */
function backtrack(dp: number[][], a: string[], b: string[]): DiffOp[] {
  const ops: DiffOp[] = [];
  let i = a.length;
  let j = b.length;

  // Walk from the bottom-right corner to the origin; ops are collected in
  // reverse order and flipped at the end.
  while (i > 0 || j > 0) {
    if (i > 0 && j > 0 && a[i - 1] === b[j - 1]) {
      ops.push({ type: 'equal', word: a[i - 1]! });
      i--;
      j--;
    } else if (j > 0 && (i === 0 || dp[i]![j - 1]! >= dp[i - 1]![j]!)) {
      // On ties the insert is taken first while walking backwards, so in
      // the final (reversed) output deletions precede insertions.
      ops.push({ type: 'insert', word: b[j - 1]! });
      j--;
    } else {
      ops.push({ type: 'delete', word: a[i - 1]! });
      i--;
    }
  }

  return ops.reverse();
}

/**
 * Group consecutive same-type operations into DiffBlocks.
 * Consecutive tokens of the same type are joined with spaces.
 */
function groupOps(ops: DiffOp[]): DiffBlock[] {
  const blocks: DiffBlock[] = [];
  let currentType: DiffBlock['type'] | undefined;
  let currentWords: string[] = [];

  for (const op of ops) {
    if (op.type === currentType) {
      currentWords.push(op.word);
      continue;
    }
    if (currentType !== undefined) {
      blocks.push({ type: currentType, content: currentWords.join(' ') });
    }
    currentType = op.type;
    currentWords = [op.word];
  }

  if (currentType !== undefined) {
    blocks.push({ type: currentType, content: currentWords.join(' ') });
  }

  return blocks;
}

/**
 * Compare two plain text strings at word level and produce diff blocks.
 *
 * All whitespace (including newlines) is treated as a word separator, so
 * the resulting block contents are single-space-joined words.
 *
 * @param textA - The "before" text.
 * @param textB - The "after" text.
 * @returns Diff blocks in reading order; empty when both inputs have no words.
 */
export function diffWords(textA: string, textB: string): DiffBlock[] {
  const wordsA = tokenize(textA);
  const wordsB = tokenize(textB);

  if (wordsA.length === 0 && wordsB.length === 0) return [];
  if (wordsA.length === 0) {
    return [{ type: 'insert', content: wordsB.join(' ') }];
  }
  if (wordsB.length === 0) {
    return [{ type: 'delete', content: wordsA.join(' ') }];
  }

  const dp = lcsTable(wordsA, wordsB);
  return groupOps(backtrack(dp, wordsA, wordsB));
}

// --- Document-level diff ---

/**
 * Compare two TipTap JSON documents and produce diff blocks.
 * Extracts text from each document, then runs word-level diff.
 */
export function diffDocuments(
  docA: TipTapNode | null | undefined,
  docB: TipTapNode | null | undefined,
): DiffBlock[] {
  return diffWords(extractText(docA), extractText(docB));
}

// --- HTML rendering ---

/**
 * Escape HTML special characters to prevent XSS.
 * `&` is replaced first so the entities produced by later replacements
 * are not double-escaped.
 */
function escapeHtml(str: string): string {
  return str
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Render diff blocks as a styled HTML string.
 *
 * Each block is wrapped in a `<span>` with the appropriate CSS class:
 * - `.diff-equal`  — unchanged text
 * - `.diff-insert` — added text
 * - `.diff-delete` — removed text
 *
 * Block content is HTML-escaped; newlines within content are converted to
 * `<br>` tags. Spans are joined with a single space.
 *
 * @param blocks - Diff blocks to render.
 * @returns The HTML string, or `''` for an empty block list.
 */
export function renderDiffHtml(blocks: DiffBlock[]): string {
  if (blocks.length === 0) return '';

  return blocks
    .map(block => {
      const escaped = escapeHtml(block.content).replace(/\n/g, '<br>');
      return `<span class="diff-${block.type}">${escaped}</span>`;
    })
    .join(' ');
}