Full document, spreadsheet, slideshow, and diagram tooling
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

at main 235 lines 5.9 kB view raw
/**
 * Full-Text Search Index — client-side search across decrypted documents.
 *
 * Pure logic module: tokenization, indexing, ranked search.
 * Since all content is E2EE, search runs entirely in the browser
 * over decrypted document names and content snippets.
 */

/** A document as fed to the indexer and the search routine. */
export interface SearchDocument {
  id: string;
  name: string;
  type: 'doc' | 'sheet';
  /** Raw content; HTML tags and a few common entities are stripped before indexing. */
  content: string;
}

/** One ranked hit produced by {@link searchDocuments}. */
export interface SearchResult {
  docId: string;
  name: string;
  type: 'doc' | 'sheet';
  score: number;
  /** Snippet with match context */
  snippet: string;
  /**
   * Positions of query matches. `name` positions index into the document
   * name; `content` positions index into the HTML-stripped content.
   */
  matches: { field: 'name' | 'content'; start: number; end: number }[];
}

/**
 * Tokenize text into lowercase words for indexing.
 *
 * Letters, combining marks, digits and `_` from any script are kept as word
 * characters; everything else is treated as a separator. Unicode property
 * escapes are used instead of `\w` so that non-ASCII words (e.g. "café")
 * are not split apart at their accented characters.
 *
 * @param text - Text to split; empty input yields an empty array.
 * @returns Lowercased, non-empty tokens in order of appearance.
 */
export function tokenize(text: string): string[] {
  if (!text) return [];
  return text
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, ' ')
    .split(/\s+/)
    .filter((word) => word.length > 0);
}

/**
 * Strip HTML tags from content, returning plain text.
 *
 * Tags are replaced by a space, a small fixed set of entities is decoded,
 * and whitespace runs are collapsed to a single space.
 *
 * @param html - Markup to flatten; empty input yields `''`.
 * @returns Trimmed plain text.
 */
export function stripHtml(html: string): string {
  if (!html) return '';
  return (
    html
      .replace(/<[^>]+>/g, ' ')
      .replace(/&nbsp;/g, ' ')
      .replace(/&lt;/g, '<')
      .replace(/&gt;/g, '>')
      .replace(/&quot;/g, '"')
      .replace(/&#39;/g, "'")
      // `&amp;` must be decoded last: decoding it first would turn escaped
      // text such as `&amp;lt;` into `&lt;` and then, wrongly, into `<`.
      .replace(/&amp;/g, '&')
      .replace(/\s+/g, ' ')
      .trim()
  );
}

/**
 * Build an inverted index from a collection of documents.
 * Maps each token to the set of document IDs containing it.
 *
 * Tokens come from both the document name and its HTML-stripped content.
 *
 * @param documents - Documents to index.
 * @returns Map from token to the IDs of documents containing that token.
 */
export function buildIndex(
  documents: SearchDocument[],
): Map<string, Set<string>> {
  const index = new Map<string, Set<string>>();

  for (const doc of documents) {
    const nameTokens = tokenize(doc.name);
    const contentTokens = tokenize(stripHtml(doc.content));
    const uniqueTokens = new Set([...nameTokens, ...contentTokens]);

    for (const token of uniqueTokens) {
      let docIds = index.get(token);
      if (!docIds) {
        docIds = new Set<string>();
        index.set(token, docIds);
      }
      docIds.add(doc.id);
    }
  }

  return index;
}

/**
 * Extract a snippet around the first match of a query in text.
 * Returns ~80 chars of context around the match (with the default
 * `contextChars`), marking truncated sides with an ellipsis.
 *
 * @param text - Plain text to excerpt.
 * @param query - Phrase to locate (case-insensitive).
 * @param contextChars - Characters of context kept on each side of the match.
 * @returns The excerpt; `''` when `text` or `query` is empty. When the query
 *   does not occur, the first `contextChars * 2` characters of `text`.
 */
export function extractSnippet(
  text: string,
  query: string,
  contextChars = 40,
): string {
  if (!text || !query) return '';

  const lower = text.toLowerCase();
  const qLower = query.toLowerCase();
  const idx = lower.indexOf(qLower);

  if (idx === -1) {
    // No exact match, return start of text
    return text.length > contextChars * 2
      ? text.slice(0, contextChars * 2) + '…'
      : text;
  }

  const start = Math.max(0, idx - contextChars);
  const end = Math.min(text.length, idx + query.length + contextChars);

  let snippet = text.slice(start, end);
  if (start > 0) snippet = '…' + snippet;
  if (end < text.length) snippet = snippet + '…';

  return snippet;
}

/**
 * Find match positions of a query within text (case-insensitive).
 *
 * Overlapping occurrences are all reported, because the scan resumes one
 * character after each match start.
 *
 * @param text - Text to scan.
 * @param query - Phrase to look for.
 * @returns `[start, end)` offsets of every occurrence, in ascending order;
 *   empty when `text` or `query` is empty.
 */
export function findMatchPositions(
  text: string,
  query: string,
): { start: number; end: number }[] {
  if (!text || !query) return [];

  const positions: { start: number; end: number }[] = [];
  const lower = text.toLowerCase();
  const qLower = query.toLowerCase();

  let pos = 0;
  while (pos < lower.length) {
    const idx = lower.indexOf(qLower, pos);
    if (idx === -1) break;
    positions.push({ start: idx, end: idx + query.length });
    pos = idx + 1;
  }

  return positions;
}

/**
 * Search documents by query string.
 * Scores results by:
 * - Name match (exact): 10 points
 * - Name match (contains): 5 points
 * - Content token matches: 1 point per query token found in the index
 * - Prefix matches: 0.5 points per indexed token that a query token prefixes
 * - Content phrase match: 3 bonus points
 *
 * The query is trimmed once and the trimmed form is used for scoring, match
 * positions and the snippet alike, so surrounding whitespace never causes a
 * scored hit to come back without highlights.
 *
 * @param query - User-entered search text.
 * @param documents - Documents the index was built from.
 * @param index - Inverted index as produced by {@link buildIndex}.
 * @returns Results sorted by score descending, then by name; empty when the
 *   query contains no tokens.
 */
export function searchDocuments(
  query: string,
  documents: SearchDocument[],
  index: Map<string, Set<string>>,
): SearchResult[] {
  if (!query || !query.trim()) return [];

  const trimmedQuery = query.trim();
  const queryLower = trimmedQuery.toLowerCase();
  const queryTokens = tokenize(trimmedQuery);

  if (queryTokens.length === 0) return [];

  // Find candidate documents from inverted index
  const candidates = new Map<string, number>();
  const addScore = (docId: string, points: number): void => {
    candidates.set(docId, (candidates.get(docId) ?? 0) + points);
  };

  for (const token of queryTokens) {
    // Exact token match
    const exact = index.get(token);
    if (exact) {
      for (const docId of exact) addScore(docId, 1);
    }

    // Prefix match for partial words
    for (const [indexToken, docIds] of index) {
      if (indexToken !== token && indexToken.startsWith(token)) {
        for (const docId of docIds) addScore(docId, 0.5);
      }
    }
  }

  // One-time id lookup table instead of a linear scan per candidate. The
  // first document wins on duplicate ids, matching Array.prototype.find.
  const docsById = new Map<string, SearchDocument>();
  for (const doc of documents) {
    if (!docsById.has(doc.id)) docsById.set(doc.id, doc);
  }

  // Score and rank candidates
  const results: SearchResult[] = [];

  for (const [docId, tokenScore] of candidates) {
    const doc = docsById.get(docId);
    // The index may reference documents that were not passed in; skip those.
    if (!doc) continue;

    let score = tokenScore;
    const matches: SearchResult['matches'] = [];

    const nameLower = doc.name.toLowerCase();
    const plainContent = stripHtml(doc.content);

    // Name scoring
    if (nameLower === queryLower) {
      score += 10;
    } else if (nameLower.includes(queryLower)) {
      score += 5;
    }

    // Name match positions
    for (const pos of findMatchPositions(doc.name, trimmedQuery)) {
      matches.push({ field: 'name', ...pos });
    }

    // Content phrase match bonus
    if (plainContent.toLowerCase().includes(queryLower)) {
      score += 3;
    }

    // Content match positions
    for (const pos of findMatchPositions(plainContent, trimmedQuery)) {
      matches.push({ field: 'content', ...pos });
    }

    const snippet = extractSnippet(plainContent, trimmedQuery);

    results.push({
      docId: doc.id,
      name: doc.name,
      type: doc.type,
      score,
      snippet,
      matches,
    });
  }

  // Sort by score descending, then by name
  results.sort((a, b) => {
    if (b.score !== a.score) return b.score - a.score;
    return a.name.localeCompare(b.name);
  });

  return results;
}