Full document, spreadsheet, slideshow, and diagram tooling
1/**
2 * Full-Text Search Index — client-side search across decrypted documents.
3 *
4 * Pure logic module: tokenization, indexing, ranked search.
5 * Since all content is E2EE, search runs entirely in the browser
6 * over decrypted document names and content snippets.
7 */
8
/**
 * A decrypted document as handed to the indexer and the search routine.
 */
export interface SearchDocument {
  /** Identifier that index entries and search results use to refer to the document. */
  id: string;
  /** Document name; it is tokenized into the index and compared against the query. */
  name: string;
  /** Kind of document: a text document or a spreadsheet. */
  type: 'doc' | 'sheet';
  /** Document body; HTML tags are stripped before it is indexed or searched. */
  content: string;
}
15
/**
 * One ranked hit produced by a search.
 */
export interface SearchResult {
  /** Identifier of the matching document. */
  docId: string;
  /** Name of the matching document. */
  name: string;
  /** Kind of the matching document. */
  type: 'doc' | 'sheet';
  /** Relevance score; results are ordered from highest to lowest. */
  score: number;
  /** Excerpt of the plain-text content showing the match in context. */
  snippet: string;
  /**
   * Character ranges where the query occurs, tagged with the field
   * (document name or plain-text content) they refer to.
   */
  matches: Array<{ field: 'name' | 'content'; start: number; end: number }>;
}
26
/**
 * Tokenize text into lowercase words for indexing.
 *
 * A token is a maximal run of letters, combining marks, digits or
 * underscores; anything else (punctuation, symbols, whitespace) separates
 * tokens. Letters and digits are recognised by Unicode category rather than
 * by the ASCII-only `\w` class, so accented and non-Latin words stay whole
 * instead of being cut at the first non-ASCII character.
 *
 * @param text - Raw text to split; empty input yields an empty array.
 * @returns Tokens in order of appearance, duplicates included.
 */
export function tokenize(text: string): string[] {
  if (!text) return [];
  // match() yields null when the text contains no word characters at all.
  return text.toLowerCase().match(/[\p{L}\p{M}\p{N}_]+/gu) ?? [];
}
38
/** Replacement text for each entity name that stripHtml decodes. */
const HTML_ENTITY_TEXT: Record<string, string> = {
  nbsp: ' ',
  amp: '&',
  lt: '<',
  gt: '>',
  quot: '"',
  '#39': "'",
};

/**
 * Strip HTML tags from content, returning plain text.
 *
 * Tags are replaced by a space so adjacent words do not fuse. The nbsp, amp,
 * lt, gt, quot and #39 character entities are then decoded, runs of
 * whitespace are collapsed to a single space and the result is trimmed.
 *
 * Entities are decoded in one pass, so text produced by decoding one entity
 * is never decoded a second time (an escaped ampersand followed by "lt;"
 * stays a literal entity instead of turning into an angle bracket).
 *
 * @param html - Markup to convert; empty input yields an empty string.
 * @returns The plain-text rendering of `html`.
 */
export function stripHtml(html: string): string {
  if (!html) return '';
  return html
    .replace(/<[^>]+>/g, ' ')
    .replace(
      /&(nbsp|amp|lt|gt|quot|#39);/g,
      (entity: string, name: string) => HTML_ENTITY_TEXT[name] ?? entity,
    )
    .replace(/\s+/g, ' ')
    .trim();
}
55
/**
 * Build an inverted index over a collection of documents.
 *
 * Each document contributes the tokens of its name and of its HTML-stripped
 * content. The returned map associates every token with the set of ids of
 * the documents in which it occurs.
 *
 * @param documents - Documents to index.
 * @returns Map from token to the ids of the documents containing it.
 */
export function buildIndex(
  documents: SearchDocument[],
): Map<string, Set<string>> {
  const postings = new Map<string, Set<string>>();

  for (const entry of documents) {
    const fromName = tokenize(entry.name);
    const fromBody = tokenize(stripHtml(entry.content));

    // A document is recorded once per token, however often the token repeats.
    const distinct = new Set(fromName.concat(fromBody));

    distinct.forEach((word) => {
      let owners = postings.get(word);
      if (owners === undefined) {
        owners = new Set<string>();
        postings.set(word, owners);
      }
      owners.add(entry.id);
    });
  }

  return postings;
}
80
/**
 * Produce a short excerpt of `text` centred on the first occurrence of
 * `query` (compared case-insensitively).
 *
 * When the query occurs, the excerpt spans up to `contextChars` characters
 * on each side of the match, with an ellipsis on every side that was cut.
 * When it does not occur, the excerpt is the first `contextChars * 2`
 * characters of the text, followed by an ellipsis if the text is longer.
 *
 * @param text - Text to excerpt; an empty value yields an empty string.
 * @param query - Phrase to locate; an empty value yields an empty string.
 * @param contextChars - Characters of context kept on each side of the match.
 * @returns The excerpt.
 */
export function extractSnippet(
  text: string,
  query: string,
  contextChars = 40,
): string {
  if (!text || !query) return '';

  const hitAt = text.toLowerCase().indexOf(query.toLowerCase());

  if (hitAt < 0) {
    // Nothing to centre on: fall back to the beginning of the text.
    const limit = contextChars * 2;
    if (text.length <= limit) return text;
    return text.slice(0, limit) + '…';
  }

  const from = Math.max(0, hitAt - contextChars);
  const to = Math.min(text.length, hitAt + query.length + contextChars);

  const lead = from > 0 ? '…' : '';
  const tail = to < text.length ? '…' : '';

  return lead + text.slice(from, to) + tail;
}
112
/**
 * Locate every occurrence of `query` in `text`, ignoring case.
 *
 * The scan resumes one character after the start of each hit, so
 * overlapping occurrences are all reported.
 *
 * @param text - Text to scan; an empty value yields no positions.
 * @param query - Phrase to look for; an empty value yields no positions.
 * @returns Ranges in ascending order; `end` is `start` plus the query length.
 */
export function findMatchPositions(
  text: string,
  query: string,
): { start: number; end: number }[] {
  if (!text || !query) return [];

  const haystack = text.toLowerCase();
  const needle = query.toLowerCase();
  const found: { start: number; end: number }[] = [];

  for (
    let hit = haystack.indexOf(needle);
    hit !== -1;
    hit = haystack.indexOf(needle, hit + 1)
  ) {
    found.push({ start: hit, end: hit + query.length });
  }

  return found;
}
136
/**
 * Search documents by query string and return ranked results.
 *
 * Candidates are gathered from the inverted index, then scored:
 *  - Indexed token equal to a query token: 1 point per unique query token
 *  - Indexed token that merely starts with a query token: 0.5 points for
 *    each such indexed token
 *  - Name match (exact): 10 points
 *  - Name match (contains): 5 points
 *  - Content phrase match: 3 bonus points
 *
 * The query is trimmed once and that trimmed phrase is used for scoring,
 * match positions and the snippet alike, so surrounding whitespace in the
 * input cannot make a document score without any highlightable match.
 *
 * @param query - User input; blank or punctuation-only input yields no results.
 * @param documents - Documents that candidate ids are resolved against. Ids
 *   found in the index but absent here are skipped; if an id occurs more than
 *   once, the first document with that id is used.
 * @param index - Inverted index mapping each token to the ids of the
 *   documents containing it.
 * @returns Results sorted by score (descending), ties broken by name.
 */
export function searchDocuments(
  query: string,
  documents: SearchDocument[],
  index: Map<string, Set<string>>,
): SearchResult[] {
  if (!query || !query.trim()) return [];

  const phrase = query.trim();
  const queryLower = query.toLowerCase().trim();

  // De-duplicate so a repeated word in the query is not credited twice.
  const queryTokens = new Set(tokenize(phrase));
  if (queryTokens.size === 0) return [];

  // Find candidate documents from the inverted index.
  const candidates = new Map<string, number>();
  const credit = (docIds: Iterable<string>, points: number): void => {
    for (const docId of docIds) {
      candidates.set(docId, (candidates.get(docId) ?? 0) + points);
    }
  };

  for (const token of queryTokens) {
    // Exact token match
    const exact = index.get(token);
    if (exact) credit(exact, 1);

    // Prefix match for partial words
    for (const [indexToken, docIds] of index) {
      if (indexToken !== token && indexToken.startsWith(token)) {
        credit(docIds, 0.5);
      }
    }
  }

  // Resolve ids through a lookup table instead of scanning the array for
  // every candidate. The first document wins on duplicate ids, as a linear
  // search from the front would.
  const docsById = new Map<string, SearchDocument>();
  for (const doc of documents) {
    if (!docsById.has(doc.id)) docsById.set(doc.id, doc);
  }

  // Score and rank candidates
  const results: SearchResult[] = [];

  for (const [docId, tokenScore] of candidates) {
    const doc = docsById.get(docId);
    if (!doc) continue;

    let score = tokenScore;
    const matches: SearchResult['matches'] = [];

    const nameLower = doc.name.toLowerCase();
    const plainContent = stripHtml(doc.content);

    // Name scoring
    if (nameLower === queryLower) {
      score += 10;
    } else if (nameLower.includes(queryLower)) {
      score += 5;
    }

    // Name match positions
    for (const pos of findMatchPositions(doc.name, phrase)) {
      matches.push({ field: 'name', ...pos });
    }

    // Content phrase match bonus
    if (plainContent.toLowerCase().includes(queryLower)) {
      score += 3;
    }

    // Content match positions
    for (const pos of findMatchPositions(plainContent, phrase)) {
      matches.push({ field: 'content', ...pos });
    }

    results.push({
      docId: doc.id,
      name: doc.name,
      type: doc.type,
      score,
      snippet: extractSnippet(plainContent, phrase),
      matches,
    });
  }

  // Sort by score descending, then by name
  results.sort((a, b) => {
    if (b.score !== a.score) return b.score - a.score;
    return a.name.localeCompare(b.name);
  });

  return results;
}