add entity types, publication search, and link previews · zzstoatzz.io/leaflet-search@27d7c7b

+140 -50

backend/src/db.zig

··· 61 61 , &.{}); 62 62 63 63 _ = try execSql( 64 + \\CREATE VIRTUAL TABLE IF NOT EXISTS publications_fts USING fts5( 65 + \\ uri UNINDEXED, 66 + \\ name, 67 + \\ description 68 + \\) 69 + , &.{}); 70 + 71 + _ = try execSql( 64 72 \\CREATE TABLE IF NOT EXISTS document_tags ( 65 73 \\ document_uri TEXT NOT NULL, 66 74 \\ tag TEXT NOT NULL, ··· 120 128 "INSERT OR REPLACE INTO publications (uri, did, rkey, name, description, base_path) VALUES (?, ?, ?, ?, ?, ?)", 121 129 &.{ uri, did, rkey, name, description orelse "", base_path orelse "" }, 122 130 ); 131 + 132 + // update FTS index - delete old entry first, then insert new 133 + _ = execSql("DELETE FROM publications_fts WHERE uri = ?", &.{uri}) catch |err| { 134 + std.debug.print("delete publication FTS error for {s}: {}\n", .{ uri, err }); 135 + }; 136 + 137 + _ = execSql( 138 + "INSERT INTO publications_fts (uri, name, description) VALUES (?, ?, ?)", 139 + &.{ uri, name, description orelse "" }, 140 + ) catch |err| { 141 + std.debug.print("insert publication FTS error for {s}: {}\n", .{ uri, err }); 142 + }; 123 143 } 124 144 125 145 pub fn deleteDocument(uri: []const u8) void { ··· 135 155 _ = execSql("DELETE FROM publications WHERE uri = ?", &.{uri}) catch |err| { 136 156 std.debug.print("delete publication error for {s}: {}\n", .{ uri, err }); 137 157 }; 158 + _ = execSql("DELETE FROM publications_fts WHERE uri = ?", &.{uri}) catch |err| { 159 + std.debug.print("delete publication FTS error for {s}: {}\n", .{ uri, err }); 160 + }; 138 161 } 139 162 140 - // column indices for search query results 141 - const SearchCol = struct { 163 + // column indices for document search query results 164 + const DocSearchCol = struct { 142 165 const uri = 0; 143 166 const did = 1; 144 167 const title = 2; ··· 146 169 const created_at = 4; 147 170 const rkey = 5; 148 171 const base_path = 6; 149 - const count = 7; 172 + const has_publication = 7; 173 + const count = 8; 150 174 }; 151 175 152 - pub fn searchDocuments(alloc: 
Allocator, query: []const u8, tag_filter: ?[]const u8) ![]const u8 { 176 + // column indices for publication search query results 177 + const PubSearchCol = struct { 178 + const uri = 0; 179 + const did = 1; 180 + const name = 2; 181 + const snippet = 3; 182 + const rkey = 4; 183 + const base_path = 5; 184 + const count = 6; 185 + }; 186 + 187 + pub fn search(alloc: Allocator, query: []const u8, tag_filter: ?[]const u8) ![]const u8 { 153 188 var output: std.Io.Writer.Allocating = .init(alloc); 154 189 errdefer output.deinit(); 155 190 ··· 161 196 if (c.* == '.') c.* = ' '; 162 197 } 163 198 164 - // build query based on whether we have a tag filter 165 - const result = if (tag_filter) |tag| 199 + var jw: json.Stringify = .{ .writer = &output.writer }; 200 + try jw.beginArray(); 201 + 202 + // search documents (articles and looseleafs) 203 + const doc_result = if (tag_filter) |tag| 166 204 execSql( 167 205 \\SELECT f.uri, d.did, d.title, 168 206 \\ snippet(documents_fts, 2, '<mark>', '</mark>', '...', 32) as snippet, 169 - \\ d.created_at, d.rkey, p.base_path 207 + \\ d.created_at, d.rkey, p.base_path, 208 + \\ CASE WHEN d.publication_uri != '' THEN 1 ELSE 0 END as has_publication 170 209 \\FROM documents_fts f 171 210 \\JOIN documents d ON f.uri = d.uri 172 211 \\LEFT JOIN publications p ON d.publication_uri = p.uri 173 212 \\JOIN document_tags dt ON d.uri = dt.document_uri 174 213 \\WHERE documents_fts MATCH ? AND dt.tag = ? 
175 - \\ORDER BY rank LIMIT 50 176 - , &.{ normalized_query, tag }) catch { 177 - try output.writer.writeAll("[]"); 178 - return try output.toOwnedSlice(); 179 - } 214 + \\ORDER BY rank LIMIT 40 215 + , &.{ normalized_query, tag }) catch null 180 216 else 181 217 execSql( 182 218 \\SELECT f.uri, d.did, d.title, 183 219 \\ snippet(documents_fts, 2, '<mark>', '</mark>', '...', 32) as snippet, 184 - \\ d.created_at, d.rkey, p.base_path 220 + \\ d.created_at, d.rkey, p.base_path, 221 + \\ CASE WHEN d.publication_uri != '' THEN 1 ELSE 0 END as has_publication 185 222 \\FROM documents_fts f 186 223 \\JOIN documents d ON f.uri = d.uri 187 224 \\LEFT JOIN publications p ON d.publication_uri = p.uri 188 225 \\WHERE documents_fts MATCH ? 189 - \\ORDER BY rank LIMIT 50 190 - , &.{normalized_query}) catch { 191 - try output.writer.writeAll("[]"); 192 - return try output.toOwnedSlice(); 193 - }; 194 - defer temp_alloc.free(result); 226 + \\ORDER BY rank LIMIT 40 227 + , &.{normalized_query}) catch null; 195 228 196 - // parse JSON response - keep parsed alive while iterating rows 197 - const parsed = json.parseFromSlice(json.Value, temp_alloc, result, .{}) catch { 198 - try output.writer.writeAll("[]"); 199 - return try output.toOwnedSlice(); 200 - }; 201 - defer parsed.deinit(); 229 + if (doc_result) |result| { 230 + defer temp_alloc.free(result); 231 + if (json.parseFromSlice(json.Value, temp_alloc, result, .{})) |parsed| { 232 + defer parsed.deinit(); 233 + if (getRowsFromParsed(parsed.value)) |rows| { 234 + for (rows.items) |row| { 235 + if (row != .array or row.array.items.len < DocSearchCol.count) continue; 236 + const cols = row.array.items; 202 237 203 - const rows = getRowsFromParsed(parsed.value) orelse { 204 - try output.writer.writeAll("[]"); 205 - return try output.toOwnedSlice(); 206 - }; 238 + // determine entity type: article (has publication) or looseleaf (no publication) 239 + const has_pub = extractInt(cols[DocSearchCol.has_publication]) != 0; 240 + const 
entity_type = if (has_pub) "article" else "looseleaf"; 207 241 208 - var jw: json.Stringify = .{ .writer = &output.writer }; 209 - try jw.beginArray(); 242 + try jw.beginObject(); 243 + try jw.objectField("type"); 244 + try jw.write(entity_type); 245 + try jw.objectField("uri"); 246 + try jw.write(extractText(cols[DocSearchCol.uri])); 247 + try jw.objectField("did"); 248 + try jw.write(extractText(cols[DocSearchCol.did])); 249 + try jw.objectField("title"); 250 + try jw.write(extractText(cols[DocSearchCol.title])); 251 + try jw.objectField("snippet"); 252 + try jw.write(extractText(cols[DocSearchCol.snippet])); 253 + try jw.objectField("createdAt"); 254 + try jw.write(extractText(cols[DocSearchCol.created_at])); 255 + try jw.objectField("rkey"); 256 + try jw.write(extractText(cols[DocSearchCol.rkey])); 257 + try jw.objectField("basePath"); 258 + try jw.write(extractText(cols[DocSearchCol.base_path])); 259 + try jw.endObject(); 260 + } 261 + } 262 + } else |_| {} 263 + } 210 264 211 - for (rows.items) |row| { 212 - if (row != .array or row.array.items.len < SearchCol.count) continue; 213 - const cols = row.array.items; 265 + // search publications (only if no tag filter - publications don't have tags) 266 + if (tag_filter == null) { 267 + const pub_result = execSql( 268 + \\SELECT f.uri, p.did, p.name, 269 + \\ snippet(publications_fts, 2, '<mark>', '</mark>', '...', 32) as snippet, 270 + \\ p.rkey, p.base_path 271 + \\FROM publications_fts f 272 + \\JOIN publications p ON f.uri = p.uri 273 + \\WHERE publications_fts MATCH ? 
274 + \\ORDER BY rank LIMIT 10 275 + , &.{normalized_query}) catch null; 214 276 215 - try jw.beginObject(); 216 - try jw.objectField("uri"); 217 - try jw.write(extractText(cols[SearchCol.uri])); 218 - try jw.objectField("did"); 219 - try jw.write(extractText(cols[SearchCol.did])); 220 - try jw.objectField("title"); 221 - try jw.write(extractText(cols[SearchCol.title])); 222 - try jw.objectField("snippet"); 223 - try jw.write(extractText(cols[SearchCol.snippet])); 224 - try jw.objectField("createdAt"); 225 - try jw.write(extractText(cols[SearchCol.created_at])); 226 - try jw.objectField("rkey"); 227 - try jw.write(extractText(cols[SearchCol.rkey])); 228 - try jw.objectField("basePath"); 229 - try jw.write(extractText(cols[SearchCol.base_path])); 230 - try jw.endObject(); 277 + if (pub_result) |result| { 278 + defer temp_alloc.free(result); 279 + if (json.parseFromSlice(json.Value, temp_alloc, result, .{})) |parsed| { 280 + defer parsed.deinit(); 281 + if (getRowsFromParsed(parsed.value)) |rows| { 282 + for (rows.items) |row| { 283 + if (row != .array or row.array.items.len < PubSearchCol.count) continue; 284 + const cols = row.array.items; 285 + 286 + try jw.beginObject(); 287 + try jw.objectField("type"); 288 + try jw.write("publication"); 289 + try jw.objectField("uri"); 290 + try jw.write(extractText(cols[PubSearchCol.uri])); 291 + try jw.objectField("did"); 292 + try jw.write(extractText(cols[PubSearchCol.did])); 293 + try jw.objectField("title"); 294 + try jw.write(extractText(cols[PubSearchCol.name])); 295 + try jw.objectField("snippet"); 296 + try jw.write(extractText(cols[PubSearchCol.snippet])); 297 + try jw.objectField("rkey"); 298 + try jw.write(extractText(cols[PubSearchCol.rkey])); 299 + try jw.objectField("basePath"); 300 + try jw.write(extractText(cols[PubSearchCol.base_path])); 301 + try jw.endObject(); 302 + } 303 + } 304 + } else |_| {} 305 + } 231 306 } 232 307 233 308 try jw.endArray(); ··· 258 333 .string => |s| s, 259 334 .object => |obj| if 
(obj.get("value")) |v| (if (v == .string) v.string else "") else "", 260 335 else => "", 336 + }; 337 + } 338 + 339 + fn extractInt(val: json.Value) i64 { 340 + return switch (val) { 341 + .integer => |i| i, 342 + .object => |obj| blk: { 343 + const v = obj.get("value") orelse break :blk 0; 344 + break :blk switch (v) { 345 + .integer => |i| i, 346 + .string => |s| std.fmt.parseInt(i64, s, 10) catch 0, 347 + else => 0, 348 + }; 349 + }, 350 + else => 0, 261 351 }; 262 352 } 263 353

+1 -1

backend/src/http.zig

··· 71 71 } 72 72 73 73 // perform FTS search - arena handles cleanup 74 - const results = try db.searchDocuments(alloc, query, tag_filter); 74 + const results = try db.search(alloc, query, tag_filter); 75 75 try sendJson(request, results); 76 76 } 77 77

+50

site/functions/[[path]].js

/**
 * Catch-all request handler that injects per-query Open Graph / Twitter
 * meta tags into the served HTML so shared search links get a preview.
 *
 * NOTE(review): the `onRequest(context)` / `context.next()` shape and the
 * `functions/[[path]].js` location look like Cloudflare Pages Functions —
 * confirm against the deployment target.
 *
 * @param {{ request: { url: string }, next: () => Promise<Response> }} context
 * @returns {Promise<Response>} the downstream response, rewritten only when
 *   a `q` search parameter is present and the downstream response is a
 *   successful HTML document.
 */
export async function onRequest(context) {
  const url = new URL(context.request.url);
  const query = url.searchParams.get('q');

  // if no query param, just serve the static file
  if (!query) {
    return context.next();
  }

  // fetch the original response
  const response = await context.next();

  // This route matches every path, so only rewrite successful HTML
  // documents; assets and non-success responses pass through untouched
  // instead of being relabelled as a 200 text/html page.
  const contentType = response.headers.get('content-type') || '';
  if (!response.ok || !contentType.includes('text/html')) {
    return response;
  }

  let html = await response.text();

  // build OG meta tags
  const title = `"${query}" - leaflet search`;
  const description = `search results for "${query}" on leaflet`;
  const ogUrl = url.toString();

  // remove existing OG tags
  html = html.replace(/<meta property="og:[^"]*"[^>]*>/g, '');
  html = html.replace(/<meta name="twitter:[^"]*"[^>]*>/g, '');

  const ogTags = `
    <meta property="og:title" content="${escapeHtml(title)}" />
    <meta property="og:description" content="${escapeHtml(description)}" />
    <meta property="og:url" content="${escapeHtml(ogUrl)}" />
    <meta property="og:type" content="website" />
    <meta name="twitter:card" content="summary" />
    <meta name="twitter:title" content="${escapeHtml(title)}" />
    <meta name="twitter:description" content="${escapeHtml(description)}" />
  `;

  // inject OG tags into <head>. A function replacer is used on purpose:
  // with a string replacement, `$&`, `$'`, `` $` `` and `$$` sequences coming
  // from the (user-controlled) query would be expanded by String.replace.
  const modifiedHtml = html.replace('</head>', () => `${ogTags}</head>`);

  return new Response(modifiedHtml, {
    headers: {
      'content-type': 'text/html;charset=UTF-8',
    },
  });
}

/**
 * Escape a value for safe use inside HTML text or a quoted attribute value.
 *
 * @param {string} str raw text
 * @returns {string} text with `&`, `<`, `>`, `"` and `'` replaced by entities
 */
function escapeHtml(str) {
  return String(str)
    // `&` must be handled first so entities produced below are not re-escaped
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#039;');
}

+45 -4

site/index.html

··· 4 4 <meta charset="UTF-8"> 5 5 <meta name="viewport" content="width=device-width, initial-scale=1.0"> 6 6 <title>leaflet search</title> 7 + <meta name="description" content="full-text search for leaflet documents on atproto"> 8 + <meta property="og:title" content="leaflet search"> 9 + <meta property="og:description" content="full-text search for leaflet documents on atproto"> 10 + <meta property="og:type" content="website"> 11 + <meta name="twitter:card" content="summary"> 12 + <meta name="twitter:title" content="leaflet search"> 13 + <meta name="twitter:description" content="full-text search for leaflet documents on atproto"> 7 14 <style> 8 15 * { box-sizing: border-box; margin: 0; padding: 0; } 9 16 ··· 113 120 color: #555; 114 121 } 115 122 123 + .entity-type { 124 + font-size: 10px; 125 + padding: 2px 6px; 126 + border-radius: 3px; 127 + margin-right: 8px; 128 + text-transform: lowercase; 129 + } 130 + 131 + .entity-type.article { 132 + background: rgba(27, 115, 64, 0.2); 133 + color: #2a9d5c; 134 + } 135 + 136 + .entity-type.looseleaf { 137 + background: rgba(100, 100, 100, 0.2); 138 + color: #888; 139 + } 140 + 141 + .entity-type.publication { 142 + background: rgba(64, 115, 180, 0.2); 143 + color: #6a9fd4; 144 + } 145 + 116 146 .status { 117 147 padding: 1rem; 118 148 text-align: center; ··· 206 236 </div>`; 207 237 208 238 for (const doc of results) { 209 - // published docs use basePath, unpublished use leaflet.pub/p/{did}/{rkey} 210 - const leafletUrl = doc.basePath && doc.rkey 211 - ? `https://${doc.basePath}/${doc.rkey}` 212 - : (doc.did && doc.rkey ? `https://leaflet.pub/p/${doc.did}/${doc.rkey}` : null); 239 + const entityType = doc.type || 'article'; 240 + 241 + // build URL based on entity type 242 + let leafletUrl = null; 243 + if (entityType === 'publication') { 244 + // publications link to their base path 245 + leafletUrl = doc.basePath ? 
`https://${doc.basePath}` : null; 246 + } else { 247 + // articles and looseleafs link to specific document 248 + leafletUrl = doc.basePath && doc.rkey 249 + ? `https://${doc.basePath}/${doc.rkey}` 250 + : (doc.did && doc.rkey ? `https://leaflet.pub/p/${doc.did}/${doc.rkey}` : null); 251 + } 252 + 213 253 const date = doc.createdAt ? new Date(doc.createdAt).toLocaleDateString() : ''; 214 254 html += ` 215 255 <div class="result"> 216 256 <div class="result-title"> 257 + <span class="entity-type ${entityType}">${entityType}</span> 217 258 ${leafletUrl 218 259 ? `<a href="${leafletUrl}" target="_blank">${escapeHtml(doc.title || 'Untitled')}</a>` 219 260 : escapeHtml(doc.title || 'Untitled')}

Configure Feed

Configure Feed