search for standard sites pub-search.waow.tech
search zig blog atproto
11
fork

Configure Feed

Select the types of activity you want to include in your feed.

add tag filtering, frontend polish, and readme

- add document_tags table and /tags endpoint
- filter search by tag via the ?tag=<tag> query parameter
- fix FTS5 tokenization for queries with dots
- add named buffer constants, fix silent catches
- update frontend: smaller header, home link, clickable pubs
- add readme with project overview

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

zzstoatzz 14781661 feec67d9

+274 -97
+28
README.md
··· 1 + # leaflet-search 2 + 3 + full-text search for [leaflet](https://leaflet.pub) documents on the ATProto network. 4 + 5 + **live:** [leaflet-search.zzstoatzz.io](https://leaflet-search.zzstoatzz.io) 6 + 7 + ## how it works 8 + 9 + 1. **tap** subscribes to the ATProto firehose, filtering for `pub.leaflet.document` and `pub.leaflet.publication` records 10 + 2. **backend** indexes documents into SQLite FTS5 via [Turso](https://turso.tech), serves search API 11 + 3. **site** is a static frontend hosted on Cloudflare Pages 12 + 13 + ## api 14 + 15 + ``` 16 + GET /search?q=<query>[&tag=<tag>] # full-text search, optional tag filter 17 + GET /tags # list all tags with counts 18 + GET /stats # document/publication counts 19 + GET /health # health check 20 + ``` 21 + 22 + ## stack 23 + 24 + - ~450 LOC of [Zig](https://ziglang.org) for the backend 25 + - [Tap](https://github.com/bluesky-social/indigo/tree/main/cmd/tap) for ATProto sync 26 + - [Turso](https://turso.tech) for SQLite + FTS5 27 + - [Fly.io](https://fly.io) for hosting 28 + - [Cloudflare Pages](https://pages.cloudflare.com) for the frontend
+129 -17
backend/src/db.zig
··· 4 4 const http = std.http; 5 5 const Allocator = mem.Allocator; 6 6 7 + const URL_BUF_SIZE = 512; 8 + const AUTH_BUF_SIZE = 512; 9 + 7 10 var gpa: std.heap.GeneralPurposeAllocator(.{}) = .{}; 8 11 9 12 // initialized by init(), null until then ··· 57 60 \\) 58 61 , &.{}); 59 62 63 + _ = try execSql( 64 + \\CREATE TABLE IF NOT EXISTS document_tags ( 65 + \\ document_uri TEXT NOT NULL, 66 + \\ tag TEXT NOT NULL, 67 + \\ PRIMARY KEY (document_uri, tag) 68 + \\) 69 + , &.{}); 70 + 71 + _ = execSql("CREATE INDEX IF NOT EXISTS idx_document_tags_tag ON document_tags(tag)", &.{}) catch |err| { 72 + std.debug.print("create index error: {}\n", .{err}); 73 + }; 74 + 60 75 // migrate: add columns if missing (ignore "duplicate column" errors) 61 76 _ = execSql("ALTER TABLE documents ADD COLUMN publication_uri TEXT", &.{}) catch |err| { 62 77 std.debug.print("migrate documents: {}\n", .{err}); ··· 68 83 std.debug.print("turso schema initialized with FTS5\n", .{}); 69 84 } 70 85 71 - pub fn insertDocument(uri: []const u8, did: []const u8, rkey: []const u8, title: []const u8, content: []const u8, created_at: ?[]const u8, publication_uri: ?[]const u8) !void { 86 + pub fn insertDocument(uri: []const u8, did: []const u8, rkey: []const u8, title: []const u8, content: []const u8, created_at: ?[]const u8, publication_uri: ?[]const u8, tags: []const []const u8) !void { 72 87 _ = try execSql( 73 88 "INSERT OR REPLACE INTO documents (uri, did, rkey, title, content, created_at, publication_uri) VALUES (?, ?, ?, ?, ?, ?, ?)", 74 89 &.{ uri, did, rkey, title, content, created_at orelse "", publication_uri orelse "" }, ··· 85 100 ) catch |err| { 86 101 std.debug.print("insert FTS error for {s}: {}\n", .{ uri, err }); 87 102 }; 103 + 104 + // update tags - delete old, insert new 105 + _ = execSql("DELETE FROM document_tags WHERE document_uri = ?", &.{uri}) catch |err| { 106 + std.debug.print("delete tags error for {s}: {}\n", .{ uri, err }); 107 + }; 108 + for (tags) |tag| { 109 + _ = 
execSql( 110 + "INSERT OR IGNORE INTO document_tags (document_uri, tag) VALUES (?, ?)", 111 + &.{ uri, tag }, 112 + ) catch |err| { 113 + std.debug.print("insert tag error for {s}: {}\n", .{ uri, err }); 114 + }; 115 + } 88 116 } 89 117 90 118 pub fn insertPublication(uri: []const u8, did: []const u8, rkey: []const u8, name: []const u8, description: ?[]const u8, base_path: ?[]const u8) !void { ··· 121 149 const count = 7; 122 150 }; 123 151 124 - pub fn searchDocuments(alloc: Allocator, query: []const u8) ![]const u8 { 152 + pub fn searchDocuments(alloc: Allocator, query: []const u8, tag_filter: ?[]const u8) ![]const u8 { 125 153 var output: std.Io.Writer.Allocating = .init(alloc); 126 154 errdefer output.deinit(); 127 155 128 156 const temp_alloc = gpa.allocator(); 129 157 130 - const result = execSql( 131 - \\SELECT f.uri, d.did, d.title, 132 - \\ snippet(documents_fts, 2, '<mark>', '</mark>', '...', 32) as snippet, 133 - \\ d.created_at, d.rkey, p.base_path 134 - \\FROM documents_fts f 135 - \\JOIN documents d ON f.uri = d.uri 136 - \\LEFT JOIN publications p ON d.publication_uri = p.uri 137 - \\WHERE documents_fts MATCH ? 
138 - \\ORDER BY rank LIMIT 50 139 - , &.{query}) catch { 140 - try output.writer.writeAll("[]"); 141 - return try output.toOwnedSlice(); 142 - }; 158 + // normalize query to match FTS5 tokenization (dots become spaces) 159 + const normalized_query = try alloc.dupe(u8, query); 160 + for (normalized_query) |*c| { 161 + if (c.* == '.') c.* = ' '; 162 + } 163 + 164 + // build query based on whether we have a tag filter 165 + const result = if (tag_filter) |tag| 166 + execSql( 167 + \\SELECT f.uri, d.did, d.title, 168 + \\ snippet(documents_fts, 2, '<mark>', '</mark>', '...', 32) as snippet, 169 + \\ d.created_at, d.rkey, p.base_path 170 + \\FROM documents_fts f 171 + \\JOIN documents d ON f.uri = d.uri 172 + \\LEFT JOIN publications p ON d.publication_uri = p.uri 173 + \\JOIN document_tags dt ON d.uri = dt.document_uri 174 + \\WHERE documents_fts MATCH ? AND dt.tag = ? 175 + \\ORDER BY rank LIMIT 50 176 + , &.{ normalized_query, tag }) catch { 177 + try output.writer.writeAll("[]"); 178 + return try output.toOwnedSlice(); 179 + } 180 + else 181 + execSql( 182 + \\SELECT f.uri, d.did, d.title, 183 + \\ snippet(documents_fts, 2, '<mark>', '</mark>', '...', 32) as snippet, 184 + \\ d.created_at, d.rkey, p.base_path 185 + \\FROM documents_fts f 186 + \\JOIN documents d ON f.uri = d.uri 187 + \\LEFT JOIN publications p ON d.publication_uri = p.uri 188 + \\WHERE documents_fts MATCH ? 
189 + \\ORDER BY rank LIMIT 50 190 + , &.{normalized_query}) catch { 191 + try output.writer.writeAll("[]"); 192 + return try output.toOwnedSlice(); 193 + }; 143 194 defer temp_alloc.free(result); 144 195 145 196 // parse JSON response - keep parsed alive while iterating rows ··· 226 277 else 227 278 url_value; 228 279 229 - var url_buf: [512]u8 = undefined; 280 + var url_buf: [URL_BUF_SIZE]u8 = undefined; 230 281 const url = std.fmt.bufPrint(&url_buf, "https://{s}/v2/pipeline", .{host}) catch return error.UrlTooLong; 231 282 232 283 // build request body ··· 271 322 try jw.endArray(); 272 323 try jw.endObject(); 273 324 274 - var auth_buf: [512]u8 = undefined; 325 + var auth_buf: [AUTH_BUF_SIZE]u8 = undefined; 275 326 const auth_header = std.fmt.bufPrint(&auth_buf, "Bearer {s}", .{token_value}) catch return error.AuthTooLong; 276 327 277 328 var client: http.Client = .{ .allocator = alloc }; ··· 300 351 } 301 352 302 353 return try response_body.toOwnedSlice(); 354 + } 355 + 356 + pub fn getTags(alloc: Allocator) ![]const u8 { 357 + var output: std.Io.Writer.Allocating = .init(alloc); 358 + errdefer output.deinit(); 359 + 360 + const temp_alloc = gpa.allocator(); 361 + 362 + const result = execSql( 363 + \\SELECT tag, COUNT(*) as count 364 + \\FROM document_tags 365 + \\GROUP BY tag 366 + \\ORDER BY count DESC 367 + \\LIMIT 100 368 + , &.{}) catch { 369 + try output.writer.writeAll("[]"); 370 + return try output.toOwnedSlice(); 371 + }; 372 + defer temp_alloc.free(result); 373 + 374 + const parsed = json.parseFromSlice(json.Value, temp_alloc, result, .{}) catch { 375 + try output.writer.writeAll("[]"); 376 + return try output.toOwnedSlice(); 377 + }; 378 + defer parsed.deinit(); 379 + 380 + const rows = getRowsFromParsed(parsed.value) orelse { 381 + try output.writer.writeAll("[]"); 382 + return try output.toOwnedSlice(); 383 + }; 384 + 385 + var jw: json.Stringify = .{ .writer = &output.writer }; 386 + try jw.beginArray(); 387 + 388 + for (rows.items) |row| { 389 
+ if (row != .array or row.array.items.len < 2) continue; 390 + const cols = row.array.items; 391 + 392 + try jw.beginObject(); 393 + try jw.objectField("tag"); 394 + try jw.write(extractText(cols[0])); 395 + try jw.objectField("count"); 396 + const count_val = cols[1]; 397 + const count: i64 = switch (count_val) { 398 + .integer => |i| i, 399 + .object => |obj| blk: { 400 + const v = obj.get("value") orelse break :blk 0; 401 + break :blk switch (v) { 402 + .integer => |i| i, 403 + .string => |s| std.fmt.parseInt(i64, s, 10) catch 0, 404 + else => 0, 405 + }; 406 + }, 407 + else => 0, 408 + }; 409 + try jw.write(count); 410 + try jw.endObject(); 411 + } 412 + 413 + try jw.endArray(); 414 + return try output.toOwnedSlice(); 303 415 } 304 416 305 417 pub fn getStats() struct { documents: i64, publications: i64 } {
+37 -16
backend/src/http.zig
··· 4 4 const mem = std.mem; 5 5 const db = @import("db.zig"); 6 6 7 + const HTTP_BUF_SIZE = 8192; 8 + const QUERY_PARAM_BUF_SIZE = 64; 9 + 7 10 pub fn handleConnection(conn: net.Server.Connection) void { 8 11 defer conn.stream.close(); 9 12 10 - var read_buffer: [8192]u8 = undefined; 11 - var write_buffer: [8192]u8 = undefined; 13 + var read_buffer: [HTTP_BUF_SIZE]u8 = undefined; 14 + var write_buffer: [HTTP_BUF_SIZE]u8 = undefined; 12 15 13 16 var reader = conn.stream.reader(&read_buffer); 14 17 var writer = conn.stream.writer(&write_buffer); ··· 42 45 43 46 if (mem.startsWith(u8, target, "/search")) { 44 47 try handleSearch(request, target); 48 + } else if (mem.eql(u8, target, "/tags")) { 49 + try handleTags(request); 45 50 } else if (mem.eql(u8, target, "/stats")) { 46 51 try handleStats(request); 47 52 } else if (mem.eql(u8, target, "/health")) { ··· 56 61 defer arena.deinit(); 57 62 const alloc = arena.allocator(); 58 63 59 - // parse query param: /search?q=something 60 - const query = blk: { 61 - if (mem.indexOf(u8, target, "?q=")) |idx| { 62 - const encoded = target[idx + 3 ..]; 63 - // find end of query param (next & or end of string) 64 - const end = mem.indexOf(u8, encoded, "&") orelse encoded.len; 65 - const query_encoded = encoded[0..end]; 66 - // decode percent-encoding 67 - const buf = try alloc.dupe(u8, query_encoded); 68 - break :blk std.Uri.percentDecodeInPlace(buf); 69 - } 70 - break :blk ""; 71 - }; 64 + // parse query param: /search?q=something&tag=foo 65 + const query = parseQueryParam(alloc, target, "q") catch ""; 66 + const tag_filter = parseQueryParam(alloc, target, "tag") catch null; 72 67 73 68 if (query.len == 0) { 74 69 try sendJson(request, "{\"error\":\"missing q parameter\"}"); ··· 76 71 } 77 72 78 73 // perform FTS search - arena handles cleanup 79 - const results = try db.searchDocuments(alloc, query); 74 + const results = try db.searchDocuments(alloc, query, tag_filter); 80 75 try sendJson(request, results); 76 + } 77 + 78 + fn 
handleTags(request: *http.Server.Request) !void { 79 + var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); 80 + defer arena.deinit(); 81 + const alloc = arena.allocator(); 82 + 83 + const tags = try db.getTags(alloc); 84 + try sendJson(request, tags); 85 + } 86 + 87 + fn parseQueryParam(alloc: std.mem.Allocator, target: []const u8, param: []const u8) ![]const u8 { 88 + // look for ?param= or &param= 89 + const patterns = [_][]const u8{ "?", "&" }; 90 + for (patterns) |prefix| { 91 + var search_buf: [QUERY_PARAM_BUF_SIZE]u8 = undefined; 92 + const search = std.fmt.bufPrint(&search_buf, "{s}{s}=", .{ prefix, param }) catch continue; 93 + if (mem.indexOf(u8, target, search)) |idx| { 94 + const encoded = target[idx + search.len ..]; 95 + const end = mem.indexOf(u8, encoded, "&") orelse encoded.len; 96 + const query_encoded = encoded[0..end]; 97 + const buf = try alloc.dupe(u8, query_encoded); 98 + return std.Uri.percentDecodeInPlace(buf); 99 + } 100 + } 101 + return error.NotFound; 81 102 } 82 103 83 104 fn handleStats(request: *http.Server.Request) !void {
+15 -2
backend/src/tap.zig
··· 176 176 break :blk null; 177 177 }; 178 178 179 + // extract tags 180 + var tags_list: std.ArrayList([]const u8) = .{}; 181 + defer tags_list.deinit(allocator); 182 + if (record.get("tags")) |tags_val| { 183 + if (tags_val == .array) { 184 + for (tags_val.array.items) |tag_item| { 185 + if (tag_item == .string) { 186 + try tags_list.append(allocator, tag_item.string); 187 + } 188 + } 189 + } 190 + } 191 + 179 192 var content_buf: std.ArrayList(u8) = .{}; 180 193 defer content_buf.deinit(allocator); 181 194 ··· 201 214 return; 202 215 } 203 216 204 - try db.insertDocument(uri, did, rkey, title, content_buf.items, created_at, publication_uri); 205 - std.debug.print("indexed document: {s} ({} chars)\n", .{ uri, content_buf.items.len }); 217 + try db.insertDocument(uri, did, rkey, title, content_buf.items, created_at, publication_uri, tags_list.items); 218 + std.debug.print("indexed document: {s} ({} chars, {} tags)\n", .{ uri, content_buf.items.len, tags_list.items.len }); 206 219 } 207 220 208 221 fn extractPlaintextFromPage(allocator: Allocator, buf: *std.ArrayList(u8), page: json.ObjectMap) !void {
+65 -62
site/index.html
··· 8 8 * { box-sizing: border-box; margin: 0; padding: 0; } 9 9 10 10 body { 11 - font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; 11 + font-family: monospace; 12 12 background: #0a0a0a; 13 - color: #e5e5e5; 13 + color: #ccc; 14 14 min-height: 100vh; 15 - padding: 2rem; 15 + padding: 1rem; 16 + font-size: 14px; 17 + line-height: 1.6; 16 18 } 17 19 18 20 .container { 19 - max-width: 800px; 21 + max-width: 600px; 20 22 margin: 0 auto; 21 23 } 22 24 25 + a { color: #1B7340; text-decoration: none; } 26 + a:hover { color: #2a9d5c; } 27 + 23 28 h1 { 24 - font-size: 1.5rem; 25 - font-weight: 400; 29 + font-size: 12px; 30 + font-weight: normal; 26 31 margin-bottom: 1.5rem; 27 - color: #a3a3a3; 28 32 } 29 33 30 - h1 span { color: #22c55e; } 31 - h1 a.src { font-size: 0.7rem; color: #525252; margin-left: 0.5rem; } 32 - h1 a.src:hover { color: #737373; } 34 + h1 a.title { 35 + color: #888; 36 + } 37 + 38 + h1 a.title:hover { 39 + color: #fff; 40 + } 41 + 42 + h1 a.src { 43 + font-size: 10px; 44 + color: #444; 45 + } 33 46 34 47 .search-box { 35 48 display: flex; 36 49 gap: 0.5rem; 37 - margin-bottom: 2rem; 50 + margin-bottom: 1.5rem; 38 51 } 39 52 40 53 input[type="text"] { 41 54 flex: 1; 42 - padding: 0.75rem 1rem; 43 - font-size: 1rem; 44 - background: #171717; 45 - border: 1px solid #262626; 46 - border-radius: 0.5rem; 47 - color: #e5e5e5; 48 - outline: none; 55 + padding: 0.5rem; 56 + font-family: monospace; 57 + font-size: 14px; 58 + background: #111; 59 + border: 1px solid #333; 60 + color: #ccc; 49 61 } 50 62 51 - input[type="text"]:focus { 52 - border-color: #22c55e; 53 - } 63 + input[type="text"]:focus { outline: 1px solid #1B7340; } 54 64 55 65 button { 56 - padding: 0.75rem 1.5rem; 57 - font-size: 1rem; 58 - background: #22c55e; 59 - color: #0a0a0a; 60 - border: none; 61 - border-radius: 0.5rem; 66 + padding: 0.5rem 1rem; 67 + font-family: monospace; 68 + font-size: 14px; 69 + background: #111; 70 + border: 1px solid #333; 71 + 
color: #ccc; 62 72 cursor: pointer; 63 - font-weight: 500; 64 73 } 65 74 66 - button:hover { background: #16a34a; } 67 - button:disabled { background: #374151; color: #6b7280; cursor: not-allowed; } 75 + button:hover { background: #222; } 76 + button:disabled { color: #555; cursor: not-allowed; } 68 77 69 78 .results { 70 79 display: flex; 71 80 flex-direction: column; 72 - gap: 1rem; 73 81 } 74 82 75 83 .result { 76 - background: #171717; 77 - border: 1px solid #262626; 78 - border-radius: 0.5rem; 79 - padding: 1rem; 84 + border-bottom: 1px solid #222; 85 + padding: 1rem 0; 80 86 } 81 87 82 - .result:hover { border-color: #404040; } 88 + .result:hover { background: #111; margin: 0 -0.5rem; padding: 1rem 0.5rem; } 83 89 84 90 .result-title { 85 - font-size: 1.1rem; 91 + color: #fff; 86 92 margin-bottom: 0.5rem; 87 - color: #f5f5f5; 88 93 } 89 94 90 - .result-title a { 91 - color: inherit; 92 - text-decoration: none; 93 - } 94 - 95 - .result-title a:hover { color: #22c55e; } 95 + .result-title a { color: inherit; } 96 + .result-title a:hover { color: #2a9d5c; } 96 97 97 98 .result-snippet { 98 - font-size: 0.9rem; 99 - color: #a3a3a3; 99 + font-size: 12px; 100 + color: #888; 100 101 margin-bottom: 0.5rem; 101 - line-height: 1.4; 102 + line-height: 1.5; 102 103 } 103 104 104 105 .result-snippet mark { 105 - background: #22c55e33; 106 - color: #22c55e; 106 + background: rgba(27, 115, 64, 0.3); 107 + color: #2a9d5c; 107 108 padding: 0 2px; 108 - border-radius: 2px; 109 109 } 110 110 111 111 .result-meta { 112 - font-size: 0.75rem; 113 - color: #525252; 112 + font-size: 11px; 113 + color: #555; 114 114 } 115 115 116 116 .status { 117 117 padding: 1rem; 118 118 text-align: center; 119 - color: #737373; 119 + color: #666; 120 120 } 121 121 122 - .error { color: #ef4444; } 122 + .error { color: #c44; } 123 123 124 124 .empty-state { 125 125 text-align: center; 126 - padding: 3rem; 127 - color: #525252; 126 + padding: 2rem; 127 + color: #555; 128 128 } 129 129 130 130 
.empty-state p { margin-bottom: 0.5rem; } 131 131 132 132 .stats { 133 - font-size: 0.75rem; 134 - color: #525252; 135 - margin-top: 2rem; 133 + font-size: 11px; 134 + color: #555; 135 + margin-top: 1.5rem; 136 136 text-align: center; 137 137 } 138 138 </style> 139 139 </head> 140 140 <body> 141 141 <div class="container"> 142 - <h1><span>leaflet</span> search <a href="https://tangled.sh/@zzstoatzz.io/leaflet-search" target="_blank" class="src">[src]</a></h1> 142 + <h1><a href="/" class="title">leaflet search</a> <a href="https://tangled.sh/@zzstoatzz.io/leaflet-search" target="_blank" class="src">[src]</a></h1> 143 143 144 144 <div class="search-box"> 145 145 <input type="text" id="query" placeholder="search content..." autofocus> ··· 206 206 </div>`; 207 207 208 208 for (const doc of results) { 209 + // published docs use basePath, unpublished use leaflet.pub/p/{did}/{rkey} 209 210 const leafletUrl = doc.basePath && doc.rkey 210 211 ? `https://${doc.basePath}/${doc.rkey}` 211 - : null; 212 + : (doc.did && doc.rkey ? `https://leaflet.pub/p/${doc.did}/${doc.rkey}` : null); 212 213 const date = doc.createdAt ? new Date(doc.createdAt).toLocaleDateString() : ''; 213 214 html += ` 214 215 <div class="result"> ··· 219 220 </div> 220 221 <div class="result-snippet">${doc.snippet || ''}</div> 221 222 <div class="result-meta"> 222 - ${date ? `${date} | ` : ''}${doc.basePath ? doc.basePath : 'unpublished'} 223 + ${date ? `${date} | ` : ''}${doc.basePath 224 + ? `<a href="https://${doc.basePath}" target="_blank">${doc.basePath}</a>` 225 + : `<a href="https://leaflet.pub" target="_blank">leaflet.pub</a>`} 223 226 </div> 224 227 </div> 225 228 `;