search for standard sites pub-search.waow.tech
search zig blog atproto
11
fork

Configure Feed

Select the types of activity you want to include in your feed.

improve search: OR ranking, URL decoding, lower default limit

- use OR between search terms with BM25 ranking for better recall
- decode `+` as a space in query params (form-urlencoded), before percent-decoding
- lower MCP search default limit from 20 to 5

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

zzstoatzz 35ad4b51 7fa1172b

+68 -13
+62 -11
backend/src/db/mod.zig
··· 410 410 return try output.toOwnedSlice(); 411 411 } 412 412 413 - /// Build FTS5 query with prefix on last word only: "cat dog" -> "cat dog*" 413 + /// Build FTS5 query with OR between terms: "cat dog" -> "cat OR dog*" 414 + /// Uses OR for better recall with BM25 ranking (more matches = higher score) 414 415 fn buildFtsQuery(alloc: Allocator, query: []const u8) ![]const u8 { 415 416 if (query.len == 0) return ""; 416 417 417 - // find actual content bounds (trim whitespace) 418 + // normalize: trim whitespace 418 419 var start: usize = 0; 419 420 var end: usize = query.len; 420 421 while (start < end and query[start] == ' ') start += 1; 421 422 while (end > start and query[end - 1] == ' ') end -= 1; 423 + if (start >= end) return ""; 422 424 423 - if (start >= end) return ""; 425 + const trimmed = query[start..end]; 426 + 427 + // count words and total length 428 + var word_count: usize = 0; 429 + var total_word_len: usize = 0; 430 + var in_word = false; 431 + for (trimmed) |c| { 432 + const is_sep = (c == ' ' or c == '.'); 433 + if (is_sep) { 434 + in_word = false; 435 + } else { 436 + if (!in_word) word_count += 1; 437 + in_word = true; 438 + total_word_len += 1; 439 + } 440 + } 424 441 425 - // allocate: trimmed length + 1 for '*' at end 426 - const trimmed_len = end - start; 427 - const buf = try alloc.alloc(u8, trimmed_len + 1); 442 + if (word_count == 0) return ""; 428 443 429 - // copy and normalize dots to spaces 430 - for (query[start..end], 0..) |c, i| { 431 - buf[i] = if (c == '.') ' ' else c; 444 + // single word: just add prefix wildcard 445 + if (word_count == 1) { 446 + const buf = try alloc.alloc(u8, total_word_len + 1); 447 + var pos: usize = 0; 448 + for (trimmed) |c| { 449 + if (c != ' ' and c != '.') { 450 + buf[pos] = c; 451 + pos += 1; 452 + } 453 + } 454 + buf[pos] = '*'; 455 + return buf; 432 456 } 433 - buf[trimmed_len] = '*'; 434 457 435 - return buf[0 .. 
trimmed_len + 1]; 458 + // multiple words: join with " OR ", prefix on last 459 + // size = word chars + (n-1) * 4 for " OR " + 1 for "*" 460 + const buf_len = total_word_len + (word_count - 1) * 4 + 1; 461 + const buf = try alloc.alloc(u8, buf_len); 462 + 463 + var pos: usize = 0; 464 + var current_word: usize = 0; 465 + in_word = false; 466 + 467 + for (trimmed) |c| { 468 + const is_sep = (c == ' ' or c == '.'); 469 + if (is_sep) { 470 + if (in_word) { 471 + // end of word - add " OR " if not last 472 + current_word += 1; 473 + if (current_word < word_count) { 474 + @memcpy(buf[pos .. pos + 4], " OR "); 475 + pos += 4; 476 + } 477 + } 478 + in_word = false; 479 + } else { 480 + buf[pos] = c; 481 + pos += 1; 482 + in_word = true; 483 + } 484 + } 485 + buf[pos] = '*'; 486 + return buf; 436 487 } 437 488 438 489 /// Find documents similar to a given document using vector similarity
+4
backend/src/server.zig
··· 117 117 const end = mem.indexOf(u8, encoded, "&") orelse encoded.len; 118 118 const query_encoded = encoded[0..end]; 119 119 const buf = try alloc.dupe(u8, query_encoded); 120 + // decode + as space (form-urlencoded), then percent-decode 121 + for (buf) |*c| { 122 + if (c.* == '+') c.* = ' '; 123 + } 120 124 return std.Uri.percentDecodeInPlace(buf); 121 125 } 122 126 }
+2 -2
mcp/src/leaflet_mcp/server.py
··· 93 93 async def search( 94 94 query: str = "", 95 95 tag: str | None = None, 96 - limit: int = 20, 96 + limit: int = 5, 97 97 ) -> list[SearchResult]: 98 98 """search leaflet documents and publications. 99 99 ··· 103 103 args: 104 104 query: search query (searches titles and content) 105 105 tag: optional tag to filter by (only applies to documents) 106 - limit: max results to return (default 20, max 40) 106 + limit: max results to return (default 5, max 40) 107 107 108 108 returns: 109 109 list of search results with uri, title, snippet, and metadata