search for standard sites pub-search.waow.tech
search zig blog atproto
11
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: add whitewind (com.whtwnd.blog.entry) indexing support

- add com.whtwnd.blog.entry to tap collection filters and document routing
- add content-as-string fallback in extractor (whitewind stores its markdown directly in the `content` field as a plain string)
- add visibility filter to skip non-public whitewind entries
- add whitewind platform to frontend (filter button, URL pattern, config)
- add stats link to header

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

zzstoatzz 5a3bfd13 4885accb

+70 -3
+34
backend/src/ingest/extractor.zig
··· 11 11 /// from publication basePath. Documents that don't match any known platform are "other". 12 12 pub const Platform = enum { 13 13 leaflet, 14 + whitewind, 14 15 other, // site.standard.* documents not matching a known platform 15 16 unknown, 16 17 17 18 pub fn fromCollection(collection: []const u8) Platform { 18 19 if (mem.startsWith(u8, collection, "pub.leaflet.")) return .leaflet; 19 20 if (mem.startsWith(u8, collection, "site.standard.")) return .other; 21 + if (mem.startsWith(u8, collection, "com.whtwnd.")) return .whitewind; 20 22 return .unknown; 21 23 } 22 24 ··· 149 151 return try buf.toOwnedSlice(allocator); 150 152 } 151 153 154 + // try content as plain string (e.g., com.whtwnd.blog.entry stores markdown here) 155 + if (zat.json.getString(record, "content")) |text| { 156 + try buf.appendSlice(allocator, text); 157 + return try buf.toOwnedSlice(allocator); 158 + } 159 + 152 160 // fall back to leaflet-style block parsing 153 161 if (zat.json.getString(record, "description")) |desc| { 154 162 try buf.appendSlice(allocator, desc); ··· 255 263 try std.testing.expectEqual(Platform.other, Platform.fromCollection("site.standard.publication")); 256 264 } 257 265 266 + test "Platform.fromCollection: whitewind" { 267 + try std.testing.expectEqual(Platform.whitewind, Platform.fromCollection("com.whtwnd.blog.entry")); 268 + } 269 + 258 270 test "Platform.fromCollection: unknown" { 259 271 try std.testing.expectEqual(Platform.unknown, Platform.fromCollection("something.else")); 260 272 try std.testing.expectEqual(Platform.unknown, Platform.fromCollection("")); ··· 262 274 263 275 test "Platform.name" { 264 276 try std.testing.expectEqualStrings("leaflet", Platform.leaflet.name()); 277 + try std.testing.expectEqualStrings("whitewind", Platform.whitewind.name()); 265 278 try std.testing.expectEqualStrings("other", Platform.other.name()); 266 279 try std.testing.expectEqualStrings("unknown", Platform.unknown.name()); 267 280 } ··· 285 298 // content_type 
should be extracted for platform detection (custom domain support) 286 299 try std.testing.expectEqualStrings("pub.leaflet.content", doc.content_type.?); 287 300 } 301 + 302 + test "extractDocument: com.whtwnd.blog.entry (whitewind)" { 303 + const allocator = std.testing.allocator; 304 + 305 + const test_json = 306 + \\{"title":"Love Across Discontinuity","content":"I've been thinking about what it means to love...","createdAt":"2026-02-08T08:01:41.776Z","visibility":"public"} 307 + ; 308 + 309 + const parsed = try json.parseFromSlice(json.Value, allocator, test_json, .{}); 310 + defer parsed.deinit(); 311 + 312 + var doc = try extractDocument(allocator, parsed.value.object, "com.whtwnd.blog.entry"); 313 + defer doc.deinit(); 314 + 315 + try std.testing.expectEqualStrings("Love Across Discontinuity", doc.title); 316 + try std.testing.expectEqualStrings("I've been thinking about what it means to love...", doc.content); 317 + try std.testing.expectEqualStrings("2026-02-08T08:01:41.776Z", doc.created_at.?); 318 + try std.testing.expectEqual(Platform.whitewind, doc.platform); 319 + try std.testing.expect(doc.publication_uri == null); 320 + try std.testing.expect(doc.content_type == null); 321 + }
+15 -1
backend/src/ingest/tap.zig
··· 17 17 const STANDARD_DOCUMENT = "site.standard.document"; 18 18 const STANDARD_PUBLICATION = "site.standard.publication"; 19 19 20 + // whitewind blog entries 21 + const WHITEWIND_ENTRY = "com.whtwnd.blog.entry"; 22 + 20 23 fn isDocumentCollection(collection: []const u8) bool { 21 24 return mem.eql(u8, collection, LEAFLET_DOCUMENT) or 22 - mem.eql(u8, collection, STANDARD_DOCUMENT); 25 + mem.eql(u8, collection, STANDARD_DOCUMENT) or 26 + mem.eql(u8, collection, WHITEWIND_ENTRY); 23 27 } 24 28 25 29 fn isPublicationCollection(collection: []const u8) bool { ··· 211 215 }; 212 216 213 217 if (isDocumentCollection(rec.collection)) { 218 + // skip non-public whitewind entries 219 + if (mem.eql(u8, rec.collection, WHITEWIND_ENTRY)) { 220 + const record_val: json.Value = .{ .object = inner_record }; 221 + const visibility = zat.json.getString(record_val, "visibility") orelse "public"; 222 + if (!mem.eql(u8, visibility, "public")) { 223 + logfire.span("tap.dropped", .{ .reason = "not_public", .collection = rec.collection, .uri = uri }).end(); 224 + return; 225 + } 226 + } 227 + 214 228 processDocument(allocator, uri, did.raw, rec.rkey, inner_record, rec.collection) catch |err| { 215 229 logfire.err("document processing error: {}", .{err}); 216 230 };
+20 -1
site/index.html
··· 52 52 color: #444; 53 53 } 54 54 55 + h1 a.stats-link { 56 + font-size: 10px; 57 + color: #553; 58 + } 59 + 60 + h1 a.stats-link:hover { 61 + color: #d4956a; 62 + } 63 + 55 64 h1 .by { 56 65 font-size: 10px; 57 66 color: #555; ··· 591 600 </head> 592 601 <body> 593 602 <div class="container"> 594 - <h1><a href="/" class="title">pub search</a> <span class="by">by <a href="https://bsky.app/profile/zzstoatzz.io" target="_blank">@zzstoatzz.io</a></span> <a href="https://tangled.sh/@zzstoatzz.io/leaflet-search" target="_blank" class="src">[src]</a></h1> 603 + <h1><a href="/" class="title">pub search</a> <span class="by">by <a href="https://bsky.app/profile/zzstoatzz.io" target="_blank">@zzstoatzz.io</a></span> <a href="https://tangled.sh/@zzstoatzz.io/leaflet-search" target="_blank" class="src">[src]</a> <a id="header-stats" class="stats-link"></a></h1> 595 604 596 605 <div class="search-box"> 597 606 <input type="text" id="query" placeholder="search content..." autofocus> ··· 873 882 // greengale uses basePath + path pattern 874 883 docUrl: null 875 884 }, 885 + whitewind: { 886 + home: 'https://whtwnd.com', 887 + label: 'whtwnd.com', 888 + docUrl: (did, rkey) => `https://whtwnd.com/${did}/${rkey}` 889 + }, 876 890 other: { 877 891 home: 'https://standard.site', 878 892 label: 'other', ··· 1026 1040 { id: 'pckt', label: 'pckt' }, 1027 1041 { id: 'offprint', label: 'offprint' }, 1028 1042 { id: 'greengale', label: 'greengale' }, 1043 + { id: 'whitewind', label: 'whitewind' }, 1029 1044 { id: 'other', label: 'other' }, 1030 1045 ]; 1031 1046 const html = platforms.map(p => ` ··· 1252 1267 .then(r => r.json()) 1253 1268 .then(data => { 1254 1269 statsDiv.innerHTML = `${data.documents} documents, ${data.publications} publications · <a href="${API_URL}/dashboard" target="_blank">stats</a>`; 1270 + const headerStats = document.getElementById('header-stats'); 1271 + headerStats.href = `${API_URL}/dashboard`; 1272 + headerStats.target = '_blank'; 1273 + 
headerStats.textContent = `[${data.documents.toLocaleString()} docs]`; 1255 1274 }) 1256 1275 .catch(() => { 1257 1276 statsDiv.textContent = 'connecting...';
+1 -1
tap/fly.toml
··· 9 9 TAP_BIND = ':2480' 10 10 TAP_RELAY_URL = 'https://relay1.us-east.bsky.network' 11 11 TAP_SIGNAL_COLLECTION = 'site.standard.document' 12 - TAP_COLLECTION_FILTERS = 'pub.leaflet.document,pub.leaflet.publication,site.standard.document,site.standard.publication' 12 + TAP_COLLECTION_FILTERS = 'pub.leaflet.document,pub.leaflet.publication,site.standard.document,site.standard.publication,com.whtwnd.blog.entry' 13 13 TAP_LOG_LEVEL = 'info' 14 14 TAP_RESYNC_PARALLELISM = '1' 15 15 TAP_FIREHOSE_PARALLELISM = '5'