The AtmosphereConf talks your skyline missed
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge pull request #20 from musicjunkieg/fix/searchposts-public-appview

fix: bypass PDS for searchPosts, hit public AppView directly

authored by chaos gremlin and committed by GitHub
78876724 f21c6733

+113 -19
+1 -1
src/lib/crawl/crawler.ts
··· 115 115 console.error("Constellation fetch failed, skipping RSVPs:", err); 116 116 return new Map<string, Set<string>>(); 117 117 }), 118 - searchConferencePosts(agent, signal), 118 + searchConferencePosts(signal), 119 119 ]); 120 120 121 121 throwIfAborted(signal);
+112 -18
src/lib/crawl/search.ts
··· 1 - import type { Agent } from "@atproto/api"; 2 1 import type { AppBskyFeedDefs } from "@atproto/api"; 3 2 4 3 type PostView = AppBskyFeedDefs.PostView; 4 + 5 + const APPVIEW_URL = 6 + "https://api.bsky.app/xrpc/app.bsky.feed.searchPosts"; 5 7 6 8 const SEARCH_QUERIES = [ 7 9 "atmosphereconf", ··· 16 18 // includes every post through the end of April 26, 2026. 17 19 const SEARCH_UNTIL = "2026-04-27T00:00:00.000Z"; 18 20 21 + const MAX_ATTEMPTS = 3; 22 + const BASE_BACKOFF_MS = 200; 23 + 24 + interface SearchResponse { 25 + posts: PostView[]; 26 + cursor?: string; 27 + } 28 + 29 + function isRetryableStatus(status: number): boolean { 30 + return status === 429 || (status >= 500 && status < 600); 31 + } 32 + 33 + /** 34 + * Sleep that aborts cleanly when the crawl signal aborts. Uses a one-shot 35 + * listener so the timer is cancelled instead of leaking past the abort. 36 + */ 37 + function abortableDelay(ms: number, signal?: AbortSignal): Promise<void> { 38 + return new Promise((resolve, reject) => { 39 + if (signal?.aborted) { 40 + reject(signal.reason ?? new Error("Aborted")); 41 + return; 42 + } 43 + const timer = setTimeout(() => { 44 + signal?.removeEventListener("abort", onAbort); 45 + resolve(); 46 + }, ms); 47 + function onAbort() { 48 + clearTimeout(timer); 49 + reject(signal!.reason ?? new Error("Aborted")); 50 + } 51 + signal?.addEventListener("abort", onAbort, { once: true }); 52 + }); 53 + } 54 + 55 + /** 56 + * Fetch a single search page with bounded retries on transient failures. 57 + * 58 + * Retries on HTTP 429, 5xx, and network errors with exponential backoff 59 + * (200ms → 400ms → 800ms) plus uniform jitter up to the same delay. 60 + * Non-retryable HTTP errors (4xx other than 429) and abort errors are 61 + * thrown immediately. The crawl has a 30s overall budget enforced upstream, 62 + * so retry counts and base delay are deliberately kept small. 
63 + */ 64 + async function fetchSearchPage( 65 + params: URLSearchParams, 66 + signal?: AbortSignal, 67 + ): Promise<SearchResponse> { 68 + let lastError: unknown; 69 + 70 + for (let attempt = 0; attempt < MAX_ATTEMPTS; attempt++) { 71 + if (signal?.aborted) throw signal.reason ?? new Error("Aborted"); 72 + 73 + try { 74 + const res = await fetch(`${APPVIEW_URL}?${params.toString()}`, { 75 + signal, 76 + headers: { accept: "application/json" }, 77 + }); 78 + if (res.ok) { 79 + return (await res.json()) as SearchResponse; 80 + } 81 + if (!isRetryableStatus(res.status)) { 82 + throw new Error( 83 + `AppView searchPosts returned ${res.status} ${res.statusText}`, 84 + ); 85 + } 86 + lastError = new Error( 87 + `AppView searchPosts returned ${res.status} ${res.statusText}`, 88 + ); 89 + } catch (err) { 90 + // Abort always propagates immediately — never burn retries on a 91 + // cancelled crawl. 92 + if (signal?.aborted) throw err; 93 + lastError = err; 94 + } 95 + 96 + // No sleep after the final attempt — we're about to give up. 97 + if (attempt < MAX_ATTEMPTS - 1) { 98 + const backoff = BASE_BACKOFF_MS * 2 ** attempt; 99 + const jitter = Math.random() * backoff; 100 + await abortableDelay(backoff + jitter, signal); 101 + } 102 + } 103 + 104 + throw lastError ?? new Error("AppView searchPosts failed after retries"); 105 + } 106 + 19 107 /** 20 108 * Search Bluesky for conference-related posts during the conference period 21 - * and the post-conference aftermath (see SEARCH_SINCE / SEARCH_UNTIL above). 22 - * Returns deduplicated posts from all search queries. 109 + * and the post-conference aftermath. 110 + * 111 + * Calls the public AppView (`api.bsky.app`) directly via `fetch` instead of 112 + * routing through the user's PDS via `agent.app.bsky.feed.searchPosts`. 
The 113 + * search is a public read — there is no benefit to authenticating it, and 114 + * the OAuth/DPoP path through the PDS has been observed returning 5xx in 115 + * production while the public AppView returns 200 for the same query. 23 116 */ 24 117 export async function searchConferencePosts( 25 - agent: Agent, 26 118 signal?: AbortSignal, 27 119 ): Promise<PostView[]> { 28 120 const seenUris = new Set<string>(); ··· 33 125 34 126 do { 35 127 if (signal?.aborted) throw signal.reason ?? new Error("Aborted"); 128 + const params = new URLSearchParams({ 129 + q: query, 130 + sort: "latest", 131 + since: SEARCH_SINCE, 132 + until: SEARCH_UNTIL, 133 + limit: "100", 134 + }); 135 + if (cursor) params.set("cursor", cursor); 136 + 36 137 try { 37 - const res = await agent.app.bsky.feed.searchPosts( 38 - { 39 - q: query, 40 - sort: "latest", 41 - since: SEARCH_SINCE, 42 - until: SEARCH_UNTIL, 43 - limit: 100, 44 - cursor, 45 - }, 46 - { signal }, 47 - ); 138 + const data = await fetchSearchPage(params, signal); 48 139 49 - for (const post of res.data.posts) { 140 + for (const post of data.posts) { 50 141 if (!seenUris.has(post.uri)) { 51 142 seenUris.add(post.uri); 52 143 posts.push(post); 53 144 } 54 145 } 55 146 56 - cursor = res.data.cursor; 147 + cursor = data.cursor; 57 148 } catch (error) { 58 149 // Propagate abort errors so the whole crawl cancels cleanly. 59 150 if (signal?.aborted) throw error; 60 - console.error(`Search query "${query}" failed:`, error); 151 + console.error( 152 + `Search query "${query}" failed after ${MAX_ATTEMPTS} attempts:`, 153 + error, 154 + ); 61 155 break; 62 156 } 63 157 } while (cursor);