···88const stateStore = new Map<string, NodeSavedState>();
99const sessionStore = new Map<string, NodeSavedSession>();
10101111+// Single-process request lock: serializes access to a given key (e.g. a
1212+// session DID) so concurrent token refreshes never race. Without this,
1313+// @atproto/oauth-client-node prints
1414+// "No lock mechanism provided. Credentials might get revoked."
1515+// and two in-flight requests for the same user can both try to rotate the
1616+// refresh token — the losing request has its credentials invalidated and
1717+// the next AppView call returns 500 through the PDS proxy. (That was the
1818+// original root cause of the network-attention regression on staging.)
1919+//
2020+// Implementation per @atproto/oauth-client-node docs: a Map of per-key
2121+// promise chains. `fn` runs only after the previous holder releases, and
2222+// `releaseLock` is always called in finally so a thrown `fn` doesn't wedge
2323+// the chain for subsequent waiters.
// Tail of the wait queue for each key: the promise that settles when the
// most recently enqueued caller for that key has finished.
const locks = new Map<string, Promise<void>>();

/**
 * Run `fn` with exclusive access to `key` within this process.
 *
 * Callers that share a key execute strictly one after another, in the order
 * they called `requestLock`; callers with different keys do not wait on each
 * other.
 *
 * @param key - Identifier of the resource to serialize on.
 * @param fn - Work to perform while holding the lock; may be sync or async.
 * @returns Whatever `fn` returns (awaited).
 * @throws Whatever `fn` throws or rejects with. The lock is still released.
 */
async function requestLock<T>(
  key: string,
  fn: () => T | PromiseLike<T>,
): Promise<T> {
  const predecessor = locks.get(key) ?? Promise.resolve();

  // Publish our own ticket before yielding so any later caller queues
  // behind us rather than behind `predecessor`. The Promise executor runs
  // synchronously, so `release` is the real resolver by the next line.
  let release: () => void = () => {};
  const ticket = new Promise<void>((resolve) => {
    release = resolve;
  });
  locks.set(key, ticket);

  try {
    await predecessor;
    const result = await fn();
    return result;
  } finally {
    // Wake the next waiter even when `fn` failed, so one bad call cannot
    // stall the queue.
    release();
    // Still the tail means nobody queued behind us; remove the entry so the
    // map does not grow with every key ever seen.
    const stillTail = locks.get(key) === ticket;
    if (stillTail) {
      locks.delete(key);
    }
  }
}
4848+1149function createClient(): NodeOAuthClient {
1250 const appUrl = process.env.APP_URL;
1351···20582159 return new NodeOAuthClient({
2260 clientMetadata: buildClientMetadata(appUrl),
6161+ requestLock,
2362 stateStore: {
2463 async get(key: string) {
2564 return stateStore.get(key);
···11-import type { AppBskyFeedDefs } from "@atproto/api";
11+import type { Agent, AppBskyFeedDefs } from "@atproto/api";
2233type PostView = AppBskyFeedDefs.PostView;
44-55-const APPVIEW_URL =
66- "https://api.bsky.app/xrpc/app.bsky.feed.searchPosts";
7485const SEARCH_QUERIES = [
96 "atmosphereconf",
···2118const MAX_ATTEMPTS = 3;
2219const BASE_BACKOFF_MS = 200;
23202424-interface SearchResponse {
2121+interface SearchPageResult {
2522 posts: PostView[];
2626- cursor?: string;
2323+ cursor: string | undefined;
2724}
28252929-function isRetryableStatus(status: number): boolean {
3030- return status === 429 || (status >= 500 && status < 600);
2626+interface SearchPageParams {
2727+ q: string;
2828+ sort: "latest";
2929+ since: string;
3030+ until: string;
3131+ limit: number;
3232+ cursor?: string;
3133}
32343335/**
···5355}
54565557/**
5656- * Fetch a single search page with bounded retries on transient failures.
5858+ * Inspect an arbitrary thrown value and decide whether to retry.
5959+ *
6060+ * Retryable: HTTP `429`, any `5xx`, and network errors (fetch / DNS / TCP
6161+ * failures that surface without a status). Non-retryable: every other
6262+ * HTTP status, e.g. 4xx responses other than 429.
6363+ *
6464+ * `@atproto/api` throws `XRPCError` with a numeric `status` on HTTP errors,
6565+ * so that's what we look at first. We fall back to `error.status` on plain
6666+ * objects for defensive parity.
6767+ */
function isRetryableError(err: unknown): boolean {
  // Non-object throwables (strings, numbers, null, undefined) carry no
  // status to inspect, so treat them as transient. How many times a retry
  // actually happens is decided by the caller's attempt cap, not here.
  if (typeof err !== "object" || err === null) return true;

  const status = (err as { status?: unknown }).status;

  // No numeric status: the failure happened before any HTTP response
  // existed (fetch / DNS / TCP). Aborts are NOT filtered out here; the
  // caller is expected to check its own AbortSignal before asking us.
  if (typeof status !== "number") return true;

  // Values below 100 are not HTTP status codes. `@atproto/xrpc` wraps
  // failures that never produced a response in an `XRPCError` whose
  // `status` is a `ResponseType` sentinel (`Unknown = 1`,
  // `InvalidResponse = 2`), and some HTTP stacks report `0` for network
  // failures. Without this branch those errors are classified as permanent
  // and network failures raised through the agent are never retried.
  // NOTE(review): confirm the sentinel values against the installed
  // `@atproto/xrpc` version.
  if (status < 100) return true;

  // Genuine HTTP status: only rate limiting and server-side errors are
  // worth another attempt.
  return status === 429 || (status >= 500 && status < 600);
}
7878+7979+/**
8080+ * Fetch a single search page via the user's authenticated agent with
8181+ * bounded retries on transient failures.
8282+ *
8383+ * Why authenticated: the public Bluesky AppView (`public.api.bsky.app`)
8484+ * returns `403 Forbidden` on paginated `searchPosts` requests to prevent
8585+ * unauthenticated scraping — documented behavior per bluesky-social/atproto
8686+ * issue #3583 and others. The alternate host `api.bsky.app` IP-blocks
8787+ * Railway egress. Authenticated-through-PDS is the only reliable path.
5788 *
5858- * Retries on HTTP 429, 5xx, and network errors with exponential backoff
5959- * (200ms → 400ms → 800ms) plus uniform jitter up to the same delay.
6060- * Non-retryable HTTP errors (4xx other than 429) and abort errors are
6161- * thrown immediately. The crawl has a 30s overall budget enforced upstream,
6262- * so retry counts and base delay are deliberately kept small.
8989+ * The original PDS path was returning 500 because `@atproto/oauth-client-node`
9090+ * was running without a `requestLock`, letting concurrent crawl operations
9191+ * race on token refresh and get credentials revoked. That lock is now
9292+ * installed in `src/lib/auth/client.ts`.
9393+ *
9494+ * Retries: 3 attempts, exponential backoff (200ms → 400ms → 800ms) with
9595+ * uniform jitter, retry only on 429/5xx/network. Abort always propagates
9696+ * immediately so a cancelled crawl never burns retries. Retry counts and
9797+ * delays are deliberately small because the overall crawl has a 30s budget
9898+ * enforced upstream in `src/app/api/crawl/route.ts`.
6399 */
64100async function fetchSearchPage(
6565- params: URLSearchParams,
101101+ agent: Agent,
102102+ params: SearchPageParams,
66103 signal?: AbortSignal,
6767-): Promise<SearchResponse> {
104104+): Promise<SearchPageResult> {
68105 let lastError: unknown;
6910670107 for (let attempt = 0; attempt < MAX_ATTEMPTS; attempt++) {
71108 if (signal?.aborted) throw signal.reason ?? new Error("Aborted");
7210973110 try {
7474- const res = await fetch(`${APPVIEW_URL}?${params.toString()}`, {
7575- signal,
7676- headers: { accept: "application/json" },
7777- });
7878- if (res.ok) {
7979- return (await res.json()) as SearchResponse;
8080- }
8181- if (!isRetryableStatus(res.status)) {
8282- throw new Error(
8383- `AppView searchPosts returned ${res.status} ${res.statusText}`,
8484- );
8585- }
8686- lastError = new Error(
8787- `AppView searchPosts returned ${res.status} ${res.statusText}`,
8888- );
111111+ const res = await agent.app.bsky.feed.searchPosts(params, { signal });
112112+ return { posts: res.data.posts, cursor: res.data.cursor };
89113 } catch (err) {
90114 // Abort always propagates immediately — never burn retries on a
91115 // cancelled crawl.
92116 if (signal?.aborted) throw err;
117117+ if (!isRetryableError(err)) throw err;
93118 lastError = err;
94119 }
95120···101126 }
102127 }
103128104104- throw lastError ?? new Error("AppView searchPosts failed after retries");
129129+ throw lastError ?? new Error("searchPosts failed after retries");
105130}
106131107132/**
108133 * Search Bluesky for conference-related posts during the conference period
109109- * and the post-conference aftermath.
110110- *
111111- * Calls the public AppView (`api.bsky.app`) directly via `fetch` instead of
112112- * routing through the user's PDS via `agent.app.bsky.feed.searchPosts`. The
113113- * search is a public read — there is no benefit to authenticating it, and
114114- * the OAuth/DPoP path through the PDS has been observed returning 5xx in
115115- * production while the public AppView returns 200 for the same query.
134134+ * and the post-conference aftermath. Returns deduplicated posts from all
135135+ * search queries.
116136 */
117137export async function searchConferencePosts(
138138+ agent: Agent,
118139 signal?: AbortSignal,
119140): Promise<PostView[]> {
120141 const seenUris = new Set<string>();
···125146126147 do {
127148 if (signal?.aborted) throw signal.reason ?? new Error("Aborted");
128128- const params = new URLSearchParams({
129129- q: query,
130130- sort: "latest",
131131- since: SEARCH_SINCE,
132132- until: SEARCH_UNTIL,
133133- limit: "100",
134134- });
135135- if (cursor) params.set("cursor", cursor);
136136-137149 try {
138138- const data = await fetchSearchPage(params, signal);
150150+ const page = await fetchSearchPage(
151151+ agent,
152152+ {
153153+ q: query,
154154+ sort: "latest",
155155+ since: SEARCH_SINCE,
156156+ until: SEARCH_UNTIL,
157157+ limit: 100,
158158+ cursor,
159159+ },
160160+ signal,
161161+ );
139162140140- for (const post of data.posts) {
163163+ for (const post of page.posts) {
141164 if (!seenUris.has(post.uri)) {
142165 seenUris.add(post.uri);
143166 posts.push(post);
144167 }
145168 }
146169147147- cursor = data.cursor;
170170+ cursor = page.cursor;
148171 } catch (error) {
149172 // Propagate abort errors so the whole crawl cancels cleanly.
150173 if (signal?.aborted) throw error;