See the best posts from any Bluesky account
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Add seed:top-accounts command to bootstrap top Bluesky accounts

Fetches the ~1k most-followed accounts from bluecrawler.com's SSE API
and creates User + BackfillJob rows with queue dispatch for each.
Supports --dry-run to preview without DB writes.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

+469
+54
app/lib/bluecrawler/parser.ts
/**
 * Parse a bluecrawler.com SSE response into a list of ranked accounts.
 *
 * The response is a Server-Sent Events stream where each `data:` line
 * contains a JSON payload with `{ genTime, rankings }`. Only the first
 * `data:` line is parsed; anything before it (e.g. curl progress output)
 * and anything after it (later events, keep-alive comments, blank lines)
 * is ignored.
 */

export interface BluecrawlerAccount {
  did: string
  handle: string
  displayName: string
  followersCount: number
}

/**
 * Parse `text` as JSON, returning `undefined` instead of throwing.
 * `JSON.parse` never yields `undefined`, so it is a safe failure sentinel.
 */
function tryParseJson(text: string): unknown {
  try {
    return JSON.parse(text)
  } catch {
    return undefined
  }
}

/**
 * Extract the ranked accounts from a raw bluecrawler SSE response.
 *
 * @param raw - Raw response text, possibly with leading noise and trailing events
 * @returns Accounts that have both a string `did` and a string `handle`;
 *   a missing or mistyped `displayName` becomes `''` and `followersCount` becomes `0`
 * @throws Error if no `data: {` line exists, the payload is not valid JSON,
 *   or the payload has no `rankings` array
 */
export function parseBluecrawlerResponse(raw: string): BluecrawlerAccount[] {
  const dataPrefix = 'data: '

  // The raw response may contain curl progress output or other noise.
  // Find the first occurrence of "data: {" which starts the SSE payload.
  const idx = raw.indexOf(dataPrefix + '{')
  if (idx === -1) {
    throw new Error('No SSE data line found in bluecrawler response')
  }

  const remainder = raw.slice(idx + dataPrefix.length)

  // An SSE data line ends at the first line terminator, and valid JSON never
  // contains a raw CR/LF inside a string, so cutting there isolates the
  // payload from any events that follow it.
  const lineBreak = remainder.search(/[\r\n]/)
  const firstLine = lineBreak === -1 ? remainder : remainder.slice(0, lineBreak)

  let parsed = tryParseJson(firstLine)
  if (parsed === undefined && firstLine !== remainder) {
    // Fall back to the whole remainder so payloads that were pretty-printed
    // across several lines keep parsing as they did before.
    parsed = tryParseJson(remainder)
  }
  if (parsed === undefined) {
    throw new Error(`Failed to parse bluecrawler JSON: ${remainder.slice(0, 100)}...`)
  }

  const rankings =
    typeof parsed === 'object' && parsed !== null
      ? (parsed as { rankings?: unknown }).rankings
      : undefined
  if (!Array.isArray(rankings)) {
    throw new Error('No rankings array in bluecrawler response')
  }

  const accounts: BluecrawlerAccount[] = []
  for (const entry of rankings as unknown[]) {
    if (typeof entry !== 'object' || entry === null) continue
    const { did, handle, displayName, followersCount } = entry as Record<string, unknown>
    // did and handle are required; entries without them are dropped.
    if (typeof did !== 'string' || typeof handle !== 'string') continue
    accounts.push({
      did,
      handle,
      displayName: typeof displayName === 'string' ? displayName : '',
      followersCount: typeof followersCount === 'number' ? followersCount : 0,
    })
  }

  return accounts
}
+59
app/lib/bluecrawler/seeder.ts
import User from '#models/user'
import BackfillJobRow from '#models/backfill_job'
import type { BluecrawlerAccount } from './parser.js'

/** Tally returned by {@link seedTopAccounts}. */
export interface SeedResult {
  /** Accounts for which new rows were written and a job was dispatched. */
  created: number
  /** Accounts left untouched because a User row was already present. */
  skipped: number
}

/**
 * Seed the database from a list of bluecrawler accounts.
 *
 * Accounts are processed one at a time, in order. An account whose DID
 * already resolves via `User.find` is counted as skipped. Otherwise a User
 * row and a BackfillJob row (state `'running'`, zero fetched posts) are
 * created and `dispatchFn` is awaited with the DID.
 *
 * NOTE(review): the three writes for a new account are not wrapped in a
 * transaction here. If the BackfillJob insert or the dispatch fails, the
 * User row appears to remain and a re-run would skip that account. Confirm
 * whether that is acceptable.
 *
 * @param accounts - Parsed bluecrawler accounts to seed
 * @param dispatchFn - Awaited with the DID of every newly created account
 * @param onProgress - Optional callback invoked after each account with (processed, total)
 * @returns Counts of created and skipped accounts
 */
export async function seedTopAccounts(
  accounts: BluecrawlerAccount[],
  dispatchFn: (did: string) => Promise<void>,
  onProgress?: (current: number, total: number) => void
): Promise<SeedResult> {
  const tally: SeedResult = { created: 0, skipped: 0 }

  for (const [position, { did, handle, displayName }] of accounts.entries()) {
    const alreadyKnown = Boolean(await User.find(did))

    if (alreadyKnown) {
      tally.skipped += 1
    } else {
      await User.create({
        did,
        handle,
        // An empty display name is stored as null.
        displayName: displayName || null,
        firstSeenAt: Date.now(),
      })

      await BackfillJobRow.create({
        did,
        startedAt: Date.now(),
        state: 'running',
        fetchedPosts: 0,
      })

      await dispatchFn(did)
      tally.created += 1
    }

    // Reported once per account, whether it was created or skipped.
    onProgress?.(position + 1, accounts.length)
  }

  return tally
}
+148
commands/seed_top_accounts.ts
import { BaseCommand, flags } from '@adonisjs/core/ace'
import { CommandOptions } from '@adonisjs/core/types/ace'
import BackfillJob from '#jobs/backfill_job'
import { parseBluecrawlerResponse } from '#lib/bluecrawler/parser'
import { seedTopAccounts } from '#lib/bluecrawler/seeder'

const BLUECRAWLER_URL = 'https://api.bluecrawler.com/info'

/** Marker that starts the SSE line carrying the JSON rankings payload. */
const SSE_DATA_MARKER = 'data: {'

/** Length of the `data: ` field prefix that precedes the JSON text. */
const SSE_DATA_PREFIX_LENGTH = 'data: '.length

/** Upper bound on how long we wait for the payload before giving up. */
const FETCH_TIMEOUT_MS = 60000

/** Number of accounts listed individually in --dry-run output. */
const DRY_RUN_PREVIEW_LIMIT = 20

/** Render an unknown thrown value as a log-friendly message. */
function describeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err)
}

/**
 * Index of the first CR or LF at or after `from`, or -1 if the line that
 * starts at `from` has not been terminated yet.
 */
function findLineEnd(text: string, from: number): number {
  const lf = text.indexOf('\n', from)
  const cr = text.indexOf('\r', from)
  if (lf === -1) return cr
  if (cr === -1) return lf
  return Math.min(lf, cr)
}

/**
 * Ace command: node ace seed:top-accounts
 *
 * Fetches the top ~1,000 most-followed Bluesky accounts from the
 * bluecrawler.com SSE API and creates backfill jobs for each one.
 *
 * Accounts that already exist in the database are skipped.
 *
 * Use --dry-run to preview what would be seeded without writing to the DB.
 */
export default class SeedTopAccounts extends BaseCommand {
  static commandName = 'seed:top-accounts'
  static description = 'Seed top Bluesky accounts from bluecrawler and create backfill jobs'
  static options: CommandOptions = { startApp: true }

  @flags.boolean({ description: 'Preview what would be seeded without writing to DB' })
  declare dryRun: boolean

  /**
   * Fetch, parse and (unless --dry-run) seed the accounts. Fetch and parse
   * failures are logged and reported through exit code 1.
   */
  async run() {
    // ---- Fetch from bluecrawler ----
    this.logger.info('Fetching top accounts from bluecrawler...')

    let raw: string
    try {
      raw = await this.fetchBluecrawler()
    } catch (err) {
      this.logger.error(`Failed to fetch from bluecrawler: ${describeError(err)}`)
      this.exitCode = 1
      return
    }

    // ---- Parse response ----
    let accounts: ReturnType<typeof parseBluecrawlerResponse>
    try {
      accounts = parseBluecrawlerResponse(raw)
    } catch (err) {
      this.logger.error(`Failed to parse bluecrawler response: ${describeError(err)}`)
      this.exitCode = 1
      return
    }

    this.logger.info(`Parsed ${accounts.length} accounts from bluecrawler`)

    if (accounts.length === 0) {
      this.logger.warning('No accounts found — nothing to seed')
      return
    }

    // ---- Dry run ----
    if (this.dryRun) {
      this.logger.info('[dry-run] Would seed the following accounts:')
      for (const account of accounts.slice(0, DRY_RUN_PREVIEW_LIMIT)) {
        this.logger.info(
          `  ${account.handle} (${account.followersCount.toLocaleString()} followers)`
        )
      }
      if (accounts.length > DRY_RUN_PREVIEW_LIMIT) {
        this.logger.info(`  ... and ${accounts.length - DRY_RUN_PREVIEW_LIMIT} more`)
      }
      return
    }

    // ---- Seed ----
    const result = await seedTopAccounts(
      accounts,
      async (did) => {
        await BackfillJob.dispatch({ did })
      },
      (current, total) => {
        // Log every 100th account plus the final one to keep output short.
        if (current % 100 === 0 || current === total) {
          this.logger.info(`[progress] ${current}/${total}`)
        }
      }
    )

    this.logger.info(
      `Done! Created ${result.created} backfill jobs, skipped ${result.skipped} existing accounts.`
    )
  }

  /**
   * Fetch the bluecrawler SSE stream. Reads until the first complete
   * `data:` line holding valid JSON arrives, aborts the connection, and
   * returns just that line (`data: {...}`).
   *
   * A data line is only parsed once its line terminator has been received,
   * so the payload is parsed once rather than on every chunk, and events
   * that arrive after it cannot break detection. If the stream ends before
   * such a line is seen, everything received is returned so the parser can
   * report the problem.
   *
   * @throws Error on a non-2xx status, a missing response body, a network
   *   failure, or when no payload arrives within FETCH_TIMEOUT_MS
   */
  private async fetchBluecrawler(): Promise<string> {
    const controller = new AbortController()
    // SSE connections stay open indefinitely; never wait forever.
    const timeout = setTimeout(() => {
      controller.abort(
        new Error(`Timed out after ${FETCH_TIMEOUT_MS / 1000}s waiting for bluecrawler payload`)
      )
    }, FETCH_TIMEOUT_MS)

    try {
      const response = await fetch(BLUECRAWLER_URL, {
        headers: {
          'User-Agent':
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:149.0) Gecko/20100101 Firefox/149.0',
          Accept: 'text/event-stream',
          Origin: 'https://bluecrawler.com',
          Referer: 'https://bluecrawler.com/',
          'Sec-Fetch-Site': 'same-site',
        },
        signal: controller.signal,
      })

      if (!response.ok) {
        throw new Error(`HTTP ${response.status}: ${response.statusText}`)
      }
      if (!response.body) {
        throw new Error('bluecrawler response has no body')
      }

      const reader = response.body.getReader()
      const decoder = new TextDecoder()
      let buffer = ''
      // Offset from which to look for the next candidate data line; advanced
      // past lines that were complete but did not contain valid JSON.
      let searchFrom = 0

      try {
        while (true) {
          const { done, value } = await reader.read()
          if (done) break

          buffer += decoder.decode(value, { stream: true })

          while (true) {
            const dataIdx = buffer.indexOf(SSE_DATA_MARKER, searchFrom)
            if (dataIdx === -1) break

            const lineEnd = findLineEnd(buffer, dataIdx)
            // Line still incomplete — wait for the next chunk.
            if (lineEnd === -1) break

            const line = buffer.slice(dataIdx, lineEnd)
            try {
              JSON.parse(line.slice(SSE_DATA_PREFIX_LENGTH))
              controller.abort()
              return line
            } catch {
              // Complete but malformed line: skip it and try the next one.
              searchFrom = lineEnd
            }
          }
        }

        // Flush any bytes the streaming decoder is still holding.
        buffer += decoder.decode()
      } finally {
        reader.releaseLock()
      }

      return buffer
    } finally {
      clearTimeout(timeout)
    }
  }
}
+92
tests/unit/bluecrawler.spec.ts
import { test } from '@japa/runner'
import { parseBluecrawlerResponse } from '#lib/bluecrawler/parser'

/** Timestamp used for every fixture payload. */
const GEN_TIME = '2026-04-13T07:15:44.873Z'

/** Build a single SSE `data:` line wrapping the given rankings array. */
function sseEvent(rankings: unknown[]): string {
  return 'data: ' + JSON.stringify({ genTime: GEN_TIME, rankings })
}

test.group('parseBluecrawlerResponse', () => {
  test('parses SSE data line into ranked accounts', ({ assert }) => {
    const alice = {
      did: 'did:plc:abc123',
      displayName: 'Alice',
      followersCount: 500000,
      handle: 'alice.bsky.social',
      avatar: 'https://cdn.bsky.app/img/avatar/plain/did:plc:abc123/bafkreiabc',
    }
    const bob = {
      did: 'did:plc:def456',
      displayName: 'Bob',
      followersCount: 100000,
      handle: 'bob.bsky.social',
      avatar: 'https://cdn.bsky.app/img/avatar/plain/did:plc:def456/bafkreidef',
    }

    const parsed = parseBluecrawlerResponse(sseEvent([alice, bob]))

    // The avatar field is not part of the parsed account shape.
    assert.lengthOf(parsed, 2)
    assert.deepEqual(parsed[0], {
      did: 'did:plc:abc123',
      handle: 'alice.bsky.social',
      displayName: 'Alice',
      followersCount: 500000,
    })
    assert.deepEqual(parsed[1], {
      did: 'did:plc:def456',
      handle: 'bob.bsky.social',
      displayName: 'Bob',
      followersCount: 100000,
    })
  })

  test('handles curl progress noise before data line', ({ assert }) => {
    const curlNoise = '  % Total    % Received\n' + '  0     0    0     0\n'
    const event = sseEvent([
      {
        did: 'did:plc:abc123',
        displayName: 'Test',
        followersCount: 10000,
        handle: 'test.bsky.social',
        avatar: 'https://example.com/avatar.jpg',
      },
    ])

    const parsed = parseBluecrawlerResponse(curlNoise + event)

    assert.lengthOf(parsed, 1)
    assert.equal(parsed[0].did, 'did:plc:abc123')
  })

  test('throws on payload with no data line', ({ assert }) => {
    const parseGarbage = () => parseBluecrawlerResponse('no data here')
    assert.throws(parseGarbage, 'No SSE data line found')
  })

  test('throws on malformed JSON', ({ assert }) => {
    const parseBroken = () => parseBluecrawlerResponse('data: {not json')
    assert.throws(parseBroken, /Failed to parse/)
  })

  test('skips entries missing required fields', ({ assert }) => {
    const complete = {
      did: 'did:plc:abc123',
      handle: 'alice.bsky.social',
      displayName: 'Alice',
      followersCount: 500000,
    }
    const withoutDid = {
      handle: 'missing-did.bsky.social',
      displayName: 'No DID',
      followersCount: 1000,
    }
    const withoutHandle = {
      did: 'did:plc:no-handle',
      displayName: 'No Handle',
      followersCount: 1000,
    }

    const parsed = parseBluecrawlerResponse(sseEvent([complete, withoutDid, withoutHandle]))

    assert.lengthOf(parsed, 1)
    assert.equal(parsed[0].did, 'did:plc:abc123')
  })
})
+116
tests/unit/seed_top_accounts.spec.ts
import { test } from '@japa/runner'
import testUtils from '@adonisjs/core/services/test_utils'
import User from '#models/user'
import BackfillJobRow from '#models/backfill_job'
import type { BluecrawlerAccount } from '#lib/bluecrawler/parser'
import { seedTopAccounts } from '#lib/bluecrawler/seeder'

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/** Build `count` fixture accounts with zero-padded DIDs and descending follower counts. */
function makeAccounts(count: number, startFollowers = 100000): BluecrawlerAccount[] {
  const fixtures: BluecrawlerAccount[] = []
  for (let n = 0; n < count; n++) {
    fixtures.push({
      did: `did:plc:test${String(n).padStart(4, '0')}`,
      handle: `user${n}.bsky.social`,
      displayName: `User ${n}`,
      followersCount: startFollowers - n,
    })
  }
  return fixtures
}

/** A dispatch function that records every DID it is called with. */
function recordingDispatcher() {
  const dispatched: string[] = []
  const dispatch = async (did: string) => {
    dispatched.push(did)
  }
  return { dispatched, dispatch }
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

test.group('seedTopAccounts', (group) => {
  // Each test runs inside a global transaction set up by the test utils.
  group.each.setup(() => testUtils.db().withGlobalTransaction())

  test('creates User and BackfillJob rows for each account', async ({ assert }) => {
    const accounts = makeAccounts(3)
    const { dispatched, dispatch } = recordingDispatcher()

    const outcome = await seedTopAccounts(accounts, dispatch)

    assert.equal(outcome.created, 3)
    assert.equal(outcome.skipped, 0)

    for (const { did, handle, displayName } of accounts) {
      const user = await User.find(did)
      assert.isNotNull(user, `User ${did} should exist`)
      assert.equal(user!.handle, handle)
      assert.equal(user!.displayName, displayName)

      const job = await BackfillJobRow.find(did)
      assert.isNotNull(job, `BackfillJob ${did} should exist`)
      assert.equal(job!.state, 'running')
    }

    assert.deepEqual(
      dispatched,
      accounts.map((a) => a.did)
    )
  })

  test('skips accounts that already have a User row', async ({ assert }) => {
    const accounts = makeAccounts(2)
    const [preexisting, fresh] = accounts

    // Pre-create one user
    await User.create({
      did: preexisting.did,
      handle: preexisting.handle,
      firstSeenAt: Date.now(),
    })

    const { dispatched, dispatch } = recordingDispatcher()
    const outcome = await seedTopAccounts(accounts, dispatch)

    assert.equal(outcome.created, 1)
    assert.equal(outcome.skipped, 1)
    assert.deepEqual(dispatched, [fresh.did])
  })

  test('skips accounts that already have a BackfillJob row', async ({ assert }) => {
    const accounts = makeAccounts(1)
    const [only] = accounts

    // Pre-create user + backfill job
    await User.create({
      did: only.did,
      handle: only.handle,
      firstSeenAt: Date.now(),
      backfilledAt: Date.now(),
    })
    await BackfillJobRow.create({
      did: only.did,
      startedAt: Date.now(),
      state: 'done',
      fetchedPosts: 100,
    })

    const { dispatched, dispatch } = recordingDispatcher()
    const outcome = await seedTopAccounts(accounts, dispatch)

    assert.equal(outcome.created, 0)
    assert.equal(outcome.skipped, 1)
    assert.lengthOf(dispatched, 0)
  })

  test('reports progress via callback', async ({ assert }) => {
    const accounts = makeAccounts(5)
    const progressCalls: Array<{ current: number; total: number }> = []
    const recordProgress = (current: number, total: number) => {
      progressCalls.push({ current, total })
    }

    await seedTopAccounts(accounts, async () => {}, recordProgress)

    assert.lengthOf(progressCalls, 5)
    assert.deepEqual(progressCalls[0], { current: 1, total: 5 })
    assert.deepEqual(progressCalls[4], { current: 5, total: 5 })
  })
})