···3232# Backfill cap (number of posts; defaults to 10000)
3333BACKFILL_MAX_POSTS=10000
34343535+# Feed generator — DID of the atproto account that owns the published
3636+# app.bsky.feed.generator records (top-1k, top-10k). Unset => feeds are
3737+# served but not discoverable via describeFeedGenerator.
3838+FEED_PUBLISHER_DID=
3939+3540# PostHog (analytics)
3641# Tracking is disabled when POSTHOG_API_KEY is unset.
3742POSTHOG_API_KEY=
+3
.env.test
···1010CLICKHOUSE_DB=favs_test
1111CLICKHOUSE_USER=favs
1212CLICKHOUSE_PASSWORD=
1313+1414+# Feed generator — tests override this per-case where relevant.
1515+FEED_PUBLISHER_DID=did:plc:testpublisher
+102
app/controllers/feed_generator_controller.ts
···11+import type { HttpContext } from '@adonisjs/core/http'
22+import {
33+ FEEDS,
44+ FEED_SKELETON_DEFAULT_LIMIT,
55+ FEED_SKELETON_MAX_LIMIT,
66+ InvalidCursorError,
77+ UnknownFeedError,
88+ didDocument,
99+ feedUri,
1010+ getSkeleton,
1111+ publisherDid,
1212+ serviceDid,
1313+} from '#services/feed_generator'
1414+1515+/**
1616+ * HTTP surface for the Bluesky custom feed generator.
1717+ *
1818+ * GET /.well-known/did.json — did:web document
1919+ * GET /xrpc/app.bsky.feed.describeFeedGenerator — lists our two feeds
2020+ * GET /xrpc/app.bsky.feed.getFeedSkeleton — ranked post skeleton
2121+ *
2222+ * The XRPC endpoints follow the lexicon shape defined at
2323+ * https://github.com/bluesky-social/atproto/tree/main/lexicons/app/bsky/feed
2424+ */
2525+export default class FeedGeneratorController {
2626+ async didDocument({ response }: HttpContext) {
2727+ response.header('Content-Type', 'application/json')
2828+ response.header('Cache-Control', 'public, max-age=3600')
2929+ return response.ok(didDocument())
3030+ }
3131+3232+ async describeFeedGenerator({ response }: HttpContext) {
3333+ const pub = publisherDid()
3434+ const feeds = pub
3535+ ? FEEDS.map((f) => ({ uri: `at://${pub}/app.bsky.feed.generator/${f.rkey}` }))
3636+ : []
3737+3838+ response.header('Cache-Control', 'public, max-age=300')
3939+ return response.ok({
4040+ did: serviceDid(),
4141+ feeds,
4242+ })
4343+ }
4444+4545+ async getFeedSkeleton({ request, response }: HttpContext) {
4646+ const feed = request.qs().feed
4747+ if (typeof feed !== 'string' || feed.length === 0) {
4848+ return response.badRequest({
4949+ error: 'InvalidRequest',
5050+ message: 'Missing required parameter: feed',
5151+ })
5252+ }
5353+5454+ const limit = parseLimit(request.qs().limit)
5555+ if (limit === null) {
5656+ return response.badRequest({
5757+ error: 'InvalidRequest',
5858+ message: `limit must be an integer in [1, ${FEED_SKELETON_MAX_LIMIT}]`,
5959+ })
6060+ }
6161+6262+ const cursorRaw = request.qs().cursor
6363+ const cursor = typeof cursorRaw === 'string' && cursorRaw.length > 0 ? cursorRaw : undefined
6464+6565+ try {
6666+ const result = await getSkeleton({ feedUri: feed, cursor, limit })
6767+ response.header('Cache-Control', 'no-store')
6868+ return response.ok(result)
6969+ } catch (err) {
7070+ if (err instanceof UnknownFeedError) {
7171+ return response.badRequest({
7272+ error: 'UnknownFeed',
7373+ message: `Unknown feed URI: ${feed}`,
7474+ })
7575+ }
7676+ if (err instanceof InvalidCursorError) {
7777+ return response.badRequest({
7878+ error: 'InvalidRequest',
7979+ message: 'Invalid cursor',
8080+ })
8181+ }
8282+ throw err
8383+ }
8484+ }
8585+}
8686+8787+/**
8888+ * Parse the `limit` query parameter. Returns:
8989+ * - the default when absent
9090+ * - the integer value when within [1, MAX]
9191+ * - null when present-but-invalid, signalling a 400 to the caller
9292+ */
9393+function parseLimit(raw: unknown): number | null {
9494+ if (raw === undefined || raw === '') return FEED_SKELETON_DEFAULT_LIMIT
9595+ if (typeof raw !== 'string') return null
9696+ const n = Number(raw)
9797+ if (!Number.isInteger(n)) return null
9898+ if (n < 1 || n > FEED_SKELETON_MAX_LIMIT) return null
9999+ return n
100100+}
101101+102102+export { feedUri }
+215
app/services/feed_generator.ts
···11+import env from '#start/env'
22+import NotifiedThreshold from '#models/notified_threshold'
33+import { parseAtUri } from '#lib/atproto/index'
44+55+/**
66+ * Feed-generator service. Two hardcoded feeds:
77+ *
88+ * 1k-favs — posts that have crossed 1,000 likes
99+ * 10k-favs — posts that have crossed 10,000 likes
1010+ *
1111+ * Ordering is by the time a post *first crossed* the threshold, newest first.
1212+ * Posts are never removed from a feed if their like count later drops.
1313+ *
1414+ * Source of truth: the `notified_thresholds` SQLite table populated by
1515+ * `ThresholdScanJob` — each row is `(subject_uri, threshold, fired_at)` and
1616+ * `fired_at` is the detection timestamp.
1717+ */
1818+1919+export interface FeedDefinition {
2020+ readonly rkey: string
2121+ readonly threshold: number
2222+ readonly displayName: string
2323+ readonly description: string
2424+}
2525+2626+export const FEEDS: readonly FeedDefinition[] = [
2727+ {
2828+ rkey: '1k-favs',
2929+ threshold: 1000,
3030+ displayName: 'favs.blue 1000 ❤️s',
3131+ description: 'All bluesky posts with over 1000 likes, powered by https://favs.blue',
3232+ },
3333+ {
3434+ rkey: '10k-favs',
3535+ threshold: 10_000,
3636+ displayName: 'favs.blue 10,000 ❤️s',
3737+ description: 'All bluesky posts with over 10,000 likes, powered by https://favs.blue',
3838+ },
3939+] as const
4040+4141+export const FEED_SKELETON_MAX_LIMIT = 100
4242+export const FEED_SKELETON_DEFAULT_LIMIT = 50
4343+4444+// ---------------------------------------------------------------------------
4545+// Identity / record locations
4646+// ---------------------------------------------------------------------------
4747+4848+/**
4949+ * DID of the feed-generator service itself. We use `did:web:<hostname>` so the
5050+ * DID document is self-hosted at `/.well-known/did.json` — no PLC registration
5151+ * required. Derived from `APP_URL`.
5252+ */
5353+export function serviceDid(): string {
5454+ const url = new URL(env.get('APP_URL'))
5555+ return `did:web:${url.host}`
5656+}
5757+5858+/**
5959+ * DID of the atproto account that owns the feed records. Feeds must be
6060+ * published on some real account's PDS; this is the `creator` field of the
6161+ * `app.bsky.feed.generator` record. Configured via FEED_PUBLISHER_DID.
6262+ *
6363+ * Returns null when unconfigured — in that case describeFeedGenerator returns
6464+ * an empty feed list and getFeedSkeleton cannot resolve any feed URIs.
6565+ *
6666+ * Read from `process.env` directly (same pattern as the firehose webhook
6767+ * URLs in ThresholdScanJob) so tests can override it without rebuilding the
6868+ * AdonisJS Env instance.
6969+ */
7070+export function publisherDid(): string | null {
7171+ const value = process.env.FEED_PUBLISHER_DID
7272+ return value && value.length > 0 ? value : null
7373+}
7474+7575+/** Canonical AT-URI of a feed record, or null if no publisher configured. */
7676+export function feedUri(rkey: string): string | null {
7777+ const pub = publisherDid()
7878+ if (!pub) return null
7979+ return `at://${pub}/app.bsky.feed.generator/${rkey}`
8080+}
8181+8282+/**
8383+ * Find the feed definition matching an AT-URI received in getFeedSkeleton.
8484+ * Returns null if the URI doesn't match a known feed under our publisher DID.
8585+ */
8686+export function findFeedByUri(uri: string): FeedDefinition | null {
8787+ const pub = publisherDid()
8888+ if (!pub) return null
8989+9090+ let parsed
9191+ try {
9292+ parsed = parseAtUri(uri)
9393+ } catch {
9494+ return null
9595+ }
9696+ if (parsed.did !== pub) return null
9797+ if (parsed.collection !== 'app.bsky.feed.generator') return null
9898+ return FEEDS.find((f) => f.rkey === parsed.rkey) ?? null
9999+}
100100+101101+// ---------------------------------------------------------------------------
102102+// DID document
103103+// ---------------------------------------------------------------------------
104104+105105+/**
106106+ * DID document served at `/.well-known/did.json`. Declares the
107107+ * `BskyFeedGenerator` service endpoint — the AppView uses this to look up
108108+ * where to hit `getFeedSkeleton`.
109109+ *
110110+ * Spec: https://github.com/bluesky-social/feed-generator
111111+ */
112112+export function didDocument(): Record<string, unknown> {
113113+ const did = serviceDid()
114114+ const appUrl = env.get('APP_URL')
115115+ return {
116116+ '@context': ['https://www.w3.org/ns/did/v1'],
117117+ 'id': did,
118118+ 'service': [
119119+ {
120120+ id: '#bsky_fg',
121121+ type: 'BskyFeedGenerator',
122122+ serviceEndpoint: appUrl,
123123+ },
124124+ ],
125125+ }
126126+}
127127+128128+// ---------------------------------------------------------------------------
129129+// Skeleton query
130130+// ---------------------------------------------------------------------------
131131+132132+export interface SkeletonArgs {
133133+ /** The AT-URI passed by the AppView in the `feed` query param. */
134134+ feedUri: string
135135+ /** Opaque pagination cursor returned by a previous call, if any. */
136136+ cursor?: string | undefined
137137+ /** Desired page size (clamped by the caller to [1, MAX_LIMIT]). */
138138+ limit: number
139139+}
140140+141141+export interface SkeletonResult {
142142+ feed: Array<{ post: string }>
143143+ cursor?: string
144144+}
145145+146146+export class UnknownFeedError extends Error {
147147+ constructor(uri: string) {
148148+ super(`Unknown feed URI: ${uri}`)
149149+ this.name = 'UnknownFeedError'
150150+ }
151151+}
152152+153153+export class InvalidCursorError extends Error {
154154+ constructor(cursor: string) {
155155+ super(`Invalid cursor: ${cursor}`)
156156+ this.name = 'InvalidCursorError'
157157+ }
158158+}
159159+160160+/**
161161+ * Build a feed skeleton for the given feed URI.
162162+ *
163163+ * Pagination cursor format: `<firedAt>:<subjectUri>` — a keyset cursor on
164164+ * (firedAt DESC, subjectUri DESC). Each row in `notified_thresholds` is
165165+ * unique by (subject_uri, threshold), so subjectUri alone disambiguates
166166+ * rows that share a firedAt millisecond.
167167+ */
168168+export async function getSkeleton(args: SkeletonArgs): Promise<SkeletonResult> {
169169+ const feed = findFeedByUri(args.feedUri)
170170+ if (!feed) throw new UnknownFeedError(args.feedUri)
171171+172172+ const query = NotifiedThreshold.query()
173173+ .where('threshold', feed.threshold)
174174+ .orderBy('fired_at', 'desc')
175175+ .orderBy('subject_uri', 'desc')
176176+ .limit(args.limit)
177177+178178+ if (args.cursor !== undefined && args.cursor !== '') {
179179+ const parsed = parseCursor(args.cursor)
180180+ // Keyset walk: rows strictly "after" the cursor under (fired_at DESC,
181181+ // subject_uri DESC). Either firedAt is smaller, or firedAt is equal
182182+ // and subjectUri is lexicographically smaller.
183183+ query.where((q) => {
184184+ q.where('fired_at', '<', parsed.firedAt).orWhere((inner) => {
185185+ inner.where('fired_at', parsed.firedAt).andWhere('subject_uri', '<', parsed.subjectUri)
186186+ })
187187+ })
188188+ }
189189+190190+ const rows = await query
191191+ const items = rows.map((r) => ({ post: r.subjectUri }))
192192+193193+ let nextCursor: string | undefined
194194+ if (rows.length === args.limit && rows.length > 0) {
195195+ const last = rows[rows.length - 1]
196196+ nextCursor = encodeCursor(last.firedAt, last.subjectUri)
197197+ }
198198+199199+ return nextCursor ? { feed: items, cursor: nextCursor } : { feed: items }
200200+}
201201+202202+function encodeCursor(firedAt: number, subjectUri: string): string {
203203+ return `${firedAt}:${subjectUri}`
204204+}
205205+206206+function parseCursor(raw: string): { firedAt: number; subjectUri: string } {
207207+ const idx = raw.indexOf(':')
208208+ if (idx <= 0 || idx === raw.length - 1) throw new InvalidCursorError(raw)
209209+ const firedAt = Number(raw.slice(0, idx))
210210+ const subjectUri = raw.slice(idx + 1)
211211+ if (!Number.isFinite(firedAt) || !Number.isInteger(firedAt)) {
212212+ throw new InvalidCursorError(raw)
213213+ }
214214+ return { firedAt, subjectUri }
215215+}
+128
commands/feeds_publish.ts
···11+import { BaseCommand, flags } from '@adonisjs/core/ace'
22+import { CommandOptions } from '@adonisjs/core/types/ace'
33+import { AtpAgent } from '@atproto/api'
44+import { FEEDS, publisherDid, serviceDid } from '#services/feed_generator'
55+66+/**
77+ * Ace command: `node ace feeds:publish --handle <handle> --password <app-password>`
88+ *
99+ * Publishes `app.bsky.feed.generator` records on an atproto account's PDS for
1010+ * each configured feed (1k-favs, 10k-favs). This is a one-shot bootstrap step
1111+ * that makes the feeds discoverable in Bluesky clients.
1212+ *
1313+ * Flags:
1414+ * --handle handle that will own the feed records (required)
1515+ * --password app password for that account (required)
1616+ * --pds PDS service URL (default: https://bsky.social)
1717+ * --dry-run print what would be published without writing
1818+ *
1919+ * After publishing, the command logs the account's DID. Set
2020+ * FEED_PUBLISHER_DID=<that DID> in the web process's env so
2121+ * describeFeedGenerator advertises the feeds.
2222+ *
2323+ * The service DID served at the generator endpoint is derived from APP_URL
2424+ * (did:web:<host>) — the feed record points to that DID, not the publisher.
2525+ *
2626+ * Re-running the command is safe: putRecord is idempotent on (repo,
2727+ * collection, rkey).
2828+ */
2929+export default class FeedsPublish extends BaseCommand {
3030+ static commandName = 'feeds:publish'
3131+ static description = 'Publish app.bsky.feed.generator records for 1k-favs and 10k-favs'
3232+ static options: CommandOptions = { startApp: true }
3333+3434+ @flags.string({ description: 'Handle of the account that will own the feed records' })
3535+ declare handle: string
3636+3737+ @flags.string({ description: 'App password for the publisher account' })
3838+ declare password: string
3939+4040+ @flags.string({ description: 'PDS service URL', default: 'https://bsky.social' })
4141+ declare pds: string
4242+4343+ @flags.boolean({ description: "Don't write records — print what would be published" })
4444+ declare dryRun: boolean
4545+4646+ async run() {
4747+ if (!this.handle || !this.password) {
4848+ this.logger.error('--handle and --password are required')
4949+ this.exitCode = 1
5050+ return
5151+ }
5252+5353+ const svcDid = serviceDid()
5454+ this.logger.info(`Service DID (will be set on each feed record): ${svcDid}`)
5555+5656+ const agent = new AtpAgent({ service: this.pds })
5757+5858+ try {
5959+ await agent.login({ identifier: this.handle, password: this.password })
6060+ } catch (err) {
6161+ this.logger.error(`Login failed: ${err instanceof Error ? err.message : String(err)}`)
6262+ this.exitCode = 1
6363+ return
6464+ }
6565+6666+ const accountDid = agent.session?.did
6767+ if (!accountDid) {
6868+ this.logger.error('Login succeeded but no session DID returned')
6969+ this.exitCode = 1
7070+ return
7171+ }
7272+7373+ this.logger.info(`Authenticated as ${this.handle} (${accountDid})`)
7474+7575+ const expected = publisherDid()
7676+ if (expected && expected !== accountDid) {
7777+ this.logger.error(
7878+ `FEED_PUBLISHER_DID (${expected}) does not match the account DID (${accountDid}). ` +
7979+ 'Refusing to publish under the wrong account.'
8080+ )
8181+ this.exitCode = 1
8282+ return
8383+ }
8484+ if (!expected) {
8585+ this.logger.warning(
8686+ `FEED_PUBLISHER_DID is not set. After publishing, set FEED_PUBLISHER_DID=${accountDid} ` +
8787+ 'so the web process can advertise these feeds in describeFeedGenerator.'
8888+ )
8989+ }
9090+9191+ const createdAt = new Date().toISOString()
9292+ for (const feed of FEEDS) {
9393+ const record = {
9494+ $type: 'app.bsky.feed.generator',
9595+ did: svcDid,
9696+ displayName: feed.displayName,
9797+ description: feed.description,
9898+ createdAt,
9999+ }
100100+101101+ if (this.dryRun) {
102102+ this.logger.info(`[dry-run] Would putRecord rkey=${feed.rkey}:`)
103103+ this.logger.info(JSON.stringify(record, null, 2))
104104+ continue
105105+ }
106106+107107+ try {
108108+ const result = await agent.com.atproto.repo.putRecord({
109109+ repo: accountDid,
110110+ collection: 'app.bsky.feed.generator',
111111+ rkey: feed.rkey,
112112+ record,
113113+ })
114114+ this.logger.info(`Published ${feed.rkey} → ${result.data.uri}`)
115115+ } catch (err) {
116116+ this.logger.error(
117117+ `Failed to publish ${feed.rkey}: ${err instanceof Error ? err.message : String(err)}`
118118+ )
119119+ this.exitCode = 1
120120+ return
121121+ }
122122+ }
123123+124124+ if (!this.dryRun) {
125125+ this.logger.info('All feeds published.')
126126+ }
127127+ }
128128+}
···11+import { BaseSchema } from '@adonisjs/lucid/schema'
22+33+/**
44+ * Composite index covering the feed-generator's getFeedSkeleton query:
55+ *
66+ * SELECT subject_uri
77+ * FROM notified_thresholds
88+ * WHERE threshold = ?
99+ * AND (fired_at < ? OR (fired_at = ? AND subject_uri < ?)) -- cursor
1010+ * ORDER BY fired_at DESC, subject_uri DESC
1111+ * LIMIT ?
1212+ *
1313+ * Without this index the table is scanned in full because the existing PK
1414+ * (subject_uri, threshold) has subject_uri leftmost, which can't service a
1515+ * threshold-only filter. SQLite can walk an ASC composite index in reverse
1616+ * for the DESC ORDER BY, so this index handles both the filter and the sort
1717+ * in a single index range scan.
1818+ */
1919+export default class extends BaseSchema {
2020+ async up() {
2121+ this.schema.alterTable('notified_thresholds', (table) => {
2222+ table.index(
2323+ ['threshold', 'fired_at', 'subject_uri'],
2424+ 'notified_thresholds_threshold_fired_at_subject_uri'
2525+ )
2626+ })
2727+ }
2828+2929+ async down() {
3030+ this.schema.alterTable('notified_thresholds', (table) => {
3131+ table.dropIndex(
3232+ ['threshold', 'fired_at', 'subject_uri'],
3333+ 'notified_thresholds_threshold_fired_at_subject_uri'
3434+ )
3535+ })
3636+ }
3737+}
+5
start/env.ts
···4545 FIREHOSE_WEBHOOK_URL_1K: Env.schema.string.optional(),
4646 FIREHOSE_WEBHOOK_URL_10K: Env.schema.string.optional(),
47474848+ // Feed generator (web): DID of the atproto account that owns the
4949+ // published `app.bsky.feed.generator` records. Unset => feeds are not
5050+ // discoverable (describe endpoint returns an empty list).
5151+ FEED_PUBLISHER_DID: Env.schema.string.optional(),
5252+4853 // Backfill
4954 BACKFILL_MAX_POSTS: Env.schema.number.optional(),
5055