···11+import type { FastifyRequest, FastifyReply } from "fastify";
22+import type { KnownPDSStorage } from "./knownPDSStorage";
33+import type { ActiveLabelsStorage } from "./activeLabelsStorage";
44+import { CacheableMemory } from "cacheable";
/** Statistics shown on the info page: [known PDS count, PDS crawling failure count, active label subject count]. */
type InfoStats = [number, number, number];

/**
 * Serves the human-readable information page of the labeler.
 *
 * The page embeds three counters read from the storages handed to the
 * constructor. The counters are kept in a short-lived in-memory cache so that
 * page views do not each trigger a round of COUNT queries.
 */
export class InfoServer {
  private readonly knownPDSStorage: KnownPDSStorage;
  private readonly activeLabelsStorage: ActiveLabelsStorage;
  private readonly maxPDSDedicatedLabels: number;

  /** Holds the most recently computed statistics; entries are created with a 10 second TTL. */
  private readonly statsCache = new CacheableMemory({ ttl: '10s' });
  private readonly cacheKey = "stats";

  /**
   * Computation currently in flight, if any. Requests arriving while the
   * cache is cold share this promise instead of each querying the storages.
   */
  private pendingStats: Promise<InfoStats> | undefined;

  /**
   * @param knownPDSStorage Source of the known PDS and crawling failure counts.
   * @param activeLabelsStorage Source of the active label subject count.
   * @param maxPDSDedicatedLabels Number interpolated into the "dedicated labels" sentence of the page.
   */
  constructor(knownPDSStorage: KnownPDSStorage, activeLabelsStorage: ActiveLabelsStorage, maxPDSDedicatedLabels: number) {
    this.knownPDSStorage = knownPDSStorage;
    this.activeLabelsStorage = activeLabelsStorage;
    this.maxPDSDedicatedLabels = maxPDSDedicatedLabels;
  }

  /**
   * Fetches the three counters, stores them in the cache and returns them.
   *
   * The counters are independent, so they are requested in parallel. While a
   * computation is pending, further calls return the same promise.
   *
   * @returns The freshly computed statistics.
   */
  private computeStats(): Promise<InfoStats> {
    if (this.pendingStats !== undefined) {
      return this.pendingStats;
    }

    const pending = Promise.all([
      this.knownPDSStorage.countKnownPDSs(),
      this.knownPDSStorage.countPDSCrawlingFailures(),
      this.activeLabelsStorage.countActiveLabelSubjects(),
    ])
      .then((stats): InfoStats => {
        this.statsCache.set(this.cacheKey, stats);
        return stats;
      })
      .finally(() => {
        // Clear on success and on failure, so that an error is not remembered
        // and the next request after cache expiry starts a new computation.
        this.pendingStats = undefined;
      });

    this.pendingStats = pending;
    return pending;
  }

  /**
   * Route handler for the index page: replies with a static HTML document
   * that includes the cached (or freshly computed) statistics.
   *
   * @param req Incoming request (not inspected).
   * @param resp Reply used to send the HTML document.
   */
  async indexPage(req: FastifyRequest, resp: FastifyReply): Promise<void> {
    const [totalPDS, failures, activeLabels] =
      this.statsCache.get<InfoStats>(this.cacheKey) ?? await this.computeStats();

    resp.type("text/html").send(`
      <!DOCTYPE html>
      <html lang="en">
      <head>
        <meta charset="UTF-8" />
        <title>Independent PDS Labeler</title>
        <meta name="viewport" content="width=device-width,initial-scale=1" />
        <meta name="description" content="Information page for the independent PDS labeler for Bluesky - @pds.labeler.tny.im" />
      </head>

      <body>
        <h1>pds.labeler.tny.im</h1>
        <p>
          This community ran Bluesky labeler identifies accounts that are hosted on a
          <a rel="noreferrer nofollow" href="https://github.com/bluesky-social/pds">PDS</a>
          not operated by
          <a rel="noreferrer nofollow" href="https://bsky.social/about/faq">Bluesky PBC</a>, and which is, therefore, considered "independent."
          Some PDS meet this criteria but are not considered, for technical or logistical reasons.
        </p>
        <p>This labeler provides dedicated labels for the ${this.maxPDSDedicatedLabels} biggest independent PDS that it is aware of, by number of accounts.</p>
        <p><a rel="noreferrer nofollow" href="https://bsky.app/profile/pds.labeler.tny.im">Subscribe to the labeler on Bluesky to use it.</a></p>
        <p>
          This labeler works by enumerating accounts (repositories) within each PDS that it finds.
          It finds new PDS by consuming some events from the Bluesky firehose, and fetching the DID documents of the respective accounts, in order to identify or confirm their current PDS host.
          Additionally, known PDS are rescanned periodically in order to find and label new accounts.
        </p>
        <p>The source code for the labeler is available on <a rel="noreferrer nofollow" href="https://tangled.org/gbl08ma.com/pdslabeler">Tangled</a> under a dual MIT and Apache-2.0 licensing scheme.</p>
        <p>
          This labeler is aware of ${activeLabels} accounts hosted on a total of ${totalPDS} independent PDS.
          It is failing to crawl ${failures} of these PDS, most of which correspond to nonexistent, offline or otherwise malfunctioning or misidentified PDS.
        </p>
      </body>

      </html>
    `);
  }
}
+36
knownPDSStorage.ts
···4848 return v
4949 }
50505151+ async countKnownPDSs(): Promise<number> {
5252+ await this.dbInitLock;
5353+5454+ const result = await this.db.execute({
5555+ sql: `
5656+ SELECT
5757+ COUNT(*) AS c
5858+ FROM known_pds
5959+ `,
6060+ });
6161+6262+ if (!result.rows || !result.rows.length) {
6363+ return 0;
6464+ }
6565+6666+ return result.rows[0]!.c as number;
6767+ }
6868+5169 async* getKnownPDSs(lastCrawledBefore: Date = new Date(), lastFailureBefore: Date = new Date()) {
5270 await this.dbInitLock;
5371···181199 } finally {
182200 transaction.close();
183201 }
202202+ }
203203+204204+ async countPDSCrawlingFailures(): Promise<number> {
205205+ await this.dbInitLock;
206206+207207+ const result = await this.db.execute({
208208+ sql: `
209209+ SELECT
210210+ COUNT(*) AS c
211211+ FROM pds_crawling_failures
212212+ `,
213213+ });
214214+215215+ if (!result.rows || !result.rows.length) {
216216+ return 0;
217217+ }
218218+219219+ return result.rows[0]!.c as number;
184220 }
185221186222 async markPDSCrawlFailure(uri: string, at: Date) {
+10
labeler.ts
···1212import { PDSCrawler } from "./pdsCrawler";
1313import { TaskProcessor } from "./taskProcessor";
1414import { NamedMutex, wrapAsyncInCatch, type DID } from "./utils";
1515+import { sleep } from "bun";
1616+import { InfoServer } from "./infoServer";
15171618export type LabelerOptions = {
1719 databasePath: string | undefined;
···4345 private crawler: PDSCrawler;
4446 private labelDefiner: LabelDefiner | undefined;
4547 private listDeterminer: ListDeterminer;
4848+ private infoServer: InfoServer;
46494750 private allowIssuingLabels: boolean;
4851 private allowManagingLists: boolean;
···7881 this.activeListItemsStorage = new ActiveListItemsStorage(this.server.db);
7982 this.knownPDSStorage = new KnownPDSStorage(this.server.db);
8083 this.crawler = new PDSCrawler(options.maxExpectedReposPerPDS);
8484+8585+ this.infoServer = new InfoServer(this.knownPDSStorage, this.activeLabelsStorage, options.maxPDSDedicatedLabels);
81868287 this.reconsiderActivePDSForRecrawlingAfterMilliseconds = options.reconsiderActivePDSForRecrawlingAfterMilliseconds;
8388 this.recrawlKnownPDSAfterNotCrawledForMilliseconds = options.recrawlKnownPDSAfterNotCrawledForMilliseconds;
···111116 await this.credentialManager.login({ identifier: loginCredentials.identifier, password: loginCredentials.password })
112117113118 this.authenticatedRPC = new Client({ handler: this.credentialManager });
119119+120120+ // see https://github.com/skyware-js/labeler/issues/8
121121+ // should be fixable in the next version of skyware-js/labeler, see https://github.com/skyware-js/labeler/pull/20
122122+ await sleep(1000);
123123+ this.server.app.get("/", this.infoServer.indexPage.bind(this.infoServer));
114124 }
115125116126 private pdsFilter(pds: string): boolean {