A Bluesky labeler that labels accounts hosted on PDSes operated by entities other than Bluesky PBC
3
fork

Configure Feed

Select the types of activity you want to include in your feed.

Serve index page with statistics on HTTP server

gbl08ma 1386c0b4 90b83e27

+118
+72
infoServer.ts
import type { FastifyRequest, FastifyReply } from "fastify";
import type { KnownPDSStorage } from "./knownPDSStorage";
import type { ActiveLabelsStorage } from "./activeLabelsStorage";
import { CacheableMemory } from "cacheable";

/**
 * Statistics shown on the index page, in this fixed order:
 * known PDS count, PDS crawling failure count, active label subject count.
 */
type Stats = [number, number, number];

/**
 * Serves a static HTML information page for the labeler, including a few
 * live statistics read from the storage layers.
 *
 * The statistics are kept in an in-memory cache (constructed with a `10s`
 * ttl) so that the counting queries are not re-run on every request.
 */
export class InfoServer {
  private knownPDSStorage: KnownPDSStorage;
  private activeLabelsStorage: ActiveLabelsStorage;
  private maxPDSDedicatedLabels: number;

  private statsCache = new CacheableMemory({ ttl: '10s' });
  private readonly cacheKey = "stats";

  /**
   * @param knownPDSStorage Source of the known-PDS and crawling-failure counts.
   * @param activeLabelsStorage Source of the active label subject count.
   * @param maxPDSDedicatedLabels Number of dedicated per-PDS labels, interpolated into the page text.
   */
  constructor(knownPDSStorage: KnownPDSStorage, activeLabelsStorage: ActiveLabelsStorage, maxPDSDedicatedLabels: number) {
    this.knownPDSStorage = knownPDSStorage;
    this.activeLabelsStorage = activeLabelsStorage;
    this.maxPDSDedicatedLabels = maxPDSDedicatedLabels;
  }

  /**
   * Queries the three counters and stores the result in the cache.
   *
   * The counters do not depend on each other, so they are requested together
   * instead of one after another.
   *
   * @returns The freshly computed statistics.
   */
  private async computeStats(): Promise<Stats> {
    const stats: Stats = await Promise.all([
      this.knownPDSStorage.countKnownPDSs(),
      this.knownPDSStorage.countPDSCrawlingFailures(),
      this.activeLabelsStorage.countActiveLabelSubjects(),
    ]);
    this.statsCache.set(this.cacheKey, stats);
    return stats;
  }

  /**
   * Fastify route handler that replies with the HTML index page.
   *
   * Uses the cached statistics when present and recomputes them otherwise.
   *
   * @param req Incoming request (unused).
   * @param resp Reply used to send the `text/html` response.
   */
  async indexPage(req: FastifyRequest, resp: FastifyReply): Promise<void> {
    const [totalPDS, failures, activeLabels] =
      this.statsCache.get<Stats>(this.cacheKey) ?? (await this.computeStats());
    resp.type("text/html").send(`
      <!DOCTYPE html>
      <html lang="en">
      <head>
        <meta charset="UTF-8" />
        <title>Independent PDS Labeler</title>
        <meta name="viewport" content="width=device-width,initial-scale=1" />
        <meta name="description" content="Information page for the independent PDS labeler for Bluesky - @pds.labeler.tny.im" />
      </head>

      <body>
        <h1>pds.labeler.tny.im</h1>
        <p>
          This community-run Bluesky labeler identifies accounts that are hosted on a
          <a rel="noreferrer nofollow" href="https://github.com/bluesky-social/pds">PDS</a>
          not operated by
          <a rel="noreferrer nofollow" href="https://bsky.social/about/faq">Bluesky PBC</a>, and which is, therefore, considered &quot;independent.&quot;
          Some PDS meet these criteria but are not considered, for technical or logistical reasons.
        </p>
        <p>This labeler provides dedicated labels for the ${this.maxPDSDedicatedLabels} biggest independent PDS that it is aware of, by number of accounts.</p>
        <p><a rel="noreferrer nofollow" href="https://bsky.app/profile/pds.labeler.tny.im">Subscribe to the labeler on Bluesky to use it.</a></p>
        <p>
          This labeler works by enumerating accounts (repositories) within each PDS that it finds.
          It finds new PDS by consuming some events from the Bluesky firehose, and fetching the DID documents of the respective accounts, in order to identify or confirm their current PDS host.
          Additionally, known PDS are rescanned periodically in order to find and label new accounts.
        </p>
        <p>The source code for the labeler is available on <a rel="noreferrer nofollow" href="https://tangled.org/gbl08ma.com/pdslabeler">Tangled</a> under a dual MIT and Apache-2.0 licensing scheme.</p>
        <p>
          This labeler is aware of ${activeLabels} accounts hosted on a total of ${totalPDS} independent PDS.
          It is failing to crawl ${failures} of these PDS, most of which correspond to nonexistent, offline or otherwise malfunctioning or misidentified PDS.
        </p>
      </body>

      </html>
    `);
  }
}
+36
knownPDSStorage.ts
··· 48 48 return v 49 49 } 50 50 51 + async countKnownPDSs(): Promise<number> { 52 + await this.dbInitLock; 53 + 54 + const result = await this.db.execute({ 55 + sql: ` 56 + SELECT 57 + COUNT(*) AS c 58 + FROM known_pds 59 + `, 60 + }); 61 + 62 + if (!result.rows || !result.rows.length) { 63 + return 0; 64 + } 65 + 66 + return result.rows[0]!.c as number; 67 + } 68 + 51 69 async* getKnownPDSs(lastCrawledBefore: Date = new Date(), lastFailureBefore: Date = new Date()) { 52 70 await this.dbInitLock; 53 71 ··· 181 199 } finally { 182 200 transaction.close(); 183 201 } 202 + } 203 + 204 + async countPDSCrawlingFailures(): Promise<number> { 205 + await this.dbInitLock; 206 + 207 + const result = await this.db.execute({ 208 + sql: ` 209 + SELECT 210 + COUNT(*) AS c 211 + FROM pds_crawling_failures 212 + `, 213 + }); 214 + 215 + if (!result.rows || !result.rows.length) { 216 + return 0; 217 + } 218 + 219 + return result.rows[0]!.c as number; 184 220 } 185 221 186 222 async markPDSCrawlFailure(uri: string, at: Date) {
+10
labeler.ts
··· 12 12 import { PDSCrawler } from "./pdsCrawler"; 13 13 import { TaskProcessor } from "./taskProcessor"; 14 14 import { NamedMutex, wrapAsyncInCatch, type DID } from "./utils"; 15 + import { sleep } from "bun"; 16 + import { InfoServer } from "./infoServer"; 15 17 16 18 export type LabelerOptions = { 17 19 databasePath: string | undefined; ··· 43 45 private crawler: PDSCrawler; 44 46 private labelDefiner: LabelDefiner | undefined; 45 47 private listDeterminer: ListDeterminer; 48 + private infoServer: InfoServer; 46 49 47 50 private allowIssuingLabels: boolean; 48 51 private allowManagingLists: boolean; ··· 78 81 this.activeListItemsStorage = new ActiveListItemsStorage(this.server.db); 79 82 this.knownPDSStorage = new KnownPDSStorage(this.server.db); 80 83 this.crawler = new PDSCrawler(options.maxExpectedReposPerPDS); 84 + 85 + this.infoServer = new InfoServer(this.knownPDSStorage, this.activeLabelsStorage, options.maxPDSDedicatedLabels); 81 86 82 87 this.reconsiderActivePDSForRecrawlingAfterMilliseconds = options.reconsiderActivePDSForRecrawlingAfterMilliseconds; 83 88 this.recrawlKnownPDSAfterNotCrawledForMilliseconds = options.recrawlKnownPDSAfterNotCrawledForMilliseconds; ··· 111 116 await this.credentialManager.login({ identifier: loginCredentials.identifier, password: loginCredentials.password }) 112 117 113 118 this.authenticatedRPC = new Client({ handler: this.credentialManager }); 119 + 120 + // see https://github.com/skyware-js/labeler/issues/8 121 + // should be fixable in the next version of skyware-js/labeler, see https://github.com/skyware-js/labeler/pull/20 122 + await sleep(1000); 123 + this.server.app.get("/", this.infoServer.indexPage.bind(this.infoServer)); 114 124 } 115 125 116 126 private pdsFilter(pds: string): boolean {