search for standard sites pub-search.waow.tech
search zig blog atproto
11
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: add script to enumerate site.standard repos for TAP

TAP supports only one signal collection, and we already signal on pub.leaflet.document
to discover Leaflet users. This script enumerates repos that have site.standard.publication
records (e.g. pckt) and adds them to TAP via the /repos/add endpoint.

Run manually or periodically to ensure site.standard repos are indexed.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

zzstoatzz 8f8c3070 ef4c3975

+109
+109
scripts/enumerate-standard-repos
#!/usr/bin/env -S uv run --script --quiet
# /// script
# requires-python = ">=3.12"
# dependencies = ["httpx"]
# ///
"""
Enumerate repos with site.standard.publication records and add them to TAP.

TAP only signals on one collection, so we use this to discover repos
that use site.standard.publication (pckt, etc) and add them to TAP.

Usage:
    ./scripts/enumerate-standard-repos
    ./scripts/enumerate-standard-repos --dry-run
"""

import argparse
import sys
from itertools import batched

import httpx

RELAY_URL = "https://relay1.us-east.bsky.network"
TAP_URL = "http://leaflet-search-tap.internal:2480"  # fly internal network
COLLECTION = "site.standard.publication"

# Number of repos requested per listReposByCollection page.
PAGE_LIMIT = 1000
# Number of DIDs sent to TAP in a single /repos/add request.
BATCH_SIZE = 100


def enumerate_repos(relay_url: str, collection: str) -> list[str]:
    """Enumerate all repos with records in the given collection.

    Pages through ``com.atproto.sync.listReposByCollection`` on the relay,
    following the returned cursor until a page is empty or carries no cursor.

    Args:
        relay_url: Base URL of the relay (a trailing slash is tolerated).
        collection: Collection NSID to filter repos by.

    Returns:
        The ``did`` of every repo returned by the relay, in response order.

    Raises:
        httpx.HTTPStatusError: If the relay answers with an error status.
        httpx.RequestError: If the request cannot be completed.
    """
    endpoint = f"{relay_url.rstrip('/')}/xrpc/com.atproto.sync.listReposByCollection"
    dids: list[str] = []
    cursor: str | None = None

    print(f"enumerating repos with {collection}...")

    # One client for the whole pagination run so the connection is reused
    # instead of being re-established for every page.
    with httpx.Client(timeout=60) as client:
        while True:
            params: dict[str, str | int] = {
                "collection": collection,
                "limit": PAGE_LIMIT,
            }
            if cursor:
                params["cursor"] = cursor

            resp = client.get(endpoint, params=params)
            resp.raise_for_status()
            data = resp.json()

            repos = data.get("repos", [])
            if not repos:
                break
            dids.extend(repo["did"] for repo in repos)

            # No cursor in the response means this was the last page.
            cursor = data.get("cursor")
            if not cursor:
                break

            print(f" found {len(dids)} repos so far...")

    return dids


def add_repos_to_tap(tap_url: str, dids: list[str]) -> None:
    """Add repos to TAP for syncing.

    Posts the DIDs to ``{tap_url}/repos/add`` in batches of ``BATCH_SIZE``.

    Args:
        tap_url: Base URL of the TAP service (a trailing slash is tolerated).
        dids: DIDs to register; an empty list is a no-op.

    Raises:
        httpx.HTTPStatusError: If TAP answers with an error status. Batches
            sent before the failing one are not rolled back.
        httpx.RequestError: If a request cannot be completed.
    """
    if not dids:
        return

    endpoint = f"{tap_url.rstrip('/')}/repos/add"

    with httpx.Client(timeout=30) as client:
        for batch_number, batch in enumerate(batched(dids, BATCH_SIZE), start=1):
            # batched() yields tuples; send a list to keep the payload shape explicit.
            resp = client.post(endpoint, json={"dids": list(batch)})
            resp.raise_for_status()
            print(f" added batch {batch_number}: {len(batch)} repos")


def main() -> None:
    """Parse CLI arguments, enumerate repos, and add them to TAP.

    With ``--dry-run`` only a preview (first 10 DIDs) is printed and TAP is
    not contacted. Any HTTP failure is reported on stderr and the process
    exits with status 1.
    """
    parser = argparse.ArgumentParser(description="Enumerate and add standard.site repos to TAP")
    parser.add_argument("--dry-run", action="store_true", help="Show what would be done")
    parser.add_argument("--relay-url", default=RELAY_URL, help="Relay URL")
    parser.add_argument("--tap-url", default=TAP_URL, help="TAP URL")
    args = parser.parse_args()

    try:
        dids = enumerate_repos(args.relay_url, COLLECTION)
    except httpx.HTTPError as exc:
        print(f"error: failed to enumerate repos: {exc}", file=sys.stderr)
        sys.exit(1)

    print(f"found {len(dids)} repos with {COLLECTION}")

    if not dids:
        print("no repos to add")
        return

    if args.dry_run:
        print("dry run - would add these repos to TAP:")
        for did in dids[:10]:
            print(f" {did}")
        if len(dids) > 10:
            print(f" ... and {len(dids) - 10} more")
        return

    print(f"adding {len(dids)} repos to TAP...")
    try:
        add_repos_to_tap(args.tap_url, dids)
    except httpx.HTTPError as exc:
        print(f"error: failed to add repos to TAP: {exc}", file=sys.stderr)
        sys.exit(1)
    print("done!")


if __name__ == "__main__":
    main()