declarative relay deployment on Hetzner (relay-eval.waow.tech)
atproto relay
14
fork

Configure Feed

Select the types of activity you want to include in your feed.

add collectiondir-diff script for comparing relay coverage

compares listReposByCollection results between two endpoints,
reports DIDs unique to each side. supports --resolve to look up
handles and PDS hosts via plc.directory.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

zzstoatzz b7fd42d4 c4b47a8b

+154
+154
scripts/collectiondir-diff
··· 1 + #!/usr/bin/env -S PYTHONUNBUFFERED=1 uv run --script --quiet 2 + # /// script 3 + # requires-python = ">=3.12" 4 + # dependencies = [] 5 + # /// 6 + """ 7 + compare listReposByCollection results between two relay endpoints. 8 + 9 + fetches all DIDs for a collection from both endpoints and reports the 10 + difference — which DIDs are unique to each side. 11 + 12 + usage: 13 + ./scripts/collectiondir-diff --collection io.atcr.sailor.profile 14 + ./scripts/collectiondir-diff --collection app.bsky.feed.post --limit 10000 15 + ./scripts/collectiondir-diff --collection io.atcr.sailor.profile \ 16 + --a https://relay.waow.tech --b https://relay1.us-east.bsky.network 17 + """ 18 + 19 + import argparse 20 + import json 21 + import sys 22 + import urllib.request 23 + import urllib.error 24 + 25 + 26 + def fetch_dids(base_url: str, collection: str, limit: int) -> set[str]: 27 + """paginate listReposByCollection and return all DIDs up to limit.""" 28 + endpoint = f"{base_url.rstrip('/')}/xrpc/com.atproto.sync.listReposByCollection" 29 + dids: set[str] = set() 30 + cursor = None 31 + page_size = min(limit, 2000) 32 + 33 + while len(dids) < limit: 34 + params = f"collection={collection}&limit={page_size}" 35 + if cursor: 36 + params += f"&cursor={urllib.request.quote(cursor)}" 37 + 38 + url = f"{endpoint}?{params}" 39 + try: 40 + req = urllib.request.Request(url) 41 + with urllib.request.urlopen(req, timeout=30) as resp: 42 + data = json.loads(resp.read()) 43 + except urllib.error.HTTPError as e: 44 + print(f" error fetching {base_url}: {e.code} {e.reason}", file=sys.stderr) 45 + break 46 + except Exception as e: 47 + print(f" error fetching {base_url}: {e}", file=sys.stderr) 48 + break 49 + 50 + repos = data.get("repos", []) 51 + if not repos: 52 + break 53 + 54 + for repo in repos: 55 + dids.add(repo["did"]) 56 + 57 + cursor = data.get("cursor") 58 + if not cursor: 59 + break 60 + 61 + sys.stdout.write(f"\r {base_url}: {len(dids)} DIDs fetched...") 62 + 
sys.stdout.flush() 63 + 64 + return dids 65 + 66 + 67 + def resolve_did(did: str) -> dict: 68 + """resolve a DID via plc.directory. returns handle + PDS host.""" 69 + try: 70 + url = f"https://plc.directory/{did}" 71 + with urllib.request.urlopen(url, timeout=10) as resp: 72 + data = json.loads(resp.read()) 73 + handle = "?" 74 + aka = data.get("alsoKnownAs", []) 75 + if aka: 76 + handle = aka[0].replace("at://", "") 77 + pds = "?" 78 + services = data.get("service", []) 79 + if services: 80 + pds = services[0].get("serviceEndpoint", "?") 81 + return {"handle": handle, "pds": pds} 82 + except Exception: 83 + return {"handle": "?", "pds": "?"} 84 + 85 + 86 + def main(): 87 + parser = argparse.ArgumentParser( 88 + description="compare listReposByCollection between two relay endpoints" 89 + ) 90 + parser.add_argument("--collection", required=True, help="collection NSID to compare") 91 + parser.add_argument( 92 + "--a", 93 + default="https://relay.waow.tech", 94 + help="first endpoint (default: https://relay.waow.tech)", 95 + ) 96 + parser.add_argument( 97 + "--b", 98 + default="https://bsky.network", 99 + help="second endpoint (default: https://bsky.network)", 100 + ) 101 + parser.add_argument( 102 + "--limit", 103 + type=int, 104 + default=100_000, 105 + help="max DIDs to fetch per endpoint (default: 100000)", 106 + ) 107 + parser.add_argument( 108 + "--resolve", 109 + action="store_true", 110 + help="resolve DIDs to handles via plc.directory (slower)", 111 + ) 112 + args = parser.parse_args() 113 + 114 + print(f"collection: {args.collection}") 115 + print(f" A: {args.a}") 116 + print(f" B: {args.b}") 117 + print(f" limit: {args.limit:,}") 118 + print() 119 + 120 + dids_a = fetch_dids(args.a, args.collection, args.limit) 121 + print(f"\r A: {len(dids_a):,} DIDs" + " " * 40) 122 + 123 + dids_b = fetch_dids(args.b, args.collection, args.limit) 124 + print(f"\r B: {len(dids_b):,} DIDs" + " " * 40) 125 + print() 126 + 127 + only_a = dids_a - dids_b 128 + only_b = dids_b 
- dids_a 129 + common = dids_a & dids_b 130 + 131 + print(f"common: {len(common):,}") 132 + print(f"only in A ({args.a}): {len(only_a):,}") 133 + print(f"only in B ({args.b}): {len(only_b):,}") 134 + 135 + def print_dids(label: str, dids: set[str]): 136 + if not dids: 137 + return 138 + print(f"\n{label}:") 139 + for did in sorted(dids): 140 + if args.resolve: 141 + info = resolve_did(did) 142 + print(f" {did} @{info['handle']} ({info['pds']})") 143 + else: 144 + print(f" {did}") 145 + 146 + print_dids(f"only in A", only_a) 147 + print_dids(f"only in B", only_b) 148 + 149 + if not only_a and not only_b: 150 + print("\nidentical.") 151 + 152 + 153 + if __name__ == "__main__": 154 + main()