add zlay-smoketest script · zzstoatzz.io/relay@8590edd

+273

1 changed file

expand all

scripts

+273

scripts/zlay-smoketest

#!/usr/bin/env -S PYTHONUNBUFFERED=1 uv run --script --quiet
# /// script
# requires-python = ">=3.12"
# dependencies = []
# ///
"""
smoke test zlay's listReposByCollection endpoint.

tests:
  1. endpoint conformance (valid response shape, error handling)
  2. pagination (cursor works, no overlap, deterministic)
  3. set completeness vs reference relay (collectiondir-diff)
  4. live indexing (optional — create a record, check it appears;
     not implemented in this script yet)

usage:
  ./scripts/zlay-smoketest
  ./scripts/zlay-smoketest --zlay https://zlay.waow.tech
  ./scripts/zlay-smoketest --zlay https://zlay.waow.tech --reference https://bsky.network
  ./scripts/zlay-smoketest --skip-diff  # skip the slow set-completeness check
  ./scripts/zlay-smoketest --collections app.bsky.feed.post app.bsky.graph.follow
"""

import argparse
import json
import sys
import urllib.error
import urllib.parse
import urllib.request

# The test_* functions below are network probes that take positional
# arguments, not pytest tests; keep them out of star-imports so a test
# collector never mistakes them for collectable tests.
__all__ = [
    "FAIL",
    "PASS",
    "SKIP",
    "check",
    "failures",
    "fetch_dids",
    "fetch_json",
    "main",
]

PASS = "\033[32mpass\033[0m"
FAIL = "\033[31mFAIL\033[0m"
SKIP = "\033[33mskip\033[0m"

# XRPC method under test.
LIST_REPOS_NSID = "com.atproto.sync.listReposByCollection"
# Largest page size this script asks for in a single request.
MAX_PAGE_SIZE = 1000
# ANSI "erase to end of line": used after "\r" so that a shorter status
# line fully replaces the longer progress text it overwrites.
CLEAR_TO_EOL = "\033[K"

# Number of failed checks recorded so far; main() turns it into the exit code.
failures = 0


def check(name: str, ok: bool, detail: str = "") -> bool:
    """Print one pass/FAIL line for a check and count it if it failed.

    Args:
        name: human-readable label of the check.
        ok: whether the check passed.
        detail: optional extra text, printed in parentheses when non-empty.

    Returns:
        ``ok`` unchanged, so callers can branch on the result.
    """
    global failures
    tag = PASS if ok else FAIL
    msg = f" [{tag}] {name}"
    if detail:
        msg += f" ({detail})"
    print(msg)
    if not ok:
        failures += 1
    return ok


def fetch_json(url: str, timeout: int = 15) -> dict:
    """GET ``url`` and return the decoded JSON object.

    This never raises and never returns ``None``. Failures are reported
    in-band so callers can keep running the remaining checks:

    * HTTP error status -> ``{"_http_error": <code>, "_reason": <reason>}``
    * anything else (connection error, timeout, invalid JSON, or a JSON
      body that is not an object) -> ``{"_error": <message>}``
    """
    try:
        req = urllib.request.Request(url)
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            payload = json.loads(resp.read())
    except urllib.error.HTTPError as e:
        return {"_http_error": e.code, "_reason": e.reason}
    except Exception as e:  # deliberate best-effort boundary for a smoke test
        return {"_error": str(e)}
    if not isinstance(payload, dict):
        # Callers use .get() on the result; a JSON array or scalar would
        # otherwise crash the whole run instead of failing one check.
        return {"_error": f"expected a JSON object, got {type(payload).__name__}"}
    return payload


def _list_repos_url(base_url: str, **params: object) -> str:
    """Build a listReposByCollection URL; parameters set to None are omitted."""
    query = {key: value for key, value in params.items() if value is not None}
    # quote (not quote_plus) so spaces become %20; ":" stays literal so DID
    # shaped cursors are sent exactly as the paginating code always sent them.
    encoded = urllib.parse.urlencode(query, quote_via=urllib.parse.quote, safe=":")
    return f"{base_url.rstrip('/')}/xrpc/{LIST_REPOS_NSID}?{encoded}"


def _succeeded(data: dict) -> bool:
    """Return True when a fetch_json() result carries no in-band error marker."""
    return "_error" not in data and "_http_error" not in data


def _failure_detail(data: dict) -> str:
    """Describe the in-band error of a fetch_json() result ('' if there is none)."""
    if "_http_error" in data:
        return f"HTTP {data['_http_error']} {data.get('_reason', '')}".rstrip()
    if "_error" in data:
        return str(data["_error"])
    return ""


def _repo_dids(repos: object) -> list[str]:
    """Return the non-empty string ``did`` values of a ``repos`` payload, in order."""
    if not isinstance(repos, list):
        return []
    return [
        repo["did"]
        for repo in repos
        if isinstance(repo, dict) and isinstance(repo.get("did"), str) and repo["did"]
    ]


def _expect_bad_request(name: str, url: str) -> None:
    """Record a check that requesting ``url`` is answered with HTTP 400."""
    data = fetch_json(url)
    if "_error" in data:
        # A transport failure is not "ok"; say what actually happened.
        detail = f"request failed: {data['_error']}"
    else:
        detail = f"got {data.get('_http_error', 'ok')}"
    check(name, data.get("_http_error") == 400, detail)


def fetch_dids(base_url: str, collection: str, limit: int) -> list[str]:
    """Paginate listReposByCollection and return an ordered list of DIDs.

    Duplicates are dropped (first occurrence wins). Pagination stops when
    ``limit`` DIDs were collected, a request fails, the server stops
    returning a cursor, or a page contributes no new DID (which guards
    against a server that keeps handing out a cursor forever).

    Returns:
        At most ``limit`` distinct DIDs; possibly fewer (or empty) when the
        collection is small or a request failed part-way.
    """
    dids: list[str] = []
    seen: set[str] = set()
    cursor = None
    page_size = min(limit, MAX_PAGE_SIZE)

    while len(dids) < limit:
        url = _list_repos_url(
            base_url, collection=collection, limit=page_size, cursor=cursor
        )
        data = fetch_json(url, timeout=30)
        if not _succeeded(data):
            break

        # dict.fromkeys de-duplicates within the page while keeping order.
        fresh = [
            did
            for did in dict.fromkeys(_repo_dids(data.get("repos")))
            if did not in seen
        ]
        if not fresh:
            break
        dids.extend(fresh)
        seen.update(fresh)

        cursor = data.get("cursor")
        if not cursor:
            break

    # The last page may overshoot when limit is not a multiple of page_size.
    return dids[:limit]


def test_endpoint_conformance(base_url: str, collection: str):
    """Check response shape for a valid request and 400s for invalid ones."""
    print("\n--- endpoint conformance ---")

    # valid request
    data = fetch_json(_list_repos_url(base_url, collection=collection, limit=5))
    # An HTTP error yields a synthetic dict, not JSON from the server, so it
    # must not count as a valid JSON response.
    if check("returns valid JSON", _succeeded(data), _failure_detail(data)):
        repos = data.get("repos")
        check("has repos array", isinstance(repos, list))
        if isinstance(repos, list) and repos:
            first = repos[0]
            has_did = isinstance(first, dict) and "did" in first
            check("repo has did field", has_did)
            did = first["did"] if has_did else None
            check(
                "did starts with did:",
                isinstance(did, str) and did.startswith("did:"),
            )

    # missing collection
    _expect_bad_request("missing collection → 400", _list_repos_url(base_url, limit=5))

    # invalid collection (no dot)
    _expect_bad_request(
        "invalid collection → 400",
        _list_repos_url(base_url, collection="invalid", limit=5),
    )

    # invalid limit
    _expect_bad_request(
        "limit=0 → 400", _list_repos_url(base_url, collection=collection, limit=0)
    )
    _expect_bad_request(
        "limit=9999 → 400",
        _list_repos_url(base_url, collection=collection, limit=9999),
    )

    # explicit limit respected
    data = fetch_json(_list_repos_url(base_url, collection=collection, limit=3))
    if _succeeded(data):
        repos = data.get("repos") or []
        check("limit=3 returns ≤3", len(repos) <= 3, f"got {len(repos)}")
    else:
        # Do not skip silently: a failed request means the limit was not verified.
        check("limit=3 request", False, _failure_detail(data))


def test_pagination(base_url: str, collection: str):
    """Fetch two consecutive pages of 3 and check they do not overlap."""
    print("\n--- pagination ---")

    # page 1
    data1 = fetch_json(_list_repos_url(base_url, collection=collection, limit=3))
    if not _succeeded(data1):
        check("page 1 fetch", False, f"request failed: {_failure_detail(data1)}")
        return

    repos1 = data1.get("repos") or []
    cursor = data1.get("cursor")
    check("page 1 returns results", len(repos1) > 0, f"{len(repos1)} repos")

    if not cursor:
        check("no cursor (fewer results than limit)", len(repos1) < 3)
        return

    check("page 1 has cursor", cursor is not None)

    # page 2
    data2 = fetch_json(
        _list_repos_url(base_url, collection=collection, limit=3, cursor=cursor)
    )
    if not _succeeded(data2):
        check("page 2 fetch", False, f"request failed: {_failure_detail(data2)}")
        return

    repos2 = data2.get("repos") or []
    check("page 2 returns results", len(repos2) > 0, f"{len(repos2)} repos")

    # no overlap
    overlap = set(_repo_dids(repos1)) & set(_repo_dids(repos2))
    check(
        "no overlap between pages",
        len(overlap) == 0,
        f"{len(overlap)} overlapping" if overlap else "",
    )


def test_pagination_determinism(base_url: str, collection: str):
    """Paginate the first 100 DIDs twice and check count and order match."""
    print("\n--- pagination determinism ---")

    dids_run1 = fetch_dids(base_url, collection, limit=100)
    dids_run2 = fetch_dids(base_url, collection, limit=100)

    if not dids_run1:
        check("has data to compare", False, "no results")
        return

    check(
        "same count",
        len(dids_run1) == len(dids_run2),
        f"{len(dids_run1)} vs {len(dids_run2)}",
    )
    check("same order", dids_run1 == dids_run2)


def test_set_completeness(zlay_url: str, ref_url: str, collection: str, limit: int):
    """Compare the DID set served by zlay with the one from the reference relay.

    Up to ``limit`` DIDs are fetched from each side. Coverage is reported as
    an informational line; only "has data" on either side is a hard check.
    """
    print(f"\n--- set completeness: {collection} ---")

    sys.stdout.write(" fetching from zlay...")
    sys.stdout.flush()
    dids_zlay = set(fetch_dids(zlay_url, collection, limit))
    sys.stdout.write(f"\r{CLEAR_TO_EOL} zlay: {len(dids_zlay):,} DIDs\n")

    sys.stdout.write(" fetching from reference...")
    sys.stdout.flush()
    dids_ref = set(fetch_dids(ref_url, collection, limit))
    sys.stdout.write(f"\r{CLEAR_TO_EOL} reference: {len(dids_ref):,} DIDs\n")

    common = dids_zlay & dids_ref
    only_zlay = dids_zlay - dids_ref
    only_ref = dids_ref - dids_zlay

    check("zlay has data", len(dids_zlay) > 0)
    check("reference has data", len(dids_ref) > 0)

    if dids_ref:
        coverage = len(common) / len(dids_ref) * 100
        check(
            f"coverage: {coverage:.1f}%",
            True,  # informational — not a hard pass/fail
            f"{len(common):,} common, {len(only_ref):,} gaps, {len(only_zlay):,} extras",
        )
    else:
        print(" (no reference data to compare)")


def main():
    """Parse arguments, run every check group, and return the exit code.

    Returns:
        0 when every check passed, 1 otherwise.
    """
    parser = argparse.ArgumentParser(description="smoke test zlay's collection index")
    parser.add_argument("--zlay", default="https://zlay.waow.tech", help="zlay endpoint")
    parser.add_argument("--reference", default="https://bsky.network", help="reference relay")
    parser.add_argument("--skip-diff", action="store_true", help="skip set-completeness checks")
    parser.add_argument(
        "--collections",
        nargs="+",
        default=["app.bsky.feed.post", "app.bsky.graph.follow"],
        help="collections to test",
    )
    parser.add_argument("--diff-limit", type=int, default=10_000, help="max DIDs for set completeness")
    args = parser.parse_args()

    print(f"zlay: {args.zlay}")
    print(f"reference: {args.reference}")

    # use first collection for conformance/pagination tests
    test_collection = args.collections[0]

    # 1. endpoint conformance
    test_endpoint_conformance(args.zlay, test_collection)

    # 2. pagination
    test_pagination(args.zlay, test_collection)

    # 3. pagination determinism
    test_pagination_determinism(args.zlay, test_collection)

    # 4. set completeness (per collection)
    if not args.skip_diff:
        for coll in args.collections:
            test_set_completeness(args.zlay, args.reference, coll, args.diff_limit)
    else:
        print("\n--- set completeness ---")
        print(f" [{SKIP}] skipped (--skip-diff)")

    # summary
    print()
    if failures == 0:
        print("all checks passed.")
    else:
        print(f"{failures} check(s) failed.")
    return 1 if failures else 0


if __name__ == "__main__":
    sys.exit(main())

Configure Feed

Configure Feed