A very fast AT Protocol indexer with flexible filtering, XRPC queries, a cursor-backed event stream, and more, built on fjall.
rust
fjall
at-protocol
atproto
indexer
1#!/usr/bin/env nu
2use common.nu *
3
# Poll `log_file` once per second, up to `attempts` times, until it contains `needle`.
# Returns true as soon as the text is found, false if it never shows up.
def log-eventually-contains [log_file: string, needle: string, attempts: int] {
    for attempt in 1..$attempts {
        # flatten newlines so the search sees the log as one long line
        let flat = (open $log_file | str replace --all "\n" " ")
        if ($flat | str contains $needle) {
            return true
        }
        # no point sleeping after the final attempt
        if $attempt < $attempts {
            sleep 1sec
        }
    }
    false
}

# Integration test for crawler throttling.
#
# Starts a mock relay and a hydrant instance configured with a max-pending limit
# of 2 and a resume limit of 1, waits for the pending count to reach 5, checks the
# hydrant log for the throttling message, deletes four repos through the API and
# then checks the log for the "throttling released" message.
#
# Exits with 0 when both log messages were observed, 1 otherwise (after printing
# the tail of the hydrant log).
def main [] {
    # 1. ensure http-nu is installed
    if (which http-nu | is-empty) {
        print "http-nu not found, installing..."
        cargo install http-nu
    }

    # 2. setup ports and paths
    let port = resolve-test-port 3010
    let mock_port = resolve-test-mock-port 3012
    let url = $"http://localhost:($port)"
    let mock_url = $"http://localhost:($mock_port)"
    let db_path = (mktemp -d -t hydrant_throttling.XXXXXX)

    print $"testing crawler throttling..."
    print $"database path: ($db_path)"

    # 3. start mock relay (backgrounded through bash so its pid can be captured)
    print $"starting mock relay on ($mock_port)..."
    let mock_pid = (
        bash -c $"http-nu :($mock_port) tests/mock_relay.nu > ($db_path)/mock.log 2>&1 & echo $!"
        | str trim
        | into int
    )
    print $"mock relay pid: ($mock_pid)"

    # 4. start hydrant with low throttling limits
    let binary = build-hydrant

    let log_file = $"($db_path)/hydrant.log"
    print $"starting hydrant - logs at ($log_file)..."

    let hydrant_pid = (
        with-env {
            HYDRANT_DATABASE_PATH: ($db_path),
            HYDRANT_FULL_NETWORK: "true",
            HYDRANT_RELAY_HOST: ($mock_url),
            HYDRANT_DISABLE_FIREHOSE: "true",
            HYDRANT_DISABLE_BACKFILL: "true", # disable backfill so pending count stays up
            HYDRANT_API_PORT: ($port | into string),
            HYDRANT_LOG_LEVEL: "debug",
            RUST_LOG: "debug",
            HYDRANT_CRAWLER_MAX_PENDING_REPOS: "2",
            HYDRANT_CRAWLER_RESUME_PENDING_REPOS: "1"
        } {
            sh -c $"($binary) >($log_file) 2>&1 & echo $!" | str trim | into int
        }
    )
    print $"hydrant started with pid: ($hydrant_pid)"

    # stays false unless both the throttling and the resumption message are seen
    mut success = false

    try {
        if (wait-for-api $url) {
            print "hydrant api is up."

            # wait for crawler to run and hit limit
            print "waiting for crawler to hit throttling limit..."

            # retry check for 30s
            for i in 1..30 {
                let stats = (http get $"($url)/stats").counts
                let pending = ($stats.pending | into int)

                # the mock serves 5 repos while max pending is 2: the crawler
                # fetches a whole page (5 repos) and only then adds them to the DB,
                # so pending jumps straight to 5; the limit is checked on the
                # following crawler loop.

                print $"[($i)/30] pending: ($pending)"

                if $pending >= 5 {
                    print "crawler discovered repos."
                    break
                }

                sleep 1sec
            }

            # now check logs for throttling message; poll because the crawler only
            # logs it on the loop after the repos were added
            print "checking logs for throttling message..."

            if (log-eventually-contains $log_file "throttling: above max pending" 10) {
                print "CONFIRMED: crawler is throttling!"

                # now testing resumption
                print "testing resumption by removing repos..."

                # remove 4 repos to drop pending (5) to 1 (<= resume limit 1)
                # mock repos are did:web:mock1.com ... mock5.com
                curl -s -X DELETE -H "Content-Type: application/json" -d '[
                    {"did": "did:web:mock1.com"},
                    {"did": "did:web:mock2.com"},
                    {"did": "did:web:mock3.com"},
                    {"did": "did:web:mock4.com"}
                ]' $"($url)/repos"

                print "waiting for crawler to wake up..."

                # poll for the resumption message instead of relying on a single
                # fixed one-second sleep, which races with the crawler's wake-up
                if (log-eventually-contains $log_file "throttling released" 10) {
                    print "CONFIRMED: crawler resumed!"
                    $success = true
                } else {
                    print "FAILED: resumption message not found in logs"
                }

            } else {
                print "FAILED: throttling message not found in logs"
            }

        } else {
            print "hydrant failed to start."
        }
    } catch { |e|
        print $"test failed with error: ($e)"
    }

    # cleanup (best effort: either process may already have exited)
    print "stopping processes..."
    try { kill $hydrant_pid }
    try { kill $mock_pid }

    if $success {
        print "test passed!"
        exit 0
    } else {
        print "test failed!"
        print "hydrant logs:"
        open $log_file | tail -n 20
        exit 1
    }
}