atproto relay implementation in zig zlay.waow.tech
9
fork

Configure Feed

Select the types of activity you want to include in your feed.

migrate to zig 0.16: Io primitives, updated deps, timer regression fixes

phase 1 of the 0.16 migration — threads still use std.Thread.spawn,
but all synchronization primitives now use Io.Mutex/Io.Condition,
and all sleep/timestamp calls go through std.Io.

- bump deps: zat v0.3.0-alpha.7, websocket edeca26, rocksdb 9be930b, pg dev
- build.zig: Module API (link_libc on root_module), .ignore/.trimEnd renames
- Dockerfile: zig 0.16.0-dev.3059
- main.zig: Io.Evented/Threaded backend selection, DebugAllocator, getenv helper
- all modules: Io.Mutex/Io.Condition, io.sleep(), Io.Timestamp, Io.Duration
- thread_pool: stays on std.Thread.spawn (CPU-bound keyed workers)
- event_log: sleep-based flush loop (Io.Condition has no timedWait)
- broadcaster: poll-based idle wakeup for consumer keepalive pings
- thread_pool submit: poll-based stop check to prevent shutdown deadlock
- lru/ring_buffer: Io.Mutex for cross-boundary access
- api/xrpc: Io.Writer.fixed() replaces fixedBufferStream
- collection_index: allocPrint for test paths (realpathAlloc removed)

next: port thread spawns to io.concurrent for upstream/downstream websocket paths

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

+729 -584
+1
.gitignore
··· 1 1 .zig-cache/ 2 2 zig-out/ 3 + zig-pkg/ 3 4 data/ 4 5 .env 5 6 .env.*
+3 -3
Dockerfile
··· 1 1 # multi-stage build: compile natively inside x86_64 linux container 2 - # uses Debian (glibc) because zig 0.15's C++ codegen for musl 2 + # uses Debian (glibc) because zig's C++ codegen for musl 3 3 # produces illegal instructions in RocksDB's LRU cache 4 4 FROM --platform=linux/amd64 debian:bookworm-slim AS builder 5 5 RUN apt-get update && apt-get install -y --no-install-recommends curl xz-utils ca-certificates && rm -rf /var/lib/apt/lists/* 6 - RUN curl -fSL https://ziglang.org/download/0.15.2/zig-x86_64-linux-0.15.2.tar.xz \ 6 + RUN curl -fSL https://ziglang.org/builds/zig-x86_64-linux-0.16.0-dev.3059+42e33db9d.tar.xz \ 7 7 | tar xJ -C /opt 8 - ENV PATH=/opt/zig-x86_64-linux-0.15.2:$PATH 8 + ENV PATH=/opt/zig-x86_64-linux-0.16.0-dev.3059+42e33db9d:$PATH 9 9 WORKDIR /build 10 10 11 11 # fetch dependencies first (cacheable — only changes when build.zig.zon changes)
+6 -6
build.zig
··· 32 32 const build_options = b.addOptions(); 33 33 build_options.addOption([]const u8, "git_sha", git_sha: { 34 34 var code: u8 = 0; 35 - const result = b.runAllowFail(&.{ "git", "rev-parse", "--short", "HEAD" }, &code, .Ignore); 35 + const result = b.runAllowFail(&.{ "git", "rev-parse", "--short", "HEAD" }, &code, .ignore); 36 36 if (result) |output| { 37 - break :git_sha std.mem.trimRight(u8, output, "\n \t"); 37 + break :git_sha std.mem.trimEnd(u8, output, "\n \t"); 38 38 } else |_| { 39 39 break :git_sha "unknown"; 40 40 } ··· 51 51 .imports = imports, 52 52 }); 53 53 relay_mod.addImport("build_options", build_options.createModule()); 54 + relay_mod.link_libc = true; 55 + relay_mod.link_libcpp = true; 54 56 const relay = b.addExecutable(.{ 55 57 .name = "zlay", 56 58 .root_module = relay_mod, 57 59 }); 58 - relay.linkLibC(); 59 - relay.linkLibCpp(); 60 60 b.installArtifact(relay); 61 61 62 62 const run_relay = b.addRunArtifact(relay); ··· 87 87 .imports = imports, 88 88 }); 89 89 test_mod.addImport("build_options", build_options.createModule()); 90 + test_mod.link_libc = true; 91 + test_mod.link_libcpp = true; 90 92 const t = b.addTest(.{ 91 93 .root_module = test_mod, 92 94 }); 93 - t.linkLibC(); 94 - t.linkLibCpp(); 95 95 test_step.dependOn(&b.addRunArtifact(t).step); 96 96 } 97 97 }
+9 -9
build.zig.zon
··· 2 2 .name = .zlay, 3 3 .version = "0.0.1", 4 4 .fingerprint = 0x31343ede133f3e58, 5 - .minimum_zig_version = "0.15.0", 5 + .minimum_zig_version = "0.16.0", 6 6 .dependencies = .{ 7 7 .zat = .{ 8 - .url = "https://tangled.org/zat.dev/zat/archive/v0.2.16.tar.gz", 9 - .hash = "zat-0.2.16-5PuC7tjwBADbnwV5y8ztKUHhGHMJHh2HouvoYImnZ7y5", 8 + .url = "https://tangled.org/zat.dev/zat/archive/v0.3.0-alpha.7.tar.gz", 9 + .hash = "zat-0.3.0-alpha.7-5PuC7uNjBQDv28db31DEKkFn1tU5I4f1GfJs-RrG8_pS", 10 10 }, 11 11 .websocket = .{ 12 - .url = "https://github.com/zzstoatzz/websocket.zig/archive/395d0f4.tar.gz", 13 - .hash = "websocket-0.1.0-ZPISdVJ8AwD7U03ARGgHclzlYSd9GeU91_WDXjRyjYdh", 12 + .url = "https://github.com/zzstoatzz/websocket.zig/archive/edeca26.tar.gz", 13 + .hash = "websocket-0.1.0-ZPISdSmqAwCbwcFtrAQC_q9cegdw-iHyrjCftgfMz-Nf", 14 14 }, 15 15 .pg = .{ 16 - .url = "git+https://github.com/karlseguin/pg.zig?ref=master#e58b318b7867ef065b3135983f829219c5eef891", 17 - .hash = "pg-0.0.0-Wp_7gXFoBgD0fQ72WICKa-bxLga03AXXQ3BbIsjjohQ3", 16 + .url = "git+https://github.com/zzstoatzz/pg.zig?ref=dev#fdc519c42d6df787c4ac5ad9f391baf961d07f9c", 17 + .hash = "pg-0.0.0-Wp_7gaOBBgAdAUwxkZb82X57EovpFUGWp9IdOSCcgKiL", 18 18 }, 19 19 .rocksdb = .{ 20 - .url = "https://github.com/Syndica/rocksdb-zig/archive/baceb67dc9c66e8ba40a83da3de3fd959b889e57.tar.gz", 21 - .hash = "rocksdb-9.7.4-z_CUTg_HAABgQurZzAhpJL5Erij8ZnppTmy3g1LIM8br", 20 + .url = "https://github.com/zzstoatzz/rocksdb-zig/archive/9be930b.tar.gz", 21 + .hash = "rocksdb-9.7.4-z_CUTr_HAADkz2Rte8o6L0TSrQnXjZZuw2kzve5n88RQ", 22 22 }, 23 23 }, 24 24 .paths = .{
+17 -9
src/api/admin.zig
··· 4 4 //! includes host blocking/unblocking, account bans, and backfill control. 5 5 6 6 const std = @import("std"); 7 + const Io = std.Io; 7 8 const h = @import("http.zig"); 8 9 const router = @import("router.zig"); 9 10 const websocket = @import("websocket"); ··· 19 20 20 21 /// check admin auth via headers, send error response if not authorized. returns true if authorized. 21 22 pub fn checkAdmin(conn: *h.Conn, headers: ?*const websocket.Handshake.KeyValue) bool { 22 - const admin_pw = std.posix.getenv("RELAY_ADMIN_PASSWORD") orelse { 23 + const admin_pw = getenv("RELAY_ADMIN_PASSWORD") orelse { 23 24 h.respondJson(conn, .forbidden, "{\"error\":\"admin endpoint not configured\"}"); 24 25 return false; 25 26 }; ··· 105 106 persist.allocator.free(hosts); 106 107 } 107 108 108 - var list: std.ArrayListUnmanaged(u8) = .{}; 109 - defer list.deinit(persist.allocator); 110 - const w = list.writer(persist.allocator); 109 + var aw: Io.Writer.Allocating = .init(persist.allocator); 110 + defer aw.deinit(); 111 + const w = &aw.writer; 111 112 112 113 w.writeAll("{\"hosts\":[") catch return; 113 114 114 115 for (hosts, 0..) 
|host, i| { 115 116 if (i > 0) w.writeByte(',') catch return; 116 117 if (host.account_limit) |limit| { 117 - std.fmt.format(w, "{{\"id\":{d},\"hostname\":\"{s}\",\"status\":\"{s}\",\"last_seq\":{d},\"failed_attempts\":{d},\"account_limit\":{d}}}", .{ 118 + w.print("{{\"id\":{d},\"hostname\":\"{s}\",\"status\":\"{s}\",\"last_seq\":{d},\"failed_attempts\":{d},\"account_limit\":{d}}}", .{ 118 119 host.id, 119 120 host.hostname, 120 121 host.status, ··· 123 124 limit, 124 125 }) catch return; 125 126 } else { 126 - std.fmt.format(w, "{{\"id\":{d},\"hostname\":\"{s}\",\"status\":\"{s}\",\"last_seq\":{d},\"failed_attempts\":{d},\"account_limit\":null}}", .{ 127 + w.print("{{\"id\":{d},\"hostname\":\"{s}\",\"status\":\"{s}\",\"last_seq\":{d},\"failed_attempts\":{d},\"account_limit\":null}}", .{ 127 128 host.id, 128 129 host.hostname, 129 130 host.status, ··· 133 134 } 134 135 } 135 136 136 - std.fmt.format(w, "],\"active_workers\":{d}}}", .{ctx.slurper.workerCount()}) catch return; 137 - h.respondJson(conn, .ok, list.items); 137 + w.print("],\"active_workers\":{d}}}", .{ctx.slurper.workerCount()}) catch return; 138 + h.respondJson(conn, .ok, aw.written()); 138 139 } 139 140 140 141 pub fn handleAdminBlockHost(conn: *h.Conn, body: []const u8, headers: *const websocket.Handshake.KeyValue, persist: *event_log_mod.DiskPersist) void { ··· 377 378 378 379 /// format current UTC time as ISO 8601 (YYYY-MM-DDTHH:MM:SSZ) 379 380 fn formatTimestamp(buf: *[24]u8) []const u8 { 380 - const ts: u64 = @intCast(std.time.timestamp()); 381 + var tp: std.c.timespec = undefined; 382 + _ = std.c.clock_gettime(.REALTIME, &tp); 383 + const ts: u64 = @intCast(tp.sec); 381 384 const es = std.time.epoch.EpochSeconds{ .secs = ts }; 382 385 const day = es.getEpochDay(); 383 386 const yd = day.calculateYearDay(); ··· 393 396 ds.getSecondsIntoMinute(), 394 397 }) catch "1970-01-01T00:00:00Z"; 395 398 } 399 + 400 + fn getenv(key: [*:0]const u8) ?[]const u8 { 401 + const ptr = std.c.getenv(key) orelse 
return null; 402 + return std.mem.sliceTo(ptr, 0); 403 + }
+1 -1
src/api/router.zig
··· 67 67 h.respondJson(conn, .ok, "{\"status\":\"ok\"}"); 68 68 } else if (std.mem.eql(u8, path, "/_stats")) { 69 69 var stats_buf: [4096]u8 = undefined; 70 - const body = broadcaster.formatStatsResponse(ctx.stats, &stats_buf); 70 + const body = broadcaster.formatStatsResponse(ctx.stats, &stats_buf, ctx.bc.io); 71 71 h.respondJson(conn, .ok, body); 72 72 } else if (std.mem.eql(u8, path, "/xrpc/com.atproto.sync.listRepos")) { 73 73 xrpc.handleListRepos(conn, query, ctx.persist);
+17 -22
src/api/xrpc.zig
··· 5 5 //! listHosts, getHostStatus, requestCrawl 6 6 7 7 const std = @import("std"); 8 + const Io = std.Io; 8 9 const h = @import("http.zig"); 9 10 const event_log_mod = @import("../event_log.zig"); 10 11 const collection_index_mod = @import("../collection_index.zig"); ··· 48 49 49 50 // build JSON response into a buffer 50 51 var buf: [65536]u8 = undefined; 51 - var fbs = std.io.fixedBufferStream(&buf); 52 - const w = fbs.writer(); 52 + var w: Io.Writer = .fixed(&buf); 53 53 54 54 var count: i64 = 0; 55 55 var last_uid: i64 = 0; ··· 100 100 101 101 // include cursor if we got a full page 102 102 if (count >= limit and count >= 2) { 103 - std.fmt.format(w, ",\"cursor\":\"{d}\"", .{last_uid}) catch return; 103 + w.print(",\"cursor\":\"{d}\"", .{last_uid}) catch return; 104 104 } 105 105 106 106 w.writeByte('}') catch return; 107 107 108 - h.respondJson(conn, .ok, fbs.getWritten()); 108 + h.respondJson(conn, .ok, w.buffered()); 109 109 } 110 110 111 111 pub fn handleGetRepoStatus(conn: *h.Conn, query: []const u8, persist: *event_log_mod.DiskPersist) void { ··· 144 144 const status = if (!local_ok) local_status else upstream_status; 145 145 146 146 var buf: [4096]u8 = undefined; 147 - var fbs = std.io.fixedBufferStream(&buf); 148 - const w = fbs.writer(); 147 + var w: Io.Writer = .fixed(&buf); 149 148 150 149 w.writeAll("{\"did\":\"") catch return; 151 150 w.writeAll(did) catch return; ··· 166 165 } 167 166 168 167 w.writeByte('}') catch return; 169 - h.respondJson(conn, .ok, fbs.getWritten()); 168 + h.respondJson(conn, .ok, w.buffered()); 170 169 } 171 170 172 171 pub fn handleGetRepo(conn: *h.Conn, query: []const u8, persist: *event_log_mod.DiskPersist) void { ··· 257 256 } 258 257 259 258 var buf: [4096]u8 = undefined; 260 - var fbs = std.io.fixedBufferStream(&buf); 261 - const w = fbs.writer(); 259 + var w: Io.Writer = .fixed(&buf); 262 260 263 261 w.writeAll("{\"cid\":\"") catch return; 264 262 w.writeAll(cid) catch return; ··· 266 264 w.writeAll(rev) catch 
return; 267 265 w.writeAll("\"}") catch return; 268 266 269 - h.respondJson(conn, .ok, fbs.getWritten()); 267 + h.respondJson(conn, .ok, w.buffered()); 270 268 } 271 269 272 270 pub fn handleListReposByCollection(conn: *h.Conn, query: []const u8, ci: *collection_index_mod.CollectionIndex) void { ··· 302 300 303 301 // build JSON response 304 302 var buf: [65536]u8 = undefined; 305 - var fbs = std.io.fixedBufferStream(&buf); 306 - const w = fbs.writer(); 303 + var w: Io.Writer = .fixed(&buf); 307 304 308 305 w.writeAll("{\"repos\":[") catch return; 309 306 for (0..ci_result.count) |i| { ··· 323 320 } 324 321 325 322 w.writeByte('}') catch return; 326 - h.respondJson(conn, .ok, fbs.getWritten()); 323 + h.respondJson(conn, .ok, w.buffered()); 327 324 } 328 325 329 326 pub fn handleListHosts(conn: *h.Conn, query: []const u8, persist: *event_log_mod.DiskPersist) void { ··· 358 355 defer result.deinit(); 359 356 360 357 var buf: [65536]u8 = undefined; 361 - var fbs = std.io.fixedBufferStream(&buf); 362 - const w = fbs.writer(); 358 + var w: Io.Writer = .fixed(&buf); 363 359 364 360 var count: i64 = 0; 365 361 var last_id: i64 = 0; ··· 377 373 w.writeAll("{\"hostname\":\"") catch return; 378 374 w.writeAll(hostname) catch return; 379 375 w.writeAll("\"") catch return; 380 - std.fmt.format(w, ",\"seq\":{d}", .{seq}) catch return; 376 + w.print(",\"seq\":{d}", .{seq}) catch return; 381 377 w.writeAll(",\"status\":\"") catch return; 382 378 w.writeAll(status) catch return; 383 379 w.writeAll("\"}") catch return; ··· 389 385 w.writeByte(']') catch return; 390 386 391 387 if (count >= limit and count > 1) { 392 - std.fmt.format(w, ",\"cursor\":\"{d}\"", .{last_id}) catch return; 388 + w.print(",\"cursor\":\"{d}\"", .{last_id}) catch return; 393 389 } 394 390 395 391 w.writeByte('}') catch return; 396 - h.respondJson(conn, .ok, fbs.getWritten()); 392 + h.respondJson(conn, .ok, w.buffered()); 397 393 } 398 394 399 395 pub fn handleGetHostStatus(conn: *h.Conn, query: []const u8, 
persist: *event_log_mod.DiskPersist) void { ··· 440 436 } else 0; 441 437 442 438 var buf: [4096]u8 = undefined; 443 - var fbs = std.io.fixedBufferStream(&buf); 444 - const w = fbs.writer(); 439 + var w: Io.Writer = .fixed(&buf); 445 440 446 441 w.writeAll("{\"hostname\":\"") catch return; 447 442 w.writeAll(host_name) catch return; 448 443 w.writeAll("\"") catch return; 449 - std.fmt.format(w, ",\"seq\":{d},\"accountCount\":{d}", .{ seq, account_count }) catch return; 444 + w.print(",\"seq\":{d},\"accountCount\":{d}", .{ seq, account_count }) catch return; 450 445 w.writeAll(",\"status\":\"") catch return; 451 446 w.writeAll(status) catch return; 452 447 w.writeAll("\"}") catch return; 453 448 454 - h.respondJson(conn, .ok, fbs.getWritten()); 449 + h.respondJson(conn, .ok, w.buffered()); 455 450 } 456 451 457 452 pub fn handleRequestCrawl(conn: *h.Conn, body: []const u8, slurper: *slurper_mod.Slurper) void {
+17 -13
src/backfill.zig
··· 8 8 //! triggered via POST /admin/backfill-collections, status via GET. 9 9 10 10 const std = @import("std"); 11 + const Io = std.Io; 11 12 const http = std.http; 12 13 const pg = @import("pg"); 13 14 const collection_index_mod = @import("collection_index.zig"); ··· 27 28 running: std.atomic.Value(bool), 28 29 thread: ?std.Thread, 29 30 source: []const u8, 31 + io: Io, 30 32 31 33 pub fn init( 32 34 allocator: Allocator, 33 35 collection_index: *collection_index_mod.CollectionIndex, 34 36 db: *pg.Pool, 37 + io: Io, 35 38 ) Backfiller { 36 39 return .{ 37 40 .allocator = allocator, ··· 40 43 .running = .{ .raw = false }, 41 44 .thread = null, 42 45 .source = "", 46 + .io = io, 43 47 }; 44 48 } 45 49 ··· 99 103 } 100 104 101 105 fn discoverCollections(self: *Backfiller) ![][]const u8 { 102 - var seen: std.StringHashMapUnmanaged(void) = .{}; 106 + var seen: std.StringHashMapUnmanaged(void) = .empty; 103 107 defer seen.deinit(self.allocator); 104 108 105 109 // source 1: lexicon garden ··· 151 155 /// fetch NSIDs from https://lexicon.garden/llms.txt 152 156 /// parses lines matching `- [`<nsid>`](` 153 157 fn fetchLexiconGarden(self: *Backfiller) ![][]const u8 { 154 - var client: http.Client = .{ .allocator = self.allocator }; 158 + var client: http.Client = .{ .allocator = self.allocator, .io = self.io }; 155 159 defer client.deinit(); 156 160 157 161 var aw: std.Io.Writer.Allocating = .init(self.allocator); ··· 167 171 168 172 const body = aw.toArrayList().items; 169 173 170 - var nsids: std.ArrayListUnmanaged([]const u8) = .{}; 174 + var nsids: std.ArrayListUnmanaged([]const u8) = .empty; 171 175 defer nsids.deinit(self.allocator); 172 176 173 177 // parse lines like: - [`app.bsky.feed.post`]( ··· 217 221 } 218 222 219 223 // reuse one HTTP client across all pages for this collection 220 - var client: http.Client = .{ .allocator = self.allocator }; 224 + var client: http.Client = .{ .allocator = self.allocator, .io = self.io }; 221 225 defer client.deinit(); 222 
226 223 227 var page_count: usize = 0; ··· 253 257 cursor = self.allocator.dupe(u8, nc) catch break; 254 258 255 259 // brief pause between pages 256 - std.posix.nanosleep(0, 100 * std.time.ns_per_ms); 260 + self.io.sleep(Io.Duration.fromMilliseconds(100), .awake) catch {}; 257 261 } else { 258 262 // no more pages — mark complete 259 263 _ = self.db.exec( ··· 296 300 .next_cursor = null, 297 301 }; 298 302 299 - var dids: std.ArrayListUnmanaged([]const u8) = .{}; 303 + var dids: std.ArrayListUnmanaged([]const u8) = .empty; 300 304 defer dids.deinit(self.allocator); 301 305 302 306 for (repos) |repo| { ··· 329 333 330 334 /// return status summary for the admin endpoint 331 335 pub fn getStatus(self: *Backfiller, allocator: Allocator) ![]u8 { 332 - var list: std.ArrayListUnmanaged(u8) = .{}; 333 - defer list.deinit(allocator); 334 - const w = list.writer(allocator); 336 + var aw: Io.Writer.Allocating = .init(allocator); 337 + defer aw.deinit(); 338 + const w = &aw.writer; 335 339 336 340 // query aggregate stats 337 341 var total: i64 = 0; ··· 350 354 } 351 355 } 352 356 353 - std.fmt.format(w, "{{\"running\":{},\"total\":{d},\"completed\":{d},\"in_progress\":{d},\"total_imported\":{d},\"collections\":[", .{ 357 + w.print("{{\"running\":{},\"total\":{d},\"completed\":{d},\"in_progress\":{d},\"total_imported\":{d},\"collections\":[", .{ 354 358 self.isRunning(), 355 359 total, 356 360 completed, ··· 376 380 const count = dbrow.get(i64, 3); 377 381 const is_completed = dbrow.get(bool, 4); 378 382 379 - std.fmt.format(w, "{{\"collection\":\"{s}\",\"source\":\"{s}\",\"imported\":{d},\"completed\":{}", .{ 383 + w.print("{{\"collection\":\"{s}\",\"source\":\"{s}\",\"imported\":{d},\"completed\":{}", .{ 380 384 collection, 381 385 source, 382 386 count, ··· 384 388 }) catch {}; 385 389 386 390 if (cursor_val.len > 0 and !is_completed) { 387 - std.fmt.format(w, ",\"cursor\":\"{s}\"", .{cursor_val}) catch {}; 391 + w.print(",\"cursor\":\"{s}\"", .{cursor_val}) catch {}; 388 
392 } 389 393 390 394 w.writeByte('}') catch {}; 391 395 } 392 396 393 397 w.writeAll("]}") catch {}; 394 - return try list.toOwnedSlice(allocator); 398 + return try aw.toOwnedSlice(); 395 399 } 396 400 };
+88 -77
src/broadcaster.zig
··· 8 8 //! - health/stats HTTP endpoints via websocket handshake routing 9 9 10 10 const std = @import("std"); 11 + const Io = std.Io; 11 12 const websocket = @import("websocket"); 12 13 const zat = @import("zat"); 13 14 const ring_buffer = @import("ring_buffer.zig"); ··· 205 206 }; 206 207 207 208 // rebuild map entries with seq replaced 208 - var new_entries: std.ArrayListUnmanaged(cbor.Value.MapEntry) = .{}; 209 + var new_entries: std.ArrayListUnmanaged(cbor.Value.MapEntry) = .empty; 209 210 var found_seq = false; 210 211 for (old_entries) |entry| { 211 212 if (std.mem.eql(u8, entry.key, "seq")) { ··· 245 246 read_pos: usize = 0, 246 247 buf_len: usize = 0, 247 248 alive: std.atomic.Value(bool) = .{ .raw = true }, 248 - mutex: std.Thread.Mutex = .{}, 249 - cond: std.Thread.Condition = .{}, 249 + mutex: Io.Mutex = Io.Mutex.init, 250 + cond: Io.Condition = Io.Condition.init, 250 251 thread: ?std.Thread = null, 251 252 last_send_time: i128 = 0, 253 + io: Io, 252 254 253 255 /// push a shared frame to this consumer's send buffer. 254 256 /// acquires a reference. returns false if full (consumer too slow). 
255 257 pub fn enqueue(self: *Consumer, frame: *SharedFrame) bool { 256 - self.mutex.lock(); 257 - defer self.mutex.unlock(); 258 + self.mutex.lockUncancelable(self.io); 259 + defer self.mutex.unlock(self.io); 258 260 259 261 if (self.buf_len == BUFFER_CAP) return false; 260 262 ··· 262 264 self.buf[self.write_pos] = frame; 263 265 self.write_pos = (self.write_pos + 1) % BUFFER_CAP; 264 266 self.buf_len += 1; 265 - self.cond.signal(); 267 + self.cond.signal(self.io); 266 268 return true; 267 269 } 268 270 ··· 289 291 } 290 292 291 293 fn writeLoop(self: *Consumer) void { 292 - self.last_send_time = std.time.nanoTimestamp(); 294 + self.last_send_time = Io.Timestamp.now(self.io, .real).nanoseconds; 293 295 294 296 while (self.alive.load(.acquire)) { 295 297 var frame: ?*SharedFrame = null; 296 298 { 297 - self.mutex.lock(); 298 - defer self.mutex.unlock(); 299 - while (self.buf_len == 0 and self.alive.load(.acquire)) { 300 - // wake every 100ms to check ping timer 301 - self.cond.timedWait(&self.mutex, 100 * std.time.ns_per_ms) catch {}; 299 + self.mutex.lockUncancelable(self.io); 300 + defer self.mutex.unlock(self.io); 301 + if (self.buf_len == 0) { 302 + // no data: poll briefly instead of blocking on cond 303 + // (Io.Condition has no timedWait, so we poll to allow periodic ping checks) 304 + self.mutex.unlock(self.io); 305 + self.io.sleep(Io.Duration.fromMilliseconds(100), .awake) catch {}; 306 + self.mutex.lockUncancelable(self.io); 302 307 } 303 308 frame = self.dequeue(); 304 309 } ··· 308 313 self.alive.store(false, .release); 309 314 return; 310 315 }; 311 - self.last_send_time = std.time.nanoTimestamp(); 316 + self.last_send_time = Io.Timestamp.now(self.io, .real).nanoseconds; 312 317 } else { 313 318 // no data — check if we should send a ping 314 319 self.maybePing(); ··· 316 321 } 317 322 // drain remaining buffered frames 318 323 while (true) { 319 - self.mutex.lock(); 324 + self.mutex.lockUncancelable(self.io); 320 325 const frame = self.dequeue(); 321 
- self.mutex.unlock(); 326 + self.mutex.unlock(self.io); 322 327 if (frame) |f| { 323 328 f.release(); 324 329 } else break; ··· 326 331 } 327 332 328 333 fn maybePing(self: *Consumer) void { 329 - const now = std.time.nanoTimestamp(); 334 + const now = Io.Timestamp.now(self.io, .real).nanoseconds; 330 335 const elapsed: u64 = @intCast(@max(0, now - self.last_send_time)); 331 336 if (elapsed >= ping_interval_ns) { 332 337 // send ping ··· 340 345 341 346 pub fn shutdown(self: *Consumer) void { 342 347 self.alive.store(false, .release); 343 - self.cond.signal(); 348 + self.cond.signal(self.io); 344 349 if (self.thread) |t| t.join(); 345 350 self.thread = null; 346 351 } ··· 363 368 364 369 pub const Broadcaster = struct { 365 370 allocator: Allocator, 366 - consumers: std.ArrayListUnmanaged(*Consumer) = .{}, 367 - consumers_mutex: std.Thread.Mutex = .{}, 368 - broadcast_order: std.Thread.Mutex = .{}, 371 + consumers: std.ArrayListUnmanaged(*Consumer) = .empty, 372 + consumers_mutex: Io.Mutex = Io.Mutex.init, 373 + broadcast_order: Io.Mutex = Io.Mutex.init, 369 374 history: FrameHistory, 370 375 persist: ?*event_log_mod.DiskPersist = null, 371 376 stats: Stats = .{}, 372 377 error_frame: ?[]const u8 = null, 373 378 http_fallback: ?HttpFallbackFn = null, 374 379 http_fallback_ctx: ?*anyopaque = null, 380 + io: Io, 375 381 376 - pub fn init(allocator: Allocator) Broadcaster { 382 + pub fn init(allocator: Allocator, io: Io) Broadcaster { 377 383 return .{ 378 384 .allocator = allocator, 379 - .history = FrameHistory.init(allocator), 380 - .stats = .{ .start_time = std.time.timestamp() }, 385 + .history = FrameHistory.init(allocator, io), 386 + .stats = .{ .start_time = timestamp(io) }, 381 387 .error_frame = buildErrorFrame(allocator), 388 + .io = io, 382 389 }; 383 390 } 384 391 385 392 pub fn deinit(self: *Broadcaster) void { 386 - self.consumers_mutex.lock(); 387 - defer self.consumers_mutex.unlock(); 393 + self.consumers_mutex.lockUncancelable(self.io); 394 + defer 
self.consumers_mutex.unlock(self.io); 388 395 for (self.consumers.items) |consumer| { 389 396 consumer.shutdown(); 390 397 self.allocator.destroy(consumer); ··· 399 406 consumer.* = .{ 400 407 .conn = conn, 401 408 .allocator = self.allocator, 409 + .io = self.io, 402 410 }; 403 411 consumer.thread = std.Thread.spawn(.{ .stack_size = @import("main.zig").default_stack_size }, Consumer.writeLoop, .{consumer}) catch { 404 412 self.allocator.destroy(consumer); 405 413 return error.ThreadSpawnFailed; 406 414 }; 407 415 408 - self.consumers_mutex.lock(); 409 - defer self.consumers_mutex.unlock(); 416 + self.consumers_mutex.lockUncancelable(self.io); 417 + defer self.consumers_mutex.unlock(self.io); 410 418 self.consumers.append(self.allocator, consumer) catch { 411 419 consumer.shutdown(); 412 420 self.allocator.destroy(consumer); ··· 419 427 420 428 pub fn removeConsumer(self: *Broadcaster, consumer: *Consumer) void { 421 429 { 422 - self.consumers_mutex.lock(); 423 - defer self.consumers_mutex.unlock(); 430 + self.consumers_mutex.lockUncancelable(self.io); 431 + defer self.consumers_mutex.unlock(self.io); 424 432 for (self.consumers.items, 0..) 
|c, i| { 425 433 if (c == consumer) { 426 434 _ = self.consumers.swapRemove(i); ··· 446 454 const frame = SharedFrame.create(self.allocator, data) catch return; 447 455 defer frame.release(); // release broadcaster's reference 448 456 449 - self.consumers_mutex.lock(); 450 - defer self.consumers_mutex.unlock(); 457 + self.consumers_mutex.lockUncancelable(self.io); 458 + defer self.consumers_mutex.unlock(self.io); 451 459 452 460 var frames_sent: u64 = 0; 453 461 var i: usize = 0; ··· 482 490 consumer.conn.writeBin(ef) catch {}; 483 491 } 484 492 consumer.alive.store(false, .release); 485 - consumer.cond.signal(); 493 + consumer.cond.signal(consumer.io); 486 494 consumer.conn.close(.{}) catch {}; 487 495 // clean up asynchronously to avoid joining thread while holding mutex 488 496 const alloc = self.allocator; ··· 501 509 pub fn replayTo(self: *Broadcaster, consumer: *Consumer, cursor: u64) void { 502 510 // phase 1: disk replay from diskpersist 503 511 if (self.persist) |dp| { 504 - var entries: std.ArrayListUnmanaged(event_log_mod.PlaybackEntry) = .{}; 512 + var entries: std.ArrayListUnmanaged(event_log_mod.PlaybackEntry) = .empty; 505 513 defer { 506 514 for (entries.items) |e| self.allocator.free(e.data); 507 515 entries.deinit(self.allocator); ··· 537 545 } 538 546 539 547 fn replayFromMemory(self: *Broadcaster, consumer: *Consumer, cursor: u64) void { 540 - self.history.mutex.lock(); 541 - defer self.history.mutex.unlock(); 548 + self.history.mutex.lockUncancelable(self.io); 549 + defer self.history.mutex.unlock(self.io); 542 550 543 551 var i: usize = 0; 544 552 while (i < self.history.len) : (i += 1) { ··· 551 559 } 552 560 553 561 pub fn consumerCount(self: *Broadcaster) usize { 554 - self.consumers_mutex.lock(); 555 - defer self.consumers_mutex.unlock(); 562 + self.consumers_mutex.lockUncancelable(self.io); 563 + defer self.consumers_mutex.unlock(self.io); 556 564 return self.consumers.items.len; 557 565 } 558 566 }; ··· 650 658 pub fn clientClose(self: 
*Handler, _: []const u8) !void { 651 659 if (self.consumer) |c| { 652 660 c.alive.store(false, .release); 653 - c.cond.signal(); 661 + c.cond.signal(c.io); 654 662 } 655 663 self.conn.close(.{}) catch {}; 656 664 } ··· 689 697 workers_count: usize = 0, 690 698 }; 691 699 692 - pub fn formatPrometheusMetrics(stats: *const Stats, cache_entries: usize, attribution: AttributionMetrics, data_dir: []const u8, buf: []u8) []const u8 { 693 - const uptime: i64 = std.time.timestamp() - stats.start_time; 694 - var fbs = std.io.fixedBufferStream(buf); 695 - const w = fbs.writer(); 700 + pub fn formatPrometheusMetrics(stats: *const Stats, cache_entries: usize, attribution: AttributionMetrics, data_dir: []const u8, buf: []u8, io: Io) []const u8 { 701 + const uptime: i64 = timestamp(io) - stats.start_time; 702 + var w: Io.Writer = .fixed(buf); 696 703 697 704 // canary metric — proves what binary is running 698 - std.fmt.format(w, 705 + w.print( 699 706 \\# TYPE relay_build_info gauge 700 707 \\# HELP relay_build_info build provenance 701 708 \\relay_build_info{{git_sha="{s}",optimize="{s}"}} 1 702 709 \\ 703 710 , .{ build_options.git_sha, build_options.optimize }) catch {}; 704 711 705 - std.fmt.format(w, 712 + w.print( 706 713 \\# TYPE relay_frames_received_total counter 707 714 \\relay_frames_received_total {d} 708 715 \\ ··· 831 838 stats.host_authority_host_changed.load(.acquire), 832 839 stats.host_authority_time_us.load(.acquire), 833 840 stats.pool_queued_bytes.load(.acquire), 834 - }) catch return fbs.getWritten(); 841 + }) catch return w.buffered(); 835 842 836 843 // validation failure breakdown by reason 837 - std.fmt.format(w, 844 + w.print( 838 845 \\# TYPE relay_validation_failed counter 839 846 \\# HELP relay_validation_failed validation failures by reason 840 847 \\relay_validation_failed{{reason="bad_did"}} {d} ··· 853 860 stats.failed_bad_structure.load(.acquire), 854 861 stats.failed_host_authority.load(.acquire), 855 862 
stats.failed_future_rev.load(.acquire), 856 - }) catch return fbs.getWritten(); 863 + }) catch return w.buffered(); 857 864 858 865 // memory attribution — internal capacities help identify what's consuming RSS 859 - std.fmt.format(w, 866 + w.print( 860 867 \\# TYPE relay_validator_cache_map_cap gauge 861 868 \\# HELP relay_validator_cache_map_cap hashmap backing capacity of signing key LRU 862 869 \\relay_validator_cache_map_cap {d} ··· 892 899 893 900 // linux-only process metrics from /proc 894 901 if (comptime builtin.os.tag == .linux) { 895 - appendProcMetrics(w); 896 - appendDiskMetrics(w, data_dir); 902 + appendProcMetrics(&w, io); 903 + appendDiskMetrics(&w, data_dir); 897 904 } 898 905 899 - return fbs.getWritten(); 906 + return w.buffered(); 900 907 } 901 908 902 - fn appendProcMetrics(w: anytype) void { 909 + fn appendProcMetrics(w: *Io.Writer, io: Io) void { 903 910 // RSS from /proc/self/statm (field[1] * page_size) 904 - if (std.fs.openFileAbsolute("/proc/self/statm", .{})) |f| { 905 - defer f.close(); 911 + if (Io.Dir.openFileAbsolute(io, "/proc/self/statm", .{})) |f| { 912 + defer f.close(io); 906 913 var statm_buf: [256]u8 = undefined; 907 - const n = f.readAll(&statm_buf) catch 0; 914 + const n = f.readPositionalAll(io, &statm_buf, 0) catch 0; 908 915 if (n > 0) { 909 916 const line = statm_buf[0..n]; 910 917 var iter = std.mem.splitScalar(u8, line, ' '); 911 918 _ = iter.next(); // skip total pages 912 919 if (iter.next()) |rss_pages| { 913 920 if (std.fmt.parseInt(u64, rss_pages, 10)) |pages| { 914 - std.fmt.format(w, 921 + w.print( 915 922 \\# TYPE relay_process_rss_bytes gauge 916 923 \\relay_process_rss_bytes {d} 917 924 \\ ··· 922 929 } else |_| {} 923 930 924 931 // thread count, VmHWM, RssAnon from /proc/self/status 925 - if (std.fs.openFileAbsolute("/proc/self/status", .{})) |f| { 926 - defer f.close(); 932 + if (Io.Dir.openFileAbsolute(io, "/proc/self/status", .{})) |f| { 933 + defer f.close(io); 927 934 var status_buf: [4096]u8 = 
undefined; 928 - const n = f.readAll(&status_buf) catch 0; 935 + const n = f.readPositionalAll(io, &status_buf, 0) catch 0; 929 936 if (n > 0) { 930 937 const content = status_buf[0..n]; 931 938 const fields = .{ ··· 940 947 const end = std.mem.indexOfScalar(u8, trimmed, ' ') orelse 941 948 (std.mem.indexOfScalar(u8, trimmed, '\n') orelse trimmed.len); 942 949 if (std.fmt.parseInt(u64, trimmed[0..end], 10)) |val| { 943 - std.fmt.format(w, 950 + w.print( 944 951 \\# TYPE {s} gauge 945 952 \\{s} {d} 946 953 \\ ··· 959 966 const in_use: u64 = @as(u32, @bitCast(mi.uordblks)); 960 967 const free_bytes: u64 = @as(u32, @bitCast(mi.fordblks)); 961 968 const mmap_bytes: u64 = @as(u32, @bitCast(mi.hblkhd)); 962 - std.fmt.format(w, 969 + w.print( 963 970 \\# TYPE relay_malloc_arena_bytes gauge 964 971 \\relay_malloc_arena_bytes {d} 965 972 \\ ··· 978 985 const posix_vfs = @cImport(@cInclude("sys/statvfs.h")); 979 986 const malloc_h = @cImport(@cInclude("malloc.h")); 980 987 981 - fn appendDiskMetrics(w: anytype, data_dir: []const u8) void { 988 + fn appendDiskMetrics(w: *Io.Writer, data_dir: []const u8) void { 982 989 // statvfs needs a null-terminated path 983 990 var path_buf: [4096]u8 = undefined; 984 991 if (data_dir.len >= path_buf.len) return; ··· 992 999 const total = stat.f_blocks * block_size; 993 1000 const available = stat.f_bavail * block_size; 994 1001 995 - std.fmt.format(w, 1002 + w.print( 996 1003 \\# TYPE relay_disk_total_bytes gauge 997 1004 \\relay_disk_total_bytes {d} 998 1005 \\ ··· 1002 1009 , .{ total, available }) catch {}; 1003 1010 } 1004 1011 1005 - pub fn formatStatsResponse(stats: *const Stats, buf: []u8) []const u8 { 1012 + pub fn formatStatsResponse(stats: *const Stats, buf: []u8, io: Io) []const u8 { 1006 1013 return std.fmt.bufPrint(buf, 1007 1014 
\\{{"seq":{d},"relay_seq":{d},"consumers":{d},"connected_inbound":{d},"frames_in":{d},"frames_out":{d},"validated":{d},"failed":{d},"skipped":{d},"decode_errors":{d},"cache_hits":{d},"cache_misses":{d},"slow_consumers":{d},"chain_breaks":{d},"pool_backpressure":{d},"uptime_seconds":{d}}} 1008 1015 , .{ ··· 1021 1028 stats.slow_consumers.load(.acquire), 1022 1029 stats.chain_breaks.load(.acquire), 1023 1030 stats.pool_backpressure.load(.acquire), 1024 - std.time.timestamp() - stats.start_time, 1031 + timestamp(io) - stats.start_time, 1025 1032 }) catch ""; 1026 1033 } 1027 1034 1035 + fn timestamp(io: Io) i64 { 1036 + return @intCast(@divFloor(Io.Timestamp.now(io, .real).nanoseconds, std.time.ns_per_s)); 1037 + } 1038 + 1028 1039 // --- tests --- 1029 1040 1030 1041 test "broadcaster add and remove consumer" { 1031 - var b = Broadcaster.init(std.testing.allocator); 1042 + var b = Broadcaster.init(std.testing.allocator, std.testing.io); 1032 1043 defer b.deinit(); 1033 1044 1034 1045 try std.testing.expectEqual(@as(u64, 0), b.stats.seq.load(.acquire)); ··· 1036 1047 } 1037 1048 1038 1049 test "broadcast updates stats and history" { 1039 - var b = Broadcaster.init(std.testing.allocator); 1050 + var b = Broadcaster.init(std.testing.allocator, std.testing.io); 1040 1051 defer b.deinit(); 1041 1052 1042 1053 b.broadcast(1, "frame1"); ··· 1051 1062 } 1052 1063 1053 1064 test "frame history supports cursor replay" { 1054 - var b = Broadcaster.init(std.testing.allocator); 1065 + var b = Broadcaster.init(std.testing.allocator, std.testing.io); 1055 1066 defer b.deinit(); 1056 1067 1057 1068 for (1..6) |i| { ··· 1102 1113 } 1103 1114 1104 1115 test "formatPrometheusMetrics produces valid output" { 1105 - var stats = Stats{ .start_time = std.time.timestamp() - 60 }; 1116 + var stats = Stats{ .start_time = timestamp(std.testing.io) - 60 }; 1106 1117 stats.seq.store(99999, .release); 1107 1118 stats.relay_seq.store(12345, .release); 1108 1119 stats.consumer_count.store(3, 
.release); ··· 1115 1126 stats.cache_misses.store(100, .release); 1116 1127 1117 1128 var buf: [65536]u8 = undefined; 1118 - const output = formatPrometheusMetrics(&stats, 42, .{}, "/tmp", &buf); 1129 + const output = formatPrometheusMetrics(&stats, 42, .{}, "/tmp", &buf, std.testing.io); 1119 1130 1120 1131 try std.testing.expect(std.mem.indexOf(u8, output, "relay_frames_received_total 10000") != null); 1121 1132 try std.testing.expect(std.mem.indexOf(u8, output, "relay_frames_broadcast_total 9000") != null); ··· 1131 1142 } 1132 1143 1133 1144 test "formatStatsResponse produces valid JSON" { 1134 - var stats = Stats{ .start_time = std.time.timestamp() - 42 }; 1145 + var stats = Stats{ .start_time = timestamp(std.testing.io) - 42 }; 1135 1146 stats.seq.store(100, .release); 1136 1147 stats.frames_in.store(200, .release); 1137 1148 stats.consumer_count.store(5, .release); 1138 1149 1139 1150 var buf: [4096]u8 = undefined; 1140 - const response = formatStatsResponse(&stats, &buf); 1151 + const response = formatStatsResponse(&stats, &buf, std.testing.io); 1141 1152 1142 1153 try std.testing.expect(std.mem.startsWith(u8, response, "{")); 1143 1154 try std.testing.expect(std.mem.indexOf(u8, response, "\"seq\":100") != null); ··· 1272 1283 // ordering lock, assign a seq (atomic increment, like persist), and 1273 1284 // broadcast. the ring buffer history must be strictly monotonic. 
1274 1285 1275 - var bc = Broadcaster.init(std.testing.allocator); 1286 + var bc = Broadcaster.init(std.testing.allocator, std.testing.io); 1276 1287 defer bc.deinit(); 1277 1288 1278 1289 const num_threads = 8; ··· 1281 1292 var seq_counter = std.atomic.Value(u64).init(0); 1282 1293 1283 1294 const Worker = struct { 1284 - fn run(broadcaster: *Broadcaster, counter: *std.atomic.Value(u64)) void { 1295 + fn run(bc_ptr: *Broadcaster, counter: *std.atomic.Value(u64)) void { 1285 1296 for (0..frames_per_thread) |_| { 1286 - broadcaster.broadcast_order.lock(); 1287 - defer broadcaster.broadcast_order.unlock(); 1297 + bc_ptr.broadcast_order.lockUncancelable(bc_ptr.io); 1298 + defer bc_ptr.broadcast_order.unlock(bc_ptr.io); 1288 1299 1289 1300 const seq = counter.fetchAdd(1, .monotonic) + 1; 1290 - broadcaster.broadcast(seq, "x"); 1301 + bc_ptr.broadcast(seq, "x"); 1291 1302 } 1292 1303 } 1293 1304 };
+6 -2
src/cleaner.zig
··· 5 5 //! modeled on backfill.zig's Backfiller pattern. 6 6 7 7 const std = @import("std"); 8 + const Io = std.Io; 8 9 const pg = @import("pg"); 9 10 const collection_index_mod = @import("collection_index.zig"); 10 11 ··· 13 14 14 15 pub const Cleaner = struct { 15 16 allocator: Allocator, 17 + io: Io, 16 18 collection_index: *collection_index_mod.CollectionIndex, 17 19 db: *pg.Pool, 18 20 running: std.atomic.Value(bool), ··· 22 24 23 25 pub fn init( 24 26 allocator: Allocator, 27 + io: Io, 25 28 collection_index: *collection_index_mod.CollectionIndex, 26 29 db: *pg.Pool, 27 30 ) Cleaner { 28 31 return .{ 29 32 .allocator = allocator, 33 + .io = io, 30 34 .collection_index = collection_index, 31 35 .db = db, 32 36 .running = .{ .raw = false }, ··· 49 53 50 54 self.scanned.store(0, .release); 51 55 self.removed.store(0, .release); 52 - self.thread = try std.Thread.spawn(.{ .stack_size = @import("main.zig").default_stack_size }, run, .{self}); 56 + self.thread = try std.Thread.spawn(.{}, run, .{self}); 53 57 } 54 58 55 59 fn run(self: *Cleaner) void { ··· 97 101 }); 98 102 99 103 // brief pause between batches to avoid hammering postgres 100 - std.posix.nanosleep(0, 50 * std.time.ns_per_ms); 104 + self.io.sleep(Io.Duration.fromMilliseconds(50), .awake) catch {}; 101 105 } 102 106 103 107 const total = self.scanned.load(.monotonic);
+7 -7
src/collection_index.zig
··· 276 276 pub fn listKnownCollections(self: *CollectionIndex, allocator: Allocator) ![][]const u8 { 277 277 var err_str: ?rocksdb.Data = null; 278 278 279 - var seen: std.StringHashMapUnmanaged(void) = .{}; 279 + var seen: std.StringHashMapUnmanaged(void) = .empty; 280 280 defer seen.deinit(allocator); 281 281 282 282 // full scan of RBC — keys are collection\0did ··· 359 359 360 360 var dir = std.testing.tmpDir(.{}); 361 361 defer dir.cleanup(); 362 - const path = try dir.dir.realpathAlloc(allocator, "."); 362 + const path = try std.fmt.allocPrint(allocator, ".zig-cache/tmp/{s}", .{@as([]const u8, &dir.sub_path)}); 363 363 defer allocator.free(path); 364 364 365 365 var ci = CollectionIndex.open(allocator, path) catch |err| { ··· 375 375 376 376 var dir = std.testing.tmpDir(.{}); 377 377 defer dir.cleanup(); 378 - const path = try dir.dir.realpathAlloc(allocator, "."); 378 + const path = try std.fmt.allocPrint(allocator, ".zig-cache/tmp/{s}", .{@as([]const u8, &dir.sub_path)}); 379 379 defer allocator.free(path); 380 380 381 381 var ci = CollectionIndex.open(allocator, path) catch |err| { ··· 402 402 403 403 var dir = std.testing.tmpDir(.{}); 404 404 defer dir.cleanup(); 405 - const path = try dir.dir.realpathAlloc(allocator, "."); 405 + const path = try std.fmt.allocPrint(allocator, ".zig-cache/tmp/{s}", .{@as([]const u8, &dir.sub_path)}); 406 406 defer allocator.free(path); 407 407 408 408 var ci = CollectionIndex.open(allocator, path) catch |err| { ··· 434 434 435 435 var dir = std.testing.tmpDir(.{}); 436 436 defer dir.cleanup(); 437 - const path = try dir.dir.realpathAlloc(allocator, "."); 437 + const path = try std.fmt.allocPrint(allocator, ".zig-cache/tmp/{s}", .{@as([]const u8, &dir.sub_path)}); 438 438 defer allocator.free(path); 439 439 440 440 var ci = CollectionIndex.open(allocator, path) catch |err| { ··· 462 462 463 463 var dir = std.testing.tmpDir(.{}); 464 464 defer dir.cleanup(); 465 - const path = try dir.dir.realpathAlloc(allocator, "."); 465 
+ const path = try std.fmt.allocPrint(allocator, ".zig-cache/tmp/{s}", .{@as([]const u8, &dir.sub_path)}); 466 466 defer allocator.free(path); 467 467 468 468 var ci = CollectionIndex.open(allocator, path) catch |err| { ··· 504 504 505 505 var dir = std.testing.tmpDir(.{}); 506 506 defer dir.cleanup(); 507 - const path = try dir.dir.realpathAlloc(allocator, "."); 507 + const path = try std.fmt.allocPrint(allocator, ".zig-cache/tmp/{s}", .{@as([]const u8, &dir.sub_path)}); 508 508 defer allocator.free(path); 509 509 510 510 var ci = CollectionIndex.open(allocator, path) catch |err| {
+122 -120
src/event_log.zig
··· 11 11 //! see: indigo cmd/relay/stream/persist/diskpersist/diskpersist.go 12 12 13 13 const std = @import("std"); 14 + const Io = std.Io; 14 15 const pg = @import("pg"); 15 16 const lru = @import("lru.zig"); 16 17 ··· 79 80 pub const DiskPersist = struct { 80 81 allocator: Allocator, 81 82 dir_path: []const u8, 82 - dir: std.fs.Dir, 83 + dir: Io.Dir, 83 84 db: *pg.Pool, 84 - current_file: ?std.fs.File = null, 85 + current_file: ?Io.File = null, 85 86 current_file_path: ?[]const u8 = null, 87 + current_file_pos: u64 = 0, 86 88 87 89 // sequence state 88 90 cur_seq: u64 = 1, ··· 97 99 did_cache: lru.LruCache(u64), 98 100 99 101 // write buffer (flushed periodically or when threshold hit) 100 - outbuf: std.ArrayListUnmanaged(u8) = .{}, 101 - evtbuf: std.ArrayListUnmanaged(PersistJob) = .{}, 102 - mutex: std.Thread.Mutex = .{}, 102 + outbuf: std.ArrayListUnmanaged(u8) = .empty, 103 + evtbuf: std.ArrayListUnmanaged(PersistJob) = .empty, 104 + mutex: Io.Mutex = Io.Mutex.init, 103 105 104 106 // flush thread 105 107 flush_thread: ?std.Thread = null, 106 108 alive: std.atomic.Value(bool) = .{ .raw = true }, 107 - flush_cond: std.Thread.Condition = .{}, 109 + 110 + io: Io, 108 111 109 112 /// current evtbuf entry count (for metrics — non-blocking, returns 0 if lock is contended) 110 113 pub fn evtbufLen(self: *DiskPersist) usize { 111 114 if (!self.mutex.tryLock()) return 0; 112 - defer self.mutex.unlock(); 115 + defer self.mutex.unlock(self.io); 113 116 return self.evtbuf.items.len; 114 117 } 115 118 ··· 126 129 /// evtbuf allocated capacity in jobs (for memory attribution — non-blocking) 127 130 pub fn evtbufCap(self: *DiskPersist) usize { 128 131 if (!self.mutex.tryLock()) return 0; 129 - defer self.mutex.unlock(); 132 + defer self.mutex.unlock(self.io); 130 133 return self.evtbuf.capacity; 131 134 } 132 135 133 136 /// outbuf allocated capacity in bytes (for memory attribution — non-blocking) 134 137 pub fn outbufCap(self: *DiskPersist) usize { 135 138 if 
(!self.mutex.tryLock()) return 0; 136 - defer self.mutex.unlock(); 139 + defer self.mutex.unlock(self.io); 137 140 return self.outbuf.capacity; 138 141 } 139 142 140 - pub fn init(allocator: Allocator, dir_path: []const u8, database_url: []const u8) !DiskPersist { 143 + pub fn init(allocator: Allocator, dir_path: []const u8, database_url: []const u8, io: Io) !DiskPersist { 141 144 // ensure directory exists 142 - std.fs.cwd().makePath(dir_path) catch |err| switch (err) { 143 - error.PathAlreadyExists => {}, 144 - else => return err, 145 - }; 145 + try Io.Dir.cwd().createDirPath(io, dir_path); 146 146 147 - var dir = try std.fs.cwd().openDir(dir_path, .{ .iterate = true }); 148 - errdefer dir.close(); 147 + var dir = try Io.Dir.cwd().openDir(io, dir_path, .{ .iterate = true }); 148 + errdefer dir.close(io); 149 149 150 150 // connect to Postgres 151 151 const uri = std.Uri.parse(database_url) catch return error.InvalidDatabaseUrl; 152 - const pool = try pg.Pool.initUri(allocator, uri, .{ .size = 5 }); 152 + const pool = try pg.Pool.initUri(allocator, io, uri, .{ .size = 5 }); 153 153 errdefer pool.deinit(); 154 154 155 155 // create tables (matching indigo's Go relay schema) ··· 242 242 .dir_path = try allocator.dupe(u8, dir_path), 243 243 .dir = dir, 244 244 .db = pool, 245 - .did_cache = lru.LruCache(u64).init(allocator, 500_000), 245 + .did_cache = lru.LruCache(u64).init(allocator, 500_000, io), 246 + .io = io, 246 247 }; 247 248 248 249 // recover from existing log files ··· 252 253 } 253 254 254 255 pub fn deinit(self: *DiskPersist) void { 255 - // stop flush thread 256 + // stop flush thread (loop checks alive after each sleep interval) 256 257 self.alive.store(false, .release); 257 - self.flush_cond.signal(); 258 258 if (self.flush_thread) |t| t.join(); 259 259 260 260 // flush remaining 261 - self.mutex.lock(); 261 + self.mutex.lockUncancelable(self.io); 262 262 self.flushLocked() catch {}; 263 - self.mutex.unlock(); 263 + self.mutex.unlock(self.io); 264 264 
265 265 // free write buffer 266 266 for (self.evtbuf.items) |job| self.allocator.free(job.data); ··· 269 269 270 270 self.did_cache.deinit(); 271 271 272 - if (self.current_file) |f| f.close(); 272 + if (self.current_file) |f| f.close(self.io); 273 273 if (self.current_file_path) |p| self.allocator.free(p); 274 - self.dir.close(); 274 + self.dir.close(self.io); 275 275 self.db.deinit(); 276 276 self.allocator.free(self.dir_path); 277 277 } ··· 544 544 545 545 /// list all active hosts 546 546 pub fn listActiveHosts(self: *DiskPersist, allocator: Allocator) ![]Host { 547 - var hosts: std.ArrayListUnmanaged(Host) = .{}; 547 + var hosts: std.ArrayListUnmanaged(Host) = .empty; 548 548 errdefer { 549 549 for (hosts.items) |h| { 550 550 allocator.free(h.hostname); ··· 575 575 576 576 /// list all hosts (any status) for admin view 577 577 pub fn listAllHosts(self: *DiskPersist, allocator: Allocator) ![]Host { 578 - var hosts: std.ArrayListUnmanaged(Host) = .{}; 578 + var hosts: std.ArrayListUnmanaged(Host) = .empty; 579 579 errdefer { 580 580 for (hosts.items) |h| { 581 581 allocator.free(h.hostname); ··· 668 668 header.encode(data[0..header_size]); 669 669 @memcpy(data[header_size..], payload); 670 670 671 - self.mutex.lock(); 672 - defer self.mutex.unlock(); 671 + self.mutex.lockUncancelable(self.io); 672 + defer self.mutex.unlock(self.io); 673 673 674 674 // assign seq 675 675 const seq = self.cur_seq; ··· 690 690 691 691 /// playback events with seq > since. calls cb for each event. 
692 692 pub fn playback(self: *DiskPersist, since: u64, allocator: Allocator, entries: *std.ArrayListUnmanaged(PlaybackEntry)) !void { 693 - self.mutex.lock(); 694 - defer self.mutex.unlock(); 693 + self.mutex.lockUncancelable(self.io); 694 + defer self.mutex.unlock(self.io); 695 695 696 696 const since_i: i64 = @intCast(since); 697 697 698 698 // find the log file containing `since` 699 - var start_files: std.ArrayListUnmanaged(LogFileRef) = .{}; 699 + var start_files: std.ArrayListUnmanaged(LogFileRef) = .empty; 700 700 defer start_files.deinit(allocator); 701 701 702 702 if (since > 0) { ··· 733 733 734 734 // read events from each file 735 735 for (start_files.items) |ref| { 736 - var file = self.dir.openFile(ref.path, .{}) catch continue; 737 - defer file.close(); 738 - try readEventsFrom(allocator, file, since, entries); 736 + var file = self.dir.openFile(self.io, ref.path, .{}) catch continue; 737 + defer file.close(self.io); 738 + try readEventsFrom(allocator, file, self.io, since, entries); 739 739 } 740 740 } 741 741 ··· 761 761 762 762 /// garbage-collect log files older than the retention period 763 763 pub fn gc(self: *DiskPersist) !void { 764 - self.mutex.lock(); 765 - defer self.mutex.unlock(); 764 + self.mutex.lockUncancelable(self.io); 765 + defer self.mutex.unlock(self.io); 766 766 767 767 const cutoff_interval = try std.fmt.allocPrint(self.allocator, "{d} hours", .{self.retention_hours}); 768 768 defer self.allocator.free(cutoff_interval); 769 769 770 770 // find expired refs 771 - var expired: std.ArrayListUnmanaged(GcRef) = .{}; 771 + var expired: std.ArrayListUnmanaged(GcRef) = .empty; 772 772 defer { 773 773 for (expired.items) |e| self.allocator.free(e.path); 774 774 expired.deinit(self.allocator); ··· 801 801 }; 802 802 803 803 // delete file 804 - self.dir.deleteFile(ref.path) catch |err| { 804 + self.dir.deleteFile(self.io, ref.path) catch |err| { 805 805 log.warn("gc: failed to delete {s}: {s}", .{ ref.path, @errorName(err) }); 806 806 
}; 807 807 } ··· 819 819 /// delete oldest event files until total directory size is under max_dir_bytes 820 820 fn gcBySize(self: *DiskPersist) !void { 821 821 // query all refs ordered by creation time (oldest first) 822 - var refs: std.ArrayListUnmanaged(GcSizeRef) = .{}; 822 + var refs: std.ArrayListUnmanaged(GcSizeRef) = .empty; 823 823 defer { 824 824 for (refs.items) |e| self.allocator.free(e.path); 825 825 refs.deinit(self.allocator); ··· 834 834 defer result.deinit(); 835 835 while (result.nextUnsafe() catch null) |r| { 836 836 const path = r.get([]const u8, 1); 837 - const stat = self.dir.statFile(path) catch continue; 837 + const stat = self.dir.statFile(self.io, path, .{}) catch continue; 838 838 total_bytes += stat.size; 839 839 try refs.append(self.allocator, .{ 840 840 .id = r.get(i64, 0), ··· 865 865 continue; 866 866 }; 867 867 868 - self.dir.deleteFile(ref.path) catch |err| { 868 + self.dir.deleteFile(self.io, ref.path) catch |err| { 869 869 log.warn("gc: failed to delete {s}: {s}", .{ ref.path, @errorName(err) }); 870 870 continue; // don't subtract size if file delete failed 871 871 }; ··· 884 884 885 885 /// take down all events for a user (set flag + zero payload) 886 886 pub fn takeDownUser(self: *DiskPersist, uid: u64) !void { 887 - self.mutex.lock(); 888 - defer self.mutex.unlock(); 887 + self.mutex.lockUncancelable(self.io); 888 + defer self.mutex.unlock(self.io); 889 889 890 890 // iterate all log files 891 - var refs: std.ArrayListUnmanaged([]const u8) = .{}; 891 + var refs: std.ArrayListUnmanaged([]const u8) = .empty; 892 892 defer { 893 893 for (refs.items) |p| self.allocator.free(p); 894 894 refs.deinit(self.allocator); ··· 903 903 } 904 904 905 905 for (refs.items) |path| { 906 - var file = self.dir.openFile(path, .{ .mode = .read_write }) catch continue; 907 - defer file.close(); 908 - mutateUserEventsInFile(file, uid) catch |err| { 906 + var file = self.dir.openFile(self.io, path, .{ .mode = .read_write }) catch continue; 907 + 
defer file.close(self.io); 908 + mutateUserEventsInFile(file, self.io, uid) catch |err| { 909 909 log.warn("takedown: failed to process {s}: {s}", .{ path, @errorName(err) }); 910 910 }; 911 911 } ··· 924 924 const path = r.get([]const u8, 1); 925 925 const seq_start: u64 = @intCast(r.get(i64, 2)); 926 926 927 - var file = self.dir.openFile(path, .{ .mode = .read_write }) catch { 927 + var file = self.dir.openFile(self.io, path, .{ .mode = .read_write }) catch { 928 928 // file missing, start fresh 929 929 try self.initLogFile(); 930 930 return; 931 931 }; 932 932 933 933 // scan for last seq 934 - const last_seq = scanForLastSeq(file) catch { 935 - file.close(); 934 + const last_seq = scanForLastSeq(file, self.io) catch { 935 + file.close(self.io); 936 936 try self.initLogFile(); 937 937 return; 938 938 }; ··· 944 944 self.cur_seq = if (seq_start > 0) seq_start else 1; 945 945 } 946 946 947 - // seek to end for appending 948 - const stat = try file.stat(); 949 - try file.seekTo(stat.size); 947 + // track position for appending (at end of file) 948 + const file_stat = try file.stat(self.io); 949 + self.current_file_pos = file_stat.size; 950 950 951 951 self.current_file = file; 952 952 self.current_file_path = try self.allocator.dupe(u8, path); ··· 963 963 var name_buf: [64]u8 = undefined; 964 964 const name = std.fmt.bufPrint(&name_buf, "evts-{d}", .{start_seq}) catch unreachable; 965 965 966 - if (self.current_file) |f| f.close(); 966 + if (self.current_file) |f| f.close(self.io); 967 967 if (self.current_file_path) |p| self.allocator.free(p); 968 968 969 - self.current_file = try self.dir.createFile(name, .{ .truncate = false }); 969 + self.current_file = try self.dir.createFile(self.io, name, .{ .truncate = false, .read = true }); 970 + self.current_file_pos = 0; 970 971 self.current_file_path = try self.allocator.dupe(u8, name); 971 972 972 973 // register in Postgres ··· 983 984 984 985 // write buffered bytes to current file 985 986 const file = 
self.current_file orelse return; 986 - file.writeAll(self.outbuf.items) catch |err| { 987 + file.writePositionalAll(self.io, self.outbuf.items, self.current_file_pos) catch |err| { 987 988 log.err("flush: write failed: {s}", .{@errorName(err)}); 988 989 return err; 989 990 }; 991 + self.current_file_pos += self.outbuf.items.len; 990 992 991 993 // clear buffers 992 994 self.outbuf.clearRetainingCapacity(); ··· 1007 1009 1008 1010 fn flushLoop(self: *DiskPersist) void { 1009 1011 while (self.alive.load(.acquire)) { 1010 - // wait for flush interval or signal 1011 - { 1012 - self.mutex.lock(); 1013 - defer self.mutex.unlock(); 1014 - self.flush_cond.timedWait(&self.mutex, default_flush_interval_ms * std.time.ns_per_ms) catch {}; 1015 - self.flushLocked() catch {}; 1016 - } 1012 + // sleep for flush interval (replaces timedWait — Io.Condition has no timed variant) 1013 + self.io.sleep(Io.Duration.fromMilliseconds(@intCast(default_flush_interval_ms)), .awake) catch {}; 1014 + self.mutex.lockUncancelable(self.io); 1015 + self.flushLocked() catch {}; 1016 + self.mutex.unlock(self.io); 1017 1017 } 1018 1018 } 1019 1019 }; ··· 1045 1045 1046 1046 // --- file-level operations --- 1047 1047 1048 - fn readEventsFrom(allocator: Allocator, file: std.fs.File, since: u64, result: *std.ArrayListUnmanaged(PlaybackEntry)) !void { 1049 - const file_size = (try file.stat()).size; 1048 + fn readEventsFrom(allocator: Allocator, file: Io.File, io: Io, since: u64, result: *std.ArrayListUnmanaged(PlaybackEntry)) !void { 1049 + const file_size = (try file.stat(io)).size; 1050 1050 1051 1051 // if since > 0, scan to the right position 1052 + var pos: u64 = 0; 1052 1053 if (since > 0) { 1053 - seekToSeq(file, since, file_size) catch return; 1054 + pos = seekToSeq(file, io, since, file_size) catch return; 1054 1055 } 1055 1056 1056 1057 // read events 1057 - while (true) { 1058 + while (pos + header_size <= file_size) { 1058 1059 var hdr_buf: [header_size]u8 = undefined; 1059 - const n = 
file.readAll(&hdr_buf) catch break; 1060 + const n = file.readPositionalAll(io, &hdr_buf, pos) catch break; 1060 1061 if (n < header_size) break; 1062 + pos += header_size; 1061 1063 1062 1064 const hdr = EvtHeader.decode(&hdr_buf); 1063 1065 1064 1066 // skip taken down / rebased events 1065 1067 if (hdr.flags & (EvtFlags.takedown | EvtFlags.rebased) != 0) { 1066 - file.seekBy(@intCast(hdr.len)) catch break; 1068 + pos += hdr.len; 1067 1069 continue; 1068 1070 } 1069 1071 1070 1072 if (hdr.seq <= since) { 1071 - file.seekBy(@intCast(hdr.len)) catch break; 1073 + pos += hdr.len; 1072 1074 continue; 1073 1075 } 1074 1076 1075 1077 // read payload 1076 1078 const data = allocator.alloc(u8, hdr.len) catch break; 1077 - const read_n = file.readAll(data) catch { 1079 + const read_n = file.readPositionalAll(io, data, pos) catch { 1078 1080 allocator.free(data); 1079 1081 break; 1080 1082 }; ··· 1082 1084 allocator.free(data); 1083 1085 break; 1084 1086 } 1087 + pos += hdr.len; 1085 1088 1086 1089 result.append(allocator, .{ 1087 1090 .seq = hdr.seq, ··· 1095 1098 } 1096 1099 } 1097 1100 1098 - /// scan file headers to seek to the first event with seq > target 1099 - fn seekToSeq(file: std.fs.File, target: u64, file_size: u64) !void { 1100 - try file.seekTo(0); 1101 + /// scan file headers to find position of first event with seq > target 1102 + fn seekToSeq(file: Io.File, io: Io, target: u64, file_size: u64) !u64 { 1101 1103 var pos: u64 = 0; 1102 1104 while (pos + header_size <= file_size) { 1103 1105 var hdr_buf: [header_size]u8 = undefined; 1104 - const n = try file.readAll(&hdr_buf); 1106 + const n = try file.readPositionalAll(io, &hdr_buf, pos); 1105 1107 if (n < header_size) break; 1106 1108 1107 1109 const hdr = EvtHeader.decode(&hdr_buf); 1108 1110 if (hdr.seq > target) { 1109 - // seek back to start of this header 1110 - try file.seekTo(pos); 1111 - return; 1111 + return pos; 1112 1112 } 1113 1113 pos += header_size + hdr.len; 1114 - try file.seekTo(pos); 1115 
1114 } 1115 + return pos; 1116 1116 } 1117 1117 1118 1118 /// scan a file for the last sequence number 1119 - fn scanForLastSeq(file: std.fs.File) !?u64 { 1120 - try file.seekTo(0); 1121 - const file_size = (try file.stat()).size; 1119 + fn scanForLastSeq(file: Io.File, io: Io) !?u64 { 1120 + const file_size = (try file.stat(io)).size; 1122 1121 1123 1122 var last_seq: ?u64 = null; 1124 1123 var pos: u64 = 0; 1125 1124 while (pos + header_size <= file_size) { 1126 1125 var hdr_buf: [header_size]u8 = undefined; 1127 - try file.seekTo(pos); 1128 - const n = try file.readAll(&hdr_buf); 1126 + const n = try file.readPositionalAll(io, &hdr_buf, pos); 1129 1127 if (n < header_size) break; 1130 1128 1131 1129 const hdr = EvtHeader.decode(&hdr_buf); ··· 1136 1134 } 1137 1135 1138 1136 /// set takedown flag + zero payload for all events belonging to uid 1139 - fn mutateUserEventsInFile(file: std.fs.File, uid: u64) !void { 1140 - const file_size = (try file.stat()).size; 1137 + fn mutateUserEventsInFile(file: Io.File, io: Io, uid: u64) !void { 1138 + const file_size = (try file.stat(io)).size; 1141 1139 var pos: u64 = 0; 1142 1140 1143 1141 while (pos + header_size <= file_size) { 1144 1142 var hdr_buf: [header_size]u8 = undefined; 1145 - try file.seekTo(pos); 1146 - const n = try file.readAll(&hdr_buf); 1143 + const n = try file.readPositionalAll(io, &hdr_buf, pos); 1147 1144 if (n < header_size) break; 1148 1145 1149 1146 const hdr = EvtHeader.decode(&hdr_buf); ··· 1153 1150 const new_flags = hdr.flags | EvtFlags.takedown; 1154 1151 var flags_buf: [4]u8 = undefined; 1155 1152 std.mem.writeInt(u32, &flags_buf, new_flags, .little); 1156 - try file.seekTo(pos); 1157 - try file.writeAll(&flags_buf); 1153 + try file.writePositionalAll(io, &flags_buf, pos); 1158 1154 1159 1155 // zero the payload 1160 1156 const payload_start = pos + header_size; 1161 - try file.seekTo(payload_start); 1162 1157 var zeros: [4096]u8 = [_]u8{0} ** 4096; 1163 1158 var remaining: u64 = hdr.len; 1159 
+ var write_pos: u64 = payload_start; 1164 1160 while (remaining > 0) { 1165 - const chunk = @min(remaining, zeros.len); 1166 - try file.writeAll(zeros[0..chunk]); 1161 + const chunk: usize = @min(remaining, zeros.len); 1162 + try file.writePositionalAll(io, zeros[0..chunk], write_pos); 1163 + write_pos += chunk; 1167 1164 remaining -= chunk; 1168 1165 } 1169 1166 } ··· 1201 1198 try std.testing.expectEqual(@as(u8, 0x01), buf[9]); 1202 1199 } 1203 1200 1201 + fn getenv(key: [*:0]const u8) ?[]const u8 { 1202 + const ptr = std.c.getenv(key) orelse return null; 1203 + return std.mem.sliceTo(ptr, 0); 1204 + } 1205 + 1204 1206 fn requireDatabaseUrl() ![]const u8 { 1205 - return std.posix.getenv("DATABASE_URL") orelse return error.SkipZigTest; 1207 + return getenv("DATABASE_URL") orelse return error.SkipZigTest; 1206 1208 } 1207 1209 1208 1210 test "persist and playback" { ··· 1214 1216 const dir_path = try tmpDirRealPath(std.testing.allocator, tmp); 1215 1217 defer std.testing.allocator.free(dir_path); 1216 1218 1217 - var dp = try DiskPersist.init(std.testing.allocator, dir_path, database_url); 1219 + var dp = try DiskPersist.init(std.testing.allocator, dir_path, database_url, std.testing.io); 1218 1220 defer dp.deinit(); 1219 1221 1220 1222 // persist some events (sync flush, no background thread) ··· 1228 1230 1229 1231 // flush manually 1230 1232 { 1231 - dp.mutex.lock(); 1232 - defer dp.mutex.unlock(); 1233 + dp.mutex.lockUncancelable(dp.io); 1234 + defer dp.mutex.unlock(dp.io); 1233 1235 try dp.flushLocked(); 1234 1236 } 1235 1237 1236 1238 // playback from cursor=0 → all events 1237 - var entries: std.ArrayListUnmanaged(PlaybackEntry) = .{}; 1239 + var entries: std.ArrayListUnmanaged(PlaybackEntry) = .empty; 1238 1240 defer { 1239 1241 for (entries.items) |e| std.testing.allocator.free(e.data); 1240 1242 entries.deinit(std.testing.allocator); ··· 1257 1259 const dir_path = try tmpDirRealPath(std.testing.allocator, tmp); 1258 1260 defer 
std.testing.allocator.free(dir_path); 1259 1261 1260 - var dp = try DiskPersist.init(std.testing.allocator, dir_path, database_url); 1262 + var dp = try DiskPersist.init(std.testing.allocator, dir_path, database_url, std.testing.io); 1261 1263 defer dp.deinit(); 1262 1264 1263 1265 _ = try dp.persist(.commit, 1, "a"); 1264 1266 _ = try dp.persist(.commit, 2, "b"); 1265 1267 _ = try dp.persist(.commit, 3, "c"); 1266 1268 { 1267 - dp.mutex.lock(); 1268 - defer dp.mutex.unlock(); 1269 + dp.mutex.lockUncancelable(dp.io); 1270 + defer dp.mutex.unlock(dp.io); 1269 1271 try dp.flushLocked(); 1270 1272 } 1271 1273 1272 1274 // playback from cursor=2 → only seq 3 1273 - var entries: std.ArrayListUnmanaged(PlaybackEntry) = .{}; 1275 + var entries: std.ArrayListUnmanaged(PlaybackEntry) = .empty; 1274 1276 defer { 1275 1277 for (entries.items) |e| std.testing.allocator.free(e.data); 1276 1278 entries.deinit(std.testing.allocator); ··· 1293 1295 1294 1296 // write some events 1295 1297 { 1296 - var dp = try DiskPersist.init(std.testing.allocator, dir_path, database_url); 1298 + var dp = try DiskPersist.init(std.testing.allocator, dir_path, database_url, std.testing.io); 1297 1299 defer dp.deinit(); 1298 1300 _ = try dp.persist(.commit, 1, "x"); 1299 1301 _ = try dp.persist(.commit, 2, "y"); 1300 1302 _ = try dp.persist(.account, 3, "z"); 1301 - dp.mutex.lock(); 1302 - defer dp.mutex.unlock(); 1303 + dp.mutex.lockUncancelable(dp.io); 1304 + defer dp.mutex.unlock(dp.io); 1303 1305 try dp.flushLocked(); 1304 1306 } 1305 1307 1306 1308 // reinit — should recover seq 1307 1309 { 1308 - var dp = try DiskPersist.init(std.testing.allocator, dir_path, database_url); 1310 + var dp = try DiskPersist.init(std.testing.allocator, dir_path, database_url, std.testing.io); 1309 1311 defer dp.deinit(); 1310 1312 try std.testing.expectEqual(@as(u64, 3), dp.lastSeq().?); 1311 1313 const seq4 = try dp.persist(.commit, 1, "w"); ··· 1322 1324 const dir_path = try tmpDirRealPath(std.testing.allocator, 
tmp); 1323 1325 defer std.testing.allocator.free(dir_path); 1324 1326 1325 - var dp = try DiskPersist.init(std.testing.allocator, dir_path, database_url); 1327 + var dp = try DiskPersist.init(std.testing.allocator, dir_path, database_url, std.testing.io); 1326 1328 defer dp.deinit(); 1327 1329 1328 1330 _ = try dp.persist(.commit, 42, "secret-data"); 1329 1331 _ = try dp.persist(.commit, 99, "other-data"); 1330 1332 { 1331 - dp.mutex.lock(); 1332 - defer dp.mutex.unlock(); 1333 + dp.mutex.lockUncancelable(dp.io); 1334 + defer dp.mutex.unlock(dp.io); 1333 1335 try dp.flushLocked(); 1334 1336 } 1335 1337 ··· 1337 1339 try dp.takeDownUser(42); 1338 1340 1339 1341 // playback should skip user 42's events 1340 - var entries: std.ArrayListUnmanaged(PlaybackEntry) = .{}; 1342 + var entries: std.ArrayListUnmanaged(PlaybackEntry) = .empty; 1341 1343 defer { 1342 1344 for (entries.items) |e| std.testing.allocator.free(e.data); 1343 1345 entries.deinit(std.testing.allocator); ··· 1357 1359 const dir_path = try tmpDirRealPath(std.testing.allocator, tmp); 1358 1360 defer std.testing.allocator.free(dir_path); 1359 1361 1360 - var dp = try DiskPersist.init(std.testing.allocator, dir_path, database_url); 1362 + var dp = try DiskPersist.init(std.testing.allocator, dir_path, database_url, std.testing.io); 1361 1363 defer dp.deinit(); 1362 1364 1363 1365 // first call creates the account ··· 1385 1387 1386 1388 var uid1: u64 = undefined; 1387 1389 { 1388 - var dp = try DiskPersist.init(std.testing.allocator, dir_path, database_url); 1390 + var dp = try DiskPersist.init(std.testing.allocator, dir_path, database_url, std.testing.io); 1389 1391 defer dp.deinit(); 1390 1392 uid1 = try dp.uidForDid("did:plc:carol"); 1391 1393 } 1392 1394 1393 1395 // reinit — UID should be the same from database 1394 1396 { 1395 - var dp = try DiskPersist.init(std.testing.allocator, dir_path, database_url); 1397 + var dp = try DiskPersist.init(std.testing.allocator, dir_path, database_url, 
std.testing.io); 1396 1398 defer dp.deinit(); 1397 1399 const uid1_again = try dp.uidForDid("did:plc:carol"); 1398 1400 try std.testing.expectEqual(uid1, uid1_again); ··· 1408 1410 const dir_path = try tmpDirRealPath(std.testing.allocator, tmp); 1409 1411 defer std.testing.allocator.free(dir_path); 1410 1412 1411 - var dp = try DiskPersist.init(std.testing.allocator, dir_path, database_url); 1413 + var dp = try DiskPersist.init(std.testing.allocator, dir_path, database_url, std.testing.io); 1412 1414 defer dp.deinit(); 1413 1415 1414 1416 const alice_uid = try dp.uidForDid("did:plc:alice"); ··· 1418 1420 _ = try dp.persist(.commit, bob_uid, "bob-post"); 1419 1421 _ = try dp.persist(.commit, alice_uid, "alice-post-2"); 1420 1422 { 1421 - dp.mutex.lock(); 1422 - defer dp.mutex.unlock(); 1423 + dp.mutex.lockUncancelable(dp.io); 1424 + defer dp.mutex.unlock(dp.io); 1423 1425 try dp.flushLocked(); 1424 1426 } 1425 1427 ··· 1427 1429 try dp.takeDownUser(alice_uid); 1428 1430 1429 1431 // playback should only have bob's event 1430 - var entries: std.ArrayListUnmanaged(PlaybackEntry) = .{}; 1432 + var entries: std.ArrayListUnmanaged(PlaybackEntry) = .empty; 1431 1433 defer { 1432 1434 for (entries.items) |e| std.testing.allocator.free(e.data); 1433 1435 entries.deinit(std.testing.allocator); ··· 1440 1442 } 1441 1443 1442 1444 fn tmpDirRealPath(allocator: Allocator, tmp: std.testing.TmpDir) ![]const u8 { 1443 - var buf: [std.fs.max_path_bytes]u8 = undefined; 1444 - const real = try tmp.dir.realpath(".", &buf); 1445 - return try allocator.dupe(u8, real); 1445 + var buf: [Io.Dir.max_path_bytes]u8 = undefined; 1446 + const n = try tmp.dir.realPath(std.testing.io, &buf); 1447 + return try allocator.dupe(u8, buf[0..n]); 1446 1448 }
+11 -5
src/frame_worker.zig
··· 8 8 //! CBOR decode is ~1-2μs, far cheaper than serializing decoded values across threads. 9 9 10 10 const std = @import("std"); 11 + const Io = std.Io; 11 12 const zat = @import("zat"); 12 13 const broadcaster = @import("broadcaster.zig"); 13 14 const validator_mod = @import("validator.zig"); ··· 19 20 const Allocator = std.mem.Allocator; 20 21 const log = std.log.scoped(.relay); 21 22 23 + fn microTimestamp(io: Io) i64 { 24 + return @intCast(@divFloor(Io.Timestamp.now(io, .real).nanoseconds, std.time.ns_per_us)); 25 + } 26 + 22 27 pub const FrameWork = struct { 23 28 data: []u8, // raw frame bytes (heap-duped by reader, freed by worker) 24 29 host_id: u64, 25 30 hostname: []const u8, // borrowed from subscriber (stable lifetime) 26 31 allocator: Allocator, 32 + io: Io, 27 33 // shared references (all thread-safe, all outlive the work item) 28 34 bc: *broadcaster.Broadcaster, 29 35 validator: *validator_mod.Validator, ··· 92 98 if (result.is_new) _ = work.bc.stats.host_authority_is_new.fetchAdd(1, .monotonic); 93 99 if (result.host_changed) _ = work.bc.stats.host_authority_host_changed.fetchAdd(1, .monotonic); 94 100 _ = work.bc.stats.host_authority_checks.fetchAdd(1, .monotonic); 95 - const ha_t0 = std.time.microTimestamp(); 101 + const ha_t0 = microTimestamp(work.io); 96 102 defer { 97 - const elapsed: u64 = @intCast(@max(0, std.time.microTimestamp() - ha_t0)); 103 + const elapsed: u64 = @intCast(@max(0, microTimestamp(work.io) - ha_t0)); 98 104 _ = work.bc.stats.host_authority_time_us.fetchAdd(elapsed, .monotonic); 99 105 } 100 106 switch (work.validator.resolveHostAuthority(d, work.host_id)) { ··· 177 183 // future-rev rejection: drop commits with timestamps too far ahead 178 184 if (zat.Tid.parse(incoming_rev)) |tid| { 179 185 const rev_us: i64 = @intCast(tid.timestamp()); 180 - const now_us = std.time.microTimestamp(); 186 + const now_us = microTimestamp(work.io); 181 187 const skew_us: i64 = work.validator.config.rev_clock_skew * 1_000_000; 182 188 if 
(rev_us > now_us + skew_us) { 183 189 log.info("host {s}: dropping future rev uid={d} rev={s}", .{ ··· 270 276 // persist and broadcast under ordering lock 271 277 if (work.persist) |dp| { 272 278 const relay_seq = blk: { 273 - work.bc.broadcast_order.lock(); 274 - defer work.bc.broadcast_order.unlock(); 279 + work.bc.broadcast_order.lockUncancelable(work.io); 280 + defer work.bc.broadcast_order.unlock(work.io); 275 281 276 282 const seq = dp.persist(kind, uid, data) catch |err| { 277 283 log.warn("persist failed: {s}", .{@errorName(err)});
+25 -22
src/lru.zig
··· 4 4 //! thread-safe (internal mutex). keys are duped on insert, freed on eviction. 5 5 6 6 const std = @import("std"); 7 + const Io = std.Io; 7 8 const Allocator = std.mem.Allocator; 8 9 9 10 pub fn LruCache(comptime V: type) type { ··· 17 18 next: ?*Node = null, 18 19 }; 19 20 20 - map: std.StringHashMapUnmanaged(*Node) = .{}, 21 + map: std.StringHashMapUnmanaged(*Node) = .empty, 21 22 head: ?*Node = null, // most recently used 22 23 tail: ?*Node = null, // least recently used 23 24 capacity: u32, 24 25 len: u32 = 0, 25 26 allocator: Allocator, 26 - mutex: std.Thread.Mutex = .{}, 27 + mutex: Io.Mutex = Io.Mutex.init, 28 + io: Io, 27 29 28 - pub fn init(allocator: Allocator, capacity: u32) Self { 30 + pub fn init(allocator: Allocator, capacity: u32, io: Io) Self { 29 31 return .{ 30 32 .allocator = allocator, 31 33 .capacity = capacity, 34 + .io = io, 32 35 }; 33 36 } 34 37 ··· 44 47 } 45 48 46 49 pub fn get(self: *Self, key: []const u8) ?V { 47 - self.mutex.lock(); 48 - defer self.mutex.unlock(); 50 + self.mutex.lockUncancelable(self.io); 51 + defer self.mutex.unlock(self.io); 49 52 const node = self.map.get(key) orelse return null; 50 53 self.moveToHead(node); 51 54 return node.value; 52 55 } 53 56 54 57 pub fn put(self: *Self, key: []const u8, value: V) Allocator.Error!void { 55 - self.mutex.lock(); 56 - defer self.mutex.unlock(); 58 + self.mutex.lockUncancelable(self.io); 59 + defer self.mutex.unlock(self.io); 57 60 58 61 if (self.map.get(key)) |node| { 59 62 // update existing ··· 88 91 } 89 92 90 93 pub fn remove(self: *Self, key: []const u8) bool { 91 - self.mutex.lock(); 92 - defer self.mutex.unlock(); 94 + self.mutex.lockUncancelable(self.io); 95 + defer self.mutex.unlock(self.io); 93 96 const node = self.map.get(key) orelse return false; 94 97 self.unlink(node); 95 98 // fetchRemove uses the node's owned key for lookup, which is valid ··· 103 106 104 107 /// check if a key exists without promoting it 105 108 pub fn contains(self: *Self, key: []const 
u8) bool { 106 - self.mutex.lock(); 107 - defer self.mutex.unlock(); 109 + self.mutex.lockUncancelable(self.io); 110 + defer self.mutex.unlock(self.io); 108 111 return self.map.contains(key); 109 112 } 110 113 111 114 /// entry count (non-blocking — returns 0 if lock is contended) 112 115 pub fn count(self: *Self) u32 { 113 116 if (!self.mutex.tryLock()) return 0; 114 - defer self.mutex.unlock(); 117 + defer self.mutex.unlock(self.io); 115 118 return self.len; 116 119 } 117 120 118 121 /// internal hashmap capacity (non-blocking — returns 0 if lock is contended) 119 122 pub fn mapCapacity(self: *Self) u32 { 120 123 if (!self.mutex.tryLock()) return 0; 121 - defer self.mutex.unlock(); 124 + defer self.mutex.unlock(self.io); 122 125 return self.map.capacity(); 123 126 } 124 127 ··· 165 168 const testing = std.testing; 166 169 167 170 test "basic get and put" { 168 - var cache = LruCache(u64).init(testing.allocator, 3); 171 + var cache = LruCache(u64).init(testing.allocator, 3, testing.io); 169 172 defer cache.deinit(); 170 173 171 174 try cache.put("a", 1); ··· 179 182 } 180 183 181 184 test "eviction order" { 182 - var cache = LruCache(u64).init(testing.allocator, 2); 185 + var cache = LruCache(u64).init(testing.allocator, 2, testing.io); 183 186 defer cache.deinit(); 184 187 185 188 try cache.put("a", 1); ··· 194 197 } 195 198 196 199 test "update moves to front" { 197 - var cache = LruCache(u64).init(testing.allocator, 2); 200 + var cache = LruCache(u64).init(testing.allocator, 2, testing.io); 198 201 defer cache.deinit(); 199 202 200 203 try cache.put("a", 1); ··· 213 216 } 214 217 215 218 test "put update existing key" { 216 - var cache = LruCache(u64).init(testing.allocator, 2); 219 + var cache = LruCache(u64).init(testing.allocator, 2, testing.io); 217 220 defer cache.deinit(); 218 221 219 222 try cache.put("a", 1); ··· 228 231 } 229 232 230 233 test "remove" { 231 - var cache = LruCache(u64).init(testing.allocator, 3); 234 + var cache = 
LruCache(u64).init(testing.allocator, 3, testing.io); 232 235 defer cache.deinit(); 233 236 234 237 try cache.put("a", 1); ··· 241 244 } 242 245 243 246 test "capacity 1" { 244 - var cache = LruCache(u64).init(testing.allocator, 1); 247 + var cache = LruCache(u64).init(testing.allocator, 1, testing.io); 245 248 defer cache.deinit(); 246 249 247 250 try cache.put("a", 1); ··· 254 257 } 255 258 256 259 test "count" { 257 - var cache = LruCache(u64).init(testing.allocator, 10); 260 + var cache = LruCache(u64).init(testing.allocator, 10, testing.io); 258 261 defer cache.deinit(); 259 262 260 263 try testing.expectEqual(@as(u32, 0), cache.count()); ··· 267 270 } 268 271 269 272 test "contains" { 270 - var cache = LruCache(u64).init(testing.allocator, 3); 273 + var cache = LruCache(u64).init(testing.allocator, 3, testing.io); 271 274 defer cache.deinit(); 272 275 273 276 try cache.put("a", 1); ··· 277 280 278 281 test "struct values" { 279 282 const Val = struct { x: u32, y: u32 }; 280 - var cache = LruCache(Val).init(testing.allocator, 2); 283 + var cache = LruCache(Val).init(testing.allocator, 2, testing.io); 281 284 defer cache.deinit(); 282 285 283 286 try cache.put("point1", .{ .x = 1, .y = 2 });
+114 -86
src/main.zig
··· 24 24 //! /_health — liveness probe (DB check) 25 25 26 26 const std = @import("std"); 27 + const Io = std.Io; 27 28 const http = std.http; 28 29 const websocket = @import("websocket"); 29 30 const broadcaster = @import("broadcaster.zig"); ··· 40 41 41 42 const log = std.log.scoped(.relay); 42 43 43 - /// zig's default thread stack is 16 MB. with ~2,750 subscriber threads that's 44 - /// 44 GB of virtual memory. 8 MB supports ReleaseSafe — tls.Client.init alone 45 - /// needs ~134 KiB of stack, and deep call chains under inline-else cipher 46 - /// dispatch need headroom. only touched pages count as RSS. 47 44 pub const default_stack_size = 8 * 1024 * 1024; 48 45 46 + // -- Io backend selection -- 47 + // Evented = Io.Uring (linux) | Io.Dispatch (macOS) | Io.Kqueue (BSD) | void 48 + // Falls back to Io.Threaded on platforms without fiber support. 49 + const Backend = if (Io.Evented != void) Io.Evented else Io.Threaded; 50 + 51 + var backend: Backend = undefined; 52 + var debug_threaded_io: Io.Threaded = undefined; 53 + 54 + /// override single-threaded debug_io default — required for std.debug.print safety 55 + /// when multiple OS threads exist (frame worker pool, websocket server). 
56 + pub const std_options_debug_threaded_io: ?*Io.Threaded = &debug_threaded_io; 57 + 49 58 var shutdown_flag: std.atomic.Value(bool) = .{ .raw = false }; 50 59 51 60 /// metrics-only server on the internal port 52 61 const MetricsServer = struct { 53 - server: std.net.Server, 62 + server: Io.net.Server, 63 + io: Io, 54 64 stats: *broadcaster.Stats, 55 65 validator: *validator_mod.Validator, 56 66 data_dir: []const u8, ··· 60 70 61 71 fn run(self: *MetricsServer) void { 62 72 while (!shutdown_flag.load(.acquire)) { 63 - const conn = self.server.accept() catch |err| { 73 + const stream = self.server.accept(self.io) catch |err| { 64 74 if (shutdown_flag.load(.acquire)) return; 65 75 log.debug("metrics accept error: {s}", .{@errorName(err)}); 66 76 continue; 67 77 }; 68 - // 5s read timeout — prevents stale connections from blocking the single-threaded server 69 - const timeout = std.posix.timeval{ .sec = 5, .usec = 0 }; 70 - std.posix.setsockopt(conn.stream.handle, std.posix.SOL.SOCKET, std.posix.SO.RCVTIMEO, std.mem.asBytes(&timeout)) catch {}; 71 - handleMetricsConn(conn.stream, self.stats, self.validator, self.data_dir, self.persist, self.bc, self.slurper); 78 + self.handleMetricsConn(stream); 72 79 } 73 80 } 74 - }; 75 81 76 - fn handleMetricsConn(stream: std.net.Stream, stats: *broadcaster.Stats, validator: *validator_mod.Validator, data_dir: []const u8, persist: *event_log_mod.DiskPersist, bc: *broadcaster.Broadcaster, slurp: *slurper_mod.Slurper) void { 77 - defer stream.close(); 82 + fn handleMetricsConn(self: *MetricsServer, stream: Io.net.Stream) void { 83 + defer stream.close(self.io); 78 84 79 - var recv_buf: [4096]u8 = undefined; 80 - var send_buf: [4096]u8 = undefined; 81 - var connection_reader = stream.reader(&recv_buf); 82 - var connection_writer = stream.writer(&send_buf); 83 - var server = http.Server.init(connection_reader.interface(), &connection_writer.interface); 85 + var recv_buf: [4096]u8 = undefined; 86 + var send_buf: [4096]u8 = undefined; 
87 + var connection_reader = stream.reader(self.io, &recv_buf); 88 + var connection_writer = stream.writer(self.io, &send_buf); 89 + var server = http.Server.init(&connection_reader.interface, &connection_writer.interface); 84 90 85 - var request = server.receiveHead() catch return; 86 - const path = request.head.target; 91 + var request = server.receiveHead() catch return; 92 + const path = request.head.target; 87 93 88 - if (std.mem.eql(u8, path, "/_healthz")) { 89 - // trivial liveness — constant-time, no dependencies 90 - request.respond("{\"status\":\"ok\"}", .{ .status = .ok, .keep_alive = false, .extra_headers = &.{ 91 - .{ .name = "content-type", .value = "application/json" }, 92 - .{ .name = "server", .value = "zlay (atproto-relay)" }, 93 - } }) catch {}; 94 - } else if (std.mem.eql(u8, path, "/_health") or std.mem.eql(u8, path, "/_readyz")) { 95 - const db_ok = if (persist.db.exec("SELECT 1", .{})) |_| true else |_| false; 96 - const status: http.Status = if (db_ok) .ok else .internal_server_error; 97 - const body = if (db_ok) "{\"status\":\"ok\"}" else "{\"status\":\"error\",\"msg\":\"database unavailable\"}"; 98 - request.respond(body, .{ .status = status, .keep_alive = false, .extra_headers = &.{ 99 - .{ .name = "content-type", .value = "application/json" }, 100 - .{ .name = "server", .value = "zlay (atproto-relay)" }, 101 - } }) catch {}; 102 - } else if (std.mem.eql(u8, path, "/metrics")) { 103 - const cache_entries = validator.cacheSize(); 104 - const attribution = broadcaster.AttributionMetrics{ 105 - .history_entries = bc.history.count(), 106 - .evtbuf_entries = persist.evtbufLen(), 107 - .did_cache_entries = persist.didCacheLen(), 108 - .resolve_queue_len = validator.resolveQueueLen(), 109 - .resolve_queued_set_count = validator.resolveQueuedSetCount(), 110 - .validator_cache_map_cap = validator.cacheMapCapacity(), 111 - .did_cache_map_cap = persist.didCacheMapCap(), 112 - .queued_set_map_cap = validator.resolveQueuedSetCapacity(), 113 - 
.evtbuf_cap = persist.evtbufCap(), 114 - .outbuf_cap = persist.outbufCap(), 115 - .workers_count = slurp.workerCount(), 116 - }; 94 + if (std.mem.eql(u8, path, "/_healthz")) { 95 + // trivial liveness — constant-time, no dependencies 96 + request.respond("{\"status\":\"ok\"}", .{ .status = .ok, .keep_alive = false, .extra_headers = &.{ 97 + .{ .name = "content-type", .value = "application/json" }, 98 + .{ .name = "server", .value = "zlay (atproto-relay)" }, 99 + } }) catch {}; 100 + } else if (std.mem.eql(u8, path, "/_health") or std.mem.eql(u8, path, "/_readyz")) { 101 + const db_ok = if (self.persist.db.exec("SELECT 1", .{})) |_| true else |_| false; 102 + const status: http.Status = if (db_ok) .ok else .internal_server_error; 103 + const body = if (db_ok) "{\"status\":\"ok\"}" else "{\"status\":\"error\",\"msg\":\"database unavailable\"}"; 104 + request.respond(body, .{ .status = status, .keep_alive = false, .extra_headers = &.{ 105 + .{ .name = "content-type", .value = "application/json" }, 106 + .{ .name = "server", .value = "zlay (atproto-relay)" }, 107 + } }) catch {}; 108 + } else if (std.mem.eql(u8, path, "/metrics")) { 109 + const cache_entries = self.validator.cacheSize(); 110 + const attribution = broadcaster.AttributionMetrics{ 111 + .history_entries = self.bc.history.count(), 112 + .evtbuf_entries = self.persist.evtbufLen(), 113 + .did_cache_entries = self.persist.didCacheLen(), 114 + .resolve_queue_len = self.validator.resolveQueueLen(), 115 + .resolve_queued_set_count = self.validator.resolveQueuedSetCount(), 116 + .validator_cache_map_cap = self.validator.cacheMapCapacity(), 117 + .did_cache_map_cap = self.persist.didCacheMapCap(), 118 + .queued_set_map_cap = self.validator.resolveQueuedSetCapacity(), 119 + .evtbuf_cap = self.persist.evtbufCap(), 120 + .outbuf_cap = self.persist.outbufCap(), 121 + .workers_count = self.slurper.workerCount(), 122 + }; 117 123 118 - var metrics_buf: [65536]u8 = undefined; 119 - const body = 
broadcaster.formatPrometheusMetrics(stats, cache_entries, attribution, data_dir, &metrics_buf); 120 - request.respond(body, .{ .status = .ok, .keep_alive = false, .extra_headers = &.{ 121 - .{ .name = "content-type", .value = "text/plain; version=0.0.4; charset=utf-8" }, 122 - .{ .name = "server", .value = "zlay (atproto-relay)" }, 123 - } }) catch {}; 124 - } else { 125 - request.respond("not found", .{ .status = .not_found, .keep_alive = false, .extra_headers = &.{ 126 - .{ .name = "content-type", .value = "text/plain" }, 127 - .{ .name = "server", .value = "zlay (atproto-relay)" }, 128 - } }) catch {}; 124 + var metrics_buf: [65536]u8 = undefined; 125 + const body = broadcaster.formatPrometheusMetrics(self.stats, cache_entries, attribution, self.data_dir, &metrics_buf, self.io); 126 + request.respond(body, .{ .status = .ok, .keep_alive = false, .extra_headers = &.{ 127 + .{ .name = "content-type", .value = "text/plain; version=0.0.4; charset=utf-8" }, 128 + .{ .name = "server", .value = "zlay (atproto-relay)" }, 129 + } }) catch {}; 130 + } else { 131 + request.respond("not found", .{ .status = .not_found, .keep_alive = false, .extra_headers = &.{ 132 + .{ .name = "content-type", .value = "text/plain" }, 133 + .{ .name = "server", .value = "zlay (atproto-relay)" }, 134 + } }) catch {}; 135 + } 129 136 } 130 - } 137 + }; 131 138 132 139 pub fn main() !void { 133 140 // exp-002: optional GPA wrapper for leak detection. 134 141 // build with -Duse_gpa=true to enable. on clean shutdown (SIGTERM), 135 142 // GPA logs every allocation that was never freed, with stack traces. 
136 - var gpa: std.heap.GeneralPurposeAllocator(.{ 143 + var gpa: std.heap.DebugAllocator(.{ 137 144 .stack_trace_frames = if (build_options.use_gpa) 8 else 0, 138 145 }) = .init; 139 146 defer if (build_options.use_gpa) { ··· 147 154 }; 148 155 const allocator = if (build_options.use_gpa) gpa.allocator() else std.heap.c_allocator; 149 156 157 + // init debug io (for std.debug.print thread safety) 158 + debug_threaded_io = Io.Threaded.init(allocator, .{}); 159 + 160 + // init primary runtime 161 + if (Backend == Io.Threaded) { 162 + backend = Io.Threaded.init(allocator, .{}); 163 + } else { 164 + try Backend.init(&backend, allocator, .{}); 165 + } 166 + const io = backend.io(); 167 + 168 + log.info("io backend: {s}", .{if (Backend == Io.Threaded) "Threaded" else "Evented"}); 169 + 150 170 // parse config from env 151 171 const port = parseEnvInt(u16, "RELAY_PORT", 3000); 152 172 const metrics_port = parseEnvInt(u16, "RELAY_METRICS_PORT", 3001); 153 - const upstream = normalizeSeedHost(std.posix.getenv("RELAY_UPSTREAM") orelse "bsky.network"); 154 - const data_dir = std.posix.getenv("RELAY_DATA_DIR") orelse "data/events"; 173 + const upstream = normalizeSeedHost(getenv("RELAY_UPSTREAM") orelse "bsky.network"); 174 + const data_dir = getenv("RELAY_DATA_DIR") orelse "data/events"; 155 175 const retention_hours = parseEnvInt(u64, "RELAY_RETENTION_HOURS", 72); 156 176 const max_events_gb = parseEnvInt(u64, "RELAY_MAX_EVENTS_GB", 100); 157 177 const frame_workers = parseEnvInt(u16, "FRAME_WORKERS", 16); ··· 160 180 // install signal handlers (including SIGPIPE ignore) 161 181 installSignalHandlers(); 162 182 163 - // init components 164 - var bc = broadcaster.Broadcaster.init(allocator); 183 + // init components — pass io to network-facing modules 184 + var bc = broadcaster.Broadcaster.init(allocator, io); 165 185 defer bc.deinit(); 166 186 167 - var val = validator_mod.Validator.init(allocator, &bc.stats); 187 + var val = validator_mod.Validator.init(allocator, 
&bc.stats, io); 168 188 defer val.deinit(); 169 189 try val.start(); 170 190 171 191 // init disk persistence (indigo-compatible diskpersist format + Postgres index) 172 - const database_url = std.posix.getenv("DATABASE_URL") orelse "postgres://relay:relay@localhost:5432/relay"; 173 - var dp = event_log_mod.DiskPersist.init(allocator, data_dir, database_url) catch |err| { 192 + const database_url = getenv("DATABASE_URL") orelse "postgres://relay:relay@localhost:5432/relay"; 193 + var dp = event_log_mod.DiskPersist.init(allocator, data_dir, database_url, io) catch |err| { 174 194 log.err("failed to init disk persist at {s}: {s}", .{ data_dir, @errorName(err) }); 175 195 return err; 176 196 }; ··· 190 210 val.persist = &dp; 191 211 192 212 // init collection index (RocksDB — inspired by lightrail/microcosm.blue) 193 - const ci_dir = std.posix.getenv("COLLECTION_INDEX_DIR") orelse "data/collection-index"; 213 + const ci_dir = getenv("COLLECTION_INDEX_DIR") orelse "data/collection-index"; 194 214 var ci = collection_index_mod.CollectionIndex.open(allocator, ci_dir) catch |err| { 195 215 log.err("failed to init collection index at {s}: {s}", .{ ci_dir, @errorName(err) }); 196 216 return err; ··· 198 218 defer ci.deinit(); 199 219 200 220 // init backfiller (collection index backfill from source relay) 201 - var backfiller = backfill_mod.Backfiller.init(allocator, &ci, dp.db); 221 + var backfiller = backfill_mod.Backfiller.init(allocator, &ci, dp.db, io); 202 222 203 223 // init cleaner (removes stale entries from collection index) 204 - var cleaner = cleaner_mod.Cleaner.init(allocator, &ci, dp.db); 224 + var cleaner = cleaner_mod.Cleaner.init(allocator, io, &ci, dp.db); 205 225 206 226 // init resyncer (updates collection index on #sync events) 207 - var resyncer = resync_mod.Resyncer.init(allocator, &ci); 227 + var resyncer = resync_mod.Resyncer.init(allocator, io, &ci); 208 228 try resyncer.start(); 209 229 defer resyncer.deinit(); 210 230 ··· 221 241 .frame_workers = 
frame_workers, 222 242 .frame_queue_capacity = frame_queue_capacity, 223 243 }, 244 + io, 224 245 ); 225 246 defer slurper.deinit(); 226 247 slurper.collection_index = &ci; ··· 229 250 // start: loads active hosts from DB, spawns subscriber threads 230 251 try slurper.start(); 231 252 232 - // start GC thread (runs every 10 minutes) 233 - const gc_thread = try std.Thread.spawn(.{ .stack_size = default_stack_size }, gcLoop, .{&dp}); 253 + // start GC loop (runs as background thread — does disk I/O + malloc_trim) 254 + const gc_thread = try std.Thread.spawn(.{}, gcLoop, .{ &dp, io }); 234 255 235 256 // wire HTTP fallback into broadcaster (all API endpoints served on WS port) 236 257 var http_context = api.HttpContext{ ··· 248 269 bc.http_fallback_ctx = @ptrCast(&http_context); 249 270 250 271 // start metrics-only server (internal port) 251 - const metrics_address = std.net.Address.initIp4(.{ 0, 0, 0, 0 }, metrics_port); 272 + const metrics_address = Io.net.Ip4Address.unspecified(metrics_port); 252 273 var metrics_srv = MetricsServer{ 253 - .server = metrics_address.listen(.{ .reuse_address = true }) catch |err| { 274 + .server = (Io.net.IpAddress{ .ip4 = metrics_address }).listen(io, .{ .reuse_address = true }) catch |err| { 254 275 log.err("metrics server failed to listen on :{d}: {s}", .{ metrics_port, @errorName(err) }); 255 276 return err; 256 277 }, 278 + .io = io, 257 279 .stats = &bc.stats, 258 280 .validator = &val, 259 281 .data_dir = data_dir, ··· 261 283 .bc = &bc, 262 284 .slurper = &slurper, 263 285 }; 264 - const metrics_thread = try std.Thread.spawn(.{ .stack_size = default_stack_size }, MetricsServer.run, .{&metrics_srv}); 286 + const metrics_thread = try std.Thread.spawn(.{}, MetricsServer.run, .{&metrics_srv}); 265 287 266 288 // start downstream WebSocket server (also serves HTTP API via httpFallback) 267 289 log.info("relay listening on :{d} (ws+http), :{d} (metrics)", .{ port, metrics_port }); ··· 280 302 281 303 // wait for shutdown signal 282 
304 while (!shutdown_flag.load(.acquire)) { 283 - std.posix.nanosleep(0, 100 * std.time.ns_per_ms); 305 + io.sleep(Io.Duration.fromMilliseconds(100), .awake) catch break; 284 306 } 285 307 286 308 log.info("shutdown signal received, stopping...", .{}); ··· 292 314 // wait for GC thread 293 315 gc_thread.join(); 294 316 295 - // close metrics listener socket to unblock accept(), then join 296 - metrics_srv.server.stream.close(); 317 + // close metrics listener to unblock accept(), then join 318 + metrics_srv.server.deinit(io); 297 319 metrics_thread.join(); 298 320 299 321 log.info("relay stopped cleanly", .{}); ··· 301 323 302 324 const builtin = @import("builtin"); 303 325 304 - fn gcLoop(dp: *event_log_mod.DiskPersist) void { 326 + fn gcLoop(dp: *event_log_mod.DiskPersist, io: Io) void { 305 327 const gc_interval: u64 = 10 * 60; // 10 minutes in seconds 306 328 while (!shutdown_flag.load(.acquire)) { 307 329 // sleep in small increments to check shutdown 308 330 var remaining: u64 = gc_interval; 309 331 while (remaining > 0 and !shutdown_flag.load(.acquire)) { 310 332 const chunk = @min(remaining, 1); 311 - std.posix.nanosleep(chunk, 0); 333 + io.sleep(Io.Duration.fromSeconds(@intCast(chunk)), .awake) catch return; 312 334 remaining -= chunk; 313 335 } 314 336 if (shutdown_flag.load(.acquire)) return; ··· 325 347 } 326 348 } 327 349 328 - fn signalHandler(_: c_int) callconv(.c) void { 350 + fn signalHandler(_: std.posix.SIG) callconv(.c) void { 329 351 shutdown_flag.store(true, .release); 330 352 } 331 353 ··· 353 375 return raw; 354 376 } 355 377 356 - fn parseEnvInt(comptime T: type, key: []const u8, default: T) T { 357 - const val = std.posix.getenv(key) orelse return default; 378 + /// libc getenv — std.posix.getenv removed in 0.16 379 + fn getenv(key: [*:0]const u8) ?[]const u8 { 380 + const ptr = std.c.getenv(key) orelse return null; 381 + return std.mem.sliceTo(ptr, 0); 382 + } 383 + 384 + fn parseEnvInt(comptime T: type, key: [*:0]const u8, default: T) T 
{ 385 + const val = getenv(key) orelse return default; 358 386 return std.fmt.parseInt(T, val, 10) catch default; 359 387 } 360 388
+29 -22
src/resync.zig
··· 8 8 //! modeled on cleaner.zig — bounded queue, single background worker thread. 9 9 10 10 const std = @import("std"); 11 + const Io = std.Io; 11 12 const http = std.http; 12 13 const collection_index_mod = @import("collection_index.zig"); 13 14 ··· 33 34 34 35 pub const Resyncer = struct { 35 36 allocator: Allocator, 37 + io: Io, 36 38 collection_index: *collection_index_mod.CollectionIndex, 37 39 38 40 // bounded ring buffer queue ··· 40 42 head: usize, 41 43 tail: usize, 42 44 len: usize, 43 - mutex: std.Thread.Mutex, 44 - cond: std.Thread.Condition, 45 + mutex: Io.Mutex, 46 + cond: Io.Condition, 45 47 46 48 running: std.atomic.Value(bool), 47 49 thread: ?std.Thread, ··· 53 55 54 56 pub fn init( 55 57 allocator: Allocator, 58 + io: Io, 56 59 collection_index: *collection_index_mod.CollectionIndex, 57 60 ) Resyncer { 58 61 return .{ 59 62 .allocator = allocator, 63 + .io = io, 60 64 .collection_index = collection_index, 61 65 .queue = undefined, 62 66 .head = 0, 63 67 .tail = 0, 64 68 .len = 0, 65 - .mutex = .{}, 66 - .cond = .{}, 69 + .mutex = Io.Mutex.init, 70 + .cond = Io.Condition.init, 67 71 .running = .{ .raw = false }, 68 72 .thread = null, 69 73 .processed = .{ .raw = 0 }, ··· 76 80 pub fn start(self: *Resyncer) !void { 77 81 self.running.store(true, .release); 78 82 self.thread = try std.Thread.spawn( 79 - .{ .stack_size = @import("main.zig").default_stack_size }, 83 + .{}, 80 84 run, 81 85 .{self}, 82 86 ); ··· 86 90 pub fn enqueue(self: *Resyncer, did: []const u8, hostname: []const u8) void { 87 91 if (did.len == 0 or did.len > 128 or hostname.len == 0 or hostname.len > 256) return; 88 92 89 - self.mutex.lock(); 90 - defer self.mutex.unlock(); 93 + self.mutex.lockUncancelable(self.io); 94 + defer self.mutex.unlock(self.io); 91 95 92 96 if (self.len >= queue_capacity) { 93 97 _ = self.dropped.fetchAdd(1, .monotonic); ··· 106 110 self.queue[self.tail] = item; 107 111 self.tail = (self.tail + 1) % queue_capacity; 108 112 self.len += 1; 109 - 
self.cond.signal(); 113 + self.cond.signal(self.io); 110 114 } 111 115 112 116 /// dequeue one item. blocks until available or shutdown. 113 117 fn dequeue(self: *Resyncer) ?ResyncItem { 114 - self.mutex.lock(); 115 - defer self.mutex.unlock(); 118 + self.mutex.lockUncancelable(self.io); 119 + defer self.mutex.unlock(self.io); 116 120 117 121 while (self.len == 0 and self.running.load(.acquire)) { 118 - self.cond.timedWait(&self.mutex, 1 * std.time.ns_per_s) catch {}; 122 + self.cond.waitUncancelable(self.io, &self.mutex); 119 123 } 120 124 121 125 if (self.len == 0) return null; ··· 129 133 fn run(self: *Resyncer) void { 130 134 log.info("resync worker started", .{}); 131 135 132 - var client: http.Client = .{ .allocator = self.allocator }; 136 + var client: http.Client = .{ .allocator = self.allocator, .io = self.io }; 133 137 defer client.deinit(); 134 138 135 139 while (self.running.load(.acquire)) { ··· 137 141 self.processItem(&client, &item); 138 142 139 143 // brief pause between items 140 - std.posix.nanosleep(0, 50 * std.time.ns_per_ms); 144 + self.io.sleep(Io.Duration.fromMilliseconds(50), .awake) catch {}; 141 145 } 142 146 143 147 log.info("resync worker stopped (processed={d}, failed={d}, dropped={d})", .{ ··· 216 220 } 217 221 218 222 pub fn queueDepth(self: *Resyncer) usize { 219 - self.mutex.lock(); 220 - defer self.mutex.unlock(); 223 + self.mutex.lockUncancelable(self.io); 224 + defer self.mutex.unlock(self.io); 221 225 return self.len; 222 226 } 223 227 224 228 pub fn stop(self: *Resyncer) void { 225 229 self.running.store(false, .release); 226 - self.cond.signal(); 230 + self.cond.signal(self.io); 227 231 } 228 232 229 233 pub fn deinit(self: *Resyncer) void { ··· 243 247 // just test the queue mechanics, not the HTTP/RocksDB parts 244 248 var r: Resyncer = .{ 245 249 .allocator = std.testing.allocator, 250 + .io = std.testing.io, 246 251 .collection_index = undefined, // not used in this test 247 252 .queue = undefined, 248 253 .head = 0, 249 
254 .tail = 0, 250 255 .len = 0, 251 - .mutex = .{}, 252 - .cond = .{}, 256 + .mutex = Io.Mutex.init, 257 + .cond = Io.Condition.init, 253 258 .running = .{ .raw = true }, 254 259 .thread = null, 255 260 .processed = .{ .raw = 0 }, ··· 270 275 test "Resyncer drops when full" { 271 276 var r: Resyncer = .{ 272 277 .allocator = std.testing.allocator, 278 + .io = std.testing.io, 273 279 .collection_index = undefined, 274 280 .queue = undefined, 275 281 .head = 0, 276 282 .tail = 0, 277 283 .len = queue_capacity, // pretend full 278 - .mutex = .{}, 279 - .cond = .{}, 284 + .mutex = Io.Mutex.init, 285 + .cond = Io.Condition.init, 280 286 .running = .{ .raw = true }, 281 287 .thread = null, 282 288 .processed = .{ .raw = 0 }, ··· 291 297 test "Resyncer rejects oversized inputs" { 292 298 var r: Resyncer = .{ 293 299 .allocator = std.testing.allocator, 300 + .io = std.testing.io, 294 301 .collection_index = undefined, 295 302 .queue = undefined, 296 303 .head = 0, 297 304 .tail = 0, 298 305 .len = 0, 299 - .mutex = .{}, 300 - .cond = .{}, 306 + .mutex = Io.Mutex.init, 307 + .cond = Io.Condition.init, 301 308 .running = .{ .raw = true }, 302 309 .thread = null, 303 310 .processed = .{ .raw = 0 },
+25 -23
src/ring_buffer.zig
··· 4 4 //! and global frame history (cursor replay). 5 5 6 6 const std = @import("std"); 7 + const Io = std.Io; 7 8 const Allocator = std.mem.Allocator; 8 9 9 10 pub const Frame = struct { ··· 23 24 read_pos: usize = 0, // next read position (for pop) 24 25 len: usize = 0, 25 26 allocator: Allocator, 26 - mutex: std.Thread.Mutex = .{}, 27 + mutex: Io.Mutex = Io.Mutex.init, 28 + io: Io, 27 29 28 30 const Self = @This(); 29 31 30 - pub fn init(allocator: Allocator) Self { 31 - return .{ .allocator = allocator }; 32 + pub fn init(allocator: Allocator, io: Io) Self { 33 + return .{ .allocator = allocator, .io = io }; 32 34 } 33 35 34 36 pub fn deinit(self: *Self) void { ··· 46 48 47 49 /// push a frame. if full, overwrites oldest. returns false if alloc failed. 48 50 pub fn push(self: *Self, seq: u64, data: []const u8) bool { 49 - self.mutex.lock(); 50 - defer self.mutex.unlock(); 51 + self.mutex.lockUncancelable(self.io); 52 + defer self.mutex.unlock(self.io); 51 53 return self.pushUnlocked(seq, data); 52 54 } 53 55 ··· 73 75 74 76 /// pop the oldest frame. caller owns the returned data. 75 77 pub fn pop(self: *Self) ?Frame { 76 - self.mutex.lock(); 77 - defer self.mutex.unlock(); 78 + self.mutex.lockUncancelable(self.io); 79 + defer self.mutex.unlock(self.io); 78 80 return self.popUnlocked(); 79 81 } 80 82 ··· 90 92 /// number of frames currently buffered (non-blocking — returns 0 if lock is contended) 91 93 pub fn count(self: *Self) usize { 92 94 if (!self.mutex.tryLock()) return 0; 93 - defer self.mutex.unlock(); 95 + defer self.mutex.unlock(self.io); 94 96 return self.len; 95 97 } 96 98 97 99 /// check if buffer is full 98 100 pub fn isFull(self: *Self) bool { 99 - self.mutex.lock(); 100 - defer self.mutex.unlock(); 101 + self.mutex.lockUncancelable(self.io); 102 + defer self.mutex.unlock(self.io); 101 103 return self.len == capacity; 102 104 } 103 105 104 106 /// get all frames with seq > cursor, ordered by seq. 
105 107 /// caller owns the returned slice AND frame data. 106 108 pub fn framesSince(self: *Self, allocator: Allocator, cursor: u64) ![]const Frame { 107 - self.mutex.lock(); 108 - defer self.mutex.unlock(); 109 + self.mutex.lockUncancelable(self.io); 110 + defer self.mutex.unlock(self.io); 109 111 110 - var result: std.ArrayList(Frame) = .{}; 112 + var result: std.ArrayList(Frame) = .empty; 111 113 errdefer { 112 114 for (result.items) |f| allocator.free(f.data); 113 115 result.deinit(allocator); ··· 127 129 128 130 /// oldest seq in the buffer, or null if empty 129 131 pub fn oldestSeq(self: *Self) ?u64 { 130 - self.mutex.lock(); 131 - defer self.mutex.unlock(); 132 + self.mutex.lockUncancelable(self.io); 133 + defer self.mutex.unlock(self.io); 132 134 if (self.len == 0) return null; 133 135 return self.entries[self.read_pos].seq; 134 136 } 135 137 136 138 /// newest seq in the buffer, or null if empty 137 139 pub fn newestSeq(self: *Self) ?u64 { 138 - self.mutex.lock(); 139 - defer self.mutex.unlock(); 140 + self.mutex.lockUncancelable(self.io); 141 + defer self.mutex.unlock(self.io); 140 142 if (self.len == 0) return null; 141 143 const newest_idx = if (self.write_pos == 0) capacity - 1 else self.write_pos - 1; 142 144 return self.entries[newest_idx].seq; ··· 147 149 // === tests === 148 150 149 151 test "push and pop" { 150 - var buf = RingBuffer(4).init(std.testing.allocator); 152 + var buf = RingBuffer(4).init(std.testing.allocator, std.testing.io); 151 153 defer buf.deinit(); 152 154 153 155 try std.testing.expect(buf.push(1, "hello")); ··· 167 169 } 168 170 169 171 test "overwrite when full" { 170 - var buf = RingBuffer(3).init(std.testing.allocator); 172 + var buf = RingBuffer(3).init(std.testing.allocator, std.testing.io); 171 173 defer buf.deinit(); 172 174 173 175 try std.testing.expect(buf.push(1, "a")); ··· 185 187 } 186 188 187 189 test "framesSince" { 188 - var buf = RingBuffer(8).init(std.testing.allocator); 190 + var buf = 
RingBuffer(8).init(std.testing.allocator, std.testing.io); 189 191 defer buf.deinit(); 190 192 191 193 for (1..6) |i| { ··· 204 206 } 205 207 206 208 test "oldestSeq and newestSeq" { 207 - var buf = RingBuffer(4).init(std.testing.allocator); 209 + var buf = RingBuffer(4).init(std.testing.allocator, std.testing.io); 208 210 defer buf.deinit(); 209 211 210 212 try std.testing.expect(buf.oldestSeq() == null); ··· 219 221 } 220 222 221 223 test "empty buffer operations" { 222 - var buf = RingBuffer(4).init(std.testing.allocator); 224 + var buf = RingBuffer(4).init(std.testing.allocator, std.testing.io); 223 225 defer buf.deinit(); 224 226 225 227 try std.testing.expectEqual(@as(usize, 0), buf.count()); ··· 232 234 } 233 235 234 236 test "wrap-around with pop and push" { 235 - var buf = RingBuffer(3).init(std.testing.allocator); 237 + var buf = RingBuffer(3).init(std.testing.allocator, std.testing.io); 236 238 defer buf.deinit(); 237 239 238 240 // fill
+75 -52
src/slurper.zig
··· 11 11 //! thread-safe for N concurrent producers, so this just orchestrates. 12 12 13 13 const std = @import("std"); 14 + const Io = std.Io; 14 15 const http = std.http; 15 16 const broadcaster = @import("broadcaster.zig"); 16 17 const validator_mod = @import("validator.zig"); ··· 111 112 /// SSRF protection: resolve hostname and reject private/reserved IP ranges. 112 113 /// Go relay: ssrf.go PublicOnlyTransport — rejects 10/8, 172.16/12, 192.168/16, 127/8, link-local. 113 114 fn rejectPrivateHost(allocator: Allocator, hostname: []const u8) HostValidationError!void { 114 - const addr_list = std.net.getAddressList(allocator, hostname, 443) catch return error.HostUnreachable; 115 - defer addr_list.deinit(); 115 + // null-terminate hostname for getaddrinfo 116 + const hostname_z = allocator.dupeZ(u8, hostname) catch return error.HostUnreachable; 117 + defer allocator.free(hostname_z); 118 + 119 + var hints: std.c.addrinfo = .{ 120 + .flags = .{}, 121 + .family = std.c.AF.UNSPEC, 122 + .socktype = std.c.SOCK.STREAM, 123 + .protocol = 0, 124 + .addrlen = 0, 125 + .addr = null, 126 + .canonname = null, 127 + .next = null, 128 + }; 116 129 117 - if (addr_list.addrs.len == 0) return error.HostUnreachable; 130 + var res: ?*std.c.addrinfo = null; 131 + const rc = std.c.getaddrinfo(hostname_z, "443", &hints, &res); 132 + if (@intFromEnum(rc) != 0 or res == null) return error.HostUnreachable; 133 + defer std.c.freeaddrinfo(res.?); 118 134 119 135 // check all resolved addresses — reject if ANY is private 120 - for (addr_list.addrs) |addr| { 121 - switch (addr.any.family) { 122 - std.posix.AF.INET => { 123 - const ip4 = addr.in.sa.addr; 124 - const bytes: [4]u8 = @bitCast(ip4); 125 - if (bytes[0] == 10 or // 10.0.0.0/8 126 - (bytes[0] == 172 and (bytes[1] & 0xf0) == 16) or // 172.16.0.0/12 127 - (bytes[0] == 192 and bytes[1] == 168) or // 192.168.0.0/16 128 - bytes[0] == 127 or // 127.0.0.0/8 129 - bytes[0] == 0 or // 0.0.0.0/8 130 - (bytes[0] == 169 and bytes[1] == 254)) 
// 169.254.0.0/16 link-local 131 - { 132 - log.warn("SSRF: {s} resolves to private IP {d}.{d}.{d}.{d}", .{ hostname, bytes[0], bytes[1], bytes[2], bytes[3] }); 133 - return error.HostUnreachable; 134 - } 135 - }, 136 - else => {}, // allow IPv6 for now (could add RFC 4193 check later) 136 + var cur = res; 137 + var found_any = false; 138 + while (cur) |node| : (cur = node.next) { 139 + found_any = true; 140 + if (node.family == std.c.AF.INET) { 141 + const sa: *const std.c.sockaddr.in = @ptrCast(@alignCast(node.addr.?)); 142 + const bytes: [4]u8 = @bitCast(sa.addr); 143 + if (bytes[0] == 10 or // 10.0.0.0/8 144 + (bytes[0] == 172 and (bytes[1] & 0xf0) == 16) or // 172.16.0.0/12 145 + (bytes[0] == 192 and bytes[1] == 168) or // 192.168.0.0/16 146 + bytes[0] == 127 or // 127.0.0.0/8 147 + bytes[0] == 0 or // 0.0.0.0/8 148 + (bytes[0] == 169 and bytes[1] == 254)) // 169.254.0.0/16 link-local 149 + { 150 + log.warn("SSRF: {s} resolves to private IP {d}.{d}.{d}.{d}", .{ hostname, bytes[0], bytes[1], bytes[2], bytes[3] }); 151 + return error.HostUnreachable; 152 + } 137 153 } 154 + // allow IPv6 for now (could add RFC 4193 check later) 138 155 } 156 + if (!found_any) return error.HostUnreachable; 139 157 } 140 158 141 159 /// check that a host is a real PDS by calling describeServer. 142 160 /// also checks Server header for relay loop detection. 143 161 /// Go relay: host_checker.go CheckHost + slurper.go Server header check. 
144 - fn checkHost(allocator: Allocator, hostname: []const u8) HostValidationError!void { 162 + fn checkHost(allocator: Allocator, hostname: []const u8, io: Io) HostValidationError!void { 145 163 // SSRF protection: reject private IPs before making any request 146 164 rejectPrivateHost(allocator, hostname) catch |err| return err; 147 165 var url_buf: [512]u8 = undefined; 148 166 const url = std.fmt.bufPrint(&url_buf, "https://{s}/xrpc/com.atproto.server.describeServer", .{hostname}) catch return error.HostUnreachable; 149 167 150 - var client: http.Client = .{ .allocator = allocator }; 168 + var client: http.Client = .{ .allocator = allocator, .io = io }; 151 169 defer client.deinit(); 152 170 153 171 const uri = std.Uri.parse(url) catch return error.HostUnreachable; ··· 216 234 ca_bundle: ?std.crypto.Certificate.Bundle = null, 217 235 218 236 // active subscriber threads, keyed by host_id 219 - workers: std.AutoHashMapUnmanaged(u64, WorkerEntry) = .{}, 220 - workers_mutex: std.Thread.Mutex = .{}, 237 + workers: std.AutoHashMapUnmanaged(u64, WorkerEntry) = .empty, 238 + workers_mutex: Io.Mutex = Io.Mutex.init, 221 239 222 240 // crawl request queue 223 - crawl_queue: std.ArrayListUnmanaged([]const u8) = .{}, 224 - crawl_mutex: std.Thread.Mutex = .{}, 225 - crawl_cond: std.Thread.Condition = .{}, 241 + crawl_queue: std.ArrayListUnmanaged([]const u8) = .empty, 242 + crawl_mutex: Io.Mutex = Io.Mutex.init, 243 + crawl_cond: Io.Condition = Io.Condition.init, 226 244 227 245 // background threads 228 246 startup_thread: ?std.Thread = null, 229 247 crawl_thread: ?std.Thread = null, 230 248 249 + io: Io, 250 + 231 251 pub fn init( 232 252 allocator: Allocator, 233 253 bc: *broadcaster.Broadcaster, ··· 235 255 persist: *event_log_mod.DiskPersist, 236 256 shutdown: *std.atomic.Value(bool), 237 257 options: Options, 258 + io: Io, 238 259 ) Slurper { 239 260 return .{ 240 261 .allocator = allocator, ··· 243 264 .persist = persist, 244 265 .shutdown = shutdown, 245 266 .options 
= options, 267 + .io = io, 246 268 }; 247 269 } 248 270 ··· 250 272 /// Go relay: pull-hosts bootstraps from bsky.network's listHosts, then crawls each PDS directly. 251 273 pub fn start(self: *Slurper) !void { 252 274 // load CA bundle once — shared by all subscriber TLS connections 253 - var bundle: std.crypto.Certificate.Bundle = .{}; 254 - try bundle.rescan(self.allocator); 275 + var bundle: std.crypto.Certificate.Bundle = .empty; 276 + try bundle.rescan(self.allocator, self.io, Io.Timestamp.now(self.io, .real)); 255 277 self.ca_bundle = bundle; 256 278 log.info("loaded shared CA bundle", .{}); 257 279 ··· 260 282 .num_workers = self.options.frame_workers, 261 283 .queue_capacity = self.options.frame_queue_capacity, 262 284 .stack_size = @import("main.zig").default_stack_size, 263 - }); 285 + }, self.io); 264 286 log.info("frame pool started: {d} workers, queue capacity {d}", .{ self.options.frame_workers, self.options.frame_queue_capacity }); 265 287 266 288 // spawn worker startup in background so HTTP server + probes come up immediately. ··· 276 298 var total: usize = 0; 277 299 const limit = 500; 278 300 279 - var client: http.Client = .{ .allocator = self.allocator }; 301 + var client: http.Client = .{ .allocator = self.allocator, .io = self.io }; 280 302 defer client.deinit(); 281 303 282 304 while (true) { ··· 366 388 /// add a crawl request (from requestCrawl endpoint) 367 389 pub fn addCrawlRequest(self: *Slurper, hostname: []const u8) !void { 368 390 const duped = try self.allocator.dupe(u8, hostname); 369 - self.crawl_mutex.lock(); 370 - defer self.crawl_mutex.unlock(); 391 + self.crawl_mutex.lockUncancelable(self.io); 392 + defer self.crawl_mutex.unlock(self.io); 371 393 try self.crawl_queue.append(self.allocator, duped); 372 - self.crawl_cond.signal(); 394 + self.crawl_cond.signal(self.io); 373 395 } 374 396 375 397 /// validate and add a host: format check, domain ban, describeServer, then spawn. 
··· 401 423 // Go relay: crawl.go CheckIfSubscribed 402 424 const host_info = try self.persist.getOrCreateHost(hostname); 403 425 { 404 - self.workers_mutex.lock(); 405 - defer self.workers_mutex.unlock(); 426 + self.workers_mutex.lockUncancelable(self.io); 427 + defer self.workers_mutex.unlock(self.io); 406 428 if (self.workers.contains(host_info.id)) { 407 429 log.debug("host {s} already has a worker, skipping", .{hostname}); 408 430 return; ··· 411 433 412 434 // step 5: describeServer liveness check 413 435 // Go relay: host_checker.go CheckHost (with SSRF protection) 414 - checkHost(self.allocator, hostname) catch |err| { 436 + checkHost(self.allocator, hostname, self.io) catch |err| { 415 437 log.warn("host {s}: describeServer check failed: {s}", .{ hostname, @errorName(err) }); 416 438 return; 417 439 }; ··· 438 460 439 461 sub.* = subscriber_mod.Subscriber.init( 440 462 self.allocator, 463 + self.io, 441 464 self.bc, 442 465 self.validator, 443 466 self.persist, ··· 456 479 457 480 const thread = try std.Thread.spawn(.{ .stack_size = @import("main.zig").default_stack_size }, runWorker, .{ self, host_id, sub }); 458 481 459 - self.workers_mutex.lock(); 460 - defer self.workers_mutex.unlock(); 482 + self.workers_mutex.lockUncancelable(self.io); 483 + defer self.workers_mutex.unlock(self.io); 461 484 try self.workers.put(self.allocator, host_id, .{ 462 485 .thread = thread, 463 486 .subscriber = sub, ··· 470 493 sub.run(); 471 494 472 495 // subscriber returned — remove from active workers 473 - self.workers_mutex.lock(); 474 - defer self.workers_mutex.unlock(); 496 + self.workers_mutex.lockUncancelable(self.io); 497 + defer self.workers_mutex.unlock(self.io); 475 498 _ = self.workers.remove(host_id); 476 499 _ = self.bc.stats.connected_inbound.fetchSub(1, .monotonic); 477 500 ··· 524 547 while (!self.shutdown.load(.acquire)) { 525 548 var hostname: ?[]const u8 = null; 526 549 { 527 - self.crawl_mutex.lock(); 528 - defer self.crawl_mutex.unlock(); 550 + 
self.crawl_mutex.lockUncancelable(self.io); 551 + defer self.crawl_mutex.unlock(self.io); 529 552 while (self.crawl_queue.items.len == 0 and !self.shutdown.load(.acquire)) { 530 - self.crawl_cond.timedWait(&self.crawl_mutex, 1 * std.time.ns_per_s) catch {}; 553 + self.crawl_cond.waitUncancelable(self.io, &self.crawl_mutex); 531 554 } 532 555 if (self.crawl_queue.items.len > 0) { 533 556 hostname = self.crawl_queue.orderedRemove(0); ··· 545 568 546 569 /// number of active workers 547 570 pub fn workerCount(self: *Slurper) usize { 548 - self.workers_mutex.lock(); 549 - defer self.workers_mutex.unlock(); 571 + self.workers_mutex.lockUncancelable(self.io); 572 + defer self.workers_mutex.unlock(self.io); 550 573 return self.workers.count(); 551 574 } 552 575 553 576 /// update rate limits for a running subscriber (called from admin API). 554 577 /// if the host has a worker, recomputes and applies new limits immediately. 555 578 pub fn updateHostLimits(self: *Slurper, host_id: u64, account_count: u64) void { 556 - self.workers_mutex.lock(); 557 - defer self.workers_mutex.unlock(); 579 + self.workers_mutex.lockUncancelable(self.io); 580 + defer self.workers_mutex.unlock(self.io); 558 581 if (self.workers.get(host_id)) |entry| { 559 582 const trusted = subscriber_mod.isTrustedHost(entry.subscriber.options.hostname); 560 583 const limits = subscriber_mod.computeLimits(trusted, account_count); ··· 569 592 pub fn deinit(self: *Slurper) void { 570 593 // join background threads 571 594 if (self.startup_thread) |t| t.join(); 572 - self.crawl_cond.signal(); 595 + self.crawl_cond.signal(self.io); 573 596 if (self.crawl_thread) |t| t.join(); 574 597 575 598 // collect threads to join (can't join while holding workers_mutex) 576 - var threads_to_join: std.ArrayListUnmanaged(std.Thread) = .{}; 599 + var threads_to_join: std.ArrayListUnmanaged(std.Thread) = .empty; 577 600 defer threads_to_join.deinit(self.allocator); 578 601 579 602 { 580 - self.workers_mutex.lock(); 581 - defer 
self.workers_mutex.unlock(); 603 + self.workers_mutex.lockUncancelable(self.io); 604 + defer self.workers_mutex.unlock(self.io); 582 605 var it = self.workers.iterator(); 583 606 while (it.next()) |entry| { 584 607 threads_to_join.append(self.allocator, entry.value_ptr.thread) catch {};
+50 -22
src/subscriber.zig
··· 17 17 const frame_worker_mod = @import("frame_worker.zig"); 18 18 19 19 const Allocator = std.mem.Allocator; 20 + const Io = std.Io; 20 21 const log = std.log.scoped(.relay); 21 22 22 23 const max_consecutive_failures = 15; ··· 67 68 ca_bundle: ?std.crypto.Certificate.Bundle = null, 68 69 }; 69 70 71 + fn timestamp(io: Io) i64 { 72 + return @intCast(@divFloor(Io.Timestamp.now(io, .real).nanoseconds, std.time.ns_per_s)); 73 + } 74 + 75 + fn milliTimestamp(io: Io) i64 { 76 + return Io.Timestamp.now(io, .real).toMilliseconds(); 77 + } 78 + 79 + fn microTimestamp(io: Io) i64 { 80 + return Io.Timestamp.now(io, .real).toMicroseconds(); 81 + } 82 + 83 + fn nanoTimestamp(io: Io) i96 { 84 + return Io.Timestamp.now(io, .real).toNanoseconds(); 85 + } 86 + 70 87 /// simple sliding window rate limiter — tracks event counts per second/hour/day. 71 88 /// Sliding window rate limiter (same algorithm as Go relay's github.com/RussellLuo/slidingwindow). 72 89 /// Uses millisecond timestamps for sub-second precision (critical for the 1-second window). ··· 125 142 /// Block until all rate limit windows allow the event. 126 143 /// Checks every 100ms, matching indigo's waitForLimiter behavior. 127 144 /// Returns which tier (if any) caused a wait, for metrics. 
128 - fn waitForAllow(self: *RateLimiter, shutdown: *std.atomic.Value(bool)) Result { 145 + fn waitForAllow(self: *RateLimiter, shutdown: *std.atomic.Value(bool), io: Io) Result { 129 146 // fast path: no waiting needed 130 - const now_ms = std.time.milliTimestamp(); 147 + const now_ms = milliTimestamp(io); 131 148 self.sec.advance(now_ms); 132 149 self.hour.advance(now_ms); 133 150 self.day.advance(now_ms); ··· 145 162 // slow path: poll every 100ms until allowed (creates TCP backpressure) 146 163 var waited: Result = .sec; 147 164 while (!shutdown.load(.acquire)) { 148 - std.posix.nanosleep(0, 100 * std.time.ns_per_ms); 165 + io.sleep(Io.Duration.fromMilliseconds(100), .awake) catch {}; 149 166 150 - const t = std.time.milliTimestamp(); 167 + const t = milliTimestamp(io); 151 168 self.sec.advance(t); 152 169 self.hour.advance(t); 153 170 self.day.advance(t); ··· 183 200 184 201 pub const Subscriber = struct { 185 202 allocator: Allocator, 203 + io: Io, 186 204 options: Options, 187 205 bc: *broadcaster.Broadcaster, 188 206 validator: *validator_mod.Validator, ··· 200 218 201 219 pub fn init( 202 220 allocator: Allocator, 221 + io: Io, 203 222 bc: *broadcaster.Broadcaster, 204 223 val: *validator_mod.Validator, 205 224 persist: ?*event_log_mod.DiskPersist, ··· 210 229 const limits = computeLimits(trusted, options.account_count); 211 230 return .{ 212 231 .allocator = allocator, 232 + .io = io, 213 233 .options = options, 214 234 .bc = bc, 215 235 .validator = val, ··· 273 293 var remaining: u64 = backoff; 274 294 while (remaining > 0 and !self.shouldStop()) { 275 295 const chunk = @min(remaining, 1); 276 - std.posix.nanosleep(chunk, 0); 296 + self.io.sleep(Io.Duration.fromSeconds(@intCast(chunk)), .awake) catch {}; 277 297 remaining -= chunk; 278 298 } 279 299 backoff = @min(backoff * 2, max_backoff); ··· 301 321 } 302 322 const path = w.buffered(); 303 323 304 - var client = try websocket.Client.init(self.allocator, .{ 324 + var client = try 
websocket.Client.init(self.io, self.allocator, .{ 305 325 .host = self.options.hostname, 306 326 .port = 443, 307 327 .tls = true, ··· 354 374 // sleep in 1s increments so we can check shutdown 355 375 var elapsed: u32 = 0; 356 376 while (elapsed < ping_interval_sec and !self.shouldStop()) { 357 - std.posix.nanosleep(1, 0); 377 + self.io.sleep(Io.Duration.fromSeconds(1), .awake) catch {}; 358 378 elapsed += 1; 359 379 } 360 380 if (self.shouldStop()) return; ··· 379 399 380 400 pub fn serverMessage(self: *FrameHandler, data: []const u8) !void { 381 401 const sub = self.subscriber; 402 + const io = sub.io; 382 403 383 404 // lightweight header decode for cursor tracking + routing 384 405 var arena = std.heap.ArenaAllocator.init(sub.allocator); ··· 430 451 431 452 // time-based cursor flush (Go relay: every 4 seconds) 432 453 { 433 - const now = std.time.timestamp(); 454 + const now = timestamp(io); 434 455 if (now - sub.last_cursor_flush >= cursor_flush_interval_sec) { 435 456 sub.flushCursor(); 436 457 sub.last_cursor_flush = now; ··· 439 460 440 461 // per-host rate limiting — block until window opens (matches indigo's waitForLimiter) 441 462 // blocking here stalls the websocket reader → TCP backpressure → PDS slows down 442 - switch (sub.rate_limiter.waitForAllow(sub.shutdown)) { 463 + switch (sub.rate_limiter.waitForAllow(sub.shutdown, io)) { 443 464 .allowed => {}, 444 465 .sec => { 445 466 _ = sub.bc.stats.rate_limited.fetchAdd(1, .monotonic); ··· 477 498 break :blk if (d) |s| std.hash.Wyhash.hash(0, s) else sub.options.host_id; 478 499 }; 479 500 const duped = sub.allocator.dupe(u8, data) catch return; 480 - const t0 = std.time.nanoTimestamp(); 501 + const t0 = nanoTimestamp(io); 481 502 if (pool.submit(did_key, .{ 482 503 .data = duped, 483 504 .host_id = sub.options.host_id, 484 505 .hostname = sub.options.hostname, 485 506 .allocator = sub.allocator, 507 + .io = sub.io, 486 508 .bc = sub.bc, 487 509 .validator = sub.validator, 488 510 .persist = 
sub.persist, ··· 492 514 // pool accepted — advance cursor past this frame 493 515 _ = sub.bc.stats.pool_queued_bytes.fetchAdd(duped.len, .monotonic); 494 516 if (upstream_seq) |s| sub.last_upstream_seq = s; 495 - if (std.time.nanoTimestamp() - t0 > 1_000_000) { // >1ms = had to wait 517 + if (nanoTimestamp(io) - t0 > 1_000_000) { // >1ms = had to wait 496 518 _ = sub.bc.stats.pool_backpressure.fetchAdd(1, .monotonic); 497 519 } 498 520 } else { ··· 521 543 is_account: bool, 522 544 is_identity: bool, 523 545 ) void { 546 + const io = sub.io; 547 + 524 548 // extract DID: "repo" for commits, "did" for identity/account 525 549 const did: ?[]const u8 = if (is_commit) 526 550 payload.getString("repo") ··· 615 639 // future-rev rejection 616 640 if (zat.Tid.parse(incoming_rev)) |tid| { 617 641 const rev_us: i64 = @intCast(tid.timestamp()); 618 - const now_us = std.time.microTimestamp(); 642 + const now_us = microTimestamp(io); 619 643 const skew_us: i64 = sub.validator.config.rev_clock_skew * 1_000_000; 620 644 if (rev_us > now_us + skew_us) { 621 645 log.info("host {s}: dropping future rev uid={d} rev={s}", .{ ··· 711 735 // persist (seq assignment) and broadcast, delivering out-of-order. 712 736 if (sub.persist) |dp| { 713 737 const relay_seq = blk: { 714 - sub.bc.broadcast_order.lock(); 715 - defer sub.bc.broadcast_order.unlock(); 738 + sub.bc.broadcast_order.lockUncancelable(sub.io); 739 + defer sub.bc.broadcast_order.unlock(sub.io); 716 740 717 741 const seq = dp.persist(kind, uid, data) catch |err| { 718 742 log.warn("persist failed: {s}", .{@errorName(err)}); ··· 883 907 } 884 908 885 909 test "waitForAllow blocks then allows after window advances" { 910 + const io = std.testing.io; 911 + 886 912 // verify that waitForAllow returns a non-.allowed result when the limit was hit, 887 913 // indicating it had to wait. We use a tiny limit so the fast path is exhausted. 
888 914 var rl: RateLimiter = .{ .sec_limit = .{ .raw = 1 }, .hour_limit = .{ .raw = 1000 }, .day_limit = .{ .raw = 10000 } }; 889 915 var shutdown = std.atomic.Value(bool){ .raw = false }; 890 916 891 917 // first call takes the fast path 892 - try std.testing.expectEqual(RateLimiter.Result.allowed, rl.waitForAllow(&shutdown)); 918 + try std.testing.expectEqual(RateLimiter.Result.allowed, rl.waitForAllow(&shutdown, io)); 893 919 894 920 // second call must block (sec limit = 1), then return .sec after the window advances 895 921 // this will sleep ~100ms+ until the sliding window allows it 896 - const before = std.time.milliTimestamp(); 897 - const result = rl.waitForAllow(&shutdown); 898 - const elapsed = std.time.milliTimestamp() - before; 922 + const before = milliTimestamp(io); 923 + const result = rl.waitForAllow(&shutdown, io); 924 + const elapsed = milliTimestamp(io) - before; 899 925 900 926 try std.testing.expectEqual(RateLimiter.Result.sec, result); 901 927 try std.testing.expect(elapsed >= 100); // must have slept at least one 100ms poll 902 928 } 903 929 904 930 test "waitForAllow respects shutdown" { 931 + const io = std.testing.io; 932 + 905 933 var rl: RateLimiter = .{ .sec_limit = .{ .raw = 1 }, .hour_limit = .{ .raw = 1000 }, .day_limit = .{ .raw = 10000 } }; 906 934 var shutdown = std.atomic.Value(bool){ .raw = false }; 907 935 908 936 // exhaust the limit 909 - _ = rl.waitForAllow(&shutdown); 937 + _ = rl.waitForAllow(&shutdown, io); 910 938 911 939 // set shutdown before the next call 912 940 shutdown.store(true, .release); 913 941 914 942 // should return immediately without blocking 915 - const before = std.time.milliTimestamp(); 916 - _ = rl.waitForAllow(&shutdown); 917 - const elapsed = std.time.milliTimestamp() - before; 943 + const before = milliTimestamp(io); 944 + _ = rl.waitForAllow(&shutdown, io); 945 + const elapsed = milliTimestamp(io) - before; 918 946 919 947 try std.testing.expect(elapsed < 50); // should not have slept 920 948 }
+51 -40
src/thread_pool.zig
··· 6 6 //! items stored by value in pre-allocated ring buffer (zero alloc per submit). 7 7 8 8 const std = @import("std"); 9 + const Io = std.Io; 9 10 const Allocator = std.mem.Allocator; 10 11 11 12 pub fn ThreadPool(comptime T: type, comptime processFn: fn (*T) void) type { ··· 25 26 head: u16 = 0, // next slot to read 26 27 tail: u16 = 0, // next slot to write 27 28 count: u16 = 0, 28 - mutex: std.Thread.Mutex = .{}, 29 - cond: std.Thread.Condition = .{}, // "not empty" — workers wait here 30 - not_full: std.Thread.Condition = .{}, // "not full" — submitters wait here 29 + mutex: Io.Mutex = Io.Mutex.init, 30 + cond: Io.Condition = Io.Condition.init, // "not empty" — workers wait here 31 + not_full: Io.Condition = Io.Condition.init, // "not full" — submitters wait here 31 32 alive: bool = true, 32 33 thread: ?std.Thread = null, 34 + io: Io, 33 35 }; 34 36 35 37 workers: []Worker, 36 38 allocator: Allocator, 39 + io: Io, 37 40 38 - pub fn init(allocator: Allocator, config: Config) !Self { 41 + pub fn init(allocator: Allocator, config: Config, io: Io) !Self { 39 42 const workers = try allocator.alloc(Worker, config.num_workers); 40 43 for (workers) |*w| { 41 44 w.* = .{ 42 45 .queue = try allocator.alloc(T, config.queue_capacity), 43 46 .capacity = config.queue_capacity, 47 + .io = io, 44 48 }; 45 49 } 46 50 47 51 const self = Self{ 48 52 .workers = workers, 49 53 .allocator = allocator, 54 + .io = io, 50 55 }; 51 56 52 57 // spawn worker threads ··· 68 73 const idx = key % self.workers.len; 69 74 const w = &self.workers[idx]; 70 75 71 - w.mutex.lock(); 72 - defer w.mutex.unlock(); 76 + w.mutex.lockUncancelable(w.io); 77 + defer w.mutex.unlock(w.io); 73 78 74 79 while (w.count == w.capacity) { 75 80 if (stop.load(.acquire)) return false; 76 - // poll every 100ms so we notice shutdown promptly 77 - w.not_full.timedWait(&w.mutex, 100 * std.time.ns_per_ms) catch {}; 81 + // poll: release mutex, sleep briefly, reacquire 82 + // (Io.Condition has no timedWait — poll so 
stop check isn't starved) 83 + w.mutex.unlock(w.io); 84 + w.io.sleep(Io.Duration.fromMilliseconds(10), .awake) catch {}; 85 + w.mutex.lockUncancelable(w.io); 78 86 } 79 87 80 88 w.queue[w.tail] = item; 81 89 w.tail = @intCast((@as(u32, w.tail) + 1) % @as(u32, w.capacity)); 82 90 w.count += 1; 83 - w.cond.signal(); 91 + w.cond.signal(w.io); 84 92 return true; 85 93 } 86 94 ··· 88 96 pub fn shutdown(self: *Self) void { 89 97 // signal all workers to stop 90 98 for (self.workers) |*w| { 91 - w.mutex.lock(); 99 + w.mutex.lockUncancelable(w.io); 92 100 w.alive = false; 93 - w.cond.signal(); 94 - w.not_full.broadcast(); // wake any blocked submitters 95 - w.mutex.unlock(); 101 + w.cond.signal(w.io); 102 + w.not_full.broadcast(w.io); // wake any blocked submitters 103 + w.mutex.unlock(w.io); 96 104 } 97 105 // join all threads 98 106 for (self.workers) |*w| { ··· 115 123 pub fn pendingCount(self: *Self) usize { 116 124 var total: usize = 0; 117 125 for (self.workers) |*w| { 118 - w.mutex.lock(); 119 - defer w.mutex.unlock(); 126 + w.mutex.lockUncancelable(w.io); 127 + defer w.mutex.unlock(w.io); 120 128 total += w.count; 121 129 } 122 130 return total; ··· 127 135 var item: T = undefined; 128 136 129 137 { 130 - w.mutex.lock(); 131 - defer w.mutex.unlock(); 138 + w.mutex.lockUncancelable(w.io); 139 + defer w.mutex.unlock(w.io); 132 140 133 141 while (w.count == 0 and w.alive) { 134 - w.cond.wait(&w.mutex); 142 + w.cond.waitUncancelable(w.io, &w.mutex); 135 143 } 136 144 137 145 if (w.count == 0 and !w.alive) return; ··· 139 147 item = w.queue[w.head]; 140 148 w.head = @intCast((@as(u32, w.head) + 1) % @as(u32, w.capacity)); 141 149 w.count -= 1; 142 - w.not_full.signal(); // wake one blocked submitter 150 + w.not_full.signal(w.io); // wake one blocked submitter 143 151 } 144 152 145 153 processFn(&item); ··· 170 178 .num_workers = 2, 171 179 .queue_capacity = 64, 172 180 .stack_size = 1 * 1024 * 1024, 173 - }); 181 + }, std.testing.io); 174 182 175 183 // submit items 176 
184 for (0..10) |i| { ··· 193 201 const Item = struct { 194 202 seq: u32, 195 203 results: *std.ArrayListUnmanaged(u32), 196 - mutex: *std.Thread.Mutex, 204 + mutex: *Io.Mutex, 197 205 allocator: Allocator, 206 + io: Io, 198 207 }; 199 208 200 209 const S = struct { 201 210 fn process(item: *Item) void { 202 - item.mutex.lock(); 203 - defer item.mutex.unlock(); 211 + item.mutex.lockUncancelable(item.io); 212 + defer item.mutex.unlock(item.io); 204 213 item.results.append(item.allocator, item.seq) catch {}; 205 214 } 206 215 }; 207 216 208 217 var shutdown: std.atomic.Value(bool) = .{ .raw = false }; 209 - var results: std.ArrayListUnmanaged(u32) = .{}; 218 + var results: std.ArrayListUnmanaged(u32) = .empty; 210 219 defer results.deinit(testing.allocator); 211 - var mutex: std.Thread.Mutex = .{}; 220 + var mutex: Io.Mutex = Io.Mutex.init; 212 221 213 222 var pool = try ThreadPool(Item, S.process).init(testing.allocator, .{ 214 223 .num_workers = 4, 215 224 .queue_capacity = 64, 216 225 .stack_size = 1 * 1024 * 1024, 217 - }); 226 + }, std.testing.io); 218 227 219 228 // submit 20 items all with key=42 (same worker) 220 229 for (0..20) |i| { ··· 223 232 .results = &results, 224 233 .mutex = &mutex, 225 234 .allocator = testing.allocator, 235 + .io = std.testing.io, 226 236 }, &shutdown); 227 237 try testing.expect(ok); 228 238 } ··· 239 249 test "submit blocks when queue full, succeeds after drain" { 240 250 const Item = struct { 241 251 counter: *std.atomic.Value(u32), 252 + io: Io, 242 253 }; 243 254 const S = struct { 244 255 fn process(item: *Item) void { 245 256 // slow worker — gives time for queue to fill 246 - std.posix.nanosleep(0, 5 * std.time.ns_per_ms); 257 + item.io.sleep(Io.Duration.fromMilliseconds(5), .awake) catch {}; 247 258 _ = item.counter.fetchAdd(1, .monotonic); 248 259 } 249 260 }; ··· 254 265 .num_workers = 1, 255 266 .queue_capacity = 4, 256 267 .stack_size = 1 * 1024 * 1024, 257 - }); 268 + }, std.testing.io); 258 269 259 270 // submit more 
items than capacity — submit blocks until slots open 260 271 for (0..20) |_| { 261 - const ok = pool.submit(0, .{ .counter = &counter }, &shutdown); 272 + const ok = pool.submit(0, .{ .counter = &counter, .io = std.testing.io }, &shutdown); 262 273 try testing.expect(ok); 263 274 } 264 275 ··· 272 283 test "submit returns false on shutdown" { 273 284 const Item = struct { 274 285 stop: *std.atomic.Value(bool), 286 + io: Io, 275 287 }; 276 288 const S = struct { 277 289 fn process(item: *Item) void { 278 290 // poll until shutdown — allows worker to exit promptly 279 291 while (!item.stop.load(.acquire)) { 280 - std.posix.nanosleep(0, 5 * std.time.ns_per_ms); 292 + item.io.sleep(Io.Duration.fromMilliseconds(5), .awake) catch {}; 281 293 } 282 294 } 283 295 }; ··· 287 299 .num_workers = 1, 288 300 .queue_capacity = 2, 289 301 .stack_size = 1 * 1024 * 1024, 290 - }); 302 + }, std.testing.io); 291 303 292 304 // fill: 1 processing + 2 queued = capacity reached 293 - _ = pool.submit(0, .{ .stop = &shutdown }, &shutdown); 294 - _ = pool.submit(0, .{ .stop = &shutdown }, &shutdown); 295 - _ = pool.submit(0, .{ .stop = &shutdown }, &shutdown); 305 + _ = pool.submit(0, .{ .stop = &shutdown, .io = std.testing.io }, &shutdown); 306 + _ = pool.submit(0, .{ .stop = &shutdown, .io = std.testing.io }, &shutdown); 307 + _ = pool.submit(0, .{ .stop = &shutdown, .io = std.testing.io }, &shutdown); 296 308 297 309 // signal shutdown — next submit should return false 298 310 shutdown.store(true, .release); 299 - const ok = pool.submit(0, .{ .stop = &shutdown }, &shutdown); 311 + const ok = pool.submit(0, .{ .stop = &shutdown, .io = std.testing.io }, &shutdown); 300 312 try testing.expect(!ok); 301 313 302 314 pool.shutdown(); ··· 304 316 } 305 317 306 318 test "pendingCount reflects queued items" { 307 - const Item = struct { x: u32 }; 319 + const Item = struct { x: u32, io: Io }; 308 320 const S = struct { 309 321 fn process(item: *Item) void { 310 - _ = item; 311 322 // slow worker 
so items accumulate 312 - std.posix.nanosleep(0, 10 * std.time.ns_per_ms); 323 + item.io.sleep(Io.Duration.fromMilliseconds(10), .awake) catch {}; 313 324 } 314 325 }; 315 326 ··· 317 328 .num_workers = 1, 318 329 .queue_capacity = 64, 319 330 .stack_size = 1 * 1024 * 1024, 320 - }); 331 + }, std.testing.io); 321 332 322 333 // initially empty 323 334 try testing.expectEqual(@as(usize, 0), pool.pendingCount()); ··· 342 353 .num_workers = 2, 343 354 .queue_capacity = 64, 344 355 .stack_size = 1 * 1024 * 1024, 345 - }); 356 + }, std.testing.io); 346 357 347 358 for (0..30) |i| { 348 359 _ = pool.submit(i, .{ .counter = &counter }, &shutdown);
+55 -43
src/validator.zig
··· 6 6 //! and queues background resolution. no frame is ever blocked on network I/O. 7 7 8 8 const std = @import("std"); 9 + const Io = std.Io; 9 10 const zat = @import("zat"); 10 11 const broadcaster = @import("broadcaster.zig"); 11 12 const event_log_mod = @import("event_log.zig"); ··· 49 50 // DID → signing key cache (decoded, ready for verification) 50 51 cache: lru.LruCache(CachedKey), 51 52 // background resolve queue 52 - queue: std.ArrayListUnmanaged([]const u8) = .{}, 53 + queue: std.ArrayListUnmanaged([]const u8) = .empty, 53 54 // in-flight set — prevents duplicate DID entries in the queue 54 - queued_set: std.StringHashMapUnmanaged(void) = .{}, 55 - queue_mutex: std.Thread.Mutex = .{}, 56 - queue_cond: std.Thread.Condition = .{}, 55 + queued_set: std.StringHashMapUnmanaged(void) = .empty, 56 + queue_mutex: Io.Mutex = Io.Mutex.init, 57 + queue_cond: Io.Condition = Io.Condition.init, 57 58 resolver_threads: [max_resolver_threads]?std.Thread = .{null} ** max_resolver_threads, 58 59 alive: std.atomic.Value(bool) = .{ .raw = true }, 59 60 max_cache_size: u32 = 250_000, 61 + io: Io, 60 62 // pool of reusable resolvers for inline host authority checks. 61 63 // frame workers acquire/release via atomic flag to avoid creating 62 64 // a fresh resolver (and fresh TLS handshake) per call. 
··· 69 71 const max_queue_size: usize = 100_000; 70 72 const host_resolver_pool_size: usize = 4; 71 73 72 - pub fn init(allocator: Allocator, stats: *broadcaster.Stats) Validator { 73 - return initWithConfig(allocator, stats, .{}); 74 + pub fn init(allocator: Allocator, stats: *broadcaster.Stats, io: Io) Validator { 75 + return initWithConfig(allocator, stats, .{}, io); 74 76 } 75 77 76 - pub fn initWithConfig(allocator: Allocator, stats: *broadcaster.Stats, config: ValidatorConfig) Validator { 78 + pub fn initWithConfig(allocator: Allocator, stats: *broadcaster.Stats, config: ValidatorConfig, io: Io) Validator { 77 79 return .{ 78 80 .allocator = allocator, 79 81 .stats = stats, 80 82 .config = config, 81 - .cache = lru.LruCache(CachedKey).init(allocator, 250_000), 83 + .cache = lru.LruCache(CachedKey).init(allocator, 250_000, io), 84 + .io = io, 82 85 }; 83 86 } 84 87 85 88 pub fn deinit(self: *Validator) void { 86 89 self.alive.store(false, .release); 87 - self.queue_cond.broadcast(); 90 + self.queue_cond.broadcast(self.io); 88 91 for (&self.resolver_threads) |*t| { 89 92 if (t.*) |thread| { 90 93 thread.join(); ··· 121 124 122 125 // init host authority resolver pool (reused across calls) 123 126 for (&self.host_resolvers) |*r| { 124 - r.* = zat.DidResolver.initWithOptions(self.allocator, .{}); 127 + r.* = zat.DidResolver.initWithOptions(self.io, self.allocator, .{}); 125 128 } 126 129 for (&self.host_resolver_available) |*a| { 127 130 a.store(true, .release); ··· 321 324 fn extractOps(self: *Validator, alloc: Allocator, payload: zat.cbor.Value) ?[]const zat.MstOperation { 322 325 _ = self; 323 326 const ops_array = payload.getArray("ops") orelse return null; 324 - var ops: std.ArrayListUnmanaged(zat.MstOperation) = .{}; 327 + var ops: std.ArrayListUnmanaged(zat.MstOperation) = .empty; 325 328 for (ops_array) |op| { 326 329 const action = op.getString("action") orelse continue; 327 330 const path = op.getString("path") orelse continue; ··· 404 407 405 408 const 
duped = self.allocator.dupe(u8, did) catch return; 406 409 407 - self.queue_mutex.lock(); 408 - defer self.queue_mutex.unlock(); 410 + self.queue_mutex.lockUncancelable(self.io); 411 + defer self.queue_mutex.unlock(self.io); 409 412 410 413 // skip if already queued (prevents unbounded queue growth) 411 414 if (self.queued_set.contains(duped)) { ··· 424 427 return; 425 428 }; 426 429 self.queued_set.put(self.allocator, duped, {}) catch {}; 427 - self.queue_cond.signal(); 430 + self.queue_cond.signal(self.io); 428 431 } 429 432 430 433 fn resolveLoop(self: *Validator) void { 431 - var resolver = zat.DidResolver.initWithOptions(self.allocator, .{ .keep_alive = true }); 434 + var resolver = zat.DidResolver.initWithOptions(self.io, self.allocator, .{ .keep_alive = true }); 432 435 defer resolver.deinit(); 433 436 434 437 while (self.alive.load(.acquire)) { 435 438 var did: ?[]const u8 = null; 436 439 { 437 - self.queue_mutex.lock(); 438 - defer self.queue_mutex.unlock(); 440 + self.queue_mutex.lockUncancelable(self.io); 441 + defer self.queue_mutex.unlock(self.io); 439 442 while (self.queue.items.len == 0 and self.alive.load(.acquire)) { 440 - self.queue_cond.timedWait(&self.queue_mutex, 1 * std.time.ns_per_s) catch {}; 443 + self.queue_cond.waitUncancelable(self.io, &self.queue_mutex); 441 444 } 442 445 if (self.queue.items.len > 0) { 443 446 did = self.queue.orderedRemove(0); ··· 470 473 .key_type = public_key.key_type, 471 474 .raw = undefined, 472 475 .len = @intCast(public_key.raw.len), 473 - .resolve_time = std.time.timestamp(), 476 + .resolve_time = timestamp(self.io), 474 477 }; 475 478 @memcpy(cached.raw[0..public_key.raw.len], public_key.raw); 476 479 ··· 509 512 /// resolve queue length (for diagnostics — non-blocking) 510 513 pub fn resolveQueueLen(self: *Validator) usize { 511 514 if (!self.queue_mutex.tryLock()) return 0; 512 - defer self.queue_mutex.unlock(); 515 + defer self.queue_mutex.unlock(self.io); 513 516 return self.queue.items.len; 514 517 } 515 
518 516 519 /// resolve dedup set size (for diagnostics — non-blocking) 517 520 pub fn resolveQueuedSetCount(self: *Validator) u32 { 518 521 if (!self.queue_mutex.tryLock()) return 0; 519 - defer self.queue_mutex.unlock(); 522 + defer self.queue_mutex.unlock(self.io); 520 523 return self.queued_set.count(); 521 524 } 522 525 ··· 528 531 /// resolver dedup set hashmap backing capacity (for memory attribution — non-blocking) 529 532 pub fn resolveQueuedSetCapacity(self: *Validator) u32 { 530 533 if (!self.queue_mutex.tryLock()) return 0; 531 - defer self.queue_mutex.unlock(); 534 + defer self.queue_mutex.unlock(self.io); 532 535 return self.queued_set.capacity(); 533 536 } 534 537 ··· 574 577 return i; 575 578 } 576 579 } 577 - std.Thread.yield() catch {}; 580 + self.io.sleep(Io.Duration.fromMilliseconds(1), .awake) catch {}; 578 581 } 579 582 return 0; // shutdown path — caller will exit soon 580 583 } ··· 614 617 return rest; 615 618 } 616 619 617 - fn parseEnvInt(comptime T: type, key: []const u8, default: T) T { 618 - const val = std.posix.getenv(key) orelse return default; 620 + fn getenv(key: [*:0]const u8) ?[]const u8 { 621 + const ptr = std.c.getenv(key) orelse return null; 622 + return std.mem.sliceTo(ptr, 0); 623 + } 624 + 625 + fn parseEnvInt(comptime T: type, key: [*:0]const u8, default: T) T { 626 + const val = getenv(key) orelse return default; 619 627 return std.fmt.parseInt(T, val, 10) catch default; 628 + } 629 + 630 + fn timestamp(io: Io) i64 { 631 + return @intCast(@divFloor(Io.Timestamp.now(io, .real).nanoseconds, std.time.ns_per_s)); 620 632 } 621 633 622 634 // --- tests --- 623 635 624 636 test "validateCommit skips on cache miss" { 625 637 var stats = broadcaster.Stats{}; 626 - var v = Validator.init(std.testing.allocator, &stats); 638 + var v = Validator.init(std.testing.allocator, &stats, std.testing.io); 627 639 defer v.deinit(); 628 640 629 641 // build a commit payload using SDK ··· 642 654 643 655 test "validateCommit skips when no repo 
field" { 644 656 var stats = broadcaster.Stats{}; 645 - var v = Validator.init(std.testing.allocator, &stats); 657 + var v = Validator.init(std.testing.allocator, &stats, std.testing.io); 646 658 defer v.deinit(); 647 659 648 660 // payload without "repo" field ··· 658 670 659 671 test "checkCommitStructure rejects invalid DID" { 660 672 var stats = broadcaster.Stats{}; 661 - var v = Validator.init(std.testing.allocator, &stats); 673 + var v = Validator.init(std.testing.allocator, &stats, std.testing.io); 662 674 defer v.deinit(); 663 675 664 676 const payload: zat.cbor.Value = .{ .map = &.{ ··· 670 682 671 683 test "checkCommitStructure accepts valid commit" { 672 684 var stats = broadcaster.Stats{}; 673 - var v = Validator.init(std.testing.allocator, &stats); 685 + var v = Validator.init(std.testing.allocator, &stats, std.testing.io); 674 686 defer v.deinit(); 675 687 676 688 const payload: zat.cbor.Value = .{ .map = &.{ ··· 683 695 684 696 test "validateSync skips on cache miss" { 685 697 var stats = broadcaster.Stats{}; 686 - var v = Validator.init(std.testing.allocator, &stats); 698 + var v = Validator.init(std.testing.allocator, &stats, std.testing.io); 687 699 defer v.deinit(); 688 700 689 701 const payload: zat.cbor.Value = .{ .map = &.{ ··· 701 713 702 714 test "validateSync rejects invalid DID" { 703 715 var stats = broadcaster.Stats{}; 704 - var v = Validator.init(std.testing.allocator, &stats); 716 + var v = Validator.init(std.testing.allocator, &stats, std.testing.io); 705 717 defer v.deinit(); 706 718 707 719 const payload: zat.cbor.Value = .{ .map = &.{ ··· 717 729 718 730 test "validateSync rejects missing blocks" { 719 731 var stats = broadcaster.Stats{}; 720 - var v = Validator.init(std.testing.allocator, &stats); 732 + var v = Validator.init(std.testing.allocator, &stats, std.testing.io); 721 733 defer v.deinit(); 722 734 723 735 const payload: zat.cbor.Value = .{ .map = &.{ ··· 733 745 734 746 test "validateSync skips when no did field" { 735 
747 var stats = broadcaster.Stats{}; 736 - var v = Validator.init(std.testing.allocator, &stats); 748 + var v = Validator.init(std.testing.allocator, &stats, std.testing.io); 737 749 defer v.deinit(); 738 750 739 751 const payload: zat.cbor.Value = .{ .map = &.{ ··· 748 760 749 761 test "LRU cache evicts least recently used" { 750 762 var stats = broadcaster.Stats{}; 751 - var v = Validator.init(std.testing.allocator, &stats); 763 + var v = Validator.init(std.testing.allocator, &stats, std.testing.io); 752 764 v.cache.capacity = 3; 753 765 defer v.deinit(); 754 766 ··· 773 785 774 786 test "checkCommitStructure rejects too many ops" { 775 787 var stats = broadcaster.Stats{}; 776 - var v = Validator.initWithConfig(std.testing.allocator, &stats, .{ .max_ops = 2 }); 788 + var v = Validator.initWithConfig(std.testing.allocator, &stats, .{ .max_ops = 2 }, std.testing.io); 777 789 defer v.deinit(); 778 790 779 791 // build ops array with 3 items (over limit of 2) ··· 796 808 test "spec: #commit blocks > 2,000,000 bytes rejected" { 797 809 // lexicon maxLength for #commit blocks: 2,000,000 798 810 var stats = broadcaster.Stats{}; 799 - var v = Validator.init(std.testing.allocator, &stats); 811 + var v = Validator.init(std.testing.allocator, &stats, std.testing.io); 800 812 defer v.deinit(); 801 813 802 814 // insert a fake cached key so we reach the blocks size check ··· 826 838 test "spec: #commit blocks = 2,000,000 bytes accepted (boundary)" { 827 839 // lexicon maxLength for #commit blocks: 2,000,000 — exactly at limit should pass size check 828 840 var stats = broadcaster.Stats{}; 829 - var v = Validator.init(std.testing.allocator, &stats); 841 + var v = Validator.init(std.testing.allocator, &stats, std.testing.io); 830 842 defer v.deinit(); 831 843 832 844 const did = "did:plc:test123"; ··· 857 869 test "spec: #sync blocks > 10,000 bytes rejected" { 858 870 // lexicon maxLength for #sync blocks: 10,000 859 871 var stats = broadcaster.Stats{}; 860 - var v = 
Validator.init(std.testing.allocator, &stats); 872 + var v = Validator.init(std.testing.allocator, &stats, std.testing.io); 861 873 defer v.deinit(); 862 874 863 875 const payload: zat.cbor.Value = .{ .map = &.{ ··· 874 886 test "spec: #sync blocks = 10,000 bytes accepted (boundary)" { 875 887 // lexicon maxLength for #sync blocks: 10,000 — exactly at limit should pass size check 876 888 var stats = broadcaster.Stats{}; 877 - var v = Validator.init(std.testing.allocator, &stats); 889 + var v = Validator.init(std.testing.allocator, &stats, std.testing.io); 878 890 defer v.deinit(); 879 891 880 892 const payload: zat.cbor.Value = .{ .map = &.{ ··· 891 903 892 904 test "extractOps reads path field from firehose format" { 893 905 var stats = broadcaster.Stats{}; 894 - var v = Validator.initWithConfig(std.testing.allocator, &stats, .{ .verify_commit_diff = true }); 906 + var v = Validator.initWithConfig(std.testing.allocator, &stats, .{ .verify_commit_diff = true }, std.testing.io); 895 907 defer v.deinit(); 896 908 897 909 // use arena since extractOps allocates an ArrayList internally ··· 926 938 927 939 test "extractOps rejects malformed path without slash" { 928 940 var stats = broadcaster.Stats{}; 929 - var v = Validator.initWithConfig(std.testing.allocator, &stats, .{ .verify_commit_diff = true }); 941 + var v = Validator.initWithConfig(std.testing.allocator, &stats, .{ .verify_commit_diff = true }, std.testing.io); 930 942 defer v.deinit(); 931 943 932 944 var arena = std.heap.ArenaAllocator.init(std.testing.allocator); ··· 952 964 953 965 test "checkCommitStructure validates path field" { 954 966 var stats = broadcaster.Stats{}; 955 - var v = Validator.init(std.testing.allocator, &stats); 967 + var v = Validator.init(std.testing.allocator, &stats, std.testing.io); 956 968 defer v.deinit(); 957 969 958 970 // valid path ··· 988 1000 989 1001 test "queueResolve deduplicates repeated DIDs" { 990 1002 var stats = broadcaster.Stats{}; 991 - var v = 
Validator.init(std.testing.allocator, &stats); 1003 + var v = Validator.init(std.testing.allocator, &stats, std.testing.io); 992 1004 defer v.deinit(); 993 1005 994 1006 // queue the same DID 100 times