integrate zql for compile-time sql validation · zzstoatzz.io/leaflet-search@1d4ac9c

+6

backend/build.zig

··· 9 9 .optimize = optimize, 10 10 }); 11 11 12 + const zql = b.dependency("zql", .{ 13 + .target = target, 14 + .optimize = optimize, 15 + }); 16 + 12 17 const exe = b.addExecutable(.{ 13 18 .name = "leaflet-search", 14 19 .root_module = b.createModule(.{ ··· 17 22 .optimize = optimize, 18 23 .imports = &.{ 19 24 .{ .name = "websocket", .module = websocket.module("websocket") }, 25 + .{ .name = "zql", .module = zql.module("zql") }, 20 26 }, 21 27 }), 22 28 });

+4

backend/build.zig.zon

··· 8 8 .url = "https://github.com/karlseguin/websocket.zig/archive/refs/heads/master.tar.gz", 9 9 .hash = "websocket-0.1.0-ZPISdRNzAwAGszh62EpRtoQxu8wb1MSMVI6Ow0o2dmyJ", 10 10 }, 11 + .zql = .{ 12 + .url = "https://github.com/zzstoatzz/zql/archive/main.tar.gz", 13 + .hash = "zql-0.0.1-alpha-xNRI4IYzAABlysQna0qdLkNqMwddTK9vrA0D8JKFjW11", 14 + }, 11 15 }, 12 16 .paths = .{ 13 17 "build.zig",

+221

backend/notes/libsql-zig-design.md

··· 1 + # libsql-zig design sketch 2 + 3 + a zig client for turso/libsql with nice ergonomics and comptime validation. 4 + 5 + ## API sketch 6 + 7 + ### basic usage 8 + 9 + ```zig 10 + const db = try libsql.connect("libsql://mydb.turso.io", token); 11 + 12 + // simple query with positional args 13 + var result = try db.query("SELECT * FROM users WHERE id = ?", .{42}); 14 + defer result.deinit(); 15 + 16 + for (result.rows()) |row| { 17 + const name = row.get("name", .string); 18 + const age = row.get("age", .int); 19 + } 20 + ``` 21 + 22 + ### named parameters 23 + 24 + ```zig 25 + // named params - comptime validates struct fields match :placeholders 26 + try db.exec( 27 + "INSERT INTO users (name, age) VALUES (:name, :age)", 28 + .{ .name = "bob", .age = 30 }, 29 + ); 30 + 31 + // comptime error if you typo a param name: 32 + try db.exec( 33 + "INSERT INTO users (name, age) VALUES (:name, :age)", 34 + .{ .naem = "bob", .age = 30 }, // error: param :naem not found in SQL 35 + ); 36 + ``` 37 + 38 + ### struct mapping 39 + 40 + ```zig 41 + const User = struct { 42 + id: i64, 43 + name: []const u8, 44 + age: ?i64, // nullable 45 + }; 46 + 47 + // query directly into structs 48 + const users = try db.queryAs(User, "SELECT id, name, age FROM users", .{}); 49 + defer users.deinit(); 50 + 51 + for (users.items) |user| { 52 + std.debug.print("{}: {s}, {?}\n", .{ user.id, user.name, user.age }); 53 + } 54 + 55 + // comptime validates struct fields exist (if we parse SELECT columns) 56 + // or runtime validation against response column names 57 + ``` 58 + 59 + ### transactions 60 + 61 + ```zig 62 + // turso pipeline API supports batched statements 63 + try db.transaction(.{}, struct { 64 + fn run(tx: *Transaction) !void { 65 + try tx.exec("INSERT INTO users (name) VALUES (?)", .{"alice"}); 66 + try tx.exec("INSERT INTO logs (msg) VALUES (?)", .{"created alice"}); 67 + } 68 + }.run); 69 + // auto-rollback on error, auto-commit on success 70 + ``` 71 + 72 + ### connection 
options 73 + 74 + ```zig 75 + const db = try libsql.connect(.{ 76 + .url = "libsql://mydb.turso.io", 77 + .token = token, 78 + .timeout_ms = 5000, 79 + .retry_count = 3, 80 + }); 81 + ``` 82 + 83 + ## comptime features 84 + 85 + ### 1. parameter count (already have this) 86 + ```zig 87 + db.query("SELECT * FROM users WHERE id = ?", .{1, 2}); 88 + // error: SQL has 1 placeholders but 2 args provided 89 + ``` 90 + 91 + ### 2. named parameter validation 92 + ```zig 93 + fn query(comptime sql: []const u8, args: anytype) !Result { 94 + comptime { 95 + const placeholders = parseNamedPlaceholders(sql); // [":name", ":age"] 96 + const fields = @typeInfo(@TypeOf(args)).@"struct".fields; 97 + 98 + for (placeholders) |p| { 99 + if (!hasField(fields, p[1..])) { // strip leading ':' 100 + @compileError("param " ++ p ++ " not found in args struct"); 101 + } 102 + } 103 + } 104 + } 105 + ``` 106 + 107 + ### 3. struct field validation (partial) 108 + for `queryAs`, we can validate at comptime that the struct is well-formed: 109 + - all fields are valid SQL types (i64, []const u8, ?T for nullables) 110 + - no unsupported types 111 + 112 + full column name validation would require either: 113 + - parsing SELECT clause at comptime (doable but complex) 114 + - runtime validation against response cols (simpler, still catches bugs) 115 + 116 + ### 4. SQL syntax hints (stretch goal) 117 + basic comptime SQL parsing could catch obvious errors: 118 + - unclosed quotes 119 + - mismatched parens 120 + - obviously malformed statements 121 + 122 + not a full parser, just sanity checks. 
123 + 124 + ## implementation notes 125 + 126 + ### turso HTTP API 127 + 128 + endpoint: `POST https://{host}/v2/pipeline` 129 + 130 + request format: 131 + ```json 132 + { 133 + "requests": [ 134 + {"type": "execute", "stmt": {"sql": "...", "args": [...]}}, 135 + {"type": "execute", "stmt": {"sql": "...", "args": [...]}}, 136 + {"type": "close"} 137 + ] 138 + } 139 + ``` 140 + 141 + response format: 142 + ```json 143 + { 144 + "results": [{ 145 + "response": { 146 + "type": "execute", 147 + "result": { 148 + "cols": [{"name": "id", "decltype": "INTEGER"}, ...], 149 + "rows": [[1, "bob", 30], ...] 150 + } 151 + } 152 + }] 153 + } 154 + ``` 155 + 156 + the `cols` array gives us column names and types at runtime - we use this for: 157 + - named column access: `row.get("name", .string)` 158 + - struct mapping validation 159 + - optional runtime type checking 160 + 161 + ### arg serialization 162 + 163 + turso args format: 164 + ```json 165 + {"args": [ 166 + {"type": "integer", "value": "42"}, 167 + {"type": "text", "value": "hello"}, 168 + {"type": "null"}, 169 + {"type": "blob", "base64": "..."} 170 + ]} 171 + ``` 172 + 173 + we need to map zig types to these: 174 + - `i64`, `u64`, etc → integer 175 + - `[]const u8` → text 176 + - `null`, `?T` when null → null 177 + - `[]const u8` (blob flag?) → blob 178 + 179 + ### named param parsing 180 + 181 + parse `:name` at comptime: 182 + ```zig 183 + fn parseNamedParams(comptime sql: []const u8) []const []const u8 { 184 + // find all :identifier patterns 185 + // return slice of param names 186 + } 187 + 188 + fn substituteParams(comptime sql: []const u8) []const u8 { 189 + // replace :name with ? for the actual query 190 + // "WHERE id = :id" → "WHERE id = ?" 
191 + } 192 + ``` 193 + 194 + ## repo structure 195 + 196 + ``` 197 + libsql-zig/ 198 + ├── src/ 199 + │ ├── root.zig # public API 200 + │ ├── client.zig # HTTP client, connection management 201 + │ ├── query.zig # query building, param substitution 202 + │ ├── result.zig # result parsing, row access 203 + │ ├── types.zig # type mapping, serialization 204 + │ └── comptime/ 205 + │ ├── params.zig # named param parsing 206 + │ └── sql.zig # SQL parsing helpers 207 + ├── build.zig 208 + └── README.md 209 + ``` 210 + 211 + ## open questions 212 + 213 + 1. **naming**: `libsql-zig`? `turso-zig`? `zsql`? 214 + 215 + 2. **local libsql support**: turso also has an embedded mode. support that too, or HTTP-only? 216 + 217 + 3. **async**: zig's async is in flux. start with blocking, add async later? 218 + 219 + 4. **allocator strategy**: arena per query? caller provides? configurable? 220 + 221 + 5. **error handling**: rich error types with SQL context, or simple error union?

+203

backend/notes/sqlx-research.md

··· 1 + # sqlx research notes 2 + 3 + ## what sqlx (rust) does 4 + 5 + 1. **compile-time query validation** via procedural macros 6 + - `query!("SELECT * FROM users WHERE id = $1", user_id)` 7 + - at compile time, connects to DATABASE_URL and runs `PREPARE` 8 + - validates: syntax, column existence, parameter types 9 + - returns anonymous struct with typed fields matching columns 10 + 11 + 2. **offline mode** for CI/builds without DB 12 + - `cargo sqlx prepare` connects to DB, caches query metadata to `.sqlx/` 13 + - cached JSON files contain: query hash, column names, column types, param types 14 + - at compile time, macro reads from cache instead of live DB 15 + - `cargo sqlx prepare --check` validates cache is up-to-date 16 + 17 + 3. **query_as!** for named structs 18 + ```rust 19 + struct User { id: i64, name: String } 20 + let users = sqlx::query_as!(User, "SELECT id, name FROM users").fetch_all(&pool).await?; 21 + ``` 22 + 23 + ## what zig-sqlite does 24 + 25 + 1. **comptime parameter count checking** 26 + - parses SQL string at comptime to count `?` markers 27 + - validates args tuple length matches 28 + - compile error if mismatch 29 + 30 + 2. **optional type annotations** via custom syntax 31 + ```zig 32 + db.prepare("SELECT * FROM users WHERE age > ?{usize}") 33 + ``` 34 + - the `{usize}` is parsed at comptime 35 + - validates that bound value is correct type 36 + - compile error if type mismatch 37 + 38 + 3. 
**no schema validation** - doesn't connect to DB at compile time 39 + 40 + ## our current situation 41 + 42 + - turso HTTP API (not local sqlite) 43 + - no compile-time checking at all 44 + - manual JSON building in `turso.zig` 45 + - manual response parsing in `result.zig` 46 + - column access by index: `row.text(0)`, `row.int(1)` 47 + 48 + ## what we could build 49 + 50 + ### option A: comptime parameter checking (easy) 51 + 52 + add to turso.zig: 53 + ```zig 54 + pub fn query(comptime sql: []const u8, args: anytype) !Result { 55 + comptime { 56 + const expected = countPlaceholders(sql); 57 + const provided = @typeInfo(@TypeOf(args)).@"struct".fields.len; 58 + if (expected != provided) { 59 + @compileError("wrong number of parameters"); 60 + } 61 + } 62 + // ... existing code 63 + } 64 + ``` 65 + 66 + pros: 67 + - catches "wrong number of args" at compile time 68 + - minimal effort 69 + - no external dependencies 70 + 71 + cons: 72 + - doesn't validate types 73 + - doesn't validate SQL syntax 74 + - doesn't validate column existence 75 + 76 + ### option B: comptime type annotations (medium) 77 + 78 + custom syntax like zig-sqlite: 79 + ```zig 80 + client.query( 81 + "SELECT * FROM users WHERE age > ?{i64} AND name = ?{text}", 82 + .{ age, name } 83 + ) 84 + ``` 85 + 86 + parse `?{type}` at comptime, validate args match. 87 + 88 + pros: 89 + - type safety for parameters 90 + - self-documenting queries 91 + 92 + cons: 93 + - non-standard SQL 94 + - still no schema validation 95 + 96 + ### option C: offline mode like sqlx (hard) 97 + 98 + 1. write CLI tool that: 99 + - connects to turso 100 + - finds all queries in codebase (grep for `client.query`) 101 + - runs each query with `EXPLAIN` or similar 102 + - caches column info to `sqlx-cache.json` 103 + 104 + 2.
at comptime, read cache and generate typed result structs 105 + 106 + pros: 107 + - full type safety for results 108 + - validates against real schema 109 + 110 + cons: 111 + - requires CLI tool 112 + - need to re-run on schema changes 113 + - turso's HTTP API might not expose enough metadata 114 + - significant complexity 115 + 116 + ### option D: named parameters (easy ergonomic win) 117 + 118 + instead of: 119 + ```zig 120 + client.query("SELECT * FROM users WHERE id = ? AND age > ?", &.{id, age}) 121 + ``` 122 + 123 + allow: 124 + ```zig 125 + client.query("SELECT * FROM users WHERE id = :id AND age > :age", .{ .id = id, .age = age }) 126 + ``` 127 + 128 + at comptime, parse `:name` markers and match to struct field names. 129 + 130 + pros: 131 + - more readable 132 + - self-documenting 133 + - catches typos at compile time 134 + 135 + cons: 136 + - non-standard SQL (but common pattern) 137 + 138 + ## recommendation 139 + 140 + start with A + D: 141 + 1. comptime parameter count checking 142 + 2. named parameters with `:name` syntax 143 + 144 + these give us: 145 + - compile-time error for wrong arg count 146 + - compile-time error for misnamed parameters 147 + - more readable queries 148 + - minimal implementation effort 149 + 150 + then evaluate if we need B or C based on pain points. 151 + 152 + ## turso API notes 153 + 154 + turso HTTP API (`/v2/pipeline`) returns: 155 + ```json 156 + { 157 + "results": [{ 158 + "response": { 159 + "type": "execute", 160 + "result": { 161 + "cols": [{"name": "id", "decltype": "INTEGER"}, ...], 162 + "rows": [[1], [2], ...] 163 + } 164 + } 165 + }] 166 + } 167 + ``` 168 + 169 + the `cols` array has column metadata! 
we could potentially: 170 + - cache this on first query execution 171 + - use for runtime column name lookup 172 + - or fetch at build time for comptime generation 173 + 174 + ## implementation status 175 + 176 + ### option A: comptime parameter count checking ✓ 177 + 178 + implemented in `turso.zig`: 179 + 180 + ```zig 181 + pub fn query(self: *Client, comptime sql: []const u8, args: anytype) !Result { 182 + const expected = comptime countPlaceholders(sql); 183 + const provided = comptime countArgsType(@TypeOf(args)); 184 + if (expected != provided) { 185 + @compileError(std.fmt.comptimePrint( 186 + "SQL has {} placeholders but {} args provided", 187 + .{ expected, provided }, 188 + )); 189 + } 190 + // ... 191 + } 192 + ``` 193 + 194 + this gives compile-time errors like: 195 + ``` 196 + error: SQL has 1 placeholders but 2 args provided 197 + ``` 198 + 199 + ### next steps 200 + 201 + 1. ~~implement option A (parameter count checking)~~ ✓ 202 + 2. implement option D (named parameters) - if needed 203 + 3. evaluate if we need more based on pain points

-607

backend/src/db.zig

··· 1 - const std = @import("std"); 2 - const mem = std.mem; 3 - const json = std.json; 4 - const http = std.http; 5 - const Allocator = mem.Allocator; 6 - 7 - const URL_BUF_SIZE = 512; 8 - const AUTH_BUF_SIZE = 512; 9 - 10 - var gpa: std.heap.GeneralPurposeAllocator(.{}) = .{}; 11 - 12 - // initialized by init(), null until then 13 - var turso_url: ?[]const u8 = null; 14 - var turso_token: ?[]const u8 = null; 15 - var mutex: std.Thread.Mutex = .{}; 16 - 17 - pub fn init() !void { 18 - turso_url = std.posix.getenv("TURSO_URL") orelse { 19 - std.debug.print("TURSO_URL not set\n", .{}); 20 - return error.MissingEnv; 21 - }; 22 - turso_token = std.posix.getenv("TURSO_TOKEN") orelse { 23 - std.debug.print("TURSO_TOKEN not set\n", .{}); 24 - return error.MissingEnv; 25 - }; 26 - 27 - std.debug.print("using turso database: {s}\n", .{turso_url.?}); 28 - try initSchema(); 29 - } 30 - 31 - fn initSchema() !void { 32 - _ = try execSql( 33 - \\CREATE TABLE IF NOT EXISTS documents ( 34 - \\ uri TEXT PRIMARY KEY, 35 - \\ did TEXT NOT NULL, 36 - \\ rkey TEXT NOT NULL, 37 - \\ title TEXT NOT NULL, 38 - \\ content TEXT NOT NULL, 39 - \\ created_at TEXT, 40 - \\ publication_uri TEXT 41 - \\) 42 - , &.{}); 43 - 44 - _ = try execSql( 45 - \\CREATE VIRTUAL TABLE IF NOT EXISTS documents_fts USING fts5( 46 - \\ uri UNINDEXED, 47 - \\ title, 48 - \\ content 49 - \\) 50 - , &.{}); 51 - 52 - _ = try execSql( 53 - \\CREATE TABLE IF NOT EXISTS publications ( 54 - \\ uri TEXT PRIMARY KEY, 55 - \\ did TEXT NOT NULL, 56 - \\ rkey TEXT NOT NULL, 57 - \\ name TEXT NOT NULL, 58 - \\ description TEXT, 59 - \\ base_path TEXT 60 - \\) 61 - , &.{}); 62 - 63 - _ = try execSql( 64 - \\CREATE VIRTUAL TABLE IF NOT EXISTS publications_fts USING fts5( 65 - \\ uri UNINDEXED, 66 - \\ name, 67 - \\ description 68 - \\) 69 - , &.{}); 70 - 71 - _ = try execSql( 72 - \\CREATE TABLE IF NOT EXISTS document_tags ( 73 - \\ document_uri TEXT NOT NULL, 74 - \\ tag TEXT NOT NULL, 75 - \\ PRIMARY KEY (document_uri, tag) 
76 - \\) 77 - , &.{}); 78 - 79 - _ = execSql("CREATE INDEX IF NOT EXISTS idx_document_tags_tag ON document_tags(tag)", &.{}) catch |err| { 80 - std.debug.print("create index error: {}\n", .{err}); 81 - }; 82 - 83 - // migrate: add columns if missing (ignore "duplicate column" errors) 84 - _ = execSql("ALTER TABLE documents ADD COLUMN publication_uri TEXT", &.{}) catch |err| { 85 - std.debug.print("migrate documents: {}\n", .{err}); 86 - }; 87 - _ = execSql("ALTER TABLE publications ADD COLUMN base_path TEXT", &.{}) catch |err| { 88 - std.debug.print("migrate publications: {}\n", .{err}); 89 - }; 90 - 91 - std.debug.print("turso schema initialized with FTS5\n", .{}); 92 - } 93 - 94 - pub fn insertDocument(uri: []const u8, did: []const u8, rkey: []const u8, title: []const u8, content: []const u8, created_at: ?[]const u8, publication_uri: ?[]const u8, tags: []const []const u8) !void { 95 - _ = try execSql( 96 - "INSERT OR REPLACE INTO documents (uri, did, rkey, title, content, created_at, publication_uri) VALUES (?, ?, ?, ?, ?, ?, ?)", 97 - &.{ uri, did, rkey, title, content, created_at orelse "", publication_uri orelse "" }, 98 - ); 99 - 100 - // update FTS index - delete old entry first, then insert new 101 - _ = execSql("DELETE FROM documents_fts WHERE uri = ?", &.{uri}) catch |err| { 102 - std.debug.print("delete FTS error for {s}: {}\n", .{ uri, err }); 103 - }; 104 - 105 - _ = execSql( 106 - "INSERT INTO documents_fts (uri, title, content) VALUES (?, ?, ?)", 107 - &.{ uri, title, content }, 108 - ) catch |err| { 109 - std.debug.print("insert FTS error for {s}: {}\n", .{ uri, err }); 110 - }; 111 - 112 - // update tags - delete old, insert new 113 - _ = execSql("DELETE FROM document_tags WHERE document_uri = ?", &.{uri}) catch |err| { 114 - std.debug.print("delete tags error for {s}: {}\n", .{ uri, err }); 115 - }; 116 - for (tags) |tag| { 117 - _ = execSql( 118 - "INSERT OR IGNORE INTO document_tags (document_uri, tag) VALUES (?, ?)", 119 - &.{ uri, tag }, 120 - 
) catch |err| { 121 - std.debug.print("insert tag error for {s}: {}\n", .{ uri, err }); 122 - }; 123 - } 124 - } 125 - 126 - pub fn insertPublication(uri: []const u8, did: []const u8, rkey: []const u8, name: []const u8, description: ?[]const u8, base_path: ?[]const u8) !void { 127 - _ = try execSql( 128 - "INSERT OR REPLACE INTO publications (uri, did, rkey, name, description, base_path) VALUES (?, ?, ?, ?, ?, ?)", 129 - &.{ uri, did, rkey, name, description orelse "", base_path orelse "" }, 130 - ); 131 - 132 - // update FTS index - delete old entry first, then insert new 133 - _ = execSql("DELETE FROM publications_fts WHERE uri = ?", &.{uri}) catch |err| { 134 - std.debug.print("delete publication FTS error for {s}: {}\n", .{ uri, err }); 135 - }; 136 - 137 - _ = execSql( 138 - "INSERT INTO publications_fts (uri, name, description) VALUES (?, ?, ?)", 139 - &.{ uri, name, description orelse "" }, 140 - ) catch |err| { 141 - std.debug.print("insert publication FTS error for {s}: {}\n", .{ uri, err }); 142 - }; 143 - } 144 - 145 - pub fn deleteDocument(uri: []const u8) void { 146 - _ = execSql("DELETE FROM documents WHERE uri = ?", &.{uri}) catch |err| { 147 - std.debug.print("delete document error for {s}: {}\n", .{ uri, err }); 148 - }; 149 - _ = execSql("DELETE FROM documents_fts WHERE uri = ?", &.{uri}) catch |err| { 150 - std.debug.print("delete document FTS error for {s}: {}\n", .{ uri, err }); 151 - }; 152 - } 153 - 154 - pub fn deletePublication(uri: []const u8) void { 155 - _ = execSql("DELETE FROM publications WHERE uri = ?", &.{uri}) catch |err| { 156 - std.debug.print("delete publication error for {s}: {}\n", .{ uri, err }); 157 - }; 158 - _ = execSql("DELETE FROM publications_fts WHERE uri = ?", &.{uri}) catch |err| { 159 - std.debug.print("delete publication FTS error for {s}: {}\n", .{ uri, err }); 160 - }; 161 - } 162 - 163 - // column indices for document search query results 164 - const DocSearchCol = struct { 165 - const uri = 0; 166 - const did = 
1; 167 - const title = 2; 168 - const snippet = 3; 169 - const created_at = 4; 170 - const rkey = 5; 171 - const base_path = 6; 172 - const has_publication = 7; 173 - const count = 8; 174 - }; 175 - 176 - // column indices for publication search query results 177 - const PubSearchCol = struct { 178 - const uri = 0; 179 - const did = 1; 180 - const name = 2; 181 - const snippet = 3; 182 - const rkey = 4; 183 - const base_path = 5; 184 - const count = 6; 185 - }; 186 - 187 - pub fn search(alloc: Allocator, query: []const u8, tag_filter: ?[]const u8) ![]const u8 { 188 - var output: std.Io.Writer.Allocating = .init(alloc); 189 - errdefer output.deinit(); 190 - 191 - const temp_alloc = gpa.allocator(); 192 - 193 - // normalize query: dots become spaces, add prefix matching with * 194 - const fts_query = try buildFtsQuery(alloc, query); 195 - 196 - var jw: json.Stringify = .{ .writer = &output.writer }; 197 - try jw.beginArray(); 198 - 199 - // search documents (articles and looseleafs) 200 - const doc_result = if (query.len == 0 and tag_filter != null) 201 - // tag-only search - list documents with this tag 202 - execSql( 203 - \\SELECT d.uri, d.did, d.title, '' as snippet, 204 - \\ d.created_at, d.rkey, p.base_path, 205 - \\ CASE WHEN d.publication_uri != '' THEN 1 ELSE 0 END as has_publication 206 - \\FROM documents d 207 - \\LEFT JOIN publications p ON d.publication_uri = p.uri 208 - \\JOIN document_tags dt ON d.uri = dt.document_uri 209 - \\WHERE dt.tag = ? 
210 - \\ORDER BY d.created_at DESC LIMIT 40 211 - , &.{tag_filter.?}) catch null 212 - else if (tag_filter) |tag| 213 - execSql( 214 - \\SELECT f.uri, d.did, d.title, 215 - \\ snippet(documents_fts, 2, '', '', '...', 32) as snippet, 216 - \\ d.created_at, d.rkey, p.base_path, 217 - \\ CASE WHEN d.publication_uri != '' THEN 1 ELSE 0 END as has_publication 218 - \\FROM documents_fts f 219 - \\JOIN documents d ON f.uri = d.uri 220 - \\LEFT JOIN publications p ON d.publication_uri = p.uri 221 - \\JOIN document_tags dt ON d.uri = dt.document_uri 222 - \\WHERE documents_fts MATCH ? AND dt.tag = ? 223 - \\ORDER BY rank LIMIT 40 224 - , &.{ fts_query, tag }) catch null 225 - else 226 - execSql( 227 - \\SELECT f.uri, d.did, d.title, 228 - \\ snippet(documents_fts, 2, '', '', '...', 32) as snippet, 229 - \\ d.created_at, d.rkey, p.base_path, 230 - \\ CASE WHEN d.publication_uri != '' THEN 1 ELSE 0 END as has_publication 231 - \\FROM documents_fts f 232 - \\JOIN documents d ON f.uri = d.uri 233 - \\LEFT JOIN publications p ON d.publication_uri = p.uri 234 - \\WHERE documents_fts MATCH ? 
235 - \\ORDER BY rank LIMIT 40 236 - , &.{fts_query}) catch null; 237 - 238 - if (doc_result) |result| { 239 - defer temp_alloc.free(result); 240 - if (json.parseFromSlice(json.Value, temp_alloc, result, .{})) |parsed| { 241 - defer parsed.deinit(); 242 - if (getRowsFromParsed(parsed.value)) |rows| { 243 - for (rows.items) |row| { 244 - if (row != .array or row.array.items.len < DocSearchCol.count) continue; 245 - const cols = row.array.items; 246 - 247 - // determine entity type: article (has publication) or looseleaf (no publication) 248 - const has_pub = extractInt(cols[DocSearchCol.has_publication]) != 0; 249 - const entity_type = if (has_pub) "article" else "looseleaf"; 250 - 251 - try jw.beginObject(); 252 - try jw.objectField("type"); 253 - try jw.write(entity_type); 254 - try jw.objectField("uri"); 255 - try jw.write(extractText(cols[DocSearchCol.uri])); 256 - try jw.objectField("did"); 257 - try jw.write(extractText(cols[DocSearchCol.did])); 258 - try jw.objectField("title"); 259 - try jw.write(extractText(cols[DocSearchCol.title])); 260 - try jw.objectField("snippet"); 261 - try jw.write(extractText(cols[DocSearchCol.snippet])); 262 - try jw.objectField("createdAt"); 263 - try jw.write(extractText(cols[DocSearchCol.created_at])); 264 - try jw.objectField("rkey"); 265 - try jw.write(extractText(cols[DocSearchCol.rkey])); 266 - try jw.objectField("basePath"); 267 - try jw.write(extractText(cols[DocSearchCol.base_path])); 268 - try jw.endObject(); 269 - } 270 - } 271 - } else |_| {} 272 - } 273 - 274 - // search publications (only if no tag filter - publications don't have tags) 275 - if (tag_filter == null) { 276 - const pub_result = execSql( 277 - \\SELECT f.uri, p.did, p.name, 278 - \\ snippet(publications_fts, 2, '', '', '...', 32) as snippet, 279 - \\ p.rkey, p.base_path 280 - \\FROM publications_fts f 281 - \\JOIN publications p ON f.uri = p.uri 282 - \\WHERE publications_fts MATCH ? 
283 - \\ORDER BY rank LIMIT 10 284 - , &.{fts_query}) catch null; 285 - 286 - if (pub_result) |result| { 287 - defer temp_alloc.free(result); 288 - if (json.parseFromSlice(json.Value, temp_alloc, result, .{})) |parsed| { 289 - defer parsed.deinit(); 290 - if (getRowsFromParsed(parsed.value)) |rows| { 291 - for (rows.items) |row| { 292 - if (row != .array or row.array.items.len < PubSearchCol.count) continue; 293 - const cols = row.array.items; 294 - 295 - try jw.beginObject(); 296 - try jw.objectField("type"); 297 - try jw.write("publication"); 298 - try jw.objectField("uri"); 299 - try jw.write(extractText(cols[PubSearchCol.uri])); 300 - try jw.objectField("did"); 301 - try jw.write(extractText(cols[PubSearchCol.did])); 302 - try jw.objectField("title"); 303 - try jw.write(extractText(cols[PubSearchCol.name])); 304 - try jw.objectField("snippet"); 305 - try jw.write(extractText(cols[PubSearchCol.snippet])); 306 - try jw.objectField("rkey"); 307 - try jw.write(extractText(cols[PubSearchCol.rkey])); 308 - try jw.objectField("basePath"); 309 - try jw.write(extractText(cols[PubSearchCol.base_path])); 310 - try jw.endObject(); 311 - } 312 - } 313 - } else |_| {} 314 - } 315 - } 316 - 317 - try jw.endArray(); 318 - return try output.toOwnedSlice(); 319 - } 320 - 321 - fn getRowsFromParsed(value: json.Value) ?json.Array { 322 - const results = value.object.get("results") orelse return null; 323 - if (results != .array or results.array.items.len == 0) return null; 324 - 325 - const first = results.array.items[0]; 326 - if (first != .object) return null; 327 - 328 - const resp = first.object.get("response") orelse return null; 329 - if (resp != .object) return null; 330 - 331 - const res = resp.object.get("result") orelse return null; 332 - if (res != .object) return null; 333 - 334 - const rows = res.object.get("rows") orelse return null; 335 - if (rows != .array) return null; 336 - 337 - return rows.array; 338 - } 339 - 340 - fn extractText(val: json.Value) []const u8 { 
341 - return switch (val) { 342 - .string => |s| s, 343 - .object => |obj| if (obj.get("value")) |v| (if (v == .string) v.string else "") else "", 344 - else => "", 345 - }; 346 - } 347 - 348 - fn extractInt(val: json.Value) i64 { 349 - return switch (val) { 350 - .integer => |i| i, 351 - .object => |obj| blk: { 352 - const v = obj.get("value") orelse break :blk 0; 353 - break :blk switch (v) { 354 - .integer => |i| i, 355 - .string => |s| std.fmt.parseInt(i64, s, 10) catch 0, 356 - else => 0, 357 - }; 358 - }, 359 - else => 0, 360 - }; 361 - } 362 - 363 - // build FTS5 query with prefix matching: "cat dog" -> "cat* dog*" 364 - fn buildFtsQuery(alloc: Allocator, query: []const u8) ![]const u8 { 365 - if (query.len == 0) return ""; 366 - 367 - // normalize dots to spaces 368 - const normalized = try alloc.dupe(u8, query); 369 - for (normalized) |*c| { 370 - if (c.* == '.') c.* = ' '; 371 - } 372 - 373 - // count words to calculate output size 374 - var word_count: usize = 0; 375 - var in_word = false; 376 - for (normalized) |c| { 377 - if (c == ' ') { 378 - in_word = false; 379 - } else if (!in_word) { 380 - word_count += 1; 381 - in_word = true; 382 - } 383 - } 384 - 385 - if (word_count == 0) return ""; 386 - 387 - // allocate: original length + one '*' per word + spaces 388 - const result = try alloc.alloc(u8, normalized.len + word_count); 389 - var pos: usize = 0; 390 - in_word = false; 391 - 392 - for (normalized) |c| { 393 - if (c == ' ') { 394 - if (in_word) { 395 - result[pos] = '*'; 396 - pos += 1; 397 - in_word = false; 398 - } 399 - result[pos] = ' '; 400 - pos += 1; 401 - } else { 402 - result[pos] = c; 403 - pos += 1; 404 - in_word = true; 405 - } 406 - } 407 - 408 - // add final * if ended in a word 409 - if (in_word) { 410 - result[pos] = '*'; 411 - pos += 1; 412 - } 413 - 414 - return result[0..pos]; 415 - } 416 - 417 - fn execSql(sql: []const u8, args: []const []const u8) ![]const u8 { 418 - mutex.lock(); 419 - defer mutex.unlock(); 420 - 421 - 
const alloc = gpa.allocator(); 422 - 423 - const url_value = turso_url orelse return error.NotInitialized; 424 - const token_value = turso_token orelse return error.NotInitialized; 425 - 426 - // strip libsql:// prefix if present, use https:// 427 - const libsql_prefix = "libsql://"; 428 - const host = if (mem.startsWith(u8, url_value, libsql_prefix)) 429 - url_value[libsql_prefix.len..] 430 - else 431 - url_value; 432 - 433 - var url_buf: [URL_BUF_SIZE]u8 = undefined; 434 - const url = std.fmt.bufPrint(&url_buf, "https://{s}/v2/pipeline", .{host}) catch return error.UrlTooLong; 435 - 436 - // build request body 437 - var body: std.Io.Writer.Allocating = .init(alloc); 438 - defer body.deinit(); 439 - 440 - var jw: json.Stringify = .{ .writer = &body.writer }; 441 - try jw.beginObject(); 442 - try jw.objectField("requests"); 443 - try jw.beginArray(); 444 - 445 - // execute statement 446 - try jw.beginObject(); 447 - try jw.objectField("type"); 448 - try jw.write("execute"); 449 - try jw.objectField("stmt"); 450 - try jw.beginObject(); 451 - try jw.objectField("sql"); 452 - try jw.write(sql); 453 - if (args.len > 0) { 454 - try jw.objectField("args"); 455 - try jw.beginArray(); 456 - for (args) |arg| { 457 - try jw.beginObject(); 458 - try jw.objectField("type"); 459 - try jw.write("text"); 460 - try jw.objectField("value"); 461 - try jw.write(arg); 462 - try jw.endObject(); 463 - } 464 - try jw.endArray(); 465 - } 466 - try jw.endObject(); 467 - try jw.endObject(); 468 - 469 - // close statement 470 - try jw.beginObject(); 471 - try jw.objectField("type"); 472 - try jw.write("close"); 473 - try jw.endObject(); 474 - 475 - try jw.endArray(); 476 - try jw.endObject(); 477 - 478 - var auth_buf: [AUTH_BUF_SIZE]u8 = undefined; 479 - const auth_header = std.fmt.bufPrint(&auth_buf, "Bearer {s}", .{token_value}) catch return error.AuthTooLong; 480 - 481 - var client: http.Client = .{ .allocator = alloc }; 482 - defer client.deinit(); 483 - 484 - var response_body: 
std.Io.Writer.Allocating = .init(alloc); 485 - defer response_body.deinit(); 486 - 487 - const result = client.fetch(.{ 488 - .location = .{ .url = url }, 489 - .method = .POST, 490 - .headers = .{ 491 - .content_type = .{ .override = "application/json" }, 492 - .authorization = .{ .override = auth_header }, 493 - }, 494 - .payload = body.written(), 495 - .response_writer = &response_body.writer, 496 - }) catch |err| { 497 - std.debug.print("turso request failed: {}\n", .{err}); 498 - return error.HttpError; 499 - }; 500 - 501 - if (result.status != .ok) { 502 - std.debug.print("turso error: {}\n", .{result.status}); 503 - return error.TursoError; 504 - } 505 - 506 - return try response_body.toOwnedSlice(); 507 - } 508 - 509 - pub fn getTags(alloc: Allocator) ![]const u8 { 510 - var output: std.Io.Writer.Allocating = .init(alloc); 511 - errdefer output.deinit(); 512 - 513 - const temp_alloc = gpa.allocator(); 514 - 515 - const result = execSql( 516 - \\SELECT tag, COUNT(*) as count 517 - \\FROM document_tags 518 - \\GROUP BY tag 519 - \\ORDER BY count DESC 520 - \\LIMIT 100 521 - , &.{}) catch { 522 - try output.writer.writeAll("[]"); 523 - return try output.toOwnedSlice(); 524 - }; 525 - defer temp_alloc.free(result); 526 - 527 - const parsed = json.parseFromSlice(json.Value, temp_alloc, result, .{}) catch { 528 - try output.writer.writeAll("[]"); 529 - return try output.toOwnedSlice(); 530 - }; 531 - defer parsed.deinit(); 532 - 533 - const rows = getRowsFromParsed(parsed.value) orelse { 534 - try output.writer.writeAll("[]"); 535 - return try output.toOwnedSlice(); 536 - }; 537 - 538 - var jw: json.Stringify = .{ .writer = &output.writer }; 539 - try jw.beginArray(); 540 - 541 - for (rows.items) |row| { 542 - if (row != .array or row.array.items.len < 2) continue; 543 - const cols = row.array.items; 544 - 545 - try jw.beginObject(); 546 - try jw.objectField("tag"); 547 - try jw.write(extractText(cols[0])); 548 - try jw.objectField("count"); 549 - const count_val 
= cols[1]; 550 - const count: i64 = switch (count_val) { 551 - .integer => |i| i, 552 - .object => |obj| blk: { 553 - const v = obj.get("value") orelse break :blk 0; 554 - break :blk switch (v) { 555 - .integer => |i| i, 556 - .string => |s| std.fmt.parseInt(i64, s, 10) catch 0, 557 - else => 0, 558 - }; 559 - }, 560 - else => 0, 561 - }; 562 - try jw.write(count); 563 - try jw.endObject(); 564 - } 565 - 566 - try jw.endArray(); 567 - return try output.toOwnedSlice(); 568 - } 569 - 570 - pub fn getStats() struct { documents: i64, publications: i64 } { 571 - const doc_result = execSql("SELECT COUNT(*) FROM documents", &.{}) catch return .{ .documents = 0, .publications = 0 }; 572 - defer gpa.allocator().free(doc_result); 573 - 574 - const pub_result = execSql("SELECT COUNT(*) FROM publications", &.{}) catch return .{ .documents = 0, .publications = 0 }; 575 - defer gpa.allocator().free(pub_result); 576 - 577 - return .{ 578 - .documents = parseCount(doc_result), 579 - .publications = parseCount(pub_result), 580 - }; 581 - } 582 - 583 - fn parseCount(result: []const u8) i64 { 584 - const alloc = gpa.allocator(); 585 - const parsed = json.parseFromSlice(json.Value, alloc, result, .{}) catch return 0; 586 - defer parsed.deinit(); 587 - 588 - const rows = getRowsFromParsed(parsed.value) orelse return 0; 589 - if (rows.items.len == 0) return 0; 590 - 591 - const first_row = rows.items[0]; 592 - if (first_row != .array or first_row.array.items.len == 0) return 0; 593 - 594 - const val = first_row.array.items[0]; 595 - return switch (val) { 596 - .integer => |i| i, 597 - .object => |obj| blk: { 598 - const v = obj.get("value") orelse break :blk 0; 599 - break :blk switch (v) { 600 - .integer => |i| i, 601 - .string => |s| std.fmt.parseInt(i64, s, 10) catch 0, 602 - else => 0, 603 - }; 604 - }, 605 - else => 0, 606 - }; 607 - }

+345

backend/src/db/mod.zig

//! Database facade: owns the process-wide Turso client and exposes the
//! insert / delete / search / stats operations used by the rest of the backend.

const std = @import("std");
const json = std.json;
const Allocator = std.mem.Allocator;

const zql = @import("zql");
const turso = @import("turso.zig");
const schema = @import("schema.zig");
const result = @import("result.zig");

pub const Client = turso.Client;
pub const Result = turso.Result;
pub const Row = turso.Row;

var gpa: std.heap.GeneralPurposeAllocator(.{}) = .{};
var client: ?turso.Client = null;

/// Create the shared client and make sure the schema exists.
/// Must be called before any other function in this module.
pub fn init() !void {
    client = try turso.Client.init(gpa.allocator());
    try schema.init(&client.?);
}

/// Pointer to the shared client stored in the module-level `client`,
/// or `error.NotInitialized` when `init` has not populated it.
fn activeClient() error{NotInitialized}!*turso.Client {
    if (client) |*c| return c;
    return error.NotInitialized;
}

/// Upsert a document row, then refresh its FTS entry and tag rows.
///
/// Only the main `documents` upsert is allowed to fail the call; the FTS and
/// tag statements are deliberately best-effort. Missing `created_at` /
/// `publication_uri` are stored as the empty string.
pub fn insertDocument(
    uri: []const u8,
    did: []const u8,
    rkey: []const u8,
    title: []const u8,
    content: []const u8,
    created_at: ?[]const u8,
    publication_uri: ?[]const u8,
    tags: []const []const u8,
) !void {
    const c = try activeClient();

    try c.exec(
        "INSERT OR REPLACE INTO documents (uri, did, rkey, title, content, created_at, publication_uri) VALUES (?, ?, ?, ?, ?, ?, ?)",
        &.{ uri, did, rkey, title, content, created_at orelse "", publication_uri orelse "" },
    );

    // update FTS index (delete + insert so re-indexing never duplicates a uri)
    c.exec("DELETE FROM documents_fts WHERE uri = ?", &.{uri}) catch {};
    c.exec(
        "INSERT INTO documents_fts (uri, title, content) VALUES (?, ?, ?)",
        &.{ uri, title, content },
    ) catch {};

    // update tags: drop the old set, then insert the current one
    c.exec("DELETE FROM document_tags WHERE document_uri = ?", &.{uri}) catch {};
    for (tags) |tag| {
        c.exec(
            "INSERT OR IGNORE INTO document_tags (document_uri, tag) VALUES (?, ?)",
            &.{ uri, tag },
        ) catch {};
    }
}

/// Upsert a publication row and refresh its FTS entry.
///
/// As with `insertDocument`, only the main upsert propagates errors. Missing
/// `description` / `base_path` are stored as the empty string.
pub fn insertPublication(
    uri: []const u8,
    did: []const u8,
    rkey: []const u8,
    name: []const u8,
    description: ?[]const u8,
    base_path: ?[]const u8,
) !void {
    const c = try activeClient();

    try c.exec(
        "INSERT OR REPLACE INTO publications (uri, did, rkey, name, description, base_path) VALUES (?, ?, ?, ?, ?, ?)",
        &.{ uri, did, rkey, name, description orelse "", base_path orelse "" },
    );

    // update FTS index
    c.exec("DELETE FROM publications_fts WHERE uri = ?", &.{uri}) catch {};
    c.exec(
        "INSERT INTO publications_fts (uri, name, description) VALUES (?, ?, ?)",
        &.{ uri, name, description orelse "" },
    ) catch {};
}

/// Remove a document together with its FTS entry and tags.
/// Best-effort: does nothing when the module is not initialized and ignores
/// statement failures.
pub fn deleteDocument(uri: []const u8) void {
    const c = activeClient() catch return;
    c.exec("DELETE FROM documents WHERE uri = ?", &.{uri}) catch {};
    c.exec("DELETE FROM documents_fts WHERE uri = ?", &.{uri}) catch {};
    c.exec("DELETE FROM document_tags WHERE document_uri = ?", &.{uri}) catch {};
}

/// Remove a publication together with its FTS entry.
/// Best-effort: does nothing when the module is not initialized and ignores
/// statement failures.
pub fn deletePublication(uri: []const u8) void {
    const c = activeClient() catch return;
    c.exec("DELETE FROM publications WHERE uri = ?", &.{uri}) catch {};
    c.exec("DELETE FROM publications_fts WHERE uri = ?", &.{uri}) catch {};
}

// query types with comptime column extraction
//
// Only the SELECT list (plus FROM) of the document queries is passed here; the
// three document queries in `search` all select these columns in this order.
const DocQuery = zql.Query(
    \\SELECT d.uri, d.did, d.title, '' as snippet,
    \\ d.created_at, d.rkey, p.base_path,
    \\ CASE WHEN d.publication_uri != '' THEN 1 ELSE 0 END as has_publication
    \\FROM documents d
);

/// One document search hit; field names mirror the `DocQuery` column names.
const Doc = struct {
    uri: []const u8,
    did: []const u8,
    title: []const u8,
    snippet: []const u8,
    created_at: []const u8,
    rkey: []const u8,
    base_path: []const u8,
    has_publication: bool,
};

// SELECT list of the publication query issued by `search`.
const PubQuery = zql.Query(
    \\SELECT f.uri, p.did, p.name,
    \\ snippet(publications_fts, 2, '', '', '...', 32) as snippet,
    \\ p.rkey, p.base_path
    \\FROM publications_fts f
);

/// One publication search hit; field names mirror the `PubQuery` column names.
const Pub = struct {
    uri: []const u8,
    did: []const u8,
    name: []const u8,
    snippet: []const u8,
    rkey: []const u8,
    base_path: []const u8,
};

/// Run a search and return the hits as a JSON array (caller owns the slice,
/// allocated with `alloc`).
///
/// - empty `query` with a tag: newest documents carrying that tag;
/// - `query` with a tag: FTS match restricted to that tag;
/// - `query` without a tag: FTS match over documents, followed by
///   publications.
///
/// A failing query contributes no rows instead of failing the whole search.
pub fn search(alloc: Allocator, query: []const u8, tag_filter: ?[]const u8) ![]const u8 {
    const c = try activeClient();

    var output: std.Io.Writer.Allocating = .init(alloc);
    errdefer output.deinit();

    var jw: json.Stringify = .{ .writer = &output.writer };
    try jw.beginArray();

    const fts_query = try buildFtsQuery(alloc, query);
    // buildFtsQuery returns either a full allocation or the zero-length "";
    // freeing a zero-length slice is a no-op, so this is safe in both cases.
    defer alloc.free(fts_query);

    // search documents
    var doc_result = if (query.len == 0 and tag_filter != null)
        c.query(
            \\SELECT d.uri, d.did, d.title, '' as snippet,
            \\ d.created_at, d.rkey, p.base_path,
            \\ CASE WHEN d.publication_uri != '' THEN 1 ELSE 0 END as has_publication
            \\FROM documents d
            \\LEFT JOIN publications p ON d.publication_uri = p.uri
            \\JOIN document_tags dt ON d.uri = dt.document_uri
            \\WHERE dt.tag = ?
            \\ORDER BY d.created_at DESC LIMIT 40
        , &.{tag_filter.?}) catch null
    else if (tag_filter) |tag|
        c.query(
            \\SELECT f.uri, d.did, d.title,
            \\ snippet(documents_fts, 2, '', '', '...', 32) as snippet,
            \\ d.created_at, d.rkey, p.base_path,
            \\ CASE WHEN d.publication_uri != '' THEN 1 ELSE 0 END as has_publication
            \\FROM documents_fts f
            \\JOIN documents d ON f.uri = d.uri
            \\LEFT JOIN publications p ON d.publication_uri = p.uri
            \\JOIN document_tags dt ON d.uri = dt.document_uri
            \\WHERE documents_fts MATCH ? AND dt.tag = ?
            \\ORDER BY rank LIMIT 40
        , &.{ fts_query, tag }) catch null
    else
        c.query(
            \\SELECT f.uri, d.did, d.title,
            \\ snippet(documents_fts, 2, '', '', '...', 32) as snippet,
            \\ d.created_at, d.rkey, p.base_path,
            \\ CASE WHEN d.publication_uri != '' THEN 1 ELSE 0 END as has_publication
            \\FROM documents_fts f
            \\JOIN documents d ON f.uri = d.uri
            \\LEFT JOIN publications p ON d.publication_uri = p.uri
            \\WHERE documents_fts MATCH ?
            \\ORDER BY rank LIMIT 40
        , &.{fts_query}) catch null;

    if (doc_result) |*res| {
        defer res.deinit();
        for (res.rows) |row| {
            // NOTE(review): presumably maps columns onto `Doc` fields by name — confirm against zql.
            const doc = DocQuery.fromRow(Doc, row);
            try jw.beginObject();
            try jw.objectField("type");
            try jw.write(if (doc.has_publication) "article" else "looseleaf");
            try jw.objectField("uri");
            try jw.write(doc.uri);
            try jw.objectField("did");
            try jw.write(doc.did);
            try jw.objectField("title");
            try jw.write(doc.title);
            try jw.objectField("snippet");
            try jw.write(doc.snippet);
            try jw.objectField("createdAt");
            try jw.write(doc.created_at);
            try jw.objectField("rkey");
            try jw.write(doc.rkey);
            try jw.objectField("basePath");
            try jw.write(doc.base_path);
            try jw.endObject();
        }
    }

    // search publications (only if no tag filter)
    if (tag_filter == null) {
        var pub_result = c.query(
            \\SELECT f.uri, p.did, p.name,
            \\ snippet(publications_fts, 2, '', '', '...', 32) as snippet,
            \\ p.rkey, p.base_path
            \\FROM publications_fts f
            \\JOIN publications p ON f.uri = p.uri
            \\WHERE publications_fts MATCH ?
            \\ORDER BY rank LIMIT 10
        , &.{fts_query}) catch null;

        if (pub_result) |*res| {
            defer res.deinit();
            for (res.rows) |row| {
                try jw.beginObject();
                try jw.objectField("type");
                try jw.write("publication");
                try jw.objectField("uri");
                try jw.write(row.text(PubQuery.columnIndex("uri")));
                try jw.objectField("did");
                try jw.write(row.text(PubQuery.columnIndex("did")));
                try jw.objectField("title");
                try jw.write(row.text(PubQuery.columnIndex("name")));
                try jw.objectField("snippet");
                try jw.write(row.text(PubQuery.columnIndex("snippet")));
                try jw.objectField("rkey");
                try jw.write(row.text(PubQuery.columnIndex("rkey")));
                try jw.objectField("basePath");
                try jw.write(row.text(PubQuery.columnIndex("base_path")));
                try jw.endObject();
            }
        }
    }

    try jw.endArray();
    return try output.toOwnedSlice();
}

/// Return up to 100 tags with their document counts, most used first, as a
/// JSON array of `{"tag": ..., "count": ...}` (caller owns the slice).
/// A failing query yields `[]` rather than an error.
pub fn getTags(alloc: Allocator) ![]const u8 {
    const c = try activeClient();

    var output: std.Io.Writer.Allocating = .init(alloc);
    errdefer output.deinit();

    var res = c.query(
        \\SELECT tag, COUNT(*) as count
        \\FROM document_tags
        \\GROUP BY tag
        \\ORDER BY count DESC
        \\LIMIT 100
    , &.{}) catch {
        try output.writer.writeAll("[]");
        return try output.toOwnedSlice();
    };
    defer res.deinit();

    var jw: json.Stringify = .{ .writer = &output.writer };
    try jw.beginArray();

    for (res.rows) |row| {
        try jw.beginObject();
        try jw.objectField("tag");
        try jw.write(row.text(0));
        try jw.objectField("count");
        try jw.write(row.int(1));
        try jw.endObject();
    }

    try jw.endArray();
    return try output.toOwnedSlice();
}

/// Run a single-value `SELECT COUNT(*) ...` statement and return the count,
/// or 0 when the query fails or yields no row.
fn countRows(c: *turso.Client, comptime sql: []const u8) i64 {
    var res = c.query(sql, &.{}) catch return 0;
    defer res.deinit();
    const row = res.first() orelse return 0;
    return row.int(0);
}

/// Total number of documents and publications. Never fails: every problem
/// (not initialized, query error, empty result) is reported as 0.
pub fn getStats() struct { documents: i64, publications: i64 } {
    const c = activeClient() catch return .{ .documents = 0, .publications = 0 };

    return .{
        .documents = countRows(c, "SELECT COUNT(*) FROM documents"),
        .publications = countRows(c, "SELECT COUNT(*) FROM publications"),
    };
}

/// True for the characters that end a word in a search query: a space, or a
/// dot (dots are normalized to spaces).
fn isWordBreak(ch: u8) bool {
    return ch == ' ' or ch == '.';
}

/// Build FTS5 query with prefix matching: "cat dog" -> "cat* dog*"
///
/// Dots are turned into spaces and every word gets a trailing `*`; spaces are
/// otherwise preserved as-is. Returns the static "" when `query` contains no
/// word; otherwise the result is a full allocation from `alloc` that the
/// caller frees (freeing the "" case is a harmless no-op).
fn buildFtsQuery(alloc: Allocator, query: []const u8) ![]const u8 {
    // count words so the output can be sized exactly
    var word_count: usize = 0;
    var in_word = false;
    for (query) |ch| {
        if (isWordBreak(ch)) {
            in_word = false;
        } else if (!in_word) {
            word_count += 1;
            in_word = true;
        }
    }

    if (word_count == 0) return "";

    // every input byte produces one output byte, plus one '*' per word
    const buf = try alloc.alloc(u8, query.len + word_count);
    var pos: usize = 0;
    in_word = false;

    for (query) |ch| {
        if (isWordBreak(ch)) {
            if (in_word) {
                buf[pos] = '*';
                pos += 1;
                in_word = false;
            }
            buf[pos] = ' ';
            pos += 1;
        } else {
            buf[pos] = ch;
            pos += 1;
            in_word = true;
        }
    }

    if (in_word) {
        buf[pos] = '*';
        pos += 1;
    }

    // the buffer is filled exactly, so the returned slice is the whole allocation
    std.debug.assert(pos == buf.len);
    return buf;
}

+117

backend/src/db/result.zig

const std = @import("std");
const json = std.json;
const Allocator = std.mem.Allocator;

/// A single row from a query result.
///
/// `columns` is a view into the JSON tree held by the `Result` the row came
/// from, so a row must not be used after that result's `deinit`.
pub const Row = struct {
    columns: []const json.Value,

    /// Column `index` as text; "" when the index is out of range or the cell
    /// holds no string (see `extractText`).
    pub fn text(self: Row, index: usize) []const u8 {
        if (index >= self.columns.len) return "";
        return extractText(self.columns[index]);
    }

    /// Column `index` as an integer; 0 when the index is out of range or the
    /// cell holds no integer (see `extractInt`).
    pub fn int(self: Row, index: usize) i64 {
        if (index >= self.columns.len) return 0;
        return extractInt(self.columns[index]);
    }
};

/// Parsed query result with rows
pub const Result = struct {
    allocator: Allocator,
    /// The parsed response backing `rows`; null when the response was not valid JSON.
    parsed: ?json.Parsed(json.Value),
    rows: []const Row,

    /// Parse a response body into rows.
    ///
    /// Malformed JSON, or JSON without the expected rows array, produces an
    /// empty result rather than an error; only allocation failure is
    /// returned as an error. Entries of the rows array that are not arrays
    /// are skipped. Release with `deinit`.
    pub fn parse(allocator: Allocator, response: []const u8) !Result {
        const parsed = json.parseFromSlice(json.Value, allocator, response, .{}) catch |err| switch (err) {
            // running out of memory is not "an empty result"; let the caller see it
            error.OutOfMemory => return error.OutOfMemory,
            else => return .{ .allocator = allocator, .parsed = null, .rows = &.{} },
        };
        // from here on a failure must not leak the parsed tree
        errdefer parsed.deinit();

        const json_rows = getRowsFromParsed(parsed.value) orelse {
            return .{ .allocator = allocator, .parsed = parsed, .rows = &.{} };
        };

        // size the row slice exactly: only array entries become rows
        var row_count: usize = 0;
        for (json_rows.items) |item| {
            if (item == .array) row_count += 1;
        }

        const rows = try allocator.alloc(Row, row_count);
        var next: usize = 0;
        for (json_rows.items) |item| {
            if (item != .array) continue;
            rows[next] = .{ .columns = item.array.items };
            next += 1;
        }

        return .{
            .allocator = allocator,
            .parsed = parsed,
            .rows = rows,
        };
    }

    /// Free the row slice and the parsed JSON tree. Rows obtained from this
    /// result are invalid afterwards.
    pub fn deinit(self: *Result) void {
        self.allocator.free(self.rows);
        if (self.parsed) |*p| p.deinit();
    }

    /// Number of rows.
    pub fn count(self: Result) usize {
        return self.rows.len;
    }

    /// True when there are no rows.
    pub fn isEmpty(self: Result) bool {
        return self.rows.len == 0;
    }

    /// Get the first row, or null if empty
    pub fn first(self: Result) ?Row {
        if (self.rows.len == 0) return null;
        return self.rows[0];
    }
};

/// Navigate Turso's nested response format to get rows
///
/// Follows `results[0].response.result.rows` and returns null as soon as a
/// step is missing or has the wrong JSON type — including a root value that
/// is not an object.
fn getRowsFromParsed(value: json.Value) ?json.Array {
    if (value != .object) return null;

    const results = value.object.get("results") orelse return null;
    if (results != .array or results.array.items.len == 0) return null;

    const first = results.array.items[0];
    if (first != .object) return null;

    const resp = first.object.get("response") orelse return null;
    if (resp != .object) return null;

    const res = resp.object.get("result") orelse return null;
    if (res != .object) return null;

    const rows = res.object.get("rows") orelse return null;
    if (rows != .array) return null;

    return rows.array;
}

/// Extract text from a Turso value (handles both raw and typed formats)
///
/// Accepts a bare JSON string, or an object whose "value" member is a
/// string. Anything else yields "".
pub fn extractText(val: json.Value) []const u8 {
    return switch (val) {
        .string => |s| s,
        .object => |obj| {
            const v = obj.get("value") orelse return "";
            return if (v == .string) v.string else "";
        },
        else => "",
    };
}

/// Extract integer from a Turso value (handles both raw and typed formats)
///
/// Accepts a bare JSON integer, or an object whose "value" member is an
/// integer or a base-10 integer string. Anything else — including a string
/// that does not parse — yields 0.
pub fn extractInt(val: json.Value) i64 {
    return switch (val) {
        .integer => |i| i,
        .object => |obj| {
            const v = obj.get("value") orelse return 0;
            return switch (v) {
                .integer => |i| i,
                .string => |s| std.fmt.parseInt(i64, s, 10) catch 0,
                else => 0,
            };
        },
        else => 0,
    };
}

+69

backend/src/db/schema.zig

const std = @import("std");
const turso = @import("turso.zig");

/// Table definitions, executed in this order by `createTables`. Every
/// statement uses IF NOT EXISTS, so running them against an existing database
/// is harmless.
const table_statements = [_][]const u8{
    \\CREATE TABLE IF NOT EXISTS documents (
    \\ uri TEXT PRIMARY KEY,
    \\ did TEXT NOT NULL,
    \\ rkey TEXT NOT NULL,
    \\ title TEXT NOT NULL,
    \\ content TEXT NOT NULL,
    \\ created_at TEXT,
    \\ publication_uri TEXT
    \\)
    ,
    \\CREATE VIRTUAL TABLE IF NOT EXISTS documents_fts USING fts5(
    \\ uri UNINDEXED,
    \\ title,
    \\ content
    \\)
    ,
    \\CREATE TABLE IF NOT EXISTS publications (
    \\ uri TEXT PRIMARY KEY,
    \\ did TEXT NOT NULL,
    \\ rkey TEXT NOT NULL,
    \\ name TEXT NOT NULL,
    \\ description TEXT,
    \\ base_path TEXT
    \\)
    ,
    \\CREATE VIRTUAL TABLE IF NOT EXISTS publications_fts USING fts5(
    \\ uri UNINDEXED,
    \\ name,
    \\ description
    \\)
    ,
    \\CREATE TABLE IF NOT EXISTS document_tags (
    \\ document_uri TEXT NOT NULL,
    \\ tag TEXT NOT NULL,
    \\ PRIMARY KEY (document_uri, tag)
    \\)
    ,
};

/// Column additions for databases created before these columns were part of
/// the CREATE TABLE statements above.
const column_migrations = [_][]const u8{
    "ALTER TABLE documents ADD COLUMN publication_uri TEXT",
    "ALTER TABLE publications ADD COLUMN base_path TEXT",
};

/// Initialize database schema and run migrations
///
/// Fails if any table cannot be created; index creation and migrations are
/// best-effort.
pub fn init(client: *turso.Client) !void {
    try createTables(client);
    try runMigrations(client);
    std.debug.print("schema initialized\n", .{});
}

/// Create all tables (any failure aborts), then the tag index (best-effort).
fn createTables(client: *turso.Client) !void {
    // `exec` takes its SQL at comptime, hence the unrolled loop
    inline for (table_statements) |statement| {
        try client.exec(statement, &.{});
    }

    // the index is not required for the schema to work, so a failure here is
    // reported but does not abort initialization
    client.exec(
        "CREATE INDEX IF NOT EXISTS idx_document_tags_tag ON document_tags(tag)",
        &.{},
    ) catch |err| {
        std.log.warn("could not create idx_document_tags_tag: {s}", .{@errorName(err)});
    };
}

/// Apply the column migrations. Each one is independent and best-effort.
fn runMigrations(client: *turso.Client) !void {
    // these may fail if columns already exist - that's fine
    inline for (column_migrations) |statement| {
        client.exec(statement, &.{}) catch |err| {
            // debug level only: failing here is the expected steady state
            std.log.debug("migration skipped ({s}): {s}", .{ @errorName(err), statement });
        };
    }
}

+231

backend/src/db/turso.zig

const std = @import("std");
const http = std.http;
const json = std.json;
const mem = std.mem;
const Allocator = mem.Allocator;

const result = @import("result.zig");
pub const Result = result.Result;
pub const Row = result.Row;

const URL_BUF_SIZE = 512;
const AUTH_BUF_SIZE = 512;

/// Count `?` placeholders in SQL at comptime
///
/// A `?` inside a quoted literal or identifier ('...', "...", `...`), a `--`
/// line comment or a `/* */` block comment is not counted. A doubled quote
/// inside a literal ('it''s') simply closes and reopens the literal, which
/// gives the same count.
fn countPlaceholders(comptime sql: []const u8) usize {
    // one loop iteration per byte; keep long statements within the comptime budget
    @setEvalBranchQuota(1000 + sql.len * 8);

    var count: usize = 0;
    var i: usize = 0;
    while (i < sql.len) : (i += 1) {
        switch (sql[i]) {
            '?' => count += 1,
            '\'', '"', '`' => |quote| {
                // skip to the matching closing quote (or the end of input)
                i += 1;
                while (i < sql.len and sql[i] != quote) : (i += 1) {}
            },
            '-' => if (i + 1 < sql.len and sql[i + 1] == '-') {
                // line comment: skip to the newline (or the end of input)
                while (i < sql.len and sql[i] != '\n') : (i += 1) {}
            },
            '/' => if (i + 1 < sql.len and sql[i + 1] == '*') {
                // block comment: skip to the closing "*/" (or the end of input)
                i += 2;
                while (i + 1 < sql.len and !(sql[i] == '*' and sql[i + 1] == '/')) : (i += 1) {}
                i += 1; // step onto the '/', the loop increment moves past it
            },
            else => {},
        }
    }
    return count;
}

/// Fields of an args tuple type, looking through one pointer level
/// (`.{a, b}` and `&.{a, b}` are both accepted). Null for any other type.
fn tupleFields(comptime ArgsType: type) ?[]const std.builtin.Type.StructField {
    return switch (@typeInfo(ArgsType)) {
        .@"struct" => |info| info.fields,
        .pointer => |ptr| switch (@typeInfo(ptr.child)) {
            .@"struct" => |info| info.fields,
            else => null,
        },
        else => null,
    };
}

/// Count args in a tuple type (handles both direct tuples and pointers to tuples)
/// Any other type counts as zero args.
fn countArgsType(comptime ArgsType: type) usize {
    const fields = tupleFields(ArgsType) orelse return 0;
    return fields.len;
}

/// Compile error unless `ArgsType` carries exactly as many values as `sql`
/// has placeholders. Call in a comptime context.
fn checkArgCount(comptime sql: []const u8, comptime ArgsType: type) void {
    const expected = countPlaceholders(sql);
    const provided = countArgsType(ArgsType);
    if (expected != provided) {
        @compileError(std.fmt.comptimePrint(
            "SQL has {} placeholders but {} args provided",
            .{ expected, provided },
        ));
    }
}

/// Client for the Turso HTTP pipeline endpoint (`https://<host>/v2/pipeline`).
///
/// Requests are serialized through `mutex`, so one client can be shared
/// between threads.
pub const Client = struct {
    allocator: Allocator,
    /// Host part only: any `libsql://` prefix has been stripped.
    url: []const u8,
    token: []const u8,
    mutex: std.Thread.Mutex = .{},

    /// Build a client from the `TURSO_URL` and `TURSO_TOKEN` environment
    /// variables. Returns `error.MissingEnv` when either is unset.
    /// `allocator` is used for every request made through this client.
    pub fn init(allocator: Allocator) !Client {
        const url = std.posix.getenv("TURSO_URL") orelse {
            std.debug.print("TURSO_URL not set\n", .{});
            return error.MissingEnv;
        };
        const token = std.posix.getenv("TURSO_TOKEN") orelse {
            std.debug.print("TURSO_TOKEN not set\n", .{});
            return error.MissingEnv;
        };

        // strip libsql:// prefix if present
        const libsql_prefix = "libsql://";
        const host = if (mem.startsWith(u8, url, libsql_prefix))
            url[libsql_prefix.len..]
        else
            url;

        std.debug.print("turso client initialized: {s}\n", .{host});

        return .{
            .allocator = allocator,
            .url = host,
            .token = token,
        };
    }

    /// Execute a query and return parsed results.
    /// Validates parameter count at compile time.
    ///
    /// `args` is a tuple (or pointer to a tuple) of strings, one per `?`.
    /// The caller owns the returned `Result` and must call `deinit` on it.
    pub fn query(self: *Client, comptime sql: []const u8, args: anytype) !Result {
        comptime checkArgCount(sql, @TypeOf(args));

        const args_slice = try self.argsToSlice(args);
        defer self.allocator.free(args_slice);

        const response = try self.executeRaw(sql, args_slice);
        // NOTE(review): the response buffer is released right after parsing, so the
        // parsed rows must not alias it — presumably std.json copies strings when
        // building json.Value; confirm against the targeted std version.
        defer self.allocator.free(response);

        return Result.parse(self.allocator, response);
    }

    /// Execute a statement, ignoring results.
    /// Validates parameter count at compile time.
    ///
    /// `args` is a tuple (or pointer to a tuple) of strings, one per `?`.
    pub fn exec(self: *Client, comptime sql: []const u8, args: anytype) !void {
        comptime checkArgCount(sql, @TypeOf(args));

        const args_slice = try self.argsToSlice(args);
        defer self.allocator.free(args_slice);

        const response = try self.executeRaw(sql, args_slice);
        self.allocator.free(response);
    }

    /// Convert tuple/struct args to slice, with comptime validation
    ///
    /// Accepts a tuple or a pointer to one (e.g. `&.{a, b, c}`); anything
    /// else is a compile error. The returned slice is allocated with the
    /// client allocator and owned by the caller; the strings themselves are
    /// not copied.
    fn argsToSlice(self: *Client, args: anytype) ![]const []const u8 {
        const maybe_fields = comptime tupleFields(@TypeOf(args));
        const fields = maybe_fields orelse
            @compileError("args must be a tuple or pointer to tuple");

        const slice = try self.allocator.alloc([]const u8, fields.len);
        inline for (fields, 0..) |field, i| {
            // field access looks through the pointer in the `&.{...}` case
            slice[i] = @field(args, field.name);
        }
        return slice;
    }

    /// Execute and return raw JSON response (caller owns memory)
    ///
    /// Holds `mutex` for the whole round trip. Errors: `error.UrlTooLong` /
    /// `error.AuthTooLong` when the host or token does not fit the fixed
    /// buffers, `error.HttpError` when the request itself fails,
    /// `error.TursoError` for a non-OK HTTP status (both are also printed).
    fn executeRaw(self: *Client, sql: []const u8, args: []const []const u8) ![]const u8 {
        self.mutex.lock();
        defer self.mutex.unlock();

        var url_buf: [URL_BUF_SIZE]u8 = undefined;
        const url = std.fmt.bufPrint(&url_buf, "https://{s}/v2/pipeline", .{self.url}) catch
            return error.UrlTooLong;

        // build request body
        const body = try self.buildRequestBody(sql, args);
        defer self.allocator.free(body);

        var auth_buf: [AUTH_BUF_SIZE]u8 = undefined;
        const auth = std.fmt.bufPrint(&auth_buf, "Bearer {s}", .{self.token}) catch
            return error.AuthTooLong;

        var client: http.Client = .{ .allocator = self.allocator };
        defer client.deinit();

        var response_body: std.Io.Writer.Allocating = .init(self.allocator);
        errdefer response_body.deinit();

        const res = client.fetch(.{
            .location = .{ .url = url },
            .method = .POST,
            .headers = .{
                .content_type = .{ .override = "application/json" },
                .authorization = .{ .override = auth },
            },
            .payload = body,
            .response_writer = &response_body.writer,
        }) catch |err| {
            std.debug.print("turso request failed: {}\n", .{err});
            return error.HttpError;
        };

        if (res.status != .ok) {
            std.debug.print("turso error: {}\n", .{res.status});
            return error.TursoError;
        }

        return try response_body.toOwnedSlice();
    }

    /// Serialize the pipeline request: one "execute" request carrying `sql`
    /// and its args (each sent as a "text" value), followed by a "close"
    /// request. Caller owns the returned slice.
    fn buildRequestBody(self: *Client, sql: []const u8, args: []const []const u8) ![]const u8 {
        var body: std.Io.Writer.Allocating = .init(self.allocator);
        errdefer body.deinit();

        var jw: json.Stringify = .{ .writer = &body.writer };

        try jw.beginObject();
        try jw.objectField("requests");
        try jw.beginArray();

        // execute statement
        try jw.beginObject();
        try jw.objectField("type");
        try jw.write("execute");
        try jw.objectField("stmt");
        try jw.beginObject();
        try jw.objectField("sql");
        try jw.write(sql);

        // the "args" member is omitted entirely for statements without parameters
        if (args.len > 0) {
            try jw.objectField("args");
            try jw.beginArray();
            for (args) |arg| {
                try jw.beginObject();
                try jw.objectField("type");
                try jw.write("text");
                try jw.objectField("value");
                try jw.write(arg);
                try jw.endObject();
            }
            try jw.endArray();
        }

        try jw.endObject(); // stmt
        try jw.endObject(); // execute request

        // close statement
        try jw.beginObject();
        try jw.objectField("type");
        try jw.write("close");
        try jw.endObject();

        try jw.endArray(); // requests
        try jw.endObject(); // root

        return try body.toOwnedSlice();
    }
};

+1 -1

backend/src/http.zig

··· 2 2 const net = std.net; 3 3 const http = std.http; 4 4 const mem = std.mem; 5 - const db = @import("db.zig"); 5 + const db = @import("db/mod.zig"); 6 6 7 7 const HTTP_BUF_SIZE = 8192; 8 8 const QUERY_PARAM_BUF_SIZE = 64;

+1 -1

backend/src/main.zig

··· 2 2 const net = std.net; 3 3 const posix = std.posix; 4 4 const Thread = std.Thread; 5 - const db = @import("db.zig"); 5 + const db = @import("db/mod.zig"); 6 6 const http_server = @import("http.zig"); 7 7 const tap = @import("tap.zig"); 8 8

+1 -1

backend/src/tap.zig

··· 4 4 const posix = std.posix; 5 5 const Allocator = mem.Allocator; 6 6 const websocket = @import("websocket"); 7 - const db = @import("db.zig"); 7 + const db = @import("db/mod.zig"); 8 8 9 9 const DOCUMENT_COLLECTION = "pub.leaflet.document"; 10 10 const PUBLICATION_COLLECTION = "pub.leaflet.publication";

Configure Feed

Configure Feed