Minimal SQLite key-value store for OCaml
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Replace hand-rolled CREATE TABLE parser with menhir+sedlex

- parser.mly: conflict-free LR(1) grammar; handles structure (parens,
commas, table-vs-column dispatch)
- lexer.ml: sedlex tokenizer with case-preserving keyword tokens,
4 quoting styles, SQL comments
- ast.ml: column body classification (type affinity + constraints) is
done in OCaml code rather than in the grammar

Also fixes:
- CONSTRAINT name UNIQUE(...) now enforced (named table constraints)
- NULL columns now skip UNIQUE checks, per SQL semantics: NULL is never
equal to NULL, so multiple NULLs are allowed under a UNIQUE constraint

+520 -277
+4
dune-project
··· 1 1 (lang dune 3.21) 2 + (using menhir 3.0) 2 3 3 4 (name sqlite) 4 5 ··· 24 25 (cmdliner (>= 1.3)) 25 26 (vlog (>= 0.1)) 26 27 (tty (>= 0.1)) 28 + (menhir (>= 20230608)) 29 + (menhirLib (>= 20230608)) 30 + (sedlex (>= 3.0)) 27 31 (alcotest :with-test) 28 32 (crowbar :with-test)))
+89
lib/ast.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Thomas Gazagnaire. All rights reserved. 3 + SPDX-License-Identifier: MIT 4 + ---------------------------------------------------------------------------*) 5 + 6 + type col_token = 7 + | Tok_word of string 8 + | Tok_number of string 9 + | Tok_parens of col_token list 10 + 11 + type table_constraint = 12 + | Tbl_unique of string list 13 + | Tbl_primary_key of string list 14 + | Tbl_other 15 + 16 + type column_def = { 17 + name : string; 18 + affinity : string; 19 + is_rowid_alias : bool; 20 + has_unique : bool; 21 + } 22 + 23 + type create_table = { 24 + tbl_name : string; 25 + columns : column_def list; 26 + table_constraints : table_constraint list; 27 + } 28 + 29 + (* Column constraint keywords — when we see one of these after the column 30 + name and type, everything that follows is constraints, not type. *) 31 + let constraint_kw = function 32 + | "PRIMARY" | "NOT" | "UNIQUE" | "DEFAULT" | "CHECK" | "REFERENCES" 33 + | "COLLATE" | "GENERATED" | "AUTOINCREMENT" | "CONSTRAINT" | "ON" | "ASC" 34 + | "DESC" -> 35 + true 36 + | _ -> false 37 + 38 + (* Classify a column's token stream into type affinity + constraints. *) 39 + let classify_column name tokens = 40 + (* Split tokens into type part and constraint part. 41 + Type tokens: everything before the first constraint keyword. 42 + Constraint tokens: everything from the first constraint keyword on. *) 43 + let rec split_type acc = function 44 + | Tok_word w :: rest when constraint_kw (String.uppercase_ascii w) -> 45 + (List.rev acc, Tok_word w :: rest) 46 + | (Tok_word _ as t) :: rest -> split_type (t :: acc) rest 47 + | (Tok_number _ as t) :: rest -> split_type (t :: acc) rest 48 + | (Tok_parens _ as t) :: rest -> 49 + (* Parenthesized content after a type name is type params like 50 + DECIMAL(10,2). But only if we've seen a type name. 
*) 51 + if acc = [] then (List.rev acc, Tok_parens [] :: rest) 52 + else split_type (t :: acc) rest 53 + | [] -> (List.rev acc, []) 54 + in 55 + let type_tokens, constraint_tokens = split_type [] tokens in 56 + (* Build affinity string from type tokens. 57 + Parenthesized params attach to the preceding name without space: 58 + DECIMAL(10,2) not DECIMAL (10,2). *) 59 + let affinity = 60 + let rec to_string = function 61 + | Tok_word s -> s 62 + | Tok_number s -> s 63 + | Tok_parens inner -> 64 + "(" ^ String.concat "" (List.map to_string inner) ^ ")" 65 + in 66 + let rec join = function 67 + | [] -> "" 68 + | [ t ] -> to_string t 69 + | t :: (Tok_parens _ :: _ as rest) -> to_string t ^ join rest 70 + | t :: rest -> to_string t ^ " " ^ join rest 71 + in 72 + join type_tokens 73 + in 74 + (* Scan constraint tokens for PRIMARY KEY and UNIQUE *) 75 + let words = 76 + List.filter_map 77 + (function Tok_word w -> Some (String.uppercase_ascii w) | _ -> None) 78 + constraint_tokens 79 + in 80 + let rec has_pk = function 81 + | "PRIMARY" :: "KEY" :: _ -> true 82 + | _ :: rest -> has_pk rest 83 + | [] -> false 84 + in 85 + let has_unique = List.mem "UNIQUE" words in 86 + let is_rowid_alias = 87 + String.uppercase_ascii affinity = "INTEGER" && has_pk words 88 + in 89 + { name; affinity; is_rowid_alias; has_unique }
+7 -1
lib/dune
··· 1 1 (library 2 2 (name sqlite) 3 3 (public_name sqlite) 4 - (libraries btree eio fmt)) 4 + (libraries btree eio fmt sedlex menhirLib) 5 + (preprocess 6 + (pps sedlex.ppx))) 7 + 8 + (menhir 9 + (modules parser) 10 + (flags --table))
+123
lib/lexer.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Thomas Gazagnaire. All rights reserved. 3 + SPDX-License-Identifier: MIT 4 + 5 + Sedlex lexer for SQLite CREATE TABLE statements. 6 + ---------------------------------------------------------------------------*) 7 + 8 + exception Error of string 9 + 10 + (* Keyword constructors — all carry the original text for case preservation *) 11 + let keywords = 12 + [ 13 + ("CREATE", fun s -> Parser.CREATE s); 14 + ("TABLE", fun s -> Parser.TABLE s); 15 + ("IF", fun s -> Parser.IF s); 16 + ("NOT", fun s -> Parser.NOT s); 17 + ("EXISTS", fun s -> Parser.EXISTS s); 18 + ("PRIMARY", fun s -> Parser.PRIMARY s); 19 + ("KEY", fun s -> Parser.KEY s); 20 + ("UNIQUE", fun s -> Parser.UNIQUE s); 21 + ("NULL", fun s -> Parser.NULL s); 22 + ("DEFAULT", fun s -> Parser.DEFAULT s); 23 + ("CHECK", fun s -> Parser.CHECK s); 24 + ("REFERENCES", fun s -> Parser.REFERENCES s); 25 + ("COLLATE", fun s -> Parser.COLLATE s); 26 + ("GENERATED", fun s -> Parser.GENERATED s); 27 + ("ALWAYS", fun s -> Parser.ALWAYS s); 28 + ("AS", fun s -> Parser.AS s); 29 + ("AUTOINCREMENT", fun s -> Parser.AUTOINCREMENT s); 30 + ("FOREIGN", fun s -> Parser.FOREIGN s); 31 + ("CONSTRAINT", fun s -> Parser.CONSTRAINT s); 32 + ("ON", fun s -> Parser.ON s); 33 + ("ASC", fun s -> Parser.ASC s); 34 + ("DESC", fun s -> Parser.DESC s); 35 + ] 36 + 37 + let classify s = 38 + match List.assoc_opt (String.uppercase_ascii s) keywords with 39 + | Some mk -> mk s 40 + | None -> Parser.IDENT s 41 + 42 + (* sedlex patterns *) 43 + let digit = [%sedlex.regexp? '0' .. '9'] 44 + let alpha = [%sedlex.regexp? 'a' .. 'z' | 'A' .. 'Z'] 45 + let ident_start = [%sedlex.regexp? alpha | '_'] 46 + let ident_char = [%sedlex.regexp? alpha | digit | '_'] 47 + let ident = [%sedlex.regexp? ident_start, Star ident_char] 48 + 49 + let number = 50 + [%sedlex.regexp? 
Opt ('+' | '-'), Plus digit, Opt ('.', Plus digit)] 51 + 52 + let ws = [%sedlex.regexp? Plus (' ' | '\t' | '\n' | '\r')] 53 + 54 + (* Quoted identifier/string readers *) 55 + let read_until buf stop lexbuf = 56 + let rec loop () = 57 + match%sedlex lexbuf with 58 + | any -> 59 + let s = Sedlexing.Utf8.lexeme lexbuf in 60 + if s = stop then Buffer.contents buf 61 + else ( 62 + Buffer.add_string buf s; 63 + loop ()) 64 + | _ -> raise (Error ("unterminated " ^ stop ^ "-quoted identifier")) 65 + in 66 + loop () 67 + 68 + let read_double_quoted buf lexbuf = read_until buf "\"" lexbuf 69 + let read_bracket_quoted buf lexbuf = read_until buf "]" lexbuf 70 + let read_backtick_quoted buf lexbuf = read_until buf "`" lexbuf 71 + 72 + let read_single_quoted buf lexbuf = 73 + let rec loop () = 74 + match%sedlex lexbuf with 75 + | "''" -> 76 + Buffer.add_char buf '\''; 77 + loop () 78 + | '\'' -> Buffer.contents buf 79 + | any -> 80 + Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf); 81 + loop () 82 + | _ -> raise (Error "unterminated string literal") 83 + in 84 + loop () 85 + 86 + let rec token lexbuf = 87 + match%sedlex lexbuf with 88 + | ws -> token lexbuf 89 + | "--", Star (Compl '\n') -> token lexbuf 90 + | '(' -> Parser.LPAREN 91 + | ')' -> Parser.RPAREN 92 + | ',' -> Parser.COMMA 93 + | number -> Parser.NUMBER (Sedlexing.Utf8.lexeme lexbuf) 94 + | ident -> classify (Sedlexing.Utf8.lexeme lexbuf) 95 + | '"' -> Parser.IDENT (read_double_quoted (Buffer.create 32) lexbuf) 96 + | '[' -> Parser.IDENT (read_bracket_quoted (Buffer.create 32) lexbuf) 97 + | '`' -> Parser.IDENT (read_backtick_quoted (Buffer.create 32) lexbuf) 98 + | '\'' -> 99 + let _s = read_single_quoted (Buffer.create 32) lexbuf in 100 + (* String literals in DDL are typically DEFAULT values; 101 + treat them as numbers (opaque values) in the token stream *) 102 + Parser.NUMBER ("'" ^ _s ^ "'") 103 + | eof -> Parser.EOF 104 + | any -> 105 + (* Pass through operators, signs, etc. 
as NUMBER tokens *) 106 + Parser.NUMBER (Sedlexing.Utf8.lexeme lexbuf) 107 + | _ -> raise (Error "unexpected end of input") 108 + 109 + (* Bridge sedlex with menhir via MenhirLib.Convert *) 110 + let parse sql = 111 + let lexbuf = Sedlexing.Utf8.from_string sql in 112 + let provider () = 113 + let tok = token lexbuf in 114 + let s, e = Sedlexing.lexing_positions lexbuf in 115 + (tok, s, e) 116 + in 117 + try 118 + Ok 119 + (MenhirLib.Convert.Simplified.traditional2revised Parser.create_table 120 + provider) 121 + with 122 + | Parser.Error -> Error "parse error" 123 + | Error msg -> Error msg
+161
lib/parser.mly
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Thomas Gazagnaire. All rights reserved. 3 + SPDX-License-Identifier: MIT 4 + 5 + Menhir grammar for SQLite CREATE TABLE statements. 6 + 7 + Column bodies are collected as token lists and classified in 8 + Ast.classify_column — the grammar handles structure (parens, 9 + commas, table-vs-column dispatch), OCaml handles semantics. 10 + ---------------------------------------------------------------------------*) 11 + 12 + %{ open Ast %} 13 + 14 + (* Payload tokens *) 15 + %token <string> IDENT 16 + %token <string> NUMBER 17 + 18 + (* SQL keywords — all carry original text for case preservation *) 19 + %token <string> CREATE TABLE IF NOT EXISTS 20 + %token <string> PRIMARY KEY UNIQUE NULL DEFAULT CHECK REFERENCES 21 + %token <string> COLLATE GENERATED ALWAYS AS AUTOINCREMENT 22 + %token <string> FOREIGN CONSTRAINT ON ASC DESC 23 + 24 + (* Delimiters *) 25 + %token LPAREN RPAREN COMMA EOF 26 + 27 + %start <Ast.create_table> create_table 28 + 29 + %% 30 + 31 + create_table: 32 + | CREATE; TABLE; if_not_exists; name = ident; 33 + LPAREN; defs = separated_nonempty_list(COMMA, column_or_constraint); RPAREN; 34 + EOF 35 + { let columns, tcs = 36 + List.fold_left (fun (cs, ts) -> function 37 + | `Col c -> (c :: cs, ts) 38 + | `Tbl t -> (cs, t :: ts)) 39 + ([], []) defs 40 + in 41 + { tbl_name = name; 42 + columns = List.rev columns; 43 + table_constraints = List.rev tcs } } 44 + 45 + if_not_exists: 46 + | IF; NOT; EXISTS {} 47 + | {} 48 + 49 + (* Dispatch: table constraints start with PRIMARY, UNIQUE, FOREIGN, 50 + CHECK, or CONSTRAINT. Everything else is a column definition. 
*) 51 + column_or_constraint: 52 + | t = table_constraint { `Tbl t } 53 + | name = col_name; body = col_body { `Col (classify_column name body) } 54 + 55 + (* Column names: IDENT plus keywords that never start a table constraint *) 56 + col_name: 57 + | s = IDENT { s } 58 + | s = TABLE { s } 59 + | s = CREATE { s } 60 + | s = KEY { s } 61 + | s = NOT { s } 62 + | s = NULL { s } 63 + | s = DEFAULT { s } 64 + | s = REFERENCES { s } 65 + | s = COLLATE { s } 66 + | s = GENERATED { s } 67 + | s = ALWAYS { s } 68 + | s = AS { s } 69 + | s = AUTOINCREMENT { s } 70 + | s = EXISTS { s } 71 + | s = IF { s } 72 + | s = ON { s } 73 + | s = ASC { s } 74 + | s = DESC { s } 75 + 76 + (* ── Table-level constraints ─────────────────────────────────── *) 77 + 78 + table_constraint: 79 + | UNIQUE; LPAREN; cs = names; RPAREN 80 + { Tbl_unique cs } 81 + | PRIMARY; KEY; LPAREN; cs = names; RPAREN 82 + { Tbl_primary_key cs } 83 + | CONSTRAINT; ident; UNIQUE; LPAREN; cs = names; RPAREN 84 + { Tbl_unique cs } 85 + | CONSTRAINT; ident; PRIMARY; KEY; LPAREN; cs = names; RPAREN 86 + { Tbl_primary_key cs } 87 + | FOREIGN; KEY; LPAREN; names; RPAREN; references_tail 88 + { Tbl_other } 89 + | CONSTRAINT; ident; FOREIGN; KEY; LPAREN; names; RPAREN; references_tail 90 + { Tbl_other } 91 + | CHECK; LPAREN; skip; RPAREN 92 + { Tbl_other } 93 + | CONSTRAINT; ident; CHECK; LPAREN; skip; RPAREN 94 + { Tbl_other } 95 + 96 + references_tail: 97 + | REFERENCES; ident {} 98 + | REFERENCES; ident; LPAREN; names; RPAREN {} 99 + 100 + names: 101 + | separated_nonempty_list(COMMA, ident) { $1 } 102 + 103 + (* ── Column body: flat token list ────────────────────────────── *) 104 + 105 + col_body: 106 + | list(col_token) { $1 } 107 + 108 + %inline col_token: 109 + | s = IDENT { Tok_word s } 110 + | s = NUMBER { Tok_number s } 111 + | k = keyword { Tok_word k } 112 + | LPAREN; ts = list(paren_token); RPAREN { Tok_parens ts } 113 + 114 + paren_token: 115 + | s = IDENT { Tok_word s } 116 + | s = NUMBER { Tok_number 
s } 117 + | k = keyword { Tok_word k } 118 + | COMMA { Tok_word "," } 119 + | LPAREN; ts = list(paren_token); RPAREN { Tok_parens ts } 120 + 121 + (* ── Keywords usable as column body tokens ───────────────────── *) 122 + 123 + %inline keyword: 124 + | s = PRIMARY { s } 125 + | s = KEY { s } 126 + | s = NOT { s } 127 + | s = NULL { s } 128 + | s = UNIQUE { s } 129 + | s = DEFAULT { s } 130 + | s = CHECK { s } 131 + | s = REFERENCES { s } 132 + | s = COLLATE { s } 133 + | s = GENERATED { s } 134 + | s = ALWAYS { s } 135 + | s = AS { s } 136 + | s = AUTOINCREMENT { s } 137 + | s = CONSTRAINT { s } 138 + | s = EXISTS { s } 139 + | s = CREATE { s } 140 + | s = TABLE { s } 141 + | s = IF { s } 142 + | s = ON { s } 143 + | s = ASC { s } 144 + | s = DESC { s } 145 + | s = FOREIGN { s } 146 + 147 + (* ── Skip balanced parens (CHECK expressions) ────────────────── *) 148 + 149 + skip: 150 + | list(skip_atom) {} 151 + 152 + skip_atom: 153 + | IDENT {} | NUMBER {} 154 + | keyword {} 155 + | COMMA {} 156 + | LPAREN; skip; RPAREN {} 157 + 158 + (* ── Identifiers (including quoted) ──────────────────────────── *) 159 + 160 + ident: 161 + | s = IDENT { s }
+66 -276
lib/sqlite.ml
··· 70 70 let names = List.map (fun gt -> gt.g_schema.tbl_name) t.all_tables in 71 71 Fmt.pf ppf "sqlite(%a)" Fmt.(list ~sep:(any ",") string) names 72 72 73 - (* CREATE TABLE parser *) 74 - 75 - let is_space = function ' ' | '\t' | '\n' | '\r' -> true | _ -> false 76 - 77 - (* Split a string by commas, respecting nested parentheses *) 78 - let split_respecting_parens s = 79 - let len = String.length s in 80 - let buf = Buffer.create 64 in 81 - let parts = ref [] in 82 - let depth = ref 0 in 83 - for i = 0 to len - 1 do 84 - match s.[i] with 85 - | '(' -> 86 - incr depth; 87 - Buffer.add_char buf '(' 88 - | ')' -> 89 - decr depth; 90 - Buffer.add_char buf ')' 91 - | ',' when !depth = 0 -> 92 - parts := String.trim (Buffer.contents buf) :: !parts; 93 - Buffer.clear buf 94 - | c -> Buffer.add_char buf c 95 - done; 96 - let last = String.trim (Buffer.contents buf) in 97 - if last <> "" then parts := last :: !parts; 98 - List.rev !parts 99 - 100 - (* Find the position of the matching closing paren *) 101 - let matching_paren s start = 102 - let len = String.length s in 103 - let rec loop i depth = 104 - if i >= len then None 105 - else 106 - match s.[i] with 107 - | '(' -> loop (i + 1) (depth + 1) 108 - | ')' -> if depth = 0 then Some i else loop (i + 1) (depth - 1) 109 - | '\'' -> 110 - (* Skip single-quoted string literal *) 111 - let rec skip j = 112 - if j >= len then loop j 0 113 - else if s.[j] = '\'' then 114 - if j + 1 < len && s.[j + 1] = '\'' then skip (j + 2) 115 - else loop (j + 1) depth 116 - else skip (j + 1) 117 - in 118 - skip (i + 1) 119 - | '"' -> 120 - let rec skip j = 121 - if j >= len then loop j 0 122 - else if s.[j] = '"' then loop (j + 1) depth 123 - else skip (j + 1) 124 - in 125 - skip (i + 1) 126 - | _ -> loop (i + 1) depth 127 - in 128 - loop (start + 1) 0 129 - 130 - (* Consume parenthesized content into buf, advancing i past the closing paren *) 131 - let add_paren_content buf s i len = 132 - Buffer.add_char buf '('; 133 - let depth = ref 
1 in 134 - incr i; 135 - while !i < len && !depth > 0 do 136 - (match s.[!i] with '(' -> incr depth | ')' -> decr depth | _ -> ()); 137 - Buffer.add_char buf s.[!i]; 138 - incr i 139 - done 140 - 141 - (* Tokenize a column definition into words, handling quoted identifiers 142 - and parenthesized type parameters like DECIMAL(10,2) *) 143 - let tokenize s = 144 - let len = String.length s in 145 - let buf = Buffer.create 16 in 146 - let tokens = ref [] in 147 - let flush () = 148 - if Buffer.length buf > 0 then begin 149 - tokens := Buffer.contents buf :: !tokens; 150 - Buffer.clear buf 151 - end 152 - in 153 - let i = ref 0 in 154 - while !i < len do 155 - match s.[!i] with 156 - | c when is_space c -> 157 - flush (); 158 - incr i 159 - | '(' -> 160 - (* Include parenthesized content as part of current token *) 161 - add_paren_content buf s i len 162 - | '"' -> 163 - (* Double-quoted identifier: strip quotes *) 164 - incr i; 165 - while !i < len && s.[!i] <> '"' do 166 - Buffer.add_char buf s.[!i]; 167 - incr i 168 - done; 169 - if !i < len then incr i 170 - | '[' -> 171 - (* Bracket-quoted identifier: strip brackets *) 172 - incr i; 173 - while !i < len && s.[!i] <> ']' do 174 - Buffer.add_char buf s.[!i]; 175 - incr i 176 - done; 177 - if !i < len then incr i 178 - | '`' -> 179 - (* Backtick-quoted identifier: strip backticks *) 180 - incr i; 181 - while !i < len && s.[!i] <> '`' do 182 - Buffer.add_char buf s.[!i]; 183 - incr i 184 - done; 185 - if !i < len then incr i 186 - | c -> 187 - Buffer.add_char buf c; 188 - incr i 189 - done; 190 - flush (); 191 - List.rev !tokens 192 - 193 - (* Keywords that start constraint clauses in column definitions *) 194 - let constraint_keywords = 195 - [ 196 - "PRIMARY"; 197 - "NOT"; 198 - "UNIQUE"; 199 - "DEFAULT"; 200 - "CHECK"; 201 - "REFERENCES"; 202 - "COLLATE"; 203 - "GENERATED"; 204 - "AUTOINCREMENT"; 205 - "ASC"; 206 - "DESC"; 207 - "ON"; 208 - "CONSTRAINT"; 209 - ] 210 - 211 - (* Check if a column def is a table-level 
constraint *) 212 - let is_table_constraint s = 213 - let upper = String.uppercase_ascii (String.trim s) in 214 - let starts_with prefix = 215 - String.length upper >= String.length prefix 216 - && String.sub upper 0 (String.length prefix) = prefix 217 - in 218 - starts_with "PRIMARY KEY(" || starts_with "PRIMARY KEY " 219 - || starts_with "UNIQUE(" || starts_with "UNIQUE " || starts_with "FOREIGN KEY" 220 - || starts_with "CONSTRAINT " || starts_with "CHECK(" || starts_with "CHECK " 221 - 222 - let parse_column_def s = 223 - if is_table_constraint s then None 224 - else 225 - let tokens = tokenize s in 226 - match tokens with 227 - | [] -> None 228 - | name :: rest -> 229 - (* Collect type tokens until we hit a constraint keyword *) 230 - let rec collect_type acc = function 231 - | [] -> (List.rev acc, []) 232 - | tok :: _ as all 233 - when List.mem (String.uppercase_ascii tok) constraint_keywords -> 234 - (List.rev acc, all) 235 - | tok :: tl -> collect_type (tok :: acc) tl 236 - in 237 - let type_tokens, constraint_tokens = collect_type [] rest in 238 - let affinity = String.concat " " type_tokens in 239 - (* INTEGER PRIMARY KEY is a rowid alias *) 240 - let is_rowid_alias = 241 - String.uppercase_ascii affinity = "INTEGER" 242 - && 243 - let rec has_pk = function 244 - | "PRIMARY" :: "KEY" :: _ -> true 245 - | _ :: tl -> has_pk tl 246 - | [] -> false 247 - in 248 - has_pk (List.map String.uppercase_ascii constraint_tokens) 249 - in 250 - Some 251 - { 252 - col_name = name; 253 - col_affinity = affinity; 254 - col_is_rowid_alias = is_rowid_alias; 255 - } 73 + (* CREATE TABLE parser — delegates to Lexer.parse (menhir + sedlex) *) 256 74 257 - (* Extract column names from a table-level UNIQUE(...) 
constraint *) 258 - let parse_unique_constraint s = 259 - let upper = String.uppercase_ascii (String.trim s) in 260 - let starts_with prefix = 261 - String.length upper >= String.length prefix 262 - && String.sub upper 0 (String.length prefix) = prefix 263 - in 264 - if starts_with "UNIQUE(" || starts_with "UNIQUE " then 265 - match String.index_opt s '(' with 266 - | None -> None 267 - | Some start -> ( 268 - match matching_paren s start with 269 - | None -> None 270 - | Some end_ -> 271 - let inner = String.sub s (start + 1) (end_ - start - 1) in 272 - let cols = String.split_on_char ',' inner |> List.map String.trim in 273 - Some cols) 274 - else None 275 - 276 - (* Check if a column definition has a UNIQUE constraint *) 277 - let column_has_unique s = 278 - if is_table_constraint s then false 279 - else 280 - let tokens = tokenize s in 281 - match tokens with 282 - | [] -> false 283 - | _ :: rest -> 284 - List.exists (fun t -> String.uppercase_ascii t = "UNIQUE") rest 75 + let parse_sql sql = 76 + match Lexer.parse sql with Ok ct -> Some ct | Error _ -> None 285 77 286 78 let parse_create_table sql = 287 - match String.index_opt sql '(' with 79 + match parse_sql sql with 288 80 | None -> [] 289 - | Some start -> ( 290 - match matching_paren sql start with 291 - | None -> [] 292 - | Some body_end -> 293 - let body = String.sub sql (start + 1) (body_end - start - 1) in 294 - let parts = split_respecting_parens body in 295 - List.filter_map parse_column_def parts) 81 + | Some ct -> 82 + List.map 83 + (fun (c : Ast.column_def) -> 84 + { 85 + col_name = c.name; 86 + col_affinity = c.affinity; 87 + col_is_rowid_alias = c.is_rowid_alias; 88 + }) 89 + ct.columns 296 90 297 - (* Parse all unique constraints from a CREATE TABLE statement. 298 - Returns a list of column-name lists. 
*) 299 91 let parse_unique_constraints sql columns = 300 - match String.index_opt sql '(' with 92 + match parse_sql sql with 301 93 | None -> [] 302 - | Some start -> ( 303 - match matching_paren sql start with 304 - | None -> [] 305 - | Some body_end -> 306 - let body = String.sub sql (start + 1) (body_end - start - 1) in 307 - let parts = split_respecting_parens body in 308 - (* Table-level UNIQUE(...) constraints *) 309 - let table_level = List.filter_map parse_unique_constraint parts in 310 - (* Column-level UNIQUE constraints *) 311 - let column_level = 312 - List.filter_map 313 - (fun part -> 314 - if column_has_unique part then 315 - match tokenize part with 316 - | name :: _ -> Some [ name ] 317 - | [] -> None 318 - else None) 319 - parts 320 - in 321 - (* Deduplicate: don't add column-level if already in table-level *) 322 - let all = table_level @ column_level in 323 - (* Resolve column names to indices *) 324 - let col_names = List.map (fun c -> c.col_name) columns in 325 - List.filter_map 326 - (fun constraint_cols -> 327 - let indices = 328 - List.filter_map 329 - (fun name -> 330 - let rec find_idx i = function 331 - | [] -> None 332 - | n :: _ when n = name -> Some i 333 - | _ :: rest -> find_idx (i + 1) rest 334 - in 335 - find_idx 0 col_names) 336 - constraint_cols 337 - in 338 - if List.length indices = List.length constraint_cols then 339 - let name = String.concat ", " constraint_cols in 340 - Some (indices, name) 341 - else None) 342 - all) 94 + | Some ct -> 95 + let col_names = List.map (fun c -> c.col_name) columns in 96 + let find_idx name = 97 + let rec go i = function 98 + | [] -> None 99 + | n :: _ when n = name -> Some i 100 + | _ :: rest -> go (i + 1) rest 101 + in 102 + go 0 col_names 103 + in 104 + let resolve_cols names = 105 + let indices = List.filter_map find_idx names in 106 + if List.length indices = List.length names then 107 + Some (indices, String.concat ", " names) 108 + else None 109 + in 110 + (* Table-level UNIQUE 
constraints *) 111 + let table_level = 112 + List.filter_map 113 + (function Ast.Tbl_unique cols -> resolve_cols cols | _ -> None) 114 + ct.table_constraints 115 + in 116 + (* Column-level UNIQUE constraints *) 117 + let column_level = 118 + List.filter_map 119 + (fun (c : Ast.column_def) -> 120 + if c.has_unique then resolve_cols [ c.name ] else None) 121 + ct.columns 122 + in 123 + table_level @ column_level 343 124 344 125 (* Find the index of the rowid alias column, if any *) 345 126 let rowid_alias_index columns = ··· 371 152 else v) 372 153 values 373 154 374 - (* Encode the indexed column values as a Record string for Btree.Index *) 155 + (* Encode the indexed column values as a Record string for Btree.Index. 156 + Returns None if any indexed column is NULL — per SQL semantics, 157 + NULL is never equal to NULL, so UNIQUE constraints allow multiple NULLs. *) 375 158 let encode_index_key ui values = 376 159 let key_values = 377 160 List.map ··· 380 163 else Btree.Record.Vnull) 381 164 ui.ui_columns 382 165 in 383 - Btree.Record.encode key_values 166 + if List.exists (fun v -> v = Btree.Record.Vnull) key_values then None 167 + else Some (Btree.Record.encode key_values) 384 168 385 169 (* Create persistent unique indexes for a table *) 386 170 let make_unique_indexes pager ~tbl_name constraints = ··· 404 188 let values = fixup_values ~schema ~rowid values in 405 189 List.iter 406 190 (fun ui -> 407 - let key = encode_index_key ui values in 408 - Btree.Index.insert ui.ui_btree key) 191 + match encode_index_key ui values with 192 + | None -> () 193 + | Some key -> Btree.Index.insert ui.ui_btree key) 409 194 indexes) 410 195 411 196 (* Standard kv table schema *) ··· 687 472 Btree.Table.iter btree (fun rowid payload -> 688 473 let values = Btree.Record.decode payload in 689 474 let values = fixup_values ~schema ~rowid values in 690 - let key = encode_index_key ui values in 691 - Btree.Index.insert ui.ui_btree key); 475 + match encode_index_key ui values with 476 + 
| None -> () 477 + | Some key -> Btree.Index.insert ui.ui_btree key); 692 478 ui) 693 479 constraints 694 480 ··· 923 709 let full_values = fixup_values ~schema:gt.g_schema ~rowid values in 924 710 List.iter 925 711 (fun ui -> 926 - let key = encode_index_key ui full_values in 927 - if Btree.Index.mem ui.ui_btree key then 928 - raise (Unique_violation ui.ui_name)) 712 + match encode_index_key ui full_values with 713 + | None -> () (* NULL columns — UNIQUE allows multiple NULLs *) 714 + | Some key -> 715 + if Btree.Index.mem ui.ui_btree key then 716 + raise (Unique_violation ui.ui_name)) 929 717 gt.g_unique_indexes; 930 718 let record = Btree.Record.encode record_values in 931 719 Btree.Table.insert gt.g_btree ~rowid record; 932 720 (* Update persistent unique indexes *) 933 721 List.iter 934 722 (fun ui -> 935 - let key = encode_index_key ui full_values in 936 - Btree.Index.insert ui.ui_btree key) 723 + match encode_index_key ui full_values with 724 + | None -> () 725 + | Some key -> Btree.Index.insert ui.ui_btree key) 937 726 gt.g_unique_indexes; 938 727 rowid 939 728 ··· 947 736 let values = fixup_values ~schema:gt.g_schema ~rowid values in 948 737 List.iter 949 738 (fun ui -> 950 - let key = encode_index_key ui values in 951 - Btree.Index.delete ui.ui_btree key) 739 + match encode_index_key ui values with 740 + | None -> () 741 + | Some key -> Btree.Index.delete ui.ui_btree key) 952 742 gt.g_unique_indexes); 953 743 Btree.Table.delete gt.g_btree rowid 954 744
+3
sqlite.opam
··· 19 19 "cmdliner" {>= "1.3"} 20 20 "vlog" {>= "0.1"} 21 21 "tty" {>= "0.1"} 22 + "menhir" {>= "20230608"} 23 + "menhirLib" {>= "20230608"} 24 + "sedlex" {>= "3.0"} 22 25 "alcotest" {with-test} 23 26 "crowbar" {with-test} 24 27 "odoc" {with-doc}
+67
test/test_sqlite.ml
··· 1091 1091 let rows = Sqlite.read_table db "t" in 1092 1092 Alcotest.(check int) "two rows" 2 (List.length rows) 1093 1093 1094 + let test_unique_allows_multiple_nulls () = 1095 + with_temp_db @@ fun _fs db -> 1096 + Sqlite.create_table db 1097 + ~sql:"CREATE TABLE t (id INTEGER PRIMARY KEY, email TEXT UNIQUE)"; 1098 + (* Two NULLs should both succeed — NULL is never equal to NULL *) 1099 + let _ = Sqlite.insert db ~table:"t" [ Sqlite.Vnull; Sqlite.Vnull ] in 1100 + let _ = Sqlite.insert db ~table:"t" [ Sqlite.Vnull; Sqlite.Vnull ] in 1101 + let rows = Sqlite.read_table db "t" in 1102 + Alcotest.(check int) "two rows with NULL" 2 (List.length rows); 1103 + (* But a non-NULL duplicate still fails *) 1104 + let _ = 1105 + Sqlite.insert db ~table:"t" [ Sqlite.Vnull; Sqlite.Vtext "a@b.com" ] 1106 + in 1107 + try 1108 + let _ = 1109 + Sqlite.insert db ~table:"t" [ Sqlite.Vnull; Sqlite.Vtext "a@b.com" ] 1110 + in 1111 + Alcotest.fail "should have raised Unique_violation" 1112 + with Sqlite.Unique_violation _ -> () 1113 + 1114 + let test_unique_composite_null () = 1115 + with_temp_db @@ fun _fs db -> 1116 + Sqlite.create_table db 1117 + ~sql:"CREATE TABLE t (a TEXT, b TEXT, c TEXT, UNIQUE(a, b))"; 1118 + (* If any column in the composite key is NULL, duplicates are allowed *) 1119 + let _ = 1120 + Sqlite.insert db ~table:"t" 1121 + [ Sqlite.Vnull; Sqlite.Vtext "y"; Sqlite.Vtext "z1" ] 1122 + in 1123 + let _ = 1124 + Sqlite.insert db ~table:"t" 1125 + [ Sqlite.Vnull; Sqlite.Vtext "y"; Sqlite.Vtext "z2" ] 1126 + in 1127 + let rows = Sqlite.read_table db "t" in 1128 + Alcotest.(check int) "both rows with partial NULL" 2 (List.length rows) 1129 + 1094 1130 let test_unique_persists () = 1095 1131 Eio_main.run @@ fun env -> 1096 1132 let cwd = Eio.Stdenv.cwd env in ··· 1118 1154 Alcotest.fail "should have raised Unique_violation after reopen" 1119 1155 with Sqlite.Unique_violation _ -> ()); 1120 1156 Sqlite.close db) 1157 + 1158 + let test_unique_named_constraint () = 1159 + 
with_temp_db @@ fun _fs db -> 1160 + Sqlite.create_table db 1161 + ~sql: 1162 + "CREATE TABLE t (id INTEGER PRIMARY KEY, provider TEXT, uid TEXT, \ 1163 + CONSTRAINT uq_identity UNIQUE(provider, uid))"; 1164 + let _ = 1165 + Sqlite.insert db ~table:"t" 1166 + [ Sqlite.Vnull; Sqlite.Vtext "github"; Sqlite.Vtext "123" ] 1167 + in 1168 + (try 1169 + let _ = 1170 + Sqlite.insert db ~table:"t" 1171 + [ Sqlite.Vnull; Sqlite.Vtext "github"; Sqlite.Vtext "123" ] 1172 + in 1173 + Alcotest.fail "should have raised Unique_violation" 1174 + with Sqlite.Unique_violation cols -> 1175 + Alcotest.(check string) "columns" "provider, uid" cols); 1176 + (* Different values should succeed *) 1177 + let _ = 1178 + Sqlite.insert db ~table:"t" 1179 + [ Sqlite.Vnull; Sqlite.Vtext "github"; Sqlite.Vtext "456" ] 1180 + in 1181 + () 1121 1182 1122 1183 let suite = 1123 1184 ( "sqlite", ··· 1245 1306 Alcotest.test_case "unique allows distinct" `Quick 1246 1307 test_unique_allows_distinct; 1247 1308 Alcotest.test_case "unique persists" `Quick test_unique_persists; 1309 + Alcotest.test_case "unique named constraint" `Quick 1310 + test_unique_named_constraint; 1311 + Alcotest.test_case "unique allows multiple NULLs" `Quick 1312 + test_unique_allows_multiple_nulls; 1313 + Alcotest.test_case "unique composite NULL" `Quick 1314 + test_unique_composite_null; 1248 1315 ]; 1249 1316 ] )