Minimal SQLite key-value store for OCaml
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

sqlite: replace sedlex+PPX with ocamllex (E335)

The CREATE TABLE lexer matched on ASCII characters only — punctuation,
keywords, identifiers, numbers, and quoted strings whose body bytes
pass through verbatim. None of sedlex's Unicode classes were used, so
sedlex.ppx was paying for nothing while merlint flagged every
PPX-expanded [__sedlex_state_*] / [__sedlex_partition_*] identifier as
double-underscore module access (~68 spurious E335 hits).

Convert lib/lexer.ml -> lib/lexer.mll. The grammar is unchanged: same
keyword table, same quoted-identifier and single-quoted string readers
(now ocamllex sub-rules), same NUMBER fallback for operators / passthrough
bytes (UTF-8 round-trips because ocamllex's [_] matches any byte).
Drop sedlex, sedlex.ppx, and menhirLib from the library deps; menhir's
traditional API plugs into [Parser.create_table token lexbuf] directly.

+118 -130
+4 -5
lib/dune
··· 1 1 (library 2 2 (name sqlite) 3 3 (public_name sqlite) 4 - (libraries btree eio fmt sedlex menhirLib wal) 5 - (preprocess 6 - (pps sedlex.ppx))) 4 + (libraries btree eio fmt wal)) 5 + 6 + (ocamllex lexer) 7 7 8 8 (menhir 9 - (modules parser) 10 - (flags --table)) 9 + (modules parser))
-123
lib/lexer.ml
··· 1 - (*--------------------------------------------------------------------------- 2 - Copyright (c) 2025 Thomas Gazagnaire. All rights reserved. 3 - SPDX-License-Identifier: MIT 4 - 5 - Sedlex lexer for SQLite CREATE TABLE statements. 6 - ---------------------------------------------------------------------------*) 7 - 8 - exception Error of string 9 - 10 - (* Keyword constructors — all carry the original text for case preservation *) 11 - let keywords = 12 - [ 13 - ("CREATE", fun s -> Parser.CREATE s); 14 - ("TABLE", fun s -> Parser.TABLE s); 15 - ("IF", fun s -> Parser.IF s); 16 - ("NOT", fun s -> Parser.NOT s); 17 - ("EXISTS", fun s -> Parser.EXISTS s); 18 - ("PRIMARY", fun s -> Parser.PRIMARY s); 19 - ("KEY", fun s -> Parser.KEY s); 20 - ("UNIQUE", fun s -> Parser.UNIQUE s); 21 - ("NULL", fun s -> Parser.NULL s); 22 - ("DEFAULT", fun s -> Parser.DEFAULT s); 23 - ("CHECK", fun s -> Parser.CHECK s); 24 - ("REFERENCES", fun s -> Parser.REFERENCES s); 25 - ("COLLATE", fun s -> Parser.COLLATE s); 26 - ("GENERATED", fun s -> Parser.GENERATED s); 27 - ("ALWAYS", fun s -> Parser.ALWAYS s); 28 - ("AS", fun s -> Parser.AS s); 29 - ("AUTOINCREMENT", fun s -> Parser.AUTOINCREMENT s); 30 - ("FOREIGN", fun s -> Parser.FOREIGN s); 31 - ("CONSTRAINT", fun s -> Parser.CONSTRAINT s); 32 - ("ON", fun s -> Parser.ON s); 33 - ("ASC", fun s -> Parser.ASC s); 34 - ("DESC", fun s -> Parser.DESC s); 35 - ] 36 - 37 - let classify s = 38 - match List.assoc_opt (String.uppercase_ascii s) keywords with 39 - | Some mk -> mk s 40 - | None -> Parser.IDENT s 41 - 42 - (* sedlex patterns *) 43 - let digit = [%sedlex.regexp? '0' .. '9'] 44 - let alpha = [%sedlex.regexp? 'a' .. 'z' | 'A' .. 'Z'] 45 - let ident_start = [%sedlex.regexp? alpha | '_'] 46 - let ident_char = [%sedlex.regexp? alpha | digit | '_'] 47 - let ident = [%sedlex.regexp? ident_start, Star ident_char] 48 - 49 - let number = 50 - [%sedlex.regexp? Opt ('+' | '-'), Plus digit, Opt ('.', Plus digit)] 51 - 52 - let ws = [%sedlex.regexp? Plus (' ' | '\t' | '\n' | '\r')] 53 - 54 - (* Quoted identifier/string readers *) 55 - let read_until buf stop lexbuf = 56 - let rec loop () = 57 - match%sedlex lexbuf with 58 - | any -> 59 - let s = Sedlexing.Utf8.lexeme lexbuf in 60 - if s = stop then Buffer.contents buf 61 - else ( 62 - Buffer.add_string buf s; 63 - loop ()) 64 - | _ -> raise (Error ("unterminated " ^ stop ^ "-quoted identifier")) 65 - in 66 - loop () 67 - 68 - let read_double_quoted buf lexbuf = read_until buf "\"" lexbuf 69 - let read_bracket_quoted buf lexbuf = read_until buf "]" lexbuf 70 - let read_backtick_quoted buf lexbuf = read_until buf "`" lexbuf 71 - 72 - let read_single_quoted buf lexbuf = 73 - let rec loop () = 74 - match%sedlex lexbuf with 75 - | "''" -> 76 - Buffer.add_char buf '\''; 77 - loop () 78 - | '\'' -> Buffer.contents buf 79 - | any -> 80 - Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf); 81 - loop () 82 - | _ -> raise (Error "unterminated string literal") 83 - in 84 - loop () 85 - 86 - let rec token lexbuf = 87 - match%sedlex lexbuf with 88 - | ws -> token lexbuf 89 - | "--", Star (Compl '\n') -> token lexbuf 90 - | '(' -> Parser.LPAREN 91 - | ')' -> Parser.RPAREN 92 - | ',' -> Parser.COMMA 93 - | number -> Parser.NUMBER (Sedlexing.Utf8.lexeme lexbuf) 94 - | ident -> classify (Sedlexing.Utf8.lexeme lexbuf) 95 - | '"' -> Parser.IDENT (read_double_quoted (Buffer.create 32) lexbuf) 96 - | '[' -> Parser.IDENT (read_bracket_quoted (Buffer.create 32) lexbuf) 97 - | '`' -> Parser.IDENT (read_backtick_quoted (Buffer.create 32) lexbuf) 98 - | '\'' -> 99 - let s = read_single_quoted (Buffer.create 32) lexbuf in 100 - (* String literals in DDL are typically DEFAULT values; 101 - treat them as numbers (opaque values) in the token stream *) 102 - Parser.NUMBER ("'" ^ s ^ "'") 103 - | eof -> Parser.EOF 104 - | any -> 105 - (* Pass through operators, signs, etc. as NUMBER tokens *) 106 - Parser.NUMBER (Sedlexing.Utf8.lexeme lexbuf) 107 - | _ -> raise (Error "unexpected end of input") 108 - 109 - (* Bridge sedlex with menhir via MenhirLib.Convert *) 110 - let parse sql = 111 - let lexbuf = Sedlexing.Utf8.from_string sql in 112 - let provider () = 113 - let tok = token lexbuf in 114 - let s, e = Sedlexing.lexing_positions lexbuf in 115 - (tok, s, e) 116 - in 117 - try 118 - Ok 119 - (MenhirLib.Convert.Simplified.traditional2revised Parser.create_table 120 - provider) 121 - with 122 - | Parser.Error -> Error "parse error" 123 - | Error msg -> Error msg
+2 -2
lib/lexer.mli
··· 1 - (** Sedlex lexer for SQLite CREATE TABLE statements. *) 1 + (** ocamllex lexer for SQLite CREATE TABLE statements. *) 2 2 3 3 exception Error of string 4 4 (** Raised on lexer errors (unterminated strings, unexpected input). *) 5 5 6 - val token : Sedlexing.lexbuf -> Parser.token 6 + val token : Lexing.lexbuf -> Parser.token 7 7 (** [token lexbuf] returns the next token. *) 8 8 9 9 val parse : string -> (Ast.create_table, string) result
+112
lib/lexer.mll
··· 1 + { 2 + (*--------------------------------------------------------------------------- 3 + Copyright (c) 2025 Thomas Gazagnaire. All rights reserved. 4 + SPDX-License-Identifier: MIT 5 + 6 + ocamllex lexer for SQLite CREATE TABLE statements. 7 + 8 + The grammar is ASCII-only at the punctuation / keyword level; quoted 9 + identifiers and string literals pass through the body bytes verbatim, 10 + which means UTF-8 content round-trips untouched. 11 + ---------------------------------------------------------------------------*) 12 + 13 + exception Error of string 14 + 15 + (* Keyword constructors — all carry the original text for case preservation *) 16 + let keywords = 17 + [ 18 + ("CREATE", fun s -> Parser.CREATE s); 19 + ("TABLE", fun s -> Parser.TABLE s); 20 + ("IF", fun s -> Parser.IF s); 21 + ("NOT", fun s -> Parser.NOT s); 22 + ("EXISTS", fun s -> Parser.EXISTS s); 23 + ("PRIMARY", fun s -> Parser.PRIMARY s); 24 + ("KEY", fun s -> Parser.KEY s); 25 + ("UNIQUE", fun s -> Parser.UNIQUE s); 26 + ("NULL", fun s -> Parser.NULL s); 27 + ("DEFAULT", fun s -> Parser.DEFAULT s); 28 + ("CHECK", fun s -> Parser.CHECK s); 29 + ("REFERENCES", fun s -> Parser.REFERENCES s); 30 + ("COLLATE", fun s -> Parser.COLLATE s); 31 + ("GENERATED", fun s -> Parser.GENERATED s); 32 + ("ALWAYS", fun s -> Parser.ALWAYS s); 33 + ("AS", fun s -> Parser.AS s); 34 + ("AUTOINCREMENT", fun s -> Parser.AUTOINCREMENT s); 35 + ("FOREIGN", fun s -> Parser.FOREIGN s); 36 + ("CONSTRAINT", fun s -> Parser.CONSTRAINT s); 37 + ("ON", fun s -> Parser.ON s); 38 + ("ASC", fun s -> Parser.ASC s); 39 + ("DESC", fun s -> Parser.DESC s); 40 + ] 41 + 42 + let classify s = 43 + match List.assoc_opt (String.uppercase_ascii s) keywords with 44 + | Some mk -> mk s 45 + | None -> Parser.IDENT s 46 + } 47 + 48 + let digit = ['0'-'9'] 49 + let alpha = ['a'-'z' 'A'-'Z'] 50 + let ident_start = alpha | '_' 51 + let ident_char = ident_start | digit 52 + let ident = ident_start ident_char* 53 + let number = ['+' '-']? digit+ ('.' digit+)? 54 + let ws = [' ' '\t' '\n' '\r']+ 55 + 56 + rule token = parse 57 + | ws { token lexbuf } 58 + | "--" [^ '\n']* { token lexbuf } 59 + | '(' { Parser.LPAREN } 60 + | ')' { Parser.RPAREN } 61 + | ',' { Parser.COMMA } 62 + | number as n { Parser.NUMBER n } 63 + | ident as s { classify s } 64 + | '"' 65 + { Parser.IDENT (read_until '"' "\"" (Buffer.create 32) lexbuf) } 66 + | '[' 67 + { Parser.IDENT (read_until ']' "]" (Buffer.create 32) lexbuf) } 68 + | '`' 69 + { Parser.IDENT (read_until '`' "`" (Buffer.create 32) lexbuf) } 70 + | '\'' 71 + { 72 + let s = read_single_quoted (Buffer.create 32) lexbuf in 73 + (* String literals in DDL are typically DEFAULT values; treat 74 + them as numbers (opaque values) in the token stream. *) 75 + Parser.NUMBER ("'" ^ s ^ "'") 76 + } 77 + | eof { Parser.EOF } 78 + | _ as c { Parser.NUMBER (String.make 1 c) } 79 + 80 + and read_until stop label buf = parse 81 + | eof 82 + { raise (Error ("unterminated " ^ label ^ "-quoted identifier")) } 83 + | _ as c 84 + { 85 + if c = stop then Buffer.contents buf 86 + else begin 87 + Buffer.add_char buf c; 88 + read_until stop label buf lexbuf 89 + end 90 + } 91 + 92 + and read_single_quoted buf = parse 93 + | "''" 94 + { 95 + Buffer.add_char buf '\''; 96 + read_single_quoted buf lexbuf 97 + } 98 + | '\'' { Buffer.contents buf } 99 + | eof { raise (Error "unterminated string literal") } 100 + | _ as c 101 + { 102 + Buffer.add_char buf c; 103 + read_single_quoted buf lexbuf 104 + } 105 + 106 + { 107 + let parse sql = 108 + let lexbuf = Lexing.from_string sql in 109 + try Ok (Parser.create_table token lexbuf) with 110 + | Parser.Error -> Error "parse error" 111 + | Error msg -> Error msg 112 + }