Minimal SQLite key-value store for OCaml
1(*---------------------------------------------------------------------------
2 Copyright (c) 2025 Thomas Gazagnaire. All rights reserved.
3 SPDX-License-Identifier: MIT
4
5 Menhir grammar for SQLite CREATE TABLE statements.
6
7 Column bodies are collected as token lists and classified in
8 Ast.classify_column — the grammar handles structure (parens,
9 commas, table-vs-column dispatch), OCaml handles semantics.
10 ---------------------------------------------------------------------------*)
11
12%{ open Ast %}
13
(* Payload tokens that are not keywords. *)
%token <string> IDENT NUMBER

(* SQL keywords.  Every keyword token carries the text as written in the
   input so that the original casing is preserved. *)
%token <string> ALWAYS AS ASC AUTOINCREMENT
%token <string> CHECK COLLATE CONSTRAINT CREATE
%token <string> DEFAULT DESC EXISTS FOREIGN GENERATED
%token <string> IF KEY NOT NULL ON
%token <string> PRIMARY REFERENCES TABLE UNIQUE

(* Delimiters and end of input; these carry no payload. *)
%token LPAREN RPAREN COMMA EOF

(* Single entry point; it produces an Ast.create_table value. *)
%start <Ast.create_table> create_table
28
29%%
30
(* Entry point: CREATE TABLE [IF NOT EXISTS] name ( def, def, ... ) followed
   by end of input.  The comma-separated definitions are split into column
   definitions and table constraints; each list keeps source order. *)
create_table:
  | CREATE; TABLE; if_not_exists; tbl_name = ident;
    LPAREN; items = separated_nonempty_list(COMMA, column_or_constraint); RPAREN;
    EOF
    { (* fold_right walks from the last item and conses onto the front, so
         both lists come out in source order without a final reversal *)
      let route item (cols, constraints) =
        match item with
        | `Col c -> (c :: cols, constraints)
        | `Tbl t -> (cols, t :: constraints)
      in
      let columns, table_constraints = List.fold_right route items ([], []) in
      { tbl_name; columns; table_constraints } }
44
(* Optional IF NOT EXISTS clause.  It is recognised and discarded. *)
if_not_exists:
  | (* absent *) { () }
  | IF; NOT; EXISTS { () }
48
(* One entry of the parenthesised definition list.
   An entry starting with PRIMARY, UNIQUE, FOREIGN, CHECK, or CONSTRAINT is
   a table constraint; anything else is a column definition, whose name and
   flat token body are handed to Ast.classify_column. *)
column_or_constraint:
  | col = col_name; toks = col_body
      { `Col (classify_column col toks) }
  | tc = table_constraint
      { `Tbl tc }
54
(* Column name: IDENT, or any keyword that cannot begin a table constraint
   (that is, every keyword except PRIMARY, UNIQUE, FOREIGN, CHECK, and
   CONSTRAINT).  The result is the token text as written. *)
col_name:
  | text = IDENT
  | text = ALWAYS | text = AS | text = ASC | text = AUTOINCREMENT
  | text = COLLATE | text = CREATE
  | text = DEFAULT | text = DESC
  | text = EXISTS | text = GENERATED
  | text = IF | text = KEY
  | text = NOT | text = NULL
  | text = ON | text = REFERENCES | text = TABLE
      { text }
75
76(* ── Table-level constraints ─────────────────────────────────── *)
77
(* Optional CONSTRAINT name prefix.  It is inlined, so each table constraint
   below still expands to one production with the prefix and one without.
   The constraint name is parsed and discarded. *)
%inline constraint_prefix:
  | (* unnamed constraint *) { () }
  | CONSTRAINT; ident { () }

(* Table-level constraint.  UNIQUE and PRIMARY KEY keep their column list;
   FOREIGN KEY and CHECK are recognised but reduced to Tbl_other. *)
table_constraint:
  | constraint_prefix; UNIQUE; cols = delimited(LPAREN, names, RPAREN)
      { Tbl_unique cols }
  | constraint_prefix; PRIMARY; KEY; cols = delimited(LPAREN, names, RPAREN)
      { Tbl_primary_key cols }
  | constraint_prefix; FOREIGN; KEY; LPAREN; names; RPAREN; references_tail
      { Tbl_other }
  | constraint_prefix; CHECK; LPAREN; skip; RPAREN
      { Tbl_other }
95
(* Foreign-key clause of a table-level FOREIGN KEY constraint:
     REFERENCES table [ ( columns ) ] [ trailing words ]
   SQLite permits ON DELETE / ON UPDATE actions, MATCH, and [NOT] DEFERRABLE
   after the referenced table.  Without the trailing part, a constraint such
   as FOREIGN KEY (a) REFERENCES t (b) ON DELETE CASCADE makes the whole
   statement fail to parse, even though the same words are accepted in a
   column body.  The clause has no semantic value (the constraint becomes
   Tbl_other), so the trailing words are consumed and dropped. *)
references_tail:
  | REFERENCES; ident; fk_trailer* {}
  | REFERENCES; ident; LPAREN; names; RPAREN; fk_trailer* {}

(* One word of the trailing part.  Parentheses are deliberately excluded: an
   LPAREN right after the table name must start the referenced column list.
   NOTE(review): presumably words such as DELETE, CASCADE, or DEFERRABLE are
   lexed as IDENT — confirm against the lexer. *)
fk_trailer:
  | IDENT {}
  | keyword {}
99
(* Comma-separated, non-empty list of column names, as used inside a table
   constraint or after REFERENCES table.
   Entries are parsed with col_name rather than ident: col_name accepts
   columns whose name is a keyword token (KEY, DEFAULT, ...) in a column
   definition, so the same names must be accepted here.  Otherwise a table
   that declares a column named key could not also say PRIMARY KEY (key)
   or UNIQUE (key). *)
names:
  | cols = separated_nonempty_list(COMMA, col_name) { cols }
102
103(* ── Column body: flat token list ────────────────────────────── *)
104
(* Everything after the column name, as a possibly empty flat token list. *)
col_body:
  | toks = col_token* { toks }
107
(* One top-level token of a column body.  Identifiers and keywords become
   Tok_word, numbers become Tok_number, and a parenthesised group becomes a
   nested Tok_parens.  A bare COMMA is not accepted here, since at this
   level it separates definitions. *)
%inline col_token:
  | word = IDENT | word = keyword
      { Tok_word word }
  | num = NUMBER
      { Tok_number num }
  | nested = delimited(LPAREN, paren_token*, RPAREN)
      { Tok_parens nested }
113
(* One token inside a parenthesised group of a column body.  Same as a
   top-level column token, except that COMMA is allowed and is recorded as
   the word ",".  Groups may nest to any depth. *)
paren_token:
  | word = IDENT | word = keyword
      { Tok_word word }
  | num = NUMBER
      { Tok_number num }
  | COMMA
      { Tok_word "," }
  | nested = delimited(LPAREN, paren_token*, RPAREN)
      { Tok_parens nested }
120
121(* ── Keywords usable as column body tokens ───────────────────── *)
122
(* Any keyword token, yielding the text it carries.  Inlined into the rules
   that use it (column-body tokens, parenthesised tokens, skipped atoms). *)
%inline keyword:
  | kw = ALWAYS | kw = AS | kw = ASC | kw = AUTOINCREMENT
  | kw = CHECK | kw = COLLATE | kw = CONSTRAINT | kw = CREATE
  | kw = DEFAULT | kw = DESC
  | kw = EXISTS | kw = FOREIGN | kw = GENERATED
  | kw = IF | kw = KEY
  | kw = NOT | kw = NULL
  | kw = ON | kw = PRIMARY
  | kw = REFERENCES | kw = TABLE | kw = UNIQUE
      { kw }
146
147(* ── Skip balanced parens (CHECK expressions) ────────────────── *)
148
(* Possibly empty run of atoms whose content is ignored; used for the body
   of a CHECK ( ... ) table constraint. *)
skip:
  | skip_atom* {}
151
(* One ignored atom: a single non-parenthesis token, or a balanced
   parenthesised group whose content is itself skipped. *)
skip_atom:
  | IDENT | NUMBER | COMMA | keyword
      {}
  | delimited(LPAREN, skip, RPAREN)
      {}
157
158(* ── Identifiers (including quoted) ──────────────────────────── *)
159
(* Plain identifier: the text of an IDENT token.
   NOTE(review): the section header says quoted identifiers are included;
   presumably the lexer emits IDENT for them — confirm against the lexer. *)
ident:
  | text = IDENT { text }