(*---------------------------------------------------------------------------
   Copyright (c) 2025 Thomas Gazagnaire. All rights reserved.
   SPDX-License-Identifier: MIT

   Menhir grammar for SQLite CREATE TABLE statements.

   Column bodies are collected as token lists and classified in
   Ast.classify_column — the grammar handles structure (parens, commas,
   table-vs-column dispatch), OCaml handles semantics.
  ---------------------------------------------------------------------------*)

%{
open Ast
%}

(* Payload tokens. IDENT must carry a string: its value is passed to
   [Tok_word], which is also applied to the literal ",".
   NOTE(review): NUMBER is assumed to carry its source text as a string —
   confirm against the argument type of [Ast.Tok_number] and the lexer. *)
%token <string> IDENT
%token <string> NUMBER

(* SQL keywords — all carry original text for case preservation, so that a
   keyword appearing in a column name or column body can be handed back
   verbatim as a word. *)
%token <string> CREATE TABLE IF NOT EXISTS
%token <string> PRIMARY KEY UNIQUE NULL DEFAULT CHECK REFERENCES
%token <string> COLLATE GENERATED ALWAYS AS AUTOINCREMENT
%token <string> FOREIGN CONSTRAINT ON ASC DESC

(* Delimiters — no payload *)
%token LPAREN RPAREN COMMA EOF

(* Entry point.
   NOTE(review): no result type is declared on %start, so the build
   presumably relies on Menhir's type inference — confirm. *)
%start create_table

%%

(* A complete statement:
     CREATE TABLE [IF NOT EXISTS] name ( def, def, ... ) EOF
   The comma-separated definitions are split into column definitions and
   table constraints; each group keeps its source order (the fold builds the
   lists reversed, hence the two [List.rev]). *)
create_table:
  | CREATE; TABLE; if_not_exists; name = ident; LPAREN;
    defs = separated_nonempty_list(COMMA, column_or_constraint);
    RPAREN; EOF
    { let columns, tcs =
        List.fold_left
          (fun (cs, ts) -> function
            | `Col c -> (c :: cs, ts)
            | `Tbl t -> (cs, t :: ts))
          ([], []) defs
      in
      { tbl_name = name;
        columns = List.rev columns;
        table_constraints = List.rev tcs } }

(* Optional IF NOT EXISTS; recognised and discarded. *)
if_not_exists:
  | IF; NOT; EXISTS {}
  | {}

(* Dispatch: table constraints start with PRIMARY, UNIQUE, FOREIGN, CHECK,
   or CONSTRAINT. Everything else is a column definition. *)
column_or_constraint:
  | t = table_constraint
    { `Tbl t }
  | name = col_name; body = col_body
    { `Col (classify_column name body) }

(* Column names: IDENT plus keywords that never start a table constraint.
   PRIMARY, UNIQUE, FOREIGN, CHECK and CONSTRAINT are deliberately absent so
   that the first token of a definition decides the dispatch above. *)
col_name:
  | s = IDENT { s }
  | s = TABLE { s }
  | s = CREATE { s }
  | s = KEY { s }
  | s = NOT { s }
  | s = NULL { s }
  | s = DEFAULT { s }
  | s = REFERENCES { s }
  | s = COLLATE { s }
  | s = GENERATED { s }
  | s = ALWAYS { s }
  | s = AS { s }
  | s = AUTOINCREMENT { s }
  | s = EXISTS { s }
  | s = IF { s }
  | s = ON { s }
  | s = ASC { s }
  | s = DESC { s }

(* ── Table-level constraints ─────────────────────────────────── *)

(* UNIQUE and PRIMARY KEY keep their column lists; FOREIGN KEY and CHECK are
   parsed for structure only and collapse to [Tbl_other]. An optional
   [CONSTRAINT name] prefix is accepted on every form and the name is
   dropped. *)
table_constraint:
  | UNIQUE; LPAREN; cs = names; RPAREN
    { Tbl_unique cs }
  | PRIMARY; KEY; LPAREN; cs = names; RPAREN
    { Tbl_primary_key cs }
  | CONSTRAINT; ident; UNIQUE; LPAREN; cs = names; RPAREN
    { Tbl_unique cs }
  | CONSTRAINT; ident; PRIMARY; KEY; LPAREN; cs = names; RPAREN
    { Tbl_primary_key cs }
  | FOREIGN; KEY; LPAREN; names; RPAREN; references_tail
    { Tbl_other }
  | CONSTRAINT; ident; FOREIGN; KEY; LPAREN; names; RPAREN; references_tail
    { Tbl_other }
  | CHECK; LPAREN; skip; RPAREN
    { Tbl_other }
  | CONSTRAINT; ident; CHECK; LPAREN; skip; RPAREN
    { Tbl_other }

(* REFERENCES parent [(cols)] followed by any trailing foreign-key clauses
   (e.g. ON DELETE CASCADE, ON UPDATE SET NULL, MATCH ..., [NOT] DEFERRABLE
   ...). The trailing clauses are accepted and discarded: nothing from a
   foreign key reaches the AST. *)
references_tail:
  | REFERENCES; ident; fk_columns; list(fk_tail_token) {}

(* Optional parenthesised list of referenced parent columns. *)
fk_columns:
  | {}
  | LPAREN; names; RPAREN {}

(* One word of a trailing foreign-key clause. COMMA, LPAREN and RPAREN are
   excluded on purpose so the constraint ends at the next delimiter. *)
fk_tail_token:
  | IDENT {}
  | keyword {}

(* Non-empty, comma-separated list of identifiers. *)
names:
  | separated_nonempty_list(COMMA, ident) { $1 }

(* ── Column body: flat token list ────────────────────────────── *)

(* Everything after the column name up to the next top-level COMMA or
   RPAREN, as a possibly empty token list. *)
col_body:
  | list(col_token) { $1 }

(* A top-level body token. COMMA is not accepted here (it ends the column
   definition); a parenthesised group becomes a single nested [Tok_parens]. *)
%inline col_token:
  | s = IDENT { Tok_word s }
  | s = NUMBER { Tok_number s }
  | k = keyword { Tok_word k }
  | LPAREN; ts = list(paren_token); RPAREN { Tok_parens ts }

(* A token inside parentheses: same as [col_token], plus COMMA, which is
   kept as the word ",". Groups nest recursively. *)
paren_token:
  | s = IDENT { Tok_word s }
  | s = NUMBER { Tok_number s }
  | k = keyword { Tok_word k }
  | COMMA { Tok_word "," }
  | LPAREN; ts = list(paren_token); RPAREN { Tok_parens ts }

(* ── Keywords usable as column body tokens ───────────────────── *)

(* Every keyword token, yielding its original text. *)
%inline keyword:
  | s = PRIMARY { s }
  | s = KEY { s }
  | s = NOT { s }
  | s = NULL { s }
  | s = UNIQUE { s }
  | s = DEFAULT { s }
  | s = CHECK { s }
  | s = REFERENCES { s }
  | s = COLLATE { s }
  | s = GENERATED { s }
  | s = ALWAYS { s }
  | s = AS { s }
  | s = AUTOINCREMENT { s }
  | s = CONSTRAINT { s }
  | s = EXISTS { s }
  | s = CREATE { s }
  | s = TABLE { s }
  | s = IF { s }
  | s = ON { s }
  | s = ASC { s }
  | s = DESC { s }
  | s = FOREIGN { s }

(* ── Skip balanced parens (CHECK expressions) ────────────────── *)

(* Consumes any token sequence with balanced parentheses and produces
   nothing. *)
skip:
  | list(skip_atom) {}

skip_atom:
  | IDENT {}
  | NUMBER {}
  | keyword {}
  | COMMA {}
  | LPAREN; skip; RPAREN {}

(* ── Identifiers (including quoted) ──────────────────────────── *)

(* Only IDENT is accepted here; quoted identifiers are presumably delivered
   as IDENT by the lexer — verify against the lexer. *)
ident:
  | s = IDENT { s }