(*---------------------------------------------------------------------------
   Copyright (c) 2025 Anil Madhavapeddy . All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(** INI parser and encoder using bytesrw.

    This module provides functions to parse and encode INI files using the
    {{:https://erratique.ch/software/bytesrw}bytesrw} streaming I/O library.
    It implements
    {{:https://docs.python.org/3/library/configparser.html}Python's configparser}
    semantics for maximum compatibility.

    {1:basic_usage Basic Usage}

    {@ocaml[
    (* Define your configuration type and codec *)
    let config_codec = Init.Document.(
      obj (fun server -> server)
      |> section "server" server_codec ~enc:Fun.id
      |> finish
    )

    (* Decode from a string *)
    match Init_bytesrw.decode_string config_codec ini_text with
    | Ok config -> (* use config *)
    | Error msg -> (* handle error *)

    (* Encode back to a string *)
    match Init_bytesrw.encode_string config_codec config with
    | Ok text -> (* write text *)
    | Error msg -> (* handle error *)
    ]}

    {1:python_compat Python Compatibility}

    This parser implements the same semantics as Python's [configparser]
    module. Configuration files that work with Python will work here, and
    vice versa.

    {2:syntax Supported Syntax}

    {@ini[
    # Comments start with # or ;
    ; This is also a comment

    [section]
    key = value
    key2 : value2           ; Both = and : are delimiters
    key3=no spaces needed

    [multiline]
    long_value = This is a long value
        that continues on
        indented lines
        for as long as needed

    [types]
    integer = 42
    float = 3.14
    boolean = yes           ; Also: true, on, 1, no, false, off, 0
    list = a, b, c, d
    ]}

    The trailing [; ...] notes in the INI samples of this documentation are
    explanatory annotations only: inline comments are disabled by default
    (see below), so with the default configuration such text would be parsed
    as part of the value.

    {2:edge_cases Edge Cases and Gotchas}

    {ul
    {- {b Section names are case-sensitive}: [[Server]] and [[server]] are
       different.}
    {- {b Option names are case-insensitive}: [Port] and [port] are the
       same.}
    {- {b Whitespace is trimmed} from keys and values automatically.}
    {- {b Empty values are allowed}: [key =] gives an empty string.}
    {- {b Comments are NOT preserved} during round-trips (matching Python).}
    {- {b Inline comments are disabled by default}: [key = value ; comment]
       gives the value ["value ; comment"] unless you configure
       {!field-inline_comment_prefixes}.}} *)

open Bytesrw

(** {1:config Parser Configuration}

    Configure the parser to match different INI dialects. The default
    configuration matches Python's [ConfigParser]. *)

type interpolation =
  [ `No_interpolation
    (** No variable substitution. Values like ["%(foo)s"] are returned
        literally. Equivalent to Python's [RawConfigParser].

        Use this for configuration files that contain literal [%] or [$]
        characters that shouldn't be interpreted. *)
  | `Basic_interpolation
    (** Basic variable substitution using [%(name)s] syntax (default).
        Equivalent to Python's [ConfigParser] default.

        Variables reference options in the current section or the DEFAULT
        section:
        {@ini[
        [paths]
        base = /opt/app
        data = %(base)s/data    ; Becomes "/opt/app/data"
        ]}

        {b Escaping:} Use [%%] to get a literal [%]. *)
  | `Extended_interpolation
    (** Extended substitution using [$\{section:name\}] syntax.
        Equivalent to Python's [ExtendedInterpolation].

        Variables can reference options in any section:
        {@ini[
        [common]
        base = /opt/app

        [server]
        data = ${common:base}/data    ; Cross-section reference
        logs = ${base}/logs           ; Same section or DEFAULT
        ]}

        {b Escaping:} Use [$$] to get a literal [$]. *) ]
(** The type for interpolation modes. Controls how variable references in
    values are expanded.

    {b Recursion limit:} Interpolation follows references up to 10 levels
    deep to prevent infinite loops. Deeper nesting is reported as an error.

    {b Missing references:} If a referenced option doesn't exist, decoding
    fails with {!Init.Error.Interpolation}. *)

type config = {
  delimiters : string list;
  (** Characters that separate option names from values.
      Default: [["="; ":"]].

      The {e first} delimiter on a line is used, so values can contain
      delimiter characters:
      {@ini[
      url = https://example.com:8080    ; Colon in value is fine
      ]} *)
  comment_prefixes : string list;
  (** Prefixes that start full-line comments. Default: [["#"; ";"]].

      A line starting with any of these (after optional whitespace) is
      treated as a comment and ignored. *)
  inline_comment_prefixes : string list;
  (** Prefixes that start inline comments. Default: [[]] (disabled).

      {b Warning:} Enabling inline comments (e.g., [[";"]]) prevents using
      those characters in values. For example:
      {@ini[
      url = https://example.com;port=8080   ; Would be truncated!
      ]}

      A space must precede inline comments: [value;comment] keeps the
      semicolon and everything after it, but in [value ; comment] the
      comment is dropped, leaving [value]. *)
  default_section : string;
  (** Name of the default section. Default: ["DEFAULT"].

      Options in this section are inherited by all other sections and
      available for interpolation. You can customize this, e.g., to
      ["general"] or ["common"]. *)
  interpolation : interpolation;
  (** How to handle variable references. Default: [`Basic_interpolation].

      See {!type-interpolation} for details on each mode. *)
  allow_no_value : bool;
  (** Allow options without values. Default: [false].

      When [true], options can appear without a delimiter:
      {@ini[
      [mysqld]
      skip-innodb     ; No = sign, value is None
      port = 3306
      ]}

      Such options decode as [None] when using {!Init.option}. *)
  strict : bool;
  (** Reject duplicate sections and options. Default: [true].

      When [true], if the same section or option appears twice, decoding
      fails with {!Init.Error.Duplicate_section} or
      {!Init.Error.Duplicate_option}.

      When [false], later values silently override earlier ones. *)
  empty_lines_in_values : bool;
  (** Allow empty lines in multiline values. Default: [true].

      When [true], empty lines can be part of multiline values:
      {@ini[
      [section]
      key = line 1

          line 3    ; Empty line 2 is preserved
      ]}

      When [false], empty lines terminate the multiline value. *)
}
(** Parser configuration. Adjust these settings to parse different INI
    dialects or to match specific Python configparser settings. *)

val default_config : config
(** Default configuration matching Python's [configparser.ConfigParser]:

    {ul
    {- [delimiters = ["="; ":"]]}
    {- [comment_prefixes = ["#"; ";"]]}
    {- [inline_comment_prefixes = []] (disabled)}
    {- [default_section = "DEFAULT"]}
    {- [interpolation = `Basic_interpolation]}
    {- [allow_no_value = false]}
    {- [strict = true]}
    {- [empty_lines_in_values = true]}} *)

val raw_config : config
(** Configuration matching Python's [configparser.RawConfigParser]: same as
    {!default_config} but with [interpolation = `No_interpolation].

    Use this when your values contain literal [%] or [$] characters. *)

(** {1:decode Decoding}

    Parse INI data into OCaml values. All decode functions return
    [Result.t] - they never raise exceptions for parse errors. *)

val decode :
  ?config:config ->
  ?locs:bool ->
  ?layout:bool ->
  ?file:Init.Textloc.fpath ->
  'a Init.t ->
  Bytes.Reader.t ->
  ('a, string) result
(** [decode codec r] decodes INI data from reader [r] using [codec].

    {ul
    {- [config] configures the parser. Default: {!default_config}.}
    {- [locs] if [true], preserves source locations in metadata.
       Default: [false].}
    {- [layout] if [true], preserves whitespace in metadata for
       layout-preserving round-trips. Default: [false].}
    {- [file] is the file path for error messages. Default: ["-"].}}

    Returns [Ok value] on success or [Error message] on failure, where
    [message] includes location information when available. *)

val decode' :
  ?config:config ->
  ?locs:bool ->
  ?layout:bool ->
  ?file:Init.Textloc.fpath ->
  'a Init.t ->
  Bytes.Reader.t ->
  ('a, Init.Error.t) result
(** [decode'] is like {!val-decode} but returns a structured error with
    separate {!Init.Error.type-kind}, location, and path information.

    Use this when you need to programmatically handle different error
    types or extract location information. *)

val decode_string :
  ?config:config ->
  ?locs:bool ->
  ?layout:bool ->
  ?file:Init.Textloc.fpath ->
  'a Init.t ->
  string ->
  ('a, string) result
(** [decode_string codec s] decodes INI data from string [s].

    This is the most common entry point for parsing:
    {@ocaml[
    let ini_text = {|
    [server]
    host = localhost
    port = 8080
    |} in
    Init_bytesrw.decode_string config_codec ini_text
    ]} *)

val decode_string' :
  ?config:config ->
  ?locs:bool ->
  ?layout:bool ->
  ?file:Init.Textloc.fpath ->
  'a Init.t ->
  string ->
  ('a, Init.Error.t) result
(** [decode_string'] is like {!val-decode_string} with structured errors. *)

(** {1:encode Encoding}

    Serialize OCaml values to INI format. *)

val encode :
  ?buf:Bytes.t ->
  'a Init.t ->
  'a ->
  eod:bool ->
  Bytes.Writer.t ->
  (unit, string) result
(** [encode codec v ~eod w] encodes [v] to writer [w] using [codec].

    {ul
    {- [buf] is an optional scratch buffer for writing.}
    {- [eod] if [true], signals end-of-data after writing.}}

    The output format follows standard INI conventions:
    - Sections are written as [[section_name]]
    - Options are written as [key = value]
    - Multiline values are continued with indentation *)

val encode' :
  ?buf:Bytes.t ->
  'a Init.t ->
  'a ->
  eod:bool ->
  Bytes.Writer.t ->
  (unit, Init.Error.t) result
(** [encode'] is like {!val-encode} with structured errors. *)

val encode_string :
  ?buf:Bytes.t -> 'a Init.t -> 'a -> (string, string) result
(** [encode_string codec v] encodes [v] to a string.

    {@ocaml[
    let config = { server = { host = "localhost"; port = 8080 } } in
    match Init_bytesrw.encode_string config_codec config with
    | Ok text -> print_endline text
    | Error msg -> failwith msg
    ]}

    Produces:
    {@ini[
    [server]
    host = localhost
    port = 8080
    ]} *)

val encode_string' :
  ?buf:Bytes.t -> 'a Init.t -> 'a -> (string, Init.Error.t) result
(** [encode_string'] is like {!val-encode_string} with structured errors. *)

(** {1:layout Layout Preservation}

    When decoding with [~layout:true], whitespace and comment positions are
    preserved in the {!Init.Meta.t} values attached to each element. When
    re-encoding, this information is used to reproduce the original
    formatting as closely as possible.

    {b Limitations:}
    {ul
    {- Comments are NOT preserved (matching Python's behavior).}
    {- Whitespace within values may be normalized.}
    {- The output may differ slightly from the input in edge cases.}}

    {b Performance tip:} For maximum performance when you don't need layout
    preservation, use [~layout:false ~locs:false] (the default). Enabling
    [~locs:true] improves error messages at a small cost. *)

(* NOTE(review): the section above says comment positions are preserved with
   [~layout:true] yet also lists comments as not preserved. Confirm against
   the implementation and reword whichever statement is inaccurate. *)

(** {1:examples Examples}

    {2:simple Simple Configuration}

    {@ocaml[
    type config = { debug : bool; port : int }

    let codec = Init.Document.(
      (* Named [server_section] so that it does not shadow the [section]
         combinator used below. *)
      let server_section = Init.Section.(
        obj (fun debug port -> { debug; port })
        |> mem "debug" Init.bool ~dec_absent:false ~enc:(fun c -> c.debug)
        |> mem "port" Init.int ~dec_absent:8080 ~enc:(fun c -> c.port)
        |> finish
      ) in
      obj Fun.id
      |> section "server" server_section ~enc:Fun.id
      |> finish
    )

    let config = Init_bytesrw.decode_string codec "[server]\nport = 9000"
    (* Ok { debug = false; port = 9000 } *)
    ]}

    {2:multi_section Multiple Sections}

    {@ocaml[
    type db = { host : string; port : int }
    type cache = { enabled : bool; ttl : int }
    type config = { db : db; cache : cache option }

    let db_codec = Init.Section.(
      obj (fun host port -> { host; port })
      |> mem "host" Init.string ~enc:(fun d -> d.host)
      |> mem "port" Init.int ~dec_absent:5432 ~enc:(fun d -> d.port)
      |> finish
    )

    let cache_codec = Init.Section.(
      obj (fun enabled ttl -> { enabled; ttl })
      |> mem "enabled" Init.bool ~enc:(fun c -> c.enabled)
      |> mem "ttl" Init.int ~dec_absent:3600 ~enc:(fun c -> c.ttl)
      |> finish
    )

    let config_codec = Init.Document.(
      obj (fun db cache -> { db; cache })
      |> section "database" db_codec ~enc:(fun c -> c.db)
      |> opt_section "cache" cache_codec ~enc:(fun c -> c.cache)
      |> finish
    )
    ]}

    {2:interpolation_example Interpolation}

    {@ocaml[
    let paths_codec = Init.Section.(
      obj (fun base data logs -> (base, data, logs))
      |> mem "base" Init.string ~enc:(fun (b,_,_) -> b)
      |> mem "data" Init.string ~enc:(fun (_,d,_) -> d)
      |> mem "logs" Init.string ~enc:(fun (_,_,l) -> l)
      |> finish
    )

    let doc_codec = Init.Document.(
      obj Fun.id
      |> section "paths" paths_codec ~enc:Fun.id
      |> finish
    )

    (* Basic interpolation expands %(base)s *)
    let ini = {|
    [paths]
    base = /opt/app
    data = %(base)s/data
    logs = %(base)s/logs
    |}

    let () =
      match Init_bytesrw.decode_string doc_codec ini with
      | Ok (_, data, logs) ->
          assert (data = "/opt/app/data");
          assert (logs = "/opt/app/logs")
      | Error _ -> assert false
    ]}

    {2:raw_parser Disabling Interpolation}

    {@ocaml[
    (* Use raw_config for files with literal % characters. The bare % in the
       value below would fail with basic interpolation, where a literal
       percent sign has to be written %%. *)
    let config = Init_bytesrw.raw_config
    let result = Init_bytesrw.decode_string ~config codec {|
    [display]
    format = 100% complete
    |}
    ]} *)