···11-(** Character input source with lookahead *)
(** Cursor over an in-memory string: [pos] is a byte offset into [source]
    and [position] carries the matching line/column information. *)
type t = {
  source : string;
  mutable pos : int;  (** Current byte position *)
  mutable position : Position.t;  (** Line/column tracking *)
  length : int;
}
(** Build an input over [source], starting just past any byte-order mark
    reported by [Encoding.detect]. *)
let of_string source =
  (* Only UTF-8 is supported for now, so the detected encoding is dropped
     and only the BOM length is used. *)
  let _encoding, bom_len = Encoding.detect source in
  let length = String.length source in
  { source; pos = bom_len; position = Position.initial; length }
(** Current line/column position of the cursor. *)
let position input = input.position

(** [true] once the byte offset has reached the end of the source. *)
let is_eof input = not (input.pos < input.length)
(** Byte under the cursor, or [None] at end of input. Nothing is consumed. *)
let peek t =
  if t.pos < t.length then Some (String.get t.source t.pos) else None
(** Like {!peek}, but reports [Unexpected_eof] at the current position
    (through [Error.raise_at]) when the input is exhausted. *)
let peek_exn t =
  if t.pos < t.length then String.get t.source t.pos
  else Error.raise_at t.position Unexpected_eof
(** [peek_nth t n] returns the byte [n] positions away from the cursor
    without consuming anything, or [None] when that index falls outside the
    input (past the end, or before the start for a negative [n]). *)
let peek_nth t n =
  let idx = t.pos + n in
  (* Guard both ends: a negative [n] reaching before the start of the source
     would otherwise make the string index raise Invalid_argument instead of
     yielding [None]. *)
  if idx < 0 || idx >= t.length then None
  else Some t.source.[idx]
(** Up to [n] bytes starting at the cursor, without consuming them. The
    result is shorter than [n] when fewer bytes remain. *)
let peek_string t n =
  let available = t.length - t.pos in
  let len = if t.pos + n > t.length then available else n in
  String.sub t.source t.pos len
(** Consume and return the byte under the cursor, advancing both the byte
    offset and the line/column position. Returns [None] at end of input. *)
let next t =
  if t.pos < t.length then begin
    let ch = String.get t.source t.pos in
    t.pos <- succ t.pos;
    t.position <- Position.advance_char ch t.position;
    Some ch
  end
  else None
(** Like {!next}, but reports [Unexpected_eof] at the current position
    (through [Error.raise_at]) when nothing is left to consume. *)
let next_exn t =
  match next t with
  | None -> Error.raise_at t.position Unexpected_eof
  | Some ch -> ch
(** Consume [n] bytes through {!next}; consuming at end of input is a no-op,
    and a non-positive [n] consumes nothing. *)
let skip t n =
  let rec go remaining =
    if remaining > 0 then begin
      ignore (next t);
      go (remaining - 1)
    end
  in
  go n
(** Consume bytes for as long as [pred] accepts the byte under the cursor,
    stopping at the first rejected byte or at end of input. *)
let skip_while t pred =
  let rec go () =
    match peek t with
    | Some ch when pred ch ->
      ignore (next t);
      go ()
    | Some _ | None -> ()
  in
  go ()
6969-7070-(** Character classification *)
(** Line feed or carriage return. *)
let is_break = function
  | '\n' | '\r' -> true
  | _ -> false

(** Space or horizontal tab. *)
let is_blank = function
  | ' ' | '\t' -> true
  | _ -> false

(** Any break or blank character. *)
let is_whitespace ch = is_break ch || is_blank ch
(** ASCII decimal digit. *)
let is_digit = function
  | '0' .. '9' -> true
  | _ -> false

(** ASCII hexadecimal digit, either case. *)
let is_hex = function
  | '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' -> true
  | _ -> false

(** ASCII letter, either case. *)
let is_alpha = function
  | 'a' .. 'z' | 'A' .. 'Z' -> true
  | _ -> false

(** ASCII letter or decimal digit. *)
let is_alnum ch = is_alpha ch || is_digit ch
(** YAML indicator characters: true when the character appears in the fixed
    indicator set listed below. *)
let is_indicator ch = String.contains "-?:,[]{}#&*!|>'\"%@`" ch
(** Flow-collection punctuation: comma, square brackets and curly braces. *)
let is_flow_indicator ch = String.contains ",[]{}" ch
(** [next_is pred t] tests the byte under the cursor against [pred] without
    consuming it; the answer is [false] at end of input. *)
let next_is pred t = Option.fold ~none:false ~some:pred (peek t)
(* Specialisations of [next_is] for each character class. *)
let next_is_break = next_is is_break
let next_is_blank = next_is is_blank
let next_is_whitespace = next_is is_whitespace
let next_is_digit = next_is is_digit
let next_is_hex = next_is is_hex
let next_is_alpha = next_is is_alpha
let next_is_indicator = next_is is_indicator
(** Check if at document boundary: the cursor is in column 1 and the input
    continues with [---] or [...], followed either by whitespace or by the
    end of the input. *)
let at_document_boundary t =
  t.position.column = 1
  && begin
    let s = peek_string t 4 in
    let len = String.length s in
    (* Fewer than three bytes left can never spell a marker. *)
    len >= 3
    && (let marker = String.sub s 0 3 in
        marker = "---" || marker = "...")
    && (len = 3 || is_whitespace s.[3])
  end
(** Consume one line break if the cursor is on one. A carriage return
    immediately followed by a line feed counts as a single break; any other
    byte (or end of input) leaves the cursor untouched. *)
let consume_break t =
  match peek t with
  | Some '\n' -> ignore (next t)
  | Some '\r' ->
    ignore (next t);
    if peek t = Some '\n' then ignore (next t)
  | Some _ | None -> ()
(** Everything from the cursor to the end of the source, as a fresh string;
    empty at end of input. *)
let remaining t =
  let left = t.length - t.pos in
  if left > 0 then String.sub t.source t.pos left else ""
(** Snapshot of the current line/column position, for later span creation. *)
let mark input = input.position
(** Byte immediately before the cursor, or [None] when the cursor is at the
    very start of the source. *)
let peek_back t =
  if t.pos > 0 then Some (String.get t.source (t.pos - 1)) else None
-24
yaml/ocaml-yamle/lib/layout_style.ml
···11-(** Collection layout styles *)
type t =
  | Any  (** Let emitter choose *)
  | Block  (** Indentation-based *)
  | Flow  (** Inline with brackets *)

(** Lower-case name of the style. *)
let to_string style =
  match style with
  | Flow -> "flow"
  | Block -> "block"
  | Any -> "any"

(** Formatter-based printer producing the same text as {!to_string}. *)
let pp fmt style = style |> to_string |> Format.pp_print_string fmt

(** Two styles are equal exactly when they are the same constructor. *)
let equal a b =
  match a, b with
  | Any, Any | Block, Block | Flow, Flow -> true
  | (Any | Block | Flow), _ -> false

(** Total order following declaration order: [Any], then [Block], then
    [Flow]. *)
let compare a b =
  let rank = function
    | Any -> 0
    | Block -> 1
    | Flow -> 2
  in
  Int.compare (rank a) (rank b)
-276
yaml/ocaml-yamle/lib/loader.ml
···11-(** Loader - converts parser events to YAML data structures *)
(** Stack frame for building nested structures. Each frame keeps the
    properties taken from the opening event plus the children collected so
    far, most recent first. *)
type frame =
  | Sequence_frame of {
      anchor : string option;
      tag : string option;
      implicit : bool;
      style : Layout_style.t;
      items : Yaml.t list;  (** Collected items, in reverse order *)
    }
  | Mapping_frame of {
      anchor : string option;
      tag : string option;
      implicit : bool;
      style : Layout_style.t;
      pairs : (Yaml.t * Yaml.t) list;  (** Collected pairs, in reverse order *)
      pending_key : Yaml.t option;  (** Key still waiting for its value *)
    }
(** Mutable loader state threaded through event processing. *)
type state = {
  mutable stack : frame list;  (** Open collections, innermost first *)
  mutable current : Yaml.t option;  (** Root node of the document in progress *)
  mutable documents : Document.t list;  (** Finished documents, newest first *)
  mutable doc_version : (int * int) option;
  mutable doc_implicit_start : bool;
}
(** Fresh loader state: nothing open, no documents, and the next document
    assumed to start implicitly. *)
let create_state () =
  { stack = []; current = None; documents = [];
    doc_version = None; doc_implicit_start = true }
(** Add a node to current context: with no open collection the node becomes
    the document root; inside a sequence it is appended as an item; inside
    a mapping it is taken as a key when no key is pending and otherwise as
    the value completing the pending key. *)
let add_node state node =
  match state.stack with
  | [] -> state.current <- Some node
  | Sequence_frame f :: rest ->
    state.stack <- Sequence_frame { f with items = node :: f.items } :: rest
  | Mapping_frame f :: rest ->
    let updated =
      match f.pending_key with
      | None ->
        (* This is a key *)
        Mapping_frame { f with pending_key = Some node }
      | Some key ->
        (* This is a value *)
        Mapping_frame
          { f with pairs = (key, node) :: f.pairs; pending_key = None }
    in
    state.stack <- updated :: rest

(** Process a single event, updating [state] in place. Collection start
    events open a frame, collection end events close the innermost frame and
    hand the finished node to {!add_node}, and a document end event records
    the finished document and resets the per-document fields. *)
let process_event state (ev : Event.spanned) =
  let open_frame frame = state.stack <- frame :: state.stack in
  match ev.event with
  | Event.Stream_start _ | Event.Stream_end -> ()

  | Event.Document_start { version; implicit } ->
    state.doc_version <- version;
    state.doc_implicit_start <- implicit

  | Event.Document_end { implicit } ->
    let doc =
      Document.make
        ?version:state.doc_version
        ~implicit_start:state.doc_implicit_start
        ~implicit_end:implicit
        state.current
    in
    state.documents <- doc :: state.documents;
    state.current <- None;
    state.doc_version <- None;
    state.doc_implicit_start <- true

  | Event.Alias { anchor } ->
    add_node state (`Alias anchor : Yaml.t)

  | Event.Scalar { anchor; tag; value; plain_implicit; quoted_implicit; style } ->
    let scalar =
      Scalar.make ?anchor ?tag ~plain_implicit ~quoted_implicit ~style value
    in
    add_node state (`Scalar scalar : Yaml.t)

  | Event.Sequence_start { anchor; tag; implicit; style } ->
    open_frame (Sequence_frame { anchor; tag; implicit; style; items = [] })

  | Event.Sequence_end ->
    (match state.stack with
     | Sequence_frame { anchor; tag; implicit; style; items } :: rest ->
       (* Items were accumulated newest-first. *)
       let seq = Sequence.make ?anchor ?tag ~implicit ~style (List.rev items) in
       state.stack <- rest;
       add_node state (`A seq : Yaml.t)
     | _ -> Error.raise (Invalid_state "unexpected sequence end"))

  | Event.Mapping_start { anchor; tag; implicit; style } ->
    open_frame
      (Mapping_frame
         { anchor; tag; implicit; style; pairs = []; pending_key = None })

  | Event.Mapping_end ->
    (match state.stack with
     | Mapping_frame { pending_key = Some _; _ } :: _ ->
       Error.raise (Invalid_state "mapping ended with pending key")
     | Mapping_frame { anchor; tag; implicit; style; pairs; pending_key = None } :: rest ->
       (* Pairs were accumulated newest-first. *)
       let map = Mapping.make ?anchor ?tag ~implicit ~style (List.rev pairs) in
       state.stack <- rest;
       add_node state (`O map : Yaml.t)
     | _ -> Error.raise (Invalid_state "unexpected mapping end"))
(** Load single document as Value. An empty stream or a document without a
    root yields [`Null]; more than one document is reported as
    [Multiple_documents].

    @param resolve_aliases Whether to resolve aliases (default true)
    @param max_nodes Maximum nodes during alias expansion
      (default [Yaml.default_max_alias_nodes])
    @param max_depth Maximum alias nesting depth
      (default [Yaml.default_max_alias_depth])
*)
let value_of_string
    ?(resolve_aliases = true)
    ?(max_nodes = Yaml.default_max_alias_nodes)
    ?(max_depth = Yaml.default_max_alias_depth)
    s =
  let parser = Parser.of_string s in
  let state = create_state () in
  Parser.iter (process_event state) parser;
  match state.documents with
  | _ :: _ :: _ -> Error.raise Multiple_documents
  | [] -> `Null
  | [ doc ] ->
    (match Document.root doc with
     | Some yaml ->
       Yaml.to_value
         ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth yaml
     | None -> `Null)
(** Load single document as Yaml. An empty stream or a document without a
    root yields an empty scalar; more than one document is reported as
    [Multiple_documents].

    @param resolve_aliases Whether to resolve aliases (default false for Yaml.t)
    @param max_nodes Maximum nodes during alias expansion
      (default [Yaml.default_max_alias_nodes])
    @param max_depth Maximum alias nesting depth
      (default [Yaml.default_max_alias_depth])
*)
let yaml_of_string
    ?(resolve_aliases = false)
    ?(max_nodes = Yaml.default_max_alias_nodes)
    ?(max_depth = Yaml.default_max_alias_depth)
    s =
  let parser = Parser.of_string s in
  let state = create_state () in
  Parser.iter (process_event state) parser;
  let empty () = `Scalar (Scalar.make "") in
  match state.documents with
  | _ :: _ :: _ -> Error.raise Multiple_documents
  | [] -> empty ()
  | [ doc ] ->
    (match Document.root doc with
     | None -> empty ()
     | Some yaml when resolve_aliases ->
       Yaml.resolve_aliases ~max_nodes ~max_depth yaml
     | Some yaml -> yaml)
(** Load all documents from [s], in the order they appear. *)
let documents_of_string s =
  let state = create_state () in
  s |> Parser.of_string |> Parser.iter (process_event state);
  (* Documents are accumulated newest-first. *)
  List.rev state.documents
(** Load single Value from parser: events are pulled until a document ends.
    Returns [None] when the parser runs dry or the stream ends first; a
    document without a root yields [`Null].

    @param resolve_aliases Whether to resolve aliases (default true)
    @param max_nodes Maximum nodes during alias expansion
      (default [Yaml.default_max_alias_nodes])
    @param max_depth Maximum alias nesting depth
      (default [Yaml.default_max_alias_depth])
*)
let load_value
    ?(resolve_aliases = true)
    ?(max_nodes = Yaml.default_max_alias_nodes)
    ?(max_depth = Yaml.default_max_alias_depth)
    parser =
  let state = create_state () in
  let to_value doc =
    match Document.root doc with
    | Some yaml ->
      Yaml.to_value
        ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth yaml
    | None -> `Null
  in
  (* Hand over the document just recorded by [process_event], if any. *)
  let take_document () =
    match state.documents with
    | [] -> None
    | doc :: _ ->
      state.documents <- [];
      Some (to_value doc)
  in
  let rec pump () =
    match Parser.next parser with
    | None -> None
    | Some ev ->
      process_event state ev;
      (match ev.event with
       | Event.Stream_end -> None
       | Event.Document_end _ -> take_document ()
       | _ -> pump ())
  in
  pump ()
(** Load single Yaml from parser: events are pulled until a document ends.
    Returns [None] when the parser runs dry or the stream ends first; a
    document without a root yields an empty scalar. *)
let load_yaml parser =
  let state = create_state () in
  (* Hand over the document just recorded by [process_event], if any. *)
  let take_document () =
    match state.documents with
    | [] -> None
    | doc :: _ ->
      state.documents <- [];
      Some
        (match Document.root doc with
         | Some yaml -> yaml
         | None -> `Scalar (Scalar.make ""))
  in
  let rec pump () =
    match Parser.next parser with
    | None -> None
    | Some ev ->
      process_event state ev;
      (match ev.event with
       | Event.Stream_end -> None
       | Event.Document_end _ -> take_document ()
       | _ -> pump ())
  in
  pump ()
(** Load single Document from parser: events are pulled until a document
    ends. Returns [None] when the parser runs dry or the stream ends
    first. *)
let load_document parser =
  let state = create_state () in
  (* Hand over the document just recorded by [process_event], if any. *)
  let take_document () =
    match state.documents with
    | [] -> None
    | doc :: _ ->
      state.documents <- [];
      Some doc
  in
  let rec pump () =
    match Parser.next parser with
    | None -> None
    | Some ev ->
      process_event state ev;
      (match ev.event with
       | Event.Stream_end -> None
       | Event.Document_end _ -> take_document ()
       | _ -> pump ())
  in
  pump ()
(** Iterate over documents: [f] is applied to each document produced by
    {!load_document} until it returns [None]. *)
let iter_documents f parser =
  let more = ref true in
  while !more do
    match load_document parser with
    | Some doc -> f doc
    | None -> more := false
  done
(** Fold over documents: [f acc doc] is applied, left to right, to each
    document produced by {!load_document} until it returns [None]. *)
let fold_documents f init parser =
  let acc = ref init in
  let more = ref true in
  while !more do
    match load_document parser with
    | Some doc -> acc := f !acc doc
    | None -> more := false
  done;
  !acc
-92
yaml/ocaml-yamle/lib/mapping.ml
···11-(** YAML mapping (object) values with metadata *)
(** A mapping: an ordered list of key/value members plus node metadata. *)
type ('k, 'v) t = {
  anchor : string option;
  tag : string option;
  implicit : bool;
  style : Layout_style.t;
  members : ('k * 'v) list;  (** Key/value pairs, in order *)
}
(** Build a mapping from [members]. [anchor] and [tag] are absent unless
    given, [implicit] defaults to [true] and [style] to
    [Layout_style.Any]. *)
let make
    ?(anchor : string option)
    ?(tag : string option)
    ?(implicit = true)
    ?(style = Layout_style.Any)
    members =
  { members; style; implicit; tag; anchor }
(* Field accessors. *)
let members { members; _ } = members
let anchor { anchor; _ } = anchor
let tag { tag; _ } = tag
let implicit { implicit; _ } = implicit
let style { style; _ } = style
(* Functional updates: each returns a copy of [t] with one field replaced. *)
let with_anchor anchor t =
  let anchor = Some anchor in
  { t with anchor }

let with_tag tag t =
  let tag = Some tag in
  { t with tag }

let with_style style t = { t with style = style }
(** Apply [f] to every key, leaving values and metadata untouched. *)
let map_keys f t =
  let on_key (k, v) = (f k, v) in
  { t with members = List.map on_key t.members }

(** Apply [f] to every value, leaving keys and metadata untouched. *)
let map_values f t =
  let on_value (k, v) = (k, f v) in
  { t with members = List.map on_value t.members }

(** Rebuild every member with [f key value], which returns the new pair. *)
let map f t =
  let on_member (k, v) = f k v in
  { t with members = List.map on_member t.members }
(** Number of members. *)
let length t = t.members |> List.length

(** [true] when the mapping has no members. *)
let is_empty t =
  match t.members with
  | [] -> true
  | _ :: _ -> false
(** Value of the first member whose key satisfies [pred], if any. *)
let find pred t =
  t.members
  |> List.find_opt (fun (k, _) -> pred k)
  |> Option.map snd

(** First member (key and value) whose key satisfies [pred], if any. *)
let find_key pred t =
  let key_matches (k, _) = pred k in
  List.find_opt key_matches t.members

(** [true] when some key satisfies [pred]. *)
let mem pred t =
  let key_matches (k, _) = pred k in
  List.exists key_matches t.members
(** Keys in member order. *)
let keys t = List.map (fun (k, _) -> k) t.members

(** Values in member order. *)
let values t = List.map (fun (_, v) -> v) t.members

(** Apply [f key value] to every member, in order. *)
let iter f t =
  let visit (k, v) = f k v in
  List.iter visit t.members

(** Left fold over members: [f acc key value]. *)
let fold f init t =
  let step acc (k, v) = f acc k v in
  List.fold_left step init t.members
(** Debug printer. Prints the anchor and tag when present, then the style
    and the members, using [pp_key] and [pp_val] for the member parts. *)
let pp pp_key pp_val fmt t =
  let pp_member fmt (k, v) =
    Format.fprintf fmt "@[<hv 2>%a:@ %a@]" pp_key k pp_val v
  in
  let pp_sep fmt () = Format.fprintf fmt ",@ " in
  Format.fprintf fmt "@[<hv 2>mapping(@,";
  Option.iter (fun a -> Format.fprintf fmt "anchor=%s,@ " a) t.anchor;
  Option.iter (fun tag -> Format.fprintf fmt "tag=%s,@ " tag) t.tag;
  Format.fprintf fmt "style=%a,@ " Layout_style.pp t.style;
  Format.fprintf fmt "members={@,";
  (* Separator goes between members only, never before the first one. *)
  Format.pp_print_list ~pp_sep pp_member fmt t.members;
  Format.fprintf fmt "@]@,})"
(** Structural equality over metadata and members; keys and values are
    compared pairwise, in order, with [eq_k] and [eq_v]. *)
let equal eq_k eq_v a b =
  let same_member (k1, v1) (k2, v2) = eq_k k1 k2 && eq_v v1 v2 in
  Option.equal String.equal a.anchor b.anchor
  && Option.equal String.equal a.tag b.tag
  && Bool.equal a.implicit b.implicit
  && Layout_style.equal a.style b.style
  && List.equal same_member a.members b.members
(** Lexicographic comparison: anchor, then tag, then implicit flag, then
    style, then members (pairwise, key before value). *)
let compare cmp_k cmp_v a b =
  let cmp_member (k1, v1) (k2, v2) =
    match cmp_k k1 k2 with
    | 0 -> cmp_v v1 v2
    | c -> c
  in
  (* Each step is a thunk so later fields are only compared when all
     earlier ones tie. *)
  let steps =
    [ (fun () -> Option.compare String.compare a.anchor b.anchor);
      (fun () -> Option.compare String.compare a.tag b.tag);
      (fun () -> Bool.compare a.implicit b.implicit);
      (fun () -> Layout_style.compare a.style b.style);
      (fun () -> List.compare cmp_member a.members b.members) ]
  in
  List.fold_left (fun acc step -> if acc <> 0 then acc else step ()) 0 steps
-781
yaml/ocaml-yamle/lib/parser.ml
···11-(** YAML parser - converts tokens to semantic events via state machine *)
(** Parser states: one constructor per position in the state machine that
    turns tokens into events. *)
type state =
  | Stream_start
  | Implicit_document_start
  | Document_start
  | Document_content
  | Document_content_done
      (* After parsing a node, check for unexpected content *)
  | Document_end
  | Block_node
  | Block_node_or_indentless_sequence
  | Flow_node
  | Block_sequence_first_entry
  | Block_sequence_entry
  | Indentless_sequence_entry
  | Block_mapping_first_key
  | Block_mapping_key
  | Block_mapping_value
  | Flow_sequence_first_entry
  | Flow_sequence_entry
  | Flow_sequence_entry_mapping_key
  | Flow_sequence_entry_mapping_value
  | Flow_sequence_entry_mapping_end
  | Flow_mapping_first_key
  | Flow_mapping_key
  | Flow_mapping_value
  | Flow_mapping_empty_value
  | End
(** Parser instance: a scanner plus the mutable state-machine bookkeeping. *)
type t = {
  scanner : Scanner.t;
  mutable state : state;
  mutable states : state list;  (** State stack *)
  mutable marks : Span.t list;  (** Mark stack for span tracking *)
  mutable version : (int * int) option;
  mutable tag_directives : (string * string) list;
  mutable current_token : Token.spanned option;
      (** One-token lookahead; [None] until the next token is fetched *)
  mutable finished : bool;
  mutable explicit_doc_end : bool;
      (** True if last doc ended with explicit ... *)
  mutable stream_start : bool;
      (** True if we haven't emitted any documents yet *)
}
(** Fresh parser over [scanner], positioned before the stream start and
    seeded with the two default tag handles. *)
let create scanner =
  let default_tag_directives =
    [ ("!", "!"); ("!!", "tag:yaml.org,2002:") ]
  in
  {
    scanner;
    state = Stream_start;
    states = [];
    marks = [];
    version = None;
    tag_directives = default_tag_directives;
    current_token = None;
    finished = false;
    explicit_doc_end = false;
    stream_start = true;
  }
(** Parser reading from the string [s]. *)
let of_string s = s |> Scanner.of_string |> create
(** Get current token, fetching one from the scanner if none is buffered.
    Reports [Unexpected_eof] through [Error.raise] when the scanner has
    nothing more to give. *)
let current_token t =
  if Option.is_none t.current_token then
    t.current_token <- Scanner.next t.scanner;
  match t.current_token with
  | None -> Error.raise Unexpected_eof
  | Some tok -> tok
(** Peek at current token, fetching one from the scanner if none is
    buffered. Returns [None] when the scanner is exhausted. *)
let peek_token t =
  (match t.current_token with
   | None -> t.current_token <- Scanner.next t.scanner
   | Some _ -> ());
  t.current_token
(** Skip current token: the lookahead slot is cleared so the next access
    fetches a fresh token. *)
let skip_token t = t.current_token <- None

(** Check if current token matches [pred]; [false] when no token is left. *)
let check t pred =
  match peek_token t with
  | None -> false
  | Some tok -> pred tok.token

(** Check for specific token; same behaviour as {!check}. *)
let check_token = check
(** Push state onto stack *)
let push_state t s = t.states <- List.cons s t.states

(** Pop state from stack; an empty stack yields [End]. *)
let pop_state t =
  match t.states with
  | [] -> End
  | top :: below ->
    t.states <- below;
    top
(** Resolve a tag to its full form. An empty [handle] marks a verbatim tag
    whose [suffix] is returned as is; otherwise the handle is looked up in
    the current tag directives and its prefix is prepended to [suffix]. An
    unknown handle is reported as [Invalid_tag], except for the primary
    handle, which falls back to itself. *)
let resolve_tag t ~handle ~suffix =
  match handle with
  | "" ->
    (* Verbatim tag - suffix is already the full URI *)
    suffix
  | _ ->
    (match List.assoc_opt handle t.tag_directives, handle with
     | Some prefix, _ -> prefix ^ suffix
     | None, "!" -> "!" ^ suffix
     | None, _ -> Error.raise (Invalid_tag (handle ^ suffix)))
(** Process directives at document start. The version and tag directives
    are first reset to their defaults, then every leading version or tag
    directive token is consumed and recorded.

    A second version directive is reported as [Invalid_yaml_version] and a
    second tag directive for the same handle as [Invalid_tag_directive],
    both through [Error.raise_span] at the offending token. *)
let process_directives t =
  t.version <- None;
  t.tag_directives <- [("!", "!"); ("!!", "tag:yaml.org,2002:")];
  (* Handles explicitly declared by a tag directive in this document. The
     duplicate check must look here rather than at [t.tag_directives]: that
     list is pre-seeded with the two default handles, so checking it forced
     an exemption that let a repeated directive for either default handle
     slip through unnoticed. *)
  let declared = ref [] in

  while check t (function
    | Token.Version_directive _ | Token.Tag_directive _ -> true
    | _ -> false)
  do
    let tok = current_token t in
    skip_token t;
    match tok.token with
    | Token.Version_directive { major; minor } ->
      if t.version <> None then
        Error.raise_span tok.span (Invalid_yaml_version "duplicate YAML directive");
      t.version <- Some (major, minor)
    | Token.Tag_directive { handle; prefix } ->
      (* Skip empty tag directives (these are reserved/unknown directives that were ignored) *)
      if handle = "" && prefix = "" then
        () (* Ignore reserved directives *)
      else begin
        if List.mem handle !declared then
          Error.raise_span tok.span (Invalid_tag_directive ("duplicate tag handle: " ^ handle));
        declared := handle :: !declared;
        (* Prepending makes the new prefix shadow any default for the handle. *)
        t.tag_directives <- (handle, prefix) :: t.tag_directives
      end
    | _ -> ()
  done
(** Parse anchor and/or tag properties: consumes every leading anchor or tag
    token and returns the pair [(anchor, tag)]. Nothing is consumed when
    the current token is neither. A repeated anchor is reported as
    [Duplicate_anchor] and a repeated tag as [Invalid_tag]. *)
let parse_properties t =
  let is_property = function
    | Token.Anchor _ | Token.Tag _ -> true
    | _ -> false
  in
  let rec gather anchor tag =
    if not (check t is_property) then (anchor, tag)
    else begin
      let tok = current_token t in
      skip_token t;
      match tok.token with
      | Token.Anchor name ->
        if anchor <> None then
          Error.raise_span tok.span (Duplicate_anchor name);
        gather (Some name) tag
      | Token.Tag { handle; suffix } ->
        if tag <> None then
          Error.raise_span tok.span (Invalid_tag "duplicate tag");
        let resolved =
          match handle, suffix with
          | "", "" -> None
          | "!", "" -> Some "!"
          | _ -> Some (resolve_tag t ~handle ~suffix)
        in
        gather anchor resolved
      | _ -> gather anchor tag
    end
  in
  gather None None
(** Empty scalar event: a plain scalar with an empty value, paired with
    [span]. It is plain-implicit exactly when no tag is given. *)
let empty_scalar_event ~anchor ~tag span =
  let untagged = Option.is_none tag in
  ( Event.Scalar {
      anchor;
      tag;
      value = "";
      plain_implicit = untagged;
      quoted_implicit = false;
      style = Scalar_style.Plain;
    },
    span )
(** Parse stream start: consumes the current token, which must be a stream
    start token, and moves on to the implicit document start state. Any
    other token is reported as [Unexpected_token]. *)
let parse_stream_start t =
  let tok = current_token t in
  skip_token t;
  let event =
    match tok.token with
    | Token.Stream_start encoding ->
      t.state <- Implicit_document_start;
      Event.Stream_start { encoding }
    | _ ->
      Error.raise_span tok.span (Unexpected_token "expected stream start")
  in
  (event, tok.span)
(** Parse document start (implicit or explicit). Directives are processed
    first; for an explicit start the document start token is then required
    and consumed, anything else being reported as
    [Expected_document_start]. The event carries the span of the token that
    follows. *)
let parse_document_start t ~implicit =
  process_directives t;

  if not implicit then begin
    let marker = current_token t in
    match marker.token with
    | Token.Document_start -> skip_token t
    | _ -> Error.raise_span marker.span Expected_document_start
  end;

  let span =
    match peek_token t with
    | None -> Span.point Position.initial
    | Some tok -> tok.span
  in

  (* After first document, stream_start is false *)
  t.stream_start <- false;
  (* Once the content has been parsed, control returns to Document_end. *)
  push_state t Document_end;
  t.state <- Document_content;
  (Event.Document_start { version = t.version; implicit }, span)
(** Parse document end. The end is explicit when the current token is a
    document end token, which is then consumed; otherwise it is implicit
    and nothing is consumed. *)
let parse_document_end t =
  let explicit =
    check t (function Token.Document_end -> true | _ -> false)
  in
  let span =
    match peek_token t with
    | None -> Span.point Position.initial
    | Some tok -> tok.span
  in

  if explicit then skip_token t;

  (* Track if this document ended explicitly with ... *)
  t.explicit_doc_end <- explicit;
  t.state <- Implicit_document_start;
  (Event.Document_end { implicit = not explicit }, span)
(** Parse node in various contexts.

    Any leading anchor/tag properties are read first; the token that
    follows then decides the event: an alias (only when no properties were
    read), the start of a block or flow collection, a scalar, or - for
    anything else - an empty scalar that consumes nothing.

    @param block Whether block collections may start here
    @param indentless Whether a bare block entry starts an indentless
      sequence here *)
let parse_node t ~block ~indentless =
  let first = current_token t in
  let has_properties =
    match first.token with
    | Token.Anchor _ | Token.Tag _ -> true
    | _ -> false
  in
  let anchor, tag =
    if has_properties then parse_properties t else (None, None)
  in
  (* Still [first] when no properties were consumed. *)
  let tok = current_token t in
  (* Collections and scalars are implicit exactly when they carry no tag. *)
  let untagged = tag = None in
  let sequence_start style =
    (Event.Sequence_start { anchor; tag; implicit = untagged; style }, tok.span)
  in
  let mapping_start style =
    (Event.Mapping_start { anchor; tag; implicit = untagged; style }, tok.span)
  in
  match tok.token with
  | Token.Alias name when not has_properties ->
    skip_token t;
    t.state <- pop_state t;
    (Event.Alias { anchor = name }, tok.span)

  | Token.Block_entry when indentless ->
    (* The entry token is left for the indentless sequence state. *)
    t.state <- Indentless_sequence_entry;
    sequence_start Layout_style.Block

  | Token.Block_sequence_start when block ->
    t.state <- Block_sequence_first_entry;
    skip_token t;
    sequence_start Layout_style.Block

  | Token.Block_mapping_start when block ->
    t.state <- Block_mapping_first_key;
    skip_token t;
    mapping_start Layout_style.Block

  | Token.Flow_sequence_start ->
    t.state <- Flow_sequence_first_entry;
    skip_token t;
    sequence_start Layout_style.Flow

  | Token.Flow_mapping_start ->
    t.state <- Flow_mapping_first_key;
    skip_token t;
    mapping_start Layout_style.Flow

  | Token.Scalar { style; value } ->
    skip_token t;
    t.state <- pop_state t;
    let is_plain = style = Scalar_style.Plain in
    ( Event.Scalar {
        anchor; tag; value;
        plain_implicit = untagged && is_plain;
        quoted_implicit = untagged && not is_plain;
        style;
      },
      tok.span )

  | _ ->
    (* Empty node *)
    t.state <- pop_state t;
    empty_scalar_event ~anchor ~tag tok.span
(** Parse block sequence entry. A block entry token introduces an item,
    which is an empty scalar when the next token is another entry or the
    block end. A block end token closes the sequence. Anything else is
    reported as [Expected_block_entry]. *)
let parse_block_sequence_entry t =
  let tok = current_token t in
  match tok.token with
  | Token.Block_end ->
    skip_token t;
    t.state <- pop_state t;
    (Event.Sequence_end, tok.span)
  | Token.Block_entry ->
    skip_token t;
    let entry_is_empty =
      check t (function
        | Token.Block_entry | Token.Block_end -> true
        | _ -> false)
    in
    if entry_is_empty then begin
      t.state <- Block_sequence_entry;
      empty_scalar_event ~anchor:None ~tag:None tok.span
    end else begin
      push_state t Block_sequence_entry;
      parse_node t ~block:true ~indentless:false
    end
  | _ ->
    Error.raise_span tok.span Expected_block_entry
(** Parse block mapping key. A key token introduces a key, which is an
    empty scalar when the next token is a key, a value or the block end. A
    value token with no preceding key also yields an empty key and is left
    unconsumed. A block end token closes the mapping. Anything else is
    reported as [Expected_key]. *)
let parse_block_mapping_key t =
  let tok = current_token t in
  let empty_key () =
    t.state <- Block_mapping_value;
    empty_scalar_event ~anchor:None ~tag:None tok.span
  in
  match tok.token with
  | Token.Block_end ->
    skip_token t;
    t.state <- pop_state t;
    (Event.Mapping_end, tok.span)
  (* Handle value without explicit key - key is empty/null *)
  | Token.Value -> empty_key ()
  | Token.Key ->
    skip_token t;
    let key_is_empty =
      check t (function
        | Token.Key | Token.Value | Token.Block_end -> true
        | _ -> false)
    in
    if key_is_empty then empty_key ()
    else begin
      push_state t Block_mapping_value;
      parse_node t ~block:true ~indentless:true
    end
  | _ ->
    Error.raise_span tok.span Expected_key
(** Parse block mapping value. A value token introduces a value, which is
    an empty scalar when the next token is a key, a value or the block end.
    Without a value token the value is an implicit empty scalar and nothing
    is consumed. *)
let parse_block_mapping_value t =
  let tok = current_token t in
  let empty_value () =
    t.state <- Block_mapping_key;
    empty_scalar_event ~anchor:None ~tag:None tok.span
  in
  match tok.token with
  | Token.Value ->
    skip_token t;
    let value_is_empty =
      check t (function
        | Token.Key | Token.Value | Token.Block_end -> true
        | _ -> false)
    in
    if value_is_empty then empty_value ()
    else begin
      push_state t Block_mapping_key;
      parse_node t ~block:true ~indentless:true
    end
  | _ ->
    (* Implicit empty value *)
    empty_value ()
(** Parse indentless sequence entry. A block entry token introduces an
    item, which is an empty scalar when the next token is an entry, a key,
    a value or the block end. Any other token ends the sequence without
    being consumed. *)
let parse_indentless_sequence_entry t =
  let tok = current_token t in
  match tok.token with
  | Token.Block_entry ->
    skip_token t;
    let entry_is_empty =
      check t (function
        | Token.Block_entry | Token.Key | Token.Value | Token.Block_end -> true
        | _ -> false)
    in
    if entry_is_empty then begin
      t.state <- Indentless_sequence_entry;
      empty_scalar_event ~anchor:None ~tag:None tok.span
    end else begin
      push_state t Indentless_sequence_entry;
      parse_node t ~block:true ~indentless:false
    end
  | _ ->
    t.state <- pop_state t;
    (Event.Sequence_end, tok.span)
(** Parse the content of one flow sequence entry, once any separating comma
    has been consumed. *)
let parse_flow_sequence_entry_internal t =
  let tok = current_token t in
  (* Single-pair mapping written inline inside the sequence. *)
  let implicit_mapping () =
    t.state <- Flow_sequence_entry_mapping_key;
    ( Event.Mapping_start {
        anchor = None; tag = None;
        implicit = true;
        style = Layout_style.Flow;
      },
      tok.span )
  in
  match tok.token with
  | Token.Flow_sequence_end ->
    (* Trailing comma case - don't emit empty scalar, just close the sequence *)
    skip_token t;
    t.state <- pop_state t;
    (Event.Sequence_end, tok.span)
  | Token.Flow_entry ->
    (* Double comma or comma after comma - invalid *)
    Error.raise_span tok.span (Unexpected_token "unexpected ',' in flow sequence")
  | Token.Key ->
    skip_token t;
    implicit_mapping ()
  | Token.Value ->
    (* Implicit empty key mapping: [ : value ]. The value token is left in
       place for the mapping states. *)
    implicit_mapping ()
  | _ ->
    push_state t Flow_sequence_entry;
    parse_node t ~block:false ~indentless:false

(** Parse flow sequence. The closing bracket ends the sequence. Otherwise
    the first entry is parsed directly, while later entries must be
    preceded by a comma; a missing comma is reported as
    [Expected_sequence_end]. *)
let parse_flow_sequence_entry t ~first =
  let tok = current_token t in
  match tok.token with
  | Token.Flow_sequence_end ->
    skip_token t;
    t.state <- pop_state t;
    (Event.Sequence_end, tok.span)
  | Token.Flow_entry when not first ->
    skip_token t;
    parse_flow_sequence_entry_internal t
  | _ ->
    if first then parse_flow_sequence_entry_internal t
    else Error.raise_span tok.span Expected_sequence_end
(** Parse the key of a single-pair mapping nested in a flow sequence.

    When the current token is [Value], [Flow_entry] or [Flow_sequence_end]
    there is no key text, so an empty scalar stands in for it; otherwise the
    key node is parsed. Either way the next state is
    [Flow_sequence_entry_mapping_value]. *)
let parse_flow_sequence_entry_mapping_key t =
  let here = current_token t in
  let key_is_missing =
    check t (function
      | Token.Value | Token.Flow_entry | Token.Flow_sequence_end -> true
      | _ -> false)
  in
  match key_is_missing with
  | true ->
    t.state <- Flow_sequence_entry_mapping_value;
    empty_scalar_event ~anchor:None ~tag:None here.span
  | false ->
    push_state t Flow_sequence_entry_mapping_value;
    parse_node t ~block:false ~indentless:false
(** Parse the value of a single-pair mapping nested in a flow sequence.

    A [Value] token is consumed. If it is followed by [Flow_entry] or
    [Flow_sequence_end], or if there was no [Value] token, the value is an
    empty scalar. Otherwise the value node is parsed. The next state is
    always [Flow_sequence_entry_mapping_end]. *)
let parse_flow_sequence_entry_mapping_value t =
  let tok = current_token t in
  let emit_empty () =
    t.state <- Flow_sequence_entry_mapping_end;
    empty_scalar_event ~anchor:None ~tag:None tok.span
  in
  let closes_entry = function
    | Token.Flow_entry | Token.Flow_sequence_end -> true
    | _ -> false
  in
  match tok.token with
  | Token.Value ->
    skip_token t;
    if not (check t closes_entry) then begin
      push_state t Flow_sequence_entry_mapping_end;
      parse_node t ~block:false ~indentless:false
    end else
      emit_empty ()
  | _ -> emit_empty ()
(** Close the single-pair mapping nested in a flow sequence.

    Emits [Mapping_end] at the span of the current token (which is not
    consumed) and returns to the [Flow_sequence_entry] state. *)
let parse_flow_sequence_entry_mapping_end t =
  let closing_span = (current_token t).span in
  t.state <- Flow_sequence_entry;
  Event.Mapping_end, closing_span
(** Parse the key of a flow mapping entry, once any separating [','] has
    already been consumed by {!parse_flow_mapping_key}.

    - [Flow_mapping_end]: trailing comma case; closes the mapping without
      emitting an empty scalar.
    - [Flow_entry]: a second comma in a row, rejected as an error.
    - [Key]: consumed; if directly followed by [Value], [Flow_entry] or
      [Flow_mapping_end] the key is an empty scalar, otherwise the key node
      is parsed.
    - anything else: the key node is parsed directly.

    Every successful key path continues in [Flow_mapping_value]. *)
let parse_flow_mapping_key_internal t =
  let tok = current_token t in
  let parse_key_node () =
    push_state t Flow_mapping_value;
    parse_node t ~block:false ~indentless:false
  in
  match tok.token with
  | Token.Flow_entry ->
    Error.raise_span tok.span (Unexpected_token "unexpected ',' in flow mapping")
  | Token.Flow_mapping_end ->
    skip_token t;
    t.state <- pop_state t;
    Event.Mapping_end, tok.span
  | Token.Key ->
    skip_token t;
    let key_is_empty =
      check t (function
        | Token.Value | Token.Flow_entry | Token.Flow_mapping_end -> true
        | _ -> false)
    in
    if key_is_empty then begin
      t.state <- Flow_mapping_value;
      empty_scalar_event ~anchor:None ~tag:None tok.span
    end else
      parse_key_node ()
  | _ ->
    parse_key_node ()

(** Parse flow mapping.

    [first] is [true] for the entry right after the opening brace, where no
    separator is expected. For later entries a [Flow_entry] token must come
    first and is consumed; anything else that is not [Flow_mapping_end]
    raises [Expected_mapping_end]. *)
let parse_flow_mapping_key t ~first =
  let tok = current_token t in
  match tok.token, first with
  | Token.Flow_mapping_end, _ ->
    skip_token t;
    t.state <- pop_state t;
    Event.Mapping_end, tok.span
  | Token.Flow_entry, false ->
    skip_token t;
    parse_flow_mapping_key_internal t
  | _, true ->
    parse_flow_mapping_key_internal t
  | _, false ->
    Error.raise_span tok.span Expected_mapping_end
(** Parse the value of a flow mapping entry.

    With [~empty:true] an empty scalar is emitted unconditionally. Otherwise
    a [Value] token is consumed and, unless it is directly followed by
    [Flow_entry] or [Flow_mapping_end], the value node is parsed. A missing
    [Value] token also yields an empty scalar. The next state is always
    [Flow_mapping_key]. *)
let parse_flow_mapping_value t ~empty =
  let tok = current_token t in
  let no_value () =
    t.state <- Flow_mapping_key;
    empty_scalar_event ~anchor:None ~tag:None tok.span
  in
  match empty, tok.token with
  | true, _ -> no_value ()
  | false, Token.Value ->
    skip_token t;
    let value_is_empty =
      check t (function
        | Token.Flow_entry | Token.Flow_mapping_end -> true
        | _ -> false)
    in
    if value_is_empty then no_value ()
    else begin
      push_state t Flow_mapping_key;
      parse_node t ~block:false ~indentless:false
    end
  | false, _ -> no_value ()
(** Main state machine dispatcher.

    Runs the handler selected by [t.state] and returns the [(event, span)]
    pair it produces. Most states delegate to a dedicated [parse_*]
    function; the document-level states are handled inline. *)
let rec parse t =
  (* Tokens that may legally follow a complete document value. *)
  let at_document_boundary () =
    check t (function
      | Token.Version_directive _ | Token.Tag_directive _
      | Token.Document_start | Token.Document_end | Token.Stream_end -> true
      | _ -> false)
  in
  match t.state with
  | Stream_start -> parse_stream_start t

  | Implicit_document_start ->
    (* Drop any run of '...' markers first; seeing one counts as an
       explicit document end. *)
    let rec drop_document_ends () =
      if check t (function Token.Document_end -> true | _ -> false) then begin
        t.explicit_doc_end <- true;
        skip_token t;
        drop_document_ends ()
      end
    in
    drop_document_ends ();
    let tok = current_token t in
    begin match tok.token with
      | Token.Stream_end ->
        skip_token t;
        t.state <- End;
        t.finished <- true;
        Event.Stream_end, tok.span
      | Token.Document_start ->
        parse_document_start t ~implicit:false
      | Token.Version_directive _ | Token.Tag_directive _ ->
        (* Directives are only allowed at stream start or after explicit ... (MUS6/01) *)
        if not (t.stream_start || t.explicit_doc_end) then
          Error.raise_span tok.span (Invalid_directive "directives require explicit document end '...' before them");
        parse_document_start t ~implicit:false
      | Token.Flow_sequence_end | Token.Flow_mapping_end | Token.Flow_entry
      | Token.Block_end | Token.Value ->
        (* Leftover junk: none of these can begin a document. *)
        Error.raise_span tok.span (Unexpected_token "unexpected token at document start")
      | _ ->
        parse_document_start t ~implicit:true
    end

  | Document_start -> parse_document_start t ~implicit:false

  | Document_content ->
    if at_document_boundary () then begin
      (* The document has no content node: stand in an empty scalar. *)
      let tok = current_token t in
      t.state <- pop_state t;
      empty_scalar_event ~anchor:None ~tag:None tok.span
    end else begin
      (* Return through Document_content_done so that trailing content
         after the node can be detected. *)
      push_state t Document_content_done;
      parse_node t ~block:true ~indentless:false
    end

  | Document_content_done ->
    if at_document_boundary () then begin
      (* Valid boundary: resume the saved state and emit its event now. *)
      t.state <- pop_state t;
      parse t
    end else begin
      (* Unexpected content after document value - this is an error (KS4U, BS4K) *)
      let tok = current_token t in
      Error.raise_span tok.span
        (Unexpected_token "content not allowed after document value")
    end

  | Document_end -> parse_document_end t

  | Block_node -> parse_node t ~block:true ~indentless:false
  | Block_node_or_indentless_sequence -> parse_node t ~block:true ~indentless:true
  | Flow_node -> parse_node t ~block:false ~indentless:false

  | Block_sequence_first_entry ->
    t.state <- Block_sequence_entry;
    parse_block_sequence_entry t
  | Block_sequence_entry -> parse_block_sequence_entry t
  | Indentless_sequence_entry -> parse_indentless_sequence_entry t

  | Block_mapping_first_key ->
    t.state <- Block_mapping_key;
    parse_block_mapping_key t
  | Block_mapping_key -> parse_block_mapping_key t
  | Block_mapping_value -> parse_block_mapping_value t

  | Flow_sequence_first_entry -> parse_flow_sequence_entry t ~first:true
  | Flow_sequence_entry -> parse_flow_sequence_entry t ~first:false
  | Flow_sequence_entry_mapping_key -> parse_flow_sequence_entry_mapping_key t
  | Flow_sequence_entry_mapping_value -> parse_flow_sequence_entry_mapping_value t
  | Flow_sequence_entry_mapping_end -> parse_flow_sequence_entry_mapping_end t

  | Flow_mapping_first_key -> parse_flow_mapping_key t ~first:true
  | Flow_mapping_key -> parse_flow_mapping_key t ~first:false
  | Flow_mapping_value -> parse_flow_mapping_value t ~empty:false
  | Flow_mapping_empty_value -> parse_flow_mapping_value t ~empty:true

  | End ->
    (* No token is read here, so the span is a point at the initial
       position. *)
    let span = Span.point Position.initial in
    t.finished <- true;
    Event.Stream_end, span
(** Get next event.

    Returns [None] once the parser is marked finished; otherwise runs one
    step of the state machine and wraps the resulting event and span. *)
let next t =
  match t.finished with
  | true -> None
  | false ->
    let event, span = parse t in
    Some { Event.event; span }
(** Peek at next event.

    NOTE: this is not a real lookahead. The parser has no event cache or
    state save/restore, so this simply delegates to {!next} and therefore
    CONSUMES the event it returns. Returns [None] when the parser is
    finished ({!next} performs that check). *)
let peek t = next t
(** Iterate over all events: calls [f] on each event produced by {!next},
    in order, until {!next} returns [None]. *)
let iter f t =
  let more = ref true in
  while !more do
    match next t with
    | Some ev -> f ev
    | None -> more := false
  done
(** Fold over all events: threads an accumulator, starting at [init],
    through [f acc ev] for each event produced by {!next}, and returns the
    final accumulator once {!next} returns [None]. *)
let fold f init t =
  let acc = ref init in
  let more = ref true in
  while !more do
    match next t with
    | Some ev -> acc := f !acc ev
    | None -> more := false
  done;
  !acc
(** Convert to list: drains the parser and returns its remaining events in
    the order they were produced. *)
let to_list t =
  (* Events are consed onto the front, so the accumulator is reversed. *)
  let rev_events = fold (fun acc ev -> ev :: acc) [] t in
  List.rev rev_events
-42
yaml/ocaml-yamle/lib/position.ml
(** Source positions: a byte offset paired with a line/column location. *)

type t = {
  index : int;  (** Byte offset from start *)
  line : int;  (** 1-indexed line number *)
  column : int;  (** 1-indexed column number *)
}

(** Position of the first byte: offset 0, line 1, column 1. *)
let initial = { index = 0; line = 1; column = 1 }

(** Move one byte forward on the same line. *)
let advance_byte { index; line; column } =
  { index = index + 1; line; column = column + 1 }

(** Move one byte forward onto the start of the next line. *)
let advance_line { index; line; _ } =
  { index = index + 1; line = line + 1; column = 1 }

(** Advance past the single byte [c]. Only ['\n'] starts a new line. *)
let advance_char c t =
  match c with
  | '\n' -> advance_line t
  | _ -> advance_byte t

(** Advance past one Unicode scalar value: the index grows by its UTF-8
    encoded length, while the column grows by one (or resets on LF). *)
let advance_utf8 uchar t =
  let index = t.index + Uchar.utf_8_byte_length uchar in
  if Uchar.to_int uchar = 0x0A (* LF *) then
    { index; line = t.line + 1; column = 1 }
  else
    { t with index; column = t.column + 1 }

(** Advance [n] bytes on the same line; index and column both grow by [n]. *)
let advance_bytes n { index; line; column } =
  { index = index + n; line; column = column + n }

(** Print as ["line L, column C"]. *)
let pp fmt { line; column; _ } =
  Format.fprintf fmt "line %d, column %d" line column

(** Render with {!pp}. *)
let to_string t =
  Format.asprintf "%a" pp t

(** Order positions by byte offset only. *)
let compare a b =
  Int.compare a.index b.index

(** Positions are equal when their byte offsets are equal; line and column
    are not consulted. *)
let equal a b =
  Int.equal a.index b.index
-61
yaml/ocaml-yamle/lib/scalar.ml
(** YAML scalar values together with their anchor, tag and style metadata. *)

type t = {
  anchor : string option;
  tag : string option;
  value : string;
  plain_implicit : bool;
  quoted_implicit : bool;
  style : Scalar_style.t;
}

(** Build a scalar from its text. Defaults: no anchor, no tag,
    [plain_implicit = true], [quoted_implicit = false], plain style. *)
let make
    ?anchor
    ?tag
    ?(plain_implicit = true)
    ?(quoted_implicit = false)
    ?(style = Scalar_style.Plain)
    value : t =
  { anchor; tag; value; plain_implicit; quoted_implicit; style }

(* Field accessors. *)
let value { value; _ } = value
let anchor { anchor; _ } = anchor
let tag { tag; _ } = tag
let style { style; _ } = style
let plain_implicit { plain_implicit; _ } = plain_implicit
let quoted_implicit { quoted_implicit; _ } = quoted_implicit

(* Functional updates; each returns a modified copy. *)
let with_anchor anchor t = { t with anchor = Some anchor }
let with_tag tag t = { t with tag = Some tag }
let with_style style t = { t with style }

(** Print as [scalar("value", anchor=..., tag=..., style=...)]; the anchor
    and tag parts appear only when present. *)
let pp fmt t =
  Format.fprintf fmt "scalar(%S" t.value;
  Option.iter (fun a -> Format.fprintf fmt ", anchor=%s" a) t.anchor;
  Option.iter (fun tag -> Format.fprintf fmt ", tag=%s" tag) t.tag;
  Format.fprintf fmt ", style=%a)" Scalar_style.pp t.style

(** Field-by-field equality over all six fields. *)
let equal a b =
  Option.equal String.equal a.anchor b.anchor
  && Option.equal String.equal a.tag b.tag
  && String.equal a.value b.value
  && Bool.equal a.plain_implicit b.plain_implicit
  && Bool.equal a.quoted_implicit b.quoted_implicit
  && Scalar_style.equal a.style b.style

(** Lexicographic ordering over the fields, in the order anchor, tag,
    value, plain_implicit, quoted_implicit, style. *)
let compare a b =
  (* Each step is delayed so later fields are only compared on a tie. *)
  let steps = [
    (fun () -> Option.compare String.compare a.anchor b.anchor);
    (fun () -> Option.compare String.compare a.tag b.tag);
    (fun () -> String.compare a.value b.value);
    (fun () -> Bool.compare a.plain_implicit b.plain_implicit);
    (fun () -> Bool.compare a.quoted_implicit b.quoted_implicit);
    (fun () -> Scalar_style.compare a.style b.style);
  ] in
  let rec first_nonzero = function
    | [] -> 0
    | step :: rest ->
      (match step () with
       | 0 -> first_nonzero rest
       | c -> c)
  in
  first_nonzero steps
-33
yaml/ocaml-yamle/lib/scalar_style.ml
(** The presentation styles a YAML scalar can take. *)

type t =
  | Any            (** Let emitter choose *)
  | Plain          (** Unquoted: foo *)
  | Single_quoted  (** 'foo' *)
  | Double_quoted  (** "foo" *)
  | Literal        (** | block *)
  | Folded         (** > block *)

(** Lowercase, hyphenated name of the style. *)
let to_string style =
  match style with
  | Any -> "any"
  | Plain -> "plain"
  | Single_quoted -> "single-quoted"
  | Double_quoted -> "double-quoted"
  | Literal -> "literal"
  | Folded -> "folded"

(** Print the name given by {!to_string}. *)
let pp fmt style =
  style |> to_string |> Format.pp_print_string fmt

(** Two styles are equal when they are the same constructor. *)
let equal a b =
  match a, b with
  | Any, Any
  | Plain, Plain
  | Single_quoted, Single_quoted
  | Double_quoted, Double_quoted
  | Literal, Literal
  | Folded, Folded -> true
  | _ -> false

(** Total order following declaration order: [Any] first, [Folded] last. *)
let compare a b =
  let rank = function
    | Any -> 0
    | Plain -> 1
    | Single_quoted -> 2
    | Double_quoted -> 3
    | Literal -> 4
    | Folded -> 5
  in
  Int.compare (rank a) (rank b)
-1568
yaml/ocaml-yamle/lib/scanner.ml
···11-(** YAML tokenizer/scanner with lookahead for ambiguity resolution *)

(** Bookkeeping for a candidate simple key, used to disambiguate mapping
    keys once a later [':'] is (or is not) seen. *)
type simple_key = {
  sk_possible : bool;
  (** Whether the candidate is still live. *)
  sk_required : bool;
  (** When set, discarding the candidate raises [Expected_key]. *)
  sk_token_number : int;
  (** The scanner's token counter when the candidate was saved. *)
  sk_position : Position.t;
  (** Input position when the candidate was saved. *)
}

(** One level of the block indentation stack. *)
type indent = {
  indent : int;
  (** Column recorded for this level. *)
  needs_block_end : bool;
  (** Whether a [Block_end] token is emitted when this level is unrolled. *)
  sequence : bool;
  (** true if this is a sequence indent *)
}

(** Mutable scanner state. *)
type t = {
  input : Input.t;
  mutable tokens : Token.spanned Queue.t;
  mutable token_number : int;
  mutable tokens_taken : int;
  mutable stream_started : bool;
  mutable stream_ended : bool;
  mutable indent_stack : indent list;
  (** Stack of indentation levels, innermost first *)
  mutable flow_level : int;
  (** Nesting depth in [] or {} *)
  mutable flow_indent : int;
  (** Column where outermost flow collection started *)
  mutable simple_keys : simple_key option list;
  (** Per flow-level simple key tracking *)
  mutable allow_simple_key : bool;
  mutable leading_whitespace : bool;
  (** True when at start of line (only whitespace seen) *)
  mutable document_has_content : bool;
  (** True if we've emitted content tokens in current document *)
  mutable adjacent_value_allowed_at : Position.t option;
  (** Position where adjacent : is allowed *)
  mutable pending_value : bool;
  (** True if we've emitted a KEY and are waiting for VALUE *)
  mutable flow_mapping_stack : bool list;
  (** Stack of whether each flow level is a mapping *)
}

(** Build a scanner over [input] in its initial state: empty token queue,
    empty indent stack, block context ([flow_level = 0]), simple keys
    allowed, and positioned at the start of a line. *)
let create input =
  let tokens = Queue.create () in
  {
    input;
    tokens;
    (* counters *)
    token_number = 0;
    tokens_taken = 0;
    (* stream / document progress *)
    stream_started = false;
    stream_ended = false;
    document_has_content = false;
    (* block and flow nesting *)
    indent_stack = [];
    flow_level = 0;
    flow_indent = 0;
    flow_mapping_stack = [];
    (* simple-key tracking; one entry for the base level *)
    simple_keys = [None];
    allow_simple_key = true;
    adjacent_value_allowed_at = None;
    pending_value = false;
    (* start at beginning of stream *)
    leading_whitespace = true;
  }

(** Build a scanner that reads from the string [s]. *)
let of_string s = s |> Input.of_string |> create

(** Current position of the underlying input. *)
let position { input; _ } = Input.position input

(** Append [token], tagged with [span], to the token queue and bump the
    scanner's token counter. *)
let emit t span token =
  let spanned = { Token.token; span } in
  Queue.add spanned t.tokens;
  t.token_number <- succ t.token_number

(** Column of the current input position (1-indexed). *)
let column t =
  let pos = Input.position t.input in
  pos.Position.column

(** Indent of the innermost level on the indent stack, or [-1] when the
    stack is empty. *)
let current_indent t =
  match List.nth_opt t.indent_stack 0 with
  | Some top -> top.indent
  | None -> -1

(** Skip blanks (spaces and tabs) and then, if a ['#'] follows, the comment
    up to (not including) the line break or end of input. Returns unit.

    A comment is only accepted when at least one blank was skipped just
    before it, or when the character preceding ['#'] is whitespace, or when
    there is no preceding character at all.

    @raise Error with [Invalid_comment] when ['#'] directly follows a
    non-whitespace character. *)
let skip_whitespace_and_comment t =
  let input = t.input in
  (* Consumes blanks; the result says whether any were seen. *)
  let rec eat_blanks seen =
    if Input.next_is_blank input then begin
      ignore (Input.next input);
      eat_blanks true
    end else
      seen
  in
  let rec eat_to_line_end () =
    if not (Input.is_eof input || Input.next_is_break input) then begin
      ignore (Input.next input);
      eat_to_line_end ()
    end
  in
  let saw_blank = eat_blanks false in
  if Input.next_is (( = ) '#') input then begin
    if not saw_blank then begin
      match Input.peek_back input with
      | None -> () (* Start of input - OK *)
      | Some prev when Input.is_whitespace prev -> () (* After whitespace - OK *)
      | Some _ -> Error.raise_at (Input.mark input) Invalid_comment
    end;
    eat_to_line_end ()
  end

(** Consume a run of blanks and report what it contained, as
    [(found_tab, found_space)]. *)
let skip_blanks_check_tabs t =
  let rec go tabs spaces =
    if Input.next_is_blank t.input then begin
      let tabs, spaces =
        match Input.peek t.input with
        | Some '\t' -> true, spaces
        | Some ' ' -> tabs, true
        | _ -> tabs, spaces
      in
      ignore (Input.next t.input);
      go tabs spaces
    end else
      (tabs, spaces)
  in
  go false false

(** Advance the input past whitespace, comments and line breaks until the
    start of the next token. Returns unit.

    In block context each consumed line break re-enables simple keys and
    marks the scanner as being in leading whitespace. In flow context all
    whitespace, including line breaks, is skipped, and simple keys are
    re-enabled after a break.

    @raise Error with [Tab_in_indentation] when, in block context, a tab
    sits in the indentation zone of a line that has content.
    @raise Error with [Invalid_flow_indentation] when, in flow context, a
    line starts with tabs and its content begins left of [flow_indent]
    (Y79Y/03).
    @raise Error with [Invalid_comment] via [skip_whitespace_and_comment]. *)
let rec skip_to_next_token t =
  let input = t.input in
  (* True when the rest of the line is neither empty nor end of input. *)
  let line_has_content () =
    not (Input.next_is_break input || Input.is_eof input)
  in
  (* Check for tabs used as indentation in block context *)
  let tab_in_indent_zone =
    Input.peek input = Some '\t'
    && t.flow_level = 0
    && t.leading_whitespace
    && column t - 1 < current_indent t
  in
  if tab_in_indent_zone then begin
    let tab_start = Input.mark input in
    while Input.next_is_blank input do
      ignore (Input.next input)
    done;
    (* A whitespace-only line is harmless; only content makes it an error. *)
    if line_has_content () then
      Error.raise_at tab_start Tab_in_indentation
  end;

  (* Skip blanks and validate comments *)
  skip_whitespace_and_comment t;

  if t.flow_level = 0 && Input.next_is_break input then begin
    (* Block context: consume the break and start over on the next line. *)
    Input.consume_break input;
    t.allow_simple_key <- true;
    t.leading_whitespace <- true;
    skip_to_next_token t
  end
  else if t.flow_level > 0 && Input.next_is_whitespace input then begin
    if not (Input.next_is_break input) then begin
      (* Plain blank inside a flow collection. *)
      ignore (Input.next input);
      skip_to_next_token t
    end else begin
      Input.consume_break input;
      (* Allow simple keys after line breaks in flow context *)
      t.allow_simple_key <- true;
      (* Tabs may not serve as indentation: a line opening with tabs must
         not leave its content left of flow_indent. *)
      if Input.next_is (( = ) '\t') input then begin
        let tabs_start = Input.mark input in
        while Input.next_is (( = ) '\t') input do
          ignore (Input.next input)
        done;
        if line_has_content () && column t < t.flow_indent then
          Error.raise_at tabs_start Invalid_flow_indentation
      end;
      skip_to_next_token t
    end
  end

(** Open a new indentation level at column [col] when in block context and
    [col] is deeper than the current indent. The new level is flagged as
    needing a block end and carries the [sequence] flag. Returns [true]
    exactly when a level was pushed. *)
let roll_indent t col ~sequence =
  let deeper = t.flow_level = 0 && col > current_indent t in
  if deeper then begin
    let level = { indent = col; needs_block_end = true; sequence } in
    t.indent_stack <- level :: t.indent_stack
  end;
  deeper

(** Close indentation levels deeper than column [col].

    Only acts in block context. While the innermost level needs a block end
    and its indent exceeds [col], a [Block_end] token is emitted at the
    current input position and the level is popped. Stops at the first
    level that does not qualify. *)
let unroll_indent t col =
  let rec pop_levels () =
    if t.flow_level = 0 then begin
      match t.indent_stack with
      | { indent; needs_block_end = true; _ } :: outer when indent > col ->
        let here = Span.point (Input.position t.input) in
        emit t here Token.Block_end;
        t.indent_stack <- outer;
        pop_levels ()
      | _ -> ()
    end
  in
  pop_levels ()

(** Record the current position as a candidate simple key for the current
    level. Does nothing when simple keys are not allowed here.

    The candidate is marked required only in block context, when the
    innermost indent level needs a block end and the current column equals
    its indent (this matches saphyr's logic and prevents false positives
    for values). Any candidate previously stored for this level is
    replaced. *)
let save_simple_key t =
  match t.allow_simple_key with
  | false -> ()
  | true ->
    let at_block_indent () =
      match t.indent_stack with
      | { indent; needs_block_end = true; _ } :: _ -> indent = column t
      | _ -> false
    in
    let candidate = Some {
      sk_possible = true;
      sk_required = (t.flow_level = 0 && at_block_indent ());
      sk_token_number = t.token_number;
      sk_position = Input.position t.input;
    } in
    (* Replace the head slot (current level), keeping the outer levels. *)
    let outer_levels =
      match t.simple_keys with
      | [] -> []
      | _ :: rest -> rest
    in
    t.simple_keys <- candidate :: outer_levels

(** Clear the simple key candidate of the current level.

    @raise Error with [Expected_key], at the candidate's position, when the
    candidate being dropped was required. *)
let remove_simple_key t =
  match t.simple_keys with
  | [] -> ()
  | Some { sk_required = true; sk_position; _ } :: _ ->
    Error.raise_at sk_position Expected_key
  | (Some _ | None) :: outer ->
    t.simple_keys <- None :: outer

(** Drop simple key candidates that can no longer become keys.

    In block context, a live candidate saved on an earlier line than the
    current input line is discarded. Candidates are left untouched in flow
    context.

    @raise Error with [Expected_key] when a discarded candidate was
    required. *)
let stale_simple_keys t =
  let current_line = (Input.position t.input).Position.line in
  let refresh slot =
    match slot with
    | Some sk
      when sk.sk_possible
           && current_line > sk.sk_position.Position.line
           && t.flow_level = 0 ->
      if sk.sk_required then
        Error.raise_at sk.sk_position Expected_key;
      None
    | _ -> slot
  in
  t.simple_keys <- List.map refresh t.simple_keys

(** Read an anchor or alias name starting at the current position and
    return [(name, span)].

    Per YAML 1.2, the name runs until whitespace (space, tab, line break),
    a flow indicator, or a NUL byte; this matches the saphyr rule
    [is_yaml_non_space && !is_flow].

    @raise Error with [Invalid_anchor] when the name is empty. *)
let scan_anchor_alias t =
  let start = Input.mark t.input in
  let name_buf = Buffer.create 16 in
  let rec collect () =
    match Input.peek t.input with
    | Some c
      when not (Input.is_whitespace c
                || Input.is_flow_indicator c
                || c = '\x00') ->
      Buffer.add_char name_buf c;
      ignore (Input.next t.input);
      collect ()
    | _ -> ()
  in
  collect ();
  let name = Buffer.contents name_buf in
  if name = "" then
    Error.raise_at start (Invalid_anchor "empty anchor name");
  let stop = Input.mark t.input in
  (name, Span.make ~start ~stop)

(** Scan a tag handle: a mandatory ['!'], then any run of alphanumerics or
    ['-'], then an optional closing ['!']. Returns the handle text
    including the ['!'] characters that were read.

    @raise Error with [Invalid_tag] when the input does not start with
    ['!']. *)
let scan_tag_handle t =
  let start = Input.mark t.input in
  let handle = Buffer.create 16 in
  (* Record [c] and step over it in the input. *)
  let take c =
    Buffer.add_char handle c;
    ignore (Input.next t.input)
  in
  (match Input.peek t.input with
   | Some '!' -> take '!'
   | _ -> Error.raise_at start (Invalid_tag "expected '!'"));
  let rec take_word () =
    match Input.peek t.input with
    | Some c when Input.is_alnum c || c = '-' ->
      take c;
      take_word ()
    | _ -> ()
  in
  take_word ();
  (* Check for secondary ! *)
  (match Input.peek t.input with
   | Some '!' -> take '!'
   | _ -> ());
  Buffer.contents handle

(** Scan the suffix of a tag (the part after the handle) and return it with
    percent escapes decoded.

    Reading stops at whitespace, a flow indicator, or end of input. A ['%']
    followed by two hex digits becomes the byte they encode; a ['%'] not
    followed by two hex digits is kept literally. *)
let scan_tag_suffix t =
  let hex_digit_value = function
    | '0'..'9' as c -> Some (Char.code c - Char.code '0')
    | 'A'..'F' as c -> Some (Char.code c - Char.code 'A' + 10)
    | 'a'..'f' as c -> Some (Char.code c - Char.code 'a' + 10)
    | _ -> None
  in
  let out = Buffer.create 32 in
  let rec loop () =
    match Input.peek t.input with
    | Some '%' ->
      ignore (Input.next t.input);
      (* Look at the next two characters without consuming them yet. *)
      let hi = Option.bind (Input.peek t.input) hex_digit_value in
      let lo = Option.bind (Input.peek_nth t.input 1) hex_digit_value in
      (match hi, lo with
       | Some hi, Some lo ->
         ignore (Input.next t.input);
         ignore (Input.next t.input);
         Buffer.add_char out (Char.chr (hi * 16 + lo))
       | _ ->
         (* Invalid percent encoding - keep the % *)
         Buffer.add_char out '%');
      loop ()
    | Some c
      when not (Input.is_whitespace c || Input.is_flow_indicator c) ->
      Buffer.add_char out c;
      ignore (Input.next t.input);
      loop ()
    | _ -> ()
  in
  loop ();
  Buffer.contents out

(** Scan a tag starting at its leading ['!'] and return
    [(handle, suffix, span)].

    Forms handled, by the character after the first ['!']:
    - ['<']: verbatim tag [!<...>]; handle is [""], suffix is the text
      between the angle brackets.
    - whitespace or flow indicator: the non-specific tag; handle ["!"],
      empty suffix.
    - ['!']: secondary handle ["!!"] followed by a suffix.
    - otherwise: a run of alphanumerics/['-'] is read; if a ['!'] follows
      it is a named handle ([!name!suffix]), else the handle is ["!"] and
      the run begins the suffix.

    @raise Error with [Invalid_tag] for an unclosed verbatim tag, or when
    the tag is not followed by end of input, whitespace, or (in flow
    context) a flow indicator. *)
let scan_tag t =
  let start = Input.mark t.input in
  ignore (Input.next t.input); (* consume ! *)
  let handle, suffix =
    match Input.peek t.input with
    | Some '<' ->
      ignore (Input.next t.input);
      let uri = Buffer.create 32 in
      let rec read_uri () =
        match Input.peek t.input with
        | Some '>' -> ()
        | Some c ->
          Buffer.add_char uri c;
          ignore (Input.next t.input);
          read_uri ()
        | None ->
          Error.raise_at (Input.mark t.input) (Invalid_tag "unclosed verbatim tag")
      in
      read_uri ();
      ignore (Input.next t.input); (* consume > *)
      ("", Buffer.contents uri)
    | Some c when Input.is_whitespace c || Input.is_flow_indicator c ->
      ("!", "")
    | Some '!' ->
      ignore (Input.next t.input); (* consume second ! *)
      let suffix = scan_tag_suffix t in
      ("!!", suffix)
    | _ ->
      (* Read the word that is either a handle name or the suffix start. *)
      let word = Buffer.create 16 in
      let rec read_word () =
        match Input.peek t.input with
        | Some c when Input.is_alnum c || c = '-' ->
          Buffer.add_char word c;
          ignore (Input.next t.input);
          read_word ()
        | _ -> ()
      in
      read_word ();
      let text = Buffer.contents word in
      (match Input.peek t.input with
       | Some '!' ->
         (* Named handle like !e! *)
         ignore (Input.next t.input);
         let suffix = scan_tag_suffix t in
         ("!" ^ text ^ "!", suffix)
       | _ ->
         (* Just ! followed by suffix *)
         let rest = scan_tag_suffix t in
         ("!", text ^ rest))
  in
  (* The tag must be followed by whitespace, a break, or (in flow) a flow
     indicator; end of input is also fine. *)
  let terminated =
    match Input.peek t.input with
    | None -> true
    | Some c ->
      Input.is_whitespace c
      || Input.is_break c
      || (t.flow_level > 0 && Input.is_flow_indicator c)
  in
  if not terminated then
    Error.raise_at start (Invalid_tag "expected whitespace or line break after tag");
  let stop = Input.mark t.input in
  (handle, suffix, Span.make ~start ~stop)

(** Scan a single-quoted scalar starting at its opening quote and return
    [(contents, span)].

    - [''] inside the scalar is an escaped single quote.
    - Blanks are held back and only kept if more content (or the closing
      quote) follows on the same line; blanks before a line break are
      dropped, as are blanks leading a continuation line.
    - A single line break folds to one space; a break followed by [n] empty
      lines becomes [n] newlines.

    @raise Error with [Unclosed_single_quote] at end of input or when a
    continuation line sits at a document boundary.
    @raise Error with [Invalid_quoted_scalar_indentation] when a
    continuation line with content is not indented past the current block
    indent (QB6E, DK95). *)
let scan_single_quoted t =
  let input = t.input in
  let start = Input.mark input in
  ignore (Input.next input); (* consume opening single-quote *)
  let buf = Buffer.create 64 in
  let pending_ws = Buffer.create 16 in (* blanks not yet committed *)

  (* Commit held-back blanks; appending an empty buffer is a no-op. *)
  let flush_pending () =
    Buffer.add_buffer buf pending_ws;
    Buffer.clear pending_ws
  in

  (* Run at the start of every continuation line: skip its leading blanks,
     then reject document boundaries and under-indented content. *)
  let enter_continuation_line () =
    while Input.next_is_blank input do
      ignore (Input.next input)
    done;
    if Input.at_document_boundary input then
      Error.raise_at start Unclosed_single_quote;
    let col = column t in
    let indent = current_indent t in
    if not (Input.is_eof input || Input.next_is_break input)
       && col <= indent && indent >= 0 then
      Error.raise_at (Input.mark input)
        (Invalid_quoted_scalar_indentation "invalid indentation in quoted scalar")
  in

  (* Consume consecutive empty lines, returning how many there were. *)
  let rec count_empty_lines n =
    if Input.next_is_break input then begin
      Input.consume_break input;
      enter_continuation_line ();
      count_empty_lines (n + 1)
    end else
      n
  in

  let rec loop () =
    match Input.peek input with
    | None -> Error.raise_at start Unclosed_single_quote
    | Some '\'' ->
      ignore (Input.next input);
      (match Input.peek input with
       | Some '\'' ->
         (* Escaped quote ('') *)
         flush_pending ();
         Buffer.add_char buf '\'';
         ignore (Input.next input);
         loop ()
       | _ ->
         (* Closing quote: blanks right before it belong to the value. *)
         flush_pending ())
    | Some (' ' | '\t' as blank) ->
      Buffer.add_char pending_ws blank;
      ignore (Input.next input);
      loop ()
    | Some ('\n' | '\r') ->
      (* Discard trailing whitespace before line break *)
      Buffer.clear pending_ws;
      Input.consume_break input;
      enter_continuation_line ();
      let empty_lines = count_empty_lines 0 in
      (* Apply folding rules *)
      if empty_lines > 0 then
        Buffer.add_string buf (String.make empty_lines '\n')
      else
        (* Single break: fold to space (even at start of string) *)
        Buffer.add_char buf ' ';
      loop ()
    | Some c ->
      flush_pending ();
      Buffer.add_char buf c;
      ignore (Input.next input);
      loop ()
  in
  loop ();
  let stop = Input.mark input in
  (Buffer.contents buf, Span.make ~start ~stop)
475475-476476-(** Decode hex escape of given length *)
let decode_hex t len =
  (* [decode_hex t len] consumes exactly [len] hexadecimal digits from
     [t.input] and returns the UTF-8 encoding of the code point they spell.

     Errors are raised through [Error.raise_at] at the position where the
     digits begin:
     - [Invalid_hex_escape] with the digits read so far when a non-hex
       character or end of input is met before [len] digits were read;
     - [Invalid_hex_escape] with all the digits when they spell a value
       above U+10FFFF. Such a value has no UTF-8 encoding, and without this
       check the lead byte computed below can exceed 255, in which case
       [Char.chr] raises [Invalid_argument]. *)
  let start = Input.mark t.input in
  let digits = Buffer.create len in
  for _ = 1 to len do
    match Input.peek t.input with
    | Some c when Input.is_hex c ->
      Buffer.add_char digits c;
      ignore (Input.next t.input)
    | _ ->
      Error.raise_at start (Invalid_hex_escape (Buffer.contents digits))
  done;
  let digits = Buffer.contents digits in
  let code =
    (* [int_of_string_opt] also covers literals the native int cannot hold;
       the sign test guards against a literal that wrapped to a negative. *)
    match int_of_string_opt ("0x" ^ digits) with
    | Some code when code >= 0 && code <= 0x10FFFF -> code
    | Some _ | None -> Error.raise_at start (Invalid_hex_escape digits)
  in
  (* UTF-8 continuation byte holding the six bits of [code] at [shift]. *)
  let tail shift = 0x80 lor ((code lsr shift) land 0x3F) in
  (* NOTE(review): values in the surrogate range D800-DFFF are still encoded
     as three bytes, exactly as before; confirm whether they should be
     rejected as well. *)
  let bytes =
    if code <= 0x7F then [ code ]
    else if code <= 0x7FF then [ 0xC0 lor (code lsr 6); tail 0 ]
    else if code <= 0xFFFF then [ 0xE0 lor (code lsr 12); tail 6; tail 0 ]
    else [ 0xF0 lor (code lsr 18); tail 12; tail 6; tail 0 ]
  in
  bytes
  |> List.map (fun b -> String.make 1 (Char.chr b))
  |> String.concat ""
506506-507507-(** Scan double-quoted scalar *)
let scan_double_quoted t =
  (* Scans a double-quoted scalar whose opening quote is the next input
     character and returns [(contents, span)], where [span] runs from the
     opening quote to just past the closing one.

     Blanks are buffered in [pending] and only committed once something
     other than a line break follows, so blanks at the end of a line are
     dropped while blanks before the closing quote or before an escape are
     kept. An unescaped line break folds to a single space; a break followed
     by N empty lines yields N newlines.

     Raises through [Error.raise_at]: [Unclosed_double_quote] (end of input
     or a document boundary inside the scalar), [Invalid_escape_sequence],
     [Invalid_quoted_scalar_indentation], plus whatever [decode_hex]
     raises. *)
  let start = Input.mark t.input in
  ignore (Input.next t.input); (* opening quote *)
  let buf = Buffer.create 64 in
  let pending = Buffer.create 16 in
  let advance () = ignore (Input.next t.input) in
  let commit_pending () =
    if Buffer.length pending > 0 then begin
      Buffer.add_buffer buf pending;
      Buffer.clear pending
    end
  in
  let skip_blanks () =
    while Input.next_is_blank t.input do
      advance ()
    done
  in
  (* Emit the expansion of a one-character escape, then step over it. *)
  let put_char ch = Buffer.add_char buf ch; advance () in
  let put_string s = Buffer.add_string buf s; advance () in
  (* Step over the x / u / U selector, then decode [len] hex digits. *)
  let put_hex len = advance (); Buffer.add_string buf (decode_hex t len) in
  let bad_indentation () =
    Error.raise_at (Input.mark t.input)
      (Invalid_quoted_scalar_indentation "invalid indentation in quoted scalar")
  in

  (* Called with the backslash already consumed. *)
  let scan_escape () =
    match Input.peek t.input with
    | None -> Error.raise_at start (Invalid_escape_sequence "\\<EOF>")
    | Some ch ->
      (match ch with
       | '0' -> put_char '\x00'
       | 'a' -> put_char '\x07'
       | 'b' -> put_char '\x08'
       | 't' | '\t' -> put_char '\t'
       | 'n' -> put_char '\n'
       | 'v' -> put_char '\x0B'
       | 'f' -> put_char '\x0C'
       | 'r' -> put_char '\r'
       | 'e' -> put_char '\x1B'
       | ' ' | '"' | '/' | '\\' -> put_char ch (* stand for themselves *)
       | 'N' -> put_string "\xC2\x85" (* NEL *)
       | '_' -> put_string "\xC2\xA0" (* NBSP *)
       | 'L' -> put_string "\xE2\x80\xA8" (* LS *)
       | 'P' -> put_string "\xE2\x80\xA9" (* PS *)
       | 'x' -> put_hex 2
       | 'u' -> put_hex 4
       | 'U' -> put_hex 8
       | '\n' | '\r' ->
         (* Escaped line break: the break and the blanks that start the
            next line produce no output. *)
         Input.consume_break t.input;
         skip_blanks ()
       | _ ->
         Error.raise_at (Input.mark t.input)
           (Invalid_escape_sequence (Printf.sprintf "\\%c" ch)))
  in

  (* Called with an unescaped line break as the next character. *)
  let fold_line_break () =
    (* Blanks in front of a line break are not content. *)
    Buffer.clear pending;
    Input.consume_break t.input;
    (* Skip blank-only lines. Returns how many were skipped and whether the
       line we stop on began with a tab (DK95/01). *)
    let rec skip_empty_lines count =
      let tab_first = Input.next_is (( = ) '\t') t.input in
      skip_blanks ();
      if Input.next_is_break t.input then begin
        Input.consume_break t.input;
        skip_empty_lines (count + 1)
      end else
        (count, tab_first)
    in
    let empty_lines, started_with_tab = skip_empty_lines 0 in
    (* A document boundary here means the quote was never closed. *)
    if Input.at_document_boundary t.input then
      Error.raise_at start Unclosed_double_quote;
    (* A continuation line must sit strictly deeper than the block indent
       (QB6E, DK95). *)
    let col = column t in
    let indent = current_indent t in
    let start_col = start.column in
    if not (Input.is_eof t.input) && started_with_tab && col < start_col then
      bad_indentation ();
    if not (Input.is_eof t.input) && col <= indent && indent >= 0 then
      bad_indentation ();
    (* One break folds to a space; empty lines survive as newlines. *)
    if empty_lines > 0 then
      Buffer.add_string buf (String.make empty_lines '\n')
    else
      Buffer.add_char buf ' '
  in

  let rec loop () =
    match Input.peek t.input with
    | None -> Error.raise_at start Unclosed_double_quote
    | Some '"' ->
      (* Closing quote: blanks right before it are content. *)
      commit_pending ();
      advance ()
    | Some ((' ' | '\t') as blank) ->
      Buffer.add_char pending blank;
      advance ();
      loop ()
    | Some '\\' ->
      commit_pending ();
      advance ();
      scan_escape ();
      loop ()
    | Some ('\n' | '\r') ->
      fold_line_break ();
      loop ()
    | Some ch ->
      commit_pending ();
      Buffer.add_char buf ch;
      advance ();
      loop ()
  in
  loop ();
  let span = Span.make ~start ~stop:(Input.mark t.input) in
  (Buffer.contents buf, span)
632632-633633-(** Check if character can appear in plain scalar at this position *)
let can_continue_plain t c ~in_flow =
  (* [can_continue_plain t c ~in_flow] tells whether [c], the character at
     the current input position, may be part of a plain scalar. [in_flow] is
     true inside a flow collection, where flow indicators also end the
     scalar. Only peeks at the input; nothing is consumed. *)
  match c with
  | ':' ->
    (* A colon stays in the scalar unless the next character is whitespace
       or, in flow context, a flow indicator. A colon at end of input is
       kept. *)
    (match Input.peek_nth t.input 1 with
     | None -> true
     | Some next ->
       not (Input.is_whitespace next
            || (in_flow && Input.is_flow_indicator next)))
  | '#' ->
    (* A hash starts a comment only when preceded by whitespace. Line
       breaks count as whitespace for [Input.is_whitespace], so no separate
       start-of-line test is needed. With nothing before it, the hash is
       content. *)
    (match Input.peek_back t.input with
     | None -> true
     | Some prev -> not (Input.is_whitespace prev))
  | _ ->
    (* Anything else continues the scalar except a line break or, in flow
       context, a flow indicator. *)
    not ((in_flow && Input.is_flow_indicator c) || Input.is_break c)
654654-655655-(** Scan plain scalar *)
let scan_plain_scalar t =
  (* Scans a plain (unquoted) scalar starting at the current input position.

     Returns [(value, span, crossed_break)]: the folded text with trailing
     spaces and tabs removed, the span from the first character to wherever
     scanning stopped, and whether the last thing consumed was a line break
     rather than scalar content.

     Raises [Invalid_flow_indentation] through [Error.raise_at] when, inside
     a flow collection, the scalar starts left of the current block
     indent. *)
  let start = Input.mark t.input in
  let in_flow = t.flow_level > 0 in
  let indent = current_indent t in
  if in_flow && (column t - 1) < indent then
    Error.raise_at start Invalid_flow_indentation;
  let buf = Buffer.create 64 in
  (* Line breaks seen since the last content character. *)
  let breaks = Buffer.create 16 in
  (* Blanks seen on the current line since the last content character. *)
  let blanks = Buffer.create 16 in
  let crossed_break = ref false in
  let advance () = ignore (Input.next t.input) in

  (* Turn the pending line breaks into output: a single break folds to one
     space, otherwise every break but the first is kept. *)
  let flush_breaks () =
    if Buffer.length breaks > 0 then begin
      (if not !crossed_break then
         Buffer.add_buffer buf breaks
       else
         match Buffer.contents breaks with
         | "\n" -> Buffer.add_char buf ' '
         | s -> Buffer.add_substring buf s 1 (String.length s - 1));
      Buffer.clear breaks
    end
  in
  let flush_blanks () =
    if Buffer.length blanks > 0 then begin
      Buffer.add_buffer buf blanks;
      Buffer.clear blanks
    end
  in

  (* Consume the rest of the current line for as long as
     [can_continue_plain] accepts the next character. *)
  let rec scan_line () =
    match Input.peek t.input with
    | Some c when can_continue_plain t c ~in_flow ->
      if Input.is_blank c then
        (* Held back until we know content follows on this line. *)
        Buffer.add_char blanks c
      else begin
        flush_breaks ();
        flush_blanks ();
        Buffer.add_char buf c;
        crossed_break := false
      end;
      advance ();
      scan_line ()
    | Some _ | None -> ()
  in

  let rec scan_lines () =
    scan_line ();
    if Input.next_is_break t.input then begin
      (* Blanks at the end of a line are never content. *)
      Buffer.clear blanks;
      if not !crossed_break then begin
        (* First break after content: start a fresh run. *)
        Buffer.clear breaks;
        crossed_break := true
      end;
      Buffer.add_char breaks '\n';
      Input.consume_break t.input;
      (* allow_simple_key is deliberately left alone here: setting it would
         let a colon on a continuation line become a mapping indicator. *)
      while Input.next_is_blank t.input do
        advance ()
      done;
      let col = (Input.position t.input).column in
      (* Empty lines always continue. Otherwise stop on a line at or left of
         the block indent (block context only) or at a document boundary. *)
      let continues =
        Input.next_is_break t.input
        || not ((not in_flow && col <= indent)
                || Input.at_document_boundary t.input)
      in
      if continues then scan_lines ()
    end
  in

  scan_lines ();
  (* Drop trailing spaces and tabs. *)
  let raw = Buffer.contents buf in
  let stop = ref (String.length raw) in
  while !stop > 0 && (raw.[!stop - 1] = ' ' || raw.[!stop - 1] = '\t') do
    decr stop
  done;
  let value = String.sub raw 0 !stop in
  let span = Span.make ~start ~stop:(Input.mark t.input) in
  (value, span, !crossed_break)
764764-765765-(** Scan block scalar (literal | or folded >) *)
let scan_block_scalar t literal =
  (* [scan_block_scalar t literal] scans a block scalar whose indicator is
     the next input character: [|] when [literal] is true, [>] (folded)
     otherwise. Returns [(value, style, span)].

     The header may carry an indentation digit (1-9) and a chomping
     indicator ([-] strip, [+] keep) in either order. Without a digit the
     content indentation is taken from the first non-empty line.

     Raises through [Error.raise_at]:
     - [Invalid_block_scalar_header] when something other than a line break
       or end of input follows the header (after
       [skip_whitespace_and_comment]), or when the first content line is
       less indented than an empty line that precedes it;
     - [Tab_in_indentation] when a line starts with a tab while the content
       indentation is 0. *)
  let start = Input.mark t.input in
  ignore (Input.next t.input); (* consume | or > *)

  (* Parse header: optional indentation indicator and chomping *)
  let explicit_indent = ref None in
  let chomping = ref Chomping.Clip in

  (* First character of header *)
  (match Input.peek t.input with
   | Some c when Input.is_digit c && c <> '0' ->
     explicit_indent := Some (Char.code c - Char.code '0');
     ignore (Input.next t.input)
   | Some '-' -> chomping := Chomping.Strip; ignore (Input.next t.input)
   | Some '+' -> chomping := Chomping.Keep; ignore (Input.next t.input)
   | _ -> ());

  (* Second character of header: only the kind not seen yet is accepted *)
  (match Input.peek t.input with
   | Some c when Input.is_digit c && c <> '0' && !explicit_indent = None ->
     explicit_indent := Some (Char.code c - Char.code '0');
     ignore (Input.next t.input)
   | Some '-' when !chomping = Chomping.Clip ->
     chomping := Chomping.Strip; ignore (Input.next t.input)
   | Some '+' when !chomping = Chomping.Clip ->
     chomping := Chomping.Keep; ignore (Input.next t.input)
   | _ -> ());

  (* Skip whitespace and optional comment *)
  skip_whitespace_and_comment t;

  (* Consume line break *)
  if Input.next_is_break t.input then
    Input.consume_break t.input
  else if not (Input.is_eof t.input) then
    Error.raise_at (Input.mark t.input)
      (Invalid_block_scalar_header "expected newline after header");

  (* base_indent is the indent level from the stack, -1 if empty. *)
  let base_indent = current_indent t in
  let content_indent = ref (
    match !explicit_indent with
    | Some n ->
      (* Explicit indent: base_indent is a 1-indexed column, convert to
         0-indexed. content_indent = (base_indent - 1) + n, but at least n
         at document level. *)
      let base_level = max 0 (base_indent - 1) in
      base_level + n
    | None -> 0 (* Will be determined by first non-empty line *)
  ) in
  (* Whether [content_indent] is final. Kept separate from the value itself
     because an auto-detected indentation of 0 is legitimate for a scalar at
     document level. Using [content_indent = 0] as the not-yet-detected
     marker made every later line of such a scalar count as a new first
     line: its leading spaces were dropped and the indentation re-derived
     from it. *)
  let indent_known = ref (Option.is_some !explicit_indent) in

  let buf = Buffer.create 256 in
  let trailing_breaks = Buffer.create 16 in
  let leading_blank = ref false in (* Was the previous line "more indented"? *)
  let max_empty_line_indent = ref 0 in (* Max indent of empty lines before first content *)

  (* Skip to content indentation, skipping empty lines (one newline is
     recorded in [trailing_breaks] for each). Returns the number of spaces
     actually skipped on the line it stops on, which is how the caller
     detects dedentation. *)
  let rec skip_to_content_indent () =
    if !content_indent > 0 then begin
      (* Known positive indent - skip up to content_indent spaces *)
      let spaces_skipped = ref 0 in
      while !spaces_skipped < !content_indent && Input.next_is (( = ) ' ') t.input do
        incr spaces_skipped;
        ignore (Input.next t.input)
      done;

      if Input.next_is_break t.input then begin
        (* Empty line - record the break and continue *)
        Buffer.add_char trailing_breaks '\n';
        Input.consume_break t.input;
        skip_to_content_indent ()
      end else if !spaces_skipped < !content_indent then begin
        (* Line starts with fewer spaces than content_indent - dedented *)
        !spaces_skipped
      end else if Input.next_is_blank t.input then begin
        (* Blanks beyond content_indent. For literal scalars they are
           content. For folded scalars a blank-only line counts as empty. *)
        if literal then
          !content_indent
        else begin
          (* Folded: look ahead to see whether the rest is only blanks *)
          let idx = ref 0 in
          while
            match Input.peek_nth t.input !idx with
            | Some c when Input.is_blank c -> incr idx; true
            | _ -> false
          do () done;
          match Input.peek_nth t.input !idx with
          | None | Some '\n' | Some '\r' ->
            (* Blank-only line in folded - skip it *)
            while Input.next_is_blank t.input do
              ignore (Input.next t.input)
            done;
            Buffer.add_char trailing_breaks '\n';
            Input.consume_break t.input;
            skip_to_content_indent ()
          | _ ->
            (* Has non-whitespace content *)
            !content_indent
        end
      end else
        !content_indent
    end else begin
      (* Indent of 0 (not yet detected, or detected as 0) - skip empty lines
         without consuming the spaces of a content line. Only SPACES count
         as indentation; tabs do not. *)
      if Input.next_is_break t.input then begin
        Buffer.add_char trailing_breaks '\n';
        Input.consume_break t.input;
        skip_to_content_indent ()
      end else if Input.next_is (( = ) ' ') t.input then begin
        (* Look ahead: is the line made of spaces only? *)
        let idx = ref 0 in
        while
          match Input.peek_nth t.input !idx with
          | Some ' ' -> incr idx; true
          | _ -> false
        do () done;
        match Input.peek_nth t.input !idx with
        | None | Some '\n' | Some '\r' ->
          (* Spaces only - empty line. Remember its width so the first
             content line can be validated against it. *)
          if !idx > !max_empty_line_indent then
            max_empty_line_indent := !idx;
          while Input.next_is (( = ) ' ') t.input do
            ignore (Input.next t.input)
          done;
          Buffer.add_char trailing_breaks '\n';
          Input.consume_break t.input;
          skip_to_content_indent ()
        | _ ->
          (* Has content; its leading spaces are counted by the caller *)
          0
      end else if Input.next_is (( = ) '\t') t.input then begin
        (* Tab at start of line - error (Y79Y): tabs cannot be used as
           indentation in YAML *)
        Error.raise_at (Input.mark t.input) Tab_in_indentation
      end else
        (* Not at break or space - other content character *)
        0
    end
  in

  (* Read content lines until end of input, a document boundary, or a line
     that is not indented enough. *)
  let rec read_lines () =
    let spaces_skipped = skip_to_content_indent () in

    if Input.is_eof t.input then ()
    else if Input.at_document_boundary t.input then ()
    else begin
      (* Count additional leading spaces beyond what was skipped *)
      let extra_spaces = ref 0 in
      while Input.next_is (( = ) ' ') t.input do
        incr extra_spaces;
        ignore (Input.next t.input)
      done;

      (* Actual line indentation *)
      let line_indent = spaces_skipped + !extra_spaces in

      (* True only for the line that fixes an auto-detected indentation *)
      let first_line = not !indent_known in
      (* base_indent is a 1-indexed column; convert to 0-indexed for the
         comparison with line_indent. *)
      let base_level = base_indent - 1 in
      let should_process =
        if first_line then begin
          (* Content must be more indented than base_level. *)
          if line_indent <= base_level then
            false (* No content - first line not indented enough *)
          else begin
            (* The first content line must be indented at least as much as
               the widest empty line before it (5LLU, S98Z, W9L4) *)
            if line_indent < !max_empty_line_indent && line_indent > base_level then
              Error.raise_at (Input.mark t.input)
                (Invalid_block_scalar_header "wrongly indented line in block scalar");
            content_indent := line_indent;
            indent_known := true;
            true
          end
        end else
          (* Dedented lines end the content *)
          line_indent >= !content_indent
      in

      if should_process then begin
        (* Is the current line "more indented"? For folded scalars, a line
           with extra indent, or one whose content starts with a blank,
           preserves the surrounding breaks. Checked BEFORE the content is
           read. *)
        let trailing_blank = line_indent > !content_indent || Input.next_is_blank t.input in

        (* Emit the line breaks separating this line from the previous one *)
        if Buffer.length buf > 0 then begin
          if Buffer.length trailing_breaks > 0 then begin
            if literal then
              Buffer.add_buffer buf trailing_breaks
            else begin
              (* Folded: fold only if neither neighbour is more-indented *)
              if not !leading_blank && not trailing_blank then begin
                let breaks = Buffer.contents trailing_breaks in
                if String.length breaks = 1 then
                  Buffer.add_char buf ' '
                else
                  Buffer.add_substring buf breaks 1 (String.length breaks - 1)
              end else begin
                (* Preserve breaks around more-indented lines *)
                Buffer.add_buffer buf trailing_breaks
              end
            end
          end else if not literal then
            Buffer.add_char buf ' '
        end else
          Buffer.add_buffer buf trailing_breaks;
        Buffer.clear trailing_breaks;

        (* Re-add indentation beyond content_indent. On the line that fixed
           the indentation all of its leading spaces ARE the indentation,
           so nothing is added back. *)
        if not first_line && (literal || (!extra_spaces > 0 && not literal)) then begin
          for _ = 1 to !extra_spaces do
            Buffer.add_char buf ' '
          done
        end;

        (* Read line content *)
        while not (Input.is_eof t.input) && not (Input.next_is_break t.input) do
          Buffer.add_char buf (Input.next_exn t.input)
        done;

        (* Record trailing break *)
        if Input.next_is_break t.input then begin
          Buffer.add_char trailing_breaks '\n';
          Input.consume_break t.input
        end;

        (* Update leading_blank for next iteration *)
        leading_blank := trailing_blank;

        read_lines ()
      end
    end
  in

  read_lines ();

  (* Apply chomping: strip drops the final breaks, clip keeps one newline
     after non-empty content, keep appends every recorded trailing break. *)
  let value =
    let content = Buffer.contents buf in
    match !chomping with
    | Chomping.Strip -> content
    | Chomping.Clip ->
      if String.length content > 0 then content ^ "\n" else content
    | Chomping.Keep ->
      content ^ Buffer.contents trailing_breaks
  in

  let span = Span.make ~start ~stop:(Input.mark t.input) in
  let style = if literal then Scalar_style.Literal else Scalar_style.Folded in
  (value, style, span)
10291029-10301030-(** Scan directive (after %) *)
let scan_directive t =
  (* [scan_directive t] scans a directive whose [%] is the next input
     character and returns [(token, span)]:
     - YAML directive: [Token.Version_directive { major; minor }];
     - TAG directive: [Token.Tag_directive { handle; prefix }];
     - any other name: the rest of the line is skipped and an empty
       [Token.Tag_directive] is returned to signal an ignored directive.

     Raises through [Error.raise_at]: [Invalid_yaml_version] when the
     version is not of the form digits, dot, digits; [Invalid_directive]
     when something other than a comment or line break follows the
     version. *)
  let start = Input.mark t.input in
  ignore (Input.next t.input); (* consume % *)

  (* Read directive name *)
  let name_buf = Buffer.create 16 in
  while
    match Input.peek t.input with
    | Some c when Input.is_alnum c || c = '-' ->
      Buffer.add_char name_buf c;
      ignore (Input.next t.input);
      true
    | _ -> false
  do () done;
  let name = Buffer.contents name_buf in

  (* Skip blanks *)
  while Input.next_is_blank t.input do
    ignore (Input.next t.input)
  done;

  (* Reads one decimal component of the version. At least one digit is
     required; otherwise a version with a missing major or minor part would
     silently be read as 0. *)
  let read_version_number what =
    if not (Input.next_is_digit t.input) then
      Error.raise_at (Input.mark t.input)
        (Invalid_yaml_version ("expected " ^ what ^ " version number"));
    let n = ref 0 in
    while Input.next_is_digit t.input do
      n := !n * 10 + (Char.code (Input.next_exn t.input) - Char.code '0')
    done;
    !n
  in

  match name with
  | "YAML" ->
    (* Version directive: %YAML 1.2 *)
    let major = read_version_number "major" in
    (* Expect . *)
    (match Input.peek t.input with
     | Some '.' -> ignore (Input.next t.input)
     | _ -> Error.raise_at (Input.mark t.input) (Invalid_yaml_version "expected '.'"));
    let minor = read_version_number "minor" in
    (* Validate: only whitespace and comments allowed before line break (MUS6) *)
    skip_whitespace_and_comment t;
    if not (Input.next_is_break t.input) && not (Input.is_eof t.input) then
      Error.raise_at (Input.mark t.input) (Invalid_directive "expected comment or line break after version");
    let span = Span.make ~start ~stop:(Input.mark t.input) in
    Token.Version_directive { major; minor }, span

  | "TAG" ->
    (* Tag directive: %TAG !foo! tag:example.com,2000: *)
    let handle = scan_tag_handle t in
    (* Skip blanks *)
    while Input.next_is_blank t.input do
      ignore (Input.next t.input)
    done;
    (* Read prefix: everything up to the next whitespace character *)
    let prefix_buf = Buffer.create 32 in
    while
      match Input.peek t.input with
      | Some c when not (Input.is_whitespace c) ->
        Buffer.add_char prefix_buf c;
        ignore (Input.next t.input);
        true
      | _ -> false
    do () done;
    let prefix = Buffer.contents prefix_buf in
    let span = Span.make ~start ~stop:(Input.mark t.input) in
    Token.Tag_directive { handle; prefix }, span

  | _ ->
    (* Reserved/unknown directive - skip to end of line and ignore *)
    while not (Input.is_eof t.input) && not (Input.next_is_break t.input) do
      ignore (Input.next t.input)
    done;
    let span = Span.make ~start ~stop:(Input.mark t.input) in
    (* An empty tag directive token marks the directive as processed but ignored *)
    Token.Tag_directive { handle = ""; prefix = "" }, span
11061106-11071107-(** Fetch the next token(s) into the queue *)
let rec fetch_next_token t =
  (* Moves to the next token start, expires stale simple keys, unrolls the
     indent stack to the current column, then dispatches on the next
     character to the matching fetch function. Raises
     [Unexpected_character] through [Error.raise_at] when no token can
     start with that character. *)
  skip_to_next_token t;
  stale_simple_keys t;
  (* Unroll against the column itself, not column - 1, so that entries at
     the same level stay open. *)
  unroll_indent t (column t);

  (* From here on we are looking at content, not leading whitespace *)
  t.leading_whitespace <- false;

  match Input.peek t.input with
  | None -> fetch_stream_end t
  | Some _ when Input.at_document_boundary t.input ->
    fetch_document_indicator t
  | Some ch ->
    (match ch with
     | '%' when (Input.position t.input).column = 1 -> fetch_directive t
     | '[' -> fetch_flow_collection_start t Token.Flow_sequence_start
     | '{' -> fetch_flow_collection_start t Token.Flow_mapping_start
     | ']' -> fetch_flow_collection_end t Token.Flow_sequence_end
     | '}' -> fetch_flow_collection_end t Token.Flow_mapping_end
     | ',' -> fetch_flow_entry t
     | '-' when t.flow_level = 0 && check_block_entry t -> fetch_block_entry t
     | '?' when check_key t -> fetch_key t
     | ':' when check_value t -> fetch_value t
     | '*' -> fetch_alias t
     | '&' -> fetch_anchor t
     | '!' -> fetch_tag t
     | '|' when t.flow_level = 0 -> fetch_block_scalar t true
     | '>' when t.flow_level = 0 -> fetch_block_scalar t false
     | '\'' -> fetch_single_quoted t
     | '"' -> fetch_double_quoted t
     (* Indicators that were not claimed above may still open a plain
        scalar *)
     | ('-' | '?' | ':') when can_start_plain t -> fetch_plain_scalar t
     | _ when can_start_plain_char ch t -> fetch_plain_scalar t
     | _ -> Error.raise_at (Input.mark t.input) (Unexpected_character ch))
and fetch_stream_end t =
  (* Emits [Token.Stream_end] once: the first call unrolls every indent
     level, drops the pending simple key and marks the stream as ended;
     later calls do nothing. *)
  if t.stream_ended then ()
  else begin
    unroll_indent t (-1);
    remove_simple_key t;
    t.allow_simple_key <- false;
    t.stream_ended <- true;
    emit t (Span.point (Input.mark t.input)) Token.Stream_end
  end
and fetch_document_indicator t =
  (* Consumes a three-character document marker and emits
     [Token.Document_start] for [---] and [Token.Document_end] otherwise.
     After a document end marker only blanks, a comment, a line break or end
     of input may follow on the same line; anything else raises
     [Invalid_directive] through [Error.raise_at]. *)
  unroll_indent t (-1);
  remove_simple_key t;
  t.allow_simple_key <- false;
  let start = Input.mark t.input in
  let marker = Input.peek_string t.input 3 in
  Input.skip t.input 3;
  let span = Span.make ~start ~stop:(Input.mark t.input) in
  let token =
    match marker with
    | "---" -> Token.Document_start
    | _ -> Token.Document_end
  in
  if marker = "..." then begin
    (* The next document starts without content *)
    t.document_has_content <- false;
    while Input.next_is_blank t.input do
      ignore (Input.next t.input)
    done;
    match Input.peek t.input with
    | None | Some '#' -> () (* end of input or a comment is fine *)
    | Some c when Input.is_break c -> ()
    | Some _ ->
      Error.raise_at start
        (Invalid_directive "content not allowed after document end marker on same line")
  end;
  emit t span token
and fetch_directive t =
  (* A directive is only accepted while the current document has no content
     yet, i.e. at stream start or after a document end marker. Otherwise an
     Unexpected_token error is raised at the current mark. *)
  let () =
    if t.document_has_content then
      Error.raise_at (Input.mark t.input)
        (Unexpected_token "directives must be separated from document content by document end marker (...)")
  in
  unroll_indent t (-1);
  remove_simple_key t;
  t.allow_simple_key <- false;
  let directive_token, directive_span = scan_directive t in
  emit t directive_span directive_token
and fetch_flow_collection_start t token_type =
  (* Opens a flow collection: consumes the single opening character and
     emits [token_type]. *)
  save_simple_key t;
  (* Only the outermost flow collection records its starting column. *)
  let entering_outermost = t.flow_level = 0 in
  if entering_outermost then t.flow_indent <- column t;
  t.flow_level <- t.flow_level + 1;
  (* true on the stack means the collection just opened is a mapping. *)
  let opens_mapping = token_type = Token.Flow_mapping_start in
  t.flow_mapping_stack <- opens_mapping :: t.flow_mapping_stack;
  t.allow_simple_key <- true;
  (* Each flow level gets its own simple-key slot. *)
  t.simple_keys <- None :: t.simple_keys;
  t.document_has_content <- true;
  let start = Input.mark t.input in
  ignore (Input.next t.input);
  let stop = Input.mark t.input in
  emit t (Span.make ~start ~stop) token_type
and fetch_flow_collection_end t token_type =
  (* Closes a flow collection: consumes the single closing character and
     emits [token_type].

     The flow state is only unwound when a flow collection is actually open.
     An unmatched closing indicator therefore leaves [flow_level] at 0
     instead of driving it negative, and does not pop the simple-key slot
     and mapping flag that belong to the enclosing level. The token is still
     emitted so that the consumer can report the stray indicator. *)
  remove_simple_key t;
  if t.flow_level > 0 then begin
    t.flow_level <- t.flow_level - 1;
    t.flow_mapping_stack <-
      (match t.flow_mapping_stack with _ :: rest -> rest | [] -> []);
    t.simple_keys <-
      (match t.simple_keys with _ :: rest -> rest | [] -> [])
  end;
  t.allow_simple_key <- false;
  let start = Input.mark t.input in
  ignore (Input.next t.input);
  (* While still nested in another flow collection, a value indicator may
     directly follow the collection that was just closed. *)
  if t.flow_level > 0 then
    t.adjacent_value_allowed_at <- Some (Input.position t.input);
  let span = Span.make ~start ~stop:(Input.mark t.input) in
  emit t span token_type
and fetch_flow_entry t =
  (* Consumes one character (the flow entry separator) and emits Flow_entry;
     a simple key may start right after it. *)
  remove_simple_key t;
  t.allow_simple_key <- true;
  let start = Input.mark t.input in
  ignore (Input.next t.input);
  let stop = Input.mark t.input in
  emit t (Span.make ~start ~stop) Token.Flow_entry
and check_block_entry t =
  (* True when the character after the current one is whitespace, or when
     there is no such character (end of input). *)
  Option.fold ~none:true ~some:Input.is_whitespace (Input.peek_nth t.input 1)
and fetch_block_entry t =
  (* Emits Block_entry for a sequence entry indicator, preceded by
     Block_sequence_start when this entry opens a new block sequence. *)
  if t.flow_level = 0 then begin
    (* A block entry is only legal where a simple key could start; this
       rejects a sequence on the same line as a mapping value
       (e.g. "key: - a"). *)
    if not t.allow_simple_key then
      Error.raise_at (Input.mark t.input) Block_sequence_disallowed;
    if roll_indent t (column t) ~sequence:true then
      emit t (Span.point (Input.mark t.input)) Token.Block_sequence_start
  end;
  remove_simple_key t;
  t.allow_simple_key <- true;
  t.document_has_content <- true;
  let start = Input.mark t.input in
  ignore (Input.next t.input);

  (* Tabs between this indicator and a nested entry indicator are rejected:
     after skipping blanks, a dash followed by whitespace or end of input
     counts as a nested entry. *)
  let found_tabs, _ = skip_blanks_check_tabs t in
  let nested_entry_follows () =
    match Input.peek t.input, Input.peek_nth t.input 1 with
    | Some '-', None -> true
    | Some '-', Some c -> Input.is_whitespace c
    | _ -> false
  in
  if found_tabs && nested_entry_follows () then
    Error.raise_at start Tab_in_indentation;

  (* The span covers the indicator and the blanks skipped after it. *)
  let stop = Input.mark t.input in
  emit t (Span.make ~start ~stop) Token.Block_entry
and check_key t =
  (* The explicit key indicator counts when it is followed by whitespace or
     end of input, or, inside a flow collection, by a flow indicator. *)
  match Input.peek_nth t.input 1 with
  | None -> true
  | Some c when Input.is_whitespace c -> true
  | Some c -> t.flow_level > 0 && Input.is_flow_indicator c
and fetch_key t =
  (* Emits Key for an explicit key indicator, preceded by
     Block_mapping_start when the key opens a new block mapping. *)
  if t.flow_level = 0 then begin
    if not t.allow_simple_key then
      Error.raise_at (Input.mark t.input) Expected_key;
    if roll_indent t (column t) ~sequence:false then
      emit t (Span.point (Input.mark t.input)) Token.Block_mapping_start
  end;
  remove_simple_key t;
  (* Simple keys may follow the indicator in block context only. *)
  t.allow_simple_key <- t.flow_level = 0;
  t.document_has_content <- true;
  let start = Input.mark t.input in
  ignore (Input.next t.input);

  (* In block context a tab among the blanks after the indicator is an
     error; in flow context the blanks are skipped silently. *)
  let found_tabs, _ = skip_blanks_check_tabs t in
  if found_tabs && t.flow_level = 0 then
    Error.raise_at start Tab_in_indentation;

  let stop = Input.mark t.input in
  emit t (Span.make ~start ~stop) Token.Key;
  (* A KEY was emitted; its VALUE is now awaited. *)
  t.pending_value <- true
and check_value t =
  (* The value indicator counts when followed by whitespace or end of input.
     Inside a flow collection it also counts when followed by a flow
     indicator, or when the current position is the one recorded in
     [adjacent_value_allowed_at]. *)
  match Input.peek_nth t.input 1 with
  | None -> true
  | Some c ->
    let in_flow = t.flow_level > 0 in
    let at_adjacent_position () =
      match t.adjacent_value_allowed_at with
      | None -> false
      | Some allowed ->
        let here = Input.position t.input in
        allowed.Position.line = here.Position.line
        && allowed.Position.column = here.Position.column
    in
    Input.is_whitespace c
    || (in_flow && (Input.is_flow_indicator c || at_adjacent_position ()))
and fetch_value t =
  (* Emits Value for a value indicator. When a simple key is pending on the
     current level, a Key token (and, in block context, possibly a
     Block_mapping_start token) is first inserted into the queue in front of
     the tokens that make up that key.

     Raises Illegal_flow_key_line, Expected_key or Tab_in_indentation via
     [Error.raise_at] as described at each check below. *)

  (* Mark of the indicator. The input is not advanced until the indicator is
     consumed further down, so this one mark serves every use below. *)
  let start = Input.mark t.input in

  (* Rebuilds the token queue with [tok] placed in front of the token at
     queue index [pos], or appended when [pos] is at or past the end, and
     accounts for the extra token in [token_number]. *)
  let insert_token_at pos tok =
    let queued = Queue.to_seq t.tokens |> Array.of_seq in
    Queue.clear t.tokens;
    Array.iteri
      (fun i existing ->
        if i = pos then Queue.add tok t.tokens;
        Queue.add existing t.tokens)
      queued;
    if pos >= Array.length queued then Queue.add tok t.tokens;
    t.token_number <- t.token_number + 1
  in

  let used_simple_key =
    match t.simple_keys with
    | Some sk :: _ when sk.sk_possible ->
      (* In an implicit flow mapping (a pair inside a flow sequence) the key
         and the indicator must be on the same line; in an explicit flow
         mapping they may span lines. A false on top of the stack means the
         innermost flow collection is a sequence. *)
      let is_implicit_flow_mapping =
        match t.flow_mapping_stack with
        | false :: _ -> true
        | _ -> false
      in
      if is_implicit_flow_mapping
         && sk.sk_position.line < (Input.position t.input).line
      then Error.raise_at start Illegal_flow_key_line;
      (* Queue index of the first token of the simple key. *)
      let insert_pos = sk.sk_token_number - t.tokens_taken in
      insert_token_at insert_pos
        { Token.token = Token.Key; span = Span.point sk.sk_position };
      (* A KEY was inserted; its VALUE is now awaited. *)
      t.pending_value <- true;
      (* In block context the key may open an implicit block mapping; its
         start token goes in front of the KEY that was just inserted. *)
      if t.flow_level = 0 then begin
        if roll_indent t sk.sk_position.column ~sequence:false then
          insert_token_at insert_pos
            { Token.token = Token.Block_mapping_start;
              span = Span.point sk.sk_position }
      end;
      (* Clear the slot of the current level (matched non-empty above). *)
      t.simple_keys <- None :: List.tl t.simple_keys;
      true
    | _ ->
      (* No simple key: this is a complex value or an empty key. *)
      if t.flow_level = 0 then begin
        if not t.allow_simple_key then
          Error.raise_at (Input.mark t.input) Expected_key;
        if roll_indent t (column t) ~sequence:false then
          emit t (Span.point (Input.mark t.input)) Token.Block_mapping_start
        (* No KEY is emitted here: the parser supplies the empty scalar when
           it sees VALUE without a preceding KEY. *)
      end;
      false
  in
  remove_simple_key t;
  (* In block context without a simple key, further simple keys are allowed
     (compact mappings such as ": moon: white"); in flow context or after
     using a simple key they are not. *)
  t.allow_simple_key <- (not used_simple_key) && t.flow_level = 0;
  t.document_has_content <- true;
  ignore (Input.next t.input);

  (* In block context, tabs only (no spaces) after the indicator followed by
     an entry or key indicator, or by an ASCII letter or digit, are rejected
     (Y79Y/09). A tab followed by a space and then content is valid (6BCT). *)
  let found_tabs, found_spaces = skip_blanks_check_tabs t in
  if found_tabs && (not found_spaces) && t.flow_level = 0 then begin
    match Input.peek t.input with
    | Some ('-' | '?' | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9') ->
      Error.raise_at start Tab_in_indentation
    | _ -> ()
  end;

  (* Skip any comment that may follow the indicator and whitespace. *)
  skip_whitespace_and_comment t;

  let span = Span.make ~start ~stop:(Input.mark t.input) in
  emit t span Token.Value;
  (* The VALUE was emitted; nothing is pending any more. *)
  t.pending_value <- false
and fetch_alias t =
  (* Emits an Alias token whose span starts at the indicator and ends where
     the scanned name ends. *)
  save_simple_key t;
  t.allow_simple_key <- false;
  t.document_has_content <- true;
  let start = Input.mark t.input in
  (* Drop the one-character alias indicator before scanning the name. *)
  ignore (Input.next t.input);
  let name, name_span = scan_anchor_alias t in
  emit t (Span.make ~start ~stop:name_span.stop) (Token.Alias name)
and fetch_anchor t =
  (* Emits an Anchor token whose span starts at the indicator and ends where
     the scanned name ends. *)
  save_simple_key t;
  t.allow_simple_key <- false;
  t.document_has_content <- true;
  let start = Input.mark t.input in
  (* Drop the one-character anchor indicator before scanning the name. *)
  ignore (Input.next t.input);
  let name, name_span = scan_anchor_alias t in
  emit t (Span.make ~start ~stop:name_span.stop) (Token.Anchor name)
and fetch_tag t =
  (* Scans a tag with [scan_tag] and emits it; the tag may be the start of a
     simple key, so the candidate is saved first. *)
  save_simple_key t;
  t.allow_simple_key <- false;
  t.document_has_content <- true;
  let tag_handle, tag_suffix, tag_span = scan_tag t in
  emit t tag_span (Token.Tag { handle = tag_handle; suffix = tag_suffix })
and fetch_block_scalar t literal =
  (* Scans a block scalar; [literal] is forwarded unchanged to
     [scan_block_scalar], which also reports the resulting style. A simple
     key may start after the scalar. *)
  remove_simple_key t;
  t.allow_simple_key <- true;
  t.document_has_content <- true;
  let text, scalar_style, scalar_span = scan_block_scalar t literal in
  emit t scalar_span (Token.Scalar { style = scalar_style; value = text })
and fetch_single_quoted t =
  (* Scans a single-quoted scalar and emits it as a Scalar token. *)
  save_simple_key t;
  t.allow_simple_key <- false;
  t.document_has_content <- true;
  let text, scalar_span = scan_single_quoted t in
  (* Inside a flow collection, a value indicator may sit directly after a
     quoted scalar (JSON compatibility): remember where the next token
     begins so that check_value can accept it there. *)
  skip_to_next_token t;
  let in_flow = t.flow_level > 0 in
  if in_flow then
    t.adjacent_value_allowed_at <- Some (Input.position t.input);
  emit t scalar_span
    (Token.Scalar { style = Scalar_style.Single_quoted; value = text })
and fetch_double_quoted t =
  (* Scans a double-quoted scalar and emits it as a Scalar token. *)
  save_simple_key t;
  t.allow_simple_key <- false;
  t.document_has_content <- true;
  let text, scalar_span = scan_double_quoted t in
  (* Inside a flow collection, a value indicator may sit directly after a
     quoted scalar (JSON compatibility): remember where the next token
     begins so that check_value can accept it there. *)
  skip_to_next_token t;
  let in_flow = t.flow_level > 0 in
  if in_flow then
    t.adjacent_value_allowed_at <- Some (Input.position t.input);
  emit t scalar_span
    (Token.Scalar { style = Scalar_style.Double_quoted; value = text })
and can_start_plain t =
  (* Decides whether an indicator character at the current position may
     begin a plain scalar: the next character must exist, must not be
     whitespace and, inside a flow collection, must not be a flow
     indicator. *)
  match Input.peek_nth t.input 1 with
  | None -> false
  | Some c when Input.is_whitespace c -> false
  | Some c -> t.flow_level = 0 || not (Input.is_flow_indicator c)
and can_start_plain_char c _t =
  (* Any character that is neither whitespace nor an indicator may begin a
     plain scalar. The scanner argument is unused. *)
  not (Input.is_whitespace c || Input.is_indicator c)
and fetch_plain_scalar t =
  (* Scans a plain scalar and emits it as a Scalar token. *)
  save_simple_key t;
  t.allow_simple_key <- false;
  t.document_has_content <- true;
  let text, scalar_span, crossed_line_break = scan_plain_scalar t in
  (* When the scan stopped after crossing a line break, the break and the
     leading whitespace of the next line are already consumed, so a simple
     key must be allowed to start right here. *)
  if crossed_line_break then t.allow_simple_key <- true;
  emit t scalar_span (Token.Scalar { style = Scalar_style.Plain; value = text })
(** Tell whether more tokens must be fetched before the head of the queue
    can be handed out: never once the stream has ended, always while the
    queue is empty, and otherwise while some possible simple key refers to a
    token that has not been taken yet. *)
let need_more_tokens t =
  let key_still_pending = function
    | Some sk -> sk.sk_possible && sk.sk_token_number >= t.tokens_taken
    | None -> false
  in
  (not t.stream_ended)
  && (Queue.is_empty t.tokens || List.exists key_still_pending t.simple_keys)
(** Make sure the queue holds a token that is safe to return. On the first
    call this emits [Stream_start] with the detected encoding; afterwards it
    fetches tokens for as long as {!need_more_tokens} asks for more. *)
let ensure_tokens t =
  if not t.stream_started then begin
    t.stream_started <- true;
    let span = Span.point (Input.position t.input) in
    let encoding, _ = Encoding.detect t.input.source in
    emit t span (Token.Stream_start encoding)
  end;
  let rec fill () =
    if need_more_tokens t then begin
      fetch_next_token t;
      fill ()
    end
  in
  fill ()
(** Remove and return the next token, or [None] when the queue stays empty
    after {!ensure_tokens}. Every token handed out is counted in
    [tokens_taken]. *)
let next t =
  ensure_tokens t;
  match Queue.take_opt t.tokens with
  | None -> None
  | Some _ as taken ->
    t.tokens_taken <- t.tokens_taken + 1;
    taken
(** Return the next token without removing it from the queue, or [None]
    when no token is available. *)
let peek t =
  let () = ensure_tokens t in
  Queue.peek_opt t.tokens
(** Apply [f] to every remaining token, in order, until {!next} returns
    [None]. *)
let iter f t =
  let rec drain pending =
    match pending with
    | Some tok ->
      f tok;
      drain (next t)
    | None -> ()
  in
  drain (next t)
(** Fold [f] over every remaining token, left to right, starting from
    [init]; returns the final accumulator once {!next} returns [None]. *)
let fold f init t =
  let acc = ref init in
  let exhausted = ref false in
  while not !exhausted do
    match next t with
    | Some tok -> acc := f !acc tok
    | None -> exhausted := true
  done;
  !acc
(** Collect every remaining token into a list, in the order produced. *)
let to_list t =
  let newest_first = fold (fun acc tok -> tok :: acc) [] t in
  List.rev newest_first
-72
yaml/ocaml-yamle/lib/sequence.ml
(** YAML sequence (array) values with metadata *)

type 'a t = {
  anchor : string option;
  tag : string option;
  implicit : bool;
  style : Layout_style.t;
  members : 'a list;
}

(** Build a sequence from its members. [implicit] defaults to [true] and
    [style] to [Layout_style.Any]; [anchor] and [tag] are absent unless
    given. *)
let make
    ?(anchor : string option)
    ?(tag : string option)
    ?(implicit = true)
    ?(style = Layout_style.Any)
    members =
  { anchor; tag; implicit; style; members }

(** Field accessors. *)

let members { members; _ } = members
let anchor { anchor; _ } = anchor
let tag { tag; _ } = tag
let implicit { implicit; _ } = implicit
let style { style; _ } = style

(** Functional updates: each returns a copy with one field replaced. *)

let with_anchor anchor t = { t with anchor = Some anchor }
let with_tag tag t = { t with tag = Some tag }
let with_style style t = { t with style }

(** Apply [f] to every member, keeping the metadata. *)
let map f t = { t with members = List.map f t.members }

let length t = List.length t.members

let is_empty t =
  match t.members with
  | [] -> true
  | _ :: _ -> false

(** [nth t n] is [List.nth] on the members (it raises as [List.nth] does);
    [nth_opt] is the option-returning variant. *)
let nth t n = List.nth t.members n

let nth_opt t n = List.nth_opt t.members n

let iter f t = List.iter f t.members

let fold f init t = List.fold_left f init t.members

(** Print the anchor and tag when present, then the style and the members
    separated by commas, using [pp_elem] for each member. *)
let pp pp_elem fmt t =
  let comma fmt () = Format.fprintf fmt ",@ " in
  Format.fprintf fmt "@[<hv 2>sequence(@,";
  Option.iter (fun a -> Format.fprintf fmt "anchor=%s,@ " a) t.anchor;
  Option.iter (fun tag -> Format.fprintf fmt "tag=%s,@ " tag) t.tag;
  Format.fprintf fmt "style=%a,@ " Layout_style.pp t.style;
  Format.fprintf fmt "members=[@,%a@]@,)"
    (Format.pp_print_list ~pp_sep:comma pp_elem)
    t.members

(** Field-by-field equality; members are compared pairwise with [eq]. *)
let equal eq a b =
  Option.equal String.equal a.anchor b.anchor
  && Option.equal String.equal a.tag b.tag
  && a.implicit = b.implicit
  && Layout_style.equal a.style b.style
  && List.equal eq a.members b.members

(** Lexicographic comparison over anchor, tag, implicit flag, style and
    members (in that order); later fields are only compared when all
    earlier ones are equal. *)
let compare cmp a b =
  let or_else c next = if c <> 0 then c else next () in
  or_else (Option.compare String.compare a.anchor b.anchor) @@ fun () ->
  or_else (Option.compare String.compare a.tag b.tag) @@ fun () ->
  or_else (Bool.compare a.implicit b.implicit) @@ fun () ->
  or_else (Layout_style.compare a.style b.style) @@ fun () ->
  List.compare cmp a.members b.members
-35
yaml/ocaml-yamle/lib/span.ml
(** Source spans representing ranges in input *)

type t = {
  start : Position.t;
  stop : Position.t;
}

let make ~start ~stop = { start; stop }

(** A span whose start and stop are the same position. *)
let point pos = make ~start:pos ~stop:pos

(** The span running from the earlier of the two starts to the later of the
    two stops, as ordered by [Position.compare]. On ties the position of [a]
    is kept. *)
let merge a b =
  let pick keep_first x y = if keep_first (Position.compare x y) then x else y in
  let start = pick (fun c -> c <= 0) a.start b.start in
  let stop = pick (fun c -> c >= 0) a.stop b.stop in
  { start; stop }

(** Replace the stop position of [span] with [pos]. *)
let extend span pos = { start = span.start; stop = pos }

(** Print a one-line span as its line and column range, and a multi-line
    span as its line range. *)
let pp fmt t =
  let { start; stop } = t in
  if start.line <> stop.line then
    Format.fprintf fmt "lines %d-%d" start.line stop.line
  else
    Format.fprintf fmt "line %d, columns %d-%d"
      start.line start.column stop.column

let to_string t = Format.asprintf "%a" pp t

(** Order by start position, then by stop position. *)
let compare a b =
  match Position.compare a.start b.start with
  | 0 -> Position.compare a.stop b.stop
  | c -> c

let equal a b =
  Position.equal a.start b.start && Position.equal a.stop b.stop
-70
yaml/ocaml-yamle/lib/tag.ml
(** YAML tags for type information *)

type t = {
  handle : string;  (** e.g., "!" or "!!" or "!foo!" *)
  suffix : string;  (** e.g., "str", "int", "custom/type" *)
}

let make ~handle ~suffix = { handle; suffix }

(** Parse the textual form of a tag.

    - [""] and any string not starting with ['!'] give [None].
    - ["!"] gives the bare primary handle with an empty suffix.
    - ["!!x"] gives handle ["!!"] and suffix ["x"].
    - ["!<x>"] (verbatim) gives handle ["!"] and suffix ["x"]; an
      unterminated verbatim tag gives [None].
    - ["!name!x"], where [name] is a non-empty run of ASCII letters, digits
      and ['-'], gives the named handle ["!name!"] and suffix ["x"].
    - any other ["!x"] gives handle ["!"] and suffix ["x"]. *)
let of_string s =
  let len = String.length s in
  let is_word_char = function
    | '0' .. '9' | 'a' .. 'z' | 'A' .. 'Z' | '-' -> true
    | _ -> false
  in
  (* Index of the '!' that closes a named handle, if [s] starts with one.
     Only called when [len >= 2] and [s.[1]] is not '!', so a match is
     always at index 2 or later and the name is never empty. *)
  let named_handle_end () =
    match String.index_from_opt s 1 '!' with
    | None -> None
    | Some close ->
      let rec all_word i = i >= close || (is_word_char s.[i] && all_word (i + 1)) in
      if all_word 1 then Some close else None
  in
  if len = 0 || s.[0] <> '!' then None
  else if len = 1 then Some { handle = "!"; suffix = "" }
  else if s.[1] = '!' then
    Some { handle = "!!"; suffix = String.sub s 2 (len - 2) }
  else if s.[1] = '<' then
    (* Verbatim tag: keep what lies between the angle brackets. *)
    if len > 2 && s.[len - 1] = '>' then
      Some { handle = "!"; suffix = String.sub s 2 (len - 3) }
    else None
  else
    match named_handle_end () with
    | Some close ->
      Some
        { handle = String.sub s 0 (close + 1);
          suffix = String.sub s (close + 1) (len - close - 1) }
    | None ->
      (* Primary handle or local tag. *)
      Some { handle = "!"; suffix = String.sub s 1 (len - 1) }

(** The handle immediately followed by the suffix. *)
let to_string t = t.handle ^ t.suffix

(** Expand the secondary handle ["!!"] to the [tag:yaml.org,2002:] prefix;
    every other handle is kept as written in front of the suffix. *)
let to_uri t =
  match t.handle with
  | "!!" -> "tag:yaml.org,2002:" ^ t.suffix
  | "!" -> "!" ^ t.suffix
  | h -> h ^ t.suffix

let pp fmt t =
  Format.pp_print_string fmt (to_string t)

let equal a b =
  String.equal a.handle b.handle && String.equal a.suffix b.suffix

(** Order by handle, then by suffix. *)
let compare a b =
  match String.compare a.handle b.handle with
  | 0 -> String.compare a.suffix b.suffix
  | c -> c

(** Standard tags *)

let null = { handle = "!!"; suffix = "null" }
let bool = { handle = "!!"; suffix = "bool" }
let int = { handle = "!!"; suffix = "int" }
let float = { handle = "!!"; suffix = "float" }
let str = { handle = "!!"; suffix = "str" }
let seq = { handle = "!!"; suffix = "seq" }
let map = { handle = "!!"; suffix = "map" }
let binary = { handle = "!!"; suffix = "binary" }
let timestamp = { handle = "!!"; suffix = "timestamp" }

(** Check if tag matches a standard type *)

(* The bare "!" tag (empty suffix) is also treated as null here. *)
let is_null t = equal t null || (t.handle = "!" && t.suffix = "")
let is_bool t = equal t bool
let is_int t = equal t int
let is_float t = equal t float
let is_str t = equal t str
let is_seq t = equal t seq
let is_map t = equal t map
(** JSON-compatible YAML value representation *)

type t = [
  | `Null
  | `Bool of bool
  | `Float of float
  | `String of string
  | `A of t list
  | `O of (string * t) list
]

(** Constructors *)

let null : t = `Null
let bool flag : t = `Bool flag

(* Integers are stored as floats, the only numeric case of [t]. *)
let int n : t = `Float (float_of_int n)
let float x : t = `Float x
let string text : t = `String text

(* [list f xs] converts every element with [f] and wraps the result. *)
let list f xs : t =
  let items = List.map f xs in
  `A items

let obj pairs : t = `O pairs

(** Type name for error messages *)
let type_name (v : t) : string =
  match v with
  | `Null -> "null"
  | `Bool _ -> "bool"
  | `Float _ -> "float"
  | `String _ -> "string"
  | `A _ -> "array"
  | `O _ -> "object"

(** Safe accessors (return option) *)

let as_null v = match v with `Null -> Some () | _ -> None
let as_bool v = match v with `Bool b -> Some b | _ -> None
let as_float v = match v with `Float f -> Some f | _ -> None
let as_string v = match v with `String s -> Some s | _ -> None
let as_list v = match v with `A items -> Some items | _ -> None
let as_assoc v = match v with `O pairs -> Some pairs | _ -> None

(* A float counts as an int only when converting it to int and back yields
   the same float. *)
let as_int v =
  match v with
  | `Float f ->
    let truncated = Float.to_int f in
    if Float.equal (Float.of_int truncated) f then Some truncated else None
  | _ -> None
(** Unsafe accessors (raise on type mismatch) *)

(* Each accessor returns the payload of the expected case and otherwise
   raises [Type_mismatch (expected, actual)] through [Error.raise], where
   [actual] is the [type_name] of the value received. *)

let to_null v =
  match v with
  | `Null -> ()
  | other -> Error.raise (Type_mismatch ("null", type_name other))

let to_bool v =
  match v with
  | `Bool b -> b
  | other -> Error.raise (Type_mismatch ("bool", type_name other))

let to_float v =
  match v with
  | `Float f -> f
  | other -> Error.raise (Type_mismatch ("float", type_name other))

let to_string v =
  match v with
  | `String s -> s
  | other -> Error.raise (Type_mismatch ("string", type_name other))

let to_list v =
  match v with
  | `A items -> items
  | other -> Error.raise (Type_mismatch ("array", type_name other))

let to_assoc v =
  match v with
  | `O pairs -> pairs
  | other -> Error.raise (Type_mismatch ("object", type_name other))

(* Goes through [as_int], so a float without an exact int value is a
   mismatch too. *)
let to_int v =
  match as_int v with
  | None -> Error.raise (Type_mismatch ("int", type_name v))
  | Some i -> i
(** Object access *)

(* [mem] and [find] answer [false] / [None] for non-objects. *)

let mem key v =
  match v with
  | `O pairs -> List.exists (fun (name, _) -> name = key) pairs
  | _ -> false

let find key v =
  match v with
  | `O pairs -> List.assoc_opt key pairs
  | _ -> None

(* Raises [Key_not_found key] through [Error.raise] when [find] fails. *)
let get key v =
  match find key v with
  | None -> Error.raise (Key_not_found key)
  | Some found -> found

(* [keys] and [values] raise [Type_mismatch] on non-objects. *)

let keys v =
  match v with
  | `O pairs -> List.map fst pairs
  | other -> Error.raise (Type_mismatch ("object", type_name other))

let values v =
  match v with
  | `O pairs -> List.map snd pairs
  | other -> Error.raise (Type_mismatch ("object", type_name other))
(** Combinators *)

(** [combine v1 v2] concatenates the members of two objects, those of [v1]
    first. Raises [Type_mismatch ("object", actual)] through [Error.raise]
    when either argument is not an object; [actual] names the first
    argument that is not an object, so the message never reads
    "expected object, got object". *)
let combine v1 v2 =
  match v1, v2 with
  | `O o1, `O o2 -> `O (o1 @ o2)
  | `O _, offender | offender, _ ->
    Error.raise (Type_mismatch ("object", type_name offender))

(** [map f v] applies [f] to every element of an array. Raises
    [Type_mismatch] through [Error.raise] when [v] is not an array. *)
let map f = function
  | `A items -> `A (List.map f items)
  | other -> Error.raise (Type_mismatch ("array", type_name other))

(** [filter pred v] keeps the array elements satisfying [pred]. Raises
    [Type_mismatch] through [Error.raise] when [v] is not an array. *)
let filter pred = function
  | `A items -> `A (List.filter pred items)
  | other -> Error.raise (Type_mismatch ("array", type_name other))
(** Pretty printing *)

(* Integral floats below 1e15 in magnitude print without a fractional part;
   strings and object keys print as quoted OCaml literals. *)
let rec pp fmt (v : t) =
  let comma fmt () = Format.fprintf fmt ",@ " in
  let pp_member fmt (key, value) =
    Format.fprintf fmt "@[<hv 2>%S:@ %a@]" key pp value
  in
  match v with
  | `Null -> Format.pp_print_string fmt "null"
  | `Bool b -> Format.pp_print_bool fmt b
  | `String s -> Format.fprintf fmt "%S" s
  | `Float f when Float.is_integer f && Float.abs f < 1e15 ->
    Format.fprintf fmt "%.0f" f
  | `Float f -> Format.fprintf fmt "%g" f
  | `A [] -> Format.pp_print_string fmt "[]"
  | `A items ->
    Format.fprintf fmt "@[<hv 2>[@,%a@]@,]"
      (Format.pp_print_list ~pp_sep:comma pp)
      items
  | `O [] -> Format.pp_print_string fmt "{}"
  | `O pairs ->
    Format.fprintf fmt "@[<hv 2>{@,%a@]@,}"
      (Format.pp_print_list ~pp_sep:comma pp_member)
      pairs
(** Equality and comparison *)

(* Structural equality. Object members are compared pairwise in order, so
   two objects holding the same members in a different order are not
   equal. *)
let rec equal (a : t) (b : t) =
  let same_member (k1, v1) (k2, v2) = String.equal k1 k2 && equal v1 v2 in
  match a, b with
  | `Null, `Null -> true
  | `Bool x, `Bool y -> x = y
  | `Float x, `Float y -> Float.equal x y
  | `String x, `String y -> String.equal x y
  | `A xs, `A ys -> List.equal equal xs ys
  | `O xs, `O ys -> List.equal same_member xs ys
  | _ -> false
(* Total order: values of different kinds order as
   null < bool < float < string < array < object; values of the same kind
   are compared by content (object members by key, then by value). *)
let rec compare (a : t) (b : t) =
  let rank : t -> int = function
    | `Null -> 0
    | `Bool _ -> 1
    | `Float _ -> 2
    | `String _ -> 3
    | `A _ -> 4
    | `O _ -> 5
  in
  let compare_member (k1, v1) (k2, v2) =
    match String.compare k1 k2 with
    | 0 -> compare v1 v2
    | c -> c
  in
  match a, b with
  | `Null, `Null -> 0
  | `Bool x, `Bool y -> Bool.compare x y
  | `Float x, `Float y -> Float.compare x y
  | `String x, `String y -> String.compare x y
  | `A xs, `A ys -> List.compare compare xs ys
  | `O xs, `O ys -> List.compare compare_member xs ys
  | _ ->
    (* Different kinds: the ranks necessarily differ. *)
    if rank a < rank b then -1 else 1
-257
yaml/ocaml-yamle/lib/yaml.ml
(** Full YAML representation with anchors, tags, and aliases *)

type t = [
  | `Scalar of Scalar.t
  | `Alias of string
  | `A of t Sequence.t
  | `O of (t, t) Mapping.t
]

(** Pretty printing *)

(* Aliases print as the alias name behind a star; the other cases delegate
   to the printer of the wrapped module. *)
let rec pp fmt (v : t) =
  match v with
  | `Alias name -> Format.fprintf fmt "*%s" name
  | `Scalar s -> Scalar.pp fmt s
  | `O map -> Mapping.pp pp pp fmt map
  | `A seq -> Sequence.pp pp fmt seq

(** Equality *)

(* Nodes of different kinds are never equal; an alias equals only an alias
   of the same name (no resolution happens here). *)
let rec equal (a : t) (b : t) =
  match a with
  | `Scalar x ->
    (match b with
     | `Scalar y -> Scalar.equal x y
     | `Alias _ | `A _ | `O _ -> false)
  | `Alias x ->
    (match b with
     | `Alias y -> String.equal x y
     | `Scalar _ | `A _ | `O _ -> false)
  | `A x ->
    (match b with
     | `A y -> Sequence.equal equal x y
     | `Scalar _ | `Alias _ | `O _ -> false)
  | `O x ->
    (match b with
     | `O y -> Mapping.equal equal equal x y
     | `Scalar _ | `Alias _ | `A _ -> false)
(** Construct from JSON-compatible Value

    Floats are rendered so that no precision is lost: integral values below
    1e15 in magnitude print without a fractional part, and every other
    finite value uses the shortest of 15, 16 or 17 significant digits that
    reads back as the same float. Infinities are written [.inf] / [-.inf]
    and NaN as [.nan].

    Strings become double-quoted scalars; object keys become scalars built
    with the default style. *)
let rec of_value (v : Value.t) : t =
  let float_repr f =
    if Float.is_nan f then ".nan"
    else if Float.is_integer f && Float.abs f < 1e15 then
      Printf.sprintf "%.0f" f
    else if f = Float.infinity then ".inf"
    else if f = Float.neg_infinity then "-.inf"
    else
      (* 17 significant digits always round-trip a double; fewer digits are
         preferred when they already do. *)
      let round_trips s = Float.equal (float_of_string s) f in
      let short = Printf.sprintf "%.15g" f in
      if round_trips short then short
      else
        let medium = Printf.sprintf "%.16g" f in
        if round_trips medium then medium else Printf.sprintf "%.17g" f
  in
  match v with
  | `Null -> `Scalar (Scalar.make "null")
  | `Bool true -> `Scalar (Scalar.make "true")
  | `Bool false -> `Scalar (Scalar.make "false")
  | `Float f -> `Scalar (Scalar.make (float_repr f))
  | `String s ->
    `Scalar (Scalar.make s ~style:Scalar_style.Double_quoted)
  | `A items ->
    `A (Sequence.make (List.map of_value items))
  | `O pairs ->
    `O (Mapping.make (List.map (fun (k, v) ->
      (`Scalar (Scalar.make k), of_value v)
    ) pairs))
(** Default limits for alias expansion (protection against billion laughs attack) *)
let default_max_alias_nodes = 10_000_000
let default_max_alias_depth = 100

(** Replace every alias in [root] by the node carrying the matching anchor.

    Anchors are gathered from the whole tree first (a later anchor with the
    same name replaces an earlier one), then the tree is rebuilt with each
    alias expanded.

    @param max_nodes Maximum number of nodes visited during expansion (default 10M)
    @param max_depth Maximum depth of alias-within-alias resolution (default 100)
    @raise Alias_expansion_node_limit if max_nodes is exceeded
    @raise Alias_expansion_depth_limit if max_depth is exceeded
    @raise Undefined_alias if an alias names no anchor
*)
let resolve_aliases ?(max_nodes = default_max_alias_nodes) ?(max_depth = default_max_alias_depth) (root : t) : t =
  let anchors = Hashtbl.create 16 in
  let visited = ref 0 in

  (* Bind [node] to its anchor name, when it has one. *)
  let remember node anchor =
    Option.iter (fun name -> Hashtbl.replace anchors name node) anchor
  in

  (* First pass: collect all anchors. *)
  let rec collect (node : t) =
    match node with
    | `Alias _ -> ()
    | `Scalar s -> remember node (Scalar.anchor s)
    | `A seq ->
      remember node (Sequence.anchor seq);
      List.iter collect (Sequence.members seq)
    | `O map ->
      remember node (Mapping.anchor map);
      List.iter (fun (k, v) -> collect k; collect v) (Mapping.members map)
  in
  collect root;

  (* Second pass: rebuild the tree. Every visited node counts towards
     [max_nodes]; [depth] counts how many aliases were followed to reach
     the current node. *)
  let rec resolve ~depth (node : t) : t =
    incr visited;
    if !visited > max_nodes then
      Error.raise (Alias_expansion_node_limit max_nodes);
    match node with
    | `Scalar _ -> node
    | `Alias name ->
      if depth >= max_depth then
        Error.raise (Alias_expansion_depth_limit max_depth);
      (match Hashtbl.find_opt anchors name with
       | None -> Error.raise (Undefined_alias name)
       | Some target -> resolve ~depth:(depth + 1) target)
    | `A seq ->
      `A (Sequence.map (resolve ~depth) seq)
    | `O map ->
      (* The pair is built as a single tuple expression on purpose, so the
         order in which key and value are resolved stays what it was. *)
      let resolve_member (k, v) = (resolve ~depth k, resolve ~depth v) in
      `O (Mapping.make
            ?anchor:(Mapping.anchor map)
            ?tag:(Mapping.tag map)
            ~implicit:(Mapping.implicit map)
            ~style:(Mapping.style map)
            (List.map resolve_member (Mapping.members map)))
  in
  resolve ~depth:0 root
(** Convert a scalar node to a JSON-style value.

    An explicit tag decides the result:
    - [!!null] gives [`Null] regardless of content;
    - [!!bool] accepts true/yes/on and false/no/off (case-insensitive);
    - [!!int] and [!!float] give a [`Float];
    - [!!str] and any unrecognised tag give [`String].

    Without a tag, a non-plain (quoted or block) scalar is a [`String], and a
    plain scalar goes through {!infer_scalar_type}.

    @raise Invalid_scalar_conversion if the content does not fit a [!!bool],
    [!!int] or [!!float] tag. *)
let rec scalar_to_value s =
  let value = Scalar.value s in
  (* The scalar content cannot be read as the tagged [target] type. *)
  let invalid target = Error.raise (Invalid_scalar_conversion (value, target)) in
  match Scalar.tag s with
  | Some ("tag:yaml.org,2002:null" | "!!null") ->
    `Null
  | Some ("tag:yaml.org,2002:bool" | "!!bool") ->
    (match String.lowercase_ascii value with
     | "true" | "yes" | "on" -> `Bool true
     | "false" | "no" | "off" -> `Bool false
     | _ -> invalid "bool")
  | Some ("tag:yaml.org,2002:int" | "!!int") ->
    (* Float parsing comes first so every previously accepted input keeps its
       value. The integer parser is only a fallback for radix-prefixed
       literals such as 0o17 and 0b101, which the implicit resolver accepts
       but Float.of_string does not. *)
    (match Float.of_string_opt value with
     | Some f -> `Float f
     | None ->
       (match int_of_string_opt value with
        | Some n -> `Float (Float.of_int n)
        | None -> invalid "int"))
  | Some ("tag:yaml.org,2002:float" | "!!float") ->
    (match Float.of_string_opt value with
     | Some f -> `Float f
     | None -> invalid "float")
  | Some ("tag:yaml.org,2002:str" | "!!str") ->
    `String value
  | Some _ ->
    (* Unknown tag - treat as string *)
    `String value
  | None ->
    (* Implicit typing applies to plain scalars only. *)
    if Scalar.style s <> Scalar_style.Plain then `String value
    else infer_scalar_type value
(** Implicit typing of a plain scalar. The keywords below are matched
    case-insensitively; anything else is handed to {!try_parse_number}. *)
and infer_scalar_type value =
  match String.lowercase_ascii value with
  (* Null: empty content, "null" or "~" *)
  | "" | "null" | "~" -> `Null
  (* Booleans *)
  | "true" | "yes" | "on" -> `Bool true
  | "false" | "no" | "off" -> `Bool false
  (* Special floats *)
  | ".inf" | "+.inf" -> `Float Float.infinity
  | "-.inf" -> `Float Float.neg_infinity
  | ".nan" -> `Float Float.nan
  (* Number, or failing that a string *)
  | _ -> try_parse_number value
(** Read [value] as a number, falling back to [`String value].

    Only text that starts like a number is considered: after an optional
    sign the first character must be a digit or ['.']. This guard matters
    because [Float.of_string] also accepts the words "nan", "inf" and
    "infinity" (any case, optionally signed) and ignores underscores, so
    without it plain scalars such as [nan], [-inf] or [_1] would become
    floats. The YAML spellings [.inf] / [.nan] are handled by
    {!infer_scalar_type} before this function is reached.

    Unsigned literals with a [0x], [0o] or [0b] prefix are read with
    [int_of_string] first; every other candidate, and any literal the
    integer parser rejects, goes through [Float.of_string]. *)
and try_parse_number value =
  let len = String.length value in
  let is_digit c = c >= '0' && c <= '9' in
  (* Index of the first character after an optional sign. *)
  let start =
    if len > 0 && (value.[0] = '-' || value.[0] = '+') then 1 else 0
  in
  let looks_numeric =
    start < len && (is_digit value.[start] || value.[start] = '.')
  in
  if not looks_numeric then `String value
  else begin
    let has_radix_prefix =
      len > 2
      && value.[0] = '0'
      && (match value.[1] with
          | 'x' | 'X' | 'o' | 'O' | 'b' | 'B' -> true
          | _ -> false)
    in
    let parsed =
      if has_radix_prefix then
        match int_of_string_opt value with
        | Some n -> Some (Float.of_int n)
        | None -> Float.of_string_opt value
      else Float.of_string_opt value
    in
    match parsed with
    | Some f -> `Float f
    | None ->
      (* Not a number - it's a string *)
      `String value
  end
(** Convert to JSON-compatible Value.

    @param resolve_aliases_first Whether to resolve aliases before conversion
    (default true)
    @param max_nodes Node budget passed to {!resolve_aliases}
    (default [default_max_alias_nodes])
    @param max_depth Alias nesting limit passed to {!resolve_aliases}
    (default [default_max_alias_depth])
    @raise Unresolved_alias if resolve_aliases_first is false and an alias is
    encountered, either as a value or as a mapping key
    @raise Type_mismatch if a mapping key is a sequence or a mapping
*)
let to_value
    ?(resolve_aliases_first = true)
    ?(max_nodes = default_max_alias_nodes)
    ?(max_depth = default_max_alias_depth)
    (v : t) : Value.t =
  let v =
    if resolve_aliases_first then resolve_aliases ~max_nodes ~max_depth v
    else v
  in
  (* JSON object keys are strings: a scalar key contributes its raw text. *)
  let key_of (k : t) =
    match k with
    | `Scalar s -> Scalar.value s
    (* Report a leftover alias as such, as the contract above promises,
       rather than as a generic key-type error. *)
    | `Alias name -> Error.raise (Unresolved_alias name)
    | `A _ | `O _ -> Error.raise (Type_mismatch ("string key", "complex key"))
  in
  let rec convert (v : t) : Value.t =
    match v with
    | `Scalar s -> scalar_to_value s
    | `Alias name -> Error.raise (Unresolved_alias name)
    | `A seq -> `A (List.map convert (Sequence.members seq))
    | `O map ->
      `O (List.map (fun (k, v) ->
          let key = key_of k in
          (key, convert v)
        ) (Mapping.members map))
  in
  convert v
(** [anchor node] is the anchor attached to [node], if any. An alias node
    always yields [None]. *)
let anchor (node : t) =
  match node with
  | `Alias _ -> None
  | `Scalar scalar -> Scalar.anchor scalar
  | `A sequence -> Sequence.anchor sequence
  | `O mapping -> Mapping.anchor mapping
(** [tag node] is the tag attached to [node], if any. An alias node always
    yields [None]. *)
let tag (node : t) =
  match node with
  | `Alias _ -> None
  | `Scalar scalar -> Scalar.tag scalar
  | `A sequence -> Sequence.tag sequence
  | `O mapping -> Mapping.tag mapping
-185
yaml/ocaml-yamle/lib/yamle.ml
···11-type value = Value.t
22-type yaml = Yaml.t
33-44-type version = [ `V1_1 | `V1_2 ]
55-66-type encoding = Encoding.t
77-type scalar_style = Scalar_style.t
88-type layout_style = Layout_style.t
99-1010-(** {1 Error handling} *)
1111-1212-type error = Error.t
1313-exception Yamle_error = Error.Yamle_error
1414-1515-(** {1 Alias expansion limits (protection against billion laughs attack)} *)
1616-1717-let default_max_alias_nodes = Yaml.default_max_alias_nodes
1818-let default_max_alias_depth = Yaml.default_max_alias_depth
(** {1 JSON-compatible parsing} *)

(** [of_string input] parses [input] through [Loader.value_of_string].
    Aliases are resolved by default; [max_nodes] and [max_depth] are the
    alias-expansion limits forwarded to the loader. *)
let of_string
    ?(resolve_aliases = true)
    ?(max_nodes = default_max_alias_nodes)
    ?(max_depth = default_max_alias_depth)
    input =
  Loader.value_of_string ~resolve_aliases ~max_nodes ~max_depth input
(** [documents_of_string input] is [Loader.documents_of_string input]. *)
let documents_of_string input = Loader.documents_of_string input
(** {1 JSON-compatible emission} *)

(** [to_string value] renders [value] with [Emitter.value_to_string]. The
    optional arguments override the matching fields of
    [Emitter.default_config]; every other field keeps its default. *)
let to_string
    ?(encoding = Encoding.Utf8)
    ?(scalar_style = Scalar_style.Any)
    ?(layout_style = Layout_style.Any)
    value =
  let base = Emitter.default_config in
  let config = { base with encoding; scalar_style; layout_style } in
  Emitter.value_to_string ~config value
(** {1 YAML-specific parsing} *)

(** [yaml_of_string input] parses [input] through [Loader.yaml_of_string].
    Unlike {!of_string}, aliases are left unresolved by default; [max_nodes]
    and [max_depth] are forwarded to the loader. *)
let yaml_of_string
    ?(resolve_aliases = false)
    ?(max_nodes = default_max_alias_nodes)
    ?(max_depth = default_max_alias_depth)
    input =
  Loader.yaml_of_string ~resolve_aliases ~max_nodes ~max_depth input
(** {1 YAML-specific emission} *)

(** [yaml_to_string yaml] renders [yaml] with [Emitter.yaml_to_string]. The
    optional arguments override the matching fields of
    [Emitter.default_config]; every other field keeps its default. *)
let yaml_to_string
    ?(encoding = Encoding.Utf8)
    ?(scalar_style = Scalar_style.Any)
    ?(layout_style = Layout_style.Any)
    yaml =
  let base = Emitter.default_config in
  let config = { base with encoding; scalar_style; layout_style } in
  Emitter.yaml_to_string ~config yaml
(** [documents_to_string documents] renders [documents] with
    [Emitter.documents_to_string]. The style arguments override the matching
    fields of [Emitter.default_config]; [resolve_aliases] (default [true]) is
    forwarded to the emitter. *)
let documents_to_string
    ?(encoding = Encoding.Utf8)
    ?(scalar_style = Scalar_style.Any)
    ?(layout_style = Layout_style.Any)
    ?(resolve_aliases = true)
    documents =
  let base = Emitter.default_config in
  let config = { base with encoding; scalar_style; layout_style } in
  Emitter.documents_to_string ~config ~resolve_aliases documents
(** {1 Conversion} *)

(** [to_json yaml] converts [yaml] with [Yaml.to_value]. [resolve_aliases]
    (default [true]) is passed on as [~resolve_aliases_first], together with
    the [max_nodes] and [max_depth] limits. *)
let to_json
    ?(resolve_aliases = true)
    ?(max_nodes = default_max_alias_nodes)
    ?(max_depth = default_max_alias_depth)
    yaml =
  let resolve_aliases_first = resolve_aliases in
  Yaml.to_value ~resolve_aliases_first ~max_nodes ~max_depth yaml
(** [of_json json] is [Yaml.of_value json]. *)
let of_json json = Yaml.of_value json
9494-9595-(** {1 Pretty printing} *)
9696-9797-let pp = Value.pp
9898-let pp_yaml = Yaml.pp
9999-let equal = Value.equal
100100-let equal_yaml = Yaml.equal
101101-102102-(** {1 Nested modules} *)
103103-104104-module Error = Error
105105-module Position = Position
106106-module Span = Span
107107-module Encoding = Encoding
108108-module Input = Input
109109-module Scalar_style = Scalar_style
110110-module Layout_style = Layout_style
111111-module Chomping = Chomping
112112-module Token = Token
113113-module Scanner = Scanner
114114-module Event = Event
115115-module Parser = Parser
116116-module Tag = Tag
117117-module Value = Value
118118-module Scalar = Scalar
119119-module Sequence = Sequence
120120-module Mapping = Mapping
121121-module Yaml = Yaml
122122-module Document = Document
123123-module Loader = Loader
124124-module Emitter = Emitter
(** {1 Streaming interface} *)

(** Event-level helpers: thin wrappers over [Parser] and [Emitter]. *)
module Stream = struct
  type parser = Parser.t
  type emitter = Emitter.t

  (** [parser s] is [Parser.of_string s]. *)
  let parser s = Parser.of_string s

  (** [do_parse p] is [Parser.next p]. *)
  let do_parse p = Parser.next p

  (** [emitter ()] is [Emitter.create ()]. The optional [len] argument is
      accepted and ignored. *)
  let emitter ?len:_ () = Emitter.create ()

  (** [emit e ev] hands the event [ev] to the emitter [e]. *)
  let emit e ev = Emitter.emit e ev

  (** [emitter_buf e] is [Emitter.contents e]. *)
  let emitter_buf e = Emitter.contents e

  (** Convenience event emitters: each builds one event and emits it on [e]. *)

  let stream_start e enc = emit e (Event.Stream_start { encoding = enc })

  let stream_end e = emit e Event.Stream_end

  (** [version], when given, is emitted as the pair [(1, 1)] or [(1, 2)]. *)
  let document_start ?version ?(implicit = true) e =
    let version =
      Option.map (function `V1_1 -> (1, 1) | `V1_2 -> (1, 2)) version
    in
    emit e (Event.Document_start { version; implicit })

  let document_end ?(implicit = true) e =
    emit e (Event.Document_end { implicit })

  (** Emits a scalar event carrying the fields of [s]. Note the argument
      order: the scalar comes first, the emitter second. *)
  let scalar s e =
    let event =
      Event.Scalar {
        anchor = Scalar.anchor s;
        tag = Scalar.tag s;
        value = Scalar.value s;
        plain_implicit = Scalar.plain_implicit s;
        quoted_implicit = Scalar.quoted_implicit s;
        style = Scalar.style s;
      }
    in
    emit e event

  let alias e name = emit e (Event.Alias { anchor = name })

  let sequence_start ?anchor ?tag ?(implicit = true) ?(style = Layout_style.Any) e =
    emit e (Event.Sequence_start { anchor; tag; implicit; style })

  let sequence_end e = emit e Event.Sequence_end

  let mapping_start ?anchor ?tag ?(implicit = true) ?(style = Layout_style.Any) e =
    emit e (Event.Mapping_start { anchor; tag; implicit; style })

  let mapping_end e = emit e Event.Mapping_end
end
···11-YAML Scalar Parsing Tests
22-33-This file tests various forms of YAML scalar values including plain, quoted, and block scalars.
44-55-================================================================================
66-PLAIN SCALARS
77-================================================================================
88-99-Simple plain scalars
1010-1111- $ echo 'key: value' | yamlcat
1212- key: value
1313-1414- $ echo 'name: Alice
1515- > age: 30
1616- > active: true' | yamlcat
1717- name: Alice
1818- age: 30
1919- active: true
2020-2121-Plain scalars with special values
2222-2323- $ echo 'null_val: null
2424- > bool_true: true
2525- > bool_false: false
2626- > number: 42
2727- > float: 3.14' | yamlcat --json
2828- {"null_val": null, "bool_true": true, "bool_false": false, "number": 42, "float": 3.14}
2929-3030-================================================================================
3131-QUOTED SCALARS - SINGLE QUOTES
3232-================================================================================
3333-3434-Single-quoted strings preserve literal text
3535-3636- $ echo "single: 'hello world'" | yamlcat
3737- single: hello world
3838-3939-Single-quoted strings with embedded double quotes
4040-4141- $ echo "quote: 'He said \"hello\"'" | yamlcat
4242- quote: "He said \"hello\""
4343-4444-Single-quoted strings with escaped single quotes (doubled)
4545-4646- $ echo "escaped: 'It''s a test'" | yamlcat
4747- escaped: It's a test
4848-4949-Single-quoted multiline (newlines become spaces)
5050-5151- $ echo "text: 'This is a
5252- > multi-line
5353- > string'" | yamlcat --json
5454- {"text": "This is a multi-line string"}
5555-5656-Empty single-quoted string
5757-5858- $ echo "empty: ''" | yamlcat
5959- empty: ''
6060-6161-================================================================================
6262-QUOTED SCALARS - DOUBLE QUOTES
6363-================================================================================
6464-6565-Simple double-quoted strings
6666-6767- $ echo 'double: "hello world"' | yamlcat
6868- double: hello world
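Double-quoted with escaped newline (the shell's echo appears to expand \n into a real line break before yamlcat reads it, so the break is folded into a space)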
7171-7272- $ echo 'text: "Line one\nLine two"' | yamlcat --json
7373- {"text": "Line one Line two"}
7474-7575-Double-quoted with escaped tab
7676-7777- $ echo 'text: "Col1\tCol2\tCol3"' | yamlcat --json
7878- {"text": "Col1\tCol2\tCol3"}
7979-8080-Double-quoted with backslash escape
8181-8282- $ echo 'path: "C:\\Users\\Name"' | yamlcat --json
8383- Error: invalid hex escape: at line 1, columns 12-12
8484- [1]
8585-8686-Double-quoted with escaped quote
8787-8888- $ echo 'text: "She said \"hello\""' | yamlcat --json
8989- {"text": "She said \"hello\""}
9090-9191-Double-quoted with multiple escape sequences
9292-9393- $ echo 'text: "Tab:\t Newline:\n Quote:\" Backslash:\\\\"' | yamlcat --json
9494- {"text": "Tab:\t Newline: Quote:\" Backslash:\\"}
9595-9696-Empty double-quoted string
9797-9898- $ echo 'empty: ""' | yamlcat
9999- empty: ''
100100-101101-================================================================================
102102-BLOCK SCALARS - LITERAL STYLE (|)
103103-================================================================================
104104-105105-Basic literal block scalar (preserves newlines)
106106-107107- $ echo 'text: |
108108- > line one
109109- > line two
110110- > line three' | yamlcat --json
111111- {"text": "line one\nline two\nline three\n"}
112112-113113-Literal with indentation
114114-115115- $ echo 'text: |
116116- > First line
117117- > Indented line
118118- > Back to first' | yamlcat --json
119119- {"text": "First line\n Indented line\nBack to first\n"}
120120-121121-Literal with blank lines
122122-123123- $ echo 'text: |
124124- > First paragraph
125125- >
126126- > Second paragraph' | yamlcat --json
127127- {"text": "First paragraph\n\nSecond paragraph\n"}
128128-129129-================================================================================
130130-BLOCK SCALARS - FOLDED STYLE (>)
131131-================================================================================
132132-133133-Basic folded block scalar (newlines become spaces)
134134-135135- $ echo 'text: >
136136- > This is a long paragraph
137137- > that will be folded into
138138- > a single line.' | yamlcat --json
139139- {"text": "This is a long paragraph that will be folded into a single line.\n"}
Folded with paragraph separation (the blank line becomes a single newline)
142142-143143- $ echo 'text: >
144144- > First paragraph
145145- > flows together.
146146- >
147147- > Second paragraph
148148- > also flows.' | yamlcat --json
149149- {"text": "First paragraph flows together.\nSecond paragraph also flows.\n"}
150150-151151-================================================================================
152152-CHOMPING INDICATORS
153153-================================================================================
154154-155155-Strip chomping (-) removes trailing newlines
156156-157157- $ echo 'text: |-
158158- > No trailing newline' | yamlcat --json
159159- {"text": "No trailing newline"}
160160-161161- $ echo 'text: |-
162162- > Text here
163163- >
164164- > ' | yamlcat --json
165165- {"text": "Text here"}
166166-167167-Folded with strip
168168-169169- $ echo 'text: >-
170170- > Folded text
171171- > with stripped
172172- > trailing newlines
173173- >
174174- > ' | yamlcat --json
175175- {"text": "Folded text with stripped trailing newlines"}
176176-177177-Clip chomping (default) keeps single trailing newline
178178-179179- $ echo 'text: |
180180- > One trailing newline
181181- >
182182- > ' | yamlcat --json
183183- {"text": "One trailing newline\n"}
184184-185185- $ echo 'text: >
186186- > Folded with one
187187- > trailing newline
188188- >
189189- > ' | yamlcat --json
190190- {"text": "Folded with one trailing newline\n"}
191191-192192-Keep chomping (+) preserves all trailing newlines
193193-194194- $ echo 'text: |+
195195- > Keeps trailing newlines
196196- >
197197- >
198198- > ' | yamlcat --json
199199- {"text": "Keeps trailing newlines\n\n\n\n"}
200200-201201- $ echo 'text: >+
202202- > Folded text
203203- > keeps trailing
204204- >
205205- >
206206- > ' | yamlcat --json
207207- {"text": "Folded text keeps trailing\n\n\n\n"}
208208-209209-================================================================================
210210-EXPLICIT INDENTATION INDICATORS
211211-================================================================================
212212-213213-Literal with explicit 2-space indentation
214214-215215- $ echo 'text: |2
216216- > Two space base
217217- > Second line
218218- > Extra indent' | yamlcat --json
219219- {"text": " Two space base\n Second line\n Extra indent\n"}
220220-221221-Folded with explicit indentation
222222-223223- $ echo 'text: >2
224224- > Text with two space
225225- > base indentation that
226226- > will be folded.' | yamlcat --json
227227- {"text": "Text with two space base indentation that will be folded.\n"}
228228-229229-Combined indentation and chomping indicators
230230-231231- $ echo 'text: |2-
232232- > Indented by 2
233233- > No trailing newlines
234234- >
235235- > ' | yamlcat --json
236236- {"text": " Indented by 2\n No trailing newlines"}
237237-238238- $ echo 'text: |2+
239239- > Indented by 2
240240- > Keeps trailing newlines
241241- >
242242- >
243243- > ' | yamlcat --json
244244- {"text": " Indented by 2\n Keeps trailing newlines\n\n\n\n"}
245245-246246-================================================================================
247247-FILE TESTS - QUOTED SCALARS
248248-================================================================================
249249-250250-Test parsing scalars_quoted.yml file
251251-252252- $ yamlcat ../yaml/scalars_quoted.yml | head -20
253253- single_simple: hello world
254254- single_with_double: "He said \"hello\""
255255- single_escaped_quote: 'It''s a single quote: ''example'''
256256- single_multiline: This is a multi-line single quoted string
257257- double_simple: hello world
258258- double_with_single: It's easy
259259- double_escaped_quote: "She said \"hello\""
260260- escaped_newline: "Line one\nLine two\nLine three"
261261- escaped_tab: "Column1\tColumn2\tColumn3"
262262- escaped_backslash: "Path: C:\\Users\\Name"
263263- escaped_carriage: "Before\rAfter"
264264- escaped_bell: "Bell\x07"
265265- escaped_backspace: "Back\x08"
266266- escaped_formfeed: "Form\x0c"
267267- escaped_vertical: "Vertical\x0btab"
268268- unicode_16bit: 'Snowman: ☃'
269269- unicode_32bit: 'Emoji: 😀'
270270- unicode_hex: "Null byte: \x00"
271271- empty_single: ''
272272- empty_double: ''
273273-274274-Test JSON output for quoted scalars
275275-276276- $ yamlcat --json ../yaml/scalars_quoted.yml | head -c 500
277277- {"single_simple": "hello world", "single_with_double": "He said \"hello\"", "single_escaped_quote": "It's a single quote: 'example'", "single_multiline": "This is a multi-line single quoted string", "double_simple": "hello world", "double_with_single": "It's easy", "double_escaped_quote": "She said \"hello\"", "escaped_newline": "Line one\nLine two\nLine three", "escaped_tab": "Column1\tColumn2\tColumn3", "escaped_backslash": "Path: C:\\Users\\Name", "escaped_carriage": "Before\rAfter", "escaped
278278-279279-Verify specific escape handling in JSON
280280-281281- $ yamlcat --json ../yaml/scalars_quoted.yml | grep -o '"escaped_newline": "[^"]*"'
282282- "escaped_newline": "Line one\nLine two\nLine three"
283283-284284- $ yamlcat --json ../yaml/scalars_quoted.yml | grep -o '"escaped_tab": "[^"]*"'
285285- "escaped_tab": "Column1\tColumn2\tColumn3"
286286-287287-Verify Unicode handling
288288-289289- $ yamlcat --json ../yaml/scalars_quoted.yml | grep -o '"unicode_16bit": "[^"]*"'
290290- "unicode_16bit": "Snowman: \226\152\131"
291291-292292- $ yamlcat --json ../yaml/scalars_quoted.yml | grep -o '"unicode_32bit": "[^"]*"'
293293- "unicode_32bit": "Emoji: \240\159\152\128"
294294-295295-Verify quoted strings preserve type indicators
296296-297297- $ yamlcat --json ../yaml/scalars_quoted.yml | grep -o '"string_true": "[^"]*"'
298298- "string_true": "true"
299299-300300- $ yamlcat --json ../yaml/scalars_quoted.yml | grep -o '"string_null": "[^"]*"'
301301- "string_null": "null"
302302-303303- $ yamlcat --json ../yaml/scalars_quoted.yml | grep -o '"string_number": "[^"]*"'
304304- "string_number": "123"
305305-306306-================================================================================
307307-FILE TESTS - BLOCK SCALARS
308308-================================================================================
309309-310310-Test parsing scalars_block.yml file
311311-312312- $ yamlcat ../yaml/scalars_block.yml | head -30
313313- literal_basic: "Line one\nLine two\nLine three\n"
314314- literal_with_indent: "First line\n Indented line\n More indented\n Back to second level\nBack to first level\n"
315315- folded_basic: "This is a long paragraph that will be folded into a single line with the newlines converted to spaces.\n"
316316- folded_paragraph: "First paragraph flows together into a single line.\nSecond paragraph after blank line also flows together.\n"
317317- literal_strip: No trailing newline
318318- literal_strip_multiple: Text here
319319- folded_strip: Folded text with stripped trailing newlines
320320- literal_clip: "One trailing newline\n"
321321- literal_clip_explicit: "This is the default behavior\n"
322322- folded_clip: "Folded with one trailing newline\n"
323323- literal_keep: "Keeps trailing newlines\n\n\n"
324324- literal_keep_multiple: "Text here\n\n\n"
325325- folded_keep: "Folded text keeps trailing\n\n\n"
326326- literal_indent_2: " Two space indentation\n is preserved here\n Extra indent\n Back to two\n"
327327- literal_indent_4: " Four space base indent\n Second line\n Extra indent\n Back to base\n"
328328- folded_indent_2: "Text with two space base indentation that will be folded.\n"
329329- folded_indent_3: "Three space indent for this folded text block.\n"
330330- literal_indent_strip: " Indented by 2\n No trailing newlines"
331331- folded_indent_strip: Folded with indent and stripped end
332332- literal_indent_keep: " Indented by 2\n Keeps trailing newlines\n\n\n"
333333- folded_indent_keep: "Folded indent 4 keeps all trailing\n\n\n"
334334- empty_literal: "\nempty_folded: >\n\n# Block scalar with only newlines\nonly_newlines_literal: |\n\n\nonly_newlines_folded: >\n\n\n# Complex indentation patterns\ncomplex_literal: |\nFirst level\n Second level\n Third level\n Back to second\nBack to first\n\nNew paragraph\n With indent\n\nFinal paragraph\n"
335335- complex_folded: "This paragraph flows together.\nThis is separate. This line starts more indented and continues.\nFinal thoughts here.\n"
336336- special_chars_literal: "Special: @#$%^&*()\nQuotes: \"double\" 'single'\nBrackets: [array] {object}\nSymbols: | > & * ? : -\n"
337337- special_chars_folded: "All special chars are literal in block scalars: []{}|>*&\n"
338338- sequence_with_blocks:
339339- - "First item\nliteral block\n"
340340- - "Second item folded block\n"
341341- - "Third item\nstripped"
342342- - "Fourth item\nkept\n\n\n"
343343-344344-Test JSON output for block scalars
345345-346346- $ yamlcat --json ../yaml/scalars_block.yml | grep -o '"literal_basic": "[^"]*"'
347347- "literal_basic": "Line one\nLine two\nLine three\n"
348348-349349- $ yamlcat --json ../yaml/scalars_block.yml | grep -o '"folded_basic": "[^"]*"' | head -c 100
350350- "folded_basic": "This is a long paragraph that will be folded into a single line with the newlines c
351351-352352-Verify strip chomping
353353-354354- $ yamlcat --json ../yaml/scalars_block.yml | grep -o '"literal_strip": "[^"]*"'
355355- "literal_strip": "No trailing newline"
356356-357357- $ yamlcat --json ../yaml/scalars_block.yml | grep -o '"folded_strip": "[^"]*"'
358358- "folded_strip": "Folded text with stripped trailing newlines"
359359-360360-Verify clip chomping (single newline)
361361-362362- $ yamlcat --json ../yaml/scalars_block.yml | grep -o '"literal_clip": "[^"]*"'
363363- "literal_clip": "One trailing newline\n"
364364-365365-Verify keep chomping (all newlines)
366366-367367- $ yamlcat --json ../yaml/scalars_block.yml | grep -o '"literal_keep": "[^"]*"'
368368- "literal_keep": "Keeps trailing newlines\n\n\n"
369369-370370- $ yamlcat --json ../yaml/scalars_block.yml | grep -o '"folded_keep": "[^"]*"'
371371- "folded_keep": "Folded text keeps trailing\n\n\n"
372372-373373-Verify indentation handling
374374-375375- $ yamlcat --json ../yaml/scalars_block.yml | grep -o '"literal_indent_2": "[^"]*"'
376376- "literal_indent_2": " Two space indentation\n is preserved here\n Extra indent\n Back to two\n"
377377-378378-Verify nested structures with block scalars
379379-380380- $ yamlcat ../yaml/scalars_block.yml | tail -10
381381- special_chars_folded: "All special chars are literal in block scalars: []{}|>*&\n"
382382- sequence_with_blocks:
383383- - "First item\nliteral block\n"
384384- - "Second item folded block\n"
385385- - "Third item\nstripped"
386386- - "Fourth item\nkept\n\n\n"
387387- nested:
388388- description: "This is a folded description that spans multiple lines.\n"
389389- code: "def hello():\n print(\"Hello, World!\")\n return True\n"
390390- notes: "Final notes\nwith stripped end"
391391-392392-================================================================================
393393-SPECIAL CASES AND EDGE CASES
394394-================================================================================
395395-396396-Empty block scalars
397397-398398- $ echo 'empty_literal: |' | yamlcat --json
399399- {"empty_literal": ""}
400400-401401- $ echo 'empty_folded: >' | yamlcat --json
402402- {"empty_folded": ""}
403403-404404-Block scalars with special characters (no escaping needed)
405405-406406- $ echo 'code: |
407407- > Special: @#$%^&*()
408408- > Quotes: "double" '"'"'single'"'"'
409409- > Brackets: [array] {object}' | yamlcat --json
410410- {"code": "Special: @#$%^&*()\nQuotes: \"double\" 'single'\nBrackets: [array] {object}\n"}
411411-412412-Plain scalar vs quoted string for special values
413413-414414- $ echo 'unquoted_true: true
415415- > quoted_true: "true"' | yamlcat --json
416416- {"unquoted_true": true, "quoted_true": "true"}
417417-418418- $ echo 'unquoted_null: null
419419- > quoted_null: "null"' | yamlcat --json
420420- {"unquoted_null": null, "quoted_null": "null"}
421421-422422-Strings that need quoting to preserve leading/trailing spaces
423423-424424- $ echo 'leading: " spaces"
425425- > trailing: "spaces "
426426- > both: " spaces "' | yamlcat --json
427427- {"leading": " spaces", "trailing": "spaces ", "both": " spaces "}
428428-429429-Block scalars in sequences
430430-431431- $ echo 'items:
432432- > - |
433433- > First item
434434- > multiline
435435- > - >
436436- > Second item
437437- > folded' | yamlcat --json
438438- {"items": ["First item\nmultiline\n", "Second item folded\n"]}
439439-440440-Block scalars in nested mappings
441441-442442- $ echo 'outer:
443443- > inner:
444444- > description: >
445445- > This is a folded
446446- > description.
447447- > code: |
448448- > def test():
449449- > return True' | yamlcat --json
450450- {"outer": {"inner": {"description": "This is a folded description.\n", "code": "def test():\n return True\n"}}}
451451-452452-Preserving indentation in literal blocks
453453-454454- $ echo 'code: |
455455- > def hello():
456456- > print("Hello")
457457- > if True:
458458- > return 42' | yamlcat --json
459459- {"code": "def hello():\n print(\"Hello\")\n if True:\n return 42\n"}
460460-461461-Folded scalars preserve more-indented lines
462462-463463- $ echo 'text: >
464464- > Normal paragraph
465465- > continues here.
466466- >
467467- > Indented block
468468- > preserved.
469469- >
470470- > Back to normal.' | yamlcat --json
471471- {"text": "Normal paragraph continues here.\nIndented block preserved.\nBack to normal.\n"}
-60
yaml/ocaml-yamle/tests/cram/tags.t
···11-Tag Support Tests
22-33-These tests verify YAML tag support including type coercion and
44-different tag formats.
55-66-Test: String tag shorthand
77-88- $ printf '!!str 123' | yamlcat
99- '123'
1010-1111-The !!str tag forces the value to be treated as a string.
1212-1313-Test: Integer tag shorthand
1414-1515- $ printf '!!int "42"' | yamlcat
1616- 42
1717-1818-The !!int tag coerces the quoted string to an integer.
1919-2020-Test: Boolean tag shorthand
2121-2222- $ printf '!!bool "yes"' | yamlcat
2323- true
2424-2525-The !!bool tag coerces the string to a boolean.
2626-2727-Test: Null tag shorthand
2828-2929- $ printf '!!null ""' | yamlcat
3030- null
3131-3232-The !!null tag coerces the value to null.
3333-3434-Test: Float tag shorthand
3535-3636- $ printf '!!float 3.14' | yamlcat
3737- 3.14
3838-3939-The !!float tag specifies a floating-point number.
4040-4141-Test: Tag shorthand in mapping value
4242-4343- $ printf 'value: !!str 42' | yamlcat
4444- value: '42'
4545-4646-Tags work in mapping values and force type coercion.
4747-4848-Test: Local tags
4949-5050- $ printf '!local_tag value' | yamlcat
5151- value
5252-5353-Local tags (single !) are treated as unknown and default to string type.
5454-5555-Test: Verbatim tags
5656-5757- $ printf '!<tag:example.com:type> value' | yamlcat
5858- value
5959-6060-Verbatim tags (!<...>) are treated as unknown and default to string type.
···11-(* Load yaml-test-suite test cases from data branch format *)
(* One test case loaded from a yaml-test-suite directory. *)
type test_case = {
  id : string;  (* test ID; "ID:variant" for a variant subdirectory *)
  name : string;  (* trimmed contents of the "===" file, or [id] if unreadable *)
  yaml : string;  (* contents of "in.yaml" *)
  tree : string option;  (* contents of "test.event", when readable *)
  json : string option;  (* contents of "in.json", when readable *)
  fail : bool;  (* true when an "error" file exists in the directory *)
}
(* [read_file path] returns the whole contents of [path], or [None] when the
   file cannot be opened or read.

   The channel is opened in binary mode so the byte count reported by
   [in_channel_length] matches what [really_input_string] can read on every
   platform, and it is closed even when reading fails. *)
let read_file path =
  match open_in_bin path with
  | exception Sys_error _ -> None
  | ic ->
    Fun.protect
      ~finally:(fun () -> close_in_noerr ic)
      (fun () ->
        match really_input_string ic (in_channel_length ic) with
        | contents -> Some contents
        | exception (Sys_error _ | End_of_file) -> None)
(* Like [read_file], but an unreadable file yields the empty string. *)
let read_file_required path =
  Option.value (read_file path) ~default:""
(* [file_exists path] is [Sys.file_exists path]. *)
let file_exists path =
  if Sys.file_exists path then true else false
(* True when [path] exists and is a directory. The existence check comes
   first because [Sys.is_directory] raises on a missing path. *)
let is_directory path =
  if Sys.file_exists path then Sys.is_directory path else false
(* Load the test stored in [dir_path], labelling it [base_id].
   Returns [None] when the directory has no "in.yaml". *)
let load_test_dir base_id dir_path =
  let in_dir file = Filename.concat dir_path file in
  let yaml_file = in_dir "in.yaml" in
  (* Must have in.yaml to be a valid test *)
  if file_exists yaml_file then begin
    (* The "===" file holds the human-readable name; fall back to the ID. *)
    let name =
      Option.fold ~none:base_id ~some:String.trim (read_file (in_dir "==="))
    in
    let yaml = read_file_required yaml_file in
    let tree = read_file (in_dir "test.event") in
    let json = read_file (in_dir "in.json") in
    (* An "error" file marks a test whose input is expected to be rejected. *)
    let fail = file_exists (in_dir "error") in
    Some { id = base_id; name; yaml; tree; json; fail }
  end
  else None
(* Load every test stored under the ID directory [test_id].

   A directory either holds one test directly, or holds variant
   subdirectories (00, 01, ...), each loaded as "ID:variant". *)
let load_test_id test_suite_path test_id =
  let dir_path = Filename.concat test_suite_path test_id in
  if not (is_directory dir_path) then []
  else
    (* A variant is a subdirectory whose name has at least two characters
       and starts with a digit. *)
    let is_variant entry =
      is_directory (Filename.concat dir_path entry)
      && String.length entry >= 2
      && entry.[0] >= '0'
      && entry.[0] <= '9'
    in
    let variants =
      Sys.readdir dir_path
      |> Array.to_list
      |> List.filter is_variant
      |> List.sort String.compare
    in
    match variants with
    | [] ->
      (* Single test in this directory *)
      Option.to_list (load_test_dir test_id dir_path)
    | _ :: _ ->
      (* Load each variant subdirectory *)
      List.filter_map
        (fun variant ->
          let variant_id = Printf.sprintf "%s:%s" test_id variant in
          load_test_dir variant_id (Filename.concat dir_path variant))
        variants
(* Load every test under [test_suite_path], visiting test ID directories in
   sorted order. Returns [] when [test_suite_path] is not a directory.

   An entry is treated as a test ID when it is a directory whose name has at
   least four characters and starts with an ASCII digit or uppercase letter.
   Digits and uppercase letters are matched as two separate ranges because
   the single range '0' .. 'Z' would also admit the punctuation characters
   that sit between them in ASCII (':' through '@').
   NOTE(review): test IDs are described as 4 characters long, but longer
   names are accepted here as well; confirm whether that is intended. *)
let load_directory test_suite_path =
  let is_test_id entry =
    is_directory (Filename.concat test_suite_path entry)
    && String.length entry >= 4
    && (match entry.[0] with
        | '0' .. '9' | 'A' .. 'Z' -> true
        | _ -> false)
  in
  if not (is_directory test_suite_path) then []
  else
    Sys.readdir test_suite_path
    |> Array.to_list
    |> List.filter is_test_id
    |> List.sort String.compare
    |> List.concat_map (load_test_id test_suite_path)
···11-(* Format parser events as tree notation compatible with yaml-test-suite *)
22-33-open Yamle
(* [escape_string s] returns [s] with backslashes and selected control
   characters replaced by two-character backslash escapes, so that a scalar
   value fits on one line of tree notation: newline, tab, carriage return,
   backslash, NUL, BEL, BS, VT, FF and ESC become \n, \t, \r, \\, \0, \a,
   \b, \v, \f and \e. All other bytes are copied unchanged.

   The no-break space U+00A0 becomes \_ . Input is UTF-8, so it is
   recognised as the two-byte sequence C2 A0. A byte A0 on its own is only
   ever a continuation byte of some other code point (for example U+00E0 is
   C3 A0) and must be copied through untouched, otherwise the output is no
   longer valid UTF-8. *)
let escape_string s =
  let len = String.length s in
  let buf = Buffer.create (len * 2) in
  let rec go i =
    if i < len then
      match s.[i] with
      | '\xc2' when i + 1 < len && s.[i + 1] = '\xa0' ->
        Buffer.add_string buf "\\_";
        go (i + 2)
      | c ->
        (match c with
         | '\n' -> Buffer.add_string buf "\\n"
         | '\t' -> Buffer.add_string buf "\\t"
         | '\r' -> Buffer.add_string buf "\\r"
         | '\\' -> Buffer.add_string buf "\\\\"
         | '\x00' -> Buffer.add_string buf "\\0"
         | '\x07' -> Buffer.add_string buf "\\a"
         | '\x08' -> Buffer.add_string buf "\\b"
         | '\x0b' -> Buffer.add_string buf "\\v"
         | '\x0c' -> Buffer.add_string buf "\\f"
         | '\x1b' -> Buffer.add_string buf "\\e"
         | c -> Buffer.add_char buf c);
        go (i + 1)
  in
  go 0;
  Buffer.contents buf
(* Maps a scalar style to the one-character marker that precedes the value
   in a =VAL line. Plain and Any share the colon marker. *)
let style_char style =
  match style with
  | Scalar_style.Plain | Scalar_style.Any -> ':'
  | Scalar_style.Single_quoted -> '\''
  | Scalar_style.Double_quoted -> '"'
  | Scalar_style.Literal -> '|'
  | Scalar_style.Folded -> '>'
(* Renders one spanned event as a single line of tree notation; the span is
   not used. Collection and scalar events carry an optional " &anchor" and
   " <tag>"; flow collections are marked with " {}" or " []" directly after
   the event name, and scalar values are escaped with [escape_string]. *)
let format_event { Event.event; span = _ } =
  let anchor_part = function Some a -> " &" ^ a | None -> "" in
  let tag_part = function Some t -> " <" ^ t ^ ">" | None -> "" in
  (* [marker] is emitted only for flow-style collections *)
  let flow_part marker = function Layout_style.Flow -> marker | _ -> "" in
  match event with
  | Event.Stream_start _ -> "+STR"
  | Event.Stream_end -> "-STR"
  | Event.Document_start { implicit = true; _ } -> "+DOC"
  | Event.Document_start { implicit = false; _ } -> "+DOC ---"
  | Event.Document_end { implicit = true } -> "-DOC"
  | Event.Document_end { implicit = false } -> "-DOC ..."
  | Event.Mapping_start { anchor; tag; style; _ } ->
    Printf.sprintf "+MAP%s%s%s"
      (flow_part " {}" style) (anchor_part anchor) (tag_part tag)
  | Event.Mapping_end -> "-MAP"
  | Event.Sequence_start { anchor; tag; style; _ } ->
    Printf.sprintf "+SEQ%s%s%s"
      (flow_part " []" style) (anchor_part anchor) (tag_part tag)
  | Event.Sequence_end -> "-SEQ"
  | Event.Scalar { anchor; tag; value; style; _ } ->
    Printf.sprintf "=VAL%s%s %c%s"
      (anchor_part anchor) (tag_part tag) (style_char style)
      (escape_string value)
  | Event.Alias { anchor } ->
    Printf.sprintf "=ALI *%s" anchor
(* Formats every event with [format_event] and joins the results, each line
   terminated by a newline. An empty list yields the empty string. *)
let of_spanned_events events =
  events
  |> List.map (fun (e : Event.spanned) -> format_event e ^ "\n")
  |> String.concat ""
-353
yaml/ocaml-yamle/tests/test_yamle.ml
···11-(** Tests for the Yamle library *)
22-33-open Yamle
(** Test helpers *)

(* Passes when [Value.equal expected actual] holds; [msg] labels the check. *)
let check_value msg expected actual =
  let same = Value.equal expected actual in
  Alcotest.(check bool) msg true same

(* String equality check. *)
let _check_string msg expected actual =
  Alcotest.check Alcotest.string msg expected actual
(** Scanner tests *)

(* A one-entry block mapping must scan to exactly eight tokens in the
   expected order. *)
let test_scanner_simple () =
  let token_types =
    List.map
      (fun (t : Token.spanned) -> t.token)
      (Scanner.to_list (Scanner.of_string "hello: world"))
  in
  Alcotest.(check int) "token count" 8 (List.length token_types);
  (* Stream_start, Block_mapping_start, Key, Scalar, Value, Scalar, Block_end, Stream_end *)
  match token_types with
  | [ Token.Stream_start _; Token.Block_mapping_start; Token.Key;
      Token.Scalar { value = "hello"; _ }; Token.Value;
      Token.Scalar { value = "world"; _ }; Token.Block_end;
      Token.Stream_end ] ->
    ()
  | _ ->
    Alcotest.fail "unexpected token sequence"

(* Scanning a block sequence must produce at least one token. *)
let test_scanner_sequence () =
  let tokens = Scanner.to_list (Scanner.of_string "- one\n- two\n- three") in
  let non_empty = match tokens with [] -> false | _ :: _ -> true in
  Alcotest.(check bool) "has tokens" true non_empty

(* Scanning a flow sequence must produce a Flow_sequence_start token. *)
let test_scanner_flow () =
  let is_flow_start (t : Token.spanned) =
    match t.token with Token.Flow_sequence_start -> true | _ -> false
  in
  let tokens = Scanner.to_list (Scanner.of_string "[1, 2, 3]") in
  Alcotest.(check bool) "has flow sequence start" true
    (List.exists is_flow_start tokens)

let scanner_tests = [
  "simple mapping", `Quick, test_scanner_simple;
  "sequence", `Quick, test_scanner_sequence;
  "flow sequence", `Quick, test_scanner_flow;
]
(** Parser tests *)

(* Parsing a mapping must yield a non-empty event list that contains a
   Stream_start event. *)
let test_parser_events () =
  let events = Parser.to_list (Parser.of_string "key: value") in
  let non_empty = match events with [] -> false | _ :: _ -> true in
  Alcotest.(check bool) "has events" true non_empty;
  let is_stream_start (e : Event.spanned) =
    match e.event with Event.Stream_start _ -> true | _ -> false
  in
  Alcotest.(check bool) "has stream start" true
    (List.exists is_stream_start events)

(* Parsing a block sequence must yield a Sequence_start event. *)
let test_parser_sequence_events () =
  let is_seq_start (e : Event.spanned) =
    match e.event with Event.Sequence_start _ -> true | _ -> false
  in
  let events = Parser.to_list (Parser.of_string "- a\n- b") in
  Alcotest.(check bool) "has sequence start" true
    (List.exists is_seq_start events)

let parser_tests = [
  "parse events", `Quick, test_parser_events;
  "sequence events", `Quick, test_parser_sequence_events;
]
(** Value parsing tests *)

(* Each input listed below must parse to `Null. *)
let test_parse_null () =
  List.iter
    (fun (label, input) -> check_value label `Null (of_string input))
    [ ("null", "null"); ("~", "~"); ("empty", "") ]

(* Each input must parse to the paired boolean; the input doubles as the
   check label. *)
let test_parse_bool () =
  List.iter
    (fun (input, expected) ->
      check_value input (`Bool expected) (of_string input))
    [ ("true", true); ("false", false); ("yes", true); ("no", false) ]

(* Numbers, integral or not, must parse to `Float. *)
let test_parse_number () =
  List.iter
    (fun (label, expected, input) ->
      check_value label (`Float expected) (of_string input))
    [ ("integer", 42.0, "42");
      ("negative", (-17.0), "-17");
      ("float", 3.14, "3.14") ]
(* Plain and double-quoted scalars must parse to `String.
   The plain case compares the complete parsed value: a multi-word plain
   scalar has to come back whole, including the inner space, so the result
   is not truncated before the comparison. *)
let test_parse_string () =
  check_value "plain" (`String "hello world") (of_string "hello world");
  check_value "quoted" (`String "hello") (of_string {|"hello"|})
(* A three-item block sequence must parse to `A with three elements. *)
let test_parse_sequence () =
  match of_string "- one\n- two\n- three" with
  | `A [_; _; _] -> ()
  | _ -> Alcotest.fail "expected sequence with 3 elements"

(* A two-entry block mapping must parse to `O with two pairs. *)
let test_parse_mapping () =
  match of_string "name: Alice\nage: 30" with
  | `O [_; _] -> ()
  | _ -> Alcotest.fail "expected mapping with 2 pairs"

(* A nested document must parse to a single "person" key holding a mapping. *)
let test_parse_nested () =
  let yaml = {|
person:
  name: Bob
  hobbies:
    - reading
    - coding
|} in
  match of_string yaml with
  | `O [("person", `O _)] -> ()
  | _ -> Alcotest.fail "expected nested structure"

let test_parse_flow_sequence () =
  match of_string "[1, 2, 3]" with
  | `A [`Float 1.0; `Float 2.0; `Float 3.0] -> ()
  | _ -> Alcotest.fail "expected flow sequence [1, 2, 3]"

let test_parse_flow_mapping () =
  match of_string "{a: 1, b: 2}" with
  | `O [("a", `Float 1.0); ("b", `Float 2.0)] -> ()
  | _ -> Alcotest.fail "expected flow mapping {a: 1, b: 2}"

(* A trailing comma in a flow mapping must not add an extra entry. *)
let test_parse_flow_mapping_trailing_comma () =
  match of_string "{ a: 1, }" with
  | `O [("a", `Float 1.0)] -> ()
  | `O pairs ->
    let count = List.length pairs in
    Alcotest.failf "expected 1 pair but got %d pairs (trailing comma should not create empty entry)"
      count
  | _ -> Alcotest.fail "expected flow mapping with 1 pair"

let value_tests = [
  "parse null", `Quick, test_parse_null;
  "parse bool", `Quick, test_parse_bool;
  "parse number", `Quick, test_parse_number;
  "parse string", `Quick, test_parse_string;
  "parse sequence", `Quick, test_parse_sequence;
  "parse mapping", `Quick, test_parse_mapping;
  "parse nested", `Quick, test_parse_nested;
  "parse flow sequence", `Quick, test_parse_flow_sequence;
  "parse flow mapping", `Quick, test_parse_flow_mapping;
  "flow mapping trailing comma", `Quick, test_parse_flow_mapping_trailing_comma;
]
(** Emitter tests *)

(* Emitting `Null must produce a non-empty string. *)
let test_emit_null () =
  let result = to_string `Null in
  Alcotest.(check bool) "contains null" true (not (String.equal result ""))

(* [starts_with prefix s] is [true] when [s] begins with [prefix]. *)
let starts_with prefix s =
  let n = String.length prefix in
  n <= String.length s && String.equal (String.sub s 0 n) prefix

(* The emitted mapping, once trimmed, must begin with the first key, either
   bare or double-quoted. *)
let test_emit_mapping () =
  let value = `O [("name", `String "Alice"); ("age", `Float 30.0)] in
  let trimmed = String.trim (to_string value) in
  let begins_with_key =
    starts_with "name" trimmed || starts_with "\"name\"" trimmed
  in
  Alcotest.(check bool) "contains name" true begins_with_key

(* Parse then emit; just check it doesn't crash. *)
let test_roundtrip_simple () =
  let value = of_string "name: Alice" in
  ignore (to_string value : string)

(* A three-item sequence must parse to three items and then emit without
   raising. *)
let test_roundtrip_sequence () =
  let value = of_string "- one\n- two\n- three" in
  match value with
  | `A [_; _; _] -> ignore (to_string value : string)
  | _ -> Alcotest.fail "roundtrip failed"

let emitter_tests = [
  "emit null", `Quick, test_emit_null;
  "emit mapping", `Quick, test_emit_mapping;
  "roundtrip simple", `Quick, test_roundtrip_simple;
  "roundtrip sequence", `Quick, test_roundtrip_sequence;
]
(** YAML-specific tests *)

(* An anchored scalar must keep its anchor name in the YAML representation. *)
let test_yaml_anchor () =
  match yaml_of_string "&anchor hello" with
  | `Scalar s ->
    (match Scalar.anchor s with
     | Some "anchor" -> ()
     | _ -> Alcotest.fail "expected scalar with anchor")
  | _ -> Alcotest.fail "expected scalar with anchor"

(* A document using an anchor, an alias and a merge key; just check it
   parses without error. *)
let test_yaml_alias () =
  let yaml = {|
defaults: &defaults
  timeout: 30
production:
  <<: *defaults
  port: 8080
|} in
  ignore (yaml_of_string yaml)

let yaml_tests = [
  "yaml anchor", `Quick, test_yaml_anchor;
  "yaml alias", `Quick, test_yaml_alias;
]
(** Multiline scalar tests *)

(* A literal block scalar must parse to a string under its key. *)
let test_literal_block () =
  let yaml = {|description: |
  This is a
  multi-line
  description
|} in
  match of_string yaml with
  | `O [("description", `String _)] -> ()
  | _ -> Alcotest.fail "expected mapping with literal block"

(* A folded block scalar must parse to a string under its key. *)
let test_folded_block () =
  let yaml = {|description: >
  This is a
  folded
  description
|} in
  match of_string yaml with
  | `O [("description", `String _)] -> ()
  | _ -> Alcotest.fail "expected mapping with folded block"

let multiline_tests = [
  "literal block", `Quick, test_literal_block;
  "folded block", `Quick, test_folded_block;
]
(** Error handling tests *)

(* An unclosed flow sequence must raise Yamle_error, and the error must
   carry a span. *)
let test_error_position () =
  match of_string "key: [unclosed" with
  | _ -> Alcotest.fail "expected error"
  | exception Yamle_error e ->
    Alcotest.(check bool) "has span" true (Option.is_some e.span)

let error_tests = [
  "error position", `Quick, test_error_position;
]
(** Alias expansion limit tests (billion laughs protection) *)

let test_node_limit () =
  (* Small bomb that would expand to 9^4 = 6561 nodes *)
  let yaml = {|
a: &a [1,2,3,4,5,6,7,8,9]
b: &b [*a,*a,*a,*a,*a,*a,*a,*a,*a]
c: &c [*b,*b,*b,*b,*b,*b,*b,*b,*b]
d: &d [*c,*c,*c,*c,*c,*c,*c,*c,*c]
|} in
  (* Should fail with a small node limit *)
  match of_string ~max_nodes:100 yaml with
  | _ -> Alcotest.fail "expected node limit error"
  | exception Yamle_error e ->
    (match e.Error.kind with
     | Error.Alias_expansion_node_limit _ -> ()
     | _ -> Alcotest.fail "expected Alias_expansion_node_limit error")

let test_depth_limit () =
  (* Create deeply nested alias chain:
     *e -> [*d,*d] -> [*c,*c] -> [*b,*b] -> [*a,*a] -> [x,y,z]
     Each alias resolution increases depth by 1 *)
  let yaml = {|
a: &a [x, y, z]
b: &b [*a, *a]
c: &c [*b, *b]
d: &d [*c, *c]
e: &e [*d, *d]
result: *e
|} in
  (* Should fail with a small depth limit (depth 3 means max 3 alias hops) *)
  match of_string ~max_depth:3 yaml with
  | _ -> Alcotest.fail "expected depth limit error"
  | exception Yamle_error e ->
    (match e.Error.kind with
     | Error.Alias_expansion_depth_limit _ -> ()
     | other ->
       Alcotest.fail
         ("expected Alias_expansion_depth_limit error, got: "
          ^ Error.kind_to_string other))

let test_normal_aliases_work () =
  (* Normal alias usage should work fine *)
  let yaml = {|
defaults: &defaults
  timeout: 30
  retries: 3
production:
  <<: *defaults
  port: 8080
|} in
  match of_string yaml with
  | `O _ -> ()
  | _ -> Alcotest.fail "expected mapping"

let test_resolve_aliases_false () =
  (* With resolve_aliases=false, aliases should remain unresolved *)
  let yaml = {|
a: &anchor value
b: *anchor
|} in
  match yaml_of_string ~resolve_aliases:false yaml with
  | `O map ->
    (* Check that alias is preserved *)
    let value_of_b =
      List.assoc_opt (`Scalar (Scalar.make "b")) (Mapping.members map)
    in
    (match value_of_b with
     | Some (`Alias "anchor") -> ()
     | _ -> Alcotest.fail "expected alias to be preserved")
  | _ -> Alcotest.fail "expected mapping"

let alias_limit_tests = [
  "node limit", `Quick, test_node_limit;
  "depth limit", `Quick, test_depth_limit;
  "normal aliases work", `Quick, test_normal_aliases_work;
  "resolve_aliases false", `Quick, test_resolve_aliases_false;
]
(** Run all tests *)

(* Registers every suite under the "yamle" test run, in the order listed. *)
let () =
  let suites = [
    "scanner", scanner_tests;
    "parser", parser_tests;
    "value", value_tests;
    "emitter", emitter_tests;
    "yaml", yaml_tests;
    "multiline", multiline_tests;
    "errors", error_tests;
    "alias_limits", alias_limit_tests;
  ] in
  Alcotest.run "yamle" suites
-24
yaml/ocaml-yamle/tests/yaml/anchor.yml
···11-datetime: 2001-12-15T02:59:43.1Z
22-datetime_with_spaces: 2001-12-14 21:59:43.10 -5
33-date: 2002-12-14
44-55-# The !!binary tag indicates that a string is actually a base64-encoded
66-# representation of a binary blob.
77-gif_file: !!binary |
88- R0lGODlhDAAMAIQAAP//9/X17unp5WZmZgAAAOfn515eXvPz7Y6OjuDg4J+fn5
99- OTk6enp56enmlpaWNjY6Ojo4SEhP/++f/++f/++f/++f/++f/++f/++f/++f/+
1010- +f/++f/++f/++f/++f/++SH+Dk1hZGUgd2l0aCBHSU1QACwAAAAADAAMAAAFLC
1111- AgjoEwnuNAFOhpEMTRiggcz4BNJHrv/zCFcLiwMWYNG84BwwEeECcgggoBADs=
1212-1313-# YAML also has a set type, which looks like this:
1414-set:
1515- ? item1
1616- ? item2
1717- ? item3
1818-1919-# Like Python, sets are just maps with null values; the above is equivalent to:
2020-set2:
2121- item1: null
2222- item2: null
2323- item3: null
2424-
-125
yaml/ocaml-yamle/tests/yaml/anchors_basic.yml
···11-# Basic Anchor and Alias Test Cases
22-# Tests fundamental anchor (&) and alias (*) functionality
33-44-# Test 1: Simple scalar anchor and alias
55----
66-scalar_anchor: &simple_scalar "Hello, World!"
77-scalar_alias: *simple_scalar
88-# Expected: both should have the value "Hello, World!"
99-1010-# Test 2: Numeric scalar anchor
1111----
1212-original: &num 42
1313-copy: *num
1414-another_copy: *num
1515-# Expected: all three should have the value 42
1616-1717-# Test 3: Sequence anchor and alias
1818----
1919-original_list: &my_list
2020- - apple
2121- - banana
2222- - cherry
2323-2424-copied_list: *my_list
2525-# Expected: both lists should be identical
2626-2727-# Test 4: Mapping anchor and alias
2828----
2929-original_map: &person
3030- name: Alice
3131- age: 30
3232- city: London
3333-3434-copied_map: *person
3535-# Expected: both maps should be identical
3636-3737-# Test 5: Multiple anchors in same document
3838----
3939-defaults: &defaults
4040- timeout: 30
4141- retries: 3
4242-4343-colors: &colors
4444- - red
4545- - green
4646- - blue
4747-4848-config:
4949- settings: *defaults
5050- palette: *colors
5151-# Expected: config.settings should have timeout and retries, config.palette should have the color list
5252-5353-# Test 6: Nested structure with anchor
5454----
5555-template: &template
5656- metadata:
5757- version: 1.0
5858- author: John Doe
5959- settings:
6060- enabled: true
6161- debug: false
6262-6363-instance1: *template
6464-instance2: *template
6565-# Expected: both instances should be identical copies of template
6666-6767-# Test 7: Anchor in sequence
6868----
6969-items:
7070- - &first_item
7171- id: 1
7272- name: First
7373- - id: 2
7474- name: Second
7575- - *first_item
7676-# Expected: first and third items should be identical
7777-7878-# Test 8: Multiple uses of same alias
7979----
8080-shared_value: &shared 100
8181-calculations:
8282- base: *shared
8383- doubled: 200 # Just a value, not calculated
8484- reference: *shared
8585- another_ref: *shared
8686-# Expected: base, reference, and another_ref should all be 100
8787-8888-# Test 9: Boolean anchor
8989----
9090-feature_flag: &enabled true
9191-features:
9292- login: *enabled
9393- signup: *enabled
9494- export: *enabled
9595-# Expected: all features should be true
9696-9797-# Test 10: Null anchor
9898----
9999-empty: &null_value ~
100100-values:
101101- first: *null_value
102102- second: *null_value
103103-# Expected: all should be null
104104-105105-# Test 11: String with special characters
106106----
107107-message: &msg |
108108- This is a multi-line
109109- message with some
110110- special content!
111111-112112-output1: *msg
113113-output2: *msg
114114-# Expected: both outputs should have the same multi-line string
115115-116116-# Test 12: Anchor in mapping value
117117----
118118-database:
119119- primary: &db_config
120120- host: localhost
121121- port: 5432
122122- ssl: true
123123- replica: *db_config
124124- backup: *db_config
125125-# Expected: primary, replica, and backup should all have identical configuration
-194
yaml/ocaml-yamle/tests/yaml/anchors_merge.yml
···11-# Merge Key Test Cases
22-# Tests YAML 1.1 merge key (<<) functionality
33-# Note: Merge keys are a YAML 1.1 feature and may not be supported in YAML 1.2
44-55-# Test 1: Basic merge key
66----
77-defaults: &defaults
88- timeout: 30
99- retries: 3
1010- verbose: false
1111-1212-production:
1313- <<: *defaults
1414- environment: production
1515-# Expected: production should have timeout, retries, verbose from defaults, plus environment
1616-1717-# Test 2: Override after merge
1818----
1919-base: &base
2020- color: red
2121- size: medium
2222- weight: 100
2323-2424-custom:
2525- <<: *base
2626- color: blue
2727- shape: circle
2828-# Expected: custom should have size and weight from base, but color should be blue, and add shape
2929-3030-# Test 3: Merging multiple anchors
3131----
3232-connection: &connection
3333- host: localhost
3434- port: 8080
3535-3636-authentication: &auth
3737- username: admin
3838- password: secret
3939-4040-server:
4141- <<: [*connection, *auth]
4242- ssl: true
4343-# Expected: server should have host, port, username, password, and ssl
4444-4545-# Test 4: Multiple merges with override
4646----
4747-defaults: &defaults
4848- timeout: 30
4949- retries: 3
5050-5151-advanced: &advanced
5252- cache: true
5353- pool_size: 10
5454-5555-config:
5656- <<: [*defaults, *advanced]
5757- timeout: 60
5858- custom: value
5959-# Expected: config should have all fields from both anchors, with timeout overridden to 60
6060-6161-# Test 5: Nested merge
6262----
6363-base_style: &base_style
6464- font: Arial
6565- size: 12
6666-6767-heading_defaults: &heading
6868- <<: *base_style
6969- weight: bold
7070-7171-main_heading:
7272- <<: *heading
7373- size: 18
7474- color: navy
7575-# Expected: main_heading should inherit from heading (which inherits from base_style) with overrides
7676-7777-# Test 6: Merge in sequence context
7878----
7979-common: &common
8080- enabled: true
8181- log_level: info
8282-8383-services:
8484- - name: web
8585- <<: *common
8686- port: 80
8787- - name: api
8888- <<: *common
8989- port: 3000
9090- - name: worker
9191- <<: *common
9292- threads: 4
9393-# Expected: each service should have enabled and log_level, plus their specific fields
9494-9595-# Test 7: Empty merge (edge case)
9696----
9797-empty: &empty {}
9898-9999-config:
100100- <<: *empty
101101- key: value
102102-# Expected: config should just have key: value
103103-104104-# Test 8: Merge with nested structures
105105----
106106-metadata: &metadata
107107- created: 2023-01-01
108108- author: Admin
109109- tags:
110110- - v1
111111- - stable
112112-113113-document:
114114- <<: *metadata
115115- title: Important Document
116116- content: Some content here
117117-# Expected: document should have all metadata fields plus title and content
118118-119119-# Test 9: Chain of merges
120120----
121121-level1: &l1
122122- a: 1
123123- b: 2
124124-125125-level2: &l2
126126- <<: *l1
127127- c: 3
128128-129129-level3:
130130- <<: *l2
131131- d: 4
132132-# Expected: level3 should have a, b, c, and d
133133-134134-# Test 10: Merge with conflicting keys
135135----
136136-first: &first
137137- name: First
138138- value: 100
139139- priority: low
140140-141141-second: &second
142142- name: Second
143143- value: 200
144144- category: important
145145-146146-combined:
147147- <<: [*first, *second]
148148- name: Combined
149149-# Expected: direct assignments override merged keys; among merged mappings, earlier ones take precedence
150150-151151-# Test 11: Merge preserving types
152152----
153153-numbers: &numbers
154154- count: 42
155155- ratio: 3.14
156156- active: true
157157-158158-derived:
159159- <<: *numbers
160160- label: Test
161161-# Expected: types should be preserved (int, float, bool)
162162-163163-# Test 12: Complex real-world example
164164----
165165-db_defaults: &db_defaults
166166- pool_size: 5
167167- timeout: 30
168168- ssl: false
169169-170170-cache_defaults: &cache_defaults
171171- ttl: 3600
172172- max_size: 1000
173173-174174-development:
175175- database:
176176- <<: *db_defaults
177177- host: localhost
178178- name: dev_db
179179- cache:
180180- <<: *cache_defaults
181181- backend: memory
182182-183183-production:
184184- database:
185185- <<: *db_defaults
186186- host: prod.example.com
187187- name: prod_db
188188- ssl: true
189189- pool_size: 20
190190- cache:
191191- <<: *cache_defaults
192192- backend: redis
193193- ttl: 7200
194194-# Expected: each environment should inherit defaults with environment-specific overrides
···11-# Full line comment at the beginning
22-# This is a YAML file testing comment handling
33-44-# Comment before a mapping
55-name: John Doe # End of line comment after a scalar value
66-age: 30 # Another end of line comment
77-88-# Comment between mapping entries
99-address:
1010- # Comment inside nested mapping
1111- street: 123 Main St # End of line comment in nested value
1212- city: Springfield
1313- # Comment between nested entries
1414- zip: 12345
1515-1616-# Comment before sequence
1717-items:
1818- - apple # Comment after sequence item
1919- - banana
2020- # Comment between sequence items
2121- - cherry
2222- - date # Last item comment
2323-2424-# Comment before flow sequence
2525-flow_seq: [1, 2, 3] # Comment after flow sequence
2626-2727-# Comment before flow mapping
2828-flow_map: {key1: value1, key2: value2} # Comment after flow mapping
2929-3030-# Comments with various indentation levels
3131-nested:
3232- # Indented comment level 1
3333- level1:
3434- # Indented comment level 2
3535- level2:
3636- # Indented comment level 3
3737- value: deeply nested # End comment at depth
3838-3939-# Multiple consecutive comments
4040-# Line 1
4141-# Line 2
4242-# Line 3
4343-multi_comment_key: value
4444-4545-# Comment with special characters: !@#$%^&*()
4646-special: "value with # hash inside quotes"
4747-4848-# Empty value with comment
4949-empty_value: # This key has no value (null)
5050-5151-# Comment before document end
5252-final_key: final_value
5353-# Final comment at end of file
···11-# Test 2: Three documents with different content types
22----
33-# First document: mapping
44-name: John Doe
55-age: 30
66-city: New York
77----
88-# Second document: sequence
99-- apple
1010-- banana
1111-- orange
1212-- grape
1313----
1414-# Third document: scalar
1515-This is a plain scalar document
···11-# Test 4: With %YAML directive
22-%YAML 1.2
33----
44-key1: value1
55-key2: value2
66-nested:
77- inner: data
88-list:
99- - item1
1010- - item2
1111- - item3
-155
yaml/ocaml-yamle/tests/yaml/edge_cases.yml
···11-# Edge cases test file for YAML parsing
22-33-# Case 1: Keys with colons (must be quoted)
44-"key:with:colons": value
55-"http://example.com": url_as_key
66-"time:12:30": time_value
77-88-# Case 2: Values starting with indicators (must be quoted or escaped)
99-indicator_square: "[this starts with bracket]"
1010-indicator_curly: "{this starts with brace}"
1111-indicator_star: "*this starts with star"
1212-indicator_amp: "&this starts with ampersand"
1313-indicator_question: "?this starts with question"
1414-indicator_pipe: "|this starts with pipe"
1515-indicator_gt: ">this starts with gt"
1616-indicator_dash: "-this starts with dash"
1717-indicator_hash: "#this starts with hash"
1818-1919-# Case 3: Special string values that look like other types
2020-string_true: "true"
2121-string_false: "false"
2222-string_null: "null"
2323-string_number: "123"
2424-string_float: "45.67"
2525-string_yes: "yes"
2626-string_no: "no"
2727-2828-# Case 4: Actual special values
2929-bool_true: true
3030-bool_false: false
3131-null_value: null
3232-null_tilde: ~
3333-number_int: 123
3434-number_float: 45.67
3535-number_exp: 1.23e4
3636-number_hex: 0x1F
3737-number_oct: 0o17
3838-3939-# Case 5: Empty values
4040-empty_string: ""
4141-empty_list: []
4242-empty_map: {}
4343-null_implicit:
4444-4545-# Case 6: Very long lines
4646-very_long_key: "This is a very long value that contains a lot of text to test how the parser handles long lines. It should be able to handle lines that are much longer than typical lines in most YAML files. This continues for quite a while to make sure we test the boundaries of reasonable line lengths. Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."
4747-4848-very_long_literal: |
4949- This is a very long literal block that should preserve all the whitespace and newlines exactly as written. It can contain very long lines that go on and on and on without breaking. This tests whether the parser can handle long content in literal blocks properly. Lorem ipsum dolor sit amet, consectetur adipiscing elit.
5050-5151-# Case 7: Unicode and special characters
5252-unicode_emoji: "Hello 🌍 World 🚀"
5353-unicode_chars: "Héllo Wörld 你好 مرحبا"
5454-unicode_key_🔑: unicode_value
5555-escaped_chars: "Line1\nLine2\tTabbed"
5656-5757-# Case 8: Nested empty structures
5858-nested_empty:
5959- level1: {}
6060- level2:
6161- inner: []
6262- level3:
6363- inner:
6464- deep: null
6565-6666-# Case 9: Complex keys (flow collections as keys)
6767-? [complex, key]
6868-: complex_value
6969-? {nested: key}
7070-: another_value
7171-7272-# Case 10: Multi-line keys and values
7373-? |
7474- This is a multi-line
7575- key using literal block
7676-: |
7777- This is a multi-line
7878- value using literal block
7979-8080-# Case 11: Quoted strings with escape sequences
8181-single_quoted: 'It''s a single-quoted string with doubled quotes'
8282-double_quoted: "It's a \"double-quoted\" string with escapes"
8383-backslash: "Path\\to\\file"
8484-newline_escape: "First line\nSecond line"
8585-8686-# Case 12: Anchors and aliases at edge positions
8787-anchor_list: &anchor_ref
8888- - item1
8989- - item2
9090- - item3
9191-9292-alias_usage: *anchor_ref
9393-9494-nested_anchor:
9595- data: &nested_ref
9696- key: value
9797- reference: *nested_ref
9898-9999-# Case 13: Mixed flow and block styles
100100-mixed_style:
101101- block_key:
102102- - flow_in_block: [1, 2, 3]
103103- - another: {a: 1, b: 2}
104104- flow_key: {block_in_flow:
105105- - item1
106106- - item2}
108108-# Case 14: Trailing commas in flow collections (a single trailing comma is valid YAML)
109109-# flow_trailing: [1, 2, 3,]  # This would parse as [1, 2, 3]
110110-111111-# Case 15: Strings that need quoting
112112-needs_quote_1: "value with # in it"
113113-needs_quote_2: "value with: colon"
114114-needs_quote_3: "value with @ at sign"
115115-needs_quote_4: "value with ` backtick"
116116-117117-# Case 16: Multiple documents separator (not starting a new document)
118118-not_doc_separator: "--- this is just a string value"
119119-120120-# Case 17: Extremely nested structures
121121-deeply_nested:
122122- l1:
123123- l2:
124124- l3:
125125- l4:
126126- l5:
127127- l6:
128128- l7:
129129- l8:
130130- l9:
131131- l10: "deep value"
132132-133133-# Case 18: Large sequence
134134-large_sequence:
135135- - item_001
136136- - item_002
137137- - item_003
138138- - item_004
139139- - item_005
140140- - item_006
141141- - item_007
142142- - item_008
143143- - item_009
144144- - item_010
145145-146146-# Case 19: Keys and values with only whitespace differences
147147-" key": "value with leading space in key"
148148-"key ": "value with trailing space in key"
149149-" spaced ": " spaced "
150150-151151-# Case 20: Binary-looking values
152152-binary_string: "0b101010"
153153-hex_string: "0xDEADBEEF"
154154-155155-# End of edge cases test file
···11-# Block scalars - literal and folded styles
22----
33-# Literal style (|) - preserves newlines
44-literal_basic: |
55- Line one
66- Line two
77- Line three
88-99-literal_with_indent: |
1010- First line
1111- Indented line
1212- More indented
1313- Back to second level
1414- Back to first level
1515-1616-# Folded style (>) - converts newlines to spaces
1717-folded_basic: >
1818- This is a long paragraph
1919- that will be folded into
2020- a single line with the
2121- newlines converted to spaces.
2222-2323-folded_paragraph: >
2424- First paragraph flows together
2525- into a single line.
2626-2727- Second paragraph after blank line
2828- also flows together.
2929-3030-# Chomping indicators
3131-# Strip (-) - removes trailing newlines
3232-literal_strip: |-
3333- No trailing newline
3434-3535-3636-literal_strip_multiple: |-
3737- Text here
3838-3939-4040-folded_strip: >-
4141- Folded text
4242- with stripped
4343- trailing newlines
4444-4545-4646-# Clip (default) - keeps single trailing newline
4747-literal_clip: |
4848- One trailing newline
4949-5050-5151-literal_clip_explicit: |
5252- This is the default behavior
5353-5454-5555-folded_clip: >
5656- Folded with one
5757- trailing newline
5858-5959-6060-# Keep (+) - preserves all trailing newlines
6161-literal_keep: |+
6262- Keeps trailing newlines
6363-6464-6565-literal_keep_multiple: |+
6666- Text here
6767-6868-6969-folded_keep: >+
7070- Folded text
7171- keeps trailing
7272-7373-7474-# Explicit indentation indicators
7575-literal_indent_2: |2
7676- Two space indentation
7777- is preserved here
7878- Extra indent
7979- Back to two
8080-8181-literal_indent_4: |4
8282- Four space base indent
8383- Second line
8484- Extra indent
8585- Back to base
8686-8787-folded_indent_2: >2
8888- Text with two space
8989- base indentation that
9090- will be folded.
9191-9292-folded_indent_3: >3
9393- Three space indent
9494- for this folded
9595- text block.
9696-9797-# Combinations of indicators
9898-literal_indent_strip: |2-
9999- Indented by 2
100100- No trailing newlines
101101-102102-103103-folded_indent_strip: >3-
104104- Folded with indent
105105- and stripped end
106106-107107-108108-literal_indent_keep: |2+
109109- Indented by 2
110110- Keeps trailing newlines
111111-112112-113113-folded_indent_keep: >4+
114114- Folded indent 4
115115- keeps all trailing
116116-117117-118118-# Empty block scalars
119119-empty_literal: |
120120-121121-empty_folded: >
122122-123123-# Block scalar with only newlines
124124-only_newlines_literal: |
125125-126126-127127-only_newlines_folded: >
128128-129129-130130-# Complex indentation patterns
131131-complex_literal: |
132132- First level
133133- Second level
134134- Third level
135135- Back to second
136136- Back to first
137137-138138- New paragraph
139139- With indent
140140-141141- Final paragraph
142142-143143-complex_folded: >
144144- This paragraph
145145- flows together.
146146-147147- This is separate.
148148- This line starts more indented
149149- and continues.
150150-151151- Final thoughts here.
152152-153153-# Special characters in block scalars
154154-special_chars_literal: |
155155- Special: @#$%^&*()
156156- Quotes: "double" 'single'
157157- Brackets: [array] {object}
158158- Symbols: | > & * ? : -
159159-160160-special_chars_folded: >
161161- All special chars are literal
162162- in block scalars: []{}|>*&
163163-164164-# Block scalars in sequences
165165-sequence_with_blocks:
166166- - |
167167- First item
168168- literal block
169169- - >
170170- Second item
171171- folded block
172172- - |-
173173- Third item
174174- stripped
175175- - |+
176176- Fourth item
177177- kept
178178-179179-180180-# Block scalars in nested mappings
181181-nested:
182182- description: >
183183- This is a folded
184184- description that spans
185185- multiple lines.
186186- code: |
187187- def hello():
188188- print("Hello, World!")
189189- return True
190190- notes: |-
191191- Final notes
192192- with stripped end
-60
yaml/ocaml-yamle/tests/yaml/scalars_plain.yml
···11-# Plain scalars - no quotes needed
22----
33-# Simple words
44-simple_word: hello
55-single_character: x
66-number_like: 123
77-boolean_like: true
88-null_like: null
99-1010-# Multi-word values (no special meaning characters)
1111-sentence: this is a plain scalar
1212-phrase: plain scalars can have spaces
1313-1414-# Numbers and special values that remain strings in context
1515-age: 42
1616-pi: 3.14159
1717-negative: -273
1818-scientific: 1.23e-4
1919-hex_like: 0x1A2B
2020-octal_like: 0o755
2121-2222-# Special characters that require quoting in certain positions or contexts
2323-with_colon: "value: with colon needs quotes in value"
2424-with_comma: "commas, need quotes in flow context"
2525-with_hash: "# needs quotes if starting with hash"
2626-hyphen_start: "- needs quotes if starting like list"
2727-question_start: "? needs quotes if starting like mapping key"
2828-2929-# Plain scalars with valid special characters
3030-email: user@example.com
3131-url: http://example.com/path
3232-path: /usr/local/bin
3333-ratio: 16:9
3434-version: v1.2.3
3535-3636-# Multi-line plain scalars (line folding)
3737-# Newlines become spaces, blank lines become newlines
3838-folded_plain: This is a long
3939- plain scalar that spans
4040- multiple lines and will
4141- be folded into a single line
4242- with spaces.
4343-4444-another_folded: First paragraph
4545- continues here and here.
4646-4747- Second paragraph after blank line.
4848- Also continues.
4949-5050-# Trailing and leading spaces are trimmed in plain scalars
5151-spaces_trimmed: value with spaces
5252-5353-# Plain scalars can contain most punctuation
5454-punctuation: Hello, world! How are you? I'm fine.
5555-symbols: $100 & 50% off @ store #1
5656-math: 2+2=4 and 3*3=9
5757-5858-# Empty plain scalar (becomes null)
5959-empty_implicit:
6060-explicit_empty: ""
-81
yaml/ocaml-yamle/tests/yaml/scalars_quoted.yml
···11-# Quoted scalars - single and double quoted strings
22----
33-# Single-quoted strings
44-single_simple: 'hello world'
55-single_with_double: 'He said "hello"'
66-single_escaped_quote: 'It''s a single quote: ''example'''
77-single_multiline: 'This is a
88- multi-line single
99- quoted string'
1010-1111-# Double-quoted strings
1212-double_simple: "hello world"
1313-double_with_single: "It's easy"
1414-double_escaped_quote: "She said \"hello\""
1515-1616-# Escape sequences in double-quoted strings
1717-escaped_newline: "Line one\nLine two\nLine three"
1818-escaped_tab: "Column1\tColumn2\tColumn3"
1919-escaped_backslash: "Path: C:\\Users\\Name"
2020-escaped_carriage: "Before\rAfter"
2121-escaped_bell: "Bell\a"
2222-escaped_backspace: "Back\b"
2323-escaped_formfeed: "Form\f"
2424-escaped_vertical: "Vertical\vtab"
2525-2626-# Unicode escapes
2727-unicode_16bit: "Snowman: \u2603"
2828-unicode_32bit: "Emoji: \U0001F600"
2929-unicode_hex: "Null byte: \x00"
3030-3131-# Empty strings
3232-empty_single: ''
3333-empty_double: ""
3434-3535-# Strings that would be interpreted as other types if unquoted
3636-string_true: "true"
3737-string_false: "false"
3838-string_null: "null"
3939-string_number: "123"
4040-string_float: "45.67"
4141-string_octal: "0o755"
4242-string_hex: "0xFF"
4343-4444-# Special YAML characters that need quoting
4545-starts_with_at: "@username"
4646-starts_with_backtick: "`command`"
4747-starts_with_ampersand: "&reference"
4848-starts_with_asterisk: "*alias"
4949-starts_with_exclamation: "!tag"
5050-starts_with_pipe: "|literal"
5151-starts_with_gt: ">folded"
5252-starts_with_percent: "%directive"
5353-5454-# Flow indicators that need quoting
5555-with_brackets: "[not a list]"
5656-with_braces: "{not: a map}"
5757-with_comma: "a, b, c"
5858-with_colon_space: "key: value"
5959-6060-# Quoted strings preserve leading/trailing whitespace
6161-leading_space: " spaces before"
6262-trailing_space: "spaces after "
6363-both_spaces: " spaces both "
6464-6565-# Multi-line quoted strings
6666-double_multiline: "This is a string
6767- that spans multiple
6868- lines with escaped newlines."
6969-7070-single_fold: 'This single quoted
7171- string will fold
7272- lines into spaces.'
7373-7474-# Complex escape sequences
7575-complex_escapes: "Tab:\t Newline:\n Quote:\" Backslash:\\ Unicode:\u0041"
7676-7777-# Edge cases
7878-only_spaces_single: ' '
7979-only_spaces_double: " "
8080-only_newlines: "\n\n\n"
8181-mixed_quotes: "She said 'it''s a beautiful day'"
-5
yaml/ocaml-yamle/tests/yaml/seq.yml
···11-- hello
22- - whats
33- - up
44-- foo
55-- bar
-82
yaml/ocaml-yamle/tests/yaml/values_bool.yml
···11-# Boolean value test cases for YAML 1.2
22-# Note: lowercase 'true' and 'false' are the canonical YAML 1.2 booleans (the core schema also accepts True/TRUE/False/FALSE)
33-# Other values like yes/no and on/off are YAML 1.1 booleans and are treated as strings in 1.2
44-55-# Standard YAML 1.2 booleans (lowercase)
66-bool_true: true
77-bool_false: false
88-99-# Capitalized forms (strings under the YAML 1.2 JSON schema; the core schema resolves them as booleans)
1010-capitalized_true: True
1111-capitalized_false: False
1212-1313-# YAML 1.1 style booleans (should be strings in YAML 1.2)
1414-yes_value: yes
1515-no_value: no
1616-Yes_value: Yes
1717-No_value: No
1818-YES_value: YES
1919-NO_value: NO
2020-2121-# On/Off style (should be strings in YAML 1.2)
2222-on_value: on
2323-off_value: off
2424-On_value: On
2525-Off_value: Off
2626-ON_value: ON
2727-OFF_value: OFF
2828-2929-# Booleans in sequences
3030-bool_sequence:
3131- - true
3232- - false
3333- - yes
3434- - no
3535- - on
3636- - off
3737-3838-# Booleans in flow style
3939-flow_bools: [true, false, yes, no]
4040-4141-# Booleans in mappings
4242-bool_mapping:
4343- active: true
4444- disabled: false
4545- enabled: yes
4646- stopped: no
4747-4848-# String literals that should NOT be parsed as booleans
4949-quoted_bools:
5050- quoted_true: "true"
5151- quoted_false: "false"
5252- quoted_yes: "yes"
5353- quoted_no: "no"
5454- single_true: 'true'
5555- single_false: 'false'
5656-5757-# Nested boolean values
5858-nested_bools:
5959- settings:
6060- debug: true
6161- verbose: false
6262- legacy_yes: yes
6363- legacy_no: no
6464- flags:
6565- - true
6666- - false
6767- - on
6868- - off
6969-7070-# Mixed case variations
7171-mixed_case:
7272- TRUE: TRUE
7373- FALSE: FALSE
7474- TrUe: TrUe
7575- FaLsE: FaLsE
7676-7777-# Boolean-like strings that should remain strings
7878-bool_like_strings:
7979- truthy: truely
8080- falsy: falsetto
8181- yes_sir: yessir
8282- no_way: noway
-55
yaml/ocaml-yamle/tests/yaml/values_null.yml
···11-# Null value test cases for YAML 1.2
22-33-# Explicit null keyword
44-explicit_null: null
55-66-# Tilde shorthand for null
77-tilde_null: ~
88-99-# Empty value (implicit null)
1010-empty_null:
1111-1212-# Null in flow style
1313-flow_null: [null, ~, ]
1414-1515-# Null in sequences
1616-sequence_nulls:
1717- - null
1818- - ~
1919- -
2020- - explicit: null
2121- - tilde: ~
2222- - empty:
2323-2424-# Null in mappings
2525-mapping_nulls:
2626- key1: null
2727- key2: ~
2828- key3:
2929-3030-# Null as key
3131-null: "null key with string value"
3232-~: "tilde key with string value"
3333-3434-# Mixed null values in nested structures
3535-nested:
3636- level1:
3737- null_value: null
3838- tilde_value: ~
3939- empty_value:
4040- list:
4141- - null
4242- - ~
4343- -
4444- - some_value
4545- map:
4646- a: null
4747- b: ~
4848- c:
4949-5050-# String literals that contain "null" (should NOT be parsed as null)
5151-string_nulls:
5252- quoted_null: "null"
5353- quoted_tilde: "~"
5454- null_in_string: "this is null"
5555- word_null: 'null'
···11-# Timestamp value test cases for YAML 1.1
22-# Note: YAML 1.2 does not have a timestamp type in the core schema
33-# These are recognized in YAML 1.1 and some extended schemas
44-55-# ISO 8601 date format (YYYY-MM-DD)
66-date_simple: 2001-12-15
77-date_earliest: 1970-01-01
88-date_leap_year: 2020-02-29
99-date_current: 2025-12-04
1010-1111-# ISO 8601 datetime with timezone (UTC)
1212-datetime_utc: 2001-12-15T02:59:43.1Z
1313-datetime_utc_full: 2001-12-15T02:59:43.123456Z
1414-datetime_utc_no_frac: 2001-12-15T02:59:43Z
1515-1616-# ISO 8601 datetime with timezone offset
1717-datetime_offset_pos: 2001-12-15T02:59:43.1+05:30
1818-datetime_offset_neg: 2001-12-15T02:59:43.1-05:00
1919-datetime_offset_hours: 2001-12-15T02:59:43+05
2020-2121-# Spaced datetime format (YAML 1.1 style)
2222-datetime_spaced: 2001-12-14 21:59:43.10 -5
2323-datetime_spaced_utc: 2001-12-15 02:59:43.1 Z
2424-datetime_spaced_offset: 2001-12-14 21:59:43.10 -05:00
2525-2626-# Datetime without fractional seconds
2727-datetime_no_frac: 2001-12-15T14:30:00Z
2828-2929-# Date only (no time component)
3030-date_only: 2001-12-15
3131-3232-# Various formats
3333-timestamp_formats:
3434- iso_date: 2001-12-15
3535- iso_datetime_z: 2001-12-15T02:59:43Z
3636- iso_datetime_offset: 2001-12-15T02:59:43+00:00
3737- spaced_datetime: 2001-12-14 21:59:43.10 -5
3838- canonical: 2001-12-15T02:59:43.1Z
3939-4040-# Timestamps in sequences
4141-timestamp_sequence:
4242- - 2001-12-15
4343- - 2001-12-15T02:59:43.1Z
4444- - 2001-12-14 21:59:43.10 -5
4545- - 2025-01-01T00:00:00Z
4646-4747-# Timestamps in mappings
4848-events:
4949- created: 2001-12-15T02:59:43.1Z
5050- modified: 2001-12-16T10:30:00Z
5151- published: 2001-12-14 21:59:43.10 -5
5252-5353-# String literals that look like timestamps (quoted)
5454-quoted_timestamps:
5555- string_date: "2001-12-15"
5656- string_datetime: "2001-12-15T02:59:43.1Z"
5757- string_spaced: "2001-12-14 21:59:43.10 -5"
5858-5959-# Edge cases and variations
6060-edge_cases:
6161- midnight: 2001-12-15T00:00:00Z
6262- end_of_day: 2001-12-15T23:59:59Z
6363- microseconds: 2001-12-15T02:59:43.123456Z
6464- no_seconds: 2001-12-15T02:59Z
6565- hour_only: 2001-12-15T02Z
6666-6767-# Nested timestamp values
6868-nested_timestamps:
6969- project:
7070- start_date: 2001-12-15
7171- milestones:
7272- - date: 2001-12-20
7373- time: 2001-12-20T14:00:00Z
7474- - date: 2002-01-15
7575- time: 2002-01-15T09:30:00-05:00
7676- metadata:
7777- created: 2001-12-14 21:59:43.10 -5
7878- updated: 2001-12-15T02:59:43.1Z
7979-8080-# Invalid timestamp formats (should be treated as strings)
8181-invalid_timestamps:
8282- bad_date: 2001-13-45
8383- bad_time: 2001-12-15T25:99:99Z
8484- incomplete: 2001-12
8585- no_leading_zero: 2001-1-5
8686-8787-# Different timezone representations
8888-timezones:
8989- utc_z: 2001-12-15T02:59:43Z
9090- utc_offset: 2001-12-15T02:59:43+00:00
9191- est: 2001-12-14T21:59:43-05:00
9292- ist: 2001-12-15T08:29:43+05:30
9393- jst: 2001-12-15T11:59:43+09:00
9494-9595-# Historical and future dates
9696-date_range:
9797- past: 1900-01-01
9898- unix_epoch: 1970-01-01T00:00:00Z
9999- y2k: 2000-01-01T00:00:00Z
100100- present: 2025-12-04
101101- future: 2099-12-31T23:59:59Z
-105
yaml/ocaml-yamle/tests/yaml/whitespace.yml
···11-# Whitespace handling test file
22-33-# Section 1: Different indentation levels (2 spaces)
44-two_space_indent:
55- level1:
66- level2:
77- level3: value
88-99-# Section 2: Four space indentation
1010-four_space_indent:
1111- level1:
1212- level2:
1313- level3: value
1414-1515-# Section 3: Mixed content with blank lines
1616-1717-first_key: first_value
1818-1919-2020-second_key: second_value
2121-2222-2323-2424-third_key: third_value
2525-2626-# Section 4: Sequences with varying indentation
2727-sequence_2space:
2828- - item1
2929- - item2
3030- - nested:
3131- - nested_item1
3232- - nested_item2
3333-3434-sequence_4space:
3535- - item1
3636- - item2
3737- - nested:
3838- - nested_item1
3939- - nested_item2
4040-4141-# Section 5: Trailing whitespace (spaces after values - invisible but present)
4242-trailing_spaces: value
4343-another_key: another_value
4444-4545-# Section 6: Leading whitespace preservation in literals
4646-literal_block: |
4747- This is a literal block
4848- with preserved indentation
4949- including extra spaces
5050- and blank lines
5151-5252- like this one above
5353-5454-folded_block: >
5555- This is a folded block
5656- that will be folded into
5757- a single line but preserves
5858-5959- paragraph breaks like above
6060-6161-# Section 7: Whitespace in flow collections
6262-flow_with_spaces: [ item1 , item2 , item3 ]
6363-flow_tight: [item1,item2,item3]
6464-flow_map_spaces: { key1: value1 , key2: value2 }
6565-flow_map_tight: {key1:value1,key2:value2}
6666-6767-# Section 8: Multiple consecutive blank lines between top-level keys
6868-key_before_blanks: value1
6969-7070-7171-7272-7373-key_after_blanks: value2
7474-7575-# Section 9: Indentation in mappings
7676-mapping_indent:
7777- key1: value1
7878- key2: value2
7979- nested:
8080- nested_key1: nested_value1
8181- nested_key2: nested_value2
8282- deep_nested:
8383- deep_key: deep_value
8484-8585-# Section 10: Whitespace around colons and hyphens
8686-no_space_colon:value
8787-space_after_colon: value
8888-spaces_around: value
8989-- sequence_item_no_space
9090- - nested_sequence
9191-9292-# Section 11: Empty lines in sequences
9393-sequence_with_blanks:
9494- - item1
9595-9696- - item2
9797-9898- - item3
9999-100100-# Section 12: Whitespace-only mapping values (implicit null)
101101-explicit_null: null
102102-implicit_null:
103103-space_only:
104104-105105-# End of whitespace test file
-3
yaml/ocaml-yamle/tests/yaml/yaml-1.2.yml
···11-- {"when the key is quoted":"space after colon can be omitted."}
22-- "quoted slashes \/ are allowed."
33-- {?"a key can be looooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooger": "than 1024 when parsing is unambiguous before seeing the colon."}
-32
yaml/ocaml-yamle/yamle.opam
···11-# This file is generated by dune, edit dune-project instead
22-opam-version: "2.0"
33-version: "0.1.0"
44-synopsis: "Pure OCaml YAML 1.2 parser and emitter"
55-description:
66- "A pure OCaml implementation of YAML 1.2 parsing and emission, with no C dependencies."
77-maintainer: ["yamle@example.com"]
88-authors: ["Yamle Authors"]
99-license: "ISC"
1010-homepage: "https://github.com/ocaml/yamle"
1111-bug-reports: "https://github.com/ocaml/yamle/issues"
1212-depends: [
1313- "ocaml" {>= "4.14.0"}
1414- "dune" {>= "3.0" & >= "3.0"}
1515- "alcotest" {with-test}
1616- "odoc" {with-doc}
1717-]
1818-build: [
1919- ["dune" "subst"] {dev}
2020- [
2121- "dune"
2222- "build"
2323- "-p"
2424- name
2525- "-j"
2626- jobs
2727- "@install"
2828- "@runtest" {with-test}
2929- "@doc" {with-doc}
3030- ]
3131-]
3232-dev-repo: "git+https://github.com/ocaml/yamle.git"