···11+(** Character input source with lookahead *)
22+33+type t = {
44+ source : string;
55+ mutable pos : int; (** Current byte position *)
66+ mutable position : Position.t; (** Line/column tracking *)
77+ length : int;
88+}
99+1010+let of_string source =
1111+ let encoding, bom_len = Encoding.detect source in
1212+ (* For now, we only support UTF-8. Skip BOM if present. *)
1313+ ignore encoding;
1414+ {
1515+ source;
1616+ pos = bom_len;
1717+ position = Position.initial;
1818+ length = String.length source;
1919+ }
2020+2121+let position t = t.position
2222+2323+let is_eof t = t.pos >= t.length
2424+2525+let peek t =
2626+ if t.pos >= t.length then None
2727+ else Some t.source.[t.pos]
2828+2929+let peek_exn t =
3030+ if t.pos >= t.length then
3131+ Error.raise_at t.position Unexpected_eof
3232+ else
3333+ t.source.[t.pos]
3434+3535+let peek_nth t n =
3636+ let idx = t.pos + n in
3737+ if idx >= t.length then None
3838+ else Some t.source.[idx]
3939+4040+let peek_string t n =
4141+ if t.pos + n > t.length then
4242+ String.sub t.source t.pos (t.length - t.pos)
4343+ else
4444+ String.sub t.source t.pos n
4545+4646+let next t =
4747+ if t.pos >= t.length then None
4848+ else begin
4949+ let c = t.source.[t.pos] in
5050+ t.pos <- t.pos + 1;
5151+ t.position <- Position.advance_char c t.position;
5252+ Some c
5353+ end
5454+5555+let next_exn t =
5656+ match next t with
5757+ | Some c -> c
5858+ | None -> Error.raise_at t.position Unexpected_eof
5959+6060+let skip t n =
6161+ for _ = 1 to n do
6262+ ignore (next t)
6363+ done
6464+6565+let skip_while t pred =
6666+ while not (is_eof t) && pred (Option.get (peek t)) do
6767+ ignore (next t)
6868+ done
6969+7070+(** Character classification *)
7171+7272+let is_break c = c = '\n' || c = '\r'
7373+7474+let is_blank c = c = ' ' || c = '\t'
7575+7676+let is_whitespace c = is_break c || is_blank c
7777+7878+let is_digit c = c >= '0' && c <= '9'
7979+8080+let is_hex c =
8181+ (c >= '0' && c <= '9') ||
8282+ (c >= 'a' && c <= 'f') ||
8383+ (c >= 'A' && c <= 'F')
8484+8585+let is_alpha c =
8686+ (c >= 'a' && c <= 'z') ||
8787+ (c >= 'A' && c <= 'Z')
8888+8989+let is_alnum c = is_alpha c || is_digit c
9090+9191+(** YAML indicator characters *)
9292+let is_indicator c =
9393+ match c with
9494+ | '-' | '?' | ':' | ',' | '[' | ']' | '{' | '}'
9595+ | '#' | '&' | '*' | '!' | '|' | '>' | '\'' | '"'
9696+ | '%' | '@' | '`' -> true
9797+ | _ -> false
9898+9999+(** Characters that cannot start a plain scalar *)
100100+let is_flow_indicator c =
101101+ match c with
102102+ | ',' | '[' | ']' | '{' | '}' -> true
103103+ | _ -> false
104104+105105+(** Check if next char satisfies predicate *)
106106+let next_is pred t =
107107+ match peek t with
108108+ | None -> false
109109+ | Some c -> pred c
110110+111111+let next_is_break t = next_is is_break t
112112+let next_is_blank t = next_is is_blank t
113113+let next_is_whitespace t = next_is is_whitespace t
114114+let next_is_digit t = next_is is_digit t
115115+let next_is_hex t = next_is is_hex t
116116+let next_is_alpha t = next_is is_alpha t
117117+let next_is_indicator t = next_is is_indicator t
118118+119119+(** Check if at document boundary (--- or ...) *)
120120+let at_document_boundary t =
121121+ if t.position.column <> 1 then false
122122+ else
123123+ let s = peek_string t 4 in
124124+ let prefix = String.sub s 0 (min 3 (String.length s)) in
125125+ (prefix = "---" || prefix = "...") &&
126126+ (String.length s < 4 || is_whitespace s.[3] || String.length s = 3)
127127+128128+(** Consume line break, handling \r\n as single break *)
129129+let consume_break t =
130130+ match peek t with
131131+ | Some '\r' ->
132132+ ignore (next t);
133133+ (match peek t with
134134+ | Some '\n' -> ignore (next t)
135135+ | _ -> ())
136136+ | Some '\n' ->
137137+ ignore (next t)
138138+ | _ -> ()
139139+140140+(** Get remaining content from current position *)
141141+let remaining t =
142142+ if t.pos >= t.length then ""
143143+ else String.sub t.source t.pos (t.length - t.pos)
144144+145145+(** Mark current position for span creation *)
146146+let mark t = t.position
+24
yaml/ocaml-yamle/lib/layout_style.ml
···11+(** Collection layout styles *)
22+33+type t =
44+ | Any (** Let emitter choose *)
55+ | Block (** Indentation-based *)
66+ | Flow (** Inline with brackets *)
77+88+let to_string = function
99+ | Any -> "any"
1010+ | Block -> "block"
1111+ | Flow -> "flow"
1212+1313+let pp fmt t =
1414+ Format.pp_print_string fmt (to_string t)
1515+1616+let equal a b = a = b
1717+1818+let compare a b =
1919+ let to_int = function
2020+ | Any -> 0
2121+ | Block -> 1
2222+ | Flow -> 2
2323+ in
2424+ Int.compare (to_int a) (to_int b)
+243
yaml/ocaml-yamle/lib/loader.ml
···11+(** Loader - converts parser events to YAML data structures *)
22+33+(** Stack frame for building nested structures *)
44+type frame =
55+ | Sequence_frame of {
66+ anchor : string option;
77+ tag : string option;
88+ implicit : bool;
99+ style : Layout_style.t;
1010+ items : Yaml.t list;
1111+ }
1212+ | Mapping_frame of {
1313+ anchor : string option;
1414+ tag : string option;
1515+ implicit : bool;
1616+ style : Layout_style.t;
1717+ pairs : (Yaml.t * Yaml.t) list;
1818+ pending_key : Yaml.t option;
1919+ }
2020+2121+type state = {
2222+ mutable stack : frame list;
2323+ mutable current : Yaml.t option;
2424+ mutable documents : Document.t list;
2525+ mutable doc_version : (int * int) option;
2626+ mutable doc_implicit_start : bool;
2727+}
2828+2929+let create_state () = {
3030+ stack = [];
3131+ current = None;
3232+ documents = [];
3333+ doc_version = None;
3434+ doc_implicit_start = true;
3535+}
3636+3737+(** Process a single event *)
3838+let rec process_event state (ev : Event.spanned) =
3939+ match ev.event with
4040+ | Event.Stream_start _ -> ()
4141+4242+ | Event.Stream_end -> ()
4343+4444+ | Event.Document_start { version; implicit } ->
4545+ state.doc_version <- version;
4646+ state.doc_implicit_start <- implicit
4747+4848+ | Event.Document_end { implicit } ->
4949+ let doc = Document.make
5050+ ?version:state.doc_version
5151+ ~implicit_start:state.doc_implicit_start
5252+ ~implicit_end:implicit
5353+ state.current
5454+ in
5555+ state.documents <- doc :: state.documents;
5656+ state.current <- None;
5757+ state.doc_version <- None;
5858+ state.doc_implicit_start <- true
5959+6060+ | Event.Alias { anchor } ->
6161+ let node : Yaml.t = `Alias anchor in
6262+ add_node state node
6363+6464+ | Event.Scalar { anchor; tag; value; plain_implicit; quoted_implicit; style } ->
6565+ let scalar = Scalar.make
6666+ ?anchor ?tag
6767+ ~plain_implicit ~quoted_implicit
6868+ ~style value
6969+ in
7070+ let node : Yaml.t = `Scalar scalar in
7171+ add_node state node
7272+7373+ | Event.Sequence_start { anchor; tag; implicit; style } ->
7474+ let frame = Sequence_frame {
7575+ anchor; tag; implicit; style;
7676+ items = [];
7777+ } in
7878+ state.stack <- frame :: state.stack
7979+8080+ | Event.Sequence_end ->
8181+ (match state.stack with
8282+ | Sequence_frame { anchor; tag; implicit; style; items } :: rest ->
8383+ let seq = Sequence.make ?anchor ?tag ~implicit ~style (List.rev items) in
8484+ let node : Yaml.t = `A seq in
8585+ state.stack <- rest;
8686+ add_node state node
8787+ | _ -> Error.raise (Invalid_state "unexpected sequence end"))
8888+8989+ | Event.Mapping_start { anchor; tag; implicit; style } ->
9090+ let frame = Mapping_frame {
9191+ anchor; tag; implicit; style;
9292+ pairs = [];
9393+ pending_key = None;
9494+ } in
9595+ state.stack <- frame :: state.stack
9696+9797+ | Event.Mapping_end ->
9898+ (match state.stack with
9999+ | Mapping_frame { anchor; tag; implicit; style; pairs; pending_key = None } :: rest ->
100100+ let map = Mapping.make ?anchor ?tag ~implicit ~style (List.rev pairs) in
101101+ let node : Yaml.t = `O map in
102102+ state.stack <- rest;
103103+ add_node state node
104104+ | Mapping_frame { pending_key = Some _; _ } :: _ ->
105105+ Error.raise (Invalid_state "mapping ended with pending key")
106106+ | _ -> Error.raise (Invalid_state "unexpected mapping end"))
107107+108108+(** Add a node to current context *)
109109+and add_node state node =
110110+ match state.stack with
111111+ | [] ->
112112+ state.current <- Some node
113113+114114+ | Sequence_frame f :: rest ->
115115+ state.stack <- Sequence_frame { f with items = node :: f.items } :: rest
116116+117117+ | Mapping_frame f :: rest ->
118118+ (match f.pending_key with
119119+ | None ->
120120+ (* This is a key *)
121121+ state.stack <- Mapping_frame { f with pending_key = Some node } :: rest
122122+ | Some key ->
123123+ (* This is a value *)
124124+ state.stack <- Mapping_frame {
125125+ f with
126126+ pairs = (key, node) :: f.pairs;
127127+ pending_key = None;
128128+ } :: rest)
129129+130130+(** Load single document as Value *)
131131+let value_of_string s =
132132+ let parser = Parser.of_string s in
133133+ let state = create_state () in
134134+ Parser.iter (process_event state) parser;
135135+ match state.documents with
136136+ | [] -> `Null
137137+ | [doc] ->
138138+ (match Document.root doc with
139139+ | None -> `Null
140140+ | Some yaml -> Yaml.to_value yaml)
141141+ | _ -> Error.raise Multiple_documents
142142+143143+(** Load single document as Yaml *)
144144+let yaml_of_string s =
145145+ let parser = Parser.of_string s in
146146+ let state = create_state () in
147147+ Parser.iter (process_event state) parser;
148148+ match state.documents with
149149+ | [] -> `Scalar (Scalar.make "")
150150+ | [doc] ->
151151+ (match Document.root doc with
152152+ | None -> `Scalar (Scalar.make "")
153153+ | Some yaml -> yaml)
154154+ | _ -> Error.raise Multiple_documents
155155+156156+(** Load all documents *)
157157+let documents_of_string s =
158158+ let parser = Parser.of_string s in
159159+ let state = create_state () in
160160+ Parser.iter (process_event state) parser;
161161+ List.rev state.documents
162162+163163+(** Load single Value from parser *)
164164+let load_value parser =
165165+ let state = create_state () in
166166+ let rec loop () =
167167+ match Parser.next parser with
168168+ | None -> None
169169+ | Some ev ->
170170+ process_event state ev;
171171+ match ev.event with
172172+ | Event.Document_end _ ->
173173+ (match state.documents with
174174+ | doc :: _ ->
175175+ state.documents <- [];
176176+ Some (match Document.root doc with
177177+ | None -> `Null
178178+ | Some yaml -> Yaml.to_value yaml)
179179+ | [] -> None)
180180+ | Event.Stream_end -> None
181181+ | _ -> loop ()
182182+ in
183183+ loop ()
184184+185185+(** Load single Yaml from parser *)
186186+let load_yaml parser =
187187+ let state = create_state () in
188188+ let rec loop () =
189189+ match Parser.next parser with
190190+ | None -> None
191191+ | Some ev ->
192192+ process_event state ev;
193193+ match ev.event with
194194+ | Event.Document_end _ ->
195195+ (match state.documents with
196196+ | doc :: _ ->
197197+ state.documents <- [];
198198+ Some (match Document.root doc with
199199+ | None -> `Scalar (Scalar.make "")
200200+ | Some yaml -> yaml)
201201+ | [] -> None)
202202+ | Event.Stream_end -> None
203203+ | _ -> loop ()
204204+ in
205205+ loop ()
206206+207207+(** Load single Document from parser *)
208208+let load_document parser =
209209+ let state = create_state () in
210210+ let rec loop () =
211211+ match Parser.next parser with
212212+ | None -> None
213213+ | Some ev ->
214214+ process_event state ev;
215215+ match ev.event with
216216+ | Event.Document_end _ ->
217217+ (match state.documents with
218218+ | doc :: _ ->
219219+ state.documents <- [];
220220+ Some doc
221221+ | [] -> None)
222222+ | Event.Stream_end -> None
223223+ | _ -> loop ()
224224+ in
225225+ loop ()
226226+227227+(** Iterate over documents *)
228228+let iter_documents f parser =
229229+ let rec loop () =
230230+ match load_document parser with
231231+ | None -> ()
232232+ | Some doc -> f doc; loop ()
233233+ in
234234+ loop ()
235235+236236+(** Fold over documents *)
237237+let fold_documents f init parser =
238238+ let rec loop acc =
239239+ match load_document parser with
240240+ | None -> acc
241241+ | Some doc -> loop (f acc doc)
242242+ in
243243+ loop init
+92
yaml/ocaml-yamle/lib/mapping.ml
···11+(** YAML mapping (object) values with metadata *)
22+33+type ('k, 'v) t = {
44+ anchor : string option;
55+ tag : string option;
66+ implicit : bool;
77+ style : Layout_style.t;
88+ members : ('k * 'v) list;
99+}
1010+1111+let make
1212+ ?(anchor : string option)
1313+ ?(tag : string option)
1414+ ?(implicit = true)
1515+ ?(style = Layout_style.Any)
1616+ members =
1717+ { anchor; tag; implicit; style; members }
1818+1919+let members t = t.members
2020+let anchor t = t.anchor
2121+let tag t = t.tag
2222+let implicit t = t.implicit
2323+let style t = t.style
2424+2525+let with_anchor anchor t = { t with anchor = Some anchor }
2626+let with_tag tag t = { t with tag = Some tag }
2727+let with_style style t = { t with style }
2828+2929+let map_keys f t = { t with members = List.map (fun (k, v) -> (f k, v)) t.members }
3030+let map_values f t = { t with members = List.map (fun (k, v) -> (k, f v)) t.members }
3131+let map f t = { t with members = List.map (fun (k, v) -> f k v) t.members }
3232+3333+let length t = List.length t.members
3434+3535+let is_empty t = t.members = []
3636+3737+let find pred t =
3838+ match List.find_opt (fun (k, _) -> pred k) t.members with
3939+ | Some (_, v) -> Some v
4040+ | None -> None
4141+4242+let find_key pred t =
4343+ List.find_opt (fun (k, _) -> pred k) t.members
4444+4545+let mem pred t =
4646+ List.exists (fun (k, _) -> pred k) t.members
4747+4848+let keys t = List.map fst t.members
4949+5050+let values t = List.map snd t.members
5151+5252+let iter f t = List.iter (fun (k, v) -> f k v) t.members
5353+5454+let fold f init t = List.fold_left (fun acc (k, v) -> f acc k v) init t.members
5555+5656+let pp pp_key pp_val fmt t =
5757+ Format.fprintf fmt "@[<hv 2>mapping(@,";
5858+ (match t.anchor with
5959+ | Some a -> Format.fprintf fmt "anchor=%s,@ " a
6060+ | None -> ());
6161+ (match t.tag with
6262+ | Some tag -> Format.fprintf fmt "tag=%s,@ " tag
6363+ | None -> ());
6464+ Format.fprintf fmt "style=%a,@ " Layout_style.pp t.style;
6565+ Format.fprintf fmt "members={@,";
6666+ List.iteri (fun i (k, v) ->
6767+ if i > 0 then Format.fprintf fmt ",@ ";
6868+ Format.fprintf fmt "@[<hv 2>%a:@ %a@]" pp_key k pp_val v
6969+ ) t.members;
7070+ Format.fprintf fmt "@]@,})"
7171+7272+let equal eq_k eq_v a b =
7373+ Option.equal String.equal a.anchor b.anchor &&
7474+ Option.equal String.equal a.tag b.tag &&
7575+ a.implicit = b.implicit &&
7676+ Layout_style.equal a.style b.style &&
7777+ List.equal (fun (k1, v1) (k2, v2) -> eq_k k1 k2 && eq_v v1 v2) a.members b.members
7878+7979+let compare cmp_k cmp_v a b =
8080+ let c = Option.compare String.compare a.anchor b.anchor in
8181+ if c <> 0 then c else
8282+ let c = Option.compare String.compare a.tag b.tag in
8383+ if c <> 0 then c else
8484+ let c = Bool.compare a.implicit b.implicit in
8585+ if c <> 0 then c else
8686+ let c = Layout_style.compare a.style b.style in
8787+ if c <> 0 then c else
8888+ let cmp_pair (k1, v1) (k2, v2) =
8989+ let c = cmp_k k1 k2 in
9090+ if c <> 0 then c else cmp_v v1 v2
9191+ in
9292+ List.compare cmp_pair a.members b.members
+711
yaml/ocaml-yamle/lib/parser.ml
···11+(** YAML parser - converts tokens to semantic events via state machine *)
22+33+(** Parser states *)
44+type state =
55+ | Stream_start
66+ | Implicit_document_start
77+ | Document_start
88+ | Document_content
99+ | Document_end
1010+ | Block_node
1111+ | Block_node_or_indentless_sequence
1212+ | Flow_node
1313+ | Block_sequence_first_entry
1414+ | Block_sequence_entry
1515+ | Indentless_sequence_entry
1616+ | Block_mapping_first_key
1717+ | Block_mapping_key
1818+ | Block_mapping_value
1919+ | Flow_sequence_first_entry
2020+ | Flow_sequence_entry
2121+ | Flow_sequence_entry_mapping_key
2222+ | Flow_sequence_entry_mapping_value
2323+ | Flow_sequence_entry_mapping_end
2424+ | Flow_mapping_first_key
2525+ | Flow_mapping_key
2626+ | Flow_mapping_value
2727+ | Flow_mapping_empty_value
2828+ | End
2929+3030+type t = {
3131+ scanner : Scanner.t;
3232+ mutable state : state;
3333+ mutable states : state list; (** State stack *)
3434+ mutable marks : Span.t list; (** Mark stack for span tracking *)
3535+ mutable version : (int * int) option;
3636+ mutable tag_directives : (string * string) list;
3737+ mutable current_token : Token.spanned option;
3838+ mutable finished : bool;
3939+}
4040+4141+let create scanner = {
4242+ scanner;
4343+ state = Stream_start;
4444+ states = [];
4545+ marks = [];
4646+ version = None;
4747+ tag_directives = [
4848+ ("!", "!");
4949+ ("!!", "tag:yaml.org,2002:");
5050+ ];
5151+ current_token = None;
5252+ finished = false;
5353+}
5454+5555+let of_string s = create (Scanner.of_string s)
5656+5757+(** Get current token, fetching if needed *)
5858+let current_token t =
5959+ match t.current_token with
6060+ | Some tok -> tok
6161+ | None ->
6262+ let tok = Scanner.next t.scanner in
6363+ t.current_token <- tok;
6464+ match tok with
6565+ | Some tok -> tok
6666+ | None -> Error.raise Unexpected_eof
6767+6868+(** Peek at current token *)
6969+let peek_token t =
7070+ match t.current_token with
7171+ | Some _ -> t.current_token
7272+ | None ->
7373+ t.current_token <- Scanner.next t.scanner;
7474+ t.current_token
7575+7676+(** Skip current token *)
7777+let skip_token t =
7878+ t.current_token <- None
7979+8080+(** Check if current token matches *)
8181+let check t pred =
8282+ match peek_token t with
8383+ | Some tok -> pred tok.token
8484+ | None -> false
8585+8686+(** Check for specific token *)
8787+let check_token t token_match =
8888+ check t token_match
8989+9090+(** Push state onto stack *)
9191+let push_state t s =
9292+ t.states <- s :: t.states
9393+9494+(** Pop state from stack *)
9595+let pop_state t =
9696+ match t.states with
9797+ | s :: rest ->
9898+ t.states <- rest;
9999+ s
100100+ | [] -> End
101101+102102+(** Resolve a tag *)
103103+let resolve_tag t ~handle ~suffix =
104104+ match List.assoc_opt handle t.tag_directives with
105105+ | Some prefix -> prefix ^ suffix
106106+ | None when handle = "!" -> "!" ^ suffix
107107+ | None -> Error.raise (Invalid_tag (handle ^ suffix))
108108+109109+(** Process directives at document start *)
110110+let process_directives t =
111111+ t.version <- None;
112112+ t.tag_directives <- [("!", "!"); ("!!", "tag:yaml.org,2002:")];
113113+114114+ while check t (function
115115+ | Token.Version_directive _ | Token.Tag_directive _ -> true
116116+ | _ -> false)
117117+ do
118118+ let tok = current_token t in
119119+ skip_token t;
120120+ match tok.token with
121121+ | Token.Version_directive { major; minor } ->
122122+ if t.version <> None then
123123+ Error.raise_span tok.span (Invalid_yaml_version "duplicate YAML directive");
124124+ t.version <- Some (major, minor)
125125+ | Token.Tag_directive { handle; prefix } ->
126126+ if List.mem_assoc handle t.tag_directives &&
127127+ handle <> "!" && handle <> "!!" then
128128+ Error.raise_span tok.span (Invalid_tag_directive ("duplicate tag handle: " ^ handle));
129129+ t.tag_directives <- (handle, prefix) :: t.tag_directives
130130+ | _ -> ()
131131+ done
132132+133133+(** Parse anchor and/or tag properties *)
134134+let parse_properties t =
135135+ let anchor = ref None in
136136+ let tag = ref None in
137137+138138+ while check t (function
139139+ | Token.Anchor _ | Token.Tag _ -> true
140140+ | _ -> false)
141141+ do
142142+ let tok = current_token t in
143143+ skip_token t;
144144+ match tok.token with
145145+ | Token.Anchor name ->
146146+ if !anchor <> None then
147147+ Error.raise_span tok.span (Duplicate_anchor name);
148148+ anchor := Some name
149149+ | Token.Tag { handle; suffix } ->
150150+ if !tag <> None then
151151+ Error.raise_span tok.span (Invalid_tag "duplicate tag");
152152+ let resolved =
153153+ if handle = "" && suffix = "" then None
154154+ else if handle = "!" && suffix = "" then Some "!"
155155+ else Some (resolve_tag t ~handle ~suffix)
156156+ in
157157+ tag := resolved
158158+ | _ -> ()
159159+ done;
160160+ (!anchor, !tag)
161161+162162+(** Empty scalar event *)
163163+let empty_scalar_event ~anchor ~tag span =
164164+ Event.Scalar {
165165+ anchor;
166166+ tag;
167167+ value = "";
168168+ plain_implicit = tag = None;
169169+ quoted_implicit = false;
170170+ style = Scalar_style.Plain;
171171+ }, span
172172+173173+(** Parse stream start *)
174174+let parse_stream_start t =
175175+ let tok = current_token t in
176176+ skip_token t;
177177+ match tok.token with
178178+ | Token.Stream_start encoding ->
179179+ t.state <- Implicit_document_start;
180180+ Event.Stream_start { encoding }, tok.span
181181+ | _ ->
182182+ Error.raise_span tok.span (Unexpected_token "expected stream start")
183183+184184+(** Parse document start (implicit or explicit) *)
185185+let parse_document_start t ~implicit =
186186+ process_directives t;
187187+188188+ if not implicit then begin
189189+ let tok = current_token t in
190190+ match tok.token with
191191+ | Token.Document_start ->
192192+ skip_token t
193193+ | _ ->
194194+ Error.raise_span tok.span Expected_document_start
195195+ end;
196196+197197+ let span = match peek_token t with
198198+ | Some tok -> tok.span
199199+ | None -> Span.point Position.initial
200200+ in
201201+202202+ push_state t Document_end;
203203+ t.state <- Document_content;
204204+ Event.Document_start { version = t.version; implicit }, span
205205+206206+(** Parse document end *)
207207+let parse_document_end t =
208208+ let implicit = not (check t (function Token.Document_end -> true | _ -> false)) in
209209+ let span = match peek_token t with
210210+ | Some tok -> tok.span
211211+ | None -> Span.point Position.initial
212212+ in
213213+214214+ if not implicit then skip_token t;
215215+216216+ t.state <- Implicit_document_start;
217217+ Event.Document_end { implicit }, span
218218+219219+(** Parse node in various contexts *)
220220+let parse_node t ~block ~indentless =
221221+ let tok = current_token t in
222222+ match tok.token with
223223+ | Token.Alias name ->
224224+ skip_token t;
225225+ t.state <- pop_state t;
226226+ Event.Alias { anchor = name }, tok.span
227227+228228+ | Token.Anchor _ | Token.Tag _ ->
229229+ let anchor, tag = parse_properties t in
230230+ let tok = current_token t in
231231+ (match tok.token with
232232+ | Token.Block_entry when indentless ->
233233+ t.state <- Indentless_sequence_entry;
234234+ Event.Sequence_start {
235235+ anchor; tag;
236236+ implicit = tag = None;
237237+ style = Layout_style.Block;
238238+ }, tok.span
239239+240240+ | Token.Block_sequence_start when block ->
241241+ t.state <- Block_sequence_first_entry;
242242+ skip_token t;
243243+ Event.Sequence_start {
244244+ anchor; tag;
245245+ implicit = tag = None;
246246+ style = Layout_style.Block;
247247+ }, tok.span
248248+249249+ | Token.Block_mapping_start when block ->
250250+ t.state <- Block_mapping_first_key;
251251+ skip_token t;
252252+ Event.Mapping_start {
253253+ anchor; tag;
254254+ implicit = tag = None;
255255+ style = Layout_style.Block;
256256+ }, tok.span
257257+258258+ | Token.Flow_sequence_start ->
259259+ t.state <- Flow_sequence_first_entry;
260260+ skip_token t;
261261+ Event.Sequence_start {
262262+ anchor; tag;
263263+ implicit = tag = None;
264264+ style = Layout_style.Flow;
265265+ }, tok.span
266266+267267+ | Token.Flow_mapping_start ->
268268+ t.state <- Flow_mapping_first_key;
269269+ skip_token t;
270270+ Event.Mapping_start {
271271+ anchor; tag;
272272+ implicit = tag = None;
273273+ style = Layout_style.Flow;
274274+ }, tok.span
275275+276276+ | Token.Scalar { style; value } ->
277277+ skip_token t;
278278+ t.state <- pop_state t;
279279+ let plain_implicit = tag = None && style = Scalar_style.Plain in
280280+ let quoted_implicit = tag = None && style <> Scalar_style.Plain in
281281+ Event.Scalar {
282282+ anchor; tag; value;
283283+ plain_implicit; quoted_implicit; style;
284284+ }, tok.span
285285+286286+ | _ ->
287287+ (* Empty node *)
288288+ t.state <- pop_state t;
289289+ empty_scalar_event ~anchor ~tag tok.span)
290290+291291+ | Token.Block_sequence_start when block ->
292292+ t.state <- Block_sequence_first_entry;
293293+ skip_token t;
294294+ Event.Sequence_start {
295295+ anchor = None; tag = None;
296296+ implicit = true;
297297+ style = Layout_style.Block;
298298+ }, tok.span
299299+300300+ | Token.Block_mapping_start when block ->
301301+ t.state <- Block_mapping_first_key;
302302+ skip_token t;
303303+ Event.Mapping_start {
304304+ anchor = None; tag = None;
305305+ implicit = true;
306306+ style = Layout_style.Block;
307307+ }, tok.span
308308+309309+ | Token.Flow_sequence_start ->
310310+ t.state <- Flow_sequence_first_entry;
311311+ skip_token t;
312312+ Event.Sequence_start {
313313+ anchor = None; tag = None;
314314+ implicit = true;
315315+ style = Layout_style.Flow;
316316+ }, tok.span
317317+318318+ | Token.Flow_mapping_start ->
319319+ t.state <- Flow_mapping_first_key;
320320+ skip_token t;
321321+ Event.Mapping_start {
322322+ anchor = None; tag = None;
323323+ implicit = true;
324324+ style = Layout_style.Flow;
325325+ }, tok.span
326326+327327+ | Token.Block_entry when indentless ->
328328+ t.state <- Indentless_sequence_entry;
329329+ Event.Sequence_start {
330330+ anchor = None; tag = None;
331331+ implicit = true;
332332+ style = Layout_style.Block;
333333+ }, tok.span
334334+335335+ | Token.Scalar { style; value } ->
336336+ skip_token t;
337337+ t.state <- pop_state t;
338338+ let plain_implicit = style = Scalar_style.Plain in
339339+ let quoted_implicit = style <> Scalar_style.Plain in
340340+ Event.Scalar {
341341+ anchor = None; tag = None; value;
342342+ plain_implicit; quoted_implicit; style;
343343+ }, tok.span
344344+345345+ | _ ->
346346+ (* Empty node *)
347347+ t.state <- pop_state t;
348348+ empty_scalar_event ~anchor:None ~tag:None tok.span
349349+350350+(** Parse block sequence entry *)
351351+let parse_block_sequence_entry t =
352352+ let tok = current_token t in
353353+ match tok.token with
354354+ | Token.Block_entry ->
355355+ skip_token t;
356356+ if check t (function
357357+ | Token.Block_entry | Token.Block_end -> true
358358+ | _ -> false)
359359+ then begin
360360+ t.state <- Block_sequence_entry;
361361+ empty_scalar_event ~anchor:None ~tag:None tok.span
362362+ end else begin
363363+ push_state t Block_sequence_entry;
364364+ parse_node t ~block:true ~indentless:false
365365+ end
366366+ | Token.Block_end ->
367367+ skip_token t;
368368+ t.state <- pop_state t;
369369+ Event.Sequence_end, tok.span
370370+ | _ ->
371371+ Error.raise_span tok.span Expected_block_entry
372372+373373+(** Parse block mapping key *)
374374+let parse_block_mapping_key t =
375375+ let tok = current_token t in
376376+ match tok.token with
377377+ | Token.Key ->
378378+ skip_token t;
379379+ if check t (function
380380+ | Token.Key | Token.Value | Token.Block_end -> true
381381+ | _ -> false)
382382+ then begin
383383+ t.state <- Block_mapping_value;
384384+ empty_scalar_event ~anchor:None ~tag:None tok.span
385385+ end else begin
386386+ push_state t Block_mapping_value;
387387+ parse_node t ~block:true ~indentless:true
388388+ end
389389+ | Token.Block_end ->
390390+ skip_token t;
391391+ t.state <- pop_state t;
392392+ Event.Mapping_end, tok.span
393393+ | _ ->
394394+ Error.raise_span tok.span Expected_key
395395+396396+(** Parse block mapping value *)
397397+let parse_block_mapping_value t =
398398+ let tok = current_token t in
399399+ match tok.token with
400400+ | Token.Value ->
401401+ skip_token t;
402402+ if check t (function
403403+ | Token.Key | Token.Value | Token.Block_end -> true
404404+ | _ -> false)
405405+ then begin
406406+ t.state <- Block_mapping_key;
407407+ empty_scalar_event ~anchor:None ~tag:None tok.span
408408+ end else begin
409409+ push_state t Block_mapping_key;
410410+ parse_node t ~block:true ~indentless:true
411411+ end
412412+ | _ ->
413413+ (* Implicit empty value *)
414414+ t.state <- Block_mapping_key;
415415+ empty_scalar_event ~anchor:None ~tag:None tok.span
416416+417417+(** Parse indentless sequence entry *)
418418+let parse_indentless_sequence_entry t =
419419+ let tok = current_token t in
420420+ match tok.token with
421421+ | Token.Block_entry ->
422422+ skip_token t;
423423+ if check t (function
424424+ | Token.Block_entry | Token.Key | Token.Value | Token.Block_end -> true
425425+ | _ -> false)
426426+ then begin
427427+ t.state <- Indentless_sequence_entry;
428428+ empty_scalar_event ~anchor:None ~tag:None tok.span
429429+ end else begin
430430+ push_state t Indentless_sequence_entry;
431431+ parse_node t ~block:true ~indentless:false
432432+ end
433433+ | _ ->
434434+ t.state <- pop_state t;
435435+ Event.Sequence_end, tok.span
436436+437437+(** Parse flow sequence *)
438438+let rec parse_flow_sequence_entry t ~first =
439439+ let tok = current_token t in
440440+ match tok.token with
441441+ | Token.Flow_sequence_end ->
442442+ skip_token t;
443443+ t.state <- pop_state t;
444444+ Event.Sequence_end, tok.span
445445+ | Token.Flow_entry when not first ->
446446+ skip_token t;
447447+ parse_flow_sequence_entry_internal t
448448+ | _ when first ->
449449+ parse_flow_sequence_entry_internal t
450450+ | _ ->
451451+ Error.raise_span tok.span Expected_sequence_end
452452+453453+and parse_flow_sequence_entry_internal t =
454454+ let tok = current_token t in
455455+ match tok.token with
456456+ | Token.Flow_sequence_end ->
457457+ t.state <- Flow_sequence_entry;
458458+ empty_scalar_event ~anchor:None ~tag:None tok.span
459459+ | Token.Key ->
460460+ skip_token t;
461461+ push_state t Flow_sequence_entry_mapping_end;
462462+ t.state <- Flow_sequence_entry_mapping_key;
463463+ Event.Mapping_start {
464464+ anchor = None; tag = None;
465465+ implicit = true;
466466+ style = Layout_style.Flow;
467467+ }, tok.span
468468+ | _ ->
469469+ push_state t Flow_sequence_entry;
470470+ parse_node t ~block:false ~indentless:false
471471+472472+(** Parse flow sequence entry mapping *)
473473+let parse_flow_sequence_entry_mapping_key t =
474474+ let tok = current_token t in
475475+ if check t (function
476476+ | Token.Value | Token.Flow_entry | Token.Flow_sequence_end -> true
477477+ | _ -> false)
478478+ then begin
479479+ t.state <- Flow_sequence_entry_mapping_value;
480480+ empty_scalar_event ~anchor:None ~tag:None tok.span
481481+ end else begin
482482+ push_state t Flow_sequence_entry_mapping_value;
483483+ parse_node t ~block:false ~indentless:false
484484+ end
485485+486486+let parse_flow_sequence_entry_mapping_value t =
487487+ let tok = current_token t in
488488+ match tok.token with
489489+ | Token.Value ->
490490+ skip_token t;
491491+ if check t (function
492492+ | Token.Flow_entry | Token.Flow_sequence_end -> true
493493+ | _ -> false)
494494+ then begin
495495+ t.state <- Flow_sequence_entry_mapping_end;
496496+ empty_scalar_event ~anchor:None ~tag:None tok.span
497497+ end else begin
498498+ push_state t Flow_sequence_entry_mapping_end;
499499+ parse_node t ~block:false ~indentless:false
500500+ end
501501+ | _ ->
502502+ t.state <- Flow_sequence_entry_mapping_end;
503503+ empty_scalar_event ~anchor:None ~tag:None tok.span
504504+505505+let parse_flow_sequence_entry_mapping_end t =
506506+ let tok = current_token t in
507507+ t.state <- Flow_sequence_entry;
508508+ Event.Mapping_end, tok.span
509509+510510+(** Parse flow mapping *)
511511+let rec parse_flow_mapping_key t ~first =
512512+ let tok = current_token t in
513513+ match tok.token with
514514+ | Token.Flow_mapping_end ->
515515+ skip_token t;
516516+ t.state <- pop_state t;
517517+ Event.Mapping_end, tok.span
518518+ | Token.Flow_entry when not first ->
519519+ skip_token t;
520520+ parse_flow_mapping_key_internal t
521521+ | _ when first ->
522522+ parse_flow_mapping_key_internal t
523523+ | _ ->
524524+ Error.raise_span tok.span Expected_mapping_end
525525+526526+and parse_flow_mapping_key_internal t =
527527+ let tok = current_token t in
528528+ match tok.token with
529529+ | Token.Flow_mapping_end ->
530530+ t.state <- Flow_mapping_key;
531531+ empty_scalar_event ~anchor:None ~tag:None tok.span
532532+ | Token.Key ->
533533+ skip_token t;
534534+ if check t (function
535535+ | Token.Value | Token.Flow_entry | Token.Flow_mapping_end -> true
536536+ | _ -> false)
537537+ then begin
538538+ t.state <- Flow_mapping_value;
539539+ empty_scalar_event ~anchor:None ~tag:None tok.span
540540+ end else begin
541541+ push_state t Flow_mapping_value;
542542+ parse_node t ~block:false ~indentless:false
543543+ end
544544+ | _ ->
545545+ push_state t Flow_mapping_value;
546546+ parse_node t ~block:false ~indentless:false
547547+548548+let parse_flow_mapping_value t ~empty =
549549+ let tok = current_token t in
550550+ if empty then begin
551551+ t.state <- Flow_mapping_key;
552552+ empty_scalar_event ~anchor:None ~tag:None tok.span
553553+ end else
554554+ match tok.token with
555555+ | Token.Value ->
556556+ skip_token t;
557557+ if check t (function
558558+ | Token.Flow_entry | Token.Flow_mapping_end -> true
559559+ | _ -> false)
560560+ then begin
561561+ t.state <- Flow_mapping_key;
562562+ empty_scalar_event ~anchor:None ~tag:None tok.span
563563+ end else begin
564564+ push_state t Flow_mapping_key;
565565+ parse_node t ~block:false ~indentless:false
566566+ end
567567+ | _ ->
568568+ t.state <- Flow_mapping_key;
569569+ empty_scalar_event ~anchor:None ~tag:None tok.span
570570+571571+(** Main state machine dispatcher *)
572572+let parse t =
573573+ match t.state with
574574+ | Stream_start ->
575575+ parse_stream_start t
576576+577577+ | Implicit_document_start ->
578578+ if check t (function
579579+ | Token.Version_directive _ | Token.Tag_directive _
580580+ | Token.Document_start | Token.Stream_end -> true
581581+ | _ -> false)
582582+ then begin
583583+ if check t (function Token.Stream_end -> true | _ -> false) then begin
584584+ let tok = current_token t in
585585+ skip_token t;
586586+ t.state <- End;
587587+ t.finished <- true;
588588+ Event.Stream_end, tok.span
589589+ end else begin
590590+ parse_document_start t ~implicit:false
591591+ end
592592+ end else
593593+ parse_document_start t ~implicit:true
594594+595595+ | Document_start ->
596596+ parse_document_start t ~implicit:false
597597+598598+ | Document_content ->
599599+ if check t (function
600600+ | Token.Version_directive _ | Token.Tag_directive _
601601+ | Token.Document_start | Token.Document_end | Token.Stream_end -> true
602602+ | _ -> false)
603603+ then begin
604604+ let tok = current_token t in
605605+ t.state <- pop_state t;
606606+ empty_scalar_event ~anchor:None ~tag:None tok.span
607607+ end else
608608+ parse_node t ~block:true ~indentless:false
609609+610610+ | Document_end ->
611611+ parse_document_end t
612612+613613+ | Block_node ->
614614+ parse_node t ~block:true ~indentless:false
615615+616616+ | Block_node_or_indentless_sequence ->
617617+ parse_node t ~block:true ~indentless:true
618618+619619+ | Flow_node ->
620620+ parse_node t ~block:false ~indentless:false
621621+622622+ | Block_sequence_first_entry ->
623623+ t.state <- Block_sequence_entry;
624624+ parse_block_sequence_entry t
625625+626626+ | Block_sequence_entry ->
627627+ parse_block_sequence_entry t
628628+629629+ | Indentless_sequence_entry ->
630630+ parse_indentless_sequence_entry t
631631+632632+ | Block_mapping_first_key ->
633633+ t.state <- Block_mapping_key;
634634+ parse_block_mapping_key t
635635+636636+ | Block_mapping_key ->
637637+ parse_block_mapping_key t
638638+639639+ | Block_mapping_value ->
640640+ parse_block_mapping_value t
641641+642642+ | Flow_sequence_first_entry ->
643643+ parse_flow_sequence_entry t ~first:true
644644+645645+ | Flow_sequence_entry ->
646646+ parse_flow_sequence_entry t ~first:false
647647+648648+ | Flow_sequence_entry_mapping_key ->
649649+ parse_flow_sequence_entry_mapping_key t
650650+651651+ | Flow_sequence_entry_mapping_value ->
652652+ parse_flow_sequence_entry_mapping_value t
653653+654654+ | Flow_sequence_entry_mapping_end ->
655655+ parse_flow_sequence_entry_mapping_end t
656656+657657+ | Flow_mapping_first_key ->
658658+ parse_flow_mapping_key t ~first:true
659659+660660+ | Flow_mapping_key ->
661661+ parse_flow_mapping_key t ~first:false
662662+663663+ | Flow_mapping_value ->
664664+ parse_flow_mapping_value t ~empty:false
665665+666666+ | Flow_mapping_empty_value ->
667667+ parse_flow_mapping_value t ~empty:true
668668+669669+ | End ->
670670+ let span = Span.point Position.initial in
671671+ t.finished <- true;
672672+ Event.Stream_end, span
673673+674674+(** Get next event *)
675675+let next t =
676676+ if t.finished then None
677677+ else begin
678678+ let event, span = parse t in
679679+ Some { Event.event; span }
680680+ end
681681+682682+(** Peek at next event *)
683683+let peek t =
684684+ (* Parser is not easily peekable without full state save/restore *)
685685+ (* For now, we don't support peek - could add caching if needed *)
686686+ if t.finished then None
687687+ else
688688+ (* Just call next and the caller will have to deal with it *)
689689+ next t
690690+691691+(** Iterate over all events *)
692692+let iter f t =
693693+ let rec loop () =
694694+ match next t with
695695+ | None -> ()
696696+ | Some ev -> f ev; loop ()
697697+ in
698698+ loop ()
699699+700700+(** Fold over all events *)
701701+let fold f init t =
702702+ let rec loop acc =
703703+ match next t with
704704+ | None -> acc
705705+ | Some ev -> loop (f acc ev)
706706+ in
707707+ loop init
708708+709709+(** Convert to list *)
710710+let to_list t =
711711+ fold (fun acc ev -> ev :: acc) [] t |> List.rev
+42
yaml/ocaml-yamle/lib/position.ml
···11+(** Position tracking for source locations *)
22+33+type t = {
44+ index : int; (** Byte offset from start *)
55+ line : int; (** 1-indexed line number *)
66+ column : int; (** 1-indexed column number *)
77+}
88+99+let initial = { index = 0; line = 1; column = 1 }
1010+1111+let advance_byte t =
1212+ { t with index = t.index + 1; column = t.column + 1 }
1313+1414+let advance_line t =
1515+ { index = t.index + 1; line = t.line + 1; column = 1 }
1616+1717+let advance_char c t =
1818+ if c = '\n' then advance_line t
1919+ else advance_byte t
2020+2121+let advance_utf8 uchar t =
2222+ let len = Uchar.utf_8_byte_length uchar in
2323+ let code = Uchar.to_int uchar in
2424+ if code = 0x0A (* LF *) then
2525+ { index = t.index + len; line = t.line + 1; column = 1 }
2626+ else
2727+ { t with index = t.index + len; column = t.column + 1 }
2828+2929+let advance_bytes n t =
3030+ { t with index = t.index + n; column = t.column + n }
3131+3232+let pp fmt t =
3333+ Format.fprintf fmt "line %d, column %d" t.line t.column
3434+3535+let to_string t =
3636+ Format.asprintf "%a" pp t
3737+3838+let compare a b =
3939+ Int.compare a.index b.index
4040+4141+let equal a b =
4242+ a.index = b.index
+61
yaml/ocaml-yamle/lib/scalar.ml
···11+(** YAML scalar values with metadata *)
22+33+type t = {
44+ anchor : string option;
55+ tag : string option;
66+ value : string;
77+ plain_implicit : bool;
88+ quoted_implicit : bool;
99+ style : Scalar_style.t;
1010+}
1111+1212+let make
1313+ ?(anchor : string option)
1414+ ?(tag : string option)
1515+ ?(plain_implicit = true)
1616+ ?(quoted_implicit = false)
1717+ ?(style = Scalar_style.Plain)
1818+ value =
1919+ { anchor; tag; value; plain_implicit; quoted_implicit; style }
2020+2121+let value t = t.value
2222+let anchor t = t.anchor
2323+let tag t = t.tag
2424+let style t = t.style
2525+let plain_implicit t = t.plain_implicit
2626+let quoted_implicit t = t.quoted_implicit
2727+2828+let with_anchor anchor t = { t with anchor = Some anchor }
2929+let with_tag tag t = { t with tag = Some tag }
3030+let with_style style t = { t with style }
3131+3232+let pp fmt t =
3333+ Format.fprintf fmt "scalar(%S" t.value;
3434+ (match t.anchor with
3535+ | Some a -> Format.fprintf fmt ", anchor=%s" a
3636+ | None -> ());
3737+ (match t.tag with
3838+ | Some tag -> Format.fprintf fmt ", tag=%s" tag
3939+ | None -> ());
4040+ Format.fprintf fmt ", style=%a)" Scalar_style.pp t.style
4141+4242+let equal a b =
4343+ Option.equal String.equal a.anchor b.anchor &&
4444+ Option.equal String.equal a.tag b.tag &&
4545+ String.equal a.value b.value &&
4646+ a.plain_implicit = b.plain_implicit &&
4747+ a.quoted_implicit = b.quoted_implicit &&
4848+ Scalar_style.equal a.style b.style
4949+5050+let compare a b =
5151+ let c = Option.compare String.compare a.anchor b.anchor in
5252+ if c <> 0 then c else
5353+ let c = Option.compare String.compare a.tag b.tag in
5454+ if c <> 0 then c else
5555+ let c = String.compare a.value b.value in
5656+ if c <> 0 then c else
5757+ let c = Bool.compare a.plain_implicit b.plain_implicit in
5858+ if c <> 0 then c else
5959+ let c = Bool.compare a.quoted_implicit b.quoted_implicit in
6060+ if c <> 0 then c else
6161+ Scalar_style.compare a.style b.style
+33
yaml/ocaml-yamle/lib/scalar_style.ml
···11+(** Scalar formatting styles *)
22+33+type t =
44+ | Any (** Let emitter choose *)
55+ | Plain (** Unquoted: foo *)
66+ | Single_quoted (** 'foo' *)
77+ | Double_quoted (** "foo" *)
88+ | Literal (** | block *)
99+ | Folded (** > block *)
1010+1111+let to_string = function
1212+ | Any -> "any"
1313+ | Plain -> "plain"
1414+ | Single_quoted -> "single-quoted"
1515+ | Double_quoted -> "double-quoted"
1616+ | Literal -> "literal"
1717+ | Folded -> "folded"
1818+1919+let pp fmt t =
2020+ Format.pp_print_string fmt (to_string t)
2121+2222+let equal a b = a = b
2323+2424+let compare a b =
2525+ let to_int = function
2626+ | Any -> 0
2727+ | Plain -> 1
2828+ | Single_quoted -> 2
2929+ | Double_quoted -> 3
3030+ | Literal -> 4
3131+ | Folded -> 5
3232+ in
3333+ Int.compare (to_int a) (to_int b)
+1046
yaml/ocaml-yamle/lib/scanner.ml
···11+(** YAML tokenizer/scanner with lookahead for ambiguity resolution *)
22+33+(** Simple key tracking for mapping key disambiguation *)
44+type simple_key = {
55+ sk_possible : bool;
66+ sk_required : bool;
77+ sk_token_number : int;
88+ sk_position : Position.t;
99+}
1010+1111+(** Indent level tracking *)
1212+type indent = {
1313+ indent : int;
1414+ needs_block_end : bool;
1515+ sequence : bool; (** true if this is a sequence indent *)
1616+}
1717+1818+type t = {
1919+ input : Input.t;
2020+ mutable tokens : Token.spanned Queue.t;
2121+ mutable token_number : int;
2222+ mutable tokens_taken : int;
2323+ mutable stream_started : bool;
2424+ mutable stream_ended : bool;
2525+ mutable indent_stack : indent list; (** Stack of indentation levels *)
2626+ mutable flow_level : int; (** Nesting depth in [] or {} *)
2727+ mutable simple_keys : simple_key option list; (** Per flow-level simple key tracking *)
2828+ mutable allow_simple_key : bool;
2929+}
3030+3131+let create input =
3232+ {
3333+ input;
3434+ tokens = Queue.create ();
3535+ token_number = 0;
3636+ tokens_taken = 0;
3737+ stream_started = false;
3838+ stream_ended = false;
3939+ indent_stack = [];
4040+ flow_level = 0;
4141+ simple_keys = [None]; (* One entry for the base level *)
4242+ allow_simple_key = true;
4343+ }
4444+4545+let of_string s = create (Input.of_string s)
4646+4747+let position t = Input.position t.input
4848+4949+(** Add a token to the queue *)
5050+let emit t span token =
5151+ Queue.add { Token.token; span } t.tokens;
5252+ t.token_number <- t.token_number + 1
5353+5454+(** Get current column (1-indexed) *)
5555+let column t = (Input.position t.input).column
5656+5757+(** Get current indent level *)
5858+let current_indent t =
5959+ match t.indent_stack with
6060+ | [] -> 0
6161+ | { indent; _ } :: _ -> indent
6262+6363+(** Skip whitespace and comments, return true if at newline *)
6464+let rec skip_to_next_token t =
6565+ (* Skip blanks *)
6666+ while Input.next_is_blank t.input do
6767+ ignore (Input.next t.input)
6868+ done;
6969+ (* Skip comment *)
7070+ if Input.next_is (( = ) '#') t.input then begin
7171+ while not (Input.is_eof t.input) && not (Input.next_is_break t.input) do
7272+ ignore (Input.next t.input)
7373+ done
7474+ end;
7575+ (* Skip line break in block context *)
7676+ if t.flow_level = 0 && Input.next_is_break t.input then begin
7777+ Input.consume_break t.input;
7878+ t.allow_simple_key <- true;
7979+ skip_to_next_token t
8080+ end
8181+ else if t.flow_level > 0 && Input.next_is_whitespace t.input then begin
8282+ ignore (Input.next t.input);
8383+ skip_to_next_token t
8484+ end
8585+8686+(** Roll the indentation level *)
8787+let roll_indent t col ~sequence =
8888+ if t.flow_level = 0 && col > current_indent t then begin
8989+ t.indent_stack <- { indent = col; needs_block_end = true; sequence } :: t.indent_stack;
9090+ true
9191+ end else
9292+ false
9393+9494+(** Unroll indentation to given column *)
9595+let unroll_indent t col =
9696+ while t.flow_level = 0 &&
9797+ match t.indent_stack with
9898+ | { indent; needs_block_end = true; _ } :: _ when indent > col -> true
9999+ | _ -> false
100100+ do
101101+ match t.indent_stack with
102102+ | { indent = _; needs_block_end = true; _ } :: rest ->
103103+ let pos = Input.position t.input in
104104+ let span = Span.point pos in
105105+ emit t span Token.Block_end;
106106+ t.indent_stack <- rest
107107+ | _ -> ()
108108+ done
109109+110110+(** Save a potential simple key *)
111111+let save_simple_key t =
112112+ if t.allow_simple_key then begin
113113+ (* A simple key is required only if we're in a block context,
114114+ at the current indentation level, AND we have an active indent *)
115115+ let required = t.flow_level = 0 &&
116116+ t.indent_stack <> [] &&
117117+ current_indent t = column t - 1 in
118118+ let sk = {
119119+ sk_possible = true;
120120+ sk_required = required;
121121+ sk_token_number = t.token_number;
122122+ sk_position = Input.position t.input;
123123+ } in
124124+ (* Remove any existing simple key at current level *)
125125+ t.simple_keys <- (
126126+ match t.simple_keys with
127127+ | _ :: rest -> Some sk :: rest
128128+ | [] -> [Some sk]
129129+ )
130130+ end
131131+132132+(** Remove simple key at current level *)
133133+let remove_simple_key t =
134134+ match t.simple_keys with
135135+ | Some sk :: _rest when sk.sk_required ->
136136+ Error.raise_at sk.sk_position Expected_key
137137+ | _ :: rest -> t.simple_keys <- None :: rest
138138+ | [] -> ()
139139+140140+(** Stale simple keys that span too many tokens *)
141141+let stale_simple_keys t =
142142+ t.simple_keys <- List.map (fun sk_opt ->
143143+ match sk_opt with
144144+ | Some sk when sk.sk_possible &&
145145+ (Input.position t.input).line > sk.sk_position.line &&
146146+ t.flow_level = 0 ->
147147+ if sk.sk_required then
148148+ Error.raise_at sk.sk_position Expected_key;
149149+ None
150150+ | _ -> sk_opt
151151+ ) t.simple_keys
152152+153153+(** Read anchor or alias name *)
154154+let scan_anchor_alias t =
155155+ let start = Input.mark t.input in
156156+ let buf = Buffer.create 16 in
157157+ while
158158+ match Input.peek t.input with
159159+ | Some c when Input.is_alnum c || c = '_' || c = '-' ->
160160+ Buffer.add_char buf c;
161161+ ignore (Input.next t.input);
162162+ true
163163+ | _ -> false
164164+ do () done;
165165+ let name = Buffer.contents buf in
166166+ if String.length name = 0 then
167167+ Error.raise_at start (Invalid_anchor "empty anchor name");
168168+ (name, Span.make ~start ~stop:(Input.mark t.input))
169169+170170+(** Scan tag handle *)
171171+let scan_tag_handle t =
172172+ let start = Input.mark t.input in
173173+ let buf = Buffer.create 16 in
174174+ (* Expect ! *)
175175+ (match Input.peek t.input with
176176+ | Some '!' ->
177177+ Buffer.add_char buf '!';
178178+ ignore (Input.next t.input)
179179+ | _ -> Error.raise_at start (Invalid_tag "expected '!'"));
180180+ (* Read word chars *)
181181+ while
182182+ match Input.peek t.input with
183183+ | Some c when Input.is_alnum c || c = '-' ->
184184+ Buffer.add_char buf c;
185185+ ignore (Input.next t.input);
186186+ true
187187+ | _ -> false
188188+ do () done;
189189+ (* Check for secondary ! *)
190190+ (match Input.peek t.input with
191191+ | Some '!' ->
192192+ Buffer.add_char buf '!';
193193+ ignore (Input.next t.input)
194194+ | _ -> ());
195195+ Buffer.contents buf
196196+197197+(** Scan tag suffix (after handle) *)
198198+let scan_tag_suffix t =
199199+ let buf = Buffer.create 32 in
200200+ while
201201+ match Input.peek t.input with
202202+ | Some c when not (Input.is_whitespace c) &&
203203+ not (Input.is_flow_indicator c) ->
204204+ Buffer.add_char buf c;
205205+ ignore (Input.next t.input);
206206+ true
207207+ | _ -> false
208208+ do () done;
209209+ Buffer.contents buf
210210+211211+(** Scan a tag *)
212212+let scan_tag t =
213213+ let start = Input.mark t.input in
214214+ ignore (Input.next t.input); (* consume ! *)
215215+ let handle, suffix =
216216+ match Input.peek t.input with
217217+ | Some '<' ->
218218+ (* Verbatim tag: !<...> *)
219219+ ignore (Input.next t.input);
220220+ let buf = Buffer.create 32 in
221221+ while
222222+ match Input.peek t.input with
223223+ | Some '>' -> false
224224+ | Some c ->
225225+ Buffer.add_char buf c;
226226+ ignore (Input.next t.input);
227227+ true
228228+ | None -> Error.raise_at (Input.mark t.input) (Invalid_tag "unclosed verbatim tag")
229229+ do () done;
230230+ ignore (Input.next t.input); (* consume > *)
231231+ ("!", Buffer.contents buf)
232232+ | Some c when Input.is_whitespace c || Input.is_flow_indicator c ->
233233+ (* Non-specific tag: ! *)
234234+ ("!", "")
235235+ | Some '!' ->
236236+ (* Secondary handle *)
237237+ let handle = scan_tag_handle t in
238238+ let suffix = scan_tag_suffix t in
239239+ (handle, suffix)
240240+ | _ ->
241241+ (* Primary handle or just suffix *)
242242+ let first_part = scan_tag_suffix t in
243243+ if String.length first_part > 0 && first_part.[String.length first_part - 1] = '!' then
244244+ let suffix = scan_tag_suffix t in
245245+ (first_part, suffix)
246246+ else
247247+ ("!", first_part)
248248+ in
249249+ let span = Span.make ~start ~stop:(Input.mark t.input) in
250250+ (handle, suffix, span)
251251+252252+(** Scan single-quoted scalar *)
253253+let scan_single_quoted t =
254254+ let start = Input.mark t.input in
255255+ ignore (Input.next t.input); (* consume opening single-quote *)
256256+ let buf = Buffer.create 64 in
257257+ let rec loop () =
258258+ match Input.peek t.input with
259259+ | None -> Error.raise_at start Unclosed_single_quote
260260+ | Some '\'' ->
261261+ ignore (Input.next t.input);
262262+ (* Check for escaped quote ('') *)
263263+ (match Input.peek t.input with
264264+ | Some '\'' ->
265265+ Buffer.add_char buf '\'';
266266+ ignore (Input.next t.input);
267267+ loop ()
268268+ | _ -> ())
269269+ | Some '\n' | Some '\r' ->
270270+ Input.consume_break t.input;
271271+ (* Fold line break to space unless at start of content *)
272272+ if Buffer.length buf > 0 then
273273+ Buffer.add_char buf ' ';
274274+ (* Skip leading whitespace on next line *)
275275+ while Input.next_is_blank t.input do
276276+ ignore (Input.next t.input)
277277+ done;
278278+ loop ()
279279+ | Some c ->
280280+ Buffer.add_char buf c;
281281+ ignore (Input.next t.input);
282282+ loop ()
283283+ in
284284+ loop ();
285285+ let span = Span.make ~start ~stop:(Input.mark t.input) in
286286+ (Buffer.contents buf, span)
287287+288288+(** Decode hex escape of given length *)
289289+let decode_hex t len =
290290+ let start = Input.mark t.input in
291291+ let buf = Buffer.create len in
292292+ for _ = 1 to len do
293293+ match Input.peek t.input with
294294+ | Some c when Input.is_hex c ->
295295+ Buffer.add_char buf c;
296296+ ignore (Input.next t.input)
297297+ | _ ->
298298+ Error.raise_at start (Invalid_hex_escape (Buffer.contents buf))
299299+ done;
300300+ let code = int_of_string ("0x" ^ Buffer.contents buf) in
301301+ if code <= 0x7F then
302302+ String.make 1 (Char.chr code)
303303+ else if code <= 0x7FF then
304304+ let b1 = 0xC0 lor (code lsr 6) in
305305+ let b2 = 0x80 lor (code land 0x3F) in
306306+ String.init 2 (fun i -> Char.chr (if i = 0 then b1 else b2))
307307+ else if code <= 0xFFFF then
308308+ let b1 = 0xE0 lor (code lsr 12) in
309309+ let b2 = 0x80 lor ((code lsr 6) land 0x3F) in
310310+ let b3 = 0x80 lor (code land 0x3F) in
311311+ String.init 3 (fun i -> Char.chr (match i with 0 -> b1 | 1 -> b2 | _ -> b3))
312312+ else
313313+ let b1 = 0xF0 lor (code lsr 18) in
314314+ let b2 = 0x80 lor ((code lsr 12) land 0x3F) in
315315+ let b3 = 0x80 lor ((code lsr 6) land 0x3F) in
316316+ let b4 = 0x80 lor (code land 0x3F) in
317317+ String.init 4 (fun i -> Char.chr (match i with 0 -> b1 | 1 -> b2 | 2 -> b3 | _ -> b4))
318318+319319+(** Scan double-quoted scalar *)
320320+let scan_double_quoted t =
321321+ let start = Input.mark t.input in
322322+ ignore (Input.next t.input); (* consume opening double-quote *)
323323+ let buf = Buffer.create 64 in
324324+ let rec loop () =
325325+ match Input.peek t.input with
326326+ | None -> Error.raise_at start Unclosed_double_quote
327327+ | Some '"' ->
328328+ ignore (Input.next t.input)
329329+ | Some '\\' ->
330330+ ignore (Input.next t.input);
331331+ (match Input.peek t.input with
332332+ | None -> Error.raise_at start (Invalid_escape_sequence "\\<EOF>")
333333+ | Some '0' -> Buffer.add_char buf '\x00'; ignore (Input.next t.input)
334334+ | Some 'a' -> Buffer.add_char buf '\x07'; ignore (Input.next t.input)
335335+ | Some 'b' -> Buffer.add_char buf '\x08'; ignore (Input.next t.input)
336336+ | Some 't' | Some '\t' -> Buffer.add_char buf '\t'; ignore (Input.next t.input)
337337+ | Some 'n' -> Buffer.add_char buf '\n'; ignore (Input.next t.input)
338338+ | Some 'v' -> Buffer.add_char buf '\x0B'; ignore (Input.next t.input)
339339+ | Some 'f' -> Buffer.add_char buf '\x0C'; ignore (Input.next t.input)
340340+ | Some 'r' -> Buffer.add_char buf '\r'; ignore (Input.next t.input)
341341+ | Some 'e' -> Buffer.add_char buf '\x1B'; ignore (Input.next t.input)
342342+ | Some ' ' -> Buffer.add_char buf ' '; ignore (Input.next t.input)
343343+ | Some '"' -> Buffer.add_char buf '"'; ignore (Input.next t.input)
344344+ | Some '/' -> Buffer.add_char buf '/'; ignore (Input.next t.input)
345345+ | Some '\\' -> Buffer.add_char buf '\\'; ignore (Input.next t.input)
346346+ | Some 'N' -> Buffer.add_string buf "\xC2\x85"; ignore (Input.next t.input) (* NEL *)
347347+ | Some '_' -> Buffer.add_string buf "\xC2\xA0"; ignore (Input.next t.input) (* NBSP *)
348348+ | Some 'L' -> Buffer.add_string buf "\xE2\x80\xA8"; ignore (Input.next t.input) (* LS *)
349349+ | Some 'P' -> Buffer.add_string buf "\xE2\x80\xA9"; ignore (Input.next t.input) (* PS *)
350350+ | Some 'x' ->
351351+ ignore (Input.next t.input);
352352+ Buffer.add_string buf (decode_hex t 2)
353353+ | Some 'u' ->
354354+ ignore (Input.next t.input);
355355+ Buffer.add_string buf (decode_hex t 4)
356356+ | Some 'U' ->
357357+ ignore (Input.next t.input);
358358+ Buffer.add_string buf (decode_hex t 8)
359359+ | Some '\n' | Some '\r' ->
360360+ (* Line continuation *)
361361+ Input.consume_break t.input;
362362+ while Input.next_is_blank t.input do
363363+ ignore (Input.next t.input)
364364+ done
365365+ | Some c ->
366366+ Error.raise_at (Input.mark t.input)
367367+ (Invalid_escape_sequence (Printf.sprintf "\\%c" c)));
368368+ loop ()
369369+ | Some '\n' | Some '\r' ->
370370+ Input.consume_break t.input;
371371+ (* Fold to space *)
372372+ Buffer.add_char buf ' ';
373373+ (* Skip leading whitespace *)
374374+ while Input.next_is_blank t.input do
375375+ ignore (Input.next t.input)
376376+ done;
377377+ loop ()
378378+ | Some c ->
379379+ Buffer.add_char buf c;
380380+ ignore (Input.next t.input);
381381+ loop ()
382382+ in
383383+ loop ();
384384+ let span = Span.make ~start ~stop:(Input.mark t.input) in
385385+ (Buffer.contents buf, span)
386386+387387+(** Check if character can appear in plain scalar at this position *)
388388+let can_continue_plain t c ~in_flow =
389389+ match c with
390390+ | ':' ->
391391+ (* : is OK if not followed by whitespace or flow indicator *)
392392+ (match Input.peek_nth t.input 1 with
393393+ | None -> true
394394+ | Some c2 when Input.is_whitespace c2 -> false
395395+ | Some c2 when in_flow && Input.is_flow_indicator c2 -> false
396396+ | _ -> true)
397397+ | '#' ->
398398+ (* # is OK if not preceded by whitespace (checked at call site) *)
399399+ false
400400+ | c when in_flow && Input.is_flow_indicator c -> false
401401+ | _ when Input.is_break c -> false
402402+ | _ -> true
403403+404404+(** Scan plain scalar *)
405405+let scan_plain_scalar t =
406406+ let start = Input.mark t.input in
407407+ let in_flow = t.flow_level > 0 in
408408+ let indent = current_indent t in
409409+ let buf = Buffer.create 64 in
410410+ let spaces = Buffer.create 16 in
411411+ let leading_blanks = ref false in
412412+413413+ let rec scan_line () =
414414+ match Input.peek t.input with
415415+ | None -> ()
416416+ | Some c when can_continue_plain t c ~in_flow ->
417417+ (* Check for # preceded by space *)
418418+ if c = '#' && Buffer.length buf > 0 then
419419+ () (* Stop - # after content *)
420420+ else begin
421421+ if Buffer.length spaces > 0 then begin
422422+ if !leading_blanks then begin
423423+ (* Fold line break *)
424424+ if Buffer.contents spaces = "\n" then
425425+ Buffer.add_char buf ' '
426426+ else begin
427427+ (* Multiple breaks - preserve all but first *)
428428+ let s = Buffer.contents spaces in
429429+ Buffer.add_substring buf s 1 (String.length s - 1)
430430+ end
431431+ end else
432432+ Buffer.add_buffer buf spaces;
433433+ Buffer.clear spaces
434434+ end;
435435+ Buffer.add_char buf c;
436436+ ignore (Input.next t.input);
437437+ leading_blanks := false;
438438+ scan_line ()
439439+ end
440440+ | _ -> ()
441441+ in
442442+443443+ let rec scan_lines () =
444444+ scan_line ();
445445+ (* Check for line continuation *)
446446+ if not in_flow && Input.next_is_break t.input then begin
447447+ (* Save whitespace *)
448448+ Buffer.clear spaces;
449449+ Buffer.add_char spaces '\n';
450450+ Input.consume_break t.input;
451451+ (* Line break in block context allows simple key *)
452452+ t.allow_simple_key <- true;
453453+ (* Skip leading blanks *)
454454+ while Input.next_is_blank t.input do
455455+ Buffer.add_char spaces (Option.get (Input.next t.input))
456456+ done;
457457+ let col = (Input.position t.input).column in
458458+ (* Check indentation - stop if we're at or before the containing block's indent *)
459459+ if not in_flow && col <= indent then
460460+ () (* Stop - dedented or at parent level *)
461461+ else if Input.at_document_boundary t.input then
462462+ () (* Stop - document boundary *)
463463+ else begin
464464+ leading_blanks := true;
465465+ scan_lines ()
466466+ end
467467+ end
468468+ in
469469+470470+ scan_lines ();
471471+ let value = Buffer.contents buf in
472472+ let span = Span.make ~start ~stop:(Input.mark t.input) in
473473+ (value, span)
474474+475475+(** Scan block scalar (literal | or folded >) *)
476476+let scan_block_scalar t literal =
477477+ let start = Input.mark t.input in
478478+ ignore (Input.next t.input); (* consume | or > *)
479479+480480+ (* Parse header: optional indentation indicator and chomping *)
481481+ let explicit_indent = ref None in
482482+ let chomping = ref Chomping.Clip in
483483+484484+ (* First character of header *)
485485+ (match Input.peek t.input with
486486+ | Some c when Input.is_digit c && c <> '0' ->
487487+ explicit_indent := Some (Char.code c - Char.code '0');
488488+ ignore (Input.next t.input)
489489+ | Some '-' -> chomping := Chomping.Strip; ignore (Input.next t.input)
490490+ | Some '+' -> chomping := Chomping.Keep; ignore (Input.next t.input)
491491+ | _ -> ());
492492+493493+ (* Second character of header *)
494494+ (match Input.peek t.input with
495495+ | Some c when Input.is_digit c && c <> '0' && !explicit_indent = None ->
496496+ explicit_indent := Some (Char.code c - Char.code '0');
497497+ ignore (Input.next t.input)
498498+ | Some '-' when !chomping = Chomping.Clip ->
499499+ chomping := Chomping.Strip; ignore (Input.next t.input)
500500+ | Some '+' when !chomping = Chomping.Clip ->
501501+ chomping := Chomping.Keep; ignore (Input.next t.input)
502502+ | _ -> ());
503503+504504+ (* Skip to end of line *)
505505+ while Input.next_is_blank t.input do
506506+ ignore (Input.next t.input)
507507+ done;
508508+509509+ (* Optional comment *)
510510+ if Input.next_is (( = ) '#') t.input then begin
511511+ while not (Input.is_eof t.input) && not (Input.next_is_break t.input) do
512512+ ignore (Input.next t.input)
513513+ done
514514+ end;
515515+516516+ (* Consume line break *)
517517+ if Input.next_is_break t.input then
518518+ Input.consume_break t.input
519519+ else if not (Input.is_eof t.input) then
520520+ Error.raise_at (Input.mark t.input)
521521+ (Invalid_block_scalar_header "expected newline after header");
522522+523523+ let base_indent = current_indent t in
524524+ let content_indent = ref (
525525+ match !explicit_indent with
526526+ | Some n -> base_indent + n
527527+ | None -> 0 (* Will be determined by first non-empty line *)
528528+ ) in
529529+530530+ let buf = Buffer.create 256 in
531531+ let trailing_breaks = Buffer.create 16 in
532532+533533+ (* Read content *)
534534+ let rec read_lines () =
535535+ (* Skip empty lines, collecting breaks *)
536536+ while Input.next_is_break t.input ||
537537+ (Input.next_is_blank t.input &&
538538+ match Input.peek_nth t.input 1 with
539539+ | Some c when Input.is_break c -> true
540540+ | None -> true
541541+ | _ -> false)
542542+ do
543543+ if Input.next_is_blank t.input then begin
544544+ while Input.next_is_blank t.input do
545545+ ignore (Input.next t.input)
546546+ done
547547+ end;
548548+ if Input.next_is_break t.input then begin
549549+ Buffer.add_char trailing_breaks '\n';
550550+ Input.consume_break t.input
551551+ end
552552+ done;
553553+554554+ (* Check if we're at content *)
555555+ if Input.is_eof t.input then ()
556556+ else if Input.at_document_boundary t.input then ()
557557+ else begin
558558+ (* Count leading spaces *)
559559+ let line_indent = ref 0 in
560560+ while Input.next_is (( = ) ' ') t.input do
561561+ incr line_indent;
562562+ ignore (Input.next t.input)
563563+ done;
564564+565565+ (* Determine content indent from first content line *)
566566+ if !content_indent = 0 then begin
567567+ if !line_indent <= base_indent then begin
568568+ (* No content - restore position conceptually *)
569569+ ()
570570+ end else
571571+ content_indent := !line_indent
572572+ end;
573573+574574+ if !line_indent < !content_indent then begin
575575+ (* Dedented - done with content *)
576576+ ()
577577+ end else begin
578578+ (* Add trailing breaks to buffer *)
579579+ if Buffer.length buf > 0 then begin
580580+ if Buffer.length trailing_breaks > 0 then begin
581581+ if literal then
582582+ Buffer.add_buffer buf trailing_breaks
583583+ else begin
584584+ let breaks = Buffer.contents trailing_breaks in
585585+ if String.length breaks = 1 then
586586+ Buffer.add_char buf ' '
587587+ else
588588+ Buffer.add_substring buf breaks 1 (String.length breaks - 1)
589589+ end
590590+ end else if not literal then
591591+ Buffer.add_char buf ' '
592592+ end else
593593+ Buffer.add_buffer buf trailing_breaks;
594594+ Buffer.clear trailing_breaks;
595595+596596+ (* Add extra indentation for literal *)
597597+ if literal then begin
598598+ for _ = !content_indent + 1 to !line_indent do
599599+ Buffer.add_char buf ' '
600600+ done
601601+ end;
602602+603603+ (* Read line content *)
604604+ while not (Input.is_eof t.input) && not (Input.next_is_break t.input) do
605605+ Buffer.add_char buf (Input.next_exn t.input)
606606+ done;
607607+608608+ (* Record trailing break *)
609609+ if Input.next_is_break t.input then begin
610610+ Buffer.add_char trailing_breaks '\n';
611611+ Input.consume_break t.input
612612+ end;
613613+614614+ read_lines ()
615615+ end
616616+ end
617617+ in
618618+619619+ read_lines ();
620620+621621+ (* Apply chomping *)
622622+ let value =
623623+ let content = Buffer.contents buf in
624624+ match !chomping with
625625+ | Chomping.Strip -> content
626626+ | Chomping.Clip ->
627627+ if String.length content > 0 then content ^ "\n" else content
628628+ | Chomping.Keep ->
629629+ content ^ Buffer.contents trailing_breaks
630630+ in
631631+632632+ let span = Span.make ~start ~stop:(Input.mark t.input) in
633633+ let style = if literal then Scalar_style.Literal else Scalar_style.Folded in
634634+ (value, style, span)
635635+636636+(** Scan directive (after %) *)
637637+let scan_directive t =
638638+ let start = Input.mark t.input in
639639+ ignore (Input.next t.input); (* consume % *)
640640+641641+ (* Read directive name *)
642642+ let name_buf = Buffer.create 16 in
643643+ while
644644+ match Input.peek t.input with
645645+ | Some c when Input.is_alnum c || c = '-' ->
646646+ Buffer.add_char name_buf c;
647647+ ignore (Input.next t.input);
648648+ true
649649+ | _ -> false
650650+ do () done;
651651+ let name = Buffer.contents name_buf in
652652+653653+ (* Skip blanks *)
654654+ while Input.next_is_blank t.input do
655655+ ignore (Input.next t.input)
656656+ done;
657657+658658+ let span = Span.make ~start ~stop:(Input.mark t.input) in
659659+660660+ match name with
661661+ | "YAML" ->
662662+ (* Version directive: %YAML 1.2 *)
663663+ let major = ref 0 in
664664+ let minor = ref 0 in
665665+ (* Read major version *)
666666+ while Input.next_is_digit t.input do
667667+ major := !major * 10 + (Char.code (Input.next_exn t.input) - Char.code '0')
668668+ done;
669669+ (* Expect . *)
670670+ (match Input.peek t.input with
671671+ | Some '.' -> ignore (Input.next t.input)
672672+ | _ -> Error.raise_at (Input.mark t.input) (Invalid_yaml_version "expected '.'"));
673673+ (* Read minor version *)
674674+ while Input.next_is_digit t.input do
675675+ minor := !minor * 10 + (Char.code (Input.next_exn t.input) - Char.code '0')
676676+ done;
677677+ let span = Span.make ~start ~stop:(Input.mark t.input) in
678678+ Token.Version_directive { major = !major; minor = !minor }, span
679679+680680+ | "TAG" ->
681681+ (* Tag directive: %TAG !foo! tag:example.com,2000: *)
682682+ let handle = scan_tag_handle t in
683683+ (* Skip blanks *)
684684+ while Input.next_is_blank t.input do
685685+ ignore (Input.next t.input)
686686+ done;
687687+ (* Read prefix *)
688688+ let prefix_buf = Buffer.create 32 in
689689+ while
690690+ match Input.peek t.input with
691691+ | Some c when not (Input.is_whitespace c) ->
692692+ Buffer.add_char prefix_buf c;
693693+ ignore (Input.next t.input);
694694+ true
695695+ | _ -> false
696696+ do () done;
697697+ let prefix = Buffer.contents prefix_buf in
698698+ let span = Span.make ~start ~stop:(Input.mark t.input) in
699699+ Token.Tag_directive { handle; prefix }, span
700700+701701+ | _ when String.length name > 0 && name.[0] >= 'A' && name.[0] <= 'Z' ->
702702+ (* Reserved directive *)
703703+ Error.raise_span span (Reserved_directive name)
704704+705705+ | _ ->
706706+ (* Unknown directive - skip to end of line *)
707707+ while not (Input.is_eof t.input) && not (Input.next_is_break t.input) do
708708+ ignore (Input.next t.input)
709709+ done;
710710+ Error.raise_span span (Invalid_directive name)
711711+712712+(** Fetch the next token(s) into the queue *)
713713+let rec fetch_next_token t =
714714+ skip_to_next_token t;
715715+ stale_simple_keys t;
716716+ let col = column t in
717717+ (* Unroll indents that are deeper than current column.
718718+ Note: we use col, not col-1, to allow entries at the same level. *)
719719+ unroll_indent t col;
720720+721721+ if Input.is_eof t.input then
722722+ fetch_stream_end t
723723+ else if Input.at_document_boundary t.input then
724724+ fetch_document_indicator t
725725+ else begin
726726+ match Input.peek t.input with
727727+ | None -> fetch_stream_end t
728728+ | Some '%' when (Input.position t.input).column = 1 ->
729729+ fetch_directive t
730730+ | Some '[' -> fetch_flow_collection_start t Token.Flow_sequence_start
731731+ | Some '{' -> fetch_flow_collection_start t Token.Flow_mapping_start
732732+ | Some ']' -> fetch_flow_collection_end t Token.Flow_sequence_end
733733+ | Some '}' -> fetch_flow_collection_end t Token.Flow_mapping_end
734734+ | Some ',' -> fetch_flow_entry t
735735+ | Some '-' when t.flow_level = 0 && check_block_entry t ->
736736+ fetch_block_entry t
737737+ | Some '?' when t.flow_level = 0 && check_key t ->
738738+ fetch_key t
739739+ | Some ':' when check_value t ->
740740+ fetch_value t
741741+ | Some '*' -> fetch_alias t
742742+ | Some '&' -> fetch_anchor t
743743+ | Some '!' -> fetch_tag t
744744+ | Some '|' when t.flow_level = 0 -> fetch_block_scalar t true
745745+ | Some '>' when t.flow_level = 0 -> fetch_block_scalar t false
746746+ | Some '\'' -> fetch_single_quoted t
747747+ | Some '"' -> fetch_double_quoted t
748748+ | Some '-' when can_start_plain t ->
749749+ fetch_plain_scalar t
750750+ | Some '?' when can_start_plain t ->
751751+ fetch_plain_scalar t
752752+ | Some ':' when can_start_plain t ->
753753+ fetch_plain_scalar t
754754+ | Some c when can_start_plain_char c t ->
755755+ fetch_plain_scalar t
756756+ | Some c ->
757757+ Error.raise_at (Input.mark t.input) (Unexpected_character c)
758758+ end
759759+760760+and fetch_stream_end t =
761761+ if not t.stream_ended then begin
762762+ unroll_indent t (-1);
763763+ remove_simple_key t;
764764+ t.allow_simple_key <- false;
765765+ t.stream_ended <- true;
766766+ let span = Span.point (Input.mark t.input) in
767767+ emit t span Token.Stream_end
768768+ end
769769+770770+and fetch_document_indicator t =
771771+ unroll_indent t (-1);
772772+ remove_simple_key t;
773773+ t.allow_simple_key <- false;
774774+ let start = Input.mark t.input in
775775+ let indicator = Input.peek_string t.input 3 in
776776+ Input.skip t.input 3;
777777+ let span = Span.make ~start ~stop:(Input.mark t.input) in
778778+ let token = if indicator = "---" then Token.Document_start else Token.Document_end in
779779+ emit t span token
780780+781781+and fetch_directive t =
782782+ unroll_indent t (-1);
783783+ remove_simple_key t;
784784+ t.allow_simple_key <- false;
785785+ let token, span = scan_directive t in
786786+ emit t span token
787787+788788+and fetch_flow_collection_start t token_type =
789789+ save_simple_key t;
790790+ t.flow_level <- t.flow_level + 1;
791791+ t.allow_simple_key <- true;
792792+ t.simple_keys <- None :: t.simple_keys;
793793+ let start = Input.mark t.input in
794794+ ignore (Input.next t.input);
795795+ let span = Span.make ~start ~stop:(Input.mark t.input) in
796796+ emit t span token_type
797797+798798+and fetch_flow_collection_end t token_type =
799799+ remove_simple_key t;
800800+ t.flow_level <- t.flow_level - 1;
801801+ t.simple_keys <- (match t.simple_keys with _ :: rest -> rest | [] -> []);
802802+ t.allow_simple_key <- false;
803803+ let start = Input.mark t.input in
804804+ ignore (Input.next t.input);
805805+ let span = Span.make ~start ~stop:(Input.mark t.input) in
806806+ emit t span token_type
807807+808808+and fetch_flow_entry t =
809809+ remove_simple_key t;
810810+ t.allow_simple_key <- true;
811811+ let start = Input.mark t.input in
812812+ ignore (Input.next t.input);
813813+ let span = Span.make ~start ~stop:(Input.mark t.input) in
814814+ emit t span Token.Flow_entry
815815+816816+and check_block_entry t =
817817+ (* - followed by whitespace or EOF *)
818818+ match Input.peek_nth t.input 1 with
819819+ | None -> true
820820+ | Some c -> Input.is_whitespace c
821821+822822+and fetch_block_entry t =
823823+ if t.flow_level = 0 then begin
824824+ if not t.allow_simple_key then
825825+ Error.raise_at (Input.mark t.input) Expected_block_entry;
826826+ let col = column t in
827827+ if roll_indent t col ~sequence:true then begin
828828+ let span = Span.point (Input.mark t.input) in
829829+ emit t span Token.Block_sequence_start
830830+ end
831831+ end;
832832+ remove_simple_key t;
833833+ t.allow_simple_key <- true;
834834+ let start = Input.mark t.input in
835835+ ignore (Input.next t.input);
836836+ let span = Span.make ~start ~stop:(Input.mark t.input) in
837837+ emit t span Token.Block_entry
838838+839839+and check_key t =
840840+ (* ? followed by whitespace in block, any in flow *)
841841+ if t.flow_level > 0 then true
842842+ else match Input.peek_nth t.input 1 with
843843+ | None -> true
844844+ | Some c -> Input.is_whitespace c
845845+846846+and fetch_key t =
847847+ if t.flow_level = 0 then begin
848848+ if not t.allow_simple_key then
849849+ Error.raise_at (Input.mark t.input) Expected_key;
850850+ let col = column t in
851851+ if roll_indent t col ~sequence:false then begin
852852+ let span = Span.point (Input.mark t.input) in
853853+ emit t span Token.Block_mapping_start
854854+ end
855855+ end;
856856+ remove_simple_key t;
857857+ t.allow_simple_key <- t.flow_level = 0;
858858+ let start = Input.mark t.input in
859859+ ignore (Input.next t.input);
860860+ let span = Span.make ~start ~stop:(Input.mark t.input) in
861861+ emit t span Token.Key
862862+863863+and check_value t =
864864+ (* : followed by whitespace in block, or flow indicator in flow *)
865865+ if t.flow_level > 0 then true
866866+ else match Input.peek_nth t.input 1 with
867867+ | None -> true
868868+ | Some c -> Input.is_whitespace c
869869+870870+and fetch_value t =
871871+ (* Check for simple key *)
872872+ (match t.simple_keys with
873873+ | Some sk :: _ when sk.sk_possible ->
874874+ (* Insert KEY token before the simple key value *)
875875+ let key_span = Span.point sk.sk_position in
876876+ let key_token = { Token.token = Token.Key; span = key_span } in
877877+ (* We need to insert at the right position *)
878878+ let tokens = Queue.to_seq t.tokens |> Array.of_seq in
879879+ Queue.clear t.tokens;
880880+ let insert_pos = sk.sk_token_number - t.tokens_taken in
881881+ Array.iteri (fun i tok ->
882882+ if i = insert_pos then Queue.add key_token t.tokens;
883883+ Queue.add tok t.tokens
884884+ ) tokens;
885885+ if insert_pos >= Array.length tokens then
886886+ Queue.add key_token t.tokens;
887887+ t.token_number <- t.token_number + 1;
888888+ (* Roll indent for implicit block mapping *)
889889+ if t.flow_level = 0 then begin
890890+ let col = sk.sk_position.column in
891891+ if roll_indent t col ~sequence:false then begin
892892+ let span = Span.point sk.sk_position in
893893+ (* Insert block mapping start before key *)
894894+ let bm_token = { Token.token = Token.Block_mapping_start; span } in
895895+ let tokens = Queue.to_seq t.tokens |> Array.of_seq in
896896+ Queue.clear t.tokens;
897897+ Array.iteri (fun i tok ->
898898+ if i = insert_pos then Queue.add bm_token t.tokens;
899899+ Queue.add tok t.tokens
900900+ ) tokens;
901901+ if insert_pos >= Array.length tokens then
902902+ Queue.add bm_token t.tokens;
903903+ t.token_number <- t.token_number + 1
904904+ end
905905+ end;
906906+ t.simple_keys <- None :: (List.tl t.simple_keys)
907907+ | _ ->
908908+ (* No simple key - this is a complex value *)
909909+ if t.flow_level = 0 then begin
910910+ if not t.allow_simple_key then
911911+ Error.raise_at (Input.mark t.input) Expected_key;
912912+ let col = column t in
913913+ if roll_indent t col ~sequence:false then begin
914914+ let span = Span.point (Input.mark t.input) in
915915+ emit t span Token.Block_mapping_start
916916+ end
917917+ end);
918918+ remove_simple_key t;
919919+ t.allow_simple_key <- t.flow_level = 0;
920920+ let start = Input.mark t.input in
921921+ ignore (Input.next t.input);
922922+ let span = Span.make ~start ~stop:(Input.mark t.input) in
923923+ emit t span Token.Value
924924+925925+and fetch_alias t =
926926+ save_simple_key t;
927927+ t.allow_simple_key <- false;
928928+ let start = Input.mark t.input in
929929+ ignore (Input.next t.input); (* consume * *)
930930+ let name, span = scan_anchor_alias t in
931931+ let span = Span.make ~start ~stop:span.stop in
932932+ emit t span (Token.Alias name)
933933+934934+and fetch_anchor t =
935935+ save_simple_key t;
936936+ t.allow_simple_key <- false;
937937+ let start = Input.mark t.input in
938938+ ignore (Input.next t.input); (* consume & *)
939939+ let name, span = scan_anchor_alias t in
940940+ let span = Span.make ~start ~stop:span.stop in
941941+ emit t span (Token.Anchor name)
942942+943943+and fetch_tag t =
944944+ save_simple_key t;
945945+ t.allow_simple_key <- false;
946946+ let handle, suffix, span = scan_tag t in
947947+ emit t span (Token.Tag { handle; suffix })
948948+949949+and fetch_block_scalar t literal =
950950+ remove_simple_key t;
951951+ t.allow_simple_key <- true;
952952+ let value, style, span = scan_block_scalar t literal in
953953+ emit t span (Token.Scalar { style; value })
954954+955955+and fetch_single_quoted t =
956956+ save_simple_key t;
957957+ t.allow_simple_key <- false;
958958+ let value, span = scan_single_quoted t in
959959+ emit t span (Token.Scalar { style = Scalar_style.Single_quoted; value })
960960+961961+and fetch_double_quoted t =
962962+ save_simple_key t;
963963+ t.allow_simple_key <- false;
964964+ let value, span = scan_double_quoted t in
965965+ emit t span (Token.Scalar { style = Scalar_style.Double_quoted; value })
966966+967967+and can_start_plain t =
968968+ (* Check if - ? : can start a plain scalar *)
969969+ match Input.peek_nth t.input 1 with
970970+ | None -> false
971971+ | Some c ->
972972+ not (Input.is_whitespace c) &&
973973+ (t.flow_level = 0 || not (Input.is_flow_indicator c))
974974+975975+and can_start_plain_char c _t =
976976+ (* Characters that can start a plain scalar *)
977977+ if Input.is_whitespace c then false
978978+ else if Input.is_indicator c then false
979979+ else true
980980+981981+and fetch_plain_scalar t =
982982+ save_simple_key t;
983983+ t.allow_simple_key <- false;
984984+ let value, span = scan_plain_scalar t in
985985+ emit t span (Token.Scalar { style = Scalar_style.Plain; value })
986986+987987+(** Check if we need more tokens to resolve simple keys *)
988988+let need_more_tokens t =
989989+ if t.stream_ended then false
990990+ else if Queue.is_empty t.tokens then true
991991+ else
992992+ (* Check if any simple key could affect the first queued token *)
993993+ List.exists (function
994994+ | Some sk when sk.sk_possible ->
995995+ sk.sk_token_number >= t.tokens_taken
996996+ | _ -> false
997997+ ) t.simple_keys
998998+999999+(** Ensure we have enough tokens to return one safely *)
10001000+let ensure_tokens t =
10011001+ if not t.stream_started then begin
10021002+ t.stream_started <- true;
10031003+ let span = Span.point (Input.position t.input) in
10041004+ let encoding, _ = Encoding.detect t.input.source in
10051005+ emit t span (Token.Stream_start encoding)
10061006+ end;
10071007+ while need_more_tokens t do
10081008+ fetch_next_token t
10091009+ done
10101010+10111011+(** Get next token *)
10121012+let next t =
10131013+ ensure_tokens t;
10141014+ if Queue.is_empty t.tokens then
10151015+ None
10161016+ else begin
10171017+ t.tokens_taken <- t.tokens_taken + 1;
10181018+ Some (Queue.pop t.tokens)
10191019+ end
10201020+10211021+(** Peek at next token *)
10221022+let peek t =
10231023+ ensure_tokens t;
10241024+ Queue.peek_opt t.tokens
10251025+10261026+(** Iterate over all tokens *)
10271027+let iter f t =
10281028+ let rec loop () =
10291029+ match next t with
10301030+ | None -> ()
10311031+ | Some tok -> f tok; loop ()
10321032+ in
10331033+ loop ()
10341034+10351035+(** Fold over all tokens *)
10361036+let fold f init t =
10371037+ let rec loop acc =
10381038+ match next t with
10391039+ | None -> acc
10401040+ | Some tok -> loop (f acc tok)
10411041+ in
10421042+ loop init
10431043+10441044+(** Convert to list *)
10451045+let to_list t =
10461046+ fold (fun acc tok -> tok :: acc) [] t |> List.rev
+72
yaml/ocaml-yamle/lib/sequence.ml
···11+(** YAML sequence (array) values with metadata *)
22+33+type 'a t = {
44+ anchor : string option;
55+ tag : string option;
66+ implicit : bool;
77+ style : Layout_style.t;
88+ members : 'a list;
99+}
1010+1111+let make
1212+ ?(anchor : string option)
1313+ ?(tag : string option)
1414+ ?(implicit = true)
1515+ ?(style = Layout_style.Any)
1616+ members =
1717+ { anchor; tag; implicit; style; members }
1818+1919+let members t = t.members
2020+let anchor t = t.anchor
2121+let tag t = t.tag
2222+let implicit t = t.implicit
2323+let style t = t.style
2424+2525+let with_anchor anchor t = { t with anchor = Some anchor }
2626+let with_tag tag t = { t with tag = Some tag }
2727+let with_style style t = { t with style }
2828+2929+let map f t = { t with members = List.map f t.members }
3030+3131+let length t = List.length t.members
3232+3333+let is_empty t = t.members = []
3434+3535+let nth t n = List.nth t.members n
3636+3737+let nth_opt t n = List.nth_opt t.members n
3838+3939+let iter f t = List.iter f t.members
4040+4141+let fold f init t = List.fold_left f init t.members
4242+4343+let pp pp_elem fmt t =
4444+ Format.fprintf fmt "@[<hv 2>sequence(@,";
4545+ (match t.anchor with
4646+ | Some a -> Format.fprintf fmt "anchor=%s,@ " a
4747+ | None -> ());
4848+ (match t.tag with
4949+ | Some tag -> Format.fprintf fmt "tag=%s,@ " tag
5050+ | None -> ());
5151+ Format.fprintf fmt "style=%a,@ " Layout_style.pp t.style;
5252+ Format.fprintf fmt "members=[@,%a@]@,)"
5353+ (Format.pp_print_list ~pp_sep:(fun fmt () -> Format.fprintf fmt ",@ ") pp_elem)
5454+ t.members
5555+5656+let equal eq a b =
5757+ Option.equal String.equal a.anchor b.anchor &&
5858+ Option.equal String.equal a.tag b.tag &&
5959+ a.implicit = b.implicit &&
6060+ Layout_style.equal a.style b.style &&
6161+ List.equal eq a.members b.members
6262+6363+let compare cmp a b =
6464+ let c = Option.compare String.compare a.anchor b.anchor in
6565+ if c <> 0 then c else
6666+ let c = Option.compare String.compare a.tag b.tag in
6767+ if c <> 0 then c else
6868+ let c = Bool.compare a.implicit b.implicit in
6969+ if c <> 0 then c else
7070+ let c = Layout_style.compare a.style b.style in
7171+ if c <> 0 then c else
7272+ List.compare cmp a.members b.members
+35
yaml/ocaml-yamle/lib/span.ml
···11+(** Source spans representing ranges in input *)
22+33+type t = {
44+ start : Position.t;
55+ stop : Position.t;
66+}
77+88+let make ~start ~stop = { start; stop }
99+1010+let point pos = { start = pos; stop = pos }
1111+1212+let merge a b =
1313+ let start = if Position.compare a.start b.start <= 0 then a.start else b.start in
1414+ let stop = if Position.compare a.stop b.stop >= 0 then a.stop else b.stop in
1515+ { start; stop }
1616+1717+let extend span pos =
1818+ { span with stop = pos }
1919+2020+let pp fmt t =
2121+ if t.start.line = t.stop.line then
2222+ Format.fprintf fmt "line %d, columns %d-%d"
2323+ t.start.line t.start.column t.stop.column
2424+ else
2525+ Format.fprintf fmt "lines %d-%d" t.start.line t.stop.line
2626+2727+let to_string t =
2828+ Format.asprintf "%a" pp t
2929+3030+let compare a b =
3131+ let c = Position.compare a.start b.start in
3232+ if c <> 0 then c else Position.compare a.stop b.stop
3333+3434+let equal a b =
3535+ Position.equal a.start b.start && Position.equal a.stop b.stop
+70
yaml/ocaml-yamle/lib/tag.ml
···11+(** YAML tags for type information *)
22+33+type t = {
44+ handle : string; (** e.g., "!" or "!!" or "!foo!" *)
55+ suffix : string; (** e.g., "str", "int", "custom/type" *)
66+}
77+88+let make ~handle ~suffix = { handle; suffix }
99+1010+let of_string s =
1111+ if String.length s = 0 then None
1212+ else if s.[0] <> '!' then None
1313+ else
1414+ (* Find the suffix after the handle *)
1515+ let len = String.length s in
1616+ if len = 1 then Some { handle = "!"; suffix = "" }
1717+ else if s.[1] = '!' then
1818+ (* !! handle *)
1919+ Some { handle = "!!"; suffix = String.sub s 2 (len - 2) }
2020+ else if s.[1] = '<' then
2121+ (* Verbatim tag !<...> *)
2222+ if len > 2 && s.[len - 1] = '>' then
2323+ Some { handle = "!"; suffix = String.sub s 2 (len - 3) }
2424+ else
2525+ None
2626+ else
2727+ (* Primary handle or local tag *)
2828+ Some { handle = "!"; suffix = String.sub s 1 (len - 1) }
2929+3030+let to_string t =
3131+ if t.handle = "!" && t.suffix = "" then "!"
3232+ else t.handle ^ t.suffix
3333+3434+let to_uri t =
3535+ match t.handle with
3636+ | "!!" -> "tag:yaml.org,2002:" ^ t.suffix
3737+ | "!" -> "!" ^ t.suffix
3838+ | h -> h ^ t.suffix
3939+4040+let pp fmt t =
4141+ Format.pp_print_string fmt (to_string t)
4242+4343+let equal a b =
4444+ String.equal a.handle b.handle && String.equal a.suffix b.suffix
4545+4646+let compare a b =
4747+ let c = String.compare a.handle b.handle in
4848+ if c <> 0 then c else String.compare a.suffix b.suffix
4949+5050+(** Standard tags *)
5151+5252+let null = { handle = "!!"; suffix = "null" }
5353+let bool = { handle = "!!"; suffix = "bool" }
5454+let int = { handle = "!!"; suffix = "int" }
5555+let float = { handle = "!!"; suffix = "float" }
5656+let str = { handle = "!!"; suffix = "str" }
5757+let seq = { handle = "!!"; suffix = "seq" }
5858+let map = { handle = "!!"; suffix = "map" }
5959+let binary = { handle = "!!"; suffix = "binary" }
6060+let timestamp = { handle = "!!"; suffix = "timestamp" }
6161+6262+(** Check if tag matches a standard type *)
6363+6464+let is_null t = equal t null || (t.handle = "!" && t.suffix = "")
6565+let is_bool t = equal t bool
6666+let is_int t = equal t int
6767+let is_float t = equal t float
6868+let is_str t = equal t str
6969+let is_seq t = equal t seq
7070+let is_map t = equal t map
···11+(** JSON-compatible YAML value representation *)
22+33+type t = [
44+ | `Null
55+ | `Bool of bool
66+ | `Float of float
77+ | `String of string
88+ | `A of t list
99+ | `O of (string * t) list
1010+]
1111+1212+(** Constructors *)
1313+1414+let null : t = `Null
1515+let bool b : t = `Bool b
1616+let int n : t = `Float (Float.of_int n)
1717+let float f : t = `Float f
1818+let string s : t = `String s
1919+2020+let list f xs : t = `A (List.map f xs)
2121+let obj pairs : t = `O pairs
2222+2323+(** Type name for error messages *)
2424+let type_name : t -> string = function
2525+ | `Null -> "null"
2626+ | `Bool _ -> "bool"
2727+ | `Float _ -> "float"
2828+ | `String _ -> "string"
2929+ | `A _ -> "array"
3030+ | `O _ -> "object"
3131+3232+(** Safe accessors (return option) *)
3333+3434+let as_null = function `Null -> Some () | _ -> None
3535+let as_bool = function `Bool b -> Some b | _ -> None
3636+let as_float = function `Float f -> Some f | _ -> None
3737+let as_string = function `String s -> Some s | _ -> None
3838+let as_list = function `A l -> Some l | _ -> None
3939+let as_assoc = function `O o -> Some o | _ -> None
4040+4141+let as_int = function
4242+ | `Float f ->
4343+ let i = Float.to_int f in
4444+ if Float.equal (Float.of_int i) f then Some i else None
4545+ | _ -> None
4646+4747+(** Unsafe accessors (raise on type mismatch) *)
4848+4949+let to_null v =
5050+ match as_null v with
5151+ | Some () -> ()
5252+ | None -> Error.raise (Type_mismatch ("null", type_name v))
5353+5454+let to_bool v =
5555+ match as_bool v with
5656+ | Some b -> b
5757+ | None -> Error.raise (Type_mismatch ("bool", type_name v))
5858+5959+let to_float v =
6060+ match as_float v with
6161+ | Some f -> f
6262+ | None -> Error.raise (Type_mismatch ("float", type_name v))
6363+6464+let to_string v =
6565+ match as_string v with
6666+ | Some s -> s
6767+ | None -> Error.raise (Type_mismatch ("string", type_name v))
6868+6969+let to_list v =
7070+ match as_list v with
7171+ | Some l -> l
7272+ | None -> Error.raise (Type_mismatch ("array", type_name v))
7373+7474+let to_assoc v =
7575+ match as_assoc v with
7676+ | Some o -> o
7777+ | None -> Error.raise (Type_mismatch ("object", type_name v))
7878+7979+let to_int v =
8080+ match as_int v with
8181+ | Some i -> i
8282+ | None -> Error.raise (Type_mismatch ("int", type_name v))
8383+8484+(** Object access *)
8585+8686+let mem key = function
8787+ | `O pairs -> List.exists (fun (k, _) -> k = key) pairs
8888+ | _ -> false
8989+9090+let find key = function
9191+ | `O pairs -> List.assoc_opt key pairs
9292+ | _ -> None
9393+9494+let get key v =
9595+ match find key v with
9696+ | Some v -> v
9797+ | None -> Error.raise (Key_not_found key)
9898+9999+let keys = function
100100+ | `O pairs -> List.map fst pairs
101101+ | v -> Error.raise (Type_mismatch ("object", type_name v))
102102+103103+let values = function
104104+ | `O pairs -> List.map snd pairs
105105+ | v -> Error.raise (Type_mismatch ("object", type_name v))
106106+107107+(** Combinators *)
108108+109109+let combine v1 v2 =
110110+ match v1, v2 with
111111+ | `O o1, `O o2 -> `O (o1 @ o2)
112112+ | v1, _ -> Error.raise (Type_mismatch ("object", type_name v1))
113113+114114+let map f = function
115115+ | `A l -> `A (List.map f l)
116116+ | v -> Error.raise (Type_mismatch ("array", type_name v))
117117+118118+let filter pred = function
119119+ | `A l -> `A (List.filter pred l)
120120+ | v -> Error.raise (Type_mismatch ("array", type_name v))
121121+122122+(** Pretty printing *)
123123+124124+let rec pp fmt (v : t) =
125125+ match v with
126126+ | `Null -> Format.pp_print_string fmt "null"
127127+ | `Bool b -> Format.pp_print_bool fmt b
128128+ | `Float f ->
129129+ if Float.is_integer f && Float.abs f < 1e15 then
130130+ Format.fprintf fmt "%.0f" f
131131+ else
132132+ Format.fprintf fmt "%g" f
133133+ | `String s -> Format.fprintf fmt "%S" s
134134+ | `A [] -> Format.pp_print_string fmt "[]"
135135+ | `A items ->
136136+ Format.fprintf fmt "@[<hv 2>[@,%a@]@,]"
137137+ (Format.pp_print_list ~pp_sep:(fun fmt () -> Format.fprintf fmt ",@ ") pp)
138138+ items
139139+ | `O [] -> Format.pp_print_string fmt "{}"
140140+ | `O pairs ->
141141+ Format.fprintf fmt "@[<hv 2>{@,%a@]@,}"
142142+ (Format.pp_print_list ~pp_sep:(fun fmt () -> Format.fprintf fmt ",@ ")
143143+ (fun fmt (k, v) -> Format.fprintf fmt "@[<hv 2>%S:@ %a@]" k pp v))
144144+ pairs
145145+146146+(** Equality and comparison *)
147147+148148+let rec equal (a : t) (b : t) =
149149+ match a, b with
150150+ | `Null, `Null -> true
151151+ | `Bool a, `Bool b -> a = b
152152+ | `Float a, `Float b -> Float.equal a b
153153+ | `String a, `String b -> String.equal a b
154154+ | `A a, `A b -> List.equal equal a b
155155+ | `O a, `O b ->
156156+ List.length a = List.length b &&
157157+ List.for_all2 (fun (k1, v1) (k2, v2) -> k1 = k2 && equal v1 v2) a b
158158+ | _ -> false
159159+160160+let rec compare (a : t) (b : t) =
161161+ match a, b with
162162+ | `Null, `Null -> 0
163163+ | `Null, _ -> -1
164164+ | _, `Null -> 1
165165+ | `Bool a, `Bool b -> Bool.compare a b
166166+ | `Bool _, _ -> -1
167167+ | _, `Bool _ -> 1
168168+ | `Float a, `Float b -> Float.compare a b
169169+ | `Float _, _ -> -1
170170+ | _, `Float _ -> 1
171171+ | `String a, `String b -> String.compare a b
172172+ | `String _, _ -> -1
173173+ | _, `String _ -> 1
174174+ | `A a, `A b -> List.compare compare a b
175175+ | `A _, _ -> -1
176176+ | _, `A _ -> 1
177177+ | `O a, `O b ->
178178+ let cmp_pair (k1, v1) (k2, v2) =
179179+ let c = String.compare k1 k2 in
180180+ if c <> 0 then c else compare v1 v2
181181+ in
182182+ List.compare cmp_pair a b
+224
yaml/ocaml-yamle/lib/yaml.ml
···11+(** Full YAML representation with anchors, tags, and aliases *)
22+33+type t = [
44+ | `Scalar of Scalar.t
55+ | `Alias of string
66+ | `A of t Sequence.t
77+ | `O of (t, t) Mapping.t
88+]
99+1010+(** Pretty printing *)
1111+1212+let rec pp fmt (v : t) =
1313+ match v with
1414+ | `Scalar s -> Scalar.pp fmt s
1515+ | `Alias name -> Format.fprintf fmt "*%s" name
1616+ | `A seq -> Sequence.pp pp fmt seq
1717+ | `O map -> Mapping.pp pp pp fmt map
1818+1919+(** Equality *)
2020+2121+let rec equal (a : t) (b : t) =
2222+ match a, b with
2323+ | `Scalar a, `Scalar b -> Scalar.equal a b
2424+ | `Alias a, `Alias b -> String.equal a b
2525+ | `A a, `A b -> Sequence.equal equal a b
2626+ | `O a, `O b -> Mapping.equal equal equal a b
2727+ | _ -> false
2828+2929+(** Construct from JSON-compatible Value *)
3030+3131+let rec of_value (v : Value.t) : t =
3232+ match v with
3333+ | `Null -> `Scalar (Scalar.make "null")
3434+ | `Bool true -> `Scalar (Scalar.make "true")
3535+ | `Bool false -> `Scalar (Scalar.make "false")
3636+ | `Float f ->
3737+ let s =
3838+ if Float.is_integer f && Float.abs f < 1e15 then
3939+ Printf.sprintf "%.0f" f
4040+ else
4141+ Printf.sprintf "%g" f
4242+ in
4343+ `Scalar (Scalar.make s)
4444+ | `String s ->
4545+ `Scalar (Scalar.make s ~style:Scalar_style.Double_quoted)
4646+ | `A items ->
4747+ `A (Sequence.make (List.map of_value items))
4848+ | `O pairs ->
4949+ `O (Mapping.make (List.map (fun (k, v) ->
5050+ (`Scalar (Scalar.make k), of_value v)
5151+ ) pairs))
5252+5353+(** Convert to JSON-compatible Value *)
5454+5555+let rec to_value (v : t) : Value.t =
5656+ match v with
5757+ | `Scalar s -> scalar_to_value s
5858+ | `Alias name -> Error.raise (Unresolved_alias name)
5959+ | `A seq -> `A (List.map to_value (Sequence.members seq))
6060+ | `O map ->
6161+ `O (List.map (fun (k, v) ->
6262+ let key = match k with
6363+ | `Scalar s -> Scalar.value s
6464+ | _ -> Error.raise (Type_mismatch ("string key", "complex key"))
6565+ in
6666+ (key, to_value v)
6767+ ) (Mapping.members map))
6868+6969+(** Convert scalar to JSON value based on content *)
7070+and scalar_to_value s =
7171+ let value = Scalar.value s in
7272+ let tag = Scalar.tag s in
7373+ let style = Scalar.style s in
7474+7575+ (* If explicitly tagged, respect the tag *)
7676+ match tag with
7777+ | Some "tag:yaml.org,2002:null" | Some "!!null" ->
7878+ `Null
7979+ | Some "tag:yaml.org,2002:bool" | Some "!!bool" ->
8080+ (match String.lowercase_ascii value with
8181+ | "true" | "yes" | "on" -> `Bool true
8282+ | "false" | "no" | "off" -> `Bool false
8383+ | _ -> Error.raise (Invalid_scalar_conversion (value, "bool")))
8484+ | Some "tag:yaml.org,2002:int" | Some "!!int" ->
8585+ (try `Float (Float.of_string value)
8686+ with _ -> Error.raise (Invalid_scalar_conversion (value, "int")))
8787+ | Some "tag:yaml.org,2002:float" | Some "!!float" ->
8888+ (try `Float (Float.of_string value)
8989+ with _ -> Error.raise (Invalid_scalar_conversion (value, "float")))
9090+ | Some "tag:yaml.org,2002:str" | Some "!!str" ->
9191+ `String value
9292+ | Some _ ->
9393+ (* Unknown tag - treat as string *)
9494+ `String value
9595+ | None ->
9696+ (* Implicit type resolution for plain scalars *)
9797+ if style <> Scalar_style.Plain then
9898+ `String value
9999+ else
100100+ infer_scalar_type value
101101+102102+(** Infer type from plain scalar value *)
103103+and infer_scalar_type value =
104104+ let lower = String.lowercase_ascii value in
105105+ (* Null *)
106106+ if value = "" || lower = "null" || lower = "~" then
107107+ `Null
108108+ (* Boolean *)
109109+ else if lower = "true" || lower = "yes" || lower = "on" then
110110+ `Bool true
111111+ else if lower = "false" || lower = "no" || lower = "off" then
112112+ `Bool false
113113+ (* Special floats *)
114114+ else if lower = ".inf" || lower = "+.inf" then
115115+ `Float Float.infinity
116116+ else if lower = "-.inf" then
117117+ `Float Float.neg_infinity
118118+ else if lower = ".nan" then
119119+ `Float Float.nan
120120+ (* Try numeric *)
121121+ else
122122+ try_parse_number value
123123+124124+(** Try to parse as number *)
125125+and try_parse_number value =
126126+ (* Try integer first *)
127127+ let try_int () =
128128+ if String.length value > 0 then
129129+ let first = value.[0] in
130130+ if first = '-' || first = '+' || (first >= '0' && first <= '9') then
131131+ try
132132+ (* Handle octal: 0o prefix or leading 0 *)
133133+ if String.length value > 2 && value.[0] = '0' then
134134+ match value.[1] with
135135+ | 'x' | 'X' ->
136136+ (* Hex *)
137137+ Some (`Float (Float.of_int (int_of_string value)))
138138+ | 'o' | 'O' ->
139139+ (* Octal *)
140140+ Some (`Float (Float.of_int (int_of_string value)))
141141+ | 'b' | 'B' ->
142142+ (* Binary *)
143143+ Some (`Float (Float.of_int (int_of_string value)))
144144+ | _ ->
145145+ (* Decimal with leading zero or octal in YAML 1.1 *)
146146+ Some (`Float (Float.of_string value))
147147+ else
148148+ Some (`Float (Float.of_string value))
149149+ with _ -> None
150150+ else None
151151+ else None
152152+ in
153153+ match try_int () with
154154+ | Some v -> v
155155+ | None ->
156156+ (* Try float *)
157157+ try
158158+ let f = Float.of_string value in
159159+ `Float f
160160+ with _ ->
161161+ (* Not a number - it's a string *)
162162+ `String value
163163+164164+(** Resolve aliases by replacing them with referenced nodes *)
165165+166166+let resolve_aliases (root : t) : t =
167167+ let anchors = Hashtbl.create 16 in
168168+169169+ (* First pass: collect all anchors *)
170170+ let rec collect (v : t) =
171171+ match v with
172172+ | `Scalar s ->
173173+ (match Scalar.anchor s with
174174+ | Some name -> Hashtbl.replace anchors name v
175175+ | None -> ())
176176+ | `Alias _ -> ()
177177+ | `A seq ->
178178+ (match Sequence.anchor seq with
179179+ | Some name -> Hashtbl.replace anchors name v
180180+ | None -> ());
181181+ List.iter collect (Sequence.members seq)
182182+ | `O map ->
183183+ (match Mapping.anchor map with
184184+ | Some name -> Hashtbl.replace anchors name v
185185+ | None -> ());
186186+ List.iter (fun (k, v) -> collect k; collect v) (Mapping.members map)
187187+ in
188188+ collect root;
189189+190190+ (* Second pass: resolve aliases *)
191191+ let rec resolve (v : t) : t =
192192+ match v with
193193+ | `Scalar _ -> v
194194+ | `Alias name ->
195195+ (match Hashtbl.find_opt anchors name with
196196+ | Some target -> resolve target
197197+ | None -> Error.raise (Undefined_alias name))
198198+ | `A seq ->
199199+ `A (Sequence.map resolve seq)
200200+ | `O map ->
201201+ `O (Mapping.make
202202+ ?anchor:(Mapping.anchor map)
203203+ ?tag:(Mapping.tag map)
204204+ ~implicit:(Mapping.implicit map)
205205+ ~style:(Mapping.style map)
206206+ (List.map (fun (k, v) -> (resolve k, resolve v)) (Mapping.members map)))
207207+ in
208208+ resolve root
209209+210210+(** Get anchor from any node *)
211211+let anchor (v : t) =
212212+ match v with
213213+ | `Scalar s -> Scalar.anchor s
214214+ | `Alias _ -> None
215215+ | `A seq -> Sequence.anchor seq
216216+ | `O map -> Mapping.anchor map
217217+218218+(** Get tag from any node *)
219219+let tag (v : t) =
220220+ match v with
221221+ | `Scalar s -> Scalar.tag s
222222+ | `Alias _ -> None
223223+ | `A seq -> Sequence.tag seq
224224+ | `O map -> Mapping.tag map
+149
yaml/ocaml-yamle/lib/yamle.ml
···11+type value = Value.t
22+type yaml = Yaml.t
33+44+type version = [ `V1_1 | `V1_2 ]
55+66+type encoding = Encoding.t
77+type scalar_style = Scalar_style.t
88+type layout_style = Layout_style.t
99+1010+(** {1 Error handling} *)
1111+1212+type error = Error.t
1313+exception Yamle_error = Error.Yamle_error
1414+1515+(** {1 JSON-compatible parsing} *)
1616+1717+let of_string s = Loader.value_of_string s
1818+1919+(** {1 JSON-compatible emission} *)
2020+2121+let to_string
2222+ ?(encoding = Encoding.Utf8)
2323+ ?(scalar_style = Scalar_style.Any)
2424+ ?(layout_style = Layout_style.Any)
2525+ value =
2626+ let config = {
2727+ Emitter.default_config with
2828+ encoding;
2929+ scalar_style;
3030+ layout_style;
3131+ } in
3232+ Emitter.value_to_string ~config value
3333+3434+(** {1 YAML-specific parsing} *)
3535+3636+let yaml_of_string s = Loader.yaml_of_string s
3737+3838+(** {1 YAML-specific emission} *)
3939+4040+let yaml_to_string
4141+ ?(encoding = Encoding.Utf8)
4242+ ?(scalar_style = Scalar_style.Any)
4343+ ?(layout_style = Layout_style.Any)
4444+ yaml =
4545+ let config = {
4646+ Emitter.default_config with
4747+ encoding;
4848+ scalar_style;
4949+ layout_style;
5050+ } in
5151+ Emitter.yaml_to_string ~config yaml
5252+5353+(** {1 Conversion} *)
5454+5555+let to_json yaml = Yaml.to_value yaml
5656+5757+let of_json value = Yaml.of_value value
5858+5959+(** {1 Pretty printing} *)
6060+6161+let pp = Value.pp
6262+let pp_yaml = Yaml.pp
6363+let equal = Value.equal
6464+let equal_yaml = Yaml.equal
6565+6666+(** {1 Nested modules} *)
6767+6868+module Error = Error
6969+module Position = Position
7070+module Span = Span
7171+module Encoding = Encoding
7272+module Input = Input
7373+module Scalar_style = Scalar_style
7474+module Layout_style = Layout_style
7575+module Chomping = Chomping
7676+module Token = Token
7777+module Scanner = Scanner
7878+module Event = Event
7979+module Parser = Parser
8080+module Tag = Tag
8181+module Value = Value
8282+module Scalar = Scalar
8383+module Sequence = Sequence
8484+module Mapping = Mapping
8585+module Yaml = Yaml
8686+module Document = Document
8787+module Loader = Loader
8888+module Emitter = Emitter
8989+9090+(** {1 Streaming interface} *)
9191+9292+module Stream = struct
9393+ type parser = Parser.t
9494+ type emitter = Emitter.t
9595+9696+ let parser s = Parser.of_string s
9797+9898+ let do_parse p = Parser.next p
9999+100100+ let emitter ?len:_ () = Emitter.create ()
101101+102102+ let emit e ev = Emitter.emit e ev
103103+104104+ let emitter_buf e = Emitter.contents e
105105+106106+ (** Convenience event emitters *)
107107+108108+ let stream_start e enc =
109109+ Emitter.emit e (Event.Stream_start { encoding = enc })
110110+111111+ let stream_end e =
112112+ Emitter.emit e Event.Stream_end
113113+114114+ let document_start ?version ?(implicit = true) e =
115115+ let version = match version with
116116+ | Some `V1_1 -> Some (1, 1)
117117+ | Some `V1_2 -> Some (1, 2)
118118+ | None -> None
119119+ in
120120+ Emitter.emit e (Event.Document_start { version; implicit })
121121+122122+ let document_end ?(implicit = true) e =
123123+ Emitter.emit e (Event.Document_end { implicit })
124124+125125+ let scalar s e =
126126+ Emitter.emit e (Event.Scalar {
127127+ anchor = Scalar.anchor s;
128128+ tag = Scalar.tag s;
129129+ value = Scalar.value s;
130130+ plain_implicit = Scalar.plain_implicit s;
131131+ quoted_implicit = Scalar.quoted_implicit s;
132132+ style = Scalar.style s;
133133+ })
134134+135135+ let alias e name =
136136+ Emitter.emit e (Event.Alias { anchor = name })
137137+138138+ let sequence_start ?anchor ?tag ?(implicit = true) ?(style = Layout_style.Any) e =
139139+ Emitter.emit e (Event.Sequence_start { anchor; tag; implicit; style })
140140+141141+ let sequence_end e =
142142+ Emitter.emit e Event.Sequence_end
143143+144144+ let mapping_start ?anchor ?tag ?(implicit = true) ?(style = Layout_style.Any) e =
145145+ Emitter.emit e (Event.Mapping_start { anchor; tag; implicit; style })
146146+147147+ let mapping_end e =
148148+ Emitter.emit e Event.Mapping_end
149149+end
···11+(** Tests for the Yamle library *)
22+33+open Yamle
44+55+(** Test helpers *)
66+77+let check_value msg expected actual =
88+ Alcotest.(check bool) msg true (Value.equal expected actual)
99+1010+let _check_string msg expected actual =
1111+ Alcotest.(check string) msg expected actual
1212+1313+(** Scanner tests *)
1414+1515+let test_scanner_simple () =
1616+ let scanner = Scanner.of_string "hello: world" in
1717+ let tokens = Scanner.to_list scanner in
1818+ let token_types = List.map (fun (t : Token.spanned) -> t.token) tokens in
1919+ Alcotest.(check int) "token count" 8 (List.length token_types);
2020+ (* Stream_start, Block_mapping_start, Key, Scalar, Value, Scalar, Block_end, Stream_end *)
2121+ match token_types with
2222+ | Token.Stream_start _ :: Token.Block_mapping_start :: Token.Key ::
2323+ Token.Scalar { value = "hello"; _ } :: Token.Value ::
2424+ Token.Scalar { value = "world"; _ } :: Token.Block_end :: Token.Stream_end :: [] ->
2525+ ()
2626+ | _ ->
2727+ Alcotest.fail "unexpected token sequence"
2828+2929+let test_scanner_sequence () =
3030+ let scanner = Scanner.of_string "- one\n- two\n- three" in
3131+ let tokens = Scanner.to_list scanner in
3232+ Alcotest.(check bool) "has tokens" true (List.length tokens > 0)
3333+3434+let test_scanner_flow () =
3535+ let scanner = Scanner.of_string "[1, 2, 3]" in
3636+ let tokens = Scanner.to_list scanner in
3737+ let has_flow_start = List.exists (fun (t : Token.spanned) ->
3838+ match t.token with Token.Flow_sequence_start -> true | _ -> false
3939+ ) tokens in
4040+ Alcotest.(check bool) "has flow sequence start" true has_flow_start
4141+4242+let scanner_tests = [
4343+ "simple mapping", `Quick, test_scanner_simple;
4444+ "sequence", `Quick, test_scanner_sequence;
4545+ "flow sequence", `Quick, test_scanner_flow;
4646+]
4747+4848+(** Parser tests *)
4949+5050+let test_parser_events () =
5151+ let parser = Parser.of_string "key: value" in
5252+ let events = Parser.to_list parser in
5353+ Alcotest.(check bool) "has events" true (List.length events > 0);
5454+ let has_stream_start = List.exists (fun (e : Event.spanned) ->
5555+ match e.event with Event.Stream_start _ -> true | _ -> false
5656+ ) events in
5757+ Alcotest.(check bool) "has stream start" true has_stream_start
5858+5959+let test_parser_sequence_events () =
6060+ let parser = Parser.of_string "- a\n- b" in
6161+ let events = Parser.to_list parser in
6262+ let has_seq_start = List.exists (fun (e : Event.spanned) ->
6363+ match e.event with Event.Sequence_start _ -> true | _ -> false
6464+ ) events in
6565+ Alcotest.(check bool) "has sequence start" true has_seq_start
6666+6767+let parser_tests = [
6868+ "parse events", `Quick, test_parser_events;
6969+ "sequence events", `Quick, test_parser_sequence_events;
7070+]
7171+7272+(** Value parsing tests *)
7373+7474+let test_parse_null () =
7575+ check_value "null" `Null (of_string "null");
7676+ check_value "~" `Null (of_string "~");
7777+ check_value "empty" `Null (of_string "")
7878+7979+let test_parse_bool () =
8080+ check_value "true" (`Bool true) (of_string "true");
8181+ check_value "false" (`Bool false) (of_string "false");
8282+ check_value "yes" (`Bool true) (of_string "yes");
8383+ check_value "no" (`Bool false) (of_string "no")
8484+8585+let test_parse_number () =
8686+ check_value "integer" (`Float 42.0) (of_string "42");
8787+ check_value "negative" (`Float (-17.0)) (of_string "-17");
8888+ check_value "float" (`Float 3.14) (of_string "3.14")
8989+9090+let test_parse_string () =
9191+ check_value "plain" (`String "hello") (of_string "hello world" |> function `String s -> `String (String.sub s 0 5) | v -> v);
9292+ check_value "quoted" (`String "hello") (of_string {|"hello"|})
9393+9494+let test_parse_sequence () =
9595+ let result = of_string "- one\n- two\n- three" in
9696+ match result with
9797+ | `A [_; _; _] -> ()
9898+ | _ -> Alcotest.fail "expected sequence with 3 elements"
9999+100100+let test_parse_mapping () =
101101+ let result = of_string "name: Alice\nage: 30" in
102102+ match result with
103103+ | `O pairs when List.length pairs = 2 -> ()
104104+ | _ -> Alcotest.fail "expected mapping with 2 pairs"
105105+106106+let test_parse_nested () =
107107+ let yaml = {|
108108+person:
109109+ name: Bob
110110+ hobbies:
111111+ - reading
112112+ - coding
113113+|} in
114114+ let result = of_string yaml in
115115+ match result with
116116+ | `O [("person", `O _)] -> ()
117117+ | _ -> Alcotest.fail "expected nested structure"
118118+119119+let test_parse_flow_sequence () =
120120+ let result = of_string "[1, 2, 3]" in
121121+ match result with
122122+ | `A [`Float 1.0; `Float 2.0; `Float 3.0] -> ()
123123+ | _ -> Alcotest.fail "expected flow sequence [1, 2, 3]"
124124+125125+let test_parse_flow_mapping () =
126126+ let result = of_string "{a: 1, b: 2}" in
127127+ match result with
128128+ | `O [("a", `Float 1.0); ("b", `Float 2.0)] -> ()
129129+ | _ -> Alcotest.fail "expected flow mapping {a: 1, b: 2}"
130130+131131+let value_tests = [
132132+ "parse null", `Quick, test_parse_null;
133133+ "parse bool", `Quick, test_parse_bool;
134134+ "parse number", `Quick, test_parse_number;
135135+ "parse string", `Quick, test_parse_string;
136136+ "parse sequence", `Quick, test_parse_sequence;
137137+ "parse mapping", `Quick, test_parse_mapping;
138138+ "parse nested", `Quick, test_parse_nested;
139139+ "parse flow sequence", `Quick, test_parse_flow_sequence;
140140+ "parse flow mapping", `Quick, test_parse_flow_mapping;
141141+]
142142+143143+(** Emitter tests *)
144144+145145+let test_emit_null () =
146146+ let result = to_string `Null in
147147+ Alcotest.(check bool) "contains null" true (String.length result > 0)
148148+149149+let starts_with prefix s =
150150+ String.length s >= String.length prefix &&
151151+ String.sub s 0 (String.length prefix) = prefix
152152+153153+let test_emit_mapping () =
154154+ let value = `O [("name", `String "Alice"); ("age", `Float 30.0)] in
155155+ let result = to_string value in
156156+ let trimmed = String.trim result in
157157+ Alcotest.(check bool) "contains name" true (starts_with "name" trimmed || starts_with "\"name\"" trimmed)
158158+159159+let test_roundtrip_simple () =
160160+ let yaml = "name: Alice" in
161161+ let value = of_string yaml in
162162+ let _ = to_string value in
163163+ (* Just check it doesn't crash *)
164164+ ()
165165+166166+let test_roundtrip_sequence () =
167167+ let yaml = "- one\n- two\n- three" in
168168+ let value = of_string yaml in
169169+ match value with
170170+ | `A items when List.length items = 3 ->
171171+ let _ = to_string value in
172172+ ()
173173+ | _ -> Alcotest.fail "roundtrip failed"
174174+175175+let emitter_tests = [
176176+ "emit null", `Quick, test_emit_null;
177177+ "emit mapping", `Quick, test_emit_mapping;
178178+ "roundtrip simple", `Quick, test_roundtrip_simple;
179179+ "roundtrip sequence", `Quick, test_roundtrip_sequence;
180180+]
181181+182182+(** YAML-specific tests *)
183183+184184+let test_yaml_anchor () =
185185+ let yaml = "&anchor hello" in
186186+ let result = yaml_of_string yaml in
187187+ match result with
188188+ | `Scalar s when Scalar.anchor s = Some "anchor" -> ()
189189+ | _ -> Alcotest.fail "expected scalar with anchor"
190190+191191+let test_yaml_alias () =
192192+ let yaml = {|
193193+defaults: &defaults
194194+ timeout: 30
195195+production:
196196+ <<: *defaults
197197+ port: 8080
198198+|} in
199199+ (* Just check it parses without error *)
200200+ let _ = yaml_of_string yaml in
201201+ ()
202202+203203+let yaml_tests = [
204204+ "yaml anchor", `Quick, test_yaml_anchor;
205205+ "yaml alias", `Quick, test_yaml_alias;
206206+]
207207+208208+(** Multiline scalar tests *)
209209+210210+let test_literal_block () =
211211+ let yaml = {|description: |
212212+ This is a
213213+ multi-line
214214+ description
215215+|} in
216216+ let result = of_string yaml in
217217+ match result with
218218+ | `O [("description", `String _)] -> ()
219219+ | _ -> Alcotest.fail "expected mapping with literal block"
220220+221221+let test_folded_block () =
222222+ let yaml = {|description: >
223223+ This is a
224224+ folded
225225+ description
226226+|} in
227227+ let result = of_string yaml in
228228+ match result with
229229+ | `O [("description", `String _)] -> ()
230230+ | _ -> Alcotest.fail "expected mapping with folded block"
231231+232232+let multiline_tests = [
233233+ "literal block", `Quick, test_literal_block;
234234+ "folded block", `Quick, test_folded_block;
235235+]
236236+237237+(** Error handling tests *)
238238+239239+let test_error_position () =
240240+ try
241241+ let _ = of_string "key: [unclosed" in
242242+ Alcotest.fail "expected error"
243243+ with
244244+ | Yamle_error e ->
245245+ Alcotest.(check bool) "has span" true (e.span <> None)
246246+247247+let error_tests = [
248248+ "error position", `Quick, test_error_position;
249249+]
250250+251251+(** Run all tests *)
252252+253253+let () =
254254+ Alcotest.run "yamle" [
255255+ "scanner", scanner_tests;
256256+ "parser", parser_tests;
257257+ "value", value_tests;
258258+ "emitter", emitter_tests;
259259+ "yaml", yaml_tests;
260260+ "multiline", multiline_tests;
261261+ "errors", error_tests;
262262+ ]
+24
yaml/ocaml-yamle/tests/yaml/anchor.yml
···11+datetime: 2001-12-15T02:59:43.1Z
22+datetime_with_spaces: 2001-12-14 21:59:43.10 -5
33+date: 2002-12-14
44+55+# The !!binary tag indicates that a string is actually a base64-encoded
66+# representation of a binary blob.
77+gif_file: !!binary |
88+ R0lGODlhDAAMAIQAAP//9/X17unp5WZmZgAAAOfn515eXvPz7Y6OjuDg4J+fn5
99+ OTk6enp56enmlpaWNjY6Ojo4SEhP/++f/++f/++f/++f/++f/++f/++f/++f/+
1010+ +f/++f/++f/++f/++f/++SH+Dk1hZGUgd2l0aCBHSU1QACwAAAAADAAMAAAFLC
1111+ AgjoEwnuNAFOhpEMTRiggcz4BNJHrv/zCFcLiwMWYNG84BwwEeECcgggoBADs=
1212+1313+# YAML also has a set type, which looks like this:
1414+set:
1515+ ? item1
1616+ ? item2
1717+ ? item3
1818+1919+# Like Python, sets are just maps with null values; the above is equivalent to:
2020+set2:
2121+ item1: null
2222+ item2: null
2323+ item3: null
2424+
···11+- hello
22+ - whats
33+ - up
44+- foo
55+- bar
+3
yaml/ocaml-yamle/tests/yaml/yaml-1.2.yml
···11+- {"when the key is quoted":"space after colon can be omitted."}
22+- "quoted slashes \/ are allowed."
33+- {?"a key can be looooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooger": "than 1024 when parsing is unambiguous before seeing the colon."}
+32
yaml/ocaml-yamle/yamle.opam
···11+# This file is generated by dune, edit dune-project instead
22+opam-version: "2.0"
33+version: "0.1.0"
44+synopsis: "Pure OCaml YAML 1.2 parser and emitter"
55+description:
66+ "A pure OCaml implementation of YAML 1.2 parsing and emission, with no C dependencies."
77+maintainer: ["yamle@example.com"]
88+authors: ["Yamle Authors"]
99+license: "ISC"
1010+homepage: "https://github.com/ocaml/yamle"
1111+bug-reports: "https://github.com/ocaml/yamle/issues"
1212+depends: [
1313+ "ocaml" {>= "4.14.0"}
1414+ "dune" {>= "3.0" & >= "3.0"}
1515+ "alcotest" {with-test}
1616+ "odoc" {with-doc}
1717+]
1818+build: [
1919+ ["dune" "subst"] {dev}
2020+ [
2121+ "dune"
2222+ "build"
2323+ "-p"
2424+ name
2525+ "-j"
2626+ jobs
2727+ "@install"
2828+ "@runtest" {with-test}
2929+ "@doc" {with-doc}
3030+ ]
3131+]
3232+dev-repo: "git+https://github.com/ocaml/yamle.git"