···11(** yamlcat - parse and reprint YAML files *)
2233-let usage () =
44- Printf.eprintf "Usage: %s [OPTIONS] [FILE...]\n" Sys.argv.(0);
55- Printf.eprintf "\n";
66- Printf.eprintf "Parse YAML files and reprint them.\n";
77- Printf.eprintf "If no files are given, reads from stdin.\n";
88- Printf.eprintf "\n";
99- Printf.eprintf "Options:\n";
1010- Printf.eprintf " --all Output all documents (for multi-document YAML)\n";
1111- Printf.eprintf " --json Output as JSON format\n";
1212- Printf.eprintf " --flow Output YAML in flow style\n";
1313- Printf.eprintf " --debug Output internal representation (for debugging)\n";
1414- Printf.eprintf " --help Show this help message\n";
1515- exit 1
33+open Cmdliner
164175type output_format = Yaml | Json | Flow | Debug
186···4735 json_to_string buf v;
4836 Buffer.contents buf
49375050-let process_string ~format ~all content =
3838+let process_string ~format ~all ~resolve_aliases ~max_nodes ~max_depth content =
5139 try
5240 if all then
5341 (* Multi-document mode *)
···6755 | Some yaml ->
6856 if not !first then print_endline "---";
6957 first := false;
7070- let value = Yamle.to_json yaml in
5858+ let value = Yamle.to_json ~resolve_aliases ~max_nodes ~max_depth yaml in
7159 print_endline (value_to_json value)
7260 ) documents
7361 | Debug ->
···8068 (* Single-document mode (original behavior) *)
8169 match format with
8270 | Yaml ->
8383- let value = Yamle.of_string content in
7171+ let value = Yamle.of_string ~resolve_aliases ~max_nodes ~max_depth content in
8472 print_string (Yamle.to_string value)
8573 | Flow ->
8686- let value = Yamle.of_string content in
7474+ let value = Yamle.of_string ~resolve_aliases ~max_nodes ~max_depth content in
8775 print_string (Yamle.to_string ~layout_style:Yamle.Layout_style.Flow value)
8876 | Json ->
8989- let value = Yamle.of_string content in
7777+ let value = Yamle.of_string ~resolve_aliases ~max_nodes ~max_depth content in
9078 print_endline (value_to_json value)
9179 | Debug ->
9292- let yaml = Yamle.yaml_of_string content in
8080+ let yaml = Yamle.yaml_of_string ~resolve_aliases ~max_nodes ~max_depth content in
9381 Format.printf "%a@." Yamle.pp_yaml yaml
9482 with
9583 | Yamle.Yamle_error e ->
9684 Printf.eprintf "Error: %s\n" (Yamle.Error.to_string e);
9785 exit 1
98869999-let process_file ~format ~all filename =
8787+let process_file ~format ~all ~resolve_aliases ~max_nodes ~max_depth filename =
10088 let content =
10189 if filename = "-" then
10290 In_channel.input_all In_channel.stdin
10391 else
10492 In_channel.with_open_text filename In_channel.input_all
10593 in
106106- process_string ~format ~all content
9494+ process_string ~format ~all ~resolve_aliases ~max_nodes ~max_depth content
9595+9696+let run format all resolve_aliases max_nodes max_depth files =
9797+ let files = if files = [] then ["-"] else files in
9898+ List.iter (process_file ~format ~all ~resolve_aliases ~max_nodes ~max_depth) files;
9999+ `Ok ()
100100+101101+(* Command-line arguments *)
102102+103103+let format_arg =
104104+ let doc = "Output format: yaml (default), json, flow, or debug." in
105105+ let formats = [
106106+ ("yaml", Yaml);
107107+ ("json", Json);
108108+ ("flow", Flow);
109109+ ("debug", Debug);
110110+ ] in
111111+ Arg.(value & opt (enum formats) Yaml & info ["format"; "f"] ~docv:"FORMAT" ~doc)
112112+113113+let json_arg =
114114+ let doc = "Output as JSON (shorthand for --format=json)." in
115115+ Arg.(value & flag & info ["json"] ~doc)
116116+117117+let flow_arg =
118118+ let doc = "Output in flow style (shorthand for --format=flow)." in
119119+ Arg.(value & flag & info ["flow"] ~doc)
120120+121121+let debug_arg =
122122+ let doc = "Output internal representation (shorthand for --format=debug)." in
123123+ Arg.(value & flag & info ["debug"] ~doc)
124124+125125+let all_arg =
126126+ let doc = "Output all documents (for multi-document YAML)." in
127127+ Arg.(value & flag & info ["all"; "a"] ~doc)
128128+129129+let no_resolve_aliases_arg =
130130+ let doc = "Don't resolve aliases (keep them as references)." in
131131+ Arg.(value & flag & info ["no-resolve-aliases"] ~doc)
132132+133133+let max_nodes_arg =
134134+ let doc = "Maximum number of nodes during alias expansion (default: 10000000). \
135135+ Protection against billion laughs attack." in
136136+ Arg.(value & opt int Yamle.default_max_alias_nodes & info ["max-nodes"] ~docv:"N" ~doc)
107137108108-let () =
109109- let files = ref [] in
110110- let format = ref Yaml in
111111- let show_help = ref false in
112112- let all = ref false in
138138+let max_depth_arg =
139139+ let doc = "Maximum alias nesting depth (default: 100). \
140140+ Protection against deeply nested alias chains." in
141141+ Arg.(value & opt int Yamle.default_max_alias_depth & info ["max-depth"] ~docv:"N" ~doc)
113142114114- (* Parse arguments *)
115115- let args = Array.to_list Sys.argv |> List.tl in
116116- List.iter (fun arg ->
117117- match arg with
118118- | "--help" | "-h" -> show_help := true
119119- | "--all" -> all := true
120120- | "--json" -> format := Json
121121- | "--flow" -> format := Flow
122122- | "--debug" -> format := Debug
123123- | s when String.length s > 0 && s.[0] = '-' ->
124124- Printf.eprintf "Unknown option: %s\n" s;
125125- usage ()
126126- | filename -> files := filename :: !files
127127- ) args;
143143+let files_arg =
144144+ let doc = "YAML file(s) to process. Use '-' for stdin." in
145145+ Arg.(value & pos_all file [] & info [] ~docv:"FILE" ~doc)
146146+147147+let combined_format format json flow debug =
148148+ if json then Json
149149+ else if flow then Flow
150150+ else if debug then Debug
151151+ else format
128152129129- if !show_help then usage ();
153153+let term =
154154+ let combine format json flow debug all no_resolve max_nodes max_depth files =
155155+ let format = combined_format format json flow debug in
156156+ let resolve_aliases = not no_resolve in
157157+ run format all resolve_aliases max_nodes max_depth files
158158+ in
159159+ Term.(ret (const combine $ format_arg $ json_arg $ flow_arg $ debug_arg $
160160+ all_arg $ no_resolve_aliases_arg $ max_nodes_arg $ max_depth_arg $ files_arg))
130161131131- let files = List.rev !files in
162162+let info =
163163+ let doc = "Parse and reprint YAML files" in
164164+ let man = [
165165+ `S Manpage.s_description;
166166+ `P "$(tname) parses YAML files and reprints them in various formats. \
167167+ It can be used to validate YAML, convert between styles, or convert to JSON.";
168168+ `S Manpage.s_examples;
169169+ `P "Parse and reprint a YAML file:";
170170+ `Pre " $(tname) config.yaml";
171171+ `P "Convert YAML to JSON:";
172172+ `Pre " $(tname) --json config.yaml";
173173+ `P "Process multi-document YAML:";
174174+ `Pre " $(tname) --all multi.yaml";
175175+ `P "Limit alias expansion (protection against malicious YAML):";
176176+ `Pre " $(tname) --max-nodes 1000 --max-depth 10 untrusted.yaml";
177177+ `S Manpage.s_bugs;
178178+ `P "Report bugs at https://github.com/avsm/ocaml-yaml/issues";
179179+ ] in
180180+ Cmd.info "yamlcat" ~version:"0.1.0" ~doc ~man
132181133133- if files = [] then
134134- (* Read from stdin *)
135135- process_file ~format:!format ~all:!all "-"
136136- else
137137- List.iter (process_file ~format:!format ~all:!all) files
182182+let () = exit (Cmd.eval (Cmd.v info term))
+6
yaml/ocaml-yamle/lib/error.ml
···4545 | Type_mismatch of string * string (** expected, got *)
4646 | Unresolved_alias of string
4747 | Key_not_found of string
4848+ | Alias_expansion_node_limit of int (** max nodes exceeded *)
4949+ | Alias_expansion_depth_limit of int (** max depth exceeded *)
48504951 (* Emitter errors *)
5052 | Invalid_encoding of string
···138140 Printf.sprintf "type mismatch: expected %s, got %s" expected got
139141 | Unresolved_alias s -> Printf.sprintf "unresolved alias: *%s" s
140142 | Key_not_found s -> Printf.sprintf "key not found: %s" s
143143+ | Alias_expansion_node_limit n ->
144144+ Printf.sprintf "alias expansion exceeded node limit (%d nodes)" n
145145+ | Alias_expansion_depth_limit n ->
146146+ Printf.sprintf "alias expansion exceeded depth limit (%d levels)" n
141147 | Invalid_encoding s -> Printf.sprintf "invalid encoding: %s" s
142148 | Scalar_contains_invalid_chars s ->
143149 Printf.sprintf "scalar contains invalid characters: %s" s
+40-11
yaml/ocaml-yamle/lib/loader.ml
···127127 pending_key = None;
128128 } :: rest)
129129130130-(** Load single document as Value *)
131131-let value_of_string s =
130130+(** Load single document as Value.
131131+132132+ @param resolve_aliases Whether to resolve aliases (default true)
133133+ @param max_nodes Maximum nodes during alias expansion (default 10M)
134134+ @param max_depth Maximum alias nesting depth (default 100)
135135+*)
136136+let value_of_string
137137+ ?(resolve_aliases = true)
138138+ ?(max_nodes = Yaml.default_max_alias_nodes)
139139+ ?(max_depth = Yaml.default_max_alias_depth)
140140+ s =
132141 let parser = Parser.of_string s in
133142 let state = create_state () in
134143 Parser.iter (process_event state) parser;
···138147 (match Document.root doc with
139148 | None -> `Null
140149 | Some yaml ->
141141- let yaml = Yaml.resolve_aliases yaml in
142142- Yaml.to_value yaml)
150150+ Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth yaml)
143151 | _ -> Error.raise Multiple_documents
144152145145-(** Load single document as Yaml *)
146146-let yaml_of_string s =
153153+(** Load single document as Yaml.
154154+155155+ @param resolve_aliases Whether to resolve aliases (default false for Yaml.t)
156156+ @param max_nodes Maximum nodes during alias expansion (default 10M)
157157+ @param max_depth Maximum alias nesting depth (default 100)
158158+*)
159159+let yaml_of_string
160160+ ?(resolve_aliases = false)
161161+ ?(max_nodes = Yaml.default_max_alias_nodes)
162162+ ?(max_depth = Yaml.default_max_alias_depth)
163163+ s =
147164 let parser = Parser.of_string s in
148165 let state = create_state () in
149166 Parser.iter (process_event state) parser;
···152169 | [doc] ->
153170 (match Document.root doc with
154171 | None -> `Scalar (Scalar.make "")
155155- | Some yaml -> yaml)
172172+ | Some yaml ->
173173+ if resolve_aliases then
174174+ Yaml.resolve_aliases ~max_nodes ~max_depth yaml
175175+ else
176176+ yaml)
156177 | _ -> Error.raise Multiple_documents
157178158179(** Load all documents *)
···162183 Parser.iter (process_event state) parser;
163184 List.rev state.documents
164185165165-(** Load single Value from parser *)
166166-let load_value parser =
186186+(** Load single Value from parser.
187187+188188+ @param resolve_aliases Whether to resolve aliases (default true)
189189+ @param max_nodes Maximum nodes during alias expansion (default 10M)
190190+ @param max_depth Maximum alias nesting depth (default 100)
191191+*)
192192+let load_value
193193+ ?(resolve_aliases = true)
194194+ ?(max_nodes = Yaml.default_max_alias_nodes)
195195+ ?(max_depth = Yaml.default_max_alias_depth)
196196+ parser =
167197 let state = create_state () in
168198 let rec loop () =
169199 match Parser.next parser with
···178208 Some (match Document.root doc with
179209 | None -> `Null
180210 | Some yaml ->
181181- let yaml = Yaml.resolve_aliases yaml in
182182- Yaml.to_value yaml)
211211+ Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth yaml)
183212 | [] -> None)
184213 | Event.Stream_end -> None
185214 | _ -> loop ()
+89-56
yaml/ocaml-yamle/lib/yaml.ml
···5050 (`Scalar (Scalar.make k), of_value v)
5151 ) pairs))
52525353-(** Convert to JSON-compatible Value *)
5353+(** Default limits for alias expansion (protection against billion laughs attack) *)
5454+let default_max_alias_nodes = 10_000_000
5555+let default_max_alias_depth = 100
5656+5757+(** Resolve aliases by replacing them with referenced nodes.
5858+5959+ @param max_nodes Maximum number of nodes to create during expansion (default 10M)
6060+ @param max_depth Maximum depth of alias-within-alias resolution (default 100)
6161+ @raise Alias_expansion_node_limit if max_nodes is exceeded
6262+ @raise Alias_expansion_depth_limit if max_depth is exceeded
6363+*)
6464+let resolve_aliases ?(max_nodes = default_max_alias_nodes) ?(max_depth = default_max_alias_depth) (root : t) : t =
6565+ let anchors = Hashtbl.create 16 in
6666+ let node_count = ref 0 in
6767+6868+ (* Check node limit *)
6969+ let check_node_limit () =
7070+ incr node_count;
7171+ if !node_count > max_nodes then
7272+ Error.raise (Alias_expansion_node_limit max_nodes)
7373+ in
7474+7575+ (* First pass: collect all anchors *)
7676+ let rec collect (v : t) =
7777+ match v with
7878+ | `Scalar s ->
7979+ (match Scalar.anchor s with
8080+ | Some name -> Hashtbl.replace anchors name v
8181+ | None -> ())
8282+ | `Alias _ -> ()
8383+ | `A seq ->
8484+ (match Sequence.anchor seq with
8585+ | Some name -> Hashtbl.replace anchors name v
8686+ | None -> ());
8787+ List.iter collect (Sequence.members seq)
8888+ | `O map ->
8989+ (match Mapping.anchor map with
9090+ | Some name -> Hashtbl.replace anchors name v
9191+ | None -> ());
9292+ List.iter (fun (k, v) -> collect k; collect v) (Mapping.members map)
9393+ in
9494+ collect root;
54955555-let rec to_value (v : t) : Value.t =
5656- match v with
5757- | `Scalar s -> scalar_to_value s
5858- | `Alias name -> Error.raise (Unresolved_alias name)
5959- | `A seq -> `A (List.map to_value (Sequence.members seq))
6060- | `O map ->
6161- `O (List.map (fun (k, v) ->
6262- let key = match k with
6363- | `Scalar s -> Scalar.value s
6464- | _ -> Error.raise (Type_mismatch ("string key", "complex key"))
6565- in
6666- (key, to_value v)
6767- ) (Mapping.members map))
9696+ (* Second pass: resolve aliases with depth tracking *)
9797+ let rec resolve ~depth (v : t) : t =
9898+ check_node_limit ();
9999+ match v with
100100+ | `Scalar _ -> v
101101+ | `Alias name ->
102102+ if depth >= max_depth then
103103+ Error.raise (Alias_expansion_depth_limit max_depth);
104104+ (match Hashtbl.find_opt anchors name with
105105+ | Some target -> resolve ~depth:(depth + 1) target
106106+ | None -> Error.raise (Undefined_alias name))
107107+ | `A seq ->
108108+ `A (Sequence.map (resolve ~depth) seq)
109109+ | `O map ->
110110+ `O (Mapping.make
111111+ ?anchor:(Mapping.anchor map)
112112+ ?tag:(Mapping.tag map)
113113+ ~implicit:(Mapping.implicit map)
114114+ ~style:(Mapping.style map)
115115+ (List.map (fun (k, v) -> (resolve ~depth k, resolve ~depth v)) (Mapping.members map)))
116116+ in
117117+ resolve ~depth:0 root
6811869119(** Convert scalar to JSON value based on content *)
7070-and scalar_to_value s =
120120+let rec scalar_to_value s =
71121 let value = Scalar.value s in
72122 let tag = Scalar.tag s in
73123 let style = Scalar.style s in
···161211 (* Not a number - it's a string *)
162212 `String value
163213164164-(** Resolve aliases by replacing them with referenced nodes *)
214214+(** Convert to JSON-compatible Value.
165215166166-let resolve_aliases (root : t) : t =
167167- let anchors = Hashtbl.create 16 in
168168-169169- (* First pass: collect all anchors *)
170170- let rec collect (v : t) =
216216+ @param resolve_aliases_first Whether to resolve aliases before conversion (default true)
217217+ @param max_nodes Maximum nodes during alias expansion (default 10M)
218218+ @param max_depth Maximum alias nesting depth (default 100)
219219+ @raise Unresolved_alias if resolve_aliases_first is false and an alias is encountered
220220+*)
221221+let to_value
222222+ ?(resolve_aliases_first = true)
223223+ ?(max_nodes = default_max_alias_nodes)
224224+ ?(max_depth = default_max_alias_depth)
225225+ (v : t) : Value.t =
226226+ let v = if resolve_aliases_first then resolve_aliases ~max_nodes ~max_depth v else v in
227227+ let rec convert (v : t) : Value.t =
171228 match v with
172172- | `Scalar s ->
173173- (match Scalar.anchor s with
174174- | Some name -> Hashtbl.replace anchors name v
175175- | None -> ())
176176- | `Alias _ -> ()
177177- | `A seq ->
178178- (match Sequence.anchor seq with
179179- | Some name -> Hashtbl.replace anchors name v
180180- | None -> ());
181181- List.iter collect (Sequence.members seq)
229229+ | `Scalar s -> scalar_to_value s
230230+ | `Alias name -> Error.raise (Unresolved_alias name)
231231+ | `A seq -> `A (List.map convert (Sequence.members seq))
182232 | `O map ->
183183- (match Mapping.anchor map with
184184- | Some name -> Hashtbl.replace anchors name v
185185- | None -> ());
186186- List.iter (fun (k, v) -> collect k; collect v) (Mapping.members map)
187187- in
188188- collect root;
189189-190190- (* Second pass: resolve aliases *)
191191- let rec resolve (v : t) : t =
192192- match v with
193193- | `Scalar _ -> v
194194- | `Alias name ->
195195- (match Hashtbl.find_opt anchors name with
196196- | Some target -> resolve target
197197- | None -> Error.raise (Undefined_alias name))
198198- | `A seq ->
199199- `A (Sequence.map resolve seq)
200200- | `O map ->
201201- `O (Mapping.make
202202- ?anchor:(Mapping.anchor map)
203203- ?tag:(Mapping.tag map)
204204- ~implicit:(Mapping.implicit map)
205205- ~style:(Mapping.style map)
206206- (List.map (fun (k, v) -> (resolve k, resolve v)) (Mapping.members map)))
233233+ `O (List.map (fun (k, v) ->
234234+ let key = match k with
235235+ | `Scalar s -> Scalar.value s
236236+ | _ -> Error.raise (Type_mismatch ("string key", "complex key"))
237237+ in
238238+ (key, convert v)
239239+ ) (Mapping.members map))
207240 in
208208- resolve root
241241+ convert v
209242210243(** Get anchor from any node *)
211244let anchor (v : t) =
+23-5
yaml/ocaml-yamle/lib/yamle.ml
···1212type error = Error.t
1313exception Yamle_error = Error.Yamle_error
14141515+(** {1 Alias expansion limits (protection against billion laughs attack)} *)
1616+1717+let default_max_alias_nodes = Yaml.default_max_alias_nodes
1818+let default_max_alias_depth = Yaml.default_max_alias_depth
1919+1520(** {1 JSON-compatible parsing} *)
16211717-let of_string s = Loader.value_of_string s
2222+let of_string
2323+ ?(resolve_aliases = true)
2424+ ?(max_nodes = default_max_alias_nodes)
2525+ ?(max_depth = default_max_alias_depth)
2626+ s =
2727+ Loader.value_of_string ~resolve_aliases ~max_nodes ~max_depth s
18281929let documents_of_string s = Loader.documents_of_string s
2030···35453646(** {1 YAML-specific parsing} *)
37473838-let yaml_of_string s = Loader.yaml_of_string s
4848+let yaml_of_string
4949+ ?(resolve_aliases = false)
5050+ ?(max_nodes = default_max_alias_nodes)
5151+ ?(max_depth = default_max_alias_depth)
5252+ s =
5353+ Loader.yaml_of_string ~resolve_aliases ~max_nodes ~max_depth s
39544055(** {1 YAML-specific emission} *)
4156···67826883(** {1 Conversion} *)
69847070-let to_json yaml =
7171- let yaml = Yaml.resolve_aliases yaml in
7272- Yaml.to_value yaml
8585+let to_json
8686+ ?(resolve_aliases = true)
8787+ ?(max_nodes = default_max_alias_nodes)
8888+ ?(max_depth = default_max_alias_depth)
8989+ yaml =
9090+ Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth yaml
73917492let of_json value = Yaml.of_value value
7593