···2424 | Invalid_yaml_version of string
2525 | Invalid_tag_directive of string
2626 | Reserved_directive of string
2727+ | Illegal_flow_key_line (** Key and : must be on same line in flow context *)
27282829 (* Parser errors *)
2930 | Unexpected_token of string
···123124 | Invalid_yaml_version s -> Printf.sprintf "invalid YAML version: %s" s
124125 | Invalid_tag_directive s -> Printf.sprintf "invalid TAG directive: %s" s
125126 | Reserved_directive s -> Printf.sprintf "reserved directive: %s" s
127127+ | Illegal_flow_key_line -> "key and ':' must be on the same line in flow context"
126128 | Unexpected_token s -> Printf.sprintf "unexpected token: %s" s
127129 | Expected_document_start -> "expected document start '---'"
128130 | Expected_document_end -> "expected document end '...'"
+6
yaml/ocaml-yamle/lib/parser.ml
···395395 push_state t Block_mapping_value;
396396 parse_node t ~block:true ~indentless:true
397397 end
398398+ (* Handle value without explicit key - key is empty/null *)
399399+ | Token.Value ->
400400+ t.state <- Block_mapping_value;
401401+ empty_scalar_event ~anchor:None ~tag:None tok.span
398402 | Token.Block_end ->
399403 skip_token t;
400404 t.state <- pop_state t;
401405 Event.Mapping_end, tok.span
402406 | _ ->
407407+ Printf.eprintf "DEBUG parser parse_block_mapping_key: unexpected token at %d:%d\n%!"
408408+ tok.span.start.line tok.span.start.column;
403409 Error.raise_span tok.span Expected_key
404410405411(** Parse block mapping value *)
+198-117
yaml/ocaml-yamle/lib/scanner.ml
···3131 mutable document_has_content : bool; (** True if we've emitted content tokens in current document *)
3232 mutable adjacent_value_allowed_at : Position.t option; (** Position where adjacent : is allowed *)
3333 mutable pending_value : bool; (** True if we've emitted a KEY and are waiting for VALUE *)
3434+ mutable flow_mapping_stack : bool list; (** Stack of whether each flow level is a mapping *)
3435}
35363637let create input =
···5051 document_has_content = false;
5152 adjacent_value_allowed_at = None;
5253 pending_value = false;
5454+ flow_mapping_stack = [];
5355 }
54565557let of_string s = create (Input.of_string s)
···173175let save_simple_key t =
174176 if t.allow_simple_key then begin
175177 (* A simple key is required only if we're in a block context,
176176- at the current indentation level, AND we have an active indent *)
178178+ at the current indentation level, AND the current indent needs a block end.
179179+ This matches saphyr's logic and prevents false positives for values. *)
177180 let required = t.flow_level = 0 &&
178178- t.indent_stack <> [] &&
179179- current_indent t = column t - 1 in
181181+ match t.indent_stack with
182182+ | { indent; needs_block_end = true; _ } :: _ ->
183183+ indent = column t
184184+ | _ -> false
185185+ in
180186 let sk = {
181187 sk_possible = true;
182188 sk_required = required;
···354360 (* Just ! followed by suffix *)
355361 ("!", Buffer.contents buf ^ scan_tag_suffix t))
356362 in
363363+ (* Validate that tag is followed by whitespace, break, or (in flow) flow indicator *)
364364+ (match Input.peek t.input with
365365+ | None -> () (* EOF is ok *)
366366+ | Some c when Input.is_whitespace c || Input.is_break c -> ()
367367+ | Some c when t.flow_level > 0 && Input.is_flow_indicator c -> ()
368368+ | _ -> Error.raise_at start (Invalid_tag "expected whitespace or line break after tag"));
357369 let span = Span.make ~start ~stop:(Input.mark t.input) in
358370 (handle, suffix, span)
359371···362374 let start = Input.mark t.input in
363375 ignore (Input.next t.input); (* consume opening single-quote *)
364376 let buf = Buffer.create 64 in
377377+ let whitespace = Buffer.create 16 in (* Track trailing whitespace *)
378378+379379+ let flush_whitespace () =
380380+ if Buffer.length whitespace > 0 then begin
381381+ Buffer.add_buffer buf whitespace;
382382+ Buffer.clear whitespace
383383+ end
384384+ in
385385+365386 let rec loop () =
366387 match Input.peek t.input with
367388 | None -> Error.raise_at start Unclosed_single_quote
···370391 (* Check for escaped quote ('') *)
371392 (match Input.peek t.input with
372393 | Some '\'' ->
394394+ flush_whitespace ();
373395 Buffer.add_char buf '\'';
374396 ignore (Input.next t.input);
375397 loop ()
376376- | _ -> ())
398398+ | _ ->
399399+ (* End of string - flush any trailing whitespace *)
400400+ flush_whitespace ())
401401+ | Some ' ' | Some '\t' ->
402402+ (* Track whitespace - don't add to buf yet *)
403403+ Buffer.add_char whitespace (Option.get (Input.peek t.input));
404404+ ignore (Input.next t.input);
405405+ loop ()
377406 | Some '\n' | Some '\r' ->
407407+ (* Discard trailing whitespace before line break *)
408408+ Buffer.clear whitespace;
378409 Input.consume_break t.input;
379379- (* Fold line break to space unless at start of content *)
380380- if Buffer.length buf > 0 then
381381- Buffer.add_char buf ' ';
382410 (* Skip leading whitespace on next line *)
383411 while Input.next_is_blank t.input do
384412 ignore (Input.next t.input)
385413 done;
386386- (* Check for document boundary - this terminates the quoted string *)
414414+ (* Check for document boundary *)
387415 if Input.at_document_boundary t.input then
388416 Error.raise_at start Unclosed_single_quote;
417417+ (* Count empty lines (consecutive line breaks) *)
418418+ let empty_lines = ref 0 in
419419+ while Input.next_is_break t.input do
420420+ incr empty_lines;
421421+ Input.consume_break t.input;
422422+ while Input.next_is_blank t.input do
423423+ ignore (Input.next t.input)
424424+ done;
425425+ if Input.at_document_boundary t.input then
426426+ Error.raise_at start Unclosed_single_quote
427427+ done;
428428+ (* Apply folding rules *)
429429+ if !empty_lines > 0 then begin
430430+ (* Empty lines: preserve as newlines *)
431431+ for _ = 1 to !empty_lines do
432432+ Buffer.add_char buf '\n'
433433+ done
434434+ end else
435435+ (* Single break: fold to space (even at start of string) *)
436436+ Buffer.add_char buf ' ';
389437 loop ()
390438 | Some c ->
439439+ flush_whitespace ();
391440 Buffer.add_char buf c;
392441 ignore (Input.next t.input);
393442 loop ()
···711760 (Invalid_block_scalar_header "expected newline after header");
712761713762 let base_indent = current_indent t in
763763+ (* base_indent is the indent level from the stack, -1 if empty.
764764+ In the implicit indent case it is compared as (base_indent - 1), not directly. *)
714765 let content_indent = ref (
715766 match !explicit_indent with
716767 | Some n ->
717717- (* base_indent is a column (1-indexed), convert to indent level (0-indexed) *)
768768+ (* Explicit indent: base_indent is 1-indexed column, convert to 0-indexed.
769769+ content_indent = (base_indent - 1) + n, but at least n for document level. *)
718770 let base_level = max 0 (base_indent - 1) in
719771 base_level + n
720772 | None -> 0 (* Will be determined by first non-empty line *)
···745797 (* Line starts with fewer spaces than content_indent - dedented *)
746798 !spaces_skipped
747799 end else if Input.next_is_blank t.input then begin
748748- (* Line has spaces beyond content_indent - check if rest is only blanks *)
749749- let idx = ref 0 in
750750- let is_empty = ref false in
751751- while not !is_empty do
752752- match Input.peek_nth t.input !idx with
753753- | Some c when Input.is_blank c -> incr idx
754754- | Some c when Input.is_break c -> is_empty := true
755755- | _ -> is_empty := true (* Not a break, so has content *)
756756- done;
757757- (* Check if we found a break (empty line) or content *)
758758- (match Input.peek_nth t.input (!idx) with
759759- | None | Some '\n' | Some '\r' ->
760760- (* Empty line - preserve spaces for literal scalars *)
761761- if literal then begin
762762- while Input.next_is_blank t.input do
763763- Buffer.add_char trailing_breaks ' ';
764764- ignore (Input.next t.input)
765765- done
766766- end else begin
767767- while Input.next_is_blank t.input do
768768- ignore (Input.next t.input)
769769- done
770770- end;
771771- Buffer.add_char trailing_breaks '\n';
772772- Input.consume_break t.input;
773773- skip_to_content_indent ()
774774- | _ ->
775775- (* Has content *)
776776- !content_indent)
800800+ (* Line has spaces/tabs beyond content_indent - could be whitespace content or empty line.
801801+ For literal scalars, whitespace-only lines ARE content (not empty).
802802+ For folded scalars, whitespace-only lines that are "more indented" are preserved. *)
803803+ if literal then
804804+ (* Literal: whitespace beyond content_indent is content, let read_lines handle it *)
805805+ !content_indent
806806+ else begin
807807+ (* Folded: check if rest is only blanks *)
808808+ let idx = ref 0 in
809809+ while match Input.peek_nth t.input !idx with
810810+ | Some c when Input.is_blank c -> incr idx; true
811811+ | _ -> false
812812+ do () done;
813813+ match Input.peek_nth t.input (!idx) with
814814+ | None | Some '\n' | Some '\r' ->
815815+ (* Empty/whitespace-only line in folded - skip spaces *)
816816+ while Input.next_is_blank t.input do
817817+ ignore (Input.next t.input)
818818+ done;
819819+ Buffer.add_char trailing_breaks '\n';
820820+ Input.consume_break t.input;
821821+ skip_to_content_indent ()
822822+ | _ ->
823823+ (* Has non-whitespace content *)
824824+ !content_indent
825825+ end
777826 end else
778827 !content_indent
779828 end else begin
780780- (* Implicit indent - skip empty lines without consuming spaces *)
829829+ (* Implicit indent - skip empty lines without consuming spaces.
830830+ Note: Only SPACES count as indentation. Tabs are content, not indentation.
831831+ So we only check for spaces when determining if a line is "empty". *)
781832 if Input.next_is_break t.input then begin
782833 Buffer.add_char trailing_breaks '\n';
783834 Input.consume_break t.input;
784835 skip_to_content_indent ()
785785- end else if Input.next_is_blank t.input then begin
786786- (* Check if line is empty *)
836836+ end else if Input.next_is (( = ) ' ') t.input then begin
837837+ (* Check if line is empty (only spaces before break) *)
787838 let idx = ref 0 in
788839 while match Input.peek_nth t.input !idx with
789789- | Some c when Input.is_blank c -> incr idx; true
840840+ | Some ' ' -> incr idx; true
790841 | _ -> false
791842 do () done;
792843 match Input.peek_nth t.input (!idx) with
793844 | None | Some '\n' | Some '\r' ->
794794- (* Empty line *)
795795- while Input.next_is_blank t.input do
845845+ (* Line has only spaces - empty line *)
846846+ while Input.next_is (( = ) ' ') t.input do
796847 ignore (Input.next t.input)
797848 done;
798849 Buffer.add_char trailing_breaks '\n';
799850 Input.consume_break t.input;
800851 skip_to_content_indent ()
801852 | _ ->
802802- (* Has content - don't consume anything, return 0 as we haven't skipped *)
853853+ (* Has content (including tabs which are content, not indentation) *)
803854 0
804855 end else
805805- (* Not at break or blank - return 0 *)
856856+ (* Not at break or space - could be tab (content) or other *)
806857 0
807858 end
808859 in
···827878828879 (* Determine content indent from first content line (implicit case) *)
829880 let first_line = !content_indent = 0 in
830830- if !content_indent = 0 then begin
831831- if line_indent <= base_indent then begin
832832- (* No content - restore position conceptually *)
833833- ()
834834- end else
835835- content_indent := line_indent
836836- end;
881881+ (* base_indent is 1-indexed column, convert to 0-indexed for comparison with line_indent.
882882+ If base_indent = -1 (empty stack), then base_level = -2, so content at col 0 is still valid. *)
883883+ let base_level = base_indent - 1 in
884884+ let should_process =
885885+ if !content_indent = 0 then begin
886886+ (* For implicit indent, content must be more indented than base_level. *)
887887+ if line_indent <= base_level then
888888+ false (* No content - first line not indented enough *)
889889+ else begin
890890+ content_indent := line_indent;
891891+ true
892892+ end
893893+ end else if line_indent < !content_indent then
894894+ false (* Dedented - done with content *)
895895+ else
896896+ true
897897+ in
837898838838- if line_indent < !content_indent then begin
839839- (* Dedented - done with content *)
840840- ()
841841- end else begin
842842- (* Check if current line is "more indented" (has extra indent beyond content_indent) *)
843843- let trailing_blank = line_indent > !content_indent in
899899+ if should_process then begin
900900+ (* Check if current line is "more indented" (has extra indent or starts with whitespace).
901901+ For folded scalars, lines that start with any whitespace (space or tab) after the
902902+ content indentation are "more indented" and preserve breaks.
903903+ Note: we check Input.next_is_blank BEFORE reading content to see if content starts with whitespace. *)
904904+ let trailing_blank = line_indent > !content_indent || Input.next_is_blank t.input in
844905845906 (* Add trailing breaks to buffer *)
846907 if Buffer.length buf > 0 then begin
···10551116 let span = Span.make ~start ~stop:(Input.mark t.input) in
10561117 let token = if indicator = "---" then Token.Document_start else Token.Document_end in
10571118 (* Reset document content flag after document end marker *)
10581058- if indicator = "..." then
11191119+ if indicator = "..." then begin
10591120 t.document_has_content <- false;
11211121+ (* After document end marker, skip whitespace and check for end of line or comment *)
11221122+ while Input.next_is_blank t.input do ignore (Input.next t.input) done;
11231123+ (match Input.peek t.input with
11241124+ | None -> () (* EOF is ok *)
11251125+ | Some c when Input.is_break c -> ()
11261126+ | Some '#' -> () (* Comment is ok *)
11271127+ | _ -> Error.raise_at start (Invalid_directive "content not allowed after document end marker on same line"))
11281128+ end;
10601129 emit t span token
1061113010621131and fetch_directive t =
···10791148 if t.flow_level = 0 then
10801149 t.flow_indent <- column t;
10811150 t.flow_level <- t.flow_level + 1;
11511151+ (* Track whether this is a mapping or sequence *)
11521152+ let is_mapping = (token_type = Token.Flow_mapping_start) in
11531153+ t.flow_mapping_stack <- is_mapping :: t.flow_mapping_stack;
10821154 t.allow_simple_key <- true;
10831155 t.simple_keys <- None :: t.simple_keys;
10841156 t.document_has_content <- true;
···10901162and fetch_flow_collection_end t token_type =
10911163 remove_simple_key t;
10921164 t.flow_level <- t.flow_level - 1;
11651165+ t.flow_mapping_stack <- (match t.flow_mapping_stack with _ :: rest -> rest | [] -> []);
10931166 t.simple_keys <- (match t.simple_keys with _ :: rest -> rest | [] -> []);
10941167 t.allow_simple_key <- false;
10951168 let start = Input.mark t.input in
···12001273 | None -> false)
1201127412021275and fetch_value t =
12761276+ let start = Input.mark t.input in
12031277 (* Check for simple key *)
12041204- (match t.simple_keys with
12051205- | Some sk :: _ when sk.sk_possible ->
12061206- (* Insert KEY token before the simple key value *)
12071207- let key_span = Span.point sk.sk_position in
12081208- let key_token = { Token.token = Token.Key; span = key_span } in
12091209- (* We need to insert at the right position *)
12101210- let tokens = Queue.to_seq t.tokens |> Array.of_seq in
12111211- Queue.clear t.tokens;
12121212- let insert_pos = sk.sk_token_number - t.tokens_taken in
12131213- Array.iteri (fun i tok ->
12141214- if i = insert_pos then Queue.add key_token t.tokens;
12151215- Queue.add tok t.tokens
12161216- ) tokens;
12171217- if insert_pos >= Array.length tokens then
12181218- Queue.add key_token t.tokens;
12191219- t.token_number <- t.token_number + 1;
12201220- t.pending_value <- true; (* We've inserted a KEY token, now waiting for VALUE *)
12211221- (* Roll indent for implicit block mapping *)
12221222- if t.flow_level = 0 then begin
12231223- let col = sk.sk_position.column in
12241224- if roll_indent t col ~sequence:false then begin
12251225- let span = Span.point sk.sk_position in
12261226- (* Insert block mapping start before key *)
12271227- let bm_token = { Token.token = Token.Block_mapping_start; span } in
12281228- let tokens = Queue.to_seq t.tokens |> Array.of_seq in
12291229- Queue.clear t.tokens;
12301230- Array.iteri (fun i tok ->
12311231- if i = insert_pos then Queue.add bm_token t.tokens;
12321232- Queue.add tok t.tokens
12331233- ) tokens;
12341234- if insert_pos >= Array.length tokens then
12351235- Queue.add bm_token t.tokens;
12361236- t.token_number <- t.token_number + 1
12371237- end
12381238- end;
12391239- t.simple_keys <- None :: (List.tl t.simple_keys)
12401240- | _ ->
12411241- (* No simple key - this is a complex value (or empty key) *)
12421242- if t.flow_level = 0 then begin
12431243- if not t.allow_simple_key then
12441244- Error.raise_at (Input.mark t.input) Expected_key;
12451245- let col = column t in
12461246- if roll_indent t col ~sequence:false then begin
12471247- let span = Span.point (Input.mark t.input) in
12481248- emit t span Token.Block_mapping_start
12491249- end;
12501250- (* Emit KEY token for empty key case (e.g., ": value") only if we don't already have a pending KEY *)
12511251- if not t.pending_value then begin
12521252- let span = Span.point (Input.mark t.input) in
12531253- emit t span Token.Key;
12541254- t.pending_value <- true
12551255- end
12561256- end);
12781278+ let used_simple_key =
12791279+ match t.simple_keys with
12801280+ | Some sk :: _ when sk.sk_possible ->
12811281+ (* In implicit flow mapping (inside a flow sequence), key and : must be on the same line.
12821282+ In explicit flow mapping { }, key and : can span lines. *)
12831283+ let is_implicit_flow_mapping = match t.flow_mapping_stack with
12841284+ | false :: _ -> true (* false = we're in a sequence, so any mapping is implicit *)
12851285+ | _ -> false
12861286+ in
12871287+ if is_implicit_flow_mapping && sk.sk_position.line < (Input.position t.input).line then
12881288+ Error.raise_at start Illegal_flow_key_line;
12891289+ (* Insert KEY token before the simple key value *)
12901290+ let key_span = Span.point sk.sk_position in
12911291+ let key_token = { Token.token = Token.Key; span = key_span } in
12921292+ (* We need to insert at the right position *)
12931293+ let tokens = Queue.to_seq t.tokens |> Array.of_seq in
12941294+ Queue.clear t.tokens;
12951295+ let insert_pos = sk.sk_token_number - t.tokens_taken in
12961296+ Array.iteri (fun i tok ->
12971297+ if i = insert_pos then Queue.add key_token t.tokens;
12981298+ Queue.add tok t.tokens
12991299+ ) tokens;
13001300+ if insert_pos >= Array.length tokens then
13011301+ Queue.add key_token t.tokens;
13021302+ t.token_number <- t.token_number + 1;
13031303+ t.pending_value <- true; (* We've inserted a KEY token, now waiting for VALUE *)
13041304+ (* Roll indent for implicit block mapping *)
13051305+ if t.flow_level = 0 then begin
13061306+ let col = sk.sk_position.column in
13071307+ if roll_indent t col ~sequence:false then begin
13081308+ let span = Span.point sk.sk_position in
13091309+ (* Insert block mapping start before key *)
13101310+ let bm_token = { Token.token = Token.Block_mapping_start; span } in
13111311+ let tokens = Queue.to_seq t.tokens |> Array.of_seq in
13121312+ Queue.clear t.tokens;
13131313+ Array.iteri (fun i tok ->
13141314+ if i = insert_pos then Queue.add bm_token t.tokens;
13151315+ Queue.add tok t.tokens
13161316+ ) tokens;
13171317+ if insert_pos >= Array.length tokens then
13181318+ Queue.add bm_token t.tokens;
13191319+ t.token_number <- t.token_number + 1
13201320+ end
13211321+ end;
13221322+ t.simple_keys <- None :: (List.tl t.simple_keys);
13231323+ true
13241324+ | _ ->
13251325+ (* No simple key - this is a complex value (or empty key) *)
13261326+ if t.flow_level = 0 then begin
13271327+ if not t.allow_simple_key then
13281328+ Error.raise_at (Input.mark t.input) Expected_key;
13291329+ let col = column t in
13301330+ if roll_indent t col ~sequence:false then begin
13311331+ let span = Span.point (Input.mark t.input) in
13321332+ emit t span Token.Block_mapping_start
13331333+ end
13341334+ (* Note: We don't emit KEY here. Empty key handling is done by the parser,
13351335+ which emits empty scalar when it sees VALUE without preceding KEY. *)
13361336+ end;
13371337+ false
13381338+ in
12571339 remove_simple_key t;
12581258- (* In block context, allow_simple_key becomes true only after a line break,
12591259- not immediately after ':'. This prevents constructs like "key: - a".
12601260- The line break handling in skip_to_next_token will set it to true. *)
12611261- t.allow_simple_key <- false;
13401340+ (* In block context without simple key, allow simple keys for compact mappings like ": moon: white"
13411341+ In flow context or after using a simple key, disallow simple keys *)
13421342+ t.allow_simple_key <- (not used_simple_key) && (t.flow_level = 0);
12621343 t.document_has_content <- true;
12631344 let start = Input.mark t.input in
12641345 ignore (Input.next t.input);