···101101102102(** Resolve a tag *)
103103let resolve_tag t ~handle ~suffix =
104104- match List.assoc_opt handle t.tag_directives with
105105- | Some prefix -> prefix ^ suffix
106106- | None when handle = "!" -> "!" ^ suffix
107107- | None -> Error.raise (Invalid_tag (handle ^ suffix))
104104+ if handle = "" then
105105+ (* Verbatim tag - suffix is already the full URI *)
106106+ suffix
107107+ else
108108+ match List.assoc_opt handle t.tag_directives with
109109+ | Some prefix -> prefix ^ suffix
110110+ | None when handle = "!" -> "!" ^ suffix
111111+ | None -> Error.raise (Invalid_tag (handle ^ suffix))
108112109113(** Process directives at document start *)
110114let process_directives t =
+123-37
yaml/ocaml-yamle/lib/scanner.ml
···154154let scan_anchor_alias t =
155155 let start = Input.mark t.input in
156156 let buf = Buffer.create 16 in
157157+ (* Per YAML 1.2 spec: anchor names can contain any character that is NOT:
158158+ - Whitespace (space, tab, line breaks)
159159+ - Flow indicators: []{}
160160+ - Comma (,)
161161+ NUL is rejected as well. This matches the saphyr implementation: is_yaml_non_space && !is_flow *)
157162 while
158163 match Input.peek t.input with
159159- | Some c when Input.is_alnum c || c = '_' || c = '-' ->
164164+ | Some c when not (Input.is_whitespace c) &&
165165+ not (Input.is_flow_indicator c) &&
166166+ c <> '\x00' ->
160167 Buffer.add_char buf c;
161168 ignore (Input.next t.input);
162169 true
···215222 let handle, suffix =
216223 match Input.peek t.input with
217224 | Some '<' ->
218218- (* Verbatim tag: !<...> *)
225225+ (* Verbatim tag: !<...> - handle is empty, suffix is full URI *)
219226 ignore (Input.next t.input);
220227 let buf = Buffer.create 32 in
221228 while
···228235 | None -> Error.raise_at (Input.mark t.input) (Invalid_tag "unclosed verbatim tag")
229236 do () done;
230237 ignore (Input.next t.input); (* consume > *)
231231- ("!", Buffer.contents buf)
238238+ ("", Buffer.contents buf)
232239 | Some c when Input.is_whitespace c || Input.is_flow_indicator c ->
233240 (* Non-specific tag: ! *)
234241 ("!", "")
···238245 let suffix = scan_tag_suffix t in
239246 ("!!", suffix)
240247 | _ ->
241241- (* Primary handle or just suffix *)
242242- let first_part = scan_tag_suffix t in
243243- if String.length first_part > 0 && first_part.[String.length first_part - 1] = '!' then
244244- let suffix = scan_tag_suffix t in
245245- (first_part, suffix)
246246- else
247247- ("!", first_part)
248248+ (* Primary-handle tag (!foo) or named-handle tag (!e!foo) *)
249249+ (* Read the leading run of alphanumeric or '-' characters *)
250250+ let buf = Buffer.create 16 in
251251+ while
252252+ match Input.peek t.input with
253253+ | Some c when Input.is_alnum c || c = '-' ->
254254+ Buffer.add_char buf c;
255255+ ignore (Input.next t.input);
256256+ true
257257+ | _ -> false
258258+ do () done;
259259+ (* Check if next character is ! - if so, this is a named handle *)
260260+ (match Input.peek t.input with
261261+ | Some '!' ->
262262+ (* Named handle like !e! *)
263263+ ignore (Input.next t.input);
264264+ let handle_name = Buffer.contents buf in
265265+ let suffix = scan_tag_suffix t in
266266+ ("!" ^ handle_name ^ "!", suffix)
267267+ | _ ->
268268+ (* Just ! followed by suffix *)
269269+ ("!", Buffer.contents buf ^ scan_tag_suffix t))
248270 in
249271 let span = Span.make ~start ~stop:(Input.mark t.input) in
250272 (handle, suffix, span)
···471493 scan_line ();
472494 (* Check for line continuation *)
473495 if not in_flow && Input.next_is_break t.input then begin
474474- (* Save whitespace *)
475475- Buffer.clear spaces;
476476- Buffer.add_char spaces '\n';
496496+ (* Save the line break *)
497497+ if !leading_blanks then begin
498498+ (* We already had a break - this is an additional break (empty line) *)
499499+ Buffer.add_char spaces '\n'
500500+ end else begin
501501+ (* First line break *)
502502+ Buffer.clear spaces;
503503+ Buffer.add_char spaces '\n';
504504+ leading_blanks := true
505505+ end;
477506 Input.consume_break t.input;
478507 (* Line break in block context allows simple key *)
479508 t.allow_simple_key <- true;
480480- (* Skip leading blanks *)
509509+ (* Skip leading blanks on the next line *)
481510 while Input.next_is_blank t.input do
482482- Buffer.add_char spaces (Option.get (Input.next t.input))
511511+ ignore (Input.next t.input)
483512 done;
484513 let col = (Input.position t.input).column in
485514 (* Check indentation - stop if we're at or before the containing block's indent *)
···487516 () (* Stop - dedented or at parent level *)
488517 else if Input.at_document_boundary t.input then
489518 () (* Stop - document boundary *)
490490- else begin
491491- leading_blanks := true;
519519+ else
492520 scan_lines ()
493493- end
494521 end
495522 in
496523···562589 let base_indent = current_indent t in
563590 let content_indent = ref (
564591 match !explicit_indent with
565565- | Some n -> base_indent + n
592592+ | Some n ->
593593+ (* base_indent is a column (1-indexed), convert to indent level (0-indexed) *)
594594+ let base_level = max 0 (base_indent - 1) in
595595+ base_level + n
566596 | None -> 0 (* Will be determined by first non-empty line *)
567597 ) in
568598569599 let buf = Buffer.create 256 in
570600 let trailing_breaks = Buffer.create 16 in
571601572572- (* Read content *)
573573- let rec read_lines () =
574574- (* Skip empty lines, collecting breaks *)
575575- while Input.next_is_break t.input ||
576576- (Input.next_is_blank t.input &&
577577- match Input.peek_nth t.input 1 with
578578- | Some c when Input.is_break c -> true
579579- | None -> true
580580- | _ -> false)
581581- do
582582- if Input.next_is_blank t.input then begin
583583- while Input.next_is_blank t.input do
584584- ignore (Input.next t.input)
585585- done
586586- end;
602602+ (* Skip to content indentation, skipping empty lines *)
603603+ let rec skip_to_content_indent () =
604604+ if !content_indent > 0 then begin
605605+ (* Explicit indent - skip at most content_indent leading spaces *)
606606+ let spaces_skipped = ref 0 in
607607+ while !spaces_skipped < !content_indent && Input.next_is (( = ) ' ') t.input do
608608+ incr spaces_skipped;
609609+ ignore (Input.next t.input)
610610+ done;
611611+612612+ (* Check if this line is empty (only spaces/tabs until break/eof) *)
587613 if Input.next_is_break t.input then begin
614614+ (* Empty line - record the break and continue *)
588615 Buffer.add_char trailing_breaks '\n';
589589- Input.consume_break t.input
616616+ Input.consume_break t.input;
617617+ skip_to_content_indent ()
618618+ end else if Input.next_is_blank t.input then begin
619619+ (* Line has spaces beyond content_indent - check if rest is only blanks *)
620620+ let idx = ref 0 in
621621+ let is_empty = ref false in
622622+ while not !is_empty do
623623+ match Input.peek_nth t.input !idx with
624624+ | Some c when Input.is_blank c -> incr idx
625625+ | Some c when Input.is_break c -> is_empty := true
626626+ | _ -> is_empty := true (* Not a break, so has content *)
627627+ done;
628628+ (* Check if we found a break (empty line) or content *)
629629+ (match Input.peek_nth t.input (!idx) with
630630+ | None | Some '\n' | Some '\r' ->
631631+ (* Empty line - consume all blanks and break *)
632632+ while Input.next_is_blank t.input do
633633+ ignore (Input.next t.input)
634634+ done;
635635+ Buffer.add_char trailing_breaks '\n';
636636+ Input.consume_break t.input;
637637+ skip_to_content_indent ()
638638+ | _ ->
639639+ (* Has content *)
640640+ ())
590641 end
591591- done;
642642+ end else begin
643643+ (* Implicit indent - skip empty lines, but leave a content line's leading spaces unconsumed *)
644644+ if Input.next_is_break t.input then begin
645645+ Buffer.add_char trailing_breaks '\n';
646646+ Input.consume_break t.input;
647647+ skip_to_content_indent ()
648648+ end else if Input.next_is_blank t.input then begin
649649+ (* Check if line is empty *)
650650+ let idx = ref 0 in
651651+ while match Input.peek_nth t.input !idx with
652652+ | Some c when Input.is_blank c -> incr idx; true
653653+ | _ -> false
654654+ do () done;
655655+ match Input.peek_nth t.input (!idx) with
656656+ | None | Some '\n' | Some '\r' ->
657657+ (* Empty line *)
658658+ while Input.next_is_blank t.input do
659659+ ignore (Input.next t.input)
660660+ done;
661661+ Buffer.add_char trailing_breaks '\n';
662662+ Input.consume_break t.input;
663663+ skip_to_content_indent ()
664664+ | _ ->
665665+ (* Has content - don't consume anything *)
666666+ ()
667667+ end
668668+ end
669669+ in
670670+671671+ (* Read content *)
672672+ let rec read_lines () =
673673+ skip_to_content_indent ();
592674593675 (* Check if we're at content *)
594676 if Input.is_eof t.input then ()
···601683 ignore (Input.next t.input)
602684 done;
603685604604- (* Determine content indent from first content line *)
686686+ (* For explicit indent, line_indent is extra beyond content_indent *)
687687+ if !content_indent > 0 then
688688+ line_indent := !content_indent + !line_indent;
689689+690690+ (* Determine content indent from first content line (implicit case) *)
605691 if !content_indent = 0 then begin
606692 if !line_indent <= base_indent then begin
607693 (* No content - restore position conceptually *)