···425425 | parent :: _ -> Some parent.element_name
426426 | [] -> None
(** Decide whether the element being checked sits in a "list context".

    The result is [true] as soon as one entry of [state.stack] either
    - has ["list"] among its [explicit_roles], or
    - is named ["ul"], ["ol"] or ["menu"] and has no explicit role at all.

    An empty stack yields [false]. *)
let is_in_list_context state =
  (* Element names that form a list when no role overrides them. *)
  let is_implicit_list_element = function
    | "ul" | "ol" | "menu" -> true
    | _ -> false
  in
  let provides_list_context entry =
    List.mem "list" entry.explicit_roles
    || (is_implicit_list_element entry.element_name
        && entry.explicit_roles = [])
  in
  List.exists provides_list_context state.stack
440440+428441(** Render a list of roles as a human-readable string. *)
429442let render_role_set roles =
430443 match roles with
···548561 end
549562 end;
550563551551- (* Check li role restrictions in menu/menubar/tablist contexts *)
564564+ (* Check li role restrictions in menu/menubar/tablist/list contexts *)
552565 if name_lower = "li" && explicit_roles <> [] then begin
553566 let first_role = List.hd explicit_roles in
554567 (* none/presentation are always allowed as they remove from accessibility tree *)
···565578 | Some _ ->
566579 if first_role <> "tab" then
567580 Message_collector.add_typed collector Error_code.Li_bad_role_in_tablist
568568- | None -> ())
581581+ | None ->
582582+ (* Check if in list context (ul/ol/menu without explicit role, or role=list) *)
583583+ if is_in_list_context state then
584584+ if first_role <> "listitem" then
585585+ Message_collector.add_typed collector Error_code.Li_bad_role_in_list)
569586 end
570587 end;
571588
···1717 let normalized = normalize_nfc text in
1818 text = normalized
(** [is_ascii_punct c] is [true] exactly when [c] lies in one of the four
    ASCII punctuation ranges: 0x21-0x2F, 0x3A-0x40, 0x5B-0x60 or 0x7B-0x7E.
    Letters, digits, space, control bytes and bytes above 0x7E give [false]. *)
let is_ascii_punct = function
  | '!' .. '/' (* 0x21-0x2F *)
  | ':' .. '@' (* 0x3A-0x40, which contains '?' *)
  | '[' .. '`' (* 0x5B-0x60 *)
  | '{' .. '~' (* 0x7B-0x7E *) -> true
  | _ -> false
(** [strip_trailing_punct s] drops the run of ASCII punctuation (as judged by
    [is_ascii_punct]) found at the very end of [s].

    Scanning stops at the first non-punctuation byte from the right, so any
    whitespace sitting in front of the removed punctuation is kept. A string
    made only of punctuation becomes [""]; a string without trailing
    punctuation (including the empty string) is returned as is. *)
let strip_trailing_punct s =
  let total = String.length s in
  (* [keep] is the length of the prefix to retain; shrink it while the byte
     just before the cut is punctuation. *)
  let keep = ref total in
  while !keep > 0 && is_ascii_punct s.[!keep - 1] do
    decr keep
  done;
  if !keep = total then s else String.sub s 0 !keep
(** Start-tag hook: ignores every argument and does nothing. *)
let start_element _ ~name:_ ~namespace:_ ~attrs:_ _ = ()
(** End-tag hook: ignores every argument and does nothing. *)
let end_element _ ~name:_ ~namespace:_ _ = ()
···2750 if String.length text_trimmed = 0 then ()
2851 else if not (is_nfc text_trimmed) then begin
2952 let normalized = normalize_nfc text_trimmed in
5353+ (* Strip trailing ASCII punctuation from replacement to match Nu validator *)
5454+ let replacement = strip_trailing_punct normalized in
3055 Message_collector.add_typed collector
3131- (Error_code.Not_nfc { replacement = normalized })
5656+ (Error_code.Not_nfc { replacement })
3257 end
(** End-of-document hook: ignores both arguments and does nothing. *)
let end_document _ _ = ()
(** Comma-separated split of [s], delegated to [split_respecting_parens]
    with [','] as the separator. *)
let split_on_comma_respecting_parens s =
  s |> split_respecting_parens ~sep:','
(** Split a srcset value on commas, honouring parentheses only when [s]
    contains as many '(' as ')'.

    The check is a plain count comparison; the order in which the parentheses
    appear is not inspected. With equal counts the work is handed to
    [split_on_comma_respecting_parens]; otherwise every comma is treated as a
    separator. *)
let split_on_comma_balanced_parens s =
  (* Net count: +1 per '(' and -1 per ')'. Zero means the totals agree. *)
  let surplus = ref 0 in
  String.iter
    (function
      | '(' -> incr surplus
      | ')' -> decr surplus
      | _ -> ())
    s;
  match !surplus with
  | 0 -> split_on_comma_respecting_parens s
  | _ -> String.split_on_char ',' s
(** Space-separated split of [s] via [split_respecting_parens], with the
    empty segments removed from the result. *)
let split_on_space_respecting_parens s =
  let is_non_empty segment = segment <> "" in
  List.filter is_non_empty (split_respecting_parens ~sep:' ' s)
···730743 | _ ->
731744 (* Unknown descriptor - find context in srcset_value *)
732745 let trimmed_desc = String.trim desc in
733733- (* Try to find the context: find where this descriptor appears in srcset_value *)
746746+ (* Nu validator adds extra ')' after the last ')' if descriptor contains any '(' *)
747747+ let found_desc =
748748+ if String.contains trimmed_desc '(' then
749749+ (* Find position of last ')' and insert extra ')' after it *)
750750+ try
751751+ let last_close = String.rindex trimmed_desc ')' in
752752+ let before = String.sub trimmed_desc 0 (last_close + 1) in
753753+ let after = String.sub trimmed_desc (last_close + 1) (String.length trimmed_desc - last_close - 1) in
754754+ before ^ ")" ^ after
755755+ with Not_found -> trimmed_desc ^ ")"
756756+ else trimmed_desc
757757+ in
758758+ (* Try to find the context: show trailing portion ending with descriptor and comma *)
734759 let context =
735760 try
736761 let pos = Str.search_forward (Str.regexp_string trimmed_desc) srcset_value 0 in
737737- (* Get the context up to and including the descriptor and the comma after *)
762762+ (* Get the context ending with the descriptor and the comma after *)
738763 let end_pos = min (pos + String.length trimmed_desc + 1) (String.length srcset_value) in
739739- let start_pos = max 0 (pos - 2) in
740740- String.trim (String.sub srcset_value start_pos (end_pos - start_pos))
764764+ (* Show trailing portion with ellipsis if needed *)
765765+ let max_context = 15 in
766766+ if end_pos > max_context then
767767+ "\xe2\x80\xa6" ^ String.sub srcset_value (end_pos - max_context) max_context
768768+ else
769769+ String.trim (String.sub srcset_value 0 end_pos)
741770 with Not_found -> srcset_value
742771 in
743772 Message_collector.add_typed collector
744744- (Error_code.Bad_attr_value_generic { message = Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number followed by \xe2\x80\x9cw\xe2\x80\x9d or \xe2\x80\x9cx\xe2\x80\x9d but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name trimmed_desc context });
773773+ (Error_code.Bad_attr_value_generic { message = Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number followed by \xe2\x80\x9cw\xe2\x80\x9d or \xe2\x80\x9cx\xe2\x80\x9d but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name found_desc context });
745774 false
746775 end
747776···766795767796(** Parse and validate srcset attribute value *)
768797let validate_srcset value element_name has_sizes collector =
769769- let entries = split_on_comma_respecting_parens value in
798798+ (* Srcset entries are split on commas - only balanced parentheses prevent split *)
799799+ let entries = split_on_comma_balanced_parens value in
770800 let has_w_descriptor = ref false in
771801 let has_x_descriptor = ref false in
772802 let no_descriptor_url = ref None in (* Track URL of first entry without width descriptor *)
+1-1
lib/html5_checker/specialized/svg_checker.ml
···297297 if value <> "http://www.w3.org/1999/xlink" then
298298 Message_collector.add_typed collector
299299 (Error_code.Bad_attr_value_generic { message = Printf.sprintf
300300- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for the attribute \xe2\x80\x9cxmlns:xlink\xe2\x80\x9d (only \xe2\x80\x9chttp://www.w3.org/1999/xlink\xe2\x80\x9d permitted here)."
300300+ "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for the attribute \xe2\x80\x9cxmlns:link\xe2\x80\x9d (only \xe2\x80\x9chttp://www.w3.org/1999/xlink\xe2\x80\x9d permitted here)."
301301 value })
302302 | _ when String.starts_with ~prefix:"xmlns:" attr && attr <> "xmlns:xlink" ->
303303 (* Other xmlns declarations are not allowed in HTML-embedded SVG *)