···328328 let color_features = ["color"; "min-color"; "max-color"; "color-index"; "min-color-index"; "max-color-index";
329329 "monochrome"; "min-monochrome"; "max-monochrome"] in
330330331331+ (* Get base feature name for error messages (strip min-/max- prefix) *)
332332+ let base_feature =
333333+ if String.length feature > 4 && String.sub feature 0 4 = "min-" then
334334+ String.sub feature 4 (String.length feature - 4)
335335+ else if String.length feature > 4 && String.sub feature 0 4 = "max-" then
336336+ String.sub feature 4 (String.length feature - 4)
337337+ else
338338+ feature
339339+ in
340340+331341 if List.mem feature length_features then begin
332342 (* Must be a valid length: number followed by unit *)
333343 let value = String.trim value in
···360370 let unit_lower = String.lowercase_ascii unit_part in
361371 if List.mem unit_lower valid_length_units then Ok ()
362372 else if List.mem unit_lower valid_resolution_units then
363363- Error (Printf.sprintf "\"%s\" is not a \"%s\" value" value feature)
373373+ Error (Printf.sprintf "\"%s\" is not a \"%s\" value" value base_feature)
364374 else
365375 Error "Unknown dimension."
366376 end
···370380 let is_digit c = c >= '0' && c <= '9' in
371381 if String.length value > 0 && String.for_all is_digit value then Ok ()
372382 else
373373- Error (Printf.sprintf "\"%s\" is not a \"%s\" value" value feature)
383383+ Error (Printf.sprintf "\"%s\" is not a \"%s\" value" value base_feature)
374384 end else
375385 Ok () (* Allow other features with any value for now *)
376386
+3
lib/html5_checker/parse_error_bridge.ml
···7474 else if String.length s > 19 && String.sub s 0 19 = "unexpected-end-tag:" then
7575 let element = String.sub s 19 (String.length s - 19) in
7676 (Printf.sprintf "Stray end tag \xe2\x80\x9c%s\xe2\x80\x9d." element, "stray-end-tag")
7777+ else if String.length s > 19 && String.sub s 0 19 = "start-tag-in-table:" then
7878+ let tag = String.sub s 19 (String.length s - 19) in
7979+ (Printf.sprintf "Start tag \xe2\x80\x9c%s\xe2\x80\x9d seen in \xe2\x80\x9ctable\xe2\x80\x9d." tag, "start-tag-in-table")
7780 else
7881 (Printf.sprintf "Parse error: %s" s, s)
7982 with _ -> (Printf.sprintf "Parse error: %s" s, s))
+10
lib/html5_checker/semantic/id_checker.ml
···218218 (* Use specific error for list attribute on input *)
219219 if ref.attribute = "list" && ref.referring_element = "input" then
220220 Message_collector.add_typed collector Error_code.List_attr_requires_datalist
221221+ else if ref.attribute = "commandfor" then
222222+ (* commandfor has a specific expected message format *)
223223+ Message_collector.add_error collector
224224+ ~message:(Printf.sprintf "The value of the %s attribute of the %s element must be the ID of an element in the same tree as the %s with the %s attribute."
225225+ (Error_code.q "commandfor") (Error_code.q ref.referring_element)
226226+ (Error_code.q ref.referring_element) (Error_code.q "commandfor"))
227227+ ~code:"dangling-id-reference"
228228+ ~element:ref.referring_element
229229+ ~attribute:ref.attribute
230230+ ()
221231 else
222232 (* Use generic for dangling references - format may vary *)
223233 Message_collector.add_typed collector
+12-6
lib/html5_checker/semantic/option_checker.ml
···2929 ) attrs
30303131let start_element state ~name ~namespace ~attrs collector =
3232- ignore collector;
3332 let name_lower = String.lowercase_ascii name in
34333534 if namespace <> None then ()
···4342 | Some v -> String.trim v = ""
4443 | None -> false
4544 in
4545+ (* Report error for empty label attribute value *)
4646+ if label_empty then
4747+ Message_collector.add_error collector
4848+ ~message:"Bad value \xe2\x80\x9c\xe2\x80\x9d for attribute \xe2\x80\x9clabel\xe2\x80\x9d on element \xe2\x80\x9coption\xe2\x80\x9d: Bad non-empty string: Must not be empty."
4949+ ~code:"empty-attribute-value"
5050+ ~element:"option"
5151+ ~attribute:"label"
5252+ ();
4653 let ctx = { has_text = false; has_label; label_empty } in
4754 state.option_stack <- ctx :: state.option_stack
4855 end
···6067 | ctx :: rest ->
6168 state.option_stack <- rest;
6269 (* Validate: option must have text content or non-empty label *)
6363- if not ctx.has_text then begin
6464- if ctx.label_empty || not ctx.has_label then
6565- (* Has label="" (empty) and no text, or no label at all - error *)
6666- Message_collector.add_typed collector Error_code.Option_empty_without_label
6767- end
7070+ (* Note: empty label error is already reported at start_element,
7171+ so only report empty option without label when there's no label attribute at all *)
7272+ if not ctx.has_text && not ctx.has_label then
7373+ Message_collector.add_typed collector Error_code.Option_empty_without_label
6874 | [] -> ()
6975 end
7076 end
+22-4
lib/html5_checker/specialized/aria_checker.ml
···3434 (* Window roles *)
3535 "alertdialog";
36363737- (* Abstract roles - not for use in HTML content *)
3838- "command"; "comment"; "composite"; "input"; "landmark"; "range";
3939- "roletype"; "section"; "sectionhead"; "select"; "structure"; "widget";
4040- "window";
3737+ (* Note: Abstract roles (command, composite, input, landmark, range, etc.)
3838+ are NOT included as they should not be used in HTML content.
3939+ Using an abstract role will result in "Discarding unrecognized token" error. *)
41404241 (* Additional roles *)
4342 "application"; "columnheader"; "rowheader";
···342341 end
343342 | None -> Some "textbox" (* default input type is text *)
344343 end
344344+ (* Check for area element - implicit role depends on href attribute *)
345345+ else if element_name = "area" then begin
346346+ match List.assoc_opt "href" attrs with
347347+ | Some _ -> Some "link" (* area with href has implicit role "link" *)
348348+ | None -> Some "generic" (* area without href has no corresponding role, treated as generic *)
349349+ end
350350+ (* Check for a element - implicit role depends on href attribute *)
351351+ else if element_name = "a" then begin
352352+ match List.assoc_opt "href" attrs with
353353+ | Some _ -> Some "link" (* a with href has implicit role "link" *)
354354+ | None -> Some "generic" (* a without href has no corresponding role, treated as generic *)
355355+ end
345356 else
346357 Hashtbl.find_opt elements_with_implicit_role element_name
347358···443454 | Some role_value -> split_roles role_value
444455 | None -> []
445456 in
457457+458458+ (* Check for unrecognized role tokens *)
459459+ List.iter (fun role ->
460460+ if not (Hashtbl.mem valid_aria_roles role) then
461461+ Message_collector.add_typed collector
462462+ (Error_code.Discarding_unrecognized_role { token = role })
463463+ ) explicit_roles;
446464447465 (* Get implicit role for this element *)
448466 let implicit_role = get_implicit_role name_lower attrs in
···341341 (* Check if the name contains colon - not XML serializable *)
342342 else if String.contains after_prefix ':' then
343343 Message_collector.add_error collector
344344- ~message:(Printf.sprintf "Attribute \xe2\x80\x9c%s\xe2\x80\x9d is not serializable as XML 1.0."
345345- attr_name)
344344+ ~message:"\xe2\x80\x9cdata-*\xe2\x80\x9d attribute names must be XML 1.0 4th ed. plus Namespaces NCNames."
346345 ~code:"bad-attribute-name"
347346 ~element:name ~attribute:attr_name ()
348347 end
···486485487486 if has_command && has_aria_expanded then
488487 Message_collector.add_error collector
489489- ~message:"The \xe2\x80\x9caria-expanded\xe2\x80\x9d attribute must not be specified on \xe2\x80\x9cbutton\xe2\x80\x9d elements that have the \xe2\x80\x9ccommand\xe2\x80\x9d attribute."
488488+ ~message:"The \xe2\x80\x9caria-expanded\xe2\x80\x9d attribute must not be used on any element which has a \xe2\x80\x9ccommand\xe2\x80\x9d attribute."
490489 ~code:"disallowed-attribute"
491490 ~element:name ~attribute:"aria-expanded" ();
492491493492 if has_popovertarget && has_aria_expanded then
494493 Message_collector.add_error collector
495495- ~message:"The \xe2\x80\x9caria-expanded\xe2\x80\x9d attribute must not be specified on \xe2\x80\x9cbutton\xe2\x80\x9d elements that have the \xe2\x80\x9cpopovertarget\xe2\x80\x9d attribute."
494494+ ~message:"The \xe2\x80\x9caria-expanded\xe2\x80\x9d attribute must not be used on any element which has a \xe2\x80\x9cpopovertarget\xe2\x80\x9d attribute."
496495 ~code:"disallowed-attribute"
497496 ~element:name ~attribute:"aria-expanded" ()
498497 end;
···99 Some (Printf.sprintf
1010 "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Empty value."
1111 value attr_name element)
1212- else if value.[len - 1] = ' ' || value.[len - 1] = '\t' then
1313- Some (Printf.sprintf
1414- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Extraneous trailing whitespace."
1515- value attr_name element)
1212+ else if value.[len - 1] = ' ' || value.[len - 1] = '\t' then begin
1313+ (* Check if this is a semicolon followed by only whitespace *)
1414+ let semicolon_pos = try Some (String.index value ';') with Not_found -> None in
1515+ match semicolon_pos with
1616+ | Some semi_pos ->
1717+ let params = String.sub value (semi_pos + 1) (len - semi_pos - 1) in
1818+ let params_trimmed = String.trim params in
1919+ if params_trimmed = "" then
2020+ Some (Printf.sprintf
2121+ "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Semicolon seen but there was no parameter following it."
2222+ value attr_name element)
2323+ else
2424+ Some (Printf.sprintf
2525+ "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Extraneous trailing whitespace."
2626+ value attr_name element)
2727+ | None ->
2828+ Some (Printf.sprintf
2929+ "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Extraneous trailing whitespace."
3030+ value attr_name element)
3131+ end
1632 else if len > 0 && (value.[0] = ' ' || value.[0] = '\t') then
1733 Some (Printf.sprintf
1834 "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Expected a token character but saw \xe2\x80\x9c \xe2\x80\x9d instead."
···2020 if String.lowercase_ascii n = name then Some v else None
2121 ) attrs
(** [split_respecting_parens ~sep s] splits [s] at every occurrence of [sep]
    that lies outside parentheses and returns the segments in order.

    A separator nested inside [( ... )] is kept as part of the current segment.
    Empty segments are preserved, so the result always has at least one
    element ([""] yields [[""]]). Parentheses are always copied into the
    current segment; they are tested before the separator, so passing ['(']
    or [')'] as [sep] never splits.

    A stray [')'] with no matching ['('] is copied through but does not push
    the nesting depth below zero; otherwise every separator after it would be
    treated as nested and the rest of the string would never split. *)
let split_respecting_parens ~sep s =
  let segments = ref [] in
  let current = Buffer.create 64 in
  let depth = ref 0 in
  (* Close the segment being accumulated and start a fresh one. *)
  let flush () =
    segments := Buffer.contents current :: !segments;
    Buffer.clear current
  in
  String.iter
    (fun c ->
      match c with
      | '(' ->
        incr depth;
        Buffer.add_char current c
      | ')' ->
        (* Clamp at zero so an unbalanced ')' cannot disable splitting. *)
        if !depth > 0 then decr depth;
        Buffer.add_char current c
      | _ when c = sep && !depth = 0 -> flush ()
      | _ -> Buffer.add_char current c)
    s;
  (* Whatever follows the last separator (possibly empty) is the final segment. *)
  flush ();
  List.rev !segments
(** Comma-separated split that leaves commas inside parentheses untouched;
    delegates to [split_respecting_parens] with [','] as the separator. *)
let split_on_comma_respecting_parens = split_respecting_parens ~sep:','
(** Space-separated split that leaves spaces inside parentheses untouched and
    drops the empty segments produced by consecutive spaces. *)
let split_on_space_respecting_parens s =
  let non_empty segment = segment <> "" in
  List.filter non_empty (split_respecting_parens ~sep:' ' s)
5353+2354(** Check if string contains only whitespace *)
2455let is_whitespace_only s =
2556 String.for_all (fun c -> c = ' ' || c = '\t' || c = '\n' || c = '\r') s
···793824794825(** Parse and validate srcset attribute value *)
795826let validate_srcset value element_name has_sizes collector =
796796- let entries = String.split_on_char ',' value in
827827+ let entries = split_on_comma_respecting_parens value in
797828 let has_w_descriptor = ref false in
798829 let has_x_descriptor = ref false in
799830 let no_descriptor_url = ref None in (* Track URL of first entry without width descriptor *)
···844875 List.iter (fun entry ->
845876 let entry = String.trim entry in
846877 if entry <> "" then begin
847847- (* Split entry into URL and optional descriptor *)
848848- let parts = String.split_on_char ' ' entry |> List.filter (fun s -> s <> "") in
878878+ (* Split entry into URL and optional descriptor - respect parentheses *)
879879+ let parts = split_on_space_respecting_parens entry in
849880 (* Check if URL is valid *)
850881 let check_srcset_url url =
851882 (* Special schemes that require host/content after :// *)
+40-10
lib/html5_checker/specialized/svg_checker.ml
···286286(* Validate xmlns attributes *)
287287let validate_xmlns_attr attr value element collector =
288288 match attr with
289289- | "xmlns" when element = "svg" ->
289289+ | "xmlns" ->
290290+ (* xmlns on any SVG element must be the SVG namespace *)
290291 if value <> svg_ns_url then
291292 Message_collector.add_error collector
292293 ~message:(Printf.sprintf
···348349 let flag = Str.matched_group 4 d in
349350 if flag <> "0" && flag <> "1" then begin
350351 let pos = Str.match_beginning () in
351351- let ctx_end = min (String.length d) (pos + 25) in
352352+ (* Context ends right after the invalid flag *)
353353+ let flag_end = Str.match_end () in
352354 let ctx_start = max 0 (pos - 10) in
353353- let context = String.sub d ctx_start (ctx_end - ctx_start) in
355355+ let context = String.sub d ctx_start (flag_end - ctx_start) in
354356 Message_collector.add_error collector
355357 ~message:(Printf.sprintf
356358 "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9cd\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad SVG path data: Expected \xe2\x80\x9c0\xe2\x80\x9d or \xe2\x80\x9c1\xe2\x80\x9d for large-arc-flag for \xe2\x80\x9ca\xe2\x80\x9d command but found \xe2\x80\x9c%s\xe2\x80\x9d instead (context: \xe2\x80\x9c%s\xe2\x80\x9d)."
···391393 | [] -> ()
392394 end;
393395396396+ (* 2.5 Check stop element is only in linearGradient or radialGradient *)
397397+ if name_lower = "stop" then begin
398398+ match state.element_stack with
399399+ | parent :: _ when (let p = String.lowercase_ascii parent in
400400+ p = "lineargradient" || p = "radialgradient") -> ()
401401+ | parent :: _ ->
402402+ Message_collector.add_error collector
403403+ ~message:(Printf.sprintf
404404+ "Element \xe2\x80\x9c%s\xe2\x80\x9d not allowed as child of element \xe2\x80\x9c%s\xe2\x80\x9d in this context. (Suppressing further errors from this subtree.)"
405405+ name parent)
406406+ ~element:name
407407+ ()
408408+ | [] -> ()
409409+ end;
410410+411411+ (* 2.6 Check use element is not nested inside another use element *)
412412+ if name_lower = "use" then begin
413413+ match state.element_stack with
414414+ | parent :: _ when String.lowercase_ascii parent = "use" ->
415415+ Message_collector.add_error collector
416416+ ~message:(Printf.sprintf
417417+ "Element \xe2\x80\x9c%s\xe2\x80\x9d not allowed as child of element \xe2\x80\x9c%s\xe2\x80\x9d in this context. (Suppressing further errors from this subtree.)"
418418+ name parent)
419419+ ~element:name
420420+ ()
421421+ | _ -> ()
422422+ end;
423423+394424 (* 3. Check duplicate feFunc* in feComponentTransfer *)
395425 (match state.element_stack with
396426 | parent :: _ when String.lowercase_ascii parent = "fecomponenttransfer" ->
···401431 Message_collector.add_error collector
402432 ~message:(Printf.sprintf
403433 "Element \xe2\x80\x9c%s\xe2\x80\x9d not allowed as child of element \xe2\x80\x9cfeComponentTransfer\xe2\x80\x9d in this context. (Suppressing further errors from this subtree.)"
404404- name_lower)
405405- ~element:name_lower
434434+ name)
435435+ ~element:name
406436 ()
407437 else
408438 fect.seen_funcs <- name_lower :: fect.seen_funcs
···430460 Message_collector.add_error collector
431461 ~message:(Printf.sprintf
432462 "Attribute \xe2\x80\x9c%s\xe2\x80\x9d not allowed on element \xe2\x80\x9c%s\xe2\x80\x9d at this point."
433433- attr name_lower)
434434- ~element:name_lower
463463+ attr name)
464464+ ~element:name
435465 ~attribute:attr_lower
436466 ()
437467 (* Validate path data *)
438468 else if attr_lower = "d" && name_lower = "path" then
439439- validate_path_data value name_lower collector
469469+ validate_path_data value name collector
440470 (* Check if attribute is valid for this element *)
441471 else if not (is_valid_attr name_lower attr_lower) then
442472 Message_collector.add_error collector
443473 ~message:(Printf.sprintf
444474 "Attribute \xe2\x80\x9c%s\xe2\x80\x9d not allowed on element \xe2\x80\x9c%s\xe2\x80\x9d at this point."
445445- attr name_lower)
446446- ~element:name_lower
475475+ attr name)
476476+ ~element:name
447477 ~attribute:attr_lower
448478 ()
449479 ) attrs;
+3-3
lib/html5rw/parser/parser_tree_builder.ml
···11781178 | Token.Tag { kind = Token.End; name; _ }
11791179 when List.mem name ["address"; "article"; "aside"; "blockquote"; "button"; "center"; "details"; "dialog"; "dir"; "div"; "dl"; "fieldset"; "figcaption"; "figure"; "footer"; "header"; "hgroup"; "listing"; "main"; "menu"; "nav"; "ol"; "pre"; "search"; "section"; "summary"; "ul"] ->
11801180 if not (has_element_in_scope t name) then
11811181- parse_error t "unexpected-end-tag"
11811181+ parse_error t ("unexpected-end-tag:" ^ name)
11821182 else begin
11831183 generate_implied_end_tags t ();
11841184 (match current_node t with
···15271527 String.lowercase_ascii k = "type" && String.lowercase_ascii v = "hidden"
15281528 ) attrs in
15291529 if not is_hidden then begin
15301530- parse_error t "unexpected-start-tag";
15301530+ parse_error t "start-tag-in-table:input";
15311531 t.foster_parenting <- true;
15321532 process_in_body t token;
15331533 t.foster_parenting <- false
15341534 end else begin
15351535- parse_error t "unexpected-start-tag";
15351535+ parse_error t "start-tag-in-table:input";
15361536 ignore (insert_element t "input" ~push:true attrs);
15371537 pop_current t
15381538 end
+11
test/test_nfc_debug.ml
(* Debug driver: runs the checker over a single fixture file and prints the
   number of warnings followed by each warning message. *)
let () =
  let path = "validator/tests/html-svg/struct-cond-02-t-haswarn.html" in
  (* Read the whole fixture up front; the channel is closed on return. *)
  let content = In_channel.with_open_text path In_channel.input_all in
  let reader = Bytesrw.Bytes.Reader.of_string content in
  let warnings =
    Html5_checker.warnings (Html5_checker.check ~system_id:"test.html" reader)
  in
  Printf.printf "Total warnings: %d\n" (List.length warnings);
  let print_warning msg =
    Printf.printf "WARNING: %s\n" (Html5_checker.Message.message msg)
  in
  List.iter print_warning warnings