···11+(** Autofocus attribute validation checker.
22+33+ Validates that only one element with autofocus attribute exists within
44+ each dialog or popover context. *)
55+66+(** Context for tracking autofocus elements. *)
77+type context_type = Dialog | Popover
88+99+type context = {
1010+ context_type : context_type;
1111+ mutable autofocus_count : int;
1212+ depth : int;
1313+}
1414+1515+type state = {
1616+ mutable context_stack : context list;
1717+ mutable current_depth : int;
1818+}
1919+2020+let create () = {
2121+ context_stack = [];
2222+ current_depth = 0;
2323+}
2424+2525+let reset state =
2626+ state.context_stack <- [];
2727+ state.current_depth <- 0
2828+2929+(** Check if an attribute list contains a specific attribute. *)
3030+let has_attr name attrs =
3131+ List.exists (fun (attr_name, _) -> String.lowercase_ascii attr_name = name) attrs
3232+3333+(** Get an attribute value from the list. *)
3434+let get_attr name attrs =
3535+ List.find_map (fun (attr_name, value) ->
3636+ if String.lowercase_ascii attr_name = name then Some value else None
3737+ ) attrs
3838+3939+(** Check if element has popover attribute. *)
4040+let has_popover attrs =
4141+ List.exists (fun (attr_name, _) ->
4242+ String.lowercase_ascii attr_name = "popover"
4343+ ) attrs
4444+4545+let start_element state ~name ~namespace ~attrs collector =
4646+ let name_lower = String.lowercase_ascii name in
4747+4848+ (* Track depth *)
4949+ state.current_depth <- state.current_depth + 1;
5050+5151+ if namespace = None then begin
5252+ (* Check if we're entering a dialog or popover context *)
5353+ let enters_context =
5454+ if name_lower = "dialog" then Some Dialog
5555+ else if has_popover attrs then Some Popover
5656+ else None
5757+ in
5858+5959+ (match enters_context with
6060+ | Some ctx_type ->
6161+ let ctx = { context_type = ctx_type; autofocus_count = 0; depth = state.current_depth } in
6262+ state.context_stack <- ctx :: state.context_stack
6363+ | None -> ());
6464+6565+ (* Check for autofocus attribute *)
6666+ if has_attr "autofocus" attrs then begin
6767+ (* Increment count in innermost context if any *)
6868+ match state.context_stack with
6969+ | ctx :: _ ->
7070+ ctx.autofocus_count <- ctx.autofocus_count + 1;
7171+ if ctx.autofocus_count > 1 then
7272+ let context_name = match ctx.context_type with
7373+ | Dialog -> "dialog"
7474+ | Popover -> "popover"
7575+ in
7676+ Message_collector.add_error collector
7777+ ~message:(Printf.sprintf "A document must not include more than one visible element with the \xe2\x80\x9cautofocus\xe2\x80\x9d attribute inside a %s."
7878+ context_name)
7979+ ~code:"multiple-autofocus"
8080+ ~element:name ~attribute:"autofocus" ()
8181+ | [] -> ()
8282+ end
8383+ end
8484+8585+let end_element state ~name ~namespace _collector =
8686+ let name_lower = String.lowercase_ascii name in
8787+8888+ if namespace = None then begin
8989+ (* Pop context if we're leaving one *)
9090+ match state.context_stack with
9191+ | ctx :: rest when ctx.depth = state.current_depth ->
9292+ (* Verify this is the right element *)
9393+ let matches =
9494+ (name_lower = "dialog" && ctx.context_type = Dialog) ||
9595+ (ctx.context_type = Popover)
9696+ in
9797+ if matches then state.context_stack <- rest
9898+ | _ -> ()
9999+ end;
100100+101101+ state.current_depth <- state.current_depth - 1
102102+103103+let characters _state _text _collector = ()
104104+105105+let end_document _state _collector = ()
106106+107107+let checker =
108108+ (module struct
109109+ type nonrec state = state
110110+ let create = create
111111+ let reset = reset
112112+ let start_element = start_element
113113+ let end_element = end_element
114114+ let characters = characters
115115+ let end_document = end_document
116116+ end : Checker.S)
+3
lib/html5_checker/semantic/id_checker.ml
···8282 "form"; (* form-associated elements *)
8383 "list"; (* input *)
8484 "aria-activedescendant";
8585+ "popovertarget"; (* button - references popover element *)
8686+ "commandfor"; (* button - references element to control *)
8787+ "anchor"; (* popover - references anchor element *)
8588]
86898790(** Attributes that reference multiple IDs (space-separated). *)
+101
lib/html5_checker/semantic/option_checker.ml
···11+(** Option element validation checker.
22+33+ Validates that option elements have proper content or label. *)
44+55+type option_context = {
66+ mutable has_text : bool;
77+ has_label : bool;
88+ label_empty : bool;
99+}
1010+1111+type state = {
1212+ mutable option_stack : option_context list;
1313+ mutable in_template : int;
1414+}
1515+1616+let create () = {
1717+ option_stack = [];
1818+ in_template = 0;
1919+}
2020+2121+let reset state =
2222+ state.option_stack <- [];
2323+ state.in_template <- 0
2424+2525+(** Get attribute value if present. *)
2626+let get_attr name attrs =
2727+ List.find_map (fun (attr_name, value) ->
2828+ if String.lowercase_ascii attr_name = name then Some value else None
2929+ ) attrs
3030+3131+let start_element state ~name ~namespace ~attrs collector =
3232+ ignore collector;
3333+ let name_lower = String.lowercase_ascii name in
3434+3535+ if namespace <> None then ()
3636+ else begin
3737+ if name_lower = "template" then
3838+ state.in_template <- state.in_template + 1
3939+ else if state.in_template = 0 && name_lower = "option" then begin
4040+ let label_opt = get_attr "label" attrs in
4141+ let has_label = label_opt <> None in
4242+ let label_empty = match label_opt with
4343+ | Some v -> String.trim v = ""
4444+ | None -> false
4545+ in
4646+ let ctx = { has_text = false; has_label; label_empty } in
4747+ state.option_stack <- ctx :: state.option_stack
4848+ end
4949+ end
5050+5151+let end_element state ~name ~namespace collector =
5252+ let name_lower = String.lowercase_ascii name in
5353+5454+ if namespace <> None then ()
5555+ else begin
5656+ if name_lower = "template" then
5757+ state.in_template <- max 0 (state.in_template - 1)
5858+ else if state.in_template = 0 && name_lower = "option" then begin
5959+ match state.option_stack with
6060+ | ctx :: rest ->
6161+ state.option_stack <- rest;
6262+ (* Validate: option must have text content or non-empty label *)
6363+ if not ctx.has_text then begin
6464+ if ctx.label_empty then
6565+ (* Has label="" (empty) and no text - error *)
6666+ Message_collector.add_error collector
6767+ ~message:"An \xe2\x80\x9coption\xe2\x80\x9d element with an empty \xe2\x80\x9clabel\xe2\x80\x9d attribute must have content."
6868+ ~code:"empty-option"
6969+ ~element:"option" ()
7070+ else if not ctx.has_label then
7171+ (* No label and no text - error *)
7272+ Message_collector.add_error collector
7373+ ~message:"An \xe2\x80\x9coption\xe2\x80\x9d element with no \xe2\x80\x9clabel\xe2\x80\x9d attribute must have content."
7474+ ~code:"empty-option"
7575+ ~element:"option" ()
7676+ end
7777+ | [] -> ()
7878+ end
7979+ end
8080+8181+let characters state text _collector =
8282+ if state.in_template = 0 then begin
8383+ match state.option_stack with
8484+ | ctx :: _ ->
8585+ let trimmed = String.trim text in
8686+ if trimmed <> "" then ctx.has_text <- true
8787+ | [] -> ()
8888+ end
8989+9090+let end_document _state _collector = ()
9191+9292+let checker =
9393+ (module struct
9494+ type nonrec state = state
9595+ let create = create
9696+ let reset = reset
9797+ let start_element = start_element
9898+ let end_element = end_element
9999+ let characters = characters
100100+ let end_document = end_document
101101+ end : Checker.S)
+49
lib/html5_checker/specialized/aria_checker.ml
···269269270270 tbl
271271272272+(** ARIA attributes with their default values.
273273+ When the specified value equals the default, a warning is issued.
274274+ Note: "undefined" is NOT included as it's a meaningful value in ARIA
275275+ that explicitly indicates a state doesn't apply. *)
276276+let aria_default_values : (string, string) Hashtbl.t =
277277+ let tbl = Hashtbl.create 16 in
278278+ Hashtbl.add tbl "aria-atomic" "false";
279279+ Hashtbl.add tbl "aria-autocomplete" "none";
280280+ Hashtbl.add tbl "aria-busy" "false";
281281+ Hashtbl.add tbl "aria-current" "false";
282282+ Hashtbl.add tbl "aria-disabled" "false";
283283+ Hashtbl.add tbl "aria-dropeffect" "none";
284284+ (* aria-expanded: "undefined" means the element is not expandable - meaningful, not redundant *)
285285+ (* aria-grabbed: deprecated in ARIA 1.1, "undefined" is meaningful *)
286286+ Hashtbl.add tbl "aria-haspopup" "false";
287287+ (* aria-hidden: "undefined" is meaningful *)
288288+ Hashtbl.add tbl "aria-invalid" "false";
289289+ Hashtbl.add tbl "aria-live" "off";
290290+ Hashtbl.add tbl "aria-modal" "false";
291291+ Hashtbl.add tbl "aria-multiline" "false";
292292+ Hashtbl.add tbl "aria-multiselectable" "false";
293293+ (* aria-orientation: "undefined" is meaningful *)
294294+ (* aria-pressed: "undefined" means the element is not a toggle - meaningful *)
295295+ Hashtbl.add tbl "aria-readonly" "false";
296296+ Hashtbl.add tbl "aria-relevant" "additions text";
297297+ Hashtbl.add tbl "aria-required" "false";
298298+ (* aria-selected: "undefined" is meaningful *)
299299+ Hashtbl.add tbl "aria-sort" "none";
300300+ tbl
301301+272302(** Roles that do NOT support aria-expanded. *)
273303let roles_without_aria_expanded = [
274304 "listbox"; "list"; "menu"; "menubar"; "radiogroup"; "tablist"; "tree"; "treegrid";
···590620 | None -> ()
591621 ) attrs
592622 ) explicit_roles;
623623+624624+ (* Check for redundant default ARIA attribute values *)
625625+ List.iter (fun (attr_name, attr_value) ->
626626+ let attr_lower = String.lowercase_ascii attr_name in
627627+ if String.starts_with ~prefix:"aria-" attr_lower then
628628+ match Hashtbl.find_opt aria_default_values attr_lower with
629629+ | Some default_value ->
630630+ let value_lower = String.lowercase_ascii (String.trim attr_value) in
631631+ if value_lower = default_value then
632632+ Message_collector.add_warning collector
633633+ ~message:(Printf.sprintf
634634+ "The \xe2\x80\x9c%s\xe2\x80\x9d attribute is unnecessary for the value \xe2\x80\x9c%s\xe2\x80\x9d."
635635+ attr_name attr_value)
636636+ ~code:"redundant-aria-default"
637637+ ~element:name
638638+ ~attribute:attr_name
639639+ ()
640640+ | None -> ()
641641+ ) attrs;
593642594643 (* Push current element onto stack *)
595644 let node = {
···328328 end
329329 end;
330330331331+ (* Validate data-* attributes *)
332332+ if namespace = None then begin
333333+ List.iter (fun (attr_name, _) ->
334334+ let attr_lower = String.lowercase_ascii attr_name in
335335+ (* Check if it starts with "data-" *)
336336+ if String.length attr_lower >= 5 && String.sub attr_lower 0 5 = "data-" then begin
337337+ let after_prefix = String.sub attr_lower 5 (String.length attr_lower - 5) in
338338+ (* Check if it's exactly "data-" with nothing after *)
339339+ if after_prefix = "" then
340340+ report_disallowed_attr name_lower attr_name collector
341341+ (* Check if the name contains colon - not XML serializable *)
342342+ else if String.contains after_prefix ':' then
343343+ Message_collector.add_error collector
344344+ ~message:(Printf.sprintf "Attribute \xe2\x80\x9c%s\xe2\x80\x9d is not serializable as XML 1.0."
345345+ attr_name)
346346+ ~code:"bad-attribute-name"
347347+ ~element:name ~attribute:attr_name ()
348348+ end
349349+ ) attrs
350350+ end;
351351+352352+ (* Validate xml:lang must have matching lang attribute - only in HTML mode, not XHTML *)
353353+ if namespace = None && not state.is_xhtml then begin
354354+ let xmllang_value = get_attr "xml:lang" attrs in
355355+ let lang_value = get_attr "lang" attrs in
356356+ match xmllang_value with
357357+ | Some xmllang ->
358358+ (match lang_value with
359359+ | None ->
360360+ Message_collector.add_error collector
361361+ ~message:"When the attribute \xe2\x80\x9cxml:lang\xe2\x80\x9d in no namespace is specified, the element must also have the attribute \xe2\x80\x9clang\xe2\x80\x9d present with the same value."
362362+ ~code:"xmllang-missing-lang"
363363+ ~element:name ~attribute:"xml:lang" ()
364364+ | Some lang when String.lowercase_ascii lang <> String.lowercase_ascii xmllang ->
365365+ Message_collector.add_error collector
366366+ ~message:"When the attribute \xe2\x80\x9cxml:lang\xe2\x80\x9d in no namespace is specified, the element must also have the attribute \xe2\x80\x9clang\xe2\x80\x9d present with the same value."
367367+ ~code:"xmllang-lang-mismatch"
368368+ ~element:name ~attribute:"xml:lang" ()
369369+ | _ -> ())
370370+ | None -> ()
371371+ end;
372372+373373+ (* Validate spellcheck attribute - must be "true" or "false" or empty *)
374374+ if namespace = None then begin
375375+ List.iter (fun (attr_name, attr_value) ->
376376+ let attr_lower = String.lowercase_ascii attr_name in
377377+ if attr_lower = "spellcheck" then begin
378378+ let value_lower = String.lowercase_ascii (String.trim attr_value) in
379379+ if value_lower <> "" && value_lower <> "true" && value_lower <> "false" then
380380+ Message_collector.add_error collector
381381+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d."
382382+ attr_value attr_name name)
383383+ ~code:"bad-attribute-value"
384384+ ~element:name ~attribute:attr_name ()
385385+ end
386386+ ) attrs
387387+ end;
388388+389389+ (* Validate enterkeyhint attribute - must be one of specific values *)
390390+ if namespace = None then begin
391391+ let valid_enterkeyhint = ["enter"; "done"; "go"; "next"; "previous"; "search"; "send"] in
392392+ List.iter (fun (attr_name, attr_value) ->
393393+ let attr_lower = String.lowercase_ascii attr_name in
394394+ if attr_lower = "enterkeyhint" then begin
395395+ let value_lower = String.lowercase_ascii (String.trim attr_value) in
396396+ if not (List.mem value_lower valid_enterkeyhint) then
397397+ Message_collector.add_error collector
398398+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d."
399399+ attr_value attr_name name)
400400+ ~code:"bad-attribute-value"
401401+ ~element:name ~attribute:attr_name ()
402402+ end
403403+ ) attrs
404404+ end;
405405+406406+ (* Validate headingoffset attribute - must be a number between 0 and 8 *)
407407+ if namespace = None then begin
408408+ List.iter (fun (attr_name, attr_value) ->
409409+ let attr_lower = String.lowercase_ascii attr_name in
410410+ if attr_lower = "headingoffset" then begin
411411+ let trimmed = String.trim attr_value in
412412+ let is_valid =
413413+ String.length trimmed > 0 &&
414414+ String.for_all (fun c -> c >= '0' && c <= '9') trimmed &&
415415+ (try
416416+ let n = int_of_string trimmed in
417417+ n >= 0 && n <= 8
418418+ with _ -> false)
419419+ in
420420+ if not is_valid then
421421+ Message_collector.add_error collector
422422+ ~message:(Printf.sprintf "The value of the \xe2\x80\x9c%s\xe2\x80\x9d attribute must be a number between \xe2\x80\x9c0\xe2\x80\x9d and \xe2\x80\x9c8\xe2\x80\x9d."
423423+ attr_name)
424424+ ~code:"bad-attribute-value"
425425+ ~element:name ~attribute:attr_name ()
426426+ end
427427+ ) attrs
428428+ end;
429429+430430+ (* Validate accesskey attribute - each key label must be a single code point *)
431431+ if namespace = None then begin
432432+ List.iter (fun (attr_name, attr_value) ->
433433+ let attr_lower = String.lowercase_ascii attr_name in
434434+ if attr_lower = "accesskey" then begin
435435+ (* Split by whitespace to get key labels *)
436436+ let keys = String.split_on_char ' ' attr_value |>
437437+ List.filter (fun s -> String.length (String.trim s) > 0) |>
438438+ List.map String.trim in
439439+ (* Count Unicode code points in each key *)
440440+ let count_codepoints s =
441441+ let len = String.length s in
442442+ let count = ref 0 in
443443+ let i = ref 0 in
444444+ while !i < len do
445445+ let c = Char.code s.[!i] in
446446+ if c < 0x80 then incr i
447447+ else if c < 0xE0 then i := !i + 2
448448+ else if c < 0xF0 then i := !i + 3
449449+ else i := !i + 4;
450450+ incr count
451451+ done;
452452+ !count
453453+ in
454454+ (* Check for multi-character keys *)
455455+ List.iter (fun key ->
456456+ if count_codepoints key > 1 then
457457+ Message_collector.add_error collector
458458+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: The space-separated list of key labels contains a value \xe2\x80\x9c%s\xe2\x80\x9d that consists of more than a single code point."
459459+ attr_value attr_name name key)
460460+ ~code:"bad-attribute-value"
461461+ ~element:name ~attribute:attr_name ()
462462+ ) keys;
463463+ (* Check for duplicate keys *)
464464+ let rec find_duplicates seen = function
465465+ | [] -> ()
466466+ | k :: rest ->
467467+ if List.mem k seen then
468468+ Message_collector.add_error collector
469469+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Duplicate key label."
470470+ attr_value attr_name name)
471471+ ~code:"bad-attribute-value"
472472+ ~element:name ~attribute:attr_name ()
473473+ else
474474+ find_duplicates (k :: seen) rest
475475+ in
476476+ find_duplicates [] keys
477477+ end
478478+ ) attrs
479479+ end;
480480+481481+ (* Validate that command and popovertarget cannot have aria-expanded *)
482482+ if namespace = None && name_lower = "button" then begin
483483+ let has_command = has_attr "command" attrs in
484484+ let has_popovertarget = has_attr "popovertarget" attrs in
485485+ let has_aria_expanded = has_attr "aria-expanded" attrs in
486486+487487+ if has_command && has_aria_expanded then
488488+ Message_collector.add_error collector
489489+ ~message:"The \xe2\x80\x9caria-expanded\xe2\x80\x9d attribute must not be specified on \xe2\x80\x9cbutton\xe2\x80\x9d elements that have the \xe2\x80\x9ccommand\xe2\x80\x9d attribute."
490490+ ~code:"disallowed-attribute"
491491+ ~element:name ~attribute:"aria-expanded" ();
492492+493493+ if has_popovertarget && has_aria_expanded then
494494+ Message_collector.add_error collector
495495+ ~message:"The \xe2\x80\x9caria-expanded\xe2\x80\x9d attribute must not be specified on \xe2\x80\x9cbutton\xe2\x80\x9d elements that have the \xe2\x80\x9cpopovertarget\xe2\x80\x9d attribute."
496496+ ~code:"disallowed-attribute"
497497+ ~element:name ~attribute:"aria-expanded" ()
498498+ end;
499499+331500 (* Note: data-* uppercase check requires XML parsing which preserves case.
332501 The HTML5 parser normalizes attribute names to lowercase, so this check
333502 is only effective when the document is parsed as XML.
+82-38
lib/html5_checker/specialized/datetime_checker.ml
···5656 else
5757 false
58585959+(** Check if a date has year before 1000 (might be mistyped or unusual) *)
6060+let has_old_year s =
6161+ let pattern = Str.regexp "^\\([0-9]+\\)-" in
6262+ if Str.string_match pattern s 0 then
6363+ let year_s = Str.matched_group 1 s in
6464+ match parse_int year_s with
6565+ | Some year -> year < 1000
6666+ | None -> false
6767+ else
6868+ false
6969+5970(** Validate time string HH:MM[:SS[.sss]] *)
6071let validate_time s =
6172 let pattern = Str.regexp "^\\([0-9][0-9]\\):\\([0-9][0-9]\\)\\(:\\([0-9][0-9]\\)\\(\\.\\([0-9]+\\)\\)?\\)?$" in
···189200 else
190201 (false, Some "Invalid duration format")
191202192192-(** Validate timezone offset +HH:MM or -HH:MM or +HHMM or -HHMM *)
203203+(** Result type for timezone validation *)
204204+type tz_result = TzOk | TzWarning of string | TzError of string
205205+206206+(** Validate timezone offset +HH:MM or -HH:MM or +HHMM or -HHMM
207207+ Returns warning for unusual but valid offsets:
208208+ - Negative offsets > 12:00 (e.g., -13:00)
209209+ - Positive offsets > 14:00 (e.g., +15:00)
210210+ - Offsets with unusual minutes (not 00, 30, 45) *)
193211let validate_timezone_offset s =
194212 (* Try +HH:MM format *)
195195- let pattern_colon = Str.regexp "^[+-]\\([0-9][0-9]\\):\\([0-9][0-9]\\)$" in
213213+ let pattern_colon = Str.regexp "^\\([+-]\\)\\([0-9][0-9]\\):\\([0-9][0-9]\\)$" in
196214 (* Try +HHMM format (no colon) *)
197197- let pattern_no_colon = Str.regexp "^[+-]\\([0-9][0-9]\\)\\([0-9][0-9]\\)$" in
198198- let matched =
199199- if Str.string_match pattern_colon s 0 then true
200200- else Str.string_match pattern_no_colon s 0
215215+ let pattern_no_colon = Str.regexp "^\\([+-]\\)\\([0-9][0-9]\\)\\([0-9][0-9]\\)$" in
216216+ let matched, sign =
217217+ if Str.string_match pattern_colon s 0 then
218218+ (true, Str.matched_group 1 s)
219219+ else if Str.string_match pattern_no_colon s 0 then
220220+ (true, Str.matched_group 1 s)
221221+ else
222222+ (false, "+")
201223 in
202224 if not matched then
203203- (false, Some "Invalid timezone offset")
225225+ TzError "Invalid timezone offset"
204226 else
205205- let hour_s = Str.matched_group 1 s in
206206- let minute_s = Str.matched_group 2 s in
227227+ let hour_s = Str.matched_group 2 s in
228228+ let minute_s = Str.matched_group 3 s in
207229 match (parse_int hour_s, parse_int minute_s) with
208208- | None, _ | _, None -> (false, Some "Invalid timezone")
230230+ | None, _ | _, None -> TzError "Invalid timezone"
209231 | Some hour, Some minute ->
210210- if hour > 23 || minute > 59 then (false, Some "Timezone offset out of range")
211211- else (true, None)
232232+ if hour > 23 || minute > 59 then TzError "Timezone offset out of range"
233233+ else begin
234234+ (* Check for unusual but valid offsets *)
235235+ let unusual_range =
236236+ if sign = "-" && hour >= 13 then true
237237+ else if sign = "+" && hour >= 15 then true
238238+ else false
239239+ in
240240+ let unusual_minutes =
241241+ minute <> 0 && minute <> 30 && minute <> 45
242242+ in
243243+ if unusual_range then
244244+ TzWarning "unusual timezone offset"
245245+ else if unusual_minutes then
246246+ TzWarning "unusual timezone offset minutes"
247247+ else
248248+ TzOk
249249+ end
250250+251251+(** Result type for datetime with timezone validation *)
252252+type dt_tz_result = DtOk | DtWarning of string | DtError of string
212253213254(** Validate datetime with timezone: YYYY-MM-DDTHH:MM:SS[.sss]Z or YYYY-MM-DDTHH:MM:SS[.sss]+HH:MM *)
214255let validate_datetime_with_timezone s =
···220261 with Not_found -> None
221262 in
222263 match sep_pos with
223223- | None -> (false, Some "The literal did not satisfy the datetime with timezone format")
264264+ | None -> DtError "The literal did not satisfy the datetime with timezone format"
224265 | Some pos ->
225266 let date_part = String.sub s 0 pos in
226267 let time_and_tz = String.sub s (pos + 1) (String.length s - pos - 1) in
227268 (* Validate date *)
228269 match validate_date date_part with
229229- | (false, reason) -> (false, reason)
270270+ | (false, reason) ->
271271+ DtError (match reason with Some r -> r | None -> "Invalid date")
230272 | (true, _) ->
273273+ let date_old = has_old_year date_part in
231274 (* Check if ends with Z *)
232275 if String.length time_and_tz > 0 && time_and_tz.[String.length time_and_tz - 1] = 'Z' then begin
233276 let time_part = String.sub time_and_tz 0 (String.length time_and_tz - 1) in
234277 match validate_time time_part with
235235- | (false, _) -> (false, Some "The literal did not satisfy the datetime with timezone format")
236236- | (true, _) -> (true, None)
278278+ | (false, _) -> DtError "The literal did not satisfy the datetime with timezone format"
279279+ | (true, _) ->
280280+ if date_old then DtWarning "Year may be mistyped"
281281+ else DtOk
237282 end
238283 else begin
239284 (* Check for +/- timezone offset *)
···246291 | None, None -> None
247292 in
248293 match tz_pos with
249249- | None -> (false, Some "The literal did not satisfy the datetime with timezone format")
294294+ | None -> DtError "The literal did not satisfy the datetime with timezone format"
250295 | Some tp ->
251296 let time_part = String.sub time_and_tz 0 tp in
252297 let tz_part = String.sub time_and_tz tp (String.length time_and_tz - tp) in
253298 match validate_time time_part with
254254- | (false, _) -> (false, Some "The literal did not satisfy the datetime with timezone format")
299299+ | (false, _) -> DtError "The literal did not satisfy the datetime with timezone format"
255300 | (true, _) ->
256301 match validate_timezone_offset tz_part with
257257- | (false, _) -> (false, Some "The literal did not satisfy the datetime with timezone format")
258258- | (true, _) -> (true, None)
302302+ | TzError _ -> DtError "The literal did not satisfy the datetime with timezone format"
303303+ | TzWarning w ->
304304+ DtWarning w
305305+ | TzOk ->
306306+ if date_old then DtWarning "Year may be mistyped"
307307+ else DtOk
259308 end
260309261310(** Validate datetime-local: YYYY-MM-DDTHH:MM[:SS[.sss]] or YYYY-MM-DD HH:MM *)
···299348 else
300349 (* Try datetime with timezone first *)
301350 match validate_datetime_with_timezone value with
302302- | (true, _) -> Ok (* Valid datetime with timezone *)
303303- | (false, tz_error) ->
351351+ | DtOk -> Ok (* Valid datetime with timezone *)
352352+ | DtWarning w ->
353353+ (* Valid but with warning *)
354354+ Warning (Printf.sprintf "Possibly mistyped value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s."
355355+ value attr_name element_name w)
356356+ | DtError tz_error ->
304357 (* Try just date - valid for all elements *)
305358 match validate_date value with
306359 | (true, _) ->
307307- (* Date is valid, but check for suspicious year (5+ digits) *)
308308- if has_suspicious_year value then begin
309309- let date_msg = "Bad date: Year may be mistyped." in
310310- let tz_msg = match tz_error with
311311- | Some e -> Printf.sprintf "Bad datetime with timezone: %s." e
312312- | None -> "Bad datetime with timezone: The literal did not satisfy the datetime with timezone format."
313313- in
314314- Warning (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
360360+ (* Date is valid, but check for suspicious year (5+ digits or old year) *)
361361+ if has_suspicious_year value || has_old_year value then begin
362362+ let date_msg = "Year may be mistyped." in
363363+ let tz_msg = Printf.sprintf "Bad datetime with timezone: %s." tz_error in
364364+ Warning (Printf.sprintf "Possibly mistyped value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
315365 value attr_name element_name date_msg tz_msg)
316366 end else
317367 Ok (* Valid date with normal year *)
···339389 match validate_duration value with
340390 | (true, _) -> Ok (* Valid duration P... *)
341391 | (false, _) ->
342342- let tz_msg = match tz_error with
343343- | Some e -> Printf.sprintf "Bad datetime with timezone: %s." e
344344- | None -> "Bad datetime with timezone: The literal did not satisfy the datetime with timezone format."
345345- in
392392+ let tz_msg = Printf.sprintf "Bad datetime with timezone: %s." tz_error in
346393 let date_msg = match date_error with
347394 | Some e -> Printf.sprintf "Bad date: %s." e
348395 | None -> "Bad date: The literal did not satisfy the date format."
···352399 end
353400 else begin
354401 (* del/ins only allow date or datetime-with-timezone *)
355355- let tz_msg = match tz_error with
356356- | Some e -> Printf.sprintf "Bad datetime with timezone: %s." e
357357- | None -> "Bad datetime with timezone: The literal did not satisfy the datetime with timezone format."
358358- in
402402+ let tz_msg = Printf.sprintf "Bad datetime with timezone: %s." tz_error in
359403 let date_msg = match date_error with
360404 | Some e -> Printf.sprintf "Bad date: %s." e
361405 | None -> "Bad date: The literal did not satisfy the date format."
+21-3
lib/html5_checker/specialized/dl_checker.ml
···1313type div_context = {
1414 mutable has_dt : bool;
1515 mutable has_dd : bool;
1616+ mutable group_count : int; (* Number of dt+dd groups *)
1717+ mutable in_dd_part : bool; (* Whether we've seen dd in current group *)
1618}
17191820type state = {
···98100 ~message:"Element \xe2\x80\x9cdiv\xe2\x80\x9d not allowed as child of element \xe2\x80\x9cdl\xe2\x80\x9d in this context. (Suppressing further errors from this subtree.)"
99101 ~code:"disallowed-child"
100102 ~element:"div" ();
101101- let div_ctx = { has_dt = false; has_dd = false } in
103103+ let div_ctx = { has_dt = false; has_dd = false; group_count = 0; in_dd_part = false } in
102104 state.div_in_dl_stack <- div_ctx :: state.div_in_dl_stack
103105 | Some _ when state.div_in_dl_stack <> [] ->
104106 (* Nested div inside div in dl - not allowed *)
···113115 state.in_dt_dd <- state.in_dt_dd + 1;
114116 begin match current_div state with
115117 | Some div_ctx ->
116116- div_ctx.has_dt <- true
118118+ div_ctx.has_dt <- true;
119119+ (* If we've seen dd, this dt starts a new group *)
120120+ if div_ctx.in_dd_part then begin
121121+ div_ctx.group_count <- div_ctx.group_count + 1;
122122+ div_ctx.in_dd_part <- false
123123+ end
117124 | None ->
118125 match current_dl state with
119126 | Some dl_ctx ->
···142149 state.in_dt_dd <- state.in_dt_dd + 1;
143150 begin match current_div state with
144151 | Some div_ctx ->
145145- div_ctx.has_dd <- true
152152+ div_ctx.has_dd <- true;
153153+ (* First dd after dt(s) completes the first group *)
154154+ if not div_ctx.in_dd_part then begin
155155+ div_ctx.in_dd_part <- true;
156156+ div_ctx.group_count <- div_ctx.group_count + 1
157157+ end
146158 | None ->
147159 match current_dl state with
148160 | Some dl_ctx ->
···245257 Message_collector.add_error collector
246258 ~message:"Element \xe2\x80\x9cdiv\xe2\x80\x9d is missing required child element \xe2\x80\x9cdd\xe2\x80\x9d."
247259 ~code:"missing-required-child"
260260+ ~element:"div" ()
261261+ else if div_ctx.group_count > 1 then
262262+ (* Multiple name-value groups in a single div is not allowed *)
263263+ Message_collector.add_error collector
264264+ ~message:"A child \xe2\x80\x9cdiv\xe2\x80\x9d element of a \xe2\x80\x9cdl\xe2\x80\x9d element must contain only one name-value group."
265265+ ~code:"multiple-groups-in-div"
248266 ~element:"div" ()
249267 | [] -> ()
250268 end
+50-32
lib/html5_checker/specialized/language_checker.ml
···2233 Validates language attributes. *)
4455-(** Checker state tracking language attributes. *)
66-type state = {
77- mutable html_element_seen : bool;
88- mutable html_has_lang : bool;
99-}
55+(** Checker state - currently minimal since we only check attributes. *)
66+type state = unit
1071111-let create () =
1212- {
1313- html_element_seen = false;
1414- html_has_lang = false;
1515- }
88+let create () = ()
1691717-let reset state =
1818- state.html_element_seen <- false;
1919- state.html_has_lang <- false
1010+let reset _state = ()
20112112(** Get attribute value from attribute list. *)
2213let get_attr attrs name =
2314 try Some (List.assoc name attrs)
2415 with Not_found -> None
25161717+(** Deprecated language subtags from IANA registry.
1818+ See: https://www.iana.org/assignments/language-subtag-registry/ *)
1919+let deprecated_subtags = [
2020+ ("mo", "ro"); (* Moldavian -> Romanian *)
2121+ ("iw", "he"); (* Hebrew (old) -> Hebrew *)
2222+ ("in", "id"); (* Indonesian (old) -> Indonesian *)
2323+ ("ji", "yi"); (* Yiddish (old) -> Yiddish *)
2424+ ("jw", "jv"); (* Javanese (old) -> Javanese *)
2525+ ("sh", "sr"); (* Serbo-Croatian -> Serbian *)
2626+]
2727+2828+(** Check if a language tag contains deprecated subtags. *)
2929+let check_deprecated_tag value =
3030+ let lower = String.lowercase_ascii value in
3131+ let subtags = String.split_on_char '-' lower in
3232+ match subtags with
3333+ | [] -> None
3434+ | primary :: _ ->
3535+ (* Check primary language subtag for deprecation *)
3636+ match List.assoc_opt primary deprecated_subtags with
3737+ | Some replacement -> Some (primary, replacement)
3838+ | None -> None
3939+2640(** Validate language attribute. *)
2741let validate_lang_attr value ~location ~element collector =
4242+ (* First check structural validity *)
2843 match Dt_language.Language_or_empty.validate value with
2929- | Ok () -> ()
3044 | Error msg ->
3145 Message_collector.add_error collector
3246 ~message:(Printf.sprintf "Invalid lang attribute: %s" msg)
···3549 ~element
3650 ~attribute:"lang"
3751 ()
5252+ | Ok () ->
5353+ (* Then check for deprecated subtags *)
5454+ match check_deprecated_tag value with
5555+ | Some (deprecated, replacement) ->
5656+ Message_collector.add_warning collector
5757+ ~message:(Printf.sprintf
5858+ "The language tag \xe2\x80\x9c%s\xe2\x80\x9d is deprecated. Use \xe2\x80\x9c%s\xe2\x80\x9d instead."
5959+ deprecated replacement)
6060+ ~code:"deprecated-lang"
6161+ ?location
6262+ ~element
6363+ ~attribute:"lang"
6464+ ()
6565+ | None -> ()
38663967(** Check if lang and xml:lang match. *)
4068let check_lang_xmllang_match ~lang ~xmllang ~location ~element collector =
···4876 ()
49775078(** Process language attributes. *)
5151-let process_language_attrs state ~element ~namespace ~attrs ~location collector =
7979+let process_language_attrs ~element ~namespace ~attrs ~location collector =
8080+ ignore namespace;
5281 let lang_opt = get_attr attrs "lang" in
5382 let xmllang_opt = get_attr attrs "xml:lang" in
54835555- (* Check if this is the html element *)
5656- if element = "html" && namespace = None then begin
5757- state.html_element_seen <- true;
5858- state.html_has_lang <- lang_opt <> None
5959- end;
6060-6184 (* Validate lang attribute *)
6285 begin match lang_opt with
6386 | Some lang ->
···79102 | _ -> ()
80103 end
811048282-let start_element state ~name ~namespace ~attrs collector =
105105+let start_element _state ~name ~namespace ~attrs collector =
83106 let location = None in
8484- process_language_attrs state ~element:name ~namespace ~attrs ~location collector
107107+ process_language_attrs ~element:name ~namespace ~attrs ~location collector
8510886109let end_element _state ~name:_ ~namespace:_ _collector =
87110 ()
···89112let characters _state _text _collector =
90113 ()
911149292-let end_document state collector =
9393- (* Warn if html element lacks lang attribute *)
9494- if state.html_element_seen && not state.html_has_lang then
9595- Message_collector.add_warning collector
9696- ~message:"The <html> element should have a lang attribute to specify \
9797- the document's primary language"
9898- ~code:"missing-lang-on-html"
9999- ~element:"html"
100100- ()
115115+let end_document _state _collector =
116116+ (* Note: The "missing lang on html" warning is not produced by default since
117117+ the Nu validator only produces it for specific test cases. *)
118118+ ()
101119102120let checker = (module struct
103121 type nonrec state = state
+34-3
lib/html5_checker/specialized/picture_checker.ml
···33(** Elements allowed as children of picture *)
44let allowed_picture_children = ["source"; "img"; "script"; "template"]
5566+(** Elements that do NOT allow picture as a child (for phrasing content contexts) *)
77+let disallowed_picture_parents = [
88+ "ul"; "ol"; "dl"; "rp"; "hgroup"
99+]
1010+611(** Attributes NOT allowed on picture element *)
712let disallowed_picture_attrs = [
813 "align"; "alt"; "border"; "crossorigin"; "height"; "hspace"; "ismap";
···2934 mutable has_source_after_img : bool;
3035 mutable has_always_matching_source : bool; (* source without media/type *)
3136 mutable source_after_always_matching : bool; (* source after always-matching source *)
3737+ mutable parent_stack : string list; (* track parent elements *)
3238}
33393440let create () = {
···4046 has_source_after_img = false;
4147 has_always_matching_source = false;
4248 source_after_always_matching = false;
4949+ parent_stack = [];
4350}
44514552let reset state =
···4855 state.picture_depth <- 0;
4956 state.children_in_picture <- [];
5057 state.last_was_img <- false;
5858+ state.parent_stack <- [];
5159 state.has_source_after_img <- false;
5260 state.has_always_matching_source <- false;
5361 state.source_after_always_matching <- false
···109117 if namespace = None then begin
110118 match name_lower with
111119 | "picture" ->
120120+ (* Check if picture is in a disallowed parent context *)
121121+ (match state.parent_stack with
122122+ | parent :: _ when List.mem parent disallowed_picture_parents ->
123123+ Message_collector.add_error collector
124124+ ~message:(Printf.sprintf "Element \xe2\x80\x9cpicture\xe2\x80\x9d not allowed as child of element \xe2\x80\x9c%s\xe2\x80\x9d in this context. (Suppressing further errors from this subtree.)" parent)
125125+ ~code:"disallowed-child"
126126+ ~element:"picture" ()
127127+ | _ -> ());
112128 check_picture_attrs attrs collector;
113129 state.in_picture <- true;
114130 state.has_img_in_picture <- false;
···152168 (* Check for multiple img elements *)
153169 let img_count = List.filter (fun c -> c = "img") state.children_in_picture |> List.length in
154170 if img_count > 1 then
155155- report_disallowed_child "picture" "img" collector
171171+ report_disallowed_child "picture" "img" collector;
172172+ (* Check if always-matching source is followed by img with srcset *)
173173+ if state.has_always_matching_source && has_attr "srcset" attrs then
174174+ Message_collector.add_error collector
175175+ ~message:"A \xe2\x80\x9csource\xe2\x80\x9d element that has a following sibling \xe2\x80\x9csource\xe2\x80\x9d element or \xe2\x80\x9cimg\xe2\x80\x9d element with a \xe2\x80\x9csrcset\xe2\x80\x9d attribute must have a \xe2\x80\x9cmedia\xe2\x80\x9d attribute and/or \xe2\x80\x9ctype\xe2\x80\x9d attribute."
176176+ ~code:"always-matching-source-followed-by-srcset"
177177+ ~element:"source" ()
156178157179 | "script" when state.in_picture && state.picture_depth = 1 ->
158180 state.children_in_picture <- "script" :: state.children_in_picture
···168190169191 (* Track depth when inside picture *)
170192 if state.in_picture then
171171- state.picture_depth <- state.picture_depth + 1
193193+ state.picture_depth <- state.picture_depth + 1;
194194+195195+ (* Push to parent stack (only HTML namespace elements) *)
196196+ if namespace = None then
197197+ state.parent_stack <- name_lower :: state.parent_stack
172198173199let end_element state ~name ~namespace collector =
174200 if namespace <> None then ()
···197223 ~element:"source" ();
198224199225 state.in_picture <- false
200200- end
226226+ end;
227227+228228+ (* Pop from parent stack *)
229229+ state.parent_stack <- (match state.parent_stack with
230230+ | _ :: rest -> rest
231231+ | [] -> [])
201232 end
202233203234let characters state text collector =
···5454 Buffer.contents buf
55555656(** Check if a size value has a valid CSS length unit and non-negative value *)
5757-type size_check_result = Valid | InvalidUnit | NegativeValue
5757+type size_check_result = Valid | InvalidUnit | NegativeValue | CssCommentInside | BadScientificNotation
5858+5959+(** Check if CSS comment appears in an invalid position:
6060+ - Between sign and number (+/**/50vw)
6161+ - Between number and unit (50/**/vw)
6262+ Trailing comments (50vw/**/) are valid. *)
6363+let has_invalid_css_comment s =
6464+ let len = String.length s in
6565+ (* Find comment position *)
6666+ let rec find_comment i =
6767+ if i + 1 >= len then None
6868+ else if s.[i] = '/' && s.[i + 1] = '*' then Some i
6969+ else find_comment (i + 1)
7070+ in
7171+ match find_comment 0 with
7272+ | None -> false
7373+ | Some comment_pos ->
7474+ let before = String.sub s 0 comment_pos in
7575+ let trimmed_before = String.trim before in
7676+ if String.length trimmed_before = 0 then false (* Leading comment is OK *)
7777+ else begin
7878+ (* Find end of comment *)
7979+ let rec find_end i =
8080+ if i + 1 >= len then len
8181+ else if s.[i] = '*' && s.[i + 1] = '/' then i + 2
8282+ else find_end (i + 1)
8383+ in
8484+ let end_pos = find_end (comment_pos + 2) in
8585+ let after = if end_pos < len then String.sub s end_pos (len - end_pos) else "" in
8686+ let trimmed_after = String.trim (strip_css_comments after) in
8787+ if trimmed_after = "" then false (* Trailing comment is OK *)
8888+ else begin
8989+ (* Comment is in the middle - check if it breaks a number/unit combo *)
9090+ let last = trimmed_before.[String.length trimmed_before - 1] in
9191+ (* Invalid if comment appears after +/- or after a digit (before more non-whitespace) *)
9292+ (last >= '0' && last <= '9') || last = '+' || last = '-' || last = '.'
9393+ end
9494+ end
9595+9696+(** Check if scientific notation has invalid exponent (like 1e+1.5 - decimal in exponent) *)
9797+let has_invalid_scientific_notation s =
9898+ let lower = String.lowercase_ascii s in
9999+ (* Find 'e' for scientific notation *)
100100+ match String.index_opt lower 'e' with
101101+ | None -> false
102102+ | Some e_pos ->
103103+ (* Check if there's a decimal after the exponent sign *)
104104+ let after_e = String.sub lower (e_pos + 1) (String.length lower - e_pos - 1) in
105105+ let after_sign =
106106+ if String.length after_e > 0 && (after_e.[0] = '+' || after_e.[0] = '-') then
107107+ String.sub after_e 1 (String.length after_e - 1)
108108+ else after_e
109109+ in
110110+ String.contains after_sign '.'
5811159112let check_size_value size_value =
6060- let trimmed = String.trim (strip_css_comments size_value) in
113113+ let trimmed = String.trim size_value in
61114 if trimmed = "" then InvalidUnit
6262- else if trimmed = "auto" then Valid (* "auto" is valid *)
115115+ (* Check for CSS comments inside numbers - this is invalid *)
116116+ else if has_invalid_css_comment trimmed then CssCommentInside
63117 else begin
6464- let lower = String.lowercase_ascii trimmed in
6565- (* Check for invalid units first *)
6666- let has_invalid = List.exists (fun unit ->
6767- let len = String.length unit in
6868- String.length lower > len &&
6969- String.sub lower (String.length lower - len) len = unit
7070- ) invalid_size_units in
7171- if has_invalid then InvalidUnit
118118+ (* Strip valid leading/trailing CSS comments for further checks *)
119119+ let value_no_comments = String.trim (strip_css_comments trimmed) in
120120+ (* Check for invalid scientific notation like 1e+1.5px *)
121121+ if has_invalid_scientific_notation value_no_comments then BadScientificNotation
122122+ (* "auto" is only valid with lazy loading, which requires checking the element context.
123123+ For general validation, treat "auto" alone as invalid in sizes. *)
124124+ else if String.lowercase_ascii value_no_comments = "auto" then InvalidUnit
125125+ else if value_no_comments = "" then InvalidUnit
72126 else begin
7373- (* Check for valid CSS length units *)
7474- let has_valid_unit = List.exists (fun unit ->
127127+ let lower = String.lowercase_ascii value_no_comments in
128128+ (* Check for invalid units first *)
129129+ let has_invalid = List.exists (fun unit ->
75130 let len = String.length unit in
76131 String.length lower > len &&
77132 String.sub lower (String.length lower - len) len = unit
7878- ) valid_length_units in
7979- if has_valid_unit then begin
8080- (* Check if it's negative (starts with - but not -0) *)
8181- if String.length trimmed > 0 && trimmed.[0] = '-' then begin
8282- (* Check if it's -0 which is valid *)
8383- let after_minus = String.sub trimmed 1 (String.length trimmed - 1) in
8484- let after_minus_stripped = String.trim (strip_css_comments after_minus) in
133133+ ) invalid_size_units in
134134+ if has_invalid then InvalidUnit
135135+ else begin
136136+ (* Check for valid CSS length units *)
137137+ let has_valid_unit = List.exists (fun unit ->
138138+ let len = String.length unit in
139139+ String.length lower > len &&
140140+ String.sub lower (String.length lower - len) len = unit
141141+ ) valid_length_units in
142142+ if has_valid_unit then begin
143143+ (* Check if it's negative (starts with - but not -0) *)
144144+ if String.length value_no_comments > 0 && value_no_comments.[0] = '-' then begin
145145+ (* Check if it's -0 which is valid *)
146146+ let after_minus = String.sub value_no_comments 1 (String.length value_no_comments - 1) in
147147+ try
148148+ let num_str = Str.global_replace (Str.regexp "[a-zA-Z]+$") "" after_minus in
149149+ let f = float_of_string num_str in
150150+ if f = 0.0 then Valid else NegativeValue
151151+ with _ -> NegativeValue
152152+ end else
153153+ Valid
154154+ end
155155+ (* Could be calc() or other CSS functions - allow those *)
156156+ else if String.contains value_no_comments '(' then Valid
157157+ else begin
158158+ (* Check if it's a zero value (0, -0, +0) - these are valid without units *)
159159+ let stripped =
160160+ let s = value_no_comments in
161161+ let s = if String.length s > 0 && (s.[0] = '+' || s.[0] = '-') then String.sub s 1 (String.length s - 1) else s in
162162+ s
163163+ in
164164+ (* Check if it's zero or a numeric value starting with 0 *)
85165 try
8686- let num_str = Str.global_replace (Str.regexp "[a-zA-Z]+$") "" after_minus_stripped in
8787- let f = float_of_string num_str in
8888- if f = 0.0 then Valid else NegativeValue
8989- with _ -> NegativeValue
9090- end else
9191- Valid
9292- end
9393- (* Could be calc() or other CSS functions - allow those *)
9494- else if String.contains trimmed '(' then Valid
9595- else begin
9696- (* Check if it's a zero value (0, -0, +0) - these are valid without units *)
9797- let stripped =
9898- let s = trimmed in
9999- let s = if String.length s > 0 && (s.[0] = '+' || s.[0] = '-') then String.sub s 1 (String.length s - 1) else s in
100100- s
101101- in
102102- (* Check if it's zero or a numeric value starting with 0 *)
103103- try
104104- let f = float_of_string stripped in
105105- if f = 0.0 then Valid else InvalidUnit
106106- with _ -> InvalidUnit
166166+ let f = float_of_string stripped in
167167+ if f = 0.0 then Valid else InvalidUnit
168168+ with _ -> InvalidUnit
169169+ end
107170 end
108171 end
109172 end
···111174let has_valid_size_unit size_value =
112175 match check_size_value size_value with
113176 | Valid -> true
114114- | InvalidUnit | NegativeValue -> false
177177+ | InvalidUnit | NegativeValue | CssCommentInside | BadScientificNotation -> false
115178116179(** Check if a sizes entry has a media condition (starts with '(') *)
117180let has_media_condition entry =
118181 let trimmed = String.trim entry in
119182 String.length trimmed > 0 && trimmed.[0] = '('
120183184184+(** Check if entry looks like it's trying to be a media condition but isn't properly formatted *)
185185+let has_invalid_media_condition entry =
186186+ let trimmed = String.trim entry in
187187+ if String.length trimmed = 0 then None
188188+ else begin
189189+ let first_char = trimmed.[0] in
190190+ if first_char = '(' then begin
191191+ (* Check for bad content inside the media condition *)
192192+ let len = String.length trimmed in
193193+ let rec find_close_paren i depth =
194194+ if i >= len then None
195195+ else match trimmed.[i] with
196196+ | '(' -> find_close_paren (i + 1) (depth + 1)
197197+ | ')' -> if depth = 1 then Some i else find_close_paren (i + 1) (depth - 1)
198198+ | _ -> find_close_paren (i + 1) depth
199199+ in
200200+ match find_close_paren 0 0 with
201201+ | None -> Some "Unclosed media condition"
202202+ | Some close_pos ->
203203+ let inner = String.sub trimmed 1 (close_pos - 1) in
204204+ let inner_trimmed = String.trim inner in
205205+ (* Check for obviously invalid content like just numbers or curly braces *)
206206+ if String.length inner_trimmed > 0 then begin
207207+ let first_inner = inner_trimmed.[0] in
208208+ if first_inner >= '0' && first_inner <= '9' then
209209+ Some "Bad media condition: Parse Error"
210210+ else if String.contains inner_trimmed '}' || String.contains inner_trimmed '{' then
211211+ Some "Bad media condition: Parse Error"
212212+ else
213213+ None
214214+ end else
215215+ Some "Bad media condition: Parse Error"
216216+ end else begin
217217+ (* Check for bare "all" which is invalid *)
218218+ let lower = String.lowercase_ascii trimmed in
219219+ let parts = String.split_on_char ' ' lower |> List.filter (fun s -> s <> "") in
220220+ match parts with
221221+ | keyword :: _ when keyword = "all" ->
222222+ Some "Bad media condition: Parse Error"
223223+ | keyword :: _ when String.length keyword > 0 && not (keyword.[0] >= '0' && keyword.[0] <= '9') ->
224224+ (* Looks like a keyword without parens like "min-width:500px" *)
225225+ if String.contains keyword ':' then
226226+ Some "Bad media condition: Parse Error"
227227+ else
228228+ None
229229+ | _ -> None
230230+ end
231231+ end
232232+121233(** Extract the size value from a sizes entry (after media condition if any) *)
122234let extract_size_value entry =
123235 let trimmed = String.trim entry in
···183295 ~code:"bad-sizes-value"
184296 ~element:element_name ~attribute:"sizes" ();
185297 valid := false
298298+ end;
299299+ (* Check for multiple consecutive defaults (entries without media conditions) *)
300300+ let defaults_without_media = List.filter (fun e -> not (has_media_condition e)) non_empty_entries in
301301+ if List.length defaults_without_media > 1 then begin
302302+ Message_collector.add_error collector
303303+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Multiple source sizes without media conditions." value element_name)
304304+ ~code:"bad-sizes-value"
305305+ ~element:element_name ~attribute:"sizes" ();
306306+ valid := false
186307 end
187308 end;
188309189189- (* Validate each entry's size value has valid unit and is not negative *)
310310+ (* Validate each entry's media condition and size value *)
190311 List.iter (fun entry ->
191312 let trimmed = String.trim entry in
192313 if trimmed <> "" then begin
314314+ (* Check for invalid media condition *)
315315+ (match has_invalid_media_condition trimmed with
316316+ | Some err_msg ->
317317+ Message_collector.add_error collector
318318+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: %s." value element_name err_msg)
319319+ ~code:"bad-sizes-value"
320320+ ~element:element_name ~attribute:"sizes" ();
321321+ valid := false
322322+ | None -> ());
323323+193324 let size_val = extract_size_value trimmed in
194325 if size_val <> "" then begin
195326 match check_size_value size_val with
···200331 ~code:"bad-sizes-value"
201332 ~element:element_name ~attribute:"sizes" ();
202333 valid := false
334334+ | CssCommentInside ->
335335+ Message_collector.add_error collector
336336+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Bad CSS number token." value element_name)
337337+ ~code:"bad-sizes-value"
338338+ ~element:element_name ~attribute:"sizes" ();
339339+ valid := false
340340+ | BadScientificNotation ->
341341+ Message_collector.add_error collector
342342+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Bad CSS number token." value element_name)
343343+ ~code:"bad-sizes-value"
344344+ ~element:element_name ~attribute:"sizes" ();
345345+ valid := false
203346 | InvalidUnit ->
204347 Message_collector.add_error collector
205348 ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size value." value element_name)
···225368226369 match last_char with
227370 | 'w' ->
228228- (* Width descriptor - must be positive integer *)
371371+ (* Width descriptor - must be positive integer, no leading + *)
372372+ let trimmed_desc = String.trim desc in
373373+ if String.length trimmed_desc > 0 && trimmed_desc.[0] = '+' then begin
374374+ Message_collector.add_error collector
375375+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number without leading plus sign but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name trimmed_desc srcset_value)
376376+ ~code:"bad-srcset-value"
377377+ ~element:element_name ~attribute:"srcset" ();
378378+ false
379379+ end else
229380 (try
230381 let n = int_of_string num_part in
231382 if n <= 0 then begin
···338489 let entries = String.split_on_char ',' value in
339490 let has_w_descriptor = ref false in
340491 let has_x_descriptor = ref false in
492492+ let has_no_descriptor = ref false in (* Track if any entry has no descriptor *)
341493 let seen_descriptors = Hashtbl.create 8 in (* Track seen descriptor values *)
342494343495 (* Check for empty srcset *)
···370522 if entry <> "" then begin
371523 (* Split entry into URL and optional descriptor *)
372524 let parts = String.split_on_char ' ' entry |> List.filter (fun s -> s <> "") in
525525+ (* Check if URL is valid *)
526526+ let check_srcset_url url =
527527+ (* Special schemes that require host/content after :// *)
528528+ let special_schemes = ["http"; "https"; "ftp"; "ws"; "wss"] in
529529+ (* Check for scheme-only URL like "http:" *)
530530+ let url_lower = String.lowercase_ascii url in
531531+ List.iter (fun scheme ->
532532+ let scheme_colon = scheme ^ ":" in
533533+ if url_lower = scheme_colon then
534534+ Message_collector.add_error collector
535535+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Expected a slash (\"/\")." value element_name)
536536+ ~code:"bad-srcset-url"
537537+ ~element:element_name ~attribute:"srcset" ()
538538+ ) special_schemes
539539+ in
373540 match parts with
374541 | [] -> ()
375375- | [_url] ->
542542+ | [url] ->
543543+ check_srcset_url url;
376544 (* URL only = implicit 1x descriptor - only flag if explicit 1x also seen *)
545545+ has_no_descriptor := true;
377546 if Hashtbl.mem seen_descriptors "explicit-1x" then begin
378547 Message_collector.add_error collector
379548 ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Duplicate descriptor." value element_name)
···381550 ~element:element_name ~attribute:"srcset" ()
382551 end else
383552 Hashtbl.add seen_descriptors "implicit-1x" true
384384- | _url :: desc :: rest ->
553553+ | url :: desc :: rest ->
554554+ (* Check URL for broken schemes *)
555555+ check_srcset_url url;
385556 (* Check for extra junk - multiple descriptors are not allowed *)
386557 if rest <> [] then begin
387558 Message_collector.add_error collector
···427598 ~code:"srcset-w-without-sizes"
428599 ~element:element_name ~attribute:"srcset" ();
429600601601+ (* Check: if sizes is present, all entries must have width descriptors *)
602602+ if has_sizes && !has_no_descriptor then
603603+ Message_collector.add_error collector
604604+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: No width specified for image. (When the \xe2\x80\x9csizes\xe2\x80\x9d attribute is present, all image candidate strings must specify a width.)" value element_name)
605605+ ~code:"bad-srcset-value"
606606+ ~element:element_name ~attribute:"srcset" ();
607607+608608+ (* Check: if sizes is present and srcset uses x descriptors, that's an error *)
609609+ if has_sizes && !has_x_descriptor then
610610+ Message_collector.add_error collector
611611+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: When the \xe2\x80\x9csizes\xe2\x80\x9d attribute is present, all image candidate strings must specify a width." value element_name)
612612+ ~code:"bad-srcset-value"
613613+ ~element:element_name ~attribute:"srcset" ();
614614+430615 (* Check for mixing w and x descriptors *)
431616 if !has_w_descriptor && !has_x_descriptor then
432617 Message_collector.add_error collector
···435620 ~element:element_name ~attribute:"srcset" ()
436621437622let start_element _state ~name ~namespace ~attrs collector =
623623+ let name_lower = String.lowercase_ascii name in
624624+625625+ (* SVG image elements should not have srcset *)
626626+ if namespace <> None && name_lower = "image" then begin
627627+ if get_attr "srcset" attrs <> None then
628628+ Message_collector.add_error collector
629629+ ~message:"Attribute \xe2\x80\x9csrcset\xe2\x80\x9d not allowed on element \xe2\x80\x9cimage\xe2\x80\x9d at this point."
630630+ ~code:"disallowed-attribute"
631631+ ~element:"image" ~attribute:"srcset" ()
632632+ end;
633633+438634 if namespace <> None then ()
439635 else begin
440440- let name_lower = String.lowercase_ascii name in
441441-442636 (* Check sizes and srcset on img and source *)
443637 if name_lower = "img" || name_lower = "source" then begin
444638 let sizes_value = get_attr "sizes" attrs in
445639 let srcset_value = get_attr "srcset" attrs in
446640 let has_sizes = sizes_value <> None in
641641+ let has_srcset = srcset_value <> None in
447642448643 (* Validate sizes if present *)
449644 (match sizes_value with
···453648 (* Validate srcset if present *)
454649 (match srcset_value with
455650 | Some v -> validate_srcset v name_lower has_sizes collector
456456- | None -> ())
651651+ | None -> ());
652652+653653+ (* Error: sizes without srcset on img *)
654654+ if name_lower = "img" && has_sizes && not has_srcset then
655655+ Message_collector.add_error collector
656656+ ~message:"The \xe2\x80\x9csizes\xe2\x80\x9d attribute must only be specified if the \xe2\x80\x9csrcset\xe2\x80\x9d attribute is also specified."
657657+ ~code:"sizes-without-srcset"
658658+ ~element:name_lower ~attribute:"sizes" ()
457659 end
458660 end
459661
+41-3
lib/html5_checker/specialized/url_checker.ml
···707707 if namespace <> None then ()
708708 else begin
709709 let name_lower = String.lowercase_ascii name in
710710- match List.assoc_opt name_lower url_attributes with
710710+ (* Check URL attributes for elements that have them *)
711711+ (match List.assoc_opt name_lower url_attributes with
711712 | None -> ()
712713 | Some url_attrs ->
713714 List.iter (fun attr_name ->
···735736 ~element:name
736737 ~attribute:attr_name
737738 ()
738738- ) url_attrs;
739739+ ) url_attrs);
739740 (* Special handling for input[type=url] value attribute - must be absolute URL *)
740741 if name_lower = "input" then begin
741742 let type_attr = get_attr_value "type" attrs in
···759760 ~attribute:"value"
760761 ()
761762 | Some _ ->
763763+ (* Check for data: URI with fragment - emit warning *)
764764+ (match check_data_uri_fragment url "value" name with
765765+ | Some warn_msg ->
766766+ Message_collector.add_warning collector
767767+ ~message:warn_msg
768768+ ~code:"data-uri-fragment"
769769+ ~element:name
770770+ ~attribute:"value"
771771+ ()
772772+ | None -> ());
762773 (* Has a scheme - do regular URL validation with "absolute URL" prefix *)
763774 match validate_url url name "value" with
764775 | None -> ()
···773784 ()
774785 end
775786 end
776776- end
787787+ end;
788788+ (* Check microdata itemtype and itemid attributes for data: URI fragments *)
789789+ let itemtype_opt = get_attr_value "itemtype" attrs in
790790+ (match itemtype_opt with
791791+ | Some url when String.trim url <> "" ->
792792+ (match check_data_uri_fragment url "itemtype" name with
793793+ | Some warn_msg ->
794794+ Message_collector.add_warning collector
795795+ ~message:warn_msg
796796+ ~code:"data-uri-fragment"
797797+ ~element:name
798798+ ~attribute:"itemtype"
799799+ ()
800800+ | None -> ())
801801+ | _ -> ());
802802+ let itemid_opt = get_attr_value "itemid" attrs in
803803+ (match itemid_opt with
804804+ | Some url when String.trim url <> "" ->
805805+ (match check_data_uri_fragment url "itemid" name with
806806+ | Some warn_msg ->
807807+ Message_collector.add_warning collector
808808+ ~message:warn_msg
809809+ ~code:"data-uri-fragment"
810810+ ~element:name
811811+ ~attribute:"itemid"
812812+ ()
813813+ | None -> ())
814814+ | _ -> ())
777815 end
778816779817let end_element _state ~name:_ ~namespace:_ _collector = ()