···11-(** Media query validation - simplified implementation *)
11+(** Media query validation - strict implementation for HTML5 conformance *)
2233-(** Media types *)
44-let media_types =
33+(** Valid media types per Media Queries Level 4 spec *)
44+let valid_media_types =
55 [
66 "all";
77 "screen";
88 "print";
99 "speech";
1010+ ]
1111+1212+(** Deprecated media types that should trigger an error *)
1313+let deprecated_media_types =
1414+ [
1015 "aural";
1116 "braille";
1217 "handheld";
···1621 "embossed";
1722 ]
18231919-(** Media query keywords *)
2020-let media_keywords = [ "and"; "or"; "not"; "only" ]
2424+(** Deprecated media features that should trigger an error *)
2525+let deprecated_media_features =
2626+ [
2727+ "device-width";
2828+ "device-height";
2929+ "device-aspect-ratio";
3030+ ]
3131+3232+(** Valid media features *)
3333+let valid_media_features =
3434+ [
3535+ (* Dimensions *)
3636+ "width"; "min-width"; "max-width";
3737+ "height"; "min-height"; "max-height";
3838+ "aspect-ratio"; "min-aspect-ratio"; "max-aspect-ratio";
3939+ (* Display quality *)
4040+ "resolution"; "min-resolution"; "max-resolution";
4141+ "scan"; "grid"; "update"; "overflow-block"; "overflow-inline";
4242+ (* Color *)
4343+ "color"; "min-color"; "max-color";
4444+ "color-index"; "min-color-index"; "max-color-index";
4545+ "monochrome"; "min-monochrome"; "max-monochrome";
4646+ "color-gamut";
4747+ (* Interaction *)
4848+ "pointer"; "any-pointer"; "hover"; "any-hover";
4949+ (* Scripting *)
5050+ "scripting";
5151+ (* Light/dark *)
5252+ "prefers-color-scheme"; "prefers-contrast"; "prefers-reduced-motion";
5353+ "prefers-reduced-transparency";
5454+ (* Display mode *)
5555+ "display-mode";
5656+ (* Inverted colors *)
5757+ "inverted-colors";
5858+ (* Forced colors *)
5959+ "forced-colors";
6060+ (* Orientation *)
6161+ "orientation";
6262+ ]
6363+6464+(** Valid length units *)
6565+let valid_length_units = ["px"; "em"; "rem"; "vh"; "vw"; "vmin"; "vmax"; "cm"; "mm"; "in"; "pt"; "pc"; "ch"; "ex"]
6666+6767+(** Valid resolution units *)
6868+let valid_resolution_units = ["dpi"; "dpcm"; "dppx"; "x"]
6969+7070+(** Media query keywords (unused but kept for documentation) *)
7171+let _media_keywords = [ "and"; "not"; "only" ]
21722273(** Check if character is whitespace *)
2374let is_whitespace c = c = ' ' || c = '\t' || c = '\n' || c = '\r'
···3283let is_ident_char c =
3384 is_ident_start c || (c >= '0' && c <= '9')
34858686+(** Unicode case-fold for Turkish dotted-I etc *)
8787+let lowercase_unicode s =
8888+ (* Handle special case: U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE -> i *)
8989+ let buf = Buffer.create (String.length s) in
9090+ let i = ref 0 in
9191+ while !i < String.length s do
9292+ let c = s.[!i] in
9393+ if c = '\xc4' && !i + 1 < String.length s && s.[!i + 1] = '\xb0' then begin
9494+ (* U+0130 -> 'i' + U+0307 (combining dot above), but for simplicity just 'i' followed by U+0307 *)
9595+ Buffer.add_string buf "i\xcc\x87";
9696+ i := !i + 2
9797+ end else begin
9898+ Buffer.add_char buf (Char.lowercase_ascii c);
9999+ incr i
100100+ end
101101+ done;
102102+ Buffer.contents buf
103103+35104(** Check balanced parentheses *)
36105let check_balanced_parens s =
37106 let rec check depth i =
38107 if i >= String.length s then
39108 if depth = 0 then Ok ()
4040- else Error "Unbalanced parentheses: unclosed '('"
109109+ else Error "Parse Error."
41110 else
42111 let c = s.[i] in
43112 match c with
44113 | '(' -> check (depth + 1) (i + 1)
45114 | ')' ->
4646- if depth = 0 then Error "Unbalanced parentheses: unexpected ')'"
115115+ if depth = 0 then Error "Parse Error."
47116 else check (depth - 1) (i + 1)
48117 | _ -> check depth (i + 1)
49118 in
50119 check 0 0
511205252-(** Extract words (identifiers and keywords) from media query *)
5353-let extract_words s =
5454- let words = ref [] in
5555- let buf = Buffer.create 16 in
5656- let in_parens = ref 0 in
121121+(** Strict media query validation *)
122122+let rec validate_media_query_strict s =
123123+ let s = String.trim s in
124124+ if String.length s = 0 then Error "Parse Error."
125125+ else begin
126126+ (* Check for empty commas *)
127127+ if s = "," then Error "Parse Error."
128128+ else if String.length s > 0 && s.[0] = ',' then Error "Parse Error."
129129+ else if String.length s > 0 && s.[String.length s - 1] = ',' then Error "Parse Error."
130130+ else if String.contains s ',' then begin
131131+ (* Check for empty queries between commas *)
132132+ let parts = String.split_on_char ',' s in
133133+ if List.exists (fun p -> String.trim p = "") parts then Error "Parse Error."
134134+ else begin
135135+ (* Validate each media query in the list *)
136136+ let rec validate_all = function
137137+ | [] -> Ok ()
138138+ | part :: rest ->
139139+ match validate_media_query_strict (String.trim part) with
140140+ | Ok () -> validate_all rest
141141+ | Error e -> Error e
142142+ in
143143+ validate_all parts
144144+ end
145145+ end else begin
146146+ (* Single media query *)
147147+ match check_balanced_parens s with
148148+ | Error e -> Error e
149149+ | Ok () ->
150150+ (* Check for "and" or "and(" at end *)
151151+ let trimmed = String.trim s in
152152+ if String.length trimmed >= 3 then begin
153153+ let suffix = String.sub trimmed (String.length trimmed - 3) 3 in
154154+ if String.lowercase_ascii suffix = "and" then
155155+ Error "Parse Error."
156156+ else if String.length trimmed >= 4 then begin
157157+ let suffix4 = String.sub trimmed (String.length trimmed - 4) 4 in
158158+ if String.lowercase_ascii suffix4 = "and(" then
159159+ Error "Parse Error."
160160+ else
161161+ validate_media_query_content trimmed
162162+ end else
163163+ validate_media_query_content trimmed
164164+ end else
165165+ validate_media_query_content trimmed
166166+ end
167167+ end
571685858- for i = 0 to String.length s - 1 do
5959- let c = s.[i] in
6060- match c with
6161- | '(' ->
6262- if Buffer.length buf > 0 then (
6363- words := Buffer.contents buf :: !words;
6464- Buffer.clear buf);
6565- incr in_parens
6666- | ')' ->
6767- if Buffer.length buf > 0 then (
6868- words := Buffer.contents buf :: !words;
6969- Buffer.clear buf);
7070- decr in_parens
7171- | _ ->
7272- if !in_parens = 0 then
7373- if is_ident_char c then Buffer.add_char buf c
7474- else if is_whitespace c then
7575- if Buffer.length buf > 0 then (
7676- words := Buffer.contents buf :: !words;
7777- Buffer.clear buf)
7878- else ()
7979- else if Buffer.length buf > 0 then (
8080- words := Buffer.contents buf :: !words;
8181- Buffer.clear buf)
8282- done;
169169+and validate_media_query_content s =
170170+ (* Parse into tokens *)
171171+ let len = String.length s in
172172+ let i = ref 0 in
173173+ let skip_ws () = while !i < len && is_whitespace s.[!i] do incr i done in
174174+175175+ let read_ident () =
176176+ let start = !i in
177177+ while !i < len && is_ident_char s.[!i] do incr i done;
178178+ if !i > start then Some (String.sub s start (!i - start))
179179+ else None
180180+ in
181181+182182+ let read_paren_content () =
183183+ (* Read until matching ) *)
184184+ let start = !i in
185185+ let depth = ref 1 in
186186+ incr i; (* skip opening ( *)
187187+ while !i < len && !depth > 0 do
188188+ if s.[!i] = '(' then incr depth
189189+ else if s.[!i] = ')' then decr depth;
190190+ incr i
191191+ done;
192192+ String.sub s (start + 1) (!i - start - 2)
193193+ in
831948484- if Buffer.length buf > 0 then words := Buffer.contents buf :: !words;
8585- List.rev !words
195195+ (* Parse the query *)
196196+ skip_ws ();
197197+ if !i >= len then Error "Parse Error."
198198+ else begin
199199+ (* Check for only/not prefix *)
200200+ let has_only = ref false in
201201+ let has_not = ref false in
202202+ (match read_ident () with
203203+ | Some w ->
204204+ let w_lower = String.lowercase_ascii w in
205205+ if w_lower = "only" then (has_only := true; skip_ws ())
206206+ else if w_lower = "not" then (has_not := true; skip_ws ())
207207+ else i := !i - String.length w (* put back *)
208208+ | None -> ());
862098787-(** Validate media query structure *)
210210+ skip_ws ();
211211+ if !i >= len then begin
212212+ if !has_only || !has_not then Error "Parse Error."
213213+ else Error "Parse Error."
214214+ end else begin
215215+ (* Check for media type or ( *)
216216+ if s.[!i] = '(' then begin
217217+ (* Media feature only *)
218218+ let content = read_paren_content () in
219219+ validate_media_feature content
220220+ end else begin
221221+ (* Expect media type *)
222222+ match read_ident () with
223223+ | None -> Error "Parse Error."
224224+ | Some media_type ->
225225+ let mt_lower = lowercase_unicode media_type in
226226+ (* Check for deprecated media type *)
227227+ if List.mem mt_lower deprecated_media_types then
228228+ Error (Printf.sprintf "The media \"%s\" has been deprecated" mt_lower)
229229+ (* Check if valid media type *)
230230+ else if not (List.mem mt_lower valid_media_types) then
231231+ Error (Printf.sprintf "unrecognized media \"%s\"." mt_lower)
232232+ else begin
233233+ skip_ws ();
234234+ if !i >= len then Ok ()
235235+ else begin
236236+ (* Check for "and" - must be followed by whitespace *)
237237+ let and_start = !i in
238238+ match read_ident () with
239239+ | None -> Error "Parse Error."
240240+ | Some kw ->
241241+ let kw_lower = String.lowercase_ascii kw in
242242+ if kw_lower <> "and" then Error "Parse Error."
243243+ else begin
244244+ (* Check that there was whitespace before 'and' *)
245245+ if and_start > 0 && not (is_whitespace s.[and_start - 1]) then
246246+ Error "Parse Error."
247247+ (* Check that there is whitespace after 'and' *)
248248+ else if !i < len && s.[!i] = '(' then
249249+ Error "Parse Error."
250250+ else begin
251251+ skip_ws ();
252252+ if !i >= len then Error "Parse Error."
253253+ else if s.[!i] <> '(' then Error "Parse Error."
254254+ else begin
255255+ (* Validate remaining features *)
256256+ let rec validate_features () =
257257+ skip_ws ();
258258+ if !i >= len then Ok ()
259259+ else if s.[!i] = '(' then begin
260260+ let content = read_paren_content () in
261261+ match validate_media_feature content with
262262+ | Error e -> Error e
263263+ | Ok () ->
264264+ skip_ws ();
265265+ if !i >= len then Ok ()
266266+ else begin
267267+ match read_ident () with
268268+ | None -> Error "Parse Error."
269269+ | Some kw2 ->
270270+ let kw2_lower = String.lowercase_ascii kw2 in
271271+ if kw2_lower <> "and" then Error "Parse Error."
272272+ else begin
273273+ skip_ws ();
274274+ if !i >= len then Error "Parse Error."
275275+ else validate_features ()
276276+ end
277277+ end
278278+ end else Error "Parse Error."
279279+ in
280280+ validate_features ()
281281+ end
282282+ end
283283+ end
284284+ end
285285+ end
286286+ end
287287+ end
288288+ end
289289+290290+and validate_media_feature content =
291291+ let content = String.trim content in
292292+ if content = "" then Error "Parse Error."
293293+ else begin
294294+ (* Check for colon - feature: value *)
295295+ match String.index_opt content ':' with
296296+ | None ->
297297+ (* Just feature name - boolean feature or range syntax *)
298298+ let feature_lower = String.lowercase_ascii content in
299299+ if List.mem feature_lower deprecated_media_features then
300300+ Error (Printf.sprintf "Deprecated media feature \"%s\". For guidance, see the Deprecated Media Features section in the current Media Queries specification." feature_lower)
301301+ else if List.mem feature_lower valid_media_features then
302302+ Ok ()
303303+ else
304304+ Ok () (* Allow unknown features for forward compatibility *)
305305+ | Some colon_pos ->
306306+ let feature = String.trim (String.sub content 0 colon_pos) in
307307+ let value = String.trim (String.sub content (colon_pos + 1) (String.length content - colon_pos - 1)) in
308308+ let feature_lower = String.lowercase_ascii feature in
309309+310310+ (* Check for deprecated features *)
311311+ if List.mem feature_lower deprecated_media_features then
312312+ Error (Printf.sprintf "Deprecated media feature \"%s\". For guidance, see the Deprecated Media Features section in the current Media Queries specification." feature_lower)
313313+ (* Check for incomplete value *)
314314+ else if value = "" then
315315+ Error "Parse Error."
316316+ (* Check for invalid value syntax *)
317317+ else if String.length value > 0 && value.[String.length value - 1] = ';' then
318318+ Error "Parse Error."
319319+ else begin
320320+ (* Validate value based on feature type *)
321321+ validate_feature_value feature_lower value
322322+ end
323323+ end
324324+325325+and validate_feature_value feature value =
326326+ (* Width/height features require length values *)
327327+ let length_features = ["width"; "min-width"; "max-width"; "height"; "min-height"; "max-height"] in
328328+ let color_features = ["color"; "min-color"; "max-color"; "color-index"; "min-color-index"; "max-color-index";
329329+ "monochrome"; "min-monochrome"; "max-monochrome"] in
330330+331331+ if List.mem feature length_features then begin
332332+ (* Must be a valid length: number followed by unit *)
333333+ let value = String.trim value in
334334+ let is_digit c = c >= '0' && c <= '9' in
335335+336336+ (* Parse number - includes sign, digits, and decimal point *)
337337+ let i = ref 0 in
338338+ let len = String.length value in
339339+ while !i < len && (is_digit value.[!i] || value.[!i] = '.' || value.[!i] = '-' || value.[!i] = '+') do
340340+ incr i
341341+ done;
342342+ let num_part = String.sub value 0 !i in
343343+ let unit_part = String.sub value !i (len - !i) in
344344+345345+ (* Check if the number is zero (including 0.0, 0.00, etc.) *)
346346+ let is_zero num =
347347+ let rec check i =
348348+ if i >= String.length num then true
349349+ else match num.[i] with
350350+ | '0' | '.' | '-' -> check (i + 1)
351351+ | _ -> false
352352+ in
353353+ check 0
354354+ in
355355+ if num_part = "" then Error "Parse Error."
356356+ else if is_zero num_part && unit_part = "" then Ok () (* 0 (or 0.0) can be unitless *)
357357+ else if unit_part = "" then
358358+ Error "only \"0\" can be a \"unit\". You must put a unit after your number"
359359+ else begin
360360+ let unit_lower = String.lowercase_ascii unit_part in
361361+ if List.mem unit_lower valid_length_units then Ok ()
362362+ else if List.mem unit_lower valid_resolution_units then
363363+ Error (Printf.sprintf "\"%s\" is not a \"%s\" value" value feature)
364364+ else
365365+ Error "Unknown dimension."
366366+ end
367367+ end else if List.mem feature color_features then begin
368368+ (* Must be an integer *)
369369+ let value = String.trim value in
370370+ let is_digit c = c >= '0' && c <= '9' in
371371+ if String.length value > 0 && String.for_all is_digit value then Ok ()
372372+ else
373373+ Error (Printf.sprintf "\"%s\" is not a \"%s\" value" value feature)
374374+ end else
375375+ Ok () (* Allow other features with any value for now *)
376376+377377+(** Legacy permissive validation *)
88378let validate_media_query s =
89379 let s = String.trim s in
90380 if String.length s = 0 then Error "Media query must not be empty"
91381 else
9292- (* Check balanced parentheses *)
93382 match check_balanced_parens s with
94383 | Error _ as e -> e
9595- | Ok () ->
9696- (* Extract and validate words *)
9797- let words = extract_words s in
9898- let words_lower = List.map String.lowercase_ascii words in
9999-100100- (* Basic validation: check for invalid keyword combinations *)
101101- let rec validate_words prev = function
102102- | [] -> Ok ()
103103- | word :: rest -> (
104104- let word_lower = String.lowercase_ascii word in
105105- match (prev, word_lower) with
106106- | None, "and" | None, "or" ->
107107- Error
108108- (Printf.sprintf
109109- "Media query cannot start with keyword '%s'" word)
110110- | Some "and", "and" | Some "or", "or" | Some "not", "not" ->
111111- Error
112112- (Printf.sprintf "Consecutive '%s' keywords are not allowed"
113113- word)
114114- | Some "only", "only" ->
115115- Error "Consecutive 'only' keywords are not allowed"
116116- | _, _ -> validate_words (Some word_lower) rest)
117117- in
118118-119119- (* Check if query contains valid media types or features *)
120120- let has_media_type =
121121- List.exists
122122- (fun w -> List.mem (String.lowercase_ascii w) media_types)
123123- words
124124- in
125125- let has_features = String.contains s '(' in
126126-127127- if not (has_media_type || has_features) then
128128- (* Only keywords, no actual media type or features *)
129129- if List.for_all (fun w -> List.mem w media_keywords) words_lower then
130130- Error "Media query contains only keywords without media type or features"
131131- else Ok () (* Assume other identifiers are valid *)
132132- else validate_words None words
384384+ | Ok () -> Ok ()
133385134386module Media_query = struct
135387 let name = "media query"
+4
lib/html5_checker/datatype/dt_media_query.mli
···2233 This module provides a validator for CSS media queries as used in HTML5. *)
4455+(** Strict media query validation for HTML5 conformance checking.
66+ Returns Ok () if valid, Error message if invalid. *)
77+val validate_media_query_strict : string -> (unit, string) result
88+59(** Media query validator.
610711 Validates CSS media queries used in media attributes and CSS @media rules.
+10-2
lib/html5_checker/parse_error_bridge.ml
···1111 Message.make_location ~line ~column ?system_id ()
1212 in
1313 let code_str = Html5rw.Parse_error_code.to_string code in
1414+ let message = match code with
1515+ | Html5rw.Parse_error_code.Non_void_html_element_start_tag_with_trailing_solidus ->
1616+ "Self-closing syntax (\"/>\") used on a non-void HTML element. Ignoring the slash and treating as a start tag."
1717+ | _ -> Printf.sprintf "Parse error: %s" code_str
1818+ in
1419 Message.error
1515- ~message:(Printf.sprintf "Parse error: %s" code_str)
2020+ ~message
1621 ~code:code_str
1722 ~location
1823 ()
···2530 in
2631 let filtered_errors =
2732 if is_xhtml then
2828- (* XHTML doesn't require DOCTYPE - filter that error *)
3333+ (* XHTML has different requirements than HTML:
3434+ - No DOCTYPE required
3535+ - Self-closing syntax is valid for all elements *)
2936 List.filter (fun err ->
3037 match Html5rw.error_code err with
3138 | Html5rw.Parse_error_code.Tree_construction_error "expected-doctype-but-got-other" -> false
3939+ | Html5rw.Parse_error_code.Non_void_html_element_start_tag_with_trailing_solidus -> false
3240 | _ -> true
3341 ) errors
3442 else errors
+4-14
lib/html5_checker/semantic/id_checker.ml
···66 - ID values conform to HTML5 requirements *)
7788(** Location information for ID occurrences. *)
99-type id_location = {
1010- element : string;
1111- location : Message.location option;
1212-}
99+type id_location = unit (* simplified since we only need to track existence *)
13101411(** Information about an ID reference. *)
1512type id_reference = {
···120117 ()
121118 (* Check for duplicate ID *)
122119 else if Hashtbl.mem state.ids id then
123123- let first_occurrence = Hashtbl.find state.ids id in
124124- let first_loc_str = match first_occurrence.location with
125125- | None -> ""
126126- | Some loc -> Printf.sprintf " at line %d, column %d" loc.line loc.column
127127- in
128120 Message_collector.add_error collector
129129- ~message:(Printf.sprintf
130130- "Duplicate ID '%s': first used on <%s>%s, now on <%s>"
131131- id first_occurrence.element first_loc_str element)
121121+ ~message:(Printf.sprintf "Duplicate ID \xe2\x80\x9c%s\xe2\x80\x9d." id)
132122 ~code:"duplicate-id"
133123 ?location
134124 ~element
···136126 ()
137127 else
138128 (* Store the ID *)
139139- Hashtbl.add state.ids id { element; location }
129129+ Hashtbl.add state.ids id ()
140130141131(** Record a single ID reference. *)
142132let add_reference state ~referring_element ~attribute ~referenced_id ~location =
···181171 | "name" when element = "map" ->
182172 (* Track map name attributes for usemap resolution *)
183173 if String.length value > 0 then
184184- Hashtbl.add state.map_names value { element; location }
174174+ Hashtbl.add state.map_names value ()
185175186176 | attr when List.mem attr single_id_ref_attrs ->
187177 add_reference state ~referring_element:element
+128-3
lib/html5_checker/specialized/aria_checker.ml
···358358359359(** Stack node representing an element in the ancestor chain. *)
360360type stack_node = {
361361+ element_name : string;
361362 explicit_roles : string list;
362363 implicit_role : string option;
363364}
···365366(** Checker state. *)
366367type state = {
367368 mutable stack : stack_node list;
369369+ mutable has_active_tab : bool; (* Whether document has role=tab with aria-selected=true *)
370370+ mutable has_tabpanel : bool; (* Whether document has role=tabpanel elements *)
368371}
369372370370-let create () = { stack = [] }
373373+let create () = { stack = []; has_active_tab = false; has_tabpanel = false }
371374372372-let reset state = state.stack <- []
375375+let reset state =
376376+ state.stack <- [];
377377+ state.has_active_tab <- false;
378378+ state.has_tabpanel <- false
373379374380(** Check if any ancestor has one of the required roles. *)
375381let has_required_ancestor_role state required_roles =
···385391 | None -> false
386392 ) state.stack
387393394394+(** Get the first ancestor role from a list of target roles. *)
395395+let get_ancestor_role state target_roles =
396396+ let rec find_in_stack = function
397397+ | [] -> None
398398+ | ancestor :: rest ->
399399+ let found_explicit = List.find_opt (fun role -> List.mem role target_roles) ancestor.explicit_roles in
400400+ match found_explicit with
401401+ | Some r -> Some r
402402+ | None ->
403403+ match ancestor.implicit_role with
404404+ | Some r when List.mem r target_roles -> Some r
405405+ | _ -> find_in_stack rest
406406+ in
407407+ find_in_stack state.stack
408408+409409+(** Get the immediate parent element name. *)
410410+let get_parent_element state =
411411+ match state.stack with
412412+ | parent :: _ -> Some parent.element_name
413413+ | [] -> None
414414+388415(** Render a list of roles as a human-readable string. *)
389416let render_role_set roles =
390417 match roles with
···418445 (* Get implicit role for this element *)
419446 let implicit_role = get_implicit_role name_lower attrs in
420447448448+ (* Track active tabs and tabpanel roles for end_document validation *)
449449+ if List.mem "tab" explicit_roles then begin
450450+ let aria_selected = List.assoc_opt "aria-selected" attrs in
451451+ if aria_selected = Some "true" then state.has_active_tab <- true
452452+ end;
453453+ if List.mem "tabpanel" explicit_roles then state.has_tabpanel <- true;
454454+421455 (* Check br/wbr role restrictions - only none/presentation allowed *)
422456 if (name_lower = "br" || name_lower = "wbr") && explicit_roles <> [] then begin
423457 let first_role = List.hd explicit_roles in
···499533 | _ -> ()
500534 end;
501535536536+ (* Check for input[type=checkbox][role=button] requires aria-pressed *)
537537+ if name_lower = "input" then begin
538538+ let input_type = match List.assoc_opt "type" attrs with
539539+ | Some t -> String.lowercase_ascii t
540540+ | None -> "text"
541541+ in
542542+ if input_type = "checkbox" && List.mem "button" explicit_roles then begin
543543+ let has_aria_pressed = List.assoc_opt "aria-pressed" attrs <> None in
544544+ if not has_aria_pressed then
545545+ Message_collector.add_error collector
546546+ ~message:"An \xe2\x80\x9cinput\xe2\x80\x9d element with a \xe2\x80\x9ctype\xe2\x80\x9d attribute whose value is \xe2\x80\x9ccheckbox\xe2\x80\x9d and with a \xe2\x80\x9crole\xe2\x80\x9d attribute whose value is \xe2\x80\x9cbutton\xe2\x80\x9d must have an \xe2\x80\x9caria-pressed\xe2\x80\x9d attribute."
547547+ ~code:"checkbox-button-needs-aria-pressed"
548548+ ~element:name
549549+ ~attribute:"role"
550550+ ()
551551+ end
552552+ end;
553553+554554+ (* Check li role restrictions in menu/menubar/tablist contexts *)
555555+ if name_lower = "li" && explicit_roles <> [] then begin
556556+ let first_role = List.hd explicit_roles in
557557+ (* none/presentation are always allowed as they remove from accessibility tree *)
558558+ if first_role <> "none" && first_role <> "presentation" then begin
559559+ (* Check if in menu or menubar context *)
560560+ (match get_ancestor_role state ["menu"; "menubar"] with
561561+ | Some _ ->
562562+ let valid_roles = ["group"; "menuitem"; "menuitemcheckbox"; "menuitemradio"; "separator"] in
563563+ if not (List.mem first_role valid_roles) then
564564+ Message_collector.add_error collector
565565+ ~message:"An \xe2\x80\x9cli\xe2\x80\x9d element that is a descendant of a \xe2\x80\x9crole=menu\xe2\x80\x9d element or \xe2\x80\x9crole=menubar\xe2\x80\x9d element must not have any \xe2\x80\x9crole\xe2\x80\x9d value other than \xe2\x80\x9cgroup\xe2\x80\x9d, \xe2\x80\x9cmenuitem\xe2\x80\x9d, \xe2\x80\x9cmenuitemcheckbox\xe2\x80\x9d, \xe2\x80\x9cmenuitemradio\xe2\x80\x9d, or \xe2\x80\x9cseparator\xe2\x80\x9d."
566566+ ~code:"invalid-li-role-in-menu"
567567+ ~element:name
568568+ ~attribute:"role"
569569+ ()
570570+ | None ->
571571+ (* Check if in tablist context *)
572572+ match get_ancestor_role state ["tablist"] with
573573+ | Some _ ->
574574+ if first_role <> "tab" then
575575+ Message_collector.add_error collector
576576+ ~message:"An \xe2\x80\x9cli\xe2\x80\x9d element that is a descendant of a \xe2\x80\x9crole=tablist\xe2\x80\x9d element must not have any \xe2\x80\x9crole\xe2\x80\x9d value other than \xe2\x80\x9ctab\xe2\x80\x9d."
577577+ ~code:"invalid-li-role-in-tablist"
578578+ ~element:name
579579+ ~attribute:"role"
580580+ ()
581581+ | None -> ())
582582+ end
583583+ end;
584584+502585 (* Check for aria-hidden="true" on body element *)
503586 if name_lower = "body" then begin
504587 let aria_hidden = List.assoc_opt "aria-hidden" attrs in
···640723 | None -> ()
641724 ) attrs;
642725726726+ (* Check summary restrictions in details context *)
727727+ if name_lower = "summary" then begin
728728+ let parent = get_parent_element state in
729729+ let is_in_details = parent = Some "details" in
730730+ if is_in_details then begin
731731+ (* summary that is the first child of details *)
732732+ (* Cannot have role=paragraph (or other non-button roles) *)
733733+ if explicit_roles <> [] then begin
734734+ let first_role = List.hd explicit_roles in
735735+ if first_role <> "button" && first_role <> "none" && first_role <> "presentation" then
736736+ Message_collector.add_error collector
737737+ ~message:"The \xe2\x80\x9crole\xe2\x80\x9d attribute must not be used on any \xe2\x80\x9csummary\xe2\x80\x9d element that is a summary for its parent \xe2\x80\x9cdetails\xe2\x80\x9d element."
738738+ ~code:"invalid-role-on-summary"
739739+ ~element:name
740740+ ~attribute:"role"
741741+ ()
742742+ end;
743743+ (* If has aria-expanded or aria-pressed, must have role *)
744744+ let has_aria_expanded = List.assoc_opt "aria-expanded" attrs <> None in
745745+ let has_aria_pressed = List.assoc_opt "aria-pressed" attrs <> None in
746746+ if (has_aria_expanded || has_aria_pressed) && explicit_roles = [] then begin
747747+ if has_aria_pressed then
748748+ Message_collector.add_error collector
749749+ ~message:"Element \xe2\x80\x9csummary\xe2\x80\x9d is missing required attribute \xe2\x80\x9crole\xe2\x80\x9d."
750750+ ~code:"missing-role-on-summary"
751751+ ~element:name ()
752752+ else
753753+ Message_collector.add_error collector
754754+ ~message:"Element \xe2\x80\x9csummary\xe2\x80\x9d is missing one or more of the following attributes: [aria-checked, aria-level, role]."
755755+ ~code:"missing-role-on-summary"
756756+ ~element:name ()
757757+ end
758758+ end
759759+ end;
760760+643761 (* Push current element onto stack *)
644762 let node = {
763763+ element_name = name_lower;
645764 explicit_roles;
646765 implicit_role;
647766 } in
···659778660779let characters _state _text _collector = ()
661780662662-let end_document _state _collector = ()
781781+let end_document state collector =
782782+ (* Check that active tabs have corresponding tabpanels *)
783783+ if state.has_active_tab && not state.has_tabpanel then
784784+ Message_collector.add_error collector
785785+ ~message:"Every active \xe2\x80\x9crole=tab\xe2\x80\x9d element must have a corresponding \xe2\x80\x9crole=tabpanel\xe2\x80\x9d element."
786786+ ~code:"tab-without-tabpanel"
787787+ ()
663788664789let checker = (module struct
665790 type nonrec state = state
···501501 The HTML5 parser normalizes attribute names to lowercase, so this check
502502 is only effective when the document is parsed as XML.
503503 Commenting out until we have XML parsing support. *)
504504- ignore state.is_xhtml
504504+ ignore state.is_xhtml;
505505+506506+ (* Validate media attribute on link, style, source elements *)
507507+ if namespace = None && (name_lower = "link" || name_lower = "style" || name_lower = "source") then begin
508508+ List.iter (fun (attr_name, attr_value) ->
509509+ let attr_lower = String.lowercase_ascii attr_name in
510510+ if attr_lower = "media" then begin
511511+ let trimmed = String.trim attr_value in
512512+ if trimmed <> "" then begin
513513+ match Dt_media_query.validate_media_query_strict trimmed with
514514+ | Ok () -> ()
515515+ | Error msg ->
516516+ Message_collector.add_error collector
517517+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad media query: %s"
518518+ attr_value attr_name name msg)
519519+ ~code:"bad-attribute-value"
520520+ ~element:name ~attribute:attr_name ()
521521+ end
522522+ end
523523+ ) attrs
524524+ end;
525525+526526+ (* Validate RDFa prefix attribute - space-separated list of prefix:iri pairs *)
527527+ if namespace = None then begin
528528+ List.iter (fun (attr_name, attr_value) ->
529529+ let attr_lower = String.lowercase_ascii attr_name in
530530+ if attr_lower = "prefix" then begin
531531+ (* Parse prefix attribute value - format: "prefix1: iri1 prefix2: iri2 ..." *)
532532+ let trimmed = String.trim attr_value in
533533+ if trimmed <> "" then begin
534534+ (* Check for empty prefix (starts with : or has space:) *)
535535+ if String.length trimmed > 0 && trimmed.[0] = ':' then
536536+ Message_collector.add_error collector
537537+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d."
538538+ attr_value attr_name name)
539539+ ~code:"bad-attribute-value"
540540+ ~element:name ~attribute:attr_name ()
541541+ else begin
542542+ (* Check for invalid prefix names - must start with letter or underscore *)
543543+ let is_ncname_start c =
544544+ (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c = '_'
545545+ in
546546+ if String.length trimmed > 0 && not (is_ncname_start trimmed.[0]) then
547547+ Message_collector.add_error collector
548548+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d."
549549+ attr_value attr_name name)
550550+ ~code:"bad-attribute-value"
551551+ ~element:name ~attribute:attr_name ()
552552+ end
553553+ end
554554+ end
555555+ ) attrs
556556+ end
505557506558let end_element _state ~name:_ ~namespace:_ _collector = ()
507559let characters _state _text _collector = ()
+17-1
lib/html5_checker/specialized/dl_checker.ml
···5656 | ctx :: _ -> Some ctx
5757 | [] -> None
58585959-let start_element state ~name ~namespace ~attrs:_ collector =
5959+let get_attr name attrs =
6060+ List.find_map (fun (n, v) ->
6161+ if String.lowercase_ascii n = name then Some v else None
6262+ ) attrs
6363+6464+let start_element state ~name ~namespace ~attrs collector =
6065 let name_lower = String.lowercase_ascii name in
61666267 (* Track parent stack for all HTML elements first *)
···100105 ~message:"Element \xe2\x80\x9cdiv\xe2\x80\x9d not allowed as child of element \xe2\x80\x9cdl\xe2\x80\x9d in this context. (Suppressing further errors from this subtree.)"
101106 ~code:"disallowed-child"
102107 ~element:"div" ();
108108+ (* Check that role is only presentation or none *)
109109+ (match get_attr "role" attrs with
110110+ | Some role_value ->
111111+ let role_lower = String.lowercase_ascii (String.trim role_value) in
112112+ if role_lower <> "presentation" && role_lower <> "none" then
113113+ Message_collector.add_error collector
114114+ ~message:"A \xe2\x80\x9cdiv\xe2\x80\x9d child of a \xe2\x80\x9cdl\xe2\x80\x9d element must not have any \xe2\x80\x9crole\xe2\x80\x9d value other than \xe2\x80\x9cpresentation\xe2\x80\x9d or \xe2\x80\x9cnone\xe2\x80\x9d."
115115+ ~code:"invalid-role-on-div-in-dl"
116116+ ~element:"div"
117117+ ~attribute:"role" ()
118118+ | None -> ());
103119 let div_ctx = { has_dt = false; has_dd = false; group_count = 0; in_dd_part = false } in
104120 state.div_in_dl_stack <- div_ctx :: state.div_in_dl_stack
105121 | Some _ when state.div_in_dl_stack <> [] ->
+71-10
lib/html5_checker/specialized/label_checker.ml
···1212 if String.lowercase_ascii n = name_lower then Some v else None
1313 ) attrs
14141515+type label_for_info = {
1616+ for_target : string;
1717+ has_role : bool;
1818+ has_aria_label : bool;
1919+}
2020+1521type state = {
1622 mutable in_label : bool;
1723 mutable label_depth : int;
1824 mutable labelable_count : int;
1925 mutable label_for_value : string option; (* Value of for attribute on current label *)
2626+ mutable label_has_role : bool; (* Whether current label has role attribute *)
2727+ mutable label_has_aria_label : bool; (* Whether current label has aria-label attribute *)
2828+ mutable labels_for : label_for_info list; (* Labels with for= attribute *)
2929+ mutable labelable_ids : string list; (* IDs of labelable elements *)
2030}
21312232let create () = {
···2434 label_depth = 0;
2535 labelable_count = 0;
2636 label_for_value = None;
3737+ label_has_role = false;
3838+ label_has_aria_label = false;
3939+ labels_for = [];
4040+ labelable_ids = [];
2741}
28422943let reset state =
3044 state.in_label <- false;
3145 state.label_depth <- 0;
3246 state.labelable_count <- 0;
3333- state.label_for_value <- None
4747+ state.label_for_value <- None;
4848+ state.label_has_role <- false;
4949+ state.label_has_aria_label <- false;
5050+ state.labels_for <- [];
5151+ state.labelable_ids <- []
34523553let start_element state ~name ~namespace ~attrs collector =
3654 if namespace <> None then ()
···39574058 if name_lower = "label" then begin
4159 state.in_label <- true;
4242- state.label_depth <- 0;
6060+ state.label_depth <- 1; (* Start at 1 for the label element itself *)
4361 state.labelable_count <- 0;
4444- state.label_for_value <- get_attr attrs "for"
6262+ let for_value = get_attr attrs "for" in
6363+ let has_role = get_attr attrs "role" <> None in
6464+ let has_aria_label = get_attr attrs "aria-label" <> None in
6565+ state.label_for_value <- for_value;
6666+ state.label_has_role <- has_role;
6767+ state.label_has_aria_label <- has_aria_label;
6868+ (* Track this label if it has for= and role/aria-label *)
6969+ (match for_value with
7070+ | Some target when has_role || has_aria_label ->
7171+ state.labels_for <- { for_target = target; has_role; has_aria_label } :: state.labels_for
7272+ | _ -> ())
4573 end;
7474+ (* Track labelable element IDs *)
7575+ (if List.mem name_lower labelable_elements then
7676+ match get_attr attrs "id" with
7777+ | Some id -> state.labelable_ids <- id :: state.labelable_ids
7878+ | None -> ());
46794747- if state.in_label then begin
8080+ if state.in_label && name_lower <> "label" then begin
4881 state.label_depth <- state.label_depth + 1;
49825083 (* Check for labelable elements inside label *)
···5790 ~element:"label" ();
58915992 (* Check if label has for attribute and descendant has mismatched id *)
6060- match state.label_for_value with
9393+ (match state.label_for_value with
6194 | Some for_value ->
6295 let descendant_id = get_attr attrs "id" in
6396 (match descendant_id with
···78111 ())
79112 | None ->
80113 (* No for attribute on label - no constraint on descendant id *)
8181- ()
114114+ ())
82115 end
83116 end
84117 end
851188686-let end_element state ~name ~namespace _collector =
119119+let end_element state ~name ~namespace collector =
87120 if namespace <> None then ()
88121 else begin
89122 let name_lower = String.lowercase_ascii name in
···91124 if state.in_label then begin
92125 state.label_depth <- state.label_depth - 1;
931269494- if name_lower = "label" && state.label_depth < 0 then begin
127127+ if name_lower = "label" && state.label_depth = 0 then begin
128128+ (* Check for role attribute on label that's ancestor of labelable element *)
129129+ if state.label_has_role && state.labelable_count > 0 then
130130+ Message_collector.add_error collector
131131+ ~message:"The \xe2\x80\x9crole\xe2\x80\x9d attribute must not be used on any \xe2\x80\x9clabel\xe2\x80\x9d element that is an ancestor of a labelable element."
132132+ ~code:"role-on-label-ancestor"
133133+ ~element:"label"
134134+ ~attribute:"role" ();
135135+95136 state.in_label <- false;
96137 state.labelable_count <- 0;
9797- state.label_for_value <- None
138138+ state.label_for_value <- None;
139139+ state.label_has_role <- false;
140140+ state.label_has_aria_label <- false
98141 end
99142 end
100143 end
101144102145let characters _state _text _collector = ()
103146104104-let end_document _state _collector = ()
147147+let end_document state collector =
148148+ (* Check labels with for= that target labelable elements *)
149149+ List.iter (fun label_info ->
150150+ if List.mem label_info.for_target state.labelable_ids then begin
151151+ (* This label is associated with a labelable element *)
152152+ if label_info.has_role then
153153+ Message_collector.add_error collector
154154+ ~message:"The \xe2\x80\x9crole\xe2\x80\x9d attribute must not be used on any \xe2\x80\x9clabel\xe2\x80\x9d element that is associated with a labelable element."
155155+ ~code:"role-on-label-for"
156156+ ~element:"label"
157157+ ~attribute:"role" ();
158158+ if label_info.has_aria_label then
159159+ Message_collector.add_error collector
160160+ ~message:"The \xe2\x80\x9caria-label\xe2\x80\x9d attribute must not be used on any \xe2\x80\x9clabel\xe2\x80\x9d element that is associated with a labelable element."
161161+ ~code:"aria-label-on-label-for"
162162+ ~element:"label"
163163+ ~attribute:"aria-label" ()
164164+ end
165165+ ) state.labels_for
105166106167let checker =
107168 (module struct
···6767let is_url s =
6868 String.contains s ':'
69697070+(** Validate that a URL is a valid absolute URL for itemtype.
7171+ itemtype must be an absolute URL per the HTML5 spec.
7272+ http/https URLs require :// but other schemes like mailto:, data:, javascript: don't. *)
7373+let validate_itemtype_url url =
7474+ let url = String.trim url in
7575+ if String.length url = 0 then
7676+ Error "itemtype must not be empty"
7777+ else
7878+ match String.index_opt url ':' with
7979+ | None -> Error "Expected a slash (\"/\")."
8080+ | Some colon_pos ->
8181+ if colon_pos = 0 then
8282+ Error "Expected a slash (\"/\")."
8383+ else
8484+ let scheme = String.lowercase_ascii (String.sub url 0 colon_pos) in
8585+ (* Schemes that require :// for itemtype validation
8686+ Note: The Nu validator only enforces :// for http, https, and ftp *)
8787+ let special_schemes = [
8888+ "http"; "https"; "ftp"
8989+ ] in
9090+ if List.mem scheme special_schemes then begin
9191+ if colon_pos + 2 >= String.length url then
9292+ Error "Expected a slash (\"/\")."
9393+ else if url.[colon_pos + 1] <> '/' || url.[colon_pos + 2] <> '/' then
9494+ Error "Expected a slash (\"/\")."
9595+ else
9696+ Ok ()
9797+ end else
9898+ (* Other schemes (mailto:, data:, javascript:, etc.) are valid as-is *)
9999+ Ok ()
100100+70101(** Check if itemprop value is valid. *)
71102let validate_itemprop_value value =
72103 if String.length value = 0 then
···139170 | None -> ()
140171 end;
141172142142- (* Check itemtype requires itemscope *)
173173+ (* Check itemtype requires itemscope and is valid URL *)
143174 begin match itemtype_opt with
144144- | Some _itemtype ->
175175+ | Some itemtype ->
145176 if not has_itemscope then
146177 Message_collector.add_error collector
147178 ~message:"itemtype attribute requires itemscope attribute"
···150181 ~element
151182 ~attribute:"itemtype"
152183 ()
184184+ else begin
185185+ (* Validate each itemtype URL (can be space-separated) *)
186186+ let types = split_whitespace itemtype in
187187+ List.iter (fun url ->
188188+ match validate_itemtype_url url with
189189+ | Ok () -> ()
190190+ | Error msg ->
191191+ Message_collector.add_error collector
192192+ ~message:(Printf.sprintf
193193+ "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9citemtype\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad absolute URL: %s"
194194+ url element msg)
195195+ ~code:"microdata-invalid-itemtype"
196196+ ?location
197197+ ~element
198198+ ~attribute:"itemtype"
199199+ ()
200200+ ) types
201201+ end
153202 | None -> ()
154203 end;
155204
+5-2
lib/html5rw/parser/parser_tree_builder.ml
···14281428 | Token.Tag { kind = Token.Start; name; _ }
14291429 when List.mem name ["caption"; "colgroup"; "head"; "tbody"; "td"; "tfoot"; "th"; "thead"; "tr"] ->
14301430 parse_error t "unexpected-start-tag"
14311431- | Token.Tag { kind = Token.Start; name; attrs; _ } ->
14311431+ | Token.Tag { kind = Token.Start; name; attrs; self_closing } ->
14321432 (* Any other start tag *)
14331433 reconstruct_active_formatting t;
14341434- ignore (insert_element t name ~push:true attrs)
14341434+ ignore (insert_element t name ~push:true attrs);
14351435+ (* Check for self-closing on non-void HTML element *)
14361436+ if self_closing && not (List.mem name Parser_constants.void_elements) then
14371437+ parse_error t "non-void-html-element-start-tag-with-trailing-solidus"
14351438 | Token.Tag { kind = Token.End; name; _ } ->
14361439 (* Any other end tag *)
14371440 let rec check = function
+17
test/debug_check.ml
···11+let () =
22+ let test_file = "validator/tests/html/microdata/itemtype/scheme-https-no-slash-novalid.html" in
33+ let ic = open_in test_file in
44+ let html = really_input_string ic (in_channel_length ic) in
55+ close_in ic;
66+ let reader = Bytesrw.Bytes.Reader.of_string html in
77+ let result = Html5_checker.check ~collect_parse_errors:true ~system_id:test_file reader in
88+ let errors = Html5_checker.errors result in
99+ let warnings = Html5_checker.warnings result in
1010+ print_endline "=== Errors ===";
1111+ List.iter (fun e -> print_endline e.Html5_checker.Message.message) errors;
1212+ print_endline "=== Warnings ===";
1313+ List.iter (fun e -> print_endline e.Html5_checker.Message.message) warnings;
1414+ if List.length errors > 0 then
1515+ print_endline "PASS (has errors)"
1616+ else
1717+ print_endline "FAIL (no errors)"