(** Srcset and sizes attribute validation checker. *) (** Quote helper for consistent message formatting. *) let q = Error_code.q (** Valid CSS length units for sizes attribute *) let valid_length_units = [ "em"; "ex"; "ch"; "rem"; "cap"; "ic"; "vw"; "svw"; "lvw"; "dvw"; "vh"; "svh"; "lvh"; "dvh"; "vi"; "svi"; "lvi"; "dvi"; "vb"; "svb"; "lvb"; "dvb"; "vmin"; "svmin"; "lvmin"; "dvmin"; "vmax"; "svmax"; "lvmax"; "dvmax"; "cm"; "mm"; "q"; "in"; "pc"; "pt"; "px" ] type state = unit [@@warning "-34"] let create () = () let reset _state = () (** Split string on a character while respecting parentheses *) let split_respecting_parens ~sep s = let len = String.length s in let result = ref [] in let current = Buffer.create 64 in let depth = ref 0 in for i = 0 to len - 1 do let c = s.[i] in if c = '(' then begin incr depth; Buffer.add_char current c end else if c = ')' then begin decr depth; Buffer.add_char current c end else if c = sep && !depth = 0 then begin result := Buffer.contents current :: !result; Buffer.clear current end else Buffer.add_char current c done; (* Add the last segment *) result := Buffer.contents current :: !result; List.rev !result (** Split string on commas while respecting parentheses *) let split_on_comma_respecting_parens s = split_respecting_parens ~sep:',' s (** Split on commas respecting BALANCED parentheses only (for srcset). If parens are unbalanced overall, just split on all commas. *) let split_on_comma_balanced_parens s = (* First, check if parens are balanced overall *) let opens = ref 0 and closes = ref 0 in String.iter (fun c -> if c = '(' then incr opens else if c = ')' then incr closes) s; if !opens <> !closes then (* Unbalanced parens - just split on all commas *) String.split_on_char ',' s else (* Balanced parens - respect them during split *) split_on_comma_respecting_parens s (** Split string on spaces while respecting parentheses, filtering empty segments *) let split_on_space_respecting_parens s = split_respecting_parens ~sep:' ' s |> List.filter (fun s -> s <> "") (** Invalid units that are not CSS lengths but might be confused for them *) let invalid_size_units = [ "deg"; "grad"; "rad"; "turn"; (* angle units *) "s"; "ms"; (* time units *) "hz"; "khz"; (* frequency units *) "dpi"; "dpcm"; "dppx"; (* resolution units *) "%" (* percentage - not valid in sizes *) ] (** Strip CSS comments from a value *) let strip_css_comments s = let buf = Buffer.create (String.length s) in let len = String.length s in let i = ref 0 in while !i < len do if !i + 1 < len && s.[!i] = '/' && s.[!i + 1] = '*' then begin (* Start of comment, find end *) i := !i + 2; while !i + 1 < len && not (s.[!i] = '*' && s.[!i + 1] = '/') do incr i done; if !i + 1 < len then i := !i + 2 end else begin Buffer.add_char buf s.[!i]; incr i end done; Buffer.contents buf (** Check if a size value has a valid CSS length unit and non-negative value *) type size_check_result = | Valid | InvalidUnit of string * string (* (found_unit, context) *) | NegativeValue | CssCommentAfterSign of string * string (* what was found, context *) | CssCommentBeforeUnit of string * string (* what was found, context *) | BadScientificNotation | BadCssNumber of char * string (* (first_char, context) - not starting with digit or minus *) (** CSS comment error types *) type css_comment_error = | NoCommentError | CommentAfterSign of string * string (* what was found, context *) | CommentBetweenNumberAndUnit of string * string (* what was found at comment position, context *) (** Check if CSS comment appears in an invalid position: - Between sign and number (+/**/50vw) - Between number and unit (50/**/vw) Trailing comments (50vw/**/) are valid. *) let check_css_comment_position s = let len = String.length s in (* Find comment position *) let rec find_comment i = if i + 1 >= len then None else if s.[i] = '/' && s.[i + 1] = '*' then Some i else find_comment (i + 1) in match find_comment 0 with | None -> NoCommentError | Some comment_pos -> let before = String.sub s 0 comment_pos in let trimmed_before = String.trim before in if String.length trimmed_before = 0 then NoCommentError (* Leading comment is OK *) else begin (* Find end of comment *) let rec find_end i = if i + 1 >= len then len else if s.[i] = '*' && s.[i + 1] = '/' then i + 2 else find_end (i + 1) in let end_pos = find_end (comment_pos + 2) in let after = if end_pos < len then String.sub s end_pos (len - end_pos) else "" in let trimmed_after = String.trim (strip_css_comments after) in if trimmed_after = "" then NoCommentError (* Trailing comment is OK *) else begin (* Comment is in the middle - check if it breaks a number/unit combo *) let last = trimmed_before.[String.length trimmed_before - 1] in (* What's at the comment position? Just show "/" *) let slash = "/" in (* Invalid if comment appears after +/- *) if last = '+' || last = '-' then CommentAfterSign (trimmed_before ^ slash, s) (* Invalid if comment appears after digit (before more content) *) else if (last >= '0' && last <= '9') || last = '.' then CommentBetweenNumberAndUnit (slash ^ trimmed_after, s) else NoCommentError end end (** Check if scientific notation has invalid exponent (like 1e+1.5 - decimal in exponent) *) let has_invalid_scientific_notation s = let lower = Astring.String.Ascii.lowercase s in (* Find 'e' for scientific notation *) match String.index_opt lower 'e' with | None -> false | Some e_pos -> (* Check if there's a decimal after the exponent sign *) let after_e = String.sub lower (e_pos + 1) (String.length lower - e_pos - 1) in let after_sign = if String.length after_e > 0 && (after_e.[0] = '+' || after_e.[0] = '-') then String.sub after_e 1 (String.length after_e - 1) else after_e in String.contains after_sign '.' (** Extract unit from a size value like "10px" -> "px", "100vw" -> "vw", "50%" -> "%" Returns the unit with original case preserved *) let extract_unit s = let trimmed = String.trim s in let len = String.length trimmed in if len = 0 then "" (* Check for % at the end *) else if trimmed.[len - 1] = '%' then "%" else begin let lower = Astring.String.Ascii.lowercase trimmed in (* Try to find a unit at the end (letters only) *) let rec find_unit_length i = if i < 0 then 0 else if lower.[i] >= 'a' && lower.[i] <= 'z' then find_unit_length (i - 1) else i + 1 in let start = find_unit_length (len - 1) in if start < len then (* Return the unit from the original string (preserving case) *) String.sub trimmed start (len - start) else "" end let check_size_value size_value = let trimmed = String.trim size_value in if trimmed = "" then InvalidUnit ("", trimmed) else begin (* Check for CSS comments inside numbers - this is invalid *) match check_css_comment_position trimmed with | CommentAfterSign (found, ctx) -> CssCommentAfterSign (found, ctx) | CommentBetweenNumberAndUnit (found, ctx) -> CssCommentBeforeUnit (found, ctx) | NoCommentError -> (* Strip valid leading/trailing CSS comments for further checks *) let value_no_comments = String.trim (strip_css_comments trimmed) in (* Check for invalid scientific notation like 1e+1.5px *) if has_invalid_scientific_notation value_no_comments then BadScientificNotation (* "auto" is only valid with lazy loading, which requires checking the element context. For general validation, treat "auto" alone as invalid in sizes. *) else if Astring.String.Ascii.lowercase value_no_comments = "auto" then BadCssNumber (value_no_comments.[0], trimmed) else if value_no_comments = "" then InvalidUnit ("", trimmed) else begin let lower = Astring.String.Ascii.lowercase value_no_comments in (* Check for calc() or other CSS functions first - these are always valid *) if String.contains value_no_comments '(' then Valid else begin (* Check if the value starts with a digit, minus, or plus sign *) let first_char = value_no_comments.[0] in let starts_with_number = (first_char >= '0' && first_char <= '9') || first_char = '-' || first_char = '+' || first_char = '.' (* decimal point like .5px *) in if not starts_with_number then (* Not a valid CSS number token - doesn't start with digit or sign *) BadCssNumber (first_char, trimmed) else begin (* Check for invalid units first *) let found_invalid = List.find_opt (fun unit -> let len = String.length unit in String.length lower > len && String.sub lower (String.length lower - len) len = unit ) invalid_size_units in match found_invalid with | Some _unit -> InvalidUnit (extract_unit value_no_comments, trimmed) | None -> (* Check for valid CSS length units *) let has_valid_unit = List.exists (fun unit -> let len = String.length unit in String.length lower > len && String.sub lower (String.length lower - len) len = unit ) valid_length_units in if has_valid_unit then begin (* Check if it's negative (starts with - but not -0) *) if String.length value_no_comments > 0 && value_no_comments.[0] = '-' then begin (* Check if it's -0 which is valid *) let after_minus = String.sub value_no_comments 1 (String.length value_no_comments - 1) in try let num_str = Str.global_replace (Str.regexp "[a-zA-Z]+$") "" after_minus in let f = float_of_string num_str in if f = 0.0 then Valid else NegativeValue with _ -> NegativeValue end else Valid end else begin (* Check if it's a zero value (0, -0, +0) - these are valid without units *) let stripped = let s = value_no_comments in let s = if String.length s > 0 && (s.[0] = '+' || s.[0] = '-') then String.sub s 1 (String.length s - 1) else s in s in (* Check if it's zero or a numeric value starting with 0 *) try let f = float_of_string stripped in if f = 0.0 then Valid else InvalidUnit (extract_unit value_no_comments, trimmed) with _ -> InvalidUnit (extract_unit value_no_comments, trimmed) end end end end end (** Check if a sizes entry has a media condition (starts with '(') *) let has_media_condition entry = let trimmed = String.trim entry in String.length trimmed > 0 && trimmed.[0] = '(' (** Check if entry looks like it's trying to be a media condition but isn't properly formatted *) let has_invalid_media_condition entry = let trimmed = String.trim entry in if String.length trimmed = 0 then None else begin let first_char = trimmed.[0] in if first_char = '(' then begin (* Check for bad content inside the media condition *) let len = String.length trimmed in let rec find_close_paren i depth = if i >= len then None else match trimmed.[i] with | '(' -> find_close_paren (i + 1) (depth + 1) | ')' -> if depth = 1 then Some i else find_close_paren (i + 1) (depth - 1) | _ -> find_close_paren (i + 1) depth in match find_close_paren 0 0 with | None -> Some "Unclosed media condition" | Some close_pos -> let inner = String.sub trimmed 1 (close_pos - 1) in let inner_trimmed = String.trim inner in (* Check for obviously invalid content like just numbers or curly braces *) if String.length inner_trimmed > 0 then begin let first_inner = inner_trimmed.[0] in if first_inner >= '0' && first_inner <= '9' then Some "Bad media condition: Parse Error" else if String.contains inner_trimmed '}' || String.contains inner_trimmed '{' then Some "Bad media condition: Parse Error" else None end else Some "Bad media condition: Parse Error" end else begin (* Check for bare "all" which is invalid *) let lower = Astring.String.Ascii.lowercase trimmed in let parts = String.split_on_char ' ' lower |> List.filter (fun s -> s <> "") in match parts with | keyword :: _ when keyword = "all" -> Some "Bad media condition: Parse Error" | keyword :: _ when String.length keyword > 0 && not (keyword.[0] >= '0' && keyword.[0] <= '9') -> (* Looks like a keyword without parens like "min-width:500px" *) if String.contains keyword ':' then Some "Bad media condition: Parse Error" else None | _ -> None end end (** Extract the size value from a sizes entry (after media condition if any) *) let extract_size_value entry = let trimmed = String.trim entry in if not (has_media_condition trimmed) then trimmed else begin (* Media conditions can have "and", "or", "not" operators connecting multiple parenthesized groups, e.g., "(not (width:500px)) and (width:500px) 500px" We need to skip all media condition parts to find the size value *) let len = String.length trimmed in let rec skip_media_condition i = if i >= len then len else begin let remaining = String.trim (String.sub trimmed i (len - i)) in let remaining_len = String.length remaining in if remaining_len = 0 then len else begin let first_char = remaining.[0] in if first_char = '(' then begin (* Skip this parenthesized group *) let rec find_close_paren j depth = if j >= remaining_len then remaining_len else match remaining.[j] with | '(' -> find_close_paren (j + 1) (depth + 1) | ')' -> if depth = 1 then j + 1 else find_close_paren (j + 1) (depth - 1) | _ -> find_close_paren (j + 1) depth in let after_paren = find_close_paren 0 0 in let new_pos = i + (len - i) - remaining_len + after_paren in skip_media_condition new_pos end else begin (* Check if remaining starts with "and", "or", "not" followed by space or paren *) let lower_remaining = Astring.String.Ascii.lowercase remaining in if remaining_len >= 4 && String.sub lower_remaining 0 4 = "and " then skip_media_condition (i + (len - i) - remaining_len + 4) else if remaining_len >= 3 && String.sub lower_remaining 0 3 = "or " then skip_media_condition (i + (len - i) - remaining_len + 3) else if remaining_len >= 4 && String.sub lower_remaining 0 4 = "not " then skip_media_condition (i + (len - i) - remaining_len + 4) else if remaining_len >= 4 && String.sub lower_remaining 0 4 = "and(" then skip_media_condition (i + (len - i) - remaining_len + 3) else if remaining_len >= 3 && String.sub lower_remaining 0 3 = "or(" then skip_media_condition (i + (len - i) - remaining_len + 2) else if remaining_len >= 4 && String.sub lower_remaining 0 4 = "not(" then skip_media_condition (i + (len - i) - remaining_len + 3) else (* Found something that's not a media condition part - this is the size value *) i + (len - i) - remaining_len end end end in let size_start = skip_media_condition 0 in if size_start >= len then "" else String.trim (String.sub trimmed size_start (len - size_start)) end (** Validate sizes attribute value *) let validate_sizes value element_name collector = (* Empty sizes is invalid *) if String.trim value = "" then begin Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad source size list: Must not be empty." (q "") (q "sizes") (q element_name))))); false end else begin (* Split on comma and check each entry *) let entries = String.split_on_char ',' value in let first_entry = String.trim (List.hd entries) in (* Check if starts with comma (empty first entry) *) if first_entry = "" then begin Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad source size list: Starts with empty source size." (q value) (q "sizes") (q element_name))))); false end else begin (* Check for trailing comma *) let last_entry = String.trim (List.nth entries (List.length entries - 1)) in if List.length entries > 1 && last_entry = "" then begin Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad source size list: Expected media condition before %s at %s." (q value) (q "sizes") (q element_name) (q "") (q value))))); false end else begin let valid = ref true in (* Check for default-first pattern: unconditional value before conditional ones *) let non_empty_entries = List.filter (fun e -> String.trim e <> "") entries in (* Filter out entries that have invalid media conditions - they'll be reported separately *) let valid_entries = List.filter (fun e -> has_invalid_media_condition (String.trim e) = None ) non_empty_entries in if List.length valid_entries > 1 then begin let first = List.hd valid_entries in let rest = List.tl valid_entries in (* If first entry has no media condition but later ones do, that's invalid *) if not (has_media_condition first) && List.exists has_media_condition rest then begin (* Context is the first entry with a comma *) let context = (String.trim first) ^ "," in Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad source size list: Expected media condition before %s at %s." (q value) (q "sizes") (q element_name) (q "") (q context))))); valid := false end; (* Check for multiple entries without media conditions. When the first entry has no media condition, report "Expected media condition" regardless of whether later entries have media conditions or not *) if not (has_media_condition first) && !valid then begin (* Only report if we haven't already reported the default-first error *) if not (List.exists has_media_condition rest) then begin (* Multiple defaults - report as "Expected media condition" *) let context = (String.trim first) ^ "," in Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad source size list: Expected media condition before %s at %s." (q value) (q "sizes") (q element_name) (q "") (q context))))); valid := false end end end; (* Validate each entry's media condition and size value *) let num_entries = List.length entries in List.iteri (fun idx entry -> let trimmed = String.trim entry in if trimmed <> "" then begin (* Check for invalid media condition *) (match has_invalid_media_condition trimmed with | Some err_msg -> let context = trimmed ^ "," in Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad source size list: %s at %s." (q value) (q "sizes") (q element_name) err_msg (q context))))); valid := false | None -> ()); let size_val = extract_size_value trimmed in if size_val <> "" then begin (* Check if there are multiple space-separated words in the size value. Only the first word should be the size, rest is junk. *) let size_parts = String.split_on_char ' ' size_val |> List.filter (fun s -> s <> "") in let first_size = match size_parts with [] -> size_val | hd :: _ -> hd in let extra_parts = match size_parts with [] -> [] | _ :: tl -> tl in (* Check if first word looks like it should have been a media condition (doesn't start with digit, sign, decimal, '/', or look like a CSS function) *) let first_char = if String.length first_size > 0 then first_size.[0] else 'x' in let has_paren = String.contains size_val '(' in (* calc(), etc. *) let looks_like_junk_entry = not (has_media_condition trimmed) && not has_paren && (* Allow CSS functions like calc() *) not (first_char = '/') && (* Allow leading CSS comments *) not ((first_char >= '0' && first_char <= '9') || first_char = '+' || first_char = '-' || first_char = '.') in (* If this entry looks like junk and there are multiple entries, report "Expected media condition" instead of "Bad CSS number". For single entries with invalid values, fall through to BadCssNumber. *) if looks_like_junk_entry && num_entries > 1 then begin (* Find the context ending with the previous entry *) let prev_entries = List.filter (fun e -> String.trim e <> "" && e <> entry) entries in let context = if List.length prev_entries > 0 then String.concat ", " (List.map String.trim prev_entries) ^ "," else value in Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad source size list: Expected media condition before %s at %s." (q value) (q "sizes") (q element_name) (q "") (q context))))); valid := false end (* If there's extra junk after the size, report BadCssNumber error for it *) else if extra_parts <> [] then begin let last_junk = List.nth extra_parts (List.length extra_parts - 1) in let first_char = if String.length last_junk > 0 then last_junk.[0] else 'x' in let is_last_entry = idx = num_entries - 1 in let context = if is_last_entry then value else trimmed ^ "," in Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad source size list: Bad CSS number token: Expected a minus sign or a digit but saw %s instead at %s." (q value) (q "sizes") (q element_name) (q (String.make 1 first_char)) (q context))))); valid := false end else match check_size_value first_size with | Valid -> () | NegativeValue -> let full_context = if List.length entries > 1 then size_val ^ "," else size_val in let _ = full_context in Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad source size list: Expected positive size value but found %s at %s." (q value) (q "sizes") (q element_name) (q size_val) (q size_val))))); valid := false | CssCommentAfterSign (found, context) -> (* e.g., +/**/50vw - expected number after sign *) Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad source size list: Expected number but found %s at %s." (q value) (q "sizes") (q element_name) (q found) (q context))))); valid := false | CssCommentBeforeUnit (found, context) -> (* e.g., 50/**/vw - expected units after number *) let units_list = List.map q valid_length_units in let units_str = String.concat ", " units_list in Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad source size list: Expected units (one of %s) but found %s at %s." (q value) (q "sizes") (q element_name) units_str (q found) (q context))))); valid := false | BadScientificNotation -> (* For scientific notation with bad exponent, show what char was expected vs found *) let context = if List.length entries > 1 then trimmed ^ "," else trimmed in (* Find the period in the exponent *) let _ = context in Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad source size list: Bad CSS number token: Expected a digit but saw %s instead at %s." (q value) (q "sizes") (q element_name) (q ".") (q size_val))))); valid := false | BadCssNumber (first_char, context) -> (* Value doesn't start with a digit or minus sign *) let full_context = if List.length entries > 1 then context ^ "," else context in let _ = full_context in Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad source size list: Bad CSS number token: Expected a minus sign or a digit but saw %s instead at %s." (q value) (q "sizes") (q element_name) (q (String.make 1 first_char)) (q context))))); valid := false | InvalidUnit (found_unit, _context) -> (* Generate the full list of expected units *) let units_list = List.map q valid_length_units in let units_str = String.concat ", " units_list in (* Context should be the full entry, with comma only if there are multiple entries *) let full_context = if List.length entries > 1 then trimmed ^ "," else trimmed in (* When found_unit is empty, say "no units" instead of quoting empty string *) let found_str = if found_unit = "" then "no units" else q found_unit in Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad source size list: Expected units (one of %s) but found %s at %s." (q value) (q "sizes") (q element_name) units_str found_str (q full_context))))); valid := false end end ) entries; !valid end end end (** Validate srcset descriptor *) let validate_srcset_descriptor desc element_name srcset_value has_sizes collector = let desc_lower = Astring.String.Ascii.lowercase (String.trim desc) in if String.length desc_lower = 0 then true else begin let last_char = desc_lower.[String.length desc_lower - 1] in let num_part = String.sub desc_lower 0 (String.length desc_lower - 1) in match last_char with | 'w' -> (* Width descriptor - must be positive integer, no leading + *) let trimmed_desc = String.trim desc in if String.length trimmed_desc > 0 && trimmed_desc.[0] = '+' then begin (* Show just the number part (without the 'w') *) let num_part_for_msg = String.sub trimmed_desc 0 (String.length trimmed_desc - 1) in Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Expected number without leading plus sign but found %s at %s." (q srcset_value) (q "srcset") (q element_name) (q num_part_for_msg) (q srcset_value))))); false end else (try let n = int_of_string num_part in if n <= 0 then begin Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Expected number greater than zero but found %s at %s." (q srcset_value) (q "srcset") (q element_name) (q num_part) (q srcset_value))))); false end else begin (* Check for uppercase W - compare original desc with lowercase version *) let original_last = desc.[String.length desc - 1] in if original_last = 'W' then begin Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Expected width descriptor but found %s at %s. (When the %s attribute is present, all image candidate strings must specify a width.)" (q srcset_value) (q "srcset") (q element_name) (q desc) (q srcset_value) (q "sizes"))))); false end else true end with _ -> (* Check for scientific notation, decimal, or other non-integer values *) if String.contains num_part 'e' || String.contains num_part 'E' || String.contains num_part '.' then begin Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Expected integer but found %s at %s." (q srcset_value) (q "srcset") (q element_name) (q num_part) (q srcset_value))))); false end else begin Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad srcset descriptor: Invalid width descriptor." (q srcset_value) (q "srcset") (q element_name))))); false end) | 'x' -> (* Pixel density descriptor - must be positive number, no leading + *) let trimmed_desc = String.trim desc in if String.length trimmed_desc > 0 && trimmed_desc.[0] = '+' then begin (* Extract the number part including the plus sign *) let num_with_plus = String.sub trimmed_desc 0 (String.length trimmed_desc - 1) in Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Expected number without leading plus sign but found %s at %s." (q srcset_value) (q "srcset") (q element_name) (q num_with_plus) (q srcset_value))))); false end else begin (try let n = float_of_string num_part in if Float.is_nan n then begin (* NaN is not a valid float - report as parse error with first char from ORIGINAL desc *) let trimmed_desc = String.trim desc in let orig_num_part = String.sub trimmed_desc 0 (String.length trimmed_desc - 1) in let first_char = if String.length orig_num_part > 0 then String.make 1 orig_num_part.[0] else "" in Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad positive floating point number: Expected a digit but saw %s instead at %s." (q srcset_value) (q "srcset") (q element_name) (q first_char) (q srcset_value))))); false end else if n = 0.0 then begin (* Check if it's -0 (starts with minus) - report as "greater than zero" error *) let trimmed_desc = String.trim desc in let orig_num_part = String.sub trimmed_desc 0 (String.length trimmed_desc - 1) in if String.length orig_num_part > 0 && orig_num_part.[0] = '-' then begin Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Expected number greater than zero but found %s at %s." (q srcset_value) (q "srcset") (q element_name) (q orig_num_part) (q srcset_value))))) end else begin Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad positive floating point number: Zero is not a valid positive floating point number at %s." (q srcset_value) (q "srcset") (q element_name) (q srcset_value))))) end; false end else if n < 0.0 then begin Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Expected number greater than zero but found %s at %s." (q srcset_value) (q "srcset") (q element_name) (q num_part) (q srcset_value))))); false end else if n = neg_infinity || n = infinity then begin (* Infinity is not a valid float - report as parse error with first char from ORIGINAL desc *) let trimmed_desc = String.trim desc in let orig_num_part = String.sub trimmed_desc 0 (String.length trimmed_desc - 1) in let first_char = if String.length orig_num_part > 0 then String.make 1 orig_num_part.[0] else "" in Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad positive floating point number: Expected a digit but saw %s instead at %s." (q srcset_value) (q "srcset") (q element_name) (q first_char) (q srcset_value))))); false end else true with _ -> Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad srcset descriptor: Invalid density descriptor." (q srcset_value) (q "srcset") (q element_name))))); false) end | 'h' -> (* Height descriptor - not allowed *) let trimmed_desc = String.trim desc in (* Generate context: find where this entry appears *) let context = try let pos = Str.search_forward (Str.regexp_string trimmed_desc) srcset_value 0 in (* Get the entry context ending with comma *) let search_from = max 0 (pos - 3) in let comma_pos = try Str.search_forward (Str.regexp_string ",") srcset_value pos with Not_found -> String.length srcset_value - 1 in let end_pos = min (comma_pos + 1) (String.length srcset_value) in let len = end_pos - search_from in if len > 0 then String.trim (String.sub srcset_value search_from len) else srcset_value with Not_found | Invalid_argument _ -> srcset_value in if has_sizes then Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Expected width descriptor but found %s at %s. (When the %s attribute is present, all image candidate strings must specify a width.)" (q srcset_value) (q "srcset") (q element_name) (q trimmed_desc) (q context) (q "sizes"))))) else Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad srcset descriptor: Height descriptor %s is not allowed." (q srcset_value) (q "srcset") (q element_name) (q "h"))))); false | _ -> (* Unknown descriptor - find context in srcset_value *) let trimmed_desc = String.trim desc in (* Nu validator adds extra ')' after the last ')' if descriptor contains any '(' *) let found_desc = if String.contains trimmed_desc '(' then (* Find position of last ')' and insert extra ')' after it *) try let last_close = String.rindex trimmed_desc ')' in let before = String.sub trimmed_desc 0 (last_close + 1) in let after = String.sub trimmed_desc (last_close + 1) (String.length trimmed_desc - last_close - 1) in before ^ ")" ^ after with Not_found -> trimmed_desc ^ ")" else trimmed_desc in (* Find context: the entry containing the error with trailing comma *) let context = try let pos = Str.search_forward (Str.regexp_string trimmed_desc) srcset_value 0 in (* Get the context ending with the descriptor and the comma after *) let end_pos = min (pos + String.length trimmed_desc + 1) (String.length srcset_value) in String.trim (String.sub srcset_value 0 end_pos) with Not_found -> srcset_value in Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Expected number followed by %s or %s but found %s at %s." (q srcset_value) (q "srcset") (q element_name) (q "w") (q "x") (q found_desc) (q context))))); false end (** Normalize descriptor for duplicate detection (e.g., 1x = 1.0x) *) let normalize_descriptor desc = let desc_lower = Astring.String.Ascii.lowercase (String.trim desc) in if String.length desc_lower = 0 then desc_lower else let last_char = desc_lower.[String.length desc_lower - 1] in let num_part = String.sub desc_lower 0 (String.length desc_lower - 1) in match last_char with | 'x' -> (* Normalize density to a float string for comparison *) (try let f = float_of_string num_part in Printf.sprintf "%gx" f (* %g removes trailing zeros *) with _ -> desc_lower) | 'w' -> (* Width should be integer, just return as-is *) desc_lower | _ -> desc_lower (** Parse and validate srcset attribute value *) let validate_srcset value element_name has_sizes collector = (* Srcset entries are split on commas - only balanced parentheses prevent split *) let entries = split_on_comma_balanced_parens value in let has_w_descriptor = ref false in let has_x_descriptor = ref false in let no_descriptor_url = ref None in (* Track URL of first entry without width descriptor *) let x_with_sizes_error_reported = ref false in (* Track if we already reported x-with-sizes error *) let seen_descriptors = Hashtbl.create 8 in (* Track seen descriptor values -> first URL *) (* Check for empty srcset *) if String.trim value = "" then begin Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Must contain one or more image candidate strings." (q value) (q "srcset") (q element_name))))) end; (* Check for leading comma *) if String.length value > 0 && value.[0] = ',' then begin Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Starts with empty image-candidate string." (q value) (q "srcset") (q element_name))))) end; (* Check for trailing comma(s) / empty entries *) let trimmed_value = String.trim value in if String.length trimmed_value > 0 && trimmed_value.[String.length trimmed_value - 1] = ',' then begin (* Count consecutive trailing commas *) let rec count_trailing_commas s idx count = if idx < 0 then count else if s.[idx] = ',' then count_trailing_commas s (idx - 1) (count + 1) else if s.[idx] = ' ' || s.[idx] = '\t' then count_trailing_commas s (idx - 1) count else count in let trailing_commas = count_trailing_commas trimmed_value (String.length trimmed_value - 1) 0 in if trailing_commas > 1 then (* Multiple trailing commas: "Empty image-candidate string at" *) Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Empty image-candidate string at %s." (q value) (q "srcset") (q element_name) (q value))))) else (* Single trailing comma: "Ends with empty image-candidate string." *) Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Ends with empty image-candidate string." (q value) (q "srcset") (q element_name))))) end; List.iter (fun entry -> let entry = String.trim entry in if entry <> "" then begin (* Split entry into URL and optional descriptor - respect parentheses *) let parts = split_on_space_respecting_parens entry in (* Check if URL is valid *) let check_srcset_url url = (* Special schemes that require host/content after :// *) let special_schemes = ["http"; "https"; "ftp"; "ws"; "wss"] in (* Check for scheme-only URL like "http:" *) let url_lower = Astring.String.Ascii.lowercase url in List.iter (fun scheme -> let scheme_colon = scheme ^ ":" in if url_lower = scheme_colon then Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad image-candidate URL: %s: Expected a slash (\"/\")." (q value) (q "srcset") (q element_name) (q url))))) ) special_schemes in match parts with | [] -> () | [url] -> check_srcset_url url; (* URL only = implicit 1x descriptor - only flag if explicit 1x also seen *) if !no_descriptor_url = None then no_descriptor_url := Some url; begin match Hashtbl.find_opt seen_descriptors "explicit-1x" with | Some first_url -> Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Density for image %s is identical to density for image %s." (q value) (q "srcset") (q element_name) (q url) (q first_url))))) | None -> Hashtbl.add seen_descriptors "implicit-1x" url end | url :: desc :: rest -> (* Check URL for broken schemes *) check_srcset_url url; (* Check for extra junk - multiple descriptors are not allowed *) if rest <> [] then begin let extra_desc = List.hd rest in Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Expected single descriptor but found extraneous descriptor %s at %s." (q value) (q "srcset") (q element_name) (q extra_desc) (q value))))) end; let desc_lower = Astring.String.Ascii.lowercase (String.trim desc) in if String.length desc_lower > 0 then begin let last_char = desc_lower.[String.length desc_lower - 1] in if last_char = 'w' then has_w_descriptor := true else if last_char = 'x' then begin has_x_descriptor := true; (* If sizes is present and we have an x descriptor, generate detailed error *) if has_sizes && not !x_with_sizes_error_reported then begin x_with_sizes_error_reported := true; (* Build context: - If entry has extra parts (multiple descriptors): show "url descriptor " - Else if entry has trailing comma: show "url descriptor," - Else (last entry, no extra parts): show full srcset value *) let trimmed_url = String.trim url in let trimmed_desc = String.trim desc in let entry_context = if rest <> [] then (* Entry has multiple descriptors - show URL + first descriptor + space *) trimmed_url ^ " " ^ trimmed_desc ^ " " else (* Check if entry ends with comma in original value *) let trimmed_entry = String.trim entry in try let entry_start = Str.search_forward (Str.regexp_string trimmed_url) value 0 in let entry_end = entry_start + String.length trimmed_entry in let has_trailing_comma = entry_end < String.length value && value.[entry_end] = ',' in if has_trailing_comma then (* Entry followed by comma - show "url descriptor," *) trimmed_url ^ " " ^ trimmed_desc ^ "," else (* Last entry - show full srcset value *) value with Not_found -> value in Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Expected width descriptor but found %s at %s. (When the %s attribute is present, all image candidate strings must specify a width.)" (q value) (q "srcset") (q element_name) (q trimmed_desc) (q entry_context) (q "sizes"))))) end end; (* Check for duplicate descriptors - use normalized form *) let normalized = normalize_descriptor desc in let is_1x = (normalized = "1x") in let is_width = (last_char = 'w') in let dup_type = if is_width then "Width" else "Density" in begin match Hashtbl.find_opt seen_descriptors normalized with | Some first_url -> Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: %s for image %s is identical to %s for image %s." (q value) (q "srcset") (q element_name) dup_type (q url) (Astring.String.Ascii.lowercase dup_type) (q first_url))))) | None -> begin match (if is_1x then Hashtbl.find_opt seen_descriptors "implicit-1x" else None) with | Some first_url -> (* Explicit 1x conflicts with implicit 1x *) Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: %s for image %s is identical to %s for image %s." (q value) (q "srcset") (q element_name) dup_type (q url) (Astring.String.Ascii.lowercase dup_type) (q first_url))))) | None -> Hashtbl.add seen_descriptors normalized url; if is_1x then Hashtbl.add seen_descriptors "explicit-1x" url end end end; ignore (validate_srcset_descriptor desc element_name value has_sizes collector) end ) entries; (* Check: if w descriptor used and no sizes, that's an error for img and source *) if !has_w_descriptor && not has_sizes then Message_collector.add_typed collector (`Srcset `W_without_sizes); (* Check: if sizes is present, all entries must have width descriptors *) (match !no_descriptor_url with | Some url when has_sizes -> Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: No width specified for image %s. (When the %s attribute is present, all image candidate strings must specify a width.)" (q value) (q "srcset") (q element_name) (q url) (q "sizes"))))) | _ -> ()); (* Check: if sizes is present and srcset uses x descriptors, that's an error. Only report if we haven't already reported the detailed error. *) if has_sizes && !has_x_descriptor && not !x_with_sizes_error_reported then Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: When the %s attribute is present, all image candidate strings must specify a width." (q value) (q "srcset") (q element_name) (q "sizes"))))); (* Check for mixing w and x descriptors *) if !has_w_descriptor && !has_x_descriptor then Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Mixing width and density descriptors is not allowed." (q value) (q "srcset") (q element_name))))) let start_element _state ~element collector = match element.Element.tag with | Tag.Svg "image" -> (* SVG image elements should not have srcset *) if Attr_utils.get_attr "srcset" element.Element.raw_attrs <> None then Message_collector.add_typed collector (`Attr (`Not_allowed (`Attr "srcset", `Elem "image"))) | Tag.Html (`Img | `Source as tag) -> let name_lower = Tag.html_tag_to_string tag in let attrs = element.raw_attrs in let sizes_value = Attr_utils.get_attr "sizes" attrs in let srcset_value = Attr_utils.get_attr "srcset" attrs in let has_sizes = sizes_value <> None in let has_srcset = srcset_value <> None in (* Validate sizes if present *) (match sizes_value with | Some v -> ignore (validate_sizes v name_lower collector) | None -> ()); (* Validate srcset if present *) (match srcset_value with | Some v -> validate_srcset v name_lower has_sizes collector | None -> ()); (* Error: sizes without srcset on img *) if name_lower = "img" && has_sizes && not has_srcset then Message_collector.add_typed collector (`Srcset `Sizes_without_srcset) | _ -> () (* Other elements *) let end_element _state ~tag:_ _collector = () let checker = Checker.make ~create ~reset ~start_element ~end_element ()