(** Datetime attribute validation checker *) let q = Error_code.q (** Elements that have datetime attribute *) let datetime_elements = ["del"; "ins"; "time"] (** Parse int safely *) let parse_int s = try Some (int_of_string s) with _ -> None (** Days in each month (non-leap year) *) let days_in_month = [| 31; 28; 31; 30; 31; 30; 31; 31; 30; 31; 30; 31 |] (** Check if a year is a leap year *) let is_leap_year year = (year mod 400 = 0) || (year mod 4 = 0 && year mod 100 <> 0) (** Get max day for a given month/year *) let max_day_for_month year month = if month = 2 && is_leap_year year then 29 else if month >= 1 && month <= 12 then days_in_month.(month - 1) else 31 (** Validate date string YYYY-MM-DD. Returns (valid, error_reason option) *) let validate_date s = let pattern = Str.regexp "^\\([0-9]+\\)-\\([0-9][0-9]\\)-\\([0-9][0-9]\\)$" in if not (Str.string_match pattern s 0) then (false, Some "The literal did not satisfy the date format") else let year_s = Str.matched_group 1 s in let month_s = Str.matched_group 2 s in let day_s = Str.matched_group 3 s in if String.length year_s < 4 then (false, Some "The literal did not satisfy the date format") else match (parse_int year_s, parse_int month_s, parse_int day_s) with | None, _, _ | _, None, _ | _, _, None -> (false, Some "Invalid year, month or day") | Some year, Some month, Some day -> if year < 1 then (false, Some "Year cannot be less than 1") else if month = 0 then (false, Some "Month cannot be less than 1") else if month > 12 then (false, Some "Month cannot be greater than 12") else if day < 1 then (false, Some "Day cannot be less than 1") else let max_day = max_day_for_month year month in if day > max_day then (false, Some "Day out of range") else (true, None) (** Check if a date-like value has a 5+ digit year (might be mistyped) *) let has_suspicious_year s = let pattern = Str.regexp "^\\([0-9]+\\)-" in if Str.string_match pattern s 0 then let year_s = Str.matched_group 1 s in String.length year_s > 4 else false (** Check if a date has year before 1000 (might be mistyped or unusual) *) let has_old_year s = let pattern = Str.regexp "^\\([0-9]+\\)-" in if Str.string_match pattern s 0 then let year_s = Str.matched_group 1 s in match parse_int year_s with | Some year -> year < 1000 | None -> false else false (** Validate time string HH:MM[:SS[.sss]] *) let validate_time s = let pattern = Str.regexp "^\\([0-9][0-9]\\):\\([0-9][0-9]\\)\\(:\\([0-9][0-9]\\)\\(\\.\\([0-9]+\\)\\)?\\)?$" in if not (Str.string_match pattern s 0) then (false, None) (* Format error - return None so caller uses generic message *) else let hour_s = Str.matched_group 1 s in let minute_s = Str.matched_group 2 s in match (parse_int hour_s, parse_int minute_s) with | None, _ | _, None -> (false, Some "Invalid hour or minute") | Some hour, Some minute -> if hour > 23 then (false, Some "Hour cannot be greater than 23") else if minute > 59 then (false, Some "Minute cannot be greater than 59") else let second_s = try Some (Str.matched_group 4 s) with Not_found -> None in match second_s with | None -> (true, None) | Some sec_s -> match parse_int sec_s with | None -> (false, Some "Invalid seconds") | Some sec -> if sec > 59 then (false, Some "Second cannot be greater than 59") else (* Check milliseconds if present *) let millis_s = try Some (Str.matched_group 6 s) with Not_found -> None in match millis_s with | None -> (true, None) | Some ms -> if String.length ms < 1 || String.length ms > 3 then (false, Some "A fraction of a second must be one, two, or three digits") else (true, None) (** Validate year-only format YYYY (at least 4 digits, > 0) *) let validate_year_only s = let pattern = Str.regexp "^\\([0-9]+\\)$" in if not (Str.string_match pattern s 0) then (false, Some "Year must be digits only") else let year_s = Str.matched_group 1 s in if String.length year_s < 4 then (false, Some "The literal did not satisfy the date format") else match parse_int year_s with | None -> (false, Some "Invalid year") | Some year -> if year < 1 then (false, Some "Year cannot be less than 1") else (true, None) (** Validate month format YYYY-MM *) let validate_year_month s = let pattern = Str.regexp "^\\([0-9]+\\)-\\([0-9][0-9]\\)$" in if not (Str.string_match pattern s 0) then (false, Some "Month must be in YYYY-MM format") else let year_s = Str.matched_group 1 s in let month_s = Str.matched_group 2 s in if String.length year_s < 4 then (false, Some "The literal did not satisfy the date format") else match (parse_int year_s, parse_int month_s) with | None, _ | _, None -> (false, Some "Invalid year or month") | Some year, Some month -> if year < 1 then (false, Some "Year cannot be less than 1") else if month < 1 || month > 12 then (false, Some "Month out of range") else (true, None) (** Validate week format YYYY-Www *) let validate_week s = let pattern = Str.regexp "^\\([0-9]+\\)-W\\([0-9][0-9]\\)$" in if not (Str.string_match pattern s 0) then (false, Some "Week must be in YYYY-Www format") else let year_s = Str.matched_group 1 s in let week_s = Str.matched_group 2 s in if String.length year_s < 4 then (false, Some "The literal did not satisfy the date format") else match (parse_int year_s, parse_int week_s) with | None, _ | _, None -> (false, Some "Invalid year or week") | Some year, Some week -> if year < 1 then (false, Some "Year cannot be less than 1") else if week < 1 || week > 53 then (false, Some "Week out of range") else (true, None) (** Validate yearless date format --MM-DD *) let validate_yearless_date s = let pattern = Str.regexp "^--\\([0-9][0-9]\\)-\\([0-9][0-9]\\)$" in if not (Str.string_match pattern s 0) then (false, Some "Yearless date must be in --MM-DD format") else let month_s = Str.matched_group 1 s in let day_s = Str.matched_group 2 s in match (parse_int month_s, parse_int day_s) with | None, _ | _, None -> (false, Some "Invalid month or day") | Some month, Some day -> if month < 1 || month > 12 then (false, Some "Month out of range") else if day < 1 then (false, Some "Day cannot be less than 1") else (* Use non-leap year for yearless date validation *) let max_day = if month = 2 then 29 else days_in_month.(month - 1) in if day > max_day then (false, Some "Day out of range") else (true, None) (** Validate duration format - HTML5 only accepts: 1. Duration time component: PT#H#M#S (or PT#H, PT#M, PT#S, etc.) 2. Duration weeks: P#W 3. Duration days: P#D or P#DT#H#M#S *) let validate_duration s = if String.length s < 2 then (false, Some "Duration too short") else if s.[0] <> 'P' then (false, Some "Duration must start with P") else let rest = String.sub s 1 (String.length s - 1) in (* Valid HTML5 duration patterns: - PT#H#M#S (or any combination of H, M, S after T) - P#W (weeks only) - P#D or P#DT#H#M#S (days with optional time) *) let pattern_time_only = Str.regexp "^T\\([0-9]+H\\)?\\([0-9]+M\\)?\\([0-9]+\\(\\.[0-9]+\\)?S\\)?$" in let pattern_weeks = Str.regexp "^[0-9]+W$" in let pattern_days = Str.regexp "^[0-9]+D\\(T\\([0-9]+H\\)?\\([0-9]+M\\)?\\([0-9]+\\(\\.[0-9]+\\)?S\\)?\\)?$" in if Str.string_match pattern_time_only rest 0 then (* Check that at least one component exists after T *) if String.length rest > 1 then (true, None) else (false, Some "Invalid duration format") else if Str.string_match pattern_weeks rest 0 then (true, None) else if Str.string_match pattern_days rest 0 then (true, None) else (false, Some "Invalid duration format") (** Result type for timezone validation *) type tz_result = TzOk | TzWarning of string | TzError of string (** Validate timezone offset +HH:MM or -HH:MM or +HHMM or -HHMM Returns warning for unusual but valid offsets: - Negative offsets > 12:00 (e.g., -13:00) - Positive offsets > 14:00 (e.g., +15:00) - Offsets with unusual minutes (not 00, 30, 45) *) let validate_timezone_offset s = (* Try +HH:MM format *) let pattern_colon = Str.regexp "^\\([+-]\\)\\([0-9][0-9]\\):\\([0-9][0-9]\\)$" in (* Try +HHMM format (no colon) *) let pattern_no_colon = Str.regexp "^\\([+-]\\)\\([0-9][0-9]\\)\\([0-9][0-9]\\)$" in let matched, sign = if Str.string_match pattern_colon s 0 then (true, Str.matched_group 1 s) else if Str.string_match pattern_no_colon s 0 then (true, Str.matched_group 1 s) else (false, "+") in if not matched then TzError "The literal did not satisfy the datetime with timezone format" else let hour_s = Str.matched_group 2 s in let minute_s = Str.matched_group 3 s in match (parse_int hour_s, parse_int minute_s) with | None, _ | _, None -> TzError "Invalid timezone" | Some hour, Some minute -> if hour > 23 then TzError "Hours out of range in time zone designator" else if minute > 59 then TzError "Minutes out of range in time zone designator" else begin (* Check for unusual but valid offsets *) let unusual_range = if sign = "-" && hour >= 13 then true else if sign = "+" && hour >= 15 then true else false in let unusual_minutes = minute <> 0 && minute <> 30 && minute <> 45 in if unusual_range then TzWarning "Hours in time zone designator should be from \"-12:00\" to \"+14:00\"" else if unusual_minutes then TzWarning "Minutes in time zone designator should be either \"00\", \"30\", or \"45\"." else TzOk end (** Result type for datetime with timezone validation *) type dt_tz_result = DtOk | DtWarning of string | DtError of string (** Validate datetime with timezone: YYYY-MM-DDTHH:MM:SS[.sss]Z or YYYY-MM-DDTHH:MM:SS[.sss]+HH:MM *) let validate_datetime_with_timezone s = (* Try to split on T or space *) let sep_pos = try Some (String.index s 'T') with Not_found -> try Some (String.index s ' ') with Not_found -> None in match sep_pos with | None -> DtError "The literal did not satisfy the datetime with timezone format" | Some pos -> let date_part = String.sub s 0 pos in let time_and_tz = String.sub s (pos + 1) (String.length s - pos - 1) in (* Validate date *) match validate_date date_part with | (false, _) -> DtError "The literal did not satisfy the datetime with timezone format" | (true, _) -> let date_old = has_old_year date_part in (* Check if ends with Z *) if String.length time_and_tz > 0 && time_and_tz.[String.length time_and_tz - 1] = 'Z' then begin let time_part = String.sub time_and_tz 0 (String.length time_and_tz - 1) in match validate_time time_part with | (false, Some reason) -> DtError reason | (false, None) -> DtError "The literal did not satisfy the datetime with timezone format" | (true, _) -> if date_old then DtWarning "Year may be mistyped" else DtOk end else begin (* Check for +/- timezone offset *) let plus_pos = try Some (String.rindex time_and_tz '+') with Not_found -> None in let minus_pos = try Some (String.rindex time_and_tz '-') with Not_found -> None in let tz_pos = match plus_pos, minus_pos with | Some p, Some m -> Some (max p m) | Some p, None -> Some p | None, Some m -> Some m | None, None -> None in match tz_pos with | None -> DtError "The literal did not satisfy the datetime with timezone format" | Some tp -> let time_part = String.sub time_and_tz 0 tp in let tz_part = String.sub time_and_tz tp (String.length time_and_tz - tp) in match validate_time time_part with | (false, Some reason) -> DtError reason | (false, None) -> DtError "The literal did not satisfy the datetime with timezone format" | (true, _) -> match validate_timezone_offset tz_part with | TzError e -> DtError e | TzWarning w -> DtWarning w | TzOk -> if date_old then DtWarning "Year may be mistyped" else DtOk end (** Validate datetime-local: YYYY-MM-DDTHH:MM[:SS[.sss]] or YYYY-MM-DD HH:MM *) let validate_datetime_local s = let sep_pos = try Some (String.index s 'T') with Not_found -> try Some (String.index s ' ') with Not_found -> None in match sep_pos with | None -> (false, Some "Invalid datetime-local format") | Some pos -> let date_part = String.sub s 0 pos in let time_part = String.sub s (pos + 1) (String.length s - pos - 1) in match validate_date date_part with | (false, reason) -> (false, reason) | (true, _) -> match validate_time time_part with | (false, reason) -> (false, reason) | (true, _) -> (true, None) (** Result type for datetime validation - can be Ok, Error, or Warning *) type datetime_result = | Ok | Error of string | Warning of string (** Validate datetime attribute - valid formats depend on element: - del/ins: only date or datetime-with-timezone - time: date, time, datetime-local, datetime-with-timezone, year, month, week, yearless, duration *) let validate_datetime_attr value element_name attr_name = let is_time_element = element_name = "time" in (* Check for leading/trailing whitespace - not allowed *) if value <> String.trim value then begin let tz_msg = "Bad datetime with timezone: The literal did not satisfy the datetime with timezone format." in let date_msg = "Bad date: The literal did not satisfy the date format." in Error (Printf.sprintf "Bad value %s for attribute %s on element %s: %s %s" (q value) (q attr_name) (q element_name) tz_msg date_msg) end else (* Try datetime with timezone first *) match validate_datetime_with_timezone value with | DtOk -> Ok (* Valid datetime with timezone *) | DtWarning w -> (* Valid but with warning - format matches Nu validator *) Warning (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad datetime with timezone: %s Bad date: The literal did not satisfy the date format." (q value) (q attr_name) (q element_name) w) | DtError tz_error -> (* Try just date - valid for all elements *) match validate_date value with | (true, _) -> (* Date is valid, but check for suspicious year (5+ digits or old year) *) if has_suspicious_year value || has_old_year value then begin let date_msg = "Bad date: Year may be mistyped." in let tz_msg = Printf.sprintf "Bad datetime with timezone: %s." tz_error in Warning (Printf.sprintf "Bad value %s for attribute %s on element %s: %s %s" (q value) (q attr_name) (q element_name) date_msg tz_msg) end else Ok (* Valid date with normal year *) | (false, date_error) -> (* For time element only, try additional formats *) if is_time_element then begin match validate_datetime_local value with | (true, _) -> Ok (* Valid datetime-local *) | (false, _) -> match validate_time value with | (true, _) -> Ok (* Valid time *) | (false, _) -> match validate_year_month value with | (true, _) -> Ok (* Valid month YYYY-MM *) | (false, _) -> match validate_year_only value with | (true, _) -> Ok (* Valid year YYYY *) | (false, _) -> match validate_week value with | (true, _) -> Ok (* Valid week YYYY-Www *) | (false, _) -> match validate_yearless_date value with | (true, _) -> Ok (* Valid yearless date --MM-DD *) | (false, _) -> match validate_duration value with | (true, _) -> Ok (* Valid duration P... *) | (false, _) -> (* Use simplified message for time element matching Nu validator format *) Error (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad time-datetime: The literal did not satisfy the time-datetime format." (q value) (q attr_name) (q element_name)) end else begin (* del/ins only allow date or datetime-with-timezone *) let tz_msg = Printf.sprintf "Bad datetime with timezone: %s." tz_error in let date_msg = match date_error with | Some e -> Printf.sprintf "Bad date: %s." e | None -> "Bad date: The literal did not satisfy the date format." in (* Order depends on error type. The Nu validator has specific patterns: - Time hour/minute errors (not timezone) -> datetime first - Timezone hours error -> datetime first - Timezone minutes error -> date first - Time fraction error -> date first - Date "less than" error -> date first - Date "greater than" error -> datetime first - Generic errors both sides -> datetime first *) let is_generic_tz = tz_error = "The literal did not satisfy the datetime with timezone format" in let is_tz_hours_error = String.length tz_error >= 5 && String.sub tz_error 0 5 = "Hours" in let is_tz_minutes_error = String.length tz_error >= 7 && String.sub tz_error 0 7 = "Minutes" in let is_time_minute_or_hour_error = (try ignore (Str.search_forward (Str.regexp "Minute cannot\\|Hour cannot") tz_error 0); true with Not_found -> false) in let is_fraction_error = try ignore (Str.search_forward (Str.regexp "fraction") tz_error 0); true with Not_found -> false in let is_month_less_than_error = match date_error with | Some e -> (try ignore (Str.search_forward (Str.regexp "Month cannot be less than") e 0); true with Not_found -> false) | None -> false in (* Datetime first for: generic tz, tz hours error, time minute/hour errors, year errors Date first for: "Month cannot be less than" date error, tz minutes error, fraction error *) if is_month_less_than_error then Error (Printf.sprintf "Bad value %s for attribute %s on element %s: %s %s" (q value) (q attr_name) (q element_name) date_msg tz_msg) else if is_tz_minutes_error || is_fraction_error then Error (Printf.sprintf "Bad value %s for attribute %s on element %s: %s %s" (q value) (q attr_name) (q element_name) date_msg tz_msg) else if is_tz_hours_error || is_time_minute_or_hour_error || is_generic_tz then Error (Printf.sprintf "Bad value %s for attribute %s on element %s: %s %s" (q value) (q attr_name) (q element_name) tz_msg date_msg) else Error (Printf.sprintf "Bad value %s for attribute %s on element %s: %s %s" (q value) (q attr_name) (q element_name) tz_msg date_msg) end (** Checker state *) type state = unit [@@warning "-34"] let create () = () let reset _state = () let start_element _state ~element collector = match element.Element.tag with | Tag.Html tag -> let name = Tag.html_tag_to_string tag in if List.mem name datetime_elements then begin (* Check for datetime attribute *) let datetime_attr = List.find_map (fun (k, v) -> if Astring.String.Ascii.lowercase k = "datetime" then Some v else None ) element.raw_attrs in match datetime_attr with | None -> () | Some value -> if String.trim value = "" then () else match validate_datetime_attr value name "datetime" with | Ok -> () | Error error_msg -> Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message error_msg))) | Warning warn_msg -> Message_collector.add_typed collector (`Generic warn_msg) end | _ -> () (* Non-HTML elements don't have datetime attributes *) let end_element _state ~tag:_ _collector = () let checker = Checker.make ~create ~reset ~start_element ~end_element ()