(** XHTML content model checker. Validates specific content model rules that the Nu validator checks, particularly for elements that don't allow text content or specific children. *) type figure_state = { mutable has_content_before_figcaption : bool; mutable has_figcaption : bool; mutable figcaption_at_start : bool; (* true if figcaption came first *) } type state = { mutable element_stack : string list; mutable figure_stack : figure_state list; (* Stack to handle nested figures *) } let create () = { element_stack = []; figure_stack = [] } let reset state = state.element_stack <- []; state.figure_stack <- [] (* Elements that don't allow direct text content (only specific child elements) *) let no_text_elements = [ "menu"; (* Only li elements *) "iframe"; (* In XHTML mode, no content allowed *) (* Note: figure handled separately due to complex content model with figcaption *) ] (* Check if an element is allowed as child of parent *) let is_child_allowed ~parent ~child = match parent with | "menu" -> (* menu only allows li, script, template *) List.mem child ["li"; "script"; "template"] | "table" -> (* col must be in colgroup, not directly in table *) child <> "col" | _ -> true (* Check if text is allowed in element *) let is_text_allowed element = not (List.mem element no_text_elements) (* Check if data-* attribute has uppercase characters *) let check_data_attr_case attrs collector = List.iter (fun (attr_name, _) -> if String.starts_with ~prefix:"data-" attr_name then let suffix = String.sub attr_name 5 (String.length attr_name - 5) in if String.exists (fun c -> c >= 'A' && c <= 'Z') suffix then Message_collector.add_typed collector (`Attr `Data_uppercase) ) attrs let start_element state ~element collector = let name = Tag.tag_to_string element.Element.tag in let name_lower = Astring.String.Ascii.lowercase name in let attrs = element.raw_attrs in (* Check data-* attributes for uppercase *) check_data_attr_case attrs collector; (* Check if this element is allowed as child of parent *) (match state.element_stack with | parent :: _ -> let parent_lower = Astring.String.Ascii.lowercase parent in if not (is_child_allowed ~parent:parent_lower ~child:name_lower) then Message_collector.add_typed collector (`Element (`Not_allowed_as_child (`Child name_lower, `Parent parent_lower))) | [] -> ()); (* Handle figure content model *) (match state.element_stack with | parent :: _ when Astring.String.Ascii.lowercase parent = "figure" -> (* We're inside a figure, check content model *) (match state.figure_stack with | fig :: _ -> if name_lower = "figcaption" then begin (* figcaption appearing *) if not fig.has_content_before_figcaption then fig.figcaption_at_start <- true; fig.has_figcaption <- true end else begin (* Flow content appearing in figure *) if fig.has_figcaption && not fig.figcaption_at_start then Message_collector.add_typed collector (`Element (`Not_allowed_as_child (`Child name_lower, `Parent "figure"))) else if not fig.has_figcaption then fig.has_content_before_figcaption <- true end | [] -> ()) | _ -> ()); (* If entering a figure, push new figure state *) if name_lower = "figure" then state.figure_stack <- { has_content_before_figcaption = false; has_figcaption = false; figcaption_at_start = false } :: state.figure_stack; (* Push onto stack *) state.element_stack <- name :: state.element_stack let end_element state ~tag _collector = let name_lower = Astring.String.Ascii.lowercase (Tag.tag_to_string tag) in (* Pop figure state if leaving a figure *) if name_lower = "figure" then begin match state.figure_stack with | _ :: rest -> state.figure_stack <- rest | [] -> () end; (* Pop from element stack *) match state.element_stack with | _ :: rest -> state.element_stack <- rest | [] -> () let characters state text collector = match state.element_stack with | [] -> () | parent :: _ -> let parent_lower = Astring.String.Ascii.lowercase parent in let trimmed = String.trim text in if trimmed <> "" then begin if parent_lower = "figure" then begin match state.figure_stack with | fig :: _ -> if fig.has_figcaption && not fig.figcaption_at_start then Message_collector.add_typed collector (`Element (`Text_not_allowed (`Parent "figure"))) else if not fig.has_figcaption then fig.has_content_before_figcaption <- true | [] -> () end else if not (is_text_allowed parent_lower) then Message_collector.add_typed collector (`Element (`Text_not_allowed (`Parent parent_lower))) end let checker = Checker.make ~create ~reset ~start_element ~end_element ~characters ()