(** XHTML content model checker.
Validates specific content model rules that the Nu validator checks,
particularly for elements that don't allow text content or specific children. *)
type figure_state = {
mutable has_content_before_figcaption : bool;
mutable has_figcaption : bool;
mutable figcaption_at_start : bool; (* true if figcaption came first *)
}
type state = {
mutable element_stack : string list;
mutable figure_stack : figure_state list; (* Stack to handle nested figures *)
}
let create () = { element_stack = []; figure_stack = [] }
let reset state =
state.element_stack <- [];
state.figure_stack <- []
(* Elements that don't allow direct text content (only specific child elements) *)
let no_text_elements = [
"menu"; (* Only li elements *)
"iframe"; (* In XHTML mode, no content allowed *)
(* Note: figure handled separately due to complex content model with figcaption *)
]
(* Check if an element is allowed as child of parent *)
let is_child_allowed ~parent ~child =
match parent with
| "menu" ->
(* menu only allows li, script, template *)
List.mem child ["li"; "script"; "template"]
| "table" ->
(* col must be in colgroup, not directly in table *)
child <> "col"
| _ -> true
(* Check if text is allowed in element *)
let is_text_allowed element =
not (List.mem element no_text_elements)
(* Check if data-* attribute has uppercase characters *)
let check_data_attr_case attrs collector =
List.iter (fun (attr_name, _) ->
if String.starts_with ~prefix:"data-" attr_name then
let suffix = String.sub attr_name 5 (String.length attr_name - 5) in
if String.exists (fun c -> c >= 'A' && c <= 'Z') suffix then
Message_collector.add_typed collector (`Attr `Data_uppercase)
) attrs
let start_element state ~element collector =
let name = Tag.tag_to_string element.Element.tag in
let name_lower = Astring.String.Ascii.lowercase name in
let attrs = element.raw_attrs in
(* Check data-* attributes for uppercase *)
check_data_attr_case attrs collector;
(* Check if this element is allowed as child of parent *)
(match state.element_stack with
| parent :: _ ->
let parent_lower = Astring.String.Ascii.lowercase parent in
if not (is_child_allowed ~parent:parent_lower ~child:name_lower) then
Message_collector.add_typed collector
(`Element (`Not_allowed_as_child (`Child name_lower, `Parent parent_lower)))
| [] -> ());
(* Handle figure content model *)
(match state.element_stack with
| parent :: _ when Astring.String.Ascii.lowercase parent = "figure" ->
(* We're inside a figure, check content model *)
(match state.figure_stack with
| fig :: _ ->
if name_lower = "figcaption" then begin
(* figcaption appearing *)
if not fig.has_content_before_figcaption then
fig.figcaption_at_start <- true;
fig.has_figcaption <- true
end else begin
(* Flow content appearing in figure *)
if fig.has_figcaption && not fig.figcaption_at_start then
Message_collector.add_typed collector
(`Element (`Not_allowed_as_child (`Child name_lower, `Parent "figure")))
else if not fig.has_figcaption then
fig.has_content_before_figcaption <- true
end
| [] -> ())
| _ -> ());
(* If entering a figure, push new figure state *)
if name_lower = "figure" then
state.figure_stack <- { has_content_before_figcaption = false; has_figcaption = false; figcaption_at_start = false } :: state.figure_stack;
(* Push onto stack *)
state.element_stack <- name :: state.element_stack
let end_element state ~tag _collector =
let name_lower = Astring.String.Ascii.lowercase (Tag.tag_to_string tag) in
(* Pop figure state if leaving a figure *)
if name_lower = "figure" then begin
match state.figure_stack with
| _ :: rest -> state.figure_stack <- rest
| [] -> ()
end;
(* Pop from element stack *)
match state.element_stack with
| _ :: rest -> state.element_stack <- rest
| [] -> ()
let characters state text collector =
match state.element_stack with
| [] -> ()
| parent :: _ ->
let parent_lower = Astring.String.Ascii.lowercase parent in
let trimmed = String.trim text in
if trimmed <> "" then begin
if parent_lower = "figure" then begin
match state.figure_stack with
| fig :: _ ->
if fig.has_figcaption && not fig.figcaption_at_start then
Message_collector.add_typed collector
(`Element (`Text_not_allowed (`Parent "figure")))
else if not fig.has_figcaption then
fig.has_content_before_figcaption <- true
| [] -> ()
end
else if not (is_text_allowed parent_lower) then
Message_collector.add_typed collector
(`Element (`Text_not_allowed (`Parent parent_lower)))
end
let checker = Checker.make ~create ~reset ~start_element ~end_element
~characters ()