(** DOM tree traversal for HTML5 conformance checking. *)
(** Convert DOM location to Message location. *)
let dom_location_to_message_location (loc : Html5rw.Dom.location) : Message.location =
Message.make_location
~line:loc.line
~column:loc.column
?end_line:loc.end_line
?end_column:loc.end_column
()
(** Get Message.location from a DOM node. *)
let node_location (node : Html5rw.Dom.node) : Message.location option =
Option.map dom_location_to_message_location node.location
(** Package a checker with its state for traversal. *)
type checker_state = {
start_element : element:Element.t -> Message_collector.t -> unit;
end_element : tag:Tag.element_tag -> Message_collector.t -> unit;
characters : string -> Message_collector.t -> unit;
end_document : Message_collector.t -> unit;
}
(** Create a checker state package from a first-class module. *)
let make_checker_state (module C : Checker.S) =
let state = C.create () in
{
start_element = (fun ~element collector ->
C.start_element state ~element collector);
end_element = (fun ~tag collector ->
C.end_element state ~tag collector);
characters = (fun text collector ->
C.characters state text collector);
end_document = (fun collector ->
C.end_document state collector);
}
(** Walk a DOM node with a single checker state. *)
let rec walk_node_single cs collector node =
let open Html5rw.Dom in
(* Set current location for messages *)
Message_collector.set_current_location collector (node_location node);
match node.name with
| "#text" ->
(* Text node: emit characters event *)
cs.characters node.data collector
| "#comment" ->
(* Comment node: skip - comment content is not text content *)
()
| "#document" | "#document-fragment" ->
(* Document/fragment nodes: just traverse children *)
List.iter (walk_node_single cs collector) node.children
| "!doctype" ->
(* Doctype node: skip (no validation events for doctype) *)
()
| _ ->
(* Element node: create typed element, emit start, traverse children, emit end *)
let element = Element.create ~name:node.name ~namespace:node.namespace ~attrs:node.attrs in
cs.start_element ~element collector;
List.iter (walk_node_single cs collector) node.children;
cs.end_element ~tag:element.tag collector
let walk checker collector node =
let cs = make_checker_state checker in
walk_node_single cs collector node;
cs.end_document collector
(** Walk a DOM node with multiple checker states. *)
let rec walk_node_all css collector node =
let open Html5rw.Dom in
(* Set current location for messages *)
Message_collector.set_current_location collector (node_location node);
match node.name with
| "#text" ->
(* Text node: emit characters event to all checkers *)
List.iter (fun cs -> cs.characters node.data collector) css
| "#comment" ->
(* Comment node: skip - comment content is not text content *)
()
| "#document" | "#document-fragment" ->
(* Document/fragment nodes: just traverse children *)
List.iter (walk_node_all css collector) node.children
| "!doctype" ->
(* Doctype node: skip *)
()
| _ ->
(* Element node: create typed element, emit start to all checkers, traverse children, emit end to all *)
let element = Element.create ~name:node.name ~namespace:node.namespace ~attrs:node.attrs in
List.iter (fun cs -> cs.start_element ~element collector) css;
List.iter (walk_node_all css collector) node.children;
List.iter (fun cs -> cs.end_element ~tag:element.tag collector) css
let walk_all checkers collector node =
(* Create checker state packages *)
let css = List.map make_checker_state checkers in
(* Traverse with all checkers *)
walk_node_all css collector node;
(* Call end_document on all checkers *)
List.iter (fun cs -> cs.end_document collector) css
let walk_registry registry collector node =
let checkers = Checker_registry.all registry in
walk_all checkers collector node