(** DOM tree traversal for HTML5 conformance checking. *) (** Convert DOM location to Message location. *) let dom_location_to_message_location (loc : Html5rw.Dom.location) : Message.location = Message.make_location ~line:loc.line ~column:loc.column ?end_line:loc.end_line ?end_column:loc.end_column () (** Get Message.location from a DOM node. *) let node_location (node : Html5rw.Dom.node) : Message.location option = Option.map dom_location_to_message_location node.location (** Package a checker with its state for traversal. *) type checker_state = { start_element : element:Element.t -> Message_collector.t -> unit; end_element : tag:Tag.element_tag -> Message_collector.t -> unit; characters : string -> Message_collector.t -> unit; end_document : Message_collector.t -> unit; } (** Create a checker state package from a first-class module. *) let make_checker_state (module C : Checker.S) = let state = C.create () in { start_element = (fun ~element collector -> C.start_element state ~element collector); end_element = (fun ~tag collector -> C.end_element state ~tag collector); characters = (fun text collector -> C.characters state text collector); end_document = (fun collector -> C.end_document state collector); } (** Walk a DOM node with a single checker state. *) let rec walk_node_single cs collector node = let open Html5rw.Dom in (* Set current location for messages *) Message_collector.set_current_location collector (node_location node); match node.name with | "#text" -> (* Text node: emit characters event *) cs.characters node.data collector | "#comment" -> (* Comment node: skip - comment content is not text content *) () | "#document" | "#document-fragment" -> (* Document/fragment nodes: just traverse children *) List.iter (walk_node_single cs collector) node.children | "!doctype" -> (* Doctype node: skip (no validation events for doctype) *) () | _ -> (* Element node: create typed element, emit start, traverse children, emit end *) let element = Element.create ~name:node.name ~namespace:node.namespace ~attrs:node.attrs in cs.start_element ~element collector; List.iter (walk_node_single cs collector) node.children; cs.end_element ~tag:element.tag collector let walk checker collector node = let cs = make_checker_state checker in walk_node_single cs collector node; cs.end_document collector (** Walk a DOM node with multiple checker states. *) let rec walk_node_all css collector node = let open Html5rw.Dom in (* Set current location for messages *) Message_collector.set_current_location collector (node_location node); match node.name with | "#text" -> (* Text node: emit characters event to all checkers *) List.iter (fun cs -> cs.characters node.data collector) css | "#comment" -> (* Comment node: skip - comment content is not text content *) () | "#document" | "#document-fragment" -> (* Document/fragment nodes: just traverse children *) List.iter (walk_node_all css collector) node.children | "!doctype" -> (* Doctype node: skip *) () | _ -> (* Element node: create typed element, emit start to all checkers, traverse children, emit end to all *) let element = Element.create ~name:node.name ~namespace:node.namespace ~attrs:node.attrs in List.iter (fun cs -> cs.start_element ~element collector) css; List.iter (walk_node_all css collector) node.children; List.iter (fun cs -> cs.end_element ~tag:element.tag collector) css let walk_all checkers collector node = (* Create checker state packages *) let css = List.map make_checker_state checkers in (* Traverse with all checkers *) walk_node_all css collector node; (* Call end_document on all checkers *) List.iter (fun cs -> cs.end_document collector) css let walk_registry registry collector node = let checkers = Checker_registry.all registry in walk_all checkers collector node