(** Base checker module for HTML5 conformance checking.

    This module provides the core checker abstraction used throughout the
    html5_checker library. A checker validates HTML5 documents by observing
    DOM tree traversal events and emitting validation messages.

    {2 Design Overview}

    Checkers follow a SAX-like event model where they receive notifications
    about elements, text, and document boundaries as a DOM tree is traversed.
    This design allows for:

    - {b Stateful validation}: Each checker maintains its own state across
      multiple events
    - {b Composability}: Multiple checkers can validate the same document
      simultaneously
    - {b Efficiency}: DOM traversal happens once regardless of checker count

    {2 Checker Lifecycle}

    A checker progresses through these phases:

    + {b Creation}: Initialize with {!S.create} to set up initial state
    + {b Traversal}: Receive {!S.start_element}, {!S.characters}, and
      {!S.end_element} events as the DOM is walked
    + {b Completion}: Finalize validation with {!S.end_document}
    + {b Reset} (optional): Return to initial state with {!S.reset}

    {3 Event Sequence}

    For a document like [<p>Hello <b>world</b></p>], the event sequence is:

    {v
    start_element "p"
    characters "Hello "
    start_element "b"
    characters "world"
    end_element "b"
    end_element "p"
    end_document
    v}

    {2 First-Class Modules}

    Checkers are represented as first-class modules implementing the {!S}
    signature. This allows:

    - Dynamic checker registration and discovery
    - Heterogeneous collections of checkers
    - Checker selection at runtime based on validation requirements

    @see "The OCaml manual" Chapter on first-class modules *)

(** {1 Module Signature} *)

(** The signature that all checker modules must implement.

    A checker module maintains validation state and receives notifications
    about DOM tree traversal events. *)
module type S = sig
  (** The type of checker state.

      This is an abstract type that holds the checker's internal validation
      state. Different checkers will have different state representations
      depending on what they need to track during validation. *)
  type state

  (** {1 Lifecycle Operations} *)

  val create : unit -> state
  (** [create ()] initializes a new checker state.

      This function sets up the initial state needed for validation, such as
      empty stacks for context tracking, counters, or lookup tables. *)

  val reset : state -> unit
  (** [reset state] resets the checker to its initial state.

      This allows reusing a checker for multiple documents without
      reallocating. After reset, the checker behaves as if freshly created
      with {!create}. *)

  (** {1 DOM Traversal Events} *)

  val start_element : state -> element:Element.t -> Message_collector.t -> unit
  (** [start_element state ~element collector] is called when entering an
      element during DOM traversal.

      This is where checkers can validate:

      - Whether the element is allowed in the current context
      - Whether required attributes are present
      - Whether attribute values are valid
      - Whether the element opens a new validation context

      @param state The checker state
      @param element
        The typed element (includes tag, typed attrs, and raw attrs)
      @param collector
        The message collector for emitting validation messages *)

  val end_element : state -> tag:Tag.element_tag -> Message_collector.t -> unit
  (** [end_element state ~tag collector] is called when exiting an element
      during DOM traversal.

      This is where checkers can:

      - Pop validation contexts from stacks
      - Validate that required child elements were present
      - Emit messages about element-scoped validation rules

      @param state The checker state
      @param tag The element tag
      @param collector
        The message collector for emitting validation messages *)

  val characters : state -> string -> Message_collector.t -> unit
  (** [characters state text collector] is called when text content is
      encountered during DOM traversal.

      This is where checkers can validate:

      - Whether text is allowed in the current context
      - Whether text content follows specific patterns
      - Whether text matches expected formats

      @param state The checker state
      @param text The text content
      @param collector
        The message collector for emitting validation messages *)

  val end_document : state -> Message_collector.t -> unit
  (** [end_document state collector] is called after the entire DOM tree has
      been traversed.

      This is where checkers can:

      - Emit messages about missing required elements
      - Validate document-level constraints
      - Check that all opened contexts were properly closed
      - Report any accumulated validation failures

      @param state The checker state
      @param collector
        The message collector for emitting validation messages *)
end

(** {1 Checker Values} *)

(** The type of a checker value.

    This is a packed first-class module that bundles a checker implementation
    together with its abstract [state] type. To drive a checker, unpack the
    module and obtain a state value from its [create] function. Packing
    enables storing heterogeneous checkers in collections and passing them
    around dynamically. *)
type t = (module S)

(** {1 Built-in Checkers} *)

val noop : unit -> t
(** [noop ()] creates a no-operation checker that performs no validation.

    This checker ignores all events and never emits messages. It is useful:

    - As a placeholder in checker registries
    - For testing checker infrastructure
    - As a base for building new checkers

    {b Example:}
    {[
      let checker = Checker.noop ()
      (* [checker] does nothing when walked over a DOM tree *)
    ]} *)

(** {1 Checker Construction Helpers} *)

(** Input signature for the {!Make} functor.

    Every item must be defined, but the two optional callbacks
    ([characters] and [end_document]) are [option] values: bind them to
    [None] to get the no-op default. *)
module type Input = sig
  type state

  val create : unit -> state

  val reset : state -> unit

  val start_element : state -> element:Element.t -> Message_collector.t -> unit

  val end_element : state -> tag:Tag.element_tag -> Message_collector.t -> unit

  (** Optional: called for text content. Default: no-op. *)
  val characters : (state -> string -> Message_collector.t -> unit) option

  (** Optional: called at document end. Default: no-op. *)
  val end_document : (state -> Message_collector.t -> unit) option
end

(** Functor to create a checker from an {!Input} module.

    This reduces boilerplate when creating checkers that don't need to handle
    all events. The [characters] and [end_document] callbacks default to
    no-ops if not provided.

    The result is a module, so bind it with [module ... = ...]; pack it with
    [(module ...)] when a {!t} value is needed.

    {b Example:}
    {[
      module Counter = Checker.Make (struct
        type state = { mutable count : int }

        let create () = { count = 0 }
        let reset s = s.count <- 0
        let start_element s ~element:_ _collector = s.count <- s.count + 1
        let end_element _ ~tag:_ _ = ()
        let characters = None (* Use default no-op *)
        let end_document = None (* Use default no-op *)
      end)

      let checker : Checker.t = (module Counter)
    ]} *)
module Make : functor (I : Input) -> S with type state = I.state

(** Create a checker from individual callback functions.

    This is a simpler alternative to the {!Make} functor that eliminates the
    need for a module wrapper at the end of each checker file. The trailing
    [unit] argument lets the optional callbacks be omitted.

    {b Example:}
    {[
      let checker =
        Checker.make
          ~create:(fun () -> { count = 0 })
          ~reset:(fun s -> s.count <- 0)
          ~start_element:(fun s ~element collector -> ...)
          ~end_element:(fun s ~tag collector -> ...)
          ()
    ]}

    @param create State initialization function
    @param reset State reset function
    @param start_element Element start callback
    @param end_element Element end callback
    @param characters Optional text content callback (default: no-op)
    @param end_document Optional document end callback (default: no-op) *)
val make :
  create:(unit -> 's) ->
  reset:('s -> unit) ->
  start_element:('s -> element:Element.t -> Message_collector.t -> unit) ->
  end_element:('s -> tag:Tag.element_tag -> Message_collector.t -> unit) ->
  ?characters:('s -> string -> Message_collector.t -> unit) ->
  ?end_document:('s -> Message_collector.t -> unit) ->
  unit ->
  t