(** Base checker module for HTML5 conformance checking.
This module provides the core checker abstraction used throughout the
html5_checker library. A checker validates HTML5 documents by observing
DOM tree traversal events and emitting validation messages.
{2 Design Overview}
Checkers follow a SAX-like event model where they receive notifications
about elements, text, and document boundaries as a DOM tree is traversed.
This design allows for:
- {b Stateful validation}: Each checker maintains its own state across
multiple events
- {b Composability}: Multiple checkers can validate the same document
simultaneously
- {b Efficiency}: DOM traversal happens once regardless of checker count
{2 Checker Lifecycle}
A checker progresses through these phases:
+ {b Creation}: Initialize with {!S.create} to set up initial state
+ {b Traversal}: Receive {!S.start_element}, {!S.characters}, and
  {!S.end_element} events as the DOM is walked
+ {b Completion}: Finalize validation with {!S.end_document}
+ {b Reset} (optional): Return to initial state with {!S.reset}
{3 Event Sequence}
For a document like [<p>Hello <b>world</b></p>], the event sequence is:
{v
start_element "p"
characters "Hello "
start_element "b"
characters "world"
end_element "b"
end_element "p"
end_document
v}
{2 First-Class Modules}
Checkers are represented as first-class modules implementing the {!S}
signature. This allows:
- Dynamic checker registration and discovery
- Heterogeneous collections of checkers
- Checker selection at runtime based on validation requirements
@see <https://ocaml.org/manual/firstclassmodules.html>
  OCaml manual: First-class modules
*)
(** {1 Module Signature} *)
(** The signature that all checker modules must implement.

    A checker module maintains validation state and receives notifications
    about DOM tree traversal events. Every event handler returns [unit];
    validation messages are emitted through the [Message_collector.t]
    argument. *)
module type S = sig
  (** The type of checker state.

      This is an abstract type that holds the checker's internal validation
      state. Different checkers will have different state representations
      depending on what they need to track during validation. *)
  type state

  (** {1 Lifecycle Operations} *)

  val create : unit -> state
  (** [create ()] initializes a new checker state.

      This function sets up the initial state needed for validation,
      such as empty stacks for context tracking, counters, or lookup
      tables. *)

  val reset : state -> unit
  (** [reset state] resets the checker to its initial state.

      This allows reusing a checker for multiple documents without
      reallocating: the function returns [unit], so [state] is updated in
      place. After reset, the checker behaves as if freshly created with
      {!create}. *)

  (** {1 DOM Traversal Events} *)

  val start_element :
  state ->
  element:Element.t ->
  Message_collector.t ->
  unit
  (** [start_element state ~element collector] is called when
      entering an element during DOM traversal.

      This is where checkers can validate:
      - Whether the element is allowed in the current context
      - Whether required attributes are present
      - Whether attribute values are valid
      - Whether the element opens a new validation context

      @param state The checker state
      @param element The typed element (includes tag, typed attrs, and raw
        attrs)
      @param collector The message collector for emitting validation
        messages *)

  val end_element :
  state ->
  tag:Tag.element_tag ->
  Message_collector.t ->
  unit
  (** [end_element state ~tag collector] is called when exiting
      an element during DOM traversal.

      Unlike {!start_element}, this handler receives only the element's
      tag, not the full element.

      This is where checkers can:
      - Pop validation contexts from stacks
      - Validate that required child elements were present
      - Emit messages about element-scoped validation rules

      @param state The checker state
      @param tag The element tag
      @param collector The message collector for emitting validation
        messages *)

  val characters : state -> string -> Message_collector.t -> unit
  (** [characters state text collector] is called when text content is
      encountered during DOM traversal.

      This is where checkers can validate:
      - Whether text is allowed in the current context
      - Whether text content follows specific patterns
      - Whether text matches expected formats

      @param state The checker state
      @param text The text content
      @param collector The message collector for emitting validation
        messages *)

  val end_document : state -> Message_collector.t -> unit
  (** [end_document state collector] is called after the entire DOM tree has
      been traversed.

      This is where checkers can:
      - Emit messages about missing required elements
      - Validate document-level constraints
      - Check that all opened contexts were properly closed
      - Report any accumulated validation failures

      @param state The checker state
      @param collector The message collector for emitting validation
        messages *)
end
(** {1 Checker Values} *)
(** The type of a checker value.

    This is a packed first-class module implementing {!S}. The package
    carries the checker implementation together with its [state] type,
    which is hidden from the holder of the package; a state value is
    obtained by unpacking the module and calling its [create] function.
    Packing enables storing heterogeneous checkers in collections and
    passing them around dynamically.

    {b Example:}
    {[
      let finish_empty_document (checker : Checker.t) collector =
        let module C = (val checker : Checker.S) in
        let state = C.create () in
        C.end_document state collector
    ]} *)
type t = (module S)
(** {1 Built-in Checkers} *)
val noop : unit -> t
(** [noop ()] creates a no-operation checker that performs no validation.

    This checker ignores all events and never emits messages. It is useful:
    - As a placeholder in checker registries
    - For testing checker infrastructure
    - As a base for building new checkers

    {b Example:}
    {[
      (* Does nothing when walked over a DOM tree *)
      let checker : Checker.t = Checker.noop ()
    ]} *)
(** {1 Checker Construction Helpers} *)
(** Input signature for {!Make} functor.

    Every item of the signature must be defined, but the two optional
    callbacks ([characters] and [end_document]) are [option] values: bind
    them to [None] to get the default no-op implementation, or to
    [Some f] to supply a handler. *)
module type Input = sig
  (** The checker's state type; {!Make} exposes it unchanged as the
      [state] of the resulting module. *)
  type state

  (** Required: builds a fresh state. Same type as [create] in {!S}. *)
  val create : unit -> state

  (** Required: returns an existing state to its initial condition.
      Same type as [reset] in {!S}. *)
  val reset : state -> unit

  (** Required: called when entering an element. Same type as
      [start_element] in {!S}. *)
  val start_element : state -> element:Element.t -> Message_collector.t -> unit

  (** Required: called when exiting an element. Same type as
      [end_element] in {!S}. *)
  val end_element : state -> tag:Tag.element_tag -> Message_collector.t -> unit

  (** Optional: called for text content. Default: no-op. *)
  val characters : (state -> string -> Message_collector.t -> unit) option

  (** Optional: called at document end. Default: no-op. *)
  val end_document : (state -> Message_collector.t -> unit) option
end
(** Functor to create a checker from an {!Input} module.

    This reduces boilerplate when creating checkers that don't need
    to handle all events. The characters and end_document callbacks
    default to no-ops if not provided (i.e. when bound to [None]).

    The result satisfies {!S} with [state] equal to the input's [state],
    so it can be packed into a {!t} with [(module ...)].

    {b Example:}
    {[
      module Counter = Checker.Make (struct
        type state = { mutable count : int }

        let create () = { count = 0 }
        let reset s = s.count <- 0
        let start_element s ~element:_ _collector = s.count <- s.count + 1
        let end_element _ ~tag:_ _ = ()
        let characters = None (* Use default no-op *)
        let end_document = None (* Use default no-op *)
      end)

      let checker : Checker.t = (module Counter)
    ]} *)
module Make : functor (I : Input) -> S with type state = I.state
(** Create a checker from individual callback functions.

    This is a simpler alternative to the [Make] functor that eliminates the
    need for a module wrapper at the end of each checker file. All
    callbacks share one state type ['s]; that type does not appear in the
    result, which is an ordinary packed checker of type {!t}.

    The trailing [unit] argument allows the optional [?characters] and
    [?end_document] arguments to be omitted.

    {b Example:}
    {[
      type state = { mutable count : int }

      let checker = Checker.make
        ~create:(fun () -> { count = 0 })
        ~reset:(fun s -> s.count <- 0)
        ~start_element:(fun s ~element collector -> ...)
        ~end_element:(fun s ~tag collector -> ...)
        ()
    ]}

    @param create State initialization function
    @param reset State reset function
    @param start_element Element start callback
    @param end_element Element end callback
    @param characters Optional text content callback (default: no-op)
    @param end_document Optional document end callback (default: no-op)
*)
val make :
create:(unit -> 's) ->
reset:('s -> unit) ->
start_element:('s -> element:Element.t -> Message_collector.t -> unit) ->
end_element:('s -> tag:Tag.element_tag -> Message_collector.t -> unit) ->
?characters:('s -> string -> Message_collector.t -> unit) ->
?end_document:('s -> Message_collector.t -> unit) ->
unit ->
t