(*---------------------------------------------------------------------------
   Copyright (c) 2025 Anil Madhavapeddy . All rights reserved.
   SPDX-License-Identifier: MIT
  ---------------------------------------------------------------------------*)

(** XHTML parser using xmlm for proper XML parsing.

    This module provides XML parsing for XHTML files. The HTML5 parser copes
    with most content, but XHTML needs a real XML parser in order to handle
    the following correctly:
    - Self-closing tags on non-void elements (e.g., [<div/>])
    - XML namespaces for SVG and MathML
    - Strict XML well-formedness requirements

    {2 Usage}

    {[
      if Xhtml_parser.is_xhtml_file (Some "page.xhtml") then
        match Xhtml_parser.parse_xhtml content with
        | Ok root -> (* Process the XHTML document root *)
        | Error msg -> (* Handle parse error *)
    ]} *)

(** {1 Types} *)

(** An XHTML document representation.

    The record is exposed concretely. Note that {!parse_xhtml} yields the
    root node directly rather than a value of this type. *)
type xhtml_doc = {
  root : Html5rw.Dom.node;
      (** The document root node. *)
  errors : Html5rw.Error.t list;
      (** Parse errors (empty for valid XML). *)
}

(** {1 Parsing} *)

(** [parse_xhtml content] parses XHTML content using xmlm.

    @param content The XHTML content as a string.
    @return
      [Ok root] carrying the document root when parsing succeeds, or
      [Error message] carrying the parse error details when it fails. *)
val parse_xhtml : string -> (Html5rw.Dom.node, string) result

(** [is_xhtml_file system_id] checks whether [system_id] indicates an XHTML
    file.

    @param system_id The optional file path or identifier.
    @return [true] if the path ends with ".xhtml". *)
val is_xhtml_file : string option -> bool

(** {1 Document Access} *)

(** [xhtml_root doc] returns the document root node of [doc]. *)
val xhtml_root : xhtml_doc -> Html5rw.Dom.node

(** [xhtml_errors doc] returns the parse errors of [doc] (always empty for
    XHTML). *)
val xhtml_errors : xhtml_doc -> Html5rw.Error.t list