(* HTML5 spec constants *)
(* Use Astring for string operations *)
let lowercase = Astring.String.Ascii.lowercase
(* Helper to create a hashtable set from a list for O(1) membership *)
let make_set elements =
let tbl = Hashtbl.create (List.length elements) in
List.iter (fun e -> Hashtbl.add tbl e ()) elements;
tbl
(* Void elements - no end tag allowed *)
let void_elements = [
"area"; "base"; "br"; "col"; "embed"; "hr"; "img"; "input";
"link"; "meta"; "source"; "track"; "wbr"
]
let void_elements_tbl = make_set void_elements
(* Raw text elements - content is raw text *)
let raw_text_elements = ["script"; "style"]
(* Escapable raw text elements *)
let escapable_raw_text_elements = ["textarea"; "title"]
(* Formatting elements for adoption agency *)
let formatting_elements = [
"a"; "b"; "big"; "code"; "em"; "font"; "i"; "nobr"; "s"; "small";
"strike"; "strong"; "tt"; "u"
]
let formatting_elements_tbl = make_set formatting_elements
(* Special elements *)
let special_elements = [
"address"; "applet"; "area"; "article"; "aside"; "base"; "basefont";
"bgsound"; "blockquote"; "body"; "br"; "button"; "caption"; "center";
"col"; "colgroup"; "dd"; "details"; "dir"; "div"; "dl"; "dt"; "embed";
"fieldset"; "figcaption"; "figure"; "footer"; "form"; "frame"; "frameset";
"h1"; "h2"; "h3"; "h4"; "h5"; "h6"; "head"; "header"; "hgroup"; "hr";
"html"; "iframe"; "img"; "input"; "keygen"; "li"; "link"; "listing";
"main"; "marquee"; "menu"; "meta"; "nav"; "noembed"; "noframes";
"noscript"; "object"; "ol"; "p"; "param"; "plaintext"; "pre"; "script";
"search"; "section"; "select"; "source"; "style"; "summary"; "table";
"tbody"; "td"; "template"; "textarea"; "tfoot"; "th"; "thead"; "title";
"tr"; "track"; "ul"; "wbr"; "xmp"
]
let special_elements_tbl = make_set special_elements
(* Heading elements *)
let heading_elements = ["h1"; "h2"; "h3"; "h4"; "h5"; "h6"]
let heading_elements_tbl = make_set heading_elements
(* Implied end tag elements *)
let implied_end_tags = [
"dd"; "dt"; "li"; "optgroup"; "option"; "p"; "rb"; "rp"; "rt"; "rtc"
]
let implied_end_tags_tbl = make_set implied_end_tags
(* Thoroughly implied end tags *)
let thoroughly_implied_end_tags = [
"caption"; "colgroup"; "dd"; "dt"; "li"; "optgroup"; "option"; "p";
"rb"; "rp"; "rt"; "rtc"; "tbody"; "td"; "tfoot"; "th"; "thead"; "tr"
]
let thoroughly_implied_end_tags_tbl = make_set thoroughly_implied_end_tags
(* Scope elements for various scope checks *)
let default_scope = [
"applet"; "caption"; "html"; "table"; "td"; "th"; "marquee"; "object"; "template"
]
let list_item_scope = default_scope @ ["ol"; "ul"]
let button_scope = default_scope @ ["button"]
let table_scope = ["html"; "table"; "template"]
let select_scope_exclude = ["optgroup"; "option"]
let select_scope_exclude_tbl = make_set select_scope_exclude
(* MathML text integration points *)
let mathml_text_integration = ["mi"; "mo"; "mn"; "ms"; "mtext"]
let mathml_text_integration_tbl = make_set mathml_text_integration
(* MathML attribute adjustments - O(1) hashtable lookup *)
let mathml_attr_adjustments_tbl =
let adjustments = [("definitionurl", "definitionURL")] in
let tbl = Hashtbl.create 4 in
List.iter (fun (k, v) -> Hashtbl.add tbl k v) adjustments;
tbl
let adjust_mathml_attrs attrs =
List.map (fun (k, v) ->
match Hashtbl.find_opt mathml_attr_adjustments_tbl (lowercase k) with
| Some adjusted_k -> (adjusted_k, v)
| None -> (k, v)
) attrs
(* SVG HTML integration points *)
let svg_html_integration = ["foreignObject"; "desc"; "title"]
let svg_html_integration_tbl = make_set (List.map lowercase svg_html_integration)
(* Helper to create hashtable from association list for O(1) lookup *)
let make_assoc_tbl pairs =
let tbl = Hashtbl.create (List.length pairs) in
List.iter (fun (k, v) -> Hashtbl.add tbl k v) pairs;
tbl
(* SVG tag name adjustments *)
let svg_tag_adjustments = [
("altglyph", "altGlyph");
("altglyphdef", "altGlyphDef");
("altglyphitem", "altGlyphItem");
("animatecolor", "animateColor");
("animatemotion", "animateMotion");
("animatetransform", "animateTransform");
("clippath", "clipPath");
("feblend", "feBlend");
("fecolormatrix", "feColorMatrix");
("fecomponenttransfer", "feComponentTransfer");
("fecomposite", "feComposite");
("feconvolvematrix", "feConvolveMatrix");
("fediffuselighting", "feDiffuseLighting");
("fedisplacementmap", "feDisplacementMap");
("fedistantlight", "feDistantLight");
("fedropshadow", "feDropShadow");
("feflood", "feFlood");
("fefunca", "feFuncA");
("fefuncb", "feFuncB");
("fefuncg", "feFuncG");
("fefuncr", "feFuncR");
("fegaussianblur", "feGaussianBlur");
("feimage", "feImage");
("femerge", "feMerge");
("femergenode", "feMergeNode");
("femorphology", "feMorphology");
("feoffset", "feOffset");
("fepointlight", "fePointLight");
("fespecularlighting", "feSpecularLighting");
("fespotlight", "feSpotLight");
("fetile", "feTile");
("feturbulence", "feTurbulence");
("foreignobject", "foreignObject");
("glyphref", "glyphRef");
("lineargradient", "linearGradient");
("radialgradient", "radialGradient");
("textpath", "textPath");
]
let svg_tag_adjustments_tbl = make_assoc_tbl svg_tag_adjustments
(* SVG attribute adjustments *)
let svg_attr_adjustments = [
("attributename", "attributeName");
("attributetype", "attributeType");
("basefrequency", "baseFrequency");
("baseprofile", "baseProfile");
("calcmode", "calcMode");
("clippathunits", "clipPathUnits");
("diffuseconstant", "diffuseConstant");
("edgemode", "edgeMode");
("filterunits", "filterUnits");
("glyphref", "glyphRef");
("gradienttransform", "gradientTransform");
("gradientunits", "gradientUnits");
("kernelmatrix", "kernelMatrix");
("kernelunitlength", "kernelUnitLength");
("keypoints", "keyPoints");
("keysplines", "keySplines");
("keytimes", "keyTimes");
("lengthadjust", "lengthAdjust");
("limitingconeangle", "limitingConeAngle");
("markerheight", "markerHeight");
("markerunits", "markerUnits");
("markerwidth", "markerWidth");
("maskcontentunits", "maskContentUnits");
("maskunits", "maskUnits");
("numoctaves", "numOctaves");
("pathlength", "pathLength");
("patterncontentunits", "patternContentUnits");
("patterntransform", "patternTransform");
("patternunits", "patternUnits");
("pointsatx", "pointsAtX");
("pointsaty", "pointsAtY");
("pointsatz", "pointsAtZ");
("preservealpha", "preserveAlpha");
("preserveaspectratio", "preserveAspectRatio");
("primitiveunits", "primitiveUnits");
("refx", "refX");
("refy", "refY");
("repeatcount", "repeatCount");
("repeatdur", "repeatDur");
("requiredextensions", "requiredExtensions");
("requiredfeatures", "requiredFeatures");
("specularconstant", "specularConstant");
("specularexponent", "specularExponent");
("spreadmethod", "spreadMethod");
("startoffset", "startOffset");
("stddeviation", "stdDeviation");
("stitchtiles", "stitchTiles");
("surfacescale", "surfaceScale");
("systemlanguage", "systemLanguage");
("tablevalues", "tableValues");
("targetx", "targetX");
("targety", "targetY");
("textlength", "textLength");
("viewbox", "viewBox");
("viewtarget", "viewTarget");
("xchannelselector", "xChannelSelector");
("ychannelselector", "yChannelSelector");
("zoomandpan", "zoomAndPan");
]
let svg_attr_adjustments_tbl = make_assoc_tbl svg_attr_adjustments
(* Foreign attribute adjustments *)
let foreign_attr_adjustments = [
("xlink:actuate", ("xlink", "actuate", "http://www.w3.org/1999/xlink"));
("xlink:arcrole", ("xlink", "arcrole", "http://www.w3.org/1999/xlink"));
("xlink:href", ("xlink", "href", "http://www.w3.org/1999/xlink"));
("xlink:role", ("xlink", "role", "http://www.w3.org/1999/xlink"));
("xlink:show", ("xlink", "show", "http://www.w3.org/1999/xlink"));
("xlink:title", ("xlink", "title", "http://www.w3.org/1999/xlink"));
("xlink:type", ("xlink", "type", "http://www.w3.org/1999/xlink"));
("xml:lang", ("xml", "lang", "http://www.w3.org/XML/1998/namespace"));
("xml:space", ("xml", "space", "http://www.w3.org/XML/1998/namespace"));
("xmlns", ("", "xmlns", "http://www.w3.org/2000/xmlns/"));
("xmlns:xlink", ("xmlns", "xlink", "http://www.w3.org/2000/xmlns/"));
]
let foreign_attr_adjustments_tbl = make_assoc_tbl foreign_attr_adjustments
(* Quirks mode detection *)
let quirky_public_matches = [
"-//w3o//dtd w3 html strict 3.0//en//";
"-/w3c/dtd html 4.0 transitional/en";
"html"
]
let quirky_public_prefixes = [
"+//silmaril//dtd html pro v0r11 19970101//";
"-//as//dtd html 3.0 aswedit + extensions//";
"-//advasoft ltd//dtd html 3.0 aswedit + extensions//";
"-//ietf//dtd html 2.0 level 1//";
"-//ietf//dtd html 2.0 level 2//";
"-//ietf//dtd html 2.0 strict level 1//";
"-//ietf//dtd html 2.0 strict level 2//";
"-//ietf//dtd html 2.0 strict//";
"-//ietf//dtd html 2.0//";
"-//ietf//dtd html 2.1e//";
"-//ietf//dtd html 3.0//";
"-//ietf//dtd html 3.2 final//";
"-//ietf//dtd html 3.2//";
"-//ietf//dtd html 3//";
"-//ietf//dtd html level 0//";
"-//ietf//dtd html level 1//";
"-//ietf//dtd html level 2//";
"-//ietf//dtd html level 3//";
"-//ietf//dtd html strict level 0//";
"-//ietf//dtd html strict level 1//";
"-//ietf//dtd html strict level 2//";
"-//ietf//dtd html strict level 3//";
"-//ietf//dtd html strict//";
"-//ietf//dtd html//";
"-//metrius//dtd metrius presentational//";
"-//microsoft//dtd internet explorer 2.0 html strict//";
"-//microsoft//dtd internet explorer 2.0 html//";
"-//microsoft//dtd internet explorer 2.0 tables//";
"-//microsoft//dtd internet explorer 3.0 html strict//";
"-//microsoft//dtd internet explorer 3.0 html//";
"-//microsoft//dtd internet explorer 3.0 tables//";
"-//netscape comm. corp.//dtd html//";
"-//netscape comm. corp.//dtd strict html//";
"-//o'reilly and associates//dtd html 2.0//";
"-//o'reilly and associates//dtd html extended 1.0//";
"-//o'reilly and associates//dtd html extended relaxed 1.0//";
"-//sq//dtd html 2.0 hotmetal + extensions//";
"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//";
"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//";
"-//spyglass//dtd html 2.0 extended//";
"-//sun microsystems corp.//dtd hotjava html//";
"-//sun microsystems corp.//dtd hotjava strict html//";
"-//w3c//dtd html 3 1995-03-24//";
"-//w3c//dtd html 3.2 draft//";
"-//w3c//dtd html 3.2 final//";
"-//w3c//dtd html 3.2//";
"-//w3c//dtd html 3.2s draft//";
"-//w3c//dtd html 4.0 frameset//";
"-//w3c//dtd html 4.0 transitional//";
"-//w3c//dtd html experimental 19960712//";
"-//w3c//dtd html experimental 970421//";
"-//w3c//dtd w3 html//";
"-//w3o//dtd w3 html 3.0//";
"-//webtechs//dtd mozilla html 2.0//";
"-//webtechs//dtd mozilla html//";
]
let limited_quirky_public_prefixes = [
"-//w3c//dtd xhtml 1.0 frameset//";
"-//w3c//dtd xhtml 1.0 transitional//";
]
let html4_public_prefixes = [
"-//w3c//dtd html 4.01 frameset//";
"-//w3c//dtd html 4.01 transitional//";
]
let quirky_system_matches = [
"http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"
]
(* Table-related element sets for tree builder O(1) lookups *)
let table_section_elements = ["tbody"; "thead"; "tfoot"]
let table_section_elements_tbl = make_set table_section_elements
let table_cell_elements = ["td"; "th"]
let table_cell_elements_tbl = make_set table_cell_elements
let foster_parenting_elements = ["table"; "tbody"; "tfoot"; "thead"; "tr"]
let foster_parenting_elements_tbl = make_set foster_parenting_elements
(* Helper functions - O(1) hashtable lookups *)
let is_void_element name = Hashtbl.mem void_elements_tbl name
let is_formatting_element name = Hashtbl.mem formatting_elements_tbl name
let is_special_element name = Hashtbl.mem special_elements_tbl name
let is_heading_element name = Hashtbl.mem heading_elements_tbl name
let is_implied_end_tag name = Hashtbl.mem implied_end_tags_tbl name
let is_thoroughly_implied_end_tag name = Hashtbl.mem thoroughly_implied_end_tags_tbl name
let is_mathml_text_integration name = Hashtbl.mem mathml_text_integration_tbl name
let is_svg_html_integration name = Hashtbl.mem svg_html_integration_tbl (lowercase name)
let is_select_scope_exclude name = Hashtbl.mem select_scope_exclude_tbl name
let is_table_section_element name = Hashtbl.mem table_section_elements_tbl name
let is_table_cell_element name = Hashtbl.mem table_cell_elements_tbl name
let is_foster_parenting_element name = Hashtbl.mem foster_parenting_elements_tbl name
(* Backwards compatibility aliases *)
let is_void = List.mem
let is_formatting = List.mem
let is_special name = List.mem name special_elements
let is_heading = List.mem
let adjust_svg_tag_name name =
match Hashtbl.find_opt svg_tag_adjustments_tbl (lowercase name) with
| Some adjusted -> adjusted
| None -> name
let adjust_svg_attrs attrs =
List.map (fun (name, value) ->
let adjusted_name =
match Hashtbl.find_opt svg_attr_adjustments_tbl (lowercase name) with
| Some n -> n
| None -> name
in
(adjusted_name, value)
) attrs
let adjust_foreign_attrs attrs =
List.map (fun (name, value) ->
match Hashtbl.find_opt foreign_attr_adjustments_tbl (lowercase name) with
| Some (prefix, local, _ns) ->
if prefix = "" then (local, value)
else (prefix ^ ":" ^ local, value)
| None -> (name, value)
) attrs