(* HTML5 spec constants *) (* Use Astring for string operations *) let lowercase = Astring.String.Ascii.lowercase (* Helper to create a hashtable set from a list for O(1) membership *) let make_set elements = let tbl = Hashtbl.create (List.length elements) in List.iter (fun e -> Hashtbl.add tbl e ()) elements; tbl (* Void elements - no end tag allowed *) let void_elements = [ "area"; "base"; "br"; "col"; "embed"; "hr"; "img"; "input"; "link"; "meta"; "source"; "track"; "wbr" ] let void_elements_tbl = make_set void_elements (* Raw text elements - content is raw text *) let raw_text_elements = ["script"; "style"] (* Escapable raw text elements *) let escapable_raw_text_elements = ["textarea"; "title"] (* Formatting elements for adoption agency *) let formatting_elements = [ "a"; "b"; "big"; "code"; "em"; "font"; "i"; "nobr"; "s"; "small"; "strike"; "strong"; "tt"; "u" ] let formatting_elements_tbl = make_set formatting_elements (* Special elements *) let special_elements = [ "address"; "applet"; "area"; "article"; "aside"; "base"; "basefont"; "bgsound"; "blockquote"; "body"; "br"; "button"; "caption"; "center"; "col"; "colgroup"; "dd"; "details"; "dir"; "div"; "dl"; "dt"; "embed"; "fieldset"; "figcaption"; "figure"; "footer"; "form"; "frame"; "frameset"; "h1"; "h2"; "h3"; "h4"; "h5"; "h6"; "head"; "header"; "hgroup"; "hr"; "html"; "iframe"; "img"; "input"; "keygen"; "li"; "link"; "listing"; "main"; "marquee"; "menu"; "meta"; "nav"; "noembed"; "noframes"; "noscript"; "object"; "ol"; "p"; "param"; "plaintext"; "pre"; "script"; "search"; "section"; "select"; "source"; "style"; "summary"; "table"; "tbody"; "td"; "template"; "textarea"; "tfoot"; "th"; "thead"; "title"; "tr"; "track"; "ul"; "wbr"; "xmp" ] let special_elements_tbl = make_set special_elements (* Heading elements *) let heading_elements = ["h1"; "h2"; "h3"; "h4"; "h5"; "h6"] let heading_elements_tbl = make_set heading_elements (* Implied end tag elements *) let implied_end_tags = [ "dd"; "dt"; "li"; "optgroup"; "option"; "p"; "rb"; "rp"; "rt"; "rtc" ] let implied_end_tags_tbl = make_set implied_end_tags (* Thoroughly implied end tags *) let thoroughly_implied_end_tags = [ "caption"; "colgroup"; "dd"; "dt"; "li"; "optgroup"; "option"; "p"; "rb"; "rp"; "rt"; "rtc"; "tbody"; "td"; "tfoot"; "th"; "thead"; "tr" ] let thoroughly_implied_end_tags_tbl = make_set thoroughly_implied_end_tags (* Scope elements for various scope checks *) let default_scope = [ "applet"; "caption"; "html"; "table"; "td"; "th"; "marquee"; "object"; "template" ] let list_item_scope = default_scope @ ["ol"; "ul"] let button_scope = default_scope @ ["button"] let table_scope = ["html"; "table"; "template"] let select_scope_exclude = ["optgroup"; "option"] let select_scope_exclude_tbl = make_set select_scope_exclude (* MathML text integration points *) let mathml_text_integration = ["mi"; "mo"; "mn"; "ms"; "mtext"] let mathml_text_integration_tbl = make_set mathml_text_integration (* MathML attribute adjustments - O(1) hashtable lookup *) let mathml_attr_adjustments_tbl = let adjustments = [("definitionurl", "definitionURL")] in let tbl = Hashtbl.create 4 in List.iter (fun (k, v) -> Hashtbl.add tbl k v) adjustments; tbl let adjust_mathml_attrs attrs = List.map (fun (k, v) -> match Hashtbl.find_opt mathml_attr_adjustments_tbl (lowercase k) with | Some adjusted_k -> (adjusted_k, v) | None -> (k, v) ) attrs (* SVG HTML integration points *) let svg_html_integration = ["foreignObject"; "desc"; "title"] let svg_html_integration_tbl = make_set (List.map lowercase svg_html_integration) (* Helper to create hashtable from association list for O(1) lookup *) let make_assoc_tbl pairs = let tbl = Hashtbl.create (List.length pairs) in List.iter (fun (k, v) -> Hashtbl.add tbl k v) pairs; tbl (* SVG tag name adjustments *) let svg_tag_adjustments = [ ("altglyph", "altGlyph"); ("altglyphdef", "altGlyphDef"); ("altglyphitem", "altGlyphItem"); ("animatecolor", "animateColor"); ("animatemotion", "animateMotion"); ("animatetransform", "animateTransform"); ("clippath", "clipPath"); ("feblend", "feBlend"); ("fecolormatrix", "feColorMatrix"); ("fecomponenttransfer", "feComponentTransfer"); ("fecomposite", "feComposite"); ("feconvolvematrix", "feConvolveMatrix"); ("fediffuselighting", "feDiffuseLighting"); ("fedisplacementmap", "feDisplacementMap"); ("fedistantlight", "feDistantLight"); ("fedropshadow", "feDropShadow"); ("feflood", "feFlood"); ("fefunca", "feFuncA"); ("fefuncb", "feFuncB"); ("fefuncg", "feFuncG"); ("fefuncr", "feFuncR"); ("fegaussianblur", "feGaussianBlur"); ("feimage", "feImage"); ("femerge", "feMerge"); ("femergenode", "feMergeNode"); ("femorphology", "feMorphology"); ("feoffset", "feOffset"); ("fepointlight", "fePointLight"); ("fespecularlighting", "feSpecularLighting"); ("fespotlight", "feSpotLight"); ("fetile", "feTile"); ("feturbulence", "feTurbulence"); ("foreignobject", "foreignObject"); ("glyphref", "glyphRef"); ("lineargradient", "linearGradient"); ("radialgradient", "radialGradient"); ("textpath", "textPath"); ] let svg_tag_adjustments_tbl = make_assoc_tbl svg_tag_adjustments (* SVG attribute adjustments *) let svg_attr_adjustments = [ ("attributename", "attributeName"); ("attributetype", "attributeType"); ("basefrequency", "baseFrequency"); ("baseprofile", "baseProfile"); ("calcmode", "calcMode"); ("clippathunits", "clipPathUnits"); ("diffuseconstant", "diffuseConstant"); ("edgemode", "edgeMode"); ("filterunits", "filterUnits"); ("glyphref", "glyphRef"); ("gradienttransform", "gradientTransform"); ("gradientunits", "gradientUnits"); ("kernelmatrix", "kernelMatrix"); ("kernelunitlength", "kernelUnitLength"); ("keypoints", "keyPoints"); ("keysplines", "keySplines"); ("keytimes", "keyTimes"); ("lengthadjust", "lengthAdjust"); ("limitingconeangle", "limitingConeAngle"); ("markerheight", "markerHeight"); ("markerunits", "markerUnits"); ("markerwidth", "markerWidth"); ("maskcontentunits", "maskContentUnits"); ("maskunits", "maskUnits"); ("numoctaves", "numOctaves"); ("pathlength", "pathLength"); ("patterncontentunits", "patternContentUnits"); ("patterntransform", "patternTransform"); ("patternunits", "patternUnits"); ("pointsatx", "pointsAtX"); ("pointsaty", "pointsAtY"); ("pointsatz", "pointsAtZ"); ("preservealpha", "preserveAlpha"); ("preserveaspectratio", "preserveAspectRatio"); ("primitiveunits", "primitiveUnits"); ("refx", "refX"); ("refy", "refY"); ("repeatcount", "repeatCount"); ("repeatdur", "repeatDur"); ("requiredextensions", "requiredExtensions"); ("requiredfeatures", "requiredFeatures"); ("specularconstant", "specularConstant"); ("specularexponent", "specularExponent"); ("spreadmethod", "spreadMethod"); ("startoffset", "startOffset"); ("stddeviation", "stdDeviation"); ("stitchtiles", "stitchTiles"); ("surfacescale", "surfaceScale"); ("systemlanguage", "systemLanguage"); ("tablevalues", "tableValues"); ("targetx", "targetX"); ("targety", "targetY"); ("textlength", "textLength"); ("viewbox", "viewBox"); ("viewtarget", "viewTarget"); ("xchannelselector", "xChannelSelector"); ("ychannelselector", "yChannelSelector"); ("zoomandpan", "zoomAndPan"); ] let svg_attr_adjustments_tbl = make_assoc_tbl svg_attr_adjustments (* Foreign attribute adjustments *) let foreign_attr_adjustments = [ ("xlink:actuate", ("xlink", "actuate", "http://www.w3.org/1999/xlink")); ("xlink:arcrole", ("xlink", "arcrole", "http://www.w3.org/1999/xlink")); ("xlink:href", ("xlink", "href", "http://www.w3.org/1999/xlink")); ("xlink:role", ("xlink", "role", "http://www.w3.org/1999/xlink")); ("xlink:show", ("xlink", "show", "http://www.w3.org/1999/xlink")); ("xlink:title", ("xlink", "title", "http://www.w3.org/1999/xlink")); ("xlink:type", ("xlink", "type", "http://www.w3.org/1999/xlink")); ("xml:lang", ("xml", "lang", "http://www.w3.org/XML/1998/namespace")); ("xml:space", ("xml", "space", "http://www.w3.org/XML/1998/namespace")); ("xmlns", ("", "xmlns", "http://www.w3.org/2000/xmlns/")); ("xmlns:xlink", ("xmlns", "xlink", "http://www.w3.org/2000/xmlns/")); ] let foreign_attr_adjustments_tbl = make_assoc_tbl foreign_attr_adjustments (* Quirks mode detection *) let quirky_public_matches = [ "-//w3o//dtd w3 html strict 3.0//en//"; "-/w3c/dtd html 4.0 transitional/en"; "html" ] let quirky_public_prefixes = [ "+//silmaril//dtd html pro v0r11 19970101//"; "-//as//dtd html 3.0 aswedit + extensions//"; "-//advasoft ltd//dtd html 3.0 aswedit + extensions//"; "-//ietf//dtd html 2.0 level 1//"; "-//ietf//dtd html 2.0 level 2//"; "-//ietf//dtd html 2.0 strict level 1//"; "-//ietf//dtd html 2.0 strict level 2//"; "-//ietf//dtd html 2.0 strict//"; "-//ietf//dtd html 2.0//"; "-//ietf//dtd html 2.1e//"; "-//ietf//dtd html 3.0//"; "-//ietf//dtd html 3.2 final//"; "-//ietf//dtd html 3.2//"; "-//ietf//dtd html 3//"; "-//ietf//dtd html level 0//"; "-//ietf//dtd html level 1//"; "-//ietf//dtd html level 2//"; "-//ietf//dtd html level 3//"; "-//ietf//dtd html strict level 0//"; "-//ietf//dtd html strict level 1//"; "-//ietf//dtd html strict level 2//"; "-//ietf//dtd html strict level 3//"; "-//ietf//dtd html strict//"; "-//ietf//dtd html//"; "-//metrius//dtd metrius presentational//"; "-//microsoft//dtd internet explorer 2.0 html strict//"; "-//microsoft//dtd internet explorer 2.0 html//"; "-//microsoft//dtd internet explorer 2.0 tables//"; "-//microsoft//dtd internet explorer 3.0 html strict//"; "-//microsoft//dtd internet explorer 3.0 html//"; "-//microsoft//dtd internet explorer 3.0 tables//"; "-//netscape comm. corp.//dtd html//"; "-//netscape comm. corp.//dtd strict html//"; "-//o'reilly and associates//dtd html 2.0//"; "-//o'reilly and associates//dtd html extended 1.0//"; "-//o'reilly and associates//dtd html extended relaxed 1.0//"; "-//sq//dtd html 2.0 hotmetal + extensions//"; "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//"; "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//"; "-//spyglass//dtd html 2.0 extended//"; "-//sun microsystems corp.//dtd hotjava html//"; "-//sun microsystems corp.//dtd hotjava strict html//"; "-//w3c//dtd html 3 1995-03-24//"; "-//w3c//dtd html 3.2 draft//"; "-//w3c//dtd html 3.2 final//"; "-//w3c//dtd html 3.2//"; "-//w3c//dtd html 3.2s draft//"; "-//w3c//dtd html 4.0 frameset//"; "-//w3c//dtd html 4.0 transitional//"; "-//w3c//dtd html experimental 19960712//"; "-//w3c//dtd html experimental 970421//"; "-//w3c//dtd w3 html//"; "-//w3o//dtd w3 html 3.0//"; "-//webtechs//dtd mozilla html 2.0//"; "-//webtechs//dtd mozilla html//"; ] let limited_quirky_public_prefixes = [ "-//w3c//dtd xhtml 1.0 frameset//"; "-//w3c//dtd xhtml 1.0 transitional//"; ] let html4_public_prefixes = [ "-//w3c//dtd html 4.01 frameset//"; "-//w3c//dtd html 4.01 transitional//"; ] let quirky_system_matches = [ "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd" ] (* Table-related element sets for tree builder O(1) lookups *) let table_section_elements = ["tbody"; "thead"; "tfoot"] let table_section_elements_tbl = make_set table_section_elements let table_cell_elements = ["td"; "th"] let table_cell_elements_tbl = make_set table_cell_elements let foster_parenting_elements = ["table"; "tbody"; "tfoot"; "thead"; "tr"] let foster_parenting_elements_tbl = make_set foster_parenting_elements (* Helper functions - O(1) hashtable lookups *) let is_void_element name = Hashtbl.mem void_elements_tbl name let is_formatting_element name = Hashtbl.mem formatting_elements_tbl name let is_special_element name = Hashtbl.mem special_elements_tbl name let is_heading_element name = Hashtbl.mem heading_elements_tbl name let is_implied_end_tag name = Hashtbl.mem implied_end_tags_tbl name let is_thoroughly_implied_end_tag name = Hashtbl.mem thoroughly_implied_end_tags_tbl name let is_mathml_text_integration name = Hashtbl.mem mathml_text_integration_tbl name let is_svg_html_integration name = Hashtbl.mem svg_html_integration_tbl (lowercase name) let is_select_scope_exclude name = Hashtbl.mem select_scope_exclude_tbl name let is_table_section_element name = Hashtbl.mem table_section_elements_tbl name let is_table_cell_element name = Hashtbl.mem table_cell_elements_tbl name let is_foster_parenting_element name = Hashtbl.mem foster_parenting_elements_tbl name (* Backwards compatibility aliases *) let is_void = List.mem let is_formatting = List.mem let is_special name = List.mem name special_elements let is_heading = List.mem let adjust_svg_tag_name name = match Hashtbl.find_opt svg_tag_adjustments_tbl (lowercase name) with | Some adjusted -> adjusted | None -> name let adjust_svg_attrs attrs = List.map (fun (name, value) -> let adjusted_name = match Hashtbl.find_opt svg_attr_adjustments_tbl (lowercase name) with | Some n -> n | None -> name in (adjusted_name, value) ) attrs let adjust_foreign_attrs attrs = List.map (fun (name, value) -> match Hashtbl.find_opt foreign_attr_adjustments_tbl (lowercase name) with | Some (prefix, local, _ns) -> if prefix = "" then (local, value) else (prefix ^ ":" ^ local, value) | None -> (name, value) ) attrs