(* Encoding label normalization per WHATWG Encoding Standard *)

(** [normalize_label label] resolves a raw encoding label to an encoding.

    The label is trimmed and ASCII-lowercased before it is compared against
    the known labels. Returns [None] when the label is empty, consists only of
    trimmed whitespace, or is not one of the labels listed below. *)
let normalize_label label =
  match Astring.String.Ascii.lowercase (Astring.String.trim label) with
  (* Security: never allow utf-7; its labels resolve to windows-1252. *)
  | "utf-7" | "utf7" | "x-utf-7" -> Some Encoding_types.Windows_1252
  | "utf-8" | "utf8" -> Some Encoding_types.Utf8
  (* HTML treats latin-1 labels as windows-1252. *)
  | "iso-8859-1" | "iso8859-1" | "latin1" | "latin-1" | "l1" | "cp819"
  | "ibm819" ->
      Some Encoding_types.Windows_1252
  | "windows-1252" | "windows1252" | "cp1252" | "x-cp1252" ->
      Some Encoding_types.Windows_1252
  | "iso-8859-2" | "iso8859-2" | "latin2" | "latin-2" ->
      Some Encoding_types.Iso_8859_2
  | "euc-jp" | "eucjp" -> Some Encoding_types.Euc_jp
  (* A bare utf-16 label is ambiguous; default to little-endian. *)
  | "utf-16" | "utf16" | "utf-16le" | "utf16le" -> Some Encoding_types.Utf16le
  | "utf-16be" | "utf16be" -> Some Encoding_types.Utf16be
  (* Everything else, including the empty string left after trimming. *)
  | _ -> None

(** [normalize_meta_declared label] resolves an encoding label the same way as
    {!normalize_label}, except that a UTF-16LE or UTF-16BE result is replaced
    by UTF-8 (per HTML meta charset handling, UTF-16 declarations are treated
    as UTF-8).

    Every other result, including [None] for unrecognized labels, is returned
    unchanged. [normalize_label] has no UTF-32 labels, so those yield [None]. *)
let normalize_meta_declared label =
  match normalize_label label with
  | Some (Encoding_types.Utf16le | Encoding_types.Utf16be) ->
      Some Encoding_types.Utf8
  | (None | Some _) as resolved -> resolved