OCaml HTML5 parser/serialiser based on Python's JustHTML
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at main 99 lines 3.7 kB view raw
1(** Language attribute validation checker. 2 3 Validates language attributes. *) 4 5(** Checker state - currently minimal since we only check attributes. *) 6type state = unit [@@warning "-34"] 7 8let create () = () 9 10let reset _state = () 11 12(** Get attribute value from attribute list. *) 13let get_attr attrs name = 14 try Some (List.assoc name attrs) 15 with Not_found -> None 16 17(** Deprecated language subtags from IANA registry. 18 See: https://www.iana.org/assignments/language-subtag-registry/ *) 19let deprecated_subtags = [ 20 ("mo", "ro"); (* Moldavian -> Romanian *) 21 ("iw", "he"); (* Hebrew (old) -> Hebrew *) 22 ("in", "id"); (* Indonesian (old) -> Indonesian *) 23 ("ji", "yi"); (* Yiddish (old) -> Yiddish *) 24 ("jw", "jv"); (* Javanese (old) -> Javanese *) 25 ("sh", "sr"); (* Serbo-Croatian -> Serbian *) 26] 27 28(** Check if a language tag contains deprecated subtags. *) 29let check_deprecated_tag value = 30 let lower = Astring.String.Ascii.lowercase value in 31 let subtags = String.split_on_char '-' lower in 32 match subtags with 33 | [] -> None 34 | primary :: _ -> 35 (* Check primary language subtag for deprecation *) 36 match List.assoc_opt primary deprecated_subtags with 37 | Some replacement -> Some (primary, replacement) 38 | None -> None 39 40(** Validate language attribute. *) 41let validate_lang_attr value ~location:_ ~element ~attribute collector = 42 (* First check structural validity *) 43 match Dt_language.Language_or_empty.validate value with 44 | Error msg -> 45 let reason = Printf.sprintf "Bad language tag: %s." msg in 46 Message_collector.add_typed collector 47 (`Attr (`Bad_value (`Elem element, `Attr attribute, `Value value, `Reason reason))) 48 | Ok () -> 49 (* Then check for deprecated subtags *) 50 match check_deprecated_tag value with 51 | Some (deprecated, replacement) -> 52 let reason = Printf.sprintf "Bad language tag: The language subtag %s is deprecated. Use %s instead." 53 (Error_code.q deprecated) (Error_code.q replacement) in 54 Message_collector.add_typed collector 55 (`Generic (Printf.sprintf "Bad value %s for attribute %s on element %s: %s" 56 (Error_code.q value) (Error_code.q attribute) (Error_code.q element) reason)) 57 | None -> () 58 59(** Check if lang and xml:lang match. *) 60let check_lang_xmllang_match ~lang:_ ~xmllang:_ ~location:_ ~element:_ collector = 61 (* Note: This check is disabled as the Error_code.Xml_lang_lang_mismatch format 62 differs from what the tests expect. We use add_typed when enabled. *) 63 ignore collector 64 65(** Process language attributes. *) 66let process_language_attrs ~element ~namespace ~attrs ~location collector = 67 ignore namespace; 68 let lang_opt = get_attr attrs "lang" in 69 let xmllang_opt = get_attr attrs "xml:lang" in 70 71 (* Validate lang attribute *) 72 begin match lang_opt with 73 | Some lang -> 74 validate_lang_attr lang ~location ~element ~attribute:"lang" collector 75 | None -> () 76 end; 77 78 (* Validate xml:lang attribute *) 79 begin match xmllang_opt with 80 | Some xmllang -> 81 validate_lang_attr xmllang ~location ~element ~attribute:"xml:lang" collector 82 | None -> () 83 end; 84 85 (* Check that lang and xml:lang match if both present *) 86 begin match lang_opt, xmllang_opt with 87 | Some lang, Some xmllang -> 88 check_lang_xmllang_match ~lang ~xmllang ~location ~element collector 89 | _ -> () 90 end 91 92let start_element _state ~element collector = 93 let location = None in 94 let name = Tag.tag_to_string element.Element.tag in 95 process_language_attrs ~element:name ~namespace:None ~attrs:element.raw_attrs ~location collector 96 97let end_element _state ~tag:_ _collector = () 98 99let checker = Checker.make ~create ~reset ~start_element ~end_element ()