My working unpac space for OCaml projects in development
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge opam/patches/uucd

+4921
+5
vendor/opam/uucd/.gitignore
··· 1 + _b0 2 + _build 3 + tmp 4 + *.install 5 + test/ucd.xml
+2
vendor/opam/uucd/.merlin
··· 1 + PKG b0.kit xmlm 2 + B _b0/**
+81
vendor/opam/uucd/B0.ml
open B0_kit.V000
open Result.Syntax

(* Unicode version supported by this package. It is rendered with
   [B0_version.to_string] by the actions below. Keep it in sync with the
   UNICODE_VERSION watermark in pkg/pkg.ml (see DEVEL.md). *)
let unicode_version = 17, 0, 0, None (* Adjust on new releases *)

(* OCaml library names *)

let b0_std = B0_ocaml.libname "b0.std"
let xmlm = B0_ocaml.libname "xmlm"
let uucd = B0_ocaml.libname "uucd"

(* Libraries *)

(* The uucd library: sources are taken from the src directory and the
   only required library is xmlm. *)
let uucd_lib =
  let srcs = [ `Dir ~/"src" ] and requires = [ xmlm ] in
  B0_ocaml.lib uucd ~doc:"Uucd library" ~srcs ~requires

(* Actions *)

(* Base URL of the Unicode data files. HTTPS is used so that the archive
   fetched by [download_ucdxml] is not transferred in clear text. *)
let uc_base = "https://www.unicode.org/Public"

(* Action download-ucdxml.

   Looks up the unzip tool, builds the URL of ucd.all.grouped.zip for
   [unicode_version], fetches it into a temporary file and redirects the
   standard output of `unzip -p` on that file to test/ucd.xml (resolved
   with [B0_env.in_scope_dir]). The output file is opened with
   ~force:true and ~make_path:true. Errors from the tool lookup, the
   fetch or the unzip run are propagated as the action result. *)
let download_ucdxml =
  let doc = "Download the Unicode character database to test/ucd.xml" in
  B0_unit.of_action "download-ucdxml" ~doc @@ fun env _ ~args:_ ->
  let* unzip = B0_env.get_cmd env (Cmd.arg "unzip") in
  let version = B0_version.to_string unicode_version in
  let ucd_url = Fmt.str "%s/%s/ucdxml/ucd.all.grouped.zip" uc_base version in
  let ucd_file = B0_env.in_scope_dir env ~/"test/ucd.xml" in
  (* The callback itself returns a result, hence the [Result.join]. The
     file descriptor of the temporary file is not needed, only its path. *)
  Result.join @@ Os.File.with_tmp_fd @@ fun tmpfile _tmpfd ->
  (Log.stdout @@ fun m ->
   m "@[<v>Downloading %s@,to %a@]" ucd_url Fpath.pp ucd_file);
  let* () = B0_action_kit.fetch_url env ucd_url tmpfile in
  let stdout = Os.Cmd.out_file ~force:true ~make_path:true ucd_file in
  Os.Cmd.run Cmd.(unzip % "-p" %% path tmpfile) ~stdout

(* Action unicode-version: logs [unicode_version] on standard output.
   DEVEL.md uses it to check a version bump. *)
let show_version =
  B0_unit.of_action "unicode-version" ~doc:"Show supported unicode version" @@
  fun _ _ ~args:_ ->
  Ok (Log.stdout (fun m -> m "%s" (B0_version.to_string unicode_version)))

(* Tests *)

(* Decoder test executable. It is tagged as a runnable test and its
   action working directory is set to the scope directory. *)
let test_uucd =
  let srcs = [ `File ~/"test/test_uucd.ml" ] in
  let meta =
    B0_meta.(empty |> tag test |> tag run |> ~~ B0_unit.Action.cwd `Scope_dir)
  in
  let requires = [uucd; b0_std] in
  B0_ocaml.exe "test_uucd" ~doc:"Test decoder" ~srcs ~requires ~meta

(* Sample program. It is tagged as a test but, unlike [test_uucd], it
   does not carry the run tag. *)
let example =
  let srcs = [ `File ~/"test/example.ml" ] in
  let meta = B0_meta.(empty |> tag test) in
  B0_ocaml.exe "example" ~doc:"Sample code" ~srcs ~meta ~requires:[uucd]

(* Packs *)

(* Default pack: carries the package metadata (authors, homepage,
   license, opam build instructions and dependencies) and is made of
   the units returned by [B0_unit.list ()]. *)
let default =
  let meta =
    B0_meta.empty
    |> ~~ B0_meta.authors ["The uucd programmers"]
    |> ~~ B0_meta.maintainers ["Daniel Bünzli <daniel.buenzl i@erratique.ch>"]
    |> ~~ B0_meta.homepage "https://erratique.ch/software/uucd"
    |> ~~ B0_meta.online_doc "https://erratique.ch/software/uucd/doc/Uucd"
    |> ~~ B0_meta.licenses ["ISC"]
    |> ~~ B0_meta.repo "git+https://erratique.ch/repos/uucd.git"
    |> ~~ B0_meta.issues "https://github.com/dbuenzli/uucd/issues"
    |> ~~ B0_meta.description_tags
      ["unicode"; "database"; "decoder"; "org:erratique"]
    |> B0_meta.tag B0_opam.tag
    |> ~~ B0_opam.build
      {|[["ocaml" "pkg/pkg.ml" "build" "--dev-pkg" "%{dev}%"]]|}
    |> ~~ B0_opam.depends
      [ "ocaml", {|>= "4.08.0"|};
        "ocamlfind", {|build|};
        "ocamlbuild", {|build|};
        "topkg", {|build & >= "1.1.0"|};
        "xmlm", {||} ]
  in
  B0_pack.make "default" ~doc:"uucd package" ~meta ~locked:true @@
  B0_unit.list ()
+1
vendor/opam/uucd/BRZO
··· 1 + (srcs-x pkg)
+82
vendor/opam/uucd/CHANGES.md
··· 1 + v17.0.0 2025-09-11 Zagreb 2 + ------------------------- 3 + 4 + - Support for Unicode 17.0.0 5 + 6 + v16.0.0 2024-09-11 Zagreb 7 + ------------------------- 8 + 9 + - Support for Unicode 16.0.0 10 + 11 + v15.1.0 2023-09-15 Zagreb 12 + ------------------------- 13 + 14 + - Support for Unicode 15.1.0 15 + 16 + v15.0.0 2022-09-15 Zagreb 17 + ------------------------- 18 + 19 + - Support for Unicode 15.0.0 20 + 21 + v14.0.0 2021-09-17 Zagreb 22 + ------------------------- 23 + 24 + - Support for Unicode 14.0.0 25 + 26 + v13.0.0 2020-03-10 La Forclaz (VS) 27 + ---------------------------------- 28 + 29 + - Support for Unicode 13.0.0 30 + 31 + v12.0.0 2019-03-07 La Forclaz (VS) 32 + ---------------------------------- 33 + 34 + - Support for Unicode 12.0.0 35 + 36 + v11.0.0 2018-06-06 Lausanne 37 + --------------------------- 38 + 39 + - Support for Unicode 11.0.0 40 + 41 + v10.0.0 2017-06-20 Cambridge (UK) 42 + --------------------------------- 43 + 44 + - Support for Unicode 10.0.0 45 + 46 + v4.0.0 2016-06-26 Cambridge (UK) 47 + -------------------------------- 48 + 49 + - Updated for Unicode 9.0.0 50 + - Build depend on topkg. 51 + - Relicensed from BSD3 to ISC. 52 + 53 + v3.0.0 2015-06-17 Cambridge (UK) 54 + -------------------------------- 55 + 56 + - Updated for Unicode 8.0.0 57 + 58 + v2.0.0 2014-06-16 Cambridge (UK) 59 + -------------------------------- 60 + 61 + - Updated for Unicode 7.0.0 62 + 63 + v1.0.0 2013-10-01 Lausanne 64 + -------------------------- 65 + 66 + - Updated for Unicode 6.3.0. 67 + - OPAM friendly workflow and drop OASIS support. 68 + 69 + v0.9.2 2013-01-04 La Forclaz (VS) 70 + --------------------------------- 71 + 72 + - Updated for Unicode 6.2.0. 73 + 74 + v0.9.1 2013-01-04 La Forclaz (VS) 75 + --------------------------------- 76 + 77 + - Fix Uucd.is_scalar_value always returning false. 78 + 79 + v0.9.0 2012-09-07 Lausanne 80 + -------------------------- 81 + 82 + First release.
+23
vendor/opam/uucd/DEVEL.md
··· 1 + # New Unicode release 2 + 3 + Bump the Unicode release number at the top of the `B0.ml` file and in 4 + `pkg/pkg.ml`. Verify that everything is as expected with: 5 + 6 + b0 -- unicode-version 7 + 8 + Download the latest XML Unicode database to the `test/ucd.xml` file 9 + which is ignored by git. If you have `curl` and `unzip` in your `PATH` 10 + you can simply issue: 11 + 12 + b0 -- download-ucdxml 13 + 14 + Then you should run: 15 + 16 + b0 test 17 + 18 + This will likely fail with a parse error. Adjust the parser and 19 + datatypes with the help of: 20 + 21 + <https://www.unicode.org/reports/tr42/proposed.html> 22 + 23 +
+13
vendor/opam/uucd/LICENSE.md
··· 1 + Copyright (c) 2012 The uucd programmers 2 + 3 + Permission to use, copy, modify, and/or distribute this software for any 4 + purpose with or without fee is hereby granted, provided that the above 5 + copyright notice and this permission notice appear in all copies. 6 + 7 + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 8 + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 9 + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 10 + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 11 + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 12 + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 13 + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+32
vendor/opam/uucd/README.md
··· 1 + Uucd — Unicode character database decoder for OCaml 2 + =================================================== 3 + 4 + Uucd is an OCaml module to decode the data of the [Unicode character 5 + database][1] from its XML [representation][2]. It provides high-level 6 + (but not necessarily efficient) access to the data so that efficient 7 + representations can be extracted. 8 + 9 + Uucd is made of a single module, depends on [Xmlm][xmlm] and is distributed 10 + under the ISC license. 11 + 12 + [1]: http://www.unicode.org/reports/tr44/ 13 + [2]: http://www.unicode.org/reports/tr42/ 14 + [xmlm]: http://erratique.ch/software/xmlm 15 + 16 + Home page: <http://erratique.ch/software/uucd> 17 + 18 + ## Installation 19 + 20 + Uucd can be installed with `opam`: 21 + 22 + opam install uucd 23 + 24 + If you don't use `opam` consult the [`opam`](opam) file for build 25 + instructions and a complete specification of the dependencies. 26 + 27 + ## Documentation 28 + 29 + The documentation and API reference can be consulted [online][doc] 30 + or via `odig doc uucd`. 31 + 32 + [doc]: http://erratique.ch/software/uucd/doc/
+6
vendor/opam/uucd/_tags
··· 1 + <**/*.{ml,mli}> : bin_annot, safe_string 2 + <src> : include 3 + <test> : include 4 + <src/uucd.{mli,ml}> : package(xmlm) 5 + <test/test.{byte,native}> : package(xmlm) 6 + <_b0> : -traverse
+1
vendor/opam/uucd/doc/api.odocl
··· 1 + Uucd
+10
vendor/opam/uucd/doc/index.mld
··· 1 + {0 Uucd {%html: <span class="version">%%VERSION%%</span>%}} 2 + 3 + Uucd is an OCaml module to decode the data of the Unicode character 4 + database from its XML representation. 5 + 6 + {1:uucd Library [uucd]} 7 + 8 + {!modules: 9 + Uucd 10 + }
+34
vendor/opam/uucd/opam
··· 1 + opam-version: "2.0" 2 + name: "uucd" 3 + synopsis: "Unicode character database decoder for OCaml" 4 + description: """\ 5 + Uucd is an OCaml module to decode the data of the [Unicode character 6 + database][1] from its XML [representation][2]. It provides high-level 7 + (but not necessarily efficient) access to the data so that efficient 8 + representations can be extracted. 9 + 10 + Uucd is made of a single module, depends on [Xmlm][xmlm] and is distributed 11 + under the ISC license. 12 + 13 + [1]: http://www.unicode.org/reports/tr44/ 14 + [2]: http://www.unicode.org/reports/tr42/ 15 + [xmlm]: http://erratique.ch/software/xmlm 16 + 17 + Home page: <http://erratique.ch/software/uucd>""" 18 + maintainer: "Daniel Bünzli <daniel.buenzl i@erratique.ch>" 19 + authors: "The uucd programmers" 20 + license: "ISC" 21 + tags: ["unicode" "database" "decoder" "org:erratique"] 22 + homepage: "https://erratique.ch/software/uucd" 23 + doc: "https://erratique.ch/software/uucd/doc/Uucd" 24 + bug-reports: "https://github.com/dbuenzli/uucd/issues" 25 + depends: [ 26 + "ocaml" {>= "4.08.0"} 27 + "ocamlfind" {build} 28 + "ocamlbuild" {build} 29 + "topkg" {build & >= "1.1.0"} 30 + "xmlm" 31 + ] 32 + build: ["ocaml" "pkg/pkg.ml" "build" "--dev-pkg" "%{dev}%"] 33 + dev-repo: "git+https://erratique.ch/repos/uucd.git" 34 + x-maintenance-intent: ["(latest)"]
+8
vendor/opam/uucd/pkg/META
··· 1 + description = "Unicode character database decoder for OCaml" 2 + version = "%%VERSION_NUM%%" 3 + requires = "xmlm" 4 + archive(byte) = "uucd.cma" 5 + archive(native) = "uucd.cmxa" 6 + plugin(byte) = "uucd.cma" 7 + plugin(native) = "uucd.cmxs" 8 + exists_if = "uucd.cma uucd.cmxa"
+13
vendor/opam/uucd/pkg/pkg.ml
#!/usr/bin/env ocaml
#use "topfind"
#require "topkg"
open Topkg

(* Distribution description: the default topkg watermarks extended with
   a UNICODE_VERSION string watermark. The value must be the Unicode
   version the package supports, i.e. the one declared at the top of
   B0.ml. Bump both together on a new Unicode release (see DEVEL.md). *)
let distrib =
  (* FIXME OPAMv2, move this to an x-unicode-version field in the opam file. *)
  let watermarks = ("UNICODE_VERSION", `String "17.0.0") :: Pkg.watermarks in
  Pkg.distrib ~watermarks ()

(* Package description: a single library described by src/uucd.mllib
   whose API is the Uucd module. The build configuration argument is
   not used. *)
let () =
  Pkg.describe "uucd" ~distrib @@ fun _c ->
  Ok [ Pkg.mllib ~api:["Uucd"] "src/uucd.mllib"; ]
+3176
vendor/opam/uucd/src/uucd.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2012 The uucd programmers. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + let str = Printf.sprintf 7 + let str_of_name (u,l) = str "{%s}%s" u l 8 + let split_string s sep = 9 + let rec split accum j = 10 + let i = try (String.rindex_from s j sep) with Not_found -> -1 in 11 + if (i = -1) then 12 + let p = String.sub s 0 (j + 1) in 13 + if p <> "" then p :: accum else accum 14 + else 15 + let p = String.sub s (i + 1) (j - i) in 16 + let accum' = if p <> "" then p :: accum else accum in 17 + split accum' (i - 1) 18 + in 19 + split [] (String.length s - 1) 20 + 21 + (* Error messages *) 22 + 23 + let err s = failwith s 24 + let err_data = "character data not allowed here" 25 + let err_exp_el_end = "expected end of element" 26 + let err_exp_data = "expected character data" 27 + let err_wf = "document not well formed" 28 + let err_dup n = str "duplicate element (%s)" (str_of_name n) 29 + let err_miss_att n = str "missing attribute (%s)" n 30 + let err_att_val v = str "invalid attribute value (\"%s\")" v 31 + let err_invalid_cp v = str "invalid code point (\"%s\")" v 32 + let err_empty_cps = "empty code point sequence" 33 + let err_exp_ucd fnd = str "expected ucd element found %s" (str_of_name fnd) 34 + let err_invalid_cp_spec = str "invalid code point specification" 35 + let err_invalid_name_alias_spec = str "invalid name alias specification" 36 + 37 + (* Code points *) 38 + 39 + module Cp = struct 40 + type t = int 41 + let compare : int -> int -> int = compare 42 + end 43 + 44 + type cp = Cp.t 45 + 46 + let is_cp i = 0x0000 <= i && i <= 0x10_FFFF 47 + let is_scalar_value i = 48 + (0x0000 <= i && i <= 0xD7FF) || (0xE000 <= i && i <= 0x10FFFF) 49 + 50 + let cp_of_string v = (* parses a code point value. 
*) 51 + let is_hex c = (0x30 <= c && c <= 0x39) || (0x41 <= c && c <= 0x46) in 52 + let cp = ref 0 in 53 + for k = 0 to (String.length v) - 1 do 54 + let c = Char.code v.[k] in 55 + if not (is_hex c) then err (err_invalid_cp v) else 56 + cp := !cp * 16 + (if c <= 0x39 then c - 48 else c - 55) 57 + done; 58 + if is_cp !cp then !cp else err (err_invalid_cp v) 59 + 60 + let cps_of_string ?(empty = false) v = (* parses a code point sequence value. *) 61 + if (v = "") then (if empty then [] else err err_empty_cps) else 62 + List.map cp_of_string (split_string v ' ') 63 + 64 + module Cpmap = Map.Make (Cp) 65 + 66 + (* Properties *) 67 + 68 + type key = (* the type for property keys (names). *) 69 + | Age 70 + | Alphabetic 71 + | Ascii_hex_digit 72 + | Bidi_class 73 + | Bidi_control 74 + | Bidi_mirrored 75 + | Bidi_mirroring_glyph 76 + | Bidi_paired_bracket 77 + | Bidi_paired_bracket_type 78 + | Block 79 + | Canonical_combining_class 80 + | Cased 81 + | Case_folding 82 + | Case_ignorable 83 + | Changes_when_casefolded 84 + | Changes_when_casemapped 85 + | Changes_when_lowercased 86 + | Changes_when_nfkc_casefolded 87 + | Changes_when_titlecased 88 + | Changes_when_uppercased 89 + | Composition_exclusion 90 + | Dash 91 + | Decomposition_mapping 92 + | Decomposition_type 93 + | Default_ignorable_code_point 94 + | Deprecated 95 + | Diacritic 96 + | East_asian_width 97 + | Emoji 98 + | Emoji_presentation 99 + | Emoji_modifier 100 + | Emoji_modifier_base 101 + | Emoji_component 102 + | Equivalent_unified_ideograph 103 + | Extender 104 + | Extended_pictographic 105 + | Full_composition_exclusion 106 + | General_category 107 + | Grapheme_base 108 + | Grapheme_cluster_break 109 + | Grapheme_extend 110 + | Hangul_syllable_type 111 + | Hex_digit 112 + | Id_continue 113 + | Id_compat_math_continue 114 + | Id_compat_math_start 115 + | Id_start 116 + | Ideographic 117 + | Ids_binary_operator 118 + | Ids_trinary_operator 119 + | Ids_unary_operator 120 + | Indic_conjunct_break 121 + | 
Indic_syllabic_category 122 + | Indic_matra_category 123 + | Indic_positional_category 124 + | Jamo_short_name 125 + | Join_control 126 + | Joining_group 127 + | Joining_type 128 + | Line_break 129 + | Logical_order_exception 130 + | Lowercase 131 + | Lowercase_mapping 132 + | Math 133 + | Modifier_combining_mark 134 + | Name 135 + | Name_alias 136 + | Nfc_quick_check 137 + | Nfd_quick_check 138 + | Nfkc_quick_check 139 + | Nfkc_casefold 140 + | Nfkc_simple_casefold 141 + | Nfkd_quick_check 142 + | Noncharacter_code_point 143 + | Numeric_type 144 + | Numeric_value 145 + | Other_alphabetic 146 + | Other_default_ignorable_code_point 147 + | Other_grapheme_extend 148 + | Other_id_continue 149 + | Other_id_start 150 + | Other_lowercase 151 + | Other_math 152 + | Other_uppercase 153 + | Pattern_syntax 154 + | Pattern_white_space 155 + | Prepended_concatenation_mark 156 + | Quotation_mark 157 + | Radical 158 + | Regional_indicator 159 + | Script 160 + | Script_extensions 161 + | Sentence_break 162 + | Simple_case_folding 163 + | Simple_lowercase_mapping 164 + | Simple_titlecase_mapping 165 + | Simple_uppercase_mapping 166 + | Soft_dotted 167 + | Sterm 168 + | Terminal_punctuation 169 + | Titlecase_mapping 170 + | UAX_42_element 171 + | Unicode_1_name 172 + | Unified_ideograph 173 + | Uppercase 174 + | Uppercase_mapping 175 + | Variation_selector 176 + | Vertical_orientation 177 + | White_space 178 + | Word_break 179 + | Xid_continue 180 + | Xid_start 181 + (* Unihan *) 182 + | KAccountingNumeric 183 + | KAlternateHanYu 184 + | KAlternateJEF 185 + | KAlternateKangXi 186 + | KAlternateMorohashi 187 + | KAlternateTotalStrokes 188 + | KBigFive 189 + | KCCCII 190 + | KCNS1986 191 + | KCNS1992 192 + | KCangjie 193 + | KCantonese 194 + | KCheungBauer 195 + | KCheungBauerIndex 196 + | KCihaiT 197 + | KCompatibilityVariant 198 + | KCowles 199 + | KDaeJaweon 200 + | KDefinition 201 + | KEACC 202 + | KFanqie 203 + | KFenn 204 + | KFennIndex 205 + | KFourCornerCode 206 + | 
KFrequency 207 + | KGB0 208 + | KGB1 209 + | KGB3 210 + | KGB5 211 + | KGB8 212 + | KGSR 213 + | KGradeLevel 214 + | KHDZRadBreak 215 + | KHKGlyph 216 + | KHKSCS 217 + | KHanYu 218 + | KHangul 219 + | KHanyuPinlu 220 + | KHanyuPinyin 221 + | KIBMJapan 222 + | KIICore 223 + | KIRGDaeJaweon 224 + | KIRGDaiKanwaZiten 225 + | KIRGHanyuDaZidian 226 + | KIRGKangXi 227 + | KIRG_GSource 228 + | KIRG_HSource 229 + | KIRG_JSource 230 + | KIRG_KPSource 231 + | KIRG_KSource 232 + | KIRG_MSource 233 + | KIRG_SSource 234 + | KIRG_TSource 235 + | KIRG_USource 236 + | KIRG_UKSource 237 + | KIRG_VSource 238 + | KJapanese 239 + | KJHJ 240 + | KJIS0213 241 + | KJapaneseKun 242 + | KJapaneseOn 243 + | KJinmeiyoKanji 244 + | KJis0 245 + | KJis1 246 + | KJoyoKanji 247 + | KKPS0 248 + | KKPS1 249 + | KKSC0 250 + | KKSC1 251 + | KKangXi 252 + | KKarlgren 253 + | KKorean 254 + | KKoreanEducationHanja 255 + | KKoreanName 256 + | KLau 257 + | KMainlandTelegraph 258 + | KMandarin 259 + | KMatthews 260 + | KMeyerWempe 261 + | KMojiJoho 262 + | KMorohashi 263 + | KNelson 264 + | KNSHU_DubenSrc 265 + | KNSHU_Reading 266 + | KOtherNumeric 267 + | KPhonetic 268 + | KPrimaryNumeric 269 + | KPseudoGB1 270 + | KRSAdobe_Japan1_6 271 + | KRSJapanese 272 + | KRSKanWa 273 + | KRSKangXi 274 + | KRSKorean 275 + | KRSMerged 276 + | KRSUnicode 277 + | KSBGY 278 + | KSemanticVariant 279 + | KSimplifiedVariant 280 + | KSMSZD2003Index 281 + | KSMSZD2003Readings 282 + | KSpecializedSemanticVariant 283 + | KSpoofingVariant 284 + | KStrange 285 + | KTGH 286 + | KTGHZ2013 287 + | KTGT_MergedSrc 288 + | KTGT_RSUnicode 289 + | KTaiwanTelegraph 290 + | KTang 291 + | KTayNumeric 292 + | KTotalStrokes 293 + | KTraditionalVariant 294 + | KUnihanCore2020 295 + | KVietnamese 296 + | KVietnameseNumeric 297 + | KWubi 298 + | KXHC1983 299 + | KXerox 300 + | KZhuang 301 + | KZhuangNumeric 302 + | KZVariant 303 + (* Unikemet *) 304 + | KEH_Cat 305 + | KEH_Core 306 + | KEH_Desc 307 + | KEH_Func 308 + | KEH_FVal 309 + | KEH_UniK 
310 + | KEH_JSesh 311 + | KEH_HG 312 + | KEH_IFAO 313 + | KEH_NoMirror 314 + | KEH_NoRotate 315 + | KEH_AltSeq 316 + | Other of (string * string) (* expanded XML name. *) 317 + 318 + type script = [ 319 + | `Adlm 320 + | `Aghb 321 + | `Ahom 322 + | `Arab 323 + | `Armi 324 + | `Armn 325 + | `Avst 326 + | `Bali 327 + | `Bamu 328 + | `Bass 329 + | `Batk 330 + | `Beng 331 + | `Berf 332 + | `Bhks 333 + | `Bopo 334 + | `Brah 335 + | `Brai 336 + | `Bugi 337 + | `Buhd 338 + | `Cakm 339 + | `Cans 340 + | `Cari 341 + | `Cham 342 + | `Cher 343 + | `Chrs 344 + | `Copt 345 + | `Cpmn 346 + | `Cprt 347 + | `Cyrl 348 + | `Deva 349 + | `Diak 350 + | `Dogr 351 + | `Dsrt 352 + | `Dupl 353 + | `Egyp 354 + | `Elba 355 + | `Elym 356 + | `Ethi 357 + | `Gara 358 + | `Geor 359 + | `Glag 360 + | `Gong 361 + | `Gonm 362 + | `Goth 363 + | `Gran 364 + | `Grek 365 + | `Gujr 366 + | `Gukh 367 + | `Guru 368 + | `Hang 369 + | `Hani 370 + | `Hano 371 + | `Hatr 372 + | `Hebr 373 + | `Hira 374 + | `Hluw 375 + | `Hmng 376 + | `Hmnp 377 + | `Hrkt 378 + | `Hung 379 + | `Ital 380 + | `Java 381 + | `Kali 382 + | `Kana 383 + | `Kawi 384 + | `Khar 385 + | `Khmr 386 + | `Khoj 387 + | `Knda 388 + | `Krai 389 + | `Kthi 390 + | `Kits 391 + | `Lana 392 + | `Laoo 393 + | `Latn 394 + | `Lepc 395 + | `Limb 396 + | `Lina 397 + | `Linb 398 + | `Lisu 399 + | `Lyci 400 + | `Lydi 401 + | `Mahj 402 + | `Maka 403 + | `Mand 404 + | `Mani 405 + | `Marc 406 + | `Medf 407 + | `Mend 408 + | `Merc 409 + | `Mero 410 + | `Mlym 411 + | `Modi 412 + | `Mong 413 + | `Mroo 414 + | `Mtei 415 + | `Mult 416 + | `Mymr 417 + | `Nagm 418 + | `Nand 419 + | `Narb 420 + | `Nbat 421 + | `Newa 422 + | `Nkoo 423 + | `Nshu 424 + | `Ogam 425 + | `Olck 426 + | `Onao 427 + | `Orkh 428 + | `Orya 429 + | `Osge 430 + | `Osma 431 + | `Ougr 432 + | `Palm 433 + | `Pauc 434 + | `Perm 435 + | `Phag 436 + | `Phli 437 + | `Phlp 438 + | `Phnx 439 + | `Plrd 440 + | `Prti 441 + | `Qaai 442 + | `Rjng 443 + | `Rohg 444 + | `Runr 445 + | `Samr 446 + | `Sarb 447 + | 
`Saur 448 + | `Sgnw 449 + | `Shaw 450 + | `Shrd 451 + | `Sidd 452 + | `Sidt 453 + | `Sind 454 + | `Sinh 455 + | `Sogd 456 + | `Sogo 457 + | `Sora 458 + | `Soyo 459 + | `Sund 460 + | `Sunu 461 + | `Sylo 462 + | `Syrc 463 + | `Tagb 464 + | `Takr 465 + | `Tale 466 + | `Talu 467 + | `Taml 468 + | `Tang 469 + | `Tavt 470 + | `Tayo 471 + | `Telu 472 + | `Tfng 473 + | `Tglg 474 + | `Thaa 475 + | `Thai 476 + | `Tibt 477 + | `Tirh 478 + | `Tnsa 479 + | `Todr 480 + | `Tols 481 + | `Toto 482 + | `Tutg 483 + | `Ugar 484 + | `Vaii 485 + | `Vith 486 + | `Wara 487 + | `Wcho 488 + | `Xpeo 489 + | `Xsux 490 + | `Yezi 491 + | `Yiii 492 + | `Zanb 493 + | `Zinh 494 + | `Zyyy 495 + | `Zzzz 496 + ] 497 + 498 + type block_prop = [ 499 + | `ASCII 500 + | `Adlam 501 + | `Aegean_Numbers 502 + | `Ahom 503 + | `Alchemical 504 + | `Alphabetic_PF 505 + | `Anatolian_Hieroglyphs 506 + | `Ancient_Greek_Music 507 + | `Ancient_Greek_Numbers 508 + | `Ancient_Symbols 509 + | `Arabic 510 + | `Arabic_Ext_A 511 + | `Arabic_Ext_B 512 + | `Arabic_Ext_C 513 + | `Arabic_Math 514 + | `Arabic_PF_A 515 + | `Arabic_PF_B 516 + | `Arabic_Sup 517 + | `Armenian 518 + | `Arrows 519 + | `Avestan 520 + | `Balinese 521 + | `Bamum 522 + | `Bamum_Sup 523 + | `Bassa_Vah 524 + | `Batak 525 + | `Bengali 526 + | `Beria_Erfe 527 + | `Bhaiksuki 528 + | `Block_Elements 529 + | `Bopomofo 530 + | `Bopomofo_Ext 531 + | `Box_Drawing 532 + | `Brahmi 533 + | `Braille 534 + | `Buginese 535 + | `Buhid 536 + | `Byzantine_Music 537 + | `CJK 538 + | `CJK_Compat 539 + | `CJK_Compat_Forms 540 + | `CJK_Compat_Ideographs 541 + | `CJK_Compat_Ideographs_Sup 542 + | `CJK_Ext_A 543 + | `CJK_Ext_B 544 + | `CJK_Ext_C 545 + | `CJK_Ext_D 546 + | `CJK_Ext_E 547 + | `CJK_Ext_F 548 + | `CJK_Ext_G 549 + | `CJK_Ext_H 550 + | `CJK_Ext_I 551 + | `CJK_Ext_J 552 + | `CJK_Radicals_Sup 553 + | `CJK_Strokes 554 + | `CJK_Symbols 555 + | `Carian 556 + | `Caucasian_Albanian 557 + | `Chakma 558 + | `Cham 559 + | `Cherokee 560 + | `Cherokee_Sup 561 + | `Chess_Symbols 
562 + | `Chorasmian 563 + | `Compat_Jamo 564 + | `Control_Pictures 565 + | `Coptic 566 + | `Coptic_Epact_Numbers 567 + | `Counting_Rod 568 + | `Cuneiform 569 + | `Cuneiform_Numbers 570 + | `Currency_Symbols 571 + | `Cypriot_Syllabary 572 + | `Cypro_Minoan 573 + | `Cyrillic 574 + | `Cyrillic_Ext_A 575 + | `Cyrillic_Ext_B 576 + | `Cyrillic_Ext_C 577 + | `Cyrillic_Ext_D 578 + | `Cyrillic_Sup 579 + | `Deseret 580 + | `Devanagari 581 + | `Devanagari_Ext 582 + | `Devanagari_Ext_A 583 + | `Diacriticals 584 + | `Diacriticals_Ext 585 + | `Diacriticals_For_Symbols 586 + | `Diacriticals_Sup 587 + | `Dingbats 588 + | `Dives_Akuru 589 + | `Dogra 590 + | `Domino 591 + | `Duployan 592 + | `Early_Dynastic_Cuneiform 593 + | `Egyptian_Hieroglyph_Format_Controls 594 + | `Egyptian_Hieroglyphs 595 + | `Egyptian_Hieroglyphs_Ext_A 596 + | `Elbasan 597 + | `Elymaic 598 + | `Emoticons 599 + | `Enclosed_Alphanum 600 + | `Enclosed_Alphanum_Sup 601 + | `Enclosed_CJK 602 + | `Enclosed_Ideographic_Sup 603 + | `Ethiopic 604 + | `Ethiopic_Ext 605 + | `Ethiopic_Ext_A 606 + | `Ethiopic_Ext_B 607 + | `Ethiopic_Sup 608 + | `Garay 609 + | `Geometric_Shapes 610 + | `Geometric_Shapes_Ext 611 + | `Georgian 612 + | `Georgian_Ext 613 + | `Georgian_Sup 614 + | `Glagolitic 615 + | `Glagolitic_Sup 616 + | `Gothic 617 + | `Grantha 618 + | `Greek 619 + | `Greek_Ext 620 + | `Gujarati 621 + | `Gunjala_Gondi 622 + | `Gurmukhi 623 + | `Gurung_Khema 624 + | `Half_And_Full_Forms 625 + | `Half_Marks 626 + | `Hangul 627 + | `Hanifi_Rohingya 628 + | `Hanunoo 629 + | `Hatran 630 + | `Hebrew 631 + | `High_PU_Surrogates 632 + | `High_Surrogates 633 + | `Hiragana 634 + | `IDC 635 + | `IPA_Ext 636 + | `Ideographic_Symbols 637 + | `Imperial_Aramaic 638 + | `Indic_Number_Forms 639 + | `Indic_Siyaq_Numbers 640 + | `Inscriptional_Pahlavi 641 + | `Inscriptional_Parthian 642 + | `Jamo 643 + | `Jamo_Ext_A 644 + | `Jamo_Ext_B 645 + | `Javanese 646 + | `Kaithi 647 + | `Kaktovik_Numerals 648 + | `Kana_Ext_A 649 + | `Kana_Ext_B 650 + | 
`Kana_Sup 651 + | `Kanbun 652 + | `Kangxi 653 + | `Kannada 654 + | `Katakana 655 + | `Katakana_Ext 656 + | `Kawi 657 + | `Kayah_Li 658 + | `Kharoshthi 659 + | `Khitan_Small_Script 660 + | `Khmer 661 + | `Khmer_Symbols 662 + | `Khojki 663 + | `Khudawadi 664 + | `Kirat_Rai 665 + | `Lao 666 + | `Latin_1_Sup 667 + | `Latin_Ext_A 668 + | `Latin_Ext_Additional 669 + | `Latin_Ext_B 670 + | `Latin_Ext_C 671 + | `Latin_Ext_D 672 + | `Latin_Ext_E 673 + | `Latin_Ext_F 674 + | `Latin_Ext_G 675 + | `Lepcha 676 + | `Letterlike_Symbols 677 + | `Limbu 678 + | `Linear_A 679 + | `Linear_B_Ideograms 680 + | `Linear_B_Syllabary 681 + | `Lisu 682 + | `Lisu_Sup 683 + | `Low_Surrogates 684 + | `Lycian 685 + | `Lydian 686 + | `Mahajani 687 + | `Mahjong 688 + | `Makasar 689 + | `Malayalam 690 + | `Mandaic 691 + | `Manichaean 692 + | `Marchen 693 + | `Masaram_Gondi 694 + | `Math_Alphanum 695 + | `Math_Operators 696 + | `Mayan_Numerals 697 + | `Medefaidrin 698 + | `Meetei_Mayek 699 + | `Meetei_Mayek_Ext 700 + | `Mende_Kikakui 701 + | `Meroitic_Cursive 702 + | `Meroitic_Hieroglyphs 703 + | `Miao 704 + | `Misc_Arrows 705 + | `Misc_Math_Symbols_A 706 + | `Misc_Math_Symbols_B 707 + | `Misc_Pictographs 708 + | `Misc_Symbols 709 + | `Misc_Symbols_Sup 710 + | `Misc_Technical 711 + | `Modi 712 + | `Modifier_Letters 713 + | `Modifier_Tone_Letters 714 + | `Mongolian 715 + | `Mongolian_Sup 716 + | `Mro 717 + | `Multani 718 + | `Music 719 + | `Myanmar 720 + | `Myanmar_Ext_A 721 + | `Myanmar_Ext_B 722 + | `Myanmar_Ext_C 723 + | `NB 724 + | `NKo 725 + | `Nabataean 726 + | `Nag_Mundari 727 + | `Nandinagari 728 + | `New_Tai_Lue 729 + | `Newa 730 + | `Number_Forms 731 + | `Nushu 732 + | `Nyiakeng_Puachue_Hmong 733 + | `OCR 734 + | `Ogham 735 + | `Ol_Onal 736 + | `Ol_Chiki 737 + | `Old_Hungarian 738 + | `Old_Italic 739 + | `Old_North_Arabian 740 + | `Old_Permic 741 + | `Old_Persian 742 + | `Old_Sogdian 743 + | `Old_South_Arabian 744 + | `Old_Turkic 745 + | `Old_Uyghur 746 + | `Oriya 747 + | 
`Ornamental_Dingbats 748 + | `Osage 749 + | `Osmanya 750 + | `Ottoman_Siyaq_Numbers 751 + | `PUA 752 + | `Pahawh_Hmong 753 + | `Palmyrene 754 + | `Pau_Cin_Hau 755 + | `Phags_Pa 756 + | `Phaistos 757 + | `Phoenician 758 + | `Phonetic_Ext 759 + | `Phonetic_Ext_Sup 760 + | `Playing_Cards 761 + | `Psalter_Pahlavi 762 + | `Punctuation 763 + | `Rejang 764 + | `Rumi 765 + | `Runic 766 + | `Samaritan 767 + | `Saurashtra 768 + | `Sharada 769 + | `Sharada_Sup 770 + | `Shavian 771 + | `Shorthand_Format_Controls 772 + | `Siddham 773 + | `Sidetic 774 + | `Sinhala 775 + | `Sinhala_Archaic_Numbers 776 + | `Small_Forms 777 + | `Small_Kana_Ext 778 + | `Sogdian 779 + | `Sora_Sompeng 780 + | `Soyombo 781 + | `Specials 782 + | `Sundanese 783 + | `Sundanese_Sup 784 + | `Sunuwar 785 + | `Sup_Arrows_A 786 + | `Sup_Arrows_B 787 + | `Sup_Arrows_C 788 + | `Sup_Math_Operators 789 + | `Sup_PUA_A 790 + | `Sup_PUA_B 791 + | `Sup_Punctuation 792 + | `Sup_Symbols_And_Pictographs 793 + | `Super_And_Sub 794 + | `Sutton_SignWriting 795 + | `Syloti_Nagri 796 + | `Symbols_And_Pictographs_Ext_A 797 + | `Symbols_For_Legacy_Computing 798 + | `Symbols_For_Legacy_Computing_Sup 799 + | `Syriac 800 + | `Syriac_Sup 801 + | `Tagalog 802 + | `Tagbanwa 803 + | `Tags 804 + | `Tai_Le 805 + | `Tai_Tham 806 + | `Tai_Viet 807 + | `Tai_Xuan_Jing 808 + | `Tai_Yo 809 + | `Takri 810 + | `Tamil 811 + | `Tamil_Sup 812 + | `Tangsa 813 + | `Tangut 814 + | `Tangut_Components 815 + | `Tangut_Components_Sup 816 + | `Tangut_Sup 817 + | `Telugu 818 + | `Thaana 819 + | `Thai 820 + | `Tibetan 821 + | `Tifinagh 822 + | `Tirhuta 823 + | `Todhri 824 + | `Tolong_Siki 825 + | `Toto 826 + | `Transport_And_Map 827 + | `Tulu_Tigalari 828 + | `UCAS 829 + | `UCAS_Ext 830 + | `UCAS_Ext_A 831 + | `Ugaritic 832 + | `VS 833 + | `VS_Sup 834 + | `Vai 835 + | `Vedic_Ext 836 + | `Vertical_Forms 837 + | `Vithkuqi 838 + | `Wancho 839 + | `Warang_Citi 840 + | `Yezidi 841 + | `Yi_Radicals 842 + | `Yi_Syllables 843 + | `Yijing 844 + | `Zanabazar_Square 
845 + | `Znamenny_Music 846 + ] 847 + 848 + type value = (* the type for property values. *) 849 + | Age_v of [ `Version of int * int | `Unassigned ] 850 + | Block_v of block_prop 851 + | Bidi_class_v of [ 852 + | `AL 853 + | `AN 854 + | `B 855 + | `BN 856 + | `CS 857 + | `EN 858 + | `ES 859 + | `ET 860 + | `L 861 + | `LRE 862 + | `LRO 863 + | `NSM 864 + | `ON 865 + | `PDF 866 + | `R 867 + | `RLE 868 + | `RLO 869 + | `S 870 + | `WS 871 + | `LRI 872 + | `RLI 873 + | `FSI 874 + | `PDI 875 + ] 876 + | Bidi_paired_bracket_type_v of [ `O | `C | `N ] 877 + | Bool_v of bool 878 + | Bool_maybe_v of [ `True | `False | `Maybe ] 879 + | Cp_v of cp 880 + | Cp_map_v of [ `Self | `Cp of cp ] 881 + | Cp_opt_v of cp option 882 + | Decomposition_type_v of [ 883 + | `Can 884 + | `Com 885 + | `Enc 886 + | `Fin 887 + | `Font 888 + | `Fra 889 + | `Init 890 + | `Iso 891 + | `Med 892 + | `Nar 893 + | `Nb 894 + | `Sml 895 + | `Sqr 896 + | `Sub 897 + | `Sup 898 + | `Vert 899 + | `Wide 900 + | `None 901 + ] 902 + | East_asian_width_v of [ `A | `F | `H | `N | `Na | `W ] 903 + | General_category_v of [ 904 + | `Lu 905 + | `Ll 906 + | `Lt 907 + | `Lm 908 + | `Lo 909 + | `Mn 910 + | `Mc 911 + | `Me 912 + | `Nd 913 + | `Nl 914 + | `No 915 + | `Pc 916 + | `Pd 917 + | `Ps 918 + | `Pe 919 + | `Pi 920 + | `Pf 921 + | `Po 922 + | `Sm 923 + | `Sc 924 + | `Sk 925 + | `So 926 + | `Zs 927 + | `Zl 928 + | `Zp 929 + | `Cc 930 + | `Cf 931 + | `Cs 932 + | `Co 933 + | `Cn 934 + ] 935 + | Grapheme_cluster_break_v of [ 936 + | `CN 937 + | `CR 938 + | `EB 939 + | `EBG 940 + | `EM 941 + | `EX 942 + | `GAZ 943 + | `L 944 + | `LF 945 + | `LV 946 + | `LVT 947 + | `PP 948 + | `RI 949 + | `SM 950 + | `T 951 + | `V 952 + | `XX 953 + | `ZWJ ] 954 + | Hangul_syllable_type_v of [ `L | `LV | `LVT | `T | `V | `NA ] 955 + | Int_v of int 956 + | Indic_conjunct_break_v of 957 + [ `Consonant 958 + | `Extend 959 + | `Linker 960 + | `None ] 961 + | Indic_syllabic_category_v of 962 + [ `Avagraha 963 + | `Bindu 964 + | 
`Brahmi_Joining_Number 965 + | `Cantillation_Mark 966 + | `Consonant 967 + | `Consonant_Dead 968 + | `Consonant_Final 969 + | `Consonant_Head_Letter 970 + | `Consonant_Initial_Postfixed 971 + | `Consonant_Killer 972 + | `Consonant_Medial 973 + | `Consonant_Placeholder 974 + | `Consonant_Preceding_Repha 975 + | `Consonant_Prefixed 976 + | `Consonant_Repha 977 + | `Consonant_Subjoined 978 + | `Consonant_Succeeding_Repha 979 + | `Consonant_With_Stacker 980 + | `Gemination_Mark 981 + | `Invisible_Stacker 982 + | `Joiner 983 + | `Modifying_Letter 984 + | `Non_Joiner 985 + | `Nukta 986 + | `Number 987 + | `Number_Joiner 988 + | `Other 989 + | `Pure_Killer 990 + | `Reordering_Killer 991 + | `Register_Shifter 992 + | `Syllable_Modifier 993 + | `Tone_Letter 994 + | `Tone_Mark 995 + | `Virama 996 + | `Visarga 997 + | `Vowel 998 + | `Vowel_Dependent 999 + | `Vowel_Independent ] 1000 + | Indic_matra_category_v of [ 1001 + | `Right 1002 + | `Left 1003 + | `Visual_Order_Left 1004 + | `Left_And_Right 1005 + | `Top 1006 + | `Bottom 1007 + | `Top_And_Bottom 1008 + | `Top_And_Right 1009 + | `Top_And_Left 1010 + | `Top_And_Left_And_Right 1011 + | `Bottom_And_Right 1012 + | `Top_And_Bottom_And_Right 1013 + | `Overstruck 1014 + | `Invisible 1015 + | `NA 1016 + ] 1017 + | Indic_positional_category_v of [ 1018 + | `Bottom 1019 + | `Bottom_And_Left 1020 + | `Bottom_And_Right 1021 + | `Invisible 1022 + | `Left 1023 + | `Left_And_Right 1024 + | `NA 1025 + | `Overstruck 1026 + | `Right 1027 + | `Top 1028 + | `Top_And_Bottom 1029 + | `Top_And_Bottom_And_Left 1030 + | `Top_And_Bottom_And_Right 1031 + | `Top_And_Left 1032 + | `Top_And_Left_And_Right 1033 + | `Top_And_Right 1034 + | `Visual_Order_Left 1035 + ] 1036 + | Joining_group_v of [ 1037 + | `African_Feh 1038 + | `African_Noon 1039 + | `African_Qaf 1040 + | `Ain 1041 + | `Alaph 1042 + | `Alef 1043 + | `Alef_Maqsurah 1044 + | `Beh 1045 + | `Beth 1046 + | `Burushaski_Yeh_Barree 1047 + | `Dal 1048 + | `Dalath_Rish 1049 + | `E 1050 + | 
`Farsi_Yeh 1051 + | `Fe 1052 + | `Feh 1053 + | `Final_Semkath 1054 + | `Gaf 1055 + | `Gamal 1056 + | `Hah 1057 + | `Hanifi_Rohingya_Kinna_Ya 1058 + | `Hanifi_Rohingya_Pa 1059 + | `Hamza_On_Heh_Goal 1060 + | `He 1061 + | `Heh 1062 + | `Heh_Goal 1063 + | `Heth 1064 + | `Kaf 1065 + | `Kaph 1066 + | `Kashmiri_Yeh 1067 + | `Khaph 1068 + | `Knotted_Heh 1069 + | `Lam 1070 + | `Lamadh 1071 + | `Malayalam_Bha 1072 + | `Malayalam_Ja 1073 + | `Malayalam_Lla 1074 + | `Malayalam_Llla 1075 + | `Malayalam_Nga 1076 + | `Malayalam_Nna 1077 + | `Malayalam_Nnna 1078 + | `Malayalam_Nya 1079 + | `Malayalam_Ra 1080 + | `Malayalam_Ssa 1081 + | `Malayalam_Tta 1082 + | `Manichaean_Aleph 1083 + | `Manichaean_Ayin 1084 + | `Manichaean_Beth 1085 + | `Manichaean_Daleth 1086 + | `Manichaean_Dhamedh 1087 + | `Manichaean_Five 1088 + | `Manichaean_Gimel 1089 + | `Manichaean_Heth 1090 + | `Manichaean_Hundred 1091 + | `Manichaean_Kaph 1092 + | `Manichaean_Lamedh 1093 + | `Manichaean_Mem 1094 + | `Manichaean_Nun 1095 + | `Manichaean_One 1096 + | `Manichaean_Pe 1097 + | `Manichaean_Qoph 1098 + | `Manichaean_Resh 1099 + | `Manichaean_Sadhe 1100 + | `Manichaean_Samekh 1101 + | `Manichaean_Taw 1102 + | `Manichaean_Ten 1103 + | `Manichaean_Teth 1104 + | `Manichaean_Thamedh 1105 + | `Manichaean_Twenty 1106 + | `Manichaean_Waw 1107 + | `Manichaean_Yodh 1108 + | `Manichaean_Zayin 1109 + | `Meem 1110 + | `Mim 1111 + | `No_Joining_Group 1112 + | `Noon 1113 + | `Nun 1114 + | `Nya 1115 + | `Pe 1116 + | `Qaf 1117 + | `Qaph 1118 + | `Reh 1119 + | `Reversed_Pe 1120 + | `Rohingya_Yeh 1121 + | `Sad 1122 + | `Sadhe 1123 + | `Seen 1124 + | `Semkath 1125 + | `Shin 1126 + | `Straight_Waw 1127 + | `Swash_Kaf 1128 + | `Syriac_Waw 1129 + | `Tah 1130 + | `Taw 1131 + | `Teh_Marbuta 1132 + | `Teh_Marbuta_Goal 1133 + | `Teth 1134 + | `Thin_Noon 1135 + | `Thin_Yeh 1136 + | `Vertical_Tail 1137 + | `Waw 1138 + | `Yeh 1139 + | `Yeh_Barree 1140 + | `Yeh_With_Tail 1141 + | `Yudh 1142 + | `Yudh_He 1143 + | `Zain 1144 + | `Zhain 1145 + 
| `BAA 1146 + | `FA 1147 + | `HAA 1148 + | `HA_GOAL 1149 + | `HA 1150 + | `CAF 1151 + | `KNOTTED_HA 1152 + | `RA 1153 + | `SWASH_CAF 1154 + | `HAMZAH_ON_HA_GOAL 1155 + | `TAA_MARBUTAH 1156 + | `YA_BARREE 1157 + | `YA 1158 + | `ALEF_MAQSURAH ] 1159 + | Joining_type_v of [ `U | `C | `T | `D | `L | `R ] 1160 + | Line_break_v of [ 1161 + | `AI 1162 + | `AK 1163 + | `AL 1164 + | `AP 1165 + | `AS 1166 + | `B2 1167 + | `BA 1168 + | `BB 1169 + | `BK 1170 + | `CB 1171 + | `CJ 1172 + | `CL 1173 + | `CM 1174 + | `CP 1175 + | `CR 1176 + | `EB 1177 + | `EM 1178 + | `EX 1179 + | `GL 1180 + | `H2 1181 + | `H3 1182 + | `HH 1183 + | `HL 1184 + | `HY 1185 + | `ID 1186 + | `IN 1187 + | `IS 1188 + | `JL 1189 + | `JT 1190 + | `JV 1191 + | `LF 1192 + | `NL 1193 + | `NS 1194 + | `NU 1195 + | `OP 1196 + | `PO 1197 + | `PR 1198 + | `QU 1199 + | `RI 1200 + | `SA 1201 + | `SG 1202 + | `SP 1203 + | `SY 1204 + | `VF 1205 + | `VI 1206 + | `WJ 1207 + | `XX 1208 + | `ZW 1209 + | `ZWJ 1210 + ] 1211 + | Name_v of [`Pattern of string | `Name of string ] 1212 + | Name_alias_v of 1213 + (string * [`Abbreviation | `Alternate | `Control | `Correction | `Figment]) 1214 + list 1215 + | Numeric_type_v of [ `None | `De | `Di | `Nu ] 1216 + | Numeric_value_v of 1217 + [ `NaN | `Nums of [`Frac of int * int | `Num of int64 ] list] 1218 + | Script_v of script 1219 + | Script_extensions_v of script list 1220 + | Sentence_break_v of [ 1221 + | `AT 1222 + | `CL 1223 + | `CR 1224 + | `EX 1225 + | `FO 1226 + | `LE 1227 + | `LF 1228 + | `LO 1229 + | `NU 1230 + | `SC 1231 + | `SE 1232 + | `SP 1233 + | `ST 1234 + | `UP 1235 + | `XX 1236 + ] 1237 + | Cps_v of cp list 1238 + | Cps_map_v of [ `Self | `Cps of cp list ] 1239 + | String_v of string 1240 + | UAX_42_element_v of [ `Reserved | `Noncharacter | `Surrogate | `Char ] 1241 + | Vertical_orientation_v of [ `U | `R | `Tu | `Tr ] 1242 + | Word_break_v of [ 1243 + | `CR 1244 + | `DQ 1245 + | `EB 1246 + | `EBG 1247 + | `EM 1248 + | `EX 1249 + | `Extend 1250 + | `FO 1251 + 
| `GAZ 1252 + | `HL 1253 + | `KA 1254 + | `LE 1255 + | `LF 1256 + | `MB 1257 + | `ML 1258 + | `MN 1259 + | `NL 1260 + | `NU 1261 + | `RI 1262 + | `SQ 1263 + | `WSegSpace 1264 + | `XX 1265 + | `ZWJ 1266 + ] 1267 + 1268 + (* property value projection *) 1269 + 1270 + let o_age = function Age_v v -> v | _ -> assert false 1271 + let o_bidi_class = function Bidi_class_v v -> v | _ -> assert false 1272 + let o_bidi_paired_bracket_type = 1273 + function Bidi_paired_bracket_type_v v -> v | _ -> assert false 1274 + 1275 + let o_block = function Block_v v -> v | _ -> assert false 1276 + let o_bool = function Bool_v v -> v | _ -> assert false 1277 + let o_bool_maybe = function Bool_maybe_v v -> v | _ -> assert false 1278 + let o_cp = function Cp_v v -> v | _ -> assert false 1279 + let o_cp_map = function Cp_map_v v -> v | _ -> assert false 1280 + let o_cp_opt = function Cp_opt_v v -> v | _ -> assert false 1281 + let o_decomposition_type = 1282 + function Decomposition_type_v v -> v | _ -> assert false 1283 + 1284 + let o_east_asian_width = function East_asian_width_v v -> v | _ -> assert false 1285 + let o_general_category = function General_category_v v -> v | _ -> assert false 1286 + let o_grapheme_cluster_break = 1287 + function Grapheme_cluster_break_v v -> v | _ -> assert false 1288 + 1289 + let o_hangul_syllable_type = 1290 + function Hangul_syllable_type_v v -> v | _ -> assert false 1291 + 1292 + let o_int = function Int_v v -> v | _ -> assert false 1293 + 1294 + let o_indic_conjunct_break = 1295 + function Indic_conjunct_break_v v -> v | _ -> assert false 1296 + 1297 + let o_indic_syllabic_category = 1298 + function Indic_syllabic_category_v v -> v | _ -> assert false 1299 + 1300 + let o_indic_matra_category = 1301 + function Indic_matra_category_v v -> v | _ -> assert false 1302 + 1303 + let o_indic_positional_category = 1304 + function Indic_positional_category_v v -> v | _ -> assert false 1305 + 1306 + let o_joining_group = function Joining_group_v v -> v | _ -> 
assert false 1307 + let o_joining_type = function Joining_type_v v -> v | _ -> assert false 1308 + let o_line_break = function Line_break_v v -> v | _ -> assert false 1309 + let o_name = function Name_v v -> v | _ -> assert false 1310 + let o_name_alias = function Name_alias_v v -> v | _ -> assert false 1311 + let o_numeric_type = function Numeric_type_v v -> v | _ -> assert false 1312 + let o_numeric_value = function Numeric_value_v v -> v | _ -> assert false 1313 + let o_script = function Script_v v -> v | _ -> assert false 1314 + let o_script_extensions = 1315 + function Script_extensions_v v -> v | _ -> assert false 1316 + 1317 + let o_sentence_break = function Sentence_break_v v -> v | _ -> assert false 1318 + let o_cps = function Cps_v v -> v | _ -> assert false 1319 + let o_cps_map = function Cps_map_v v -> v | _ -> assert false 1320 + let o_string = function String_v v -> v | _ -> assert false 1321 + let o_uax_42_element = function UAX_42_element_v v -> v | _ -> assert false 1322 + let o_vertical_orientation = 1323 + function Vertical_orientation_v v -> v | _ -> assert false 1324 + let o_word_break = function Word_break_v v -> v | _ -> assert false 1325 + 1326 + (* property value injection *) 1327 + 1328 + let i_age v = Age_v begin match v with 1329 + | "unassigned" -> `Unassigned 1330 + | v -> 1331 + try match List.map int_of_string (split_string v '.') with 1332 + | [v1; v2;] -> `Version (v1, v2) 1333 + | _ -> failwith "" 1334 + with Failure _ -> err (err_att_val v) 1335 + end 1336 + 1337 + let i_bidi_class v = Bidi_class_v begin match v with 1338 + | "AL" -> `AL 1339 + | "AN" -> `AN 1340 + | "B" -> `B 1341 + | "BN" -> `BN 1342 + | "CS" -> `CS 1343 + | "EN" -> `EN 1344 + | "ES" -> `ES 1345 + | "ET" -> `ET 1346 + | "L" -> `L 1347 + | "LRE" -> `LRE 1348 + | "LRO" -> `LRO 1349 + | "NSM" -> `NSM 1350 + | "ON" -> `ON 1351 + | "PDF" -> `PDF 1352 + | "R" -> `R 1353 + | "RLE" -> `RLE 1354 + | "RLO" -> `RLO 1355 + | "S" -> `S 1356 + | "WS" -> `WS 1357 + | "LRI" -> 
`LRI 1358 + | "RLI" -> `RLI 1359 + | "FSI" -> `FSI 1360 + | "PDI" -> `PDI 1361 + | v -> err (err_att_val v) 1362 + end 1363 + 1364 + let i_bidi_paired_bracket_type v = Bidi_paired_bracket_type_v begin match v with 1365 + | "o" -> `O 1366 + | "c" -> `C 1367 + | "n" -> `N 1368 + | v -> err (err_att_val v) 1369 + end 1370 + 1371 + (* [i_block v] turns the block name [v] into a [Block_v] value, one arm per name, each arm yielding the tag spelled like the name; a name matching no arm is passed to [err (err_att_val v)]. *) let i_block v = Block_v begin match v with 1372 + | "ASCII" -> `ASCII 1373 + | "Adlam" -> `Adlam 1374 + | "Aegean_Numbers" -> `Aegean_Numbers 1375 + | "Ahom" -> `Ahom 1376 + | "Alchemical" -> `Alchemical 1377 + | "Alphabetic_PF" -> `Alphabetic_PF 1378 + | "Anatolian_Hieroglyphs" -> `Anatolian_Hieroglyphs 1379 + | "Ancient_Greek_Music" -> `Ancient_Greek_Music 1380 + | "Ancient_Greek_Numbers" -> `Ancient_Greek_Numbers 1381 + | "Ancient_Symbols" -> `Ancient_Symbols 1382 + | "Arabic" -> `Arabic 1383 + | "Arabic_Ext_A" -> `Arabic_Ext_A 1384 + | "Arabic_Ext_B" -> `Arabic_Ext_B (* fixed: this arm previously produced `Arabic_Ext_A, so the two blocks could not be told apart. NOTE(review): assumes `Arabic_Ext_B is a member of block_prop, which is declared outside this chunk -- confirm. *) 1385 + | "Arabic_Ext_C" -> `Arabic_Ext_C 1386 + | "Arabic_Math" -> `Arabic_Math 1387 + | "Arabic_PF_A" -> `Arabic_PF_A 1388 + | "Arabic_PF_B" -> `Arabic_PF_B 1389 + | "Arabic_Sup" -> `Arabic_Sup 1390 + | "Armenian" -> `Armenian 1391 + | "Arrows" -> `Arrows 1392 + | "Avestan" -> `Avestan 1393 + | "Balinese" -> `Balinese 1394 + | "Bamum" -> `Bamum 1395 + | "Bamum_Sup" -> `Bamum_Sup 1396 + | "Bassa_Vah" -> `Bassa_Vah 1397 + | "Batak" -> `Batak 1398 + | "Bengali" -> `Bengali 1399 + | "Beria_Erfe" -> `Beria_Erfe 1400 + | "Bhaiksuki" -> `Bhaiksuki 1401 + | "Block_Elements" -> `Block_Elements 1402 + | "Bopomofo" -> `Bopomofo 1403 + | "Bopomofo_Ext" -> `Bopomofo_Ext 1404 + | "Box_Drawing" -> `Box_Drawing 1405 + | "Brahmi" -> `Brahmi 1406 + | "Braille" -> `Braille 1407 + | "Buginese" -> `Buginese 1408 + | "Buhid" -> `Buhid 1409 + | "Byzantine_Music" -> `Byzantine_Music 1410 + | "CJK" -> `CJK 1411 + | "CJK_Compat" -> `CJK_Compat 1412 + | "CJK_Compat_Forms" -> `CJK_Compat_Forms 1413 + | "CJK_Compat_Ideographs" -> `CJK_Compat_Ideographs 1414 + | "CJK_Compat_Ideographs_Sup" ->
`CJK_Compat_Ideographs_Sup 1415 + | "CJK_Ext_A" -> `CJK_Ext_A 1416 + | "CJK_Ext_B" -> `CJK_Ext_B 1417 + | "CJK_Ext_C" -> `CJK_Ext_C 1418 + | "CJK_Ext_D" -> `CJK_Ext_D 1419 + | "CJK_Ext_E" -> `CJK_Ext_E 1420 + | "CJK_Ext_F" -> `CJK_Ext_F 1421 + | "CJK_Ext_G" -> `CJK_Ext_G 1422 + | "CJK_Ext_H" -> `CJK_Ext_H 1423 + | "CJK_Ext_I" -> `CJK_Ext_I 1424 + | "CJK_Ext_J" -> `CJK_Ext_J 1425 + | "CJK_Radicals_Sup" -> `CJK_Radicals_Sup 1426 + | "CJK_Strokes" -> `CJK_Strokes 1427 + | "CJK_Symbols" -> `CJK_Symbols 1428 + | "Carian" -> `Carian 1429 + | "Caucasian_Albanian" -> `Caucasian_Albanian 1430 + | "Chakma" -> `Chakma 1431 + | "Cham" -> `Cham 1432 + | "Cherokee" -> `Cherokee 1433 + | "Cherokee_Sup" -> `Cherokee_Sup 1434 + | "Chess_Symbols" -> `Chess_Symbols 1435 + | "Chorasmian" -> `Chorasmian 1436 + | "Compat_Jamo" -> `Compat_Jamo 1437 + | "Control_Pictures" -> `Control_Pictures 1438 + | "Coptic" -> `Coptic 1439 + | "Coptic_Epact_Numbers" -> `Coptic_Epact_Numbers 1440 + | "Counting_Rod" -> `Counting_Rod 1441 + | "Cuneiform" -> `Cuneiform 1442 + | "Cuneiform_Numbers" -> `Cuneiform_Numbers 1443 + | "Currency_Symbols" -> `Currency_Symbols 1444 + | "Cypriot_Syllabary" -> `Cypriot_Syllabary 1445 + | "Cypro_Minoan" -> `Cypro_Minoan 1446 + | "Cyrillic" -> `Cyrillic 1447 + | "Cyrillic_Ext_A" -> `Cyrillic_Ext_A 1448 + | "Cyrillic_Ext_B" -> `Cyrillic_Ext_B 1449 + | "Cyrillic_Ext_C" -> `Cyrillic_Ext_C 1450 + | "Cyrillic_Ext_D" -> `Cyrillic_Ext_D 1451 + | "Cyrillic_Sup" -> `Cyrillic_Sup 1452 + | "Deseret" -> `Deseret 1453 + | "Devanagari" -> `Devanagari 1454 + | "Devanagari_Ext" -> `Devanagari_Ext 1455 + | "Devanagari_Ext_A" -> `Devanagari_Ext_A 1456 + | "Diacriticals" -> `Diacriticals 1457 + | "Diacriticals_Ext" -> `Diacriticals_Ext 1458 + | "Diacriticals_For_Symbols" -> `Diacriticals_For_Symbols 1459 + | "Diacriticals_Sup" -> `Diacriticals_Sup 1460 + | "Dingbats" -> `Dingbats 1461 + | "Dives_Akuru" -> `Dives_Akuru 1462 + | "Dogra" -> `Dogra 1463 + | "Domino" -> `Domino 1464 + | 
"Duployan" -> `Duployan 1465 + | "Early_Dynastic_Cuneiform" -> `Early_Dynastic_Cuneiform 1466 + | "Egyptian_Hieroglyph_Format_Controls" -> `Egyptian_Hieroglyph_Format_Controls 1467 + | "Egyptian_Hieroglyphs" -> `Egyptian_Hieroglyphs 1468 + | "Egyptian_Hieroglyphs_Ext_A" -> `Egyptian_Hieroglyphs_Ext_A 1469 + | "Elbasan" -> `Elbasan 1470 + | "Elymaic" -> `Elymaic 1471 + | "Emoticons" -> `Emoticons 1472 + | "Enclosed_Alphanum" -> `Enclosed_Alphanum 1473 + | "Enclosed_Alphanum_Sup" -> `Enclosed_Alphanum_Sup 1474 + | "Enclosed_CJK" -> `Enclosed_CJK 1475 + | "Enclosed_Ideographic_Sup" -> `Enclosed_Ideographic_Sup 1476 + | "Ethiopic" -> `Ethiopic 1477 + | "Ethiopic_Ext" -> `Ethiopic_Ext 1478 + | "Ethiopic_Ext_A" -> `Ethiopic_Ext_A 1479 + | "Ethiopic_Ext_B" -> `Ethiopic_Ext_B 1480 + | "Ethiopic_Sup" -> `Ethiopic_Sup 1481 + | "Garay" -> `Garay 1482 + | "Geometric_Shapes" -> `Geometric_Shapes 1483 + | "Geometric_Shapes_Ext" -> `Geometric_Shapes_Ext 1484 + | "Georgian" -> `Georgian 1485 + | "Georgian_Ext" -> `Georgian_Ext 1486 + | "Georgian_Sup" -> `Georgian_Sup 1487 + | "Glagolitic" -> `Glagolitic 1488 + | "Glagolitic_Sup" -> `Glagolitic_Sup 1489 + | "Gothic" -> `Gothic 1490 + | "Grantha" -> `Grantha 1491 + | "Greek" -> `Greek 1492 + | "Greek_Ext" -> `Greek_Ext 1493 + | "Gujarati" -> `Gujarati 1494 + | "Gunjala_Gondi" -> `Gunjala_Gondi 1495 + | "Gurmukhi" -> `Gurmukhi 1496 + | "Gurung_Khema" -> `Gurung_Khema 1497 + | "Half_And_Full_Forms" -> `Half_And_Full_Forms 1498 + | "Half_Marks" -> `Half_Marks 1499 + | "Hangul" -> `Hangul 1500 + | "Hanifi_Rohingya" -> `Hanifi_Rohingya 1501 + | "Hanunoo" -> `Hanunoo 1502 + | "Hatran" -> `Hatran 1503 + | "Hebrew" -> `Hebrew 1504 + | "High_PU_Surrogates" -> `High_PU_Surrogates 1505 + | "High_Surrogates" -> `High_Surrogates 1506 + | "Hiragana" -> `Hiragana 1507 + | "IDC" -> `IDC 1508 + | "IPA_Ext" -> `IPA_Ext 1509 + | "Ideographic_Symbols" -> `Ideographic_Symbols 1510 + | "Imperial_Aramaic" -> `Imperial_Aramaic 1511 + | "Indic_Number_Forms" 
-> `Indic_Number_Forms 1512 + | "Indic_Siyaq_Numbers" -> `Indic_Siyaq_Numbers 1513 + | "Inscriptional_Pahlavi" -> `Inscriptional_Pahlavi 1514 + | "Inscriptional_Parthian" -> `Inscriptional_Parthian 1515 + | "Jamo" -> `Jamo 1516 + | "Jamo_Ext_A" -> `Jamo_Ext_A 1517 + | "Jamo_Ext_B" -> `Jamo_Ext_B 1518 + | "Javanese" -> `Javanese 1519 + | "Kaithi" -> `Kaithi 1520 + | "Kaktovik_Numerals" -> `Kaktovik_Numerals 1521 + | "Kana_Ext_A" -> `Kana_Ext_A 1522 + | "Kana_Ext_B" -> `Kana_Ext_B 1523 + | "Kawi" -> `Kawi 1524 + | "Kana_Sup" -> `Kana_Sup 1525 + | "Kanbun" -> `Kanbun 1526 + | "Kangxi" -> `Kangxi 1527 + | "Kannada" -> `Kannada 1528 + | "Katakana" -> `Katakana 1529 + | "Katakana_Ext" -> `Katakana_Ext 1530 + | "Kayah_Li" -> `Kayah_Li 1531 + | "Kharoshthi" -> `Kharoshthi 1532 + | "Khitan_Small_Script" -> `Khitan_Small_Script 1533 + | "Khmer" -> `Khmer 1534 + | "Khmer_Symbols" -> `Khmer_Symbols 1535 + | "Khojki" -> `Khojki 1536 + | "Khudawadi" -> `Khudawadi 1537 + | "Kirat_Rai" -> `Kirat_Rai 1538 + | "Lao" -> `Lao 1539 + | "Latin_1_Sup" -> `Latin_1_Sup 1540 + | "Latin_Ext_A" -> `Latin_Ext_A 1541 + | "Latin_Ext_Additional" -> `Latin_Ext_Additional 1542 + | "Latin_Ext_B" -> `Latin_Ext_B 1543 + | "Latin_Ext_C" -> `Latin_Ext_C 1544 + | "Latin_Ext_D" -> `Latin_Ext_D 1545 + | "Latin_Ext_E" -> `Latin_Ext_E 1546 + | "Latin_Ext_F" -> `Latin_Ext_F 1547 + | "Latin_Ext_G" -> `Latin_Ext_G 1548 + | "Lepcha" -> `Lepcha 1549 + | "Letterlike_Symbols" -> `Letterlike_Symbols 1550 + | "Limbu" -> `Limbu 1551 + | "Linear_A" -> `Linear_A 1552 + | "Linear_B_Ideograms" -> `Linear_B_Ideograms 1553 + | "Linear_B_Syllabary" -> `Linear_B_Syllabary 1554 + | "Lisu" -> `Lisu 1555 + | "Lisu_Sup" -> `Lisu_Sup 1556 + | "Low_Surrogates" -> `Low_Surrogates 1557 + | "Lycian" -> `Lycian 1558 + | "Lydian" -> `Lydian 1559 + | "Mahajani" -> `Mahajani 1560 + | "Mahjong" -> `Mahjong 1561 + | "Makasar" -> `Makasar 1562 + | "Malayalam" -> `Malayalam 1563 + | "Mandaic" -> `Mandaic 1564 + | "Manichaean" -> `Manichaean 
1565 + | "Marchen" -> `Marchen 1566 + | "Masaram_Gondi" -> `Masaram_Gondi 1567 + | "Math_Alphanum" -> `Math_Alphanum 1568 + | "Math_Operators" -> `Math_Operators 1569 + | "Mayan_Numerals" -> `Mayan_Numerals 1570 + | "Medefaidrin" -> `Medefaidrin 1571 + | "Meetei_Mayek" -> `Meetei_Mayek 1572 + | "Meetei_Mayek_Ext" -> `Meetei_Mayek_Ext 1573 + | "Mende_Kikakui" -> `Mende_Kikakui 1574 + | "Meroitic_Cursive" -> `Meroitic_Cursive 1575 + | "Meroitic_Hieroglyphs" -> `Meroitic_Hieroglyphs 1576 + | "Miao" -> `Miao 1577 + | "Misc_Arrows" -> `Misc_Arrows 1578 + | "Misc_Math_Symbols_A" -> `Misc_Math_Symbols_A 1579 + | "Misc_Math_Symbols_B" -> `Misc_Math_Symbols_B 1580 + | "Misc_Pictographs" -> `Misc_Pictographs 1581 + | "Misc_Symbols" -> `Misc_Symbols 1582 + | "Misc_Symbols_Sup" -> `Misc_Symbols_Sup 1583 + | "Misc_Technical" -> `Misc_Technical 1584 + | "Modi" -> `Modi 1585 + | "Modifier_Letters" -> `Modifier_Letters 1586 + | "Modifier_Tone_Letters" -> `Modifier_Tone_Letters 1587 + | "Mongolian" -> `Mongolian 1588 + | "Mongolian_Sup" -> `Mongolian_Sup 1589 + | "Mro" -> `Mro 1590 + | "Multani" -> `Multani 1591 + | "Music" -> `Music 1592 + | "Myanmar" -> `Myanmar 1593 + | "Myanmar_Ext_A" -> `Myanmar_Ext_A 1594 + | "Myanmar_Ext_B" -> `Myanmar_Ext_B 1595 + | "Myanmar_Ext_C" -> `Myanmar_Ext_C 1596 + | "NB" -> `NB 1597 + | "NKo" -> `NKo 1598 + | "Nabataean" -> `Nabataean 1599 + | "Nag_Mundari" -> `Nag_Mundari 1600 + | "Nandinagari" -> `Nandinagari 1601 + | "New_Tai_Lue" -> `New_Tai_Lue 1602 + | "Newa" -> `Newa 1603 + | "Number_Forms" -> `Number_Forms 1604 + | "Nushu" -> `Nushu 1605 + | "Nyiakeng_Puachue_Hmong" -> `Nyiakeng_Puachue_Hmong 1606 + | "OCR" -> `OCR 1607 + | "Ogham" -> `Ogham 1608 + | "Ol_Chiki" -> `Ol_Chiki 1609 + | "Ol_Onal" -> `Ol_Onal 1610 + | "Old_Hungarian" -> `Old_Hungarian 1611 + | "Old_Italic" -> `Old_Italic 1612 + | "Old_North_Arabian" -> `Old_North_Arabian 1613 + | "Old_Permic" -> `Old_Permic 1614 + | "Old_Persian" -> `Old_Persian 1615 + | "Old_Sogdian" -> 
`Old_Sogdian 1616 + | "Old_South_Arabian" -> `Old_South_Arabian 1617 + | "Old_Turkic" -> `Old_Turkic 1618 + | "Old_Uyghur" -> `Old_Uyghur 1619 + | "Oriya" -> `Oriya 1620 + | "Ornamental_Dingbats" -> `Ornamental_Dingbats 1621 + | "Osage" -> `Osage 1622 + | "Osmanya" -> `Osmanya 1623 + | "Ottoman_Siyaq_Numbers" -> `Ottoman_Siyaq_Numbers 1624 + | "PUA" -> `PUA 1625 + | "Pahawh_Hmong" -> `Pahawh_Hmong 1626 + | "Palmyrene" -> `Palmyrene 1627 + | "Pau_Cin_Hau" -> `Pau_Cin_Hau 1628 + | "Phags_Pa" -> `Phags_Pa 1629 + | "Phaistos" -> `Phaistos 1630 + | "Phoenician" -> `Phoenician 1631 + | "Phonetic_Ext" -> `Phonetic_Ext 1632 + | "Phonetic_Ext_Sup" -> `Phonetic_Ext_Sup 1633 + | "Playing_Cards" -> `Playing_Cards 1634 + | "Psalter_Pahlavi" -> `Psalter_Pahlavi 1635 + | "Punctuation" -> `Punctuation 1636 + | "Rejang" -> `Rejang 1637 + | "Rumi" -> `Rumi 1638 + | "Runic" -> `Runic 1639 + | "Samaritan" -> `Samaritan 1640 + | "Saurashtra" -> `Saurashtra 1641 + | "Sharada" -> `Sharada 1642 + | "Sharada_Sup" -> `Sharada_Sup 1643 + | "Shavian" -> `Shavian 1644 + | "Shorthand_Format_Controls" -> `Shorthand_Format_Controls 1645 + | "Siddham" -> `Siddham 1646 + | "Sidetic" -> `Sidetic 1647 + | "Sinhala" -> `Sinhala 1648 + | "Sinhala_Archaic_Numbers" -> `Sinhala_Archaic_Numbers 1649 + | "Small_Forms" -> `Small_Forms 1650 + | "Small_Kana_Ext" -> `Small_Kana_Ext 1651 + | "Sogdian" -> `Sogdian 1652 + | "Sora_Sompeng" -> `Sora_Sompeng 1653 + | "Soyombo" -> `Soyombo 1654 + | "Specials" -> `Specials 1655 + | "Sundanese" -> `Sundanese 1656 + | "Sundanese_Sup" -> `Sundanese_Sup 1657 + | "Sunuwar" -> `Sunuwar 1658 + | "Sup_Arrows_A" -> `Sup_Arrows_A 1659 + | "Sup_Arrows_B" -> `Sup_Arrows_B 1660 + | "Sup_Arrows_C" -> `Sup_Arrows_C 1661 + | "Sup_Math_Operators" -> `Sup_Math_Operators 1662 + | "Sup_PUA_A" -> `Sup_PUA_A 1663 + | "Sup_PUA_B" -> `Sup_PUA_B 1664 + | "Sup_Punctuation" -> `Sup_Punctuation 1665 + | "Sup_Symbols_And_Pictographs" -> `Sup_Symbols_And_Pictographs 1666 + | "Super_And_Sub" -> 
`Super_And_Sub 1667 + | "Sutton_SignWriting" -> `Sutton_SignWriting 1668 + | "Syloti_Nagri" -> `Syloti_Nagri 1669 + | "Symbols_And_Pictographs_Ext_A" -> `Symbols_And_Pictographs_Ext_A 1670 + | "Symbols_For_Legacy_Computing" -> `Symbols_For_Legacy_Computing 1671 + | "Symbols_For_Legacy_Computing_Sup" -> `Symbols_For_Legacy_Computing_Sup 1672 + | "Syriac" -> `Syriac 1673 + | "Syriac_Sup" -> `Syriac_Sup 1674 + | "Tagalog" -> `Tagalog 1675 + | "Tagbanwa" -> `Tagbanwa 1676 + | "Tags" -> `Tags 1677 + | "Tai_Le" -> `Tai_Le 1678 + | "Tai_Tham" -> `Tai_Tham 1679 + | "Tai_Viet" -> `Tai_Viet 1680 + | "Tai_Xuan_Jing" -> `Tai_Xuan_Jing 1681 + | "Tai_Yo" -> `Tai_Yo 1682 + | "Takri" -> `Takri 1683 + | "Tamil" -> `Tamil 1684 + | "Tamil_Sup" -> `Tamil_Sup 1685 + | "Tangsa" -> `Tangsa 1686 + | "Tangut" -> `Tangut 1687 + | "Tangut_Components" -> `Tangut_Components 1688 + | "Tangut_Components_Sup" -> `Tangut_Components_Sup 1689 + | "Tangut_Sup" -> `Tangut_Sup 1690 + | "Telugu" -> `Telugu 1691 + | "Thaana" -> `Thaana 1692 + | "Thai" -> `Thai 1693 + | "Tibetan" -> `Tibetan 1694 + | "Tifinagh" -> `Tifinagh 1695 + | "Tirhuta" -> `Tirhuta 1696 + | "Todhri" -> `Todhri 1697 + | "Tolong_Siki" -> `Tolong_Siki 1698 + | "Toto" -> `Toto 1699 + | "Transport_And_Map" -> `Transport_And_Map 1700 + | "Tulu_Tigalari" -> `Tulu_Tigalari 1701 + | "UCAS" -> `UCAS 1702 + | "UCAS_Ext" -> `UCAS_Ext 1703 + | "UCAS_Ext_A" -> `UCAS_Ext_A 1704 + | "Ugaritic" -> `Ugaritic 1705 + | "VS" -> `VS 1706 + | "VS_Sup" -> `VS_Sup 1707 + | "Vai" -> `Vai 1708 + | "Vedic_Ext" -> `Vedic_Ext 1709 + | "Vertical_Forms" -> `Vertical_Forms 1710 + | "Vithkuqi" -> `Vithkuqi 1711 + | "Wancho" -> `Wancho 1712 + | "Warang_Citi" -> `Warang_Citi 1713 + | "Yezidi" -> `Yezidi 1714 + | "Yi_Radicals" -> `Yi_Radicals 1715 + | "Yi_Syllables" -> `Yi_Syllables 1716 + | "Yijing" -> `Yijing 1717 + | "Zanabazar_Square" -> `Zanabazar_Square 1718 + | "Znamenny_Music" -> `Znamenny_Music 1719 + | v -> err (err_att_val v) 1720 + end 1721 + 1722 + let 
i_bool v = Bool_v begin match v with 1723 + | "Y" -> true | "N" -> false 1724 + | v -> err (err_att_val v) 1725 + end 1726 + 1727 + let i_bool_maybe v = Bool_maybe_v begin match v with 1728 + | "Y" -> `True | "N" -> `False | "M" -> `Maybe 1729 + | v -> err (err_att_val v) 1730 + end 1731 + 1732 + let i_cp v = Cp_v (cp_of_string v) 1733 + let i_cp_map v = 1734 + if v = "#" then Cp_map_v `Self else Cp_map_v (`Cp (cp_of_string v)) 1735 + 1736 + let i_cp_opt v = 1737 + if v = "" then Cp_opt_v None else Cp_opt_v (Some (cp_of_string v)) 1738 + 1739 + let i_cps ?empty v = Cps_v (cps_of_string ?empty v) 1740 + let i_cps_map ?empty v = 1741 + if v = "#" then Cps_map_v `Self else Cps_map_v (`Cps (cps_of_string ?empty v)) 1742 + 1743 + let i_decomposition_type v = Decomposition_type_v begin match v with 1744 + | "can" -> `Can 1745 + | "com" -> `Com 1746 + | "enc" -> `Enc 1747 + | "fin" -> `Fin 1748 + | "font" -> `Font 1749 + | "fra" -> `Fra 1750 + | "init" -> `Init 1751 + | "iso" -> `Iso 1752 + | "med" -> `Med 1753 + | "nar" -> `Nar 1754 + | "nb" -> `Nb 1755 + | "sml" -> `Sml 1756 + | "sqr" -> `Sqr 1757 + | "sub" -> `Sub 1758 + | "sup" -> `Sup 1759 + | "vert" -> `Vert 1760 + | "wide" -> `Wide 1761 + | "none" -> `None 1762 + | v -> err (err_att_val v) 1763 + end 1764 + 1765 + let i_east_asian_width v = East_asian_width_v begin match v with 1766 + | "A" -> `A 1767 + | "F" -> `F 1768 + | "H" -> `H 1769 + | "N" -> `N 1770 + | "Na" -> `Na 1771 + | "W" -> `W 1772 + | v -> err (err_att_val v) 1773 + end 1774 + 1775 + let i_general_category v = General_category_v begin match v with 1776 + | "Lu" -> `Lu 1777 + | "Ll" -> `Ll 1778 + | "Lt" -> `Lt 1779 + | "Lm" -> `Lm 1780 + | "Lo" -> `Lo 1781 + | "Mn" -> `Mn 1782 + | "Mc" -> `Mc 1783 + | "Me" -> `Me 1784 + | "Nd" -> `Nd 1785 + | "Nl" -> `Nl 1786 + | "No" -> `No 1787 + | "Pc" -> `Pc 1788 + | "Pd" -> `Pd 1789 + | "Ps" -> `Ps 1790 + | "Pe" -> `Pe 1791 + | "Pi" -> `Pi 1792 + | "Pf" -> `Pf 1793 + | "Po" -> `Po 1794 + | "Sm" -> `Sm 1795 + | 
"Sc" ->`Sc 1796 + | "Sk" -> `Sk 1797 + | "So" -> `So 1798 + | "Zs" -> `Zs 1799 + | "Zl" -> `Zl 1800 + | "Zp" -> `Zp 1801 + | "Cc" -> `Cc 1802 + | "Cf" -> `Cf 1803 + | "Cs" -> `Cs 1804 + | "Co" -> `Co 1805 + | "Cn" -> `Cn 1806 + | v -> err (err_att_val v) 1807 + end 1808 + 1809 + let i_grapheme_cluster_break v = Grapheme_cluster_break_v begin match v with 1810 + | "CN" -> `CN 1811 + | "CR" -> `CR 1812 + | "EB" -> `EB 1813 + | "EBG" -> `EBG 1814 + | "EM" -> `EM 1815 + | "EX" -> `EX 1816 + | "GAZ" -> `GAZ 1817 + | "L" -> `L 1818 + | "LF" -> `LF 1819 + | "LV" -> `LV 1820 + | "LVT" -> `LVT 1821 + | "PP" -> `PP 1822 + | "RI" -> `RI 1823 + | "SM" -> `SM 1824 + | "T" -> `T 1825 + | "V" -> `V 1826 + | "XX" -> `XX 1827 + | "ZWJ" -> `ZWJ 1828 + | v -> err (err_att_val v) 1829 + end 1830 + 1831 + let i_hangul_syllable_type v = Hangul_syllable_type_v begin match v with 1832 + | "L" -> `L 1833 + | "LV" -> `LV 1834 + | "LVT" -> `LVT 1835 + | "T" -> `T 1836 + | "V" -> `V 1837 + | "NA" -> `NA 1838 + | v -> err (err_att_val v) 1839 + end 1840 + 1841 + let i_int v = try Int_v (int_of_string v) with Failure _ -> err (err_att_val v) 1842 + let i_indic_conjunct_break v = Indic_conjunct_break_v begin match v with 1843 + | "Consonant" -> `Consonant 1844 + | "Extend" -> `Extend 1845 + | "Linker" -> `Linker 1846 + | "None" -> `None 1847 + | v -> err (err_att_val v) 1848 + end 1849 + 1850 + let i_indic_syllabic_category v = Indic_syllabic_category_v begin match v with 1851 + | "Avagraha" -> `Avagraha 1852 + | "Bindu" -> `Bindu 1853 + | "Brahmi_Joining_Number" -> `Brahmi_Joining_Number 1854 + | "Cantillation_Mark" -> `Cantillation_Mark 1855 + | "Consonant" -> `Consonant 1856 + | "Consonant_Dead" -> `Consonant_Dead 1857 + | "Consonant_Final" -> `Consonant_Final 1858 + | "Consonant_Head_Letter" -> `Consonant_Head_Letter 1859 + | "Consonant_Initial_Postfixed" -> `Consonant_Initial_Postfixed 1860 + | "Consonant_Killer" -> `Consonant_Killer 1861 + | "Consonant_Medial" -> `Consonant_Medial 1862 + | 
"Consonant_Placeholder" -> `Consonant_Placeholder 1863 + | "Consonant_Preceding_Repha" -> `Consonant_Preceding_Repha 1864 + | "Consonant_Prefixed" -> `Consonant_Prefixed 1865 + | "Consonant_Repha" -> `Consonant_Repha 1866 + | "Consonant_Subjoined" -> `Consonant_Subjoined 1867 + | "Consonant_Succeeding_Repha" -> `Consonant_Succeeding_Repha 1868 + | "Consonant_With_Stacker" -> `Consonant_With_Stacker 1869 + | "Gemination_Mark" -> `Gemination_Mark 1870 + | "Invisible_Stacker" -> `Invisible_Stacker 1871 + | "Joiner" -> `Joiner 1872 + | "Modifying_Letter" -> `Modifying_Letter 1873 + | "Non_Joiner" -> `Non_Joiner 1874 + | "Nukta" -> `Nukta 1875 + | "Number" -> `Number 1876 + | "Number_Joiner" -> `Number_Joiner 1877 + | "Other" -> `Other 1878 + | "Pure_Killer" -> `Pure_Killer 1879 + | "Reordering_Killer" -> `Reordering_Killer 1880 + | "Register_Shifter" -> `Register_Shifter 1881 + | "Syllable_Modifier" -> `Syllable_Modifier 1882 + | "Tone_Letter" -> `Tone_Letter 1883 + | "Tone_Mark" -> `Tone_Mark 1884 + | "Virama" -> `Virama 1885 + | "Visarga" -> `Visarga 1886 + | "Vowel" -> `Vowel 1887 + | "Vowel_Dependent" -> `Vowel_Dependent 1888 + | "Vowel_Independent" -> `Vowel_Independent 1889 + | v -> err (err_att_val v) 1890 + end 1891 + 1892 + let i_indic_matra_category v = Indic_matra_category_v begin match v with 1893 + | "Right" -> `Right 1894 + | "Left" -> `Left 1895 + | "Visual_Order_Left" -> `Visual_Order_Left 1896 + | "Left_And_Right" -> `Left_And_Right 1897 + | "Top" -> `Top 1898 + | "Bottom" -> `Bottom 1899 + | "Top_And_Bottom" -> `Top_And_Bottom 1900 + | "Top_And_Right" -> `Top_And_Right 1901 + | "Top_And_Left" -> `Top_And_Left 1902 + | "Top_And_Left_And_Right" -> `Top_And_Left_And_Right 1903 + | "Bottom_And_Right" -> `Bottom_And_Right 1904 + | "Top_And_Bottom_And_Right" -> `Top_And_Bottom_And_Right 1905 + | "Overstruck" -> `Overstruck 1906 + | "Invisible" -> `Invisible 1907 + | "NA" -> `NA 1908 + | v -> err (err_att_val v) 1909 + end 1910 + 1911 + let 
i_indic_positional_category v = Indic_positional_category_v 1912 + (* Maps the string [v] to an [Indic_positional_category_v] value, one arm per name, each arm yielding the tag spelled like the name; a string matching no arm is passed to [err (err_att_val v)]. *) begin match v with 1913 + | "Bottom" -> `Bottom 1914 + | "Bottom_And_Left" -> `Bottom_And_Left (* fixed: this arm previously produced `Bottom_And_Right, duplicating the next arm and leaving `Bottom_And_Left unreachable *) 1915 + | "Bottom_And_Right" -> `Bottom_And_Right 1916 + | "Invisible" -> `Invisible 1917 + | "Left" -> `Left 1918 + | "Left_And_Right" -> `Left_And_Right 1919 + | "NA" -> `NA 1920 + | "Overstruck" -> `Overstruck 1921 + | "Right" -> `Right 1922 + | "Top" -> `Top 1923 + | "Top_And_Bottom" -> `Top_And_Bottom 1924 + | "Top_And_Bottom_And_Left" -> `Top_And_Bottom_And_Left 1925 + | "Top_And_Bottom_And_Right" -> `Top_And_Bottom_And_Right 1926 + | "Top_And_Left" -> `Top_And_Left 1927 + | "Top_And_Left_And_Right" -> `Top_And_Left_And_Right 1928 + | "Top_And_Right" -> `Top_And_Right 1929 + | "Visual_Order_Left" -> `Visual_Order_Left 1930 + | v -> err (err_att_val v) 1931 + end 1932 + 1933 + let i_joining_group v = Joining_group_v begin match v with 1934 + | "African_Feh" -> `African_Feh 1935 + | "African_Noon" -> `African_Noon 1936 + | "African_Qaf" -> `African_Qaf 1937 + | "Ain" -> `Ain 1938 + | "Alaph" -> `Alaph 1939 + | "Alef" -> `Alef 1940 + | "Alef_Maqsurah" -> `Alef_Maqsurah 1941 + | "Beh" -> `Beh 1942 + | "Beth" -> `Beth 1943 + | "Burushaski_Yeh_Barree" -> `Burushaski_Yeh_Barree 1944 + | "Dal" -> `Dal 1945 + | "Dalath_Rish" -> `Dalath_Rish 1946 + | "E" -> `E 1947 + | "Farsi_Yeh" -> `Farsi_Yeh 1948 + | "Fe" -> `Fe 1949 + | "Feh" -> `Feh 1950 + | "Final_Semkath" -> `Final_Semkath 1951 + | "Gaf" -> `Gaf 1952 + | "Gamal" -> `Gamal 1953 + | "Hah" -> `Hah 1954 + | "Hanifi_Rohingya_Kinna_Ya" -> `Hanifi_Rohingya_Kinna_Ya 1955 + | "Hanifi_Rohingya_Pa" -> `Hanifi_Rohingya_Pa 1956 + | "Hamza_On_Heh_Goal" -> `Hamza_On_Heh_Goal 1957 + | "He" -> `He 1958 + | "Heh" -> `Heh 1959 + | "Heh_Goal" -> `Heh_Goal 1960 + | "Heth" -> `Heth 1961 + | "Kaf" -> `Kaf 1962 + | "Kaph" -> `Kaph 1963 + | "Kashmiri_Yeh" -> `Kashmiri_Yeh 1964 + | "Khaph" -> `Khaph 1965 + | "Knotted_Heh" -> `Knotted_Heh 1966 + | "Lam" -> `Lam 1967 + | "Lamadh"
-> `Lamadh 1968 + | "Malayalam_Bha" -> `Malayalam_Bha 1969 + | "Malayalam_Ja" -> `Malayalam_Ja 1970 + | "Malayalam_Lla" -> `Malayalam_Lla 1971 + | "Malayalam_Llla" -> `Malayalam_Llla 1972 + | "Malayalam_Nna" -> `Malayalam_Nna 1973 + | "Malayalam_Nnna" -> `Malayalam_Nnna 1974 + | "Malayalam_Nya" -> `Malayalam_Nya 1975 + | "Malayalam_Ra" -> `Malayalam_Ra 1976 + | "Malayalam_Ssa" -> `Malayalam_Ssa 1977 + | "Malayalam_Tta" -> `Malayalam_Tta 1978 + | "Malayalam_Nga" -> `Malayalam_Nga 1979 + | "Manichaean_Aleph" -> `Manichaean_Aleph 1980 + | "Manichaean_Ayin" -> `Manichaean_Ayin 1981 + | "Manichaean_Beth" -> `Manichaean_Beth 1982 + | "Manichaean_Daleth" -> `Manichaean_Daleth 1983 + | "Manichaean_Dhamedh" -> `Manichaean_Dhamedh 1984 + | "Manichaean_Five" -> `Manichaean_Five 1985 + | "Manichaean_Gimel" -> `Manichaean_Gimel 1986 + | "Manichaean_Heth" -> `Manichaean_Heth 1987 + | "Manichaean_Hundred" -> `Manichaean_Hundred 1988 + | "Manichaean_Kaph" -> `Manichaean_Kaph 1989 + | "Manichaean_Lamedh" -> `Manichaean_Lamedh 1990 + | "Manichaean_Mem" -> `Manichaean_Mem 1991 + | "Manichaean_Nun" -> `Manichaean_Nun 1992 + | "Manichaean_One" -> `Manichaean_One 1993 + | "Manichaean_Pe" -> `Manichaean_Pe 1994 + | "Manichaean_Qoph" -> `Manichaean_Qoph 1995 + | "Manichaean_Resh" -> `Manichaean_Resh 1996 + | "Manichaean_Sadhe" -> `Manichaean_Sadhe 1997 + | "Manichaean_Samekh" -> `Manichaean_Samekh 1998 + | "Manichaean_Taw" -> `Manichaean_Taw 1999 + | "Manichaean_Ten" -> `Manichaean_Ten 2000 + | "Manichaean_Teth" -> `Manichaean_Teth 2001 + | "Manichaean_Thamedh" -> `Manichaean_Thamedh 2002 + | "Manichaean_Twenty" -> `Manichaean_Twenty 2003 + | "Manichaean_Waw" -> `Manichaean_Waw 2004 + | "Manichaean_Yodh" -> `Manichaean_Yodh 2005 + | "Manichaean_Zayin" -> `Manichaean_Zayin 2006 + | "Meem" -> `Meem 2007 + | "Mim" -> `Mim 2008 + | "No_Joining_Group" -> `No_Joining_Group 2009 + | "Noon" -> `Noon 2010 + | "Nun" -> `Nun 2011 + | "Nya" -> `Nya 2012 + | "Pe" -> `Pe 2013 + | "Qaf" -> `Qaf 2014 + |
"Qaph" -> `Qaph 2015 + | "Reh" -> `Reh 2016 + | "Reversed_Pe" -> `Reversed_Pe 2017 + | "Rohingya_Yeh" -> `Rohingya_Yeh 2018 + | "Sad" -> `Sad 2019 + | "Sadhe" -> `Sadhe 2020 + | "Seen" -> `Seen 2021 + | "Semkath" -> `Semkath 2022 + | "Shin" -> `Shin 2023 + | "Straight_Waw" -> `Straight_Waw 2024 + | "Swash_Kaf" -> `Swash_Kaf 2025 + | "Syriac_Waw" -> `Syriac_Waw 2026 + | "Tah" -> `Tah 2027 + | "Taw" -> `Taw 2028 + | "Teh_Marbuta" -> `Teh_Marbuta 2029 + | "Teh_Marbuta_Goal" -> `Teh_Marbuta_Goal 2030 + | "Teth" -> `Teth 2031 + | "Thin_Noon" -> `Thin_Noon 2032 + | "Thin_Yeh" -> `Thin_Yeh 2033 + | "Vertical_Tail" -> `Vertical_Tail 2034 + | "Waw" -> `Waw 2035 + | "Yeh" -> `Yeh 2036 + | "Yeh_Barree" -> `Yeh_Barree 2037 + | "Yeh_With_Tail" -> `Yeh_With_Tail 2038 + | "Yudh" -> `Yudh 2039 + | "Yudh_He" -> `Yudh_He 2040 + | "Zain" -> `Zain 2041 + | "Zhain" -> `Zhain 2042 + | "BAA" -> `BAA 2043 + | "FA" -> `FA 2044 + | "HAA" -> `HAA 2045 + | "HA_GOAL" -> `HA_GOAL 2046 + | "HA" -> `HA 2047 + | "CAF" -> `CAF 2048 + | "KNOTTED_HA" -> `KNOTTED_HA 2049 + | "RA" -> `RA 2050 + | "SWASH_CAF" -> `SWASH_CAF 2051 + | "HAMZAH_ON_HA_GOAL" -> `HAMZAH_ON_HA_GOAL 2052 + | "TAA_MARBUTAH" -> `TAA_MARBUTAH 2053 + | "YA_BARREE" -> `YA_BARREE 2054 + | "YA" -> `YA 2055 + | "ALEF_MAQSURAH" | "ALEF_MAQSURAH " -> `ALEF_MAQSURAH (* fixed: only the spelling with a trailing space was accepted, so the plain name fell through to the error arm; the old spelling stays as an alternative so inputs that matched before still match *) 2056 + | v -> err (err_att_val v) 2057 + end 2058 + 2059 + let i_joining_type v = Joining_type_v begin match v with 2060 + | "U" -> `U 2061 + | "C" -> `C 2062 + | "T" -> `T 2063 + | "D" -> `D 2064 + | "L" -> `L 2065 + | "R" -> `R 2066 + | v -> err (err_att_val v) 2067 + end 2068 + 2069 + let i_line_break v = Line_break_v begin match v with 2070 + | "AI" -> `AI 2071 + | "AK" -> `AK 2072 + | "AL" -> `AL 2073 + | "AP" -> `AP 2074 + | "AS" -> `AS 2075 + | "B2" -> `B2 2076 + | "BA" -> `BA 2077 + | "BB" -> `BB 2078 + | "BK" -> `BK 2079 + | "CB" -> `CB 2080 + | "CJ" -> `CJ 2081 + | "CL" -> `CL 2082 + | "CM" -> `CM 2083 + | "CP" -> `CP 2084 + | "CR" -> `CR 2085 + | "EB" -> `EB 2086 + | "EM" -> `EM 2087 +
| "EX" -> `EX 2088 + | "GL" -> `GL 2089 + | "H2" -> `H2 2090 + | "H3" -> `H3 2091 + | "HH" -> `HH 2092 + | "HL" -> `HL 2093 + | "HY" -> `HY 2094 + | "ID" -> `ID 2095 + | "IN" -> `IN 2096 + | "IS" -> `IS 2097 + | "JL" -> `JL 2098 + | "JT" -> `JT 2099 + | "JV" -> `JV 2100 + | "LF" -> `LF 2101 + | "NL" -> `NL 2102 + | "NS" -> `NS 2103 + | "NU" -> `NU 2104 + | "OP" -> `OP 2105 + | "PO" -> `PO 2106 + | "PR" -> `PR 2107 + | "QU" -> `QU 2108 + | "RI" -> `RI 2109 + | "SA" -> `SA 2110 + | "SG" -> `SG 2111 + | "SP" -> `SP 2112 + | "SY" -> `SY 2113 + | "VF" -> `VF 2114 + | "VI" -> `VI 2115 + | "WJ" -> `WJ 2116 + | "XX" -> `XX 2117 + | "ZW" -> `ZW 2118 + | "ZWJ" -> `ZWJ 2119 + | v -> err (err_att_val v) 2120 + end 2121 + 2122 + let i_name v = Name_v (if String.contains v '#' then `Pattern v else `Name v) 2123 + let i_name_alias_type = function 2124 + | "abbreviation" -> `Abbreviation 2125 + | "alternate" -> `Alternate 2126 + | "control" -> `Control 2127 + | "correction" -> `Correction 2128 + | "figment" -> `Figment 2129 + | v -> err (err_att_val v) 2130 + 2131 + let i_numeric_type v = Numeric_type_v begin match v with 2132 + | "None" -> `None 2133 + | "De" -> `De 2134 + | "Di" -> `Di 2135 + | "Nu" -> `Nu 2136 + | v -> err (err_att_val v) 2137 + end 2138 + 2139 + let i_numeric_value v = Numeric_value_v begin try match String.trim v with 2140 + | "NaN" -> `NaN 2141 + | s -> 2142 + let base s = match split_string (String.trim s) '/' with 2143 + | [num; denom] -> `Frac (int_of_string num, int_of_string denom) 2144 + | [num] -> `Num (Int64.of_string num) 2145 + | _ -> failwith "" 2146 + in 2147 + `Nums (List.map base (split_string s ' ')) 2148 + with Failure _ -> err (err_att_val v) 2149 + end 2150 + 2151 + let i_script v = Script_v begin match v with 2152 + | "Adlm" -> `Adlm 2153 + | "Aghb" -> `Aghb 2154 + | "Ahom" -> `Ahom 2155 + | "Arab" -> `Arab 2156 + | "Armi" -> `Armi 2157 + | "Armn" -> `Armn 2158 + | "Avst" -> `Avst 2159 + | "Bali" -> `Bali 2160 + | "Bamu" -> `Bamu 2161 + | 
"Bass" -> `Bass 2162 + | "Batk" -> `Batk 2163 + | "Beng" -> `Beng 2164 + | "Berf" -> `Berf 2165 + | "Bhks" -> `Bhks 2166 + | "Bopo" -> `Bopo 2167 + | "Brah" -> `Brah 2168 + | "Brai" -> `Brai 2169 + | "Bugi" -> `Bugi 2170 + | "Buhd" -> `Buhd 2171 + | "Cakm" -> `Cakm 2172 + | "Cans" -> `Cans 2173 + | "Cari" -> `Cari 2174 + | "Cham" -> `Cham 2175 + | "Cher" -> `Cher 2176 + | "Chrs" -> `Chrs 2177 + | "Copt" -> `Copt 2178 + | "Cpmn" -> `Cpmn 2179 + | "Cprt" -> `Cprt 2180 + | "Cyrl" -> `Cyrl 2181 + | "Deva" -> `Deva 2182 + | "Diak" -> `Diak 2183 + | "Dogr" -> `Dogr 2184 + | "Dsrt" -> `Dsrt 2185 + | "Dupl" -> `Dupl 2186 + | "Egyp" -> `Egyp 2187 + | "Elba" -> `Elba 2188 + | "Elym" -> `Elym 2189 + | "Ethi" -> `Ethi 2190 + | "Gara" -> `Gara 2191 + | "Geor" -> `Geor 2192 + | "Glag" -> `Glag 2193 + | "Gong" -> `Gong 2194 + | "Gonm" -> `Gonm 2195 + | "Goth" -> `Goth 2196 + | "Gran" -> `Gran 2197 + | "Grek" -> `Grek 2198 + | "Gujr" -> `Gujr 2199 + | "Gukh" -> `Gukh 2200 + | "Guru" -> `Guru 2201 + | "Hang" -> `Hang 2202 + | "Hani" -> `Hani 2203 + | "Hano" -> `Hano 2204 + | "Hatr" -> `Hatr 2205 + | "Hebr" -> `Hebr 2206 + | "Hira" -> `Hira 2207 + | "Hluw" -> `Hluw 2208 + | "Hmng" -> `Hmng 2209 + | "Hmnp" -> `Hmnp 2210 + | "Hrkt" -> `Hrkt 2211 + | "Hung" -> `Hung 2212 + | "Ital" -> `Ital 2213 + | "Java" -> `Java 2214 + | "Kali" -> `Kali 2215 + | "Kana" -> `Kana 2216 + | "Kawi" -> `Kawi 2217 + | "Khar" -> `Khar 2218 + | "Khmr" -> `Khmr 2219 + | "Khoj" -> `Khoj 2220 + | "Knda" -> `Knda 2221 + | "Krai" -> `Krai 2222 + | "Kthi" -> `Kthi 2223 + | "Kits" -> `Kits 2224 + | "Lana" -> `Lana 2225 + | "Laoo" -> `Laoo 2226 + | "Latn" -> `Latn 2227 + | "Lepc" -> `Lepc 2228 + | "Limb" -> `Limb 2229 + | "Lina" -> `Lina 2230 + | "Linb" -> `Linb 2231 + | "Lisu" -> `Lisu 2232 + | "Lyci" -> `Lyci 2233 + | "Lydi" -> `Lydi 2234 + | "Mahj" -> `Mahj 2235 + | "Maka" -> `Maka 2236 + | "Mand" -> `Mand 2237 + | "Mani" -> `Mani 2238 + | "Marc" -> `Marc 2239 + | "Medf" -> `Medf 2240 + | "Mend" -> `Mend 2241 + | 
"Merc" -> `Merc 2242 + | "Mero" -> `Mero 2243 + | "Mlym" -> `Mlym 2244 + | "Modi" -> `Modi 2245 + | "Mong" -> `Mong 2246 + | "Mroo" -> `Mroo 2247 + | "Mtei" -> `Mtei 2248 + | "Mult" -> `Mult 2249 + | "Mymr" -> `Mymr 2250 + | "Nagm" -> `Nagm 2251 + | "Nand" -> `Nand 2252 + | "Narb" -> `Narb 2253 + | "Nbat" -> `Nbat 2254 + | "Newa" -> `Newa 2255 + | "Nkoo" -> `Nkoo 2256 + | "Nshu" -> `Nshu 2257 + | "Ogam" -> `Ogam 2258 + | "Olck" -> `Olck 2259 + | "Onao" -> `Onao 2260 + | "Orkh" -> `Orkh 2261 + | "Orya" -> `Orya 2262 + | "Osge" -> `Osge 2263 + | "Osma" -> `Osma 2264 + | "Ougr" -> `Ougr 2265 + | "Palm" -> `Palm 2266 + | "Pauc" -> `Pauc 2267 + | "Perm" -> `Perm 2268 + | "Phag" -> `Phag 2269 + | "Phli" -> `Phli 2270 + | "Phlp" -> `Phlp 2271 + | "Phnx" -> `Phnx 2272 + | "Plrd" -> `Plrd 2273 + | "Prti" -> `Prti 2274 + | "Qaai" -> `Qaai 2275 + | "Rjng" -> `Rjng 2276 + | "Rohg" -> `Rohg 2277 + | "Runr" -> `Runr 2278 + | "Samr" -> `Samr 2279 + | "Sarb" -> `Sarb 2280 + | "Saur" -> `Saur 2281 + | "Sgnw" -> `Sgnw 2282 + | "Shaw" -> `Shaw 2283 + | "Shrd" -> `Shrd 2284 + | "Sidd" -> `Sidd 2285 + | "Sidt" -> `Sidt 2286 + | "Sind" -> `Sind 2287 + | "Sinh" -> `Sinh 2288 + | "Sogd" -> `Sogd 2289 + | "Sogo" -> `Sogo 2290 + | "Sora" -> `Sora 2291 + | "Soyo" -> `Soyo 2292 + | "Sund" -> `Sund 2293 + | "Sunu" -> `Sunu 2294 + | "Sylo" -> `Sylo 2295 + | "Syrc" -> `Syrc 2296 + | "Tagb" -> `Tagb 2297 + | "Takr" -> `Takr 2298 + | "Tale" -> `Tale 2299 + | "Talu" -> `Talu 2300 + | "Taml" -> `Taml 2301 + | "Tang" -> `Tang 2302 + | "Tavt" -> `Tavt 2303 + | "Tayo" -> `Tayo 2304 + | "Telu" -> `Telu 2305 + | "Tfng" -> `Tfng 2306 + | "Tglg" -> `Tglg 2307 + | "Thaa" -> `Thaa 2308 + | "Thai" -> `Thai 2309 + | "Tibt" -> `Tibt 2310 + | "Tirh" -> `Tirh 2311 + | "Tnsa" -> `Tnsa 2312 + | "Todr" -> `Todr 2313 + | "Tols" -> `Tols 2314 + | "Toto" -> `Toto 2315 + | "Tutg" -> `Tutg 2316 + | "Ugar" -> `Ugar 2317 + | "Vaii" -> `Vaii 2318 + | "Vith" -> `Vith 2319 + | "Wara" -> `Wara 2320 + | "Wcho" -> `Wcho 2321 + | 
"Xpeo" -> `Xpeo 2322 + | "Xsux" -> `Xsux 2323 + | "Yezi" -> `Yezi 2324 + | "Yiii" -> `Yiii 2325 + | "Zanb" -> `Zanb 2326 + | "Zinh" -> `Zinh 2327 + | "Zyyy" -> `Zyyy 2328 + | "Zzzz" -> `Zzzz 2329 + | v -> err (err_att_val v) 2330 + end 2331 + 2332 + let i_script_seq v = 2333 + let script v = o_script (i_script v) in 2334 + Script_extensions_v (List.map script (split_string v ' ')) 2335 + 2336 + let i_sentence_break v = Sentence_break_v begin match v with 2337 + | "AT" -> `AT 2338 + | "CL" -> `CL 2339 + | "CR" -> `CR 2340 + | "EX" -> `EX 2341 + | "FO" -> `FO 2342 + | "LE" -> `LE 2343 + | "LF" -> `LF 2344 + | "LO" -> `LO 2345 + | "NU" -> `NU 2346 + | "SC" -> `SC 2347 + | "SE" -> `SE 2348 + | "SP" -> `SP 2349 + | "ST" -> `ST 2350 + | "UP" -> `UP 2351 + | "XX" -> `XX 2352 + | v -> err (err_att_val v) 2353 + end 2354 + 2355 + let i_string v = String_v v 2356 + let i_uax_42_element v = UAX_42_element_v begin match v with 2357 + | "reserved" -> `Reserved 2358 + | "noncharacter" -> `Noncharacter 2359 + | "surrogate" -> `Surrogate 2360 + | "char" -> `Char 2361 + | s -> err (err_att_val s) 2362 + end 2363 + 2364 + let i_vertical_orientation v = Vertical_orientation_v begin match v with 2365 + | "U" -> `U 2366 + | "R" -> `R 2367 + | "Tu" -> `Tu 2368 + | "Tr" -> `Tr 2369 + | s -> err (err_att_val s) 2370 + end 2371 + 2372 + let i_word_break v = Word_break_v begin match v with 2373 + | "CR" -> `CR 2374 + | "DQ" -> `DQ 2375 + | "EB" -> `EB 2376 + | "EBG" -> `EBG 2377 + | "EM" -> `EM 2378 + | "EX" -> `EX 2379 + | "Extend" -> `Extend 2380 + | "FO" -> `FO 2381 + | "GAZ" -> `GAZ 2382 + | "HL" -> `HL 2383 + | "KA" -> `KA 2384 + | "LE" -> `LE 2385 + | "LF" -> `LF 2386 + | "MB" -> `MB 2387 + | "ML" -> `ML 2388 + | "MN" -> `MN 2389 + | "NL" -> `NL 2390 + | "NU" -> `NU 2391 + | "RI" -> `RI 2392 + | "SQ" -> `SQ 2393 + | "WSegSpace" -> `WSegSpace 2394 + | "XX" -> `XX 2395 + | "ZWJ" -> `ZWJ 2396 + | v -> err (err_att_val v) 2397 + end 2398 + 2399 + module Pkey = struct type t = key let 
compare : key -> key -> int = compare end 2400 + module Pmap = Map.Make (Pkey) 2401 + type props = value Pmap.t 2402 + type 'a prop = key * (value -> 'a) (* property key and value projection. *) 2403 + 2404 + let find props (k, o) = try Some (o (Pmap.find k props)) with Not_found -> None 2405 + let unknown_prop name = (Other name), o_string 2406 + 2407 + 2408 + (* non hunihan and unikemet properties *) 2409 + 2410 + let uax_42_element = UAX_42_element, o_uax_42_element (* artefact of Uucd *) 2411 + 2412 + let age = Age, o_age 2413 + let alphabetic = Alphabetic, o_bool 2414 + let ascii_hex_digit = Ascii_hex_digit, o_bool 2415 + let bidi_class = Bidi_class, o_bidi_class 2416 + let bidi_control = Bidi_control, o_bool 2417 + let bidi_mirrored = Bidi_mirrored, o_bool 2418 + let bidi_mirroring_glyph = Bidi_mirroring_glyph, o_cp_opt 2419 + let bidi_paired_bracket = Bidi_paired_bracket, o_cp_map 2420 + let bidi_paired_bracket_type = 2421 + Bidi_paired_bracket_type, o_bidi_paired_bracket_type 2422 + 2423 + let block = Block, o_block 2424 + let canonical_combining_class = Canonical_combining_class, o_int 2425 + let cased = Cased, o_bool 2426 + let case_folding = Case_folding, o_cps_map 2427 + let case_ignorable = Case_ignorable, o_bool 2428 + let changes_when_casefolded = Changes_when_casefolded, o_bool 2429 + let changes_when_casemapped = Changes_when_casemapped, o_bool 2430 + let changes_when_lowercased = Changes_when_lowercased, o_bool 2431 + let changes_when_nfkc_casefolded = Changes_when_nfkc_casefolded, o_bool 2432 + let changes_when_titlecased = Changes_when_titlecased, o_bool 2433 + let changes_when_uppercased = Changes_when_uppercased, o_bool 2434 + let composition_exclusion = Composition_exclusion, o_bool 2435 + let dash = Dash, o_bool 2436 + let decomposition_mapping = Decomposition_mapping, o_cps_map 2437 + let decomposition_type = Decomposition_type, o_decomposition_type 2438 + let default_ignorable_code_point = Default_ignorable_code_point, o_bool 2439 + let 
deprecated = Deprecated, o_bool 2440 + let diacritic = Diacritic, o_bool 2441 + let east_asian_width = East_asian_width, o_east_asian_width 2442 + let emoji = Emoji, o_bool 2443 + let emoji_presentation = Emoji_presentation, o_bool 2444 + let emoji_modifier = Emoji_modifier, o_bool 2445 + let emoji_modifier_base = Emoji_modifier_base, o_bool 2446 + let emoji_component = Emoji_component, o_bool 2447 + let equivalent_unified_ideograph = Equivalent_unified_ideograph, o_cp_opt 2448 + let extended_pictographic = Extended_pictographic, o_bool 2449 + let extender = Extender, o_bool 2450 + let full_composition_exclusion = Full_composition_exclusion, o_bool 2451 + let general_category = General_category, o_general_category 2452 + let grapheme_base = Grapheme_base, o_bool 2453 + let grapheme_cluster_break = Grapheme_cluster_break, o_grapheme_cluster_break 2454 + let grapheme_extend = Grapheme_extend, o_bool 2455 + let hangul_syllable_type = Hangul_syllable_type, o_hangul_syllable_type 2456 + let hex_digit = Hex_digit, o_bool 2457 + let id_continue = Id_continue, o_bool 2458 + let id_compat_math_continue = Id_compat_math_continue, o_bool 2459 + let id_compat_math_start = Id_compat_math_start, o_bool 2460 + let id_start = Id_start, o_bool 2461 + let ideographic = Ideographic, o_bool 2462 + let ids_binary_operator = Ids_binary_operator, o_bool 2463 + let ids_trinary_operator = Ids_trinary_operator, o_bool 2464 + let ids_unary_operator = Ids_unary_operator, o_bool 2465 + let indic_conjunct_break = Indic_conjunct_break, o_indic_conjunct_break 2466 + let indic_syllabic_category = Indic_syllabic_category, o_indic_syllabic_category 2467 + let indic_matra_category = Indic_matra_category, o_indic_matra_category 2468 + let indic_positional_category = 2469 + Indic_positional_category, o_indic_positional_category 2470 + let jamo_short_name = Jamo_short_name, o_string 2471 + let join_control = Join_control, o_bool 2472 + let joining_group = Joining_group, o_joining_group 2473 + let 
joining_type = Joining_type, o_joining_type 2474 + let line_break = Line_break, o_line_break 2475 + let logical_order_exception = Logical_order_exception, o_bool 2476 + let lowercase = Lowercase, o_bool 2477 + let lowercase_mapping = Lowercase_mapping, o_cps_map 2478 + let math = Math, o_bool 2479 + let modifier_combining_mark = Modifier_combining_mark, o_bool 2480 + let name = Name, o_name 2481 + let name_alias = Name_alias, o_name_alias 2482 + let nfc_quick_check = Nfc_quick_check, o_bool_maybe 2483 + let nfd_quick_check = Nfd_quick_check, o_bool_maybe 2484 + let nfkc_quick_check = Nfkc_quick_check, o_bool_maybe 2485 + let nfkc_casefold = Nfkc_casefold, o_cps_map 2486 + let nfkc_simple_casefold = Nfkc_simple_casefold, o_cps_map 2487 + let nfkd_quick_check = Nfkd_quick_check, o_bool_maybe 2488 + let noncharacter_code_point = Noncharacter_code_point, o_bool 2489 + let numeric_type = Numeric_type, o_numeric_type 2490 + let numeric_value = Numeric_value, o_numeric_value 2491 + let other_alphabetic = Other_alphabetic, o_bool 2492 + let other_default_ignorable_code_point = 2493 + Other_default_ignorable_code_point, o_bool 2494 + 2495 + let other_grapheme_extend = Other_grapheme_extend, o_bool 2496 + let other_id_continue = Other_id_continue, o_bool 2497 + let other_id_start = Other_id_start, o_bool 2498 + let other_lowercase = Other_lowercase, o_bool 2499 + let other_math = Other_math, o_bool 2500 + let other_uppercase = Other_uppercase, o_bool 2501 + let pattern_syntax = Pattern_syntax, o_bool 2502 + let pattern_white_space = Pattern_white_space, o_bool 2503 + let prepended_concatenation_mark = Prepended_concatenation_mark, o_bool 2504 + let quotation_mark = Quotation_mark, o_bool 2505 + let radical = Radical, o_bool 2506 + let regional_indicator = Regional_indicator, o_bool 2507 + let script = Script, o_script 2508 + let script_extensions = Script_extensions, o_script_extensions 2509 + let sentence_break = Sentence_break, o_sentence_break 2510 + let 
simple_case_folding = Simple_case_folding, o_cp_map 2511 + let simple_lowercase_mapping = Simple_lowercase_mapping, o_cp_map 2512 + let simple_titlecase_mapping = Simple_titlecase_mapping, o_cp_map 2513 + let simple_uppercase_mapping = Simple_uppercase_mapping, o_cp_map 2514 + let soft_dotted = Soft_dotted, o_bool 2515 + let sterm = Sterm, o_bool 2516 + let terminal_punctuation = Terminal_punctuation, o_bool 2517 + let titlecase_mapping = Titlecase_mapping, o_cps_map 2518 + let unicode_1_name = Unicode_1_name, o_string 2519 + let unified_ideograph = Unified_ideograph, o_bool 2520 + let uppercase = Uppercase, o_bool 2521 + let uppercase_mapping = Uppercase_mapping, o_cps_map 2522 + let variation_selector = Variation_selector, o_bool 2523 + let vertical_orientation = Vertical_orientation, o_vertical_orientation 2524 + let white_space = White_space, o_bool 2525 + let word_break = Word_break, o_word_break 2526 + let xid_continue = Xid_continue, o_bool 2527 + let xid_start = Xid_start, o_bool 2528 + 2529 + (* unihan properties *) 2530 + 2531 + let kAccountingNumeric = KAccountingNumeric, o_string 2532 + let kAlternateHanYu = KAlternateHanYu, o_string 2533 + let kAlternateJEF = KAlternateJEF, o_string 2534 + let kAlternateKangXi = KAlternateKangXi, o_string 2535 + let kAlternateMorohashi = KAlternateMorohashi, o_string 2536 + let kAlternateTotalStrokes = KAlternateTotalStrokes, o_string 2537 + let kBigFive = KBigFive, o_string 2538 + let kCCCII = KCCCII, o_string 2539 + let kCNS1986 = KCNS1986, o_string 2540 + let kCNS1992 = KCNS1992, o_string 2541 + let kCangjie = KCangjie, o_string 2542 + let kCantonese = KCantonese, o_string 2543 + let kCheungBauer = KCheungBauer, o_string 2544 + let kCheungBauerIndex = KCheungBauerIndex, o_string 2545 + let kCihaiT = KCihaiT, o_string 2546 + let kCompatibilityVariant = KCompatibilityVariant, o_string 2547 + let kCowles = KCowles, o_string 2548 + let kDaeJaweon = KDaeJaweon, o_string 2549 + let kDefinition = KDefinition, o_string 2550 
(* Unihan properties kEACC .. kTGT_RSUnicode: each pairs its key with the
   string projection [o_string]. *)
let kEACC = (KEACC, o_string)
let kFanqie = (KFanqie, o_string)
let kFenn = (KFenn, o_string)
let kFennIndex = (KFennIndex, o_string)
let kFourCornerCode = (KFourCornerCode, o_string)
let kFrequency = (KFrequency, o_string)
let kGB0 = (KGB0, o_string)
let kGB1 = (KGB1, o_string)
let kGB3 = (KGB3, o_string)
let kGB5 = (KGB5, o_string)
let kGB8 = (KGB8, o_string)
let kGSR = (KGSR, o_string)
let kGradeLevel = (KGradeLevel, o_string)
let kHDZRadBreak = (KHDZRadBreak, o_string)
let kHKGlyph = (KHKGlyph, o_string)
let kHKSCS = (KHKSCS, o_string)
let kHanYu = (KHanYu, o_string)
let kHangul = (KHangul, o_string)
let kHanyuPinlu = (KHanyuPinlu, o_string)
let kHanyuPinyin = (KHanyuPinyin, o_string)
let kIBMJapan = (KIBMJapan, o_string)
let kIICore = (KIICore, o_string)
let kIRGDaeJaweon = (KIRGDaeJaweon, o_string)
let kIRGDaiKanwaZiten = (KIRGDaiKanwaZiten, o_string)
let kIRGHanyuDaZidian = (KIRGHanyuDaZidian, o_string)
let kIRGKangXi = (KIRGKangXi, o_string)
let kIRG_GSource = (KIRG_GSource, o_string)
let kIRG_HSource = (KIRG_HSource, o_string)
let kIRG_JSource = (KIRG_JSource, o_string)
let kIRG_KPSource = (KIRG_KPSource, o_string)
let kIRG_KSource = (KIRG_KSource, o_string)
let kIRG_MSource = (KIRG_MSource, o_string)
let kIRG_SSource = (KIRG_SSource, o_string)
let kIRG_TSource = (KIRG_TSource, o_string)
let kIRG_USource = (KIRG_USource, o_string)
let kIRG_UKSource = (KIRG_UKSource, o_string)
let kIRG_VSource = (KIRG_VSource, o_string)
let kJHJ = (KJHJ, o_string)
let kJIS0213 = (KJIS0213, o_string)
let kJapanese = (KJapanese, o_string)
let kJapaneseKun = (KJapaneseKun, o_string)
let kJapaneseOn = (KJapaneseOn, o_string)
let kJinmeiyoKanji = (KJinmeiyoKanji, o_string)
let kJis0 = (KJis0, o_string)
let kJis1 = (KJis1, o_string)
let kJoyoKanji = (KJoyoKanji, o_string)
let kKPS0 = (KKPS0, o_string)
let kKPS1 = (KKPS1, o_string)
let kKSC0 = (KKSC0, o_string)
let kKSC1 = (KKSC1, o_string)
let kKangXi = (KKangXi, o_string)
let kKarlgren = (KKarlgren, o_string)
let kKorean = (KKorean, o_string)
let kKoreanEducationHanja = (KKoreanEducationHanja, o_string)
let kKoreanName = (KKoreanName, o_string)
let kLau = (KLau, o_string)
let kMainlandTelegraph = (KMainlandTelegraph, o_string)
let kMandarin = (KMandarin, o_string)
let kMatthews = (KMatthews, o_string)
let kMeyerWempe = (KMeyerWempe, o_string)
let kMojiJoho = (KMojiJoho, o_string)
let kMorohashi = (KMorohashi, o_string)
let kNelson = (KNelson, o_string)
let kNSHU_DubenSrc = (KNSHU_DubenSrc, o_string)
let kNSHU_Reading = (KNSHU_Reading, o_string)
let kOtherNumeric = (KOtherNumeric, o_string)
let kPhonetic = (KPhonetic, o_string)
let kPrimaryNumeric = (KPrimaryNumeric, o_string)
let kPseudoGB1 = (KPseudoGB1, o_string)
let kRSAdobe_Japan1_6 = (KRSAdobe_Japan1_6, o_string)
let kRSJapanese = (KRSJapanese, o_string)
let kRSKanWa = (KRSKanWa, o_string)
let kRSKangXi = (KRSKangXi, o_string)
let kRSKorean = (KRSKorean, o_string)
let kRSMerged = (KRSMerged, o_string)
let kRSUnicode = (KRSUnicode, o_string)
let kSBGY = (KSBGY, o_string)
let kSemanticVariant = (KSemanticVariant, o_string)
let kSimplifiedVariant = (KSimplifiedVariant, o_string)
let kSMSZD2003Index = (KSMSZD2003Index, o_string)
let kSMSZD2003Readings = (KSMSZD2003Readings, o_string)
let kSpecializedSemanticVariant = (KSpecializedSemanticVariant, o_string)
let kSpoofingVariant = (KSpoofingVariant, o_string)
let kStrange = (KStrange, o_string)
let kTGH = (KTGH, o_string)
let kTGHZ2013 = (KTGHZ2013, o_string)
let kTGT_MergedSrc = (KTGT_MergedSrc, o_string)
let kTGT_RSUnicode = (KTGT_RSUnicode, o_string)
2638 + let kTaiwanTelegraph = KTaiwanTelegraph, o_string 2639 + let kTang = KTang, o_string 2640 + let kTayNumeric = KTayNumeric, o_string 2641 + let kTotalStrokes = KTotalStrokes, o_string 2642 + let kTraditionalVariant = KTraditionalVariant, o_string 2643 + let kUnihanCore2020 = KUnihanCore2020, o_string 2644 + let kVietnamese = KVietnamese, o_string 2645 + let kVietnameseNumeric = KVietnameseNumeric, o_string 2646 + let kWubi = KWubi, o_string 2647 + let kXHC1983 = KXHC1983, o_string 2648 + let kXerox = KXerox, o_string 2649 + let kZhuang = KZhuang, o_string 2650 + let kZhuangNumeric = KZhuangNumeric, o_string 2651 + let kZVariant = KZVariant, o_string 2652 + 2653 + (* Unikemet properties *) 2654 + 2655 + let kEH_Cat = KEH_Cat, o_string 2656 + let kEH_Core = KEH_Core, o_string 2657 + let kEH_Desc = KEH_Desc, o_string 2658 + let kEH_Func = KEH_Func, o_string 2659 + let kEH_FVal = KEH_FVal, o_string 2660 + let kEH_UniK = KEH_UniK, o_string 2661 + let kEH_JSesh = KEH_JSesh, o_string 2662 + let kEH_HG = KEH_HG, o_string 2663 + let kEH_IFAO = KEH_IFAO, o_string 2664 + let kEH_NoMirror = KEH_NoMirror, o_bool 2665 + let kEH_NoRotate = KEH_NoRotate, o_bool 2666 + let kEH_AltSeq = KEH_AltSeq, o_string 2667 + 2668 + 2669 + (* Unicode Character Databases *) 2670 + 2671 + type block = (cp * cp) * string 2672 + type named_sequence = string * cp list 2673 + type standardized_variant = 2674 + cp list * string * [ `Isolate | `Initial | `Medial | `Final ] list 2675 + 2676 + type cjk_radical = string * cp * cp 2677 + type do_not_emit = { instead_of : cp list; use : cp list; because : string; } 2678 + 2679 + type t = 2680 + { description : string; 2681 + repertoire : props Cpmap.t; 2682 + blocks : block list; 2683 + named_sequences : named_sequence list; 2684 + provisional_named_sequences : named_sequence list; 2685 + standardized_variants : standardized_variant list; 2686 + cjk_radicals : cjk_radical list; 2687 + do_not_emit : do_not_emit list; } 2688 + 2689 + let cp_props db cp 
= 2690 + try Some (Cpmap.find cp db.repertoire) with Not_found -> None 2691 + 2692 + let cp_prop db cp p = try find (Cpmap.find cp db.repertoire) p 2693 + with Not_found -> None 2694 + 2695 + (* Decode *) 2696 + 2697 + (* Xml names *) 2698 + 2699 + let ns_ucd = "http://www.unicode.org/ns/2003/ucd/1.0" 2700 + let n_block = (ns_ucd, "block") 2701 + let n_blocks = (ns_ucd, "blocks") 2702 + let n_char = (ns_ucd, "char") 2703 + let n_cjk_radical = (ns_ucd, "cjk-radical") 2704 + let n_cjk_radicals = (ns_ucd, "cjk-radicals") 2705 + let n_do_not_emit = (ns_ucd, "do-not-emit") 2706 + let n_description = (ns_ucd, "description") 2707 + let n_group = (ns_ucd, "group") 2708 + let n_instead = (ns_ucd, "instead") 2709 + let n_name_alias = (ns_ucd, "name-alias") 2710 + let n_named_sequence = (ns_ucd, "named-sequence") 2711 + let n_named_sequences = (ns_ucd, "named-sequences") 2712 + let n_noncharacter = (ns_ucd, "noncharacter") 2713 + let n_provisional_named_sequences = (ns_ucd, "provisional-named-sequences") 2714 + let n_repertoire = (ns_ucd, "repertoire") 2715 + let n_reserved = (ns_ucd, "reserved") 2716 + let n_standardized_variant = (ns_ucd, "standardized-variant") 2717 + let n_standardized_variants = (ns_ucd, "standardized-variants") 2718 + let n_surrogate = (ns_ucd, "surrogate") 2719 + let n_ucd = (ns_ucd, "ucd") 2720 + 2721 + (* Attribute parsing *) 2722 + 2723 + let add_prop : value Pmap.t -> Xmlm.attribute -> value Pmap.t = 2724 + let h = Hashtbl.create 500 in 2725 + let map = Hashtbl.add h in 2726 + map "AHex" (Ascii_hex_digit, i_bool); 2727 + map "Alpha" (Alphabetic, i_bool); 2728 + map "Bidi_C" (Bidi_control, i_bool); 2729 + map "Bidi_M" (Bidi_mirrored, i_bool); 2730 + map "Cased" (Cased, i_bool); 2731 + map "CI" (Case_ignorable, i_bool); 2732 + map "CE" (Composition_exclusion, i_bool); 2733 + map "CWCF" (Changes_when_casefolded, i_bool); 2734 + map "CWCM" (Changes_when_casemapped, i_bool); 2735 + map "CWL" (Changes_when_lowercased, i_bool); 2736 + map "CWKCF" 
(Changes_when_nfkc_casefolded, i_bool); 2737 + map "CWT" (Changes_when_titlecased, i_bool); 2738 + map "CWU" (Changes_when_uppercased, i_bool); 2739 + map "Comp_Ex" (Full_composition_exclusion, i_bool); 2740 + map "DI" (Default_ignorable_code_point, i_bool); 2741 + map "Dash" (Dash, i_bool); 2742 + map "Dep" (Deprecated, i_bool); 2743 + map "Dia" (Diacritic, i_bool); 2744 + map "EqUIdeo" (Equivalent_unified_ideograph, i_cp_opt); 2745 + map "Ext" (Extender, i_bool); 2746 + map "GCB" (Grapheme_cluster_break, i_grapheme_cluster_break); 2747 + map "Gr_Base" (Grapheme_base, i_bool); 2748 + map "Gr_Ext" (Grapheme_extend, i_bool); 2749 + map "Hex" (Hex_digit, i_bool); 2750 + map "ID_Compat_Math_Continue" (Id_compat_math_continue, i_bool); 2751 + map "ID_Compat_Math_Start" (Id_compat_math_start, i_bool); 2752 + map "IDC" (Id_continue, i_bool); 2753 + map "IDS" (Id_start, i_bool); 2754 + map "IDSB" (Ids_binary_operator, i_bool); 2755 + map "IDST" (Ids_trinary_operator, i_bool); 2756 + map "IDSU" (Ids_unary_operator, i_bool); 2757 + map "Ideo" (Ideographic, i_bool); 2758 + map "InCB" (Indic_conjunct_break, i_indic_conjunct_break); 2759 + map "InSC" (Indic_syllabic_category, i_indic_syllabic_category); 2760 + map "InMC" (Indic_matra_category, i_indic_matra_category); 2761 + map "InPC" (Indic_positional_category, i_indic_positional_category); 2762 + map "JSN" (Jamo_short_name, i_string); 2763 + map "Join_C" (Join_control, i_bool); 2764 + map "LOE" (Logical_order_exception, i_bool); 2765 + map "Lower" (Lowercase, i_bool); 2766 + map "Math" (Math, i_bool); 2767 + map "MCM" (Modifier_combining_mark, i_bool); 2768 + map "NChar" (Noncharacter_code_point, i_bool); 2769 + map "NFC_QC" (Nfc_quick_check, i_bool_maybe); 2770 + map "NFD_QC" (Nfd_quick_check, i_bool_maybe); 2771 + map "NFKC_QC" (Nfkc_quick_check, i_bool_maybe); 2772 + map "NFKC_CF" (Nfkc_casefold, i_cps_map ~empty:true); 2773 + map "NFKC_SCF" (Nfkc_simple_casefold, i_cps_map ~empty:true); 2774 + map "NFKD_QC" 
(Nfkd_quick_check, i_bool_maybe); 2775 + map "OAlpha" (Other_alphabetic, i_bool); 2776 + map "ODI" (Other_default_ignorable_code_point, i_bool); 2777 + map "OGr_Ext" (Other_grapheme_extend, i_bool); 2778 + map "OIDC" (Other_id_continue, i_bool); 2779 + map "OIDS" (Other_id_start, i_bool); 2780 + map "OLower" (Other_lowercase, i_bool); 2781 + map "OMath" (Other_math, i_bool); 2782 + map "OUpper" (Other_uppercase, i_bool); 2783 + map "Pat_Syn" (Pattern_syntax, i_bool); 2784 + map "Pat_WS" (Pattern_white_space, i_bool); 2785 + map "PCM" (Prepended_concatenation_mark, i_bool); 2786 + map "QMark" (Quotation_mark, i_bool); 2787 + map "Radical" (Radical, i_bool); 2788 + map "RI" (Regional_indicator, i_bool); 2789 + map "SB" (Sentence_break, i_sentence_break); 2790 + map "SD" (Soft_dotted, i_bool); 2791 + map "STerm" (Sterm, i_bool); 2792 + map "Term" (Terminal_punctuation, i_bool); 2793 + map "UIdeo" (Unified_ideograph, i_bool); 2794 + map "Upper" (Uppercase, i_bool); 2795 + map "VS" (Variation_selector, i_bool); 2796 + map "vo" (Vertical_orientation, i_vertical_orientation); 2797 + map "WB" (Word_break, i_word_break); 2798 + map "WSpace" (White_space, i_bool); 2799 + map "XIDC" (Xid_continue, i_bool); 2800 + map "XIDS" (Xid_start, i_bool); 2801 + map "age" (Age, i_age); 2802 + map "bc" (Bidi_class, i_bidi_class); 2803 + map "blk" (Block, i_block); 2804 + map "bmg" (Bidi_mirroring_glyph, i_cp_opt); 2805 + map "bpb" (Bidi_paired_bracket, i_cp_map); 2806 + map "bpt" (Bidi_paired_bracket_type, i_bidi_paired_bracket_type); 2807 + map "ccc" (Canonical_combining_class, i_int); 2808 + map "cf" (Case_folding, i_cps_map ~empty:false); 2809 + map "dm" (Decomposition_mapping, (i_cps_map ~empty:true)); 2810 + map "dt" (Decomposition_type, i_decomposition_type); 2811 + map "ea" (East_asian_width, i_east_asian_width); 2812 + map "Emoji" (Emoji, i_bool); 2813 + map "EPres" (Emoji_presentation, i_bool); 2814 + map "EMod" (Emoji_modifier, i_bool); 2815 + map "EBase" (Emoji_modifier_base, 
i_bool); 2816 + map "EComp" (Emoji_component, i_bool); 2817 + map "ExtPict" (Extended_pictographic, i_bool); 2818 + map "gc" (General_category, i_general_category); 2819 + map "hst" (Hangul_syllable_type, i_hangul_syllable_type); 2820 + map "jg" (Joining_group, i_joining_group); 2821 + map "jt" (Joining_type, i_joining_type); 2822 + map "lb" (Line_break, i_line_break); 2823 + map "lc" (Lowercase_mapping, i_cps_map ~empty:false); 2824 + map "na" (Name, i_name); 2825 + map "na1" (Unicode_1_name, i_string); 2826 + map "nt" (Numeric_type, i_numeric_type); 2827 + map "nv" (Numeric_value, i_numeric_value); 2828 + map "sc" (Script, i_script); 2829 + map "scf" (Simple_case_folding, i_cp_map); 2830 + map "scx" (Script_extensions, i_script_seq); 2831 + map "slc" (Simple_lowercase_mapping, i_cp_map); 2832 + map "stc" (Simple_titlecase_mapping, i_cp_map); 2833 + map "suc" (Simple_uppercase_mapping, i_cp_map); 2834 + map "tc" (Titlecase_mapping, i_cps_map ~empty:false); 2835 + map "uax_42_element" (UAX_42_element, i_uax_42_element); (* artefact *) 2836 + map "uc" (Uppercase_mapping, i_cps_map ~empty:false); 2837 + map "kAccountingNumeric" (KAccountingNumeric, i_string); 2838 + map "kAlternateHanYu" (KAlternateHanYu, i_string); 2839 + map "kAlternateJEF" (KAlternateJEF, i_string); 2840 + map "kAlternateKangXi" (KAlternateKangXi, i_string); 2841 + map "kAlternateMorohashi" (KAlternateMorohashi, i_string); 2842 + map "kBigFive" (KBigFive, i_string); 2843 + map "kCCCII" (KCCCII, i_string); 2844 + map "kCNS1986" (KCNS1986, i_string); 2845 + map "kCNS1992" (KCNS1992, i_string); 2846 + map "kCangjie" (KCangjie, i_string); 2847 + map "kCantonese" (KCantonese, i_string); 2848 + map "kCheungBauer" (KCheungBauer, i_string); 2849 + map "kCheungBauerIndex" (KCheungBauerIndex, i_string); 2850 + map "kCihaiT" (KCihaiT, i_string); 2851 + map "kCompatibilityVariant" (KCompatibilityVariant, i_string); 2852 + map "kCowles" (KCowles, i_string); 2853 + map "kDaeJaweon" (KDaeJaweon, i_string); 2854 
+ map "kDefinition" (KDefinition, i_string); 2855 + map "kEACC" (KEACC, i_string); 2856 + map "kFanqie" (KFanqie, i_string); 2857 + map "kFenn" (KFenn, i_string); 2858 + map "kFennIndex" (KFennIndex, i_string); 2859 + map "kFourCornerCode" (KFourCornerCode, i_string); 2860 + map "kFrequency" (KFrequency, i_string); 2861 + map "kGB0" (KGB0, i_string); 2862 + map "kGB1" (KGB1, i_string); 2863 + map "kGB3" (KGB3, i_string); 2864 + map "kGB5" (KGB5, i_string); 2865 + map "kGB8" (KGB8, i_string); 2866 + map "kGSR" (KGSR, i_string); 2867 + map "kGradeLevel" (KGradeLevel, i_string); 2868 + map "kHDZRadBreak" (KHDZRadBreak, i_string); 2869 + map "kHKGlyph" (KHKGlyph, i_string); 2870 + map "kHKSCS" (KHKSCS, i_string); 2871 + map "kHanYu" (KHanYu, i_string); 2872 + map "kHangul" (KHangul, i_string); 2873 + map "kHanyuPinlu" (KHanyuPinlu, i_string); 2874 + map "kHanyuPinyin" (KHanyuPinyin, i_string); 2875 + map "kIBMJapan" (KIBMJapan, i_string); 2876 + map "kIICore" (KIICore, i_string); 2877 + map "kIRGDaeJaweon" (KIRGDaeJaweon, i_string); 2878 + map "kIRGDaiKanwaZiten" (KIRGDaiKanwaZiten, i_string); 2879 + map "kIRGHanyuDaZidian" (KIRGHanyuDaZidian, i_string); 2880 + map "kIRGKangXi" (KIRGKangXi, i_string); 2881 + map "kIRG_GSource" (KIRG_GSource, i_string); 2882 + map "kIRG_HSource" (KIRG_HSource, i_string); 2883 + map "kIRG_JSource" (KIRG_JSource, i_string); 2884 + map "kIRG_KPSource" (KIRG_KPSource, i_string); 2885 + map "kIRG_KSource" (KIRG_KSource, i_string); 2886 + map "kIRG_MSource" (KIRG_MSource, i_string); 2887 + map "kIRG_SSource" (KIRG_SSource, i_string); 2888 + map "kIRG_TSource" (KIRG_TSource, i_string); 2889 + map "kIRG_USource" (KIRG_USource, i_string); 2890 + map "kIRG_UKSource" (KIRG_UKSource, i_string); 2891 + map "kIRG_VSource" (KIRG_VSource, i_string); 2892 + map "kJapanese" (KJapanese, i_string); 2893 + map "kJHJ" (KJHJ, i_string); 2894 + map "kJIS0213" (KJIS0213, i_string); 2895 + map "kJapaneseKun" (KJapaneseKun, i_string); 2896 + map "kJapaneseOn" 
(KJapaneseOn, i_string); 2897 + map "kJinmeiyoKanji" (KJinmeiyoKanji, i_string); 2898 + map "kJis0" (KJis0, i_string); 2899 + map "kJis1" (KJis1, i_string); 2900 + map "kJoyoKanji" (KJoyoKanji, i_string); 2901 + map "kKPS0" (KKPS0, i_string); 2902 + map "kKPS1" (KKPS1, i_string); 2903 + map "kKSC0" (KKSC0, i_string); 2904 + map "kKSC1" (KKSC1, i_string); 2905 + map "kKangXi" (KKangXi, i_string); 2906 + map "kKarlgren" (KKarlgren, i_string); 2907 + map "kKorean" (KKorean, i_string); 2908 + map "kKoreanEducationHanja" (KKoreanEducationHanja, i_string); 2909 + map "kKoreanName" (KKoreanName, i_string); 2910 + map "kLau" (KLau, i_string); 2911 + map "kMainlandTelegraph" (KMainlandTelegraph, i_string); 2912 + map "kMandarin" (KMandarin, i_string); 2913 + map "kMatthews" (KMatthews, i_string); 2914 + map "kMeyerWempe" (KMeyerWempe, i_string); 2915 + map "kMorohashi" (KMorohashi, i_string); 2916 + map "kNelson" (KNelson, i_string); 2917 + map "kNSHU_DubenSrc" (KNSHU_DubenSrc, i_string); 2918 + map "kNSHU_Reading" (KNSHU_Reading, i_string); 2919 + map "kOtherNumeric" (KOtherNumeric, i_string); 2920 + map "kPhonetic" (KPhonetic, i_string); 2921 + map "kPrimaryNumeric" (KPrimaryNumeric, i_string); 2922 + map "kPseudoGB1" (KPseudoGB1, i_string); 2923 + map "kRSAdobe_Japan1_6" (KRSAdobe_Japan1_6, i_string); 2924 + map "kRSJapanese" (KRSJapanese, i_string); 2925 + map "kRSKanWa" (KRSKanWa, i_string); 2926 + map "kRSKangXi" (KRSKangXi, i_string); 2927 + map "kRSKorean" (KRSKorean, i_string); 2928 + map "kRSMerged" (KRSMerged, i_string); 2929 + map "kRSUnicode" (KRSUnicode, i_string); 2930 + map "kSBGY" (KSBGY, i_string); 2931 + map "kSemanticVariant" (KSemanticVariant, i_string); 2932 + map "kSimplifiedVariant" (KSimplifiedVariant, i_string); 2933 + map "kSMSZD2003Index" (KSMSZD2003Index, i_string); 2934 + map "kSMSZD2003Readings" (KSMSZD2003Readings, i_string); 2935 + map "kSpecializedSemanticVariant" (KSpecializedSemanticVariant, i_string); 2936 + map "kSpoofingVariant" 
(KSpoofingVariant, i_string);
  map "kTGH" (KTGH, i_string);
  map "kTGHZ2013" (KTGHZ2013, i_string);
  map "kTGT_MergedSrc" (KTGT_MergedSrc, i_string);
  map "kTGT_RSUnicode" (KTGT_RSUnicode, i_string);
  map "kTaiwanTelegraph" (KTaiwanTelegraph, i_string);
  map "kTang" (KTang, i_string);
  map "kTayNumeric" (KTayNumeric, i_string);
  map "kTotalStrokes" (KTotalStrokes, i_string);
  map "kTraditionalVariant" (KTraditionalVariant, i_string);
  map "kVietnamese" (KVietnamese, i_string);
  map "kVietnameseNumeric" (KVietnameseNumeric, i_string);
  map "kWubi" (KWubi, i_string);
  map "kXHC1983" (KXHC1983, i_string);
  map "kXerox" (KXerox, i_string);
  map "kZhuang" (KZhuang, i_string);
  map "kZhuangNumeric" (KZhuangNumeric, i_string);
  map "kZVariant" (KZVariant, i_string);
  map "kEH_Cat" (KEH_Cat, i_string);
  map "kEH_Core" (KEH_Core, i_string);
  map "kEH_Desc" (KEH_Desc, i_string);
  map "kEH_Func" (KEH_Func, i_string);
  map "kEH_FVal" (KEH_FVal, i_string);
  map "kEH_UniK" (KEH_UniK, i_string);
  map "kEH_JSesh" (KEH_JSesh, i_string);
  map "kEH_HG" (KEH_HG, i_string);
  map "kEH_IFAO" (KEH_IFAO, i_string);
  map "kEH_NoMirror" (KEH_NoMirror, i_bool);
  map "kEH_NoRotate" (KEH_NoRotate, i_bool);
  map "kEH_AltSeq" (KEH_AltSeq, i_string);
  fun m (n, v) ->
    try match n with
    | ("", p) ->
        let k, conv = Hashtbl.find h p in
        Pmap.add k (conv v) m
    | _ -> raise Not_found
    with Not_found -> Pmap.add (Other n) (i_string v) m

(* [attv n atts] is the value of the first attribute of [atts] whose
   expanded name is [("", n)]. If there is none the result is
   [err_miss_att n]. *)
let attv n atts =
  try List.assoc ("", n) atts with
  | Not_found -> err_miss_att n

(* Skips an element whose start signal was already input: signals are
   consumed up to and including the matching [`El_end]. *)
let skip_el d =
  let rec drop nesting = match Xmlm.input d with
  | `El_start _ -> drop (nesting + 1)
  | `El_end when nesting = 0 -> ()
  | `El_end -> drop (nesting - 1)
  | `Data _ | `Dtd _ -> drop nesting
  in
  drop 0

(* Parses a sequence of empty elements named [n] terminated by an
   [`El_end]. The attributes of each such element are converted with
   [p_atts]; the results are returned in document order. Elements with
   another name are skipped, character data is an error. *)
let p_seq n p_atts d =
  let rec collect acc = match Xmlm.input d with
  | `El_start (n', atts) when n' = n ->
      (* The element must be empty: its end signal comes right away. *)
      begin match Xmlm.input d with
      | `El_end -> collect (p_atts atts :: acc)
      | _ -> err err_exp_el_end
      end
  | `El_start _ -> skip_el d; collect acc
  | `El_end -> List.rev acc
  | `Data _ -> err err_data
  | _ -> assert false
  in
  collect []

(* Parses the content of a description element, start signal was input.
   The result is the character data, or [""] if the element is empty. *)
let p_description d = match Xmlm.input d with
| `El_end -> ""
| `Data desc ->
    begin match Xmlm.input d with
    | `El_end -> desc
    | _ -> err err_exp_el_end
    end
| _ -> err err_exp_data

(* Parses the name alias children of a code point element. Returns the
   [(alias, type)] pairs in document order. The [`El_end] of the
   enclosing element is only peeked, it is left in the input. *)
let p_name_aliases d =
  (* Folds one attribute into the (alias, type) pair read so far. *)
  let read_att (alias, kind) = function
  | ("", "alias"), v -> (v, kind)
  | ("", "type"), v -> (alias, Some (i_name_alias_type v))
  | _ -> (alias, kind)
  in
  let rec scan nesting aliases = match Xmlm.peek d with
  | `El_start (n, atts) when n = n_name_alias ->
      ignore (Xmlm.input d);
      begin match List.fold_left read_att ("", None) atts with
      | _, None -> err err_invalid_name_alias_spec
      | alias, Some kind -> scan (nesting + 1) ((alias, kind) :: aliases)
      end
  | `El_start _ -> ignore (Xmlm.input d); skip_el d; scan nesting aliases
  | `El_end when nesting = 0 -> List.rev aliases
  | `El_end -> ignore (Xmlm.input d); scan (nesting - 1) aliases
  | `Data _ -> err err_data
  | _ -> assert false
  in
  scan 0 []

(* Parses a code point element, start signal was input. [atts] are the
   attributes of the element and [g_props] the properties they are added
   to. The resulting properties are bound in [rep] either to the code
   point given by the "cp" attribute or, if there is none, to every code
   point of the range given by "first-cp" and "last-cp". *)
let p_cp d rep atts g_props =
  let classify (props, cp, first, last) ((n, v) as att) = match n with
  | ("", "cp") -> (props, Some (cp_of_string v), first, last)
  | ("", "first-cp") -> (props, cp, Some (cp_of_string v), last)
  | ("", "last-cp") -> (props, cp, first, Some (cp_of_string v))
  | _ -> (add_prop props att, cp, first, last)
  in
  let props, cp, first, last =
    List.fold_left classify (g_props, None, None, None) atts
  in
  (* Name aliases are children of the element, read them from the input. *)
  let props = Pmap.add Name_alias (Name_alias_v (p_name_aliases d)) props in
  match cp, first, last with
  | Some cp, _, _ -> Cpmap.add cp props rep
  | None, Some first, Some last ->
      let filled = ref rep in
      for cp = first to last do filled := Cpmap.add cp props !filled done;
      !filled
  | None, _, _ -> err err_invalid_cp_spec

(* Parses the content of the repertoire element, start signal was input.
   The result maps code points to their properties. *)
let p_repertoire d =
  let eatt t = ("", "uax_42_element"), t in (* fake attribute for uniformity *)
  let rec loop depth rep g_atts = match Xmlm.input d with
  | `El_start (n, atts) when n = n_reserved ->
      code_points depth rep g_atts "reserved" atts
  | `El_start (n, atts) when n = n_noncharacter ->
      code_points depth rep g_atts "noncharacter" atts
  | `El_start (n, atts) when n = n_surrogate ->
      code_points depth rep g_atts "surrogate" atts
  | `El_start (n, atts) when n = n_char ->
      code_points depth rep g_atts "char" atts
  | `El_start (n, atts) when n = n_group ->
      (* Group properties start from empty: no group hierarchy. *)
      let group_props = List.fold_left add_prop Pmap.empty atts in
      let rep = loop 0 rep group_props in
      loop depth rep Pmap.empty
  | `El_start _ -> skip_el d; loop depth rep g_atts (* skip foreign *)
  | `El_end when depth = 0 -> rep
  | `El_end -> loop (depth - 1) rep g_atts
  | `Data _ -> err err_data
  | _ -> assert false
  (* Adds the code point(s) of an element tagged [tag]. The end signal of
     the element is left in the input by [p_cp], hence the deeper loop. *)
  and code_points depth rep g_atts tag atts =
    loop (depth + 1) (p_cp d rep (eatt tag :: atts) g_atts) g_atts
  in
  loop 0 Cpmap.empty Pmap.empty

(* Parses the content of the blocks element: a list of
   [((first-cp, last-cp), name)]. *)
let p_blocks d =
  let block_of_atts atts =
    (cp_of_string (attv "first-cp" atts), cp_of_string (attv "last-cp" atts)),
    attv "name" atts
  in
  p_seq n_block block_of_atts d

(* Parses the content of a named sequences element: a list of
   [(name, cps)]. *)
let p_named_sequences d =
  let sequence_of_atts atts =
    attv "name" atts, cps_of_string (attv "cps" atts)
  in
  p_seq n_named_sequence sequence_of_atts d

(* Parses the content of the standardized variants element: a list of
   [(cps, desc, when)] where [when] is the list of values found in the
   space separated "when" attribute. *)
let p_standardized_variants d =
  let position = function
  | "isolate" -> `Isolate
  | "initial" -> `Initial
  | "medial" -> `Medial
  | "final" -> `Final
  | s -> err (err_att_val s)
  in
  let when_of_string v = List.map position (split_string v ' ') in
  let variant_of_atts atts =
    cps_of_string (attv "cps" atts),
    attv "desc" atts,
    when_of_string (attv "when" atts)
  in
  p_seq n_standardized_variant variant_of_atts d

(* Parses the content of the CJK radicals element: a list of
   [(number, radical, ideograph)]. *)
let p_cjk_radicals d =
  let radical_of_atts atts =
    attv "number" atts,
    cp_of_string (attv "radical" atts),
    cp_of_string (attv "ideograph" atts)
  in
  p_seq n_cjk_radical radical_of_atts d

(* Parses the content of the do not emit element: one record per
   instead element. *)
let p_do_not_emit d =
  let instead_of_atts atts =
    let instead_of = cps_of_string (attv "of" atts) in
    let use = cps_of_string (attv "use" atts) in
    let because = attv "because" atts in
    { instead_of; use; because }
  in
  p_seq n_instead instead_of_atts d

(* Parses the content of the ucd element, start signal was input. Each
   known child element may occur at most once, foreign elements are
   skipped. Children that do not occur get an empty value in the result.
   The input must be exhausted after the end of the ucd element. *)
let p_ucd d =
  let description = ref None in
  let repertoire = ref None in
  let blocks = ref None in
  let named_sequences = ref None in
  let provisional_named_sequences = ref None in
  let standardized_variants = ref None in
  let cjk_radicals = ref None in
  let do_not_emit = ref None in
  (* Parses element [n] into [slot], a second occurrence is an error. *)
  let once n slot parse = match !slot with
  | Some _ -> err (err_dup n)
  | None -> slot := Some (parse d)
  in
  let rec children () = match Xmlm.peek d with
  | `El_end -> ()
  | _ ->
      begin match Xmlm.input d with
      | `El_start (n, _) when n = n_description ->
          once n description p_description
      | `El_start (n, _) when n = n_repertoire ->
          once n repertoire p_repertoire
      | `El_start (n, _) when n = n_blocks ->
          once n blocks p_blocks
      | `El_start (n, _) when n = n_named_sequences ->
          once n named_sequences p_named_sequences
      | `El_start (n, _) when n = n_provisional_named_sequences ->
          once n provisional_named_sequences p_named_sequences
      | `El_start (n, _) when n = n_standardized_variants ->
          once n standardized_variants p_standardized_variants
      | `El_start (n, _) when n = n_cjk_radicals ->
          once n cjk_radicals p_cjk_radicals
      | `El_start (n, _) when n = n_do_not_emit ->
          once n do_not_emit p_do_not_emit
      | `El_start _ -> skip_el d (* foreign markup *)
      | `Data _ -> err err_data
      | _ -> assert false
      end;
      children ()
  in
  children ();
  ignore (Xmlm.input d); (* the end signal peeked by [children] *)
  if not (Xmlm.eoi d) then err err_wf;
  let get slot default = match !slot with Some v -> v | None -> default in
  { description = get description "";
    repertoire = get repertoire Cpmap.empty;
    blocks = get blocks [];
    named_sequences = get named_sequences [];
    provisional_named_sequences = get provisional_named_sequences [];
    standardized_variants = get standardized_variants [];
    cjk_radicals = get cjk_radicals [];
    do_not_emit = get do_not_emit []; }

type src = [ `Channel of in_channel | `String of string ]
type decoder = Xmlm.input

(* [decoder src] is an Xmlm input reading from [src], created with
   [~strip:true]. Strings are read from position [0]. *)
let decoder src =
  let xml_src = match src with
  | `Channel _ as c -> c
  | `String s -> `String (0, s)
  in
  Xmlm.make_input ~strip:true xml_src

(* Both ends of the range are the current position of the decoder. *)
let decoded_range d =
  let pos = Xmlm.pos d in
  pos, pos

(* [decode d] inputs the document of [d]. The result is [`Ok ucd] if the
   root element is the ucd element and its content parses, and
   [`Error msg] if a [Failure] or an [Xmlm.Error] is raised meanwhile. *)
let decode d =
  try
    ignore (Xmlm.input d); (* `Dtd *)
    begin match Xmlm.input d with
    | `El_start (root, _) when root = n_ucd -> `Ok (p_ucd d)
    | `El_start (root, _) -> err (err_exp_ucd root)
    | _ -> assert false
    end
  with
  | Xmlm.Error (_, e) -> `Error (Xmlm.error_message e)
  | Failure msg -> `Error msg
+1371
vendor/opam/uucd/src/uucd.mli
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2012 The uucd programmers. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Unicode character database decoder. 7 + 8 + [Uucd] decodes the data of the 9 + {{:http://www.unicode.org/reports/tr44}Unicode character database} 10 + from its XML representation. It provides high-level (but not 11 + necessarily efficient) access to the data so that efficient 12 + representations can be extracted. 13 + 14 + [Uucd] decodes the representation described in the Annex #42 of 15 + Unicode %%UNICODE_VERSION%%. Subsequent versions may be decoded as 16 + long as no new cases are introduced in parsed enumerated 17 + properties. 18 + 19 + Consult the {{!basics}basics}. 20 + 21 + {b Note.} All strings returned by the module are UTF-8 encoded. 22 + 23 + {e Unicode version %%UNICODE_VERSION%%} 24 + 25 + {3 References} 26 + {ul 27 + {- The Unicode Consortium. 28 + {e {{:http://www.unicode.org/versions/latest}The Unicode Standard}}. 29 + (latest version)} 30 + {- Mark Davis, Ken Whistler. 31 + {e {{:http://www.unicode.org/reports/tr44/}UAX #44 Unicode Character 32 + Database}}. (latest version)} 33 + {- Eric Muller. 34 + {e {{:http://www.unicode.org/reports/tr42/}UAX #42 Unicode Character 35 + Database in XML}}. (latest version)}} *) 36 + 37 + (** {1:chars Code points} *) 38 + 39 + type cp = int 40 + (** The type for Unicode {{:http://unicode.org/glossary/#code_point}code 41 + points}, ranges from [0x0000] to [0x10_FFFF]. *) 42 + 43 + val is_cp : int -> bool 44 + (** [is_cp n] is [true] iff [n] is a Unicode 45 + {{:http://unicode.org/glossary/#code_point}code 46 + point}. *) 47 + 48 + val is_scalar_value : int -> bool 49 + (** [is_scalar_value n] is [true] iff [n] is a Unicode 50 + {{:http://unicode.org/glossary/#Unicode_scalar_value}scalar value}. *) 51 + 52 + (** Code point maps.
*) 53 + module Cpmap : Map.S with type key = cp 54 + 55 + (** {1:props Properties} 56 + 57 + Properties are referenced by their name and property values by 58 + their 59 + {{:http://www.unicode.org/Public/UNIDATA/PropertyValueAliases.txt} 60 + abbreviated name}. To understand their semantics refer to the 61 + {{:http://www.unicode.org/versions/latest/}standard}. *) 62 + 63 + type props 64 + (** The type for sets of properties. *) 65 + 66 + type 'a prop 67 + (** The type for properties with property value of type ['a]. *) 68 + 69 + val find : props -> 'a prop -> 'a option 70 + (** [find ps p] is the value of property [p] in [ps], if any. *) 71 + 72 + val unknown_prop : string * string -> string prop 73 + (** [unknown_prop (ns, n)] is a property read from an XML attribute 74 + whose expanded name is [(ns, n)]. This can be used to access a 75 + property unknown to the module. *) 76 + 77 + (** {2:nonunihan Non Unihan properties} 78 + 79 + In alphabetical order. *) 80 + 81 + val age : [ `Version of int * int | `Unassigned ] prop 82 + val alphabetic : bool prop 83 + val ascii_hex_digit : bool prop 84 + val bidi_class : [ 85 + | `AL 86 + | `AN 87 + | `B 88 + | `BN 89 + | `CS 90 + | `EN 91 + | `ES 92 + | `ET 93 + | `FSI 94 + | `L 95 + | `LRE 96 + | `LRI 97 + | `LRO 98 + | `NSM 99 + | `ON 100 + | `PDF 101 + | `PDI 102 + | `R 103 + | `RLE 104 + | `RLI 105 + | `RLO 106 + | `S 107 + | `WS 108 + ] prop 109 + 110 + val bidi_control : bool prop 111 + val bidi_mirrored : bool prop 112 + val bidi_mirroring_glyph : cp option prop 113 + val bidi_paired_bracket : [ `Self | `Cp of cp ] prop 114 + val bidi_paired_bracket_type : [ `O | `C | `N ] prop 115 + val block : [ 116 + | `ASCII 117 + | `Adlam 118 + | `Aegean_Numbers 119 + | `Ahom 120 + | `Alchemical 121 + | `Alphabetic_PF 122 + | `Anatolian_Hieroglyphs 123 + | `Ancient_Greek_Music 124 + | `Ancient_Greek_Numbers 125 + | `Ancient_Symbols 126 + | `Arabic 127 + | `Arabic_Ext_A 128 + | `Arabic_Ext_B 129 + | `Arabic_Ext_C 130 + | 
`Arabic_Math 131 + | `Arabic_PF_A 132 + | `Arabic_PF_B 133 + | `Arabic_Sup 134 + | `Armenian 135 + | `Arrows 136 + | `Avestan 137 + | `Balinese 138 + | `Bamum 139 + | `Bamum_Sup 140 + | `Bassa_Vah 141 + | `Batak 142 + | `Bengali 143 + | `Beria_Erfe 144 + | `Bhaiksuki 145 + | `Block_Elements 146 + | `Bopomofo 147 + | `Bopomofo_Ext 148 + | `Box_Drawing 149 + | `Brahmi 150 + | `Braille 151 + | `Buginese 152 + | `Buhid 153 + | `Byzantine_Music 154 + | `CJK 155 + | `CJK_Compat 156 + | `CJK_Compat_Forms 157 + | `CJK_Compat_Ideographs 158 + | `CJK_Compat_Ideographs_Sup 159 + | `CJK_Ext_A 160 + | `CJK_Ext_B 161 + | `CJK_Ext_C 162 + | `CJK_Ext_D 163 + | `CJK_Ext_E 164 + | `CJK_Ext_F 165 + | `CJK_Ext_G 166 + | `CJK_Ext_H 167 + | `CJK_Ext_I 168 + | `CJK_Ext_J 169 + | `CJK_Radicals_Sup 170 + | `CJK_Strokes 171 + | `CJK_Symbols 172 + | `Carian 173 + | `Caucasian_Albanian 174 + | `Chakma 175 + | `Cham 176 + | `Cherokee 177 + | `Cherokee_Sup 178 + | `Chess_Symbols 179 + | `Chorasmian 180 + | `Compat_Jamo 181 + | `Control_Pictures 182 + | `Coptic 183 + | `Coptic_Epact_Numbers 184 + | `Counting_Rod 185 + | `Cuneiform 186 + | `Cuneiform_Numbers 187 + | `Currency_Symbols 188 + | `Cypriot_Syllabary 189 + | `Cypro_Minoan 190 + | `Cyrillic 191 + | `Cyrillic_Ext_A 192 + | `Cyrillic_Ext_B 193 + | `Cyrillic_Ext_C 194 + | `Cyrillic_Ext_D 195 + | `Cyrillic_Sup 196 + | `Deseret 197 + | `Devanagari 198 + | `Devanagari_Ext 199 + | `Devanagari_Ext_A 200 + | `Diacriticals 201 + | `Diacriticals_Ext 202 + | `Diacriticals_For_Symbols 203 + | `Diacriticals_Sup 204 + | `Dingbats 205 + | `Dives_Akuru 206 + | `Dogra 207 + | `Domino 208 + | `Duployan 209 + | `Early_Dynastic_Cuneiform 210 + | `Egyptian_Hieroglyph_Format_Controls 211 + | `Egyptian_Hieroglyphs 212 + | `Egyptian_Hieroglyphs_Ext_A 213 + | `Elbasan 214 + | `Elymaic 215 + | `Emoticons 216 + | `Enclosed_Alphanum 217 + | `Enclosed_Alphanum_Sup 218 + | `Enclosed_CJK 219 + | `Enclosed_Ideographic_Sup 220 + | `Ethiopic 221 + | `Ethiopic_Ext 222 + | 
`Ethiopic_Ext_A 223 + | `Ethiopic_Ext_B 224 + | `Ethiopic_Sup 225 + | `Garay 226 + | `Geometric_Shapes 227 + | `Geometric_Shapes_Ext 228 + | `Georgian 229 + | `Georgian_Ext 230 + | `Georgian_Sup 231 + | `Glagolitic 232 + | `Glagolitic_Sup 233 + | `Gothic 234 + | `Grantha 235 + | `Greek 236 + | `Greek_Ext 237 + | `Gujarati 238 + | `Gunjala_Gondi 239 + | `Gurmukhi 240 + | `Gurung_Khema 241 + | `Half_And_Full_Forms 242 + | `Half_Marks 243 + | `Hangul 244 + | `Hanifi_Rohingya 245 + | `Hanunoo 246 + | `Hatran 247 + | `Hebrew 248 + | `High_PU_Surrogates 249 + | `High_Surrogates 250 + | `Hiragana 251 + | `IDC 252 + | `IPA_Ext 253 + | `Ideographic_Symbols 254 + | `Imperial_Aramaic 255 + | `Indic_Number_Forms 256 + | `Indic_Siyaq_Numbers 257 + | `Inscriptional_Pahlavi 258 + | `Inscriptional_Parthian 259 + | `Jamo 260 + | `Jamo_Ext_A 261 + | `Jamo_Ext_B 262 + | `Javanese 263 + | `Kaithi 264 + | `Kaktovik_Numerals 265 + | `Kana_Ext_A 266 + | `Kana_Ext_B 267 + | `Kana_Sup 268 + | `Kanbun 269 + | `Kangxi 270 + | `Kannada 271 + | `Katakana 272 + | `Katakana_Ext 273 + | `Kawi 274 + | `Kayah_Li 275 + | `Kharoshthi 276 + | `Khitan_Small_Script 277 + | `Khmer 278 + | `Khmer_Symbols 279 + | `Khojki 280 + | `Khudawadi 281 + | `Kirat_Rai 282 + | `Lao 283 + | `Latin_1_Sup 284 + | `Latin_Ext_A 285 + | `Latin_Ext_Additional 286 + | `Latin_Ext_B 287 + | `Latin_Ext_C 288 + | `Latin_Ext_D 289 + | `Latin_Ext_E 290 + | `Latin_Ext_F 291 + | `Latin_Ext_G 292 + | `Lepcha 293 + | `Letterlike_Symbols 294 + | `Limbu 295 + | `Linear_A 296 + | `Linear_B_Ideograms 297 + | `Linear_B_Syllabary 298 + | `Lisu 299 + | `Lisu_Sup 300 + | `Low_Surrogates 301 + | `Lycian 302 + | `Lydian 303 + | `Mahajani 304 + | `Mahjong 305 + | `Makasar 306 + | `Malayalam 307 + | `Mandaic 308 + | `Manichaean 309 + | `Marchen 310 + | `Masaram_Gondi 311 + | `Math_Alphanum 312 + | `Math_Operators 313 + | `Mayan_Numerals 314 + | `Medefaidrin 315 + | `Meetei_Mayek 316 + | `Meetei_Mayek_Ext 317 + | `Mende_Kikakui 318 + | 
`Meroitic_Cursive 319 + | `Meroitic_Hieroglyphs 320 + | `Miao 321 + | `Misc_Arrows 322 + | `Misc_Math_Symbols_A 323 + | `Misc_Math_Symbols_B 324 + | `Misc_Pictographs 325 + | `Misc_Symbols 326 + | `Misc_Symbols_Sup 327 + | `Misc_Technical 328 + | `Modi 329 + | `Modifier_Letters 330 + | `Modifier_Tone_Letters 331 + | `Mongolian 332 + | `Mongolian_Sup 333 + | `Mro 334 + | `Multani 335 + | `Music 336 + | `Myanmar 337 + | `Myanmar_Ext_A 338 + | `Myanmar_Ext_B 339 + | `Myanmar_Ext_C 340 + | `NB 341 + | `NKo 342 + | `Nabataean 343 + | `Nag_Mundari 344 + | `Nandinagari 345 + | `New_Tai_Lue 346 + | `Newa 347 + | `Number_Forms 348 + | `Nushu 349 + | `Nyiakeng_Puachue_Hmong 350 + | `OCR 351 + | `Ogham 352 + | `Ol_Onal 353 + | `Ol_Chiki 354 + | `Old_Hungarian 355 + | `Old_Italic 356 + | `Old_North_Arabian 357 + | `Old_Permic 358 + | `Old_Persian 359 + | `Old_Sogdian 360 + | `Old_South_Arabian 361 + | `Old_Turkic 362 + | `Old_Uyghur 363 + | `Oriya 364 + | `Ornamental_Dingbats 365 + | `Osage 366 + | `Osmanya 367 + | `Ottoman_Siyaq_Numbers 368 + | `PUA 369 + | `Pahawh_Hmong 370 + | `Palmyrene 371 + | `Pau_Cin_Hau 372 + | `Phags_Pa 373 + | `Phaistos 374 + | `Phoenician 375 + | `Phonetic_Ext 376 + | `Phonetic_Ext_Sup 377 + | `Playing_Cards 378 + | `Psalter_Pahlavi 379 + | `Punctuation 380 + | `Rejang 381 + | `Rumi 382 + | `Runic 383 + | `Samaritan 384 + | `Saurashtra 385 + | `Sharada 386 + | `Sharada_Sup 387 + | `Shavian 388 + | `Shorthand_Format_Controls 389 + | `Siddham 390 + | `Sidetic 391 + | `Sinhala 392 + | `Sinhala_Archaic_Numbers 393 + | `Small_Forms 394 + | `Small_Kana_Ext 395 + | `Sogdian 396 + | `Sora_Sompeng 397 + | `Soyombo 398 + | `Specials 399 + | `Sundanese 400 + | `Sundanese_Sup 401 + | `Sunuwar 402 + | `Sup_Arrows_A 403 + | `Sup_Arrows_B 404 + | `Sup_Arrows_C 405 + | `Sup_Math_Operators 406 + | `Sup_PUA_A 407 + | `Sup_PUA_B 408 + | `Sup_Punctuation 409 + | `Sup_Symbols_And_Pictographs 410 + | `Super_And_Sub 411 + | `Sutton_SignWriting 412 + | `Syloti_Nagri 413 + 
| `Symbols_And_Pictographs_Ext_A 414 + | `Symbols_For_Legacy_Computing 415 + | `Symbols_For_Legacy_Computing_Sup 416 + | `Syriac 417 + | `Syriac_Sup 418 + | `Tagalog 419 + | `Tagbanwa 420 + | `Tags 421 + | `Tai_Le 422 + | `Tai_Tham 423 + | `Tai_Viet 424 + | `Tai_Xuan_Jing 425 + | `Tai_Yo 426 + | `Takri 427 + | `Tamil 428 + | `Tamil_Sup 429 + | `Tangsa 430 + | `Tangut 431 + | `Tangut_Components 432 + | `Tangut_Components_Sup 433 + | `Tangut_Sup 434 + | `Telugu 435 + | `Thaana 436 + | `Thai 437 + | `Tibetan 438 + | `Tifinagh 439 + | `Tirhuta 440 + | `Todhri 441 + | `Tolong_Siki 442 + | `Toto 443 + | `Transport_And_Map 444 + | `Tulu_Tigalari 445 + | `UCAS 446 + | `UCAS_Ext 447 + | `UCAS_Ext_A 448 + | `Ugaritic 449 + | `VS 450 + | `VS_Sup 451 + | `Vai 452 + | `Vedic_Ext 453 + | `Vertical_Forms 454 + | `Vithkuqi 455 + | `Wancho 456 + | `Warang_Citi 457 + | `Yezidi 458 + | `Yi_Radicals 459 + | `Yi_Syllables 460 + | `Yijing 461 + | `Zanabazar_Square 462 + | `Znamenny_Music 463 + ] prop 464 + 465 + val canonical_combining_class : int prop 466 + val cased : bool prop 467 + val case_folding : [`Self | `Cps of cp list ] prop 468 + val case_ignorable : bool prop 469 + val changes_when_casefolded : bool prop 470 + val changes_when_casemapped : bool prop 471 + val changes_when_lowercased : bool prop 472 + val changes_when_nfkc_casefolded : bool prop 473 + val changes_when_titlecased : bool prop 474 + val changes_when_uppercased : bool prop 475 + val composition_exclusion : bool prop 476 + val dash : bool prop 477 + val decomposition_mapping : [`Self | `Cps of cp list ] prop 478 + val decomposition_type : [ 479 + | `Can 480 + | `Com 481 + | `Enc 482 + | `Fin 483 + | `Font 484 + | `Fra 485 + | `Init 486 + | `Iso 487 + | `Med 488 + | `Nar 489 + | `Nb 490 + | `Sml 491 + | `Sqr 492 + | `Sub 493 + | `Sup 494 + | `Vert 495 + | `Wide 496 + | `None 497 + ] prop 498 + 499 + val default_ignorable_code_point : bool prop 500 + val deprecated : bool prop 501 + val diacritic : bool prop 502 + 
val east_asian_width : [ `A | `F | `H | `N | `Na | `W ] prop 503 + val emoji : bool prop 504 + val emoji_presentation : bool prop 505 + val emoji_modifier : bool prop 506 + val emoji_modifier_base : bool prop 507 + val emoji_component : bool prop 508 + val equivalent_unified_ideograph : cp option prop 509 + val extended_pictographic : bool prop 510 + val extender : bool prop 511 + val full_composition_exclusion : bool prop 512 + val general_category : [ 513 + | `Lu 514 + | `Ll 515 + | `Lt 516 + | `Lm 517 + | `Lo 518 + | `Mn 519 + | `Mc 520 + | `Me 521 + | `Nd 522 + | `Nl 523 + | `No 524 + | `Pc 525 + | `Pd 526 + | `Ps 527 + | `Pe 528 + | `Pi 529 + | `Pf 530 + | `Po 531 + | `Sm 532 + | `Sc 533 + | `Sk 534 + | `So 535 + | `Zs 536 + | `Zl 537 + | `Zp 538 + | `Cc 539 + | `Cf 540 + | `Cs 541 + | `Co 542 + | `Cn 543 + ] prop 544 + 545 + val grapheme_base : bool prop 546 + val grapheme_cluster_break : [ 547 + | `CN 548 + | `CR 549 + | `EB 550 + | `EBG 551 + | `EM 552 + | `EX 553 + | `GAZ 554 + | `L 555 + | `LF 556 + | `LV 557 + | `LVT 558 + | `PP 559 + | `RI 560 + | `SM 561 + | `T 562 + | `V 563 + | `XX 564 + | `ZWJ 565 + ] prop 566 + 567 + val grapheme_extend : bool prop 568 + val hangul_syllable_type : [ `L | `LV | `LVT | `T | `V | `NA ] prop 569 + val hex_digit : bool prop 570 + val id_continue : bool prop 571 + val id_compat_math_continue : bool prop 572 + val id_compat_math_start : bool prop 573 + val id_start : bool prop 574 + val ideographic : bool prop 575 + val ids_binary_operator : bool prop 576 + val ids_trinary_operator : bool prop 577 + val ids_unary_operator : bool prop 578 + 579 + val indic_conjunct_break : [ 580 + | `Consonant 581 + | `Extend 582 + | `Linker 583 + | `None ] prop 584 + 585 + val indic_syllabic_category : [ 586 + | `Avagraha 587 + | `Bindu 588 + | `Brahmi_Joining_Number 589 + | `Cantillation_Mark 590 + | `Consonant 591 + | `Consonant_Dead 592 + | `Consonant_Final 593 + | `Consonant_Head_Letter 594 + | `Consonant_Initial_Postfixed 595 + | 
`Consonant_Killer 596 + | `Consonant_Medial 597 + | `Consonant_Placeholder 598 + | `Consonant_Preceding_Repha 599 + | `Consonant_Prefixed 600 + | `Consonant_Repha 601 + | `Consonant_Subjoined 602 + | `Consonant_Succeeding_Repha 603 + | `Consonant_With_Stacker 604 + | `Gemination_Mark 605 + | `Invisible_Stacker 606 + | `Joiner 607 + | `Modifying_Letter 608 + | `Non_Joiner 609 + | `Nukta 610 + | `Number 611 + | `Number_Joiner 612 + | `Other 613 + | `Pure_Killer 614 + | `Reordering_Killer 615 + | `Register_Shifter 616 + | `Syllable_Modifier 617 + | `Tone_Letter 618 + | `Tone_Mark 619 + | `Virama 620 + | `Visarga 621 + | `Vowel 622 + | `Vowel_Dependent 623 + | `Vowel_Independent 624 + ] prop 625 + 626 + val indic_matra_category : [ 627 + | `Right 628 + | `Left 629 + | `Visual_Order_Left 630 + | `Left_And_Right 631 + | `Top 632 + | `Bottom 633 + | `Top_And_Bottom 634 + | `Top_And_Right 635 + | `Top_And_Left 636 + | `Top_And_Left_And_Right 637 + | `Bottom_And_Right 638 + | `Top_And_Bottom_And_Right 639 + | `Overstruck 640 + | `Invisible 641 + | `NA 642 + ] prop 643 + 644 + val indic_positional_category : [ 645 + | `Bottom 646 + | `Bottom_And_Left 647 + | `Bottom_And_Right 648 + | `Invisible 649 + | `Left 650 + | `Left_And_Right 651 + | `NA 652 + | `Overstruck 653 + | `Right 654 + | `Top 655 + | `Top_And_Bottom 656 + | `Top_And_Bottom_And_Left 657 + | `Top_And_Bottom_And_Right 658 + | `Top_And_Left 659 + | `Top_And_Left_And_Right 660 + | `Top_And_Right 661 + | `Visual_Order_Left 662 + ] prop 663 + 664 + val jamo_short_name : string prop 665 + val join_control : bool prop 666 + val joining_group : [ 667 + | `African_Feh 668 + | `African_Noon 669 + | `African_Qaf 670 + | `Ain 671 + | `Alaph 672 + | `Alef 673 + | `Alef_Maqsurah 674 + | `Beh 675 + | `Beth 676 + | `Burushaski_Yeh_Barree 677 + | `Dal 678 + | `Dalath_Rish 679 + | `E 680 + | `Farsi_Yeh 681 + | `Fe 682 + | `Feh 683 + | `Final_Semkath 684 + | `Gaf 685 + | `Gamal 686 + | `Hah 687 + | `Hanifi_Rohingya_Kinna_Ya 688 + 
| `Hanifi_Rohingya_Pa 689 + | `Hamza_On_Heh_Goal 690 + | `He 691 + | `Heh 692 + | `Heh_Goal 693 + | `Heth 694 + | `Kaf 695 + | `Kaph 696 + | `Kashmiri_Yeh 697 + | `Khaph 698 + | `Knotted_Heh 699 + | `Lam 700 + | `Lamadh 701 + | `Malayalam_Bha 702 + | `Malayalam_Ja 703 + | `Malayalam_Lla 704 + | `Malayalam_Llla 705 + | `Malayalam_Nga 706 + | `Malayalam_Nna 707 + | `Malayalam_Nnna 708 + | `Malayalam_Nya 709 + | `Malayalam_Ra 710 + | `Malayalam_Ssa 711 + | `Malayalam_Tta 712 + | `Manichaean_Aleph 713 + | `Manichaean_Ayin 714 + | `Manichaean_Beth 715 + | `Manichaean_Daleth 716 + | `Manichaean_Dhamedh 717 + | `Manichaean_Five 718 + | `Manichaean_Gimel 719 + | `Manichaean_Heth 720 + | `Manichaean_Hundred 721 + | `Manichaean_Kaph 722 + | `Manichaean_Lamedh 723 + | `Manichaean_Mem 724 + | `Manichaean_Nun 725 + | `Manichaean_One 726 + | `Manichaean_Pe 727 + | `Manichaean_Qoph 728 + | `Manichaean_Resh 729 + | `Manichaean_Sadhe 730 + | `Manichaean_Samekh 731 + | `Manichaean_Taw 732 + | `Manichaean_Ten 733 + | `Manichaean_Teth 734 + | `Manichaean_Thamedh 735 + | `Manichaean_Twenty 736 + | `Manichaean_Waw 737 + | `Manichaean_Yodh 738 + | `Manichaean_Zayin 739 + | `Meem 740 + | `Mim 741 + | `No_Joining_Group 742 + | `Noon 743 + | `Nun 744 + | `Nya 745 + | `Pe 746 + | `Qaf 747 + | `Qaph 748 + | `Reh 749 + | `Reversed_Pe 750 + | `Rohingya_Yeh 751 + | `Sad 752 + | `Sadhe 753 + | `Seen 754 + | `Semkath 755 + | `Shin 756 + | `Straight_Waw 757 + | `Swash_Kaf 758 + | `Syriac_Waw 759 + | `Tah 760 + | `Taw 761 + | `Teh_Marbuta 762 + | `Teh_Marbuta_Goal 763 + | `Teth 764 + | `Thin_Noon 765 + | `Thin_Yeh 766 + | `Vertical_Tail 767 + | `Waw 768 + | `Yeh 769 + | `Yeh_Barree 770 + | `Yeh_With_Tail 771 + | `Yudh 772 + | `Yudh_He 773 + | `Zain 774 + | `Zhain 775 + | `BAA 776 + | `FA 777 + | `HAA 778 + | `HA_GOAL 779 + | `HA 780 + | `CAF 781 + | `KNOTTED_HA 782 + | `RA 783 + | `SWASH_CAF 784 + | `HAMZAH_ON_HA_GOAL 785 + | `TAA_MARBUTAH 786 + | `YA_BARREE 787 + | `YA 788 + | `ALEF_MAQSURAH 789 + 
] prop 790 + 791 + val joining_type : [ `U | `C | `T | `D | `L | `R ] prop 792 + val line_break : [ 793 + | `AI 794 + | `AK 795 + | `AL 796 + | `AP 797 + | `AS 798 + | `B2 799 + | `BA 800 + | `BB 801 + | `BK 802 + | `CB 803 + | `CJ 804 + | `CL 805 + | `CM 806 + | `CP 807 + | `CR 808 + | `EX 809 + | `GL 810 + | `H2 811 + | `H3 812 + | `HH 813 + | `HL 814 + | `HY 815 + | `ID 816 + | `IN 817 + | `IS 818 + | `JL 819 + | `JT 820 + | `JV 821 + | `LF 822 + | `NL 823 + | `NS 824 + | `NU 825 + | `OP 826 + | `PO 827 + | `PR 828 + | `QU 829 + | `RI 830 + | `SA 831 + | `SG 832 + | `SP 833 + | `SY 834 + | `VF 835 + | `VI 836 + | `WJ 837 + | `XX 838 + | `ZW 839 + | `EB 840 + | `EM 841 + | `ZWJ 842 + ] prop 843 + 844 + val logical_order_exception : bool prop 845 + val lowercase : bool prop 846 + val lowercase_mapping : [`Self | `Cps of cp list ] prop 847 + val math : bool prop 848 + val name : [`Pattern of string | `Name of string ] prop 849 + (** In the [`Pattern] case occurrences of the character ['#'] 850 + ([U+0023]) in the string must be replaced by the value of the code 851 + point as four to six uppercase hexadecimal digits (the minimal 852 + needed). E.g. the pattern ["CJK UNIFIED IDEOGRAPH-#"] associated 853 + to code point [U+3400] gives the name ["CJK UNIFIED IDEOGRAPH-3400"]. 
*) 854 + 855 + val modifier_combining_mark : bool prop 856 + 857 + val name_alias : 858 + (string * [`Abbreviation | `Alternate | `Control | `Correction | `Figment]) 859 + list prop 860 + 861 + val nfc_quick_check : [ `True | `False | `Maybe ] prop 862 + val nfd_quick_check : [ `True | `False | `Maybe ] prop 863 + val nfkc_quick_check : [ `True | `False | `Maybe ] prop 864 + val nfkc_casefold : [`Self | `Cps of cp list] prop 865 + val nfkc_simple_casefold : [ `Self | `Cps of cp list ] prop 866 + val nfkd_quick_check : [ `True | `False | `Maybe ] prop 867 + val noncharacter_code_point : bool prop 868 + val numeric_type : [ `None | `De | `Di | `Nu ] prop 869 + val numeric_value : 870 + [ `NaN | `Nums of [`Frac of int * int | `Num of int64 ] list] prop 871 + 872 + val other_alphabetic : bool prop 873 + val other_default_ignorable_code_point : bool prop 874 + val other_grapheme_extend : bool prop 875 + val other_id_continue : bool prop 876 + val other_id_start : bool prop 877 + val other_lowercase : bool prop 878 + val other_math : bool prop 879 + val other_uppercase : bool prop 880 + val pattern_syntax : bool prop 881 + val pattern_white_space : bool prop 882 + val prepended_concatenation_mark : bool prop 883 + val quotation_mark : bool prop 884 + val radical : bool prop 885 + val regional_indicator : bool prop 886 + 887 + type script = [ 888 + | `Adlm 889 + | `Aghb 890 + | `Ahom 891 + | `Arab 892 + | `Armi 893 + | `Armn 894 + | `Avst 895 + | `Bali 896 + | `Bamu 897 + | `Bass 898 + | `Batk 899 + | `Beng 900 + | `Berf 901 + | `Bhks 902 + | `Bopo 903 + | `Brah 904 + | `Brai 905 + | `Bugi 906 + | `Buhd 907 + | `Cakm 908 + | `Cans 909 + | `Cari 910 + | `Cham 911 + | `Cher 912 + | `Chrs 913 + | `Copt 914 + | `Cpmn 915 + | `Cprt 916 + | `Cyrl 917 + | `Deva 918 + | `Diak 919 + | `Dogr 920 + | `Dsrt 921 + | `Dupl 922 + | `Egyp 923 + | `Elba 924 + | `Elym 925 + | `Ethi 926 + | `Gara 927 + | `Geor 928 + | `Glag 929 + | `Gong 930 + | `Gonm 931 + | `Goth 932 + | `Gran 933 + | 
`Grek 934 + | `Gujr 935 + | `Gukh 936 + | `Guru 937 + | `Hang 938 + | `Hani 939 + | `Hano 940 + | `Hatr 941 + | `Hebr 942 + | `Hira 943 + | `Hluw 944 + | `Hmng 945 + | `Hmnp 946 + | `Hrkt 947 + | `Hung 948 + | `Ital 949 + | `Java 950 + | `Kali 951 + | `Kana 952 + | `Kawi 953 + | `Khar 954 + | `Khmr 955 + | `Khoj 956 + | `Knda 957 + | `Krai 958 + | `Kthi 959 + | `Kits 960 + | `Lana 961 + | `Laoo 962 + | `Latn 963 + | `Lepc 964 + | `Limb 965 + | `Lina 966 + | `Linb 967 + | `Lisu 968 + | `Lyci 969 + | `Lydi 970 + | `Mahj 971 + | `Maka 972 + | `Mand 973 + | `Mani 974 + | `Marc 975 + | `Medf 976 + | `Mend 977 + | `Merc 978 + | `Mero 979 + | `Mlym 980 + | `Modi 981 + | `Mong 982 + | `Mroo 983 + | `Mtei 984 + | `Mult 985 + | `Mymr 986 + | `Nagm 987 + | `Nand 988 + | `Narb 989 + | `Nbat 990 + | `Newa 991 + | `Nkoo 992 + | `Nshu 993 + | `Ogam 994 + | `Olck 995 + | `Onao 996 + | `Orkh 997 + | `Orya 998 + | `Osge 999 + | `Osma 1000 + | `Ougr 1001 + | `Palm 1002 + | `Pauc 1003 + | `Perm 1004 + | `Phag 1005 + | `Phli 1006 + | `Phlp 1007 + | `Phnx 1008 + | `Plrd 1009 + | `Prti 1010 + | `Qaai 1011 + | `Rjng 1012 + | `Rohg 1013 + | `Runr 1014 + | `Samr 1015 + | `Sarb 1016 + | `Saur 1017 + | `Sgnw 1018 + | `Shaw 1019 + | `Shrd 1020 + | `Sidd 1021 + | `Sidt 1022 + | `Sind 1023 + | `Sinh 1024 + | `Sogd 1025 + | `Sogo 1026 + | `Sora 1027 + | `Soyo 1028 + | `Sund 1029 + | `Sunu 1030 + | `Sylo 1031 + | `Syrc 1032 + | `Tagb 1033 + | `Takr 1034 + | `Tale 1035 + | `Talu 1036 + | `Taml 1037 + | `Tang 1038 + | `Tavt 1039 + | `Tayo 1040 + | `Telu 1041 + | `Tfng 1042 + | `Tglg 1043 + | `Thaa 1044 + | `Thai 1045 + | `Tibt 1046 + | `Tirh 1047 + | `Tnsa 1048 + | `Todr 1049 + | `Tols 1050 + | `Toto 1051 + | `Tutg 1052 + | `Ugar 1053 + | `Vaii 1054 + | `Vith 1055 + | `Wara 1056 + | `Wcho 1057 + | `Xpeo 1058 + | `Xsux 1059 + | `Yezi 1060 + | `Yiii 1061 + | `Zanb 1062 + | `Zinh 1063 + | `Zyyy 1064 + | `Zzzz 1065 + ] 1066 + 1067 + val script : script prop 1068 + val script_extensions : script list 
prop 1069 + 1070 + val sentence_break : [ 1071 + | `AT 1072 + | `CL 1073 + | `CR 1074 + | `EX 1075 + | `FO 1076 + | `LE 1077 + | `LF 1078 + | `LO 1079 + | `NU 1080 + | `SC 1081 + | `SE 1082 + | `SP 1083 + | `ST 1084 + | `UP 1085 + | `XX 1086 + ] prop 1087 + 1088 + val simple_case_folding : [ `Self | `Cp of cp ] prop 1089 + val simple_lowercase_mapping : [ `Self | `Cp of cp ] prop 1090 + val simple_titlecase_mapping : [ `Self | `Cp of cp ] prop 1091 + val simple_uppercase_mapping : [ `Self | `Cp of cp ] prop 1092 + val soft_dotted : bool prop 1093 + val sterm : bool prop 1094 + val terminal_punctuation : bool prop 1095 + val titlecase_mapping : [`Self | `Cps of cp list ] prop 1096 + val uax_42_element : [ `Reserved | `Noncharacter | `Surrogate | `Char ] prop 1097 + (** Not normative, artefact of [Uucd]. Corresponds to the 1098 + {{:http://www.unicode.org/reports/tr42/#w1aac13b9b1}XML element name} 1099 + that describes the code point. *) 1100 + 1101 + val unicode_1_name : string prop 1102 + val unified_ideograph : bool prop 1103 + val uppercase : bool prop 1104 + val uppercase_mapping : [`Self | `Cps of cp list ] prop 1105 + val variation_selector : bool prop 1106 + val vertical_orientation : [ `U | `R | `Tu | `Tr ] prop 1107 + val white_space : bool prop 1108 + val word_break : [ 1109 + | `CR 1110 + | `DQ 1111 + | `EB 1112 + | `EBG 1113 + | `EM 1114 + | `EX 1115 + | `Extend 1116 + | `FO 1117 + | `GAZ 1118 + | `HL 1119 + | `KA 1120 + | `LE 1121 + | `LF 1122 + | `MB 1123 + | `ML 1124 + | `MN 1125 + | `NL 1126 + | `NU 1127 + | `RI 1128 + | `SQ 1129 + | `WSegSpace 1130 + | `XX 1131 + | `ZWJ 1132 + ] prop 1133 + 1134 + val xid_continue : bool prop 1135 + val xid_start : bool prop 1136 + 1137 + (** {2:unihan Unihan properties} 1138 + 1139 + In alphabetic order. For now unihan properties are always 1140 + represented as strings. 
*) 1141 + 1142 + val kAccountingNumeric : string prop 1143 + val kAlternateHanYu : string prop 1144 + val kAlternateJEF : string prop 1145 + val kAlternateKangXi : string prop 1146 + val kAlternateMorohashi : string prop 1147 + val kAlternateTotalStrokes : string prop 1148 + val kBigFive : string prop 1149 + val kCCCII : string prop 1150 + val kCNS1986 : string prop 1151 + val kCNS1992 : string prop 1152 + val kCangjie : string prop 1153 + val kCantonese : string prop 1154 + val kCheungBauer : string prop 1155 + val kCheungBauerIndex : string prop 1156 + val kCihaiT : string prop 1157 + val kCompatibilityVariant : string prop 1158 + val kCowles : string prop 1159 + val kDaeJaweon : string prop 1160 + val kDefinition : string prop 1161 + val kEACC : string prop 1162 + val kFanqie : string prop 1163 + val kFenn : string prop 1164 + val kFennIndex : string prop 1165 + val kFourCornerCode : string prop 1166 + val kFrequency : string prop 1167 + val kGB0 : string prop 1168 + val kGB1 : string prop 1169 + val kGB3 : string prop 1170 + val kGB5 : string prop 1171 + val kGB8 : string prop 1172 + val kGSR : string prop 1173 + val kGradeLevel : string prop 1174 + val kHDZRadBreak : string prop 1175 + val kHKGlyph : string prop 1176 + val kHKSCS : string prop 1177 + val kHanYu : string prop 1178 + val kHangul : string prop 1179 + val kHanyuPinlu : string prop 1180 + val kHanyuPinyin : string prop 1181 + val kIBMJapan : string prop 1182 + val kIICore : string prop 1183 + val kIRGDaeJaweon : string prop 1184 + val kIRGDaiKanwaZiten : string prop 1185 + val kIRGHanyuDaZidian : string prop 1186 + val kIRGKangXi : string prop 1187 + val kIRG_GSource : string prop 1188 + val kIRG_HSource : string prop 1189 + val kIRG_JSource : string prop 1190 + val kIRG_KPSource : string prop 1191 + val kIRG_KSource : string prop 1192 + val kIRG_MSource : string prop 1193 + val kIRG_SSource : string prop 1194 + val kIRG_TSource : string prop 1195 + val kIRG_USource : string prop 1196 + val 
kIRG_UKSource : string prop 1197 + val kIRG_VSource : string prop 1198 + val kJapanese : string prop 1199 + val kJapaneseKun : string prop 1200 + val kJapaneseOn : string prop 1201 + val kJHJ : string prop 1202 + val kJIS0213 : string prop 1203 + val kJinmeiyoKanji : string prop 1204 + val kJis0 : string prop 1205 + val kJis1 : string prop 1206 + val kJoyoKanji : string prop 1207 + val kKPS0 : string prop 1208 + val kKPS1 : string prop 1209 + val kKSC0 : string prop 1210 + val kKSC1 : string prop 1211 + val kKangXi : string prop 1212 + val kKarlgren : string prop 1213 + val kKorean : string prop 1214 + val kKoreanEducationHanja : string prop 1215 + val kKoreanName : string prop 1216 + val kLau : string prop 1217 + val kMainlandTelegraph : string prop 1218 + val kMandarin : string prop 1219 + val kMatthews : string prop 1220 + val kMeyerWempe : string prop 1221 + val kMojiJoho : string prop 1222 + val kMorohashi : string prop 1223 + val kNelson : string prop 1224 + val kNSHU_DubenSrc : string prop 1225 + val kNSHU_Reading : string prop 1226 + val kOtherNumeric : string prop 1227 + val kPhonetic : string prop 1228 + val kPrimaryNumeric : string prop 1229 + val kPseudoGB1 : string prop 1230 + val kRSAdobe_Japan1_6 : string prop 1231 + val kRSJapanese : string prop 1232 + val kRSKanWa : string prop 1233 + val kRSKangXi : string prop 1234 + val kRSKorean : string prop 1235 + val kRSMerged : string prop 1236 + val kRSUnicode : string prop 1237 + val kSBGY : string prop 1238 + val kSemanticVariant : string prop 1239 + val kSimplifiedVariant : string prop 1240 + val kSMSZD2003Index : string prop 1241 + val kSMSZD2003Readings : string prop 1242 + val kSpecializedSemanticVariant : string prop 1243 + val kSpoofingVariant : string prop 1244 + val kStrange : string prop 1245 + val kUnihanCore2020 : string prop 1246 + val kTGH : string prop 1247 + val kTGHZ2013 : string prop 1248 + val kTGT_MergedSrc : string prop 1249 + val kTGT_RSUnicode : string prop 1250 + val 
kTaiwanTelegraph : string prop 1251 + val kTang : string prop 1252 + val kTayNumeric : string prop 1253 + val kTotalStrokes : string prop 1254 + val kTraditionalVariant : string prop 1255 + val kVietnamese : string prop 1256 + val kVietnameseNumeric : string prop 1257 + val kWubi : string prop 1258 + val kXHC1983 : string prop 1259 + val kZhuang : string prop 1260 + val kXerox : string prop 1261 + val kZhuangNumeric : string prop 1262 + val kZVariant : string prop 1263 + 1264 + (** {1:unikemet Unikemet properties} *) 1265 + 1266 + val kEH_Cat : string prop 1267 + val kEH_Core : string prop 1268 + val kEH_Desc : string prop 1269 + val kEH_Func : string prop 1270 + val kEH_FVal : string prop 1271 + val kEH_UniK : string prop 1272 + val kEH_JSesh : string prop 1273 + val kEH_HG : string prop 1274 + val kEH_IFAO : string prop 1275 + val kEH_NoMirror : bool prop 1276 + val kEH_NoRotate : bool prop 1277 + val kEH_AltSeq : string prop 1278 + 1279 + (** {1:db Unicode character databases} *) 1280 + 1281 + type block = (cp * cp) * string 1282 + (** The type for blocks. Code point range, name of the block. *) 1283 + 1284 + type named_sequence = string * cp list 1285 + (** The type for named sequences. Sequence name, code point sequence. *) 1286 + 1287 + type standardized_variant = 1288 + cp list * string * [ `Isolate | `Initial | `Medial | `Final ] list 1289 + (** The type for standardized variants. Code point sequence, 1290 + description, when. *) 1291 + 1292 + type cjk_radical = string * cp * cp 1293 + (** The type for CJK radicals. Radical number, CJK radical character, 1294 + CJK unified ideograph. *) 1295 + 1296 + type do_not_emit = { instead_of : cp list; use : cp list; because : string; } 1297 + (** The type for do not emit character sequences. 
*) 1298 + 1299 + type t = 1300 + { description : string; 1301 + repertoire : props Cpmap.t; 1302 + blocks : block list; 1303 + named_sequences : named_sequence list; 1304 + provisional_named_sequences : named_sequence list; 1305 + standardized_variants : standardized_variant list; 1306 + cjk_radicals : cjk_radical list; 1307 + do_not_emit : do_not_emit list 1308 + } 1309 + (** The type for Unicode character databases. 1310 + 1311 + {b Note.} Absence of an optional top-level field in the database 1312 + is denoted by the neutral element of its type (empty string, empty 1313 + list, {!Cpmap.empty}). This means that the module doesn't 1314 + distinguish between absence of a field and presence of the field 1315 + with empty data (but incurs no problems in this context). *) 1316 + 1317 + val cp_prop : t -> cp -> 'a prop -> 'a option 1318 + (** [cp_prop ucd cp p] is the property [p] of the code point [cp] 1319 + in [ucd]'s repertoire, if [cp] is in the repertoire and the property 1320 + exists for [cp]. *) 1321 + 1322 + (** {1:decoder Decode} *) 1323 + 1324 + type src = [ `Channel of in_channel | `String of string ] 1325 + (** The type for input sources. *) 1326 + 1327 + type decoder 1328 + (** The type for Unicode character database decoders. *) 1329 + 1330 + val decoder : [< src] -> decoder 1331 + (** [decoder src] is a decoder that inputs from [src]. *) 1332 + 1333 + val decode : decoder -> [`Ok of t | `Error of string ] 1334 + (** [decode d] decodes a database from [d] or returns an error. *) 1335 + 1336 + val decoded_range : decoder -> (int * int) * (int * int) 1337 + (** [decoded_range d] is the range of characters spanning the [`Error] 1338 + decoded by [d]. A pair of line and column numbers respectively one and 1339 + zero based. *) 1340 + 1341 + (** {1:basics Basics} 1342 + 1343 + The database and subsets of it for Unicode %%UNICODE_VERSION%% are 1344 + available 1345 + {{:http://www.unicode.org/Public/%%UNICODE_VERSION%%/ucdxml/}here}. 
1346 + Databases with groups should be preferred, they maximize value 1347 + sharing and improve parsing performance. 1348 + 1349 + A database is decoded as follows: 1350 + {[ 1351 + let ucd_or_die inf = try 1352 + let ic = if inf = "-" then stdin else open_in inf in 1353 + let d = Uucd.decoder (`Channel ic) in 1354 + match Uucd.decode d with 1355 + | `Ok db -> db 1356 + | `Error e -> 1357 + let (l0, c0), (l1, c1) = Uucd.decoded_range d in 1358 + Printf.eprintf "%s:%d.%d-%d.%d: %s\n%!" inf l0 c0 l1 c1 e; 1359 + exit 1 1360 + with Sys_error e -> Printf.eprintf "%s\n%!" e; exit 1 1361 + 1362 + let ucd = ucd_or_die "/tmp/ucd.all.grouped.xml" 1363 + ]} 1364 + The convenience function {!cp_prop} can be used to query 1365 + the property of a given code point. For example the 1366 + {{!general_category}general category} of [U+1F42B] 1367 + is given by: 1368 + {[ 1369 + let u_1F42B_gc = Uucd.cp_prop ucd 0x1F42B Uucd.general_category 1370 + ]} 1371 + *)
+1
vendor/opam/uucd/src/uucd.mllib
··· 1 + Uucd
+32
vendor/opam/uucd/test/example.ml
(*---------------------------------------------------------------------------
   Copyright (c) 2012 The uucd programmers. All rights reserved.
   SPDX-License-Identifier: CC0-1.0
  ---------------------------------------------------------------------------*)

(* [with_open_bin file f] applies [f] to a binary-mode input channel on
   [file]. The channel is closed when [f] returns or raises. Raises
   [Sys_error] if [file] cannot be opened. *)
let with_open_bin file f =
  let ic = open_in_bin file in
  Fun.protect ~finally:(fun () -> close_in_noerr ic) (fun () -> f ic)

(* [ucd_or_die inf] decodes a Unicode character database from the file
   [inf], or from [stdin] if [inf] is ["-"]. On a decoding error or a
   [Sys_error] a message is printed on [stderr] and the program exits
   with code [1]. A channel opened here is closed before returning;
   [stdin] is left untouched. *)
let ucd_or_die inf =
  let decode ic =
    let d = Uucd.decoder (`Channel ic) in
    match Uucd.decode d with
    | `Ok db -> db
    | `Error e ->
        let (l0, c0), (l1, c1) = Uucd.decoded_range d in
        Printf.eprintf "%s:%d.%d-%d.%d: %s\n%!" inf l0 c0 l1 c1 e;
        exit 1
  in
  try if inf = "-" then decode stdin else with_open_bin inf decode with
  | Sys_error e -> Printf.eprintf "%s\n%!" e; exit 1

(* [ucd_from_marshaled inf] reads back a database written by [main].
   Marshaled data is binary, so the file is opened in binary mode; the
   channel is closed once the value has been read. The result type is
   not checked, as usual with [Marshal]. *)
let ucd_from_marshaled : string -> Uucd.t =
  fun inf -> with_open_bin inf Marshal.from_channel

(* Decodes the database named on the command line (default ["-"], i.e.
   [stdin]) and writes it marshaled on [stdout]. *)
let main () =
  let usage = "test [ucd.xml]" in
  let inf = ref None in
  let anon_fun file = match !inf with
  | Some _ -> raise (Arg.Bad ("Don't know what to do with " ^ file))
  | None -> inf := Some file
  in
  Arg.parse [] anon_fun usage;
  let inf = Option.value ~default:"-" !inf in
  let db = ucd_or_die inf in
  (* Marshal output is binary data: make sure no newline translation
     is applied to [stdout]. *)
  set_binary_mode_out stdout true;
  Marshal.to_channel stdout db []

let () = if !Sys.interactive then () else main ()
+30
vendor/opam/uucd/test/test_uucd.ml
(*---------------------------------------------------------------------------
   Copyright (c) 2024 The uucd programmers. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

open B0_std
open B0_testing

(* File path of [test/ucd.xml] taken relative to the current working
   directory. Fails if the working directory cannot be determined. *)
let ucd_xml_file () =
  let cwd = Result.error_to_failure (Os.Dir.cwd ()) in
  Fpath.to_string Fpath.(cwd // v "test/ucd.xml")

(* Checks that U+0020 has general category [`Zs] in [db]. *)
let check_space_is_zs (db : Uucd.t) =
  let space = Uucd.Cpmap.find 0x0020 db.Uucd.repertoire in
  Test.(option T.any) (Uucd.find space Uucd.general_category) (Some `Zs)
    ~__POS__

(* Decodes a database from [ic] and checks it. On a decoding error the
   test is stopped with the error location in [inf]. *)
let decode_and_check inf ic =
  let dec = Uucd.decoder (`Channel ic) in
  match Uucd.decode dec with
  | `Ok db -> check_space_is_zs db
  | `Error msg ->
      let (l0, c0), (l1, c1) = Uucd.decoded_range dec in
      Test.failstop ~__POS__ "%s:%d.%d-%d.%d: %s\n%!" inf l0 c0 l1 c1 msg

let test_decode =
  Test.test "Uucd.decode" @@ fun _ ->
  Test.noraise ~__POS__ @@ fun () ->
  let inf = ucd_xml_file () in
  match In_channel.with_open_bin inf (decode_and_check inf) with
  | res -> res
  | exception Sys_error msg -> Test.failstop "%s" msg ~__POS__

let main () = Test.main (fun () -> Test.autorun ())
let () = if not !Sys.interactive then exit (main ())