// Suite of AT Protocol TypeScript libraries built on web standards
// Returns how many bytes `str` occupies once encoded as UTF-8.
export const utf8Len = (str: string): number => {
  const encoded = new TextEncoder().encode(str);
  return encoded.byteLength;
};
5
// Returns the number of graphemes (user-displayed characters) in `str`.
// Relies on Intl.Segmenter, which is supported in Deno and modern browsers.
export const graphemeLen = (str: string): number => {
  // Without Intl.Segmenter, fall back to counting code points. This is a
  // simplified approach that only handles basic cases.
  if (typeof Intl === "undefined" || !("Segmenter" in Intl)) {
    return [...str].length;
  }

  const graphemes = new Intl.Segmenter(undefined, { granularity: "grapheme" });
  let count = 0;
  for (const _segment of graphemes.segment(str)) {
    count += 1;
  }
  return count;
};
20
// Encodes `utf8` as UTF-8 bytes and returns them as unpadded base64url
// ("+" becomes "-", "/" becomes "_", trailing "=" padding is removed).
export const utf8ToB64Url = (utf8: string): string => {
  const bytes = new TextEncoder().encode(utf8);

  // Build the binary string in bounded chunks. Spreading the whole byte array
  // into a single String.fromCharCode call passes one argument per byte, which
  // throws a RangeError on large inputs once the engine's argument/stack limit
  // is exceeded.
  const chunkSize = 0x8000;
  let binary = "";
  for (let offset = 0; offset < bytes.length; offset += chunkSize) {
    binary += String.fromCharCode(...bytes.subarray(offset, offset + chunkSize));
  }

  return btoa(binary)
    .replace(/\+/g, "-")
    .replace(/\//g, "_")
    .replace(/=/g, "");
};
29
// Decodes a base64url string (padding optional) and interprets the resulting
// bytes as UTF-8 text.
export const b64UrlToUtf8 = (b64: string): string => {
  // Map the URL-safe alphabet back onto the standard base64 alphabet.
  const standard = b64.replace(/[-_]/g, (ch) => (ch === "-" ? "+" : "/"));

  // Restore the "=" padding so the length is a multiple of four.
  const remainder = standard.length % 4;
  const padding = remainder === 0 ? "" : "=".repeat(4 - remainder);

  // atob yields one character per byte; copy those char codes into a byte array.
  const binary = atob(standard + padding);
  const bytes = Uint8Array.from(binary, (ch) => ch.charCodeAt(0));

  return new TextDecoder().decode(bytes);
};
45
// Splits a BCP 47 language tag into its components using the named groups of
// `bcp47Regexp`. Returns null when the tag does not match the pattern;
// otherwise returns an object holding only the components that were captured
// as non-empty strings.
export const parseLanguage = (langTag: string): LanguageTag | null => {
  const groups = langTag.match(bcp47Regexp)?.groups;
  if (!groups) return null;

  // Groups that map one-to-one onto a LanguageTag field of the same name.
  const directFields = [
    "grandfathered",
    "language",
    "extlang",
    "script",
    "region",
    "variant",
    "extension",
  ] as const;

  const result: LanguageTag = {};
  for (const field of directFields) {
    const value = groups[field];
    if (value) result[field] = value;
  }

  // Private use is captured by one of two groups depending on whether it
  // trails a regular tag or makes up the whole tag.
  const privateUse = groups.privateUseA || groups.privateUseB;
  if (privateUse) result.privateUse = privateUse;

  return result;
};
65
// Reports whether `langTag` matches the BCP 47 pattern in `bcp47Regexp`.
export const validateLanguage = (langTag: string): boolean =>
  bcp47Regexp.test(langTag);
69
// Components of a BCP 47 language tag, mirroring the named capture groups of
// `bcp47Regexp`. Every field is optional: `parseLanguage` only sets a field
// when the corresponding group captured a non-empty string.
export type LanguageTag = {
  // One of the fixed grandfathered tags listed in the pattern (e.g. "i-klingon").
  grandfathered?: string;
  // Language portion of the tag. The `extlang` group is nested inside this
  // group in the pattern, so this value includes the extlang part when present.
  language?: string;
  // Extended language subtags: one to three 3-letter subtags.
  extlang?: string;
  // 4-letter script subtag.
  script?: string;
  // Region subtag: 2 letters or 3 digits.
  region?: string;
  // Variant subtag. The group sits inside a repeated section of the pattern,
  // so when several variants are present only the last one is captured.
  variant?: string;
  // Extension: a single character other than "x" followed by 2-8 character
  // subtags. Also inside a repeated section, so only the last one is captured.
  extension?: string;
  // Private-use part starting with "x-"; filled from `privateUseA` (trailing a
  // regular tag) or `privateUseB` (the whole tag is private use).
  privateUse?: string;
};
80
// Validates well-formed BCP 47 syntax: https://www.rfc-editor.org/rfc/rfc5646.html#section-2.1
//
// The pattern is anchored at both ends and accepts one of three shapes:
//   1. a grandfathered tag from the fixed lists (group `grandfathered`);
//   2. a regular tag: `language` (with optional nested `extlang`), then
//      optional `script`, `region`, repeated `variant`, repeated `extension`,
//      and an optional trailing private-use part (`privateUseA`);
//   3. a tag that consists only of a private-use part "x-..." (`privateUseB`).
// No flags are set, so matching is case-sensitive: the literal grandfathered
// tags and the "x" private-use marker match only in the exact case written.
// Without `g`/`y`, `test` and `match` keep no `lastIndex` state between calls.
const bcp47Regexp =
  /^((?<grandfathered>(en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE)|(art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|zh-min|zh-min-nan|zh-xiang))|((?<language>([A-Za-z]{2,3}(-(?<extlang>[A-Za-z]{3}(-[A-Za-z]{3}){0,2}))?)|[A-Za-z]{4}|[A-Za-z]{5,8})(-(?<script>[A-Za-z]{4}))?(-(?<region>[A-Za-z]{2}|[0-9]{3}))?(-(?<variant>[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(-(?<extension>[0-9A-WY-Za-wy-z](-[A-Za-z0-9]{2,8})+))*(-(?<privateUseA>x(-[A-Za-z0-9]{1,8})+))?)|(?<privateUseB>x(-[A-Za-z0-9]{1,8})+))$/;