MIRROR: javascript for 🐜's, a tiny runtime with big ambitions
1#include <compat.h> // IWYU pragma: keep
2
3#include <stdbool.h>
4#include <math.h>
5#include <stdio.h>
6#include <string.h>
7#include <time.h>
8
9#include "ant.h"
10#include "errors.h"
11#include "runtime.h"
12#include "internal.h"
13#include "descriptors.h"
14
15#include "modules/intl.h"
16#include "modules/symbol.h"
17
18static ant_value_t g_intl_collator_proto = 0;
19static ant_value_t g_intl_numberformat_proto = 0;
20static ant_value_t g_intl_datetimeformat_proto = 0;
21static ant_value_t g_intl_segmenter_proto = 0;
22
// Clock fields shared by DateTimeFormat's format() and formatToParts().
// Filled in by intl_dtf_extract_fields().
typedef struct {
  int hour12;             // hour on a 12-hour clock, in the range 1..12
  int minute;             // copied from tm_min
  int second;             // copied from tm_sec
  const char *day_period; // points at the literal "AM" or "PM"
} intl_dtf_fields_t;
29
30static ant_value_t intl_create_instance(ant_t *js, ant_value_t fallback_proto) {
31 ant_value_t obj = js_mkobj(js);
32 ant_value_t proto = js_instance_proto_from_new_target(js, fallback_proto);
33 if (is_object_type(proto)) js_set_proto_init(obj, proto);
34 return obj;
35}
36
37// TODO: docs/exec-plans/tech-debt.md
// Returns true when `c` is an ASCII letter (A-Z or a-z). Locale-independent.
static inline bool intl_ascii_is_alpha(char c) {
  const bool is_upper = c >= 'A' && c <= 'Z';
  const bool is_lower = c >= 'a' && c <= 'z';
  return is_upper || is_lower;
}
41
// Returns true when `c` is an ASCII decimal digit (0-9).
static inline bool intl_ascii_is_digit(char c) {
  if (c < '0') return false;
  return c <= '9';
}
45
// Returns true when `c` is an ASCII letter or an ASCII decimal digit.
static inline bool intl_ascii_is_alnum(char c) {
  if (intl_ascii_is_alpha(c)) return true;
  return intl_ascii_is_digit(c);
}
49
// Maps ASCII A-Z to a-z; every other character is returned unchanged.
static inline char intl_ascii_lower(char c) {
  if (c < 'A' || c > 'Z') return c;
  return (char)(c + ('a' - 'A'));
}
53
// Returns true when every one of the `len` characters starting at `s`
// satisfies `pred`. A NULL pointer or an empty range yields false, so an
// empty subtag never counts as valid.
static bool intl_ascii_all(const char *s, size_t len, bool (*pred)(char)) {
  if (s == NULL || len == 0) return false;

  const char *const stop = s + len;
  for (const char *cursor = s; cursor != stop; cursor++) {
    if (!pred(*cursor)) return false;
  }

  return true;
}
59
60static bool intl_is_valid_language_tag(const char *tag, size_t len) {
61 if (!tag || len == 0) return false;
62
63 size_t start = 0;
64 size_t end = 0;
65 while (end < len && tag[end] != '-') end++;
66
67 size_t first_len = end - start;
68 if (first_len < 2 || first_len > 8) return false;
69 if (!intl_ascii_all(tag, first_len, intl_ascii_is_alpha)) return false;
70
71 bool need_extension_subtag = false;
72 bool in_private_use = false;
73 bool saw_private_use_subtag = false;
74
75 while (end < len) {
76 start = end + 1;
77 if (start >= len) return false;
78
79 end = start;
80 while (end < len && tag[end] != '-') end++;
81
82 size_t subtag_len = end - start;
83 if (subtag_len == 0 || subtag_len > 8) return false;
84
85 const char *subtag = tag + start;
86 if (!intl_ascii_all(subtag, subtag_len, intl_ascii_is_alnum)) return false;
87
88 if (in_private_use) {
89 saw_private_use_subtag = true;
90 continue;
91 }
92
93 if (need_extension_subtag) {
94 if (subtag_len < 2) return false;
95 need_extension_subtag = false;
96 continue;
97 }
98
99 if (subtag_len == 1) {
100 char singleton = intl_ascii_lower(subtag[0]);
101 if (singleton == 'x') in_private_use = true;
102 else need_extension_subtag = true;
103 }
104 }
105
106 if (need_extension_subtag) return false;
107 if (in_private_use && !saw_private_use_subtag) return false;
108
109 return true;
110}
111
112static ant_value_t intl_resolve_locale(ant_t *js, ant_value_t input) {
113 if (vtype(input) == T_ARR) input = js_get(js, input, "0");
114 if (vtype(input) == T_UNDEF) return js_mkstr(js, "en-US", 5);
115
116 ant_value_t locale = js_tostring_val(js, input);
117 if (is_err(locale)) return locale;
118
119 size_t len = 0;
120 const char *tag = js_getstr(js, locale, &len);
121 if (!intl_is_valid_language_tag(tag, len))
122 return js_mkerr_typed(js, JS_ERR_RANGE, "Invalid language tag");
123
124 return locale;
125}
126
127static ant_value_t intl_get_option_string(ant_t *js, ant_value_t options, const char *key, const char *fallback) {
128 if (vtype(options) != T_OBJ) return js_mkstr(js, fallback, strlen(fallback));
129
130 ant_value_t value = js_get(js, options, key);
131 if (vtype(value) == T_UNDEF) return js_mkstr(js, fallback, strlen(fallback));
132
133 ant_value_t str = js_tostring_val(js, value);
134 if (is_err(str)) return str;
135
136 size_t len = 0;
137 const char *ptr = js_getstr(js, str, &len);
138 if (!ptr || len == 0) return js_mkstr(js, fallback, strlen(fallback));
139
140 return str;
141}
142
143static ant_value_t intl_collator_compare(ant_t *js, ant_value_t *args, int nargs) {
144 ant_value_t left = js_tostring_val(js, nargs > 0 ? args[0] : js_mkstr(js, "", 0));
145 if (is_err(left)) return left;
146
147 ant_value_t right = js_tostring_val(js, nargs > 1 ? args[1] : js_mkstr(js, "", 0));
148 if (is_err(right)) return right;
149
150 const char *left_str = js_getstr(js, left, NULL);
151 const char *right_str = js_getstr(js, right, NULL);
152
153 int result = strcoll(left_str ? left_str : "", right_str ? right_str : "");
154 if (result < 0) return js_mknum(-1);
155 if (result > 0) return js_mknum(1);
156
157 return js_mknum(0);
158}
159
160static ant_value_t intl_collator_resolved_options(ant_t *js, ant_value_t *args, int nargs) {
161 ant_value_t obj = js_mkobj(js);
162 ant_value_t this_obj = js_getthis(js);
163
164 ant_value_t locale = is_object_type(this_obj)
165 ? js_get(js, this_obj, "locale")
166 : js_mkundef();
167
168 if (vtype(locale) != T_STR) locale = js_mkstr(js, "en-US", 5);
169 js_set(js, obj, "locale", locale);
170
171 return obj;
172}
173
174static ant_value_t intl_numberformat_format(ant_t *js, ant_value_t *args, int nargs) {
175 double number = nargs > 0 ? js_to_number(js, args[0]) : 0.0;
176 ant_value_t raw_val = js_tostring_val(js, js_mknum(number));
177 if (is_err(raw_val)) return raw_val;
178
179 size_t raw_len = 0;
180 const char *raw = js_getstr(js, raw_val, &raw_len);
181 if (!raw || raw_len == 0) return js_mkstr(js, "0", 1);
182
183 if (
184 !isfinite(number) ||
185 memchr(raw, 'e', raw_len) ||
186 memchr(raw, 'E', raw_len)
187 ) return raw_val;
188
189 const char *dot = memchr(raw, '.', raw_len);
190 size_t int_len = dot ? (size_t)(dot - raw) : raw_len;
191 size_t start = raw[0] == '-' ? 1 : 0;
192 size_t frac_len = dot ? (raw_len - int_len) : 0;
193
194 char buf[128];
195 size_t pos = 0;
196 if (start) buf[pos++] = '-';
197
198 for (size_t i = start; i < int_len; i++) {
199 buf[pos++] = raw[i];
200 size_t remaining = int_len - 1 - i;
201 if (remaining > 0 && remaining % 3 == 0) buf[pos++] = ',';
202 }
203
204 if (dot && frac_len > 0) {
205 memcpy(buf + pos, dot, frac_len);
206 pos += frac_len;
207 }
208
209 buf[pos] = '\0';
210 return js_mkstr(js, buf, pos);
211}
212
213static ant_value_t intl_numberformat_resolved_options(ant_t *js, ant_value_t *args, int nargs) {
214 return intl_collator_resolved_options(js, args, nargs);
215}
216
217static void intl_dtf_extract_fields(ant_t *js, ant_value_t *args, int nargs, intl_dtf_fields_t *out) {
218 time_t t = time(NULL);
219 if (nargs >= 1) t = (time_t)(js_to_number(js, args[0]) / 1000.0);
220
221 struct tm local;
222#ifdef _WIN32
223 localtime_s(&local, &t);
224#else
225 localtime_r(&t, &local);
226#endif
227
228 out->hour12 = local.tm_hour % 12;
229 if (out->hour12 == 0) out->hour12 = 12;
230 out->minute = local.tm_min;
231 out->second = local.tm_sec;
232 out->day_period = local.tm_hour < 12 ? "AM" : "PM";
233}
234
235static ant_value_t intl_dtf_format(ant_t *js, ant_value_t *args, int nargs) {
236 intl_dtf_fields_t fields;
237 intl_dtf_extract_fields(js, args, nargs, &fields);
238
239 char buf[64];
240 snprintf(
241 buf, sizeof(buf), "%d:%02d:%02d %s",
242 fields.hour12, fields.minute, fields.second, fields.day_period
243 );
244
245 return js_mkstr(js, buf, strlen(buf));
246}
247
248static ant_value_t intl_dtf_resolved_options(ant_t *js, ant_value_t *args, int nargs) {
249 ant_value_t obj = js_mkobj(js);
250 ant_value_t this_obj = js_getthis(js);
251
252 ant_value_t locale = is_object_type(this_obj) ? js_get(js, this_obj, "locale") : js_mkundef();
253 ant_value_t time_zone = is_object_type(this_obj) ? js_get(js, this_obj, "timeZone") : js_mkundef();
254
255 if (vtype(locale) != T_STR) locale = js_mkstr(js, "en-US", 5);
256 if (vtype(time_zone) != T_STR) time_zone = js_mkstr(js, "UTC", 3);
257
258 js_set(js, obj, "locale", locale);
259 js_set(js, obj, "timeZone", time_zone);
260
261 return obj;
262}
263
264static ant_value_t intl_dtf_make_part(ant_t *js, const char *type, const char *value) {
265 ant_value_t obj = js_mkobj(js);
266 js_set(js, obj, "type", js_mkstr(js, type, strlen(type)));
267 js_set(js, obj, "value", js_mkstr(js, value, strlen(value)));
268 return obj;
269}
270
271static ant_value_t intl_dtf_format_to_parts(ant_t *js, ant_value_t *args, int nargs) {
272 intl_dtf_fields_t fields;
273 intl_dtf_extract_fields(js, args, nargs, &fields);
274
275 char hour[8];
276 char minute[8];
277 char second[8];
278
279 snprintf(hour, sizeof(hour), "%d", fields.hour12);
280 snprintf(minute, sizeof(minute), "%02d", fields.minute);
281 snprintf(second, sizeof(second), "%02d", fields.second);
282
283 ant_value_t parts = js_mkarr(js);
284 js_arr_push(js, parts, intl_dtf_make_part(js, "hour", hour));
285 js_arr_push(js, parts, intl_dtf_make_part(js, "literal", ":"));
286 js_arr_push(js, parts, intl_dtf_make_part(js, "minute", minute));
287 js_arr_push(js, parts, intl_dtf_make_part(js, "literal", ":"));
288 js_arr_push(js, parts, intl_dtf_make_part(js, "second", second));
289 js_arr_push(js, parts, intl_dtf_make_part(js, "literal", " "));
290 js_arr_push(js, parts, intl_dtf_make_part(js, "dayPeriod", fields.day_period));
291
292 return parts;
293}
294
// Returns the byte length of the UTF-8 sequence that starts at `input`,
// given that `remaining` bytes are available.
//
// Returns 0 only when `remaining` is 0. A lead byte that is not a valid
// 2-, 3- or 4-byte starter, a sequence cut short by the end of the input, or
// a sequence with a bad continuation byte is consumed as a single byte, so
// the caller always makes progress.
static size_t intl_utf8_segment_len(const char *input, size_t remaining) {
  if (remaining == 0) return 0;

  const unsigned char *bytes = (const unsigned char *)input;
  const unsigned char lead = bytes[0];
  size_t expected;

  if (lead < 0x80) return 1;                            // 0xxxxxxx
  else if (lead >= 0xc0 && lead <= 0xdf) expected = 2;  // 110xxxxx
  else if (lead >= 0xe0 && lead <= 0xef) expected = 3;  // 1110xxxx
  else if (lead >= 0xf0 && lead <= 0xf7) expected = 4;  // 11110xxx
  else return 1;                                        // stray or invalid

  if (expected > remaining) return 1;

  size_t k = 1;
  while (k < expected) {
    // Continuation bytes look like 10xxxxxx.
    if ((bytes[k] >> 6) != 0x2) return 1;
    k++;
  }

  return expected;
}
312
// Decides the "isWordLike" flag for one segment.
//
// Any segment whose length is not exactly one byte is reported as word-like.
// A single byte is word-like only if it is an ASCII letter, an ASCII digit
// or '_'.
static bool intl_ascii_is_word_byte(const char *segment, size_t len) {
  if (len != 1) return true;

  const unsigned char byte = (unsigned char)segment[0];

  if (byte == '_') return true;
  if (byte >= '0' && byte <= '9') return true;
  if (byte >= 'A' && byte <= 'Z') return true;
  if (byte >= 'a' && byte <= 'z') return true;

  return false;
}
323
324static const char *intl_segmenter_granularity(ant_t *js, ant_value_t segmenter, size_t *len) {
325 ant_value_t granularity = js_get(js, segmenter, "granularity");
326 if (vtype(granularity) != T_STR) {
327 if (len) *len = 8;
328 return "grapheme";
329 }
330
331 return js_getstr(js, granularity, len);
332}
333
334static ant_value_t intl_segmenter_segment(ant_t *js, ant_value_t *args, int nargs) {
335 ant_value_t input = nargs > 0 ? js_tostring_val(js, args[0]) : js_mkstr(js, "", 0);
336 if (is_err(input)) return input;
337
338 size_t input_len = 0;
339 char *input_str = js_getstr(js, input, &input_len);
340 ant_value_t segments = js_mkarr(js);
341
342 ant_value_t this_obj = js_getthis(js);
343 size_t granularity_len = 0;
344 const char *granularity = intl_segmenter_granularity(js, this_obj, &granularity_len);
345 bool word_granularity = granularity_len == 4 && memcmp(granularity, "word", 4) == 0;
346
347 for (size_t offset = 0; offset < input_len;) {
348 size_t segment_len = intl_utf8_segment_len(input_str + offset, input_len - offset);
349 ant_value_t record = js_mkobj(js);
350
351 js_set(js, record, "segment", js_mkstr(js, input_str + offset, segment_len));
352 js_set(js, record, "index", js_mknum((double)offset));
353 js_set(js, record, "input", input);
354
355 if (word_granularity) js_set(
356 js, record, "isWordLike",
357 js_bool(intl_ascii_is_word_byte(input_str + offset, segment_len))
358 );
359
360 js_arr_push(js, segments, record);
361 offset += segment_len;
362 }
363
364 return segments;
365}
366
367static ant_value_t intl_segmenter_resolved_options(ant_t *js, ant_value_t *args, int nargs) {
368 ant_value_t obj = js_mkobj(js);
369 ant_value_t this_obj = js_getthis(js);
370
371 size_t granularity_len = 0;
372 const char *granularity = intl_segmenter_granularity(js, this_obj, &granularity_len);
373
374 ant_value_t locale = is_object_type(this_obj) ? js_get(js, this_obj, "locale") : js_mkundef();
375 if (vtype(locale) != T_STR) locale = js_mkstr(js, "en-US", 5);
376
377 js_set(js, obj, "locale", locale);
378 js_set(js, obj, "granularity", js_mkstr(js, granularity, granularity_len));
379
380 return obj;
381}
382
383static ant_value_t intl_collator_constructor(ant_t *js, ant_value_t *args, int nargs) {
384 ant_value_t locale = intl_resolve_locale(js, nargs > 0 ? args[0] : js_mkundef());
385 if (is_err(locale)) return locale;
386
387 ant_value_t obj = intl_create_instance(js, g_intl_collator_proto);
388 js_set(js, obj, "locale", locale);
389
390 return obj;
391}
392
393static ant_value_t intl_numberformat_constructor(ant_t *js, ant_value_t *args, int nargs) {
394 ant_value_t locale = intl_resolve_locale(js, nargs > 0 ? args[0] : js_mkundef());
395 if (is_err(locale)) return locale;
396
397 ant_value_t obj = intl_create_instance(js, g_intl_numberformat_proto);
398 js_set(js, obj, "locale", locale);
399
400 return obj;
401}
402
403static ant_value_t intl_dtf_constructor(ant_t *js, ant_value_t *args, int nargs) {
404 ant_value_t locale = intl_resolve_locale(js, nargs > 0 ? args[0] : js_mkundef());
405 if (is_err(locale)) return locale;
406
407 ant_value_t time_zone = intl_get_option_string(
408 js, nargs > 1 ? args[1] : js_mkundef(),
409 "timeZone", "UTC"
410 );
411 if (is_err(time_zone)) return time_zone;
412
413 ant_value_t obj = intl_create_instance(js, g_intl_datetimeformat_proto);
414 js_set(js, obj, "locale", locale);
415 js_set(js, obj, "timeZone", time_zone);
416
417 return obj;
418}
419
420static ant_value_t intl_segmenter_constructor(ant_t *js, ant_value_t *args, int nargs) {
421 ant_value_t locale = intl_resolve_locale(js, nargs > 0 ? args[0] : js_mkundef());
422 if (is_err(locale)) return locale;
423
424 ant_value_t granularity = intl_get_option_string(
425 js, nargs > 1 ? args[1] : js_mkundef(),
426 "granularity", "grapheme"
427 );
428 if (is_err(granularity)) return granularity;
429
430 ant_value_t obj = intl_create_instance(js, g_intl_segmenter_proto);
431 js_set(js, obj, "locale", locale);
432 js_set(js, obj, "granularity", granularity);
433
434 return obj;
435}
436
437void init_intl_module(void) {
438 ant_t *js = rt->js;
439
440 ant_value_t global = js_glob(js);
441 ant_value_t intl = js_mkobj(js);
442 ant_value_t object_proto = js->sym.object_proto;
443
444 if (is_object_type(object_proto)) js_set_proto_init(intl, object_proto);
445 js_set_sym(js, intl, get_toStringTag_sym(), js_mkstr(js, "Intl", 4));
446
447 g_intl_collator_proto = js_mkobj(js);
448 js_set(js, g_intl_collator_proto, "compare", js_mkfun(intl_collator_compare));
449 js_set(js, g_intl_collator_proto, "resolvedOptions", js_mkfun(intl_collator_resolved_options));
450 js_set_sym(js, g_intl_collator_proto, get_toStringTag_sym(), js_mkstr(js, "Intl.Collator", 13));
451 ant_value_t collator_ctor = js_make_ctor(js, intl_collator_constructor, g_intl_collator_proto, "Collator", 8);
452 js_set(js, intl, "Collator", collator_ctor);
453
454 g_intl_numberformat_proto = js_mkobj(js);
455 js_set(js, g_intl_numberformat_proto, "format", js_mkfun(intl_numberformat_format));
456 js_set(js, g_intl_numberformat_proto, "resolvedOptions", js_mkfun(intl_numberformat_resolved_options));
457 js_set_sym(js, g_intl_numberformat_proto, get_toStringTag_sym(), js_mkstr(js, "Intl.NumberFormat", 17));
458 ant_value_t numberformat_ctor = js_make_ctor(js, intl_numberformat_constructor, g_intl_numberformat_proto, "NumberFormat", 12);
459 js_set(js, intl, "NumberFormat", numberformat_ctor);
460
461 g_intl_datetimeformat_proto = js_mkobj(js);
462 js_set(js, g_intl_datetimeformat_proto, "format", js_mkfun(intl_dtf_format));
463 js_set(js, g_intl_datetimeformat_proto, "resolvedOptions", js_mkfun(intl_dtf_resolved_options));
464 js_set(js, g_intl_datetimeformat_proto, "formatToParts", js_mkfun(intl_dtf_format_to_parts));
465 js_set_sym(js, g_intl_datetimeformat_proto, get_toStringTag_sym(), js_mkstr(js, "Intl.DateTimeFormat", 19));
466 ant_value_t dtf_ctor = js_make_ctor(js, intl_dtf_constructor, g_intl_datetimeformat_proto, "DateTimeFormat", 14);
467 js_set(js, intl, "DateTimeFormat", dtf_ctor);
468
469 g_intl_segmenter_proto = js_mkobj(js);
470 js_set(js, g_intl_segmenter_proto, "segment", js_mkfun(intl_segmenter_segment));
471 js_set(js, g_intl_segmenter_proto, "resolvedOptions", js_mkfun(intl_segmenter_resolved_options));
472 js_set_sym(js, g_intl_segmenter_proto, get_toStringTag_sym(), js_mkstr(js, "Intl.Segmenter", 14));
473 ant_value_t segmenter_ctor = js_make_ctor(js, intl_segmenter_constructor, g_intl_segmenter_proto, "Segmenter", 9);
474 js_set(js, intl, "Segmenter", segmenter_ctor);
475
476 js_set(js, global, "Intl", intl);
477 js_set_descriptor(js, global, "Intl", 4, JS_DESC_W | JS_DESC_C);
478}