MIRROR: javascript for 🐜's, a tiny runtime with big ambitions
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at mir/inline-method 2164 lines 78 kB view raw
1// TODO: cleanup module, make cleaner 2 3#include <stdlib.h> 4#include <string.h> 5#include <stdio.h> 6 7#include "ant.h" 8#include "utf8.h" 9#include "errors.h" 10#include "runtime.h" 11#include "internal.h" 12#include "utils.h" 13#include "escape.h" 14#include "descriptors.h" 15 16#include "silver/engine.h" 17#include "modules/regex.h" 18#include "modules/symbol.h" 19#include "gc/objects.h" 20 21#include <pcre2.h> 22 23typedef struct { 24 ant_object_t *obj; 25 pcre2_code *code; 26 pcre2_match_data *match_data; 27 bool jit_ready; 28} regex_cache_entry_t; 29 30enum { 31 REGEXP_FLAG_HAS_INDICES = 1 << 0, 32 REGEXP_FLAG_GLOBAL = 1 << 1, 33 REGEXP_FLAG_IGNORE_CASE = 1 << 2, 34 REGEXP_FLAG_MULTILINE = 1 << 3, 35 REGEXP_FLAG_DOTALL = 1 << 4, 36 REGEXP_FLAG_UNICODE = 1 << 5, 37 REGEXP_FLAG_UNICODE_SET = 1 << 6, 38 REGEXP_FLAG_STICKY = 1 << 7, 39}; 40 41static regex_cache_entry_t *regex_cache = NULL; 42static ant_value_t regexp_matchall_iter_proto_val = 0; 43 44static size_t regex_cache_count = 0; 45static size_t regex_cache_cap = 0; 46 47static inline uint8_t regexp_parse_flags_mask(const char *fstr, ant_offset_t flen) { 48 uint8_t mask = 0; 49 for (ant_offset_t k = 0; k < flen; k++) { 50 switch (fstr[k]) { 51 case 'd': mask |= REGEXP_FLAG_HAS_INDICES; break; 52 case 'g': mask |= REGEXP_FLAG_GLOBAL; break; 53 case 'i': mask |= REGEXP_FLAG_IGNORE_CASE; break; 54 case 'm': mask |= REGEXP_FLAG_MULTILINE; break; 55 case 's': mask |= REGEXP_FLAG_DOTALL; break; 56 case 'u': mask |= REGEXP_FLAG_UNICODE; break; 57 case 'v': mask |= REGEXP_FLAG_UNICODE_SET; break; 58 case 'y': mask |= REGEXP_FLAG_STICKY; break; 59 default: break; 60 }} 61 return mask; 62} 63 64static inline uint8_t regexp_flags_mask(ant_t *js, ant_value_t regexp) { 65 ant_offset_t flags_off = lkp(js, regexp, "flags", 5); 66 if (flags_off == 0) return 0; 67 68 ant_value_t flags_val = js_propref_load(js, flags_off); 69 if (vtype(flags_val) != T_STR) return 0; 70 71 ant_value_t cached_flags = js_get_slot(regexp, 
SLOT_REGEXP_FLAGS_STRING); 72 ant_value_t cached = js_get_slot(regexp, SLOT_REGEXP_FLAGS_MASK); 73 if (flags_val == cached_flags && vtype(cached) == T_NUM) return (uint8_t)tod(cached); 74 75 ant_offset_t flen, foff = vstr(js, flags_val, &flen); 76 uint8_t mask = regexp_parse_flags_mask((const char *)(uintptr_t)foff, flen); 77 js_set_slot(regexp, SLOT_REGEXP_FLAGS_MASK, tov((double)mask)); 78 js_set_slot(regexp, SLOT_REGEXP_FLAGS_STRING, flags_val); 79 80 return mask; 81} 82 83static ant_value_t regexp_build_named_groups_meta(ant_t *js, pcre2_code *code) { 84 uint32_t namecount = 0; 85 pcre2_pattern_info(code, PCRE2_INFO_NAMECOUNT, &namecount); 86 if (namecount == 0) return js_mkundef(); 87 88 uint32_t nameentrysize = 0; 89 PCRE2_SPTR nametable = NULL; 90 pcre2_pattern_info(code, PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize); 91 pcre2_pattern_info(code, PCRE2_INFO_NAMETABLE, (void *)&nametable); 92 93 ant_value_t meta = js_mkarr(js); 94 if (is_err(meta)) return meta; 95 96 PCRE2_SPTR tabptr = nametable; 97 for (uint32_t i = 0; i < namecount; i++) { 98 int n = (tabptr[0] << 8) | tabptr[1]; 99 const char *name = (const char *)(tabptr + 2); 100 ant_value_t name_val = js_mkstr(js, name, strlen(name)); 101 if (is_err(name_val)) return name_val; 102 js_arr_push(js, meta, name_val); 103 js_arr_push(js, meta, tov((double)n)); 104 tabptr += nameentrysize; 105 } 106 107 return meta; 108} 109 110static void update_regexp_statics(ant_t *js, const char *str_ptr, PCRE2_SIZE *ovector, uint32_t ovcount) { 111 ant_value_t regexp_ctor = js_get(js, js_glob(js), "RegExp"); 112 if (is_err(regexp_ctor) || vtype(regexp_ctor) == T_UNDEF) return; 113 114 ant_value_t empty = js_mkstr(js, "", 0); 115 for (int i = 1; i <= 9; i++) { 116 char key[3] = {'$', (char)('0' + i), '\0'}; 117 ant_value_t val = empty; 118 if ((uint32_t)i < ovcount && ovector[2*i] != PCRE2_UNSET) 119 val = js_mkstr(js, str_ptr + ovector[2*i], ovector[2*i+1] - ovector[2*i]); 120 if (is_err(setprop_cstr(js, regexp_ctor, key, 2, 
val))) return; 121 } 122 123 ant_value_t match0 = empty; 124 if (ovcount > 0 && ovector[0] != PCRE2_UNSET) 125 match0 = js_mkstr(js, str_ptr + ovector[0], ovector[1] - ovector[0]); 126 if (is_err(setprop_cstr(js, regexp_ctor, "lastMatch", 9, match0))) return; 127 (void)setprop_cstr(js, regexp_ctor, "$&", 2, match0); 128} 129 130static inline bool is_pcre2_passthrough_escape(char c) { 131switch (c) { 132 case 'd': case 'D': case 'w': case 'W': case 's': case 'S': 133 case 'b': case 'B': case 'n': case 'r': case 't': case 'f': 134 case '1': case '2': case '3': case '4': case '5': 135 case '6': case '7': case '8': case '9': 136 case '.': case '*': case '+': case '?': 137 case '(': case ')': case '[': case ']': 138 case '{': case '}': case '|': case '^': 139 case '$': case '\\': case '/': case '-': return true; 140 default: return false; 141}} 142 143static inline bool is_class_shorthand(char c) { 144 return c == 'w' || c == 'W' || c == 'd' || c == 'D' || c == 's' || c == 'S'; 145} 146 147static size_t v_close_bracket(const char *src, size_t src_len, size_t open) { 148 int depth = 0; 149 for (size_t i = open; i < src_len; i++) { 150 if (src[i] == '\\' && i + 1 < src_len) { i++; continue; } 151 if (src[i] == '[') depth++; 152 else if (src[i] == ']') { if (--depth == 0) return i; } 153 } 154 return src_len; 155} 156 157static size_t v_translate_part(const char *p, size_t len, char *out, size_t out_size) { 158 if (len && p[0] == '[') return js_to_pcre2_pattern(p, len, out, out_size, false); 159 char tmp[1024]; 160 if (len >= sizeof(tmp) - 2) return 0; 161 tmp[0] = '['; memcpy(tmp + 1, p, len); tmp[len + 1] = ']'; 162 return js_to_pcre2_pattern(tmp, len + 2, out, out_size, false); 163} 164 165static int v_set_op(const char *src, size_t start, size_t end, size_t *op_pos) { 166 int depth = 0; 167 for (size_t i = start; i < end; ) { 168 if (src[i] == '\\' && i + 1 < end) { 169 char n = src[i + 1]; 170 if ((n == 'p' || n == 'P') && i + 2 < end && src[i + 2] == '{') { 171 i += 
3; while (i < end && src[i] != '}') i++; if (i < end) i++; continue; 172 } 173 if ((n == 'u' || n == 'x') && i + 2 < end && src[i + 2] == '{') { 174 i += 3; while (i < end && src[i] != '}') i++; if (i < end) i++; continue; 175 } 176 i += 2; continue; 177 } 178 if (src[i] == '[') { depth++; i++; continue; } 179 if (src[i] == ']') { if (depth > 0) { depth--; i++; continue; } break; } 180 if (!depth && i + 1 < end) { 181 if (src[i] == '&' && src[i+1] == '&') { *op_pos = i; return 1; } 182 if (src[i] == '-' && src[i+1] == '-') { *op_pos = i; return 2; } 183 } 184 i++; 185 } 186 return 0; 187} 188 189size_t js_to_pcre2_pattern(const char *src, size_t src_len, char *dst, size_t dst_size, bool v_flag) { 190 size_t di = 0; 191 int charclass_depth = 0; 192 193#define OUT(ch) do { if (di < dst_size - 1) dst[di++] = (ch); } while(0) 194 195 for (size_t si = 0; si < src_len && di < dst_size - 1; si++) { 196 if (src[si] == '[') { 197 if (v_flag && charclass_depth == 0) { 198 size_t close = v_close_bracket(src, src_len, si); 199 size_t op_pos; 200 int op_type = v_set_op(src, si + 1, close, &op_pos); 201 if (op_type && close < src_len) { 202 char ao[1024], bo[1024]; 203 size_t aol = v_translate_part(&src[si + 1], op_pos - si - 1, ao, sizeof(ao)); 204 size_t bol = v_translate_part(&src[op_pos + 2], close - op_pos - 2, bo, sizeof(bo)); 205 const char *la = op_type == 1 ? ao : bo, *ra = op_type == 1 ? bo : ao; 206 size_t ll = op_type == 1 ? aol : bol, rl = op_type == 1 ? bol : aol; 207 OUT('('); OUT('?'); OUT(op_type == 1 ? 
'=' : '!'); 208 for (size_t k = 0; k < ll; k++) OUT(la[k]); 209 OUT(')'); 210 for (size_t k = 0; k < rl; k++) OUT(ra[k]); 211 si = close; 212 continue; 213 } 214 } 215 charclass_depth++; 216 OUT('['); 217 continue; 218 } 219 if (src[si] == ']' && charclass_depth > 0) { 220 charclass_depth--; 221 OUT(']'); 222 continue; 223 } 224 225 if (charclass_depth > 0 && src[si] == '-' && si > 0 && src[si - 1] != '[' && 226 si + 1 < src_len && src[si + 1] != ']') { 227 bool prev_is_shorthand = (si >= 2 && src[si - 2] == '\\' && is_class_shorthand(src[si - 1])); 228 bool next_is_shorthand = (si + 2 < src_len && src[si + 1] == '\\' && is_class_shorthand(src[si + 2])); 229 if (prev_is_shorthand || next_is_shorthand) { 230 OUT('\\'); OUT('-'); 231 continue; 232 } 233 OUT('-'); 234 continue; 235 } 236 237 if (src[si] != '\\' || si + 1 >= src_len) { 238 OUT(src[si]); 239 continue; 240 } 241 242 char next = src[si + 1]; 243 244 if (next == 'v') { 245 OUT('\\'); OUT('x'); OUT('{'); OUT('0'); OUT('b'); OUT('}'); 246 si++; 247 continue; 248 } 249 250 if (next == 'u' && si + 2 < src_len && src[si + 2] == '{') { 251 size_t brace_start = si + 3; 252 size_t brace_end = brace_start; 253 while (brace_end < src_len && src[brace_end] != '}' && is_xdigit(src[brace_end])) brace_end++; 254 if (brace_end < src_len && src[brace_end] == '}' && brace_end > brace_start) { 255 OUT('\\'); OUT('x'); OUT('{'); 256 for (size_t k = brace_start; k < brace_end; k++) OUT(src[k]); 257 OUT('}'); 258 si = brace_end; 259 continue; 260 } 261 } 262 263 if (next == 'u' && si + 5 < src_len && 264 is_xdigit(src[si+2]) && is_xdigit(src[si+3]) && 265 is_xdigit(src[si+4]) && is_xdigit(src[si+5])) { 266 OUT('\\'); OUT('x'); OUT('{'); 267 OUT(src[si+2]); OUT(src[si+3]); OUT(src[si+4]); OUT(src[si+5]); 268 OUT('}'); 269 si += 5; 270 continue; 271 } 272 273 if (next == 'u') { 274 si++; 275 OUT('u'); 276 continue; 277 } 278 279 if (next == 'x' && si + 3 < src_len && 280 is_xdigit(src[si+2]) && is_xdigit(src[si+3])) { 281 
OUT('\\'); OUT('x'); OUT(src[si+2]); OUT(src[si+3]); 282 si += 3; 283 continue; 284 } 285 286 if (next == 'x') { 287 si++; 288 OUT('x'); 289 continue; 290 } 291 292 if (next == '0' && (si + 2 >= src_len || src[si+2] < '0' || src[si+2] > '9')) { 293 OUT('\\'); OUT('x'); OUT('{'); OUT('0'); OUT('}'); 294 si++; 295 continue; 296 } 297 298 if (next >= '0' && next <= '7') { 299 unsigned int octal = next - '0'; 300 size_t advance = 1; 301 if (si + 2 < src_len && src[si+2] >= '0' && src[si+2] <= '7') { 302 octal = octal * 8 + (src[si+2] - '0'); 303 advance = 2; 304 if (si + 3 < src_len && src[si+3] >= '0' && src[si+3] <= '7' && octal * 8 + (src[si+3] - '0') <= 255) { 305 octal = octal * 8 + (src[si+3] - '0'); 306 advance = 3; 307 } 308 } 309 310 if (advance > 1 || next == '0') { 311 char hex[8]; 312 int hlen = snprintf(hex, sizeof(hex), "\\x{%02x}", octal); 313 for (int k = 0; k < hlen && di < dst_size - 1; k++) OUT(hex[k]); 314 si += advance; 315 continue; 316 } 317 } 318 319 if (next == 'c' && si + 2 < src_len && 320 ((src[si+2] >= 'A' && src[si+2] <= 'Z') || (src[si+2] >= 'a' && src[si+2] <= 'z'))) { 321 OUT('\\'); OUT('c'); OUT(src[si+2]); 322 si += 2; 323 continue; 324 } 325 326 if (next == 'c') { 327 OUT('\\'); OUT('\\'); OUT('c'); 328 si++; 329 continue; 330 } 331 332 if ((next == 'p' || next == 'P') && si + 2 < src_len && src[si + 2] == '{') { 333 size_t brace_start = si + 3; 334 size_t brace_end = brace_start; 335 while (brace_end < src_len && src[brace_end] != '}') brace_end++; 336 if (brace_end < src_len && src[brace_end] == '}') { 337 const char *prop = &src[brace_start]; 338 size_t prop_len = brace_end - brace_start; 339 static const struct { const char *name; const char *code; } gc_map[] = { 340 {"Letter","L"},{"Cased_Letter","LC"},{"Uppercase_Letter","Lu"}, 341 {"Lowercase_Letter","Ll"},{"Titlecase_Letter","Lt"}, 342 {"Modifier_Letter","Lm"},{"Other_Letter","Lo"}, 343 {"Mark","M"},{"Nonspacing_Mark","Mn"},{"Spacing_Mark","Mc"}, 344 {"Enclosing_Mark","Me"}, 
345 {"Number","N"},{"Decimal_Number","Nd"},{"Letter_Number","Nl"}, 346 {"Other_Number","No"}, 347 {"Punctuation","P"},{"Connector_Punctuation","Pc"}, 348 {"Dash_Punctuation","Pd"},{"Open_Punctuation","Ps"}, 349 {"Close_Punctuation","Pe"},{"Initial_Punctuation","Pi"}, 350 {"Final_Punctuation","Pf"},{"Other_Punctuation","Po"}, 351 {"Symbol","S"},{"Math_Symbol","Sm"},{"Currency_Symbol","Sc"}, 352 {"Modifier_Symbol","Sk"},{"Other_Symbol","So"}, 353 {"Separator","Z"},{"Space_Separator","Zs"}, 354 {"Line_Separator","Zl"},{"Paragraph_Separator","Zp"}, 355 {"Other","C"},{"Control","Cc"},{"Format","Cf"}, 356 {"Surrogate","Cs"},{"Private_Use","Co"},{"Unassigned","Cn"}, 357 }; 358 static const struct { const char *script; const char *range; } u17_scripts[] = { 359 {"Sidetic", "\\x{10940}-\\x{1095F}"}, 360 {"Garay", "\\x{10D40}-\\x{10D8F}"}, 361 {"Gurung_Khema", "\\x{16100}-\\x{1613F}"}, 362 {"Kirat_Rai", "\\x{16D40}-\\x{16D7F}"}, 363 {"Ol_Onal", "\\x{1E5D0}-\\x{1E5FF}"}, 364 {"Sunuwar", "\\x{11BC0}-\\x{11BFF}"}, 365 {"Tulu_Tigalari", "\\x{11380}-\\x{113FF}"}, 366 }; 367 bool has_eq = (memchr(prop, '=', prop_len) != NULL); 368 bool has_colon = (memchr(prop, ':', prop_len) != NULL); 369 if (!has_eq && !has_colon && next == 'p' && charclass_depth == 0) { 370 static const struct { const char *name; const char *exp; } sprops[] = { 371 {"Emoji_Keycap_Sequence", 372 "(?:\\x{23}\\x{fe0f}\\x{20e3}|\\x{2a}\\x{fe0f}\\x{20e3}|[\\x{30}-\\x{39}]\\x{fe0f}\\x{20e3})"}, 373 {"RGI_Emoji", 374 "(?:[\\x{1f1e6}-\\x{1f1ff}]{2}|(?:\\p{Emoji}[\\x{1f3fb}-\\x{1f3ff}]?\\x{200d})+\\p{Emoji}[\\x{1f3fb}-\\x{1f3ff}]?|\\p{Emoji}[\\x{1f3fb}-\\x{1f3ff}]|\\p{Emoji}\\x{fe0f}?)"}, 375 }; 376 for (size_t m = 0; m < sizeof(sprops)/sizeof(sprops[0]); m++) { 377 if (strlen(sprops[m].name) == prop_len && memcmp(sprops[m].name, prop, prop_len) == 0) { 378 for (const char *r = sprops[m].exp; *r && di < dst_size - 1; r++) OUT(*r); 379 si = brace_end; 380 goto next_char; 381 } 382 } 383 } 384 if (has_eq || has_colon) { 
385 char sep = has_eq ? '=' : ':'; 386 const char *val = memchr(prop, sep, prop_len); 387 if (val) { 388 val++; 389 size_t val_len = prop_len - (size_t)(val - prop); 390 for (size_t m = 0; m < sizeof(u17_scripts)/sizeof(u17_scripts[0]); m++) { 391 if (strlen(u17_scripts[m].script) == val_len && 392 memcmp(u17_scripts[m].script, val, val_len) == 0) { 393 const char *r = u17_scripts[m].range; 394 OUT('['); 395 if (next == 'P') OUT('^'); 396 for (; *r; r++) OUT(*r); 397 OUT(']'); 398 si = brace_end; 399 goto next_char; 400 } 401 } 402 } 403 } 404 if (!has_eq && !has_colon) { 405 static const struct { const char *name; const char *range; } rangeprops[] = { 406 {"ASCII", "\\x{0}-\\x{7f}"}, 407 {"Any", "\\x{0}-\\x{10ffff}"}, 408 }; 409 for (size_t m = 0; m < sizeof(rangeprops)/sizeof(rangeprops[0]); m++) { 410 if (strlen(rangeprops[m].name) == prop_len && memcmp(rangeprops[m].name, prop, prop_len) == 0) { 411 if (charclass_depth > 0) { 412 for (const char *r = rangeprops[m].range; *r; r++) OUT(*r); 413 } else { 414 OUT('['); if (next == 'P') OUT('^'); 415 for (const char *r = rangeprops[m].range; *r; r++) OUT(*r); 416 OUT(']'); 417 } 418 si = brace_end; 419 goto next_char; 420 } 421 } 422 } 423 const char *replacement = NULL; 424 if (!has_eq && !has_colon) { 425 for (size_t m = 0; m < sizeof(gc_map)/sizeof(gc_map[0]); m++) { 426 if (strlen(gc_map[m].name) == prop_len && 427 memcmp(gc_map[m].name, prop, prop_len) == 0) { 428 replacement = gc_map[m].code; 429 break; 430 } 431 } 432 } 433 static const struct { const char *prop; const char *extra; } u17_props[] = { 434 {"Emoji", "\\x{1FACD}-\\x{1FACE}\\x{1FAE9}\\x{1FAF9}"}, 435 }; 436 const char *extra_range = NULL; 437 if (!has_eq && !has_colon && !replacement) { 438 for (size_t m = 0; m < sizeof(u17_props)/sizeof(u17_props[0]); m++) { 439 if (strlen(u17_props[m].prop) == prop_len && 440 memcmp(u17_props[m].prop, prop, prop_len) == 0) { 441 extra_range = u17_props[m].extra; 442 break; 443 } 444 } 445 } 446 if (extra_range 
&& charclass_depth == 0) { 447 const char *pfx = (next == 'p') ? "(?:\\p{" : "(?:\\P{"; 448 for (const char *r = pfx; *r; r++) OUT(*r); 449 for (size_t k = brace_start; k < brace_end; k++) OUT(src[k]); 450 OUT('}'); OUT('|'); OUT('['); 451 if (next == 'P') OUT('^'); 452 for (const char *r = extra_range; *r; r++) OUT(*r); 453 OUT(']'); OUT(')'); 454 } else { 455 OUT('\\'); OUT(next); OUT('{'); 456 if (replacement) { 457 for (const char *r = replacement; *r; r++) OUT(*r); 458 } else { 459 for (size_t k = brace_start; k < brace_end; k++) OUT(src[k]); 460 } 461 OUT('}'); 462 } 463 si = brace_end; 464 continue; 465 } 466 OUT('\\'); OUT(next); 467 si++; 468 continue; 469 } 470 471 if (is_pcre2_passthrough_escape(next)) { 472 OUT('\\'); OUT(next); 473 si++; 474 continue; 475 } 476 477 si++; 478 OUT(next); 479 next_char:; 480 } 481 482#undef OUT 483 dst[di] = '\0'; 484 return di; 485} 486 487#define REGEXP_SET_PROP(js, obj, key, klen, val, is_new) \ 488 ((is_new) ? js_mkprop_fast(js, obj, key, klen, val) \ 489 : js_setprop(js, obj, js_mkstr(js, key, klen), val)) 490 491static void regexp_init_flags(ant_t *js, ant_value_t obj, const char *fstr, ant_offset_t flen, bool is_new) { 492 uint8_t mask = regexp_parse_flags_mask(fstr, flen); 493 bool d = (mask & REGEXP_FLAG_HAS_INDICES) != 0; 494 bool g = (mask & REGEXP_FLAG_GLOBAL) != 0; 495 bool i = (mask & REGEXP_FLAG_IGNORE_CASE) != 0; 496 bool m = (mask & REGEXP_FLAG_MULTILINE) != 0; 497 bool s = (mask & REGEXP_FLAG_DOTALL) != 0; 498 bool u = (mask & REGEXP_FLAG_UNICODE) != 0; 499 bool v = (mask & REGEXP_FLAG_UNICODE_SET) != 0; 500 bool y = (mask & REGEXP_FLAG_STICKY) != 0; 501 502 char sorted[10]; int si = 0; 503 if (d) sorted[si++] = 'd'; 504 if (g) sorted[si++] = 'g'; 505 if (i) sorted[si++] = 'i'; 506 if (m) sorted[si++] = 'm'; 507 if (s) sorted[si++] = 's'; 508 if (u) sorted[si++] = 'u'; 509 if (v) sorted[si++] = 'v'; 510 if (y) sorted[si++] = 'y'; 511 512 ant_value_t flags_value = js_mkstr(js, sorted, si); 513 
REGEXP_SET_PROP(js, obj, "flags", 5, flags_value, is_new); 514 REGEXP_SET_PROP(js, obj, "hasIndices", 10, mkval(T_BOOL, d ? 1 : 0), is_new); 515 REGEXP_SET_PROP(js, obj, "global", 6, mkval(T_BOOL, g ? 1 : 0), is_new); 516 REGEXP_SET_PROP(js, obj, "ignoreCase", 10, mkval(T_BOOL, i ? 1 : 0), is_new); 517 REGEXP_SET_PROP(js, obj, "multiline", 9, mkval(T_BOOL, m ? 1 : 0), is_new); 518 REGEXP_SET_PROP(js, obj, "dotAll", 6, mkval(T_BOOL, s ? 1 : 0), is_new); 519 REGEXP_SET_PROP(js, obj, "unicode", 7, mkval(T_BOOL, u ? 1 : 0), is_new); 520 REGEXP_SET_PROP(js, obj, "unicodeSets", 11, mkval(T_BOOL, v ? 1 : 0), is_new); 521 REGEXP_SET_PROP(js, obj, "sticky", 6, mkval(T_BOOL, y ? 1 : 0), is_new); 522 REGEXP_SET_PROP(js, obj, "lastIndex", 9, tov(0), is_new); 523 js_set_slot(obj, SLOT_REGEXP_FLAGS_MASK, tov((double)mask)); 524 js_set_slot(obj, SLOT_REGEXP_FLAGS_STRING, flags_value); 525 js_set_slot(obj, SLOT_REGEXP_NAMED_GROUPS, js_mkundef()); 526} 527 528ant_value_t is_regexp_like(ant_t *js, ant_value_t value) { 529 if (!is_object_type(value)) return js_false; 530 531 ant_value_t match_sym = get_match_sym(); 532 if (vtype(match_sym) == T_SYMBOL) { 533 ant_value_t match_val = js_get_sym(js, value, match_sym); 534 if (is_err(match_val)) return match_val; 535 if (vtype(match_val) != T_UNDEF) return js_bool(js_truthy(js, match_val)); 536 } 537 538 ant_value_t regexp_ctor = js_get(js, js_glob(js), "RegExp"); 539 if (is_err(regexp_ctor)) return regexp_ctor; 540 541 ant_value_t regexp_proto = js_get(js, regexp_ctor, "prototype"); 542 if (is_err(regexp_proto)) return regexp_proto; 543 if (!is_object_type(regexp_proto)) return js_false; 544 545 return js_bool(proto_chain_contains(js, value, regexp_proto)); 546} 547 548static ant_value_t should_regexp_passthrough(ant_t *js, ant_value_t *args, int nargs) { 549 if (vtype(js->new_target) != T_UNDEF) return js_false; 550 if (nargs <= 0) return js_false; 551 552 if (nargs >= 2 && vtype(args[1]) != T_UNDEF) return js_false; 553 if 
(!is_object_type(args[0])) return js_false; 554 555 ant_value_t is_re = is_regexp_like(js, args[0]); 556 if (is_err(is_re)) return is_re; 557 if (!js_truthy(js, is_re)) return js_false; 558 559 ant_value_t ctor = js_getprop_fallback(js, args[0], "constructor"); 560 if (is_err(ctor)) return ctor; 561 562 ant_value_t regexp_ctor = js_get(js, js_glob(js), "RegExp"); 563 if (is_err(regexp_ctor)) return regexp_ctor; 564 565 return js_bool(same_ctor_identity(js, ctor, regexp_ctor)); 566} 567 568ant_value_t reject_regexp_arg(ant_t *js, ant_value_t value, const char *method_name) { 569 ant_value_t is_re = is_regexp_like(js, value); 570 if (is_err(is_re)) return is_re; 571 if (js_truthy(js, is_re)) { 572 return js_mkerr_typed(js, JS_ERR_TYPE, "First argument to %s must not be a RegExp", method_name); 573 } 574 return js_mkundef(); 575} 576 577static ant_value_t regexp_species_construct(ant_t *js, ant_value_t rx, ant_value_t ctor, ant_value_t *ctor_args, int nargs) { 578 ant_value_t seed = js_mkobj(js); 579 if (is_err(seed)) return seed; 580 581 ant_value_t proto = js_get(js, ctor, "prototype"); 582 if (is_err(proto)) return proto; 583 if (is_object_type(proto)) js_set_proto_init(seed, proto); 584 585 ant_value_t saved = js->new_target; 586 js->new_target = ctor; 587 ant_value_t result = sv_vm_call(js->vm, js, ctor, seed, ctor_args, nargs, NULL, true); 588 js->new_target = saved; 589 590 if (is_err(result)) return result; 591 if (!is_object_type(result)) 592 return js_mkerr_typed(js, JS_ERR_TYPE, "RegExp species constructor returned non-object"); 593 594 return result; 595} 596 597static ant_value_t regexp_exec_abstract(ant_t *js, ant_value_t rx, ant_value_t str); 598static ant_value_t builtin_regexp_exec(ant_t *js, ant_value_t *args, int nargs); 599 600static regex_cache_entry_t *regex_cache_lookup(ant_object_t *obj) { 601 for (size_t i = 0; i < regex_cache_count; i++) { 602 if (regex_cache[i].obj == obj) return &regex_cache[i]; 603 } 604 return NULL; 605} 606 607static 
regex_cache_entry_t *regex_cache_insert(ant_object_t *obj, pcre2_code *code, pcre2_match_data *match_data, bool jit_ready) { 608 if (regex_cache_count >= regex_cache_cap) { 609 size_t new_cap = regex_cache_cap ? regex_cache_cap * 2 : 64; 610 regex_cache_entry_t *new_cache = realloc(regex_cache, new_cap * sizeof(regex_cache_entry_t)); 611 if (!new_cache) return NULL; 612 regex_cache = new_cache; 613 regex_cache_cap = new_cap; 614 } 615 regex_cache_entry_t *entry = &regex_cache[regex_cache_count++]; 616 entry->obj = obj; 617 entry->code = code; 618 entry->match_data = match_data; 619 entry->jit_ready = jit_ready; 620 return entry; 621} 622 623typedef struct { 624 pcre2_code *code; 625 pcre2_match_data *match_data; 626 bool jit_ready; 627} compiled_regex_t; 628 629static bool regex_get_or_compile(ant_t *js, ant_value_t regexp_obj, compiled_regex_t *out) { 630 ant_object_t *obj_ptr = js_obj_ptr(regexp_obj); 631 uint8_t flags_mask = regexp_flags_mask(js, regexp_obj); 632 633 regex_cache_entry_t *cached = regex_cache_lookup(obj_ptr); 634 if (cached) { 635 out->code = cached->code; 636 out->match_data = cached->match_data; 637 out->jit_ready = cached->jit_ready; 638 return true; 639 } 640 641 ant_offset_t source_off = lkp(js, regexp_obj, "source", 6); 642 if (source_off == 0) return false; 643 ant_value_t source_val = js_propref_load(js, source_off); 644 if (vtype(source_val) != T_STR) return false; 645 646 ant_offset_t plen, poff = vstr(js, source_val, &plen); 647 const char *pattern_ptr = (char *)(uintptr_t)(poff); 648 649 char pcre2_pattern[4096]; 650 size_t pcre2_len = js_to_pcre2_pattern( 651 pattern_ptr, plen, pcre2_pattern, sizeof(pcre2_pattern), 652 (flags_mask & REGEXP_FLAG_UNICODE_SET) != 0 653 ); 654 655 uint32_t options = PCRE2_UTF | PCRE2_UCP | PCRE2_MATCH_UNSET_BACKREF | PCRE2_DUPNAMES; 656 if (flags_mask & REGEXP_FLAG_IGNORE_CASE) options |= PCRE2_CASELESS; 657 if (flags_mask & REGEXP_FLAG_MULTILINE) options |= PCRE2_MULTILINE; 658 if (flags_mask & 
REGEXP_FLAG_DOTALL) options |= PCRE2_DOTALL; 659 660 int errcode; 661 PCRE2_SIZE erroffset; 662 pcre2_code *re = pcre2_compile((PCRE2_SPTR)pcre2_pattern, pcre2_len, options, &errcode, &erroffset, NULL); 663 if (re == NULL) return false; 664 665 pcre2_match_data *match_data = pcre2_match_data_create_from_pattern(re, NULL); 666 bool jit_ready = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE) == 0; 667 regex_cache_insert(obj_ptr, re, match_data, jit_ready); 668 ant_value_t groups_meta = regexp_build_named_groups_meta(js, re); 669 670 if (is_err(groups_meta)) { 671 pcre2_match_data_free(match_data); 672 pcre2_code_free(re); 673 regex_cache_count--; 674 return false; 675 } 676 677 js_set_slot(regexp_obj, SLOT_REGEXP_NAMED_GROUPS, groups_meta); 678 out->code = re; 679 out->match_data = match_data; 680 out->jit_ready = jit_ready; 681 682 return true; 683} 684 685static ant_value_t builtin_RegExp(ant_t *js, ant_value_t *args, int nargs) { 686 bool pattern_is_regexp = false; 687 if (nargs > 0) { 688 ant_value_t is_re = is_regexp_like(js, args[0]); 689 if (is_err(is_re)) return is_re; 690 pattern_is_regexp = js_truthy(js, is_re); 691 } 692 693 if (vtype(js->new_target) == T_UNDEF && nargs > 0 && pattern_is_regexp) { 694 if (nargs < 2 || vtype(args[1]) == T_UNDEF) { 695 ant_value_t ctor = js_getprop_fallback(js, args[0], "constructor"); 696 if (is_err(ctor)) return ctor; 697 ant_value_t regexp_ctor = js_get(js, js_glob(js), "RegExp"); 698 if (is_err(regexp_ctor)) return regexp_ctor; 699 if (same_ctor_identity(js, ctor, regexp_ctor)) return args[0]; 700 } 701 } 702 703 ant_value_t regexp_obj = js->this_val; 704 bool use_this = (vtype(js->new_target) != T_UNDEF && vtype(regexp_obj) == T_OBJ); 705 706 if (!use_this) { 707 regexp_obj = mkobj(js, 0); 708 if (is_err(regexp_obj)) return regexp_obj; 709 } 710 711 ant_value_t regexp_proto = js_get_ctor_proto(js, "RegExp", 6); 712 ant_value_t instance_proto = js_instance_proto_from_new_target(js, regexp_proto); 713 714 if 
(is_object_type(instance_proto)) js_set_proto_init(regexp_obj, instance_proto); 715 if (vtype(js->new_target) == T_FUNC || vtype(js->new_target) == T_CFUNC) { 716 js_set_slot(regexp_obj, SLOT_CTOR, js->new_target); 717 } 718 719 ant_value_t pattern = js_mkstr(js, "", 0); 720 ant_value_t flags = js_mkstr(js, "", 0); 721 if (nargs > 0) { 722 if (pattern_is_regexp) { 723 ant_value_t src = js_getprop_fallback(js, args[0], "source"); 724 if (is_err(src)) return src; 725 pattern = js_tostring_val(js, src); 726 if (is_err(pattern)) return pattern; 727 if (nargs >= 2 && vtype(args[1]) != T_UNDEF) { 728 flags = js_tostring_val(js, args[1]); 729 } else { 730 ant_value_t fl = js_getprop_fallback(js, args[0], "flags"); 731 if (is_err(fl)) return fl; 732 flags = js_tostring_val(js, fl); 733 } 734 if (is_err(flags)) return flags; 735 } else if (vtype(args[0]) == T_STR) { 736 pattern = args[0]; 737 if (nargs > 1 && vtype(args[1]) == T_STR) flags = args[1]; 738 } else if (vtype(args[0]) != T_UNDEF) { 739 ant_value_t s = js_tostring_val(js, args[0]); 740 if (is_err(s)) return s; 741 pattern = s; 742 if (nargs > 1 && vtype(args[1]) == T_STR) flags = args[1]; 743 } 744 } 745 746 js_mkprop_fast(js, regexp_obj, "source", 6, pattern); 747 ant_offset_t flags_len, flags_off = vstr(js, flags, &flags_len); 748 regexp_init_flags(js, regexp_obj, (const char *)(uintptr_t)(flags_off), flags_len, true); 749 750 return regexp_obj; 751} 752 753static ant_value_t builtin_regexp_groups_getter(ant_t *js, ant_value_t *args, int nargs) { 754 ant_value_t result_arr = js->this_val; 755 if (!is_object_type(result_arr)) return js_mkundef(); 756 757 ant_value_t cached = js_get_slot(result_arr, SLOT_REGEXP_GROUPS_CACHE); 758 if (is_object_type(cached)) return cached; 759 760 ant_value_t meta = js_get_slot(result_arr, SLOT_REGEXP_RESULT_GROUPS); 761 if (!is_object_type(meta)) return js_mkundef(); 762 763 ant_value_t groups = js_mkobj(js); 764 if (is_err(groups)) return groups; 765 js_set_proto_init(groups, 
js_mknull()); 766 767 for (ant_offset_t i = 0; ; i += 2) { 768 ant_value_t name = js_arr_get(js, meta, i); 769 if (vtype(name) == T_UNDEF) break; 770 ant_value_t index_val = js_arr_get(js, meta, i + 1); 771 ant_offset_t index = (vtype(index_val) == T_NUM) ? (ant_offset_t)tod(index_val) : 0; 772 char idxstr[16]; 773 (void)uint_to_str(idxstr, sizeof(idxstr), (uint64_t)index); 774 ant_value_t value = js_getprop_fallback(js, result_arr, idxstr); 775 ant_offset_t name_len, name_off = vstr(js, name, &name_len); 776 ant_value_t status = setprop_cstr(js, groups, (const char *)(uintptr_t)name_off, (size_t)name_len, value); 777 if (is_err(status)) return status; 778 } 779 780 js_set_slot(result_arr, SLOT_REGEXP_GROUPS_CACHE, groups); 781 return groups; 782} 783 784static ant_value_t regexp_build_indices_pair(ant_t *js, PCRE2_SIZE start, PCRE2_SIZE end) { 785 if (start == PCRE2_UNSET) return js_mkundef(); 786 787 ant_value_t pair = js_mkarr(js); 788 if (is_err(pair)) return pair; 789 js_arr_push(js, pair, tov((double)start)); 790 js_arr_push(js, pair, tov((double)end)); 791 792 return pair; 793} 794 795static ant_value_t regexp_build_indices_groups( 796 ant_t *js, 797 ant_value_t groups_meta, 798 ant_value_t indices_arr 799) { 800 ant_value_t groups = js_mkobj(js); 801 if (is_err(groups)) return groups; 802 js_set_proto_init(groups, js_mknull()); 803 804 for (ant_offset_t i = 0; ; i += 2) { 805 ant_value_t name = js_arr_get(js, groups_meta, i); 806 if (vtype(name) == T_UNDEF) break; 807 808 ant_value_t index_val = js_arr_get(js, groups_meta, i + 1); 809 ant_offset_t index = (vtype(index_val) == T_NUM) ? 
(ant_offset_t)tod(index_val) : 0; 810 char idxstr[16]; 811 (void)uint_to_str(idxstr, sizeof(idxstr), (uint64_t)index); 812 813 ant_value_t value = js_getprop_fallback(js, indices_arr, idxstr); 814 ant_offset_t name_len, name_off = vstr(js, name, &name_len); 815 ant_value_t status = setprop_cstr(js, groups, (const char *)(uintptr_t)name_off, (size_t)name_len, value); 816 if (is_err(status)) return status; 817 } 818 819 return groups; 820} 821 822static ant_value_t regexp_build_indices_result( 823 ant_t *js, 824 ant_value_t regexp, 825 PCRE2_SIZE *ovector, 826 uint32_t ovcount 827) { 828 ant_value_t indices_arr = js_mkarr(js); 829 if (is_err(indices_arr)) return indices_arr; 830 831 for (uint32_t i = 0; i < ovcount && i < 32; i++) { 832 ant_value_t pair = regexp_build_indices_pair(js, ovector[2*i], ovector[2*i+1]); 833 if (is_err(pair)) return pair; 834 js_arr_push(js, indices_arr, pair); 835 } 836 837 ant_value_t groups_meta = js_get_slot(regexp, SLOT_REGEXP_NAMED_GROUPS); 838 if (is_object_type(groups_meta)) { 839 ant_value_t groups = regexp_build_indices_groups(js, groups_meta, indices_arr); 840 if (is_err(groups)) return groups; 841 if (is_err(setprop_cstr(js, indices_arr, "groups", 6, groups))) return js_mkerr(js, "oom"); 842 } else if (is_err(setprop_cstr(js, indices_arr, "groups", 6, js_mkundef()))) return js_mkerr(js, "oom"); 843 844 return indices_arr; 845} 846 847static ant_value_t regexp_exec_internal(ant_t *js, ant_value_t regexp, ant_value_t str_arg, bool truthy_only) { 848 ant_offset_t str_len, str_off = vstr(js, str_arg, &str_len); 849 const char *str_ptr = (char *)(uintptr_t)(str_off); 850 uint8_t flags_mask = regexp_flags_mask(js, regexp); 851 852 bool global_flag = (flags_mask & REGEXP_FLAG_GLOBAL) != 0; 853 bool has_indices = (flags_mask & REGEXP_FLAG_HAS_INDICES) != 0; 854 bool sticky_flag = (flags_mask & REGEXP_FLAG_STICKY) != 0; 855 856 // TODO: reduce nesting 857 PCRE2_SIZE start_offset = 0; 858 if (global_flag || sticky_flag) { 859 
ant_offset_t lastindex_off = lkp(js, regexp, "lastIndex", 9); 860 if (lastindex_off != 0) { 861 ant_value_t li_val = js_propref_load(js, lastindex_off); 862 if (vtype(li_val) == T_NUM) { 863 double li = tod(li_val); 864 if (li >= 0 && li <= (double)str_len) start_offset = (PCRE2_SIZE)li; 865 else { 866 if (is_err(setprop_cstr(js, regexp, "lastIndex", 9, tov(0)))) return js_mkerr(js, "oom"); 867 return js_mknull(); 868 } 869 } 870 } 871 } 872 873 compiled_regex_t compiled; 874 if (!regex_get_or_compile(js, regexp, &compiled)) return js_mknull(); 875 876 uint32_t match_options = 0; 877 if (sticky_flag) match_options |= PCRE2_ANCHORED; 878 879 int rc; 880 if (compiled.jit_ready && !sticky_flag) { 881 rc = pcre2_jit_match(compiled.code, (PCRE2_SPTR)str_ptr, str_len, start_offset, match_options, compiled.match_data, NULL); 882 } else rc = pcre2_match(compiled.code, (PCRE2_SPTR)str_ptr, str_len, start_offset, match_options, compiled.match_data, NULL); 883 884 if (rc < 0) { 885 if ((global_flag || sticky_flag) && is_err(setprop_cstr(js, regexp, "lastIndex", 9, tov(0)))) { 886 return js_mkerr(js, "oom"); 887 } 888 return js_mknull(); 889 } 890 891 PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(compiled.match_data); 892 uint32_t ovcount = pcre2_get_ovector_count(compiled.match_data); 893 894 update_regexp_statics(js, str_ptr, ovector, ovcount); 895 896 if (global_flag || sticky_flag) { 897 ant_value_t next_idx = tov((double)ovector[1]); 898 if (is_err(setprop_cstr(js, regexp, "lastIndex", 9, next_idx))) return js_mkerr(js, "oom"); 899 } 900 901 if (truthy_only) return js_true; 902 903 ant_value_t result_arr = js_mkarr(js); 904 if (is_err(result_arr)) return result_arr; 905 for (uint32_t i = 0; i < ovcount && i < 32; i++) { 906 PCRE2_SIZE start = ovector[2*i]; 907 PCRE2_SIZE end = ovector[2*i+1]; 908 if (start == PCRE2_UNSET) { 909 js_arr_push(js, result_arr, js_mkundef()); 910 } else { 911 ant_value_t match_str = js_mkstr(js, str_ptr + start, end - start); 912 
js_arr_push(js, result_arr, match_str); 913 } 914 } 915 916 if (is_err(setprop_cstr(js, result_arr, "index", 5, tov((double)ovector[0])))) return js_mkerr(js, "oom"); 917 if (is_err(setprop_cstr(js, result_arr, "input", 5, str_arg))) return js_mkerr(js, "oom"); 918 919 ant_value_t groups_meta = js_get_slot(regexp, SLOT_REGEXP_NAMED_GROUPS); 920 if (is_object_type(groups_meta)) { 921 js_set_slot(result_arr, SLOT_REGEXP_RESULT_GROUPS, groups_meta); 922 js_set_slot(result_arr, SLOT_REGEXP_GROUPS_CACHE, js_mkundef()); 923 js_set_getter_desc(js, js_as_obj(result_arr), "groups", 6, js_mkfun(builtin_regexp_groups_getter), JS_DESC_E | JS_DESC_C); 924 } else if (is_err(setprop_cstr(js, result_arr, "groups", 6, js_mkundef()))) return js_mkerr(js, "oom"); 925 926 if (has_indices) { 927 ant_value_t indices = regexp_build_indices_result(js, regexp, ovector, ovcount); 928 if (is_err(indices)) return indices; 929 if (is_err(setprop_cstr(js, result_arr, "indices", 7, indices))) return js_mkerr(js, "oom"); 930 } 931 932 return result_arr; 933} 934 935static ant_value_t builtin_regexp_exec(ant_t *js, ant_value_t *args, int nargs) { 936 ant_value_t regexp = js->this_val; 937 if (vtype(regexp) != T_OBJ) return js_mkerr(js, "exec called on non-regexp"); 938 if (nargs < 1) return js_mknull(); 939 940 ant_value_t str_arg = args[0]; 941 if (vtype(str_arg) != T_STR) return js_mknull(); 942 943 return regexp_exec_internal(js, regexp, str_arg, false); 944} 945 946static ant_value_t builtin_regexp_toString(ant_t *js, ant_value_t *args, int nargs) { 947 ant_value_t regexp = js->this_val; 948 if (!is_object_type(regexp)) 949 return js_mkerr_typed(js, JS_ERR_TYPE, "toString called on non-object"); 950 951 ant_value_t source_val = js_getprop_fallback(js, regexp, "source"); 952 if (is_err(source_val)) return source_val; 953 ant_value_t source_str = js_tostring_val(js, source_val); 954 if (is_err(source_str)) return source_str; 955 956 ant_value_t flags_val = js_getprop_fallback(js, regexp, 
"flags"); 957 if (is_err(flags_val)) return flags_val; 958 ant_value_t flags_str = js_tostring_val(js, flags_val); 959 if (is_err(flags_str)) return flags_str; 960 961 ant_offset_t src_len, src_off = vstr(js, source_str, &src_len); 962 ant_offset_t fl_len, fl_off = vstr(js, flags_str, &fl_len); 963 964 size_t total = 1 + src_len + 1 + fl_len; 965 char *buf = ant_calloc(total + 1); 966 if (!buf) return js_mkerr(js, "oom"); 967 size_t n = 0; 968 buf[n++] = '/'; 969 memcpy(buf + n, (const void *)(uintptr_t)src_off, src_len); n += src_len; 970 buf[n++] = '/'; 971 memcpy(buf + n, (const void *)(uintptr_t)fl_off, fl_len); n += fl_len; 972 973 ant_value_t result = js_mkstr(js, buf, n); 974 free(buf); 975 return result; 976} 977 978static ant_value_t builtin_regexp_compile(ant_t *js, ant_value_t *args, int nargs) { 979 ant_value_t rx = js->this_val; 980 if (!is_object_type(rx)) 981 return js_mkerr_typed(js, JS_ERR_TYPE, "compile called on non-object"); 982 983 ant_value_t pattern = js_mkstr(js, "", 0); 984 ant_value_t flags = js_mkstr(js, "", 0); 985 986 if (nargs > 0 && vtype(args[0]) != T_UNDEF) { 987 ant_value_t is_re = is_regexp_like(js, args[0]); 988 if (is_err(is_re)) return is_re; 989 if (js_truthy(js, is_re)) { 990 ant_value_t src = js_getprop_fallback(js, args[0], "source"); 991 if (is_err(src)) return src; 992 pattern = js_tostring_val(js, src); 993 if (is_err(pattern)) return pattern; 994 ant_value_t fl = js_getprop_fallback(js, args[0], "flags"); 995 if (is_err(fl)) return fl; 996 flags = js_tostring_val(js, fl); 997 if (is_err(flags)) return flags; 998 } else { 999 pattern = js_tostring_val(js, args[0]); 1000 if (is_err(pattern)) return pattern; 1001 } 1002 } 1003 if (nargs > 1 && vtype(args[1]) != T_UNDEF) { 1004 flags = js_tostring_val(js, args[1]); 1005 if (is_err(flags)) return flags; 1006 } 1007 1008 js_setprop(js, rx, js_mkstr(js, "source", 6), pattern); 1009 ant_offset_t flen, foff = vstr(js, flags, &flen); 1010 regexp_init_flags(js, rx, (const char 
*)(uintptr_t)(foff), flen, false); 1011 1012 ant_object_t *rx_ptr = js_obj_ptr(rx); 1013 for (size_t i = 0; i < regex_cache_count; i++) { 1014 if (regex_cache[i].obj == rx_ptr) { 1015 pcre2_match_data_free(regex_cache[i].match_data); 1016 pcre2_code_free(regex_cache[i].code); 1017 regex_cache[i] = regex_cache[--regex_cache_count]; 1018 break; 1019 } 1020 } 1021 1022 return rx; 1023} 1024 1025static inline bool is_syntax_char(char c) { 1026 return 1027 c == '^' || c == '$' || c == '\\' || c == '.' || c == '*' || 1028 c == '+' || c == '?' || c == '(' || c == ')' || c == '[' || 1029 c == ']' || c == '{' || c == '}' || c == '|' || c == '/'; 1030} 1031 1032static inline bool is_other_punctuator(char c) { 1033 return 1034 c == ',' || c == '-' || c == ':' || c == ';' || c == '<' || 1035 c == '=' || c == '>' || c == '@' || c == '!' || c == '"' || 1036 c == '#' || c == '%' || c == '&' || c == '\'' || c == '`' || c == '~'; 1037} 1038 1039static ant_value_t builtin_regexp_escape(ant_t *js, ant_value_t *args, int nargs) { 1040 if (nargs < 1 || vtype(args[0]) != T_STR) 1041 return js_mkerr_typed(js, JS_ERR_TYPE, "RegExp.escape requires a string argument"); 1042 1043 ant_offset_t slen, soff = vstr(js, args[0], &slen); 1044 const char *src = (const char *)(uintptr_t)(soff); 1045 1046 size_t buf_cap = slen * 6 + 1; 1047 char *buf = ant_calloc(buf_cap); 1048 if (!buf) return js_mkerr(js, "oom"); 1049 size_t di = 0; 1050 bool first = true; 1051 1052 for (size_t si = 0; si < slen; ) { 1053 unsigned char c = (unsigned char)src[si]; 1054 1055 if (c >= 0x80) { 1056 utf8proc_int32_t cp; 1057 int bytes = (int)utf8_next( 1058 (const utf8proc_uint8_t *)&src[si], 1059 (utf8proc_ssize_t)(slen - si), &cp 1060 ); 1061 for (int b = 0; b < bytes && si < slen; b++) 1062 buf[di++] = src[si++]; 1063 first = false; 1064 continue; 1065 } 1066 1067 if (first && ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))) { 1068 di += snprintf(buf + di, buf_cap - di, "\\x%02x", c); 1069 
si++; first = false; 1070 continue; 1071 } 1072 1073 if (is_syntax_char(c)) { 1074 buf[di++] = '\\'; buf[di++] = c; 1075 si++; first = false; 1076 continue; 1077 } 1078 1079 if (is_other_punctuator(c) || c == ' ' || c == '\t' || c == '\n' || 1080 c == '\r' || c == '\v' || c == '\f') { 1081 di += snprintf(buf + di, buf_cap - di, "\\x%02x", c); 1082 si++; first = false; 1083 continue; 1084 } 1085 1086 buf[di++] = c; 1087 si++; first = false; 1088 } 1089 1090 ant_value_t result = js_mkstr(js, buf, di); 1091 free(buf); 1092 return result; 1093} 1094 1095static ant_value_t regexp_exec_with_exec_fn(ant_t *js, ant_value_t rx, ant_value_t str, ant_value_t exec_fn) { 1096 if (vtype(exec_fn) == T_FUNC || vtype(exec_fn) == T_CFUNC) { 1097 ant_value_t call_args[1] = { str }; 1098 ant_value_t result = sv_vm_call(js->vm, js, exec_fn, rx, call_args, 1, NULL, false); 1099 if (is_err(result)) return result; 1100 if (!is_object_type(result) && vtype(result) != T_NULL) 1101 return js_mkerr_typed(js, JS_ERR_TYPE, "RegExp exec returned non-object"); 1102 return result; 1103 } 1104 1105 ant_value_t call_args[1] = { str }; 1106 ant_value_t saved = js->this_val; 1107 js->this_val = rx; 1108 ant_value_t result = builtin_regexp_exec(js, call_args, 1); 1109 js->this_val = saved; 1110 1111 return result; 1112} 1113 1114static ant_value_t regexp_exec_abstract(ant_t *js, ant_value_t rx, ant_value_t str) { 1115 ant_value_t exec_fn = js_get(js, rx, "exec"); 1116 if (is_err(exec_fn)) return exec_fn; 1117 return regexp_exec_with_exec_fn(js, rx, str, exec_fn); 1118} 1119 1120bool regexp_exec_truthy_try_fast( 1121 ant_t *js, 1122 ant_value_t call_func, 1123 ant_value_t regexp, 1124 ant_value_t arg, 1125 ant_value_t *out_result 1126) { 1127 if (!out_result || vtype(call_func) != T_CFUNC) return false; 1128 if (!js_cfunc_same_entrypoint(call_func, builtin_regexp_exec)) return false; 1129 if (!is_object_type(regexp) || vtype(arg) != T_STR) return false; 1130 1131 ant_value_t result = 
regexp_exec_internal(js, regexp, arg, true); 1132 if (is_err(result)) { 1133 *out_result = result; 1134 return true; 1135 } 1136 1137 *out_result = mkval(T_BOOL, vtype(result) != T_NULL ? 1 : 0); 1138 return true; 1139} 1140 1141static ant_value_t builtin_regexp_test(ant_t *js, ant_value_t *args, int nargs) { 1142 ant_value_t regexp = js->this_val; 1143 if (!is_object_type(regexp)) 1144 return js_mkerr_typed(js, JS_ERR_TYPE, "test called on non-object"); 1145 ant_value_t str_arg = nargs > 0 ? js_tostring_val(js, args[0]) : js_mkstr(js, "undefined", 9); 1146 if (is_err(str_arg)) return str_arg; 1147 ant_value_t exec_fn = js_get(js, regexp, "exec"); 1148 if (is_err(exec_fn)) return exec_fn; 1149 1150 ant_value_t result; 1151 if (vtype(exec_fn) == T_CFUNC && js_cfunc_same_entrypoint(exec_fn, builtin_regexp_exec)) { 1152 result = regexp_exec_internal(js, regexp, str_arg, true); 1153 } else result = regexp_exec_with_exec_fn(js, regexp, str_arg, exec_fn); 1154 1155 if (is_err(result)) return result; 1156 return mkval(T_BOOL, vtype(result) != T_NULL ? 
1 : 0); 1157} 1158 1159static ant_value_t builtin_regexp_flags_getter(ant_t *js, ant_value_t *args, int nargs) { 1160 ant_value_t rx = js->this_val; 1161 if (!is_object_type(rx)) 1162 return js_mkerr_typed(js, JS_ERR_TYPE, "RegExp.prototype.flags called on non-object"); 1163 1164 char buf[16]; int n = 0; 1165 ant_value_t v = js_getprop_fallback(js, rx, "hasIndices"); 1166 1167 if (is_err(v)) return v; 1168 if (js_truthy(js, v)) buf[n++] = 'd'; 1169 1170 v = js_getprop_fallback(js, rx, "global"); 1171 if (is_err(v)) return v; 1172 if (js_truthy(js, v)) buf[n++] = 'g'; 1173 1174 v = js_getprop_fallback(js, rx, "ignoreCase"); 1175 if (is_err(v)) return v; 1176 if (js_truthy(js, v)) buf[n++] = 'i'; 1177 1178 v = js_getprop_fallback(js, rx, "multiline"); 1179 if (is_err(v)) return v; 1180 if (js_truthy(js, v)) buf[n++] = 'm'; 1181 1182 v = js_getprop_fallback(js, rx, "dotAll"); 1183 if (is_err(v)) return v; 1184 if (js_truthy(js, v)) buf[n++] = 's'; 1185 1186 v = js_getprop_fallback(js, rx, "unicode"); 1187 if (is_err(v)) return v; 1188 if (js_truthy(js, v)) buf[n++] = 'u'; 1189 1190 v = js_getprop_fallback(js, rx, "unicodeSets"); 1191 if (is_err(v)) return v; 1192 if (js_truthy(js, v)) buf[n++] = 'v'; 1193 1194 v = js_getprop_fallback(js, rx, "sticky"); 1195 if (is_err(v)) return v; 1196 if (js_truthy(js, v)) buf[n++] = 'y'; 1197 1198 return js_mkstr(js, buf, n); 1199} 1200 1201static ant_value_t builtin_regexp_symbol_match(ant_t *js, ant_value_t *args, int nargs) { 1202 ant_value_t rx = js->this_val; 1203 if (!is_object_type(rx)) 1204 return js_mkerr_typed(js, JS_ERR_TYPE, "RegExp.prototype[@@match] called on non-object"); 1205 1206 ant_value_t str = nargs > 0 ? 
js_tostring_val(js, args[0]) : js_mkstr(js, "undefined", 9); 1207 if (is_err(str)) return str; 1208 1209 ant_value_t global_val = js_getprop_fallback(js, rx, "global"); 1210 if (is_err(global_val)) return global_val; 1211 1212 if (!js_truthy(js, global_val)) 1213 return regexp_exec_abstract(js, rx, str); 1214 1215 ant_value_t unicode_val = js_getprop_fallback(js, rx, "unicode"); 1216 if (is_err(unicode_val)) return unicode_val; 1217 1218 bool full_unicode = js_truthy(js, unicode_val); 1219 js_setprop(js, rx, js_mkstr(js, "lastIndex", 9), tov(0)); 1220 1221 ant_value_t A = js_mkarr(js); 1222 if (is_err(A)) return A; 1223 ant_offset_t n = 0; 1224 1225 for (;;) { 1226 ant_value_t result = regexp_exec_abstract(js, rx, str); 1227 if (is_err(result)) return result; 1228 if (vtype(result) == T_NULL) return n == 0 ? js_mknull() : mkval(T_ARR, vdata(A)); 1229 1230 ant_value_t match_str = js_tostring_val(js, js_arr_get(js, result, 0)); 1231 if (is_err(match_str)) return match_str; 1232 js_arr_push(js, A, match_str); 1233 n++; 1234 1235 ant_offset_t mlen; 1236 vstr(js, match_str, &mlen); 1237 if (mlen == 0) { 1238 ant_value_t li_val = js_getprop_fallback(js, rx, "lastIndex"); 1239 if (is_err(li_val)) return li_val; 1240 double li = vtype(li_val) == T_NUM ? 
tod(li_val) : 0; 1241 ant_offset_t str_len, str_off = vstr(js, str, &str_len); 1242 double advance = 1; 1243 if (full_unicode && li < (double)str_len) { 1244 advance = (double)utf8_char_len_at((const char *)(uintptr_t)(str_off), str_len, (ant_offset_t)li); 1245 } js_setprop(js, rx, js_mkstr(js, "lastIndex", 9), tov(li + advance)); 1246 } 1247 } 1248} 1249 1250 1251static ant_value_t regexp_matchall_next(ant_t *js, ant_value_t *args, int nargs) { 1252 ant_value_t iter = js->this_val; 1253 ant_value_t rx = js_get_slot(iter, SLOT_MATCHALL_RX); 1254 ant_value_t str = js_get_slot(iter, SLOT_MATCHALL_STR); 1255 ant_value_t done_val = js_get_slot(iter, SLOT_MATCHALL_DONE); 1256 1257 if (js_truthy(js, done_val)) 1258 return js_iter_result(js, false, js_mkundef()); 1259 1260 ant_value_t result = regexp_exec_abstract(js, rx, str); 1261 if (is_err(result)) return result; 1262 1263 if (vtype(result) == T_NULL) { 1264 js_set_slot(iter, SLOT_MATCHALL_DONE, js_true); 1265 return js_iter_result(js, false, js_mkundef()); 1266 } 1267 1268 ant_value_t global_val = js_getprop_fallback(js, rx, "global"); 1269 if (js_truthy(js, global_val)) { 1270 ant_value_t match_str = js_tostring_val(js, js_arr_get(js, result, 0)); 1271 if (is_err(match_str)) return match_str; 1272 ant_offset_t mlen; 1273 vstr(js, match_str, &mlen); 1274 if (mlen == 0) { 1275 ant_value_t li_val = js_getprop_fallback(js, rx, "lastIndex"); 1276 double li = vtype(li_val) == T_NUM ? tod(li_val) : 0; 1277 js_setprop(js, rx, js_mkstr(js, "lastIndex", 9), tov(li + 1)); 1278 } 1279 } else js_set_slot(iter, SLOT_MATCHALL_DONE, js_true); 1280 1281 return js_iter_result(js, true, result); 1282} 1283 1284static ant_value_t builtin_regexp_symbol_matchAll(ant_t *js, ant_value_t *args, int nargs) { 1285 ant_value_t rx = js->this_val; 1286 if (!is_object_type(rx)) 1287 return js_mkerr_typed(js, JS_ERR_TYPE, "RegExp.prototype[@@matchAll] called on non-object"); 1288 1289 ant_value_t str = nargs > 0 ? 
js_tostring_val(js, args[0]) : js_mkstr(js, "undefined", 9); 1290 if (is_err(str)) return str; 1291 1292 ant_value_t flags_val = js_getprop_fallback(js, rx, "flags"); 1293 if (is_err(flags_val)) return flags_val; 1294 ant_value_t flags_str = js_tostring_val(js, flags_val); 1295 if (is_err(flags_str)) return flags_str; 1296 1297 ant_value_t source_val = js_getprop_fallback(js, rx, "source"); 1298 if (is_err(source_val)) return source_val; 1299 1300 ant_value_t ctor_args[2] = { source_val, flags_str }; 1301 ant_value_t regexp_ctor = js_get(js, js_glob(js), "RegExp"); 1302 ant_value_t new_rx = sv_vm_call(js->vm, js, regexp_ctor, js_mkundef(), ctor_args, 2, NULL, true); 1303 if (is_err(new_rx)) return new_rx; 1304 1305 ant_value_t li_val = js_getprop_fallback(js, rx, "lastIndex"); 1306 js_setprop(js, new_rx, js_mkstr(js, "lastIndex", 9), li_val); 1307 1308 ant_value_t iter = js_mkobj(js); 1309 js_set_slot(iter, SLOT_MATCHALL_RX, new_rx); 1310 js_set_slot(iter, SLOT_MATCHALL_STR, str); 1311 js_set_slot(iter, SLOT_MATCHALL_DONE, js_false); 1312 1313 js_set_proto_init(iter, regexp_matchall_iter_proto_val); 1314 1315 return iter; 1316} 1317 1318static ant_value_t builtin_string_matchAll(ant_t *js, ant_value_t *args, int nargs) { 1319 ant_value_t this_unwrapped = unwrap_primitive(js, js->this_val); 1320 ant_value_t str = js_tostring_val(js, this_unwrapped); 1321 if (is_err(str)) return str; 1322 if (nargs < 1) return js_mkerr_typed(js, JS_ERR_TYPE, "matchAll requires at least 1 argument"); 1323 1324 if (is_object_type(args[0])) { 1325 ant_value_t is_re = is_regexp_like(js, args[0]); 1326 if (js_truthy(js, is_re)) { 1327 ant_value_t flags_val = js_getprop_fallback(js, args[0], "flags"); 1328 if (is_err(flags_val)) return flags_val; 1329 1330 ant_value_t flags_str = js_tostring_val(js, flags_val); 1331 ant_offset_t flen, foff = vstr(js, flags_str, &flen); 1332 1333 const char *fp = (const char *)(uintptr_t)(foff); 1334 bool has_g = false; 1335 for (ant_offset_t i = 0; i < 
flen; i++) if (fp[i] == 'g') has_g = true; 1336 if (!has_g) return js_mkerr_typed(js, JS_ERR_TYPE, "String.prototype.matchAll called with a non-global RegExp"); 1337 } 1338 1339 bool called = false; 1340 ant_value_t call_args[1] = { str }; 1341 ant_value_t dispatched = maybe_call_symbol_method( 1342 js, args[0], get_matchAll_sym(), args[0], call_args, 1, &called 1343 ); 1344 1345 if (is_err(dispatched)) return dispatched; 1346 if (called) return dispatched; 1347 } 1348 1349 ant_value_t pattern_str = js_tostring_val(js, args[0]); 1350 if (is_err(pattern_str)) return pattern_str; 1351 1352 ant_value_t ctor_args[2] = { pattern_str, js_mkstr(js, "g", 1) }; 1353 ant_value_t regexp_ctor = js_get(js, js_glob(js), "RegExp"); 1354 ant_value_t rx = sv_vm_call(js->vm, js, regexp_ctor, js_mkundef(), ctor_args, 2, NULL, true); 1355 if (is_err(rx)) return rx; 1356 1357 ant_value_t ma_args[1] = { str }; 1358 js->this_val = rx; 1359 1360 return builtin_regexp_symbol_matchAll(js, ma_args, 1); 1361} 1362 1363static ant_value_t builtin_regexp_symbol_replace(ant_t *js, ant_value_t *args, int nargs) { 1364 ant_value_t rx = js->this_val; 1365 if (!is_object_type(rx)) 1366 return js_mkerr_typed(js, JS_ERR_TYPE, "RegExp.prototype[@@replace] called on non-object"); 1367 1368 ant_value_t str = nargs > 0 ? js_tostring_val(js, args[0]) : js_mkstr(js, "undefined", 9); 1369 if (is_err(str)) return str; 1370 ant_value_t replace_value = nargs > 1 ? 
args[1] : js_mkundef(); 1371 bool func_replace = (vtype(replace_value) == T_FUNC || vtype(replace_value) == T_CFUNC); 1372 ant_value_t replace_str = js_mkundef(); 1373 if (!func_replace) { 1374 replace_str = js_tostring_val(js, replace_value); 1375 if (is_err(replace_str)) return replace_str; 1376 } 1377 1378 ant_value_t global_val = js_getprop_fallback(js, rx, "global"); 1379 if (is_err(global_val)) return global_val; 1380 bool global = js_truthy(js, global_val); 1381 1382 bool full_unicode = false; 1383 if (global) { 1384 ant_value_t unicode_val = js_getprop_fallback(js, rx, "unicode"); 1385 if (is_err(unicode_val)) return unicode_val; 1386 full_unicode = js_truthy(js, unicode_val); 1387 js_setprop(js, rx, js_mkstr(js, "lastIndex", 9), tov(0)); 1388 } 1389 1390 ant_value_t results = js_mkarr(js); 1391 if (is_err(results)) return results; 1392 ant_offset_t nresults = 0; 1393 1394 for (;;) { 1395 ant_value_t result = regexp_exec_abstract(js, rx, str); 1396 if (is_err(result)) return result; 1397 if (vtype(result) == T_NULL) break; 1398 js_arr_push(js, results, result); 1399 nresults++; 1400 if (!global) break; 1401 1402 ant_value_t match_str = js_tostring_val(js, js_arr_get(js, result, 0)); 1403 if (is_err(match_str)) return match_str; 1404 ant_offset_t mlen; vstr(js, match_str, &mlen); 1405 if (mlen == 0) { 1406 ant_value_t li_val = js_getprop_fallback(js, rx, "lastIndex"); 1407 if (is_err(li_val)) return li_val; 1408 double li = vtype(li_val) == T_NUM ? 
tod(li_val) : 0; 1409 ant_offset_t sl, so = vstr(js, str, &sl); 1410 double advance = 1; 1411 if (full_unicode && li < (double)sl) { 1412 advance = (double)utf8_char_len_at((const char *)(uintptr_t)(so), sl, (ant_offset_t)li); 1413 } 1414 js_setprop(js, rx, js_mkstr(js, "lastIndex", 9), tov(li + advance)); 1415 } 1416 } 1417 1418 ant_offset_t str_len, str_off = vstr(js, str, &str_len); 1419 size_t buf_cap = str_len + 256; 1420 char *buf = ant_calloc(buf_cap); 1421 if (!buf) return js_mkerr(js, "oom"); 1422 size_t buf_len = 0; 1423 ant_offset_t next_src_pos = 0; 1424 1425#define SB_APPEND(data, dlen) do { \ 1426 if (buf_len + (dlen) >= buf_cap) { \ 1427 buf_cap = (buf_len + (dlen) + 1) * 2; \ 1428 char *nb = ant_realloc(buf, buf_cap); \ 1429 if (!nb) { free(buf); return js_mkerr(js, "oom"); } \ 1430 buf = nb; \ 1431 } \ 1432 memcpy(buf + buf_len, data, dlen); buf_len += (dlen); \ 1433} while(0) 1434 1435 for (ant_offset_t i = 0; i < nresults; i++) { 1436 ant_value_t result = js_arr_get(js, results, i); 1437 ant_value_t matched = js_tostring_val(js, js_arr_get(js, result, 0)); 1438 if (is_err(matched)) { free(buf); return matched; } 1439 ant_offset_t matched_len; vstr(js, matched, &matched_len); 1440 1441 ant_value_t pos_val = js_getprop_fallback(js, result, "index"); 1442 ant_offset_t position = 0; 1443 if (!is_err(pos_val) && vtype(pos_val) == T_NUM) { 1444 double d = tod(pos_val); 1445 position = d < 0 ? 
0 : (ant_offset_t)d; 1446 } 1447 if (position > str_len) position = str_len; 1448 1449 ant_value_t replacement; 1450 if (func_replace) { 1451 ant_offset_t ncaptures = js_arr_len(js, result); 1452 ant_value_t call_args[32]; 1453 int ca = 0; 1454 for (ant_offset_t c = 0; c < ncaptures && ca < 30; c++) 1455 call_args[ca++] = js_arr_get(js, result, c); 1456 call_args[ca++] = tov((double)position); 1457 call_args[ca++] = str; 1458 replacement = sv_vm_call(js->vm, js, replace_value, js_mkundef(), call_args, ca, NULL, false); 1459 } else { 1460 replacement = replace_str; 1461 } 1462 if (is_err(replacement)) { free(buf); return replacement; } 1463 ant_value_t rep_str = js_tostring_val(js, replacement); 1464 if (is_err(rep_str)) { free(buf); return rep_str; } 1465 1466 if (position >= next_src_pos) { 1467 str_off = vstr(js, str, &str_len); 1468 if (position > next_src_pos) 1469 SB_APPEND((const char *)(uintptr_t)(str_off + next_src_pos), position - next_src_pos); 1470 ant_offset_t rep_len, rep_off = vstr(js, rep_str, &rep_len); 1471 if (func_replace) { 1472 SB_APPEND((const char *)(uintptr_t)(rep_off), rep_len); 1473 } else { 1474 ant_offset_t ncap = js_arr_len(js, result); 1475 int num_caps = ncap > 1 ? (int)(ncap - 1) : 0; 1476 repl_capture_t caps_buf[16], *caps = num_caps <= 16 ? 
caps_buf : ant_calloc(sizeof(repl_capture_t) * (size_t)num_caps); 1477 if (num_caps > 16 && !caps) { 1478 free(buf); 1479 return js_mkerr(js, "oom"); 1480 } 1481 for (int ci = 0; ci < num_caps; ci++) { 1482 ant_value_t cap = js_arr_get(js, result, (ant_offset_t)(ci + 1)); 1483 if (vtype(cap) == T_STR) { ant_offset_t cl, co = vstr(js, cap, &cl); caps[ci] = (repl_capture_t){ (const char *)(uintptr_t)(co), cl }; } 1484 else caps[ci] = (repl_capture_t){ NULL, 0 }; 1485 } 1486 ant_offset_t mlen, moff = vstr(js, matched, &mlen); 1487 str_off = vstr(js, str, &str_len); 1488 bool ok = repl_template( 1489 (const char *)(uintptr_t)(rep_off), rep_len, 1490 (const char *)(uintptr_t)(moff), mlen, 1491 (const char *)(uintptr_t)(str_off), str_len, position, 1492 caps, num_caps, &buf, &buf_len, &buf_cap 1493 ); 1494 if (caps != caps_buf) free(caps); 1495 if (!ok) { 1496 free(buf); 1497 return js_mkerr(js, "oom"); 1498 } 1499 } 1500 next_src_pos = position + matched_len; 1501 } 1502 } 1503 1504 str_off = vstr(js, str, &str_len); 1505 if (next_src_pos < str_len) 1506 SB_APPEND((const char *)(uintptr_t)(str_off + next_src_pos), str_len - next_src_pos); 1507 1508#undef SB_APPEND 1509 1510 ant_value_t ret = js_mkstr(js, buf, buf_len); 1511 free(buf); 1512 return ret; 1513} 1514 1515static ant_value_t builtin_regexp_symbol_search(ant_t *js, ant_value_t *args, int nargs) { 1516 ant_value_t rx = js->this_val; 1517 if (!is_object_type(rx)) 1518 return js_mkerr_typed(js, JS_ERR_TYPE, "RegExp.prototype[@@search] called on non-object"); 1519 1520 ant_value_t str = nargs > 0 ? 
js_tostring_val(js, args[0]) : js_mkstr(js, "undefined", 9); 1521 if (is_err(str)) return str; 1522 1523 ant_value_t prev_li = js_getprop_fallback(js, rx, "lastIndex"); 1524 if (is_err(prev_li)) return prev_li; 1525 js_setprop(js, rx, js_mkstr(js, "lastIndex", 9), tov(0)); 1526 1527 ant_value_t result = regexp_exec_abstract(js, rx, str); 1528 if (is_err(result)) return result; 1529 1530 ant_value_t cur_li = js_getprop_fallback(js, rx, "lastIndex"); 1531 if (is_err(cur_li)) return cur_li; 1532 js_setprop(js, rx, js_mkstr(js, "lastIndex", 9), prev_li); 1533 1534 if (vtype(result) == T_NULL) return tov(-1); 1535 1536 ant_value_t idx = js_getprop_fallback(js, result, "index"); 1537 if (is_err(idx)) return idx; 1538 return vtype(idx) == T_NUM ? idx : tov(-1); 1539} 1540 1541static ant_value_t builtin_regexp_symbol_split(ant_t *js, ant_value_t *args, int nargs) { 1542 ant_value_t rx = js_getthis(js); 1543 if (!is_object_type(rx)) 1544 return js_mkerr_typed(js, JS_ERR_TYPE, "RegExp.prototype[@@split] called on non-object"); 1545 1546 ant_value_t str = nargs > 0 ? 
js_tostring_val(js, args[0]) : js_mkstr(js, "", 0); 1547 if (is_err(str)) return str; 1548 1549 ant_value_t ctor = js_get(js, rx, "constructor"); 1550 if (is_err(ctor)) return ctor; 1551 1552 ant_value_t C; 1553 if (vtype(ctor) == T_UNDEF) { 1554 C = js_get(js, js_glob(js), "RegExp"); 1555 } else if (!is_object_type(ctor)) { 1556 return js_mkerr_typed(js, JS_ERR_TYPE, "RegExp.prototype[@@split]: constructor is not an object"); 1557 } else { 1558 ant_value_t species = get_ctor_species_value(js, ctor); 1559 if (is_err(species)) return species; 1560 if (vtype(species) == T_UNDEF || vtype(species) == T_NULL) 1561 C = js_get(js, js_glob(js), "RegExp"); 1562 else C = species; 1563 } 1564 1565 if (is_err(C)) return C; 1566 if (vtype(C) != T_FUNC && vtype(C) != T_CFUNC) 1567 return js_mkerr_typed(js, JS_ERR_TYPE, "RegExp species is not a constructor"); 1568 1569 ant_value_t flags_val = js_get(js, rx, "flags"); 1570 if (is_err(flags_val)) return flags_val; 1571 ant_value_t flags_str = js_tostring_val(js, flags_val); 1572 if (is_err(flags_str)) return flags_str; 1573 1574 ant_offset_t flen, foff = vstr(js, flags_str, &flen); 1575 const char *fptr = (const char *)(uintptr_t)(foff); 1576 bool unicode_matching = false, has_sticky = false; 1577 for (ant_offset_t i = 0; i < flen; i++) { 1578 if (fptr[i] == 'u' || fptr[i] == 'v') unicode_matching = true; 1579 if (fptr[i] == 'y') has_sticky = true; 1580 } 1581 1582 ant_value_t new_flags; 1583 if (has_sticky) new_flags = flags_str; else { 1584 char fbuf[16]; 1585 if (flen > 14) flen = 14; 1586 foff = vstr(js, flags_str, &flen); 1587 fptr = (const char *)(uintptr_t)(foff); 1588 memcpy(fbuf, fptr, flen); 1589 fbuf[flen] = 'y'; 1590 new_flags = js_mkstr(js, fbuf, flen + 1); 1591 } 1592 1593 ant_value_t ctor_args[2] = { rx, new_flags }; 1594 ant_value_t splitter = regexp_species_construct(js, rx, C, ctor_args, 2); 1595 if (is_err(splitter)) return splitter; 1596 1597 ant_value_t A = js_mkarr(js); 1598 if (is_err(A)) return A; 1599 
ant_offset_t lengthA = 0; 1600 1601 uint32_t lim = UINT32_MAX; 1602 if (nargs >= 2 && vtype(args[1]) != T_UNDEF) { 1603 double d = tod(args[1]); 1604 if (d >= 0 && d <= UINT32_MAX) lim = (uint32_t)d; 1605 } if (lim == 0) return mkval(T_ARR, vdata(A)); 1606 1607 ant_offset_t str_len, str_off = vstr(js, str, &str_len); 1608 ant_offset_t size = str_len; 1609 1610 if (size == 0) { 1611 ant_value_t z = regexp_exec_abstract(js, splitter, str); 1612 if (is_err(z)) return z; 1613 if (vtype(z) == T_NULL) js_arr_push(js, A, str); 1614 return mkval(T_ARR, vdata(A)); 1615 } 1616 1617 ant_offset_t p = 0, q = p; 1618 ant_value_t lastIndex_key = js_mkstr(js, "lastIndex", 9); 1619 1620 while (q < size) { 1621 js_setprop(js, splitter, lastIndex_key, tov((double)q)); 1622 1623 ant_value_t z = regexp_exec_abstract(js, splitter, str); 1624 if (is_err(z)) return z; 1625 1626 if (vtype(z) == T_NULL) { 1627 if (unicode_matching) { 1628 str_off = vstr(js, str, &str_len); 1629 q += utf8_char_len_at((const char *)(uintptr_t)(str_off), str_len, q); 1630 } else q++; 1631 continue; 1632 } 1633 1634 ant_value_t li_val = js_get(js, splitter, "lastIndex"); 1635 if (is_err(li_val)) return li_val; 1636 double e_raw = vtype(li_val) == T_NUM ? tod(li_val) : 0; 1637 ant_offset_t e = (ant_offset_t)(e_raw < 0 ? 0 : (e_raw > (double)size ? 
(double)size : e_raw)); 1638 1639 if (e == p) { 1640 if (unicode_matching) { 1641 str_off = vstr(js, str, &str_len); 1642 q += utf8_char_len_at((const char *)(uintptr_t)(str_off), str_len, q); 1643 } else q++; 1644 continue; 1645 } 1646 1647 str_off = vstr(js, str, NULL); 1648 ant_value_t T_val = js_mkstr(js, (char *)(uintptr_t)(str_off + p), q - p); 1649 js_arr_push(js, A, T_val); 1650 lengthA++; 1651 if (lengthA == lim) return mkval(T_ARR, vdata(A)); 1652 1653 ant_offset_t num_caps = js_arr_len(js, z); 1654 for (ant_offset_t i = 1; i < num_caps; i++) { 1655 ant_value_t cap = js_arr_get(js, z, i); 1656 js_arr_push(js, A, cap); 1657 lengthA++; 1658 if (lengthA == lim) return mkval(T_ARR, vdata(A)); 1659 } 1660 1661 p = e; 1662 q = p; 1663 } 1664 1665 str_off = vstr(js, str, &str_len); 1666 ant_value_t trailing = js_mkstr(js, (char *)(uintptr_t)(str_off + p), str_len - p); 1667 js_arr_push(js, A, trailing); 1668 return mkval(T_ARR, vdata(A)); 1669} 1670 1671ant_value_t do_regex_match_pcre2(ant_t *js, regex_match_args_t args) { 1672 char pcre2_pattern[4096]; 1673 size_t pcre2_len = js_to_pcre2_pattern(args.pattern_ptr, args.pattern_len, pcre2_pattern, sizeof(pcre2_pattern), false); 1674 1675 uint32_t options = PCRE2_UTF | PCRE2_UCP | PCRE2_MATCH_UNSET_BACKREF | PCRE2_DUPNAMES; 1676 if (args.ignore_case) options |= PCRE2_CASELESS; 1677 if (args.multiline) options |= PCRE2_MULTILINE; 1678 1679 int errcode; 1680 PCRE2_SIZE erroffset; 1681 pcre2_code *re = pcre2_compile((PCRE2_SPTR)pcre2_pattern, pcre2_len, options, &errcode, &erroffset, NULL); 1682 if (re == NULL) return js_mknull(); 1683 1684 pcre2_match_data *match_data = pcre2_match_data_create_from_pattern(re, NULL); 1685 uint32_t capture_count; 1686 pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &capture_count); 1687 1688 ant_value_t result_arr = js_mkarr(js); 1689 if (is_err(result_arr)) { 1690 pcre2_match_data_free(match_data); 1691 pcre2_code_free(re); 1692 return result_arr; 1693 } 1694 1695 PCRE2_SIZE pos = 
0; 1696 int match_count = 0; 1697 1698 while (pos <= (PCRE2_SIZE)args.str_len) { 1699 int rc = pcre2_match(re, (PCRE2_SPTR)args.str_ptr, args.str_len, pos, 0, match_data, NULL); 1700 if (rc < 0) break; 1701 1702 PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(match_data); 1703 PCRE2_SIZE match_start = ovector[0]; 1704 PCRE2_SIZE match_end = ovector[1]; 1705 1706 if (args.global) { 1707 ant_value_t match_str = js_mkstr(js, args.str_ptr + match_start, match_end - match_start); 1708 if (is_err(match_str)) { 1709 pcre2_match_data_free(match_data); 1710 pcre2_code_free(re); 1711 return match_str; 1712 } 1713 js_arr_push(js, result_arr, match_str); 1714 } else { 1715 for (uint32_t i = 0; i <= capture_count; i++) { 1716 PCRE2_SIZE start = ovector[2*i]; 1717 PCRE2_SIZE end = ovector[2*i+1]; 1718 if (start == PCRE2_UNSET) { 1719 js_arr_push(js, result_arr, js_mkundef()); 1720 } else { 1721 ant_value_t match_str = js_mkstr(js, args.str_ptr + start, end - start); 1722 if (is_err(match_str)) { 1723 pcre2_match_data_free(match_data); 1724 pcre2_code_free(re); 1725 return match_str; 1726 } 1727 js_arr_push(js, result_arr, match_str); 1728 } 1729 } 1730 js_setprop(js, result_arr, js_mkstr(js, "index", 5), tov((double)match_start)); 1731 } 1732 match_count++; 1733 1734 if (!args.global) break; 1735 if (match_start == match_end) { 1736 pos = match_end + 1; 1737 } else { pos = match_end; } 1738 } 1739 1740 pcre2_match_data_free(match_data); 1741 pcre2_code_free(re); 1742 1743 if (match_count == 0) return js_mknull(); 1744 return result_arr; 1745} 1746 1747static bool str_buf_append(char **buf, size_t *len, size_t *cap, const char *data, size_t n) { 1748 if (n == 0) return true; 1749 if (*len + n >= *cap) { 1750 size_t new_cap = (*len + n + 1) * 2; 1751 char *nb = (char *)ant_realloc(*buf, new_cap); 1752 if (!nb) return false; 1753 *buf = nb; 1754 *cap = new_cap; 1755 } 1756 memcpy(*buf + *len, data, n); 1757 *len += n; 1758 return true; 1759} 1760 1761static inline ant_value_t 
emit_str_replacement( 1762 ant_t *js, ant_value_t replacement, bool is_func, 1763 const char *repl_ptr, ant_offset_t repl_len, 1764 const char *str_ptr, ant_value_t str, 1765 ant_offset_t pos, ant_offset_t match_len, 1766 char **buf, size_t *buf_len, size_t *buf_cap 1767) { 1768 if (is_func) { 1769 ant_value_t cb_args[3] = { js_mkstr(js, str_ptr + pos, match_len), tov((double)pos), str }; 1770 ant_value_t r = sv_vm_call(js->vm, js, replacement, js_mkundef(), cb_args, 3, NULL, false); 1771 1772 if (vtype(r) == T_ERR) return r; 1773 ant_value_t r_str = js_tostring_val(js, r); 1774 1775 if (is_err(r_str)) return r_str; 1776 ant_offset_t rlen, roff = vstr(js, r_str, &rlen); 1777 1778 if (!str_buf_append(buf, buf_len, buf_cap, (const char *)(uintptr_t)roff, rlen)) return js_mkerr(js, "oom"); 1779 } else if (!str_buf_append(buf, buf_len, buf_cap, repl_ptr, repl_len)) return js_mkerr(js, "oom"); 1780 1781 return js_mkundef(); 1782} 1783 1784static ant_value_t string_replace_impl(ant_t *js, ant_value_t *args, int nargs, bool replace_all) { 1785 ant_value_t this_unwrapped = unwrap_primitive(js, js->this_val); 1786 ant_value_t str = js_tostring_val(js, this_unwrapped); 1787 1788 if (is_err(str)) return str; 1789 if (nargs < 1) return str; 1790 1791 if (is_object_type(args[0])) { 1792 if (replace_all) { 1793 ant_value_t global_val = js_getprop_fallback(js, args[0], "global"); 1794 if (!js_truthy(js, global_val)) return js_mkerr_typed(js, JS_ERR_TYPE, "String.prototype.replaceAll called with a non-global RegExp"); 1795 } 1796 1797 bool called = false; 1798 ant_value_t replacement_arg = nargs > 1 ? 
args[1] : js_mkundef(); 1799 ant_value_t call_args[2] = { str, replacement_arg }; 1800 1801 ant_value_t result = maybe_call_symbol_method(js, args[0], get_replace_sym(), args[0], call_args, 2, &called); 1802 if (is_err(result)) return result; 1803 if (called) return result; 1804 } 1805 1806 if (nargs < 2) return str; 1807 ant_value_t search = args[0]; 1808 ant_value_t replacement = args[1]; 1809 if (vtype(search) != T_STR) return str; 1810 1811 ant_offset_t str_len, str_off = vstr(js, str, &str_len); 1812 const char *str_ptr = (char *)(uintptr_t)(str_off); 1813 ant_offset_t search_len, search_off = vstr(js, search, &search_len); 1814 const char *search_ptr = (char *)(uintptr_t)(search_off); 1815 1816 bool is_func = (vtype(replacement) == T_FUNC); 1817 ant_offset_t repl_len = 0; 1818 const char *repl_ptr = NULL; 1819 1820 if (!is_func) { 1821 if (vtype(replacement) != T_STR) return str; 1822 ant_offset_t repl_off = vstr(js, replacement, &repl_len); 1823 repl_ptr = (char *)(uintptr_t)(repl_off); 1824 } 1825 1826 if (!replace_all) { 1827 if (search_len > str_len) return str; 1828 ant_offset_t match_pos = 0; 1829 bool found = false; 1830 1831 for (ant_offset_t i = 0; i <= str_len - search_len; i++) 1832 if (memcmp(str_ptr + i, search_ptr, search_len) == 0) { 1833 match_pos = i; found = true; break; 1834 } 1835 1836 if (!found) return str; 1837 1838 size_t cap = str_len + repl_len + 256, len = 0; 1839 char *buf = (char *)ant_calloc(cap); 1840 if (!buf) return js_mkerr(js, "oom"); 1841 1842 if (!str_buf_append(&buf, &len, &cap, str_ptr, match_pos)) { 1843 free(buf); 1844 return js_mkerr(js, "oom"); 1845 } 1846 1847 ant_value_t err = emit_str_replacement( 1848 js, replacement, is_func, repl_ptr, 1849 repl_len, str_ptr, str, match_pos, 1850 search_len, &buf, &len, &cap 1851 ); 1852 1853 if (vtype(err) == T_ERR) { 1854 free(buf); 1855 return err; 1856 } 1857 1858 if (!str_buf_append( 1859 &buf, &len, &cap, str_ptr + match_pos + search_len, 1860 str_len - match_pos - 
search_len) 1861 ) { 1862 free(buf); 1863 return js_mkerr(js, "oom"); 1864 } 1865 1866 ant_value_t ret = js_mkstr(js, buf, len); 1867 free(buf); 1868 1869 return ret; 1870 } else { 1871 size_t cap = str_len + repl_len + 256, len = 0; 1872 char *buf = (char *)ant_calloc(cap); 1873 if (!buf) return js_mkerr(js, "oom"); 1874 1875 ant_offset_t pos = 0; 1876 bool replaced = false; 1877 1878 while (pos + (ant_offset_t)search_len <= str_len) { 1879 if (search_len == 0 || memcmp(str_ptr + pos, search_ptr, search_len) == 0) { 1880 replaced = true; 1881 ant_value_t err = emit_str_replacement(js, replacement, is_func, repl_ptr, repl_len, str_ptr, str, pos, search_len, &buf, &len, &cap); 1882 if (vtype(err) == T_ERR) { free(buf); return err; } 1883 if (search_len == 0) { 1884 if (pos < str_len && !str_buf_append(&buf, &len, &cap, str_ptr + pos, 1)) { free(buf); return js_mkerr(js, "oom"); } 1885 pos++; 1886 } else pos += search_len; 1887 } else { 1888 if (!str_buf_append(&buf, &len, &cap, str_ptr + pos, 1)) { free(buf); return js_mkerr(js, "oom"); } 1889 pos++; 1890 } 1891 } 1892 1893 if (!str_buf_append( 1894 &buf, &len, &cap, str_ptr + pos, 1895 str_len - pos) 1896 ) { 1897 free(buf); 1898 return js_mkerr(js, "oom"); 1899 } 1900 1901 if (!replaced) { 1902 free(buf); 1903 return str; 1904 } 1905 1906 ant_value_t ret = js_mkstr(js, buf, len); 1907 free(buf); 1908 1909 return ret; 1910 } 1911} 1912 1913static ant_value_t builtin_string_replace(ant_t *js, ant_value_t *args, int nargs) { 1914 return string_replace_impl(js, args, nargs, false); 1915} 1916 1917static ant_value_t builtin_string_replaceAll(ant_t *js, ant_value_t *args, int nargs) { 1918 return string_replace_impl(js, args, nargs, true); 1919} 1920 1921static ant_value_t builtin_string_search(ant_t *js, ant_value_t *args, int nargs) { 1922 ant_value_t this_unwrapped = unwrap_primitive(js, js->this_val); 1923 ant_value_t str = js_tostring_val(js, this_unwrapped); 1924 if (is_err(str)) return str; 1925 if (nargs < 1) 
return tov(-1); 1926 1927 if (is_object_type(args[0])) { 1928 bool called = false; 1929 ant_value_t call_args[1] = { str }; 1930 ant_value_t dispatched = maybe_call_symbol_method( 1931 js, args[0], get_search_sym(), args[0], call_args, 1, &called 1932 ); 1933 if (is_err(dispatched)) return dispatched; 1934 if (called) return dispatched; 1935 } 1936 1937 ant_value_t pattern = args[0]; 1938 const char *pattern_ptr = NULL; 1939 ant_offset_t pattern_len = 0; 1940 bool ignore_case = false, multiline = false; 1941 1942 if (vtype(pattern) == T_OBJ) { 1943 ant_offset_t source_off = lkp(js, pattern, "source", 6); 1944 if (source_off == 0) return tov(-1); 1945 ant_value_t source_val = js_propref_load(js, source_off); 1946 if (vtype(source_val) != T_STR) return tov(-1); 1947 1948 ant_offset_t poff; 1949 poff = vstr(js, source_val, &pattern_len); 1950 pattern_ptr = (char *)(uintptr_t)(poff); 1951 1952 ant_offset_t flags_off = lkp(js, pattern, "flags", 5); 1953 if (flags_off != 0) { 1954 ant_value_t flags_val = js_propref_load(js, flags_off); 1955 if (vtype(flags_val) == T_STR) { 1956 ant_offset_t flen, foff = vstr(js, flags_val, &flen); 1957 const char *flags_str = (char *)(uintptr_t)(foff); 1958 for (ant_offset_t i = 0; i < flen; i++) { 1959 if (flags_str[i] == 'i') ignore_case = true; 1960 if (flags_str[i] == 'm') multiline = true; 1961 } 1962 } 1963 } 1964 } else if (vtype(pattern) == T_STR) { 1965 ant_offset_t poff; 1966 poff = vstr(js, pattern, &pattern_len); 1967 pattern_ptr = (char *)(uintptr_t)(poff); 1968 } else { 1969 return tov(-1); 1970 } 1971 1972 ant_offset_t str_len, str_off = vstr(js, str, &str_len); 1973 const char *str_ptr = (char *)(uintptr_t)(str_off); 1974 1975 char pcre2_pattern[4096]; 1976 size_t pcre2_len = js_to_pcre2_pattern(pattern_ptr, pattern_len, pcre2_pattern, sizeof(pcre2_pattern), false); 1977 1978 uint32_t options = PCRE2_UTF | PCRE2_UCP | PCRE2_MATCH_UNSET_BACKREF | PCRE2_DUPNAMES; 1979 if (ignore_case) options |= PCRE2_CASELESS; 1980 if 
(multiline) options |= PCRE2_MULTILINE; 1981 1982 int errcode; 1983 PCRE2_SIZE erroffset; 1984 pcre2_code *re = pcre2_compile((PCRE2_SPTR)pcre2_pattern, pcre2_len, options, &errcode, &erroffset, NULL); 1985 if (re == NULL) return tov(-1); 1986 1987 pcre2_match_data *match_data = pcre2_match_data_create_from_pattern(re, NULL); 1988 int rc = pcre2_match(re, (PCRE2_SPTR)str_ptr, str_len, 0, 0, match_data, NULL); 1989 1990 if (rc < 0) { 1991 pcre2_match_data_free(match_data); 1992 pcre2_code_free(re); 1993 return tov(-1); 1994 } 1995 1996 PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(match_data); 1997 double result = (double)ovector[0]; 1998 1999 pcre2_match_data_free(match_data); 2000 pcre2_code_free(re); 2001 2002 return tov(result); 2003} 2004 2005static ant_value_t builtin_string_match(ant_t *js, ant_value_t *args, int nargs) { 2006 ant_value_t this_unwrapped = unwrap_primitive(js, js->this_val); 2007 ant_value_t str = js_tostring_val(js, this_unwrapped); 2008 if (is_err(str)) return str; 2009 if (nargs < 1) return js_mknull(); 2010 2011 if (is_object_type(args[0])) { 2012 bool called = false; 2013 ant_value_t call_args[1] = { str }; 2014 ant_value_t dispatched = maybe_call_symbol_method( 2015 js, args[0], get_match_sym(), args[0], call_args, 1, &called 2016 ); 2017 if (is_err(dispatched)) return dispatched; 2018 if (called) return dispatched; 2019 } 2020 2021 ant_value_t pattern = args[0]; 2022 const char *pattern_ptr = NULL; 2023 ant_offset_t pattern_len = 0; 2024 bool global_flag = false; 2025 bool ignore_case = false; 2026 bool multiline = false; 2027 2028 if (vtype(pattern) == T_OBJ) { 2029 ant_offset_t source_off = lkp(js, pattern, "source", 6); 2030 if (source_off == 0) return js_mknull(); 2031 2032 ant_value_t source_val = js_propref_load(js, source_off); 2033 if (vtype(source_val) != T_STR) return js_mknull(); 2034 2035 ant_offset_t poff; 2036 poff = vstr(js, source_val, &pattern_len); 2037 pattern_ptr = (char *)(uintptr_t)(poff); 2038 2039 ant_offset_t 
flags_off = lkp(js, pattern, "flags", 5); 2040 if (flags_off != 0) { 2041 ant_value_t flags_val = js_propref_load(js, flags_off); 2042 if (vtype(flags_val) == T_STR) { 2043 ant_offset_t flen, foff = vstr(js, flags_val, &flen); 2044 const char *flags_str = (char *)(uintptr_t)(foff); 2045 for (ant_offset_t i = 0; i < flen; i++) { 2046 if (flags_str[i] == 'g') global_flag = true; 2047 if (flags_str[i] == 'i') ignore_case = true; 2048 if (flags_str[i] == 'm') multiline = true; 2049 }} 2050 } 2051 } else if (vtype(pattern) == T_STR) { 2052 ant_offset_t poff; 2053 poff = vstr(js, pattern, &pattern_len); 2054 pattern_ptr = (char *)(uintptr_t)(poff); 2055 } else return js_mknull(); 2056 2057 ant_offset_t str_len, str_off = vstr(js, str, &str_len); 2058 const char *str_ptr = (char *)(uintptr_t)(str_off); 2059 2060 ant_value_t result = do_regex_match_pcre2(js, (regex_match_args_t){ 2061 .pattern_ptr = pattern_ptr, .pattern_len = pattern_len, 2062 .str_ptr = str_ptr, .str_len = str_len, 2063 .global = global_flag, .ignore_case = ignore_case, .multiline = multiline, 2064 }); 2065 2066 if (!global_flag && vtype(result) == T_ARR) { 2067 js_setprop(js, result, js_mkstr(js, "input", 5), str); 2068 } 2069 2070 return result; 2071} 2072 2073void init_regex_module(void) { 2074 ant_t *js = rt->js; 2075 ant_value_t glob = js->global; 2076 ant_value_t object_proto = js->sym.object_proto; 2077 2078 ant_value_t regexp_proto = js_mkobj(js); 2079 js_set_proto_init(regexp_proto, object_proto); 2080 2081 defmethod(js, regexp_proto, "test", 4, js_mkfun(builtin_regexp_test)); 2082 defmethod(js, regexp_proto, "exec", 4, js_mkfun(builtin_regexp_exec)); 2083 defmethod(js, regexp_proto, "toString", 8, js_mkfun(builtin_regexp_toString)); 2084 2085 js_mkprop_fast(js, regexp_proto, "global", 6, js_false); 2086 js_mkprop_fast(js, regexp_proto, "ignoreCase", 10, js_false); 2087 js_mkprop_fast(js, regexp_proto, "multiline", 9, js_false); 2088 js_mkprop_fast(js, regexp_proto, "dotAll", 6, js_false); 2089 
js_mkprop_fast(js, regexp_proto, "unicode", 7, js_false); 2090 js_mkprop_fast(js, regexp_proto, "sticky", 6, js_false); 2091 js_mkprop_fast(js, regexp_proto, "hasIndices", 10, js_false); 2092 js_mkprop_fast(js, regexp_proto, "unicodeSets", 11, js_false); 2093 2094 js_set_sym(js, regexp_proto, get_split_sym(), js_mkfun(builtin_regexp_symbol_split)); 2095 js_set_sym(js, regexp_proto, get_match_sym(), js_mkfun(builtin_regexp_symbol_match)); 2096 js_set_sym(js, regexp_proto, get_matchAll_sym(), js_mkfun(builtin_regexp_symbol_matchAll)); 2097 2098 regexp_matchall_iter_proto_val = js_mkobj(js); 2099 js_set_proto_init(regexp_matchall_iter_proto_val, js->sym.iterator_proto); 2100 defmethod(js, regexp_matchall_iter_proto_val, "next", 4, js_mkfun(regexp_matchall_next)); 2101 js_set_sym(js, regexp_matchall_iter_proto_val, get_iterator_sym(), js_mkfun(sym_this_cb)); 2102 js_set_sym(js, regexp_proto, get_replace_sym(), js_mkfun(builtin_regexp_symbol_replace)); 2103 js_set_sym(js, regexp_proto, get_search_sym(), js_mkfun(builtin_regexp_symbol_search)); 2104 js_set_sym(js, regexp_proto, get_toStringTag_sym(), js_mkstr(js, "RegExp", 6)); 2105 js_set_getter_desc(js, regexp_proto, "flags", 5, js_mkfun(builtin_regexp_flags_getter), JS_DESC_C); 2106 defmethod(js, regexp_proto, "compile", 7, js_mkfun(builtin_regexp_compile)); 2107 2108 ant_value_t regexp_ctor = js_mkobj(js); 2109 js_set_slot(regexp_ctor, SLOT_CFUNC, js_mkfun(builtin_RegExp)); 2110 js_mkprop_fast(js, regexp_ctor, "prototype", 9, regexp_proto); 2111 js_mkprop_fast(js, regexp_ctor, "name", 4, js_mkstr(js, "RegExp", 6)); 2112 js_set_descriptor(js, regexp_ctor, "name", 4, 0); 2113 js_define_species_getter(js, regexp_ctor); 2114 2115 ant_value_t regexp_func = js_obj_to_func(regexp_ctor); 2116 js_setprop(js, regexp_proto, js_mkstr(js, "constructor", 11), regexp_func); 2117 js_set_descriptor(js, regexp_proto, "constructor", 11, JS_DESC_W | JS_DESC_C); 2118 2119 js_set(js, regexp_ctor, "escape", 
js_mkfun(builtin_regexp_escape)); 2120 2121 ant_value_t empty = js_mkstr(js, "", 0); 2122 for (int i = 1; i <= 9; i++) { 2123 char key[3] = {'$', (char)('0' + i), '\0'}; 2124 js_set(js, regexp_ctor, key, empty); 2125 } 2126 2127 js_set(js, regexp_ctor, "lastMatch", empty); 2128 js_set(js, regexp_ctor, "$&", empty); 2129 js_set(js, glob, "RegExp", regexp_func); 2130 2131 ant_value_t string_ctor = js_get(js, glob, "String"); 2132 ant_value_t string_proto = js_get(js, string_ctor, "prototype"); 2133 2134 defmethod(js, string_proto, "search", 6, js_mkfun(builtin_string_search)); 2135 defmethod(js, string_proto, "match", 5, js_mkfun(builtin_string_match)); 2136 defmethod(js, string_proto, "matchAll", 8, js_mkfun(builtin_string_matchAll)); 2137 defmethod(js, string_proto, "replace", 7, js_mkfun(builtin_string_replace)); 2138 defmethod(js, string_proto, "replaceAll", 10, js_mkfun(builtin_string_replaceAll)); 2139} 2140 2141void gc_sweep_regex_cache(void) { 2142 size_t write = 0; 2143 for (size_t i = 0; i < regex_cache_count; i++) { 2144 if (!gc_obj_is_marked(regex_cache[i].obj)) { 2145 pcre2_match_data_free(regex_cache[i].match_data); 2146 pcre2_code_free(regex_cache[i].code); 2147 } else { 2148 if (write != i) regex_cache[write] = regex_cache[i]; 2149 write++; 2150 } 2151 } 2152 regex_cache_count = write; 2153} 2154 2155void cleanup_regex_module(void) { 2156 for (size_t i = 0; i < regex_cache_count; i++) { 2157 pcre2_match_data_free(regex_cache[i].match_data); 2158 pcre2_code_free(regex_cache[i].code); 2159 } 2160 free(regex_cache); 2161 regex_cache = NULL; 2162 regex_cache_count = 0; 2163 regex_cache_cap = 0; 2164}