MIRROR: javascript for 🐜's, a tiny runtime with big ambitions
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

improve regex species

+169 -23
+1
include/utf8.h
··· 8 8 int utf8_encode(uint32_t codepoint, char *out); 9 9 uint32_t utf8_decode(const unsigned char *buf, size_t len, int *seq_len); 10 10 11 + size_t utf8_char_len_at(const char *str, size_t byte_len, size_t pos); 11 12 size_t utf8_strlen(const char *str, size_t byte_len); 12 13 size_t utf16_strlen(const char *str, size_t byte_len); 13 14
+160 -23
src/ant.c
··· 15390 15390 if (nargs >= 2 && vtype(args[1]) != T_UNDEF) return js_false; 15391 15391 if (!is_object_type(args[0])) return js_false; 15392 15392 15393 + jsval_t is_re = is_regexp_like(js, args[0]); 15394 + if (is_err(is_re)) return is_re; 15395 + if (!js_truthy(js, is_re)) return js_false; 15396 + 15393 15397 jsval_t ctor = js_getprop_fallback(js, args[0], "constructor"); 15394 15398 if (is_err(ctor)) return ctor; 15395 15399 ··· 15408 15412 return js_mkundef(); 15409 15413 } 15410 15414 15411 - jsval_t builtin_regexp_symbol_split(struct js *js, jsval_t *args, int nargs) { 15412 - jsval_t regexp = js_getthis(js); 15413 - if (!is_object_type(regexp)) { 15414 - return js_mkerr_typed(js, JS_ERR_TYPE, "RegExp.prototype[@@split] called on non-object"); 15415 - } 15415 + static jsval_t regexp_species_construct(struct js *js, jsval_t rx, jsval_t ctor, jsval_t *ctor_args, int nargs) { 15416 + jsval_t seed = js_mkobj(js); 15417 + if (is_err(seed)) return seed; 15418 + 15419 + jsval_t proto = js_get(js, ctor, "prototype"); 15420 + if (is_err(proto)) return proto; 15421 + if (is_object_type(proto)) set_proto(js, seed, proto); 15416 15422 15417 - jsval_t ctor = js_get(js, regexp, "constructor"); 15418 - if (is_err(ctor)) return ctor; 15423 + jsval_t saved = js->new_target; 15424 + js->new_target = ctor; 15425 + jsval_t result = js_call_with_this(js, ctor, seed, ctor_args, nargs); 15426 + js->new_target = saved; 15419 15427 15420 - jsval_t species = get_ctor_species_value(js, ctor); 15421 - if (is_err(species)) return species; 15428 + if (is_err(result)) return result; 15429 + if (!is_object_type(result)) 15430 + return js_mkerr_typed(js, JS_ERR_TYPE, "RegExp species constructor returned non-object"); 15422 15431 15423 - if (vtype(species) != T_UNDEF && vtype(species) != T_NULL && 15424 - vtype(species) != T_FUNC && vtype(species) != T_CFUNC) { 15425 - return js_mkerr_typed(js, JS_ERR_TYPE, "RegExp species is not a constructor"); 15432 + return result; 15433 + } 15434 + 
15435 + static jsval_t regexp_exec_abstract(struct js *js, jsval_t rx, jsval_t str) { 15436 + jsval_t exec_fn = js_get(js, rx, "exec"); 15437 + if (is_err(exec_fn)) return exec_fn; 15438 + 15439 + if (vtype(exec_fn) == T_FUNC || vtype(exec_fn) == T_CFUNC) { 15440 + jsval_t call_args[1] = { str }; 15441 + jsval_t result = js_call_with_this(js, exec_fn, rx, call_args, 1); 15442 + if (is_err(result)) return result; 15443 + if (!is_object_type(result) && vtype(result) != T_NULL) 15444 + return js_mkerr_typed(js, JS_ERR_TYPE, "RegExp exec returned non-object"); 15445 + return result; 15426 15446 } 15447 + 15448 + jsval_t call_args[1] = { str }; 15449 + jsval_t saved = js->this_val; 15450 + js->this_val = rx; 15451 + jsval_t result = builtin_regexp_exec(js, call_args, 1); 15452 + js->this_val = saved; 15427 15453 15428 - jsval_t flags_val = js_get(js, regexp, "flags"); 15429 - if (is_err(flags_val)) return flags_val; 15430 - jsval_t exec_val = js_get(js, regexp, "exec"); 15431 - if (is_err(exec_val)) return exec_val; 15454 + return result; 15455 + } 15456 + 15457 + jsval_t builtin_regexp_symbol_split(struct js *js, jsval_t *args, int nargs) { 15458 + jsval_t rx = js_getthis(js); 15459 + if (!is_object_type(rx)) 15460 + return js_mkerr_typed(js, JS_ERR_TYPE, "RegExp.prototype[@@split] called on non-object"); 15432 15461 15433 15462 jsval_t str = nargs > 0 ? 
js_tostring_val(js, args[0]) : js_mkstr(js, "", 0); 15434 15463 if (is_err(str)) return str; 15435 15464 15436 - jsval_t split_args[2]; 15437 - int split_nargs = 1; 15465 + jsval_t ctor = js_get(js, rx, "constructor"); 15466 + if (is_err(ctor)) return ctor; 15467 + 15468 + jsval_t C; 15469 + if (vtype(ctor) == T_UNDEF) { 15470 + C = js_get(js, js_glob(js), "RegExp"); 15471 + } else if (!is_object_type(ctor)) { 15472 + return js_mkerr_typed(js, JS_ERR_TYPE, "RegExp.prototype[@@split]: constructor is not an object"); 15473 + } else { 15474 + jsval_t species = get_ctor_species_value(js, ctor); 15475 + if (is_err(species)) return species; 15476 + if (vtype(species) == T_UNDEF || vtype(species) == T_NULL) 15477 + C = js_get(js, js_glob(js), "RegExp"); 15478 + else C = species; 15479 + } 15438 15480 15439 - split_args[0] = regexp; 15440 - if (nargs >= 2) { 15441 - split_args[1] = args[1]; 15442 - split_nargs = 2; 15481 + if (is_err(C)) return C; 15482 + if (vtype(C) != T_FUNC && vtype(C) != T_CFUNC) 15483 + return js_mkerr_typed(js, JS_ERR_TYPE, "RegExp species is not a constructor"); 15484 + 15485 + jsval_t flags_val = js_get(js, rx, "flags"); 15486 + if (is_err(flags_val)) return flags_val; 15487 + jsval_t flags_str = js_tostring_val(js, flags_val); 15488 + if (is_err(flags_str)) return flags_str; 15489 + 15490 + jsoff_t flen, foff = vstr(js, flags_str, &flen); 15491 + const char *fptr = (const char *)&js->mem[foff]; 15492 + bool unicode_matching = false, has_sticky = false; 15493 + for (jsoff_t i = 0; i < flen; i++) { 15494 + if (fptr[i] == 'u' || fptr[i] == 'v') unicode_matching = true; 15495 + if (fptr[i] == 'y') has_sticky = true; 15496 + } 15497 + 15498 + jsval_t new_flags; 15499 + if (has_sticky) new_flags = flags_str; else { 15500 + char fbuf[16]; 15501 + if (flen > 14) flen = 14; 15502 + foff = vstr(js, flags_str, &flen); 15503 + fptr = (const char *)&js->mem[foff]; 15504 + memcpy(fbuf, fptr, flen); 15505 + fbuf[flen] = 'y'; 15506 + new_flags = js_mkstr(js, 
fbuf, flen + 1); 15507 + } 15508 + 15509 + jsval_t ctor_args[2] = { rx, new_flags }; 15510 + jsval_t splitter = regexp_species_construct(js, rx, C, ctor_args, 2); 15511 + if (is_err(splitter)) return splitter; 15512 + 15513 + jsval_t A = mkarr(js); 15514 + if (is_err(A)) return A; 15515 + jsoff_t lengthA = 0; 15516 + 15517 + uint32_t lim = UINT32_MAX; 15518 + if (nargs >= 2 && vtype(args[1]) != T_UNDEF) { 15519 + double d = tod(args[1]); 15520 + if (d >= 0 && d <= UINT32_MAX) lim = (uint32_t)d; 15521 + } if (lim == 0) return mkval(T_ARR, vdata(A)); 15522 + 15523 + jsoff_t str_len, str_off = vstr(js, str, &str_len); 15524 + jsoff_t size = str_len; 15525 + 15526 + if (size == 0) { 15527 + jsval_t z = regexp_exec_abstract(js, splitter, str); 15528 + if (is_err(z)) return z; 15529 + if (vtype(z) == T_NULL) arr_set(js, A, 0, str); 15530 + return mkval(T_ARR, vdata(A)); 15531 + } 15532 + 15533 + jsoff_t p = 0, q = p; 15534 + jsval_t lastIndex_key = js_mkstr(js, "lastIndex", 9); 15535 + 15536 + while (q < size) { 15537 + js_setprop(js, splitter, lastIndex_key, tov((double)q)); 15538 + 15539 + jsval_t z = regexp_exec_abstract(js, splitter, str); 15540 + if (is_err(z)) return z; 15541 + 15542 + if (vtype(z) == T_NULL) { 15543 + if (unicode_matching) { 15544 + str_off = vstr(js, str, &str_len); 15545 + q += utf8_char_len_at((const char *)&js->mem[str_off], str_len, q); 15546 + } else q++; 15547 + continue; 15548 + } 15549 + 15550 + jsval_t li_val = js_get(js, splitter, "lastIndex"); 15551 + if (is_err(li_val)) return li_val; 15552 + double e_raw = vtype(li_val) == T_NUM ? tod(li_val) : 0; 15553 + jsoff_t e = (jsoff_t)(e_raw < 0 ? 0 : (e_raw > (double)size ? 
(double)size : e_raw)); 15554 + 15555 + if (e == p) { 15556 + if (unicode_matching) { 15557 + str_off = vstr(js, str, &str_len); 15558 + q += utf8_char_len_at((const char *)&js->mem[str_off], str_len, q); 15559 + } else q++; 15560 + continue; 15561 + } 15562 + 15563 + str_off = vstr(js, str, NULL); 15564 + jsval_t T_val = js_mkstr(js, (char *)&js->mem[str_off + p], q - p); 15565 + arr_set(js, A, lengthA++, T_val); 15566 + if (lengthA == lim) return mkval(T_ARR, vdata(A)); 15567 + 15568 + jsoff_t num_caps = get_array_length(js, z); 15569 + for (jsoff_t i = 1; i < num_caps; i++) { 15570 + jsval_t cap = arr_get(js, z, i); 15571 + arr_set(js, A, lengthA++, cap); 15572 + if (lengthA == lim) return mkval(T_ARR, vdata(A)); 15573 + } 15574 + 15575 + p = e; 15576 + q = p; 15443 15577 } 15444 15578 15445 - return string_split_impl(js, str, split_args, split_nargs); 15579 + str_off = vstr(js, str, &str_len); 15580 + jsval_t trailing = js_mkstr(js, (char *)&js->mem[str_off + p], str_len - p); 15581 + arr_set(js, A, lengthA, trailing); 15582 + return mkval(T_ARR, vdata(A)); 15446 15583 } 15447 15584 15448 15585 static jsval_t builtin_string_search(struct js *js, jsval_t *args, int nargs) {
+8
src/utf8.c
··· 57 57 return 0xFFFD; 58 58 } 59 59 60 + size_t utf8_char_len_at(const char *str, size_t byte_len, size_t pos) { 61 + if (pos >= byte_len) return 1; 62 + int seq = utf8_sequence_length((unsigned char)str[pos]); 63 + if (seq <= 0) return 1; 64 + if (pos + (size_t)seq > byte_len) return byte_len - pos; 65 + return (size_t)seq; 66 + } 67 + 60 68 size_t utf8_strlen(const char *str, size_t byte_len) { 61 69 size_t count = 0; 62 70 const unsigned char *p = (const unsigned char *)str;