MIRROR: javascript for 🐜's, a tiny runtime with big ambitions
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

improve regex compile speeds

+417 -122
+6 -2
examples/demo/kat.js
··· 3 3 import fs from 'node:fs'; 4 4 import path from 'node:path'; 5 5 6 + function exit(message) { 7 + console.log(message); 8 + process.exit(1); 9 + } 10 + 6 11 const file = process.argv[2]; 7 12 if (!file) exit('usage: kat <file>'); 8 13 ··· 12 17 console.log(Ant.highlight(content)); 13 18 } else console.log(content); 14 19 } catch (err) { 15 - console.log(`file '${err.path}' not found`); 16 - process.exit(1); 20 + exit(`file '${err.path}' not found`); 17 21 }
+5
include/common.h
··· 79 79 X(SLOT_RESPONSE_HEADERS) \ 80 80 X(SLOT_RESPONSE_BODY_STREAM) \ 81 81 X(SLOT_PIPE_ABORT_LISTENER) \ 82 + X(SLOT_REGEXP_FLAGS_MASK) \ 83 + X(SLOT_REGEXP_FLAGS_STRING) \ 84 + X(SLOT_REGEXP_NAMED_GROUPS) \ 85 + X(SLOT_REGEXP_RESULT_GROUPS) \ 86 + X(SLOT_REGEXP_GROUPS_CACHE) \ 82 87 X(SLOT_MATCHALL_RX) \ 83 88 X(SLOT_MATCHALL_STR) \ 84 89 X(SLOT_MATCHALL_DONE)
+8
include/modules/regex.h
··· 28 28 ant_value_t do_regex_match_pcre2(ant_t *js, regex_match_args_t args); 29 29 ant_value_t reject_regexp_arg(ant_t *js, ant_value_t value, const char *method_name); 30 30 31 + bool regexp_exec_truthy_try_fast( 32 + ant_t *js, 33 + ant_value_t call_func, 34 + ant_value_t regexp, 35 + ant_value_t arg, 36 + ant_value_t *out_result 37 + ); 38 + 31 39 #endif
+1
include/silver/opcode.h
··· 162 162 OP_DEF( CALL, 3, 1, 1, npop) /* func args... -> result */ 163 163 OP_DEF( CALL_METHOD, 3, 2, 1, npop) /* this func args... -> result */ 164 164 OP_DEF( CALL_IS_PROTO, 3, 3, 1, u16) /* this func arg -> bool (ic_idx:u16) */ 165 + OP_DEF( RE_EXEC_TRUTHY, 1, 3, 1, none) /* this func arg -> bool */ 165 166 OP_DEF( TAIL_CALL, 3, 1, 0, npop) /* tail-position call */ 166 167 OP_DEF( TAIL_CALL_METHOD, 3, 2, 0, npop) 167 168 OP_DEF( NEW, 3, 2, 1, npop) /* func new.target args -> obj */
+214 -99
src/modules/regex.c
··· 27 27 bool jit_ready; 28 28 } regex_cache_entry_t; 29 29 30 + enum { 31 + REGEXP_FLAG_HAS_INDICES = 1 << 0, 32 + REGEXP_FLAG_GLOBAL = 1 << 1, 33 + REGEXP_FLAG_IGNORE_CASE = 1 << 2, 34 + REGEXP_FLAG_MULTILINE = 1 << 3, 35 + REGEXP_FLAG_DOTALL = 1 << 4, 36 + REGEXP_FLAG_UNICODE = 1 << 5, 37 + REGEXP_FLAG_UNICODE_SET = 1 << 6, 38 + REGEXP_FLAG_STICKY = 1 << 7, 39 + }; 40 + 30 41 static regex_cache_entry_t *regex_cache = NULL; 31 42 static ant_value_t regexp_matchall_iter_proto_val = 0; 32 43 33 44 static size_t regex_cache_count = 0; 34 45 static size_t regex_cache_cap = 0; 35 46 47 + static inline uint8_t regexp_parse_flags_mask(const char *fstr, ant_offset_t flen) { 48 + uint8_t mask = 0; 49 + for (ant_offset_t k = 0; k < flen; k++) { 50 + switch (fstr[k]) { 51 + case 'd': mask |= REGEXP_FLAG_HAS_INDICES; break; 52 + case 'g': mask |= REGEXP_FLAG_GLOBAL; break; 53 + case 'i': mask |= REGEXP_FLAG_IGNORE_CASE; break; 54 + case 'm': mask |= REGEXP_FLAG_MULTILINE; break; 55 + case 's': mask |= REGEXP_FLAG_DOTALL; break; 56 + case 'u': mask |= REGEXP_FLAG_UNICODE; break; 57 + case 'v': mask |= REGEXP_FLAG_UNICODE_SET; break; 58 + case 'y': mask |= REGEXP_FLAG_STICKY; break; 59 + default: break; 60 + }} 61 + return mask; 62 + } 63 + 64 + static inline uint8_t regexp_flags_mask(ant_t *js, ant_value_t regexp) { 65 + ant_offset_t flags_off = lkp(js, regexp, "flags", 5); 66 + if (flags_off == 0) return 0; 67 + 68 + ant_value_t flags_val = js_propref_load(js, flags_off); 69 + if (vtype(flags_val) != T_STR) return 0; 70 + 71 + ant_value_t cached_flags = js_get_slot(regexp, SLOT_REGEXP_FLAGS_STRING); 72 + ant_value_t cached = js_get_slot(regexp, SLOT_REGEXP_FLAGS_MASK); 73 + if (flags_val == cached_flags && vtype(cached) == T_NUM) return (uint8_t)tod(cached); 74 + 75 + ant_offset_t flen, foff = vstr(js, flags_val, &flen); 76 + uint8_t mask = regexp_parse_flags_mask((const char *)(uintptr_t)foff, flen); 77 + js_set_slot(regexp, SLOT_REGEXP_FLAGS_MASK, tov((double)mask)); 78 + 
js_set_slot(regexp, SLOT_REGEXP_FLAGS_STRING, flags_val); 79 + 80 + return mask; 81 + } 82 + 83 + static ant_value_t regexp_build_named_groups_meta(ant_t *js, pcre2_code *code) { 84 + uint32_t namecount = 0; 85 + pcre2_pattern_info(code, PCRE2_INFO_NAMECOUNT, &namecount); 86 + if (namecount == 0) return js_mkundef(); 87 + 88 + uint32_t nameentrysize = 0; 89 + PCRE2_SPTR nametable = NULL; 90 + pcre2_pattern_info(code, PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize); 91 + pcre2_pattern_info(code, PCRE2_INFO_NAMETABLE, (void *)&nametable); 92 + 93 + ant_value_t meta = js_mkarr(js); 94 + if (is_err(meta)) return meta; 95 + 96 + PCRE2_SPTR tabptr = nametable; 97 + for (uint32_t i = 0; i < namecount; i++) { 98 + int n = (tabptr[0] << 8) | tabptr[1]; 99 + const char *name = (const char *)(tabptr + 2); 100 + ant_value_t name_val = js_mkstr(js, name, strlen(name)); 101 + if (is_err(name_val)) return name_val; 102 + js_arr_push(js, meta, name_val); 103 + js_arr_push(js, meta, tov((double)n)); 104 + tabptr += nameentrysize; 105 + } 106 + 107 + return meta; 108 + } 109 + 36 110 static void update_regexp_statics(ant_t *js, const char *str_ptr, PCRE2_SIZE *ovector, uint32_t ovcount) { 37 111 ant_value_t regexp_ctor = js_get(js, js_glob(js), "RegExp"); 38 112 if (is_err(regexp_ctor) || vtype(regexp_ctor) == T_UNDEF) return; ··· 43 117 ant_value_t val = empty; 44 118 if ((uint32_t)i < ovcount && ovector[2*i] != PCRE2_UNSET) 45 119 val = js_mkstr(js, str_ptr + ovector[2*i], ovector[2*i+1] - ovector[2*i]); 46 - js_set(js, regexp_ctor, key, val); 120 + if (is_err(setprop_cstr(js, regexp_ctor, key, 2, val))) return; 47 121 } 48 122 49 123 ant_value_t match0 = empty; 50 124 if (ovcount > 0 && ovector[0] != PCRE2_UNSET) 51 125 match0 = js_mkstr(js, str_ptr + ovector[0], ovector[1] - ovector[0]); 52 - js_set(js, regexp_ctor, "lastMatch", match0); 53 - js_set(js, regexp_ctor, "$&", match0); 126 + if (is_err(setprop_cstr(js, regexp_ctor, "lastMatch", 9, match0))) return; 127 + 
(void)setprop_cstr(js, regexp_ctor, "$&", 2, match0); 54 128 } 55 129 56 130 static inline bool is_pcre2_passthrough_escape(char c) { ··· 415 489 : js_setprop(js, obj, js_mkstr(js, key, klen), val)) 416 490 417 491 static void regexp_init_flags(ant_t *js, ant_value_t obj, const char *fstr, ant_offset_t flen, bool is_new) { 418 - bool d = false, g = false, i = false, m = false; 419 - bool s = false, u = false, v = false, y = false; 420 - 421 - for (ant_offset_t k = 0; k < flen; k++) { 422 - if (fstr[k] == 'd') d = true; 423 - if (fstr[k] == 'g') g = true; 424 - if (fstr[k] == 'i') i = true; 425 - if (fstr[k] == 'm') m = true; 426 - if (fstr[k] == 's') s = true; 427 - if (fstr[k] == 'u') u = true; 428 - if (fstr[k] == 'v') v = true; 429 - if (fstr[k] == 'y') y = true; 430 - } 492 + uint8_t mask = regexp_parse_flags_mask(fstr, flen); 493 + bool d = (mask & REGEXP_FLAG_HAS_INDICES) != 0; 494 + bool g = (mask & REGEXP_FLAG_GLOBAL) != 0; 495 + bool i = (mask & REGEXP_FLAG_IGNORE_CASE) != 0; 496 + bool m = (mask & REGEXP_FLAG_MULTILINE) != 0; 497 + bool s = (mask & REGEXP_FLAG_DOTALL) != 0; 498 + bool u = (mask & REGEXP_FLAG_UNICODE) != 0; 499 + bool v = (mask & REGEXP_FLAG_UNICODE_SET) != 0; 500 + bool y = (mask & REGEXP_FLAG_STICKY) != 0; 431 501 432 502 char sorted[10]; int si = 0; 433 503 if (d) sorted[si++] = 'd'; ··· 439 509 if (v) sorted[si++] = 'v'; 440 510 if (y) sorted[si++] = 'y'; 441 511 442 - REGEXP_SET_PROP(js, obj, "flags", 5, js_mkstr(js, sorted, si), is_new); 512 + ant_value_t flags_value = js_mkstr(js, sorted, si); 513 + REGEXP_SET_PROP(js, obj, "flags", 5, flags_value, is_new); 443 514 REGEXP_SET_PROP(js, obj, "hasIndices", 10, mkval(T_BOOL, d ? 1 : 0), is_new); 444 515 REGEXP_SET_PROP(js, obj, "global", 6, mkval(T_BOOL, g ? 1 : 0), is_new); 445 516 REGEXP_SET_PROP(js, obj, "ignoreCase", 10, mkval(T_BOOL, i ? 1 : 0), is_new); ··· 449 520 REGEXP_SET_PROP(js, obj, "unicodeSets", 11, mkval(T_BOOL, v ? 
1 : 0), is_new); 450 521 REGEXP_SET_PROP(js, obj, "sticky", 6, mkval(T_BOOL, y ? 1 : 0), is_new); 451 522 REGEXP_SET_PROP(js, obj, "lastIndex", 9, tov(0), is_new); 523 + js_set_slot(obj, SLOT_REGEXP_FLAGS_MASK, tov((double)mask)); 524 + js_set_slot(obj, SLOT_REGEXP_FLAGS_STRING, flags_value); 525 + js_set_slot(obj, SLOT_REGEXP_NAMED_GROUPS, js_mkundef()); 452 526 } 453 527 454 528 ant_value_t is_regexp_like(ant_t *js, ant_value_t value) { ··· 554 628 555 629 static bool regex_get_or_compile(ant_t *js, ant_value_t regexp_obj, compiled_regex_t *out) { 556 630 ant_object_t *obj_ptr = js_obj_ptr(regexp_obj); 631 + uint8_t flags_mask = regexp_flags_mask(js, regexp_obj); 557 632 558 633 regex_cache_entry_t *cached = regex_cache_lookup(obj_ptr); 559 634 if (cached) { ··· 571 646 ant_offset_t plen, poff = vstr(js, source_val, &plen); 572 647 const char *pattern_ptr = (char *)(uintptr_t)(poff); 573 648 574 - bool ignore_case = false, multiline = false, dotall = false, v_flag = false; 575 - ant_offset_t flags_off = lkp(js, regexp_obj, "flags", 5); 576 - if (flags_off != 0) { 577 - ant_value_t flags_val = js_propref_load(js, flags_off); 578 - if (vtype(flags_val) == T_STR) { 579 - ant_offset_t flen, foff = vstr(js, flags_val, &flen); 580 - const char *flags_str = (char *)(uintptr_t)(foff); 581 - for (ant_offset_t i = 0; i < flen; i++) { 582 - if (flags_str[i] == 'i') ignore_case = true; 583 - if (flags_str[i] == 'm') multiline = true; 584 - if (flags_str[i] == 's') dotall = true; 585 - if (flags_str[i] == 'v') v_flag = true; 586 - }} 587 - } 588 - 589 649 char pcre2_pattern[4096]; 590 - size_t pcre2_len = js_to_pcre2_pattern(pattern_ptr, plen, pcre2_pattern, sizeof(pcre2_pattern), v_flag); 650 + size_t pcre2_len = js_to_pcre2_pattern( 651 + pattern_ptr, plen, pcre2_pattern, sizeof(pcre2_pattern), 652 + (flags_mask & REGEXP_FLAG_UNICODE_SET) != 0 653 + ); 591 654 592 655 uint32_t options = PCRE2_UTF | PCRE2_UCP | PCRE2_MATCH_UNSET_BACKREF | PCRE2_DUPNAMES; 593 - if 
(ignore_case) options |= PCRE2_CASELESS; 594 - if (multiline) options |= PCRE2_MULTILINE; 595 - if (dotall) options |= PCRE2_DOTALL; 656 + if (flags_mask & REGEXP_FLAG_IGNORE_CASE) options |= PCRE2_CASELESS; 657 + if (flags_mask & REGEXP_FLAG_MULTILINE) options |= PCRE2_MULTILINE; 658 + if (flags_mask & REGEXP_FLAG_DOTALL) options |= PCRE2_DOTALL; 596 659 597 660 int errcode; 598 661 PCRE2_SIZE erroffset; ··· 602 665 pcre2_match_data *match_data = pcre2_match_data_create_from_pattern(re, NULL); 603 666 bool jit_ready = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE) == 0; 604 667 regex_cache_insert(obj_ptr, re, match_data, jit_ready); 605 - 668 + ant_value_t groups_meta = regexp_build_named_groups_meta(js, re); 669 + 670 + if (is_err(groups_meta)) { 671 + pcre2_match_data_free(match_data); 672 + pcre2_code_free(re); 673 + regex_cache_count--; 674 + return false; 675 + } 676 + 677 + js_set_slot(regexp_obj, SLOT_REGEXP_NAMED_GROUPS, groups_meta); 606 678 out->code = re; 607 679 out->match_data = match_data; 608 680 out->jit_ready = jit_ready; 681 + 609 682 return true; 610 683 } 611 684 ··· 677 750 return regexp_obj; 678 751 } 679 752 680 - static ant_value_t builtin_regexp_exec(ant_t *js, ant_value_t *args, int nargs) { 681 - ant_value_t regexp = js->this_val; 682 - if (vtype(regexp) != T_OBJ) return js_mkerr(js, "exec called on non-regexp"); 683 - if (nargs < 1) return js_mknull(); 753 + static ant_value_t builtin_regexp_groups_getter(ant_t *js, ant_value_t *args, int nargs) { 754 + ant_value_t result_arr = js->this_val; 755 + if (!is_object_type(result_arr)) return js_mkundef(); 756 + 757 + ant_value_t cached = js_get_slot(result_arr, SLOT_REGEXP_GROUPS_CACHE); 758 + if (is_object_type(cached)) return cached; 759 + 760 + ant_value_t meta = js_get_slot(result_arr, SLOT_REGEXP_RESULT_GROUPS); 761 + if (!is_object_type(meta)) return js_mkundef(); 762 + 763 + ant_value_t groups = js_mkobj(js); 764 + if (is_err(groups)) return groups; 765 + js_set_proto_init(groups, 
js_mknull()); 766 + 767 + for (ant_offset_t i = 0; ; i += 2) { 768 + ant_value_t name = js_arr_get(js, meta, i); 769 + if (vtype(name) == T_UNDEF) break; 770 + ant_value_t index_val = js_arr_get(js, meta, i + 1); 771 + ant_offset_t index = (vtype(index_val) == T_NUM) ? (ant_offset_t)tod(index_val) : 0; 772 + char idxstr[16]; 773 + (void)uint_to_str(idxstr, sizeof(idxstr), (uint64_t)index); 774 + ant_value_t value = js_getprop_fallback(js, result_arr, idxstr); 775 + ant_offset_t name_len, name_off = vstr(js, name, &name_len); 776 + ant_value_t status = setprop_cstr(js, groups, (const char *)(uintptr_t)name_off, (size_t)name_len, value); 777 + if (is_err(status)) return status; 778 + } 684 779 685 - ant_value_t str_arg = args[0]; 686 - if (vtype(str_arg) != T_STR) return js_mknull(); 780 + js_set_slot(result_arr, SLOT_REGEXP_GROUPS_CACHE, groups); 781 + return groups; 782 + } 687 783 784 + static ant_value_t regexp_exec_internal(ant_t *js, ant_value_t regexp, ant_value_t str_arg, bool truthy_only) { 688 785 ant_offset_t str_len, str_off = vstr(js, str_arg, &str_len); 689 786 const char *str_ptr = (char *)(uintptr_t)(str_off); 690 - 691 - bool global_flag = false, sticky_flag = false; 692 - ant_offset_t flags_off = lkp(js, regexp, "flags", 5); 693 - if (flags_off != 0) { 694 - ant_value_t flags_val = js_propref_load(js, flags_off); 695 - if (vtype(flags_val) == T_STR) { 696 - ant_offset_t flen, foff = vstr(js, flags_val, &flen); 697 - const char *flags_str = (char *)(uintptr_t)(foff); 698 - for (ant_offset_t i = 0; i < flen; i++) { 699 - if (flags_str[i] == 'g') global_flag = true; 700 - if (flags_str[i] == 'y') sticky_flag = true; 701 - } 702 - } 703 - } 787 + uint8_t flags_mask = regexp_flags_mask(js, regexp); 788 + bool global_flag = (flags_mask & REGEXP_FLAG_GLOBAL) != 0; 789 + bool sticky_flag = (flags_mask & REGEXP_FLAG_STICKY) != 0; 704 790 791 + // TODO: reduce nesting 705 792 PCRE2_SIZE start_offset = 0; 706 793 if (global_flag || sticky_flag) { 707 794 
ant_offset_t lastindex_off = lkp(js, regexp, "lastIndex", 9); ··· 711 798 double li = tod(li_val); 712 799 if (li >= 0 && li <= (double)str_len) start_offset = (PCRE2_SIZE)li; 713 800 else { 714 - js_setprop(js, regexp, js_mkstr(js, "lastIndex", 9), tov(0)); 801 + if (is_err(setprop_cstr(js, regexp, "lastIndex", 9, tov(0)))) return js_mkerr(js, "oom"); 715 802 return js_mknull(); 716 803 } 717 804 } ··· 730 817 } else rc = pcre2_match(compiled.code, (PCRE2_SPTR)str_ptr, str_len, start_offset, match_options, compiled.match_data, NULL); 731 818 732 819 if (rc < 0) { 733 - if (global_flag || sticky_flag) js_setprop(js, regexp, js_mkstr(js, "lastIndex", 9), tov(0)); 820 + if ((global_flag || sticky_flag) && is_err(setprop_cstr(js, regexp, "lastIndex", 9, tov(0)))) { 821 + return js_mkerr(js, "oom"); 822 + } 734 823 return js_mknull(); 735 824 } 736 825 737 826 PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(compiled.match_data); 738 827 uint32_t ovcount = pcre2_get_ovector_count(compiled.match_data); 828 + 829 + update_regexp_statics(js, str_ptr, ovector, ovcount); 830 + 831 + if (global_flag || sticky_flag) { 832 + ant_value_t next_idx = tov((double)ovector[1]); 833 + if (is_err(setprop_cstr(js, regexp, "lastIndex", 9, next_idx))) return js_mkerr(js, "oom"); 834 + } 835 + 836 + if (truthy_only) return js_true; 739 837 740 838 ant_value_t result_arr = js_mkarr(js); 839 + if (is_err(result_arr)) return result_arr; 741 840 for (uint32_t i = 0; i < ovcount && i < 32; i++) { 742 841 PCRE2_SIZE start = ovector[2*i]; 743 842 PCRE2_SIZE end = ovector[2*i+1]; ··· 749 848 } 750 849 } 751 850 752 - js_setprop(js, result_arr, js_mkstr(js, "index", 5), tov((double)ovector[0])); 753 - js_setprop(js, result_arr, js_mkstr(js, "input", 5), str_arg); 851 + if (is_err(setprop_cstr(js, result_arr, "index", 5, tov((double)ovector[0])))) return js_mkerr(js, "oom"); 852 + if (is_err(setprop_cstr(js, result_arr, "input", 5, str_arg))) return js_mkerr(js, "oom"); 754 853 755 - uint32_t 
namecount = 0; 756 - pcre2_pattern_info(compiled.code, PCRE2_INFO_NAMECOUNT, &namecount); 757 - if (namecount > 0) { 758 - uint32_t nameentrysize = 0; 759 - PCRE2_SPTR nametable = NULL; 760 - pcre2_pattern_info(compiled.code, PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize); 761 - pcre2_pattern_info(compiled.code, PCRE2_INFO_NAMETABLE, (void *)&nametable); 762 - 763 - ant_value_t groups = js_mkobj(js); 764 - js_set_proto_init(groups, js_mknull()); 854 + ant_value_t groups_meta = js_get_slot(regexp, SLOT_REGEXP_NAMED_GROUPS); 855 + if (is_object_type(groups_meta)) { 856 + js_set_slot(result_arr, SLOT_REGEXP_RESULT_GROUPS, groups_meta); 857 + js_set_slot(result_arr, SLOT_REGEXP_GROUPS_CACHE, js_mkundef()); 858 + js_set_getter_desc(js, js_as_obj(result_arr), "groups", 6, js_mkfun(builtin_regexp_groups_getter), JS_DESC_E | JS_DESC_C); 859 + } else if (is_err(setprop_cstr(js, result_arr, "groups", 6, js_mkundef()))) return js_mkerr(js, "oom"); 765 860 766 - PCRE2_SPTR tabptr = nametable; 767 - for (uint32_t i = 0; i < namecount; i++) { 768 - int n = (tabptr[0] << 8) | tabptr[1]; 769 - const char *name = (const char *)(tabptr + 2); 770 - ant_value_t val = ((uint32_t)n < ovcount) ? 
js_arr_get(js, result_arr, n) : js_mkundef(); 771 - js_setprop(js, groups, js_mkstr(js, name, strlen(name)), val); 772 - tabptr += nameentrysize; 773 - } 774 - js_setprop(js, result_arr, js_mkstr(js, "groups", 6), groups); 775 - } else js_setprop(js, result_arr, js_mkstr(js, "groups", 6), js_mkundef()); 861 + return result_arr; 862 + } 776 863 777 - update_regexp_statics(js, str_ptr, ovector, ovcount); 864 + static ant_value_t builtin_regexp_exec(ant_t *js, ant_value_t *args, int nargs) { 865 + ant_value_t regexp = js->this_val; 866 + if (vtype(regexp) != T_OBJ) return js_mkerr(js, "exec called on non-regexp"); 867 + if (nargs < 1) return js_mknull(); 778 868 779 - if (global_flag || sticky_flag) { 780 - js_setprop(js, regexp, js_mkstr(js, "lastIndex", 9), tov((double)ovector[1])); 781 - } 869 + ant_value_t str_arg = args[0]; 870 + if (vtype(str_arg) != T_STR) return js_mknull(); 782 871 783 - return result_arr; 872 + return regexp_exec_internal(js, regexp, str_arg, false); 784 873 } 785 874 786 875 static ant_value_t builtin_regexp_toString(ant_t *js, ant_value_t *args, int nargs) { 787 - (void)args; (void)nargs; 788 876 ant_value_t regexp = js->this_val; 789 877 if (!is_object_type(regexp)) 790 878 return js_mkerr_typed(js, JS_ERR_TYPE, "toString called on non-object"); ··· 955 1043 return result; 956 1044 } 957 1045 1046 + bool regexp_exec_truthy_try_fast( 1047 + ant_t *js, 1048 + ant_value_t call_func, 1049 + ant_value_t regexp, 1050 + ant_value_t arg, 1051 + ant_value_t *out_result 1052 + ) { 1053 + if (!out_result || vtype(call_func) != T_CFUNC) return false; 1054 + if (js_as_cfunc(call_func) != builtin_regexp_exec) return false; 1055 + if (!is_object_type(regexp) || vtype(arg) != T_STR) return false; 1056 + 1057 + ant_value_t result = regexp_exec_internal(js, regexp, arg, true); 1058 + if (is_err(result)) { 1059 + *out_result = result; 1060 + return true; 1061 + } 1062 + 1063 + *out_result = mkval(T_BOOL, vtype(result) != T_NULL ? 
1 : 0); 1064 + return true; 1065 + } 1066 + 958 1067 static ant_value_t builtin_regexp_test(ant_t *js, ant_value_t *args, int nargs) { 959 1068 ant_value_t regexp = js->this_val; 960 1069 if (!is_object_type(regexp)) 961 1070 return js_mkerr_typed(js, JS_ERR_TYPE, "test called on non-object"); 962 1071 ant_value_t str_arg = nargs > 0 ? js_tostring_val(js, args[0]) : js_mkstr(js, "undefined", 9); 963 1072 if (is_err(str_arg)) return str_arg; 964 - ant_value_t result = regexp_exec_abstract(js, regexp, str_arg); 1073 + ant_value_t exec_fn = js_get(js, regexp, "exec"); 1074 + if (is_err(exec_fn)) return exec_fn; 1075 + 1076 + ant_value_t result; 1077 + if (vtype(exec_fn) == T_CFUNC && js_as_cfunc(exec_fn) == builtin_regexp_exec) { 1078 + result = regexp_exec_internal(js, regexp, str_arg, true); 1079 + } else { 1080 + result = regexp_exec_abstract(js, regexp, str_arg); 1081 + } 965 1082 if (is_err(result)) return result; 966 1083 return mkval(T_BOOL, vtype(result) != T_NULL ? 1 : 0); 967 1084 } ··· 974 1091 975 1092 char buf[16]; 976 1093 int n = 0; 1094 + uint8_t mask = regexp_flags_mask(js, rx); 977 1095 978 - static const struct { const char *name; size_t len; char flag; } flag_props[] = { 979 - {"hasIndices", 10, 'd'}, {"global", 6, 'g'}, {"ignoreCase", 10, 'i'}, 980 - {"multiline", 9, 'm'}, {"dotAll", 6, 's'}, {"unicode", 7, 'u'}, 981 - {"unicodeSets", 11, 'v'}, {"sticky", 6, 'y'}, 982 - }; 983 - 984 - for (int i = 0; i < 8; i++) { 985 - ant_value_t v = js_getprop_fallback(js, rx, flag_props[i].name); 986 - if (is_err(v)) return v; 987 - if (js_truthy(js, v)) buf[n++] = flag_props[i].flag; 988 - } 1096 + if (mask & REGEXP_FLAG_HAS_INDICES) buf[n++] = 'd'; 1097 + if (mask & REGEXP_FLAG_GLOBAL) buf[n++] = 'g'; 1098 + if (mask & REGEXP_FLAG_IGNORE_CASE) buf[n++] = 'i'; 1099 + if (mask & REGEXP_FLAG_MULTILINE) buf[n++] = 'm'; 1100 + if (mask & REGEXP_FLAG_DOTALL) buf[n++] = 's'; 1101 + if (mask & REGEXP_FLAG_UNICODE) buf[n++] = 'u'; 1102 + if (mask & 
REGEXP_FLAG_UNICODE_SET) buf[n++] = 'v'; 1103 + if (mask & REGEXP_FLAG_STICKY) buf[n++] = 'y'; 989 1104 990 1105 return js_mkstr(js, buf, n); 991 1106 }
+38 -19
src/silver/compiler.c
··· 56 56 emit(c, (uint8_t)op); 57 57 } 58 58 59 + static void compile_receiver_property_get(sv_compiler_t *c, sv_ast_t *node); 60 + static void compile_truthy_test_expr(sv_compiler_t *c, sv_ast_t *node); 61 + 59 62 static void emit_srcpos(sv_compiler_t *c, sv_ast_t *node) { 60 63 if (!node) return; 61 64 const char *code = c->source; ··· 1969 1972 } 1970 1973 1971 1974 void compile_ternary(sv_compiler_t *c, sv_ast_t *node) { 1972 - compile_expr(c, node->cond); 1975 + compile_truthy_test_expr(c, node->cond); 1973 1976 int else_jump = emit_jump(c, OP_JMP_FALSE); 1974 1977 compile_expr(c, node->left); 1975 1978 int end_jump = emit_jump(c, OP_JMP); ··· 2212 2215 return true; 2213 2216 } 2214 2217 2218 + static bool compile_regexp_exec_truthy_intrinsic( 2219 + sv_compiler_t *c, sv_ast_t *node 2220 + ) { 2221 + if (!node || node->type != N_CALL || call_has_spread_arg(node) || node->args.count != 1) 2222 + return false; 2223 + 2224 + sv_ast_t *callee = node->left; 2225 + if (!callee || callee->type != N_MEMBER) return false; 2226 + if ((callee->flags & 1) || !callee->right || !callee->right->str) return false; 2227 + if (is_ident_name(callee->left, "super")) return false; 2228 + if (!is_ident_str(callee->right->str, callee->right->len, "exec", 4)) 2229 + return false; 2230 + 2231 + compile_expr(c, callee->left); 2232 + compile_receiver_property_get(c, callee); 2233 + compile_expr(c, node->args.items[0]); 2234 + emit_op(c, OP_RE_EXEC_TRUTHY); 2235 + 2236 + return true; 2237 + } 2238 + 2239 + static void compile_truthy_test_expr(sv_compiler_t *c, sv_ast_t *node) { 2240 + if (compile_regexp_exec_truthy_intrinsic(c, node)) return; 2241 + compile_expr(c, node); 2242 + } 2243 + 2215 2244 static void compile_optional_call_after_setup( 2216 2245 sv_compiler_t *c, sv_ast_t *call_node, 2217 2246 sv_call_kind_t kind, bool has_spread ··· 2763 2792 2764 2793 void compile_tail_return_expr(sv_compiler_t *c, sv_ast_t *expr) { 2765 2794 if (expr->type == N_TERNARY) { 2766 - 
compile_expr(c, expr->cond); 2795 + compile_truthy_test_expr(c, expr->cond); 2767 2796 int else_jump = emit_jump(c, OP_JMP_FALSE); 2768 2797 compile_tail_return_expr(c, expr->left); 2769 2798 patch_jump(c, else_jump); ··· 3190 3219 } 3191 3220 } 3192 3221 3193 - void compile_destructure_binding(sv_compiler_t *c, sv_ast_t *pat, 3194 - sv_var_kind_t kind) { 3222 + void compile_destructure_binding(sv_compiler_t *c, sv_ast_t *pat, sv_var_kind_t kind) { 3195 3223 compile_destructure_pattern(c, pat, false, false, DESTRUCTURE_BIND, kind); 3196 3224 } 3197 3225 ··· 3229 3257 return true; 3230 3258 } 3231 3259 3232 - 3233 3260 void compile_if(sv_compiler_t *c, sv_ast_t *node) { 3234 3261 bool folded_truth = false; 3235 3262 if (fold_static_typeof_compare(c, node->cond, &folded_truth)) { ··· 3238 3265 return; 3239 3266 } 3240 3267 3241 - compile_expr(c, node->cond); 3268 + compile_truthy_test_expr(c, node->cond); 3242 3269 int else_jump = emit_jump(c, OP_JMP_FALSE); 3243 3270 compile_stmt(c, node->left); 3244 3271 if (node->right) { ··· 3251 3278 } 3252 3279 } 3253 3280 3254 - 3255 3281 void compile_while(sv_compiler_t *c, sv_ast_t *node) { 3256 3282 int loop_start = c->code_len; 3257 3283 push_loop(c, loop_start, NULL, 0, false); 3258 3284 3259 - compile_expr(c, node->cond); 3285 + compile_truthy_test_expr(c, node->cond); 3260 3286 int exit_jump = emit_jump(c, OP_JMP_FALSE); 3261 3287 compile_stmt(c, node->body); 3262 3288 ··· 3269 3295 pop_loop(c); 3270 3296 } 3271 3297 3272 - 3273 3298 void compile_do_while(sv_compiler_t *c, sv_ast_t *node) { 3274 3299 int loop_start = c->code_len; 3275 3300 push_loop(c, loop_start, NULL, 0, false); 3276 - 3277 3301 compile_stmt(c, node->body); 3278 3302 3279 3303 sv_loop_t *loop = &c->loops[c->loop_count - 1]; 3280 - int cond_start = c->code_len; 3281 3304 for (int i = 0; i < loop->continues.count; i++) 3282 3305 patch_jump(c, loop->continues.offsets[i]); 3283 3306 3284 - compile_expr(c, node->cond); 3307 + compile_truthy_test_expr(c, 
node->cond); 3285 3308 int exit_jump = emit_jump(c, OP_JMP_FALSE); 3286 3309 emit_loop(c, loop_start); 3287 3310 patch_jump(c, exit_jump); 3288 3311 pop_loop(c); 3289 - (void)cond_start; 3290 3312 } 3291 - 3292 3313 3293 3314 static void for_add_slot_unique(int **slots, int *count, int *cap, int slot) { 3294 3315 if (slot < 0) return; ··· 3305 3326 (*slots)[(*count)++] = slot; 3306 3327 } 3307 3328 3308 - static void for_collect_pattern_slots(sv_compiler_t *c, sv_ast_t *pat, 3309 - int **slots, int *count, int *cap) { 3329 + static void for_collect_pattern_slots(sv_compiler_t *c, sv_ast_t *pat, int **slots, int *count, int *cap) { 3310 3330 if (!pat) return; 3311 3331 switch (pat->type) { 3312 3332 case N_IDENT: { ··· 3343 3363 } 3344 3364 } 3345 3365 3346 - static void for_collect_var_decl_slots(sv_compiler_t *c, sv_ast_t *init_var, 3347 - int **slots, int *count, int *cap) { 3366 + static void for_collect_var_decl_slots(sv_compiler_t *c, sv_ast_t *init_var, int **slots, int *count, int *cap) { 3348 3367 if (!init_var || init_var->type != N_VAR) return; 3349 3368 for (int i = 0; i < init_var->args.count; i++) { 3350 3369 sv_ast_t *decl = init_var->args.items[i]; ··· 3377 3396 3378 3397 int exit_jump = -1; 3379 3398 if (node->cond) { 3380 - compile_expr(c, node->cond); 3399 + compile_truthy_test_expr(c, node->cond); 3381 3400 exit_jump = emit_jump(c, OP_JMP_FALSE); 3382 3401 } 3383 3402
+21
src/silver/engine.c
··· 4 4 5 5 #include "silver/engine.h" 6 6 #include "silver/swarm.h" 7 + #include "modules/regex.h" 7 8 8 9 #include "ops/literals.h" 9 10 #include "ops/stack.h" ··· 1417 1418 if (is_err(call_result)) { sv_err = call_result; goto sv_throw; } 1418 1419 vm->stack[vm->sp++] = call_result; 1419 1420 NEXT(3); 1421 + } 1422 + 1423 + L_RE_EXEC_TRUTHY: { 1424 + ant_value_t call_arg = vm->stack[vm->sp - 1]; 1425 + ant_value_t call_func = vm->stack[vm->sp - 2]; 1426 + ant_value_t call_this = vm->stack[vm->sp - 3]; 1427 + ant_value_t call_result; 1428 + 1429 + if (!regexp_exec_truthy_try_fast(js, call_func, call_this, call_arg, &call_result)) { 1430 + ant_value_t call_args[1] = { call_arg }; frame->ip = ip; 1431 + ant_value_t raw_result = sv_vm_call(vm, js, call_func, call_this, call_args, 1, NULL, false); 1432 + sv_sync_frame_locals(vm, &frame, &func, &bp, &lp); 1433 + if (is_err(raw_result)) call_result = raw_result; 1434 + else call_result = mkval(T_BOOL, js_truthy(js, raw_result) ? 1 : 0); 1435 + } 1436 + 1437 + vm->sp -= 3; 1438 + if (is_err(call_result)) { sv_err = call_result; goto sv_throw; } 1439 + vm->stack[vm->sp++] = call_result; 1440 + NEXT(1); 1420 1441 } 1421 1442 1422 1443 L_TAIL_CALL: {
+3
src/silver/swarm.c
··· 2015 2015 if (vtype(cv) != T_CFUNC) return false; 2016 2016 break; 2017 2017 } 2018 + case OP_RE_EXEC_TRUTHY: 2019 + eligible = false; 2020 + break; 2018 2021 default: 2019 2022 if (sv_jit_warn_unlikely) 2020 2023 fprintf(stderr, "jit: ineligible op %s in %s\n",
+6 -2
src/utils.c
··· 191 191 } 192 192 193 193 static bool has_js_extension(const char *filename) { 194 - const char *dot = strrchr(filename, '.'); 194 + const char *slash = strrchr(filename, '/'); 195 + const char *base = slash ? slash + 1 : filename; 196 + const char *dot = strrchr(base, '.'); 195 197 if (!dot) return false; 196 198 for (const char *const *ext = module_resolve_extensions; *ext; ext++) { 197 199 if (!is_entrypoint_script_extension(*ext)) continue; ··· 208 210 struct stat st; 209 211 if (stat(filename, &st) == 0) { 210 212 if (S_ISREG(st.st_mode)) { 211 - const char *dot = strrchr(filename, '.'); 213 + const char *slash = strrchr(filename, '/'); 214 + const char *base = slash ? slash + 1 : filename; 215 + const char *dot = strrchr(base, '.'); 212 216 if (dot && !has_js_extension(filename)) return NULL; 213 217 return strdup(filename); 214 218 }
+53
tests/test_extensionless_hidden_entrypoint.cjs
··· 1 + const { spawnSync } = require('child_process'); 2 + const fs = require('fs'); 3 + const os = require('os'); 4 + const path = require('path'); 5 + 6 + function assert(condition, message) { 7 + if (!condition) throw new Error(message); 8 + } 9 + 10 + const tmpRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'ant-hidden-entry-')); 11 + const hiddenDir = path.join(tmpRoot, '.bin'); 12 + const scriptPath = path.join(hiddenDir, 'kat2'); 13 + 14 + fs.mkdirSync(hiddenDir, { recursive: true }); 15 + fs.writeFileSync( 16 + scriptPath, 17 + [ 18 + '#!/usr/bin/env ant', 19 + '', 20 + 'console.log("hidden entry ok");', 21 + '', 22 + ].join('\n') 23 + ); 24 + fs.chmodSync(scriptPath, 0o755); 25 + 26 + const env = { ...process.env }; 27 + env.PATH = `${path.dirname(process.execPath)}${path.delimiter}${env.PATH || ''}`; 28 + 29 + const direct = spawnSync(scriptPath, [], { env }); 30 + if (direct.error) throw direct.error; 31 + 32 + assert( 33 + direct.status === 0, 34 + `extensionless shebang entrypoint in hidden dir should exit 0, got ${direct.status}\nstdout:\n${String(direct.stdout)}\nstderr:\n${String(direct.stderr)}` 35 + ); 36 + assert( 37 + String(direct.stdout) === 'hidden entry ok\n', 38 + `expected shebang stdout to be hidden entry ok, got ${JSON.stringify(String(direct.stdout))}` 39 + ); 40 + 41 + const viaAnt = spawnSync(process.execPath, [scriptPath]); 42 + if (viaAnt.error) throw viaAnt.error; 43 + 44 + assert( 45 + viaAnt.status === 0, 46 + `direct ant entrypoint in hidden dir should exit 0, got ${viaAnt.status}\nstdout:\n${String(viaAnt.stdout)}\nstderr:\n${String(viaAnt.stderr)}` 47 + ); 48 + assert( 49 + String(viaAnt.stdout) === 'hidden entry ok\n', 50 + `expected ant stdout to be hidden entry ok, got ${JSON.stringify(String(viaAnt.stdout))}` 51 + ); 52 + 53 + console.log('extensionless hidden entrypoint test passed');
+62
tests/test_regexp_exec_fast_paths.cjs
··· 1 + function assert(cond, msg) { 2 + if (!cond) throw new Error(msg); 3 + } 4 + 5 + const routeRe = 6 + /^\/api\/v(?<version>[0-9]+)\/users\/(?<user>[0-9]+)\/posts\/(?<post>[0-9]+)(?:\?(?<query>.*))?$/; 7 + const routeMatch = routeRe.exec('/api/v3/users/42/posts/9?limit=10'); 8 + 9 + assert(routeMatch !== null, 'expected route regexp to match'); 10 + assert(routeMatch[0] === '/api/v3/users/42/posts/9?limit=10', 'full match mismatch'); 11 + 12 + const groups1 = routeMatch.groups; 13 + const groups2 = routeMatch.groups; 14 + assert(groups1 === groups2, 'groups getter should cache the created object'); 15 + assert(groups1.version === '3', 'named group version mismatch'); 16 + assert(groups1.user === '42', 'named group user mismatch'); 17 + assert(groups1.post === '9', 'named group post mismatch'); 18 + assert(groups1.query === 'limit=10', 'named group query mismatch'); 19 + 20 + const wordRe = /\b[a-z]+\b/g; 21 + const words = 'alpha beta gamma'; 22 + let count = 0; 23 + let lastMatch = ''; 24 + 25 + while (wordRe.exec(words)) { 26 + count++; 27 + lastMatch = RegExp.lastMatch; 28 + } 29 + 30 + assert(count === 3, 'truthy exec loop should count all matches'); 31 + assert(lastMatch === 'gamma', 'RegExp.lastMatch should track the final successful exec'); 32 + assert(wordRe.lastIndex === 0, 'global exec loop should reset lastIndex after the final miss'); 33 + 34 + const order = []; 35 + let customCalls = 0; 36 + const customExec = { 37 + get exec() { 38 + order.push('get'); 39 + return function (value) { 40 + order.push('call:' + value); 41 + return customCalls++ === 0 ? 
{ ok: true } : null; 42 + }; 43 + } 44 + }; 45 + 46 + function nextCustomArg() { 47 + order.push('arg'); 48 + return 'payload'; 49 + } 50 + 51 + let customCount = 0; 52 + while (customExec.exec(nextCustomArg())) { 53 + customCount++; 54 + } 55 + 56 + assert(customCount === 1, 'custom exec truthiness loop should still use fallback call semantics'); 57 + assert( 58 + order.join(',') === 'get,arg,call:payload,get,arg,call:payload', 59 + 'custom exec truthiness lowering should preserve getter/arg/call order' 60 + ); 61 + 62 + console.log('regex exec fast path semantics ok');