···3131DEF(char32_i, 5)
3232DEF(dot, 1)
3333DEF(any, 1) /* same as dot but match any character including line terminator */
3434+DEF(space, 1)
3535+DEF(not_space, 1) /* must come after */
3436DEF(line_start, 1)
3537DEF(line_start_m, 1)
3638DEF(line_end, 1)
+76-60
vendor/git/quickjs-c/libregexp.c
···34343535/*
3636 TODO:
3737-3737+ - remove REOP_char_i and REOP_range_i by precomputing the case folding.
3838+ - add specific opcodes for simple unicode property tests so that the
3939+ generated bytecode is smaller.
3840 - Add a lock step execution mode (=linear time execution guaranteed)
3941 when the regular expression is "simple" i.e. no backreference nor
4042 complicated lookahead. The opcodes are designed for this execution
···10781080 goto default_escape;
10791081 if (cr_init_char_range(s, cr, c))
10801082 return -1;
10811081- c = CLASS_RANGE_BASE;
10831083+ c += CLASS_RANGE_BASE;
10821084 break;
10831085 case 'c':
10841086 c = *p;
···15841586 case REOP_char32_i:
15851587 case REOP_dot:
15861588 case REOP_any:
15891589+ case REOP_space:
15901590+ case REOP_not_space:
15871591 need_check_adv = FALSE;
15881592 break;
15891593 case REOP_line_start:
···20282032 case 'b':
20292033 case 'B':
20302034 if (p[1] != 'b') {
20312031- re_emit_op(s, s->ignore_case ? REOP_not_word_boundary_i : REOP_not_word_boundary);
20352035+ re_emit_op(s, s->ignore_case && s->is_unicode ? REOP_not_word_boundary_i : REOP_not_word_boundary);
20322036 } else {
20332033- re_emit_op(s, s->ignore_case ? REOP_word_boundary_i : REOP_word_boundary);
20372037+ re_emit_op(s, s->ignore_case && s->is_unicode ? REOP_word_boundary_i : REOP_word_boundary);
20342038 }
20352039 p += 2;
20362040 break;
···21672171 if (is_backward_dir)
21682172 re_emit_op(s, REOP_prev);
21692173 if (c >= CLASS_RANGE_BASE) {
21702170- int ret;
21712171- ret = re_emit_string_list(s, cr);
21742174+ int ret = 0;
21752175+ /* optimize the common 'space' tests */
21762176+ if (c == (CLASS_RANGE_BASE + CHAR_RANGE_s)) {
21772177+ re_emit_op(s, REOP_space);
21782178+ } else if (c == (CLASS_RANGE_BASE + CHAR_RANGE_S)) {
21792179+ re_emit_op(s, REOP_not_space);
21802180+ } else {
21812181+ ret = re_emit_string_list(s, cr);
21822182+ }
21722183 re_string_list_free(cr);
21732184 if (ret)
21742185 return -1;
···26072618 return (c == '\n' || c == '\r' || c == CP_LS || c == CP_PS);
26082619}
2609262026102610-static BOOL is_word_char(uint32_t c)
26112611-{
26122612- return ((c >= '0' && c <= '9') ||
26132613- (c >= 'a' && c <= 'z') ||
26142614- (c >= 'A' && c <= 'Z') ||
26152615- (c == '_'));
26162616-}
26172617-26182621#define GET_CHAR(c, cptr, cbuf_end, cbuf_type) \
26192622 do { \
26202623 if (cbuf_type == 0) { \
···2769277227702773/* return 1 if match, 0 if not match or < 0 if error. */
27712774static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
27722772- uint8_t **regs, const uint8_t *pc, const uint8_t *cptr)
27752775+ const uint8_t *pc, const uint8_t *cptr)
27732776{
27742777 int opcode;
27752778 int cbuf_type;
···28092812 }
2810281328112814 /* avoid saving the previous value if already saved */
/*
 * SAVE_CAPTURE_CHECK(idx, value): store 'value' in capture[idx] and make
 * sure the previous content of that slot is recorded on the local stack.
 *
 * The entries between 'bp' and 'sp' are (index, old pointer) pairs. They
 * are scanned from the most recent one downwards; when a pair for 'idx'
 * is already present nothing is pushed, so only the oldest value of the
 * slot is kept. Otherwise CHECK_STACK_SPACE(2) is invoked and the pair
 * (idx, capture[idx]) is pushed before the slot is overwritten.
 *
 * Relies on 'sp', 'bp', 'capture' and CHECK_STACK_SPACE() being in scope
 * at the expansion site. 'idx' is expanded several times, so it must not
 * have side effects; every use is parenthesized so that an arbitrary
 * expression can be passed safely.
 */
#define SAVE_CAPTURE_CHECK(idx, value)                  \
    {                                                   \
        StackElem *sp1;                                 \
        sp1 = sp;                                       \
        for(;;) {                                       \
            if (sp1 > bp) {                             \
                /* already saved: keep oldest value */  \
                if (sp1[-2].val == (idx))               \
                    break;                              \
                sp1 -= 2;                               \
            } else {                                    \
                /* not saved yet: push (idx, old) */    \
                CHECK_STACK_SPACE(2);                   \
                sp[0].val = (idx);                      \
                sp[1].ptr = capture[(idx)];             \
                sp += 2;                                \
                break;                                  \
            }                                           \
        }                                               \
        capture[(idx)] = (value);                       \
    }
2831283428322835···28512854 REExecStateEnum type;
28522855 if (bp == s->stack_buf)
28532856 return 0;
28542854- /* undo the modifications to capture[] and regs[] */
28572857+ /* undo the modifications to capture[] */
28552858 while (sp > bp) {
28562856- intptr_t idx2 = sp[-2].val;
28572857- if (idx2 >= 0)
28582858- capture[idx2] = sp[-1].ptr;
28592859- else
28602860- regs[-idx2 - 1] = sp[-1].ptr;
28592859+ capture[sp[-2].val] = sp[-1].ptr;
28612860 sp -= 2;
28622861 }
28632862···29102909 for(;;) {
29112910 REExecStateEnum type;
29122911 type = bp[-1].bp.type;
29132913- /* undo the modifications to capture[] and regs[] */
29122912+ /* undo the modifications to capture[] */
29142913 while (sp > bp) {
29152915- intptr_t idx2 = sp[-2].val;
29162916- if (idx2 >= 0)
29172917- capture[idx2] = sp[-1].ptr;
29182918- else
29192919- regs[-idx2 - 1] = sp[-1].ptr;
29142914+ capture[sp[-2].val] = sp[-1].ptr;
29202915 sp -= 2;
29212916 }
29222917 pc = sp[-3].ptr;
···30193014 goto no_match;
30203015 GET_CHAR(c, cptr, cbuf_end, cbuf_type);
30213016 break;
30173017+ case REOP_space:
30183018+ if (cptr == cbuf_end)
30193019+ goto no_match;
30203020+ GET_CHAR(c, cptr, cbuf_end, cbuf_type);
30213021+ if (!lre_is_space(c))
30223022+ goto no_match;
30233023+ break;
30243024+ case REOP_not_space:
30253025+ if (cptr == cbuf_end)
30263026+ goto no_match;
30273027+ GET_CHAR(c, cptr, cbuf_end, cbuf_type);
30283028+ if (lre_is_space(c))
30293029+ goto no_match;
30303030+ break;
30223031 case REOP_save_start:
30233032 case REOP_save_end:
30243033 val = *pc++;
···30443053 }
30453054 break;
30463055 case REOP_set_i32:
30473047- idx = pc[0];
30563056+ idx = 2 * s->capture_count + pc[0];
30483057 val = get_u32(pc + 1);
30493058 pc += 5;
30503050- SAVE_REG(idx, (void *)(uintptr_t)val);
30593059+ SAVE_CAPTURE_CHECK(idx, (void *)(uintptr_t)val);
30513060 break;
30523061 case REOP_loop:
30533062 {
30543063 uint32_t val2;
30553055- idx = pc[0];
30643064+ idx = 2 * s->capture_count + pc[0];
30563065 val = get_u32(pc + 1);
30573066 pc += 5;
3058306730593059- val2 = (uintptr_t)regs[idx] - 1;
30603060- SAVE_REG(idx, (void *)(uintptr_t)val2);
30683068+ val2 = (uintptr_t)capture[idx] - 1;
30693069+ SAVE_CAPTURE_CHECK(idx, (void *)(uintptr_t)val2);
30613070 if (val2 != 0) {
30623071 pc += (int)val;
30633072 if (lre_poll_timeout(s))
···30723081 {
30733082 const uint8_t *pc1;
30743083 uint32_t val2, limit;
30753075- idx = pc[0];
30843084+ idx = 2 * s->capture_count + pc[0];
30763085 limit = get_u32(pc + 1);
30773086 val = get_u32(pc + 5);
30783087 pc += 9;
3079308830803089 /* decrement the counter */
30813081- val2 = (uintptr_t)regs[idx] - 1;
30823082- SAVE_REG(idx, (void *)(uintptr_t)val2);
30903090+ val2 = (uintptr_t)capture[idx] - 1;
30913091+ SAVE_CAPTURE_CHECK(idx, (void *)(uintptr_t)val2);
3083309230843093 if (val2 > limit) {
30853094 /* normal loop if counter > limit */
···30903099 /* check advance */
30913100 if ((opcode == REOP_loop_check_adv_split_goto_first ||
30923101 opcode == REOP_loop_check_adv_split_next_first) &&
30933093- regs[idx + 1] == cptr &&
31023102+ capture[idx + 1] == cptr &&
30943103 val2 != limit) {
30953104 goto no_match;
30963105 }
···31163125 }
31173126 break;
31183127 case REOP_set_char_pos:
31193119- idx = pc[0];
31283128+ idx = 2 * s->capture_count + pc[0];
31203129 pc++;
31213121- SAVE_REG(idx, (uint8_t *)cptr);
31303130+ SAVE_CAPTURE_CHECK(idx, (uint8_t *)cptr);
31223131 break;
31233132 case REOP_check_advance:
31243124- idx = pc[0];
31333133+ idx = 2 * s->capture_count + pc[0];
31253134 pc++;
31263126- if (regs[idx] == cptr)
31353135+ if (capture[idx] == cptr)
31273136 goto no_match;
31283137 break;
31293138 case REOP_word_boundary:
···31393148 v1 = FALSE;
31403149 } else {
31413150 PEEK_PREV_CHAR(c, cptr, s->cbuf, cbuf_type);
31423142- if (ignore_case)
31433143- c = lre_canonicalize(c, s->is_unicode);
31443144- v1 = is_word_char(c);
31513151+ if (c < 256) {
31523152+ v1 = (lre_is_word_byte(c) != 0);
31533153+ } else {
31543154+ v1 = ignore_case && (c == 0x017f || c == 0x212a);
31553155+ }
31453156 }
31463157 /* current char */
31473158 if (cptr >= cbuf_end) {
31483159 v2 = FALSE;
31493160 } else {
31503161 PEEK_CHAR(c, cptr, cbuf_end, cbuf_type);
31513151- if (ignore_case)
31523152- c = lre_canonicalize(c, s->is_unicode);
31533153- v2 = is_word_char(c);
31623162+ if (c < 256) {
31633163+ v2 = (lre_is_word_byte(c) != 0);
31643164+ } else {
31653165+ v2 = ignore_case && (c == 0x017f || c == 0x212a);
31663166+ }
31543167 }
31553168 if (v1 ^ v2 ^ is_boundary)
31563169 goto no_match;
···33153328 int cbuf_type, void *opaque)
33163329{
33173330 REExecContext s_s, *s = &s_s;
33183318- int re_flags, i, ret, register_count;
33193319- uint8_t **regs;
33313331+ int re_flags, i, ret;
33203332 const uint8_t *cptr;
3321333333223334 re_flags = lre_get_flags(bc_buf);
···3335334733363348 for(i = 0; i < s->capture_count * 2; i++)
33373349 capture[i] = NULL;
33383338- /* XXX: modify the API so that the registers are allocated after
33393339- the captures to suppress some tests */
33403340- register_count = bc_buf[RE_HEADER_REGISTER_COUNT];
33413341- regs = alloca(register_count * sizeof(regs[0]));
3342335033433351 cptr = cbuf + (cindex << cbuf_type);
33443352 if (0 < cindex && cindex < clen && s->cbuf_type == 2) {
···33483356 }
33493357 }
3350335833513351- ret = lre_exec_backtrack(s, capture, regs, bc_buf + RE_HEADER_LEN,
33523352- cptr);
33593359+ ret = lre_exec_backtrack(s, capture, bc_buf + RE_HEADER_LEN, cptr);
33603360+33533361 if (s->stack_buf != s->static_stack_buf)
33543362 lre_realloc(s->opaque, s->stack_buf, 0);
33553363 return ret;
33563364}
3357336533663366+int lre_get_alloc_count(const uint8_t *bc_buf)
33673367+{
33683368+ return bc_buf[RE_HEADER_CAPTURE_COUNT] * 2 +
33693369+ bc_buf[RE_HEADER_REGISTER_COUNT];
33703370+}
33713371+33583372int lre_get_capture_count(const uint8_t *bc_buf)
33593373{
33603374 return bc_buf[RE_HEADER_CAPTURE_COUNT];
···33933407 int len, flags, ret, i;
33943408 uint8_t *bc;
33953409 char error_msg[64];
33963396- uint8_t *capture[CAPTURE_COUNT_MAX * 2];
34103410+ uint8_t *capture;
33973411 const char *input;
33983412 int input_len, capture_count;
33993413···34123426 input = argv[3];
34133427 input_len = strlen(input);
3414342834293429+ capture = malloc(sizeof(capture[0]) * lre_get_alloc_count(bc));
34153430 ret = lre_exec(capture, bc, (uint8_t *)input, 0, input_len, 0, NULL);
34163431 printf("ret=%d\n", ret);
34173432 if (ret == 1) {
···34273442 printf("\n");
34283443 }
34293444 }
34453445+ free(capture);
34303446 return 0;
34313447}
34323448#endif