My working unpac space for OCaml projects in development
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

regexp: cosmetic: make it clearer that there is now a set of registers instead of an auxiliary stack

+60 -59
+5 -5
vendor/git/quickjs-c/libregexp-opcode.h
··· 45 45 DEF(save_end, 2) /* save end position, must come after saved_start */ 46 46 DEF(save_reset, 3) /* reset save positions */ 47 47 DEF(loop, 6) /* decrement the top the stack and goto if != 0 */ 48 - DEF(loop_split_goto_first, 10) 48 + DEF(loop_split_goto_first, 10) /* loop and then split */ 49 49 DEF(loop_split_next_first, 10) 50 - DEF(loop_check_adv_split_goto_first, 10) 50 + DEF(loop_check_adv_split_goto_first, 10) /* loop and then check advance and split */ 51 51 DEF(loop_check_adv_split_next_first, 10) 52 - DEF(push_i32, 6) /* push integer on the stack */ 52 + DEF(set_i32, 6) /* store the immediate value to a register */ 53 53 DEF(word_boundary, 1) 54 54 DEF(word_boundary_i, 1) 55 55 DEF(not_word_boundary, 1) ··· 64 64 DEF(range32_i, 3) /* variable length */ 65 65 DEF(lookahead, 5) 66 66 DEF(negative_lookahead, 5) /* must come after */ 67 - DEF(push_char_pos, 2) /* push the character position on the stack */ 68 - DEF(check_advance, 2) /* pop one stack element and check that it is different from the character position */ 67 + DEF(set_char_pos, 2) /* store the character position to a register */ 68 + DEF(check_advance, 2) /* check that the register is different from the character position */ 69 69 DEF(prev, 1) /* go to the previous char */ 70 70 71 71 #endif /* DEF */
+55 -54
vendor/git/quickjs-c/libregexp.c
··· 55 55 } REOPCodeEnum; 56 56 57 57 #define CAPTURE_COUNT_MAX 255 58 - #define STACK_SIZE_MAX 255 58 + #define REGISTER_COUNT_MAX 255 59 59 /* must be large enough to have a negligible runtime cost and small 60 60 enough to call the interrupt callback often. */ 61 61 #define INTERRUPT_COUNTER_INIT 10000 ··· 105 105 #undef DEF 106 106 }; 107 107 108 - #define RE_HEADER_FLAGS 0 109 - #define RE_HEADER_CAPTURE_COUNT 2 110 - #define RE_HEADER_STACK_SIZE 3 111 - #define RE_HEADER_BYTECODE_LEN 4 108 + #define RE_HEADER_FLAGS 0 109 + #define RE_HEADER_CAPTURE_COUNT 2 110 + #define RE_HEADER_REGISTER_COUNT 3 111 + #define RE_HEADER_BYTECODE_LEN 4 112 112 113 113 #define RE_HEADER_LEN 8 114 114 ··· 468 468 re_flags = lre_get_flags(buf); 469 469 bc_len = get_u32(buf + RE_HEADER_BYTECODE_LEN); 470 470 assert(bc_len + RE_HEADER_LEN <= buf_len); 471 - printf("flags: 0x%x capture_count=%d aux_stack_size=%d\n", 472 - re_flags, buf[RE_HEADER_CAPTURE_COUNT], buf[RE_HEADER_STACK_SIZE]); 471 + printf("flags: 0x%x capture_count=%d reg_count=%d\n", 472 + re_flags, buf[RE_HEADER_CAPTURE_COUNT], buf[RE_HEADER_REGISTER_COUNT]); 473 473 if (re_flags & LRE_FLAG_NAMED_GROUPS) { 474 474 const char *p; 475 475 p = (char *)buf + RE_HEADER_LEN + bc_len; ··· 530 530 val2 = buf[pos + 1]; 531 531 val = get_u32(buf + pos + 2); 532 532 val += (pos + 6); 533 - printf(" %u, %u", val2, val); 533 + printf(" r%u, %u", val2, val); 534 534 break; 535 535 case REOP_loop_split_goto_first: 536 536 case REOP_loop_split_next_first: ··· 542 542 limit = get_u32(buf + pos + 2); 543 543 val = get_u32(buf + pos + 6); 544 544 val += (pos + 10); 545 - printf(" %u, %u, %u", val2, limit, val); 545 + printf(" r%u, %u, %u", val2, limit, val); 546 546 } 547 547 break; 548 548 case REOP_save_start: ··· 556 556 case REOP_save_reset: 557 557 printf(" %u %u", buf[pos + 1], buf[pos + 2]); 558 558 break; 559 - case REOP_push_i32: 559 + case REOP_set_i32: 560 560 val = buf[pos + 1]; 561 561 val2 = get_u32(buf + pos + 2); 562 - printf(" %u, %d", val, val2); 562 + printf(" r%u, %d", val, val2); 563 563 break; 564 - case REOP_push_char_pos: 564 + case REOP_set_char_pos: 565 565 case REOP_check_advance: 566 566 val = buf[pos + 1]; 567 - printf(" %u", val); 567 + printf(" r%u", val); 568 568 break; 569 569 case REOP_range: 570 570 case REOP_range_i: ··· 1570 1570 case REOP_line_start_m: 1571 1571 case REOP_line_end: 1572 1572 case REOP_line_end_m: 1573 - case REOP_push_i32: 1574 - case REOP_push_char_pos: 1573 + case REOP_set_i32: 1574 + case REOP_set_char_pos: 1575 1575 case REOP_word_boundary: 1576 1576 case REOP_word_boundary_i: 1577 1577 case REOP_not_word_boundary: ··· 2197 2197 put_u32(s->byte_code.buf + last_atom_start + 1, 2198 2198 len + 5 * has_goto + add_zero_advance_check * 2 * 2); 2199 2199 if (add_zero_advance_check) { 2200 - s->byte_code.buf[last_atom_start + 1 + 4] = REOP_push_char_pos; 2200 + s->byte_code.buf[last_atom_start + 1 + 4] = REOP_set_char_pos; 2201 2201 s->byte_code.buf[last_atom_start + 1 + 4 + 1] = 0; 2202 2202 re_emit_op_u8(s, REOP_check_advance, 0); 2203 2203 } ··· 2211 2211 put_u32(s->byte_code.buf + pos, 6 + add_zero_advance_check * 2 + len + 10); 2212 2212 pos += 4; 2213 2213 2214 - s->byte_code.buf[pos++] = REOP_push_i32; 2214 + s->byte_code.buf[pos++] = REOP_set_i32; 2215 2215 s->byte_code.buf[pos++] = 0; 2216 2216 put_u32(s->byte_code.buf + pos, quant_max); 2217 2217 pos += 4; 2218 2218 last_atom_start = pos; 2219 2219 if (add_zero_advance_check) { 2220 - s->byte_code.buf[pos++] = REOP_push_char_pos; 2220 + s->byte_code.buf[pos++] = REOP_set_char_pos; 2221 2221 s->byte_code.buf[pos++] = 0; 2222 2222 } 2223 2223 re_emit_goto_u8_u32(s, (add_zero_advance_check ? REOP_loop_check_adv_split_next_first : REOP_loop_split_next_first) - greedy, 0, quant_max, last_atom_start); ··· 2233 2233 goto out_of_memory; 2234 2234 /* Note: we assume the string length is < INT32_MAX */ 2235 2235 pos = last_atom_start; 2236 - s->byte_code.buf[pos++] = REOP_push_i32; 2236 + s->byte_code.buf[pos++] = REOP_set_i32; 2237 2237 s->byte_code.buf[pos++] = 0; 2238 2238 put_u32(s->byte_code.buf + pos, quant_max); 2239 2239 pos += 4; 2240 2240 last_atom_start = pos; 2241 2241 if (add_zero_advance_check) { 2242 - s->byte_code.buf[pos++] = REOP_push_char_pos; 2242 + s->byte_code.buf[pos++] = REOP_set_char_pos; 2243 2243 s->byte_code.buf[pos++] = 0; 2244 2244 } 2245 2245 if (quant_min == quant_max) { ··· 2330 2330 return 0; 2331 2331 } 2332 2332 2333 - /* the control flow is recursive so the analysis can be linear. As a 2334 - side effect, the auxiliary stack addresses are computed. */ 2335 - static int compute_stack_size(uint8_t *bc_buf, int bc_buf_len) 2333 + /* Allocate the registers as a stack. The control flow is recursive so 2334 + the analysis can be linear. */ 2335 + static int compute_register_count(uint8_t *bc_buf, int bc_buf_len) 2336 2336 { 2337 2337 int stack_size, stack_size_max, pos, opcode, len; 2338 2338 uint32_t val; ··· 2348 2348 assert(opcode < REOP_COUNT); 2349 2349 assert((pos + len) <= bc_buf_len); 2350 2350 switch(opcode) { 2351 - case REOP_push_i32: 2352 - case REOP_push_char_pos: 2351 + case REOP_set_i32: 2352 + case REOP_set_char_pos: 2353 2353 bc_buf[pos + 1] = stack_size; 2354 2354 stack_size++; 2355 2355 if (stack_size > stack_size_max) { 2356 - if (stack_size > STACK_SIZE_MAX) 2356 + if (stack_size > REGISTER_COUNT_MAX) 2357 2357 return -1; 2358 2358 stack_size_max = stack_size; 2359 2359 } ··· 2408 2408 void *opaque) 2409 2409 { 2410 2410 REParseState s_s, *s = &s_s; 2411 - int stack_size; 2411 + int register_count; 2412 2412 BOOL is_sticky; 2413 2413 2414 2414 memset(s, 0, sizeof(*s)); ··· 2469 2469 goto error; 2470 2470 } 2471 2471 2472 - stack_size = compute_stack_size(s->byte_code.buf, s->byte_code.size); 2473 - if (stack_size < 0) { 2472 + register_count = compute_register_count(s->byte_code.buf, s->byte_code.size); 2473 + if (register_count < 0) { 2474 2474 re_parse_error(s, "too many imbricated quantifiers"); 2475 2475 goto error; 2476 2476 } 2477 2477 2478 2478 s->byte_code.buf[RE_HEADER_CAPTURE_COUNT] = s->capture_count; 2479 - s->byte_code.buf[RE_HEADER_STACK_SIZE] = stack_size; 2479 + s->byte_code.buf[RE_HEADER_REGISTER_COUNT] = register_count; 2480 2480 put_u32(s->byte_code.buf + RE_HEADER_BYTECODE_LEN, 2481 2481 s->byte_code.size - RE_HEADER_LEN); 2482 2482 ··· 2620 2620 /* 0 = 8 bit chars, 1 = 16 bit chars, 2 = 16 bit chars, UTF-16 */ 2621 2621 int cbuf_type; 2622 2622 int capture_count; 2623 - int stack_size_max; 2624 2623 BOOL is_unicode; 2625 2624 int interrupt_counter; 2626 2625 void *opaque; /* used for stack overflow check */ ··· 2665 2664 2666 2665 /* return 1 if match, 0 if not match or < 0 if error. */ 2667 2666 static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, 2668 - uint8_t **aux_stack, const uint8_t *pc, const uint8_t *cptr) 2667 + uint8_t **regs, const uint8_t *pc, const uint8_t *cptr) 2669 2668 { 2670 2669 int opcode; 2671 2670 int cbuf_type; ··· 2705 2704 } 2706 2705 2707 2706 /* avoid saving the previous value if already saved */ 2708 - #define SAVE_AUX_STACK(idx, value) \ 2707 + #define SAVE_REG(idx, value) \ 2709 2708 { \ 2710 2709 StackElem *sp1; \ 2711 2710 sp1 = sp; \ ··· 2717 2716 } else { \ 2718 2717 CHECK_STACK_SPACE(2); \ 2719 2718 sp[0].val = -(int)(idx + 1); \ 2720 - sp[1].ptr = aux_stack[idx]; \ 2719 + sp[1].ptr = regs[idx]; \ 2721 2720 sp += 2; \ 2722 2721 break; \ 2723 2722 } \ 2724 2723 } \ 2725 - aux_stack[idx] = (value); \ 2724 + regs[idx] = (value); \ 2726 2725 } 2727 2726 2728 2727 ··· 2747 2746 REExecStateEnum type; 2748 2747 if (bp == s->stack_buf) 2749 2748 return 0; 2750 - /* undo the modifications to capture[] and aux_stack[] */ 2749 + /* undo the modifications to capture[] and regs[] */ 2751 2750 while (sp > bp) { 2752 2751 intptr_t idx2 = sp[-2].val; 2753 2752 if (idx2 >= 0) 2754 2753 capture[idx2] = sp[-1].ptr; 2755 2754 else 2756 - aux_stack[-idx2 - 1] = sp[-1].ptr; 2755 + regs[-idx2 - 1] = sp[-1].ptr; 2757 2756 sp -= 2; 2758 2757 } 2759 2758 ··· 2804 2803 for(;;) { 2805 2804 REExecStateEnum type; 2806 2805 type = bp[-1].bp.type; 2807 - /* undo the modifications to capture[] and aux_stack[] */ 2806 + /* undo the modifications to capture[] and regs[] */ 2808 2807 while (sp > bp) { 2809 2808 intptr_t idx2 = sp[-2].val; 2810 2809 if (idx2 >= 0) 2811 2810 capture[idx2] = sp[-1].ptr; 2812 2811 else 2813 - aux_stack[-idx2 - 1] = sp[-1].ptr; 2812 + regs[-idx2 - 1] = sp[-1].ptr; 2814 2813 sp -= 2; 2815 2814 } 2816 2815 pc = sp[-3].ptr; ··· 2950 2949 } 2951 2950 } 2952 2951 break; 2953 - case REOP_push_i32: 2952 + case REOP_set_i32: 2954 2953 idx = pc[0]; 2955 2954 val = get_u32(pc + 1); 2956 2955 pc += 5; 2957 - SAVE_AUX_STACK(idx, (void *)(uintptr_t)val); 2956 + SAVE_REG(idx, (void *)(uintptr_t)val); 2958 2957 break; 2959 2958 case REOP_loop: 2960 2959 { ··· 2963 2962 val = get_u32(pc + 1); 2964 2963 pc += 5; 2965 2964 2966 - val2 = (uintptr_t)aux_stack[idx] - 1; 2967 - SAVE_AUX_STACK(idx, (void *)(uintptr_t)val2); 2965 + val2 = (uintptr_t)regs[idx] - 1; 2966 + SAVE_REG(idx, (void *)(uintptr_t)val2); 2968 2967 if (val2 != 0) { 2969 2968 pc += (int)val; 2970 2969 if (lre_poll_timeout(s)) ··· 2985 2984 pc += 9; 2986 2985 2987 2986 /* decrement the counter */ 2988 - val2 = (uintptr_t)aux_stack[idx] - 1; 2989 - SAVE_AUX_STACK(idx, (void *)(uintptr_t)val2); 2987 + val2 = (uintptr_t)regs[idx] - 1; 2988 + SAVE_REG(idx, (void *)(uintptr_t)val2); 2990 2989 2991 2990 if (val2 > limit) { 2992 2991 /* normal loop if counter > limit */ ··· 2997 2996 /* check advance */ 2998 2997 if ((opcode == REOP_loop_check_adv_split_goto_first || 2999 2998 opcode == REOP_loop_check_adv_split_next_first) && 3000 - aux_stack[idx + 1] == cptr && 2999 + regs[idx + 1] == cptr && 3001 3000 val2 != limit) { 3002 3001 goto no_match; 3003 3002 } ··· 3022 3021 } 3023 3022 } 3024 3023 break; 3025 - case REOP_push_char_pos: 3024 + case REOP_set_char_pos: 3026 3025 idx = pc[0]; 3027 3026 pc++; 3028 - SAVE_AUX_STACK(idx, (uint8_t *)cptr); 3027 + SAVE_REG(idx, (uint8_t *)cptr); 3029 3028 break; 3030 3029 case REOP_check_advance: 3031 3030 idx = pc[0]; 3032 3031 pc++; 3033 - if (aux_stack[idx] == cptr) 3032 + if (regs[idx] == cptr) 3034 3033 goto no_match; 3035 3034 break; 3036 3035 case REOP_word_boundary: ··· 3212 3211 int cbuf_type, void *opaque) 3213 3212 { 3214 3213 REExecContext s_s, *s = &s_s; 3215 - int re_flags, i, ret; 3216 - uint8_t **aux_stack; 3214 + int re_flags, i, ret, register_count; 3215 + uint8_t **regs; 3217 3216 const uint8_t *cptr; 3218 3217 3219 3218 re_flags = lre_get_flags(bc_buf); 3220 3219 s->is_unicode = (re_flags & (LRE_FLAG_UNICODE | LRE_FLAG_UNICODE_SETS)) != 0; 3221 3220 s->capture_count = bc_buf[RE_HEADER_CAPTURE_COUNT]; 3222 - s->stack_size_max = bc_buf[RE_HEADER_STACK_SIZE]; 3223 3221 s->cbuf = cbuf; 3224 3222 s->cbuf_end = cbuf + (clen << cbuf_type); 3225 3223 s->cbuf_type = cbuf_type; ··· 3233 3231 3234 3232 for(i = 0; i < s->capture_count * 2; i++) 3235 3233 capture[i] = NULL; 3236 - aux_stack = alloca(s->stack_size_max * sizeof(aux_stack[0])); 3234 + /* XXX: modify the API so that the registers are allocated after 3235 + the captures to suppress some tests */ 3236 + register_count = bc_buf[RE_HEADER_REGISTER_COUNT]; 3237 + regs = alloca(register_count * sizeof(regs[0])); 3237 3238 3238 3239 cptr = cbuf + (cindex << cbuf_type); 3239 3240 if (0 < cindex && cindex < clen && s->cbuf_type == 2) { ··· 3243 3244 } 3244 3245 } 3245 3246 3246 - ret = lre_exec_backtrack(s, capture, aux_stack, bc_buf + RE_HEADER_LEN, 3247 + ret = lre_exec_backtrack(s, capture, regs, bc_buf + RE_HEADER_LEN, 3247 3248 cptr); 3248 3249 if (s->stack_buf != s->static_stack_buf) 3249 3250 lre_realloc(s->opaque, s->stack_buf, 0);