MIRROR: javascript for 🐜's, a tiny runtime with big ambitions
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

rewrite the entire tokenizer

+285 -172
+1 -1
meson.build
··· 96 96 build_date = run_command('date', '+%Y-%m-%d', check: true).stdout().strip() 97 97 98 98 version_conf = configuration_data() 99 - version_conf.set('ANT_VERSION', '0.3.2.21') 99 + version_conf.set('ANT_VERSION', '0.3.2.23') 100 100 version_conf.set('ANT_GIT_HASH', git_hash) 101 101 version_conf.set('ANT_BUILD_DATE', build_date) 102 102
+284 -171
src/ant.c
··· 652 652 return (c >= '0' && c <= '9') ? (uint8_t) (c - '0') : (c >= 'a' && c <= 'f') ? (uint8_t) (c - 'W') : (c >= 'A' && c <= 'F') ? (uint8_t) (c - '7') : 0; 653 653 } 654 654 655 - static bool is_space(int c) { 656 - return c == ' ' || c == '\r' || c == '\n' || c == '\t' || c == '\f' || c == '\v' || c == 0xA0; 657 - } 658 - 659 - static bool is_digit(int c) { 660 - return c >= '0' && c <= '9'; 661 - } 662 - 663 - static bool is_xdigit(int c) { 664 - return is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); 665 - } 666 - 667 - static bool is_alpha(int c) { 668 - return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); 669 - } 670 - 671 - static bool is_ident_begin(int c) { 672 - return c == '_' || c == '$' || is_alpha(c) || (c & 0x80); 673 - } 674 - 675 - static bool is_ident_continue(int c) { 676 - return c == '_' || c == '$' || is_alpha(c) || is_digit(c) || (c & 0x80); 677 - } 678 - 679 655 static bool is_unary(uint8_t tok) { 680 656 return unary_table[tok]; 681 657 } ··· 770 746 #define CHECKV(_v) do { if (is_err(_v)) { res = (_v); goto done; } } while (0) 771 747 #define EXPECT(_tok, _e) do { if (next(js) != _tok) { _e; return js_mkerr_typed(js, JS_ERR_SYNTAX, "parse error"); }; js->consumed = 1; } while (0) 772 748 749 + static bool is_digit(int c); 773 750 static bool is_proxy(struct js *js, jsval_t obj); 751 + 774 752 static bool streq(const char *buf, size_t len, const char *p, size_t n); 775 753 static bool is_this_loop_continue_target(int depth_at_entry); 776 754 static bool parse_func_params(struct js *js, uint8_t *flags, int *out_count); ··· 3404 3382 return p - code; 3405 3383 } 3406 3384 3407 - static bool streq(const char *buf, size_t len, const char *p, size_t n) { 3408 - return n == len && memcmp(buf, p, len) == 0; 3409 - } 3410 - 3411 - static bool is_strict_reserved(const char *buf, size_t len) { 3412 - switch (len) { 3413 - case 3: return streq(buf, len, "let", 3); 3414 - case 5: return streq(buf, len, "yield", 5); 3415 - case 
6: return streq(buf, len, "static", 6) || streq(buf, len, "public", 6); 3416 - case 7: return streq(buf, len, "private", 7) || streq(buf, len, "package", 7); 3417 - case 9: return streq(buf, len, "interface", 9) || streq(buf, len, "protected", 9); 3418 - case 10: return streq(buf, len, "implements", 10); 3419 - default: return false; 3420 - } 3421 - } 3422 - 3423 - static bool is_strict_restricted(const char *buf, size_t len) { 3424 - return (len == 4 && streq(buf, len, "eval", 4)) || (len == 9 && streq(buf, len, "arguments", 9)); 3425 - } 3385 + #define K(s, t) if (len == sizeof(s)-1 && !memcmp(buf, s, sizeof(s)-1)) return t 3386 + #define M(s) (len == sizeof(s)-1 && !memcmp(buf, s, sizeof(s)-1)) 3426 3387 3427 3388 static uint8_t parsekeyword(const char *buf, size_t len) { 3428 3389 switch (buf[0]) { 3429 - case 'a': if (streq("async", 5, buf, len)) return TOK_ASYNC; if (streq("await", 5, buf, len)) return TOK_AWAIT; if (streq("as", 2, buf, len)) return TOK_AS; break; 3430 - case 'b': if (streq("break", 5, buf, len)) return TOK_BREAK; break; 3431 - case 'c': if (streq("class", 5, buf, len)) return TOK_CLASS; if (streq("case", 4, buf, len)) return TOK_CASE; if (streq("catch", 5, buf, len)) return TOK_CATCH; if (streq("const", 5, buf, len)) return TOK_CONST; if (streq("continue", 8, buf, len)) return TOK_CONTINUE; break; 3432 - case 'd': if (streq("do", 2, buf, len)) return TOK_DO; if (streq("default", 7, buf, len)) return TOK_DEFAULT; if (streq("delete", 6, buf, len)) return TOK_DELETE; if (streq("debugger", 8, buf, len)) return TOK_DEBUGGER; break; 3433 - case 'e': if (streq("else", 4, buf, len)) return TOK_ELSE; if (streq("export", 6, buf, len)) return TOK_EXPORT; break; 3434 - case 'f': if (streq("for", 3, buf, len)) return TOK_FOR; if (streq("from", 4, buf, len)) return TOK_FROM; if (streq("function", 8, buf, len)) return TOK_FUNC; if (streq("finally", 7, buf, len)) return TOK_FINALLY; if (streq("false", 5, buf, len)) return TOK_FALSE; break; 3435 - case 'g': 
break; 3436 - case 'i': if (streq("if", 2, buf, len)) return TOK_IF; if (streq("import", 6, buf, len)) return TOK_IMPORT; if (streq("in", 2, buf, len)) return TOK_IN; if (streq("instanceof", 10, buf, len)) return TOK_INSTANCEOF; break; 3437 - case 'l': if (streq("let", 3, buf, len)) return TOK_LET; break; 3438 - case 'n': if (streq("new", 3, buf, len)) return TOK_NEW; if (streq("null", 4, buf, len)) return TOK_NULL; break; 3439 - case 'o': if (streq("of", 2, buf, len)) return TOK_OF; break; 3440 - case 'r': if (streq("return", 6, buf, len)) return TOK_RETURN; break; 3441 - case 's': if (streq("switch", 6, buf, len)) return TOK_SWITCH; if (streq("static", 6, buf, len)) return TOK_STATIC; break; 3442 - case 't': if (streq("try", 3, buf, len)) return TOK_TRY; if (streq("this", 4, buf, len)) return TOK_THIS; if (streq("throw", 5, buf, len)) return TOK_THROW; if (streq("true", 4, buf, len)) return TOK_TRUE; if (streq("typeof", 6, buf, len)) return TOK_TYPEOF; break; 3443 - case 'u': if (streq("undefined", 9, buf, len)) return TOK_UNDEF; break; 3444 - case 'v': if (streq("var", 3, buf, len)) return TOK_VAR; if (streq("void", 4, buf, len)) return TOK_VOID; break; 3445 - case 'w': if (streq("while", 5, buf, len)) return TOK_WHILE; if (streq("with", 4, buf, len)) return TOK_WITH; break; 3446 - case 'y': if (streq("yield", 5, buf, len)) return TOK_YIELD; break; 3390 + case 'a': 3391 + K("as", TOK_AS); 3392 + K("async", TOK_ASYNC); 3393 + K("await", TOK_AWAIT); 3394 + break; 3395 + case 'b': 3396 + K("break", TOK_BREAK); 3397 + break; 3398 + case 'c': 3399 + K("case", TOK_CASE); 3400 + K("catch", TOK_CATCH); 3401 + K("class", TOK_CLASS); 3402 + K("const", TOK_CONST); 3403 + K("continue", TOK_CONTINUE); 3404 + break; 3405 + case 'd': 3406 + K("do", TOK_DO); 3407 + K("default", TOK_DEFAULT); 3408 + K("delete", TOK_DELETE); 3409 + K("debugger", TOK_DEBUGGER); 3410 + break; 3411 + case 'e': 3412 + K("else", TOK_ELSE); 3413 + K("export", TOK_EXPORT); 3414 + break; 3415 + case 'f': 
3416 + K("for", TOK_FOR); 3417 + K("from", TOK_FROM); 3418 + K("false", TOK_FALSE); 3419 + K("finally", TOK_FINALLY); 3420 + K("function", TOK_FUNC); 3421 + break; 3422 + case 'g': 3423 + K("globalThis", TOK_GLOBAL_THIS); 3424 + break; 3425 + case 'i': 3426 + K("if", TOK_IF); 3427 + K("in", TOK_IN); 3428 + K("import", TOK_IMPORT); 3429 + K("instanceof", TOK_INSTANCEOF); 3430 + break; 3431 + case 'l': 3432 + K("let", TOK_LET); 3433 + break; 3434 + case 'n': 3435 + K("new", TOK_NEW); 3436 + K("null", TOK_NULL); 3437 + break; 3438 + case 'o': 3439 + K("of", TOK_OF); 3440 + break; 3441 + case 'r': 3442 + K("return", TOK_RETURN); 3443 + break; 3444 + case 's': 3445 + K("static", TOK_STATIC); 3446 + K("switch", TOK_SWITCH); 3447 + break; 3448 + case 't': 3449 + K("try", TOK_TRY); 3450 + K("this", TOK_THIS); 3451 + K("true", TOK_TRUE); 3452 + K("throw", TOK_THROW); 3453 + K("typeof", TOK_TYPEOF); 3454 + break; 3455 + case 'u': 3456 + K("undefined", TOK_UNDEF); 3457 + break; 3458 + case 'v': 3459 + K("var", TOK_VAR); 3460 + K("void", TOK_VOID); 3461 + break; 3462 + case 'w': 3463 + K("while", TOK_WHILE); 3464 + K("with", TOK_WITH); 3465 + K("window", TOK_WINDOW); 3466 + break; 3467 + case 'y': 3468 + K("yield", TOK_YIELD); 3469 + break; 3447 3470 } 3448 3471 return TOK_IDENTIFIER; 3449 3472 } 3450 3473 3451 - static int parse_unicode_escape(const char *buf, jsoff_t len, jsoff_t pos, uint32_t *codepoint) { 3452 - if (pos + 5 >= len) return 0; 3453 - if (buf[pos] != '\\' || buf[pos + 1] != 'u') return 0; 3454 - 3455 - uint32_t cp = 0; 3456 - for (int i = 0; i < 4; i++) { 3457 - char c = buf[pos + 2 + i]; 3458 - cp <<= 4; 3459 - if (c >= '0' && c <= '9') cp |= (c - '0'); 3460 - else if (c >= 'a' && c <= 'f') cp |= (c - 'a' + 10); 3461 - else if (c >= 'A' && c <= 'F') cp |= (c - 'A' + 10); 3462 - else return 0; 3474 + static bool is_strict_reserved(const char *buf, size_t len) { 3475 + switch (buf[0]) { 3476 + case 'i': 3477 + if M("interface") return true; 3478 + if 
M("implements") return true; 3479 + break; 3480 + case 'l': 3481 + if M("let") return true; 3482 + break; 3483 + case 'p': 3484 + if M("private") return true; 3485 + if M("package") return true; 3486 + if M("public") return true; 3487 + if M("protected") return true; 3488 + break; 3489 + case 's': 3490 + if M("static") return true; 3491 + break; 3492 + case 'y': 3493 + if M("yield") return true; 3494 + break; 3463 3495 } 3464 - *codepoint = cp; 3465 - return 6; 3496 + return false; 3466 3497 } 3467 3498 3468 - static bool is_unicode_ident_begin(uint32_t cp) { 3469 - return cp == '_' || cp == '$' || 3470 - (cp >= 'a' && cp <= 'z') || (cp >= 'A' && cp <= 'Z') || 3471 - cp >= 0x80; 3499 + #undef K 3500 + #undef M 3501 + 3502 + static inline bool streq(const char *buf, size_t len, const char *s, size_t n) { 3503 + return len == n && !memcmp(buf, s, n); 3472 3504 } 3473 3505 3474 - static bool is_unicode_ident_continue(uint32_t cp) { 3475 - return is_unicode_ident_begin(cp) || (cp >= '0' && cp <= '9'); 3506 + static bool is_strict_restricted(const char *buf, size_t len) { 3507 + return streq(buf, len, "eval", 4) || streq(buf, len, "arguments", 9); 3476 3508 } 3477 3509 3478 3510 static int encode_utf8(uint32_t cp, char *out) { ··· 3497 3529 } 3498 3530 } 3499 3531 3500 - static size_t decode_ident_escapes(const char *src, size_t srclen, char *dst, size_t dstlen) { 3501 - size_t si = 0, di = 0; 3502 - while (si < srclen && di < dstlen - 1) { 3503 - uint32_t cp; 3504 - int el = parse_unicode_escape(src, srclen, si, &cp); 3505 - if (el > 0) { 3506 - int utf8len = encode_utf8(cp, dst + di); 3507 - di += utf8len; 3508 - si += el; 3509 - } else { 3510 - dst[di++] = src[si++]; 3511 - } 3512 - } 3513 - dst[di] = '\0'; 3514 - return di; 3515 - } 3516 - 3517 - static bool has_unicode_escape(const char *src, size_t len) { 3518 - for (size_t i = 0; i + 5 < len; i++) { 3519 - if (src[i] == '\\' && src[i + 1] == 'u') return true; 3520 - } 3521 - return false; 3522 - } 3523 - 3524 - 
static jsval_t js_mkstr_ident(struct js *js, const char *src, size_t srclen) { 3525 - if (!has_unicode_escape(src, srclen)) { 3526 - return js_mkstr(js, src, srclen); 3527 - } 3528 - char decoded[256]; 3529 - size_t decoded_len = decode_ident_escapes(src, srclen, decoded, sizeof(decoded)); 3530 - return js_mkstr(js, decoded, decoded_len); 3531 - } 3532 - 3533 - static uint8_t parseident(const char *buf, jsoff_t len, jsoff_t *tlen) { 3534 - if (len == 0) return TOK_ERR; 3535 - 3536 - if (!(buf[0] & 0x80) && buf[0] != '\\' && is_ident_begin(buf[0])) { 3537 - *tlen = 1; 3538 - while (*tlen < len && !(buf[*tlen] & 0x80) && buf[*tlen] != '\\' && is_ident_continue(buf[*tlen])) { 3539 - (*tlen)++; 3540 - } 3541 - if (*tlen >= len || (!is_ident_continue(buf[*tlen]) && buf[*tlen] != '\\' && !(buf[*tlen] & 0x80))) { 3542 - return parsekeyword(buf, *tlen); 3543 - } 3544 - } 3545 - 3546 - uint32_t first_cp; 3547 - int esc_len = parse_unicode_escape(buf, len, 0, &first_cp); 3548 - 3549 - if (esc_len > 0) { 3550 - if (!is_unicode_ident_begin(first_cp)) return TOK_ERR; 3551 - *tlen = esc_len; 3552 - } else if (is_ident_begin(buf[0])) { 3553 - if (buf[0] & 0x80) { 3554 - int ws_len = is_unicode_space((const unsigned char *)buf, len, NULL); 3555 - if (ws_len > 0) return TOK_ERR; 3556 - } 3557 - *tlen = 1; 3558 - while (*tlen < len && (buf[*tlen] & 0xC0) == 0x80) (*tlen)++; 3559 - } else { 3560 - return TOK_ERR; 3561 - } 3562 - 3563 - while (*tlen < len) { 3564 - uint32_t cp; 3565 - int el = parse_unicode_escape(buf, len, *tlen, &cp); 3566 - if (el > 0) { 3567 - if (!is_unicode_ident_continue(cp)) break; 3568 - *tlen += el; 3569 - } else if (is_ident_continue(buf[*tlen])) { 3570 - if (buf[*tlen] & 0x80) { 3571 - int ws_len = is_unicode_space((const unsigned char *)&buf[*tlen], len - *tlen, NULL); 3572 - if (ws_len > 0) break; 3573 - } 3574 - (*tlen)++; 3575 - while (*tlen < len && (buf[*tlen] & 0xC0) == 0x80) (*tlen)++; 3576 - } else { 3577 - break; 3578 - } 3579 - } 3580 - 3581 - 
char decoded[256]; 3582 - size_t decoded_len = decode_ident_escapes(buf, *tlen, decoded, sizeof(decoded)); 3583 - return parsekeyword(decoded, decoded_len); 3584 - } 3585 - 3586 3532 #define CHAR_DIGIT 0x01 3587 3533 #define CHAR_XDIGIT 0x02 3588 3534 #define CHAR_ALPHA 0x04 ··· 3661 3607 ['~'] = TOK_TILDA, 3662 3608 ['#'] = TOK_HASH, 3663 3609 }; 3610 + 3611 + static bool is_space(int c) { 3612 + if (c < 0 || c >= 256) return false; 3613 + return (char_type[(uint8_t)c] & CHAR_WS) != 0; 3614 + } 3615 + 3616 + static bool is_digit(int c) { 3617 + if (c < 0 || c >= 256) return false; 3618 + return (char_type[(uint8_t)c] & CHAR_DIGIT) != 0; 3619 + } 3620 + 3621 + static bool is_xdigit(int c) { 3622 + if (c < 0 || c >= 256) return false; 3623 + return (char_type[(uint8_t)c] & CHAR_XDIGIT) != 0; 3624 + } 3625 + 3626 + static bool is_alpha(int c) { 3627 + if (c < 0 || c >= 256) return false; 3628 + return (char_type[(uint8_t)c] & CHAR_ALPHA) != 0; 3629 + } 3630 + 3631 + static bool is_ident_begin(int c) { 3632 + if (c < 0) return false; 3633 + if (c < 128) return (char_type[(uint8_t)c] & CHAR_IDENT1) != 0; 3634 + return (c & 0x80) != 0; 3635 + } 3636 + 3637 + static bool is_ident_continue(int c) { 3638 + if (c < 0) return false; 3639 + if (c < 128) return (char_type[(uint8_t)c] & (CHAR_IDENT | CHAR_IDENT1)) != 0; 3640 + return (c & 0x80) != 0; 3641 + } 3642 + 3643 + static int parse_unicode_escape(const char *buf, jsoff_t len, jsoff_t pos, uint32_t *codepoint) { 3644 + if (pos + 5 >= len) return 0; 3645 + if (buf[pos] != '\\' || buf[pos + 1] != 'u') return 0; 3646 + 3647 + uint32_t cp = 0; 3648 + for (int i = 0; i < 4; i++) { 3649 + int c = (unsigned char)buf[pos + 2 + i]; 3650 + if (!is_xdigit(c)) return 0; 3651 + cp <<= 4; 3652 + cp |= (c <= '9') ? 
(c - '0') : ((c | 0x20) - 'a' + 10); 3653 + } 3654 + *codepoint = cp; 3655 + return 6; 3656 + } 3657 + 3658 + static bool is_unicode_ident_begin(uint32_t cp) { 3659 + if (cp < 128) return (char_type[(uint8_t)cp] & CHAR_IDENT1) != 0; 3660 + return true; 3661 + } 3662 + 3663 + static bool is_unicode_ident_continue(uint32_t cp) { 3664 + if (cp < 128) return (char_type[(uint8_t)cp] & (CHAR_IDENT | CHAR_IDENT1)) != 0; 3665 + return true; 3666 + } 3667 + 3668 + static size_t decode_ident_escapes(const char *src, size_t srclen, char *dst, size_t dstlen) { 3669 + size_t si = 0, di = 0; 3670 + while (si < srclen && di + 4 < dstlen) { 3671 + uint32_t cp; 3672 + int el = parse_unicode_escape(src, srclen, si, &cp); 3673 + if (el > 0) { 3674 + di += encode_utf8(cp, dst + di); 3675 + si += el; 3676 + } else dst[di++] = src[si++]; 3677 + } 3678 + dst[di] = '\0'; 3679 + return di; 3680 + } 3681 + 3682 + static bool has_unicode_escape(const char *src, size_t len) { 3683 + if (len < 6) return false; 3684 + const char *end = src + len - 5; 3685 + const char *p = src; 3686 + while ((p = memchr(p, '\\', end - p)) != NULL) { 3687 + if (p[1] == 'u') return true; 3688 + p++; 3689 + } 3690 + return false; 3691 + } 3692 + 3693 + static jsval_t js_mkstr_ident(struct js *js, const char *src, size_t srclen) { 3694 + if (!has_unicode_escape(src, srclen)) { 3695 + return js_mkstr(js, src, srclen); 3696 + } 3697 + char decoded[256]; 3698 + size_t decoded_len = decode_ident_escapes(src, srclen, decoded, sizeof(decoded)); 3699 + return js_mkstr(js, decoded, decoded_len); 3700 + } 3701 + 3702 + 3703 + static uint8_t parseident(const char *buf, jsoff_t len, jsoff_t *tlen) { 3704 + if (len == 0) return TOK_ERR; 3705 + 3706 + unsigned char c = (unsigned char)buf[0]; 3707 + jsoff_t i = 0; 3708 + 3709 + if (c < 128 && c != '\\' && is_ident_begin(c)) { 3710 + i = 1; 3711 + while (i < len) { 3712 + c = (unsigned char)buf[i]; 3713 + if (c >= 128 || c == '\\') goto slow_path_continue; 3714 + if 
(!is_ident_continue(c)) break; 3715 + i++; 3716 + } 3717 + *tlen = i; 3718 + return parsekeyword(buf, i); 3719 + } 3720 + 3721 + if (c == '\\') { 3722 + uint32_t first_cp; 3723 + int esc_len = parse_unicode_escape(buf, len, 0, &first_cp); 3724 + if (esc_len <= 0 || !is_unicode_ident_begin(first_cp)) return TOK_ERR; 3725 + *tlen = esc_len; 3726 + goto slow_path_loop; 3727 + } 3728 + 3729 + if (c >= 128) { 3730 + if ((c & 0xC0) == 0x80) return TOK_ERR; 3731 + int ws_len = is_unicode_space((const unsigned char *)buf, len, NULL); 3732 + if (ws_len > 0) return TOK_ERR; 3733 + i = 1; 3734 + while (i < len && ((unsigned char)buf[i] & 0xC0) == 0x80) i++; 3735 + *tlen = i; 3736 + goto slow_path_loop; 3737 + } 3738 + 3739 + return TOK_ERR; 3740 + 3741 + slow_path_continue: 3742 + *tlen = i; 3743 + 3744 + slow_path_loop:; 3745 + int has_escapes = (buf[0] == '\\'); 3746 + 3747 + while (*tlen < len) { 3748 + c = (unsigned char)buf[*tlen]; 3749 + 3750 + if (c == '\\') { 3751 + uint32_t cp; 3752 + int el = parse_unicode_escape(buf, len, *tlen, &cp); 3753 + if (el <= 0 || !is_unicode_ident_continue(cp)) break; 3754 + *tlen += el; 3755 + has_escapes = 1; 3756 + } else if (c < 128) { 3757 + if (!is_ident_continue(c)) break; 3758 + (*tlen)++; 3759 + } else { 3760 + if ((c & 0xC0) == 0x80) break; 3761 + int ws_len = is_unicode_space((const unsigned char *)&buf[*tlen], len - *tlen, NULL); 3762 + if (ws_len > 0) break; 3763 + (*tlen)++; 3764 + while (*tlen < len && ((unsigned char)buf[*tlen] & 0xC0) == 0x80) (*tlen)++; 3765 + } 3766 + } 3767 + 3768 + if (has_escapes) { 3769 + char decoded[256]; 3770 + size_t decoded_len = decode_ident_escapes(buf, *tlen, decoded, sizeof(decoded)); 3771 + return parsekeyword(decoded, decoded_len); 3772 + } 3773 + 3774 + return parsekeyword(buf, *tlen); 3775 + } 3664 3776 3665 3777 static inline jsoff_t parse_decimal(const char *buf, jsoff_t maxlen, double *out) { 3666 3778 uint64_t int_part = 0, frac_part = 0; ··· 4063 4175 return js->tok; 4064 4176 } 
4065 4177 4066 - js->tok = TOK_ERR; 4067 - js->tlen = 1; 4068 - js->pos = js->toff + 1; 4069 - return TOK_ERR; 4178 + js->tok = parseident(buf, rem, &js->tlen); 4179 + if (js->tlen == 0) js->tlen = 1; 4180 + js->pos = js->toff + js->tlen; 4181 + 4182 + return js->tok; 4070 4183 } 4071 4184 4072 4185 static inline uint8_t lookahead(struct js *js) {