MIRROR: javascript for 🐜's, a tiny runtime with big ambitions
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

improve regex and other highlighting

+327 -35
+6 -1
include/highlight.h
··· 31 31 HL_TYPE_BOOLEAN, 32 32 HL_LITERAL_NULL, 33 33 HL_STRING, 34 + HL_REGEX, 35 + HL_REGEX_ESCAPE, 36 + HL_REGEX_DELIMITER, 37 + HL_REGEX_CDATA, 34 38 HL_BOOLEAN, 35 39 HL_NUMBER, 36 40 HL_COMMENT, ··· 38 42 HL_CLASS_NAME, 39 43 HL_PARENT_CLASS, 40 44 HL_FUNCTION, 45 + HL_ARGUMENT, 41 46 HL_PROPERTY, 42 47 HL_OPERATOR, 43 - HL_OPERATOR_CMP, 44 48 HL_OPTIONAL_CHAIN, 49 + HL_BRACKET, 45 50 HL_SEMICOLON, 46 51 } hl_token_class; 47 52
+321 -34
src/highlight.c
··· 10 10 typedef struct { const char *op; int len; hl_token_class cls; } op_entry_t; 11 11 12 12 static const op_entry_t operators[] = { 13 - { "===", 3, HL_OPERATOR_CMP }, 14 - { "!==", 3, HL_OPERATOR_CMP }, 13 + { "===", 3, HL_OPERATOR }, 14 + { "!==", 3, HL_OPERATOR }, 15 15 { "...", 3, HL_OPERATOR }, 16 16 { "=>", 2, HL_OPERATOR }, 17 - { "==", 2, HL_OPERATOR_CMP }, 18 - { "!=", 2, HL_OPERATOR_CMP }, 19 - { "<=", 2, HL_OPERATOR_CMP }, 20 - { ">=", 2, HL_OPERATOR_CMP }, 17 + { "==", 2, HL_OPERATOR }, 18 + { "!=", 2, HL_OPERATOR }, 19 + { "<=", 2, HL_OPERATOR }, 20 + { ">=", 2, HL_OPERATOR }, 21 21 { "&&", 2, HL_OPERATOR }, 22 22 { "||", 2, HL_OPERATOR }, 23 23 { "??", 2, HL_OPERATOR }, ··· 107 107 return HL_CTX_NONE; 108 108 } 109 109 110 + static bool regex_allowed_after_word(const char *word, size_t len) { 111 + #define W(s) (len == sizeof(s) - 1 && memcmp(word, s, sizeof(s) - 1) == 0) 112 + return 113 + W("return") || W("throw") || W("case") || W("delete") || 114 + W("void") || W("new") || W("typeof") || W("instanceof") || 115 + W("in") || W("of") || W("yield") || W("await"); 116 + #undef W 117 + } 118 + 119 + static bool can_start_regex_literal(const char *input, size_t start) { 120 + if (start == 0) return true; 121 + 122 + size_t i = start; 123 + while (i > 0) { 124 + unsigned char prev = (unsigned char)input[i - 1]; 125 + if (prev == ' ' || prev == '\t') { 126 + i--; 127 + continue; 128 + } 129 + if (prev == '\n' || prev == '\r') return true; 130 + 131 + if (is_ident_continue(prev)) { 132 + size_t end = i; 133 + while (i > 0 && is_ident_continue((unsigned char)input[i - 1])) i--; 134 + return regex_allowed_after_word(input + i, end - i); 135 + } 136 + 137 + if ( 138 + IS_DIGIT(prev) || prev == ')' || prev == ']' || prev == '}' || 139 + prev == '\'' || prev == '"' || prev == '`' || prev == '.' 
140 + ) return false; 141 + 142 + switch (prev) { 143 + case '(': 144 + case '[': 145 + case '{': 146 + case ',': 147 + case ';': 148 + case ':': 149 + case '=': 150 + case '!': 151 + case '?': 152 + case '+': 153 + case '-': 154 + case '*': 155 + case '%': 156 + case '&': 157 + case '|': 158 + case '^': 159 + case '~': 160 + case '<': 161 + case '>': return true; 162 + default: return false; 163 + } 164 + } 165 + 166 + return true; 167 + } 168 + 169 + static bool try_parse_regex_literal(const char *input, size_t input_len, size_t start, size_t *out_end) { 170 + if (input[start] != '/' || start + 1 >= input_len) return false; 171 + if (input[start + 1] == '/' || input[start + 1] == '*') return false; 172 + if (!can_start_regex_literal(input, start)) return false; 173 + 174 + size_t i = start + 1; 175 + bool in_class = false; 176 + 177 + while (i < input_len) { 178 + unsigned char ch = (unsigned char)input[i]; 179 + 180 + if (ch == '\\') { 181 + i += (i + 1 < input_len) ? 2 : 1; 182 + continue; 183 + } 184 + if (ch == '\n' || ch == '\r') return false; 185 + if (!in_class && ch == '[') { 186 + in_class = true; i++; 187 + continue; 188 + } 189 + if (in_class && ch == ']') { 190 + in_class = false; i++; 191 + continue; 192 + } 193 + if (!in_class && ch == '/') { 194 + i++; 195 + while (i < input_len) { 196 + unsigned char f = (unsigned char)input[i]; 197 + if ((f >= 'a' && f <= 'z') || (f >= 'A' && f <= 'Z')) { 198 + i++; 199 + continue; 200 + } break; 201 + } 202 + *out_end = i; 203 + return true; 204 + } i++; 205 + } 206 + 207 + return false; 208 + } 209 + 210 + static size_t skip_inline_ws_forward(const char *input, size_t input_len, size_t i) { 211 + while (i < input_len && (input[i] == ' ' || input[i] == '\t')) i++; 212 + return i; 213 + } 214 + 215 + static size_t skip_inline_ws_backward(const char *input, size_t i) { 216 + while (i > 0 && (input[i - 1] == ' ' || input[i - 1] == '\t')) i--; 217 + return i; 218 + } 219 + 220 + static bool read_prev_word(const char 
*input, size_t end, size_t *word_start, size_t *word_len) { 221 + size_t i = skip_inline_ws_backward(input, end); 222 + if (i == 0 || !is_ident_continue((unsigned char)input[i - 1])) return false; 223 + 224 + size_t wend = i; 225 + while (i > 0 && is_ident_continue((unsigned char)input[i - 1])) i--; 226 + 227 + *word_start = i; 228 + *word_len = wend - i; 229 + return true; 230 + } 231 + 232 + static bool has_function_keyword_before_paren(const char *input, size_t open_paren) { 233 + size_t word_start = 0; 234 + size_t word_len = 0; 235 + 236 + if (!read_prev_word(input, open_paren, &word_start, &word_len)) return false; 237 + if (word_len == 8 && memcmp(input + word_start, "function", 8) == 0) return true; 238 + 239 + if (!read_prev_word(input, word_start, &word_start, &word_len)) return false; 240 + return (word_len == 8 && memcmp(input + word_start, "function", 8) == 0); 241 + } 242 + 243 + static bool find_enclosing_open_paren(const char *input, size_t pos, size_t *open_paren) { 244 + size_t depth = 0; 245 + size_t i = pos; 246 + 247 + while (i > 0) { 248 + i--; 249 + unsigned char ch = (unsigned char)input[i]; 250 + if (ch == ')') { 251 + depth++; 252 + continue; 253 + } 254 + if (ch == '(') { 255 + if (depth == 0) { 256 + *open_paren = i; 257 + return true; 258 + } 259 + depth--; 260 + } 261 + } 262 + return false; 263 + } 264 + 265 + static bool find_matching_close_paren(const char *input, size_t input_len, size_t open_paren, size_t *close_paren) { 266 + size_t depth = 0; 267 + for (size_t i = open_paren + 1; i < input_len; i++) { 268 + unsigned char ch = (unsigned char)input[i]; 269 + if (ch == '(') { 270 + depth++; 271 + continue; 272 + } 273 + if (ch == ')') { 274 + if (depth == 0) { 275 + *close_paren = i; 276 + return true; 277 + } 278 + depth--; 279 + } 280 + } 281 + return false; 282 + } 283 + 284 + static bool is_arrow_after(const char *input, size_t input_len, size_t pos) { 285 + size_t i = skip_inline_ws_forward(input, input_len, pos); 286 + return 
(i + 1 < input_len && input[i] == '=' && input[i + 1] == '>'); 287 + } 288 + 289 + static bool is_function_argument_identifier(const char *input, size_t input_len, size_t start, size_t end) { 290 + if (is_arrow_after(input, input_len, end)) { 291 + size_t left = skip_inline_ws_backward(input, start); 292 + if (left > 0 && input[left - 1] == '.') return false; 293 + return true; 294 + } 295 + 296 + size_t prev = skip_inline_ws_backward(input, start); 297 + if (prev == 0) return false; 298 + unsigned char prev_ch = (unsigned char)input[prev - 1]; 299 + if (!(prev_ch == '(' || prev_ch == ',')) return false; 300 + 301 + size_t open_paren = 0; 302 + if (!find_enclosing_open_paren(input, start, &open_paren)) return false; 303 + 304 + size_t close_paren = 0; 305 + if (find_matching_close_paren(input, input_len, open_paren, &close_paren) && 306 + is_arrow_after(input, input_len, close_paren + 1)) 307 + return true; 308 + 309 + return has_function_keyword_before_paren(input, open_paren); 310 + } 311 + 110 312 bool hl_iter_next(hl_iter *it, hl_span *out) { 111 313 const char *input = it->input; 112 314 size_t input_len = it->input_len; ··· 174 376 if (it->state.template_depth <= 0) { 175 377 it->state.mode = HL_STATE_TEMPLATE; 176 378 it->state.template_depth = 0; 177 - *out = (hl_span){ i, 1, HL_NONE }; 379 + *out = (hl_span){ i, 1, HL_BRACKET }; 178 380 it->pos = i + 1; 179 381 return true; 180 382 } 181 383 } 182 384 if (it->state.mode == HL_STATE_TEMPLATE_EXPR && c == '{') { 183 385 it->state.template_depth++; 184 - *out = (hl_span){ i, 1, HL_NONE }; 386 + *out = (hl_span){ i, 1, HL_BRACKET }; 185 387 it->pos = i + 1; 186 388 return true; 187 389 } ··· 209 411 return true; 210 412 } 211 413 414 + if (c == '/') { 415 + size_t regex_end = 0; 416 + if (try_parse_regex_literal(input, input_len, i, &regex_end)) { 417 + it->ctx = HL_CTX_NONE; 418 + *out = (hl_span){ i, regex_end - i, HL_REGEX }; 419 + it->pos = regex_end; 420 + return true; 421 + } 422 + } 423 + 212 424 if (c 
== '\'' || c == '"') { 213 425 it->ctx = HL_CTX_NONE; 214 426 size_t start = i; ··· 297 509 size_t word_len = i - start; 298 510 const char *word = input + start; 299 511 512 + bool is_member_access = (start > 0 && input[start - 1] == '.' && 513 + (start < 2 || input[start - 2] != '.')); 300 514 bool is_method = false; 301 - if (start > 0 && input[start - 1] == '.') { 515 + if (is_member_access) { 302 516 size_t peek = i; 303 517 while (peek < input_len && input[peek] == ' ') peek++; 304 518 if (peek < input_len && input[peek] == '(') is_method = true; 305 519 } 520 + size_t after_word = i; 521 + while (after_word < input_len && input[after_word] == ' ') after_word++; 522 + bool is_call = (after_word < input_len && input[after_word] == '('); 306 523 307 524 hl_token_class cls = HL_NONE; 525 + bool is_console = (word_len == 7 && memcmp(word, "console", 7) == 0); 308 526 309 - if (is_method) { 527 + if (is_console) { 528 + cls = HL_PROPERTY; 529 + } else if (is_function_argument_identifier(input, input_len, start, i)) { 530 + cls = HL_ARGUMENT; 531 + } else if (is_method) { 310 532 cls = HL_FUNCTION; 311 - } else if (start > 0 && input[start - 1] == '.') { 533 + } else if (is_member_access) { 312 534 cls = HL_PROPERTY; 313 535 } else if (it->ctx == HL_CTX_AFTER_FUNCTION) { 314 536 cls = HL_FUNCTION_NAME; ··· 346 568 cls = HL_TYPE; 347 569 } 348 570 571 + if (cls == HL_NONE && is_call) { 572 + cls = HL_FUNCTION; 573 + } 574 + 349 575 hl_context next_ctx = keyword_sets_context(word, word_len); 350 576 if (next_ctx != HL_CTX_NONE) it->ctx = next_ctx; 351 577 } ··· 357 583 358 584 if (c == '<' || c == '>' || c == '=') { 359 585 it->ctx = HL_CTX_NONE; 360 - *out = (hl_span){ i, 1, HL_OPERATOR_CMP }; 586 + *out = (hl_span){ i, 1, HL_OPERATOR }; 587 + it->pos = i + 1; 588 + return true; 589 + } 590 + 591 + if (c == '(' || c == ')' || c == '[' || c == ']' || c == '{' || c == '}') { 592 + it->ctx = HL_CTX_NONE; 593 + *out = (hl_span){ i, 1, HL_BRACKET }; 361 594 it->pos = i + 
1; 362 595 return true; 363 596 } ··· 416 649 case HL_STRING: return "green"; 417 650 case HL_NUMBER: return "yellow"; 418 651 case HL_BOOLEAN: return "magenta"; 652 + 653 + case HL_REGEX: return "#FFB265"; 654 + case HL_REGEX_ESCAPE: return "#FFCC99"; 655 + case HL_REGEX_DELIMITER: return "#FF9932"; 656 + case HL_REGEX_CDATA: return "#65B2FF"; 419 657 420 658 case HL_KEYWORD: return "#65B2FF"; 421 659 case HL_KEYWORD_DELETE: return "#F43D3D"; ··· 426 664 case HL_COMMENT: return "#758CA3"; 427 665 case HL_FUNCTION_NAME: return "#30E8AA"; 428 666 case HL_FUNCTION: return "#30E8AA"; 667 + case HL_ARGUMENT: return "#CCA3F4"; 429 668 case HL_PROPERTY: return "#CCA3F4"; 430 - case HL_OPERATOR: return "#CCA3F4"; 431 - case HL_OPERATOR_CMP: return "#8CB2D8"; 669 + case HL_OPERATOR: return "#8CB2D8"; 432 670 case HL_OPTIONAL_CHAIN: return "#8CB2D8"; 671 + case HL_BRACKET: return "#8CB2D8"; 433 672 case HL_SEMICOLON: return "#B2CCE5"; 434 673 435 674 case HL_KEYWORD_ITALIC: return "italic+#65B2FF"; ··· 440 679 default: return NULL; 441 680 }} 442 681 682 + static inline void ob_write_with_class(outbuf_t *o, hl_token_class cls, const char *s, size_t n) { 683 + if (n == 0) return; 684 + 685 + const char *var = class_to_crvar(cls); 686 + if (var) { 687 + ob_putc(o, '<'); 688 + ob_puts(o, var); 689 + ob_putc(o, '>'); 690 + ob_write_escaped(o, s, n); 691 + ob_write(o, "</>", 3); 692 + } else ob_write_escaped(o, s, n); 693 + } 694 + 695 + static void ob_write_regex_literal(outbuf_t *o, const char *s, size_t n) { 696 + if (n == 0) return; 697 + 698 + ob_write_with_class(o, HL_REGEX_DELIMITER, s, 1); 699 + 700 + size_t i = 1; 701 + size_t seg_start = i; 702 + bool in_class = false; 703 + 704 + while (i < n) { 705 + unsigned char ch = (unsigned char)s[i]; 706 + 707 + if (!in_class && ch == '/') { 708 + ob_write_with_class(o, HL_REGEX, s + seg_start, i - seg_start); 709 + ob_write_with_class(o, HL_REGEX_DELIMITER, s + i, 1); 710 + i++; 711 + ob_write_with_class(o, HL_REGEX_DELIMITER, 
s + i, n - i); 712 + return; 713 + } 714 + 715 + if (ch == '\\') { 716 + ob_write_with_class(o, in_class ? HL_REGEX_CDATA : HL_REGEX, s + seg_start, i - seg_start); 717 + size_t esc_len = (i + 1 < n) ? 2 : 1; 718 + ob_write_with_class(o, HL_REGEX_ESCAPE, s + i, esc_len); 719 + i += esc_len; 720 + seg_start = i; 721 + continue; 722 + } 723 + 724 + if (!in_class && ch == '[') { 725 + ob_write_with_class(o, HL_REGEX, s + seg_start, i - seg_start); 726 + in_class = true; 727 + seg_start = i; 728 + i++; 729 + continue; 730 + } 731 + 732 + if (in_class && ch == ']') { 733 + i++; 734 + ob_write_with_class(o, HL_REGEX_CDATA, s + seg_start, i - seg_start); 735 + in_class = false; 736 + seg_start = i; 737 + continue; 738 + } 739 + 740 + i++; 741 + } 742 + 743 + ob_write_with_class(o, in_class ? HL_REGEX_CDATA : HL_REGEX, s + seg_start, n - seg_start); 744 + } 745 + 443 746 int ant_highlight_stateful( 444 747 const char *input, size_t input_len, 445 748 char *out, size_t out_size, ··· 452 755 453 756 hl_span span; 454 757 while (hl_iter_next(&it, &span) && !o.overflow) { 455 - const char *var = class_to_crvar(span.cls); 456 - if (var) { 457 - ob_putc(&o, '<'); 458 - ob_puts(&o, var); 459 - ob_putc(&o, '>'); 460 - ob_write_escaped(&o, input + span.off, span.len); 461 - ob_write(&o, "</>", 3); 462 - } else { 463 - ob_write_escaped(&o, input + span.off, span.len); 464 - } 758 + if (span.cls == HL_REGEX) ob_write_regex_literal(&o, input + span.off, span.len); 759 + else ob_write_with_class(&o, span.cls, input + span.off, span.len); 465 760 } 466 761 467 762 *state = hl_iter_state(&it); ··· 509 804 size_t emit_len = span.len < span_remaining ? 
span.len : span_remaining; 510 805 511 806 if (!o.overflow) { 512 - const char *var = class_to_crvar(span.cls); 513 - if (var) { 514 - ob_putc(&o, '<'); 515 - ob_puts(&o, var); 516 - ob_putc(&o, '>'); 517 - ob_write_escaped(&o, line + span.off, emit_len); 518 - ob_write(&o, "</>", 3); 519 - } else { 520 - ob_write_escaped(&o, line + span.off, emit_len); 521 - } 807 + if (span.cls == HL_REGEX) ob_write_regex_literal(&o, line + span.off, emit_len); 808 + else ob_write_with_class(&o, span.cls, line + span.off, emit_len); 522 809 } 523 810 524 811 vis_cols += span.len;