MIRROR: javascript for 🐜's, a tiny runtime with big ambitions
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

modularize highlight.c

+844 -830
+5 -1
examples/quirks.txt
··· 47 47 Array(16).join('wat' - 1) + ' Batman!'; 48 48 49 49 banana: 50 - ('b' + 'a' + + 'a' + 'a').toLowerCase() 50 + ('b' + 'a' + + 'a' + 'a').toLowerCase() 51 + 52 + sentient window loop: 53 + (i=0,w=new Proxy(window,{get:(t,k)=>k==='window'?(console.log(i++),w):t[k]})) 54 + w.window.window.window.window.window.window
+20
include/highlight/emit.h
··· 1 + #ifndef HIGHLIGHT_EMIT_H 2 + #define HIGHLIGHT_EMIT_H 3 + 4 + #include <stdbool.h> 5 + #include <stddef.h> 6 + 7 + #include "highlight.h" 8 + 9 + typedef struct { 10 + char *buf; 11 + size_t size; 12 + size_t pos; 13 + bool overflow; 14 + } hl_outbuf; 15 + 16 + void hl_outbuf_init(hl_outbuf *o, char *buf, size_t size); 17 + void hl_outbuf_write_escaped(hl_outbuf *o, const char *s, size_t n); 18 + void hl_outbuf_emit_span(hl_outbuf *o, hl_token_class cls, const char *s, size_t n); 19 + 20 + #endif
+33 -32
include/regex_scan.h include/highlight/regex.h
··· 1 - #ifndef REGEX_SCAN_H 2 - #define REGEX_SCAN_H 1 + #ifndef HIGHLIGHT_REGEX_SCAN_H 2 + #define HIGHLIGHT_REGEX_SCAN_H 3 3 4 4 #include <stdbool.h> 5 5 #include <stddef.h> ··· 31 31 } 32 32 33 33 static inline bool js_regex_word_allows_start(const char *word, size_t len) { 34 - return 34 + return 35 35 js_regex_word_eq(word, len, "return", 6) || 36 36 js_regex_word_eq(word, len, "throw", 5) || 37 37 js_regex_word_eq(word, len, "case", 4) || ··· 47 47 } 48 48 49 49 static inline bool js_regex_prev_forbids_start(unsigned char prev) { 50 - return 50 + return 51 51 js_regex_is_digit(prev) || 52 52 prev == ')' || prev == ']' || prev == '}' || 53 53 prev == '"' || prev == '\'' || prev == '`' || prev == '.'; 54 54 } 55 55 56 56 static inline bool js_regex_prev_allows_start(unsigned char prev) { 57 - switch (prev) { 58 - case '(': 59 - case '[': 60 - case '{': 61 - case ',': 62 - case ';': 63 - case ':': 64 - case '=': 65 - case '!': 66 - case '?': 67 - case '+': 68 - case '-': 69 - case '*': 70 - case '%': 71 - case '&': 72 - case '|': 73 - case '^': 74 - case '~': 75 - case '<': 76 - case '>': return true; 77 - default: return false; 78 - }} 57 + switch (prev) { 58 + case '(': 59 + case '[': 60 + case '{': 61 + case ',': 62 + case ';': 63 + case ':': 64 + case '=': 65 + case '!': 66 + case '?': 67 + case '+': 68 + case '-': 69 + case '*': 70 + case '%': 71 + case '&': 72 + case '|': 73 + case '^': 74 + case '~': 75 + case '<': 76 + case '>': return true; 77 + default: return false; 78 + } 79 + } 79 80 80 81 static inline bool js_regex_can_start(const char *code, size_t start) { 81 82 if (start == 0) return true; ··· 109 110 for (; i < len; i++) { 110 111 unsigned char ch = (unsigned char)code[i]; 111 112 if (ch == '\n' || ch == '\r') return false; 112 - 113 + 113 114 if (ch == '\\') { 114 115 if (i + 1 < len) i++; 115 116 continue; 116 117 } 117 - 118 + 118 119 if (in_class) { 119 120 if (ch == ']') in_class = false; 120 121 continue; 121 122 } 122 - 123 + 123 124 
if (ch == '[') { 124 125 in_class = true; 125 126 continue; 126 127 } 127 - 128 + 128 129 if (ch != '/') continue; 129 - 130 + 130 131 i++; 131 132 while (i < len && js_regex_is_alpha((unsigned char)code[i])) i++; 132 - 133 + 133 134 if (out_end) *out_end = i; 134 135 return true; 135 136 }
+2 -2
sources.json
··· 1 1 { 2 2 "engine": { 3 - "patterns": ["src/*.c", "src/esm/*.c", "src/cli/*.c", "src/modules/*.c", "src/silver/*.c"], 3 + "patterns": ["src/*.c", "src/highlight/*.c", "src/esm/*.c", "src/cli/*.c", "src/modules/*.c", "src/silver/*.c"], 4 4 "exclude": ["src/main.c", "src/watch.c"] 5 5 }, 6 6 "library": { 7 - "patterns": ["src/*.c", "src/esm/*.c", "src/modules/*.c", "src/silver/*.c"], 7 + "patterns": ["src/*.c", "src/highlight/*.c", "src/esm/*.c", "src/modules/*.c", "src/silver/*.c"], 8 8 "exclude": ["src/main.c", "src/watch.c"] 9 9 }, 10 10 "core": {
+10 -794
src/highlight.c
··· 1 - #include <stdio.h> 2 1 #include <string.h> 3 - #include <stdbool.h> 4 - #include <crprintf.h> 5 2 6 - #include "tokens.h" 7 3 #include "highlight.h" 8 - #include "regex_scan.h" 9 - #include "silver/lexer.h" 10 - 11 - typedef struct { const char *op; int len; hl_token_class cls; } op_entry_t; 12 - 13 - static const op_entry_t operators[] = { 14 - { "===", 3, HL_OPERATOR }, 15 - { "!==", 3, HL_OPERATOR }, 16 - { "...", 3, HL_OPERATOR }, 17 - { "=>", 2, HL_OPERATOR }, 18 - { "==", 2, HL_OPERATOR }, 19 - { "!=", 2, HL_OPERATOR }, 20 - { "<=", 2, HL_OPERATOR }, 21 - { ">=", 2, HL_OPERATOR }, 22 - { "&&", 2, HL_OPERATOR }, 23 - { "||", 2, HL_OPERATOR }, 24 - { "??", 2, HL_OPERATOR }, 25 - { "?.", 2, HL_OPTIONAL_CHAIN }, 26 - }; 27 - 28 - #define OP_COUNT (sizeof(operators) / sizeof(operators[0])) 29 - #define K(s, t) if (len == sizeof(s)-1 && !memcmp(word, s, sizeof(s)-1)) return t 30 - 31 - static hl_token_class lookup_extra_keyword(const char *word, size_t len) { 32 - switch (word[0]) { 33 - case 'a': 34 - K("abstract", HL_TYPE); 35 - K("async", HL_KEYWORD_ITALIC); 36 - break; 37 - case 'b': K("boolean", HL_TYPE_BOOLEAN); break; 38 - case 'd': K("declare", HL_TYPE); break; 39 - case 'e': 40 - K("enum", HL_TYPE); 41 - K("export", HL_KEYWORD_ITALIC); 42 - break; 43 - case 'g': K("global", HL_KEYWORD_ITALIC); break; 44 - case 'i': 45 - K("interface", HL_TYPE); 46 - K("implements", HL_TYPE); 47 - break; 48 - case 'n': 49 - K("namespace", HL_TYPE); 50 - K("never", HL_TYPE); 51 - break; 52 - case 'o': K("object", HL_TYPE); break; 53 - case 'p': 54 - K("package", HL_KEYWORD); 55 - K("private", HL_KEYWORD); 56 - K("protected", HL_KEYWORD); 57 - K("public", HL_KEYWORD); 58 - break; 59 - case 'r': K("readonly", HL_TYPE); break; 60 - case 's': 61 - K("string", HL_TYPE_STRING); 62 - K("symbol", HL_TYPE_STRING); 63 - break; 64 - case 't': K("type", HL_TYPE); break; 65 - case 'u': K("unknown", HL_TYPE); break; 66 - } return HL_NONE; 67 - } 68 - 69 - #undef K 70 - 71 - static 
hl_token_class tok_to_class(uint8_t tok) { 72 - static const void *dispatch[] = { 73 - [TOK_ASYNC] = &&l_kw_italic, 74 - [TOK_EXPORT] = &&l_kw_italic, 75 - [TOK_THIS] = &&l_kw_italic, 76 - [TOK_GLOBAL_THIS] = &&l_kw_italic, 77 - [TOK_WINDOW] = &&l_kw_italic, 78 - [TOK_DELETE] = &&l_kw_delete, 79 - [TOK_TYPEOF] = &&l_type, 80 - [TOK_INSTANCEOF] = &&l_type, 81 - [TOK_OF] = &&l_type, 82 - [TOK_IN] = &&l_type, 83 - [TOK_AS] = &&l_type, 84 - [TOK_TRUE] = &&l_bool, 85 - [TOK_FALSE] = &&l_bool, 86 - [TOK_NULL] = &&l_null, 87 - [TOK_UNDEF] = &&l_null, 88 - }; 89 - 90 - if (tok <= TOK_IDENTIFIER || tok >= TOK_IDENT_LIKE_END) return HL_NONE; 91 - if (tok < sizeof(dispatch) / sizeof(*dispatch) && dispatch[tok]) goto *dispatch[tok]; 92 - 93 - return HL_KEYWORD; 94 - 95 - l_kw_italic: return HL_KEYWORD_ITALIC; 96 - l_kw_delete: return HL_KEYWORD_DELETE; 97 - l_type: return HL_TYPE; 98 - l_bool: return HL_BOOLEAN; 99 - l_null: return HL_LITERAL_NULL; 100 - } 101 - 102 - void hl_iter_init(hl_iter *it, const char *input, size_t input_len, const highlight_state *state) { 103 - it->input = input; 104 - it->input_len = input_len; 105 - it->pos = 0; 106 - it->state = state ? 
*state : HL_STATE_INIT; 107 - it->ctx = HL_CTX_NONE; 108 - } 109 - 110 - static hl_context keyword_sets_context(const char *word, size_t len) { 111 - if (len == 8 && memcmp(word, "function", 8) == 0) return HL_CTX_AFTER_FUNCTION; 112 - if (len == 5 && memcmp(word, "class", 5) == 0) return HL_CTX_AFTER_CLASS; 113 - if (len == 7 && memcmp(word, "extends", 7) == 0) return HL_CTX_AFTER_EXTENDS; 114 - return HL_CTX_NONE; 115 - } 116 - 117 - static size_t skip_inline_ws_forward(const char *input, size_t input_len, size_t i) { 118 - while (i < input_len && (input[i] == ' ' || input[i] == '\t' || input[i] == '\n' || input[i] == '\r')) i++; 119 - return i; 120 - } 121 - 122 - static size_t skip_inline_ws_backward(const char *input, size_t i) { 123 - while (i > 0 && (input[i - 1] == ' ' || input[i - 1] == '\t' || input[i - 1] == '\n' || input[i - 1] == '\r')) i--; 124 - return i; 125 - } 126 - 127 - static bool read_prev_word(const char *input, size_t end, size_t *word_start, size_t *word_len) { 128 - size_t i = skip_inline_ws_backward(input, end); 129 - if (i == 0 || !is_ident_continue((unsigned char)input[i - 1])) return false; 130 - 131 - size_t wend = i; 132 - while (i > 0 && is_ident_continue((unsigned char)input[i - 1])) i--; 133 - 134 - *word_start = i; 135 - *word_len = wend - i; 136 - return true; 137 - } 138 - 139 - static bool is_arrow_after(const char *input, size_t input_len, size_t pos) { 140 - size_t i = skip_inline_ws_forward(input, input_len, pos); 141 - return (i + 1 < input_len && input[i] == '=' && input[i + 1] == '>'); 142 - } 143 - 144 - static bool has_function_keyword_before_paren(const char *input, size_t open_paren) { 145 - size_t word_start = 0; 146 - size_t word_len = 0; 147 - 148 - if (!read_prev_word(input, open_paren, &word_start, &word_len)) return false; 149 - if (word_len == 8 && memcmp(input + word_start, "function", 8) == 0) return true; 150 - 151 - if (!read_prev_word(input, word_start, &word_start, &word_len)) return false; 152 - return 
(word_len == 8 && memcmp(input + word_start, "function", 8) == 0); 153 - } 154 - 155 - static bool is_control_paren_prefix(const char *input, size_t open_paren) { 156 - size_t word_start = 0; 157 - size_t word_len = 0; 158 - if (!read_prev_word(input, open_paren, &word_start, &word_len)) return false; 159 - 160 - #define C(s) (word_len == sizeof(s) - 1 && memcmp(input + word_start, s, sizeof(s) - 1) == 0) 161 - return C("if") || C("for") || C("while") || C("switch") || C("catch") || C("with"); 162 - #undef C 163 - } 164 - 165 - static bool is_likely_function_param_paren( 166 - const char *input, size_t input_len, 167 - size_t open_paren, size_t close_paren 168 - ) { 169 - if (is_arrow_after(input, input_len, close_paren + 1)) return true; 170 - if (has_function_keyword_before_paren(input, open_paren)) return true; 171 - 172 - size_t after = skip_inline_ws_forward(input, input_len, close_paren + 1); 173 - if (after < input_len && input[after] == '{' && !is_control_paren_prefix(input, open_paren)) 174 - return true; 175 - 176 - return false; 177 - } 178 - 179 - static bool find_enclosing_open_paren(const char *input, size_t pos, size_t *open_paren) { 180 - size_t depth = 0; 181 - size_t i = pos; 182 - 183 - while (i > 0) { 184 - i--; 185 - unsigned char ch = (unsigned char)input[i]; 186 - if (ch == ')') { 187 - depth++; 188 - continue; 189 - } 190 - if (ch == '(') { 191 - if (depth == 0) { 192 - *open_paren = i; 193 - return true; 194 - } 195 - depth--; 196 - } 197 - } 198 - return false; 199 - } 200 - 201 - static bool find_matching_close_paren(const char *input, size_t input_len, size_t open_paren, size_t *close_paren) { 202 - size_t depth = 0; 203 - for (size_t i = open_paren + 1; i < input_len; i++) { 204 - unsigned char ch = (unsigned char)input[i]; 205 - if (ch == '(') { 206 - depth++; 207 - continue; 208 - } 209 - if (ch == ')') { 210 - if (depth == 0) { 211 - *close_paren = i; 212 - return true; 213 - } 214 - depth--; 215 - } 216 - } 217 - return false; 218 - 
} 219 - 220 - static bool is_function_argument_identifier(const char *input, size_t input_len, size_t start, size_t end) { 221 - if (is_arrow_after(input, input_len, end)) { 222 - size_t left = skip_inline_ws_backward(input, start); 223 - if (left > 0 && input[left - 1] == '.') return false; 224 - return true; 225 - } 226 - 227 - size_t prev = skip_inline_ws_backward(input, start); 228 - if (prev == 0) return false; 229 - unsigned char prev_ch = (unsigned char)input[prev - 1]; 230 - if (!(prev_ch == '(' || prev_ch == ',' || prev_ch == '{' || prev_ch == '[' || prev_ch == ':')) 231 - return false; 232 - 233 - size_t open_paren = 0; 234 - if (!find_enclosing_open_paren(input, start, &open_paren)) return false; 235 - 236 - size_t close_paren = 0; 237 - if (!find_matching_close_paren(input, input_len, open_paren, &close_paren)) return false; 238 - return is_likely_function_param_paren(input, input_len, open_paren, close_paren); 239 - } 240 - 241 - bool hl_iter_next(hl_iter *it, hl_span *out) { 242 - const char *input = it->input; 243 - size_t input_len = it->input_len; 244 - size_t i = it->pos; 245 - 246 - if (i >= input_len) return false; 247 - unsigned char c = (unsigned char)input[i]; 248 - 249 - if (it->state.mode == HL_STATE_BLOCK_COMMENT) { 250 - size_t start = i; 251 - while (i < input_len) { 252 - if (input[i] == '*' && i + 1 < input_len && input[i + 1] == '/') { 253 - i += 2; 254 - it->state.mode = HL_STATE_NORMAL; 255 - break; 256 - } 257 - i++; 258 - } 259 - *out = (hl_span){ start, i - start, HL_COMMENT }; 260 - it->pos = i; 261 - return true; 262 - } 263 - 264 - if (it->state.mode == HL_STATE_STRING_SINGLE || it->state.mode == HL_STATE_STRING_DOUBLE) { 265 - char quote = (it->state.mode == HL_STATE_STRING_SINGLE) ? '\'' : '"'; 266 - size_t start = i; 267 - while (i < input_len) { 268 - if (input[i] == '\\' && i + 1 < input_len) { i += 2; continue; } 269 - if (input[i] == quote) { 270 - i++; 271 - it->state.mode = (it->state.template_depth > 0) ? 
HL_STATE_TEMPLATE_EXPR : HL_STATE_NORMAL; 272 - break; 273 - } 274 - i++; 275 - } 276 - *out = (hl_span){ start, i - start, HL_STRING }; 277 - it->pos = i; 278 - return true; 279 - } 280 - 281 - if (it->state.mode == HL_STATE_TEMPLATE) { 282 - size_t start = i; 283 - while (i < input_len) { 284 - if (input[i] == '\\' && i + 1 < input_len) { i += 2; continue; } 285 - if (input[i] == '$' && i + 1 < input_len && input[i + 1] == '{') { 286 - i += 2; 287 - it->state.mode = HL_STATE_TEMPLATE_EXPR; 288 - it->state.template_depth++; 289 - break; 290 - } 291 - if (input[i] == '`') { 292 - i++; 293 - it->state.mode = (it->state.template_depth > 0) ? HL_STATE_TEMPLATE_EXPR : HL_STATE_NORMAL; 294 - break; 295 - } 296 - i++; 297 - } 298 - *out = (hl_span){ start, i - start, HL_STRING }; 299 - it->pos = i; 300 - return true; 301 - } 302 - 303 - if (it->state.mode == HL_STATE_TEMPLATE_EXPR && c == '}') { 304 - it->state.template_depth--; 305 - if (it->state.template_depth <= 0) { 306 - it->state.mode = HL_STATE_TEMPLATE; 307 - it->state.template_depth = 0; 308 - *out = (hl_span){ i, 1, HL_BRACKET }; 309 - it->pos = i + 1; 310 - return true; 311 - } 312 - } 313 - if (it->state.mode == HL_STATE_TEMPLATE_EXPR && c == '{') { 314 - it->state.template_depth++; 315 - *out = (hl_span){ i, 1, HL_BRACKET }; 316 - it->pos = i + 1; 317 - return true; 318 - } 319 - 320 - if (c == '/' && i + 1 < input_len && input[i + 1] == '/') { 321 - it->ctx = HL_CTX_NONE; 322 - *out = (hl_span){ i, input_len - i, HL_COMMENT }; 323 - it->pos = input_len; 324 - return true; 325 - } 326 - 327 - if (c == '/' && i + 1 < input_len && input[i + 1] == '*') { 328 - it->ctx = HL_CTX_NONE; 329 - size_t start = i; 330 - i += 2; 331 - while (i + 1 < input_len && !(input[i] == '*' && input[i + 1] == '/')) i++; 332 - if (i + 1 < input_len) { 333 - i += 2; 334 - } else { 335 - i = input_len; 336 - it->state.mode = HL_STATE_BLOCK_COMMENT; 337 - } 338 - *out = (hl_span){ start, i - start, HL_COMMENT }; 339 - it->pos = i; 
340 - return true; 341 - } 342 - 343 - if (c == '/') { 344 - size_t regex_end = 0; 345 - if (js_scan_regex_literal(input, input_len, i, &regex_end)) { 346 - it->ctx = HL_CTX_NONE; 347 - *out = (hl_span){ i, regex_end - i, HL_REGEX }; 348 - it->pos = regex_end; 349 - return true; 350 - } 351 - } 352 - 353 - if (c == '\'' || c == '"') { 354 - it->ctx = HL_CTX_NONE; 355 - size_t start = i; 356 - it->state.mode = (c == '\'') ? HL_STATE_STRING_SINGLE : HL_STATE_STRING_DOUBLE; 357 - i++; 358 - while (i < input_len) { 359 - if (input[i] == '\\' && i + 1 < input_len) { i += 2; continue; } 360 - if ((unsigned char)input[i] == c) { 361 - i++; 362 - it->state.mode = (it->state.template_depth > 0) ? HL_STATE_TEMPLATE_EXPR : HL_STATE_NORMAL; 363 - break; 364 - } 365 - i++; 366 - } 367 - *out = (hl_span){ start, i - start, HL_STRING }; 368 - it->pos = i; 369 - return true; 370 - } 371 - 372 - if (c == '`') { 373 - it->ctx = HL_CTX_NONE; 374 - it->state.mode = HL_STATE_TEMPLATE; 375 - *out = (hl_span){ i, 1, HL_STRING }; 376 - it->pos = i + 1; 377 - return true; 378 - } 379 - 380 - if (c == ';') { 381 - it->ctx = HL_CTX_NONE; 382 - *out = (hl_span){ i, 1, HL_SEMICOLON }; 383 - it->pos = i + 1; 384 - return true; 385 - } 386 - 387 - if (IS_DIGIT(c) || (c == '.' 
&& i + 1 < input_len && IS_DIGIT(input[i + 1]))) { 388 - it->ctx = HL_CTX_NONE; 389 - size_t start = i; 390 - if (c == '0' && i + 1 < input_len) { 391 - unsigned char next = (unsigned char)input[i + 1]; 392 - if (next == 'x' || next == 'X') { 393 - i += 2; 394 - while (i < input_len && (IS_XDIGIT(input[i]) || input[i] == '_')) i++; 395 - goto num_done; 396 - } else if (next == 'b' || next == 'B') { 397 - i += 2; 398 - while (i < input_len && (input[i] == '0' || input[i] == '1' || input[i] == '_')) i++; 399 - goto num_done; 400 - } else if (next == 'o' || next == 'O') { 401 - i += 2; 402 - while (i < input_len && (IS_OCTAL(input[i]) || input[i] == '_')) i++; 403 - goto num_done; 404 - } 405 - } 406 - while (i < input_len && (IS_DIGIT(input[i]) || input[i] == '_')) i++; 407 - if (i < input_len && input[i] == '.') { 408 - i++; 409 - while (i < input_len && (IS_DIGIT(input[i]) || input[i] == '_')) i++; 410 - } 411 - if (i < input_len && (input[i] == 'e' || input[i] == 'E')) { 412 - i++; 413 - if (i < input_len && (input[i] == '+' || input[i] == '-')) i++; 414 - while (i < input_len && (IS_DIGIT(input[i]) || input[i] == '_')) i++; 415 - } 416 - num_done: 417 - if (i < input_len && input[i] == 'n') i++; 418 - *out = (hl_span){ start, i - start, HL_NUMBER }; 419 - it->pos = i; 420 - return true; 421 - } 422 - 423 - for (int k = 0; k < (int)OP_COUNT; k++) { 424 - int oplen = operators[k].len; 425 - if (i + (size_t)oplen <= input_len && 426 - memcmp(input + i, operators[k].op, (size_t)oplen) == 0) { 427 - it->ctx = HL_CTX_NONE; 428 - *out = (hl_span){ i, (size_t)oplen, operators[k].cls }; 429 - it->pos = i + (size_t)oplen; 430 - return true; 431 - } 432 - } 433 - 434 - if (c == '#' && i + 1 < input_len && is_ident_begin((unsigned char)input[i + 1])) { 435 - size_t start = i; 436 - i += 2; 437 - while (i < input_len && is_ident_continue((unsigned char)input[i])) i++; 438 - it->ctx = HL_CTX_NONE; 439 - *out = (hl_span){ start, i - start, HL_PROPERTY }; 440 - it->pos = i; 441 
- return true; 442 - } 443 - 444 - if (is_ident_begin(c)) { 445 - size_t start = i; 446 - i++; 447 - while (i < input_len && is_ident_continue(input[i])) i++; 448 - size_t word_len = i - start; 449 - const char *word = input + start; 450 - 451 - bool is_member_access = (start > 0 && input[start - 1] == '.' && 452 - (start < 2 || input[start - 2] != '.')); 453 - bool is_method = false; 454 - if (is_member_access) { 455 - size_t peek = i; 456 - while (peek < input_len && input[peek] == ' ') peek++; 457 - if (peek < input_len && input[peek] == '(') is_method = true; 458 - } 459 - size_t after_word = i; 460 - while (after_word < input_len && input[after_word] == ' ') after_word++; 461 - bool is_call = (after_word < input_len && input[after_word] == '('); 462 - 463 - hl_token_class cls = HL_NONE; 464 - bool is_console = (word_len == 7 && memcmp(word, "console", 7) == 0); 465 - 466 - if (is_console) { 467 - cls = HL_PROPERTY; 468 - } else if (is_function_argument_identifier(input, input_len, start, i)) { 469 - cls = HL_ARGUMENT; 470 - } else if (is_method) { 471 - cls = HL_FUNCTION; 472 - } else if (is_member_access) { 473 - cls = HL_PROPERTY; 474 - } else if (it->ctx == HL_CTX_AFTER_FUNCTION) { 475 - cls = HL_FUNCTION_NAME; 476 - it->ctx = HL_CTX_NONE; 477 - } else if (it->ctx == HL_CTX_AFTER_CLASS) { 478 - cls = HL_CLASS_NAME; 479 - it->ctx = HL_CTX_NONE; 480 - } else if (it->ctx == HL_CTX_AFTER_EXTENDS) { 481 - cls = HL_PARENT_CLASS; 482 - it->ctx = HL_CTX_NONE; 483 - } else { 484 - cls = lookup_extra_keyword(word, word_len); 485 - 486 - if (cls == HL_NONE) { 487 - if ((word_len == 3 && memcmp(word, "NaN", 3) == 0) || 488 - (word_len == 8 && memcmp(word, "Infinity", 8) == 0)) { 489 - cls = HL_NUMBER; 490 - } 491 - else if (word_len == 7 && memcmp(word, "extends", 7) == 0) { 492 - cls = HL_KEYWORD_EXTENDS; 493 - } else { 494 - cls = tok_to_class(sv_parsekeyword(word, word_len)); 495 - } 496 - } 497 - 498 - if (cls == HL_NONE) { 499 - size_t peek = i; 500 - while (peek 
< input_len && input[peek] == ' ') peek++; 501 - if (peek < input_len && input[peek] == ':' && 502 - (peek + 1 >= input_len || input[peek + 1] != ':')) 503 - cls = HL_PROPERTY; 504 - } 505 - 506 - if (cls == HL_NONE && word[0] >= 'A' && word[0] <= 'Z') { 507 - cls = HL_TYPE; 508 - } 509 - 510 - if (cls == HL_NONE && is_call) { 511 - cls = HL_FUNCTION; 512 - } 513 - 514 - hl_context next_ctx = keyword_sets_context(word, word_len); 515 - if (next_ctx != HL_CTX_NONE) it->ctx = next_ctx; 516 - } 517 - 518 - *out = (hl_span){ start, word_len, cls }; 519 - it->pos = i; 520 - return true; 521 - } 522 - 523 - if (c == '<' || c == '>' || c == '=') { 524 - it->ctx = HL_CTX_NONE; 525 - *out = (hl_span){ i, 1, HL_OPERATOR }; 526 - it->pos = i + 1; 527 - return true; 528 - } 529 - 530 - if (c == '(' || c == ')' || c == '[' || c == ']' || c == '{' || c == '}') { 531 - it->ctx = HL_CTX_NONE; 532 - *out = (hl_span){ i, 1, HL_BRACKET }; 533 - it->pos = i + 1; 534 - return true; 535 - } 536 - 537 - if (c == ' ' || c == '\t') { 538 - size_t start = i; 539 - while (i < input_len && (input[i] == ' ' || input[i] == '\t')) i++; 540 - *out = (hl_span){ start, i - start, HL_NONE }; 541 - it->pos = i; 542 - return true; 543 - } 544 - 545 - it->ctx = HL_CTX_NONE; 546 - *out = (hl_span){ i, 1, HL_NONE }; 547 - it->pos = i + 1; 548 - return true; 549 - } 550 - 551 - typedef struct { 552 - char *buf; 553 - size_t size; 554 - size_t pos; 555 - bool overflow; 556 - } outbuf_t; 557 - 558 - static inline void ob_putc(outbuf_t *o, char c) { 559 - if (o->pos + 1 < o->size) o->buf[o->pos++] = c; 560 - else o->overflow = true; 561 - } 562 - 563 - static inline void ob_write(outbuf_t *o, const char *s, size_t n) { 564 - if (o->pos + n < o->size) { 565 - memcpy(o->buf + o->pos, s, n); 566 - o->pos += n; 567 - } else o->overflow = true; 568 - } 569 - 570 - static inline void ob_puts(outbuf_t *o, const char *s) { 571 - ob_write(o, s, strlen(s)); 572 - } 573 - 574 - static inline void 
ob_put_escaped(outbuf_t *o, char c) { 575 - switch (c) { 576 - case '<': ob_write(o, "<<", 2); break; 577 - case '>': ob_write(o, ">>", 2); break; 578 - case '%': ob_write(o, "%%", 2); break; 579 - default: ob_putc(o, c); break; 580 - }} 581 - 582 - static inline void ob_write_escaped(outbuf_t *o, const char *s, size_t n) { 583 - for (size_t i = 0; i < n; i++) ob_put_escaped(o, s[i]); 584 - } 585 - 586 - static bool span_is_template_string(const char *s, size_t n) { 587 - for (size_t i = 0; i < n; i++) { 588 - if (s[i] == '`') return true; 589 - if (s[i] == '$' && i + 1 < n && s[i + 1] == '{') return true; 590 - } 591 - return false; 592 - } 593 - 594 - static bool is_string_key_context(const char *input, size_t input_len, size_t off, size_t len) { 595 - size_t i = off + len; 596 - while (i < input_len && (input[i] == ' ' || input[i] == '\t')) i++; 597 - return (i < input_len && input[i] == ':' && (i + 1 >= input_len || input[i + 1] != ':')); 598 - } 599 - 600 - static bool is_string_keyword_literal(const char *s, size_t n) { 601 - if (n < 2) return false; 602 - if (!((s[0] == '"' && s[n - 1] == '"') || (s[0] == '\'' && s[n - 1] == '\''))) return false; 603 - 604 - const char *inner = s + 1; 605 - size_t len = n - 2; 606 - 607 - #define SKW(w) (len == sizeof(w) - 1 && memcmp(inner, w, sizeof(w) - 1) == 0) 608 - return SKW("true") || SKW("false") || SKW("null") || SKW("undefined") || 609 - SKW("NaN") || SKW("Infinity"); 610 - #undef SKW 611 - } 612 - 613 - static const char *class_to_crvar(hl_token_class cls) { 614 - switch (cls) { 615 - case HL_NUMBER: return "#E8CD7C"; 616 - case HL_NUMBER_PREFIX: return "#EADBAD"; 617 - case HL_BOOLEAN: return "#65B2FF"; 618 - case HL_LITERAL_NULL: return "#65B2FF"; 619 - 620 - case HL_STRING: return "#FF8A7F"; 621 - case HL_STRING_DELIMITER: return "#FF7265"; 622 - case HL_STRING_ESCAPE: return "#F4AAA3"; 623 - case HL_STRING_KEY: return "#CCA3F4"; 624 - case HL_STRING_TEMPLATE: return "#FFB265"; 625 - 626 - case HL_REGEX: 
return "#FFB265"; 627 - case HL_REGEX_ESCAPE: return "#FFCC99"; 628 - case HL_REGEX_DELIMITER: return "#FF9932"; 629 - case HL_REGEX_CDATA: return "#65B2FF"; 630 - 631 - case HL_KEYWORD: return "#65B2FF"; 632 - case HL_KEYWORD_DELETE: return "#F43D3D"; 633 - case HL_TYPE: return "#59D8F1"; 634 - case HL_TYPE_STRING: return "#30E8AA"; 635 - case HL_TYPE_BOOLEAN: return "#30E8AA"; 636 - case HL_COMMENT: return "#758CA3"; 637 - case HL_FUNCTION_NAME: return "#30E8AA"; 638 - case HL_FUNCTION: return "#30E8AA"; 639 - case HL_ARGUMENT: return "#CCA3F4"; 640 - case HL_PROPERTY: return "#CCA3F4"; 641 - case HL_OPERATOR: return "#8CB2D8"; 642 - case HL_OPTIONAL_CHAIN: return "#8CB2D8"; 643 - case HL_BRACKET: return "#8CB2D8"; 644 - case HL_SEMICOLON: return "#B2CCE5"; 645 - 646 - case HL_KEYWORD_ITALIC: return "italic+#65B2FF"; 647 - case HL_CLASS_NAME: return "bold+#F7B76D"; 648 - case HL_PARENT_CLASS: return "bold+#59D8F1"; 649 - case HL_KEYWORD_EXTENDS: return "italic+#59D8F1"; 650 - 651 - default: return NULL; 652 - }} 653 - 654 - static inline void ob_write_with_class(outbuf_t *o, hl_token_class cls, const char *s, size_t n) { 655 - if (n == 0) return; 656 - 657 - const char *var = class_to_crvar(cls); 658 - if (var) { 659 - ob_putc(o, '<'); 660 - ob_puts(o, var); 661 - ob_putc(o, '>'); 662 - ob_write_escaped(o, s, n); 663 - ob_write(o, "</>", 3); 664 - } else ob_write_escaped(o, s, n); 665 - } 666 - 667 - static void ob_write_string_literal(outbuf_t *o, const char *s, size_t n, hl_token_class body_cls) { 668 - if (n == 0) return; 669 - 670 - size_t i = 0; 671 - size_t seg_start = 0; 672 - 673 - while (i < n) { 674 - unsigned char ch = (unsigned char)s[i]; 675 - 676 - if (ch == '\\') { 677 - ob_write_with_class(o, body_cls, s + seg_start, i - seg_start); 678 - size_t esc_len = (i + 1 < n) ? 
2 : 1; 679 - ob_write_with_class(o, HL_STRING_ESCAPE, s + i, esc_len); 680 - i += esc_len; 681 - seg_start = i; 682 - continue; 683 - } 684 - 685 - if (ch == '"' || ch == '\'' || ch == '`') { 686 - ob_write_with_class(o, body_cls, s + seg_start, i - seg_start); 687 - ob_write_with_class(o, HL_STRING_DELIMITER, s + i, 1); 688 - i++; 689 - seg_start = i; 690 - continue; 691 - } 692 - 693 - if (ch == '$' && i + 1 < n && s[i + 1] == '{') { 694 - ob_write_with_class(o, body_cls, s + seg_start, i - seg_start); 695 - ob_write_with_class(o, HL_BRACKET, s + i, 1); 696 - ob_write_with_class(o, HL_BRACKET, s + i + 1, 1); 697 - i += 2; 698 - seg_start = i; 699 - continue; 700 - } 701 - 702 - i++; 703 - } 704 - 705 - ob_write_with_class(o, body_cls, s + seg_start, n - seg_start); 706 - } 707 - 708 - static void ob_write_regex_literal(outbuf_t *o, const char *s, size_t n) { 709 - if (n == 0) return; 710 - 711 - ob_write_with_class(o, HL_REGEX_DELIMITER, s, 1); 712 - 713 - size_t i = 1; 714 - size_t seg_start = i; 715 - bool in_class = false; 716 - 717 - while (i < n) { 718 - unsigned char ch = (unsigned char)s[i]; 719 - 720 - if (!in_class && ch == '/') { 721 - ob_write_with_class(o, HL_REGEX, s + seg_start, i - seg_start); 722 - ob_write_with_class(o, HL_REGEX_DELIMITER, s + i, 1); 723 - i++; 724 - ob_write_with_class(o, HL_REGEX_DELIMITER, s + i, n - i); 725 - return; 726 - } 727 - 728 - if (ch == '\\') { 729 - ob_write_with_class(o, in_class ? HL_REGEX_CDATA : HL_REGEX, s + seg_start, i - seg_start); 730 - size_t esc_len = (i + 1 < n) ? 
2 : 1; 731 - ob_write_with_class(o, HL_REGEX_ESCAPE, s + i, esc_len); 732 - i += esc_len; 733 - seg_start = i; 734 - continue; 735 - } 736 - 737 - if (!in_class && ch == '[') { 738 - ob_write_with_class(o, HL_REGEX, s + seg_start, i - seg_start); 739 - in_class = true; 740 - seg_start = i; 741 - i++; 742 - continue; 743 - } 744 - 745 - if (in_class && ch == ']') { 746 - i++; 747 - ob_write_with_class(o, HL_REGEX_CDATA, s + seg_start, i - seg_start); 748 - in_class = false; 749 - seg_start = i; 750 - continue; 751 - } 752 - 753 - i++; 754 - } 755 - 756 - ob_write_with_class(o, in_class ? HL_REGEX_CDATA : HL_REGEX, s + seg_start, n - seg_start); 757 - } 758 - 759 - static void ob_write_number_literal(outbuf_t *o, const char *s, size_t n) { 760 - if (n >= 2 && s[0] == '0' && 761 - (s[1] == 'x' || s[1] == 'X' || 762 - s[1] == 'b' || s[1] == 'B' || 763 - s[1] == 'o' || s[1] == 'O')) { 764 - ob_write_with_class(o, HL_NUMBER_PREFIX, s, 2); 765 - ob_write_with_class(o, HL_NUMBER, s + 2, n - 2); 766 - return; 767 - } 768 - 769 - ob_write_with_class(o, HL_NUMBER, s, n); 770 - } 4 + #include "highlight/emit.h" 771 5 772 6 int ant_highlight_stateful( 773 7 const char *input, size_t input_len, 774 8 char *out, size_t out_size, 775 9 highlight_state *state 776 10 ) { 777 - outbuf_t o = { .buf = out, .size = out_size, .pos = 0, .overflow = false }; 11 + hl_outbuf o; 12 + hl_outbuf_init(&o, out, out_size); 778 13 779 14 hl_iter it; 780 15 hl_iter_init(&it, input, input_len, state); 781 16 782 17 hl_span span; 783 - while (hl_iter_next(&it, &span) && !o.overflow) { 784 - if (span.cls == HL_STRING) { 785 - const char *piece = input + span.off; 786 - hl_token_class body_cls = HL_STRING; 787 - if (span_is_template_string(piece, span.len)) body_cls = HL_STRING_TEMPLATE; 788 - ob_write_string_literal(&o, piece, span.len, body_cls); 789 - } else if (span.cls == HL_REGEX) ob_write_regex_literal(&o, input + span.off, span.len); 790 - else if (span.cls == HL_NUMBER) 
ob_write_number_literal(&o, input + span.off, span.len); 791 - else ob_write_with_class(&o, span.cls, input + span.off, span.len); 792 - } 18 + while (hl_iter_next(&it, &span) && !o.overflow) 19 + hl_outbuf_emit_span(&o, span.cls, input + span.off, span.len); 793 20 794 21 *state = hl_iter_state(&it); 795 22 796 23 if (o.overflow) { 797 24 o.pos = 0; 798 25 o.overflow = false; 799 - ob_write_escaped(&o, input, input_len); 26 + hl_outbuf_write_escaped(&o, input, input_len); 800 27 if (o.overflow) { 801 28 size_t safe = out_size > 1 ? out_size - 1 : 0; 802 29 if (safe > input_len) safe = input_len; ··· 821 48 char *out, size_t out_size, 822 49 highlight_state *state 823 50 ) { 824 - outbuf_t o = { .buf = out, .size = out_size, .pos = 0, .overflow = false }; 51 + hl_outbuf o; 52 + hl_outbuf_init(&o, out, out_size); 825 53 826 54 hl_iter it; 827 55 hl_iter_init(&it, line, line_len, state); ··· 831 59 832 60 while (hl_iter_next(&it, &span)) { 833 61 if (vis_cols >= max_cols) continue; 834 - 835 62 size_t span_remaining = max_cols - vis_cols; 836 63 size_t emit_len = span.len < span_remaining ? span.len : span_remaining; 837 - 838 - if (!o.overflow) { 839 - if (span.cls == HL_STRING) { 840 - const char *piece = line + span.off; 841 - hl_token_class body_cls = HL_STRING; 842 - if (span_is_template_string(piece, emit_len)) body_cls = HL_STRING_TEMPLATE; 843 - ob_write_string_literal(&o, piece, emit_len, body_cls); 844 - } else if (span.cls == HL_REGEX) ob_write_regex_literal(&o, line + span.off, emit_len); 845 - else if (span.cls == HL_NUMBER) ob_write_number_literal(&o, line + span.off, emit_len); 846 - else ob_write_with_class(&o, span.cls, line + span.off, emit_len); 847 - } 848 - 64 + if (!o.overflow) hl_outbuf_emit_span(&o, span.cls, line + span.off, emit_len); 849 65 vis_cols += span.len; 850 66 } 851 67 ··· 854 70 if (o.overflow) { 855 71 o.pos = 0; 856 72 size_t emit = line_len < max_cols ? 
line_len : max_cols; 857 - ob_write_escaped(&o, line, emit); 73 + hl_outbuf_write_escaped(&o, line, emit); 858 74 if (o.overflow) { 859 75 size_t safe = out_size > 1 ? out_size - 1 : 0; 860 76 if (safe > emit) safe = emit;
+226
src/highlight/emit.c
··· 1 + #include <string.h> 2 + #include "highlight/emit.h" 3 + 4 + static inline void ob_putc(hl_outbuf *o, char c) { 5 + if (o->pos + 1 < o->size) o->buf[o->pos++] = c; 6 + else o->overflow = true; 7 + } 8 + 9 + static inline void ob_write(hl_outbuf *o, const char *s, size_t n) { 10 + if (o->pos + n < o->size) { 11 + memcpy(o->buf + o->pos, s, n); 12 + o->pos += n; 13 + } else o->overflow = true; 14 + } 15 + 16 + static inline void ob_puts(hl_outbuf *o, const char *s) { 17 + ob_write(o, s, strlen(s)); 18 + } 19 + 20 + static inline void ob_put_escaped(hl_outbuf *o, char c) { 21 + switch (c) { 22 + case '<': ob_write(o, "<<", 2); break; 23 + case '>': ob_write(o, ">>", 2); break; 24 + case '%': ob_write(o, "%%", 2); break; 25 + default: ob_putc(o, c); break; 26 + } 27 + } 28 + 29 + void hl_outbuf_write_escaped(hl_outbuf *o, const char *s, size_t n) { 30 + for (size_t i = 0; i < n; i++) ob_put_escaped(o, s[i]); 31 + } 32 + 33 + static const char *class_to_crvar(hl_token_class cls) { 34 + switch (cls) { 35 + case HL_NUMBER: return "#E8CD7C"; 36 + case HL_NUMBER_PREFIX: return "#EADBAD"; 37 + case HL_BOOLEAN: return "#65B2FF"; 38 + case HL_LITERAL_NULL: return "#65B2FF"; 39 + 40 + case HL_STRING: return "#FF8A7F"; 41 + case HL_STRING_DELIMITER: return "#FF7265"; 42 + case HL_STRING_ESCAPE: return "#F4AAA3"; 43 + case HL_STRING_KEY: return "#CCA3F4"; 44 + case HL_STRING_TEMPLATE: return "#FFB265"; 45 + 46 + case HL_REGEX: return "#FFB265"; 47 + case HL_REGEX_ESCAPE: return "#FFCC99"; 48 + case HL_REGEX_DELIMITER: return "#FF9932"; 49 + case HL_REGEX_CDATA: return "#65B2FF"; 50 + 51 + case HL_KEYWORD: return "#65B2FF"; 52 + case HL_KEYWORD_DELETE: return "#F43D3D"; 53 + case HL_TYPE: return "#59D8F1"; 54 + case HL_TYPE_STRING: return "#30E8AA"; 55 + case HL_TYPE_BOOLEAN: return "#30E8AA"; 56 + case HL_COMMENT: return "#758CA3"; 57 + case HL_FUNCTION_NAME: return "#30E8AA"; 58 + case HL_FUNCTION: return "#30E8AA"; 59 + case HL_ARGUMENT: return "#CCA3F4"; 60 + case 
HL_PROPERTY: return "#CCA3F4"; 61 + case HL_OPERATOR: return "#8CB2D8"; 62 + case HL_OPTIONAL_CHAIN: return "#8CB2D8"; 63 + case HL_BRACKET: return "#8CB2D8"; 64 + case HL_SEMICOLON: return "#B2CCE5"; 65 + 66 + case HL_KEYWORD_ITALIC: return "italic+#65B2FF"; 67 + case HL_CLASS_NAME: return "bold+#F7B76D"; 68 + case HL_PARENT_CLASS: return "bold+#59D8F1"; 69 + case HL_KEYWORD_EXTENDS: return "italic+#59D8F1"; 70 + 71 + default: return NULL; 72 + } 73 + } 74 + 75 + static inline void ob_write_with_class(hl_outbuf *o, hl_token_class cls, const char *s, size_t n) { 76 + if (n == 0) return; 77 + 78 + const char *var = class_to_crvar(cls); 79 + if (var) { 80 + ob_putc(o, '<'); 81 + ob_puts(o, var); 82 + ob_putc(o, '>'); 83 + hl_outbuf_write_escaped(o, s, n); 84 + ob_write(o, "</>", 3); 85 + } else hl_outbuf_write_escaped(o, s, n); 86 + } 87 + 88 + static bool span_is_template_string(const char *s, size_t n) { 89 + for (size_t i = 0; i < n; i++) { 90 + if (s[i] == '`') return true; 91 + if (s[i] == '$' && i + 1 < n && s[i + 1] == '{') return true; 92 + } 93 + return false; 94 + } 95 + 96 + static void emit_string_literal(hl_outbuf *o, const char *s, size_t n, hl_token_class body_cls) { 97 + if (n == 0) return; 98 + 99 + size_t i = 0; 100 + size_t seg_start = 0; 101 + 102 + while (i < n) { 103 + unsigned char ch = (unsigned char)s[i]; 104 + 105 + if (ch == '\\') { 106 + ob_write_with_class(o, body_cls, s + seg_start, i - seg_start); 107 + size_t esc_len = (i + 1 < n) ? 
2 : 1; 108 + ob_write_with_class(o, HL_STRING_ESCAPE, s + i, esc_len); 109 + i += esc_len; 110 + seg_start = i; 111 + continue; 112 + } 113 + 114 + if (ch == '"' || ch == '\'' || ch == '`') { 115 + ob_write_with_class(o, body_cls, s + seg_start, i - seg_start); 116 + ob_write_with_class(o, HL_STRING_DELIMITER, s + i, 1); 117 + i++; 118 + seg_start = i; 119 + continue; 120 + } 121 + 122 + if (ch == '$' && i + 1 < n && s[i + 1] == '{') { 123 + ob_write_with_class(o, body_cls, s + seg_start, i - seg_start); 124 + ob_write_with_class(o, HL_BRACKET, s + i, 1); 125 + ob_write_with_class(o, HL_BRACKET, s + i + 1, 1); 126 + i += 2; 127 + seg_start = i; 128 + continue; 129 + } 130 + 131 + i++; 132 + } 133 + 134 + ob_write_with_class(o, body_cls, s + seg_start, n - seg_start); 135 + } 136 + 137 + static void emit_regex_literal(hl_outbuf *o, const char *s, size_t n) { 138 + if (n == 0) return; 139 + 140 + ob_write_with_class(o, HL_REGEX_DELIMITER, s, 1); 141 + 142 + size_t i = 1; 143 + size_t seg_start = i; 144 + bool in_class = false; 145 + 146 + while (i < n) { 147 + unsigned char ch = (unsigned char)s[i]; 148 + 149 + if (!in_class && ch == '/') { 150 + ob_write_with_class(o, HL_REGEX, s + seg_start, i - seg_start); 151 + ob_write_with_class(o, HL_REGEX_DELIMITER, s + i, 1); 152 + i++; 153 + ob_write_with_class(o, HL_REGEX_DELIMITER, s + i, n - i); 154 + return; 155 + } 156 + 157 + if (ch == '\\') { 158 + ob_write_with_class(o, in_class ? HL_REGEX_CDATA : HL_REGEX, s + seg_start, i - seg_start); 159 + size_t esc_len = (i + 1 < n) ? 
2 : 1; 160 + ob_write_with_class(o, HL_REGEX_ESCAPE, s + i, esc_len); 161 + i += esc_len; 162 + seg_start = i; 163 + continue; 164 + } 165 + 166 + if (!in_class && ch == '[') { 167 + ob_write_with_class(o, HL_REGEX, s + seg_start, i - seg_start); 168 + in_class = true; 169 + seg_start = i; 170 + i++; 171 + continue; 172 + } 173 + 174 + if (in_class && ch == ']') { 175 + i++; 176 + ob_write_with_class(o, HL_REGEX_CDATA, s + seg_start, i - seg_start); 177 + in_class = false; 178 + seg_start = i; 179 + continue; 180 + } 181 + 182 + i++; 183 + } 184 + 185 + ob_write_with_class(o, in_class ? HL_REGEX_CDATA : HL_REGEX, s + seg_start, n - seg_start); 186 + } 187 + 188 + static void emit_number_literal(hl_outbuf *o, const char *s, size_t n) { 189 + if (n >= 2 && s[0] == '0' && 190 + (s[1] == 'x' || s[1] == 'X' || 191 + s[1] == 'b' || s[1] == 'B' || 192 + s[1] == 'o' || s[1] == 'O')) { 193 + ob_write_with_class(o, HL_NUMBER_PREFIX, s, 2); 194 + ob_write_with_class(o, HL_NUMBER, s + 2, n - 2); 195 + return; 196 + } 197 + 198 + ob_write_with_class(o, HL_NUMBER, s, n); 199 + } 200 + 201 + void hl_outbuf_init(hl_outbuf *o, char *buf, size_t size) { 202 + o->buf = buf; 203 + o->size = size; 204 + o->pos = 0; 205 + o->overflow = false; 206 + } 207 + 208 + void hl_outbuf_emit_span(hl_outbuf *o, hl_token_class cls, const char *s, size_t n) { 209 + if (cls == HL_STRING) { 210 + hl_token_class body_cls = span_is_template_string(s, n) ? HL_STRING_TEMPLATE : HL_STRING; 211 + emit_string_literal(o, s, n, body_cls); 212 + return; 213 + } 214 + 215 + if (cls == HL_REGEX) { 216 + emit_regex_literal(o, s, n); 217 + return; 218 + } 219 + 220 + if (cls == HL_NUMBER) { 221 + emit_number_literal(o, s, n); 222 + return; 223 + } 224 + 225 + ob_write_with_class(o, cls, s, n); 226 + }
+547
src/highlight/iter.c
··· 1 + #include <string.h> 2 + #include <stdbool.h> 3 + 4 + #include "tokens.h" 5 + #include "highlight.h" 6 + #include "highlight/regex.h" 7 + #include "silver/lexer.h" 8 + 9 + typedef struct { const char *op; int len; hl_token_class cls; } op_entry_t; 10 + 11 + static const op_entry_t operators[] = { 12 + { "===", 3, HL_OPERATOR }, 13 + { "!==", 3, HL_OPERATOR }, 14 + { "...", 3, HL_OPERATOR }, 15 + { "=>", 2, HL_OPERATOR }, 16 + { "==", 2, HL_OPERATOR }, 17 + { "!=", 2, HL_OPERATOR }, 18 + { "<=", 2, HL_OPERATOR }, 19 + { ">=", 2, HL_OPERATOR }, 20 + { "&&", 2, HL_OPERATOR }, 21 + { "||", 2, HL_OPERATOR }, 22 + { "??", 2, HL_OPERATOR }, 23 + { "?.", 2, HL_OPTIONAL_CHAIN }, 24 + }; 25 + 26 + #define OP_COUNT (sizeof(operators) / sizeof(operators[0])) 27 + #define K(s, t) if (len == sizeof(s)-1 && !memcmp(word, s, sizeof(s)-1)) return t 28 + 29 + static hl_token_class lookup_extra_keyword(const char *word, size_t len) { 30 + switch (word[0]) { 31 + case 'a': 32 + K("abstract", HL_TYPE); 33 + K("async", HL_KEYWORD_ITALIC); 34 + break; 35 + case 'b': K("boolean", HL_TYPE_BOOLEAN); break; 36 + case 'd': K("declare", HL_TYPE); break; 37 + case 'e': 38 + K("enum", HL_TYPE); 39 + K("export", HL_KEYWORD_ITALIC); 40 + break; 41 + case 'g': K("global", HL_KEYWORD_ITALIC); break; 42 + case 'i': 43 + K("interface", HL_TYPE); 44 + K("implements", HL_TYPE); 45 + break; 46 + case 'n': 47 + K("namespace", HL_TYPE); 48 + K("never", HL_TYPE); 49 + break; 50 + case 'o': K("object", HL_TYPE); break; 51 + case 'p': 52 + K("package", HL_KEYWORD); 53 + K("private", HL_KEYWORD); 54 + K("protected", HL_KEYWORD); 55 + K("public", HL_KEYWORD); 56 + break; 57 + case 'r': K("readonly", HL_TYPE); break; 58 + case 's': 59 + K("string", HL_TYPE_STRING); 60 + K("symbol", HL_TYPE_STRING); 61 + break; 62 + case 't': K("type", HL_TYPE); break; 63 + case 'u': K("unknown", HL_TYPE); break; 64 + } return HL_NONE; 65 + } 66 + 67 + #undef K 68 + 69 + static hl_token_class tok_to_class(uint8_t tok) { 70 
+ static const void *dispatch[] = { 71 + [TOK_ASYNC] = &&l_kw_italic, 72 + [TOK_EXPORT] = &&l_kw_italic, 73 + [TOK_THIS] = &&l_kw_italic, 74 + [TOK_GLOBAL_THIS] = &&l_kw_italic, 75 + [TOK_WINDOW] = &&l_kw_italic, 76 + [TOK_DELETE] = &&l_kw_delete, 77 + [TOK_TYPEOF] = &&l_type, 78 + [TOK_INSTANCEOF] = &&l_type, 79 + [TOK_OF] = &&l_type, 80 + [TOK_IN] = &&l_type, 81 + [TOK_AS] = &&l_type, 82 + [TOK_TRUE] = &&l_bool, 83 + [TOK_FALSE] = &&l_bool, 84 + [TOK_NULL] = &&l_null, 85 + [TOK_UNDEF] = &&l_null, 86 + }; 87 + 88 + if (tok <= TOK_IDENTIFIER || tok >= TOK_IDENT_LIKE_END) return HL_NONE; 89 + if (tok < sizeof(dispatch) / sizeof(*dispatch) && dispatch[tok]) goto *dispatch[tok]; 90 + 91 + return HL_KEYWORD; 92 + 93 + l_kw_italic: return HL_KEYWORD_ITALIC; 94 + l_kw_delete: return HL_KEYWORD_DELETE; 95 + l_type: return HL_TYPE; 96 + l_bool: return HL_BOOLEAN; 97 + l_null: return HL_LITERAL_NULL; 98 + } 99 + 100 + void hl_iter_init(hl_iter *it, const char *input, size_t input_len, const highlight_state *state) { 101 + it->input = input; 102 + it->input_len = input_len; 103 + it->pos = 0; 104 + it->state = state ? 
*state : HL_STATE_INIT; 105 + it->ctx = HL_CTX_NONE; 106 + } 107 + 108 + static hl_context keyword_sets_context(const char *word, size_t len) { 109 + if (len == 8 && memcmp(word, "function", 8) == 0) return HL_CTX_AFTER_FUNCTION; 110 + if (len == 5 && memcmp(word, "class", 5) == 0) return HL_CTX_AFTER_CLASS; 111 + if (len == 7 && memcmp(word, "extends", 7) == 0) return HL_CTX_AFTER_EXTENDS; 112 + return HL_CTX_NONE; 113 + } 114 + 115 + static size_t skip_inline_ws_forward(const char *input, size_t input_len, size_t i) { 116 + while (i < input_len && (input[i] == ' ' || input[i] == '\t' || input[i] == '\n' || input[i] == '\r')) i++; 117 + return i; 118 + } 119 + 120 + static size_t skip_inline_ws_backward(const char *input, size_t i) { 121 + while (i > 0 && (input[i - 1] == ' ' || input[i - 1] == '\t' || input[i - 1] == '\n' || input[i - 1] == '\r')) i--; 122 + return i; 123 + } 124 + 125 + static bool read_prev_word(const char *input, size_t end, size_t *word_start, size_t *word_len) { 126 + size_t i = skip_inline_ws_backward(input, end); 127 + if (i == 0 || !is_ident_continue((unsigned char)input[i - 1])) return false; 128 + 129 + size_t wend = i; 130 + while (i > 0 && is_ident_continue((unsigned char)input[i - 1])) i--; 131 + 132 + *word_start = i; 133 + *word_len = wend - i; 134 + return true; 135 + } 136 + 137 + static bool is_arrow_after(const char *input, size_t input_len, size_t pos) { 138 + size_t i = skip_inline_ws_forward(input, input_len, pos); 139 + return (i + 1 < input_len && input[i] == '=' && input[i + 1] == '>'); 140 + } 141 + 142 + static bool has_function_keyword_before_paren(const char *input, size_t open_paren) { 143 + size_t word_start = 0; 144 + size_t word_len = 0; 145 + 146 + if (!read_prev_word(input, open_paren, &word_start, &word_len)) return false; 147 + if (word_len == 8 && memcmp(input + word_start, "function", 8) == 0) return true; 148 + 149 + if (!read_prev_word(input, word_start, &word_start, &word_len)) return false; 150 + return 
(word_len == 8 && memcmp(input + word_start, "function", 8) == 0); 151 + } 152 + 153 + static bool is_control_paren_prefix(const char *input, size_t open_paren) { 154 + size_t word_start = 0; 155 + size_t word_len = 0; 156 + if (!read_prev_word(input, open_paren, &word_start, &word_len)) return false; 157 + 158 + #define C(s) (word_len == sizeof(s) - 1 && memcmp(input + word_start, s, sizeof(s) - 1) == 0) 159 + return C("if") || C("for") || C("while") || C("switch") || C("catch") || C("with"); 160 + #undef C 161 + } 162 + 163 + static bool is_likely_function_param_paren( 164 + const char *input, size_t input_len, 165 + size_t open_paren, size_t close_paren 166 + ) { 167 + if (is_arrow_after(input, input_len, close_paren + 1)) return true; 168 + if (has_function_keyword_before_paren(input, open_paren)) return true; 169 + 170 + size_t after = skip_inline_ws_forward(input, input_len, close_paren + 1); 171 + if (after < input_len && input[after] == '{' && !is_control_paren_prefix(input, open_paren)) 172 + return true; 173 + 174 + return false; 175 + } 176 + 177 + static bool find_enclosing_open_paren(const char *input, size_t pos, size_t *open_paren) { 178 + size_t depth = 0; 179 + size_t i = pos; 180 + 181 + while (i > 0) { 182 + i--; 183 + unsigned char ch = (unsigned char)input[i]; 184 + if (ch == ')') { 185 + depth++; 186 + continue; 187 + } 188 + if (ch == '(') { 189 + if (depth == 0) { 190 + *open_paren = i; 191 + return true; 192 + } 193 + depth--; 194 + } 195 + } 196 + return false; 197 + } 198 + 199 + static bool find_matching_close_paren(const char *input, size_t input_len, size_t open_paren, size_t *close_paren) { 200 + size_t depth = 0; 201 + for (size_t i = open_paren + 1; i < input_len; i++) { 202 + unsigned char ch = (unsigned char)input[i]; 203 + if (ch == '(') { 204 + depth++; 205 + continue; 206 + } 207 + if (ch == ')') { 208 + if (depth == 0) { 209 + *close_paren = i; 210 + return true; 211 + } 212 + depth--; 213 + } 214 + } 215 + return false; 216 + 
} 217 + 218 + static bool is_function_argument_identifier(const char *input, size_t input_len, size_t start, size_t end) { 219 + if (is_arrow_after(input, input_len, end)) { 220 + size_t left = skip_inline_ws_backward(input, start); 221 + if (left > 0 && input[left - 1] == '.') return false; 222 + return true; 223 + } 224 + 225 + size_t prev = skip_inline_ws_backward(input, start); 226 + if (prev == 0) return false; 227 + unsigned char prev_ch = (unsigned char)input[prev - 1]; 228 + if (!(prev_ch == '(' || prev_ch == ',' || prev_ch == '{' || prev_ch == '[' || prev_ch == ':')) 229 + return false; 230 + 231 + size_t open_paren = 0; 232 + if (!find_enclosing_open_paren(input, start, &open_paren)) return false; 233 + 234 + size_t close_paren = 0; 235 + if (!find_matching_close_paren(input, input_len, open_paren, &close_paren)) return false; 236 + return is_likely_function_param_paren(input, input_len, open_paren, close_paren); 237 + } 238 + 239 + bool hl_iter_next(hl_iter *it, hl_span *out) { 240 + const char *input = it->input; 241 + size_t input_len = it->input_len; 242 + size_t i = it->pos; 243 + 244 + if (i >= input_len) return false; 245 + unsigned char c = (unsigned char)input[i]; 246 + 247 + if (it->state.mode == HL_STATE_BLOCK_COMMENT) { 248 + size_t start = i; 249 + while (i < input_len) { 250 + if (input[i] == '*' && i + 1 < input_len && input[i + 1] == '/') { 251 + i += 2; 252 + it->state.mode = HL_STATE_NORMAL; 253 + break; 254 + } 255 + i++; 256 + } 257 + *out = (hl_span){ start, i - start, HL_COMMENT }; 258 + it->pos = i; 259 + return true; 260 + } 261 + 262 + if (it->state.mode == HL_STATE_STRING_SINGLE || it->state.mode == HL_STATE_STRING_DOUBLE) { 263 + char quote = (it->state.mode == HL_STATE_STRING_SINGLE) ? '\'' : '"'; 264 + size_t start = i; 265 + while (i < input_len) { 266 + if (input[i] == '\\' && i + 1 < input_len) { i += 2; continue; } 267 + if (input[i] == quote) { 268 + i++; 269 + it->state.mode = (it->state.template_depth > 0) ? 
HL_STATE_TEMPLATE_EXPR : HL_STATE_NORMAL; 270 + break; 271 + } 272 + i++; 273 + } 274 + *out = (hl_span){ start, i - start, HL_STRING }; 275 + it->pos = i; 276 + return true; 277 + } 278 + 279 + if (it->state.mode == HL_STATE_TEMPLATE) { 280 + size_t start = i; 281 + while (i < input_len) { 282 + if (input[i] == '\\' && i + 1 < input_len) { i += 2; continue; } 283 + if (input[i] == '$' && i + 1 < input_len && input[i + 1] == '{') { 284 + i += 2; 285 + it->state.mode = HL_STATE_TEMPLATE_EXPR; 286 + it->state.template_depth++; 287 + break; 288 + } 289 + if (input[i] == '`') { 290 + i++; 291 + it->state.mode = (it->state.template_depth > 0) ? HL_STATE_TEMPLATE_EXPR : HL_STATE_NORMAL; 292 + break; 293 + } 294 + i++; 295 + } 296 + *out = (hl_span){ start, i - start, HL_STRING }; 297 + it->pos = i; 298 + return true; 299 + } 300 + 301 + if (it->state.mode == HL_STATE_TEMPLATE_EXPR && c == '}') { 302 + it->state.template_depth--; 303 + if (it->state.template_depth <= 0) { 304 + it->state.mode = HL_STATE_TEMPLATE; 305 + it->state.template_depth = 0; 306 + *out = (hl_span){ i, 1, HL_BRACKET }; 307 + it->pos = i + 1; 308 + return true; 309 + } 310 + } 311 + if (it->state.mode == HL_STATE_TEMPLATE_EXPR && c == '{') { 312 + it->state.template_depth++; 313 + *out = (hl_span){ i, 1, HL_BRACKET }; 314 + it->pos = i + 1; 315 + return true; 316 + } 317 + 318 + if (c == '/' && i + 1 < input_len && input[i + 1] == '/') { 319 + it->ctx = HL_CTX_NONE; 320 + *out = (hl_span){ i, input_len - i, HL_COMMENT }; 321 + it->pos = input_len; 322 + return true; 323 + } 324 + 325 + if (c == '/' && i + 1 < input_len && input[i + 1] == '*') { 326 + it->ctx = HL_CTX_NONE; 327 + size_t start = i; 328 + i += 2; 329 + while (i + 1 < input_len && !(input[i] == '*' && input[i + 1] == '/')) i++; 330 + if (i + 1 < input_len) { 331 + i += 2; 332 + } else { 333 + i = input_len; 334 + it->state.mode = HL_STATE_BLOCK_COMMENT; 335 + } 336 + *out = (hl_span){ start, i - start, HL_COMMENT }; 337 + it->pos = i; 
338 + return true; 339 + } 340 + 341 + if (c == '/') { 342 + size_t regex_end = 0; 343 + if (js_scan_regex_literal(input, input_len, i, &regex_end)) { 344 + it->ctx = HL_CTX_NONE; 345 + *out = (hl_span){ i, regex_end - i, HL_REGEX }; 346 + it->pos = regex_end; 347 + return true; 348 + } 349 + } 350 + 351 + if (c == '\'' || c == '"') { 352 + it->ctx = HL_CTX_NONE; 353 + size_t start = i; 354 + it->state.mode = (c == '\'') ? HL_STATE_STRING_SINGLE : HL_STATE_STRING_DOUBLE; 355 + i++; 356 + while (i < input_len) { 357 + if (input[i] == '\\' && i + 1 < input_len) { i += 2; continue; } 358 + if ((unsigned char)input[i] == c) { 359 + i++; 360 + it->state.mode = (it->state.template_depth > 0) ? HL_STATE_TEMPLATE_EXPR : HL_STATE_NORMAL; 361 + break; 362 + } 363 + i++; 364 + } 365 + *out = (hl_span){ start, i - start, HL_STRING }; 366 + it->pos = i; 367 + return true; 368 + } 369 + 370 + if (c == '`') { 371 + it->ctx = HL_CTX_NONE; 372 + it->state.mode = HL_STATE_TEMPLATE; 373 + *out = (hl_span){ i, 1, HL_STRING }; 374 + it->pos = i + 1; 375 + return true; 376 + } 377 + 378 + if (c == ';') { 379 + it->ctx = HL_CTX_NONE; 380 + *out = (hl_span){ i, 1, HL_SEMICOLON }; 381 + it->pos = i + 1; 382 + return true; 383 + } 384 + 385 + if (IS_DIGIT(c) || (c == '.' 
&& i + 1 < input_len && IS_DIGIT(input[i + 1]))) { 386 + it->ctx = HL_CTX_NONE; 387 + size_t start = i; 388 + if (c == '0' && i + 1 < input_len) { 389 + unsigned char next = (unsigned char)input[i + 1]; 390 + if (next == 'x' || next == 'X') { 391 + i += 2; 392 + while (i < input_len && (IS_XDIGIT(input[i]) || input[i] == '_')) i++; 393 + goto num_done; 394 + } else if (next == 'b' || next == 'B') { 395 + i += 2; 396 + while (i < input_len && (input[i] == '0' || input[i] == '1' || input[i] == '_')) i++; 397 + goto num_done; 398 + } else if (next == 'o' || next == 'O') { 399 + i += 2; 400 + while (i < input_len && (IS_OCTAL(input[i]) || input[i] == '_')) i++; 401 + goto num_done; 402 + } 403 + } 404 + while (i < input_len && (IS_DIGIT(input[i]) || input[i] == '_')) i++; 405 + if (i < input_len && input[i] == '.') { 406 + i++; 407 + while (i < input_len && (IS_DIGIT(input[i]) || input[i] == '_')) i++; 408 + } 409 + if (i < input_len && (input[i] == 'e' || input[i] == 'E')) { 410 + i++; 411 + if (i < input_len && (input[i] == '+' || input[i] == '-')) i++; 412 + while (i < input_len && (IS_DIGIT(input[i]) || input[i] == '_')) i++; 413 + } 414 + num_done: 415 + if (i < input_len && input[i] == 'n') i++; 416 + *out = (hl_span){ start, i - start, HL_NUMBER }; 417 + it->pos = i; 418 + return true; 419 + } 420 + 421 + for (int k = 0; k < (int)OP_COUNT; k++) { 422 + int oplen = operators[k].len; 423 + if (i + (size_t)oplen <= input_len && 424 + memcmp(input + i, operators[k].op, (size_t)oplen) == 0) { 425 + it->ctx = HL_CTX_NONE; 426 + *out = (hl_span){ i, (size_t)oplen, operators[k].cls }; 427 + it->pos = i + (size_t)oplen; 428 + return true; 429 + } 430 + } 431 + 432 + if (c == '#' && i + 1 < input_len && is_ident_begin((unsigned char)input[i + 1])) { 433 + size_t start = i; 434 + i += 2; 435 + while (i < input_len && is_ident_continue((unsigned char)input[i])) i++; 436 + it->ctx = HL_CTX_NONE; 437 + *out = (hl_span){ start, i - start, HL_PROPERTY }; 438 + it->pos = i; 439 
+ return true; 440 + } 441 + 442 + if (is_ident_begin(c)) { 443 + size_t start = i; 444 + i++; 445 + while (i < input_len && is_ident_continue(input[i])) i++; 446 + size_t word_len = i - start; 447 + const char *word = input + start; 448 + 449 + bool is_member_access = (start > 0 && input[start - 1] == '.' && 450 + (start < 2 || input[start - 2] != '.')); 451 + bool is_method = false; 452 + if (is_member_access) { 453 + size_t peek = i; 454 + while (peek < input_len && input[peek] == ' ') peek++; 455 + if (peek < input_len && input[peek] == '(') is_method = true; 456 + } 457 + size_t after_word = i; 458 + while (after_word < input_len && input[after_word] == ' ') after_word++; 459 + bool is_call = (after_word < input_len && input[after_word] == '('); 460 + 461 + hl_token_class cls = HL_NONE; 462 + bool is_console = (word_len == 7 && memcmp(word, "console", 7) == 0); 463 + 464 + if (is_console) { 465 + cls = HL_PROPERTY; 466 + } else if (is_function_argument_identifier(input, input_len, start, i)) { 467 + cls = HL_ARGUMENT; 468 + } else if (is_method) { 469 + cls = HL_FUNCTION; 470 + } else if (is_member_access) { 471 + cls = HL_PROPERTY; 472 + } else if (it->ctx == HL_CTX_AFTER_FUNCTION) { 473 + cls = HL_FUNCTION_NAME; 474 + it->ctx = HL_CTX_NONE; 475 + } else if (it->ctx == HL_CTX_AFTER_CLASS) { 476 + cls = HL_CLASS_NAME; 477 + it->ctx = HL_CTX_NONE; 478 + } else if (it->ctx == HL_CTX_AFTER_EXTENDS) { 479 + cls = HL_PARENT_CLASS; 480 + it->ctx = HL_CTX_NONE; 481 + } else { 482 + cls = lookup_extra_keyword(word, word_len); 483 + 484 + if (cls == HL_NONE) { 485 + if ((word_len == 3 && memcmp(word, "NaN", 3) == 0) || 486 + (word_len == 8 && memcmp(word, "Infinity", 8) == 0)) { 487 + cls = HL_NUMBER; 488 + } 489 + else if (word_len == 7 && memcmp(word, "extends", 7) == 0) { 490 + cls = HL_KEYWORD_EXTENDS; 491 + } else { 492 + cls = tok_to_class(sv_parsekeyword(word, word_len)); 493 + } 494 + } 495 + 496 + if (cls == HL_NONE) { 497 + size_t peek = i; 498 + while (peek 
< input_len && input[peek] == ' ') peek++; 499 + if (peek < input_len && input[peek] == ':' && 500 + (peek + 1 >= input_len || input[peek + 1] != ':')) 501 + cls = HL_PROPERTY; 502 + } 503 + 504 + if (cls == HL_NONE && word[0] >= 'A' && word[0] <= 'Z') { 505 + cls = HL_TYPE; 506 + } 507 + 508 + if (cls == HL_NONE && is_call) { 509 + cls = HL_FUNCTION; 510 + } 511 + 512 + hl_context next_ctx = keyword_sets_context(word, word_len); 513 + if (next_ctx != HL_CTX_NONE) it->ctx = next_ctx; 514 + } 515 + 516 + *out = (hl_span){ start, word_len, cls }; 517 + it->pos = i; 518 + return true; 519 + } 520 + 521 + if (c == '<' || c == '>' || c == '=') { 522 + it->ctx = HL_CTX_NONE; 523 + *out = (hl_span){ i, 1, HL_OPERATOR }; 524 + it->pos = i + 1; 525 + return true; 526 + } 527 + 528 + if (c == '(' || c == ')' || c == '[' || c == ']' || c == '{' || c == '}') { 529 + it->ctx = HL_CTX_NONE; 530 + *out = (hl_span){ i, 1, HL_BRACKET }; 531 + it->pos = i + 1; 532 + return true; 533 + } 534 + 535 + if (c == ' ' || c == '\t') { 536 + size_t start = i; 537 + while (i < input_len && (input[i] == ' ' || input[i] == '\t')) i++; 538 + *out = (hl_span){ start, i - start, HL_NONE }; 539 + it->pos = i; 540 + return true; 541 + } 542 + 543 + it->ctx = HL_CTX_NONE; 544 + *out = (hl_span){ i, 1, HL_NONE }; 545 + it->pos = i + 1; 546 + return true; 547 + }
+1 -1
src/repl.c
··· 30 30 31 31 #include <crprintf.h> 32 32 #include "highlight.h" 33 - #include "regex_scan.h" 33 + #include "highlight/regex.h" 34 34 35 35 #define MAX_HISTORY 512 36 36 #define MAX_LINE_LENGTH 4096