MIRROR: JavaScript for 🐜's, a tiny runtime with big ambitions
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at master 101 lines 3.6 kB view raw
1#include "utf8.h" 2#include "escape.h" 3 4static inline size_t decode_hex_escape(const uint8_t *in, size_t pos, uint8_t *out, size_t *out_pos) { 5 uint32_t cp = (unhex(in[pos + 2]) << 4U) | unhex(in[pos + 3]); 6 *out_pos += utf8_encode(cp, (char *)&out[*out_pos]); 7 return 2; 8} 9 10static size_t decode_octal_escape(const uint8_t *in, size_t pos, uint8_t *out, size_t *out_pos) { 11 uint8_t c = in[pos + 1]; 12 size_t extra = 0; 13 int val = c - '0'; 14 15 if (in[pos + 2] >= '0' && in[pos + 2] <= '7') { 16 val = val * 8 + (in[pos + 2] - '0'); extra++; 17 if (in[pos + 3] >= '0' && in[pos + 3] <= '7' && val * 8 + (in[pos + 3] - '0') <= 255) { 18 val = val * 8 + (in[pos + 3] - '0'); extra++; 19 } 20 } 21 22 *out_pos += utf8_encode((uint32_t)val, (char *)&out[*out_pos]); 23 return extra; 24} 25 26static size_t decode_unicode_braced(const uint8_t *in, size_t pos, size_t end, uint8_t *out, size_t *out_pos) { 27 uint32_t cp = 0; 28 size_t i = pos + 3; 29 30 while (i < end && is_xdigit(in[i])) { cp = (cp << 4) | unhex(in[i]); i++; } 31 if (i < end && in[i] == '}') { 32 *out_pos += utf8_encode(cp, (char *)&out[*out_pos]); 33 return i - pos - 1; 34 } 35 36 out[(*out_pos)++] = 'u'; 37 return 0; 38} 39 40static size_t decode_unicode_fixed(const uint8_t *in, size_t pos, size_t end, uint8_t *out, size_t *out_pos) { 41 uint32_t cp = 42 (unhex(in[pos + 2]) << 12U) | (unhex(in[pos + 3]) << 8U) | 43 (unhex(in[pos + 4]) << 4U) | unhex(in[pos + 5]); 44 45 if (cp >= 0xD800 && cp <= 0xDBFF && pos + 11 < end && 46 in[pos + 6] == '\\' && in[pos + 7] == 'u' && 47 is_xdigit(in[pos + 8]) && is_xdigit(in[pos + 9]) && 48 is_xdigit(in[pos + 10]) && is_xdigit(in[pos + 11])) { 49 uint32_t lo = 50 (unhex(in[pos + 8]) << 12U) | (unhex(in[pos + 9]) << 8U) | 51 (unhex(in[pos + 10]) << 4U) | unhex(in[pos + 11]); 52 if (lo >= 0xDC00 && lo <= 0xDFFF) { 53 cp = 0x10000 + ((cp - 0xD800) << 10) + (lo - 0xDC00); 54 *out_pos += utf8_encode(cp, (char *)&out[*out_pos]); 55 return 10; 56 } 57 } 58 59 *out_pos += 
utf8_encode(cp, (char *)&out[*out_pos]); 60 return 4; 61} 62 63size_t decode_escape(const uint8_t *in, size_t pos, size_t end, uint8_t *out, size_t *out_pos, uint8_t quote) { 64 uint8_t c = in[pos + 1]; 65 size_t advance = 0; 66 67 switch (c) { 68 case 'n': out[(*out_pos)++] = '\n'; break; 69 case 't': out[(*out_pos)++] = '\t'; break; 70 case 'r': out[(*out_pos)++] = '\r'; break; 71 case 'v': out[(*out_pos)++] = '\v'; break; 72 case 'f': out[(*out_pos)++] = '\f'; break; 73 case 'b': out[(*out_pos)++] = '\b'; break; 74 case '\\': out[(*out_pos)++] = '\\'; break; 75 case '0': 76 if (!(in[pos + 2] >= '0' && in[pos + 2] <= '7')) { out[(*out_pos)++] = '\0'; break; } 77 __attribute__((fallthrough)); 78 case '1': case '2': case '3': case '4': case '5': case '6': case '7': 79 advance = decode_octal_escape(in, pos, out, out_pos); 80 break; 81 case 'x': 82 if (pos + 3 < end && is_xdigit(in[pos + 2]) && is_xdigit(in[pos + 3])) { 83 advance = decode_hex_escape(in, pos, out, out_pos); 84 } else out[(*out_pos)++] = c; 85 break; 86 case 'u': 87 if (pos + 2 < end && in[pos + 2] == '{') { 88 advance = decode_unicode_braced(in, pos, end, out, out_pos); 89 } else if ( 90 pos + 5 < end && is_xdigit(in[pos + 2]) && is_xdigit(in[pos + 3]) && 91 is_xdigit(in[pos + 4]) && is_xdigit(in[pos + 5]) 92 ) advance = decode_unicode_fixed(in, pos, end, out, out_pos); 93 else out[(*out_pos)++] = c; 94 break; 95 default: 96 out[(*out_pos)++] = (c == quote) ? quote : c; 97 break; 98 } 99 100 return advance; 101}