MIRROR: javascript for 🐜's, a tiny runtime with big ambitions
1#include "utf8.h"
2#include "escape.h"
3
/*
 * Decode a two-digit hex escape (\xHH).
 *
 * The two digits are read from in[pos + 2] and in[pos + 3] and combined into
 * a single value, which is handed to utf8_encode and appended to out at
 * *out_pos. *out_pos is advanced by whatever utf8_encode returns (presumably
 * the number of bytes it wrote).
 *
 * No validation happens here; decode_escape checks both digits with
 * is_xdigit (and against end) before calling.
 *
 * Returns 2, the number of digit bytes consumed.
 */
static inline size_t decode_hex_escape(const uint8_t *in, size_t pos, uint8_t *out, size_t *out_pos) {
  const uint8_t *digits = in + pos + 2;
  uint32_t code = (unhex(digits[0]) << 4U) | unhex(digits[1]);
  char *dst = (char *)(out + *out_pos);

  *out_pos += utf8_encode(code, dst);
  return 2;
}
9
/*
 * Decode a legacy octal escape (\N, \NN or \NNN).
 *
 * in[pos + 1] is the first octal digit. Up to two further octal digits are
 * folded in, but a digit is only accepted while the accumulated value stays
 * at or below 255, so e.g. "\400" takes just "\40". The resulting value is
 * passed to utf8_encode and appended to out at *out_pos.
 *
 * Returns the number of digits consumed beyond the first one (0, 1 or 2).
 *
 * NOTE(review): there is no end parameter, so in[pos + 2] / in[pos + 3] are
 * read unchecked; this presumably relies on a terminator byte after the
 * escape - confirm against the caller.
 */
static size_t decode_octal_escape(const uint8_t *in, size_t pos, uint8_t *out, size_t *out_pos) {
  const uint8_t *digits = &in[pos + 1];
  int value = digits[0] - '0';
  size_t consumed = 0;

  for (size_t k = 1; k <= 2; k++) {
    uint8_t d = digits[k];
    if (d < '0' || d > '7') break;

    int widened = value * 8 + (d - '0');
    // Two digits never exceed 63, so this cap only ever rejects a third digit.
    if (widened > 255) break;

    value = widened;
    consumed = k;
  }

  *out_pos += utf8_encode((uint32_t)value, (char *)&out[*out_pos]);
  return consumed;
}
25
/*
 * Decode a braced Unicode escape (\u{H...H}).
 *
 * The hex digits start at in[pos + 3] (in[pos + 2] is the '{', checked by
 * decode_escape) and run until the first non-hex byte or end. The escape is
 * accepted only when:
 *   - at least one hex digit is present,
 *   - the digits are terminated by '}' before end, and
 *   - the value does not exceed 0x10FFFF, the largest Unicode code point.
 *
 * On success the code point is passed to utf8_encode, appended to out at
 * *out_pos, and the function returns i - pos - 1: the number of bytes from
 * '{' through '}' inclusive.
 *
 * On a malformed escape a literal 'u' is appended and 0 is returned, so the
 * remaining bytes are left for the caller to process as ordinary text.
 */
static size_t decode_unicode_braced(const uint8_t *in, size_t pos, size_t end, uint8_t *out, size_t *out_pos) {
  const uint32_t max_code_point = 0x10FFFFu;
  const size_t first_digit = pos + 3;
  uint32_t cp = 0;
  size_t i = first_digit;

  while (i < end && is_xdigit(in[i])) {
    // Stop accumulating once the value is out of range: otherwise a long
    // digit run would wrap the 32-bit accumulator back into the valid range
    // (e.g. \u{100000041} would silently decode as 'A').
    if (cp <= max_code_point) cp = (cp << 4) | unhex(in[i]);
    i++;
  }

  if (i < end && in[i] == '}' && i > first_digit && cp <= max_code_point) {
    *out_pos += utf8_encode(cp, (char *)&out[*out_pos]);
    return i - pos - 1;
  }

  // Malformed: missing '}', no digits, or code point above U+10FFFF.
  out[(*out_pos)++] = 'u';
  return 0;
}
39
/*
 * Decode a four-digit Unicode escape (\uHHHH), pairing surrogates when
 * possible.
 *
 * The four hex digits at in[pos + 2 .. pos + 5] form a 16-bit unit; the
 * caller (decode_escape) has already validated them. If that unit is a high
 * surrogate (0xD800-0xDBFF) and it is immediately followed, before end, by a
 * second \uHHHH escape holding a low surrogate (0xDC00-0xDFFF), the two are
 * combined into one supplementary code point.
 *
 * The resulting value is passed to utf8_encode and appended to out at
 * *out_pos. A high surrogate without a matching low surrogate is passed to
 * utf8_encode unchanged.
 *
 * Returns 4 when only the first escape's digits were consumed, or 10 when a
 * surrogate pair was consumed (4 digits + "\u" + 4 digits).
 */
static size_t decode_unicode_fixed(const uint8_t *in, size_t pos, size_t end, uint8_t *out, size_t *out_pos) {
  const uint8_t *esc = in + pos;
  size_t consumed = 4;
  uint32_t unit =
    (unhex(esc[2]) << 12U) | (unhex(esc[3]) << 8U) |
    (unhex(esc[4]) << 4U) | unhex(esc[5]);

  int is_high = unit >= 0xD800 && unit <= 0xDBFF;

  // Short-circuit order matters: nothing past esc[5] is read unless the
  // second escape fits entirely before end.
  int has_second_escape =
    is_high && pos + 11 < end &&
    esc[6] == '\\' && esc[7] == 'u' &&
    is_xdigit(esc[8]) && is_xdigit(esc[9]) &&
    is_xdigit(esc[10]) && is_xdigit(esc[11]);

  if (has_second_escape) {
    uint32_t trail =
      (unhex(esc[8]) << 12U) | (unhex(esc[9]) << 8U) |
      (unhex(esc[10]) << 4U) | unhex(esc[11]);

    if (trail >= 0xDC00 && trail <= 0xDFFF) {
      unit = 0x10000 + ((unit - 0xD800) << 10) + (trail - 0xDC00);
      consumed = 10;
    }
  }

  *out_pos += utf8_encode(unit, (char *)&out[*out_pos]);
  return consumed;
}
62
/*
 * Decode one backslash escape and append the result to out at *out_pos.
 *
 * in[pos + 1] is the escape character (in[pos] is presumably the backslash;
 * it is never read here). end bounds the reads for the \0, \x and \u forms.
 *
 * Handled forms:
 *   \n \t \r \v \f \b \\   the corresponding single byte
 *   \0                     NUL, unless followed by an octal digit
 *   \0-\7 ...              legacy octal escape (decode_octal_escape)
 *   \xHH                   two-digit hex escape
 *   \u{H...}, \uHHHH       Unicode escapes, including surrogate pairs
 *   anything else          the escaped byte itself (this covers quotes)
 *
 * A malformed \x or \u escape decodes to a literal 'x' / 'u' and consumes
 * nothing further.
 *
 * Returns the number of input bytes consumed beyond the two-byte
 * "\<char>" prefix (0 for the single-character escapes).
 *
 * NOTE(review): a backslash followed by a line terminator is not
 * special-cased and is copied through by the default branch; presumably the
 * caller handles line continuations - confirm.
 */
size_t decode_escape(const uint8_t *in, size_t pos, size_t end, uint8_t *out, size_t *out_pos, uint8_t quote) {
  uint8_t c = in[pos + 1];
  size_t advance = 0;

  // An escaped quote decodes to itself like any other unrecognized escape,
  // so the active quote character needs no dedicated handling.
  (void)quote;

  switch (c) {
    case 'n': out[(*out_pos)++] = '\n'; break;
    case 't': out[(*out_pos)++] = '\t'; break;
    case 'r': out[(*out_pos)++] = '\r'; break;
    case 'v': out[(*out_pos)++] = '\v'; break;
    case 'f': out[(*out_pos)++] = '\f'; break;
    case 'b': out[(*out_pos)++] = '\b'; break;
    case '\\': out[(*out_pos)++] = '\\'; break;
    case '0':
      // Plain NUL unless an octal digit follows. The look-ahead is bounded
      // by end, matching the \x and \u cases below.
      if (pos + 2 >= end || !(in[pos + 2] >= '0' && in[pos + 2] <= '7')) {
        out[(*out_pos)++] = '\0';
        break;
      }
      __attribute__((fallthrough));
    case '1': case '2': case '3': case '4': case '5': case '6': case '7':
      advance = decode_octal_escape(in, pos, out, out_pos);
      break;
    case 'x':
      if (pos + 3 < end && is_xdigit(in[pos + 2]) && is_xdigit(in[pos + 3])) {
        advance = decode_hex_escape(in, pos, out, out_pos);
      } else {
        out[(*out_pos)++] = c;
      }
      break;
    case 'u':
      if (pos + 2 < end && in[pos + 2] == '{') {
        advance = decode_unicode_braced(in, pos, end, out, out_pos);
      } else if (
        pos + 5 < end && is_xdigit(in[pos + 2]) && is_xdigit(in[pos + 3]) &&
        is_xdigit(in[pos + 4]) && is_xdigit(in[pos + 5])
      ) {
        advance = decode_unicode_fixed(in, pos, end, out, out_pos);
      } else {
        out[(*out_pos)++] = c;
      }
      break;
    default:
      out[(*out_pos)++] = c;
      break;
  }

  return advance;
}