MIRROR: javascript for 🐜's, a tiny runtime with big ambitions
1#include <stdlib.h>
2#include <stdio.h>
3#include <string.h>
4
5#include "ant.h"
6#include "errors.h"
7#include "runtime.h"
8#include "utf8.h"
9#include "utils.h"
10#include "modules/uri.h"
11
// Byte-indexed lookup table: an entry of 1 marks a byte that both encoders in
// this file copy to the output unchanged. The set is the ASCII letters, the
// digits, and the marks  - _ . ! ~ * ' ( )  ; every other byte (including all
// bytes >= 0x80) is 0. One row per 16 byte values.
static const unsigned char uri_unreserved[256] = {
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0x00-0x0F
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0x10-0x1F
  0,1,0,0,0,0,0,1,1,1,1,0,0,1,1,0,  // 0x20-0x2F: ! ' ( ) * - .
  1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,  // 0x30-0x3F: 0-9
  0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  // 0x40-0x4F: A-O
  1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,  // 0x50-0x5F: P-Z _
  0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  // 0x60-0x6F: a-o
  1,1,1,1,1,1,1,1,1,1,1,0,0,0,1,0,  // 0x70-0x7F: p-z ~
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0x80-0x8F
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0x90-0x9F
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0xA0-0xAF
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0xB0-0xBF
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0xC0-0xCF
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0xD0-0xDF
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0xE0-0xEF
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0   // 0xF0-0xFF
};
30
// Byte-indexed lookup table: an entry of 1 marks a byte that encodeURI() leaves
// unescaped and whose percent-escape decodeURI() leaves undecoded. The set is
//   # $ & + , / : ; = ? @
// and every other byte is 0. One row per 16 byte values.
static const unsigned char uri_reserved[256] = {
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0x00-0x0F
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0x10-0x1F
  0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,1,  // 0x20-0x2F: # $ & + , /
  0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,1,  // 0x30-0x3F: : ; = ?
  1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0x40-0x4F: @
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0x50-0x5F
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0x60-0x6F
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0x70-0x7F
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0x80-0x8F
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0x90-0x9F
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0xA0-0xAF
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0xB0-0xBF
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0xC0-0xCF
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0xD0-0xDF
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0xE0-0xEF
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0   // 0xF0-0xFF
};
49
// Table lookups for the two character classes above. The argument is cast to
// unsigned char first so that a plain (possibly signed) char holding a byte
// >= 0x80 indexes the table at 128..255 instead of at a negative offset.
#define is_uri_unreserved(c) (uri_unreserved[(unsigned char)(c)])
#define is_uri_reserved(c) (uri_reserved[(unsigned char)(c)])
52
// Returns 1 when `c` has the UTF-8 continuation-byte shape 10xxxxxx
// (i.e. lies in 0x80..0xBF), 0 otherwise.
static int is_valid_continuation(unsigned char c) {
  return c >= 0x80 && c <= 0xBF;
}
56
// Returns 1 when the `seq_len`-byte sequence at `str` is a three-byte form
// whose first byte is 0xED and whose second byte lies in 0xA0..0xBF -- the
// byte pattern that encodes a UTF-16 surrogate (U+D800..U+DFFF). Returns 0
// otherwise. `str` is only read when seq_len is 3, and only its first two bytes.
static int is_lone_surrogate(const unsigned char *str, int seq_len) {
  return seq_len == 3
      && str[0] == 0xED
      && (str[1] & 0xE0) == 0xA0;  // 101xxxxx  <=>  0xA0..0xBF
}
62
// Reads one "%XX" escape from `str` (length `len`) starting at index *pos.
// On success stores the decoded byte in *out_byte, advances *pos past the three
// characters and returns 0. Returns -1, leaving *pos and *out_byte untouched,
// when fewer than three characters remain, the first one is not '%', or
// hex_digit() reports a negative value for either digit.
static int decode_escape_sequence(const char *str, size_t len, size_t *pos, unsigned char *out_byte) {
  const size_t start = *pos;

  // start + 2 must still be a valid index into str.
  if (start + 2 >= len || str[start] != '%') return -1;

  const int hi_nibble = hex_digit(str[start + 1]);
  const int lo_nibble = hex_digit(str[start + 2]);
  if (hi_nibble < 0 || lo_nibble < 0) return -1;

  *out_byte = (unsigned char)((hi_nibble << 4) | lo_nibble);
  *pos = start + 3;
  return 0;
}
75
76// encodeURIComponent()
77ant_value_t js_encodeURIComponent(ant_t *js, ant_value_t *args, int nargs) {
78 ant_value_t result;
79 char *out = NULL;
80
81 if (nargs < 1) return js_mkstr(js, "undefined", 9);
82
83 char *str = js_getstr(js, args[0], NULL);
84 if (!str) return js_mkstr(js, "", 0);
85
86 size_t len = strlen(str);
87 size_t out_cap = len * 12 + 1;
88 out = malloc(out_cap);
89 if (!out) return js_mkerr(js, "out of memory");
90
91 size_t out_len = 0;
92 size_t i = 0;
93
94 while (i < len) {
95 unsigned char c = (unsigned char)str[i];
96
97 if (is_uri_unreserved(c)) {
98 out[out_len++] = (char)c;
99 i++;
100 continue;
101 }
102
103 int seq_len = utf8_sequence_length(c);
104 if (seq_len < 0) goto malformed;
105 if (i + seq_len > len) goto malformed;
106
107 for (int j = 1; j < seq_len; j++) {
108 if (!is_valid_continuation((unsigned char)str[i + j])) goto malformed;
109 }
110
111 if (is_lone_surrogate((unsigned char *)&str[i], seq_len)) goto malformed;
112
113 for (int j = 0; j < seq_len; j++) {
114 out_len += (size_t)snprintf(out + out_len, out_cap - out_len, "%%%02X", (unsigned char)str[i + j]);
115 }
116 i += seq_len;
117 }
118
119 out[out_len] = '\0';
120 result = js_mkstr(js, out, out_len);
121 free(out);
122 return result;
123
124malformed:
125 free(out);
126 return js_mkerr_typed(js, JS_ERR_URI, "URI malformed");
127}
128
129// encodeURI()
130ant_value_t js_encodeURI(ant_t *js, ant_value_t *args, int nargs) {
131 ant_value_t result;
132 char *out = NULL;
133
134 if (nargs < 1) return js_mkstr(js, "undefined", 9);
135
136 char *str = js_getstr(js, args[0], NULL);
137 if (!str) return js_mkstr(js, "", 0);
138
139 size_t len = strlen(str);
140 size_t out_cap = len * 12 + 1;
141 out = malloc(out_cap);
142 if (!out) return js_mkerr(js, "out of memory");
143
144 size_t out_len = 0;
145 size_t i = 0;
146
147 while (i < len) {
148 unsigned char c = (unsigned char)str[i];
149
150 if (is_uri_unreserved(c) || is_uri_reserved(c)) {
151 out[out_len++] = (char)c;
152 i++;
153 continue;
154 }
155
156 int seq_len = utf8_sequence_length(c);
157 if (seq_len < 0) goto malformed;
158 if (i + seq_len > len) goto malformed;
159
160 for (int j = 1; j < seq_len; j++) {
161 if (!is_valid_continuation((unsigned char)str[i + j])) goto malformed;
162 }
163
164 if (is_lone_surrogate((unsigned char *)&str[i], seq_len)) goto malformed;
165
166 for (int j = 0; j < seq_len; j++) {
167 out_len += (size_t)snprintf(out + out_len, out_cap - out_len, "%%%02X", (unsigned char)str[i + j]);
168 }
169 i += seq_len;
170 }
171
172 out[out_len] = '\0';
173 result = js_mkstr(js, out, out_len);
174 free(out);
175 return result;
176
177malformed:
178 free(out);
179 return js_mkerr_typed(js, JS_ERR_URI, "URI malformed");
180}
181
182// decodeURIComponent()
183ant_value_t js_decodeURIComponent(ant_t *js, ant_value_t *args, int nargs) {
184 ant_value_t result;
185 char *out = NULL;
186
187 if (nargs < 1) return js_mkstr(js, "undefined", 9);
188
189 char *str = js_getstr(js, args[0], NULL);
190 if (!str) return js_mkstr(js, "", 0);
191
192 size_t len = strlen(str);
193 out = malloc(len + 1);
194 if (!out) return js_mkerr(js, "out of memory");
195
196 size_t out_len = 0;
197 size_t i = 0;
198
199 while (i < len) {
200 if (str[i] != '%') {
201 out[out_len++] = str[i++];
202 continue;
203 }
204
205 unsigned char first_byte;
206 if (decode_escape_sequence(str, len, &i, &first_byte) < 0) goto malformed;
207
208 int seq_len = utf8_sequence_length(first_byte);
209 if (seq_len < 0) goto malformed;
210
211 out[out_len++] = (char)first_byte;
212
213 for (int j = 1; j < seq_len; j++) {
214 unsigned char cont_byte;
215 if (decode_escape_sequence(str, len, &i, &cont_byte) < 0) goto malformed;
216 if (!is_valid_continuation(cont_byte)) goto malformed;
217 out[out_len++] = (char)cont_byte;
218 }
219 }
220
221 out[out_len] = '\0';
222 result = js_mkstr(js, out, out_len);
223 free(out);
224 return result;
225
226malformed:
227 free(out);
228 return js_mkerr_typed(js, JS_ERR_URI, "URI malformed");
229}
230
231// decodeURI()
232ant_value_t js_decodeURI(ant_t *js, ant_value_t *args, int nargs) {
233 ant_value_t result;
234 char *out = NULL;
235
236 if (nargs < 1) return js_mkstr(js, "undefined", 9);
237
238 char *str = js_getstr(js, args[0], NULL);
239 if (!str) return js_mkstr(js, "", 0);
240
241 size_t len = strlen(str);
242 out = malloc(len + 1);
243 if (!out) return js_mkerr(js, "out of memory");
244
245 size_t out_len = 0;
246 size_t i = 0;
247
248 while (i < len) {
249 if (str[i] != '%') {
250 out[out_len++] = str[i++];
251 continue;
252 }
253
254 if (i + 2 >= len) goto malformed;
255
256 int high = hex_digit(str[i + 1]);
257 int low = hex_digit(str[i + 2]);
258 if (high < 0 || low < 0) goto malformed;
259
260 unsigned char first_byte = (unsigned char)((high << 4) | low);
261
262 if (first_byte < 128 && is_uri_reserved((char)first_byte)) {
263 out[out_len++] = str[i++];
264 out[out_len++] = str[i++];
265 out[out_len++] = str[i++];
266 continue;
267 }
268
269 i += 3;
270
271 int seq_len = utf8_sequence_length(first_byte);
272 if (seq_len < 0) goto malformed;
273
274 out[out_len++] = (char)first_byte;
275
276 for (int j = 1; j < seq_len; j++) {
277 unsigned char cont_byte;
278 if (decode_escape_sequence(str, len, &i, &cont_byte) < 0) goto malformed;
279 if (!is_valid_continuation(cont_byte)) goto malformed;
280 out[out_len++] = (char)cont_byte;
281 }
282 }
283
284 out[out_len] = '\0';
285 result = js_mkstr(js, out, out_len);
286 free(out);
287 return result;
288
289malformed:
290 free(out);
291 return js_mkerr_typed(js, JS_ERR_URI, "URI malformed");
292}
293
// Returns 1 for the bytes escape() copies through unchanged -- ASCII letters,
// digits and  @ * _ + - . /  -- and 0 for everything else.
static int is_escape_unreserved(unsigned char c) {
  if (c >= '0' && c <= '9') return 1;
  if (c >= 'a' && c <= 'z') return 1;
  if (c >= 'A' && c <= 'Z') return 1;

  switch (c) {
    case '@':
    case '*':
    case '_':
    case '+':
    case '-':
    case '.':
    case '/':
      return 1;
    default:
      return 0;
  }
}
301
302static ant_value_t js_escape(ant_t *js, ant_value_t *args, int nargs) {
303 if (nargs < 1) return js_mkstr(js, "undefined", 9);
304
305 char *str = js_getstr(js, args[0], NULL);
306 if (!str) return js_mkstr(js, "undefined", 9);
307
308 size_t len = strlen(str);
309 size_t out_cap = len * 6 + 1;
310 char *out = malloc(out_cap);
311 if (!out) return js_mkerr(js, "out of memory");
312
313 size_t out_len = 0;
314
315 for (size_t i = 0; i < len; i++) {
316 unsigned char c = (unsigned char)str[i];
317
318 if (is_escape_unreserved(c)) {
319 out[out_len++] = (char)c;
320 } else out_len += (size_t)snprintf(out + out_len, out_cap - out_len, "%%%02X", c);
321 }
322
323 out[out_len] = '\0';
324 ant_value_t result = js_mkstr(js, out, out_len);
325 free(out);
326 return result;
327}
328
329static ant_value_t js_unescape(ant_t *js, ant_value_t *args, int nargs) {
330 if (nargs < 1) return js_mkstr(js, "undefined", 9);
331
332 char *str = js_getstr(js, args[0], NULL);
333 if (!str) return js_mkstr(js, "undefined", 9);
334
335 size_t len = strlen(str);
336 char *out = malloc(len + 1);
337 if (!out) return js_mkerr(js, "out of memory");
338
339 size_t out_len = 0;
340 size_t i = 0;
341
342 while (i < len) {
343 if (str[i] == '%' && i + 2 < len) {
344 int high = hex_digit(str[i + 1]);
345 int low = hex_digit(str[i + 2]);
346 if (high >= 0 && low >= 0) {
347 out[out_len++] = (char)((high << 4) | low);
348 i += 3;
349 continue;
350 }
351 }
352 out[out_len++] = str[i++];
353 }
354
355 out[out_len] = '\0';
356 ant_value_t result = js_mkstr(js, out, out_len);
357 free(out);
358 return result;
359}
360
361void init_uri_module(void) {
362 ant_t *js = rt->js;
363 ant_value_t glob = js_glob(js);
364
365 js_set(js, glob, "encodeURI", js_mkfun(js_encodeURI));
366 js_set(js, glob, "encodeURIComponent", js_mkfun(js_encodeURIComponent));
367 js_set(js, glob, "decodeURI", js_mkfun(js_decodeURI));
368 js_set(js, glob, "decodeURIComponent", js_mkfun(js_decodeURIComponent));
369 js_set(js, glob, "escape", js_mkfun(js_escape));
370 js_set(js, glob, "unescape", js_mkfun(js_unescape));
371}