MIRROR: javascript for 🐜's, a tiny runtime with big ambitions
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

add utf8 parsing for json

+249 -85
+2 -1
include/utf8.h
··· 18 18 char *out, bool fatal, bool stream 19 19 ); 20 20 21 - size_t utf8_char_len_at(const char *str, size_t byte_len, size_t pos); 22 21 size_t utf8_strlen(const char *str, size_t byte_len); 23 22 size_t utf16_strlen(const char *str, size_t byte_len); 23 + size_t utf8_char_len_at(const char *str, size_t byte_len, size_t pos); 24 + char *utf8_json_quote(const char *str, size_t byte_len, size_t *out_len); 24 25 25 26 int utf16_index_to_byte_offset( 26 27 const char *str,
+127 -70
src/modules/json.c
··· 5 5 #include <yyjson.h> 6 6 #include <uthash.h> 7 7 8 + #include "utf8.h" 8 9 #include "errors.h" 9 10 #include "runtime.h" 10 11 #include "internal.h" 11 - #include "silver/engine.h" 12 12 13 + #include "silver/engine.h" 13 14 #include "modules/json.h" 14 15 #include "modules/symbol.h" 15 16 ··· 24 25 if (!val) return js_mkundef(); 25 26 26 27 switch (yyjson_get_type(val)) { 27 - case YYJSON_TYPE_NULL: return js_mknull(); 28 - case YYJSON_TYPE_BOOL: return js_bool(yyjson_get_bool(val)); 29 - case YYJSON_TYPE_STR: return js_mkstr(js, yyjson_get_str(val), yyjson_get_len(val)); 28 + case YYJSON_TYPE_NULL: return js_mknull(); 29 + case YYJSON_TYPE_BOOL: return js_bool(yyjson_get_bool(val)); 30 + case YYJSON_TYPE_STR: return js_mkstr(js, yyjson_get_str(val), yyjson_get_len(val)); 31 + 32 + case YYJSON_TYPE_NUM: { 33 + if (yyjson_is_sint(val)) return js_mknum((double)yyjson_get_sint(val)); 34 + if (yyjson_is_uint(val)) return js_mknum((double)yyjson_get_uint(val)); 35 + return js_mknum(yyjson_get_real(val)); 36 + } 37 + 38 + case YYJSON_TYPE_ARR: { 39 + ant_value_t arr = js_mkarr(js); 40 + size_t idx, max; 41 + yyjson_val *item; 30 42 31 - case YYJSON_TYPE_NUM: { 32 - if (yyjson_is_sint(val)) return js_mknum((double)yyjson_get_sint(val)); 33 - if (yyjson_is_uint(val)) return js_mknum((double)yyjson_get_uint(val)); 34 - return js_mknum(yyjson_get_real(val)); 35 - } 43 + yyjson_arr_foreach(val, idx, max, item) 44 + js_arr_push(js, arr, yyjson_to_jsval(js, item)); 45 + 46 + return arr; 47 + } 48 + 49 + case YYJSON_TYPE_OBJ: { 50 + ant_value_t obj = js_newobj(js); 36 51 37 - case YYJSON_TYPE_ARR: { 38 - ant_value_t arr = js_mkarr(js); 39 - size_t idx, max; 40 - yyjson_val *item; 52 + size_t idx, max; yyjson_val *key, *item; 53 + json_key_entry_t *hash = NULL, *entry, *tmp; 54 + 55 + yyjson_obj_foreach(val, idx, max, key, item) { 56 + const char *k = yyjson_get_str(key); 57 + size_t klen = yyjson_get_len(key); 58 + ant_value_t v = yyjson_to_jsval(js, item); 41 59 42 - 
yyjson_arr_foreach(val, idx, max, item) 43 - js_arr_push(js, arr, yyjson_to_jsval(js, item)); 44 - 45 - return arr; 60 + HASH_FIND(hh, hash, k, klen, entry); 61 + if (entry) js_saveval(js, entry->prop_off, v); else { 62 + ant_offset_t off = js_mkprop_fast_off(js, obj, k, klen, v); 63 + entry = malloc(sizeof(json_key_entry_t)); 64 + entry->key = k; entry->key_len = klen; entry->prop_off = off; 65 + HASH_ADD_KEYPTR(hh, hash, entry->key, entry->key_len, entry); 66 + } 46 67 } 47 68 48 - case YYJSON_TYPE_OBJ: { 49 - ant_value_t obj = js_newobj(js); 50 - 51 - size_t idx, max; yyjson_val *key, *item; 52 - json_key_entry_t *hash = NULL, *entry, *tmp; 53 - 54 - yyjson_obj_foreach(val, idx, max, key, item) { 55 - const char *k = yyjson_get_str(key); 56 - size_t klen = yyjson_get_len(key); 57 - ant_value_t v = yyjson_to_jsval(js, item); 58 - 59 - HASH_FIND(hh, hash, k, klen, entry); 60 - if (entry) js_saveval(js, entry->prop_off, v); else { 61 - ant_offset_t off = js_mkprop_fast_off(js, obj, k, klen, v); 62 - entry = malloc(sizeof(json_key_entry_t)); 63 - entry->key = k; entry->key_len = klen; entry->prop_off = off; 64 - HASH_ADD_KEYPTR(hh, hash, entry->key, entry->key_len, entry); 65 - } 66 - } 67 - 68 - HASH_ITER(hh, hash, entry, tmp) { 69 - HASH_DEL(hash, entry); free(entry); 70 - } 71 - 72 - return obj; 69 + HASH_ITER(hh, hash, entry, tmp) { 70 + HASH_DEL(hash, entry); free(entry); 73 71 } 74 72 75 - default: return js_mkundef(); 73 + return obj; 76 74 } 75 + 76 + default: return js_mkundef(); } 77 77 } 78 78 79 79 typedef struct { 80 - ant_value_t *stack; 81 - int stack_size; 82 - int stack_cap; 83 - int has_cycle; 84 80 ant_t *js; 81 + ant_value_t *stack; 85 82 ant_value_t replacer_func; 86 83 ant_value_t replacer_arr; 87 - int replacer_arr_len; 84 + ant_value_t error; 88 85 ant_value_t holder; 86 + int stack_size; 87 + int stack_cap; 88 + int replacer_arr_len; 89 + int has_cycle; 89 90 } json_cycle_ctx; 90 91 92 + static inline bool json_has_abort(json_cycle_ctx *ctx) 
{ 93 + return ctx->has_cycle || vtype(ctx->error) != T_UNDEF; 94 + } 95 + 96 + static void json_capture_error(json_cycle_ctx *ctx, ant_value_t value) { 97 + if (vtype(ctx->error) != T_UNDEF) return; 98 + if (ctx->js->thrown_exists) { 99 + ctx->error = ctx->js->thrown_value; 100 + ctx->js->thrown_exists = false; 101 + ctx->js->thrown_value = js_mkundef(); 102 + return; 103 + } 104 + ctx->error = value; 105 + } 106 + 107 + static yyjson_mut_val *json_string_to_yyjson(ant_t *js, yyjson_mut_doc *doc, ant_value_t value) { 108 + size_t byte_len = 0; 109 + char *str = js_getstr(js, value, &byte_len); 110 + size_t raw_len = 0; 111 + char *raw = utf8_json_quote(str, byte_len, &raw_len); 112 + if (!raw) goto oom; 113 + yyjson_mut_val *out = yyjson_mut_rawncpy(doc, raw, raw_len); 114 + free(raw); 115 + return out; 116 + 117 + oom: 118 + free(raw); 119 + return NULL; 120 + } 121 + 91 122 static int json_cycle_check(json_cycle_ctx *ctx, ant_value_t val) { 92 123 for (int i = 0; i < ctx->stack_size; i++) 93 124 if (ctx->stack[i] == val) { ctx->has_cycle = 1; return 1; } ··· 165 196 uint_to_str(idxstr, sizeof(idxstr), (uint64_t)i); 166 197 ant_value_t elem = js_arr_get(js, val, i); 167 198 yyjson_mut_val *item = ant_value_to_yyjson_with_key(js, doc, idxstr, elem, ctx, 1); 168 - if (ctx->has_cycle) { 199 + if (json_has_abort(ctx)) { 169 200 ctx->holder = saved_holder; 170 201 return NULL; 171 202 } ··· 184 215 ant_value_t saved_holder = ctx->holder; 185 216 186 217 if (is_err(keys)) { 187 - ctx->has_cycle = 1; 218 + json_capture_error(ctx, keys); 188 219 return NULL; 189 220 } 190 221 ··· 200 231 if (!is_key_in_replacer_arr(js, ctx, key, key_len)) continue; 201 232 202 233 ant_value_t prop = js_get(js, val, key); 234 + if (is_err(prop)) { 235 + json_capture_error(ctx, prop); 236 + ctx->holder = saved_holder; 237 + return NULL; 238 + } 203 239 int ptype = vtype(prop); 204 240 if (ptype == T_UNDEF || ptype == T_FUNC) continue; 205 241 206 242 yyjson_mut_val *jval = 
ant_value_to_yyjson_with_key(js, doc, key, prop, ctx, 0); 207 - if (ctx->has_cycle) { 243 + if (json_has_abort(ctx)) { 208 244 ctx->holder = saved_holder; 209 245 return NULL; 210 246 } ··· 223 259 224 260 if (is_special_object(val)) { 225 261 ant_value_t toJSON = js_get(js, val, "toJSON"); 262 + if (is_err(toJSON)) { 263 + json_capture_error(ctx, toJSON); 264 + return NULL; 265 + } 266 + 226 267 if (vtype(toJSON) == T_FUNC) { 227 268 ant_value_t r = sv_vm_call(js->vm, js, toJSON, js_mkundef(), &val, 1, NULL, false); 228 - if (vtype(r) == T_ERR) { ctx->has_cycle = 1; return NULL; } 269 + if (vtype(r) == T_ERR) { json_capture_error(ctx, r); return NULL; } 229 270 return ant_value_to_yyjson_impl(js, doc, r, ctx, in_array); 230 271 }} 231 272 ··· 248 289 } 249 290 250 291 case T_STR: { 251 - size_t len; 252 - char *str = js_getstr(js, val, &len); 253 - return yyjson_mut_strncpy(doc, str, len); 292 + return json_string_to_yyjson(js, doc, val); 254 293 } 255 294 256 295 case T_OBJ: ··· 281 320 ant_value_t transformed = sv_vm_call(js->vm, js, ctx->replacer_func, js_mkundef(), call_args, 2, NULL, false); 282 321 283 322 if (vtype(transformed) == T_ERR) { 284 - ctx->has_cycle = 1; 323 + json_capture_error(ctx, transformed); 285 324 return NULL; 286 325 } 287 326 ··· 381 420 ant_value_t result; 382 421 yyjson_mut_doc *doc = NULL; 383 422 json_cycle_ctx ctx = {0}; 423 + 384 424 char *json_str = NULL; 385 425 size_t len; 386 426 387 427 if (nargs < 1) return js_mkerr(js, "JSON.stringify() requires at least 1 argument"); 428 + int top_type = vtype(args[0]); 429 + if (top_type == T_UNDEF || top_type == T_FUNC || top_type == T_SYMBOL) return js_mkundef(); 388 430 389 - int top_type = vtype(args[0]); 390 - if (top_type == T_UNDEF || top_type == T_FUNC || top_type == T_SYMBOL) 391 - return js_mkundef(); 431 + if (nargs < 2 && top_type == T_STR) { 432 + size_t byte_len = 0; 433 + size_t raw_len = 0; 434 + 435 + char *str = js_getstr(js, args[0], &byte_len); 436 + char *raw = 
utf8_json_quote(str, byte_len, &raw_len); 437 + 438 + if (!raw) return js_mkerr(js, "JSON.stringify() failed: out of memory"); 439 + result = js_mkstr(js, raw, raw_len); 440 + free(raw); 441 + 442 + return result; 443 + } 392 444 393 445 ctx.js = js; 394 446 ctx.replacer_func = js_mkundef(); 395 447 ctx.replacer_arr = js_mkundef(); 396 448 ctx.replacer_arr_len = 0; 449 + ctx.error = js_mkundef(); 397 450 ctx.holder = js_mkundef(); 398 451 399 452 if (nargs >= 2) { 400 - ant_value_t replacer = args[1]; 401 - if (vtype(replacer) == T_FUNC) { 402 - ctx.replacer_func = replacer; 403 - } else if (is_special_object(replacer)) { 404 - ant_value_t len_val = js_get(js, replacer, "length"); 405 - if (vtype(len_val) == T_NUM) { 406 - ctx.replacer_arr = replacer; 407 - ctx.replacer_arr_len = (int)js_getnum(len_val); 408 - } 409 - } 410 - } 453 + ant_value_t replacer = args[1]; 454 + if (vtype(replacer) == T_FUNC) ctx.replacer_func = replacer; 455 + 456 + else if (is_special_object(replacer)) { 457 + ant_value_t len_val = js_get(js, replacer, "length"); 458 + 459 + if (vtype(len_val) == T_NUM) { 460 + ctx.replacer_arr = replacer; 461 + ctx.replacer_arr_len = (int)js_getnum(len_val); 462 + }}} 411 463 412 464 doc = yyjson_mut_doc_new(NULL); 413 465 if (!doc) return js_mkerr(js, "JSON.stringify() failed: out of memory"); 414 466 415 467 yyjson_mut_val *root = ant_value_to_yyjson(js, doc, args[0], &ctx); 416 468 469 + if (vtype(ctx.error) != T_UNDEF) { 470 + result = is_err(ctx.error) ? ctx.error : js_throw(js, ctx.error); 471 + goto cleanup; 472 + } 473 + 417 474 if (ctx.has_cycle) { 418 475 result = js_mkerr_typed(js, JS_ERR_TYPE, "Converting circular structure to JSON"); 419 476 goto cleanup;
+20 -14
src/modules/structured-clone.c
··· 137 137 ant_value_t dv_data_val = js_get_slot(val, SLOT_DATA); 138 138 if (vtype(dv_data_val) != T_NUM) 139 139 return js_throw(js, make_dom_exception(js, "DataView could not be cloned", "DataCloneError")); 140 - 140 + 141 141 DataViewData *dv = (DataViewData *)(uintptr_t)js_getnum(dv_data_val); 142 142 if (!dv || !dv->buffer) 143 143 return js_throw(js, make_dom_exception(js, "DataView could not be cloned", "DataCloneError")); 144 - 145 - ArrayBufferData *new_buf = create_array_buffer_data(dv->byte_length); 144 + 145 + ArrayBufferData *new_buf = create_array_buffer_data(dv->buffer->length); 146 146 if (!new_buf) return js_mkerr(js, "out of memory"); 147 - if (dv->byte_length > 0) memcpy(new_buf->data, dv->buffer->data + dv->byte_offset, dv->byte_length); 147 + if (dv->buffer->length > 0) memcpy(new_buf->data, dv->buffer->data, dv->buffer->length); 148 148 149 149 ant_value_t ab_obj = create_arraybuffer_obj(js, new_buf); 150 - ant_value_t clone = create_dataview_with_buffer(js, new_buf, 0, dv->byte_length, ab_obj); 150 + ant_value_t clone = create_dataview_with_buffer( 151 + js, new_buf, dv->byte_offset, dv->byte_length, ab_obj 152 + ); 153 + 151 154 if (is_err(clone)) { 152 155 free_array_buffer_data(new_buf); 153 156 return clone; 154 157 } 155 158 159 + js_set(js, clone, "byteOffset", js_mknum((double)dv->byte_offset)); 160 + js_set(js, clone, "byteLength", js_mknum((double)dv->byte_length)); 161 + 156 162 sc_add(seen, val, clone); 157 163 free_array_buffer_data(new_buf); 158 164 return clone; ··· 161 167 if (t == T_ARR) { 162 168 ant_value_t clone = js_mkarr(js); 163 169 sc_add(seen, val, clone); 164 - 170 + 165 171 ant_offset_t len = js_arr_len(js, val); 166 172 for (ant_offset_t i = 0; i < len; i++) { 167 173 ant_value_t ic = sc_clone_rec(js, js_arr_get(js, val, i), seen, transfer); 168 174 if (is_err(ic)) return ic; 169 175 js_arr_push(js, clone, ic); 170 176 } 171 - 177 + 172 178 return clone; 173 179 } 174 180 ··· 185 191 186 192 ant_value_t map_proto 
= js_get_ctor_proto(js, "Map", 3); 187 193 if (is_special_object(map_proto)) js_set_proto_init(clone, map_proto); 188 - 194 + 189 195 map_entry_t **new_head = ant_calloc(sizeof(map_entry_t *)); 190 196 if (!new_head) return js_mkerr(js, "out of memory"); 191 - 197 + 192 198 *new_head = NULL; 193 199 js_set_slot(clone, SLOT_DATA, ANT_PTR(new_head)); 194 200 sc_add(seen, val, clone); 195 - 201 + 196 202 map_entry_t **src_head = get_map_from_obj(js, val); 197 203 if (src_head && *src_head) { 198 204 map_entry_t *e, *tmp; 199 205 HASH_ITER(hh, *src_head, e, tmp) { 200 206 ant_value_t vc = sc_clone_rec(js, e->value, seen, transfer); 201 207 if (is_err(vc)) return vc; 202 - 208 + 203 209 map_entry_t *ne = ant_calloc(sizeof(map_entry_t)); 204 210 if (!ne) return js_mkerr(js, "out of memory"); 205 211 ne->key = strdup(e->key); ··· 207 213 ne->value = vc; 208 214 HASH_ADD_STR(*new_head, key, ne); 209 215 }} 210 - 216 + 211 217 return clone; 212 218 } 213 219 214 220 if (obj_ptr->type_tag == T_SET) { 215 221 ant_value_t clone = js_mkobj(js); 216 222 js_obj_ptr(clone)->type_tag = T_SET; 217 - 223 + 218 224 ant_value_t set_proto = js_get_ctor_proto(js, "Set", 3); 219 225 if (is_special_object(set_proto)) js_set_proto_init(clone, set_proto); 220 - 226 + 221 227 set_entry_t **new_head = ant_calloc(sizeof(set_entry_t *)); 222 228 if (!new_head) return js_mkerr(js, "out of memory"); 223 229 *new_head = NULL;
+100
src/utf8.c
··· 1 1 #include "utf8.h" 2 + #include "utils.h" 3 + #include <stdlib.h> 2 4 #include <string.h> 3 5 #include <stdbool.h> 4 6 ··· 7 9 utf8proc_int32_t cp; 8 10 *seq_len = (int)utf8_next(buf, (utf8proc_ssize_t)len, &cp); 9 11 return cp < 0 ? 0xFFFD : (uint32_t)cp; 12 + } 13 + 14 + static bool utf8_json_quote_reserve(char **buf, size_t *cap, size_t need) { 15 + if (need <= *cap) return true; 16 + 17 + size_t next = *cap ? *cap * 2 : 64; 18 + while (next < need) next *= 2; 19 + 20 + char *tmp = realloc(*buf, next); 21 + if (!tmp) return false; 22 + *buf = tmp; 23 + *cap = next; 24 + return true; 25 + } 26 + 27 + static bool utf8_json_quote_append( 28 + char **buf, size_t *len, size_t *cap, const void *src, size_t src_len 29 + ) { 30 + if (!utf8_json_quote_reserve(buf, cap, *len + src_len + 1)) return false; 31 + memcpy(*buf + *len, src, src_len); 32 + *len += src_len; 33 + (*buf)[*len] = '\0'; 34 + return true; 35 + } 36 + 37 + static bool utf8_json_quote_append_char(char **buf, size_t *len, size_t *cap, char ch) { 38 + return utf8_json_quote_append(buf, len, cap, &ch, 1); 39 + } 40 + 41 + static bool utf8_json_quote_append_u_escape( 42 + char **buf, size_t *len, size_t *cap, uint32_t code_unit 43 + ) { 44 + char escape[6] = { 45 + '\\', 'u', 46 + hex_char((int)(code_unit >> 12)), 47 + hex_char((int)(code_unit >> 8)), 48 + hex_char((int)(code_unit >> 4)), 49 + hex_char((int)code_unit), 50 + }; 51 + return utf8_json_quote_append(buf, len, cap, escape, sizeof(escape)); 52 + } 53 + 54 + char *utf8_json_quote(const char *str, size_t byte_len, size_t *out_len) { 55 + size_t utf16_len = utf16_strlen(str, byte_len); 56 + char *raw = NULL; 57 + size_t raw_len = 0; 58 + size_t raw_cap = 0; 59 + 60 + if (!utf8_json_quote_append_char(&raw, &raw_len, &raw_cap, '"')) goto oom; 61 + 62 + for (size_t i = 0; i < utf16_len; i++) { 63 + uint32_t cu = utf16_code_unit_at(str, byte_len, i); 64 + 65 + if (cu >= 0xD800 && cu <= 0xDBFF && i + 1 < utf16_len) { 66 + uint32_t cu2 = 
utf16_code_unit_at(str, byte_len, i + 1); 67 + if (cu2 >= 0xDC00 && cu2 <= 0xDFFF) { 68 + uint32_t cp = utf16_codepoint_at(str, byte_len, i); 69 + char utf8[4]; 70 + int n = utf8_encode(cp, utf8); 71 + if (n <= 0 || !utf8_json_quote_append(&raw, &raw_len, &raw_cap, utf8, (size_t)n)) goto oom; 72 + i++; 73 + continue; 74 + }} 75 + 76 + if (cu >= 0xD800 && cu <= 0xDFFF) { 77 + if (!utf8_json_quote_append_u_escape(&raw, &raw_len, &raw_cap, cu)) goto oom; 78 + continue; 79 + } 80 + 81 + switch (cu) { 82 + case '"': if (!utf8_json_quote_append(&raw, &raw_len, &raw_cap, "\\\"", 2)) goto oom; continue; 83 + case '\\': if (!utf8_json_quote_append(&raw, &raw_len, &raw_cap, "\\\\", 2)) goto oom; continue; 84 + case '\b': if (!utf8_json_quote_append(&raw, &raw_len, &raw_cap, "\\b", 2)) goto oom; continue; 85 + case '\f': if (!utf8_json_quote_append(&raw, &raw_len, &raw_cap, "\\f", 2)) goto oom; continue; 86 + case '\n': if (!utf8_json_quote_append(&raw, &raw_len, &raw_cap, "\\n", 2)) goto oom; continue; 87 + case '\r': if (!utf8_json_quote_append(&raw, &raw_len, &raw_cap, "\\r", 2)) goto oom; continue; 88 + case '\t': if (!utf8_json_quote_append(&raw, &raw_len, &raw_cap, "\\t", 2)) goto oom; continue; 89 + default: break; 90 + } 91 + 92 + if (cu < 0x20) { 93 + if (!utf8_json_quote_append_u_escape(&raw, &raw_len, &raw_cap, cu)) goto oom; 94 + continue; 95 + } 96 + 97 + char utf8[4]; 98 + int n = utf8_encode(cu, utf8); 99 + if (n <= 0 || !utf8_json_quote_append(&raw, &raw_len, &raw_cap, utf8, (size_t)n)) goto oom; 100 + } 101 + 102 + if (!utf8_json_quote_append_char(&raw, &raw_len, &raw_cap, '"')) goto oom; 103 + if (out_len) *out_len = raw_len; 104 + return raw; 105 + 106 + oom: 107 + free(raw); 108 + if (out_len) *out_len = 0; 109 + return NULL; 10 110 } 11 111 12 112 size_t utf8_char_len_at(const char *str, size_t byte_len, size_t pos) {