MIRROR: javascript for 🐜's, a tiny runtime with big ambitions
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

add fast path to url parsing

+107 -23
+107 -23
src/modules/url.c
··· 79 79 return is_special_scheme(proto) || strcmp(proto, "file:") == 0; 80 80 } 81 81 82 + static bool url_base_is_opaque(const char *base_str, const char *proto) { 83 + const char *after_colon = NULL; 84 + 85 + if (!base_str || is_special_scheme(proto)) return false; 86 + after_colon = strchr(base_str, ':'); 87 + if (!after_colon) return false; 88 + after_colon++; 89 + 90 + return *after_colon != '/' && *after_colon != '\0'; 91 + } 92 + 82 93 char *form_urlencode_n(const char *str, size_t len) { 83 94 if (!str) return strdup(""); 84 95 char *out = malloc(len * 3 + 1); ··· 182 193 return strndup(r->first, (size_t)(r->afterLast - r->first)); 183 194 } 184 195 185 - static char *url_escape_brackets_in_query_or_fragment(const char *url_str, bool *changed_out) { 196 + static bool url_has_brackets_in_query_or_fragment(const char *url_str) { 197 + size_t len = strlen(url_str); 198 + bool in_query = false; 199 + bool in_fragment = false; 200 + 201 + for (size_t i = 0; i < len; i++) { 202 + char c = url_str[i]; 203 + if (c == '#' && !in_fragment) { 204 + in_query = false; 205 + in_fragment = true; 206 + continue; 207 + } 208 + if (c == '?' 
&& !in_query && !in_fragment) { 209 + in_query = true; 210 + continue; 211 + } 212 + if ((in_query || in_fragment) && (c == '[' || c == ']')) return true; 213 + } 214 + 215 + return false; 216 + } 217 + 218 + static char *url_escape_brackets_in_query_or_fragment(const char *url_str) { 186 219 size_t len = strlen(url_str); 187 220 size_t extra = 0; 188 221 bool in_query = false; ··· 201 234 } 202 235 if ((in_query || in_fragment) && (c == '[' || c == ']')) extra += 2; 203 236 } 204 - 205 - if (changed_out) *changed_out = (extra != 0); 206 - if (extra == 0) return NULL; 207 237 208 238 char *escaped = malloc(len + extra + 1); 209 239 size_t pos = 0; ··· 242 272 return escaped; 243 273 } 244 274 275 + static int url_parse_single_uri_relaxed( 276 + UriUriA *uri, 277 + const char *url_str, 278 + const char **errpos, 279 + char **owned_input_out, 280 + bool *used_relaxed_out 281 + ) { 282 + char *escaped = NULL; 283 + 284 + if (owned_input_out) *owned_input_out = NULL; 285 + if (used_relaxed_out) *used_relaxed_out = false; 286 + if (uriParseSingleUriA(uri, url_str, errpos) == URI_SUCCESS) return 0; 287 + 288 + if (!url_has_brackets_in_query_or_fragment(url_str)) return -1; 289 + escaped = url_escape_brackets_in_query_or_fragment(url_str); 290 + 291 + if (!escaped) return -1; 292 + if (owned_input_out) *owned_input_out = escaped; 293 + if (used_relaxed_out) *used_relaxed_out = true; 294 + 295 + return uriParseSingleUriA(uri, escaped, errpos) == URI_SUCCESS ? 0 : -1; 296 + } 297 + 245 298 static void url_override_search_hash_from_input(url_state_t *s, const char *url_str) { 246 299 const char *hash = strchr(url_str, '#'); 247 300 const char *query = strchr(url_str, '?'); ··· 263 316 s->hash = hash_len > 0 ? 
strndup(hash, hash_len) : strdup(""); 264 317 } 265 318 319 + static void url_override_search_hash_from_reference(url_state_t *s, const char *url_str) { 320 + const char *hash = strchr(url_str, '#'); 321 + const char *query = strchr(url_str, '?'); 322 + size_t search_len = 0; 323 + size_t hash_len = 0; 324 + 325 + if (query && hash && hash < query) query = NULL; 326 + 327 + if (query) { 328 + const char *search_end = hash && hash > query ? hash : url_str + strlen(url_str); 329 + search_len = (size_t)(search_end - query); 330 + free(s->search); 331 + s->search = strndup(query, search_len); 332 + } 333 + 334 + if (hash) { 335 + hash_len = strlen(hash); 336 + free(s->hash); 337 + s->hash = strndup(hash, hash_len); 338 + } 339 + } 340 + 266 341 static void uri_to_state(const UriUriA *uri, url_state_t *s) { 267 342 char *scheme = uri_range_dup(&uri->scheme); 268 343 size_t slen = strlen(scheme); ··· 339 414 340 415 if (base_str) { 341 416 UriUriA base_uri, ref_uri, resolved; 342 - if (uriParseSingleUriA(&base_uri, base_str, &errpos) != URI_SUCCESS) return -1; 343 - 417 + char *escaped_base = NULL; 418 + char *escaped_ref = NULL; 419 + bool used_relaxed_ref_parse = false; 420 + 421 + if (url_parse_single_uri_relaxed(&base_uri, base_str, &errpos, &escaped_base, NULL) != 0) { 422 + free(escaped_base); 423 + return -1; 424 + } 425 + 344 426 char *base_scheme = uri_range_dup(&base_uri.scheme); 345 427 size_t bslen = strlen(base_scheme); 346 428 for (size_t i = 0; i < bslen; i++) base_scheme[i] = (char)tolower((unsigned char)base_scheme[i]); ··· 351 433 proto_buf[bslen + 1] = '\0'; 352 434 free(base_scheme); 353 435 354 - bool base_special = is_special_scheme(proto_buf); 355 - if (!base_special) { 356 - const char *after_colon = strchr(base_str, ':'); 357 - if (after_colon) { 358 - after_colon++; 359 - bool is_opaque = (*after_colon != '/' && *after_colon != '\0'); 360 - if (is_opaque) { uriFreeUriMembersA(&base_uri); return -1; } 361 - }} 362 - 363 - if 
(uriParseSingleUriA(&ref_uri, url_str, &errpos) != URI_SUCCESS) { 436 + if (url_base_is_opaque(base_str, proto_buf)) { 437 + uriFreeUriMembersA(&base_uri); 438 + free(escaped_base); 439 + return -1; 440 + } 441 + 442 + if (url_parse_single_uri_relaxed(&ref_uri, url_str, &errpos, &escaped_ref, &used_relaxed_ref_parse) != 0) { 364 443 uriFreeUriMembersA(&base_uri); 444 + free(escaped_base); 445 + free(escaped_ref); 365 446 return -1; 366 447 } 367 448 368 449 if (uriAddBaseUriA(&resolved, &ref_uri, &base_uri) != URI_SUCCESS) { 369 450 uriFreeUriMembersA(&base_uri); 370 451 uriFreeUriMembersA(&ref_uri); 452 + free(escaped_base); 453 + free(escaped_ref); 371 454 return -1; 372 455 } 373 456 ··· 376 459 uriFreeUriMembersA(&resolved); 377 460 uriFreeUriMembersA(&ref_uri); 378 461 uriFreeUriMembersA(&base_uri); 462 + free(escaped_base); 463 + free(escaped_ref); 379 464 return -1; 380 465 } 381 466 382 467 uri_to_state(&resolved, s); 468 + if (used_relaxed_ref_parse) url_override_search_hash_from_reference(s, url_str); 383 469 uriFreeUriMembersA(&resolved); 384 470 uriFreeUriMembersA(&ref_uri); 385 471 uriFreeUriMembersA(&base_uri); 472 + free(escaped_ref); 473 + free(escaped_base); 386 474 387 475 return 0; 388 476 } ··· 391 479 char *escaped_url = NULL; 392 480 bool used_relaxed_query_parse = false; 393 481 394 - if (uriParseSingleUriA(&uri, url_str, &errpos) != URI_SUCCESS) { 395 - escaped_url = url_escape_brackets_in_query_or_fragment(url_str, &used_relaxed_query_parse); 396 - if (!escaped_url) return -1; 397 - if (uriParseSingleUriA(&uri, escaped_url, &errpos) != URI_SUCCESS) { 398 - free(escaped_url); 399 - return -1; 400 - } 482 + if (url_parse_single_uri_relaxed(&uri, url_str, &errpos, &escaped_url, &used_relaxed_query_parse) != 0) { 483 + free(escaped_url); 484 + return -1; 401 485 } 402 486 403 487 if (!uri.scheme.first || uri.scheme.first == uri.scheme.afterLast) {