we (web engine): Experimental web browser project to understand the limits of Claude
2
fork

Configure Feed

Select the types of activity you want to include in your feed.

Implement content-type routing for navigation (Phase 17)

Route fetched resources to appropriate renderers based on Content-Type:
- HTML renders normally (no regression)
- Plain text, CSS, JS, JSON, XML display in monospace <pre> blocks
- Images display centered in viewport via data URL embedding
- Unsupported binary types show friendly "Cannot Display" message
- Missing Content-Type triggers content sniffing (magic bytes, HTML
  detection, text heuristic)

Adds base64_encode to we-url for image data URL generation.
Refactors loader to use shared classify_and_decode helper, eliminating
duplicated MIME routing logic across fetch/post_form/decode_response.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

+642 -142
+265 -102
crates/browser/src/loader.rs
··· 280 280 .unwrap_or_else(|| url.clone()); 281 281 282 282 let content_type = response.content_type(); 283 - let mime = content_type 284 - .as_ref() 285 - .map(|ct| ct.mime_type.as_str()) 286 - .unwrap_or("application/octet-stream"); 283 + let mime: String = match &content_type { 284 + Some(ct) => ct.mime_type.clone(), 285 + None => sniff_mime_type(&response.body).to_string(), 286 + }; 287 287 288 - match classify_mime(mime) { 289 - MimeClass::Html => { 290 - let (text, encoding) = 291 - decode_text_resource(&response.body, content_type.as_ref(), true); 292 - Ok(Resource::Html { 293 - text, 294 - base_url: effective_url, 295 - encoding, 296 - }) 297 - } 298 - MimeClass::Css => { 299 - let (text, _encoding) = 300 - decode_text_resource(&response.body, content_type.as_ref(), false); 301 - Ok(Resource::Css { 302 - text, 303 - url: effective_url, 304 - }) 305 - } 306 - MimeClass::Script => { 307 - let (text, _encoding) = 308 - decode_text_resource(&response.body, content_type.as_ref(), false); 309 - Ok(Resource::Script { 310 - text, 311 - url: effective_url, 312 - }) 313 - } 314 - MimeClass::Image => Ok(Resource::Image { 315 - data: response.body, 316 - mime_type: mime.to_string(), 317 - url: effective_url, 318 - }), 319 - MimeClass::Other => { 320 - // Check if it's a text type we should decode 321 - if mime.starts_with("text/") { 322 - let (text, _encoding) = 323 - decode_text_resource(&response.body, content_type.as_ref(), false); 324 - Ok(Resource::Other { 325 - data: text.into_bytes(), 326 - mime_type: mime.to_string(), 327 - url: effective_url, 328 - }) 329 - } else { 330 - Ok(Resource::Other { 331 - data: response.body, 332 - mime_type: mime.to_string(), 333 - url: effective_url, 334 - }) 335 - } 336 - } 337 - } 288 + classify_and_decode(response.body, &mime, content_type.as_ref(), effective_url) 338 289 } 339 290 340 291 /// Submit an HTTP POST request (for form submission) and decode the response. 
··· 368 319 .unwrap_or_else(|| url.clone()); 369 320 370 321 let ct = response.content_type(); 371 - let mime = ct 372 - .as_ref() 373 - .map(|c| c.mime_type.as_str()) 374 - .unwrap_or("application/octet-stream"); 322 + let mime: String = match &ct { 323 + Some(c) => c.mime_type.clone(), 324 + None => sniff_mime_type(&response.body).to_string(), 325 + }; 375 326 376 - match classify_mime(mime) { 377 - MimeClass::Html => { 378 - let (text, encoding) = decode_text_resource(&response.body, ct.as_ref(), true); 379 - Ok(Resource::Html { 380 - text, 381 - base_url: effective_url, 382 - encoding, 383 - }) 384 - } 385 - _ => { 386 - let (text, encoding) = decode_text_resource(&response.body, ct.as_ref(), true); 387 - Ok(Resource::Html { 388 - text, 389 - base_url: effective_url, 390 - encoding, 391 - }) 392 - } 393 - } 327 + classify_and_decode(response.body, &mime, ct.as_ref(), effective_url) 394 328 } 395 329 396 330 /// Fetch a subresource with Same-Origin Policy and CORS enforcement. ··· 622 556 /// Decode an HTTP response into a Resource based on its Content-Type. 623 557 fn decode_response(response: we_net::http::HttpResponse, url: &Url) -> Result<Resource, LoadError> { 624 558 let content_type = response.content_type(); 625 - let mime = content_type 626 - .as_ref() 627 - .map(|ct| ct.mime_type.as_str()) 628 - .unwrap_or("application/octet-stream"); 559 + let mime: String = match &content_type { 560 + Some(ct) => ct.mime_type.clone(), 561 + None => sniff_mime_type(&response.body).to_string(), 562 + }; 563 + 564 + classify_and_decode(response.body, &mime, content_type.as_ref(), url.clone()) 565 + } 629 566 567 + /// Classify a MIME type and decode response bytes into the appropriate Resource variant. 
568 + fn classify_and_decode( 569 + body: Vec<u8>, 570 + mime: &str, 571 + content_type: Option<&ContentType>, 572 + url: Url, 573 + ) -> Result<Resource, LoadError> { 630 574 match classify_mime(mime) { 631 575 MimeClass::Html => { 632 - let (text, encoding) = 633 - decode_text_resource(&response.body, content_type.as_ref(), true); 576 + let (text, encoding) = decode_text_resource(&body, content_type, true); 634 577 Ok(Resource::Html { 635 578 text, 636 - base_url: url.clone(), 579 + base_url: url, 637 580 encoding, 638 581 }) 639 582 } 640 583 MimeClass::Css => { 641 - let (text, _encoding) = 642 - decode_text_resource(&response.body, content_type.as_ref(), false); 643 - Ok(Resource::Css { 644 - text, 645 - url: url.clone(), 646 - }) 584 + let (text, _encoding) = decode_text_resource(&body, content_type, false); 585 + Ok(Resource::Css { text, url }) 647 586 } 648 587 MimeClass::Script => { 649 - let (text, _encoding) = 650 - decode_text_resource(&response.body, content_type.as_ref(), false); 651 - Ok(Resource::Script { 652 - text, 653 - url: url.clone(), 654 - }) 588 + let (text, _encoding) = decode_text_resource(&body, content_type, false); 589 + Ok(Resource::Script { text, url }) 655 590 } 656 591 MimeClass::Image => Ok(Resource::Image { 657 - data: response.body, 592 + data: body, 658 593 mime_type: mime.to_string(), 659 - url: url.clone(), 594 + url, 660 595 }), 661 596 MimeClass::Other => { 597 + // Decode text/* types, keep everything else as raw bytes. 
662 598 if mime.starts_with("text/") { 663 - let (text, _encoding) = 664 - decode_text_resource(&response.body, content_type.as_ref(), false); 599 + let (text, _encoding) = decode_text_resource(&body, content_type, false); 665 600 Ok(Resource::Other { 666 601 data: text.into_bytes(), 667 602 mime_type: mime.to_string(), 668 - url: url.clone(), 603 + url, 669 604 }) 670 605 } else { 671 606 Ok(Resource::Other { 672 - data: response.body, 607 + data: body, 673 608 mime_type: mime.to_string(), 674 - url: url.clone(), 609 + url, 675 610 }) 676 611 } 677 612 } ··· 702 637 } 703 638 _ => MimeClass::Other, 704 639 } 640 + } 641 + 642 + // --------------------------------------------------------------------------- 643 + // Content sniffing (simplified WHATWG MIME Sniffing) 644 + // --------------------------------------------------------------------------- 645 + 646 + /// Sniff the MIME type from the body bytes when no Content-Type header is present. 647 + /// 648 + /// Checks magic bytes for known binary formats first, then falls back to 649 + /// text heuristics. Returns a MIME type string. 
650 + pub fn sniff_mime_type(bytes: &[u8]) -> &'static str { 651 + // PNG: 8-byte signature 652 + if bytes.starts_with(&[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]) { 653 + return "image/png"; 654 + } 655 + 656 + // JPEG: SOI marker 657 + if bytes.starts_with(&[0xFF, 0xD8, 0xFF]) { 658 + return "image/jpeg"; 659 + } 660 + 661 + // GIF: GIF87a or GIF89a 662 + if bytes.starts_with(b"GIF87a") || bytes.starts_with(b"GIF89a") { 663 + return "image/gif"; 664 + } 665 + 666 + // WebP: RIFF....WEBP 667 + if bytes.len() >= 12 && bytes.starts_with(b"RIFF") && &bytes[8..12] == b"WEBP" { 668 + return "image/webp"; 669 + } 670 + 671 + // PDF 672 + if bytes.starts_with(b"%PDF") { 673 + return "application/pdf"; 674 + } 675 + 676 + // Check for HTML-like content (case-insensitive, skip leading whitespace) 677 + let trimmed = skip_whitespace_and_bom(bytes); 678 + if looks_like_html(trimmed) { 679 + return "text/html"; 680 + } 681 + 682 + // Check if the content looks like text (no binary bytes in first 512 bytes) 683 + let check_len = trimmed.len().min(512); 684 + if is_text_content(&trimmed[..check_len]) { 685 + return "text/plain"; 686 + } 687 + 688 + // Default to binary 689 + "application/octet-stream" 690 + } 691 + 692 + /// Skip leading UTF-8 BOM and ASCII whitespace. 693 + fn skip_whitespace_and_bom(bytes: &[u8]) -> &[u8] { 694 + let mut b = bytes; 695 + // Skip UTF-8 BOM 696 + if b.starts_with(&[0xEF, 0xBB, 0xBF]) { 697 + b = &b[3..]; 698 + } 699 + // Skip whitespace 700 + while let Some((&first, rest)) = b.split_first() { 701 + if first == b' ' || first == b'\t' || first == b'\n' || first == b'\r' { 702 + b = rest; 703 + } else { 704 + break; 705 + } 706 + } 707 + b 708 + } 709 + 710 + /// Check if bytes look like the start of an HTML document. 
711 + fn looks_like_html(bytes: &[u8]) -> bool { 712 + if bytes.is_empty() { 713 + return false; 714 + } 715 + 716 + // Must start with '<' 717 + if bytes[0] != b'<' { 718 + return false; 719 + } 720 + 721 + // Lowercase the first ~15 bytes for comparison 722 + let check_len = bytes.len().min(15); 723 + let mut lower = [0u8; 15]; 724 + for (i, &b) in bytes[..check_len].iter().enumerate() { 725 + lower[i] = b.to_ascii_lowercase(); 726 + } 727 + let s = &lower[..check_len]; 728 + 729 + s.starts_with(b"<!doctype") 730 + || s.starts_with(b"<html") 731 + || s.starts_with(b"<head") 732 + || s.starts_with(b"<body") 733 + || s.starts_with(b"<script") 734 + || s.starts_with(b"<iframe") 735 + || s.starts_with(b"<h1") 736 + || s.starts_with(b"<div") 737 + || s.starts_with(b"<p>") 738 + || s.starts_with(b"<p ") 739 + || s.starts_with(b"<br") 740 + || s.starts_with(b"<table") 741 + || s.starts_with(b"<a ") 742 + || s.starts_with(b"<title") 743 + || s.starts_with(b"<style") 744 + || s.starts_with(b"<font") 745 + || s.starts_with(b"<b>") 746 + || s.starts_with(b"<b ") 747 + || s.starts_with(b"<!--") 748 + } 749 + 750 + /// Check if bytes look like text (no NUL bytes or other binary indicators). 
751 + fn is_text_content(bytes: &[u8]) -> bool { 752 + for &b in bytes { 753 + // NUL byte is a strong binary indicator 754 + if b == 0 { 755 + return false; 756 + } 757 + } 758 + true 705 759 } 706 760 707 761 // --------------------------------------------------------------------------- ··· 1433 1487 e.to_string(), 1434 1488 "cross-origin request blocked: http://evil.com/steal.js" 1435 1489 ); 1490 + } 1491 + 1492 + // ----------------------------------------------------------------------- 1493 + // Content sniffing 1494 + // ----------------------------------------------------------------------- 1495 + 1496 + #[test] 1497 + fn sniff_png() { 1498 + let bytes = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, 0x00]; 1499 + assert_eq!(sniff_mime_type(&bytes), "image/png"); 1500 + } 1501 + 1502 + #[test] 1503 + fn sniff_jpeg() { 1504 + let bytes = [0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10]; 1505 + assert_eq!(sniff_mime_type(&bytes), "image/jpeg"); 1506 + } 1507 + 1508 + #[test] 1509 + fn sniff_gif87a() { 1510 + assert_eq!(sniff_mime_type(b"GIF87a\x00\x00"), "image/gif"); 1511 + } 1512 + 1513 + #[test] 1514 + fn sniff_gif89a() { 1515 + assert_eq!(sniff_mime_type(b"GIF89a\x00\x00"), "image/gif"); 1516 + } 1517 + 1518 + #[test] 1519 + fn sniff_webp() { 1520 + let mut bytes = b"RIFF\x00\x00\x00\x00WEBP".to_vec(); 1521 + bytes.extend_from_slice(&[0; 10]); 1522 + assert_eq!(sniff_mime_type(&bytes), "image/webp"); 1523 + } 1524 + 1525 + #[test] 1526 + fn sniff_pdf() { 1527 + assert_eq!(sniff_mime_type(b"%PDF-1.5 ..."), "application/pdf"); 1528 + } 1529 + 1530 + #[test] 1531 + fn sniff_html_doctype() { 1532 + assert_eq!(sniff_mime_type(b"<!DOCTYPE html><html>"), "text/html"); 1533 + } 1534 + 1535 + #[test] 1536 + fn sniff_html_tag() { 1537 + assert_eq!( 1538 + sniff_mime_type(b"<html><body>Hello</body></html>"), 1539 + "text/html" 1540 + ); 1541 + } 1542 + 1543 + #[test] 1544 + fn sniff_html_with_whitespace() { 1545 + assert_eq!(sniff_mime_type(b" \n <html>"), "text/html"); 1546 
+ } 1547 + 1548 + #[test] 1549 + fn sniff_html_with_bom() { 1550 + assert_eq!(sniff_mime_type(b"\xEF\xBB\xBF<!DOCTYPE html>"), "text/html"); 1551 + } 1552 + 1553 + #[test] 1554 + fn sniff_html_comment() { 1555 + assert_eq!(sniff_mime_type(b"<!-- comment --><html>"), "text/html"); 1556 + } 1557 + 1558 + #[test] 1559 + fn sniff_plain_text() { 1560 + assert_eq!(sniff_mime_type(b"Hello, world!"), "text/plain"); 1561 + } 1562 + 1563 + #[test] 1564 + fn sniff_json_as_text() { 1565 + assert_eq!(sniff_mime_type(b"{\"key\": \"value\"}"), "text/plain"); 1566 + } 1567 + 1568 + #[test] 1569 + fn sniff_binary_with_nulls() { 1570 + let bytes = [0x00, 0x01, 0x02, 0x03, 0xFF]; 1571 + assert_eq!(sniff_mime_type(&bytes), "application/octet-stream"); 1572 + } 1573 + 1574 + #[test] 1575 + fn sniff_empty() { 1576 + assert_eq!(sniff_mime_type(b""), "text/plain"); 1577 + } 1578 + 1579 + #[test] 1580 + fn looks_like_html_case_insensitive() { 1581 + assert!(looks_like_html(b"<HTML>")); 1582 + assert!(looks_like_html(b"<Html>")); 1583 + assert!(looks_like_html(b"<BODY>")); 1584 + } 1585 + 1586 + #[test] 1587 + fn is_text_empty() { 1588 + assert!(is_text_content(b"")); 1589 + } 1590 + 1591 + #[test] 1592 + fn is_text_ascii() { 1593 + assert!(is_text_content(b"Hello, world!")); 1594 + } 1595 + 1596 + #[test] 1597 + fn is_not_text_with_nul() { 1598 + assert!(!is_text_content(b"Hello\x00World")); 1436 1599 } 1437 1600 }
+292 -40
crates/browser/src/main.rs
··· 563 563 eprintln!("[we] Form POST submission: {}", action_url.serialize()); 564 564 let mut loader = ResourceLoader::new(); 565 565 match loader.post_form(&action_url, &body, &content_type) { 566 - Ok(Resource::Html { text, base_url, .. }) => LoadedHtml { 567 - text, 568 - base_url, 569 - http_referrer_policy: None, 570 - http_csp: PolicyList::new(), 571 - }, 572 - Ok(_) => error_page("Form submission returned non-HTML response"), 566 + Ok(resource) => resource_to_loaded_html(resource, &loader), 573 567 Err(e) => error_page(&format!("Form submission failed: {e}")), 574 568 } 575 569 } ··· 606 600 fn load_from_url(url: &Url) -> LoadedHtml { 607 601 let mut loader = ResourceLoader::new(); 608 602 match loader.fetch_url(&url.serialize(), None) { 609 - Ok(Resource::Html { text, base_url, .. }) => { 610 - let http_policy = if loader.referrer_policy() != ReferrerPolicy::default() { 611 - Some(loader.referrer_policy()) 612 - } else { 613 - None 614 - }; 615 - let http_csp = loader.csp().clone(); 616 - LoadedHtml { 617 - text, 618 - base_url, 619 - http_referrer_policy: http_policy, 620 - http_csp, 621 - } 622 - } 623 - Ok(_) => error_page(&format!("URL did not return HTML: {}", url.serialize())), 603 + Ok(resource) => resource_to_loaded_html(resource, &loader), 624 604 Err(e) => error_page(&format!("Failed to load {}: {e}", url.serialize())), 625 605 } 626 606 } ··· 2000 1980 { 2001 1981 let mut loader = ResourceLoader::new(); 2002 1982 match loader.fetch_url(arg, None) { 2003 - Ok(Resource::Html { text, base_url, .. }) => { 2004 - // Capture referrer policy from the HTTP response (if loader updated it). 2005 - let http_policy = if loader.referrer_policy() != ReferrerPolicy::default() { 2006 - Some(loader.referrer_policy()) 2007 - } else { 2008 - None 2009 - }; 2010 - // Capture CSP policies from the HTTP response. 
2011 - let http_csp = loader.csp().clone(); 2012 - return LoadedHtml { 2013 - text, 2014 - base_url, 2015 - http_referrer_policy: http_policy, 2016 - http_csp, 2017 - }; 2018 - } 2019 - Ok(_) => { 2020 - return error_page(&format!("URL did not return HTML: {arg}")); 1983 + Ok(resource) => { 1984 + return resource_to_loaded_html(resource, &loader); 2021 1985 } 2022 1986 Err(e) => { 2023 1987 return error_page(&format!("Failed to load {arg}: {e}")); ··· 2072 2036 } 2073 2037 } 2074 2038 2039 + /// Convert any `Resource` into a `LoadedHtml` suitable for the rendering pipeline. 2040 + /// 2041 + /// For HTML resources, returns directly. For other types, generates a synthetic 2042 + /// HTML page that displays the content appropriately: 2043 + /// - Plain text / CSS / JS / JSON / XML → monospace `<pre>` block 2044 + /// - Images → centered `<img>` display (image bytes stored as data URL) 2045 + /// - Unsupported binary → friendly "cannot display" message 2046 + fn resource_to_loaded_html(resource: Resource, loader: &ResourceLoader) -> LoadedHtml { 2047 + match resource { 2048 + Resource::Html { text, base_url, .. 
} => { 2049 + let http_policy = if loader.referrer_policy() != ReferrerPolicy::default() { 2050 + Some(loader.referrer_policy()) 2051 + } else { 2052 + None 2053 + }; 2054 + let http_csp = loader.csp().clone(); 2055 + LoadedHtml { 2056 + text, 2057 + base_url, 2058 + http_referrer_policy: http_policy, 2059 + http_csp, 2060 + } 2061 + } 2062 + Resource::Script { text, url } => plain_text_page(&text, &url, "text/javascript"), 2063 + Resource::Css { text, url } => plain_text_page(&text, &url, "text/css"), 2064 + Resource::Image { 2065 + data, 2066 + mime_type, 2067 + url, 2068 + } => image_page(&data, &mime_type, &url), 2069 + Resource::Other { 2070 + data, 2071 + mime_type, 2072 + url, 2073 + } => { 2074 + if mime_type.starts_with("text/") 2075 + || mime_type == "application/json" 2076 + || mime_type == "application/xml" 2077 + || mime_type == "application/javascript" 2078 + { 2079 + // Displayable as text 2080 + let text = String::from_utf8_lossy(&data); 2081 + plain_text_page(&text, &url, &mime_type) 2082 + } else { 2083 + unsupported_page(&mime_type, &url) 2084 + } 2085 + } 2086 + } 2087 + } 2088 + 2089 + /// Generate a page that displays plain text in a monospace `<pre>` block. 
2090 + fn plain_text_page(text: &str, url: &Url, mime_type: &str) -> LoadedHtml { 2091 + let url_str = url.serialize(); 2092 + let title = extract_filename(&url_str); 2093 + let escaped = text 2094 + .replace('&', "&amp;") 2095 + .replace('<', "&lt;") 2096 + .replace('>', "&gt;"); 2097 + let html = format!( 2098 + "<!DOCTYPE html>\ 2099 + <html><head><title>{title}</title>\ 2100 + <style>\ 2101 + body {{ margin: 0; padding: 16px; background: #fff; }}\ 2102 + .header {{ font-family: sans-serif; font-size: 13px; color: #666; \ 2103 + padding: 8px 16px; margin: -16px -16px 16px -16px; \ 2104 + background: #f5f5f5; border-bottom: 1px solid #ddd; }}\ 2105 + pre {{ font-family: monospace; font-size: 14px; white-space: pre-wrap; \ 2106 + word-wrap: break-word; margin: 0; }}\ 2107 + </style></head>\ 2108 + <body><div class=\"header\">{mime_type}</div><pre>{escaped}</pre></body></html>" 2109 + ); 2110 + LoadedHtml { 2111 + text: html, 2112 + base_url: url.clone(), 2113 + http_referrer_policy: None, 2114 + http_csp: PolicyList::new(), 2115 + } 2116 + } 2117 + 2118 + /// Generate a page that displays an image centered in the viewport. 
2119 + fn image_page(data: &[u8], mime_type: &str, url: &Url) -> LoadedHtml { 2120 + use we_url::data_url::base64_encode; 2121 + let url_str = url.serialize(); 2122 + let title = extract_filename(&url_str); 2123 + let b64 = base64_encode(data); 2124 + let data_url = format!("data:{mime_type};base64,{b64}"); 2125 + let html = format!( 2126 + "<!DOCTYPE html>\ 2127 + <html><head><title>{title}</title>\ 2128 + <style>\ 2129 + body {{ margin: 0; display: flex; justify-content: center; \ 2130 + align-items: center; min-height: 100vh; background: #f0f0f0; }}\ 2131 + img {{ max-width: 100%; max-height: 100vh; }}\ 2132 + </style></head>\ 2133 + <body><img src=\"{data_url}\"></body></html>" 2134 + ); 2135 + LoadedHtml { 2136 + text: html, 2137 + base_url: url.clone(), 2138 + http_referrer_policy: None, 2139 + http_csp: PolicyList::new(), 2140 + } 2141 + } 2142 + 2143 + /// Generate a page indicating the content type cannot be displayed. 2144 + fn unsupported_page(mime_type: &str, url: &Url) -> LoadedHtml { 2145 + let url_str = url.serialize(); 2146 + let escaped_url = url_str 2147 + .replace('&', "&amp;") 2148 + .replace('<', "&lt;") 2149 + .replace('>', "&gt;"); 2150 + let html = format!( 2151 + "<!DOCTYPE html>\ 2152 + <html><head><title>Unsupported Content</title>\ 2153 + <style>\ 2154 + body {{ font-family: sans-serif; margin: 40px; color: #333; \ 2155 + text-align: center; }}\ 2156 + h1 {{ color: #666; }}\ 2157 + p {{ font-size: 16px; }}\ 2158 + .mime {{ font-family: monospace; background: #f0f0f0; \ 2159 + padding: 4px 8px; border-radius: 4px; }}\ 2160 + </style></head>\ 2161 + <body><h1>Cannot Display This Content</h1>\ 2162 + <p>The resource at <br>{escaped_url}</p>\ 2163 + <p>has content type <span class=\"mime\">{mime_type}</span> \ 2164 + which cannot be displayed in the browser.</p></body></html>" 2165 + ); 2166 + LoadedHtml { 2167 + text: html, 2168 + base_url: url.clone(), 2169 + http_referrer_policy: None, 2170 + http_csp: PolicyList::new(), 2171 + } 2172 + } 
2173 + 2174 + /// Extract the filename portion from a URL string for use as a page title. 2175 + fn extract_filename(url_str: &str) -> String { 2176 + // Strip query and fragment 2177 + let path = url_str.split('?').next().unwrap_or(url_str); 2178 + let path = path.split('#').next().unwrap_or(path); 2179 + // Take the last path component 2180 + match path.rsplit('/').next() { 2181 + Some(name) if !name.is_empty() => name.to_string(), 2182 + _ => url_str.to_string(), 2183 + } 2184 + } 2185 + 2075 2186 /// Extract the referrer policy from `<meta name="referrer" content="...">` in the DOM. 2076 2187 fn extract_meta_referrer_policy(doc: &Document) -> Option<ReferrerPolicy> { 2077 2188 let mut result = None; ··· 2401 2512 fn fragment_href_is_skipped() { 2402 2513 let href = "#section"; 2403 2514 assert!(href.starts_with('#')); 2515 + } 2516 + 2517 + // ----------------------------------------------------------------------- 2518 + // extract_filename tests 2519 + // ----------------------------------------------------------------------- 2520 + 2521 + #[test] 2522 + fn extract_filename_simple_path() { 2523 + assert_eq!(extract_filename("https://example.com/file.txt"), "file.txt"); 2524 + } 2525 + 2526 + #[test] 2527 + fn extract_filename_with_query() { 2528 + assert_eq!( 2529 + extract_filename("https://example.com/image.png?size=large"), 2530 + "image.png" 2531 + ); 2532 + } 2533 + 2534 + #[test] 2535 + fn extract_filename_with_fragment() { 2536 + assert_eq!( 2537 + extract_filename("https://example.com/doc.html#section"), 2538 + "doc.html" 2539 + ); 2540 + } 2541 + 2542 + #[test] 2543 + fn extract_filename_trailing_slash() { 2544 + assert_eq!( 2545 + extract_filename("https://example.com/"), 2546 + "https://example.com/" 2547 + ); 2548 + } 2549 + 2550 + #[test] 2551 + fn extract_filename_no_path() { 2552 + assert_eq!(extract_filename("https://example.com"), "example.com"); 2553 + } 2554 + 2555 + // ----------------------------------------------------------------------- 
2556 + // Content-type routing tests 2557 + // ----------------------------------------------------------------------- 2558 + 2559 + #[test] 2560 + fn resource_to_loaded_html_handles_html() { 2561 + let loader = ResourceLoader::new(); 2562 + let resource = Resource::Html { 2563 + text: "<html><body>Hello</body></html>".to_string(), 2564 + base_url: Url::parse("https://example.com").unwrap(), 2565 + encoding: we_encoding::Encoding::Utf8, 2566 + }; 2567 + let loaded = resource_to_loaded_html(resource, &loader); 2568 + assert!(loaded.text.contains("Hello")); 2569 + assert_eq!(loaded.base_url.serialize(), "https://example.com/"); 2570 + } 2571 + 2572 + #[test] 2573 + fn resource_to_loaded_html_handles_plain_text() { 2574 + let loader = ResourceLoader::new(); 2575 + let resource = Resource::Other { 2576 + data: b"Hello, world!".to_vec(), 2577 + mime_type: "text/plain".to_string(), 2578 + url: Url::parse("https://example.com/hello.txt").unwrap(), 2579 + }; 2580 + let loaded = resource_to_loaded_html(resource, &loader); 2581 + assert!(loaded.text.contains("<pre>")); 2582 + assert!(loaded.text.contains("Hello, world!")); 2583 + assert!(loaded.text.contains("text/plain")); 2584 + } 2585 + 2586 + #[test] 2587 + fn resource_to_loaded_html_handles_script() { 2588 + let loader = ResourceLoader::new(); 2589 + let resource = Resource::Script { 2590 + text: "console.log('hello')".to_string(), 2591 + url: Url::parse("https://example.com/app.js").unwrap(), 2592 + }; 2593 + let loaded = resource_to_loaded_html(resource, &loader); 2594 + assert!(loaded.text.contains("<pre>")); 2595 + assert!(loaded.text.contains("console.log")); 2596 + assert!(loaded.text.contains("text/javascript")); 2597 + } 2598 + 2599 + #[test] 2600 + fn resource_to_loaded_html_handles_css() { 2601 + let loader = ResourceLoader::new(); 2602 + let resource = Resource::Css { 2603 + text: "body { color: red; }".to_string(), 2604 + url: Url::parse("https://example.com/style.css").unwrap(), 2605 + }; 2606 + let loaded 
= resource_to_loaded_html(resource, &loader); 2607 + assert!(loaded.text.contains("<pre>")); 2608 + assert!(loaded.text.contains("body { color: red; }")); 2609 + assert!(loaded.text.contains("text/css")); 2610 + } 2611 + 2612 + #[test] 2613 + fn resource_to_loaded_html_handles_json() { 2614 + let loader = ResourceLoader::new(); 2615 + let resource = Resource::Other { 2616 + data: b"{\"key\": \"value\"}".to_vec(), 2617 + mime_type: "application/json".to_string(), 2618 + url: Url::parse("https://example.com/data.json").unwrap(), 2619 + }; 2620 + let loaded = resource_to_loaded_html(resource, &loader); 2621 + assert!(loaded.text.contains("<pre>")); 2622 + } 2623 + 2624 + #[test] 2625 + fn resource_to_loaded_html_handles_image() { 2626 + let loader = ResourceLoader::new(); 2627 + let resource = Resource::Image { 2628 + data: vec![0x89, 0x50, 0x4E, 0x47], 2629 + mime_type: "image/png".to_string(), 2630 + url: Url::parse("https://example.com/photo.png").unwrap(), 2631 + }; 2632 + let loaded = resource_to_loaded_html(resource, &loader); 2633 + assert!(loaded.text.contains("<img src=\"data:image/png;base64,")); 2634 + assert!(loaded.text.contains("photo.png")); 2635 + } 2636 + 2637 + #[test] 2638 + fn resource_to_loaded_html_handles_unsupported() { 2639 + let loader = ResourceLoader::new(); 2640 + let resource = Resource::Other { 2641 + data: vec![0x00, 0x01, 0x02], 2642 + mime_type: "application/octet-stream".to_string(), 2643 + url: Url::parse("https://example.com/data.bin").unwrap(), 2644 + }; 2645 + let loaded = resource_to_loaded_html(resource, &loader); 2646 + assert!(loaded.text.contains("Cannot Display This Content")); 2647 + assert!(loaded.text.contains("application/octet-stream")); 2648 + } 2649 + 2650 + #[test] 2651 + fn plain_text_escapes_html() { 2652 + let url = Url::parse("https://example.com/test.txt").unwrap(); 2653 + let loaded = plain_text_page("<script>alert('xss')</script>", &url, "text/plain"); 2654 + assert!(loaded.text.contains("&lt;script&gt;")); 
2655 + assert!(!loaded.text.contains("<script>alert")); 2404 2656 } 2405 2657 }
+85
crates/url/src/data_url.rs
··· 199 199 Ok(result) 200 200 } 201 201 202 + /// Encode bytes to a base64 string (standard alphabet, RFC 4648). 203 + pub fn base64_encode(bytes: &[u8]) -> String { 204 + const ALPHABET: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; 205 + 206 + let mut result = String::with_capacity(bytes.len().div_ceil(3) * 4); 207 + 208 + for chunk in bytes.chunks(3) { 209 + let b0 = chunk[0]; 210 + let b1 = if chunk.len() > 1 { chunk[1] } else { 0 }; 211 + let b2 = if chunk.len() > 2 { chunk[2] } else { 0 }; 212 + 213 + result.push(ALPHABET[(b0 >> 2) as usize] as char); 214 + result.push(ALPHABET[((b0 & 0x03) << 4 | b1 >> 4) as usize] as char); 215 + 216 + if chunk.len() > 1 { 217 + result.push(ALPHABET[((b1 & 0x0F) << 2 | b2 >> 6) as usize] as char); 218 + } else { 219 + result.push('='); 220 + } 221 + 222 + if chunk.len() > 2 { 223 + result.push(ALPHABET[(b2 & 0x3F) as usize] as char); 224 + } else { 225 + result.push('='); 226 + } 227 + } 228 + 229 + result 230 + } 231 + 202 232 /// Base64 decoding error. 
203 233 #[derive(Debug, Clone, PartialEq, Eq)] 204 234 pub enum Base64Error { ··· 316 346 fn base64_binary_data() { 317 347 // Raw bytes [0xFF, 0x00, 0xAA] 318 348 assert_eq!(base64_decode("/wCq").unwrap(), vec![0xFF, 0x00, 0xAA]); 349 + } 350 + 351 + // ----------------------------------------------------------------------- 352 + // Base64 encoding 353 + // ----------------------------------------------------------------------- 354 + 355 + #[test] 356 + fn base64_encode_empty() { 357 + assert_eq!(base64_encode(b""), ""); 358 + } 359 + 360 + #[test] 361 + fn base64_encode_hello() { 362 + assert_eq!(base64_encode(b"Hello"), "SGVsbG8="); 363 + } 364 + 365 + #[test] 366 + fn base64_encode_hello_world() { 367 + assert_eq!(base64_encode(b"Hello World"), "SGVsbG8gV29ybGQ="); 368 + } 369 + 370 + #[test] 371 + fn base64_encode_one_byte() { 372 + assert_eq!(base64_encode(b"a"), "YQ=="); 373 + } 374 + 375 + #[test] 376 + fn base64_encode_two_bytes() { 377 + assert_eq!(base64_encode(b"ab"), "YWI="); 378 + } 379 + 380 + #[test] 381 + fn base64_encode_three_bytes() { 382 + assert_eq!(base64_encode(b"abc"), "YWJj"); 383 + } 384 + 385 + #[test] 386 + fn base64_encode_binary() { 387 + assert_eq!(base64_encode(&[0xFF, 0x00, 0xAA]), "/wCq"); 388 + } 389 + 390 + #[test] 391 + fn base64_roundtrip() { 392 + let data = b"The quick brown fox jumps over the lazy dog"; 393 + let encoded = base64_encode(data); 394 + let decoded = base64_decode(&encoded).unwrap(); 395 + assert_eq!(decoded, data); 396 + } 397 + 398 + #[test] 399 + fn base64_roundtrip_binary() { 400 + let data: Vec<u8> = (0..=255).collect(); 401 + let encoded = base64_encode(&data); 402 + let decoded = base64_decode(&encoded).unwrap(); 403 + assert_eq!(decoded, data); 319 404 } 320 405 321 406 // -----------------------------------------------------------------------