we (web engine): Experimental web browser project to understand the limits of Claude
2
fork

Configure Feed

Select the types of activity you want to include in your feed.

Implement data URL parsing and loading (RFC 2397)

Add data_url module to the url crate:
- Base64 decoder (RFC 4648): standard alphabet, whitespace stripping, padding
- Data URL parser: extract MIME type, charset, base64/percent-encoded payload
- Default MIME type text/plain;charset=US-ASCII when omitted

Integrate with ResourceLoader in browser crate:
- Handle data: URLs in both fetch() and fetch_url() without network access
- Classify decoded content into Html/Css/Image/Other resource types
- Decode text resources using charset from data URL parameters

44 tests covering base64 decoding, data URL parsing, MIME extraction,
percent decoding, and ResourceLoader integration.

Implements issue 3mhkt7m45472x

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

+684
+195
crates/browser/src/loader.rs
··· 10 10 use we_encoding::Encoding; 11 11 use we_net::client::{ClientError, HttpClient}; 12 12 use we_net::http::ContentType; 13 + use we_url::data_url::{is_data_url, parse_data_url}; 13 14 use we_url::Url; 14 15 15 16 // --------------------------------------------------------------------------- ··· 99 100 /// Determines the resource type from the HTTP Content-Type header, decodes 100 101 /// text resources using the appropriate character encoding (per WHATWG spec), 101 102 /// and returns the result as a typed `Resource`. 103 + /// 104 + /// Handles `data:` URLs locally without network access. 102 105 pub fn fetch(&mut self, url: &Url) -> Result<Resource, LoadError> { 106 + // Handle data: URLs without network fetch. 107 + if url.scheme() == "data" { 108 + return fetch_data_url(&url.serialize()); 109 + } 110 + 103 111 let response = self.client.get(url)?; 104 112 105 113 // Check for HTTP error status codes ··· 161 169 } 162 170 163 171 /// Fetch a URL string, resolving it against an optional base URL. 172 + /// 173 + /// Handles `data:` URLs locally without network access. 164 174 pub fn fetch_url(&mut self, url_str: &str, base: Option<&Url>) -> Result<Resource, LoadError> { 175 + // Handle data URLs directly — no network fetch needed. 176 + if is_data_url(url_str) { 177 + return fetch_data_url(url_str); 178 + } 179 + 165 180 let url = match base { 166 181 Some(base_url) => Url::parse_with_base(url_str, base_url) 167 182 .or_else(|_| Url::parse(url_str)) ··· 260 275 } 261 276 262 277 // --------------------------------------------------------------------------- 278 + // Data URL handling 279 + // --------------------------------------------------------------------------- 280 + 281 + /// Fetch a data URL, decoding its payload and returning the appropriate Resource type. 
282 + fn fetch_data_url(url_str: &str) -> Result<Resource, LoadError> { 283 + let parsed = parse_data_url(url_str) 284 + .map_err(|e| LoadError::InvalidUrl(format!("data URL error: {e}")))?; 285 + 286 + let mime = &parsed.mime_type; 287 + 288 + // Create a synthetic Url for the resource metadata. 289 + let url = Url::parse(url_str).map_err(|_| LoadError::InvalidUrl(url_str.to_string()))?; 290 + 291 + match classify_mime(mime) { 292 + MimeClass::Html => { 293 + let encoding = charset_to_encoding(parsed.charset.as_deref()); 294 + let text = we_encoding::decode(&parsed.data, encoding); 295 + Ok(Resource::Html { 296 + text, 297 + base_url: url, 298 + encoding, 299 + }) 300 + } 301 + MimeClass::Css => { 302 + let encoding = charset_to_encoding(parsed.charset.as_deref()); 303 + let text = we_encoding::decode(&parsed.data, encoding); 304 + Ok(Resource::Css { text, url }) 305 + } 306 + MimeClass::Image => Ok(Resource::Image { 307 + data: parsed.data, 308 + mime_type: mime.to_string(), 309 + url, 310 + }), 311 + MimeClass::Other => { 312 + if mime.starts_with("text/") { 313 + let encoding = charset_to_encoding(parsed.charset.as_deref()); 314 + let text = we_encoding::decode(&parsed.data, encoding); 315 + Ok(Resource::Other { 316 + data: text.into_bytes(), 317 + mime_type: mime.to_string(), 318 + url, 319 + }) 320 + } else { 321 + Ok(Resource::Other { 322 + data: parsed.data, 323 + mime_type: mime.to_string(), 324 + url, 325 + }) 326 + } 327 + } 328 + } 329 + } 330 + 331 + /// Map a charset name to an Encoding, defaulting to UTF-8. 
332 + fn charset_to_encoding(charset: Option<&str>) -> Encoding { 333 + charset 334 + .and_then(we_encoding::lookup) 335 + .unwrap_or(Encoding::Utf8) 336 + } 337 + 338 + // --------------------------------------------------------------------------- 263 339 // Tests 264 340 // --------------------------------------------------------------------------- 265 341 ··· 519 595 assert!(result.is_err()); 520 596 // The error should NOT be InvalidUrl — the URL resolved successfully. 521 597 assert!(!matches!(result, Err(LoadError::InvalidUrl(_)))); 598 + } 599 + 600 + // ----------------------------------------------------------------------- 601 + // Data URL loading 602 + // ----------------------------------------------------------------------- 603 + 604 + #[test] 605 + fn data_url_plain_text() { 606 + let mut loader = ResourceLoader::new(); 607 + let result = loader.fetch_url("data:text/plain,Hello%20World", None); 608 + assert!(result.is_ok()); 609 + match result.unwrap() { 610 + Resource::Other { 611 + data, mime_type, .. 612 + } => { 613 + assert_eq!(mime_type, "text/plain"); 614 + assert_eq!(String::from_utf8(data).unwrap(), "Hello World"); 615 + } 616 + other => panic!("expected Other, got {:?}", other), 617 + } 618 + } 619 + 620 + #[test] 621 + fn data_url_html() { 622 + let mut loader = ResourceLoader::new(); 623 + let result = loader.fetch_url("data:text/html,<h1>Hello</h1>", None); 624 + assert!(result.is_ok()); 625 + match result.unwrap() { 626 + Resource::Html { text, .. } => { 627 + assert_eq!(text, "<h1>Hello</h1>"); 628 + } 629 + other => panic!("expected Html, got {:?}", other), 630 + } 631 + } 632 + 633 + #[test] 634 + fn data_url_css() { 635 + let mut loader = ResourceLoader::new(); 636 + let result = loader.fetch_url("data:text/css,body{color:red}", None); 637 + assert!(result.is_ok()); 638 + match result.unwrap() { 639 + Resource::Css { text, .. 
} => { 640 + assert_eq!(text, "body{color:red}"); 641 + } 642 + other => panic!("expected Css, got {:?}", other), 643 + } 644 + } 645 + 646 + #[test] 647 + fn data_url_image() { 648 + let mut loader = ResourceLoader::new(); 649 + let result = loader.fetch_url("data:image/png;base64,/wCq", None); 650 + assert!(result.is_ok()); 651 + match result.unwrap() { 652 + Resource::Image { 653 + data, mime_type, .. 654 + } => { 655 + assert_eq!(mime_type, "image/png"); 656 + assert_eq!(data, vec![0xFF, 0x00, 0xAA]); 657 + } 658 + other => panic!("expected Image, got {:?}", other), 659 + } 660 + } 661 + 662 + #[test] 663 + fn data_url_base64() { 664 + let mut loader = ResourceLoader::new(); 665 + let result = loader.fetch_url("data:text/plain;base64,SGVsbG8=", None); 666 + assert!(result.is_ok()); 667 + match result.unwrap() { 668 + Resource::Other { data, .. } => { 669 + assert_eq!(String::from_utf8(data).unwrap(), "Hello"); 670 + } 671 + other => panic!("expected Other, got {:?}", other), 672 + } 673 + } 674 + 675 + #[test] 676 + fn data_url_empty() { 677 + let mut loader = ResourceLoader::new(); 678 + let result = loader.fetch_url("data:,", None); 679 + assert!(result.is_ok()); 680 + } 681 + 682 + #[test] 683 + fn data_url_via_fetch_method() { 684 + let mut loader = ResourceLoader::new(); 685 + let url = Url::parse("data:text/plain,Hello").unwrap(); 686 + let result = loader.fetch(&url); 687 + assert!(result.is_ok()); 688 + match result.unwrap() { 689 + Resource::Other { data, .. 
} => { 690 + assert_eq!(String::from_utf8(data).unwrap(), "Hello"); 691 + } 692 + other => panic!("expected Other, got {:?}", other), 693 + } 694 + } 695 + 696 + #[test] 697 + fn data_url_invalid() { 698 + let mut loader = ResourceLoader::new(); 699 + let result = loader.fetch_url("data:text/plain", None); 700 + assert!(matches!(result, Err(LoadError::InvalidUrl(_)))); 701 + } 702 + 703 + #[test] 704 + fn data_url_binary() { 705 + let mut loader = ResourceLoader::new(); 706 + let result = loader.fetch_url("data:application/octet-stream;base64,/wCq", None); 707 + assert!(result.is_ok()); 708 + match result.unwrap() { 709 + Resource::Other { 710 + data, mime_type, .. 711 + } => { 712 + assert_eq!(mime_type, "application/octet-stream"); 713 + assert_eq!(data, vec![0xFF, 0x00, 0xAA]); 714 + } 715 + other => panic!("expected Other, got {:?}", other), 716 + } 522 717 } 523 718 }
+487
crates/url/src/data_url.rs
//! Data URL parsing per RFC 2397.
//!
//! Parses `data:[<mediatype>][;base64],<data>` URLs into their components:
//! MIME type, optional charset, and decoded payload.
//!
//! The scheme, the `;base64` marker and the `charset` parameter name are all
//! matched ASCII case-insensitively. A trailing `#fragment` is not treated as
//! part of the payload.

/// The scheme prefix every data URL starts with (compared case-insensitively).
const DATA_SCHEME_PREFIX: &str = "data:";

/// Metadata suffix marking a base64 payload (compared case-insensitively).
const BASE64_MARKER: &str = ";base64";

/// Parameter prefix introducing the charset (compared case-insensitively).
const CHARSET_PREFIX: &str = "charset=";

/// A parsed data URL.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DataUrl {
    /// The MIME type (e.g., `text/plain`, `image/png`), lowercased.
    pub mime_type: String,
    /// Optional charset parameter from the MIME type, as written in the URL.
    pub charset: Option<String>,
    /// The decoded payload bytes.
    pub data: Vec<u8>,
}

/// Errors from parsing a data URL.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DataUrlError {
    /// Input does not start with `data:`.
    NotDataUrl,
    /// Missing comma separator between metadata and data.
    MissingComma,
    /// Base64 payload is malformed.
    InvalidBase64,
}

impl core::fmt::Display for DataUrlError {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        match self {
            Self::NotDataUrl => write!(f, "not a data URL"),
            Self::MissingComma => write!(f, "data URL missing comma separator"),
            Self::InvalidBase64 => write!(f, "invalid base64 in data URL"),
        }
    }
}

impl std::error::Error for DataUrlError {}

/// Strip `prefix` from the front of `s`, comparing ASCII case-insensitively.
///
/// Returns `None` when `s` is shorter than `prefix`, when the cut would fall
/// inside a multi-byte character, or when the leading bytes do not match.
fn strip_prefix_ignore_ascii_case<'a>(s: &'a str, prefix: &str) -> Option<&'a str> {
    let head = s.get(..prefix.len())?;
    if head.eq_ignore_ascii_case(prefix) {
        s.get(prefix.len()..)
    } else {
        None
    }
}

/// Strip `suffix` from the end of `s`, comparing ASCII case-insensitively.
///
/// Returns `None` when `s` is shorter than `suffix`, when the cut would fall
/// inside a multi-byte character, or when the trailing bytes do not match.
fn strip_suffix_ignore_ascii_case<'a>(s: &'a str, suffix: &str) -> Option<&'a str> {
    let split = s.len().checked_sub(suffix.len())?;
    let tail = s.get(split..)?;
    if tail.eq_ignore_ascii_case(suffix) {
        s.get(..split)
    } else {
        None
    }
}

/// Parse a data URL string into its components.
///
/// Format: `data:[<mediatype>][;base64],<data>`
///
/// If the media type is omitted, defaults to `text/plain;charset=US-ASCII`.
/// The data portion is either base64-decoded or percent-decoded depending on
/// whether `;base64` is present in the metadata. Anything from the first `#`
/// onwards is a URL fragment and is ignored; a literal `#` in the payload must
/// be written as `%23`.
///
/// # Errors
///
/// - [`DataUrlError::NotDataUrl`] if the input does not start with `data:`.
/// - [`DataUrlError::MissingComma`] if no `,` separates metadata from data.
/// - [`DataUrlError::InvalidBase64`] if a `;base64` payload fails to decode.
pub fn parse_data_url(url: &str) -> Result<DataUrl, DataUrlError> {
    // Must start with "data:" — URI schemes are case-insensitive.
    let rest =
        strip_prefix_ignore_ascii_case(url, DATA_SCHEME_PREFIX).ok_or(DataUrlError::NotDataUrl)?;

    // Drop the fragment so it is never decoded as payload.
    let rest = rest.split_once('#').map_or(rest, |(body, _fragment)| body);

    // Only the first comma separates metadata from data; later commas are payload.
    let (metadata, payload) = rest.split_once(',').ok_or(DataUrlError::MissingComma)?;

    // Check for the ;base64 flag at the end of the metadata.
    let (metadata, is_base64) = match strip_suffix_ignore_ascii_case(metadata, BASE64_MARKER) {
        Some(meta) => (meta, true),
        None => (metadata, false),
    };

    // Parse MIME type and charset.
    let (mime_type, charset) = parse_mime_type(metadata);

    // Decode the payload.
    let data = if is_base64 {
        base64_decode(payload).map_err(|_| DataUrlError::InvalidBase64)?
    } else {
        percent_decode_bytes(payload)
    };

    Ok(DataUrl {
        mime_type,
        charset,
        data,
    })
}

/// Returns true if the URL string starts with `data:` (ASCII case-insensitive).
pub fn is_data_url(url: &str) -> bool {
    strip_prefix_ignore_ascii_case(url, DATA_SCHEME_PREFIX).is_some()
}

/// Parse the MIME type portion of a data URL's metadata.
///
/// Returns (mime_type, optional_charset). If metadata is empty,
/// defaults to `text/plain` with charset `US-ASCII`. If only the MIME type is
/// empty (parameters are present), the type defaults to `text/plain` and the
/// charset is taken from the parameters, if any.
fn parse_mime_type(metadata: &str) -> (String, Option<String>) {
    if metadata.is_empty() {
        return ("text/plain".to_string(), Some("US-ASCII".to_string()));
    }

    // Split on the first ';' to separate the MIME type from its parameters.
    let (mime, params) = metadata.split_once(';').unwrap_or((metadata, ""));
    let mime = mime.trim();

    let mime_type = if mime.is_empty() {
        "text/plain".to_string()
    } else {
        mime.to_ascii_lowercase()
    };

    (mime_type, extract_charset(params))
}

/// Extract `charset=VALUE` from a `;`-separated parameter string.
///
/// The parameter name is matched ASCII case-insensitively; the value is
/// returned trimmed but otherwise as written. The first match wins.
fn extract_charset(params: &str) -> Option<String> {
    params.split(';').find_map(|param| {
        strip_prefix_ignore_ascii_case(param.trim(), CHARSET_PREFIX)
            .map(|value| value.trim().to_string())
    })
}

/// Percent-decode a string into raw bytes.
///
/// A `%` that is not followed by two hex digits is copied through unchanged.
fn percent_decode_bytes(input: &str) -> Vec<u8> {
    let bytes = input.as_bytes();
    let mut result = Vec::with_capacity(bytes.len());
    let mut i = 0;

    while i < bytes.len() {
        // `i + 2 < len` guarantees both hex digits are in bounds.
        if bytes[i] == b'%' && i + 2 < bytes.len() {
            if let (Some(hi), Some(lo)) = (hex_val(bytes[i + 1]), hex_val(bytes[i + 2])) {
                result.push((hi << 4) | lo);
                i += 3;
                continue;
            }
        }
        result.push(bytes[i]);
        i += 1;
    }

    result
}

/// Value of a single ASCII hex digit, or `None` if `b` is not a hex digit.
fn hex_val(b: u8) -> Option<u8> {
    match b {
        b'0'..=b'9' => Some(b - b'0'),
        b'a'..=b'f' => Some(b - b'a' + 10),
        b'A'..=b'F' => Some(b - b'A' + 10),
        _ => None,
    }
}

// ---------------------------------------------------------------------------
// Base64 decoder (RFC 4648)
// ---------------------------------------------------------------------------

/// Decode a base64-encoded string (standard alphabet, RFC 4648).
///
/// Ignores ASCII whitespace. Handles padding with `=`, which may only appear
/// at the very end of the input.
///
/// # Errors
///
/// - [`Base64Error::InvalidLength`] if the length after stripping whitespace
///   is not a multiple of 4.
/// - [`Base64Error::InvalidCharacter`] for a byte outside the alphabet.
/// - [`Base64Error::InvalidPadding`] for `=` followed by a non-`=` character
///   within a quantum, or for padding anywhere before the final quantum.
pub fn base64_decode(input: &str) -> Result<Vec<u8>, Base64Error> {
    // Strip whitespace.
    let clean: Vec<u8> = input
        .bytes()
        .filter(|b| !b.is_ascii_whitespace())
        .collect();

    if clean.is_empty() {
        return Ok(Vec::new());
    }

    // Length after stripping must be a multiple of 4.
    let quanta = clean.chunks_exact(4);
    if !quanta.remainder().is_empty() {
        return Err(Base64Error::InvalidLength);
    }

    // `clean` is non-empty and a multiple of 4 here, so there is at least one quantum.
    let last_quantum = clean.len() / 4 - 1;
    let mut result = Vec::with_capacity(clean.len() / 4 * 3);

    for (index, chunk) in quanta.enumerate() {
        let a = base64_val(chunk[0])?;
        let b = base64_val(chunk[1])?;

        // First byte is always present.
        result.push((a << 2) | (b >> 4));

        // Padding terminates the data, so it is only legal in the final quantum.
        let is_padded = chunk[2] == b'=' || chunk[3] == b'=';
        if is_padded && index != last_quantum {
            return Err(Base64Error::InvalidPadding);
        }

        if chunk[2] == b'=' {
            // Two padding chars — one output byte.
            if chunk[3] != b'=' {
                return Err(Base64Error::InvalidPadding);
            }
        } else {
            let c = base64_val(chunk[2])?;
            result.push((b << 4) | (c >> 2));

            if chunk[3] != b'=' {
                let d = base64_val(chunk[3])?;
                result.push((c << 6) | d);
            }
        }
    }

    Ok(result)
}

/// Base64 decoding error.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Base64Error {
    /// Invalid character in input.
    InvalidCharacter(u8),
    /// Input length is not a multiple of 4.
    InvalidLength,
    /// Invalid padding.
    InvalidPadding,
}

impl core::fmt::Display for Base64Error {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        match self {
            Self::InvalidCharacter(c) => write!(f, "invalid base64 character: 0x{c:02X}"),
            Self::InvalidLength => write!(f, "invalid base64 length"),
            Self::InvalidPadding => write!(f, "invalid base64 padding"),
        }
    }
}

impl std::error::Error for Base64Error {}

/// Map a base64 alphabet byte to its 6-bit value.
///
/// `=` is not part of the alphabet and is reported as an invalid character;
/// callers handle padding before calling this.
fn base64_val(b: u8) -> Result<u8, Base64Error> {
    match b {
        b'A'..=b'Z' => Ok(b - b'A'),
        b'a'..=b'z' => Ok(b - b'a' + 26),
        b'0'..=b'9' => Ok(b - b'0' + 52),
        b'+' => Ok(62),
        b'/' => Ok(63),
        _ => Err(Base64Error::InvalidCharacter(b)),
    }
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::*;

    // -----------------------------------------------------------------------
    // Base64 decoding
    // -----------------------------------------------------------------------

    #[test]
    fn base64_empty() {
        assert_eq!(base64_decode("").unwrap(), b"");
    }

    #[test]
    fn base64_hello() {
        assert_eq!(base64_decode("SGVsbG8=").unwrap(), b"Hello");
    }

    #[test]
    fn base64_hello_world() {
        assert_eq!(base64_decode("SGVsbG8gV29ybGQ=").unwrap(), b"Hello World");
    }

    #[test]
    fn base64_no_padding() {
        assert_eq!(base64_decode("YWJj").unwrap(), b"abc");
    }

    #[test]
    fn base64_one_pad() {
        assert_eq!(base64_decode("YWI=").unwrap(), b"ab");
    }

    #[test]
    fn base64_two_pad() {
        assert_eq!(base64_decode("YQ==").unwrap(), b"a");
    }

    #[test]
    fn base64_with_whitespace() {
        assert_eq!(base64_decode("SGVs\nbG8=").unwrap(), b"Hello");
    }

    #[test]
    fn base64_all_chars() {
        // Encode bytes 0..63 using standard alphabet.
        let encoded = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
        let decoded = base64_decode(encoded).unwrap();
        assert_eq!(decoded.len(), 48);
        // First byte: A(0)<<2 | B(1)>>4 = 0
        assert_eq!(decoded[0], 0x00);
    }

    #[test]
    fn base64_invalid_char() {
        assert!(matches!(
            base64_decode("SGV!bG8="),
            Err(Base64Error::InvalidCharacter(b'!'))
        ));
    }

    #[test]
    fn base64_invalid_length() {
        assert!(matches!(
            base64_decode("SGVsb"),
            Err(Base64Error::InvalidLength)
        ));
    }

    #[test]
    fn base64_invalid_padding() {
        assert!(matches!(
            base64_decode("SG=b"),
            Err(Base64Error::InvalidPadding)
        ));
    }

    #[test]
    fn base64_padding_before_final_quantum() {
        assert!(matches!(
            base64_decode("YQ==YWJj"),
            Err(Base64Error::InvalidPadding)
        ));
        assert!(matches!(
            base64_decode("YWI=YWJj"),
            Err(Base64Error::InvalidPadding)
        ));
        // Padding in the final quantum after full quanta is still fine.
        assert_eq!(base64_decode("YWJjYQ==").unwrap(), b"abca");
    }

    #[test]
    fn base64_binary_data() {
        // Raw bytes [0xFF, 0x00, 0xAA]
        assert_eq!(base64_decode("/wCq").unwrap(), vec![0xFF, 0x00, 0xAA]);
    }

    // -----------------------------------------------------------------------
    // Data URL parsing
    // -----------------------------------------------------------------------

    #[test]
    fn data_url_plain_text() {
        let result = parse_data_url("data:,Hello%20World").unwrap();
        assert_eq!(result.mime_type, "text/plain");
        assert_eq!(result.charset, Some("US-ASCII".to_string()));
        assert_eq!(result.data, b"Hello World");
    }

    #[test]
    fn data_url_explicit_mime() {
        let result = parse_data_url("data:text/html,<h1>Hello</h1>").unwrap();
        assert_eq!(result.mime_type, "text/html");
        assert_eq!(result.charset, None);
        assert_eq!(result.data, b"<h1>Hello</h1>");
    }

    #[test]
    fn data_url_with_charset() {
        let result = parse_data_url("data:text/plain;charset=utf-8,Hello").unwrap();
        assert_eq!(result.mime_type, "text/plain");
        assert_eq!(result.charset, Some("utf-8".to_string()));
        assert_eq!(result.data, b"Hello");
    }

    #[test]
    fn data_url_base64() {
        let result = parse_data_url("data:text/plain;base64,SGVsbG8=").unwrap();
        assert_eq!(result.mime_type, "text/plain");
        assert_eq!(result.data, b"Hello");
    }

    #[test]
    fn data_url_base64_image() {
        // Minimal data: 3 bytes as base64.
        let result = parse_data_url("data:image/png;base64,/wCq").unwrap();
        assert_eq!(result.mime_type, "image/png");
        assert_eq!(result.data, vec![0xFF, 0x00, 0xAA]);
    }

    #[test]
    fn data_url_base64_with_charset() {
        let result = parse_data_url("data:text/plain;charset=utf-8;base64,SGVsbG8=").unwrap();
        assert_eq!(result.mime_type, "text/plain");
        assert_eq!(result.charset, Some("utf-8".to_string()));
        assert_eq!(result.data, b"Hello");
    }

    #[test]
    fn data_url_empty_data() {
        let result = parse_data_url("data:,").unwrap();
        assert_eq!(result.mime_type, "text/plain");
        assert_eq!(result.data, b"");
    }

    #[test]
    fn data_url_empty_base64() {
        let result = parse_data_url("data:;base64,").unwrap();
        assert_eq!(result.mime_type, "text/plain");
        assert_eq!(result.data, b"");
    }

    #[test]
    fn data_url_not_data() {
        assert!(matches!(
            parse_data_url("http://example.com"),
            Err(DataUrlError::NotDataUrl)
        ));
    }

    #[test]
    fn data_url_missing_comma() {
        assert!(matches!(
            parse_data_url("data:text/plain"),
            Err(DataUrlError::MissingComma)
        ));
    }

    #[test]
    fn data_url_invalid_base64() {
        assert!(matches!(
            parse_data_url("data:;base64,!!!"),
            Err(DataUrlError::InvalidBase64)
        ));
    }

    #[test]
    fn data_url_percent_encoded() {
        let result = parse_data_url("data:text/plain,%48%65%6C%6C%6F").unwrap();
        assert_eq!(result.data, b"Hello");
    }

    #[test]
    fn data_url_mime_case_insensitive() {
        let result = parse_data_url("data:Text/HTML,<p>hi</p>").unwrap();
        assert_eq!(result.mime_type, "text/html");
    }

    #[test]
    fn data_url_scheme_case_insensitive() {
        let result = parse_data_url("DATA:text/plain,Hello").unwrap();
        assert_eq!(result.mime_type, "text/plain");
        assert_eq!(result.data, b"Hello");
    }

    #[test]
    fn data_url_base64_marker_case_insensitive() {
        let result = parse_data_url("data:text/plain;BASE64,SGVsbG8=").unwrap();
        assert_eq!(result.mime_type, "text/plain");
        assert_eq!(result.data, b"Hello");
    }

    #[test]
    fn data_url_charset_name_case_insensitive() {
        let result = parse_data_url("data:text/plain;Charset=utf-8,Hello").unwrap();
        assert_eq!(result.charset, Some("utf-8".to_string()));
    }

    #[test]
    fn data_url_fragment_ignored() {
        let result = parse_data_url("data:text/html,<p>hi</p>#top").unwrap();
        assert_eq!(result.data, b"<p>hi</p>");
        // An encoded '#' is payload, not a fragment delimiter.
        let result = parse_data_url("data:text/plain,a%23b").unwrap();
        assert_eq!(result.data, b"a#b");
    }

    #[test]
    fn data_url_comma_in_data() {
        // Only the first comma splits metadata from data.
        let result = parse_data_url("data:text/plain,a,b,c").unwrap();
        assert_eq!(result.data, b"a,b,c");
    }

    #[test]
    fn is_data_url_positive() {
        assert!(is_data_url("data:text/plain,hello"));
    }

    #[test]
    fn is_data_url_case_insensitive() {
        assert!(is_data_url("Data:text/plain,hello"));
    }

    #[test]
    fn is_data_url_negative() {
        assert!(!is_data_url("http://example.com"));
    }

    // -----------------------------------------------------------------------
    // percent_decode_bytes
    // -----------------------------------------------------------------------

    #[test]
    fn percent_decode_basic() {
        assert_eq!(percent_decode_bytes("Hello%20World"), b"Hello World");
    }

    #[test]
    fn percent_decode_no_encoding() {
        assert_eq!(percent_decode_bytes("Hello"), b"Hello");
    }

    #[test]
    fn percent_decode_incomplete_sequence() {
        assert_eq!(percent_decode_bytes("100%"), b"100%");
    }

    #[test]
    fn percent_decode_binary() {
        assert_eq!(percent_decode_bytes("%FF%00"), vec![0xFF, 0x00]);
    }

    // -----------------------------------------------------------------------
    // MIME parsing
    // -----------------------------------------------------------------------

    #[test]
    fn mime_empty_defaults() {
        let (mime, charset) = parse_mime_type("");
        assert_eq!(mime, "text/plain");
        assert_eq!(charset, Some("US-ASCII".to_string()));
    }

    #[test]
    fn mime_with_charset() {
        let (mime, charset) = parse_mime_type("text/html;charset=utf-8");
        assert_eq!(mime, "text/html");
        assert_eq!(charset, Some("utf-8".to_string()));
    }

    #[test]
    fn mime_no_charset() {
        let (mime, charset) = parse_mime_type("image/png");
        assert_eq!(mime, "image/png");
        assert_eq!(charset, None);
    }
}
+2
crates/url/src/lib.rs
··· 10 10 //! - URL serialization 11 11 //! - Origin derivation 12 12 13 + pub mod data_url; 14 + 13 15 use core::fmt; 14 16 15 17 // ---------------------------------------------------------------------------