we (web engine): Experimental web browser project to understand the limits of Claude
2
fork

Configure Feed

Select the types of activity you want to include in your feed.

Implement Same-Origin Policy enforcement

Add the foundational security primitive for the browser engine:

- Origin::same_origin() with default port normalization (http/80, https/443, ftp/21)
- Origin::serialize() and Display impl for CORS header comparison
- ResourceLoader::fetch_subresource() with SOP enforcement per resource type:
  scripts, stylesheets, fonts, and fetch() blocked cross-origin without CORS;
  images and navigation always allowed
- JS fetch() API SOP enforcement via document origin thread-local
- DomBridge origin field and document.domain property for future iframe isolation
- Updated CSS loader, script loader, and font loader to use SOP-enforcing methods

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

+788 -25
+35 -9
crates/browser/src/css_loader.rs
··· 6 6 7 7 use we_css::parser::{ImportRule, Parser, Rule, Stylesheet}; 8 8 use we_dom::{Document, NodeData, NodeId}; 9 - use we_url::Url; 9 + use we_url::{Origin, Url}; 10 10 11 - use crate::loader::{LoadError, Resource, ResourceLoader}; 11 + use crate::loader::{LoadError, Resource, ResourceLoader, ResourceRequestType}; 12 12 13 13 /// Maximum depth for `@import` resolution to prevent cycles. 14 14 const MAX_IMPORT_DEPTH: usize = 5; ··· 51 51 loader: &mut ResourceLoader, 52 52 base_url: &Url, 53 53 ) -> Stylesheet { 54 + let document_origin = base_url.origin(); 55 + collect_stylesheets_with_origin(doc, loader, base_url, &document_origin) 56 + } 57 + 58 + /// Collect all CSS rules with explicit Same-Origin Policy enforcement. 59 + pub fn collect_stylesheets_with_origin( 60 + doc: &Document, 61 + loader: &mut ResourceLoader, 62 + base_url: &Url, 63 + document_origin: &Origin, 64 + ) -> Stylesheet { 54 65 let mut all_rules: Vec<Rule> = Vec::new(); 55 66 let mut style_nodes = Vec::new(); 56 67 collect_style_nodes(doc, doc.root(), &mut style_nodes); ··· 59 70 match classify_style_node(doc, node) { 60 71 StyleSource::InlineStyle(css_text) => { 61 72 let sheet = Parser::parse(&css_text); 62 - let resolved = resolve_imports(sheet, loader, base_url, 0); 73 + let resolved = resolve_imports(sheet, loader, base_url, document_origin, 0); 63 74 all_rules.extend(resolved.rules); 64 75 } 65 76 StyleSource::ExternalLink { href, media } => { 66 77 if !media_matches(&media) { 67 78 continue; 68 79 } 69 - match fetch_stylesheet(loader, &href, base_url, 0) { 80 + match fetch_stylesheet(loader, &href, base_url, document_origin, 0) { 70 81 Ok(sheet) => all_rules.extend(sheet.rules), 71 82 Err(_) => { 72 83 // Graceful degradation: skip failed stylesheet loads. 
··· 188 199 loader: &mut ResourceLoader, 189 200 href: &str, 190 201 base_url: &Url, 202 + document_origin: &Origin, 191 203 depth: usize, 192 204 ) -> Result<Stylesheet, CssLoadError> { 193 - let resource = loader.fetch_url(href, Some(base_url))?; 205 + let resource = loader.fetch_url_subresource( 206 + href, 207 + Some(base_url), 208 + document_origin, 209 + ResourceRequestType::Stylesheet, 210 + )?; 194 211 195 212 let (css_text, resolved_url) = match resource { 196 213 Resource::Css { text, url } => (text, url), ··· 217 234 }; 218 235 219 236 let sheet = Parser::parse(&css_text); 220 - Ok(resolve_imports(sheet, loader, &resolved_url, depth)) 237 + Ok(resolve_imports( 238 + sheet, 239 + loader, 240 + &resolved_url, 241 + document_origin, 242 + depth, 243 + )) 221 244 } 222 245 223 246 /// Resolve `@import` rules in a stylesheet by fetching and inlining imported sheets. ··· 228 251 sheet: Stylesheet, 229 252 loader: &mut ResourceLoader, 230 253 base_url: &Url, 254 + document_origin: &Origin, 231 255 depth: usize, 232 256 ) -> Stylesheet { 233 257 if depth >= MAX_IMPORT_DEPTH { ··· 246 270 for rule in sheet.rules { 247 271 match rule { 248 272 Rule::Import(ImportRule { ref url }) => { 249 - match fetch_stylesheet(loader, url, base_url, depth + 1) { 273 + match fetch_stylesheet(loader, url, base_url, document_origin, depth + 1) { 250 274 Ok(imported) => resolved_rules.extend(imported.rules), 251 275 Err(_) => { 252 276 // Graceful degradation: skip failed imports. 
··· 725 749 726 750 let mut loader = ResourceLoader::new(); 727 751 let base = Url::parse("http://example.com/").unwrap(); 752 + let origin = base.origin(); 728 753 729 - let resolved = resolve_imports(sheet, &mut loader, &base, MAX_IMPORT_DEPTH); 754 + let resolved = resolve_imports(sheet, &mut loader, &base, &origin, MAX_IMPORT_DEPTH); 730 755 // Import should be stripped, style rule kept 731 756 assert_eq!(resolved.rules.len(), 1); 732 757 assert!(matches!(resolved.rules[0], Rule::Style(_))); ··· 750 775 751 776 let mut loader = ResourceLoader::new(); 752 777 let base = Url::parse("http://example.com/").unwrap(); 778 + let origin = base.origin(); 753 779 754 - let resolved = resolve_imports(sheet, &mut loader, &base, 0); 780 + let resolved = resolve_imports(sheet, &mut loader, &base, &origin, 0); 755 781 // Failed import should be skipped, style rule kept 756 782 assert_eq!(resolved.rules.len(), 1); 757 783 assert!(matches!(resolved.rules[0], Rule::Style(_)));
+32 -7
crates/browser/src/font_loader.rs
··· 6 6 7 7 use we_css::parser::{FontDisplay, FontFaceRule, FontFaceStyle, Rule, Stylesheet}; 8 8 use we_text::font::registry::{FontRegistry, WebFontState}; 9 - use we_url::Url; 9 + use we_url::{Origin, Url}; 10 10 11 - use crate::loader::{Resource, ResourceLoader}; 11 + use crate::loader::{Resource, ResourceLoader, ResourceRequestType}; 12 12 13 13 /// Result of loading web fonts for a page. 14 14 #[derive(Debug)] ··· 34 34 base_url: &Url, 35 35 registry: &mut FontRegistry, 36 36 ) -> WebFontResult { 37 + let document_origin = base_url.origin(); 38 + load_web_fonts_with_origin(stylesheet, loader, base_url, registry, &document_origin) 39 + } 40 + 41 + /// Load web fonts with explicit Same-Origin Policy enforcement. 42 + pub fn load_web_fonts_with_origin( 43 + stylesheet: &Stylesheet, 44 + loader: &mut ResourceLoader, 45 + base_url: &Url, 46 + registry: &mut FontRegistry, 47 + document_origin: &Origin, 48 + ) -> WebFontResult { 37 49 let mut result = WebFontResult { 38 50 loaded: 0, 39 51 failed: 0, ··· 42 54 let font_face_rules = collect_font_face_rules(&stylesheet.rules); 43 55 44 56 for rule in font_face_rules { 45 - if try_load_font_face(rule, loader, base_url, registry) { 57 + if try_load_font_face(rule, loader, base_url, registry, document_origin) { 46 58 result.loaded += 1; 47 59 } else { 48 60 result.failed += 1; ··· 76 88 loader: &mut ResourceLoader, 77 89 base_url: &Url, 78 90 registry: &mut FontRegistry, 91 + document_origin: &Origin, 79 92 ) -> bool { 80 93 let italic = rule.style == FontFaceStyle::Italic || rule.style == FontFaceStyle::Oblique; 81 94 82 95 for source in &rule.sources { 83 96 // Try to fetch the font data. 84 - let data = match fetch_font_data(loader, &source.url, base_url) { 97 + let data = match fetch_font_data(loader, &source.url, base_url, document_origin) { 85 98 Some(d) => d, 86 99 None => continue, 87 100 }; ··· 97 110 false 98 111 } 99 112 100 - /// Fetch raw font data from a URL. 
101 - fn fetch_font_data(loader: &mut ResourceLoader, url: &str, base_url: &Url) -> Option<Vec<u8>> { 102 - let resource = loader.fetch_url(url, Some(base_url)).ok()?; 113 + /// Fetch raw font data from a URL with SOP enforcement. 114 + fn fetch_font_data( 115 + loader: &mut ResourceLoader, 116 + url: &str, 117 + base_url: &Url, 118 + document_origin: &Origin, 119 + ) -> Option<Vec<u8>> { 120 + let resource = loader 121 + .fetch_url_subresource( 122 + url, 123 + Some(base_url), 124 + document_origin, 125 + ResourceRequestType::Font, 126 + ) 127 + .ok()?; 103 128 match resource { 104 129 Resource::Other { data, .. } => Some(data), 105 130 Resource::Image { data, .. } => Some(data),
+287 -1
crates/browser/src/loader.rs
··· 11 11 use we_net::client::{ClientError, HttpClient}; 12 12 use we_net::http::ContentType; 13 13 use we_url::data_url::{is_data_url, parse_data_url}; 14 - use we_url::Url; 14 + use we_url::{Origin, Url}; 15 15 16 16 // --------------------------------------------------------------------------- 17 17 // Error type ··· 28 28 HttpStatus { status: u16, reason: String }, 29 29 /// Encoding or decoding error. 30 30 Encoding(String), 31 + /// Blocked by Same-Origin Policy. 32 + CrossOriginBlocked { url: String }, 31 33 } 32 34 33 35 impl fmt::Display for LoadError { ··· 39 41 write!(f, "HTTP {status} {reason}") 40 42 } 41 43 Self::Encoding(s) => write!(f, "encoding error: {s}"), 44 + Self::CrossOriginBlocked { url } => { 45 + write!(f, "cross-origin request blocked: {url}") 46 + } 42 47 } 43 48 } 49 + } 50 + 51 + /// The type of subresource being fetched, used to determine SOP behavior. 52 + #[derive(Debug, Clone, Copy, PartialEq, Eq)] 53 + pub enum ResourceRequestType { 54 + /// Scripts (`<script src>`). Cross-origin blocked without CORS. 55 + Script, 56 + /// Stylesheets (`<link rel="stylesheet">`). Cross-origin blocked without CORS. 57 + Stylesheet, 58 + /// Fetch/XHR API requests. Cross-origin blocked without CORS. 59 + Fetch, 60 + /// Images (`<img src>`). Cross-origin allowed (opaque response). 61 + Image, 62 + /// Fonts (`@font-face src`). Cross-origin blocked without CORS. 63 + Font, 64 + /// Top-level navigation. Always allowed. 65 + Navigation, 44 66 } 45 67 46 68 impl From<ClientError> for LoadError { ··· 183 205 } 184 206 } 185 207 208 + /// Fetch a subresource with Same-Origin Policy enforcement. 209 + /// 210 + /// Checks the resource URL's origin against the document origin. For 211 + /// cross-origin requests without CORS headers, scripts, stylesheets, 212 + /// fetch, and font loads are blocked. Images and navigations are allowed. 
213 + pub fn fetch_subresource( 214 + &mut self, 215 + url: &Url, 216 + document_origin: &Origin, 217 + request_type: ResourceRequestType, 218 + ) -> Result<Resource, LoadError> { 219 + // data: and about: URLs are always allowed (local, no network). 220 + if url.scheme() == "data" || url.scheme() == "about" { 221 + return self.fetch(url); 222 + } 223 + 224 + // Navigation and images are always allowed cross-origin. 225 + if request_type == ResourceRequestType::Navigation 226 + || request_type == ResourceRequestType::Image 227 + { 228 + return self.fetch(url); 229 + } 230 + 231 + let resource_origin = url.origin(); 232 + if document_origin.same_origin(&resource_origin) { 233 + return self.fetch(url); 234 + } 235 + 236 + // Cross-origin: perform the fetch but check for CORS headers. 237 + let response = self.client.get(url)?; 238 + 239 + if response.status_code >= 400 { 240 + return Err(LoadError::HttpStatus { 241 + status: response.status_code, 242 + reason: response.reason.clone(), 243 + }); 244 + } 245 + 246 + // Check Access-Control-Allow-Origin header. 247 + let doc_origin_str = document_origin.serialize(); 248 + let allowed = response 249 + .headers 250 + .get("access-control-allow-origin") 251 + .map(|v| { 252 + let v = v.trim(); 253 + v == "*" || v == doc_origin_str 254 + }) 255 + .unwrap_or(false); 256 + 257 + if !allowed { 258 + return Err(LoadError::CrossOriginBlocked { 259 + url: url.serialize(), 260 + }); 261 + } 262 + 263 + // CORS allows it — decode as normal. 
264 + let content_type = response.content_type(); 265 + let mime = content_type 266 + .as_ref() 267 + .map(|ct| ct.mime_type.as_str()) 268 + .unwrap_or("application/octet-stream"); 269 + 270 + match classify_mime(mime) { 271 + MimeClass::Html => { 272 + let (text, encoding) = 273 + decode_text_resource(&response.body, content_type.as_ref(), true); 274 + Ok(Resource::Html { 275 + text, 276 + base_url: url.clone(), 277 + encoding, 278 + }) 279 + } 280 + MimeClass::Css => { 281 + let (text, _encoding) = 282 + decode_text_resource(&response.body, content_type.as_ref(), false); 283 + Ok(Resource::Css { 284 + text, 285 + url: url.clone(), 286 + }) 287 + } 288 + MimeClass::Script => { 289 + let (text, _encoding) = 290 + decode_text_resource(&response.body, content_type.as_ref(), false); 291 + Ok(Resource::Script { 292 + text, 293 + url: url.clone(), 294 + }) 295 + } 296 + MimeClass::Image => Ok(Resource::Image { 297 + data: response.body, 298 + mime_type: mime.to_string(), 299 + url: url.clone(), 300 + }), 301 + MimeClass::Other => { 302 + if mime.starts_with("text/") { 303 + let (text, _encoding) = 304 + decode_text_resource(&response.body, content_type.as_ref(), false); 305 + Ok(Resource::Other { 306 + data: text.into_bytes(), 307 + mime_type: mime.to_string(), 308 + url: url.clone(), 309 + }) 310 + } else { 311 + Ok(Resource::Other { 312 + data: response.body, 313 + mime_type: mime.to_string(), 314 + url: url.clone(), 315 + }) 316 + } 317 + } 318 + } 319 + } 320 + 186 321 /// Fetch a URL string, resolving it against an optional base URL. 187 322 /// 188 323 /// Handles `data:` and `about:` URLs locally without network access. ··· 206 341 None => Url::parse(url_str).map_err(|_| LoadError::InvalidUrl(url_str.to_string()))?, 207 342 }; 208 343 self.fetch(&url) 344 + } 345 + 346 + /// Fetch a URL string as a subresource with SOP enforcement. 
347 + /// 348 + /// Resolves `url_str` against `base`, then applies Same-Origin Policy checks 349 + /// based on the `document_origin` and `request_type`. 350 + pub fn fetch_url_subresource( 351 + &mut self, 352 + url_str: &str, 353 + base: Option<&Url>, 354 + document_origin: &Origin, 355 + request_type: ResourceRequestType, 356 + ) -> Result<Resource, LoadError> { 357 + // data: and about: URLs are always allowed. 358 + if is_data_url(url_str) { 359 + return fetch_data_url(url_str); 360 + } 361 + if url_str.starts_with("about:") { 362 + let url = 363 + Url::parse(url_str).map_err(|_| LoadError::InvalidUrl(url_str.to_string()))?; 364 + return fetch_about_url(&url); 365 + } 366 + 367 + let url = match base { 368 + Some(base_url) => Url::parse_with_base(url_str, base_url) 369 + .or_else(|_| Url::parse(url_str)) 370 + .map_err(|_| LoadError::InvalidUrl(url_str.to_string()))?, 371 + None => Url::parse(url_str).map_err(|_| LoadError::InvalidUrl(url_str.to_string()))?, 372 + }; 373 + self.fetch_subresource(&url, document_origin, request_type) 209 374 } 210 375 } 211 376 ··· 849 1014 let mut loader = ResourceLoader::new(); 850 1015 let result = loader.fetch_url("about:invalid", None); 851 1016 assert!(matches!(result, Err(LoadError::InvalidUrl(_)))); 1017 + } 1018 + 1019 + // ----------------------------------------------------------------------- 1020 + // Same-Origin Policy enforcement 1021 + // ----------------------------------------------------------------------- 1022 + 1023 + #[test] 1024 + fn sop_same_origin_fetch_allowed() { 1025 + // Same-origin subresource fetches should behave like normal fetch. 1026 + let mut loader = ResourceLoader::new(); 1027 + let doc_url = Url::parse("http://example.com/page").unwrap(); 1028 + let doc_origin = doc_url.origin(); 1029 + let resource_url = Url::parse("http://example.com/script.js").unwrap(); 1030 + 1031 + // This will fail with a network error (no server), but NOT a 1032 + // CrossOriginBlocked error. 
1033 + let result = 1034 + loader.fetch_subresource(&resource_url, &doc_origin, ResourceRequestType::Script); 1035 + assert!(result.is_err()); 1036 + assert!( 1037 + !matches!(result, Err(LoadError::CrossOriginBlocked { .. })), 1038 + "same-origin fetch should not be blocked by SOP" 1039 + ); 1040 + } 1041 + 1042 + #[test] 1043 + fn sop_cross_origin_script_blocked() { 1044 + // Cross-origin script fetch should be blocked (no CORS headers). 1045 + let mut loader = ResourceLoader::new(); 1046 + let doc_url = Url::parse("http://example.com/page").unwrap(); 1047 + let doc_origin = doc_url.origin(); 1048 + let resource_url = Url::parse("http://evil.com/malicious.js").unwrap(); 1049 + 1050 + let result = 1051 + loader.fetch_subresource(&resource_url, &doc_origin, ResourceRequestType::Script); 1052 + // Will either be network error (can't connect) or CrossOriginBlocked. 1053 + // In tests we can't actually connect, so network error is expected. 1054 + assert!(result.is_err()); 1055 + } 1056 + 1057 + #[test] 1058 + fn sop_cross_origin_image_allowed() { 1059 + // Cross-origin image loads should always be allowed (opaque response). 1060 + let mut loader = ResourceLoader::new(); 1061 + let doc_url = Url::parse("http://example.com/page").unwrap(); 1062 + let doc_origin = doc_url.origin(); 1063 + let resource_url = Url::parse("http://cdn.other.com/image.png").unwrap(); 1064 + 1065 + let result = 1066 + loader.fetch_subresource(&resource_url, &doc_origin, ResourceRequestType::Image); 1067 + // Should not be CrossOriginBlocked (will be network error since no server). 1068 + assert!( 1069 + !matches!(result, Err(LoadError::CrossOriginBlocked { .. })), 1070 + "image loads should not be blocked by SOP" 1071 + ); 1072 + } 1073 + 1074 + #[test] 1075 + fn sop_navigation_always_allowed() { 1076 + // Top-level navigation should never be blocked by SOP. 
1077 + let mut loader = ResourceLoader::new(); 1078 + let doc_url = Url::parse("http://example.com/page").unwrap(); 1079 + let doc_origin = doc_url.origin(); 1080 + let target_url = Url::parse("http://other.com/").unwrap(); 1081 + 1082 + let result = 1083 + loader.fetch_subresource(&target_url, &doc_origin, ResourceRequestType::Navigation); 1084 + assert!( 1085 + !matches!(result, Err(LoadError::CrossOriginBlocked { .. })), 1086 + "navigation should not be blocked by SOP" 1087 + ); 1088 + } 1089 + 1090 + #[test] 1091 + fn sop_data_url_always_allowed() { 1092 + // data: URLs should always be allowed regardless of origin. 1093 + let mut loader = ResourceLoader::new(); 1094 + let doc_url = Url::parse("http://example.com/page").unwrap(); 1095 + let doc_origin = doc_url.origin(); 1096 + let data_url = Url::parse("data:text/plain,Hello").unwrap(); 1097 + 1098 + let result = loader.fetch_subresource(&data_url, &doc_origin, ResourceRequestType::Script); 1099 + assert!(result.is_ok()); 1100 + } 1101 + 1102 + #[test] 1103 + fn sop_about_url_always_allowed() { 1104 + let mut loader = ResourceLoader::new(); 1105 + let doc_url = Url::parse("http://example.com/page").unwrap(); 1106 + let doc_origin = doc_url.origin(); 1107 + let about_url = Url::parse("about:blank").unwrap(); 1108 + 1109 + let result = 1110 + loader.fetch_subresource(&about_url, &doc_origin, ResourceRequestType::Navigation); 1111 + assert!(result.is_ok()); 1112 + } 1113 + 1114 + #[test] 1115 + fn sop_fetch_url_subresource_data_url() { 1116 + let mut loader = ResourceLoader::new(); 1117 + let doc_url = Url::parse("http://example.com/page").unwrap(); 1118 + let doc_origin = doc_url.origin(); 1119 + 1120 + let result = loader.fetch_url_subresource( 1121 + "data:text/plain,Hello", 1122 + None, 1123 + &doc_origin, 1124 + ResourceRequestType::Fetch, 1125 + ); 1126 + assert!(result.is_ok()); 1127 + } 1128 + 1129 + #[test] 1130 + fn cross_origin_blocked_display() { 1131 + let e = LoadError::CrossOriginBlocked { 1132 + 
url: "http://evil.com/steal.js".to_string(), 1133 + }; 1134 + assert_eq!( 1135 + e.to_string(), 1136 + "cross-origin request blocked: http://evil.com/steal.js" 1137 + ); 852 1138 } 853 1139 }
+34 -7
crates/browser/src/script_loader.rs
··· 3 3 //! Walks the DOM for `<script>` elements, fetches external scripts, and 4 4 //! executes them in a shared JS VM with DOM access. 5 5 6 - use crate::loader::{Resource, ResourceLoader}; 6 + use crate::loader::{Resource, ResourceLoader, ResourceRequestType}; 7 7 use we_dom::{Document, NodeId}; 8 8 use we_js::compiler; 9 9 use we_js::parser::Parser; 10 10 use we_js::vm::Vm; 11 - use we_url::Url; 11 + use we_url::{Origin, Url}; 12 12 13 13 /// Information about a `<script>` element extracted from the DOM. 14 14 struct ScriptInfo { ··· 91 91 } 92 92 } 93 93 94 - /// Fetch the text of an external script. 95 - fn fetch_script_text(loader: &mut ResourceLoader, src: &str, base_url: &Url) -> Option<String> { 96 - match loader.fetch_url(src, Some(base_url)) { 94 + /// Fetch the text of an external script with SOP enforcement. 95 + fn fetch_script_text( 96 + loader: &mut ResourceLoader, 97 + src: &str, 98 + base_url: &Url, 99 + document_origin: &Origin, 100 + ) -> Option<String> { 101 + match loader.fetch_url_subresource( 102 + src, 103 + Some(base_url), 104 + document_origin, 105 + ResourceRequestType::Script, 106 + ) { 97 107 Ok(Resource::Script { text, .. }) => Some(text), 98 108 Ok(Resource::Other { data, .. }) => { 99 109 // Try decoding as UTF-8 (servers may not set correct MIME type). ··· 154 164 loader: &mut ResourceLoader, 155 165 base_url: &Url, 156 166 ) -> Document { 167 + let document_origin = base_url.origin(); 168 + execute_page_scripts_with_origin(doc, loader, base_url, &document_origin) 169 + } 170 + 171 + /// Execute all `<script>` elements with explicit Same-Origin Policy enforcement. 172 + pub fn execute_page_scripts_with_origin( 173 + doc: Document, 174 + loader: &mut ResourceLoader, 175 + base_url: &Url, 176 + document_origin: &Origin, 177 + ) -> Document { 157 178 // Find all <script> elements in document order. 
158 179 let mut script_nodes = Vec::new(); 159 180 let root = doc.root(); ··· 174 195 let mut vm = Vm::new(); 175 196 vm.attach_document(doc); 176 197 198 + // Set the document origin for SOP enforcement in the fetch API and DOM bridge. 199 + let origin_str = document_origin.serialize(); 200 + we_js::fetch::set_document_origin(&origin_str); 201 + vm.set_document_origin(&origin_str); 202 + 177 203 // Separate into immediate (sync + async) and deferred scripts. 178 204 let mut deferred_sources: Vec<(String, String)> = Vec::new(); 179 205 ··· 181 207 // Resolve the script source text. 182 208 let (source, label) = if let Some(ref src) = info.src { 183 209 // External script: fetch it. 184 - match fetch_script_text(loader, src, base_url) { 210 + match fetch_script_text(loader, src, base_url, document_origin) { 185 211 Some(text) => (text, src.clone()), 186 212 None => continue, 187 213 } ··· 212 238 // Pump the event loop to handle any pending microtasks/timers. 213 239 let _ = vm.pump_event_loop(); 214 240 215 - // Take the document back from the VM. 241 + // Clear the document origin and take the document back from the VM. 242 + we_js::fetch::clear_document_origin(); 216 243 vm.detach_document().unwrap_or_default() 217 244 } 218 245
+111
crates/js/src/dom_bridge.rs
··· 229 229 } 230 230 } 231 231 232 + // Set document.domain from the document origin. 233 + // Per spec, this is the hostname portion of the document's origin. 234 + if let Some(bridge) = &vm.dom_bridge { 235 + let origin_str = bridge.origin.borrow().clone(); 236 + let domain = extract_domain_from_origin(&origin_str); 237 + set_builtin_prop(&mut vm.gc, doc_ref, "domain", Value::String(domain)); 238 + } 239 + 232 240 // Register methods on the document object. 233 241 let methods: &[NativeMethod] = &[ 234 242 ("getElementById", doc_get_element_by_id), ··· 2097 2105 } 2098 2106 } 2099 2107 2108 + // ── Origin helpers ────────────────────────────────────────────────── 2109 + 2110 + /// Extract the hostname from a serialized origin string. 2111 + /// 2112 + /// For example, `"https://example.com"` → `"example.com"`, 2113 + /// `"http://example.com:8080"` → `"example.com"`. 2114 + /// Returns an empty string for opaque origins (`"null"`) or invalid input. 2115 + fn extract_domain_from_origin(origin: &str) -> String { 2116 + if origin == "null" || origin.is_empty() { 2117 + return String::new(); 2118 + } 2119 + // Origin format: scheme://host or scheme://host:port 2120 + if let Some(after_scheme) = origin.find("://") { 2121 + let host_part = &origin[after_scheme + 3..]; 2122 + // Strip port if present 2123 + if let Some(colon) = host_part.rfind(':') { 2124 + // Make sure this isn't part of IPv6 (which would have brackets) 2125 + if !host_part.contains('[') { 2126 + return host_part[..colon].to_string(); 2127 + } 2128 + } 2129 + host_part.to_string() 2130 + } else { 2131 + String::new() 2132 + } 2133 + } 2134 + 2100 2135 // ── Tests ─────────────────────────────────────────────────────────── 2101 2136 2102 2137 #[cfg(test)] ··· 3338 3373 ) 3339 3374 .unwrap(); 3340 3375 assert!(matches!(result, Value::Boolean(true))); 3376 + } 3377 + 3378 + // ----------------------------------------------------------------------- 3379 + // Origin helpers 3380 + // 
----------------------------------------------------------------------- 3381 + 3382 + #[test] 3383 + fn extract_domain_https() { 3384 + assert_eq!( 3385 + extract_domain_from_origin("https://example.com"), 3386 + "example.com" 3387 + ); 3388 + } 3389 + 3390 + #[test] 3391 + fn extract_domain_with_port() { 3392 + assert_eq!( 3393 + extract_domain_from_origin("http://example.com:8080"), 3394 + "example.com" 3395 + ); 3396 + } 3397 + 3398 + #[test] 3399 + fn extract_domain_opaque() { 3400 + assert_eq!(extract_domain_from_origin("null"), ""); 3401 + } 3402 + 3403 + #[test] 3404 + fn extract_domain_empty() { 3405 + assert_eq!(extract_domain_from_origin(""), ""); 3406 + } 3407 + 3408 + #[test] 3409 + fn extract_domain_ip() { 3410 + assert_eq!(extract_domain_from_origin("http://127.0.0.1"), "127.0.0.1"); 3411 + } 3412 + 3413 + #[test] 3414 + fn extract_domain_ip_with_port() { 3415 + assert_eq!( 3416 + extract_domain_from_origin("http://127.0.0.1:3000"), 3417 + "127.0.0.1" 3418 + ); 3419 + } 3420 + 3421 + // ----------------------------------------------------------------------- 3422 + // document.domain 3423 + // ----------------------------------------------------------------------- 3424 + 3425 + #[test] 3426 + fn test_document_domain_default_empty() { 3427 + // Without setting an origin, document.domain should be "" 3428 + let result = eval_with_doc("<html><body></body></html>", "document.domain").unwrap(); 3429 + match result { 3430 + Value::String(s) => assert_eq!(s, ""), 3431 + v => panic!("expected empty string, got {v:?}"), 3432 + } 3433 + } 3434 + 3435 + #[test] 3436 + fn test_document_domain_with_origin() { 3437 + let doc = doc_from_html("<html><body></body></html>"); 3438 + let mut vm = Vm::new(); 3439 + vm.attach_document(doc); 3440 + vm.set_document_origin("https://example.com"); 3441 + 3442 + // Re-init document object with the origin set 3443 + crate::dom_bridge::init_document_object(&mut vm); 3444 + 3445 + let program = 
Parser::parse("document.domain").expect("parse failed"); 3446 + let func = compiler::compile(&program).expect("compile failed"); 3447 + let result = vm.execute(&func).unwrap(); 3448 + match result { 3449 + Value::String(s) => assert_eq!(s, "example.com"), 3450 + v => panic!("expected 'example.com', got {v:?}"), 3451 + } 3341 3452 } 3342 3453 }
+97 -1
crates/js/src/fetch.rs
··· 34 34 35 35 thread_local! { 36 36 static FETCH_STATE: RefCell<Vec<PendingFetch>> = const { RefCell::new(Vec::new()) }; 37 + /// The origin of the document that owns the current JS execution context. 38 + /// Used for Same-Origin Policy enforcement in fetch(). 39 + static DOCUMENT_ORIGIN: RefCell<Option<String>> = const { RefCell::new(None) }; 40 + } 41 + 42 + /// Set the document origin for SOP enforcement in the fetch API. 43 + /// 44 + /// Should be called before executing scripts so that `fetch()` can check 45 + /// cross-origin requests. The origin string should be serialized via 46 + /// `Origin::serialize()` (e.g. `"https://example.com"`). 47 + pub fn set_document_origin(origin: &str) { 48 + DOCUMENT_ORIGIN.with(|o| *o.borrow_mut() = Some(origin.to_string())); 49 + } 50 + 51 + /// Clear the document origin (e.g. when detaching the document). 52 + pub fn clear_document_origin() { 53 + DOCUMENT_ORIGIN.with(|o| *o.borrow_mut() = None); 54 + } 55 + 56 + /// Get the current document origin, if set. 57 + fn get_document_origin() -> Option<String> { 58 + DOCUMENT_ORIGIN.with(|o| o.borrow().clone()) 37 59 } 38 60 39 61 /// Reset fetch state (useful for tests). ··· 131 153 } 132 154 } 133 155 156 + // Capture the document origin for SOP enforcement. 157 + let doc_origin = get_document_origin(); 158 + 134 159 // Create a pending promise. 135 160 let promise = create_promise_object_pub(ctx.gc); 136 161 ··· 148 173 149 174 // Spawn a background thread for the network I/O. 150 175 std::thread::spawn(move || { 151 - let result = do_fetch(&url_str, &method, &req_headers, body.as_deref()); 176 + let result = do_fetch( 177 + &url_str, 178 + &method, 179 + &req_headers, 180 + body.as_deref(), 181 + doc_origin.as_deref(), 182 + ); 152 183 let mut lock = slot_clone.lock().unwrap(); 153 184 *lock = Some(result); 154 185 }); ··· 157 188 } 158 189 159 190 /// Perform the actual HTTP fetch (runs on a background thread). 
191 + /// 192 + /// If `document_origin` is set, performs Same-Origin Policy checks. Cross-origin 193 + /// responses without a matching `Access-Control-Allow-Origin` header are rejected. 160 194 fn do_fetch( 161 195 url_str: &str, 162 196 method: &str, 163 197 headers: &[(String, String)], 164 198 body: Option<&[u8]>, 199 + document_origin: Option<&str>, 165 200 ) -> Result<FetchResult, String> { 166 201 let url = we_url::Url::parse(url_str).map_err(|e| format!("Invalid URL: {e}"))?; 167 202 ··· 185 220 let response = client 186 221 .request(http_method, &url, &req_headers, body) 187 222 .map_err(|e| format!("Network error: {e}"))?; 223 + 224 + // Same-Origin Policy check: if we have a document origin, verify 225 + // that cross-origin responses include a CORS header. 226 + if let Some(doc_origin) = document_origin { 227 + let resource_origin = url.origin(); 228 + let doc_parsed_origin = we_url::Url::parse(&format!("{doc_origin}/")) 229 + .map(|u| u.origin()) 230 + .unwrap_or(we_url::Origin::Opaque); 231 + 232 + if !doc_parsed_origin.same_origin(&resource_origin) { 233 + // Cross-origin: check Access-Control-Allow-Origin. 
234 + let cors_allowed = response 235 + .headers 236 + .get("access-control-allow-origin") 237 + .map(|v| { 238 + let v = v.trim(); 239 + v == "*" || v == doc_origin 240 + }) 241 + .unwrap_or(false); 242 + 243 + if !cors_allowed { 244 + return Err(format!( 245 + "Cross-origin request blocked: {url_str} (no CORS headers)" 246 + )); 247 + } 248 + } 249 + } 188 250 189 251 let resp_headers: Vec<(String, String)> = response 190 252 .headers ··· 706 768 assert_eq!(entries.len(), 1); 707 769 assert_eq!(entries[0].0, "set-cookie"); 708 770 assert_eq!(entries[0].1, "a=1, b=2"); 771 + } 772 + 773 + #[test] 774 + fn test_set_and_clear_document_origin() { 775 + set_document_origin("https://example.com"); 776 + assert_eq!( 777 + get_document_origin(), 778 + Some("https://example.com".to_string()) 779 + ); 780 + clear_document_origin(); 781 + assert_eq!(get_document_origin(), None); 782 + } 783 + 784 + #[test] 785 + fn test_fetch_with_origin_rejects_cross_origin() { 786 + // Set a document origin, then try to fetch cross-origin. 787 + // The fetch will fail with a network error before CORS check 788 + // (since there's no real server), but the origin should be captured. 789 + reset_fetch_state(); 790 + crate::timers::reset_timers(); 791 + set_document_origin("http://example.com"); 792 + 793 + let source = r#" 794 + var p = fetch("http://evil.com:1/steal"); 795 + typeof p 796 + "#; 797 + let program = Parser::parse(source).expect("parse failed"); 798 + let func = compiler::compile(&program).expect("compile failed"); 799 + let mut vm = Vm::new(); 800 + init_fetch_api(&mut vm); 801 + let result = vm.execute(&func).expect("execute failed"); 802 + assert_eq!(result.to_js_string(&vm.gc), "object"); 803 + 804 + clear_document_origin(); 709 805 } 710 806 }
+15
crates/js/src/vm.rs
··· 237 237 pub node_wrappers: RefCell<HashMap<usize, GcRef>>, 238 238 /// Event listeners keyed by NodeId index. 239 239 pub event_listeners: RefCell<HashMap<usize, Vec<EventListener>>>, 240 + /// The serialized origin of this document (e.g. "https://example.com"). 241 + /// Used for Same-Origin Policy enforcement on cross-origin DOM access. 242 + pub origin: RefCell<String>, 240 243 } 241 244 242 245 /// Context passed to native functions, providing GC access and `this` binding. ··· 840 843 document: RefCell::new(doc), 841 844 node_wrappers: RefCell::new(HashMap::new()), 842 845 event_listeners: RefCell::new(HashMap::new()), 846 + origin: RefCell::new(String::new()), 843 847 }); 844 848 self.dom_bridge = Some(bridge); 845 849 crate::dom_bridge::init_document_object(self); 846 850 crate::dom_bridge::init_event_system(self); 851 + } 852 + 853 + /// Set the document origin for Same-Origin Policy enforcement. 854 + /// 855 + /// This should be called after `attach_document` and before executing 856 + /// scripts. The origin is used for cross-origin DOM access checks 857 + /// (relevant when iframes are implemented). 858 + pub fn set_document_origin(&mut self, origin: &str) { 859 + if let Some(bridge) = &self.dom_bridge { 860 + *bridge.origin.borrow_mut() = origin.to_string(); 861 + } 847 862 } 848 863 849 864 /// Detach the DOM document from the VM, returning it.
+177
crates/url/src/lib.rs
··· 109 109 Opaque, 110 110 } 111 111 112 + impl Origin { 113 + /// Check whether two origins are the same origin per the HTML spec. 114 + /// 115 + /// Two tuple origins are same-origin iff their schemes, hosts, and ports 116 + /// are identical after normalizing default ports (http→80, https→443, etc.). 117 + /// Opaque origins are never same-origin, even with themselves. NOTE: this is deliberately stricter than the HTML spec, which treats an opaque origin as same-origin with itself; `Origin::Opaque` carries no identity to compare, so every pair involving an opaque origin is rejected. 118 + pub fn same_origin(&self, other: &Origin) -> bool { 119 + match (self, other) { 120 + (Origin::Tuple(scheme_a, host_a, port_a), Origin::Tuple(scheme_b, host_b, port_b)) => { 121 + let effective_port_a = port_a.or_else(|| default_port(scheme_a)); 122 + let effective_port_b = port_b.or_else(|| default_port(scheme_b)); 123 + scheme_a == scheme_b && host_a == host_b && effective_port_a == effective_port_b 124 + } 125 + _ => false, 126 + } 127 + } 128 + 129 + /// Serialize this origin to a string (e.g. `"https://example.com"`). 130 + /// 131 + /// Opaque origins serialize to `"null"`. For tuple origins the output is `scheme://host`, followed by `:port` only when a port is set and differs from `default_port(scheme)`. 132 + pub fn serialize(&self) -> String { 133 + match self { 134 + Origin::Opaque => "null".to_string(), 135 + Origin::Tuple(scheme, host, port) => { 136 + let mut s = String::new(); 137 + s.push_str(scheme); 138 + s.push_str("://"); 139 + s.push_str(&host.serialize()); 140 + if let Some(p) = port { 141 + if default_port(scheme) != Some(*p) { 142 + s.push(':'); 143 + s.push_str(&p.to_string()); 144 + } 145 + } 146 + s 147 + } 148 + } 149 + } 150 + } 151 + 152 + impl fmt::Display for Origin { 153 + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 154 + write!(f, "{}", self.serialize()) 155 + } 156 + } 157 + 112 158 // --------------------------------------------------------------------------- 113 159 // URL record 114 160 // --------------------------------------------------------------------------- ··· 1665 1711 fn origin_data_is_opaque() { 1666 1712 let url = Url::parse("data:text/html,hello").unwrap(); 1667 1713 assert_eq!(url.origin(), Origin::Opaque); 1714 + } 1715 + 1716 + // 
------------------------------------------------------------------- 1717 + // Origin::same_origin 1718 + // ------------------------------------------------------------------- 1719 + 1720 + #[test] 1721 + fn same_origin_identical_tuple() { 1722 + let a = Url::parse("http://example.com/page1").unwrap(); 1723 + let b = Url::parse("http://example.com/page2").unwrap(); 1724 + assert!(a.origin().same_origin(&b.origin())); 1725 + } 1726 + 1727 + #[test] 1728 + fn same_origin_different_path_query_fragment() { 1729 + let a = Url::parse("https://example.com/a?x=1#frag").unwrap(); 1730 + let b = Url::parse("https://example.com/b?y=2#other").unwrap(); 1731 + assert!(a.origin().same_origin(&b.origin())); 1732 + } 1733 + 1734 + #[test] 1735 + fn same_origin_different_scheme() { 1736 + let a = Url::parse("http://example.com/").unwrap(); 1737 + let b = Url::parse("https://example.com/").unwrap(); 1738 + assert!(!a.origin().same_origin(&b.origin())); 1739 + } 1740 + 1741 + #[test] 1742 + fn same_origin_different_host() { 1743 + let a = Url::parse("http://example.com/").unwrap(); 1744 + let b = Url::parse("http://other.com/").unwrap(); 1745 + assert!(!a.origin().same_origin(&b.origin())); 1746 + } 1747 + 1748 + #[test] 1749 + fn same_origin_different_port() { 1750 + let a = Url::parse("http://example.com:8080/").unwrap(); 1751 + let b = Url::parse("http://example.com:9090/").unwrap(); 1752 + assert!(!a.origin().same_origin(&b.origin())); 1753 + } 1754 + 1755 + #[test] 1756 + fn same_origin_default_port_normalization_http() { 1757 + // http://example.com (port=None) should match http://example.com:80 1758 + let a = Url::parse("http://example.com/").unwrap(); 1759 + let b = Url::parse("http://example.com:80/").unwrap(); 1760 + assert!(a.origin().same_origin(&b.origin())); 1761 + } 1762 + 1763 + #[test] 1764 + fn same_origin_default_port_normalization_https() { 1765 + let a = Url::parse("https://example.com/").unwrap(); 1766 + let b = Url::parse("https://example.com:443/").unwrap(); 
1767 + assert!(a.origin().same_origin(&b.origin())); 1768 + } 1769 + 1770 + #[test] 1771 + fn same_origin_default_port_normalization_ftp() { 1772 + let a = Url::parse("ftp://example.com/").unwrap(); 1773 + let b = Url::parse("ftp://example.com:21/").unwrap(); 1774 + assert!(a.origin().same_origin(&b.origin())); 1775 + } 1776 + 1777 + #[test] 1778 + fn same_origin_non_default_port_vs_none() { 1779 + let a = Url::parse("http://example.com/").unwrap(); 1780 + let b = Url::parse("http://example.com:8080/").unwrap(); 1781 + assert!(!a.origin().same_origin(&b.origin())); 1782 + } 1783 + 1784 + #[test] 1785 + fn same_origin_opaque_never_matches() { 1786 + let a = Url::parse("data:text/html,hello").unwrap(); 1787 + let b = Url::parse("data:text/html,hello").unwrap(); 1788 + assert!(!a.origin().same_origin(&b.origin())); 1789 + } 1790 + 1791 + #[test] 1792 + fn same_origin_opaque_vs_tuple() { 1793 + let a = Url::parse("data:text/html,hello").unwrap(); 1794 + let b = Url::parse("http://example.com/").unwrap(); 1795 + assert!(!a.origin().same_origin(&b.origin())); 1796 + } 1797 + 1798 + #[test] 1799 + fn same_origin_ipv4() { 1800 + let a = Url::parse("http://127.0.0.1/a").unwrap(); 1801 + let b = Url::parse("http://127.0.0.1/b").unwrap(); 1802 + assert!(a.origin().same_origin(&b.origin())); 1803 + } 1804 + 1805 + #[test] 1806 + fn same_origin_ipv4_different() { 1807 + let a = Url::parse("http://127.0.0.1/").unwrap(); 1808 + let b = Url::parse("http://192.168.1.1/").unwrap(); 1809 + assert!(!a.origin().same_origin(&b.origin())); 1810 + } 1811 + 1812 + // ------------------------------------------------------------------- 1813 + // Origin::serialize / Display 1814 + // ------------------------------------------------------------------- 1815 + 1816 + #[test] 1817 + fn origin_serialize_http() { 1818 + let url = Url::parse("http://example.com/path").unwrap(); 1819 + assert_eq!(url.origin().serialize(), "http://example.com"); 1820 + } 1821 + 1822 + #[test] 1823 + fn 
origin_serialize_https_with_port() { 1824 + let url = Url::parse("https://example.com:8443/").unwrap(); 1825 + assert_eq!(url.origin().serialize(), "https://example.com:8443"); 1826 + } 1827 + 1828 + #[test] 1829 + fn origin_serialize_default_port_omitted() { 1830 + // Default port should not appear in serialization 1831 + let url = Url::parse("http://example.com:80/").unwrap(); 1832 + assert_eq!(url.origin().serialize(), "http://example.com"); 1833 + } 1834 + 1835 + #[test] 1836 + fn origin_serialize_opaque() { 1837 + let url = Url::parse("data:text/html,hi").unwrap(); 1838 + assert_eq!(url.origin().serialize(), "null"); 1839 + } 1840 + 1841 + #[test] 1842 + fn origin_display() { 1843 + let url = Url::parse("https://example.com/").unwrap(); 1844 + assert_eq!(format!("{}", url.origin()), "https://example.com"); 1668 1845 } 1669 1846 1670 1847 // -------------------------------------------------------------------