Rust library to generate static websites
5
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: adds support for generating sitemaps (#80)

* feat: adds support for generating sitemaps

* feat: some cleanup

* fix: normalize base urls before adding to sitemaps

* fix: dynamic route timings

* chore: changeset

* chore: changeset

authored by

Erika and committed by
GitHub
bce6fd12 e3a555f8

+857 -5
+5
.sampo/changesets/haughty-duchess-otso.md
··· 1 + --- 2 + cargo/maudit: patch 3 + --- 4 + 5 + Fixes route timings being wrongly based on the start of the group instead of per route for dynamic routes
+6
.sampo/changesets/regal-princess-lemminkainen.md
··· 1 + --- 2 + cargo/maudit: minor 3 + cargo/maudit-macros: minor 4 + --- 5 + 6 + Adds support for automatically generating sitemaps
+14
Cargo.lock
··· 2575 2575 "serde_yaml", 2576 2576 "slug", 2577 2577 "syntect", 2578 + "tempfile", 2578 2579 "thiserror 2.0.17", 2579 2580 "thumbhash", 2580 2581 "tokio", ··· 4765 4766 "filetime", 4766 4767 "libc", 4767 4768 "xattr", 4769 + ] 4770 + 4771 + [[package]] 4772 + name = "tempfile" 4773 + version = "3.24.0" 4774 + source = "registry+https://github.com/rust-lang/crates.io-index" 4775 + checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c" 4776 + dependencies = [ 4777 + "fastrand", 4778 + "getrandom 0.3.4", 4779 + "once_cell", 4780 + "rustix", 4781 + "windows-sys 0.61.2", 4768 4782 ] 4769 4783 4770 4784 [[package]]
+119 -3
crates/maudit-macros/src/lib.rs
··· 49 49 } 50 50 } 51 51 52 + struct SitemapArgs { 53 + exclude: Option<bool>, 54 + changefreq: Option<Expr>, 55 + priority: Option<Expr>, 56 + } 57 + 58 + impl Parse for SitemapArgs { 59 + fn parse(input: ParseStream) -> Result<Self> { 60 + let mut exclude = None; 61 + let mut changefreq = None; 62 + let mut priority = None; 63 + 64 + while !input.is_empty() { 65 + let key: Ident = input.parse()?; 66 + input.parse::<Token![=]>()?; 67 + 68 + match key.to_string().as_str() { 69 + "exclude" => { 70 + let value: syn::LitBool = input.parse()?; 71 + exclude = Some(value.value); 72 + } 73 + "changefreq" => { 74 + changefreq = Some(input.parse()?); 75 + } 76 + "priority" => { 77 + priority = Some(input.parse()?); 78 + } 79 + _ => { 80 + return Err(syn::Error::new_spanned( 81 + key, 82 + "unknown sitemap argument, expected 'exclude', 'changefreq', or 'priority'", 83 + )); 84 + } 85 + } 86 + 87 + if input.peek(Token![,]) { 88 + input.parse::<Token![,]>()?; 89 + } else { 90 + break; 91 + } 92 + } 93 + 94 + Ok(SitemapArgs { 95 + exclude, 96 + changefreq, 97 + priority, 98 + }) 99 + } 100 + } 101 + 52 102 struct RouteArgs { 53 103 path: Option<Expr>, 54 104 locales: Vec<LocaleVariant>, 105 + sitemap: Option<SitemapArgs>, 55 106 } 56 107 57 108 impl Parse for RouteArgs { 58 109 fn parse(input: ParseStream) -> Result<Self> { 59 110 let mut path = None; 60 111 let mut locales = Vec::new(); 112 + let mut sitemap = None; 61 113 62 114 if input.is_empty() { 63 - return Ok(RouteArgs { path, locales }); 115 + return Ok(RouteArgs { 116 + path, 117 + locales, 118 + sitemap, 119 + }); 64 120 } 65 121 66 122 // First argument: either a path expression or a named argument like locales(...) 
··· 74 130 syn::parenthesized!(content in input); 75 131 let variants = Punctuated::<LocaleVariant, Token![,]>::parse_terminated(&content)?; 76 132 locales = variants.into_iter().collect(); 133 + } else if ident_str == "sitemap" { 134 + let content; 135 + syn::parenthesized!(content in input); 136 + sitemap = Some(content.parse()?); 77 137 } else { 78 138 return Err(syn::Error::new_spanned( 79 139 ident, 80 - format!("unknown argument '{}', expected 'locales'", ident_str), 140 + format!( 141 + "unknown argument '{}', expected 'locales' or 'sitemap'", 142 + ident_str 143 + ), 81 144 )); 82 145 } 83 146 } else { ··· 110 173 let variants = 111 174 Punctuated::<LocaleVariant, Token![,]>::parse_terminated(&content)?; 112 175 locales = variants.into_iter().collect(); 176 + } else if ident_str == "sitemap" { 177 + if sitemap.is_some() { 178 + return Err(syn::Error::new_spanned( 179 + ident, 180 + "sitemap specified multiple times", 181 + )); 182 + } 183 + let content; 184 + syn::parenthesized!(content in input); 185 + sitemap = Some(content.parse()?); 113 186 } else { 114 187 return Err(syn::Error::new_spanned( 115 188 ident, ··· 127 200 // Check for duplicate locales 128 201 Self::check_duplicate_locales(&locales)?; 129 202 130 - Ok(RouteArgs { path, locales }) 203 + Ok(RouteArgs { 204 + path, 205 + locales, 206 + sitemap, 207 + }) 131 208 } 132 209 } 133 210 ··· 213 290 } 214 291 }; 215 292 293 + // Generate sitemap metadata method 294 + let sitemap_method = if let Some(sitemap_args) = &args.sitemap { 295 + let exclude_impl = if let Some(exclude) = sitemap_args.exclude { 296 + quote! { Some(#exclude) } 297 + } else { 298 + quote! { None } 299 + }; 300 + 301 + let changefreq_impl = if let Some(changefreq) = &sitemap_args.changefreq { 302 + quote! { Some(#changefreq) } 303 + } else { 304 + quote! { None } 305 + }; 306 + 307 + let priority_impl = if let Some(priority) = &sitemap_args.priority { 308 + quote! { Some(#priority) } 309 + } else { 310 + quote! 
{ None } 311 + }; 312 + 313 + quote! { 314 + fn sitemap_metadata(&self) -> maudit::sitemap::RouteSitemapMetadata { 315 + maudit::sitemap::RouteSitemapMetadata { 316 + exclude: #exclude_impl, 317 + changefreq: #changefreq_impl, 318 + priority: #priority_impl, 319 + } 320 + } 321 + } 322 + } else { 323 + quote! { 324 + fn sitemap_metadata(&self) -> maudit::sitemap::RouteSitemapMetadata { 325 + maudit::sitemap::RouteSitemapMetadata::default() 326 + } 327 + } 328 + }; 329 + 216 330 let expanded = quote! { 217 331 impl maudit::route::InternalRoute for #struct_name { 218 332 #route_raw_impl 219 333 220 334 #variant_method 335 + 336 + #sitemap_method 221 337 } 222 338 223 339 impl maudit::route::FullRoute for #struct_name {
+3
crates/maudit/Cargo.toml
··· 48 48 rayon = "1.11.0" 49 49 rapidhash = "4.1.1" 50 50 pathdiff = "0.2.3" 51 + 52 + [dev-dependencies] 53 + tempfile = "3.24.0"
+103 -1
crates/maudit/src/build.rs
··· 17 17 logging::print_title, 18 18 route::{CachedRoute, DynamicRouteContext, FullRoute, InternalRoute, PageContext, PageParams}, 19 19 routing::extract_params_from_raw_route, 20 + sitemap::{SitemapEntry, generate_sitemap}, 20 21 }; 21 22 use colored::{ColoredString, Colorize}; 22 23 use log::{debug, info, trace, warn}; ··· 120 121 let mut build_pages_scripts: FxHashSet<assets::Script> = FxHashSet::default(); 121 122 let mut build_pages_styles: FxHashSet<assets::Style> = FxHashSet::default(); 122 123 124 + let mut sitemap_entries: Vec<SitemapEntry> = Vec::new(); 123 125 let mut page_count = 0; 126 + 127 + // Normalize base_url once to avoid repeated trimming 128 + let normalized_base_url = options 129 + .base_url 130 + .as_ref() 131 + .map(|url| url.trim_end_matches('/')); 124 132 125 133 // This is fully serial. It is somewhat trivial to make it parallel, but it currently isn't because every time I've tried to 126 134 // (uncommited, #25, #41, #46) it either made no difference or was slower. 
The overhead of parallelism is just too high for ··· 176 184 None, 177 185 ); 178 186 187 + add_sitemap_entry( 188 + &mut sitemap_entries, 189 + normalized_base_url, 190 + &url, 191 + base_path, 192 + &route.sitemap_metadata(), 193 + &options.sitemap, 194 + ); 195 + 179 196 page_count += 1; 180 197 } else { 181 198 // Dynamic base route ··· 196 213 197 214 // Build all pages for this route 198 215 for page in pages { 216 + let page_start = Instant::now(); 199 217 let url = cached_route.url(&page.0); 200 218 let file_path = cached_route.file_path(&page.0, &options.output_dir); 201 219 ··· 210 228 211 229 write_route_file(&content, &file_path)?; 212 230 213 - info!(target: "pages", "├─ {} {}", file_path.to_string_lossy().dimmed(), format_elapsed_time(route_start.elapsed(), &route_format_options)); 231 + info!(target: "pages", "├─ {} {}", file_path.to_string_lossy().dimmed(), format_elapsed_time(page_start.elapsed(), &route_format_options)); 214 232 215 233 build_metadata.add_page( 216 234 base_path.clone(), ··· 218 236 Some(page.0.0.clone()), 219 237 ); 220 238 239 + add_sitemap_entry( 240 + &mut sitemap_entries, 241 + normalized_base_url, 242 + &url, 243 + base_path, 244 + &route.sitemap_metadata(), 245 + &options.sitemap, 246 + ); 247 + 221 248 page_count += 1; 222 249 } 223 250 } ··· 265 292 None, 266 293 ); 267 294 295 + add_sitemap_entry( 296 + &mut sitemap_entries, 297 + normalized_base_url, 298 + &url, 299 + &variant_path, 300 + &route.sitemap_metadata(), 301 + &options.sitemap, 302 + ); 303 + 268 304 page_count += 1; 269 305 } else { 270 306 // Dynamic variant ··· 309 345 variant_path.clone(), 310 346 file_path.to_string_lossy().to_string(), 311 347 Some(page.0.0.clone()), 348 + ); 349 + 350 + add_sitemap_entry( 351 + &mut sitemap_entries, 352 + normalized_base_url, 353 + &url, 354 + &variant_path, 355 + &route.sitemap_metadata(), 356 + &options.sitemap, 312 357 ); 313 358 314 359 page_count += 1; ··· 473 518 info!(target: "build", "{}", format!("Assets 
copied in {}", format_elapsed_time(assets_start.elapsed(), &FormatElapsedTimeOptions::default())).bold()); 474 519 } 475 520 521 + // Generate sitemap 522 + if options.sitemap.enabled { 523 + if let Some(base_url) = normalized_base_url { 524 + let sitemap_start = Instant::now(); 525 + print_title("generating sitemap"); 526 + 527 + generate_sitemap( 528 + sitemap_entries, 529 + base_url, 530 + &options.output_dir, 531 + &options.sitemap, 532 + )?; 533 + 534 + info!(target: "build", "{}", format!("Sitemap generated in {}", format_elapsed_time(sitemap_start.elapsed(), &FormatElapsedTimeOptions::default())).bold()); 535 + } else { 536 + warn!(target: "build", "Sitemap generation is enabled but no base_url is set in BuildOptions. Either disable sitemap generation or set a base_url to enable it."); 537 + } 538 + } 539 + 476 540 info!(target: "SKIP_FORMAT", "{}", ""); 477 541 info!(target: "build", "{}", format!("Build completed in {}", format_elapsed_time(build_start.elapsed(), &section_format_options)).bold()); 478 542 ··· 481 545 } 482 546 483 547 Ok(build_metadata) 548 + } 549 + 550 + fn add_sitemap_entry( 551 + sitemap_entries: &mut Vec<SitemapEntry>, 552 + base_url: Option<&str>, 553 + url: &str, 554 + route_path: &str, 555 + sitemap_metadata: &crate::sitemap::RouteSitemapMetadata, 556 + sitemap_options: &crate::sitemap::SitemapOptions, 557 + ) { 558 + // Skip if no base_url configured 559 + let Some(base_url) = base_url else { 560 + return; 561 + }; 562 + 563 + // Skip if route is excluded or is a 404 page 564 + if sitemap_metadata.exclude.unwrap_or(false) || route_path.contains("404") { 565 + return; 566 + } 567 + 568 + // Construct full URL 569 + let full_url = if url == "/" { 570 + base_url.to_string() 571 + } else { 572 + format!("{}{}", base_url, url) 573 + }; 574 + 575 + // Add entry 576 + sitemap_entries.push(SitemapEntry { 577 + loc: full_url, 578 + lastmod: None, 579 + changefreq: sitemap_metadata 580 + .changefreq 581 + 
.or(sitemap_options.default_changefreq), 582 + priority: sitemap_metadata 583 + .priority 584 + .or(sitemap_options.default_priority), 585 + }); 484 586 } 485 587 486 588 fn copy_recursively(
+5 -1
crates/maudit/src/build/options.rs
··· 1 1 use std::{env, path::PathBuf}; 2 2 3 - use crate::{assets::RouteAssetsOptions, is_dev}; 3 + use crate::{assets::RouteAssetsOptions, is_dev, sitemap::SitemapOptions}; 4 4 5 5 /// Maudit build options. Should be passed to [`coronate()`](crate::coronate()). 6 6 /// ··· 57 57 pub clean_output_dir: bool, 58 58 59 59 pub assets: AssetsOptions, 60 + 61 + /// Options for sitemap generation. See [`SitemapOptions`] for configuration. 62 + pub sitemap: SitemapOptions, 60 63 } 61 64 62 65 impl BuildOptions { ··· 147 150 static_dir: "static".into(), 148 151 clean_output_dir: true, 149 152 assets: AssetsOptions::default(), 153 + sitemap: SitemapOptions::default(), 150 154 } 151 155 } 152 156 }
+2
crates/maudit/src/lib.rs
··· 11 11 pub mod errors; 12 12 pub mod route; 13 13 pub mod routing; 14 + pub mod sitemap; 14 15 15 16 // Exports for end-users 16 17 pub use build::metadata::{BuildOutput, PageOutput, StaticAssetOutput}; 17 18 pub use build::options::{AssetHashingStrategy, AssetsOptions, BuildOptions}; 19 + pub use sitemap::{ChangeFreq, SitemapOptions}; 18 20 19 21 // Re-export FxHashMap so that macro-generated code can use it without requiring users to add it as a dependency. 20 22 #[doc(hidden)]
+4
crates/maudit/src/route.rs
··· 476 476 vec![] 477 477 } 478 478 479 + fn sitemap_metadata(&self) -> crate::sitemap::RouteSitemapMetadata { 480 + crate::sitemap::RouteSitemapMetadata::default() 481 + } 482 + 479 483 fn is_endpoint(&self) -> bool { 480 484 self.route_raw() 481 485 .as_ref()
+596
crates/maudit/src/sitemap.rs
··· 1 + use std::fs; 2 + use std::io::Write; 3 + use std::path::Path; 4 + 5 + // THOUGHTS: I don't like that we maintain an implementation of sitemap generation here. I'd like to either move this into a 6 + // separate crate or use an existing crate for this. But, the existing crates I found didn't really satisfy my needs, which is annoying. 7 + 8 + /// Metadata for sitemap configuration on a specific route. 9 + #[derive(Debug, Clone, Default)] 10 + pub struct RouteSitemapMetadata { 11 + /// Whether to exclude this route from the sitemap 12 + pub exclude: Option<bool>, 13 + /// Change frequency for this route 14 + pub changefreq: Option<ChangeFreq>, 15 + /// Priority for this route (0.0 to 1.0) 16 + pub priority: Option<f32>, 17 + } 18 + 19 + /// Options for sitemap generation. 20 + #[derive(Debug, Clone)] 21 + pub struct SitemapOptions { 22 + /// Whether to generate a sitemap. Default: `false` 23 + pub enabled: bool, 24 + /// The filename for the sitemap index. Default: `"sitemap.xml"` 25 + /// 26 + /// If multiple sitemaps are needed, individual sitemap files will be named `sitemap-1.xml`, `sitemap-2.xml`, etc. 27 + pub filename: String, 28 + /// Maximum number of URLs per sitemap file. Default: `10000` 29 + /// 30 + /// Note that search engines will often ignore sitemaps with more than 50,000 URLs, 31 + /// so it's recommended to keep this value at or below that limit. 32 + pub max_urls_per_sitemap: usize, 33 + /// Default change frequency for pages. Default: `None` 34 + /// 35 + /// Note that changefreq is often ignored by search engines nowadays. 36 + pub default_changefreq: Option<ChangeFreq>, 37 + /// Default priority for pages. Default: `None` 38 + /// 39 + /// Note that priority is often ignored by search engines nowadays. 40 + pub default_priority: Option<f32>, 41 + /// Optional XSL stylesheet URL for styling the sitemap. 
Default: `None` 42 + /// 43 + /// If the value starts with `http(s)://`it will be used as-is (ex: your stylesheet might be coming from a CDN). 44 + /// 45 + /// Otherwise, the path is appended to the base URL. For example, `sitemap.xsl` with base URL 46 + /// `https://example.com` becomes `https://example.com/sitemap.xsl`. 47 + pub stylesheet: Option<String>, 48 + } 49 + 50 + impl Default for SitemapOptions { 51 + fn default() -> Self { 52 + Self { 53 + enabled: false, 54 + filename: "sitemap.xml".to_string(), 55 + max_urls_per_sitemap: 10000, 56 + default_changefreq: None, 57 + default_priority: None, 58 + stylesheet: None, 59 + } 60 + } 61 + } 62 + 63 + /// Change frequency values for sitemap entries. 64 + /// 65 + /// See: https://www.sitemaps.org/protocol.html#changefreqdef for more details. 66 + /// This property is often ignored by search engines nowadays. 67 + #[derive(Debug, Clone, Copy, PartialEq)] 68 + pub enum ChangeFreq { 69 + Always, 70 + Hourly, 71 + Daily, 72 + Weekly, 73 + Monthly, 74 + Yearly, 75 + Never, 76 + } 77 + 78 + impl ChangeFreq { 79 + fn as_str(&self) -> &str { 80 + match self { 81 + ChangeFreq::Always => "always", 82 + ChangeFreq::Hourly => "hourly", 83 + ChangeFreq::Daily => "daily", 84 + ChangeFreq::Weekly => "weekly", 85 + ChangeFreq::Monthly => "monthly", 86 + ChangeFreq::Yearly => "yearly", 87 + ChangeFreq::Never => "never", 88 + } 89 + } 90 + } 91 + 92 + /// Represents a single URL entry in the sitemap. 
93 + #[derive(Debug)] 94 + pub struct SitemapEntry { 95 + pub loc: String, 96 + pub lastmod: Option<String>, 97 + pub changefreq: Option<ChangeFreq>, 98 + pub priority: Option<f32>, 99 + } 100 + 101 + impl SitemapEntry { 102 + fn to_xml(&self) -> String { 103 + let mut xml = String::from("<url>"); 104 + xml.push_str(&format!("<loc>{}</loc>", escape_xml(&self.loc))); 105 + 106 + if let Some(ref lastmod) = self.lastmod { 107 + xml.push_str(&format!("<lastmod>{}</lastmod>", lastmod)); 108 + } 109 + 110 + if let Some(changefreq) = self.changefreq { 111 + xml.push_str(&format!("<changefreq>{}</changefreq>", changefreq.as_str())); 112 + } 113 + 114 + if let Some(priority) = self.priority { 115 + xml.push_str(&format!("<priority>{:.1}</priority>", priority)); 116 + } 117 + 118 + xml.push_str("</url>"); 119 + xml 120 + } 121 + } 122 + 123 + /// Represents a sitemap file reference in a sitemap index. 124 + #[derive(Debug)] 125 + struct SitemapReference { 126 + loc: String, 127 + lastmod: Option<String>, 128 + } 129 + 130 + impl SitemapReference { 131 + fn to_xml(&self) -> String { 132 + let mut xml = String::from("<sitemap>"); 133 + xml.push_str(&format!("<loc>{}</loc>", escape_xml(&self.loc))); 134 + 135 + if let Some(ref lastmod) = self.lastmod { 136 + xml.push_str(&format!("<lastmod>{}</lastmod>", lastmod)); 137 + } 138 + 139 + xml.push_str("</sitemap>"); 140 + xml 141 + } 142 + } 143 + 144 + /// Escapes XML special characters. 145 + fn escape_xml(s: &str) -> String { 146 + s.replace('&', "&amp;") 147 + .replace('<', "&lt;") 148 + .replace('>', "&gt;") 149 + .replace('"', "&quot;") 150 + .replace('\'', "&apos;") 151 + } 152 + 153 + /// Resolves a stylesheet path to a full URL. 154 + /// If the path starts with http:// or https://, it's used as-is. 155 + /// Otherwise, it's appended to the base URL. 
156 + fn resolve_stylesheet_url(base_url: &str, stylesheet_path: &str) -> String { 157 + if stylesheet_path.starts_with("http://") || stylesheet_path.starts_with("https://") { 158 + stylesheet_path.to_string() 159 + } else { 160 + format!("{}{}", base_url, stylesheet_path) 161 + } 162 + } 163 + 164 + /// Generates a sitemap index with multiple sitemap files from pre-built entries. 165 + pub fn generate_sitemap( 166 + entries: Vec<SitemapEntry>, 167 + base_url: &str, 168 + output_dir: &Path, 169 + options: &SitemapOptions, 170 + ) -> Result<(), Box<dyn std::error::Error>> { 171 + if !options.enabled { 172 + return Ok(()); 173 + } 174 + 175 + if entries.is_empty() { 176 + return Ok(()); 177 + } 178 + 179 + // Normalize base_url once to avoid repeated trimming 180 + let base_url = base_url.trim_end_matches('/'); 181 + 182 + // Sort entries by URL for consistency 183 + let mut sorted_entries = entries; 184 + sorted_entries.sort_by(|a, b| a.loc.cmp(&b.loc)); 185 + 186 + let total_entries = sorted_entries.len(); 187 + 188 + // If we have very few entries, just create a single sitemap 189 + if total_entries <= options.max_urls_per_sitemap { 190 + generate_single_sitemap( 191 + &sorted_entries, 192 + output_dir, 193 + &options.filename, 194 + base_url, 195 + options.stylesheet.as_deref(), 196 + )?; 197 + 198 + log::info!( 199 + target: "sitemap", 200 + "Generated sitemap with {} URLs at {}", 201 + total_entries, 202 + output_dir.join(&options.filename).display() 203 + ); 204 + 205 + return Ok(()); 206 + } 207 + 208 + // Split into chunks and create multiple sitemap files 209 + let chunks: Vec<&[SitemapEntry]> = sorted_entries 210 + .chunks(options.max_urls_per_sitemap) 211 + .collect(); 212 + 213 + let num_sitemaps = chunks.len(); 214 + let mut sitemap_refs = Vec::new(); 215 + 216 + // Generate individual sitemap files 217 + for (i, chunk) in chunks.iter().enumerate() { 218 + let sitemap_num = i + 1; 219 + let sitemap_filename = format!("sitemap-{}.xml", sitemap_num); 220 
+ 221 + generate_single_sitemap( 222 + chunk, 223 + output_dir, 224 + &sitemap_filename, 225 + base_url, 226 + options.stylesheet.as_deref(), 227 + )?; 228 + 229 + let sitemap_url = format!("{}/{}", base_url, sitemap_filename); 230 + sitemap_refs.push(SitemapReference { 231 + loc: sitemap_url, 232 + lastmod: None, // TODO: Somehow the user should be able to specify lastmod per chunk or we should somehow calculate it? Probably can't and probably doesn't matter anyway. 233 + }); 234 + } 235 + 236 + generate_sitemap_index( 237 + &sitemap_refs, 238 + output_dir, 239 + &options.filename, 240 + base_url, 241 + options.stylesheet.as_deref(), 242 + )?; 243 + 244 + log::info!( 245 + target: "sitemap", 246 + "Generated sitemap index with {} sitemaps ({} total URLs) at {}", 247 + num_sitemaps, 248 + total_entries, 249 + output_dir.join(&options.filename).display() 250 + ); 251 + 252 + Ok(()) 253 + } 254 + 255 + /// Generates a single sitemap file. 256 + fn generate_single_sitemap( 257 + entries: &[SitemapEntry], 258 + output_dir: &Path, 259 + filename: &str, 260 + base_url: &str, 261 + stylesheet: Option<&str>, 262 + ) -> Result<(), Box<dyn std::error::Error>> { 263 + let mut xml = String::from("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"); 264 + 265 + if let Some(stylesheet_path) = stylesheet { 266 + let stylesheet_url = resolve_stylesheet_url(base_url, stylesheet_path); 267 + xml.push_str(&format!( 268 + "<?xml-stylesheet type=\"text/xsl\" href=\"{}\"?>\n", 269 + escape_xml(&stylesheet_url) 270 + )); 271 + } 272 + 273 + xml.push_str("<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">"); 274 + 275 + for entry in entries { 276 + xml.push_str(&entry.to_xml()); 277 + } 278 + 279 + xml.push_str("</urlset>"); 280 + 281 + let sitemap_path = output_dir.join(filename); 282 + let mut file = fs::File::create(&sitemap_path)?; 283 + file.write_all(xml.as_bytes())?; 284 + 285 + Ok(()) 286 + } 287 + 288 + /// Generates a sitemap index file. 
289 + fn generate_sitemap_index( 290 + sitemaps: &[SitemapReference], 291 + output_dir: &Path, 292 + filename: &str, 293 + base_url: &str, 294 + stylesheet: Option<&str>, 295 + ) -> Result<(), Box<dyn std::error::Error>> { 296 + let mut xml = String::from("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"); 297 + 298 + if let Some(stylesheet_path) = stylesheet { 299 + let stylesheet_url = resolve_stylesheet_url(base_url, stylesheet_path); 300 + xml.push_str(&format!( 301 + "<?xml-stylesheet type=\"text/xsl\" href=\"{}\"?>\n", 302 + escape_xml(&stylesheet_url) 303 + )); 304 + } 305 + 306 + xml.push_str("<sitemapindex xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">"); 307 + 308 + for sitemap in sitemaps { 309 + xml.push_str(&sitemap.to_xml()); 310 + } 311 + 312 + xml.push_str("</sitemapindex>"); 313 + 314 + let index_path = output_dir.join(filename); 315 + let mut file = fs::File::create(&index_path)?; 316 + file.write_all(xml.as_bytes())?; 317 + 318 + Ok(()) 319 + } 320 + 321 + #[cfg(test)] 322 + mod tests { 323 + use super::*; 324 + 325 + #[test] 326 + fn test_escape_xml() { 327 + assert_eq!(escape_xml("hello"), "hello"); 328 + assert_eq!(escape_xml("a&b"), "a&amp;b"); 329 + assert_eq!(escape_xml("<tag>"), "&lt;tag&gt;"); 330 + assert_eq!( 331 + escape_xml("it's \"quoted\""), 332 + "it&apos;s &quot;quoted&quot;" 333 + ); 334 + } 335 + 336 + #[test] 337 + fn test_changefreq_as_str() { 338 + assert_eq!(ChangeFreq::Always.as_str(), "always"); 339 + assert_eq!(ChangeFreq::Daily.as_str(), "daily"); 340 + assert_eq!(ChangeFreq::Never.as_str(), "never"); 341 + } 342 + 343 + #[test] 344 + fn test_sitemap_entry_to_xml() { 345 + let entry = SitemapEntry { 346 + loc: "https://example.com/page".to_string(), 347 + lastmod: Some("2024-01-01".to_string()), 348 + changefreq: Some(ChangeFreq::Weekly), 349 + priority: Some(0.8), 350 + }; 351 + 352 + let xml = entry.to_xml(); 353 + assert!(xml.contains("<loc>https://example.com/page</loc>")); 354 + 
assert!(xml.contains("<lastmod>2024-01-01</lastmod>")); 355 + assert!(xml.contains("<changefreq>weekly</changefreq>")); 356 + assert!(xml.contains("<priority>0.8</priority>")); 357 + } 358 + 359 + #[test] 360 + fn test_sitemap_entry_minimal() { 361 + let entry = SitemapEntry { 362 + loc: "https://example.com/".to_string(), 363 + lastmod: None, 364 + changefreq: None, 365 + priority: None, 366 + }; 367 + 368 + let xml = entry.to_xml(); 369 + assert!(xml.contains("<loc>https://example.com/</loc>")); 370 + assert!(!xml.contains("<lastmod>")); 371 + assert!(!xml.contains("<changefreq>")); 372 + assert!(!xml.contains("<priority>")); 373 + } 374 + 375 + #[test] 376 + fn test_sitemap_reference_to_xml() { 377 + let reference = SitemapReference { 378 + loc: "https://example.com/sitemap-1.xml".to_string(), 379 + lastmod: Some("2024-01-01".to_string()), 380 + }; 381 + 382 + let xml = reference.to_xml(); 383 + assert!(xml.contains("<sitemap>")); 384 + assert!(xml.contains("<loc>https://example.com/sitemap-1.xml</loc>")); 385 + assert!(xml.contains("<lastmod>2024-01-01</lastmod>")); 386 + assert!(xml.contains("</sitemap>")); 387 + } 388 + 389 + #[test] 390 + fn test_generate_single_sitemap_with_stylesheet() { 391 + use std::io::Read; 392 + use tempfile::tempdir; 393 + 394 + let dir = tempdir().unwrap(); 395 + let entries = vec![ 396 + SitemapEntry { 397 + loc: "https://example.com/page1".to_string(), 398 + lastmod: None, 399 + changefreq: None, 400 + priority: None, 401 + }, 402 + SitemapEntry { 403 + loc: "https://example.com/page2".to_string(), 404 + lastmod: None, 405 + changefreq: None, 406 + priority: None, 407 + }, 408 + ]; 409 + 410 + generate_single_sitemap( 411 + &entries, 412 + dir.path(), 413 + "sitemap.xml", 414 + "https://example.com", 415 + Some("/sitemap.xsl"), 416 + ) 417 + .unwrap(); 418 + 419 + let mut file = std::fs::File::open(dir.path().join("sitemap.xml")).unwrap(); 420 + let mut content = String::new(); 421 + file.read_to_string(&mut content).unwrap(); 
422 + 423 + assert!(content.contains("<?xml version=\"1.0\" encoding=\"UTF-8\"?>")); 424 + assert!(content.contains( 425 + "<?xml-stylesheet type=\"text/xsl\" href=\"https://example.com/sitemap.xsl\"?>" 426 + )); 427 + assert!(content.contains("<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">")); 428 + assert!(content.contains("<loc>https://example.com/page1</loc>")); 429 + assert!(content.contains("<loc>https://example.com/page2</loc>")); 430 + } 431 + 432 + #[test] 433 + fn test_generate_single_sitemap_without_stylesheet() { 434 + use std::io::Read; 435 + use tempfile::tempdir; 436 + 437 + let dir = tempdir().unwrap(); 438 + let entries = vec![SitemapEntry { 439 + loc: "https://example.com/page1".to_string(), 440 + lastmod: None, 441 + changefreq: None, 442 + priority: None, 443 + }]; 444 + 445 + generate_single_sitemap( 446 + &entries, 447 + dir.path(), 448 + "sitemap.xml", 449 + "https://example.com", 450 + None, 451 + ) 452 + .unwrap(); 453 + 454 + let mut file = std::fs::File::open(dir.path().join("sitemap.xml")).unwrap(); 455 + let mut content = String::new(); 456 + file.read_to_string(&mut content).unwrap(); 457 + 458 + assert!(content.contains("<?xml version=\"1.0\" encoding=\"UTF-8\"?>")); 459 + assert!(!content.contains("<?xml-stylesheet")); 460 + assert!(content.contains("<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">")); 461 + } 462 + 463 + #[test] 464 + fn test_generate_sitemap_index_with_stylesheet() { 465 + use std::io::Read; 466 + use tempfile::tempdir; 467 + 468 + let dir = tempdir().unwrap(); 469 + let sitemaps = vec![ 470 + SitemapReference { 471 + loc: "https://example.com/sitemap-1.xml".to_string(), 472 + lastmod: None, 473 + }, 474 + SitemapReference { 475 + loc: "https://example.com/sitemap-2.xml".to_string(), 476 + lastmod: None, 477 + }, 478 + ]; 479 + 480 + generate_sitemap_index( 481 + &sitemaps, 482 + dir.path(), 483 + "sitemap.xml", 484 + "https://example.com", 485 + Some("/sitemap.xsl"), 486 + ) 487 + 
.unwrap(); 488 + 489 + let mut file = std::fs::File::open(dir.path().join("sitemap.xml")).unwrap(); 490 + let mut content = String::new(); 491 + file.read_to_string(&mut content).unwrap(); 492 + 493 + assert!(content.contains("<?xml version=\"1.0\" encoding=\"UTF-8\"?>")); 494 + assert!(content.contains( 495 + "<?xml-stylesheet type=\"text/xsl\" href=\"https://example.com/sitemap.xsl\"?>" 496 + )); 497 + assert!( 498 + content 499 + .contains("<sitemapindex xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">") 500 + ); 501 + assert!(content.contains("<loc>https://example.com/sitemap-1.xml</loc>")); 502 + assert!(content.contains("<loc>https://example.com/sitemap-2.xml</loc>")); 503 + } 504 + 505 + #[test] 506 + fn test_stylesheet_xml_escaping() { 507 + use std::io::Read; 508 + use tempfile::tempdir; 509 + 510 + let dir = tempdir().unwrap(); 511 + let entries = vec![SitemapEntry { 512 + loc: "https://example.com/".to_string(), 513 + lastmod: None, 514 + changefreq: None, 515 + priority: None, 516 + }]; 517 + 518 + generate_single_sitemap( 519 + &entries, 520 + dir.path(), 521 + "sitemap.xml", 522 + "https://example.com", 523 + Some("/sitemap.xsl?param=value&other=123"), 524 + ) 525 + .unwrap(); 526 + 527 + let mut file = std::fs::File::open(dir.path().join("sitemap.xml")).unwrap(); 528 + let mut content = String::new(); 529 + file.read_to_string(&mut content).unwrap(); 530 + 531 + // Verify that & is properly escaped in the stylesheet URL 532 + assert!( 533 + content.contains("href=\"https://example.com/sitemap.xsl?param=value&amp;other=123\"") 534 + ); 535 + } 536 + 537 + #[test] 538 + fn test_stylesheet_absolute_url() { 539 + use std::io::Read; 540 + use tempfile::tempdir; 541 + 542 + let dir = tempdir().unwrap(); 543 + let entries = vec![SitemapEntry { 544 + loc: "https://example.com/".to_string(), 545 + lastmod: None, 546 + changefreq: None, 547 + priority: None, 548 + }]; 549 + 550 + generate_single_sitemap( 551 + &entries, 552 + dir.path(), 553 + 
"sitemap.xml", 554 + "https://example.com", 555 + Some("https://cdn.example.com/sitemap.xsl"), 556 + ) 557 + .unwrap(); 558 + 559 + let mut file = std::fs::File::open(dir.path().join("sitemap.xml")).unwrap(); 560 + let mut content = String::new(); 561 + file.read_to_string(&mut content).unwrap(); 562 + 563 + // Verify that absolute URLs are used as-is 564 + assert!(content.contains("href=\"https://cdn.example.com/sitemap.xsl\"")); 565 + } 566 + 567 + #[test] 568 + fn test_stylesheet_absolute_url_http() { 569 + use std::io::Read; 570 + use tempfile::tempdir; 571 + 572 + let dir = tempdir().unwrap(); 573 + let entries = vec![SitemapEntry { 574 + loc: "https://example.com/".to_string(), 575 + lastmod: None, 576 + changefreq: None, 577 + priority: None, 578 + }]; 579 + 580 + generate_single_sitemap( 581 + &entries, 582 + dir.path(), 583 + "sitemap.xml", 584 + "https://example.com", 585 + Some("http://cdn.example.com/sitemap.xsl"), 586 + ) 587 + .unwrap(); 588 + 589 + let mut file = std::fs::File::open(dir.path().join("sitemap.xml")).unwrap(); 590 + let mut content = String::new(); 591 + file.read_to_string(&mut content).unwrap(); 592 + 593 + // Verify that http:// URLs are also used as-is 594 + assert!(content.contains("href=\"http://cdn.example.com/sitemap.xsl\"")); 595 + } 596 + }