toolkit for mdBook [mirror of my GitHub repo] docs.tonywu.dev/mdbookkit/
permalinks rust-analyzer mdbook
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat(permalink): check book URLs against SUMMARY.md

Tony Wu 86714240 cdfdedf1

+131 -88
+13 -3
crates/mdbook-permalinks/src/link.rs
··· 3 3 use mdbook_markdown::pulldown_cmark::{CowStr, Event, LinkType, Tag, TagEnd}; 4 4 use url::Url; 5 5 6 - use crate::vcs::PathError; 7 - 8 6 #[derive(Debug, Default, Clone, thiserror::Error)] 9 7 pub enum LinkStatus { 10 8 #[default] ··· 19 17 Permalink, 20 18 21 19 #[error("links inaccessible")] 22 - Unreachable(Vec<(Url, PathError)>), 20 + Unreachable(Vec<(Url, PathStatus)>), 23 21 24 22 #[error("error encountered: {0}")] 25 23 Error(String), 24 + } 25 + 26 + #[derive(Debug, Clone, Copy, thiserror::Error)] 27 + pub enum PathStatus { 28 + #[error("does not exist")] 29 + Unreachable, 30 + #[error("is ignored by git")] 31 + Ignored, 32 + #[error("is not in repo")] 33 + NotInRepo, 34 + #[error("is not in SUMMARY.md")] 35 + NotInBook, 26 36 } 27 37 28 38 pub struct LinkSpan<'a>(pub Vec<LinkText<'a>>);
+89 -66
crates/mdbook-permalinks/src/main.rs
··· 1 - use std::{collections::HashMap, fmt::Debug, str::FromStr}; 1 + use std::{ 2 + collections::{HashMap, HashSet}, 3 + fmt::Debug, 4 + str::FromStr, 5 + }; 2 6 3 7 use anyhow::{Context, Result, anyhow}; 4 8 use console::colors_enabled_stderr; ··· 7 11 use mdbook_markdown::pulldown_cmark; 8 12 use mdbook_preprocessor::{Preprocessor, PreprocessorContext, book::Book}; 9 13 use serde::Deserialize; 10 - use tap::{Pipe, TapFallible}; 14 + use tap::{Pipe, Tap, TapFallible}; 11 15 use url::Url; 12 16 13 17 use mdbookkit::{ ··· 19 23 }; 20 24 21 25 use self::{ 22 - link::{LinkStatus, RelativeLink}, 26 + link::{LinkStatus, PathStatus, RelativeLink}, 23 27 page::Pages, 24 28 vcs::{Permalink, PermalinkFormat}, 25 29 }; ··· 54 58 } 55 59 56 60 struct Environment { 57 - book_src: Url, 58 - markdown: pulldown_cmark::Options, 59 61 vcs: VersionControl, 62 + root_dir: Url, 63 + markdown: pulldown_cmark::Options, 60 64 config: Config, 61 65 } 62 66 ··· 75 79 let mut content = Pages::new(self.markdown); 76 80 77 81 for (path, ch) in book.iter_chapters() { 82 + let path = path.to_string_lossy(); 78 83 let url = self 79 - .book_src 80 - .join(&path.to_string_lossy()) 84 + .root_dir 85 + .join(&path) 81 86 .context("could not read path as a url")?; 82 87 content 83 88 .insert(url, &ch.content) 84 - .with_context(|| path.display().to_string()) 85 - .context("failed to parse Markdown source:")?; 89 + .with_context(|| format!("failed to parse {path}"))?; 86 90 } 87 91 88 92 self.resolve(&mut content); ··· 90 94 let mut result = book 91 95 .iter_chapters() 92 96 .filter_map(|(path, _)| { 93 - let url = self.book_src.join(&path.to_string_lossy()).ok()?; 97 + let url = self.root_dir.join(&path.to_string_lossy()).ok()?; 94 98 content 95 99 .emit(&url) 96 100 .tap_err(log_warning!()) ··· 109 113 .to_stderr() 110 114 .to_status(); 111 115 112 - book.for_each_chapter_mut(|ch| { 113 - if let Some(path) = &ch.source_path 114 - && let Some(output) = result.remove(path) 115 - { 116 - ch.content = output 116 + book.for_each_text_mut(|path, content| { 117 + if let Some(output) = result.remove(path) { 118 + *content = output; 117 119 } 118 120 }); 119 121 ··· 126 128 impl Environment { 127 129 fn resolve(&self, content: &mut Pages<'_>) { 128 130 self.validate(); 131 + 132 + let book_pages = &content.paths(&self.root_dir); 129 133 130 134 for (base, link) in content.links_mut() { 131 135 let file = if let Some(link) = link.link.strip_prefix('/') { ··· 148 152 link, 149 153 page_url, 150 154 file_url, 155 + book_pages, 151 156 env, 152 157 } 153 158 .resolve(); ··· 157 162 #[inline] 158 163 fn validate(&self) { 159 164 debug_assert!( 160 - self.book_src.as_str().ends_with('/'), 165 + self.root_dir.as_str().ends_with('/'), 161 166 "book_src should have a trailing slash, got {}", 162 - self.book_src 167 + self.root_dir 163 168 ); 164 169 debug_assert!( 165 170 self.vcs.root.as_str().ends_with('/'), ··· 181 186 182 187 let markdown = book.config.markdown_options(); 183 188 184 - let book_src = book 189 + let root_dir = book 185 190 .root 186 191 .canonicalize() 187 192 .context("failed to locate book root")? ··· 190 195 .map_err(|_| anyhow!("book `src` should be a valid absolute path"))?; 191 196 192 197 Ok(Ok(Self { 193 - book_src, 198 + vcs, 199 + root_dir, 194 200 markdown, 195 - vcs, 196 201 config, 197 202 })) 198 203 } ··· 200 205 201 206 #[must_use] 202 207 struct Resolver<'a, 'r> { 208 + link: &'a mut RelativeLink<'r>, 203 209 file_url: Url, 204 210 page_url: &'a Url, 205 - link: &'a mut RelativeLink<'r>, 211 + book_pages: &'a HashSet<String>, 206 212 env: &'a Environment, 207 213 } 208 214 ··· 249 255 }; 250 256 251 257 let relative_to_book = env 252 - .book_src 258 + .root_dir 253 259 .make_relative(&file_url) 254 260 .expect("should be a file"); 255 261 256 262 let should_link = is_vcs 257 263 || relative_to_book.starts_with("../") 258 - || env 259 - .config 260 - .always_link 264 + || relative_to_book.ends_with(".md") && !self.book_pages.contains(&relative_to_book) 265 + || (env.config.always_link) 261 266 .iter() 262 267 .any(|suffix| file_url.path().ends_with(suffix)); 263 268 ··· 275 280 return; 276 281 } 277 282 278 - match env.vcs.link.to_link(&relative_to_repo, hint) { 283 + match env.vcs.link.to_link(&relative_to_repo.path, hint) { 279 284 Ok(href) => { 280 285 link.link = suffix.restored(href).as_str().to_owned().into(); 281 286 link.status = LinkStatus::Permalink; ··· 301 306 if let Some(idx) = path.find('?') { 302 307 path.truncate(idx) 303 308 }; 304 - path.strip_suffix(".html") 305 - .map(ToOwned::to_owned) 306 - .unwrap_or(path) 309 + path 307 310 }; 308 311 309 - // one does not simply avoid trailing slash issues... 310 - // https://github.com/slorber/trailing-slash-guide 311 - let try_files = if path.is_empty() || path.ends_with('/') { 312 - &[ 313 - // enforce that index.html pages should consistently 314 - // be addressed with a trailing slash 315 - format!("{path}index.md"), 316 - format!("{path}README.md"), 317 - ] as &[_] 318 - } else { 319 - &[ 320 - format!("{path}.md"), 321 - // all major hosting providers implicitly redirect 322 - // /folder to /folder/, so these are okay 323 - format!("{path}/index.md"), 324 - format!("{path}/README.md"), 325 - // preserve extension if any which allows checking for 326 - // static files other than book pages 327 - path, 328 - ] 312 + let mut not_found = vec![]; 313 + 314 + let is_index = path.is_empty() || path.ends_with('/'); 315 + 316 + let try_pages = { 317 + let path = path.strip_suffix(".html").unwrap_or(&path); 318 + // one does not simply avoid trailing slash issues... 319 + // https://github.com/slorber/trailing-slash-guide 320 + if is_index { 321 + &[ 322 + // enforce that index.html pages should consistently 323 + // be addressed with a trailing slash 324 + format!("{path}index.md"), 325 + format!("{path}README.md"), 326 + ] as &[_] 327 + } else { 328 + &[ 329 + format!("{path}.md"), 330 + // all major hosting providers implicitly redirect 331 + // /folder to /folder/, so these are okay 332 + format!("{path}/index.md"), 333 + format!("{path}/README.md"), 334 + ] 335 + } 329 336 }; 330 337 331 - let mut not_found = vec![]; 338 + for page in try_pages { 339 + let file_url = (self.env.root_dir) 340 + .join(page) 341 + .expect("should be a valid url") 342 + .tap_mut(|u| u.set_query(file_url.query())) 343 + .tap_mut(|u| u.set_fragment(file_url.fragment())); 344 + 345 + if self.book_pages.contains(page) { 346 + link.link = page_url 347 + .make_relative(&file_url) 348 + .expect("both should be file: urls") 349 + .into(); 350 + link.status = LinkStatus::Rewritten; 351 + return; 352 + } 332 353 333 - for file in try_files { 334 - let Ok(file) = (self.env.book_src.join(file)) 335 - .with_context(|| format!("invalid URL path {file:?}")) 336 - .tap_err(log_debug!()) 337 - else { 338 - continue; 339 - }; 354 + not_found.push((file_url, PathStatus::NotInBook)); 355 + } 340 356 341 - match self.env.vcs.try_file(&file) { 342 - Ok(_) => { 343 - let file_url = { 344 - let mut file = file; 345 - file.set_query(file_url.query()); 346 - file.set_fragment(file_url.fragment()); 347 - file 348 - }; 357 + if !is_index { 358 + let try_file = (self.env.root_dir) 359 + .join(&path) 360 + .expect("should be a valid url"); 361 + 362 + match self.env.vcs.try_file(&try_file) { 363 + Ok(result) if !result.metadata.is_dir() => { 364 + let file_url = try_file 365 + .tap_mut(|u| u.set_query(file_url.query())) 366 + .tap_mut(|u| u.set_fragment(file_url.fragment())); 349 367 350 368 link.link = page_url 351 369 .make_relative(&file_url) ··· 355 373 356 374 return; 357 375 } 376 + Ok(_) => { 377 + // a directory may exist but not accessible 378 + // due to having no index.html 379 + not_found.push((try_file, PathStatus::NotInBook)); 380 + } 358 381 Err(err) => { 359 - not_found.push((file, err)); 382 + not_found.push((try_file, err)); 360 383 } 361 384 } 362 385 }
+14 -1
crates/mdbook-permalinks/src/page.rs
··· 1 - use std::{borrow::Borrow, collections::HashMap, fmt::Debug, hash::Hash, sync::Arc}; 1 + use std::{ 2 + borrow::Borrow, 3 + collections::{HashMap, HashSet}, 4 + fmt::Debug, 5 + hash::Hash, 6 + sync::Arc, 7 + }; 2 8 3 9 use anyhow::{Context, Result, bail}; 4 10 use mdbook_markdown::pulldown_cmark::{Event, Options, Parser, Tag, TagEnd}; ··· 28 34 pages: Default::default(), 29 35 markdown, 30 36 } 37 + } 38 + 39 + pub fn paths(&self, root: &Url) -> HashSet<String> { 40 + self.pages 41 + .keys() 42 + .filter_map(|url| root.make_relative(url)) 43 + .collect() 31 44 } 32 45 33 46 pub fn insert(&mut self, url: Url, source: &'a str) -> Result<&mut Self> {
+1 -1
crates/mdbook-permalinks/src/tests.rs
··· 36 36 }, 37 37 repo: Repository::open_from_env().unwrap(), 38 38 }, 39 - book_src: CARGO_WORKSPACE_DIR 39 + root_dir: CARGO_WORKSPACE_DIR 40 40 .join("crates/")? 41 41 .join(concat!(env!("CARGO_PKG_NAME"), "/"))? 42 42 .join("src/")?,
+14 -17
crates/mdbook-permalinks/src/vcs.rs
··· 8 8 9 9 use mdbookkit::log_debug; 10 10 11 - use crate::{Config, VersionControl, link::ContentTypeHint}; 11 + use crate::{ 12 + Config, VersionControl, 13 + link::{ContentTypeHint, PathStatus}, 14 + }; 12 15 13 16 impl VersionControl { 14 17 pub fn try_from_git(config: &Config, book: &MDBookConfig) -> Result<Result<Self>> { ··· 71 74 Ok(Ok(Self { root, repo, link })) 72 75 } 73 76 74 - pub fn try_file(&self, file: &Url) -> Result<String, PathError> { 77 + pub fn try_file(&self, file: &Url) -> Result<TryFile, PathStatus> { 75 78 let Some(path) = self.root.make_relative(file) else { 76 - return Err(PathError::Unreachable); 79 + return Err(PathStatus::Unreachable); 77 80 }; 78 81 79 82 if path.starts_with("../") { 80 - return Err(PathError::NotInRepo); 83 + return Err(PathStatus::NotInRepo); 81 84 } 82 85 83 - if file 86 + if let Ok(metadata) = file 84 87 .to_file_path() 85 88 .expect("should be a file: url") 86 89 .symlink_metadata() 87 - .is_ok() 88 90 { 89 91 if !self.repo.is_path_ignored(&path).unwrap_or(false) { 90 - Ok(path) 92 + Ok(TryFile { path, metadata }) 91 93 } else { 92 - Err(PathError::Ignored) 94 + Err(PathStatus::Ignored) 93 95 } 94 96 } else { 95 - Err(PathError::Unreachable) 97 + Err(PathStatus::Unreachable) 96 98 } 97 99 } 98 100 } 99 101 100 - #[derive(Debug, Clone, Copy, thiserror::Error)] 101 - pub enum PathError { 102 - #[error("does not exist")] 103 - Unreachable, 104 - #[error("is ignored by git")] 105 - Ignored, 106 - #[error("is not in repo")] 107 - NotInRepo, 102 + pub struct TryFile { 103 + pub path: String, 104 + pub metadata: std::fs::Metadata, 108 105 } 109 106 110 107 pub trait PermalinkFormat {