this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

refactor(main): extract job inference into its own module

main.rs was 683 lines, and roughly 86% of it (591 lines) was job
inference: the heuristics (get_job, guess_from_filenames,
get_compressor_from_filename, expand_shortcut_ext, get_input_filename,
get_path), the Action and Job types, and their unit tests. Move all of
that to a new job module, leaving main.rs at 96 lines containing only
the CmprssArgs/Format CLI shell and the thin command dispatch.

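Behavior is unchanged; this is a pure reorganization commit. Job, Action,
get_job, and get_compressor_from_filename become `pub` in the new module,
and main.rs now imports Action and get_job from it.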

+606 -591
+602
src/job.rs
··· 1 + //! Job inference — maps user-provided CLI args and filenames into a concrete 2 + //! `Compressor` + action + input/output triple. 3 + //! 4 + //! Most of the user-facing ergonomics of `cmprss` live here: guessing whether 5 + //! we're compressing or extracting, whether the input is piped from stdin, 6 + //! which compressor a filename implies, and how to dispatch compound extensions 7 + //! like `.tar.gz` or `.tgz` into a pipeline. 8 + 9 + use anyhow::{anyhow, bail}; 10 + use is_terminal::IsTerminal; 11 + use std::path::{Path, PathBuf}; 12 + 13 + use crate::backends::{self, Pipeline}; 14 + use crate::utils::{CmprssInput, CmprssOutput, CommonArgs, Compressor, Result}; 15 + 16 + /// Defines a single compress/extract action to take. 17 + #[derive(Debug)] 18 + pub struct Job { 19 + pub compressor: Box<dyn Compressor>, 20 + pub input: CmprssInput, 21 + pub output: CmprssOutput, 22 + pub action: Action, 23 + } 24 + 25 + #[derive(Debug, PartialEq, Clone, Copy)] 26 + pub enum Action { 27 + Compress, 28 + Extract, 29 + Unknown, 30 + } 31 + 32 + /// Parse the common args and determine the details of the job requested. 33 + pub fn get_job(compressor: Option<Box<dyn Compressor>>, common_args: &CommonArgs) -> Result<Job> { 34 + let mut compressor = compressor; 35 + let mut action = { 36 + if common_args.compress { 37 + Action::Compress 38 + } else if common_args.extract || common_args.decompress { 39 + Action::Extract 40 + } else { 41 + Action::Unknown 42 + } 43 + }; 44 + 45 + let mut inputs = Vec::new(); 46 + if let Some(in_file) = &common_args.input { 47 + match get_path(in_file) { 48 + Some(path) => inputs.push(path), 49 + None => { 50 + bail!("Specified input path does not exist"); 51 + } 52 + } 53 + } 54 + 55 + let mut output = match &common_args.output { 56 + Some(output) => { 57 + let path = Path::new(output); 58 + if path.try_exists()? 
&& !path.is_dir() { 59 + // Output path exists, bail out 60 + bail!("Specified output path already exists"); 61 + } 62 + Some(path) 63 + } 64 + None => None, 65 + }; 66 + 67 + // Process the io_list, check if there is an output first 68 + let mut io_list = common_args.io_list.clone(); 69 + if output.is_none() 70 + && let Some(possible_output) = common_args.io_list.last() 71 + { 72 + let path = Path::new(possible_output); 73 + if !path.try_exists()? { 74 + // Use the given path if it doesn't exist 75 + output = Some(path); 76 + io_list.pop(); 77 + } else if path.is_dir() { 78 + match action { 79 + Action::Compress => { 80 + // A directory can potentially be a target output location or 81 + // an input, for now assume it is an input. 82 + } 83 + Action::Extract => { 84 + // Can extract to a directory, and it wouldn't make any sense as an input 85 + output = Some(path); 86 + io_list.pop(); 87 + } 88 + _ => { 89 + // TODO: don't know if this is an input or output, assume we're compressing this directory 90 + // This does cause problems for inferencing "cat archive.tar | cmprss tar ." 
91 + // Probably need to add some special casing 92 + } 93 + }; 94 + } else { 95 + // TODO: check for scenarios where we want to append to an existing archive 96 + } 97 + } 98 + 99 + // Validate the specified inputs 100 + // Everything in the io_list should be an input 101 + for input in &io_list { 102 + if let Some(path) = get_path(input) { 103 + inputs.push(path); 104 + } else { 105 + bail!("Specified input path does not exist"); 106 + } 107 + } 108 + 109 + // Fallback to stdin/stdout if we're missing files 110 + let cmprss_input = match inputs.is_empty() { 111 + true => { 112 + if !std::io::stdin().is_terminal() 113 + && !&common_args.ignore_pipes 114 + && !&common_args.ignore_stdin 115 + { 116 + CmprssInput::Pipe(std::io::stdin()) 117 + } else { 118 + bail!("No specified input"); 119 + } 120 + } 121 + false => CmprssInput::Path(inputs), 122 + }; 123 + 124 + let cmprss_output = match output { 125 + Some(path) => CmprssOutput::Path(path.to_path_buf()), 126 + None => { 127 + if !std::io::stdout().is_terminal() 128 + && !&common_args.ignore_pipes 129 + && !&common_args.ignore_stdout 130 + { 131 + CmprssOutput::Pipe(std::io::stdout()) 132 + } else { 133 + match action { 134 + Action::Compress => { 135 + let c = compressor 136 + .as_ref() 137 + .ok_or_else(|| anyhow!("Must specify a compressor"))?; 138 + CmprssOutput::Path(PathBuf::from( 139 + c.default_compressed_filename(get_input_filename(&cmprss_input)?), 140 + )) 141 + } 142 + Action::Extract => { 143 + if compressor.is_none() { 144 + compressor = 145 + get_compressor_from_filename(get_input_filename(&cmprss_input)?); 146 + } 147 + let c = compressor 148 + .as_ref() 149 + .ok_or_else(|| anyhow!("Must specify a compressor"))?; 150 + CmprssOutput::Path(PathBuf::from( 151 + c.default_extracted_filename(get_input_filename(&cmprss_input)?), 152 + )) 153 + } 154 + Action::Unknown => { 155 + if let Some(ref c) = compressor { 156 + // We know the compressor, does the input have the same extension? 
157 + if let Some(compressor_from_input) = 158 + get_compressor_from_filename(get_input_filename(&cmprss_input)?) 159 + { 160 + if c.name() == compressor_from_input.name() { 161 + action = Action::Extract; 162 + CmprssOutput::Path(PathBuf::from(c.default_extracted_filename( 163 + get_input_filename(&cmprss_input)?, 164 + ))) 165 + } else { 166 + action = Action::Compress; 167 + CmprssOutput::Path(PathBuf::from( 168 + c.default_compressed_filename(get_input_filename( 169 + &cmprss_input, 170 + )?), 171 + )) 172 + } 173 + } else { 174 + action = Action::Compress; 175 + CmprssOutput::Path(PathBuf::from(c.default_compressed_filename( 176 + get_input_filename(&cmprss_input)?, 177 + ))) 178 + } 179 + } else { 180 + // Can still work if the input is an archive 181 + compressor = 182 + get_compressor_from_filename(get_input_filename(&cmprss_input)?); 183 + let c = compressor 184 + .as_ref() 185 + .ok_or_else(|| anyhow!("Must specify a compressor"))?; 186 + action = Action::Extract; 187 + CmprssOutput::Path(PathBuf::from( 188 + c.default_extracted_filename(get_input_filename(&cmprss_input)?), 189 + )) 190 + } 191 + } 192 + } 193 + } 194 + } 195 + }; 196 + 197 + // If we don't have the compressor/action, we can attempt to infer 198 + if compressor.is_none() || action == Action::Unknown { 199 + match action { 200 + Action::Compress => { 201 + // Look at the output name 202 + if let CmprssOutput::Path(path) = &cmprss_output { 203 + compressor = get_compressor_from_filename(path); 204 + } 205 + } 206 + Action::Extract => { 207 + if let CmprssInput::Path(paths) = &cmprss_input { 208 + if paths.len() != 1 { 209 + bail!("Expected a single archive to extract"); 210 + } 211 + compressor = get_compressor_from_filename(paths.first().unwrap()); 212 + } 213 + } 214 + Action::Unknown => match (&cmprss_input, &cmprss_output) { 215 + (CmprssInput::Path(paths), CmprssOutput::Path(path)) => { 216 + if path.is_dir() && paths.len() == 1 { 217 + compressor = 
get_compressor_from_filename(paths.first().unwrap()); 218 + action = Action::Extract; 219 + 220 + if compressor.is_none() { 221 + bail!( 222 + "Couldn't determine how to extract {:?}", 223 + paths.first().unwrap() 224 + ); 225 + } 226 + } else { 227 + let (guessed_compressor, guessed_action) = 228 + guess_from_filenames(paths, path, compressor); 229 + compressor = guessed_compressor; 230 + action = guessed_action; 231 + } 232 + } 233 + (CmprssInput::Path(paths), CmprssOutput::Pipe(_)) => { 234 + if let Some(ref c) = compressor { 235 + if let Some(input_c) = get_compressor_from_filename(paths.first().unwrap()) 236 + { 237 + if c.name() == input_c.name() { 238 + action = Action::Extract; 239 + } else { 240 + action = Action::Compress; 241 + } 242 + } else { 243 + action = Action::Compress; 244 + } 245 + } else { 246 + if paths.len() != 1 { 247 + bail!("Expected a single input file for piping to stdout"); 248 + } 249 + compressor = get_compressor_from_filename(paths.first().unwrap()); 250 + if compressor.is_some() { 251 + action = Action::Extract; 252 + } else { 253 + bail!("Can't guess compressor to use"); 254 + } 255 + } 256 + } 257 + (CmprssInput::Pipe(_), CmprssOutput::Path(path)) => { 258 + if let Some(ref c) = compressor { 259 + if get_compressor_from_filename(path) 260 + .is_some_and(|pc| c.name() == pc.name()) 261 + { 262 + action = Action::Compress; 263 + } else { 264 + action = Action::Extract; 265 + } 266 + } else { 267 + compressor = get_compressor_from_filename(path); 268 + if compressor.is_some() { 269 + action = Action::Compress; 270 + } else { 271 + bail!("Can't guess compressor to use"); 272 + } 273 + } 274 + } 275 + (CmprssInput::Pipe(_), CmprssOutput::Pipe(_)) => { 276 + action = Action::Compress; 277 + } 278 + // Handle all Writer output cases 279 + (_, CmprssOutput::Writer(_)) => { 280 + // Writer outputs are only used internally by Pipeline 281 + // In main.rs we'll assume compression 282 + action = Action::Compress; 283 + } 284 + // Handle all 
Reader input cases 285 + (&CmprssInput::Reader(_), _) => { 286 + // For Reader input, we'll assume extraction 287 + action = Action::Extract; 288 + } 289 + }, 290 + } 291 + } 292 + 293 + let compressor = compressor.ok_or_else(|| anyhow!("Could not determine compressor to use"))?; 294 + if action == Action::Unknown { 295 + bail!("Could not determine action to take"); 296 + } 297 + 298 + Ok(Job { 299 + compressor, 300 + input: cmprss_input, 301 + output: cmprss_output, 302 + action, 303 + }) 304 + } 305 + 306 + /// Get the input filename or return a default file 307 + /// This file will be used to generate the output filename 308 + fn get_input_filename(input: &CmprssInput) -> Result<&Path> { 309 + match input { 310 + CmprssInput::Path(paths) => match paths.first() { 311 + Some(path) => Ok(path), 312 + None => bail!("error: no input specified"), 313 + }, 314 + CmprssInput::Pipe(_) => Ok(Path::new("archive")), 315 + CmprssInput::Reader(_) => Ok(Path::new("piped_data")), 316 + } 317 + } 318 + 319 + /// Expand a compound shortcut extension like `.tgz` into its equivalent 320 + /// compressor chain, in innermost-to-outermost order. Returns `None` for 321 + /// extensions that aren't a known shortcut. 
322 + fn expand_shortcut_ext(ext: &str) -> Option<&'static [&'static str]> { 323 + match ext { 324 + "tgz" => Some(&["tar", "gz"]), 325 + "tbz" | "tbz2" => Some(&["tar", "bz2"]), 326 + "txz" => Some(&["tar", "xz"]), 327 + "tzst" => Some(&["tar", "zst"]), 328 + _ => None, 329 + } 330 + } 331 + 332 + /// Get a compressor pipeline from a filename by scanning extensions right-to-left 333 + pub fn get_compressor_from_filename(filename: &Path) -> Option<Box<dyn Compressor>> { 334 + let file_name = filename.file_name()?.to_str()?; 335 + let parts: Vec<&str> = file_name.split('.').collect(); 336 + 337 + if parts.len() < 2 { 338 + return None; 339 + } 340 + 341 + // Scan extensions right-to-left, collecting known compressors 342 + // until hitting an unknown extension or the base name. 343 + // e.g., "a.b.tar.gz" → gz ✓, tar ✓, b ✗ stop → [gz, tar] 344 + // Compound shortcuts like "tgz" expand to their component chain, so 345 + // "archive.tgz" behaves identically to "archive.tar.gz". 346 + let mut compressor_names: Vec<String> = Vec::new(); 347 + for ext in parts[1..].iter().rev() { 348 + if let Some(chain) = expand_shortcut_ext(ext) { 349 + // chain is innermost→outermost; we push in right-to-left order 350 + // (outermost first) to match how we're walking the filename. 
351 + for name in chain.iter().rev() { 352 + compressor_names.push((*name).to_string()); 353 + } 354 + } else if let Some(c) = backends::compressor_from_str(ext) { 355 + compressor_names.push(c.name().to_string()); 356 + } else { 357 + break; 358 + } 359 + } 360 + 361 + if compressor_names.is_empty() { 362 + return None; 363 + } 364 + 365 + // Reverse to innermost-to-outermost order 366 + compressor_names.reverse(); 367 + Pipeline::from_names(&compressor_names) 368 + .ok() 369 + .map(|m| Box::new(m) as Box<dyn Compressor>) 370 + } 371 + 372 + /// Convert an input path into a Path 373 + fn get_path(input: &str) -> Option<PathBuf> { 374 + let path = PathBuf::from(input); 375 + if !path.try_exists().unwrap_or(false) { 376 + return None; 377 + } 378 + Some(path) 379 + } 380 + 381 + /// Guess compressor/action from the two filenames 382 + /// The compressor may already be given 383 + fn guess_from_filenames( 384 + input: &[PathBuf], 385 + output: &Path, 386 + compressor: Option<Box<dyn Compressor>>, 387 + ) -> (Option<Box<dyn Compressor>>, Action) { 388 + if input.len() != 1 { 389 + if let Some(guessed_compressor) = get_compressor_from_filename(output) { 390 + return (Some(guessed_compressor), Action::Compress); 391 + } 392 + 393 + // Check if output is a directory - this is likely an extraction 394 + if output.is_dir() { 395 + // Try to determine compressor from the input file's extension(s) 396 + if let Some(input_path) = input.first() 397 + && let Some(guessed_compressor) = get_compressor_from_filename(input_path) 398 + { 399 + return (Some(guessed_compressor), Action::Extract); 400 + } 401 + } 402 + 403 + // In theory we could be extracting multiple files to a directory 404 + // We'll fail somewhere else if that's not the case 405 + return (compressor, Action::Extract); 406 + } 407 + let input = input.first().unwrap(); 408 + 409 + let guessed_compressor = get_compressor_from_filename(output); 410 + let guessed_extractor = get_compressor_from_filename(input); 411 + 
let guessed_compressor_name = if let Some(c) = &guessed_compressor { 412 + c.name() 413 + } else { 414 + "" 415 + }; 416 + let guessed_extractor_name = if let Some(e) = &guessed_extractor { 417 + e.name() 418 + } else { 419 + "" 420 + }; 421 + 422 + if let Some(c) = &compressor { 423 + if guessed_compressor_name == c.name() { 424 + return (compressor, Action::Compress); 425 + } else if guessed_extractor_name == c.name() { 426 + return (compressor, Action::Extract); 427 + } else { 428 + // Default to compressing 429 + return (compressor, Action::Compress); 430 + } 431 + } 432 + 433 + match (guessed_compressor, guessed_extractor) { 434 + (None, None) => (None, Action::Unknown), 435 + (Some(c), None) => (Some(c), Action::Compress), 436 + (None, Some(e)) => (Some(e), Action::Extract), 437 + (Some(c), Some(e)) => { 438 + // Compare the input and output extensions to see if one has an extra extension 439 + let input_file = input.file_name().unwrap().to_str().unwrap(); 440 + let input_ext = input.extension().unwrap_or_default(); 441 + let output_file = output.file_name().unwrap().to_str().unwrap(); 442 + let output_ext = output.extension().unwrap_or_default(); 443 + let guessed_output = input_file.to_string() + "." + output_ext.to_str().unwrap(); 444 + let guessed_input = output_file.to_string() + "." 
+ input_ext.to_str().unwrap(); 445 + 446 + if guessed_output == output_file { 447 + // Input is "archive.tar", output is "archive.tar.gz" — only add the outer layer 448 + let single_compressor = 449 + backends::compressor_from_str(output_ext.to_str().unwrap_or("")); 450 + (single_compressor.or(Some(c)), Action::Compress) 451 + } else if guessed_input == input_file { 452 + // Output is "archive.tar", input is "archive.tar.gz" — only strip the outer layer 453 + let single_compressor = 454 + backends::compressor_from_str(input_ext.to_str().unwrap_or("")); 455 + (single_compressor.or(Some(e)), Action::Extract) 456 + } else if c.name() == e.name() { 457 + // Same format for input and output, can't decide 458 + if output.is_dir() { 459 + (Some(e), Action::Extract) 460 + } else { 461 + (Some(c), Action::Unknown) 462 + } 463 + } else if output.is_dir() { 464 + (Some(e), Action::Extract) 465 + } else { 466 + (None, Action::Unknown) 467 + } 468 + } 469 + } 470 + } 471 + 472 + #[cfg(test)] 473 + mod tests { 474 + use super::*; 475 + use crate::utils::ExtractedTarget; 476 + use std::path::Path; 477 + 478 + fn compressor_name(path: &str) -> Option<String> { 479 + get_compressor_from_filename(Path::new(path)).map(|c| c.name().to_string()) 480 + } 481 + 482 + fn compressor_extension(path: &str) -> Option<String> { 483 + get_compressor_from_filename(Path::new(path)).map(|c| c.extension().to_string()) 484 + } 485 + 486 + #[test] 487 + fn test_single_extension() { 488 + assert_eq!(compressor_name("file.gz"), Some("gzip".into())); 489 + assert_eq!(compressor_name("file.xz"), Some("xz".into())); 490 + assert_eq!(compressor_name("file.bz2"), Some("bzip2".into())); 491 + assert_eq!(compressor_name("file.zst"), Some("zstd".into())); 492 + assert_eq!(compressor_name("file.lz4"), Some("lz4".into())); 493 + assert_eq!(compressor_name("file.br"), Some("brotli".into())); 494 + assert_eq!(compressor_name("file.sz"), Some("snappy".into())); 495 + assert_eq!(compressor_name("file.lzma"), 
Some("lzma".into())); 496 + assert_eq!(compressor_name("file.tar"), Some("tar".into())); 497 + assert_eq!(compressor_name("file.zip"), Some("zip".into())); 498 + } 499 + 500 + #[test] 501 + fn test_multi_extension() { 502 + assert_eq!(compressor_name("archive.tar.gz"), Some("gzip".into())); 503 + assert_eq!(compressor_name("archive.tar.xz"), Some("xz".into())); 504 + assert_eq!(compressor_name("archive.tar.bz2"), Some("bzip2".into())); 505 + assert_eq!(compressor_name("archive.tar.zst"), Some("zstd".into())); 506 + } 507 + 508 + #[test] 509 + fn test_shortcut_extensions() { 510 + // Shortcut extensions resolve to a tar + outer compressor pipeline, 511 + // so the reported name is the outer compressor (same as the long form). 512 + assert_eq!(compressor_name("archive.tgz"), Some("gzip".into())); 513 + assert_eq!(compressor_name("archive.tbz"), Some("bzip2".into())); 514 + assert_eq!(compressor_name("archive.tbz2"), Some("bzip2".into())); 515 + assert_eq!(compressor_name("archive.txz"), Some("xz".into())); 516 + assert_eq!(compressor_name("archive.tzst"), Some("zstd".into())); 517 + } 518 + 519 + #[test] 520 + fn test_shortcut_extensions_extract_to_directory() { 521 + // Shortcuts are tar-based, so they must extract to a directory. 
522 + for path in ["a.tgz", "a.tbz", "a.tbz2", "a.txz", "a.tzst"] { 523 + let c = get_compressor_from_filename(Path::new(path)).unwrap(); 524 + assert_eq!( 525 + c.default_extracted_target(), 526 + ExtractedTarget::Directory, 527 + "{path} should extract to a directory", 528 + ); 529 + } 530 + } 531 + 532 + #[test] 533 + fn test_unknown_middle_extension() { 534 + // "b" is not a compressor, so only tar.gz should be detected 535 + assert_eq!(compressor_name("a.b.tar.gz"), Some("gzip".into())); 536 + assert_eq!(compressor_name("report.2024.tar.gz"), Some("gzip".into())); 537 + } 538 + 539 + #[test] 540 + fn test_no_recognized_extension() { 541 + assert_eq!(compressor_name("file.txt"), None); 542 + assert_eq!(compressor_name("file.pdf"), None); 543 + assert_eq!(compressor_name("file"), None); 544 + } 545 + 546 + #[test] 547 + fn test_default_filenames_single_pipeline() { 548 + let c = get_compressor_from_filename(Path::new("file.gz")).unwrap(); 549 + assert_eq!( 550 + c.default_compressed_filename(Path::new("data.txt")), 551 + "data.txt.gz" 552 + ); 553 + assert_eq!(c.default_extracted_filename(Path::new("data.gz")), "data"); 554 + } 555 + 556 + #[test] 557 + fn test_default_filenames_multi_pipeline() { 558 + let c = get_compressor_from_filename(Path::new("archive.tar.gz")).unwrap(); 559 + assert_eq!( 560 + c.default_compressed_filename(Path::new("data")), 561 + "data.tar.gz" 562 + ); 563 + // tar.gz extracts to a directory, so extracted filename is "." 
564 + assert_eq!(c.default_extracted_filename(Path::new("data.tar.gz")), "."); 565 + } 566 + 567 + #[test] 568 + fn test_is_archive_single_pipeline() { 569 + let c = get_compressor_from_filename(Path::new("file.gz")).unwrap(); 570 + assert!(c.is_archive(Path::new("test.gz"))); 571 + assert!(!c.is_archive(Path::new("test.xz"))); 572 + } 573 + 574 + #[test] 575 + fn test_is_archive_multi_pipeline() { 576 + let c = get_compressor_from_filename(Path::new("archive.tar.gz")).unwrap(); 577 + assert!(c.is_archive(Path::new("foo.tar.gz"))); 578 + assert!(!c.is_archive(Path::new("foo.gz"))); 579 + } 580 + 581 + #[test] 582 + fn test_extracted_target_single_pipeline() { 583 + let gz = get_compressor_from_filename(Path::new("file.gz")).unwrap(); 584 + assert_eq!(gz.default_extracted_target(), ExtractedTarget::File); 585 + 586 + let tar = get_compressor_from_filename(Path::new("file.tar")).unwrap(); 587 + assert_eq!(tar.default_extracted_target(), ExtractedTarget::Directory); 588 + } 589 + 590 + #[test] 591 + fn test_extracted_target_multi_pipeline() { 592 + // tar.gz: innermost is tar, which extracts to directory 593 + let c = get_compressor_from_filename(Path::new("archive.tar.gz")).unwrap(); 594 + assert_eq!(c.default_extracted_target(), ExtractedTarget::Directory); 595 + } 596 + 597 + #[test] 598 + fn test_single_extension_returns_correct_extension() { 599 + assert_eq!(compressor_extension("file.gz"), Some("gz".into())); 600 + assert_eq!(compressor_extension("file.tar"), Some("tar".into())); 601 + } 602 + }
+4 -591
src/main.rs
··· 1 1 pub mod backends; 2 + mod job; 2 3 pub mod progress; 3 4 pub mod test_utils; 4 5 pub mod utils; 5 6 6 - use anyhow::{anyhow, bail}; 7 7 use backends::*; 8 8 use clap::{Parser, Subcommand}; 9 - use is_terminal::IsTerminal; 10 - use std::path::{Path, PathBuf}; 9 + use job::{Action, get_job}; 11 10 use utils::*; 12 11 13 12 /// A compression multi-tool ··· 22 21 #[clap(flatten)] 23 22 pub base_args: CommonArgs, 24 23 } 24 + 25 25 #[derive(Subcommand, Debug)] 26 26 enum Format { 27 27 /// tar archive format ··· 60 60 Lzma(LzmaArgs), 61 61 } 62 62 63 - /// Get the input filename or return a default file 64 - /// This file will be used to generate the output filename 65 - fn get_input_filename(input: &CmprssInput) -> Result<&Path> { 66 - match input { 67 - CmprssInput::Path(paths) => match paths.first() { 68 - Some(path) => Ok(path), 69 - None => bail!("error: no input specified"), 70 - }, 71 - CmprssInput::Pipe(_) => Ok(Path::new("archive")), 72 - CmprssInput::Reader(_) => Ok(Path::new("piped_data")), 73 - } 74 - } 75 - 76 - #[derive(Debug, PartialEq, Clone, Copy)] 77 - enum Action { 78 - Compress, 79 - Extract, 80 - Unknown, 81 - } 82 - 83 - /// Defines a single compress/extract action to take. 84 - #[derive(Debug)] 85 - struct Job { 86 - compressor: Box<dyn Compressor>, 87 - input: CmprssInput, 88 - output: CmprssOutput, 89 - action: Action, 90 - } 91 - 92 - /// Expand a compound shortcut extension like `.tgz` into its equivalent 93 - /// compressor chain, in innermost-to-outermost order. Returns `None` for 94 - /// extensions that aren't a known shortcut. 
95 - fn expand_shortcut_ext(ext: &str) -> Option<&'static [&'static str]> { 96 - match ext { 97 - "tgz" => Some(&["tar", "gz"]), 98 - "tbz" | "tbz2" => Some(&["tar", "bz2"]), 99 - "txz" => Some(&["tar", "xz"]), 100 - "tzst" => Some(&["tar", "zst"]), 101 - _ => None, 102 - } 103 - } 104 - 105 - /// Get a compressor pipeline from a filename by scanning extensions right-to-left 106 - fn get_compressor_from_filename(filename: &Path) -> Option<Box<dyn Compressor>> { 107 - let file_name = filename.file_name()?.to_str()?; 108 - let parts: Vec<&str> = file_name.split('.').collect(); 109 - 110 - if parts.len() < 2 { 111 - return None; 112 - } 113 - 114 - // Scan extensions right-to-left, collecting known compressors 115 - // until hitting an unknown extension or the base name. 116 - // e.g., "a.b.tar.gz" → gz ✓, tar ✓, b ✗ stop → [gz, tar] 117 - // Compound shortcuts like "tgz" expand to their component chain, so 118 - // "archive.tgz" behaves identically to "archive.tar.gz". 119 - let mut compressor_names: Vec<String> = Vec::new(); 120 - for ext in parts[1..].iter().rev() { 121 - if let Some(chain) = expand_shortcut_ext(ext) { 122 - // chain is innermost→outermost; we push in right-to-left order 123 - // (outermost first) to match how we're walking the filename. 
124 - for name in chain.iter().rev() { 125 - compressor_names.push((*name).to_string()); 126 - } 127 - } else if let Some(c) = backends::compressor_from_str(ext) { 128 - compressor_names.push(c.name().to_string()); 129 - } else { 130 - break; 131 - } 132 - } 133 - 134 - if compressor_names.is_empty() { 135 - return None; 136 - } 137 - 138 - // Reverse to innermost-to-outermost order 139 - compressor_names.reverse(); 140 - Pipeline::from_names(&compressor_names) 141 - .ok() 142 - .map(|m| Box::new(m) as Box<dyn Compressor>) 143 - } 144 - 145 - /// Convert an input path into a Path 146 - fn get_path(input: &str) -> Option<PathBuf> { 147 - let path = PathBuf::from(input); 148 - if !path.try_exists().unwrap_or(false) { 149 - return None; 150 - } 151 - Some(path) 152 - } 153 - 154 - /// Guess compressor/action from the two filenames 155 - /// The compressor may already be given 156 - fn guess_from_filenames( 157 - input: &[PathBuf], 158 - output: &Path, 159 - compressor: Option<Box<dyn Compressor>>, 160 - ) -> (Option<Box<dyn Compressor>>, Action) { 161 - if input.len() != 1 { 162 - if let Some(guessed_compressor) = get_compressor_from_filename(output) { 163 - return (Some(guessed_compressor), Action::Compress); 164 - } 165 - 166 - // Check if output is a directory - this is likely an extraction 167 - if output.is_dir() { 168 - // Try to determine compressor from the input file's extension(s) 169 - if let Some(input_path) = input.first() 170 - && let Some(guessed_compressor) = get_compressor_from_filename(input_path) 171 - { 172 - return (Some(guessed_compressor), Action::Extract); 173 - } 174 - } 175 - 176 - // In theory we could be extracting multiple files to a directory 177 - // We'll fail somewhere else if that's not the case 178 - return (compressor, Action::Extract); 179 - } 180 - let input = input.first().unwrap(); 181 - 182 - let guessed_compressor = get_compressor_from_filename(output); 183 - let guessed_extractor = get_compressor_from_filename(input); 184 - 
let guessed_compressor_name = if let Some(c) = &guessed_compressor { 185 - c.name() 186 - } else { 187 - "" 188 - }; 189 - let guessed_extractor_name = if let Some(e) = &guessed_extractor { 190 - e.name() 191 - } else { 192 - "" 193 - }; 194 - 195 - if let Some(c) = &compressor { 196 - if guessed_compressor_name == c.name() { 197 - return (compressor, Action::Compress); 198 - } else if guessed_extractor_name == c.name() { 199 - return (compressor, Action::Extract); 200 - } else { 201 - // Default to compressing 202 - return (compressor, Action::Compress); 203 - } 204 - } 205 - 206 - match (guessed_compressor, guessed_extractor) { 207 - (None, None) => (None, Action::Unknown), 208 - (Some(c), None) => (Some(c), Action::Compress), 209 - (None, Some(e)) => (Some(e), Action::Extract), 210 - (Some(c), Some(e)) => { 211 - // Compare the input and output extensions to see if one has an extra extension 212 - let input_file = input.file_name().unwrap().to_str().unwrap(); 213 - let input_ext = input.extension().unwrap_or_default(); 214 - let output_file = output.file_name().unwrap().to_str().unwrap(); 215 - let output_ext = output.extension().unwrap_or_default(); 216 - let guessed_output = input_file.to_string() + "." + output_ext.to_str().unwrap(); 217 - let guessed_input = output_file.to_string() + "." 
+ input_ext.to_str().unwrap(); 218 - 219 - if guessed_output == output_file { 220 - // Input is "archive.tar", output is "archive.tar.gz" — only add the outer layer 221 - let single_compressor = 222 - backends::compressor_from_str(output_ext.to_str().unwrap_or("")); 223 - (single_compressor.or(Some(c)), Action::Compress) 224 - } else if guessed_input == input_file { 225 - // Output is "archive.tar", input is "archive.tar.gz" — only strip the outer layer 226 - let single_compressor = 227 - backends::compressor_from_str(input_ext.to_str().unwrap_or("")); 228 - (single_compressor.or(Some(e)), Action::Extract) 229 - } else if c.name() == e.name() { 230 - // Same format for input and output, can't decide 231 - if output.is_dir() { 232 - (Some(e), Action::Extract) 233 - } else { 234 - (Some(c), Action::Unknown) 235 - } 236 - } else if output.is_dir() { 237 - (Some(e), Action::Extract) 238 - } else { 239 - (None, Action::Unknown) 240 - } 241 - } 242 - } 243 - } 244 - 245 - /// Parse the common args and determine the details of the job requested 246 - fn get_job(compressor: Option<Box<dyn Compressor>>, common_args: &CommonArgs) -> Result<Job> { 247 - let mut compressor = compressor; 248 - let mut action = { 249 - if common_args.compress { 250 - Action::Compress 251 - } else if common_args.extract || common_args.decompress { 252 - Action::Extract 253 - } else { 254 - Action::Unknown 255 - } 256 - }; 257 - 258 - let mut inputs = Vec::new(); 259 - if let Some(in_file) = &common_args.input { 260 - match get_path(in_file) { 261 - Some(path) => inputs.push(path), 262 - None => { 263 - bail!("Specified input path does not exist"); 264 - } 265 - } 266 - } 267 - 268 - let mut output = match &common_args.output { 269 - Some(output) => { 270 - let path = Path::new(output); 271 - if path.try_exists()? 
&& !path.is_dir() { 272 - // Output path exists, bail out 273 - bail!("Specified output path already exists"); 274 - } 275 - Some(path) 276 - } 277 - None => None, 278 - }; 279 - 280 - // Process the io_list, check if there is an output first 281 - let mut io_list = common_args.io_list.clone(); 282 - if output.is_none() 283 - && let Some(possible_output) = common_args.io_list.last() 284 - { 285 - let path = Path::new(possible_output); 286 - if !path.try_exists()? { 287 - // Use the given path if it doesn't exist 288 - output = Some(path); 289 - io_list.pop(); 290 - } else if path.is_dir() { 291 - match action { 292 - Action::Compress => { 293 - // A directory can potentially be a target output location or 294 - // an input, for now assume it is an input. 295 - } 296 - Action::Extract => { 297 - // Can extract to a directory, and it wouldn't make any sense as an input 298 - output = Some(path); 299 - io_list.pop(); 300 - } 301 - _ => { 302 - // TODO: don't know if this is an input or output, assume we're compressing this directory 303 - // This does cause problems for inferencing "cat archive.tar | cmprss tar ." 
304 - // Probably need to add some special casing 305 - } 306 - }; 307 - } else { 308 - // TODO: check for scenarios where we want to append to an existing archive 309 - } 310 - } 311 - 312 - // Validate the specified inputs 313 - // Everything in the io_list should be an input 314 - for input in &io_list { 315 - if let Some(path) = get_path(input) { 316 - inputs.push(path); 317 - } else { 318 - bail!("Specified input path does not exist"); 319 - } 320 - } 321 - 322 - // Fallback to stdin/stdout if we're missing files 323 - let cmprss_input = match inputs.is_empty() { 324 - true => { 325 - if !std::io::stdin().is_terminal() 326 - && !&common_args.ignore_pipes 327 - && !&common_args.ignore_stdin 328 - { 329 - CmprssInput::Pipe(std::io::stdin()) 330 - } else { 331 - bail!("No specified input"); 332 - } 333 - } 334 - false => CmprssInput::Path(inputs), 335 - }; 336 - 337 - let cmprss_output = match output { 338 - Some(path) => CmprssOutput::Path(path.to_path_buf()), 339 - None => { 340 - if !std::io::stdout().is_terminal() 341 - && !&common_args.ignore_pipes 342 - && !&common_args.ignore_stdout 343 - { 344 - CmprssOutput::Pipe(std::io::stdout()) 345 - } else { 346 - match action { 347 - Action::Compress => { 348 - let c = compressor 349 - .as_ref() 350 - .ok_or_else(|| anyhow!("Must specify a compressor"))?; 351 - CmprssOutput::Path(PathBuf::from( 352 - c.default_compressed_filename(get_input_filename(&cmprss_input)?), 353 - )) 354 - } 355 - Action::Extract => { 356 - if compressor.is_none() { 357 - compressor = 358 - get_compressor_from_filename(get_input_filename(&cmprss_input)?); 359 - } 360 - let c = compressor 361 - .as_ref() 362 - .ok_or_else(|| anyhow!("Must specify a compressor"))?; 363 - CmprssOutput::Path(PathBuf::from( 364 - c.default_extracted_filename(get_input_filename(&cmprss_input)?), 365 - )) 366 - } 367 - Action::Unknown => { 368 - if let Some(ref c) = compressor { 369 - // We know the compressor, does the input have the same extension? 
370 - if let Some(compressor_from_input) = 371 - get_compressor_from_filename(get_input_filename(&cmprss_input)?) 372 - { 373 - if c.name() == compressor_from_input.name() { 374 - action = Action::Extract; 375 - CmprssOutput::Path(PathBuf::from(c.default_extracted_filename( 376 - get_input_filename(&cmprss_input)?, 377 - ))) 378 - } else { 379 - action = Action::Compress; 380 - CmprssOutput::Path(PathBuf::from( 381 - c.default_compressed_filename(get_input_filename( 382 - &cmprss_input, 383 - )?), 384 - )) 385 - } 386 - } else { 387 - action = Action::Compress; 388 - CmprssOutput::Path(PathBuf::from(c.default_compressed_filename( 389 - get_input_filename(&cmprss_input)?, 390 - ))) 391 - } 392 - } else { 393 - // Can still work if the input is an archive 394 - compressor = 395 - get_compressor_from_filename(get_input_filename(&cmprss_input)?); 396 - let c = compressor 397 - .as_ref() 398 - .ok_or_else(|| anyhow!("Must specify a compressor"))?; 399 - action = Action::Extract; 400 - CmprssOutput::Path(PathBuf::from( 401 - c.default_extracted_filename(get_input_filename(&cmprss_input)?), 402 - )) 403 - } 404 - } 405 - } 406 - } 407 - } 408 - }; 409 - 410 - // If we don't have the compressor/action, we can attempt to infer 411 - if compressor.is_none() || action == Action::Unknown { 412 - match action { 413 - Action::Compress => { 414 - // Look at the output name 415 - if let CmprssOutput::Path(path) = &cmprss_output { 416 - compressor = get_compressor_from_filename(path); 417 - } 418 - } 419 - Action::Extract => { 420 - if let CmprssInput::Path(paths) = &cmprss_input { 421 - if paths.len() != 1 { 422 - bail!("Expected a single archive to extract"); 423 - } 424 - compressor = get_compressor_from_filename(paths.first().unwrap()); 425 - } 426 - } 427 - Action::Unknown => match (&cmprss_input, &cmprss_output) { 428 - (CmprssInput::Path(paths), CmprssOutput::Path(path)) => { 429 - if path.is_dir() && paths.len() == 1 { 430 - compressor = 
get_compressor_from_filename(paths.first().unwrap()); 431 - action = Action::Extract; 432 - 433 - if compressor.is_none() { 434 - bail!( 435 - "Couldn't determine how to extract {:?}", 436 - paths.first().unwrap() 437 - ); 438 - } 439 - } else { 440 - let (guessed_compressor, guessed_action) = 441 - guess_from_filenames(paths, path, compressor); 442 - compressor = guessed_compressor; 443 - action = guessed_action; 444 - } 445 - } 446 - (CmprssInput::Path(paths), CmprssOutput::Pipe(_)) => { 447 - if let Some(ref c) = compressor { 448 - if let Some(input_c) = get_compressor_from_filename(paths.first().unwrap()) 449 - { 450 - if c.name() == input_c.name() { 451 - action = Action::Extract; 452 - } else { 453 - action = Action::Compress; 454 - } 455 - } else { 456 - action = Action::Compress; 457 - } 458 - } else { 459 - if paths.len() != 1 { 460 - bail!("Expected a single input file for piping to stdout"); 461 - } 462 - compressor = get_compressor_from_filename(paths.first().unwrap()); 463 - if compressor.is_some() { 464 - action = Action::Extract; 465 - } else { 466 - bail!("Can't guess compressor to use"); 467 - } 468 - } 469 - } 470 - (CmprssInput::Pipe(_), CmprssOutput::Path(path)) => { 471 - if let Some(ref c) = compressor { 472 - if get_compressor_from_filename(path) 473 - .is_some_and(|pc| c.name() == pc.name()) 474 - { 475 - action = Action::Compress; 476 - } else { 477 - action = Action::Extract; 478 - } 479 - } else { 480 - compressor = get_compressor_from_filename(path); 481 - if compressor.is_some() { 482 - action = Action::Compress; 483 - } else { 484 - bail!("Can't guess compressor to use"); 485 - } 486 - } 487 - } 488 - (CmprssInput::Pipe(_), CmprssOutput::Pipe(_)) => { 489 - action = Action::Compress; 490 - } 491 - // Handle all Writer output cases 492 - (_, CmprssOutput::Writer(_)) => { 493 - // Writer outputs are only used internally by Pipeline 494 - // In main.rs we'll assume compression 495 - action = Action::Compress; 496 - } 497 - // Handle all 
Reader input cases 498 - (&CmprssInput::Reader(_), _) => { 499 - // For Reader input, we'll assume extraction 500 - action = Action::Extract; 501 - } 502 - }, 503 - } 504 - } 505 - 506 - let compressor = compressor.ok_or_else(|| anyhow!("Could not determine compressor to use"))?; 507 - if action == Action::Unknown { 508 - bail!("Could not determine action to take"); 509 - } 510 - 511 - Ok(Job { 512 - compressor, 513 - input: cmprss_input, 514 - output: cmprss_output, 515 - action, 516 - }) 517 - } 518 - 519 63 fn command(compressor: Option<Box<dyn Compressor>>, args: &CommonArgs) -> Result { 520 64 let job = get_job(compressor, args)?; 521 65 ··· 523 67 Action::Compress => job.compressor.compress(job.input, job.output)?, 524 68 Action::Extract => job.compressor.extract(job.input, job.output)?, 525 69 _ => { 526 - bail!("Unknown action requested"); 70 + anyhow::bail!("Unknown action requested"); 527 71 } 528 72 }; 529 73 ··· 550 94 std::process::exit(1); 551 95 }); 552 96 } 553 - 554 - #[cfg(test)] 555 - mod tests { 556 - use super::*; 557 - use std::path::Path; 558 - 559 - fn compressor_name(path: &str) -> Option<String> { 560 - get_compressor_from_filename(Path::new(path)).map(|c| c.name().to_string()) 561 - } 562 - 563 - fn compressor_extension(path: &str) -> Option<String> { 564 - get_compressor_from_filename(Path::new(path)).map(|c| c.extension().to_string()) 565 - } 566 - 567 - #[test] 568 - fn test_single_extension() { 569 - assert_eq!(compressor_name("file.gz"), Some("gzip".into())); 570 - assert_eq!(compressor_name("file.xz"), Some("xz".into())); 571 - assert_eq!(compressor_name("file.bz2"), Some("bzip2".into())); 572 - assert_eq!(compressor_name("file.zst"), Some("zstd".into())); 573 - assert_eq!(compressor_name("file.lz4"), Some("lz4".into())); 574 - assert_eq!(compressor_name("file.br"), Some("brotli".into())); 575 - assert_eq!(compressor_name("file.sz"), Some("snappy".into())); 576 - assert_eq!(compressor_name("file.lzma"), Some("lzma".into())); 577 - 
assert_eq!(compressor_name("file.tar"), Some("tar".into())); 578 - assert_eq!(compressor_name("file.zip"), Some("zip".into())); 579 - } 580 - 581 - #[test] 582 - fn test_multi_extension() { 583 - assert_eq!(compressor_name("archive.tar.gz"), Some("gzip".into())); 584 - assert_eq!(compressor_name("archive.tar.xz"), Some("xz".into())); 585 - assert_eq!(compressor_name("archive.tar.bz2"), Some("bzip2".into())); 586 - assert_eq!(compressor_name("archive.tar.zst"), Some("zstd".into())); 587 - } 588 - 589 - #[test] 590 - fn test_shortcut_extensions() { 591 - // Shortcut extensions resolve to a tar + outer compressor pipeline, 592 - // so the reported name is the outer compressor (same as the long form). 593 - assert_eq!(compressor_name("archive.tgz"), Some("gzip".into())); 594 - assert_eq!(compressor_name("archive.tbz"), Some("bzip2".into())); 595 - assert_eq!(compressor_name("archive.tbz2"), Some("bzip2".into())); 596 - assert_eq!(compressor_name("archive.txz"), Some("xz".into())); 597 - assert_eq!(compressor_name("archive.tzst"), Some("zstd".into())); 598 - } 599 - 600 - #[test] 601 - fn test_shortcut_extensions_extract_to_directory() { 602 - // Shortcuts are tar-based, so they must extract to a directory. 
603 - for path in ["a.tgz", "a.tbz", "a.tbz2", "a.txz", "a.tzst"] { 604 - let c = get_compressor_from_filename(Path::new(path)).unwrap(); 605 - assert_eq!( 606 - c.default_extracted_target(), 607 - ExtractedTarget::Directory, 608 - "{path} should extract to a directory", 609 - ); 610 - } 611 - } 612 - 613 - #[test] 614 - fn test_unknown_middle_extension() { 615 - // "b" is not a compressor, so only tar.gz should be detected 616 - assert_eq!(compressor_name("a.b.tar.gz"), Some("gzip".into())); 617 - assert_eq!(compressor_name("report.2024.tar.gz"), Some("gzip".into())); 618 - } 619 - 620 - #[test] 621 - fn test_no_recognized_extension() { 622 - assert_eq!(compressor_name("file.txt"), None); 623 - assert_eq!(compressor_name("file.pdf"), None); 624 - assert_eq!(compressor_name("file"), None); 625 - } 626 - 627 - #[test] 628 - fn test_default_filenames_single_pipeline() { 629 - let c = get_compressor_from_filename(Path::new("file.gz")).unwrap(); 630 - assert_eq!( 631 - c.default_compressed_filename(Path::new("data.txt")), 632 - "data.txt.gz" 633 - ); 634 - assert_eq!(c.default_extracted_filename(Path::new("data.gz")), "data"); 635 - } 636 - 637 - #[test] 638 - fn test_default_filenames_multi_pipeline() { 639 - let c = get_compressor_from_filename(Path::new("archive.tar.gz")).unwrap(); 640 - assert_eq!( 641 - c.default_compressed_filename(Path::new("data")), 642 - "data.tar.gz" 643 - ); 644 - // tar.gz extracts to a directory, so extracted filename is "." 
645 - assert_eq!(c.default_extracted_filename(Path::new("data.tar.gz")), "."); 646 - } 647 - 648 - #[test] 649 - fn test_is_archive_single_pipeline() { 650 - let c = get_compressor_from_filename(Path::new("file.gz")).unwrap(); 651 - assert!(c.is_archive(Path::new("test.gz"))); 652 - assert!(!c.is_archive(Path::new("test.xz"))); 653 - } 654 - 655 - #[test] 656 - fn test_is_archive_multi_pipeline() { 657 - let c = get_compressor_from_filename(Path::new("archive.tar.gz")).unwrap(); 658 - assert!(c.is_archive(Path::new("foo.tar.gz"))); 659 - assert!(!c.is_archive(Path::new("foo.gz"))); 660 - } 661 - 662 - #[test] 663 - fn test_extracted_target_single_pipeline() { 664 - let gz = get_compressor_from_filename(Path::new("file.gz")).unwrap(); 665 - assert_eq!(gz.default_extracted_target(), ExtractedTarget::File); 666 - 667 - let tar = get_compressor_from_filename(Path::new("file.tar")).unwrap(); 668 - assert_eq!(tar.default_extracted_target(), ExtractedTarget::Directory); 669 - } 670 - 671 - #[test] 672 - fn test_extracted_target_multi_pipeline() { 673 - // tar.gz: innermost is tar, which extracts to directory 674 - let c = get_compressor_from_filename(Path::new("archive.tar.gz")).unwrap(); 675 - assert_eq!(c.default_extracted_target(), ExtractedTarget::Directory); 676 - } 677 - 678 - #[test] 679 - fn test_single_extension_returns_correct_extension() { 680 - assert_eq!(compressor_extension("file.gz"), Some("gz".into())); 681 - assert_eq!(compressor_extension("file.tar"), Some("tar".into())); 682 - } 683 - }