this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

at fa6719dd365c14fa113526b45d0b8ca6cf4493bb 638 lines 24 kB view raw
1pub mod backends; 2pub mod progress; 3pub mod test_utils; 4pub mod utils; 5 6use anyhow::{anyhow, bail}; 7use backends::*; 8use clap::{Parser, Subcommand}; 9use is_terminal::IsTerminal; 10use std::path::{Path, PathBuf}; 11use utils::*; 12 13/// A compression multi-tool 14#[derive(Parser, Debug)] 15#[command(author, version, about, long_about = None)] 16struct CmprssArgs { 17 /// Format 18 #[command(subcommand)] 19 format: Option<Format>, 20 21 // Base arguments for the non-subcommand behavior 22 #[clap(flatten)] 23 pub base_args: CommonArgs, 24} 25#[derive(Subcommand, Debug)] 26enum Format { 27 /// tar archive format 28 Tar(TarArgs), 29 30 /// gzip compression 31 #[clap(visible_alias = "gz")] 32 Gzip(GzipArgs), 33 34 /// xz compression 35 Xz(XzArgs), 36 37 /// bzip2 compression 38 #[clap(visible_alias = "bz2")] 39 Bzip2(Bzip2Args), 40 41 /// zip archive format 42 Zip(ZipArgs), 43 44 /// zstd compression 45 #[clap(visible_alias = "zst")] 46 Zstd(ZstdArgs), 47 48 /// lz4 compression 49 Lz4(Lz4Args), 50 51 /// brotli compression 52 #[clap(visible_alias = "br")] 53 Brotli(BrotliArgs), 54 55 /// snappy framed compression 56 #[clap(visible_alias = "sz")] 57 Snappy(SnappyArgs), 58 59 /// lzma (legacy LZMA1) compression 60 Lzma(LzmaArgs), 61} 62 63/// Get the input filename or return a default file 64/// This file will be used to generate the output filename 65fn get_input_filename(input: &CmprssInput) -> Result<&Path> { 66 match input { 67 CmprssInput::Path(paths) => match paths.first() { 68 Some(path) => Ok(path), 69 None => bail!("error: no input specified"), 70 }, 71 CmprssInput::Pipe(_) => Ok(Path::new("archive")), 72 CmprssInput::Reader(_) => Ok(Path::new("piped_data")), 73 } 74} 75 76#[derive(Debug, PartialEq, Clone, Copy)] 77enum Action { 78 Compress, 79 Extract, 80 Unknown, 81} 82 83/// Defines a single compress/extract action to take. 84#[derive(Debug)] 85struct Job { 86 compressor: Box<dyn Compressor>, 87 input: CmprssInput, 88 output: CmprssOutput, 89 action: Action, 90} 91 92/// Get a compressor pipeline from a filename by scanning extensions right-to-left 93fn get_compressor_from_filename(filename: &Path) -> Option<Box<dyn Compressor>> { 94 let file_name = filename.file_name()?.to_str()?; 95 let parts: Vec<&str> = file_name.split('.').collect(); 96 97 if parts.len() < 2 { 98 return None; 99 } 100 101 // Scan extensions right-to-left, collecting known compressors 102 // until hitting an unknown extension or the base name. 103 // e.g., "a.b.tar.gz" → gz ✓, tar ✓, b ✗ stop → [gz, tar] 104 let mut compressor_names: Vec<String> = Vec::new(); 105 for ext in parts[1..].iter().rev() { 106 if let Some(c) = backends::compressor_from_str(ext) { 107 compressor_names.push(c.name().to_string()); 108 } else { 109 break; 110 } 111 } 112 113 if compressor_names.is_empty() { 114 return None; 115 } 116 117 // Reverse to innermost-to-outermost order 118 compressor_names.reverse(); 119 Pipeline::from_names(&compressor_names) 120 .ok() 121 .map(|m| Box::new(m) as Box<dyn Compressor>) 122} 123 124/// Convert an input path into a Path 125fn get_path(input: &str) -> Option<PathBuf> { 126 let path = PathBuf::from(input); 127 if !path.try_exists().unwrap_or(false) { 128 return None; 129 } 130 Some(path) 131} 132 133/// Guess compressor/action from the two filenames 134/// The compressor may already be given 135fn guess_from_filenames( 136 input: &[PathBuf], 137 output: &Path, 138 compressor: Option<Box<dyn Compressor>>, 139) -> (Option<Box<dyn Compressor>>, Action) { 140 if input.len() != 1 { 141 if let Some(guessed_compressor) = get_compressor_from_filename(output) { 142 return (Some(guessed_compressor), Action::Compress); 143 } 144 145 // Check if output is a directory - this is likely an extraction 146 if output.is_dir() { 147 // Try to determine compressor from the input file's extension(s) 148 if let Some(input_path) = input.first() 149 && let Some(guessed_compressor) = get_compressor_from_filename(input_path) 150 { 151 return (Some(guessed_compressor), Action::Extract); 152 } 153 } 154 155 // In theory we could be extracting multiple files to a directory 156 // We'll fail somewhere else if that's not the case 157 return (compressor, Action::Extract); 158 } 159 let input = input.first().unwrap(); 160 161 let guessed_compressor = get_compressor_from_filename(output); 162 let guessed_extractor = get_compressor_from_filename(input); 163 let guessed_compressor_name = if let Some(c) = &guessed_compressor { 164 c.name() 165 } else { 166 "" 167 }; 168 let guessed_extractor_name = if let Some(e) = &guessed_extractor { 169 e.name() 170 } else { 171 "" 172 }; 173 174 if let Some(c) = &compressor { 175 if guessed_compressor_name == c.name() { 176 return (compressor, Action::Compress); 177 } else if guessed_extractor_name == c.name() { 178 return (compressor, Action::Extract); 179 } else { 180 // Default to compressing 181 return (compressor, Action::Compress); 182 } 183 } 184 185 match (guessed_compressor, guessed_extractor) { 186 (None, None) => (None, Action::Unknown), 187 (Some(c), None) => (Some(c), Action::Compress), 188 (None, Some(e)) => (Some(e), Action::Extract), 189 (Some(c), Some(e)) => { 190 // Compare the input and output extensions to see if one has an extra extension 191 let input_file = input.file_name().unwrap().to_str().unwrap(); 192 let input_ext = input.extension().unwrap_or_default(); 193 let output_file = output.file_name().unwrap().to_str().unwrap(); 194 let output_ext = output.extension().unwrap_or_default(); 195 let guessed_output = input_file.to_string() + "." + output_ext.to_str().unwrap(); 196 let guessed_input = output_file.to_string() + "." + input_ext.to_str().unwrap(); 197 198 if guessed_output == output_file { 199 // Input is "archive.tar", output is "archive.tar.gz" — only add the outer layer 200 let single_compressor = 201 backends::compressor_from_str(output_ext.to_str().unwrap_or("")); 202 (single_compressor.or(Some(c)), Action::Compress) 203 } else if guessed_input == input_file { 204 // Output is "archive.tar", input is "archive.tar.gz" — only strip the outer layer 205 let single_compressor = 206 backends::compressor_from_str(input_ext.to_str().unwrap_or("")); 207 (single_compressor.or(Some(e)), Action::Extract) 208 } else if c.name() == e.name() { 209 // Same format for input and output, can't decide 210 if output.is_dir() { 211 (Some(e), Action::Extract) 212 } else { 213 (Some(c), Action::Unknown) 214 } 215 } else if output.is_dir() { 216 (Some(e), Action::Extract) 217 } else { 218 (None, Action::Unknown) 219 } 220 } 221 } 222} 223 224/// Parse the common args and determine the details of the job requested 225fn get_job(compressor: Option<Box<dyn Compressor>>, common_args: &CommonArgs) -> Result<Job> { 226 let mut compressor = compressor; 227 let mut action = { 228 if common_args.compress { 229 Action::Compress 230 } else if common_args.extract || common_args.decompress { 231 Action::Extract 232 } else { 233 Action::Unknown 234 } 235 }; 236 237 let mut inputs = Vec::new(); 238 if let Some(in_file) = &common_args.input { 239 match get_path(in_file) { 240 Some(path) => inputs.push(path), 241 None => { 242 bail!("Specified input path does not exist"); 243 } 244 } 245 } 246 247 let mut output = match &common_args.output { 248 Some(output) => { 249 let path = Path::new(output); 250 if path.try_exists()? && !path.is_dir() { 251 // Output path exists, bail out 252 bail!("Specified output path already exists"); 253 } 254 Some(path) 255 } 256 None => None, 257 }; 258 259 // Process the io_list, check if there is an output first 260 let mut io_list = common_args.io_list.clone(); 261 if output.is_none() 262 && let Some(possible_output) = common_args.io_list.last() 263 { 264 let path = Path::new(possible_output); 265 if !path.try_exists()? { 266 // Use the given path if it doesn't exist 267 output = Some(path); 268 io_list.pop(); 269 } else if path.is_dir() { 270 match action { 271 Action::Compress => { 272 // A directory can potentially be a target output location or 273 // an input, for now assume it is an input. 274 } 275 Action::Extract => { 276 // Can extract to a directory, and it wouldn't make any sense as an input 277 output = Some(path); 278 io_list.pop(); 279 } 280 _ => { 281 // TODO: don't know if this is an input or output, assume we're compressing this directory 282 // This does cause problems for inferencing "cat archive.tar | cmprss tar ." 283 // Probably need to add some special casing 284 } 285 }; 286 } else { 287 // TODO: check for scenarios where we want to append to an existing archive 288 } 289 } 290 291 // Validate the specified inputs 292 // Everything in the io_list should be an input 293 for input in &io_list { 294 if let Some(path) = get_path(input) { 295 inputs.push(path); 296 } else { 297 bail!("Specified input path does not exist"); 298 } 299 } 300 301 // Fallback to stdin/stdout if we're missing files 302 let cmprss_input = match inputs.is_empty() { 303 true => { 304 if !std::io::stdin().is_terminal() 305 && !&common_args.ignore_pipes 306 && !&common_args.ignore_stdin 307 { 308 CmprssInput::Pipe(std::io::stdin()) 309 } else { 310 bail!("No specified input"); 311 } 312 } 313 false => CmprssInput::Path(inputs), 314 }; 315 316 let cmprss_output = match output { 317 Some(path) => CmprssOutput::Path(path.to_path_buf()), 318 None => { 319 if !std::io::stdout().is_terminal() 320 && !&common_args.ignore_pipes 321 && !&common_args.ignore_stdout 322 { 323 CmprssOutput::Pipe(std::io::stdout()) 324 } else { 325 match action { 326 Action::Compress => { 327 let c = compressor 328 .as_ref() 329 .ok_or_else(|| anyhow!("Must specify a compressor"))?; 330 CmprssOutput::Path(PathBuf::from( 331 c.default_compressed_filename(get_input_filename(&cmprss_input)?), 332 )) 333 } 334 Action::Extract => { 335 if compressor.is_none() { 336 compressor = 337 get_compressor_from_filename(get_input_filename(&cmprss_input)?); 338 } 339 let c = compressor 340 .as_ref() 341 .ok_or_else(|| anyhow!("Must specify a compressor"))?; 342 CmprssOutput::Path(PathBuf::from( 343 c.default_extracted_filename(get_input_filename(&cmprss_input)?), 344 )) 345 } 346 Action::Unknown => { 347 if let Some(ref c) = compressor { 348 // We know the compressor, does the input have the same extension? 349 if let Some(compressor_from_input) = 350 get_compressor_from_filename(get_input_filename(&cmprss_input)?) 351 { 352 if c.name() == compressor_from_input.name() { 353 action = Action::Extract; 354 CmprssOutput::Path(PathBuf::from(c.default_extracted_filename( 355 get_input_filename(&cmprss_input)?, 356 ))) 357 } else { 358 action = Action::Compress; 359 CmprssOutput::Path(PathBuf::from( 360 c.default_compressed_filename(get_input_filename( 361 &cmprss_input, 362 )?), 363 )) 364 } 365 } else { 366 action = Action::Compress; 367 CmprssOutput::Path(PathBuf::from(c.default_compressed_filename( 368 get_input_filename(&cmprss_input)?, 369 ))) 370 } 371 } else { 372 // Can still work if the input is an archive 373 compressor = 374 get_compressor_from_filename(get_input_filename(&cmprss_input)?); 375 let c = compressor 376 .as_ref() 377 .ok_or_else(|| anyhow!("Must specify a compressor"))?; 378 action = Action::Extract; 379 CmprssOutput::Path(PathBuf::from( 380 c.default_extracted_filename(get_input_filename(&cmprss_input)?), 381 )) 382 } 383 } 384 } 385 } 386 } 387 }; 388 389 // If we don't have the compressor/action, we can attempt to infer 390 if compressor.is_none() || action == Action::Unknown { 391 match action { 392 Action::Compress => { 393 // Look at the output name 394 if let CmprssOutput::Path(path) = &cmprss_output { 395 compressor = get_compressor_from_filename(path); 396 } 397 } 398 Action::Extract => { 399 if let CmprssInput::Path(paths) = &cmprss_input { 400 if paths.len() != 1 { 401 bail!("Expected a single archive to extract"); 402 } 403 compressor = get_compressor_from_filename(paths.first().unwrap()); 404 } 405 } 406 Action::Unknown => match (&cmprss_input, &cmprss_output) { 407 (CmprssInput::Path(paths), CmprssOutput::Path(path)) => { 408 if path.is_dir() && paths.len() == 1 { 409 compressor = get_compressor_from_filename(paths.first().unwrap()); 410 action = Action::Extract; 411 412 if compressor.is_none() { 413 bail!( 414 "Couldn't determine how to extract {:?}", 415 paths.first().unwrap() 416 ); 417 } 418 } else { 419 let (guessed_compressor, guessed_action) = 420 guess_from_filenames(paths, path, compressor); 421 compressor = guessed_compressor; 422 action = guessed_action; 423 } 424 } 425 (CmprssInput::Path(paths), CmprssOutput::Pipe(_)) => { 426 if let Some(ref c) = compressor { 427 if let Some(input_c) = get_compressor_from_filename(paths.first().unwrap()) 428 { 429 if c.name() == input_c.name() { 430 action = Action::Extract; 431 } else { 432 action = Action::Compress; 433 } 434 } else { 435 action = Action::Compress; 436 } 437 } else { 438 if paths.len() != 1 { 439 bail!("Expected a single input file for piping to stdout"); 440 } 441 compressor = get_compressor_from_filename(paths.first().unwrap()); 442 if compressor.is_some() { 443 action = Action::Extract; 444 } else { 445 bail!("Can't guess compressor to use"); 446 } 447 } 448 } 449 (CmprssInput::Pipe(_), CmprssOutput::Path(path)) => { 450 if let Some(ref c) = compressor { 451 if get_compressor_from_filename(path) 452 .is_some_and(|pc| c.name() == pc.name()) 453 { 454 action = Action::Compress; 455 } else { 456 action = Action::Extract; 457 } 458 } else { 459 compressor = get_compressor_from_filename(path); 460 if compressor.is_some() { 461 action = Action::Compress; 462 } else { 463 bail!("Can't guess compressor to use"); 464 } 465 } 466 } 467 (CmprssInput::Pipe(_), CmprssOutput::Pipe(_)) => { 468 action = Action::Compress; 469 } 470 // Handle all Writer output cases 471 (_, CmprssOutput::Writer(_)) => { 472 // Writer outputs are only used internally by Pipeline 473 // In main.rs we'll assume compression 474 action = Action::Compress; 475 } 476 // Handle all Reader input cases 477 (&CmprssInput::Reader(_), _) => { 478 // For Reader input, we'll assume extraction 479 action = Action::Extract; 480 } 481 }, 482 } 483 } 484 485 let compressor = compressor.ok_or_else(|| anyhow!("Could not determine compressor to use"))?; 486 if action == Action::Unknown { 487 bail!("Could not determine action to take"); 488 } 489 490 Ok(Job { 491 compressor, 492 input: cmprss_input, 493 output: cmprss_output, 494 action, 495 }) 496} 497 498fn command(compressor: Option<Box<dyn Compressor>>, args: &CommonArgs) -> Result { 499 let job = get_job(compressor, args)?; 500 501 match job.action { 502 Action::Compress => job.compressor.compress(job.input, job.output)?, 503 Action::Extract => job.compressor.extract(job.input, job.output)?, 504 _ => { 505 bail!("Unknown action requested"); 506 } 507 }; 508 509 Ok(()) 510} 511 512fn main() { 513 let args = CmprssArgs::parse(); 514 match args.format { 515 Some(Format::Tar(a)) => command(Some(Box::new(Tar::new(&a))), &a.common_args), 516 Some(Format::Gzip(a)) => command(Some(Box::new(Gzip::new(&a))), &a.common_args), 517 Some(Format::Xz(a)) => command(Some(Box::new(Xz::new(&a))), &a.common_args), 518 Some(Format::Bzip2(a)) => command(Some(Box::new(Bzip2::new(&a))), &a.common_args), 519 Some(Format::Zip(a)) => command(Some(Box::new(Zip::new(&a))), &a.common_args), 520 Some(Format::Zstd(a)) => command(Some(Box::new(Zstd::new(&a))), &a.common_args), 521 Some(Format::Lz4(a)) => command(Some(Box::new(Lz4::new(&a))), &a.common_args), 522 Some(Format::Brotli(a)) => command(Some(Box::new(Brotli::new(&a))), &a.common_args), 523 Some(Format::Snappy(a)) => command(Some(Box::new(Snappy::new(&a))), &a.common_args), 524 Some(Format::Lzma(a)) => command(Some(Box::new(Lzma::new(&a))), &a.common_args), 525 _ => command(None, &args.base_args), 526 } 527 .unwrap_or_else(|e| { 528 eprintln!("ERROR(cmprss): {}", e); 529 std::process::exit(1); 530 }); 531} 532 533#[cfg(test)] 534mod tests { 535 use super::*; 536 use std::path::Path; 537 538 fn compressor_name(path: &str) -> Option<String> { 539 get_compressor_from_filename(Path::new(path)).map(|c| c.name().to_string()) 540 } 541 542 fn compressor_extension(path: &str) -> Option<String> { 543 get_compressor_from_filename(Path::new(path)).map(|c| c.extension().to_string()) 544 } 545 546 #[test] 547 fn test_single_extension() { 548 assert_eq!(compressor_name("file.gz"), Some("gzip".into())); 549 assert_eq!(compressor_name("file.xz"), Some("xz".into())); 550 assert_eq!(compressor_name("file.bz2"), Some("bzip2".into())); 551 assert_eq!(compressor_name("file.zst"), Some("zstd".into())); 552 assert_eq!(compressor_name("file.lz4"), Some("lz4".into())); 553 assert_eq!(compressor_name("file.br"), Some("brotli".into())); 554 assert_eq!(compressor_name("file.sz"), Some("snappy".into())); 555 assert_eq!(compressor_name("file.lzma"), Some("lzma".into())); 556 assert_eq!(compressor_name("file.tar"), Some("tar".into())); 557 assert_eq!(compressor_name("file.zip"), Some("zip".into())); 558 } 559 560 #[test] 561 fn test_multi_extension() { 562 assert_eq!(compressor_name("archive.tar.gz"), Some("gzip".into())); 563 assert_eq!(compressor_name("archive.tar.xz"), Some("xz".into())); 564 assert_eq!(compressor_name("archive.tar.bz2"), Some("bzip2".into())); 565 assert_eq!(compressor_name("archive.tar.zst"), Some("zstd".into())); 566 } 567 568 #[test] 569 fn test_unknown_middle_extension() { 570 // "b" is not a compressor, so only tar.gz should be detected 571 assert_eq!(compressor_name("a.b.tar.gz"), Some("gzip".into())); 572 assert_eq!(compressor_name("report.2024.tar.gz"), Some("gzip".into())); 573 } 574 575 #[test] 576 fn test_no_recognized_extension() { 577 assert_eq!(compressor_name("file.txt"), None); 578 assert_eq!(compressor_name("file.pdf"), None); 579 assert_eq!(compressor_name("file"), None); 580 } 581 582 #[test] 583 fn test_default_filenames_single_pipeline() { 584 let c = get_compressor_from_filename(Path::new("file.gz")).unwrap(); 585 assert_eq!( 586 c.default_compressed_filename(Path::new("data.txt")), 587 "data.txt.gz" 588 ); 589 assert_eq!(c.default_extracted_filename(Path::new("data.gz")), "data"); 590 } 591 592 #[test] 593 fn test_default_filenames_multi_pipeline() { 594 let c = get_compressor_from_filename(Path::new("archive.tar.gz")).unwrap(); 595 assert_eq!( 596 c.default_compressed_filename(Path::new("data")), 597 "data.tar.gz" 598 ); 599 // tar.gz extracts to a directory, so extracted filename is "." 600 assert_eq!(c.default_extracted_filename(Path::new("data.tar.gz")), "."); 601 } 602 603 #[test] 604 fn test_is_archive_single_pipeline() { 605 let c = get_compressor_from_filename(Path::new("file.gz")).unwrap(); 606 assert!(c.is_archive(Path::new("test.gz"))); 607 assert!(!c.is_archive(Path::new("test.xz"))); 608 } 609 610 #[test] 611 fn test_is_archive_multi_pipeline() { 612 let c = get_compressor_from_filename(Path::new("archive.tar.gz")).unwrap(); 613 assert!(c.is_archive(Path::new("foo.tar.gz"))); 614 assert!(!c.is_archive(Path::new("foo.gz"))); 615 } 616 617 #[test] 618 fn test_extracted_target_single_pipeline() { 619 let gz = get_compressor_from_filename(Path::new("file.gz")).unwrap(); 620 assert_eq!(gz.default_extracted_target(), ExtractedTarget::FILE); 621 622 let tar = get_compressor_from_filename(Path::new("file.tar")).unwrap(); 623 assert_eq!(tar.default_extracted_target(), ExtractedTarget::DIRECTORY); 624 } 625 626 #[test] 627 fn test_extracted_target_multi_pipeline() { 628 // tar.gz: innermost is tar, which extracts to directory 629 let c = get_compressor_from_filename(Path::new("archive.tar.gz")).unwrap(); 630 assert_eq!(c.default_extracted_target(), ExtractedTarget::DIRECTORY); 631 } 632 633 #[test] 634 fn test_single_extension_returns_correct_extension() { 635 assert_eq!(compressor_extension("file.gz"), Some("gz".into())); 636 assert_eq!(compressor_extension("file.tar"), Some("tar".into())); 637 } 638}