this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

at 2061d3e80a61a6d10def4076dc6ebc946fb82367 621 lines 24 kB view raw
1pub mod backends; 2pub mod progress; 3pub mod test_utils; 4pub mod utils; 5 6use anyhow::{anyhow, bail}; 7use backends::*; 8use clap::{Parser, Subcommand}; 9use is_terminal::IsTerminal; 10use std::path::{Path, PathBuf}; 11use utils::*; 12 13/// A compression multi-tool 14#[derive(Parser, Debug)] 15#[command(author, version, about, long_about = None)] 16struct CmprssArgs { 17 /// Format 18 #[command(subcommand)] 19 format: Option<Format>, 20 21 // Base arguments for the non-subcommand behavior 22 #[clap(flatten)] 23 pub base_args: CommonArgs, 24} 25#[derive(Subcommand, Debug)] 26enum Format { 27 /// tar archive format 28 Tar(TarArgs), 29 30 /// gzip compression 31 #[clap(visible_alias = "gz")] 32 Gzip(GzipArgs), 33 34 /// xz compression 35 Xz(XzArgs), 36 37 /// bzip2 compression 38 #[clap(visible_alias = "bz2")] 39 Bzip2(Bzip2Args), 40 41 /// zip archive format 42 Zip(ZipArgs), 43 44 /// zstd compression 45 #[clap(visible_alias = "zst")] 46 Zstd(ZstdArgs), 47 48 /// lz4 compression 49 Lz4(Lz4Args), 50} 51 52/// Get the input filename or return a default file 53/// This file will be used to generate the output filename 54fn get_input_filename(input: &CmprssInput) -> Result<&Path> { 55 match input { 56 CmprssInput::Path(paths) => match paths.first() { 57 Some(path) => Ok(path), 58 None => bail!("error: no input specified"), 59 }, 60 CmprssInput::Pipe(_) => Ok(Path::new("archive")), 61 CmprssInput::Reader(_) => Ok(Path::new("piped_data")), 62 } 63} 64 65#[derive(Debug, PartialEq, Clone, Copy)] 66enum Action { 67 Compress, 68 Extract, 69 Unknown, 70} 71 72/// Defines a single compress/extract action to take. 73#[derive(Debug)] 74struct Job { 75 compressor: Box<dyn Compressor>, 76 input: CmprssInput, 77 output: CmprssOutput, 78 action: Action, 79} 80 81/// Get a compressor pipeline from a filename by scanning extensions right-to-left 82fn get_compressor_from_filename(filename: &Path) -> Option<Box<dyn Compressor>> { 83 let file_name = filename.file_name()?.to_str()?; 84 let parts: Vec<&str> = file_name.split('.').collect(); 85 86 if parts.len() < 2 { 87 return None; 88 } 89 90 // Scan extensions right-to-left, collecting known compressors 91 // until hitting an unknown extension or the base name. 92 // e.g., "a.b.tar.gz" → gz ✓, tar ✓, b ✗ stop → [gz, tar] 93 let mut compressor_names: Vec<String> = Vec::new(); 94 for ext in parts[1..].iter().rev() { 95 if let Some(c) = backends::compressor_from_str(ext) { 96 compressor_names.push(c.name().to_string()); 97 } else { 98 break; 99 } 100 } 101 102 if compressor_names.is_empty() { 103 return None; 104 } 105 106 // Reverse to innermost-to-outermost order 107 compressor_names.reverse(); 108 Pipeline::from_names(&compressor_names) 109 .ok() 110 .map(|m| Box::new(m) as Box<dyn Compressor>) 111} 112 113/// Convert an input path into a Path 114fn get_path(input: &str) -> Option<PathBuf> { 115 let path = PathBuf::from(input); 116 if !path.try_exists().unwrap_or(false) { 117 return None; 118 } 119 Some(path) 120} 121 122/// Guess compressor/action from the two filenames 123/// The compressor may already be given 124fn guess_from_filenames( 125 input: &[PathBuf], 126 output: &Path, 127 compressor: Option<Box<dyn Compressor>>, 128) -> (Option<Box<dyn Compressor>>, Action) { 129 if input.len() != 1 { 130 if let Some(guessed_compressor) = get_compressor_from_filename(output) { 131 return (Some(guessed_compressor), Action::Compress); 132 } 133 134 // Check if output is a directory - this is likely an extraction 135 if output.is_dir() { 136 // Try to determine compressor from the input file's extension(s) 137 if let Some(input_path) = input.first() 138 && let Some(guessed_compressor) = get_compressor_from_filename(input_path) 139 { 140 return (Some(guessed_compressor), Action::Extract); 141 } 142 } 143 144 // In theory we could be extracting multiple files to a directory 145 // We'll fail somewhere else if that's not the case 146 return (compressor, Action::Extract); 147 } 148 let input = input.first().unwrap(); 149 150 let guessed_compressor = get_compressor_from_filename(output); 151 let guessed_extractor = get_compressor_from_filename(input); 152 let guessed_compressor_name = if let Some(c) = &guessed_compressor { 153 c.name() 154 } else { 155 "" 156 }; 157 let guessed_extractor_name = if let Some(e) = &guessed_extractor { 158 e.name() 159 } else { 160 "" 161 }; 162 163 if let Some(c) = &compressor { 164 if guessed_compressor_name == c.name() { 165 return (compressor, Action::Compress); 166 } else if guessed_extractor_name == c.name() { 167 return (compressor, Action::Extract); 168 } else { 169 // Default to compressing 170 return (compressor, Action::Compress); 171 } 172 } 173 174 match (guessed_compressor, guessed_extractor) { 175 (None, None) => (None, Action::Unknown), 176 (Some(c), None) => (Some(c), Action::Compress), 177 (None, Some(e)) => (Some(e), Action::Extract), 178 (Some(c), Some(e)) => { 179 // Compare the input and output extensions to see if one has an extra extension 180 let input_file = input.file_name().unwrap().to_str().unwrap(); 181 let input_ext = input.extension().unwrap_or_default(); 182 let output_file = output.file_name().unwrap().to_str().unwrap(); 183 let output_ext = output.extension().unwrap_or_default(); 184 let guessed_output = input_file.to_string() + "." + output_ext.to_str().unwrap(); 185 let guessed_input = output_file.to_string() + "." + input_ext.to_str().unwrap(); 186 187 if guessed_output == output_file { 188 // Input is "archive.tar", output is "archive.tar.gz" — only add the outer layer 189 let single_compressor = 190 backends::compressor_from_str(output_ext.to_str().unwrap_or("")); 191 (single_compressor.or(Some(c)), Action::Compress) 192 } else if guessed_input == input_file { 193 // Output is "archive.tar", input is "archive.tar.gz" — only strip the outer layer 194 let single_compressor = 195 backends::compressor_from_str(input_ext.to_str().unwrap_or("")); 196 (single_compressor.or(Some(e)), Action::Extract) 197 } else if c.name() == e.name() { 198 // Same format for input and output, can't decide 199 if output.is_dir() { 200 (Some(e), Action::Extract) 201 } else { 202 (Some(c), Action::Unknown) 203 } 204 } else if output.is_dir() { 205 (Some(e), Action::Extract) 206 } else { 207 (None, Action::Unknown) 208 } 209 } 210 } 211} 212 213/// Parse the common args and determine the details of the job requested 214fn get_job(compressor: Option<Box<dyn Compressor>>, common_args: &CommonArgs) -> Result<Job> { 215 let mut compressor = compressor; 216 let mut action = { 217 if common_args.compress { 218 Action::Compress 219 } else if common_args.extract || common_args.decompress { 220 Action::Extract 221 } else { 222 Action::Unknown 223 } 224 }; 225 226 let mut inputs = Vec::new(); 227 if let Some(in_file) = &common_args.input { 228 match get_path(in_file) { 229 Some(path) => inputs.push(path), 230 None => { 231 bail!("Specified input path does not exist"); 232 } 233 } 234 } 235 236 let mut output = match &common_args.output { 237 Some(output) => { 238 let path = Path::new(output); 239 if path.try_exists()? && !path.is_dir() { 240 // Output path exists, bail out 241 bail!("Specified output path already exists"); 242 } 243 Some(path) 244 } 245 None => None, 246 }; 247 248 // Process the io_list, check if there is an output first 249 let mut io_list = common_args.io_list.clone(); 250 if output.is_none() 251 && let Some(possible_output) = common_args.io_list.last() 252 { 253 let path = Path::new(possible_output); 254 if !path.try_exists()? { 255 // Use the given path if it doesn't exist 256 output = Some(path); 257 io_list.pop(); 258 } else if path.is_dir() { 259 match action { 260 Action::Compress => { 261 // A directory can potentially be a target output location or 262 // an input, for now assume it is an input. 263 } 264 Action::Extract => { 265 // Can extract to a directory, and it wouldn't make any sense as an input 266 output = Some(path); 267 io_list.pop(); 268 } 269 _ => { 270 // TODO: don't know if this is an input or output, assume we're compressing this directory 271 // This does cause problems for inferencing "cat archive.tar | cmprss tar ." 272 // Probably need to add some special casing 273 } 274 }; 275 } else { 276 // TODO: check for scenarios where we want to append to an existing archive 277 } 278 } 279 280 // Validate the specified inputs 281 // Everything in the io_list should be an input 282 for input in &io_list { 283 if let Some(path) = get_path(input) { 284 inputs.push(path); 285 } else { 286 bail!("Specified input path does not exist"); 287 } 288 } 289 290 // Fallback to stdin/stdout if we're missing files 291 let cmprss_input = match inputs.is_empty() { 292 true => { 293 if !std::io::stdin().is_terminal() 294 && !&common_args.ignore_pipes 295 && !&common_args.ignore_stdin 296 { 297 CmprssInput::Pipe(std::io::stdin()) 298 } else { 299 bail!("No specified input"); 300 } 301 } 302 false => CmprssInput::Path(inputs), 303 }; 304 305 let cmprss_output = match output { 306 Some(path) => CmprssOutput::Path(path.to_path_buf()), 307 None => { 308 if !std::io::stdout().is_terminal() 309 && !&common_args.ignore_pipes 310 && !&common_args.ignore_stdout 311 { 312 CmprssOutput::Pipe(std::io::stdout()) 313 } else { 314 match action { 315 Action::Compress => { 316 let c = compressor 317 .as_ref() 318 .ok_or_else(|| anyhow!("Must specify a compressor"))?; 319 CmprssOutput::Path(PathBuf::from( 320 c.default_compressed_filename(get_input_filename(&cmprss_input)?), 321 )) 322 } 323 Action::Extract => { 324 if compressor.is_none() { 325 compressor = 326 get_compressor_from_filename(get_input_filename(&cmprss_input)?); 327 } 328 let c = compressor 329 .as_ref() 330 .ok_or_else(|| anyhow!("Must specify a compressor"))?; 331 CmprssOutput::Path(PathBuf::from( 332 c.default_extracted_filename(get_input_filename(&cmprss_input)?), 333 )) 334 } 335 Action::Unknown => { 336 if let Some(ref c) = compressor { 337 // We know the compressor, does the input have the same extension? 338 if let Some(compressor_from_input) = 339 get_compressor_from_filename(get_input_filename(&cmprss_input)?) 340 { 341 if c.name() == compressor_from_input.name() { 342 action = Action::Extract; 343 CmprssOutput::Path(PathBuf::from(c.default_extracted_filename( 344 get_input_filename(&cmprss_input)?, 345 ))) 346 } else { 347 action = Action::Compress; 348 CmprssOutput::Path(PathBuf::from( 349 c.default_compressed_filename(get_input_filename( 350 &cmprss_input, 351 )?), 352 )) 353 } 354 } else { 355 action = Action::Compress; 356 CmprssOutput::Path(PathBuf::from(c.default_compressed_filename( 357 get_input_filename(&cmprss_input)?, 358 ))) 359 } 360 } else { 361 // Can still work if the input is an archive 362 compressor = 363 get_compressor_from_filename(get_input_filename(&cmprss_input)?); 364 let c = compressor 365 .as_ref() 366 .ok_or_else(|| anyhow!("Must specify a compressor"))?; 367 action = Action::Extract; 368 CmprssOutput::Path(PathBuf::from( 369 c.default_extracted_filename(get_input_filename(&cmprss_input)?), 370 )) 371 } 372 } 373 } 374 } 375 } 376 }; 377 378 // If we don't have the compressor/action, we can attempt to infer 379 if compressor.is_none() || action == Action::Unknown { 380 match action { 381 Action::Compress => { 382 // Look at the output name 383 if let CmprssOutput::Path(path) = &cmprss_output { 384 compressor = get_compressor_from_filename(path); 385 } 386 } 387 Action::Extract => { 388 if let CmprssInput::Path(paths) = &cmprss_input { 389 if paths.len() != 1 { 390 bail!("Expected a single archive to extract"); 391 } 392 compressor = get_compressor_from_filename(paths.first().unwrap()); 393 } 394 } 395 Action::Unknown => match (&cmprss_input, &cmprss_output) { 396 (CmprssInput::Path(paths), CmprssOutput::Path(path)) => { 397 if path.is_dir() && paths.len() == 1 { 398 compressor = get_compressor_from_filename(paths.first().unwrap()); 399 action = Action::Extract; 400 401 if compressor.is_none() { 402 bail!( 403 "Couldn't determine how to extract {:?}", 404 paths.first().unwrap() 405 ); 406 } 407 } else { 408 let (guessed_compressor, guessed_action) = 409 guess_from_filenames(paths, path, compressor); 410 compressor = guessed_compressor; 411 action = guessed_action; 412 } 413 } 414 (CmprssInput::Path(paths), CmprssOutput::Pipe(_)) => { 415 if let Some(ref c) = compressor { 416 if let Some(input_c) = get_compressor_from_filename(paths.first().unwrap()) 417 { 418 if c.name() == input_c.name() { 419 action = Action::Extract; 420 } else { 421 action = Action::Compress; 422 } 423 } else { 424 action = Action::Compress; 425 } 426 } else { 427 if paths.len() != 1 { 428 bail!("Expected a single input file for piping to stdout"); 429 } 430 compressor = get_compressor_from_filename(paths.first().unwrap()); 431 if compressor.is_some() { 432 action = Action::Extract; 433 } else { 434 bail!("Can't guess compressor to use"); 435 } 436 } 437 } 438 (CmprssInput::Pipe(_), CmprssOutput::Path(path)) => { 439 if let Some(ref c) = compressor { 440 if get_compressor_from_filename(path) 441 .is_some_and(|pc| c.name() == pc.name()) 442 { 443 action = Action::Compress; 444 } else { 445 action = Action::Extract; 446 } 447 } else { 448 compressor = get_compressor_from_filename(path); 449 if compressor.is_some() { 450 action = Action::Compress; 451 } else { 452 bail!("Can't guess compressor to use"); 453 } 454 } 455 } 456 (CmprssInput::Pipe(_), CmprssOutput::Pipe(_)) => { 457 action = Action::Compress; 458 } 459 // Handle all Writer output cases 460 (_, CmprssOutput::Writer(_)) => { 461 // Writer outputs are only used internally by Pipeline 462 // In main.rs we'll assume compression 463 action = Action::Compress; 464 } 465 // Handle all Reader input cases 466 (&CmprssInput::Reader(_), _) => { 467 // For Reader input, we'll assume extraction 468 action = Action::Extract; 469 } 470 }, 471 } 472 } 473 474 let compressor = compressor.ok_or_else(|| anyhow!("Could not determine compressor to use"))?; 475 if action == Action::Unknown { 476 bail!("Could not determine action to take"); 477 } 478 479 Ok(Job { 480 compressor, 481 input: cmprss_input, 482 output: cmprss_output, 483 action, 484 }) 485} 486 487fn command(compressor: Option<Box<dyn Compressor>>, args: &CommonArgs) -> Result { 488 let job = get_job(compressor, args)?; 489 490 match job.action { 491 Action::Compress => job.compressor.compress(job.input, job.output)?, 492 Action::Extract => job.compressor.extract(job.input, job.output)?, 493 _ => { 494 bail!("Unknown action requested"); 495 } 496 }; 497 498 Ok(()) 499} 500 501fn main() { 502 let args = CmprssArgs::parse(); 503 match args.format { 504 Some(Format::Tar(a)) => command(Some(Box::new(Tar::new(&a))), &a.common_args), 505 Some(Format::Gzip(a)) => command(Some(Box::new(Gzip::new(&a))), &a.common_args), 506 Some(Format::Xz(a)) => command(Some(Box::new(Xz::new(&a))), &a.common_args), 507 Some(Format::Bzip2(a)) => command(Some(Box::new(Bzip2::new(&a))), &a.common_args), 508 Some(Format::Zip(a)) => command(Some(Box::new(Zip::new(&a))), &a.common_args), 509 Some(Format::Zstd(a)) => command(Some(Box::new(Zstd::new(&a))), &a.common_args), 510 Some(Format::Lz4(a)) => command(Some(Box::new(Lz4::new(&a))), &a.common_args), 511 _ => command(None, &args.base_args), 512 } 513 .unwrap_or_else(|e| { 514 eprintln!("ERROR(cmprss): {}", e); 515 std::process::exit(1); 516 }); 517} 518 519#[cfg(test)] 520mod tests { 521 use super::*; 522 use std::path::Path; 523 524 fn compressor_name(path: &str) -> Option<String> { 525 get_compressor_from_filename(Path::new(path)).map(|c| c.name().to_string()) 526 } 527 528 fn compressor_extension(path: &str) -> Option<String> { 529 get_compressor_from_filename(Path::new(path)).map(|c| c.extension().to_string()) 530 } 531 532 #[test] 533 fn test_single_extension() { 534 assert_eq!(compressor_name("file.gz"), Some("gzip".into())); 535 assert_eq!(compressor_name("file.xz"), Some("xz".into())); 536 assert_eq!(compressor_name("file.bz2"), Some("bzip2".into())); 537 assert_eq!(compressor_name("file.zst"), Some("zstd".into())); 538 assert_eq!(compressor_name("file.lz4"), Some("lz4".into())); 539 assert_eq!(compressor_name("file.tar"), Some("tar".into())); 540 assert_eq!(compressor_name("file.zip"), Some("zip".into())); 541 } 542 543 #[test] 544 fn test_multi_extension() { 545 assert_eq!(compressor_name("archive.tar.gz"), Some("gzip".into())); 546 assert_eq!(compressor_name("archive.tar.xz"), Some("xz".into())); 547 assert_eq!(compressor_name("archive.tar.bz2"), Some("bzip2".into())); 548 assert_eq!(compressor_name("archive.tar.zst"), Some("zstd".into())); 549 } 550 551 #[test] 552 fn test_unknown_middle_extension() { 553 // "b" is not a compressor, so only tar.gz should be detected 554 assert_eq!(compressor_name("a.b.tar.gz"), Some("gzip".into())); 555 assert_eq!(compressor_name("report.2024.tar.gz"), Some("gzip".into())); 556 } 557 558 #[test] 559 fn test_no_recognized_extension() { 560 assert_eq!(compressor_name("file.txt"), None); 561 assert_eq!(compressor_name("file.pdf"), None); 562 assert_eq!(compressor_name("file"), None); 563 } 564 565 #[test] 566 fn test_default_filenames_single_pipeline() { 567 let c = get_compressor_from_filename(Path::new("file.gz")).unwrap(); 568 assert_eq!( 569 c.default_compressed_filename(Path::new("data.txt")), 570 "data.txt.gz" 571 ); 572 assert_eq!(c.default_extracted_filename(Path::new("data.gz")), "data"); 573 } 574 575 #[test] 576 fn test_default_filenames_multi_pipeline() { 577 let c = get_compressor_from_filename(Path::new("archive.tar.gz")).unwrap(); 578 assert_eq!( 579 c.default_compressed_filename(Path::new("data")), 580 "data.tar.gz" 581 ); 582 // tar.gz extracts to a directory, so extracted filename is "." 583 assert_eq!(c.default_extracted_filename(Path::new("data.tar.gz")), "."); 584 } 585 586 #[test] 587 fn test_is_archive_single_pipeline() { 588 let c = get_compressor_from_filename(Path::new("file.gz")).unwrap(); 589 assert!(c.is_archive(Path::new("test.gz"))); 590 assert!(!c.is_archive(Path::new("test.xz"))); 591 } 592 593 #[test] 594 fn test_is_archive_multi_pipeline() { 595 let c = get_compressor_from_filename(Path::new("archive.tar.gz")).unwrap(); 596 assert!(c.is_archive(Path::new("foo.tar.gz"))); 597 assert!(!c.is_archive(Path::new("foo.gz"))); 598 } 599 600 #[test] 601 fn test_extracted_target_single_pipeline() { 602 let gz = get_compressor_from_filename(Path::new("file.gz")).unwrap(); 603 assert_eq!(gz.default_extracted_target(), ExtractedTarget::FILE); 604 605 let tar = get_compressor_from_filename(Path::new("file.tar")).unwrap(); 606 assert_eq!(tar.default_extracted_target(), ExtractedTarget::DIRECTORY); 607 } 608 609 #[test] 610 fn test_extracted_target_multi_pipeline() { 611 // tar.gz: innermost is tar, which extracts to directory 612 let c = get_compressor_from_filename(Path::new("archive.tar.gz")).unwrap(); 613 assert_eq!(c.default_extracted_target(), ExtractedTarget::DIRECTORY); 614 } 615 616 #[test] 617 fn test_single_extension_returns_correct_extension() { 618 assert_eq!(compressor_extension("file.gz"), Some("gz".into())); 619 assert_eq!(compressor_extension("file.tar"), Some("tar".into())); 620 } 621}