this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

at d7eac9681af468ef2dd62f00e8a09e36cf744585 627 lines 24 kB view raw
1pub mod backends; 2pub mod progress; 3pub mod test_utils; 4pub mod utils; 5 6use anyhow::{anyhow, bail}; 7use backends::*; 8use clap::{Parser, Subcommand}; 9use is_terminal::IsTerminal; 10use std::path::{Path, PathBuf}; 11use utils::*; 12 13/// A compression multi-tool 14#[derive(Parser, Debug)] 15#[command(author, version, about, long_about = None)] 16struct CmprssArgs { 17 /// Format 18 #[command(subcommand)] 19 format: Option<Format>, 20 21 // Base arguments for the non-subcommand behavior 22 #[clap(flatten)] 23 pub base_args: CommonArgs, 24} 25#[derive(Subcommand, Debug)] 26enum Format { 27 /// tar archive format 28 Tar(TarArgs), 29 30 /// gzip compression 31 #[clap(visible_alias = "gz")] 32 Gzip(GzipArgs), 33 34 /// xz compression 35 Xz(XzArgs), 36 37 /// bzip2 compression 38 #[clap(visible_alias = "bz2")] 39 Bzip2(Bzip2Args), 40 41 /// zip archive format 42 Zip(ZipArgs), 43 44 /// zstd compression 45 #[clap(visible_alias = "zst")] 46 Zstd(ZstdArgs), 47 48 /// lz4 compression 49 Lz4(Lz4Args), 50 51 /// brotli compression 52 #[clap(visible_alias = "br")] 53 Brotli(BrotliArgs), 54} 55 56/// Get the input filename or return a default file 57/// This file will be used to generate the output filename 58fn get_input_filename(input: &CmprssInput) -> Result<&Path> { 59 match input { 60 CmprssInput::Path(paths) => match paths.first() { 61 Some(path) => Ok(path), 62 None => bail!("error: no input specified"), 63 }, 64 CmprssInput::Pipe(_) => Ok(Path::new("archive")), 65 CmprssInput::Reader(_) => Ok(Path::new("piped_data")), 66 } 67} 68 69#[derive(Debug, PartialEq, Clone, Copy)] 70enum Action { 71 Compress, 72 Extract, 73 Unknown, 74} 75 76/// Defines a single compress/extract action to take. 77#[derive(Debug)] 78struct Job { 79 compressor: Box<dyn Compressor>, 80 input: CmprssInput, 81 output: CmprssOutput, 82 action: Action, 83} 84 85/// Get a compressor pipeline from a filename by scanning extensions right-to-left 86fn get_compressor_from_filename(filename: &Path) -> Option<Box<dyn Compressor>> { 87 let file_name = filename.file_name()?.to_str()?; 88 let parts: Vec<&str> = file_name.split('.').collect(); 89 90 if parts.len() < 2 { 91 return None; 92 } 93 94 // Scan extensions right-to-left, collecting known compressors 95 // until hitting an unknown extension or the base name. 96 // e.g., "a.b.tar.gz" → gz ✓, tar ✓, b ✗ stop → [gz, tar] 97 let mut compressor_names: Vec<String> = Vec::new(); 98 for ext in parts[1..].iter().rev() { 99 if let Some(c) = backends::compressor_from_str(ext) { 100 compressor_names.push(c.name().to_string()); 101 } else { 102 break; 103 } 104 } 105 106 if compressor_names.is_empty() { 107 return None; 108 } 109 110 // Reverse to innermost-to-outermost order 111 compressor_names.reverse(); 112 Pipeline::from_names(&compressor_names) 113 .ok() 114 .map(|m| Box::new(m) as Box<dyn Compressor>) 115} 116 117/// Convert an input path into a Path 118fn get_path(input: &str) -> Option<PathBuf> { 119 let path = PathBuf::from(input); 120 if !path.try_exists().unwrap_or(false) { 121 return None; 122 } 123 Some(path) 124} 125 126/// Guess compressor/action from the two filenames 127/// The compressor may already be given 128fn guess_from_filenames( 129 input: &[PathBuf], 130 output: &Path, 131 compressor: Option<Box<dyn Compressor>>, 132) -> (Option<Box<dyn Compressor>>, Action) { 133 if input.len() != 1 { 134 if let Some(guessed_compressor) = get_compressor_from_filename(output) { 135 return (Some(guessed_compressor), Action::Compress); 136 } 137 138 // Check if output is a directory - this is likely an extraction 139 if output.is_dir() { 140 // Try to determine compressor from the input file's extension(s) 141 if let Some(input_path) = input.first() 142 && let Some(guessed_compressor) = get_compressor_from_filename(input_path) 143 { 144 return (Some(guessed_compressor), Action::Extract); 145 } 146 } 147 148 // In theory we could be extracting multiple files to a directory 149 // We'll fail somewhere else if that's not the case 150 return (compressor, Action::Extract); 151 } 152 let input = input.first().unwrap(); 153 154 let guessed_compressor = get_compressor_from_filename(output); 155 let guessed_extractor = get_compressor_from_filename(input); 156 let guessed_compressor_name = if let Some(c) = &guessed_compressor { 157 c.name() 158 } else { 159 "" 160 }; 161 let guessed_extractor_name = if let Some(e) = &guessed_extractor { 162 e.name() 163 } else { 164 "" 165 }; 166 167 if let Some(c) = &compressor { 168 if guessed_compressor_name == c.name() { 169 return (compressor, Action::Compress); 170 } else if guessed_extractor_name == c.name() { 171 return (compressor, Action::Extract); 172 } else { 173 // Default to compressing 174 return (compressor, Action::Compress); 175 } 176 } 177 178 match (guessed_compressor, guessed_extractor) { 179 (None, None) => (None, Action::Unknown), 180 (Some(c), None) => (Some(c), Action::Compress), 181 (None, Some(e)) => (Some(e), Action::Extract), 182 (Some(c), Some(e)) => { 183 // Compare the input and output extensions to see if one has an extra extension 184 let input_file = input.file_name().unwrap().to_str().unwrap(); 185 let input_ext = input.extension().unwrap_or_default(); 186 let output_file = output.file_name().unwrap().to_str().unwrap(); 187 let output_ext = output.extension().unwrap_or_default(); 188 let guessed_output = input_file.to_string() + "." + output_ext.to_str().unwrap(); 189 let guessed_input = output_file.to_string() + "." + input_ext.to_str().unwrap(); 190 191 if guessed_output == output_file { 192 // Input is "archive.tar", output is "archive.tar.gz" — only add the outer layer 193 let single_compressor = 194 backends::compressor_from_str(output_ext.to_str().unwrap_or("")); 195 (single_compressor.or(Some(c)), Action::Compress) 196 } else if guessed_input == input_file { 197 // Output is "archive.tar", input is "archive.tar.gz" — only strip the outer layer 198 let single_compressor = 199 backends::compressor_from_str(input_ext.to_str().unwrap_or("")); 200 (single_compressor.or(Some(e)), Action::Extract) 201 } else if c.name() == e.name() { 202 // Same format for input and output, can't decide 203 if output.is_dir() { 204 (Some(e), Action::Extract) 205 } else { 206 (Some(c), Action::Unknown) 207 } 208 } else if output.is_dir() { 209 (Some(e), Action::Extract) 210 } else { 211 (None, Action::Unknown) 212 } 213 } 214 } 215} 216 217/// Parse the common args and determine the details of the job requested 218fn get_job(compressor: Option<Box<dyn Compressor>>, common_args: &CommonArgs) -> Result<Job> { 219 let mut compressor = compressor; 220 let mut action = { 221 if common_args.compress { 222 Action::Compress 223 } else if common_args.extract || common_args.decompress { 224 Action::Extract 225 } else { 226 Action::Unknown 227 } 228 }; 229 230 let mut inputs = Vec::new(); 231 if let Some(in_file) = &common_args.input { 232 match get_path(in_file) { 233 Some(path) => inputs.push(path), 234 None => { 235 bail!("Specified input path does not exist"); 236 } 237 } 238 } 239 240 let mut output = match &common_args.output { 241 Some(output) => { 242 let path = Path::new(output); 243 if path.try_exists()? && !path.is_dir() { 244 // Output path exists, bail out 245 bail!("Specified output path already exists"); 246 } 247 Some(path) 248 } 249 None => None, 250 }; 251 252 // Process the io_list, check if there is an output first 253 let mut io_list = common_args.io_list.clone(); 254 if output.is_none() 255 && let Some(possible_output) = common_args.io_list.last() 256 { 257 let path = Path::new(possible_output); 258 if !path.try_exists()? { 259 // Use the given path if it doesn't exist 260 output = Some(path); 261 io_list.pop(); 262 } else if path.is_dir() { 263 match action { 264 Action::Compress => { 265 // A directory can potentially be a target output location or 266 // an input, for now assume it is an input. 267 } 268 Action::Extract => { 269 // Can extract to a directory, and it wouldn't make any sense as an input 270 output = Some(path); 271 io_list.pop(); 272 } 273 _ => { 274 // TODO: don't know if this is an input or output, assume we're compressing this directory 275 // This does cause problems for inferencing "cat archive.tar | cmprss tar ." 276 // Probably need to add some special casing 277 } 278 }; 279 } else { 280 // TODO: check for scenarios where we want to append to an existing archive 281 } 282 } 283 284 // Validate the specified inputs 285 // Everything in the io_list should be an input 286 for input in &io_list { 287 if let Some(path) = get_path(input) { 288 inputs.push(path); 289 } else { 290 bail!("Specified input path does not exist"); 291 } 292 } 293 294 // Fallback to stdin/stdout if we're missing files 295 let cmprss_input = match inputs.is_empty() { 296 true => { 297 if !std::io::stdin().is_terminal() 298 && !&common_args.ignore_pipes 299 && !&common_args.ignore_stdin 300 { 301 CmprssInput::Pipe(std::io::stdin()) 302 } else { 303 bail!("No specified input"); 304 } 305 } 306 false => CmprssInput::Path(inputs), 307 }; 308 309 let cmprss_output = match output { 310 Some(path) => CmprssOutput::Path(path.to_path_buf()), 311 None => { 312 if !std::io::stdout().is_terminal() 313 && !&common_args.ignore_pipes 314 && !&common_args.ignore_stdout 315 { 316 CmprssOutput::Pipe(std::io::stdout()) 317 } else { 318 match action { 319 Action::Compress => { 320 let c = compressor 321 .as_ref() 322 .ok_or_else(|| anyhow!("Must specify a compressor"))?; 323 CmprssOutput::Path(PathBuf::from( 324 c.default_compressed_filename(get_input_filename(&cmprss_input)?), 325 )) 326 } 327 Action::Extract => { 328 if compressor.is_none() { 329 compressor = 330 get_compressor_from_filename(get_input_filename(&cmprss_input)?); 331 } 332 let c = compressor 333 .as_ref() 334 .ok_or_else(|| anyhow!("Must specify a compressor"))?; 335 CmprssOutput::Path(PathBuf::from( 336 c.default_extracted_filename(get_input_filename(&cmprss_input)?), 337 )) 338 } 339 Action::Unknown => { 340 if let Some(ref c) = compressor { 341 // We know the compressor, does the input have the same extension? 342 if let Some(compressor_from_input) = 343 get_compressor_from_filename(get_input_filename(&cmprss_input)?) 344 { 345 if c.name() == compressor_from_input.name() { 346 action = Action::Extract; 347 CmprssOutput::Path(PathBuf::from(c.default_extracted_filename( 348 get_input_filename(&cmprss_input)?, 349 ))) 350 } else { 351 action = Action::Compress; 352 CmprssOutput::Path(PathBuf::from( 353 c.default_compressed_filename(get_input_filename( 354 &cmprss_input, 355 )?), 356 )) 357 } 358 } else { 359 action = Action::Compress; 360 CmprssOutput::Path(PathBuf::from(c.default_compressed_filename( 361 get_input_filename(&cmprss_input)?, 362 ))) 363 } 364 } else { 365 // Can still work if the input is an archive 366 compressor = 367 get_compressor_from_filename(get_input_filename(&cmprss_input)?); 368 let c = compressor 369 .as_ref() 370 .ok_or_else(|| anyhow!("Must specify a compressor"))?; 371 action = Action::Extract; 372 CmprssOutput::Path(PathBuf::from( 373 c.default_extracted_filename(get_input_filename(&cmprss_input)?), 374 )) 375 } 376 } 377 } 378 } 379 } 380 }; 381 382 // If we don't have the compressor/action, we can attempt to infer 383 if compressor.is_none() || action == Action::Unknown { 384 match action { 385 Action::Compress => { 386 // Look at the output name 387 if let CmprssOutput::Path(path) = &cmprss_output { 388 compressor = get_compressor_from_filename(path); 389 } 390 } 391 Action::Extract => { 392 if let CmprssInput::Path(paths) = &cmprss_input { 393 if paths.len() != 1 { 394 bail!("Expected a single archive to extract"); 395 } 396 compressor = get_compressor_from_filename(paths.first().unwrap()); 397 } 398 } 399 Action::Unknown => match (&cmprss_input, &cmprss_output) { 400 (CmprssInput::Path(paths), CmprssOutput::Path(path)) => { 401 if path.is_dir() && paths.len() == 1 { 402 compressor = get_compressor_from_filename(paths.first().unwrap()); 403 action = Action::Extract; 404 405 if compressor.is_none() { 406 bail!( 407 "Couldn't determine how to extract {:?}", 408 paths.first().unwrap() 409 ); 410 } 411 } else { 412 let (guessed_compressor, guessed_action) = 413 guess_from_filenames(paths, path, compressor); 414 compressor = guessed_compressor; 415 action = guessed_action; 416 } 417 } 418 (CmprssInput::Path(paths), CmprssOutput::Pipe(_)) => { 419 if let Some(ref c) = compressor { 420 if let Some(input_c) = get_compressor_from_filename(paths.first().unwrap()) 421 { 422 if c.name() == input_c.name() { 423 action = Action::Extract; 424 } else { 425 action = Action::Compress; 426 } 427 } else { 428 action = Action::Compress; 429 } 430 } else { 431 if paths.len() != 1 { 432 bail!("Expected a single input file for piping to stdout"); 433 } 434 compressor = get_compressor_from_filename(paths.first().unwrap()); 435 if compressor.is_some() { 436 action = Action::Extract; 437 } else { 438 bail!("Can't guess compressor to use"); 439 } 440 } 441 } 442 (CmprssInput::Pipe(_), CmprssOutput::Path(path)) => { 443 if let Some(ref c) = compressor { 444 if get_compressor_from_filename(path) 445 .is_some_and(|pc| c.name() == pc.name()) 446 { 447 action = Action::Compress; 448 } else { 449 action = Action::Extract; 450 } 451 } else { 452 compressor = get_compressor_from_filename(path); 453 if compressor.is_some() { 454 action = Action::Compress; 455 } else { 456 bail!("Can't guess compressor to use"); 457 } 458 } 459 } 460 (CmprssInput::Pipe(_), CmprssOutput::Pipe(_)) => { 461 action = Action::Compress; 462 } 463 // Handle all Writer output cases 464 (_, CmprssOutput::Writer(_)) => { 465 // Writer outputs are only used internally by Pipeline 466 // In main.rs we'll assume compression 467 action = Action::Compress; 468 } 469 // Handle all Reader input cases 470 (&CmprssInput::Reader(_), _) => { 471 // For Reader input, we'll assume extraction 472 action = Action::Extract; 473 } 474 }, 475 } 476 } 477 478 let compressor = compressor.ok_or_else(|| anyhow!("Could not determine compressor to use"))?; 479 if action == Action::Unknown { 480 bail!("Could not determine action to take"); 481 } 482 483 Ok(Job { 484 compressor, 485 input: cmprss_input, 486 output: cmprss_output, 487 action, 488 }) 489} 490 491fn command(compressor: Option<Box<dyn Compressor>>, args: &CommonArgs) -> Result { 492 let job = get_job(compressor, args)?; 493 494 match job.action { 495 Action::Compress => job.compressor.compress(job.input, job.output)?, 496 Action::Extract => job.compressor.extract(job.input, job.output)?, 497 _ => { 498 bail!("Unknown action requested"); 499 } 500 }; 501 502 Ok(()) 503} 504 505fn main() { 506 let args = CmprssArgs::parse(); 507 match args.format { 508 Some(Format::Tar(a)) => command(Some(Box::new(Tar::new(&a))), &a.common_args), 509 Some(Format::Gzip(a)) => command(Some(Box::new(Gzip::new(&a))), &a.common_args), 510 Some(Format::Xz(a)) => command(Some(Box::new(Xz::new(&a))), &a.common_args), 511 Some(Format::Bzip2(a)) => command(Some(Box::new(Bzip2::new(&a))), &a.common_args), 512 Some(Format::Zip(a)) => command(Some(Box::new(Zip::new(&a))), &a.common_args), 513 Some(Format::Zstd(a)) => command(Some(Box::new(Zstd::new(&a))), &a.common_args), 514 Some(Format::Lz4(a)) => command(Some(Box::new(Lz4::new(&a))), &a.common_args), 515 Some(Format::Brotli(a)) => command(Some(Box::new(Brotli::new(&a))), &a.common_args), 516 _ => command(None, &args.base_args), 517 } 518 .unwrap_or_else(|e| { 519 eprintln!("ERROR(cmprss): {}", e); 520 std::process::exit(1); 521 }); 522} 523 524#[cfg(test)] 525mod tests { 526 use super::*; 527 use std::path::Path; 528 529 fn compressor_name(path: &str) -> Option<String> { 530 get_compressor_from_filename(Path::new(path)).map(|c| c.name().to_string()) 531 } 532 533 fn compressor_extension(path: &str) -> Option<String> { 534 get_compressor_from_filename(Path::new(path)).map(|c| c.extension().to_string()) 535 } 536 537 #[test] 538 fn test_single_extension() { 539 assert_eq!(compressor_name("file.gz"), Some("gzip".into())); 540 assert_eq!(compressor_name("file.xz"), Some("xz".into())); 541 assert_eq!(compressor_name("file.bz2"), Some("bzip2".into())); 542 assert_eq!(compressor_name("file.zst"), Some("zstd".into())); 543 assert_eq!(compressor_name("file.lz4"), Some("lz4".into())); 544 assert_eq!(compressor_name("file.br"), Some("brotli".into())); 545 assert_eq!(compressor_name("file.tar"), Some("tar".into())); 546 assert_eq!(compressor_name("file.zip"), Some("zip".into())); 547 } 548 549 #[test] 550 fn test_multi_extension() { 551 assert_eq!(compressor_name("archive.tar.gz"), Some("gzip".into())); 552 assert_eq!(compressor_name("archive.tar.xz"), Some("xz".into())); 553 assert_eq!(compressor_name("archive.tar.bz2"), Some("bzip2".into())); 554 assert_eq!(compressor_name("archive.tar.zst"), Some("zstd".into())); 555 } 556 557 #[test] 558 fn test_unknown_middle_extension() { 559 // "b" is not a compressor, so only tar.gz should be detected 560 assert_eq!(compressor_name("a.b.tar.gz"), Some("gzip".into())); 561 assert_eq!(compressor_name("report.2024.tar.gz"), Some("gzip".into())); 562 } 563 564 #[test] 565 fn test_no_recognized_extension() { 566 assert_eq!(compressor_name("file.txt"), None); 567 assert_eq!(compressor_name("file.pdf"), None); 568 assert_eq!(compressor_name("file"), None); 569 } 570 571 #[test] 572 fn test_default_filenames_single_pipeline() { 573 let c = get_compressor_from_filename(Path::new("file.gz")).unwrap(); 574 assert_eq!( 575 c.default_compressed_filename(Path::new("data.txt")), 576 "data.txt.gz" 577 ); 578 assert_eq!(c.default_extracted_filename(Path::new("data.gz")), "data"); 579 } 580 581 #[test] 582 fn test_default_filenames_multi_pipeline() { 583 let c = get_compressor_from_filename(Path::new("archive.tar.gz")).unwrap(); 584 assert_eq!( 585 c.default_compressed_filename(Path::new("data")), 586 "data.tar.gz" 587 ); 588 // tar.gz extracts to a directory, so extracted filename is "." 589 assert_eq!(c.default_extracted_filename(Path::new("data.tar.gz")), "."); 590 } 591 592 #[test] 593 fn test_is_archive_single_pipeline() { 594 let c = get_compressor_from_filename(Path::new("file.gz")).unwrap(); 595 assert!(c.is_archive(Path::new("test.gz"))); 596 assert!(!c.is_archive(Path::new("test.xz"))); 597 } 598 599 #[test] 600 fn test_is_archive_multi_pipeline() { 601 let c = get_compressor_from_filename(Path::new("archive.tar.gz")).unwrap(); 602 assert!(c.is_archive(Path::new("foo.tar.gz"))); 603 assert!(!c.is_archive(Path::new("foo.gz"))); 604 } 605 606 #[test] 607 fn test_extracted_target_single_pipeline() { 608 let gz = get_compressor_from_filename(Path::new("file.gz")).unwrap(); 609 assert_eq!(gz.default_extracted_target(), ExtractedTarget::FILE); 610 611 let tar = get_compressor_from_filename(Path::new("file.tar")).unwrap(); 612 assert_eq!(tar.default_extracted_target(), ExtractedTarget::DIRECTORY); 613 } 614 615 #[test] 616 fn test_extracted_target_multi_pipeline() { 617 // tar.gz: innermost is tar, which extracts to directory 618 let c = get_compressor_from_filename(Path::new("archive.tar.gz")).unwrap(); 619 assert_eq!(c.default_extracted_target(), ExtractedTarget::DIRECTORY); 620 } 621 622 #[test] 623 fn test_single_extension_returns_correct_extension() { 624 assert_eq!(compressor_extension("file.gz"), Some("gz".into())); 625 assert_eq!(compressor_extension("file.tar"), Some("tar".into())); 626 } 627}