this repo has no description
1pub mod backends;
2pub mod progress;
3pub mod test_utils;
4pub mod utils;
5
6use anyhow::{anyhow, bail};
7use backends::*;
8use clap::{Parser, Subcommand};
9use is_terminal::IsTerminal;
10use std::path::{Path, PathBuf};
11use utils::*;
12
/// A compression multi-tool
// Top-level command-line arguments, parsed through clap's derive API.
// Reviewer note: clap's derive can surface `///` comments as help text, so
// maintainer-facing notes in this type are deliberately plain `//` comments.
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct CmprssArgs {
    /// Format
    // Optional format subcommand. When it is absent, `main` runs the job with
    // `base_args` and no explicit compressor, leaving `get_job` to infer one.
    #[command(subcommand)]
    format: Option<Format>,

    // Base arguments for the non-subcommand behavior
    #[clap(flatten)]
    pub base_args: CommonArgs,
}
// Subcommands that select an explicit format. Each variant wraps the argument
// struct of the matching backend; `main` builds the backend from those args.
// The `///` lines on the variants are kept as-is because clap's derive can use
// them as the subcommand help text.
#[derive(Subcommand, Debug)]
enum Format {
    /// tar archive format
    Tar(TarArgs),

    /// gzip compression
    // Also reachable through the visible alias `gz`.
    #[clap(visible_alias = "gz")]
    Gzip(GzipArgs),

    /// xz compression
    Xz(XzArgs),

    /// bzip2 compression
    // Also reachable through the visible alias `bz2`.
    #[clap(visible_alias = "bz2")]
    Bzip2(Bzip2Args),

    /// zip archive format
    Zip(ZipArgs),

    /// zstd compression
    // Also reachable through the visible alias `zst`.
    #[clap(visible_alias = "zst")]
    Zstd(ZstdArgs),

    /// lz4 compression
    Lz4(Lz4Args),
}
51
52/// Get the input filename or return a default file
53/// This file will be used to generate the output filename
54fn get_input_filename(input: &CmprssInput) -> Result<&Path> {
55 match input {
56 CmprssInput::Path(paths) => match paths.first() {
57 Some(path) => Ok(path),
58 None => bail!("error: no input specified"),
59 },
60 CmprssInput::Pipe(_) => Ok(Path::new("archive")),
61 CmprssInput::Reader(_) => Ok(Path::new("piped_data")),
62 }
63}
64
/// The operation a job performs on its input.
#[derive(Debug, PartialEq, Clone, Copy)]
enum Action {
    /// Run the compressor's `compress` on the input.
    Compress,
    /// Run the compressor's `extract` on the input.
    Extract,
    /// Not decided yet; `get_job` tries to infer the action and reports an
    /// error if it is still unknown afterwards.
    Unknown,
}
71
/// Defines a single compress/extract action to take.
///
/// Built by `get_job` and consumed by `command`.
#[derive(Debug)]
struct Job {
    /// Backend (or pipeline of backends) that performs the work.
    compressor: Box<dyn Compressor>,
    /// Where the data is read from.
    input: CmprssInput,
    /// Where the result is written to.
    output: CmprssOutput,
    /// Whether to compress or extract.
    action: Action,
}
80
81/// Get a compressor pipeline from a filename by scanning extensions right-to-left
82fn get_compressor_from_filename(filename: &Path) -> Option<Box<dyn Compressor>> {
83 let file_name = filename.file_name()?.to_str()?;
84 let parts: Vec<&str> = file_name.split('.').collect();
85
86 if parts.len() < 2 {
87 return None;
88 }
89
90 // Scan extensions right-to-left, collecting known compressors
91 // until hitting an unknown extension or the base name.
92 // e.g., "a.b.tar.gz" → gz ✓, tar ✓, b ✗ stop → [gz, tar]
93 let mut compressor_names: Vec<String> = Vec::new();
94 for ext in parts[1..].iter().rev() {
95 if let Some(c) = backends::compressor_from_str(ext) {
96 compressor_names.push(c.name().to_string());
97 } else {
98 break;
99 }
100 }
101
102 if compressor_names.is_empty() {
103 return None;
104 }
105
106 // Reverse to innermost-to-outermost order
107 compressor_names.reverse();
108 Pipeline::from_names(&compressor_names)
109 .ok()
110 .map(|m| Box::new(m) as Box<dyn Compressor>)
111}
112
/// Turn a user-supplied string into a path, but only if it exists.
///
/// Returns `None` when the path is absent or when its existence cannot be
/// determined (the check itself failed).
fn get_path(input: &str) -> Option<PathBuf> {
    let candidate = PathBuf::from(input);
    // Only a definite "yes, it exists" counts; errors are treated as missing.
    let exists = matches!(candidate.try_exists(), Ok(true));
    exists.then_some(candidate)
}
121
122/// Guess compressor/action from the two filenames
123/// The compressor may already be given
124fn guess_from_filenames(
125 input: &[PathBuf],
126 output: &Path,
127 compressor: Option<Box<dyn Compressor>>,
128) -> (Option<Box<dyn Compressor>>, Action) {
129 if input.len() != 1 {
130 if let Some(guessed_compressor) = get_compressor_from_filename(output) {
131 return (Some(guessed_compressor), Action::Compress);
132 }
133
134 // Check if output is a directory - this is likely an extraction
135 if output.is_dir() {
136 // Try to determine compressor from the input file's extension(s)
137 if let Some(input_path) = input.first() {
138 if let Some(guessed_compressor) = get_compressor_from_filename(input_path) {
139 return (Some(guessed_compressor), Action::Extract);
140 }
141 }
142 }
143
144 // In theory we could be extracting multiple files to a directory
145 // We'll fail somewhere else if that's not the case
146 return (compressor, Action::Extract);
147 }
148 let input = input.first().unwrap();
149
150 let guessed_compressor = get_compressor_from_filename(output);
151 let guessed_extractor = get_compressor_from_filename(input);
152 let guessed_compressor_name = if let Some(c) = &guessed_compressor {
153 c.name()
154 } else {
155 ""
156 };
157 let guessed_extractor_name = if let Some(e) = &guessed_extractor {
158 e.name()
159 } else {
160 ""
161 };
162
163 if let Some(c) = &compressor {
164 if guessed_compressor_name == c.name() {
165 return (compressor, Action::Compress);
166 } else if guessed_extractor_name == c.name() {
167 return (compressor, Action::Extract);
168 } else {
169 // Default to compressing
170 return (compressor, Action::Compress);
171 }
172 }
173
174 match (guessed_compressor, guessed_extractor) {
175 (None, None) => (None, Action::Unknown),
176 (Some(c), None) => (Some(c), Action::Compress),
177 (None, Some(e)) => (Some(e), Action::Extract),
178 (Some(c), Some(e)) => {
179 // Compare the input and output extensions to see if one has an extra extension
180 let input_file = input.file_name().unwrap().to_str().unwrap();
181 let input_ext = input.extension().unwrap_or_default();
182 let output_file = output.file_name().unwrap().to_str().unwrap();
183 let output_ext = output.extension().unwrap_or_default();
184 let guessed_output = input_file.to_string() + "." + output_ext.to_str().unwrap();
185 let guessed_input = output_file.to_string() + "." + input_ext.to_str().unwrap();
186
187 if guessed_output == output_file {
188 // Input is "archive.tar", output is "archive.tar.gz" — only add the outer layer
189 let single_compressor =
190 backends::compressor_from_str(output_ext.to_str().unwrap_or(""));
191 (single_compressor.or(Some(c)), Action::Compress)
192 } else if guessed_input == input_file {
193 // Output is "archive.tar", input is "archive.tar.gz" — only strip the outer layer
194 let single_compressor =
195 backends::compressor_from_str(input_ext.to_str().unwrap_or(""));
196 (single_compressor.or(Some(e)), Action::Extract)
197 } else if c.name() == e.name() {
198 // Same format for input and output, can't decide
199 if output.is_dir() {
200 (Some(e), Action::Extract)
201 } else {
202 (Some(c), Action::Unknown)
203 }
204 } else if output.is_dir() {
205 (Some(e), Action::Extract)
206 } else {
207 (None, Action::Unknown)
208 }
209 }
210 }
211}
212
/// Parse the common args and determine the details of the job requested
///
/// `compressor` is the backend chosen by an explicit subcommand, or `None`
/// when the format has to be inferred. The function works in phases:
///
/// 1. read the requested action from the compress/extract/decompress flags,
/// 2. collect the explicit input and output arguments,
/// 3. split the positional `io_list` into inputs and (maybe) an output,
/// 4. fall back to stdin/stdout, or a default output filename, when no paths
///    were given,
/// 5. infer whatever is still missing (compressor and/or action).
///
/// # Errors
///
/// Returns an error when an input path does not exist, when the explicit
/// output path exists and is not a directory, when checking a path's existence
/// fails, when there is no input and stdin cannot be used, or when the
/// compressor or the action cannot be determined.
fn get_job(compressor: Option<Box<dyn Compressor>>, common_args: &CommonArgs) -> Result<Job> {
    let mut compressor = compressor;
    // Phase 1: an explicit flag decides the action; `compress` wins over
    // `extract`/`decompress` if several are set.
    let mut action = {
        if common_args.compress {
            Action::Compress
        } else if common_args.extract || common_args.decompress {
            Action::Extract
        } else {
            Action::Unknown
        }
    };

    // Phase 2: the explicitly named input, if any, must exist.
    let mut inputs = Vec::new();
    if let Some(in_file) = &common_args.input {
        match get_path(in_file) {
            Some(path) => inputs.push(path),
            None => {
                bail!("Specified input path does not exist");
            }
        }
    }

    // The explicitly named output may be a new path or an existing directory,
    // but never an existing non-directory.
    let mut output = match &common_args.output {
        Some(output) => {
            let path = Path::new(output);
            if path.try_exists()? && !path.is_dir() {
                // Output path exists, bail out
                bail!("Specified output path already exists");
            }
            Some(path)
        }
        None => None,
    };

    // Phase 3: process the io_list, check if there is an output first.
    // Only the last entry is ever considered as a possible output.
    let mut io_list = common_args.io_list.clone();
    if output.is_none() {
        if let Some(possible_output) = common_args.io_list.last() {
            let path = Path::new(possible_output);
            if !path.try_exists()? {
                // Use the given path if it doesn't exist
                output = Some(path);
                io_list.pop();
            } else if path.is_dir() {
                match action {
                    Action::Compress => {
                        // A directory can potentially be a target output location or
                        // an input, for now assume it is an input.
                    }
                    Action::Extract => {
                        // Can extract to a directory, and it wouldn't make any sense as an input
                        output = Some(path);
                        io_list.pop();
                    }
                    _ => {
                        // TODO: don't know if this is an input or output, assume we're compressing this directory
                        // This does cause problems for inferencing "cat archive.tar | cmprss tar ."
                        // Probably need to add some special casing
                    }
                };
            } else {
                // TODO: check for scenarios where we want to append to an existing archive
            }
        }
    }

    // Validate the specified inputs
    // Everything in the io_list should be an input
    for input in &io_list {
        if let Some(path) = get_path(input) {
            inputs.push(path);
        } else {
            bail!("Specified input path does not exist");
        }
    }

    // Phase 4: fallback to stdin/stdout if we're missing files.
    // Stdin is only used when it is not a terminal and neither the
    // `ignore_pipes` nor the `ignore_stdin` flag is set.
    let cmprss_input = match inputs.is_empty() {
        true => {
            if !std::io::stdin().is_terminal()
                && !&common_args.ignore_pipes
                && !&common_args.ignore_stdin
            {
                CmprssInput::Pipe(std::io::stdin())
            } else {
                bail!("No specified input");
            }
        }
        false => CmprssInput::Path(inputs),
    };

    // Without an output path, prefer a piped stdout (same rules as stdin, with
    // `ignore_stdout`); otherwise derive a default filename from the input
    // name. Deriving the name may also settle `compressor` and `action`.
    let cmprss_output = match output {
        Some(path) => CmprssOutput::Path(path.to_path_buf()),
        None => {
            if !std::io::stdout().is_terminal()
                && !&common_args.ignore_pipes
                && !&common_args.ignore_stdout
            {
                CmprssOutput::Pipe(std::io::stdout())
            } else {
                match action {
                    Action::Compress => {
                        // Compressing needs a known compressor to name the output.
                        let c = compressor
                            .as_ref()
                            .ok_or_else(|| anyhow!("Must specify a compressor"))?;
                        CmprssOutput::Path(PathBuf::from(
                            c.default_compressed_filename(get_input_filename(&cmprss_input)?),
                        ))
                    }
                    Action::Extract => {
                        // Extracting can pick the compressor from the input's extensions.
                        if compressor.is_none() {
                            compressor =
                                get_compressor_from_filename(get_input_filename(&cmprss_input)?);
                        }
                        let c = compressor
                            .as_ref()
                            .ok_or_else(|| anyhow!("Must specify a compressor"))?;
                        CmprssOutput::Path(PathBuf::from(
                            c.default_extracted_filename(get_input_filename(&cmprss_input)?),
                        ))
                    }
                    Action::Unknown => {
                        if let Some(ref c) = compressor {
                            // We know the compressor, does the input have the same extension?
                            if let Some(compressor_from_input) =
                                get_compressor_from_filename(get_input_filename(&cmprss_input)?)
                            {
                                if c.name() == compressor_from_input.name() {
                                    // Input already is in this format: extract it.
                                    action = Action::Extract;
                                    CmprssOutput::Path(PathBuf::from(c.default_extracted_filename(
                                        get_input_filename(&cmprss_input)?,
                                    )))
                                } else {
                                    // Input is in some other format: compress it.
                                    action = Action::Compress;
                                    CmprssOutput::Path(PathBuf::from(
                                        c.default_compressed_filename(get_input_filename(
                                            &cmprss_input,
                                        )?),
                                    ))
                                }
                            } else {
                                // Input has no recognized format: compress it.
                                action = Action::Compress;
                                CmprssOutput::Path(PathBuf::from(c.default_compressed_filename(
                                    get_input_filename(&cmprss_input)?,
                                )))
                            }
                        } else {
                            // Can still work if the input is an archive
                            compressor =
                                get_compressor_from_filename(get_input_filename(&cmprss_input)?);
                            let c = compressor
                                .as_ref()
                                .ok_or_else(|| anyhow!("Must specify a compressor"))?;
                            action = Action::Extract;
                            CmprssOutput::Path(PathBuf::from(
                                c.default_extracted_filename(get_input_filename(&cmprss_input)?),
                            ))
                        }
                    }
                }
            }
        }
    };

    // Phase 5: if we don't have the compressor/action, we can attempt to infer
    if compressor.is_none() || action == Action::Unknown {
        match action {
            Action::Compress => {
                // Look at the output name
                if let CmprssOutput::Path(path) = &cmprss_output {
                    compressor = get_compressor_from_filename(path);
                }
            }
            Action::Extract => {
                // Look at the (single) input name
                if let CmprssInput::Path(paths) = &cmprss_input {
                    if paths.len() != 1 {
                        bail!("Expected a single archive to extract");
                    }
                    compressor = get_compressor_from_filename(paths.first().unwrap());
                }
            }
            Action::Unknown => match (&cmprss_input, &cmprss_output) {
                (CmprssInput::Path(paths), CmprssOutput::Path(path)) => {
                    if path.is_dir() && paths.len() == 1 {
                        // One input going into a directory is treated as extraction.
                        // NOTE(review): this replaces any compressor passed in by the
                        // caller with the one inferred from the input name.
                        compressor = get_compressor_from_filename(paths.first().unwrap());
                        action = Action::Extract;

                        if compressor.is_none() {
                            bail!(
                                "Couldn't determine how to extract {:?}",
                                paths.first().unwrap()
                            );
                        }
                    } else {
                        let (guessed_compressor, guessed_action) =
                            guess_from_filenames(paths, path, compressor);
                        compressor = guessed_compressor;
                        action = guessed_action;
                    }
                }
                (CmprssInput::Path(paths), CmprssOutput::Pipe(_)) => {
                    if let Some(ref c) = compressor {
                        // Known compressor: extract only if the first input is
                        // already in that format, otherwise compress.
                        if let Some(input_c) = get_compressor_from_filename(paths.first().unwrap())
                        {
                            if c.name() == input_c.name() {
                                action = Action::Extract;
                            } else {
                                action = Action::Compress;
                            }
                        } else {
                            action = Action::Compress;
                        }
                    } else {
                        // Unknown compressor: only extraction of a single
                        // recognizable input can be inferred.
                        if paths.len() != 1 {
                            bail!("Expected a single input file for piping to stdout");
                        }
                        compressor = get_compressor_from_filename(paths.first().unwrap());
                        if compressor.is_some() {
                            action = Action::Extract;
                        } else {
                            bail!("Can't guess compressor to use");
                        }
                    }
                }
                (CmprssInput::Pipe(_), CmprssOutput::Path(path)) => {
                    if let Some(ref c) = compressor {
                        // Known compressor: compress only if the output name
                        // carries that format, otherwise extract.
                        if get_compressor_from_filename(path)
                            .is_some_and(|pc| c.name() == pc.name())
                        {
                            action = Action::Compress;
                        } else {
                            action = Action::Extract;
                        }
                    } else {
                        // Unknown compressor: only compression into a
                        // recognizable output name can be inferred.
                        compressor = get_compressor_from_filename(path);
                        if compressor.is_some() {
                            action = Action::Compress;
                        } else {
                            bail!("Can't guess compressor to use");
                        }
                    }
                }
                (CmprssInput::Pipe(_), CmprssOutput::Pipe(_)) => {
                    // Nothing to inspect on either side; default to compressing.
                    action = Action::Compress;
                }
                // Handle all Writer output cases
                (_, CmprssOutput::Writer(_)) => {
                    // Writer outputs are only used internally by Pipeline
                    // In main.rs we'll assume compression
                    action = Action::Compress;
                }
                // Handle all Reader input cases
                (&CmprssInput::Reader(_), _) => {
                    // For Reader input, we'll assume extraction
                    action = Action::Extract;
                }
            },
        }
    }

    // Both pieces must be settled by now, otherwise the request is ambiguous.
    let compressor = compressor.ok_or_else(|| anyhow!("Could not determine compressor to use"))?;
    if action == Action::Unknown {
        bail!("Could not determine action to take");
    }

    Ok(Job {
        compressor,
        input: cmprss_input,
        output: cmprss_output,
        action,
    })
}
486
487fn command(compressor: Option<Box<dyn Compressor>>, args: &CommonArgs) -> Result {
488 let job = get_job(compressor, args)?;
489
490 match job.action {
491 Action::Compress => job.compressor.compress(job.input, job.output)?,
492 Action::Extract => job.compressor.extract(job.input, job.output)?,
493 _ => {
494 bail!("Unknown action requested");
495 }
496 };
497
498 Ok(())
499}
500
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// Compressor inferred from `path`; panics when nothing is recognized.
    fn detect(path: &str) -> Box<dyn Compressor> {
        get_compressor_from_filename(Path::new(path)).unwrap()
    }

    /// Name of the compressor inferred from `path`, if any.
    fn compressor_name(path: &str) -> Option<String> {
        let found = get_compressor_from_filename(Path::new(path))?;
        Some(found.name().to_string())
    }

    /// Extension of the compressor inferred from `path`, if any.
    fn compressor_extension(path: &str) -> Option<String> {
        let found = get_compressor_from_filename(Path::new(path))?;
        Some(found.extension().to_string())
    }

    #[test]
    fn test_single_extension() {
        let cases = [
            ("file.gz", "gzip"),
            ("file.xz", "xz"),
            ("file.bz2", "bzip2"),
            ("file.zst", "zstd"),
            ("file.lz4", "lz4"),
            ("file.tar", "tar"),
            ("file.zip", "zip"),
        ];
        for (path, expected) in cases {
            assert_eq!(compressor_name(path).as_deref(), Some(expected), "{path}");
        }
    }

    #[test]
    fn test_multi_extension() {
        let cases = [
            ("archive.tar.gz", "gzip"),
            ("archive.tar.xz", "xz"),
            ("archive.tar.bz2", "bzip2"),
            ("archive.tar.zst", "zstd"),
        ];
        for (path, expected) in cases {
            assert_eq!(compressor_name(path).as_deref(), Some(expected), "{path}");
        }
    }

    #[test]
    fn test_unknown_middle_extension() {
        // The scan stops at the unrecognized middle part, leaving tar.gz
        for path in ["a.b.tar.gz", "report.2024.tar.gz"] {
            assert_eq!(compressor_name(path).as_deref(), Some("gzip"), "{path}");
        }
    }

    #[test]
    fn test_no_recognized_extension() {
        for path in ["file.txt", "file.pdf", "file"] {
            assert_eq!(compressor_name(path), None, "{path}");
        }
    }

    #[test]
    fn test_default_filenames_single_pipeline() {
        let gz = detect("file.gz");
        let compressed = gz.default_compressed_filename(Path::new("data.txt"));
        assert_eq!(compressed, "data.txt.gz");
        let extracted = gz.default_extracted_filename(Path::new("data.gz"));
        assert_eq!(extracted, "data");
    }

    #[test]
    fn test_default_filenames_multi_pipeline() {
        let tar_gz = detect("archive.tar.gz");
        let compressed = tar_gz.default_compressed_filename(Path::new("data"));
        assert_eq!(compressed, "data.tar.gz");
        // A tar.gz unpacks into a directory, hence "." as the extracted name
        let extracted = tar_gz.default_extracted_filename(Path::new("data.tar.gz"));
        assert_eq!(extracted, ".");
    }

    #[test]
    fn test_is_archive_single_pipeline() {
        let gz = detect("file.gz");
        assert!(gz.is_archive(Path::new("test.gz")));
        assert!(!gz.is_archive(Path::new("test.xz")));
    }

    #[test]
    fn test_is_archive_multi_pipeline() {
        let tar_gz = detect("archive.tar.gz");
        assert!(tar_gz.is_archive(Path::new("foo.tar.gz")));
        assert!(!tar_gz.is_archive(Path::new("foo.gz")));
    }

    #[test]
    fn test_extracted_target_single_pipeline() {
        assert_eq!(
            detect("file.gz").default_extracted_target(),
            ExtractedTarget::FILE
        );
        assert_eq!(
            detect("file.tar").default_extracted_target(),
            ExtractedTarget::DIRECTORY
        );
    }

    #[test]
    fn test_extracted_target_multi_pipeline() {
        // For tar.gz the innermost layer is tar, which unpacks to a directory
        assert_eq!(
            detect("archive.tar.gz").default_extracted_target(),
            ExtractedTarget::DIRECTORY
        );
    }

    #[test]
    fn test_single_extension_returns_correct_extension() {
        for (path, expected) in [("file.gz", "gz"), ("file.tar", "tar")] {
            assert_eq!(compressor_extension(path).as_deref(), Some(expected), "{path}");
        }
    }
}
604
605fn main() {
606 let args = CmprssArgs::parse();
607 match args.format {
608 Some(Format::Tar(a)) => command(Some(Box::new(Tar::new(&a))), &a.common_args),
609 Some(Format::Gzip(a)) => command(Some(Box::new(Gzip::new(&a))), &a.common_args),
610 Some(Format::Xz(a)) => command(Some(Box::new(Xz::new(&a))), &a.common_args),
611 Some(Format::Bzip2(a)) => command(Some(Box::new(Bzip2::new(&a))), &a.common_args),
612 Some(Format::Zip(a)) => command(Some(Box::new(Zip::new(&a))), &a.common_args),
613 Some(Format::Zstd(a)) => command(Some(Box::new(Zstd::new(&a))), &a.common_args),
614 Some(Format::Lz4(a)) => command(Some(Box::new(Lz4::new(&a))), &a.common_args),
615 _ => command(None, &args.base_args),
616 }
617 .unwrap_or_else(|e| {
618 eprintln!("ERROR(cmprss): {}", e);
619 std::process::exit(1);
620 });
621}