this repo has no description
1pub mod backends;
2pub mod progress;
3pub mod test_utils;
4pub mod utils;
5
6use anyhow::{anyhow, bail};
7use backends::*;
8use clap::{Parser, Subcommand};
9use is_terminal::IsTerminal;
10use std::path::{Path, PathBuf};
11use utils::*;
12
13/// A compression multi-tool
14#[derive(Parser, Debug)]
15#[command(author, version, about, long_about = None)]
16struct CmprssArgs {
17 /// Format
18 #[command(subcommand)]
19 format: Option<Format>,
20
21 // Base arguments for the non-subcommand behavior
22 #[clap(flatten)]
23 pub base_args: CommonArgs,
24}
25#[derive(Subcommand, Debug)]
26enum Format {
27 /// tar archive format
28 Tar(TarArgs),
29
30 /// gzip compression
31 #[clap(visible_alias = "gz")]
32 Gzip(GzipArgs),
33
34 /// xz compression
35 Xz(XzArgs),
36
37 /// bzip2 compression
38 #[clap(visible_alias = "bz2")]
39 Bzip2(Bzip2Args),
40
41 /// zip archive format
42 Zip(ZipArgs),
43
44 /// zstd compression
45 #[clap(visible_alias = "zst")]
46 Zstd(ZstdArgs),
47
48 /// lz4 compression
49 Lz4(Lz4Args),
50
51 /// brotli compression
52 #[clap(visible_alias = "br")]
53 Brotli(BrotliArgs),
54
55 /// snappy framed compression
56 #[clap(visible_alias = "sz")]
57 Snappy(SnappyArgs),
58
59 /// lzma (legacy LZMA1) compression
60 Lzma(LzmaArgs),
61}
62
63/// Get the input filename or return a default file
64/// This file will be used to generate the output filename
65fn get_input_filename(input: &CmprssInput) -> Result<&Path> {
66 match input {
67 CmprssInput::Path(paths) => match paths.first() {
68 Some(path) => Ok(path),
69 None => bail!("error: no input specified"),
70 },
71 CmprssInput::Pipe(_) => Ok(Path::new("archive")),
72 CmprssInput::Reader(_) => Ok(Path::new("piped_data")),
73 }
74}
75
/// The operation a job performs.
#[derive(Clone, Copy, Debug, PartialEq)]
enum Action {
    /// Produce a compressed file or archive from the input.
    Compress,
    /// Unpack or decompress the input.
    Extract,
    /// Not decided yet; must be resolved by inference before a job can run.
    Unknown,
}
82
83/// Defines a single compress/extract action to take.
84#[derive(Debug)]
85struct Job {
86 compressor: Box<dyn Compressor>,
87 input: CmprssInput,
88 output: CmprssOutput,
89 action: Action,
90}
91
92/// Get a compressor pipeline from a filename by scanning extensions right-to-left
93fn get_compressor_from_filename(filename: &Path) -> Option<Box<dyn Compressor>> {
94 let file_name = filename.file_name()?.to_str()?;
95 let parts: Vec<&str> = file_name.split('.').collect();
96
97 if parts.len() < 2 {
98 return None;
99 }
100
101 // Scan extensions right-to-left, collecting known compressors
102 // until hitting an unknown extension or the base name.
103 // e.g., "a.b.tar.gz" → gz ✓, tar ✓, b ✗ stop → [gz, tar]
104 let mut compressor_names: Vec<String> = Vec::new();
105 for ext in parts[1..].iter().rev() {
106 if let Some(c) = backends::compressor_from_str(ext) {
107 compressor_names.push(c.name().to_string());
108 } else {
109 break;
110 }
111 }
112
113 if compressor_names.is_empty() {
114 return None;
115 }
116
117 // Reverse to innermost-to-outermost order
118 compressor_names.reverse();
119 Pipeline::from_names(&compressor_names)
120 .ok()
121 .map(|m| Box::new(m) as Box<dyn Compressor>)
122}
123
/// Turn a user-supplied string into a `PathBuf`, but only if it exists on disk.
///
/// Returns `None` when the path does not exist or when its existence cannot be
/// determined (for example because of a permission error).
fn get_path(input: &str) -> Option<PathBuf> {
    let candidate = PathBuf::from(input);
    match candidate.try_exists() {
        Ok(true) => Some(candidate),
        Ok(false) | Err(_) => None,
    }
}
132
133/// Guess compressor/action from the two filenames
134/// The compressor may already be given
135fn guess_from_filenames(
136 input: &[PathBuf],
137 output: &Path,
138 compressor: Option<Box<dyn Compressor>>,
139) -> (Option<Box<dyn Compressor>>, Action) {
140 if input.len() != 1 {
141 if let Some(guessed_compressor) = get_compressor_from_filename(output) {
142 return (Some(guessed_compressor), Action::Compress);
143 }
144
145 // Check if output is a directory - this is likely an extraction
146 if output.is_dir() {
147 // Try to determine compressor from the input file's extension(s)
148 if let Some(input_path) = input.first()
149 && let Some(guessed_compressor) = get_compressor_from_filename(input_path)
150 {
151 return (Some(guessed_compressor), Action::Extract);
152 }
153 }
154
155 // In theory we could be extracting multiple files to a directory
156 // We'll fail somewhere else if that's not the case
157 return (compressor, Action::Extract);
158 }
159 let input = input.first().unwrap();
160
161 let guessed_compressor = get_compressor_from_filename(output);
162 let guessed_extractor = get_compressor_from_filename(input);
163 let guessed_compressor_name = if let Some(c) = &guessed_compressor {
164 c.name()
165 } else {
166 ""
167 };
168 let guessed_extractor_name = if let Some(e) = &guessed_extractor {
169 e.name()
170 } else {
171 ""
172 };
173
174 if let Some(c) = &compressor {
175 if guessed_compressor_name == c.name() {
176 return (compressor, Action::Compress);
177 } else if guessed_extractor_name == c.name() {
178 return (compressor, Action::Extract);
179 } else {
180 // Default to compressing
181 return (compressor, Action::Compress);
182 }
183 }
184
185 match (guessed_compressor, guessed_extractor) {
186 (None, None) => (None, Action::Unknown),
187 (Some(c), None) => (Some(c), Action::Compress),
188 (None, Some(e)) => (Some(e), Action::Extract),
189 (Some(c), Some(e)) => {
190 // Compare the input and output extensions to see if one has an extra extension
191 let input_file = input.file_name().unwrap().to_str().unwrap();
192 let input_ext = input.extension().unwrap_or_default();
193 let output_file = output.file_name().unwrap().to_str().unwrap();
194 let output_ext = output.extension().unwrap_or_default();
195 let guessed_output = input_file.to_string() + "." + output_ext.to_str().unwrap();
196 let guessed_input = output_file.to_string() + "." + input_ext.to_str().unwrap();
197
198 if guessed_output == output_file {
199 // Input is "archive.tar", output is "archive.tar.gz" — only add the outer layer
200 let single_compressor =
201 backends::compressor_from_str(output_ext.to_str().unwrap_or(""));
202 (single_compressor.or(Some(c)), Action::Compress)
203 } else if guessed_input == input_file {
204 // Output is "archive.tar", input is "archive.tar.gz" — only strip the outer layer
205 let single_compressor =
206 backends::compressor_from_str(input_ext.to_str().unwrap_or(""));
207 (single_compressor.or(Some(e)), Action::Extract)
208 } else if c.name() == e.name() {
209 // Same format for input and output, can't decide
210 if output.is_dir() {
211 (Some(e), Action::Extract)
212 } else {
213 (Some(c), Action::Unknown)
214 }
215 } else if output.is_dir() {
216 (Some(e), Action::Extract)
217 } else {
218 (None, Action::Unknown)
219 }
220 }
221 }
222}
223
224/// Parse the common args and determine the details of the job requested
225fn get_job(compressor: Option<Box<dyn Compressor>>, common_args: &CommonArgs) -> Result<Job> {
226 let mut compressor = compressor;
227 let mut action = {
228 if common_args.compress {
229 Action::Compress
230 } else if common_args.extract || common_args.decompress {
231 Action::Extract
232 } else {
233 Action::Unknown
234 }
235 };
236
237 let mut inputs = Vec::new();
238 if let Some(in_file) = &common_args.input {
239 match get_path(in_file) {
240 Some(path) => inputs.push(path),
241 None => {
242 bail!("Specified input path does not exist");
243 }
244 }
245 }
246
247 let mut output = match &common_args.output {
248 Some(output) => {
249 let path = Path::new(output);
250 if path.try_exists()? && !path.is_dir() {
251 // Output path exists, bail out
252 bail!("Specified output path already exists");
253 }
254 Some(path)
255 }
256 None => None,
257 };
258
259 // Process the io_list, check if there is an output first
260 let mut io_list = common_args.io_list.clone();
261 if output.is_none()
262 && let Some(possible_output) = common_args.io_list.last()
263 {
264 let path = Path::new(possible_output);
265 if !path.try_exists()? {
266 // Use the given path if it doesn't exist
267 output = Some(path);
268 io_list.pop();
269 } else if path.is_dir() {
270 match action {
271 Action::Compress => {
272 // A directory can potentially be a target output location or
273 // an input, for now assume it is an input.
274 }
275 Action::Extract => {
276 // Can extract to a directory, and it wouldn't make any sense as an input
277 output = Some(path);
278 io_list.pop();
279 }
280 _ => {
281 // TODO: don't know if this is an input or output, assume we're compressing this directory
282 // This does cause problems for inferencing "cat archive.tar | cmprss tar ."
283 // Probably need to add some special casing
284 }
285 };
286 } else {
287 // TODO: check for scenarios where we want to append to an existing archive
288 }
289 }
290
291 // Validate the specified inputs
292 // Everything in the io_list should be an input
293 for input in &io_list {
294 if let Some(path) = get_path(input) {
295 inputs.push(path);
296 } else {
297 bail!("Specified input path does not exist");
298 }
299 }
300
301 // Fallback to stdin/stdout if we're missing files
302 let cmprss_input = match inputs.is_empty() {
303 true => {
304 if !std::io::stdin().is_terminal()
305 && !&common_args.ignore_pipes
306 && !&common_args.ignore_stdin
307 {
308 CmprssInput::Pipe(std::io::stdin())
309 } else {
310 bail!("No specified input");
311 }
312 }
313 false => CmprssInput::Path(inputs),
314 };
315
316 let cmprss_output = match output {
317 Some(path) => CmprssOutput::Path(path.to_path_buf()),
318 None => {
319 if !std::io::stdout().is_terminal()
320 && !&common_args.ignore_pipes
321 && !&common_args.ignore_stdout
322 {
323 CmprssOutput::Pipe(std::io::stdout())
324 } else {
325 match action {
326 Action::Compress => {
327 let c = compressor
328 .as_ref()
329 .ok_or_else(|| anyhow!("Must specify a compressor"))?;
330 CmprssOutput::Path(PathBuf::from(
331 c.default_compressed_filename(get_input_filename(&cmprss_input)?),
332 ))
333 }
334 Action::Extract => {
335 if compressor.is_none() {
336 compressor =
337 get_compressor_from_filename(get_input_filename(&cmprss_input)?);
338 }
339 let c = compressor
340 .as_ref()
341 .ok_or_else(|| anyhow!("Must specify a compressor"))?;
342 CmprssOutput::Path(PathBuf::from(
343 c.default_extracted_filename(get_input_filename(&cmprss_input)?),
344 ))
345 }
346 Action::Unknown => {
347 if let Some(ref c) = compressor {
348 // We know the compressor, does the input have the same extension?
349 if let Some(compressor_from_input) =
350 get_compressor_from_filename(get_input_filename(&cmprss_input)?)
351 {
352 if c.name() == compressor_from_input.name() {
353 action = Action::Extract;
354 CmprssOutput::Path(PathBuf::from(c.default_extracted_filename(
355 get_input_filename(&cmprss_input)?,
356 )))
357 } else {
358 action = Action::Compress;
359 CmprssOutput::Path(PathBuf::from(
360 c.default_compressed_filename(get_input_filename(
361 &cmprss_input,
362 )?),
363 ))
364 }
365 } else {
366 action = Action::Compress;
367 CmprssOutput::Path(PathBuf::from(c.default_compressed_filename(
368 get_input_filename(&cmprss_input)?,
369 )))
370 }
371 } else {
372 // Can still work if the input is an archive
373 compressor =
374 get_compressor_from_filename(get_input_filename(&cmprss_input)?);
375 let c = compressor
376 .as_ref()
377 .ok_or_else(|| anyhow!("Must specify a compressor"))?;
378 action = Action::Extract;
379 CmprssOutput::Path(PathBuf::from(
380 c.default_extracted_filename(get_input_filename(&cmprss_input)?),
381 ))
382 }
383 }
384 }
385 }
386 }
387 };
388
389 // If we don't have the compressor/action, we can attempt to infer
390 if compressor.is_none() || action == Action::Unknown {
391 match action {
392 Action::Compress => {
393 // Look at the output name
394 if let CmprssOutput::Path(path) = &cmprss_output {
395 compressor = get_compressor_from_filename(path);
396 }
397 }
398 Action::Extract => {
399 if let CmprssInput::Path(paths) = &cmprss_input {
400 if paths.len() != 1 {
401 bail!("Expected a single archive to extract");
402 }
403 compressor = get_compressor_from_filename(paths.first().unwrap());
404 }
405 }
406 Action::Unknown => match (&cmprss_input, &cmprss_output) {
407 (CmprssInput::Path(paths), CmprssOutput::Path(path)) => {
408 if path.is_dir() && paths.len() == 1 {
409 compressor = get_compressor_from_filename(paths.first().unwrap());
410 action = Action::Extract;
411
412 if compressor.is_none() {
413 bail!(
414 "Couldn't determine how to extract {:?}",
415 paths.first().unwrap()
416 );
417 }
418 } else {
419 let (guessed_compressor, guessed_action) =
420 guess_from_filenames(paths, path, compressor);
421 compressor = guessed_compressor;
422 action = guessed_action;
423 }
424 }
425 (CmprssInput::Path(paths), CmprssOutput::Pipe(_)) => {
426 if let Some(ref c) = compressor {
427 if let Some(input_c) = get_compressor_from_filename(paths.first().unwrap())
428 {
429 if c.name() == input_c.name() {
430 action = Action::Extract;
431 } else {
432 action = Action::Compress;
433 }
434 } else {
435 action = Action::Compress;
436 }
437 } else {
438 if paths.len() != 1 {
439 bail!("Expected a single input file for piping to stdout");
440 }
441 compressor = get_compressor_from_filename(paths.first().unwrap());
442 if compressor.is_some() {
443 action = Action::Extract;
444 } else {
445 bail!("Can't guess compressor to use");
446 }
447 }
448 }
449 (CmprssInput::Pipe(_), CmprssOutput::Path(path)) => {
450 if let Some(ref c) = compressor {
451 if get_compressor_from_filename(path)
452 .is_some_and(|pc| c.name() == pc.name())
453 {
454 action = Action::Compress;
455 } else {
456 action = Action::Extract;
457 }
458 } else {
459 compressor = get_compressor_from_filename(path);
460 if compressor.is_some() {
461 action = Action::Compress;
462 } else {
463 bail!("Can't guess compressor to use");
464 }
465 }
466 }
467 (CmprssInput::Pipe(_), CmprssOutput::Pipe(_)) => {
468 action = Action::Compress;
469 }
470 // Handle all Writer output cases
471 (_, CmprssOutput::Writer(_)) => {
472 // Writer outputs are only used internally by Pipeline
473 // In main.rs we'll assume compression
474 action = Action::Compress;
475 }
476 // Handle all Reader input cases
477 (&CmprssInput::Reader(_), _) => {
478 // For Reader input, we'll assume extraction
479 action = Action::Extract;
480 }
481 },
482 }
483 }
484
485 let compressor = compressor.ok_or_else(|| anyhow!("Could not determine compressor to use"))?;
486 if action == Action::Unknown {
487 bail!("Could not determine action to take");
488 }
489
490 Ok(Job {
491 compressor,
492 input: cmprss_input,
493 output: cmprss_output,
494 action,
495 })
496}
497
498fn command(compressor: Option<Box<dyn Compressor>>, args: &CommonArgs) -> Result {
499 let job = get_job(compressor, args)?;
500
501 match job.action {
502 Action::Compress => job.compressor.compress(job.input, job.output)?,
503 Action::Extract => job.compressor.extract(job.input, job.output)?,
504 _ => {
505 bail!("Unknown action requested");
506 }
507 };
508
509 Ok(())
510}
511
512fn main() {
513 let args = CmprssArgs::parse();
514 match args.format {
515 Some(Format::Tar(a)) => command(Some(Box::new(Tar::new(&a))), &a.common_args),
516 Some(Format::Gzip(a)) => command(Some(Box::new(Gzip::new(&a))), &a.common_args),
517 Some(Format::Xz(a)) => command(Some(Box::new(Xz::new(&a))), &a.common_args),
518 Some(Format::Bzip2(a)) => command(Some(Box::new(Bzip2::new(&a))), &a.common_args),
519 Some(Format::Zip(a)) => command(Some(Box::new(Zip::new(&a))), &a.common_args),
520 Some(Format::Zstd(a)) => command(Some(Box::new(Zstd::new(&a))), &a.common_args),
521 Some(Format::Lz4(a)) => command(Some(Box::new(Lz4::new(&a))), &a.common_args),
522 Some(Format::Brotli(a)) => command(Some(Box::new(Brotli::new(&a))), &a.common_args),
523 Some(Format::Snappy(a)) => command(Some(Box::new(Snappy::new(&a))), &a.common_args),
524 Some(Format::Lzma(a)) => command(Some(Box::new(Lzma::new(&a))), &a.common_args),
525 _ => command(None, &args.base_args),
526 }
527 .unwrap_or_else(|e| {
528 eprintln!("ERROR(cmprss): {}", e);
529 std::process::exit(1);
530 });
531}
532
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// Name reported by the compressor detected for `path`, if any.
    fn compressor_name(path: &str) -> Option<String> {
        let detected = get_compressor_from_filename(Path::new(path))?;
        Some(detected.name().to_string())
    }

    /// Extension reported by the compressor detected for `path`, if any.
    fn compressor_extension(path: &str) -> Option<String> {
        let detected = get_compressor_from_filename(Path::new(path))?;
        Some(detected.extension().to_string())
    }

    /// Assert that every `(path, expected name)` pair is detected as stated.
    fn assert_names(cases: &[(&str, &str)]) {
        for (path, expected) in cases {
            assert_eq!(compressor_name(path), Some(expected.to_string()));
        }
    }

    #[test]
    fn test_single_extension() {
        assert_names(&[
            ("file.gz", "gzip"),
            ("file.xz", "xz"),
            ("file.bz2", "bzip2"),
            ("file.zst", "zstd"),
            ("file.lz4", "lz4"),
            ("file.br", "brotli"),
            ("file.sz", "snappy"),
            ("file.lzma", "lzma"),
            ("file.tar", "tar"),
            ("file.zip", "zip"),
        ]);
    }

    #[test]
    fn test_multi_extension() {
        assert_names(&[
            ("archive.tar.gz", "gzip"),
            ("archive.tar.xz", "xz"),
            ("archive.tar.bz2", "bzip2"),
            ("archive.tar.zst", "zstd"),
        ]);
    }

    #[test]
    fn test_unknown_middle_extension() {
        // "b" and "2024" are not compressors, so only tar.gz should be detected
        assert_names(&[("a.b.tar.gz", "gzip"), ("report.2024.tar.gz", "gzip")]);
    }

    #[test]
    fn test_no_recognized_extension() {
        for path in ["file.txt", "file.pdf", "file"] {
            assert_eq!(compressor_name(path), None);
        }
    }

    #[test]
    fn test_default_filenames_single_pipeline() {
        let gz = get_compressor_from_filename(Path::new("file.gz")).unwrap();
        let compressed = gz.default_compressed_filename(Path::new("data.txt"));
        let extracted = gz.default_extracted_filename(Path::new("data.gz"));
        assert_eq!(compressed, "data.txt.gz");
        assert_eq!(extracted, "data");
    }

    #[test]
    fn test_default_filenames_multi_pipeline() {
        let tar_gz = get_compressor_from_filename(Path::new("archive.tar.gz")).unwrap();
        let compressed = tar_gz.default_compressed_filename(Path::new("data"));
        assert_eq!(compressed, "data.tar.gz");
        // tar.gz extracts to a directory, so extracted filename is "."
        let extracted = tar_gz.default_extracted_filename(Path::new("data.tar.gz"));
        assert_eq!(extracted, ".");
    }

    #[test]
    fn test_is_archive_single_pipeline() {
        let gz = get_compressor_from_filename(Path::new("file.gz")).unwrap();
        assert!(gz.is_archive(Path::new("test.gz")));
        assert!(!gz.is_archive(Path::new("test.xz")));
    }

    #[test]
    fn test_is_archive_multi_pipeline() {
        let tar_gz = get_compressor_from_filename(Path::new("archive.tar.gz")).unwrap();
        assert!(tar_gz.is_archive(Path::new("foo.tar.gz")));
        assert!(!tar_gz.is_archive(Path::new("foo.gz")));
    }

    #[test]
    fn test_extracted_target_single_pipeline() {
        let gz = get_compressor_from_filename(Path::new("file.gz")).unwrap();
        assert_eq!(gz.default_extracted_target(), ExtractedTarget::FILE);

        let tar = get_compressor_from_filename(Path::new("file.tar")).unwrap();
        assert_eq!(tar.default_extracted_target(), ExtractedTarget::DIRECTORY);
    }

    #[test]
    fn test_extracted_target_multi_pipeline() {
        // tar.gz: innermost is tar, which extracts to directory
        let tar_gz = get_compressor_from_filename(Path::new("archive.tar.gz")).unwrap();
        assert_eq!(tar_gz.default_extracted_target(), ExtractedTarget::DIRECTORY);
    }

    #[test]
    fn test_single_extension_returns_correct_extension() {
        for (path, expected) in [("file.gz", "gz"), ("file.tar", "tar")] {
            assert_eq!(compressor_extension(path), Some(expected.to_string()));
        }
    }
}