this repo has no description
1pub mod backends;
2pub mod progress;
3pub mod test_utils;
4pub mod utils;
5
6use anyhow::{anyhow, bail};
7use backends::*;
8use clap::{Parser, Subcommand};
9use is_terminal::IsTerminal;
10use std::path::{Path, PathBuf};
11use utils::*;
12
13/// A compression multi-tool
14#[derive(Parser, Debug)]
15#[command(author, version, about, long_about = None)]
16struct CmprssArgs {
17 /// Format
18 #[command(subcommand)]
19 format: Option<Format>,
20
21 // Base arguments for the non-subcommand behavior
22 #[clap(flatten)]
23 pub base_args: CommonArgs,
24}
25#[derive(Subcommand, Debug)]
26enum Format {
27 /// tar archive format
28 Tar(TarArgs),
29
30 /// gzip compression
31 #[clap(visible_alias = "gz")]
32 Gzip(GzipArgs),
33
34 /// xz compression
35 Xz(XzArgs),
36
37 /// bzip2 compression
38 #[clap(visible_alias = "bz2")]
39 Bzip2(Bzip2Args),
40
41 /// zip archive format
42 Zip(ZipArgs),
43
44 /// zstd compression
45 #[clap(visible_alias = "zst")]
46 Zstd(ZstdArgs),
47
48 /// lz4 compression
49 Lz4(Lz4Args),
50
51 /// brotli compression
52 #[clap(visible_alias = "br")]
53 Brotli(BrotliArgs),
54}
55
56/// Get the input filename or return a default file
57/// This file will be used to generate the output filename
58fn get_input_filename(input: &CmprssInput) -> Result<&Path> {
59 match input {
60 CmprssInput::Path(paths) => match paths.first() {
61 Some(path) => Ok(path),
62 None => bail!("error: no input specified"),
63 },
64 CmprssInput::Pipe(_) => Ok(Path::new("archive")),
65 CmprssInput::Reader(_) => Ok(Path::new("piped_data")),
66 }
67}
68
/// The operation a job performs on its input.
#[derive(Clone, Copy, Debug, PartialEq)]
enum Action {
    /// Run the compressor's `compress` operation.
    Compress,
    /// Run the compressor's `extract` operation.
    Extract,
    /// Not decided yet; must be resolved to one of the other variants before running.
    Unknown,
}
75
76/// Defines a single compress/extract action to take.
77#[derive(Debug)]
78struct Job {
79 compressor: Box<dyn Compressor>,
80 input: CmprssInput,
81 output: CmprssOutput,
82 action: Action,
83}
84
85/// Get a compressor pipeline from a filename by scanning extensions right-to-left
86fn get_compressor_from_filename(filename: &Path) -> Option<Box<dyn Compressor>> {
87 let file_name = filename.file_name()?.to_str()?;
88 let parts: Vec<&str> = file_name.split('.').collect();
89
90 if parts.len() < 2 {
91 return None;
92 }
93
94 // Scan extensions right-to-left, collecting known compressors
95 // until hitting an unknown extension or the base name.
96 // e.g., "a.b.tar.gz" → gz ✓, tar ✓, b ✗ stop → [gz, tar]
97 let mut compressor_names: Vec<String> = Vec::new();
98 for ext in parts[1..].iter().rev() {
99 if let Some(c) = backends::compressor_from_str(ext) {
100 compressor_names.push(c.name().to_string());
101 } else {
102 break;
103 }
104 }
105
106 if compressor_names.is_empty() {
107 return None;
108 }
109
110 // Reverse to innermost-to-outermost order
111 compressor_names.reverse();
112 Pipeline::from_names(&compressor_names)
113 .ok()
114 .map(|m| Box::new(m) as Box<dyn Compressor>)
115}
116
/// Turn a user-supplied string into a `PathBuf`, but only if the path exists.
///
/// Returns `None` when the path does not exist. A failure to check for existence
/// is treated the same as the path being absent.
fn get_path(input: &str) -> Option<PathBuf> {
    let candidate = PathBuf::from(input);
    match candidate.try_exists() {
        Ok(true) => Some(candidate),
        Ok(false) | Err(_) => None,
    }
}
125
126/// Guess compressor/action from the two filenames
127/// The compressor may already be given
128fn guess_from_filenames(
129 input: &[PathBuf],
130 output: &Path,
131 compressor: Option<Box<dyn Compressor>>,
132) -> (Option<Box<dyn Compressor>>, Action) {
133 if input.len() != 1 {
134 if let Some(guessed_compressor) = get_compressor_from_filename(output) {
135 return (Some(guessed_compressor), Action::Compress);
136 }
137
138 // Check if output is a directory - this is likely an extraction
139 if output.is_dir() {
140 // Try to determine compressor from the input file's extension(s)
141 if let Some(input_path) = input.first()
142 && let Some(guessed_compressor) = get_compressor_from_filename(input_path)
143 {
144 return (Some(guessed_compressor), Action::Extract);
145 }
146 }
147
148 // In theory we could be extracting multiple files to a directory
149 // We'll fail somewhere else if that's not the case
150 return (compressor, Action::Extract);
151 }
152 let input = input.first().unwrap();
153
154 let guessed_compressor = get_compressor_from_filename(output);
155 let guessed_extractor = get_compressor_from_filename(input);
156 let guessed_compressor_name = if let Some(c) = &guessed_compressor {
157 c.name()
158 } else {
159 ""
160 };
161 let guessed_extractor_name = if let Some(e) = &guessed_extractor {
162 e.name()
163 } else {
164 ""
165 };
166
167 if let Some(c) = &compressor {
168 if guessed_compressor_name == c.name() {
169 return (compressor, Action::Compress);
170 } else if guessed_extractor_name == c.name() {
171 return (compressor, Action::Extract);
172 } else {
173 // Default to compressing
174 return (compressor, Action::Compress);
175 }
176 }
177
178 match (guessed_compressor, guessed_extractor) {
179 (None, None) => (None, Action::Unknown),
180 (Some(c), None) => (Some(c), Action::Compress),
181 (None, Some(e)) => (Some(e), Action::Extract),
182 (Some(c), Some(e)) => {
183 // Compare the input and output extensions to see if one has an extra extension
184 let input_file = input.file_name().unwrap().to_str().unwrap();
185 let input_ext = input.extension().unwrap_or_default();
186 let output_file = output.file_name().unwrap().to_str().unwrap();
187 let output_ext = output.extension().unwrap_or_default();
188 let guessed_output = input_file.to_string() + "." + output_ext.to_str().unwrap();
189 let guessed_input = output_file.to_string() + "." + input_ext.to_str().unwrap();
190
191 if guessed_output == output_file {
192 // Input is "archive.tar", output is "archive.tar.gz" — only add the outer layer
193 let single_compressor =
194 backends::compressor_from_str(output_ext.to_str().unwrap_or(""));
195 (single_compressor.or(Some(c)), Action::Compress)
196 } else if guessed_input == input_file {
197 // Output is "archive.tar", input is "archive.tar.gz" — only strip the outer layer
198 let single_compressor =
199 backends::compressor_from_str(input_ext.to_str().unwrap_or(""));
200 (single_compressor.or(Some(e)), Action::Extract)
201 } else if c.name() == e.name() {
202 // Same format for input and output, can't decide
203 if output.is_dir() {
204 (Some(e), Action::Extract)
205 } else {
206 (Some(c), Action::Unknown)
207 }
208 } else if output.is_dir() {
209 (Some(e), Action::Extract)
210 } else {
211 (None, Action::Unknown)
212 }
213 }
214 }
215}
216
217/// Parse the common args and determine the details of the job requested
218fn get_job(compressor: Option<Box<dyn Compressor>>, common_args: &CommonArgs) -> Result<Job> {
219 let mut compressor = compressor;
220 let mut action = {
221 if common_args.compress {
222 Action::Compress
223 } else if common_args.extract || common_args.decompress {
224 Action::Extract
225 } else {
226 Action::Unknown
227 }
228 };
229
230 let mut inputs = Vec::new();
231 if let Some(in_file) = &common_args.input {
232 match get_path(in_file) {
233 Some(path) => inputs.push(path),
234 None => {
235 bail!("Specified input path does not exist");
236 }
237 }
238 }
239
240 let mut output = match &common_args.output {
241 Some(output) => {
242 let path = Path::new(output);
243 if path.try_exists()? && !path.is_dir() {
244 // Output path exists, bail out
245 bail!("Specified output path already exists");
246 }
247 Some(path)
248 }
249 None => None,
250 };
251
252 // Process the io_list, check if there is an output first
253 let mut io_list = common_args.io_list.clone();
254 if output.is_none()
255 && let Some(possible_output) = common_args.io_list.last()
256 {
257 let path = Path::new(possible_output);
258 if !path.try_exists()? {
259 // Use the given path if it doesn't exist
260 output = Some(path);
261 io_list.pop();
262 } else if path.is_dir() {
263 match action {
264 Action::Compress => {
265 // A directory can potentially be a target output location or
266 // an input, for now assume it is an input.
267 }
268 Action::Extract => {
269 // Can extract to a directory, and it wouldn't make any sense as an input
270 output = Some(path);
271 io_list.pop();
272 }
273 _ => {
274 // TODO: don't know if this is an input or output, assume we're compressing this directory
275 // This does cause problems for inferencing "cat archive.tar | cmprss tar ."
276 // Probably need to add some special casing
277 }
278 };
279 } else {
280 // TODO: check for scenarios where we want to append to an existing archive
281 }
282 }
283
284 // Validate the specified inputs
285 // Everything in the io_list should be an input
286 for input in &io_list {
287 if let Some(path) = get_path(input) {
288 inputs.push(path);
289 } else {
290 bail!("Specified input path does not exist");
291 }
292 }
293
294 // Fallback to stdin/stdout if we're missing files
295 let cmprss_input = match inputs.is_empty() {
296 true => {
297 if !std::io::stdin().is_terminal()
298 && !&common_args.ignore_pipes
299 && !&common_args.ignore_stdin
300 {
301 CmprssInput::Pipe(std::io::stdin())
302 } else {
303 bail!("No specified input");
304 }
305 }
306 false => CmprssInput::Path(inputs),
307 };
308
309 let cmprss_output = match output {
310 Some(path) => CmprssOutput::Path(path.to_path_buf()),
311 None => {
312 if !std::io::stdout().is_terminal()
313 && !&common_args.ignore_pipes
314 && !&common_args.ignore_stdout
315 {
316 CmprssOutput::Pipe(std::io::stdout())
317 } else {
318 match action {
319 Action::Compress => {
320 let c = compressor
321 .as_ref()
322 .ok_or_else(|| anyhow!("Must specify a compressor"))?;
323 CmprssOutput::Path(PathBuf::from(
324 c.default_compressed_filename(get_input_filename(&cmprss_input)?),
325 ))
326 }
327 Action::Extract => {
328 if compressor.is_none() {
329 compressor =
330 get_compressor_from_filename(get_input_filename(&cmprss_input)?);
331 }
332 let c = compressor
333 .as_ref()
334 .ok_or_else(|| anyhow!("Must specify a compressor"))?;
335 CmprssOutput::Path(PathBuf::from(
336 c.default_extracted_filename(get_input_filename(&cmprss_input)?),
337 ))
338 }
339 Action::Unknown => {
340 if let Some(ref c) = compressor {
341 // We know the compressor, does the input have the same extension?
342 if let Some(compressor_from_input) =
343 get_compressor_from_filename(get_input_filename(&cmprss_input)?)
344 {
345 if c.name() == compressor_from_input.name() {
346 action = Action::Extract;
347 CmprssOutput::Path(PathBuf::from(c.default_extracted_filename(
348 get_input_filename(&cmprss_input)?,
349 )))
350 } else {
351 action = Action::Compress;
352 CmprssOutput::Path(PathBuf::from(
353 c.default_compressed_filename(get_input_filename(
354 &cmprss_input,
355 )?),
356 ))
357 }
358 } else {
359 action = Action::Compress;
360 CmprssOutput::Path(PathBuf::from(c.default_compressed_filename(
361 get_input_filename(&cmprss_input)?,
362 )))
363 }
364 } else {
365 // Can still work if the input is an archive
366 compressor =
367 get_compressor_from_filename(get_input_filename(&cmprss_input)?);
368 let c = compressor
369 .as_ref()
370 .ok_or_else(|| anyhow!("Must specify a compressor"))?;
371 action = Action::Extract;
372 CmprssOutput::Path(PathBuf::from(
373 c.default_extracted_filename(get_input_filename(&cmprss_input)?),
374 ))
375 }
376 }
377 }
378 }
379 }
380 };
381
382 // If we don't have the compressor/action, we can attempt to infer
383 if compressor.is_none() || action == Action::Unknown {
384 match action {
385 Action::Compress => {
386 // Look at the output name
387 if let CmprssOutput::Path(path) = &cmprss_output {
388 compressor = get_compressor_from_filename(path);
389 }
390 }
391 Action::Extract => {
392 if let CmprssInput::Path(paths) = &cmprss_input {
393 if paths.len() != 1 {
394 bail!("Expected a single archive to extract");
395 }
396 compressor = get_compressor_from_filename(paths.first().unwrap());
397 }
398 }
399 Action::Unknown => match (&cmprss_input, &cmprss_output) {
400 (CmprssInput::Path(paths), CmprssOutput::Path(path)) => {
401 if path.is_dir() && paths.len() == 1 {
402 compressor = get_compressor_from_filename(paths.first().unwrap());
403 action = Action::Extract;
404
405 if compressor.is_none() {
406 bail!(
407 "Couldn't determine how to extract {:?}",
408 paths.first().unwrap()
409 );
410 }
411 } else {
412 let (guessed_compressor, guessed_action) =
413 guess_from_filenames(paths, path, compressor);
414 compressor = guessed_compressor;
415 action = guessed_action;
416 }
417 }
418 (CmprssInput::Path(paths), CmprssOutput::Pipe(_)) => {
419 if let Some(ref c) = compressor {
420 if let Some(input_c) = get_compressor_from_filename(paths.first().unwrap())
421 {
422 if c.name() == input_c.name() {
423 action = Action::Extract;
424 } else {
425 action = Action::Compress;
426 }
427 } else {
428 action = Action::Compress;
429 }
430 } else {
431 if paths.len() != 1 {
432 bail!("Expected a single input file for piping to stdout");
433 }
434 compressor = get_compressor_from_filename(paths.first().unwrap());
435 if compressor.is_some() {
436 action = Action::Extract;
437 } else {
438 bail!("Can't guess compressor to use");
439 }
440 }
441 }
442 (CmprssInput::Pipe(_), CmprssOutput::Path(path)) => {
443 if let Some(ref c) = compressor {
444 if get_compressor_from_filename(path)
445 .is_some_and(|pc| c.name() == pc.name())
446 {
447 action = Action::Compress;
448 } else {
449 action = Action::Extract;
450 }
451 } else {
452 compressor = get_compressor_from_filename(path);
453 if compressor.is_some() {
454 action = Action::Compress;
455 } else {
456 bail!("Can't guess compressor to use");
457 }
458 }
459 }
460 (CmprssInput::Pipe(_), CmprssOutput::Pipe(_)) => {
461 action = Action::Compress;
462 }
463 // Handle all Writer output cases
464 (_, CmprssOutput::Writer(_)) => {
465 // Writer outputs are only used internally by Pipeline
466 // In main.rs we'll assume compression
467 action = Action::Compress;
468 }
469 // Handle all Reader input cases
470 (&CmprssInput::Reader(_), _) => {
471 // For Reader input, we'll assume extraction
472 action = Action::Extract;
473 }
474 },
475 }
476 }
477
478 let compressor = compressor.ok_or_else(|| anyhow!("Could not determine compressor to use"))?;
479 if action == Action::Unknown {
480 bail!("Could not determine action to take");
481 }
482
483 Ok(Job {
484 compressor,
485 input: cmprss_input,
486 output: cmprss_output,
487 action,
488 })
489}
490
491fn command(compressor: Option<Box<dyn Compressor>>, args: &CommonArgs) -> Result {
492 let job = get_job(compressor, args)?;
493
494 match job.action {
495 Action::Compress => job.compressor.compress(job.input, job.output)?,
496 Action::Extract => job.compressor.extract(job.input, job.output)?,
497 _ => {
498 bail!("Unknown action requested");
499 }
500 };
501
502 Ok(())
503}
504
505fn main() {
506 let args = CmprssArgs::parse();
507 match args.format {
508 Some(Format::Tar(a)) => command(Some(Box::new(Tar::new(&a))), &a.common_args),
509 Some(Format::Gzip(a)) => command(Some(Box::new(Gzip::new(&a))), &a.common_args),
510 Some(Format::Xz(a)) => command(Some(Box::new(Xz::new(&a))), &a.common_args),
511 Some(Format::Bzip2(a)) => command(Some(Box::new(Bzip2::new(&a))), &a.common_args),
512 Some(Format::Zip(a)) => command(Some(Box::new(Zip::new(&a))), &a.common_args),
513 Some(Format::Zstd(a)) => command(Some(Box::new(Zstd::new(&a))), &a.common_args),
514 Some(Format::Lz4(a)) => command(Some(Box::new(Lz4::new(&a))), &a.common_args),
515 Some(Format::Brotli(a)) => command(Some(Box::new(Brotli::new(&a))), &a.common_args),
516 _ => command(None, &args.base_args),
517 }
518 .unwrap_or_else(|e| {
519 eprintln!("ERROR(cmprss): {}", e);
520 std::process::exit(1);
521 });
522}
523
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// Name reported by the compressor inferred from `path`, if one is recognised.
    fn compressor_name(path: &str) -> Option<String> {
        let detected = get_compressor_from_filename(Path::new(path))?;
        Some(detected.name().to_string())
    }

    /// Extension reported by the compressor inferred from `path`, if one is recognised.
    fn compressor_extension(path: &str) -> Option<String> {
        let detected = get_compressor_from_filename(Path::new(path))?;
        Some(detected.extension().to_string())
    }

    /// Infer a compressor from `path`, panicking when none is recognised.
    fn detect(path: &str) -> Box<dyn Compressor> {
        get_compressor_from_filename(Path::new(path)).unwrap()
    }

    /// Check the inferred compressor name for each `(path, expected name)` pair.
    fn assert_names(cases: &[(&str, Option<&str>)]) {
        for (path, expected) in cases {
            assert_eq!(
                compressor_name(path),
                expected.map(String::from),
                "path: {path}"
            );
        }
    }

    #[test]
    fn test_single_extension() {
        assert_names(&[
            ("file.gz", Some("gzip")),
            ("file.xz", Some("xz")),
            ("file.bz2", Some("bzip2")),
            ("file.zst", Some("zstd")),
            ("file.lz4", Some("lz4")),
            ("file.br", Some("brotli")),
            ("file.tar", Some("tar")),
            ("file.zip", Some("zip")),
        ]);
    }

    #[test]
    fn test_multi_extension() {
        assert_names(&[
            ("archive.tar.gz", Some("gzip")),
            ("archive.tar.xz", Some("xz")),
            ("archive.tar.bz2", Some("bzip2")),
            ("archive.tar.zst", Some("zstd")),
        ]);
    }

    #[test]
    fn test_unknown_middle_extension() {
        // "b" and "2024" are not compressors, so only tar.gz should be detected
        assert_names(&[
            ("a.b.tar.gz", Some("gzip")),
            ("report.2024.tar.gz", Some("gzip")),
        ]);
    }

    #[test]
    fn test_no_recognized_extension() {
        assert_names(&[("file.txt", None), ("file.pdf", None), ("file", None)]);
    }

    #[test]
    fn test_default_filenames_single_pipeline() {
        let gz = detect("file.gz");
        assert_eq!(
            gz.default_compressed_filename(Path::new("data.txt")),
            "data.txt.gz"
        );
        assert_eq!(gz.default_extracted_filename(Path::new("data.gz")), "data");
    }

    #[test]
    fn test_default_filenames_multi_pipeline() {
        let tar_gz = detect("archive.tar.gz");
        assert_eq!(
            tar_gz.default_compressed_filename(Path::new("data")),
            "data.tar.gz"
        );
        // tar.gz extracts to a directory, so extracted filename is "."
        assert_eq!(
            tar_gz.default_extracted_filename(Path::new("data.tar.gz")),
            "."
        );
    }

    #[test]
    fn test_is_archive_single_pipeline() {
        let gz = detect("file.gz");
        assert!(gz.is_archive(Path::new("test.gz")));
        assert!(!gz.is_archive(Path::new("test.xz")));
    }

    #[test]
    fn test_is_archive_multi_pipeline() {
        let tar_gz = detect("archive.tar.gz");
        assert!(tar_gz.is_archive(Path::new("foo.tar.gz")));
        assert!(!tar_gz.is_archive(Path::new("foo.gz")));
    }

    #[test]
    fn test_extracted_target_single_pipeline() {
        let gz = detect("file.gz");
        assert_eq!(gz.default_extracted_target(), ExtractedTarget::FILE);

        let tar = detect("file.tar");
        assert_eq!(tar.default_extracted_target(), ExtractedTarget::DIRECTORY);
    }

    #[test]
    fn test_extracted_target_multi_pipeline() {
        // tar.gz: innermost is tar, which extracts to directory
        let tar_gz = detect("archive.tar.gz");
        assert_eq!(
            tar_gz.default_extracted_target(),
            ExtractedTarget::DIRECTORY
        );
    }

    #[test]
    fn test_single_extension_returns_correct_extension() {
        assert_eq!(compressor_extension("file.gz"), Some("gz".into()));
        assert_eq!(compressor_extension("file.tar"), Some("tar".into()));
    }
}
627}