this repo has no description
1extern crate tar;
2
3use anyhow::{anyhow, bail};
4use clap::Args;
5use indicatif::ProgressBar;
6use std::fs::{File, OpenOptions};
7use std::io::{self, Read, Seek, SeekFrom, Write};
8use std::path::Path;
9use tar::{Archive, Builder, EntryType, Header};
10use tempfile::tempfile;
11
12use super::containers::total_input_bytes;
13use crate::progress::{OutputTarget, ProgressArgs, ProgressReader, create_progress_bar};
14use crate::utils::{CmprssInput, CmprssOutput, CommonArgs, Compressor, ExtractedTarget, Result};
15
// Command-line arguments for the tar backend.
//
// NOTE(review): plain `//` comments are used deliberately here. clap's derive
// macros can surface `///` doc comments as help text, so switching these to
// doc comments may change CLI output -- confirm before converting.
#[derive(Args, Debug)]
pub struct TarArgs {
    // `CommonArgs` options, flattened into this struct's argument set.
    #[clap(flatten)]
    pub common_args: CommonArgs,

    // `ProgressArgs` options, flattened likewise; `Tar::new` copies them
    // into the `Tar` value.
    #[clap(flatten)]
    pub progress_args: ProgressArgs,
}
24
/// Tar archive backend. The `Compressor` implementation in this file
/// provides compress, extract, append and list operations for it.
#[derive(Default, Clone)]
pub struct Tar {
    /// Progress settings; the `progress` field is forwarded to
    /// `create_progress_bar` whenever a bar is created.
    pub progress_args: ProgressArgs,
}
29
30impl Tar {
31 pub fn new(args: &TarArgs) -> Tar {
32 Tar {
33 progress_args: args.progress_args,
34 }
35 }
36}
37
38impl Compressor for Tar {
39 /// Full name for tar, also used for extension
40 fn name(&self) -> &str {
41 "tar"
42 }
43
44 /// Tar extracts to a directory by default
45 fn default_extracted_target(&self) -> ExtractedTarget {
46 ExtractedTarget::Directory
47 }
48
49 fn compress(&self, input: CmprssInput, output: CmprssOutput) -> Result {
50 match output {
51 CmprssOutput::Path(path) => {
52 let total = match &input {
53 CmprssInput::Path(paths) => Some(total_input_bytes(paths)),
54 _ => None,
55 };
56 let bar =
57 create_progress_bar(total, self.progress_args.progress, OutputTarget::File);
58 let file = File::create(path)?;
59 self.compress_internal(input, Builder::new(file), bar.as_ref())?;
60 if let Some(b) = bar {
61 b.finish();
62 }
63 Ok(())
64 }
65 CmprssOutput::Pipe(mut pipe) => {
66 // Create a temporary file to write the tar to
67 let mut temp_file = tempfile()?;
68 self.compress_internal(input, Builder::new(&mut temp_file), None)?;
69
70 // Reset the file position to the beginning
71 temp_file.seek(SeekFrom::Start(0))?;
72
73 // Copy the temporary file to the pipe
74 io::copy(&mut temp_file, &mut pipe)?;
75 Ok(())
76 }
77 CmprssOutput::Writer(mut writer) => {
78 // Pipeline-internal: tar is the innermost stage, writing into an
79 // in-memory pipe feeding the outer codec(s). We still own the
80 // progress bar because only tar sees the real input bytes; outer
81 // stages suppress their bar (their input size is unknown).
82 let total = match &input {
83 CmprssInput::Path(paths) => Some(total_input_bytes(paths)),
84 _ => None,
85 };
86 let bar =
87 create_progress_bar(total, self.progress_args.progress, OutputTarget::File);
88 let mut temp_file = tempfile()?;
89 self.compress_internal(input, Builder::new(&mut temp_file), bar.as_ref())?;
90 temp_file.seek(SeekFrom::Start(0))?;
91 io::copy(&mut temp_file, &mut writer)?;
92 if let Some(b) = bar {
93 b.finish();
94 }
95 Ok(())
96 }
97 }
98 }
99
100 fn extract(&self, input: CmprssInput, output: CmprssOutput) -> Result {
101 match output {
102 CmprssOutput::Path(ref out_dir) => {
103 // Create the output directory if it doesn't exist
104 if !out_dir.exists() {
105 std::fs::create_dir_all(out_dir)?;
106 } else if !out_dir.is_dir() {
107 bail!("tar extraction output must be a directory");
108 }
109
110 match input {
111 CmprssInput::Path(paths) => {
112 if paths.len() != 1 {
113 bail!("tar extraction expects exactly one archive file");
114 }
115 let file = File::open(&paths[0])?;
116 let size = file.metadata()?.len();
117 self.unpack_with_progress(file, Some(size), out_dir)
118 }
119 CmprssInput::Pipe(mut pipe) => {
120 // Create a temporary file to store the tar content
121 let mut temp_file = tempfile()?;
122
123 // Copy from pipe to temporary file
124 io::copy(&mut pipe, &mut temp_file)?;
125
126 // Reset the file position to the beginning
127 temp_file.seek(SeekFrom::Start(0))?;
128 let size = temp_file.metadata()?.len();
129 self.unpack_with_progress(temp_file, Some(size), out_dir)
130 }
131 CmprssInput::Reader(reader) => {
132 let mut archive = Archive::new(reader.0);
133 archive.unpack(out_dir)?;
134 Ok(())
135 }
136 }
137 }
138 CmprssOutput::Pipe(_) => bail!("tar extraction to stdout is not supported"),
139 CmprssOutput::Writer(mut writer) => match input {
140 CmprssInput::Path(paths) => {
141 if paths.len() != 1 {
142 bail!("tar extraction expects exactly one archive file");
143 }
144 let mut file = File::open(&paths[0])?;
145 io::copy(&mut file, &mut writer)?;
146 Ok(())
147 }
148 CmprssInput::Pipe(mut pipe) => {
149 io::copy(&mut pipe, &mut writer)?;
150 Ok(())
151 }
152 CmprssInput::Reader(mut reader) => {
153 io::copy(&mut reader, &mut writer)?;
154 Ok(())
155 }
156 },
157 }
158 }
159
160 fn append(&self, input: CmprssInput, output: CmprssOutput) -> Result {
161 let path = match output {
162 CmprssOutput::Path(p) => p,
163 _ => bail!("tar append requires the archive path as the output target"),
164 };
165 if !path.is_file() {
166 bail!("tar append target must be an existing file: {:?}", path);
167 }
168
169 // Locate the offset just past the last entry's data (512-byte padded)
170 // so we can truncate off the trailing zero blocks and resume writing
171 // entries from there. Using the iterator is cheap: tar entries carry
172 // their own position, so we walk headers without reading file data.
173 let end_of_entries = {
174 let reader = File::open(&path)?;
175 let mut archive = Archive::new(reader);
176 let mut end: u64 = 0;
177 for entry in archive.entries()? {
178 let entry = entry?;
179 let file_pos = entry.raw_file_position();
180 let size = entry.size();
181 // Round up to the next 512-byte block boundary.
182 let padded = size.div_ceil(512) * 512;
183 end = file_pos + padded;
184 }
185 end
186 };
187
188 let mut file = OpenOptions::new().read(true).write(true).open(&path)?;
189 // Truncate any trailing end-of-archive zero blocks so the new entries
190 // start at `end_of_entries` and Builder::finish writes fresh ones.
191 file.set_len(end_of_entries)?;
192 file.seek(SeekFrom::Start(end_of_entries))?;
193
194 let total = match &input {
195 CmprssInput::Path(paths) => Some(total_input_bytes(paths)),
196 _ => None,
197 };
198 let bar = create_progress_bar(total, self.progress_args.progress, OutputTarget::File);
199 self.compress_internal(input, Builder::new(file), bar.as_ref())?;
200 if let Some(b) = bar {
201 b.finish();
202 }
203 Ok(())
204 }
205
206 fn list(&self, input: CmprssInput) -> Result {
207 let reader: Box<dyn Read> = match input {
208 CmprssInput::Path(paths) => {
209 if paths.len() != 1 {
210 bail!("tar listing expects exactly one archive file");
211 }
212 Box::new(File::open(&paths[0])?)
213 }
214 CmprssInput::Pipe(stdin) => Box::new(stdin),
215 CmprssInput::Reader(reader) => reader.0,
216 };
217 let mut archive = Archive::new(reader);
218 let stdout = io::stdout();
219 let mut out = stdout.lock();
220 for entry in archive.entries()? {
221 let entry = entry?;
222 let path = entry.path()?;
223 writeln!(out, "{}", path.display())?;
224 }
225 Ok(())
226 }
227}
228
229impl Tar {
230 /// Internal compress helper. When `bar` is `Some`, recursively walks
231 /// path inputs ourselves (rather than using `Builder::append_dir_all`)
232 /// so every file read runs through `ProgressReader`, sharing a single
233 /// bar across all entries.
234 fn compress_internal<W: Write>(
235 &self,
236 input: CmprssInput,
237 mut archive: Builder<W>,
238 bar: Option<&ProgressBar>,
239 ) -> Result {
240 match input {
241 CmprssInput::Path(paths) => {
242 for path in paths {
243 let name = path
244 .file_name()
245 .ok_or_else(|| anyhow!("input path has no file name: {:?}", path))?;
246 if path.is_file() {
247 append_file_entry(&mut archive, Path::new(name), &path, bar)?;
248 } else if path.is_dir() {
249 append_dir_entry(&mut archive, Path::new(name), &path, bar)?;
250 } else {
251 bail!("tar does not support this file type");
252 }
253 }
254 }
255 CmprssInput::Pipe(mut pipe) => {
256 // For pipe input, we'll create a single file named "archive"
257 let mut temp_file = tempfile()?;
258 io::copy(&mut pipe, &mut temp_file)?;
259 temp_file.seek(SeekFrom::Start(0))?;
260 archive.append_file("archive", &mut temp_file)?;
261 }
262 CmprssInput::Reader(_) => {
263 bail!("tar does not accept an in-memory reader input");
264 }
265 }
266 Ok(archive.finish()?)
267 }
268
269 fn unpack_with_progress<R: Read>(
270 &self,
271 reader: R,
272 size: Option<u64>,
273 out_dir: &Path,
274 ) -> Result {
275 let bar = create_progress_bar(size, self.progress_args.progress, OutputTarget::File);
276 let reader = ProgressReader::new(reader, bar.clone());
277 let mut archive = Archive::new(reader);
278 archive.unpack(out_dir)?;
279 if let Some(b) = bar {
280 b.finish();
281 }
282 Ok(())
283 }
284}
285
286/// Append one regular file to the tar archive, wrapping reads in a
287/// `ProgressReader` that ticks the shared bar.
288fn append_file_entry<W: Write>(
289 archive: &mut Builder<W>,
290 archive_name: &Path,
291 disk_path: &Path,
292 bar: Option<&ProgressBar>,
293) -> Result {
294 let mut file = File::open(disk_path)?;
295 let meta = file.metadata()?;
296 let mut header = Header::new_gnu();
297 header.set_metadata(&meta);
298 header.set_size(meta.len());
299 let reader = ProgressReader::new(&mut file, bar.cloned());
300 archive.append_data(&mut header, archive_name, reader)?;
301 Ok(())
302}
303
304/// Write the directory header, then recurse into its children.
305fn append_dir_entry<W: Write>(
306 archive: &mut Builder<W>,
307 archive_name: &Path,
308 disk_path: &Path,
309 bar: Option<&ProgressBar>,
310) -> Result {
311 let meta = std::fs::metadata(disk_path)?;
312 let mut header = Header::new_gnu();
313 header.set_metadata(&meta);
314 header.set_entry_type(EntryType::Directory);
315 header.set_size(0);
316 archive.append_data(&mut header, archive_name, io::empty())?;
317 for entry in std::fs::read_dir(disk_path)? {
318 let entry = entry?;
319 let child_archive = archive_name.join(entry.file_name());
320 let child_disk = entry.path();
321 if child_disk.is_file() {
322 append_file_entry(archive, &child_archive, &child_disk, bar)?;
323 } else if child_disk.is_dir() {
324 append_dir_entry(archive, &child_archive, &child_disk, bar)?;
325 }
326 // Skip symlinks/other types; they weren't handled before either.
327 }
328 Ok(())
329}
330
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_utils::*;
    use assert_fs::prelude::*;
    use predicates::prelude::*;
    use std::path::PathBuf;

    /// Runs the shared compressor interface checks against `Tar`.
    #[test]
    fn test_tar_interface() {
        test_compressor_interface(&Tar::default(), "tar", Some("tar"));
    }

    /// Runs the shared `test_compression` helper with a default `Tar`.
    #[test]
    fn test_tar_default_compression() -> Result {
        test_compression(&Tar::default())
    }

    /// After appending to an existing archive, the archive is larger and
    /// both the original and the appended file extract with their contents.
    #[test]
    fn test_append_adds_entries() -> Result {
        let tar = Tar::default();
        let tmp = assert_fs::TempDir::new()?;

        let original = tmp.child("original.txt");
        let extra = tmp.child("extra.txt");
        original.write_str("original contents")?;
        extra.write_str("appended contents")?;

        let archive = tmp.child("archive.tar");
        let archive_path = archive.path().to_path_buf();

        // Create the archive with a single entry.
        tar.compress(
            CmprssInput::Path(vec![original.path().to_path_buf()]),
            CmprssOutput::Path(archive_path.clone()),
        )?;
        let size_before = std::fs::metadata(&archive_path)?.len();

        // Append a second entry and check that the archive grew.
        tar.append(
            CmprssInput::Path(vec![extra.path().to_path_buf()]),
            CmprssOutput::Path(archive_path.clone()),
        )?;
        let size_after = std::fs::metadata(&archive_path)?.len();
        assert!(
            size_after > size_before,
            "archive did not grow after append: {size_before} -> {size_after}",
        );

        // Extract and compare both files against their sources.
        let extracted = tmp.child("extracted");
        std::fs::create_dir_all(extracted.path())?;
        tar.extract(
            CmprssInput::Path(vec![archive_path]),
            CmprssOutput::Path(extracted.path().to_path_buf()),
        )?;

        extracted
            .child("original.txt")
            .assert(predicate::path::eq_file(original.path()));
        extracted
            .child("extra.txt")
            .assert(predicate::path::eq_file(extra.path()));
        Ok(())
    }

    /// Appending to a target that does not exist must return an error
    /// instead of creating a new archive.
    #[test]
    fn test_append_missing_target_errors() {
        let tar = Tar::default();
        let tmp = assert_fs::TempDir::new().unwrap();
        let extra = tmp.child("extra.txt");
        extra.write_str("x").unwrap();
        let missing = tmp.child("nope.tar");

        let outcome = tar.append(
            CmprssInput::Path(vec![extra.path().to_path_buf()]),
            CmprssOutput::Path(missing.path().to_path_buf()),
        );
        let err = outcome.expect_err("append to a missing archive should error");
        assert!(err.to_string().contains("must be an existing file"));
    }

    /// Archiving a directory and extracting it reproduces the contained
    /// file under a directory of the same name.
    #[test]
    fn test_directory_handling() -> Result {
        let tar = Tar::default();

        // Source directory holding one file.
        let dir = assert_fs::TempDir::new()?;
        let file_path = dir.child("file.txt");
        file_path.write_str("garbage data for testing")?;

        let tmp = assert_fs::TempDir::new()?;
        let archive = tmp.child("dir_archive.tar");
        archive.assert(predicate::path::missing());

        tar.compress(
            CmprssInput::Path(vec![dir.path().to_path_buf()]),
            CmprssOutput::Path(archive.path().to_path_buf()),
        )?;
        archive.assert(predicate::path::is_file());

        let extracted = tmp.child("extracted");
        std::fs::create_dir_all(extracted.path())?;
        tar.extract(
            CmprssInput::Path(vec![archive.path().to_path_buf()]),
            CmprssOutput::Path(extracted.path().to_path_buf()),
        )?;

        // The archive stores the directory under its final path component.
        let dir_name: PathBuf = dir.path().file_name().unwrap().into();
        extracted
            .child(dir_name)
            .child("file.txt")
            .assert(predicate::path::eq_file(file_path.path()));
        Ok(())
    }
}