this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

at tangled-ci 448 lines 17 kB view raw
1extern crate tar; 2 3use anyhow::{anyhow, bail}; 4use clap::Args; 5use indicatif::ProgressBar; 6use std::fs::{File, OpenOptions}; 7use std::io::{self, Read, Seek, SeekFrom, Write}; 8use std::path::Path; 9use tar::{Archive, Builder, EntryType, Header}; 10use tempfile::tempfile; 11 12use super::containers::total_input_bytes; 13use crate::progress::{OutputTarget, ProgressArgs, ProgressReader, create_progress_bar}; 14use crate::utils::{CmprssInput, CmprssOutput, CommonArgs, Compressor, ExtractedTarget, Result}; 15 16#[derive(Args, Debug)] 17pub struct TarArgs { 18 #[clap(flatten)] 19 pub common_args: CommonArgs, 20 21 #[clap(flatten)] 22 pub progress_args: ProgressArgs, 23} 24 25#[derive(Default, Clone)] 26pub struct Tar { 27 pub progress_args: ProgressArgs, 28} 29 30impl Tar { 31 pub fn new(args: &TarArgs) -> Tar { 32 Tar { 33 progress_args: args.progress_args, 34 } 35 } 36} 37 38impl Compressor for Tar { 39 /// Full name for tar, also used for extension 40 fn name(&self) -> &str { 41 "tar" 42 } 43 44 /// Tar extracts to a directory by default 45 fn default_extracted_target(&self) -> ExtractedTarget { 46 ExtractedTarget::Directory 47 } 48 49 fn compress(&self, input: CmprssInput, output: CmprssOutput) -> Result { 50 match output { 51 CmprssOutput::Path(path) => { 52 let total = match &input { 53 CmprssInput::Path(paths) => Some(total_input_bytes(paths)), 54 _ => None, 55 }; 56 let bar = 57 create_progress_bar(total, self.progress_args.progress, OutputTarget::File); 58 let file = File::create(path)?; 59 self.compress_internal(input, Builder::new(file), bar.as_ref())?; 60 if let Some(b) = bar { 61 b.finish(); 62 } 63 Ok(()) 64 } 65 CmprssOutput::Pipe(mut pipe) => { 66 // Create a temporary file to write the tar to 67 let mut temp_file = tempfile()?; 68 self.compress_internal(input, Builder::new(&mut temp_file), None)?; 69 70 // Reset the file position to the beginning 71 temp_file.seek(SeekFrom::Start(0))?; 72 73 // Copy the temporary file to the pipe 74 io::copy(&mut temp_file, &mut pipe)?; 75 Ok(()) 76 } 77 CmprssOutput::Writer(mut writer) => { 78 // Pipeline-internal: tar is the innermost stage, writing into an 79 // in-memory pipe feeding the outer codec(s). We still own the 80 // progress bar because only tar sees the real input bytes; outer 81 // stages suppress their bar (their input size is unknown). 82 let total = match &input { 83 CmprssInput::Path(paths) => Some(total_input_bytes(paths)), 84 _ => None, 85 }; 86 let bar = 87 create_progress_bar(total, self.progress_args.progress, OutputTarget::File); 88 let mut temp_file = tempfile()?; 89 self.compress_internal(input, Builder::new(&mut temp_file), bar.as_ref())?; 90 temp_file.seek(SeekFrom::Start(0))?; 91 io::copy(&mut temp_file, &mut writer)?; 92 if let Some(b) = bar { 93 b.finish(); 94 } 95 Ok(()) 96 } 97 } 98 } 99 100 fn extract(&self, input: CmprssInput, output: CmprssOutput) -> Result { 101 match output { 102 CmprssOutput::Path(ref out_dir) => { 103 // Create the output directory if it doesn't exist 104 if !out_dir.exists() { 105 std::fs::create_dir_all(out_dir)?; 106 } else if !out_dir.is_dir() { 107 bail!("tar extraction output must be a directory"); 108 } 109 110 match input { 111 CmprssInput::Path(paths) => { 112 if paths.len() != 1 { 113 bail!("tar extraction expects exactly one archive file"); 114 } 115 let file = File::open(&paths[0])?; 116 let size = file.metadata()?.len(); 117 self.unpack_with_progress(file, Some(size), out_dir) 118 } 119 CmprssInput::Pipe(mut pipe) => { 120 // Create a temporary file to store the tar content 121 let mut temp_file = tempfile()?; 122 123 // Copy from pipe to temporary file 124 io::copy(&mut pipe, &mut temp_file)?; 125 126 // Reset the file position to the beginning 127 temp_file.seek(SeekFrom::Start(0))?; 128 let size = temp_file.metadata()?.len(); 129 self.unpack_with_progress(temp_file, Some(size), out_dir) 130 } 131 CmprssInput::Reader(reader) => { 132 let mut archive = Archive::new(reader.0); 133 archive.unpack(out_dir)?; 134 Ok(()) 135 } 136 } 137 } 138 CmprssOutput::Pipe(_) => bail!("tar extraction to stdout is not supported"), 139 CmprssOutput::Writer(mut writer) => match input { 140 CmprssInput::Path(paths) => { 141 if paths.len() != 1 { 142 bail!("tar extraction expects exactly one archive file"); 143 } 144 let mut file = File::open(&paths[0])?; 145 io::copy(&mut file, &mut writer)?; 146 Ok(()) 147 } 148 CmprssInput::Pipe(mut pipe) => { 149 io::copy(&mut pipe, &mut writer)?; 150 Ok(()) 151 } 152 CmprssInput::Reader(mut reader) => { 153 io::copy(&mut reader, &mut writer)?; 154 Ok(()) 155 } 156 }, 157 } 158 } 159 160 fn append(&self, input: CmprssInput, output: CmprssOutput) -> Result { 161 let path = match output { 162 CmprssOutput::Path(p) => p, 163 _ => bail!("tar append requires the archive path as the output target"), 164 }; 165 if !path.is_file() { 166 bail!("tar append target must be an existing file: {:?}", path); 167 } 168 169 // Locate the offset just past the last entry's data (512-byte padded) 170 // so we can truncate off the trailing zero blocks and resume writing 171 // entries from there. Using the iterator is cheap: tar entries carry 172 // their own position, so we walk headers without reading file data. 173 let end_of_entries = { 174 let reader = File::open(&path)?; 175 let mut archive = Archive::new(reader); 176 let mut end: u64 = 0; 177 for entry in archive.entries()? { 178 let entry = entry?; 179 let file_pos = entry.raw_file_position(); 180 let size = entry.size(); 181 // Round up to the next 512-byte block boundary. 182 let padded = size.div_ceil(512) * 512; 183 end = file_pos + padded; 184 } 185 end 186 }; 187 188 let mut file = OpenOptions::new().read(true).write(true).open(&path)?; 189 // Truncate any trailing end-of-archive zero blocks so the new entries 190 // start at `end_of_entries` and Builder::finish writes fresh ones. 191 file.set_len(end_of_entries)?; 192 file.seek(SeekFrom::Start(end_of_entries))?; 193 194 let total = match &input { 195 CmprssInput::Path(paths) => Some(total_input_bytes(paths)), 196 _ => None, 197 }; 198 let bar = create_progress_bar(total, self.progress_args.progress, OutputTarget::File); 199 self.compress_internal(input, Builder::new(file), bar.as_ref())?; 200 if let Some(b) = bar { 201 b.finish(); 202 } 203 Ok(()) 204 } 205 206 fn list(&self, input: CmprssInput) -> Result { 207 let reader: Box<dyn Read> = match input { 208 CmprssInput::Path(paths) => { 209 if paths.len() != 1 { 210 bail!("tar listing expects exactly one archive file"); 211 } 212 Box::new(File::open(&paths[0])?) 213 } 214 CmprssInput::Pipe(stdin) => Box::new(stdin), 215 CmprssInput::Reader(reader) => reader.0, 216 }; 217 let mut archive = Archive::new(reader); 218 let stdout = io::stdout(); 219 let mut out = stdout.lock(); 220 for entry in archive.entries()? { 221 let entry = entry?; 222 let path = entry.path()?; 223 writeln!(out, "{}", path.display())?; 224 } 225 Ok(()) 226 } 227} 228 229impl Tar { 230 /// Internal compress helper. When `bar` is `Some`, recursively walks 231 /// path inputs ourselves (rather than using `Builder::append_dir_all`) 232 /// so every file read runs through `ProgressReader`, sharing a single 233 /// bar across all entries. 234 fn compress_internal<W: Write>( 235 &self, 236 input: CmprssInput, 237 mut archive: Builder<W>, 238 bar: Option<&ProgressBar>, 239 ) -> Result { 240 match input { 241 CmprssInput::Path(paths) => { 242 for path in paths { 243 let name = path 244 .file_name() 245 .ok_or_else(|| anyhow!("input path has no file name: {:?}", path))?; 246 if path.is_file() { 247 append_file_entry(&mut archive, Path::new(name), &path, bar)?; 248 } else if path.is_dir() { 249 append_dir_entry(&mut archive, Path::new(name), &path, bar)?; 250 } else { 251 bail!("tar does not support this file type"); 252 } 253 } 254 } 255 CmprssInput::Pipe(mut pipe) => { 256 // For pipe input, we'll create a single file named "archive" 257 let mut temp_file = tempfile()?; 258 io::copy(&mut pipe, &mut temp_file)?; 259 temp_file.seek(SeekFrom::Start(0))?; 260 archive.append_file("archive", &mut temp_file)?; 261 } 262 CmprssInput::Reader(_) => { 263 bail!("tar does not accept an in-memory reader input"); 264 } 265 } 266 Ok(archive.finish()?) 267 } 268 269 fn unpack_with_progress<R: Read>( 270 &self, 271 reader: R, 272 size: Option<u64>, 273 out_dir: &Path, 274 ) -> Result { 275 let bar = create_progress_bar(size, self.progress_args.progress, OutputTarget::File); 276 let reader = ProgressReader::new(reader, bar.clone()); 277 let mut archive = Archive::new(reader); 278 archive.unpack(out_dir)?; 279 if let Some(b) = bar { 280 b.finish(); 281 } 282 Ok(()) 283 } 284} 285 286/// Append one regular file to the tar archive, wrapping reads in a 287/// `ProgressReader` that ticks the shared bar. 288fn append_file_entry<W: Write>( 289 archive: &mut Builder<W>, 290 archive_name: &Path, 291 disk_path: &Path, 292 bar: Option<&ProgressBar>, 293) -> Result { 294 let mut file = File::open(disk_path)?; 295 let meta = file.metadata()?; 296 let mut header = Header::new_gnu(); 297 header.set_metadata(&meta); 298 header.set_size(meta.len()); 299 let reader = ProgressReader::new(&mut file, bar.cloned()); 300 archive.append_data(&mut header, archive_name, reader)?; 301 Ok(()) 302} 303 304/// Write the directory header, then recurse into its children. 305fn append_dir_entry<W: Write>( 306 archive: &mut Builder<W>, 307 archive_name: &Path, 308 disk_path: &Path, 309 bar: Option<&ProgressBar>, 310) -> Result { 311 let meta = std::fs::metadata(disk_path)?; 312 let mut header = Header::new_gnu(); 313 header.set_metadata(&meta); 314 header.set_entry_type(EntryType::Directory); 315 header.set_size(0); 316 archive.append_data(&mut header, archive_name, io::empty())?; 317 for entry in std::fs::read_dir(disk_path)? { 318 let entry = entry?; 319 let child_archive = archive_name.join(entry.file_name()); 320 let child_disk = entry.path(); 321 if child_disk.is_file() { 322 append_file_entry(archive, &child_archive, &child_disk, bar)?; 323 } else if child_disk.is_dir() { 324 append_dir_entry(archive, &child_archive, &child_disk, bar)?; 325 } 326 // Skip symlinks/other types; they weren't handled before either. 327 } 328 Ok(()) 329} 330 331#[cfg(test)] 332mod tests { 333 use super::*; 334 use crate::test_utils::*; 335 use assert_fs::prelude::*; 336 use predicates::prelude::*; 337 use std::path::PathBuf; 338 339 /// Test the basic interface of the Tar compressor 340 #[test] 341 fn test_tar_interface() { 342 let compressor = Tar::default(); 343 test_compressor_interface(&compressor, "tar", Some("tar")); 344 } 345 346 /// Test the default compression level 347 #[test] 348 fn test_tar_default_compression() -> Result { 349 let compressor = Tar::default(); 350 test_compression(&compressor) 351 } 352 353 /// Append new entries into an existing tar and confirm both old and new 354 /// entries extract correctly. 355 #[test] 356 fn test_append_adds_entries() -> Result { 357 let compressor = Tar::default(); 358 let working_dir = assert_fs::TempDir::new()?; 359 360 let original = working_dir.child("original.txt"); 361 original.write_str("original contents")?; 362 let extra = working_dir.child("extra.txt"); 363 extra.write_str("appended contents")?; 364 365 let archive = working_dir.child("archive.tar"); 366 compressor.compress( 367 CmprssInput::Path(vec![original.path().to_path_buf()]), 368 CmprssOutput::Path(archive.path().to_path_buf()), 369 )?; 370 let size_before = std::fs::metadata(archive.path())?.len(); 371 372 compressor.append( 373 CmprssInput::Path(vec![extra.path().to_path_buf()]), 374 CmprssOutput::Path(archive.path().to_path_buf()), 375 )?; 376 let size_after = std::fs::metadata(archive.path())?.len(); 377 assert!( 378 size_after > size_before, 379 "archive did not grow after append: {size_before} -> {size_after}", 380 ); 381 382 let extract_dir = working_dir.child("extracted"); 383 std::fs::create_dir_all(extract_dir.path())?; 384 compressor.extract( 385 CmprssInput::Path(vec![archive.path().to_path_buf()]), 386 CmprssOutput::Path(extract_dir.path().to_path_buf()), 387 )?; 388 389 extract_dir 390 .child("original.txt") 391 .assert(predicate::path::eq_file(original.path())); 392 extract_dir 393 .child("extra.txt") 394 .assert(predicate::path::eq_file(extra.path())); 395 Ok(()) 396 } 397 398 /// Appending to a missing target must error rather than silently creating 399 /// a new archive. 400 #[test] 401 fn test_append_missing_target_errors() { 402 let compressor = Tar::default(); 403 let working_dir = assert_fs::TempDir::new().unwrap(); 404 let extra = working_dir.child("extra.txt"); 405 extra.write_str("x").unwrap(); 406 let missing = working_dir.child("nope.tar"); 407 408 let err = compressor 409 .append( 410 CmprssInput::Path(vec![extra.path().to_path_buf()]), 411 CmprssOutput::Path(missing.path().to_path_buf()), 412 ) 413 .expect_err("append to a missing archive should error"); 414 assert!(err.to_string().contains("must be an existing file")); 415 } 416 417 /// Test tar-specific functionality: directory handling 418 #[test] 419 fn test_directory_handling() -> Result { 420 let compressor = Tar::default(); 421 let dir = assert_fs::TempDir::new()?; 422 let file_path = dir.child("file.txt"); 423 file_path.write_str("garbage data for testing")?; 424 let working_dir = assert_fs::TempDir::new()?; 425 let archive = working_dir.child("dir_archive.tar"); 426 archive.assert(predicate::path::missing()); 427 428 compressor.compress( 429 CmprssInput::Path(vec![dir.path().to_path_buf()]), 430 CmprssOutput::Path(archive.path().to_path_buf()), 431 )?; 432 archive.assert(predicate::path::is_file()); 433 434 let extract_dir = working_dir.child("extracted"); 435 std::fs::create_dir_all(extract_dir.path())?; 436 compressor.extract( 437 CmprssInput::Path(vec![archive.path().to_path_buf()]), 438 CmprssOutput::Path(extract_dir.path().to_path_buf()), 439 )?; 440 441 let dir_name: PathBuf = dir.path().file_name().unwrap().into(); 442 extract_dir 443 .child(dir_name) 444 .child("file.txt") 445 .assert(predicate::path::eq_file(file_path.path())); 446 Ok(()) 447 } 448}