an efficient binary archive format
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Clean up the vacuum implementation and make it safer by using a backup file; store entry fields as integers and ensure they are converted to native endianness when loading

zach 741ca57e 470bea64

+90 -45
+90 -45
src/lib.rs
··· 46 46 #[repr(C, packed)] 47 47 #[derive(FromBytes, Unaligned, IntoBytes, Immutable, Clone, Copy, Debug, Default)] 48 48 pub struct Entry { 49 - pub offset: [u8; std::mem::size_of::<u64>()], // Use [u8; 8] for disk stability 50 - pub compressed_size: [u8; std::mem::size_of::<u64>()], 51 - pub uncompressed_size: [u8; std::mem::size_of::<u64>()], 52 - pub crc32: [u8; std::mem::size_of::<u32>()], 53 - pub name_len: [u8; std::mem::size_of::<u16>()], 49 + pub offset: u64, 50 + pub compressed_size: u64, 51 + pub uncompressed_size: u64, 52 + pub crc32: u32, 53 + pub name_len: u16, 54 54 pub compression_type: u8, 55 55 pub _reserved: u8, 56 56 } 57 57 58 - // Add helpers to convert back to numbers for Rust logic 58 + // The binary format uses little-endian byte order for all multi-byte integers. 59 + // These methods handle endianness conversion transparently: 60 + // - On little-endian systems (x86, ARM): zero overhead, direct access 61 + // - On big-endian systems: bytes are swapped to/from little-endian 62 + 59 63 impl Entry { 60 64 pub fn offset(&self) -> u64 { 61 - u64::from_le_bytes(self.offset) 65 + u64::from_le(self.offset) 66 + } 67 + 68 + pub fn set_offset(&mut self, value: u64) { 69 + self.offset = value.to_le(); 62 70 } 63 71 64 72 pub fn compressed_size(&self) -> u64 { 65 - u64::from_le_bytes(self.compressed_size) 73 + u64::from_le(self.compressed_size) 74 + } 75 + 76 + pub fn set_compressed_size(&mut self, value: u64) { 77 + self.compressed_size = value.to_le(); 66 78 } 67 79 68 80 pub fn uncompressed_size(&self) -> u64 { 69 - u64::from_le_bytes(self.uncompressed_size) 81 + u64::from_le(self.uncompressed_size) 82 + } 83 + 84 + pub fn set_uncompressed_size(&mut self, value: u64) { 85 + self.uncompressed_size = value.to_le(); 86 + } 87 + 88 + pub fn crc32(&self) -> u32 { 89 + u32::from_le(self.crc32) 90 + } 91 + 92 + pub fn set_crc32(&mut self, value: u32) { 93 + self.crc32 = value.to_le(); 70 94 } 71 95 72 96 pub fn name_len(&self) -> usize { 73 - 
u16::from_le_bytes(self.name_len) as usize 97 + u16::from_le(self.name_len) as usize 98 + } 99 + 100 + pub fn set_name_len(&mut self, value: u16) { 101 + self.name_len = value.to_le(); 74 102 } 75 103 76 104 pub fn compression_type(&self) -> Compress { ··· 80 108 _ => Compress::default(), 81 109 } 82 110 } 83 - 84 - pub fn crc32(&self) -> u32 { 85 - u32::from_le_bytes(self.crc32) 86 - } 87 111 } 88 112 89 113 #[repr(C, packed)] ··· 150 174 if computed_crc != self.expected_crc32 { 151 175 return Err(io::Error::new( 152 176 io::ErrorKind::InvalidData, 153 - format!("CRC32 mismatch: expected {:x}, got {:x}", self.expected_crc32, computed_crc), 177 + format!( 178 + "CRC32 mismatch: expected {:x}, got {:x}", 179 + self.expected_crc32, computed_crc 180 + ), 154 181 )); 155 182 } 156 183 Ok(()) ··· 227 254 228 255 self.bindle.data_end = current_pos + pad_len; 229 256 230 - let crc32 = self.crc32_hasher.clone().finalize(); 257 + let crc32_value = self.crc32_hasher.clone().finalize(); 231 258 232 - let entry = Entry { 233 - offset: self.start_offset.to_le_bytes(), 234 - compressed_size: compressed_size.to_le_bytes(), 235 - uncompressed_size: self.uncompressed_size.to_le_bytes(), 236 - crc32: crc32.to_le_bytes(), 259 + let mut entry = Entry { 237 260 compression_type, 238 - name_len: (self.name.len() as u16).to_le_bytes(), 239 261 ..Default::default() 240 262 }; 263 + entry.set_offset(self.start_offset); 264 + entry.set_compressed_size(compressed_size); 265 + entry.set_uncompressed_size(self.uncompressed_size); 266 + entry.set_crc32(crc32_value); 267 + entry.set_name_len(self.name.len() as u16); 241 268 242 269 self.bindle.index.insert(self.name.clone(), entry); 243 270 self.name.clear(); // Mark as closed ··· 417 444 } 418 445 419 446 pub fn vacuum(&mut self) -> io::Result<()> { 420 - let tmp_path = self.path.with_extension("tmp"); 447 + let backup_path = self.path.with_extension("backup"); 421 448 422 - // Create and populate the temporary file 423 - { 449 + // Release 
locks and close current file 450 + drop(self.mmap.take()); 451 + let _ = self.file.unlock(); 452 + 453 + // Rename original to backup 454 + std::fs::rename(&self.path, &backup_path)?; 455 + 456 + // Open backup for reading 457 + let mut backup_file = File::open(&backup_path)?; 458 + 459 + // Create new file at original path 460 + let result = { 424 461 let mut new_file = OpenOptions::new() 425 462 .write(true) 463 + .read(true) 426 464 .create(true) 427 465 .truncate(true) 428 - .open(&tmp_path)?; 466 + .open(&self.path)?; 429 467 430 468 new_file.write_all(BNDL_MAGIC)?; 431 469 let mut current_offset = HEADER_SIZE as u64; 432 470 433 - // Copy only live entries to the new file 471 + // Copy only live entries from backup to new file 434 472 for entry in self.index.values_mut() { 435 473 let mut buf = vec![0u8; entry.compressed_size() as usize]; 436 - self.file.seek(SeekFrom::Start(entry.offset()))?; 437 - self.file.read_exact(&mut buf)?; 474 + backup_file.seek(SeekFrom::Start(entry.offset()))?; 475 + backup_file.read_exact(&mut buf)?; 438 476 439 - new_file.seek(SeekFrom::Start(current_offset as u64))?; 477 + new_file.seek(SeekFrom::Start(current_offset))?; 440 478 new_file.write_all(&buf)?; 441 479 442 - entry.offset = current_offset.to_le_bytes(); 480 + entry.set_offset(current_offset); 443 481 let pad = pad::<8, u64>(entry.compressed_size()); 444 482 if pad > 0 { 445 483 new_file.write_all(&vec![0u8; pad as usize])?; ··· 447 485 current_offset += entry.compressed_size() + pad; 448 486 } 449 487 450 - // Write the index and footer to the TEMP file before closing it 488 + // Write the index and footer 451 489 let index_start = current_offset; 452 490 for (name, entry) in &self.index { 453 491 new_file.write_all(entry.as_bytes())?; ··· 465 503 }; 466 504 new_file.write_all(footer.as_bytes())?; 467 505 new_file.sync_all()?; 468 - // new_file is closed here when it goes out of scope 469 - } 470 506 471 - // Release ALL handles to the original file 472 - 
drop(self.mmap.take()); 473 - let _ = self.file.unlock(); 507 + Ok(()) 508 + }; 474 509 475 - // Re-open self.file in a way that allows us to drop it immediately 476 - let old_file = std::mem::replace(&mut self.file, File::open(&tmp_path)?); 477 - drop(old_file); 510 + // Handle result 511 + match result { 512 + Ok(()) => { 513 + // Success - delete backup 514 + std::fs::remove_file(&backup_path).ok(); 515 + } 516 + Err(e) => { 517 + // Failure - restore from backup 518 + std::fs::remove_file(&self.path).ok(); 519 + std::fs::rename(&backup_path, &self.path).ok(); 520 + return Err(e); 521 + } 522 + } 478 523 479 - // Perform the atomic rename while no handles point to the original path 480 - std::fs::rename(&tmp_path, &self.path)?; 481 - 482 - // Re-establish the state for the Bindle struct 524 + // Re-open the new file 483 525 let file = OpenOptions::new().read(true).write(true).open(&self.path)?; 484 526 file.lock_shared()?; 485 527 let mmap = unsafe { Mmap::map(&file)? }; ··· 503 545 entry.offset() as usize..(entry.offset() + entry.compressed_size()) as usize, 504 546 )?; 505 547 let mut out = Vec::with_capacity(entry.uncompressed_size() as usize); 506 - zstd::Decoder::new(compressed_data).ok()?.read_to_end(&mut out).ok()?; 548 + zstd::Decoder::new(compressed_data) 549 + .ok()? 550 + .read_to_end(&mut out) 551 + .ok()?; 507 552 Cow::Owned(out) 508 553 } else { 509 554 let uncompressed_data = mmap.get(