an efficient binary archive format
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

more cleanup

zach 2b8687d6 741ca57e

+779 -700
+3
.gitignore
··· 5 5 c/test 6 6 c/input.txt 7 7 c/test_dir* 8 + *.dylib 9 + *.so 10 + *.a
+7 -6
include/bindle.h
··· 12 12 #include <stdint.h> 13 13 #include <stdbool.h> 14 14 15 - typedef enum BindleCompress { 15 + enum BindleCompress { 16 16 BindleCompressNone = 0, 17 17 BindleCompressZstd = 1, 18 18 BindleCompressAuto = 2, 19 - } BindleCompress; 19 + }; 20 + typedef uint8_t BindleCompress; 20 21 21 22 typedef struct Bindle Bindle; 22 23 ··· 37 38 const char *name, 38 39 const uint8_t *data, 39 40 size_t data_len, 40 - enum BindleCompress compress); 41 + BindleCompress compress); 41 42 42 43 /** 43 44 * Adds a new entry, the name should be NUL terminated, will the data can contain NUL characters since the length ··· 46 47 bool bindle_add_file(struct Bindle *ctx, 47 48 const char *name, 48 49 const char *path, 49 - enum BindleCompress compress); 50 + BindleCompress compress); 50 51 51 52 /** 52 53 * Save any changed to disk ··· 94 95 95 96 bool bindle_unpack(struct Bindle *ctx, const char *dest_path); 96 97 97 - bool bindle_pack(struct Bindle *ctx, const char *src_path, enum BindleCompress compress); 98 + bool bindle_pack(struct Bindle *ctx, const char *src_path, BindleCompress compress); 98 99 99 100 bool bindle_exists(const struct Bindle *ctx, const char *name); 100 101 ··· 111 112 */ 112 113 struct BindleWriter *bindle_writer_new(struct Bindle *ctx, 113 114 const char *name, 114 - enum BindleCompress compress); 115 + BindleCompress compress); 115 116 116 117 bool bindle_writer_write(struct BindleWriter *stream, const uint8_t *data, size_t len); 117 118
+461
src/bindle.rs
··· 1 + use crc32fast::Hasher; 2 + use fs2::FileExt; 3 + use memmap2::Mmap; 4 + use std::borrow::Cow; 5 + use std::collections::BTreeMap; 6 + use std::fs::{File, OpenOptions}; 7 + use std::io::{self, Read, Seek, SeekFrom, Write}; 8 + use std::path::{Path, PathBuf}; 9 + use zerocopy::{FromBytes, IntoBytes}; 10 + 11 + use crate::compress::Compress; 12 + use crate::entry::{Entry, Footer}; 13 + use crate::reader::{Either, Reader}; 14 + use crate::writer::Writer; 15 + use crate::{ 16 + pad, write_padding, AUTO_COMPRESS_THRESHOLD, BNDL_ALIGN, BNDL_MAGIC, ENTRY_SIZE, FOOTER_MAGIC, 17 + FOOTER_SIZE, HEADER_SIZE, 18 + }; 19 + 20 + pub struct Bindle { 21 + pub(crate) path: PathBuf, 22 + pub(crate) file: File, 23 + pub(crate) mmap: Option<Mmap>, 24 + pub(crate) index: BTreeMap<String, Entry>, 25 + pub(crate) data_end: u64, 26 + } 27 + 28 + impl Bindle { 29 + /// Create a new bindle file, this will overwrite the existing file 30 + pub fn create<P: AsRef<Path>>(path: P) -> io::Result<Self> { 31 + let path_buf = path.as_ref().to_path_buf(); 32 + let opts = OpenOptions::new() 33 + .truncate(true) 34 + .read(true) 35 + .write(true) 36 + .create(true) 37 + .to_owned(); 38 + Self::new(path_buf, opts) 39 + } 40 + 41 + /// Open or create a bindle file 42 + pub fn open<P: AsRef<Path>>(path: P) -> io::Result<Self> { 43 + let path_buf = path.as_ref().to_path_buf(); 44 + let opts = OpenOptions::new() 45 + .read(true) 46 + .write(true) 47 + .create(true) 48 + .to_owned(); 49 + Self::new(path_buf, opts) 50 + } 51 + 52 + /// Open a bindle file, this will not create it if it doesn't exist 53 + pub fn load<P: AsRef<Path>>(path: P) -> io::Result<Self> { 54 + let path_buf = path.as_ref().to_path_buf(); 55 + let opts = OpenOptions::new().read(true).write(true).to_owned(); 56 + Self::new(path_buf, opts) 57 + } 58 + 59 + /// Create a new `Bindle` from a path and file, the path must match the file 60 + pub fn new(path: PathBuf, opts: OpenOptions) -> io::Result<Self> { 61 + let mut file = 
opts.open(&path)?; 62 + file.lock_shared()?; 63 + let len = file.metadata()?.len(); 64 + 65 + // Handle completely new/empty files 66 + if len == 0 { 67 + file.write_all(BNDL_MAGIC)?; 68 + return Ok(Self { 69 + path, 70 + file, 71 + mmap: None, 72 + index: BTreeMap::new(), 73 + data_end: HEADER_SIZE as u64, 74 + }); 75 + } 76 + 77 + // Safety check: File must be at least HEADER + FOOTER size (24 bytes) 78 + // This prevents "attempt to subtract with overflow" when calculating footer_pos 79 + if len < (HEADER_SIZE + FOOTER_SIZE) as u64 { 80 + return Err(io::Error::new( 81 + io::ErrorKind::InvalidData, 82 + "File too small to be a valid bindle", 83 + )); 84 + } 85 + 86 + let mut header = [0u8; 8]; 87 + file.read_exact(&mut header)?; 88 + if &header != BNDL_MAGIC { 89 + return Err(io::Error::new(io::ErrorKind::InvalidData, "Invalid header")); 90 + } 91 + 92 + let m = unsafe { Mmap::map(&file)? }; 93 + 94 + // Calculate footer position. Subtraction is now safe due to the check above. 95 + let footer_pos = m.len() - FOOTER_SIZE; 96 + let footer = Footer::read_from_bytes(&m[footer_pos..]).map_err(|_| { 97 + io::Error::new(io::ErrorKind::InvalidData, "Failed to read footer") 98 + })?; 99 + 100 + if footer.magic() != FOOTER_MAGIC { 101 + return Err(io::Error::new( 102 + io::ErrorKind::InvalidData, 103 + "Invalid footer, the file may be corrupt", 104 + )); 105 + } 106 + 107 + let data_end = footer.index_offset(); 108 + let count = footer.entry_count(); 109 + let mut index = BTreeMap::new(); 110 + 111 + let mut cursor = data_end as usize; 112 + for _ in 0..count { 113 + // Ensure there is enough data left for an Entry header 114 + if cursor + ENTRY_SIZE > footer_pos { 115 + break; 116 + } 117 + 118 + let entry = match Entry::read_from_bytes(&m[cursor..cursor + ENTRY_SIZE]) { 119 + Ok(e) => e, 120 + Err(_) => break, // Corrupted entry, stop reading 121 + }; 122 + let n_start = cursor + ENTRY_SIZE; 123 + 124 + // Validate that the filename exists within the mapped bounds 125 + 
if n_start + entry.name_len() > footer_pos { 126 + break; 127 + } 128 + 129 + let name = 130 + String::from_utf8_lossy(&m[n_start..n_start + entry.name_len()]).into_owned(); 131 + index.insert(name, entry); 132 + 133 + let total = ENTRY_SIZE + entry.name_len(); 134 + cursor += (total + (BNDL_ALIGN - 1)) & !(BNDL_ALIGN - 1); 135 + } 136 + 137 + Ok(Self { 138 + path, 139 + file, 140 + mmap: Some(m), 141 + index, 142 + data_end, 143 + }) 144 + } 145 + 146 + fn should_auto_compress(&self, compress: Compress, len: usize) -> bool { 147 + compress == Compress::Zstd || (compress == Compress::Auto && len > AUTO_COMPRESS_THRESHOLD) 148 + } 149 + 150 + pub fn add(&mut self, name: &str, data: &[u8], compress: Compress) -> io::Result<()> { 151 + let mut stream = self.writer(name, compress)?; 152 + stream.write_all(data)?; 153 + stream.close()?; 154 + Ok(()) 155 + } 156 + 157 + pub fn add_file( 158 + &mut self, 159 + name: &str, 160 + path: impl AsRef<Path>, 161 + compress: Compress, 162 + ) -> io::Result<()> { 163 + let mut stream = self.writer(name, compress)?; 164 + let mut src = std::fs::File::open(path)?; 165 + std::io::copy(&mut src, &mut stream)?; 166 + Ok(()) 167 + } 168 + 169 + pub fn save(&mut self) -> io::Result<()> { 170 + self.file.lock_exclusive()?; 171 + self.file.seek(SeekFrom::Start(self.data_end))?; 172 + let index_start = self.data_end; 173 + 174 + for (name, entry) in &self.index { 175 + self.file.write_all(entry.as_bytes())?; 176 + self.file.write_all(name.as_bytes())?; 177 + let pad = pad::<BNDL_ALIGN, usize>(ENTRY_SIZE + name.len()); 178 + if pad > 0 { 179 + write_padding(&mut self.file, pad)?; 180 + } 181 + } 182 + 183 + let footer = Footer::new(index_start, self.index.len() as u32, FOOTER_MAGIC); 184 + self.file.write_all(footer.as_bytes())?; 185 + 186 + // Truncate file to current position to remove any old data 187 + let current_pos = self.file.stream_position()?; 188 + self.file.set_len(current_pos)?; 189 + self.file.flush()?; 190 + 191 + self.mmap = 
Some(unsafe { Mmap::map(&self.file)? }); 192 + self.file.lock_shared()?; 193 + Ok(()) 194 + } 195 + 196 + pub fn vacuum(&mut self) -> io::Result<()> { 197 + let backup_path = self.path.with_extension("backup"); 198 + 199 + // Release locks and close current file 200 + drop(self.mmap.take()); 201 + let _ = self.file.unlock(); 202 + 203 + // Rename original to backup 204 + std::fs::rename(&self.path, &backup_path)?; 205 + 206 + // Open backup for reading 207 + let mut backup_file = File::open(&backup_path)?; 208 + 209 + // Create new file at original path 210 + let result = { 211 + let mut new_file = OpenOptions::new() 212 + .write(true) 213 + .read(true) 214 + .create(true) 215 + .truncate(true) 216 + .open(&self.path)?; 217 + 218 + new_file.write_all(BNDL_MAGIC)?; 219 + let mut current_offset = HEADER_SIZE as u64; 220 + 221 + // Copy only live entries from backup to new file 222 + for entry in self.index.values_mut() { 223 + let mut buf = vec![0u8; entry.compressed_size() as usize]; 224 + backup_file.seek(SeekFrom::Start(entry.offset()))?; 225 + backup_file.read_exact(&mut buf)?; 226 + 227 + new_file.seek(SeekFrom::Start(current_offset))?; 228 + new_file.write_all(&buf)?; 229 + 230 + entry.set_offset(current_offset); 231 + let pad = pad::<8, u64>(entry.compressed_size()); 232 + if pad > 0 { 233 + write_padding(&mut new_file, pad as usize)?; 234 + } 235 + current_offset += entry.compressed_size() + pad; 236 + } 237 + 238 + // Write the index and footer 239 + let index_start = current_offset; 240 + for (name, entry) in &self.index { 241 + new_file.write_all(entry.as_bytes())?; 242 + new_file.write_all(name.as_bytes())?; 243 + let pad = pad::<BNDL_ALIGN, usize>(ENTRY_SIZE + name.len()); 244 + if pad > 0 { 245 + write_padding(&mut new_file, pad)?; 246 + } 247 + } 248 + 249 + let footer = Footer::new(index_start, self.index.len() as u32, FOOTER_MAGIC); 250 + new_file.write_all(footer.as_bytes())?; 251 + new_file.sync_all()?; 252 + 253 + Ok(()) 254 + }; 255 + 256 + // 
Handle result 257 + match result { 258 + Ok(()) => { 259 + // Success - delete backup 260 + std::fs::remove_file(&backup_path).ok(); 261 + } 262 + Err(e) => { 263 + // Failure - restore from backup 264 + std::fs::remove_file(&self.path).ok(); 265 + std::fs::rename(&backup_path, &self.path).ok(); 266 + return Err(e); 267 + } 268 + } 269 + 270 + // Re-open the new file 271 + let file = OpenOptions::new().read(true).write(true).open(&self.path)?; 272 + file.lock_shared()?; 273 + let mmap = unsafe { Mmap::map(&file)? }; 274 + 275 + let footer_pos = mmap.len() - FOOTER_SIZE; 276 + let footer = Footer::read_from_bytes(&mmap[footer_pos..]).map_err(|_| { 277 + io::Error::new(io::ErrorKind::InvalidData, "Failed to read footer after vacuum") 278 + })?; 279 + 280 + self.file = file; 281 + self.mmap = Some(mmap); 282 + self.data_end = footer.index_offset(); 283 + 284 + Ok(()) 285 + } 286 + 287 + pub fn read<'a>(&'a self, name: &str) -> Option<Cow<'a, [u8]>> { 288 + let entry = self.index.get(name)?; 289 + let mmap = self.mmap.as_ref()?; 290 + 291 + let data = if entry.compression_type() == Compress::Zstd { 292 + let compressed_data = mmap.get( 293 + entry.offset() as usize..(entry.offset() + entry.compressed_size()) as usize, 294 + )?; 295 + let mut out = Vec::with_capacity(entry.uncompressed_size() as usize); 296 + zstd::Decoder::new(compressed_data) 297 + .ok()? 
298 + .read_to_end(&mut out) 299 + .ok()?; 300 + Cow::Owned(out) 301 + } else { 302 + let uncompressed_data = mmap.get( 303 + entry.offset() as usize..(entry.offset() + entry.uncompressed_size()) as usize, 304 + )?; 305 + Cow::Borrowed(uncompressed_data) 306 + }; 307 + 308 + // Verify CRC32 309 + let computed_crc = crc32fast::hash(&data); 310 + if computed_crc != entry.crc32() { 311 + return None; 312 + } 313 + 314 + Some(data) 315 + } 316 + 317 + /// Read to an `std::io::Write` 318 + pub fn read_to<W: std::io::Write>(&self, name: &str, mut w: W) -> std::io::Result<u64> { 319 + let mut reader = self.reader(name)?; 320 + let bytes_copied = std::io::copy(&mut reader, &mut w)?; 321 + reader.verify_crc32()?; 322 + Ok(bytes_copied) 323 + } 324 + 325 + // Returns a seekable reader for an entry. 326 + /// If compressed, it provides a transparently decompressing stream. 327 + pub fn reader<'a>(&'a self, name: &str) -> io::Result<Reader<'a>> { 328 + let entry = self 329 + .index 330 + .get(name) 331 + .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "Entry not found"))?; 332 + 333 + let start = entry.offset() as usize; 334 + let end = start + entry.compressed_size() as usize; 335 + let mmap = self 336 + .mmap 337 + .as_ref() 338 + .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "Missing mmap"))?; 339 + let data_slice = &mmap[start..end]; 340 + 341 + let cursor = io::Cursor::new(data_slice); 342 + 343 + if entry.compression_type() == Compress::Zstd { 344 + // Zstd streaming decoder 345 + let decoder = zstd::Decoder::new(cursor)?; 346 + Ok(Reader { 347 + decoder: Either::Left(decoder), 348 + crc32_hasher: Hasher::new(), 349 + expected_crc32: entry.crc32(), 350 + }) 351 + } else { 352 + Ok(Reader { 353 + decoder: Either::Right(cursor), 354 + crc32_hasher: Hasher::new(), 355 + expected_crc32: entry.crc32(), 356 + }) 357 + } 358 + } 359 + 360 + /// The number of entries 361 + pub fn len(&self) -> usize { 362 + self.index.len() 363 + } 364 + 365 + /// Returns true 
if there are no entries 366 + pub fn is_empty(&self) -> bool { 367 + self.index.is_empty() 368 + } 369 + 370 + /// Direct readonly access to the index 371 + pub fn index(&self) -> &BTreeMap<String, Entry> { 372 + &self.index 373 + } 374 + 375 + /// Clear all entries 376 + pub fn clear(&mut self) { 377 + self.index.clear() 378 + } 379 + 380 + /// Checks if an entry exists in the archive index. 381 + pub fn exists(&self, name: &str) -> bool { 382 + self.index.contains_key(name) 383 + } 384 + 385 + /// Remove an entry from the index. 386 + /// The data remains in the file until vacuum() is called. 387 + /// Returns true if the entry existed and was removed. 388 + pub fn remove(&mut self, name: &str) -> bool { 389 + self.index.remove(name).is_some() 390 + } 391 + 392 + /// Recursively packs a directory into the archive. 393 + pub fn pack<P: AsRef<Path>>(&mut self, src_dir: P, compress: Compress) -> io::Result<()> { 394 + self.pack_recursive(src_dir.as_ref(), src_dir.as_ref(), compress) 395 + } 396 + 397 + fn pack_recursive( 398 + &mut self, 399 + base: &Path, 400 + current: &Path, 401 + compress: Compress, 402 + ) -> io::Result<()> { 403 + if current.is_dir() { 404 + for entry in std::fs::read_dir(current)? { 405 + self.pack_recursive(base, &entry?.path(), compress)?; 406 + } 407 + } else { 408 + let name = current 409 + .strip_prefix(base) 410 + .map_err(|e| io::Error::new(io::ErrorKind::Other, e))? 411 + .to_string_lossy(); 412 + let mut data = Vec::new(); 413 + File::open(current)?.read_to_end(&mut data)?; 414 + self.add(&name, &data, compress)?; 415 + } 416 + Ok(()) 417 + } 418 + 419 + /// Unpacks all archive entries to a destination directory. 
420 + pub fn unpack<P: AsRef<Path>>(&self, dest: P) -> io::Result<()> { 421 + let dest_path = dest.as_ref(); 422 + if let Some(parent) = dest_path.parent() { 423 + std::fs::create_dir_all(parent)?; 424 + } 425 + for (name, _) in &self.index { 426 + if let Some(data) = self.read(name) { 427 + let file_path = dest_path.join(name); 428 + if let Some(parent) = file_path.parent() { 429 + std::fs::create_dir_all(parent)?; 430 + } 431 + std::fs::write(file_path, data)?; 432 + } 433 + } 434 + Ok(()) 435 + } 436 + 437 + pub fn writer<'a>(&'a mut self, name: &str, compress: Compress) -> io::Result<Writer<'a>> { 438 + self.file.seek(SeekFrom::Start(self.data_end))?; 439 + let compress = self.should_auto_compress(compress, 0); 440 + let f = self.file.try_clone()?; 441 + let start_offset = self.data_end; 442 + Ok(Writer { 443 + name: name.to_string(), 444 + bindle: self, 445 + encoder: if compress { 446 + Some(zstd::Encoder::new(f, 3)?) 447 + } else { 448 + None 449 + }, 450 + start_offset, 451 + uncompressed_size: 0, 452 + crc32_hasher: Hasher::new(), 453 + }) 454 + } 455 + } 456 + 457 + impl Drop for Bindle { 458 + fn drop(&mut self) { 459 + let _ = self.file.unlock(); 460 + } 461 + }
+20
src/compress.rs
/// Compression policy for an archive entry.
///
/// The discriminants are the single-byte values used for an entry's stored
/// compression type. `Auto` is an input policy only and is never stored.
#[repr(u8)]
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
pub enum Compress {
    /// Store the data as-is.
    None = 0,
    /// Compress the data with zstd.
    Zstd = 1,
    /// Let the library decide whether to compress.
    #[default]
    Auto = 2,
}

impl Compress {
    /// Decodes a stored compression byte.
    ///
    /// Only the zstd marker is recognised as compressed; every other value,
    /// including unknown ones and the `Auto` discriminant, decodes to `None`
    /// (the safest interpretation, since `Auto` is never written to disk).
    pub(crate) fn from_u8(value: u8) -> Self {
        if value == Compress::Zstd as u8 {
            Compress::Zstd
        } else {
            Compress::None
        }
    }
}
+96
src/entry.rs
··· 1 + use zerocopy::{FromBytes, Immutable, IntoBytes, Unaligned}; 2 + 3 + use crate::compress::Compress; 4 + 5 + #[repr(C, packed)] 6 + #[derive(FromBytes, Unaligned, IntoBytes, Immutable, Clone, Copy, Debug, Default)] 7 + pub struct Entry { 8 + offset: u64, 9 + compressed_size: u64, 10 + uncompressed_size: u64, 11 + crc32: u32, 12 + name_len: u16, 13 + pub compression_type: u8, 14 + pub _reserved: u8, 15 + } 16 + 17 + // The binary format uses little-endian byte order for all multi-byte integers. 18 + // These methods handle endianness conversion transparently: 19 + // - On little-endian systems (x86, ARM): zero overhead, direct access 20 + // - On big-endian systems: bytes are swapped to/from little-endian 21 + 22 + impl Entry { 23 + pub fn offset(&self) -> u64 { 24 + u64::from_le(self.offset) 25 + } 26 + 27 + pub fn set_offset(&mut self, value: u64) { 28 + self.offset = value.to_le(); 29 + } 30 + 31 + pub fn compressed_size(&self) -> u64 { 32 + u64::from_le(self.compressed_size) 33 + } 34 + 35 + pub fn set_compressed_size(&mut self, value: u64) { 36 + self.compressed_size = value.to_le(); 37 + } 38 + 39 + pub fn uncompressed_size(&self) -> u64 { 40 + u64::from_le(self.uncompressed_size) 41 + } 42 + 43 + pub fn set_uncompressed_size(&mut self, value: u64) { 44 + self.uncompressed_size = value.to_le(); 45 + } 46 + 47 + pub fn crc32(&self) -> u32 { 48 + u32::from_le(self.crc32) 49 + } 50 + 51 + pub fn set_crc32(&mut self, value: u32) { 52 + self.crc32 = value.to_le(); 53 + } 54 + 55 + pub fn name_len(&self) -> usize { 56 + u16::from_le(self.name_len) as usize 57 + } 58 + 59 + pub fn set_name_len(&mut self, value: u16) { 60 + self.name_len = value.to_le(); 61 + } 62 + 63 + pub fn compression_type(&self) -> Compress { 64 + Compress::from_u8(self.compression_type) 65 + } 66 + } 67 + 68 + #[repr(C, packed)] 69 + #[derive(FromBytes, Unaligned, IntoBytes, Immutable, Debug)] 70 + pub(crate) struct Footer { 71 + pub index_offset: u64, 72 + pub entry_count: u32, 73 + pub 
magic: u32, 74 + } 75 + 76 + impl Footer { 77 + pub fn new(index_offset: u64, entry_count: u32, magic: u32) -> Self { 78 + Self { 79 + index_offset: index_offset.to_le(), 80 + entry_count: entry_count.to_le(), 81 + magic: magic.to_le(), 82 + } 83 + } 84 + 85 + pub fn index_offset(&self) -> u64 { 86 + u64::from_le(self.index_offset) 87 + } 88 + 89 + pub fn entry_count(&self) -> u32 { 90 + u32::from_le(self.entry_count) 91 + } 92 + 93 + pub fn magic(&self) -> u32 { 94 + u32::from_le(self.magic) 95 + } 96 + }
+37 -694
src/lib.rs
··· 1 - use crc32fast::Hasher; 2 - use fs2::FileExt; 3 - use memmap2::Mmap; 4 - use std::borrow::Cow; 5 - use std::collections::BTreeMap; 6 - use std::fs::{File, OpenOptions}; 7 - use std::io::{self, BufReader, Read, Seek, SeekFrom, Write}; 8 - use std::path::{Path, PathBuf}; 9 - use zerocopy::{FromBytes, Immutable, IntoBytes, Unaligned}; 1 + use std::io::{self, Write}; 2 + 3 + // Module declarations 4 + mod bindle; 5 + mod compress; 6 + mod entry; 7 + mod reader; 8 + mod writer; 10 9 11 10 pub(crate) mod ffi; 12 11 13 - const BNDL_MAGIC: &[u8; 8] = b"BINDL001"; 14 - const BNDL_ALIGN: usize = 8; 15 - const ENTRY_SIZE: usize = std::mem::size_of::<Entry>(); 16 - const FOOTER_SIZE: usize = std::mem::size_of::<Footer>(); 17 - const HEADER_SIZE: usize = 8; 18 - const AUTO_COMPRESS_THRESHOLD: usize = 2048; 19 - const FOOTER_MAGIC: u32 = 0x62626262; 12 + // Public re-exports 13 + pub use bindle::Bindle; 14 + pub use compress::Compress; 15 + pub use entry::Entry; 16 + pub use reader::Reader; 17 + pub use writer::Writer; 18 + 19 + // Constants 20 + pub(crate) const BNDL_MAGIC: &[u8; 8] = b"BINDL001"; 21 + pub(crate) const BNDL_ALIGN: usize = 8; 22 + pub(crate) const ENTRY_SIZE: usize = std::mem::size_of::<Entry>(); 23 + pub(crate) const FOOTER_SIZE: usize = std::mem::size_of::<entry::Footer>(); 24 + pub(crate) const HEADER_SIZE: usize = 8; 25 + pub(crate) const AUTO_COMPRESS_THRESHOLD: usize = 2048; 26 + pub(crate) const FOOTER_MAGIC: u32 = 0x62626262; 27 + const ZEROS: &[u8; 64] = &[0u8; 64]; // Reusable zero buffer for padding 20 28 21 - fn pad< 29 + // Helper functions 30 + pub(crate) fn pad< 22 31 const SIZE: usize, 23 32 T: Copy + TryFrom<usize> + std::ops::Sub<T, Output = T> + std::ops::Rem<T, Output = T>, 24 33 >( ··· 34 43 unreachable!() 35 44 } 36 45 37 - #[repr(C)] 38 - #[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)] 39 - pub enum Compress { 40 - None = 0, 41 - Zstd = 1, 42 - #[default] 43 - Auto = 2, 44 - } 45 - 46 - #[repr(C, packed)] 47 - 
#[derive(FromBytes, Unaligned, IntoBytes, Immutable, Clone, Copy, Debug, Default)] 48 - pub struct Entry { 49 - pub offset: u64, 50 - pub compressed_size: u64, 51 - pub uncompressed_size: u64, 52 - pub crc32: u32, 53 - pub name_len: u16, 54 - pub compression_type: u8, 55 - pub _reserved: u8, 56 - } 57 - 58 - // The binary format uses little-endian byte order for all multi-byte integers. 59 - // These methods handle endianness conversion transparently: 60 - // - On little-endian systems (x86, ARM): zero overhead, direct access 61 - // - On big-endian systems: bytes are swapped to/from little-endian 62 - 63 - impl Entry { 64 - pub fn offset(&self) -> u64 { 65 - u64::from_le(self.offset) 66 - } 67 - 68 - pub fn set_offset(&mut self, value: u64) { 69 - self.offset = value.to_le(); 70 - } 71 - 72 - pub fn compressed_size(&self) -> u64 { 73 - u64::from_le(self.compressed_size) 74 - } 75 - 76 - pub fn set_compressed_size(&mut self, value: u64) { 77 - self.compressed_size = value.to_le(); 78 - } 79 - 80 - pub fn uncompressed_size(&self) -> u64 { 81 - u64::from_le(self.uncompressed_size) 82 - } 83 - 84 - pub fn set_uncompressed_size(&mut self, value: u64) { 85 - self.uncompressed_size = value.to_le(); 86 - } 87 - 88 - pub fn crc32(&self) -> u32 { 89 - u32::from_le(self.crc32) 90 - } 91 - 92 - pub fn set_crc32(&mut self, value: u32) { 93 - self.crc32 = value.to_le(); 94 - } 95 - 96 - pub fn name_len(&self) -> usize { 97 - u16::from_le(self.name_len) as usize 98 - } 99 - 100 - pub fn set_name_len(&mut self, value: u16) { 101 - self.name_len = value.to_le(); 102 - } 103 - 104 - pub fn compression_type(&self) -> Compress { 105 - match self.compression_type { 106 - 0 => Compress::None, 107 - 1 => Compress::Zstd, 108 - _ => Compress::default(), 109 - } 110 - } 111 - } 112 - 113 - #[repr(C, packed)] 114 - #[derive(FromBytes, Unaligned, IntoBytes, Immutable, Debug)] 115 - struct Footer { 116 - pub index_offset: u64, 117 - pub entry_count: u32, 118 - pub magic: u32, 119 - } 120 - 
121 - pub struct Bindle { 122 - path: PathBuf, 123 - file: File, 124 - mmap: Option<Mmap>, 125 - index: BTreeMap<String, Entry>, 126 - data_end: u64, 127 - } 128 - 129 - pub enum Either<A, B> { 130 - Left(A), 131 - Right(B), 132 - } 133 - 134 - pub struct Reader<'a> { 135 - decoder: Either<zstd::Decoder<'static, BufReader<io::Cursor<&'a [u8]>>>, io::Cursor<&'a [u8]>>, 136 - crc32_hasher: Hasher, 137 - expected_crc32: u32, 138 - } 139 - 140 - impl<'a> Read for Reader<'a> { 141 - fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { 142 - let n = match &mut self.decoder { 143 - Either::Left(x) => x.read(buf)?, 144 - Either::Right(x) => x.read(buf)?, 145 - }; 146 - 147 - if n > 0 { 148 - self.crc32_hasher.update(&buf[..n]); 149 - } 150 - 151 - Ok(n) 152 - } 153 - } 154 - 155 - // Note: Seeking is only supported for uncompressed entries in this simple implementation. 156 - // Seeking in compressed streams requires a frame-aware decoder. 157 - impl<'a> Seek for Reader<'a> { 158 - fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> { 159 - match &mut self.decoder { 160 - Either::Left(_) => Err(io::Error::new( 161 - io::ErrorKind::Unsupported, 162 - "Seeking not supported on compressed streams", 163 - )), 164 - Either::Right(x) => x.seek(pos), 165 - } 166 - } 167 - } 168 - 169 - impl<'a> Reader<'a> { 170 - /// Verify the CRC32 of the data read so far. 171 - /// This should be called after all data has been read to ensure data integrity. 
172 - pub fn verify_crc32(&self) -> io::Result<()> { 173 - let computed_crc = self.crc32_hasher.clone().finalize(); 174 - if computed_crc != self.expected_crc32 { 175 - return Err(io::Error::new( 176 - io::ErrorKind::InvalidData, 177 - format!( 178 - "CRC32 mismatch: expected {:x}, got {:x}", 179 - self.expected_crc32, computed_crc 180 - ), 181 - )); 182 - } 183 - Ok(()) 184 - } 185 - } 186 - 187 - pub struct Writer<'a> { 188 - pub(crate) bindle: &'a mut Bindle, 189 - pub(crate) encoder: Option<zstd::Encoder<'a, std::fs::File>>, 190 - pub(crate) name: String, 191 - pub(crate) start_offset: u64, 192 - pub(crate) uncompressed_size: u64, 193 - pub(crate) crc32_hasher: Hasher, 194 - } 195 - 196 - impl<'a> Drop for Writer<'a> { 197 - fn drop(&mut self) { 198 - let _ = self.close_drop(); 199 - } 200 - } 201 - 202 - impl<'a> std::io::Write for Writer<'a> { 203 - fn write(&mut self, buf: &[u8]) -> io::Result<usize> { 204 - self.write_chunk(buf)?; 205 - Ok(buf.len()) 46 + // Helper to write padding zeros without allocating 47 + pub(crate) fn write_padding<W: Write>(writer: &mut W, len: usize) -> io::Result<()> { 48 + let mut remaining = len; 49 + while remaining > 0 { 50 + let chunk = remaining.min(ZEROS.len()); 51 + writer.write_all(&ZEROS[..chunk])?; 52 + remaining -= chunk; 206 53 } 207 - 208 - fn flush(&mut self) -> io::Result<()> { 209 - Ok(()) 210 - } 211 - } 212 - 213 - impl<'a> Writer<'a> { 214 - pub fn write_chunk(&mut self, data: &[u8]) -> io::Result<()> { 215 - if self.name.is_empty() { 216 - return Err(std::io::Error::new(std::io::ErrorKind::Other, "closed")); 217 - } 218 - 219 - self.uncompressed_size += data.len() as u64; 220 - self.crc32_hasher.update(data); 221 - 222 - if let Some(encoder) = &mut self.encoder { 223 - encoder.write_all(data)?; 224 - } else { 225 - self.bindle.file.write_all(data)?; 226 - } 227 - 228 - Ok(()) 229 - } 230 - 231 - fn close_drop(&mut self) -> io::Result<()> { 232 - if self.name.is_empty() { 233 - return Ok(()); 234 - } 235 - 236 
- let (compression_type, current_pos) = if let Some(encoder) = self.encoder.take() { 237 - let mut f = encoder.finish()?; 238 - let pos = f.stream_position()?; 239 - // Sync the main file handle to match the encoder's position 240 - self.bindle.file.seek(SeekFrom::Start(pos))?; 241 - (1, pos) 242 - } else { 243 - let pos = self.bindle.file.stream_position()?; 244 - (0, pos) 245 - }; 246 - 247 - let compressed_size = current_pos - self.start_offset; 248 - 249 - // Handle 8-byte alignment padding 250 - let pad_len = pad::<8, u64>(current_pos); 251 - if pad_len > 0 { 252 - self.bindle.file.write_all(&vec![0u8; pad_len as usize])?; 253 - } 254 - 255 - self.bindle.data_end = current_pos + pad_len; 256 - 257 - let crc32_value = self.crc32_hasher.clone().finalize(); 258 - 259 - let mut entry = Entry { 260 - compression_type, 261 - ..Default::default() 262 - }; 263 - entry.set_offset(self.start_offset); 264 - entry.set_compressed_size(compressed_size); 265 - entry.set_uncompressed_size(self.uncompressed_size); 266 - entry.set_crc32(crc32_value); 267 - entry.set_name_len(self.name.len() as u16); 268 - 269 - self.bindle.index.insert(self.name.clone(), entry); 270 - self.name.clear(); // Mark as closed 271 - Ok(()) 272 - } 273 - 274 - pub fn close(mut self) -> io::Result<()> { 275 - self.close_drop() 276 - } 277 - } 278 - 279 - impl Bindle { 280 - /// Create a new bindle file, this will overwrite the existing file 281 - pub fn create<P: AsRef<Path>>(path: P) -> io::Result<Self> { 282 - let path_buf = path.as_ref().to_path_buf(); 283 - let opts = OpenOptions::new() 284 - .truncate(true) 285 - .read(true) 286 - .write(true) 287 - .create(true) 288 - .to_owned(); 289 - Self::new(path_buf, opts) 290 - } 291 - 292 - /// Open or create a bindle file 293 - pub fn open<P: AsRef<Path>>(path: P) -> io::Result<Self> { 294 - let path_buf = path.as_ref().to_path_buf(); 295 - let opts = OpenOptions::new() 296 - .read(true) 297 - .write(true) 298 - .create(true) 299 - .to_owned(); 300 - 
Self::new(path_buf, opts) 301 - } 302 - 303 - /// Open a bindle file, this will not create it if it doesn't exist 304 - pub fn load<P: AsRef<Path>>(path: P) -> io::Result<Self> { 305 - let path_buf = path.as_ref().to_path_buf(); 306 - let opts = OpenOptions::new().read(true).write(true).to_owned(); 307 - Self::new(path_buf, opts) 308 - } 309 - 310 - /// Create a new `Bindle` from a path and file, the path must match the file 311 - pub fn new(path: PathBuf, opts: OpenOptions) -> io::Result<Self> { 312 - let mut file = opts.open(&path)?; 313 - file.lock_shared()?; 314 - let len = file.metadata()?.len(); 315 - 316 - // Handle completely new/empty files 317 - if len == 0 { 318 - file.write_all(BNDL_MAGIC)?; 319 - return Ok(Self { 320 - path, 321 - file, 322 - mmap: None, 323 - index: BTreeMap::new(), 324 - data_end: HEADER_SIZE as u64, 325 - }); 326 - } 327 - 328 - // Safety check: File must be at least HEADER + FOOTER size (24 bytes) 329 - // This prevents "attempt to subtract with overflow" when calculating footer_pos 330 - if len < (HEADER_SIZE + FOOTER_SIZE) as u64 { 331 - return Err(io::Error::new( 332 - io::ErrorKind::InvalidData, 333 - "File too small to be a valid bindle", 334 - )); 335 - } 336 - 337 - let mut header = [0u8; 8]; 338 - file.read_exact(&mut header)?; 339 - if &header != BNDL_MAGIC { 340 - return Err(io::Error::new(io::ErrorKind::InvalidData, "Invalid header")); 341 - } 342 - 343 - let m = unsafe { Mmap::map(&file)? }; 344 - 345 - // Calculate footer position. Subtraction is now safe due to the check above. 
346 - let footer_pos = m.len() - FOOTER_SIZE; 347 - let footer = Footer::read_from_bytes(&m[footer_pos..]).unwrap(); 348 - 349 - if footer.magic != FOOTER_MAGIC { 350 - return Err(io::Error::new( 351 - io::ErrorKind::InvalidData, 352 - "Invalid footer, the file may be corrupt", 353 - )); 354 - } 355 - 356 - let data_end = footer.index_offset; 357 - let count = footer.entry_count; 358 - let mut index = BTreeMap::new(); 359 - 360 - let mut cursor = data_end as usize; 361 - for _ in 0..count { 362 - // Ensure there is enough data left for an Entry header 363 - if cursor + ENTRY_SIZE > footer_pos { 364 - break; 365 - } 366 - 367 - let entry = Entry::read_from_bytes(&m[cursor..cursor + ENTRY_SIZE]).unwrap(); 368 - let n_start = cursor + ENTRY_SIZE; 369 - 370 - // Validate that the filename exists within the mapped bounds 371 - if n_start + entry.name_len() > footer_pos { 372 - break; 373 - } 374 - 375 - let name = 376 - String::from_utf8_lossy(&m[n_start..n_start + entry.name_len()]).into_owned(); 377 - index.insert(name, entry); 378 - 379 - let total = ENTRY_SIZE + entry.name_len(); 380 - cursor += (total + (BNDL_ALIGN - 1)) & !(BNDL_ALIGN - 1); 381 - } 382 - 383 - Ok(Self { 384 - path, 385 - file, 386 - mmap: Some(m), 387 - index, 388 - data_end, 389 - }) 390 - } 391 - 392 - fn should_auto_compress(&self, compress: Compress, len: usize) -> bool { 393 - compress == Compress::Zstd || (compress == Compress::Auto && len > AUTO_COMPRESS_THRESHOLD) 394 - } 395 - 396 - pub fn add(&mut self, name: &str, data: &[u8], compress: Compress) -> io::Result<()> { 397 - let mut stream = self.writer(name, compress)?; 398 - stream.write_all(data)?; 399 - stream.close()?; 400 - Ok(()) 401 - } 402 - 403 - pub fn add_file( 404 - &mut self, 405 - name: &str, 406 - path: impl AsRef<Path>, 407 - compress: Compress, 408 - ) -> io::Result<()> { 409 - let mut stream = self.writer(name, compress)?; 410 - let mut src = std::fs::File::open(path)?; 411 - std::io::copy(&mut src, &mut stream)?; 412 - 
Ok(()) 413 - } 414 - 415 - pub fn save(&mut self) -> io::Result<()> { 416 - self.file.lock_exclusive()?; 417 - self.file.seek(SeekFrom::Start(self.data_end))?; 418 - let index_start = self.data_end; 419 - 420 - for (name, entry) in &self.index { 421 - self.file.write_all(entry.as_bytes())?; 422 - self.file.write_all(name.as_bytes())?; 423 - let pad = pad::<BNDL_ALIGN, usize>(ENTRY_SIZE + name.len()); 424 - if pad > 0 { 425 - self.file.write_all(&vec![0u8; pad])?; 426 - } 427 - } 428 - 429 - let footer = Footer { 430 - index_offset: index_start, 431 - entry_count: self.index.len() as u32, 432 - magic: FOOTER_MAGIC, 433 - }; 434 - self.file.write_all(footer.as_bytes())?; 435 - 436 - // Truncate file to current position to remove any old data 437 - let current_pos = self.file.stream_position()?; 438 - self.file.set_len(current_pos)?; 439 - self.file.flush()?; 440 - 441 - self.mmap = Some(unsafe { Mmap::map(&self.file)? }); 442 - self.file.lock_shared()?; 443 - Ok(()) 444 - } 445 - 446 - pub fn vacuum(&mut self) -> io::Result<()> { 447 - let backup_path = self.path.with_extension("backup"); 448 - 449 - // Release locks and close current file 450 - drop(self.mmap.take()); 451 - let _ = self.file.unlock(); 452 - 453 - // Rename original to backup 454 - std::fs::rename(&self.path, &backup_path)?; 455 - 456 - // Open backup for reading 457 - let mut backup_file = File::open(&backup_path)?; 458 - 459 - // Create new file at original path 460 - let result = { 461 - let mut new_file = OpenOptions::new() 462 - .write(true) 463 - .read(true) 464 - .create(true) 465 - .truncate(true) 466 - .open(&self.path)?; 467 - 468 - new_file.write_all(BNDL_MAGIC)?; 469 - let mut current_offset = HEADER_SIZE as u64; 470 - 471 - // Copy only live entries from backup to new file 472 - for entry in self.index.values_mut() { 473 - let mut buf = vec![0u8; entry.compressed_size() as usize]; 474 - backup_file.seek(SeekFrom::Start(entry.offset()))?; 475 - backup_file.read_exact(&mut buf)?; 476 - 477 
- new_file.seek(SeekFrom::Start(current_offset))?; 478 - new_file.write_all(&buf)?; 479 - 480 - entry.set_offset(current_offset); 481 - let pad = pad::<8, u64>(entry.compressed_size()); 482 - if pad > 0 { 483 - new_file.write_all(&vec![0u8; pad as usize])?; 484 - } 485 - current_offset += entry.compressed_size() + pad; 486 - } 487 - 488 - // Write the index and footer 489 - let index_start = current_offset; 490 - for (name, entry) in &self.index { 491 - new_file.write_all(entry.as_bytes())?; 492 - new_file.write_all(name.as_bytes())?; 493 - let pad = pad::<BNDL_ALIGN, usize>(ENTRY_SIZE + name.len()); 494 - if pad > 0 { 495 - new_file.write_all(&vec![0u8; pad])?; 496 - } 497 - } 498 - 499 - let footer = Footer { 500 - index_offset: index_start, 501 - entry_count: self.index.len() as u32, 502 - magic: FOOTER_MAGIC, 503 - }; 504 - new_file.write_all(footer.as_bytes())?; 505 - new_file.sync_all()?; 506 - 507 - Ok(()) 508 - }; 509 - 510 - // Handle result 511 - match result { 512 - Ok(()) => { 513 - // Success - delete backup 514 - std::fs::remove_file(&backup_path).ok(); 515 - } 516 - Err(e) => { 517 - // Failure - restore from backup 518 - std::fs::remove_file(&self.path).ok(); 519 - std::fs::rename(&backup_path, &self.path).ok(); 520 - return Err(e); 521 - } 522 - } 523 - 524 - // Re-open the new file 525 - let file = OpenOptions::new().read(true).write(true).open(&self.path)?; 526 - file.lock_shared()?; 527 - let mmap = unsafe { Mmap::map(&file)? 
}; 528 - 529 - let footer_pos = mmap.len() - FOOTER_SIZE; 530 - let footer = Footer::read_from_bytes(&mmap[footer_pos..]).unwrap(); 531 - 532 - self.file = file; 533 - self.mmap = Some(mmap); 534 - self.data_end = footer.index_offset; 535 - 536 - Ok(()) 537 - } 538 - 539 - pub fn read<'a>(&'a self, name: &str) -> Option<Cow<'a, [u8]>> { 540 - let entry = self.index.get(name)?; 541 - let mmap = self.mmap.as_ref()?; 542 - 543 - let data = if entry.compression_type == Compress::Zstd as u8 { 544 - let compressed_data = mmap.get( 545 - entry.offset() as usize..(entry.offset() + entry.compressed_size()) as usize, 546 - )?; 547 - let mut out = Vec::with_capacity(entry.uncompressed_size() as usize); 548 - zstd::Decoder::new(compressed_data) 549 - .ok()? 550 - .read_to_end(&mut out) 551 - .ok()?; 552 - Cow::Owned(out) 553 - } else { 554 - let uncompressed_data = mmap.get( 555 - entry.offset() as usize..(entry.offset() + entry.uncompressed_size()) as usize, 556 - )?; 557 - Cow::Borrowed(uncompressed_data) 558 - }; 559 - 560 - // Verify CRC32 561 - let computed_crc = crc32fast::hash(&data); 562 - if computed_crc != entry.crc32() { 563 - return None; 564 - } 565 - 566 - Some(data) 567 - } 568 - 569 - /// Read to an `std::io::Write` 570 - pub fn read_to<W: std::io::Write>(&self, name: &str, mut w: W) -> std::io::Result<u64> { 571 - let mut reader = self.reader(name)?; 572 - let bytes_copied = std::io::copy(&mut reader, &mut w)?; 573 - reader.verify_crc32()?; 574 - Ok(bytes_copied) 575 - } 576 - 577 - // Returns a seekable reader for an entry. 578 - /// If compressed, it provides a transparently decompressing stream. 
579 - pub fn reader<'a>(&'a self, name: &str) -> io::Result<Reader<'a>> { 580 - let entry = self 581 - .index 582 - .get(name) 583 - .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "Entry not found"))?; 584 - 585 - let start = entry.offset() as usize; 586 - let end = start + entry.compressed_size() as usize; 587 - let mmap = self 588 - .mmap 589 - .as_ref() 590 - .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "Missing mmap"))?; 591 - let data_slice = &mmap[start..end]; 592 - 593 - let cursor = io::Cursor::new(data_slice); 594 - 595 - if entry.compression_type == 1 { 596 - // Zstd streaming decoder 597 - let decoder = zstd::Decoder::new(cursor)?; 598 - Ok(Reader { 599 - decoder: Either::Left(decoder), 600 - crc32_hasher: Hasher::new(), 601 - expected_crc32: entry.crc32(), 602 - }) 603 - } else { 604 - Ok(Reader { 605 - decoder: Either::Right(cursor), 606 - crc32_hasher: Hasher::new(), 607 - expected_crc32: entry.crc32(), 608 - }) 609 - } 610 - } 611 - 612 - /// The number of entries 613 - pub fn len(&self) -> usize { 614 - self.index.len() 615 - } 616 - 617 - /// Returns true if there are no entries 618 - pub fn is_empty(&self) -> bool { 619 - self.index.is_empty() 620 - } 621 - 622 - /// Direct readonly access to the index 623 - pub fn index(&self) -> &BTreeMap<String, Entry> { 624 - &self.index 625 - } 626 - 627 - /// Clear all entries 628 - pub fn clear(&mut self) { 629 - self.index.clear() 630 - } 631 - 632 - /// Checks if an entry exists in the archive index. 633 - pub fn exists(&self, name: &str) -> bool { 634 - self.index.contains_key(name) 635 - } 636 - 637 - /// Remove an entry from the index. 638 - /// The data remains in the file until vacuum() is called. 639 - /// Returns true if the entry existed and was removed. 640 - pub fn remove(&mut self, name: &str) -> bool { 641 - self.index.remove(name).is_some() 642 - } 643 - 644 - /// Recursively packs a directory into the archive. 
645 - pub fn pack<P: AsRef<Path>>(&mut self, src_dir: P, compress: Compress) -> io::Result<()> { 646 - self.pack_recursive(src_dir.as_ref(), src_dir.as_ref(), compress) 647 - } 648 - 649 - fn pack_recursive( 650 - &mut self, 651 - base: &Path, 652 - current: &Path, 653 - compress: Compress, 654 - ) -> io::Result<()> { 655 - if current.is_dir() { 656 - for entry in std::fs::read_dir(current)? { 657 - self.pack_recursive(base, &entry?.path(), compress)?; 658 - } 659 - } else { 660 - let name = current 661 - .strip_prefix(base) 662 - .map_err(|e| io::Error::new(io::ErrorKind::Other, e))? 663 - .to_string_lossy(); 664 - let mut data = Vec::new(); 665 - File::open(current)?.read_to_end(&mut data)?; 666 - self.add(&name, &data, compress)?; 667 - } 668 - Ok(()) 669 - } 670 - 671 - /// Unpacks all archive entries to a destination directory. 672 - pub fn unpack<P: AsRef<Path>>(&self, dest: P) -> io::Result<()> { 673 - let dest_path = dest.as_ref(); 674 - if let Some(parent) = dest_path.parent() { 675 - std::fs::create_dir_all(parent)?; 676 - } 677 - for (name, _) in &self.index { 678 - if let Some(data) = self.read(name) { 679 - let file_path = dest_path.join(name); 680 - if let Some(parent) = file_path.parent() { 681 - std::fs::create_dir_all(parent)?; 682 - } 683 - std::fs::write(file_path, data)?; 684 - } 685 - } 686 - Ok(()) 687 - } 688 - 689 - pub fn writer<'a>(&'a mut self, name: &str, compress: Compress) -> io::Result<Writer<'a>> { 690 - self.file.seek(SeekFrom::Start(self.data_end))?; 691 - let compress = self.should_auto_compress(compress, 0); 692 - let f = self.file.try_clone()?; 693 - let start_offset = self.data_end; 694 - Ok(Writer { 695 - name: name.to_string(), 696 - bindle: self, 697 - encoder: if compress { 698 - Some(zstd::Encoder::new(f, 3)?) 
699 - } else { 700 - None 701 - }, 702 - start_offset, 703 - uncompressed_size: 0, 704 - crc32_hasher: Hasher::new(), 705 - }) 706 - } 707 - } 708 - 709 - impl Drop for Bindle { 710 - fn drop(&mut self) { 711 - let _ = self.file.unlock(); 712 - } 54 + Ok(()) 713 55 } 714 56 715 57 #[cfg(test)] 716 58 mod tests { 717 59 use super::*; 718 60 use std::fs; 61 + use std::fs::OpenOptions; 62 + use std::io::{Seek, SeekFrom}; 719 63 720 64 #[test] 721 65 fn test_create_and_read() { ··· 971 315 972 316 // 3. Corrupt the data by modifying a byte directly in the file 973 317 { 974 - use std::io::{Seek, SeekFrom, Write}; 975 318 let mut file = OpenOptions::new() 976 319 .write(true) 977 320 .read(true) ··· 980 323 981 324 // Skip the header and modify the first byte of data 982 325 file.seek(SeekFrom::Start(HEADER_SIZE as u64)).unwrap(); 983 - file.write_all(b"X").unwrap(); // Corrupt first byte 326 + file.write(&[b'X']).unwrap(); // Corrupt first byte 984 327 file.flush().unwrap(); 985 328 } 986 329
+60
src/reader.rs
··· 1 + use crc32fast::Hasher; 2 + use std::io::{self, BufReader, Read, Seek, SeekFrom}; 3 + 4 + pub enum Either<A, B> { 5 + Left(A), 6 + Right(B), 7 + } 8 + 9 + pub struct Reader<'a> { 10 + pub(crate) decoder: Either<zstd::Decoder<'static, BufReader<io::Cursor<&'a [u8]>>>, io::Cursor<&'a [u8]>>, 11 + pub(crate) crc32_hasher: Hasher, 12 + pub(crate) expected_crc32: u32, 13 + } 14 + 15 + impl<'a> Read for Reader<'a> { 16 + fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { 17 + let n = match &mut self.decoder { 18 + Either::Left(x) => x.read(buf)?, 19 + Either::Right(x) => x.read(buf)?, 20 + }; 21 + 22 + if n > 0 { 23 + self.crc32_hasher.update(&buf[..n]); 24 + } 25 + 26 + Ok(n) 27 + } 28 + } 29 + 30 + // Note: Seeking is only supported for uncompressed entries in this simple implementation. 31 + // Seeking in compressed streams requires a frame-aware decoder. 32 + impl<'a> Seek for Reader<'a> { 33 + fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> { 34 + match &mut self.decoder { 35 + Either::Left(_) => Err(io::Error::new( 36 + io::ErrorKind::Unsupported, 37 + "Seeking not supported on compressed streams", 38 + )), 39 + Either::Right(x) => x.seek(pos), 40 + } 41 + } 42 + } 43 + 44 + impl<'a> Reader<'a> { 45 + /// Verify the CRC32 of the data read so far. 46 + /// This should be called after all data has been read to ensure data integrity. 47 + pub fn verify_crc32(&self) -> io::Result<()> { 48 + let computed_crc = self.crc32_hasher.clone().finalize(); 49 + if computed_crc != self.expected_crc32 { 50 + return Err(io::Error::new( 51 + io::ErrorKind::InvalidData, 52 + format!( 53 + "CRC32 mismatch: expected {:x}, got {:x}", 54 + self.expected_crc32, computed_crc 55 + ), 56 + )); 57 + } 58 + Ok(()) 59 + } 60 + }
+95
src/writer.rs
··· 1 + use crc32fast::Hasher; 2 + use std::io::{self, Seek, SeekFrom, Write}; 3 + 4 + use crate::bindle::Bindle; 5 + use crate::entry::Entry; 6 + 7 + pub struct Writer<'a> { 8 + pub(crate) bindle: &'a mut Bindle, 9 + pub(crate) encoder: Option<zstd::Encoder<'a, std::fs::File>>, 10 + pub(crate) name: String, 11 + pub(crate) start_offset: u64, 12 + pub(crate) uncompressed_size: u64, 13 + pub(crate) crc32_hasher: Hasher, 14 + } 15 + 16 + impl<'a> Drop for Writer<'a> { 17 + fn drop(&mut self) { 18 + let _ = self.close_drop(); 19 + } 20 + } 21 + 22 + impl<'a> std::io::Write for Writer<'a> { 23 + fn write(&mut self, buf: &[u8]) -> io::Result<usize> { 24 + self.write_chunk(buf)?; 25 + Ok(buf.len()) 26 + } 27 + 28 + fn flush(&mut self) -> io::Result<()> { 29 + Ok(()) 30 + } 31 + } 32 + 33 + impl<'a> Writer<'a> { 34 + pub fn write_chunk(&mut self, data: &[u8]) -> io::Result<()> { 35 + if self.name.is_empty() { 36 + return Err(std::io::Error::new(std::io::ErrorKind::Other, "closed")); 37 + } 38 + 39 + self.uncompressed_size += data.len() as u64; 40 + self.crc32_hasher.update(data); 41 + 42 + if let Some(encoder) = &mut self.encoder { 43 + encoder.write_all(data)?; 44 + } else { 45 + self.bindle.file.write_all(data)?; 46 + } 47 + 48 + Ok(()) 49 + } 50 + 51 + fn close_drop(&mut self) -> io::Result<()> { 52 + if self.name.is_empty() { 53 + return Ok(()); 54 + } 55 + 56 + let (compression_type, current_pos) = if let Some(encoder) = self.encoder.take() { 57 + let mut f = encoder.finish()?; 58 + let pos = f.stream_position()?; 59 + // Sync the main file handle to match the encoder's position 60 + self.bindle.file.seek(SeekFrom::Start(pos))?; 61 + (1, pos) 62 + } else { 63 + let pos = self.bindle.file.stream_position()?; 64 + (0, pos) 65 + }; 66 + 67 + let compressed_size = current_pos - self.start_offset; 68 + 69 + // Handle 8-byte alignment padding 70 + let pad_len = crate::pad::<8, u64>(current_pos); 71 + if pad_len > 0 { 72 + crate::write_padding(&mut self.bindle.file, pad_len 
as usize)?; 73 + } 74 + 75 + self.bindle.data_end = current_pos + pad_len; 76 + 77 + let crc32_value = self.crc32_hasher.clone().finalize(); 78 + 79 + let mut entry = Entry::default(); 80 + entry.set_offset(self.start_offset); 81 + entry.set_compressed_size(compressed_size); 82 + entry.set_uncompressed_size(self.uncompressed_size); 83 + entry.set_crc32(crc32_value); 84 + entry.set_name_len(self.name.len() as u16); 85 + entry.compression_type = compression_type; 86 + 87 + self.bindle.index.insert(self.name.clone(), entry); 88 + self.name.clear(); // Mark as closed 89 + Ok(()) 90 + } 91 + 92 + pub fn close(mut self) -> io::Result<()> { 93 + self.close_drop() 94 + } 95 + }