an efficient binary archive format
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

docs, cleanup

zach dbd32f24 efd1647c

+216 -31
+67
include/bindle.h
··· 12 12 #include <stdint.h> 13 13 #include <stdbool.h> 14 14 15 + /** 16 + * Compression mode for entries. 17 + */ 15 18 enum BindleCompress { 19 + /** 20 + * No compression. 21 + */ 16 22 BindleCompressNone = 0, 23 + /** 24 + * Zstandard compression. 25 + */ 17 26 BindleCompressZstd = 1, 27 + /** 28 + * Automatically compress if the entry is larger than the 2KB threshold. 29 + * Note: This is never stored on disk, only used as a policy hint. 30 + */ 18 31 BindleCompressAuto = 2, 19 32 }; 20 33 typedef uint8_t BindleCompress; 21 34 35 + /** 36 + * A binary archive for collecting files. 37 + * 38 + * Uses memory-mapped I/O for fast reads, supports optional zstd compression, and handles updates via shadowing. 39 + * Files can be added incrementally without rewriting the entire archive. 40 + * 41 + * # Example 42 + * 43 + * ```no_run 44 + * use bindle_file::{Bindle, Compress}; 45 + * 46 + * let mut archive = Bindle::open("data.bndl")?; 47 + * archive.add("file.txt", b"data", Compress::None)?; 48 + * archive.save()?; 49 + * # Ok::<(), std::io::Error>(()) 50 + * ``` 51 + */ 22 52 typedef struct Bindle Bindle; 23 53 54 + /** 55 + * A streaming reader for archive entries. 56 + * 57 + * Created by the archive's `reader()` method. Automatically decompresses compressed entries and tracks CRC32 for integrity verification. 58 + * 59 + * # Example 60 + * 61 + * ```no_run 62 + * # use bindle_file::Bindle; 63 + * # let archive = Bindle::open("data.bndl")?; 64 + * let mut reader = archive.reader("file.txt")?; 65 + * std::io::copy(&mut reader, &mut std::io::stdout())?; 66 + * reader.verify_crc32()?; 67 + * # Ok::<(), std::io::Error>(()) 68 + * ``` 69 + */ 24 70 typedef struct BindleReader BindleReader; 25 71 72 + /** 73 + * A streaming writer for adding entries to an archive. 74 + * 75 + * Created by [`Bindle::writer()`]. Automatically compresses data if requested and computes CRC32 for integrity verification.
76 + * 77 + * The writer must be closed with [`close()`](Writer::close) or will be automatically closed when dropped. After closing, call [`Bindle::save()`] to commit the index. 78 + * 79 + * # Example 80 + * 81 + * ```no_run 82 + * use std::io::Write; 83 + * use bindle_file::{Bindle, Compress}; 84 + * 85 + * let mut archive = Bindle::open("data.bndl")?; 86 + * let mut writer = archive.writer("file.txt", Compress::None)?; 87 + * writer.write_all(b"data")?; 88 + * writer.close()?; 89 + * archive.save()?; 90 + * # Ok::<(), std::io::Error>(()) 91 + * ``` 92 + */ 26 93 typedef struct BindleWriter BindleWriter; 27 94 28 95 /**
+68 -22
src/bindle.rs
··· 13 13 use crate::reader::{Either, Reader}; 14 14 use crate::writer::Writer; 15 15 use crate::{ 16 - pad, write_padding, AUTO_COMPRESS_THRESHOLD, BNDL_ALIGN, BNDL_MAGIC, ENTRY_SIZE, FOOTER_MAGIC, 17 - FOOTER_SIZE, HEADER_SIZE, 16 + AUTO_COMPRESS_THRESHOLD, BNDL_ALIGN, BNDL_MAGIC, ENTRY_SIZE, FOOTER_MAGIC, FOOTER_SIZE, 17 + HEADER_SIZE, pad, write_padding, 18 18 }; 19 19 20 + /// A binary archive for collecting files. 21 + /// 22 + /// Uses memory-mapped I/O for fast reads, supports optional zstd compression, and handles updates via shadowing. 23 + /// Files can be added incrementally without rewriting the entire archive. 24 + /// 25 + /// # Example 26 + /// 27 + /// ```no_run 28 + /// use bindle_file::{Bindle, Compress}; 29 + /// 30 + /// let mut archive = Bindle::open("data.bndl")?; 31 + /// archive.add("file.txt", b"data", Compress::None)?; 32 + /// archive.save()?; 33 + /// # Ok::<(), std::io::Error>(()) 34 + /// ``` 20 35 pub struct Bindle { 21 36 pub(crate) path: PathBuf, 22 37 pub(crate) file: File, ··· 26 41 } 27 42 28 43 impl Bindle { 29 - /// Create a new bindle file, this will overwrite the existing file 44 + /// Creates a new archive, overwriting any existing file at the path. 30 45 pub fn create<P: AsRef<Path>>(path: P) -> io::Result<Self> { 31 46 let path_buf = path.as_ref().to_path_buf(); 32 47 let opts = OpenOptions::new() ··· 38 53 Self::new(path_buf, opts) 39 54 } 40 55 41 - /// Open or create a bindle file 56 + /// Opens an existing archive or creates a new one if it doesn't exist. 42 57 pub fn open<P: AsRef<Path>>(path: P) -> io::Result<Self> { 43 58 let path_buf = path.as_ref().to_path_buf(); 44 59 let opts = OpenOptions::new() ··· 49 64 Self::new(path_buf, opts) 50 65 } 51 66 52 - /// Open a bindle file, this will not create it if it doesn't exist 67 + /// Opens an existing archive. Returns an error if the file doesn't exist. 
53 68 pub fn load<P: AsRef<Path>>(path: P) -> io::Result<Self> { 54 69 let path_buf = path.as_ref().to_path_buf(); 55 70 let opts = OpenOptions::new().read(true).write(true).to_owned(); ··· 93 108 94 109 // Calculate footer position. Subtraction is now safe due to the check above. 95 110 let footer_pos = m.len() - FOOTER_SIZE; 96 - let footer = Footer::read_from_bytes(&m[footer_pos..]).map_err(|_| { 97 - io::Error::new(io::ErrorKind::InvalidData, "Failed to read footer") 98 - })?; 111 + let footer = Footer::read_from_bytes(&m[footer_pos..]) 112 + .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "Failed to read footer"))?; 99 113 100 114 if footer.magic() != FOOTER_MAGIC { 101 115 return Err(io::Error::new( ··· 147 161 compress == Compress::Zstd || (compress == Compress::Auto && len > AUTO_COMPRESS_THRESHOLD) 148 162 } 149 163 164 + /// Adds data to the archive with the given name. 165 + /// 166 + /// If an entry with the same name exists, it will be shadowed. Call [`save()`](Bindle::save) to commit changes. 150 167 pub fn add(&mut self, name: &str, data: &[u8], compress: Compress) -> io::Result<()> { 151 168 let mut stream = self.writer(name, compress)?; 152 169 stream.write_all(data)?; ··· 154 171 Ok(()) 155 172 } 156 173 174 + /// Adds a file from the filesystem to the archive. 175 + /// 176 + /// Reads the file at `path` and stores it with the given `name`. Call [`save()`](Bindle::save) to commit changes. 157 177 pub fn add_file( 158 178 &mut self, 159 179 name: &str, ··· 166 186 Ok(()) 167 187 } 168 188 189 + /// Commits all pending changes by writing the index and footer to disk. 190 + /// 191 + /// Must be called after add/remove operations to make changes persistent. 169 192 pub fn save(&mut self) -> io::Result<()> { 170 193 self.file.lock_exclusive()?; 171 194 self.file.seek(SeekFrom::Start(self.data_end))?; ··· 193 216 Ok(()) 194 217 } 195 218 219 + /// Reclaims space by removing shadowed data. 
220 + /// 221 + /// Rebuilds the archive with only live entries, removing old versions of updated files. 196 222 pub fn vacuum(&mut self) -> io::Result<()> { 197 223 let temp_path = self.path.with_extension("tmp"); 198 224 ··· 256 282 257 283 let footer_pos = mmap.len() - FOOTER_SIZE; 258 284 let footer = Footer::read_from_bytes(&mmap[footer_pos..]).map_err(|_| { 259 - io::Error::new(io::ErrorKind::InvalidData, "Failed to read footer after vacuum") 285 + io::Error::new( 286 + io::ErrorKind::InvalidData, 287 + "Failed to read footer after vacuum", 288 + ) 260 289 })?; 261 290 262 291 self.file = temp_file; ··· 266 295 Ok(()) 267 296 } 268 297 298 + /// Reads an entry from the archive, decompressing if needed. 299 + /// 300 + /// Returns `None` if the entry doesn't exist or if CRC32 verification fails. 269 301 pub fn read<'a>(&'a self, name: &str) -> Option<Cow<'a, [u8]>> { 270 302 let entry = self.index.get(name)?; 271 303 let mmap = self.mmap.as_ref()?; ··· 296 328 Some(data) 297 329 } 298 330 299 - /// Read to an `std::io::Write` 331 + /// Reads an entry and writes it to the given writer. 332 + /// 333 + /// Returns the number of bytes written. Verifies CRC32 after reading. 300 334 pub fn read_to<W: std::io::Write>(&self, name: &str, mut w: W) -> std::io::Result<u64> { 301 335 let mut reader = self.reader(name)?; 302 336 let bytes_copied = std::io::copy(&mut reader, &mut w)?; ··· 304 338 Ok(bytes_copied) 305 339 } 306 340 307 - // Returns a seekable reader for an entry. 308 - /// If compressed, it provides a transparently decompressing stream. 341 + /// Returns a streaming reader for an entry. 342 + /// 343 + /// Automatically decompresses if the entry is compressed. Call [`Reader::verify_crc32()`] after reading to verify integrity. 309 344 pub fn reader<'a>(&'a self, name: &str) -> io::Result<Reader<'a>> { 310 345 let entry = self 311 346 .index ··· 339 374 } 340 375 } 341 376 342 - /// The number of entries 377 + /// Returns the number of entries in the archive. 
343 378 pub fn len(&self) -> usize { 344 379 self.index.len() 345 380 } 346 381 347 - /// Returns true if there are no entries 382 + /// Returns true if the archive contains no entries. 348 383 pub fn is_empty(&self) -> bool { 349 384 self.index.is_empty() 350 385 } 351 386 352 - /// Direct readonly access to the index 387 + /// Returns a reference to the archive index. 388 + /// 389 + /// The index maps entry names to their metadata. 353 390 pub fn index(&self) -> &BTreeMap<String, Entry> { 354 391 &self.index 355 392 } 356 393 357 - /// Clear all entries 394 + /// Removes all entries from the index. 395 + /// 396 + /// Call [`save()`](Bindle::save) to commit. Data remains in the file until [`vacuum()`](Bindle::vacuum) is called. 358 397 pub fn clear(&mut self) { 359 398 self.index.clear() 360 399 } 361 400 362 - /// Checks if an entry exists in the archive index. 401 + /// Returns true if an entry with the given name exists. 363 402 pub fn exists(&self, name: &str) -> bool { 364 403 self.index.contains_key(name) 365 404 } 366 405 367 - /// Remove an entry from the index. 368 - /// The data remains in the file until vacuum() is called. 369 - /// Returns true if the entry existed and was removed. 406 + /// Removes an entry from the index. 407 + /// 408 + /// Returns true if the entry existed. Data remains in the file until [`vacuum()`](Bindle::vacuum) is called. 370 409 pub fn remove(&mut self, name: &str) -> bool { 371 410 self.index.remove(name).is_some() 372 411 } 373 412 374 - /// Recursively packs a directory into the archive. 413 + /// Recursively adds all files from a directory to the archive. 414 + /// 415 + /// File paths are stored relative to the source directory. Call [`save()`](Bindle::save) to commit. 
375 416 pub fn pack<P: AsRef<Path>>(&mut self, src_dir: P, compress: Compress) -> io::Result<()> { 376 417 self.pack_recursive(src_dir.as_ref(), src_dir.as_ref(), compress) 377 418 } ··· 398 439 Ok(()) 399 440 } 400 441 401 - /// Unpacks all archive entries to a destination directory. 442 + /// Extracts all entries to a destination directory. 443 + /// 444 + /// Creates subdirectories as needed to match the stored paths. 402 445 pub fn unpack<P: AsRef<Path>>(&self, dest: P) -> io::Result<()> { 403 446 let dest_path = dest.as_ref(); 404 447 if let Some(parent) = dest_path.parent() { ··· 416 459 Ok(()) 417 460 } 418 461 462 + /// Creates a streaming writer for adding an entry. 463 + /// 464 + /// The writer must be closed and then [`save()`](Bindle::save) must be called to commit the entry. 419 465 pub fn writer<'a>(&'a mut self, name: &str, compress: Compress) -> io::Result<Writer<'a>> { 420 466 self.file.lock_exclusive()?; 421 467 self.file.seek(SeekFrom::Start(self.data_end))?;
+5
src/compress.rs
··· 1 + /// Compression mode for entries. 1 2 #[repr(u8)] 2 3 #[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)] 3 4 pub enum Compress { 5 + /// No compression. 4 6 None = 0, 7 + /// Zstandard compression. 5 8 Zstd = 1, 9 + /// Automatically compress if the entry is larger than the 2KB threshold. 10 + /// Note: This is never stored on disk, only used as a policy hint. 6 11 #[default] 7 12 Auto = 2, 8 13 }
+15 -5
src/entry.rs
··· 2 2 3 3 use crate::compress::Compress; 4 4 5 + /// Metadata for an entry in the archive. 6 + /// 7 + /// Contains information about stored files including offset, size, compression, and CRC32 checksum. 8 + /// Retrieved via the archive's `index()` method. 5 9 #[repr(C, packed)] 6 10 #[derive(FromBytes, Unaligned, IntoBytes, Immutable, Clone, Copy, Debug, Default)] 7 11 pub struct Entry { ··· 20 24 // - On big-endian systems: bytes are swapped to/from little-endian 21 25 22 26 impl Entry { 27 + /// Returns the byte offset where this entry's data starts in the archive. 23 28 pub fn offset(&self) -> u64 { 24 29 u64::from_le(self.offset) 25 30 } 26 31 27 - pub fn set_offset(&mut self, value: u64) { 32 + pub(crate) fn set_offset(&mut self, value: u64) { 28 33 self.offset = value.to_le(); 29 34 } 30 35 36 + /// Returns the compressed size of this entry in bytes. 31 37 pub fn compressed_size(&self) -> u64 { 32 38 u64::from_le(self.compressed_size) 33 39 } 34 40 35 - pub fn set_compressed_size(&mut self, value: u64) { 41 + pub(crate) fn set_compressed_size(&mut self, value: u64) { 36 42 self.compressed_size = value.to_le(); 37 43 } 38 44 45 + /// Returns the uncompressed size of this entry in bytes. 39 46 pub fn uncompressed_size(&self) -> u64 { 40 47 u64::from_le(self.uncompressed_size) 41 48 } 42 49 43 - pub fn set_uncompressed_size(&mut self, value: u64) { 50 + pub(crate) fn set_uncompressed_size(&mut self, value: u64) { 44 51 self.uncompressed_size = value.to_le(); 45 52 } 46 53 54 + /// Returns the CRC32 checksum of the uncompressed data. 47 55 pub fn crc32(&self) -> u32 { 48 56 u32::from_le(self.crc32) 49 57 } 50 58 51 - pub fn set_crc32(&mut self, value: u32) { 59 + pub(crate) fn set_crc32(&mut self, value: u32) { 52 60 self.crc32 = value.to_le(); 53 61 } 54 62 63 + /// Returns the length of the entry name in bytes. 
55 64 pub fn name_len(&self) -> usize { 56 65 u16::from_le(self.name_len) as usize 57 66 } 58 67 59 - pub fn set_name_len(&mut self, value: u16) { 68 + pub(crate) fn set_name_len(&mut self, value: u16) { 60 69 self.name_len = value.to_le(); 61 70 } 62 71 72 + /// Returns the compression type for this entry. 63 73 pub fn compression_type(&self) -> Compress { 64 74 Compress::from_u8(self.compression_type) 65 75 }
+18
src/lib.rs
··· 1 + //! Bindle is a binary archive format for collecting files. 2 + //! 3 + //! The format uses memory-mapped I/O for fast reads, optional zstd compression, 4 + //! and supports append-only writes with shadowing for updates. 5 + //! 6 + //! # Example 7 + //! 8 + //! ```no_run 9 + //! use bindle_file::{Bindle, Compress}; 10 + //! 11 + //! let mut archive = Bindle::open("data.bndl")?; 12 + //! archive.add("file.txt", b"data", Compress::None)?; 13 + //! archive.save()?; 14 + //! 15 + //! let data = archive.read("file.txt").unwrap(); 16 + //! # Ok::<(), std::io::Error>(()) 17 + //! ``` 18 + 1 19 use std::io::{self, Write}; 2 20 3 21 // Module declarations
+21 -4
src/reader.rs
··· 1 1 use crc32fast::Hasher; 2 2 use std::io::{self, BufReader, Read, Seek, SeekFrom}; 3 3 4 - pub enum Either<A, B> { 4 + pub(crate) enum Either<A, B> { 5 5 Left(A), 6 6 Right(B), 7 7 } 8 8 9 + /// A streaming reader for archive entries. 10 + /// 11 + /// Created by the archive's `reader()` method. Automatically decompresses compressed entries and tracks CRC32 for integrity verification. 12 + /// 13 + /// # Example 14 + /// 15 + /// ```no_run 16 + /// # use bindle_file::Bindle; 17 + /// # let archive = Bindle::open("data.bndl")?; 18 + /// let mut reader = archive.reader("file.txt")?; 19 + /// std::io::copy(&mut reader, &mut std::io::stdout())?; 20 + /// reader.verify_crc32()?; 21 + /// # Ok::<(), std::io::Error>(()) 22 + /// ``` 9 23 pub struct Reader<'a> { 10 - pub(crate) decoder: Either<zstd::Decoder<'static, BufReader<io::Cursor<&'a [u8]>>>, io::Cursor<&'a [u8]>>, 24 + pub(crate) decoder: 25 + Either<zstd::Decoder<'static, BufReader<io::Cursor<&'a [u8]>>>, io::Cursor<&'a [u8]>>, 11 26 pub(crate) crc32_hasher: Hasher, 12 27 pub(crate) expected_crc32: u32, 13 28 } ··· 42 57 } 43 58 44 59 impl<'a> Reader<'a> { 45 - /// Verify the CRC32 of the data read so far. 46 - /// This should be called after all data has been read to ensure data integrity. 60 + /// Verifies the CRC32 checksum of the data read so far. 61 + /// 62 + /// Should be called after reading all data to ensure integrity. 63 + /// Returns an error if the computed CRC32 doesn't match the expected value. 47 64 pub fn verify_crc32(&self) -> io::Result<()> { 48 65 let computed_crc = self.crc32_hasher.clone().finalize(); 49 66 if computed_crc != self.expected_crc32 {
+22
src/writer.rs
··· 4 4 use crate::bindle::Bindle; 5 5 use crate::entry::Entry; 6 6 7 + /// A streaming writer for adding entries to an archive. 8 + /// 9 + /// Created by [`Bindle::writer()`]. Automatically compresses data if requested and computes CRC32 for integrity verification. 10 + /// 11 + /// The writer must be closed with [`close()`](Writer::close) or will be automatically closed when dropped. After closing, call [`Bindle::save()`] to commit the index. 12 + /// 13 + /// # Example 14 + /// 15 + /// ```no_run 16 + /// use std::io::Write; 17 + /// use bindle_file::{Bindle, Compress}; 18 + /// 19 + /// let mut archive = Bindle::open("data.bndl")?; 20 + /// let mut writer = archive.writer("file.txt", Compress::None)?; 21 + /// writer.write_all(b"data")?; 22 + /// writer.close()?; 23 + /// archive.save()?; 24 + /// # Ok::<(), std::io::Error>(()) 25 + /// ``` 7 26 pub struct Writer<'a> { 8 27 pub(crate) bindle: &'a mut Bindle, 9 28 pub(crate) encoder: Option<zstd::Encoder<'a, std::fs::File>>, ··· 92 111 Ok(()) 93 112 } 94 113 114 + /// Closes the writer and finalizes the entry. 115 + /// 116 + /// Automatically called when the writer is dropped, but calling explicitly allows error handling. 95 117 pub fn close(mut self) -> io::Result<()> { 96 118 self.close_drop() 97 119 }