an efficient binary archive format
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

init

zach 60e8d5ea

+600
+1
.gitignore
··· 1 + /target
+16
Cargo.toml
··· 1 + [package] 2 + name = "bindle" 3 + version = "0.1.0" 4 + edition = "2024" 5 + 6 + [lib] 7 + crate-type = ["cdylib", "staticlib", "rlib"] 8 + 9 + [dependencies] 10 + crc32fast = "1.5.0" 11 + memmap2 = "0.9.9" 12 + zerocopy = { version = "0.8.38", features = ["std", "derive"] } 13 + zstd = "0.13.3" 14 + 15 + [build-dependencies] 16 + cbindgen = "0.29"
+53
bindle.h
··· 1 + /* Auto-generated by cbindgen - do not edit manually */ 2 + 3 + #ifndef BINDLE_H 4 + #define BINDLE_H 5 + 6 + #include <stdarg.h> 7 + #include <stdbool.h> 8 + #include <stddef.h> 9 + #include <stdint.h> 10 + #include <stdlib.h> 11 + #include <stddef.h> 12 + #include <stdint.h> 13 + #include <stdbool.h> 14 + 15 + /** 16 + * Opaque handle to a Bindle archive. 17 + */ 18 + typedef struct BindleContext BindleContext; 19 + 20 + struct BindleContext *bindle_open(const char *path); 21 + 22 + /** 23 + * Adds a new entry. Returns true on success. 24 + */ 25 + bool bindle_add(struct BindleContext *ctx, 26 + const char *name, 27 + const uint8_t *data, 28 + size_t data_len, 29 + bool compress); 30 + 31 + /** 32 + * Commits changes to disk. 33 + */ 34 + bool bindle_save(struct BindleContext *ctx); 35 + 36 + /** 37 + * Frees BindleContext 38 + */ 39 + void bindle_free(struct BindleContext *ctx); 40 + 41 + uint8_t *bindle_read(struct BindleContext *ctx, const char *name, size_t *out_len); 42 + 43 + void bindle_free_buffer(uint8_t *ptr, size_t len); 44 + 45 + size_t bindle_length(const struct BindleContext *ctx); 46 + 47 + /** 48 + * Returns the name of the entry at the given index. 49 + * The string is owned by the Bindle; the caller must NOT free it. 50 + */ 51 + const char *bindle_entry_name(const struct BindleContext *ctx, size_t index, size_t *len); 52 + 53 + #endif /* BINDLE_H */
+16
build.rs
··· 1 + use std::env; 2 + use std::path::PathBuf; 3 + 4 + fn main() { 5 + let crate_dir = env::var("CARGO_MANIFEST_DIR").unwrap(); 6 + 7 + // Instead of using .generate(), use the Builder for more control 8 + let config = cbindgen::Config::from_file("cbindgen.toml").unwrap_or_default(); 9 + 10 + cbindgen::Builder::new() 11 + .with_crate(&crate_dir) 12 + .with_config(config) 13 + .generate() 14 + .expect("Unable to generate bindings") 15 + .write_to_file(PathBuf::from(crate_dir).join("bindle.h")); 16 + }
+6
cbindgen.toml
# cbindgen configuration used by build.rs to generate bindle.h.
language = "C"
header = "/* Auto-generated by cbindgen - do not edit manually */"
include_guard = "BINDLE_H"
# No `sys_includes`: for C output cbindgen already emits <stdarg.h>,
# <stdbool.h>, <stddef.h>, <stdint.h> and <stdlib.h>; listing three of them
# again only duplicated the #include lines in the generated header.
usize_is_size_t = true
+149
src/ffi.rs
··· 1 + use std::ffi::CStr; 2 + use std::os::raw::c_char; 3 + use std::slice; 4 + 5 + use crate::Bindle; 6 + 7 + /// Opaque handle to a Bindle archive. 8 + pub struct BindleContext { 9 + pub(crate) inner: Bindle, 10 + } 11 + 12 + #[unsafe(no_mangle)] 13 + pub unsafe extern "C" fn bindle_open(path: *const c_char) -> *mut BindleContext { 14 + if path.is_null() { 15 + return std::ptr::null_mut(); 16 + } 17 + 18 + // Explicit unsafe block for raw pointer dereference 19 + let path_str = unsafe { 20 + match CStr::from_ptr(path).to_str() { 21 + Ok(s) => s, 22 + Err(_) => return std::ptr::null_mut(), 23 + } 24 + }; 25 + 26 + match Bindle::open(path_str) { 27 + Ok(b) => Box::into_raw(Box::new(BindleContext { inner: b })), 28 + Err(_) => std::ptr::null_mut(), 29 + } 30 + } 31 + 32 + /// Adds a new entry. Returns true on success. 33 + #[unsafe(no_mangle)] 34 + pub unsafe extern "C" fn bindle_add( 35 + ctx: *mut BindleContext, 36 + name: *const c_char, 37 + data: *const u8, 38 + data_len: usize, 39 + compress: bool, 40 + ) -> bool { 41 + if ctx.is_null() || name.is_null() || (data.is_null() && data_len > 0) { 42 + return false; 43 + } 44 + 45 + unsafe { 46 + let name_str = match CStr::from_ptr(name).to_str() { 47 + Ok(s) => s, 48 + Err(_) => return false, 49 + }; 50 + 51 + let data_slice = slice::from_raw_parts(data, data_len); 52 + let b = &mut (*ctx).inner; 53 + 54 + b.add(name_str, data_slice, compress).is_ok() 55 + } 56 + } 57 + 58 + /// Commits changes to disk. 
59 + #[unsafe(no_mangle)] 60 + pub unsafe extern "C" fn bindle_save(ctx: *mut BindleContext) -> bool { 61 + if ctx.is_null() { 62 + return false; 63 + } 64 + unsafe { 65 + let b = &mut (*ctx).inner; 66 + b.save().is_ok() 67 + } 68 + } 69 + 70 + /// Frees BindleContext 71 + #[unsafe(no_mangle)] 72 + pub unsafe extern "C" fn bindle_free(ctx: *mut BindleContext) { 73 + if ctx.is_null() { 74 + return; 75 + } 76 + unsafe { drop(Box::from_raw(ctx)) } 77 + } 78 + 79 + #[unsafe(no_mangle)] 80 + pub unsafe extern "C" fn bindle_read( 81 + ctx: *mut BindleContext, 82 + name: *const c_char, 83 + out_len: *mut usize, 84 + ) -> *mut u8 { 85 + if ctx.is_null() || name.is_null() || out_len.is_null() { 86 + return std::ptr::null_mut(); 87 + } 88 + 89 + unsafe { 90 + let name_str = match CStr::from_ptr(name).to_str() { 91 + Ok(s) => s, 92 + Err(_) => return std::ptr::null_mut(), 93 + }; 94 + 95 + let b = &(*ctx).inner; 96 + 97 + if let Some(data) = b.read(name_str) { 98 + let mut bytes = data.to_vec(); 99 + bytes.shrink_to_fit(); 100 + let ptr = bytes.as_mut_ptr(); 101 + *out_len = bytes.len(); 102 + std::mem::forget(bytes); 103 + ptr 104 + } else { 105 + std::ptr::null_mut() 106 + } 107 + } 108 + } 109 + 110 + #[unsafe(no_mangle)] 111 + pub unsafe extern "C" fn bindle_free_buffer(ptr: *mut u8, len: usize) { 112 + if !ptr.is_null() { 113 + unsafe { 114 + let _ = Vec::from_raw_parts(ptr, len, len); 115 + } 116 + } 117 + } 118 + 119 + #[unsafe(no_mangle)] 120 + pub unsafe extern "C" fn bindle_length(ctx: *const BindleContext) -> usize { 121 + if ctx.is_null() { 122 + return 0; 123 + } 124 + unsafe { (*ctx).inner.len() } 125 + } 126 + 127 + /// Returns the name of the entry at the given index. 128 + /// The string is owned by the Bindle; the caller must NOT free it. 
129 + #[unsafe(no_mangle)] 130 + pub unsafe extern "C" fn bindle_entry_name( 131 + ctx: *const BindleContext, 132 + index: usize, 133 + len: *mut usize, 134 + ) -> *const c_char { 135 + if ctx.is_null() { 136 + return std::ptr::null(); 137 + } 138 + 139 + let b = unsafe { &(*ctx).inner }; 140 + match b.entries.get(index) { 141 + Some((_, name)) => { 142 + unsafe { 143 + *len = name.as_bytes().len(); 144 + } 145 + name.as_ptr() as *const _ 146 + } 147 + None => std::ptr::null(), 148 + } 149 + }
+359
src/lib.rs
··· 1 + use memmap2::Mmap; 2 + use std::borrow::Cow; 3 + use std::fs::{File, OpenOptions}; 4 + use std::io::{self, Read, Seek, SeekFrom, Write}; 5 + use std::path::Path; 6 + use zerocopy::{FromBytes, Immutable, IntoBytes, Unaligned}; 7 + 8 + mod ffi; 9 + 10 + const BNDL_MAGIC: &[u8; 8] = b"BINDL001"; 11 + const BNDL_ALIGN: usize = 8; 12 + const ENTRY_SIZE: usize = std::mem::size_of::<Entry>(); 13 + const FOOTER_SIZE: usize = std::mem::size_of::<Footer>(); 14 + const HEADER_SIZE: u64 = 8; 15 + 16 + #[repr(C, packed)] 17 + #[derive(FromBytes, Unaligned, IntoBytes, Immutable, Clone, Copy, Debug)] 18 + pub struct Entry { 19 + pub offset: [u8; 8], 20 + pub compressed_size: [u8; 8], 21 + pub uncompressed_size: [u8; 8], 22 + pub crc32: [u8; 4], 23 + pub name_len: [u8; 2], 24 + pub compression_type: u8, 25 + pub _reserved: u8, 26 + } 27 + 28 + #[repr(C, packed)] 29 + #[derive(FromBytes, Unaligned, IntoBytes, Immutable, Debug)] 30 + struct Footer { 31 + pub index_offset: [u8; 8], 32 + pub entry_count: [u8; 4], 33 + } 34 + 35 + pub struct Bindle { 36 + file: File, 37 + mmap: Option<Mmap>, 38 + entries: Vec<(Entry, String)>, 39 + data_end: u64, 40 + } 41 + 42 + impl Bindle { 43 + pub fn open<P: AsRef<Path>>(path: P) -> io::Result<Self> { 44 + let mut file = OpenOptions::new() 45 + .read(true) 46 + .write(true) 47 + .create(true) 48 + .open(path)?; 49 + 50 + let len = file.metadata()?.len(); 51 + 52 + if len == 0 { 53 + // New file: Write the magic header immediately 54 + file.write_all(BNDL_MAGIC)?; 55 + return Ok(Self { 56 + file, 57 + mmap: None, 58 + entries: Vec::new(), 59 + data_end: HEADER_SIZE, 60 + }); 61 + } 62 + 63 + // Existing file: Check header magic 64 + let mut header = [0u8; 8]; 65 + file.read_exact(&mut header)?; 66 + if &header != BNDL_MAGIC { 67 + return Err(io::Error::new( 68 + io::ErrorKind::InvalidData, 69 + "Invalid Bindle header", 70 + )); 71 + } 72 + // Case 2: File exists but is too small to even hold a footer 73 + if len < FOOTER_SIZE as u64 { 74 + 
return Err(io::Error::new( 75 + io::ErrorKind::InvalidData, 76 + "File too small to be a Bindle", 77 + )); 78 + } 79 + 80 + let m = unsafe { Mmap::map(&file)? }; 81 + let footer_pos = m.len() - FOOTER_SIZE; 82 + 83 + let footer = Footer::read_from_bytes(&m[footer_pos..]) 84 + .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "Invalid footer alignment"))?; 85 + 86 + // If magic is valid, proceed to parse the index 87 + let data_end = u64::from_le_bytes(footer.index_offset); 88 + let count = u32::from_le_bytes(footer.entry_count); 89 + let mut entries = Vec::with_capacity(count as usize); 90 + 91 + let mut cursor = data_end as usize; 92 + for _ in 0..count { 93 + let entry_bytes = m 94 + .get(cursor..cursor + ENTRY_SIZE) 95 + .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "Index out of bounds"))?; 96 + let entry = Entry::read_from_bytes(entry_bytes) 97 + .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "Invalid entry"))?; 98 + 99 + let n_len = u16::from_le_bytes(entry.name_len) as usize; 100 + let n_start = cursor + ENTRY_SIZE; 101 + let n_end = n_start + n_len; 102 + 103 + let name_bytes = m 104 + .get(n_start..n_end) 105 + .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "Name out of bounds"))?; 106 + let name = String::from_utf8_lossy(name_bytes).into_owned(); 107 + 108 + entries.push((entry, name)); 109 + 110 + let total = ENTRY_SIZE + n_len; 111 + cursor += (total + (BNDL_ALIGN - 1)) & !(BNDL_ALIGN - 1); 112 + } 113 + 114 + Ok(Self { 115 + file, 116 + mmap: Some(m), 117 + entries, 118 + data_end, 119 + }) 120 + } 121 + 122 + /// Reads data for an entry using Cow to avoid unnecessary copies. 
123 + pub fn read<'a>(&'a self, name: &str) -> Option<Cow<'a, [u8]>> { 124 + let (entry, _) = self.entries.iter().find(|(_, n)| n == name)?; 125 + let mmap = self.mmap.as_ref()?; 126 + 127 + let offset = u64::from_le_bytes(entry.offset) as usize; 128 + let c_size = u64::from_le_bytes(entry.compressed_size) as usize; 129 + let u_size = u64::from_le_bytes(entry.uncompressed_size) as usize; 130 + 131 + let data = mmap.get(offset..offset + c_size)?; 132 + 133 + if entry.compression_type == 1 { 134 + let mut out = Vec::with_capacity(u_size); 135 + zstd::Decoder::new(data).ok()?.read_to_end(&mut out).ok()?; 136 + Some(Cow::Owned(out)) 137 + } else { 138 + Some(Cow::Borrowed(data)) 139 + } 140 + } 141 + 142 + /// Streams data directly to a writer (e.g., File, TcpStream) to keep memory usage low. 143 + pub fn read_to_writer<W: Write>(&self, name: &str, mut writer: W) -> io::Result<u64> { 144 + let (entry, _) = self 145 + .entries 146 + .iter() 147 + .find(|(_, n)| n == name) 148 + .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "Entry not found"))?; 149 + 150 + let mmap = self 151 + .mmap 152 + .as_ref() 153 + .ok_or_else(|| io::Error::new(io::ErrorKind::Other, "Archive not mapped"))?; 154 + 155 + let offset = u64::from_le_bytes(entry.offset) as usize; 156 + let c_size = u64::from_le_bytes(entry.compressed_size) as usize; 157 + let data = mmap.get(offset..offset + c_size).ok_or_else(|| { 158 + io::Error::new(io::ErrorKind::InvalidData, "Data range out of bounds") 159 + })?; 160 + 161 + if entry.compression_type == 1 { 162 + let mut decoder = zstd::Decoder::new(data)?; 163 + io::copy(&mut decoder, &mut writer) 164 + } else { 165 + writer.write_all(data)?; 166 + Ok(data.len() as u64) 167 + } 168 + } 169 + 170 + pub fn add(&mut self, name: &str, data: &[u8], compress: bool) -> io::Result<()> { 171 + // 1. 
Prevent Duplicate Keys 172 + if self 173 + .entries 174 + .iter() 175 + .any(|(_, existing_name)| existing_name == name) 176 + { 177 + return Err(io::Error::new( 178 + io::ErrorKind::AlreadyExists, 179 + format!("Entry '{}' already exists in bindle", name), 180 + )); 181 + } 182 + 183 + // 2. Position the file pointer at the end of valid data 184 + // If data_end is 0, we start after the 8-byte Magic Header 185 + let write_pos = if self.data_end >= HEADER_SIZE { 186 + self.data_end 187 + } else { 188 + HEADER_SIZE 189 + }; 190 + 191 + self.file.seek(SeekFrom::Start(write_pos))?; 192 + 193 + // 3. Prepare and write data 194 + let write_data = if compress { 195 + zstd::encode_all(data, 3)? 196 + } else { 197 + data.to_vec() 198 + }; 199 + 200 + let start_offset = self.file.stream_position()?; 201 + self.file.write_all(&write_data)?; 202 + 203 + // 4. Align to 8 bytes for the next entry or index 204 + let current_pos = self.file.stream_position()?; 205 + let pad = (BNDL_ALIGN as u64 - (current_pos % BNDL_ALIGN as u64)) % BNDL_ALIGN as u64; 206 + if pad > 0 { 207 + self.file.write_all(&vec![0u8; pad as usize])?; 208 + } 209 + 210 + // 5. 
Update state 211 + self.data_end = self.file.stream_position()?; 212 + let entry = Entry { 213 + offset: start_offset.to_le_bytes(), 214 + compressed_size: (write_data.len() as u64).to_le_bytes(), 215 + uncompressed_size: (data.len() as u64).to_le_bytes(), 216 + crc32: crc32fast::hash(&write_data).to_le_bytes(), 217 + name_len: (name.len() as u16).to_le_bytes(), 218 + compression_type: if compress { 1 } else { 0 }, 219 + _reserved: 0, 220 + }; 221 + 222 + self.entries.push((entry, name.to_string())); 223 + Ok(()) 224 + } 225 + 226 + pub fn save(&mut self) -> io::Result<()> { 227 + self.file.seek(SeekFrom::Start(self.data_end))?; 228 + let index_start = self.data_end; 229 + 230 + for (entry, name) in &self.entries { 231 + self.file.write_all(entry.as_bytes())?; 232 + self.file.write_all(name.as_bytes())?; 233 + let current_disk_size = ENTRY_SIZE + name.len(); 234 + let pad = (BNDL_ALIGN - (current_disk_size % BNDL_ALIGN)) % BNDL_ALIGN; 235 + if pad > 0 { 236 + self.file.write_all(&vec![0u8; pad])?; 237 + } 238 + } 239 + 240 + let footer = Footer { 241 + index_offset: index_start.to_le_bytes(), 242 + entry_count: (self.entries.len() as u32).to_le_bytes(), 243 + }; 244 + 245 + self.file.write_all(footer.as_bytes())?; 246 + self.file.flush()?; 247 + 248 + self.mmap = Some(unsafe { Mmap::map(&self.file)? }); 249 + Ok(()) 250 + } 251 + 252 + /// Returns a list of all entry names in the archive. 253 + pub fn list(&self) -> Vec<&str> { 254 + self.entries.iter().map(|(_, name)| name.as_str()).collect() 255 + } 256 + 257 + /// Returns the number of entries. 258 + pub fn len(&self) -> usize { 259 + self.entries.len() 260 + } 261 + 262 + pub fn is_empty(&self) -> bool { 263 + self.entries.is_empty() 264 + } 265 + } 266 + 267 + #[cfg(test)] 268 + mod tests { 269 + use super::*; 270 + use std::fs; 271 + 272 + #[test] 273 + fn test_create_and_read() { 274 + let path = "test_basic.bindl"; 275 + let data = b"Hello, Bindle World!"; 276 + 277 + // 1. 
Create and Write 278 + { 279 + let mut fp = Bindle::open(path).expect("Failed to open"); 280 + fp.add("hello.txt", data, false).expect("Failed to add"); 281 + fp.save().expect("Failed to commit"); 282 + } 283 + 284 + // 2. Open and Read 285 + { 286 + let fp = Bindle::open(path).expect("Failed to re-open"); 287 + let result = fp.read("hello.txt").expect("File not found"); 288 + assert_eq!(result.as_ref(), data); 289 + } 290 + 291 + fs::remove_file(path).ok(); 292 + } 293 + 294 + #[test] 295 + fn test_zstd_compression() { 296 + let path = "test_zstd.bindl"; 297 + // Highly compressible data 298 + let data = vec![b'A'; 1000]; 299 + 300 + { 301 + let mut fp = Bindle::open(path).expect("Failed to open"); 302 + fp.add("large.bin", &data, true).expect("Failed to add"); 303 + fp.save().expect("Failed to commit"); 304 + } 305 + 306 + let fp = Bindle::open(path).expect("Failed to re-open"); 307 + 308 + // Ensure data is correct 309 + let result = fp.read("large.bin").expect("File not found"); 310 + assert_eq!(result, data); 311 + 312 + // Ensure the file on disk is actually smaller than the raw data (including headers) 313 + let meta = fs::metadata(path).unwrap(); 314 + assert!(meta.len() < 1000); 315 + 316 + fs::remove_file(path).ok(); 317 + } 318 + 319 + #[test] 320 + fn test_append_functionality() { 321 + let path = "test_append.bindl"; 322 + let _ = std::fs::remove_file(path); 323 + 324 + // 1. Initial creation 325 + { 326 + let mut fp = Bindle::open(path).expect("Fail open 1"); 327 + fp.add("1.txt", b"First", false).unwrap(); 328 + fp.save().expect("Fail commit 1"); 329 + } // File handle closed here 330 + 331 + // 2. 
Append session 332 + { 333 + let mut fp = Bindle::open(path).expect("Fail open 2"); 334 + // At this point, entries contains "1.txt" 335 + 336 + fp.add("2.txt", b"Second", false).unwrap(); 337 + fp.save().expect("Fail commit 2"); 338 + 339 + // Now test the read 340 + let first = fp.read("1.txt").expect("Could not find 1.txt"); 341 + let second = fp.read("2.txt").expect("Could not find 2.txt"); 342 + 343 + assert_eq!(first.as_ref(), b"First"); 344 + assert_eq!(second.as_ref(), b"Second"); 345 + } 346 + let _ = std::fs::remove_file(path); 347 + } 348 + 349 + #[test] 350 + fn test_invalid_magic() { 351 + let path = "invalid.bindl"; 352 + fs::write(path, b"NOT_A_PACK_FILE_AT_ALL").unwrap(); 353 + 354 + let res = Bindle::open(path); 355 + assert!(res.is_err()); 356 + 357 + fs::remove_file(path).ok(); 358 + } 359 + }