an efficient binary archive format
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

add c re-implementation for bindle.h, testing

zach 170d2a25 58fc65f7

+652 -218
+5
.gitignore
··· 1 1 /target 2 + Cargo.lock 3 + *.bndl 4 + c/bindle_c 5 + c/test 6 + c/input.txt
+3
Makefile
# Top-level test entry point: runs the Rust test suite, then the C build and
# the cross-implementation compatibility test in c/.
# Declared phony so a file or directory named "test" can never make the
# target look up to date.
.PHONY: test
test:
	cargo test
	$(MAKE) -C c test
+1 -1
SPEC.md
··· 1 - # Bindle File Format (.bdnl) 1 + # Bindle File Format (.bndl) 2 2 3 3 Bindle is a simple append-only binary archive format. It features a trailing index to support efficient writes and memory-mapped reads. 4 4
+13 -14
bindle.h
··· 12 12 #include <stdint.h> 13 13 #include <stdbool.h> 14 14 15 - /** 16 - * Opaque handle to a Bindle archive. 17 - */ 18 - typedef struct BindleContext BindleContext; 15 + typedef struct Bindle Bindle; 19 16 20 - struct BindleContext *bindle_open(const char *path); 17 + struct Bindle *bindle_open(const char *path); 21 18 22 19 /** 23 20 * Adds a new entry. Returns true on success. 24 21 */ 25 - bool bindle_add(struct BindleContext *ctx, 22 + bool bindle_add(struct Bindle *ctx, 26 23 const char *name, 27 24 const uint8_t *data, 28 25 size_t data_len, ··· 31 28 /** 32 29 * Commits changes to disk. 33 30 */ 34 - bool bindle_save(struct BindleContext *ctx); 31 + bool bindle_save(struct Bindle *ctx); 35 32 36 33 /** 37 34 * Frees BindleContext 38 35 */ 39 - void bindle_free(struct BindleContext *ctx); 36 + void bindle_close(struct Bindle *ctx); 37 + 38 + uint8_t *bindle_read(struct Bindle *ctx_ptr, const char *name, size_t *out_len); 40 39 41 - uint8_t *bindle_read(struct BindleContext *ctx, const char *name, size_t *out_len); 40 + void bindle_free_buffer(uint8_t *ptr); 42 41 43 - const uint8_t *bindle_read_uncompressed_direct(struct BindleContext *ctx, 42 + const uint8_t *bindle_read_uncompressed_direct(struct Bindle *ctx, 44 43 const char *name, 45 44 size_t *out_len); 46 45 47 - void bindle_free_buffer(uint8_t *ptr, size_t len); 48 - 49 - size_t bindle_length(const struct BindleContext *ctx); 46 + size_t bindle_length(const struct Bindle *ctx); 50 47 51 48 /** 52 49 * Returns the name of the entry at the given index. 53 50 * The string is owned by the Bindle; the caller must NOT free it. 54 51 */ 55 - const char *bindle_entry_name(const struct BindleContext *ctx, size_t index, size_t *len); 52 + const char *bindle_entry_name(const struct Bindle *ctx, size_t index, size_t *len); 53 + 54 + bool bindle_vacuum(struct Bindle *ctx); 56 55 57 56 #endif /* BINDLE_H */
+5
c/Makefile
# Builds the C command-line tool and runs the Rust <-> C compatibility test.
# test.py takes no command-line arguments (it uses its own archive name), so
# none are passed.
.PHONY: test
test: bindle_c
	python3 test.py

# The CLI binary is rebuilt only when one of its sources changes.
bindle_c: bindle-cli.c bindle.c ../bindle.h
	clang -I .. -o $@ bindle-cli.c bindle.c `pkg-config --cflags --libs libzstd`
+86
c/bindle-cli.c
··· 1 + #include "../bindle.h" 2 + #include <stdio.h> 3 + #include <stdlib.h> 4 + #include <string.h> 5 + 6 + void print_usage(const char *prog) { 7 + printf("Usage: %s <bindle_file> <command> [args]\n", prog); 8 + printf("Commands:\n"); 9 + printf(" list List all entries\n"); 10 + printf(" cat <name> Output entry content to stdout\n"); 11 + printf(" add <name> <file> Add a file to the archive\n"); 12 + } 13 + 14 + int main(int argc, char **argv) { 15 + if (argc < 3) { 16 + print_usage(argv[0]); 17 + return 1; 18 + } 19 + 20 + const char *db_path = argv[2]; 21 + const char *cmd = argv[1]; 22 + 23 + Bindle *b = bindle_open(db_path); 24 + if (!b) { 25 + fprintf(stderr, "Error: Could not open or create bindle '%s'\n", db_path); 26 + return 1; 27 + } 28 + 29 + if (strcmp(cmd, "list") == 0) { 30 + uint64_t count = bindle_length(b); 31 + printf("%-20s\n", "NAME"); 32 + printf("----------------------------------------------------------\n"); 33 + size_t namelen = 0; 34 + for (uint64_t i = 0; i < count; i++) { 35 + const char *name = bindle_entry_name(b, i, &namelen); 36 + printf("%*s\n", (int)namelen, name); 37 + } 38 + } else if (strcmp(cmd, "cat") == 0) { 39 + if (argc < 4) { 40 + fprintf(stderr, "Error: 'cat' requires an entry name\n"); 41 + return 1; 42 + } 43 + size_t out_len = 0; 44 + uint8_t *data = bindle_read(b, argv[3], &out_len); 45 + if (data) { 46 + fwrite(data, 1, out_len, stdout); 47 + free(data); 48 + } else { 49 + fprintf(stderr, "Error: Entry '%s' not found\n", argv[3]); 50 + return 1; 51 + } 52 + } else if (strcmp(cmd, "add") == 0) { 53 + if (argc < 5) { 54 + fprintf(stderr, "Error: 'add' requires <name> and <file_path>\n"); 55 + return 1; 56 + } 57 + 58 + FILE *inf = fopen(argv[4], "rb"); 59 + if (!inf) { 60 + perror("fopen"); 61 + return 1; 62 + } 63 + 64 + fseek(inf, 0, SEEK_END); 65 + size_t len = ftell(inf); 66 + fseek(inf, 0, SEEK_SET); 67 + 68 + uint8_t *buf = malloc(len); 69 + fread(buf, 1, len, inf); 70 + fclose(inf); 71 + 72 + // We'll default 
to compression (true) for the CLI 73 + if (bindle_add(b, argv[3], buf, len, true)) { 74 + bindle_save(b); 75 + fprintf(stderr, "Added '%s' successfully.\n", argv[3]); 76 + } else { 77 + fprintf(stderr, "Error: Failed to add '%s' (duplicate name?)\n", argv[3]); 78 + } 79 + free(buf); 80 + } else { 81 + print_usage(argv[0]); 82 + } 83 + 84 + bindle_close(b); 85 + return 0; 86 + }
+285
c/bindle.c
··· 1 + #include "bindle.h" 2 + 3 + #include <stdio.h> 4 + #include <stdlib.h> 5 + #include <string.h> 6 + #include <sys/file.h> 7 + #include <zstd.h> 8 + 9 + #define BNDL_MAGIC "BINDL001" 10 + #define BNDL_ALIGN 8 11 + #define ALIGN_UP(n, m) (((n) + (m) - 1) & ~((m) - 1)) 12 + 13 + /* --- Private Disk Structures --- */ 14 + #pragma pack(push, 1) 15 + typedef struct { 16 + uint64_t offset; 17 + uint64_t compressed_size; 18 + uint64_t uncompressed_size; 19 + uint32_t crc32; 20 + uint16_t name_len; 21 + uint8_t compression_type; 22 + uint8_t _reserved; 23 + } BindleEntryRaw; 24 + 25 + typedef struct { 26 + uint64_t index_offset; 27 + uint64_t entry_count; 28 + } BindleFooterRaw; 29 + #pragma pack(pop) 30 + 31 + /* --- Private In-Memory Structures --- */ 32 + typedef struct { 33 + BindleEntryRaw meta; 34 + char *name; 35 + } BindleEntry; 36 + 37 + struct Bindle { 38 + char *path; 39 + FILE *fp; 40 + BindleEntry *entries; 41 + uint64_t count; 42 + uint64_t data_end; 43 + }; 44 + 45 + /* --- API Implementation --- */ 46 + 47 + Bindle *bindle_open(const char *path) { 48 + FILE *fp = fopen(path, "r+b"); 49 + if (!fp) 50 + fp = fopen(path, "w+b"); 51 + if (!fp) 52 + return NULL; 53 + 54 + flock(fileno(fp), LOCK_SH); 55 + 56 + Bindle *b = calloc(1, sizeof(Bindle)); 57 + b->path = strdup(path); 58 + b->fp = fp; 59 + 60 + fseek(fp, 0, SEEK_END); 61 + long file_size = ftell(fp); 62 + 63 + if (file_size == 0) { 64 + fwrite(BNDL_MAGIC, 8, 1, fp); 65 + b->data_end = 8; 66 + return b; 67 + } 68 + 69 + // Header check 70 + char magic[8]; 71 + fseek(fp, 0, SEEK_SET); 72 + if (fread(magic, 8, 1, fp) != 1 || memcmp(magic, BNDL_MAGIC, 8) != 0) { 73 + bindle_close(b); 74 + return NULL; 75 + } 76 + 77 + // Footer parse 78 + BindleFooterRaw footer; 79 + fseek(fp, file_size - sizeof(BindleFooterRaw), SEEK_SET); 80 + if (fread(&footer, sizeof(BindleFooterRaw), 1, fp) != 1) { 81 + bindle_close(b); 82 + return NULL; 83 + } 84 + 85 + b->count = footer.entry_count; 86 + b->data_end = 
footer.index_offset; 87 + b->entries = malloc(sizeof(BindleEntry) * b->count); 88 + 89 + // Index parse 90 + fseek(fp, footer.index_offset, SEEK_SET); 91 + for (uint64_t i = 0; i < b->count; i++) { 92 + fread(&b->entries[i].meta, sizeof(BindleEntryRaw), 1, fp); 93 + b->entries[i].name = malloc(b->entries[i].meta.name_len + 1); 94 + fread(b->entries[i].name, b->entries[i].meta.name_len, 1, fp); 95 + b->entries[i].name[b->entries[i].meta.name_len] = '\0'; 96 + 97 + size_t consumed = sizeof(BindleEntryRaw) + b->entries[i].meta.name_len; 98 + fseek(fp, ALIGN_UP(consumed, BNDL_ALIGN) - consumed, SEEK_CUR); 99 + } 100 + return b; 101 + } 102 + 103 + bool bindle_add(Bindle *b, const char *name, const uint8_t *data, size_t len, 104 + bool compress) { 105 + if (!b || !name) 106 + return false; 107 + 108 + size_t c_size = len; 109 + void *write_ptr = (void *)data; 110 + void *comp_buf = NULL; 111 + 112 + if (compress) { 113 + size_t bound = ZSTD_compressBound(len); 114 + comp_buf = malloc(bound); 115 + c_size = ZSTD_compress(comp_buf, bound, data, len, 3); 116 + if (ZSTD_isError(c_size)) { 117 + free(comp_buf); 118 + return false; 119 + } 120 + write_ptr = comp_buf; 121 + } 122 + 123 + // 1. Write data at current data_end 124 + fseek(b->fp, b->data_end, SEEK_SET); 125 + uint64_t offset = ftell(b->fp); 126 + fwrite(write_ptr, 1, c_size, b->fp); 127 + 128 + // 2. Align data_end 129 + size_t pad = ALIGN_UP(c_size, BNDL_ALIGN) - c_size; 130 + if (pad > 0) { 131 + uint8_t zero[8] = {0}; 132 + fwrite(zero, 1, pad, b->fp); 133 + } 134 + b->data_end = ftell(b->fp); 135 + 136 + // 3. Shadowing: Check if name already exists 137 + for (uint64_t i = 0; i < b->count; i++) { 138 + if (strcmp(b->entries[i].name, name) == 0) { 139 + b->entries[i].meta.offset = offset; 140 + b->entries[i].meta.compressed_size = c_size; 141 + b->entries[i].meta.uncompressed_size = len; 142 + b->entries[i].meta.compression_type = compress ? 
1 : 0; 143 + if (comp_buf) 144 + free(comp_buf); 145 + return true; 146 + } 147 + } 148 + 149 + // 4. New Entry 150 + b->entries = realloc(b->entries, sizeof(BindleEntry) * (b->count + 1)); 151 + BindleEntry *e = &b->entries[b->count++]; 152 + e->name = strdup(name); 153 + e->meta = (BindleEntryRaw){ 154 + offset, c_size, len, 0, (uint16_t)strlen(name), compress ? 1 : 0, 0}; 155 + 156 + if (comp_buf) 157 + free(comp_buf); 158 + return true; 159 + } 160 + 161 + uint8_t *bindle_read(Bindle *b, const char *name, size_t *out_len) { 162 + for (uint64_t i = 0; i < b->count; i++) { 163 + if (strcmp(b->entries[i].name, name) == 0) { 164 + BindleEntryRaw *m = &b->entries[i].meta; 165 + uint8_t *c_buf = malloc(m->compressed_size); 166 + fseek(b->fp, m->offset, SEEK_SET); 167 + fread(c_buf, 1, m->compressed_size, b->fp); 168 + 169 + if (m->compression_type == 1) { 170 + uint8_t *u_buf = malloc(m->uncompressed_size); 171 + size_t actual = ZSTD_decompress(u_buf, m->uncompressed_size, c_buf, 172 + m->compressed_size); 173 + free(c_buf); 174 + *out_len = actual; 175 + return u_buf; 176 + } 177 + *out_len = m->compressed_size; 178 + return c_buf; 179 + } 180 + } 181 + return NULL; 182 + } 183 + 184 + const uint8_t *bindle_read_uncompressed_direct(Bindle *b, const char *name, 185 + size_t *out_len) { 186 + for (uint64_t i = 0; i < b->count; i++) { 187 + if (strcmp(b->entries[i].name, name) == 0) { 188 + BindleEntryRaw *m = &b->entries[i].meta; 189 + if (m->compression_type != 0) 190 + return NULL; 191 + 192 + uint8_t *buf = malloc(m->uncompressed_size); 193 + fseek(b->fp, m->offset, SEEK_SET); 194 + fread(buf, 1, m->uncompressed_size, b->fp); 195 + *out_len = m->uncompressed_size; 196 + return buf; 197 + } 198 + } 199 + return NULL; 200 + } 201 + 202 + bool bindle_save(Bindle *b) { 203 + flock(fileno(b->fp), LOCK_EX); 204 + fseek(b->fp, b->data_end, SEEK_SET); 205 + uint64_t index_start = b->data_end; 206 + 207 + for (uint64_t i = 0; i < b->count; i++) { 208 + 
fwrite(&b->entries[i].meta, sizeof(BindleEntryRaw), 1, b->fp); 209 + fwrite(b->entries[i].name, 1, b->entries[i].meta.name_len, b->fp); 210 + size_t consumed = sizeof(BindleEntryRaw) + b->entries[i].meta.name_len; 211 + size_t pad = ALIGN_UP(consumed, BNDL_ALIGN) - consumed; 212 + if (pad > 0) { 213 + uint8_t zero[8] = {0}; 214 + fwrite(zero, 1, pad, b->fp); 215 + } 216 + } 217 + 218 + BindleFooterRaw footer = {index_start, b->count}; 219 + fwrite(&footer, sizeof(BindleFooterRaw), 1, b->fp); 220 + fflush(b->fp); 221 + flock(fileno(b->fp), LOCK_SH); 222 + return true; 223 + } 224 + 225 + bool bindle_vacuum(Bindle *b) { 226 + char tmp_path[1024]; 227 + snprintf(tmp_path, sizeof(tmp_path), "%s.tmp", b->path); 228 + FILE *out = fopen(tmp_path, "wb"); 229 + if (!out) 230 + return false; 231 + 232 + fwrite(BNDL_MAGIC, 8, 1, out); 233 + uint64_t current_offset = 8; 234 + 235 + for (uint64_t i = 0; i < b->count; i++) { 236 + uint64_t size = b->entries[i].meta.compressed_size; 237 + uint8_t *buf = malloc(size); 238 + fseek(b->fp, b->entries[i].meta.offset, SEEK_SET); 239 + fread(buf, 1, size, b->fp); 240 + 241 + fseek(out, current_offset, SEEK_SET); 242 + fwrite(buf, 1, size, out); 243 + 244 + b->entries[i].meta.offset = current_offset; 245 + 246 + size_t pad = ALIGN_UP(size, BNDL_ALIGN) - size; 247 + if (pad > 0) { 248 + uint8_t zero[8] = {0}; 249 + fwrite(zero, 1, pad, out); 250 + } 251 + current_offset += size + pad; 252 + free(buf); 253 + } 254 + 255 + fclose(b->fp); 256 + b->fp = out; 257 + b->data_end = current_offset; 258 + bindle_save(b); // Finalize index in new file 259 + 260 + rename(tmp_path, b->path); 261 + return true; 262 + } 263 + 264 + size_t bindle_length(const Bindle *b) { return b ? 
b->count : 0; } 265 + 266 + const char *bindle_entry_name(const Bindle *b, size_t index, size_t *namelen) { 267 + if (!b || index >= b->count) 268 + return NULL; 269 + *namelen = b->entries[index].meta.name_len; 270 + return b->entries[index].name; 271 + } 272 + 273 + void bindle_free_buffer(uint8_t *ptr) { free(ptr); } 274 + 275 + void bindle_close(Bindle *b) { 276 + if (!b) 277 + return; 278 + flock(fileno(b->fp), LOCK_UN); 279 + for (uint64_t i = 0; i < b->count; i++) 280 + free(b->entries[i].name); 281 + free(b->entries); 282 + fclose(b->fp); 283 + free(b->path); 284 + free(b); 285 + }
+2
c/compile_flags.txt
··· 1 + -I.. 2 + -I/opt/homebrew/include
+39
c/test.py
"""Cross-implementation compatibility test for Bindle archives.

Phase 1 creates an archive with the Rust CLI and reads it with the C CLI;
phase 2 adds an entry with the C CLI and reads it back with the Rust CLI.
The script exits with a non-zero status when either phase fails, so it can
gate ``make test``.
"""
import hashlib
import os
import subprocess
import sys


def get_hash(data):
    """Return the hexadecimal SHA-256 digest of ``data`` (a bytes object)."""
    return hashlib.sha256(data).hexdigest()


def run_test():
    """Run both compatibility phases.

    Returns True when every phase produced the expected content and False
    otherwise. Raises ``subprocess.CalledProcessError`` when one of the
    ``add`` commands itself fails.
    """
    test_file = "compat_test.bndl"
    secret_content = b"Consistency is the playground of the gods."
    expected = get_hash(secret_content)
    ok = True

    # Start from a clean slate so an archive left over from a previous run
    # cannot influence the result.
    if os.path.exists(test_file):
        os.remove(test_file)

    with open("input.txt", "wb") as f:
        f.write(secret_content)

    print("--- Phase 1: Rust Create -> C Read ---")
    # 1. Create with Rust
    subprocess.run(
        ["cargo", "run", "--", "add", test_file, "msg", "input.txt", "--compress"],
        check=True,
    )

    # 2. Read with C (the C CLI is expected at ./bindle_c)
    result_c = subprocess.run(
        ["./bindle_c", "cat", test_file, "msg"], capture_output=True
    )

    if result_c.returncode == 0 and get_hash(result_c.stdout) == expected:
        print("✅ SUCCESS: C successfully read Rust-compressed data.")
    else:
        print("❌ FAIL: C output does not match original content.")
        ok = False

    print("\n--- Phase 2: C Create -> Rust Read ---")
    # 3. Use C to add a second entry. The C CLI always compresses and takes
    #    no compression flag, so no extra argument is passed.
    subprocess.run(
        ["./bindle_c", "add", test_file, "c_msg", "input.txt"], check=True
    )

    # 4. Use Rust to read the entry back and verify it
    result_rust = subprocess.run(
        ["cargo", "run", "--", "cat", test_file, "c_msg"], capture_output=True
    )

    if result_rust.returncode == 0 and get_hash(result_rust.stdout) == expected:
        print("✅ SUCCESS: Rust successfully read C-compressed data.")
    else:
        print("❌ FAIL: Rust output does not match.")
        ok = False

    return ok


if __name__ == "__main__":
    sys.exit(0 if run_test() else 1)
+1 -1
src/bin/bindle.rs
··· 81 81 ); 82 82 println!("{}", "-".repeat(60)); 83 83 84 - for (entry, name) in b.entries() { 84 + for (name, entry) in b.index().iter() { 85 85 let size = u64::from_le_bytes(entry.uncompressed_size); 86 86 let packed = u64::from_le_bytes(entry.compressed_size); 87 87
+94 -45
src/ffi.rs
··· 1 + use std::alloc::{Layout, dealloc}; 1 2 use std::ffi::CStr; 3 + use std::mem; 2 4 use std::os::raw::c_char; 3 5 use std::slice; 4 6 5 7 use crate::Bindle; 6 8 7 - /// Opaque handle to a Bindle archive. 8 - pub struct BindleContext { 9 - pub(crate) inner: Bindle, 10 - } 11 - 12 9 #[unsafe(no_mangle)] 13 - pub unsafe extern "C" fn bindle_open(path: *const c_char) -> *mut BindleContext { 10 + pub unsafe extern "C" fn bindle_open(path: *const c_char) -> *mut Bindle { 14 11 if path.is_null() { 15 12 return std::ptr::null_mut(); 16 13 } ··· 24 21 }; 25 22 26 23 match Bindle::open(path_str) { 27 - Ok(b) => Box::into_raw(Box::new(BindleContext { inner: b })), 24 + Ok(b) => Box::into_raw(Box::new(b)), 28 25 Err(_) => std::ptr::null_mut(), 29 26 } 30 27 } ··· 32 29 /// Adds a new entry. Returns true on success. 33 30 #[unsafe(no_mangle)] 34 31 pub unsafe extern "C" fn bindle_add( 35 - ctx: *mut BindleContext, 32 + ctx: *mut Bindle, 36 33 name: *const c_char, 37 34 data: *const u8, 38 35 data_len: usize, ··· 49 46 }; 50 47 51 48 let data_slice = slice::from_raw_parts(data, data_len); 52 - let b = &mut (*ctx).inner; 49 + let b = &mut (*ctx); 53 50 54 51 b.add(name_str, data_slice, compress).is_ok() 55 52 } ··· 57 54 58 55 /// Commits changes to disk. 
59 56 #[unsafe(no_mangle)] 60 - pub unsafe extern "C" fn bindle_save(ctx: *mut BindleContext) -> bool { 57 + pub unsafe extern "C" fn bindle_save(ctx: *mut Bindle) -> bool { 61 58 if ctx.is_null() { 62 59 return false; 63 60 } 64 61 unsafe { 65 - let b = &mut (*ctx).inner; 62 + let b = &mut (*ctx); 66 63 b.save().is_ok() 67 64 } 68 65 } 69 66 70 67 /// Frees BindleContext 71 68 #[unsafe(no_mangle)] 72 - pub unsafe extern "C" fn bindle_free(ctx: *mut BindleContext) { 69 + pub unsafe extern "C" fn bindle_close(ctx: *mut Bindle) { 73 70 if ctx.is_null() { 74 71 return; 75 72 } ··· 78 75 79 76 #[unsafe(no_mangle)] 80 77 pub unsafe extern "C" fn bindle_read( 81 - ctx: *mut BindleContext, 78 + ctx_ptr: *mut Bindle, 82 79 name: *const c_char, 83 80 out_len: *mut usize, 84 81 ) -> *mut u8 { 85 - if ctx.is_null() || name.is_null() || out_len.is_null() { 86 - return std::ptr::null_mut(); 87 - } 82 + unsafe { 83 + if ctx_ptr.is_null() || name.is_null() { 84 + return std::ptr::null_mut(); 85 + } 88 86 89 - unsafe { 90 - let name_str = match CStr::from_ptr(name).to_str() { 87 + // 1. Convert the C string to a Rust &str 88 + let c_str = std::ffi::CStr::from_ptr(name); 89 + let name_str = match c_str.to_str() { 91 90 Ok(s) => s, 92 91 Err(_) => return std::ptr::null_mut(), 93 92 }; 94 93 95 - let b = &(*ctx).inner; 94 + // 2. Access your Rust Bindle struct 95 + let ctx = &mut *ctx_ptr; 96 + 97 + // 3. 
The actual data retrieval logic 98 + // (Assuming your Rust Bindle has a method like .get(name)) 99 + match ctx.read(name_str) { 100 + Some(bytes) => wrap_in_ffi_header(bytes.as_ref(), out_len), 101 + None => return std::ptr::null_mut(), 102 + } 103 + } 104 + } 105 + 106 + /// Internal helper to perform the "Hidden Header" allocation 107 + unsafe fn wrap_in_ffi_header(data: &[u8], out_len: *mut usize) -> *mut u8 { 108 + unsafe { 109 + let len = data.len(); 110 + if !out_len.is_null() { 111 + *out_len = len; 112 + } 113 + 114 + let size_of_header = std::mem::size_of::<usize>(); 115 + let total_size = size_of_header + len; 116 + let layout = 117 + std::alloc::Layout::from_size_align(total_size, std::mem::align_of::<usize>()).unwrap(); 118 + 119 + let raw_ptr = std::alloc::alloc(layout); 120 + if raw_ptr.is_null() { 121 + return std::ptr::null_mut(); 122 + } 123 + 124 + // Store the length at the start 125 + *(raw_ptr as *mut usize) = len; 126 + 127 + // Copy data to the payload area 128 + let data_ptr = raw_ptr.add(size_of_header); 129 + std::ptr::copy_nonoverlapping(data.as_ptr(), data_ptr, len); 96 130 97 - if let Some(data) = b.read(name_str) { 98 - let mut bytes = data.to_vec(); 99 - bytes.shrink_to_fit(); 100 - let ptr = bytes.as_mut_ptr(); 101 - *out_len = bytes.len(); 102 - std::mem::forget(bytes); 103 - ptr 104 - } else { 105 - std::ptr::null_mut() 131 + data_ptr 132 + } 133 + } 134 + 135 + #[unsafe(no_mangle)] 136 + pub unsafe extern "C" fn bindle_free_buffer(ptr: *mut u8) { 137 + unsafe { 138 + if ptr.is_null() { 139 + return; 106 140 } 141 + 142 + let size_of_header = mem::size_of::<usize>(); 143 + 144 + // 1. Step back to find the start of the header 145 + let raw_ptr = ptr.sub(size_of_header); 146 + 147 + // 2. Read the length we stored there 148 + let len = *(raw_ptr as *const usize); 149 + 150 + // 3. 
Reconstruct the layout used during allocation 151 + let total_size = size_of_header + len; 152 + let layout = Layout::from_size_align(total_size, mem::align_of::<usize>()).unwrap(); 153 + 154 + // 4. Deallocate the entire block 155 + dealloc(raw_ptr, layout); 107 156 } 108 157 } 109 158 110 159 #[unsafe(no_mangle)] 111 160 pub unsafe extern "C" fn bindle_read_uncompressed_direct( 112 - ctx: *mut BindleContext, 161 + ctx: *mut Bindle, 113 162 name: *const c_char, 114 163 out_len: *mut usize, 115 164 ) -> *const u8 { ··· 123 172 Err(_) => return std::ptr::null_mut(), 124 173 }; 125 174 126 - let b = &(*ctx).inner; 175 + let b = &(*ctx); 127 176 if let Some(data) = b.read(name_str) { 128 177 match data { 129 178 std::borrow::Cow::Borrowed(bytes) => bytes.as_ptr(), ··· 136 185 } 137 186 138 187 #[unsafe(no_mangle)] 139 - pub unsafe extern "C" fn bindle_free_buffer(ptr: *mut u8, len: usize) { 140 - if !ptr.is_null() { 141 - unsafe { 142 - let _ = Vec::from_raw_parts(ptr, len, len); 143 - } 144 - } 145 - } 146 - 147 - #[unsafe(no_mangle)] 148 - pub unsafe extern "C" fn bindle_length(ctx: *const BindleContext) -> usize { 188 + pub unsafe extern "C" fn bindle_length(ctx: *const Bindle) -> usize { 149 189 if ctx.is_null() { 150 190 return 0; 151 191 } 152 - unsafe { (*ctx).inner.len() } 192 + unsafe { (*ctx).len() } 153 193 } 154 194 155 195 /// Returns the name of the entry at the given index. 156 196 /// The string is owned by the Bindle; the caller must NOT free it. 
157 197 #[unsafe(no_mangle)] 158 198 pub unsafe extern "C" fn bindle_entry_name( 159 - ctx: *const BindleContext, 199 + ctx: *const Bindle, 160 200 index: usize, 161 201 len: *mut usize, 162 202 ) -> *const c_char { ··· 164 204 return std::ptr::null(); 165 205 } 166 206 167 - let b = unsafe { &(*ctx).inner }; 168 - match b.entries.get(index) { 169 - Some((_, name)) => { 207 + let b = unsafe { &(*ctx) }; 208 + match b.index.iter().nth(index) { 209 + Some((name, _)) => { 170 210 unsafe { 171 211 *len = name.as_bytes().len(); 172 212 } ··· 175 215 None => std::ptr::null(), 176 216 } 177 217 } 218 + 219 + #[unsafe(no_mangle)] 220 + pub unsafe extern "C" fn bindle_vacuum(ctx: *mut Bindle) -> bool { 221 + if ctx.is_null() { 222 + return false; 223 + } 224 + let b = unsafe { &mut (*ctx) }; 225 + b.vacuum().is_ok() 226 + }
+118 -157
src/lib.rs
··· 1 1 use fs2::FileExt; 2 2 use memmap2::Mmap; 3 3 use std::borrow::Cow; 4 + use std::collections::BTreeMap; 4 5 use std::fs::{File, OpenOptions}; 5 6 use std::io::{self, Read, Seek, SeekFrom, Write}; 6 - use std::path::Path; 7 + use std::path::{Path, PathBuf}; 7 8 use zerocopy::{FromBytes, Immutable, IntoBytes, Unaligned}; 8 9 9 10 mod ffi; 10 11 11 12 const BNDL_MAGIC: &[u8; 8] = b"BINDL001"; 12 13 const BNDL_ALIGN: usize = 8; 13 - const ENTRY_SIZE: usize = std::mem::size_of::<Entry>(); 14 - const FOOTER_SIZE: usize = std::mem::size_of::<Footer>(); 14 + const ENTRY_SIZE: usize = std::mem::size_of::<BindleEntry>(); 15 + const FOOTER_SIZE: usize = std::mem::size_of::<BindleFooter>(); 15 16 const HEADER_SIZE: u64 = 8; 16 17 17 18 #[repr(C, packed)] 18 - #[derive(FromBytes, Unaligned, IntoBytes, Immutable, Clone, Copy, Debug)] 19 - pub struct Entry { 20 - pub offset: [u8; 8], 19 + #[derive(FromBytes, Unaligned, IntoBytes, Immutable, Clone, Copy, Debug, Default)] 20 + pub struct BindleEntry { 21 + pub offset: [u8; 8], // Use [u8; 8] for disk stability 21 22 pub compressed_size: [u8; 8], 22 23 pub uncompressed_size: [u8; 8], 23 24 pub crc32: [u8; 4], ··· 26 27 pub _reserved: u8, 27 28 } 28 29 30 + // Add helpers to convert back to numbers for Rust logic 31 + impl BindleEntry { 32 + pub fn offset(&self) -> u64 { 33 + u64::from_le_bytes(self.offset) 34 + } 35 + 36 + pub fn compressed_size(&self) -> u64 { 37 + u64::from_le_bytes(self.compressed_size) 38 + } 39 + 40 + pub fn uncompressed_size(&self) -> u64 { 41 + u64::from_le_bytes(self.uncompressed_size) 42 + } 43 + 44 + pub fn name_len(&self) -> usize { 45 + u16::from_le_bytes(self.name_len) as usize 46 + } 47 + } 48 + 29 49 #[repr(C, packed)] 30 50 #[derive(FromBytes, Unaligned, IntoBytes, Immutable, Debug)] 31 - struct Footer { 32 - pub index_offset: [u8; 8], 33 - pub entry_count: [u8; 8], 51 + struct BindleFooter { 52 + pub index_offset: u64, 53 + pub entry_count: u64, 34 54 } 35 55 36 56 pub struct Bindle { 57 + 
path: PathBuf, 37 58 file: File, 38 59 mmap: Option<Mmap>, 39 - entries: Vec<(Entry, String)>, 60 + index: BTreeMap<String, BindleEntry>, 40 61 data_end: u64, 41 62 } 42 63 43 64 impl Bindle { 44 65 pub fn open<P: AsRef<Path>>(path: P) -> io::Result<Self> { 66 + let path_buf = path.as_ref().to_path_buf(); 45 67 let mut file = OpenOptions::new() 46 68 .read(true) 47 69 .write(true) 48 70 .create(true) 49 - .open(path)?; 50 - 71 + .open(&path_buf)?; 51 72 file.lock_shared()?; 52 73 53 74 let len = file.metadata()?.len(); 54 - 55 75 if len == 0 { 56 - // New file: Write the magic header immediately 57 76 file.write_all(BNDL_MAGIC)?; 58 77 return Ok(Self { 78 + path: path_buf, 59 79 file, 60 80 mmap: None, 61 - entries: Vec::new(), 81 + index: BTreeMap::new(), 62 82 data_end: HEADER_SIZE, 63 83 }); 64 84 } 65 85 66 - // Existing file: Check header magic 67 86 let mut header = [0u8; 8]; 68 87 file.read_exact(&mut header)?; 69 88 if &header != BNDL_MAGIC { 70 - return Err(io::Error::new( 71 - io::ErrorKind::InvalidData, 72 - "Invalid Bindle header", 73 - )); 74 - } 75 - // Case 2: File exists but is too small to even hold a footer 76 - if len < FOOTER_SIZE as u64 { 77 - return Err(io::Error::new( 78 - io::ErrorKind::InvalidData, 79 - "File too small to be a Bindle", 80 - )); 89 + return Err(io::Error::new(io::ErrorKind::InvalidData, "Invalid header")); 81 90 } 82 91 83 92 let m = unsafe { Mmap::map(&file)? 
}; 84 93 let footer_pos = m.len() - FOOTER_SIZE; 85 - 86 - let footer = Footer::read_from_bytes(&m[footer_pos..]) 87 - .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "Invalid footer alignment"))?; 94 + let footer = BindleFooter::read_from_bytes(&m[footer_pos..]).unwrap(); 88 95 89 - // If magic is valid, proceed to parse the index 90 - let data_end = u64::from_le_bytes(footer.index_offset); 91 - let count = u64::from_le_bytes(footer.entry_count); 92 - let mut entries = Vec::with_capacity(count as usize); 96 + let data_end = footer.index_offset; 97 + let count = footer.entry_count; 98 + let mut index = BTreeMap::new(); 93 99 94 100 let mut cursor = data_end as usize; 95 101 for _ in 0..count { 96 - let entry_bytes = m 97 - .get(cursor..cursor + ENTRY_SIZE) 98 - .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "Index out of bounds"))?; 99 - let entry = Entry::read_from_bytes(entry_bytes) 100 - .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "Invalid entry"))?; 101 - 102 - let n_len = u16::from_le_bytes(entry.name_len) as usize; 102 + let entry = BindleEntry::read_from_bytes(&m[cursor..cursor + ENTRY_SIZE]).unwrap(); 103 103 let n_start = cursor + ENTRY_SIZE; 104 - let n_end = n_start + n_len; 105 - 106 - let name_bytes = m 107 - .get(n_start..n_end) 108 - .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "Name out of bounds"))?; 109 - let name = String::from_utf8_lossy(name_bytes).into_owned(); 110 - 111 - entries.push((entry, name)); 112 - 113 - let total = ENTRY_SIZE + n_len; 104 + let name = 105 + String::from_utf8_lossy(&m[n_start..n_start + entry.name_len()]).into_owned(); 106 + index.insert(name, entry); 107 + let total = ENTRY_SIZE + entry.name_len(); 114 108 cursor += (total + (BNDL_ALIGN - 1)) & !(BNDL_ALIGN - 1); 115 109 } 116 110 117 111 Ok(Self { 112 + path: path_buf, 118 113 file, 119 114 mmap: Some(m), 120 - entries, 115 + index, 121 116 data_end, 122 117 }) 123 118 } 124 119 125 - /// Reads data for an entry using Cow 
to avoid unnecessary copies. 126 - pub fn read<'a>(&'a self, name: &str) -> Option<Cow<'a, [u8]>> { 127 - let (entry, _) = self.entries.iter().find(|(_, n)| n == name)?; 128 - let mmap = self.mmap.as_ref()?; 129 - 130 - let offset = u64::from_le_bytes(entry.offset) as usize; 131 - let c_size = u64::from_le_bytes(entry.compressed_size) as usize; 132 - let u_size = u64::from_le_bytes(entry.uncompressed_size) as usize; 133 - 134 - let data = mmap.get(offset..offset + c_size)?; 135 - 136 - if entry.compression_type == 1 { 137 - let mut out = Vec::with_capacity(u_size); 138 - zstd::Decoder::new(data).ok()?.read_to_end(&mut out).ok()?; 139 - Some(Cow::Owned(out)) 140 - } else { 141 - Some(Cow::Borrowed(data)) 142 - } 143 - } 144 - 145 - /// Streams data directly to a writer (e.g., File, TcpStream) to keep memory usage low. 146 - pub fn read_to_writer<W: Write>(&self, name: &str, mut writer: W) -> io::Result<u64> { 147 - let (entry, _) = self 148 - .entries 149 - .iter() 150 - .find(|(_, n)| n == name) 151 - .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "Entry not found"))?; 152 - 153 - let mmap = self 154 - .mmap 155 - .as_ref() 156 - .ok_or_else(|| io::Error::new(io::ErrorKind::Other, "Archive not mapped"))?; 157 - 158 - let offset = u64::from_le_bytes(entry.offset) as usize; 159 - let c_size = u64::from_le_bytes(entry.compressed_size) as usize; 160 - let data = mmap.get(offset..offset + c_size).ok_or_else(|| { 161 - io::Error::new(io::ErrorKind::InvalidData, "Data range out of bounds") 162 - })?; 163 - 164 - if entry.compression_type == 1 { 165 - let mut decoder = zstd::Decoder::new(data)?; 166 - io::copy(&mut decoder, &mut writer) 167 - } else { 168 - writer.write_all(data)?; 169 - Ok(data.len() as u64) 170 - } 171 - } 172 - 173 120 pub fn add(&mut self, name: &str, data: &[u8], compress: bool) -> io::Result<()> { 174 - // Prevent Duplicate Keys 175 - if self 176 - .entries 177 - .iter() 178 - .any(|(_, existing_name)| existing_name == name) 179 - { 180 - 
return Err(io::Error::new( 181 - io::ErrorKind::AlreadyExists, 182 - format!("Entry '{}' already exists in bindle", name), 183 - )); 184 - } 185 - 186 - // Position the file pointer at the end of valid data 187 - // If data_end is 0, we start after the 8-byte Magic Header 188 - let write_pos = if self.data_end >= HEADER_SIZE { 189 - self.data_end 121 + let (processed, c_type) = if compress { 122 + (zstd::encode_all(data, 3)?, 1) 190 123 } else { 191 - HEADER_SIZE 124 + (data.to_vec(), 0) 192 125 }; 193 126 194 - self.file.seek(SeekFrom::Start(write_pos))?; 127 + self.file.seek(SeekFrom::Start(self.data_end))?; 128 + self.file.write_all(&processed)?; 195 129 196 - // Prepare and write data 197 - let write_data = if compress { 198 - zstd::encode_all(data, 3)? 199 - } else { 200 - data.to_vec() 201 - }; 202 - 203 - let start_offset = self.file.stream_position()?; 204 - self.file.write_all(&write_data)?; 205 - 206 - // Align to 8 bytes for the next entry or index 207 - let current_pos = self.file.stream_position()?; 208 - let pad = (BNDL_ALIGN as u64 - (current_pos % BNDL_ALIGN as u64)) % BNDL_ALIGN as u64; 130 + let offset = self.data_end; 131 + let c_size = processed.len() as u64; 132 + let pad = (8 - (c_size % 8)) % 8; 209 133 if pad > 0 { 210 134 self.file.write_all(&vec![0u8; pad as usize])?; 211 135 } 212 136 213 - // 5. 
Update state 214 - self.data_end = self.file.stream_position()?; 215 - let entry = Entry { 216 - offset: start_offset.to_le_bytes(), 217 - compressed_size: (write_data.len() as u64).to_le_bytes(), 137 + self.data_end = offset + c_size + pad; 138 + 139 + let entry = BindleEntry { 140 + offset: offset.to_le_bytes(), 141 + compressed_size: c_size.to_le_bytes(), 218 142 uncompressed_size: (data.len() as u64).to_le_bytes(), 219 - crc32: crc32fast::hash(&write_data).to_le_bytes(), 143 + compression_type: c_type, 220 144 name_len: (name.len() as u16).to_le_bytes(), 221 - compression_type: if compress { 1 } else { 0 }, 222 - _reserved: 0, 145 + ..Default::default() 223 146 }; 224 147 225 - self.entries.push((entry, name.to_string())); 148 + self.index.insert(name.to_string(), entry); 226 149 Ok(()) 227 150 } 228 151 229 152 pub fn save(&mut self) -> io::Result<()> { 230 153 self.file.lock_exclusive()?; 231 - 232 154 self.file.seek(SeekFrom::Start(self.data_end))?; 233 155 let index_start = self.data_end; 234 156 235 - for (entry, name) in &self.entries { 157 + for (name, entry) in &self.index { 236 158 self.file.write_all(entry.as_bytes())?; 237 159 self.file.write_all(name.as_bytes())?; 238 - let current_disk_size = ENTRY_SIZE + name.len(); 239 - let pad = (BNDL_ALIGN - (current_disk_size % BNDL_ALIGN)) % BNDL_ALIGN; 160 + let pad = (BNDL_ALIGN - ((ENTRY_SIZE + name.len()) % BNDL_ALIGN)) % BNDL_ALIGN; 240 161 if pad > 0 { 241 162 self.file.write_all(&vec![0u8; pad])?; 242 163 } 243 164 } 244 165 245 - let footer = Footer { 246 - index_offset: index_start.to_le_bytes(), 247 - entry_count: (self.entries.len() as u64).to_le_bytes(), 166 + let footer = BindleFooter { 167 + index_offset: index_start, 168 + entry_count: self.index.len() as u64, 248 169 }; 249 - 250 170 self.file.write_all(footer.as_bytes())?; 251 171 self.file.flush()?; 252 172 self.mmap = Some(unsafe { Mmap::map(&self.file)? 
}); 253 173 self.file.lock_shared()?; 254 - 255 174 Ok(()) 256 175 } 257 176 258 - /// Returns a list of all entry names in the archive. 259 - pub fn list(&self) -> Vec<&str> { 260 - self.entries.iter().map(|(_, name)| name.as_str()).collect() 177 + pub fn vacuum(&mut self) -> io::Result<()> { 178 + let tmp_path = self.path.with_extension("tmp"); 179 + let mut new_file = OpenOptions::new() 180 + .write(true) 181 + .create(true) 182 + .truncate(true) 183 + .open(&tmp_path)?; 184 + new_file.write_all(BNDL_MAGIC)?; 185 + let mut current_offset = HEADER_SIZE; 186 + 187 + for entry in self.index.values_mut() { 188 + let mut buf = vec![0u8; entry.compressed_size() as usize]; 189 + self.file.seek(SeekFrom::Start(entry.offset()))?; 190 + self.file.read_exact(&mut buf)?; 191 + 192 + new_file.seek(SeekFrom::Start(current_offset))?; 193 + new_file.write_all(&buf)?; 194 + 195 + entry.offset = current_offset.to_le_bytes(); 196 + let pad = (8 - (entry.compressed_size() % 8)) % 8; 197 + if pad > 0 { 198 + new_file.write_all(&vec![0u8; pad as usize])?; 199 + } 200 + current_offset += entry.compressed_size() + pad; 201 + } 202 + 203 + self.data_end = current_offset; 204 + self.file = new_file; 205 + self.save()?; 206 + std::fs::rename(tmp_path, &self.path)?; 207 + Ok(()) 261 208 } 262 209 263 - pub fn entries(&self) -> &[(Entry, String)] { 264 - &self.entries 210 + pub fn read<'a>(&'a self, name: &str) -> Option<Cow<'a, [u8]>> { 211 + let entry = self.index.get(name)?; 212 + let mmap = self.mmap.as_ref()?; 213 + let data = 214 + mmap.get(entry.offset() as usize..(entry.offset() + entry.compressed_size()) as usize)?; 215 + 216 + if entry.compression_type == 1 { 217 + let mut out = Vec::with_capacity(entry.uncompressed_size() as usize); 218 + zstd::Decoder::new(data).ok()?.read_to_end(&mut out).ok()?; 219 + Some(Cow::Owned(out)) 220 + } else { 221 + Some(Cow::Borrowed(data)) 222 + } 265 223 } 266 224 267 - /// Returns the number of entries. 
268 225 pub fn len(&self) -> usize { 269 - self.entries.len() 226 + self.index.len() 270 227 } 271 228 272 229 pub fn is_empty(&self) -> bool { 273 - self.entries.is_empty() 230 + self.index.is_empty() 231 + } 232 + 233 + pub fn index(&self) -> &BTreeMap<String, BindleEntry> { 234 + &self.index 274 235 } 275 236 } 276 237