an efficient binary archive format
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

add verification step to tests

zach 7e9d2089 ac65cd3e

+42 -1
+1 -1
README.md
··· 1 1 # bindle-file 2 2 3 - [bindle](https://en.wikipedia.org/wiki/Bindle) is an efficient, general purpose binary archive format for collecting files. 3 + [bindle](https://en.wikipedia.org/wiki/Bindle) is a fast, efficient binary archive format. 4 4 5 5 The format uses memory-mapped I/O for fast reads, optional zstd compression, and supports append-only writes with shadowing for updates. Files can be added incrementally without rewriting the entire archive. 6 6
+41
bench/bench.py
··· 37 37 return f"{seconds:.3f} s" 38 38 39 39 40 + def verify_extraction(src_dir: Path, extract_dir: Path) -> None: 41 + """Verify extracted files match source files.""" 42 + src_files = {f.relative_to(src_dir): f for f in src_dir.rglob("*") if f.is_file()} 43 + extract_files = {f.relative_to(extract_dir): f for f in extract_dir.rglob("*") if f.is_file()} 44 + 45 + # Check file count 46 + if len(src_files) != len(extract_files): 47 + raise ValueError(f"File count mismatch: {len(src_files)} source, {len(extract_files)} extracted") 48 + 49 + # Check each file exists and has correct size 50 + for rel_path, src_file in src_files.items(): 51 + if rel_path not in extract_files: 52 + raise ValueError(f"Missing file in extraction: {rel_path}") 53 + 54 + extract_file = extract_files[rel_path] 55 + src_size = src_file.stat().st_size 56 + extract_size = extract_file.stat().st_size 57 + 58 + if src_size != extract_size: 59 + raise ValueError(f"Size mismatch for {rel_path}: {src_size} vs {extract_size}") 60 + 61 + # Verify content matches 62 + if src_file.read_bytes() != extract_file.read_bytes(): 63 + raise ValueError(f"Content mismatch for {rel_path}") 64 + 65 + 40 66 def create_test_data(base_dir: Path) -> None: 41 67 """Create a variety of test files.""" 42 68 base_dir.mkdir(parents=True, exist_ok=True) ··· 90 116 ) 91 117 unpack_time = time.perf_counter() - start 92 118 119 + # Verify extraction (not timed) 120 + verify_extraction(src_dir, extract_dir) 121 + 93 122 return pack_time, size, unpack_time 94 123 95 124 ··· 119 148 ) 120 149 unpack_time = time.perf_counter() - start 121 150 151 + # Verify extraction (not timed) 152 + verify_extraction(src_dir, extract_dir) 153 + 122 154 return pack_time, size, unpack_time 123 155 124 156 ··· 146 178 ) 147 179 unpack_time = time.perf_counter() - start 148 180 181 + # Verify extraction (not timed) 182 + verify_extraction(src_dir, extract_dir) 183 + 149 184 return pack_time, size, unpack_time 150 185 151 186 ··· 173 208 ) 174 209 
unpack_time = time.perf_counter() - start 175 210 211 + # Verify extraction (not timed) 212 + verify_extraction(src_dir, extract_dir) 213 + 176 214 return pack_time, size, unpack_time 177 215 178 216 ··· 199 237 check=True, 200 238 ) 201 239 unpack_time = time.perf_counter() - start 240 + 241 + # Verify extraction (not timed) 242 + verify_extraction(src_dir, extract_dir) 202 243 203 244 return pack_time, size, unpack_time 204 245