an efficient binary archive format
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

crc32

zach d30ce2cd 4afbaa6b

+157 -10
+7
include/bindle.h
··· 114 114 115 115 ptrdiff_t bindle_reader_read(struct BindleReader *reader, uint8_t *buffer, size_t buffer_len); 116 116 117 + /** 118 + * Verify the CRC32 of data read from the reader. 119 + * Should be called after reading all data to ensure integrity. 120 + * Returns true if CRC32 matches, false otherwise. 121 + */ 122 + bool bindle_reader_verify_crc32(const struct BindleReader *reader); 123 + 117 124 void bindle_reader_close(struct BindleReader *reader); 118 125 119 126 #endif /* BINDLE_H */
+13
src/ffi.rs
··· 380 380 } 381 381 } 382 382 383 + /// Verify the CRC32 of data read from the reader. 384 + /// Should be called after reading all data to ensure integrity. 385 + /// Returns true if CRC32 matches, false otherwise. 386 + #[unsafe(no_mangle)] 387 + pub unsafe extern "C" fn bindle_reader_verify_crc32(reader: *const Reader) -> bool { 388 + if reader.is_null() { 389 + return false; 390 + } 391 + 392 + let r = unsafe { &*reader }; 393 + r.verify_crc32().is_ok() 394 + } 395 + 383 396 #[unsafe(no_mangle)] 384 397 pub unsafe extern "C" fn bindle_reader_close(reader: *mut Reader) { 385 398 if !reader.is_null() {
+137 -10
src/lib.rs
··· 1 + use crc32fast::Hasher; 1 2 use fs2::FileExt; 2 3 use memmap2::Mmap; 3 4 use std::borrow::Cow; ··· 79 80 _ => Compress::default(), 80 81 } 81 82 } 83 + 84 + pub fn crc32(&self) -> u32 { 85 + u32::from_le_bytes(self.crc32) 86 + } 82 87 } 83 88 84 89 #[repr(C, packed)] ··· 104 109 105 110 pub struct Reader<'a> { 106 111 decoder: Either<zstd::Decoder<'static, BufReader<io::Cursor<&'a [u8]>>>, io::Cursor<&'a [u8]>>, 112 + crc32_hasher: Hasher, 113 + expected_crc32: u32, 107 114 } 108 115 109 116 impl<'a> Read for Reader<'a> { 110 117 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { 111 - match &mut self.decoder { 112 - Either::Left(x) => x.read(buf), 113 - Either::Right(x) => x.read(buf), 118 + let n = match &mut self.decoder { 119 + Either::Left(x) => x.read(buf)?, 120 + Either::Right(x) => x.read(buf)?, 121 + }; 122 + 123 + if n > 0 { 124 + self.crc32_hasher.update(&buf[..n]); 114 125 } 126 + 127 + Ok(n) 115 128 } 116 129 } 117 130 ··· 129 142 } 130 143 } 131 144 145 + impl<'a> Reader<'a> { 146 + /// Verify the CRC32 of the data read so far. 147 + /// This should be called after all data has been read to ensure data integrity. 148 + pub fn verify_crc32(&self) -> io::Result<()> { 149 + let computed_crc = self.crc32_hasher.clone().finalize(); 150 + if computed_crc != self.expected_crc32 { 151 + return Err(io::Error::new( 152 + io::ErrorKind::InvalidData, 153 + format!("CRC32 mismatch: expected {:x}, got {:x}", self.expected_crc32, computed_crc), 154 + )); 155 + } 156 + Ok(()) 157 + } 158 + } 159 + 132 160 pub struct Writer<'a> { 133 161 pub(crate) bindle: &'a mut Bindle, 134 162 pub(crate) encoder: Option<zstd::Encoder<'a, std::fs::File>>, 135 163 pub(crate) name: String, 136 164 pub(crate) start_offset: u64, 137 165 pub(crate) uncompressed_size: u64, 166 + pub(crate) crc32_hasher: Hasher, 138 167 } 139 168 140 169 impl<'a> Drop for Writer<'a> { ··· 161 190 } 162 191 163 192 self.uncompressed_size += data.len() as u64; 193 + self.crc32_hasher.update(data); 164 194 165 195 if let Some(encoder) = &mut self.encoder { 166 196 encoder.write_all(data)?; ··· 197 227 198 228 self.bindle.data_end = current_pos + pad_len; 199 229 230 + let crc32 = self.crc32_hasher.clone().finalize(); 231 + 200 232 let entry = Entry { 201 233 offset: self.start_offset.to_le_bytes(), 202 234 compressed_size: compressed_size.to_le_bytes(), 203 235 uncompressed_size: self.uncompressed_size.to_le_bytes(), 236 + crc32: crc32.to_le_bytes(), 204 237 compression_type, 205 238 name_len: (self.name.len() as u16).to_le_bytes(), 206 239 ..Default::default() ··· 460 493 let entry = self.index.get(name)?; 461 494 let mmap = self.mmap.as_ref()?; 462 495 463 - if entry.compression_type == Compress::Zstd as u8 { 464 - let data = mmap.get( 496 + let data = if entry.compression_type == Compress::Zstd as u8 { 497 + let compressed_data = mmap.get( 465 498 entry.offset() as usize..(entry.offset() + entry.compressed_size()) as usize, 466 499 )?; 467 500 let mut out = Vec::with_capacity(entry.uncompressed_size() as usize); 468 - zstd::Decoder::new(data).ok()?.read_to_end(&mut out).ok()?; 469 - Some(Cow::Owned(out)) 501 + zstd::Decoder::new(compressed_data).ok()?.read_to_end(&mut out).ok()?; 502 + Cow::Owned(out) 470 503 } else { 471 - let data = mmap.get( 504 + let uncompressed_data = mmap.get( 472 505 entry.offset() as usize..(entry.offset() + entry.uncompressed_size()) as usize, 473 506 )?; 474 - Some(Cow::Borrowed(data)) 507 + Cow::Borrowed(uncompressed_data) 508 + }; 509 + 510 + // Verify CRC32 511 + let computed_crc = crc32fast::hash(&data); 512 + if computed_crc != entry.crc32() { 513 + return None; 475 514 } 515 + 516 + Some(data) 476 517 } 477 518 478 519 /// Read to an `std::io::Write` 479 520 pub fn read_to<W: std::io::Write>(&self, name: &str, mut w: W) -> std::io::Result<u64> { 480 - std::io::copy(&mut self.reader(name)?, &mut w) 521 + let mut reader = self.reader(name)?; 522 + let bytes_copied = std::io::copy(&mut reader, &mut w)?; 523 + reader.verify_crc32()?; 524 + Ok(bytes_copied) 481 525 } 482 526 483 527 // Returns a seekable reader for an entry. ··· 503 547 let decoder = zstd::Decoder::new(cursor)?; 504 548 Ok(Reader { 505 549 decoder: Either::Left(decoder), 550 + crc32_hasher: Hasher::new(), 551 + expected_crc32: entry.crc32(), 506 552 }) 507 553 } else { 508 554 Ok(Reader { 509 555 decoder: Either::Right(cursor), 556 + crc32_hasher: Hasher::new(), 557 + expected_crc32: entry.crc32(), 510 558 }) 511 559 } 512 560 } ··· 596 644 }, 597 645 start_offset, 598 646 uncompressed_size: 0, 647 + crc32_hasher: Hasher::new(), 599 648 }) 600 649 } 601 650 } ··· 839 888 let result = b.read("streamed_file.txt").expect("Entry not found"); 840 889 assert_eq!(result.as_ref(), expected); 841 890 assert_eq!(result.len(), expected.len()); 891 + 892 + let _ = std::fs::remove_file(path); 893 + } 894 + 895 + #[test] 896 + fn test_crc32_corruption_detection() { 897 + let path = "test_crc32.bindl"; 898 + let _ = std::fs::remove_file(path); 899 + let data = b"Test data for CRC32 verification"; 900 + 901 + // 1. Create a file with valid data 902 + { 903 + let mut b = Bindle::open(path).expect("Failed to open"); 904 + b.add("test.txt", data, Compress::None).unwrap(); 905 + b.save().unwrap(); 906 + } 907 + 908 + // 2. Verify that reading with correct data works 909 + { 910 + let b = Bindle::open(path).expect("Failed to reopen"); 911 + let result = b.read("test.txt").expect("Should read successfully"); 912 + assert_eq!(result.as_ref(), data); 913 + } 914 + 915 + // 3. Corrupt the data by modifying a byte directly in the file 916 + { 917 + use std::io::{Seek, SeekFrom, Write}; 918 + let mut file = OpenOptions::new() 919 + .write(true) 920 + .read(true) 921 + .open(path) 922 + .unwrap(); 923 + 924 + // Skip the header and modify the first byte of data 925 + file.seek(SeekFrom::Start(HEADER_SIZE as u64)).unwrap(); 926 + file.write_all(b"X").unwrap(); // Corrupt first byte 927 + file.flush().unwrap(); 928 + } 929 + 930 + // 4. Verify that reading corrupted data fails CRC32 check 931 + { 932 + let b = Bindle::open(path).expect("Failed to reopen after corruption"); 933 + let result = b.read("test.txt"); 934 + assert!(result.is_none(), "Read should fail due to CRC32 mismatch"); 935 + } 936 + 937 + let _ = std::fs::remove_file(path); 938 + } 939 + 940 + #[test] 941 + fn test_crc32_with_compression() { 942 + let path = "test_crc32_compressed.bindl"; 943 + let _ = std::fs::remove_file(path); 944 + let data = vec![b'A'; 2000]; // Large enough to trigger compression 945 + 946 + // 1. Create a file with compressed data 947 + { 948 + let mut b = Bindle::open(path).expect("Failed to open"); 949 + b.add("compressed.bin", &data, Compress::Zstd).unwrap(); 950 + b.save().unwrap(); 951 + } 952 + 953 + // 2. Verify that reading compressed data works and CRC32 is verified 954 + { 955 + let b = Bindle::open(path).expect("Failed to reopen"); 956 + let result = b.read("compressed.bin").expect("Should read successfully"); 957 + assert_eq!(result.as_ref(), data.as_slice()); 958 + } 959 + 960 + // 3. Also test with the streaming reader 961 + { 962 + let b = Bindle::open(path).expect("Failed to reopen"); 963 + let mut reader = b.reader("compressed.bin").unwrap(); 964 + let mut output = Vec::new(); 965 + std::io::copy(&mut reader, &mut output).unwrap(); 966 + reader.verify_crc32().expect("CRC32 should match"); 967 + assert_eq!(output, data); 968 + } 842 969 843 970 let _ = std::fs::remove_file(path); 844 971 }