My working unpac space for OCaml projects in development
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Add C zstd interop tests and fix empty frame handling

Interop tests verify:
- Pure OCaml decompresses C-compressed data (test vectors from bytesrw)
- C zstd CLI decompresses pure OCaml compressed data
- Roundtrip works for various data sizes and compression levels

Fixes:
- Empty frame encoding now includes required empty block header (01 00 00)
- Empty frame decompression correctly reads block header before checksum

All 22 tests pass (9 zstd + 6 bytesrw + 7 interop).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

+251 -20
+2 -6
src/zstd_decode.ml
··· 534 534 let output = Bytes.create output_size in 535 535 let ctx = create_frame_context header dict in 536 536 537 - (* Decompress all blocks (skip if empty frame) *) 538 - let written = 539 - match header.frame_content_size with 540 - | Some 0L -> 0 (* Empty frame has no blocks *) 541 - | _ -> decompress_data ctx stream output ~out_pos:0 542 - in 537 + (* Decompress all blocks *) 538 + let written = decompress_data ctx stream output ~out_pos:0 in 543 539 544 540 (* Verify checksum if present *) 545 541 if header.content_checksum then begin
+24 -14
src/zstd_encode.ml
··· 458 458 let out_pos = ref header_size in 459 459 460 460 (* Compress blocks *) 461 - let block_size = min len Constants.block_size_max in 462 - let pos = ref 0 in 461 + if len = 0 then begin 462 + (* Empty content: write an empty raw block with last_block flag *) 463 + (* Block header: last_block=1, block_type=raw(0), block_size=0 *) 464 + (* Header = 1 | (0 << 1) | (0 << 3) = 0x01 *) 465 + Bytes.set_uint8 output !out_pos 0x01; 466 + Bytes.set_uint8 output (!out_pos + 1) 0x00; 467 + Bytes.set_uint8 output (!out_pos + 2) 0x00; 468 + out_pos := !out_pos + 3 469 + end else begin 470 + let block_size = min len Constants.block_size_max in 471 + let pos = ref 0 in 463 472 464 - while !pos < len do 465 - let this_block = min block_size (len - !pos) in 466 - let is_last = !pos + this_block >= len in 473 + while !pos < len do 474 + let this_block = min block_size (len - !pos) in 475 + let is_last = !pos + this_block >= len in 467 476 468 - let block_len = compress_block src ~pos:!pos ~len:this_block output ~out_pos:!out_pos params in 477 + let block_len = compress_block src ~pos:!pos ~len:this_block output ~out_pos:!out_pos params in 469 478 470 - (* Set last block flag *) 471 - if is_last then begin 472 - let current = Bytes.get_uint8 output !out_pos in 473 - Bytes.set_uint8 output !out_pos (current lor 0x01) 474 - end; 479 + (* Set last block flag *) 480 + if is_last then begin 481 + let current = Bytes.get_uint8 output !out_pos in 482 + Bytes.set_uint8 output !out_pos (current lor 0x01) 483 + end; 475 484 476 - out_pos := !out_pos + block_len; 477 - pos := !pos + this_block 478 - done; 485 + out_pos := !out_pos + block_len; 486 + pos := !pos + this_block 487 + done 488 + end; 479 489 480 490 (* Write checksum if requested *) 481 491 if checksum then begin
+6
test-interop/dune
; Test: verify that pure OCaml can decompress C-compressed data
; and that the C zstd CLI can decompress pure-OCaml-compressed data.
; unix is listed explicitly because test_interop.ml calls
; Unix.open_process_in / Unix.close_process_in directly.
(test
 (name test_interop)
 (package zstd-test)
 (libraries zstd alcotest unix))
+219
test-interop/test_interop.ml
(** Interop tests between pure OCaml zstd and C zstd.

    Tests:
    1. Pure OCaml can decompress data compressed by C libzstd
    2. The C zstd CLI can decompress data compressed by pure OCaml zstd

    The CLI-based tests shell out to a [zstd] binary resolved by the shell. *)

(* Test vectors compressed by C libzstd (from bytesrw's test_zstd.ml) *)

(* 30 'a' characters compressed by C zstd with checksum *)
let a30_c_compressed =
  "\x28\xb5\x2f\xfd\x04\x58\x45\x00\x00\x10\x61\x61\x01\x00\x0c\xc0\x02\x61\
   \x36\xf8\xbb"
let a30_expected = String.make 30 'a'

(* 30 'b' characters compressed by C zstd with checksum *)
let b30_c_compressed =
  "\x28\xb5\x2f\xfd\x04\x58\x45\x00\x00\x10\x62\x62\x01\x00\x0c\xc0\x02\xb3\
   \x56\x1f\x2e"
let b30_expected = String.make 30 'b'

(** [run_command cmd] runs [cmd] through the shell and returns everything it
    wrote to stdout together with its exit status. Callers that also want
    stderr must redirect it themselves (e.g. with [2>&1]). *)
let run_command cmd =
  let ic = Unix.open_process_in cmd in
  let buf = Buffer.create 256 in
  let chunk = Bytes.create 4096 in
  (* [input] returns 0 only at end of file, so this drains the pipe. *)
  let rec drain () =
    let n = input ic chunk 0 (Bytes.length chunk) in
    if n > 0 then begin
      Buffer.add_subbytes buf chunk 0 n;
      drain ()
    end
  in
  drain ();
  let status = Unix.close_process_in ic in
  (Buffer.contents buf, status)

(** [with_temp_file prefix suffix f] creates a temporary file, passes its path
    to [f], and removes the file afterwards even if [f] raises (for instance
    when an Alcotest check fails). *)
let with_temp_file prefix suffix f =
  let path = Filename.temp_file prefix suffix in
  Fun.protect
    ~finally:(fun () -> try Sys.remove path with Sys_error _ -> ())
    (fun () -> f path)

(** [write_file path contents] writes [contents] to [path] in binary mode. *)
let write_file path contents =
  let oc = open_out_bin path in
  Fun.protect
    ~finally:(fun () -> close_out_noerr oc)
    (fun () ->
      output_string oc contents;
      (* Explicit close so that flush errors are reported, not swallowed. *)
      close_out oc)

(** [read_file path] returns the whole content of [path], read in binary
    mode. *)
let read_file path =
  let ic = open_in_bin path in
  Fun.protect
    ~finally:(fun () -> close_in_noerr ic)
    (fun () -> really_input_string ic (in_channel_length ic))

(** [run_zstd ~what ~flags ~output ~input] runs
    [zstd <flags> -o <output> <input>] with stderr merged into stdout and
    fails the current test with ["<what> failed: <tool output>"] unless the
    command exits with status 0. Paths are shell-quoted so that a temp
    directory containing spaces or metacharacters cannot break the command. *)
let run_zstd ~what ~flags ~output ~input =
  let cmd =
    Printf.sprintf "zstd %s -o %s %s 2>&1" flags
      (Filename.quote output) (Filename.quote input)
  in
  match run_command cmd with
  | _, Unix.WEXITED 0 -> ()
  | tool_output, _ ->
    Alcotest.fail (Printf.sprintf "%s failed: %s" what tool_output)

(** [c_decompress ?what ~prefix ~out_suffix compressed] writes [compressed]
    to a temp file, decompresses it with the C zstd CLI and returns the
    decompressed bytes. [what] labels the failure message (default
    ["zstd -d"]). Both temp files are removed on every exit path. *)
let c_decompress ?(what = "zstd -d") ~prefix ~out_suffix compressed =
  with_temp_file prefix ".zst" (fun tmp_compressed ->
    with_temp_file prefix out_suffix (fun tmp_output ->
      write_file tmp_compressed compressed;
      run_zstd ~what ~flags:"-d -f" ~output:tmp_output ~input:tmp_compressed;
      read_file tmp_output))

(* Test: Pure OCaml decompresses C-compressed data *)
let test_ocaml_decompress_c_data () =
  let decoded_a = Zstd.decompress a30_c_compressed in
  Alcotest.(check (result string string))
    "a30 decompressed" (Ok a30_expected) decoded_a;
  let decoded_b = Zstd.decompress b30_c_compressed in
  Alcotest.(check (result string string))
    "b30 decompressed" (Ok b30_expected) decoded_b

(* Test: Pure OCaml decompresses each C frame separately *)
let test_ocaml_decompress_each_frame () =
  (* Our decompressor handles one frame at a time (standard behavior) *)
  let frame1 = Zstd.decompress a30_c_compressed in
  Alcotest.(check (result string string)) "frame 1" (Ok a30_expected) frame1;
  let frame2 = Zstd.decompress b30_c_compressed in
  Alcotest.(check (result string string)) "frame 2" (Ok b30_expected) frame2

(* Test: C zstd decompresses pure OCaml-compressed data *)
let test_c_decompress_ocaml_data () =
  let test_data =
    "Hello from pure OCaml zstd! This is a test of interoperability."
  in
  let compressed = Zstd.compress test_data in

  (* Verify it has valid zstd magic *)
  Alcotest.(check bool) "has zstd magic" true (Zstd.is_zstd_frame compressed);

  let decompressed =
    c_decompress ~prefix:"zstd_test" ~out_suffix:".txt" compressed
  in
  Alcotest.(check string) "C decompressed matches" test_data decompressed

(* Test: C zstd decompresses larger pure OCaml-compressed data *)
let test_c_decompress_large () =
  (* 10KB of varied data *)
  let size = 10000 in
  let test_data = String.init size (fun i -> Char.chr (i mod 256)) in
  let compressed = Zstd.compress test_data in

  let decompressed =
    c_decompress ~prefix:"zstd_large" ~out_suffix:".bin" compressed
  in
  Alcotest.(check int) "size matches" size (String.length decompressed);
  Alcotest.(check string) "content matches" test_data decompressed

(* Test: C compression -> OCaml decompression using CLI *)
let test_c_compress_ocaml_decompress () =
  let test_data =
    "Testing C compression with OCaml decompression roundtrip!"
  in

  (* Compress with the C zstd CLI and read the frame back before cleanup *)
  let compressed =
    with_temp_file "zstd_input" ".txt" (fun tmp_input ->
      with_temp_file "zstd_compressed" ".zst" (fun tmp_compressed ->
        write_file tmp_input test_data;
        run_zstd ~what:"zstd compress" ~flags:"-f"
          ~output:tmp_compressed ~input:tmp_input;
        read_file tmp_compressed))
  in

  (* Verify our OCaml can decompress it *)
  Alcotest.(check bool) "C output has magic" true
    (Zstd.is_zstd_frame compressed);
  let decoded = Zstd.decompress compressed in
  Alcotest.(check (result string string))
    "OCaml decompressed C output" (Ok test_data) decoded

(* Test: Empty data roundtrip *)
let test_empty_interop () =
  let compressed = Zstd.compress "" in
  let decompressed =
    c_decompress ~what:"zstd -d empty" ~prefix:"zstd_empty" ~out_suffix:".bin"
      compressed
  in
  Alcotest.(check string) "empty roundtrip" "" decompressed

(* Test: Various compression levels *)
let test_compression_levels_interop () =
  let test_data = String.make 1000 'x' in
  List.iter (fun level ->
    let compressed = Zstd.compress ~level test_data in
    let decompressed =
      c_decompress
        ~what:(Printf.sprintf "level %d: zstd -d" level)
        ~prefix:"zstd_level" ~out_suffix:".bin" compressed
    in
    Alcotest.(check string)
      (Printf.sprintf "level %d roundtrip" level) test_data decompressed
  ) [1; 3; 5; 10; 15; 19]

let tests = [
  "OCaml decompresses C data", `Quick, test_ocaml_decompress_c_data;
  "OCaml decompresses each C frame", `Quick, test_ocaml_decompress_each_frame;
  "C decompresses OCaml data", `Quick, test_c_decompress_ocaml_data;
  "C decompresses large OCaml data", `Quick, test_c_decompress_large;
  "C compress -> OCaml decompress", `Quick, test_c_compress_ocaml_decompress;
  "Empty interop", `Quick, test_empty_interop;
  "Compression levels interop", `Quick, test_compression_levels_interop;
]

let () =
  Alcotest.run "zstd interop" [
    "C <-> OCaml interop", tests;
  ]