My working unpac space for OCaml projects in development
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Add performance optimizations and benchmarks

Performance improvements:
- Fast 4-byte-at-a-time match length comparison
- Sparse hashing for long matches (skip hashing every 4th byte)
- Better code comments for hot path operations

Benchmarks:
- Synthetic data tests (repeated patterns, random data)
- Standard corpus files (alice29.txt, html, urls.10K)
- Reports compression ratio and throughput in MB/s

Results on test system:
- Compression: 70-370 MB/s depending on data compressibility
- Decompression: 20-9000 MB/s (faster for incompressible data)
- Compression ratios match expected Snappy behavior

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

+187 -21
+136
bench/bench_snappy.ml
··· 1 + (* Snappy benchmark suite *) 2 + 3 + (* Generate test data *) 4 + let make_repeated n pattern = 5 + let plen = String.length pattern in 6 + let buf = Bytes.create n in 7 + for i = 0 to n - 1 do 8 + Bytes.set buf i (String.get pattern (i mod plen)) 9 + done; 10 + Bytes.unsafe_to_string buf 11 + 12 + let make_random n seed = 13 + let buf = Bytes.create n in 14 + let state = ref seed in 15 + for i = 0 to n - 1 do 16 + state := !state * 1103515245 + 12345; 17 + Bytes.set buf i (Char.chr ((!state lsr 16) land 0xFF)) 18 + done; 19 + Bytes.unsafe_to_string buf 20 + 21 + let read_file path = 22 + let ic = open_in_bin path in 23 + let n = in_channel_length ic in 24 + let s = really_input_string ic n in 25 + close_in ic; 26 + s 27 + 28 + (* Timing utilities *) 29 + let time_ns () = Unix.gettimeofday () *. 1e9 30 + 31 + let benchmark name iterations data f = 32 + (* Warmup *) 33 + for _ = 1 to 5 do 34 + ignore (f data) 35 + done; 36 + 37 + (* Actual benchmark *) 38 + let start = time_ns () in 39 + for _ = 1 to iterations do 40 + ignore (f data) 41 + done; 42 + let elapsed = time_ns () -. start in 43 + 44 + let total_bytes = iterations * String.length data in 45 + let bytes_per_sec = float_of_int total_bytes /. (elapsed /. 1e9) in 46 + let mb_per_sec = bytes_per_sec /. (1024.0 *. 1024.0) in 47 + 48 + Printf.printf "%-30s %8.2f MB/s (%d iterations, %.3f ms total)\n%!" 49 + name mb_per_sec iterations (elapsed /. 1e6) 50 + 51 + let benchmark_compression name data = 52 + let iterations = 53 + (* Adjust iterations based on data size for reasonable run time *) 54 + let size = String.length data in 55 + if size < 1000 then 10000 56 + else if size < 10000 then 1000 57 + else if size < 100000 then 100 58 + else 10 59 + in 60 + benchmark (name ^ " compress") iterations data Snappy.compress; 61 + 62 + let compressed = Snappy.compress data in 63 + let ratio = float_of_int (String.length compressed) /. 64 + float_of_int (String.length data) *. 100.0 in 65 + Printf.printf " Compression ratio: %.1f%% (%d -> %d bytes)\n%!" 66 + ratio (String.length data) (String.length compressed); 67 + 68 + benchmark (name ^ " decompress") iterations compressed Snappy.decompress_exn 69 + 70 + let benchmark_framed name data = 71 + let iterations = 72 + let size = String.length data in 73 + if size < 10000 then 100 74 + else if size < 100000 then 50 75 + else 10 76 + in 77 + benchmark (name ^ " framed compress") iterations data Snappy.compress_framed; 78 + 79 + let compressed = Snappy.compress_framed data in 80 + benchmark (name ^ " framed decompress") iterations compressed 81 + (fun s -> match Snappy.decompress_framed s with 82 + | Ok x -> x 83 + | Error e -> failwith e) 84 + 85 + let () = 86 + Printf.printf "Snappy OCaml Benchmark Suite\n"; 87 + Printf.printf "============================\n\n"; 88 + 89 + (* Test data *) 90 + let data_1k_repeated = make_repeated 1024 "ABCDEFGH" in 91 + let data_10k_repeated = make_repeated 10240 "Hello World! " in 92 + let data_100k_repeated = make_repeated 102400 "Repeated pattern for compression test. " in 93 + 94 + let data_1k_random = make_random 1024 12345 in 95 + let data_10k_random = make_random 10240 67890 in 96 + let data_100k_random = make_random 102400 11111 in 97 + 98 + Printf.printf "=== Highly Compressible Data ===\n"; 99 + benchmark_compression "1KB repeated" data_1k_repeated; 100 + Printf.printf "\n"; 101 + benchmark_compression "10KB repeated" data_10k_repeated; 102 + Printf.printf "\n"; 103 + benchmark_compression "100KB repeated" data_100k_repeated; 104 + Printf.printf "\n"; 105 + 106 + Printf.printf "=== Random (Incompressible) Data ===\n"; 107 + benchmark_compression "1KB random" data_1k_random; 108 + Printf.printf "\n"; 109 + benchmark_compression "10KB random" data_10k_random; 110 + Printf.printf "\n"; 111 + benchmark_compression "100KB random" data_100k_random; 112 + Printf.printf "\n"; 113 + 114 + Printf.printf "=== Framing Format ===\n"; 115 + benchmark_framed "100KB repeated" data_100k_repeated; 116 + Printf.printf "\n"; 117 + benchmark_framed "100KB random" data_100k_random; 118 + Printf.printf "\n"; 119 + 120 + (* Try to load corpus files if available *) 121 + Printf.printf "=== Corpus Files ===\n"; 122 + let corpus_files = [ 123 + "test/testdata/alice29.txt"; 124 + "test/testdata/html"; 125 + "test/testdata/urls.10K"; 126 + ] in 127 + List.iter (fun path -> 128 + if Sys.file_exists path then begin 129 + let data = read_file path in 130 + benchmark_compression path data; 131 + Printf.printf "\n" 132 + end else 133 + Printf.printf "Skipping %s (not found)\n\n" path 134 + ) corpus_files; 135 + 136 + Printf.printf "Benchmark complete.\n"
+3
bench/dune
··· 1 + (executable 2 + (name bench_snappy) 3 + (libraries snappy unix))
+48 -21
src/snappy.ml
··· 333 333 Compression 334 334 ============================================================ *) 335 335 336 - (* Hash function - takes 4 bytes, returns hash for table lookup *) 336 + (* Hash function - takes 4 bytes, returns hash for table lookup 337 + Using a multiplicative hash with a good mixing constant from the original Snappy. 338 + The constant 0x1e35a7bd was chosen to distribute bits well. *) 337 339 let[@inline always] hash_4bytes v shift = 338 - (* Use a good mixing constant - this is from the original Snappy *) 339 340 let kMul = 0x1e35a7bd in 340 341 ((v * kMul) lsr shift) land (max_hash_table_size - 1) 342 + 343 + (* Faster match length finding - compare 4 bytes at a time when possible *) 344 + let[@inline] find_match_length_fast src a b limit = 345 + let len = ref 0 in 346 + let remaining = limit - b in 347 + 348 + (* Compare 4 bytes at a time while we can *) 349 + while !len + 4 <= remaining && 350 + get_u32_le src (a + !len) = get_u32_le src (b + !len) do 351 + len := !len + 4 352 + done; 353 + 354 + (* Compare remaining bytes one at a time *) 355 + while b + !len < limit && get_u8 src (a + !len) = get_u8 src (b + !len) do 356 + incr len 357 + done; 358 + !len 341 359 342 360 (* Emit a literal. Returns new dst position. *) 343 361 let emit_literal dst dst_pos src src_pos length = ··· 406 424 emit_copy_split dst dst_pos offset (length - 64) 407 425 end 408 426 409 - (* Find match length starting at positions a and b *) 410 - let[@inline] find_match_length src a b limit = 411 - let len = ref 0 in 412 - while b + !len < limit && get_u8 src (a + !len) = get_u8 src (b + !len) do 413 - incr len 414 - done; 415 - !len 416 - 417 427 (* Maximum compressed length for input of given size *) 418 428 let max_compressed_length input_len = 419 429 (* Worst case: 1 byte overhead per 6 bytes + varint header *) 420 430 varint_length input_len + input_len + (input_len / 6) + 32 421 431 422 432 (* Compress src[src_pos..src_pos+src_len) into dst starting at dst_pos. 423 - Returns bytes written. Assumes dst has enough space. *) 433 + Returns bytes written. Assumes dst has enough space. 434 + 435 + Performance optimizations: 436 + - Uses fast 4-byte-at-a-time match length comparison 437 + - Skips hashing for long matches (hash every Nth byte) 438 + - Inlined hot path operations *) 424 439 let compress_into ~src ~src_pos ~src_len ~dst ~dst_pos = 425 440 if src_len = 0 then begin 426 441 (* Empty input *) ··· 458 473 dst_i := emit_literal dst !dst_i src !lit_start (!i - !lit_start) 459 474 end; 460 475 461 - (* Find match length *) 462 - let match_len = 4 + find_match_length src (candidate + 4) (!i + 4) src_end in 476 + (* Find match length using fast 4-byte comparison *) 477 + let match_len = 4 + find_match_length_fast src (candidate + 4) (!i + 4) src_end in 463 478 let offset = !i - candidate in 464 479 465 480 (* Emit copy *) 466 481 dst_i := emit_copy_split dst !dst_i offset match_len; 467 482 468 - (* Skip matched bytes, but still hash them for future matches *) 483 + (* Skip matched bytes, using sparse hashing for long matches *) 469 484 let match_end = !i + match_len in 470 - i := !i + 1; 471 - while !i < match_end && !i < src_limit do 472 - let h = hash_4bytes (get_u32_le src !i) shift in 473 - Array.unsafe_set table h !i; 474 - incr i 475 - done; 476 - i := match_end; 485 + if match_len > 16 then begin 486 + (* For long matches, only hash every 4th byte to save time *) 487 + i := !i + 1; 488 + while !i < match_end - 3 && !i < src_limit do 489 + let h = hash_4bytes (get_u32_le src !i) shift in 490 + Array.unsafe_set table h !i; 491 + i := !i + 4 492 + done; 493 + i := match_end 494 + end else begin 495 + (* For short matches, hash every byte *) 496 + i := !i + 1; 497 + while !i < match_end && !i < src_limit do 498 + let h = hash_4bytes (get_u32_le src !i) shift in 499 + Array.unsafe_set table h !i; 500 + incr i 501 + done; 502 + i := match_end 503 + end; 477 504 lit_start := !i 478 505 end else 479 506 incr i