(** Benchmarks for pure OCaml zstd implementation.

    Run with: dune exec bench/bench_zstd.exe -- -quota 5

    To compare with C zstd, run:
      python3 -c "print('A' * 102400, end='')" > /tmp/rep100k.bin
      zstd -b1 -e9 -i5 /tmp/rep100k.bin

    Note: that file holds a single repeated byte, whereas
    [Test_data.repetitive] cycles through the letters A..Z, so the two
    inputs are not identical.
*)
99+1010+open Core
1111+open Core_bench
(** Synthetic inputs with different compressibility profiles. Each generator
    takes the requested length in bytes. *)
module Test_data = struct
  (** Highly compressible: the uppercase letters A..Z repeated in a cycle. *)
  let repetitive size =
    let first_letter = Char.to_int 'A' in
    String.init size ~f:(fun idx ->
      Char.of_int_exn (first_letter + (idx mod 26)))

  (** Medium compressibility: a fixed set of short English words, each
      followed by a space, cycled in order and cut off at [size] bytes. *)
  let text_like size =
    let vocabulary =
      [| "the "; "quick "; "brown "; "fox "; "jumps "; "over ";
         "lazy "; "dog "; "and "; "runs "; "now " |]
    in
    let vocabulary_len = Array.length vocabulary in
    let out = Buffer.create size in
    (* Append words round-robin until at least [size] bytes are buffered. *)
    let rec fill word_idx =
      if Buffer.length out < size then begin
        Buffer.add_string out vocabulary.(word_idx mod vocabulary_len);
        fill (word_idx + 1)
      end
    in
    fill 0;
    (* The last word may overshoot [size]; keep only the first [size] bytes. *)
    String.sub (Buffer.contents out) ~pos:0 ~len:size

  (** Binary-like: byte values that increase by one every four positions
      and wrap around after 255. *)
  let binary_like size =
    String.init size ~f:(fun idx -> Char.of_int_exn ((idx / 4) mod 256))
end
(** Build the decompression benchmark for one input.

    [data] is compressed once, at level 3, while the test is being
    constructed; the benchmarked closure only decompresses that payload.
    The test name embeds [data_name], the input size in whole KB and the
    compressed size as a percentage of the original size.

    Returns a single-element list of benchmark tests. *)
let decompression_benchmarks data_name data =
  let original_len = String.length data in
  let payload = Zstd.compress ~level:3 data in
  let percent =
    Float.of_int (String.length payload) /. Float.of_int original_len *. 100.0
  in
  let name =
    sprintf "decompress/%s/%dKB(%.0f%%)" data_name (original_len / 1024) percent
  in
  let run () =
    let (_ : string) = Zstd.decompress_exn payload in
    ()
  in
  [ Bench.Test.create ~name run ]
(** Build the roundtrip benchmark for one input.

    Each run of the benchmarked closure compresses [data] at level 3 and
    immediately decompresses the result. The test name embeds [data_name]
    and the input size in whole KB.

    Returns a single-element list of benchmark tests. *)
let roundtrip_benchmarks data_name data =
  let name =
    sprintf "roundtrip/%s/%dKB" data_name (String.length data / 1024)
  in
  let roundtrip () =
    let restored : string =
      Zstd.decompress_exn (Zstd.compress ~level:3 data)
    in
    ignore restored
  in
  [ Bench.Test.create ~name roundtrip ]
(** Print a table of compression ratios to stdout.

    For every input size (1024, 10240 and 102400 bytes) and every
    [Test_data] generator, the input is compressed at levels 1, 3, 6 and 9
    and the compressed size is printed as a percentage of the input size. *)
let print_compression_ratios () =
  let input_sizes = [ 1024; 10240; 102400 ] in
  let generators =
    [ ("repetitive", Test_data.repetitive);
      ("text", Test_data.text_like);
      ("binary", Test_data.binary_like) ]
  in
  let levels = [ 1; 3; 6; 9 ] in

  (* One table row: all ratios are computed first, then printed. *)
  let print_row size (label, generate) =
    let input = generate size in
    let percentages =
      List.map levels ~f:(fun level ->
        let packed = Zstd.compress ~level input in
        Float.of_int (String.length packed) /. Float.of_int size *. 100.0)
    in
    printf "%-12s %7dB " label size;
    List.iter percentages ~f:(fun pct -> printf "%7.1f%% " pct);
    printf "\n"
  in

  (* Title, column headers and separator line. *)
  printf "\n=== Compression Ratios (size as %% of original) ===\n";
  printf "%-12s %8s %8s %8s %8s %8s\n"
    "Data Type" "Size" "L1" "L3" "L6" "L9";
  printf "%s\n" (String.make 56 '-');

  (* Rows are grouped by size, then by data type. *)
  List.iter input_sizes ~f:(fun size ->
    List.iter generators ~f:(fun generator -> print_row size generator));
  printf "\n"
(** Entry point: print the compression-ratio table, build the benchmark
    suite for every size / data-type combination and hand it to the
    Core_bench command-line driver. *)
let () =
  (* Printed unconditionally, before the command line is parsed. *)
  print_compression_ratios ();

  (* Input sizes in KB; every size is benchmarked with every data type. *)
  let sizes_kb = [ 1; 10; 100 ] in

  (* Compression benchmark at level 3 for one input, mirroring the
     decompression and roundtrip helpers. *)
  let compression_benchmarks data_name kb data =
    [
      Bench.Test.create
        ~name:(sprintf "compress/%s/%dKB" data_name kb)
        (fun () -> ignore (Zstd.compress ~level:3 data : string));
    ]
  in

  let all_benchmarks =
    List.concat_map sizes_kb ~f:(fun kb ->
      let size = kb * 1024 in
      let datasets =
        [ ("rep", Test_data.repetitive size);
          ("text", Test_data.text_like size);
          ("bin", Test_data.binary_like size) ]
      in
      (* Apply one benchmark builder to every data type, in order. *)
      let for_each_dataset make =
        List.concat_map datasets ~f:(fun (data_name, data) ->
          make data_name data)
      in
      (* Grouped by operation: all compress tests, then decompress, then
         roundtrip. *)
      List.concat [
        for_each_dataset (fun data_name data ->
          compression_benchmarks data_name kb data);
        for_each_dataset decompression_benchmarks;
        for_each_dataset roundtrip_benchmarks;
      ])
  in

  printf "Running %d benchmarks...\n" (List.length all_benchmarks);
  printf "Use -quota N to set seconds per benchmark (default 10).\n\n";
  Command_unix.run (Bench.make_command all_benchmarks)
+7
bench/dune
; Benchmark executable built from bench_zstd.ml.
(executable
 (name bench_zstd)
 ; bench_zstd.ml calls Command_unix.run; that module is provided by the
 ; core_unix.command_unix sub-library, so it is listed explicitly rather
 ; than relied on as a transitive dependency.
 (libraries zstd core core_bench core_unix core_unix.command_unix)
 (preprocess
  (pps ppx_jane)))

; Benchmarks for the pure OCaml zstd implementation.
; Comparison numbers for C zstd come from running the 'zstd' CLI tool
; separately (it must be installed); see the header of bench_zstd.ml.