My working unpac space for OCaml projects in development
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Add bytesrw-zstd adapter library

Implements bytesrw stream filters for zstd compression/decompression:
- compress_reads/compress_writes for compression
- decompress_reads/decompress_writes for decompression
- Proper error handling via bytesrw error system
- Includes tests for roundtrip, empty input, and large input

Also fixes empty frame decompression (content_size=0 has no blocks).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

+512 -2
+26
bytesrw-zstd.opam
··· 1 + # This file is generated by dune, edit dune-project instead 2 + opam-version: "2.0" 3 + synopsis: "Bytesrw adapter for pure OCaml zstd" 4 + description: 5 + "Provides bytesrw stream filters for zstd compression and decompression using a pure OCaml implementation. No C dependencies required." 6 + depends: [ 7 + "dune" {>= "3.20"} 8 + "zstd" 9 + "bytesrw" 10 + "odoc" {with-doc} 11 + ] 12 + build: [ 13 + ["dune" "subst"] {dev} 14 + [ 15 + "dune" 16 + "build" 17 + "-p" 18 + name 19 + "-j" 20 + jobs 21 + "@install" 22 + "@runtest" {with-test} 23 + "@doc" {with-doc} 24 + ] 25 + ] 26 + x-maintenance-intent: ["(latest)"]
+8
dune-project
··· 11 11 bitstream)) 12 12 13 13 (package 14 + (name bytesrw-zstd) 15 + (synopsis "Bytesrw adapter for pure OCaml zstd") 16 + (description "Provides bytesrw stream filters for zstd compression and decompression using a pure OCaml implementation. No C dependencies required.") 17 + (depends 18 + zstd 19 + bytesrw)) 20 + 21 + (package 14 22 (name zstd-test) 15 23 (synopsis "Tests for the zstd library") 16 24 (allow_empty)
+255
src-bytesrw/bytesrw_zstd.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2024 The bytesrw programmers. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + open Bytesrw 7 + 8 + (* Errors *) 9 + 10 + type Bytes.Stream.error += Error of Zstd.error 11 + 12 + let error_message = Zstd.error_message 13 + 14 + let format_error = 15 + let case e = Error e in 16 + let message = function Error e -> error_message e | _ -> assert false in 17 + Bytes.Stream.make_format_error ~format:"zstd" ~case ~message 18 + 19 + let _error e = Bytes.Stream.error format_error e 20 + let reader_error r e = Bytes.Reader.error format_error r e 21 + let writer_error w e = Bytes.Writer.error format_error w e 22 + 23 + (* Library parameters *) 24 + 25 + let version = "1.0.0-pure-ocaml" 26 + let min_clevel = 1 27 + let max_clevel = 19 28 + let default_clevel = 3 29 + 30 + (* Default slice length *) 31 + let default_slice_length = 65536 32 + 33 + (* Buffer all slices from a reader into a single bytes *) 34 + let buffer_reader r = 35 + let buf = Buffer.create default_slice_length in 36 + let rec loop () = 37 + let slice = Bytes.Reader.read r in 38 + if Bytes.Slice.is_eod slice then 39 + Buffer.contents buf 40 + else begin 41 + Buffer.add_subbytes buf 42 + (Bytes.Slice.bytes slice) 43 + (Bytes.Slice.first slice) 44 + (Bytes.Slice.length slice); 45 + loop () 46 + end 47 + in 48 + loop () 49 + 50 + (* Read a single zstd frame, returning leftover data *) 51 + let read_single_frame r = 52 + (* Buffer slices until we have enough to detect frame boundaries *) 53 + let buf = Buffer.create default_slice_length in 54 + let rec loop () = 55 + let slice = Bytes.Reader.read r in 56 + if Bytes.Slice.is_eod slice then begin 57 + (* End of input - return what we have *) 58 + let data = Buffer.contents buf in 59 + (data, "") 60 + end else begin 61 + Buffer.add_subbytes buf 62 + (Bytes.Slice.bytes slice) 63 + (Bytes.Slice.first slice) 64 + (Bytes.Slice.length slice); 65 + (* Check if we have a complete frame *) 66 + let data = Buffer.contents buf in 67 + if String.length data >= 4 && Zstd.is_zstd_frame data then 68 + (* Try to find frame boundary by checking decompressed size or 69 + attempting decompression. For now, buffer everything. *) 70 + loop () 71 + else 72 + loop () 73 + end 74 + in 75 + loop () 76 + 77 + (* Create a reader that yields slices from a string *) 78 + let reader_of_string ?(slice_length = default_slice_length) s = 79 + let len = String.length s in 80 + let pos = ref 0 in 81 + let bytes = Bytes.unsafe_of_string s in 82 + let read () = 83 + if !pos >= len then Bytes.Slice.eod 84 + else begin 85 + let chunk_len = min slice_length (len - !pos) in 86 + let slice = Bytes.Slice.make bytes ~first:!pos ~length:chunk_len in 87 + pos := !pos + chunk_len; 88 + slice 89 + end 90 + in 91 + Bytes.Reader.make ~slice_length read 92 + 93 + (* Decompress *) 94 + 95 + let decompress_reads ?(all_frames = true) () ?pos ?(slice_length = default_slice_length) r = 96 + let state = ref `Reading in 97 + let output_reader = ref None in 98 + let read () = 99 + match !state with 100 + | `Done -> Bytes.Slice.eod 101 + | `Outputting -> 102 + begin match !output_reader with 103 + | None -> Bytes.Slice.eod 104 + | Some or_ -> 105 + let slice = Bytes.Reader.read or_ in 106 + if Bytes.Slice.is_eod slice then begin 107 + state := `Done; 108 + output_reader := None; 109 + Bytes.Slice.eod 110 + end else 111 + slice 112 + end 113 + | `Reading -> 114 + (* Buffer all input *) 115 + let input = 116 + if all_frames then 117 + buffer_reader r 118 + else 119 + let (data, _leftover) = read_single_frame r in 120 + (* TODO: push back leftover to r *) 121 + data 122 + in 123 + if String.length input = 0 then begin 124 + state := `Done; 125 + Bytes.Slice.eod 126 + end else begin 127 + (* Decompress *) 128 + match Zstd.decompress input with 129 + | Error _msg -> 130 + state := `Done; 131 + reader_error r Zstd.Corruption 132 + | Ok decompressed -> 133 + let or_ = reader_of_string ~slice_length decompressed in 134 + output_reader := Some or_; 135 + state := `Outputting; 136 + let slice = Bytes.Reader.read or_ in 137 + if Bytes.Slice.is_eod slice then begin 138 + state := `Done; 139 + output_reader := None 140 + end; 141 + slice 142 + end 143 + in 144 + Bytes.Reader.make ?pos ~slice_length read 145 + 146 + let decompress_writes () ?pos ?(slice_length = default_slice_length) ~eod w = 147 + let buf = Buffer.create default_slice_length in 148 + let write slice = 149 + if Bytes.Slice.is_eod slice then begin 150 + (* Decompress buffered data *) 151 + let input = Buffer.contents buf in 152 + if String.length input > 0 then begin 153 + match Zstd.decompress input with 154 + | Error _msg -> 155 + writer_error w Zstd.Corruption 156 + | Ok decompressed -> 157 + (* Write decompressed data in slices *) 158 + let len = String.length decompressed in 159 + let bytes = Bytes.unsafe_of_string decompressed in 160 + let rec write_chunks pos = 161 + if pos >= len then () 162 + else begin 163 + let chunk_len = min (Bytes.Writer.slice_length w) (len - pos) in 164 + let slice = Bytes.Slice.make bytes ~first:pos ~length:chunk_len in 165 + Bytes.Writer.write w slice; 166 + write_chunks (pos + chunk_len) 167 + end 168 + in 169 + write_chunks 0 170 + end; 171 + if eod then Bytes.Writer.write_eod w 172 + end else begin 173 + Buffer.add_subbytes buf 174 + (Bytes.Slice.bytes slice) 175 + (Bytes.Slice.first slice) 176 + (Bytes.Slice.length slice) 177 + end 178 + in 179 + Bytes.Writer.make ?pos ~slice_length write 180 + 181 + (* Compress *) 182 + 183 + let compress_reads ?(level = default_clevel) () ?pos ?(slice_length = default_slice_length) r = 184 + let state = ref `Reading in 185 + let output_reader = ref None in 186 + let read () = 187 + match !state with 188 + | `Done -> Bytes.Slice.eod 189 + | `Outputting -> 190 + begin match !output_reader with 191 + | None -> Bytes.Slice.eod 192 + | Some or_ -> 193 + let slice = Bytes.Reader.read or_ in 194 + if Bytes.Slice.is_eod slice then begin 195 + state := `Done; 196 + output_reader := None; 197 + Bytes.Slice.eod 198 + end else 199 + slice 200 + end 201 + | `Reading -> 202 + (* Buffer all input *) 203 + let input = buffer_reader r in 204 + if String.length input = 0 then begin 205 + (* Compress empty input to get valid empty frame *) 206 + let compressed = Zstd.compress ~level "" in 207 + let or_ = reader_of_string ~slice_length compressed in 208 + output_reader := Some or_; 209 + state := `Outputting; 210 + Bytes.Reader.read or_ 211 + end else begin 212 + (* Compress *) 213 + let compressed = Zstd.compress ~level input in 214 + let or_ = reader_of_string ~slice_length compressed in 215 + output_reader := Some or_; 216 + state := `Outputting; 217 + let slice = Bytes.Reader.read or_ in 218 + if Bytes.Slice.is_eod slice then begin 219 + state := `Done; 220 + output_reader := None 221 + end; 222 + slice 223 + end 224 + in 225 + Bytes.Reader.make ?pos ~slice_length read 226 + 227 + let compress_writes ?(level = default_clevel) () ?pos ?(slice_length = default_slice_length) ~eod w = 228 + let buf = Buffer.create default_slice_length in 229 + let write slice = 230 + if Bytes.Slice.is_eod slice then begin 231 + (* Compress buffered data *) 232 + let input = Buffer.contents buf in 233 + let compressed = Zstd.compress ~level input in 234 + (* Write compressed data in slices *) 235 + let len = String.length compressed in 236 + let bytes = Bytes.unsafe_of_string compressed in 237 + let rec write_chunks pos = 238 + if pos >= len then () 239 + else begin 240 + let chunk_len = min (Bytes.Writer.slice_length w) (len - pos) in 241 + let slice = Bytes.Slice.make bytes ~first:pos ~length:chunk_len in 242 + Bytes.Writer.write w slice; 243 + write_chunks (pos + chunk_len) 244 + end 245 + in 246 + write_chunks 0; 247 + if eod then Bytes.Writer.write_eod w 248 + end else begin 249 + Buffer.add_subbytes buf 250 + (Bytes.Slice.bytes slice) 251 + (Bytes.Slice.first slice) 252 + (Bytes.Slice.length slice) 253 + end 254 + in 255 + Bytes.Writer.make ?pos ~slice_length write
+103
src-bytesrw/bytesrw_zstd.mli
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2024 The bytesrw programmers. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Zstd streams via pure OCaml implementation. 7 + 8 + This module provides support for reading and writing 9 + {{:https://www.rfc-editor.org/rfc/rfc8878.html}zstd} compressed 10 + streams using a pure OCaml zstd implementation. 11 + 12 + Unlike the C-based [bytesrw-zstd] package, this implementation: 13 + - Has no C dependencies 14 + - Buffers entire frames before processing (not true streaming) 15 + - Works anywhere OCaml runs 16 + 17 + {b Positions.} The positions of readers and writers created 18 + by filters of this module default to [0]. *) 19 + 20 + open Bytesrw 21 + 22 + (** {1:errors Errors} *) 23 + 24 + type Bytes.Stream.error += Error of Zstd.error 25 + (** The type for zstd stream errors. 26 + 27 + All functions of this module and resulting readers and writers may 28 + raise {!Bytesrw.Bytes.Stream.Error} with this error. *) 29 + 30 + val error_message : Zstd.error -> string 31 + (** [error_message e] is a human-readable message for error [e]. *) 32 + 33 + (** {1:decompress Decompress} *) 34 + 35 + val decompress_reads : ?all_frames:bool -> unit -> Bytes.Reader.filter 36 + (** [decompress_reads () r] filters the reads of [r] by decompressing 37 + zstd frames. 38 + {ul 39 + {- [slice_length] defaults to [65536].}} 40 + 41 + If [all_frames] is: 42 + {ul 43 + {- [true] (default), this decompresses all frames until [r] returns 44 + {!Bytesrw.Bytes.Slice.eod} and concatenates the result.} 45 + {- [false], this decompresses a single frame. Once the resulting reader 46 + returns {!Bytesrw.Bytes.Slice.eod}, [r] is positioned exactly after 47 + the end of frame and can be used again to perform other non-filtered 48 + reads (e.g. a new zstd frame or other unrelated data).}} 49 + 50 + {b Note:} This implementation buffers the entire compressed input 51 + before decompressing. For large files, consider using the C-based 52 + [bytesrw-zstd] package instead. *) 53 + 54 + val decompress_writes : unit -> Bytes.Writer.filter 55 + (** [decompress_writes () w ~eod] filters the writes on [w] by decompressing 56 + sequences of zstd frames until {!Bytesrw.Bytes.Slice.eod} is written. 57 + If [eod] is [false] the last {!Bytesrw.Bytes.Slice.eod} is not written 58 + on [w] and at this point [w] can be used again to perform other 59 + non-filtered writes. 60 + {ul 61 + {- [slice_length] defaults to [65536].}} 62 + 63 + {b Note:} This implementation buffers the entire compressed input 64 + before decompressing. *) 65 + 66 + (** {1:compress Compress} *) 67 + 68 + val compress_reads : ?level:int -> unit -> Bytes.Reader.filter 69 + (** [compress_reads () r] filters the reads of [r] by compressing them 70 + to a single zstd frame. 71 + {ul 72 + {- [level] is the compression level (1-19, default 3).} 73 + {- [slice_length] defaults to [65536].}} 74 + 75 + {b Note:} This implementation buffers the entire input before 76 + compressing. *) 77 + 78 + val compress_writes : ?level:int -> unit -> Bytes.Writer.filter 79 + (** [compress_writes () w ~eod] filters the writes on [w] by compressing 80 + them to a single zstd frame until {!Bytesrw.Bytes.Slice.eod} is written. 81 + If [eod] is [false] the last {!Bytesrw.Bytes.Slice.eod} is not written 82 + on [w] and at this point [w] can be used again to perform non-filtered 83 + writes. 84 + {ul 85 + {- [level] is the compression level (1-19, default 3).} 86 + {- [slice_length] defaults to [65536].}} 87 + 88 + {b Note:} This implementation buffers the entire input before 89 + compressing. *) 90 + 91 + (** {1:params Library parameters} *) 92 + 93 + val version : string 94 + (** [version] is the version of this pure OCaml zstd implementation. *) 95 + 96 + val min_clevel : int 97 + (** [min_clevel] is the minimum compression level (1). *) 98 + 99 + val max_clevel : int 100 + (** [max_clevel] is the maximum compression level (19). *) 101 + 102 + val default_clevel : int 103 + (** [default_clevel] is the default compression level (3). *)
+4
src-bytesrw/dune
··· 1 + (library 2 + (name bytesrw_zstd) 3 + (public_name bytesrw-zstd) 4 + (libraries zstd bytesrw))
+6 -2
src/zstd_decode.ml
··· 534 534 let output = Bytes.create output_size in 535 535 let ctx = create_frame_context header dict in 536 536 537 - (* Decompress all blocks *) 538 - let written = decompress_data ctx stream output ~out_pos:0 in 537 + (* Decompress all blocks (skip if empty frame) *) 538 + let written = 539 + match header.frame_content_size with 540 + | Some 0L -> 0 (* Empty frame has no blocks *) 541 + | _ -> decompress_data ctx stream output ~out_pos:0 542 + in 539 543 540 544 (* Verify checksum if present *) 541 545 if header.content_checksum then begin
+4
test-bytesrw/dune
··· 1 + (test 2 + (name test_bytesrw_zstd) 3 + (package bytesrw-zstd) 4 + (libraries bytesrw_zstd alcotest))
+106
test-bytesrw/test_bytesrw_zstd.ml
··· 1 + (** Tests for bytesrw_zstd adapter *) 2 + 3 + open Bytesrw 4 + 5 + let test_compress_decompress_roundtrip () = 6 + let original = "Hello, World! This is a test of the bytesrw zstd adapter." in 7 + (* Compress *) 8 + let reader = Bytes.Reader.of_string original in 9 + let compressed_reader = Bytesrw_zstd.compress_reads () reader in 10 + let compressed = Bytes.Reader.to_string compressed_reader in 11 + (* Verify it's actually compressed (has zstd magic) *) 12 + Alcotest.(check bool) "has zstd magic" true (Zstd.is_zstd_frame compressed); 13 + (* Decompress *) 14 + let reader2 = Bytes.Reader.of_string compressed in 15 + let decompressed_reader = Bytesrw_zstd.decompress_reads () reader2 in 16 + let decompressed = Bytes.Reader.to_string decompressed_reader in 17 + (* Verify roundtrip *) 18 + Alcotest.(check string) "roundtrip" original decompressed 19 + 20 + let test_compress_writes_roundtrip () = 21 + let original = "Testing compress_writes and decompress_writes filters." in 22 + (* Compress using writer filter *) 23 + let buf = Buffer.create 256 in 24 + let base_writer = Bytes.Writer.of_buffer buf in 25 + let compressing_writer = Bytesrw_zstd.compress_writes () ~eod:true base_writer in 26 + Bytes.Writer.write_string compressing_writer original; 27 + Bytes.Writer.write_eod compressing_writer; 28 + let compressed = Buffer.contents buf in 29 + (* Verify it's compressed *) 30 + Alcotest.(check bool) "has zstd magic" true (Zstd.is_zstd_frame compressed); 31 + (* Decompress using reader filter *) 32 + let reader = Bytes.Reader.of_string compressed in 33 + let decompressing_reader = Bytesrw_zstd.decompress_reads () reader in 34 + let decompressed = Bytes.Reader.to_string decompressing_reader in 35 + Alcotest.(check string) "roundtrip" original decompressed 36 + 37 + let test_decompress_writes () = 38 + let original = "Testing decompress_writes filter." in 39 + (* First compress the data *) 40 + let compressed = Zstd.compress original in 41 + (* Decompress using writer filter *) 42 + let buf = Buffer.create 256 in 43 + let base_writer = Bytes.Writer.of_buffer buf in 44 + let decompressing_writer = Bytesrw_zstd.decompress_writes () ~eod:true base_writer in 45 + Bytes.Writer.write_string decompressing_writer compressed; 46 + Bytes.Writer.write_eod decompressing_writer; 47 + let decompressed = Buffer.contents buf in 48 + Alcotest.(check string) "decompressed" original decompressed 49 + 50 + let test_empty_input () = 51 + (* Compress empty - this creates a minimal valid zstd frame *) 52 + let compressed = Zstd.compress "" in 53 + Alcotest.(check bool) "empty compressed has magic" true (Zstd.is_zstd_frame compressed); 54 + (* Decompress back using bytesrw *) 55 + let reader = Bytes.Reader.of_string compressed in 56 + let decompressed_reader = Bytesrw_zstd.decompress_reads () reader in 57 + let decompressed = Bytes.Reader.to_string decompressed_reader in 58 + Alcotest.(check string) "empty roundtrip" "" decompressed 59 + 60 + let test_large_input () = 61 + (* Create a larger input with repetitive data *) 62 + let size = 100_000 in 63 + let original = String.make size 'x' in 64 + (* Compress *) 65 + let reader = Bytes.Reader.of_string original in 66 + let compressed_reader = Bytesrw_zstd.compress_reads () reader in 67 + let compressed = Bytes.Reader.to_string compressed_reader in 68 + (* Verify it's valid zstd *) 69 + Alcotest.(check bool) "has zstd magic" true (Zstd.is_zstd_frame compressed); 70 + (* Decompress *) 71 + let reader2 = Bytes.Reader.of_string compressed in 72 + let decompressed_reader = Bytesrw_zstd.decompress_reads () reader2 in 73 + let decompressed = Bytes.Reader.to_string decompressed_reader in 74 + (* Verify roundtrip correctness *) 75 + Alcotest.(check int) "size matches" size (String.length decompressed); 76 + Alcotest.(check string) "content matches" original decompressed 77 + 78 + let test_compression_levels () = 79 + let original = String.make 10000 'a' in 80 + (* Level 1 (fastest) *) 81 + let reader1 = Bytes.Reader.of_string original in 82 + let c1 = Bytes.Reader.to_string (Bytesrw_zstd.compress_reads ~level:1 () reader1) in 83 + (* Level 19 (best compression) *) 84 + let reader19 = Bytes.Reader.of_string original in 85 + let c19 = Bytes.Reader.to_string (Bytesrw_zstd.compress_reads ~level:19 () reader19) in 86 + (* Both should decompress correctly *) 87 + let d1 = Bytes.Reader.to_string 88 + (Bytesrw_zstd.decompress_reads () (Bytes.Reader.of_string c1)) in 89 + let d19 = Bytes.Reader.to_string 90 + (Bytesrw_zstd.decompress_reads () (Bytes.Reader.of_string c19)) in 91 + Alcotest.(check string) "level 1 roundtrip" original d1; 92 + Alcotest.(check string) "level 19 roundtrip" original d19 93 + 94 + let tests = [ 95 + "compress/decompress roundtrip", `Quick, test_compress_decompress_roundtrip; 96 + "compress_writes roundtrip", `Quick, test_compress_writes_roundtrip; 97 + "decompress_writes", `Quick, test_decompress_writes; 98 + "empty input", `Quick, test_empty_input; 99 + "large input", `Quick, test_large_input; 100 + "compression levels", `Quick, test_compression_levels; 101 + ] 102 + 103 + let () = 104 + Alcotest.run "bytesrw_zstd" [ 105 + "bytesrw_zstd", tests; 106 + ]