My working unpac space for OCaml projects in development
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Use vendored bitstream library with slice interface

- Add bitstream as a dependency
- Rewrite bit_reader.ml as a thin wrapper around Bitstream, translating
exceptions to Zstd_error
- Rewrite bit_writer.ml as a thin wrapper around Bitstream
- Update vendored bitstream to the latest version, which adds a bytesrw-compatible slice API

This decouples bit-level I/O from zstd-specific error handling and
enables future bytesrw streaming integration.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

+441 -395
+2 -1
dune-project
··· 7 7 (synopsis "Pure OCaml implementation of Zstandard compression") 8 8 (description "A complete pure OCaml implementation of the Zstandard (zstd) compression algorithm (RFC 8878). Includes both compression and decompression with support for all compression levels and dictionaries.") 9 9 (depends 10 - (ocaml (>= 5.1)))) 10 + (ocaml (>= 5.1)) 11 + bitstream)) 11 12 12 13 (package 13 14 (name zstd-test)
+39 -147
src/bit_reader.ml
··· 1 1 (** Bitstream reader for Zstandard decompression. 2 2 3 - Supports two modes: 4 - - Forward reading: for frame headers and FSE table descriptions 5 - - Backward reading: for FSE and Huffman coded bitstreams *) 3 + This module wraps the Bitstream library, translating exceptions 4 + to Zstd_error for consistent error handling. *) 5 + 6 + (** Helper to wrap Bitstream operations and translate exceptions *) 7 + let[@inline] wrap_truncated f = 8 + try f () 9 + with Bitstream.End_of_stream -> 10 + raise (Constants.Zstd_error Constants.Truncated_input) 11 + 12 + let[@inline] wrap_all f = 13 + try f () 14 + with 15 + | Bitstream.End_of_stream -> 16 + raise (Constants.Zstd_error Constants.Truncated_input) 17 + | Bitstream.Invalid_state _ -> 18 + raise (Constants.Zstd_error Constants.Corruption) 19 + | Bitstream.Corrupted_stream _ -> 20 + raise (Constants.Zstd_error Constants.Corruption) 6 21 7 22 (** Forward bitstream reader - reads from start to end *) 8 23 module Forward = struct 9 - type t = { 10 - src : bytes; 11 - mutable byte_pos : int; 12 - mutable bit_pos : int; (* 0-7, bits consumed in current byte *) 13 - len : int; 14 - } 24 + type t = Bitstream.Forward_reader.t 15 25 16 26 let create src ~pos ~len = 17 - { src; byte_pos = pos; bit_pos = 0; len = pos + len } 27 + Bitstream.Forward_reader.create src ~pos ~len 18 28 19 29 let of_bytes src = 20 - create src ~pos:0 ~len:(Bytes.length src) 30 + Bitstream.Forward_reader.of_bytes src 21 31 22 32 let[@inline] remaining t = 23 - (t.len - t.byte_pos) * 8 - t.bit_pos 33 + Bitstream.Forward_reader.remaining t 24 34 25 35 let[@inline] is_byte_aligned t = 26 - t.bit_pos = 0 36 + Bitstream.Forward_reader.is_byte_aligned t 27 37 28 - (** Read up to 64 bits, little-endian *) 29 38 let[@inline] read_bits t n = 30 - if n <= 0 then 0 31 - else if n > 64 then invalid_arg "read_bits: n > 64" 32 - else begin 33 - let result = ref 0 in 34 - let bits_read = ref 0 in 35 - while !bits_read < n do 36 - if t.byte_pos >= t.len then 37 
- raise (Constants.Zstd_error Constants.Truncated_input); 38 - let byte = Bytes.get_uint8 t.src t.byte_pos in 39 - let available = 8 - t.bit_pos in 40 - let to_read = min available (n - !bits_read) in 41 - let mask = (1 lsl to_read) - 1 in 42 - let bits = (byte lsr t.bit_pos) land mask in 43 - result := !result lor (bits lsl !bits_read); 44 - bits_read := !bits_read + to_read; 45 - t.bit_pos <- t.bit_pos + to_read; 46 - if t.bit_pos >= 8 then begin 47 - t.bit_pos <- 0; 48 - t.byte_pos <- t.byte_pos + 1 49 - end 50 - done; 51 - !result 52 - end 39 + wrap_truncated (fun () -> Bitstream.Forward_reader.read_bits t n) 53 40 54 41 let[@inline] read_byte t = 55 - if t.bit_pos <> 0 then 56 - invalid_arg "read_byte: not byte aligned"; 57 - if t.byte_pos >= t.len then 58 - raise (Constants.Zstd_error Constants.Truncated_input); 59 - let b = Bytes.get_uint8 t.src t.byte_pos in 60 - t.byte_pos <- t.byte_pos + 1; 61 - b 42 + wrap_all (fun () -> Bitstream.Forward_reader.read_byte t) 62 43 63 - (** Rewind by n bits *) 64 44 let rewind_bits t n = 65 - let total_bits = t.byte_pos * 8 + t.bit_pos in 66 - let new_total = total_bits - n in 67 - if new_total < 0 then 68 - raise (Constants.Zstd_error Constants.Truncated_input); 69 - t.byte_pos <- new_total / 8; 70 - t.bit_pos <- new_total mod 8 45 + wrap_truncated (fun () -> Bitstream.Forward_reader.rewind_bits t n) 71 46 72 - (** Align to next byte boundary *) 73 47 let align t = 74 - if t.bit_pos <> 0 then begin 75 - t.bit_pos <- 0; 76 - t.byte_pos <- t.byte_pos + 1 77 - end 48 + Bitstream.Forward_reader.align t 78 49 79 - (** Get current position in bytes (must be aligned) *) 80 50 let byte_position t = 81 - if t.bit_pos <> 0 then 82 - invalid_arg "byte_position: not byte aligned"; 83 - t.byte_pos 51 + wrap_all (fun () -> Bitstream.Forward_reader.byte_position t) 84 52 85 - (** Get a slice of bytes (must be aligned) *) 86 53 let get_bytes t n = 87 - if t.bit_pos <> 0 then 88 - invalid_arg "get_bytes: not byte aligned"; 89 - if 
t.byte_pos + n > t.len then 90 - raise (Constants.Zstd_error Constants.Truncated_input); 91 - let result = Bytes.sub t.src t.byte_pos n in 92 - t.byte_pos <- t.byte_pos + n; 93 - result 54 + wrap_all (fun () -> Bitstream.Forward_reader.get_bytes t n) 94 55 95 - (** Advance by n bytes (must be aligned) *) 96 56 let advance t n = 97 - if t.bit_pos <> 0 then 98 - invalid_arg "advance: not byte aligned"; 99 - if t.byte_pos + n > t.len then 100 - raise (Constants.Zstd_error Constants.Truncated_input); 101 - t.byte_pos <- t.byte_pos + n 57 + wrap_all (fun () -> Bitstream.Forward_reader.advance t n) 102 58 103 - (** Create a sub-reader for a portion of the stream *) 104 59 let sub t n = 105 - if t.bit_pos <> 0 then 106 - invalid_arg "sub: not byte aligned"; 107 - if t.byte_pos + n > t.len then 108 - raise (Constants.Zstd_error Constants.Truncated_input); 109 - let result = create t.src ~pos:t.byte_pos ~len:n in 110 - t.byte_pos <- t.byte_pos + n; 111 - result 60 + wrap_all (fun () -> Bitstream.Forward_reader.sub t n) 112 61 113 - (** Remaining bytes (must be aligned) *) 114 62 let remaining_bytes t = 115 - if t.bit_pos <> 0 then 116 - invalid_arg "remaining_bytes: not byte aligned"; 117 - t.len - t.byte_pos 63 + wrap_all (fun () -> Bitstream.Forward_reader.remaining_bytes t) 118 64 end 119 65 120 66 (** Backward bitstream reader - reads from end to start. 121 67 Used for FSE and Huffman coded streams. *) 122 68 module Backward = struct 123 - type t = { 124 - src : bytes; 125 - start_pos : int; 126 - mutable bit_offset : int; (* Bits remaining from end, decreasing *) 127 - } 69 + type t = Bitstream.Backward_reader.t 128 70 129 - (** Create from bytes. 
Finds the padding marker (first 1-bit from end) *) 130 71 let create src ~pos ~len = 131 - if len = 0 then 132 - raise (Constants.Zstd_error Constants.Truncated_input); 133 - let last_byte_pos = pos + len - 1 in 134 - let last_byte = Bytes.get_uint8 src last_byte_pos in 135 - if last_byte = 0 then 136 - raise (Constants.Zstd_error Constants.Corruption); 137 - (* Find the highest set bit - this is the padding marker *) 138 - let rec find_marker byte bit = 139 - if bit < 0 then 0 140 - else if (byte land (1 lsl bit)) <> 0 then bit 141 - else find_marker byte (bit - 1) 142 - in 143 - let padding = 8 - find_marker last_byte 7 in 144 - let bit_offset = len * 8 - padding in 145 - { src; start_pos = pos; bit_offset } 72 + wrap_all (fun () -> Bitstream.Backward_reader.of_bytes src ~pos ~len) 146 73 147 74 let of_bytes src ~pos ~len = 148 75 create src ~pos ~len 149 76 150 - let[@inline] remaining t = t.bit_offset 77 + let[@inline] remaining t = 78 + Bitstream.Backward_reader.remaining t 151 79 152 - (** Read n bits from the end of the stream, moving backward. 153 - Returns 0 bits if trying to read past the beginning. 
*) 154 80 let[@inline] read_bits t n = 155 - if n <= 0 then 0 156 - else if n > 64 then invalid_arg "read_bits: n > 64" 157 - else begin 158 - t.bit_offset <- t.bit_offset - n; 159 - let actual_offset = max 0 t.bit_offset in 160 - let actual_bits = if t.bit_offset < 0 then n + t.bit_offset else n in 161 - if actual_bits <= 0 then 0 162 - else begin 163 - let byte_offset = t.start_pos + (actual_offset / 8) in 164 - let bit_offset = actual_offset mod 8 in 165 - let result = ref 0 in 166 - let bits_read = ref 0 in 167 - let current_byte = ref byte_offset in 168 - let current_bit = ref bit_offset in 169 - while !bits_read < actual_bits do 170 - let byte = Bytes.get_uint8 t.src !current_byte in 171 - let available = 8 - !current_bit in 172 - let to_read = min available (actual_bits - !bits_read) in 173 - let mask = (1 lsl to_read) - 1 in 174 - let bits = (byte lsr !current_bit) land mask in 175 - result := !result lor (bits lsl !bits_read); 176 - bits_read := !bits_read + to_read; 177 - current_bit := !current_bit + to_read; 178 - if !current_bit >= 8 then begin 179 - current_bit := 0; 180 - incr current_byte 181 - end 182 - done; 183 - (* If we read past the beginning, shift the result *) 184 - if t.bit_offset < 0 then 185 - !result lsl (-t.bit_offset) 186 - else 187 - !result 188 - end 189 - end 81 + Bitstream.Backward_reader.read_bits t n 190 82 191 - (** Check if stream is exhausted *) 192 - let[@inline] is_empty t = t.bit_offset <= 0 83 + let[@inline] is_empty t = 84 + Bitstream.Backward_reader.is_empty t 193 85 end 194 86 195 87 (** Read little-endian integers from bytes *)
+17 -87
src/bit_writer.ml
··· 1 1 (** Bitstream writer for Zstandard compression. 2 2 3 - Supports both forward writing (for headers) and backward accumulation 4 - (for FSE/Huffman encoded streams that are read backwards). *) 3 + This module wraps the Bitstream library for consistent API 4 + with the rest of the zstd implementation. *) 5 5 6 6 (** Forward bitstream writer - writes from start to end *) 7 7 module Forward = struct 8 - type t = { 9 - dst : bytes; 10 - mutable byte_pos : int; 11 - mutable bit_pos : int; (* 0-7, bits written in current byte *) 12 - mutable current_byte : int; 13 - } 8 + type t = Bitstream.Forward_writer.t 14 9 15 10 let create dst ~pos = 16 - { dst; byte_pos = pos; bit_pos = 0; current_byte = 0 } 11 + Bitstream.Forward_writer.create dst ~pos 17 12 18 13 let of_bytes dst = 19 - create dst ~pos:0 14 + Bitstream.Forward_writer.of_bytes dst 20 15 21 - (** Flush accumulated bits to output *) 22 16 let flush t = 23 - if t.bit_pos > 0 then begin 24 - Bytes.set_uint8 t.dst t.byte_pos t.current_byte; 25 - t.byte_pos <- t.byte_pos + 1; 26 - t.bit_pos <- 0; 27 - t.current_byte <- 0 28 - end 17 + Bitstream.Forward_writer.flush t 29 18 30 - (** Write n bits (little-endian) *) 31 19 let write_bits t value n = 32 - if n <= 0 then () 33 - else if n > 32 then invalid_arg "write_bits: n > 32" 34 - else begin 35 - let value = ref value in 36 - let remaining = ref n in 37 - 38 - while !remaining > 0 do 39 - let available = 8 - t.bit_pos in 40 - let to_write = min available !remaining in 41 - let mask = (1 lsl to_write) - 1 in 42 - t.current_byte <- t.current_byte lor ((!value land mask) lsl t.bit_pos); 43 - value := !value lsr to_write; 44 - remaining := !remaining - to_write; 45 - t.bit_pos <- t.bit_pos + to_write; 46 - 47 - if t.bit_pos = 8 then begin 48 - Bytes.set_uint8 t.dst t.byte_pos t.current_byte; 49 - t.byte_pos <- t.byte_pos + 1; 50 - t.bit_pos <- 0; 51 - t.current_byte <- 0 52 - end 53 - done 54 - end 20 + Bitstream.Forward_writer.write_bits t value n 55 21 56 - (** 
Write a single byte (must be byte-aligned) *) 57 22 let write_byte t value = 58 - if t.bit_pos <> 0 then flush t; 59 - Bytes.set_uint8 t.dst t.byte_pos value; 60 - t.byte_pos <- t.byte_pos + 1 23 + Bitstream.Forward_writer.write_byte t value 61 24 62 - (** Write bytes directly (must be byte-aligned) *) 63 25 let write_bytes t src = 64 - if t.bit_pos <> 0 then flush t; 65 - let len = Bytes.length src in 66 - Bytes.blit src 0 t.dst t.byte_pos len; 67 - t.byte_pos <- t.byte_pos + len 26 + Bitstream.Forward_writer.write_bytes t src 68 27 69 - (** Get current position in bytes *) 70 28 let byte_position t = 71 - if t.bit_pos > 0 then t.byte_pos + 1 else t.byte_pos 29 + Bitstream.Forward_writer.byte_position t 72 30 73 - (** Finalize and return number of bytes written *) 74 31 let finalize t = 75 - flush t; 76 - t.byte_pos 32 + Bitstream.Forward_writer.finalize t 77 33 end 78 34 79 35 (** Backward bitstream writer - accumulates bits to be read backwards. 80 36 Used for FSE and Huffman encoding. 
*) 81 37 module Backward = struct 82 - type t = { 83 - mutable bits : int64; (* Accumulated bits *) 84 - mutable num_bits : int; (* Number of bits accumulated *) 85 - buffer : bytes; 86 - mutable buf_pos : int; (* Write position (from end) *) 87 - } 38 + type t = Bitstream.Backward_writer.t 88 39 89 40 let create size = 90 - { bits = 0L; num_bits = 0; buffer = Bytes.create size; buf_pos = size } 41 + Bitstream.Backward_writer.create size 91 42 92 - (** Add bits to the accumulator *) 93 43 let[@inline] write_bits t value n = 94 - if n > 0 then begin 95 - t.bits <- Int64.logor t.bits (Int64.shift_left (Int64.of_int value) t.num_bits); 96 - t.num_bits <- t.num_bits + n 97 - end 44 + Bitstream.Backward_writer.write_bits t value n 98 45 99 - (** Flush complete bytes from accumulator to buffer *) 100 46 let flush_bytes t = 101 - while t.num_bits >= 8 do 102 - t.buf_pos <- t.buf_pos - 1; 103 - Bytes.set_uint8 t.buffer t.buf_pos (Int64.to_int (Int64.logand t.bits 0xFFL)); 104 - t.bits <- Int64.shift_right_logical t.bits 8; 105 - t.num_bits <- t.num_bits - 8 106 - done 47 + Bitstream.Backward_writer.flush_bytes t 107 48 108 - (** Finalize: add padding marker and flush remaining bits *) 109 49 let finalize t = 110 - (* Add the 1-bit marker followed by 0-7 padding bits *) 111 - write_bits t 1 1; 112 - (* Pad to byte boundary *) 113 - if t.num_bits mod 8 <> 0 then 114 - t.num_bits <- ((t.num_bits + 7) / 8) * 8; 115 - flush_bytes t; 116 - (* Return the slice of buffer that was used *) 117 - let len = Bytes.length t.buffer - t.buf_pos in 118 - Bytes.sub t.buffer t.buf_pos len 50 + Bitstream.Backward_writer.finalize t 119 51 120 - (** Get the data written so far (for checking size) *) 121 52 let current_size t = 122 - Bytes.length t.buffer - t.buf_pos + (t.num_bits + 7) / 8 53 + Bitstream.Backward_writer.current_size t 123 54 end 124 -
+1 -1
src/dune
··· 1 1 (library 2 2 (name zstd) 3 3 (public_name zstd) 4 - (libraries xxhash) 4 + (libraries xxhash bitstream) 5 5 (ocamlopt_flags (:standard -O3)))
+230 -114
vendor/opam/ocaml-bitstream/src/bitstream.ml
··· 7 7 Backward streams read/write from the end of a buffer towards the start, 8 8 which is required by some compression algorithms (FSE, ANS). *) 9 9 10 + (** {1 Slice Type} *) 11 + 12 + module Slice = struct 13 + type t = { 14 + bytes : bytes; 15 + first : int; 16 + length : int; 17 + } 18 + 19 + let make bytes ~first ~length = 20 + { bytes; first; length } 21 + 22 + let of_bytes ?first ?length bytes = 23 + let first = Option.value first ~default:0 in 24 + let length = Option.value length ~default:(Bytes.length bytes - first) in 25 + { bytes; first; length } 26 + 27 + let to_bytes t = 28 + Bytes.sub t.bytes t.first t.length 29 + 30 + let is_empty t = 31 + t.length = 0 32 + 33 + let sub t ~first ~length = 34 + { bytes = t.bytes; first = t.first + first; length } 35 + end 36 + 37 + (** {1 Exceptions} *) 38 + 39 + exception End_of_stream 40 + (** Raised when attempting to read past the end of the stream. *) 41 + 42 + exception Invalid_state of string 43 + (** Raised when an operation requires a specific state (e.g., byte alignment). *) 44 + 45 + exception Corrupted_stream of string 46 + (** Raised when stream data is malformed (e.g., invalid padding marker). 
*) 47 + 10 48 (** {1 Forward Bitstream Reader} *) 11 49 12 50 module Forward_reader = struct 13 51 type t = { 14 52 src : bytes; 53 + start_pos : int; 15 54 limit : int; 16 55 mutable byte_pos : int; 17 - mutable current_bits : int; 18 - mutable bits_remaining : int; 56 + mutable bit_pos : int; (* 0-7, bits consumed in current byte *) 19 57 } 20 58 21 - let create src ~pos ~len = 22 - { src; limit = pos + len; byte_pos = pos; 23 - current_bits = 0; bits_remaining = 0 } 59 + let of_slice (slice : Slice.t) = 60 + { src = slice.bytes; 61 + start_pos = slice.first; 62 + limit = slice.first + slice.length; 63 + byte_pos = slice.first; 64 + bit_pos = 0 } 24 65 25 66 let of_bytes src = 26 - create src ~pos:0 ~len:(Bytes.length src) 67 + of_slice (Slice.of_bytes src) 27 68 28 - let byte_position t = 29 - t.byte_pos 69 + let create src ~pos ~len = 70 + of_slice (Slice.make src ~first:pos ~length:len) 30 71 31 - let remaining_bytes t = 32 - t.limit - t.byte_pos 72 + let[@inline] remaining t = 73 + (t.limit - t.byte_pos) * 8 - t.bit_pos 33 74 34 - let[@inline] read_byte t = 35 - if t.byte_pos >= t.limit then 0 36 - else begin 37 - let b = Bytes.get_uint8 t.src t.byte_pos in 38 - t.byte_pos <- t.byte_pos + 1; 39 - b 40 - end 75 + let[@inline] is_byte_aligned t = 76 + t.bit_pos = 0 41 77 42 78 let[@inline] read_bits t n = 43 79 if n <= 0 then 0 44 - else if n > 32 then invalid_arg "read_bits: n > 32" 80 + else if n > 57 then invalid_arg "read_bits: n > 57" 45 81 else begin 46 82 let result = ref 0 in 47 - let remaining = ref n in 48 - let shift = ref 0 in 83 + let bits_read = ref 0 in 84 + while !bits_read < n do 85 + if t.byte_pos >= t.limit then 86 + raise End_of_stream; 87 + let byte = Bytes.get_uint8 t.src t.byte_pos in 88 + let available = 8 - t.bit_pos in 89 + let to_read = min available (n - !bits_read) in 90 + let mask = (1 lsl to_read) - 1 in 91 + let bits = (byte lsr t.bit_pos) land mask in 92 + result := !result lor (bits lsl !bits_read); 93 + bits_read := !bits_read 
+ to_read; 94 + t.bit_pos <- t.bit_pos + to_read; 95 + if t.bit_pos >= 8 then begin 96 + t.bit_pos <- 0; 97 + t.byte_pos <- t.byte_pos + 1 98 + end 99 + done; 100 + !result 101 + end 49 102 50 - while !remaining > 0 do 51 - if t.bits_remaining = 0 then begin 52 - t.current_bits <- read_byte t; 53 - t.bits_remaining <- 8 54 - end; 103 + let[@inline] read_byte t = 104 + if t.bit_pos <> 0 then 105 + raise (Invalid_state "read_byte: not byte aligned"); 106 + if t.byte_pos >= t.limit then 107 + raise End_of_stream; 108 + let b = Bytes.get_uint8 t.src t.byte_pos in 109 + t.byte_pos <- t.byte_pos + 1; 110 + b 55 111 56 - let take = min !remaining t.bits_remaining in 57 - let mask = (1 lsl take) - 1 in 58 - result := !result lor ((t.current_bits land mask) lsl !shift); 59 - t.current_bits <- t.current_bits lsr take; 60 - t.bits_remaining <- t.bits_remaining - take; 61 - remaining := !remaining - take; 62 - shift := !shift + take 63 - done; 112 + let rewind_bits t n = 113 + let total_bits = (t.byte_pos - t.start_pos) * 8 + t.bit_pos in 114 + let new_total = total_bits - n in 115 + if new_total < 0 then 116 + raise End_of_stream; 117 + t.byte_pos <- t.start_pos + new_total / 8; 118 + t.bit_pos <- new_total mod 8 64 119 65 - !result 120 + let align t = 121 + if t.bit_pos <> 0 then begin 122 + t.bit_pos <- 0; 123 + t.byte_pos <- t.byte_pos + 1 66 124 end 67 125 68 - let skip_bits t n = 69 - ignore (read_bits t n) 126 + let byte_position t = 127 + if t.bit_pos <> 0 then 128 + raise (Invalid_state "byte_position: not byte aligned"); 129 + t.byte_pos 130 + 131 + let get_slice t n : Slice.t = 132 + if t.bit_pos <> 0 then 133 + raise (Invalid_state "get_slice: not byte aligned"); 134 + if t.byte_pos + n > t.limit then 135 + raise End_of_stream; 136 + let result = Slice.make t.src ~first:t.byte_pos ~length:n in 137 + t.byte_pos <- t.byte_pos + n; 138 + result 70 139 71 140 let get_bytes t n = 72 - if n <= 0 then Bytes.empty 73 - else begin 74 - let result = Bytes.create n in 75 - 
Bytes.blit t.src t.byte_pos result 0 n; 76 - t.byte_pos <- t.byte_pos + n; 77 - result 78 - end 141 + Slice.to_bytes (get_slice t n) 142 + 143 + let to_slice t : Slice.t = 144 + if t.bit_pos <> 0 then 145 + raise (Invalid_state "to_slice: not byte aligned"); 146 + Slice.make t.src ~first:t.byte_pos ~length:(t.limit - t.byte_pos) 147 + 148 + let advance t n = 149 + if t.bit_pos <> 0 then 150 + raise (Invalid_state "advance: not byte aligned"); 151 + if t.byte_pos + n > t.limit then 152 + raise End_of_stream; 153 + t.byte_pos <- t.byte_pos + n 79 154 80 - let align_to_byte t = 81 - t.bits_remaining <- 0; 82 - t.current_bits <- 0 155 + let sub t n = 156 + if t.bit_pos <> 0 then 157 + raise (Invalid_state "sub: not byte aligned"); 158 + if t.byte_pos + n > t.limit then 159 + raise End_of_stream; 160 + let result = of_slice (Slice.make t.src ~first:t.byte_pos ~length:n) in 161 + t.byte_pos <- t.byte_pos + n; 162 + result 163 + 164 + let remaining_bytes t = 165 + if t.bit_pos <> 0 then 166 + raise (Invalid_state "remaining_bytes: not byte aligned"); 167 + t.limit - t.byte_pos 168 + 169 + let skip_bits t n = 170 + ignore (read_bits t n) 83 171 end 84 172 85 173 (** {1 Backward Bitstream Reader} ··· 90 178 module Backward_reader = struct 91 179 type t = { 92 180 src : bytes; 93 - start : int; 94 - mutable byte_pos : int; 95 - mutable bit_buf : int64; 96 - mutable bits_remaining : int; 181 + start_pos : int; 182 + mutable bit_offset : int; (* Bits remaining from end, decreasing *) 97 183 } 98 184 99 - let of_bytes src ~pos ~len = 100 - if len <= 0 then 101 - { src; start = pos; byte_pos = pos; bit_buf = 0L; bits_remaining = 0 } 102 - else begin 103 - let end_pos = pos + len - 1 in 104 - let byte_pos = ref end_pos in 105 - 106 - let init_bits = ref 0L in 107 - let num_bytes = min 8 len in 108 - for i = 0 to num_bytes - 1 do 109 - let b = Bytes.get_uint8 src (end_pos - i) in 110 - init_bits := Int64.logor !init_bits (Int64.shift_left (Int64.of_int b) (i * 8)) 111 - done; 112 
- byte_pos := end_pos - num_bytes; 185 + let of_slice (slice : Slice.t) = 186 + if slice.length = 0 then 187 + raise End_of_stream; 188 + let last_byte_pos = slice.first + slice.length - 1 in 189 + let last_byte = Bytes.get_uint8 slice.bytes last_byte_pos in 190 + if last_byte = 0 then 191 + raise (Corrupted_stream "invalid padding marker"); 192 + (* Find the highest set bit - this is the padding marker *) 193 + let rec find_marker byte bit = 194 + if bit < 0 then 0 195 + else if (byte land (1 lsl bit)) <> 0 then bit 196 + else find_marker byte (bit - 1) 197 + in 198 + let padding = 8 - find_marker last_byte 7 in 199 + let bit_offset = slice.length * 8 - padding in 200 + { src = slice.bytes; start_pos = slice.first; bit_offset } 113 201 114 - let total_bits = num_bytes * 8 in 115 - let rec find_marker bits count = 116 - if count <= 0 then (bits, count) 117 - else if Int64.logand bits (Int64.shift_left 1L (count - 1)) <> 0L then 118 - (bits, count - 1) 119 - else 120 - find_marker bits (count - 1) 121 - in 122 - let (bits, remaining) = find_marker !init_bits total_bits in 202 + let of_bytes src ~pos ~len = 203 + of_slice (Slice.make src ~first:pos ~length:len) 123 204 124 - { src; start = pos; byte_pos = !byte_pos; bit_buf = bits; bits_remaining = remaining } 125 - end 205 + let[@inline] remaining t = t.bit_offset 126 206 127 - let[@inline] refill t = 128 - while t.bits_remaining <= 56 && t.byte_pos >= t.start do 129 - let b = Bytes.get_uint8 t.src t.byte_pos in 130 - t.bit_buf <- Int64.logor t.bit_buf 131 - (Int64.shift_left (Int64.of_int b) t.bits_remaining); 132 - t.bits_remaining <- t.bits_remaining + 8; 133 - t.byte_pos <- t.byte_pos - 1 134 - done 207 + let[@inline] is_empty t = t.bit_offset <= 0 135 208 136 209 let[@inline] read_bits t n = 137 210 if n <= 0 then 0 138 - else if n > 32 then invalid_arg "read_bits: n > 32" 211 + else if n > 57 then invalid_arg "read_bits: n > 57" 139 212 else begin 140 - if t.bits_remaining < n then refill t; 141 - let mask = 
Int64.sub (Int64.shift_left 1L n) 1L in 142 - let result = Int64.to_int (Int64.logand t.bit_buf mask) in 143 - t.bit_buf <- Int64.shift_right_logical t.bit_buf n; 144 - t.bits_remaining <- t.bits_remaining - n; 145 - result 146 - end 147 - 148 - let[@inline] peek_bits t n = 149 - if n <= 0 then 0 150 - else if n > 32 then invalid_arg "peek_bits: n > 32" 151 - else begin 152 - if t.bits_remaining < n then refill t; 153 - let mask = Int64.sub (Int64.shift_left 1L n) 1L in 154 - Int64.to_int (Int64.logand t.bit_buf mask) 213 + t.bit_offset <- t.bit_offset - n; 214 + let actual_offset = max 0 t.bit_offset in 215 + let actual_bits = if t.bit_offset < 0 then n + t.bit_offset else n in 216 + if actual_bits <= 0 then 0 217 + else begin 218 + let byte_offset = t.start_pos + (actual_offset / 8) in 219 + let bit_offset = actual_offset mod 8 in 220 + let result = ref 0 in 221 + let bits_read = ref 0 in 222 + let current_byte = ref byte_offset in 223 + let current_bit = ref bit_offset in 224 + while !bits_read < actual_bits do 225 + let byte = Bytes.get_uint8 t.src !current_byte in 226 + let available = 8 - !current_bit in 227 + let to_read = min available (actual_bits - !bits_read) in 228 + let mask = (1 lsl to_read) - 1 in 229 + let bits = (byte lsr !current_bit) land mask in 230 + result := !result lor (bits lsl !bits_read); 231 + bits_read := !bits_read + to_read; 232 + current_bit := !current_bit + to_read; 233 + if !current_bit >= 8 then begin 234 + current_bit := 0; 235 + incr current_byte 236 + end 237 + done; 238 + (* If we read past the beginning, shift the result *) 239 + if t.bit_offset < 0 then 240 + !result lsl (-t.bit_offset) 241 + else 242 + !result 243 + end 155 244 end 156 245 157 - let remaining t = 158 - if t.bits_remaining > 0 then t.bits_remaining 159 - else (t.byte_pos - t.start + 1) * 8 246 + let peek_bits t n = 247 + let saved_offset = t.bit_offset in 248 + let result = read_bits t n in 249 + t.bit_offset <- saved_offset; 250 + result 160 251 end 161 
252 162 253 (** {1 Forward Bitstream Writer} *) ··· 164 255 module Forward_writer = struct 165 256 type t = { 166 257 dst : bytes; 258 + start_pos : int; 167 259 mutable byte_pos : int; 168 - mutable bit_pos : int; 260 + mutable bit_pos : int; (* 0-7, bits written in current byte *) 169 261 mutable current_byte : int; 170 262 } 171 263 172 - let create dst ~pos = 173 - { dst; byte_pos = pos; bit_pos = 0; current_byte = 0 } 264 + let of_slice (slice : Slice.t) = 265 + { dst = slice.bytes; 266 + start_pos = slice.first; 267 + byte_pos = slice.first; 268 + bit_pos = 0; 269 + current_byte = 0 } 174 270 175 271 let of_bytes dst = 176 - create dst ~pos:0 272 + of_slice (Slice.of_bytes dst) 273 + 274 + let create dst ~pos = 275 + of_slice (Slice.make dst ~first:pos ~length:(Bytes.length dst - pos)) 177 276 178 277 let flush t = 179 278 if t.bit_pos > 0 then begin ··· 185 284 186 285 let[@inline] write_bits t value n = 187 286 if n <= 0 then () 188 - else if n > 32 then invalid_arg "write_bits: n > 32" 287 + else if n > 57 then invalid_arg "write_bits: n > 57" 189 288 else begin 190 289 let value = ref value in 191 290 let remaining = ref n in ··· 213 312 Bytes.set_uint8 t.dst t.byte_pos value; 214 313 t.byte_pos <- t.byte_pos + 1 215 314 216 - let write_bytes t src = 315 + let write_slice t (slice : Slice.t) = 217 316 if t.bit_pos <> 0 then flush t; 218 - let len = Bytes.length src in 219 - Bytes.blit src 0 t.dst t.byte_pos len; 220 - t.byte_pos <- t.byte_pos + len 317 + Bytes.blit slice.bytes slice.first t.dst t.byte_pos slice.length; 318 + t.byte_pos <- t.byte_pos + slice.length 319 + 320 + let write_bytes t src = 321 + write_slice t (Slice.of_bytes src) 221 322 222 323 let byte_position t = 223 324 if t.bit_pos > 0 then t.byte_pos + 1 else t.byte_pos 224 325 225 326 let finalize t = 226 327 flush t; 227 - t.byte_pos 328 + t.byte_pos - t.start_pos 329 + 330 + let to_slice t : Slice.t = 331 + flush t; 332 + Slice.make t.dst ~first:t.start_pos ~length:(t.byte_pos - 
t.start_pos) 228 333 end 229 334 230 335 (** {1 Backward Bitstream Writer} ··· 256 361 t.num_bits <- t.num_bits - 8 257 362 done 258 363 259 - let finalize t = 364 + let finalize_to_slice t : Slice.t = 260 365 write_bits t 1 1; 261 366 if t.num_bits mod 8 <> 0 then 262 367 t.num_bits <- ((t.num_bits + 7) / 8) * 8; 263 368 flush_bytes t; 264 369 let len = Bytes.length t.buffer - t.buf_pos in 265 - Bytes.sub t.buffer t.buf_pos len 370 + (* Reverse bytes in place so marker ends up at the end *) 371 + for i = 0 to len / 2 - 1 do 372 + let j = t.buf_pos + i in 373 + let k = t.buf_pos + len - 1 - i in 374 + let tmp = Bytes.get t.buffer j in 375 + Bytes.set t.buffer j (Bytes.get t.buffer k); 376 + Bytes.set t.buffer k tmp 377 + done; 378 + Slice.make t.buffer ~first:t.buf_pos ~length:len 379 + 380 + let finalize t = 381 + Slice.to_bytes (finalize_to_slice t) 266 382 267 383 let current_size t = 268 384 Bytes.length t.buffer - t.buf_pos + (t.num_bits + 7) / 8
+151 -45
vendor/opam/ocaml-bitstream/src/bitstream.mli
··· 8 8 {1 Overview} 9 9 10 10 {[ 11 - (* Forward reading - parse a binary format *) 12 - let r = Bitstream.Forward_reader.of_bytes data in 11 + (* Forward reading from a slice (zero-copy) *) 12 + let slice = { Bitstream.Slice.bytes = data; first = 0; length = n } in 13 + let r = Bitstream.Forward_reader.of_slice slice in 13 14 let magic = Bitstream.Forward_reader.read_bits r 32 in 14 15 let flags = Bitstream.Forward_reader.read_bits r 8 in 15 16 16 - (* Forward writing - generate binary output *) 17 - let w = Bitstream.Forward_writer.of_bytes buffer in 18 - Bitstream.Forward_writer.write_bits w magic 32; 19 - Bitstream.Forward_writer.write_bits w flags 8; 20 - let len = Bitstream.Forward_writer.finalize w 17 + (* Get remaining data as a slice (zero-copy) *) 18 + let remaining = Bitstream.Forward_reader.to_slice r in 21 19 22 20 (* Backward reading - for FSE/ANS entropy decoding *) 23 - let r = Bitstream.Backward_reader.of_bytes data ~pos:0 ~len in 21 + let r = Bitstream.Backward_reader.of_slice slice in 24 22 let symbol = Bitstream.Backward_reader.read_bits r num_bits 25 23 ]} 26 24 27 - {1 Forward vs Backward Streams} 25 + {1 Bytesrw Compatibility} 26 + 27 + The {!Slice} type is structurally compatible with [Bytesrw.Bytes.Slice.t], 28 + enabling zero-copy integration with bytesrw-based streaming. All reader 29 + and writer constructors accept slices as the primary input type. 30 + 31 + {1 Error Handling} 32 + 33 + Operations raise exceptions on error: 34 + - {!End_of_stream}: Reading past end of stream 35 + - {!Invalid_state}: Operation requires specific state (e.g., byte alignment) 36 + - {!Corrupted_stream}: Malformed stream data *) 28 37 29 - - {b Forward streams} read/write from the start of a buffer towards the end. 30 - This is the standard approach for most binary formats. 38 + (** {1 Slice Type} 31 39 32 - - {b Backward streams} read/write from the end of a buffer towards the start. 
33 - This is required by some compression algorithms (FSE, ANS, Huffman in zstd) 34 - where encoding happens forwards but decoding must happen backwards. *) 40 + A slice is a view into a byte buffer. This type is structurally compatible 41 + with [Bytesrw.Bytes.Slice.t], enabling zero-copy interop. *) 42 + 43 + module Slice : sig 44 + type t = { 45 + bytes : bytes; 46 + first : int; 47 + length : int; 48 + } 49 + (** A slice referencing [length] bytes starting at [first] in [bytes]. 50 + This is structurally identical to [Bytesrw.Bytes.Slice.t]. *) 51 + 52 + val make : bytes -> first:int -> length:int -> t 53 + (** [make bytes ~first ~length] creates a slice. *) 54 + 55 + val of_bytes : ?first:int -> ?length:int -> bytes -> t 56 + (** [of_bytes bytes] creates a slice for the entire buffer. 57 + Optional [first] and [length] can restrict the range. *) 58 + 59 + val to_bytes : t -> bytes 60 + (** [to_bytes t] copies the slice contents to a new buffer. *) 61 + 62 + val is_empty : t -> bool 63 + (** [is_empty t] returns true if the slice has zero length. *) 64 + 65 + val sub : t -> first:int -> length:int -> t 66 + (** [sub t ~first ~length] creates a sub-slice. [first] is relative to [t]. *) 67 + end 68 + 69 + (** {1 Exceptions} *) 70 + 71 + exception End_of_stream 72 + (** Raised when attempting to read past the end of the stream. *) 73 + 74 + exception Invalid_state of string 75 + (** Raised when an operation requires a specific state (e.g., byte alignment). *) 76 + 77 + exception Corrupted_stream of string 78 + (** Raised when stream data is malformed (e.g., invalid padding marker). *) 35 79 36 80 (** {1 Forward Bitstream Reader} *) 37 81 ··· 39 83 (** Forward bitstream reader state. *) 40 84 type t 41 85 42 - val create : bytes -> pos:int -> len:int -> t 43 - (** [create src ~pos ~len] creates a reader for [len] bytes starting at [pos]. *) 86 + val of_slice : Slice.t -> t 87 + (** [of_slice slice] creates a reader from a slice. Zero-copy. 
*) 44 88 45 89 val of_bytes : bytes -> t 46 90 (** [of_bytes src] creates a reader for the entire byte buffer. *) 47 91 48 - val byte_position : t -> int 49 - (** [byte_position t] returns the current byte position in the source. *) 92 + val create : bytes -> pos:int -> len:int -> t 93 + (** [create src ~pos ~len] creates a reader for [len] bytes starting at [pos]. *) 50 94 51 - val remaining_bytes : t -> int 52 - (** [remaining_bytes t] returns the number of unread bytes. *) 95 + val remaining : t -> int 96 + (** [remaining t] returns the number of unread bits. *) 97 + 98 + val is_byte_aligned : t -> bool 99 + (** [is_byte_aligned t] returns true if the reader is at a byte boundary. *) 100 + 101 + val read_bits : t -> int -> int 102 + (** [read_bits t n] reads and returns [n] bits (1-57) in little-endian order. 103 + @raise End_of_stream if not enough data available. 104 + @raise Invalid_argument if [n > 57]. *) 53 105 54 106 val read_byte : t -> int 55 107 (** [read_byte t] reads and returns the next byte (0-255). 56 - Returns 0 if at end of stream. *) 108 + @raise Invalid_state if not byte aligned. 109 + @raise End_of_stream if at end of stream. *) 57 110 58 - val read_bits : t -> int -> int 59 - (** [read_bits t n] reads and returns [n] bits (1-32) in little-endian order. 60 - @raise Invalid_argument if [n > 32]. *) 111 + val rewind_bits : t -> int -> unit 112 + (** [rewind_bits t n] rewinds the stream by [n] bits. 113 + @raise End_of_stream if rewinding past the start. *) 61 114 62 - val skip_bits : t -> int -> unit 63 - (** [skip_bits t n] skips [n] bits without returning them. *) 115 + val align : t -> unit 116 + (** [align t] advances to the next byte boundary if not already aligned. *) 117 + 118 + val byte_position : t -> int 119 + (** [byte_position t] returns the current byte position. 120 + @raise Invalid_state if not byte aligned. 
*) 121 + 122 + val get_slice : t -> int -> Slice.t 123 + (** [get_slice t n] returns the next [n] bytes as a slice (zero-copy). 124 + The slice references the underlying buffer directly. 125 + @raise Invalid_state if not byte aligned. 126 + @raise End_of_stream if not enough data. *) 64 127 65 128 val get_bytes : t -> int -> bytes 66 129 (** [get_bytes t n] reads and returns the next [n] bytes as a new buffer. 67 - The reader must be byte-aligned. *) 130 + Equivalent to [Slice.to_bytes (get_slice t n)]. 131 + @raise Invalid_state if not byte aligned. 132 + @raise End_of_stream if not enough data. *) 133 + 134 + val to_slice : t -> Slice.t 135 + (** [to_slice t] returns the remaining data as a slice (zero-copy). 136 + @raise Invalid_state if not byte aligned. *) 137 + 138 + val advance : t -> int -> unit 139 + (** [advance t n] skips [n] bytes without returning them. 140 + @raise Invalid_state if not byte aligned. 141 + @raise End_of_stream if not enough data. *) 68 142 69 - val align_to_byte : t -> unit 70 - (** [align_to_byte t] discards any remaining bits in the current byte, 71 - aligning the reader to the next byte boundary. *) 143 + val sub : t -> int -> t 144 + (** [sub t n] creates a sub-reader for the next [n] bytes and advances [t]. 145 + @raise Invalid_state if not byte aligned. 146 + @raise End_of_stream if not enough data. *) 147 + 148 + val remaining_bytes : t -> int 149 + (** [remaining_bytes t] returns the number of unread bytes. 150 + @raise Invalid_state if not byte aligned. *) 151 + 152 + val skip_bits : t -> int -> unit 153 + (** [skip_bits t n] skips [n] bits without returning them. 154 + @raise End_of_stream if not enough data. *) 72 155 end 73 156 74 157 (** {1 Backward Bitstream Reader} ··· 77 160 includes a padding marker: the highest 1-bit in the final byte indicates 78 161 where actual data begins. 79 162 80 - This format is used by FSE and ANS entropy coders in zstd. *) 163 + This format is used by FSE and ANS entropy coders. 
*) 81 164 82 165 module Backward_reader : sig 83 166 (** Backward bitstream reader state. *) 84 167 type t 85 168 169 + val of_slice : Slice.t -> t 170 + (** [of_slice slice] creates a backward reader from a slice. Zero-copy. 171 + @raise End_of_stream if slice is empty. 172 + @raise Corrupted_stream if padding marker is invalid. *) 173 + 86 174 val of_bytes : bytes -> pos:int -> len:int -> t 87 175 (** [of_bytes src ~pos ~len] creates a backward reader. 88 176 The stream is read from position [pos + len - 1] towards [pos]. 89 - Automatically handles the padding marker. *) 177 + @raise End_of_stream if [len = 0]. 178 + @raise Corrupted_stream if padding marker is invalid. *) 179 + 180 + val remaining : t -> int 181 + (** [remaining t] returns the number of bits remaining. *) 182 + 183 + val is_empty : t -> bool 184 + (** [is_empty t] returns true if no more bits are available. *) 90 185 91 186 val read_bits : t -> int -> int 92 - (** [read_bits t n] reads and returns [n] bits (1-32). 93 - @raise Invalid_argument if [n > 32]. *) 187 + (** [read_bits t n] reads and returns [n] bits (1-57). 188 + Returns 0 bits when reading past the beginning. 189 + @raise Invalid_argument if [n > 57]. *) 94 190 95 191 val peek_bits : t -> int -> int 96 192 (** [peek_bits t n] returns the next [n] bits without consuming them. 97 - @raise Invalid_argument if [n > 32]. *) 98 - 99 - val remaining : t -> int 100 - (** [remaining t] returns an estimate of remaining bits. *) 193 + @raise Invalid_argument if [n > 57]. *) 101 194 end 102 195 103 196 (** {1 Forward Bitstream Writer} *) ··· 106 199 (** Forward bitstream writer state. *) 107 200 type t 108 201 109 - val create : bytes -> pos:int -> t 110 - (** [create dst ~pos] creates a writer starting at [pos] in buffer [dst]. *) 202 + val of_slice : Slice.t -> t 203 + (** [of_slice slice] creates a writer into a slice. Zero-copy. *) 111 204 112 205 val of_bytes : bytes -> t 113 206 (** [of_bytes dst] creates a writer starting at position 0. 
*) 114 207 208 + val create : bytes -> pos:int -> t 209 + (** [create dst ~pos] creates a writer starting at [pos] in buffer [dst]. *) 210 + 115 211 val write_bits : t -> int -> int -> unit 116 - (** [write_bits t value n] writes the lower [n] bits (1-32) of [value] 212 + (** [write_bits t value n] writes the lower [n] bits (1-57) of [value] 117 213 in little-endian order. 118 - @raise Invalid_argument if [n > 32]. *) 214 + @raise Invalid_argument if [n > 57]. *) 119 215 120 216 val write_byte : t -> int -> unit 121 217 (** [write_byte t value] writes a single byte. Flushes any partial bits first. *) 218 + 219 + val write_slice : t -> Slice.t -> unit 220 + (** [write_slice t slice] writes bytes from a slice. Flushes any partial bits first. *) 122 221 123 222 val write_bytes : t -> bytes -> unit 124 223 (** [write_bytes t src] writes all bytes from [src]. Flushes any partial bits first. *) ··· 131 230 132 231 val finalize : t -> int 133 232 (** [finalize t] flushes and returns the total number of bytes written. *) 233 + 234 + val to_slice : t -> Slice.t 235 + (** [to_slice t] flushes and returns the written data as a slice (zero-copy). 236 + The slice references the underlying destination buffer. *) 134 237 end 135 238 136 239 (** {1 Backward Bitstream Writer} 137 240 138 241 Accumulates bits to produce output that will be read backwards. 139 - Used for FSE and Huffman encoding in zstd. *) 242 + Used for FSE and Huffman encoding. *) 140 243 141 244 module Backward_writer : sig 142 245 (** Backward bitstream writer state. *) ··· 151 254 val flush_bytes : t -> unit 152 255 (** [flush_bytes t] flushes complete bytes to the internal buffer. *) 153 256 257 + val finalize_to_slice : t -> Slice.t 258 + (** [finalize_to_slice t] adds the padding marker, flushes, and returns output 259 + as a slice (zero-copy). The slice references the internal buffer. 
*) 260 + 154 261 val finalize : t -> bytes 155 262 (** [finalize t] adds the padding marker, flushes, and returns the output. 156 - The returned bytes should be appended to the output and will be read 157 - backwards during decoding. *) 263 + Equivalent to [Slice.to_bytes (finalize_to_slice t)]. *) 158 264 159 265 val current_size : t -> int 160 266 (** [current_size t] returns the current output size estimate. *)
+1
zstd.opam
··· 6 6 depends: [ 7 7 "dune" {>= "3.20"} 8 8 "ocaml" {>= "5.1"} 9 + "bitstream" 9 10 "odoc" {with-doc} 10 11 ] 11 12 build: [