; Project description for the bitstream library.
(lang dune 3.20)

(name bitstream)

; Single package; alcotest is only needed to build and run the tests.
(package
 (name bitstream)
 (synopsis "Bit-level I/O for binary format parsing and generation")
 (description "Forward and backward bitstream reading/writing for binary formats. Supports bit-level operations required by compression algorithms like FSE, ANS, and Huffman coding.")
 (depends
  (ocaml (>= 4.14))
  (alcotest :with-test)))
+269
vendor/opam/ocaml-bitstream/src/bitstream.ml
···11+(** Bitstream - Bit-level I/O for binary formats.
22+33+ Provides forward and backward bitstream reading and writing for parsing
44+ and generating binary formats that operate at the bit level.
55+66+ Forward streams read/write from the start of a buffer towards the end.
77+ Backward streams read/write from the end of a buffer towards the start,
88+ which is required by some compression algorithms (FSE, ANS). *)
99+1010+(** {1 Forward Bitstream Reader} *)
module Forward_reader = struct
  (** Reader state. Bytes are fetched from [src] one at a time; the bits of
      the most recently fetched byte that have not been consumed yet are kept
      in [current_bits]. *)
  type t = {
    src : bytes;  (* underlying buffer, never modified by the reader *)
    limit : int;  (* index one past the last readable byte *)
    mutable byte_pos : int;  (* index of the next byte to fetch *)
    mutable current_bits : int;  (* unread bits of the last fetched byte *)
    mutable bits_remaining : int;  (* number of valid bits in [current_bits] *)
  }

  (** [create src ~pos ~len] reads the [len] bytes of [src] starting at
      [pos]. The range is not validated here. *)
  let create src ~pos ~len =
    { src; limit = pos + len; byte_pos = pos;
      current_bits = 0; bits_remaining = 0 }

  (** [of_bytes src] reads the whole of [src]. *)
  let of_bytes src =
    create src ~pos:0 ~len:(Bytes.length src)

  (** Index of the next byte that has not been fetched yet. *)
  let byte_position t =
    t.byte_pos

  (** Number of bytes between the fetch position and the end of the window. *)
  let remaining_bytes t =
    t.limit - t.byte_pos

  (** [read_byte t] fetches the next whole byte, or returns 0 without
      advancing once the end of the window is reached. Buffered bits are
      left untouched. *)
  let[@inline] read_byte t =
    if t.byte_pos >= t.limit then 0
    else begin
      let b = Bytes.get_uint8 t.src t.byte_pos in
      t.byte_pos <- t.byte_pos + 1;
      b
    end

  (** [read_bits t n] returns the next [n] bits. Bits are taken from the low
      end of each byte and the first bit read becomes bit 0 of the result.
      Returns 0 when [n <= 0]; bits past the end of the window read as 0.
      @raise Invalid_argument if [n > 32]. *)
  let[@inline] read_bits t n =
    if n <= 0 then 0
    else if n > 32 then invalid_arg "read_bits: n > 32"
    else begin
      let result = ref 0 in
      let remaining = ref n in
      let shift = ref 0 in

      while !remaining > 0 do
        (* Fetch a fresh byte only when the previous one is used up. *)
        if t.bits_remaining = 0 then begin
          t.current_bits <- read_byte t;
          t.bits_remaining <- 8
        end;

        let take = min !remaining t.bits_remaining in
        let mask = (1 lsl take) - 1 in
        result := !result lor ((t.current_bits land mask) lsl !shift);
        t.current_bits <- t.current_bits lsr take;
        t.bits_remaining <- t.bits_remaining - take;
        remaining := !remaining - take;
        shift := !shift + take
      done;

      !result
    end

  (** [skip_bits t n] discards the next [n] bits. Any [n] is accepted: skips
      wider than 32 bits are performed in 32-bit steps, since [read_bits]
      cannot take more than that at once. *)
  let skip_bits t n =
    let left = ref n in
    while !left > 32 do
      ignore (read_bits t 32 : int);
      left := !left - 32
    done;
    ignore (read_bits t !left : int)

  (** [get_bytes t n] copies the next [n] bytes into a fresh buffer and
      advances past them; returns an empty buffer when [n <= 0]. The reader
      is expected to be byte-aligned: buffered bits are neither returned nor
      discarded.
      @raise Invalid_argument if fewer than [n] bytes remain in the window. *)
  let get_bytes t n =
    if n <= 0 then Bytes.empty
    else if n > t.limit - t.byte_pos then
      (* Without this check the copy could leave the window given to
         [create] and return bytes that belong to the rest of [src]. *)
      invalid_arg "get_bytes: not enough bytes remaining"
    else begin
      let result = Bytes.sub t.src t.byte_pos n in
      t.byte_pos <- t.byte_pos + n;
      result
    end

  (** [align_to_byte t] drops the unread bits of the current byte so that
      the next read starts on a byte boundary. *)
  let align_to_byte t =
    t.bits_remaining <- 0;
    t.current_bits <- 0
end
8484+8585+(** {1 Backward Bitstream Reader}
8686+8787+ Reads bits from the end of a buffer towards the start. The stream
8888+ starts with a padding marker (highest 1-bit indicates start of data). *)
module Backward_reader = struct
  (** Reader state. Bytes are loaded from the end of the stream towards its
      start into a 64-bit buffer; the next bit to hand out is always bit 0 of
      [bit_buf]. The padding marker is the most significant set bit of the
      first non-zero byte of the stream (the layout produced by
      [Backward_writer.finalize]); it and the zero padding above it are never
      handed out. *)
  type t = {
    src : bytes;
    start : int;  (* index of the byte that carries the padding marker *)
    marker_bits : int;  (* payload bits stored below the marker in that byte *)
    mutable byte_pos : int;  (* next byte to load, below [start] when done *)
    mutable bit_buf : int64;  (* loaded unread bits, zero above them *)
    mutable bits_remaining : int;  (* number of valid bits in [bit_buf] *)
  }

  (* Index (0-7) of the most significant set bit of a non-zero byte. *)
  let top_bit_index b =
    let rec go k = if b lsr (k + 1) = 0 then k else go (k + 1) in
    go 0

  (** [refill t] loads bytes until more than 56 bits are buffered or the
      stream is exhausted. The marker byte contributes only the bits that
      sit below the marker. *)
  let[@inline] refill t =
    while t.bits_remaining <= 56 && t.byte_pos >= t.start do
      let width = if t.byte_pos = t.start then t.marker_bits else 8 in
      let payload = Bytes.get_uint8 t.src t.byte_pos land ((1 lsl width) - 1) in
      t.bit_buf <- Int64.logor t.bit_buf
          (Int64.shift_left (Int64.of_int payload) t.bits_remaining);
      t.bits_remaining <- t.bits_remaining + width;
      t.byte_pos <- t.byte_pos - 1
    done

  (** [of_bytes src ~pos ~len] creates a reader over the [len] bytes of
      [src] starting at [pos]. Reading starts with the low bits of the byte
      at [pos + len - 1]. A stream with [len <= 0], or one that holds no set
      bit at all, contains no payload. *)
  let of_bytes src ~pos ~len =
    (* [byte_pos] below [start] keeps [refill] from touching [src]. *)
    let empty () =
      { src; start = pos; marker_bits = 0; byte_pos = pos - 1;
        bit_buf = 0L; bits_remaining = 0 }
    in
    if len <= 0 then empty ()
    else begin
      let end_pos = pos + len - 1 in
      (* The marker is looked up over the whole stream, not only over the
         bytes that fit in the buffer, so that long streams keep all of
         their payload. *)
      let rec find_marker i =
        if i > end_pos then None
        else
          match Bytes.get_uint8 src i with
          | 0 -> find_marker (i + 1)
          | b -> Some (i, top_bit_index b)
      in
      match find_marker pos with
      | None -> empty ()
      | Some (start, marker_bits) ->
        let t =
          { src; start; marker_bits; byte_pos = end_pos;
            bit_buf = 0L; bits_remaining = 0 }
        in
        refill t;
        t
    end

  (** [read_bits t n] consumes and returns the next [n] bits. Returns 0 when
      [n <= 0]. Once the payload is exhausted the missing bits read as 0.
      @raise Invalid_argument if [n > 32]. *)
  let[@inline] read_bits t n =
    if n <= 0 then 0
    else if n > 32 then invalid_arg "read_bits: n > 32"
    else begin
      if t.bits_remaining < n then refill t;
      let mask = Int64.sub (Int64.shift_left 1L n) 1L in
      let result = Int64.to_int (Int64.logand t.bit_buf mask) in
      if n >= t.bits_remaining then begin
        (* Everything buffered has been handed out; never let the count go
           negative on an over-read. *)
        t.bit_buf <- 0L;
        t.bits_remaining <- 0
      end else begin
        t.bit_buf <- Int64.shift_right_logical t.bit_buf n;
        t.bits_remaining <- t.bits_remaining - n
      end;
      result
    end

  (** [peek_bits t n] returns what [read_bits t n] would return, without
      consuming anything.
      @raise Invalid_argument if [n > 32]. *)
  let[@inline] peek_bits t n =
    if n <= 0 then 0
    else if n > 32 then invalid_arg "peek_bits: n > 32"
    else begin
      if t.bits_remaining < n then refill t;
      let mask = Int64.sub (Int64.shift_left 1L n) 1L in
      Int64.to_int (Int64.logand t.bit_buf mask)
    end

  (** [remaining t] is the number of payload bits not consumed yet: the
      buffered bits plus the bits of the bytes that are still to be loaded. *)
  let remaining t =
    let unloaded =
      if t.byte_pos >= t.start then (t.byte_pos - t.start) * 8 + t.marker_bits
      else 0
    in
    t.bits_remaining + unloaded
end
161161+162162+(** {1 Forward Bitstream Writer} *)
module Forward_writer = struct
  (** Writer state: bits are collected in [current_byte], low bits first,
      and stored into [dst] each time eight of them are available. *)
  type t = {
    dst : bytes;
    mutable byte_pos : int;  (* index where the next byte will be stored *)
    mutable bit_pos : int;  (* number of bits collected in [current_byte] *)
    mutable current_byte : int;  (* byte under construction *)
  }

  (** [create dst ~pos] writes into [dst] starting at index [pos]. *)
  let create dst ~pos = { dst; byte_pos = pos; bit_pos = 0; current_byte = 0 }

  (** [of_bytes dst] writes into [dst] starting at index 0. *)
  let of_bytes dst = create dst ~pos:0

  (* Stores the byte under construction and starts a new empty one. *)
  let emit_pending t =
    Bytes.set_uint8 t.dst t.byte_pos t.current_byte;
    t.byte_pos <- succ t.byte_pos;
    t.bit_pos <- 0;
    t.current_byte <- 0

  (** [flush t] stores the partially filled byte, if any; the unused high
      bits are zero. *)
  let flush t = if t.bit_pos > 0 then emit_pending t

  (** [write_bits t value n] appends the low [n] bits of [value], lowest bit
      first. Does nothing when [n <= 0].
      @raise Invalid_argument if [n > 32]. *)
  let[@inline] write_bits t value n =
    if n > 32 then invalid_arg "write_bits: n > 32"
    else begin
      (* [pending] holds the bits still to be written, [left] their count. *)
      let rec go pending left =
        if left > 0 then begin
          let chunk = min (8 - t.bit_pos) left in
          let low = pending land ((1 lsl chunk) - 1) in
          t.current_byte <- t.current_byte lor (low lsl t.bit_pos);
          t.bit_pos <- t.bit_pos + chunk;
          if t.bit_pos = 8 then emit_pending t;
          go (pending lsr chunk) (left - chunk)
        end
      in
      go value n
    end

  (** [write_byte t value] flushes pending bits, then stores [value] as one
      whole byte. *)
  let write_byte t value =
    flush t;
    Bytes.set_uint8 t.dst t.byte_pos value;
    t.byte_pos <- succ t.byte_pos

  (** [write_bytes t src] flushes pending bits, then copies all of [src]. *)
  let write_bytes t src =
    flush t;
    let count = Bytes.length src in
    Bytes.blit src 0 t.dst t.byte_pos count;
    t.byte_pos <- t.byte_pos + count

  (** [byte_position t] is the index just past the output so far, counting a
      partially filled byte as a full one. *)
  let byte_position t =
    t.byte_pos + (if t.bit_pos > 0 then 1 else 0)

  (** [finalize t] flushes pending bits and returns the index just past the
      last byte stored in [dst]. *)
  let finalize t =
    flush t;
    t.byte_pos
end
229229+230230+(** {1 Backward Bitstream Writer}
231231+232232+ Accumulates bits to be read backwards. Used for FSE and Huffman encoding. *)
module Backward_writer = struct
  (** Writer state. New bits are stacked above the pending ones in [bits];
      complete bytes are moved into [buffer] from its end towards its start,
      so the bits written first end up in the last byte of the output. *)
  type t = {
    mutable bits : int64;  (* pending bits, the oldest one is bit 0 *)
    mutable num_bits : int;  (* number of pending bits *)
    buffer : bytes;
    mutable buf_pos : int;  (* index of the most recently stored byte *)
  }

  (** [create size] allocates an output buffer of [size] bytes. *)
  let create size =
    { bits = 0L; num_bits = 0; buffer = Bytes.create size; buf_pos = size }

  (** [flush_bytes t] moves every complete pending byte into the buffer.
      @raise Invalid_argument if the buffer is full; the writer state is
      left as it was before the byte that did not fit. *)
  let flush_bytes t =
    while t.num_bits >= 8 do
      (* Store first and commit the position afterwards, so that a full
         buffer does not leave [buf_pos] out of range. *)
      let slot = t.buf_pos - 1 in
      Bytes.set_uint8 t.buffer slot (Int64.to_int (Int64.logand t.bits 0xFFL));
      t.buf_pos <- slot;
      t.bits <- Int64.shift_right_logical t.bits 8;
      t.num_bits <- t.num_bits - 8
    done

  (** [write_bits t value n] appends the low [n] bits of [value]; bits of
      [value] above them are ignored. Does nothing when [n <= 0]. Complete
      bytes are flushed automatically whenever the 64-bit accumulator could
      not hold the new bits. *)
  let rec write_bits t value n =
    if n > 32 then begin
      (* Wide values go out low half first so that each step fits in the
         accumulator after a flush. *)
      write_bits t value 32;
      write_bits t (value lsr 32) (n - 32)
    end
    else if n > 0 then begin
      if t.num_bits + n > 64 then flush_bytes t;
      let low_mask = Int64.sub (Int64.shift_left 1L n) 1L in
      let masked = Int64.logand (Int64.of_int value) low_mask in
      t.bits <- Int64.logor t.bits (Int64.shift_left masked t.num_bits);
      t.num_bits <- t.num_bits + n
    end

  (** [finalize t] appends the padding marker (a single 1 bit), pads with
      zero bits up to a byte boundary, flushes, and returns a copy of the
      bytes produced. *)
  let finalize t =
    write_bits t 1 1;
    if t.num_bits mod 8 <> 0 then
      t.num_bits <- ((t.num_bits + 7) / 8) * 8;
    flush_bytes t;
    let len = Bytes.length t.buffer - t.buf_pos in
    Bytes.sub t.buffer t.buf_pos len

  (** [current_size t] is the number of bytes already stored plus the bytes
      needed to hold the pending bits. *)
  let current_size t =
    Bytes.length t.buffer - t.buf_pos + (t.num_bits + 7) / 8
end
+161
vendor/opam/ocaml-bitstream/src/bitstream.mli
···11+(** Bitstream - Bit-level I/O for binary formats.
22+33+ This library provides efficient bit-level reading and writing for parsing
44+ and generating binary formats. It supports both forward (start-to-end) and
55+ backward (end-to-start) operations, as required by various compression
66+ algorithms.
77+88+ {1 Overview}
99+1010+ {[
1111+ (* Forward reading - parse a binary format *)
1212+ let r = Bitstream.Forward_reader.of_bytes data in
1313+ let magic = Bitstream.Forward_reader.read_bits r 32 in
1414+ let flags = Bitstream.Forward_reader.read_bits r 8 in
1515+1616+ (* Forward writing - generate binary output *)
1717+ let w = Bitstream.Forward_writer.of_bytes buffer in
1818+ Bitstream.Forward_writer.write_bits w magic 32;
1919+ Bitstream.Forward_writer.write_bits w flags 8;
2020+ let len = Bitstream.Forward_writer.finalize w
2121+2222+ (* Backward reading - for FSE/ANS entropy decoding *)
2323+ let r = Bitstream.Backward_reader.of_bytes data ~pos:0 ~len in
2424+ let symbol = Bitstream.Backward_reader.read_bits r num_bits
2525+ ]}
2626+2727+ {1 Forward vs Backward Streams}
2828+2929+ - {b Forward streams} read/write from the start of a buffer towards the end.
3030+ This is the standard approach for most binary formats.
3131+3232+ - {b Backward streams} read/write from the end of a buffer towards the start.
3333+ This is required by some compression algorithms (FSE, ANS, Huffman in zstd)
3434+ where encoding happens forwards but decoding must happen backwards. *)
3535+3636+(** {1 Forward Bitstream Reader} *)
module Forward_reader : sig
  (** Forward bitstream reader state. *)
  type t

  val create : bytes -> pos:int -> len:int -> t
  (** [create src ~pos ~len] creates a reader for [len] bytes starting at
      [pos]. The range is not checked when the reader is created. *)

  val of_bytes : bytes -> t
  (** [of_bytes src] creates a reader for the entire byte buffer. *)

  val byte_position : t -> int
  (** [byte_position t] returns the index in the source of the next byte
      that has not been fetched yet. A byte that [read_bits] has only partly
      consumed counts as fetched. *)

  val remaining_bytes : t -> int
  (** [remaining_bytes t] returns the number of bytes not fetched yet. *)

  val read_byte : t -> int
  (** [read_byte t] reads and returns the next byte (0-255).
      Returns 0 if at end of stream. Bits left over from a partly consumed
      byte are neither returned nor discarded; call {!align_to_byte} first
      when mixing bit and byte reads. *)

  val read_bits : t -> int -> int
  (** [read_bits t n] reads and returns [n] bits (at most 32) in
      little-endian order: bits are taken from the low end of each byte and
      the first bit read becomes bit 0 of the result. Returns 0 when
      [n <= 0]. Bits past the end of the stream read as 0.
      @raise Invalid_argument if [n > 32]. *)

  val skip_bits : t -> int -> unit
  (** [skip_bits t n] skips [n] bits without returning them. *)

  val get_bytes : t -> int -> bytes
  (** [get_bytes t n] reads and returns the next [n] bytes as a new buffer;
      the result is empty when [n <= 0]. The reader must be byte-aligned:
      bits left over from a partly consumed byte are ignored.
      @raise Invalid_argument if the request runs past the end of the
      source buffer. *)

  val align_to_byte : t -> unit
  (** [align_to_byte t] discards any remaining bits in the current byte,
      aligning the reader to the next byte boundary. *)
end
7373+7474+(** {1 Backward Bitstream Reader}
7575+7676+ Reads bits from the end of a buffer towards the start. The stream format
7777+ includes a padding marker: the highest 1-bit in the final byte indicates
7878+ where actual data begins.
7979+8080+ This format is used by FSE and ANS entropy coders in zstd. *)
module Backward_reader : sig
  (** Backward bitstream reader state. *)
  type t

  val of_bytes : bytes -> pos:int -> len:int -> t
  (** [of_bytes src ~pos ~len] creates a backward reader.
      The stream is read from position [pos + len - 1] towards [pos].
      Automatically handles the padding marker. *)

  val read_bits : t -> int -> int
  (** [read_bits t n] consumes and returns [n] bits (at most 32). The first
      bits returned are the low bits of the byte at [pos + len - 1], and the
      first bit read becomes bit 0 of the result. Returns 0 when [n <= 0].
      Reading more bits than the stream holds does not raise.
      @raise Invalid_argument if [n > 32]. *)

  val peek_bits : t -> int -> int
  (** [peek_bits t n] returns the next [n] bits (at most 32) without
      consuming them. Returns 0 when [n <= 0].
      @raise Invalid_argument if [n > 32]. *)

  val remaining : t -> int
  (** [remaining t] returns an estimate of remaining bits. The result is
      never negative. *)
end
102102+103103+(** {1 Forward Bitstream Writer} *)
module Forward_writer : sig
  (** Forward bitstream writer state. *)
  type t

  val create : bytes -> pos:int -> t
  (** [create dst ~pos] creates a writer starting at [pos] in buffer [dst].
      The buffer is never grown: it must be large enough for everything
      that will be written. *)

  val of_bytes : bytes -> t
  (** [of_bytes dst] creates a writer starting at position 0. *)

  val write_bits : t -> int -> int -> unit
  (** [write_bits t value n] writes the lower [n] bits (at most 32) of
      [value] in little-endian order: the lowest bit is written first and
      each byte is filled from its low end. Does nothing when [n <= 0].
      @raise Invalid_argument if [n > 32], or if a completed byte does not
      fit in the buffer. *)

  val write_byte : t -> int -> unit
  (** [write_byte t value] writes a single byte. Flushes any partial bits first.
      @raise Invalid_argument if the buffer is full. *)

  val write_bytes : t -> bytes -> unit
  (** [write_bytes t src] writes all bytes from [src]. Flushes any partial bits first.
      @raise Invalid_argument if [src] does not fit in the buffer. *)

  val byte_position : t -> int
  (** [byte_position t] returns the current output position in the buffer,
      counting a partially filled byte as a full one. *)

  val flush : t -> unit
  (** [flush t] writes any accumulated bits as a partial byte whose unused
      high bits are zero. Does nothing when no bits are pending. *)

  val finalize : t -> int
  (** [finalize t] flushes and returns the position just past the last byte
      written. This equals the number of bytes written only for a writer
      that started at position 0; otherwise subtract the [pos] given to
      {!create}. *)
end
135135+136136+(** {1 Backward Bitstream Writer}
137137+138138+ Accumulates bits to produce output that will be read backwards.
139139+ Used for FSE and Huffman encoding in zstd. *)
module Backward_writer : sig
  (** Backward bitstream writer state. *)
  type t

  val create : int -> t
  (** [create size] creates a writer with an internal buffer of [size] bytes.
      The buffer is never grown, so [size] bounds the length of the output.
      @raise Invalid_argument if [size] is negative. *)

  val write_bits : t -> int -> int -> unit
  (** [write_bits t value n] accumulates [n] bits from [value]. Does nothing
      when [n <= 0]. [value] should have no bit set above bit [n - 1].
      Pending bits are held in a 64-bit accumulator; calling {!flush_bytes}
      after each write keeps it from filling up. *)

  val flush_bytes : t -> unit
  (** [flush_bytes t] flushes complete bytes to the internal buffer. Fewer
      than 8 bits stay pending afterwards.
      @raise Invalid_argument if the internal buffer is full. *)

  val finalize : t -> bytes
  (** [finalize t] adds the padding marker (a single 1 bit), pads with zero
      bits up to a byte boundary, flushes, and returns the output.
      The returned bytes should be appended to the output and will be read
      backwards during decoding. *)

  val current_size : t -> int
  (** [current_size t] returns the current output size estimate: the bytes
      already flushed plus the bytes needed for the pending bits. *)
end