CCSDS 121.0-B-3 Lossless Data Compression (Rice/Golomb coding)
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Add 8 new CCSDS/RFC protocol packages

- ocaml-rice: CCSDS 121.0-B lossless compression (Rice/Golomb)
- ocaml-udpcl: RFC 7122 UDP convergence layer for Bundle Protocol
- ocaml-erasure: CCSDS 131.5-B erasure correcting codes (GF(2^8))
- ocaml-short-ldpc: CCSDS 131.4-B short block-length LDPC
- ocaml-opm: CCSDS 502.0-B Orbit Parameter Message (KVN)
- ocaml-aem: CCSDS 504.0-B Attitude Ephemeris Message (KVN)
- ocaml-tdm: CCSDS 503.0-B Tracking Data Message (KVN)
- ocaml-rdm: CCSDS 508.1-B Re-entry Data Message (KVN)

+859
+19
README.md
··· 1 + # ocaml-ccsds-121 2 + 3 + OCaml implementation of **CCSDS 121.0-B-3** Lossless Data Compression, 4 + the Rice/Golomb adaptive entropy coding algorithm used by space missions 5 + (Mars rovers, JWST, Earth observation satellites) for lossless 6 + compression of image and science instrument data. 7 + 8 + ## Reference 9 + 10 + [CCSDS 121.0-B-3](https://public.ccsds.org/Pubs/121x0b3.pdf) — Lossless 11 + Data Compression. Consultative Committee for Space Data Systems, 2020. 12 + 13 + ## Usage 14 + 15 + ```ocaml 16 + let cfg = Rice.config ~block_size:16 ~bits_per_sample:16 () 17 + let compressed = Rice.compress cfg data 18 + let result = Rice.decompress cfg compressed (* Ok original | Error msg *) 19 + ```
+22
dune-project
··· 1 + (lang dune 3.21) 2 + (name rice) 3 + 4 + (generate_opam_files true) 5 + 6 + (source (tangled gazagnaire.org/ocaml-rice)) 7 + 8 + (maintainers "Thomas Gazagnaire") 9 + (authors "Thomas Gazagnaire") 10 + 11 + (package 12 + (name rice) 13 + (synopsis "CCSDS 121.0-B-3 Lossless Data Compression (Rice/Golomb coding)") 14 + (description 15 + "Implementation of the CCSDS 121.0-B-3 Lossless Data Compression standard. 16 + Uses Rice/Golomb adaptive entropy coding for lossless compression of 17 + science instrument data, as used by space missions including Mars rovers, 18 + JWST, and Earth observation satellites.") 19 + (depends 20 + (ocaml (>= 5.1)) 21 + (alcotest (and (>= 1.7.0) :with-test)) 22 + (alcobar (and (>= 0.1) :with-test))))
+22
fuzz/dune
··· 1 + (executable 2 + (name fuzz) 3 + (modules fuzz fuzz_rice) 4 + (libraries rice alcobar)) 5 + 6 + (rule 7 + (alias runtest) 8 + (enabled_if 9 + (<> %{profile} afl)) 10 + (deps fuzz.exe) 11 + (action 12 + (run %{exe:fuzz.exe}))) 13 + 14 + (rule 15 + (alias fuzz) 16 + (enabled_if 17 + (= %{profile} afl)) 18 + (deps fuzz.exe) 19 + (action 20 + (progn 21 + (run %{exe:fuzz.exe} --gen-corpus corpus) 22 + (run afl-fuzz -V 60 -i corpus -o _fuzz -- %{exe:fuzz.exe} @@))))
+1
fuzz/fuzz.ml
(* Entry point of the fuzz executable: hands the Rice fuzz suite to the
   Alcobar runner under the name "rice". *)
let () =
  let suites = [ Fuzz_rice.suite ] in
  Alcobar.run "rice" suites
+82
fuzz/fuzz_rice.ml
(** Fuzz tests for CCSDS 121.0-B-3 Lossless Data Compression. *)

open Alcobar

(** [clamp limit s] is [s] cut down to its first [limit] bytes when it is
    longer than that, and [s] itself otherwise. Keeps fuzz inputs small. *)
let clamp limit s = if String.length s > limit then String.sub s 0 limit else s

(** [check_roundtrip cfg data ~on_mismatch] compresses [data], decompresses
    the result with the same [cfg], fails the test if decompression reports
    an error, and calls [on_mismatch] if the recovered bytes differ. *)
let check_roundtrip cfg data ~on_mismatch =
  match Rice.decompress cfg (Rice.compress cfg data) with
  | Error msg -> fail ("decompress failed: " ^ msg)
  | Ok result -> if not (Bytes.equal data result) then on_mismatch ()

(** Roundtrip with 8-bit samples: compress then decompress must recover the
    original bytes. Empty inputs are skipped. *)
let test_roundtrip_8 buf =
  let data = Bytes.of_string (clamp 4096 buf) in
  let cfg = Rice.config ~block_size:16 ~bits_per_sample:8 () in
  if Bytes.length data > 0 then
    check_roundtrip cfg data ~on_mismatch:(fun () ->
        fail "roundtrip mismatch for 8-bit data")

(** Roundtrip with 16-bit samples. The input is cut to an even number of
    bytes so that it holds whole samples only; empty inputs are skipped. *)
let test_roundtrip_16 buf =
  let buf = clamp 4096 buf in
  (* Clear the lowest bit of the length to round it down to an even value. *)
  let len = String.length buf land lnot 1 in
  if len > 0 then begin
    let data = Bytes.of_string (String.sub buf 0 len) in
    let cfg = Rice.config ~block_size:16 ~bits_per_sample:16 () in
    check_roundtrip cfg data ~on_mismatch:(fun () ->
        fail "roundtrip mismatch for 16-bit data")
  end

(** Decompress must not crash on arbitrary input; the result is discarded. *)
let test_decompress_no_crash buf =
  let data = Bytes.of_string (clamp 1024 buf) in
  let cfg = Rice.config ~block_size:16 ~bits_per_sample:16 () in
  ignore (Rice.decompress cfg data)

(** Roundtrip with varying block sizes: the fuzzed integer [n] picks one of
    8, 16, 32 or 64 samples per block. *)
let test_roundtrip_block_sizes n buf =
  let data = Bytes.of_string (clamp 1024 buf) in
  if Bytes.length data > 0 then begin
    let bs = [| 8; 16; 32; 64 |].(abs n mod 4) in
    let cfg = Rice.config ~block_size:bs ~bits_per_sample:8 () in
    check_roundtrip cfg data ~on_mismatch:(fun () ->
        failf "roundtrip mismatch with block_size=%d" bs)
  end

let suite =
  ( "ccsds-121",
    [
      test_case "roundtrip 8-bit" [ bytes ] test_roundtrip_8;
      test_case "roundtrip 16-bit" [ bytes ] test_roundtrip_16;
      test_case "decompress no crash" [ bytes ] test_decompress_no_crash;
      test_case "roundtrip block sizes" [ int; bytes ]
        test_roundtrip_block_sizes;
    ] )
+3
lib/dune
··· 1 + (library 2 + (name rice) 3 + (public_name rice))
+417
lib/rice.ml
(** CCSDS 121.0-B-3 Lossless Data Compression.

    Implements Rice/Golomb adaptive entropy coding in the style of the CCSDS
    121.0-B-3 standard for lossless compression of science data.

    Stream layout produced by [compress]: a 32-bit sample count (MSB first),
    then one record per block of at most [block_size] samples. Each record is
    the split parameter [k] in a fixed-width field followed by the coded
    mapped residuals of the block. The final byte is zero-padded. *)

(* -- Configuration -------------------------------------------------------- *)

type predictor = Unit_delay | Neighborhood
type config = { block_size : int; bits_per_sample : int; predictor : predictor }

(** [config ?block_size ?bits_per_sample ()] builds a configuration using the
    [Unit_delay] predictor.
    @raise Invalid_argument
      if [block_size] is outside 8..64 or [bits_per_sample] outside 1..32. *)
let config ?(block_size = 16) ?(bits_per_sample = 16) () =
  if block_size < 8 || block_size > 64 then
    invalid_arg
      (Printf.sprintf "block_size must be in 8..64, got %d" block_size);
  if bits_per_sample < 1 || bits_per_sample > 32 then
    invalid_arg
      (Printf.sprintf "bits_per_sample must be in 1..32, got %d" bits_per_sample);
  { block_size; bits_per_sample; predictor = Unit_delay }

(** [config_with_predictor p cfg] is [cfg] with its predictor replaced by [p]. *)
let config_with_predictor predictor cfg = { cfg with predictor }

(* -- Bit I/O -------------------------------------------------------------- *)

(** Growable bit buffer for writing compressed output. Bits are packed MSB
    first within each byte. *)
module Bitwriter : sig
  type t

  val create : int -> t
  val write_bits : t -> int -> int -> unit
  val write_unary : t -> int -> unit
  val to_bytes : t -> bytes
  val bit_length : t -> int
end = struct
  type t = {
    mutable buf : bytes;
    mutable byte_pos : int;
    mutable bit_pos : int; (* 0..7, next bit position within current byte *)
  }

  (* The buffer always starts with at least 16 zeroed bytes. *)
  let create initial_cap =
    { buf = Bytes.make (max 16 initial_cap) '\000'; byte_pos = 0; bit_pos = 0 }

  (* Guarantee room for [n_bytes] more bytes plus the byte being filled,
     at least doubling the buffer when it has to grow. *)
  let ensure_capacity t n_bytes =
    let needed = t.byte_pos + n_bytes + 1 in
    if needed > Bytes.length t.buf then begin
      let new_cap = max needed (Bytes.length t.buf * 2) in
      let new_buf = Bytes.make new_cap '\000' in
      Bytes.blit t.buf 0 new_buf 0 (t.byte_pos + 1);
      t.buf <- new_buf
    end

  (* Append one bit; any non-zero [b] is written as a 1. *)
  let write_bit t b =
    ensure_capacity t 1;
    if b <> 0 then begin
      let cur = Bytes.get_uint8 t.buf t.byte_pos in
      Bytes.set_uint8 t.buf t.byte_pos (cur lor (1 lsl (7 - t.bit_pos)))
    end;
    t.bit_pos <- t.bit_pos + 1;
    if t.bit_pos = 8 then begin
      t.bit_pos <- 0;
      t.byte_pos <- t.byte_pos + 1;
      ensure_capacity t 1;
      Bytes.set_uint8 t.buf t.byte_pos 0
    end

  (* Write the [nbits] low bits of [value], MSB first. [nbits = 0] writes
     nothing. *)
  let write_bits t nbits value =
    for i = nbits - 1 downto 0 do
      write_bit t ((value lsr i) land 1)
    done

  (* Unary code: [n] zeros followed by a 1. *)
  let write_unary t n =
    for _ = 1 to n do
      write_bit t 0
    done;
    write_bit t 1

  (* Copy of the bytes written so far; a partially filled last byte is
     included with its unused low bits left at zero. *)
  let to_bytes t =
    let total_bytes = if t.bit_pos = 0 then t.byte_pos else t.byte_pos + 1 in
    Bytes.sub t.buf 0 total_bytes

  let bit_length t = (t.byte_pos * 8) + t.bit_pos
end

(** Bit reader for decompression. Every read past the end of the buffer
    raises [Invalid_argument "bitreader: end of stream"]. *)
module Bitreader : sig
  type t

  val create : bytes -> t
  val read_bit : t -> int
  val read_bits : t -> int -> int
  val read_unary : t -> int
  val bits_remaining : t -> int
end = struct
  type t = {
    buf : bytes;
    total_bits : int;
    mutable byte_pos : int;
    mutable bit_pos : int;
  }

  let create buf =
    { buf; total_bits = Bytes.length buf * 8; byte_pos = 0; bit_pos = 0 }

  let bits_remaining t = t.total_bits - ((t.byte_pos * 8) + t.bit_pos)

  let read_bit t =
    if bits_remaining t <= 0 then
      raise (Invalid_argument "bitreader: end of stream");
    let byte_val = Bytes.get_uint8 t.buf t.byte_pos in
    let bit = (byte_val lsr (7 - t.bit_pos)) land 1 in
    t.bit_pos <- t.bit_pos + 1;
    if t.bit_pos = 8 then begin
      t.bit_pos <- 0;
      t.byte_pos <- t.byte_pos + 1
    end;
    bit

  (* Read [nbits] bits MSB first; [nbits = 0] yields 0 without reading. *)
  let read_bits t nbits =
    let v = ref 0 in
    for _ = 1 to nbits do
      v := (!v lsl 1) lor read_bit t
    done;
    !v

  (* Number of zero bits before the next 1 bit (which is consumed). *)
  let read_unary t =
    let n = ref 0 in
    while read_bit t = 0 do
      incr n
    done;
    !n
end

(* -- Sample packing/unpacking --------------------------------------------- *)

(** Number of whole bytes used to store one [bps]-bit sample. *)
let bytes_per_sample bps = (bps + 7) / 8

(** All-ones mask covering the low [bps] bits. *)
let sample_mask bps = if bps >= 63 then max_int else (1 lsl bps) - 1

(** [read_sample buf offset bps] reads one big-endian sample stored at
    [offset] and keeps only its low [bps] bits.
    @raise Invalid_argument if the sample would extend past the end of [buf]. *)
let read_sample buf offset bps =
  let width = bytes_per_sample bps in
  if offset + width > Bytes.length buf then
    raise (Invalid_argument "read_sample: buffer too short");
  let v = ref 0 in
  for i = 0 to width - 1 do
    v := (!v lsl 8) lor Bytes.get_uint8 buf (offset + i)
  done;
  !v land sample_mask bps

(** [write_sample buf offset bps value] stores [value] big-endian at [offset]
    using [bytes_per_sample bps] bytes. *)
let write_sample buf offset bps value =
  let width = bytes_per_sample bps in
  for i = 0 to width - 1 do
    Bytes.set_uint8 buf (offset + i) ((value lsr (8 * (width - 1 - i))) land 0xFF)
  done

(** Number of whole samples contained in [data_len] bytes; a trailing partial
    sample is not counted. *)
let sample_count cfg data_len = data_len / bytes_per_sample cfg.bits_per_sample

(* -- Predictor ------------------------------------------------------------ *)

(** Map a signed residual to a non-negative integer: [delta >= 0] becomes
    [2 * delta], a negative [delta] becomes [2 * |delta| - 1]. *)
let map_residual delta = if delta >= 0 then 2 * delta else (2 * -delta) - 1

(** Inverse of [map_residual]. *)
let unmap_residual m = if m land 1 = 0 then m / 2 else -((m + 1) / 2)

(** [wrap_residual bps delta] is the representative of [delta] modulo
    [2^bps] lying in [-2^(bps-1) .. 2^(bps-1) - 1].

    Reconstruction adds the residual to the prediction modulo [2^bps], so the
    wrapped value rebuilds the same sample as [delta] does. Wrapping keeps
    the mapped residual below [2^bps], which is what lets the escape code
    store it in exactly [bps] bits without losing the top bit. *)
let wrap_residual bps delta =
  let mask = sample_mask bps in
  let d = delta land mask in
  if d > mask lsr 1 then d - mask - 1 else d

(** Prediction for sample [i] given the samples before it. The first sample
    is predicted as 0. [Unit_delay] uses the previous sample; [Neighborhood]
    uses the previous sample for index 1 and the average of the two previous
    samples from index 2 on. Only indices below [i] are read. *)
let predict cfg samples i =
  match cfg.predictor with
  | Unit_delay -> if i = 0 then 0 else samples.(i - 1)
  | Neighborhood ->
      if i = 0 then 0
      else if i = 1 then samples.(0)
      else (samples.(i - 1) + samples.(i - 2)) / 2

(** Mapped prediction residuals of [samples], one per sample. *)
let compute_residuals cfg samples =
  let bps = cfg.bits_per_sample in
  Array.mapi
    (fun i s -> map_residual (wrap_residual bps (s - predict cfg samples i)))
    samples

(** Inverse of [compute_residuals]: rebuilds the samples left to right, each
    reduced to [bits_per_sample] bits. *)
let reconstruct_samples cfg residuals =
  let mask = sample_mask cfg.bits_per_sample in
  let samples = Array.make (Array.length residuals) 0 in
  Array.iteri
    (fun i m ->
      samples.(i) <- (predict cfg samples i + unmap_residual m) land mask)
    residuals;
  samples

(* -- Adaptive Rice coding ------------------------------------------------- *)

(** [floor_log2 x] is floor(log2 x) for positive [x], and 0 for [x <= 0]. *)
let floor_log2 x =
  if x <= 0 then 0
  else begin
    let r = ref 0 in
    let v = ref x in
    while !v > 1 do
      v := !v lsr 1;
      incr r
    done;
    !r
  end

(** Width in bits of the per-block field holding the split parameter; wide
    enough for every value in [0 .. bps]. *)
let k_field_bits bps = max 1 (1 + floor_log2 bps)

(** Select the split parameter for the [len] mapped residuals starting at
    [ofs]: [k = floor(log2(sum / len))], clamped to [0 .. bps]. A block whose
    mean residual is below 1 gets [k = 0]. *)
let select_k residuals ofs len bps =
  if len <= 0 then 0
  else begin
    let sum = ref 0 in
    for i = ofs to ofs + len - 1 do
      sum := !sum + residuals.(i)
    done;
    (* floor(log2) of the real mean equals floor_log2 of its integer part,
       and floor_log2 0 = 0 covers means below 1. *)
    min bps (floor_log2 (!sum / len))
  end

(** Encode [len] mapped residuals starting at [ofs] with split parameter [k].

    The block starts with [k] in a [k_field_bits bps]-bit field. A residual
    [m] whose quotient [q = m lsr k] is below [bps] is written as unary [q]
    followed by the [k] low bits of [m]. Otherwise it is escaped: [bps] zero
    bits followed by the [bps] low bits of [m]. *)
let encode_block bw residuals ofs len k bps =
  Bitwriter.write_bits bw (k_field_bits bps) k;
  let low_mask = (1 lsl k) - 1 in
  for i = ofs to ofs + len - 1 do
    let m = residuals.(i) in
    let q = m lsr k in
    if q < bps then begin
      Bitwriter.write_unary bw q;
      (* With k = 0 this writes no bits at all. *)
      Bitwriter.write_bits bw k (m land low_mask)
    end
    else begin
      Bitwriter.write_bits bw bps 0;
      Bitwriter.write_bits bw bps m
    end
  done

(** Count leading zero bits, stopping after a 1 bit or as soon as [limit]
    zeros have been seen. A result equal to [limit] signals an escape. *)
let read_quotient br limit =
  let rec go zeros =
    if zeros = limit then zeros
    else if Bitreader.read_bit br = 1 then zeros
    else go (zeros + 1)
  in
  go 0

(** Decode [len] mapped residuals written by [encode_block] with the same
    [k] and [bps]. The block's [k] field must already have been consumed.
    @raise Invalid_argument if the stream ends inside the block. *)
let decode_block br len k bps =
  Array.init len (fun _ ->
      let q = read_quotient br bps in
      if q = bps then Bitreader.read_bits br bps
      else
        let low = Bitreader.read_bits br k in
        (q lsl k) lor low)

(* -- Compress ------------------------------------------------------------- *)

(** [compress cfg data] encodes the whole samples found in [data]; trailing
    bytes that do not form a complete sample are ignored. Returns the empty
    buffer when [data] holds no complete sample. The sample count is stored
    in a 32-bit field, so only its low 32 bits are recorded. *)
let compress cfg data =
  let bps = cfg.bits_per_sample in
  let width = bytes_per_sample bps in
  let n = sample_count cfg (Bytes.length data) in
  if n = 0 then Bytes.empty
  else begin
    let samples = Array.init n (fun i -> read_sample data (i * width) bps) in
    let residuals = compute_residuals cfg samples in
    (* Initial capacity only; the writer grows on demand. *)
    let bw = Bitwriter.create (max 64 (Bytes.length data * 2)) in
    Bitwriter.write_bits bw 32 n;
    let ofs = ref 0 in
    while !ofs < n do
      (* The last block may be shorter than [block_size]. *)
      let len = min cfg.block_size (n - !ofs) in
      let k = select_k residuals !ofs len bps in
      encode_block bw residuals !ofs len k bps;
      ofs := !ofs + len
    done;
    Bitwriter.to_bytes bw
  end

(* -- Decompress ----------------------------------------------------------- *)

(** [decompress cfg data] decodes a buffer produced by [compress] with the
    same [cfg]. Empty input decodes to the empty buffer.

    Returns [Error msg] instead of raising when the stream is truncated, when
    the announced sample count cannot fit in the remaining input, or when a
    block announces a split parameter larger than [bits_per_sample]. *)
let decompress cfg data =
  if Bytes.length data = 0 then Ok Bytes.empty
  else
    try
      let bps = cfg.bits_per_sample in
      let width = bytes_per_sample bps in
      let br = Bitreader.create data in
      let n = Bitreader.read_bits br 32 in
      if n < 0 then Error "invalid sample count"
      else if n = 0 then Ok Bytes.empty
      else if n > Bitreader.bits_remaining br then
        (* Every coded sample takes at least one bit, so a larger count can
           only come from a corrupt header; rejecting it here avoids
           allocating an array sized by untrusted input. *)
        Error "sample count exceeds available data"
      else begin
        let k_bits = k_field_bits bps in
        let residuals = Array.make n 0 in
        let pos = ref 0 in
        while !pos < n do
          let len = min cfg.block_size (n - !pos) in
          let k = Bitreader.read_bits br k_bits in
          (* The encoder never emits k above bps. *)
          if k > bps then
            invalid_arg (Printf.sprintf "invalid split parameter %d" k);
          let block = decode_block br len k bps in
          Array.blit block 0 residuals !pos len;
          pos := !pos + len
        done;
        let samples = reconstruct_samples cfg residuals in
        let out = Bytes.make (n * width) '\000' in
        Array.iteri (fun i s -> write_sample out (i * width) bps s) samples;
        Ok out
      end
    with Invalid_argument msg | Failure msg -> Error msg
+29
lib/rice.mli
(** CCSDS 121.0-B-3 Lossless Data Compression.

    Rice/Golomb adaptive entropy coding for science instrument data. Used by
    missions including Mars rovers, JWST, and Earth observation satellites for
    lossless compression of image and science data. *)

type config
(** Compression configuration: block size, sample bit depth and predictor. *)

val config : ?block_size:int -> ?bits_per_sample:int -> unit -> config
(** [config ?block_size ?bits_per_sample ()] creates a configuration that
    uses the {!Unit_delay} predictor.
    @param block_size Number of samples per block (default: 16, range: 8-64).
    @param bits_per_sample Input sample bit depth (default: 16, range: 1-32).
    @raise Invalid_argument if either parameter is outside its range. *)

val compress : config -> bytes -> bytes
(** [compress cfg data] compresses [data] using CCSDS 121.0 Rice coding.

    [data] is read as a sequence of big-endian samples, each stored in
    [(bits_per_sample + 7) / 8] bytes; only the low [bits_per_sample] bits of
    each sample are kept. Trailing bytes that do not form a whole sample are
    ignored. The result is empty when [data] holds no complete sample. *)

val decompress : config -> bytes -> (bytes, string) result
(** [decompress cfg data] decompresses CCSDS 121.0 compressed data produced
    by {!compress} with the same configuration. Empty input yields
    [Ok Bytes.empty]. Returns [Error msg] when the stream ends before all
    announced samples have been decoded. *)

(** {1 Predictor} *)

(** Prediction mode used to turn samples into residuals. *)
type predictor =
  | Unit_delay  (** Predict each sample from the previous sample. *)
  | Neighborhood
      (** Predict each sample from the average of the two previous samples
          (the second sample is predicted from the first). *)

val config_with_predictor : predictor -> config -> config
(** [config_with_predictor p cfg] is [cfg] with its predictor replaced by
    [p]; block size and bit depth are unchanged. The same predictor must be
    used for compression and decompression. *)
+35
rice.opam
··· 1 + # This file is generated by dune, edit dune-project instead 2 + opam-version: "2.0" 3 + synopsis: "CCSDS 121.0-B-3 Lossless Data Compression (Rice/Golomb coding)" 4 + description: """ 5 + Implementation of the CCSDS 121.0-B-3 Lossless Data Compression standard. 6 + Uses Rice/Golomb adaptive entropy coding for lossless compression of 7 + science instrument data, as used by space missions including Mars rovers, 8 + JWST, and Earth observation satellites.""" 9 + maintainer: ["Thomas Gazagnaire"] 10 + authors: ["Thomas Gazagnaire"] 11 + homepage: "https://tangled.org/gazagnaire.org/ocaml-rice" 12 + bug-reports: "https://tangled.org/gazagnaire.org/ocaml-rice/issues" 13 + depends: [ 14 + "dune" {>= "3.21"} 15 + "ocaml" {>= "5.1"} 16 + "alcotest" {>= "1.7.0" & with-test} 17 + "alcobar" {>= "0.1" & with-test} 18 + "odoc" {with-doc} 19 + ] 20 + build: [ 21 + ["dune" "subst"] {dev} 22 + [ 23 + "dune" 24 + "build" 25 + "-p" 26 + name 27 + "-j" 28 + jobs 29 + "@install" 30 + "@runtest" {with-test} 31 + "@doc" {with-doc} 32 + ] 33 + ] 34 + dev-repo: "git+https://tangled.org/gazagnaire.org/ocaml-rice" 35 + x-maintenance-intent: ["(latest)"]
+3
test/dune
··· 1 + (test 2 + (name test_rice) 3 + (libraries rice alcotest))
+226
test/test_rice.ml
(** Tests for CCSDS 121.0-B-3 Lossless Data Compression. *)

(* Fixed seed: the "random" inputs are the same on every run, so a failure
   can always be reproduced. *)
let () = Random.init 0x121

(* -- Helpers -------------------------------------------------------------- *)

let cfg16 = Rice.config ~block_size:16 ~bits_per_sample:16 ()
let cfg8 = Rice.config ~block_size:16 ~bits_per_sample:8 ()

(** Pack a list of samples into bytes, big-endian, [width] bytes each. Only
    the low [8 * width] bits of each sample are stored. *)
let pack ~width samples =
  let buf = Bytes.make (List.length samples * width) '\000' in
  List.iteri
    (fun i s ->
      for b = 0 to width - 1 do
        Bytes.set_uint8 buf
          ((i * width) + b)
          ((s lsr (8 * (width - 1 - b))) land 0xFF)
      done)
    samples;
  buf

(** Pack a list of 16-bit samples into bytes (big-endian). *)
let pack_16 samples = pack ~width:2 samples

(** Pack a list of 8-bit samples into bytes. *)
let pack_8 samples = pack ~width:1 samples

(** Unpack 16-bit big-endian samples from bytes. *)
let unpack_16 buf =
  List.init
    (Bytes.length buf / 2)
    (fun i ->
      let hi = Bytes.get_uint8 buf (i * 2) in
      let lo = Bytes.get_uint8 buf ((i * 2) + 1) in
      (hi lsl 8) lor lo)

(** Generate [n] random samples in [0, max_val). [Random.full_int] accepts
    bounds above 2^30, which 24- and 32-bit samples need. *)
let random_samples n max_val = List.init n (fun _ -> Random.full_int max_val)

(** Compress then decompress [data], failing the test on a decode error. *)
let roundtrip cfg data =
  match Rice.decompress cfg (Rice.compress cfg data) with
  | Ok decompressed -> decompressed
  | Error msg -> Alcotest.failf "decompression failed: %s" msg

(** Hexadecimal rendering, so mismatches print readably. *)
let hex_of_bytes b =
  let out = Buffer.create (2 * Bytes.length b) in
  Bytes.iter
    (fun c -> Buffer.add_string out (Printf.sprintf "%02x" (Char.code c)))
    b;
  Buffer.contents out

let bytes_eq = Alcotest.testable (Fmt.of_to_string hex_of_bytes) Bytes.equal

(** Check that [data] roundtrips and that its compressed form is strictly
    shorter than [data]. *)
let check_roundtrip_and_shrinks cfg label data =
  let compressed = Rice.compress cfg data in
  Alcotest.(check bytes_eq) (label ^ " roundtrip") data (roundtrip cfg data);
  Alcotest.(check bool)
    (label ^ " compresses")
    true
    (Bytes.length compressed < Bytes.length data)

(* -- Tests ---------------------------------------------------------------- *)

(** 1. Roundtrip: random data compress->decompress recovers original. *)
let test_roundtrip_random () =
  for _ = 1 to 10 do
    let data = pack_16 (random_samples 128 65536) in
    Alcotest.(check bytes_eq)
      "roundtrip random 16-bit" data (roundtrip cfg16 data)
  done

(** 2. All-zeros: should compress to less than the input. *)
let test_all_zeros () =
  check_roundtrip_and_shrinks cfg16 "all-zeros"
    (pack_16 (List.init 256 (fun _ -> 0)))

(** 3. All-same-value: only the first residual is non-zero. *)
let test_all_same () =
  check_roundtrip_and_shrinks cfg8 "all-same"
    (pack_8 (List.init 256 (fun _ -> 42)))

(** 4. Ramp data: with the unit-delay predictor every residual after the
    first is 1, so the data should compress. *)
let test_ramp () =
  check_roundtrip_and_shrinks cfg16 "ramp" (pack_16 (List.init 64 Fun.id))

(** 5. Random noise: no compression is expected, but the output must stay
    below three times the input size. *)
let test_random_noise () =
  let data = pack_16 (random_samples 256 65536) in
  let compressed = Rice.compress cfg16 data in
  Alcotest.(check bytes_eq)
    "random noise roundtrip" data (roundtrip cfg16 data);
  Alcotest.(check bool)
    "random noise bounded" true
    (Bytes.length compressed < Bytes.length data * 3)

(** 6. Different block sizes. *)
let test_block_sizes () =
  let data = pack_8 (random_samples 128 256) in
  List.iter
    (fun bs ->
      let cfg = Rice.config ~block_size:bs ~bits_per_sample:8 () in
      Alcotest.(check bytes_eq)
        (Printf.sprintf "block_size=%d roundtrip" bs)
        data (roundtrip cfg data))
    [ 8; 16; 32; 64 ]

(** 7. Different bit depths, with samples drawn from the full range of each
    depth so that every byte of a sample is exercised. *)
let test_bit_depths () =
  List.iter
    (fun bps ->
      let samples = random_samples 64 (1 lsl bps) in
      let data = pack ~width:((bps + 7) / 8) samples in
      let cfg = Rice.config ~block_size:16 ~bits_per_sample:bps () in
      Alcotest.(check bytes_eq)
        (Printf.sprintf "bps=%d roundtrip" bps)
        data (roundtrip cfg data))
    [ 8; 16; 24; 32 ]

(** 8. Known input: compress [0,1,2,...,15] with the unit delay predictor.
    We verify roundtrip, determinism and that the output is shorter. *)
let test_known_vector () =
  let samples = List.init 16 Fun.id in
  let data = pack_16 samples in
  let compressed = Rice.compress cfg16 data in
  let result = roundtrip cfg16 data in
  Alcotest.(check bytes_eq) "known vector roundtrip" data result;
  (* Compressing the same data twice must give the same output. *)
  Alcotest.(check bytes_eq)
    "deterministic compression" compressed
    (Rice.compress cfg16 data);
  Alcotest.(check bool)
    "known vector produces output" true
    (Bytes.length compressed > 0);
  Alcotest.(check bool)
    "known vector compresses" true
    (Bytes.length compressed < Bytes.length data);
  Alcotest.(check (list int)) "known vector samples" samples (unpack_16 result)

(** 9. Empty input. *)
let test_empty () =
  let compressed = Rice.compress cfg16 Bytes.empty in
  Alcotest.(check int) "empty compresses to empty" 0 (Bytes.length compressed);
  match Rice.decompress cfg16 compressed with
  | Ok result ->
      Alcotest.(check int) "empty decompresses to empty" 0 (Bytes.length result)
  | Error msg -> Alcotest.failf "empty decompress failed: %s" msg

(** 10. Single sample. *)
let test_single_sample () =
  let data = pack_16 [ 12345 ] in
  let result = roundtrip cfg16 data in
  Alcotest.(check bytes_eq) "single sample roundtrip" data result;
  Alcotest.(check (list int)) "single sample value" [ 12345 ] (unpack_16 result)

(** 11. Predictor: unit delay and neighborhood both roundtrip. *)
let test_predictors () =
  let data = pack_8 (List.init 64 (fun i -> ((i * 7) + 13) mod 256)) in
  let with_predictor p =
    Rice.config_with_predictor p
      (Rice.config ~block_size:16 ~bits_per_sample:8 ())
  in
  let result_ud = roundtrip (with_predictor Rice.Unit_delay) data in
  let result_nb = roundtrip (with_predictor Rice.Neighborhood) data in
  Alcotest.(check bytes_eq) "unit_delay roundtrip" data result_ud;
  Alcotest.(check bytes_eq) "neighborhood roundtrip" data result_nb

(* -- Runner --------------------------------------------------------------- *)

let () =
  Alcotest.run "ccsds-121"
    [
      ( "roundtrip",
        [
          Alcotest.test_case "random data" `Quick test_roundtrip_random;
          Alcotest.test_case "all zeros" `Quick test_all_zeros;
          Alcotest.test_case "all same value" `Quick test_all_same;
          Alcotest.test_case "ramp" `Quick test_ramp;
          Alcotest.test_case "random noise" `Quick test_random_noise;
          Alcotest.test_case "block sizes" `Quick test_block_sizes;
          Alcotest.test_case "bit depths" `Quick test_bit_depths;
          Alcotest.test_case "known vector" `Quick test_known_vector;
          Alcotest.test_case "empty input" `Quick test_empty;
          Alcotest.test_case "single sample" `Quick test_single_sample;
          Alcotest.test_case "predictors" `Quick test_predictors;
        ] );
    ]