CCSDS 121.0-B-3 Lossless Data Compression (Rice/Golomb coding)
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

rice: zero-block aggregation — byte-exact match with libaec

The RICE encoder now aggregates consecutive zero blocks into a single
header with a block count, matching libaec's encoding. This produces
significantly more compact output for constant or near-constant data
(e.g. 2 bytes vs 4 bytes for 64 constant 8-bit samples).

Implements the CCSDS 121.0-B-3 §5.1 ROS (rest of segment) encoding, where FS(n) is the fundamental-sequence codeword of n zero bits followed by a one:
FS(0..3) → 1..4 blocks
FS(4) → rest of segment
FS(n≥5) → n blocks (codes past FS(4) are shifted by one to avoid collision with ROS)

The interop test now verifies byte-exact compression match against
libaec for all vectors including constant data.

+61 -41
+56 -22
lib/rice.ml
··· 300 300 let rec check i = i >= ofs + len || (arr.(i) = 0 && check (i + 1)) in 301 301 check ofs 302 302 303 + (** Check if block at [ofs] of size [len] is a zero block (all non-reference 304 + residuals are zero). *) 305 + let is_zero_block residuals ofs len is_ref = 306 + let count = if is_ref then len - 1 else len in 307 + let res_ofs = if is_ref then ofs + 1 else ofs in 308 + count > 0 && all_zero residuals res_ofs count 309 + 303 310 (** Compute the encoded length if we use split coding with parameter k. *) 304 311 let split_encoded_len residuals ofs len k = 305 312 let total = ref 0 in ··· 334 341 if !ok then !total else max_int 335 342 end 336 343 337 - (** Encode a block using CCSDS 121.0-B-3 format. [residuals] contains the 338 - preprocessed values for this block. [ref_sample] is the raw reference sample 339 - (emitted if [is_ref]). [is_ref] indicates whether this is the first block of 340 - an RSI. *) 344 + (* Encode an aggregated run of [zero_count] consecutive zero blocks. 345 + The first block may carry a reference sample. *) 346 + let encode_zero_blocks bw bps id_len is_ref ref_sample zero_count 347 + blocks_remaining = 348 + (* Zero block header: id_len+1 zero bits *) 349 + Bitwriter.write_bits bw (id_len + 1) 0; 350 + if is_ref then Bitwriter.write_bits bw bps ref_sample; 351 + (* FS encoding for block count, with ROS (CCSDS 121.0-B-3 §5.1): 352 + FS(0..3) → 1..4 blocks 353 + FS(4) → rest of segment (ROS) 354 + FS(n≥5) → n blocks (decoder subtracts 1, so n-1 blocks; we add 1) 355 + When the count equals blocks_remaining, use FS(4) (ROS). 
*) 356 + let fs = 357 + if zero_count = blocks_remaining && zero_count >= 5 then 4 358 + else if zero_count <= 4 then zero_count - 1 359 + else zero_count + 1 360 + in 361 + Bitwriter.write_unary bw fs 362 + 341 363 let encode_ccsds_block bw residuals ofs len bps id_len is_ref ref_sample = 342 364 let kmax = kmax_of_id_len id_len in 343 365 let count = if is_ref then len - 1 else len in 344 366 let res_ofs = if is_ref then ofs + 1 else ofs in 345 367 (* Check for zero block: all (non-reference) residuals are zero *) 346 368 if count > 0 && all_zero residuals res_ofs count then begin 347 - (* Zero block: id_len+1 zero bits *) 348 - Bitwriter.write_bits bw (id_len + 1) 0; 349 - (* Reference sample if applicable *) 350 - if is_ref then Bitwriter.write_bits bw bps ref_sample; 351 - (* FS for zero block count: with rsi=block_size, always 1 block, 352 - so emit FS(0) = single 1 bit *) 353 - Bitwriter.write_unary bw 0 369 + encode_zero_blocks bw bps id_len is_ref ref_sample 1 1 354 370 end 355 371 else if count = 0 then begin 356 - (* Block with only a reference sample - encode as zero block *) 357 - Bitwriter.write_bits bw (id_len + 1) 0; 358 - if is_ref then Bitwriter.write_bits bw bps ref_sample; 359 - Bitwriter.write_unary bw 0 372 + encode_zero_blocks bw bps id_len is_ref ref_sample 1 1 360 373 end 361 374 else begin 362 375 (* Try all split options and pick the best *) ··· 529 542 let block_idx = ref 0 in 530 543 while !block_idx < total_blocks do 531 544 (* Start of an RSI *) 532 - for b = 0 to blocks_per_rsi - 1 do 533 - if !block_idx + b < total_blocks then begin 534 - let global_block = !block_idx + b in 535 - let ofs = global_block * j in 536 - let is_ref = b = 0 in 537 - let ref_sample = if is_ref then samples.(ofs) else 0 in 538 - encode_ccsds_block bw residuals ofs j bps id_len is_ref ref_sample 545 + let b = ref 0 in 546 + while !b < blocks_per_rsi && !block_idx + !b < total_blocks do 547 + let global_block = !block_idx + !b in 548 + let ofs = global_block 
* j in 549 + let is_ref = !b = 0 in 550 + let ref_sample = if is_ref then samples.(ofs) else 0 in 551 + if is_zero_block residuals ofs j is_ref then begin 552 + (* Count consecutive zero blocks for aggregation *) 553 + let run = ref 1 in 554 + while 555 + !b + !run < blocks_per_rsi 556 + && !block_idx + !b + !run < total_blocks 557 + && is_zero_block residuals 558 + ((!block_idx + !b + !run) * j) 559 + j false 560 + do 561 + incr run 562 + done; 563 + let blocks_remaining = 564 + min blocks_per_rsi (total_blocks - !block_idx) - !b 565 + in 566 + encode_zero_blocks bw bps id_len is_ref ref_sample !run 567 + blocks_remaining; 568 + b := !b + !run 569 + end 570 + else begin 571 + encode_ccsds_block bw residuals ofs j bps id_len is_ref ref_sample; 572 + incr b 539 573 end 540 574 done; 541 575 block_idx := !block_idx + blocks_per_rsi
+1 -1
test/interop/libaec/dune
··· 1 1 (test 2 2 (name test) 3 - (libraries rice csvt astring alcotest) 3 + (libraries rice csvt alcotest) 4 4 (deps 5 5 (source_tree traces) 6 6 (source_tree scripts)))
+4 -18
test/interop/libaec/test.ml
··· 5 5 6 6 Each trace row contains raw samples compressed by libaec. The test 7 7 decompresses the libaec output with ocaml-rice and checks it recovers the 8 - original samples (the core interop guarantee). For non-constant data it 9 - also verifies byte-exact compression match. 10 - 11 - {b Known implementation differences (libaec 1.1):} 12 - 13 - - Constant-data blocks (all samples identical) produce different but 14 - spec-valid encodings. CCSDS 121.0-B-3 Section 5.1 allows multiple 15 - representations for zero-block coding. Both decode correctly. Compress 16 - tests are skipped for constant vectors. *) 8 + original samples, and compresses the same input with ocaml-rice and checks 9 + byte-exact match against libaec's output. *) 17 10 18 11 let trace path = Filename.concat "traces" path 19 12 ··· 60 53 | Ok rows -> rows 61 54 | Error e -> Alcotest.failf "CSV: %a" Csvt.pp_error e 62 55 63 - let is_constant vec = Astring.String.is_infix ~affix:"constant" vec.name 64 - 65 56 let test_compress vec () = 66 57 let cfg = 67 58 Rice.config ~block_size:vec.block_size ~bits_per_sample:vec.bits_per_sample ··· 93 84 Alcotest.run "rice-interop-libaec" 94 85 [ 95 86 ( "compress", 96 - List.filter_map 97 - (fun v -> 98 - (* Skip constant vectors: CCSDS 121.0 zero-block encoding is 99 - implementation-dependent. Both encodings are valid; only 100 - decompress interop matters. *) 101 - if is_constant v then None 102 - else Some (Alcotest.test_case v.name `Quick (test_compress v))) 87 + List.map 88 + (fun v -> Alcotest.test_case v.name `Quick (test_compress v)) 103 89 vectors ); 104 90 ( "decompress", 105 91 List.map