OCaml Zarr jsont codecs for v2/v3 and common conventions
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: V3 codec types (bytes, gzip, blosc, crc32c, transpose, sharding)

Add V3 module with all six core zarr v3 codec types plus Other_ext fallback.
Sharding is recursive via Jsont.rec' with a lazy value referencing codec_jsont_lazy.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

+286
+184
src/zarr_jsont.ml
··· 265 265 |> Jsont.Object.finish 266 266 end 267 267
(* Zarr v3 codecs.

   [Codec] holds one sub-module per known codec configuration object, each
   with a record type, accessors and a [jsont] description of the
   ["configuration"] object. [codec_jsont] maps a whole codec object
   ([{"name": ..., "configuration": ...}]) to the [codec] polymorphic
   variant, falling back to [`Other] for names it does not know. *)
module V3 = struct
  module Codec = struct
    (* Configuration of the ["bytes"] codec. *)
    module Bytes = struct
      type t = { endian : [ `Little | `Big ] }
      let endian t = t.endian
      let endian_jsont = Jsont.enum ~kind:"endian" ["little", `Little; "big", `Big]
      let jsont =
        Jsont.Object.map ~kind:"Bytes.config" (fun e -> { endian = e })
        |> Jsont.Object.mem "endian" endian_jsont ~enc:(fun t -> t.endian)
        |> Jsont.Object.skip_unknown
        |> Jsont.Object.finish
    end

    (* Configuration of the ["gzip"] codec. *)
    module Gzip = struct
      type t = { level : int }
      let level t = t.level
      let jsont =
        Jsont.Object.map ~kind:"Gzip.config" (fun l -> { level = l })
        |> Jsont.Object.mem "level" Jsont.int ~enc:(fun t -> t.level)
        |> Jsont.Object.skip_unknown
        |> Jsont.Object.finish
    end

    (* Configuration of the ["blosc"] codec. Only ["typesize"] is optional. *)
    module Blosc = struct
      type t = {
        cname : string;
        clevel : int;
        shuffle : [ `Noshuffle | `Shuffle | `Bitshuffle ];
        typesize : int option;
        blocksize : int;
      }
      let cname t = t.cname
      let clevel t = t.clevel
      let shuffle t = t.shuffle
      let typesize t = t.typesize
      let blocksize t = t.blocksize
      let shuffle_jsont = Jsont.enum ~kind:"blosc_shuffle"
          ["noshuffle", `Noshuffle; "shuffle", `Shuffle; "bitshuffle", `Bitshuffle]
      let jsont =
        Jsont.Object.map ~kind:"Blosc.config"
          (fun cn cl sh ts bs -> { cname = cn; clevel = cl; shuffle = sh; typesize = ts; blocksize = bs })
        |> Jsont.Object.mem "cname" Jsont.string ~enc:(fun t -> t.cname)
        |> Jsont.Object.mem "clevel" Jsont.int ~enc:(fun t -> t.clevel)
        |> Jsont.Object.mem "shuffle" shuffle_jsont ~enc:(fun t -> t.shuffle)
        |> Jsont.Object.opt_mem "typesize" Jsont.int ~enc:(fun t -> t.typesize)
        |> Jsont.Object.mem "blocksize" Jsont.int ~enc:(fun t -> t.blocksize)
        |> Jsont.Object.skip_unknown
        |> Jsont.Object.finish
    end

    (* Configuration of the ["transpose"] codec. *)
    module Transpose = struct
      type t = { order : int list }
      let order t = t.order
      let jsont =
        Jsont.Object.map ~kind:"Transpose.config" (fun o -> { order = o })
        |> Jsont.Object.mem "order" (Jsont.list Jsont.int) ~enc:(fun t -> t.order)
        |> Jsont.Object.skip_unknown
        |> Jsont.Object.finish
    end

    (* Configuration of the ["sharding_indexed"] codec. The codec variant is
       declared here because a sharding configuration contains codecs, which
       may themselves be sharding codecs. *)
    module Sharding = struct
      type t = {
        chunk_shape : int list;
        codecs : codec list;
        index_codecs : codec list;
        index_location : [ `Start | `End ];
      }
      and codec = [
        | `Bytes of Bytes.t
        | `Gzip of Gzip.t
        | `Blosc of Blosc.t
        | `Crc32c
        | `Transpose of Transpose.t
        | `Sharding of t
        | `Other of Other_ext.t
      ]
      let chunk_shape t = t.chunk_shape
      let codecs t = t.codecs
      let index_codecs t = t.index_codecs
      let index_location t = t.index_location
    end
  end

  type codec = Codec.Sharding.codec

  (* Lazily built description of a codec object. It is lazy so that the
     sharding configuration can refer back to it through [Jsont.rec'].

     Decoding works on the generic JSON value: the ["name"] member selects the
     variant and the ["configuration"] member is decoded with the matching
     sub-codec. Malformed input (non-object, missing name, missing
     configuration for a codec that needs one, invalid configuration) raises
     [Failure] with a descriptive message. *)
  let rec codec_jsont_lazy : codec Jsont.t Lazy.t = lazy (
    (* First string-valued ["name"] member, if any. *)
    let find_name mems =
      List.find_map (fun ((name, _), value) ->
          if name = "name" then match value with
            | Jsont.String (s, _) -> Some s
            | _ -> None
          else None) mems
    in
    (* First ["configuration"] member, if any. *)
    let find_config mems =
      List.find_map (fun ((name, _), value) ->
          if name = "configuration" then Some value else None) mems
    in
    (* A missing configuration is reported like the other decode failures
       instead of surfacing as [Invalid_argument] from [Option.get]. *)
    let require_config name = function
      | Some config_json -> config_json
      | None -> failwith (Printf.sprintf "codec %s: missing configuration" name)
    in
    let decode_config codec_t config_json =
      match Jsont.Json.decode codec_t config_json with
      | Ok v -> v | Error e -> failwith e
    in
    (* Builds [{"name": name}] with an optional ["configuration"] member. *)
    let encode_named name config_json =
      let mems = [ (("name", Jsont.Meta.none), Jsont.Json.string name) ] in
      let mems = match config_json with
        | None -> mems
        | Some c -> mems @ [ (("configuration", Jsont.Meta.none), c) ]
      in
      Jsont.Json.object' mems
    in
    let encode_config codec_t v =
      match Jsont.Json.encode codec_t v with
      | Ok j -> j | Error e -> failwith e
    in
    let index_location_jsont =
      Jsont.enum ~kind:"index_location" ["start", `Start; "end", `End]
    in
    let sharding_config_jsont =
      let codec_list = Jsont.list (Jsont.rec' codec_jsont_lazy) in
      Jsont.Object.map ~kind:"Sharding.config"
        (fun cs cc ic il ->
           Codec.Sharding.{ chunk_shape = cs; codecs = cc; index_codecs = ic; index_location = il })
      |> Jsont.Object.mem "chunk_shape" (Jsont.list Jsont.int)
        ~enc:(fun (t : Codec.Sharding.t) -> t.chunk_shape)
      |> Jsont.Object.mem "codecs" codec_list
        ~enc:(fun (t : Codec.Sharding.t) -> t.codecs)
      (* An absent ["index_codecs"] decodes to [[]], and [[]] is omitted on encode. *)
      |> Jsont.Object.mem "index_codecs" codec_list
        ~dec_absent:[]
        ~enc:(fun (t : Codec.Sharding.t) -> t.index_codecs)
        ~enc_omit:(fun l -> l = [])
      (* An absent ["index_location"] decodes to [`End], and [`End] is omitted on encode. *)
      |> Jsont.Object.mem "index_location" index_location_jsont
        ~dec_absent:`End
        ~enc:(fun (t : Codec.Sharding.t) -> t.index_location)
        ~enc_omit:(fun v -> v = `End)
      |> Jsont.Object.skip_unknown
      |> Jsont.Object.finish
    in
    Jsont.map ~kind:"V3.Codec"
      ~dec:(fun json ->
          match json with
          | Jsont.Object (mems, _) ->
            let name = match find_name mems with
              | Some n -> n
              | None -> failwith "codec: missing name"
            in
            (* Only looked up by the branches that need a configuration, so
               ["crc32c"] and unknown codecs are unaffected by its absence. *)
            let config () = require_config name (find_config mems) in
            (* NOTE(review): the zarr v3 spec appears to allow the bytes codec
               without an "endian" setting for single-byte data types; the
               current [Bytes.t] cannot represent that - confirm. *)
            (match name with
             | "bytes" ->
               `Bytes (decode_config Codec.Bytes.jsont (config ()))
             | "gzip" ->
               `Gzip (decode_config Codec.Gzip.jsont (config ()))
             | "blosc" ->
               `Blosc (decode_config Codec.Blosc.jsont (config ()))
             | "crc32c" -> `Crc32c
             | "transpose" ->
               `Transpose (decode_config Codec.Transpose.jsont (config ()))
             | "sharding_indexed" ->
               `Sharding (decode_config sharding_config_jsont (config ()))
             | _ ->
               (* Unknown name: keep the whole object as an extension value. *)
               (match Jsont.Json.decode Other_ext.jsont json with
                | Ok o -> `Other o
                | Error e -> failwith e))
          | _ -> failwith "codec: expected object")
      ~enc:(function
          | `Bytes b ->
            encode_named "bytes" (Some (encode_config Codec.Bytes.jsont b))
          | `Gzip g ->
            encode_named "gzip" (Some (encode_config Codec.Gzip.jsont g))
          | `Blosc b ->
            encode_named "blosc" (Some (encode_config Codec.Blosc.jsont b))
          | `Crc32c ->
            encode_named "crc32c" None
          | `Transpose t ->
            encode_named "transpose" (Some (encode_config Codec.Transpose.jsont t))
          | `Sharding s ->
            encode_named "sharding_indexed" (Some (encode_config sharding_config_jsont s))
          | `Other o ->
            (match Jsont.Json.encode Other_ext.jsont o with
             | Ok j -> j | Error e -> failwith e))
      Jsont.json
  )

  (* Public entry point: the recursive codec description. *)
  let codec_jsont : codec Jsont.t = Jsont.rec' codec_jsont_lazy
end

268 452 module V2 = struct 269 453 module Compressor = struct 270 454 module Blosc = struct
+59
src/zarr_jsont.mli
··· 73 73 val jsont : t Jsont.t 74 74 end 75 75 76 + (** Zarr v3 codec types. *) 77 + module V3 : sig 78 + (** Typed sub-codecs for known v3 codecs. *) 79 + module Codec : sig 80 + (** Bytes codec configuration. *) 81 + module Bytes : sig 82 + type t 83 + val endian : t -> [ `Little | `Big ] (** [endian t] is the byte order read from the ["endian"] member. *) 84 + end 85 + 86 + (** Gzip codec configuration. *) 87 + module Gzip : sig 88 + type t 89 + val level : t -> int (** [level t] is the value of the ["level"] member. *) 90 + end 91 + 92 + (** Blosc codec configuration. *) 93 + module Blosc : sig 94 + type t 95 + val cname : t -> string (** [cname t] is the value of the ["cname"] member. *) 96 + val clevel : t -> int (** [clevel t] is the value of the ["clevel"] member. *) 97 + val shuffle : t -> [ `Noshuffle | `Shuffle | `Bitshuffle ] (** [shuffle t] is the ["shuffle"] member: ["noshuffle"], ["shuffle"] or ["bitshuffle"]. *) 98 + val typesize : t -> int option (** [typesize t] is the optional ["typesize"] member, [None] when it is absent. *) 99 + val blocksize : t -> int (** [blocksize t] is the value of the ["blocksize"] member. *) 100 + end 101 + 102 + (** Transpose codec configuration. *) 103 + module Transpose : sig 104 + type t 105 + val order : t -> int list (** [order t] is the integer list of the ["order"] member. *) 106 + end 107 + 108 + (** Sharding codec configuration. The codec variant is declared together with [t] because a sharding configuration itself contains codecs. *) 109 + module Sharding : sig 110 + type t 111 + and codec = [ 112 + | `Bytes of Bytes.t 113 + | `Gzip of Gzip.t 114 + | `Blosc of Blosc.t 115 + | `Crc32c 116 + | `Transpose of Transpose.t 117 + | `Sharding of t 118 + | `Other of Other_ext.t 119 + ] 120 + val chunk_shape : t -> int list (** [chunk_shape t] is the integer list of the ["chunk_shape"] member. *) 121 + val codecs : t -> codec list (** [codecs t] is the codec list of the ["codecs"] member. *) 122 + val index_codecs : t -> codec list (** [index_codecs t] is the ["index_codecs"] member, [[]] when it is absent. *) 123 + val index_location : t -> [ `Start | `End ] (** [index_location t] is the ["index_location"] member, [`End] when it is absent. *) 124 + end 125 + end 126 + 127 + (** A v3 codec: bytes, gzip, blosc, crc32c, transpose, sharding, or a catch-all. *) 128 + type codec = Codec.Sharding.codec 129 + 130 + val codec_jsont : codec Jsont.t 131 + (** Codec for {!codec}. Dispatches on the ["name"] field. 132 + Sharding codecs are decoded recursively. *) 133 + end 134 + 76 135 (** Zarr v2 compressor and filter codecs. *) 77 136 module V2 : sig 78 137 (** Typed sub-codecs for known v2 compressors. *)
+43
test/test_zarr_jsont.ml
··· 194 194 assert (Zarr_jsont.V2.Array_meta.filters v2 = None); 195 195 print_endline "test_v2_array: ok" 196 196
(* Decodes one JSON document per v3 codec kind and checks both the variant
   it lands in and the fields read back through the accessors. *)
let test_v3_codecs () =
  let module C = Zarr_jsont.V3.Codec in
  let parse text = decode Zarr_jsont.V3.codec_jsont text in
  (* bytes *)
  begin match parse {|{"name":"bytes","configuration":{"endian":"little"}}|} with
    | `Bytes cfg -> assert (C.Bytes.endian cfg = `Little)
    | _ -> assert false
  end;
  (* gzip *)
  begin match parse {|{"name":"gzip","configuration":{"level":5}}|} with
    | `Gzip cfg -> assert (C.Gzip.level cfg = 5)
    | _ -> assert false
  end;
  (* blosc *)
  begin match parse {|{"name":"blosc","configuration":{"cname":"lz4","clevel":5,"shuffle":"shuffle","typesize":4,"blocksize":0}}|} with
    | `Blosc cfg ->
      assert (C.Blosc.cname cfg = "lz4");
      assert (C.Blosc.shuffle cfg = `Shuffle)
    | _ -> assert false
  end;
  (* crc32c carries no configuration *)
  assert (parse {|{"name":"crc32c"}|} = `Crc32c);
  (* transpose *)
  begin match parse {|{"name":"transpose","configuration":{"order":[1,0,2]}}|} with
    | `Transpose cfg -> assert (C.Transpose.order cfg = [1;0;2])
    | _ -> assert false
  end;
  (* unknown names fall through to the extension catch-all *)
  begin match parse {|{"name":"zstd","configuration":{"level":3}}|} with
    | `Other ext -> assert (Zarr_jsont.Other_ext.name ext = "zstd")
    | _ -> assert false
  end;
  (* sharding, with one nested codec *)
  begin match parse {|{"name":"sharding_indexed","configuration":{"chunk_shape":[32,32],"codecs":[{"name":"bytes","configuration":{"endian":"little"}}],"index_location":"end"}}|} with
    | `Sharding cfg ->
      assert (C.Sharding.chunk_shape cfg = [32; 32]);
      assert (List.length (C.Sharding.codecs cfg) = 1);
      assert (C.Sharding.index_location cfg = `End)
    | _ -> assert false
  end;
  print_endline "test_v3_codecs: ok"

197 239 let () = test_other_codec () 198 240 let () = test_other_ext () 199 241 let () = test_fill_value () ··· 201 243 let () = test_v2_compressor () 202 244 let () = test_v2_filter () 203 245 let () = test_v2_array ()
let () = test_v3_codecs ()