OCaml Zarr jsont codecs for v2/v3 and common conventions
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: V3 data_type, chunk_grid, chunk_key_encoding, array_meta

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

+349
+244
src/zarr_jsont.ml
(* ··· *)
)

let codec_jsont : codec Jsont.t = Jsont.rec' codec_jsont_lazy

(* Shared helpers for name-dispatched object codecs.

   NOTE(review): all helpers below report problems with [failwith]. How a
   [Failure] raised from inside a [Jsont.map] function is surfaced to the
   caller of a decoder is not visible from here -- confirm that this is the
   intended error channel. *)

(** [ok_or_failwith r] is [v] if [r] is [Ok v].
    @raise Failure with the error message if [r] is [Error msg]. *)
let ok_or_failwith = function
  | Ok v -> v
  | Error msg -> failwith msg

(** [find_name mems] is the string held by the first member of [mems] that is
    called ["name"] and whose value is a JSON string, if there is one. *)
let find_name mems =
  List.find_map
    (function
      | ("name", _), Jsont.String (s, _) -> Some s
      | _ -> None)
    mems

(** [find_config mems] is the value of the first member of [mems] called
    ["configuration"], if there is one. *)
let find_config mems =
  List.find_map
    (function
      | ("configuration", _), value -> Some value
      | _ -> None)
    mems

(** [decode_config codec_t json] decodes the generic JSON value [json] with
    [codec_t].
    @raise Failure if decoding fails. *)
let decode_config codec_t config_json =
  ok_or_failwith (Jsont.Json.decode codec_t config_json)

(** [encode_named name config] is the JSON object [{"name": name}], extended
    with a ["configuration"] member when [config] is [Some _]. *)
let encode_named name config_json =
  let member key value = ((key, Jsont.Meta.none), value) in
  let config_mems =
    match config_json with
    | None -> []
    | Some c -> [ member "configuration" c ]
  in
  Jsont.Json.object' (member "name" (Jsont.Json.string name) :: config_mems)

(** [encode_config codec_t v] encodes [v] to generic JSON with [codec_t].
    @raise Failure if encoding fails. *)
let encode_config codec_t v =
  ok_or_failwith (Jsont.Json.encode codec_t v)

(** [decode_other_ext json] decodes [json] with [Other_ext.jsont].
    @raise Failure if decoding fails. *)
let decode_other_ext json = decode_config Other_ext.jsont json

(** [encode_other_ext o] encodes [o] with [Other_ext.jsont].
    @raise Failure if encoding fails. *)
let encode_other_ext o = encode_config Other_ext.jsont o

(** [named_members ~what json] splits a name-dispatched JSON object into its
    ["name"] string and its optional ["configuration"] value. [what] prefixes
    the error messages.
    @raise Failure if [json] is not an object or has no string ["name"]. *)
let named_members ~what = function
  | Jsont.Object (mems, _) ->
    (match find_name mems with
     | Some name -> (name, find_config mems)
     | None -> failwith (what ^ ": missing name"))
  | _ -> failwith (what ^ ": expected object")

module Data_type = struct
  type t = [
    | `Bool | `Int8 | `Int16 | `Int32 | `Int64
    | `Uint8 | `Uint16 | `Uint32 | `Uint64
    | `Float16 | `Float32 | `Float64
    | `Complex64 | `Complex128
    | `Raw of int
    | `Other of Other_ext.t
  ]
end

(** Codec for {!Data_type.t}: JSON strings for core and raw types, JSON
    objects for extension types. *)
let data_type_jsont : Data_type.t Jsont.t =
  let enc_string : Data_type.t -> string = function
    | `Bool -> "bool"
    | `Int8 -> "int8"
    | `Int16 -> "int16"
    | `Int32 -> "int32"
    | `Int64 -> "int64"
    | `Uint8 -> "uint8"
    | `Uint16 -> "uint16"
    | `Uint32 -> "uint32"
    | `Uint64 -> "uint64"
    | `Float16 -> "float16"
    | `Float32 -> "float32"
    | `Float64 -> "float64"
    | `Complex64 -> "complex64"
    | `Complex128 -> "complex128"
    | `Raw n -> Printf.sprintf "r%d" n
    (* Not reached through this codec: the [~enc] selector passed to
       [Jsont.any] below routes [`Other] to the object codec. *)
    | `Other _ -> assert false
  in
  (* The decoding table is derived from the encoder so the two spellings
     cannot drift apart. *)
  let core_types : Data_type.t list = [
    `Bool; `Int8; `Int16; `Int32; `Int64;
    `Uint8; `Uint16; `Uint32; `Uint64;
    `Float16; `Float32; `Float64;
    `Complex64; `Complex128;
  ] in
  let string_table = List.map (fun v -> (enc_string v, v)) core_types in
  (* [raw_bits s] is [Some bits] when [s] is ["r"] followed only by decimal
     digits denoting a positive multiple of 8 (the constraint the Zarr v3
     specification puts on [r*] types). [int_of_string] alone would also
     accept signs, [0x]/[0b] prefixes and underscores. *)
  let raw_bits s =
    let len = String.length s in
    if len < 2 || s.[0] <> 'r' then None
    else
      let digits = String.sub s 1 (len - 1) in
      let is_digit = function '0' .. '9' -> true | _ -> false in
      if not (String.for_all is_digit digits) then None
      else
        match int_of_string_opt digits with
        | None -> None (* does not fit in an [int] *)
        | Some bits ->
          if bits > 0 && bits mod 8 = 0 then Some bits else None
  in
  let of_string s =
    match List.assoc_opt s string_table with
    | Some v -> v
    | None ->
      (match raw_bits s with
       | Some bits -> `Raw bits
       | None -> failwith (Printf.sprintf "V3.Data_type: unknown type %s" s))
  in
  let dec_string =
    Jsont.map ~kind:"V3.Data_type.string" ~dec:of_string ~enc:enc_string
      Jsont.string
  in
  let dec_object =
    Jsont.map ~kind:"V3.Data_type.object"
      ~dec:(fun json -> `Other (decode_other_ext json))
      ~enc:(function
        | `Other o -> encode_other_ext o
        (* Not reached: only [`Other] is routed to this codec. *)
        | _ -> assert false)
      Jsont.json
  in
  Jsont.any ~kind:"V3.Data_type"
    ~dec_string
    ~dec_object
    ~enc:(function
      | `Other _ -> dec_object
      | _ -> dec_string)
    ()

module Chunk_grid = struct
  module Regular = struct
    type t = { chunk_shape : int list }
    let chunk_shape t = t.chunk_shape
    (* Codec for the ["configuration"] object of a regular grid; members
       other than ["chunk_shape"] are ignored. *)
    let jsont =
      Jsont.Object.map ~kind:"Regular.config"
        (fun chunk_shape -> { chunk_shape })
      |> Jsont.Object.mem "chunk_shape" (Jsont.list Jsont.int) ~enc:chunk_shape
      |> Jsont.Object.skip_unknown
      |> Jsont.Object.finish
  end

  type t = [ `Regular of Regular.t | `Other of Other_ext.t ]
end

(** Codec for {!Chunk_grid.t}. ["regular"] is decoded from its
    ["configuration"]; any other name is kept as [`Other]. *)
let chunk_grid_jsont : Chunk_grid.t Jsont.t =
  let dec json =
    match named_members ~what:"chunk_grid" json with
    | "regular", Some config ->
      `Regular (decode_config Chunk_grid.Regular.jsont config)
    | "regular", None ->
      (* Previously an opaque [Invalid_argument] from [Option.get]. *)
      failwith "chunk_grid: regular: missing configuration"
    | _ -> `Other (decode_other_ext json)
  in
  let enc = function
    | `Regular r ->
      encode_named "regular" (Some (encode_config Chunk_grid.Regular.jsont r))
    | `Other o -> encode_other_ext o
  in
  Jsont.map ~kind:"V3.Chunk_grid" ~dec ~enc Jsont.json

module Chunk_key_encoding = struct
  module Default = struct
    type t = { separator : [ `Slash | `Dot ] }
    let separator t = t.separator
    let separator_jsont =
      Jsont.enum ~kind:"separator" ["/", `Slash; ".", `Dot]
    (* Codec for the ["configuration"] object of the default encoding. The
       Zarr v3 specification makes ["separator"] optional with default ["/"],
       hence [~dec_absent]. The member is still always written on encode. *)
    let jsont =
      Jsont.Object.map ~kind:"Default.config" (fun separator -> { separator })
      |> Jsont.Object.mem "separator" separator_jsont
           ~dec_absent:`Slash ~enc:separator
      |> Jsont.Object.skip_unknown
      |> Jsont.Object.finish
  end

  type t = [ `Default of Default.t | `Other of Other_ext.t ]
end

(** Codec for {!Chunk_key_encoding.t}. ["default"] is decoded from its
    ["configuration"] (an absent configuration is treated as an empty one);
    any other name is kept as [`Other]. *)
let chunk_key_encoding_jsont : Chunk_key_encoding.t Jsont.t =
  let dec json =
    match named_members ~what:"chunk_key_encoding" json with
    | "default", config ->
      (* The configuration is optional for "default"; decoding an empty
         object yields the default separator. *)
      let config = Option.value config ~default:(Jsont.Json.object' []) in
      `Default (decode_config Chunk_key_encoding.Default.jsont config)
    | _ -> `Other (decode_other_ext json)
  in
  let enc = function
    | `Default d ->
      encode_named "default"
        (Some (encode_config Chunk_key_encoding.Default.jsont d))
    | `Other o -> encode_other_ext o
  in
  Jsont.map ~kind:"V3.Chunk_key_encoding" ~dec ~enc Jsont.json

module Array_meta = struct
  type t = {
    shape : int list;
    data_type : Data_type.t;
    chunk_grid : Chunk_grid.t;
    chunk_key_encoding : Chunk_key_encoding.t;
    codecs : codec list;
    fill_value : fill_value;
    dimension_names : string option list option;
    storage_transformers : Other_ext.t list option;
    (* Members of the metadata object that are not declared by
       [array_meta_jsont]. *)
    unknown : Jsont.json;
  }

  let shape t = t.shape
  let data_type t = t.data_type
  let chunk_grid t = t.chunk_grid
  let chunk_key_encoding t = t.chunk_key_encoding
  let codecs t = t.codecs
  let fill_value t = t.fill_value
  let dimension_names t = t.dimension_names
  let storage_transformers t = t.storage_transformers
  let unknown t = t.unknown
end

(** Codec for the full v3 array metadata object. *)
let array_meta_jsont : Array_meta.t Jsont.t =
  Jsont.Object.map ~kind:"V3.Array_meta"
    (fun _zarr_format _node_type shape data_type chunk_grid chunk_key_encoding
      codecs fill_value dimension_names storage_transformers _attributes
      unknown ->
      { Array_meta.shape; data_type; chunk_grid; chunk_key_encoding; codecs;
        fill_value; dimension_names; storage_transformers; unknown })
  (* NOTE(review): the decoded values of "zarr_format" and "node_type" are
     not checked; only their presence and JSON type are. *)
  |> Jsont.Object.mem "zarr_format" Jsont.int ~enc:(fun _ -> 3)
  |> Jsont.Object.mem "node_type" Jsont.string ~enc:(fun _ -> "array")
  |> Jsont.Object.mem "shape" (Jsont.list Jsont.int) ~enc:Array_meta.shape
  |> Jsont.Object.mem "data_type" data_type_jsont ~enc:Array_meta.data_type
  |> Jsont.Object.mem "chunk_grid" chunk_grid_jsont ~enc:Array_meta.chunk_grid
  |> Jsont.Object.mem "chunk_key_encoding" chunk_key_encoding_jsont
       ~enc:Array_meta.chunk_key_encoding
  |> Jsont.Object.mem "codecs" (Jsont.list codec_jsont) ~enc:Array_meta.codecs
  |> Jsont.Object.mem "fill_value" fill_value_jsont ~enc:Array_meta.fill_value
  |> Jsont.Object.opt_mem "dimension_names"
       (Jsont.list (Jsont.option Jsont.string))
       ~enc:Array_meta.dimension_names
  |> Jsont.Object.opt_mem "storage_transformers"
       (Jsont.list Other_ext.jsont)
       ~enc:Array_meta.storage_transformers
  (* "attributes" is declared so that it does not land in [unknown], but its
     value is discarded on decode and it is never written on encode.
     NOTE(review): this loses attributes on a decode/encode round trip --
     presumably they are handled elsewhere; confirm. *)
  |> Jsont.Object.opt_mem "attributes" Jsont.json ~enc:(fun _ -> None)
  |> Jsont.Object.keep_unknown Jsont.json_mems ~enc:Array_meta.unknown
  |> Jsont.Object.finish
end

module V2 = struct
+66
src/zarr_jsont.mli
(* ··· *)
val codec_jsont : codec Jsont.t
(** Codec for {!codec}. Dispatches on the ["name"] field.
    Sharding codecs are decoded recursively. *)

(** V3 data type. Either a core named type (string) or an extension (object). *)
module Data_type : sig
  type t = [
    | `Bool | `Int8 | `Int16 | `Int32 | `Int64
    | `Uint8 | `Uint16 | `Uint32 | `Uint64
    | `Float16 | `Float32 | `Float64
    | `Complex64 | `Complex128
    | `Raw of int
    | `Other of Other_ext.t
  ]
  (** The type for data types. [`Raw bits] is the ["r<bits>"] string form;
      [`Other] is an extension data type given as a JSON object. *)
end

val data_type_jsont : Data_type.t Jsont.t
(** Codec for {!Data_type.t}. Core types decode from JSON strings;
    extension types decode from JSON objects. The [r<bits>] pattern
    decodes as [`Raw bits]. Strings that are neither a core type name nor
    of the [r<bits>] form are rejected.

    On encode, core types are written as their name, [`Raw n] as ["r"]
    followed by [n] in decimal, and [`Other] as a JSON object. *)

(** V3 chunk grid specification. *)
module Chunk_grid : sig
  (** Regular (fixed-shape) chunk grid. *)
  module Regular : sig
    type t
    (** The type for the configuration of a regular chunk grid. *)

    val chunk_shape : t -> int list
    (** [chunk_shape g] is the ["chunk_shape"] member of the grid's
        configuration. *)
  end

  type t = [ `Regular of Regular.t | `Other of Other_ext.t ]
  (** The type for chunk grids. [`Other] holds any grid whose name is not
      ["regular"]. *)
end

val chunk_grid_jsont : Chunk_grid.t Jsont.t
(** Codec for {!Chunk_grid.t}. Dispatches on the ["name"] field: ["regular"]
    is decoded from its ["configuration"] member, any other name is kept as
    [`Other]. The value must be a JSON object with a string ["name"]. *)

(** V3 chunk key encoding specification. *)
module Chunk_key_encoding : sig
  (** Default chunk key encoding with configurable separator. *)
  module Default : sig
    type t
    (** The type for the configuration of the default chunk key encoding. *)

    val separator : t -> [ `Slash | `Dot ]
    (** [separator e] is the key separator: [`Slash] for ["/"] and [`Dot]
        for ["."]. *)
  end

  type t = [ `Default of Default.t | `Other of Other_ext.t ]
  (** The type for chunk key encodings. [`Other] holds any encoding whose
      name is not ["default"]. *)
end

val chunk_key_encoding_jsont : Chunk_key_encoding.t Jsont.t
(** Codec for {!Chunk_key_encoding.t}. Dispatches on the ["name"] field:
    ["default"] is decoded as [`Default], any other name is kept as
    [`Other]. The value must be a JSON object with a string ["name"]. *)

(** Complete v3 array metadata. *)
module Array_meta : sig
  type t
  (** The type for v3 array metadata. *)

  val shape : t -> int list
  (** [shape m] is the ["shape"] member. *)

  val data_type : t -> Data_type.t
  (** [data_type m] is the ["data_type"] member. *)

  val chunk_grid : t -> Chunk_grid.t
  (** [chunk_grid m] is the ["chunk_grid"] member. *)

  val chunk_key_encoding : t -> Chunk_key_encoding.t
  (** [chunk_key_encoding m] is the ["chunk_key_encoding"] member. *)

  val codecs : t -> codec list
  (** [codecs m] is the ["codecs"] member, in document order. *)

  val fill_value : t -> fill_value
  (** [fill_value m] is the ["fill_value"] member. *)

  val dimension_names : t -> string option list option
  (** [dimension_names m] is the ["dimension_names"] member: [None] if the
      member is absent, otherwise one entry per listed name, with [None]
      for a [null] entry. *)

  val storage_transformers : t -> Other_ext.t list option
  (** [storage_transformers m] is the ["storage_transformers"] member, or
      [None] if the member is absent. *)

  val unknown : t -> Jsont.json
  (** [unknown m] holds the members of the metadata object that the codec
      does not know about. It does not include ["zarr_format"],
      ["node_type"] or ["attributes"]. *)
end

val array_meta_jsont : Array_meta.t Jsont.t
(** Codec for {!Array_meta.t}. Decodes and encodes the full v3 array
    metadata object. The ["zarr_format"] and ["node_type"] fields are
    consumed on decode and always written as [3] / ["array"] on encode.
    Dimension names may be absent or contain null entries.
    Unknown fields are preserved.

    The ["attributes"] member, if present, is read on decode but is not
    retained in {!Array_meta.t}, and it is never written on encode. *)
end

(** Zarr v2 compressor and filter codecs. *)
+39
test/test_zarr_jsont.ml
(* ··· *)
  | _ -> assert false);
  print_endline "test_v3_codecs: ok"

(* Decoding of V3 data types: core and raw types come from JSON strings,
   extension types from JSON objects. *)
let test_v3_data_type () =
  let dt = Zarr_jsont.V3.data_type_jsont in
  let string_cases = [
    {|"float64"|}, `Float64;
    {|"bool"|}, `Bool;
    {|"int32"|}, `Int32;
    {|"r16"|}, `Raw 16;
  ] in
  List.iter
    (fun (json, expected) -> assert (decode dt json = expected))
    string_cases;
  let extension = {|{"name":"datetime","configuration":{"unit":"ns"}}|} in
  (match decode dt extension with
   | `Other o -> assert (Zarr_jsont.Other_ext.name o = "datetime")
   | _ -> assert false);
  print_endline "test_v3_data_type: ok"

(* Decoding of a complete V3 array metadata document. *)
let test_v3_array_meta () =
  let module V3 = Zarr_jsont.V3 in
  let module Meta = V3.Array_meta in
  let json = {|{
    "zarr_format": 3,
    "node_type": "array",
    "shape": [10000, 1000],
    "dimension_names": ["rows", "columns"],
    "data_type": "float64",
    "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": [1000, 100]}},
    "chunk_key_encoding": {"name": "default", "configuration": {"separator": "/"}},
    "codecs": [{"name": "bytes", "configuration": {"endian": "little"}}],
    "fill_value": "NaN",
    "attributes": {"foo": 42}
  }|} in
  let meta = decode V3.array_meta_jsont json in
  assert (Meta.shape meta = [10000; 1000]);
  assert (Meta.data_type meta = `Float64);
  assert (Meta.dimension_names meta = Some [Some "rows"; Some "columns"]);
  (match Meta.chunk_grid meta with
   | `Regular grid ->
     assert (V3.Chunk_grid.Regular.chunk_shape grid = [1000; 100])
   | _ -> assert false);
  (match Meta.chunk_key_encoding meta with
   | `Default enc ->
     assert (V3.Chunk_key_encoding.Default.separator enc = `Slash)
   | _ -> assert false);
  print_endline "test_v3_array_meta: ok"

let () = test_other_codec ()
let () = test_other_ext ()
let () = test_fill_value ()
(* ··· *)
let () = test_v2_filter ()
let () = test_v2_array ()
let () = test_v3_codecs ()
let () = test_v3_data_type ()
let () = test_v3_array_meta ()