OCaml Zarr jsont codecs for v2/v3 and common conventions
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: V2_node and V3_node with Attrs integration

Add V2_node and V3_node top-level modules (defined after Attrs to avoid
ordering issues) plus the v2_array_jsont, v2_group_jsont, and v3_jsont codecs.
v3_jsont decodes/encodes full Zarr nodes with embedded convention attributes;
the v2 codecs leave the node's attributes empty.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

+286
+129
src/zarr_jsont.ml
(* ··· preceding diff context (tail of the previous definition, which starts
   outside this hunk), kept verbatim:
   | _ -> ()); Jsont.Json.object' (List.rev !mems)) Jsont.json *)

(** A decoded Zarr v2 node: either array metadata or a group marker, paired
    with convention attributes and any leftover JSON. *)
module V2_node = struct
  type t = {
    kind : [ `Array of V2.Array_meta.t | `Group ];
    attrs : Attrs.t;
    unknown : Jsont.json;
  }

  let kind t = t.kind
  let attrs t = t.attrs
  let unknown t = t.unknown
end

(** A decoded Zarr v3 node: either array metadata or a group marker, paired
    with the decoded ["attributes"] member and the non-standard members of the
    node object. *)
module V3_node = struct
  type t = {
    kind : [ `Array of V3.Array_meta.t | `Group ];
    attrs : Attrs.t;
    unknown : Jsont.json;
  }

  let kind t = t.kind
  let attrs t = t.attrs
  let unknown t = t.unknown
end

(* Raise a [Jsont.Error] instead of [Failure]: Jsont's [result]-returning
   entry points only convert [Jsont.Error] into [Error _], so a [failwith]
   inside a codec function would escape to the caller as an exception. *)
let node_error fmt = Jsont.Error.msgf Jsont.Meta.none fmt

(* Member names of a v3 node object that are handled explicitly (by the array
   metadata codec or by [v3_jsont] itself); every other member is "unknown". *)
let v3_standard_keys = [
  "zarr_format"; "node_type"; "shape"; "data_type";
  "chunk_grid"; "chunk_key_encoding"; "codecs"; "fill_value";
  "dimension_names"; "storage_transformers"; "attributes";
]

(** Codec for a v2 array node, layered over [V2.array_meta_jsont].

    Decoding wraps the array metadata in an [`Array] node with empty
    attributes and an empty [unknown] object. Encoding a [`Group] node is an
    error. *)
let v2_array_jsont : V2_node.t Jsont.t =
  let dec meta =
    V2_node.{
      kind = `Array meta;
      attrs = Attrs.empty;
      unknown = Jsont.Json.object' [];
    }
  in
  let enc node =
    match node.V2_node.kind with
    | `Array meta -> meta
    | `Group -> node_error "v2_array_jsont: not an array"
  in
  Jsont.map ~kind:"V2.Array" ~dec ~enc V2.array_meta_jsont

(** Codec for a v2 group node.

    Decoding ignores the input JSON and always yields a [`Group] node with
    empty attributes; encoding always yields [{"zarr_format": 2}]. *)
let v2_group_jsont : V2_node.t Jsont.t =
  let dec _json =
    V2_node.{
      kind = `Group;
      attrs = Attrs.empty;
      unknown = Jsont.Json.object' [];
    }
  in
  let enc _node =
    Jsont.Json.object' [
      (("zarr_format", Jsont.Meta.none), Jsont.Json.number 2.0);
    ]
  in
  Jsont.map ~kind:"V2.Group" ~dec ~enc Jsont.json

(** Codec for a v3 node, dispatching on the ["node_type"] member.

    Decode: the input must be an object whose ["node_type"] is the string
    ["array"] or ["group"]; anything else is a decode error. Arrays are
    decoded with [V3.array_meta_jsont] applied to the whole object. The
    ["attributes"] member, when present, is decoded with [attrs_jsont].
    Members whose name is not in [v3_standard_keys] are collected in
    [unknown].

    Encode: emits the array metadata members (or [zarr_format]/[node_type]
    for a group), then ["attributes"] when the encoded attributes are not the
    empty object, then the [unknown] members that are not already present. *)
let v3_jsont : V3_node.t Jsont.t =
  let find_mem mems k =
    List.find_map
      (fun ((n, _), v) -> if String.equal n k then Some v else None)
      mems
  in
  let has_mem mems k =
    List.exists (fun ((n, _), _) -> String.equal n k) mems
  in
  let drop_mem mems k =
    List.filter (fun ((n, _), _) -> not (String.equal n k)) mems
  in
  let dec json =
    let mems =
      match json with
      | Jsont.Object (m, _) -> m
      | _ -> node_error "v3_jsont: expected object"
    in
    let node_type =
      match find_mem mems "node_type" with
      | Some (Jsont.String (s, _)) -> s
      | _ -> node_error "v3_jsont: missing node_type"
    in
    let attrs_val =
      match find_mem mems "attributes" with
      | None -> Attrs.empty
      | Some attrs_json ->
        (match Jsont.Json.decode attrs_jsont attrs_json with
         | Ok a -> a
         (* NOTE(review): an undecodable "attributes" value is silently
            replaced by empty attributes, so it is lost on re-encode. Kept
            as-is in case this best-effort behaviour is intentional —
            confirm. *)
         | Error _ -> Attrs.empty)
    in
    let unknown_val =
      mems
      |> List.filter (fun ((k, _), _) -> not (List.mem k v3_standard_keys))
      |> Jsont.Json.object'
    in
    match node_type with
    | "array" ->
      (match Jsont.Json.decode V3.array_meta_jsont json with
       | Ok arr ->
         V3_node.{
           kind = `Array arr; attrs = attrs_val; unknown = unknown_val;
         }
       | Error e -> node_error "v3_jsont: %s" e)
    | "group" ->
      V3_node.{ kind = `Group; attrs = attrs_val; unknown = unknown_val }
    | s -> node_error "v3_jsont: unknown node_type: %s" s
  in
  let enc node =
    let attrs_json =
      match Jsont.Json.encode attrs_jsont node.V3_node.attrs with
      | Ok j -> j
      | Error e -> node_error "v3_jsont enc attrs: %s" e
    in
    (* Empty attributes are omitted rather than written as "attributes": {}. *)
    let attr_mems =
      match attrs_json with
      | Jsont.Object ([], _) -> []
      | j -> [ (("attributes", Jsont.Meta.none), j) ]
    in
    let base_mems =
      match node.V3_node.kind with
      | `Array a ->
        (match Jsont.Json.encode V3.array_meta_jsont a with
         | Ok (Jsont.Object (m, _)) -> m
         | Ok _ -> []
         | Error e -> node_error "v3_jsont enc array: %s" e)
      | `Group ->
        [
          (("zarr_format", Jsont.Meta.none), Jsont.Json.number 3.0);
          (("node_type", Jsont.Meta.none), Jsont.Json.string "group");
        ]
    in
    (* If the array codec already produced an "attributes" member, replace it
       instead of emitting the key twice. (Whether it does is not visible
       from here; this is a no-op when it does not.) *)
    let base_mems =
      match attr_mems with
      | [] -> base_mems
      | _ :: _ -> drop_mem base_mems "attributes"
    in
    let mems = base_mems @ attr_mems in
    (* Write back members collected as unknown on decode so that a
       decode/encode round trip does not drop them; skip any name that is
       already present to avoid duplicate keys. *)
    let unknown_mems =
      match node.V3_node.unknown with
      | Jsont.Object (m, _) ->
        List.filter (fun ((k, _), _) -> not (has_mem mems k)) m
      | _ -> []
    in
    Jsont.Json.object' (mems @ unknown_mems)
  in
  Jsont.map ~kind:"V3.Node" ~dec ~enc Jsont.json
+44
src/zarr_jsont.mli
(* ··· preceding diff context (tail of a doc comment that starts before this
   hunk), kept verbatim:
   appropriate sub-codec. On encode, auto-populates [zarr_conventions] from
   whichever conventions are [Some], then merges their flat members back into
   the object alongside [multiscales] and unknown keys. *)

(** A Zarr v2 node: an array or a group, together with its attributes.

    This module lives at the top level rather than inside {!V2} because it
    mentions {!Attrs.t}, which is declared after the V2 module. *)
module V2_node : sig
  type t
  (** A v2 node. Values are obtained by decoding with {!v2_array_jsont} or
      {!v2_group_jsont}. *)

  val kind : t -> [ `Array of V2.Array_meta.t | `Group ]
  (** [kind n] is the array metadata of [n], or [`Group] for a group. *)

  val attrs : t -> Attrs.t
  (** [attrs n] is the attributes attached to [n]. The v2 codecs in this
      module do not decode attributes, so nodes they produce carry
      {!Attrs.empty}. *)

  val unknown : t -> Jsont.json
  (** [unknown n] is the leftover JSON of [n]. The v2 codecs in this module
      always produce an empty object here. *)
end

(** A Zarr v3 node: an array or a group, together with its attributes.

    This module lives at the top level rather than inside {!V3} because it
    mentions {!Attrs.t}, which is declared after the V3 module. *)
module V3_node : sig
  type t
  (** A v3 node. Values are obtained by decoding with {!v3_jsont}. *)

  val kind : t -> [ `Array of V3.Array_meta.t | `Group ]
  (** [kind n] is the array metadata of [n], or [`Group] for a group. *)

  val attrs : t -> Attrs.t
  (** [attrs n] is the decoded ["attributes"] member of [n], or
      {!Attrs.empty} when the member was absent. *)

  val unknown : t -> Jsont.json
  (** [unknown n] is a JSON object holding the members of the decoded node
      object whose names are not among the standard v3 node keys
      ([zarr_format], [node_type], [shape], [data_type], [chunk_grid],
      [chunk_key_encoding], [codecs], [fill_value], [dimension_names],
      [storage_transformers], [attributes]). *)
end

val v2_array_jsont : V2_node.t Jsont.t
(** Codec for a Zarr v2 array node.

    Decodes a [.zarray] JSON object via the v2 array metadata codec into a
    {!V2_node.t} of kind [`Array]. Attributes are not handled by this codec:
    the decoded node carries empty attributes; use {!attrs_jsont} separately
    if they are needed. Encoding a node of kind [`Group] is an error. *)

val v2_group_jsont : V2_node.t Jsont.t
(** Codec for a Zarr v2 group node.

    Decoding accepts any JSON value, ignores its contents and yields a
    {!V2_node.t} of kind [`Group] with empty attributes. Encoding always
    produces [{"zarr_format": 2}]. *)

val v3_jsont : V3_node.t Jsont.t
(** Codec for a Zarr v3 node (array or group).

    The input must be a JSON object with a string ["node_type"] member equal
    to ["array"] or ["group"]; decoding fails otherwise. Arrays are decoded
    in full via {!V3.array_meta_jsont}; groups produce a [`Group] node. When
    an ["attributes"] member is present it is decoded via {!attrs_jsont} and
    stored in the node's [attrs] field; if that member cannot be decoded the
    node carries {!Attrs.empty}. On encode, the ["attributes"] member is
    written only when the encoded attributes are a non-empty object. *)
+113
test/test_zarr_jsont.ml
(* ··· preceding diff context (tail of test_attrs, which starts outside this
   hunk), kept verbatim:
   assert (Zarr_jsont.Attrs.multiscales e = None);
   print_endline "test_attrs: ok" *)

(* v2 array node: decode a .zarray document, check the shape and the absence
   of a proj convention, then check the shape survives an encode/decode
   round trip. *)
let test_v2_node_array () =
  let module Z = Zarr_jsont in
  let codec = Z.v2_array_jsont in
  let has_expected_shape node =
    match Z.V2_node.kind node with
    | `Array meta -> Z.V2.Array_meta.shape meta = [100; 100]
    | `Group -> false
  in
  let json = {|{
    "zarr_format": 2,
    "shape": [100, 100],
    "chunks": [10, 10],
    "dtype": "<i4",
    "compressor": {"id": "zlib", "level": 1},
    "fill_value": 42,
    "order": "C",
    "filters": null
  }|} in
  let v = decode codec json in
  assert (has_expected_shape v);
  assert (Z.Attrs.proj (Z.V2_node.attrs v) = None);
  (* roundtrip *)
  let v' = v |> encode codec |> decode codec in
  assert (has_expected_shape v');
  print_endline "test_v2_node_array: ok"

(* v2 group node: decoding yields a group with no conventions, and the group
   kind survives a round trip. *)
let test_v2_group () =
  let module Z = Zarr_jsont in
  let codec = Z.v2_group_jsont in
  let v = decode codec {|{"zarr_format": 2}|} in
  assert (Z.V2_node.kind v = `Group);
  assert (Z.Attrs.conventions (Z.V2_node.attrs v) = []);
  (* roundtrip *)
  let v' = v |> encode codec |> decode codec in
  assert (Z.V2_node.kind v' = `Group);
  print_endline "test_v2_group: ok"

(* v3 group node with proj and spatial attributes; the proj code must survive
   a round trip. *)
let test_v3_node_group () =
  let module Z = Zarr_jsont in
  let codec = Z.v3_jsont in
  let proj_code node =
    Option.bind (Z.Attrs.proj (Z.V3_node.attrs node)) Z.Conv.Proj.code
  in
  let spatial_dims node =
    Option.map Z.Conv.Spatial.dimensions
      (Z.Attrs.spatial (Z.V3_node.attrs node))
  in
  let json = {|{
    "zarr_format": 3,
    "node_type": "group",
    "attributes": {
      "proj:code": "EPSG:4326",
      "spatial:dimensions": ["Y", "X"]
    }
  }|} in
  let v = decode codec json in
  assert (Z.V3_node.kind v = `Group);
  assert (proj_code v = Some "EPSG:4326");
  assert (spatial_dims v = Some ["Y"; "X"]);
  (* roundtrip *)
  let v' = v |> encode codec |> decode codec in
  assert (Z.V3_node.kind v' = `Group);
  assert (proj_code v' = Some "EPSG:4326");
  print_endline "test_v3_node_group: ok"

(* v3 array node without attributes: shape is decoded, no proj convention is
   present, and the shape survives a round trip. *)
let test_v3_node_array () =
  let module Z = Zarr_jsont in
  let codec = Z.v3_jsont in
  let has_expected_shape node =
    match Z.V3_node.kind node with
    | `Array meta -> Z.V3.Array_meta.shape meta = [100]
    | `Group -> false
  in
  let json = {|{
    "zarr_format": 3,
    "node_type": "array",
    "shape": [100],
    "data_type": "int32",
    "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": [10]}},
    "chunk_key_encoding": {"name": "default", "configuration": {"separator": "/"}},
    "codecs": [{"name": "bytes", "configuration": {"endian": "little"}}],
    "fill_value": 0
  }|} in
  let v = decode codec json in
  assert (has_expected_shape v);
  assert (Z.Attrs.proj (Z.V3_node.attrs v) = None);
  (* roundtrip *)
  let v' = v |> encode codec |> decode codec in
  assert (has_expected_shape v');
  print_endline "test_v3_node_array: ok"

(* v3 array node carrying an "attributes" member: both the array shape and
   the proj code must be decoded. *)
let test_v3_node_array_with_attrs () =
  let module Z = Zarr_jsont in
  let json = {|{
    "zarr_format": 3,
    "node_type": "array",
    "shape": [50, 50],
    "data_type": "float64",
    "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": [10, 10]}},
    "chunk_key_encoding": {"name": "default", "configuration": {"separator": "/"}},
    "codecs": [{"name": "bytes", "configuration": {"endian": "little"}}],
    "fill_value": "NaN",
    "attributes": {
      "proj:code": "EPSG:4326"
    }
  }|} in
  let v = decode Z.v3_jsont json in
  let shape_ok =
    match Z.V3_node.kind v with
    | `Array meta -> Z.V3.Array_meta.shape meta = [50; 50]
    | `Group -> false
  in
  assert shape_ok;
  let code = Option.bind (Z.Attrs.proj (Z.V3_node.attrs v)) Z.Conv.Proj.code in
  assert (code = Some "EPSG:4326");
  print_endline "test_v3_node_array_with_attrs: ok"

let () = test_other_codec ()
let () = test_other_ext ()
let () = test_fill_value ()
(* ··· runner lines elided by the diff view ··· *)
let () = test_conv_spatial ()
let () = test_conv_multiscales ()
let () = test_attrs ()
let () = test_v2_node_array ()
let () = test_v2_group ()
let () = test_v3_node_group ()
let () = test_v3_node_array ()
let () = test_v3_node_array_with_attrs ()