OCaml Zarr jsont codecs for v2/v3 and common conventions
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: consolidated metadata support (v2 .zmetadata + v3 inline)

Add Consolidated module for v3 inline consolidated_metadata field
(zarr-specs PR 309) and V2_consolidated module for v2 .zmetadata files.

The probe function now checks for consolidated metadata:
- V3: parses consolidated_metadata from zarr.json if present
- V2: reads .zmetadata if present alongside .zgroup; if .zgroup is missing,
  falls back to .zmetadata alone and reports a synthetic empty root group

The zarr-inspect CLI displays the consolidated node listing.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

+278 -25
+44 -17
bin/zarr_inspect.ml
··· 42 42 let pp_json ppf json = 43 43 Jsont.pp_json ppf json 44 44 45 - let pretty_print_result { Zarr_jsont.node; attrs } = 46 - let encode_and_print codec v label = 47 - match Jsont.Json.encode codec v with 48 - | Ok json -> 49 - Printf.printf "── %s ──\n" label; 50 - Format.printf "%a@." pp_json json 51 - | Error e -> 52 - Printf.eprintf "Error encoding %s: %s\n" label e 53 - in 45 + let encode_and_print codec v label = 46 + match Jsont.Json.encode codec v with 47 + | Ok json -> 48 + Printf.printf "── %s ──\n" label; 49 + Format.printf "%a@." pp_json json 50 + | Error e -> 51 + Printf.eprintf "Error encoding %s: %s\n" label e 52 + 53 + let pretty_print_consolidated = function 54 + | `V3 c -> 55 + let entries = Zarr_jsont.Consolidated.metadata c in 56 + Printf.printf "\n── consolidated metadata (%d nodes) ──\n" 57 + (List.length entries); 58 + List.iter (fun (path, node) -> 59 + let kind = match Zarr_jsont.V3_node.kind node with 60 + | `Array _ -> "array" | `Group -> "group" 61 + in 62 + Printf.printf " %s [%s]\n" path kind 63 + ) entries 64 + | `V2 c -> 65 + let entries = Zarr_jsont.V2_consolidated.entries c in 66 + Printf.printf "\n── consolidated metadata (%d entries) ──\n" 67 + (List.length entries); 68 + List.iter (fun { Zarr_jsont.V2_consolidated.path; node; attrs } -> 69 + let kind = match Zarr_jsont.V2_node.kind node with 70 + | `Array _ -> "array" | `Group -> "group" 71 + in 72 + let has_attrs = match attrs with Some _ -> " +attrs" | None -> "" in 73 + Printf.printf " %s [%s%s]\n" 74 + (if path = "" then "(root)" else path) kind has_attrs 75 + ) entries 76 + 77 + let pretty_print_result { Zarr_jsont.node; attrs; consolidated } = 54 78 (match node with 55 79 | `V2 n -> 56 80 (match Zarr_jsont.V2_node.kind n with ··· 65 89 encode_and_print Zarr_jsont.v3_jsont n "v3 zarr.json (array)" 66 90 | `Group -> 67 91 encode_and_print Zarr_jsont.v3_jsont n "v3 zarr.json (group)")); 68 - match attrs with 69 - | Some a -> 70 - (match Jsont.Json.encode 
Zarr_jsont.attrs_jsont a with 71 - | Ok json -> 72 - Printf.printf "\n── v2 .zattrs ──\n"; 73 - Format.printf "%a@." pp_json json 74 - | Error e -> 75 - Printf.eprintf "Error encoding .zattrs: %s\n" e) 92 + (match attrs with 93 + | Some a -> 94 + (match Jsont.Json.encode Zarr_jsont.attrs_jsont a with 95 + | Ok json -> 96 + Printf.printf "\n── v2 .zattrs ──\n"; 97 + Format.printf "%a@." pp_json json 98 + | Error e -> 99 + Printf.eprintf "Error encoding .zattrs: %s\n" e) 100 + | None -> ()); 101 + match consolidated with 102 + | Some c -> pretty_print_consolidated c 76 103 | None -> () 77 104 78 105 (* CLI *)
+194 -6
src/zarr_jsont.ml
··· 1360 1360 | Ok j -> j | Error e -> invalid_arg e)) 1361 1361 Jsont.json 1362 1362 1363 + (* Consolidated metadata — V3 *) 1364 + 1365 + module Consolidated = struct 1366 + type t = { 1367 + metadata : (string * V3_node.t) list; 1368 + kind : string; 1369 + } 1370 + let metadata t = t.metadata 1371 + let kind t = t.kind 1372 + 1373 + let jsont = 1374 + (* consolidated_metadata is an object with "metadata", "kind", "must_understand" *) 1375 + Jsont.map ~kind:"Consolidated" 1376 + ~dec:(fun json -> 1377 + let mems = match json with 1378 + | Jsont.Object (mems, _) -> mems | _ -> invalid_arg "expected object" 1379 + in 1380 + let find k = List.find_map (fun ((n, _), v) -> 1381 + if n = k then Some v else None) mems 1382 + in 1383 + let kind = match find "kind" with 1384 + | Some (Jsont.String (s, _)) -> s | _ -> "inline" 1385 + in 1386 + let metadata = match find "metadata" with 1387 + | Some (Jsont.Object (entries, _)) -> 1388 + List.filter_map (fun ((path, _), node_json) -> 1389 + match Jsont.Json.decode v3_jsont node_json with 1390 + | Ok node -> Some (path, node) 1391 + | Error _ -> None) entries 1392 + | _ -> [] 1393 + in 1394 + { metadata; kind }) 1395 + ~enc:(fun t -> 1396 + let entries = List.map (fun (path, node) -> 1397 + let node_json = match Jsont.Json.encode v3_jsont node with 1398 + | Ok j -> j | Error e -> invalid_arg e 1399 + in 1400 + ((path, Jsont.Meta.none), node_json)) t.metadata 1401 + in 1402 + Jsont.Json.object' [ 1403 + (("metadata", Jsont.Meta.none), Jsont.Object (entries, Jsont.Meta.none)); 1404 + (("kind", Jsont.Meta.none), Jsont.Json.string t.kind); 1405 + (("must_understand", Jsont.Meta.none), Jsont.Json.bool false); 1406 + ]) 1407 + Jsont.json 1408 + end 1409 + 1410 + (* Consolidated metadata — V2 (.zmetadata) *) 1411 + 1412 + module V2_consolidated = struct 1413 + type entry = { 1414 + path : string; 1415 + node : V2_node.t; 1416 + attrs : Attrs.t option; 1417 + } 1418 + 1419 + type t = { 1420 + entries : entry list; 1421 + 
format_version : int; 1422 + } 1423 + let entries t = t.entries 1424 + let format_version t = t.format_version 1425 + 1426 + let jsont = 1427 + (* .zmetadata: {"metadata": {"path/.zarray": {...}, ...}, "zarr_consolidated_format": 1} *) 1428 + Jsont.map ~kind:"V2_consolidated" 1429 + ~dec:(fun json -> 1430 + let mems = match json with 1431 + | Jsont.Object (mems, _) -> mems | _ -> invalid_arg "expected object" 1432 + in 1433 + let find k = List.find_map (fun ((n, _), v) -> 1434 + if n = k then Some v else None) mems 1435 + in 1436 + let format_version = match find "zarr_consolidated_format" with 1437 + | Some (Jsont.Number (f, _)) -> int_of_float f | _ -> 1 1438 + in 1439 + let metadata = match find "metadata" with 1440 + | Some (Jsont.Object (entries, _)) -> entries 1441 + | _ -> [] 1442 + in 1443 + (* Group entries by path prefix: collect .zarray/.zgroup and .zattrs *) 1444 + let tbl = Hashtbl.create 16 in 1445 + List.iter (fun ((key, _), value) -> 1446 + (* key is like "array1/.zarray" or "array1/.zattrs" or ".zgroup" *) 1447 + let parts = match String.rindex_opt key '/' with 1448 + | Some i -> (String.sub key 0 i, String.sub key (i + 1) (String.length key - i - 1)) 1449 + | None -> ("", key) 1450 + in 1451 + let (prefix, basename) = parts in 1452 + let (node_json, attrs_json) = 1453 + match Hashtbl.find_opt tbl prefix with 1454 + | Some v -> v | None -> (None, None) 1455 + in 1456 + let v = match basename with 1457 + | ".zarray" | ".zgroup" -> (Some value, attrs_json) 1458 + | ".zattrs" -> (node_json, Some value) 1459 + | _ -> (node_json, attrs_json) 1460 + in 1461 + Hashtbl.replace tbl prefix v 1462 + ) metadata; 1463 + let entries = Hashtbl.fold (fun path (node_json, attrs_json) acc -> 1464 + let node = match node_json with 1465 + | Some nj -> 1466 + (match Jsont.Json.decode v2_array_jsont nj with 1467 + | Ok n -> Some n 1468 + | Error _ -> 1469 + match Jsont.Json.decode v2_group_jsont nj with 1470 + | Ok n -> Some n | Error _ -> None) 1471 + | None -> None 
1472 + in 1473 + let attrs = match attrs_json with 1474 + | Some aj -> 1475 + (match Jsont.Json.decode attrs_jsont aj with 1476 + | Ok a -> Some a | Error _ -> None) 1477 + | None -> None 1478 + in 1479 + match node with 1480 + | Some n -> { path; node = n; attrs } :: acc 1481 + | None -> acc 1482 + ) tbl [] in 1483 + let entries = List.sort (fun a b -> String.compare a.path b.path) entries in 1484 + { entries; format_version }) 1485 + ~enc:(fun t -> 1486 + let metadata_entries = List.concat_map (fun entry -> 1487 + let prefix = if entry.path = "" then "" else entry.path ^ "/" in 1488 + let node_entries = match V2_node.kind entry.node with 1489 + | `Array a -> 1490 + (match Jsont.Json.encode V2.array_meta_jsont a with 1491 + | Ok j -> [((prefix ^ ".zarray", Jsont.Meta.none), j)] 1492 + | Error _ -> []) 1493 + | `Group -> 1494 + [((prefix ^ ".zgroup", Jsont.Meta.none), 1495 + Jsont.Json.object' [(("zarr_format", Jsont.Meta.none), Jsont.Json.number 2.0)])] 1496 + in 1497 + let attrs_entries = match entry.attrs with 1498 + | Some a -> 1499 + (match Jsont.Json.encode attrs_jsont a with 1500 + | Ok j -> [((prefix ^ ".zattrs", Jsont.Meta.none), j)] 1501 + | Error _ -> []) 1502 + | None -> [] 1503 + in 1504 + node_entries @ attrs_entries 1505 + ) t.entries in 1506 + Jsont.Json.object' [ 1507 + (("metadata", Jsont.Meta.none), Jsont.Object (metadata_entries, Jsont.Meta.none)); 1508 + (("zarr_consolidated_format", Jsont.Meta.none), Jsont.Json.number (float_of_int t.format_version)); 1509 + ]) 1510 + Jsont.json 1511 + end 1512 + 1363 1513 (* Store probing *) 1364 1514 1365 1515 type probe_result = { 1366 1516 node : t; 1367 1517 attrs : Attrs.t option; 1518 + consolidated : [ `V3 of Consolidated.t | `V2 of V2_consolidated.t ] option; 1368 1519 } 1369 1520 1370 1521 let decode_string codec s = 1371 1522 Jsont_bytesrw.decode_string codec s 1372 1523 1524 + let try_v3_consolidated contents = 1525 + (* Check if the raw JSON has a consolidated_metadata field *) 1526 + match 
Jsont_bytesrw.decode_string Jsont.json contents with 1527 + | Ok (Jsont.Object (mems, _)) -> 1528 + List.find_map (fun ((k, _), v) -> 1529 + if k = "consolidated_metadata" then 1530 + match Jsont.Json.decode Consolidated.jsont v with 1531 + | Ok c -> Some c | Error _ -> None 1532 + else None) mems 1533 + | _ -> None 1534 + 1373 1535 let probe ~read path = 1374 1536 let read_rel = read in 1375 1537 (* Try v3 first *) 1376 1538 match read_rel "zarr.json" with 1377 1539 | Ok contents -> 1378 1540 (match decode_string v3_jsont contents with 1379 - | Ok node -> Ok { node = `V3 node; attrs = None } 1541 + | Ok node -> 1542 + let consolidated = Option.map (fun c -> `V3 c) 1543 + (try_v3_consolidated contents) in 1544 + Ok { node = `V3 node; attrs = None; consolidated } 1380 1545 | Error e -> Error (Printf.sprintf "zarr.json: %s" e)) 1381 1546 | Error _ -> 1382 1547 (* Try v2 .zarray *) ··· 1390 1555 | Ok a -> Some a | Error _ -> None) 1391 1556 | Error _ -> None 1392 1557 in 1393 - Ok { node = `V2 node; attrs } 1558 + Ok { node = `V2 node; attrs; consolidated = None } 1394 1559 | Error e -> Error (Printf.sprintf ".zarray: %s" e)) 1395 1560 | Error _ -> 1396 1561 (* Try v2 .zgroup *) ··· 1404 1569 | Ok a -> Some a | Error _ -> None) 1405 1570 | Error _ -> None 1406 1571 in 1407 - Ok { node = `V2 node; attrs } 1572 + (* Try .zmetadata for v2 consolidated *) 1573 + let consolidated = match read_rel ".zmetadata" with 1574 + | Ok meta_contents -> 1575 + (match decode_string V2_consolidated.jsont meta_contents with 1576 + | Ok c -> Some (`V2 c) | Error _ -> None) 1577 + | Error _ -> None 1578 + in 1579 + Ok { node = `V2 node; attrs; consolidated } 1408 1580 | Error e -> Error (Printf.sprintf ".zgroup: %s" e)) 1409 1581 | Error _ -> 1410 - Error (Printf.sprintf 1411 - "%s: no zarr metadata found (tried zarr.json, .zarray, .zgroup)" 1412 - path) 1582 + (* Last resort: try .zmetadata directly (no .zgroup present) *) 1583 + match read_rel ".zmetadata" with 1584 + | Ok meta_contents 
-> 1585 + (match decode_string V2_consolidated.jsont meta_contents with 1586 + | Ok c -> 1587 + let node = V2_node.{ kind = `Group; attrs = Attrs.empty; 1588 + unknown = Jsont.Json.object' [] } in 1589 + let attrs = match read_rel ".zattrs" with 1590 + | Ok a -> 1591 + (match decode_string attrs_jsont a with 1592 + | Ok a -> Some a | Error _ -> None) 1593 + | Error _ -> None 1594 + in 1595 + Ok { node = `V2 node; attrs; consolidated = Some (`V2 c) } 1596 + | Error e -> Error (Printf.sprintf ".zmetadata: %s" e)) 1597 + | Error _ -> 1598 + Error (Printf.sprintf 1599 + "%s: no zarr metadata found (tried zarr.json, .zarray, .zgroup, .zmetadata)" 1600 + path)
+40 -2
src/zarr_jsont.mli
··· 423 423 {!v3_jsont} as appropriate. V2 arrays are distinguished from v2 groups 424 424 by the presence of a ["shape"] field. *) 425 425 426 + (** {1 Consolidated metadata} *) 427 + 428 + (** V3 consolidated metadata (inline in a group's [zarr.json]). 429 + 430 + See {{:https://github.com/zarr-developers/zarr-specs/pull/309}zarr-specs PR 309}. 431 + The [consolidated_metadata] field maps relative node paths to their 432 + full metadata objects. *) 433 + module Consolidated : sig 434 + type t 435 + val metadata : t -> (string * V3_node.t) list 436 + (** Mapping from relative node path to decoded node metadata. *) 437 + 438 + val kind : t -> string 439 + (** Currently always ["inline"]. *) 440 + 441 + val jsont : t Jsont.t 442 + end 443 + 444 + (** V2 consolidated metadata ([.zmetadata] file). 445 + 446 + Maps flat keys like ["array1/.zarray"] and ["array1/.zattrs"] to their 447 + decoded JSON objects. *) 448 + module V2_consolidated : sig 449 + type entry = { 450 + path : string; 451 + node : V2_node.t; 452 + attrs : Attrs.t option; 453 + } 454 + 455 + type t 456 + val entries : t -> entry list 457 + val format_version : t -> int 458 + 459 + val jsont : t Jsont.t 460 + end 461 + 426 462 (** {1 Store probing} *) 427 463 428 464 type probe_result = { ··· 430 466 attrs : Attrs.t option; 431 467 (** For v2, the separately-fetched [.zattrs] if present. For v3, [None] 432 468 (attributes are inline in [zarr.json]). *) 469 + consolidated : [ `V3 of Consolidated.t | `V2 of V2_consolidated.t ] option; 470 + (** Consolidated metadata if present. *) 433 471 } 434 472 (** The result of probing a zarr store path. *) 435 473 ··· 444 482 [Error msg] if the file does not exist or cannot be read. 
445 483 446 484 The probing order is: 447 - + [zarr.json] (v3) 485 + + [zarr.json] (v3), checking for [consolidated_metadata] field 448 486 + [.zarray] (v2 array), with optional [.zattrs] 449 - + [.zgroup] (v2 group), with optional [.zattrs] 487 + + [.zgroup] (v2 group), with optional [.zattrs] and [.zmetadata] 450 488 451 489 Returns [Error] if none of the expected files can be read. *)