OCaml Zarr jsont codecs for v2/v3 and common conventions
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: recursive children from consolidated metadata

probe now populates children by building a tree from consolidated
metadata entries. V3 and V2 consolidated are both supported with
recursive grouping by path components.

zarr-inspect displays the tree with types and shapes, e.g.:
  utm01 [group]
    embeddings [array int8 9x128x1466368x69632]
    scales [array float32 9x1466368x69632]

No directory listing needed — children come from consolidated metadata.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

+156 -58
+44 -27
bin/zarr_inspect.ml
··· 54 54 | Error e -> 55 55 Printf.eprintf "Error encoding %s: %s\n" label e 56 56 57 - let pretty_print_consolidated = function 58 - | `V3 c -> 59 - let entries = Zarr_jsont.Consolidated.metadata c in 60 - Printf.printf "\n── consolidated metadata (%d nodes) ──\n" 61 - (List.length entries); 62 - List.iter (fun (path, node) -> 63 - let kind = match Zarr_jsont.V3_node.kind node with 64 - | `Array _ -> "array" | `Group -> "group" 65 - in 66 - Printf.printf " %s [%s]\n" path kind 67 - ) entries 68 - | `V2 c -> 69 - let entries = Zarr_jsont.V2_consolidated.entries c in 70 - Printf.printf "\n── consolidated metadata (%d entries) ──\n" 71 - (List.length entries); 72 - List.iter (fun { Zarr_jsont.V2_consolidated.path; node; attrs } -> 73 - let kind = match Zarr_jsont.V2_node.kind node with 74 - | `Array _ -> "array" | `Group -> "group" 75 - in 76 - let has_attrs = match attrs with Some _ -> " +attrs" | None -> "" in 77 - Printf.printf " %s [%s%s]\n" 78 - (if path = "" then "(root)" else path) kind has_attrs 79 - ) entries 57 + let rec print_tree indent children = 58 + List.iter (fun (name, (result : Zarr_jsont.probe_result)) -> 59 + let kind = match result.node with 60 + | `V2 n -> 61 + (match Zarr_jsont.V2_node.kind n with 62 + | `Array a -> 63 + let shape = Zarr_jsont.V2.Array_meta.shape a in 64 + Printf.sprintf "array %s %s" 65 + (match Zarr_jsont.V2.Array_meta.dtype a with 66 + | `Float (_, n) -> Printf.sprintf "float%d" (n*8) 67 + | `Int (_, n) -> Printf.sprintf "int%d" (n*8) 68 + | `Uint (_, n) -> Printf.sprintf "uint%d" (n*8) 69 + | `Bool -> "bool" 70 + | _ -> "?") 71 + (String.concat "x" (List.map string_of_int shape)) 72 + | `Group -> "group") 73 + | `V3 n -> 74 + (match Zarr_jsont.V3_node.kind n with 75 + | `Array a -> 76 + let shape = Zarr_jsont.V3.Array_meta.shape a in 77 + let dt = match Zarr_jsont.V3.Array_meta.data_type a with 78 + | `Bool -> "bool" | `Int8 -> "int8" | `Int16 -> "int16" 79 + | `Int32 -> "int32" | `Int64 -> "int64" 80 + | `Uint8 -> "uint8" 
| `Uint16 -> "uint16" 81 + | `Uint32 -> "uint32" | `Uint64 -> "uint64" 82 + | `Float16 -> "float16" | `Float32 -> "float32" 83 + | `Float64 -> "float64" 84 + | `Complex64 -> "complex64" | `Complex128 -> "complex128" 85 + | `Raw n -> Printf.sprintf "r%d" n 86 + | `Other o -> Zarr_jsont.Other_ext.name o 87 + in 88 + Printf.sprintf "array %s %s" dt 89 + (String.concat "x" (List.map string_of_int shape)) 90 + | `Group -> "group") 91 + in 92 + Printf.printf "%s%s [%s]\n" indent name kind; 93 + if result.children <> [] then 94 + print_tree (indent ^ " ") result.children 95 + ) children 80 96 81 - let pretty_print_result { Zarr_jsont.node; attrs; consolidated } = 97 + let pretty_print_result { Zarr_jsont.node; attrs; children; _ } = 82 98 (match node with 83 99 | `V2 n -> 84 100 (match Zarr_jsont.V2_node.kind n with ··· 102 118 | Error e -> 103 119 Printf.eprintf "Error encoding .zattrs: %s\n" e) 104 120 | None -> ()); 105 - match consolidated with 106 - | Some c -> pretty_print_consolidated c 107 - | None -> () 121 + if children <> [] then begin 122 + Printf.printf "\n── children ──\n"; 123 + print_tree " " children 124 + end 108 125 109 126 (* CLI *) 110 127
+104 -31
src/zarr_jsont.ml
··· 1531 1531 node : t; 1532 1532 attrs : Attrs.t option; 1533 1533 consolidated : [ `V3 of Consolidated.t | `V2 of V2_consolidated.t ] option; 1534 + children : (string * probe_result) list; 1534 1535 } 1535 1536 1536 1537 let decode_string codec s = 1537 1538 Jsont_bytesrw.decode_string codec s 1538 1539 1539 1540 let try_v3_consolidated contents = 1540 - (* Check if the raw JSON has a consolidated_metadata field *) 1541 1541 match Jsont_bytesrw.decode_string Jsont.json contents with 1542 1542 | Ok (Jsont.Object (mems, _)) -> 1543 1543 List.find_map (fun ((k, _), v) -> ··· 1547 1547 else None) mems 1548 1548 | _ -> None 1549 1549 1550 + (* Build children tree from consolidated metadata *) 1551 + let rec children_of_v3_consolidated (c : Consolidated.t) = 1552 + let immediate = Hashtbl.create 16 in 1553 + List.iter (fun (path, node) -> 1554 + let top, rest = match String.index_opt path '/' with 1555 + | Some i -> 1556 + String.sub path 0 i, 1557 + Some (String.sub path (i + 1) (String.length path - i - 1), node) 1558 + | None -> path, None 1559 + in 1560 + let (direct, nested) = match Hashtbl.find_opt immediate top with 1561 + | Some v -> v | None -> (None, []) 1562 + in 1563 + match rest with 1564 + | None -> Hashtbl.replace immediate top (Some node, nested) 1565 + | Some sub -> Hashtbl.replace immediate top (direct, sub :: nested) 1566 + ) (Consolidated.metadata c); 1567 + Hashtbl.fold (fun name (direct, nested) acc -> 1568 + let node = match direct with 1569 + | Some n -> n 1570 + | None -> 1571 + V3_node.{ kind = `Group; attrs = Attrs.empty; 1572 + unknown = Jsont.Json.object' [] } 1573 + in 1574 + let children = match V3_node.kind node, nested with 1575 + | `Group, _ :: _ -> 1576 + let sub_c = { Consolidated.metadata = List.rev nested; 1577 + kind = c.kind } in 1578 + children_of_v3_consolidated sub_c 1579 + | _ -> [] 1580 + in 1581 + (name, { node = `V3 node; attrs = None; 1582 + consolidated = None; children }) :: acc 1583 + ) immediate [] 1584 + |> 
List.sort (fun (a, _) (b, _) -> String.compare a b) 1585 + 1586 + and children_of_v2_consolidated (c : V2_consolidated.t) = 1587 + (* Build tree: group entries by top-level path component *) 1588 + let immediate = Hashtbl.create 16 in 1589 + List.iter (fun ({ V2_consolidated.path; _ } as entry) -> 1590 + if path <> "" then begin 1591 + let top, is_direct = match String.index_opt path '/' with 1592 + | Some i -> String.sub path 0 i, false 1593 + | None -> path, true 1594 + in 1595 + let (direct, nested) = match Hashtbl.find_opt immediate top with 1596 + | Some v -> v | None -> (None, []) 1597 + in 1598 + if is_direct then 1599 + Hashtbl.replace immediate top (Some entry, nested) 1600 + else 1601 + let rest_path = match String.index_opt path '/' with 1602 + | Some i -> String.sub path (i + 1) (String.length path - i - 1) 1603 + | None -> path 1604 + in 1605 + Hashtbl.replace immediate top 1606 + (direct, { entry with path = rest_path } :: nested) 1607 + end 1608 + ) (V2_consolidated.entries c); 1609 + Hashtbl.fold (fun name (direct, nested) acc -> 1610 + let node, attrs = match direct with 1611 + | Some e -> e.V2_consolidated.node, e.V2_consolidated.attrs 1612 + | None -> 1613 + V2_node.{ kind = `Group; attrs = Attrs.empty; 1614 + unknown = Jsont.Json.object' [] }, None 1615 + in 1616 + let children = match nested with 1617 + | [] -> [] 1618 + | _ -> 1619 + let sub_c = { V2_consolidated.entries = List.rev nested; 1620 + format_version = V2_consolidated.format_version c } in 1621 + children_of_v2_consolidated sub_c 1622 + in 1623 + (name, { node = `V2 node; attrs; 1624 + consolidated = None; children }) :: acc 1625 + ) immediate [] 1626 + |> List.sort (fun (a, _) (b, _) -> String.compare a b) 1627 + 1628 + let read_v2_attrs read_rel = 1629 + match read_rel ".zattrs" with 1630 + | Ok a -> (match decode_string attrs_jsont a with Ok a -> Some a | Error _ -> None) 1631 + | Error _ -> None 1632 + 1550 1633 let probe ~read path = 1551 1634 let read_rel = read in 1552 1635 
(* Try v3 first *) ··· 1556 1639 | Ok node -> 1557 1640 let consolidated = Option.map (fun c -> `V3 c) 1558 1641 (try_v3_consolidated contents) in 1559 - Ok { node = `V3 node; attrs = None; consolidated } 1642 + let children = match consolidated with 1643 + | Some (`V3 c) -> children_of_v3_consolidated c 1644 + | _ -> [] 1645 + in 1646 + Ok { node = `V3 node; attrs = None; consolidated; children } 1560 1647 | Error e -> Error (Printf.sprintf "zarr.json: %s" e)) 1561 1648 | Error _ -> 1562 - (* Try v2 .zarray *) 1563 1649 match read_rel ".zarray" with 1564 1650 | Ok contents -> 1565 1651 (match decode_string v2_array_jsont contents with 1566 1652 | Ok node -> 1567 - let attrs = match read_rel ".zattrs" with 1568 - | Ok a -> 1569 - (match decode_string attrs_jsont a with 1570 - | Ok a -> Some a | Error _ -> None) 1571 - | Error _ -> None 1572 - in 1573 - Ok { node = `V2 node; attrs; consolidated = None } 1653 + let attrs = read_v2_attrs read_rel in 1654 + Ok { node = `V2 node; attrs; consolidated = None; children = [] } 1574 1655 | Error e -> Error (Printf.sprintf ".zarray: %s" e)) 1575 1656 | Error _ -> 1576 - (* Try v2 .zgroup *) 1577 1657 match read_rel ".zgroup" with 1578 1658 | Ok contents -> 1579 1659 (match decode_string v2_group_jsont contents with 1580 1660 | Ok node -> 1581 - let attrs = match read_rel ".zattrs" with 1582 - | Ok a -> 1583 - (match decode_string attrs_jsont a with 1584 - | Ok a -> Some a | Error _ -> None) 1585 - | Error _ -> None 1586 - in 1587 - (* Try .zmetadata for v2 consolidated *) 1661 + let attrs = read_v2_attrs read_rel in 1588 1662 let consolidated = match read_rel ".zmetadata" with 1589 - | Ok meta_contents -> 1590 - (match decode_string V2_consolidated.jsont meta_contents with 1663 + | Ok mc -> 1664 + (match decode_string V2_consolidated.jsont mc with 1591 1665 | Ok c -> Some (`V2 c) | Error _ -> None) 1592 1666 | Error _ -> None 1593 1667 in 1594 - Ok { node = `V2 node; attrs; consolidated } 1668 + let children = match 
consolidated with 1669 + | Some (`V2 c) -> children_of_v2_consolidated c 1670 + | _ -> [] 1671 + in 1672 + Ok { node = `V2 node; attrs; consolidated; children } 1595 1673 | Error e -> Error (Printf.sprintf ".zgroup: %s" e)) 1596 1674 | Error _ -> 1597 - (* Last resort: try .zmetadata directly (no .zgroup present) *) 1598 1675 match read_rel ".zmetadata" with 1599 - | Ok meta_contents -> 1600 - (match decode_string V2_consolidated.jsont meta_contents with 1676 + | Ok mc -> 1677 + (match decode_string V2_consolidated.jsont mc with 1601 1678 | Ok c -> 1602 1679 let node = V2_node.{ kind = `Group; attrs = Attrs.empty; 1603 1680 unknown = Jsont.Json.object' [] } in 1604 - let attrs = match read_rel ".zattrs" with 1605 - | Ok a -> 1606 - (match decode_string attrs_jsont a with 1607 - | Ok a -> Some a | Error _ -> None) 1608 - | Error _ -> None 1609 - in 1610 - Ok { node = `V2 node; attrs; consolidated = Some (`V2 c) } 1681 + let attrs = read_v2_attrs read_rel in 1682 + let children = children_of_v2_consolidated c in 1683 + Ok { node = `V2 node; attrs; consolidated = Some (`V2 c); children } 1611 1684 | Error e -> Error (Printf.sprintf ".zmetadata: %s" e)) 1612 1685 | Error _ -> 1613 1686 Error (Printf.sprintf
+8
src/zarr_jsont.mli
··· 473 473 (attributes are inline in [zarr.json]). *) 474 474 consolidated : [ `V3 of Consolidated.t | `V2 of V2_consolidated.t ] option; 475 475 (** Consolidated metadata if present. *) 476 + children : (string * probe_result) list; 477 + (** Recursively probed children. Populated from consolidated metadata 478 + when available, or by listing the directory (if [list] is provided). *) 476 479 } 477 480 (** The result of probing a zarr store path. *) 478 481 ··· 490 493 + [zarr.json] (v3), checking for [consolidated_metadata] field 491 494 + [.zarray] (v2 array), with optional [.zattrs] 492 495 + [.zgroup] (v2 group), with optional [.zattrs] and [.zmetadata] 496 + 497 + When consolidated metadata is present, [children] is populated by 498 + decoding each entry. Each child in consolidated metadata is itself 499 + probed via [read] for any additional metadata not captured in the 500 + consolidated form. 493 501 494 502 Returns [Error] if none of the expected files can be read. *)