OCaml Zarr jsont codecs for v2/v3 and common conventions
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

refactor: move pretty-printing combinators to library

Add pp_dtype, pp_data_type, pp_fill_value, pp_attrs, pp_probe_result
to the library. These use plain string indentation (not Format boxes)
for clean tree output.

The zarr-inspect binary is now a one-liner calling pp_probe_result.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

+154 -171
+3 -171
bin/zarr_inspect.ml
··· 41 41 let full = Filename.concat base_path relpath in 42 42 read_file full 43 43 44 - (* Pretty-print a decoded zarr node *) 45 - 46 - let pp_json ppf json = 47 - Jsont.pp_json ppf json 48 - 49 - let encode_and_print codec v label = 50 - match Jsont.Json.encode codec v with 51 - | Ok json -> 52 - Printf.printf "── %s ──\n" label; 53 - Format.printf "%a@." pp_json json 54 - | Error e -> 55 - Printf.eprintf "Error encoding %s: %s\n" label e 56 - 57 - let pp_v2_dtype = function 58 - | `Float (_, n) -> Printf.sprintf "float%d" (n*8) 59 - | `Int (_, n) -> Printf.sprintf "int%d" (n*8) 60 - | `Uint (_, n) -> Printf.sprintf "uint%d" (n*8) 61 - | `Bool -> "bool" 62 - | `Complex (_, n) -> Printf.sprintf "complex%d" (n*8) 63 - | `Datetime (_, u) -> Printf.sprintf "datetime[%s]" u 64 - | `Timedelta (_, u) -> Printf.sprintf "timedelta[%s]" u 65 - | `String n -> Printf.sprintf "S%d" n 66 - | `Unicode (_, n) -> Printf.sprintf "U%d" n 67 - | `Raw n -> Printf.sprintf "V%d" n 68 - | `Structured _ -> "structured" 69 - 70 - let pp_v3_dtype = function 71 - | `Bool -> "bool" | `Int8 -> "int8" | `Int16 -> "int16" 72 - | `Int32 -> "int32" | `Int64 -> "int64" 73 - | `Uint8 -> "uint8" | `Uint16 -> "uint16" 74 - | `Uint32 -> "uint32" | `Uint64 -> "uint64" 75 - | `Float16 -> "float16" | `Float32 -> "float32" 76 - | `Float64 -> "float64" 77 - | `Complex64 -> "complex64" | `Complex128 -> "complex128" 78 - | `Raw n -> Printf.sprintf "r%d" n 79 - | `Other o -> Zarr_jsont.Other_ext.name o 80 - 81 - let pp_shape shape = String.concat "x" (List.map string_of_int shape) 82 - 83 - let pp_floats fs = String.concat "," (List.map (Printf.sprintf "%.4g") fs) 84 - 85 - let print_attrs indent (attrs : Zarr_jsont.Attrs.t) = 86 - (* proj *) 87 - (match Zarr_jsont.Attrs.proj attrs with 88 - | Some p -> 89 - let parts = List.filter_map Fun.id [ 90 - Option.map (Printf.sprintf "code=%s") (Zarr_jsont.Conv.Proj.code p); 91 - Option.map (fun _ -> "wkt2=...") (Zarr_jsont.Conv.Proj.wkt2 p); 92 - Option.map (fun _ -> "projjson=...") (Zarr_jsont.Conv.Proj.projjson p); 93 - ] in 94 - if parts <> [] then 95 - Printf.printf "%sproj: %s\n" indent (String.concat " " parts) 96 - | None -> ()); 97 - (* spatial *) 98 - (match Zarr_jsont.Attrs.spatial attrs with 99 - | Some s -> 100 - let dims = String.concat "," (Zarr_jsont.Conv.Spatial.dimensions s) in 101 - Printf.printf "%sspatial: dims=[%s]" indent dims; 102 - (match Zarr_jsont.Conv.Spatial.bbox s with 103 - | Some bbox -> Printf.printf " bbox=[%s]" (pp_floats bbox) 104 - | None -> ()); 105 - (match Zarr_jsont.Conv.Spatial.transform s with 106 - | Some t -> Printf.printf " transform=[%s]" (pp_floats t) 107 - | None -> ()); 108 - (match Zarr_jsont.Conv.Spatial.shape s with 109 - | Some sh -> Printf.printf " shape=[%s]" (pp_shape sh) 110 - | None -> ()); 111 - (match Zarr_jsont.Conv.Spatial.registration s with 112 - | Some `Pixel -> Printf.printf " reg=pixel" 113 - | Some `Node -> Printf.printf " reg=node" 114 - | None -> ()); 115 - Printf.printf "\n" 116 - | None -> ()); 117 - (* multiscales *) 118 - (match Zarr_jsont.Attrs.multiscales attrs with 119 - | Some m -> 120 - let layout = Zarr_jsont.Conv.Multiscales.layout m in 121 - Printf.printf "%smultiscales: %d levels" indent (List.length layout); 122 - (match Zarr_jsont.Conv.Multiscales.resampling_method m with 123 - | Some r -> Printf.printf " resampling=%s" r | None -> ()); 124 - Printf.printf "\n"; 125 - List.iter (fun item -> 126 - let asset = Zarr_jsont.Conv.Multiscales.Layout_item.asset item in 127 - Printf.printf "%s %s" indent asset; 128 - (match Zarr_jsont.Conv.Multiscales.Layout_item.derived_from item with 129 - | Some d -> Printf.printf " <- %s" d | None -> ()); 130 - (match Zarr_jsont.Conv.Multiscales.Layout_item.transform item with 131 - | Some t -> 132 - (match Zarr_jsont.Conv.Multiscales.Transform.scale t with 133 - | Some s -> Printf.printf " scale=[%s]" (pp_floats s) | None -> ()) 134 - | None -> ()); 135 - Printf.printf "\n" 136 - ) layout 137 - | None -> ()); 138 - (* unknown keys *) 139 - (match Zarr_jsont.Attrs.unknown attrs with 140 - | Jsont.Object (mems, _) when mems <> [] -> 141 - List.iter (fun ((k, _), v) -> 142 - let vs = match Jsont_bytesrw.encode_string ~format:Jsont.Minify Jsont.json v with 143 - | Ok s -> 144 - if String.length s > 60 then String.sub s 0 57 ^ "..." 145 - else s 146 - | Error _ -> "?" 147 - in 148 - Printf.printf "%s%s: %s\n" indent k vs 149 - ) mems 150 - | _ -> ()) 151 - 152 - let node_attrs = function 153 - | `V2 n -> Zarr_jsont.V2_node.attrs n 154 - | `V3 n -> Zarr_jsont.V3_node.attrs n 44 + (* Pretty-print *) 155 45 156 - let rec print_tree indent children = 157 - List.iter (fun (name, (result : Zarr_jsont.probe_result)) -> 158 - let kind = match result.node with 159 - | `V2 n -> 160 - (match Zarr_jsont.V2_node.kind n with 161 - | `Array a -> 162 - Printf.sprintf "array %s %s" 163 - (pp_v2_dtype (Zarr_jsont.V2.Array_meta.dtype a)) 164 - (pp_shape (Zarr_jsont.V2.Array_meta.shape a)) 165 - | `Group -> "group") 166 - | `V3 n -> 167 - (match Zarr_jsont.V3_node.kind n with 168 - | `Array a -> 169 - Printf.sprintf "array %s %s" 170 - (pp_v3_dtype (Zarr_jsont.V3.Array_meta.data_type a)) 171 - (pp_shape (Zarr_jsont.V3.Array_meta.shape a)) 172 - | `Group -> "group") 173 - in 174 - Printf.printf "%s%s [%s]\n" indent name kind; 175 - let attrs = node_attrs result.node in 176 - let sub_indent = indent ^ " " in 177 - (* Also check probe_result.attrs for v2 separate .zattrs *) 178 - let attrs = match result.attrs with 179 - | Some a -> a | None -> attrs 180 - in 181 - print_attrs sub_indent attrs; 182 - if result.children <> [] then 183 - print_tree sub_indent result.children 184 - ) children 185 - 186 - let pretty_print_result { Zarr_jsont.node; attrs; children; _ } = 187 - let root_kind = match node with 188 - | `V2 n -> 189 - (match Zarr_jsont.V2_node.kind n with 190 - | `Array a -> 191 - encode_and_print Zarr_jsont.V2.array_meta_jsont a "v2 .zarray"; 192 - "v2 array" 193 - | `Group -> 194 - Printf.printf "── v2 .zgroup ──\n"; 195 - "v2 group") 196 - | `V3 n -> 197 - (match Zarr_jsont.V3_node.kind n with 198 - | `Array _ -> 199 - encode_and_print Zarr_jsont.v3_jsont n "v3 zarr.json (array)"; 200 - "v3 array" 201 - | `Group -> 202 - Printf.printf "── v3 group ──\n"; 203 - "v3 group") 204 - in 205 - ignore root_kind; 206 - (* Print root attrs *) 207 - let root_attrs = match attrs with 208 - | Some a -> a 209 - | None -> node_attrs node 210 - in 211 - print_attrs " " root_attrs; 212 - if children <> [] then begin 213 - Printf.printf "\n── children ──\n"; 214 - print_tree " " children 215 - end 46 + let pretty_print_result result = 47 + Format.printf "%a@." Zarr_jsont.pp_probe_result result 216 48 217 49 (* CLI *) 218 50
+133
src/zarr_jsont.ml
··· 1686 1686 Error (Printf.sprintf 1687 1687 "%s: no zarr metadata found (tried zarr.json, .zarray, .zgroup, .zmetadata)" 1688 1688 path) 1689 + 1690 + (* Pretty-printing *) 1691 + 1692 + let pf = Format.fprintf 1693 + 1694 + let pp_dtype ppf = function 1695 + | `Bool -> pf ppf "bool" 1696 + | `Int (_, n) -> pf ppf "int%d" (n * 8) 1697 + | `Uint (_, n) -> pf ppf "uint%d" (n * 8) 1698 + | `Float (_, n) -> pf ppf "float%d" (n * 8) 1699 + | `Complex (_, n) -> pf ppf "complex%d" (n * 8) 1700 + | `Datetime (_, u) -> pf ppf "datetime[%s]" u 1701 + | `Timedelta (_, u) -> pf ppf "timedelta[%s]" u 1702 + | `String n -> pf ppf "S%d" n 1703 + | `Unicode (_, n) -> pf ppf "U%d" n 1704 + | `Raw n -> pf ppf "V%d" n 1705 + | `Structured _ -> pf ppf "structured" 1706 + 1707 + let pp_data_type ppf = function 1708 + | `Bool -> pf ppf "bool" | `Int8 -> pf ppf "int8" 1709 + | `Int16 -> pf ppf "int16" | `Int32 -> pf ppf "int32" 1710 + | `Int64 -> pf ppf "int64" | `Uint8 -> pf ppf "uint8" 1711 + | `Uint16 -> pf ppf "uint16" | `Uint32 -> pf ppf "uint32" 1712 + | `Uint64 -> pf ppf "uint64" | `Float16 -> pf ppf "float16" 1713 + | `Float32 -> pf ppf "float32" | `Float64 -> pf ppf "float64" 1714 + | `Complex64 -> pf ppf "complex64" | `Complex128 -> pf ppf "complex128" 1715 + | `Raw n -> pf ppf "r%d" n 1716 + | `Other o -> pf ppf "%s" (Other_ext.name o) 1717 + 1718 + let pp_fill_value ppf = function 1719 + | `Null -> pf ppf "null" 1720 + | `Bool b -> pf ppf "%b" b 1721 + | `Int i -> pf ppf "%Ld" i 1722 + | `Float f -> pf ppf "%g" f 1723 + | `Complex (r, i) -> pf ppf "%g+%gi" r i 1724 + | `Bytes s -> pf ppf "<%d bytes>" (String.length s) 1725 + 1726 + let pp_shape ppf shape = 1727 + Format.pp_print_list ~pp_sep:(fun ppf () -> pf ppf "x") 1728 + Format.pp_print_int ppf shape 1729 + 1730 + let pp_floats ppf fs = 1731 + Format.pp_print_list ~pp_sep:(fun ppf () -> pf ppf ",") 1732 + (fun ppf f -> pf ppf "%.4g" f) ppf fs 1733 + 1734 + let pp_opt pp ppf = function 1735 + | Some v -> pp ppf v 1736 + | None -> () 1737 + 1738 + let pp_labelled label pp ppf v = 1739 + pf ppf " %s=[%a]" label pp v 1740 + 1741 + let node_attrs = function 1742 + | `V2 n -> V2_node.attrs n 1743 + | `V3 n -> V3_node.attrs n 1744 + 1745 + let pp_node_kind ppf = function 1746 + | `V2 n -> 1747 + (match V2_node.kind n with 1748 + | `Array a -> pf ppf "array %a %a" pp_dtype (V2.Array_meta.dtype a) pp_shape (V2.Array_meta.shape a) 1749 + | `Group -> pf ppf "group") 1750 + | `V3 n -> 1751 + (match V3_node.kind n with 1752 + | `Array a -> pf ppf "array %a %a" pp_data_type (V3.Array_meta.data_type a) pp_shape (V3.Array_meta.shape a) 1753 + | `Group -> pf ppf "group") 1754 + 1755 + let rec pp_tree indent ppf children = 1756 + List.iter (fun (name, (result : probe_result)) -> 1757 + pf ppf "\n%s%s [%a]" indent name pp_node_kind result.node; 1758 + let attrs = match result.attrs with 1759 + | Some a -> a | None -> node_attrs result.node 1760 + in 1761 + pp_indented_attrs (indent ^ " ") ppf attrs; 1762 + if result.children <> [] then 1763 + pp_tree (indent ^ " ") ppf result.children 1764 + ) children 1765 + 1766 + and pp_indented_attrs indent ppf attrs = 1767 + let nl () = pf ppf "\n%s" indent in 1768 + (match Attrs.proj attrs with 1769 + | Some p -> 1770 + nl (); pf ppf "proj:"; 1771 + Option.iter (pf ppf " code=%s") (Conv.Proj.code p); 1772 + Option.iter (fun _ -> pf ppf " wkt2=...") (Conv.Proj.wkt2 p); 1773 + Option.iter (fun _ -> pf ppf " projjson=...") (Conv.Proj.projjson p) 1774 + | None -> ()); 1775 + (match Attrs.spatial attrs with 1776 + | Some s -> 1777 + nl (); pf ppf "spatial: dims=[%a]" 1778 + (Format.pp_print_list ~pp_sep:(fun ppf () -> pf ppf ",") 1779 + Format.pp_print_string) (Conv.Spatial.dimensions s); 1780 + pp_opt (pp_labelled "bbox" pp_floats) ppf (Conv.Spatial.bbox s); 1781 + pp_opt (pp_labelled "transform" pp_floats) ppf (Conv.Spatial.transform s); 1782 + pp_opt (pp_labelled "shape" pp_shape) ppf (Conv.Spatial.shape s); 1783 + (match Conv.Spatial.registration s with 1784 + | Some `Pixel -> pf ppf " reg=pixel" | Some `Node -> pf ppf " reg=node" 1785 + | None -> ()) 1786 + | None -> ()); 1787 + (match Attrs.multiscales attrs with 1788 + | Some m -> 1789 + let layout = Conv.Multiscales.layout m in 1790 + nl (); pf ppf "multiscales: %d levels" (List.length layout); 1791 + Option.iter (pf ppf " resampling=%s") (Conv.Multiscales.resampling_method m); 1792 + List.iter (fun item -> 1793 + pf ppf "\n%s %s" indent (Conv.Multiscales.Layout_item.asset item); 1794 + Option.iter (pf ppf " <- %s") (Conv.Multiscales.Layout_item.derived_from item); 1795 + Option.iter (fun t -> 1796 + pp_opt (pp_labelled "scale" pp_floats) ppf (Conv.Multiscales.Transform.scale t) 1797 + ) (Conv.Multiscales.Layout_item.transform item) 1798 + ) layout 1799 + | None -> ()); 1800 + (match Attrs.unknown attrs with 1801 + | Jsont.Object (mems, _) -> 1802 + List.iter (fun ((k, _), v) -> 1803 + let vs = match Jsont_bytesrw.encode_string ~format:Jsont.Minify Jsont.json v with 1804 + | Ok s when String.length s > 60 -> String.sub s 0 57 ^ "..." 1805 + | Ok s -> s | Error _ -> "?" 1806 + in 1807 + nl (); pf ppf "%s: %s" k vs 1808 + ) mems 1809 + | _ -> ()) 1810 + 1811 + let pp_attrs ppf attrs = pp_indented_attrs " " ppf attrs 1812 + 1813 + let pp_probe_result ppf result = 1814 + pf ppf "[%a]" pp_node_kind result.node; 1815 + let attrs = match result.attrs with 1816 + | Some a -> a | None -> node_attrs result.node 1817 + in 1818 + pp_indented_attrs " " ppf attrs; 1819 + if result.children <> [] then 1820 + pp_tree " " ppf result.children; 1821 + pf ppf "\n"
+18
src/zarr_jsont.mli
··· 500 500 consolidated form. 501 501 502 502 Returns [Error] if none of the expected files can be read. *) 503 + 504 + (** {1 Pretty-printing} *) 505 + 506 + val pp_dtype : Format.formatter -> dtype -> unit 507 + (** Pretty-print a v2 dtype (e.g. [float64], [int32], [structured]). *) 508 + 509 + val pp_data_type : Format.formatter -> V3.Data_type.t -> unit 510 + (** Pretty-print a v3 data type. *) 511 + 512 + val pp_fill_value : Format.formatter -> fill_value -> unit 513 + (** Pretty-print a fill value. *) 514 + 515 + val pp_attrs : Format.formatter -> Attrs.t -> unit 516 + (** Pretty-print decoded attributes: conventions then unknown keys. *) 517 + 518 + val pp_probe_result : Format.formatter -> probe_result -> unit 519 + (** Pretty-print a probe result as a tree with types, shapes, 520 + conventions, and unknown attributes. *)