OCaml Zarr jsont codecs for v2/v3 and common conventions
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: store probing API and zarr-inspect CLI

Add Zarr_jsont.probe ~read which probes a zarr store by trying
zarr.json, .zarray, .zgroup in order. The read function is a
caller-supplied callback for fetching files.

Add zarr-inspect binary that uses curl for URLs and direct file
reads for local directories.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

+192 -8
+5
bin/dune
··· 1 + (executable 2 + (name zarr_inspect) 3 + (public_name zarr-inspect) 4 + (package zarr-jsont) 5 + (libraries zarr_jsont jsont jsont.bytesrw cmdliner unix))
+106
bin/zarr_inspect.ml
open Cmdliner

(* File reading *)

(* [read_file path] returns the complete contents of [path] as
   [Ok contents], or [Error msg] carrying the [Sys_error] message when the
   file cannot be opened or read.

   The file is opened in binary mode and read to end-of-file, so the result
   does not depend on the length reported for the channel. *)
let read_file path =
  try Ok (In_channel.with_open_bin path In_channel.input_all)
  with Sys_error msg -> Error msg

(* HTTP fetching via curl *)

(* [read_curl url] downloads [url] into a temporary file by running the
   [curl] executable, then returns the file contents via [read_file].
   Returns [Error _] when curl exits with a non-zero status.

   The command line is assembled with [Filename.quote_command] so that
   characters in [url] that are special to the shell cannot terminate the
   argument or inject further commands. *)
let read_curl url =
  let tmp = Filename.temp_file "zarr_inspect" ".json" in
  (* A failing cleanup must not mask the real result: an exception escaping
     [~finally] would be re-raised as [Fun.Finally_raised]. *)
  let cleanup () = try Sys.remove tmp with Sys_error _ -> () in
  Fun.protect ~finally:cleanup (fun () ->
    let cmd = Filename.quote_command "curl" [ "-sf"; "-o"; tmp; url ] in
    match Sys.command cmd with
    | 0 -> read_file tmp
    | _ -> Error (Printf.sprintf "curl failed for %s" url))

(* Dispatch: URL or local path *)

(* [is_url s] is [true] when [s] is longer than 7 characters and starts
   with [http://] or [https://], compared case-insensitively. *)
let is_url s =
  let s = String.lowercase_ascii s in
  String.length s > 7
  && (String.starts_with ~prefix:"http://" s
      || String.starts_with ~prefix:"https://" s)

(* [strip_trailing_slashes s] is [s] without any run of ['/'] characters at
   its end. *)
let strip_trailing_slashes s =
  let rec keep i = if i > 0 && s.[i - 1] = '/' then keep (i - 1) else i in
  String.sub s 0 (keep (String.length s))

(* [make_reader base_path] builds the [read] callback passed to
   [Zarr_jsont.probe]: it maps a relative file name to the contents of that
   file below [base_path], over HTTP(S) when [base_path] is a URL and from
   the local file system otherwise. *)
let make_reader base_path =
  if is_url base_path then begin
    (* Avoid producing "base//relpath" when the URL already ends in '/'. *)
    let base = strip_trailing_slashes base_path in
    fun relpath -> read_curl (base ^ "/" ^ relpath)
  end else
    fun relpath -> read_file (Filename.concat base_path relpath)

(* Pretty-print a decoded zarr node *)

let pp_json ppf json =
  Jsont.pp_json ppf json

(* [pretty_print_result r] re-encodes the probed node to JSON and prints it
   on standard output under a header naming the metadata document, followed
   by the [.zattrs] section when [r.attrs] is present. Encoding failures are
   reported on standard error. *)
let pretty_print_result { Zarr_jsont.node; attrs } =
  let encode_and_print codec v label =
    match Jsont.Json.encode codec v with
    | Ok json ->
      Printf.printf "── %s ──\n" label;
      Format.printf "%a@." pp_json json
    | Error e ->
      Printf.eprintf "Error encoding %s: %s\n" label e
  in
  (match node with
   | `V2 n ->
     (match Zarr_jsont.V2_node.kind n with
      | `Array a ->
        encode_and_print Zarr_jsont.V2.array_meta_jsont a "v2 .zarray"
      | `Group ->
        Printf.printf "── v2 .zgroup ──\n";
        Format.printf "{@\n \"zarr_format\": 2@\n}@.")
   | `V3 n ->
     (* Arrays and groups are encoded identically; only the header differs. *)
     let label =
       match Zarr_jsont.V3_node.kind n with
       | `Array _ -> "v3 zarr.json (array)"
       | `Group -> "v3 zarr.json (group)"
     in
     encode_and_print Zarr_jsont.v3_jsont n label);
  match attrs with
  | Some a ->
    (match Jsont.Json.encode Zarr_jsont.attrs_jsont a with
     | Ok json ->
       Printf.printf "\n── v2 .zattrs ──\n";
       Format.printf "%a@." pp_json json
     | Error e ->
       Printf.eprintf "Error encoding .zattrs: %s\n" e)
  | None -> ()

(* CLI *)

let path_arg =
  let doc = "Path to a zarr store (local directory or HTTP URL)." in
  Arg.(required & pos 0 (some string) None & info [] ~docv:"PATH" ~doc)

(* [run path] probes the store at [path] and prints what was found; a probe
   failure is returned to Cmdliner as an error without usage output. *)
let run path =
  let read = make_reader path in
  match Zarr_jsont.probe ~read path with
  | Ok result ->
    pretty_print_result result;
    `Ok ()
  | Error msg ->
    `Error (false, msg)

let cmd =
  let doc = "Inspect zarr store metadata" in
  let man = [
    `S Manpage.s_description;
    `P "Reads zarr v2 or v3 metadata from a local directory or HTTP URL, \
        decodes it using typed codecs, and pretty-prints the re-encoded JSON.";
    `S Manpage.s_examples;
    `Pre " zarr-inspect ./my-dataset";
    `Pre " zarr-inspect https://example.com/zarr-store";
  ] in
  let info = Cmd.info "zarr-inspect" ~version:"0.1.0" ~doc ~man in
  Cmd.v info Term.(ret (const run $ path_arg))

let () = exit (Cmd.eval cmd)
+2 -1
dune-project
··· 8 8 (depends 9 9 (ocaml (>= 5.1)) 10 10 (jsont (>= 0.2.0)) 11 - (bytesrw (>= 0.1.0)))) 11 + (bytesrw (>= 0.1.0)) 12 + (cmdliner (>= 1.3.0))))
+51
src/zarr_jsont.ml
(* … end of the preceding definition (unchanged) … *)
      (match Jsont.Json.encode v3_jsont node with
       | Ok j -> j | Error e -> invalid_arg e))
    Jsont.json

(* Store probing *)

type probe_result = {
  node : t;
  attrs : Attrs.t option;
}

let decode_string codec s =
  Jsont_bytesrw.decode_string codec s

(* [probe_v2_attrs ~read] fetches and decodes the [.zattrs] document that
   may accompany v2 metadata. This is best-effort: a [.zattrs] that cannot
   be read, or that fails to decode, yields [None] rather than an error. *)
let probe_v2_attrs ~read =
  match read ".zattrs" with
  | Error _ -> None
  | Ok contents -> Result.to_option (decode_string attrs_jsont contents)

(* [probe_v2 ~read ~file codec contents] decodes the v2 metadata document
   [contents] (read from [file]) with [codec]. On success [.zattrs] is
   looked up as well; a decode error is prefixed with [file]. *)
let probe_v2 ~read ~file codec contents =
  match decode_string codec contents with
  | Error e -> Error (Printf.sprintf "%s: %s" file e)
  | Ok node -> Ok { node = `V2 node; attrs = probe_v2_attrs ~read }

(* [probe ~read path] looks for zarr metadata using [read], trying
   [zarr.json], then [.zarray], then [.zgroup].

   The first file that [read] returns successfully decides the outcome: it
   is decoded with the matching codec, and a decode failure is returned as
   [Error] without trying the remaining files. A file for which [read]
   returns [Error _] is skipped. [path] is used only to label the error
   returned when none of the three files could be read. *)
let probe ~read path =
  (* Try v3 first *)
  match read "zarr.json" with
  | Ok contents ->
    (match decode_string v3_jsont contents with
     | Ok node -> Ok { node = `V3 node; attrs = None }
     | Error e -> Error (Printf.sprintf "zarr.json: %s" e))
  | Error _ ->
    (* Try v2 .zarray *)
    match read ".zarray" with
    | Ok contents -> probe_v2 ~read ~file:".zarray" v2_array_jsont contents
    | Error _ ->
      (* Try v2 .zgroup *)
      match read ".zgroup" with
      | Ok contents -> probe_v2 ~read ~file:".zgroup" v2_group_jsont contents
      | Error _ ->
        Error (Printf.sprintf
          "%s: no zarr metadata found (tried zarr.json, .zarray, .zgroup)"
          path)
+28 -7
src/zarr_jsont.mli
··· 1 1 (** Jsont codecs for Zarr v2 and v3 metadata. *) 2 2 3 - (** Polymorphic fill value shared between Zarr v2 and v3 array metadata. 4 - 5 - Encodes the value stored in unwritten or missing chunks. The JSON 6 - representation is flexible: null, boolean, integer number, floating-point 7 - number (including the special strings ["NaN"], ["Infinity"], 8 - ["-Infinity"]), a two-element array for complex numbers, or an integer 9 - array (0–255) for raw bytes. *) 3 + (** Fill value that encodes the value stored in unwritten or missing chunks. *) 10 4 type fill_value = [ 11 5 | `Null 12 6 | `Bool of bool ··· 428 422 field and dispatching to {!v2_array_jsont}, {!v2_group_jsont}, or 429 423 {!v3_jsont} as appropriate. V2 arrays are distinguished from v2 groups 430 424 by the presence of a ["shape"] field. *) 425 + 426 + (** {1 Store probing} *) 427 + 428 + type probe_result = { 429 + node : t; 430 + attrs : Attrs.t option; 431 + (** For v2, the separately-fetched [.zattrs] if present. For v3, [None] 432 + (attributes are inline in [zarr.json]). *) 433 + } 434 + (** The result of probing a zarr store path. *) 435 + 436 + val probe : 437 + read:(string -> (string, string) result) -> 438 + string -> 439 + (probe_result, string) result 440 + (** [probe ~read path] probes the zarr store rooted at [path]. 441 + 442 + [read relpath] is called to fetch the contents of a file at 443 + [path/relpath]. It should return [Ok contents] on success or 444 + [Error msg] if the file does not exist or cannot be read. 445 + 446 + The probing order is: 447 + + [zarr.json] (v3) 448 + + [.zarray] (v2 array), with optional [.zattrs] 449 + + [.zgroup] (v2 group), with optional [.zattrs] 450 + 451 + Returns [Error] if none of the expected files can be read. *)