···8989 | `Bytes _ -> array_codec)
9090 ()
91919292+type endian = [ `Little | `Big | `Not_applicable ]
9393+9494+type dtype = [
9595+ | `Bool
9696+ | `Int of endian * int
9797+ | `Uint of endian * int
9898+ | `Float of endian * int
9999+ | `Complex of endian * int
100100+ | `Timedelta of endian * string
101101+ | `Datetime of endian * string
102102+ | `String of int
103103+ | `Unicode of endian * int
104104+ | `Raw of int
105105+ | `Structured of (string * dtype * int list option) list
106106+]
107107+108108+let parse_endian = function
109109+ | '<' -> `Little
110110+ | '>' -> `Big
111111+ | '|' | '=' -> `Not_applicable
112112+ | c -> failwith (Printf.sprintf "dtype: unknown endian char %c" c)
113113+114114+let endian_char = function
115115+ | `Little -> '<'
116116+ | `Big -> '>'
117117+ | `Not_applicable -> '|'
118118+119119+(* Parse a NumPy typestr like "<f8", "|b1", "<M8[ns]", "|S10", etc. *)
120120+let parse_typestr s =
121121+ if String.length s < 3 then
122122+ failwith (Printf.sprintf "dtype: typestr too short: %s" s);
123123+ let endian = parse_endian s.[0] in
124124+ let kind = s.[1] in
125125+ let rest = String.sub s 2 (String.length s - 2) in
126126+ match kind with
127127+ | 'b' ->
128128+ let n = int_of_string rest in
129129+ if n = 1 then `Bool
130130+ else failwith (Printf.sprintf "dtype: invalid bool size %d" n)
131131+ | 'i' -> `Int (endian, int_of_string rest)
132132+ | 'u' -> `Uint (endian, int_of_string rest)
133133+ | 'f' -> `Float (endian, int_of_string rest)
134134+ | 'c' -> `Complex (endian, int_of_string rest)
135135+ | 'M' ->
136136+ (* e.g. "8[ns]" *)
137137+ let unit_str =
138138+ if String.length rest > 2 && rest.[0] = '8' && rest.[1] = '[' then begin
139139+ let close = String.index rest ']' in
140140+ String.sub rest 2 (close - 2)
141141+ end else failwith (Printf.sprintf "dtype: invalid datetime typestr: %s" s)
142142+ in
143143+ `Datetime (endian, unit_str)
144144+ | 'm' ->
145145+ let unit_str =
146146+ if String.length rest > 2 && rest.[0] = '8' && rest.[1] = '[' then begin
147147+ let close = String.index rest ']' in
148148+ String.sub rest 2 (close - 2)
149149+ end else failwith (Printf.sprintf "dtype: invalid timedelta typestr: %s" s)
150150+ in
151151+ `Timedelta (endian, unit_str)
152152+ | 'S' -> `String (int_of_string rest)
153153+ | 'U' -> `Unicode (endian, int_of_string rest)
154154+ | 'V' -> `Raw (int_of_string rest)
155155+ | c -> failwith (Printf.sprintf "dtype: unknown kind char %c" c)
156156+157157+let encode_typestr (dt : dtype) : string =
158158+ match dt with
159159+ | `Bool -> "|b1"
160160+ | `Int (e, n) -> Printf.sprintf "%ci%d" (endian_char e) n
161161+ | `Uint (e, n) -> Printf.sprintf "%cu%d" (endian_char e) n
162162+ | `Float (e, n) -> Printf.sprintf "%cf%d" (endian_char e) n
163163+ | `Complex (e, n) -> Printf.sprintf "%cc%d" (endian_char e) n
164164+ | `Datetime (e, u) -> Printf.sprintf "%cM8[%s]" (endian_char e) u
165165+ | `Timedelta (e, u) -> Printf.sprintf "%cm8[%s]" (endian_char e) u
166166+ | `String n -> Printf.sprintf "|S%d" n
167167+ | `Unicode (e, n) -> Printf.sprintf "%cU%d" (endian_char e) n
168168+ | `Raw n -> Printf.sprintf "|V%d" n
169169+ | `Structured _ -> failwith "dtype: encode_typestr called on structured dtype"
170170+171171+(* Forward reference to allow recursive dtype_jsont. *)
172172+let dtype_jsont_fwd : dtype Jsont.t ref = ref (Jsont.todo ~kind:"dtype" ())
173173+174174+let dtype_jsont : dtype Jsont.t = Jsont.rec' (lazy (
175175+ let simple_codec =
176176+ Jsont.map ~kind:"dtype_string"
177177+ ~dec:parse_typestr
178178+ ~enc:encode_typestr
179179+ Jsont.string
180180+ in
181181+ let decode_shape items =
182182+ List.map (function
183183+ | Jsont.Number (f, _) -> int_of_float f
184184+ | j -> failwith (Format.asprintf "dtype: expected int in shape, got %a" Jsont.pp_json j))
185185+ items
186186+ in
187187+ (* Decode a single field descriptor from a JSON list:
188188+ ["name", "<f4"] or ["name", "<f4", [3, 2]] *)
189189+ let decode_field (json_items : Jsont.json list) : string * dtype * int list option =
190190+ match json_items with
191191+ | [ Jsont.String (name, _); Jsont.String (typestr, _) ] ->
192192+ (name, parse_typestr typestr, None)
193193+ | [ Jsont.String (name, _); Jsont.String (typestr, _); Jsont.Array (shape_items, _) ] ->
194194+ (name, parse_typestr typestr, Some (decode_shape shape_items))
195195+ | [ Jsont.String (name, _); (Jsont.Array _ as nested_json) ] ->
196196+ let nested = match Jsont.Json.decode !dtype_jsont_fwd nested_json with
197197+ | Ok v -> v
198198+ | Error e -> failwith (Printf.sprintf "dtype: nested structured decode error: %s" e)
199199+ in
200200+ (name, nested, None)
201201+ | [ Jsont.String (name, _); (Jsont.Array _ as nested_json); Jsont.Array (shape_items, _) ] ->
202202+ let nested = match Jsont.Json.decode !dtype_jsont_fwd nested_json with
203203+ | Ok v -> v
204204+ | Error e -> failwith (Printf.sprintf "dtype: nested structured decode error: %s" e)
205205+ in
206206+ (name, nested, Some (decode_shape shape_items))
207207+ | _ -> failwith "dtype: invalid field descriptor"
208208+ in
209209+ let structured_codec =
210210+ Jsont.map ~kind:"dtype_array"
211211+ ~dec:(fun (fields_json : Jsont.json list) ->
212212+ let fields = List.map (function
213213+ | Jsont.Array (items, _) -> decode_field items
214214+ | j -> failwith (Format.asprintf "dtype: expected array field descriptor, got %a" Jsont.pp_json j))
215215+ fields_json
216216+ in
217217+ `Structured fields)
218218+ ~enc:(function
219219+ | `Structured fields ->
220220+ List.map (fun (name, dt, shape_opt) ->
221221+ let name_json = Jsont.Json.string name in
222222+ let dtype_json = match Jsont.Json.encode !dtype_jsont_fwd dt with
223223+ | Ok j -> j
224224+ | Error e -> failwith (Printf.sprintf "dtype: encode error: %s" e)
225225+ in
226226+ match shape_opt with
227227+ | None -> Jsont.Json.list [ name_json; dtype_json ]
228228+ | Some shape ->
229229+ let shape_json = Jsont.Json.list
230230+ (List.map (fun n -> Jsont.Json.number (float_of_int n)) shape)
231231+ in
232232+ Jsont.Json.list [ name_json; dtype_json; shape_json ])
233233+ fields
234234+ | _ -> assert false)
235235+ (Jsont.list Jsont.json)
236236+ in
237237+ Jsont.any ~kind:"dtype"
238238+ ~dec_string:simple_codec
239239+ ~dec_array:structured_codec
240240+ ~enc:(function
241241+ | `Structured _ -> structured_codec
242242+ | _ -> simple_codec)
243243+ ()
244244+))
245245+246246+let () = dtype_jsont_fwd := dtype_jsont
247247+92248module Other_ext = struct
93249 type t = { name : string; configuration : Jsont.json option; must_understand : bool }
94250
+27
src/zarr_jsont.mli
···1919val fill_value_jsont : fill_value Jsont.t
2020(** Codec for {!fill_value}. Dispatches on the JSON sort via {!Jsont.any}. *)
21212222+(** Byte order of a NumPy array dtype. *)
2323+type endian = [ `Little | `Big | `Not_applicable ]
2424+2525+(** NumPy array dtype as used in Zarr v2 array metadata [".zarray"].
2626+2727+ Simple types are encoded as JSON strings in NumPy typestr format (e.g.
2828+ ["<f8"], ["|b1"]). Structured types are encoded as JSON arrays of field
2929+ descriptors, each of the form [["name","<dtype_str"]] or
3030+ [["name","<dtype_str",[dim1,...]]]. *)
3131+type dtype = [
3232+ | `Bool
3333+ | `Int of endian * int
3434+ | `Uint of endian * int
3535+ | `Float of endian * int
3636+ | `Complex of endian * int
3737+ | `Timedelta of endian * string
3838+ | `Datetime of endian * string
3939+ | `String of int
4040+ | `Unicode of endian * int
4141+ | `Raw of int
4242+ | `Structured of (string * dtype * int list option) list
4343+]
4444+4545+val dtype_jsont : dtype Jsont.t
4646+(** Codec for {!dtype}. Simple types decode/encode as JSON strings;
4747+ structured types decode/encode as JSON arrays. *)
4848+2249(** Catch-all type for unrecognized v2 codecs.
23502451 Represents objects with an ["id"] key plus arbitrary extra fields,
+53
test/test_zarr_jsont.ml
···5555 (match v with `Bytes s -> assert (String.length s = 3) | _ -> assert false);
5656 print_endline "test_fill_value: ok"
57575858+let test_dtype () =
5959+ let dt = Zarr_jsont.dtype_jsont in
6060+ (* simple float *)
6161+ let v = decode dt {|"<f8"|} in
6262+ assert (v = `Float (`Little, 8));
6363+ (* big-endian int *)
6464+ let v = decode dt {|">i4"|} in
6565+ assert (v = `Int (`Big, 4));
6666+ (* boolean *)
6767+ let v = decode dt {|"|b1"|} in
6868+ assert (v = `Bool);
6969+ (* unsigned int *)
7070+ let v = decode dt {|"<u2"|} in
7171+ assert (v = `Uint (`Little, 2));
7272+ (* complex *)
7373+ let v = decode dt {|"<c16"|} in
7474+ assert (v = `Complex (`Little, 16));
7575+ (* datetime *)
7676+ let v = decode dt {|"<M8[ns]"|} in
7777+ assert (v = `Datetime (`Little, "ns"));
7878+ (* timedelta *)
7979+ let v = decode dt {|"<m8[s]"|} in
8080+ assert (v = `Timedelta (`Little, "s"));
8181+ (* fixed string *)
8282+ let v = decode dt {|"|S10"|} in
8383+ assert (v = `String 10);
8484+ (* unicode *)
8585+ let v = decode dt {|"<U5"|} in
8686+ assert (v = `Unicode (`Little, 5));
8787+ (* void/raw *)
8888+ let v = decode dt {|"|V16"|} in
8989+ assert (v = `Raw 16);
9090+ (* structured *)
9191+ let v = decode dt {|[["x","<f4"],["y","<f4",[3]]]|} in
9292+ (match v with
9393+ | `Structured fields ->
9494+ assert (List.length fields = 2);
9595+ let (n1, t1, s1) = List.nth fields 0 in
9696+ assert (n1 = "x" && t1 = `Float (`Little, 4) && s1 = None);
9797+ let (n2, t2, s2) = List.nth fields 1 in
9898+ assert (n2 = "y" && t2 = `Float (`Little, 4) && s2 = Some [3])
9999+ | _ -> assert false);
100100+ (* roundtrip simple *)
101101+ let json' = encode dt (`Float (`Little, 8)) in
102102+ assert (decode dt json' = `Float (`Little, 8));
103103+ (* roundtrip structured *)
104104+ let s = `Structured [("x", `Float (`Little, 4), None);
105105+ ("y", `Int (`Big, 2), Some [3; 2])] in
106106+ let json' = encode dt s in
107107+ assert (decode dt json' = s);
108108+ print_endline "test_dtype: ok"
109109+58110let () = test_other_codec ()
59111let () = test_other_ext ()
60112let () = test_fill_value ()
113113+let () = test_dtype ()