OCaml Zarr jsont codecs for v2/v3 and common conventions
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

refactor: dtype codec uses proper jsont error handling

- parse_typestr returns (dtype, string) result instead of raising
- Simple dtype uses Jsont.of_of_string (result-based, proper errors)
- Structured dtype uses Jsont.Error.msgf for decode errors
- decode_field and decode_shape return result types
- No more invalid_arg or manual JSON AST matching in dtype codec

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

+100 -96
+100 -96
src/zarr_jsont.ml
··· 109 109 | `Structured of (string * dtype * int list option) list 110 110 ] 111 111 112 - let parse_endian = function 113 - | '<' -> `Little 114 - | '>' -> `Big 115 - | '|' | '=' -> `Not_applicable 116 - | c -> invalid_arg (Printf.sprintf "dtype: unknown endian char %c" c) 112 + let parse_endian c = 113 + match c with 114 + | '<' -> Ok `Little 115 + | '>' -> Ok `Big 116 + | '|' | '=' -> Ok `Not_applicable 117 + | c -> Error (Printf.sprintf "unknown endian char '%c'" c) 117 118 118 119 let endian_char = function 119 - | `Little -> '<' 120 - | `Big -> '>' 121 - | `Not_applicable -> '|' 120 + | `Little -> '<' | `Big -> '>' | `Not_applicable -> '|' 122 121 123 - (* Parse a NumPy typestr like "<f8", "|b1", "<M8[ns]", "|S10", etc. *) 124 - let parse_typestr s = 125 - if String.length s < 3 then 126 - invalid_arg (Printf.sprintf "dtype: typestr too short: %s" s); 127 - let endian = parse_endian s.[0] in 128 - let kind = s.[1] in 129 - let rest = String.sub s 2 (String.length s - 2) in 130 - match kind with 131 - | 'b' -> 132 - let n = int_of_string rest in 133 - if n = 1 then `Bool 134 - else invalid_arg (Printf.sprintf "dtype: invalid bool size %d" n) 135 - | 'i' -> `Int (endian, int_of_string rest) 136 - | 'u' -> `Uint (endian, int_of_string rest) 137 - | 'f' -> `Float (endian, int_of_string rest) 138 - | 'c' -> `Complex (endian, int_of_string rest) 139 - | 'M' -> 140 - (* e.g. "8[ns]" *) 141 - let unit_str = 142 - if String.length rest > 2 && rest.[0] = '8' && rest.[1] = '[' then begin 143 - let close = String.index rest ']' in 144 - String.sub rest 2 (close - 2) 145 - end else invalid_arg (Printf.sprintf "dtype: invalid datetime typestr: %s" s) 146 - in 147 - `Datetime (endian, unit_str) 148 - | 'm' -> 149 - let unit_str = 150 - if String.length rest > 2 && rest.[0] = '8' && rest.[1] = '[' then begin 151 - let close = String.index rest ']' in 152 - String.sub rest 2 (close - 2) 153 - end else invalid_arg (Printf.sprintf "dtype: invalid timedelta typestr: %s" s) 154 - in 155 - `Timedelta (endian, unit_str) 156 - | 'S' -> `String (int_of_string rest) 157 - | 'U' -> `Unicode (endian, int_of_string rest) 158 - | 'V' -> `Raw (int_of_string rest) 159 - | c -> invalid_arg (Printf.sprintf "dtype: unknown kind char %c" c) 122 + let parse_typestr s : (dtype, string) result = 123 + if String.length s < 3 then Error (Printf.sprintf "typestr too short: %s" s) 124 + else 125 + match parse_endian s.[0] with 126 + | Error _ as e -> e 127 + | Ok endian -> 128 + let kind = s.[1] in 129 + let rest = String.sub s 2 (String.length s - 2) in 130 + match kind with 131 + | 'b' -> 132 + let n = int_of_string rest in 133 + if n = 1 then Ok `Bool 134 + else Error (Printf.sprintf "invalid bool size %d" n) 135 + | 'i' -> Ok (`Int (endian, int_of_string rest)) 136 + | 'u' -> Ok (`Uint (endian, int_of_string rest)) 137 + | 'f' -> Ok (`Float (endian, int_of_string rest)) 138 + | 'c' -> Ok (`Complex (endian, int_of_string rest)) 139 + | 'M' -> 140 + if String.length rest > 2 && rest.[0] = '8' && rest.[1] = '[' then 141 + let close = String.index rest ']' in 142 + Ok (`Datetime (endian, String.sub rest 2 (close - 2))) 143 + else Error (Printf.sprintf "invalid datetime typestr: %s" s) 144 + | 'm' -> 145 + if String.length rest > 2 && rest.[0] = '8' && rest.[1] = '[' then 146 + let close = String.index rest ']' in 147 + Ok (`Timedelta (endian, String.sub rest 2 (close - 2))) 148 + else Error (Printf.sprintf "invalid timedelta typestr: %s" s) 149 + | 'S' -> Ok (`String (int_of_string rest)) 150 + | 'U' -> Ok (`Unicode (endian, int_of_string rest)) 151 + | 'V' -> Ok (`Raw (int_of_string rest)) 152 + | c -> Error (Printf.sprintf "unknown dtype kind '%c'" c) 160 153 161 154 let encode_typestr (dt : dtype) : string = 162 155 match dt with ··· 170 163 | `String n -> Printf.sprintf "|S%d" n 171 164 | `Unicode (e, n) -> Printf.sprintf "%cU%d" (endian_char e) n 172 165 | `Raw n -> Printf.sprintf "|V%d" n 173 - | `Structured _ -> invalid_arg "dtype: encode_typestr called on structured dtype" 166 + | `Structured _ -> assert false (* structured uses array encoding *) 167 + 168 + (* Structured dtype field: ["name", "<f4"] or ["name", "<f4", [3]] *) 169 + let decode_shape items = 170 + let rec aux = function 171 + | [] -> Ok [] 172 + | Jsont.Number (f, _) :: rest -> 173 + (match aux rest with Ok tl -> Ok (int_of_float f :: tl) | e -> e) 174 + | _ -> Error "expected integer in shape" 175 + in 176 + aux items 177 + 178 + let decode_field self (json_items : Jsont.json list) 179 + : (string * dtype * int list option, string) result = 180 + let decode_dt = function 181 + | Jsont.String (typestr, _) -> parse_typestr typestr 182 + | Jsont.Array _ as nested -> 183 + Jsont.Json.decode self nested 184 + | _ -> Error "expected string or array for dtype" 185 + in 186 + match json_items with 187 + | Jsont.String (name, _) :: dt_json :: rest -> 188 + (match decode_dt dt_json with 189 + | Error _ as e -> e 190 + | Ok dt -> 191 + match rest with 192 + | [] -> Ok (name, dt, None) 193 + | [Jsont.Array (shape_items, _)] -> 194 + (match decode_shape shape_items with 195 + | Ok s -> Ok (name, dt, Some s) 196 + | Error e -> Error e) 197 + | _ -> Error "expected optional shape array as third element") 198 + | _ -> Error "field descriptor must start with a string name" 174 199 175 200 let rec dtype_jsont_lazy : dtype Jsont.t Lazy.t = lazy ( 176 201 let self = Jsont.rec' dtype_jsont_lazy in 177 202 let simple_codec = 178 - Jsont.map ~kind:"dtype_string" 179 - ~dec:parse_typestr 180 - ~enc:encode_typestr 181 - Jsont.string 203 + Jsont.of_of_string ~kind:"dtype" parse_typestr ~enc:encode_typestr 182 204 in 183 - let decode_shape items = 184 - List.map (function 185 - | Jsont.Number (f, _) -> int_of_float f 186 - | j -> invalid_arg (Format.asprintf "dtype: expected int in shape, got %a" Jsont.pp_json j)) 187 - items 188 - in 189 - let decode_nested json = 190 - match Jsont.Json.decode self json with 191 - | Ok v -> v 192 - | Error e -> invalid_arg (Printf.sprintf "dtype: nested structured decode error: %s" e) 193 - in 194 - let encode_nested dt = 195 - match Jsont.Json.encode self dt with 196 - | Ok j -> j 197 - | Error e -> invalid_arg (Printf.sprintf "dtype: encode error: %s" e) 205 + let encode_field (name, dt, shape_opt) = 206 + let name_json = Jsont.Json.string name in 207 + let dtype_json = match dt with 208 + | `Structured _ -> 209 + (match Jsont.Json.encode self dt with 210 + | Ok j -> j 211 + | Error e -> Jsont.Error.msg Jsont.Meta.none e) 212 + | _ -> Jsont.Json.string (encode_typestr dt) 213 + in 214 + match shape_opt with 215 + | None -> Jsont.Json.list [ name_json; dtype_json ] 216 + | Some shape -> 217 + Jsont.Json.list [ name_json; dtype_json; 218 + Jsont.Json.list (List.map (fun n -> Jsont.Json.number (float_of_int n)) shape) ] 198 219 in 199 - let decode_field (json_items : Jsont.json list) : string * dtype * int list option = 200 - match json_items with 201 - | [ Jsont.String (name, _); Jsont.String (typestr, _) ] -> 202 - (name, parse_typestr typestr, None) 203 - | [ Jsont.String (name, _); Jsont.String (typestr, _); Jsont.Array (shape_items, _) ] -> 204 - (name, parse_typestr typestr, Some (decode_shape shape_items)) 205 - | [ Jsont.String (name, _); (Jsont.Array _ as nested_json) ] -> 206 - (name, decode_nested nested_json, None) 207 - | [ Jsont.String (name, _); (Jsont.Array _ as nested_json); Jsont.Array (shape_items, _) ] -> 208 - (name, decode_nested nested_json, Some (decode_shape shape_items)) 209 - | _ -> invalid_arg "dtype: invalid field descriptor" 220 + let decode_fields fields_json = 221 + let rec aux = function 222 + | [] -> Ok [] 223 + | Jsont.Array (items, _) :: rest -> 224 + Result.bind (decode_field self items) (fun field -> 225 + Result.map (fun tl -> field :: tl) (aux rest)) 226 + | _ -> Error "expected array for each field descriptor" 227 + in 228 + match aux fields_json with 229 + | Ok fields -> (`Structured fields : dtype) 230 + | Error e -> Jsont.Error.msgf Jsont.Meta.none "dtype: %s" e 210 231 in 211 232 let structured_codec = 212 - Jsont.map ~kind:"dtype_array" 213 - ~dec:(fun (fields_json : Jsont.json list) -> 214 - let fields = List.map (function 215 - | Jsont.Array (items, _) -> decode_field items 216 - | j -> invalid_arg (Format.asprintf "dtype: expected array field descriptor, got %a" Jsont.pp_json j)) 217 - fields_json 218 - in 219 - `Structured fields) 233 + Jsont.map ~kind:"dtype" 234 + ~dec:decode_fields 220 235 ~enc:(function 221 - | `Structured fields -> 222 - List.map (fun (name, dt, shape_opt) -> 223 - let name_json = Jsont.Json.string name in 224 - let dtype_json = encode_nested dt in 225 - match shape_opt with 226 - | None -> Jsont.Json.list [ name_json; dtype_json ] 227 - | Some shape -> 228 - let shape_json = Jsont.Json.list 229 - (List.map (fun n -> Jsont.Json.number (float_of_int n)) shape) 230 - in 231 - Jsont.Json.list [ name_json; dtype_json; shape_json ]) 232 - fields 236 + | `Structured fields -> List.map encode_field fields 233 237 | _ -> assert false) 234 238 (Jsont.list Jsont.json) 235 239 in