Declarative CSV codecs
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

csvt: unified GADT with String/Obj/Map cases (soup paper)

Replace separate col_codec and record-based t with a single GADT
following the recipe from Bünzli's "An alphabet for your data soups":
String for fields, Obj for rows, Map for composition. Column values
are decoded into a heterogeneous Dict keyed by Type.Id.t witnesses
and applied to the constructor via dec_fun — fully type-safe with
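no Obj.repr. Also replaces col_decode/col_encode with
decode_field/encode_field, which work on the unified type, and
reimplements the existing get_col query on top of the Row builder.
The col_kind function and the ?kind/?doc optional arguments are removed.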

+266 -273
+182 -167
lib/csvt.ml
··· 3 3 SPDX-License-Identifier: ISC 4 4 ---------------------------------------------------------------------------*) 5 5 6 - (** Declarative CSV codecs. *) 6 + (** Declarative CSV codecs. 7 + 8 + Following the finally-tagged representation from Bünzli's "An alphabet for 9 + your data soups". *) 7 10 8 11 (* {1 Errors} *) 9 12 ··· 26 29 let pp_error ppf e = Format.pp_print_string ppf (error_to_string e) 27 30 let ( let* ) = Result.bind 28 31 29 - (* {1 Column codecs} *) 32 + (* {1 Data types} 30 33 31 - type ('a, 'b) col_base_map = { 32 - col_dec : string -> ('b, string) result; 33 - col_enc : 'b -> string; 34 - } 34 + A single GADT [t] with one case per sort of CSV data: 35 35 36 - type _ col_codec = 37 - | String_col : string col_codec 38 - | Int_col : int col_codec 39 - | Float_col : float col_codec 40 - | Bool_col : bool col_codec 41 - | Nullable_float_col : float col_codec 42 - | Nullable_int_col : int col_codec 43 - | Option_col : 'a col_codec -> 'a option col_codec 44 - | Col_map : ('a, 'b) col_base_map * string -> 'b col_codec 36 + - [String] — base case for CSV fields (all CSV values are strings) 37 + - [Obj] — maps a CSV row (named columns) to an ML value 38 + - [Map] — composes CSV types 45 39 46 - let is_null s = String.equal s "NULL" || String.equal s "" 40 + Column values are decoded into a heterogeneous [Dict] keyed by 41 + [Type.Id.t] witnesses, then applied to the constructor via [apply_dict]. *) 47 42 48 - let rec col_decode : type a. 
a col_codec -> string -> (a, string) result = 49 - fun c s -> 50 - match c with 51 - | String_col -> Ok s 52 - | Int_col -> ( 53 - match int_of_string_opt s with 54 - | Some i -> Ok i 55 - | None -> Error "not an integer") 56 - | Float_col -> ( 57 - match float_of_string_opt s with 58 - | Some f -> Ok f 59 - | None -> Error "not a float") 60 - | Bool_col -> ( 61 - match String.lowercase_ascii s with 62 - | "true" | "1" | "yes" -> Ok true 63 - | "false" | "0" | "no" -> Ok false 64 - | _ -> Error "not a boolean") 65 - | Nullable_float_col -> ( 66 - if is_null s then Ok Float.nan 67 - else 68 - match float_of_string_opt s with 69 - | Some f -> Ok f 70 - | None -> Error "not a float") 71 - | Nullable_int_col -> ( 72 - if is_null s then Ok (-1) 73 - else 74 - match int_of_string_opt s with 75 - | Some i -> Ok i 76 - | None -> Error "not an integer") 77 - | Option_col inner -> 78 - if is_null s then Ok None else Result.map Option.some (col_decode inner s) 79 - | Col_map (m, _) -> m.col_dec s 43 + type ('a, 'b) base_map = { dec : 'a -> ('b, string) result; enc : 'b -> 'a } 80 44 81 - let rec col_encode : type a. a col_codec -> a -> string = 82 - fun c v -> 83 - match c with 84 - | String_col -> v 85 - | Int_col -> string_of_int v 86 - | Float_col -> string_of_float v 87 - | Bool_col -> string_of_bool v 88 - | Nullable_float_col -> if Float.is_nan v then "NULL" else string_of_float v 89 - | Nullable_int_col -> if v = -1 then "NULL" else string_of_int v 90 - | Option_col inner -> ( 91 - match v with None -> "NULL" | Some x -> col_encode inner x) 92 - | Col_map (m, _) -> m.col_enc v 45 + type ('ret, 'f) dec_fun = 46 + | Dec_fun : 'f -> ('ret, 'f) dec_fun 47 + | Dec_app : ('ret, 'a -> 'b) dec_fun * 'a Type.Id.t -> ('ret, 'b) dec_fun 93 48 94 - let rec col_kind : type a. 
a col_codec -> string = function 95 - | String_col -> "string" 96 - | Int_col -> "int" 97 - | Float_col -> "float" 98 - | Bool_col -> "bool" 99 - | Nullable_float_col -> "nullable_float" 100 - | Nullable_int_col -> "nullable_int" 101 - | Option_col inner -> "option(" ^ col_kind inner ^ ")" 102 - | Col_map (_, kind) -> kind 49 + type ('o, 'a) mem_map = { 50 + name : string; 51 + type' : 'a t; 52 + id : 'a Type.Id.t; 53 + dec_absent : 'a option; 54 + enc : 'o -> 'a; 55 + } 103 56 104 - let col_map ?(kind = "custom") ~dec ~enc () = 105 - Col_map ({ col_dec = dec; col_enc = enc }, kind) 57 + and mem_dec = Mem_dec : ('o, 'a) mem_map -> mem_dec 58 + and 'o mem_enc = Mem_enc : ('o, 'a) mem_map -> 'o mem_enc 106 59 107 - let string = String_col 108 - let int = Int_col 109 - let float = Float_col 110 - let bool = Bool_col 111 - let nullable_float = Nullable_float_col 112 - let nullable_int = Nullable_int_col 113 - let option c = Option_col c 60 + and ('o, 'dec) obj_map = { 61 + dec : ('o, 'dec) dec_fun; 62 + mem_decs : mem_dec list; 63 + mem_encs : 'o mem_enc list; 64 + } 114 65 115 - (* {1 Row codec internals} 66 + and ('a, 'b) map = { dom : 'a t; map : ('a, 'b) base_map } 116 67 117 - Following the finally-tagged representation from Bünzli's "An alphabet 118 - for your data soups". We use a [dec_fun] GADT to represent the 119 - partially-applied constructor with [Type.Id.t] witnesses as placeholders 120 - for argument values, and a heterogeneous [Dict] to collect decoded 121 - column values. This is fully type-safe — no [Obj.repr] needed. 
*) 68 + and _ t = 69 + | String : (string, 'a) base_map -> 'a t 70 + | Obj : ('o, 'o) obj_map -> 'o t 71 + | Map : ('a, 'b) map -> 'b t 122 72 123 - (* {2 Heterogeneous dictionary} *) 73 + (* {1 Heterogeneous dictionary} *) 124 74 125 75 module Dict = struct 126 76 module M = Map.Make (Int) ··· 141 91 | None -> assert false) 142 92 end 143 93 144 - (* {2 Constructor GADT} *) 145 - 146 - type ('ret, 'f) dec_fun = 147 - | Dec_fun : 'f -> ('ret, 'f) dec_fun 148 - | Dec_app : ('ret, 'a -> 'b) dec_fun * 'a Type.Id.t -> ('ret, 'b) dec_fun 149 - 150 94 let rec apply_dict : type ret f. (ret, f) dec_fun -> Dict.t -> f = 151 95 fun dec dict -> 152 96 match dec with 153 97 | Dec_fun f -> f 154 98 | Dec_app (f, arg) -> (apply_dict f dict) (Option.get (Dict.find arg dict)) 155 99 156 - (* {2 Column maps} *) 100 + (* {1 Decode and encode} 101 + 102 + The CSV equivalents of the paper's [decode] and [encode] functions, 103 + recursing on the [t] GADT. *) 104 + 105 + let rec decode_field : type a. a t -> string -> (a, string) result = 106 + fun t s -> 107 + match t with 108 + | String m -> m.dec s 109 + | Map m -> 110 + let* v = decode_field m.dom s in 111 + m.map.dec v 112 + | Obj _ -> Error "cannot decode a row from a single field" 113 + 114 + let rec encode_field : type a. 
a t -> a -> string = 115 + fun t v -> 116 + match t with 117 + | String m -> m.enc v 118 + | Map m -> encode_field m.dom (m.map.enc v) 119 + | Obj _ -> invalid_arg "Csvt: cannot encode a row to a single field" 120 + 121 + (* {1 Base codecs} *) 122 + 123 + let string : string t = String { dec = (fun s -> Ok s); enc = Fun.id } 124 + 125 + let int : int t = 126 + String 127 + { 128 + dec = 129 + (fun s -> 130 + match int_of_string_opt s with 131 + | Some i -> Ok i 132 + | None -> Error "not an integer"); 133 + enc = string_of_int; 134 + } 135 + 136 + let float : float t = 137 + String 138 + { 139 + dec = 140 + (fun s -> 141 + match float_of_string_opt s with 142 + | Some f -> Ok f 143 + | None -> Error "not a float"); 144 + enc = string_of_float; 145 + } 146 + 147 + let bool : bool t = 148 + String 149 + { 150 + dec = 151 + (fun s -> 152 + match String.lowercase_ascii s with 153 + | "true" | "1" | "yes" -> Ok true 154 + | "false" | "0" | "no" -> Ok false 155 + | _ -> Error "not a boolean"); 156 + enc = string_of_bool; 157 + } 157 158 158 - type ('o, 'a) col_map = { 159 - name : string; 160 - type' : 'a col_codec; 161 - id : 'a Type.Id.t; 162 - dec_absent : 'a option; 163 - enc : 'o -> 'a; 164 - } 159 + let is_null s = String.equal s "NULL" || String.equal s "" 165 160 166 - type col_dec = Col_dec : ('o, 'a) col_map -> col_dec 167 - type 'o col_enc = Col_enc : ('o, 'a) col_map -> 'o col_enc 161 + let nullable_float : float t = 162 + String 163 + { 164 + dec = 165 + (fun s -> 166 + if is_null s then Ok Float.nan 167 + else 168 + match float_of_string_opt s with 169 + | Some f -> Ok f 170 + | None -> Error "not a float"); 171 + enc = (fun f -> if Float.is_nan f then "NULL" else string_of_float f); 172 + } 173 + 174 + let nullable_int : int t = 175 + String 176 + { 177 + dec = 178 + (fun s -> 179 + if is_null s then Ok (-1) 180 + else 181 + match int_of_string_opt s with 182 + | Some i -> Ok i 183 + | None -> Error "not an integer"); 184 + enc = (fun i -> if i = -1 then 
"NULL" else string_of_int i); 185 + } 168 186 169 - (* {1 Row codec} *) 187 + let option t = 188 + String 189 + { 190 + dec = 191 + (fun s -> 192 + if is_null s then Ok None 193 + else Result.map Option.some (decode_field t s)); 194 + enc = (function None -> "NULL" | Some v -> encode_field t v); 195 + } 170 196 171 - type 'a t = { 172 - kind : string; 173 - dec : ('a, 'a) dec_fun; 174 - col_decs : col_dec list; 175 - col_encs : 'a col_enc list; 176 - } 197 + let col_map ~dec ~enc () = String { dec; enc } 177 198 178 199 (* {1 Row builder} *) 179 200 180 201 module Row = struct 181 202 type 'a codec = 'a t 203 + type nonrec ('o, 'dec) map = ('o, 'dec) obj_map 182 204 183 - type ('o, 'dec) map = { 184 - m_kind : string; 185 - m_dec : ('o, 'dec) dec_fun; 186 - m_col_decs : col_dec list; (* reversed *) 187 - m_col_encs : 'o col_enc list; (* reversed *) 188 - } 205 + let obj dec = { dec = Dec_fun dec; mem_decs = []; mem_encs = [] } 189 206 190 - let obj ?(kind = "row") dec = 191 - { m_kind = kind; m_dec = Dec_fun dec; m_col_decs = []; m_col_encs = [] } 192 - 193 - let col ?doc:_ ?dec_absent name type' ~enc m = 207 + let col ?dec_absent name type' ~enc m = 194 208 let id = Type.Id.make () in 195 - let cm = { name; type'; id; dec_absent; enc } in 209 + let mm = { name; type'; id; dec_absent; enc } in 196 210 { 197 - m_kind = m.m_kind; 198 - m_dec = Dec_app (m.m_dec, id); 199 - m_col_decs = Col_dec cm :: m.m_col_decs; 200 - m_col_encs = Col_enc cm :: m.m_col_encs; 211 + dec = Dec_app (m.dec, id); 212 + mem_decs = Mem_dec mm :: m.mem_decs; 213 + mem_encs = Mem_enc mm :: m.mem_encs; 201 214 } 202 215 203 - let finish m : _ t = 204 - { 205 - kind = m.m_kind; 206 - dec = m.m_dec; 207 - col_decs = List.rev m.m_col_decs; 208 - col_encs = List.rev m.m_col_encs; 209 - } 216 + let finish m = 217 + Obj 218 + { 219 + dec = m.dec; 220 + mem_decs = List.rev m.mem_decs; 221 + mem_encs = List.rev m.mem_encs; 222 + } 210 223 end 211 224 225 + let as_obj : type a. 
a t -> (a, a) obj_map = function 226 + | Obj m -> m 227 + | _ -> invalid_arg "Csvt: expected a row codec" 228 + 212 229 (* {1 Introspection} *) 213 230 214 - let col_names codec = List.map (fun (Col_dec cm) -> cm.name) codec.col_decs 215 - let col_count codec = List.length codec.col_decs 231 + let col_names t = 232 + let m = as_obj t in 233 + List.map (fun (Mem_dec mm) -> mm.name) m.mem_decs 216 234 217 - (* {1 Query support} *) 235 + let col_count t = 236 + let m = as_obj t in 237 + List.length m.mem_decs 218 238 219 - let get_col name (type a) (c : a col_codec) : a t = 220 - let id = Type.Id.make () in 221 - let cm = { name; type' = c; id; dec_absent = None; enc = Fun.id } in 222 - { 223 - kind = "get_col(" ^ name ^ ")"; 224 - dec = Dec_app (Dec_fun Fun.id, id); 225 - col_decs = [ Col_dec cm ]; 226 - col_encs = [ Col_enc cm ]; 227 - } 239 + (* {1 Query support} 240 + 241 + The paper's [get_mem] combinator, adapted for CSV. *) 242 + 243 + let get_col name t = Row.(obj Fun.id |> col name t ~enc:Fun.id |> finish) 228 244 229 245 (* {1 Header resolution} *) 230 246 ··· 232 248 233 249 type 'a resolved = { 234 250 r_indices : int array; 235 - r_cols : col_dec array; 251 + r_mems : mem_dec array; 236 252 r_dec : ('a, 'a) dec_fun; 237 253 } 238 254 ··· 246 262 in 247 263 go 0 248 264 249 - let resolve codec header = 250 - let cols = Array.of_list codec.col_decs in 251 - let n = Array.length cols in 265 + let resolve t header = 266 + let m = as_obj t in 267 + let mems = Array.of_list m.mem_decs in 268 + let n = Array.length mems in 252 269 let indices = Array.make n (-1) in 253 270 let rec go i = 254 - if i >= n then Ok { r_indices = indices; r_cols = cols; r_dec = codec.dec } 271 + if i >= n then Ok { r_indices = indices; r_mems = mems; r_dec = m.dec } 255 272 else 256 - let (Col_dec cm) = cols.(i) in 257 - let idx = find_col header cm.name in 273 + let (Mem_dec mm) = mems.(i) in 274 + let idx = find_col header mm.name in 258 275 indices.(i) <- idx; 259 - if idx < 0 && 
Option.is_none cm.dec_absent then 260 - Error (Missing_column cm.name) 276 + if idx < 0 && Option.is_none mm.dec_absent then 277 + Error (Missing_column mm.name) 261 278 else go (i + 1) 262 279 in 263 280 go 0 ··· 265 282 (* {1 Row decoding} *) 266 283 267 284 let decode_row resolved row_num fields = 268 - let n = Array.length resolved.r_cols in 285 + let n = Array.length resolved.r_mems in 269 286 let nf = Array.length fields in 270 287 let rec go i dict = 271 288 if i >= n then Ok (apply_dict resolved.r_dec dict) 272 289 else 273 - let (Col_dec cm) = resolved.r_cols.(i) in 290 + let (Mem_dec mm) = resolved.r_mems.(i) in 274 291 let idx = resolved.r_indices.(i) in 275 292 if idx < 0 then 276 - match cm.dec_absent with 277 - | Some v -> go (i + 1) (Dict.add cm.id v dict) 278 - | None -> Error (Missing_column cm.name) 293 + match mm.dec_absent with 294 + | Some v -> go (i + 1) (Dict.add mm.id v dict) 295 + | None -> Error (Missing_column mm.name) 279 296 else if idx >= nf then 280 297 Error (Truncated_row { row = row_num; expected = idx + 1; got = nf }) 281 298 else 282 299 let s = fields.(idx) in 283 - match col_decode cm.type' s with 284 - | Ok v -> go (i + 1) (Dict.add cm.id v dict) 300 + match decode_field mm.type' s with 301 + | Ok v -> go (i + 1) (Dict.add mm.id v dict) 285 302 | Error msg -> 286 303 Error 287 - (Bad_value { row = row_num; column = cm.name; value = s; msg }) 304 + (Bad_value { row = row_num; column = mm.name; value = s; msg }) 288 305 in 289 306 go 0 Dict.empty 290 307 291 308 (* {1 Row encoding} *) 292 309 293 - let encode_header codec = 294 - Array.of_list (List.map (fun (Col_enc cm) -> cm.name) codec.col_encs) 310 + let encode_header t = 311 + let m = as_obj t in 312 + Array.of_list (List.map (fun (Mem_enc mm) -> mm.name) m.mem_encs) 295 313 296 - let encode_row codec v = 314 + let encode_row t v = 315 + let m = as_obj t in 297 316 Array.of_list 298 - (List.map 299 - (fun (Col_enc cm) -> col_encode cm.type' (cm.enc v)) 300 - codec.col_encs) 
317 + (List.map (fun (Mem_enc mm) -> encode_field mm.type' (mm.enc v)) m.mem_encs) 301 318 302 319 (* {1 CSV splitting} *) 303 320 ··· 370 387 371 388 let write codec rows writer = 372 389 let w s = Bytesrw.Bytes.Writer.write_string writer s in 373 - (* Write header *) 374 390 let header = encode_header codec in 375 391 w (String.concat "," (Array.to_list header)); 376 392 w "\n"; 377 - (* Write rows *) 378 393 List.iter 379 394 (fun row -> 380 395 let fields = encode_row codec row in
+71 -93
lib/csvt.mli
··· 5 5 6 6 (** Declarative CSV codecs. 7 7 8 - Csvt provides a bidirectional codec system for CSV files, following the 9 - finally-tagged representation from Bünzli's 10 - {{:https://erratique.ch/software/jsont}"An alphabet for your data soups"}. 11 - Column values are decoded to a heterogeneous dictionary keyed by 12 - {!Type.Id.t} witnesses and applied to the constructor via a [dec_fun] GADT — 13 - no [Obj.repr] needed. 8 + Csvt defines a generalized algebraic datatype whose values denote 9 + bidirectional maps between CSV data and the OCaml values of your choice. 10 + Field-level and row-level codecs share a single type {!t}. Row codecs are 11 + built declaratively using constructors and field accessors with the {!Row} 12 + builder. See 13 + {{:https://erratique.ch/repos/jsont/paper/soup.pdf}An alphabet for your data 14 + soups} for the technique. 14 15 15 16 {2 Quick Start} 16 17 17 - Define a codec for your OCaml types: 18 18 {v 19 19 type point = { x : float; y : float; label : string } 20 20 ··· 26 26 |> col "label" string ~enc:(fun p -> p.label) 27 27 |> finish 28 28 )) 29 - v} 30 - 31 - Decode from a file: 32 - {v 33 - let () = 34 - match Csvt.decode_file point_codec "points.csv" with 35 - | Ok points -> List.iter (fun p -> Printf.printf "%s\n" p.label) points 36 - | Error e -> prerr_endline (Csvt.error_to_string e) 37 29 v} *) 38 30 39 31 (** {1:errors Errors} *) ··· 51 43 val pp_error : error Fmt.t 52 44 (** [pp_error] pretty-prints an error. *) 53 45 54 - (** {1:codec Column Codecs} *) 46 + (** {1:codec Codecs} 55 47 56 - type 'a col_codec 57 - (** The type of column-level codecs. A column codec knows how to convert between 58 - a single CSV field (string) and a typed OCaml value. *) 48 + A unified type for both field-level and row-level codecs. Field codecs 49 + ([string], [int], [float], ...) and row codecs (built with {!Row}) are all 50 + values of type ['a t]. 
*) 51 + 52 + type 'a t 53 + (** The type of CSV codecs mapping CSV data to OCaml values of type ['a]. *) 54 + 55 + (** {1:field Field Codecs} *) 59 56 60 - val string : string col_codec 61 - (** Codec for string fields (identity). *) 57 + val string : string t 58 + (** Identity codec for string fields. *) 62 59 63 - val int : int col_codec 60 + val int : int t 64 61 (** Codec for integer fields. *) 65 62 66 - val float : float col_codec 63 + val float : float t 67 64 (** Codec for floating-point fields. *) 68 65 69 - val bool : bool col_codec 70 - (** [bool] decodes "true"/"1" as [true], "false"/"0" as [false]. *) 66 + val bool : bool t 67 + (** [bool] decodes ["true"]/["1"]/["yes"] as [true], ["false"]/["0"]/["no"] as 68 + [false]. *) 71 69 72 - val nullable_float : float col_codec 73 - (** [nullable_float] is like {!float} but treats "NULL" and empty strings as 74 - [nan]. *) 70 + val nullable_float : float t 71 + (** Like {!float} but treats ["NULL"] and empty strings as [nan]. *) 75 72 76 - val nullable_int : int col_codec 77 - (** [nullable_int] is like {!int} but treats "NULL" and empty strings as [-1]. 78 - *) 73 + val nullable_int : int t 74 + (** Like {!int} but treats ["NULL"] and empty strings as [-1]. *) 79 75 80 - val option : 'a col_codec -> 'a option col_codec 81 - (** [option c] treats "NULL" and empty strings as [None]. *) 76 + val option : 'a t -> 'a option t 77 + (** [option t] treats ["NULL"] and empty strings as [None], otherwise decodes 78 + through [t] and wraps in [Some]. *) 82 79 83 80 val col_map : 84 - ?kind:string -> 85 - dec:(string -> ('a, string) result) -> 86 - enc:('a -> string) -> 87 - unit -> 88 - 'a col_codec 89 - (** [col_map ~dec ~enc ()] creates a column codec from raw decode/encode 90 - functions. *) 81 + dec:(string -> ('a, string) result) -> enc:('a -> string) -> unit -> 'a t 82 + (** [col_map ~dec ~enc ()] creates a custom field codec. 
*) 91 83 92 - val col_kind : 'a col_codec -> string 93 - (** [col_kind c] returns the kind description of column codec [c]. *) 84 + (** {1:decode Decode and Encode} *) 94 85 95 - val col_decode : 'a col_codec -> string -> ('a, string) result 96 - (** [col_decode c s] decodes a single CSV field using column codec [c]. *) 86 + val decode_field : 'a t -> string -> ('a, string) result 87 + (** [decode_field t s] decodes string [s] using CSV type [t]. *) 97 88 98 - val col_encode : 'a col_codec -> 'a -> string 99 - (** [col_encode c v] encodes a value using column codec [c]. *) 89 + val encode_field : 'a t -> 'a -> string 90 + (** [encode_field t v] encodes value [v] to a string using CSV type [t]. *) 91 + 92 + (** {1:introspect Introspection} *) 93 + 94 + val col_names : 'a t -> string list 95 + (** [col_names t] returns the column names of row codec [t]. *) 96 + 97 + val col_count : 'a t -> int 98 + (** [col_count t] returns the number of columns in row codec [t]. *) 99 + 100 + (** {1:query Query} *) 100 101 101 - (** {1:row Row Codecs} *) 102 + val get_col : string -> 'a t -> 'a t 103 + (** [get_col name t] creates a single-column row codec that extracts column 104 + [name] decoded with [t]. Useful for projecting a single column from a CSV 105 + file without defining a full row type. *) 102 106 103 - type 'a t 104 - (** The type of row-level codecs. A row codec maps between a CSV row (resolved 105 - through header column names) and a typed OCaml record. *) 107 + (** {1:row Row Builder} *) 106 108 107 - (** Row codec builder. *) 108 109 module Row : sig 109 110 type 'a codec = 'a t 110 111 111 112 type ('o, 'dec) map 112 - (** Builder state for a row codec. ['o] is the record type, ['dec] is the 113 - remaining decoder function type. *) 113 + (** Builder state. ['o] is the row type, ['dec] is the remaining constructor 114 + type. *) 114 115 115 - val obj : ?kind:string -> 'dec -> ('o, 'dec) map 116 - (** [obj f] starts building a row codec with decoder function [f]. 
116 + val obj : 'dec -> ('o, 'dec) map 117 + (** [obj f] starts building a row codec with constructor [f]. 117 118 118 119 {v 119 - obj (fun a b c -> { a; b; c }) 120 + Row.(obj (fun a b c -> { a; b; c }) 120 121 |> col "a" string ~enc:(fun r -> r.a) 121 122 |> col "b" int ~enc:(fun r -> r.b) 122 123 |> col "c" float ~enc:(fun r -> r.c) 123 - |> finish 124 + |> finish) 124 125 v} *) 125 126 126 127 val col : 127 - ?doc:string -> 128 128 ?dec_absent:'a -> 129 129 string -> 130 - 'a col_codec -> 130 + 'a t -> 131 131 enc:('o -> 'a) -> 132 132 ('o, 'a -> 'dec) map -> 133 133 ('o, 'dec) map 134 - (** [col name codec ~enc m] adds a column to the row builder. 134 + (** [col name type' ~enc m] adds a column to the row builder. 135 135 136 - @param name The CSV column header name. 137 - @param codec The codec for the column value. 138 - @param doc Documentation string. 139 136 @param dec_absent Default value if the column is missing from the header. 140 137 @param enc Extractor function for encoding. *) 141 138 ··· 143 140 (** [finish m] completes the row codec. *) 144 141 end 145 142 146 - (** {1:introspection Introspection} *) 147 - 148 - val col_names : 'a t -> string list 149 - (** [col_names codec] returns the list of column names in the codec. *) 150 - 151 - val col_count : 'a t -> int 152 - (** [col_count codec] returns the number of columns in the codec. *) 153 - 154 - (** {1:query Query Support} *) 155 - 156 - val get_col : string -> 'a col_codec -> 'a t 157 - (** [get_col name c] creates a single-column row codec that extracts just one 158 - column by name. Useful for querying a specific column from a CSV. *) 159 - 160 143 (** {1:header Header Resolution} *) 161 144 162 145 type header = string array ··· 167 150 performed once, giving O(1) per-row decoding. *) 168 151 169 152 val resolve : 'a t -> header -> ('a resolved, error) result 170 - (** [resolve codec header] resolves column names to indices. Returns an error if 171 - a required column is missing. 
*) 153 + (** [resolve t header] resolves column names to indices. *) 172 154 173 155 val decode_row : 'a resolved -> int -> string array -> ('a, error) result 174 - (** [decode_row resolved row_num fields] decodes a single CSV row. [row_num] is 175 - for error reporting (1-based). *) 156 + (** [decode_row resolved row_num fields] decodes a single CSV row. *) 176 157 177 158 val encode_header : 'a t -> header 178 - (** [encode_header codec] returns the CSV header for encoding. *) 159 + (** [encode_header t] returns the CSV header for encoding. *) 179 160 180 161 val encode_row : 'a t -> 'a -> string array 181 - (** [encode_row codec v] encodes a value as a CSV row. *) 162 + (** [encode_row t v] encodes a value as a CSV row. *) 182 163 183 164 (** {1:file File Operations} *) 184 165 185 166 val decode_channel : 'a t -> in_channel -> ('a list, error) result 186 - (** [decode_channel codec ic] decodes all rows from a CSV input channel. *) 167 + (** [decode_channel t ic] decodes all rows from a CSV input channel. *) 187 168 188 169 val decode_file : 'a t -> string -> ('a list, error) result 189 - (** [decode_file codec path] decodes all rows from a CSV file. *) 170 + (** [decode_file t path] decodes all rows from a CSV file. *) 190 171 191 172 val fold_channel : 192 173 'a t -> in_channel -> ('b -> 'a -> 'b) -> 'b -> ('b, error) result 193 - (** [fold_channel codec ic f acc] folds over rows without building an 194 - intermediate list. Useful for large files. *) 174 + (** [fold_channel t ic f acc] folds over rows without building a list. *) 195 175 196 176 val fold_file : 'a t -> string -> ('b -> 'a -> 'b) -> 'b -> ('b, error) result 197 - (** [fold_file codec path f acc] folds over rows from a file. *) 177 + (** [fold_file t path f acc] folds over rows from a file. *) 198 178 199 179 val decode_string : 'a t -> string -> ('a list, error) result 200 - (** [decode_string codec s] decodes all rows from a CSV string. 
*) 180 + (** [decode_string t s] decodes all rows from a CSV string. *) 201 181 202 182 (** {1:bytesrw Streaming I/O} *) 203 183 204 184 val read : 'a t -> Bytesrw.Bytes.Reader.t -> ('a list, error) result 205 - (** [read codec r] reads CSV data from reader [r] and decodes all rows using 206 - codec. *) 185 + (** [read t r] reads CSV data from reader [r] and decodes all rows. *) 207 186 208 187 val write : 'a t -> 'a list -> Bytesrw.Bytes.Writer.t -> unit 209 - (** [write codec rows w] encodes [rows] as CSV and writes them to writer [w]. 210 - The header line is written first, followed by each data row. *) 188 + (** [write t rows w] encodes [rows] as CSV and writes them to writer [w]. *)
+13 -13
test/test_csvt.ml
··· 386 386 387 387 let test_col_map () = 388 388 let hex_codec = 389 - Csvt.col_map ~kind:"hex" 389 + Csvt.col_map 390 390 ~dec:(fun s -> 391 391 match int_of_string_opt ("0x" ^ s) with 392 392 | Some i -> Ok i ··· 603 603 (* string *) 604 604 Alcotest.(check (result string string)) 605 605 "string dec" (Ok "hello") 606 - (Csvt.col_decode Csvt.string "hello"); 606 + (Csvt.decode_field Csvt.string "hello"); 607 607 Alcotest.(check string) 608 608 "string enc" "hello" 609 - (Csvt.col_encode Csvt.string "hello"); 609 + (Csvt.encode_field Csvt.string "hello"); 610 610 (* int *) 611 611 Alcotest.(check (result int string)) 612 612 "int dec" (Ok 42) 613 - (Csvt.col_decode Csvt.int "42"); 614 - Alcotest.(check string) "int enc" "42" (Csvt.col_encode Csvt.int 42); 613 + (Csvt.decode_field Csvt.int "42"); 614 + Alcotest.(check string) "int enc" "42" (Csvt.encode_field Csvt.int 42); 615 615 (* float *) 616 - (match Csvt.col_decode Csvt.float "3.14" with 616 + (match Csvt.decode_field Csvt.float "3.14" with 617 617 | Ok f -> Alcotest.(check (float 1e-6)) "float dec" 3.14 f 618 618 | Error e -> Alcotest.failf "float dec: %s" e); 619 - Alcotest.(check string) "float enc" "3.14" (Csvt.col_encode Csvt.float 3.14); 619 + Alcotest.(check string) "float enc" "3.14" (Csvt.encode_field Csvt.float 3.14); 620 620 (* bool *) 621 621 Alcotest.(check (result bool string)) 622 622 "bool dec" (Ok true) 623 - (Csvt.col_decode Csvt.bool "yes"); 624 - Alcotest.(check string) "bool enc" "true" (Csvt.col_encode Csvt.bool true); 623 + (Csvt.decode_field Csvt.bool "yes"); 624 + Alcotest.(check string) "bool enc" "true" (Csvt.encode_field Csvt.bool true); 625 625 (* option *) 626 626 Alcotest.(check (result (option int) string)) 627 627 "option dec none" (Ok None) 628 - (Csvt.col_decode (Csvt.option Csvt.int) "NULL"); 628 + (Csvt.decode_field (Csvt.option Csvt.int) "NULL"); 629 629 Alcotest.(check (result (option int) string)) 630 630 "option dec some" (Ok (Some 7)) 631 - (Csvt.col_decode 
(Csvt.option Csvt.int) "7"); 631 + (Csvt.decode_field (Csvt.option Csvt.int) "7"); 632 632 Alcotest.(check string) 633 633 "option enc none" "NULL" 634 - (Csvt.col_encode (Csvt.option Csvt.int) None); 634 + (Csvt.encode_field (Csvt.option Csvt.int) None); 635 635 Alcotest.(check string) 636 636 "option enc some" "7" 637 - (Csvt.col_encode (Csvt.option Csvt.int) (Some 7)) 637 + (Csvt.encode_field (Csvt.option Csvt.int) (Some 7)) 638 638 639 639 (* {1 Query tests (get_col)} *) 640 640