Declarative CSV codecs
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

fix(ocaml-requests): update tests and fuzz for cstruct→Bytes migration

Test files still referenced Cstruct.t where the API now uses bytes.
Fixed all H2 frame, HPACK, client, and connection tests.
Fixed the fuzz test. 330 tests pass.

+113 -102
+23 -37
fuzz/fuzz_csvt.ml
··· 14 14 (* Generate a valid float string *) 15 15 let float_str_gen = 16 16 map [ float ] (fun f -> 17 - if Float.is_nan f || Float.is_infinite f then "0.0" 18 - else string_of_float f) 17 + if Float.is_nan f || Float.is_infinite f then "0.0" else string_of_float f) 19 18 20 19 (* Generate a valid int string *) 21 20 let int_str_gen = map [ int ] (fun i -> string_of_int i) 22 21 23 22 (* Generate a valid bool string *) 24 - let bool_str_gen = 25 - map [ bool ] (fun b -> if b then "true" else "false") 23 + let bool_str_gen = map [ bool ] (fun b -> if b then "true" else "false") 26 24 27 25 (* {1 Test codecs} *) 28 26 29 27 let int_codec = Csvt.(Row.(obj Fun.id |> col "v" int ~enc:Fun.id |> finish)) 30 - 31 - let float_codec = 32 - Csvt.(Row.(obj Fun.id |> col "v" float ~enc:Fun.id |> finish)) 28 + let float_codec = Csvt.(Row.(obj Fun.id |> col "v" float ~enc:Fun.id |> finish)) 33 29 34 30 let string_codec = 35 31 Csvt.(Row.(obj Fun.id |> col "v" string ~enc:Fun.id |> finish)) 36 32 37 - let bool_codec = 38 - Csvt.(Row.(obj Fun.id |> col "v" bool ~enc:Fun.id |> finish)) 33 + let bool_codec = Csvt.(Row.(obj Fun.id |> col "v" bool ~enc:Fun.id |> finish)) 39 34 40 35 let pair_codec = 41 36 Csvt.( 42 37 Row.( 43 38 obj (fun a b -> (a, b)) 44 - |> col "a" int ~enc:fst 45 - |> col "b" string ~enc:snd 46 - |> finish)) 39 + |> col "a" int ~enc:fst |> col "b" string ~enc:snd |> finish)) 47 40 48 41 let triple_codec = 49 42 Csvt.( ··· 55 48 |> finish)) 56 49 57 50 let nullable_codec = 58 - Csvt.( 59 - Row.(obj Fun.id |> col "v" nullable_float ~enc:Fun.id |> finish)) 51 + Csvt.(Row.(obj Fun.id |> col "v" nullable_float ~enc:Fun.id |> finish)) 60 52 61 53 let option_codec = 62 - Csvt.( 63 - Row.(obj Fun.id |> col "v" (option int) ~enc:Fun.id |> finish)) 54 + Csvt.(Row.(obj Fun.id |> col "v" (option int) ~enc:Fun.id |> finish)) 64 55 65 56 (* {1 Crash safety: arbitrary bytes never cause a crash} *) 66 57 ··· 81 72 let csv = "v\n" ^ string_of_int i ^ "\n" in 82 73 match 
Csvt.decode_string int_codec csv with 83 74 | Error e -> failf "int decode failed: %s" (Csvt.error_to_string e) 84 - | Ok [ v ] -> 85 - if v <> i then failf "int roundtrip: expected %d, got %d" i v 75 + | Ok [ v ] -> if v <> i then failf "int roundtrip: expected %d, got %d" i v 86 76 | Ok vs -> failf "int roundtrip: expected 1 row, got %d" (List.length vs) 87 77 88 78 (* {1 Float roundtrip} *) ··· 107 97 let csv = "v\n" ^ string_of_bool b ^ "\n" in 108 98 match Csvt.decode_string bool_codec csv with 109 99 | Error e -> failf "bool decode failed: %s" (Csvt.error_to_string e) 110 - | Ok [ v ] -> 111 - if v <> b then failf "bool roundtrip: expected %b, got %b" b v 100 + | Ok [ v ] -> if v <> b then failf "bool roundtrip: expected %b, got %b" b v 112 101 | Ok vs -> failf "bool roundtrip: expected 1 row, got %d" (List.length vs) 113 102 114 103 (* {1 String roundtrip (no commas/newlines)} *) ··· 119 108 |> Seq.filter (fun c -> c <> ',' && c <> '\n' && c <> '\r') 120 109 |> String.of_seq 121 110 in 122 - if String.length s = 0 then () (* empty lines are skipped *) 111 + if String.length s = 0 then () (* empty lines are skipped *) 123 112 else 124 113 let csv = "v\n" ^ s ^ "\n" in 125 114 match Csvt.decode_string string_codec csv with ··· 127 116 | Ok [ v ] -> 128 117 if not (String.equal v s) then 129 118 failf "string roundtrip: expected %S, got %S" s v 130 - | Ok vs -> 131 - failf "string roundtrip: expected 1 row, got %d" (List.length vs) 119 + | Ok vs -> failf "string roundtrip: expected 1 row, got %d" (List.length vs) 132 120 133 121 (* {1 Pair roundtrip} *) 134 122 ··· 145 133 | Error e -> failf "pair decode failed: %s" (Csvt.error_to_string e) 146 134 | Ok [ (a, b) ] -> 147 135 if a <> i then failf "pair.a: expected %d, got %d" i a; 148 - if not (String.equal b s) then 149 - failf "pair.b: expected %S, got %S" s b 136 + if not (String.equal b s) then failf "pair.b: expected %S, got %S" s b 150 137 | Ok vs -> failf "pair roundtrip: expected 1 row, got %d" 
(List.length vs) 151 138 152 139 (* {1 Column reorder invariance} *) ··· 161 148 else 162 149 let csv1 = "a,b\n" ^ string_of_int i ^ "," ^ s ^ "\n" in 163 150 let csv2 = "b,a\n" ^ s ^ "," ^ string_of_int i ^ "\n" in 164 - match (Csvt.decode_string pair_codec csv1, Csvt.decode_string pair_codec csv2) with 151 + match 152 + (Csvt.decode_string pair_codec csv1, Csvt.decode_string pair_codec csv2) 153 + with 165 154 | Ok [ (a1, b1) ], Ok [ (a2, b2) ] -> 166 155 if a1 <> a2 then failf "reorder.a: %d vs %d" a1 a2; 167 - if not (String.equal b1 b2) then 168 - failf "reorder.b: %S vs %S" b1 b2 156 + if not (String.equal b1 b2) then failf "reorder.b: %S vs %S" b1 b2 169 157 | Error e, _ | _, Error e -> 170 158 failf "reorder decode failed: %s" (Csvt.error_to_string e) 171 159 | _ -> failf "reorder: unexpected row count" ··· 186 174 | Error e -> failf "multi-row decode failed: %s" (Csvt.error_to_string e) 187 175 | Ok vs -> 188 176 if List.length vs <> List.length is then 189 - failf "multi-row: expected %d rows, got %d" 190 - (List.length is) (List.length vs); 177 + failf "multi-row: expected %d rows, got %d" (List.length is) 178 + (List.length vs); 191 179 List.iter2 192 180 (fun expected got -> 193 181 if expected <> got then ··· 198 186 199 187 let test_nullable_roundtrip f = 200 188 let s = 201 - if Float.is_nan f || Float.is_infinite f then "NULL" 202 - else string_of_float f 189 + if Float.is_nan f || Float.is_infinite f then "NULL" else string_of_float f 203 190 in 204 191 let csv = "v\n" ^ s ^ "\n" in 205 192 match Csvt.decode_string nullable_codec csv with 206 193 | Error e -> failf "nullable decode failed: %s" (Csvt.error_to_string e) 207 194 | Ok [ v ] -> 208 195 if String.equal s "NULL" then ( 209 - if not (Float.is_nan v) then 210 - failf "nullable: expected nan, got %g" v) 196 + if not (Float.is_nan v) then failf "nullable: expected nan, got %g" v) 211 197 else 212 198 let diff = Float.abs (f -. 
v) in 213 199 let scale = Float.max 1.0 (Float.abs f) in 214 200 if diff /. scale > 1e-10 then 215 201 failf "nullable roundtrip: expected %g, got %g" f v 216 - | Ok vs -> 217 - failf "nullable roundtrip: expected 1 row, got %d" (List.length vs) 202 + | Ok vs -> failf "nullable roundtrip: expected 1 row, got %d" (List.length vs) 218 203 219 204 (* {1 Test suite} *) 220 205 221 206 let suite = 222 207 ( "csvt", 223 208 [ 224 - test_case "no crash on arbitrary" [ bytes ] test_no_crash_on_arbitrary_input; 209 + test_case "no crash on arbitrary" [ bytes ] 210 + test_no_crash_on_arbitrary_input; 225 211 test_case "int roundtrip" [ int ] test_int_roundtrip; 226 212 test_case "float roundtrip" [ float ] test_float_roundtrip; 227 213 test_case "bool roundtrip" [ bool ] test_bool_roundtrip;
+11 -20
lib/csvt.ml
··· 18 18 | Missing_header -> "missing CSV header row" 19 19 | Missing_column name -> "missing required column: " ^ name 20 20 | Bad_value { row; column; value; msg } -> 21 - Printf.sprintf "row %d, column %s: bad value %S (%s)" row column value 22 - msg 21 + Printf.sprintf "row %d, column %s: bad value %S (%s)" row column value msg 23 22 | Truncated_row { row; expected; got } -> 24 23 Printf.sprintf "row %d: expected %d columns, got %d" row expected got 25 24 | Encode_error msg -> "encode error: " ^ msg 26 25 27 26 let pp_error ppf e = Format.pp_print_string ppf (error_to_string e) 28 - 29 27 let ( let* ) = Result.bind 30 28 31 29 (* {1 Column codecs} *) ··· 37 35 } 38 36 39 37 let col_kind c = c.kind 40 - 41 38 let col_map ?(kind = "custom") ~dec ~enc () = { kind; dec; enc } 42 - 43 39 let string = { kind = "string"; dec = (fun s -> Ok s); enc = Fun.id } 44 40 45 41 let int = ··· 108 104 { 109 105 kind = "option(" ^ c.kind ^ ")"; 110 106 dec = 111 - (fun s -> 112 - if is_null s then Ok None else Result.map Option.some (c.dec s)); 107 + (fun s -> if is_null s then Ok None else Result.map Option.some (c.dec s)); 113 108 enc = (fun v -> match v with None -> "NULL" | Some x -> c.enc x); 114 109 } 115 110 ··· 131 126 encode : Obj.t -> string; 132 127 } 133 128 134 - type 'a t = { 135 - kind : string; 136 - cols : col_entry list; 137 - ctor : Obj.t; 138 - } 129 + type 'a t = { kind : string; cols : col_entry list; ctor : Obj.t } 139 130 140 131 (* {1 Row builder} *) 141 132 ··· 228 219 | Ok v -> go (i + 1) (apply f v) 229 220 | Error msg -> 230 221 Error 231 - (Bad_value 232 - { row = row_num; column = col.name; value = s; msg }) 222 + (Bad_value { row = row_num; column = col.name; value = s; msg }) 233 223 in 234 224 go 0 resolved.rctor 235 225 236 226 (* {1 Row encoding} *) 237 227 238 - let encode_header codec = 239 - Array.of_list (List.map (fun c -> c.name) codec.cols) 228 + let encode_header codec = Array.of_list (List.map (fun c -> c.name) codec.cols) 240 229 
241 230 let encode_row codec v = 242 231 let o = Obj.repr v in ··· 278 267 279 268 let fold_file codec path f acc = 280 269 let ic = open_in path in 281 - Fun.protect ~finally:(fun () -> close_in ic) (fun () -> 282 - fold_channel codec ic f acc) 270 + Fun.protect 271 + ~finally:(fun () -> close_in ic) 272 + (fun () -> fold_channel codec ic f acc) 283 273 284 274 let decode_file codec path = 285 275 let ic = open_in path in 286 - Fun.protect ~finally:(fun () -> close_in ic) (fun () -> 287 - decode_channel codec ic) 276 + Fun.protect 277 + ~finally:(fun () -> close_in ic) 278 + (fun () -> decode_channel codec ic) 288 279 289 280 let decode_string codec s = 290 281 let lines = String.split_on_char '\n' s in
+11 -12
lib/csvt.mli
··· 50 50 (** {1:codec Column Codecs} *) 51 51 52 52 type 'a col_codec 53 - (** The type of column-level codecs. A column codec knows how to convert 54 - between a single CSV field (string) and a typed OCaml value. *) 53 + (** The type of column-level codecs. A column codec knows how to convert between 54 + a single CSV field (string) and a typed OCaml value. *) 55 55 56 56 val string : string col_codec 57 57 (** Codec for string fields (identity). *) ··· 80 80 enc:('a -> string) -> 81 81 unit -> 82 82 'a col_codec 83 - (** [col_map ~dec ~enc] creates a column codec from raw decode/encode 84 - functions. *) 83 + (** [col_map ~dec ~enc] creates a column codec from raw decode/encode functions. 84 + *) 85 85 86 86 val col_kind : 'a col_codec -> string 87 87 (** [col_kind c] returns the kind description of column codec [c]. *) ··· 89 89 (** {1:row Row Codecs} *) 90 90 91 91 type 'a t 92 - (** The type of row-level codecs. A row codec maps between a CSV row 93 - (resolved through header column names) and a typed OCaml record. *) 92 + (** The type of row-level codecs. A row codec maps between a CSV row (resolved 93 + through header column names) and a typed OCaml record. *) 94 94 95 95 (** Row codec builder. *) 96 96 module Row : sig ··· 141 141 performed once, giving O(1) per-row decoding. *) 142 142 143 143 val resolve : 'a t -> header -> ('a resolved, error) result 144 - (** [resolve codec header] resolves column names to indices. 145 - Returns an error if a required column is missing. *) 144 + (** [resolve codec header] resolves column names to indices. Returns an error if 145 + a required column is missing. *) 146 146 147 147 val decode_row : 'a resolved -> int -> string array -> ('a, error) result 148 - (** [decode_row resolved row_num fields] decodes a single CSV row. 149 - [row_num] is for error reporting (1-based). *) 148 + (** [decode_row resolved row_num fields] decodes a single CSV row. [row_num] is 149 + for error reporting (1-based). 
*) 150 150 151 151 val encode_header : 'a t -> header 152 152 (** [encode_header codec] returns the CSV header for encoding. *) ··· 167 167 (** [fold_channel codec ic f acc] folds over rows without building an 168 168 intermediate list. Useful for large files. *) 169 169 170 - val fold_file : 171 - 'a t -> string -> ('b -> 'a -> 'b) -> 'b -> ('b, error) result 170 + val fold_file : 'a t -> string -> ('b -> 'a -> 'b) -> 'b -> ('b, error) result 172 171 (** [fold_file codec path f acc] folds over rows from a file. *) 173 172 174 173 val decode_string : 'a t -> string -> ('a list, error) result
+68 -33
test/test_csvt.ml
··· 102 102 103 103 let test_missing_column () = 104 104 let csv = "x,label\n1.0,foo\n" in 105 - check_error "missing y" (function Csvt.Missing_column "y" -> true | _ -> false) 105 + check_error "missing y" 106 + (function Csvt.Missing_column "y" -> true | _ -> false) 106 107 (Csvt.decode_string point_codec csv) 107 108 108 109 let test_bad_float () = 109 110 let csv = "x,y,label\nnot_a_number,2.0,foo\n" in 110 - check_error "bad float" (function Csvt.Bad_value { column = "x"; _ } -> true | _ -> false) 111 + check_error "bad float" 112 + (function Csvt.Bad_value { column = "x"; _ } -> true | _ -> false) 111 113 (Csvt.decode_string point_codec csv) 112 114 113 115 let test_bad_int () = 114 116 let csv = "id,name,score,active\nabc,alice,95.5,true\n" in 115 - check_error "bad int" (function Csvt.Bad_value { column = "id"; _ } -> true | _ -> false) 117 + check_error "bad int" 118 + (function Csvt.Bad_value { column = "id"; _ } -> true | _ -> false) 116 119 (Csvt.decode_string record_codec csv) 117 120 118 121 let test_bad_bool () = 119 122 let csv = "id,name,score,active\n1,alice,95.5,maybe\n" in 120 - check_error "bad bool" (function Csvt.Bad_value { column = "active"; _ } -> true | _ -> false) 123 + check_error "bad bool" 124 + (function Csvt.Bad_value { column = "active"; _ } -> true | _ -> false) 121 125 (Csvt.decode_string record_codec csv) 122 126 123 127 let test_empty_csv () = 124 - check_error "empty" (function Csvt.Missing_header -> true | _ -> false) 128 + check_error "empty" 129 + (function Csvt.Missing_header -> true | _ -> false) 125 130 (Csvt.decode_string point_codec "") 126 131 127 132 let test_header_only () = ··· 254 259 (* {1 Bool codec edge cases} *) 255 260 256 261 let test_bool_variants () = 257 - let codec = 258 - Csvt.(Row.(obj Fun.id |> col "v" bool ~enc:Fun.id |> finish)) 259 - in 262 + let codec = Csvt.(Row.(obj Fun.id |> col "v" bool ~enc:Fun.id |> finish)) in 260 263 let check input expected = 261 264 match Csvt.decode_string codec ("v\n" 
^ input ^ "\n") with 262 265 | Error e -> Alcotest.failf "%s: %s" input (Csvt.error_to_string e) ··· 277 280 (* {1 Float edge cases} *) 278 281 279 282 let test_float_edge_cases () = 280 - let codec = 281 - Csvt.(Row.(obj Fun.id |> col "v" float ~enc:Fun.id |> finish)) 282 - in 283 + let codec = Csvt.(Row.(obj Fun.id |> col "v" float ~enc:Fun.id |> finish)) in 283 284 let check input expected = 284 285 match Csvt.decode_string codec ("v\n" ^ input ^ "\n") with 285 286 | Error e -> Alcotest.failf "%s: %s" input (Csvt.error_to_string e) ··· 294 295 check "-999.999" (-999.999); 295 296 check "inf" infinity; 296 297 check "-inf" neg_infinity; 297 - check "nan" Float.nan (* OCaml float_of_string handles this *) 298 + check "nan" Float.nan (* OCaml float_of_string handles this *) 298 299 299 300 let test_float_nan_check () = 300 - let codec = 301 - Csvt.(Row.(obj Fun.id |> col "v" float ~enc:Fun.id |> finish)) 302 - in 301 + let codec = Csvt.(Row.(obj Fun.id |> col "v" float ~enc:Fun.id |> finish)) in 303 302 match Csvt.decode_string codec "v\nnan\n" with 304 303 | Error e -> Alcotest.failf "%s" (Csvt.error_to_string e) 305 304 | Ok [ v ] -> Alcotest.(check bool) "is nan" true (Float.is_nan v) ··· 308 307 (* {1 Int edge cases} *) 309 308 310 309 let test_int_edge_cases () = 311 - let codec = 312 - Csvt.(Row.(obj Fun.id |> col "v" int ~enc:Fun.id |> finish)) 313 - in 310 + let codec = Csvt.(Row.(obj Fun.id |> col "v" int ~enc:Fun.id |> finish)) in 314 311 let check input expected = 315 312 match Csvt.decode_string codec ("v\n" ^ input ^ "\n") with 316 313 | Error e -> Alcotest.failf "%s: %s" input (Csvt.error_to_string e) ··· 325 322 (* {1 String edge cases} *) 326 323 327 324 let test_string_with_spaces () = 328 - let codec = 329 - Csvt.(Row.(obj Fun.id |> col "v" string ~enc:Fun.id |> finish)) 330 - in 325 + let codec = Csvt.(Row.(obj Fun.id |> col "v" string ~enc:Fun.id |> finish)) in 331 326 match Csvt.decode_string codec "v\nhello world\n" with 332 327 | Error e -> 
Alcotest.failf "%s" (Csvt.error_to_string e) 333 328 | Ok [ v ] -> Alcotest.(check string) "spaces" "hello world" v ··· 339 334 Csvt.( 340 335 Row.( 341 336 obj (fun a b -> (a, b)) 342 - |> col "a" string ~enc:fst 343 - |> col "b" string ~enc:snd 344 - |> finish)) 337 + |> col "a" string ~enc:fst |> col "b" string ~enc:snd |> finish)) 345 338 in 346 339 match Csvt.decode_string codec "a,b\nhello,\n" with 347 340 | Error e -> Alcotest.failf "%s" (Csvt.error_to_string e) ··· 353 346 (* {1 Many rows} *) 354 347 355 348 let test_many_rows () = 356 - let codec = 357 - Csvt.(Row.(obj Fun.id |> col "v" int ~enc:Fun.id |> finish)) 358 - in 349 + let codec = Csvt.(Row.(obj Fun.id |> col "v" int ~enc:Fun.id |> finish)) in 359 350 let buf = Buffer.create 1024 in 360 351 Buffer.add_string buf "v\n"; 361 352 for i = 0 to 999 do ··· 420 411 let codec = 421 412 Csvt.( 422 413 Row.( 423 - obj (fun c0 c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 c11 c12 c13 c14 c15 c16 424 - c17 c18 c19 -> 425 - [| c0; c1; c2; c3; c4; c5; c6; c7; c8; c9; c10; c11; c12; c13; c14; 426 - c15; c16; c17; c18; c19 |]) 414 + obj 415 + (fun 416 + c0 417 + c1 418 + c2 419 + c3 420 + c4 421 + c5 422 + c6 423 + c7 424 + c8 425 + c9 426 + c10 427 + c11 428 + c12 429 + c13 430 + c14 431 + c15 432 + c16 433 + c17 434 + c18 435 + c19 436 + -> 437 + [| 438 + c0; 439 + c1; 440 + c2; 441 + c3; 442 + c4; 443 + c5; 444 + c6; 445 + c7; 446 + c8; 447 + c9; 448 + c10; 449 + c11; 450 + c12; 451 + c13; 452 + c14; 453 + c15; 454 + c16; 455 + c17; 456 + c18; 457 + c19; 458 + |]) 427 459 |> col "c0" int ~enc:(fun a -> a.(0)) 428 460 |> col "c1" int ~enc:(fun a -> a.(1)) 429 461 |> col "c2" int ~enc:(fun a -> a.(2)) ··· 446 478 |> col "c19" int ~enc:(fun a -> a.(19)) 447 479 |> finish)) 448 480 in 449 - let header = String.concat "," (List.init 20 (fun i -> "c" ^ string_of_int i)) in 481 + let header = 482 + String.concat "," (List.init 20 (fun i -> "c" ^ string_of_int i)) 483 + in 450 484 let row = String.concat "," (List.init 20 
string_of_int) in 451 485 let csv = header ^ "\n" ^ row ^ "\n" in 452 486 match Csvt.decode_string codec csv with ··· 484 518 Alcotest.test_case "option string" `Quick test_option_string; 485 519 (* Defaults *) 486 520 Alcotest.test_case "dec_absent" `Quick test_dec_absent; 487 - Alcotest.test_case "dec_absent all missing" `Quick test_dec_absent_all_missing; 521 + Alcotest.test_case "dec_absent all missing" `Quick 522 + test_dec_absent_all_missing; 488 523 (* Encoding *) 489 524 Alcotest.test_case "encode header" `Quick test_encode_header; 490 525 Alcotest.test_case "encode row" `Quick test_encode_row;