Declarative JSON data manipulation for OCaml
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

at main 227 lines 7.5 kB view raw
1(* Tests for [Json.Codec.ignore]'s skip-parse fast path. 2 3 Two angles: 4 5 1. Hand-written positive/negative cases covering the shape grammar 6 (string escapes, number formats, nested structures, truncation). 7 2. Differential property: on any string, [Json.Codec.ignore] and 8 [Json.t] agree on Ok/Error status. [Json.Codec.ignore] is allowed 9 to be more permissive (accept where [Json.t] errors) only at 10 content level -- never at structural level. Alcobar generates 11 random inputs and asserts the invariant. *) 12 13let decode_ignore s = Json.of_string Json.Codec.ignore s 14let decode_dom s = Json.Value.of_string s 15let is_ok = function Ok _ -> true | Error _ -> false 16 17(* -- Positive cases: Json.Codec.ignore must accept all valid JSON -- *) 18 19let test_ignore_accepts_valid name s () = 20 match decode_ignore s with 21 | Ok () -> () 22 | Error e -> 23 Alcotest.failf "Json.Codec.ignore rejected valid input %s: %a" name 24 Json.Error.pp e 25 26let positive_cases = 27 [ 28 ("null", "null"); 29 ("true", "true"); 30 ("false", "false"); 31 ("int", "42"); 32 ("neg int", "-17"); 33 ("float", "3.14"); 34 ("exp", "1.5e10"); 35 ("neg exp", "-2.5E-3"); 36 ("zero", "0"); 37 ("empty string", "\"\""); 38 ("simple string", "\"hello\""); 39 ("escaped quote", {|"a\"b"|}); 40 ("escaped backslash", {|"a\\b"|}); 41 ("escaped slash", {|"a\/b"|}); 42 ("escaped newline", {|"a\nb"|}); 43 ("escaped tab", {|"a\tb"|}); 44 ("escaped unicode", {|"a\u0041b"|}); 45 ("utf8 content", "\"caf\xc3\xa9\""); 46 ("empty array", "[]"); 47 ("int array", "[1, 2, 3]"); 48 ("mixed array", "[1, \"a\", null, true]"); 49 ("nested array", "[[1,2],[3,4]]"); 50 ("empty object", "{}"); 51 ("simple object", {|{"a":1}|}); 52 ("multi-member", {|{"a":1,"b":2,"c":3}|}); 53 ("nested object", {|{"a":{"b":{"c":1}}}|}); 54 ("object in array", {|[{"a":1},{"b":2}]|}); 55 ("array in object", {|{"items":[1,2,3]}|}); 56 ("whitespace", " \n\t [ 1 , 2 ] "); 57 ("unicode escape pair", {|"\uD83D\uDE00"|}); 58 ] 59 60(* -- Negative cases: Json.Codec.ignore must reject structurally broken input. 61 62 "Structurally broken" means: mismatched brackets, unclosed strings, 63 unclosed containers, or complete absence of a value. The skip path 64 is explicitly more permissive on content (numbers, escapes) and may 65 accept things Json.t rejects. *) 66 67let test_ignore_rejects_malformed name s () = 68 match decode_ignore s with 69 | Ok () -> Alcotest.failf "Json.Codec.ignore accepted malformed input %s" name 70 | Error _ -> () 71 72let structural_negatives = 73 [ 74 ("empty", ""); 75 ("just whitespace", " \n\t "); 76 ("unclosed array", "[1, 2"); 77 ("unclosed object", {|{"a":1|}); 78 ("unclosed string", {|"hello|}); 79 ("mismatched close", "[1, 2}"); 80 ("trailing comma in array", "[1, 2,]"); 81 ("missing colon in object", {|{"a" 1}|}); 82 ("missing value after colon", {|{"a":}|}); 83 ("just comma", ","); 84 ("just close brace", "}"); 85 ("just close bracket", "]"); 86 ] 87 88(* -- Differential: Json.Codec.ignore and Json.t on the same input -- 89 90 Expected: on all corpus files (known valid), both decode to Ok. 91 On a set of synthetic malformed inputs, both return Error -- or at 92 worst, Json.Codec.ignore accepts something Json.t rejects (content 93 permissiveness, documented). The strict structural contract says 94 Json.Codec.ignore MUST reject what Json.t rejects at the structural 95 level. *) 96 97let test_diff_valid_both_accept name s () = 98 let ri = decode_ignore s and rj = decode_dom s in 99 match (ri, rj) with 100 | Ok _, Ok _ -> () 101 | Error e, Ok _ -> 102 Alcotest.failf "Json.Codec.ignore rejected but Json.t accepted %s: %a" 103 name Json.Error.pp e 104 | Ok _, Error e -> 105 Alcotest.failf 106 "Json.Codec.ignore accepted but Json.t rejected %s (content \ 107 permissiveness): %a" 108 name Json.Error.pp e 109 | Error _, Error _ -> () 110 111let differential_cases = 112 (* A grab-bag of realistic JSON values from the simdjson corpus 113 shapes but small enough to inline. *) 114 [ 115 ("null deep", "[[[[[null]]]]]"); 116 ("tight nest", {|{"a":{"b":{"c":{"d":[1,2,3]}}}}|}); 117 ("many numbers", "[1,2,3,4,5,6,7,8,9,10]"); 118 ("escapes in key", {|{"a\"b":1}|}); 119 ("escape in string val", {|{"k":"a\"b"}|}); 120 ("unicode in key", {|{"caf\u00e9":1}|}); 121 ] 122 123(* -- Content-permissiveness: Json.Codec.ignore matches simdjson On-Demand 124 semantics. Structural contract (bracket nesting, string quote 125 matching) is enforced; content validity (number shape, escape 126 correctness) is NOT. Callers needing strict content validation 127 should decode with Json.t and discard. These cases document 128 the boundary. -- *) 129 130let permissive_cases = 131 [ 132 ("double dot", "1..2"); 133 ("plus leader", "+5"); 134 ("double exp", "1eE2"); 135 ("bad escape char", {|"a\zb"|}); 136 ("short unicode escape", {|"a\u41b"|}); 137 ] 138 139let test_permissive_ignore name s () = 140 let ri = decode_ignore s and rj = decode_dom s in 141 Alcotest.(check bool) (Fmt.str "json rejects %s" name) false (is_ok rj); 142 (* Json.Codec.ignore accepts this -- document the behaviour. *) 143 match ri with 144 | Ok _ -> () (* Expected permissive acceptance. *) 145 | Error _ -> 146 (* If ignore also rejects, we have tighter validation than 147 expected. Note and continue. *) 148 Fmt.epr "(info: Json.Codec.ignore also rejected %s)\n" name 149 150(* -- Corpus torture test -- 151 152 If the simdjson corpus is present at [/tmp/jsont_corpus/*.json], 153 run Json.Codec.ignore over each file and assert acceptance. Also check 154 that Json.Codec.ignore and Json.t agree (both Ok) on every file. 155 Skipped silently if the corpus isn't available. *) 156 157let read_file path = 158 try 159 let ic = open_in_bin path in 160 let n = in_channel_length ic in 161 let b = Bytes.create n in 162 really_input ic b 0 n; 163 close_in ic; 164 Some (Bytes.unsafe_to_string b) 165 with Sys_error _ | End_of_file -> None 166 167let corpus_dir = "/tmp/jsont_corpus" 168 169let corpus_files () = 170 try 171 Sys.readdir corpus_dir |> Array.to_list 172 |> List.filter (fun n -> Filename.check_suffix n ".json") 173 |> List.sort compare 174 with Sys_error _ -> [] 175 176let test_corpus_file name () = 177 match read_file (Filename.concat corpus_dir name) with 178 | None -> () 179 | Some s -> ( 180 (match decode_ignore s with 181 | Ok () -> () 182 | Error e -> 183 Alcotest.failf "ignore rejected corpus %s: %s" name 184 (Json.Error.to_string e)); 185 match decode_dom s with 186 | Ok _ -> () 187 | Error e -> 188 Alcotest.failf "json rejected corpus %s: %s" name 189 (Json.Error.to_string e)) 190 191(* -- Entry point -- *) 192 193let suite = 194 let positive = 195 List.map 196 (fun (n, s) -> 197 Alcotest.test_case ("accept " ^ n) `Quick 198 (test_ignore_accepts_valid n s)) 199 positive_cases 200 in 201 let neg = 202 List.map 203 (fun (n, s) -> 204 Alcotest.test_case ("reject " ^ n) `Quick 205 (test_ignore_rejects_malformed n s)) 206 structural_negatives 207 in 208 let diff = 209 List.map 210 (fun (n, s) -> 211 Alcotest.test_case ("diff " ^ n) `Quick 212 (test_diff_valid_both_accept n s)) 213 differential_cases 214 in 215 let perm = 216 List.map 217 (fun (n, s) -> 218 Alcotest.test_case ("permissive " ^ n) `Quick 219 (test_permissive_ignore n s)) 220 permissive_cases 221 in 222 let corpus = 223 List.map 224 (fun n -> Alcotest.test_case ("corpus " ^ n) `Quick (test_corpus_file n)) 225 (corpus_files ()) 226 in 227 ("skip-parse", positive @ neg @ diff @ perm @ corpus)