Declarative JSON data manipulation for OCaml
1(* Tests for [Json.Codec.ignore]'s skip-parse fast path.
2
3 Two angles:
4
5 1. Hand-written positive/negative cases covering the shape grammar
6 (string escapes, number formats, nested structures, truncation).
7 2. Differential property: on any string, [Json.Codec.ignore] and
8 [Json.t] agree on Ok/Error status. [Json.Codec.ignore] is allowed
9 to be more permissive (accept where [Json.t] errors) only at
10 content level -- never at structural level. Alcobar generates
11 random inputs and asserts the invariant. *)
12
13let decode_ignore s = Json.of_string Json.Codec.ignore s
14let decode_dom s = Json.Value.of_string s
15let is_ok = function Ok _ -> true | Error _ -> false
16
17(* -- Positive cases: Json.Codec.ignore must accept all valid JSON -- *)
18
19let test_ignore_accepts_valid name s () =
20 match decode_ignore s with
21 | Ok () -> ()
22 | Error e ->
23 Alcotest.failf "Json.Codec.ignore rejected valid input %s: %a" name
24 Json.Error.pp e
25
26let positive_cases =
27 [
28 ("null", "null");
29 ("true", "true");
30 ("false", "false");
31 ("int", "42");
32 ("neg int", "-17");
33 ("float", "3.14");
34 ("exp", "1.5e10");
35 ("neg exp", "-2.5E-3");
36 ("zero", "0");
37 ("empty string", "\"\"");
38 ("simple string", "\"hello\"");
39 ("escaped quote", {|"a\"b"|});
40 ("escaped backslash", {|"a\\b"|});
41 ("escaped slash", {|"a\/b"|});
42 ("escaped newline", {|"a\nb"|});
43 ("escaped tab", {|"a\tb"|});
44 ("escaped unicode", {|"a\u0041b"|});
45 ("utf8 content", "\"caf\xc3\xa9\"");
46 ("empty array", "[]");
47 ("int array", "[1, 2, 3]");
48 ("mixed array", "[1, \"a\", null, true]");
49 ("nested array", "[[1,2],[3,4]]");
50 ("empty object", "{}");
51 ("simple object", {|{"a":1}|});
52 ("multi-member", {|{"a":1,"b":2,"c":3}|});
53 ("nested object", {|{"a":{"b":{"c":1}}}|});
54 ("object in array", {|[{"a":1},{"b":2}]|});
55 ("array in object", {|{"items":[1,2,3]}|});
56 ("whitespace", " \n\t [ 1 , 2 ] ");
57 ("unicode escape pair", {|"\uD83D\uDE00"|});
58 ]
59
60(* -- Negative cases: Json.Codec.ignore must reject structurally broken input.
61
62 "Structurally broken" means: mismatched brackets, unclosed strings,
63 unclosed containers, or complete absence of a value. The skip path
64 is explicitly more permissive on content (numbers, escapes) and may
65 accept things Json.t rejects. *)
66
67let test_ignore_rejects_malformed name s () =
68 match decode_ignore s with
69 | Ok () -> Alcotest.failf "Json.Codec.ignore accepted malformed input %s" name
70 | Error _ -> ()
71
72let structural_negatives =
73 [
74 ("empty", "");
75 ("just whitespace", " \n\t ");
76 ("unclosed array", "[1, 2");
77 ("unclosed object", {|{"a":1|});
78 ("unclosed string", {|"hello|});
79 ("mismatched close", "[1, 2}");
80 ("trailing comma in array", "[1, 2,]");
81 ("missing colon in object", {|{"a" 1}|});
82 ("missing value after colon", {|{"a":}|});
83 ("just comma", ",");
84 ("just close brace", "}");
85 ("just close bracket", "]");
86 ]
87
88(* -- Differential: Json.Codec.ignore and Json.t on the same input --
89
90 Expected: on all corpus files (known valid), both decode to Ok.
91 On a set of synthetic malformed inputs, both return Error -- or at
92 worst, Json.Codec.ignore accepts something Json.t rejects (content
93 permissiveness, documented). The strict structural contract says
94 Json.Codec.ignore MUST reject what Json.t rejects at the structural
95 level. *)
96
97let test_diff_valid_both_accept name s () =
98 let ri = decode_ignore s and rj = decode_dom s in
99 match (ri, rj) with
100 | Ok _, Ok _ -> ()
101 | Error e, Ok _ ->
102 Alcotest.failf "Json.Codec.ignore rejected but Json.t accepted %s: %a"
103 name Json.Error.pp e
104 | Ok _, Error e ->
105 Alcotest.failf
106 "Json.Codec.ignore accepted but Json.t rejected %s (content \
107 permissiveness): %a"
108 name Json.Error.pp e
109 | Error _, Error _ -> ()
110
111let differential_cases =
112 (* A grab-bag of realistic JSON values from the simdjson corpus
113 shapes but small enough to inline. *)
114 [
115 ("null deep", "[[[[[null]]]]]");
116 ("tight nest", {|{"a":{"b":{"c":{"d":[1,2,3]}}}}|});
117 ("many numbers", "[1,2,3,4,5,6,7,8,9,10]");
118 ("escapes in key", {|{"a\"b":1}|});
119 ("escape in string val", {|{"k":"a\"b"}|});
120 ("unicode in key", {|{"caf\u00e9":1}|});
121 ]
122
123(* -- Content-permissiveness: Json.Codec.ignore matches simdjson On-Demand
124 semantics. Structural contract (bracket nesting, string quote
125 matching) is enforced; content validity (number shape, escape
126 correctness) is NOT. Callers needing strict content validation
127 should decode with Json.t and discard. These cases document
128 the boundary. -- *)
129
130let permissive_cases =
131 [
132 ("double dot", "1..2");
133 ("plus leader", "+5");
134 ("double exp", "1eE2");
135 ("bad escape char", {|"a\zb"|});
136 ("short unicode escape", {|"a\u41b"|});
137 ]
138
139let test_permissive_ignore name s () =
140 let ri = decode_ignore s and rj = decode_dom s in
141 Alcotest.(check bool) (Fmt.str "json rejects %s" name) false (is_ok rj);
142 (* Json.Codec.ignore accepts this -- document the behaviour. *)
143 match ri with
144 | Ok _ -> () (* Expected permissive acceptance. *)
145 | Error _ ->
146 (* If ignore also rejects, we have tighter validation than
147 expected. Note and continue. *)
148 Fmt.epr "(info: Json.Codec.ignore also rejected %s)\n" name
149
150(* -- Corpus torture test --
151
152 If the simdjson corpus is present at [/tmp/jsont_corpus/*.json],
153 run Json.Codec.ignore over each file and assert acceptance. Also check
154 that Json.Codec.ignore and Json.t agree (both Ok) on every file.
155 Skipped silently if the corpus isn't available. *)
156
157let read_file path =
158 try
159 let ic = open_in_bin path in
160 let n = in_channel_length ic in
161 let b = Bytes.create n in
162 really_input ic b 0 n;
163 close_in ic;
164 Some (Bytes.unsafe_to_string b)
165 with Sys_error _ | End_of_file -> None
166
167let corpus_dir = "/tmp/jsont_corpus"
168
169let corpus_files () =
170 try
171 Sys.readdir corpus_dir |> Array.to_list
172 |> List.filter (fun n -> Filename.check_suffix n ".json")
173 |> List.sort compare
174 with Sys_error _ -> []
175
176let test_corpus_file name () =
177 match read_file (Filename.concat corpus_dir name) with
178 | None -> ()
179 | Some s -> (
180 (match decode_ignore s with
181 | Ok () -> ()
182 | Error e ->
183 Alcotest.failf "ignore rejected corpus %s: %s" name
184 (Json.Error.to_string e));
185 match decode_dom s with
186 | Ok _ -> ()
187 | Error e ->
188 Alcotest.failf "json rejected corpus %s: %s" name
189 (Json.Error.to_string e))
190
191(* -- Entry point -- *)
192
193let suite =
194 let positive =
195 List.map
196 (fun (n, s) ->
197 Alcotest.test_case ("accept " ^ n) `Quick
198 (test_ignore_accepts_valid n s))
199 positive_cases
200 in
201 let neg =
202 List.map
203 (fun (n, s) ->
204 Alcotest.test_case ("reject " ^ n) `Quick
205 (test_ignore_rejects_malformed n s))
206 structural_negatives
207 in
208 let diff =
209 List.map
210 (fun (n, s) ->
211 Alcotest.test_case ("diff " ^ n) `Quick
212 (test_diff_valid_both_accept n s))
213 differential_cases
214 in
215 let perm =
216 List.map
217 (fun (n, s) ->
218 Alcotest.test_case ("permissive " ^ n) `Quick
219 (test_permissive_ignore n s))
220 permissive_cases
221 in
222 let corpus =
223 List.map
224 (fun n -> Alcotest.test_case ("corpus " ^ n) `Quick (test_corpus_file n))
225 (corpus_files ())
226 in
227 ("skip-parse", positive @ neg @ diff @ perm @ corpus)