Declarative JSON data manipulation for OCaml
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

json: move bytesrw parser/encoder into codec.ml; shrink json.ml to 78 lines

Codec.Private was only there to let the bytesrw parser (in json.ml)
reach codec-internal helpers across a module boundary. Fold the parser
and encoder into codec.ml instead: the helpers never leave the file,
and Codec.Private collapses from "everything the parser needed" to
just the handful of error helpers json_brr.ml still reaches for from
a separate library.

Also drop [type format = Minify | Indent | Layout]: it was just a
three-state derivation of [?preserve / ?indent]; the encoder record
now carries [preserve : bool] and [indent : bool] directly and the
write paths branch on those.

Wrap the streaming entry points [of_reader / of_string / to_writer /
to_string] in [module Stream : sig ... end] so that [let open
Json.Codec in] no longer shadows callers' local [of_string] helpers
(caught by ocaml-did / ocaml-oci / ocaml-claude). Top-level users
keep [Json.of_string] and friends; the Stream namespace is for the
thin re-exports.

json.ml is now 78 lines: type re-exports, [module Codec = Codec],
top-level wrappers, [module Tape], and the [module Value] AST-layer
convenience wrappers. Everything codec lives in codec.ml.

did.ml had a stray merge marker from concurrent edits; resolve to the
new [failf] name.

+1645 -1602
+12 -10
lib/brr/json_brr.ml
··· 4 4 ---------------------------------------------------------------------------*) 5 5 6 6 open Json.Codec 7 + open Json.Codec.Private 7 8 8 9 let jv_error_to_error e = 9 10 let ctx = Json.Context.empty and meta = Json.Meta.none in ··· 58 59 (* Decoding *) 59 60 60 61 let fail_push_array map i e = 61 - Json.Codec.fail_push_array Json.Meta.none map (i, Json.Meta.none) e 62 + Json.Codec.Private.fail_push_array Json.Meta.none map (i, Json.Meta.none) e 62 63 63 64 let fail_push_object map n e = 64 - Json.Codec.fail_push_object Json.Meta.none map (n, Json.Meta.none) e 65 + Json.Codec.Private.fail_push_object Json.Meta.none map (n, Json.Meta.none) e 65 66 66 67 let fail_type_mismatch t ~fnd = 67 - Json.Codec.fail_type_mismatch Json.Meta.none t ~fnd 68 + Json.Codec.Private.fail_type_mismatch Json.Meta.none t ~fnd 68 69 69 70 let all_unexpected ~mem_decs mems = 70 71 let unexpected (n, _jname) = ··· 144 145 | Object_cases (umems', cases) -> 145 146 let umems' = Unknown_mems umems' in 146 147 let umems, dict = 147 - Json.Codec.override_unknown_mems ~by:umems umems' dict 148 + Json.Codec.Private.override_unknown_mems ~by:umems umems' dict 148 149 in 149 150 decode_object_cases map umems cases mem_decs dict names jv 150 151 | Object_basic umems' -> ( 151 152 let umems' = Unknown_mems (Some umems') in 152 153 let umems, dict = 153 - Json.Codec.override_unknown_mems ~by:umems umems' dict 154 + Json.Codec.Private.override_unknown_mems ~by:umems umems' dict 154 155 in 155 156 match umems with 156 157 | Unknown_mems (Some Unknown_skip | None) -> ··· 178 179 fun map umems umap mem_decs dict names jv -> 179 180 match names with 180 181 | [] -> 181 - Json.Codec.finish_object_decode map Json.Meta.none umems umap mem_decs 182 - dict 182 + Json.Codec.Private.finish_object_decode map Json.Meta.none umems umap 183 + mem_decs dict 183 184 | (n, jname) :: names -> ( 184 185 match String_map.find_opt n mem_decs with 185 186 | Some (Mem_dec m) -> ··· 195 196 decode_object_basic map umems umap mem_decs dict names jv 196 197 | Unknown_error -> 197 198 let fnd = (n, Json.Meta.none) :: all_unexpected ~mem_decs names in 198 - Json.Codec.fail_unexpected_members Json.Meta.none map ~fnd 199 + Json.Codec.Private.fail_unexpected_members Json.Meta.none map ~fnd 199 200 | Unknown_keep (mmap, _) -> 200 201 let umap = 201 202 let v = ··· 219 220 let decode_case_tag tag = 220 221 let eq_tag (Case c) = cases.tag_compare c.tag tag = 0 in 221 222 match List.find_opt eq_tag cases.cases with 222 - | None -> Json.Codec.fail_unexpected_case_tag Json.Meta.none map cases tag 223 + | None -> 224 + Json.Codec.Private.fail_unexpected_case_tag Json.Meta.none map cases tag 223 225 | Some (Case case) -> 224 226 let mems = String_map.remove cases.tag.name names in 225 227 let dict = ··· 237 239 | None -> 238 240 let exp = String_map.singleton cases.tag.name (Mem_dec cases.tag) in 239 241 let fnd = jv_mem_name_list jv in 240 - Json.Codec.fail_missing_members Json.Meta.none map ~exp ~fnd) 242 + Json.Codec.Private.fail_missing_members Json.Meta.none map ~exp ~fnd) 241 243 242 244 and decode_any : type a. a t -> a any_map -> Jv.t -> a = 243 245 fun t map jv ->
+1478 -1
lib/codec.ml
··· 1 1 (* Internal codec representation. This is the GADT that json.ml's 2 2 combinators walk at decode/encode time. The public alias is 3 - [type 'a Json.codec = 'a Codec.t] in json.ml. *) 3 + [type 'a Json.codec = 'a t] in json.ml. *) 4 4 5 5 (* See the .mli for documentation *) 6 6 ··· 1956 1956 | _ :: _ -> 1957 1957 let absent = if allow_absent then Some v else None in 1958 1958 update_path ?stub ?absent p (const t v) 1959 + 1960 + module Private = struct 1961 + let array_kinded_sort = array_kinded_sort 1962 + let object_kinded_sort = object_kinded_sort 1963 + let pp_kind = pp_kind 1964 + let pp_code = pp_code 1965 + let fail_push_array = fail_push_array 1966 + let fail_push_object = fail_push_object 1967 + let fail_type_mismatch = fail_type_mismatch 1968 + let fail_missing_members = fail_missing_members 1969 + let fail_unexpected_members = fail_unexpected_members 1970 + let fail_unexpected_case_tag = fail_unexpected_case_tag 1971 + let object_meta_arg = object_meta_arg 1972 + 1973 + module Dict = Dict 1974 + 1975 + let apply_dict = apply_dict 1976 + let override_unknown_mems = override_unknown_mems 1977 + let finish_object_decode = finish_object_decode 1978 + end 1979 + 1980 + (* ======================================================================== 1981 + Bytesrw streaming parser and encoder. Moved from json.ml. 1982 + ======================================================================== *) 1983 + 1984 + open Bytesrw 1985 + 1986 + (* XXX add these things to Stdlib.Uchar *) 1987 + 1988 + let uchar_max_utf8_bytes = 4 1989 + 1990 + let[@inline] uchar_utf8_decode_length = function 1991 + | '\x00' .. '\x7F' -> 1 1992 + | '\x80' .. '\xC1' -> 0 1993 + | '\xC2' .. '\xDF' -> 2 1994 + | '\xE0' .. '\xEF' -> 3 1995 + | '\xF0' .. '\xF4' -> 4 1996 + | _ -> 0 1997 + 1998 + (* Character classes *) 1999 + 2000 + let[@inline] is_digit u = 0x0030 (* 0 *) <= u && u <= 0x0039 (* 9 *) 2001 + let[@inline] is_number_start u = is_digit u || u = 0x002D (* - *) 2002 + let[@inline] is_surrogate u = 0xD800 <= u && u <= 0xDFFF 2003 + let[@inline] _is_hi_surrogate u = 0xD800 <= u && u <= 0xDBFF 2004 + let[@inline] is_lo_surrogate u = 0xDC00 <= u && u <= 0xDFFF 2005 + 2006 + let[@inline] is_control u = 2007 + (0x0000 <= u && u <= 0x001F) 2008 + (* C0 control characters *) 2009 + || u = 0x007F 2010 + (* Delete *) 2011 + || (0x0080 <= u && u <= 0x009F) 2012 + (* C1 control characters *) 2013 + || u = 0x2028 2014 + (* Line separator *) || u = 0x2029 2015 + (* Paragraph separator *) || u = 0x200E 2016 + (* left-to-right mark *) || u = 0x200F (* right-to-left mark *) 2017 + 2018 + let sot = 0x1A0000 (* start of text U+10FFFF + 1 *) 2019 + let eot = 0x1A0001 (* end of text U+10FFFF + 2 *) 2020 + let pp_code = pp_code 2021 + 2022 + let pp_quchar ppf u = 2023 + pp_code ppf 2024 + @@ 2025 + if u = sot then "start of text" 2026 + else if u = eot then "end of text" 2027 + else if is_control u || is_surrogate u then Fmt.str "U+%04X" u 2028 + else 2029 + let u = Uchar.of_int u in 2030 + let b = Stdlib.Bytes.make (Uchar.utf_8_byte_length u) '\x00' in 2031 + Stdlib.( 2032 + Stdlib.ignore (Bytes.set_utf_8_uchar b 0 u); 2033 + Bytes.unsafe_to_string b) 2034 + 2035 + (* A simple growable byte buffer used for token and whitespace 2036 + accumulation. Raw [Bytes.t] access lets us compare buffer content 2037 + against candidate keys without allocating an intermediate string. *) 2038 + type tokbuf = { mutable bytes : Stdlib.Bytes.t; mutable len : int } 2039 + 2040 + let tokbuf_create n = { bytes = Stdlib.Bytes.create n; len = 0 } 2041 + let[@inline] tokbuf_clear t = t.len <- 0 2042 + 2043 + let[@inline] tokbuf_ensure t need = 2044 + let cap = Stdlib.Bytes.length t.bytes in 2045 + if t.len + need > cap then ( 2046 + let new_cap = max (cap * 2) (t.len + need) in 2047 + let b = Stdlib.Bytes.create new_cap in 2048 + Stdlib.Bytes.blit t.bytes 0 b 0 t.len; 2049 + t.bytes <- b) 2050 + 2051 + let[@inline] tokbuf_add_char t c = 2052 + tokbuf_ensure t 1; 2053 + Stdlib.Bytes.unsafe_set t.bytes t.len c; 2054 + t.len <- t.len + 1 2055 + 2056 + let[@inline] tokbuf_add_utf_8_uchar t u = 2057 + let n = Uchar.utf_8_byte_length u in 2058 + tokbuf_ensure t n; 2059 + Stdlib.ignore (Stdlib.Bytes.set_utf_8_uchar t.bytes t.len u : int); 2060 + t.len <- t.len + n 2061 + 2062 + let[@inline] tokbuf_contents t = Stdlib.Bytes.sub_string t.bytes 0 t.len 2063 + 2064 + (* Byte-compare buffer content to a string without allocating. *) 2065 + let tokbuf_equal_string t s = 2066 + let n = String.length s in 2067 + if t.len <> n then false 2068 + else 2069 + let rec loop i = 2070 + if i >= n then true 2071 + else if Stdlib.Bytes.unsafe_get t.bytes i <> String.unsafe_get s i then 2072 + false 2073 + else loop (i + 1) 2074 + in 2075 + loop 0 2076 + 2077 + (* Decoder *) 2078 + 2079 + type decoder = { 2080 + file : string; 2081 + meta_none : Meta.t; (* A meta with just [file] therein. *) 2082 + locs : bool; (* [true] if text locations should be computed. *) 2083 + layout : bool; (* [true] if text layout should be kept. *) 2084 + reader : Bytes.Reader.t; (* The source of bytes. *) 2085 + mutable i : Stdlib.Bytes.t; (* Current input slice. *) 2086 + mutable i_max : int; (* Maximum byte index in [i]. *) 2087 + mutable i_next : int; (* Next byte index to read in [i]. *) 2088 + overlap : Stdlib.Bytes.t; (* Buffer for overlapping decodes. *) 2089 + mutable u : int; (* Current Unicode scalar value or sot or eot. *) 2090 + mutable byte_count : int; (* Global byte count. *) 2091 + mutable line : int; (* Current line number. *) 2092 + mutable line_start : int; (* Current line global byte position. *) 2093 + token : tokbuf; 2094 + ws : tokbuf; (* Bufferizes whitespace when layout is [true]. *) 2095 + } 2096 + 2097 + let decoder ?(locs = false) ?(layout = false) ?(file = "-") reader = 2098 + let overlap = Stdlib.Bytes.create uchar_max_utf8_bytes in 2099 + let token = tokbuf_create 255 and ws = tokbuf_create 255 in 2100 + let meta_none = Meta.make (Loc.(set_file none) file) in 2101 + { 2102 + file; 2103 + meta_none; 2104 + locs; 2105 + layout; 2106 + reader; 2107 + i = overlap (* overwritten by initial refill *); 2108 + i_max = 0; 2109 + i_next = 1 (* triggers an initial refill *); 2110 + overlap; 2111 + u = sot; 2112 + byte_count = 0; 2113 + line = 1; 2114 + line_start = 0; 2115 + token; 2116 + ws; 2117 + } 2118 + 2119 + (* Decoder positions *) 2120 + 2121 + let last_byte_of d = 2122 + if d.u <= 0x7F then d.byte_count - 1 2123 + else if d.u = sot || d.u = eot then d.byte_count 2124 + else 2125 + (* On multi-bytes uchars we want to point on the first byte. *) 2126 + d.byte_count - Uchar.utf_8_byte_length (Uchar.of_int d.u) 2127 + 2128 + (* Decoder errors *) 2129 + 2130 + let[@inline] loc_of_pos d ~first_byte ~last_byte ~first_line_num 2131 + ~first_line_byte ~last_line_num ~last_line_byte = 2132 + Loc.make ~file:d.file ~first_byte ~last_byte ~first_line_num ~first_line_byte 2133 + ~last_line_num ~last_line_byte 2134 + 2135 + let error_meta d = 2136 + let first_byte = last_byte_of d in 2137 + let first_line_num = d.line and first_line_byte = d.line_start in 2138 + Meta.make 2139 + @@ loc_of_pos d ~first_byte ~last_byte:first_byte ~first_line_num 2140 + ~first_line_byte ~last_line_num:first_line_num 2141 + ~last_line_byte:first_line_byte 2142 + 2143 + let error_meta_to_current ~first_byte ~first_line_num ~first_line_byte d = 2144 + let last_byte = last_byte_of d in 2145 + let last_line_num = d.line and last_line_byte = d.line_start in 2146 + Meta.make 2147 + @@ loc_of_pos d ~first_byte ~last_byte ~first_line_num ~first_line_byte 2148 + ~last_line_num ~last_line_byte 2149 + 2150 + let err_here d fmt = Error.failf (error_meta d) fmt 2151 + 2152 + let err_to_here ~first_byte ~first_line_num ~first_line_byte d fmt = 2153 + Error.failf 2154 + (error_meta_to_current ~first_byte ~first_line_num ~first_line_byte d) 2155 + fmt 2156 + 2157 + let err_malformed_utf_8 d = 2158 + if d.i_next > d.i_max then 2159 + err_here d "UTF-8 decoding error: unexpected end of bytes" 2160 + else 2161 + err_here d "UTF-8 decoding error: invalid byte %a" pp_code 2162 + (Fmt.str "%x02x" (Bytes.get_uint8 d.i d.i_next)) 2163 + 2164 + let err_exp d = err_here d "Expected %a but found %a" 2165 + let err_exp_while d = err_here d "Expected %a while parsing %a but found %a" 2166 + let err_exp_eot d = err_exp d pp_quchar eot pp_quchar d.u 2167 + let err_not_json_value d = err_exp d pp_code "JSON value" pp_quchar d.u 2168 + 2169 + let current_json_sort d = 2170 + match d.u with 2171 + | 0x0066 (* f *) | 0x0074 (* t *) -> Sort.Bool 2172 + | 0x006E (* n *) -> Sort.Null 2173 + | 0x007B (* { *) -> Sort.Object 2174 + | 0x005B (* [ *) -> Sort.Array 2175 + | 0x0022 (* DQUOTE *) -> Sort.String 2176 + | u when is_number_start u -> Sort.Number 2177 + | _ -> err_not_json_value d 2178 + 2179 + let fail_type_mismatch d t = 2180 + fail_type_mismatch (error_meta d) t ~fnd:(current_json_sort d) 2181 + 2182 + (* Errors for constants *) 2183 + 2184 + let err_exp_in_const ~first_byte ~first_line_num ~first_line_byte d ~exp ~fnd 2185 + ~const = 2186 + err_to_here ~first_byte ~first_line_num ~first_line_byte d 2187 + "Expected %a while parsing %a but found: %a" pp_quchar exp pp_code const 2188 + pp_quchar fnd 2189 + 2190 + (* Errors for numbers *) 2191 + 2192 + let err_float_parse meta tok = 2193 + Error.failf meta "Could not parse %S to a %a" tok pp_code "float" 2194 + 2195 + let err_exp_digit d = 2196 + err_exp_while d pp_code "decimal digit" pp_code "number" pp_quchar d.u 2197 + 2198 + (* Errors for strings *) 2199 + 2200 + let err_exp_hex_digit d = 2201 + err_exp_while d pp_code "hex digit" pp_code "character escape" pp_quchar d.u 2202 + 2203 + let err_exp_lo_surrogate d u = 2204 + err_exp_while d pp_code "low surrogate" pp_code "character escape" pp_quchar u 2205 + 2206 + let err_unpaired_lo_surrogate d u = 2207 + err_here d "Unpaired low surrogate %a in %a" pp_quchar u pp_code "string" 2208 + 2209 + let err_unpaired_hi_surrogate d u = 2210 + err_here d "Unpaired high surrogate %a in %a" pp_quchar u pp_code "string" 2211 + 2212 + let err_exp_esc ~first_byte ~first_line_num ~first_line_byte d u = 2213 + err_to_here ~first_byte ~first_line_num ~first_line_byte d 2214 + "Expected %a while parsing %a found %a" pp_code "escape character" pp_code 2215 + "escape" pp_quchar u 2216 + 2217 + let err_unclosed_string ~first_byte ~first_line_num ~first_line_byte d = 2218 + err_to_here ~first_byte ~first_line_num ~first_line_byte d "Unclosed %a" 2219 + pp_code "string" 2220 + 2221 + let err_illegal_ctrl_char ~first_byte ~first_line_num ~first_line_byte d = 2222 + err_to_here ~first_byte ~first_line_num ~first_line_byte d 2223 + "Illegal control character %a in %a" pp_quchar d.u pp_code "string" 2224 + 2225 + (* Errors for arrays *) 2226 + 2227 + let err_exp_comma_or_eoa d ~fnd = 2228 + err_here d "Expected %a or %a after %a but found %a" pp_code "," pp_code "]" 2229 + pp_code "array element" pp_quchar fnd 2230 + 2231 + let err_unclosed_array d = err_here d "Unclosed %a" pp_code "array" 2232 + 2233 + let err_exp_comma_or_eoo d = 2234 + err_here d "Expected %a or %a after %a but found: %a" pp_code "," pp_code "}" 2235 + pp_code "object member" pp_quchar d.u 2236 + 2237 + (* Errors for objects *) 2238 + 2239 + let err_exp_mem d = 2240 + err_here d "Expected %a but found %a" pp_code "object member" pp_quchar d.u 2241 + 2242 + let err_exp_mem_or_eoo d = 2243 + err_here d "Expected: %a or %a but found %a" pp_code "object member" pp_code 2244 + "}" pp_quchar d.u 2245 + 2246 + let err_exp_colon d = 2247 + err_here d "Expected %a after %a but found %a" pp_code ":" pp_code 2248 + "member name" pp_quchar d.u 2249 + 2250 + let err_unclosed_object d (map : ('o, 'o) object_map) = 2251 + err_here d "Unclosed %a" pp_kind (object_kinded_sort map) 2252 + 2253 + (* Decode next character in d.u *) 2254 + 2255 + let[@inline] is_eoslice d = d.i_next > d.i_max 2256 + let[@inline] is_eod d = d.i_max = -1 (* Only happens on Slice.eod *) 2257 + let[@inline] available d = d.i_max - d.i_next + 1 2258 + 2259 + let[@inline] set_slice d slice = 2260 + d.i <- Bytes.Slice.bytes slice; 2261 + d.i_next <- Bytes.Slice.first slice; 2262 + d.i_max <- d.i_next + Bytes.Slice.length slice - 1 2263 + 2264 + let rec setup_overlap d start need = 2265 + match need with 2266 + | 0 -> 2267 + let slice = 2268 + match available d with 2269 + | 0 -> Bytes.Reader.read d.reader 2270 + | length -> Bytes.Slice.make d.i ~first:d.i_next ~length 2271 + in 2272 + d.i <- d.overlap; 2273 + d.i_next <- 0; 2274 + d.i_max <- start; 2275 + slice 2276 + | need -> 2277 + if is_eoslice d then set_slice d (Bytes.Reader.read d.reader); 2278 + if is_eod d then ( 2279 + d.byte_count <- d.byte_count - start; 2280 + err_malformed_utf_8 d); 2281 + let available = available d in 2282 + let take = Int.min need available in 2283 + for i = 0 to take - 1 do 2284 + Bytes.set d.overlap (start + i) (Bytes.get d.i (d.i_next + i)) 2285 + done; 2286 + d.i_next <- d.i_next + take; 2287 + d.byte_count <- d.byte_count + take; 2288 + setup_overlap d (start + take) (need - take) 2289 + 2290 + let rec nextc d = 2291 + let a = available d in 2292 + if a <= 0 then 2293 + if is_eod d then d.u <- eot 2294 + else ( 2295 + set_slice d (Bytes.Reader.read d.reader); 2296 + nextc d) 2297 + else 2298 + let b = Bytes.get d.i d.i_next in 2299 + if a < uchar_max_utf8_bytes && a < uchar_utf8_decode_length b then begin 2300 + let s = setup_overlap d 0 (uchar_utf8_decode_length b) in 2301 + nextc d; 2302 + set_slice d s 2303 + end 2304 + else 2305 + d.u <- 2306 + (match b with 2307 + | ('\x00' .. '\x09' | '\x0B' | '\x0E' .. '\x7F') as u -> 2308 + (* ASCII fast path *) 2309 + d.i_next <- d.i_next + 1; 2310 + d.byte_count <- d.byte_count + 1; 2311 + Char.code u 2312 + | '\x0D' (* CR *) -> 2313 + d.i_next <- d.i_next + 1; 2314 + d.byte_count <- d.byte_count + 1; 2315 + d.line_start <- d.byte_count; 2316 + d.line <- d.line + 1; 2317 + 0x000D 2318 + | '\x0A' (* LF *) -> 2319 + d.i_next <- d.i_next + 1; 2320 + d.byte_count <- d.byte_count + 1; 2321 + d.line_start <- d.byte_count; 2322 + if d.u <> 0x000D then d.line <- d.line + 1; 2323 + 0x000A 2324 + | _ -> 2325 + let udec = Bytes.get_utf_8_uchar d.i d.i_next in 2326 + if not (Uchar.utf_decode_is_valid udec) then err_malformed_utf_8 d 2327 + else 2328 + let u = Uchar.to_int (Uchar.utf_decode_uchar udec) in 2329 + let ulen = Uchar.utf_decode_length udec in 2330 + d.i_next <- d.i_next + ulen; 2331 + d.byte_count <- d.byte_count + ulen; 2332 + u) 2333 + 2334 + (* Decoder tokenizer *) 2335 + 2336 + let[@inline] token_clear d = tokbuf_clear d.token 2337 + 2338 + let[@inline] token_pop d = 2339 + let t = tokbuf_contents d.token in 2340 + token_clear d; 2341 + t 2342 + 2343 + let[@inline] token_add d u = 2344 + if u <= 0x7F then tokbuf_add_char d.token (Char.unsafe_chr u) 2345 + else tokbuf_add_utf_8_uchar d.token (Uchar.unsafe_of_int u) 2346 + 2347 + (* Find a member in [mem_decs] whose key matches the current token 2348 + buffer content byte-for-byte, without allocating a string. Returns 2349 + the matching mem_dec together with the key string (owned by the 2350 + map). Used as a fast-path for object member dispatch. *) 2351 + let mem_by_token d mem_decs = 2352 + let r = ref None in 2353 + (try 2354 + String_map.iter 2355 + (fun k v -> 2356 + if tokbuf_equal_string d.token k then begin 2357 + r := Some (v, k); 2358 + raise_notrace Exit 2359 + end) 2360 + mem_decs 2361 + with Exit -> ()); 2362 + !r 2363 + 2364 + let[@inline] accept d = 2365 + token_add d d.u; 2366 + nextc d 2367 + 2368 + let token_pop_float d ~meta = 2369 + let token = token_pop d in 2370 + match float_of_string_opt token with 2371 + | Some f -> f 2372 + | None -> err_float_parse meta token (* likely [assert false] *) 2373 + 2374 + (* Decoder layout and position tracking *) 2375 + 2376 + let[@inline] ws_pop d = 2377 + if not d.layout then "" 2378 + else 2379 + let t = tokbuf_contents d.ws in 2380 + tokbuf_clear d.ws; 2381 + t 2382 + 2383 + let loc_to_current ~first_byte ~first_line_num ~first_line_byte d = 2384 + if not d.locs then Loc.none 2385 + else 2386 + let last_byte = last_byte_of d in 2387 + let last_line_num = d.line and last_line_byte = d.line_start in 2388 + loc_of_pos d ~first_byte ~last_byte ~first_line_num ~first_line_byte 2389 + ~last_line_num ~last_line_byte 2390 + 2391 + let loc_prev_ascii_char ~first_byte ~first_line_num ~first_line_byte d = 2392 + (* N.B. when we call that the line doesn't move and the char was on 2393 + a single byte *) 2394 + if not d.locs then Loc.none 2395 + else 2396 + let last_byte = last_byte_of d - 1 in 2397 + let last_line_num = d.line and last_line_byte = d.line_start in 2398 + loc_of_pos d ~first_byte ~last_byte ~first_line_num ~first_line_byte 2399 + ~last_line_num ~last_line_byte 2400 + 2401 + let meta_make d ?ws_before ?ws_after loc = 2402 + if (not d.locs) && not d.layout then d.meta_none 2403 + else Meta.make ?ws_before ?ws_after loc 2404 + 2405 + (* Decoding *) 2406 + 2407 + let false_uchars = [| 0x0066; 0x0061; 0x006C; 0x0073; 0x0065 |] 2408 + let true_uchars = [| 0x0074; 0x0072; 0x0075; 0x0065 |] 2409 + let null_uchars = [| 0x006E; 0x0075; 0x006C; 0x006C |] 2410 + 2411 + let ascii_str us = 2412 + String.init (Stdlib.Array.length us) (fun i -> 2413 + Char.chr (Stdlib.Array.get us i)) 2414 + 2415 + let[@inline] is_ws u = 2416 + if u > 0x20 then false 2417 + else 2418 + match Char.unsafe_chr u with ' ' | '\t' | '\r' | '\n' -> true | _ -> false 2419 + 2420 + let[@inline] read_ws d = 2421 + while is_ws d.u do 2422 + if d.layout then tokbuf_add_char d.ws (Char.unsafe_chr d.u); 2423 + nextc d 2424 + done 2425 + 2426 + let read_json_const d const = 2427 + (* First character was checked. *) 2428 + let ws_before = ws_pop d in 2429 + let first_byte = last_byte_of d in 2430 + let first_line_num = d.line and first_line_byte = d.line_start in 2431 + for i = 1 to Stdlib.Array.length const - 1 do 2432 + nextc d; 2433 + let c = Stdlib.Array.get const i in 2434 + if not (Int.equal d.u c) then 2435 + err_exp_in_const ~first_byte ~first_line_num ~first_line_byte d ~exp:c 2436 + ~fnd:d.u ~const:(ascii_str const) 2437 + done; 2438 + let loc = loc_to_current d ~first_byte ~first_line_num ~first_line_byte in 2439 + let ws_after = 2440 + nextc d; 2441 + read_ws d; 2442 + ws_pop d 2443 + in 2444 + meta_make d ~ws_before ~ws_after loc 2445 + 2446 + let[@inline] read_json_false d = read_json_const d false_uchars 2447 + let[@inline] read_json_true d = read_json_const d true_uchars 2448 + let[@inline] read_json_null d = read_json_const d null_uchars 2449 + 2450 + let read_json_number d = 2451 + (* [is_number_start d.u] = true *) 2452 + let[@inline] read_digits d = 2453 + while is_digit d.u do 2454 + accept d 2455 + done 2456 + in 2457 + let[@inline] read_int d = 2458 + match d.u with 2459 + | 0x0030 (* 0 *) -> accept d 2460 + | u when is_digit u -> 2461 + accept d; 2462 + read_digits d 2463 + | _ -> err_exp_digit d 2464 + in 2465 + let[@inline] read_opt_frac d = 2466 + match d.u with 2467 + | 0x002E (* . *) -> 2468 + accept d; 2469 + if is_digit d.u then read_digits d else err_exp_digit d 2470 + | _ -> () 2471 + in 2472 + let[@inline] read_opt_exp d = 2473 + match d.u with 2474 + | 0x0065 (* e *) | 0x0045 (* E *) -> 2475 + token_add d d.u; 2476 + nextc d; 2477 + (match d.u with 2478 + | 0x002D (* - *) | 0x002B (* + *) -> 2479 + token_add d d.u; 2480 + nextc d 2481 + | _ -> ()); 2482 + if is_digit d.u then read_digits d else err_exp_digit d 2483 + | _ -> () 2484 + in 2485 + let first_byte = last_byte_of d in 2486 + let first_line_num = d.line and first_line_byte = d.line_start in 2487 + let ws_before = ws_pop d in 2488 + token_clear d; 2489 + if d.u = 0x002D (* - *) then accept d; 2490 + read_int d; 2491 + read_opt_frac d; 2492 + read_opt_exp d; 2493 + let loc = 2494 + loc_prev_ascii_char d ~first_byte ~first_line_num ~first_line_byte 2495 + in 2496 + let ws_after = 2497 + read_ws d; 2498 + ws_pop d 2499 + in 2500 + meta_make d ~ws_before ~ws_after loc 2501 + 2502 + let read_json_string d = 2503 + (* d.u is 0x0022 *) 2504 + let first_byte = last_byte_of d in 2505 + let first_line_num = d.line and first_line_byte = d.line_start in 2506 + let rec read_uescape d hi uc count = 2507 + if count > 0 then 2508 + match d.u with 2509 + | u when 0x0030 <= u && u <= 0x0039 -> 2510 + nextc d; 2511 + read_uescape d hi ((uc * 16) + u - 0x30) (count - 1) 2512 + | u when 0x0041 <= u && u <= 0x0046 -> 2513 + nextc d; 2514 + read_uescape d hi ((uc * 16) + u - 0x37) (count - 1) 2515 + | u when 0x0061 <= u && u <= 0x0066 -> 2516 + nextc d; 2517 + read_uescape d hi ((uc * 16) + u - 0x57) (count - 1) 2518 + | _ -> err_exp_hex_digit d 2519 + else 2520 + match hi with 2521 + | Some hi -> 2522 + (* combine high and low surrogate. *) 2523 + if not (is_lo_surrogate uc) then err_exp_lo_surrogate d uc 2524 + else 2525 + let u = (((hi land 0x3FF) lsl 10) lor (uc land 0x3FF)) + 0x10000 in 2526 + token_add d u 2527 + | None -> 2528 + if not (is_surrogate uc) then token_add d uc 2529 + else if uc > 0xDBFF then err_unpaired_lo_surrogate d uc 2530 + else if d.u <> 0x005C (* \ *) then err_unpaired_hi_surrogate d uc 2531 + else ( 2532 + nextc d; 2533 + if d.u <> 0x0075 (* u *) then err_unpaired_hi_surrogate d uc 2534 + else ( 2535 + nextc d; 2536 + read_uescape d (Some uc) 0 4)) 2537 + in 2538 + let read_escape d = 2539 + match d.u with 2540 + | 0x0022 (* DQUOTE *) | 0x005C (* \ *) | 0x002F (* / *) -> accept d 2541 + | 0x0062 (* b *) -> 2542 + token_add d 0x0008 (* backspace *); 2543 + nextc d 2544 + | 0x0066 (* f *) -> 2545 + token_add d 0x000C (* form feed *); 2546 + nextc d 2547 + | 0x006E (* n *) -> 2548 + token_add d 0x000A (* line feed *); 2549 + nextc d 2550 + | 0x0072 (* r *) -> 2551 + token_add d 0x000D (* carriage return *); 2552 + nextc d 2553 + | 0x0074 (* t *) -> 2554 + token_add d 0x0009 (* tab *); 2555 + nextc d 2556 + | 0x0075 (* u *) -> 2557 + nextc d; 2558 + read_uescape d None 0 4 2559 + | u -> err_exp_esc ~first_byte ~first_line_num ~first_line_byte d u 2560 + in 2561 + let rec loop d = 2562 + match d.u with 2563 + | 0x005C (* \ *) -> 2564 + nextc d; 2565 + read_escape d; 2566 + loop d 2567 + | 0x0022 (* DQUOTE *) -> () 2568 + | u when u = eot -> 2569 + err_unclosed_string ~first_byte ~first_line_num ~first_line_byte d 2570 + | u when 0x0000 <= u && u <= 0x001F -> 2571 + err_illegal_ctrl_char ~first_byte ~first_line_num ~first_line_byte d 2572 + | _ -> 2573 + accept d; 2574 + loop d 2575 + in 2576 + let ws_before = ws_pop d in 2577 + nextc d; 2578 + token_clear d; 2579 + loop d; 2580 + let loc = loc_to_current d ~first_byte ~first_line_num ~first_line_byte in 2581 + let ws_after = 2582 + nextc d; 2583 + read_ws d; 2584 + ws_pop d 2585 + in 2586 + meta_make d ~ws_before ~ws_after loc 2587 + 2588 + let read_json_name d = 2589 + let meta = read_json_string d in 2590 + if d.u = 0x003A (* : *) then ( 2591 + nextc d; 2592 + meta) 2593 + else err_exp_colon d 2594 + 2595 + let read_json_mem_sep d = 2596 + if d.u = 0x007D (* } *) then () 2597 + else if d.u = 0x002C (* , *) then ( 2598 + nextc d; 2599 + read_ws d; 2600 + if d.u <> 0x0022 then err_exp_mem d) 2601 + else err_exp_comma_or_eoo d 2602 + 2603 + (* Skip-parse a JSON value: advance past [d.u] at the byte level without 2604 + materialising token buffers, parsing numbers, or decoding string 2605 + escapes. The only decoding done is UTF-8 in [nextc]; escapes in 2606 + strings are recognised only enough to not stop at a backslash-quote. *) 2607 + let rec skip_json_value d = 2608 + read_ws d; 2609 + match d.u with 2610 + | 0x007B (* { *) -> skip_json_object d 2611 + | 0x005B (* [ *) -> skip_json_array d 2612 + | 0x0022 (* DQUOTE *) -> skip_json_string d 2613 + | 0x006E (* n *) -> Stdlib.ignore (read_json_null d) 2614 + | 0x0074 (* t *) -> Stdlib.ignore (read_json_true d) 2615 + | 0x0066 (* f *) -> Stdlib.ignore (read_json_false d) 2616 + | u when is_number_start u -> skip_json_number d 2617 + | _ -> err_not_json_value d 2618 + 2619 + and skip_json_string d = 2620 + (* Byte-level scan for the closing quote; matches simdjson On-Demand 2621 + semantics. Structural contract (bracket nesting, string termination) 2622 + is enforced; content (escape correctness, exact hex digits after 2623 + [\u]) is NOT validated. Consumers needing strict content 2624 + validation should decode with [Json.json] and then discard rather 2625 + than [ignore]. *) 2626 + let done_ = ref false in 2627 + while not !done_ do 2628 + if d.i_next > d.i_max then 2629 + if is_eod d then 2630 + err_unclosed_string ~first_byte:0 ~first_line_num:Loc.line_num_none 2631 + ~first_line_byte:Loc.byte_pos_none d 2632 + else set_slice d (Bytes.Reader.read d.reader) 2633 + else begin 2634 + let b = Stdlib.Bytes.unsafe_get d.i d.i_next in 2635 + d.i_next <- d.i_next + 1; 2636 + d.byte_count <- d.byte_count + 1; 2637 + match b with 2638 + | '\\' -> 2639 + if d.i_next > d.i_max then 2640 + if is_eod d then 2641 + err_unclosed_string ~first_byte:0 2642 + ~first_line_num:Loc.line_num_none 2643 + ~first_line_byte:Loc.byte_pos_none d 2644 + else set_slice d (Bytes.Reader.read d.reader); 2645 + d.i_next <- d.i_next + 1; 2646 + d.byte_count <- d.byte_count + 1 2647 + | '"' -> done_ := true 2648 + | _ -> () 2649 + end 2650 + done; 2651 + nextc d; 2652 + read_ws d 2653 + 2654 + and skip_json_number d = 2655 + (* Consume number-continuation characters; matches simdjson 2656 + On-Demand. Structural number shape ([1..2], [+5], [1eE2]) is NOT 2657 + validated here. *) 2658 + let done_ = ref false in 2659 + while not !done_ do 2660 + if d.i_next > d.i_max then 2661 + if is_eod d then done_ := true 2662 + else set_slice d (Bytes.Reader.read d.reader) 2663 + else 2664 + match Stdlib.Bytes.unsafe_get d.i d.i_next with 2665 + | '0' .. '9' | '-' | '+' | '.' | 'e' | 'E' -> 2666 + d.i_next <- d.i_next + 1; 2667 + d.byte_count <- d.byte_count + 1 2668 + | _ -> done_ := true 2669 + done; 2670 + nextc d; 2671 + read_ws d 2672 + 2673 + and skip_json_array d = 2674 + nextc d; 2675 + (* [ *) 2676 + read_ws d; 2677 + if d.u = 0x005D (* ] *) then ( 2678 + nextc d; 2679 + read_ws d) 2680 + else 2681 + let rec loop () = 2682 + skip_json_value d; 2683 + match d.u with 2684 + | 0x002C (* , *) -> 2685 + nextc d; 2686 + read_ws d; 2687 + loop () 2688 + | 0x005D (* ] *) -> 2689 + nextc d; 2690 + read_ws d 2691 + | fnd -> err_exp_comma_or_eoa d ~fnd 2692 + in 2693 + loop () 2694 + 2695 + and skip_json_object d = 2696 + nextc d; 2697 + (* { *) 2698 + read_ws d; 2699 + if d.u = 0x007D (* } *) then ( 2700 + nextc d; 2701 + read_ws d) 2702 + else 2703 + let rec loop () = 2704 + if d.u <> 0x0022 then err_exp_mem d; 2705 + skip_json_string d; 2706 + if d.u <> 0x003A (* : *) then err_exp_colon d; 2707 + nextc d; 2708 + read_ws d; 2709 + skip_json_value d; 2710 + match d.u with 2711 + | 0x002C (* , *) -> 2712 + nextc d; 2713 + read_ws d; 2714 + loop () 2715 + | 0x007D (* } *) -> 2716 + nextc d; 2717 + read_ws d 2718 + | _ -> err_exp_comma_or_eoo d 2719 + in 2720 + loop () 2721 + 2722 + let rec parse : type a. decoder -> a t -> a = 2723 + fun d t -> 2724 + match 2725 + read_ws d; 2726 + t 2727 + with 2728 + | Null map -> ( 2729 + match d.u with 2730 + | 0x006E (* n *) -> map.dec (read_json_null d) () 2731 + | _ -> fail_type_mismatch d t) 2732 + | Bool map -> ( 2733 + match d.u with 2734 + | 0x0066 (* f *) -> map.dec (read_json_false d) false 2735 + | 0x0074 (* t *) -> map.dec (read_json_true d) true 2736 + | _ -> fail_type_mismatch d t) 2737 + | Number map -> ( 2738 + match d.u with 2739 + | u when is_number_start u -> 2740 + let meta = read_json_number d in 2741 + map.dec meta (token_pop_float d ~meta) 2742 + | 0x006E (* n *) -> map.dec (read_json_null d) Float.nan 2743 + | _ -> fail_type_mismatch d t) 2744 + | String map -> ( 2745 + match d.u with 2746 + | 0x0022 (* DQUOTE *) -> 2747 + let meta = read_json_string d in 2748 + map.dec meta (token_pop d) 2749 + | _ -> fail_type_mismatch d t) 2750 + | Array map -> ( 2751 + match d.u with 2752 + | 0x005B (* [ *) -> decode_array d map 2753 + | _ -> fail_type_mismatch d t) 2754 + | Object map -> ( 2755 + match d.u with 2756 + | 0x007B (* { *) -> decode_object d map 2757 + | _ -> fail_type_mismatch d t) 2758 + | Map map -> map.dec (parse d map.dom) 2759 + | Any map -> decode_any d t map 2760 + | Rec t -> parse d (Lazy.force t) 2761 + | Ignore -> skip_json_value d 2762 + 2763 + and decode_array : type a elt b. decoder -> (a, elt, b) array_map -> a = 2764 + fun d map -> 2765 + let ws_before = ws_pop d in 2766 + let first_byte = last_byte_of d in 2767 + let first_line_num = d.line and first_line_byte = d.line_start in 2768 + let b, len = 2769 + match 2770 + nextc d; 2771 + read_ws d; 2772 + d.u 2773 + with 2774 + | 0x005D (* ] *) -> (map.dec_empty (), 0) 2775 + | _ -> ( 2776 + let b = ref (map.dec_empty ()) in 2777 + let i = ref 0 in 2778 + let next = ref true in 2779 + try 2780 + while !next do 2781 + begin 2782 + let first_byte = last_byte_of d in 2783 + let first_line_num = d.line and first_line_byte = d.line_start in 2784 + try 2785 + if map.dec_skip !i !b then parse d ignore 2786 + else b := map.dec_add !i (parse d map.elt) !b 2787 + with Error e -> 2788 + let imeta = 2789 + error_meta_to_current ~first_byte ~first_line_num 2790 + ~first_line_byte d 2791 + in 2792 + fail_push_array (error_meta d) map (!i, imeta) e 2793 + end; 2794 + incr i; 2795 + match 2796 + read_ws d; 2797 + d.u 2798 + with 2799 + | 0x005D (* ] *) -> next := false 2800 + | 0x002C (* , *) -> 2801 + nextc d; 2802 + read_ws d 2803 + | u when u = eot -> err_unclosed_array d 2804 + | fnd -> err_exp_comma_or_eoa d ~fnd 2805 + done; 2806 + (!b, !i) 2807 + with Error e -> 2808 + Error.adjust_context ~first_byte ~first_line_num ~first_line_byte e) 2809 + in 2810 + let loc = loc_to_current d ~first_byte ~first_line_num ~first_line_byte in 2811 + let ws_after = 2812 + nextc d; 2813 + read_ws d; 2814 + ws_pop d 2815 + in 2816 + let meta = meta_make d ~ws_before ~ws_after loc in 2817 + map.dec_finish meta len b 2818 + 2819 + and decode_object : type a. decoder -> (a, a) object_map -> a = 2820 + fun d map -> 2821 + let ws_before = ws_pop d in 2822 + let first_byte = last_byte_of d in 2823 + let first_line_num = d.line and first_line_byte = d.line_start in 2824 + let dict = 2825 + try 2826 + nextc d; 2827 + read_ws d; 2828 + decode_object_map d map (Unknown_mems None) String_map.empty 2829 + String_map.empty [] Dict.empty 2830 + with 2831 + | Error { ctx; meta; kind } when Loc.Context.is_empty ctx -> 2832 + let meta = 2833 + (* This is for when finish_object_decode raises. *) 2834 + if Loc.is_none (Meta.loc meta) then 2835 + error_meta_to_current d ~first_byte ~first_line_num ~first_line_byte 2836 + else meta 2837 + in 2838 + Error.fail ~ctx ~meta kind 2839 + | Error e -> 2840 + Error.adjust_context ~first_byte ~first_line_num ~first_line_byte e 2841 + in 2842 + let loc = loc_to_current d ~first_byte ~first_line_num ~first_line_byte in 2843 + let ws_after = 2844 + nextc d; 2845 + read_ws d; 2846 + ws_pop d 2847 + in 2848 + let meta = meta_make d ~ws_before ~ws_after loc in 2849 + let dict = Dict.add object_meta_arg meta dict in 2850 + apply_dict map.dec dict 2851 + 2852 + and decode_object_delayed : type o. 2853 + decoder -> 2854 + (o, o) object_map -> 2855 + mem_dec String_map.t -> 2856 + mem_dec String_map.t -> 2857 + object' -> 2858 + Dict.t -> 2859 + mem_dec String_map.t * object' * Dict.t = 2860 + fun d map mem_miss mem_decs delay dict -> 2861 + let rec loop d map mem_miss mem_decs rem_delay dict = function 2862 + | [] -> (mem_miss, rem_delay, dict) 2863 + | ((((name, _meta) as nm), v) as mem) :: delay -> ( 2864 + match String_map.find_opt name mem_decs with 2865 + | None -> loop d map mem_miss mem_decs (mem :: rem_delay) dict delay 2866 + | Some (Mem_dec m) -> 2867 + let dict = 2868 + try 2869 + let t = m.type' in 2870 + let v = 2871 + match decode t v with 2872 + | Ok v -> v 2873 + | Error e -> raise_notrace (Error e) 2874 + in 2875 + Dict.add m.id v dict 2876 + with Error e -> fail_push_object (error_meta d) map nm e 2877 + in 2878 + let mem_miss = String_map.remove name mem_miss in 2879 + loop d map mem_miss mem_decs rem_delay dict delay) 2880 + in 2881 + loop d map mem_miss mem_decs [] dict delay 2882 + 2883 + and decode_object_map : type o. 2884 + decoder -> 2885 + (o, o) object_map -> 2886 + unknown_mems_option -> 2887 + mem_dec String_map.t -> 2888 + mem_dec String_map.t -> 2889 + object' -> 2890 + Dict.t -> 2891 + Dict.t = 2892 + fun d map umems mem_miss mem_decs delay dict -> 2893 + let u _ _ _ = assert false in 2894 + let mem_miss = String_map.union u mem_miss map.mem_decs in 2895 + let mem_decs = String_map.union u mem_decs map.mem_decs in 2896 + match map.shape with 2897 + | Object_cases (umems', cases) -> 2898 + let umems' = Unknown_mems umems' in 2899 + let umems, dict = override_unknown_mems ~by:umems umems' dict in 2900 + decode_object_case d map umems cases mem_miss mem_decs delay dict 2901 + | Object_basic umems' -> ( 2902 + let mem_miss, delay, dict = 2903 + decode_object_delayed d map mem_miss mem_decs delay dict 2904 + in 2905 + let umems' = Unknown_mems (Some umems') in 2906 + let umems, dict = override_unknown_mems ~by:umems umems' dict in 2907 + match umems with 2908 + | Unknown_mems (Some Unknown_skip | None) -> 2909 + decode_object_basic d map Unknown_skip () mem_miss mem_decs dict 2910 + | Unknown_mems (Some (Unknown_error as u)) -> 2911 + if delay = [] then 2912 + decode_object_basic d map u () mem_miss mem_decs dict 2913 + else 2914 + let fnd = List.map fst delay in 2915 + fail_unexpected_members (error_meta d) map ~fnd 2916 + | Unknown_mems (Some (Unknown_keep (umap, _) as u)) -> 2917 + let add_delay umems (((n, meta) as nm), v) = 2918 + try 2919 + let t = umap.mems_type in 2920 + let v = 2921 + match decode t v with 2922 + | Ok v -> v 2923 + | Error e -> raise_notrace (Error e) 2924 + in 2925 + umap.dec_add meta n v umems 2926 + with Error e -> fail_push_object (error_meta d) map nm e 2927 + in 2928 + let umems = List.fold_left add_delay (umap.dec_empty ()) delay in 2929 + decode_object_basic d map u umems mem_miss mem_decs dict) 2930 + 2931 + and decode_object_basic : type o p mems builder. 2932 + decoder -> 2933 + (o, o) object_map -> 2934 + (p, mems, builder) unknown_mems -> 2935 + builder -> 2936 + mem_dec String_map.t -> 2937 + mem_dec String_map.t -> 2938 + Dict.t -> 2939 + Dict.t = 2940 + fun d map u umap mem_miss mem_decs dict -> 2941 + match d.u with 2942 + | 0x007D (* } *) -> 2943 + let meta = 2944 + d.meta_none 2945 + (* we add a correct one in decode_object *) 2946 + in 2947 + finish_object_decode map meta u umap mem_miss dict 2948 + | 0x0022 -> 2949 + let meta = read_json_name d in 2950 + (* Fast path: byte-compare the token buffer against [mem_decs] 2951 + keys without allocating. Only materialise the name as a 2952 + string if no match was found (for Unknown_keep paths and 2953 + error messages). *) 2954 + begin match mem_by_token d mem_decs with 2955 + | Some (Mem_dec mem, name) -> 2956 + token_clear d; 2957 + let mem_miss = String_map.remove name mem_miss in 2958 + let dict = 2959 + try Dict.add mem.id (parse d mem.type') dict 2960 + with Error e -> fail_push_object (error_meta d) map (name, meta) e 2961 + in 2962 + read_json_mem_sep d; 2963 + decode_object_basic d map u umap mem_miss mem_decs dict 2964 + | None -> ( 2965 + match u with 2966 + | Unknown_skip -> 2967 + (* The name is never read, so we don't need to allocate it. *) 2968 + token_clear d; 2969 + let () = 2970 + try parse d ignore 2971 + with Error e -> 2972 + fail_push_object (error_meta d) map (token_pop d, meta) e 2973 + in 2974 + read_json_mem_sep d; 2975 + decode_object_basic d map u umap mem_miss mem_decs dict 2976 + | Unknown_error -> 2977 + let name = token_pop d in 2978 + let fnd = [ (name, meta) ] in 2979 + fail_unexpected_members (error_meta d) map ~fnd 2980 + | Unknown_keep (umap', _) -> 2981 + let name = token_pop d in 2982 + let umap = 2983 + try umap'.dec_add meta name (parse d umap'.mems_type) umap 2984 + with Error e -> 2985 + fail_push_object (error_meta d) map (name, meta) e 2986 + in 2987 + read_json_mem_sep d; 2988 + decode_object_basic d map u umap mem_miss mem_decs dict) 2989 + end 2990 + | u when u = eot -> err_unclosed_object d map 2991 + | _ -> err_exp_mem_or_eoo d 2992 + 2993 + and decode_object_case : type o cases tag. 2994 + decoder -> 2995 + (o, o) object_map -> 2996 + unknown_mems_option -> 2997 + (o, cases, tag) object_cases -> 2998 + mem_dec String_map.t -> 2999 + mem_dec String_map.t -> 3000 + object' -> 3001 + Dict.t -> 3002 + Dict.t = 3003 + fun d map umems cases mem_miss mem_decs delay dict -> 3004 + let decode_case_tag ~sep map umems cases mem_miss mem_decs nmeta tag delay = 3005 + let eq_tag (Case c) = cases.tag_compare c.tag tag = 0 in 3006 + match List.find_opt eq_tag cases.cases with 3007 + | None -> ( 3008 + try fail_unexpected_case_tag (error_meta d) map cases tag 3009 + with Error e -> 3010 + fail_push_object (error_meta d) map (cases.tag.name, nmeta) e) 3011 + | Some (Case case) -> 3012 + if sep then read_json_mem_sep d; 3013 + let dict = 3014 + decode_object_map d case.object_map umems mem_miss mem_decs delay dict 3015 + in 3016 + Dict.add cases.id (case.dec (apply_dict case.object_map.dec dict)) dict 3017 + in 3018 + match d.u with 3019 + | 0x007D (* } *) -> ( 3020 + match cases.tag.dec_absent with 3021 + | Some tag -> 3022 + decode_case_tag ~sep:false map umems cases mem_miss mem_decs 3023 + d.meta_none tag delay 3024 + | None -> 3025 + let fnd = List.map (fun ((n, _), _) -> n) delay in 3026 + let exp = String_map.singleton cases.tag.name (Mem_dec cases.tag) in 3027 + fail_missing_members (error_meta d) map ~exp ~fnd) 3028 + | 0x0022 -> 3029 + let meta = read_json_name d in 3030 + let name = token_pop d in 3031 + if String.equal name cases.tag.name then 3032 + let tag = 3033 + try parse d cases.tag.type' 3034 + with Error e -> fail_push_object (error_meta d) map (name, meta) e 3035 + in 3036 + decode_case_tag ~sep:true map umems cases mem_miss mem_decs meta tag 3037 + delay 3038 + else 3039 + begin match String_map.find_opt name mem_decs with 3040 + | Some (Mem_dec mem) -> 3041 + let mem_miss = String_map.remove name mem_miss in 3042 + let dict = 3043 + try Dict.add mem.id (parse d mem.type') dict 3044 + with Error e -> 3045 + fail_push_object (error_meta d) map (name, meta) e 3046 + in 3047 + read_json_mem_sep d; 3048 + decode_object_case d map umems cases mem_miss mem_decs delay dict 3049 + | None -> 3050 + (* Because JSON can be out of order we don't know how to decode 3051 + this yet. Generic decode *) 3052 + let v = 3053 + try parse d Value.t 3054 + with Error e -> 3055 + fail_push_object (error_meta d) map (name, meta) e 3056 + in 3057 + let delay = ((name, meta), v) :: delay in 3058 + read_json_mem_sep d; 3059 + decode_object_case d map umems cases mem_miss mem_decs delay dict 3060 + end 3061 + | u when u = eot -> err_unclosed_object d map 3062 + | _ -> err_exp_mem_or_eoo d 3063 + 3064 + and decode_any : type a. decoder -> a t -> a any_map -> a = 3065 + fun d t map -> 3066 + let case d t map = 3067 + match map with None -> fail_type_mismatch d t | Some t -> parse d t 3068 + in 3069 + match d.u with 3070 + | 0x006E (* n *) -> case d t map.dec_null 3071 + | 0x0066 (* f *) | 0x0074 (* t *) -> case d t map.dec_bool 3072 + | 0x0022 (* DQUOTE *) -> case d t map.dec_string 3073 + | 0x005B (* [ *) -> case d t map.dec_array 3074 + | 0x007B (* { *) -> case d t map.dec_object 3075 + | u when is_number_start u -> case d t map.dec_number 3076 + | _ -> err_not_json_value d 3077 + 3078 + let of_reader_exn ?layout ?locs ?file t reader = 3079 + let d = decoder ?layout ?locs ?file reader in 3080 + let v = 3081 + nextc d; 3082 + parse d t 3083 + in 3084 + if d.u <> eot then err_exp_eot d else v 3085 + 3086 + let of_reader ?layout ?locs ?file t reader = 3087 + try Ok (of_reader_exn ?layout ?locs ?file t reader) with Error e -> Error e 3088 + 3089 + let of_string_exn ?layout ?locs ?file t s = 3090 + of_reader_exn ?layout ?locs ?file t (Bytes.Reader.of_string s) 3091 + 3092 + let of_string ?layout ?locs ?file t s = 3093 + of_reader ?layout ?locs ?file t (Bytes.Reader.of_string s) 3094 + 3095 + (* Encoding *) 3096 + 3097 + type encoder = { 3098 + writer : Bytes.Writer.t; (* Destination of bytes. *) 3099 + o : Bytes.t; (* Buffer for slices. *) 3100 + o_max : int; (* Max index in [o]. *) 3101 + mutable o_next : int; (* Next writable index in [o]. *) 3102 + preserve : bool; (* Emit [Meta.t] whitespace / layout. *) 3103 + indent : bool; (* Pretty-print with nested indentation. *) 3104 + number_format : string; 3105 + } 3106 + 3107 + let encoder ?buf ?indent ?(preserve = false) 3108 + ?(number_format = Ast.default_number_format) writer = 3109 + let indent = Option.is_some indent && not preserve in 3110 + let o = 3111 + match buf with 3112 + | Some buf -> buf 3113 + | None -> Bytes.create (Bytes.Writer.slice_length writer) 3114 + in 3115 + let len = Bytes.length o in 3116 + let number_format = string_of_format number_format in 3117 + let o_max = len - 1 and o_next = 0 in 3118 + { writer; o; o_max; o_next; preserve; indent; number_format } 3119 + 3120 + let[@inline] rem_len e = e.o_max - e.o_next + 1 3121 + 3122 + let flush e = 3123 + Bytes.Writer.write e.writer (Bytes.Slice.make e.o ~first:0 ~length:e.o_next); 3124 + e.o_next <- 0 3125 + 3126 + let write_eot ~eod e = 3127 + flush e; 3128 + if eod then Bytes.Writer.write_eod e.writer 3129 + 3130 + let write_char e c = 3131 + if e.o_next > e.o_max then flush e; 3132 + Stdlib.Bytes.set e.o e.o_next c; 3133 + e.o_next <- e.o_next + 1 3134 + 3135 + let rec write_substring e s first length = 3136 + if length = 0 then () 3137 + else 3138 + let len = Int.min (rem_len e) length in 3139 + if len = 0 then ( 3140 + flush e; 3141 + write_substring e s first length) 3142 + else begin 3143 + Bytes.blit_string s first e.o e.o_next len; 3144 + e.o_next <- e.o_next + len; 3145 + write_substring e s (first + len) (length - len) 3146 + end 3147 + 3148 + let write_bytes e s = write_substring e s 0 (String.length s) 3149 + let write_sep e = write_char e ',' 3150 + 3151 + let write_indent e ~nest = 3152 + for _i = 1 to nest do 3153 + write_char e ' '; 3154 + write_char e ' ' 3155 + done 3156 + 3157 + let write_ws_before e m = write_bytes e (Meta.ws_before m) 3158 + let write_ws_after e m = write_bytes e (Meta.ws_after m) 3159 + let write_json_null e = write_bytes e "null" 3160 + let write_json_bool e b = write_bytes e (if b then "true" else "false") 3161 + 3162 + (* XXX we bypass the printf machinery as it costs quite quite a bit. 3163 + Would be even better if we could format directly to a bytes values 3164 + rather than allocating a string per number. *) 3165 + external format_float : string -> float -> string = "caml_format_float" 3166 + 3167 + let write_json_number e f = 3168 + if Float.is_finite f then write_bytes e (format_float e.number_format f) 3169 + else write_json_null e 3170 + 3171 + let write_json_string e s = 3172 + let is_control = function '\x00' .. '\x1F' | '\x7F' -> true | _ -> false in 3173 + let len = String.length s in 3174 + let flush e start i max = 3175 + if start <= max then write_substring e s start (i - start) 3176 + in 3177 + let rec loop start i max = 3178 + if i > max then flush e start i max 3179 + else 3180 + let next = i + 1 in 3181 + match String.get s i with 3182 + | '\"' -> 3183 + flush e start i max; 3184 + write_bytes e "\\\""; 3185 + loop next next max 3186 + | '\\' -> 3187 + flush e start i max; 3188 + write_bytes e "\\\\"; 3189 + loop next next max 3190 + | '\n' -> 3191 + flush e start i max; 3192 + write_bytes e "\\n"; 3193 + loop next next max 3194 + | '\r' -> 3195 + flush e start i max; 3196 + write_bytes e "\\r"; 3197 + loop next next max 3198 + | '\t' -> 3199 + flush e start i max; 3200 + write_bytes e "\\t"; 3201 + loop next next max 3202 + | c when is_control c -> 3203 + flush e start i max; 3204 + write_bytes e "\\u"; 3205 + write_bytes e (Fmt.str "%04X" (Char.code c)); 3206 + loop next next max 3207 + | _ -> loop start next max 3208 + in 3209 + write_char e '"'; 3210 + loop 0 0 (len - 1); 3211 + write_char e '"' 3212 + 3213 + let encode_null (map : ('a, 'b) base_map) e v = 3214 + let () = map.enc v in 3215 + if not e.preserve then write_json_null e 3216 + else 3217 + let meta = map.enc_meta v in 3218 + write_ws_before e meta; 3219 + write_json_null e; 3220 + write_ws_after e meta 3221 + 3222 + let encode_bool (map : ('a, 'b) base_map) e v = 3223 + let b = map.enc v in 3224 + if not e.preserve then write_json_bool e b 3225 + else 3226 + let meta = map.enc_meta v in 3227 + write_ws_before e meta; 3228 + write_json_bool e b; 3229 + write_ws_after e meta 3230 + 3231 + let encode_number (map : ('a, 'b) base_map) e v = 3232 + let n = map.enc v in 3233 + if not e.preserve then write_json_number e n 3234 + else 3235 + let meta = map.enc_meta v in 3236 + write_ws_before e meta; 3237 + write_json_number e n; 3238 + write_ws_after e meta 3239 + 3240 + let encode_string (map : ('a, 'b) base_map) e v = 3241 + let s = map.enc v in 3242 + if not e.preserve then write_json_string e s 3243 + else 3244 + let meta = map.enc_meta v in 3245 + write_ws_before e meta; 3246 + write_json_string e s; 3247 + write_ws_after e meta 3248 + 3249 + let encode_mem_indent ~nest e = 3250 + write_char e '\n'; 3251 + write_indent e ~nest 3252 + 3253 + let encode_mem_name e meta n = 3254 + if e.preserve then ( 3255 + write_ws_before e meta; 3256 + write_json_string e n; 3257 + write_ws_after e meta; 3258 + write_char e ':') 3259 + else ( 3260 + write_json_string e n; 3261 + if e.indent then write_bytes e ": " else write_char e ':') 3262 + 3263 + let rec write : type a. nest:int -> a t -> encoder -> a -> unit = 3264 + fun ~nest t e v -> 3265 + match t with 3266 + | Null map -> encode_null map e v 3267 + | Bool map -> encode_bool map e v 3268 + | Number map -> encode_number map e v 3269 + | String map -> encode_string map e v 3270 + | Array map -> encode_array ~nest map e v 3271 + | Object map -> encode_object ~nest map e v 3272 + | Any map -> write ~nest (map.enc v) e v 3273 + | Map map -> write ~nest map.dom e (map.enc v) 3274 + | Rec t -> write ~nest (Lazy.force t) e v 3275 + | Ignore -> Error.failf Meta.none "Cannot encode Ignore value" 3276 + 3277 + and encode_array : type a elt b. 3278 + nest:int -> (a, elt, b) array_map -> encoder -> a -> unit = 3279 + fun ~nest map e v -> 3280 + let encode_element ~nest map e i v = 3281 + if i <> 0 then write_sep e; 3282 + try 3283 + write ~nest map.elt e v; 3284 + e 3285 + with Error e -> fail_push_array Meta.none map (i, Meta.none) e 3286 + in 3287 + if e.preserve then ( 3288 + let meta = map.enc_meta v in 3289 + write_ws_before e meta; 3290 + write_char e '['; 3291 + Stdlib.ignore (map.enc (encode_element ~nest:(nest + 1) map) e v); 3292 + write_char e ']'; 3293 + write_ws_after e meta) 3294 + else if e.indent then ( 3295 + let encode_element ~nest map e i v = 3296 + if i <> 0 then write_sep e; 3297 + write_char e '\n'; 3298 + write_indent e ~nest; 3299 + try 3300 + write ~nest map.elt e v; 3301 + e 3302 + with Error e -> fail_push_array Meta.none map (i, Meta.none) e 3303 + in 3304 + let array_not_empty e = 3305 + e.o_next = 0 || not (Bytes.get e.o (e.o_next - 1) = '[') 3306 + in 3307 + write_char e '['; 3308 + Stdlib.ignore (map.enc (encode_element ~nest:(nest + 1) map) e v); 3309 + if array_not_empty e then ( 3310 + write_char e '\n'; 3311 + write_indent e ~nest); 3312 + write_char e ']') 3313 + else ( 3314 + write_char e '['; 3315 + Stdlib.ignore (map.enc (encode_element ~nest:(nest + 1) map) e v); 3316 + write_char e ']') 3317 + 3318 + and encode_object : type o. 3319 + nest:int -> (o, o) object_map -> encoder -> o -> unit = 3320 + fun ~nest map e o -> 3321 + if e.preserve then ( 3322 + let meta = map.enc_meta o in 3323 + write_ws_before e meta; 3324 + write_char e '{'; 3325 + Stdlib.ignore 3326 + (encode_object_map ~nest:(nest + 1) map ~do_unknown:true e ~start:true o); 3327 + write_char e '}'; 3328 + write_ws_after e meta) 3329 + else if e.indent then ( 3330 + write_char e '{'; 3331 + let start = 3332 + encode_object_map ~nest:(nest + 1) map ~do_unknown:true e ~start:true o 3333 + in 3334 + if not start then ( 3335 + write_char e '\n'; 3336 + write_indent e ~nest); 3337 + write_char e '}') 3338 + else ( 3339 + write_char e '{'; 3340 + Stdlib.ignore 3341 + (encode_object_map ~nest:(nest + 1) map ~do_unknown:true e ~start:true o); 3342 + write_char e '}') 3343 + 3344 + and encode_object_map : type o. 3345 + nest:int -> 3346 + (o, o) object_map -> 3347 + do_unknown:bool -> 3348 + encoder -> 3349 + start:bool -> 3350 + o -> 3351 + bool = 3352 + fun ~nest map ~do_unknown e ~start o -> 3353 + let encode_mem ~nest map e o start (Mem_enc mmap) = 3354 + try 3355 + let v = mmap.enc o in 3356 + if mmap.enc_omit v then start 3357 + else begin 3358 + if not start then write_char e ','; 3359 + if e.indent then encode_mem_indent ~nest e; 3360 + let meta = 3361 + (* if e.preserve then mmap.enc_name_meta v else *) 3362 + Meta.none 3363 + in 3364 + encode_mem_name e meta mmap.name; 3365 + write ~nest mmap.type' e v; 3366 + false 3367 + end 3368 + with Error e -> fail_push_object Meta.none map (mmap.name, Meta.none) e 3369 + in 3370 + match map.shape with 3371 + | Object_basic u -> 3372 + let start = 3373 + List.fold_left (encode_mem ~nest map e o) start map.mem_encs 3374 + in 3375 + begin match u with 3376 + | Unknown_keep (umap, enc) when do_unknown -> 3377 + encode_unknown_mems ~nest map umap e ~start (enc o) 3378 + | _ -> start 3379 + end 3380 + | Object_cases (umap, cases) -> ( 3381 + let (Case_value (case, c)) = cases.enc_case (cases.enc o) in 3382 + let start = 3383 + if cases.tag.enc_omit case.tag then start 3384 + else encode_mem ~nest map e case.tag start (Mem_enc cases.tag) 3385 + in 3386 + let start = 3387 + List.fold_left (encode_mem ~nest map e o) start map.mem_encs 3388 + in 3389 + match umap with 3390 + | Some (Unknown_keep (umap, enc)) -> 3391 + let start = 3392 + encode_object_map ~nest case.object_map ~do_unknown:false e ~start c 3393 + in 3394 + encode_unknown_mems ~nest map umap e ~start (enc o) 3395 + | _ -> encode_object_map ~nest case.object_map ~do_unknown e ~start c) 3396 + 3397 + and encode_unknown_mems : type o mems a builder. 3398 + nest:int -> 3399 + (o, o) object_map -> 3400 + (mems, a, builder) mems_map -> 3401 + encoder -> 3402 + start:bool -> 3403 + mems -> 3404 + bool = 3405 + fun ~nest map umap e ~start mems -> 3406 + let encode_unknown_mem ~nest map umap e meta n v start = 3407 + try 3408 + if not start then write_char e ','; 3409 + if e.indent then encode_mem_indent ~nest e; 3410 + encode_mem_name e meta n; 3411 + write ~nest umap.mems_type e v; 3412 + false 3413 + with Error e -> fail_push_object Meta.none map (n, Meta.none) e 3414 + in 3415 + umap.enc (encode_unknown_mem ~nest map umap e) mems start 3416 + 3417 + let to_writer ?buf ?indent ?preserve ?number_format t v ~eod w = 3418 + let e = encoder ?buf ?indent ?preserve ?number_format w in 3419 + write ~nest:0 t e v; 3420 + write_eot ~eod e 3421 + 3422 + let to_string ?buf ?indent ?preserve ?number_format t v = 3423 + let b = Buffer.create 255 in 3424 + let w = Bytes.Writer.of_buffer b in 3425 + to_writer ?buf ?indent ?preserve ?number_format ~eod:true t v w; 3426 + Buffer.contents b 3427 + 3428 + module Stream = struct 3429 + let of_reader = of_reader 3430 + let of_reader_exn = of_reader_exn 3431 + let of_string = of_string 3432 + let of_string_exn = of_string_exn 3433 + let to_writer = to_writer 3434 + let to_string = to_string 3435 + end
+137 -93
lib/codec.mli
··· 264 264 val kind : 'a t -> string 265 265 (** [kind t] is the kind of the underlying map, see {!Json.kind}. *) 266 266 267 - val array_kinded_sort : ('a, 'elt, 'builder) array_map -> string 268 - (** [array_kinded_sort map] is like {!kinded_sort} but acts directly on the 269 - array [map]. *) 270 - 271 - val object_kinded_sort : ('o, 'dec) object_map -> string 272 - (** [object_kinded_sort map] is like {!kinded_sort} but acts directly on the 273 - object [map]. *) 274 - 275 - val pp_kind : string Fmt.t 276 - (** [pp_kind] formats kinds. *) 277 - 278 267 val doc : 'a t -> string 279 268 (** See {!Json.doc}. *) 280 269 281 270 val with_doc : ?kind:string -> ?doc:string -> 'a t -> 'a t 282 271 (** See {!Json.with_doc}. *) 283 272 284 - (** {1:errors Errors} *) 273 + (** {1:private Internal runtime} 285 274 286 - val fail_push_array : 287 - Meta.t -> ('array, 'elt, 'builder) array_map -> int node -> Error.t -> 'a 288 - (** [fail_push_array] is like {!Error.fail_push_array} but uses the given array 289 - [meta] and array map to caracterize the context. *) 275 + The contents of {!Private} are the primitives used by the bytesrw parser and 276 + the codec runtime. They are not part of the stable public API; do not depend 277 + on them from application code. *) 290 278 291 - val fail_push_object : 292 - Meta.t -> ('o, 'dec) object_map -> string node -> Error.t -> 'a 293 - (** [fail_push_object] is like {!Error.fail_push_object} but uses the given 294 - object [meta] and object map to caracterize the context. *) 279 + type unknown_mems_option = 280 + | Unknown_mems : 281 + ('o, 'mems, 'builder) unknown_mems option 282 + -> unknown_mems_option 283 + (** Internal. Used by {!Private.override_unknown_mems}. *) 295 284 296 - val fail_type_mismatch : Meta.t -> 'a t -> fnd:Sort.t -> 'b 297 - (** [fail_type_mismatch meta t ~fnd] errors when the kind expected by codec [t] 298 - does not match the actually-parsed sort [fnd]. *) 285 + module Private : sig 286 + val array_kinded_sort : ('a, 'elt, 'builder) array_map -> string 287 + (** [array_kinded_sort map] is like {!kinded_sort} but acts directly on the 288 + array [map]. *) 299 289 300 - val fail_missing_members : 301 - Meta.t -> 302 - ('o, 'o) object_map -> 303 - exp:mem_dec String_map.t -> 304 - fnd:string list -> 305 - 'a 306 - (** [fail_missing_members m map exp fnd] errors when [exp] cannot be found, 307 - [fnd] can list a few members that were found. *) 290 + val object_kinded_sort : ('o, 'dec) object_map -> string 291 + (** [object_kinded_sort map] is like {!kinded_sort} but acts directly on the 292 + object [map]. *) 308 293 309 - val fail_unexpected_members : 310 - Meta.t -> ('o, 'o) object_map -> fnd:(string * Meta.t) list -> 'a 311 - (** [fail_unexpected_members meta map ~fnd] errors when [fnd] are unexpected 312 - members for object [map]. *) 294 + val pp_kind : string Fmt.t 295 + (** [pp_kind] formats kinds. *) 313 296 314 - val fail_unexpected_case_tag : 315 - Meta.t -> ('o, 'o) object_map -> ('o, 'd, 'tag) object_cases -> 'tag -> 'a 316 - (** [fail_unexpected_case_tag meta map cases tag] is when a [tag] of a case 317 - member has no corresponding case. *) 297 + val pp_code : string Fmt.t 298 + (** [pp_code] formats strings like code (in bold). *) 318 299 319 - (** {1:toolbox Processor toolbox} *) 300 + (** {1:errors Errors} *) 320 301 321 - val object_meta_arg : Meta.t Type.Id.t 322 - (** [object_meta_arg] is the type identifier used to thread an object's 323 - {!Meta.t} through an object map decode via {!Dict.t}. *) 302 + val fail_push_array : 303 + Meta.t -> ('array, 'elt, 'builder) array_map -> int node -> Error.t -> 'a 304 + (** [fail_push_array] is like {!Error.fail_push_array} but uses the given 305 + array [meta] and array map to caracterize the context. *) 324 306 325 - (** Heterogeneous dictionaries. *) 326 - module Dict : sig 327 - type binding = 328 - | B : 'a Type.Id.t * 'a -> binding 329 - (** The type for bindings, packing a type identifier and a value. *) 307 + val fail_push_object : 308 + Meta.t -> ('o, 'dec) object_map -> string node -> Error.t -> 'a 309 + (** [fail_push_object] is like {!Error.fail_push_object} but uses the given 310 + object [meta] and object map to caracterize the context. *) 330 311 331 - type t 332 - (** The type for dictionaries keyed by {!Type.Id}. *) 312 + val fail_type_mismatch : Meta.t -> 'a t -> fnd:Sort.t -> 'b 313 + (** [fail_type_mismatch meta t ~fnd] errors when the kind expected by codec 314 + [t] does not match the actually-parsed sort [fnd]. *) 333 315 334 - val empty : t 335 - (** [empty] is the empty dictionary. *) 316 + val fail_missing_members : 317 + Meta.t -> 318 + ('o, 'o) object_map -> 319 + exp:mem_dec String_map.t -> 320 + fnd:string list -> 321 + 'a 322 + (** [fail_missing_members m map exp fnd] errors when [exp] cannot be found, 323 + [fnd] can list a few members that were found. *) 336 324 337 - val mem : 'a Type.Id.t -> t -> bool 338 - (** [mem id d] is [true] iff [d] contains a binding for [id]. *) 325 + val fail_unexpected_members : 326 + Meta.t -> ('o, 'o) object_map -> fnd:(string * Meta.t) list -> 'a 327 + (** [fail_unexpected_members meta map ~fnd] errors when [fnd] are unexpected 328 + members for object [map]. *) 339 329 340 - val add : 'a Type.Id.t -> 'a -> t -> t 341 - (** [add id v d] binds [id] to [v] in [d], shadowing any previous binding. *) 330 + val fail_unexpected_case_tag : 331 + Meta.t -> ('o, 'o) object_map -> ('o, 'd, 'tag) object_cases -> 'tag -> 'a 332 + (** [fail_unexpected_case_tag meta map cases tag] is when a [tag] of a case 333 + member has no corresponding case. *) 342 334 343 - val remove : 'a Type.Id.t -> t -> t 344 - (** [remove id d] is [d] with the binding for [id] removed. *) 335 + (** {1:toolbox Processor toolbox} *) 345 336 346 - val find : 'a Type.Id.t -> t -> 'a option 347 - (** [find id d] is the value bound to [id] in [d], if any. *) 348 - end 337 + val object_meta_arg : Meta.t Type.Id.t 338 + (** [object_meta_arg] is the type identifier used to thread an object's 339 + {!Meta.t} through an object map decode via {!Dict.t}. *) 349 340 350 - val apply_dict : ('ret, 'f) dec_fun -> Dict.t -> 'f 351 - (** [apply_dict dec dict] applies [dict] to [f] in order to get the value ['f]. 352 - Raises [Invalid_argument] if [dict] has not all the type identifiers that 353 - [dec] needs. *) 341 + (** Heterogeneous dictionaries. *) 342 + module Dict : sig 343 + type binding = B : 'a Type.Id.t * 'a -> binding 344 + type t 354 345 355 - type unknown_mems_option = 356 - | Unknown_mems : 357 - ('o, 'mems, 'builder) unknown_mems option 358 - -> unknown_mems_option 359 - (** A type for hiding an optional {!type-unknown_mems} values. *) 346 + val empty : t 347 + val mem : 'a Type.Id.t -> t -> bool 348 + val add : 'a Type.Id.t -> 'a -> t -> t 349 + val remove : 'a Type.Id.t -> t -> t 350 + val find : 'a Type.Id.t -> t -> 'a option 351 + end 360 352 361 - val override_unknown_mems : 362 - by:unknown_mems_option -> 363 - unknown_mems_option -> 364 - Dict.t -> 365 - unknown_mems_option * Dict.t 366 - (** [override_unknown_mems ~by current dict] preforms the unknown member 367 - overriding logic for {!Json.Object.Case} objects. In particular if [current] 368 - is a {!Json.Object.Mems.val-map} it adds an empty one in [dict] so that the 369 - associated decoding function does not fail. *) 353 + val apply_dict : ('ret, 'f) dec_fun -> Dict.t -> 'f 354 + (** [apply_dict dec dict] applies [dict] to [f]. *) 370 355 371 - val finish_object_decode : 372 - ('o, 'o) object_map -> 373 - Meta.t -> 374 - ('p, 'mems, 'builder) unknown_mems -> 375 - 'builder -> 376 - mem_dec String_map.t -> 377 - Dict.t -> 378 - Dict.t 379 - (** [finish_object_decode map meta unknown_mems umap rem_mems dict] finishes an 380 - object map [map] decode. It adds the [umap] (if needed) to [dict], it adds 381 - [meta] to [dict] under {!object_meta_arg} and tries to find andd default 382 - values to [dict] for [rem_mems] (and errors if it can't). *) 356 + val override_unknown_mems : 357 + by:unknown_mems_option -> 358 + unknown_mems_option -> 359 + Dict.t -> 360 + unknown_mems_option * Dict.t 361 + (** [override_unknown_mems ~by current dict] performs the unknown-member 362 + overriding logic for case objects. *) 383 363 384 - val pp_code : string Fmt.t 385 - (** [pp_code] formats strings like code (in bold). *) 364 + val finish_object_decode : 365 + ('o, 'o) object_map -> 366 + Meta.t -> 367 + ('p, 'mems, 'builder) unknown_mems -> 368 + 'builder -> 369 + mem_dec String_map.t -> 370 + Dict.t -> 371 + Dict.t 372 + (** [finish_object_decode map meta unknown_mems umap rem_mems dict] finishes 373 + an object-map decode. *) 374 + end 386 375 387 376 (* ======================================================================== 388 377 Codec combinators (moved from json.mli). ··· 1249 1238 1250 1239 val encode_exn : 'a t -> 'a -> value 1251 1240 (** [encode_exn] is like {!val-encode} but raises the exception {!Error}. *) 1241 + 1242 + (** {1:stream Byte-stream I/O} 1243 + 1244 + Scoped under [Stream] so [open Codec] doesn't shadow callers' own 1245 + [of_string] / [to_string]. Top-level users should prefer {!Json.of_string} 1246 + and friends. *) 1247 + 1248 + module Stream : sig 1249 + val of_reader : 1250 + ?layout:bool -> 1251 + ?locs:bool -> 1252 + ?file:string -> 1253 + 'a t -> 1254 + Bytesrw.Bytes.Reader.t -> 1255 + ('a, Error.t) result 1256 + 1257 + val of_reader_exn : 1258 + ?layout:bool -> 1259 + ?locs:bool -> 1260 + ?file:string -> 1261 + 'a t -> 1262 + Bytesrw.Bytes.Reader.t -> 1263 + 'a 1264 + 1265 + val of_string : 1266 + ?layout:bool -> 1267 + ?locs:bool -> 1268 + ?file:string -> 1269 + 'a t -> 1270 + string -> 1271 + ('a, Error.t) result 1272 + 1273 + val of_string_exn : 1274 + ?layout:bool -> ?locs:bool -> ?file:string -> 'a t -> string -> 'a 1275 + 1276 + val to_writer : 1277 + ?buf:Bytes.t -> 1278 + ?indent:int -> 1279 + ?preserve:bool -> 1280 + ?number_format:number_format -> 1281 + 'a t -> 1282 + 'a -> 1283 + eod:bool -> 1284 + Bytesrw.Bytes.Writer.t -> 1285 + unit 1286 + 1287 + val to_string : 1288 + ?buf:Bytes.t -> 1289 + ?indent:int -> 1290 + ?preserve:bool -> 1291 + ?number_format:number_format -> 1292 + 'a t -> 1293 + 'a -> 1294 + string 1295 + end
+18 -1498
lib/json.ml
··· 17 17 exception Error = Loc.Error 18 18 19 19 module Error = Error 20 - 21 - (* Public alias for codecs. *) 22 - 23 20 module Codec = Codec 24 21 25 22 type 'a codec = 'a Codec.t 26 - 27 - (* Generic JSON AST — lifted from the internal Value module. *) 28 - 29 23 type name = Value.name 30 24 type member = Value.member 31 25 type object' = Value.object' ··· 41 35 type number_format = Value.number_format 42 36 43 37 let pp = Value.pp 44 - 45 - (* Codec combinators and low-level representation. This module re-exports 46 - everything from [Codec] and adds the public combinator surface, 47 - [Base]/[Array]/[Object] sub-submodules, and [Value] codecs. *) 48 - 49 - (* Top-level wrappers over generic-value decode / encode / recode. *) 50 - 51 - let decode t j = Codec.decode t j 52 - let decode_exn t j = Codec.decode_exn t j 53 - let encode t v = Codec.encode t v 54 - let encode_exn t v = Codec.encode_exn t v 38 + let decode = Codec.decode 39 + let decode_exn = Codec.decode_exn 40 + let encode = Codec.encode 41 + let encode_exn = Codec.encode_exn 55 42 56 43 let pp_value ?(number_format = Value.default_number_format) t () ppf v = 57 44 match encode t v with 58 45 | Ok j -> Value.pp' number_format ppf j 59 46 | Error e -> Value.pp_string ppf (Error.to_string e) 60 47 61 - (* Formatting *) 62 - 63 - (* Internal representation of the encoder's whitespace strategy, derived 64 - from the public ?indent / ?preserve arguments via [format_of_args]. *) 65 - type format = Minify | Indent | Layout 66 - 67 - let format_of_args ~indent ~preserve = 68 - if preserve then Layout 69 - else match indent with None -> Minify | Some _ -> Indent 70 - 71 - (* Tape *) 48 + let of_reader = Codec.Stream.of_reader 49 + let of_reader_exn = Codec.Stream.of_reader_exn 50 + let of_string = Codec.Stream.of_string 51 + let of_string_exn = Codec.Stream.of_string_exn 52 + let to_writer = Codec.Stream.to_writer 53 + let to_string = Codec.Stream.to_string 72 54 73 55 module Tape = Tape 74 - (*--------------------------------------------------------------------------- 75 - Copyright (c) 2024 The jsont programmers. All rights reserved. 76 - SPDX-License-Identifier: ISC 77 - ---------------------------------------------------------------------------*) 78 - 79 - open Bytesrw 80 - open Codec 81 - 82 - (* XXX add these things to Stdlib.Uchar *) 83 - 84 - let uchar_max_utf8_bytes = 4 85 - 86 - let[@inline] uchar_utf8_decode_length = function 87 - | '\x00' .. '\x7F' -> 1 88 - | '\x80' .. '\xC1' -> 0 89 - | '\xC2' .. '\xDF' -> 2 90 - | '\xE0' .. '\xEF' -> 3 91 - | '\xF0' .. '\xF4' -> 4 92 - | _ -> 0 93 - 94 - (* Character classes *) 95 - 96 - let[@inline] is_digit u = 0x0030 (* 0 *) <= u && u <= 0x0039 (* 9 *) 97 - let[@inline] is_number_start u = is_digit u || u = 0x002D (* - *) 98 - let[@inline] is_surrogate u = 0xD800 <= u && u <= 0xDFFF 99 - let[@inline] _is_hi_surrogate u = 0xD800 <= u && u <= 0xDBFF 100 - let[@inline] is_lo_surrogate u = 0xDC00 <= u && u <= 0xDFFF 101 - 102 - let[@inline] is_control u = 103 - (0x0000 <= u && u <= 0x001F) 104 - (* C0 control characters *) 105 - || u = 0x007F 106 - (* Delete *) 107 - || (0x0080 <= u && u <= 0x009F) 108 - (* C1 control characters *) 109 - || u = 0x2028 110 - (* Line separator *) || u = 0x2029 111 - (* Paragraph separator *) || u = 0x200E 112 - (* left-to-right mark *) || u = 0x200F (* right-to-left mark *) 113 - 114 - let sot = 0x1A0000 (* start of text U+10FFFF + 1 *) 115 - let eot = 0x1A0001 (* end of text U+10FFFF + 2 *) 116 - let pp_code = Codec.pp_code 117 - 118 - let pp_quchar ppf u = 119 - pp_code ppf 120 - @@ 121 - if u = sot then "start of text" 122 - else if u = eot then "end of text" 123 - else if is_control u || is_surrogate u then Fmt.str "U+%04X" u 124 - else 125 - let u = Uchar.of_int u in 126 - let b = Stdlib.Bytes.make (Uchar.utf_8_byte_length u) '\x00' in 127 - Stdlib.( 128 - Stdlib.ignore (Bytes.set_utf_8_uchar b 0 u); 129 - Bytes.unsafe_to_string b) 130 - 131 - (* A simple growable byte buffer used for token and whitespace 132 - accumulation. Raw [Bytes.t] access lets us compare buffer content 133 - against candidate keys without allocating an intermediate string. *) 134 - type tokbuf = { mutable bytes : Stdlib.Bytes.t; mutable len : int } 135 - 136 - let tokbuf_create n = { bytes = Stdlib.Bytes.create n; len = 0 } 137 - let[@inline] tokbuf_clear t = t.len <- 0 138 - 139 - let[@inline] tokbuf_ensure t need = 140 - let cap = Stdlib.Bytes.length t.bytes in 141 - if t.len + need > cap then ( 142 - let new_cap = max (cap * 2) (t.len + need) in 143 - let b = Stdlib.Bytes.create new_cap in 144 - Stdlib.Bytes.blit t.bytes 0 b 0 t.len; 145 - t.bytes <- b) 146 - 147 - let[@inline] tokbuf_add_char t c = 148 - tokbuf_ensure t 1; 149 - Stdlib.Bytes.unsafe_set t.bytes t.len c; 150 - t.len <- t.len + 1 151 - 152 - let[@inline] tokbuf_add_utf_8_uchar t u = 153 - let n = Uchar.utf_8_byte_length u in 154 - tokbuf_ensure t n; 155 - Stdlib.ignore (Stdlib.Bytes.set_utf_8_uchar t.bytes t.len u : int); 156 - t.len <- t.len + n 157 - 158 - let[@inline] tokbuf_contents t = Stdlib.Bytes.sub_string t.bytes 0 t.len 159 - 160 - (* Byte-compare buffer content to a string without allocating. *) 161 - let tokbuf_equal_string t s = 162 - let n = String.length s in 163 - if t.len <> n then false 164 - else 165 - let rec loop i = 166 - if i >= n then true 167 - else if Stdlib.Bytes.unsafe_get t.bytes i <> String.unsafe_get s i then 168 - false 169 - else loop (i + 1) 170 - in 171 - loop 0 172 - 173 - (* Decoder *) 174 - 175 - type decoder = { 176 - file : string; 177 - meta_none : Meta.t; (* A meta with just [file] therein. *) 178 - locs : bool; (* [true] if text locations should be computed. *) 179 - layout : bool; (* [true] if text layout should be kept. *) 180 - reader : Bytes.Reader.t; (* The source of bytes. *) 181 - mutable i : Stdlib.Bytes.t; (* Current input slice. *) 182 - mutable i_max : int; (* Maximum byte index in [i]. *) 183 - mutable i_next : int; (* Next byte index to read in [i]. *) 184 - overlap : Stdlib.Bytes.t; (* Buffer for overlapping decodes. *) 185 - mutable u : int; (* Current Unicode scalar value or sot or eot. *) 186 - mutable byte_count : int; (* Global byte count. *) 187 - mutable line : int; (* Current line number. *) 188 - mutable line_start : int; (* Current line global byte position. *) 189 - token : tokbuf; 190 - ws : tokbuf; (* Bufferizes whitespace when layout is [true]. *) 191 - } 192 - 193 - let decoder ?(locs = false) ?(layout = false) ?(file = "-") reader = 194 - let overlap = Stdlib.Bytes.create uchar_max_utf8_bytes in 195 - let token = tokbuf_create 255 and ws = tokbuf_create 255 in 196 - let meta_none = Meta.make (Loc.(set_file none) file) in 197 - { 198 - file; 199 - meta_none; 200 - locs; 201 - layout; 202 - reader; 203 - i = overlap (* overwritten by initial refill *); 204 - i_max = 0; 205 - i_next = 1 (* triggers an initial refill *); 206 - overlap; 207 - u = sot; 208 - byte_count = 0; 209 - line = 1; 210 - line_start = 0; 211 - token; 212 - ws; 213 - } 214 - 215 - (* Decoder positions *) 216 - 217 - let last_byte_of d = 218 - if d.u <= 0x7F then d.byte_count - 1 219 - else if d.u = sot || d.u = eot then d.byte_count 220 - else 221 - (* On multi-bytes uchars we want to point on the first byte. *) 222 - d.byte_count - Uchar.utf_8_byte_length (Uchar.of_int d.u) 223 - 224 - (* Decoder errors *) 225 - 226 - let[@inline] loc_of_pos d ~first_byte ~last_byte ~first_line_num 227 - ~first_line_byte ~last_line_num ~last_line_byte = 228 - Loc.make ~file:d.file ~first_byte ~last_byte ~first_line_num ~first_line_byte 229 - ~last_line_num ~last_line_byte 230 - 231 - let error_meta d = 232 - let first_byte = last_byte_of d in 233 - let first_line_num = d.line and first_line_byte = d.line_start in 234 - Meta.make 235 - @@ loc_of_pos d ~first_byte ~last_byte:first_byte ~first_line_num 236 - ~first_line_byte ~last_line_num:first_line_num 237 - ~last_line_byte:first_line_byte 238 - 239 - let error_meta_to_current ~first_byte ~first_line_num ~first_line_byte d = 240 - let last_byte = last_byte_of d in 241 - let last_line_num = d.line and last_line_byte = d.line_start in 242 - Meta.make 243 - @@ loc_of_pos d ~first_byte ~last_byte ~first_line_num ~first_line_byte 244 - ~last_line_num ~last_line_byte 245 - 246 - let err_here d fmt = Error.failf (error_meta d) fmt 247 - 248 - let err_to_here ~first_byte ~first_line_num ~first_line_byte d fmt = 249 - Error.failf 250 - (error_meta_to_current ~first_byte ~first_line_num ~first_line_byte d) 251 - fmt 252 - 253 - let err_malformed_utf_8 d = 254 - if d.i_next > d.i_max then 255 - err_here d "UTF-8 decoding error: unexpected end of bytes" 256 - else 257 - err_here d "UTF-8 decoding error: invalid byte %a" pp_code 258 - (Fmt.str "%x02x" (Bytes.get_uint8 d.i d.i_next)) 259 - 260 - let err_exp d = err_here d "Expected %a but found %a" 261 - let err_exp_while d = err_here d "Expected %a while parsing %a but found %a" 262 - let err_exp_eot d = err_exp d pp_quchar eot pp_quchar d.u 263 - let err_not_json_value d = err_exp d pp_code "JSON value" pp_quchar d.u 264 - 265 - let current_json_sort d = 266 - match d.u with 267 - | 0x0066 (* f *) | 0x0074 (* t *) -> Sort.Bool 268 - | 0x006E (* n *) -> Sort.Null 269 - | 0x007B (* { *) -> Sort.Object 270 - | 0x005B (* [ *) -> Sort.Array 271 - | 0x0022 (* DQUOTE *) -> Sort.String 272 - | u when is_number_start u -> Sort.Number 273 - | _ -> err_not_json_value d 274 - 275 - let fail_type_mismatch d t = 276 - Codec.fail_type_mismatch (error_meta d) t ~fnd:(current_json_sort d) 277 - 278 - (* Errors for constants *) 279 - 280 - let err_exp_in_const ~first_byte ~first_line_num ~first_line_byte d ~exp ~fnd 281 - ~const = 282 - err_to_here ~first_byte ~first_line_num ~first_line_byte d 283 - "Expected %a while parsing %a but found: %a" pp_quchar exp pp_code const 284 - pp_quchar fnd 285 - 286 - (* Errors for numbers *) 287 - 288 - let err_float_parse meta tok = 289 - Error.failf meta "Could not parse %S to a %a" tok pp_code "float" 290 - 291 - let err_exp_digit d = 292 - err_exp_while d pp_code "decimal digit" pp_code "number" pp_quchar d.u 293 - 294 - (* Errors for strings *) 295 - 296 - let err_exp_hex_digit d = 297 - err_exp_while d pp_code "hex digit" pp_code "character escape" pp_quchar d.u 298 - 299 - let err_exp_lo_surrogate d u = 300 - err_exp_while d pp_code "low surrogate" pp_code "character escape" pp_quchar u 301 - 302 - let err_unpaired_lo_surrogate d u = 303 - err_here d "Unpaired low surrogate %a in %a" pp_quchar u pp_code "string" 304 - 305 - let err_unpaired_hi_surrogate d u = 306 - err_here d "Unpaired high surrogate %a in %a" pp_quchar u pp_code "string" 307 - 308 - let err_exp_esc ~first_byte ~first_line_num ~first_line_byte d u = 309 - err_to_here ~first_byte ~first_line_num ~first_line_byte d 310 - "Expected %a while parsing %a found %a" pp_code "escape character" pp_code 311 - "escape" pp_quchar u 312 - 313 - let err_unclosed_string ~first_byte ~first_line_num ~first_line_byte d = 314 - err_to_here ~first_byte ~first_line_num ~first_line_byte d "Unclosed %a" 315 - pp_code "string" 316 - 317 - let err_illegal_ctrl_char ~first_byte ~first_line_num ~first_line_byte d = 318 - err_to_here ~first_byte ~first_line_num ~first_line_byte d 319 - "Illegal control character %a in %a" pp_quchar d.u pp_code "string" 320 - 321 - (* Errors for arrays *) 322 - 323 - let err_exp_comma_or_eoa d ~fnd = 324 - err_here d "Expected %a or %a after %a but found %a" pp_code "," pp_code "]" 325 - pp_code "array element" pp_quchar fnd 326 - 327 - let err_unclosed_array d = err_here d "Unclosed %a" pp_code "array" 328 - 329 - let err_exp_comma_or_eoo d = 330 - err_here d "Expected %a or %a after %a but found: %a" pp_code "," pp_code "}" 331 - pp_code "object member" pp_quchar d.u 332 - 333 - (* Errors for objects *) 334 - 335 - let err_exp_mem d = 336 - err_here d "Expected %a but found %a" pp_code "object member" pp_quchar d.u 337 - 338 - let err_exp_mem_or_eoo d = 339 - err_here d "Expected: %a or %a but found %a" pp_code "object member" pp_code 340 - "}" pp_quchar d.u 341 - 342 - let err_exp_colon d = 343 - err_here d "Expected %a after %a but found %a" pp_code ":" pp_code 344 - "member name" pp_quchar d.u 345 - 346 - let err_unclosed_object d (map : ('o, 'o) Codec.object_map) = 347 - err_here d "Unclosed %a" Codec.pp_kind (Codec.object_kinded_sort map) 348 - 349 - (* Decode next character in d.u *) 350 - 351 - let[@inline] is_eoslice d = d.i_next > d.i_max 352 - let[@inline] is_eod d = d.i_max = -1 (* Only happens on Slice.eod *) 353 - let[@inline] available d = d.i_max - d.i_next + 1 354 - 355 - let[@inline] set_slice d slice = 356 - d.i <- Bytes.Slice.bytes slice; 357 - d.i_next <- Bytes.Slice.first slice; 358 - d.i_max <- d.i_next + Bytes.Slice.length slice - 1 359 - 360 - let rec setup_overlap d start need = 361 - match need with 362 - | 0 -> 363 - let slice = 364 - match available d with 365 - | 0 -> Bytes.Reader.read d.reader 366 - | length -> Bytes.Slice.make d.i ~first:d.i_next ~length 367 - in 368 - d.i <- d.overlap; 369 - d.i_next <- 0; 370 - d.i_max <- start; 371 - slice 372 - | need -> 373 - if is_eoslice d then set_slice d (Bytes.Reader.read d.reader); 374 - if is_eod d then ( 375 - d.byte_count <- d.byte_count - start; 376 - err_malformed_utf_8 d); 377 - let available = available d in 378 - let take = Int.min need available in 379 - for i = 0 to take - 1 do 380 - Bytes.set d.overlap (start + i) (Bytes.get d.i (d.i_next + i)) 381 - done; 382 - d.i_next <- d.i_next + take; 383 - d.byte_count <- d.byte_count + take; 384 - setup_overlap d (start + take) (need - take) 385 - 386 - let rec nextc d = 387 - let a = available d in 388 - if a <= 0 then 389 - if is_eod d then d.u <- eot 390 - else ( 391 - set_slice d (Bytes.Reader.read d.reader); 392 - nextc d) 393 - else 394 - let b = Bytes.get d.i d.i_next in 395 - if a < uchar_max_utf8_bytes && a < uchar_utf8_decode_length b then begin 396 - let s = setup_overlap d 0 (uchar_utf8_decode_length b) in 397 - nextc d; 398 - set_slice d s 399 - end 400 - else 401 - d.u <- 402 - (match b with 403 - | ('\x00' .. '\x09' | '\x0B' | '\x0E' .. '\x7F') as u -> 404 - (* ASCII fast path *) 405 - d.i_next <- d.i_next + 1; 406 - d.byte_count <- d.byte_count + 1; 407 - Char.code u 408 - | '\x0D' (* CR *) -> 409 - d.i_next <- d.i_next + 1; 410 - d.byte_count <- d.byte_count + 1; 411 - d.line_start <- d.byte_count; 412 - d.line <- d.line + 1; 413 - 0x000D 414 - | '\x0A' (* LF *) -> 415 - d.i_next <- d.i_next + 1; 416 - d.byte_count <- d.byte_count + 1; 417 - d.line_start <- d.byte_count; 418 - if d.u <> 0x000D then d.line <- d.line + 1; 419 - 0x000A 420 - | _ -> 421 - let udec = Bytes.get_utf_8_uchar d.i d.i_next in 422 - if not (Uchar.utf_decode_is_valid udec) then err_malformed_utf_8 d 423 - else 424 - let u = Uchar.to_int (Uchar.utf_decode_uchar udec) in 425 - let ulen = Uchar.utf_decode_length udec in 426 - d.i_next <- d.i_next + ulen; 427 - d.byte_count <- d.byte_count + ulen; 428 - u) 429 - 430 - (* Decoder tokenizer *) 431 - 432 - let[@inline] token_clear d = tokbuf_clear d.token 433 - 434 - let[@inline] token_pop d = 435 - let t = tokbuf_contents d.token in 436 - token_clear d; 437 - t 438 - 439 - let[@inline] token_add d u = 440 - if u <= 0x7F then tokbuf_add_char d.token (Char.unsafe_chr u) 441 - else tokbuf_add_utf_8_uchar d.token (Uchar.unsafe_of_int u) 442 - 443 - (* Find a member in [mem_decs] whose key matches the current token 444 - buffer content byte-for-byte, without allocating a string. Returns 445 - the matching mem_dec together with the key string (owned by the 446 - map). Used as a fast-path for object member dispatch. *) 447 - let mem_by_token d mem_decs = 448 - let r = ref None in 449 - (try 450 - String_map.iter 451 - (fun k v -> 452 - if tokbuf_equal_string d.token k then begin 453 - r := Some (v, k); 454 - raise_notrace Exit 455 - end) 456 - mem_decs 457 - with Exit -> ()); 458 - !r 459 - 460 - let[@inline] accept d = 461 - token_add d d.u; 462 - nextc d 463 - 464 - let token_pop_float d ~meta = 465 - let token = token_pop d in 466 - match float_of_string_opt token with 467 - | Some f -> f 468 - | None -> err_float_parse meta token (* likely [assert false] *) 469 - 470 - (* Decoder layout and position tracking *) 471 - 472 - let[@inline] ws_pop d = 473 - if not d.layout then "" 474 - else 475 - let t = tokbuf_contents d.ws in 476 - tokbuf_clear d.ws; 477 - t 478 - 479 - let loc_to_current ~first_byte ~first_line_num ~first_line_byte d = 480 - if not d.locs then Loc.none 481 - else 482 - let last_byte = last_byte_of d in 483 - let last_line_num = d.line and last_line_byte = d.line_start in 484 - loc_of_pos d ~first_byte ~last_byte ~first_line_num ~first_line_byte 485 - ~last_line_num ~last_line_byte 486 - 487 - let loc_prev_ascii_char ~first_byte ~first_line_num ~first_line_byte d = 488 - (* N.B. when we call that the line doesn't move and the char was on 489 - a single byte *) 490 - if not d.locs then Loc.none 491 - else 492 - let last_byte = last_byte_of d - 1 in 493 - let last_line_num = d.line and last_line_byte = d.line_start in 494 - loc_of_pos d ~first_byte ~last_byte ~first_line_num ~first_line_byte 495 - ~last_line_num ~last_line_byte 496 - 497 - let meta_make d ?ws_before ?ws_after loc = 498 - if (not d.locs) && not d.layout then d.meta_none 499 - else Meta.make ?ws_before ?ws_after loc 500 - 501 - (* Decoding *) 502 - 503 - let false_uchars = [| 0x0066; 0x0061; 0x006C; 0x0073; 0x0065 |] 504 - let true_uchars = [| 0x0074; 0x0072; 0x0075; 0x0065 |] 505 - let null_uchars = [| 0x006E; 0x0075; 0x006C; 0x006C |] 506 - 507 - let ascii_str us = 508 - String.init (Stdlib.Array.length us) (fun i -> 509 - Char.chr (Stdlib.Array.get us i)) 510 - 511 - let[@inline] is_ws u = 512 - if u > 0x20 then false 513 - else 514 - match Char.unsafe_chr u with ' ' | '\t' | '\r' | '\n' -> true | _ -> false 515 - 516 - let[@inline] read_ws d = 517 - while is_ws d.u do 518 - if d.layout then tokbuf_add_char d.ws (Char.unsafe_chr d.u); 519 - nextc d 520 - done 521 - 522 - let read_json_const d const = 523 - (* First character was checked. *) 524 - let ws_before = ws_pop d in 525 - let first_byte = last_byte_of d in 526 - let first_line_num = d.line and first_line_byte = d.line_start in 527 - for i = 1 to Stdlib.Array.length const - 1 do 528 - nextc d; 529 - let c = Stdlib.Array.get const i in 530 - if not (Int.equal d.u c) then 531 - err_exp_in_const ~first_byte ~first_line_num ~first_line_byte d ~exp:c 532 - ~fnd:d.u ~const:(ascii_str const) 533 - done; 534 - let loc = loc_to_current d ~first_byte ~first_line_num ~first_line_byte in 535 - let ws_after = 536 - nextc d; 537 - read_ws d; 538 - ws_pop d 539 - in 540 - meta_make d ~ws_before ~ws_after loc 541 - 542 - let[@inline] read_json_false d = read_json_const d false_uchars 543 - let[@inline] read_json_true d = read_json_const d true_uchars 544 - let[@inline] read_json_null d = read_json_const d null_uchars 545 - 546 - let read_json_number d = 547 - (* [is_number_start d.u] = true *) 548 - let[@inline] read_digits d = 549 - while is_digit d.u do 550 - accept d 551 - done 552 - in 553 - let[@inline] read_int d = 554 - match d.u with 555 - | 0x0030 (* 0 *) -> accept d 556 - | u when is_digit u -> 557 - accept d; 558 - read_digits d 559 - | _ -> err_exp_digit d 560 - in 561 - let[@inline] read_opt_frac d = 562 - match d.u with 563 - | 0x002E (* . *) -> 564 - accept d; 565 - if is_digit d.u then read_digits d else err_exp_digit d 566 - | _ -> () 567 - in 568 - let[@inline] read_opt_exp d = 569 - match d.u with 570 - | 0x0065 (* e *) | 0x0045 (* E *) -> 571 - token_add d d.u; 572 - nextc d; 573 - (match d.u with 574 - | 0x002D (* - *) | 0x002B (* + *) -> 575 - token_add d d.u; 576 - nextc d 577 - | _ -> ()); 578 - if is_digit d.u then read_digits d else err_exp_digit d 579 - | _ -> () 580 - in 581 - let first_byte = last_byte_of d in 582 - let first_line_num = d.line and first_line_byte = d.line_start in 583 - let ws_before = ws_pop d in 584 - token_clear d; 585 - if d.u = 0x002D (* - *) then accept d; 586 - read_int d; 587 - read_opt_frac d; 588 - read_opt_exp d; 589 - let loc = 590 - loc_prev_ascii_char d ~first_byte ~first_line_num ~first_line_byte 591 - in 592 - let ws_after = 593 - read_ws d; 594 - ws_pop d 595 - in 596 - meta_make d ~ws_before ~ws_after loc 597 - 598 - let read_json_string d = 599 - (* d.u is 0x0022 *) 600 - let first_byte = last_byte_of d in 601 - let first_line_num = d.line and first_line_byte = d.line_start in 602 - let rec read_uescape d hi uc count = 603 - if count > 0 then 604 - match d.u with 605 - | u when 0x0030 <= u && u <= 0x0039 -> 606 - nextc d; 607 - read_uescape d hi ((uc * 16) + u - 0x30) (count - 1) 608 - | u when 0x0041 <= u && u <= 0x0046 -> 609 - nextc d; 610 - read_uescape d hi ((uc * 16) + u - 0x37) (count - 1) 611 - | u when 0x0061 <= u && u <= 0x0066 -> 612 - nextc d; 613 - read_uescape d hi ((uc * 16) + u - 0x57) (count - 1) 614 - | _ -> err_exp_hex_digit d 615 - else 616 - match hi with 617 - | Some hi -> 618 - (* combine high and low surrogate. *) 619 - if not (is_lo_surrogate uc) then err_exp_lo_surrogate d uc 620 - else 621 - let u = (((hi land 0x3FF) lsl 10) lor (uc land 0x3FF)) + 0x10000 in 622 - token_add d u 623 - | None -> 624 - if not (is_surrogate uc) then token_add d uc 625 - else if uc > 0xDBFF then err_unpaired_lo_surrogate d uc 626 - else if d.u <> 0x005C (* \ *) then err_unpaired_hi_surrogate d uc 627 - else ( 628 - nextc d; 629 - if d.u <> 0x0075 (* u *) then err_unpaired_hi_surrogate d uc 630 - else ( 631 - nextc d; 632 - read_uescape d (Some uc) 0 4)) 633 - in 634 - let read_escape d = 635 - match d.u with 636 - | 0x0022 (* DQUOTE *) | 0x005C (* \ *) | 0x002F (* / *) -> accept d 637 - | 0x0062 (* b *) -> 638 - token_add d 0x0008 (* backspace *); 639 - nextc d 640 - | 0x0066 (* f *) -> 641 - token_add d 0x000C (* form feed *); 642 - nextc d 643 - | 0x006E (* n *) -> 644 - token_add d 0x000A (* line feed *); 645 - nextc d 646 - | 0x0072 (* r *) -> 647 - token_add d 0x000D (* carriage return *); 648 - nextc d 649 - | 0x0074 (* t *) -> 650 - token_add d 0x0009 (* tab *); 651 - nextc d 652 - | 0x0075 (* u *) -> 653 - nextc d; 654 - read_uescape d None 0 4 655 - | u -> err_exp_esc ~first_byte ~first_line_num ~first_line_byte d u 656 - in 657 - let rec loop d = 658 - match d.u with 659 - | 0x005C (* \ *) -> 660 - nextc d; 661 - read_escape d; 662 - loop d 663 - | 0x0022 (* DQUOTE *) -> () 664 - | u when u = eot -> 665 - err_unclosed_string ~first_byte ~first_line_num ~first_line_byte d 666 - | u when 0x0000 <= u && u <= 0x001F -> 667 - err_illegal_ctrl_char ~first_byte ~first_line_num ~first_line_byte d 668 - | _ -> 669 - accept d; 670 - loop d 671 - in 672 - let ws_before = ws_pop d in 673 - nextc d; 674 - token_clear d; 675 - loop d; 676 - let loc = loc_to_current d ~first_byte ~first_line_num ~first_line_byte in 677 - let ws_after = 678 - nextc d; 679 - read_ws d; 680 - ws_pop d 681 - in 682 - meta_make d ~ws_before ~ws_after loc 683 - 684 - let read_json_name d = 685 - let meta = read_json_string d in 686 - if d.u = 0x003A (* : *) then ( 687 - nextc d; 688 - meta) 689 - else err_exp_colon d 690 - 691 - let read_json_mem_sep d = 692 - if d.u = 0x007D (* } *) then () 693 - else if d.u = 0x002C (* , *) then ( 694 - nextc d; 695 - read_ws d; 696 - if d.u <> 0x0022 then err_exp_mem d) 697 - else err_exp_comma_or_eoo d 698 - 699 - (* Skip-parse a JSON value: advance past [d.u] at the byte level without 700 - materialising token buffers, parsing numbers, or decoding string 701 - escapes. The only decoding done is UTF-8 in [nextc]; escapes in 702 - strings are recognised only enough to not stop at a backslash-quote. *) 703 - let rec skip_json_value d = 704 - read_ws d; 705 - match d.u with 706 - | 0x007B (* { *) -> skip_json_object d 707 - | 0x005B (* [ *) -> skip_json_array d 708 - | 0x0022 (* DQUOTE *) -> skip_json_string d 709 - | 0x006E (* n *) -> Stdlib.ignore (read_json_null d) 710 - | 0x0074 (* t *) -> Stdlib.ignore (read_json_true d) 711 - | 0x0066 (* f *) -> Stdlib.ignore (read_json_false d) 712 - | u when is_number_start u -> skip_json_number d 713 - | _ -> err_not_json_value d 714 - 715 - and skip_json_string d = 716 - (* Byte-level scan for the closing quote; matches simdjson On-Demand 717 - semantics. Structural contract (bracket nesting, string termination) 718 - is enforced; content (escape correctness, exact hex digits after 719 - [\u]) is NOT validated. Consumers needing strict content 720 - validation should decode with [Json.json] and then discard rather 721 - than [Codec.ignore]. *) 722 - let done_ = ref false in 723 - while not !done_ do 724 - if d.i_next > d.i_max then 725 - if is_eod d then 726 - err_unclosed_string ~first_byte:0 ~first_line_num:Loc.line_num_none 727 - ~first_line_byte:Loc.byte_pos_none d 728 - else set_slice d (Bytes.Reader.read d.reader) 729 - else begin 730 - let b = Stdlib.Bytes.unsafe_get d.i d.i_next in 731 - d.i_next <- d.i_next + 1; 732 - d.byte_count <- d.byte_count + 1; 733 - match b with 734 - | '\\' -> 735 - if d.i_next > d.i_max then 736 - if is_eod d then 737 - err_unclosed_string ~first_byte:0 738 - ~first_line_num:Loc.line_num_none 739 - ~first_line_byte:Loc.byte_pos_none d 740 - else set_slice d (Bytes.Reader.read d.reader); 741 - d.i_next <- d.i_next + 1; 742 - d.byte_count <- d.byte_count + 1 743 - | '"' -> done_ := true 744 - | _ -> () 745 - end 746 - done; 747 - nextc d; 748 - read_ws d 749 - 750 - and skip_json_number d = 751 - (* Consume number-continuation characters; matches simdjson 752 - On-Demand. Structural number shape ([1..2], [+5], [1eE2]) is NOT 753 - validated here. *) 754 - let done_ = ref false in 755 - while not !done_ do 756 - if d.i_next > d.i_max then 757 - if is_eod d then done_ := true 758 - else set_slice d (Bytes.Reader.read d.reader) 759 - else 760 - match Stdlib.Bytes.unsafe_get d.i d.i_next with 761 - | '0' .. '9' | '-' | '+' | '.' | 'e' | 'E' -> 762 - d.i_next <- d.i_next + 1; 763 - d.byte_count <- d.byte_count + 1 764 - | _ -> done_ := true 765 - done; 766 - nextc d; 767 - read_ws d 768 - 769 - and skip_json_array d = 770 - nextc d; 771 - (* [ *) 772 - read_ws d; 773 - if d.u = 0x005D (* ] *) then ( 774 - nextc d; 775 - read_ws d) 776 - else 777 - let rec loop () = 778 - skip_json_value d; 779 - match d.u with 780 - | 0x002C (* , *) -> 781 - nextc d; 782 - read_ws d; 783 - loop () 784 - | 0x005D (* ] *) -> 785 - nextc d; 786 - read_ws d 787 - | fnd -> err_exp_comma_or_eoa d ~fnd 788 - in 789 - loop () 790 - 791 - and skip_json_object d = 792 - nextc d; 793 - (* { *) 794 - read_ws d; 795 - if d.u = 0x007D (* } *) then ( 796 - nextc d; 797 - read_ws d) 798 - else 799 - let rec loop () = 800 - if d.u <> 0x0022 then err_exp_mem d; 801 - skip_json_string d; 802 - if d.u <> 0x003A (* : *) then err_exp_colon d; 803 - nextc d; 804 - read_ws d; 805 - skip_json_value d; 806 - match d.u with 807 - | 0x002C (* , *) -> 808 - nextc d; 809 - read_ws d; 810 - loop () 811 - | 0x007D (* } *) -> 812 - nextc d; 813 - read_ws d 814 - | _ -> err_exp_comma_or_eoo d 815 - in 816 - loop () 817 - 818 - let rec parse : type a. decoder -> a t -> a = 819 - fun d t -> 820 - match 821 - read_ws d; 822 - t 823 - with 824 - | Null map -> ( 825 - match d.u with 826 - | 0x006E (* n *) -> map.dec (read_json_null d) () 827 - | _ -> fail_type_mismatch d t) 828 - | Bool map -> ( 829 - match d.u with 830 - | 0x0066 (* f *) -> map.dec (read_json_false d) false 831 - | 0x0074 (* t *) -> map.dec (read_json_true d) true 832 - | _ -> fail_type_mismatch d t) 833 - | Number map -> ( 834 - match d.u with 835 - | u when is_number_start u -> 836 - let meta = read_json_number d in 837 - map.dec meta (token_pop_float d ~meta) 838 - | 0x006E (* n *) -> map.dec (read_json_null d) Float.nan 839 - | _ -> fail_type_mismatch d t) 840 - | String map -> ( 841 - match d.u with 842 - | 0x0022 (* DQUOTE *) -> 843 - let meta = read_json_string d in 844 - map.dec meta (token_pop d) 845 - | _ -> fail_type_mismatch d t) 846 - | Array map -> ( 847 - match d.u with 848 - | 0x005B (* [ *) -> decode_array d map 849 - | _ -> fail_type_mismatch d t) 850 - | Object map -> ( 851 - match d.u with 852 - | 0x007B (* { *) -> decode_object d map 853 - | _ -> fail_type_mismatch d t) 854 - | Map map -> map.dec (parse d map.dom) 855 - | Any map -> decode_any d t map 856 - | Rec t -> parse d (Lazy.force t) 857 - | Ignore -> skip_json_value d 858 - 859 - and decode_array : type a elt b. decoder -> (a, elt, b) array_map -> a = 860 - fun d map -> 861 - let ws_before = ws_pop d in 862 - let first_byte = last_byte_of d in 863 - let first_line_num = d.line and first_line_byte = d.line_start in 864 - let b, len = 865 - match 866 - nextc d; 867 - read_ws d; 868 - d.u 869 - with 870 - | 0x005D (* ] *) -> (map.dec_empty (), 0) 871 - | _ -> ( 872 - let b = ref (map.dec_empty ()) in 873 - let i = ref 0 in 874 - let next = ref true in 875 - try 876 - while !next do 877 - begin 878 - let first_byte = last_byte_of d in 879 - let first_line_num = d.line and first_line_byte = d.line_start in 880 - try 881 - if map.dec_skip !i !b then parse d Codec.ignore 882 - else b := map.dec_add !i (parse d map.elt) !b 883 - with Error e -> 884 - let imeta = 885 - error_meta_to_current ~first_byte ~first_line_num 886 - ~first_line_byte d 887 - in 888 - Codec.fail_push_array (error_meta d) map (!i, imeta) e 889 - end; 890 - incr i; 891 - match 892 - read_ws d; 893 - d.u 894 - with 895 - | 0x005D (* ] *) -> next := false 896 - | 0x002C (* , *) -> 897 - nextc d; 898 - read_ws d 899 - | u when u = eot -> err_unclosed_array d 900 - | fnd -> err_exp_comma_or_eoa d ~fnd 901 - done; 902 - (!b, !i) 903 - with Error e -> 904 - Error.adjust_context ~first_byte ~first_line_num ~first_line_byte e) 905 - in 906 - let loc = loc_to_current d ~first_byte ~first_line_num ~first_line_byte in 907 - let ws_after = 908 - nextc d; 909 - read_ws d; 910 - ws_pop d 911 - in 912 - let meta = meta_make d ~ws_before ~ws_after loc in 913 - map.dec_finish meta len b 914 - 915 - and decode_object : type a. decoder -> (a, a) object_map -> a = 916 - fun d map -> 917 - let ws_before = ws_pop d in 918 - let first_byte = last_byte_of d in 919 - let first_line_num = d.line and first_line_byte = d.line_start in 920 - let dict = 921 - try 922 - nextc d; 923 - read_ws d; 924 - decode_object_map d map (Unknown_mems None) String_map.empty 925 - String_map.empty [] Dict.empty 926 - with 927 - | Error { ctx; meta; kind } when Loc.Context.is_empty ctx -> 928 - let meta = 929 - (* This is for when Codec.finish_object_decode raises. *) 930 - if Loc.is_none (Meta.loc meta) then 931 - error_meta_to_current d ~first_byte ~first_line_num ~first_line_byte 932 - else meta 933 - in 934 - Error.fail ~ctx ~meta kind 935 - | Error e -> 936 - Error.adjust_context ~first_byte ~first_line_num ~first_line_byte e 937 - in 938 - let loc = loc_to_current d ~first_byte ~first_line_num ~first_line_byte in 939 - let ws_after = 940 - nextc d; 941 - read_ws d; 942 - ws_pop d 943 - in 944 - let meta = meta_make d ~ws_before ~ws_after loc in 945 - let dict = Dict.add Codec.object_meta_arg meta dict in 946 - Codec.apply_dict map.dec dict 947 - 948 - and decode_object_delayed : type o. 949 - decoder -> 950 - (o, o) object_map -> 951 - mem_dec String_map.t -> 952 - mem_dec String_map.t -> 953 - object' -> 954 - Dict.t -> 955 - mem_dec String_map.t * object' * Dict.t = 956 - fun d map mem_miss mem_decs delay dict -> 957 - let rec loop d map mem_miss mem_decs rem_delay dict = function 958 - | [] -> (mem_miss, rem_delay, dict) 959 - | ((((name, _meta) as nm), v) as mem) :: delay -> ( 960 - match String_map.find_opt name mem_decs with 961 - | None -> loop d map mem_miss mem_decs (mem :: rem_delay) dict delay 962 - | Some (Mem_dec m) -> 963 - let dict = 964 - try 965 - let t = m.type' in 966 - let v = 967 - match Codec.decode t v with 968 - | Ok v -> v 969 - | Error e -> raise_notrace (Error e) 970 - in 971 - Dict.add m.id v dict 972 - with Error e -> Codec.fail_push_object (error_meta d) map nm e 973 - in 974 - let mem_miss = String_map.remove name mem_miss in 975 - loop d map mem_miss mem_decs rem_delay dict delay) 976 - in 977 - loop d map mem_miss mem_decs [] dict delay 978 - 979 - and decode_object_map : type o. 980 - decoder -> 981 - (o, o) object_map -> 982 - unknown_mems_option -> 983 - mem_dec String_map.t -> 984 - mem_dec String_map.t -> 985 - object' -> 986 - Dict.t -> 987 - Dict.t = 988 - fun d map umems mem_miss mem_decs delay dict -> 989 - let u _ _ _ = assert false in 990 - let mem_miss = String_map.union u mem_miss map.mem_decs in 991 - let mem_decs = String_map.union u mem_decs map.mem_decs in 992 - match map.shape with 993 - | Object_cases (umems', cases) -> 994 - let umems' = Unknown_mems umems' in 995 - let umems, dict = Codec.override_unknown_mems ~by:umems umems' dict in 996 - decode_object_case d map umems cases mem_miss mem_decs delay dict 997 - | Object_basic umems' -> ( 998 - let mem_miss, delay, dict = 999 - decode_object_delayed d map mem_miss mem_decs delay dict 1000 - in 1001 - let umems' = Unknown_mems (Some umems') in 1002 - let umems, dict = Codec.override_unknown_mems ~by:umems umems' dict in 1003 - match umems with 1004 - | Unknown_mems (Some Unknown_skip | None) -> 1005 - decode_object_basic d map Unknown_skip () mem_miss mem_decs dict 1006 - | Unknown_mems (Some (Unknown_error as u)) -> 1007 - if delay = [] then 1008 - decode_object_basic d map u () mem_miss mem_decs dict 1009 - else 1010 - let fnd = List.map fst delay in 1011 - Codec.fail_unexpected_members (error_meta d) map ~fnd 1012 - | Unknown_mems (Some (Unknown_keep (umap, _) as u)) -> 1013 - let add_delay umems (((n, meta) as nm), v) = 1014 - try 1015 - let t = umap.mems_type in 1016 - let v = 1017 - match Codec.decode t v with 1018 - | Ok v -> v 1019 - | Error e -> raise_notrace (Error e) 1020 - in 1021 - umap.dec_add meta n v umems 1022 - with Error e -> Codec.fail_push_object (error_meta d) map nm e 1023 - in 1024 - let umems = List.fold_left add_delay (umap.dec_empty ()) delay in 1025 - decode_object_basic d map u umems mem_miss mem_decs dict) 1026 - 1027 - and decode_object_basic : type o p mems builder. 1028 - decoder -> 1029 - (o, o) object_map -> 1030 - (p, mems, builder) unknown_mems -> 1031 - builder -> 1032 - mem_dec String_map.t -> 1033 - mem_dec String_map.t -> 1034 - Dict.t -> 1035 - Dict.t = 1036 - fun d map u umap mem_miss mem_decs dict -> 1037 - match d.u with 1038 - | 0x007D (* } *) -> 1039 - let meta = 1040 - d.meta_none 1041 - (* we add a correct one in decode_object *) 1042 - in 1043 - Codec.finish_object_decode map meta u umap mem_miss dict 1044 - | 0x0022 -> 1045 - let meta = read_json_name d in 1046 - (* Fast path: byte-compare the token buffer against [mem_decs] 1047 - keys without allocating. Only materialise the name as a 1048 - string if no match was found (for Unknown_keep paths and 1049 - error messages). *) 1050 - begin match mem_by_token d mem_decs with 1051 - | Some (Mem_dec mem, name) -> 1052 - token_clear d; 1053 - let mem_miss = String_map.remove name mem_miss in 1054 - let dict = 1055 - try Dict.add mem.id (parse d mem.type') dict 1056 - with Error e -> 1057 - Codec.fail_push_object (error_meta d) map (name, meta) e 1058 - in 1059 - read_json_mem_sep d; 1060 - decode_object_basic d map u umap mem_miss mem_decs dict 1061 - | None -> ( 1062 - match u with 1063 - | Unknown_skip -> 1064 - (* The name is never read, so we don't need to allocate it. *) 1065 - token_clear d; 1066 - let () = 1067 - try parse d Codec.ignore 1068 - with Error e -> 1069 - Codec.fail_push_object (error_meta d) map 1070 - (token_pop d, meta) 1071 - e 1072 - in 1073 - read_json_mem_sep d; 1074 - decode_object_basic d map u umap mem_miss mem_decs dict 1075 - | Unknown_error -> 1076 - let name = token_pop d in 1077 - let fnd = [ (name, meta) ] in 1078 - Codec.fail_unexpected_members (error_meta d) map ~fnd 1079 - | Unknown_keep (umap', _) -> 1080 - let name = token_pop d in 1081 - let umap = 1082 - try umap'.dec_add meta name (parse d umap'.mems_type) umap 1083 - with Error e -> 1084 - Codec.fail_push_object (error_meta d) map (name, meta) e 1085 - in 1086 - read_json_mem_sep d; 1087 - decode_object_basic d map u umap mem_miss mem_decs dict) 1088 - end 1089 - | u when u = eot -> err_unclosed_object d map 1090 - | _ -> err_exp_mem_or_eoo d 1091 - 1092 - and decode_object_case : type o cases tag. 1093 - decoder -> 1094 - (o, o) object_map -> 1095 - unknown_mems_option -> 1096 - (o, cases, tag) object_cases -> 1097 - mem_dec String_map.t -> 1098 - mem_dec String_map.t -> 1099 - object' -> 1100 - Dict.t -> 1101 - Dict.t = 1102 - fun d map umems cases mem_miss mem_decs delay dict -> 1103 - let decode_case_tag ~sep map umems cases mem_miss mem_decs nmeta tag delay = 1104 - let eq_tag (Case c) = cases.tag_compare c.tag tag = 0 in 1105 - match List.find_opt eq_tag cases.cases with 1106 - | None -> ( 1107 - try Codec.fail_unexpected_case_tag (error_meta d) map cases tag 1108 - with Error e -> 1109 - Codec.fail_push_object (error_meta d) map (cases.tag.name, nmeta) e) 1110 - | Some (Case case) -> 1111 - if sep then read_json_mem_sep d; 1112 - let dict = 1113 - decode_object_map d case.object_map umems mem_miss mem_decs delay dict 1114 - in 1115 - Dict.add cases.id (case.dec (apply_dict case.object_map.dec dict)) dict 1116 - in 1117 - match d.u with 1118 - | 0x007D (* } *) -> ( 1119 - match cases.tag.dec_absent with 1120 - | Some tag -> 1121 - decode_case_tag ~sep:false map umems cases mem_miss mem_decs 1122 - d.meta_none tag delay 1123 - | None -> 1124 - let fnd = List.map (fun ((n, _), _) -> n) delay in 1125 - let exp = String_map.singleton cases.tag.name (Mem_dec cases.tag) in 1126 - Codec.fail_missing_members (error_meta d) map ~exp ~fnd) 1127 - | 0x0022 -> 1128 - let meta = read_json_name d in 1129 - let name = token_pop d in 1130 - if String.equal name cases.tag.name then 1131 - let tag = 1132 - try parse d cases.tag.type' 1133 - with Error e -> 1134 - Codec.fail_push_object (error_meta d) map (name, meta) e 1135 - in 1136 - decode_case_tag ~sep:true map umems cases mem_miss mem_decs meta tag 1137 - delay 1138 - else 1139 - begin match String_map.find_opt name mem_decs with 1140 - | Some (Mem_dec mem) -> 1141 - let mem_miss = String_map.remove name mem_miss in 1142 - let dict = 1143 - try Dict.add mem.id (parse d mem.type') dict 1144 - with Error e -> 1145 - Codec.fail_push_object (error_meta d) map (name, meta) e 1146 - in 1147 - read_json_mem_sep d; 1148 - decode_object_case d map umems cases mem_miss mem_decs delay dict 1149 - | None -> 1150 - (* Because JSON can be out of order we don't know how to decode 1151 - this yet. Generic decode *) 1152 - let v = 1153 - try parse d Codec.Value.t 1154 - with Error e -> 1155 - Codec.fail_push_object (error_meta d) map (name, meta) e 1156 - in 1157 - let delay = ((name, meta), v) :: delay in 1158 - read_json_mem_sep d; 1159 - decode_object_case d map umems cases mem_miss mem_decs delay dict 1160 - end 1161 - | u when u = eot -> err_unclosed_object d map 1162 - | _ -> err_exp_mem_or_eoo d 1163 - 1164 - and decode_any : type a. decoder -> a t -> a any_map -> a = 1165 - fun d t map -> 1166 - let case d t map = 1167 - match map with None -> fail_type_mismatch d t | Some t -> parse d t 1168 - in 1169 - match d.u with 1170 - | 0x006E (* n *) -> case d t map.dec_null 1171 - | 0x0066 (* f *) | 0x0074 (* t *) -> case d t map.dec_bool 1172 - | 0x0022 (* DQUOTE *) -> case d t map.dec_string 1173 - | 0x005B (* [ *) -> case d t map.dec_array 1174 - | 0x007B (* { *) -> case d t map.dec_object 1175 - | u when is_number_start u -> case d t map.dec_number 1176 - | _ -> err_not_json_value d 1177 - 1178 - let of_reader_exn ?layout ?locs ?file t reader = 1179 - let d = decoder ?layout ?locs ?file reader in 1180 - let v = 1181 - nextc d; 1182 - parse d t 1183 - in 1184 - if d.u <> eot then err_exp_eot d else v 1185 - 1186 - let of_reader ?layout ?locs ?file t reader = 1187 - try Ok (of_reader_exn ?layout ?locs ?file t reader) with Error e -> Error e 1188 - 1189 - let of_string_exn ?layout ?locs ?file t s = 1190 - of_reader_exn ?layout ?locs ?file t (Bytes.Reader.of_string s) 1191 - 1192 - let of_string ?layout ?locs ?file t s = 1193 - of_reader ?layout ?locs ?file t (Bytes.Reader.of_string s) 1194 - 1195 - (* Encoding *) 1196 - 1197 - type encoder = { 1198 - writer : Bytes.Writer.t; (* Destination of bytes. *) 1199 - o : Bytes.t; (* Buffer for slices. *) 1200 - o_max : int; (* Max index in [o]. *) 1201 - mutable o_next : int; (* Next writable index in [o]. *) 1202 - format : format; 1203 - number_format : string; 1204 - } 1205 - 1206 - let encoder ?buf ?indent ?(preserve = false) 1207 - ?(number_format = Ast.default_number_format) writer = 1208 - let format = format_of_args ~indent ~preserve in 1209 - let o = 1210 - match buf with 1211 - | Some buf -> buf 1212 - | None -> Bytes.create (Bytes.Writer.slice_length writer) 1213 - in 1214 - let len = Bytes.length o in 1215 - let number_format = string_of_format number_format in 1216 - let o_max = len - 1 and o_next = 0 in 1217 - { writer; o; o_max; o_next; format; number_format } 1218 - 1219 - let[@inline] rem_len e = e.o_max - e.o_next + 1 1220 - 1221 - let flush e = 1222 - Bytes.Writer.write e.writer (Bytes.Slice.make e.o ~first:0 ~length:e.o_next); 1223 - e.o_next <- 0 1224 - 1225 - let write_eot ~eod e = 1226 - flush e; 1227 - if eod then Bytes.Writer.write_eod e.writer 1228 - 1229 - let write_char e c = 1230 - if e.o_next > e.o_max then flush e; 1231 - Stdlib.Bytes.set e.o e.o_next c; 1232 - e.o_next <- e.o_next + 1 1233 - 1234 - let rec write_substring e s first length = 1235 - if length = 0 then () 1236 - else 1237 - let len = Int.min (rem_len e) length in 1238 - if len = 0 then ( 1239 - flush e; 1240 - write_substring e s first length) 1241 - else begin 1242 - Bytes.blit_string s first e.o e.o_next len; 1243 - e.o_next <- e.o_next + len; 1244 - write_substring e s (first + len) (length - len) 1245 - end 1246 - 1247 - let write_bytes e s = write_substring e s 0 (String.length s) 1248 - let write_sep e = write_char e ',' 1249 - 1250 - let write_indent e ~nest = 1251 - for _i = 1 to nest do 1252 - write_char e ' '; 1253 - write_char e ' ' 1254 - done 1255 - 1256 - let write_ws_before e m = write_bytes e (Meta.ws_before m) 1257 - let write_ws_after e m = write_bytes e (Meta.ws_after m) 1258 - let write_json_null e = write_bytes e "null" 1259 - let write_json_bool e b = write_bytes e (if b then "true" else "false") 1260 - 1261 - (* XXX we bypass the printf machinery as it costs quite quite a bit. 1262 - Would be even better if we could format directly to a bytes values 1263 - rather than allocating a string per number. *) 1264 - external format_float : string -> float -> string = "caml_format_float" 1265 - 1266 - let write_json_number e f = 1267 - if Float.is_finite f then write_bytes e (format_float e.number_format f) 1268 - else write_json_null e 1269 - 1270 - let write_json_string e s = 1271 - let is_control = function '\x00' .. '\x1F' | '\x7F' -> true | _ -> false in 1272 - let len = String.length s in 1273 - let flush e start i max = 1274 - if start <= max then write_substring e s start (i - start) 1275 - in 1276 - let rec loop start i max = 1277 - if i > max then flush e start i max 1278 - else 1279 - let next = i + 1 in 1280 - match String.get s i with 1281 - | '\"' -> 1282 - flush e start i max; 1283 - write_bytes e "\\\""; 1284 - loop next next max 1285 - | '\\' -> 1286 - flush e start i max; 1287 - write_bytes e "\\\\"; 1288 - loop next next max 1289 - | '\n' -> 1290 - flush e start i max; 1291 - write_bytes e "\\n"; 1292 - loop next next max 1293 - | '\r' -> 1294 - flush e start i max; 1295 - write_bytes e "\\r"; 1296 - loop next next max 1297 - | '\t' -> 1298 - flush e start i max; 1299 - write_bytes e "\\t"; 1300 - loop next next max 1301 - | c when is_control c -> 1302 - flush e start i max; 1303 - write_bytes e "\\u"; 1304 - write_bytes e (Fmt.str "%04X" (Char.code c)); 1305 - loop next next max 1306 - | _ -> loop start next max 1307 - in 1308 - write_char e '"'; 1309 - loop 0 0 (len - 1); 1310 - write_char e '"' 1311 - 1312 - let encode_null (map : ('a, 'b) Codec.base_map) e v = 1313 - let () = map.enc v in 1314 - match e.format with 1315 - | Minify | Indent -> write_json_null e 1316 - | Layout -> 1317 - let meta = map.enc_meta v in 1318 - write_ws_before e meta; 1319 - write_json_null e; 1320 - write_ws_after e meta 1321 - 1322 - let encode_bool (map : ('a, 'b) Codec.base_map) e v = 1323 - let b = map.enc v in 1324 - match e.format with 1325 - | Minify | Indent -> write_json_bool e b 1326 - | Layout -> 1327 - let meta = map.enc_meta v in 1328 - write_ws_before e meta; 1329 - write_json_bool e b; 1330 - write_ws_after e meta 1331 - 1332 - let encode_number (map : ('a, 'b) Codec.base_map) e v = 1333 - let n = map.enc v in 1334 - match e.format with 1335 - | Minify | Indent -> write_json_number e n 1336 - | Layout -> 1337 - let meta = map.enc_meta v in 1338 - write_ws_before e meta; 1339 - write_json_number e n; 1340 - write_ws_after e meta 1341 - 1342 - let encode_string (map : ('a, 'b) Codec.base_map) e v = 1343 - let s = map.enc v in 1344 - match e.format with 1345 - | Minify | Indent -> write_json_string e s 1346 - | Layout -> 1347 - let meta = map.enc_meta v in 1348 - write_ws_before e meta; 1349 - write_json_string e s; 1350 - write_ws_after e meta 1351 - 1352 - let encode_mem_indent ~nest e = 1353 - write_char e '\n'; 1354 - write_indent e ~nest 1355 - 1356 - let encode_mem_name e meta n = 1357 - match e.format with 1358 - | Minify -> 1359 - write_json_string e n; 1360 - write_char e ':' 1361 - | Indent -> 1362 - write_json_string e n; 1363 - write_bytes e ": " 1364 - | Layout -> 1365 - write_ws_before e meta; 1366 - write_json_string e n; 1367 - write_ws_after e meta; 1368 - write_char e ':' 1369 - 1370 - let rec write : type a. nest:int -> a Codec.t -> encoder -> a -> unit = 1371 - fun ~nest t e v -> 1372 - match t with 1373 - | Null map -> encode_null map e v 1374 - | Bool map -> encode_bool map e v 1375 - | Number map -> encode_number map e v 1376 - | String map -> encode_string map e v 1377 - | Array map -> encode_array ~nest map e v 1378 - | Object map -> encode_object ~nest map e v 1379 - | Any map -> write ~nest (map.enc v) e v 1380 - | Map map -> write ~nest map.dom e (map.enc v) 1381 - | Rec t -> write ~nest (Lazy.force t) e v 1382 - | Ignore -> Error.failf Meta.none "Cannot encode Ignore value" 1383 - 1384 - and encode_array : type a elt b. 1385 - nest:int -> (a, elt, b) Codec.array_map -> encoder -> a -> unit = 1386 - fun ~nest map e v -> 1387 - let encode_element ~nest map e i v = 1388 - if i <> 0 then write_sep e; 1389 - try 1390 - write ~nest map.elt e v; 1391 - e 1392 - with Error e -> Codec.fail_push_array Meta.none map (i, Meta.none) e 1393 - in 1394 - match e.format with 1395 - | Minify -> 1396 - write_char e '['; 1397 - Stdlib.ignore (map.enc (encode_element ~nest:(nest + 1) map) e v); 1398 - write_char e ']' 1399 - | Layout -> 1400 - let meta = map.enc_meta v in 1401 - write_ws_before e meta; 1402 - write_char e '['; 1403 - Stdlib.ignore (map.enc (encode_element ~nest:(nest + 1) map) e v); 1404 - write_char e ']'; 1405 - write_ws_after e meta 1406 - | Indent -> 1407 - let encode_element ~nest map e i v = 1408 - if i <> 0 then write_sep e; 1409 - write_char e '\n'; 1410 - write_indent e ~nest; 1411 - try 1412 - write ~nest map.elt e v; 1413 - e 1414 - with Error e -> Codec.fail_push_array Meta.none map (i, Meta.none) e 1415 - in 1416 - let array_not_empty e = 1417 - e.o_next = 0 || not (Bytes.get e.o (e.o_next - 1) = '[') 1418 - in 1419 - write_char e '['; 1420 - Stdlib.ignore (map.enc (encode_element ~nest:(nest + 1) map) e v); 1421 - if array_not_empty e then ( 1422 - write_char e '\n'; 1423 - write_indent e ~nest); 1424 - write_char e ']' 1425 - 1426 - and encode_object : type o. 1427 - nest:int -> (o, o) Codec.object_map -> encoder -> o -> unit = 1428 - fun ~nest map e o -> 1429 - match e.format with 1430 - | Minify -> 1431 - write_char e '{'; 1432 - Stdlib.ignore 1433 - (encode_object_map ~nest:(nest + 1) map ~do_unknown:true e ~start:true o); 1434 - write_char e '}' 1435 - | Layout -> 1436 - let meta = map.enc_meta o in 1437 - write_ws_before e meta; 1438 - write_char e '{'; 1439 - Stdlib.ignore 1440 - (encode_object_map ~nest:(nest + 1) map ~do_unknown:true e ~start:true o); 1441 - write_char e '}'; 1442 - write_ws_after e meta 1443 - | Indent -> 1444 - write_char e '{'; 1445 - let start = 1446 - encode_object_map ~nest:(nest + 1) map ~do_unknown:true e ~start:true o 1447 - in 1448 - if not start then ( 1449 - write_char e '\n'; 1450 - write_indent e ~nest); 1451 - write_char e '}' 1452 - 1453 - and encode_object_map : type o. 1454 - nest:int -> 1455 - (o, o) Codec.object_map -> 1456 - do_unknown:bool -> 1457 - encoder -> 1458 - start:bool -> 1459 - o -> 1460 - bool = 1461 - fun ~nest map ~do_unknown e ~start o -> 1462 - let encode_mem ~nest map e o start (Mem_enc mmap) = 1463 - try 1464 - let v = mmap.enc o in 1465 - if mmap.enc_omit v then start 1466 - else begin 1467 - if not start then write_char e ','; 1468 - if e.format = Indent then encode_mem_indent ~nest e; 1469 - let meta = 1470 - (* if e.format = Layout then mmap.enc_name_meta v else *) 1471 - Meta.none 1472 - in 1473 - encode_mem_name e meta mmap.name; 1474 - write ~nest mmap.type' e v; 1475 - false 1476 - end 1477 - with Error e -> 1478 - Codec.fail_push_object Meta.none map (mmap.name, Meta.none) e 1479 - in 1480 - match map.shape with 1481 - | Object_basic u -> 1482 - let start = 1483 - List.fold_left (encode_mem ~nest map e o) start map.mem_encs 1484 - in 1485 - begin match u with 1486 - | Unknown_keep (umap, enc) when do_unknown -> 1487 - encode_unknown_mems ~nest map umap e ~start (enc o) 1488 - | _ -> start 1489 - end 1490 - | Object_cases (umap, cases) -> ( 1491 - let (Case_value (case, c)) = cases.enc_case (cases.enc o) in 1492 - let start = 1493 - if cases.tag.enc_omit case.tag then start 1494 - else encode_mem ~nest map e case.tag start (Mem_enc cases.tag) 1495 - in 1496 - let start = 1497 - List.fold_left (encode_mem ~nest map e o) start map.mem_encs 1498 - in 1499 - match umap with 1500 - | Some (Unknown_keep (umap, enc)) -> 1501 - let start = 1502 - encode_object_map ~nest case.object_map ~do_unknown:false e ~start c 1503 - in 1504 - encode_unknown_mems ~nest map umap e ~start (enc o) 1505 - | _ -> encode_object_map ~nest case.object_map ~do_unknown e ~start c) 1506 - 1507 - and encode_unknown_mems : type o mems a builder. 1508 - nest:int -> 1509 - (o, o) object_map -> 1510 - (mems, a, builder) mems_map -> 1511 - encoder -> 1512 - start:bool -> 1513 - mems -> 1514 - bool = 1515 - fun ~nest map umap e ~start mems -> 1516 - let encode_unknown_mem ~nest map umap e meta n v start = 1517 - try 1518 - if not start then write_char e ','; 1519 - if e.format = Indent then encode_mem_indent ~nest e; 1520 - encode_mem_name e meta n; 1521 - write ~nest umap.mems_type e v; 1522 - false 1523 - with Error e -> Codec.fail_push_object Meta.none map (n, Meta.none) e 1524 - in 1525 - umap.enc (encode_unknown_mem ~nest map umap e) mems start 1526 - 1527 - let to_writer ?buf ?indent ?preserve ?number_format t v ~eod w = 1528 - let e = encoder ?buf ?indent ?preserve ?number_format w in 1529 - write ~nest:0 t e v; 1530 - write_eot ~eod e 1531 - 1532 - let to_string ?buf ?indent ?preserve ?number_format t v = 1533 - let b = Buffer.create 255 in 1534 - let w = Bytes.Writer.of_buffer b in 1535 - to_writer ?buf ?indent ?preserve ?number_format ~eod:true t v w; 1536 - Buffer.contents b 1537 56 1538 57 module Value = struct 1539 - include Ast 58 + include Value 1540 59 1541 60 let of_string ?layout ?locs ?file s = 1542 - of_string ?layout ?locs ?file Codec.Value.t s 61 + Codec.Stream.of_string ?layout ?locs ?file Codec.Value.t s 1543 62 1544 63 let of_string_exn ?layout ?locs ?file s = 1545 - of_string_exn ?layout ?locs ?file Codec.Value.t s 64 + Codec.Stream.of_string_exn ?layout ?locs ?file Codec.Value.t s 1546 65 1547 66 let of_reader ?layout ?locs ?file r = 1548 - of_reader ?layout ?locs ?file Codec.Value.t r 67 + Codec.Stream.of_reader ?layout ?locs ?file Codec.Value.t r 1549 68 1550 69 let of_reader_exn ?layout ?locs ?file r = 1551 - of_reader_exn ?layout ?locs ?file Codec.Value.t r 70 + Codec.Stream.of_reader_exn ?layout ?locs ?file Codec.Value.t r 1552 71 1553 72 let to_string ?buf ?indent ?preserve ?number_format v = 1554 - to_string ?buf ?indent ?preserve ?number_format Codec.Value.t v 73 + Codec.Stream.to_string ?buf ?indent ?preserve ?number_format Codec.Value.t v 1555 74 1556 75 let to_writer ?buf ?indent ?preserve ?number_format v ~eod w = 1557 - to_writer ?buf ?indent ?preserve ?number_format Codec.Value.t v ~eod w 76 + Codec.Stream.to_writer ?buf ?indent ?preserve ?number_format Codec.Value.t v 77 + ~eod w 1558 78 end