Declarative JSON data manipulation for OCaml
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

ocaml-json: collapse json.bytesrw sublib into core per spec

Spec migration step 3:
- Inline the streaming I/O (decoder/encoder) from lib/bytesrw/json_bytesrw.{ml,mli}
into lib/json.{ml,mli} so of_string/of_reader/to_string/to_writer and their
_exn variants sit at the top level of Json, not under a separate Json_bytesrw
module.
- Drop the json.bytesrw sublib and update all dune files (bench/test/fuzz) to
depend on plain 'json'.
- Rename the internal parser/emitter helpers 'decode'/'encode' to 'parse'/
'write' so they don't collide with the public Codec.decode/encode.
- Update test/bench/fuzz callsites: Json_bytesrw.X -> Json.X.

Per the ocaml-encodings skill, bytesrw is pure OCaml and not an external dep
that must be isolated, so it belongs in core.

+1717 -26
+2 -2
bench/bench.ml
··· 114 114 let content = read_file path in 115 115 let size_bytes = String.length content in 116 116 let size_mb = float_of_int size_bytes /. 1_048_576.0 in 117 - let dom_decode s = Json_bytesrw.of_string Json.Codec.Value.t s in 117 + let dom_decode s = Json.of_string Json.Codec.Value.t s in 118 118 let field_decode = 119 119 let codec = field_codec name in 120 - fun s -> Json_bytesrw.of_string codec s 120 + fun s -> Json.of_string codec s 121 121 in 122 122 let dom = run_mode ~content ~decode:dom_decode in 123 123 let fld = run_mode ~content ~decode:field_decode in
+1 -1
bench/dune
··· 1 1 (executable 2 2 (name bench) 3 - (libraries json json.bytesrw unix memtrace)) 3 + (libraries json unix memtrace))
+1 -1
fuzz/dune
··· 1 1 (executable 2 2 (name fuzz) 3 3 (modules fuzz fuzz_json) 4 - (libraries json json.bytesrw alcobar)) 4 + (libraries json alcobar)) 5 5 6 6 (rule 7 7 (alias runtest)
+8 -8
fuzz/fuzz_json.ml
··· 20 20 (** [Json.Codec.ignore] - must not crash on arbitrary input. *) 21 21 let test_ignore_crash buf = 22 22 let buf = truncate buf in 23 - match Json_bytesrw.of_string Json.Codec.ignore buf with Ok _ | Error _ -> () 23 + match Json.of_string Json.Codec.ignore buf with Ok _ | Error _ -> () 24 24 25 25 (** [Json.Codec.Value.t] - must not crash on arbitrary input. *) 26 26 let test_json_crash buf = 27 27 let buf = truncate buf in 28 - match Json_bytesrw.of_string Json.Codec.Value.t buf with 28 + match Json.of_string Json.Codec.Value.t buf with 29 29 | Ok _ | Error _ -> () 30 30 31 31 (** Skip-parse implication: if [Json.Codec.Value.t] accepts, [Json.Codec.ignore] 32 32 must accept. *) 33 33 let test_skip_implication buf = 34 34 let buf = truncate buf in 35 - match Json_bytesrw.of_string Json.Codec.Value.t buf with 35 + match Json.of_string Json.Codec.Value.t buf with 36 36 | Error _ -> () 37 37 | Ok _ -> ( 38 - match Json_bytesrw.of_string Json.Codec.ignore buf with 38 + match Json.of_string Json.Codec.ignore buf with 39 39 | Ok () -> () 40 40 | Error e -> 41 41 failf ··· 46 46 matches. *) 47 47 let test_roundtrip buf = 48 48 let buf = truncate ~max_len:4096 buf in 49 - match Json_bytesrw.of_string Json.Codec.Value.t buf with 49 + match Json.of_string Json.Codec.Value.t buf with 50 50 | Error _ -> () 51 51 | Ok v -> ( 52 - match Json_bytesrw.to_string Json.Codec.Value.t v with 52 + match Json.to_string Json.Codec.Value.t v with 53 53 | Error _ -> () 54 54 | Ok s -> ( 55 - match Json_bytesrw.of_string Json.Codec.Value.t s with 56 - | Error e -> failf "roundtrip: re-decode failed on %S: %s" s e 55 + match Json.of_string Json.Codec.Value.t s with 56 + | Error e -> failf "roundtrip: re-decode failed on %S: %a" s Json.Error.pp e 57 57 | Ok v' -> 58 58 if not (Json.equal v v') then failf "roundtrip: value changed")) 59 59
+1
lib/dune
··· 3 3 (public_name json) 4 4 (libraries 5 5 fmt 6 + bytesrw 6 7 (re_export loc)))
+1504
lib/json.ml
··· 1687 1687 (* Tape *) 1688 1688 1689 1689 module Tape = Tape 1690 + (*--------------------------------------------------------------------------- 1691 + Copyright (c) 2024 The jsont programmers. All rights reserved. 1692 + SPDX-License-Identifier: ISC 1693 + ---------------------------------------------------------------------------*) 1694 + 1695 + open Bytesrw 1696 + open Codec 1697 + 1698 + (* XXX add these things to Stdlib.Uchar *) 1699 + 1700 + let uchar_max_utf8_bytes = 4 1701 + 1702 + let[@inline] uchar_utf8_decode_length = function 1703 + | '\x00' .. '\x7F' -> 1 1704 + | '\x80' .. '\xC1' -> 0 1705 + | '\xC2' .. '\xDF' -> 2 1706 + | '\xE0' .. '\xEF' -> 3 1707 + | '\xF0' .. '\xF4' -> 4 1708 + | _ -> 0 1709 + 1710 + (* Character classes *) 1711 + 1712 + let[@inline] is_digit u = 0x0030 (* 0 *) <= u && u <= 0x0039 (* 9 *) 1713 + let[@inline] is_number_start u = is_digit u || u = 0x002D (* - *) 1714 + let[@inline] is_surrogate u = 0xD800 <= u && u <= 0xDFFF 1715 + let[@inline] _is_hi_surrogate u = 0xD800 <= u && u <= 0xDBFF 1716 + let[@inline] is_lo_surrogate u = 0xDC00 <= u && u <= 0xDFFF 1717 + 1718 + let[@inline] is_control u = 1719 + (0x0000 <= u && u <= 0x001F) 1720 + (* C0 control characters *) 1721 + || u = 0x007F 1722 + (* Delete *) 1723 + || (0x0080 <= u && u <= 0x009F) 1724 + (* C1 control characters *) 1725 + || u = 0x2028 1726 + (* Line separator *) || u = 0x2029 1727 + (* Paragraph separator *) || u = 0x200E 1728 + (* left-to-right mark *) || u = 0x200F (* right-to-left mark *) 1729 + 1730 + let sot = 0x1A0000 (* start of text U+10FFFF + 1 *) 1731 + let eot = 0x1A0001 (* end of text U+10FFFF + 2 *) 1732 + let pp_code = Codec.pp_code 1733 + 1734 + let pp_quchar ppf u = 1735 + pp_code ppf 1736 + @@ 1737 + if u = sot then "start of text" 1738 + else if u = eot then "end of text" 1739 + else if is_control u || is_surrogate u then Fmt.str "U+%04X" u 1740 + else 1741 + let u = Uchar.of_int u in 1742 + let b = Stdlib.Bytes.make (Uchar.utf_8_byte_length u) '\x00' in 1743 + Stdlib.( 1744 + Stdlib.ignore (Bytes.set_utf_8_uchar b 0 u); 1745 + Bytes.unsafe_to_string b) 1746 + 1747 + (* A simple growable byte buffer used for token and whitespace 1748 + accumulation. Raw [Bytes.t] access lets us compare buffer content 1749 + against candidate keys without allocating an intermediate string. *) 1750 + type tokbuf = { mutable bytes : Stdlib.Bytes.t; mutable len : int } 1751 + 1752 + let tokbuf_create n = { bytes = Stdlib.Bytes.create n; len = 0 } 1753 + let[@inline] tokbuf_clear t = t.len <- 0 1754 + 1755 + let[@inline] tokbuf_ensure t need = 1756 + let cap = Stdlib.Bytes.length t.bytes in 1757 + if t.len + need > cap then ( 1758 + let new_cap = max (cap * 2) (t.len + need) in 1759 + let b = Stdlib.Bytes.create new_cap in 1760 + Stdlib.Bytes.blit t.bytes 0 b 0 t.len; 1761 + t.bytes <- b) 1762 + 1763 + let[@inline] tokbuf_add_char t c = 1764 + tokbuf_ensure t 1; 1765 + Stdlib.Bytes.unsafe_set t.bytes t.len c; 1766 + t.len <- t.len + 1 1767 + 1768 + let[@inline] tokbuf_add_utf_8_uchar t u = 1769 + let n = Uchar.utf_8_byte_length u in 1770 + tokbuf_ensure t n; 1771 + Stdlib.ignore (Stdlib.Bytes.set_utf_8_uchar t.bytes t.len u : int); 1772 + t.len <- t.len + n 1773 + 1774 + let[@inline] tokbuf_contents t = Stdlib.Bytes.sub_string t.bytes 0 t.len 1775 + 1776 + (* Byte-compare buffer content to a string without allocating. *) 1777 + let tokbuf_equal_string t s = 1778 + let n = String.length s in 1779 + if t.len <> n then false 1780 + else 1781 + let rec loop i = 1782 + if i >= n then true 1783 + else if Stdlib.Bytes.unsafe_get t.bytes i <> String.unsafe_get s i then 1784 + false 1785 + else loop (i + 1) 1786 + in 1787 + loop 0 1788 + 1789 + (* Decoder *) 1790 + 1791 + type decoder = { 1792 + file : string; 1793 + meta_none : Meta.t; (* A meta with just [file] therein. *) 1794 + locs : bool; (* [true] if text locations should be computed. *) 1795 + layout : bool; (* [true] if text layout should be kept. *) 1796 + reader : Bytes.Reader.t; (* The source of bytes. *) 1797 + mutable i : Stdlib.Bytes.t; (* Current input slice. *) 1798 + mutable i_max : int; (* Maximum byte index in [i]. *) 1799 + mutable i_next : int; (* Next byte index to read in [i]. *) 1800 + overlap : Stdlib.Bytes.t; (* Buffer for overlapping decodes. *) 1801 + mutable u : int; (* Current Unicode scalar value or sot or eot. *) 1802 + mutable byte_count : int; (* Global byte count. *) 1803 + mutable line : int; (* Current line number. *) 1804 + mutable line_start : int; (* Current line global byte position. *) 1805 + token : tokbuf; 1806 + ws : tokbuf; (* Bufferizes whitespace when layout is [true]. *) 1807 + } 1808 + 1809 + let decoder ?(locs = false) ?(layout = false) ?(file = "-") reader = 1810 + let overlap = Stdlib.Bytes.create uchar_max_utf8_bytes in 1811 + let token = tokbuf_create 255 and ws = tokbuf_create 255 in 1812 + let meta_none = Meta.make (Loc.(set_file none) file) in 1813 + { 1814 + file; 1815 + meta_none; 1816 + locs; 1817 + layout; 1818 + reader; 1819 + i = overlap (* overwritten by initial refill *); 1820 + i_max = 0; 1821 + i_next = 1 (* triggers an initial refill *); 1822 + overlap; 1823 + u = sot; 1824 + byte_count = 0; 1825 + line = 1; 1826 + line_start = 0; 1827 + token; 1828 + ws; 1829 + } 1830 + 1831 + (* Decoder positions *) 1832 + 1833 + let last_byte_of d = 1834 + if d.u <= 0x7F then d.byte_count - 1 1835 + else if d.u = sot || d.u = eot then d.byte_count 1836 + else 1837 + (* On multi-bytes uchars we want to point on the first byte. *) 1838 + d.byte_count - Uchar.utf_8_byte_length (Uchar.of_int d.u) 1839 + 1840 + (* Decoder errors *) 1841 + 1842 + let[@inline] textloc_of_pos d ~first_byte ~last_byte ~first_line_num 1843 + ~first_line_byte ~last_line_num ~last_line_byte = 1844 + Loc.make ~file:d.file ~first_byte ~last_byte ~first_line_num ~first_line_byte 1845 + ~last_line_num ~last_line_byte 1846 + 1847 + let error_meta d = 1848 + let first_byte = last_byte_of d in 1849 + let first_line_num = d.line and first_line_byte = d.line_start in 1850 + Meta.make 1851 + @@ textloc_of_pos d ~first_byte ~last_byte:first_byte ~first_line_num 1852 + ~first_line_byte ~last_line_num:first_line_num 1853 + ~last_line_byte:first_line_byte 1854 + 1855 + let error_meta_to_current ~first_byte ~first_line_num ~first_line_byte d = 1856 + let last_byte = last_byte_of d in 1857 + let last_line_num = d.line and last_line_byte = d.line_start in 1858 + Meta.make 1859 + @@ textloc_of_pos d ~first_byte ~last_byte ~first_line_num ~first_line_byte 1860 + ~last_line_num ~last_line_byte 1861 + 1862 + let err_here d fmt = Error.failf (error_meta d) fmt 1863 + 1864 + let err_to_here ~first_byte ~first_line_num ~first_line_byte d fmt = 1865 + Error.failf 1866 + (error_meta_to_current ~first_byte ~first_line_num ~first_line_byte d) 1867 + fmt 1868 + 1869 + let err_malformed_utf_8 d = 1870 + if d.i_next > d.i_max then 1871 + err_here d "UTF-8 decoding error: unexpected end of bytes" 1872 + else 1873 + err_here d "UTF-8 decoding error: invalid byte %a" pp_code 1874 + (Fmt.str "%x02x" (Bytes.get_uint8 d.i d.i_next)) 1875 + 1876 + let err_exp d = err_here d "Expected %a but found %a" 1877 + let err_exp_while d = err_here d "Expected %a while parsing %a but found %a" 1878 + let err_exp_eot d = err_exp d pp_quchar eot pp_quchar d.u 1879 + let err_not_json_value d = err_exp d pp_code "JSON value" pp_quchar d.u 1880 + 1881 + let current_json_sort d = 1882 + match d.u with 1883 + | 0x0066 (* f *) | 0x0074 (* t *) -> Sort.Bool 1884 + | 0x006E (* n *) -> Sort.Null 1885 + | 0x007B (* { *) -> Sort.Object 1886 + | 0x005B (* [ *) -> Sort.Array 1887 + | 0x0022 (* DQUOTE *) -> Sort.String 1888 + | u when is_number_start u -> Sort.Number 1889 + | _ -> err_not_json_value d 1890 + 1891 + let type_error d t = 1892 + Codec.type_error (error_meta d) t ~fnd:(current_json_sort d) 1893 + 1894 + (* Errors for constants *) 1895 + 1896 + let err_exp_in_const ~first_byte ~first_line_num ~first_line_byte d ~exp ~fnd 1897 + ~const = 1898 + err_to_here ~first_byte ~first_line_num ~first_line_byte d 1899 + "Expected %a while parsing %a but found: %a" pp_quchar exp pp_code const 1900 + pp_quchar fnd 1901 + 1902 + (* Errors for numbers *) 1903 + 1904 + let err_float_parse meta tok = 1905 + Error.failf meta "Could not parse %S to a %a" tok pp_code "float" 1906 + 1907 + let err_exp_digit d = 1908 + err_exp_while d pp_code "decimal digit" pp_code "number" pp_quchar d.u 1909 + 1910 + (* Errors for strings *) 1911 + 1912 + let err_exp_hex_digit d = 1913 + err_exp_while d pp_code "hex digit" pp_code "character escape" pp_quchar d.u 1914 + 1915 + let err_exp_lo_surrogate d u = 1916 + err_exp_while d pp_code "low surrogate" pp_code "character escape" pp_quchar u 1917 + 1918 + let err_unpaired_lo_surrogate d u = 1919 + err_here d "Unpaired low surrogate %a in %a" pp_quchar u pp_code "string" 1920 + 1921 + let err_unpaired_hi_surrogate d u = 1922 + err_here d "Unpaired high surrogate %a in %a" pp_quchar u pp_code "string" 1923 + 1924 + let err_exp_esc ~first_byte ~first_line_num ~first_line_byte d u = 1925 + err_to_here ~first_byte ~first_line_num ~first_line_byte d 1926 + "Expected %a while parsing %a found %a" pp_code "escape character" pp_code 1927 + "escape" pp_quchar u 1928 + 1929 + let err_unclosed_string ~first_byte ~first_line_num ~first_line_byte d = 1930 + err_to_here ~first_byte ~first_line_num ~first_line_byte d "Unclosed %a" 1931 + pp_code "string" 1932 + 1933 + let err_illegal_ctrl_char ~first_byte ~first_line_num ~first_line_byte d = 1934 + err_to_here ~first_byte ~first_line_num ~first_line_byte d 1935 + "Illegal control character %a in %a" pp_quchar d.u pp_code "string" 1936 + 1937 + (* Errors for arrays *) 1938 + 1939 + let err_exp_comma_or_eoa d ~fnd = 1940 + err_here d "Expected %a or %a after %a but found %a" pp_code "," pp_code "]" 1941 + pp_code "array element" pp_quchar fnd 1942 + 1943 + let err_unclosed_array d = err_here d "Unclosed %a" pp_code "array" 1944 + 1945 + let err_exp_comma_or_eoo d = 1946 + err_here d "Expected %a or %a after %a but found: %a" pp_code "," pp_code "}" 1947 + pp_code "object member" pp_quchar d.u 1948 + 1949 + (* Errors for objects *) 1950 + 1951 + let err_exp_mem d = 1952 + err_here d "Expected %a but found %a" pp_code "object member" pp_quchar d.u 1953 + 1954 + let err_exp_mem_or_eoo d = 1955 + err_here d "Expected: %a or %a but found %a" pp_code "object member" pp_code 1956 + "}" pp_quchar d.u 1957 + 1958 + let err_exp_colon d = 1959 + err_here d "Expected %a after %a but found %a" pp_code ":" pp_code 1960 + "member name" pp_quchar d.u 1961 + 1962 + let err_unclosed_object d (map : ('o, 'o) Codec.object_map) = 1963 + err_here d "Unclosed %a" Codec.pp_kind (Codec.object_kinded_sort map) 1964 + 1965 + (* Decode next character in d.u *) 1966 + 1967 + let[@inline] is_eoslice d = d.i_next > d.i_max 1968 + let[@inline] is_eod d = d.i_max = -1 (* Only happens on Slice.eod *) 1969 + let[@inline] available d = d.i_max - d.i_next + 1 1970 + 1971 + let[@inline] set_slice d slice = 1972 + d.i <- Bytes.Slice.bytes slice; 1973 + d.i_next <- Bytes.Slice.first slice; 1974 + d.i_max <- d.i_next + Bytes.Slice.length slice - 1 1975 + 1976 + let rec setup_overlap d start need = 1977 + match need with 1978 + | 0 -> 1979 + let slice = 1980 + match available d with 1981 + | 0 -> Bytes.Reader.read d.reader 1982 + | length -> Bytes.Slice.make d.i ~first:d.i_next ~length 1983 + in 1984 + d.i <- d.overlap; 1985 + d.i_next <- 0; 1986 + d.i_max <- start; 1987 + slice 1988 + | need -> 1989 + if is_eoslice d then set_slice d (Bytes.Reader.read d.reader); 1990 + if is_eod d then ( 1991 + d.byte_count <- d.byte_count - start; 1992 + err_malformed_utf_8 d); 1993 + let available = available d in 1994 + let take = Int.min need available in 1995 + for i = 0 to take - 1 do 1996 + Bytes.set d.overlap (start + i) (Bytes.get d.i (d.i_next + i)) 1997 + done; 1998 + d.i_next <- d.i_next + take; 1999 + d.byte_count <- d.byte_count + take; 2000 + setup_overlap d (start + take) (need - take) 2001 + 2002 + let rec nextc d = 2003 + let a = available d in 2004 + if a <= 0 then 2005 + if is_eod d then d.u <- eot 2006 + else ( 2007 + set_slice d (Bytes.Reader.read d.reader); 2008 + nextc d) 2009 + else 2010 + let b = Bytes.get d.i d.i_next in 2011 + if a < uchar_max_utf8_bytes && a < uchar_utf8_decode_length b then begin 2012 + let s = setup_overlap d 0 (uchar_utf8_decode_length b) in 2013 + nextc d; 2014 + set_slice d s 2015 + end 2016 + else 2017 + d.u <- 2018 + (match b with 2019 + | ('\x00' .. '\x09' | '\x0B' | '\x0E' .. '\x7F') as u -> 2020 + (* ASCII fast path *) 2021 + d.i_next <- d.i_next + 1; 2022 + d.byte_count <- d.byte_count + 1; 2023 + Char.code u 2024 + | '\x0D' (* CR *) -> 2025 + d.i_next <- d.i_next + 1; 2026 + d.byte_count <- d.byte_count + 1; 2027 + d.line_start <- d.byte_count; 2028 + d.line <- d.line + 1; 2029 + 0x000D 2030 + | '\x0A' (* LF *) -> 2031 + d.i_next <- d.i_next + 1; 2032 + d.byte_count <- d.byte_count + 1; 2033 + d.line_start <- d.byte_count; 2034 + if d.u <> 0x000D then d.line <- d.line + 1; 2035 + 0x000A 2036 + | _ -> 2037 + let udec = Bytes.get_utf_8_uchar d.i d.i_next in 2038 + if not (Uchar.utf_decode_is_valid udec) then err_malformed_utf_8 d 2039 + else 2040 + let u = Uchar.to_int (Uchar.utf_decode_uchar udec) in 2041 + let ulen = Uchar.utf_decode_length udec in 2042 + d.i_next <- d.i_next + ulen; 2043 + d.byte_count <- d.byte_count + ulen; 2044 + u) 2045 + 2046 + (* Decoder tokenizer *) 2047 + 2048 + let[@inline] token_clear d = tokbuf_clear d.token 2049 + 2050 + let[@inline] token_pop d = 2051 + let t = tokbuf_contents d.token in 2052 + token_clear d; 2053 + t 2054 + 2055 + let[@inline] token_add d u = 2056 + if u <= 0x7F then tokbuf_add_char d.token (Char.unsafe_chr u) 2057 + else tokbuf_add_utf_8_uchar d.token (Uchar.unsafe_of_int u) 2058 + 2059 + (* Find a member in [mem_decs] whose key matches the current token 2060 + buffer content byte-for-byte, without allocating a string. Returns 2061 + the matching mem_dec together with the key string (owned by the 2062 + map). Used as a fast-path for object member dispatch. *) 2063 + let mem_by_token d mem_decs = 2064 + let r = ref None in 2065 + (try 2066 + String_map.iter 2067 + (fun k v -> 2068 + if tokbuf_equal_string d.token k then begin 2069 + r := Some (v, k); 2070 + raise_notrace Exit 2071 + end) 2072 + mem_decs 2073 + with Exit -> ()); 2074 + !r 2075 + 2076 + let[@inline] accept d = 2077 + token_add d d.u; 2078 + nextc d 2079 + 2080 + let token_pop_float d ~meta = 2081 + let token = token_pop d in 2082 + match float_of_string_opt token with 2083 + | Some f -> f 2084 + | None -> err_float_parse meta token (* likely [assert false] *) 2085 + 2086 + (* Decoder layout and position tracking *) 2087 + 2088 + let[@inline] ws_pop d = 2089 + if not d.layout then "" 2090 + else 2091 + let t = tokbuf_contents d.ws in 2092 + tokbuf_clear d.ws; 2093 + t 2094 + 2095 + let textloc_to_current ~first_byte ~first_line_num ~first_line_byte d = 2096 + if not d.locs then Loc.none 2097 + else 2098 + let last_byte = last_byte_of d in 2099 + let last_line_num = d.line and last_line_byte = d.line_start in 2100 + textloc_of_pos d ~first_byte ~last_byte ~first_line_num ~first_line_byte 2101 + ~last_line_num ~last_line_byte 2102 + 2103 + let textloc_prev_ascii_char ~first_byte ~first_line_num ~first_line_byte d = 2104 + (* N.B. when we call that the line doesn't move and the char was on 2105 + a single byte *) 2106 + if not d.locs then Loc.none 2107 + else 2108 + let last_byte = last_byte_of d - 1 in 2109 + let last_line_num = d.line and last_line_byte = d.line_start in 2110 + textloc_of_pos d ~first_byte ~last_byte ~first_line_num ~first_line_byte 2111 + ~last_line_num ~last_line_byte 2112 + 2113 + let meta_make d ?ws_before ?ws_after textloc = 2114 + if (not d.locs) && not d.layout then d.meta_none 2115 + else Meta.make ?ws_before ?ws_after textloc 2116 + 2117 + (* Decoding *) 2118 + 2119 + let false_uchars = [| 0x0066; 0x0061; 0x006C; 0x0073; 0x0065 |] 2120 + let true_uchars = [| 0x0074; 0x0072; 0x0075; 0x0065 |] 2121 + let null_uchars = [| 0x006E; 0x0075; 0x006C; 0x006C |] 2122 + 2123 + let ascii_str us = 2124 + String.init (Stdlib.Array.length us) (fun i -> 2125 + Char.chr (Stdlib.Array.get us i)) 2126 + 2127 + let[@inline] is_ws u = 2128 + if u > 0x20 then false 2129 + else 2130 + match Char.unsafe_chr u with ' ' | '\t' | '\r' | '\n' -> true | _ -> false 2131 + 2132 + let[@inline] read_ws d = 2133 + while is_ws d.u do 2134 + if d.layout then tokbuf_add_char d.ws (Char.unsafe_chr d.u); 2135 + nextc d 2136 + done 2137 + 2138 + let read_json_const d const = 2139 + (* First character was checked. *) 2140 + let ws_before = ws_pop d in 2141 + let first_byte = last_byte_of d in 2142 + let first_line_num = d.line and first_line_byte = d.line_start in 2143 + for i = 1 to Stdlib.Array.length const - 1 do 2144 + nextc d; 2145 + let c = Stdlib.Array.get const i in 2146 + if not (Int.equal d.u c) then 2147 + err_exp_in_const ~first_byte ~first_line_num ~first_line_byte d ~exp:c 2148 + ~fnd:d.u ~const:(ascii_str const) 2149 + done; 2150 + let textloc = 2151 + textloc_to_current d ~first_byte ~first_line_num ~first_line_byte 2152 + in 2153 + let ws_after = 2154 + nextc d; 2155 + read_ws d; 2156 + ws_pop d 2157 + in 2158 + meta_make d ~ws_before ~ws_after textloc 2159 + 2160 + let[@inline] read_json_false d = read_json_const d false_uchars 2161 + let[@inline] read_json_true d = read_json_const d true_uchars 2162 + let[@inline] read_json_null d = read_json_const d null_uchars 2163 + 2164 + let read_json_number d = 2165 + (* [is_number_start d.u] = true *) 2166 + let[@inline] read_digits d = 2167 + while is_digit d.u do 2168 + accept d 2169 + done 2170 + in 2171 + let[@inline] read_int d = 2172 + match d.u with 2173 + | 0x0030 (* 0 *) -> accept d 2174 + | u when is_digit u -> 2175 + accept d; 2176 + read_digits d 2177 + | _ -> err_exp_digit d 2178 + in 2179 + let[@inline] read_opt_frac d = 2180 + match d.u with 2181 + | 0x002E (* . *) -> 2182 + accept d; 2183 + if is_digit d.u then read_digits d else err_exp_digit d 2184 + | _ -> () 2185 + in 2186 + let[@inline] read_opt_exp d = 2187 + match d.u with 2188 + | 0x0065 (* e *) | 0x0045 (* E *) -> 2189 + token_add d d.u; 2190 + nextc d; 2191 + (match d.u with 2192 + | 0x002D (* - *) | 0x002B (* + *) -> 2193 + token_add d d.u; 2194 + nextc d 2195 + | _ -> ()); 2196 + if is_digit d.u then read_digits d else err_exp_digit d 2197 + | _ -> () 2198 + in 2199 + let first_byte = last_byte_of d in 2200 + let first_line_num = d.line and first_line_byte = d.line_start in 2201 + let ws_before = ws_pop d in 2202 + token_clear d; 2203 + if d.u = 0x002D (* - *) then accept d; 2204 + read_int d; 2205 + read_opt_frac d; 2206 + read_opt_exp d; 2207 + let textloc = 2208 + textloc_prev_ascii_char d ~first_byte ~first_line_num ~first_line_byte 2209 + in 2210 + let ws_after = 2211 + read_ws d; 2212 + ws_pop d 2213 + in 2214 + meta_make d ~ws_before ~ws_after textloc 2215 + 2216 + let read_json_string d = 2217 + (* d.u is 0x0022 *) 2218 + let first_byte = last_byte_of d in 2219 + let first_line_num = d.line and first_line_byte = d.line_start in 2220 + let rec read_uescape d hi uc count = 2221 + if count > 0 then 2222 + match d.u with 2223 + | u when 0x0030 <= u && u <= 0x0039 -> 2224 + nextc d; 2225 + read_uescape d hi ((uc * 16) + u - 0x30) (count - 1) 2226 + | u when 0x0041 <= u && u <= 0x0046 -> 2227 + nextc d; 2228 + read_uescape d hi ((uc * 16) + u - 0x37) (count - 1) 2229 + | u when 0x0061 <= u && u <= 0x0066 -> 2230 + nextc d; 2231 + read_uescape d hi ((uc * 16) + u - 0x57) (count - 1) 2232 + | _ -> err_exp_hex_digit d 2233 + else 2234 + match hi with 2235 + | Some hi -> 2236 + (* combine high and low surrogate. *) 2237 + if not (is_lo_surrogate uc) then err_exp_lo_surrogate d uc 2238 + else 2239 + let u = (((hi land 0x3FF) lsl 10) lor (uc land 0x3FF)) + 0x10000 in 2240 + token_add d u 2241 + | None -> 2242 + if not (is_surrogate uc) then token_add d uc 2243 + else if uc > 0xDBFF then err_unpaired_lo_surrogate d uc 2244 + else if d.u <> 0x005C (* \ *) then err_unpaired_hi_surrogate d uc 2245 + else ( 2246 + nextc d; 2247 + if d.u <> 0x0075 (* u *) then err_unpaired_hi_surrogate d uc 2248 + else ( 2249 + nextc d; 2250 + read_uescape d (Some uc) 0 4)) 2251 + in 2252 + let read_escape d = 2253 + match d.u with 2254 + | 0x0022 (* DQUOTE *) | 0x005C (* \ *) | 0x002F (* / *) -> accept d 2255 + | 0x0062 (* b *) -> 2256 + token_add d 0x0008 (* backspace *); 2257 + nextc d 2258 + | 0x0066 (* f *) -> 2259 + token_add d 0x000C (* form feed *); 2260 + nextc d 2261 + | 0x006E (* n *) -> 2262 + token_add d 0x000A (* line feed *); 2263 + nextc d 2264 + | 0x0072 (* r *) -> 2265 + token_add d 0x000D (* carriage return *); 2266 + nextc d 2267 + | 0x0074 (* t *) -> 2268 + token_add d 0x0009 (* tab *); 2269 + nextc d 2270 + | 0x0075 (* u *) -> 2271 + nextc d; 2272 + read_uescape d None 0 4 2273 + | u -> err_exp_esc ~first_byte ~first_line_num ~first_line_byte d u 2274 + in 2275 + let rec loop d = 2276 + match d.u with 2277 + | 0x005C (* \ *) -> 2278 + nextc d; 2279 + read_escape d; 2280 + loop d 2281 + | 0x0022 (* DQUOTE *) -> () 2282 + | u when u = eot -> 2283 + err_unclosed_string ~first_byte ~first_line_num ~first_line_byte d 2284 + | u when 0x0000 <= u && u <= 0x001F -> 2285 + err_illegal_ctrl_char ~first_byte ~first_line_num ~first_line_byte d 2286 + | _ -> 2287 + accept d; 2288 + loop d 2289 + in 2290 + let ws_before = ws_pop d in 2291 + nextc d; 2292 + token_clear d; 2293 + loop d; 2294 + let textloc = 2295 + textloc_to_current d ~first_byte ~first_line_num ~first_line_byte 2296 + in 2297 + let ws_after = 2298 + nextc d; 2299 + read_ws d; 2300 + ws_pop d 2301 + in 2302 + meta_make d ~ws_before ~ws_after textloc 2303 + 2304 + let read_json_name d = 2305 + let meta = read_json_string d in 2306 + if d.u = 0x003A (* : *) then ( 2307 + nextc d; 2308 + meta) 2309 + else err_exp_colon d 2310 + 2311 + let read_json_mem_sep d = 2312 + if d.u = 0x007D (* } *) then () 2313 + else if d.u = 0x002C (* , *) then ( 2314 + nextc d; 2315 + read_ws d; 2316 + if d.u <> 0x0022 then err_exp_mem d) 2317 + else err_exp_comma_or_eoo d 2318 + 2319 + (* Skip-parse a JSON value: advance past [d.u] at the byte level without 2320 + materialising token buffers, parsing numbers, or decoding string 2321 + escapes. The only decoding done is UTF-8 in [nextc]; escapes in 2322 + strings are recognised only enough to not stop at a backslash-quote. *) 2323 + let rec skip_json_value d = 2324 + read_ws d; 2325 + match d.u with 2326 + | 0x007B (* { *) -> skip_json_object d 2327 + | 0x005B (* [ *) -> skip_json_array d 2328 + | 0x0022 (* DQUOTE *) -> skip_json_string d 2329 + | 0x006E (* n *) -> Stdlib.ignore (read_json_null d) 2330 + | 0x0074 (* t *) -> Stdlib.ignore (read_json_true d) 2331 + | 0x0066 (* f *) -> Stdlib.ignore (read_json_false d) 2332 + | u when is_number_start u -> skip_json_number d 2333 + | _ -> err_not_json_value d 2334 + 2335 + and skip_json_string d = 2336 + (* Byte-level scan for the closing quote; matches simdjson On-Demand 2337 + semantics. Structural contract (bracket nesting, string termination) 2338 + is enforced; content (escape correctness, exact hex digits after 2339 + [\u]) is NOT validated. Consumers needing strict content 2340 + validation should decode with [Json.json] and then discard rather 2341 + than [Codec.ignore]. *) 2342 + let done_ = ref false in 2343 + while not !done_ do 2344 + if d.i_next > d.i_max then 2345 + if is_eod d then 2346 + err_unclosed_string ~first_byte:0 ~first_line_num:Loc.line_num_none 2347 + ~first_line_byte:Loc.byte_pos_none d 2348 + else set_slice d (Bytes.Reader.read d.reader) 2349 + else begin 2350 + let b = Stdlib.Bytes.unsafe_get d.i d.i_next in 2351 + d.i_next <- d.i_next + 1; 2352 + d.byte_count <- d.byte_count + 1; 2353 + match b with 2354 + | '\\' -> 2355 + if d.i_next > d.i_max then 2356 + if is_eod d then 2357 + err_unclosed_string ~first_byte:0 2358 + ~first_line_num:Loc.line_num_none 2359 + ~first_line_byte:Loc.byte_pos_none d 2360 + else set_slice d (Bytes.Reader.read d.reader); 2361 + d.i_next <- d.i_next + 1; 2362 + d.byte_count <- d.byte_count + 1 2363 + | '"' -> done_ := true 2364 + | _ -> () 2365 + end 2366 + done; 2367 + nextc d; 2368 + read_ws d 2369 + 2370 + and skip_json_number d = 2371 + (* Consume number-continuation characters; matches simdjson 2372 + On-Demand. Structural number shape ([1..2], [+5], [1eE2]) is NOT 2373 + validated here. *) 2374 + let done_ = ref false in 2375 + while not !done_ do 2376 + if d.i_next > d.i_max then 2377 + if is_eod d then done_ := true 2378 + else set_slice d (Bytes.Reader.read d.reader) 2379 + else 2380 + match Stdlib.Bytes.unsafe_get d.i d.i_next with 2381 + | '0' .. '9' | '-' | '+' | '.' | 'e' | 'E' -> 2382 + d.i_next <- d.i_next + 1; 2383 + d.byte_count <- d.byte_count + 1 2384 + | _ -> done_ := true 2385 + done; 2386 + nextc d; 2387 + read_ws d 2388 + 2389 + and skip_json_array d = 2390 + nextc d; 2391 + (* [ *) 2392 + read_ws d; 2393 + if d.u = 0x005D (* ] *) then ( 2394 + nextc d; 2395 + read_ws d) 2396 + else 2397 + let rec loop () = 2398 + skip_json_value d; 2399 + match d.u with 2400 + | 0x002C (* , *) -> 2401 + nextc d; 2402 + read_ws d; 2403 + loop () 2404 + | 0x005D (* ] *) -> 2405 + nextc d; 2406 + read_ws d 2407 + | fnd -> err_exp_comma_or_eoa d ~fnd 2408 + in 2409 + loop () 2410 + 2411 + and skip_json_object d = 2412 + nextc d; 2413 + (* { *) 2414 + read_ws d; 2415 + if d.u = 0x007D (* } *) then ( 2416 + nextc d; 2417 + read_ws d) 2418 + else 2419 + let rec loop () = 2420 + if d.u <> 0x0022 then err_exp_mem d; 2421 + skip_json_string d; 2422 + if d.u <> 0x003A (* : *) then err_exp_colon d; 2423 + nextc d; 2424 + read_ws d; 2425 + skip_json_value d; 2426 + match d.u with 2427 + | 0x002C (* , *) -> 2428 + nextc d; 2429 + read_ws d; 2430 + loop () 2431 + | 0x007D (* } *) -> 2432 + nextc d; 2433 + read_ws d 2434 + | _ -> err_exp_comma_or_eoo d 2435 + in 2436 + loop () 2437 + 2438 + let rec parse : type a. decoder -> a t -> a = 2439 + fun d t -> 2440 + match 2441 + read_ws d; 2442 + t 2443 + with 2444 + | Null map -> ( 2445 + match d.u with 2446 + | 0x006E (* n *) -> map.dec (read_json_null d) () 2447 + | _ -> type_error d t) 2448 + | Bool map -> ( 2449 + match d.u with 2450 + | 0x0066 (* f *) -> map.dec (read_json_false d) false 2451 + | 0x0074 (* t *) -> map.dec (read_json_true d) true 2452 + | _ -> type_error d t) 2453 + | Number map -> ( 2454 + match d.u with 2455 + | u when is_number_start u -> 2456 + let meta = read_json_number d in 2457 + map.dec meta (token_pop_float d ~meta) 2458 + | 0x006E (* n *) -> map.dec (read_json_null d) Float.nan 2459 + | _ -> type_error d t) 2460 + | String map -> ( 2461 + match d.u with 2462 + | 0x0022 (* DQUOTE *) -> 2463 + let meta = read_json_string d in 2464 + map.dec meta (token_pop d) 2465 + | _ -> type_error d t) 2466 + | Array map -> ( 2467 + match d.u with 2468 + | 0x005B (* [ *) -> decode_array d map 2469 + | _ -> type_error d t) 2470 + | Object map -> ( 2471 + match d.u with 2472 + | 0x007B (* { *) -> decode_object d map 2473 + | _ -> type_error d t) 2474 + | Map map -> map.dec (parse d map.dom) 2475 + | Any map -> decode_any d t map 2476 + | Rec t -> parse d (Lazy.force t) 2477 + | Ignore -> skip_json_value d 2478 + 2479 + and decode_array : type a elt b. decoder -> (a, elt, b) array_map -> a = 2480 + fun d map -> 2481 + let ws_before = ws_pop d in 2482 + let first_byte = last_byte_of d in 2483 + let first_line_num = d.line and first_line_byte = d.line_start in 2484 + let b, len = 2485 + match 2486 + nextc d; 2487 + read_ws d; 2488 + d.u 2489 + with 2490 + | 0x005D (* ] *) -> (map.dec_empty (), 0) 2491 + | _ -> ( 2492 + let b = ref (map.dec_empty ()) in 2493 + let i = ref 0 in 2494 + let next = ref true in 2495 + try 2496 + while !next do 2497 + begin 2498 + let first_byte = last_byte_of d in 2499 + let first_line_num = d.line and first_line_byte = d.line_start in 2500 + try 2501 + if map.dec_skip !i !b then parse d Codec.ignore 2502 + else b := map.dec_add !i (parse d map.elt) !b 2503 + with Error e -> 2504 + let imeta = 2505 + error_meta_to_current ~first_byte ~first_line_num 2506 + ~first_line_byte d 2507 + in 2508 + Codec.error_push_array (error_meta d) map (!i, imeta) e 2509 + end; 2510 + incr i; 2511 + match 2512 + read_ws d; 2513 + d.u 2514 + with 2515 + | 0x005D (* ] *) -> next := false 2516 + | 0x002C (* , *) -> 2517 + nextc d; 2518 + read_ws d 2519 + | u when u = eot -> err_unclosed_array d 2520 + | fnd -> err_exp_comma_or_eoa d ~fnd 2521 + done; 2522 + (!b, !i) 2523 + with Error e -> 2524 + Error.adjust_context ~first_byte ~first_line_num ~first_line_byte e) 2525 + in 2526 + let textloc = 2527 + textloc_to_current d ~first_byte ~first_line_num ~first_line_byte 2528 + in 2529 + let ws_after = 2530 + nextc d; 2531 + read_ws d; 2532 + ws_pop d 2533 + in 2534 + let meta = meta_make d ~ws_before ~ws_after textloc in 2535 + map.dec_finish meta len b 2536 + 2537 + and decode_object : type a. decoder -> (a, a) object_map -> a = 2538 + fun d map -> 2539 + let ws_before = ws_pop d in 2540 + let first_byte = last_byte_of d in 2541 + let first_line_num = d.line and first_line_byte = d.line_start in 2542 + let dict = 2543 + try 2544 + nextc d; 2545 + read_ws d; 2546 + decode_object_map d map (Unknown_mems None) String_map.empty 2547 + String_map.empty [] Dict.empty 2548 + with 2549 + | Error (ctx, meta, k) when Error.Context.is_empty ctx -> 2550 + let meta = 2551 + (* This is for when Codec.finish_object_decode raises. *) 2552 + if Loc.is_none (Meta.textloc meta) then 2553 + error_meta_to_current d ~first_byte ~first_line_num ~first_line_byte 2554 + else meta 2555 + in 2556 + Error.raise ctx meta k 2557 + | Error e -> 2558 + Error.adjust_context ~first_byte ~first_line_num ~first_line_byte e 2559 + in 2560 + let textloc = 2561 + textloc_to_current d ~first_byte ~first_line_num ~first_line_byte 2562 + in 2563 + let ws_after = 2564 + nextc d; 2565 + read_ws d; 2566 + ws_pop d 2567 + in 2568 + let meta = meta_make d ~ws_before ~ws_after textloc in 2569 + let dict = Dict.add Codec.object_meta_arg meta dict in 2570 + Codec.apply_dict map.dec dict 2571 + 2572 + and decode_object_delayed : type o. 2573 + decoder -> 2574 + (o, o) object_map -> 2575 + mem_dec String_map.t -> 2576 + mem_dec String_map.t -> 2577 + object' -> 2578 + Dict.t -> 2579 + mem_dec String_map.t * object' * Dict.t = 2580 + fun d map mem_miss mem_decs delay dict -> 2581 + let rec loop d map mem_miss mem_decs rem_delay dict = function 2582 + | [] -> (mem_miss, rem_delay, dict) 2583 + | ((((name, _meta) as nm), v) as mem) :: delay -> ( 2584 + match String_map.find_opt name mem_decs with 2585 + | None -> loop d map mem_miss mem_decs (mem :: rem_delay) dict delay 2586 + | Some (Mem_dec m) -> 2587 + let dict = 2588 + try 2589 + let t = m.type' in 2590 + let v = 2591 + match Codec.decode t v with 2592 + | Ok v -> v 2593 + | Error e -> raise_notrace (Error e) 2594 + in 2595 + Dict.add m.id v dict 2596 + with Error e -> Codec.error_push_object (error_meta d) map nm e 2597 + in 2598 + let mem_miss = String_map.remove name mem_miss in 2599 + loop d map mem_miss mem_decs rem_delay dict delay) 2600 + in 2601 + loop d map mem_miss mem_decs [] dict delay 2602 + 2603 + and decode_object_map : type o. 2604 + decoder -> 2605 + (o, o) object_map -> 2606 + unknown_mems_option -> 2607 + mem_dec String_map.t -> 2608 + mem_dec String_map.t -> 2609 + object' -> 2610 + Dict.t -> 2611 + Dict.t = 2612 + fun d map umems mem_miss mem_decs delay dict -> 2613 + let u _ _ _ = assert false in 2614 + let mem_miss = String_map.union u mem_miss map.mem_decs in 2615 + let mem_decs = String_map.union u mem_decs map.mem_decs in 2616 + match map.shape with 2617 + | Object_cases (umems', cases) -> 2618 + let umems' = Unknown_mems umems' in 2619 + let umems, dict = Codec.override_unknown_mems ~by:umems umems' dict in 2620 + decode_object_case d map umems cases mem_miss mem_decs delay dict 2621 + | Object_basic umems' -> ( 2622 + let mem_miss, delay, dict = 2623 + decode_object_delayed d map mem_miss mem_decs delay dict 2624 + in 2625 + let umems' = Unknown_mems (Some umems') in 2626 + let umems, dict = Codec.override_unknown_mems ~by:umems umems' dict in 2627 + match umems with 2628 + | Unknown_mems (Some Unknown_skip | None) -> 2629 + decode_object_basic d map Unknown_skip () mem_miss mem_decs dict 2630 + | Unknown_mems (Some (Unknown_error as u)) -> 2631 + if delay = [] then 2632 + decode_object_basic d map u () mem_miss mem_decs dict 2633 + else 2634 + let fnd = List.map fst delay in 2635 + Codec.unexpected_mems_error (error_meta d) map ~fnd 2636 + | Unknown_mems (Some (Unknown_keep (umap, _) as u)) -> 2637 + let add_delay umems (((n, meta) as nm), v) = 2638 + try 2639 + let t = umap.mems_type in 2640 + let v = 2641 + match Codec.decode t v with 2642 + | Ok v -> v 2643 + | Error e -> raise_notrace (Error e) 2644 + in 2645 + umap.dec_add meta n v umems 2646 + with Error e -> Codec.error_push_object (error_meta d) map nm e 2647 + in 2648 + let umems = List.fold_left add_delay (umap.dec_empty ()) delay in 2649 + decode_object_basic d map u umems mem_miss mem_decs dict) 2650 + 2651 + and decode_object_basic : type o p mems builder. 2652 + decoder -> 2653 + (o, o) object_map -> 2654 + (p, mems, builder) unknown_mems -> 2655 + builder -> 2656 + mem_dec String_map.t -> 2657 + mem_dec String_map.t -> 2658 + Dict.t -> 2659 + Dict.t = 2660 + fun d map u umap mem_miss mem_decs dict -> 2661 + match d.u with 2662 + | 0x007D (* } *) -> 2663 + let meta = 2664 + d.meta_none 2665 + (* we add a correct one in decode_object *) 2666 + in 2667 + Codec.finish_object_decode map meta u umap mem_miss dict 2668 + | 0x0022 -> 2669 + let meta = read_json_name d in 2670 + (* Fast path: byte-compare the token buffer against [mem_decs] 2671 + keys without allocating. Only materialise the name as a 2672 + string if no match was found (for Unknown_keep paths and 2673 + error messages). *) 2674 + begin match mem_by_token d mem_decs with 2675 + | Some (Mem_dec mem, name) -> 2676 + token_clear d; 2677 + let mem_miss = String_map.remove name mem_miss in 2678 + let dict = 2679 + try Dict.add mem.id (parse d mem.type') dict 2680 + with Error e -> 2681 + Codec.error_push_object (error_meta d) map (name, meta) e 2682 + in 2683 + read_json_mem_sep d; 2684 + decode_object_basic d map u umap mem_miss mem_decs dict 2685 + | None -> ( 2686 + match u with 2687 + | Unknown_skip -> 2688 + (* The name is never read, so we don't need to allocate it. *) 2689 + token_clear d; 2690 + let () = 2691 + try parse d Codec.ignore 2692 + with Error e -> 2693 + Codec.error_push_object (error_meta d) map 2694 + (token_pop d, meta) 2695 + e 2696 + in 2697 + read_json_mem_sep d; 2698 + decode_object_basic d map u umap mem_miss mem_decs dict 2699 + | Unknown_error -> 2700 + let name = token_pop d in 2701 + let fnd = [ (name, meta) ] in 2702 + Codec.unexpected_mems_error (error_meta d) map ~fnd 2703 + | Unknown_keep (umap', _) -> 2704 + let name = token_pop d in 2705 + let umap = 2706 + try umap'.dec_add meta name (parse d umap'.mems_type) umap 2707 + with Error e -> 2708 + Codec.error_push_object (error_meta d) map (name, meta) e 2709 + in 2710 + read_json_mem_sep d; 2711 + decode_object_basic d map u umap mem_miss mem_decs dict) 2712 + end 2713 + | u when u = eot -> err_unclosed_object d map 2714 + | _ -> err_exp_mem_or_eoo d 2715 + 2716 + and decode_object_case : type o cases tag. 2717 + decoder -> 2718 + (o, o) object_map -> 2719 + unknown_mems_option -> 2720 + (o, cases, tag) object_cases -> 2721 + mem_dec String_map.t -> 2722 + mem_dec String_map.t -> 2723 + object' -> 2724 + Dict.t -> 2725 + Dict.t = 2726 + fun d map umems cases mem_miss mem_decs delay dict -> 2727 + let decode_case_tag ~sep map umems cases mem_miss mem_decs nmeta tag delay = 2728 + let eq_tag (Case c) = cases.tag_compare c.tag tag = 0 in 2729 + match List.find_opt eq_tag cases.cases with 2730 + | None -> ( 2731 + try Codec.unexpected_case_tag_error (error_meta d) map cases tag 2732 + with Error e -> 2733 + Codec.error_push_object (error_meta d) map (cases.tag.name, nmeta) e) 2734 + | Some (Case case) -> 2735 + if sep then read_json_mem_sep d; 2736 + let dict = 2737 + decode_object_map d case.object_map umems mem_miss mem_decs delay dict 2738 + in 2739 + Dict.add cases.id (case.dec (apply_dict case.object_map.dec dict)) dict 2740 + in 2741 + match d.u with 2742 + | 0x007D (* } *) -> ( 2743 + match cases.tag.dec_absent with 2744 + | Some tag -> 2745 + decode_case_tag ~sep:false map umems cases mem_miss mem_decs 2746 + d.meta_none tag delay 2747 + | None -> 2748 + let fnd = List.map (fun ((n, _), _) -> n) delay in 2749 + let exp = String_map.singleton cases.tag.name (Mem_dec cases.tag) in 2750 + Codec.missing_mems_error (error_meta d) map ~exp ~fnd) 2751 + | 0x0022 -> 2752 + let meta = read_json_name d in 2753 + let name = token_pop d in 2754 + if String.equal name cases.tag.name then 2755 + let tag = 2756 + try parse d cases.tag.type' 2757 + with Error e -> 2758 + Codec.error_push_object (error_meta d) map (name, meta) e 2759 + in 2760 + decode_case_tag ~sep:true map umems cases mem_miss mem_decs meta tag 2761 + delay 2762 + else 2763 + begin match String_map.find_opt name mem_decs with 2764 + | Some (Mem_dec mem) -> 2765 + let mem_miss = String_map.remove name mem_miss in 2766 + let dict = 2767 + try Dict.add mem.id (parse d mem.type') dict 2768 + with Error e -> 2769 + Codec.error_push_object (error_meta d) map (name, meta) e 2770 + in 2771 + read_json_mem_sep d; 2772 + decode_object_case d map umems cases mem_miss mem_decs delay dict 2773 + | None -> 2774 + (* Because JSON can be out of order we don't know how to decode 2775 + this yet. Generic decode *) 2776 + let v = 2777 + try parse d Codec.Value.t 2778 + with Error e -> 2779 + Codec.error_push_object (error_meta d) map (name, meta) e 2780 + in 2781 + let delay = ((name, meta), v) :: delay in 2782 + read_json_mem_sep d; 2783 + decode_object_case d map umems cases mem_miss mem_decs delay dict 2784 + end 2785 + | u when u = eot -> err_unclosed_object d map 2786 + | _ -> err_exp_mem_or_eoo d 2787 + 2788 + and decode_any : type a. decoder -> a t -> a any_map -> a = 2789 + fun d t map -> 2790 + let case d t map = 2791 + match map with None -> type_error d t | Some t -> parse d t 2792 + in 2793 + match d.u with 2794 + | 0x006E (* n *) -> case d t map.dec_null 2795 + | 0x0066 (* f *) | 0x0074 (* t *) -> case d t map.dec_bool 2796 + | 0x0022 (* DQUOTE *) -> case d t map.dec_string 2797 + | 0x005B (* [ *) -> case d t map.dec_array 2798 + | 0x007B (* { *) -> case d t map.dec_object 2799 + | u when is_number_start u -> case d t map.dec_number 2800 + | _ -> err_not_json_value d 2801 + 2802 + let of_reader_exn ?layout ?locs ?file t reader = 2803 + let d = decoder ?layout ?locs ?file reader in 2804 + let v = 2805 + nextc d; 2806 + parse d t 2807 + in 2808 + if d.u <> eot then err_exp_eot d else v 2809 + 2810 + let of_reader ?layout ?locs ?file t reader = 2811 + try Ok (of_reader_exn ?layout ?locs ?file t reader) with Error e -> Error e 2812 + 2813 + let of_string_exn ?layout ?locs ?file t s = 2814 + of_reader_exn ?layout ?locs ?file t (Bytes.Reader.of_string s) 2815 + 2816 + let of_string ?layout ?locs ?file t s = 2817 + of_reader ?layout ?locs ?file t (Bytes.Reader.of_string s) 2818 + 2819 + (* Encoding *) 2820 + 2821 + type encoder = { 2822 + writer : Bytes.Writer.t; (* Destination of bytes. *) 2823 + o : Bytes.t; (* Buffer for slices. *) 2824 + o_max : int; (* Max index in [o]. *) 2825 + mutable o_next : int; (* Next writable index in [o]. *) 2826 + format : format; 2827 + number_format : string; 2828 + } 2829 + 2830 + let encoder ?buf ?(format = Minify) ?(number_format = default_number_format) 2831 + writer = 2832 + let o = 2833 + match buf with 2834 + | Some buf -> buf 2835 + | None -> Bytes.create (Bytes.Writer.slice_length writer) 2836 + in 2837 + let len = Bytes.length o in 2838 + let number_format = string_of_format number_format in 2839 + let o_max = len - 1 and o_next = 0 in 2840 + { writer; o; o_max; o_next; format; number_format } 2841 + 2842 + let[@inline] rem_len e = e.o_max - e.o_next + 1 2843 + 2844 + let flush e = 2845 + Bytes.Writer.write e.writer (Bytes.Slice.make e.o ~first:0 ~length:e.o_next); 2846 + e.o_next <- 0 2847 + 2848 + let write_eot ~eod e = 2849 + flush e; 2850 + if eod then Bytes.Writer.write_eod e.writer 2851 + 2852 + let write_char e c = 2853 + if e.o_next > e.o_max then flush e; 2854 + Stdlib.Bytes.set e.o e.o_next c; 2855 + e.o_next <- e.o_next + 1 2856 + 2857 + let rec write_substring e s first length = 2858 + if length = 0 then () 2859 + else 2860 + let len = Int.min (rem_len e) length in 2861 + if len = 0 then ( 2862 + flush e; 2863 + write_substring e s first length) 2864 + else begin 2865 + Bytes.blit_string s first e.o e.o_next len; 2866 + e.o_next <- e.o_next + len; 2867 + write_substring e s (first + len) (length - len) 2868 + end 2869 + 2870 + let write_bytes e s = write_substring e s 0 (String.length s) 2871 + let write_sep e = write_char e ',' 2872 + 2873 + let write_indent e ~nest = 2874 + for _i = 1 to nest do 2875 + write_char e ' '; 2876 + write_char e ' ' 2877 + done 2878 + 2879 + let write_ws_before e m = write_bytes e (Meta.ws_before m) 2880 + let write_ws_after e m = write_bytes e (Meta.ws_after m) 2881 + let write_json_null e = write_bytes e "null" 2882 + let write_json_bool e b = write_bytes e (if b then "true" else "false") 2883 + 2884 + (* XXX we bypass the printf machinery as it costs quite quite a bit. 2885 + Would be even better if we could format directly to a bytes values 2886 + rather than allocating a string per number. *) 2887 + external format_float : string -> float -> string = "caml_format_float" 2888 + 2889 + let write_json_number e f = 2890 + if Float.is_finite f then write_bytes e (format_float e.number_format f) 2891 + else write_json_null e 2892 + 2893 + let write_json_string e s = 2894 + let is_control = function '\x00' .. '\x1F' | '\x7F' -> true | _ -> false in 2895 + let len = String.length s in 2896 + let flush e start i max = 2897 + if start <= max then write_substring e s start (i - start) 2898 + in 2899 + let rec loop start i max = 2900 + if i > max then flush e start i max 2901 + else 2902 + let next = i + 1 in 2903 + match String.get s i with 2904 + | '\"' -> 2905 + flush e start i max; 2906 + write_bytes e "\\\""; 2907 + loop next next max 2908 + | '\\' -> 2909 + flush e start i max; 2910 + write_bytes e "\\\\"; 2911 + loop next next max 2912 + | '\n' -> 2913 + flush e start i max; 2914 + write_bytes e "\\n"; 2915 + loop next next max 2916 + | '\r' -> 2917 + flush e start i max; 2918 + write_bytes e "\\r"; 2919 + loop next next max 2920 + | '\t' -> 2921 + flush e start i max; 2922 + write_bytes e "\\t"; 2923 + loop next next max 2924 + | c when is_control c -> 2925 + flush e start i max; 2926 + write_bytes e "\\u"; 2927 + write_bytes e (Fmt.str "%04X" (Char.code c)); 2928 + loop next next max 2929 + | _ -> loop start next max 2930 + in 2931 + write_char e '"'; 2932 + loop 0 0 (len - 1); 2933 + write_char e '"' 2934 + 2935 + let encode_null (map : ('a, 'b) Codec.base_map) e v = 2936 + let () = map.enc v in 2937 + match e.format with 2938 + | Minify | Indent -> write_json_null e 2939 + | Layout -> 2940 + let meta = map.enc_meta v in 2941 + write_ws_before e meta; 2942 + write_json_null e; 2943 + write_ws_after e meta 2944 + 2945 + let encode_bool (map : ('a, 'b) Codec.base_map) e v = 2946 + let b = map.enc v in 2947 + match e.format with 2948 + | Minify | Indent -> write_json_bool e b 2949 + | Layout -> 2950 + let meta = map.enc_meta v in 2951 + write_ws_before e meta; 2952 + write_json_bool e b; 2953 + write_ws_after e meta 2954 + 2955 + let encode_number (map : ('a, 'b) Codec.base_map) e v = 2956 + let n = map.enc v in 2957 + match e.format with 2958 + | Minify | Indent -> write_json_number e n 2959 + | Layout -> 2960 + let meta = map.enc_meta v in 2961 + write_ws_before e meta; 2962 + write_json_number e n; 2963 + write_ws_after e meta 2964 + 2965 + let encode_string (map : ('a, 'b) Codec.base_map) e v = 2966 + let s = map.enc v in 2967 + match e.format with 2968 + | Minify | Indent -> write_json_string e s 2969 + | Layout -> 2970 + let meta = map.enc_meta v in 2971 + write_ws_before e meta; 2972 + write_json_string e s; 2973 + write_ws_after e meta 2974 + 2975 + let encode_mem_indent ~nest e = 2976 + write_char e '\n'; 2977 + write_indent e ~nest 2978 + 2979 + let encode_mem_name e meta n = 2980 + match e.format with 2981 + | Minify -> 2982 + write_json_string e n; 2983 + write_char e ':' 2984 + | Indent -> 2985 + write_json_string e n; 2986 + write_bytes e ": " 2987 + | Layout -> 2988 + write_ws_before e meta; 2989 + write_json_string e n; 2990 + write_ws_after e meta; 2991 + write_char e ':' 2992 + 2993 + let rec write : type a. nest:int -> a Codec.t -> encoder -> a -> unit = 2994 + fun ~nest t e v -> 2995 + match t with 2996 + | Null map -> encode_null map e v 2997 + | Bool map -> encode_bool map e v 2998 + | Number map -> encode_number map e v 2999 + | String map -> encode_string map e v 3000 + | Array map -> encode_array ~nest map e v 3001 + | Object map -> encode_object ~nest map e v 3002 + | Any map -> write ~nest (map.enc v) e v 3003 + | Map map -> write ~nest map.dom e (map.enc v) 3004 + | Rec t -> write ~nest (Lazy.force t) e v 3005 + | Ignore -> Error.failf Meta.none "Cannot encode Ignore value" 3006 + 3007 + and encode_array : type a elt b. 3008 + nest:int -> (a, elt, b) Codec.array_map -> encoder -> a -> unit = 3009 + fun ~nest map e v -> 3010 + let encode_element ~nest map e i v = 3011 + if i <> 0 then write_sep e; 3012 + try 3013 + write ~nest map.elt e v; 3014 + e 3015 + with Error e -> Codec.error_push_array Meta.none map (i, Meta.none) e 3016 + in 3017 + match e.format with 3018 + | Minify -> 3019 + write_char e '['; 3020 + Stdlib.ignore (map.enc (encode_element ~nest:(nest + 1) map) e v); 3021 + write_char e ']' 3022 + | Layout -> 3023 + let meta = map.enc_meta v in 3024 + write_ws_before e meta; 3025 + write_char e '['; 3026 + Stdlib.ignore (map.enc (encode_element ~nest:(nest + 1) map) e v); 3027 + write_char e ']'; 3028 + write_ws_after e meta 3029 + | Indent -> 3030 + let encode_element ~nest map e i v = 3031 + if i <> 0 then write_sep e; 3032 + write_char e '\n'; 3033 + write_indent e ~nest; 3034 + try 3035 + write ~nest map.elt e v; 3036 + e 3037 + with Error e -> Codec.error_push_array Meta.none map (i, Meta.none) e 3038 + in 3039 + let array_not_empty e = 3040 + e.o_next = 0 || not (Bytes.get e.o (e.o_next - 1) = '[') 3041 + in 3042 + write_char e '['; 3043 + Stdlib.ignore (map.enc (encode_element ~nest:(nest + 1) map) e v); 3044 + if array_not_empty e then ( 3045 + write_char e '\n'; 3046 + write_indent e ~nest); 3047 + write_char e ']' 3048 + 3049 + and encode_object : type o. 3050 + nest:int -> (o, o) Codec.object_map -> encoder -> o -> unit = 3051 + fun ~nest map e o -> 3052 + match e.format with 3053 + | Minify -> 3054 + write_char e '{'; 3055 + Stdlib.ignore 3056 + (encode_object_map ~nest:(nest + 1) map ~do_unknown:true e ~start:true o); 3057 + write_char e '}' 3058 + | Layout -> 3059 + let meta = map.enc_meta o in 3060 + write_ws_before e meta; 3061 + write_char e '{'; 3062 + Stdlib.ignore 3063 + (encode_object_map ~nest:(nest + 1) map ~do_unknown:true e ~start:true o); 3064 + write_char e '}'; 3065 + write_ws_after e meta 3066 + | Indent -> 3067 + write_char e '{'; 3068 + let start = 3069 + encode_object_map ~nest:(nest + 1) map ~do_unknown:true e ~start:true o 3070 + in 3071 + if not start then ( 3072 + write_char e '\n'; 3073 + write_indent e ~nest); 3074 + write_char e '}' 3075 + 3076 + and encode_object_map : type o. 3077 + nest:int -> 3078 + (o, o) Codec.object_map -> 3079 + do_unknown:bool -> 3080 + encoder -> 3081 + start:bool -> 3082 + o -> 3083 + bool = 3084 + fun ~nest map ~do_unknown e ~start o -> 3085 + let encode_mem ~nest map e o start (Mem_enc mmap) = 3086 + try 3087 + let v = mmap.enc o in 3088 + if mmap.enc_omit v then start 3089 + else begin 3090 + if not start then write_char e ','; 3091 + if e.format = Indent then encode_mem_indent ~nest e; 3092 + let meta = 3093 + (* if e.format = Layout then mmap.enc_name_meta v else *) 3094 + Meta.none 3095 + in 3096 + encode_mem_name e meta mmap.name; 3097 + write ~nest mmap.type' e v; 3098 + false 3099 + end 3100 + with Error e -> 3101 + Codec.error_push_object Meta.none map (mmap.name, Meta.none) e 3102 + in 3103 + match map.shape with 3104 + | Object_basic u -> 3105 + let start = 3106 + List.fold_left (encode_mem ~nest map e o) start map.mem_encs 3107 + in 3108 + begin match u with 3109 + | Unknown_keep (umap, enc) when do_unknown -> 3110 + encode_unknown_mems ~nest map umap e ~start (enc o) 3111 + | _ -> start 3112 + end 3113 + | Object_cases (umap, cases) -> ( 3114 + let (Case_value (case, c)) = cases.enc_case (cases.enc o) in 3115 + let start = 3116 + if cases.tag.enc_omit case.tag then start 3117 + else encode_mem ~nest map e case.tag start (Mem_enc cases.tag) 3118 + in 3119 + let start = 3120 + List.fold_left (encode_mem ~nest map e o) start map.mem_encs 3121 + in 3122 + match umap with 3123 + | Some (Unknown_keep (umap, enc)) -> 3124 + let start = 3125 + encode_object_map ~nest case.object_map ~do_unknown:false e ~start c 3126 + in 3127 + encode_unknown_mems ~nest map umap e ~start (enc o) 3128 + | _ -> encode_object_map ~nest case.object_map ~do_unknown e ~start c) 3129 + 3130 + and encode_unknown_mems : type o mems a builder. 3131 + nest:int -> 3132 + (o, o) object_map -> 3133 + (mems, a, builder) mems_map -> 3134 + encoder -> 3135 + start:bool -> 3136 + mems -> 3137 + bool = 3138 + fun ~nest map umap e ~start mems -> 3139 + let encode_unknown_mem ~nest map umap e meta n v start = 3140 + try 3141 + if not start then write_char e ','; 3142 + if e.format = Indent then encode_mem_indent ~nest e; 3143 + encode_mem_name e meta n; 3144 + write ~nest umap.mems_type e v; 3145 + false 3146 + with Error e -> Codec.error_push_object Meta.none map (n, Meta.none) e 3147 + in 3148 + umap.enc (encode_unknown_mem ~nest map umap e) mems start 3149 + 3150 + let to_writer_exn ?buf ?format ?number_format t v ~eod w = 3151 + let e = encoder ?buf ?format ?number_format w in 3152 + write ~nest:0 t e v; 3153 + write_eot ~eod e 3154 + 3155 + let to_writer ?buf ?format ?number_format t v ~eod w = 3156 + try Ok (to_writer_exn ?buf ?format ?number_format t v ~eod w) 3157 + with Error e -> Error e 3158 + 3159 + let to_string_exn ?buf ?format ?number_format t v = 3160 + let b = Buffer.create 255 in 3161 + let w = Bytes.Writer.of_buffer b in 3162 + to_writer_exn ?buf ?format ?number_format ~eod:true t v w; 3163 + Buffer.contents b 3164 + 3165 + let to_string ?buf ?format ?number_format t v = 3166 + try Ok (to_string_exn ?buf ?format ?number_format t v) 3167 + with Error e -> Error e 3168 + 3169 + (* Recode *) 3170 + 3171 + let unsurprising_defaults layout format = 3172 + match (layout, format) with 3173 + | Some true, None -> (Some true, Some Layout) 3174 + | None, (Some Layout as l) -> (Some true, l) 3175 + | l, f -> (l, f) 3176 + 3177 + let recode_exn ?layout ?locs ?file ?buf ?format ?number_format t r w ~eod = 3178 + let layout, format = unsurprising_defaults layout format in 3179 + let v = of_reader_exn ?layout ?locs ?file t r in 3180 + to_writer_exn ?buf ?format ?number_format t v ~eod w 3181 + 3182 + let recode ?layout ?locs ?file ?buf ?format ?number_format t r w ~eod = 3183 + try Ok (recode_exn ?layout ?locs ?file ?buf ?format ?number_format t r w ~eod) 3184 + with Error e -> Error e 3185 + 3186 + let recode_string_exn ?layout ?locs ?file ?buf ?format ?number_format t s = 3187 + let layout, format = unsurprising_defaults layout format in 3188 + let v = of_string_exn ?layout ?locs ?file t s in 3189 + to_string_exn ?buf ?format ?number_format t v 3190 + 3191 + let recode_string ?layout ?locs ?file ?buf ?format ?number_format t s = 3192 + try Ok (recode_string_exn ?layout ?locs ?file ?buf ?format ?number_format t s) 3193 + with Error e -> Error e
+186
lib/json.mli
··· 1521 1521 module Tape = Tape 1522 1522 (** Simdjson-compatible tape format. A columnar representation of a JSON value 1523 1523 laid out for random access by word index. *) 1524 + (*--------------------------------------------------------------------------- 1525 + Copyright (c) 2024 The jsont programmers. All rights reserved. 1526 + SPDX-License-Identifier: ISC 1527 + ---------------------------------------------------------------------------*) 1528 + 1529 + (** JSON codec. 1530 + 1531 + According to {{:https://www.rfc-editor.org/rfc/rfc8259}RFC 8259}. 1532 + 1533 + See notes about {{!layout}layout preservation} and behaviour on 1534 + {{!duplicate}duplicate members}. 1535 + 1536 + {b Tip.} For maximal performance decode with [~layout:false] and 1537 + [~locs:false], this is the default. However using [~locs:true] improves some 1538 + error reports. *) 1539 + 1540 + open Bytesrw 1541 + 1542 + (** {1:decode Decode} *) 1543 + 1544 + val of_reader : 1545 + ?layout:bool -> 1546 + ?locs:bool -> 1547 + ?file:Loc.fpath -> 1548 + 'a codec -> 1549 + Bytes.Reader.t -> 1550 + ('a, Error.t) result 1551 + (** [of_reader t r] decodes a value from [r] according to [t]. 1552 + - If [layout] is [true] whitespace is preserved in {!Meta.t} values. 1553 + Defaults to [false]. 1554 + - If [locs] is [true] locations are preserved in {!Meta.t} values and error 1555 + messages are precisely located. Defaults to [false]. 1556 + - [file] is the file path from which [r] is assumed to read. Defaults to 1557 + {!Loc.file_none}. *) 1558 + 1559 + val of_reader_exn : 1560 + ?layout:bool -> 1561 + ?locs:bool -> 1562 + ?file:Loc.fpath -> 1563 + 'a codec -> 1564 + Bytes.Reader.t -> 1565 + 'a 1566 + (** [of_reader_exn] is like {!val-of_reader} but raises {!Json.exception-Error}. 1567 + *) 1568 + 1569 + val of_string : 1570 + ?layout:bool -> 1571 + ?locs:bool -> 1572 + ?file:Loc.fpath -> 1573 + 'a codec -> 1574 + string -> 1575 + ('a, Error.t) result 1576 + (** [of_string] is like {!val-of_reader} but decodes directly from a string. *) 1577 + 1578 + val of_string_exn : 1579 + ?layout:bool -> ?locs:bool -> ?file:Loc.fpath -> 'a codec -> string -> 'a 1580 + (** [of_string_exn] is like {!val-of_string} but raises {!Json.exception-Error}. 1581 + *) 1582 + 1583 + (** {1:encode Encode} *) 1584 + 1585 + val to_writer : 1586 + ?buf:Bytes.t -> 1587 + ?format:format -> 1588 + ?number_format:number_format -> 1589 + 'a codec -> 1590 + 'a -> 1591 + eod:bool -> 1592 + Bytes.Writer.t -> 1593 + (unit, Error.t) result 1594 + (** [to_writer t v ~eod w] encodes value [v] according to [t] on [w]. 1595 + - If [buf] is specified it is used as a buffer for the slices written on 1596 + [w]. Defaults to a buffer of length {!Bytes.Writer.slice_length}[ w]. 1597 + - [format] specifies how the JSON should be formatted. Defaults to 1598 + {!Minify}. 1599 + - [number_format] specifies the format string to format numbers. Defaults to 1600 + {!default_number_format}. 1601 + - [eod] indicates whether {!Bytesrw.Bytes.Slice.eod} should be written on 1602 + [w]. *) 1603 + 1604 + val to_writer_exn : 1605 + ?buf:Bytes.t -> 1606 + ?format:format -> 1607 + ?number_format:number_format -> 1608 + 'a codec -> 1609 + 'a -> 1610 + eod:bool -> 1611 + Bytes.Writer.t -> 1612 + unit 1613 + (** [to_writer_exn] is like {!val-to_writer} but raises {!Json.exception-Error}. 1614 + *) 1615 + 1616 + val to_string : 1617 + ?buf:Bytes.t -> 1618 + ?format:format -> 1619 + ?number_format:number_format -> 1620 + 'a codec -> 1621 + 'a -> 1622 + (string, Error.t) result 1623 + (** [to_string] is like {!val-to_writer} but writes to a string. *) 1624 + 1625 + val to_string_exn : 1626 + ?buf:Bytes.t -> 1627 + ?format:format -> 1628 + ?number_format:number_format -> 1629 + 'a codec -> 1630 + 'a -> 1631 + string 1632 + (** [to_string_exn] is like {!val-to_string} but raises {!Json.exception-Error}. 1633 + *) 1634 + 1635 + (** {1:recode Recode} 1636 + 1637 + The defaults in these functions are those of {!val-of_reader} and 1638 + {!val-to_writer}, except if [layout] is [true], [format] defaults to 1639 + [Layout] and vice-versa. *) 1640 + 1641 + val recode : 1642 + ?layout:bool -> 1643 + ?locs:bool -> 1644 + ?file:Loc.fpath -> 1645 + ?buf:Bytes.t -> 1646 + ?format:format -> 1647 + ?number_format:number_format -> 1648 + 'a codec -> 1649 + Bytes.Reader.t -> 1650 + Bytes.Writer.t -> 1651 + eod:bool -> 1652 + (unit, Error.t) result 1653 + (** [recode] is {!val-of_reader} followed by {!val-to_writer}. *) 1654 + 1655 + val recode_exn : 1656 + ?layout:bool -> 1657 + ?locs:bool -> 1658 + ?file:Loc.fpath -> 1659 + ?buf:Bytes.t -> 1660 + ?format:format -> 1661 + ?number_format:number_format -> 1662 + 'a codec -> 1663 + Bytes.Reader.t -> 1664 + Bytes.Writer.t -> 1665 + eod:bool -> 1666 + unit 1667 + (** [recode_exn] is like {!val-recode} but raises {!Json.exception-Error}. *) 1668 + 1669 + val recode_string : 1670 + ?layout:bool -> 1671 + ?locs:bool -> 1672 + ?file:Loc.fpath -> 1673 + ?buf:Bytes.t -> 1674 + ?format:format -> 1675 + ?number_format:number_format -> 1676 + 'a codec -> 1677 + string -> 1678 + (string, Error.t) result 1679 + (** [recode_string] is {!of_string} followed by {!to_string}. *) 1680 + 1681 + val recode_string_exn : 1682 + ?layout:bool -> 1683 + ?locs:bool -> 1684 + ?file:Loc.fpath -> 1685 + ?buf:Bytes.t -> 1686 + ?format:format -> 1687 + ?number_format:number_format -> 1688 + 'a codec -> 1689 + string -> 1690 + string 1691 + (** [recode_string_exn] is like {!val-recode_string} but raises 1692 + {!Json.exception-Error}. *) 1693 + 1694 + (** {1:layout Layout preservation} 1695 + 1696 + In order to simplify the implementation not all layout is preserved. In 1697 + particular: 1698 + - White space in empty arrays and objects is dropped. 1699 + - Unicode escapes are replaced by their UTF-8 encoding. 1700 + - The format of numbers is not preserved. *) 1701 + 1702 + (** {1:duplicate Duplicate object members} 1703 + 1704 + Duplicate object members are undefined behaviour in JSON. We follow the 1705 + behaviour of 1706 + {{:https://262.ecma-international.org/6.0/#sec-internalizejsonproperty} 1707 + [JSON.parse]} and the last one takes over, however duplicate members all 1708 + have to parse with the specified type as we error as soon as possible. Also 1709 + {{!Json.Object.case_mem}case members} are not allowed to duplicate. *)
+1 -1
test/bytesrw/dune
··· 1 1 (test 2 2 (name test) 3 - (libraries json json.bytesrw alcotest)) 3 + (libraries json alcotest))
+4 -4
test/bytesrw/test_json_bytesrw.ml
··· 2 2 streaming I/O surface. *) 3 3 4 4 let test_decode_primitive () = 5 - match Json_bytesrw.of_string Json.Codec.int "42" with 5 + match Json.of_string Json.Codec.int "42" with 6 6 | Error e -> Alcotest.failf "decode failed: %a" Json.Error.pp e 7 7 | Ok n -> Alcotest.(check int) "42 round-trip" 42 n 8 8 9 9 let test_encode_primitive () = 10 - match Json_bytesrw.to_string Json.Codec.int 7 with 10 + match Json.to_string Json.Codec.int 7 with 11 11 | Error e -> Alcotest.failf "encode failed: %a" Json.Error.pp e 12 12 | Ok s -> Alcotest.(check string) "7 encoded" "7" s 13 13 ··· 20 20 |> finish 21 21 in 22 22 let input = {|{"a": 7, "b": "hi"}|} in 23 - match Json_bytesrw.of_string pair_codec input with 23 + match Json.of_string pair_codec input with 24 24 | Error e -> Alcotest.failf "decode failed: %a" Json.Error.pp e 25 25 | Ok (a, b) -> 26 26 Alcotest.(check int) "a" 7 a; 27 27 Alcotest.(check string) "b" "hi" b 28 28 29 29 let test_decode_error () = 30 - match Json_bytesrw.of_string Json.Codec.int "not json" with 30 + match Json.of_string Json.Codec.int "not json" with 31 31 | Ok _ -> Alcotest.fail "expected decode error" 32 32 | Error _ -> () 33 33
+1 -1
test/dune
··· 1 1 (test 2 2 (name test) 3 - (libraries json json.bytesrw alcotest)) 3 + (libraries json alcotest))
+8 -8
test/test_json.ml
··· 10 10 content level -- never at structural level. Crowbar generates 11 11 random inputs and asserts the invariant. *) 12 12 13 - let decode_ignore s = Json_bytesrw.of_string Json.Codec.ignore s 14 - let decode_dom s = Json_bytesrw.of_string Json.Codec.Value.t s 13 + let decode_ignore s = Json.of_string Json.Codec.ignore s 14 + let decode_dom s = Json.of_string Json.Codec.Value.t s 15 15 let is_ok = function Ok _ -> true | Error _ -> false 16 16 17 17 (* -- Positive cases: Json.Codec.ignore must accept all valid JSON -- *) ··· 100 100 | Ok _, Ok _ -> () 101 101 | Error e, Ok _ -> 102 102 Alcotest.failf 103 - "Json.Codec.ignore rejected but Json.Codec.Value.t accepted %s: %s" name 104 - e 103 + "Json.Codec.ignore rejected but Json.Codec.Value.t accepted %s: %a" name 104 + Json.Error.pp e 105 105 | Ok _, Error e -> 106 106 Alcotest.failf 107 107 "Json.Codec.ignore accepted but Json.Codec.Value.t rejected %s \ 108 - (content permissiveness): %s" 109 - name e 108 + (content permissiveness): %a" 109 + name Json.Error.pp e 110 110 | Error _, Error _ -> () 111 111 112 112 let differential_cases = ··· 180 180 | Some s -> ( 181 181 (match decode_ignore s with 182 182 | Ok () -> () 183 - | Error e -> Alcotest.failf "ignore rejected corpus %s: %s" name e); 183 + | Error e -> Alcotest.failf "ignore rejected corpus %s: %s" name (Json.Error.to_string e)); 184 184 match decode_dom s with 185 185 | Ok _ -> () 186 - | Error e -> Alcotest.failf "json rejected corpus %s: %s" name e) 186 + | Error e -> Alcotest.failf "json rejected corpus %s: %s" name (Json.Error.to_string e)) 187 187 188 188 (* -- Entry point -- *) 189 189