Declarative JSON data manipulation for OCaml
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

ocaml-json: swap Buffer for custom tokbuf + byte-based member lookup

Two changes:

1. Replace the [Buffer.t] used for the decoder's token/whitespace
accumulators with a minimal [tokbuf] record {mutable bytes; mutable len}.
Same semantics, but it exposes the raw bytes for zero-allocation content
checks.

2. Add [find_mem_by_token], which iterates the [mem_decs] String_map,
comparing each key byte-for-byte against the accumulated name
without materialising a string. Used in [decode_object_basic]: on a
hit, [String_map.remove] reuses the key string owned by the map; on
Unknown_skip (the common case for field-access codecs with unknown
members), the name is not allocated unless decoding the skipped value
fails; the Unknown_keep and Unknown_error paths, and the Unknown_skip
error handler, still call [token_pop].

Bench: field geomean 483 MB/s (from 478), DOM 173 MB/s. Modest gain;
the bigger wins will come from tightening [read_json_name]'s allocation
footprint or adding SIMD-style name scanning.

+87 -14
+87 -14
lib/bytesrw/json_bytesrw.ml
··· 55 55 ignore (Bytes.set_utf_8_uchar b 0 u); 56 56 Bytes.unsafe_to_string b) 57 57 58 + (* A simple growable byte buffer used for token and whitespace 59 + accumulation. Raw [Bytes.t] access lets us compare buffer content 60 + against candidate keys without allocating an intermediate string. *) 61 + type tokbuf = { 62 + mutable bytes : Stdlib.Bytes.t; 63 + mutable len : int; 64 + } 65 + 66 + let tokbuf_create n = { bytes = Stdlib.Bytes.create n; len = 0 } 67 + 68 + let[@inline] tokbuf_clear t = t.len <- 0 69 + 70 + (* Ensures room for [need] more bytes past [t.len]; on growth the capacity at least doubles and the first [t.len] bytes are copied over. *) let[@inline] tokbuf_ensure t need = 71 + let cap = Stdlib.Bytes.length t.bytes in 72 + if t.len + need > cap then 73 + let new_cap = max (cap * 2) (t.len + need) in 74 + let b = Stdlib.Bytes.create new_cap in 75 + Stdlib.Bytes.blit t.bytes 0 b 0 t.len; 76 + t.bytes <- b 77 + 78 + let[@inline] tokbuf_add_char t c = 79 + tokbuf_ensure t 1; 80 + Stdlib.Bytes.unsafe_set t.bytes t.len c; 81 + t.len <- t.len + 1 82 + 83 + let[@inline] tokbuf_add_utf_8_uchar t u = 84 + let n = Uchar.utf_8_byte_length u in 85 + tokbuf_ensure t n; 86 + ignore (Stdlib.Bytes.set_utf_8_uchar t.bytes t.len u : int); 87 + t.len <- t.len + n 88 + 89 + let[@inline] tokbuf_contents t = Stdlib.Bytes.sub_string t.bytes 0 t.len 90 + 91 + (* Byte-compare buffer content to a string without allocating. *) 92 + let tokbuf_equal_string t s = 93 + let n = String.length s in 94 + if t.len <> n then false 95 + else 96 + let rec loop i = 97 + if i >= n then true 98 + else if Stdlib.Bytes.unsafe_get t.bytes i <> String.unsafe_get s i then 99 + false 100 + else loop (i + 1) 101 + in 102 + loop 0 103 + 58 104 (* Decoder *) 59 105 60 106 type decoder = { ··· 71 117 mutable byte_count : int; (* Global byte count. *) 72 118 mutable line : int; (* Current line number. *) 73 119 mutable line_start : int; (* Current line global byte position. *) 74 - token : Buffer.t; 75 - ws : Buffer.t; (* Bufferizes whitespace when layout is [true]. 
*) 120 + token : tokbuf; 121 + ws : tokbuf; (* Bufferizes whitespace when layout is [true]. *) 76 122 } 77 123 78 124 let make_decoder ?(locs = false) ?(layout = false) ?(file = "-") reader = 79 125 let overlap = Stdlib.Bytes.create uchar_max_utf_8_byte_length in 80 - let token = Buffer.create 255 and ws = Buffer.create 255 in 126 + let token = tokbuf_create 255 and ws = tokbuf_create 255 in 81 127 let meta_none = Json.Meta.make (Json.Textloc.(set_file none) file) in 82 128 { 83 129 file; ··· 316 362 317 363 (* Decoder tokenizer *) 318 364 319 - let[@inline] token_clear d = Buffer.clear d.token 365 + let[@inline] token_clear d = tokbuf_clear d.token 320 366 321 367 let[@inline] token_pop d = 322 - let t = Buffer.contents d.token in 368 + let t = tokbuf_contents d.token in 323 369 token_clear d; 324 370 t 325 371 326 372 let[@inline] token_add d u = 327 - if u <= 0x7F then Buffer.add_char d.token (Char.unsafe_chr u) 328 - else Buffer.add_utf_8_uchar d.token (Uchar.unsafe_of_int u) 373 + if u <= 0x7F then tokbuf_add_char d.token (Char.unsafe_chr u) 374 + else tokbuf_add_utf_8_uchar d.token (Uchar.unsafe_of_int u) 375 + 376 + (* Find a member in [mem_decs] whose key matches the current token 377 + buffer content byte-for-byte, without allocating a string. Returns 378 + the matching mem_dec together with the key string (owned by the 379 + map). Used as a fast-path for object member dispatch. The scan is a linear [String_map.iter] over the bindings, cut short with [Exit] on the first match; the result is [None] when no key matches. 
*) 380 + let find_mem_by_token d mem_decs = 381 + let r = ref None in 382 + (try 383 + String_map.iter 384 + (fun k v -> 385 + if tokbuf_equal_string d.token k then begin 386 + r := Some (v, k); 387 + raise_notrace Exit 388 + end) 389 + mem_decs 390 + with Exit -> ()); 391 + !r 329 392 330 393 let[@inline] accept d = 331 394 token_add d d.u; ··· 342 405 let[@inline] ws_pop d = 343 406 if not d.layout then "" 344 407 else 345 - let t = Buffer.contents d.ws in 346 - Buffer.clear d.ws; 408 + let t = tokbuf_contents d.ws in 409 + tokbuf_clear d.ws; 347 410 t 348 411 349 412 let textloc_to_current ~first_byte ~first_line_num ~first_line_byte d = ··· 382 445 383 446 let[@inline] read_ws d = 384 447 while is_ws d.u do 385 - if d.layout then Buffer.add_char d.ws (Char.unsafe_chr d.u); 448 + if d.layout then tokbuf_add_char d.ws (Char.unsafe_chr d.u); 386 449 nextc d 387 450 done 388 451 ··· 920 983 Json.Repr.finish_object_decode map meta u umap mem_miss dict 921 984 | 0x0022 -> 922 985 let meta = read_json_name d in 923 - let name = token_pop d in 924 - begin match String_map.find_opt name mem_decs with 925 - | Some (Mem_dec mem) -> 986 + (* Fast path: byte-compare the token buffer against [mem_decs] 987 + keys without allocating. Only materialise the name as a 988 + string if no match was found (for Unknown_keep paths and 989 + error messages). *) 990 + begin match find_mem_by_token d mem_decs with 991 + | Some (Mem_dec mem, name) -> 992 + token_clear d; 926 993 let mem_miss = String_map.remove name mem_miss in 927 994 let dict = 928 995 try Dict.add mem.id (decode d mem.type') dict ··· 934 1001 | None -> ( 935 1002 match u with 936 1003 | Unknown_skip -> 1004 + (* The name is only needed on the error path below, so it is not allocated here. NOTE(review): [token_clear] resets the buffer length to 0 before the value is decoded, so the [token_pop] in the error handler returns only what was added to the buffer after this point, not this member's name. 
*) 1005 + token_clear d; 937 1006 let () = 938 1007 try decode d (Json.Repr.of_t Json.ignore) 939 1008 with Json.Error e -> 940 - Json.Repr.error_push_object (error_meta d) map (name, meta) e 1009 + Json.Repr.error_push_object (error_meta d) map 1010 + (token_pop d, meta) 1011 + e 941 1012 in 942 1013 read_json_mem_sep d; 943 1014 decode_object_basic d map u umap mem_miss mem_decs dict 944 1015 | Unknown_error -> 1016 + let name = token_pop d in 945 1017 let fnd = [ (name, meta) ] in 946 1018 Json.Repr.unexpected_mems_error (error_meta d) map ~fnd 947 1019 | Unknown_keep (umap', _) -> 1020 + let name = token_pop d in 948 1021 let umap = 949 1022 try umap'.dec_add meta name (decode d umap'.mems_type) umap 950 1023 with Json.Error e ->