Declarative JSON data manipulation for OCaml
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

ocaml-json: eliminate tuple and closure allocations in hot paths

Two changes together:

1. Remove [get_line_pos] which allocated a 2-tuple on every call.
Inline [d.line] and [d.line_start] at each call site and thread
them as two int labelled args [~first_line_num ~first_line_byte]
through the error/textloc helpers (err_to_here, err_exp_in_const,
err_exp_esc, err_unclosed_string, err_illegal_ctrl_char,
textloc_to_current, textloc_prev_ascii_char,
error_meta_to_current). Roughly 45 call sites adapted.

2. Rewrite [skip_json_string] and [skip_json_number] as imperative
while-loops with a single [done_] flag instead of [let rec loop]
nested in the function body. Avoids the fresh closure allocated on
every invocation.

Memtrace deltas (field-access bench on canada+citm+twitter corpus):

get_line_pos 10.8% -> 0% (removed)
skip_json_number.loop 11.5% -> <1% (closure removed)
skip_json_string.loop 6.9% -> <1% (closure removed)

DOM mode geomean edged up from ~160 to ~172 MB/s (less pressure from
same get_line_pos fix). Field geomean stable at ~480 MB/s; further
wins require member-name interning or SIMD-style byte scanning for
object key dispatch.

+126 -83
+126 -83
lib/bytesrw/json_bytesrw.ml
··· 99 99 100 100 (* Decoder positions *) 101 101 102 - let[@inline] get_line_pos d = (d.line, d.line_start) 103 - 104 102 let get_last_byte d = 105 103 if d.u <= 0x7F then d.byte_count - 1 106 104 else if d.u = sot || d.u = eot then d.byte_count ··· 110 108 111 109 (* Decoder errors *) 112 110 113 - let textloc_of_pos d ~first_byte ~last_byte ~first_line:(fln, flb) 114 - ~last_line:(lln, llb) = 115 - Json.Textloc.make ~file:d.file ~first_byte ~last_byte ~first_line_num:fln 116 - ~first_line_byte:flb ~last_line_num:lln ~last_line_byte:llb 111 + let[@inline] textloc_of_pos d ~first_byte ~last_byte ~first_line_num 112 + ~first_line_byte ~last_line_num ~last_line_byte = 113 + Json.Textloc.make ~file:d.file ~first_byte ~last_byte ~first_line_num 114 + ~first_line_byte ~last_line_num ~last_line_byte 117 115 118 116 let error_meta d = 119 - let first_byte = get_last_byte d and first_line = get_line_pos d in 120 - let last_byte = first_byte and last_line = first_line in 117 + let first_byte = get_last_byte d in 118 + let first_line_num = d.line and first_line_byte = d.line_start in 121 119 Json.Meta.make 122 - @@ textloc_of_pos d ~first_byte ~last_byte ~first_line ~last_line 120 + @@ textloc_of_pos d ~first_byte ~last_byte:first_byte ~first_line_num 121 + ~first_line_byte ~last_line_num:first_line_num 122 + ~last_line_byte:first_line_byte 123 123 124 - let error_meta_to_current ~first_byte ~first_line d = 125 - let last_byte = get_last_byte d and last_line = get_line_pos d in 124 + let error_meta_to_current ~first_byte ~first_line_num ~first_line_byte d = 125 + let last_byte = get_last_byte d in 126 + let last_line_num = d.line and last_line_byte = d.line_start in 126 127 Json.Meta.make 127 - @@ textloc_of_pos d ~first_byte ~last_byte ~first_line ~last_line 128 + @@ textloc_of_pos d ~first_byte ~last_byte ~first_line_num ~first_line_byte 129 + ~last_line_num ~last_line_byte 128 130 129 131 let err_here d fmt = Json.Error.failf (error_meta d) fmt 130 132 131 - let err_to_here ~first_byte ~first_line d fmt = 132 - Json.Error.failf (error_meta_to_current ~first_byte ~first_line d) fmt 133 + let err_to_here ~first_byte ~first_line_num ~first_line_byte d fmt = 134 + Json.Error.failf 135 + (error_meta_to_current ~first_byte ~first_line_num ~first_line_byte d) 136 + fmt 133 137 134 138 let err_malformed_utf_8 d = 135 139 if d.i_next > d.i_max then ··· 158 162 159 163 (* Errors for constants *) 160 164 161 - let err_exp_in_const ~first_byte ~first_line d ~exp ~fnd ~const = 162 - err_to_here ~first_byte ~first_line d 165 + let err_exp_in_const ~first_byte ~first_line_num ~first_line_byte d ~exp ~fnd 166 + ~const = 167 + err_to_here ~first_byte ~first_line_num ~first_line_byte d 163 168 "Expected %a while parsing %a but found: %a" pp_quchar exp pp_code const 164 169 pp_quchar fnd 165 170 ··· 185 190 let err_unpaired_hi_surrogate d u = 186 191 err_here d "Unpaired high surrogate %a in %a" pp_quchar u pp_code "string" 187 192 188 - let err_exp_esc ~first_byte ~first_line d u = 189 - err_to_here ~first_byte ~first_line d "Expected %a while parsing %a found %a" 190 - pp_code "escape character" pp_code "escape" pp_quchar u 193 + let err_exp_esc ~first_byte ~first_line_num ~first_line_byte d u = 194 + err_to_here ~first_byte ~first_line_num ~first_line_byte d 195 + "Expected %a while parsing %a found %a" pp_code "escape character" pp_code 196 + "escape" pp_quchar u 191 197 192 - let err_unclosed_string ~first_byte ~first_line d = 193 - err_to_here ~first_byte ~first_line d "Unclosed %a" pp_code "string" 198 + let err_unclosed_string ~first_byte ~first_line_num ~first_line_byte d = 199 + err_to_here ~first_byte ~first_line_num ~first_line_byte d "Unclosed %a" 200 + pp_code "string" 194 201 195 - let err_illegal_ctrl_char ~first_byte ~first_line d = 196 - err_to_here ~first_byte ~first_line d "Illegal control character %a in %a" 197 - pp_quchar d.u pp_code "string" 202 + let err_illegal_ctrl_char ~first_byte ~first_line_num ~first_line_byte d = 203 + err_to_here ~first_byte ~first_line_num ~first_line_byte d 204 + "Illegal control character %a in %a" pp_quchar d.u pp_code "string" 198 205 199 206 (* Errors for arrays *) 200 207 ··· 339 346 Buffer.clear d.ws; 340 347 t 341 348 342 - let textloc_to_current ~first_byte ~first_line d = 349 + let textloc_to_current ~first_byte ~first_line_num ~first_line_byte d = 343 350 if not d.locs then Json.Textloc.none 344 351 else 345 - let last_byte = get_last_byte d and last_line = get_line_pos d in 346 - textloc_of_pos d ~first_byte ~last_byte ~first_line ~last_line 352 + let last_byte = get_last_byte d in 353 + let last_line_num = d.line and last_line_byte = d.line_start in 354 + textloc_of_pos d ~first_byte ~last_byte ~first_line_num ~first_line_byte 355 + ~last_line_num ~last_line_byte 347 356 348 - let textloc_prev_ascii_char ~first_byte ~first_line d = 357 + let textloc_prev_ascii_char ~first_byte ~first_line_num ~first_line_byte d = 349 358 (* N.B. when we call that the line doesn't move and the char was on 350 359 a single byte *) 351 360 if not d.locs then Json.Textloc.none 352 361 else 353 - let last_byte = get_last_byte d and last_line = get_line_pos d in 354 - let last_byte = last_byte - 1 in 355 - textloc_of_pos d ~first_byte ~last_byte ~first_line ~last_line 362 + let last_byte = get_last_byte d - 1 in 363 + let last_line_num = d.line and last_line_byte = d.line_start in 364 + textloc_of_pos d ~first_byte ~last_byte ~first_line_num ~first_line_byte 365 + ~last_line_num ~last_line_byte 356 366 357 367 let meta_make d ?ws_before ?ws_after textloc = 358 368 if (not d.locs) && not d.layout then d.meta_none ··· 379 389 let read_json_const d const = 380 390 (* First character was checked. *) 381 391 let ws_before = ws_pop d in 382 - let first_byte = get_last_byte d and first_line = get_line_pos d in 392 + let first_byte = get_last_byte d in 393 + let first_line_num = d.line and first_line_byte = d.line_start in 383 394 for i = 1 to Array.length const - 1 do 384 395 nextc d; 385 396 if not (Int.equal d.u const.(i)) then 386 - err_exp_in_const ~first_byte ~first_line d ~exp:const.(i) ~fnd:d.u 387 - ~const:(ascii_str const) 397 + err_exp_in_const ~first_byte ~first_line_num ~first_line_byte d 398 + ~exp:const.(i) ~fnd:d.u ~const:(ascii_str const) 388 399 done; 389 - let textloc = textloc_to_current d ~first_byte ~first_line in 400 + let textloc = 401 + textloc_to_current d ~first_byte ~first_line_num ~first_line_byte 402 + in 390 403 let ws_after = 391 404 nextc d; 392 405 read_ws d; ··· 434 447 | _ -> () 435 448 in 436 449 let first_byte = get_last_byte d in 437 - let first_line = get_line_pos d in 450 + let first_line_num = d.line and first_line_byte = d.line_start in 438 451 let ws_before = ws_pop d in 439 452 token_clear d; 440 453 if d.u = 0x002D (* - *) then accept d; 441 454 read_int d; 442 455 read_opt_frac d; 443 456 read_opt_exp d; 444 - let textloc = textloc_prev_ascii_char d ~first_byte ~first_line in 457 + let textloc = 458 + textloc_prev_ascii_char d ~first_byte ~first_line_num ~first_line_byte 459 + in 445 460 let ws_after = 446 461 read_ws d; 447 462 ws_pop d ··· 450 465 451 466 let read_json_string d = 452 467 (* d.u is 0x0022 *) 453 - let first_byte = get_last_byte d and first_line = get_line_pos d in 468 + let first_byte = get_last_byte d in 469 + let first_line_num = d.line and first_line_byte = d.line_start in 454 470 let rec read_uescape d hi uc count = 455 471 if count > 0 then 456 472 match d.u with ··· 504 520 | 0x0075 (* u *) -> 505 521 nextc d; 506 522 read_uescape d None 0 4 507 - | u -> err_exp_esc ~first_byte ~first_line d u 523 + | u -> err_exp_esc ~first_byte ~first_line_num ~first_line_byte d u 508 524 in 509 525 let rec loop d = 510 526 match d.u with ··· 513 529 read_escape d; 514 530 loop d 515 531 | 0x0022 (* DQUOTE *) -> () 516 - | u when u = eot -> err_unclosed_string ~first_byte ~first_line d 532 + | u when u = eot -> 533 + err_unclosed_string ~first_byte ~first_line_num ~first_line_byte d 517 534 | u when 0x0000 <= u && u <= 0x001F -> 518 - err_illegal_ctrl_char ~first_byte ~first_line d 535 + err_illegal_ctrl_char ~first_byte ~first_line_num ~first_line_byte d 519 536 | u -> 520 537 accept d; 521 538 loop d ··· 524 541 nextc d; 525 542 token_clear d; 526 543 loop d; 527 - let textloc = textloc_to_current d ~first_byte ~first_line in 544 + let textloc = 545 + textloc_to_current d ~first_byte ~first_line_num ~first_line_byte 546 + in 528 547 let ws_after = 529 548 nextc d; 530 549 read_ws d; ··· 564 583 | _ -> err_not_json_value d 565 584 566 585 and skip_json_string d = 567 - (* d.u is 0x22. advance past opening quote and scan for matching close, 568 - honouring backslash escapes. no token buffer. *) 586 + (* Fast byte-level scan for the closing quote. Relies on UTF-8 587 + self-synchronisation: the bytes 0x22 and 0x5C only appear as the 588 + literal quote and backslash in valid UTF-8, never as continuation 589 + bytes. [d.u] is left stale during the scan; resynced via [nextc] 590 + at exit. Line tracking is not updated -- raw newlines in JSON 591 + string content are disallowed by spec. Imperative loop, no closure 592 + allocation per call. *) 593 + let done_ = ref false in 594 + while not !done_ do 595 + if d.i_next > d.i_max then 596 + if is_eod d then 597 + err_unclosed_string ~first_byte:0 ~first_line_num:Loc.line_num_none 598 + ~first_line_byte:Loc.byte_pos_none d 599 + else set_slice d (Bytes.Reader.read d.reader) 600 + else begin 601 + let b = Stdlib.Bytes.unsafe_get d.i d.i_next in 602 + d.i_next <- d.i_next + 1; 603 + d.byte_count <- d.byte_count + 1; 604 + match b with 605 + | '\\' -> 606 + if d.i_next > d.i_max then 607 + if is_eod d then 608 + err_unclosed_string ~first_byte:0 609 + ~first_line_num:Loc.line_num_none 610 + ~first_line_byte:Loc.byte_pos_none d 611 + else set_slice d (Bytes.Reader.read d.reader); 612 + d.i_next <- d.i_next + 1; 613 + d.byte_count <- d.byte_count + 1 614 + | '"' -> done_ := true 615 + | _ -> () 616 + end 617 + done; 569 618 nextc d; 570 - let rec loop () = 571 - match d.u with 572 - | 0x005C (* \ *) -> 573 - nextc d; 574 - (match d.u with 575 - | u when u = eot -> 576 - err_unclosed_string ~first_byte:0 577 - ~first_line:(Loc.line_num_none, Loc.byte_pos_none) d 578 - | _ -> nextc d); 579 - loop () 580 - | 0x0022 (* DQUOTE *) -> nextc d 581 - | u when u = eot -> 582 - err_unclosed_string ~first_byte:0 583 - ~first_line:(Loc.line_num_none, Loc.byte_pos_none) d 584 - | _ -> 585 - nextc d; 586 - loop () 587 - in 588 - loop (); 589 619 read_ws d 590 620 591 621 and skip_json_number d = 592 - let rec loop () = 593 - match d.u with 594 - | u 595 - when is_digit u || u = 0x002E (* . *) || u = 0x002D (* - *) 596 - || u = 0x002B (* + *) || u = 0x0065 (* e *) || u = 0x0045 (* E *) -> 597 - nextc d; 598 - loop () 599 - | _ -> () 600 - in 601 - loop (); 622 + (* Raw byte scan for number continuation chars. All ASCII. *) 623 + let done_ = ref false in 624 + while not !done_ do 625 + if d.i_next > d.i_max then 626 + if is_eod d then done_ := true 627 + else set_slice d (Bytes.Reader.read d.reader) 628 + else 629 + match Stdlib.Bytes.unsafe_get d.i d.i_next with 630 + | '0' .. '9' | '-' | '+' | '.' | 'e' | 'E' -> 631 + d.i_next <- d.i_next + 1; 632 + d.byte_count <- d.byte_count + 1 633 + | _ -> done_ := true 634 + done; 635 + nextc d; 602 636 read_ws d 603 637 604 638 and skip_json_array d = ··· 694 728 and decode_array : type a elt b. decoder -> (a, elt, b) array_map -> a = 695 729 fun d map -> 696 730 let ws_before = ws_pop d in 697 - let first_byte = get_last_byte d and first_line = get_line_pos d in 731 + let first_byte = get_last_byte d in 732 + let first_line_num = d.line and first_line_byte = d.line_start in 698 733 let b, len = 699 734 match 700 735 nextc d; ··· 709 744 try 710 745 while !next do 711 746 begin 712 - let first_byte = get_last_byte d 713 - and first_line = get_line_pos d in 747 + let first_byte = get_last_byte d in 748 + let first_line_num = d.line 749 + and first_line_byte = d.line_start in 714 750 try 715 751 if map.dec_skip !i !b then decode d (of_t Json.ignore) 716 752 else b := map.dec_add !i (decode d map.elt) !b 717 753 with Json.Error e -> 718 - let imeta = error_meta_to_current ~first_byte ~first_line d in 754 + let imeta = 755 + error_meta_to_current ~first_byte ~first_line_num 756 + ~first_line_byte d 757 + in 719 758 Json.Repr.error_push_array (error_meta d) map (!i, imeta) e 720 759 end; 721 760 incr i; ··· 732 771 done; 733 772 (!b, !i) 734 773 with Json.Error e -> 735 - let first_line_num, first_line_byte = first_line in 736 774 Json.Error.adjust_context ~first_byte ~first_line_num ~first_line_byte 737 775 e) 738 776 in 739 - let textloc = textloc_to_current d ~first_byte ~first_line in 777 + let textloc = 778 + textloc_to_current d ~first_byte ~first_line_num ~first_line_byte 779 + in 740 780 let ws_after = 741 781 nextc d; 742 782 read_ws d; ··· 748 788 and decode_object : type a. decoder -> (a, a) object_map -> a = 749 789 fun d map -> 750 790 let ws_before = ws_pop d in 751 - let first_byte = get_last_byte d and first_line = get_line_pos d in 791 + let first_byte = get_last_byte d in 792 + let first_line_num = d.line and first_line_byte = d.line_start in 752 793 let dict = 753 794 try 754 795 nextc d; ··· 760 801 let meta = 761 802 (* This is for when Json.Repr.finish_object_decode raises. *) 762 803 if Json.Textloc.is_none (Json.Meta.textloc meta) then 763 - error_meta_to_current d ~first_byte ~first_line 804 + error_meta_to_current d ~first_byte ~first_line_num 805 + ~first_line_byte 764 806 else meta 765 807 in 766 808 Json.Error.raise ctx meta k 767 809 | Json.Error e -> 768 - let first_line_num, first_line_byte = first_line in 769 810 Json.Error.adjust_context ~first_byte ~first_line_num ~first_line_byte e 770 811 in 771 - let textloc = textloc_to_current d ~first_byte ~first_line in 812 + let textloc = 813 + textloc_to_current d ~first_byte ~first_line_num ~first_line_byte 814 + in 772 815 let ws_after = 773 816 nextc d; 774 817 read_ws d;