Declarative JSON data manipulation for OCaml
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

ocaml-json: Cursor zipper over Value.t with JSON Pointer (RFC 6901)

Mirror the [ocaml-yaml] addition: a [Json.Cursor] module that zips
a {!Value.t}, exposes [up] / [down_field] / [down_index] /
[replace] / [to_value], bridges to [Loc.Context.t] via
[to_context] / [of_context], and renders / parses
{{:https://www.rfc-editor.org/rfc/rfc6901}JSON Pointer (RFC 6901)}
addresses through [pointer] / [of_pointer].

The implementation follows the YAML side line-for-line, retargeted at
JSON's data model: frames are [Array] (focus inside an array, plus
the index of the focused slot) and [Object] (focus inside an object,
plus the [Value.name] node of the matched member). Object members
that survive an [up] reuse the original [Value.name] node, so the
member's source meta is preserved across edits.

Pointer rules per RFC 6901:

- [""] is the root; non-empty pointers must start with [/].
- Tokens are escape-decoded as [~1] -> [/] then [~0] -> [~] (§4).
- Array indices follow the §5 grammar
[array-index = %x30 / ( %x31-39 *DIGIT )]: leading-zero indices
are rejected. The [-] index from RFC 6902 (Patch's "one past the
last element") is not accepted here — Pointer references existing
values only.

Tests: 9 cases — focus/root, down_field/down_index, replace+zip
round-trip, [pointer] root, pointer round-trip on
[/users] / [/users/0] / [/users/0/name], escape decode of
[/tilde~0slash~1], leading-zero rejection, and to_context shape.

+452
+225
lib/cursor.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2026 Thomas Gazagnaire <thomas@gazagnaire.org> 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + module Meta = Loc.Meta 7 + module Path = Loc.Path 8 + module Context = Loc.Context 9 + 10 + (* Frames describe the surroundings of the focus, leaf-to-root. [before] 11 + holds siblings that come before the focus, in REVERSED order (so 12 + prepending a sibling on the way up is O(1)); [after] holds the 13 + siblings that come after the focus, in source order. *) 14 + 15 + type array_ = { 16 + meta : Meta.t; 17 + before : Value.t list; 18 + index : int; 19 + after : Value.t list; 20 + } 21 + 22 + type object_ = { 23 + meta : Meta.t; 24 + before : Value.member list; 25 + name : Value.name; 26 + after : Value.member list; 27 + } 28 + 29 + type frame = Array of array_ | Object of object_ 30 + type t = { focus : Value.t; frames : frame list } 31 + 32 + let root focus = { focus; frames = [] } 33 + let focus c = c.focus 34 + 35 + let rebuild_parent (focus : Value.t) (frame : frame) : Value.t = 36 + match frame with 37 + | Array { meta; before; after; _ } -> 38 + let xs = List.rev_append before (focus :: after) in 39 + Value.Array (xs, meta) 40 + | Object { meta; before; name; after } -> 41 + let mems = List.rev_append before ((name, focus) :: after) in 42 + Value.Object (mems, meta) 43 + 44 + let rec to_value c = 45 + match c.frames with 46 + | [] -> c.focus 47 + | f :: fs -> to_value { focus = rebuild_parent c.focus f; frames = fs } 48 + 49 + let up c = 50 + match c.frames with 51 + | [] -> None 52 + | f :: fs -> Some { focus = rebuild_parent c.focus f; frames = fs } 53 + 54 + let replace v c = { c with focus = v } 55 + 56 + let split_at n xs = 57 + let rec aux i before after = 58 + match (i, after) with 59 + | 0, x :: rest -> Some (before, x, rest) 60 + | _, [] -> None 61 + | _, x :: rest -> aux (i - 1) (x :: before) rest 62 + in 63 + if n < 0 then None else aux n [] xs 64 + 65 + let down_index n c = 66 + match c.focus with 67 + | Value.Array (xs, meta) -> ( 68 + match split_at n xs with 69 + | None -> None 70 + | Some (before, focus, after) -> 71 + let frame = Array { meta; before; index = n; after } in 72 + Some { focus; frames = frame :: c.frames }) 73 + | _ -> None 74 + 75 + let split_member k mems = 76 + let rec aux before = function 77 + | [] -> None 78 + | (((s, _) as name), v) :: rest when s = k -> Some (before, v, name, rest) 79 + | mem :: rest -> aux (mem :: before) rest 80 + in 81 + aux [] mems 82 + 83 + let down_field k c = 84 + match c.focus with 85 + | Value.Object (mems, meta) -> ( 86 + match split_member k mems with 87 + | None -> None 88 + | Some (before, focus, name, after) -> 89 + let frame = Object { meta; before; name; after } in 90 + Some { focus; frames = frame :: c.frames }) 91 + | _ -> None 92 + 93 + let sort_node sort = 94 + let s = Sort.to_string sort in 95 + ((s, Meta.none) : string Loc.node) 96 + 97 + let frame_step (f : frame) : (string Loc.node * Path.step) option = 98 + match f with 99 + | Array { index; _ } -> 100 + Some (sort_node Sort.Array, Path.Nth (index, Meta.none)) 101 + | Object { name = s, m; _ } -> Some (sort_node Sort.Object, Path.Mem (s, m)) 102 + 103 + let to_context c = 104 + List.fold_right 105 + (fun f ctx -> 106 + match frame_step f with 107 + | None -> ctx 108 + | Some (sort, step) -> Context.push ~sort step ctx) 109 + c.frames Context.empty 110 + 111 + let step_into c (step : Path.step) = 112 + match step with 113 + | Path.Nth (n, _) -> down_index n c 114 + | Path.Mem (s, _) -> down_field s c 115 + | _ -> None 116 + 117 + let of_context ctx v = 118 + let steps = Path.steps (Context.path ctx) in 119 + let rec walk c = function 120 + | [] -> Some c 121 + | step :: rest -> ( 122 + match step_into c step with None -> None | Some c' -> walk c' rest) 123 + in 124 + walk (root v) steps 125 + 126 + (* JSON Pointer (RFC 6901). *) 127 + 128 + let pointer_encode_token s = 129 + let b = Buffer.create (String.length s + 2) in 130 + String.iter 131 + (fun c -> 132 + match c with 133 + | '~' -> Buffer.add_string b "~0" 134 + | '/' -> Buffer.add_string b "~1" 135 + | c -> Buffer.add_char b c) 136 + s; 137 + Buffer.contents b 138 + 139 + let pointer_decode_token s = 140 + let len = String.length s in 141 + let b = Buffer.create len in 142 + let valid = ref true in 143 + let i = ref 0 in 144 + while !i < len do 145 + let c = s.[!i] in 146 + if c = '~' then 147 + if !i + 1 >= len then ( 148 + valid := false; 149 + i := len) 150 + else ( 151 + (match s.[!i + 1] with 152 + | '0' -> Buffer.add_char b '~' 153 + | '1' -> Buffer.add_char b '/' 154 + | _ -> valid := false); 155 + i := !i + 2) 156 + else ( 157 + Buffer.add_char b c; 158 + incr i) 159 + done; 160 + if !valid then Some (Buffer.contents b) else None 161 + 162 + let pointer_decode_index s = 163 + let len = String.length s in 164 + if len = 0 then None 165 + else if len = 1 && s.[0] = '0' then Some 0 166 + else if s.[0] = '0' then None 167 + else 168 + let valid = ref true in 169 + String.iter (fun c -> if not (c >= '0' && c <= '9') then valid := false) s; 170 + if !valid then int_of_string_opt s else None 171 + 172 + let pointer_step_into c (token : string) = 173 + match c.focus with 174 + | Value.Array _ -> ( 175 + match pointer_decode_index token with 176 + | None -> None 177 + | Some n -> down_index n c) 178 + | Value.Object _ -> down_field token c 179 + | _ -> None 180 + 181 + let pointer_tokens p = 182 + if p = "" then Some [] 183 + else if p.[0] <> '/' then None 184 + else 185 + let parts = 186 + String.split_on_char '/' (String.sub p 1 (String.length p - 1)) 187 + in 188 + let rec decode_all acc = function 189 + | [] -> Some (List.rev acc) 190 + | t :: rest -> ( 191 + match pointer_decode_token t with 192 + | None -> None 193 + | Some t' -> decode_all (t' :: acc) rest) 194 + in 195 + decode_all [] parts 196 + 197 + let token_of_frame (f : frame) : string = 198 + match f with 199 + | Array { index; _ } -> string_of_int index 200 + | Object { name = s, _; _ } -> pointer_encode_token s 201 + 202 + let pointer c = 203 + let buf = Buffer.create 32 in 204 + let rec emit = function 205 + | [] -> () 206 + | f :: rest -> 207 + emit rest; 208 + Buffer.add_char buf '/'; 209 + Buffer.add_string buf (token_of_frame f) 210 + in 211 + emit c.frames; 212 + Buffer.contents buf 213 + 214 + let of_pointer v p = 215 + match pointer_tokens p with 216 + | None -> None 217 + | Some tokens -> 218 + let rec walk c = function 219 + | [] -> Some c 220 + | t :: rest -> ( 221 + match pointer_step_into c t with 222 + | None -> None 223 + | Some c' -> walk c' rest) 224 + in 225 + walk (root v) tokens
+97
lib/cursor.mli
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2026 Thomas Gazagnaire <thomas@gazagnaire.org> 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Zipper over {!Value.t}. 7 + 8 + A cursor is a focused sub-value plus the path back to the document root. 9 + Edits at the focus are accumulated and recovered as a whole document through 10 + {!to_value}. The zipper representation makes navigation and in-place 11 + rewriting cheap without re-walking the tree. 12 + 13 + The cursor's path is a structural sibling of {!Loc.Context.t}: every cursor 14 + produces a context that describes where it is, and any context can be 15 + replayed against a {!Value.t} to recover the cursor at that address. 16 + {!to_context} / {!of_context} cross between the two views; after a failed 17 + decode, [Error.ctx |> Cursor.of_context |> Option.map Cursor.focus] jumps 18 + straight to the offending node without re-walking the document by hand. *) 19 + 20 + type t 21 + (** A cursor: focused sub-value plus path frames from the focus up to the root. 22 + *) 23 + 24 + (** {1:nav Navigation} *) 25 + 26 + val root : Value.t -> t 27 + (** [root v] is the cursor focused on the root of [v]. *) 28 + 29 + val focus : t -> Value.t 30 + (** [focus c] is the value currently in focus. *) 31 + 32 + val to_value : t -> Value.t 33 + (** [to_value c] is the document with [c]'s edits applied — i.e. [c]'s focus 34 + plumbed back through every ancestor frame. *) 35 + 36 + val up : t -> t option 37 + (** [up c] is [c]'s parent cursor, or [None] at the root. *) 38 + 39 + val down_index : int -> t -> t option 40 + (** [down_index n c] enters the [n]-th element when the focus is a 41 + {!Value.Array}, or [None] if out of range or the focus is not an array. *) 42 + 43 + val down_field : string -> t -> t option 44 + (** [down_field k c] enters the value of the {!Value.Object} member with name 45 + [k]. First match wins on duplicate names. [None] if no such member, or the 46 + focus is not an object. *) 47 + 48 + (** {1:edit Editing} *) 49 + 50 + val replace : Value.t -> t -> t 51 + (** [replace v c] is [c] with its focus replaced by [v]. The replacement is 52 + propagated to ancestors lazily on {!to_value} / {!up}. *) 53 + 54 + (** {1:context Context bridge} 55 + 56 + Errors and cursors share a structural spine: any error's {!Loc.Context.t} 57 + can be replayed as a cursor, and any cursor's path can be presented as a 58 + context for cross-format tooling that walks paths uniformly. *) 59 + 60 + val to_context : t -> Loc.Context.t 61 + (** [to_context c] is the {!Loc.Context.t} describing [c]'s position from the 62 + root. The context's sort labels reflect each ancestor's {!Sort.t}; the path 63 + steps mirror the navigation that produced [c]. *) 64 + 65 + val of_context : Loc.Context.t -> Value.t -> t option 66 + (** [of_context ctx v] descends [ctx]'s steps into [v], yielding the cursor at 67 + that address. [None] if any step does not match the tree (e.g. a 68 + {!Loc.Path.Mem} step against an array, or an out-of-range {!Loc.Path.Nth}). 69 + *) 70 + 71 + (** {1:pointer JSON Pointer (RFC 6901)} 72 + 73 + JSON Pointer addresses any sub-value of a JSON document by string. 74 + 75 + {b Syntax.} The empty string [""] addresses the root. A non-empty pointer is 76 + a sequence of [/]-prefixed reference tokens, e.g. ["/users/0/email"]. Within 77 + a token, [~] and [/] are escaped as [~0] and [~1] respectively (per RFC 6901 78 + §3); decoding replaces [~1] with [/] first, then [~0] with [~]. 79 + 80 + {b Array indices.} A token addressing an array must be a non-negative 81 + decimal integer with no leading zeros (per RFC 6901 §4 and §5: the 82 + productions [array-index = %x30 / ( %x31-39 *DIGIT )] forbid leading zeros). 83 + The [-] index from RFC 6902 (JSON Patch's "one past the last element") is 84 + not accepted here — Pointer references existing values only. *) 85 + 86 + val pointer : t -> string 87 + (** [pointer c] is [c]'s position rendered as a JSON Pointer (RFC 6901). The 88 + cursor at the root produces [""]; descents into object members and array 89 + indices each contribute one [/]-prefixed token, with [~] and [/] escaped as 90 + [~0] / [~1] respectively. *) 91 + 92 + val of_pointer : Value.t -> string -> t option 93 + (** [of_pointer v p] descends the JSON Pointer [p] into [v]. Returns [None] if 94 + [p] is malformed (e.g. non-empty but missing the leading [/], invalid 95 + escape, leading zero in an array index) or any token cannot be followed 96 + (member not found, array index out of range, intermediate node is a scalar). 97 + *)
+1
lib/json.ml
··· 56 56 let to_string = Codec.Stream.to_string 57 57 58 58 module Tape = Tape 59 + module Cursor = Cursor 59 60 60 61 module Value = struct 61 62 include Value
+5
lib/json.mli
··· 955 955 module Tape = Tape 956 956 (** Simdjson-compatible tape format. A columnar representation of a JSON value 957 957 laid out for random access by word index. *) 958 + 959 + module Cursor = Cursor 960 + (** Zipper over {!Value.t} with 961 + {{:https://www.rfc-editor.org/rfc/rfc6901}JSON Pointer (RFC 6901)} 962 + addressing. *)
+1
test/test.ml
··· 7 7 Test_codec.suite; 8 8 Test_tape.suite; 9 9 Test_json.suite; 10 + Test_cursor.suite; 10 11 ]
+123
test/test_cursor.ml
··· 1 + (** Tests for {!Json.Cursor}: zipper navigation, replace, and JSON Pointer (RFC 2 + 6901) round-trip. *) 3 + 4 + open Json 5 + 6 + let sample_value = 7 + let open Value in 8 + object' 9 + [ 10 + member (name "users") 11 + (list [ object' [ member (name "name") (string "alice") ] ]); 12 + member (name "tilde~slash/") (int 42); 13 + ] 14 + 15 + let test_focus_root () = 16 + let c = Cursor.root sample_value in 17 + Alcotest.(check bool) 18 + "focus is the root" true 19 + (Value.equal (Cursor.focus c) sample_value) 20 + 21 + let test_down_field () = 22 + let c = Cursor.root sample_value in 23 + match Cursor.down_field "users" c with 24 + | None -> Alcotest.fail "expected to descend into 'users'" 25 + | Some c' -> ( 26 + match Cursor.focus c' with 27 + | Value.Array _ -> () 28 + | _ -> Alcotest.fail "focus should be an array") 29 + 30 + let test_down_index () = 31 + let c = Cursor.root sample_value in 32 + match Option.bind (Cursor.down_field "users" c) (Cursor.down_index 0) with 33 + | None -> Alcotest.fail "expected to descend to users[0]" 34 + | Some c' -> ( 35 + match Option.map Cursor.focus (Cursor.down_field "name" c') with 36 + | Some (Value.String ("alice", _)) -> () 37 + | _ -> Alcotest.fail "users[0].name should be \"alice\"") 38 + 39 + let test_replace_zip () = 40 + let c = Cursor.root sample_value in 41 + let edit = 42 + let ( let* ) = Option.bind in 43 + let* c1 = Cursor.down_field "users" c in 44 + let* c2 = Cursor.down_index 0 c1 in 45 + let* c3 = Cursor.down_field "name" c2 in 46 + let c3' = Cursor.replace (Value.string "bob") c3 in 47 + Some (Cursor.to_value c3') 48 + in 49 + match edit with 50 + | None -> Alcotest.fail "edit path failed" 51 + | Some v -> ( 52 + let c' = Cursor.root v in 53 + match 54 + Option.bind (Cursor.down_field "users" c') (Cursor.down_index 0) 55 + |> Option.map (fun c'' -> 56 + Option.map Cursor.focus (Cursor.down_field "name" c'')) 57 + |> Option.join 58 + with 59 + | Some (Value.String ("bob", _)) -> () 60 + | _ -> Alcotest.fail "edit did not propagate") 61 + 62 + let test_pointer_root () = 63 + let c = Cursor.root sample_value in 64 + Alcotest.(check string) "root is empty" "" (Cursor.pointer c) 65 + 66 + let test_pointer_round_trip () = 67 + let cases = [ ""; "/users"; "/users/0"; "/users/0/name" ] in 68 + List.iter 69 + (fun p -> 70 + match Cursor.of_pointer sample_value p with 71 + | None -> Alcotest.failf "of_pointer failed for %S" p 72 + | Some c -> 73 + Alcotest.(check string) 74 + (Fmt.str "round-trip %S" p) 75 + p (Cursor.pointer c)) 76 + cases 77 + 78 + let test_pointer_escapes () = 79 + match Cursor.of_pointer sample_value "/tilde~0slash~1" with 80 + | None -> Alcotest.fail "expected escape decode to find 'tilde~slash/'" 81 + | Some c -> ( 82 + match Cursor.focus c with 83 + | Value.Number (n, _) when Float.equal n 42.0 -> () 84 + | _ -> Alcotest.fail "focus value at escape-decoded key wrong") 85 + 86 + let test_pointer_leading_zero_rejected () = 87 + match Cursor.of_pointer sample_value "/users/01" with 88 + | None -> () 89 + | Some _ -> Alcotest.fail "leading-zero index must be rejected (RFC 6901 §4)" 90 + 91 + let test_to_context () = 92 + let open Loc in 93 + let c = Cursor.root sample_value in 94 + match Option.bind (Cursor.down_field "users" c) (Cursor.down_index 0) with 95 + | None -> Alcotest.fail "descent failed" 96 + | Some c' -> 97 + let ctx = Cursor.to_context c' in 98 + let steps = Path.steps (Context.path ctx) in 99 + Alcotest.(check int) "context has two frames" 2 (List.length steps); 100 + let has_users = ref false and has_zero = ref false in 101 + List.iter 102 + (function 103 + | Path.Mem ("users", _) -> has_users := true 104 + | Path.Nth (0, _) -> has_zero := true 105 + | _ -> ()) 106 + steps; 107 + Alcotest.(check bool) "context contains users frame" true !has_users; 108 + Alcotest.(check bool) "context contains [0] frame" true !has_zero 109 + 110 + let suite = 111 + ( "cursor", 112 + [ 113 + Alcotest.test_case "focus root" `Quick test_focus_root; 114 + Alcotest.test_case "down_field" `Quick test_down_field; 115 + Alcotest.test_case "down_index" `Quick test_down_index; 116 + Alcotest.test_case "replace and zip" `Quick test_replace_zip; 117 + Alcotest.test_case "pointer root" `Quick test_pointer_root; 118 + Alcotest.test_case "pointer round-trip" `Quick test_pointer_round_trip; 119 + Alcotest.test_case "pointer escapes" `Quick test_pointer_escapes; 120 + Alcotest.test_case "pointer leading-zero rejected" `Quick 121 + test_pointer_leading_zero_rejected; 122 + Alcotest.test_case "to_context" `Quick test_to_context; 123 + ] )