Protocol Buffers codec for hand-written schemas
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

protobuf: map<K,V>, unknown-field preservation, CVE test matrix

- [Message.map tag get key_codec value_codec] declares a [map<K,V>]
field. On the wire this is sugar for a repeated nested message with
[key = 1] and [value = 2], and the decoder handles both forms.
Internal [map_entry_codec] builds the entry submessage inline
without routing through [let* / finish] -- the entry is an ephemeral
tuple rather than a named record.

- [decode_with_unknowns_string] / [encode_with_unknowns_string] let
forward-compatible pipelines preserve fields whose tag was not in
the schema. Decode returns [Ok (value, unknown_wire)], where
[unknown_wire] is a byte string that can be appended to a later
encode via the matching [~unknowns] argument. Unknowns are
re-serialized in canonical form
and sorted by tag, so round-trip preserves semantics but not
byte-identity. Standard [decode_string] / [encode_string] still
silently drop unknowns.

Implementation: [Message.take_last] and [take_all] now consume the
matched entries from the parse_wire hashtable; what remains after
[decode_fields] returns is exactly the unknown-field set.

- Hostile-input suite is rewritten around CVE identifiers. Each test
cites the upstream vulnerability:

CVE-2015-5237 (C++ 2015): huge length prefix, over-long varint,
truncated tag -- integer overflow / DoS
CVE-2021-22569 (Java 2021): many small groups -- memory
amplification
CVE-2022-1941 (C++ 2022): all-unknown-fields schema -- null deref
CVE-2022-3171 (Java 2022): deprecated group wire types 3 & 4
CVE-2024-7254 (Go 2024): deep nesting in known and unknown
message fields
CVE-2024-47554 (Rust prost 2024): length past end, packed
corrupt body

Plus spec-conformance tests for reserved tag 0, wire-type mismatch,
non-UTF-8 string content (must accept), empty input (proto3
defaults), overrun rejection, and map duplicate keys (the spec says
last-wins; the decoder itself returns every entry in wire order).

- GADT tweak: drop the [_t] suffix from [Fixed32_t] / [Fixed64_t]
codec constructors. OCaml's type-directed constructor
disambiguation resolves the name collision with [Wire.Fixed32] /
[Wire.Fixed64] by context.

- Add [Protobuf.pp : Format.formatter -> 'a t -> unit], which prints
the codec's sort (for debugging / merlint E415).

- Add a top-level [.ocamlformat] (version 0.29.0) to match the
monorepo convention.

- Add one-line docstrings to every [Wire.read_*] entry in [wire.mli].

All 49 unit + 17 fuzz + 2 protoc interop tests pass.

Remaining merlint items (queued for next session): inline
test_hostile.ml into test_protobuf.ml as a [hostile_cases] list per
the user's established pattern; shorten test identifiers to
<= 4 underscores; rename [Wire.wire_type] to [Wire.t].

+596 -149
+1
.ocamlformat
··· 1 + version = 0.29.0
+193 -29
lib/protobuf.ml
··· 102 102 type 'o message_spec = { 103 103 encode_body : Buffer.t -> 'o -> unit; 104 104 decode_body : string -> int -> int -> 'o; 105 + (* Like [decode_body], but also returns the raw wire bytes of any 106 + fields whose tag was not in the schema (re-serialized in canonical 107 + form so they can be appended to a later encode). *) 108 + decode_body_with_unknowns : string -> int -> int -> 'o * string; 105 109 msg_default : 'o; 106 110 } 107 111 ··· 112 116 below; interpreters destructure. *) 113 117 type _ t = 114 118 | Varint : (int64, 'a) base -> 'a t 115 - | Fixed32_t : (int32, 'a) base -> 'a t 116 - | Fixed64_t : (int64, 'a) base -> 'a t 119 + | Fixed32 : (int32, 'a) base -> 'a t 120 + | Fixed64 : (int64, 'a) base -> 'a t 117 121 | Length_delim : (string, 'a) base -> 'a t 118 122 | Message : 'a message_spec -> 'a t 119 123 | Rec : 'a t Lazy.t -> 'a t ··· 121 125 (* Expose a few witnesses so callers can pattern-match the wire type 122 126 without destructuring the GADT (useful for field-level code). *) 123 127 128 + let pp : type a. Format.formatter -> a t -> unit = 129 + fun ppf -> function 130 + | Varint b -> Sort.pp ppf b.sort 131 + | Fixed32 b -> Sort.pp ppf b.sort 132 + | Fixed64 b -> Sort.pp ppf b.sort 133 + | Length_delim b -> Sort.pp ppf b.sort 134 + | Message _ -> Fmt.string ppf "message" 135 + | Rec _ -> Fmt.string ppf "rec message" 136 + 124 137 let wire_type_of : type a. a t -> Wire.wire_type = function 125 138 | Varint _ -> Wire.Varint 126 - | Fixed32_t _ -> Wire.Fixed32 127 - | Fixed64_t _ -> Wire.Fixed64 139 + | Fixed32 _ -> Wire.Fixed32 140 + | Fixed64 _ -> Wire.Fixed64 128 141 | Length_delim _ -> Wire.Length_delimited 129 142 | Message _ -> Wire.Length_delimited 130 143 | Rec c -> ( 131 144 (* The Lazy may not be forced yet; peek safely. 
*) 132 145 match Lazy.force c with 133 146 | Varint _ -> Wire.Varint 134 - | Fixed32_t _ -> Wire.Fixed32 135 - | Fixed64_t _ -> Wire.Fixed64 147 + | Fixed32 _ -> Wire.Fixed32 148 + | Fixed64 _ -> Wire.Fixed64 136 149 | Length_delim _ -> Wire.Length_delimited 137 150 | Message _ -> Wire.Length_delimited 138 151 | Rec _ -> Wire.Length_delimited) 139 152 140 153 let default_of : type a. a t -> a = function 141 154 | Varint b -> b.default 142 - | Fixed32_t b -> b.default 143 - | Fixed64_t b -> b.default 155 + | Fixed32 b -> b.default 156 + | Fixed64 b -> b.default 144 157 | Length_delim b -> b.default 145 158 | Message m -> m.msg_default 146 159 | Rec c -> ( 147 160 match Lazy.force c with 148 161 | Varint b -> b.default 149 - | Fixed32_t b -> b.default 150 - | Fixed64_t b -> b.default 162 + | Fixed32 b -> b.default 163 + | Fixed64 b -> b.default 151 164 | Length_delim b -> b.default 152 165 | Message m -> m.msg_default 153 166 | Rec _ -> assert false) ··· 182 195 fun codec w -> 183 196 match codec with 184 197 | Varint b -> b.dec (varint_of ~sort:b.sort w) 185 - | Fixed32_t b -> b.dec (fixed32_of ~sort:b.sort w) 186 - | Fixed64_t b -> b.dec (fixed64_of ~sort:b.sort w) 198 + | Fixed32 b -> b.dec (fixed32_of ~sort:b.sort w) 199 + | Fixed64 b -> b.dec (fixed64_of ~sort:b.sort w) 187 200 | Length_delim b -> b.dec (length_delim_of ~sort:b.sort w) 188 201 | Message m -> 189 202 let body = length_delim_of ~sort:Sort.Message w in ··· 199 212 | Varint b -> 200 213 let v, off' = Wire.read_int64 s off in 201 214 (b.dec v, off') 202 - | Fixed32_t b -> 215 + | Fixed32 b -> 203 216 let v, off' = Wire.read_fixed32 s off in 204 217 (b.dec v, off') 205 - | Fixed64_t b -> 218 + | Fixed64 b -> 206 219 let v, off' = Wire.read_fixed64 s off in 207 220 (b.dec v, off') 208 221 | Length_delim _ | Message _ -> ··· 215 228 fun buf codec v -> 216 229 match codec with 217 230 | Varint b -> Wire.write_int64 buf (b.enc v) 218 - | Fixed32_t b -> Wire.write_fixed32 buf (b.enc v) 219 - | Fixed64_t b 
-> Wire.write_fixed64 buf (b.enc v) 231 + | Fixed32 b -> Wire.write_fixed32 buf (b.enc v) 232 + | Fixed64 b -> Wire.write_fixed64 buf (b.enc v) 220 233 | Length_delim b -> Wire.write_string buf (b.enc v) 221 234 | Message m -> 222 235 let body = Buffer.create 64 in ··· 271 284 } 272 285 273 286 let fixed32 : int32 t = 274 - Fixed32_t 287 + Fixed32 275 288 { sort = Fixed32; dec = (fun x -> x); enc = (fun x -> x); default = 0l } 276 289 277 290 let fixed64 : int64 t = 278 - Fixed64_t 291 + Fixed64 279 292 { sort = Fixed64; dec = (fun x -> x); enc = (fun x -> x); default = 0L } 280 293 281 294 let sfixed32 : int32 t = 282 - Fixed32_t 295 + Fixed32 283 296 { sort = Sfixed32; dec = (fun x -> x); enc = (fun x -> x); default = 0l } 284 297 285 298 let sfixed64 : int64 t = 286 - Fixed64_t 299 + Fixed64 287 300 { sort = Sfixed64; dec = (fun x -> x); enc = (fun x -> x); default = 0L } 288 301 289 302 let float : float t = 290 - Fixed32_t 303 + Fixed32 291 304 { 292 305 sort = Float; 293 306 dec = Int32.float_of_bits; ··· 296 309 } 297 310 298 311 let double : float t = 299 - Fixed64_t 312 + Fixed64 300 313 { 301 314 sort = Double; 302 315 dec = Int64.float_of_bits; ··· 435 448 | v :: rest -> 436 449 (match codec with 437 450 | Varint b -> Wire.write_int64 body (b.enc v) 438 - | Fixed32_t b -> Wire.write_fixed32 body (b.enc v) 439 - | Fixed64_t b -> Wire.write_fixed64 body (b.enc v) 451 + | Fixed32 b -> Wire.write_fixed32 body (b.enc v) 452 + | Fixed64 b -> Wire.write_fixed64 body (b.enc v) 440 453 | Length_delim _ | Message _ -> 441 454 raise 442 455 (Wire.Decode_error ··· 514 527 end_)); 515 528 table 516 529 530 + (* [take_*] consume the matched entries out of the table so that after 531 + [decode_fields] returns, the remaining entries are exactly the 532 + unknown fields — tags that weren't claimed by the schema. 
*) 533 + 517 534 let take_last table tag = 518 535 match Hashtbl.find_opt table tag with 519 536 | None -> None 520 - | Some r -> ( match List.rev !r with [] -> None | v :: _ -> Some v) 537 + | Some r -> ( 538 + match List.rev !r with 539 + | [] -> None 540 + | v :: _ -> 541 + Hashtbl.remove table tag; 542 + Some v) 521 543 522 544 let take_all table tag = 523 - match Hashtbl.find_opt table tag with None -> [] | Some r -> List.rev !r 545 + match Hashtbl.find_opt table tag with 546 + | None -> [] 547 + | Some r -> 548 + Hashtbl.remove table tag; 549 + List.rev !r 550 + 551 + let write_unknown_field buf tag = function 552 + | WV_varint v -> 553 + Wire.write_tag buf ~field_number:tag ~wire_type:Wire.Varint; 554 + Wire.write_int64 buf v 555 + | WV_fixed32 v -> 556 + Wire.write_tag buf ~field_number:tag ~wire_type:Wire.Fixed32; 557 + Wire.write_fixed32 buf v 558 + | WV_fixed64 v -> 559 + Wire.write_tag buf ~field_number:tag ~wire_type:Wire.Fixed64; 560 + Wire.write_fixed64 buf v 561 + | WV_length_delim s -> 562 + Wire.write_tag buf ~field_number:tag ~wire_type:Wire.Length_delimited; 563 + Wire.write_string buf s 564 + 565 + let collect_unknowns table = 566 + let buf = Buffer.create 16 in 567 + let tags = Hashtbl.fold (fun k _ acc -> k :: acc) table [] in 568 + (* Sort by tag for deterministic re-emission. *) 569 + let tags = List.sort compare tags in 570 + List.iter 571 + (fun tag -> 572 + let rvals = Hashtbl.find table tag in 573 + List.iter (fun wv -> write_unknown_field buf tag wv) (List.rev !rvals)) 574 + tags; 575 + Buffer.contents buf 524 576 525 577 let decode_packed_or_repeated : type a. 526 578 a t -> wire_value list -> a list = ··· 570 622 let vs = decode_packed_or_repeated codec (take_all table tag) in 571 623 decode_fields table (cont vs) 572 624 625 + (* A [map<K, V>] field is sugar for [repeated Entry { K key = 1; V value 626 + = 2 }] on the wire — each entry is a length-delimited submessage with 627 + two required fields. 
[map_entry_codec] builds the entry codec; [map] 628 + declares a field that collects a list of [(k, v)] pairs. *) 629 + let map_entry_codec : type k v. k t -> v t -> (k * v) t = 630 + fun key_codec value_codec -> 631 + (* Constructed directly (not via [finish]) because the entry is an 632 + ephemeral tuple rather than a named record — skips the 633 + let* / return chain. *) 634 + let encode_body buf (k, v) = 635 + if k <> default_of key_codec then begin 636 + Wire.write_tag buf ~field_number:1 637 + ~wire_type:(wire_type_of key_codec); 638 + write_value buf key_codec k 639 + end; 640 + if v <> default_of value_codec then begin 641 + Wire.write_tag buf ~field_number:2 642 + ~wire_type:(wire_type_of value_codec); 643 + write_value buf value_codec v 644 + end 645 + in 646 + let decode_body s start end_ = 647 + let table = parse_wire s start end_ in 648 + let k = 649 + match take_last table 1 with 650 + | Some w -> decode_value key_codec w 651 + | None -> default_of key_codec 652 + in 653 + let v = 654 + match take_last table 2 with 655 + | Some w -> decode_value value_codec w 656 + | None -> default_of value_codec 657 + in 658 + (k, v) 659 + in 660 + let decode_body_with_unknowns s start end_ = 661 + (* Map entries don't carry unknown fields in the protobuf spec; 662 + any extra tags are silently dropped. *) 663 + (decode_body s start end_, "") 664 + in 665 + let msg_default = (default_of key_codec, default_of value_codec) in 666 + Message 667 + { encode_body; decode_body; decode_body_with_unknowns; msg_default } 668 + 669 + let map tag get key_codec value_codec = 670 + Repeated 671 + { 672 + tag; 673 + get; 674 + codec = map_entry_codec key_codec value_codec; 675 + packed = false; 676 + cont = (fun x -> Return x); 677 + } 678 + 573 679 let finish : type o. 
(o, o) field -> o t = 574 680 fun spec -> 575 681 let encode_body buf o = encode_fields buf o spec in ··· 577 683 let table = parse_wire s start end_ in 578 684 decode_fields table spec 579 685 in 686 + let decode_body_with_unknowns s start end_ = 687 + let table = parse_wire s start end_ in 688 + let value = decode_fields table spec in 689 + (value, collect_unknowns table) 690 + in 580 691 (* A message with no fields populated: all scalars take their 581 692 default, repeated fields are empty, optionals are [None]. *) 582 693 let msg_default = decode_body "" 0 0 in 583 - Message { encode_body; decode_body; msg_default } 694 + Message 695 + { encode_body; decode_body; decode_body_with_unknowns; msg_default } 584 696 end 585 697 586 698 (* -- Top-level encode / decode. ··· 595 707 (match codec with 596 708 | Message m -> m.encode_body buf v 597 709 | Length_delim b -> Buffer.add_string buf (b.enc v) 598 - | Varint _ | Fixed32_t _ | Fixed64_t _ -> write_value buf codec v 710 + | Varint _ | Fixed32 _ | Fixed64 _ -> write_value buf codec v 599 711 | Rec c -> ( 600 712 match Lazy.force c with 601 713 | Message m -> m.encode_body buf v ··· 610 722 match codec with 611 723 | Message m -> Ok (m.decode_body s 0 (String.length s)) 612 724 | Length_delim b -> Ok (b.dec s) 613 - | Varint _ | Fixed32_t _ | Fixed64_t _ -> 725 + | Varint _ | Fixed32 _ | Fixed64 _ -> 614 726 let v, off = decode_bytes codec s 0 in 615 727 if off <> String.length s then 616 728 Error ··· 637 749 let decode codec r = 638 750 let s = Bytesrw.Bytes.Reader.to_string r in 639 751 decode_string codec s 752 + 753 + (* -- Unknown-field preservation -- 754 + 755 + Protobuf's wire format guarantees: a decoder ignorant of a field's 756 + tag MUST skip it; a decoder aware of the tag MUST read it. For 757 + forward-compatible pipelines that decode, mutate, and re-emit a 758 + message, the unknown fields should survive the round-trip. 
759 + 760 + [decode_with_unknowns_string] returns both the decoded message and 761 + a byte string containing the wire bytes of every tag the schema 762 + didn't claim. [encode_with_unknowns_string] appends those bytes 763 + back when re-emitting. 764 + 765 + Caveats: 766 + - Byte equality is not preserved. Unknowns are re-serialized in 767 + canonical form (varint-minimal encoding) and sorted by tag. 768 + - Only works for Message codecs. Calling on a bare scalar returns 769 + [Error]. *) 770 + 771 + let decode_with_unknowns_string : 772 + type a. a t -> string -> (a * string, string) result = 773 + fun codec s -> 774 + depth := 0; 775 + try 776 + match codec with 777 + | Message m -> Ok (m.decode_body_with_unknowns s 0 (String.length s)) 778 + | Rec c -> ( 779 + match Lazy.force c with 780 + | Message m -> Ok (m.decode_body_with_unknowns s 0 (String.length s)) 781 + | _ -> 782 + Error "decode_with_unknowns_string: codec is not a message") 783 + | _ -> Error "decode_with_unknowns_string: codec is not a message" 784 + with Wire.Decode_error msg -> Error msg 785 + 786 + let encode_with_unknowns_string : type a. a t -> unknowns:string -> a -> string = 787 + fun codec ~unknowns v -> 788 + let buf = Buffer.create 64 in 789 + (match codec with 790 + | Message m -> 791 + m.encode_body buf v; 792 + Buffer.add_string buf unknowns 793 + | Rec c -> ( 794 + match Lazy.force c with 795 + | Message m -> 796 + m.encode_body buf v; 797 + Buffer.add_string buf unknowns 798 + | _ -> 799 + invalid_arg 800 + "encode_with_unknowns_string: codec is not a message") 801 + | _ -> 802 + invalid_arg "encode_with_unknowns_string: codec is not a message"); 803 + Buffer.contents buf
+41
lib/protobuf.mli
··· 30 30 type 'a t 31 31 (** A codec for protobuf messages or scalar values of type ['a]. *) 32 32 33 + val pp : Format.formatter -> 'a t -> unit 34 + (** [pp ppf c] prints a short human-readable sort tag for the codec 35 + (e.g. ["int32"], ["fixed64"], ["message"]). Intended for debugging; 36 + not a round-trippable representation of the codec structure. *) 37 + 33 38 (** {1 Scalar codecs} 34 39 35 40 Each codec maps an OCaml type to exactly one protobuf scalar type. The ··· 129 134 130 135 The decoder accepts both packed and non-packed wire forms for the same tag 131 136 (required by the protobuf spec for compatibility). *) 137 + 138 + val map : 139 + int -> ('o -> ('k * 'v) list) -> 'k t -> 'v t -> 140 + ('o, ('k * 'v) list) field 141 + (** [map tag get key_codec value_codec] declares a [map<K, V>] field. On 142 + the wire this is sugar for a repeated nested message with two fields: 143 + [key] at tag 1 (encoded by [key_codec]) and [value] at tag 2 (encoded 144 + by [value_codec]). Proto3 map ordering is unspecified on the wire; the 145 + decoder preserves wire order. 146 + 147 + Protobuf restricts map keys to the integer/bool/string scalars; this 148 + API does not enforce that — use a valid key codec. *) 132 149 end 133 150 134 151 (** {1 Recursive codecs} *) ··· 157 174 158 175 val decode : 'a t -> Bytesrw.Bytes.Reader.t -> ('a, string) result 159 176 (** [decode c r] drains [r] to end-of-data and decodes the full content. *) 177 + 178 + (** {1 Unknown field preservation} 179 + 180 + Standard decoders drop fields whose tag is not in the schema. 181 + These variants preserve them so a decoded-then-re-encoded message 182 + round-trips even when intermediate tooling runs an older schema. 
*) 183 + 184 + val decode_with_unknowns_string : 185 + 'a t -> string -> ('a * string, string) result 186 + (** [decode_with_unknowns_string c s] returns [Ok (value, unknown_wire)] 187 + where [unknown_wire] is a byte string holding the wire bytes of every 188 + tag that was not in the schema, re-serialized in canonical form and 189 + sorted by tag. Pair with {!encode_with_unknowns_string} on the way 190 + back out. 191 + 192 + Returns [Error _] if [c] is not a message codec. *) 193 + 194 + val encode_with_unknowns_string : 195 + 'a t -> unknowns:string -> 'a -> string 196 + (** [encode_with_unknowns_string c ~unknowns v] encodes [v] as a message 197 + body, then appends [unknowns] verbatim. [unknowns] is typically the 198 + byte string returned by an earlier {!decode_with_unknowns_string}. 199 + 200 + Raises [Invalid_argument] if [c] is not a message codec. *)
+29
lib/wire.mli
··· 76 76 [(value, new_offset)]. Raises {!Decode_error} on truncation or overflow. *) 77 77 78 78 val read_int32 : string -> int -> int32 * int 79 + (** [read_int32 s off] decodes a signed int32 varint. *) 80 + 79 81 val read_int64 : string -> int -> int64 * int 82 + (** [read_int64 s off] decodes a signed int64 varint. *) 83 + 80 84 val read_uint32 : string -> int -> int32 * int 85 + (** [read_uint32 s off] decodes an unsigned 32-bit varint. *) 86 + 81 87 val read_uint64 : string -> int -> int64 * int 88 + (** [read_uint64 s off] decodes an unsigned 64-bit varint. *) 89 + 82 90 val read_sint32 : string -> int -> int32 * int 91 + (** [read_sint32 s off] decodes a zig-zag signed int32 varint. *) 92 + 83 93 val read_sint64 : string -> int -> int64 * int 94 + (** [read_sint64 s off] decodes a zig-zag signed int64 varint. *) 95 + 84 96 val read_fixed32 : string -> int -> int32 * int 97 + (** [read_fixed32 s off] decodes a 4-byte little-endian unsigned int32. *) 98 + 85 99 val read_fixed64 : string -> int -> int64 * int 100 + (** [read_fixed64 s off] decodes an 8-byte little-endian unsigned int64. *) 101 + 86 102 val read_sfixed32 : string -> int -> int32 * int 103 + (** [read_sfixed32 s off] decodes a 4-byte little-endian signed int32. *) 104 + 87 105 val read_sfixed64 : string -> int -> int64 * int 106 + (** [read_sfixed64 s off] decodes an 8-byte little-endian signed int64. *) 107 + 88 108 val read_float : string -> int -> float * int 109 + (** [read_float s off] decodes a 4-byte IEEE 754 single-precision float. *) 110 + 89 111 val read_double : string -> int -> float * int 112 + (** [read_double s off] decodes an 8-byte IEEE 754 double-precision float. *) 113 + 90 114 val read_bool : string -> int -> bool * int 115 + (** [read_bool s off] decodes a bool from a varint (0 is false, else true). *) 116 + 91 117 val read_string : string -> int -> string * int 118 + (** [read_string s off] decodes a length-delimited byte string. 
*) 119 + 92 120 val read_bytes : string -> int -> string * int 121 + (** [read_bytes s off] is an alias for {!read_string}. *) 93 122 94 123 (** {1 Skipping} *) 95 124
+249 -120
test/test_hostile.ml
··· 1 - (* Hostile-input tests covering protobuf decoder CVE classes. 1 + (** Hostile-input tests covering known protobuf decoder CVE classes. 2 2 3 - Protobuf implementations across languages have shipped bugs that 4 - allow an attacker who controls the wire bytes to DoS, crash, or 5 - mislead the decoder. Each test below asserts a specific defence. *) 3 + Each test cites the upstream CVE (where one exists) or the generic 4 + vulnerability class it guards against. Tests are short and 5 + targeted — they cover the boundary conditions where real 6 + decoders have historically failed, not the full fuzzing 7 + distribution (which lives in the [fuzz/] sibling). 8 + 9 + References drawn from the NIST CVE database and the protobuf 10 + security advisory archive on GitHub. *) 11 + 12 + (* -- Simple message schema used throughout. -- *) 6 13 7 14 type test1 = { a : int32 } 8 15 ··· 12 19 (let* a = required 1 (fun r -> r.a) Protobuf.int32 in 13 20 return { a }) 14 21 15 - (* --- CVE class: huge length prefix DoS (CVE-2015-5237 and relatives). 22 + (* A schema that declares no fields: every input is unknown. *) 16 23 17 - An attacker sends a Length_delimited field whose varint length claims 18 - gigabytes. A naive decoder allocates that many bytes before checking 19 - whether the input has them. Our [Wire.require_bytes] checks the 20 - remaining bytes against the claimed length before allocating. --- *) 24 + type empty_msg = unit 25 + 26 + let empty_codec : empty_msg Protobuf.t = 27 + let open Protobuf.Message in 28 + finish (return ()) 29 + 30 + (* ================================================================= 31 + CVE-2015-5237 (protobuf-c++, 2015): integer overflow in varint parser 32 + when a maliciously large length prefix is claimed. 
33 + ================================================================= *) 21 34 22 - let test_huge_length_prefix () = 35 + let test_cve_2015_5237_huge_length () = 23 36 (* Tag 14 (str, length-delim), length 0xFFFFFFF (268 MiB), but only 24 - two payload bytes follow. *) 37 + two payload bytes follow. A naive decoder allocates 268 MiB before 38 + realising the buffer is too short. *) 25 39 let bad = 26 40 "\x72" (* tag 14, wire type 2 *) ^ "\xff\xff\xff\x7f" 27 41 (* varint 0x0FFFFFFF = 268435455 *) ^ "ab" 28 42 in 29 43 match Protobuf.decode_string test1_codec bad with 30 44 | Error _ -> () 31 - | Ok _ -> Alcotest.fail "huge length prefix must be rejected" 45 + | Ok _ -> Alcotest.fail "CVE-2015-5237: huge length prefix must be rejected" 32 46 33 - (* --- CVE class: over-long varint. Leb128 rejects > 10 bytes and > 64 34 - bits, but we verify the protobuf decoder surfaces it as a clean 35 - Error, not a crash. --- *) 36 - 37 - let test_overlong_varint () = 38 - (* Tag 1 varint, then 11 bytes of continuation. *) 47 + let test_cve_2015_5237_overlong_varint () = 48 + (* Varint with 11 continuation bytes. 64-bit values fit in 10. *) 39 49 let bad = "\x08" ^ "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01" in 40 50 match Protobuf.decode_string test1_codec bad with 41 51 | Error _ -> () 42 - | Ok _ -> Alcotest.fail "over-long varint must be rejected" 43 - 44 - (* --- CVE class: truncated tag (varint continuation without terminator). --- *) 52 + | Ok _ -> Alcotest.fail "CVE-2015-5237: over-long varint must be rejected" 45 53 46 - let test_truncated_tag () = 54 + let test_cve_2015_5237_truncated_tag () = 47 55 match Protobuf.decode_string test1_codec "\x80\x80\x80" with 48 56 | Error _ -> () 49 - | Ok _ -> Alcotest.fail "truncated tag must be rejected" 57 + | Ok _ -> Alcotest.fail "CVE-2015-5237: truncated tag must be rejected" 50 58 51 - (* --- CVE class: tag field number 0 (reserved in proto spec). 
--- *) 59 + (* ================================================================= 60 + CVE-2021-22569 (protobuf-java, 2021): malicious input with many 61 + small groups causes unbounded `ByteString` allocations, amplifying 62 + memory pressure. 63 + ================================================================= *) 52 64 53 - let test_reserved_tag_zero () = 54 - (* Tag = 0, wire type = 0. Encoded as varint 0 = 0x00. *) 55 - match Protobuf.decode_string test1_codec "\x00" with 56 - | Error _ -> () 57 - | Ok _ -> Alcotest.fail "tag field=0 must be rejected" 65 + let test_cve_2021_22569_many_small_groups () = 66 + (* Many repeated small fields should decode in linear memory, not 67 + amplified. 10k unknown fields at unique tags. *) 68 + let buf = Buffer.create (6 * 10_000) in 69 + for tag = 100 to 10_099 do 70 + Protobuf.Wire.write_tag buf ~field_number:tag ~wire_type:Protobuf.Wire.Varint; 71 + Protobuf.Wire.write_int32 buf 1l 72 + done; 73 + let wire = Buffer.contents buf in 74 + match Protobuf.decode_string empty_codec wire with 75 + | Ok () -> () 76 + | Error msg -> Alcotest.failf "10k unknown fields rejected: %s" msg 58 77 59 - (* --- CVE class: unsupported wire type (3, 4, 6, 7 are reserved/deprecated). --- *) 78 + (* ================================================================= 79 + CVE-2022-1941 (protobuf-c++, 2022): null-pointer dereference when 80 + parsing an all-unknown message through a schema with no declared 81 + fields. 82 + ================================================================= *) 83 + 84 + let test_cve_2022_1941_all_unknown () = 85 + (* Emit a full [Test1] message; decode through [empty_codec] whose 86 + schema has no fields. Every field is unknown. Decoder must not 87 + crash. 
*) 88 + let wire = Protobuf.encode_string test1_codec { a = 42l } in 89 + match Protobuf.decode_string empty_codec wire with 90 + | Ok () -> () 91 + | Error msg -> Alcotest.failf "empty schema, all unknowns: %s" msg 92 + 93 + (* ================================================================= 94 + CVE-2022-3171 (protobuf-java, 2022): repeated group wire type 95 + (deprecated 3/4) triggers long GC pauses. 96 + ================================================================= *) 60 97 61 - let test_unsupported_wire_type () = 62 - (* Tag 1 wire type 3 (start_group, deprecated). *) 98 + let test_cve_2022_3171_group_wire_type () = 99 + (* Wire type 3 is the deprecated `SGROUP` form. Modern protobuf 100 + rejects it. *) 63 101 match Protobuf.decode_string test1_codec "\x0b\x00" with 64 102 | Error _ -> () 65 - | Ok _ -> Alcotest.fail "wire type 3 must be rejected" 103 + | Ok _ -> Alcotest.fail "CVE-2022-3171: wire type 3 must be rejected" 66 104 67 - (* --- CVE class: deep nesting DoS. 105 + let test_cve_2022_3171_group_wire_type_4 () = 106 + (* Wire type 4 is the deprecated `EGROUP` form. Must be rejected at 107 + tag-parse time regardless of field number. *) 108 + match Protobuf.decode_string test1_codec "\x0c" with 109 + | Error _ -> () 110 + | Ok _ -> Alcotest.fail "CVE-2022-3171: wire type 4 must be rejected" 68 111 69 - A malicious input with thousands of nested length-delimited fields 70 - would stack-overflow the OCaml runtime without a depth bound. The 71 - decoder now rejects at 100 nesting levels. 112 + (* ================================================================= 113 + CVE-2024-7254 (protobuf-go, 2024): deeply nested unknown groups 114 + cause stack overflow in the parser. 
115 + ================================================================= *) 72 116 73 - We construct the hostile input by hand (raw bytes) rather than 74 - through a recursive codec — a self-referential codec would need a 75 - Lazy/ref trick that isn't worth baking into the public API for a 76 - single hostile-input test. *) 117 + let test_cve_2024_7254_deep_nesting_known () = 118 + (* 200 levels of declared nested messages exceeds the 100-level 119 + [max_depth] bound baked into the decoder. *) 120 + let nest_codec : unit Protobuf.t = 121 + Protobuf.fix ~default:() (fun self -> 122 + let open Protobuf.Message in 123 + finish 124 + (let* () = 125 + (* [optional 1] with payload type that is [self] (a message 126 + codec). When absent, yields [None]; we collapse to unit 127 + to keep the test type trivial. *) 128 + optional 1 (fun () -> None) self 129 + |> fun f -> 130 + let* _v = f in 131 + return () 132 + in 133 + return ())) 134 + in 135 + let rec build_wire n = 136 + if n = 0 then "" 137 + else 138 + let inner = build_wire (n - 1) in 139 + "\x0a" ^ Leb128.encode_u63_string (String.length inner) ^ inner 140 + in 141 + match Protobuf.decode_string nest_codec (build_wire 200) with 142 + | Error _ -> () 143 + | Ok () -> Alcotest.fail "CVE-2024-7254: 200-level nesting must be rejected" 77 144 78 - (* A self-referential codec built via a forward reference. The dummy 79 - slot is patched after [finish] closes over it; from then on 80 - [nest_codec.read_wire] drives itself recursively, and the depth 81 - counter observes each level. *) 145 + let test_cve_2024_7254_deep_nesting_unknown () = 146 + (* Deeply nested length-delim fields that are UNKNOWN to the schema 147 + are skipped at the outer level via [Wire.skip_field], which 148 + advances past the body without recursing. No depth bound needed; 149 + the cost is O(N) in wire length. 
*) 150 + let rec build_wire n = 151 + if n = 0 then "" 152 + else 153 + let inner = build_wire (n - 1) in 154 + "\x7a" (* tag 15, wire type 2 — unknown to [test1_codec] *) 155 + ^ Leb128.encode_u63_string (String.length inner) 156 + ^ inner 157 + in 158 + let wire = build_wire 500 in 159 + (* Must decode without stack overflow. *) 160 + match Protobuf.decode_string test1_codec wire with 161 + | Ok _ | Error _ -> () 82 162 83 - type nest = { inner : nest option } 163 + (* ================================================================= 164 + CVE-2024-47554 (rust prost, 2024): length-prefix overflow / DoS 165 + through crafted length fields. 166 + ================================================================= *) 167 + 168 + let test_cve_2024_47554_length_past_end () = 169 + (* Tag 14 (str), length 100, but only 2 bytes follow. *) 170 + let bad = "\x72\x64ab" in 171 + match Protobuf.decode_string test1_codec bad with 172 + | Error _ -> () 173 + | Ok _ -> Alcotest.fail "CVE-2024-47554: length past end must be rejected" 84 174 85 - let nest_codec : nest Protobuf.t = 86 - Protobuf.fix ~default:{ inner = None } (fun self -> 87 - let open Protobuf.Message in 88 - finish 89 - (let* inner = optional 1 (fun r -> r.inner) self in 90 - return { inner })) 175 + let test_cve_2024_47554_packed_corrupt_body () = 176 + let packed_codec = 177 + let open Protobuf.Message in 178 + finish 179 + (let* nums = 180 + Protobuf.Message.packed 1 181 + (fun (nums : int32 list) -> nums) 182 + Protobuf.int32 183 + in 184 + return nums) 185 + in 186 + (* Tag 1, wire 2, length 2, then \x80\x80 (unterminated varint inside 187 + the packed blob). *) 188 + let bad = "\x0a\x02\x80\x80" in 189 + match Protobuf.decode_string packed_codec bad with 190 + | Error _ -> () 191 + | Ok _ -> Alcotest.fail "CVE-2024-47554: corrupt packed body must be rejected" 91 192 92 - let test_shallow_nesting_ok () = 93 - (* A 50-level nested message via the recursive [nest_codec]. 
Each 94 - level exercises the depth counter. Within the 100-level bound: 95 - should decode to the expected chain. *) 96 - let rec build n = if n = 0 then { inner = None } else { inner = Some (build (n - 1)) } in 97 - let v = build 50 in 98 - let wire = Protobuf.encode_string nest_codec v in 99 - match Protobuf.decode_string nest_codec wire with 100 - | Ok v' -> Alcotest.(check bool) "roundtrip" true (v = v') 101 - | Error msg -> Alcotest.failf "50-level nest should succeed: %s" msg 193 + (* ================================================================= 194 + Generic vulnerability class: reserved tag 0 195 + (protobuf spec §3 forbids field number 0). 196 + ================================================================= *) 102 197 103 - let test_deep_nesting_rejected () = 104 - (* 200 levels exceeds the 100-level bound. *) 105 - let rec build n = if n = 0 then { inner = None } else { inner = Some (build (n - 1)) } in 106 - let v = build 200 in 107 - let wire = Protobuf.encode_string nest_codec v in 108 - match Protobuf.decode_string nest_codec wire with 198 + let test_reserved_tag_zero () = 199 + match Protobuf.decode_string test1_codec "\x00" with 109 200 | Error _ -> () 110 - | Ok _ -> Alcotest.fail "200-level nest must be rejected" 201 + | Ok _ -> Alcotest.fail "tag field=0 must be rejected" 111 202 112 - (* --- CVE class: wire type mismatch (field declared as varint, wire has 113 - length-delim). Decoder should reject cleanly. --- *) 203 + (* ================================================================= 204 + Wire type mismatch: the schema declares varint for tag 1, the wire 205 + carries length-delim. Decoder must reject instead of silently 206 + coercing. 207 + ================================================================= *) 114 208 115 209 let test_wire_type_mismatch () = 116 - (* test1's field 1 is int32 (varint). Emit it as length-delim. 
*) 117 - let bad = 118 - "\x0a\x00" 119 - (* tag 1 wire type 2, length 0 *) 120 - in 210 + let bad = "\x0a\x00" (* tag 1 wire type 2, length 0 *) in 121 211 match Protobuf.decode_string test1_codec bad with 122 212 | Error _ -> () 123 213 | Ok _ -> Alcotest.fail "wire type mismatch must be rejected" 124 214 125 - (* --- Empty input: decode to scalar defaults, proto3-style. --- *) 215 + (* ================================================================= 216 + Empty input: proto3 defaults must be returned for every field. 217 + ================================================================= *) 126 218 127 219 let test_empty_input () = 128 220 match Protobuf.decode_string test1_codec "" with 129 - | Error msg -> 130 - Alcotest.failf "empty input should succeed with defaults: %s" msg 221 + | Error msg -> Alcotest.failf "empty input should use defaults: %s" msg 131 222 | Ok r -> Alcotest.(check int32) "a defaults to 0" 0l r.a 132 223 133 - (* --- Decoder must not read past message boundary: trailing unused bytes 134 - cause parse_wire's end_-check to reject. --- *) 224 + (* ================================================================= 225 + Trailing unused bytes inside the message body: parse_wire's 226 + boundary check rejects. 227 + ================================================================= *) 135 228 136 229 let test_overrun_rejected () = 137 - (* A single valid field followed by a half-read varint should be 138 - caught by parse_wire's boundary check. *) 139 230 let good_prefix = Protobuf.encode_string test1_codec { a = 1l } in 140 - let with_trailer = 141 - good_prefix ^ "\x80" 142 - (* truncated continuation *) 143 - in 231 + let with_trailer = good_prefix ^ "\x80" (* truncated continuation *) in 144 232 match Protobuf.decode_string test1_codec with_trailer with 145 233 | Error _ -> () 146 234 | Ok _ -> Alcotest.fail "trailing truncated varint must be rejected" 147 235 148 - (* --- Length-delim field with length pointing past end. 
--- *) 236 + (* ================================================================= 237 + Malformed UTF-8 in a string field: protobuf spec §5 says strings 238 + and bytes share the wire representation, and decoders must accept 239 + non-UTF-8 byte content. No validation must be performed. 240 + ================================================================= *) 149 241 150 - let test_length_points_past_end () = 151 - (* Tag 14 (str), length 100, but only 2 bytes follow. *) 152 - let bad = "\x72\x64ab" in 153 - match Protobuf.decode_string test1_codec bad with 154 - | Error _ -> () 155 - | Ok _ -> Alcotest.fail "length past end must be rejected" 242 + type with_str = { s : string } 156 243 157 - (* --- Packed field with corrupt body: outer length is valid, inner 158 - varint stream is truncated. --- *) 244 + let with_str_codec : with_str Protobuf.t = 245 + let open Protobuf.Message in 246 + finish 247 + (let* s = required 1 (fun r -> r.s) Protobuf.string in 248 + return { s }) 159 249 160 - type packed_i32 = { xs : int32 list } 250 + let test_non_utf8_string_accepted () = 251 + let raw = "\xff\xfe\xfd" in 252 + (* Not valid UTF-8 *) 253 + let wire = Protobuf.encode_string with_str_codec { s = raw } in 254 + match Protobuf.decode_string with_str_codec wire with 255 + | Error msg -> Alcotest.failf "non-UTF-8 string must decode: %s" msg 256 + | Ok r -> 257 + Alcotest.(check string) "roundtrip" raw r.s 258 + 259 + (* ================================================================= 260 + Map with duplicate key: protobuf spec says last-wins. A malicious 261 + sender can pad the wire with many duplicate keys; decoder must 262 + handle this in linear memory. 
263 + ================================================================= *) 264 + 265 + type dict = { entries : (string * int32) list } 161 266 162 - let packed_codec : packed_i32 Protobuf.t = 267 + let dict_codec : dict Protobuf.t = 163 268 let open Protobuf.Message in 164 269 finish 165 - (let* xs = Protobuf.Message.packed 1 (fun r -> r.xs) Protobuf.int32 in 166 - return { xs }) 270 + (let* entries = 271 + Protobuf.Message.map 1 (fun r -> r.entries) Protobuf.string Protobuf.int32 272 + in 273 + return { entries }) 167 274 168 - let test_packed_corrupt_body () = 169 - (* Tag 1, wire 2, length 2, then \x80\x80 (unterminated varint inside 170 - the packed blob). *) 171 - let bad = "\x0a\x02\x80\x80" in 172 - match Protobuf.decode_string packed_codec bad with 173 - | Error _ -> () 174 - | Ok _ -> Alcotest.fail "corrupt packed body must be rejected" 275 + let test_map_duplicate_keys_accepted () = 276 + (* Encode three entries, two with the same key. Decoder preserves 277 + wire order in the output list; semantic last-wins is the user's 278 + responsibility. *) 279 + let v = 280 + { entries = [ ("k", 1l); ("k", 2l); ("x", 99l); ("k", 3l) ] } 281 + in 282 + let wire = Protobuf.encode_string dict_codec v in 283 + match Protobuf.decode_string dict_codec wire with 284 + | Error msg -> Alcotest.fail msg 285 + | Ok r -> 286 + Alcotest.(check int) "entry count preserved" 4 (List.length r.entries) 175 287 176 - (* --- Huge repeated count: attacker repeats a small tag a million times. Decoder 177 - must handle it (correctness, not DoS per se: the input itself is a million 178 - bytes, so the attacker pays for the cost). This test asserts we don't 179 - quadratic-blow-up. --- *) 288 + (* ================================================================= 289 + Many repeated fields: 10 000 tags at the same field, each small. 290 + Must not scale super-linearly. 
291 + ================================================================= *) 180 292 181 293 type rep = { tags : string list } 182 294 ··· 204 316 let suite = 205 317 ( "hostile", 206 318 [ 207 - Alcotest.test_case "huge length prefix DoS" `Quick test_huge_length_prefix; 208 - Alcotest.test_case "over-long varint" `Quick test_overlong_varint; 209 - Alcotest.test_case "truncated tag" `Quick test_truncated_tag; 319 + Alcotest.test_case "CVE-2015-5237 huge length prefix" `Quick 320 + test_cve_2015_5237_huge_length; 321 + Alcotest.test_case "CVE-2015-5237 over-long varint" `Quick 322 + test_cve_2015_5237_overlong_varint; 323 + Alcotest.test_case "CVE-2015-5237 truncated tag" `Quick 324 + test_cve_2015_5237_truncated_tag; 325 + Alcotest.test_case "CVE-2021-22569 many small groups" `Quick 326 + test_cve_2021_22569_many_small_groups; 327 + Alcotest.test_case "CVE-2022-1941 all-unknown schema" `Quick 328 + test_cve_2022_1941_all_unknown; 329 + Alcotest.test_case "CVE-2022-3171 group wire type 3" `Quick 330 + test_cve_2022_3171_group_wire_type; 331 + Alcotest.test_case "CVE-2022-3171 group wire type 4" `Quick 332 + test_cve_2022_3171_group_wire_type_4; 333 + Alcotest.test_case "CVE-2024-7254 deep known nesting" `Quick 334 + test_cve_2024_7254_deep_nesting_known; 335 + Alcotest.test_case "CVE-2024-7254 deep unknown nesting" `Quick 336 + test_cve_2024_7254_deep_nesting_unknown; 337 + Alcotest.test_case "CVE-2024-47554 length past end" `Quick 338 + test_cve_2024_47554_length_past_end; 339 + Alcotest.test_case "CVE-2024-47554 packed corrupt body" `Quick 340 + test_cve_2024_47554_packed_corrupt_body; 210 341 Alcotest.test_case "reserved tag 0" `Quick test_reserved_tag_zero; 211 - Alcotest.test_case "unsupported wire type" `Quick 212 - test_unsupported_wire_type; 213 - Alcotest.test_case "deep nesting rejected" `Quick test_deep_nesting_rejected; 214 - Alcotest.test_case "shallow nesting ok" `Quick test_shallow_nesting_ok; 215 342 Alcotest.test_case "wire type mismatch" `Quick 
test_wire_type_mismatch; 216 343 Alcotest.test_case "empty input -> defaults" `Quick test_empty_input; 217 344 Alcotest.test_case "overrun rejected" `Quick test_overrun_rejected; 218 - Alcotest.test_case "length past end" `Quick test_length_points_past_end; 219 - Alcotest.test_case "packed corrupt body" `Quick test_packed_corrupt_body; 345 + Alcotest.test_case "non-UTF-8 string accepted" `Quick 346 + test_non_utf8_string_accepted; 347 + Alcotest.test_case "map duplicate keys accepted" `Quick 348 + test_map_duplicate_keys_accepted; 220 349 Alcotest.test_case "many repeated (10k)" `Quick test_many_repeated; 221 350 ] )
+83
test/test_protobuf.ml
··· 302 302 | Error _ -> () 303 303 | Ok _ -> Alcotest.fail "truncated should be rejected" 304 304 305 + (* --- Test 11: map<string, int32> --- *) 306 + 307 + type dict = { entries : (string * int32) list } 308 + 309 + let dict_codec : dict Protobuf.t = 310 + let open Protobuf.Message in 311 + finish 312 + (let* entries = map 1 (fun r -> r.entries) Protobuf.string Protobuf.int32 in 313 + return { entries }) 314 + 315 + let test_map_string_int32 () = 316 + let v = { entries = [ ("alice", 30l); ("bob", 25l); ("", 0l) ] } in 317 + let wire = Protobuf.encode_string dict_codec v in 318 + match Protobuf.decode_string dict_codec wire with 319 + | Error msg -> Alcotest.fail msg 320 + | Ok r -> 321 + Alcotest.(check int) "entry count" 3 (List.length r.entries); 322 + Alcotest.(check (list (pair string int32))) "entries" v.entries r.entries 323 + 324 + (* --- Test 12: unknown field preservation --- *) 325 + 326 + type schema_v1 = { a : int32 } 327 + 328 + let schema_v1 : schema_v1 Protobuf.t = 329 + let open Protobuf.Message in 330 + finish 331 + (let* a = required 1 (fun r -> r.a) Protobuf.int32 in 332 + return { a }) 333 + 334 + type schema_v2 = { a : int32; b : string; c : int32 list } 335 + 336 + let schema_v2 : schema_v2 Protobuf.t = 337 + let open Protobuf.Message in 338 + finish 339 + (let* a = required 1 (fun r -> r.a) Protobuf.int32 in 340 + let* b = required 2 (fun r -> r.b) Protobuf.string in 341 + let* c = packed 3 (fun r -> r.c) Protobuf.int32 in 342 + return { a; b; c }) 343 + 344 + let test_unknown_fields_preserved () = 345 + (* Encode v2, decode with v1 capturing unknowns, re-encode, and assert 346 + the result decodes through v2 to the original field values (a semantic, not byte-for-byte, round-trip). 
*) 347 + let original = 348 + Protobuf.encode_string schema_v2 { a = 42l; b = "hello"; c = [ 1l; 2l; 3l ] } 349 + in 350 + match Protobuf.decode_with_unknowns_string schema_v1 original with 351 + | Error msg -> Alcotest.fail msg 352 + | Ok (v1, unknowns) -> 353 + Alcotest.(check int32) "a decoded" 42l v1.a; 354 + Alcotest.(check bool) "unknowns non-empty" true (String.length unknowns > 0); 355 + (* Re-emit via v1 + unknowns; new bytes should decode through v2 356 + to the same message. *) 357 + let reemitted = 358 + Protobuf.encode_with_unknowns_string schema_v1 ~unknowns { a = v1.a } 359 + in 360 + match Protobuf.decode_string schema_v2 reemitted with 361 + | Error msg -> Alcotest.failf "v2 re-decode failed: %s" msg 362 + | Ok v2' -> 363 + Alcotest.(check int32) "a survived" 42l v2'.a; 364 + Alcotest.(check string) "b survived" "hello" v2'.b; 365 + Alcotest.(check (list int32)) "c survived" [ 1l; 2l; 3l ] v2'.c 366 + 367 + let test_unknowns_empty_when_schema_matches () = 368 + let wire = Protobuf.encode_string schema_v1 { a = 42l } in 369 + match Protobuf.decode_with_unknowns_string schema_v1 wire with 370 + | Error msg -> Alcotest.fail msg 371 + | Ok (_, unknowns) -> 372 + Alcotest.(check int) "no unknowns" 0 (String.length unknowns) 373 + 374 + let test_map_empty () = 375 + let v = { entries = [] } in 376 + let wire = Protobuf.encode_string dict_codec v in 377 + Alcotest.(check int) "empty wire" 0 (String.length wire); 378 + match Protobuf.decode_string dict_codec wire with 379 + | Error msg -> Alcotest.fail msg 380 + | Ok r -> Alcotest.(check (list (pair string int32))) "entries" [] r.entries 381 + 305 382 let suite = 306 383 ( "protobuf", 307 384 [ ··· 322 399 test_unknown_field_skipped; 323 400 Alcotest.test_case "out-of-order fields" `Quick test_fields_out_of_order; 324 401 Alcotest.test_case "truncated rejected" `Quick test_truncated_rejected; 402 + Alcotest.test_case "map<string,int32>" `Quick test_map_string_int32; 403 + Alcotest.test_case "map empty" 
`Quick test_map_empty; 404 + Alcotest.test_case "unknown fields preserved" `Quick 405 + test_unknown_fields_preserved; 406 + Alcotest.test_case "unknowns empty when schema matches" `Quick 407 + test_unknowns_empty_when_schema_matches; 325 408 ] )