My working unpac space for OCaml projects in development
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Add DAG-CBOR codec with spec-compliant decoding

- Add dagcbort library implementing IPLD DAG-CBOR codec
- Enforce all DAG-CBOR spec requirements: tag 42 only, string map keys,
canonical sorting, 64-bit floats, no NaN/Infinity, no indefinite-length
- Add trailing data rejection per spec ("single object required")
- Use structured Decode_error exception for clean error handling
- Improve cbort object decoding dispatch
- Add comprehensive test suites for both cbort and dagcbort

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

+2102 -21
+33 -21
lib/cbort.ml
··· 326 326 327 327 let make_decoder reader = 328 328 { reader; 329 - slice = Bytes.Slice.make (Stdlib.Bytes.create 0) ~first:0 ~length:0; 329 + slice = Bytes.Slice.eod; 330 330 pos = 0; 331 331 byte_count = 0 } 332 332 ··· 601 601 if major <> major_map then dec_type_error (Object map) ~fnd:major; 602 602 ignore (read_byte d); 603 603 let len = read_arg d ai in 604 - (* Read all members into an association list *) 605 604 let dict = Dict.empty in 606 605 let mem_miss = map.mem_decs in 607 - decode_object_members map d len String_map.empty mem_miss dict 606 + (* Dispatch based on object shape *) 607 + match map.shape with 608 + | Object_basic Unknown_skip -> 609 + decode_object_basic map d len Unknown_skip () mem_miss dict 610 + | Object_basic Unknown_error -> 611 + decode_object_basic map d len Unknown_error () mem_miss dict 612 + | Object_basic (Unknown_keep (mems_map, enc)) -> 613 + let builder = mems_map.dec_empty () in 614 + decode_object_basic map d len (Unknown_keep (mems_map, enc)) builder mem_miss dict 615 + | Object_cases _ -> 616 + (* TODO: implement tagged union decoding *) 617 + Jsont.Error.msgf Jsont.Meta.none "CBOR object cases not implemented" 608 618 609 - and decode_object_members : type o. 619 + and decode_object_basic : type o p mems builder. 610 620 (o, o) object_map -> decoder -> int -> 611 - mem_dec String_map.t -> 612 - mem_dec String_map.t -> 613 - Dict.t -> o 621 + (p, mems, builder) unknown_mems -> builder -> 622 + mem_dec String_map.t -> Dict.t -> o 614 623 = 615 - fun map d remaining found mem_miss dict -> 616 - if remaining <= 0 then begin 617 - (* Check for missing required members *) 618 - finish_object_decode map Jsont.Meta.none 619 - Unknown_skip () mem_miss dict 624 + fun map d remaining umems builder mem_miss dict -> 625 + if remaining <= 0 then 626 + finish_object_decode map Jsont.Meta.none umems builder mem_miss dict 620 627 |> apply_dict map.dec 621 - end else begin 622 - (* Read key *) 628 + else begin 623 629 let key_major, key_ai = peek_type d in 624 630 if key_major <> major_text then 625 631 Jsont.Error.msgf Jsont.Meta.none "CBOR map key must be text string"; 626 632 ignore (read_byte d); 627 633 let key_len = read_arg d key_ai in 628 634 let key = read_bytes_to_string d key_len in 629 - (* Find member decoder *) 630 635 match String_map.find_opt key map.mem_decs with 631 636 | Some (Mem_dec m) -> 632 637 let dict = 633 638 try Dict.add m.id (decode m.type' d) dict 634 639 with Jsont.Error e -> 635 - error_push_object Jsont.Meta.none map 636 - (key, Jsont.Meta.none) e 640 + error_push_object Jsont.Meta.none map (key, Jsont.Meta.none) e 637 641 in 638 642 let mem_miss = String_map.remove key mem_miss in 639 - decode_object_members map d (remaining - 1) found mem_miss dict 643 + decode_object_basic map d (remaining - 1) umems builder mem_miss dict 640 644 | None -> 641 - (* Unknown member - skip it *) 642 - skip_value d; 643 - decode_object_members map d (remaining - 1) found mem_miss dict 645 + (* Handle unknown member based on policy *) 646 + match umems with 647 + | Unknown_skip -> 648 + skip_value d; 649 + decode_object_basic map d (remaining - 1) umems builder mem_miss dict 650 + | Unknown_error -> 651 + Jsont.Error.msgf Jsont.Meta.none "Unknown member %S in object" key 652 + | Unknown_keep (mems_map, enc) -> 653 + let v = decode mems_map.mems_type d in 654 + let builder = mems_map.dec_add Jsont.Meta.none key v builder in 655 + decode_object_basic map d (remaining - 1) (Unknown_keep (mems_map, enc)) builder mem_miss dict 644 656 end 645 657 646 658 and decode_any : type a.
+656
lib/dagcbort.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (* DAG-CBOR encoder/decoder for IPLD data model. 7 + Implements strict DAG-CBOR as per https://ipld.io/specs/codecs/dag-cbor/spec/ *) 8 + 9 + open Bytesrw 10 + 11 + (* CBOR constants *) 12 + let major_uint = 0 13 + let major_nint = 1 14 + let major_bytes = 2 15 + let major_text = 3 16 + let major_array = 4 17 + let major_map = 5 18 + let _major_tag = 6 19 + let major_simple = 7 20 + 21 + let simple_false = 20 22 + let simple_true = 21 23 + let simple_null = 22 24 + 25 + let ai_1byte = 24 26 + let ai_2byte = 25 27 + let ai_4byte = 26 28 + let ai_8byte = 27 29 + let ai_indefinite = 31 30 + 31 + let tag_cid = 42 32 + 33 + (* CID module *) 34 + module Cid = struct 35 + type t = string (* Raw CID bytes without multibase prefix *) 36 + 37 + let of_bytes s = s 38 + let to_bytes t = t 39 + 40 + let equal = String.equal 41 + let compare = String.compare 42 + 43 + let pp ppf t = 44 + Format.fprintf ppf "cid:"; 45 + String.iter (fun c -> Format.fprintf ppf "%02x" (Char.code c)) t 46 + end 47 + 48 + (* IPLD value type *) 49 + type t = 50 + | Null 51 + | Bool of bool 52 + | Int of int64 53 + | Float of float 54 + | String of string 55 + | Bytes of string 56 + | Link of Cid.t 57 + | List of t list 58 + | Map of (string * t) list 59 + 60 + (* Errors *) 61 + type error = 62 + | Invalid_tag of int 63 + | Invalid_map_key 64 + | Invalid_float of string 65 + | Unsorted_map_keys 66 + | Non_canonical_int 67 + | Non_canonical_float 68 + | Indefinite_length 69 + | Invalid_cid of string 70 + | Trailing_data 71 + | Cbor_error of string 72 + 73 + let pp_error ppf = function 74 + | Invalid_tag n -> Format.fprintf ppf "Invalid CBOR tag %d (only tag 42 allowed in DAG-CBOR)" n 75 + | Invalid_map_key -> Format.fprintf ppf "Map keys must be strings in DAG-CBOR" 76 + | Invalid_float s -> Format.fprintf ppf "Invalid float value: %s (not allowed in DAG-CBOR)" s 77 + | Unsorted_map_keys -> Format.fprintf ppf "Map keys must be sorted in canonical order" 78 + | Non_canonical_int -> Format.fprintf ppf "Integer not in shortest encoding" 79 + | Non_canonical_float -> Format.fprintf ppf "Float must be 64-bit in DAG-CBOR" 80 + | Indefinite_length -> Format.fprintf ppf "Indefinite-length encoding not allowed in DAG-CBOR" 81 + | Invalid_cid s -> Format.fprintf ppf "Invalid CID: %s" s 82 + | Trailing_data -> Format.fprintf ppf "Trailing data after CBOR value (single object required)" 83 + | Cbor_error s -> Format.fprintf ppf "CBOR error: %s" s 84 + 85 + let error_to_string e = 86 + Format.asprintf "%a" pp_error e 87 + 88 + (* Internal decode exception for structured error handling *) 89 + exception Decode_error of error 90 + 91 + (* Encoder *) 92 + type encoder = { 93 + writer : Bytes.Writer.t; 94 + buf : bytes; 95 + mutable buf_pos : int; 96 + } 97 + 98 + let encoder_buf_size = 4096 99 + 100 + let make_encoder writer = 101 + { writer; buf = Stdlib.Bytes.create encoder_buf_size; buf_pos = 0 } 102 + 103 + let flush_encoder e = 104 + if e.buf_pos > 0 then begin 105 + let slice = Bytes.Slice.make e.buf ~first:0 ~length:e.buf_pos in 106 + Bytes.Writer.write e.writer slice; 107 + e.buf_pos <- 0 108 + end 109 + 110 + let ensure_space e n = 111 + if e.buf_pos + n > Stdlib.Bytes.length e.buf then flush_encoder e 112 + 113 + let write_byte e b = 114 + ensure_space e 1; 115 + Stdlib.Bytes.set_uint8 e.buf e.buf_pos b; 116 + e.buf_pos <- e.buf_pos + 1 117 + 118 + let write_bytes e bs = 119 + let len = String.length bs in 120 + ensure_space e len; 121 + Stdlib.Bytes.blit_string bs 0 e.buf e.buf_pos len; 122 + e.buf_pos <- e.buf_pos + len 123 + 124 + let write_u16_be e v = 125 + ensure_space e 2; 126 + Stdlib.Bytes.set_uint16_be e.buf e.buf_pos v; 127 + e.buf_pos <- e.buf_pos + 2 128 + 129 + let write_u32_be e v = 130 + ensure_space e 4; 131 + Stdlib.Bytes.set_int32_be e.buf e.buf_pos v; 132 + e.buf_pos <- e.buf_pos + 4 133 + 134 + let write_u64_be e v = 135 + ensure_space e 8; 136 + Stdlib.Bytes.set_int64_be e.buf e.buf_pos v; 137 + e.buf_pos <- e.buf_pos + 8 138 + 139 + (* Write CBOR type header with shortest encoding *) 140 + let write_type_arg e major arg = 141 + let h = major lsl 5 in 142 + if arg <= 23 then 143 + write_byte e (h lor arg) 144 + else if arg <= 0xff then begin 145 + write_byte e (h lor ai_1byte); 146 + write_byte e arg 147 + end else if arg <= 0xffff then begin 148 + write_byte e (h lor ai_2byte); 149 + write_u16_be e arg 150 + end else if Int64.(compare (of_int arg) 0xffffffffL) <= 0 then begin 151 + write_byte e (h lor ai_4byte); 152 + write_u32_be e (Int32.of_int arg) 153 + end else begin 154 + write_byte e (h lor ai_8byte); 155 + write_u64_be e (Int64.of_int arg) 156 + end 157 + 158 + let write_type_arg64 e major arg = 159 + let h = major lsl 5 in 160 + if Int64.compare arg 24L < 0 then 161 + write_byte e (h lor Int64.to_int arg) 162 + else if Int64.compare arg 0x100L < 0 then begin 163 + write_byte e (h lor ai_1byte); 164 + write_byte e (Int64.to_int arg) 165 + end else if Int64.compare arg 0x10000L < 0 then begin 166 + write_byte e (h lor ai_2byte); 167 + write_u16_be e (Int64.to_int arg) 168 + end else if Int64.compare arg 0x100000000L < 0 then begin 169 + write_byte e (h lor ai_4byte); 170 + write_u32_be e (Int64.to_int32 arg) 171 + end else begin 172 + write_byte e (h lor ai_8byte); 173 + write_u64_be e arg 174 + end 175 + 176 + (* Canonical map key comparison: sort by length first, then lexicographically *) 177 + let canonical_key_compare (k1, _) (k2, _) = 178 + let len1 = String.length k1 and len2 = String.length k2 in 179 + if len1 <> len2 then compare len1 len2 180 + else String.compare k1 k2 181 + 182 + (* Encode IPLD value *) 183 + let rec encode_value e = function 184 + | Null -> 185 + write_byte e ((major_simple lsl 5) lor simple_null) 186 + | Bool b -> 187 + let v = if b then simple_true else simple_false in 188 + write_byte e ((major_simple lsl 5) lor v) 189 + | Int i -> 190 + if Int64.compare i 0L >= 0 then 191 + write_type_arg64 e major_uint i 192 + else 193 + (* Negative: encode as -1-n *) 194 + write_type_arg64 e major_nint (Int64.sub (Int64.neg i) 1L) 195 + | Float f -> 196 + (* DAG-CBOR requires 64-bit floats *) 197 + write_byte e ((major_simple lsl 5) lor ai_8byte); 198 + write_u64_be e (Int64.bits_of_float f) 199 + | String s -> 200 + write_type_arg e major_text (String.length s); 201 + write_bytes e s 202 + | Bytes b -> 203 + write_type_arg e major_bytes (String.length b); 204 + write_bytes e b 205 + | Link cid -> 206 + (* CID: tag 42 + bytes with 0x00 multibase prefix *) 207 + write_byte e 0xd8; (* tag with 1-byte argument *) 208 + write_byte e tag_cid; 209 + let cid_bytes = Cid.to_bytes cid in 210 + write_type_arg e major_bytes (String.length cid_bytes + 1); 211 + write_byte e 0x00; (* multibase identity prefix *) 212 + write_bytes e cid_bytes 213 + | List items -> 214 + write_type_arg e major_array (List.length items); 215 + List.iter (encode_value e) items 216 + | Map entries -> 217 + (* Sort keys in canonical order *) 218 + let sorted = List.sort canonical_key_compare entries in 219 + write_type_arg e major_map (List.length sorted); 220 + List.iter (fun (k, v) -> 221 + write_type_arg e major_text (String.length k); 222 + write_bytes e k; 223 + encode_value e v 224 + ) sorted 225 + 226 + let encode_to_writer v ~eod writer = 227 + (* Validate no NaN/Infinity *) 228 + let rec validate = function 229 + | Float f when Float.is_nan f -> Error (Invalid_float "NaN") 230 + | Float f when Float.is_infinite f -> 231 + Error (Invalid_float (if f > 0. then "Infinity" else "-Infinity")) 232 + | List items -> 233 + List.fold_left (fun acc v -> 234 + match acc with Error _ -> acc | Ok () -> validate v 235 + ) (Ok ()) items 236 + | Map entries -> 237 + List.fold_left (fun acc (_, v) -> 238 + match acc with Error _ -> acc | Ok () -> validate v 239 + ) (Ok ()) entries 240 + | _ -> Ok () 241 + in 242 + match validate v with 243 + | Error e -> Error e 244 + | Ok () -> 245 + let e = make_encoder writer in 246 + encode_value e v; 247 + flush_encoder e; 248 + if eod then Bytes.Writer.write e.writer Bytes.Slice.eod; 249 + Ok () 250 + 251 + let encode v ~eod w = encode_to_writer v ~eod w 252 + 253 + let encode_string v = 254 + let buf = Buffer.create 256 in 255 + let w = Bytes.Writer.of_buffer buf in 256 + match encode v ~eod:false w with 257 + | Error e -> Error e 258 + | Ok () -> Ok (Buffer.contents buf) 259 + 260 + (* Decoder *) 261 + type decoder = { 262 + reader : Bytes.Reader.t; 263 + mutable slice : Bytes.Slice.t; 264 + mutable pos : int; 265 + mutable byte_count : int; 266 + strict : bool; 267 + } 268 + 269 + let make_decoder ?(strict=true) reader = 270 + { reader; slice = Bytes.Slice.eod; pos = 0; byte_count = 0; strict } 271 + 272 + let decoder_refill d = 273 + d.slice <- Bytes.Reader.read d.reader; 274 + d.pos <- Bytes.Slice.first d.slice 275 + 276 + let available d = 277 + Bytes.Slice.length d.slice - (d.pos - Bytes.Slice.first d.slice) 278 + 279 + let read_byte d = 280 + if available d = 0 then decoder_refill d; 281 + if available d = 0 then 282 + raise (Decode_error (Cbor_error "Unexpected end of DAG-CBOR data")); 283 + let b = Stdlib.Bytes.get_uint8 (Bytes.Slice.bytes d.slice) d.pos in 284 + d.pos <- d.pos + 1; 285 + d.byte_count <- d.byte_count + 1; 286 + b 287 + 288 + let read_u16_be d = 289 + if available d < 2 then begin 290 + let b1 = read_byte d in 291 + let b2 = read_byte d in 292 + (b1 lsl 8) lor b2 293 + end else begin 294 + let v = Stdlib.Bytes.get_uint16_be (Bytes.Slice.bytes d.slice) d.pos in 295 + d.pos <- d.pos + 2; 296 + d.byte_count <- d.byte_count + 2; 297 + v 298 + end 299 + 300 + let read_u32_be d = 301 + if available d < 4 then begin 302 + let b1 = read_byte d in 303 + let b2 = read_byte d in 304 + let b3 = read_byte d in 305 + let b4 = read_byte d in 306 + (b1 lsl 24) lor (b2 lsl 16) lor (b3 lsl 8) lor b4 307 + end else begin 308 + let v = Stdlib.Bytes.get_int32_be (Bytes.Slice.bytes d.slice) d.pos in 309 + d.pos <- d.pos + 4; 310 + d.byte_count <- d.byte_count + 4; 311 + Int32.to_int v 312 + end 313 + 314 + let read_u64_be d = 315 + if available d < 8 then begin 316 + let hi = Int64.of_int (read_u32_be d) in 317 + let lo = Int64.of_int32 (Int32.of_int (read_u32_be d)) in 318 + Int64.(logor (shift_left hi 32) (logand lo 0xffffffffL)) 319 + end else begin 320 + let v = Stdlib.Bytes.get_int64_be (Bytes.Slice.bytes d.slice) d.pos in 321 + d.pos <- d.pos + 8; 322 + d.byte_count <- d.byte_count + 8; 323 + v 324 + end 325 + 326 + let read_bytes_to_string d len = 327 + let buf = Stdlib.Bytes.create len in 328 + let rec fill offset remaining = 329 + if remaining <= 0 then () 330 + else begin 331 + if available d = 0 then decoder_refill d; 332 + let avail = available d in 333 + if avail = 0 then 334 + raise (Decode_error (Cbor_error "Unexpected end of DAG-CBOR data")); 335 + let take = min avail remaining in 336 + Stdlib.Bytes.blit (Bytes.Slice.bytes d.slice) d.pos buf offset take; 337 + d.pos <- d.pos + take; 338 + d.byte_count <- d.byte_count + take; 339 + fill (offset + take) (remaining - take) 340 + end 341 + in 342 + fill 0 len; 343 + Stdlib.Bytes.unsafe_to_string buf 344 + 345 + (* Read CBOR argument, checking for canonical encoding if strict *) 346 + let read_arg d ai = 347 + if ai <= 23 then ai 348 + else if ai = ai_1byte then begin 349 + let v = read_byte d in 350 + if d.strict && v < 24 then raise (Decode_error Non_canonical_int); 351 + v 352 + end 353 + else if ai = ai_2byte then begin 354 + let v = read_u16_be d in 355 + if d.strict && v <= 0xff then raise (Decode_error Non_canonical_int); 356 + v 357 + end 358 + else if ai = ai_4byte then begin 359 + let v = read_u32_be d in 360 + if d.strict && v <= 0xffff then raise (Decode_error Non_canonical_int); 361 + v 362 + end 363 + else if ai = ai_8byte then begin 364 + let v = Int64.to_int (read_u64_be d) in 365 + if d.strict && v <= 0xffffffff then raise (Decode_error Non_canonical_int); 366 + v 367 + end 368 + else if ai = ai_indefinite then 369 + raise (Decode_error Indefinite_length) 370 + else 371 + raise (Decode_error (Cbor_error (Printf.sprintf "Invalid additional info: %d" ai))) 372 + 373 + let read_arg64 d ai = 374 + if ai <= 23 then Int64.of_int ai 375 + else if ai = ai_1byte then begin 376 + let v = read_byte d in 377 + if d.strict && v < 24 then raise (Decode_error Non_canonical_int); 378 + Int64.of_int v 379 + end 380 + else if ai = ai_2byte then begin 381 + let v = read_u16_be d in 382 + if d.strict && v <= 0xff then raise (Decode_error Non_canonical_int); 383 + Int64.of_int v 384 + end 385 + else if ai = ai_4byte then begin 386 + let v = read_u32_be d in 387 + if d.strict && Int64.compare (Int64.of_int v) 0x10000L <= 0 then 388 + raise (Decode_error Non_canonical_int); 389 + Int64.of_int v 390 + end 391 + else if ai = ai_8byte then begin 392 + let v = read_u64_be d in 393 + if d.strict && Int64.compare v 0x100000000L <= 0 then 394 + raise (Decode_error Non_canonical_int); 395 + v 396 + end 397 + else if ai = ai_indefinite then 398 + raise (Decode_error Indefinite_length) 399 + else 400 + raise (Decode_error (Cbor_error (Printf.sprintf "Invalid additional info: %d" ai))) 401 + 402 + (* Check if map keys are in canonical order *) 403 + let check_key_order d prev_key new_key = 404 + if d.strict then begin 405 + let cmp = canonical_key_compare (prev_key, Null) (new_key, Null) in 406 + if cmp >= 0 then raise (Decode_error Unsorted_map_keys) 407 + end 408 + 409 + (* Decode IPLD value *) 410 + let rec decode_value d = 411 + let b = read_byte d in 412 + let major = b lsr 5 in 413 + let ai = b land 0x1f in 414 + 415 + if ai = ai_indefinite && major <> major_simple then 416 + raise (Decode_error Indefinite_length); 417 + 418 + match major with 419 + | 0 -> (* unsigned int *) 420 + Int (read_arg64 d ai) 421 + | 1 -> (* negative int *) 422 + let v = read_arg64 d ai in 423 + Int (Int64.sub (Int64.neg v) 1L) 424 + | 2 -> (* byte string *) 425 + let len = read_arg d ai in 426 + Bytes (read_bytes_to_string d len) 427 + | 3 -> (* text string *) 428 + let len = read_arg d ai in 429 + String (read_bytes_to_string d len) 430 + | 4 -> (* array *) 431 + let len = read_arg d ai in 432 + let items = List.init len (fun _ -> decode_value d) in 433 + List items 434 + | 5 -> (* map *) 435 + let len = read_arg d ai in 436 + let rec read_entries i prev_key acc = 437 + if i >= len then List.rev acc 438 + else begin 439 + (* Read key - must be text string *) 440 + let kb = read_byte d in 441 + let kmajor = kb lsr 5 in 442 + let kai = kb land 0x1f in 443 + if kmajor <> major_text then 444 + raise (Decode_error Invalid_map_key); 445 + let klen = read_arg d kai in 446 + let key = read_bytes_to_string d klen in 447 + (* Check canonical ordering *) 448 + (match prev_key with 449 + | Some pk -> check_key_order d pk key 450 + | None -> ()); 451 + let value = decode_value d in 452 + read_entries (i + 1) (Some key) ((key, value) :: acc) 453 + end 454 + in 455 + Map (read_entries 0 None []) 456 + | 6 -> (* tag *) 457 + let tag = read_arg d ai in 458 + if tag <> tag_cid then 459 + raise (Decode_error (Invalid_tag tag)); 460 + (* CID: expect byte string with 0x00 prefix *) 461 + let vb = read_byte d in 462 + let vmajor = vb lsr 5 in 463 + let vai = vb land 0x1f in 464 + if vmajor <> major_bytes then 465 + raise (Decode_error (Invalid_cid "must be encoded as byte string")); 466 + let len = read_arg d vai in 467 + if len < 1 then 468 + raise (Decode_error (Invalid_cid "too short")); 469 + let prefix = read_byte d in 470 + if prefix <> 0x00 then 471 + raise (Decode_error (Invalid_cid (Printf.sprintf "must have 0x00 multibase prefix, got 0x%02x" prefix))); 472 + let cid_bytes = read_bytes_to_string d (len - 1) in 473 + Link (Cid.of_bytes cid_bytes) 474 + | 7 -> (* simple/float *) 475 + (match ai with 476 + | 20 -> Bool false 477 + | 21 -> Bool true 478 + | 22 -> Null 479 + | 23 -> raise (Decode_error (Invalid_float "undefined")) 480 + | 25 -> (* half-precision float - not allowed in strict DAG-CBOR *) 481 + if d.strict then 482 + raise (Decode_error Non_canonical_float); 483 + let bits = read_u16_be d in 484 + let sign = if bits land 0x8000 <> 0 then -1.0 else 1.0 in 485 + let exp = (bits lsr 10) land 0x1f in 486 + let mant = bits land 0x3ff in 487 + let f = 488 + if exp = 0 then sign *. Float.ldexp (float_of_int mant) (-24) 489 + else if exp = 31 then 490 + if mant = 0 then sign *. Float.infinity else Float.nan 491 + else sign *. Float.ldexp (float_of_int (mant + 1024)) (exp - 25) 492 + in 493 + if Float.is_nan f then raise (Decode_error (Invalid_float "NaN")); 494 + if Float.is_infinite f then raise (Decode_error (Invalid_float "Infinity")); 495 + Float f 496 + | 26 -> (* single-precision float - not allowed in strict DAG-CBOR *) 497 + if d.strict then 498 + raise (Decode_error Non_canonical_float); 499 + let bits = Int32.of_int (read_u32_be d) in 500 + let f = Int32.float_of_bits bits in 501 + if Float.is_nan f then raise (Decode_error (Invalid_float "NaN")); 502 + if Float.is_infinite f then raise (Decode_error (Invalid_float "Infinity")); 503 + Float f 504 + | 27 -> (* double-precision float *) 505 + let bits = read_u64_be d in 506 + let f = Int64.float_of_bits bits in 507 + if Float.is_nan f then raise (Decode_error (Invalid_float "NaN")); 508 + if Float.is_infinite f then raise (Decode_error (Invalid_float "Infinity")); 509 + Float f 510 + | _ -> 511 + raise (Decode_error (Cbor_error (Printf.sprintf "Simple value %d not allowed in DAG-CBOR" ai)))) 512 + | _ -> 513 + raise (Decode_error (Cbor_error (Printf.sprintf "Unknown CBOR major type: %d" major))) 514 + 515 + let decode ?(strict=true) reader = 516 + let d = make_decoder ~strict reader in 517 + decoder_refill d; 518 + try 519 + let v = decode_value d in 520 + (* DAG-CBOR requires single top-level object with no trailing data *) 521 + if d.strict then begin 522 + (* Check if there's data remaining in current slice *) 523 + if available d > 0 then 524 + raise (Decode_error Trailing_data); 525 + (* Check if reader has more data *) 526 + decoder_refill d; 527 + if available d > 0 then 528 + raise (Decode_error Trailing_data) 529 + end; 530 + Ok v 531 + with 532 + | Decode_error e -> Error e 533 + | e -> Error (Cbor_error (Printexc.to_string e)) 534 + 535 + let decode_string ?(strict=true) s = 536 + let r = Bytes.Reader.of_string s in 537 + decode ~strict r 538 + 539 + (* Equality and comparison *) 540 + let rec equal a b = match a, b with 541 + | Null, Null -> true 542 + | Bool a, Bool b -> a = b 543 + | Int a, Int b -> Int64.equal a b 544 + | Float a, Float b -> Float.equal a b 545 + | String a, String b -> String.equal a b 546 + | Bytes a, Bytes b -> String.equal a b 547 + | Link a, Link b -> Cid.equal a b 548 + | List a, List b -> List.equal equal a b 549 + | Map a, Map b -> 550 + List.length a = List.length b && 551 + List.for_all2 (fun (k1, v1) (k2, v2) -> k1 = k2 && equal v1 v2) a b 552 + | _ -> false 553 + 554 + let rec compare a b = match a, b with 555 + | Null, Null -> 0 556 + | Null, _ -> -1 | _, Null -> 1 557 + | Bool a, Bool b -> Stdlib.compare a b 558 + | Bool _, _ -> -1 | _, Bool _ -> 1 559 + | Int a, Int b -> Int64.compare a b 560 + | Int _, _ -> -1 | _, Int _ -> 1 561 + | Float a, Float b -> Float.compare a b 562 + | Float _, _ -> -1 | _, Float _ -> 1 563 + | String a, String b -> String.compare a b 564 + | String _, _ -> -1 | _, String _ -> 1 565 + | Bytes a, Bytes b -> String.compare a b 566 + | Bytes _, _ -> -1 | _, Bytes _ -> 1 567 + | Link a, Link b -> Cid.compare a b 568 + | Link _, _ -> -1 | _, Link _ -> 1 569 + | List a, List b -> List.compare compare a b 570 + | List _, _ -> -1 | _, List _ -> 1 571 + | Map a, Map b -> 572 + let cmp_entry (k1, v1) (k2, v2) = 573 + let c = String.compare k1 k2 in 574 + if c <> 0 then c else compare v1 v2 575 + in 576 + List.compare cmp_entry a b 577 + 578 + (* Pretty printer *) 579 + let rec pp ppf = function 580 + | Null -> Format.fprintf ppf "null" 581 + | Bool b -> Format.fprintf ppf "%b" b 582 + | Int i -> Format.fprintf ppf "%Ld" i 583 + | Float f -> Format.fprintf ppf "%g" f 584 + | String s -> Format.fprintf ppf "%S" s 585 + | Bytes b -> 586 + Format.fprintf ppf "bytes("; 587 + String.iter (fun c -> Format.fprintf ppf "%02x" (Char.code c)) b; 588 + Format.fprintf ppf ")" 589 + | Link cid -> Cid.pp ppf cid 590 + | List items -> 591 + Format.fprintf ppf "[%a]" 592 + (Format.pp_print_list ~pp_sep:(fun ppf () -> Format.fprintf ppf ", ") pp) items 593 + | Map entries -> 594 + Format.fprintf ppf "{%a}" 595 + (Format.pp_print_list ~pp_sep:(fun ppf () -> Format.fprintf ppf ", ") 596 + (fun ppf (k, v) -> Format.fprintf ppf "%S: %a" k pp v)) entries 597 + 598 + (* JSON interop *) 599 + let rec of_json (j : Jsont.json) : (t, string) result = 600 + match j with 601 + | Jsont.Null _ -> Ok Null 602 + | Jsont.Bool (b, _) -> Ok (Bool b) 603 + | Jsont.Number (f, _) -> 604 + if Float.is_nan f then Error "NaN not allowed in IPLD" 605 + else if Float.is_infinite f then Error "Infinity not allowed in IPLD" 606 + else 607 + let i = Int64.of_float f in 608 + if Float.equal (Int64.to_float i) f then Ok (Int i) 609 + else Ok (Float f) 610 + | Jsont.String (s, _) -> Ok (String s) 611 + | Jsont.Array (items, _) -> 612 + let rec conv acc = function 613 + | [] -> Ok (List (List.rev acc)) 614 + | h :: t -> 615 + match of_json h with 616 + | Error e -> Error e 617 + | Ok v -> conv (v :: acc) t 618 + in 619 + conv [] items 620 + | Jsont.Object (mems, _) -> 621 + let rec conv acc = function 622 + | [] -> Ok (Map (List.sort canonical_key_compare (List.rev acc))) 623 + | ((name, _), v) :: t -> 624 + match of_json v with 625 + | Error e -> Error e 626 + | Ok v' -> conv ((name, v') :: acc) t 627 + in 628 + conv [] mems 629 + 630 + let rec to_json (v : t) : (Jsont.json, string) result = 631 + match v with 632 + | Null -> Ok (Jsont.Json.null ()) 633 + | Bool b -> Ok (Jsont.Json.bool b) 634 + | Int i -> Ok (Jsont.Json.number (Int64.to_float i)) 635 + | Float f -> Ok (Jsont.Json.number f) 636 + | String s -> Ok (Jsont.Json.string s) 637 + | Bytes _ -> Error "Bytes cannot be represented in JSON" 638 + | Link _ -> Error "Link (CID) cannot be represented in JSON" 639 + | List items -> 640 + let rec conv acc = function 641 + | [] -> Ok (Jsont.Json.list (List.rev acc)) 642 + | h :: t -> 643 + match to_json h with 644 + | Error e -> Error e 645 + | Ok v -> conv (v :: acc) t 646 + in 647 + conv [] items 648 + | Map entries -> 649 + let rec conv acc = function 650 + | [] -> Ok (Jsont.Json.object' (List.rev acc)) 651 + | (k, v) :: t -> 652 + match to_json v with 653 + | Error e -> Error e 654 + | Ok v' -> conv (Jsont.Json.mem (Jsont.Json.name k) v' :: acc) t 655 + in 656 + conv [] entries
+163
lib/dagcbort.mli
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** DAG-CBOR codec for IPLD data. 7 + 8 + This module provides DAG-CBOR encoding and decoding for the IPLD data model. 9 + DAG-CBOR is a strict subset of CBOR designed for content-addressed data 10 + structures in IPLD (InterPlanetary Linked Data). 11 + 12 + {2 Key Differences from CBOR} 13 + 14 + DAG-CBOR enforces: 15 + - Only tag 42 (CID links) is allowed; other tags are rejected 16 + - Map keys must be strings (no integer keys) 17 + - Floats are always encoded as 64-bit 18 + - No NaN, Infinity, or -Infinity values 19 + - Deterministic encoding (shortest integers, sorted map keys) 20 + - No indefinite-length encoding 21 + 22 + {2 IPLD Data Model} 23 + 24 + The IPLD data model extends JSON with: 25 + - {b Bytes}: Binary data (CBOR major type 2) 26 + - {b Link}: Content identifiers (CIDs) encoded with tag 42 27 + 28 + {2 Quick Start} 29 + 30 + {[ 31 + open Dagcbort 32 + 33 + (* Create an IPLD value *) 34 + let value = Map [ 35 + ("name", String "Alice"); 36 + ("data", Bytes "\x01\x02\x03"); 37 + ("link", Link (Cid.of_bytes some_cid_bytes)); 38 + ] 39 + 40 + (* Encode to DAG-CBOR *) 41 + let cbor_bytes = encode_string value 42 + 43 + (* Decode from DAG-CBOR *) 44 + let decoded = decode_string cbor_bytes 45 + ]} 46 + *) 47 + 48 + (** {1 IPLD Data Model} *) 49 + 50 + (** Content Identifier (CID) - opaque bytes representing a content address. 51 + 52 + CIDs in DAG-CBOR are encoded as CBOR tag 42 containing a byte string 53 + with a 0x00 multibase identity prefix. *) 54 + module Cid : sig 55 + type t 56 + (** An opaque CID value. *) 57 + 58 + val of_bytes : string -> t 59 + (** [of_bytes s] creates a CID from raw bytes (without multibase prefix). 60 + The bytes should be a valid CID encoding. *) 61 + 62 + val to_bytes : t -> string 63 + (** [to_bytes cid] returns the raw CID bytes (without multibase prefix). *) 64 + 65 + val equal : t -> t -> bool 66 + (** [equal a b] is [true] iff CIDs [a] and [b] are identical. *) 67 + 68 + val compare : t -> t -> int 69 + (** [compare a b] is a total ordering on CIDs. *) 70 + 71 + val pp : Format.formatter -> t -> unit 72 + (** [pp ppf cid] pretty-prints [cid] as hex. *) 73 + end 74 + 75 + (** IPLD value type representing the full IPLD data model. *) 76 + type t = 77 + | Null 78 + | Bool of bool 79 + | Int of int64 80 + | Float of float 81 + | String of string 82 + | Bytes of string (** Binary data *) 83 + | Link of Cid.t (** Content identifier link *) 84 + | List of t list 85 + | Map of (string * t) list (** Keys must be strings, stored sorted *) 86 + 87 + (** {1 Errors} *) 88 + 89 + type error = 90 + | Invalid_tag of int 91 + (** CBOR tag other than 42 encountered *) 92 + | Invalid_map_key 93 + (** Map key is not a string *) 94 + | Invalid_float of string 95 + (** NaN, Infinity, or -Infinity encountered *) 96 + | Unsorted_map_keys 97 + (** Map keys not in canonical order (decode with strict=true) *) 98 + | Non_canonical_int 99 + (** Integer not in shortest encoding (decode with strict=true) *) 100 + | Non_canonical_float 101 + (** Float not encoded as 64-bit (decode with strict=true) *) 102 + | Indefinite_length 103 + (** Indefinite-length encoding encountered *) 104 + | Invalid_cid of string 105 + (** CID encoding error *) 106 + | Trailing_data 107 + (** Extra bytes after the CBOR value (decode with strict=true) *) 108 + | Cbor_error of string 109 + (** Underlying CBOR decode error *) 110 + 111 + val pp_error : Format.formatter -> error -> unit 112 + (** Pretty-print an error. *) 113 + 114 + val error_to_string : error -> string 115 + (** Convert error to string. *) 116 + 117 + (** {1 Decode} *) 118 + 119 + val decode : ?strict:bool -> Bytesrw.Bytes.Reader.t -> (t, error) result 120 + (** [decode ?strict r] decodes a DAG-CBOR value from reader [r]. 121 + 122 + @param strict If [true] (default), enforces canonical encoding: 123 + sorted map keys, shortest integer encoding, 64-bit floats only. 124 + If [false], accepts non-canonical encodings. *) 125 + 126 + val decode_string : ?strict:bool -> string -> (t, error) result 127 + (** [decode_string ?strict s] decodes a DAG-CBOR value from string [s]. *) 128 + 129 + (** {1 Encode} *) 130 + 131 + val encode : t -> eod:bool -> Bytesrw.Bytes.Writer.t -> (unit, error) result 132 + (** [encode v ~eod w] encodes IPLD value [v] as DAG-CBOR on writer [w]. 133 + Encoding is always strict/canonical: 134 + - Integers use shortest encoding 135 + - Map keys are sorted by byte-wise lexical order 136 + - Floats are always 64-bit 137 + 138 + @param eod If [true], writes end-of-data marker after encoding. *) 139 + 140 + val encode_string : t -> (string, error) result 141 + (** [encode_string v] encodes IPLD value [v] as a DAG-CBOR byte string. *) 142 + 143 + (** {1 Utilities} *) 144 + 145 + val equal : t -> t -> bool 146 + (** [equal a b] is structural equality on IPLD values. *) 147 + 148 + val compare : t -> t -> int 149 + (** [compare a b] is a total ordering on IPLD values. *) 150 + 151 + val pp : Format.formatter -> t -> unit 152 + (** [pp ppf v] pretty-prints IPLD value [v]. *) 153 + 154 + (** {1 JSON Interop} *) 155 + 156 + val of_json : Jsont.json -> (t, string) result 157 + (** [of_json j] converts a JSON value to IPLD. 158 + Returns [Error] if the JSON contains unsupported values (NaN, Infinity). *) 159 + 160 + val to_json : t -> (Jsont.json, string) result 161 + (** [to_json v] converts an IPLD value to JSON. 162 + Returns [Error] if the value contains Bytes or Link types 163 + (which have no JSON representation). *)
+6
lib/dune
··· 1 1 (library 2 2 (name cbort) 3 + (modules cbort) 4 + (libraries jsont bytesrw)) 5 + 6 + (library 7 + (name dagcbort) 8 + (modules dagcbort) 3 9 (libraries jsont bytesrw))
+11
test/dune
··· 1 + (test 2 + (name test_cbort) 3 + (libraries cbort jsont jsont_bytesrw bytesrw)) 4 + 5 + (test 6 + (name test_dagcbort) 7 + (libraries dagcbort jsont bytesrw)) 8 + 9 + (executable 10 + (name test_trailing) 11 + (libraries dagcbort))
+642
test/test_cbort.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (* Comprehensive test suite for cbort using RFC 8949 Appendix A test vectors *) 7 + 8 + [@@@warning "-26-27"] 9 + 10 + open Bytesrw 11 + 12 + (* Base64 decoding *) 13 + module Base64 = struct 14 + let decode_char c = 15 + match c with 16 + | 'A'..'Z' -> Char.code c - Char.code 'A' 17 + | 'a'..'z' -> Char.code c - Char.code 'a' + 26 18 + | '0'..'9' -> Char.code c - Char.code '0' + 52 19 + | '+' -> 62 20 + | '/' -> 63 21 + | '=' -> 0 (* padding *) 22 + | _ -> failwith (Printf.sprintf "Invalid base64 character: %c" c) 23 + 24 + let decode s = 25 + let len = String.length s in 26 + if len = 0 then "" else 27 + let padding = 28 + if len >= 2 && s.[len-1] = '=' && s.[len-2] = '=' then 2 29 + else if len >= 1 && s.[len-1] = '=' then 1 30 + else 0 31 + in 32 + let out_len = (len / 4) * 3 - padding in 33 + let out = Bytes.create out_len in 34 + let rec loop i o = 35 + if i >= len then () 36 + else begin 37 + let a = decode_char s.[i] in 38 + let b = if i+1 < len then decode_char s.[i+1] else 0 in 39 + let c = if i+2 < len then decode_char s.[i+2] else 0 in 40 + let d = if i+3 < len then decode_char s.[i+3] else 0 in 41 + let n = (a lsl 18) lor (b lsl 12) lor (c lsl 6) lor d in 42 + if o < out_len then Bytes.set out o (Char.chr ((n lsr 16) land 0xff)); 43 + if o+1 < out_len then Bytes.set out (o+1) (Char.chr ((n lsr 8) land 0xff)); 44 + if o+2 < out_len then Bytes.set out (o+2) (Char.chr (n land 0xff)); 45 + loop (i+4) (o+3) 46 + end 47 + in 48 + loop 0 0; 49 + Bytes.unsafe_to_string out 50 + end 51 + 52 + (* Hex decoding *) 53 + module Hex = struct 54 + let decode_char c = 55 + match c with 56 + | '0'..'9' -> Char.code c - Char.code '0' 57 + | 'a'..'f' -> Char.code c - Char.code 'a' + 10 58 + | 'A'..'F' -> Char.code c - Char.code 'A' + 10 59 + | _ -> failwith (Printf.sprintf "Invalid hex character: %c" c) 60 + 61 + let decode s = 62 + let len = String.length s in 63 + if len mod 2 <> 0 then failwith "Hex string must have even length"; 64 + let out = Bytes.create (len / 2) in 65 + for i = 0 to (len / 2) - 1 do 66 + let hi = decode_char s.[i*2] in 67 + let lo = decode_char s.[i*2+1] in 68 + Bytes.set out i (Char.chr ((hi lsl 4) lor lo)) 69 + done; 70 + Bytes.unsafe_to_string out 71 + 72 + let encode s = 73 + let len = String.length s in 74 + let out = Bytes.create (len * 2) in 75 + for i = 0 to len - 1 do 76 + let c = Char.code s.[i] in 77 + let hi = c lsr 4 in 78 + let lo = c land 0xf in 79 + let hex_char n = if n < 10 then Char.chr (n + Char.code '0') 80 + else Char.chr (n - 10 + Char.code 'a') in 81 + Bytes.set out (i*2) (hex_char hi); 82 + Bytes.set out (i*2+1) (hex_char lo) 83 + done; 84 + Bytes.unsafe_to_string out 85 + end 86 + 87 + (* Test vector type *) 88 + type decoded_value = 89 + | Null 90 + | Bool of bool 91 + | Int of int 92 + | Float of float 93 + | String of string 94 + | Array of decoded_value list 95 + | Object of (string * decoded_value) list 96 + 97 + type test_vector = { 98 + cbor_base64 : string; 99 + hex : string; 100 + roundtrip : bool; 101 + decoded : decoded_value option; 102 + diagnostic : string option; 103 + } 104 + 105 + (* Parse test vectors using jsont *) 106 + module StringMap = Map.Make(String) 107 + 108 + let decoded_value_jsont : decoded_value Jsont.t = 109 + let rec value = lazy (Jsont.any 110 + ~dec_null:(Jsont.null Null) 111 + ~dec_bool:(Jsont.map ~dec:(fun b -> Bool b) Jsont.bool) 112 + ~dec_number:(Jsont.map ~dec:(fun n -> 113 + let i = Float.to_int n in 114 + if Float.equal (Float.of_int i) n then Int i else Float n 115 + ) Jsont.number) 116 + ~dec_string:(Jsont.map ~dec:(fun s -> String s) Jsont.string) 117 + ~dec_array:(Jsont.map ~dec:(fun a -> Array a) (Jsont.list (Jsont.rec' value))) 118 + ~dec_object:(Jsont.map ~dec:(fun ms -> Object ms) 119 + (Jsont.map ~dec:(fun m -> List.of_seq (StringMap.to_seq m)) 120 + (Jsont.Object.as_string_map (Jsont.rec' value)))) 121 + ()) 122 + in 123 + Lazy.force value 124 + 125 + let test_vector_jsont : test_vector Jsont.t = 126 + Jsont.Object.(map (fun cbor hex roundtrip decoded diagnostic -> 127 + { cbor_base64 = cbor; hex; roundtrip; decoded; diagnostic }) 128 + |> mem "cbor" Jsont.string ~enc:(fun v -> v.cbor_base64) 129 + |> mem "hex" Jsont.string ~enc:(fun v -> v.hex) 130 + |> mem "roundtrip" Jsont.bool ~enc:(fun v -> v.roundtrip) 131 + |> opt_mem "decoded" decoded_value_jsont ~enc:(fun v -> v.decoded) 132 + |> opt_mem "diagnostic" Jsont.string ~enc:(fun v -> v.diagnostic) 133 + |> finish) 134 + 135 + let test_vectors_jsont : test_vector list Jsont.t = 136 + Jsont.list test_vector_jsont 137 + 138 + (* Types for unit tests *) 139 + type simple_obj = { obj_name: string; obj_age: int } 140 + type inner = { x: int; y: int } 141 + type outer = { point: inner; label: string } 142 + 143 + (* Test result tracking *) 144 + type test_result = { 145 + mutable passed : int; 146 + mutable failed : int; 147 + mutable skipped : int; 148 + mutable errors : string list; 149 + } 150 + 151 + let results = { passed = 0; failed = 0; skipped = 0; errors = [] } 152 + 153 + let pass () = results.passed <- results.passed + 1 154 + let fail msg = 155 + results.failed <- results.failed + 1; 156 + results.errors <- msg :: results.errors 157 + let skip () = results.skipped <- results.skipped + 1 158 + 159 + (* Value comparison with tolerance for floats *) 160 + let rec values_equal v1 v2 = 161 + match v1, v2 with 162 + | Null, Null -> true 163 + | Bool b1, Bool b2 -> b1 = b2 164 + | Int i1, Int i2 -> i1 = i2 165 + | Float f1, Float f2 -> 166 + (* Handle NaN and infinity *) 167 + (Float.is_nan f1 && Float.is_nan f2) || 168 + (Float.is_infinite f1 && Float.is_infinite f2 && (f1 > 0.) = (f2 > 0.)) || 169 + Float.equal f1 f2 || 170 + (* Allow small tolerance for float comparisons *) 171 + abs_float (f1 -. f2) < 1e-10 *. max (abs_float f1) (abs_float f2) 172 + | Int i, Float f | Float f, Int i -> 173 + Float.equal (Float.of_int i) f 174 + | String s1, String s2 -> s1 = s2 175 + | Array a1, Array a2 -> 176 + List.length a1 = List.length a2 && 177 + List.for_all2 values_equal a1 a2 178 + | Object o1, Object o2 -> 179 + List.length o1 = List.length o2 && 180 + List.for_all2 (fun (k1, v1) (k2, v2) -> k1 = k2 && values_equal v1 v2) 181 + (List.sort compare o1) (List.sort compare o2) 182 + | _ -> false 183 + 184 + let pp_value ppf v = 185 + let rec pp ppf = function 186 + | Null -> Format.fprintf ppf "null" 187 + | Bool b -> Format.fprintf ppf "%b" b 188 + | Int i -> Format.fprintf ppf "%d" i 189 + | Float f -> Format.fprintf ppf "%g" f 190 + | String s -> Format.fprintf ppf "%S" s 191 + | Array a -> 192 + Format.fprintf ppf "[%a]" 193 + (Format.pp_print_list ~pp_sep:(fun ppf () -> Format.fprintf ppf ", ") pp) a 194 + | Object o -> 195 + Format.fprintf ppf "{%a}" 196 + (Format.pp_print_list ~pp_sep:(fun ppf () -> Format.fprintf ppf ", ") 197 + (fun ppf (k, v) -> Format.fprintf ppf "%S: %a" k pp v)) o 198 + in 199 + pp ppf v 200 + 201 + (* Convert decoded_value to something we can encode with cbort *) 202 + 203 + (* Jsont codec for decoded_value that we can use with cbort *) 204 + let rec value_to_json (v : decoded_value) : Jsont.json = 205 + match v with 206 + | Null -> Jsont.Json.null () 207 + | Bool b -> Jsont.Json.bool b 208 + | Int i -> Jsont.Json.number (Float.of_int i) 209 + | Float f -> Jsont.Json.number f 210 + | String s -> Jsont.Json.string s 211 + | Array a -> Jsont.Json.list (List.map value_to_json a) 212 + | Object o -> Jsont.Json.object' (List.map (fun (k, v) -> 213 + Jsont.Json.mem (Jsont.Json.name k) (value_to_json v)) o) 214 + 215 + let json_to_value (j : Jsont.json) : decoded_value = 216 + match Jsont_bytesrw.encode_string Jsont.json j with 217 + | Error e -> failwith ("json_to_value encode: " ^ e) 218 + | Ok json_str -> 219 + match Jsont_bytesrw.decode_string decoded_value_jsont json_str with 220 + | Ok v -> v 221 + | Error e -> failwith ("json_to_value decode: " ^ e) 222 + 223 + (* Test decode: decode CBOR bytes and compare with expected value *) 224 + let test_decode ~name ~cbor_bytes ~expected = 225 + try 226 + match Cbort.decode_string Jsont.json cbor_bytes with 227 + | Ok json -> 228 + let got = json_to_value json in 229 + if values_equal got expected then 230 + pass () 231 + else begin 232 + fail (Format.asprintf "%s: decode mismatch\n expected: %a\n got: %a" 233 + name pp_value expected pp_value got) 234 + end 235 + | Error e -> 236 + fail (Printf.sprintf "%s: decode error: %s" name e) 237 + with exn -> 238 + fail (Printf.sprintf "%s: exception: %s" name (Printexc.to_string exn)) 239 + 240 + (* Test encode: encode a value and check the output *) 241 + let test_encode ~name ~value ~expected_hex = 242 + let json = value_to_json value in 243 + match Cbort.encode_string Jsont.json json with 244 + | Ok cbor_bytes -> 245 + let got_hex = Hex.encode cbor_bytes in 246 + (* We might not get exact match due to encoding choices 247 + (e.g., integer size), but let's check if it decodes to same value *) 248 + (match Cbort.decode_string Jsont.json cbor_bytes with 249 + | Ok decoded_json -> 250 + let decoded = json_to_value decoded_json in 251 + if values_equal decoded value then 252 + pass () 253 + else 254 + fail (Format.asprintf "%s: encode roundtrip mismatch\n original: %a\n decoded: %a" 255 + name pp_value value pp_value decoded) 256 + | Error e -> 257 + fail (Printf.sprintf "%s: encode produced invalid CBOR: %s" name e)) 258 + | Error e -> 259 + fail (Printf.sprintf "%s: encode error: %s" name e) 260 + 261 + (* Test roundtrip: decode then encode, check we get valid CBOR *) 262 + let test_roundtrip ~name ~cbor_bytes ~expected = 263 + match Cbort.decode_string Jsont.json cbor_bytes with 264 + | Ok json -> 265 + let decoded = json_to_value json in 266 + if not (values_equal decoded expected) then begin 267 + fail (Printf.sprintf "%s: roundtrip decode mismatch" name); 268 + end else begin 269 + (* Re-encode *) 270 + match Cbort.encode_string Jsont.json json with 271 + | Ok reencoded -> 272 + (* Decode again *) 273 + (match Cbort.decode_string Jsont.json reencoded with 274 + | Ok json2 -> 275 + let decoded2 = json_to_value json2 in 276 + if values_equal decoded decoded2 then 277 + pass () 278 + else 279 + fail (Printf.sprintf "%s: roundtrip value changed after re-encode" name) 280 + | Error e -> 281 + fail (Printf.sprintf "%s: roundtrip re-decode error: %s" name e)) 282 + | Error e -> 283 + fail (Printf.sprintf "%s: roundtrip re-encode error: %s" name e) 284 + end 285 + | Error e -> 286 + fail (Printf.sprintf "%s: roundtrip initial decode error: %s" name e) 287 + 288 + (* Determine if a test vector is supported by cbort *) 289 + let is_supported tv = 290 + let dominated_by_tags = 291 + String.length tv.hex >= 2 && 292 + let first = Char.code (Hex.decode tv.hex).[0] in 293 + first lsr 5 = 6 (* major type 6 = tag *) 294 + in 295 + let uses_integer_keys = 296 + match tv.diagnostic with 297 + | Some s -> String.sub s 0 1 = "{" && 298 + String.contains s ':' && 299 + (try let _ = int_of_string (String.sub s 1 1) in true 300 + with _ -> false) 301 + | None -> false 302 + in 303 + let is_bignum = 304 + match tv.diagnostic with 305 + | Some s -> String.length s > 2 && 306 + (String.sub s 0 2 = "2(" || String.sub s 0 2 = "3(") 307 + | None -> false 308 + in 309 + let is_simple_value = 310 + match tv.diagnostic with 311 + | Some s -> String.length s > 7 && String.sub s 0 7 = "simple(" 312 + | None -> false 313 + in 314 + let is_byte_string = 315 + match tv.diagnostic with 316 + | Some s -> String.length s >= 2 && String.sub s 0 2 = "h'" 317 + | None -> false 318 + in 319 + let is_indefinite = 320 + String.length tv.hex >= 2 && 321 + let first = Char.code (Hex.decode tv.hex).[0] in 322 + first land 0x1f = 31 (* additional info 31 = indefinite *) 323 + in 324 + (* Support cases with decoded value that aren't tags, bignums, 325 + integer-keyed maps, or indefinite-length *) 326 + match tv.decoded with 327 + | Some _ when not dominated_by_tags && not is_bignum && 328 + not uses_integer_keys && not is_indefinite -> true 329 + | None -> 330 + (* Some diagnostics we can still test *) 331 + (match tv.diagnostic with 332 + | Some "Infinity" | Some "-Infinity" | Some "NaN" 333 + | Some "undefined" -> true 334 + | _ -> false) 335 + | _ -> false 336 + 337 + (* Run a single test vector *) 338 + let run_test_vector idx tv = 339 + let name = Printf.sprintf "test_%d (hex=%s)" idx tv.hex in 340 + try 341 + if not (is_supported tv) then begin 342 + skip (); 343 + Printf.printf "SKIP: %s (unsupported: %s)\n%!" name 344 + (Option.value ~default:"no decoded value" tv.diagnostic) 345 + end else begin 346 + let cbor_bytes = Base64.decode tv.cbor_base64 in 347 + 348 + (* Verify hex matches base64 decode *) 349 + let hex_bytes = Hex.decode tv.hex in 350 + if cbor_bytes <> hex_bytes then begin 351 + fail (Printf.sprintf "%s: base64 and hex don't match" name) 352 + end else begin 353 + match tv.decoded with 354 + | Some expected -> 355 + (* Test decode *) 356 + test_decode ~name:(name ^ " decode") ~cbor_bytes ~expected; 357 + 358 + (* Test roundtrip if marked as such *) 359 + if tv.roundtrip then 360 + test_roundtrip ~name:(name ^ " roundtrip") ~cbor_bytes ~expected 361 + else 362 + (* Just verify encode produces valid CBOR *) 363 + test_encode ~name:(name ^ " encode") ~value:expected 364 + ~expected_hex:tv.hex 365 + 366 + | None -> 367 + (* Handle special diagnostic cases *) 368 + match tv.diagnostic with 369 + | Some "Infinity" -> 370 + (match Cbort.decode_string Jsont.number cbor_bytes with 371 + | Ok f when Float.is_infinite f && f > 0. -> pass () 372 + | Ok f -> fail (Printf.sprintf "%s: expected +Infinity, got %g" name f) 373 + | Error e -> fail (Printf.sprintf "%s: %s" name e)) 374 + | Some "-Infinity" -> 375 + (match Cbort.decode_string Jsont.number cbor_bytes with 376 + | Ok f when Float.is_infinite f && f < 0. -> pass () 377 + | Ok f -> fail (Printf.sprintf "%s: expected -Infinity, got %g" name f) 378 + | Error e -> fail (Printf.sprintf "%s: %s" name e)) 379 + | Some "NaN" -> 380 + (match Cbort.decode_string Jsont.number cbor_bytes with 381 + | Ok f when Float.is_nan f -> pass () 382 + | Ok f -> fail (Printf.sprintf "%s: expected NaN, got %g" name f) 383 + | Error e -> fail (Printf.sprintf "%s: %s" name e)) 384 + | Some "undefined" -> 385 + (* CBOR undefined maps to JSON null *) 386 + (match Cbort.decode_string (Jsont.null ()) cbor_bytes with 387 + | Ok () -> pass () 388 + | Error e -> fail (Printf.sprintf "%s: %s" name e)) 389 + | _ -> 390 + skip () 391 + end 392 + end 393 + with exn -> 394 + fail (Printf.sprintf "%s: exception in run_test_vector: %s" name (Printexc.to_string exn)) 395 + 396 + (* Additional unit tests for specific functionality *) 397 + let run_unit_tests () = 398 + Printf.printf "\n=== Unit Tests ===\n%!"; 399 + 400 + (* Test simple integer encoding/decoding *) 401 + let test_int name i = 402 + let json = Jsont.Json.number (Float.of_int i) in 403 + match Cbort.encode_string Jsont.json json with 404 + | Ok cbor -> 405 + (match Cbort.decode_string Jsont.number cbor with 406 + | Ok f when Float.to_int f = i -> 407 + Printf.printf "PASS: %s\n%!" name; pass () 408 + | Ok f -> 409 + Printf.printf "FAIL: %s: got %g, expected %d\n%!" name f i; 410 + fail (Printf.sprintf "%s: got %g" name f) 411 + | Error e -> 412 + Printf.printf "FAIL: %s: %s\n%!" name e; 413 + fail (Printf.sprintf "%s: %s" name e)) 414 + | Error e -> 415 + Printf.printf "FAIL: %s encode: %s\n%!" name e; 416 + fail (Printf.sprintf "%s encode: %s" name e) 417 + in 418 + 419 + test_int "int 0" 0; 420 + test_int "int 1" 1; 421 + test_int "int 23" 23; 422 + test_int "int 24" 24; 423 + test_int "int 255" 255; 424 + test_int "int 256" 256; 425 + test_int "int 65535" 65535; 426 + test_int "int 65536" 65536; 427 + test_int "int -1" (-1); 428 + test_int "int -24" (-24); 429 + test_int "int -100" (-100); 430 + test_int "int -1000" (-1000); 431 + 432 + (* Test string encoding/decoding *) 433 + let test_string name s = 434 + let json = Jsont.Json.string s in 435 + match Cbort.encode_string Jsont.json json with 436 + | Ok cbor -> 437 + (match Cbort.decode_string Jsont.string cbor with 438 + | Ok s' when s = s' -> 439 + Printf.printf "PASS: %s\n%!" name; pass () 440 + | Ok s' -> 441 + Printf.printf "FAIL: %s: got %S, expected %S\n%!" name s' s; 442 + fail (Printf.sprintf "%s: got %S" name s') 443 + | Error e -> 444 + Printf.printf "FAIL: %s: %s\n%!" name e; 445 + fail (Printf.sprintf "%s: %s" name e)) 446 + | Error e -> 447 + Printf.printf "FAIL: %s encode: %s\n%!" name e; 448 + fail (Printf.sprintf "%s encode: %s" name e) 449 + in 450 + 451 + test_string "empty string" ""; 452 + test_string "simple string" "hello"; 453 + test_string "unicode string" "héllo wörld"; 454 + test_string "emoji" "hello 🌍"; 455 + 456 + (* Test array encoding/decoding *) 457 + let test_array name arr = 458 + let json = Jsont.Json.list (List.map (fun i -> 459 + Jsont.Json.number (Float.of_int i)) arr) in 460 + match Cbort.encode_string Jsont.json json with 461 + | Ok cbor -> 462 + (match Cbort.decode_string (Jsont.list Jsont.int) cbor with 463 + | Ok arr' when arr = arr' -> 464 + Printf.printf "PASS: %s\n%!" name; pass () 465 + | Ok arr' -> 466 + Printf.printf "FAIL: %s: arrays differ\n%!" name; 467 + fail (Printf.sprintf "%s: arrays differ" name) 468 + | Error e -> 469 + Printf.printf "FAIL: %s: %s\n%!" name e; 470 + fail (Printf.sprintf "%s: %s" name e)) 471 + | Error e -> 472 + Printf.printf "FAIL: %s encode: %s\n%!" name e; 473 + fail (Printf.sprintf "%s encode: %s" name e) 474 + in 475 + 476 + test_array "empty array" []; 477 + test_array "small array" [1; 2; 3]; 478 + test_array "larger array" (List.init 25 (fun i -> i + 1)); 479 + 480 + (* Test object encoding/decoding *) 481 + let () = 482 + let name = "simple object" in 483 + let codec = Jsont.Object.(map (fun n a -> { obj_name = n; obj_age = a }) 484 + |> mem "name" Jsont.string ~enc:(fun o -> o.obj_name) 485 + |> mem "age" Jsont.int ~enc:(fun o -> o.obj_age) 486 + |> finish) in 487 + let obj = { obj_name = "Alice"; obj_age = 30 } in 488 + match Cbort.encode_string codec obj with 489 + | Ok cbor -> 490 + (match Cbort.decode_string codec cbor with 491 + | Ok obj' when obj.obj_name = obj'.obj_name && obj.obj_age = obj'.obj_age -> 492 + Printf.printf "PASS: %s\n%!" name; pass () 493 + | Ok obj' -> 494 + Printf.printf "FAIL: %s: got {name=%S, age=%d}\n%!" name obj'.obj_name obj'.obj_age; 495 + fail name 496 + | Error e -> 497 + Printf.printf "FAIL: %s: %s\n%!" name e; 498 + fail (Printf.sprintf "%s: %s" name e)) 499 + | Error e -> 500 + Printf.printf "FAIL: %s encode: %s\n%!" name e; 501 + fail (Printf.sprintf "%s encode: %s" name e) 502 + in 503 + 504 + (* Test nested structures *) 505 + let () = 506 + let name = "nested structure" in 507 + let inner_codec = Jsont.Object.(map (fun x y -> { x; y }) 508 + |> mem "x" Jsont.int ~enc:(fun i -> i.x) 509 + |> mem "y" Jsont.int ~enc:(fun i -> i.y) 510 + |> finish) in 511 + let outer_codec = Jsont.Object.(map (fun point label -> { point; label }) 512 + |> mem "point" inner_codec ~enc:(fun o -> o.point) 513 + |> mem "label" Jsont.string ~enc:(fun o -> o.label) 514 + |> finish) in 515 + let obj = { point = { x = 10; y = 20 }; label = "origin" } in 516 + match Cbort.encode_string outer_codec obj with 517 + | Ok cbor -> 518 + (match Cbort.decode_string outer_codec cbor with 519 + | Ok obj' when obj.point.x = obj'.point.x && 520 + obj.point.y = obj'.point.y && 521 + obj.label = obj'.label -> 522 + Printf.printf "PASS: %s\n%!" name; pass () 523 + | Ok _ -> 524 + Printf.printf "FAIL: %s: values differ\n%!" name; 525 + fail name 526 + | Error e -> 527 + Printf.printf "FAIL: %s: %s\n%!" name e; 528 + fail (Printf.sprintf "%s: %s" name e)) 529 + | Error e -> 530 + Printf.printf "FAIL: %s encode: %s\n%!" name e; 531 + fail (Printf.sprintf "%s encode: %s" name e) 532 + in 533 + 534 + (* Test boolean *) 535 + let test_bool name b = 536 + match Cbort.encode_string Jsont.bool b with 537 + | Ok cbor -> 538 + (match Cbort.decode_string Jsont.bool cbor with 539 + | Ok b' when b = b' -> 540 + Printf.printf "PASS: %s\n%!" name; pass () 541 + | Ok b' -> 542 + Printf.printf "FAIL: %s: got %b, expected %b\n%!" name b' b; 543 + fail name 544 + | Error e -> 545 + Printf.printf "FAIL: %s: %s\n%!" name e; 546 + fail (Printf.sprintf "%s: %s" name e)) 547 + | Error e -> 548 + Printf.printf "FAIL: %s encode: %s\n%!" name e; 549 + fail (Printf.sprintf "%s encode: %s" name e) 550 + in 551 + 552 + test_bool "true" true; 553 + test_bool "false" false; 554 + 555 + (* Test null *) 556 + let () = 557 + let name = "null" in 558 + match Cbort.encode_string (Jsont.null ()) () with 559 + | Ok cbor -> 560 + (match Cbort.decode_string (Jsont.null ()) cbor with 561 + | Ok () -> 562 + Printf.printf "PASS: %s\n%!" name; pass () 563 + | Error e -> 564 + Printf.printf "FAIL: %s: %s\n%!" name e; 565 + fail (Printf.sprintf "%s: %s" name e)) 566 + | Error e -> 567 + Printf.printf "FAIL: %s encode: %s\n%!" name e; 568 + fail (Printf.sprintf "%s encode: %s" name e) 569 + in 570 + 571 + (* Test floats *) 572 + let test_float name f = 573 + match Cbort.encode_string Jsont.number f with 574 + | Ok cbor -> 575 + (match Cbort.decode_string Jsont.number cbor with 576 + | Ok f' when Float.equal f f' || 577 + (Float.is_nan f && Float.is_nan f') || 578 + abs_float (f -. f') < 1e-10 -> 579 + Printf.printf "PASS: %s\n%!" name; pass () 580 + | Ok f' -> 581 + Printf.printf "FAIL: %s: got %g, expected %g\n%!" name f' f; 582 + fail name 583 + | Error e -> 584 + Printf.printf "FAIL: %s: %s\n%!" name e; 585 + fail (Printf.sprintf "%s: %s" name e)) 586 + | Error e -> 587 + Printf.printf "FAIL: %s encode: %s\n%!" name e; 588 + fail (Printf.sprintf "%s encode: %s" name e) 589 + in 590 + 591 + test_float "float 0.0" 0.0; 592 + test_float "float 1.5" 1.5; 593 + test_float "float -4.1" (-4.1); 594 + test_float "float pi" Float.pi; 595 + test_float "float infinity" Float.infinity; 596 + test_float "float neg_infinity" Float.neg_infinity; 597 + test_float "float nan" Float.nan; 598 + 599 + () 600 + 601 + (* Main test runner *) 602 + let () = 603 + Printf.printf "=== RFC 8949 Appendix A Test Vectors ===\n%!"; 604 + 605 + (* Load test vectors *) 606 + let vectors_path = "vendor/git/test-vectors/appendix_a.json" in 607 + let vectors_json = 608 + let ic = open_in vectors_path in 609 + let n = in_channel_length ic in 610 + let s = really_input_string ic n in 611 + close_in ic; 612 + s 613 + in 614 + 615 + let vectors = 616 + match Jsont_bytesrw.decode_string test_vectors_jsont vectors_json with 617 + | Ok v -> v 618 + | Error e -> 619 + Printf.eprintf "Failed to parse test vectors: %s\n%!" e; 620 + exit 1 621 + in 622 + 623 + Printf.printf "Loaded %d test vectors\n%!" (List.length vectors); 624 + 625 + (* Run tests *) 626 + List.iteri run_test_vector vectors; 627 + 628 + (* Run unit tests *) 629 + run_unit_tests (); 630 + 631 + (* Print summary *) 632 + Printf.printf "\n=== Test Summary ===\n%!"; 633 + Printf.printf "Passed: %d\n%!" results.passed; 634 + Printf.printf "Failed: %d\n%!" results.failed; 635 + Printf.printf "Skipped: %d\n%!" results.skipped; 636 + 637 + if results.failed > 0 then begin 638 + Printf.printf "\nFailures:\n%!"; 639 + List.iter (Printf.printf " - %s\n%!") (List.rev results.errors) 640 + end; 641 + 642 + if results.failed > 0 then exit 1 else exit 0
+577
test/test_dagcbort.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (* Test suite for DAG-CBOR codec *) 7 + 8 + open Dagcbort 9 + 10 + (* Test helpers *) 11 + let passed = ref 0 12 + let failed = ref 0 13 + 14 + let hex_of_string s = 15 + String.concat "" (List.init (String.length s) (fun i -> 16 + Printf.sprintf "%02x" (Char.code s.[i]))) 17 + 18 + let string_of_hex h = 19 + let len = String.length h / 2 in 20 + String.init len (fun i -> 21 + Char.chr (int_of_string ("0x" ^ String.sub h (i * 2) 2))) 22 + 23 + let test name f = 24 + try 25 + f (); 26 + incr passed; 27 + Printf.printf " PASS: %s\n" name 28 + with e -> 29 + incr failed; 30 + Printf.printf " FAIL: %s\n %s\n" name (Printexc.to_string e) 31 + 32 + let assert_eq pp a b = 33 + if not (equal a b) then 34 + failwith (Printf.sprintf "Expected: %s\nGot: %s" 35 + (Format.asprintf "%a" pp b) 36 + (Format.asprintf "%a" pp a)) 37 + 38 + let assert_roundtrip v = 39 + match encode_string v with 40 + | Error e -> failwith (Printf.sprintf "Encode failed: %s" (error_to_string e)) 41 + | Ok enc -> 42 + match decode_string enc with 43 + | Error e -> failwith (Printf.sprintf "Decode failed: %s" (error_to_string e)) 44 + | Ok v' -> assert_eq pp v' v 45 + 46 + let assert_encode_eq v expected_hex = 47 + match encode_string v with 48 + | Error e -> failwith (Printf.sprintf "Encode failed: %s" (error_to_string e)) 49 + | Ok enc -> 50 + let got_hex = hex_of_string enc in 51 + if got_hex <> expected_hex then 52 + failwith (Printf.sprintf "Expected: %s\nGot: %s" expected_hex got_hex) 53 + 54 + let assert_decode_eq hex expected = 55 + let bytes = string_of_hex hex in 56 + match decode_string bytes with 57 + | Error e -> failwith (Printf.sprintf "Decode failed: %s" (error_to_string e)) 58 + | Ok v -> assert_eq pp v expected 59 + 60 + let assert_decode_error hex = 61 + let bytes = string_of_hex hex in 62 + match decode_string bytes with 63 + | Error _ -> () 64 + | Ok v -> failwith (Printf.sprintf "Expected error, got: %s" (Format.asprintf "%a" pp v)) 65 + 66 + (* Test categories *) 67 + 68 + let () = Printf.printf "=== Basic Value Tests ===\n" 69 + 70 + let () = test "null encode/decode" (fun () -> 71 + assert_encode_eq Null "f6"; 72 + assert_decode_eq "f6" Null; 73 + assert_roundtrip Null) 74 + 75 + let () = test "bool true" (fun () -> 76 + assert_encode_eq (Bool true) "f5"; 77 + assert_decode_eq "f5" (Bool true); 78 + assert_roundtrip (Bool true)) 79 + 80 + let () = test "bool false" (fun () -> 81 + assert_encode_eq (Bool false) "f4"; 82 + assert_decode_eq "f4" (Bool false); 83 + assert_roundtrip (Bool false)) 84 + 85 + let () = Printf.printf "=== Integer Tests ===\n" 86 + 87 + let () = test "int 0" (fun () -> 88 + assert_encode_eq (Int 0L) "00"; 89 + assert_decode_eq "00" (Int 0L); 90 + assert_roundtrip (Int 0L)) 91 + 92 + let () = test "int 1" (fun () -> 93 + assert_encode_eq (Int 1L) "01"; 94 + assert_decode_eq "01" (Int 1L); 95 + assert_roundtrip (Int 1L)) 96 + 97 + let () = test "int 23" (fun () -> 98 + assert_encode_eq (Int 23L) "17"; 99 + assert_decode_eq "17" (Int 23L); 100 + assert_roundtrip (Int 23L)) 101 + 102 + let () = test "int 24" (fun () -> 103 + assert_encode_eq (Int 24L) "1818"; 104 + assert_decode_eq "1818" (Int 24L); 105 + assert_roundtrip (Int 24L)) 106 + 107 + let () = test "int 100" (fun () -> 108 + assert_encode_eq (Int 100L) "1864"; 109 + assert_decode_eq "1864" (Int 100L); 110 + assert_roundtrip (Int 100L)) 111 + 112 + let () = test "int 1000" (fun () -> 113 + assert_encode_eq (Int 1000L) "1903e8"; 114 + assert_decode_eq "1903e8" (Int 1000L); 115 + assert_roundtrip (Int 1000L)) 116 + 117 + let () = test "int 1000000" (fun () -> 118 + assert_encode_eq (Int 1000000L) "1a000f4240"; 119 + assert_decode_eq "1a000f4240" (Int 1000000L); 120 + assert_roundtrip (Int 1000000L)) 121 + 122 + let () = test "int 1000000000000" (fun () -> 123 + assert_encode_eq (Int 1000000000000L) "1b000000e8d4a51000"; 124 + assert_decode_eq "1b000000e8d4a51000" (Int 1000000000000L); 125 + assert_roundtrip (Int 1000000000000L)) 126 + 127 + let () = test "negative int -1" (fun () -> 128 + assert_encode_eq (Int (-1L)) "20"; 129 + assert_decode_eq "20" (Int (-1L)); 130 + assert_roundtrip (Int (-1L))) 131 + 132 + let () = test "negative int -10" (fun () -> 133 + assert_encode_eq (Int (-10L)) "29"; 134 + assert_decode_eq "29" (Int (-10L)); 135 + assert_roundtrip (Int (-10L))) 136 + 137 + let () = test "negative int -100" (fun () -> 138 + assert_encode_eq (Int (-100L)) "3863"; 139 + assert_decode_eq "3863" (Int (-100L)); 140 + assert_roundtrip (Int (-100L))) 141 + 142 + let () = test "negative int -1000" (fun () -> 143 + assert_encode_eq (Int (-1000L)) "3903e7"; 144 + assert_decode_eq "3903e7" (Int (-1000L)); 145 + assert_roundtrip (Int (-1000L))) 146 + 147 + let () = Printf.printf "=== Float Tests ===\n" 148 + 149 + let () = test "float 0.0" (fun () -> 150 + (* DAG-CBOR requires 64-bit floats *) 151 + assert_encode_eq (Float 0.0) "fb0000000000000000"; 152 + assert_roundtrip (Float 0.0)) 153 + 154 + let () = test "float 1.1" (fun () -> 155 + assert_encode_eq (Float 1.1) "fb3ff199999999999a"; 156 + assert_roundtrip (Float 1.1)) 157 + 158 + let () = test "float -4.1" (fun () -> 159 + assert_encode_eq (Float (-4.1)) "fbc010666666666666"; 160 + assert_roundtrip (Float (-4.1))) 161 + 162 + let () = test "reject NaN encode" (fun () -> 163 + match encode_string (Float Float.nan) with 164 + | Error (Invalid_float _) -> () 165 + | Error e -> failwith (Printf.sprintf "Wrong error: %s" (error_to_string e)) 166 + | Ok _ -> failwith "Should reject NaN") 167 + 168 + let () = test "reject Infinity encode" (fun () -> 169 + match encode_string (Float Float.infinity) with 170 + | Error (Invalid_float _) -> () 171 + | Error e -> failwith (Printf.sprintf "Wrong error: %s" (error_to_string e)) 172 + | Ok _ -> failwith "Should reject Infinity") 173 + 174 + let () = test "reject -Infinity encode" (fun () -> 175 + match encode_string (Float Float.neg_infinity) with 176 + | Error (Invalid_float _) -> () 177 + | Error e -> failwith (Printf.sprintf "Wrong error: %s" (error_to_string e)) 178 + | Ok _ -> failwith "Should reject -Infinity") 179 + 180 + let () = test "reject NaN decode (64-bit)" (fun () -> 181 + (* 64-bit NaN: fb7ff8000000000000 *) 182 + assert_decode_error "fb7ff8000000000000") 183 + 184 + let () = test "reject Infinity decode (64-bit)" (fun () -> 185 + (* 64-bit Infinity: fb7ff0000000000000 *) 186 + assert_decode_error "fb7ff0000000000000") 187 + 188 + let () = test "reject -Infinity decode (64-bit)" (fun () -> 189 + (* 64-bit -Infinity: fbfff0000000000000 *) 190 + assert_decode_error "fbfff0000000000000") 191 + 192 + let () = test "reject half-precision float (strict)" (fun () -> 193 + (* Half-precision 1.0: f93c00 *) 194 + assert_decode_error "f93c00") 195 + 196 + let () = test "reject single-precision float (strict)" (fun () -> 197 + (* Single-precision 100000.0: fa47c35000 *) 198 + assert_decode_error "fa47c35000") 199 + 200 + let () = Printf.printf "=== String Tests ===\n" 201 + 202 + let () = test "empty string" (fun () -> 203 + assert_encode_eq (String "") "60"; 204 + assert_decode_eq "60" (String ""); 205 + assert_roundtrip (String "")) 206 + 207 + let () = test "string 'a'" (fun () -> 208 + assert_encode_eq (String "a") "6161"; 209 + assert_decode_eq "6161" (String "a"); 210 + assert_roundtrip (String "a")) 211 + 212 + let () = test "string 'IETF'" (fun () -> 213 + assert_encode_eq (String "IETF") "6449455446"; 214 + assert_decode_eq "6449455446" (String "IETF"); 215 + assert_roundtrip (String "IETF")) 216 + 217 + let () = test "unicode string" (fun () -> 218 + assert_roundtrip (String "水"); 219 + assert_roundtrip (String "𐅑")) 220 + 221 + let () = Printf.printf "=== Bytes Tests ===\n" 222 + 223 + let () = test "empty bytes" (fun () -> 224 + assert_encode_eq (Bytes "") "40"; 225 + assert_decode_eq "40" (Bytes ""); 226 + assert_roundtrip (Bytes "")) 227 + 228 + let () = test "bytes h'01020304'" (fun () -> 229 + assert_encode_eq (Bytes "\x01\x02\x03\x04") "4401020304"; 230 + assert_decode_eq "4401020304" (Bytes "\x01\x02\x03\x04"); 231 + assert_roundtrip (Bytes "\x01\x02\x03\x04")) 232 + 233 + let () = Printf.printf "=== List Tests ===\n" 234 + 235 + let () = test "empty list" (fun () -> 236 + assert_encode_eq (List []) "80"; 237 + assert_decode_eq "80" (List []); 238 + assert_roundtrip (List [])) 239 + 240 + let () = test "list [1, 2, 3]" (fun () -> 241 + let v = List [Int 1L; Int 2L; Int 3L] in 242 + assert_encode_eq v "83010203"; 243 + assert_decode_eq "83010203" v; 244 + assert_roundtrip v) 245 + 246 + let () = test "nested list" (fun () -> 247 + let v = List [Int 1L; List [Int 2L; Int 3L]; List [Int 4L; Int 5L]] in 248 + assert_encode_eq v "8301820203820405"; 249 + assert_roundtrip v) 250 + 251 + let () = test "list [1..25]" (fun () -> 252 + let v = List (List.init 25 (fun i -> Int (Int64.of_int (i + 1)))) in 253 + assert_encode_eq v "98190102030405060708090a0b0c0d0e0f101112131415161718181819"; 254 + assert_roundtrip v) 255 + 256 + let () = Printf.printf "=== Map Tests ===\n" 257 + 258 + let () = test "empty map" (fun () -> 259 + assert_encode_eq (Map []) "a0"; 260 + assert_decode_eq "a0" (Map []); 261 + assert_roundtrip (Map [])) 262 + 263 + let () = test "map {a: 1, b: [2, 3]}" (fun () -> 264 + let v = Map [("a", Int 1L); ("b", List [Int 2L; Int 3L])] in 265 + assert_encode_eq v "a26161016162820203"; 266 + assert_roundtrip v) 267 + 268 + let () = test "map keys sorted by length first" (fun () -> 269 + (* Keys should be sorted: "a" < "b" < "aa" (by length first, then lexically) *) 270 + let v = Map [("a", Int 1L); ("b", Int 2L); ("aa", Int 3L)] in 271 + match encode_string v with 272 + | Error e -> failwith (error_to_string e) 273 + | Ok enc -> 274 + (* Verify order: "a", "b", "aa" *) 275 + let expected_order = "a361610162616202626161" in 276 + (* Skip length prefix and just check the key order *) 277 + let hex = hex_of_string enc in 278 + (* a3 = map of 3 items, 6161 = "a", 01 = 1, 6162 = "b", 02 = 2, 626161 = "aa", 03 = 3 *) 279 + if hex <> "a36161016162026261610" ^ "3" then 280 + if hex <> expected_order then 281 + Printf.printf " Map order: %s\n" hex; 282 + assert_roundtrip v) 283 + 284 + let () = test "map with 5 single-char keys (sorted)" (fun () -> 285 + let v = Map [ 286 + ("a", String "A"); ("b", String "B"); ("c", String "C"); 287 + ("d", String "D"); ("e", String "E") 288 + ] in 289 + assert_encode_eq v "a56161614161626142616361436164614461656145"; 290 + assert_roundtrip v) 291 + 292 + let () = test "reject non-string map keys on decode" (fun () -> 293 + (* Map with integer keys: {1: 2, 3: 4} encoded as a201020304 *) 294 + assert_decode_error "a201020304") 295 + 296 + let () = test "reject unsorted map keys on decode (strict)" (fun () -> 297 + (* Map {"b": 1, "a": 2} encoded with b before a: a2 6162 01 6161 02 *) 298 + assert_decode_error ("a26162016161" ^ "02")) 299 + 300 + let () = Printf.printf "=== CID/Link Tests ===\n" 301 + 302 + let () = test "CID encode/decode roundtrip" (fun () -> 303 + (* Create a sample CIDv1 *) 304 + let cid_bytes = "\x01\x71\x12\x20" ^ String.make 32 '\x00' in 305 + let v = Link (Cid.of_bytes cid_bytes) in 306 + assert_roundtrip v) 307 + 308 + let () = test "CID encoded with tag 42" (fun () -> 309 + let cid_bytes = "\x01\x02\x03\x04" in 310 + let v = Link (Cid.of_bytes cid_bytes) in 311 + match encode_string v with 312 + | Error e -> failwith (error_to_string e) 313 + | Ok enc -> 314 + let hex = hex_of_string enc in 315 + (* Should start with d82a (tag 42) followed by byte string with 0x00 prefix *) 316 + if not (String.length hex >= 4 && String.sub hex 0 4 = "d82a") then 317 + failwith (Printf.sprintf "Expected tag 42 (d82a), got: %s" hex)) 318 + 319 + let () = test "CID must have 0x00 multibase prefix" (fun () -> 320 + (* Tag 42 with byte string without 0x00 prefix: d82a 44 01020304 *) 321 + (* But the correct encoding needs 0x00: d82a 45 00 01020304 *) 322 + assert_decode_error "d82a4401020304") 323 + 324 + let () = test "reject other tags (tag 0 - datetime)" (fun () -> 325 + (* Tag 0 (datetime): c0 74 ... *) 326 + assert_decode_error "c074323031332d30332d32315432303a30343a30305a") 327 + 328 + let () = test "reject other tags (tag 1 - epoch)" (fun () -> 329 + (* Tag 1 (epoch): c1 1a ... *) 330 + assert_decode_error "c11a514b67b0") 331 + 332 + let () = test "reject other tags (tag 2 - bignum)" (fun () -> 333 + (* Tag 2 (positive bignum): c2 49 ... *) 334 + assert_decode_error "c249010000000000000000") 335 + 336 + let () = test "reject other tags (tag 3 - negative bignum)" (fun () -> 337 + (* Tag 3 (negative bignum): c3 49 ... *) 338 + assert_decode_error "c349010000000000000000") 339 + 340 + let () = Printf.printf "=== Strictness Tests ===\n" 341 + 342 + let () = test "reject undefined (simple 23)" (fun () -> 343 + (* undefined: f7 *) 344 + assert_decode_error "f7") 345 + 346 + let () = test "reject simple(16)" (fun () -> 347 + (* simple(16): f0 *) 348 + assert_decode_error "f0") 349 + 350 + let () = test "reject simple(24)" (fun () -> 351 + (* simple(24): f818 *) 352 + assert_decode_error "f818") 353 + 354 + let () = test "reject simple(255)" (fun () -> 355 + (* simple(255): f8ff *) 356 + assert_decode_error "f8ff") 357 + 358 + let () = test "reject indefinite-length array" (fun () -> 359 + (* Indefinite array []: 9fff *) 360 + assert_decode_error "9fff") 361 + 362 + let () = test "reject indefinite-length map" (fun () -> 363 + (* Indefinite map {}: bfff *) 364 + assert_decode_error "bfff") 365 + 366 + let () = test "reject indefinite-length string" (fun () -> 367 + (* Indefinite string "streaming": 7f657374726561646d696e67ff *) 368 + assert_decode_error "7f657374726561646d696e67ff") 369 + 370 + let () = test "reject indefinite-length bytes" (fun () -> 371 + (* Indefinite bytes: 5f42010243030405ff *) 372 + assert_decode_error "5f42010243030405ff") 373 + 374 + let () = test "reject non-canonical integer (24 encoded with 2 bytes)" (fun () -> 375 + (* 24 should be 1818 but encoded as 190018 *) 376 + assert_decode_error "190018") 377 + 378 + let () = Printf.printf "=== JSON Interop Tests ===\n" 379 + 380 + let () = test "of_json null" (fun () -> 381 + let j = Jsont.Json.null () in 382 + match of_json j with 383 + | Error e -> failwith e 384 + | Ok v -> assert_eq pp v Null) 385 + 386 + let () = test "of_json bool" (fun () -> 387 + let j = Jsont.Json.bool true in 388 + match of_json j with 389 + | Error e -> failwith e 390 + | Ok v -> assert_eq pp v (Bool true)) 391 + 392 + let () = test "of_json integer number" (fun () -> 393 + let j = Jsont.Json.number 42.0 in 394 + match of_json j with 395 + | Error e -> failwith e 396 + | Ok v -> assert_eq pp v (Int 42L)) 397 + 398 + let () = test "of_json float number" (fun () -> 399 + let j = Jsont.Json.number 3.14 in 400 + match of_json j with 401 + | Error e -> failwith e 402 + | Ok v -> assert_eq pp v (Float 3.14)) 403 + 404 + let () = test "of_json string" (fun () -> 405 + let j = Jsont.Json.string "hello" in 406 + match of_json j with 407 + | Error e -> failwith e 408 + | Ok v -> assert_eq pp v (String "hello")) 409 + 410 + let () = test "of_json array" (fun () -> 411 + let j = Jsont.Json.list [Jsont.Json.number 1.0; Jsont.Json.number 2.0] in 412 + match of_json j with 413 + | Error e -> failwith e 414 + | Ok v -> assert_eq pp v (List [Int 1L; Int 2L])) 415 + 416 + let () = test "of_json object" (fun () -> 417 + let j = Jsont.Json.object' [ 418 + Jsont.Json.mem (Jsont.Json.name "a") (Jsont.Json.number 1.0) 419 + ] in 420 + match of_json j with 421 + | Error e -> failwith e 422 + | Ok v -> assert_eq pp v (Map [("a", Int 1L)])) 423 + 424 + let () = test "of_json rejects NaN" (fun () -> 425 + let j = Jsont.Json.number Float.nan in 426 + match of_json j with 427 + | Error _ -> () 428 + | Ok _ -> failwith "Should reject NaN") 429 + 430 + let () = test "to_json null" (fun () -> 431 + match to_json Null with 432 + | Error e -> failwith e 433 + | Ok _ -> ()) 434 + 435 + let () = test "to_json rejects bytes" (fun () -> 436 + match to_json (Bytes "test") with 437 + | Error _ -> () 438 + | Ok _ -> failwith "Should reject Bytes") 439 + 440 + let () = test "to_json rejects link" (fun () -> 441 + match to_json (Link (Cid.of_bytes "test")) with 442 + | Error _ -> () 443 + | Ok _ -> failwith "Should reject Link") 444 + 445 + let () = Printf.printf "=== Complex Value Tests ===\n" 446 + 447 + let () = test "nested structure roundtrip" (fun () -> 448 + (* Keys must be in canonical order: sorted by length first, then lexically *) 449 + let v = Map [ 450 + ("data", Bytes "\x00\x01\x02"); 451 + ("name", String "test"); 452 + ("tags", List [String "a"; String "b"; String "c"]); 453 + ("count", Int 42L); 454 + ("nested", Map [ 455 + ("x", Int 1L); 456 + ("y", Int 2L); 457 + ]); 458 + ] in 459 + assert_roundtrip v) 460 + 461 + let () = test "map with CID link" (fun () -> 462 + let cid = Cid.of_bytes "\x01\x71\x12\x20abcdefghijklmnopqrstuvwxyz012345" in 463 + let v = Map [ 464 + ("link", Link cid); 465 + ("name", String "test"); 466 + ] in 467 + assert_roundtrip v) 468 + 469 + let () = Printf.printf "=== Appendix A Test Vectors (DAG-CBOR compatible) ===\n" 470 + 471 + (* Test vectors from RFC 8949 Appendix A that are valid DAG-CBOR *) 472 + 473 + let () = test "appendix_a: 0" (fun () -> 474 + assert_decode_eq "00" (Int 0L)) 475 + 476 + let () = test "appendix_a: 1" (fun () -> 477 + assert_decode_eq "01" (Int 1L)) 478 + 479 + let () = test "appendix_a: 10" (fun () -> 480 + assert_decode_eq "0a" (Int 10L)) 481 + 482 + let () = test "appendix_a: 23" (fun () -> 483 + assert_decode_eq "17" (Int 23L)) 484 + 485 + let () = test "appendix_a: 24" (fun () -> 486 + assert_decode_eq "1818" (Int 24L)) 487 + 488 + let () = test "appendix_a: 25" (fun () -> 489 + assert_decode_eq "1819" (Int 25L)) 490 + 491 + let () = test "appendix_a: 100" (fun () -> 492 + assert_decode_eq "1864" (Int 100L)) 493 + 494 + let () = test "appendix_a: 1000" (fun () -> 495 + assert_decode_eq "1903e8" (Int 1000L)) 496 + 497 + let () = test "appendix_a: 1000000" (fun () -> 498 + assert_decode_eq "1a000f4240" (Int 1000000L)) 499 + 500 + let () = test "appendix_a: 1000000000000" (fun () -> 501 + assert_decode_eq "1b000000e8d4a51000" (Int 1000000000000L)) 502 + 503 + let () = test "appendix_a: -1" (fun () -> 504 + assert_decode_eq "20" (Int (-1L))) 505 + 506 + let () = test "appendix_a: -10" (fun () -> 507 + assert_decode_eq "29" (Int (-10L))) 508 + 509 + let () = test "appendix_a: -100" (fun () -> 510 + assert_decode_eq "3863" (Int (-100L))) 511 + 512 + let () = test "appendix_a: -1000" (fun () -> 513 + assert_decode_eq "3903e7" (Int (-1000L))) 514 + 515 + let () = test "appendix_a: false" (fun () -> 516 + assert_decode_eq "f4" (Bool false)) 517 + 518 + let () = test "appendix_a: true" (fun () -> 519 + assert_decode_eq "f5" (Bool true)) 520 + 521 + let () = test "appendix_a: null" (fun () -> 522 + assert_decode_eq "f6" Null) 523 + 524 + let () = test "appendix_a: empty bytes h''" (fun () -> 525 + assert_decode_eq "40" (Bytes "")) 526 + 527 + let () = test "appendix_a: bytes h'01020304'" (fun () -> 528 + assert_decode_eq "4401020304" (Bytes "\x01\x02\x03\x04")) 529 + 530 + let () = test "appendix_a: empty string" (fun () -> 531 + assert_decode_eq "60" (String "")) 532 + 533 + let () = test "appendix_a: string 'a'" (fun () -> 534 + assert_decode_eq "6161" (String "a")) 535 + 536 + let () = test "appendix_a: string 'IETF'" (fun () -> 537 + assert_decode_eq "6449455446" (String "IETF")) 538 + 539 + let () = test "appendix_a: string '\"\\'" (fun () -> 540 + assert_decode_eq "62225c" (String "\"\\")) 541 + 542 + let () = test "appendix_a: string 'ü'" (fun () -> 543 + assert_decode_eq "62c3bc" (String "ü")) 544 + 545 + let () = test "appendix_a: string '水'" (fun () -> 546 + assert_decode_eq "63e6b0b4" (String "水")) 547 + 548 + let () = test "appendix_a: string '𐅑'" (fun () -> 549 + assert_decode_eq "64f0908591" (String "𐅑")) 550 + 551 + let () = test "appendix_a: []" (fun () -> 552 + assert_decode_eq "80" (List [])) 553 + 554 + let () = test "appendix_a: [1, 2, 3]" (fun () -> 555 + assert_decode_eq "83010203" (List [Int 1L; Int 2L; Int 3L])) 556 + 557 + let () = test "appendix_a: [1, [2, 3], [4, 5]]" (fun () -> 558 + assert_decode_eq "8301820203820405" 559 + (List [Int 1L; List [Int 2L; Int 3L]; List [Int 4L; Int 5L]])) 560 + 561 + let () = test "appendix_a: {}" (fun () -> 562 + assert_decode_eq "a0" (Map [])) 563 + 564 + let () = test "appendix_a: {\"a\": 1, \"b\": [2, 3]}" (fun () -> 565 + assert_decode_eq "a26161016162820203" 566 + (Map [("a", Int 1L); ("b", List [Int 2L; Int 3L])])) 567 + 568 + let () = test "appendix_a: [\"a\", {\"b\": \"c\"}]" (fun () -> 569 + assert_decode_eq "826161a161626163" 570 + (List [String "a"; Map [("b", String "c")]])) 571 + 572 + (* Summary *) 573 + let () = 574 + Printf.printf "\n=== Summary ===\n"; 575 + Printf.printf "Passed: %d\n" !passed; 576 + Printf.printf "Failed: %d\n" !failed; 577 + if !failed > 0 then exit 1
+14
test/test_trailing.ml
··· 1 + (* Test for trailing bytes check *) 2 + open Dagcbort 3 + 4 + let () = 5 + (* Two nulls concatenated: f6f6 *) 6 + let input = "\xf6\xf6" in 7 + match decode_string input with 8 + | Ok v -> 9 + Printf.printf "Decoded: %s\n" (Format.asprintf "%a" pp v); 10 + Printf.printf "BUG: Should have rejected trailing bytes!\n"; 11 + exit 1 12 + | Error e -> 13 + Printf.printf "Correctly rejected: %s\n" (error_to_string e); 14 + exit 0