objective categorical abstract machine language personal data server
65
fork

Configure Feed

Select the types of activity you want to include in your feed.

Implement lexicon resolution and validation for record create endpoints

+1054 -8
+1
dune-project
··· 72 72 (timedesc (>= 3.1.0)) 73 73 (uri (>= 4.4.0)) 74 74 (uuidm (>= 0.9.10)) 75 + (uuseg (>= 17.0.0)) 75 76 (yojson (>= 3.0.0)) 76 77 (lwt_ppx (>= 5.9.1)) 77 78 (ppx_deriving_yojson (>= 3.9.1))
+37 -5
pegasus/lib/api/repo/applyWrites.ml
··· 32 32 Auth.assert_repo_scope ctx.auth ~collection 33 33 ~action:Oauth.Scopes.Delete ) 34 34 input.writes ; 35 + let%lwt validation_statuses = 36 + match input.validate with 37 + | Some true -> 38 + Lwt_list.map_s 39 + (fun w -> 40 + match w with 41 + | Create {collection; value; _} | Update {collection; value; _} 42 + -> ( 43 + match%lwt 44 + Record_validator.validate_record ~nsid:collection 45 + ~record:value 46 + with 47 + | Ok () -> 48 + Lwt.return (Some "valid") 49 + | Error msg -> 50 + Errors.invalid_request 51 + ("record validation failed: " ^ msg) ) 52 + | Delete _ -> 53 + Lwt.return None ) 54 + input.writes 55 + | Some false | None -> 56 + Lwt.return (List.map (fun _ -> Some "unknown") input.writes) 57 + in 35 58 let%lwt repo = Repository.load did in 36 59 let repo_writes = 37 60 List.map ··· 46 69 in 47 70 let results = 48 71 Option.some 49 - @@ List.map 50 - (fun r -> 51 - r |> Repository.apply_writes_result_to_yojson 52 - |> results_item_of_yojson |> Result.get_ok ) 53 - aw_results 72 + @@ List.map2 73 + (fun r status -> 74 + let item = 75 + r |> Repository.apply_writes_result_to_yojson 76 + |> results_item_of_yojson |> Result.get_ok 77 + in 78 + match item with 79 + | CreateResult cr -> 80 + CreateResult {cr with validation_status= status} 81 + | UpdateResult ur -> 82 + UpdateResult {ur with validation_status= status} 83 + | DeleteResult _ -> 84 + item ) 85 + aw_results validation_statuses 54 86 in 55 87 Dream.json @@ Yojson.Safe.to_string 56 88 @@ output_to_yojson
+15 -1
pegasus/lib/api/repo/createRecord.ml
··· 20 20 let%lwt did = Xrpc.resolve_repo_did_authed ctx input.repo in 21 21 Auth.assert_repo_scope ctx.auth ~collection:input.collection 22 22 ~action:Oauth.Scopes.Create ; 23 + let%lwt validation_status = 24 + match input.validate with 25 + | Some true -> ( 26 + match%lwt 27 + Record_validator.validate_record ~nsid:input.collection 28 + ~record:input.record 29 + with 30 + | Ok () -> 31 + Lwt.return "valid" 32 + | Error msg -> 33 + Errors.invalid_request ("record validation failed: " ^ msg) ) 34 + | Some false | None -> 35 + Lwt.return "unknown" 36 + in 23 37 let%lwt repo = Repository.load did in 24 38 let write : Repository.repo_write = 25 39 Create ··· 41 55 { uri 42 56 ; cid= Cid.to_string cid 43 57 ; commit= Some {cid= Cid.to_string commit_cid; rev} 44 - ; validation_status= Some "valid" } 58 + ; validation_status= Some validation_status } 45 59 | _ -> 46 60 Errors.invalid_request "unexpected delete result" )
+15 -1
pegasus/lib/api/repo/putRecord.ml
··· 23 23 Auth.assert_repo_scope ctx.auth ~collection:input.collection 24 24 ~action:Oauth.Scopes.Update ; 25 25 let%lwt did = Xrpc.resolve_repo_did_authed ctx input.repo in 26 + let%lwt validation_status = 27 + match input.validate with 28 + | Some true -> ( 29 + match%lwt 30 + Record_validator.validate_record ~nsid:input.collection 31 + ~record:input.record 32 + with 33 + | Ok () -> 34 + Lwt.return "valid" 35 + | Error msg -> 36 + Errors.invalid_request ("record validation failed: " ^ msg) ) 37 + | Some false | None -> 38 + Lwt.return "unknown" 39 + in 26 40 let%lwt repo = Repository.load did in 27 41 let write : Repository.repo_write = 28 42 match input.swap_record with ··· 55 69 { uri 56 70 ; cid= Cid.to_string cid 57 71 ; commit= Some {cid= Cid.to_string commit_cid; rev} 58 - ; validation_status= Some "valid" } 72 + ; validation_status= Some validation_status } 59 73 | _ -> 60 74 Errors.invalid_request "unexpected delete result" )
+3
pegasus/lib/dune
··· 14 14 emile 15 15 frontend 16 16 hermes 17 + hermes_cli 17 18 html_of_jsx 18 19 ipld 19 20 kleidos ··· 28 29 timedesc 29 30 uri 30 31 uuidm 32 + uuseg 33 + uuseg.string 31 34 webauthn 32 35 yojson 33 36 hermes_ppx
+26
pegasus/lib/lexicon_resolver.ml
··· 26 26 let cache : permission_set Ttl_cache.String_cache.t = 27 27 Ttl_cache.String_cache.create (3 * Util.Time.hour) () 28 28 29 + let schema_cache : Hermes_cli.Lexicon_types.lexicon_doc Ttl_cache.String_cache.t 30 + = 31 + Ttl_cache.String_cache.create (3 * Util.Time.hour) () 32 + 29 33 (* reuse dns client from id_resolver *) 30 34 let dns_client = Id_resolver.Handle.dns_client 31 35 ··· 131 135 Lwt.return_ok ps ) ) ) 132 136 133 137 let clear_cache nsid = Ttl_cache.String_cache.remove cache nsid 138 + 139 + (* resolve and parse a lexicon document from nsid *) 140 + let resolve_schema nsid = 141 + match Ttl_cache.String_cache.get schema_cache nsid with 142 + | Some cached -> 143 + Lwt.return_ok cached 144 + | None -> ( 145 + match%lwt resolve_did_authority nsid with 146 + | Error e -> 147 + Lwt.return_error ("DNS resolution failed: " ^ e) 148 + | Ok did -> ( 149 + match%lwt fetch_lexicon ~did ~nsid with 150 + | Error e -> 151 + Lwt.return_error ("lexicon fetch failed: " ^ e) 152 + | Ok json -> ( 153 + try 154 + let doc = Hermes_cli.Parser.parse_lexicon_doc json in 155 + Ttl_cache.String_cache.set schema_cache nsid doc ; 156 + Lwt.return_ok doc 157 + with Failure e -> Lwt.return_error ("lexicon parse failed: " ^ e) ) ) ) 158 + 159 + let clear_schema_cache nsid = Ttl_cache.String_cache.remove schema_cache nsid
+481
pegasus/lib/record_validator.ml
··· 1 + module Types = Hermes_cli.Lexicon_types 2 + 3 + exception Validation_error of string 4 + 5 + let fail fmt = Printf.ksprintf (fun msg -> raise (Validation_error msg)) fmt 6 + 7 + type ctx = {current_doc: Types.lexicon_doc; visited: (string * string) list} 8 + 9 + let count_graphemes s = 10 + Uuseg_string.fold_utf_8 `Grapheme_cluster (fun acc _ -> acc + 1) 0 s 11 + 12 + let compile_re s = Re.Pcre.re s |> Re.compile 13 + 14 + let did_re = compile_re {|^did:[a-z]+:[a-zA-Z0-9._:%-]+$|} 15 + 16 + let is_did s = String.length s <= 2048 && Re.execp did_re s 17 + 18 + let handle_label_re = 19 + compile_re {|^[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?$|} 20 + 21 + let is_handle s = 22 + let len = String.length s in 23 + if len < 1 || len > 253 then false 24 + else 25 + let labels = String.split_on_char '.' s in 26 + match labels with 27 + | [] | [_] -> 28 + false 29 + | _ -> 30 + let rec walk = function 31 + | [] -> 32 + false (* unreachable *) 33 + | [last] -> 34 + (* final label (TLD) must start with a letter *) 35 + let c = if last = "" then ' ' else last.[0] in 36 + ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) 37 + && Re.execp handle_label_re last 38 + | l :: rest -> 39 + Re.execp handle_label_re l && walk rest 40 + in 41 + walk labels 42 + 43 + let is_at_identifier s = is_did s || is_handle s 44 + 45 + let nsid_label_re = compile_re {|^[a-zA-Z](?:[a-zA-Z0-9-]{0,62})$|} 46 + 47 + let nsid_name_re = compile_re {|^[a-zA-Z](?:[a-zA-Z0-9]{0,62})$|} 48 + 49 + let is_nsid s = 50 + let len = String.length s in 51 + if len < 3 || len > 317 then false 52 + else 53 + let labels = String.split_on_char '.' s in 54 + match List.rev labels with 55 + | [] | [_] | [_; _] -> 56 + false 57 + | name :: authority_rev -> 58 + Re.execp nsid_name_re name 59 + && List.for_all (fun l -> Re.execp nsid_label_re l) authority_rev 60 + 61 + let tid_re = 62 + compile_re {|^[234567abcdefghij][234567abcdefghijklmnopqrstuvwxyz]{12}$|} 63 + 64 + let is_tid s = Re.execp tid_re s 65 + 66 + let record_key_re = compile_re {|^[a-zA-Z0-9_~.:-]{1,512}$|} 67 + 68 + let is_record_key s = 69 + s <> "." && s <> ".." && String.length s <= 512 && Re.execp record_key_re s 70 + 71 + let datetime_re = 72 + compile_re 73 + {|^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}(?:\.[0-9]+)?(?:Z|[+-][0-9]{2}:[0-9]{2})$|} 74 + 75 + let is_datetime s = Re.execp datetime_re s 76 + 77 + let is_at_uri s = Option.is_some (Util.Syntax.parse_at_uri s) 78 + 79 + let is_cid s = Result.is_ok (Cid.of_string s) 80 + 81 + let validate_string_format fmt s = 82 + let fmt_err () = fail "value %S is not a valid %s" s fmt in 83 + match fmt with 84 + | "at-identifier" -> 85 + if not (is_at_identifier s) then fmt_err () 86 + | "did" -> 87 + if not (is_did s) then fmt_err () 88 + | "handle" -> 89 + if not (is_handle s) then fmt_err () 90 + | "nsid" -> 91 + if not (is_nsid s) then fmt_err () 92 + | "tid" -> 93 + if not (is_tid s) then fmt_err () 94 + | "record-key" -> 95 + if not (is_record_key s) then fmt_err () 96 + | "datetime" -> 97 + if not (is_datetime s) then fmt_err () 98 + | "at-uri" -> 99 + if not (is_at_uri s) then fmt_err () 100 + | "cid" -> 101 + if not (is_cid s) then fmt_err () 102 + | "uri" | "language" -> 103 + (* validation is complex, will implement later maybe *) 104 + () 105 + | _ -> 106 + (* unknown format; accept *) 107 + () 108 + 109 + type parsed_ref = {nsid: string option; fragment: string} 110 + 111 + let parse_ref r = 112 + match String.index_opt r '#' with 113 + | None -> 114 + {nsid= Some r; fragment= "main"} 115 + | Some 0 -> 116 + {nsid= None; fragment= String.sub r 1 (String.length r - 1)} 117 + | Some i -> 118 + { nsid= Some (String.sub r 0 i) 119 + ; fragment= String.sub r (i + 1) (String.length r - i - 1) } 120 + 121 + let lookup_def (doc : Types.lexicon_doc) name = 122 + List.find_opt (fun d -> d.Types.name = name) doc.defs 123 + |> Option.map (fun d -> d.Types.type_def) 124 + 125 + (* returns (doc, type_def) that the ref points at, plus the effective nsid 126 + so we can register it in the visited set *) 127 + let resolve_ref ctx ref_str = 128 + let pr = parse_ref ref_str in 129 + let target_nsid = Option.value pr.nsid ~default:ctx.current_doc.id in 130 + if target_nsid = ctx.current_doc.id then 131 + match lookup_def ctx.current_doc pr.fragment with 132 + | Some td -> 133 + Lwt.return (target_nsid, ctx.current_doc, td) 134 + | None -> 135 + fail "ref %s: no def %s in lexicon" ref_str pr.fragment 136 + else 137 + match%lwt Lexicon_resolver.resolve_schema target_nsid with 138 + | Error e -> 139 + fail "ref %s: could not resolve lexicon: %s" ref_str e 140 + | Ok other_doc -> ( 141 + match lookup_def other_doc pr.fragment with 142 + | Some td -> 143 + Lwt.return (target_nsid, other_doc, td) 144 + | None -> 145 + fail "ref %s: no def %s in lexicon %s" ref_str pr.fragment target_nsid 146 + ) 147 + 148 + let validate_string (spec : Types.string_spec) json = 149 + let s = match json with `String s -> s | _ -> fail "expected string" in 150 + ( match spec.const with 151 + | Some c when s <> c -> 152 + fail "expected constant %S, got %S" c s 153 + | _ -> 154 + () ) ; 155 + ( match spec.enum with 156 + | Some vs when not (List.mem s vs) -> 157 + fail "value %S not in enum" s 158 + | _ -> 159 + () ) ; 160 + ( match spec.min_length with 161 + | Some n when String.length s < n -> 162 + fail "string shorter than minLength %d" n 163 + | _ -> 164 + () ) ; 165 + ( match spec.max_length with 166 + | Some n when String.length s > n -> 167 + fail "string longer than maxLength %d" n 168 + | _ -> 169 + () ) ; 170 + ( match (spec.min_graphemes, spec.max_graphemes) with 171 + | None, None -> 172 + () 173 + | mn, mx -> ( 174 + let g = count_graphemes s in 175 + ( match mn with 176 + | Some n when g < n -> 177 + fail "string shorter than minGraphemes %d" n 178 + | _ -> 179 + () ) ; 180 + match mx with 181 + | Some n when g > n -> 182 + fail "string longer than maxGraphemes %d" n 183 + | _ -> 184 + () ) ) ; 185 + match spec.format with Some f -> validate_string_format f s | None -> () 186 + 187 + let validate_integer (spec : Types.integer_spec) json = 188 + let i = 189 + match json with 190 + | `Int i -> 191 + i 192 + | `Intlit s -> ( 193 + try int_of_string s with _ -> fail "invalid integer literal %s" s ) 194 + | _ -> 195 + fail "expected integer" 196 + in 197 + ( match spec.const with 198 + | Some c when i <> c -> 199 + fail "expected constant %d, got %d" c i 200 + | _ -> 201 + () ) ; 202 + ( match spec.enum with 203 + | Some vs when not (List.mem i vs) -> 204 + fail "value %d not in enum" i 205 + | _ -> 206 + () ) ; 207 + ( match spec.minimum with 208 + | Some n when i < n -> 209 + fail "integer below minimum %d" n 210 + | _ -> 211 + () ) ; 212 + match spec.maximum with 213 + | Some n when i > n -> 214 + fail "integer above maximum %d" n 215 + | _ -> 216 + () 217 + 218 + let validate_boolean (spec : Types.boolean_spec) json = 219 + let b = match json with `Bool b -> b | _ -> fail "expected boolean" in 220 + match spec.const with 221 + | Some c when b <> c -> 222 + fail "expected constant %b, got %b" c b 223 + | _ -> 224 + () 225 + 226 + let validate_bytes (spec : Types.bytes_spec) json = 227 + let b64 = 228 + match json with 229 + | `Assoc [("$bytes", `String s)] -> 230 + s 231 + | _ -> 232 + fail "expected bytes object with $bytes field" 233 + in 234 + let byte_len = 235 + (* 3 bytes per 4 b64 chars minus padding *) 236 + let n = String.length b64 in 237 + let pad = 238 + if n >= 2 && String.sub b64 (n - 2) 2 = "==" then 2 239 + else if n >= 1 && String.sub b64 (n - 1) 1 = "=" then 1 240 + else 0 241 + in 242 + (n / 4 * 3) - pad 243 + in 244 + ( match spec.min_length with 245 + | Some n when byte_len < n -> 246 + fail "bytes shorter than minLength %d" n 247 + | _ -> 248 + () ) ; 249 + match spec.max_length with 250 + | Some n when byte_len > n -> 251 + fail "bytes longer than maxLength %d" n 252 + | _ -> 253 + () 254 + 255 + let validate_cid_link json = 256 + match json with 257 + | `Assoc [("$link", `String s)] -> 258 + if not (is_cid s) then fail "invalid CID in $link" 259 + | _ -> 260 + fail "expected cid-link object {\"$link\": ...}" 261 + 262 + let mime_matches pattern mime = 263 + if pattern = "*/*" then true 264 + else 265 + match String.index_opt pattern '/' with 266 + | None -> 267 + pattern = mime 268 + | Some i -> ( 269 + let p_type = String.sub pattern 0 i in 270 + let p_sub = 271 + String.sub pattern (i + 1) (String.length pattern - i - 1) 272 + in 273 + match String.index_opt mime '/' with 274 + | None -> 275 + false 276 + | Some j -> 277 + let m_type = String.sub mime 0 j in 278 + let m_sub = String.sub mime (j + 1) (String.length mime - j - 1) in 279 + p_type = m_type && (p_sub = "*" || p_sub = m_sub) ) 280 + 281 + let validate_blob (spec : Types.blob_spec) json = 282 + let fields = 283 + match json with `Assoc pairs -> pairs | _ -> fail "expected blob object" 284 + in 285 + let get k = List.assoc_opt k fields in 286 + let mime_type = 287 + match get "mimeType" with 288 + | Some (`String s) -> 289 + s 290 + | _ -> 291 + fail "blob missing mimeType" 292 + in 293 + let size = 294 + match get "$type" with 295 + | Some (`String "blob") -> ( 296 + ( match get "ref" with 297 + | Some (`Assoc [("$link", `String c)]) -> 298 + if not (is_cid c) then fail "blob ref has invalid CID" 299 + | _ -> 300 + fail "blob missing ref" ) ; 301 + match get "size" with 302 + | Some (`Int n) -> 303 + Some n 304 + | Some (`Intlit s) -> ( 305 + try Some (int_of_string s) with _ -> fail "invalid blob size" ) 306 + | _ -> 307 + fail "blob missing size" ) 308 + | _ -> ( 309 + (* legacy shape *) 310 + match get "cid" with 311 + | Some (`String c) -> 312 + if not (is_cid c) then fail "blob cid is invalid" ; 313 + None 314 + | _ -> 315 + fail "blob has neither $type=blob nor legacy cid field" ) 316 + in 317 + ( match spec.accept with 318 + | Some [] | None -> 319 + () 320 + | Some patterns -> 321 + if not (List.exists (fun p -> mime_matches p mime_type) patterns) then 322 + fail "blob mimeType %S not in accept list" mime_type ) ; 323 + match (spec.max_size, size) with 324 + | Some max_s, Some s when s > max_s -> 325 + fail "blob size %d exceeds maxSize %d" s max_s 326 + | _ -> 327 + () 328 + 329 + let rec validate_value ctx (td : Types.type_def) (json : Yojson.Safe.t) : 330 + unit Lwt.t = 331 + match td with 332 + | String spec -> 333 + validate_string spec json ; Lwt.return_unit 334 + | Integer spec -> 335 + validate_integer spec json ; Lwt.return_unit 336 + | Boolean spec -> 337 + validate_boolean spec json ; Lwt.return_unit 338 + | Bytes spec -> 339 + validate_bytes spec json ; Lwt.return_unit 340 + | Blob spec -> 341 + validate_blob spec json ; Lwt.return_unit 342 + | CidLink _ -> 343 + validate_cid_link json ; Lwt.return_unit 344 + | Array spec -> 345 + validate_array ctx spec json 346 + | Object spec -> 347 + validate_object ctx spec json 348 + | Ref spec -> 349 + validate_ref ctx spec json 350 + | Union spec -> 351 + validate_union ctx spec json 352 + | Token _ -> 353 + (match json with `String _ -> () | _ -> fail "expected token (string)") ; 354 + Lwt.return_unit 355 + | Unknown _ -> 356 + Lwt.return_unit 357 + | Record spec -> 358 + validate_object ctx spec.record json 359 + | Query _ | Procedure _ | Subscription _ | PermissionSet _ -> 360 + fail "lexicon type %s is not a valid record payload" 361 + ( match td with 362 + | Query _ -> 363 + "query" 364 + | Procedure _ -> 365 + "procedure" 366 + | Subscription _ -> 367 + "subscription" 368 + | PermissionSet _ -> 369 + "permission-set" 370 + | _ -> 371 + "other" ) 372 + 373 + and validate_array ctx (spec : Types.array_spec) json = 374 + let items = match json with `List xs -> xs | _ -> fail "expected array" in 375 + let len = List.length items in 376 + ( match spec.min_length with 377 + | Some n when len < n -> 378 + fail "array shorter than minLength %d" n 379 + | _ -> 380 + () ) ; 381 + ( match spec.max_length with 382 + | Some n when len > n -> 383 + fail "array longer than maxLength %d" n 384 + | _ -> 385 + () ) ; 386 + Lwt_list.iter_s (validate_value ctx spec.items) items 387 + 388 + and validate_object ctx (spec : Types.object_spec) json = 389 + let fields = 390 + match json with `Assoc kvs -> kvs | _ -> fail "expected object" 391 + in 392 + let required = Option.value spec.required ~default:[] in 393 + let nullable = Option.value spec.nullable ~default:[] in 394 + List.iter 395 + (fun k -> 396 + if not (List.mem_assoc k fields) then fail "missing required field %S" k ) 397 + required ; 398 + Lwt_list.iter_s 399 + (fun (name, (prop : Types.property)) -> 400 + match List.assoc_opt name fields with 401 + | None -> 402 + Lwt.return_unit 403 + | Some `Null when List.mem name nullable -> 404 + Lwt.return_unit 405 + | Some `Null -> 406 + fail "field %S is not nullable" name 407 + | Some v -> 408 + Lwt.catch 409 + (fun () -> validate_value ctx prop.type_def v) 410 + (function 411 + | Validation_error m -> fail "at %S: %s" name m | e -> Lwt.fail e ) ) 412 + spec.properties 413 + 414 + and validate_ref ctx (spec : Types.ref_spec) json = 415 + let pr = parse_ref spec.ref_ in 416 + let target_nsid = Option.value pr.nsid ~default:ctx.current_doc.id in 417 + let key = (target_nsid, pr.fragment) in 418 + if List.mem key ctx.visited then Lwt.return_unit 419 + else 420 + let%lwt _nsid, doc, td = resolve_ref ctx spec.ref_ in 421 + let ctx' = {current_doc= doc; visited= key :: ctx.visited} in 422 + (* A ref may resolve to a Record def; record's content is the inner object. *) 423 + let td = 424 + match td with Types.Record {record; _} -> Types.Object record | t -> t 425 + in 426 + validate_value ctx' td json 427 + 428 + and validate_union ctx (spec : Types.union_spec) json = 429 + let closed = Option.value spec.closed ~default:false in 430 + let fields = 431 + match json with `Assoc kvs -> kvs | _ -> fail "expected object in union" 432 + in 433 + let type_tag = 434 + match List.assoc_opt "$type" fields with 435 + | Some (`String s) -> 436 + Some s 437 + | _ -> 438 + None 439 + in 440 + let ref_matches_tag tag ref_str = 441 + let pr = parse_ref ref_str in 442 + let ref_nsid = Option.value pr.nsid ~default:ctx.current_doc.id in 443 + let canonical = 444 + if pr.fragment = "main" then ref_nsid else ref_nsid ^ "#" ^ pr.fragment 445 + in 446 + canonical = tag 447 + in 448 + match type_tag with 449 + | None -> 450 + if closed then fail "union value missing $type (closed union)" 451 + else Lwt.return_unit 452 + | Some tag -> ( 453 + match List.find_opt (ref_matches_tag tag) spec.refs with 454 + | Some matched -> 455 + let fake_ref : Types.ref_spec = {ref_= matched; description= None} in 456 + validate_ref ctx fake_ref json 457 + | None -> 458 + if closed then fail "$type %S not in closed union" tag 459 + else Lwt.return_unit ) 460 + 461 + let validate_record ~nsid ~(record : Yojson.Safe.t) = 462 + match%lwt Lexicon_resolver.resolve_schema nsid with 463 + | Error e -> 464 + Lwt.return_error ("could not resolve lexicon: " ^ e) 465 + | Ok doc -> ( 466 + match lookup_def doc "main" with 467 + | None -> 468 + Lwt.return_error "lexicon has no defs.main" 469 + | Some (Record rec_spec) -> 470 + let ctx = {current_doc= doc; visited= [(nsid, "main")]} in 471 + Lwt.catch 472 + (fun () -> 473 + let%lwt () = validate_object ctx rec_spec.record record in 474 + Lwt.return_ok () ) 475 + (function 476 + | Validation_error msg -> 477 + Lwt.return_error msg 478 + | e -> 479 + Lwt.return_error (Printexc.to_string e) ) 480 + | Some _ -> 481 + Lwt.return_error "defs.main is not a record type" )
+1 -1
pegasus/test/dune
··· 1 1 (tests 2 - (names test_sequencer test_scopes) 2 + (names test_sequencer test_scopes test_record_validator) 3 3 (package pegasus) 4 4 (libraries ipld pegasus lwt lwt.unix lwt_ppx alcotest str) 5 5 (preprocess
+475
pegasus/test/test_record_validator.ml
··· 1 + open Alcotest 2 + open Pegasus 3 + module Types = Hermes_cli.Lexicon_types 4 + 5 + let test_is_did () = 6 + check bool "valid plc" true 7 + (Record_validator.is_did "did:plc:vwzwgnygau7ed7b7wt5ux7y2") ; 8 + check bool "valid web" true (Record_validator.is_did "did:web:example.com") ; 9 + check bool "missing method" false (Record_validator.is_did "did::abc") ; 10 + check bool "missing id" false (Record_validator.is_did "did:plc:") ; 11 + check bool "no prefix" false 12 + (Record_validator.is_did "plc:vwzwgnygau7ed7b7wt5ux7y2") ; 13 + check bool "upper method" false 14 + (Record_validator.is_did "did:PLC:vwzwgnygau7ed7b7wt5ux7y2") 15 + 16 + let test_is_handle () = 17 + check bool "simple" true (Record_validator.is_handle "alice.bsky.social") ; 18 + check bool "short tld" true (Record_validator.is_handle "a.co") ; 19 + check bool "digits ok in middle" true 20 + (Record_validator.is_handle "user1.example.com") ; 21 + check bool "no dots" false (Record_validator.is_handle "handle") ; 22 + check bool "trailing dot" false (Record_validator.is_handle "alice.bsky.") ; 23 + check bool "numeric tld" false (Record_validator.is_handle "alice.bsky.1") ; 24 + check bool "leading dash" false 25 + (Record_validator.is_handle "-bad.example.com") 26 + 27 + let test_is_nsid () = 28 + check bool "valid" true 29 + (Record_validator.is_nsid "com.atproto.repo.createRecord") ; 30 + check bool "app.bsky" true (Record_validator.is_nsid "app.bsky.feed.post") ; 31 + check bool "too few labels" false (Record_validator.is_nsid "com.example") ; 32 + check bool "name starts with digit" false 33 + (Record_validator.is_nsid "com.example.1bad") ; 34 + check bool "name contains underscore" false 35 + (Record_validator.is_nsid "com.example.bad_name") 36 + 37 + let test_is_tid () = 38 + check bool "valid" true (Record_validator.is_tid "3jzfcijpj2z2a") ; 39 + check bool "wrong length" false (Record_validator.is_tid "3jzfc") ; 40 + check bool "invalid first char" false 41 + (Record_validator.is_tid "kjzfcijpj2z2a") 42 + 43 + let test_is_record_key () = 44 + check bool "self" true (Record_validator.is_record_key "self") ; 45 + check bool "with dash" true (Record_validator.is_record_key "some-key.v1") ; 46 + check bool "dot not ok" false (Record_validator.is_record_key ".") ; 47 + check bool "dotdot not ok" false (Record_validator.is_record_key "..") ; 48 + check bool "slash forbidden" false (Record_validator.is_record_key "a/b") 49 + 50 + let test_is_datetime () = 51 + check bool "with ms" true 52 + (Record_validator.is_datetime "1985-04-12T23:20:50.123Z") ; 53 + check bool "no ms" true (Record_validator.is_datetime "2024-01-01T00:00:00Z") ; 54 + check bool "offset" true 55 + (Record_validator.is_datetime "2024-01-01T00:00:00+05:00") ; 56 + check bool "no tz" false (Record_validator.is_datetime "2024-01-01T00:00:00") 57 + 58 + let test_count_graphemes () = 59 + check int "ascii" 5 (Record_validator.count_graphemes "hello") ; 60 + check int "empty" 0 (Record_validator.count_graphemes "") ; 61 + check int "e-acute precomposed" 4 62 + (Record_validator.count_graphemes "caf\xC3\xA9") ; 63 + (* "cafe" + combining acute U+0301: 5 codepoints, 4 clusters *) 64 + check int "e-acute decomposed" 4 65 + (Record_validator.count_graphemes "cafe\xCC\x81") ; 66 + check int "camel emoji" 1 67 + (Record_validator.count_graphemes "\xF0\x9F\x90\xAB") ; 68 + (* ZWJ family: man + ZWJ + woman = 3 codepoints, 1 cluster *) 69 + check int "zwj sequence" 1 70 + (Record_validator.count_graphemes 71 + "\xF0\x9F\x91\xA8\xE2\x80\x8D\xF0\x9F\x91\xA9" ) 72 + 73 + let setup_schema_cache nsid json_str = 74 + let doc = 75 + Hermes_cli.Parser.parse_lexicon_doc (Yojson.Safe.from_string json_str) 76 + in 77 + Ttl_cache.String_cache.set Lexicon_resolver.schema_cache nsid doc 78 + 79 + let validate_json ~nsid json_str = 80 + let json = Yojson.Safe.from_string json_str in 81 + Record_validator.validate_record ~nsid ~record:json 82 + 83 + let check_ok msg result = 84 + match result with 85 + | Ok () -> 86 + () 87 + | Error e -> 88 + failf "%s: expected Ok, got Error: %s" msg e 89 + 90 + let check_err msg_fragment result = 91 + match result with 92 + | Ok () -> 93 + fail "expected error, got Ok" 94 + | Error e -> 95 + if 96 + String.length msg_fragment > 0 97 + && (not (Str.string_match (Str.regexp_string msg_fragment) e 0)) 98 + && not 99 + ( try 100 + ignore 101 + (Str.search_forward (Str.regexp_string msg_fragment) e 0) ; 102 + true 103 + with Not_found -> false ) 104 + then failf "error %S did not contain %S" e msg_fragment 105 + 106 + let simple_post_lexicon = 107 + {|{ 108 + "lexicon": 1, 109 + "id": "com.test.post", 110 + "defs": { 111 + "main": { 112 + "type": "record", 113 + "key": "tid", 114 + "record": { 115 + "type": "object", 116 + "required": ["text", "createdAt"], 117 + "properties": { 118 + "text": { 119 + "type": "string", 120 + "maxLength": 300, 121 + "maxGraphemes": 100 122 + }, 123 + "createdAt": { "type": "string", "format": "datetime" }, 124 + "likes": { "type": "integer", "minimum": 0, "maximum": 1000000 }, 125 + "pinned": { "type": "boolean" } 126 + } 127 + } 128 + } 129 + } 130 + }|} 131 + 132 + let test_string_length_pass () = 133 + setup_schema_cache "com.test.post" simple_post_lexicon ; 134 + let result = 135 + Lwt_main.run 136 + (validate_json ~nsid:"com.test.post" 137 + {|{"text":"hi","createdAt":"2024-01-01T00:00:00Z"}|} ) 138 + in 139 + check_ok "valid record" result 140 + 141 + let test_string_too_long () = 142 + setup_schema_cache "com.test.post" simple_post_lexicon ; 143 + let long_text = String.make 301 'x' in 144 + let body = 145 + Printf.sprintf {|{"text":"%s","createdAt":"2024-01-01T00:00:00Z"}|} 146 + long_text 147 + in 148 + let result = Lwt_main.run (validate_json ~nsid:"com.test.post" body) in 149 + check_err "maxLength" result 150 + 151 + let test_missing_required () = 152 + setup_schema_cache "com.test.post" simple_post_lexicon ; 153 + let result = 154 + Lwt_main.run (validate_json ~nsid:"com.test.post" {|{"text":"hi"}|}) 155 + in 156 + check_err "createdAt" result 157 + 158 + let test_integer_out_of_range () = 159 + setup_schema_cache "com.test.post" simple_post_lexicon ; 160 + let result = 161 + Lwt_main.run 162 + (validate_json ~nsid:"com.test.post" 163 + {|{"text":"hi","createdAt":"2024-01-01T00:00:00Z","likes":-1}|} ) 164 + in 165 + check_err "minimum" result 166 + 167 + let test_bad_datetime_format () = 168 + setup_schema_cache "com.test.post" simple_post_lexicon ; 169 + let result = 170 + Lwt_main.run 171 + (validate_json ~nsid:"com.test.post" 172 + {|{"text":"hi","createdAt":"not-a-date"}|} ) 173 + in 174 + check_err "datetime" result 175 + 176 + let test_wrong_type () = 177 + setup_schema_cache "com.test.post" simple_post_lexicon ; 178 + let result = 179 + Lwt_main.run 180 + (validate_json ~nsid:"com.test.post" 181 + {|{"text":42,"createdAt":"2024-01-01T00:00:00Z"}|} ) 182 + in 183 + check_err "string" result 184 + 185 + let test_extra_fields_ignored () = 186 + setup_schema_cache "com.test.post" simple_post_lexicon ; 187 + let result = 188 + Lwt_main.run 189 + (validate_json ~nsid:"com.test.post" 190 + {|{"text":"hi","createdAt":"2024-01-01T00:00:00Z","extra":"ok"}|} ) 191 + in 192 + check_ok "extras allowed" result 193 + 194 + (* record with nullable + array + cid-link + local ref *) 195 + let ref_lexicon = 196 + {|{ 197 + "lexicon": 1, 198 + "id": "com.test.ref", 199 + "defs": { 200 + "main": { 201 + "type": "record", 202 + "key": "tid", 203 + "record": { 204 + "type": "object", 205 + "required": ["ids", "author"], 206 + "nullable": ["bio"], 207 + "properties": { 208 + "ids": { 209 + "type": "array", 210 + "minLength": 1, 211 + "items": { "type": "string", "format": "cid" } 212 + }, 213 + "author": { "type": "ref", "ref": "#person" }, 214 + "bio": { "type": "string" }, 215 + "icon": { "type": "cid-link" } 216 + } 217 + } 218 + }, 219 + "person": { 220 + "type": "object", 221 + "required": ["did"], 222 + "properties": { 223 + "did": { "type": "string", "format": "did" }, 224 + "name": { "type": "string", "maxLength": 50 } 225 + } 226 + } 227 + } 228 + }|} 229 + 230 + let valid_cid = "bafkreibjfgx2gprinfvicegelk5kosd6y2frmqpqzwqkg7usac74l3t2v4" 231 + 232 + let test_array_and_local_ref_ok () = 233 + setup_schema_cache "com.test.ref" ref_lexicon ; 234 + let body = 235 + Printf.sprintf 236 + {|{"ids":["%s"],"author":{"did":"did:plc:vwzwgnygau7ed7b7wt5ux7y2","name":"Alice"}}|} 237 + valid_cid 238 + in 239 + let result = Lwt_main.run (validate_json ~nsid:"com.test.ref" body) in 240 + check_ok "valid ref" result 241 + 242 + let test_array_min_length () = 243 + setup_schema_cache "com.test.ref" ref_lexicon ; 244 + let body = 245 + {|{"ids":[],"author":{"did":"did:plc:vwzwgnygau7ed7b7wt5ux7y2"}}|} 246 + in 247 + let result = Lwt_main.run (validate_json ~nsid:"com.test.ref" body) in 248 + check_err "minLength" result 249 + 250 + let test_cid_in_array_invalid () = 251 + setup_schema_cache "com.test.ref" ref_lexicon ; 252 + let body = 253 + {|{"ids":["not-a-cid"],"author":{"did":"did:plc:vwzwgnygau7ed7b7wt5ux7y2"}}|} 254 + in 255 + let result = Lwt_main.run (validate_json ~nsid:"com.test.ref" body) in 256 + check_err "cid" result 257 + 258 + let test_ref_missing_required () = 259 + setup_schema_cache "com.test.ref" ref_lexicon ; 260 + let body = 261 + Printf.sprintf {|{"ids":["%s"],"author":{"name":"nodid"}}|} valid_cid 262 + in 263 + let result = Lwt_main.run (validate_json ~nsid:"com.test.ref" body) in 264 + check_err "did" result 265 + 266 + let test_nullable_field_allowed () = 267 + setup_schema_cache "com.test.ref" ref_lexicon ; 268 + let body = 269 + Printf.sprintf 270 + {|{"ids":["%s"],"author":{"did":"did:plc:vwzwgnygau7ed7b7wt5ux7y2"},"bio":null}|} 271 + valid_cid 272 + in 273 + let result = Lwt_main.run (validate_json ~nsid:"com.test.ref" body) in 274 + check_ok "null allowed" result 275 + 276 + let test_cid_link_shape () = 277 + setup_schema_cache "com.test.ref" ref_lexicon ; 278 + let body = 279 + Printf.sprintf 280 + {|{"ids":["%s"],"author":{"did":"did:plc:vwzwgnygau7ed7b7wt5ux7y2"},"icon":{"$link":"%s"}}|} 281 + valid_cid valid_cid 282 + in 283 + let result = Lwt_main.run (validate_json ~nsid:"com.test.ref" body) in 284 + check_ok "cid-link ok" result 285 + 286 + let test_cid_link_bad () = 287 + setup_schema_cache "com.test.ref" ref_lexicon ; 288 + let body = 289 + Printf.sprintf 290 + {|{"ids":["%s"],"author":{"did":"did:plc:vwzwgnygau7ed7b7wt5ux7y2"},"icon":{"$link":"nope"}}|} 291 + valid_cid 292 + in 293 + let result = Lwt_main.run (validate_json ~nsid:"com.test.ref" body) in 294 + check_err "CID" result 295 + 296 + (* blob + union *) 297 + let blob_lexicon = 298 + {|{ 299 + "lexicon": 1, 300 + "id": "com.test.blob", 301 + "defs": { 302 + "main": { 303 + "type": "record", 304 + "key": "tid", 305 + "record": { 306 + "type": "object", 307 + "required": ["image"], 308 + "properties": { 309 + "image": { 310 + "type": "blob", 311 + "accept": ["image/*"], 312 + "maxSize": 1000 313 + } 314 + } 315 + } 316 + } 317 + } 318 + }|} 319 + 320 + let test_blob_typed_ok () = 321 + setup_schema_cache "com.test.blob" blob_lexicon ; 322 + let body = 323 + Printf.sprintf 324 + {|{"image":{"$type":"blob","ref":{"$link":"%s"},"mimeType":"image/png","size":500}}|} 325 + valid_cid 326 + in 327 + let result = Lwt_main.run (validate_json ~nsid:"com.test.blob" body) in 328 + check_ok "typed blob" result 329 + 330 + let test_blob_mime_rejected () = 331 + setup_schema_cache "com.test.blob" blob_lexicon ; 332 + let body = 333 + Printf.sprintf 334 + {|{"image":{"$type":"blob","ref":{"$link":"%s"},"mimeType":"video/mp4","size":500}}|} 335 + valid_cid 336 + in 337 + let result = Lwt_main.run (validate_json ~nsid:"com.test.blob" body) in 338 + check_err "accept" result 339 + 340 + let test_blob_too_big () = 341 + setup_schema_cache "com.test.blob" blob_lexicon ; 342 + let body = 343 + Printf.sprintf 344 + {|{"image":{"$type":"blob","ref":{"$link":"%s"},"mimeType":"image/png","size":9999}}|} 345 + valid_cid 346 + in 347 + let result = Lwt_main.run (validate_json ~nsid:"com.test.blob" body) in 348 + check_err "maxSize" result 349 + 350 + let union_lexicon = 351 + {|{ 352 + "lexicon": 1, 353 + "id": "com.test.union", 354 + "defs": { 355 + "main": { 356 + "type": "record", 357 + "key": "tid", 358 + "record": { 359 + "type": "object", 360 + "required": ["payload"], 361 + "properties": { 362 + "payload": { 363 + "type": "union", 364 + "refs": ["#foo", "#bar"], 365 + "closed": true 366 + } 367 + } 368 + } 369 + }, 370 + "foo": { 371 + "type": "object", 372 + "required": ["a"], 373 + "properties": { "a": { "type": "string" } } 374 + }, 375 + "bar": { 376 + "type": "object", 377 + "required": ["b"], 378 + "properties": { "b": { "type": "integer" } } 379 + } 380 + } 381 + }|} 382 + 383 + let test_union_closed_match () = 384 + setup_schema_cache "com.test.union" union_lexicon ; 385 + let body = {|{"payload":{"$type":"com.test.union#foo","a":"hi"}}|} in 386 + let result = Lwt_main.run (validate_json ~nsid:"com.test.union" body) in 387 + check_ok "closed union match" result 388 + 389 + let test_union_closed_no_match () = 390 + setup_schema_cache "com.test.union" union_lexicon ; 391 + let body = {|{"payload":{"$type":"com.test.union#baz","a":"hi"}}|} in 392 + let result = Lwt_main.run (validate_json ~nsid:"com.test.union" body) in 393 + check_err "closed union" result 394 + 395 + let test_union_closed_missing_type () = 396 + setup_schema_cache "com.test.union" union_lexicon ; 397 + let body = {|{"payload":{"a":"hi"}}|} in 398 + let result = Lwt_main.run (validate_json ~nsid:"com.test.union" body) in 399 + check_err "$type" result 400 + 401 + let open_union_lexicon = 402 + {|{ 403 + "lexicon": 1, 404 + "id": "com.test.openu", 405 + "defs": { 406 + "main": { 407 + "type": "record", 408 + "key": "tid", 409 + "record": { 410 + "type": "object", 411 + "required": ["x"], 412 + "properties": { 413 + "x": { "type": "union", "refs": ["#only"] } 414 + } 415 + } 416 + }, 417 + "only": { 418 + "type": "object", 419 + "required": ["a"], 420 + "properties": { "a": { "type": "string" } } 421 + } 422 + } 423 + }|} 424 + 425 + let test_union_open_unknown_ok () = 426 + setup_schema_cache "com.test.openu" open_union_lexicon ; 427 + let body = {|{"x":{"$type":"com.future.thing","whatever":123}}|} in 428 + let result = Lwt_main.run (validate_json ~nsid:"com.test.openu" body) in 429 + check_ok "open union accepts unknown" result 430 + 431 + let test_union_open_known_still_validated () = 432 + setup_schema_cache "com.test.openu" open_union_lexicon ; 433 + (* $type matches the known ref, but the inner field type is wrong *) 434 + let body = {|{"x":{"$type":"com.test.openu#only","a":42}}|} in 435 + let result = Lwt_main.run (validate_json ~nsid:"com.test.openu" body) in 436 + check_err "string" result 437 + 438 + let () = 439 + run "record_validator" 440 + [ ( "formats" 441 + , [ ("is_did", `Quick, test_is_did) 442 + ; ("is_handle", `Quick, test_is_handle) 443 + ; ("is_nsid", `Quick, test_is_nsid) 444 + ; ("is_tid", `Quick, test_is_tid) 445 + ; ("is_record_key", `Quick, test_is_record_key) 446 + ; ("is_datetime", `Quick, test_is_datetime) ] ) 447 + ; ("graphemes", [("count_graphemes", `Quick, test_count_graphemes)]) 448 + ; ( "primitives" 449 + , [ ("string length pass", `Quick, test_string_length_pass) 450 + ; ("string too long", `Quick, test_string_too_long) 451 + ; ("missing required", `Quick, test_missing_required) 452 + ; ("integer out of range", `Quick, test_integer_out_of_range) 453 + ; ("bad datetime", `Quick, test_bad_datetime_format) 454 + ; ("wrong type", `Quick, test_wrong_type) 455 + ; ("extra fields ignored", `Quick, test_extra_fields_ignored) ] ) 456 + ; ( "compound" 457 + , [ ("array + local ref", `Quick, test_array_and_local_ref_ok) 458 + ; ("array minLength", `Quick, test_array_min_length) 459 + ; ("cid in array invalid", `Quick, test_cid_in_array_invalid) 460 + ; ("ref missing required", `Quick, test_ref_missing_required) 461 + ; ("nullable allowed", `Quick, test_nullable_field_allowed) 462 + ; ("cid-link shape", `Quick, test_cid_link_shape) 463 + ; ("cid-link bad", `Quick, test_cid_link_bad) ] ) 464 + ; ( "blob" 465 + , [ ("typed ok", `Quick, test_blob_typed_ok) 466 + ; ("mime rejected", `Quick, test_blob_mime_rejected) 467 + ; ("too big", `Quick, test_blob_too_big) ] ) 468 + ; ( "union" 469 + , [ ("closed match", `Quick, test_union_closed_match) 470 + ; ("closed no match", `Quick, test_union_closed_no_match) 471 + ; ("closed missing $type", `Quick, test_union_closed_missing_type) 472 + ; ("open unknown ok", `Quick, test_union_open_unknown_ok) 473 + ; ( "open known still validated" 474 + , `Quick 475 + , test_union_open_known_still_validated ) ] ) ]