···2626let cache : permission_set Ttl_cache.String_cache.t =
2727 Ttl_cache.String_cache.create (3 * Util.Time.hour) ()
28282929+let schema_cache : Hermes_cli.Lexicon_types.lexicon_doc Ttl_cache.String_cache.t
3030+ =
3131+ Ttl_cache.String_cache.create (3 * Util.Time.hour) ()
3232+2933(* reuse dns client from id_resolver *)
3034let dns_client = Id_resolver.Handle.dns_client
3135···131135 Lwt.return_ok ps ) ) )
132136133137let clear_cache nsid = Ttl_cache.String_cache.remove cache nsid
138138+139139+(* resolve and parse a lexicon document from nsid *)
140140+let resolve_schema nsid =
141141+ match Ttl_cache.String_cache.get schema_cache nsid with
142142+ | Some cached ->
143143+ Lwt.return_ok cached
144144+ | None -> (
145145+ match%lwt resolve_did_authority nsid with
146146+ | Error e ->
147147+ Lwt.return_error ("DNS resolution failed: " ^ e)
148148+ | Ok did -> (
149149+ match%lwt fetch_lexicon ~did ~nsid with
150150+ | Error e ->
151151+ Lwt.return_error ("lexicon fetch failed: " ^ e)
152152+ | Ok json -> (
153153+ try
154154+ let doc = Hermes_cli.Parser.parse_lexicon_doc json in
155155+ Ttl_cache.String_cache.set schema_cache nsid doc ;
156156+ Lwt.return_ok doc
157157+ with Failure e -> Lwt.return_error ("lexicon parse failed: " ^ e) ) ) )
158158+159159+let clear_schema_cache nsid = Ttl_cache.String_cache.remove schema_cache nsid
+481
pegasus/lib/record_validator.ml
···11+module Types = Hermes_cli.Lexicon_types
22+33+exception Validation_error of string
44+55+let fail fmt = Printf.ksprintf (fun msg -> raise (Validation_error msg)) fmt
66+77+type ctx = {current_doc: Types.lexicon_doc; visited: (string * string) list}
88+99+let count_graphemes s =
1010+ Uuseg_string.fold_utf_8 `Grapheme_cluster (fun acc _ -> acc + 1) 0 s
1111+1212+let compile_re s = Re.Pcre.re s |> Re.compile
1313+1414+let did_re = compile_re {|^did:[a-z]+:[a-zA-Z0-9._:%-]+$|}
1515+1616+let is_did s = String.length s <= 2048 && Re.execp did_re s
1717+1818+let handle_label_re =
1919+ compile_re {|^[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?$|}
2020+2121+let is_handle s =
2222+ let len = String.length s in
2323+ if len < 1 || len > 253 then false
2424+ else
2525+ let labels = String.split_on_char '.' s in
2626+ match labels with
2727+ | [] | [_] ->
2828+ false
2929+ | _ ->
3030+ let rec walk = function
3131+ | [] ->
3232+ false (* unreachable *)
3333+ | [last] ->
3434+ (* final label (TLD) must start with a letter *)
3535+ let c = if last = "" then ' ' else last.[0] in
3636+ ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
3737+ && Re.execp handle_label_re last
3838+ | l :: rest ->
3939+ Re.execp handle_label_re l && walk rest
4040+ in
4141+ walk labels
4242+4343+let is_at_identifier s = is_did s || is_handle s
4444+4545+let nsid_label_re = compile_re {|^[a-zA-Z](?:[a-zA-Z0-9-]{0,62})$|}
4646+4747+let nsid_name_re = compile_re {|^[a-zA-Z](?:[a-zA-Z0-9]{0,62})$|}
4848+4949+let is_nsid s =
5050+ let len = String.length s in
5151+ if len < 3 || len > 317 then false
5252+ else
5353+ let labels = String.split_on_char '.' s in
5454+ match List.rev labels with
5555+ | [] | [_] | [_; _] ->
5656+ false
5757+ | name :: authority_rev ->
5858+ Re.execp nsid_name_re name
5959+ && List.for_all (fun l -> Re.execp nsid_label_re l) authority_rev
6060+6161+let tid_re =
6262+ compile_re {|^[234567abcdefghij][234567abcdefghijklmnopqrstuvwxyz]{12}$|}
6363+6464+let is_tid s = Re.execp tid_re s
6565+6666+let record_key_re = compile_re {|^[a-zA-Z0-9_~.:-]{1,512}$|}
6767+6868+let is_record_key s =
6969+ s <> "." && s <> ".." && String.length s <= 512 && Re.execp record_key_re s
7070+7171+let datetime_re =
7272+ compile_re
7373+ {|^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}(?:\.[0-9]+)?(?:Z|[+-][0-9]{2}:[0-9]{2})$|}
7474+7575+let is_datetime s = Re.execp datetime_re s
7676+7777+let is_at_uri s = Option.is_some (Util.Syntax.parse_at_uri s)
7878+7979+let is_cid s = Result.is_ok (Cid.of_string s)
8080+8181+let validate_string_format fmt s =
8282+ let fmt_err () = fail "value %S is not a valid %s" s fmt in
8383+ match fmt with
8484+ | "at-identifier" ->
8585+ if not (is_at_identifier s) then fmt_err ()
8686+ | "did" ->
8787+ if not (is_did s) then fmt_err ()
8888+ | "handle" ->
8989+ if not (is_handle s) then fmt_err ()
9090+ | "nsid" ->
9191+ if not (is_nsid s) then fmt_err ()
9292+ | "tid" ->
9393+ if not (is_tid s) then fmt_err ()
9494+ | "record-key" ->
9595+ if not (is_record_key s) then fmt_err ()
9696+ | "datetime" ->
9797+ if not (is_datetime s) then fmt_err ()
9898+ | "at-uri" ->
9999+ if not (is_at_uri s) then fmt_err ()
100100+ | "cid" ->
101101+ if not (is_cid s) then fmt_err ()
102102+ | "uri" | "language" ->
103103+ (* validation is complex, will implement later maybe *)
104104+ ()
105105+ | _ ->
106106+ (* unknown format; accept *)
107107+ ()
108108+109109+type parsed_ref = {nsid: string option; fragment: string}
110110+111111+let parse_ref r =
112112+ match String.index_opt r '#' with
113113+ | None ->
114114+ {nsid= Some r; fragment= "main"}
115115+ | Some 0 ->
116116+ {nsid= None; fragment= String.sub r 1 (String.length r - 1)}
117117+ | Some i ->
118118+ { nsid= Some (String.sub r 0 i)
119119+ ; fragment= String.sub r (i + 1) (String.length r - i - 1) }
120120+121121+let lookup_def (doc : Types.lexicon_doc) name =
122122+ List.find_opt (fun d -> d.Types.name = name) doc.defs
123123+ |> Option.map (fun d -> d.Types.type_def)
124124+125125+(* returns (doc, type_def) that the ref points at, plus the effective nsid
126126+ so we can register it in the visited set *)
127127+let resolve_ref ctx ref_str =
128128+ let pr = parse_ref ref_str in
129129+ let target_nsid = Option.value pr.nsid ~default:ctx.current_doc.id in
130130+ if target_nsid = ctx.current_doc.id then
131131+ match lookup_def ctx.current_doc pr.fragment with
132132+ | Some td ->
133133+ Lwt.return (target_nsid, ctx.current_doc, td)
134134+ | None ->
135135+ fail "ref %s: no def %s in lexicon" ref_str pr.fragment
136136+ else
137137+ match%lwt Lexicon_resolver.resolve_schema target_nsid with
138138+ | Error e ->
139139+ fail "ref %s: could not resolve lexicon: %s" ref_str e
140140+ | Ok other_doc -> (
141141+ match lookup_def other_doc pr.fragment with
142142+ | Some td ->
143143+ Lwt.return (target_nsid, other_doc, td)
144144+ | None ->
145145+ fail "ref %s: no def %s in lexicon %s" ref_str pr.fragment target_nsid
146146+ )
147147+148148+let validate_string (spec : Types.string_spec) json =
149149+ let s = match json with `String s -> s | _ -> fail "expected string" in
150150+ ( match spec.const with
151151+ | Some c when s <> c ->
152152+ fail "expected constant %S, got %S" c s
153153+ | _ ->
154154+ () ) ;
155155+ ( match spec.enum with
156156+ | Some vs when not (List.mem s vs) ->
157157+ fail "value %S not in enum" s
158158+ | _ ->
159159+ () ) ;
160160+ ( match spec.min_length with
161161+ | Some n when String.length s < n ->
162162+ fail "string shorter than minLength %d" n
163163+ | _ ->
164164+ () ) ;
165165+ ( match spec.max_length with
166166+ | Some n when String.length s > n ->
167167+ fail "string longer than maxLength %d" n
168168+ | _ ->
169169+ () ) ;
170170+ ( match (spec.min_graphemes, spec.max_graphemes) with
171171+ | None, None ->
172172+ ()
173173+ | mn, mx -> (
174174+ let g = count_graphemes s in
175175+ ( match mn with
176176+ | Some n when g < n ->
177177+ fail "string shorter than minGraphemes %d" n
178178+ | _ ->
179179+ () ) ;
180180+ match mx with
181181+ | Some n when g > n ->
182182+ fail "string longer than maxGraphemes %d" n
183183+ | _ ->
184184+ () ) ) ;
185185+ match spec.format with Some f -> validate_string_format f s | None -> ()
186186+187187+let validate_integer (spec : Types.integer_spec) json =
188188+ let i =
189189+ match json with
190190+ | `Int i ->
191191+ i
192192+ | `Intlit s -> (
193193+ try int_of_string s with _ -> fail "invalid integer literal %s" s )
194194+ | _ ->
195195+ fail "expected integer"
196196+ in
197197+ ( match spec.const with
198198+ | Some c when i <> c ->
199199+ fail "expected constant %d, got %d" c i
200200+ | _ ->
201201+ () ) ;
202202+ ( match spec.enum with
203203+ | Some vs when not (List.mem i vs) ->
204204+ fail "value %d not in enum" i
205205+ | _ ->
206206+ () ) ;
207207+ ( match spec.minimum with
208208+ | Some n when i < n ->
209209+ fail "integer below minimum %d" n
210210+ | _ ->
211211+ () ) ;
212212+ match spec.maximum with
213213+ | Some n when i > n ->
214214+ fail "integer above maximum %d" n
215215+ | _ ->
216216+ ()
217217+218218+let validate_boolean (spec : Types.boolean_spec) json =
219219+ let b = match json with `Bool b -> b | _ -> fail "expected boolean" in
220220+ match spec.const with
221221+ | Some c when b <> c ->
222222+ fail "expected constant %b, got %b" c b
223223+ | _ ->
224224+ ()
225225+226226+let validate_bytes (spec : Types.bytes_spec) json =
227227+ let b64 =
228228+ match json with
229229+ | `Assoc [("$bytes", `String s)] ->
230230+ s
231231+ | _ ->
232232+ fail "expected bytes object with $bytes field"
233233+ in
234234+ let byte_len =
235235+ (* 3 bytes per 4 b64 chars minus padding *)
236236+ let n = String.length b64 in
237237+ let pad =
238238+ if n >= 2 && String.sub b64 (n - 2) 2 = "==" then 2
239239+ else if n >= 1 && String.sub b64 (n - 1) 1 = "=" then 1
240240+ else 0
241241+ in
242242+ (n / 4 * 3) - pad
243243+ in
244244+ ( match spec.min_length with
245245+ | Some n when byte_len < n ->
246246+ fail "bytes shorter than minLength %d" n
247247+ | _ ->
248248+ () ) ;
249249+ match spec.max_length with
250250+ | Some n when byte_len > n ->
251251+ fail "bytes longer than maxLength %d" n
252252+ | _ ->
253253+ ()
254254+255255+let validate_cid_link json =
256256+ match json with
257257+ | `Assoc [("$link", `String s)] ->
258258+ if not (is_cid s) then fail "invalid CID in $link"
259259+ | _ ->
260260+ fail "expected cid-link object {\"$link\": ...}"
261261+262262+let mime_matches pattern mime =
263263+ if pattern = "*/*" then true
264264+ else
265265+ match String.index_opt pattern '/' with
266266+ | None ->
267267+ pattern = mime
268268+ | Some i -> (
269269+ let p_type = String.sub pattern 0 i in
270270+ let p_sub =
271271+ String.sub pattern (i + 1) (String.length pattern - i - 1)
272272+ in
273273+ match String.index_opt mime '/' with
274274+ | None ->
275275+ false
276276+ | Some j ->
277277+ let m_type = String.sub mime 0 j in
278278+ let m_sub = String.sub mime (j + 1) (String.length mime - j - 1) in
279279+ p_type = m_type && (p_sub = "*" || p_sub = m_sub) )
280280+281281+let validate_blob (spec : Types.blob_spec) json =
282282+ let fields =
283283+ match json with `Assoc pairs -> pairs | _ -> fail "expected blob object"
284284+ in
285285+ let get k = List.assoc_opt k fields in
286286+ let mime_type =
287287+ match get "mimeType" with
288288+ | Some (`String s) ->
289289+ s
290290+ | _ ->
291291+ fail "blob missing mimeType"
292292+ in
293293+ let size =
294294+ match get "$type" with
295295+ | Some (`String "blob") -> (
296296+ ( match get "ref" with
297297+ | Some (`Assoc [("$link", `String c)]) ->
298298+ if not (is_cid c) then fail "blob ref has invalid CID"
299299+ | _ ->
300300+ fail "blob missing ref" ) ;
301301+ match get "size" with
302302+ | Some (`Int n) ->
303303+ Some n
304304+ | Some (`Intlit s) -> (
305305+ try Some (int_of_string s) with _ -> fail "invalid blob size" )
306306+ | _ ->
307307+ fail "blob missing size" )
308308+ | _ -> (
309309+ (* legacy shape *)
310310+ match get "cid" with
311311+ | Some (`String c) ->
312312+ if not (is_cid c) then fail "blob cid is invalid" ;
313313+ None
314314+ | _ ->
315315+ fail "blob has neither $type=blob nor legacy cid field" )
316316+ in
317317+ ( match spec.accept with
318318+ | Some [] | None ->
319319+ ()
320320+ | Some patterns ->
321321+ if not (List.exists (fun p -> mime_matches p mime_type) patterns) then
322322+ fail "blob mimeType %S not in accept list" mime_type ) ;
323323+ match (spec.max_size, size) with
324324+ | Some max_s, Some s when s > max_s ->
325325+ fail "blob size %d exceeds maxSize %d" s max_s
326326+ | _ ->
327327+ ()
328328+329329+let rec validate_value ctx (td : Types.type_def) (json : Yojson.Safe.t) :
330330+ unit Lwt.t =
331331+ match td with
332332+ | String spec ->
333333+ validate_string spec json ; Lwt.return_unit
334334+ | Integer spec ->
335335+ validate_integer spec json ; Lwt.return_unit
336336+ | Boolean spec ->
337337+ validate_boolean spec json ; Lwt.return_unit
338338+ | Bytes spec ->
339339+ validate_bytes spec json ; Lwt.return_unit
340340+ | Blob spec ->
341341+ validate_blob spec json ; Lwt.return_unit
342342+ | CidLink _ ->
343343+ validate_cid_link json ; Lwt.return_unit
344344+ | Array spec ->
345345+ validate_array ctx spec json
346346+ | Object spec ->
347347+ validate_object ctx spec json
348348+ | Ref spec ->
349349+ validate_ref ctx spec json
350350+ | Union spec ->
351351+ validate_union ctx spec json
352352+ | Token _ ->
353353+ (match json with `String _ -> () | _ -> fail "expected token (string)") ;
354354+ Lwt.return_unit
355355+ | Unknown _ ->
356356+ Lwt.return_unit
357357+ | Record spec ->
358358+ validate_object ctx spec.record json
359359+ | Query _ | Procedure _ | Subscription _ | PermissionSet _ ->
360360+ fail "lexicon type %s is not a valid record payload"
361361+ ( match td with
362362+ | Query _ ->
363363+ "query"
364364+ | Procedure _ ->
365365+ "procedure"
366366+ | Subscription _ ->
367367+ "subscription"
368368+ | PermissionSet _ ->
369369+ "permission-set"
370370+ | _ ->
371371+ "other" )
372372+373373+and validate_array ctx (spec : Types.array_spec) json =
374374+ let items = match json with `List xs -> xs | _ -> fail "expected array" in
375375+ let len = List.length items in
376376+ ( match spec.min_length with
377377+ | Some n when len < n ->
378378+ fail "array shorter than minLength %d" n
379379+ | _ ->
380380+ () ) ;
381381+ ( match spec.max_length with
382382+ | Some n when len > n ->
383383+ fail "array longer than maxLength %d" n
384384+ | _ ->
385385+ () ) ;
386386+ Lwt_list.iter_s (validate_value ctx spec.items) items
387387+388388+and validate_object ctx (spec : Types.object_spec) json =
389389+ let fields =
390390+ match json with `Assoc kvs -> kvs | _ -> fail "expected object"
391391+ in
392392+ let required = Option.value spec.required ~default:[] in
393393+ let nullable = Option.value spec.nullable ~default:[] in
394394+ List.iter
395395+ (fun k ->
396396+ if not (List.mem_assoc k fields) then fail "missing required field %S" k )
397397+ required ;
398398+ Lwt_list.iter_s
399399+ (fun (name, (prop : Types.property)) ->
400400+ match List.assoc_opt name fields with
401401+ | None ->
402402+ Lwt.return_unit
403403+ | Some `Null when List.mem name nullable ->
404404+ Lwt.return_unit
405405+ | Some `Null ->
406406+ fail "field %S is not nullable" name
407407+ | Some v ->
408408+ Lwt.catch
409409+ (fun () -> validate_value ctx prop.type_def v)
410410+ (function
411411+ | Validation_error m -> fail "at %S: %s" name m | e -> Lwt.fail e ) )
412412+ spec.properties
413413+414414+and validate_ref ctx (spec : Types.ref_spec) json =
415415+ let pr = parse_ref spec.ref_ in
416416+ let target_nsid = Option.value pr.nsid ~default:ctx.current_doc.id in
417417+ let key = (target_nsid, pr.fragment) in
418418+ if List.mem key ctx.visited then Lwt.return_unit
419419+ else
420420+ let%lwt _nsid, doc, td = resolve_ref ctx spec.ref_ in
421421+ let ctx' = {current_doc= doc; visited= key :: ctx.visited} in
422422+ (* A ref may resolve to a Record def; record's content is the inner object. *)
423423+ let td =
424424+ match td with Types.Record {record; _} -> Types.Object record | t -> t
425425+ in
426426+ validate_value ctx' td json
427427+428428+and validate_union ctx (spec : Types.union_spec) json =
429429+ let closed = Option.value spec.closed ~default:false in
430430+ let fields =
431431+ match json with `Assoc kvs -> kvs | _ -> fail "expected object in union"
432432+ in
433433+ let type_tag =
434434+ match List.assoc_opt "$type" fields with
435435+ | Some (`String s) ->
436436+ Some s
437437+ | _ ->
438438+ None
439439+ in
440440+ let ref_matches_tag tag ref_str =
441441+ let pr = parse_ref ref_str in
442442+ let ref_nsid = Option.value pr.nsid ~default:ctx.current_doc.id in
443443+ let canonical =
444444+ if pr.fragment = "main" then ref_nsid else ref_nsid ^ "#" ^ pr.fragment
445445+ in
446446+ canonical = tag
447447+ in
448448+ match type_tag with
449449+ | None ->
450450+ if closed then fail "union value missing $type (closed union)"
451451+ else Lwt.return_unit
452452+ | Some tag -> (
453453+ match List.find_opt (ref_matches_tag tag) spec.refs with
454454+ | Some matched ->
455455+ let fake_ref : Types.ref_spec = {ref_= matched; description= None} in
456456+ validate_ref ctx fake_ref json
457457+ | None ->
458458+ if closed then fail "$type %S not in closed union" tag
459459+ else Lwt.return_unit )
460460+461461+let validate_record ~nsid ~(record : Yojson.Safe.t) =
462462+ match%lwt Lexicon_resolver.resolve_schema nsid with
463463+ | Error e ->
464464+ Lwt.return_error ("could not resolve lexicon: " ^ e)
465465+ | Ok doc -> (
466466+ match lookup_def doc "main" with
467467+ | None ->
468468+ Lwt.return_error "lexicon has no defs.main"
469469+ | Some (Record rec_spec) ->
470470+ let ctx = {current_doc= doc; visited= [(nsid, "main")]} in
471471+ Lwt.catch
472472+ (fun () ->
473473+ let%lwt () = validate_object ctx rec_spec.record record in
474474+ Lwt.return_ok () )
475475+ (function
476476+ | Validation_error msg ->
477477+ Lwt.return_error msg
478478+ | e ->
479479+ Lwt.return_error (Printexc.to_string e) )
480480+ | Some _ ->
481481+ Lwt.return_error "defs.main is not a record type" )