OCaml HTML5 parser/serialiser based on Python's JustHTML
1(** Importmap validation checker.
2
3 Validates that <script type="importmap"> elements contain valid JSON
4 and conform to importmap structural requirements. *)
5
6type state = {
7 mutable in_importmap : bool;
8 content : Buffer.t;
9}
10
11let create () = {
12 in_importmap = false;
13 content = Buffer.create 256;
14}
15
16let reset state =
17 state.in_importmap <- false;
18 Buffer.clear state.content
19
20(** Simple JSON value representation *)
21type json =
22 | JNull
23 | JBool of bool
24 | JNumber of float
25 | JString of string
26 | JArray of json list
27 | JObject of (string * json) list
28
29(** Simple JSON parser *)
30let parse_json s_orig =
31 let s = String.trim s_orig in
32 let len = String.length s in
33 if len = 0 then Error "Empty JSON"
34 else
35 let pos = ref 0 in
36
37 let skip_ws () =
38 while !pos < len && (s.[!pos] = ' ' || s.[!pos] = '\t' || s.[!pos] = '\n' || s.[!pos] = '\r') do
39 incr pos
40 done
41 in
42
43 let peek () = if !pos < len then Some s.[!pos] else None in
44 let consume () = let c = s.[!pos] in incr pos; c in
45
46 let rec parse_value () =
47 skip_ws ();
48 match peek () with
49 | None -> Error "Unexpected end of input"
50 | Some '{' -> parse_object ()
51 | Some '[' -> parse_array ()
52 | Some '"' -> parse_string ()
53 | Some 't' -> parse_true ()
54 | Some 'f' -> parse_false ()
55 | Some 'n' -> parse_null ()
56 | Some c when c = '-' || (c >= '0' && c <= '9') -> parse_number ()
57 | Some _ -> Error "Unexpected character"
58
59 and parse_object () =
60 ignore (consume ()); (* consume { *)
61 skip_ws ();
62 match peek () with
63 | Some '}' -> ignore (consume ()); Ok (JObject [])
64 | _ ->
65 let rec parse_members acc =
66 skip_ws ();
67 match parse_string () with
68 | Error e -> Error e
69 | Ok (JString key) ->
70 skip_ws ();
71 (match peek () with
72 | Some ':' ->
73 ignore (consume ());
74 (match parse_value () with
75 | Error e -> Error e
76 | Ok value ->
77 skip_ws ();
78 let acc' = (key, value) :: acc in
79 match peek () with
80 | Some ',' -> ignore (consume ()); parse_members acc'
81 | Some '}' -> ignore (consume ()); Ok (JObject (List.rev acc'))
82 | _ -> Error "Expected ',' or '}'")
83 | _ -> Error "Expected ':'")
84 | Ok _ -> Error "Expected string key"
85 in
86 parse_members []
87
88 and parse_array () =
89 ignore (consume ()); (* consume [ *)
90 skip_ws ();
91 match peek () with
92 | Some ']' -> ignore (consume ()); Ok (JArray [])
93 | _ ->
94 let rec parse_elements acc =
95 match parse_value () with
96 | Error e -> Error e
97 | Ok value ->
98 skip_ws ();
99 let acc' = value :: acc in
100 match peek () with
101 | Some ',' -> ignore (consume ()); parse_elements acc'
102 | Some ']' -> ignore (consume ()); Ok (JArray (List.rev acc'))
103 | _ -> Error "Expected ',' or ']'"
104 in
105 parse_elements []
106
107 and parse_string () =
108 skip_ws ();
109 match peek () with
110 | Some '"' ->
111 ignore (consume ());
112 let buf = Buffer.create 32 in
113 let rec read () =
114 match peek () with
115 | None -> Error "Unterminated string"
116 | Some '"' -> ignore (consume ()); Ok (JString (Buffer.contents buf))
117 | Some '\\' ->
118 ignore (consume ());
119 (match peek () with
120 | None -> Error "Unterminated escape"
121 | Some c -> ignore (consume ()); Buffer.add_char buf c; read ())
122 | Some c -> ignore (consume ()); Buffer.add_char buf c; read ()
123 in
124 read ()
125 | _ -> Error "Expected string"
126
127 and parse_number () =
128 let start = !pos in
129 if peek () = Some '-' then incr pos;
130 while !pos < len && s.[!pos] >= '0' && s.[!pos] <= '9' do incr pos done;
131 if !pos < len && s.[!pos] = '.' then begin
132 incr pos;
133 while !pos < len && s.[!pos] >= '0' && s.[!pos] <= '9' do incr pos done
134 end;
135 if !pos < len && (s.[!pos] = 'e' || s.[!pos] = 'E') then begin
136 incr pos;
137 if !pos < len && (s.[!pos] = '+' || s.[!pos] = '-') then incr pos;
138 while !pos < len && s.[!pos] >= '0' && s.[!pos] <= '9' do incr pos done
139 end;
140 let num_str = String.sub s start (!pos - start) in
141 match float_of_string_opt num_str with
142 | Some f -> Ok (JNumber f)
143 | None -> Error "Invalid number"
144
145 and parse_true () =
146 if !pos + 4 <= len && String.sub s !pos 4 = "true" then
147 (pos := !pos + 4; Ok (JBool true))
148 else Error "Expected 'true'"
149
150 and parse_false () =
151 if !pos + 5 <= len && String.sub s !pos 5 = "false" then
152 (pos := !pos + 5; Ok (JBool false))
153 else Error "Expected 'false'"
154
155 and parse_null () =
156 if !pos + 4 <= len && String.sub s !pos 4 = "null" then
157 (pos := !pos + 4; Ok JNull)
158 else Error "Expected 'null'"
159 in
160
161 match parse_value () with
162 | Error e -> Error e
163 | Ok v ->
164 skip_ws ();
165 if !pos = len then Ok v
166 else Error "Unexpected content after JSON"
167
168(** Validate importmap structure *)
169type importmap_error =
170 | InvalidJSON of string
171 | EmptyKey of string (* property name where empty key was found *)
172 | NotObject of string (* property name that should be object but isn't *)
173 | NotString of string (* property name that should be string but isn't *)
174 | ForbiddenProperty of string
175 | SlashKeyWithoutSlashValue of string (* property name where slash key doesn't have slash value *)
176 | InvalidScopeKey (* scope key is not a valid URL *)
177 | InvalidScopeValue of string (* scope value is not a valid URL *)
178 | ScopeValueNotObject (* a value inside scopes is not a JSON object *)
179
180(** Check if a string looks like a valid URL-like specifier for importmaps *)
181let is_valid_url_like s =
182 if String.length s = 0 then false
183 else
184 (* Valid URL-like: starts with /, ./, ../, or has a scheme followed by :// or : *)
185 let starts_with_slash = s.[0] = '/' in
186 let starts_with_dot_slash = String.length s >= 2 && s.[0] = '.' && s.[1] = '/' in
187 let starts_with_dot_dot_slash = String.length s >= 3 && s.[0] = '.' && s.[1] = '.' && s.[2] = '/' in
188 let has_scheme =
189 match String.index_opt s ':' with
190 | None -> false
191 | Some pos when pos > 0 ->
192 (* Check that characters before : are valid scheme characters *)
193 let scheme = String.sub s 0 pos in
194 String.length scheme > 0 &&
195 String.for_all (fun c ->
196 (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
197 (c >= '0' && c <= '9') || c = '+' || c = '-' || c = '.'
198 ) scheme
199 | _ -> false
200 in
201 starts_with_slash || starts_with_dot_slash || starts_with_dot_dot_slash || has_scheme
202
203let validate_importmap s =
204 match parse_json s with
205 | Error msg -> [InvalidJSON msg]
206 | Ok json ->
207 let errors = ref [] in
208 let add_error e = errors := e :: !errors in
209
210 (match json with
211 | JObject members ->
212 List.iter (fun (key, value) ->
213 (* Check for forbidden top-level properties *)
214 if key <> "imports" && key <> "scopes" && key <> "integrity" then
215 add_error (ForbiddenProperty key);
216
217 (* Check imports *)
218 if key = "imports" then begin
219 match value with
220 | JObject import_members ->
221 List.iter (fun (ikey, ivalue) ->
222 if ikey = "" then add_error (EmptyKey "imports");
223 (* Check slash-ending consistency *)
224 let key_ends_with_slash = String.length ikey > 0 && ikey.[String.length ikey - 1] = '/' in
225 match ivalue with
226 | JString v ->
227 if key_ends_with_slash then begin
228 let val_ends_with_slash = String.length v > 0 && v.[String.length v - 1] = '/' in
229 if not val_ends_with_slash then
230 add_error (SlashKeyWithoutSlashValue "imports")
231 end
232 | JNull -> () (* null is allowed *)
233 | _ -> add_error (NotString ("imports[" ^ ikey ^ "]"))
234 ) import_members
235 | _ -> add_error (NotObject "imports")
236 end;
237
238 (* Check scopes *)
239 if key = "scopes" then begin
240 match value with
241 | JObject scope_members ->
242 List.iter (fun (skey, svalue) ->
243 if skey = "" then add_error (EmptyKey "scopes");
244 (* Check that scope key is a valid URL *)
245 if skey <> "" && not (is_valid_url_like skey) then
246 add_error InvalidScopeKey;
247 match svalue with
248 | JObject scope_imports ->
249 List.iter (fun (sikey, sivalue) ->
250 if sikey = "" then add_error (EmptyKey ("scopes[" ^ skey ^ "]"));
251 match sivalue with
252 | JString v ->
253 (* Check that scope value is a valid URL *)
254 if not (is_valid_url_like v) then
255 add_error (InvalidScopeValue sikey)
256 | JNull -> ()
257 | _ -> add_error (NotString ("scopes[" ^ skey ^ "][" ^ sikey ^ "]"))
258 ) scope_imports
259 | _ -> add_error ScopeValueNotObject
260 ) scope_members
261 | _ -> add_error (NotObject "scopes")
262 end
263 ) members
264 | _ -> add_error (NotObject "root"));
265
266 List.rev !errors
267
268let start_element state ~element _collector =
269 match element.Element.tag with
270 | Tag.Html `Script ->
271 (* Check if type="importmap" *)
272 let type_attr = List.find_opt (fun (n, _) ->
273 Astring.String.Ascii.lowercase n = "type"
274 ) element.raw_attrs in
275 (match type_attr with
276 | Some (_, v) when Astring.String.Ascii.lowercase v = "importmap" ->
277 state.in_importmap <- true;
278 Buffer.clear state.content
279 | _ -> ())
280 | _ -> () (* Only script elements can be importmaps *)
281
282let error_to_typed = function
283 | InvalidJSON _ -> `Importmap `Invalid_json
284 | EmptyKey _ -> `Importmap `Empty_key
285 | NotObject prop when prop = "root" -> `Importmap `Invalid_root
286 | NotObject prop when prop = "imports" -> `Importmap `Imports_not_object
287 | NotObject _ -> `Importmap `Scopes_not_object (* scopes *)
288 | NotString _ -> `Importmap `Non_string_value
289 | ForbiddenProperty _ -> `Importmap `Invalid_root
290 | SlashKeyWithoutSlashValue _ -> `Importmap `Key_trailing_slash
291 | InvalidScopeKey -> `Importmap `Scopes_invalid_url
292 | InvalidScopeValue _ -> `Importmap `Scopes_value_invalid_url
293 | ScopeValueNotObject -> `Importmap `Scopes_values_not_object
294
295let end_element state ~tag collector =
296 match tag with
297 | Tag.Html `Script when state.in_importmap ->
298 let content = Buffer.contents state.content in
299 let errors = validate_importmap content in
300 List.iter (fun err ->
301 Message_collector.add_typed collector (error_to_typed err)
302 ) errors;
303 state.in_importmap <- false
304 | _ -> ()
305
306let characters state text _collector =
307 if state.in_importmap then
308 Buffer.add_string state.content text
309
310let checker = Checker.make ~create ~reset ~start_element ~end_element
311 ~characters ()