OCaml HTML5 parser/serialiser based on Python's JustHTML
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at main 311 lines 11 kB view raw
1(** Importmap validation checker. 2 3 Validates that <script type="importmap"> elements contain valid JSON 4 and conform to importmap structural requirements. *) 5 6type state = { 7 mutable in_importmap : bool; 8 content : Buffer.t; 9} 10 11let create () = { 12 in_importmap = false; 13 content = Buffer.create 256; 14} 15 16let reset state = 17 state.in_importmap <- false; 18 Buffer.clear state.content 19 20(** Simple JSON value representation *) 21type json = 22 | JNull 23 | JBool of bool 24 | JNumber of float 25 | JString of string 26 | JArray of json list 27 | JObject of (string * json) list 28 29(** Simple JSON parser *) 30let parse_json s_orig = 31 let s = String.trim s_orig in 32 let len = String.length s in 33 if len = 0 then Error "Empty JSON" 34 else 35 let pos = ref 0 in 36 37 let skip_ws () = 38 while !pos < len && (s.[!pos] = ' ' || s.[!pos] = '\t' || s.[!pos] = '\n' || s.[!pos] = '\r') do 39 incr pos 40 done 41 in 42 43 let peek () = if !pos < len then Some s.[!pos] else None in 44 let consume () = let c = s.[!pos] in incr pos; c in 45 46 let rec parse_value () = 47 skip_ws (); 48 match peek () with 49 | None -> Error "Unexpected end of input" 50 | Some '{' -> parse_object () 51 | Some '[' -> parse_array () 52 | Some '"' -> parse_string () 53 | Some 't' -> parse_true () 54 | Some 'f' -> parse_false () 55 | Some 'n' -> parse_null () 56 | Some c when c = '-' || (c >= '0' && c <= '9') -> parse_number () 57 | Some _ -> Error "Unexpected character" 58 59 and parse_object () = 60 ignore (consume ()); (* consume { *) 61 skip_ws (); 62 match peek () with 63 | Some '}' -> ignore (consume ()); Ok (JObject []) 64 | _ -> 65 let rec parse_members acc = 66 skip_ws (); 67 match parse_string () with 68 | Error e -> Error e 69 | Ok (JString key) -> 70 skip_ws (); 71 (match peek () with 72 | Some ':' -> 73 ignore (consume ()); 74 (match parse_value () with 75 | Error e -> Error e 76 | Ok value -> 77 skip_ws (); 78 let acc' = (key, value) :: acc in 79 match peek () with 80 | Some ',' -> ignore (consume ()); parse_members acc' 81 | Some '}' -> ignore (consume ()); Ok (JObject (List.rev acc')) 82 | _ -> Error "Expected ',' or '}'") 83 | _ -> Error "Expected ':'") 84 | Ok _ -> Error "Expected string key" 85 in 86 parse_members [] 87 88 and parse_array () = 89 ignore (consume ()); (* consume [ *) 90 skip_ws (); 91 match peek () with 92 | Some ']' -> ignore (consume ()); Ok (JArray []) 93 | _ -> 94 let rec parse_elements acc = 95 match parse_value () with 96 | Error e -> Error e 97 | Ok value -> 98 skip_ws (); 99 let acc' = value :: acc in 100 match peek () with 101 | Some ',' -> ignore (consume ()); parse_elements acc' 102 | Some ']' -> ignore (consume ()); Ok (JArray (List.rev acc')) 103 | _ -> Error "Expected ',' or ']'" 104 in 105 parse_elements [] 106 107 and parse_string () = 108 skip_ws (); 109 match peek () with 110 | Some '"' -> 111 ignore (consume ()); 112 let buf = Buffer.create 32 in 113 let rec read () = 114 match peek () with 115 | None -> Error "Unterminated string" 116 | Some '"' -> ignore (consume ()); Ok (JString (Buffer.contents buf)) 117 | Some '\\' -> 118 ignore (consume ()); 119 (match peek () with 120 | None -> Error "Unterminated escape" 121 | Some c -> ignore (consume ()); Buffer.add_char buf c; read ()) 122 | Some c -> ignore (consume ()); Buffer.add_char buf c; read () 123 in 124 read () 125 | _ -> Error "Expected string" 126 127 and parse_number () = 128 let start = !pos in 129 if peek () = Some '-' then incr pos; 130 while !pos < len && s.[!pos] >= '0' && s.[!pos] <= '9' do incr pos done; 131 if !pos < len && s.[!pos] = '.' then begin 132 incr pos; 133 while !pos < len && s.[!pos] >= '0' && s.[!pos] <= '9' do incr pos done 134 end; 135 if !pos < len && (s.[!pos] = 'e' || s.[!pos] = 'E') then begin 136 incr pos; 137 if !pos < len && (s.[!pos] = '+' || s.[!pos] = '-') then incr pos; 138 while !pos < len && s.[!pos] >= '0' && s.[!pos] <= '9' do incr pos done 139 end; 140 let num_str = String.sub s start (!pos - start) in 141 match float_of_string_opt num_str with 142 | Some f -> Ok (JNumber f) 143 | None -> Error "Invalid number" 144 145 and parse_true () = 146 if !pos + 4 <= len && String.sub s !pos 4 = "true" then 147 (pos := !pos + 4; Ok (JBool true)) 148 else Error "Expected 'true'" 149 150 and parse_false () = 151 if !pos + 5 <= len && String.sub s !pos 5 = "false" then 152 (pos := !pos + 5; Ok (JBool false)) 153 else Error "Expected 'false'" 154 155 and parse_null () = 156 if !pos + 4 <= len && String.sub s !pos 4 = "null" then 157 (pos := !pos + 4; Ok JNull) 158 else Error "Expected 'null'" 159 in 160 161 match parse_value () with 162 | Error e -> Error e 163 | Ok v -> 164 skip_ws (); 165 if !pos = len then Ok v 166 else Error "Unexpected content after JSON" 167 168(** Validate importmap structure *) 169type importmap_error = 170 | InvalidJSON of string 171 | EmptyKey of string (* property name where empty key was found *) 172 | NotObject of string (* property name that should be object but isn't *) 173 | NotString of string (* property name that should be string but isn't *) 174 | ForbiddenProperty of string 175 | SlashKeyWithoutSlashValue of string (* property name where slash key doesn't have slash value *) 176 | InvalidScopeKey (* scope key is not a valid URL *) 177 | InvalidScopeValue of string (* scope value is not a valid URL *) 178 | ScopeValueNotObject (* a value inside scopes is not a JSON object *) 179 180(** Check if a string looks like a valid URL-like specifier for importmaps *) 181let is_valid_url_like s = 182 if String.length s = 0 then false 183 else 184 (* Valid URL-like: starts with /, ./, ../, or has a scheme followed by :// or : *) 185 let starts_with_slash = s.[0] = '/' in 186 let starts_with_dot_slash = String.length s >= 2 && s.[0] = '.' && s.[1] = '/' in 187 let starts_with_dot_dot_slash = String.length s >= 3 && s.[0] = '.' && s.[1] = '.' && s.[2] = '/' in 188 let has_scheme = 189 match String.index_opt s ':' with 190 | None -> false 191 | Some pos when pos > 0 -> 192 (* Check that characters before : are valid scheme characters *) 193 let scheme = String.sub s 0 pos in 194 String.length scheme > 0 && 195 String.for_all (fun c -> 196 (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || 197 (c >= '0' && c <= '9') || c = '+' || c = '-' || c = '.' 198 ) scheme 199 | _ -> false 200 in 201 starts_with_slash || starts_with_dot_slash || starts_with_dot_dot_slash || has_scheme 202 203let validate_importmap s = 204 match parse_json s with 205 | Error msg -> [InvalidJSON msg] 206 | Ok json -> 207 let errors = ref [] in 208 let add_error e = errors := e :: !errors in 209 210 (match json with 211 | JObject members -> 212 List.iter (fun (key, value) -> 213 (* Check for forbidden top-level properties *) 214 if key <> "imports" && key <> "scopes" && key <> "integrity" then 215 add_error (ForbiddenProperty key); 216 217 (* Check imports *) 218 if key = "imports" then begin 219 match value with 220 | JObject import_members -> 221 List.iter (fun (ikey, ivalue) -> 222 if ikey = "" then add_error (EmptyKey "imports"); 223 (* Check slash-ending consistency *) 224 let key_ends_with_slash = String.length ikey > 0 && ikey.[String.length ikey - 1] = '/' in 225 match ivalue with 226 | JString v -> 227 if key_ends_with_slash then begin 228 let val_ends_with_slash = String.length v > 0 && v.[String.length v - 1] = '/' in 229 if not val_ends_with_slash then 230 add_error (SlashKeyWithoutSlashValue "imports") 231 end 232 | JNull -> () (* null is allowed *) 233 | _ -> add_error (NotString ("imports[" ^ ikey ^ "]")) 234 ) import_members 235 | _ -> add_error (NotObject "imports") 236 end; 237 238 (* Check scopes *) 239 if key = "scopes" then begin 240 match value with 241 | JObject scope_members -> 242 List.iter (fun (skey, svalue) -> 243 if skey = "" then add_error (EmptyKey "scopes"); 244 (* Check that scope key is a valid URL *) 245 if skey <> "" && not (is_valid_url_like skey) then 246 add_error InvalidScopeKey; 247 match svalue with 248 | JObject scope_imports -> 249 List.iter (fun (sikey, sivalue) -> 250 if sikey = "" then add_error (EmptyKey ("scopes[" ^ skey ^ "]")); 251 match sivalue with 252 | JString v -> 253 (* Check that scope value is a valid URL *) 254 if not (is_valid_url_like v) then 255 add_error (InvalidScopeValue sikey) 256 | JNull -> () 257 | _ -> add_error (NotString ("scopes[" ^ skey ^ "][" ^ sikey ^ "]")) 258 ) scope_imports 259 | _ -> add_error ScopeValueNotObject 260 ) scope_members 261 | _ -> add_error (NotObject "scopes") 262 end 263 ) members 264 | _ -> add_error (NotObject "root")); 265 266 List.rev !errors 267 268let start_element state ~element _collector = 269 match element.Element.tag with 270 | Tag.Html `Script -> 271 (* Check if type="importmap" *) 272 let type_attr = List.find_opt (fun (n, _) -> 273 Astring.String.Ascii.lowercase n = "type" 274 ) element.raw_attrs in 275 (match type_attr with 276 | Some (_, v) when Astring.String.Ascii.lowercase v = "importmap" -> 277 state.in_importmap <- true; 278 Buffer.clear state.content 279 | _ -> ()) 280 | _ -> () (* Only script elements can be importmaps *) 281 282let error_to_typed = function 283 | InvalidJSON _ -> `Importmap `Invalid_json 284 | EmptyKey _ -> `Importmap `Empty_key 285 | NotObject prop when prop = "root" -> `Importmap `Invalid_root 286 | NotObject prop when prop = "imports" -> `Importmap `Imports_not_object 287 | NotObject _ -> `Importmap `Scopes_not_object (* scopes *) 288 | NotString _ -> `Importmap `Non_string_value 289 | ForbiddenProperty _ -> `Importmap `Invalid_root 290 | SlashKeyWithoutSlashValue _ -> `Importmap `Key_trailing_slash 291 | InvalidScopeKey -> `Importmap `Scopes_invalid_url 292 | InvalidScopeValue _ -> `Importmap `Scopes_value_invalid_url 293 | ScopeValueNotObject -> `Importmap `Scopes_values_not_object 294 295let end_element state ~tag collector = 296 match tag with 297 | Tag.Html `Script when state.in_importmap -> 298 let content = Buffer.contents state.content in 299 let errors = validate_importmap content in 300 List.iter (fun err -> 301 Message_collector.add_typed collector (error_to_typed err) 302 ) errors; 303 state.in_importmap <- false 304 | _ -> () 305 306let characters state text _collector = 307 if state.in_importmap then 308 Buffer.add_string state.content text 309 310let checker = Checker.make ~create ~reset ~start_element ~end_element 311 ~characters ()