OCaml HTML5 parser/serialiser based on Python's JustHTML
1(* HTML5 token types *)
2
(** Whether a tag token opens or closes an element. *)
type tag_kind =
  | Start  (** An opening tag. *)
  | End  (** A closing tag. *)
4
(** Data carried by a DOCTYPE token.

    The three textual components are each optional, so an absent component
    ([None]) can be told apart from one that is present but empty. *)
type doctype = {
  name : string option;  (** DOCTYPE name, if one was given. *)
  public_id : string option;  (** Public identifier, if one was given. *)
  system_id : string option;  (** System identifier, if one was given. *)
  force_quirks : bool;
      (** Force-quirks flag. This module only stores and prints it;
          presumably it mirrors the flag of the same name in the HTML
          tokenizer specification (to be confirmed against the tokenizer). *)
}
11
(** Data carried by a start or end tag token. *)
type tag = {
  kind : tag_kind;  (** Opening or closing tag. *)
  name : string;  (** Tag name. *)
  attrs : (string * string) list;
      (** Attributes as [(name, value)] pairs, in list order. *)
  self_closing : bool;  (** Whether the tag is marked self-closing. *)
}
18
(** A single token. *)
type t =
  | Tag of tag  (** A start or end tag. *)
  | Character of string  (** Character data. *)
  | Comment of string  (** Comment text. *)
  | Doctype of doctype  (** A DOCTYPE declaration. *)
  | EOF  (** End-of-input marker. *)
25
(** [make_start_tag name attrs self_closing] builds a {!Tag} token whose kind
    is [Start], carrying the given name, attribute list and self-closing
    flag unchanged. *)
let make_start_tag name attrs self_closing =
  let start_tag : tag = { kind = Start; name; attrs; self_closing } in
  Tag start_tag
28
(** [make_end_tag name] builds a {!Tag} token whose kind is [End]. The
    result always has an empty attribute list and is never self-closing. *)
let make_end_tag name =
  let end_tag : tag =
    { kind = End; name; attrs = []; self_closing = false }
  in
  Tag end_tag
31
(** [make_doctype ?name ?public_id ?system_id ?force_quirks ()] builds a
    {!Doctype} token.

    Each omitted textual component is stored as [None]; [force_quirks]
    defaults to [false]. The trailing [unit] argument lets the optional
    arguments be left out. *)
let make_doctype ?name ?public_id ?system_id ?(force_quirks = false) () =
  let payload : doctype = { name; public_id; system_id; force_quirks } in
  Doctype payload
34
(** [make_comment data] wraps [data] in a {!Comment} token. *)
let make_comment data : t = Comment data
36
(** [make_character data] wraps [data] in a {!Character} token. *)
let make_character data : t = Character data
38
(** The end-of-input token. *)
let eof : t = EOF
40
41(* Pretty printers *)
(** [pp_tag_kind fmt kind] prints the constructor name of [kind] (either
    [Start] or [End]) on [fmt]. *)
let pp_tag_kind fmt kind =
  let label =
    match kind with
    | Start -> "Start"
    | End -> "End"
  in
  Format.pp_print_string fmt label
45
(** [pp_doctype fmt d] prints a one-line debug rendering of [d] on [fmt],
    listing the name, public identifier, system identifier and force-quirks
    flag in that order.

    NOTE(review): the optional fields go through [Format.pp_print_option]
    with its default [none] printer, which prints nothing. An absent field
    and a present-but-empty one ([Some ""]) therefore render identically. *)
let pp_doctype fmt (d : doctype) =
  (* One printer shared by the three optional string fields. *)
  let pp_opt_string = Format.pp_print_option Format.pp_print_string in
  Format.fprintf fmt
    "DOCTYPE{name=%a; public_id=%a; system_id=%a; force_quirks=%b}"
    pp_opt_string d.name
    pp_opt_string d.public_id
    pp_opt_string d.system_id
    d.force_quirks
52
(** [pp_tag fmt t] prints [t] on [fmt] in an HTML-like debug form: an
    opening angle bracket, a slash for end tags, the tag name, each
    attribute as [ name=value], a trailing [ /] when the tag is
    self-closing, and a closing angle bracket.

    Attribute values are printed with [%S], i.e. quoted and escaped as
    OCaml string literals rather than as HTML; attribute names are printed
    verbatim. *)
let pp_tag fmt (t : tag) =
  let slash =
    match t.kind with
    | Start -> ""
    | End -> "/"
  in
  Format.fprintf fmt "<%s%s" slash t.name;
  let pp_attr (key, value) = Format.fprintf fmt " %s=%S" key value in
  List.iter pp_attr t.attrs;
  if t.self_closing then Format.pp_print_string fmt " /";
  Format.pp_print_char fmt '>'
60
(** [pp fmt token] prints a debug rendering of [token] on [fmt].

    Tags and doctypes are delegated to [pp_tag] and [pp_doctype]. Character
    and comment tokens print their constructor name followed by the payload
    as an escaped OCaml string literal. The end-of-input token prints as
    [EOF]. *)
let pp fmt token =
  match token with
  | EOF -> Format.pp_print_string fmt "EOF"
  | Doctype doctype -> pp_doctype fmt doctype
  | Comment text -> Format.fprintf fmt "Comment %S" text
  | Character text -> Format.fprintf fmt "Character %S" text
  | Tag tag -> pp_tag fmt tag