(* HTML5 token types *)
type tag_kind = Start | End
type doctype = {
name : string option;
public_id : string option;
system_id : string option;
force_quirks : bool;
}
type tag = {
kind : tag_kind;
name : string;
attrs : (string * string) list;
self_closing : bool;
}
type t =
| Tag of tag
| Character of string
| Comment of string
| Doctype of doctype
| EOF
let make_start_tag name attrs self_closing =
Tag { kind = Start; name; attrs; self_closing }
let make_end_tag name =
Tag { kind = End; name; attrs = []; self_closing = false }
let make_doctype ?name ?public_id ?system_id ?(force_quirks=false) () =
Doctype { name; public_id; system_id; force_quirks }
let make_comment data = Comment data
let make_character data = Character data
let eof = EOF
(* Pretty printers *)
let pp_tag_kind fmt = function
| Start -> Format.pp_print_string fmt "Start"
| End -> Format.pp_print_string fmt "End"
let pp_doctype fmt (d : doctype) =
Format.fprintf fmt "DOCTYPE{name=%a; public_id=%a; system_id=%a; force_quirks=%b}"
(Format.pp_print_option Format.pp_print_string) d.name
(Format.pp_print_option Format.pp_print_string) d.public_id
(Format.pp_print_option Format.pp_print_string) d.system_id
d.force_quirks
let pp_tag fmt (t : tag) =
Format.fprintf fmt "<%s%s"
(match t.kind with Start -> "" | End -> "/")
t.name;
List.iter (fun (k, v) -> Format.fprintf fmt " %s=%S" k v) t.attrs;
if t.self_closing then Format.pp_print_string fmt " /";
Format.pp_print_char fmt '>'
let pp fmt = function
| Tag t -> pp_tag fmt t
| Character s -> Format.fprintf fmt "Character %S" s
| Comment s -> Format.fprintf fmt "Comment %S" s
| Doctype d -> pp_doctype fmt d
| EOF -> Format.pp_print_string fmt "EOF"