(* HTML5 token types *) type tag_kind = Start | End type doctype = { name : string option; public_id : string option; system_id : string option; force_quirks : bool; } type tag = { kind : tag_kind; name : string; attrs : (string * string) list; self_closing : bool; } type t = | Tag of tag | Character of string | Comment of string | Doctype of doctype | EOF let make_start_tag name attrs self_closing = Tag { kind = Start; name; attrs; self_closing } let make_end_tag name = Tag { kind = End; name; attrs = []; self_closing = false } let make_doctype ?name ?public_id ?system_id ?(force_quirks=false) () = Doctype { name; public_id; system_id; force_quirks } let make_comment data = Comment data let make_character data = Character data let eof = EOF (* Pretty printers *) let pp_tag_kind fmt = function | Start -> Format.pp_print_string fmt "Start" | End -> Format.pp_print_string fmt "End" let pp_doctype fmt (d : doctype) = Format.fprintf fmt "DOCTYPE{name=%a; public_id=%a; system_id=%a; force_quirks=%b}" (Format.pp_print_option Format.pp_print_string) d.name (Format.pp_print_option Format.pp_print_string) d.public_id (Format.pp_print_option Format.pp_print_string) d.system_id d.force_quirks let pp_tag fmt (t : tag) = Format.fprintf fmt "<%s%s" (match t.kind with Start -> "" | End -> "/") t.name; List.iter (fun (k, v) -> Format.fprintf fmt " %s=%S" k v) t.attrs; if t.self_closing then Format.pp_print_string fmt " /"; Format.pp_print_char fmt '>' let pp fmt = function | Tag t -> pp_tag fmt t | Character s -> Format.fprintf fmt "Character %S" s | Comment s -> Format.fprintf fmt "Comment %S" s | Doctype d -> pp_doctype fmt d | EOF -> Format.pp_print_string fmt "EOF"