OCaml HTML5 parser/serialiser based on Python's JustHTML
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at main 66 lines 1.8 kB view raw
(* HTML5 token types *)

(** Distinguishes an opening tag ([Start]) from a closing tag ([End]). *)
type tag_kind = Start | End

(** Payload of a DOCTYPE token. Each identifier is [None] when absent. *)
type doctype = {
  name : string option;
  public_id : string option;
  system_id : string option;
  force_quirks : bool;
}

(** Payload of a start or end tag token. [attrs] is an association list of
    (attribute name, attribute value) pairs kept in the order supplied. *)
type tag = {
  kind : tag_kind;
  name : string;
  attrs : (string * string) list;
  self_closing : bool;
}

(** A single token. *)
type t =
  | Tag of tag
  | Character of string
  | Comment of string
  | Doctype of doctype
  | EOF

(** [make_start_tag name attrs self_closing] builds a [Tag] token of kind
    [Start] carrying the given name, attributes and self-closing flag. *)
let make_start_tag name attrs self_closing =
  Tag { kind = Start; name; attrs; self_closing }

(** [make_end_tag name] builds a [Tag] token of kind [End]. The token always
    has an empty attribute list and [self_closing = false]. *)
let make_end_tag name =
  Tag { kind = End; name; attrs = []; self_closing = false }

(** [make_doctype ?name ?public_id ?system_id ?force_quirks ()] builds a
    [Doctype] token. Omitted identifiers are stored as [None];
    [force_quirks] defaults to [false]. The trailing [unit] argument lets
    the optional arguments be erased. *)
let make_doctype ?name ?public_id ?system_id ?(force_quirks = false) () =
  Doctype { name; public_id; system_id; force_quirks }

(** [make_comment data] wraps [data] in a [Comment] token. *)
let make_comment data = Comment data

(** [make_character data] wraps [data] in a [Character] token. *)
let make_character data = Character data

(** The end-of-file token. *)
let eof = EOF

(* Pretty printers *)

(** Prints the constructor name of a {!tag_kind}. *)
let pp_tag_kind fmt = function
  | Start -> Format.pp_print_string fmt "Start"
  | End -> Format.pp_print_string fmt "End"

(** Prints a {!doctype} as
    [DOCTYPE{name=...; public_id=...; system_id=...; force_quirks=...}].
    An absent identifier prints as nothing, so [None] and [Some ""] produce
    the same output. *)
let pp_doctype fmt (d : doctype) =
  let pp_opt_string = Format.pp_print_option Format.pp_print_string in
  Format.fprintf fmt
    "DOCTYPE{name=%a; public_id=%a; system_id=%a; force_quirks=%b}"
    pp_opt_string d.name
    pp_opt_string d.public_id
    pp_opt_string d.system_id
    d.force_quirks

(** Prints a {!tag} in an HTML-like debug form: [<name k="v">] for start
    tags, [</name>] for end tags, with [ /] before the closing bracket when
    the tag is self-closing. Attribute values are written with [%S], i.e.
    quoted and escaped as OCaml string literals, not HTML-escaped. *)
let pp_tag fmt (t : tag) =
  let slash = match t.kind with Start -> "" | End -> "/" in
  Format.fprintf fmt "<%s%s" slash t.name;
  List.iter (fun (k, v) -> Format.fprintf fmt " %s=%S" k v) t.attrs;
  if t.self_closing then Format.pp_print_string fmt " /";
  Format.pp_print_char fmt '>'

(** Prints any token, delegating to {!pp_tag} and {!pp_doctype} for tags and
    doctypes. Character and comment data are written with [%S]. *)
let pp fmt = function
  | Tag t -> pp_tag fmt t
  | Character s -> Format.fprintf fmt "Character %S" s
  | Comment s -> Format.fprintf fmt "Comment %S" s
  | Doctype d -> pp_doctype fmt d
  | EOF -> Format.pp_print_string fmt "EOF"