(* OCaml HTML5 parser/serialiser based on Python's JustHTML *)
(* CSS selector parser *)
2
3open Selector_ast
4open Selector_token
5
(** [raise_error code] aborts parsing by raising
    [Selector_lexer.Selector_error code]. The lexer's exception is reused so
    that lexing and parsing failures surface through a single exception.
    @raise Selector_lexer.Selector_error always. *)
let raise_error code =
  let failure = Selector_lexer.Selector_error code in
  raise failure
8
(** Parser state: the token list being read plus a cursor into it. *)
type t = {
  tokens : Selector_token.t list;  (** Tokens to parse, in input order. *)
  mutable pos : int;  (** Index into [tokens] of the next unread token. *)
}
13
(** [create tokens] builds a parser state over [tokens] with the cursor on
    the first token. *)
let create tokens = { pos = 0; tokens }
15
(** [peek t] returns the token under the cursor without consuming it, or
    [EOF] when the cursor is at or past the end of [t.tokens].

    The lookup is a single [List.nth_opt] traversal instead of a
    [List.length] check followed by [List.nth], which walked the list twice
    on every call.
    @raise Invalid_argument if [t.pos] is negative. *)
let peek t =
  match List.nth_opt t.tokens t.pos with
  | Some tok -> tok
  | None -> EOF
20
(** [advance t] moves the cursor one token forward. It does nothing once the
    cursor has reached the end of [t.tokens], so this function never pushes
    [t.pos] beyond the length of the list. *)
let advance t =
  (* [List.compare_length_with] stops after [t.pos + 1] cells, whereas
     [List.length] would measure the whole token list on every step. *)
  if List.compare_length_with t.tokens t.pos > 0 then t.pos <- t.pos + 1
24
(** [consume t] returns the token under the cursor and then steps past it.
    At the end of input it returns [EOF] and leaves the cursor where it is. *)
let consume t =
  let current = peek t in
  let () = advance t in
  current
29
(** [expect t expected] consumes the current token when it is structurally
    equal to [expected].
    @raise Selector_lexer.Selector_error with [Expected_end_of_selector] when
    [expected] is [EOF] and the current token differs, or with
    [Unexpected_token] for any other mismatch. *)
let expect t expected =
  if peek t = expected then advance t
  else
    let code =
      match expected with
      | EOF -> Selector_error_code.Expected_end_of_selector
      | _ -> Selector_error_code.Unexpected_token
    in
    raise_error code
36
(** [parse_simple_selector t] tries to read one simple selector at the
    cursor: a tag name, the universal selector, an id, a class, an attribute
    selector in brackets, or a pseudo-class introduced by a colon.

    Returns [Some selector] after consuming its tokens, or [None] without
    consuming anything when the current token cannot start a simple selector.
    @raise Selector_lexer.Selector_error when an attribute selector or a
    pseudo-class is started but malformed (missing name, value, closing
    bracket or closing parenthesis). *)
let parse_simple_selector t =
  (* Consume a [Tag] token and return its text; otherwise fail with [code]. *)
  let identifier_or_fail code =
    match peek t with
    | Tag ident ->
      advance t;
      ident
    | _ -> raise_error code
  in
  (* Consume the current token, then build a selector that only has a name. *)
  let named kind name =
    advance t;
    Some (make_simple kind ~name ())
  in
  (* Body of an attribute selector; the opening bracket is already consumed. *)
  let attribute () =
    let attr_name =
      identifier_or_fail Selector_error_code.Expected_attribute_name
    in
    match peek t with
    | Attr_end ->
      (* Presence test only: name followed directly by the closing bracket. *)
      advance t;
      make_simple Type_attr ~name:attr_name ()
    | Attr_op op ->
      advance t;
      let value =
        match peek t with
        | String v ->
          advance t;
          v
        | _ -> raise_error Selector_error_code.Expected_attribute_value
      in
      (match peek t with
       | Attr_end -> advance t
       | _ -> raise_error Selector_error_code.Expected_closing_bracket);
      make_simple Type_attr ~name:attr_name ~operator:op ~value ()
    | _ -> raise_error Selector_error_code.Expected_closing_bracket_or_operator
  in
  (* Body of a pseudo-class; the colon is already consumed. *)
  let pseudo_class () =
    let name =
      identifier_or_fail Selector_error_code.Expected_pseudo_class_name
    in
    let arg =
      match peek t with
      | Paren_open ->
        advance t;
        (* The argument is optional: only a [String] token is taken as one. *)
        let inner =
          match peek t with
          | String s ->
            advance t;
            Some s
          | _ -> None
        in
        (match peek t with
         | Paren_close -> advance t
         | _ -> raise_error Selector_error_code.Expected_closing_paren);
        inner
      | _ -> None
    in
    make_simple Type_pseudo ~name ?arg ()
  in
  match peek t with
  | Tag name -> named Type_tag name
  | Universal ->
    advance t;
    Some (make_simple Type_universal ())
  | Id name -> named Type_id name
  | Class name -> named Type_class name
  | Attr_start ->
    advance t;
    Some (attribute ())
  | Colon ->
    advance t;
    Some (pseudo_class ())
  | _ -> None
94
(** [parse_compound_selector t] reads as many consecutive simple selectors as
    are available at the cursor and groups them with [make_compound].
    Returns [None] when not even one simple selector could be read. *)
let parse_compound_selector t =
  (* Accumulate in reverse; restore source order once parsing stops. *)
  let rec collect rev_simples =
    match parse_simple_selector t with
    | None -> List.rev rev_simples
    | Some simple -> collect (simple :: rev_simples)
  in
  match collect [] with
  | [] -> None
  | simples -> Some (make_compound simples)
104
(** [parse_complex_selector t] reads a compound selector followed by any
    number of [combinator, compound] pairs and passes them to [make_complex]
    as a list of [(combinator option, compound)] in source order; the first
    entry has no combinator.

    Returns [None] when no compound selector starts at the cursor.
    @raise Selector_lexer.Selector_error with
    [Expected_selector_after_combinator] when a combinator is not followed
    by a compound selector. *)
let parse_complex_selector t =
  (* [rev_parts] holds the pairs read so far, most recent first. *)
  let rec gather rev_parts =
    match peek t with
    | Combinator comb ->
      advance t;
      (match parse_compound_selector t with
       | Some compound -> gather ((Some comb, compound) :: rev_parts)
       | None ->
         raise_error Selector_error_code.Expected_selector_after_combinator)
    | _ -> List.rev rev_parts
  in
  match parse_compound_selector t with
  | None -> None
  | Some first -> Some (make_complex (gather [ (None, first) ]))
123
(** [parse tokens] parses a whole token list into a selector: a single
    complex selector yields [Complex], several comma-separated ones yield
    [List] built with [make_list].

    NOTE(review): a comma followed directly by the end of input is accepted,
    because collection simply stops when no selector follows the comma.
    @raise Selector_lexer.Selector_error with [Unexpected_token] when a
    selector is followed by something other than a comma or end of input,
    [Expected_end_of_selector] when tokens remain after the last selector,
    and [Empty_selector] when no selector was parsed at all. *)
let parse tokens =
  let t = create tokens in
  (* Collect complex selectors, most recent first. *)
  let rec collect rev_sels =
    match parse_complex_selector t with
    | None -> rev_sels
    | Some sel ->
      let rev_sels = sel :: rev_sels in
      (match peek t with
       | EOF -> rev_sels
       | Comma ->
         advance t;
         collect rev_sels
       | _ -> raise_error Selector_error_code.Unexpected_token)
  in
  let rev_sels = collect [] in
  (* Leftover input is reported before the empty-selector case. *)
  (match peek t with
   | EOF -> ()
   | _ -> raise_error Selector_error_code.Expected_end_of_selector);
  match List.rev rev_sels with
  | [] -> raise_error Selector_error_code.Empty_selector
  | [ only ] -> Complex only
  | several -> List (make_list several)
145
(** [parse_selector input] tokenizes [input] with [Selector_lexer.tokenize]
    and parses the resulting tokens with [parse].
    @raise Selector_lexer.Selector_error with [Empty_selector] when [input]
    is empty or whitespace only; other codes come from the lexer or from
    [parse]. *)
let parse_selector input =
  match String.trim input with
  | "" -> raise_error Selector_error_code.Empty_selector
  | _ ->
    (* The untrimmed text is what gets tokenized. *)
    input |> Selector_lexer.tokenize |> parse