(* CSS selector matching *) module Dom = Dom open Selector_ast (* Use Astring for string operations *) let lowercase = Astring.String.Ascii.lowercase let trim = Astring.String.trim let find_sub = Astring.String.find_sub let fields = Astring.String.fields (* Check if haystack contains needle as a substring *) let string_contains ~haystack ~needle = Option.is_some (find_sub ~sub:needle haystack) let is_element node = let name = node.Dom.name in name <> "#text" && name <> "#comment" && name <> "#document" && name <> "#document-fragment" && name <> "!doctype" let get_element_children node = List.filter is_element node.Dom.children let get_previous_sibling node = match node.Dom.parent with | None -> None | Some parent -> let rec find_prev prev = function | [] -> None | n :: rest -> if n == node then prev else if is_element n then find_prev (Some n) rest else find_prev prev rest in find_prev None parent.Dom.children let is_first_child node = match node.Dom.parent with | None -> false | Some parent -> match get_element_children parent with | first :: _ -> first == node | [] -> false let is_last_child node = match node.Dom.parent with | None -> false | Some parent -> match List.rev (get_element_children parent) with | last :: _ -> last == node | [] -> false let is_first_of_type node = match node.Dom.parent with | None -> false | Some parent -> let name = lowercase node.Dom.name in let rec find = function | [] -> false | n :: _ when lowercase n.Dom.name = name -> n == node | _ :: rest -> find rest in find (get_element_children parent) let is_last_of_type node = match node.Dom.parent with | None -> false | Some parent -> let name = lowercase node.Dom.name in let rec find last = function | [] -> (match last with Some l -> l == node | None -> false) | n :: rest when lowercase n.Dom.name = name -> find (Some n) rest | _ :: rest -> find last rest in find None (get_element_children parent) let get_index node = match node.Dom.parent with | None -> 0 | Some parent -> let children = get_element_children parent in let rec find idx = function | [] -> 0 | n :: _ when n == node -> idx | _ :: rest -> find (idx + 1) rest in find 1 children let get_type_index node = match node.Dom.parent with | None -> 0 | Some parent -> let name = lowercase node.Dom.name in let children = get_element_children parent in let rec find idx = function | [] -> 0 | n :: _ when n == node -> idx | n :: rest when lowercase n.Dom.name = name -> find (idx + 1) rest | _ :: rest -> find idx rest in find 1 children (* Parse nth expression: "odd", "even", "3", "2n+1", etc *) let parse_nth expr = let expr = lowercase (trim expr) in if expr = "odd" then Some (2, 1) else if expr = "even" then Some (2, 0) else let expr = String.concat "" (fields ~is_sep:(fun c -> c = ' ') expr) in if String.contains expr 'n' then let parts = String.split_on_char 'n' expr in match parts with | [a_part; b_part] -> let a = if a_part = "" || a_part = "+" then 1 else if a_part = "-" then -1 else int_of_string_opt a_part |> Option.value ~default:0 in let b = if b_part = "" then 0 else int_of_string_opt b_part |> Option.value ~default:0 in Some (a, b) | _ -> None else match int_of_string_opt expr with | Some n -> Some (0, n) | None -> None let matches_nth index a b = if a = 0 then index = b else let diff = index - b in if a > 0 then diff >= 0 && diff mod a = 0 else diff <= 0 && diff mod a = 0 let rec matches_simple node selector = if not (is_element node) then false else match selector.selector_type with | Type_universal -> true | Type_tag -> (match selector.name with | Some name -> lowercase node.Dom.name = lowercase name | None -> false) | Type_id -> (match selector.name with | Some id -> (match Dom.get_attr node "id" with | Some node_id -> node_id = id | None -> false) | None -> false) | Type_class -> (match selector.name with | Some cls -> (match Dom.get_attr node "class" with | Some class_attr -> let classes = fields ~is_sep:(fun c -> c = ' ') class_attr in List.mem cls classes | None -> false) | None -> false) | Type_attr -> (match selector.name with | Some attr_name -> let attr_name_lower = lowercase attr_name in let node_value = List.find_map (fun (k, v) -> if lowercase k = attr_name_lower then Some v else None ) node.Dom.attrs in (match node_value with | None -> false | Some _ when selector.operator = None -> true | Some attr_value -> let value = Option.value selector.value ~default:"" in (match selector.operator with | Some "=" -> attr_value = value | Some "~=" -> let words = fields ~is_sep:(fun c -> c = ' ') attr_value in List.mem value words | Some "|=" -> attr_value = value || String.length attr_value > String.length value && String.sub attr_value 0 (String.length value) = value && attr_value.[String.length value] = '-' | Some "^=" -> value <> "" && String.length attr_value >= String.length value && String.sub attr_value 0 (String.length value) = value | Some "$=" -> value <> "" && String.length attr_value >= String.length value && String.sub attr_value (String.length attr_value - String.length value) (String.length value) = value | Some "*=" -> value <> "" && string_contains ~haystack:attr_value ~needle:value | Some _ | None -> false)) | None -> false) | Type_pseudo -> (match selector.name with | Some "first-child" -> is_first_child node | Some "last-child" -> is_last_child node | Some "first-of-type" -> is_first_of_type node | Some "last-of-type" -> is_last_of_type node | Some "only-child" -> is_first_child node && is_last_child node | Some "only-of-type" -> is_first_of_type node && is_last_of_type node | Some "empty" -> not (List.exists (fun c -> is_element c || (c.Dom.name = "#text" && trim c.Dom.data <> "") ) node.Dom.children) | Some "root" -> (match node.Dom.parent with | Some p -> p.Dom.name = "#document" || p.Dom.name = "#document-fragment" | None -> false) | Some "nth-child" -> (match selector.arg with | Some arg -> (match parse_nth arg with | Some (a, b) -> matches_nth (get_index node) a b | None -> false) | None -> false) | Some "nth-of-type" -> (match selector.arg with | Some arg -> (match parse_nth arg with | Some (a, b) -> matches_nth (get_type_index node) a b | None -> false) | None -> false) | Some "not" -> (match selector.arg with | Some arg -> (try let inner = Selector_parser.parse_selector arg in not (matches_selector node inner) with _ -> true) | None -> true) | _ -> false) and matches_compound node (compound : Selector_ast.compound_selector) = List.for_all (matches_simple node) compound.selectors and matches_complex node complex = (* Match from right to left *) let parts = List.rev complex.parts in match parts with | [] -> false | (_, rightmost) :: rest -> if not (matches_compound node rightmost) then false else let rec check current remaining = match remaining with | [] -> true | (Some " ", compound) :: rest -> (* Descendant combinator *) let rec find_ancestor n = match n.Dom.parent with | None -> false | Some p -> if matches_compound p compound then check p rest else find_ancestor p in find_ancestor current | (Some ">", compound) :: rest -> (* Child combinator *) (match current.Dom.parent with | None -> false | Some p -> if matches_compound p compound then check p rest else false) | (Some "+", compound) :: rest -> (* Adjacent sibling *) (match get_previous_sibling current with | None -> false | Some sib -> if matches_compound sib compound then check sib rest else false) | (Some "~", compound) :: rest -> (* General sibling *) let rec find_sibling n = match get_previous_sibling n with | None -> false | Some sib -> if matches_compound sib compound then check sib rest else find_sibling sib in find_sibling current | (None, compound) :: rest -> if matches_compound current compound then check current rest else false | _ -> false in check node rest and matches_selector node selector = match selector with | Simple s -> matches_simple node s | Compound c -> matches_compound node c | Complex c -> matches_complex node c | List l -> List.exists (fun c -> matches_complex node c) l.selectors let matches node selector_string = try let selector = Selector_parser.parse_selector selector_string in matches_selector node selector with _ -> false let rec query_descendants node selector results = List.iter (fun child -> if is_element child && matches_selector child selector then results := child :: !results; query_descendants child selector results; (* Also search template content *) (match child.Dom.template_content with | Some tc -> query_descendants tc selector results | None -> ()) ) node.Dom.children let query root selector_string = try let selector = Selector_parser.parse_selector selector_string in let results = ref [] in query_descendants root selector results; List.rev !results with _ -> []