(*--------------------------------------------------------------------------- Copyright (c) 2025 Anil Madhavapeddy . All rights reserved. SPDX-License-Identifier: ISC ---------------------------------------------------------------------------*) (** Browser-compatible test runner for html5rw regression tests. This module provides functions to run html5lib conformance tests in the browser, receiving test data as strings and returning results as JavaScript-accessible objects. *) [@@@warning "-69-33"] (* Silence unused-field and unused-open warnings *) open Brr (* ============================================================ *) (* Test Result Types *) (* ============================================================ *) type test_result = { test_num : int; description : string; input : string; expected : string; actual : string; success : bool; } type file_result = { filename : string; test_type : string; passed_count : int; failed_count : int; tests : test_result list; } type suite_result = { name : string; total_passed : int; total_failed : int; files : file_result list; } (* ============================================================ *) (* Tree Construction Tests *) (* ============================================================ *) module TreeConstruction = struct type test_case = { input : string; expected_tree : string; expected_errors : string list; script_on : bool; fragment_context : string option; } let parse_test_case lines = let rec parse acc = function | [] -> acc | line :: rest when String.length line > 0 && line.[0] = '#' -> let section = String.trim line in let content, remaining = collect_section rest in parse ((section, content) :: acc) remaining | _ :: rest -> parse acc rest and collect_section lines = let rec loop acc = function | [] -> (List.rev acc, []) | line :: rest when String.length line > 0 && line.[0] = '#' -> (List.rev acc, line :: rest) | line :: rest -> loop (line :: acc) rest in loop [] lines in let sections = parse [] lines in let get_section name = match List.assoc_opt name sections with | Some lines -> String.concat "\n" lines | None -> "" in let data = get_section "#data" in let document = get_section "#document" in let errors_text = get_section "#errors" in let errors = String.split_on_char '\n' errors_text |> List.filter (fun s -> String.trim s <> "") in let script_on = List.mem_assoc "#script-on" sections in let fragment = if List.mem_assoc "#document-fragment" sections then Some (get_section "#document-fragment" |> String.trim) else None in { input = data; expected_tree = document; expected_errors = errors; script_on; fragment_context = fragment } let parse_dat_content content = let lines = String.split_on_char '\n' content in let rec split_tests current acc = function | [] -> if current = [] then List.rev acc else List.rev (List.rev current :: acc) | "" :: "#data" :: rest -> let new_acc = if current = [] then acc else (List.rev current :: acc) in split_tests ["#data"] new_acc rest | line :: rest -> split_tests (line :: current) acc rest in let test_groups = split_tests [] [] lines in List.filter_map (fun lines -> if List.exists (fun l -> l = "#data") lines then Some (parse_test_case lines) else None ) test_groups let strip_tree_prefix s = let lines = String.split_on_char '\n' s in let stripped = List.filter_map (fun line -> if String.length line >= 2 && String.sub line 0 2 = "| " then Some (String.sub line 2 (String.length line - 2)) else if String.trim line = "" then None else Some line ) lines in String.concat "\n" stripped let normalize_tree s = let lines = String.split_on_char '\n' s in let non_empty = List.filter (fun l -> String.trim l <> "") lines in String.concat "\n" non_empty let run_test test = try let result = match test.fragment_context with | Some ctx_str -> let (namespace, tag_name) = match String.split_on_char ' ' ctx_str with | [ns; tag] when ns = "svg" -> (Some "svg", tag) | [ns; tag] when ns = "math" -> (Some "mathml", tag) | [tag] -> (None, tag) | _ -> (None, ctx_str) in let context = Html5rw.Parser.make_fragment_context ~tag_name ~namespace () in let reader = Bytesrw.Bytes.Reader.of_string test.input in Html5rw.Parser.parse ~collect_errors:true ~fragment_context:context reader | None -> let reader = Bytesrw.Bytes.Reader.of_string test.input in Html5rw.Parser.parse ~collect_errors:true reader in let actual_tree = Html5rw.Dom.to_test_format (Html5rw.Parser.root result) in let expected = normalize_tree (strip_tree_prefix test.expected_tree) in let actual = normalize_tree (strip_tree_prefix actual_tree) in (expected = actual, expected, actual) with e -> let expected = normalize_tree (strip_tree_prefix test.expected_tree) in (false, expected, Printf.sprintf "EXCEPTION: %s" (Printexc.to_string e)) let run_content ~filename content = let tests = parse_dat_content content in let passed = ref 0 in let failed = ref 0 in let results = ref [] in List.iteri (fun i test -> if test.script_on then () else begin let (success, expected, actual) = run_test test in let description = let input_preview = String.sub test.input 0 (min 60 (String.length test.input)) in if test.fragment_context <> None then Printf.sprintf "Fragment (%s): %s" (Option.get test.fragment_context) input_preview else input_preview in let result = { test_num = i + 1; description; input = test.input; expected; actual; success; } in results := result :: !results; if success then incr passed else incr failed end ) tests; { filename; test_type = "Tree Construction"; passed_count = !passed; failed_count = !failed; tests = List.rev !results; } end (* ============================================================ *) (* Encoding Tests *) (* ============================================================ *) module EncodingTests = struct type test_case = { input : string; expected_encoding : string; } let normalize_encoding_name s = String.lowercase_ascii (String.trim s) let encoding_to_test_name = function | Html5rw.Encoding.Utf8 -> "utf-8" | Html5rw.Encoding.Utf16le -> "utf-16le" | Html5rw.Encoding.Utf16be -> "utf-16be" | Html5rw.Encoding.Windows_1252 -> "windows-1252" | Html5rw.Encoding.Iso_8859_2 -> "iso-8859-2" | Html5rw.Encoding.Euc_jp -> "euc-jp" let parse_test_case lines = let rec parse acc = function | [] -> acc | line :: rest when String.length line > 0 && line.[0] = '#' -> let section = String.trim line in let content, remaining = collect_section rest in parse ((section, content) :: acc) remaining | _ :: rest -> parse acc rest and collect_section lines = let rec loop acc = function | [] -> (List.rev acc, []) | line :: rest when String.length line > 0 && line.[0] = '#' -> (List.rev acc, line :: rest) | line :: rest -> loop (line :: acc) rest in loop [] lines in let sections = parse [] lines in let get_section name = match List.assoc_opt name sections with | Some lines -> String.concat "\n" lines | None -> "" in let data = get_section "#data" in let encoding = get_section "#encoding" in { input = data; expected_encoding = String.trim encoding } let parse_dat_content content = let lines = String.split_on_char '\n' content in let rec split_tests current acc = function | [] -> if current = [] then List.rev acc else List.rev (List.rev current :: acc) | "" :: "#data" :: rest -> let new_acc = if current = [] then acc else (List.rev current :: acc) in split_tests ["#data"] new_acc rest | line :: rest -> split_tests (line :: current) acc rest in let test_groups = split_tests [] [] lines in List.filter_map (fun lines -> if List.exists (fun l -> l = "#data") lines then Some (parse_test_case lines) else None ) test_groups let run_test test = try let (_, detected_encoding) = Html5rw.Encoding.decode (Bytes.of_string test.input) () in let detected_name = encoding_to_test_name detected_encoding in let expected_name = normalize_encoding_name test.expected_encoding in let match_encoding det exp = det = exp || (det = "windows-1252" && (exp = "windows-1252" || exp = "cp1252" || exp = "iso-8859-1")) || (det = "iso-8859-2" && (exp = "iso-8859-2" || exp = "iso8859-2" || exp = "latin2")) || (det = "utf-8" && (exp = "utf-8" || exp = "utf8")) || (det = "euc-jp" && (exp = "euc-jp" || exp = "eucjp")) in (match_encoding detected_name expected_name, detected_name, expected_name) with e -> (false, Printf.sprintf "EXCEPTION: %s" (Printexc.to_string e), test.expected_encoding) let run_content ~filename content = let tests = parse_dat_content content in let passed = ref 0 in let failed = ref 0 in let results = ref [] in List.iteri (fun i test -> if String.trim test.expected_encoding = "" then () else begin let (success, detected, expected) = run_test test in let result = { test_num = i + 1; description = Printf.sprintf "Detect %s encoding" expected; input = String.escaped test.input; expected; actual = detected; success; } in results := result :: !results; if success then incr passed else incr failed end ) tests; { filename; test_type = "Encoding Detection"; passed_count = !passed; failed_count = !failed; tests = List.rev !results; } end (* ============================================================ *) (* JavaScript API *) (* ============================================================ *) let test_result_to_jv (r : test_result) = Jv.obj [| "testNum", Jv.of_int r.test_num; "description", Jv.of_string r.description; "input", Jv.of_string r.input; "expected", Jv.of_string r.expected; "actual", Jv.of_string r.actual; "success", Jv.of_bool r.success; |] let file_result_to_jv (r : file_result) = Jv.obj [| "filename", Jv.of_string r.filename; "testType", Jv.of_string r.test_type; "passedCount", Jv.of_int r.passed_count; "failedCount", Jv.of_int r.failed_count; "tests", Jv.of_list test_result_to_jv r.tests; |] let suite_result_to_jv (r : suite_result) = Jv.obj [| "name", Jv.of_string r.name; "totalPassed", Jv.of_int r.total_passed; "totalFailed", Jv.of_int r.total_failed; "files", Jv.of_list file_result_to_jv r.files; |] (** Run tree construction tests on a single file's content *) let run_tree_construction_test filename content = let result = TreeConstruction.run_content ~filename content in file_result_to_jv result (** Run encoding detection tests on a single file's content *) let run_encoding_test filename content = let result = EncodingTests.run_content ~filename content in file_result_to_jv result (** Run all tests from provided test data *) let run_all_tests (test_files : (string * string * string) list) = let tree_files = ref [] in let encoding_files = ref [] in let total_passed = ref 0 in let total_failed = ref 0 in List.iter (fun (test_type, filename, content) -> let result = match test_type with | "tree-construction" -> let r = TreeConstruction.run_content ~filename content in tree_files := r :: !tree_files; r | "encoding" -> let r = EncodingTests.run_content ~filename content in encoding_files := r :: !encoding_files; r | _ -> failwith ("Unknown test type: " ^ test_type) in total_passed := !total_passed + result.passed_count; total_failed := !total_failed + result.failed_count ) test_files; let all_files = List.rev !tree_files @ List.rev !encoding_files in let suite = { name = "HTML5lib Regression Tests"; total_passed = !total_passed; total_failed = !total_failed; files = all_files; } in suite_result_to_jv suite (* ============================================================ *) (* Simple Parser Test for Quick Validation *) (* ============================================================ *) let quick_parse_test html = try let reader = Bytesrw.Bytes.Reader.of_string html in let result = Html5rw.Parser.parse ~collect_errors:true reader in let root = Html5rw.Parser.root result in let serialized = Html5rw.Dom.to_html root in let errors = Html5rw.Parser.errors result in let error_to_string e = Format.asprintf "%a" Html5rw.pp_parse_error e in Jv.obj [| "success", Jv.of_bool true; "html", Jv.of_string serialized; "errorCount", Jv.of_int (List.length errors); "errors", Jv.of_list (fun e -> Jv.of_string (error_to_string e)) errors; |] with e -> Jv.obj [| "success", Jv.of_bool false; "error", Jv.of_string (Printexc.to_string e); |] (* ============================================================ *) (* Export to JavaScript *) (* ============================================================ *) let () = let html5rw_tests = Jv.obj [| "runTreeConstructionTest", Jv.callback ~arity:2 (fun filename content -> run_tree_construction_test (Jv.to_string filename) (Jv.to_string content)); "runEncodingTest", Jv.callback ~arity:2 (fun filename content -> run_encoding_test (Jv.to_string filename) (Jv.to_string content)); "runAllTests", Jv.callback ~arity:1 (fun files_jv -> let files = Jv.to_list (fun item -> let test_type = Jv.to_string (Jv.get item "type") in let filename = Jv.to_string (Jv.get item "filename") in let content = Jv.to_string (Jv.get item "content") in (test_type, filename, content) ) files_jv in run_all_tests files); "quickParseTest", Jv.callback ~arity:1 (fun html -> quick_parse_test (Jv.to_string html)); "version", Jv.of_string "1.0.0"; |] in Jv.set Jv.global "html5rwTests" html5rw_tests