···11-(* Quick analysis: find failing test files and print their content *)
22-33-let tests_dir = "validator/tests"
44-55-type expected_outcome = Valid | Invalid | HasWarning | Unknown
66-77-let parse_outcome filename =
88- (* Check .html *)
99- if String.length filename > 13 && String.sub filename (String.length filename - 13) 13 = "-isvalid.html" then Valid
1010- else if String.length filename > 13 && String.sub filename (String.length filename - 13) 13 = "-novalid.html" then Invalid
1111- else if String.length filename > 13 && String.sub filename (String.length filename - 13) 13 = "-haswarn.html" then HasWarning
1212- (* Check .xhtml *)
1313- else if String.length filename > 14 && String.sub filename (String.length filename - 14) 14 = "-isvalid.xhtml" then Valid
1414- else if String.length filename > 14 && String.sub filename (String.length filename - 14) 14 = "-novalid.xhtml" then Invalid
1515- else if String.length filename > 14 && String.sub filename (String.length filename - 14) 14 = "-haswarn.xhtml" then HasWarning
1616- else Unknown
1717-1818-let rec find_files dir =
1919- let entries = Sys.readdir dir |> Array.to_list in
2020- List.concat_map (fun entry ->
2121- let path = Filename.concat dir entry in
2222- if Sys.is_directory path then find_files path
2323- else if parse_outcome (Filename.basename path) <> Unknown then [path]
2424- else []
2525- ) entries
2626-2727-let () =
2828- let mode = if Array.length Sys.argv > 1 then Sys.argv.(1) else "novalid" in
2929- let files = find_files tests_dir in
3030- let count = ref 0 in
3131-3232- List.iter (fun path ->
3333- let outcome = parse_outcome (Filename.basename path) in
3434- let ic = open_in path in
3535- let content = really_input_string ic (in_channel_length ic) in
3636- close_in ic;
3737-3838- let reader = Bytesrw.Bytes.Reader.of_string content in
3939- let result = Htmlrw_check.check ~collect_parse_errors:true reader in
4040- let errors = Htmlrw_check.errors result in
4141- let warnings = Htmlrw_check.warnings result in
4242-4343- let should_print = match mode with
4444- | "isvalid" -> outcome = Valid && (errors <> [] || warnings <> []) && !count < 60
4545- | _ -> outcome = Invalid && errors = [] && !count < 60
4646- in
4747- if should_print then begin
4848- Printf.printf "\n=== %s ===\n" path;
4949- if mode = "isvalid" then begin
5050- if errors <> [] then begin
5151- Printf.printf "ERRORS:\n";
5252- List.iter (fun e -> Printf.printf " %s\n" e.Htmlrw_check.text) errors
5353- end;
5454- if warnings <> [] then begin
5555- Printf.printf "WARNINGS:\n";
5656- List.iter (fun w -> Printf.printf " %s\n" w.Htmlrw_check.text) warnings
5757- end
5858- end;
5959- print_endline content;
6060- incr count
6161- end
6262- ) files
-38
test/debug_check.ml
···11-let () =
22- let test_file = "validator/tests/html/attributes/lang/missing-lang-attribute-haswarn.html" in
33- let ic = open_in test_file in
44- let html = really_input_string ic (in_channel_length ic) in
55- close_in ic;
66- let reader = Bytesrw.Bytes.Reader.of_string html in
77- let doc = Html5rw.parse ~collect_errors:true reader in
88- let root = Html5rw.root doc in
99- print_endline "=== DOM Structure (with namespaces) ===";
1010- let rec print_node indent (node : Html5rw.Dom.node) =
1111- let open Html5rw.Dom in
1212- match node.name with
1313- | "#text" -> ()
1414- | "#document" | "#document-fragment" ->
1515- Printf.printf "%s%s\n" indent node.name;
1616- List.iter (print_node (indent ^ " ")) node.children
1717- | "!doctype" -> ()
1818- | "#comment" -> ()
1919- | _ ->
2020- let ns = match node.namespace with Some ns -> ns | None -> "none" in
2121- Printf.printf "%s<%s ns=%s>\n" indent node.name ns;
2222- List.iter (fun (k, v) ->
2323- if k = "foo" then Printf.printf "%s @%s=%s\n" indent k v
2424- ) node.attrs;
2525- List.iter (print_node (indent ^ " ")) node.children
2626- in
2727- print_node "" root;
2828- print_endline "\n=== Checking... ===";
2929- let reader2 = Bytesrw.Bytes.Reader.of_string html in
3030- let result = Htmlrw_check.check ~collect_parse_errors:true ~system_id:test_file reader2 in
3131- let errors = Htmlrw_check.errors result in
3232- let warnings = Htmlrw_check.warnings result in
3333- print_endline "=== Errors ===";
3434- List.iter (fun e -> print_endline e.Htmlrw_check.text) errors;
3535- print_endline "\n=== Warnings ===";
3636- List.iter (fun e -> print_endline e.Htmlrw_check.text) warnings;
3737- print_endline "\n=== Expected ===";
3838- print_endline "Consider adding a \xe2\x80\x9clang\xe2\x80\x9d attribute to the \xe2\x80\x9chtml\xe2\x80\x9d start tag to declare the language of this document."
-41
test/debug_validator.ml
···11-(** Debug utility for testing individual HTML files against the validator *)
22-33-let () =
44- if Array.length Sys.argv < 2 then begin
55- Printf.printf "Usage: debug_validator <html-file>\n";
66- exit 1
77- end;
88-99- let path = Sys.argv.(1) in
1010- let ic = open_in path in
1111- let content = really_input_string ic (in_channel_length ic) in
1212- close_in ic;
1313-1414- Printf.printf "=== Checking: %s ===\n\n" path;
1515- Printf.printf "Input (%d bytes):\n%s\n\n" (String.length content) content;
1616-1717- let reader = Bytesrw.Bytes.Reader.of_string content in
1818- let result = Htmlrw_check.check ~collect_parse_errors:true ~system_id:path reader in
1919-2020- let errors = Htmlrw_check.errors result in
2121- let warnings = Htmlrw_check.warnings result in
2222-2323- Printf.printf "=== Results ===\n";
2424- Printf.printf "Errors: %d\n" (List.length errors);
2525- List.iter (fun msg ->
2626- Printf.printf " [ERROR] %s\n" msg.Htmlrw_check.text;
2727- (match msg.Htmlrw_check.location with
2828- | Some loc -> Printf.printf " at line %d, col %d\n" loc.line loc.column
2929- | None -> ())
3030- ) errors;
3131-3232- Printf.printf "Warnings: %d\n" (List.length warnings);
3333- List.iter (fun msg ->
3434- Printf.printf " [WARN] %s\n" msg.Htmlrw_check.text;
3535- (match msg.Htmlrw_check.location with
3636- | Some loc -> Printf.printf " at line %d, col %d\n" loc.line loc.column
3737- | None -> ())
3838- ) warnings;
3939-4040- Printf.printf "\n=== Formatted Output ===\n";
4141- Printf.printf "%s\n" (Htmlrw_check.to_text result)
···11-let () =
22- let content = In_channel.with_open_text "validator/tests/html-svg/struct-cond-02-t-haswarn.html" (fun ic ->
33- In_channel.input_all ic
44- ) in
55- let reader = Bytesrw.Bytes.Reader.of_string content in
66- let result = Htmlrw_check.check ~system_id:"test.html" reader in
77- let warnings = Htmlrw_check.warnings result in
88- Printf.printf "Total warnings: %d\n" (List.length warnings);
99- List.iter (fun msg ->
1010- Printf.printf "WARNING: %s\n" msg.Htmlrw_check.text
1111- ) warnings