(*---------------------------------------------------------------------------
   Copyright (c) 2025 Anil Madhavapeddy
   SPDX-License-Identifier: MIT
  ---------------------------------------------------------------------------*)

(** Browser-based test runner for langdetect.

    This module runs regression tests in the browser and displays results in
    the DOM. It demonstrates language detection across multiple languages. *)

open Brr

(** Test case definition *)
type test_case = {
  name : string;  (** Language name shown in the first table column. *)
  text : string;  (** Sample text handed to the detector. *)
  expected : string;  (** Language code the detector should report. *)
}

(** Test results *)
type test_result = {
  test : test_case;  (** The case that was run. *)
  detected : string option;  (** Detected language code, if any. *)
  prob : float option;  (** Probability reported with the detection. *)
  passed : bool;  (** Whether [detected] matched [test.expected]. *)
  time_ms : float;  (** Time spent in the detection call, in ms. *)
}

(** Sample texts from the native test corpus *)
let test_cases =
  [|
    (* Same corpus as test/test_langdetect.ml *)
    { name = "English";
      text = "The quick brown fox jumps over the lazy dog. This is a sample of English text that should be detected correctly by the language detection algorithm. Language detection uses n-gram frequency analysis to determine the most likely language of a given text sample.";
      expected = "en" };
    { name = "Chinese";
      text = "看官,現今我們中國四萬萬同胞欲內免專制、外杜瓜分的一個絕大轉機、絕大遭際,不是那預備立憲一事麼?但那立憲上加了這麼預備兩個字的活動考語,我就深恐將來這瘟憲立不成,必定嫁禍到我們同胞程度不齊上,以為卸罪地步。";
      expected = "zh" };
    { name = "Hebrew";
      text = "זוהי דוגמה לטקסט בעברית שנועד לבדיקת זיהוי שפה. עברית היא שפה שמית שנכתבת מימין לשמאל. המערכת צריכה לזהות אותה כראוי על סמך התדירות של אותיות ותבניות אופייניות.";
      expected = "he" };
    { name = "German";
      text = "Dies ist ein Beispieltext auf Deutsch, der zur Spracherkennung verwendet wird. Die deutsche Sprache hat viele charakteristische Merkmale wie Umlaute und zusammengesetzte Wörter, die die Erkennung erleichtern.";
      expected = "de" };
    (* This sample carries a line break inside the literal; it is written
       as an escape so the value does not depend on source layout. *)
    { name = "French";
      text = "Ceci est un exemple de texte en français pour tester la détection de langue. \nLe français est une langue romane avec des caractéristiques distinctives comme les accents et les conjugaisons verbales.";
      expected = "fr" };
    { name = "Japanese";
      text = "これは日本語のテキストです。日本語の言語検出をテストするためのサンプルです。日本語には漢字、ひらがな、カタカナの三種類の文字が使われています。";
      expected = "ja" };
    { name = "Russian";
      text = "Это пример текста на русском языке для тестирования определения языка. Русский язык использует кириллический алфавит и имеет сложную грамматику с падежами и склонениями.";
      expected = "ru" };
    { name = "Spanish";
      text = "Este es un ejemplo de texto en español para probar la detección de idiomas. El español es una lengua romance hablada por millones de personas en todo el mundo.";
      expected = "es" };
    { name = "Arabic";
      text = "هذا مثال على نص باللغة العربية لاختبار اكتشاف اللغة. اللغة العربية هي لغة سامية تكتب من اليمين إلى اليسار.";
      expected = "ar" };
    { name = "Korean";
      text = "이것은 언어 감지를 테스트하기 위한 한국어 텍스트 예시입니다. 한국어는 한글이라는 독특한 문자 체계를 사용합니다.";
      expected = "ko" };
    { name = "Portuguese";
      text = "Este é um exemplo de texto em português para testar a detecção de idiomas. O português é uma língua românica falada em Portugal, Brasil e outros países.";
      expected = "pt" };
    { name = "Italian";
      text = "Questo è un esempio di testo in italiano per testare il rilevamento della lingua. L'italiano è una lingua romanza con una ricca storia letteraria.";
      expected = "it" };
    { name = "Dutch";
      text = "Dit is een voorbeeld van Nederlandse tekst voor het testen van taaldetectie. Nederlands wordt gesproken in Nederland en België en heeft veel overeenkomsten met Duits en Engels.";
      expected = "nl" };
    { name = "Polish";
      text = "To jest przykładowy tekst w języku polskim do testowania wykrywania języka. Polski jest językiem słowiańskim z bogatą historią literacką i skomplikowaną gramatyką.";
      expected = "pl" };
    (* This sample also carries a line break inside the literal. *)
    { name = "Turkish";
      text = "Bu, dil algılama testleri için Türkçe örnek bir metindir. Türkçe, agglutinative bir dil yapısına sahip ve Latin alfabesi kullanmaktadır. \nÖzel karakterler içerir.";
      expected = "tr" };
    { name = "Swedish";
      text = "Detta är en exempeltext på svenska för att testa språkdetektering. Svenska är ett nordiskt språk som talas i Sverige och Finland med karakteristiska vokaler.";
      expected = "sv" };
    { name = "Vietnamese";
      text = "Đây là một văn bản mẫu bằng tiếng Việt để kiểm tra phát hiện ngôn ngữ. Tiếng Việt sử dụng bảng chữ cái Latin với nhiều dấu thanh điệu đặc biệt.";
      expected = "vi" };
    { name = "Thai";
      text = "นี่คือข้อความตัวอย่างภาษาไทยสำหรับทดสอบการตรวจจับภาษา ภาษาไทยใช้อักษรไทย และมีระบบวรรณยุกต์ที่ซับซ้อน";
      expected = "th" };
    { name = "Hindi";
      text = "यह भाषा पहचान परीक्षण के लिए हिंदी में एक नमूना पाठ है। हिंदी देवनागरी लिपि का उपयोग करती है और भारत की आधिकारिक भाषाओं में से एक है।";
      expected = "hi" };
    { name = "Finnish";
      text = "Tämä on suomenkielinen esimerkkiteksti kielentunnistuksen testaamiseksi. Suomi on suomalais-ugrilainen kieli, jolla on monimutkainen taivutusjärjestelmä.";
      expected = "fi" };
  |]

(** Get current time in milliseconds, read from the global
    [performance.now ()] JavaScript function. *)
let now_ms () =
  Jv.to_float (Jv.call (Jv.get Jv.global "performance") "now" [||])

(** [run_test detector test] runs the detector on [test.text] and returns a
    {!test_result} holding the detected language, its probability, the
    pass/fail verdict and the elapsed time of the detection call. *)
let run_test detector test =
  (* Set deterministic seed before EACH test, like native tests do *)
  Langdetect.set_random_seed detector 42;
  let start = now_ms () in
  let result = Langdetect.detect_with_prob detector test.text in
  let time_ms = now_ms () -. start in
  let detected, prob =
    match result with
    | Some (lang, p) -> (Some lang, Some p)
    | None -> (None, None)
  in
  (* Handle special case: zh matching zh-cn/zh-tw *)
  let lang_matches expected detected =
    if expected = "zh" then
      String.length detected >= 2 && String.sub detected 0 2 = "zh"
    else expected = detected
  in
  let passed =
    match detected with
    | Some lang -> lang_matches test.expected lang
    | None -> false
  in
  { test; detected; prob; passed; time_ms }

(** Shared detector instance - created lazily on first use *)
let shared_detector = lazy (Langdetect.create_default ())

(** Run all tests and return results, one per entry of {!test_cases}. *)
let run_all_tests () =
  let detector = Lazy.force shared_detector in
  Array.map (run_test detector) test_cases

(** [utf8_truncate ~max_chars ~keep_chars s] returns [s] unchanged when it
    holds at most [max_chars] UTF-8 encoded characters, and otherwise the
    first [keep_chars] characters of [s] followed by ["..."].

    The cut is always placed on a character boundary, so the result never
    ends in a partial multi-byte sequence. Characters are counted by their
    lead byte; combining marks count as separate characters. *)
let utf8_truncate ~max_chars ~keep_chars s =
  let len = String.length s in
  (* Bytes of the form 10xxxxxx continue a multi-byte sequence. *)
  let is_continuation i = Char.code (String.get s i) land 0xC0 = 0x80 in
  (* [chars] is the number of characters started before byte [i]; [cut] is
     the byte offset at which character number [keep_chars] starts. *)
  let rec scan i chars cut =
    if i >= len then None
    else if is_continuation i then scan (i + 1) chars cut
    else
      let cut = if chars = keep_chars then i else cut in
      if chars >= max_chars then Some cut else scan (i + 1) (chars + 1) cut
  in
  match scan 0 0 len with
  | None -> s
  | Some cut -> String.sub s 0 cut ^ "..."

(** Red paragraph reporting [exn] to the user. *)
let error_paragraph exn =
  El.p
    ~at:[ At.style (Jstr.v "color: red") ]
    [ El.txt' (Printf.sprintf "Error: %s" (Printexc.to_string exn)) ]

(** Create a result row element: name, truncated sample, expected code,
    detected code, confidence, time and pass/fail mark. *)
let create_result_row result =
  let status_class, status_text =
    if result.passed then ("pass", "✓") else ("fail", "✗")
  in
  let detected_text =
    match result.detected with Some lang -> lang | None -> "(none)"
  in
  let prob_text =
    match result.prob with
    | Some p -> Printf.sprintf "%.1f%%" (p *. 100.0)
    | None -> "-"
  in
  let time_text = Printf.sprintf "%.1fms" result.time_ms in
  (* Truncate long text for display. Most samples are multi-byte UTF-8, so
     the cut must fall on a character boundary rather than a byte offset. *)
  let display_text =
    utf8_truncate ~max_chars:60 ~keep_chars:57 result.test.text
  in
  let code_cell s = El.td ~at:[ At.class' (Jstr.v "code") ] [ El.txt' s ] in
  El.tr
    [
      El.td [ El.txt' result.test.name ];
      El.td ~at:[ At.class' (Jstr.v "corpus-text") ] [ El.txt' display_text ];
      code_cell result.test.expected;
      code_cell detected_text;
      El.td [ El.txt' prob_text ];
      El.td [ El.txt' time_text ];
      El.td ~at:[ At.class' (Jstr.v status_class) ] [ El.txt' status_text ];
    ]

(** Create summary stats: pass/fail counts plus total and average time. *)
let create_summary results =
  let total = Array.length results in
  let passed =
    Array.fold_left (fun acc r -> if r.passed then acc + 1 else acc) 0 results
  in
  let failed = total - passed in
  let total_time = Array.fold_left (fun acc r -> acc +. r.time_ms) 0.0 results in
  (* Avoid printing nan when there are no results at all. *)
  let avg_time = if total = 0 then 0.0 else total_time /. float_of_int total in
  El.div
    ~at:[ At.class' (Jstr.v "summary") ]
    [
      El.h2 [ El.txt' "Test Results" ];
      El.p
        [
          El.strong
            [ El.txt' (Printf.sprintf "%d/%d tests passed" passed total) ];
          El.txt' (Printf.sprintf " (%d failed)" failed);
        ];
      El.p
        [
          El.txt'
            (Printf.sprintf "Total time: %.1fms (avg %.2fms per test)"
               total_time avg_time);
        ];
    ]

(** Console error logging *)
let console_error msg =
  ignore (Jv.call (Jv.get Jv.global "console") "error" [| Jv.of_string msg |])

(** Console info logging *)
let console_log msg =
  ignore (Jv.call (Jv.get Jv.global "console") "log" [| Jv.of_string msg |])

(** Main test runner.

    Finds the [#test-results] element (creating it under the document body
    when missing), shows a loading message, then runs the whole corpus from
    a 200ms timer and replaces the message with a summary and results table.
    Exceptions are logged to the console; those raised while running the
    tests are also shown in the container. *)
let run_tests_ui () =
  console_log "[langdetect-tests] Starting test UI...";
  try
    (* Find or create output container *)
    let body = Document.body G.document in
    let container =
      match El.find_first_by_selector (Jstr.v "#test-results") ~root:body with
      | Some el ->
        console_log "[langdetect-tests] Found #test-results container";
        el
      | None ->
        console_log "[langdetect-tests] Creating #test-results container";
        let el = El.div ~at:[ At.id (Jstr.v "test-results") ] [] in
        El.append_children body [ el ];
        el
    in
    (* Show loading message *)
    El.set_children container [ El.p [ El.txt' "Running tests..." ] ];
    console_log
      "[langdetect-tests] Set loading message, scheduling test run...";
    (* Runs from the timer, i.e. after this function has returned, so it
       needs its own exception handler. *)
    let run_tests_callback () =
      console_log "[langdetect-tests] Callback executing...";
      try
        console_log "[langdetect-tests] Running tests...";
        let results = run_all_tests () in
        console_log
          (Printf.sprintf "[langdetect-tests] Tests complete: %d results"
             (Array.length results));
        (* Build results table *)
        let header_cell label = El.th [ El.txt' label ] in
        let thead =
          El.thead
            [
              El.tr
                (List.map header_cell
                   [
                     "Language"; "Sample Text"; "Expected"; "Detected";
                     "Confidence"; "Time"; "Status";
                   ]);
            ]
        in
        let tbody = El.tbody [] in
        Array.iter
          (fun result -> El.append_children tbody [ create_result_row result ])
          results;
        let table =
          El.table ~at:[ At.class' (Jstr.v "results-table") ] [ thead; tbody ]
        in
        (* Update container *)
        El.set_children container [ create_summary results; table ];
        console_log "[langdetect-tests] UI updated with results"
      with exn ->
        console_error
          (Printf.sprintf "[langdetect-tests] Error running tests: %s"
             (Printexc.to_string exn));
        El.set_children container [ error_paragraph exn ]
    in
    (* Use Brr's timer function *)
    console_log "[langdetect-tests] Scheduling tests with G.set_timeout...";
    let _timer = G.set_timeout ~ms:200 run_tests_callback in
    console_log "[langdetect-tests] Timer scheduled"
  with exn ->
    console_error
      (Printf.sprintf "[langdetect-tests] Error in run_tests_ui: %s"
         (Printexc.to_string exn))

(** Interactive demo section.

    Builds a textarea, a button and a result area inside [#demo] (or the
    document body when [#demo] is missing). Clicking the button runs the
    shared detector on the textarea content and lists every candidate
    language with its confidence. *)
let setup_demo () =
  console_log "[langdetect-tests] Setting up demo...";
  try
    let body = Document.body G.document in
    let demo_container =
      match El.find_first_by_selector (Jstr.v "#demo") ~root:body with
      | Some el ->
        console_log "[langdetect-tests] Found #demo container";
        el
      | None ->
        console_log "[langdetect-tests] No #demo container, using body";
        body
    in
    console_log "[langdetect-tests] Creating demo elements...";
    let textarea =
      El.textarea
        ~at:
          [
            At.id (Jstr.v "demo-input");
            At.v (Jstr.v "rows") (Jstr.v "4");
            At.v (Jstr.v "placeholder")
              (Jstr.v "Enter text to detect language...");
          ]
        []
    in
    let result_div =
      El.div
        ~at:[ At.id (Jstr.v "demo-result") ]
        [ El.txt' "Enter text above and click Detect" ]
    in
    let detect_button =
      El.button
        ~at:[ At.id (Jstr.v "demo-button") ]
        [ El.txt' "Detect Language" ]
    in
    console_log
      "[langdetect-tests] Created demo elements, setting up click handler...";
    (* Children of the result area for one detection run. *)
    let render_detection results time_ms =
      match results with
      | [] -> [ El.txt' "No language detected (text may be too short)" ]
      | _ :: _ ->
        let items =
          List.map
            (fun (r : Langdetect.result) ->
              El.li
                [
                  El.strong [ El.txt' r.lang ];
                  El.txt'
                    (Printf.sprintf " — %.1f%% confidence" (r.prob *. 100.0));
                ])
            results
        in
        [
          El.p [ El.txt' (Printf.sprintf "Detected in %.1fms:" time_ms) ];
          El.ul items;
        ]
    in
    (* Set up click handler - detector is created lazily on first click.
       The handler runs after setup_demo has returned, so the enclosing
       try cannot see its exceptions; they are reported here instead. *)
    let on_click _ev =
      let text = Jstr.to_string (El.prop El.Prop.value textarea) in
      if String.length text > 0 then begin
        match
          let detector = Lazy.force shared_detector in
          let start = now_ms () in
          let results = Langdetect.detect detector text in
          let time_ms = now_ms () -. start in
          (results, time_ms)
        with
        | results, time_ms ->
          El.set_children result_div (render_detection results time_ms)
        | exception exn ->
          console_error
            (Printf.sprintf "[langdetect-tests] Error detecting language: %s"
               (Printexc.to_string exn));
          El.set_children result_div [ error_paragraph exn ]
      end
    in
    ignore (Ev.listen Ev.click on_click (El.as_target detect_button));
    console_log "[langdetect-tests] Click handler registered";
    (* Add demo section to container *)
    let tag = Jstr.to_string (El.tag_name demo_container) in
    console_log (Printf.sprintf "[langdetect-tests] Container tag: %s" tag);
    El.set_children demo_container
      [
        El.h2 [ El.txt' "Try It" ];
        El.div
          ~at:[ At.class' (Jstr.v "demo-area") ]
          [ textarea; detect_button; result_div ];
      ];
    console_log "[langdetect-tests] Demo UI created"
  with exn ->
    console_error
      (Printf.sprintf "[langdetect-tests] Error in setup_demo: %s"
         (Printexc.to_string exn))

(** Entry point *)
let () =
  (* Register global API for the interactive demo in test.html *)
  Langdetect_js.register_global_api ();
  let start () =
    run_tests_ui ();
    setup_demo ()
  in
  (* Wait for DOM to be ready *)
  let ready_state =
    Jv.get (Jv.get Jv.global "document") "readyState" |> Jv.to_string
  in
  if ready_state = "loading" then
    ignore
      (Jv.call Jv.global "addEventListener"
         [|
           Jv.of_string "DOMContentLoaded";
           Jv.callback ~arity:1 (fun _ -> start ());
         |])
  else start ()