(*---------------------------------------------------------------------------
   Copyright (c) 2025 Anil Madhavapeddy
   SPDX-License-Identifier: MIT
  ---------------------------------------------------------------------------*)

(** [detect_language input_text] runs the default [Langdetect] detector on
    [input_text] and prints one ["<lang> <prob>"] line per result to standard
    output, with the probability formatted to four decimal places. Results
    are printed in the order the detector returns them. *)
let detect_language input_text =
  let detector = Langdetect.create_default () in
  Langdetect.detect detector input_text
  |> List.iter (fun (r : Langdetect.result) ->
         Printf.printf "%s %.4f\n" r.lang r.prob)

(** [read_channel ic] reads [ic] until end of file and returns everything
    read as a single string. It does not close [ic].

    Reading in chunks until [input] returns [0] avoids relying on
    [in_channel_length], which is only meaningful for regular files.

    @raise Sys_error if the underlying read fails. *)
let read_channel ic =
  let chunk = Bytes.create 4096 in
  let buf = Buffer.create 4096 in
  let rec loop () =
    match input ic chunk 0 (Bytes.length chunk) with
    | 0 -> Buffer.contents buf
    | n ->
        Buffer.add_subbytes buf chunk 0 n;
        loop ()
  in
  loop ()

(** [read_all_stdin ()] returns everything available on standard input up to
    end of file.

    @raise Sys_error if reading from standard input fails. *)
let read_all_stdin () = read_channel stdin

(** [read_file path] returns the full contents of the file at [path].

    The file is opened in binary mode so that no newline translation takes
    place, and the channel is closed even when reading raises.

    @raise Sys_error if [path] cannot be opened or read. *)
let read_file path =
  let ic = open_in_bin path in
  Fun.protect
    ~finally:(fun () -> close_in_noerr ic)
    (fun () -> read_channel ic)

(** [run file_opt] is the command's main action. It reads the input text from
    [path] when [file_opt] is [Some path], or from standard input when it is
    [None], then prints the detected languages.

    Returns [`Ok ()] on success. Returns [`Error (false, msg)] when the input
    cannot be read or when it is empty after trimming whitespace; the result
    is meant to be consumed by [Cmdliner.Term.ret]. *)
let run file_opt =
  let read_input () =
    match file_opt with
    | Some path -> read_file path
    | None -> read_all_stdin ()
  in
  match read_input () with
  (* Report I/O failures as an ordinary command error instead of letting the
     exception escape to the top level. *)
  | exception Sys_error msg -> `Error (false, msg)
  | text ->
      if String.length (String.trim text) = 0 then
        `Error (false, "No input text provided")
      else begin
        detect_language text;
        `Ok ()
      end

open Cmdliner

(** Optional positional argument [FILE]: the file to read the text from.
    When absent, the text is read from standard input. *)
let file_arg =
  let doc =
    "Input file to detect language from. If not provided, reads from stdin."
  in
  Arg.(value & pos 0 (some file) None & info [] ~docv:"FILE" ~doc)

(** The [langdetect] command: its manual page and the term that evaluates
    {!run} on the parsed [FILE] argument. *)
let cmd =
  let doc = "Detect the language of text" in
  let man =
    [
      `S Manpage.s_description;
      `P "Detects the natural language of input text using n-gram frequency analysis.";
      `P "Outputs detected language codes and their probabilities as space-separated values, one per line, sorted by probability (highest first).";
      `S Manpage.s_examples;
      `P "Detect language from a file:";
      `Pre " langdetect document.txt";
      `P "Detect language from stdin:";
      `Pre " echo 'Hello world' | langdetect";
    ]
  in
  let info = Cmd.info "langdetect" ~version:"%%VERSION%%" ~doc ~man in
  Cmd.v info Term.(ret (const run $ file_arg))

(* Program entry point: evaluate the command and exit with its status. *)
let () = exit (Cmd.eval cmd)