···11+22+(* Word Graph
33+44+ Given a number of letters n and a file containing words (one per line),
55+ this program builds the undirected graph where
66+ - vertices are words of length n;
77+ - two words are connected with an edge if they differ by exactly one letter.
88+99+ Then the program computes and prints the components.
1010+1111+ Finally, it repeatedly queries a word from the user and displays
1212+ its component.
1313+*)
1414+1515+open Format
1616+open Graph
1717+module H = Hashtbl.Make(String)
1818+1919+module G = Imperative.Graph.Abstract(String)
2020+let g = G.create ()
2121+2222+let words : G.V.t H.t = H.create 16
2323+2424+let add_word w =
2525+ let v = G.V.create w in H.add words w v; G.add_vertex g v
2626+2727+let rec read_words n c =
2828+ match input_line c with
2929+ | s -> if String.length s = n then add_word s; read_words n c
3030+ | exception End_of_file -> ()
3131+3232+let () =
3333+ try match Sys.argv with [| _; n; f |] ->
3434+ let n = int_of_string n in
3535+ let c = open_in f in
3636+ read_words n c;
3737+ close_in c;
3838+ | _ -> raise Exit
3939+ with _ -> eprintf "%s <int> <file>@." Sys.argv.(0); exit 1
4040+4141+let () = printf "%d words@." (G.nb_vertex g)
4242+4343+let diff1 s1 s2 =
4444+ let n = String.length s1 in
4545+ assert (String.length s2 = n);
4646+ let rec scan d i =
4747+ i = n && d = 1 ||
4848+ i < n && if s1.[i] = s2.[i] then scan d (i+1) else d = 0 && scan 1 (i+1) in
4949+ scan 0 0
5050+5151+let () =
5252+ G.iter_vertex (fun v1 ->
5353+ G.iter_vertex (fun v2 ->
5454+ if diff1 (G.V.label v1) (G.V.label v2) then G.add_edge g v1 v2
5555+ ) g) g
5656+5757+let () = printf "%d edges@." (G.nb_edges g)
5858+5959+module C = Components.Undirected(G)
6060+let comp = C.components_array g
6161+6262+let histogram a =
6363+ let h = Hashtbl.create (Array.length a) in
6464+ let incr v =
6565+ Hashtbl.replace h v (1 + try Hashtbl.find h v with Not_found -> 0) in
6666+ Array.iter incr a;
6767+ let l = Hashtbl.fold (fun v n acc -> (v, n) :: acc) h [] in
6868+ List.sort (fun (v1, _) (v2, _) -> compare v1 v2) l
6969+7070+let () =
7171+ printf "%d components@." (Array.length comp);
7272+ Array.sort (fun l1 l2 -> Stdlib.compare (List.length l1) (List.length l2))
7373+ comp;
7474+ let print1 v = printf "@ %s" (G.V.label v) in
7575+ let print c =
7676+ printf "@[<hov 2>%d:" (List.length c); List.iter print1 c; printf "@]@." in
7777+ Array.iter print comp;
7878+ let hist = histogram (Array.map List.length comp) in
7979+ List.iter (fun (v, n) -> printf "%d component(s) of size %d@." n v) hist
8080+8181+module D = Traverse.Dfs(G)
8282+8383+let () =
8484+ while true do
8585+ printf "start: @?";
8686+ let s = read_line () in
8787+ try
8888+ let v = H.find words s in
8989+ printf "@[<hov 2>component:";
9090+ let m = ref 0 in
9191+ let visit v = printf "@ %s" (G.V.label v); incr m in
9292+ D.prefix_component visit g v;
9393+ printf "@]@.";
9494+ printf "%d word(s)@." !m
9595+ with Not_found ->
9696+ printf "not a vertex@."
9797+ done