objective categorical abstract machine language personal data server
1open Hermes_cli
2
3let dune_file =
4 Printf.sprintf
5 {|(library
6 (name %s)
7 (libraries hermes yojson lwt)
8 (preprocess (pps hermes_ppx ppx_deriving_yojson)))|}
9
10(* recursively find all json files in a path (file or directory) *)
11let find_json_files path =
12 let rec aux acc p =
13 if Sys.is_directory p then
14 Sys.readdir p |> Array.to_list
15 |> List.map (Filename.concat p)
16 |> List.fold_left aux acc
17 else if Filename.check_suffix p ".json" then p :: acc
18 else acc
19 in
20 aux [] path
21
22let generate_index lexicons =
23 let nsids = List.map (fun lexicon -> lexicon.Lexicon_types.id) lexicons in
24 let trie = Naming.group_nsids_by_prefix nsids in
25 let rec build_index (trie : Naming.trie) index indent =
26 match trie with
27 | Node children ->
28 List.fold_left
29 (fun acc (key, child) ->
30 match (child : Naming.trie) with
31 | Module nsid ->
32 let module_name = Naming.flat_module_name_of_nsid nsid in
33 acc ^ indent
34 ^ Printf.sprintf "module %s = %s\n"
35 (String.capitalize_ascii key)
36 module_name
37 | Node _ ->
38 acc ^ indent
39 ^ Printf.sprintf "module %s = struct\n"
40 (String.capitalize_ascii key)
41 ^ build_index child index (indent ^ " ")
42 ^ indent ^ "end\n" )
43 index children
44 | _ ->
45 failwith "build_index called with invalid trie"
46 in
47 build_index (Node trie) "" ""
48
49(* generate module structure from lexicons *)
50let generate ~inputs ~output_dir ~module_name =
51 (* create output directory *)
52 if not (Sys.file_exists output_dir) then Sys.mkdir output_dir 0o755 ;
53 (* find all lexicon files from all inputs *)
54 let files = List.concat_map find_json_files inputs in
55 Printf.printf "Found %d lexicon files\n" (List.length files) ;
56 (* parse all files *)
57 let lexicons =
58 List.filter_map
59 (fun path ->
60 match Parser.parse_file path with
61 | Ok doc ->
62 Printf.printf " Parsed: %s\n" doc.Lexicon_types.id ;
63 Some doc
64 | Error e ->
65 Printf.eprintf " Error parsing %s: %s\n" path e ;
66 None )
67 files
68 in
69 Printf.printf "Successfully parsed %d lexicons\n" (List.length lexicons) ;
70 (* find file-level SCCs to detect cross-file cycles *)
71 let sccs = Scc.find_file_sccs lexicons in
72 Printf.printf "Found %d file-level SCCs\n" (List.length sccs) ;
73 (* track shared module index for unique naming *)
74 let shared_index = ref 0 in
75 (* generate each SCC *)
76 List.iter
77 (fun scc ->
78 match scc with
79 | [] ->
80 ()
81 | [doc] ->
82 (* single file, no cycle - generate normally *)
83 let code = Codegen.gen_lexicon_module doc in
84 let rel_path = Naming.file_path_of_nsid doc.Lexicon_types.id in
85 let full_path = Filename.concat output_dir rel_path in
86 let oc = open_out full_path in
87 output_string oc code ;
88 close_out oc ;
89 Printf.printf " Generated: %s\n" rel_path
90 | docs ->
91 (* multiple files forming a cycle - use shared module strategy *)
92 incr shared_index ;
93 let nsids = List.map (fun d -> d.Lexicon_types.id) docs in
94 Printf.printf " Cyclic lexicons: %s\n" (String.concat ", " nsids) ;
95 (* sort for consistent ordering *)
96 let sorted_docs =
97 List.sort
98 (fun a b -> String.compare a.Lexicon_types.id b.Lexicon_types.id)
99 docs
100 in
101 (* generate shared module with all types *)
102 let shared_module_name =
103 Naming.shared_module_name nsids !shared_index
104 in
105 let shared_file = Naming.shared_file_name nsids !shared_index in
106 let code = Codegen.gen_shared_module sorted_docs in
107 let full_path = Filename.concat output_dir shared_file in
108 let oc = open_out full_path in
109 output_string oc code ;
110 close_out oc ;
111 Printf.printf " Generated shared: %s\n" shared_file ;
112 (* generate re-export modules for each nsid *)
113 List.iter
114 (fun doc ->
115 let stub =
116 Codegen.gen_reexport_module ~shared_module_name
117 ~all_merged_docs:sorted_docs doc
118 in
119 let rel_path = Naming.file_path_of_nsid doc.Lexicon_types.id in
120 let full_path = Filename.concat output_dir rel_path in
121 let oc = open_out full_path in
122 output_string oc stub ;
123 close_out oc ;
124 Printf.printf " Generated: %s -> %s\n" rel_path
125 shared_module_name )
126 docs )
127 sccs ;
128 (* generate index file *)
129 let index_path =
130 Filename.concat output_dir (String.lowercase_ascii module_name ^ ".ml")
131 in
132 let oc = open_out index_path in
133 Printf.fprintf oc "(* %s - generated from atproto lexicons *)\n\n" module_name ;
134 (* export each lexicon as a module alias *)
135 Out_channel.output_string oc (generate_index lexicons) ;
136 close_out oc ;
137 Printf.printf "Generated index: %s\n" index_path ;
138 (* generate dune file *)
139 let dune_path = Filename.concat output_dir "dune" in
140 let oc = open_out dune_path in
141 Out_channel.output_string oc (dune_file (String.lowercase_ascii module_name)) ;
142 close_out oc ;
143 Printf.printf "Generated dune file\n" ;
144 Printf.printf "Done! Generated %d modules\n" (List.length lexicons)
145
146let inputs =
147 let doc = "lexicon files or directories to search recursively for JSON" in
148 Cmdliner.Arg.(non_empty & pos_all file [] & info [] ~docv:"INPUT" ~doc)
149
150let output_dir =
151 let doc = "output directory for generated code" in
152 Cmdliner.Arg.(
153 required & opt (some string) None & info ["o"; "output"] ~docv:"DIR" ~doc )
154
155let module_name =
156 let doc = "name of the generated module" in
157 Cmdliner.Arg.(
158 value
159 & opt string "Hermes_lexicons"
160 & info ["m"; "module-name"] ~docv:"NAME" ~doc )
161
162let generate_cmd =
163 let doc = "generate ocaml types from atproto lexicons" in
164 let info = Cmdliner.Cmd.info "generate" ~doc in
165 let generate' inputs output_dir module_name =
166 generate ~inputs ~output_dir ~module_name
167 in
168 Cmdliner.Cmd.v info
169 Cmdliner.Term.(const generate' $ inputs $ output_dir $ module_name)
170
171let main_cmd =
172 let doc = "hermes - atproto lexicon code generator" in
173 let info = Cmdliner.Cmd.info "hermes-cli" ~version:"0.1.0" ~doc in
174 Cmdliner.Cmd.group info [generate_cmd]
175
176let () = exit (Cmdliner.Cmd.eval main_cmd)