···77## What it looks like
8899```mlf
1010+/// Blog-style post record for the Bluesky feed.
1111+@const("revision", 3)
1212+self {}
1313+1014record post {
1115 text!: string constrained {
1216 maxLength: 3000,
···2125 parent!: com.atproto.repo.strongRef,
2226};
2327```
2828+2929+`self {}` represents the lexicon itself — docs on it map to the
3030+top-level `description`; `@const` / `@reference` annotations carry
3131+non-spec or vendor extension fields through to JSON.
24322533## Installation
2634
+3-2
codegen-plugins/mlf-codegen-go/src/lib.rs
···190190 Item::Query(_) | Item::Procedure(_) | Item::Subscription(_) => {
191191 // TODO: Generate client methods
192192 }
193193- Item::Use(_) => {
194194- // Skip use statements
193193+ Item::Use(_) | Item::SelfItem(_) => {
194194+ // Skip `use` statements and the `self { }` item — both
195195+ // are lexicon-level metadata, not types to emit.
195196 }
196197 }
197198 }
+3-2
codegen-plugins/mlf-codegen-rust/src/lib.rs
···216216 Item::Query(_) | Item::Procedure(_) | Item::Subscription(_) => {
217217 // TODO: Generate client methods
218218 }
219219- Item::Use(_) => {
220220- // Skip use statements
219219+ Item::Use(_) | Item::SelfItem(_) => {
220220+ // Skip `use` statements and the `self { }` item — both
221221+ // are lexicon-level metadata, not types to emit.
221222 }
222223 }
223224 }
+3-2
codegen-plugins/mlf-codegen-typescript/src/lib.rs
···162162 // TODO: Generate client methods for these
163163 // Annotation idea: @clientMethod for custom generation
164164 }
165165- Item::Use(_) => {
166166- // Skip use statements in output
165165+ Item::Use(_) | Item::SelfItem(_) => {
166166+ // Skip `use` statements and the `self { }` item — both
167167+ // are lexicon-level metadata, not types to emit.
167168 }
168169 }
169170 }
+6-2
mlf-cli/src/generate/lexicon.rs
···163163 continue;
164164 }
165165166166- let json_lexicon = mlf_codegen::generate_lexicon(&namespace, &lexicon, &workspace);
166166+ let output = mlf_codegen::generate_lexicon(&namespace, &lexicon, &workspace);
167167+168168+ for warning in &output.warnings {
169169+ eprintln!(" warning: {}: {}", warning.namespace, warning.message);
170170+ }
167171168172 let output_path = if flat {
169173 output_dir.join(format!("{}.json", namespace))
···180184 path
181185 };
182186183183- let json_str = serde_json::to_string_pretty(&json_lexicon).unwrap();
187187+ let json_str = serde_json::to_string_pretty(&output.json).unwrap();
184188 if let Err(source) = std::fs::write(&output_path, format!("{}\n", json_str)) {
185189 errors.push((output_path.display().to_string(), format!("Failed to write file: {}", source)));
186190 continue;
+263-36
mlf-cli/src/generate/mlf.rs
···228228pub fn generate_mlf_from_json(json: &Value) -> Result<MlfGenerateOutput, MlfGenerateError> {
229229 let mut output = String::new();
230230231231- // Extract NSID to get the last segment for "main" definitions
232231 let nsid = json
233232 .get("id")
234233 .and_then(|v| v.as_str())
···250249 warnings: RefCell::new(Vec::new()),
251250 };
252251253253- // Process all definitions
252252+ // Emit a `self {}` item when the source has a top-level description
253253+ // or any non-spec top-level field (`revision`, vendor `x-*`, etc.).
254254+ if let Some(self_mlf) = render_self_item(json, &ctx) {
255255+ output.push_str(&self_mlf);
256256+ output.push('\n');
257257+ }
258258+254259 for (name, def) in defs {
255260 let def_type = def.get("type").and_then(|v| v.as_str()).ok_or_else(|| {
256261 MlfGenerateError::InvalidLexicon {
···258263 }
259264 })?;
260265261261- match def_type {
262262- "record" => {
263263- let mlf = generate_record(name, def, &ctx)?;
264264- output.push_str(&mlf);
265265- output.push('\n');
266266- }
267267- "query" => {
268268- let mlf = generate_query(name, def, &ctx)?;
269269- output.push_str(&mlf);
270270- output.push('\n');
271271- }
272272- "procedure" => {
273273- let mlf = generate_procedure(name, def, &ctx)?;
274274- output.push_str(&mlf);
275275- output.push('\n');
276276- }
277277- "subscription" => {
278278- let mlf = generate_subscription(name, def, &ctx)?;
279279- output.push_str(&mlf);
280280- output.push('\n');
281281- }
282282- "token" => {
283283- let mlf = generate_token(name, def)?;
284284- output.push_str(&mlf);
285285- output.push('\n');
286286- }
287287- _ => {
288288- // All other types (object, string, array, union, etc.) are treated as def type
289289- let mlf = generate_def_type(name, def, &ctx)?;
290290- output.push_str(&mlf);
291291- output.push('\n');
292292- }
293293- }
266266+ let mlf = match def_type {
267267+ "record" => generate_record(name, def, &ctx)?,
268268+ "query" => generate_query(name, def, &ctx)?,
269269+ "procedure" => generate_procedure(name, def, &ctx)?,
270270+ "subscription" => generate_subscription(name, def, &ctx)?,
271271+ "token" => generate_token(name, def, &ctx)?,
272272+ t if is_known_def_type(t) => generate_def_type(name, def, &ctx)?,
273273+ // Unknown def type (e.g. `permission-set`): emit a
274274+ // placeholder def with `@const` annotations carrying every
275275+ // field, so the shape roundtrips byte-faithfully without
276276+ // requiring a grammar entry for every future spec type.
277277+ _ => render_unknown_def_passthrough(name, def, last_segment, &ctx),
278278+ };
279279+ output.push_str(&mlf);
280280+ output.push('\n');
294281 }
295282296283 Ok(MlfGenerateOutput {
···299286 })
300287}
/// Root-level lexicon keys that the converter handles through dedicated
/// paths. Every other key found at the root is carried as an `@const`
/// annotation on the emitted `self {}` item.
const TOP_LEVEL_SPEC_FIELDS: &[&str] = &[
    "lexicon",
    "id",
    "description",
    "defs",
    "$type",
];

/// Values of a def's `type` field that get a structural rendering.
/// A def whose `type` is not listed here (e.g. `permission-set`) is
/// routed to the unknown-def passthrough instead.
const KNOWN_DEF_TYPES: &[&str] = &[
    // Top-level definition kinds.
    "record",
    "query",
    "procedure",
    "subscription",
    "token",
    // Field-level kinds that may also appear as a def.
    "object",
    "string",
    "integer",
    "boolean",
    "bytes",
    "blob",
    "null",
    "unknown",
    "array",
    "union",
    "ref",
    "cid-link",
];

/// Returns `true` when `type_name` is one of [`KNOWN_DEF_TYPES`].
fn is_known_def_type(type_name: &str) -> bool {
    KNOWN_DEF_TYPES.iter().any(|known| *known == type_name)
}
/// Per-kind tables of spec-defined keys. When a def is converted, any
/// key on its JSON object that is missing from the matching table is
/// emitted as an `@const` annotation, so vendor extensions
/// (`revision`, `x-*` flags, etc.) survive the roundtrip.
const RECORD_SPEC_FIELDS: &[&str] = &[
    "type",
    "description",
    "key",
    "record",
];
const QUERY_SPEC_FIELDS: &[&str] = &[
    "type",
    "description",
    "parameters",
    "output",
    "errors",
];
const PROCEDURE_SPEC_FIELDS: &[&str] = &[
    "type",
    "description",
    "parameters",
    "input",
    "output",
    "errors",
];
const SUBSCRIPTION_SPEC_FIELDS: &[&str] = &[
    "type",
    "description",
    "parameters",
    "message",
    "errors",
];
const TOKEN_SPEC_FIELDS: &[&str] = &[
    "type",
    "description",
];

/// Spec-defined keys accepted at the top of a def-type definition.
/// One shared table serves primitives (including their constraint
/// keys) and containers (array, object, union, ref); a key outside it
/// is treated as an extension.
const DEF_TYPE_SPEC_FIELDS: &[&str] = &[
    "type",
    "description",
    // Constraint keys (mirror CONSTRAINT_KEYS).
    "minLength",
    "maxLength",
    "minGraphemes",
    "maxGraphemes",
    "minimum",
    "maximum",
    "format",
    "enum",
    "knownValues",
    "accept",
    "maxSize",
    "default",
    "const",
    // Container keys.
    "items",
    "properties",
    "required",
    "nullable",
    "refs",
    "closed",
    "ref",
];
337337+338338+/// Build a `self {}` item from the top-level JSON, or `None` when
339339+/// there's nothing to emit (no description, no unknown fields). Docs
340340+/// come from top-level `description`; extension fields become `@const`
341341+/// annotations.
342342+fn render_self_item(json: &Value, ctx: &ConversionContext) -> Option<String> {
343343+ let obj = json.as_object()?;
344344+345345+ let description = obj.get("description").and_then(|v| v.as_str()).unwrap_or("");
346346+ let has_extension = obj
347347+ .keys()
348348+ .any(|k| !TOP_LEVEL_SPEC_FIELDS.contains(&k.as_str()));
349349+350350+ if description.is_empty() && !has_extension {
351351+ return None;
352352+ }
353353+354354+ let mut out = String::new();
355355+ for line in description.lines() {
356356+ out.push_str("/// ");
357357+ out.push_str(line);
358358+ out.push('\n');
359359+ }
360360+ for (key, value) in obj {
361361+ if TOP_LEVEL_SPEC_FIELDS.contains(&key.as_str()) {
362362+ continue;
363363+ }
364364+ warn_if_reference_shaped(ctx, key, value);
365365+ out.push_str(&format!(
366366+ "@const(\"{}\", {})\n",
367367+ escape_string_for_mlf(key),
368368+ render_json_as_mlf_literal(value)
369369+ ));
370370+ }
371371+ out.push_str("self {}\n");
372372+ Some(out)
373373+}
374374+375375+/// Heuristic: warn when a `@const` string value contains `#`, since that's
376376+/// the ATProto local-ref shape and the author may have intended `@reference`.
377377+/// The converter can't know intent from JSON, so it always emits `@const`;
378378+/// the warning nudges hand-review.
379379+fn warn_if_reference_shaped(ctx: &ConversionContext, key: &str, value: &Value) {
380380+ let Value::String(s) = value else { return };
381381+ if !s.contains('#') {
382382+ return;
383383+ }
384384+ ctx.warn(format!(
385385+ "extension field {:?} has value {:?} which looks NSID-shaped; \
386386+ emitted as `@const` — consider `@reference` if you intend workspace \
387387+ name resolution when hand-editing the MLF",
388388+ key, s
389389+ ));
390390+}
391391+392392+/// Emit a placeholder `def type X = unknown;` with `@const` annotations
393393+/// for every field — used when the def's `type` isn't in our known
394394+/// set. Keeps the lexicon's shape roundtrippable without a dedicated
395395+/// grammar entry.
396396+fn render_unknown_def_passthrough(
397397+ name: &str,
398398+ def: &Value,
399399+ last_segment: &str,
400400+ ctx: &ConversionContext,
401401+) -> String {
402402+ let obj = match def.as_object() {
403403+ Some(o) => o,
404404+ None => return format!("def type {} = unknown;\n", escape_name(name)),
405405+ };
406406+ let mut out = String::new();
407407+ if let Some(description) = obj.get("description").and_then(|v| v.as_str()) {
408408+ if !description.is_empty() {
409409+ for line in description.lines() {
410410+ out.push_str("/// ");
411411+ out.push_str(line);
412412+ out.push('\n');
413413+ }
414414+ }
415415+ }
416416+ if name == "main" {
417417+ out.push_str("@main\n");
418418+ }
419419+ for (key, value) in obj {
420420+ // `description` surfaced as the doc-comment block already; don't
421421+ // double-emit as `@const`. Every other field — including `type`
422422+ // itself — passes through as an annotation so the lexicon's
423423+ // shape is preserved verbatim.
424424+ if key == "description" {
425425+ continue;
426426+ }
427427+ warn_if_reference_shaped(ctx, key, value);
428428+ out.push_str(&format!(
429429+ "@const(\"{}\", {})\n",
430430+ escape_string_for_mlf(key),
431431+ render_json_as_mlf_literal(value)
432432+ ));
433433+ }
434434+ let def_name = if name == "main" {
435435+ escape_name(last_segment)
436436+ } else {
437437+ escape_name(name)
438438+ };
439439+ out.push_str(&format!("def type {} = unknown;\n", def_name));
440440+ out
441441+}
442442+443443+/// Emit `@const(key, value)` annotation lines for every field on `def`
444444+/// that isn't listed in `spec_fields`. Each generator calls this after
445445+/// emitting `@main` (if applicable) and before the declaration line,
446446+/// so vendor extensions carry through in the same position the codegen
447447+/// expects to find them when emitting JSON back.
448448+fn render_extension_annotations(
449449+ def: &Value,
450450+ spec_fields: &[&str],
451451+ ctx: &ConversionContext,
452452+) -> String {
453453+ let Some(obj) = def.as_object() else {
454454+ return String::new();
455455+ };
456456+ let mut out = String::new();
457457+ for (key, value) in obj {
458458+ if spec_fields.contains(&key.as_str()) {
459459+ continue;
460460+ }
461461+ warn_if_reference_shaped(ctx, key, value);
462462+ out.push_str(&format!(
463463+ "@const(\"{}\", {})\n",
464464+ escape_string_for_mlf(key),
465465+ render_json_as_mlf_literal(value)
466466+ ));
467467+ }
468468+ out
469469+}
470470+471471+/// Render a JSON value as MLF source text suitable for use as an
472472+/// annotation-value literal (the second arg of `@const`). Handles every
473473+/// JSON shape; strings are quoted and escaped, objects use the
474474+/// `{ "key": value, ... }` form.
475475+fn render_json_as_mlf_literal(value: &Value) -> String {
476476+ match value {
477477+ Value::Null => "null".to_string(),
478478+ Value::Bool(b) => b.to_string(),
479479+ Value::String(s) => format!("\"{}\"", escape_string_for_mlf(s)),
480480+ Value::Number(n) => {
481481+ if let Some(i) = n.as_i64() {
482482+ i.to_string()
483483+ } else if let Some(f) = n.as_f64() {
484484+ f.to_string()
485485+ } else {
486486+ "null".to_string()
487487+ }
488488+ }
489489+ Value::Array(items) => {
490490+ let rendered: Vec<String> = items.iter().map(render_json_as_mlf_literal).collect();
491491+ format!("[{}]", rendered.join(", "))
492492+ }
493493+ Value::Object(map) => {
494494+ let rendered: Vec<String> = map
495495+ .iter()
496496+ .map(|(k, v)| {
497497+ format!(
498498+ "\"{}\": {}",
499499+ escape_string_for_mlf(k),
500500+ render_json_as_mlf_literal(v)
501501+ )
502502+ })
503503+ .collect();
504504+ format!("{{ {} }}", rendered.join(", "))
505505+ }
506506+ }
507507+}
/// Escape `s` for placement between double quotes in MLF source.
///
/// Backslash and double quote are backslash-escaped. Newline, carriage
/// return and tab are written as `\n`, `\r` and `\t` so that a value
/// containing them cannot split a one-line `@const("…", "…")`
/// annotation across several source lines. All other characters are
/// copied unchanged.
///
/// NOTE(review): assumes the MLF string lexer decodes `\n`, `\r` and
/// `\t` in addition to `\\` and `\"` — confirm against the grammar.
fn escape_string_for_mlf(s: &str) -> String {
    // Single pass; the output is at least as long as the input.
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '"' => escaped.push_str("\\\""),
            '\n' => escaped.push_str("\\n"),
            '\r' => escaped.push_str("\\r"),
            '\t' => escaped.push_str("\\t"),
            other => escaped.push(other),
        }
    }
    escaped
}
512512+302513struct ConversionContext {
303514 current_namespace: String,
304515 /// MLF-side name of this lexicon's main def.
···365576 output.push_str("@main\n");
366577 }
367578579579+ output.push_str(&render_extension_annotations(def, RECORD_SPEC_FIELDS, ctx));
580580+368581 // Use last segment of NSID for "main" definitions
369582 let record_name = if name == "main" {
370583 escape_name(&ctx.local_main_name)
···431644 if name == "main" {
432645 output.push_str("@main\n");
433646 }
647647+648648+ output.push_str(&render_extension_annotations(def, QUERY_SPEC_FIELDS, ctx));
434649435650 let query_name = if name == "main" {
436651 escape_name(&ctx.local_main_name)
···526741 output.push_str("@main\n");
527742 }
528743744744+ output.push_str(&render_extension_annotations(def, PROCEDURE_SPEC_FIELDS, ctx));
745745+529746 let procedure_name = if name == "main" {
530747 escape_name(&ctx.local_main_name)
531748 } else {
···625842 output.push_str("@main\n");
626843 }
627844845845+ output.push_str(&render_extension_annotations(def, SUBSCRIPTION_SPEC_FIELDS, ctx));
846846+628847 let subscription_name = if name == "main" {
629848 escape_name(&ctx.local_main_name)
630849 } else {
···680899 Ok(output)
681900}
682901683683-fn generate_token(name: &str, def: &Value) -> Result<String, MlfGenerateError> {
902902+fn generate_token(
903903+ name: &str,
904904+ def: &Value,
905905+ ctx: &ConversionContext,
906906+) -> Result<String, MlfGenerateError> {
684907 let mut output = String::new();
685908686909 // Add doc comment
···691914 }
692915 }
693916 }
917917+918918+ output.push_str(&render_extension_annotations(def, TOKEN_SPEC_FIELDS, ctx));
694919695920 let escaped_name = escape_name(name);
696921 output.push_str(&format!("token {};\n", escaped_name));
···713938 if name == "main" {
714939 output.push_str("@main\n");
715940 }
941941+942942+ output.push_str(&render_extension_annotations(def, DEF_TYPE_SPEC_FIELDS, ctx));
716943717944 // Use last segment of NSID for "main" definitions
718945 // Keywords are now allowed by the parser, so just escape with backticks
-941
mlf-cli/src/generate/mlf.rs.backup
···11-use miette::Diagnostic;
22-use serde_json::Value;
33-use std::path::PathBuf;
44-use thiserror::Error;
55-66-#[derive(Error, Debug, Diagnostic)]
77-pub enum MlfGenerateError {
88- #[error("Failed to read file: {path}")]
99- #[diagnostic(code(mlf::generate::read_file))]
1010- #[allow(dead_code)]
1111- ReadFile {
1212- path: String,
1313- #[source]
1414- source: std::io::Error,
1515- },
1616-1717- #[error("Failed to parse JSON: {path}")]
1818- #[diagnostic(code(mlf::generate::parse_json))]
1919- #[allow(dead_code)]
2020- ParseJson {
2121- path: String,
2222- #[source]
2323- source: serde_json::Error,
2424- },
2525-2626- #[error("Failed to write output: {path}")]
2727- #[diagnostic(code(mlf::generate::write_output))]
2828- WriteOutput {
2929- path: String,
3030- #[source]
3131- source: std::io::Error,
3232- },
3333-3434- #[error("Invalid lexicon format: {message}")]
3535- #[diagnostic(code(mlf::generate::invalid_lexicon))]
3636- InvalidLexicon { message: String },
3737-3838- #[error("Failed to expand glob pattern")]
3939- #[diagnostic(code(mlf::generate::glob_error))]
4040- GlobError {
4141- #[source]
4242- source: glob::GlobError,
4343- },
4444-4545- #[error("Invalid glob pattern: {pattern}")]
4646- #[diagnostic(code(mlf::generate::invalid_glob))]
4747- InvalidGlob {
4848- pattern: String,
4949- #[source]
5050- source: glob::PatternError,
5151- },
5252-}
5353-5454-pub fn run(input_patterns: Vec<String>, output_dir: PathBuf) -> Result<(), MlfGenerateError> {
5555- let mut file_paths = Vec::new();
5656-5757- for pattern in input_patterns {
5858- if pattern.contains('*') || pattern.contains('?') {
5959- for entry in glob::glob(&pattern).map_err(|source| MlfGenerateError::InvalidGlob {
6060- pattern: pattern.clone(),
6161- source,
6262- })? {
6363- let path = entry.map_err(|source| MlfGenerateError::GlobError { source })?;
6464- file_paths.push(path);
6565- }
6666- } else {
6767- file_paths.push(PathBuf::from(pattern));
6868- }
6969- }
7070-7171- std::fs::create_dir_all(&output_dir).map_err(|source| MlfGenerateError::WriteOutput {
7272- path: output_dir.display().to_string(),
7373- source,
7474- })?;
7575-7676- let mut errors = Vec::new();
7777- let mut success_count = 0;
7878-7979- for file_path in file_paths {
8080- let source = match std::fs::read_to_string(&file_path) {
8181- Ok(s) => s,
8282- Err(source) => {
8383- errors.push((
8484- file_path.display().to_string(),
8585- format!("Failed to read file: {}", source),
8686- ));
8787- continue;
8888- }
8989- };
9090-9191- let json: Value = match serde_json::from_str(&source) {
9292- Ok(j) => j,
9393- Err(source) => {
9494- errors.push((
9595- file_path.display().to_string(),
9696- format!("Failed to parse JSON: {}", source),
9797- ));
9898- continue;
9999- }
100100- };
101101-102102- let mlf_content = match generate_mlf_from_json(&json) {
103103- Ok(content) => content,
104104- Err(e) => {
105105- errors.push((file_path.display().to_string(), format!("{:?}", e)));
106106- continue;
107107- }
108108- };
109109-110110- // Extract namespace from JSON "id" field
111111- let namespace = json
112112- .get("id")
113113- .and_then(|v| v.as_str())
114114- .ok_or_else(|| MlfGenerateError::InvalidLexicon {
115115- message: "Missing 'id' field in lexicon".to_string(),
116116- })?;
117117-118118- // Create output path from namespace
119119- let mut output_path = output_dir.clone();
120120- for segment in namespace.split('.') {
121121- output_path.push(segment);
122122- }
123123- if let Err(source) = std::fs::create_dir_all(&output_path.parent().unwrap()) {
124124- errors.push((
125125- file_path.display().to_string(),
126126- format!("Failed to create directory: {}", source),
127127- ));
128128- continue;
129129- }
130130- output_path.set_extension("mlf");
131131-132132- if let Err(source) = std::fs::write(&output_path, mlf_content) {
133133- errors.push((
134134- output_path.display().to_string(),
135135- format!("Failed to write file: {}", source),
136136- ));
137137- continue;
138138- }
139139-140140- println!("Generated: {}", output_path.display());
141141- success_count += 1;
142142- }
143143-144144- if !errors.is_empty() {
145145- eprintln!(
146146- "\n{} file(s) generated successfully, {} error(s) encountered:\n",
147147- success_count,
148148- errors.len()
149149- );
150150- for (path, error) in &errors {
151151- eprintln!(" {} - {}", path, error);
152152- }
153153- eprintln!();
154154- return Err(MlfGenerateError::InvalidLexicon {
155155- message: format!("{} errors total", errors.len()),
156156- });
157157- }
158158-159159- println!("\nSuccessfully generated {} file(s)", success_count);
160160- Ok(())
161161-}
162162-163163-pub fn generate_mlf_from_json(json: &Value) -> Result<String, MlfGenerateError> {
164164- let mut output = String::new();
165165-166166- // Extract NSID to get the last segment for "main" definitions
167167- let nsid = json
168168- .get("id")
169169- .and_then(|v| v.as_str())
170170- .ok_or_else(|| MlfGenerateError::InvalidLexicon {
171171- message: "Missing 'id' field in lexicon".to_string(),
172172- })?;
173173-174174- let last_segment = nsid.split('.').last().unwrap_or("main");
175175-176176- let defs = json.get("defs").and_then(|v| v.as_object()).ok_or_else(|| {
177177- MlfGenerateError::InvalidLexicon {
178178- message: "Missing or invalid 'defs' field".to_string(),
179179- }
180180- })?;
181181-182182- // Create a context to pass the current namespace to type generation
183183- let ctx = ConversionContext {
184184- current_namespace: nsid.to_string(),
185185- };
186186-187187- // Process all definitions
188188- for (name, def) in defs {
189189- let def_type = def.get("type").and_then(|v| v.as_str()).ok_or_else(|| {
190190- MlfGenerateError::InvalidLexicon {
191191- message: format!("Missing 'type' field for definition '{}'", name),
192192- }
193193- })?;
194194-195195- match def_type {
196196- "record" => {
197197- let mlf = generate_record(name, def, last_segment, &ctx)?;
198198- output.push_str(&mlf);
199199- output.push('\n');
200200- }
201201- "query" => {
202202- let mlf = generate_query(name, def, last_segment, &ctx)?;
203203- output.push_str(&mlf);
204204- output.push('\n');
205205- }
206206- "procedure" => {
207207- let mlf = generate_procedure(name, def, last_segment, &ctx)?;
208208- output.push_str(&mlf);
209209- output.push('\n');
210210- }
211211- "subscription" => {
212212- let mlf = generate_subscription(name, def, last_segment, &ctx)?;
213213- output.push_str(&mlf);
214214- output.push('\n');
215215- }
216216- "token" => {
217217- let mlf = generate_token(name, def)?;
218218- output.push_str(&mlf);
219219- output.push('\n');
220220- }
221221- "object" => {
222222- let mlf = generate_def_type(name, def, last_segment, &ctx)?;
223223- output.push_str(&mlf);
224224- output.push('\n');
225225- }
226226- _ => {
227227- // Unknown type, skip
228228- }
229229- }
230230- }
231231-232232- Ok(output)
233233-}
234234-235235-struct ConversionContext {
236236- current_namespace: String,
237237-}
238238-239239-/// Reserved words in MLF that need to be escaped
240240-const RESERVED_WORDS: &[&str] = &[
241241- "main", "record", "query", "procedure", "subscription", "token", "def", "type", "use",
242242- "pub", "alias", "namespace", "constrained", "error", "unit", "null", "boolean",
243243- "integer", "string", "bytes", "blob", "unknown", "array", "object", "union", "ref",
244244-];
245245-246246-/// Escape a name if it's a reserved word
247247-fn escape_name(name: &str) -> String {
248248- if RESERVED_WORDS.contains(&name) {
249249- format!("`{}`", name)
250250- } else {
251251- name.to_string()
252252- }
253253-}
254254-255255-fn generate_record(name: &str, def: &Value, last_segment: &str, ctx: &ConversionContext) -> Result<String, MlfGenerateError> {
256256- let mut output = String::new();
257257-258258- // Add doc comment if present
259259- if let Some(desc) = def.get("description").and_then(|v| v.as_str()) {
260260- if !desc.is_empty() {
261261- for line in desc.lines() {
262262- output.push_str(&format!("/// {}\n", line));
263263- }
264264- }
265265- }
266266-267267- // Add @main annotation for "main" definitions
268268- if name == "main" {
269269- output.push_str("@main\n");
270270- }
271271-272272- // Use last segment of NSID for "main" definitions
273273- let record_name = if name == "main" {
274274- escape_name(last_segment)
275275- } else {
276276- escape_name(name)
277277- };
278278-279279- output.push_str(&format!("record {} {{\n", record_name));
280280-281281- // Get the record object
282282- let record_obj = def.get("record").and_then(|v| v.as_object()).ok_or_else(|| {
283283- MlfGenerateError::InvalidLexicon {
284284- message: format!("Missing 'record' field in record definition '{}'", name),
285285- }
286286- })?;
287287-288288- let properties = record_obj
289289- .get("properties")
290290- .and_then(|v| v.as_object())
291291- .ok_or_else(|| MlfGenerateError::InvalidLexicon {
292292- message: format!("Missing 'properties' in record '{}'", name),
293293- })?;
294294-295295- let required = record_obj
296296- .get("required")
297297- .and_then(|v| v.as_array())
298298- .map(|arr| {
299299- arr.iter()
300300- .filter_map(|v| v.as_str())
301301- .collect::<Vec<_>>()
302302- })
303303- .unwrap_or_default();
304304-305305- for (field_name, field_def) in properties {
306306- // Add field doc comment
307307- if let Some(desc) = field_def.get("description").and_then(|v| v.as_str()) {
308308- if !desc.is_empty() {
309309- for line in desc.lines() {
310310- output.push_str(&format!(" /// {}\n", line));
311311- }
312312- }
313313- }
314314-315315- let is_required = required.contains(&field_name.as_str());
316316- let required_marker = if is_required { "!" } else { "" };
317317-318318- let field_type = generate_type(field_def)?;
319319- let escaped_field_name = escape_name(field_name);
320320- output.push_str(&format!(
321321- " {}{}: {},\n",
322322- escaped_field_name, required_marker, field_type
323323- ));
324324- }
325325-326326- output.push_str("}\n");
327327- Ok(output)
328328-}
329329-330330-fn generate_query(name: &str, def: &Value, last_segment: &str, ctx: &ConversionContext) -> Result<String, MlfGenerateError> {
331331- let mut output = String::new();
332332-333333- // Add doc comment
334334- if let Some(desc) = def.get("description").and_then(|v| v.as_str()) {
335335- if !desc.is_empty() {
336336- for line in desc.lines() {
337337- output.push_str(&format!("/// {}\n", line));
338338- }
339339- }
340340- }
341341-342342- // Add @main annotation for "main" definitions
343343- if name == "main" {
344344- output.push_str("@main\n");
345345- }
346346-347347- let query_name = if name == "main" {
348348- escape_name(last_segment)
349349- } else {
350350- escape_name(name)
351351- };
352352- output.push_str(&format!("query {}", query_name));
353353-354354- // Parameters
355355- output.push('(');
356356- if let Some(params) = def.get("parameters").and_then(|v| v.as_object()) {
357357- let properties = params.get("properties").and_then(|v| v.as_object());
358358- let required = params
359359- .get("required")
360360- .and_then(|v| v.as_array())
361361- .map(|arr| {
362362- arr.iter()
363363- .filter_map(|v| v.as_str())
364364- .collect::<Vec<_>>()
365365- })
366366- .unwrap_or_default();
367367-368368- if let Some(props) = properties {
369369- let param_strs: Vec<String> = props
370370- .iter()
371371- .map(|(param_name, param_def)| {
372372- let is_required = required.contains(¶m_name.as_str());
373373- let required_marker = if is_required { "!" } else { "" };
374374- let param_type = generate_type(param_def).unwrap_or_else(|_| "unknown".to_string());
375375- let escaped_param_name = escape_name(param_name);
376376-377377- // Add doc comment inline if present
378378- let mut result = String::new();
379379- if let Some(desc) = param_def.get("description").and_then(|v| v.as_str()) {
380380- if !desc.is_empty() {
381381- result.push_str(&format!("\n /// {}\n ", desc));
382382- }
383383- }
384384- result.push_str(&format!("{}{}: {}", escaped_param_name, required_marker, param_type));
385385- result
386386- })
387387- .collect();
388388-389389- if !param_strs.is_empty() {
390390- output.push_str(¶m_strs.join(","));
391391- }
392392- }
393393- }
394394- output.push(')');
395395-396396- // Output type
397397- if let Some(output_obj) = def.get("output").and_then(|v| v.as_object()) {
398398- if let Some(schema) = output_obj.get("schema") {
399399- let return_type = generate_type(schema)?;
400400- output.push_str(&format!(": {}", return_type));
401401-402402- // Check for errors
403403- if let Some(errors) = output_obj.get("errors").and_then(|v| v.as_object()) {
404404- output.push_str(" | error {\n");
405405- for (error_name, error_def) in errors {
406406- if let Some(desc) = error_def.get("description").and_then(|v| v.as_str()) {
407407- if !desc.is_empty() {
408408- output.push_str(&format!(" /// {}\n", desc));
409409- }
410410- }
411411- output.push_str(&format!(" {},\n", error_name));
412412- }
413413- output.push('}');
414414- }
415415- }
416416- }
417417-418418- output.push_str(";\n");
419419- Ok(output)
420420-}
421421-422422-fn generate_procedure(name: &str, def: &Value, last_segment: &str, ctx: &ConversionContext) -> Result<String, MlfGenerateError> {
423423- let mut output = String::new();
424424-425425- // Add doc comment
426426- if let Some(desc) = def.get("description").and_then(|v| v.as_str()) {
427427- if !desc.is_empty() {
428428- for line in desc.lines() {
429429- output.push_str(&format!("/// {}\n", line));
430430- }
431431- }
432432- }
433433-434434- // Add @main annotation for "main" definitions
435435- if name == "main" {
436436- output.push_str("@main\n");
437437- }
438438-439439- let procedure_name = if name == "main" {
440440- escape_name(last_segment)
441441- } else {
442442- escape_name(name)
443443- };
444444- output.push_str(&format!("procedure {}", procedure_name));
445445-446446- // Input parameters
447447- output.push('(');
448448- if let Some(input) = def.get("input").and_then(|v| v.as_object()) {
449449- if let Some(schema) = input.get("schema").and_then(|v| v.as_object()) {
450450- let properties = schema.get("properties").and_then(|v| v.as_object());
451451- let required = schema
452452- .get("required")
453453- .and_then(|v| v.as_array())
454454- .map(|arr| {
455455- arr.iter()
456456- .filter_map(|v| v.as_str())
457457- .collect::<Vec<_>>()
458458- })
459459- .unwrap_or_default();
460460-461461- if let Some(props) = properties {
462462- let param_strs: Vec<String> = props
463463- .iter()
464464- .map(|(param_name, param_def)| {
465465- let is_required = required.contains(¶m_name.as_str());
466466- let required_marker = if is_required { "!" } else { "" };
467467- let param_type =
468468- generate_type(param_def).unwrap_or_else(|_| "unknown".to_string());
469469- let escaped_param_name = escape_name(param_name);
470470-471471- // Add doc comment inline if present
472472- let mut result = String::new();
473473- if let Some(desc) = param_def.get("description").and_then(|v| v.as_str()) {
474474- if !desc.is_empty() {
475475- result.push_str(&format!("\n /// {}\n ", desc));
476476- }
477477- }
478478- result.push_str(&format!(
479479- "{}{}: {}",
480480- escaped_param_name, required_marker, param_type
481481- ));
482482- result
483483- })
484484- .collect();
485485-486486- if !param_strs.is_empty() {
487487- output.push_str(¶m_strs.join(","));
488488- }
489489- }
490490- }
491491- }
492492- output.push(')');
493493-494494- // Output type
495495- if let Some(output_obj) = def.get("output").and_then(|v| v.as_object()) {
496496- if let Some(schema) = output_obj.get("schema") {
497497- let return_type = generate_type(schema)?;
498498- output.push_str(&format!(": {}", return_type));
499499-500500- // Check for errors
501501- if let Some(errors) = output_obj.get("errors").and_then(|v| v.as_object()) {
502502- output.push_str(" | error {\n");
503503- for (error_name, error_def) in errors {
504504- if let Some(desc) = error_def.get("description").and_then(|v| v.as_str()) {
505505- if !desc.is_empty() {
506506- output.push_str(&format!(" /// {}\n", desc));
507507- }
508508- }
509509- output.push_str(&format!(" {},\n", error_name));
510510- }
511511- output.push('}');
512512- }
513513- }
514514- }
515515-516516- output.push_str(";\n");
517517- Ok(output)
518518-}
519519-520520-fn generate_subscription(name: &str, def: &Value, last_segment: &str, ctx: &ConversionContext) -> Result<String, MlfGenerateError> {
521521- let mut output = String::new();
522522-523523- // Add doc comment
524524- if let Some(desc) = def.get("description").and_then(|v| v.as_str()) {
525525- if !desc.is_empty() {
526526- for line in desc.lines() {
527527- output.push_str(&format!("/// {}\n", line));
528528- }
529529- }
530530- }
531531-532532- // Add @main annotation for "main" definitions
533533- if name == "main" {
534534- output.push_str("@main\n");
535535- }
536536-537537- let subscription_name = if name == "main" {
538538- escape_name(last_segment)
539539- } else {
540540- escape_name(name)
541541- };
542542- output.push_str(&format!("subscription {}", subscription_name));
543543-544544- // Parameters
545545- output.push('(');
546546- if let Some(params) = def.get("parameters").and_then(|v| v.as_object()) {
547547- let properties = params.get("properties").and_then(|v| v.as_object());
548548- let required = params
549549- .get("required")
550550- .and_then(|v| v.as_array())
551551- .map(|arr| {
552552- arr.iter()
553553- .filter_map(|v| v.as_str())
554554- .collect::<Vec<_>>()
555555- })
556556- .unwrap_or_default();
557557-558558- if let Some(props) = properties {
559559- let param_strs: Vec<String> = props
560560- .iter()
561561- .map(|(param_name, param_def)| {
562562- let is_required = required.contains(¶m_name.as_str());
563563- let required_marker = if is_required { "!" } else { "" };
564564- let param_type = generate_type(param_def).unwrap_or_else(|_| "unknown".to_string());
565565- let escaped_param_name = escape_name(param_name);
566566-567567- format!("{}{}: {}", escaped_param_name, required_marker, param_type)
568568- })
569569- .collect();
570570-571571- if !param_strs.is_empty() {
572572- output.push_str(¶m_strs.join(", "));
573573- }
574574- }
575575- }
576576- output.push(')');
577577-578578- // Message types
579579- if let Some(message) = def.get("message").and_then(|v| v.as_object()) {
580580- if let Some(schema) = message.get("schema") {
581581- let message_type = generate_type(schema)?;
582582- output.push_str(&format!(": {}", message_type));
583583- }
584584- }
585585-586586- output.push_str(";\n");
587587- Ok(output)
588588-}
589589-590590-fn generate_token(name: &str, def: &Value) -> Result<String, MlfGenerateError> {
591591- let mut output = String::new();
592592-593593- // Add doc comment
594594- if let Some(desc) = def.get("description").and_then(|v| v.as_str()) {
595595- if !desc.is_empty() {
596596- for line in desc.lines() {
597597- output.push_str(&format!("/// {}\n", line));
598598- }
599599- }
600600- }
601601-602602- let escaped_name = escape_name(name);
603603- output.push_str(&format!("token {};\n", escaped_name));
604604- Ok(output)
605605-}
606606-607607-fn generate_def_type(name: &str, def: &Value, last_segment: &str, ctx: &ConversionContext) -> Result<String, MlfGenerateError> {
608608- let mut output = String::new();
609609-610610- // Add @main annotation for "main" definitions
611611- if name == "main" {
612612- output.push_str("@main\n");
613613- }
614614-615615- // Use last segment of NSID for "main" definitions
616616- let def_name = if name == "main" {
617617- escape_name(last_segment)
618618- } else {
619619- escape_name(name)
620620- };
621621-622622- output.push_str(&format!("def type {} = ", def_name));
623623- let type_str = generate_type_with_indent(def, 0)?;
624624- output.push_str(&type_str);
625625- output.push_str(";\n");
626626-627627- Ok(output)
628628-}
629629-630630-fn generate_type_with_indent(type_def: &Value, indent_level: usize, ctx: &ConversionContext) -> Result<String, MlfGenerateError> {
631631- let type_name = type_def.get("type").and_then(|v| v.as_str());
632632-633633- match type_name {
634634- Some("object") => {
635635- let indent = " ".repeat(indent_level);
636636- let field_indent = " ".repeat(indent_level + 1);
637637-638638- let mut output = String::from("{\n");
639639- let properties = type_def
640640- .get("properties")
641641- .and_then(|v| v.as_object())
642642- .ok_or_else(|| MlfGenerateError::InvalidLexicon {
643643- message: "Missing 'properties' in object type".to_string(),
644644- })?;
645645-646646- let required = type_def
647647- .get("required")
648648- .and_then(|v| v.as_array())
649649- .map(|arr| {
650650- arr.iter()
651651- .filter_map(|v| v.as_str())
652652- .collect::<Vec<_>>()
653653- })
654654- .unwrap_or_default();
655655-656656- for (field_name, field_def) in properties {
657657- // Add field doc comment
658658- if let Some(desc) = field_def.get("description").and_then(|v| v.as_str()) {
659659- if !desc.is_empty() {
660660- for line in desc.lines() {
661661- output.push_str(&format!("{}/// {}\n", field_indent, line));
662662- }
663663- }
664664- }
665665-666666- let is_required = required.contains(&field_name.as_str());
667667- let required_marker = if is_required { "!" } else { "" };
668668- let field_type = generate_type_with_indent(field_def, indent_level + 1)?;
669669- let escaped_field_name = escape_name(field_name);
670670- output.push_str(&format!(
671671- "{}{}{}: {},\n",
672672- field_indent, escaped_field_name, required_marker, field_type
673673- ));
674674- }
675675-676676- output.push_str(&format!("{}}}", indent));
677677- Ok(output)
678678- }
679679- _ => generate_type(type_def),
680680- }
681681-}
682682-683683-fn generate_type(type_def: &Value, ctx: &ConversionContext) -> Result<String, MlfGenerateError> {
684684- let type_name = type_def.get("type").and_then(|v| v.as_str());
685685-686686- match type_name {
687687- Some("null") => Ok("null".to_string()),
688688- Some("boolean") => Ok("boolean".to_string()),
689689- Some("integer") => {
690690- let mut result = "integer".to_string();
691691- result = apply_constraints(result, type_def);
692692- Ok(result)
693693- }
694694- Some("string") => {
695695- // Check if this is a format string that maps to a prelude type
696696- if let Some(format) = type_def.get("format").and_then(|v| v.as_str()) {
697697- let prelude_type = match format {
698698- "did" => "Did",
699699- "at-uri" => "AtUri",
700700- "at-identifier" => "AtIdentifier",
701701- "handle" => "Handle",
702702- "datetime" => "Datetime",
703703- "uri" => "Uri",
704704- "cid" => "Cid",
705705- "nsid" => "Nsid",
706706- "tid" => "Tid",
707707- "record-key" => "RecordKey",
708708- "language" => "Language",
709709- _ => {
710710- // Unknown format, fall through to normal string with constraints
711711- let mut result = "string".to_string();
712712- result = apply_constraints(result, type_def);
713713- return Ok(result);
714714- }
715715- };
716716- // If it's a known prelude type with only the format constraint, use the prelude type directly
717717- // Check if there are other constraints besides format
718718- let has_other_constraints = type_def.get("minLength").is_some()
719719- || type_def.get("maxLength").is_some()
720720- || type_def.get("minGraphemes").is_some()
721721- || type_def.get("maxGraphemes").is_some()
722722- || type_def.get("enum").is_some()
723723- || type_def.get("knownValues").is_some()
724724- || type_def.get("default").is_some();
725725-726726- if !has_other_constraints {
727727- return Ok(prelude_type.to_string());
728728- }
729729- }
730730-731731- let mut result = "string".to_string();
732732- result = apply_constraints(result, type_def);
733733- Ok(result)
734734- }
735735- Some("bytes") => Ok("bytes".to_string()),
736736- Some("blob") => {
737737- let mut result = "blob".to_string();
738738- result = apply_constraints(result, type_def);
739739- Ok(result)
740740- }
741741- Some("unknown") => Ok("unknown".to_string()),
742742- Some("array") => {
743743- let items = type_def.get("items").ok_or_else(|| {
744744- MlfGenerateError::InvalidLexicon {
745745- message: "Missing 'items' in array type".to_string(),
746746- }
747747- })?;
748748-749749- // Check if items have constraints
750750- let items_obj = items.as_object();
751751- let has_item_constraints = items_obj.map_or(false, |obj| {
752752- obj.contains_key("minLength") ||
753753- obj.contains_key("maxLength") ||
754754- obj.contains_key("minGraphemes") ||
755755- obj.contains_key("maxGraphemes") ||
756756- obj.contains_key("minimum") ||
757757- obj.contains_key("maximum") ||
758758- obj.contains_key("enum") ||
759759- obj.contains_key("knownValues") ||
760760- obj.contains_key("default")
761761- });
762762-763763- let item_type = if has_item_constraints {
764764- // If item has constraints, we need to wrap in parentheses to apply constraints before []
765765- // For now, just generate the base type without item constraints
766766- // TODO: Consider generating a type alias for complex constrained items
767767- items.get("type")
768768- .and_then(|t| t.as_str())
769769- .unwrap_or("unknown")
770770- .to_string()
771771- } else {
772772- generate_type(items)?
773773- };
774774-775775- let mut result = format!("{}[]", item_type);
776776- result = apply_constraints(result, type_def);
777777- Ok(result)
778778- }
779779- Some("object") => {
780780- let mut output = String::from("{\n");
781781- let properties = type_def
782782- .get("properties")
783783- .and_then(|v| v.as_object())
784784- .ok_or_else(|| MlfGenerateError::InvalidLexicon {
785785- message: "Missing 'properties' in object type".to_string(),
786786- })?;
787787-788788- let required = type_def
789789- .get("required")
790790- .and_then(|v| v.as_array())
791791- .map(|arr| {
792792- arr.iter()
793793- .filter_map(|v| v.as_str())
794794- .collect::<Vec<_>>()
795795- })
796796- .unwrap_or_default();
797797-798798- for (field_name, field_def) in properties {
799799- // Add field doc comment
800800- if let Some(desc) = field_def.get("description").and_then(|v| v.as_str()) {
801801- if !desc.is_empty() {
802802- for line in desc.lines() {
803803- output.push_str(&format!(" /// {}\n", line));
804804- }
805805- }
806806- }
807807-808808- let is_required = required.contains(&field_name.as_str());
809809- let required_marker = if is_required { "!" } else { "" };
810810- let field_type = generate_type(field_def)?;
811811- let escaped_field_name = escape_name(field_name);
812812- output.push_str(&format!(
813813- " {}{}: {},\n",
814814- escaped_field_name, required_marker, field_type
815815- ));
816816- }
817817-818818- output.push_str(" }");
819819- Ok(output)
820820- }
821821- Some("union") => {
822822- let refs = type_def.get("refs").and_then(|v| v.as_array()).ok_or_else(|| {
823823- MlfGenerateError::InvalidLexicon {
824824- message: "Missing 'refs' in union type".to_string(),
825825- }
826826- })?;
827827-828828- let type_strs: Vec<String> = refs
829829- .iter()
830830- .map(|r| generate_type(r).unwrap_or_else(|_| "unknown".to_string()))
831831- .collect();
832832-833833- let mut result = type_strs.join(" | ");
834834-835835- // Check if closed
836836- if type_def.get("closed").and_then(|v| v.as_bool()).unwrap_or(false) {
837837- result.push_str(" | !");
838838- }
839839-840840- Ok(result)
841841- }
842842- Some("ref") => {
843843- if let Some(ref_str) = type_def.get("ref").and_then(|v| v.as_str()) {
844844- // Handle references:
845845- // "#defName" -> "defName" (local reference, same file)
846846- // "namespace.id#defName" -> Check if same namespace, if so use "defName", else use full path
847847-848848- if let Some(stripped) = ref_str.strip_prefix('#') {
849849- // Local reference: #defName -> defName
850850- Ok(stripped.to_string())
851851- } else if let Some((namespace, def_name)) = ref_str.split_once('#') {
852852- // Check if this is the current namespace
853853- // For now, we'll just use the def name if it's the same namespace
854854- // Note: This requires passing context through, which we'll add
855855- // For external refs, we keep the full NSID format
856856- Ok(format!("{}.{}", namespace, def_name))
857857- } else {
858858- // No # at all - shouldn't happen in valid lexicons, but handle gracefully
859859- Ok(ref_str.to_string())
860860- }
861861- } else {
862862- Err(MlfGenerateError::InvalidLexicon {
863863- message: "Missing 'ref' in ref type".to_string(),
864864- })
865865- }
866866- }
867867- _ => Ok("unknown".to_string()),
868868- }
869869-}
870870-871871-fn apply_constraints(mut type_str: String, type_def: &Value) -> String {
872872- let mut constraints = Vec::new();
873873-874874- if let Some(min_length) = type_def.get("minLength").and_then(|v| v.as_i64()) {
875875- constraints.push(format!("minLength: {}", min_length));
876876- }
877877- if let Some(max_length) = type_def.get("maxLength").and_then(|v| v.as_i64()) {
878878- constraints.push(format!("maxLength: {}", max_length));
879879- }
880880- if let Some(min_graphemes) = type_def.get("minGraphemes").and_then(|v| v.as_i64()) {
881881- constraints.push(format!("minGraphemes: {}", min_graphemes));
882882- }
883883- if let Some(max_graphemes) = type_def.get("maxGraphemes").and_then(|v| v.as_i64()) {
884884- constraints.push(format!("maxGraphemes: {}", max_graphemes));
885885- }
886886- if let Some(minimum) = type_def.get("minimum").and_then(|v| v.as_i64()) {
887887- constraints.push(format!("minimum: {}", minimum));
888888- }
889889- if let Some(maximum) = type_def.get("maximum").and_then(|v| v.as_i64()) {
890890- constraints.push(format!("maximum: {}", maximum));
891891- }
892892- if let Some(format) = type_def.get("format").and_then(|v| v.as_str()) {
893893- constraints.push(format!("format: \"{}\"", format));
894894- }
895895- if let Some(enum_vals) = type_def.get("enum").and_then(|v| v.as_array()) {
896896- let vals: Vec<String> = enum_vals
897897- .iter()
898898- .filter_map(|v| v.as_str())
899899- .map(|s| format!("\"{}\"", s))
900900- .collect();
901901- constraints.push(format!("enum: [{}]", vals.join(", ")));
902902- }
903903- if let Some(known_vals) = type_def.get("knownValues").and_then(|v| v.as_array()) {
904904- let vals: Vec<String> = known_vals
905905- .iter()
906906- .filter_map(|v| v.as_str())
907907- .map(|s| format!("\"{}\"", s))
908908- .collect();
909909- constraints.push(format!("knownValues: [{}]", vals.join(", ")));
910910- }
911911- if let Some(accept) = type_def.get("accept").and_then(|v| v.as_array()) {
912912- let mimes: Vec<String> = accept
913913- .iter()
914914- .filter_map(|v| v.as_str())
915915- .map(|s| format!("\"{}\"", s))
916916- .collect();
917917- constraints.push(format!("accept: [{}]", mimes.join(", ")));
918918- }
919919- if let Some(max_size) = type_def.get("maxSize").and_then(|v| v.as_i64()) {
920920- constraints.push(format!("maxSize: {}", max_size));
921921- }
922922- if let Some(default) = type_def.get("default") {
923923- let default_str = match default {
924924- Value::String(s) => format!("\"{}\"", s),
925925- Value::Number(n) => n.to_string(),
926926- Value::Bool(b) => b.to_string(),
927927- _ => "null".to_string(),
928928- };
929929- constraints.push(format!("default: {}", default_str));
930930- }
931931-932932- if !constraints.is_empty() {
933933- type_str.push_str(" constrained {\n");
934934- for constraint in &constraints {
935935- type_str.push_str(&format!(" {},\n", constraint));
936936- }
937937- type_str.push_str(" }");
938938- }
939939-940940- type_str
941941-}
+207-12
mlf-codegen/src/lib.rs
···33use serde_json::{json, Map, Value};
44use std::collections::HashMap;
/// An advisory raised while generating a lexicon; it never aborts codegen.
///
/// Shaped like `ConversionWarning` on the JSON→MLF side so that both
/// directions share a single structured-warning model.
#[derive(Debug, Clone, PartialEq)]
pub struct CodegenWarning {
    /// Namespace of the lexicon whose generation produced this warning.
    pub namespace: String,
    /// Human-readable explanation of what was coerced, and why.
    pub message: String,
}
1616+1717+/// Bundle of codegen output: the generated lexicon JSON plus any
1818+/// advisory warnings produced while walking the AST.
1919+#[derive(Debug, Clone)]
2020+pub struct CodegenOutput {
2121+ pub json: Value,
2222+ pub warnings: Vec<CodegenWarning>,
2323+}
2424+625// Re-export inventory for macros
726#[doc(hidden)]
827pub use inventory;
···112131 })
113132}
114133115115-pub fn generate_lexicon(namespace: &str, lexicon: &Lexicon, workspace: &Workspace) -> Value {
134134+pub fn generate_lexicon(namespace: &str, lexicon: &Lexicon, workspace: &Workspace) -> CodegenOutput {
116135 let usage_counts = analyze_type_usage(lexicon);
117136 let eligibility = MainEligibility::for_lexicon(namespace, lexicon);
118137119138 let mut defs = Map::new();
139139+ let mut self_description = String::new();
140140+ let mut self_extensions: Vec<(String, Value)> = Vec::new();
141141+ let mut warnings: Vec<CodegenWarning> = Vec::new();
120142121143 for item in &lexicon.items {
122144 match item {
123145 Item::Record(record) => {
124124- let value = generate_record_json(record, &usage_counts, workspace, namespace);
146146+ let mut value = generate_record_json(record, &usage_counts, workspace, namespace);
147147+ apply_extension_annotations(&mut value, &record.annotations, workspace, namespace, &mut warnings);
125148 insert_def(&mut defs, &record.name.name, eligibility.is_main(&record.name.name, &record.annotations), value);
126149 }
127150 Item::Query(query) => {
128128- let value = generate_query_json(query, &usage_counts, workspace, namespace);
151151+ let mut value = generate_query_json(query, &usage_counts, workspace, namespace);
152152+ apply_extension_annotations(&mut value, &query.annotations, workspace, namespace, &mut warnings);
129153 insert_def(&mut defs, &query.name.name, eligibility.is_main(&query.name.name, &query.annotations), value);
130154 }
131155 Item::Procedure(procedure) => {
132132- let value = generate_procedure_json(procedure, &usage_counts, workspace, namespace);
156156+ let mut value = generate_procedure_json(procedure, &usage_counts, workspace, namespace);
157157+ apply_extension_annotations(&mut value, &procedure.annotations, workspace, namespace, &mut warnings);
133158 insert_def(&mut defs, &procedure.name.name, eligibility.is_main(&procedure.name.name, &procedure.annotations), value);
134159 }
135160 Item::Subscription(subscription) => {
136136- let value = generate_subscription_json(subscription, &usage_counts, workspace, namespace);
161161+ let mut value = generate_subscription_json(subscription, &usage_counts, workspace, namespace);
162162+ apply_extension_annotations(&mut value, &subscription.annotations, workspace, namespace, &mut warnings);
137163 insert_def(&mut defs, &subscription.name.name, eligibility.is_main(&subscription.name.name, &subscription.annotations), value);
138164 }
139165 Item::DefType(def_type) => {
140140- let value = generate_def_type_json(def_type, &usage_counts, workspace, namespace);
166166+ let mut value = generate_def_type_json(def_type, &usage_counts, workspace, namespace);
167167+ apply_extension_annotations(&mut value, &def_type.annotations, workspace, namespace, &mut warnings);
141168 insert_def(&mut defs, &def_type.name.name, eligibility.is_main(&def_type.name.name, &def_type.annotations), value);
142169 }
143170 Item::Token(token) => {
144171 let mut token_obj = Map::new();
145172 token_obj.insert("type".to_string(), json!("token"));
146173 insert_opt_str(&mut token_obj, "description", &extract_docs(&token.docs));
147147- defs.insert(token.name.name.clone(), Value::Object(token_obj));
174174+ let mut value = Value::Object(token_obj);
175175+ apply_extension_annotations(&mut value, &token.annotations, workspace, namespace, &mut warnings);
176176+ defs.insert(token.name.name.clone(), value);
177177+ }
178178+ Item::SelfItem(self_item) => {
179179+ // The `self { }` item carries lexicon-level metadata. Its
180180+ // docs become the top-level `description`; its extension
181181+ // annotations become top-level JSON fields alongside
182182+ // `lexicon`, `id`, `defs`.
183183+ self_description = extract_docs(&self_item.docs);
184184+ self_extensions = collect_extension_fields(&self_item.annotations, workspace, namespace, &mut warnings);
148185 }
149186 // Inline types never appear in `defs` — they expand at their point
150150- // of use. Other item kinds (use statements, namespace blocks) are
151151- // structural and not emitted into the lexicon output.
187187+ // of use. Use statements are structural and not emitted.
152188 _ => {}
153189 }
154190 }
···157193 root.insert("$type".to_string(), json!("com.atproto.lexicon.schema"));
158194 root.insert("lexicon".to_string(), json!(1));
159195 root.insert("id".to_string(), json!(namespace));
196196+ insert_opt_str(&mut root, "description", &self_description);
197197+ for (key, value) in self_extensions {
198198+ root.insert(key, value);
199199+ }
160200 root.insert("defs".to_string(), json!(defs));
161161- Value::Object(root)
201201+ CodegenOutput {
202202+ json: Value::Object(root),
203203+ warnings,
204204+ }
205205+}
206206+207207+/// Extract `@const` / `@reference` annotations into (key, JSON value)
208208+/// pairs. Used by the self-item handling to populate top-level lexicon
209209+/// fields. Annotations of other names (including generator-scoped ones
210210+/// like `@rust:deprecated`) are ignored.
211211+fn collect_extension_fields(
212212+ annotations: &[Annotation],
213213+ workspace: &Workspace,
214214+ current_namespace: &str,
215215+ warnings: &mut Vec<CodegenWarning>,
216216+) -> Vec<(String, Value)> {
217217+ annotations
218218+ .iter()
219219+ .filter_map(|ann| extension_field(ann, workspace, current_namespace, warnings))
220220+ .collect()
221221+}
222222+223223+/// Mutate `value` (expected to be a JSON object) in place, adding any
224224+/// extension fields derived from `@const` / `@reference` annotations.
225225+/// Non-object values are left untouched.
226226+fn apply_extension_annotations(
227227+ value: &mut Value,
228228+ annotations: &[Annotation],
229229+ workspace: &Workspace,
230230+ current_namespace: &str,
231231+ warnings: &mut Vec<CodegenWarning>,
232232+) {
233233+ let Some(obj) = value.as_object_mut() else {
234234+ return;
235235+ };
236236+ for (key, field_value) in collect_extension_fields(annotations, workspace, current_namespace, warnings) {
237237+ obj.insert(key, field_value);
238238+ }
239239+}
240240+241241+/// If `annotation` is a recognised extension (`@const` or `@reference`),
242242+/// return the `(key, JSON value)` it produces. Otherwise `None`.
243243+///
244244+/// `@const(key, value)` — value is a literal; rendered verbatim.
245245+/// `@reference(key, path)` — path is resolved through the workspace and
246246+/// emitted as an NSID string.
247247+fn extension_field(
248248+ annotation: &Annotation,
249249+ workspace: &Workspace,
250250+ current_namespace: &str,
251251+ warnings: &mut Vec<CodegenWarning>,
252252+) -> Option<(String, Value)> {
253253+ if !annotation.selectors.is_empty() {
254254+ // Generator-scoped annotation (e.g. `@rust:deprecated`); not an
255255+ // extension.
256256+ return None;
257257+ }
258258+ let name = annotation.name.name.as_str();
259259+ let is_const = name == "const";
260260+ let is_reference = name == "reference";
261261+ if !is_const && !is_reference {
262262+ return None;
263263+ }
264264+ // Both forms take exactly two positional args: (key: string, value).
265265+ let mut args = annotation.args.iter();
266266+ let key_arg = args.next()?;
267267+ let value_arg = args.next()?;
268268+ if args.next().is_some() {
269269+ return None;
270270+ }
271271+ let AnnotationArg::Positional(AnnotationValue::String(key)) = key_arg else {
272272+ return None;
273273+ };
274274+ let AnnotationArg::Positional(value) = value_arg else {
275275+ return None;
276276+ };
277277+ let json_value = if is_const {
278278+ annotation_value_to_json(value, key, current_namespace, warnings)
279279+ } else {
280280+ // @reference: expect a type path; emit the resolved NSID.
281281+ let AnnotationValue::Reference(path) = value else {
282282+ return None;
283283+ };
284284+ Value::String(resolve_ref_nsid(path, workspace, current_namespace))
285285+ };
286286+ Some((key.clone(), json_value))
287287+}
288288+289289+/// Literal-to-JSON conversion for `@const` values. Coerces whole-number
290290+/// f64 to i64 (so `@const("revision", 3)` emits `3`, not `3.0`) and
291291+/// transparently stringifies genuinely fractional numbers — ATProto's
292292+/// data model has no floats, so the spec-compliant representation is a
293293+/// string. `key` and `namespace` are used to tag any emitted warnings.
294294+fn annotation_value_to_json(
295295+ value: &AnnotationValue,
296296+ key: &str,
297297+ namespace: &str,
298298+ warnings: &mut Vec<CodegenWarning>,
299299+) -> Value {
300300+ match value {
301301+ AnnotationValue::String(s) => Value::String(s.clone()),
302302+ AnnotationValue::Number(n) => {
303303+ if n.is_finite() && n.fract() == 0.0 && *n >= i64::MIN as f64 && *n <= i64::MAX as f64 {
304304+ Value::Number(serde_json::Number::from(*n as i64))
305305+ } else if n.is_finite() {
306306+ warnings.push(CodegenWarning {
307307+ namespace: namespace.to_string(),
308308+ message: format!(
309309+ "@const({:?}, {}): ATProto's data model has no floats; \
310310+ emitting as string {:?} to stay spec-compliant",
311311+ key, n, n.to_string()
312312+ ),
313313+ });
314314+ Value::String(n.to_string())
315315+ } else {
316316+ warnings.push(CodegenWarning {
317317+ namespace: namespace.to_string(),
318318+ message: format!(
319319+ "@const({:?}, {}): non-finite number is not representable in JSON; \
320320+ emitting `null`",
321321+ key, n
322322+ ),
323323+ });
324324+ Value::Null
325325+ }
326326+ }
327327+ AnnotationValue::Boolean(b) => Value::Bool(*b),
328328+ AnnotationValue::Null => Value::Null,
329329+ AnnotationValue::Array(items) => {
330330+ Value::Array(
331331+ items
332332+ .iter()
333333+ .map(|item| annotation_value_to_json(item, key, namespace, warnings))
334334+ .collect(),
335335+ )
336336+ }
337337+ AnnotationValue::Object(entries) => {
338338+ let mut obj = Map::new();
339339+ for (entry_key, entry_value) in entries {
340340+ obj.insert(
341341+ entry_key.clone(),
342342+ annotation_value_to_json(entry_value, key, namespace, warnings),
343343+ );
344344+ }
345345+ Value::Object(obj)
346346+ }
347347+ // A `Reference` shouldn't appear in a `@const` value position —
348348+ // it would mean the author wrote a bare identifier where they
349349+ // meant a literal. Fall back to emitting the path as-is; the
350350+ // parser could also reject this earlier, but defensively handle
351351+ // it here.
352352+ AnnotationValue::Reference(path) => Value::String(path.to_string()),
353353+ }
162354}
163355164356/// Decides whether a given def should be emitted under the key `main` or
···8741066 }
87510678761068 fn generate(&self, ctx: &GeneratorContext) -> Result<String, String> {
877877- let json = generate_lexicon(ctx.namespace, ctx.lexicon, ctx.workspace);
878878- serde_json::to_string_pretty(&json)
10691069+ // The plugin `CodeGenerator` trait doesn't carry a warning
10701070+ // channel today; the callers that care about warnings call
10711071+ // `generate_lexicon` directly instead of going through this shim.
10721072+ let output = generate_lexicon(ctx.namespace, ctx.lexicon, ctx.workspace);
10731073+ serde_json::to_string_pretty(&output.json)
8791074 .map_err(|e| format!("Failed to serialize JSON: {}", e))
8801075 }
8811076}
+25
mlf-lang/src/ast.rs
···3838 Procedure(Procedure),
3939 Subscription(Subscription),
4040 Use(Use),
4141+ /// `self { }` — a declaration representing the lexicon itself. Docs
4242+ /// and annotations attached to it map to the lexicon's top-level
4343+ /// fields in JSON (e.g. `description`, vendor extensions). Body is
4444+ /// empty in V1; the `{}` shape is reserved for future contents.
4545+ SelfItem(SelfItem),
4146}
42474348impl Spanned for Item {
···5156 Item::Procedure(p) => p.span,
5257 Item::Subscription(s) => s.span,
5358 Item::Use(u) => u.span,
5959+ Item::SelfItem(s) => s.span,
5460 }
5561 }
5662}
57636464+/// The lexicon-as-item. See [`Item::SelfItem`] for the semantic role.
6565+#[derive(Debug, Clone, PartialEq)]
6666+pub struct SelfItem {
6767+ pub docs: Vec<DocComment>,
6868+ pub annotations: Vec<Annotation>,
6969+ pub span: Span,
7070+}
7171+5872/// Documentation comment
5973#[derive(Debug, Clone, PartialEq, Eq)]
6074pub struct DocComment {
···87101 String(String),
88102 Number(f64),
89103 Boolean(bool),
104104+ /// JSON `null`. Used by `@const` to represent explicit-null values
105105+ /// that appear in source lexicons' extension fields.
106106+ Null,
107107+ /// JSON array. Element types are freely mixed, matching JSON semantics.
108108+ Array(Vec<AnnotationValue>),
109109+ /// JSON object literal — a map with string keys. Keys permit any
110110+ /// JSON-legal form (including hyphens, e.g. `"x-vendor-flag"`).
111111+ Object(Vec<(String, AnnotationValue)>),
112112+ /// A type path resolved through the workspace. Used by `@reference`
113113+ /// to name an MLF type by path; codegen resolves to an NSID string.
114114+ Reference(Path),
90115}
9111692117/// A record definition
···139139 return serde_wasm_bindgen::to_value(&result).unwrap();
140140 }
141141142142- // Generate JSON lexicon
143143- let json_lexicon = mlf_codegen::generate_lexicon(namespace, &lexicon, &workspace);
142142+ // Generate JSON lexicon. Codegen warnings are dropped on the floor
143143+ // in the wasm surface today — the playground doesn't have a UI for
144144+ // them yet; when it does, they flow through `output.warnings`.
145145+ let output = mlf_codegen::generate_lexicon(namespace, &lexicon, &workspace);
144146145145- match serde_json::to_string_pretty(&json_lexicon) {
147147+ match serde_json::to_string_pretty(&output.json) {
146148 Ok(json_str) => {
147149 let result = GenerateResult {
148150 success: true,
···11+com.example.const_float_stringifies: @const("x-threshold", 3.14): ATProto's data model has no floats; emitting as string "3.14" to stay spec-compliant
···11+[test]
22+name = "const_float_stringifies"
33+description = "@const with a fractional numeric value stringifies to stay spec-compliant and emits a warning"
44+namespace = "com.example.const_float_stringifies"
···11+[test]
22+name = "item_extensions_codegen"
33+description = "@const annotations on a record emit as extra JSON fields on the record def"
44+namespace = "com.example.item_extensions_codegen"
···11+com.example.refhint: extension field "xFallback" has value "com.example.other#someType" which looks NSID-shaped; emitted as `@const` — consider `@reference` if you intend workspace name resolution when hand-editing the MLF
···30303131Arguments can be:
3232- **Strings**: `"value"`
3333-- **Numbers**: `42`, `3.14`
3333+- **Numbers**: `42`, `3.14`, `-10`
3434- **Booleans**: `true`, `false`
3535+- **Null**: `null`
3636+- **Arrays / objects** of the above (JSON-shaped literals)
3737+- **Type paths**: `com.example.other.thing` (used by `@reference`)
35383639### Named Arguments
3740···189192- `"application/xml"` - XML
190193- `"*/*"` - Any MIME type
191194- Custom MIME types as needed
195195+196196+## Lexicon-Level Metadata: `self { }`
198198+ATProto Lexicon JSON has four top-level fields: `lexicon`, `id`, `description`, and `defs`. `lexicon` and `id` are derived mechanically from the file, and `defs` is filled by your record/query/etc. definitions. The remaining field — `description`, plus any non-spec top-level fields vendors commonly add — needs somewhere to live in MLF source.
199199+200200+That somewhere is `self { }`: a first-class item that represents the lexicon itself. Doc comments on `self { }` become the top-level `description`, and extension annotations on it become top-level JSON fields.
201201+202202+```mlf
203203+/// Blog-style post record for the Bluesky feed.
204204+@const("revision", 3)
205205+@const("x-vendor-flag", true)
206206+self {}
207207+208208+record post {
209209+ text!: string,
210210+ createdAt!: Datetime,
211211+}
212212+```
213213+214214+Generates:
215215+216216+```json
217217+{
218218+ "$type": "com.atproto.lexicon.schema",
219219+ "lexicon": 1,
220220+ "id": "...",
221221+ "description": "Blog-style post record for the Bluesky feed.",
222222+ "revision": 3,
223223+ "x-vendor-flag": true,
224224+ "defs": { "main": { "type": "record", ... } }
225225+}
226226+```
227227+228228+**Rules:**
229229+- Optional. Files without `self { }` behave exactly as today.
230230+- At most one per file, at the top level only.
231231+- Body is always empty (`{}`) in V1; the shape is reserved for future contents.
232232+233233+## Extension Annotations
234234+235235+Annotations parse uniformly — any `@name(args...)` is grammatically valid. Two annotation names are recognized by the lexicon generator as carrying JSON extension fields; everything else is metadata that codegen ignores. Both attach to `self { }` for top-level fields, or to any record / query / procedure / subscription / def / token for per-item extra fields.
236236+237237+### `@const(key, value)` — literal
238238+239239+Emitted verbatim as JSON. Takes any annotation literal: string, integer, boolean, null, array, or nested object. This is the form used for lexicon-level metadata and is also what the JSON-to-MLF converter emits when preserving non-spec fields.
240240+241241+```mlf
242242+@const("revision", 3)
243243+@const("x-tags", ["alpha", "beta"])
244244+@const("x-meta", { "team": "platform", "critical": true })
245245+self {}
246246+```
248248+**On fractional numbers:** ATProto's data model has no floats. If you pass a fractional value to `@const`, the lexicon generator automatically converts it to a string (`@const("x-threshold", 3.14)` produces `"x-threshold": "3.14"`) and reports a warning. Integers and whole-number floats emit as JSON numbers unchanged.
249249+250250+### `@reference(key, path)` — named-type reference
251251+252252+Resolves the type path through the workspace and emits the resolved NSID string. Useful when you want an extension field to point at another defined type without hardcoding its NSID.
253253+254254+```mlf
255255+@reference("xFallbackType", com.example.other.thing)
256256+record foo {
257257+ name!: string,
258258+}
259259+```
260260+261261+Generates (on the `foo` record):
262262+263263+```json
264264+{
265265+ "type": "record",
266266+ "xFallbackType": "com.example.other.thing",
267267+ "record": { ... }
268268+}
269269+```
270270+271271+### Other annotations
272272+273273+Any `@whatever(args)` the generator doesn't recognize is left untouched — it's metadata for whoever reads the AST (linters, codegen plugins, documentation tools). `@const` and `@reference` are the only two that influence JSON output.
192274193275## Annotation Processing
194276