A human-friendly DSL for ATProto Lexicons
27
fork

Configure Feed

Select the types of activity you want to include in your feed.

Introduce self {} and extension annotations

authored by stavola.xyz and committed by

Tangled 3aac6e63 fd3c06a0

+1381 -1025
+8
README.md
··· 7 7 ## What it looks like 8 8 9 9 ```mlf 10 + /// Blog-style post record for the Bluesky feed. 11 + @const("revision", 3) 12 + self {} 13 + 10 14 record post { 11 15 text!: string constrained { 12 16 maxLength: 3000, ··· 21 25 parent!: com.atproto.repo.strongRef, 22 26 }; 23 27 ``` 28 + 29 + `self {}` represents the lexicon itself — docs on it map to the 30 + top-level `description`; `@const` / `@reference` annotations carry 31 + non-spec or vendor extension fields through to JSON. 24 32 25 33 ## Installation 26 34
+3 -2
codegen-plugins/mlf-codegen-go/src/lib.rs
··· 190 190 Item::Query(_) | Item::Procedure(_) | Item::Subscription(_) => { 191 191 // TODO: Generate client methods 192 192 } 193 - Item::Use(_) => { 194 - // Skip use statements 193 + Item::Use(_) | Item::SelfItem(_) => { 194 + // Skip `use` statements and the `self { }` item — both 195 + // are lexicon-level metadata, not types to emit. 195 196 } 196 197 } 197 198 }
+3 -2
codegen-plugins/mlf-codegen-rust/src/lib.rs
··· 216 216 Item::Query(_) | Item::Procedure(_) | Item::Subscription(_) => { 217 217 // TODO: Generate client methods 218 218 } 219 - Item::Use(_) => { 220 - // Skip use statements 219 + Item::Use(_) | Item::SelfItem(_) => { 220 + // Skip `use` statements and the `self { }` item — both 221 + // are lexicon-level metadata, not types to emit. 221 222 } 222 223 } 223 224 }
+3 -2
codegen-plugins/mlf-codegen-typescript/src/lib.rs
··· 162 162 // TODO: Generate client methods for these 163 163 // Annotation idea: @clientMethod for custom generation 164 164 } 165 - Item::Use(_) => { 166 - // Skip use statements in output 165 + Item::Use(_) | Item::SelfItem(_) => { 166 + // Skip `use` statements and the `self { }` item — both 167 + // are lexicon-level metadata, not types to emit. 167 168 } 168 169 } 169 170 }
+6 -2
mlf-cli/src/generate/lexicon.rs
··· 163 163 continue; 164 164 } 165 165 166 - let json_lexicon = mlf_codegen::generate_lexicon(&namespace, &lexicon, &workspace); 166 + let output = mlf_codegen::generate_lexicon(&namespace, &lexicon, &workspace); 167 + 168 + for warning in &output.warnings { 169 + eprintln!(" warning: {}: {}", warning.namespace, warning.message); 170 + } 167 171 168 172 let output_path = if flat { 169 173 output_dir.join(format!("{}.json", namespace)) ··· 180 184 path 181 185 }; 182 186 183 - let json_str = serde_json::to_string_pretty(&json_lexicon).unwrap(); 187 + let json_str = serde_json::to_string_pretty(&output.json).unwrap(); 184 188 if let Err(source) = std::fs::write(&output_path, format!("{}\n", json_str)) { 185 189 errors.push((output_path.display().to_string(), format!("Failed to write file: {}", source))); 186 190 continue;
+263 -36
mlf-cli/src/generate/mlf.rs
··· 228 228 pub fn generate_mlf_from_json(json: &Value) -> Result<MlfGenerateOutput, MlfGenerateError> { 229 229 let mut output = String::new(); 230 230 231 - // Extract NSID to get the last segment for "main" definitions 232 231 let nsid = json 233 232 .get("id") 234 233 .and_then(|v| v.as_str()) ··· 250 249 warnings: RefCell::new(Vec::new()), 251 250 }; 252 251 253 - // Process all definitions 252 + // Emit a `self {}` item when the source has a top-level description 253 + // or any non-spec top-level field (`revision`, vendor `x-*`, etc.). 254 + if let Some(self_mlf) = render_self_item(json, &ctx) { 255 + output.push_str(&self_mlf); 256 + output.push('\n'); 257 + } 258 + 254 259 for (name, def) in defs { 255 260 let def_type = def.get("type").and_then(|v| v.as_str()).ok_or_else(|| { 256 261 MlfGenerateError::InvalidLexicon { ··· 258 263 } 259 264 })?; 260 265 261 - match def_type { 262 - "record" => { 263 - let mlf = generate_record(name, def, &ctx)?; 264 - output.push_str(&mlf); 265 - output.push('\n'); 266 - } 267 - "query" => { 268 - let mlf = generate_query(name, def, &ctx)?; 269 - output.push_str(&mlf); 270 - output.push('\n'); 271 - } 272 - "procedure" => { 273 - let mlf = generate_procedure(name, def, &ctx)?; 274 - output.push_str(&mlf); 275 - output.push('\n'); 276 - } 277 - "subscription" => { 278 - let mlf = generate_subscription(name, def, &ctx)?; 279 - output.push_str(&mlf); 280 - output.push('\n'); 281 - } 282 - "token" => { 283 - let mlf = generate_token(name, def)?; 284 - output.push_str(&mlf); 285 - output.push('\n'); 286 - } 287 - _ => { 288 - // All other types (object, string, array, union, etc.) 
are treated as def type 289 - let mlf = generate_def_type(name, def, &ctx)?; 290 - output.push_str(&mlf); 291 - output.push('\n'); 292 - } 293 - } 266 + let mlf = match def_type { 267 + "record" => generate_record(name, def, &ctx)?, 268 + "query" => generate_query(name, def, &ctx)?, 269 + "procedure" => generate_procedure(name, def, &ctx)?, 270 + "subscription" => generate_subscription(name, def, &ctx)?, 271 + "token" => generate_token(name, def, &ctx)?, 272 + t if is_known_def_type(t) => generate_def_type(name, def, &ctx)?, 273 + // Unknown def type (e.g. `permission-set`): emit a 274 + // placeholder def with `@const` annotations carrying every 275 + // field, so the shape roundtrips byte-faithfully without 276 + // requiring a grammar entry for every future spec type. 277 + _ => render_unknown_def_passthrough(name, def, last_segment, &ctx), 278 + }; 279 + output.push_str(&mlf); 280 + output.push('\n'); 294 281 } 295 282 296 283 Ok(MlfGenerateOutput { ··· 299 286 }) 300 287 } 301 288 289 + /// Spec-defined top-level fields we handle through dedicated paths. 290 + /// Anything else at the root becomes an `@const` on the emitted 291 + /// `self {}` item. 292 + const TOP_LEVEL_SPEC_FIELDS: &[&str] = &["lexicon", "id", "description", "defs", "$type"]; 293 + 294 + /// Def-kind identifiers we know how to render structurally. Anything 295 + /// else (e.g. `permission-set`) falls through to the unknown-def 296 + /// passthrough. 297 + const KNOWN_DEF_TYPES: &[&str] = &[ 298 + "record", "query", "procedure", "subscription", "token", 299 + "object", "string", "integer", "boolean", "bytes", "blob", 300 + "null", "unknown", "array", "union", "ref", "cid-link", 301 + ]; 302 + 303 + fn is_known_def_type(type_name: &str) -> bool { 304 + KNOWN_DEF_TYPES.contains(&type_name) 305 + } 306 + 307 + /// Spec-defined fields on each def kind. 
Any other key on the def's 308 + /// JSON object becomes an `@const` annotation on the emitted item, so 309 + /// vendor extensions (`revision`, `x-*` flags, etc.) roundtrip 310 + /// byte-faithfully. 311 + const RECORD_SPEC_FIELDS: &[&str] = &["type", "description", "key", "record"]; 312 + const QUERY_SPEC_FIELDS: &[&str] = &[ 313 + "type", "description", "parameters", "output", "errors", 314 + ]; 315 + const PROCEDURE_SPEC_FIELDS: &[&str] = &[ 316 + "type", "description", "parameters", "input", "output", "errors", 317 + ]; 318 + const SUBSCRIPTION_SPEC_FIELDS: &[&str] = &[ 319 + "type", "description", "parameters", "message", "errors", 320 + ]; 321 + const TOKEN_SPEC_FIELDS: &[&str] = &["type", "description"]; 322 + 323 + /// Spec-defined fields at the top of a def-type definition. Covers 324 + /// primitives (with their constraint keys), containers (array, object, 325 + /// union, ref) and unifies them all — anything outside this list on a 326 + /// def-type JSON object is treated as an extension. 327 + const DEF_TYPE_SPEC_FIELDS: &[&str] = &[ 328 + "type", "description", 329 + // Constraint keys (mirror CONSTRAINT_KEYS). 330 + "minLength", "maxLength", "minGraphemes", "maxGraphemes", 331 + "minimum", "maximum", "format", "enum", "knownValues", 332 + "accept", "maxSize", "default", "const", 333 + // Container keys. 334 + "items", "properties", "required", "nullable", 335 + "refs", "closed", "ref", 336 + ]; 337 + 338 + /// Build a `self {}` item from the top-level JSON, or `None` when 339 + /// there's nothing to emit (no description, no unknown fields). Docs 340 + /// come from top-level `description`; extension fields become `@const` 341 + /// annotations. 
342 + fn render_self_item(json: &Value, ctx: &ConversionContext) -> Option<String> { 343 + let obj = json.as_object()?; 344 + 345 + let description = obj.get("description").and_then(|v| v.as_str()).unwrap_or(""); 346 + let has_extension = obj 347 + .keys() 348 + .any(|k| !TOP_LEVEL_SPEC_FIELDS.contains(&k.as_str())); 349 + 350 + if description.is_empty() && !has_extension { 351 + return None; 352 + } 353 + 354 + let mut out = String::new(); 355 + for line in description.lines() { 356 + out.push_str("/// "); 357 + out.push_str(line); 358 + out.push('\n'); 359 + } 360 + for (key, value) in obj { 361 + if TOP_LEVEL_SPEC_FIELDS.contains(&key.as_str()) { 362 + continue; 363 + } 364 + warn_if_reference_shaped(ctx, key, value); 365 + out.push_str(&format!( 366 + "@const(\"{}\", {})\n", 367 + escape_string_for_mlf(key), 368 + render_json_as_mlf_literal(value) 369 + )); 370 + } 371 + out.push_str("self {}\n"); 372 + Some(out) 373 + } 374 + 375 + /// Heuristic: warn when a `@const` string value contains `#`, since that's 376 + /// the ATProto local-ref shape and the author may have intended `@reference`. 377 + /// The converter can't know intent from JSON, so it always emits `@const`; 378 + /// the warning nudges hand-review. 379 + fn warn_if_reference_shaped(ctx: &ConversionContext, key: &str, value: &Value) { 380 + let Value::String(s) = value else { return }; 381 + if !s.contains('#') { 382 + return; 383 + } 384 + ctx.warn(format!( 385 + "extension field {:?} has value {:?} which looks NSID-shaped; \ 386 + emitted as `@const` — consider `@reference` if you intend workspace \ 387 + name resolution when hand-editing the MLF", 388 + key, s 389 + )); 390 + } 391 + 392 + /// Emit a placeholder `def type X = unknown;` with `@const` annotations 393 + /// for every field — used when the def's `type` isn't in our known 394 + /// set. Keeps the lexicon's shape roundtrippable without a dedicated 395 + /// grammar entry. 
396 + fn render_unknown_def_passthrough( 397 + name: &str, 398 + def: &Value, 399 + last_segment: &str, 400 + ctx: &ConversionContext, 401 + ) -> String { 402 + let obj = match def.as_object() { 403 + Some(o) => o, 404 + None => return format!("def type {} = unknown;\n", escape_name(name)), 405 + }; 406 + let mut out = String::new(); 407 + if let Some(description) = obj.get("description").and_then(|v| v.as_str()) { 408 + if !description.is_empty() { 409 + for line in description.lines() { 410 + out.push_str("/// "); 411 + out.push_str(line); 412 + out.push('\n'); 413 + } 414 + } 415 + } 416 + if name == "main" { 417 + out.push_str("@main\n"); 418 + } 419 + for (key, value) in obj { 420 + // `description` surfaced as the doc-comment block already; don't 421 + // double-emit as `@const`. Every other field — including `type` 422 + // itself — passes through as an annotation so the lexicon's 423 + // shape is preserved verbatim. 424 + if key == "description" { 425 + continue; 426 + } 427 + warn_if_reference_shaped(ctx, key, value); 428 + out.push_str(&format!( 429 + "@const(\"{}\", {})\n", 430 + escape_string_for_mlf(key), 431 + render_json_as_mlf_literal(value) 432 + )); 433 + } 434 + let def_name = if name == "main" { 435 + escape_name(last_segment) 436 + } else { 437 + escape_name(name) 438 + }; 439 + out.push_str(&format!("def type {} = unknown;\n", def_name)); 440 + out 441 + } 442 + 443 + /// Emit `@const(key, value)` annotation lines for every field on `def` 444 + /// that isn't listed in `spec_fields`. Each generator calls this after 445 + /// emitting `@main` (if applicable) and before the declaration line, 446 + /// so vendor extensions carry through in the same position the codegen 447 + /// expects to find them when emitting JSON back. 
448 + fn render_extension_annotations( 449 + def: &Value, 450 + spec_fields: &[&str], 451 + ctx: &ConversionContext, 452 + ) -> String { 453 + let Some(obj) = def.as_object() else { 454 + return String::new(); 455 + }; 456 + let mut out = String::new(); 457 + for (key, value) in obj { 458 + if spec_fields.contains(&key.as_str()) { 459 + continue; 460 + } 461 + warn_if_reference_shaped(ctx, key, value); 462 + out.push_str(&format!( 463 + "@const(\"{}\", {})\n", 464 + escape_string_for_mlf(key), 465 + render_json_as_mlf_literal(value) 466 + )); 467 + } 468 + out 469 + } 470 + 471 + /// Render a JSON value as MLF source text suitable for use as an 472 + /// annotation-value literal (the second arg of `@const`). Handles every 473 + /// JSON shape; strings are quoted and escaped, objects use the 474 + /// `{ "key": value, ... }` form. 475 + fn render_json_as_mlf_literal(value: &Value) -> String { 476 + match value { 477 + Value::Null => "null".to_string(), 478 + Value::Bool(b) => b.to_string(), 479 + Value::String(s) => format!("\"{}\"", escape_string_for_mlf(s)), 480 + Value::Number(n) => { 481 + if let Some(i) = n.as_i64() { 482 + i.to_string() 483 + } else if let Some(f) = n.as_f64() { 484 + f.to_string() 485 + } else { 486 + "null".to_string() 487 + } 488 + } 489 + Value::Array(items) => { 490 + let rendered: Vec<String> = items.iter().map(render_json_as_mlf_literal).collect(); 491 + format!("[{}]", rendered.join(", ")) 492 + } 493 + Value::Object(map) => { 494 + let rendered: Vec<String> = map 495 + .iter() 496 + .map(|(k, v)| { 497 + format!( 498 + "\"{}\": {}", 499 + escape_string_for_mlf(k), 500 + render_json_as_mlf_literal(v) 501 + ) 502 + }) 503 + .collect(); 504 + format!("{{ {} }}", rendered.join(", ")) 505 + } 506 + } 507 + } 508 + 509 + fn escape_string_for_mlf(s: &str) -> String { 510 + s.replace('\\', "\\\\").replace('"', "\\\"") 511 + } 512 + 302 513 struct ConversionContext { 303 514 current_namespace: String, 304 515 /// MLF-side name of this 
lexicon's main def. ··· 365 576 output.push_str("@main\n"); 366 577 } 367 578 579 + output.push_str(&render_extension_annotations(def, RECORD_SPEC_FIELDS, ctx)); 580 + 368 581 // Use last segment of NSID for "main" definitions 369 582 let record_name = if name == "main" { 370 583 escape_name(&ctx.local_main_name) ··· 431 644 if name == "main" { 432 645 output.push_str("@main\n"); 433 646 } 647 + 648 + output.push_str(&render_extension_annotations(def, QUERY_SPEC_FIELDS, ctx)); 434 649 435 650 let query_name = if name == "main" { 436 651 escape_name(&ctx.local_main_name) ··· 526 741 output.push_str("@main\n"); 527 742 } 528 743 744 + output.push_str(&render_extension_annotations(def, PROCEDURE_SPEC_FIELDS, ctx)); 745 + 529 746 let procedure_name = if name == "main" { 530 747 escape_name(&ctx.local_main_name) 531 748 } else { ··· 625 842 output.push_str("@main\n"); 626 843 } 627 844 845 + output.push_str(&render_extension_annotations(def, SUBSCRIPTION_SPEC_FIELDS, ctx)); 846 + 628 847 let subscription_name = if name == "main" { 629 848 escape_name(&ctx.local_main_name) 630 849 } else { ··· 680 899 Ok(output) 681 900 } 682 901 683 - fn generate_token(name: &str, def: &Value) -> Result<String, MlfGenerateError> { 902 + fn generate_token( 903 + name: &str, 904 + def: &Value, 905 + ctx: &ConversionContext, 906 + ) -> Result<String, MlfGenerateError> { 684 907 let mut output = String::new(); 685 908 686 909 // Add doc comment ··· 691 914 } 692 915 } 693 916 } 917 + 918 + output.push_str(&render_extension_annotations(def, TOKEN_SPEC_FIELDS, ctx)); 694 919 695 920 let escaped_name = escape_name(name); 696 921 output.push_str(&format!("token {};\n", escaped_name)); ··· 713 938 if name == "main" { 714 939 output.push_str("@main\n"); 715 940 } 941 + 942 + output.push_str(&render_extension_annotations(def, DEF_TYPE_SPEC_FIELDS, ctx)); 716 943 717 944 // Use last segment of NSID for "main" definitions 718 945 // Keywords are now allowed by the parser, so just escape with 
backticks
-941
mlf-cli/src/generate/mlf.rs.backup
··· 1 - use miette::Diagnostic; 2 - use serde_json::Value; 3 - use std::path::PathBuf; 4 - use thiserror::Error; 5 - 6 - #[derive(Error, Debug, Diagnostic)] 7 - pub enum MlfGenerateError { 8 - #[error("Failed to read file: {path}")] 9 - #[diagnostic(code(mlf::generate::read_file))] 10 - #[allow(dead_code)] 11 - ReadFile { 12 - path: String, 13 - #[source] 14 - source: std::io::Error, 15 - }, 16 - 17 - #[error("Failed to parse JSON: {path}")] 18 - #[diagnostic(code(mlf::generate::parse_json))] 19 - #[allow(dead_code)] 20 - ParseJson { 21 - path: String, 22 - #[source] 23 - source: serde_json::Error, 24 - }, 25 - 26 - #[error("Failed to write output: {path}")] 27 - #[diagnostic(code(mlf::generate::write_output))] 28 - WriteOutput { 29 - path: String, 30 - #[source] 31 - source: std::io::Error, 32 - }, 33 - 34 - #[error("Invalid lexicon format: {message}")] 35 - #[diagnostic(code(mlf::generate::invalid_lexicon))] 36 - InvalidLexicon { message: String }, 37 - 38 - #[error("Failed to expand glob pattern")] 39 - #[diagnostic(code(mlf::generate::glob_error))] 40 - GlobError { 41 - #[source] 42 - source: glob::GlobError, 43 - }, 44 - 45 - #[error("Invalid glob pattern: {pattern}")] 46 - #[diagnostic(code(mlf::generate::invalid_glob))] 47 - InvalidGlob { 48 - pattern: String, 49 - #[source] 50 - source: glob::PatternError, 51 - }, 52 - } 53 - 54 - pub fn run(input_patterns: Vec<String>, output_dir: PathBuf) -> Result<(), MlfGenerateError> { 55 - let mut file_paths = Vec::new(); 56 - 57 - for pattern in input_patterns { 58 - if pattern.contains('*') || pattern.contains('?') { 59 - for entry in glob::glob(&pattern).map_err(|source| MlfGenerateError::InvalidGlob { 60 - pattern: pattern.clone(), 61 - source, 62 - })? 
{ 63 - let path = entry.map_err(|source| MlfGenerateError::GlobError { source })?; 64 - file_paths.push(path); 65 - } 66 - } else { 67 - file_paths.push(PathBuf::from(pattern)); 68 - } 69 - } 70 - 71 - std::fs::create_dir_all(&output_dir).map_err(|source| MlfGenerateError::WriteOutput { 72 - path: output_dir.display().to_string(), 73 - source, 74 - })?; 75 - 76 - let mut errors = Vec::new(); 77 - let mut success_count = 0; 78 - 79 - for file_path in file_paths { 80 - let source = match std::fs::read_to_string(&file_path) { 81 - Ok(s) => s, 82 - Err(source) => { 83 - errors.push(( 84 - file_path.display().to_string(), 85 - format!("Failed to read file: {}", source), 86 - )); 87 - continue; 88 - } 89 - }; 90 - 91 - let json: Value = match serde_json::from_str(&source) { 92 - Ok(j) => j, 93 - Err(source) => { 94 - errors.push(( 95 - file_path.display().to_string(), 96 - format!("Failed to parse JSON: {}", source), 97 - )); 98 - continue; 99 - } 100 - }; 101 - 102 - let mlf_content = match generate_mlf_from_json(&json) { 103 - Ok(content) => content, 104 - Err(e) => { 105 - errors.push((file_path.display().to_string(), format!("{:?}", e))); 106 - continue; 107 - } 108 - }; 109 - 110 - // Extract namespace from JSON "id" field 111 - let namespace = json 112 - .get("id") 113 - .and_then(|v| v.as_str()) 114 - .ok_or_else(|| MlfGenerateError::InvalidLexicon { 115 - message: "Missing 'id' field in lexicon".to_string(), 116 - })?; 117 - 118 - // Create output path from namespace 119 - let mut output_path = output_dir.clone(); 120 - for segment in namespace.split('.') { 121 - output_path.push(segment); 122 - } 123 - if let Err(source) = std::fs::create_dir_all(&output_path.parent().unwrap()) { 124 - errors.push(( 125 - file_path.display().to_string(), 126 - format!("Failed to create directory: {}", source), 127 - )); 128 - continue; 129 - } 130 - output_path.set_extension("mlf"); 131 - 132 - if let Err(source) = std::fs::write(&output_path, mlf_content) { 133 - errors.push(( 
134 - output_path.display().to_string(), 135 - format!("Failed to write file: {}", source), 136 - )); 137 - continue; 138 - } 139 - 140 - println!("Generated: {}", output_path.display()); 141 - success_count += 1; 142 - } 143 - 144 - if !errors.is_empty() { 145 - eprintln!( 146 - "\n{} file(s) generated successfully, {} error(s) encountered:\n", 147 - success_count, 148 - errors.len() 149 - ); 150 - for (path, error) in &errors { 151 - eprintln!(" {} - {}", path, error); 152 - } 153 - eprintln!(); 154 - return Err(MlfGenerateError::InvalidLexicon { 155 - message: format!("{} errors total", errors.len()), 156 - }); 157 - } 158 - 159 - println!("\nSuccessfully generated {} file(s)", success_count); 160 - Ok(()) 161 - } 162 - 163 - pub fn generate_mlf_from_json(json: &Value) -> Result<String, MlfGenerateError> { 164 - let mut output = String::new(); 165 - 166 - // Extract NSID to get the last segment for "main" definitions 167 - let nsid = json 168 - .get("id") 169 - .and_then(|v| v.as_str()) 170 - .ok_or_else(|| MlfGenerateError::InvalidLexicon { 171 - message: "Missing 'id' field in lexicon".to_string(), 172 - })?; 173 - 174 - let last_segment = nsid.split('.').last().unwrap_or("main"); 175 - 176 - let defs = json.get("defs").and_then(|v| v.as_object()).ok_or_else(|| { 177 - MlfGenerateError::InvalidLexicon { 178 - message: "Missing or invalid 'defs' field".to_string(), 179 - } 180 - })?; 181 - 182 - // Create a context to pass the current namespace to type generation 183 - let ctx = ConversionContext { 184 - current_namespace: nsid.to_string(), 185 - }; 186 - 187 - // Process all definitions 188 - for (name, def) in defs { 189 - let def_type = def.get("type").and_then(|v| v.as_str()).ok_or_else(|| { 190 - MlfGenerateError::InvalidLexicon { 191 - message: format!("Missing 'type' field for definition '{}'", name), 192 - } 193 - })?; 194 - 195 - match def_type { 196 - "record" => { 197 - let mlf = generate_record(name, def, last_segment, &ctx)?; 198 - 
output.push_str(&mlf); 199 - output.push('\n'); 200 - } 201 - "query" => { 202 - let mlf = generate_query(name, def, last_segment, &ctx)?; 203 - output.push_str(&mlf); 204 - output.push('\n'); 205 - } 206 - "procedure" => { 207 - let mlf = generate_procedure(name, def, last_segment, &ctx)?; 208 - output.push_str(&mlf); 209 - output.push('\n'); 210 - } 211 - "subscription" => { 212 - let mlf = generate_subscription(name, def, last_segment, &ctx)?; 213 - output.push_str(&mlf); 214 - output.push('\n'); 215 - } 216 - "token" => { 217 - let mlf = generate_token(name, def)?; 218 - output.push_str(&mlf); 219 - output.push('\n'); 220 - } 221 - "object" => { 222 - let mlf = generate_def_type(name, def, last_segment, &ctx)?; 223 - output.push_str(&mlf); 224 - output.push('\n'); 225 - } 226 - _ => { 227 - // Unknown type, skip 228 - } 229 - } 230 - } 231 - 232 - Ok(output) 233 - } 234 - 235 - struct ConversionContext { 236 - current_namespace: String, 237 - } 238 - 239 - /// Reserved words in MLF that need to be escaped 240 - const RESERVED_WORDS: &[&str] = &[ 241 - "main", "record", "query", "procedure", "subscription", "token", "def", "type", "use", 242 - "pub", "alias", "namespace", "constrained", "error", "unit", "null", "boolean", 243 - "integer", "string", "bytes", "blob", "unknown", "array", "object", "union", "ref", 244 - ]; 245 - 246 - /// Escape a name if it's a reserved word 247 - fn escape_name(name: &str) -> String { 248 - if RESERVED_WORDS.contains(&name) { 249 - format!("`{}`", name) 250 - } else { 251 - name.to_string() 252 - } 253 - } 254 - 255 - fn generate_record(name: &str, def: &Value, last_segment: &str, ctx: &ConversionContext) -> Result<String, MlfGenerateError> { 256 - let mut output = String::new(); 257 - 258 - // Add doc comment if present 259 - if let Some(desc) = def.get("description").and_then(|v| v.as_str()) { 260 - if !desc.is_empty() { 261 - for line in desc.lines() { 262 - output.push_str(&format!("/// {}\n", line)); 263 - } 264 - } 265 - } 
266 - 267 - // Add @main annotation for "main" definitions 268 - if name == "main" { 269 - output.push_str("@main\n"); 270 - } 271 - 272 - // Use last segment of NSID for "main" definitions 273 - let record_name = if name == "main" { 274 - escape_name(last_segment) 275 - } else { 276 - escape_name(name) 277 - }; 278 - 279 - output.push_str(&format!("record {} {{\n", record_name)); 280 - 281 - // Get the record object 282 - let record_obj = def.get("record").and_then(|v| v.as_object()).ok_or_else(|| { 283 - MlfGenerateError::InvalidLexicon { 284 - message: format!("Missing 'record' field in record definition '{}'", name), 285 - } 286 - })?; 287 - 288 - let properties = record_obj 289 - .get("properties") 290 - .and_then(|v| v.as_object()) 291 - .ok_or_else(|| MlfGenerateError::InvalidLexicon { 292 - message: format!("Missing 'properties' in record '{}'", name), 293 - })?; 294 - 295 - let required = record_obj 296 - .get("required") 297 - .and_then(|v| v.as_array()) 298 - .map(|arr| { 299 - arr.iter() 300 - .filter_map(|v| v.as_str()) 301 - .collect::<Vec<_>>() 302 - }) 303 - .unwrap_or_default(); 304 - 305 - for (field_name, field_def) in properties { 306 - // Add field doc comment 307 - if let Some(desc) = field_def.get("description").and_then(|v| v.as_str()) { 308 - if !desc.is_empty() { 309 - for line in desc.lines() { 310 - output.push_str(&format!(" /// {}\n", line)); 311 - } 312 - } 313 - } 314 - 315 - let is_required = required.contains(&field_name.as_str()); 316 - let required_marker = if is_required { "!" 
} else { "" }; 317 - 318 - let field_type = generate_type(field_def)?; 319 - let escaped_field_name = escape_name(field_name); 320 - output.push_str(&format!( 321 - " {}{}: {},\n", 322 - escaped_field_name, required_marker, field_type 323 - )); 324 - } 325 - 326 - output.push_str("}\n"); 327 - Ok(output) 328 - } 329 - 330 - fn generate_query(name: &str, def: &Value, last_segment: &str, ctx: &ConversionContext) -> Result<String, MlfGenerateError> { 331 - let mut output = String::new(); 332 - 333 - // Add doc comment 334 - if let Some(desc) = def.get("description").and_then(|v| v.as_str()) { 335 - if !desc.is_empty() { 336 - for line in desc.lines() { 337 - output.push_str(&format!("/// {}\n", line)); 338 - } 339 - } 340 - } 341 - 342 - // Add @main annotation for "main" definitions 343 - if name == "main" { 344 - output.push_str("@main\n"); 345 - } 346 - 347 - let query_name = if name == "main" { 348 - escape_name(last_segment) 349 - } else { 350 - escape_name(name) 351 - }; 352 - output.push_str(&format!("query {}", query_name)); 353 - 354 - // Parameters 355 - output.push('('); 356 - if let Some(params) = def.get("parameters").and_then(|v| v.as_object()) { 357 - let properties = params.get("properties").and_then(|v| v.as_object()); 358 - let required = params 359 - .get("required") 360 - .and_then(|v| v.as_array()) 361 - .map(|arr| { 362 - arr.iter() 363 - .filter_map(|v| v.as_str()) 364 - .collect::<Vec<_>>() 365 - }) 366 - .unwrap_or_default(); 367 - 368 - if let Some(props) = properties { 369 - let param_strs: Vec<String> = props 370 - .iter() 371 - .map(|(param_name, param_def)| { 372 - let is_required = required.contains(&param_name.as_str()); 373 - let required_marker = if is_required { "!" 
} else { "" }; 374 - let param_type = generate_type(param_def).unwrap_or_else(|_| "unknown".to_string()); 375 - let escaped_param_name = escape_name(param_name); 376 - 377 - // Add doc comment inline if present 378 - let mut result = String::new(); 379 - if let Some(desc) = param_def.get("description").and_then(|v| v.as_str()) { 380 - if !desc.is_empty() { 381 - result.push_str(&format!("\n /// {}\n ", desc)); 382 - } 383 - } 384 - result.push_str(&format!("{}{}: {}", escaped_param_name, required_marker, param_type)); 385 - result 386 - }) 387 - .collect(); 388 - 389 - if !param_strs.is_empty() { 390 - output.push_str(&param_strs.join(",")); 391 - } 392 - } 393 - } 394 - output.push(')'); 395 - 396 - // Output type 397 - if let Some(output_obj) = def.get("output").and_then(|v| v.as_object()) { 398 - if let Some(schema) = output_obj.get("schema") { 399 - let return_type = generate_type(schema)?; 400 - output.push_str(&format!(": {}", return_type)); 401 - 402 - // Check for errors 403 - if let Some(errors) = output_obj.get("errors").and_then(|v| v.as_object()) { 404 - output.push_str(" | error {\n"); 405 - for (error_name, error_def) in errors { 406 - if let Some(desc) = error_def.get("description").and_then(|v| v.as_str()) { 407 - if !desc.is_empty() { 408 - output.push_str(&format!(" /// {}\n", desc)); 409 - } 410 - } 411 - output.push_str(&format!(" {},\n", error_name)); 412 - } 413 - output.push('}'); 414 - } 415 - } 416 - } 417 - 418 - output.push_str(";\n"); 419 - Ok(output) 420 - } 421 - 422 - fn generate_procedure(name: &str, def: &Value, last_segment: &str, ctx: &ConversionContext) -> Result<String, MlfGenerateError> { 423 - let mut output = String::new(); 424 - 425 - // Add doc comment 426 - if let Some(desc) = def.get("description").and_then(|v| v.as_str()) { 427 - if !desc.is_empty() { 428 - for line in desc.lines() { 429 - output.push_str(&format!("/// {}\n", line)); 430 - } 431 - } 432 - } 433 - 434 - // Add @main annotation for "main" definitions 435 - 
if name == "main" { 436 - output.push_str("@main\n"); 437 - } 438 - 439 - let procedure_name = if name == "main" { 440 - escape_name(last_segment) 441 - } else { 442 - escape_name(name) 443 - }; 444 - output.push_str(&format!("procedure {}", procedure_name)); 445 - 446 - // Input parameters 447 - output.push('('); 448 - if let Some(input) = def.get("input").and_then(|v| v.as_object()) { 449 - if let Some(schema) = input.get("schema").and_then(|v| v.as_object()) { 450 - let properties = schema.get("properties").and_then(|v| v.as_object()); 451 - let required = schema 452 - .get("required") 453 - .and_then(|v| v.as_array()) 454 - .map(|arr| { 455 - arr.iter() 456 - .filter_map(|v| v.as_str()) 457 - .collect::<Vec<_>>() 458 - }) 459 - .unwrap_or_default(); 460 - 461 - if let Some(props) = properties { 462 - let param_strs: Vec<String> = props 463 - .iter() 464 - .map(|(param_name, param_def)| { 465 - let is_required = required.contains(&param_name.as_str()); 466 - let required_marker = if is_required { "!" 
} else { "" }; 467 - let param_type = 468 - generate_type(param_def).unwrap_or_else(|_| "unknown".to_string()); 469 - let escaped_param_name = escape_name(param_name); 470 - 471 - // Add doc comment inline if present 472 - let mut result = String::new(); 473 - if let Some(desc) = param_def.get("description").and_then(|v| v.as_str()) { 474 - if !desc.is_empty() { 475 - result.push_str(&format!("\n /// {}\n ", desc)); 476 - } 477 - } 478 - result.push_str(&format!( 479 - "{}{}: {}", 480 - escaped_param_name, required_marker, param_type 481 - )); 482 - result 483 - }) 484 - .collect(); 485 - 486 - if !param_strs.is_empty() { 487 - output.push_str(&param_strs.join(",")); 488 - } 489 - } 490 - } 491 - } 492 - output.push(')'); 493 - 494 - // Output type 495 - if let Some(output_obj) = def.get("output").and_then(|v| v.as_object()) { 496 - if let Some(schema) = output_obj.get("schema") { 497 - let return_type = generate_type(schema)?; 498 - output.push_str(&format!(": {}", return_type)); 499 - 500 - // Check for errors 501 - if let Some(errors) = output_obj.get("errors").and_then(|v| v.as_object()) { 502 - output.push_str(" | error {\n"); 503 - for (error_name, error_def) in errors { 504 - if let Some(desc) = error_def.get("description").and_then(|v| v.as_str()) { 505 - if !desc.is_empty() { 506 - output.push_str(&format!(" /// {}\n", desc)); 507 - } 508 - } 509 - output.push_str(&format!(" {},\n", error_name)); 510 - } 511 - output.push('}'); 512 - } 513 - } 514 - } 515 - 516 - output.push_str(";\n"); 517 - Ok(output) 518 - } 519 - 520 - fn generate_subscription(name: &str, def: &Value, last_segment: &str, ctx: &ConversionContext) -> Result<String, MlfGenerateError> { 521 - let mut output = String::new(); 522 - 523 - // Add doc comment 524 - if let Some(desc) = def.get("description").and_then(|v| v.as_str()) { 525 - if !desc.is_empty() { 526 - for line in desc.lines() { 527 - output.push_str(&format!("/// {}\n", line)); 528 - } 529 - } 530 - } 531 - 532 - // Add @main 
annotation for "main" definitions 533 - if name == "main" { 534 - output.push_str("@main\n"); 535 - } 536 - 537 - let subscription_name = if name == "main" { 538 - escape_name(last_segment) 539 - } else { 540 - escape_name(name) 541 - }; 542 - output.push_str(&format!("subscription {}", subscription_name)); 543 - 544 - // Parameters 545 - output.push('('); 546 - if let Some(params) = def.get("parameters").and_then(|v| v.as_object()) { 547 - let properties = params.get("properties").and_then(|v| v.as_object()); 548 - let required = params 549 - .get("required") 550 - .and_then(|v| v.as_array()) 551 - .map(|arr| { 552 - arr.iter() 553 - .filter_map(|v| v.as_str()) 554 - .collect::<Vec<_>>() 555 - }) 556 - .unwrap_or_default(); 557 - 558 - if let Some(props) = properties { 559 - let param_strs: Vec<String> = props 560 - .iter() 561 - .map(|(param_name, param_def)| { 562 - let is_required = required.contains(&param_name.as_str()); 563 - let required_marker = if is_required { "!" } else { "" }; 564 - let param_type = generate_type(param_def).unwrap_or_else(|_| "unknown".to_string()); 565 - let escaped_param_name = escape_name(param_name); 566 - 567 - format!("{}{}: {}", escaped_param_name, required_marker, param_type) 568 - }) 569 - .collect(); 570 - 571 - if !param_strs.is_empty() { 572 - output.push_str(&param_strs.join(", ")); 573 - } 574 - } 575 - } 576 - output.push(')'); 577 - 578 - // Message types 579 - if let Some(message) = def.get("message").and_then(|v| v.as_object()) { 580 - if let Some(schema) = message.get("schema") { 581 - let message_type = generate_type(schema)?; 582 - output.push_str(&format!(": {}", message_type)); 583 - } 584 - } 585 - 586 - output.push_str(";\n"); 587 - Ok(output) 588 - } 589 - 590 - fn generate_token(name: &str, def: &Value) -> Result<String, MlfGenerateError> { 591 - let mut output = String::new(); 592 - 593 - // Add doc comment 594 - if let Some(desc) = def.get("description").and_then(|v| v.as_str()) { 595 - if !desc.is_empty() 
{ 596 - for line in desc.lines() { 597 - output.push_str(&format!("/// {}\n", line)); 598 - } 599 - } 600 - } 601 - 602 - let escaped_name = escape_name(name); 603 - output.push_str(&format!("token {};\n", escaped_name)); 604 - Ok(output) 605 - } 606 - 607 - fn generate_def_type(name: &str, def: &Value, last_segment: &str, ctx: &ConversionContext) -> Result<String, MlfGenerateError> { 608 - let mut output = String::new(); 609 - 610 - // Add @main annotation for "main" definitions 611 - if name == "main" { 612 - output.push_str("@main\n"); 613 - } 614 - 615 - // Use last segment of NSID for "main" definitions 616 - let def_name = if name == "main" { 617 - escape_name(last_segment) 618 - } else { 619 - escape_name(name) 620 - }; 621 - 622 - output.push_str(&format!("def type {} = ", def_name)); 623 - let type_str = generate_type_with_indent(def, 0)?; 624 - output.push_str(&type_str); 625 - output.push_str(";\n"); 626 - 627 - Ok(output) 628 - } 629 - 630 - fn generate_type_with_indent(type_def: &Value, indent_level: usize, ctx: &ConversionContext) -> Result<String, MlfGenerateError> { 631 - let type_name = type_def.get("type").and_then(|v| v.as_str()); 632 - 633 - match type_name { 634 - Some("object") => { 635 - let indent = " ".repeat(indent_level); 636 - let field_indent = " ".repeat(indent_level + 1); 637 - 638 - let mut output = String::from("{\n"); 639 - let properties = type_def 640 - .get("properties") 641 - .and_then(|v| v.as_object()) 642 - .ok_or_else(|| MlfGenerateError::InvalidLexicon { 643 - message: "Missing 'properties' in object type".to_string(), 644 - })?; 645 - 646 - let required = type_def 647 - .get("required") 648 - .and_then(|v| v.as_array()) 649 - .map(|arr| { 650 - arr.iter() 651 - .filter_map(|v| v.as_str()) 652 - .collect::<Vec<_>>() 653 - }) 654 - .unwrap_or_default(); 655 - 656 - for (field_name, field_def) in properties { 657 - // Add field doc comment 658 - if let Some(desc) = field_def.get("description").and_then(|v| v.as_str()) { 659 
- if !desc.is_empty() { 660 - for line in desc.lines() { 661 - output.push_str(&format!("{}/// {}\n", field_indent, line)); 662 - } 663 - } 664 - } 665 - 666 - let is_required = required.contains(&field_name.as_str()); 667 - let required_marker = if is_required { "!" } else { "" }; 668 - let field_type = generate_type_with_indent(field_def, indent_level + 1)?; 669 - let escaped_field_name = escape_name(field_name); 670 - output.push_str(&format!( 671 - "{}{}{}: {},\n", 672 - field_indent, escaped_field_name, required_marker, field_type 673 - )); 674 - } 675 - 676 - output.push_str(&format!("{}}}", indent)); 677 - Ok(output) 678 - } 679 - _ => generate_type(type_def), 680 - } 681 - } 682 - 683 - fn generate_type(type_def: &Value, ctx: &ConversionContext) -> Result<String, MlfGenerateError> { 684 - let type_name = type_def.get("type").and_then(|v| v.as_str()); 685 - 686 - match type_name { 687 - Some("null") => Ok("null".to_string()), 688 - Some("boolean") => Ok("boolean".to_string()), 689 - Some("integer") => { 690 - let mut result = "integer".to_string(); 691 - result = apply_constraints(result, type_def); 692 - Ok(result) 693 - } 694 - Some("string") => { 695 - // Check if this is a format string that maps to a prelude type 696 - if let Some(format) = type_def.get("format").and_then(|v| v.as_str()) { 697 - let prelude_type = match format { 698 - "did" => "Did", 699 - "at-uri" => "AtUri", 700 - "at-identifier" => "AtIdentifier", 701 - "handle" => "Handle", 702 - "datetime" => "Datetime", 703 - "uri" => "Uri", 704 - "cid" => "Cid", 705 - "nsid" => "Nsid", 706 - "tid" => "Tid", 707 - "record-key" => "RecordKey", 708 - "language" => "Language", 709 - _ => { 710 - // Unknown format, fall through to normal string with constraints 711 - let mut result = "string".to_string(); 712 - result = apply_constraints(result, type_def); 713 - return Ok(result); 714 - } 715 - }; 716 - // If it's a known prelude type with only the format constraint, use the prelude type directly 717 
- // Check if there are other constraints besides format 718 - let has_other_constraints = type_def.get("minLength").is_some() 719 - || type_def.get("maxLength").is_some() 720 - || type_def.get("minGraphemes").is_some() 721 - || type_def.get("maxGraphemes").is_some() 722 - || type_def.get("enum").is_some() 723 - || type_def.get("knownValues").is_some() 724 - || type_def.get("default").is_some(); 725 - 726 - if !has_other_constraints { 727 - return Ok(prelude_type.to_string()); 728 - } 729 - } 730 - 731 - let mut result = "string".to_string(); 732 - result = apply_constraints(result, type_def); 733 - Ok(result) 734 - } 735 - Some("bytes") => Ok("bytes".to_string()), 736 - Some("blob") => { 737 - let mut result = "blob".to_string(); 738 - result = apply_constraints(result, type_def); 739 - Ok(result) 740 - } 741 - Some("unknown") => Ok("unknown".to_string()), 742 - Some("array") => { 743 - let items = type_def.get("items").ok_or_else(|| { 744 - MlfGenerateError::InvalidLexicon { 745 - message: "Missing 'items' in array type".to_string(), 746 - } 747 - })?; 748 - 749 - // Check if items have constraints 750 - let items_obj = items.as_object(); 751 - let has_item_constraints = items_obj.map_or(false, |obj| { 752 - obj.contains_key("minLength") || 753 - obj.contains_key("maxLength") || 754 - obj.contains_key("minGraphemes") || 755 - obj.contains_key("maxGraphemes") || 756 - obj.contains_key("minimum") || 757 - obj.contains_key("maximum") || 758 - obj.contains_key("enum") || 759 - obj.contains_key("knownValues") || 760 - obj.contains_key("default") 761 - }); 762 - 763 - let item_type = if has_item_constraints { 764 - // If item has constraints, we need to wrap in parentheses to apply constraints before [] 765 - // For now, just generate the base type without item constraints 766 - // TODO: Consider generating a type alias for complex constrained items 767 - items.get("type") 768 - .and_then(|t| t.as_str()) 769 - .unwrap_or("unknown") 770 - .to_string() 771 - } else { 772 
- generate_type(items)? 773 - }; 774 - 775 - let mut result = format!("{}[]", item_type); 776 - result = apply_constraints(result, type_def); 777 - Ok(result) 778 - } 779 - Some("object") => { 780 - let mut output = String::from("{\n"); 781 - let properties = type_def 782 - .get("properties") 783 - .and_then(|v| v.as_object()) 784 - .ok_or_else(|| MlfGenerateError::InvalidLexicon { 785 - message: "Missing 'properties' in object type".to_string(), 786 - })?; 787 - 788 - let required = type_def 789 - .get("required") 790 - .and_then(|v| v.as_array()) 791 - .map(|arr| { 792 - arr.iter() 793 - .filter_map(|v| v.as_str()) 794 - .collect::<Vec<_>>() 795 - }) 796 - .unwrap_or_default(); 797 - 798 - for (field_name, field_def) in properties { 799 - // Add field doc comment 800 - if let Some(desc) = field_def.get("description").and_then(|v| v.as_str()) { 801 - if !desc.is_empty() { 802 - for line in desc.lines() { 803 - output.push_str(&format!(" /// {}\n", line)); 804 - } 805 - } 806 - } 807 - 808 - let is_required = required.contains(&field_name.as_str()); 809 - let required_marker = if is_required { "!" 
} else { "" }; 810 - let field_type = generate_type(field_def)?; 811 - let escaped_field_name = escape_name(field_name); 812 - output.push_str(&format!( 813 - " {}{}: {},\n", 814 - escaped_field_name, required_marker, field_type 815 - )); 816 - } 817 - 818 - output.push_str(" }"); 819 - Ok(output) 820 - } 821 - Some("union") => { 822 - let refs = type_def.get("refs").and_then(|v| v.as_array()).ok_or_else(|| { 823 - MlfGenerateError::InvalidLexicon { 824 - message: "Missing 'refs' in union type".to_string(), 825 - } 826 - })?; 827 - 828 - let type_strs: Vec<String> = refs 829 - .iter() 830 - .map(|r| generate_type(r).unwrap_or_else(|_| "unknown".to_string())) 831 - .collect(); 832 - 833 - let mut result = type_strs.join(" | "); 834 - 835 - // Check if closed 836 - if type_def.get("closed").and_then(|v| v.as_bool()).unwrap_or(false) { 837 - result.push_str(" | !"); 838 - } 839 - 840 - Ok(result) 841 - } 842 - Some("ref") => { 843 - if let Some(ref_str) = type_def.get("ref").and_then(|v| v.as_str()) { 844 - // Handle references: 845 - // "#defName" -> "defName" (local reference, same file) 846 - // "namespace.id#defName" -> Check if same namespace, if so use "defName", else use full path 847 - 848 - if let Some(stripped) = ref_str.strip_prefix('#') { 849 - // Local reference: #defName -> defName 850 - Ok(stripped.to_string()) 851 - } else if let Some((namespace, def_name)) = ref_str.split_once('#') { 852 - // Check if this is the current namespace 853 - // For now, we'll just use the def name if it's the same namespace 854 - // Note: This requires passing context through, which we'll add 855 - // For external refs, we keep the full NSID format 856 - Ok(format!("{}.{}", namespace, def_name)) 857 - } else { 858 - // No # at all - shouldn't happen in valid lexicons, but handle gracefully 859 - Ok(ref_str.to_string()) 860 - } 861 - } else { 862 - Err(MlfGenerateError::InvalidLexicon { 863 - message: "Missing 'ref' in ref type".to_string(), 864 - }) 865 - } 866 - } 867 - _ 
=> Ok("unknown".to_string()), 868 - } 869 - } 870 - 871 - fn apply_constraints(mut type_str: String, type_def: &Value) -> String { 872 - let mut constraints = Vec::new(); 873 - 874 - if let Some(min_length) = type_def.get("minLength").and_then(|v| v.as_i64()) { 875 - constraints.push(format!("minLength: {}", min_length)); 876 - } 877 - if let Some(max_length) = type_def.get("maxLength").and_then(|v| v.as_i64()) { 878 - constraints.push(format!("maxLength: {}", max_length)); 879 - } 880 - if let Some(min_graphemes) = type_def.get("minGraphemes").and_then(|v| v.as_i64()) { 881 - constraints.push(format!("minGraphemes: {}", min_graphemes)); 882 - } 883 - if let Some(max_graphemes) = type_def.get("maxGraphemes").and_then(|v| v.as_i64()) { 884 - constraints.push(format!("maxGraphemes: {}", max_graphemes)); 885 - } 886 - if let Some(minimum) = type_def.get("minimum").and_then(|v| v.as_i64()) { 887 - constraints.push(format!("minimum: {}", minimum)); 888 - } 889 - if let Some(maximum) = type_def.get("maximum").and_then(|v| v.as_i64()) { 890 - constraints.push(format!("maximum: {}", maximum)); 891 - } 892 - if let Some(format) = type_def.get("format").and_then(|v| v.as_str()) { 893 - constraints.push(format!("format: \"{}\"", format)); 894 - } 895 - if let Some(enum_vals) = type_def.get("enum").and_then(|v| v.as_array()) { 896 - let vals: Vec<String> = enum_vals 897 - .iter() 898 - .filter_map(|v| v.as_str()) 899 - .map(|s| format!("\"{}\"", s)) 900 - .collect(); 901 - constraints.push(format!("enum: [{}]", vals.join(", "))); 902 - } 903 - if let Some(known_vals) = type_def.get("knownValues").and_then(|v| v.as_array()) { 904 - let vals: Vec<String> = known_vals 905 - .iter() 906 - .filter_map(|v| v.as_str()) 907 - .map(|s| format!("\"{}\"", s)) 908 - .collect(); 909 - constraints.push(format!("knownValues: [{}]", vals.join(", "))); 910 - } 911 - if let Some(accept) = type_def.get("accept").and_then(|v| v.as_array()) { 912 - let mimes: Vec<String> = accept 913 - .iter() 914 
- .filter_map(|v| v.as_str()) 915 - .map(|s| format!("\"{}\"", s)) 916 - .collect(); 917 - constraints.push(format!("accept: [{}]", mimes.join(", "))); 918 - } 919 - if let Some(max_size) = type_def.get("maxSize").and_then(|v| v.as_i64()) { 920 - constraints.push(format!("maxSize: {}", max_size)); 921 - } 922 - if let Some(default) = type_def.get("default") { 923 - let default_str = match default { 924 - Value::String(s) => format!("\"{}\"", s), 925 - Value::Number(n) => n.to_string(), 926 - Value::Bool(b) => b.to_string(), 927 - _ => "null".to_string(), 928 - }; 929 - constraints.push(format!("default: {}", default_str)); 930 - } 931 - 932 - if !constraints.is_empty() { 933 - type_str.push_str(" constrained {\n"); 934 - for constraint in &constraints { 935 - type_str.push_str(&format!(" {},\n", constraint)); 936 - } 937 - type_str.push_str(" }"); 938 - } 939 - 940 - type_str 941 - }
+207 -12
mlf-codegen/src/lib.rs
··· 3 3 use serde_json::{json, Map, Value}; 4 4 use std::collections::HashMap; 5 5 6 + /// A non-fatal advisory emitted during codegen. Mirrors the shape of 7 + /// `ConversionWarning` on the JSON→MLF side so both directions use one 8 + /// structured-warning model. 9 + #[derive(Debug, Clone, PartialEq)] 10 + pub struct CodegenWarning { 11 + /// Namespace of the lexicon that emitted the warning. 12 + pub namespace: String, 13 + /// Human-readable description of what was coerced and why. 14 + pub message: String, 15 + } 16 + 17 + /// Bundle of codegen output: the generated lexicon JSON plus any 18 + /// advisory warnings produced while walking the AST. 19 + #[derive(Debug, Clone)] 20 + pub struct CodegenOutput { 21 + pub json: Value, 22 + pub warnings: Vec<CodegenWarning>, 23 + } 24 + 6 25 // Re-export inventory for macros 7 26 #[doc(hidden)] 8 27 pub use inventory; ··· 112 131 }) 113 132 } 114 133 115 - pub fn generate_lexicon(namespace: &str, lexicon: &Lexicon, workspace: &Workspace) -> Value { 134 + pub fn generate_lexicon(namespace: &str, lexicon: &Lexicon, workspace: &Workspace) -> CodegenOutput { 116 135 let usage_counts = analyze_type_usage(lexicon); 117 136 let eligibility = MainEligibility::for_lexicon(namespace, lexicon); 118 137 119 138 let mut defs = Map::new(); 139 + let mut self_description = String::new(); 140 + let mut self_extensions: Vec<(String, Value)> = Vec::new(); 141 + let mut warnings: Vec<CodegenWarning> = Vec::new(); 120 142 121 143 for item in &lexicon.items { 122 144 match item { 123 145 Item::Record(record) => { 124 - let value = generate_record_json(record, &usage_counts, workspace, namespace); 146 + let mut value = generate_record_json(record, &usage_counts, workspace, namespace); 147 + apply_extension_annotations(&mut value, &record.annotations, workspace, namespace, &mut warnings); 125 148 insert_def(&mut defs, &record.name.name, eligibility.is_main(&record.name.name, &record.annotations), value); 126 149 } 127 150 Item::Query(query) 
=> { 128 - let value = generate_query_json(query, &usage_counts, workspace, namespace); 151 + let mut value = generate_query_json(query, &usage_counts, workspace, namespace); 152 + apply_extension_annotations(&mut value, &query.annotations, workspace, namespace, &mut warnings); 129 153 insert_def(&mut defs, &query.name.name, eligibility.is_main(&query.name.name, &query.annotations), value); 130 154 } 131 155 Item::Procedure(procedure) => { 132 - let value = generate_procedure_json(procedure, &usage_counts, workspace, namespace); 156 + let mut value = generate_procedure_json(procedure, &usage_counts, workspace, namespace); 157 + apply_extension_annotations(&mut value, &procedure.annotations, workspace, namespace, &mut warnings); 133 158 insert_def(&mut defs, &procedure.name.name, eligibility.is_main(&procedure.name.name, &procedure.annotations), value); 134 159 } 135 160 Item::Subscription(subscription) => { 136 - let value = generate_subscription_json(subscription, &usage_counts, workspace, namespace); 161 + let mut value = generate_subscription_json(subscription, &usage_counts, workspace, namespace); 162 + apply_extension_annotations(&mut value, &subscription.annotations, workspace, namespace, &mut warnings); 137 163 insert_def(&mut defs, &subscription.name.name, eligibility.is_main(&subscription.name.name, &subscription.annotations), value); 138 164 } 139 165 Item::DefType(def_type) => { 140 - let value = generate_def_type_json(def_type, &usage_counts, workspace, namespace); 166 + let mut value = generate_def_type_json(def_type, &usage_counts, workspace, namespace); 167 + apply_extension_annotations(&mut value, &def_type.annotations, workspace, namespace, &mut warnings); 141 168 insert_def(&mut defs, &def_type.name.name, eligibility.is_main(&def_type.name.name, &def_type.annotations), value); 142 169 } 143 170 Item::Token(token) => { 144 171 let mut token_obj = Map::new(); 145 172 token_obj.insert("type".to_string(), json!("token")); 146 173 insert_opt_str(&mut 
token_obj, "description", &extract_docs(&token.docs)); 147 - defs.insert(token.name.name.clone(), Value::Object(token_obj)); 174 + let mut value = Value::Object(token_obj); 175 + apply_extension_annotations(&mut value, &token.annotations, workspace, namespace, &mut warnings); 176 + defs.insert(token.name.name.clone(), value); 177 + } 178 + Item::SelfItem(self_item) => { 179 + // The `self { }` item carries lexicon-level metadata. Its 180 + // docs become the top-level `description`; its extension 181 + // annotations become top-level JSON fields alongside 182 + // `lexicon`, `id`, `defs`. 183 + self_description = extract_docs(&self_item.docs); 184 + self_extensions = collect_extension_fields(&self_item.annotations, workspace, namespace, &mut warnings); 148 185 } 149 186 // Inline types never appear in `defs` — they expand at their point 150 - // of use. Other item kinds (use statements, namespace blocks) are 151 - // structural and not emitted into the lexicon output. 187 + // of use. Use statements are structural and not emitted. 152 188 _ => {} 153 189 } 154 190 } ··· 157 193 root.insert("$type".to_string(), json!("com.atproto.lexicon.schema")); 158 194 root.insert("lexicon".to_string(), json!(1)); 159 195 root.insert("id".to_string(), json!(namespace)); 196 + insert_opt_str(&mut root, "description", &self_description); 197 + for (key, value) in self_extensions { 198 + root.insert(key, value); 199 + } 160 200 root.insert("defs".to_string(), json!(defs)); 161 - Value::Object(root) 201 + CodegenOutput { 202 + json: Value::Object(root), 203 + warnings, 204 + } 205 + } 206 + 207 + /// Extract `@const` / `@reference` annotations into (key, JSON value) 208 + /// pairs. Used by the self-item handling to populate top-level lexicon 209 + /// fields. Annotations of other names (including generator-scoped ones 210 + /// like `@rust:deprecated`) are ignored. 
211 + fn collect_extension_fields( 212 + annotations: &[Annotation], 213 + workspace: &Workspace, 214 + current_namespace: &str, 215 + warnings: &mut Vec<CodegenWarning>, 216 + ) -> Vec<(String, Value)> { 217 + annotations 218 + .iter() 219 + .filter_map(|ann| extension_field(ann, workspace, current_namespace, warnings)) 220 + .collect() 221 + } 222 + 223 + /// Mutate `value` (expected to be a JSON object) in place, adding any 224 + /// extension fields derived from `@const` / `@reference` annotations. 225 + /// Non-object values are left untouched. 226 + fn apply_extension_annotations( 227 + value: &mut Value, 228 + annotations: &[Annotation], 229 + workspace: &Workspace, 230 + current_namespace: &str, 231 + warnings: &mut Vec<CodegenWarning>, 232 + ) { 233 + let Some(obj) = value.as_object_mut() else { 234 + return; 235 + }; 236 + for (key, field_value) in collect_extension_fields(annotations, workspace, current_namespace, warnings) { 237 + obj.insert(key, field_value); 238 + } 239 + } 240 + 241 + /// If `annotation` is a recognised extension (`@const` or `@reference`), 242 + /// return the `(key, JSON value)` it produces. Otherwise `None`. 243 + /// 244 + /// `@const(key, value)` — value is a literal; rendered verbatim. 245 + /// `@reference(key, path)` — path is resolved through the workspace and 246 + /// emitted as an NSID string. 247 + fn extension_field( 248 + annotation: &Annotation, 249 + workspace: &Workspace, 250 + current_namespace: &str, 251 + warnings: &mut Vec<CodegenWarning>, 252 + ) -> Option<(String, Value)> { 253 + if !annotation.selectors.is_empty() { 254 + // Generator-scoped annotation (e.g. `@rust:deprecated`); not an 255 + // extension. 256 + return None; 257 + } 258 + let name = annotation.name.name.as_str(); 259 + let is_const = name == "const"; 260 + let is_reference = name == "reference"; 261 + if !is_const && !is_reference { 262 + return None; 263 + } 264 + // Both forms take exactly two positional args: (key: string, value). 
265 + let mut args = annotation.args.iter(); 266 + let key_arg = args.next()?; 267 + let value_arg = args.next()?; 268 + if args.next().is_some() { 269 + return None; 270 + } 271 + let AnnotationArg::Positional(AnnotationValue::String(key)) = key_arg else { 272 + return None; 273 + }; 274 + let AnnotationArg::Positional(value) = value_arg else { 275 + return None; 276 + }; 277 + let json_value = if is_const { 278 + annotation_value_to_json(value, key, current_namespace, warnings) 279 + } else { 280 + // @reference: expect a type path; emit the resolved NSID. 281 + let AnnotationValue::Reference(path) = value else { 282 + return None; 283 + }; 284 + Value::String(resolve_ref_nsid(path, workspace, current_namespace)) 285 + }; 286 + Some((key.clone(), json_value)) 287 + } 288 + 289 + /// Literal-to-JSON conversion for `@const` values. Coerces whole-number 290 + /// f64 to i64 (so `@const("revision", 3)` emits `3`, not `3.0`) and 291 + /// transparently stringifies genuinely fractional numbers — ATProto's 292 + /// data model has no floats, so the spec-compliant representation is a 293 + /// string. `key` and `namespace` are used to tag any emitted warnings. 
294 + fn annotation_value_to_json( 295 + value: &AnnotationValue, 296 + key: &str, 297 + namespace: &str, 298 + warnings: &mut Vec<CodegenWarning>, 299 + ) -> Value { 300 + match value { 301 + AnnotationValue::String(s) => Value::String(s.clone()), 302 + AnnotationValue::Number(n) => { 303 + if n.is_finite() && n.fract() == 0.0 && *n >= i64::MIN as f64 && *n <= i64::MAX as f64 { 304 + Value::Number(serde_json::Number::from(*n as i64)) 305 + } else if n.is_finite() { 306 + warnings.push(CodegenWarning { 307 + namespace: namespace.to_string(), 308 + message: format!( 309 + "@const({:?}, {}): ATProto's data model has no floats; \ 310 + emitting as string {:?} to stay spec-compliant", 311 + key, n, n.to_string() 312 + ), 313 + }); 314 + Value::String(n.to_string()) 315 + } else { 316 + warnings.push(CodegenWarning { 317 + namespace: namespace.to_string(), 318 + message: format!( 319 + "@const({:?}, {}): non-finite number is not representable in JSON; \ 320 + emitting `null`", 321 + key, n 322 + ), 323 + }); 324 + Value::Null 325 + } 326 + } 327 + AnnotationValue::Boolean(b) => Value::Bool(*b), 328 + AnnotationValue::Null => Value::Null, 329 + AnnotationValue::Array(items) => { 330 + Value::Array( 331 + items 332 + .iter() 333 + .map(|item| annotation_value_to_json(item, key, namespace, warnings)) 334 + .collect(), 335 + ) 336 + } 337 + AnnotationValue::Object(entries) => { 338 + let mut obj = Map::new(); 339 + for (entry_key, entry_value) in entries { 340 + obj.insert( 341 + entry_key.clone(), 342 + annotation_value_to_json(entry_value, key, namespace, warnings), 343 + ); 344 + } 345 + Value::Object(obj) 346 + } 347 + // A `Reference` shouldn't appear in a `@const` value position — 348 + // it would mean the author wrote a bare identifier where they 349 + // meant a literal. Fall back to emitting the path as-is; the 350 + // parser could also reject this earlier, but defensively handle 351 + // it here. 
352 + AnnotationValue::Reference(path) => Value::String(path.to_string()), 353 + } 162 354 } 163 355 164 356 /// Decides whether a given def should be emitted under the key `main` or ··· 874 1066 } 875 1067 876 1068 fn generate(&self, ctx: &GeneratorContext) -> Result<String, String> { 877 - let json = generate_lexicon(ctx.namespace, ctx.lexicon, ctx.workspace); 878 - serde_json::to_string_pretty(&json) 1069 + // The plugin `CodeGenerator` trait doesn't carry a warning 1070 + // channel today; the callers that care about warnings call 1071 + // `generate_lexicon` directly instead of going through this shim. 1072 + let output = generate_lexicon(ctx.namespace, ctx.lexicon, ctx.workspace); 1073 + serde_json::to_string_pretty(&output.json) 879 1074 .map_err(|e| format!("Failed to serialize JSON: {}", e)) 880 1075 } 881 1076 }
+25
mlf-lang/src/ast.rs
··· 38 38 Procedure(Procedure), 39 39 Subscription(Subscription), 40 40 Use(Use), 41 + /// `self { }` — a declaration representing the lexicon itself. Docs 42 + /// and annotations attached to it map to the lexicon's top-level 43 + /// fields in JSON (e.g. `description`, vendor extensions). Body is 44 + /// empty in V1; the `{}` shape is reserved for future contents. 45 + SelfItem(SelfItem), 41 46 } 42 47 43 48 impl Spanned for Item { ··· 51 56 Item::Procedure(p) => p.span, 52 57 Item::Subscription(s) => s.span, 53 58 Item::Use(u) => u.span, 59 + Item::SelfItem(s) => s.span, 54 60 } 55 61 } 56 62 } 57 63 64 + /// The lexicon-as-item. See [`Item::SelfItem`] for the semantic role. 65 + #[derive(Debug, Clone, PartialEq)] 66 + pub struct SelfItem { 67 + pub docs: Vec<DocComment>, 68 + pub annotations: Vec<Annotation>, 69 + pub span: Span, 70 + } 71 + 58 72 /// Documentation comment 59 73 #[derive(Debug, Clone, PartialEq, Eq)] 60 74 pub struct DocComment { ··· 87 101 String(String), 88 102 Number(f64), 89 103 Boolean(bool), 104 + /// JSON `null`. Used by `@const` to represent explicit-null values 105 + /// that appear in source lexicons' extension fields. 106 + Null, 107 + /// JSON array. Element types are freely mixed, matching JSON semantics. 108 + Array(Vec<AnnotationValue>), 109 + /// JSON object literal — a map with string keys. Keys permit any 110 + /// JSON-legal form (including hyphens, e.g. `"x-vendor-flag"`). 111 + Object(Vec<(String, AnnotationValue)>), 112 + /// A type path resolved through the workspace. Used by `@reference` 113 + /// to name an MLF type by path; codegen resolves to an NSID string. 114 + Reference(Path), 90 115 } 91 116 92 117 /// A record definition
+3
mlf-lang/src/lexer.rs
··· 29 29 Procedure, 30 30 Query, 31 31 Record, 32 + SelfKw, 32 33 String, 33 34 Subscription, 34 35 Token, ··· 84 85 Token::Procedure => write!(f, "procedure"), 85 86 Token::Query => write!(f, "query"), 86 87 Token::Record => write!(f, "record"), 88 + Token::SelfKw => write!(f, "self"), 87 89 Token::String => write!(f, "string"), 88 90 Token::Subscription => write!(f, "subscription"), 89 91 Token::Token => write!(f, "token"), ··· 158 160 "procedure" => Token::Procedure, 159 161 "query" => Token::Query, 160 162 "record" => Token::Record, 163 + "self" => Token::SelfKw, 161 164 "string" => Token::String, 162 165 "subscription" => Token::Subscription, 163 166 "token" => Token::Token,
+94
mlf-lang/src/parser.rs
··· 211 211 LexToken::Procedure => self.parse_procedure(doc_comments, annotations), 212 212 LexToken::Subscription => self.parse_subscription(doc_comments, annotations), 213 213 LexToken::Use => self.parse_use(), 214 + LexToken::SelfKw => self.parse_self(doc_comments, annotations), 214 215 _ => Err(ParseError::Syntax { 215 216 message: alloc::format!("Expected item definition, found {}", self.current().token), 216 217 span: self.current().span, 217 218 }), 218 219 } 220 + } 221 + 222 + /// Parse a `self { }` declaration — the lexicon-as-item. Body is 223 + /// required but currently always empty; the `{}` shape is reserved 224 + /// for future contents (see the C6 design doc). 225 + fn parse_self( 226 + &mut self, 227 + docs: Vec<DocComment>, 228 + annotations: Vec<Annotation>, 229 + ) -> Result<Item, ParseError> { 230 + let start = self.expect(LexToken::SelfKw)?; 231 + self.expect(LexToken::LeftBrace)?; 232 + // V1: body must be empty. Allow whitespace/comments (already 233 + // skipped by the tokeniser) but reject any real content. 234 + if !matches!(self.current().token, LexToken::RightBrace) { 235 + return Err(ParseError::Syntax { 236 + message: alloc::format!( 237 + "`self {{}}` body is reserved for future use; expected `}}`, found {}", 238 + self.current().token 239 + ), 240 + span: self.current().span, 241 + }); 242 + } 243 + let end = self.expect(LexToken::RightBrace)?; 244 + 245 + Ok(Item::SelfItem(SelfItem { 246 + docs, 247 + annotations, 248 + span: Span::new(start.start, end.end), 249 + })) 219 250 } 220 251 221 252 fn parse_annotations(&mut self) -> Result<Vec<Annotation>, ParseError> { ··· 326 357 self.advance(); 327 358 Ok(AnnotationValue::Boolean(false)) 328 359 } 360 + LexToken::Null => { 361 + self.advance(); 362 + Ok(AnnotationValue::Null) 363 + } 364 + LexToken::LeftBracket => self.parse_annotation_value_array(), 365 + LexToken::LeftBrace => self.parse_annotation_value_object(), 366 + // Identifier → type reference path. 
Consumed here so `@reference` 367 + // can carry a named type like `SomeType` or `ns.Foo`. 368 + LexToken::Ident(_) => { 369 + let path = self.parse_path()?; 370 + Ok(AnnotationValue::Reference(path)) 371 + } 329 372 _ => Err(ParseError::Syntax { 330 373 message: alloc::format!("Expected annotation value, found {}", current.token), 331 374 span: current.span, 332 375 }), 333 376 } 377 + } 378 + 379 + /// Parse `[value, value, ...]` as an annotation-value array. Trailing 380 + /// commas are accepted. Empty `[]` is valid. 381 + fn parse_annotation_value_array(&mut self) -> Result<AnnotationValue, ParseError> { 382 + self.expect(LexToken::LeftBracket)?; 383 + let mut items = Vec::new(); 384 + while !matches!(self.current().token, LexToken::RightBracket) { 385 + items.push(self.parse_annotation_value()?); 386 + if matches!(self.current().token, LexToken::Comma) { 387 + self.advance(); 388 + } else { 389 + break; 390 + } 391 + } 392 + self.expect(LexToken::RightBracket)?; 393 + Ok(AnnotationValue::Array(items)) 394 + } 395 + 396 + /// Parse `{ "key": value, ... }` as an annotation-value object. Keys 397 + /// must be string literals (not identifiers) so arbitrary JSON-legal 398 + /// keys like `"x-vendor-flag"` work without additional grammar. 
399 + fn parse_annotation_value_object(&mut self) -> Result<AnnotationValue, ParseError> { 400 + self.expect(LexToken::LeftBrace)?; 401 + let mut entries = Vec::new(); 402 + while !matches!(self.current().token, LexToken::RightBrace) { 403 + let current = self.current(); 404 + let key = match &current.token { 405 + LexToken::StringLit(s) => s.clone(), 406 + _ => { 407 + return Err(ParseError::Syntax { 408 + message: alloc::format!( 409 + "Expected string literal as object key, found {}", 410 + current.token 411 + ), 412 + span: current.span, 413 + }); 414 + } 415 + }; 416 + self.advance(); 417 + self.expect(LexToken::Colon)?; 418 + let value = self.parse_annotation_value()?; 419 + entries.push((key, value)); 420 + if matches!(self.current().token, LexToken::Comma) { 421 + self.advance(); 422 + } else { 423 + break; 424 + } 425 + } 426 + self.expect(LexToken::RightBrace)?; 427 + Ok(AnnotationValue::Object(entries)) 334 428 } 335 429 336 430 fn parse_record(&mut self, docs: Vec<DocComment>, annotations: Vec<Annotation>) -> Result<Item, ParseError> {
+6 -6
mlf-lang/src/workspace.rs
··· 1115 1115 Item::Query(q) => Some(q.name.name.as_str()), 1116 1116 Item::Procedure(p) => Some(p.name.name.as_str()), 1117 1117 Item::Subscription(s) => Some(s.name.name.as_str()), 1118 - Item::Use(_) => None, 1118 + Item::Use(_) | Item::SelfItem(_) => None, 1119 1119 }; 1120 1120 1121 1121 if let Some(name) = name { ··· 1136 1136 Item::Query(q) => q.name.span, 1137 1137 Item::Procedure(p) => p.name.span, 1138 1138 Item::Subscription(s) => s.name.span, 1139 - Item::Use(_) => continue, 1139 + Item::Use(_) | Item::SelfItem(_) => continue, 1140 1140 }; 1141 1141 errors.push(crate::error::ValidationError::ReservedName { 1142 1142 name: name.clone(), ··· 1204 1204 Item::Query(q) => q.name.span, 1205 1205 Item::Procedure(p) => p.name.span, 1206 1206 Item::Subscription(s) => s.name.span, 1207 - Item::Use(_) => continue, 1207 + Item::Use(_) | Item::SelfItem(_) => continue, 1208 1208 }; 1209 1209 errors.push(crate::error::ValidationError::ConflictNotAllowed { 1210 1210 name: name.clone(), ··· 1230 1230 Item::Query(q) => q.name.span, 1231 1231 Item::Procedure(p) => p.name.span, 1232 1232 Item::Subscription(s) => s.name.span, 1233 - Item::Use(_) => continue, 1233 + Item::Use(_) | Item::SelfItem(_) => continue, 1234 1234 }; 1235 1235 errors.push(crate::error::ValidationError::DuplicateDefinition { 1236 1236 name: name.clone(), ··· 1242 1242 Item::Query(q) => q.name.span, 1243 1243 Item::Procedure(p) => p.name.span, 1244 1244 Item::Subscription(s) => s.name.span, 1245 - Item::Use(_) => continue, 1245 + Item::Use(_) | Item::SelfItem(_) => continue, 1246 1246 }, 1247 1247 second_span: span, 1248 1248 module_namespace: namespace.to_string(), ··· 1395 1395 Item::Query(q) => self.resolve_query(namespace, q), 1396 1396 Item::Procedure(p) => self.resolve_procedure(namespace, p), 1397 1397 Item::Subscription(s) => self.resolve_subscription(namespace, s), 1398 - Item::Token(_) | Item::Use(_) => Ok(()), 1398 + Item::Token(_) | Item::Use(_) | Item::SelfItem(_) => Ok(()), 1399 1399 } 
1400 1400 } 1401 1401
+16 -5
mlf-lsp/src/server.rs
··· 566 566 Item::Query(q) => q.name.span, 567 567 Item::Procedure(p) => p.name.span, 568 568 Item::Subscription(s) => s.name.span, 569 - Item::Use(_) => continue, 569 + Item::Use(_) | Item::SelfItem(_) => continue, 570 570 }; 571 571 572 572 self.client ··· 664 664 Item::Query(q) => Some(q.name.span), 665 665 Item::Procedure(p) => Some(p.name.span), 666 666 Item::Subscription(s) => Some(s.name.span), 667 - Item::Use(_) => None, 667 + Item::Use(_) | Item::SelfItem(_) => None, 668 668 }; 669 669 670 670 if let Some(span) = def_span { ··· 684 684 Item::Query(q) => q.name.span, 685 685 Item::Procedure(p) => p.name.span, 686 686 Item::Subscription(s) => s.name.span, 687 - Item::Use(_) => continue, 687 + Item::Use(_) | Item::SelfItem(_) => continue, 688 688 }; 689 689 690 690 return Some((doc_uri.clone(), def_span)); ··· 908 908 Item::Query(q) => Some(&q.annotations), 909 909 Item::Procedure(p) => Some(&p.annotations), 910 910 Item::Subscription(s) => Some(&s.annotations), 911 + Item::SelfItem(s) => Some(&s.annotations), 911 912 _ => None, 912 913 }; 913 914 ··· 944 945 "cache" => "Defines caching strategy", 945 946 "indexed" => "Marks this field as indexed", 946 947 "sensitive" => "Marks this field as containing sensitive data (e.g., PII)", 948 + "const" => "Extension field (literal). `@const(key, value)` emits `key: value` verbatim in the JSON Lexicon. Use on `self {}` for top-level fields, or any item for per-item fields.", 949 + "reference" => "Extension field (named-type reference). 
`@reference(key, path)` resolves `path` through the workspace and emits the resulting NSID string under `key`.", 947 950 _ => "Custom annotation", 948 951 }; 949 952 ··· 1022 1025 Item::Procedure(_) => "procedure", 1023 1026 Item::Subscription(_) => "subscription", 1024 1027 Item::Use(_) => "use", 1028 + Item::SelfItem(_) => "self", 1025 1029 }; 1026 1030 1027 1031 contents.push(MarkedString::LanguageString(LanguageString { ··· 1029 1033 value: format!("{} {}", kind, name), 1030 1034 })); 1031 1035 1036 + if let Item::SelfItem(_) = item { 1037 + contents.push(MarkedString::String( 1038 + "Represents the lexicon itself. Doc comments emit as the top-level `description`; `@const` / `@reference` annotations emit as top-level JSON fields.".to_string() 1039 + )); 1040 + } 1041 + 1032 1042 // Add documentation if available 1033 1043 if !docs.is_empty() { 1034 1044 contents.push(MarkedString::String(docs.join("\n"))); ··· 1427 1437 ("procedure", CompletionItemKind::KEYWORD, "Define a procedure"), 1428 1438 ("subscription", CompletionItemKind::KEYWORD, "Define a subscription"), 1429 1439 ("use", CompletionItemKind::KEYWORD, "Import types from another module"), 1440 + ("self", CompletionItemKind::KEYWORD, "Lexicon-as-item — attach top-level docs and @const / @reference extensions"), 1430 1441 ]; 1431 1442 1432 1443 for (label, kind, detail) in keywords { ··· 1620 1631 Item::Query(q) => q.name.span, 1621 1632 Item::Procedure(p) => p.name.span, 1622 1633 Item::Subscription(s) => s.name.span, 1623 - Item::Use(_) => continue, 1634 + Item::Use(_) | Item::SelfItem(_) => continue, 1624 1635 }; 1625 1636 1626 1637 let range = span_to_range(&text, def_span); ··· 1745 1756 SymbolKind::EVENT, 1746 1757 span_to_range(&doc_state.text, s.span), 1747 1758 ), 1748 - Item::Use(_) => continue, 1759 + Item::Use(_) | Item::SelfItem(_) => continue, 1749 1760 }; 1750 1761 1751 1762 #[allow(deprecated)]
+3 -1
mlf-lsp/src/utils.rs
··· 79 79 Item::Procedure(p) => p.span, 80 80 Item::Subscription(s) => s.span, 81 81 Item::Use(u) => u.span, 82 + Item::SelfItem(s) => s.span, 82 83 }; 83 84 offset_in_span(offset, span) 84 85 }) ··· 133 134 Item::Query(q) => &q.name.name, 134 135 Item::Procedure(p) => &p.name.name, 135 136 Item::Subscription(s) => &s.name.name, 136 - Item::Use(_) => "", 137 + Item::Use(_) | Item::SelfItem(_) => "", 137 138 } 138 139 } 139 140 ··· 147 148 Item::Query(q) => &q.docs, 148 149 Item::Procedure(p) => &p.docs, 149 150 Item::Subscription(s) => &s.docs, 151 + Item::SelfItem(s) => &s.docs, 150 152 Item::Use(_) => return vec![], 151 153 }; 152 154
+5 -3
mlf-wasm/src/lib.rs
··· 139 139 return serde_wasm_bindgen::to_value(&result).unwrap(); 140 140 } 141 141 142 - // Generate JSON lexicon 143 - let json_lexicon = mlf_codegen::generate_lexicon(namespace, &lexicon, &workspace); 142 + // Generate JSON lexicon. Codegen warnings are dropped on the floor 143 + // in the wasm surface today — the playground doesn't have a UI for 144 + // them yet; when it does, they flow through `output.warnings`. 145 + let output = mlf_codegen::generate_lexicon(namespace, &lexicon, &workspace); 144 146 145 - match serde_json::to_string_pretty(&json_lexicon) { 147 + match serde_json::to_string_pretty(&output.json) { 146 148 Ok(json_str) => { 147 149 let result = GenerateResult { 148 150 success: true,
+20
tests/codegen/lexicon/const_float_stringifies/expected.json
··· 1 + { 2 + "$type": "com.atproto.lexicon.schema", 3 + "lexicon": 1, 4 + "id": "com.example.const_float_stringifies", 5 + "x-threshold": "3.14", 6 + "x-revision": 3, 7 + "defs": { 8 + "main": { 9 + "type": "record", 10 + "key": "tid", 11 + "record": { 12 + "type": "object", 13 + "required": ["name"], 14 + "properties": { 15 + "name": {"type": "string"} 16 + } 17 + } 18 + } 19 + } 20 + }
+1
tests/codegen/lexicon/const_float_stringifies/expected_warnings.txt
··· 1 + com.example.const_float_stringifies: @const("x-threshold", 3.14): ATProto's data model has no floats; emitting as string "3.14" to stay spec-compliant
+8
tests/codegen/lexicon/const_float_stringifies/input.mlf
··· 1 + @const("x-threshold", 3.14) 2 + @const("x-revision", 3) 3 + self {} 4 + 5 + @main 6 + record constFloatStringifies { 7 + name!: string, 8 + }
+4
tests/codegen/lexicon/const_float_stringifies/test.toml
··· 1 + [test] 2 + name = "const_float_stringifies" 3 + description = "@const with a fractional numeric value stringifies to stay spec-compliant and emits a warning" 4 + namespace = "com.example.const_float_stringifies"
+20
tests/codegen/lexicon/item_extensions_codegen/expected.json
··· 1 + { 2 + "$type": "com.atproto.lexicon.schema", 3 + "lexicon": 1, 4 + "id": "com.example.item_extensions_codegen", 5 + "defs": { 6 + "main": { 7 + "type": "record", 8 + "key": "tid", 9 + "record": { 10 + "type": "object", 11 + "required": ["name"], 12 + "properties": { 13 + "name": {"type": "string"} 14 + } 15 + }, 16 + "x-deprecated": true, 17 + "x-since": "2024-01-01" 18 + } 19 + } 20 + }
+6
tests/codegen/lexicon/item_extensions_codegen/input.mlf
··· 1 + @main 2 + @const("x-deprecated", true) 3 + @const("x-since", "2024-01-01") 4 + record itemExtensions { 5 + name!: string, 6 + }
+4
tests/codegen/lexicon/item_extensions_codegen/test.toml
··· 1 + [test] 2 + name = "item_extensions_codegen" 3 + description = "@const annotations on a record emit as extra JSON fields on the record def" 4 + namespace = "com.example.item_extensions_codegen"
+18
tests/codegen/lexicon/self_item/expected.json
··· 1 + { 2 + "$type": "com.atproto.lexicon.schema", 3 + "lexicon": 1, 4 + "id": "com.example.self_item", 5 + "description": "A lexicon with top-level metadata.", 6 + "revision": 3, 7 + "x-vendor-flag": true, 8 + "xFallbackType": "#selfItem", 9 + "defs": { 10 + "main": { 11 + "type": "object", 12 + "required": ["value"], 13 + "properties": { 14 + "value": {"type": "string"} 15 + } 16 + } 17 + } 18 + }
+10
tests/codegen/lexicon/self_item/input.mlf
··· 1 + /// A lexicon with top-level metadata. 2 + @const("revision", 3) 3 + @const("x-vendor-flag", true) 4 + @reference("xFallbackType", selfItem) 5 + self {} 6 + 7 + @main 8 + def type selfItem = { 9 + value!: string, 10 + };
+4
tests/codegen/lexicon/self_item/test.toml
··· 1 + [test] 2 + name = "self_item" 3 + description = "self {} with docs and @const/@reference extensions emit as top-level JSON fields" 4 + namespace = "com.example.self_item"
+28 -3
tests/codegen_integration.rs
··· 38 38 39 39 let lexicon = ws.get_lexicon(&namespace).ok_or("Module not found")?; 40 40 41 - let output_json = generate_lexicon(&namespace, lexicon, &ws); 41 + let output = generate_lexicon(&namespace, lexicon, &ws); 42 42 43 43 let expected_str = fs::read_to_string(test_dir.join("expected.json"))?; 44 44 let expected_json: Value = serde_json::from_str(&expected_str)?; 45 45 46 - if output_json != expected_json { 46 + if output.json != expected_json { 47 47 return Err(format!( 48 48 "Output mismatch:\nExpected:\n{}\n\nGot:\n{}", 49 49 serde_json::to_string_pretty(&expected_json).unwrap(), 50 - serde_json::to_string_pretty(&output_json).unwrap() 50 + serde_json::to_string_pretty(&output.json).unwrap() 51 + ) 52 + .into()); 53 + } 54 + 55 + let warnings_path = test_dir.join("expected_warnings.txt"); 56 + let expected_warnings = if warnings_path.exists() { 57 + fs::read_to_string(&warnings_path)? 58 + } else { 59 + String::new() 60 + }; 61 + let actual_warnings = output 62 + .warnings 63 + .iter() 64 + .map(|w| format!("{}: {}", w.namespace, w.message)) 65 + .collect::<Vec<_>>() 66 + .join("\n"); 67 + let actual_warnings = if actual_warnings.is_empty() { 68 + String::new() 69 + } else { 70 + format!("{}\n", actual_warnings) 71 + }; 72 + if actual_warnings != expected_warnings { 73 + return Err(format!( 74 + "Warnings mismatch:\n--- expected ---\n{}\n--- got ---\n{}", 75 + expected_warnings, actual_warnings 51 76 ) 52 77 .into()); 53 78 }
+7
tests/lexicon_to_mlf/item_extensions/expected.mlf
··· 1 + @main 2 + @const("x-deprecated", true) 3 + @const("x-since", "2024-01-01") 4 + record itemext { 5 + name!: string, 6 + } 7 +
+19
tests/lexicon_to_mlf/item_extensions/input.json
··· 1 + { 2 + "lexicon": 1, 3 + "id": "com.example.itemext", 4 + "defs": { 5 + "main": { 6 + "type": "record", 7 + "key": "tid", 8 + "x-deprecated": true, 9 + "x-since": "2024-01-01", 10 + "record": { 11 + "type": "object", 12 + "required": ["name"], 13 + "properties": { 14 + "name": {"type": "string"} 15 + } 16 + } 17 + } 18 + } 19 + }
+6
tests/lexicon_to_mlf/ref_hint_warning/expected.mlf
··· 1 + @main 2 + @const("xFallback", "com.example.other#someType") 3 + def type refhint = { 4 + value: string, 5 + }; 6 +
+1
tests/lexicon_to_mlf/ref_hint_warning/expected_warnings.txt
··· 1 + com.example.refhint: extension field "xFallback" has value "com.example.other#someType" which looks NSID-shaped; emitted as `@const` — consider `@reference` if you intend workspace name resolution when hand-editing the MLF
+14
tests/lexicon_to_mlf/ref_hint_warning/input.json
··· 1 + { 2 + "lexicon": 1, 3 + "id": "com.example.refhint", 4 + "defs": { 5 + "main": { 6 + "type": "object", 7 + "required": [], 8 + "properties": { 9 + "value": {"type": "string"} 10 + }, 11 + "xFallback": "com.example.other#someType" 12 + } 13 + } 14 + }
+8
tests/lexicon_to_mlf/self_top_level_description/expected.mlf
··· 1 + /// A short description of the whole lexicon. 2 + self {} 3 + 4 + @main 5 + def type described = { 6 + name!: string, 7 + }; 8 +
+14
tests/lexicon_to_mlf/self_top_level_description/input.json
··· 1 + { 2 + "lexicon": 1, 3 + "id": "com.example.described", 4 + "description": "A short description of the whole lexicon.", 5 + "defs": { 6 + "main": { 7 + "type": "object", 8 + "required": ["name"], 9 + "properties": { 10 + "name": {"type": "string"} 11 + } 12 + } 13 + } 14 + }
+11
tests/lexicon_to_mlf/self_top_level_extensions/expected.mlf
··· 1 + /// With extensions. 2 + @const("revision", 3) 3 + @const("x-vendor-flag", true) 4 + @const("x-tags", ["alpha", "beta"]) 5 + self {} 6 + 7 + @main 8 + def type extended = { 9 + value: string, 10 + }; 11 +
+17
tests/lexicon_to_mlf/self_top_level_extensions/input.json
··· 1 + { 2 + "lexicon": 1, 3 + "id": "com.example.extended", 4 + "description": "With extensions.", 5 + "revision": 3, 6 + "x-vendor-flag": true, 7 + "x-tags": ["alpha", "beta"], 8 + "defs": { 9 + "main": { 10 + "type": "object", 11 + "required": [], 12 + "properties": { 13 + "value": {"type": "string"} 14 + } 15 + } 16 + } 17 + }
+7
tests/lexicon_to_mlf/unknown_def_type_passthrough/expected.mlf
··· 1 + @main 2 + @const("type", "permission-set") 3 + @const("title", "Example Access") 4 + @const("detail", "Access to example resources.") 5 + @const("permissions", [{ "type": "permission", "resource": "repo", "collection": ["com.example.one", "com.example.two"] }]) 6 + def type permission = unknown; 7 +
+18
tests/lexicon_to_mlf/unknown_def_type_passthrough/input.json
··· 1 + { 2 + "lexicon": 1, 3 + "id": "com.example.permission", 4 + "defs": { 5 + "main": { 6 + "type": "permission-set", 7 + "title": "Example Access", 8 + "detail": "Access to example resources.", 9 + "permissions": [ 10 + { 11 + "type": "permission", 12 + "resource": "repo", 13 + "collection": ["com.example.one", "com.example.two"] 14 + } 15 + ] 16 + } 17 + } 18 + }
+2 -2
tests/real_world/roundtrip.rs
··· 168 168 .get_lexicon(&namespace) 169 169 .ok_or_else(|| format!("Module not found in workspace: {}", namespace))?; 170 170 171 - let json = generate_lexicon(&namespace, lexicon, &ws); 172 - out.push((relative.with_extension("json"), json)); 171 + let output = generate_lexicon(&namespace, lexicon, &ws); 172 + out.push((relative.with_extension("json"), output.json)); 173 173 } 174 174 175 175 Ok(out)
+47 -1
tree-sitter-mlf/grammar.js
··· 33 33 $.query_definition, 34 34 $.procedure_definition, 35 35 $.subscription_definition, 36 + $.self_definition, 37 + ), 38 + 39 + // `self { }` — the lexicon-as-item. Body is always empty in V1; the 40 + // `{}` shape is reserved for future contents. Docs and annotations 41 + // attach to it exactly like any other item, and emit as top-level 42 + // JSON metadata (`description`, extension fields). 43 + self_definition: $ => seq( 44 + repeat($.annotation), 45 + 'self', 46 + '{', 47 + '}' 36 48 ), 37 49 38 50 // Comments ··· 77 89 annotation_value: $ => choice( 78 90 $.string, 79 91 $.number, 80 - $.boolean 92 + $.boolean, 93 + $.null_literal, 94 + $.annotation_array, 95 + $.annotation_object, 96 + // A bare type path — used by the `@reference` extension annotation 97 + // to name a type that codegen resolves through the workspace. 98 + $.type_path 99 + ), 100 + 101 + null_literal: $ => 'null', 102 + 103 + annotation_array: $ => seq( 104 + '[', 105 + optional(seq( 106 + $.annotation_value, 107 + repeat(seq(',', $.annotation_value)), 108 + optional(',') 109 + )), 110 + ']' 111 + ), 112 + 113 + annotation_object: $ => seq( 114 + '{', 115 + optional(seq( 116 + $.annotation_object_entry, 117 + repeat(seq(',', $.annotation_object_entry)), 118 + optional(',') 119 + )), 120 + '}' 121 + ), 122 + 123 + annotation_object_entry: $ => seq( 124 + field('key', $.string), 125 + ':', 126 + field('value', $.annotation_value) 81 127 ), 82 128 83 129 // Use statements
+4
tree-sitter-mlf/queries/highlights.scm
··· 13 13 "subscription" 14 14 "error" 15 15 "constrained" 16 + "self" 16 17 ] @keyword 18 + 19 + ; Null literal (in annotation values) 20 + (null_literal) @constant.builtin 17 21 18 22 ; Primitive types 19 23 [
+193 -3
tree-sitter-mlf/src/grammar.json
··· 1 1 { 2 + "$schema": "https://tree-sitter.github.io/tree-sitter/assets/schemas/grammar.schema.json", 2 3 "name": "mlf", 3 4 "rules": { 4 5 "source_file": { ··· 42 43 { 43 44 "type": "SYMBOL", 44 45 "name": "subscription_definition" 46 + }, 47 + { 48 + "type": "SYMBOL", 49 + "name": "self_definition" 50 + } 51 + ] 52 + }, 53 + "self_definition": { 54 + "type": "SEQ", 55 + "members": [ 56 + { 57 + "type": "REPEAT", 58 + "content": { 59 + "type": "SYMBOL", 60 + "name": "annotation" 61 + } 62 + }, 63 + { 64 + "type": "STRING", 65 + "value": "self" 66 + }, 67 + { 68 + "type": "STRING", 69 + "value": "{" 70 + }, 71 + { 72 + "type": "STRING", 73 + "value": "}" 45 74 } 46 75 ] 47 76 }, ··· 265 294 { 266 295 "type": "SYMBOL", 267 296 "name": "boolean" 297 + }, 298 + { 299 + "type": "SYMBOL", 300 + "name": "null_literal" 301 + }, 302 + { 303 + "type": "SYMBOL", 304 + "name": "annotation_array" 305 + }, 306 + { 307 + "type": "SYMBOL", 308 + "name": "annotation_object" 309 + }, 310 + { 311 + "type": "SYMBOL", 312 + "name": "type_path" 313 + } 314 + ] 315 + }, 316 + "null_literal": { 317 + "type": "STRING", 318 + "value": "null" 319 + }, 320 + "annotation_array": { 321 + "type": "SEQ", 322 + "members": [ 323 + { 324 + "type": "STRING", 325 + "value": "[" 326 + }, 327 + { 328 + "type": "CHOICE", 329 + "members": [ 330 + { 331 + "type": "SEQ", 332 + "members": [ 333 + { 334 + "type": "SYMBOL", 335 + "name": "annotation_value" 336 + }, 337 + { 338 + "type": "REPEAT", 339 + "content": { 340 + "type": "SEQ", 341 + "members": [ 342 + { 343 + "type": "STRING", 344 + "value": "," 345 + }, 346 + { 347 + "type": "SYMBOL", 348 + "name": "annotation_value" 349 + } 350 + ] 351 + } 352 + }, 353 + { 354 + "type": "CHOICE", 355 + "members": [ 356 + { 357 + "type": "STRING", 358 + "value": "," 359 + }, 360 + { 361 + "type": "BLANK" 362 + } 363 + ] 364 + } 365 + ] 366 + }, 367 + { 368 + "type": "BLANK" 369 + } 370 + ] 371 + }, 372 + { 373 + "type": "STRING", 374 + "value": "]" 375 + } 376 + 
] 377 + }, 378 + "annotation_object": { 379 + "type": "SEQ", 380 + "members": [ 381 + { 382 + "type": "STRING", 383 + "value": "{" 384 + }, 385 + { 386 + "type": "CHOICE", 387 + "members": [ 388 + { 389 + "type": "SEQ", 390 + "members": [ 391 + { 392 + "type": "SYMBOL", 393 + "name": "annotation_object_entry" 394 + }, 395 + { 396 + "type": "REPEAT", 397 + "content": { 398 + "type": "SEQ", 399 + "members": [ 400 + { 401 + "type": "STRING", 402 + "value": "," 403 + }, 404 + { 405 + "type": "SYMBOL", 406 + "name": "annotation_object_entry" 407 + } 408 + ] 409 + } 410 + }, 411 + { 412 + "type": "CHOICE", 413 + "members": [ 414 + { 415 + "type": "STRING", 416 + "value": "," 417 + }, 418 + { 419 + "type": "BLANK" 420 + } 421 + ] 422 + } 423 + ] 424 + }, 425 + { 426 + "type": "BLANK" 427 + } 428 + ] 429 + }, 430 + { 431 + "type": "STRING", 432 + "value": "}" 433 + } 434 + ] 435 + }, 436 + "annotation_object_entry": { 437 + "type": "SEQ", 438 + "members": [ 439 + { 440 + "type": "FIELD", 441 + "name": "key", 442 + "content": { 443 + "type": "SYMBOL", 444 + "name": "string" 445 + } 446 + }, 447 + { 448 + "type": "STRING", 449 + "value": ":" 450 + }, 451 + { 452 + "type": "FIELD", 453 + "name": "value", 454 + "content": { 455 + "type": "SYMBOL", 456 + "name": "annotation_value" 457 + } 268 458 } 269 459 ] 270 460 }, ··· 1436 1626 "precedences": [], 1437 1627 "externals": [], 1438 1628 "inline": [], 1439 - "supertypes": [] 1440 - } 1441 - 1629 + "supertypes": [], 1630 + "reserved": {} 1631 + }
+105 -2
tree-sitter-mlf/src/node-types.json
··· 77 77 } 78 78 }, 79 79 { 80 + "type": "annotation_array", 81 + "named": true, 82 + "fields": {}, 83 + "children": { 84 + "multiple": true, 85 + "required": false, 86 + "types": [ 87 + { 88 + "type": "annotation_value", 89 + "named": true 90 + } 91 + ] 92 + } 93 + }, 94 + { 95 + "type": "annotation_object", 96 + "named": true, 97 + "fields": {}, 98 + "children": { 99 + "multiple": true, 100 + "required": false, 101 + "types": [ 102 + { 103 + "type": "annotation_object_entry", 104 + "named": true 105 + } 106 + ] 107 + } 108 + }, 109 + { 110 + "type": "annotation_object_entry", 111 + "named": true, 112 + "fields": { 113 + "key": { 114 + "multiple": false, 115 + "required": true, 116 + "types": [ 117 + { 118 + "type": "string", 119 + "named": true 120 + } 121 + ] 122 + }, 123 + "value": { 124 + "multiple": false, 125 + "required": true, 126 + "types": [ 127 + { 128 + "type": "annotation_value", 129 + "named": true 130 + } 131 + ] 132 + } 133 + } 134 + }, 135 + { 80 136 "type": "annotation_selectors", 81 137 "named": true, 82 138 "fields": {}, ··· 100 156 "required": true, 101 157 "types": [ 102 158 { 159 + "type": "annotation_array", 160 + "named": true 161 + }, 162 + { 163 + "type": "annotation_object", 164 + "named": true 165 + }, 166 + { 103 167 "type": "boolean", 168 + "named": true 169 + }, 170 + { 171 + "type": "null_literal", 104 172 "named": true 105 173 }, 106 174 { ··· 109 177 }, 110 178 { 111 179 "type": "string", 180 + "named": true 181 + }, 182 + { 183 + "type": "type_path", 112 184 "named": true 113 185 } 114 186 ] ··· 461 533 "named": true 462 534 }, 463 535 { 536 + "type": "self_definition", 537 + "named": true 538 + }, 539 + { 464 540 "type": "subscription_definition", 465 541 "named": true 466 542 }, ··· 505 581 } 506 582 ] 507 583 } 584 + }, 585 + { 586 + "type": "null_literal", 587 + "named": true, 588 + "fields": {} 508 589 }, 509 590 { 510 591 "type": "object_type", ··· 755 836 } 756 837 }, 757 838 { 839 + "type": "self_definition", 840 + 
"named": true, 841 + "fields": {}, 842 + "children": { 843 + "multiple": true, 844 + "required": false, 845 + "types": [ 846 + { 847 + "type": "annotation", 848 + "named": true 849 + } 850 + ] 851 + } 852 + }, 853 + { 758 854 "type": "source_file", 759 855 "named": true, 856 + "root": true, 760 857 "fields": {}, 761 858 "children": { 762 859 "multiple": true, ··· 996 1093 }, 997 1094 { 998 1095 "type": "comment", 999 - "named": true 1096 + "named": true, 1097 + "extra": true 1000 1098 }, 1001 1099 { 1002 1100 "type": "constrained", ··· 1008 1106 }, 1009 1107 { 1010 1108 "type": "doc_comment", 1011 - "named": true 1109 + "named": true, 1110 + "extra": true 1012 1111 }, 1013 1112 { 1014 1113 "type": "error", ··· 1048 1147 }, 1049 1148 { 1050 1149 "type": "record", 1150 + "named": false 1151 + }, 1152 + { 1153 + "type": "self", 1051 1154 "named": false 1052 1155 }, 1053 1156 {
+83 -1
website/content/docs/language-guide/11-annotations.md
··· 30 30 31 31 Arguments can be: 32 32 - **Strings**: `"value"` 33 - - **Numbers**: `42`, `3.14` 33 + - **Numbers**: `42`, `3.14`, `-10` 34 34 - **Booleans**: `true`, `false` 35 + - **Null**: `null` 36 + - **Arrays / objects** of the above (JSON-shaped literals) 37 + - **Type paths**: `com.example.other.thing` (used by `@reference`) 35 38 36 39 ### Named Arguments 37 40 ··· 189 192 - `"application/xml"` - XML 190 193 - `"*/*"` - Any MIME type 191 194 - Custom MIME types as needed 195 + 196 + ## Lexicon-Level Metadata: `self { }` 197 + 198 + ATProto Lexicon JSON has four top-level fields: `lexicon`, `id`, `description`, and `defs`. The first two are derived mechanically from the file, and `defs` is filled by your record/query/etc. definitions. The remaining one — `description`, along with any non-spec top-level fields vendors commonly add — needs somewhere to live in MLF source. 199 + 200 + That somewhere is `self { }`: a first-class item that represents the lexicon itself. Doc comments on `self { }` become the top-level `description`, and extension annotations on it become top-level JSON fields. 201 + 202 + ```mlf 203 + /// Blog-style post record for the Bluesky feed. 204 + @const("revision", 3) 205 + @const("x-vendor-flag", true) 206 + self {} 207 + 208 + record post { 209 + text!: string, 210 + createdAt!: Datetime, 211 + } 212 + ``` 213 + 214 + Generates: 215 + 216 + ```json 217 + { 218 + "$type": "com.atproto.lexicon.schema", 219 + "lexicon": 1, 220 + "id": "...", 221 + "description": "Blog-style post record for the Bluesky feed.", 222 + "revision": 3, 223 + "x-vendor-flag": true, 224 + "defs": { "main": { "type": "record", ... } } 225 + } 226 + ``` 227 + 228 + **Rules:** 229 + - Optional. Files without `self { }` generate exactly the same output as they did before `self { }` existed. 230 + - At most one per file, at the top level only. 231 + - Body is always empty (`{}`) in V1; the shape is reserved for future contents. 
232 + 233 + ## Extension Annotations 234 + 235 + Annotations parse uniformly — any `@name(args...)` is grammatically valid. Two annotation names are recognized by the lexicon generator as carrying JSON extension fields; everything else is metadata that codegen ignores. Both attach to `self { }` for top-level fields, or to any record / query / procedure / subscription / def / token for per-item extra fields. 236 + 237 + ### `@const(key, value)` — literal 238 + 239 + Emitted verbatim as JSON. Takes any annotation literal: string, integer, boolean, null, array, or nested object. This is the form used for lexicon-level metadata and is also what the JSON-to-MLF converter emits when preserving non-spec fields. 240 + 241 + ```mlf 242 + @const("revision", 3) 243 + @const("x-tags", ["alpha", "beta"]) 244 + @const("x-meta", { "team": "platform", "critical": true }) 245 + self {} 246 + ``` 247 + 248 + **On fractional numbers:** ATProto's data model has no floats. If you pass a fractional value to `@const`, the lexicon generator transparently stringifies it (`@const("x-threshold", 3.14)` emits `"x-threshold": "3.14"`) and emits a warning. Integers and whole-number floats emit as JSON numbers unchanged. 249 + 250 + ### `@reference(key, path)` — named-type reference 251 + 252 + Resolves the type path through the workspace and emits the resolved NSID string. Useful when you want an extension field to point at another defined type without hardcoding its NSID. 253 + 254 + ```mlf 255 + @reference("xFallbackType", com.example.other.thing) 256 + record foo { 257 + name!: string, 258 + } 259 + ``` 260 + 261 + Generates (on the `foo` record): 262 + 263 + ```json 264 + { 265 + "type": "record", 266 + "xFallbackType": "com.example.other.thing", 267 + "record": { ... 
} 268 + } 269 + ``` 270 + 271 + ### Other annotations 272 + 273 + Any `@whatever(args)` the generator doesn't recognize is left untouched — it's metadata for whoever reads the AST (linters, codegen plugins, documentation tools). `@const` and `@reference` are the only two annotations that add extension fields to the JSON output. 192 274 193 275 ## Annotation Processing 194 276
+56
website/content/docs/language-guide/11-lexicon-mapping.md
··· 489 489 } 490 490 ``` 491 491 492 + ## Lexicon-Level Metadata 493 + 494 + The `self { }` item carries lexicon-level metadata: docs become the top-level `description`, and `@const` / `@reference` annotations on it become top-level JSON fields. See [Annotations → Lexicon-Level Metadata](/docs/language-guide/annotations/#lexicon-level-metadata-self) for the full story. 495 + 496 + **MLF:** 497 + ```mlf 498 + /// A short description of the whole lexicon. 499 + @const("revision", 3) 500 + self {} 501 + 502 + record post { 503 + text!: string, 504 + } 505 + ``` 506 + 507 + **Generated JSON:** 508 + ```json 509 + { 510 + "$type": "com.atproto.lexicon.schema", 511 + "lexicon": 1, 512 + "id": "com.example.post", 513 + "description": "A short description of the whole lexicon.", 514 + "revision": 3, 515 + "defs": { 516 + "main": { "type": "record", ... } 517 + } 518 + } 519 + ``` 520 + 521 + ## Per-Item Extension Fields 522 + 523 + `@const` and `@reference` attached to a record / query / procedure / subscription / def / token emit as extra fields on that item's JSON object. This is how the JSON-to-MLF converter preserves non-spec fields on individual defs. 524 + 525 + **MLF:** 526 + ```mlf 527 + @const("x-deprecated", true) 528 + @reference("xFallbackType", com.example.other.thing) 529 + record post { 530 + text!: string, 531 + } 532 + ``` 533 + 534 + **Generated JSON:** 535 + ```json 536 + { 537 + "defs": { 538 + "main": { 539 + "type": "record", 540 + "x-deprecated": true, 541 + "xFallbackType": "com.example.other.thing", 542 + "record": { "type": "object", ... } 543 + } 544 + } 545 + } 546 + ``` 547 + 492 548 ## Complete Example Comparison 493 549 494 550 Here's a full lexicon showing MLF and its JSON output:
+1 -1
website/syntaxes/mlf.sublime-syntax
··· 83 83 pop: true 84 84 85 85 keywords: 86 - - match: '\b(namespace|use|as|record|inline|def|type|token|query|procedure|subscription|throws|constrained|error)\b' 86 + - match: '\b(namespace|use|as|record|inline|def|type|token|query|procedure|subscription|throws|constrained|error|self)\b' 87 87 scope: keyword.control.mlf 88 88 - match: '\b(main|defs)\b' 89 89 scope: keyword.other.mlf