A human-friendly DSL for ATProto Lexicons
27
fork

Configure Feed

Select the types of activity you want to include in your feed.

Accept some malformed objects, unions, arrays with warnings

authored by stavola.xyz and committed by

Tangled 0630254c 56a85c7b

+187 -23
+10 -4
mlf-cli/src/fetch.rs
··· 448 448 println!(" → Saved JSON (checksum verified)"); 449 449 450 450 // Convert to MLF 451 - let mlf_content = crate::generate::mlf::generate_mlf_from_json(&fetched.lexicon) 451 + let converted = crate::generate::mlf::generate_mlf_from_json(&fetched.lexicon) 452 452 .map_err(|e| FetchError::ConversionError(format!("{:?}", e)))?; 453 + for warning in &converted.warnings { 454 + eprintln!(" ⚠ {}: {}", warning.namespace, warning.message); 455 + } 453 456 454 457 let mut mlf_path = mlf_dir.join("lexicons/mlf"); 455 458 for segment in nsid.split('.') { ··· 460 463 if let Some(parent) = mlf_path.parent() { 461 464 std::fs::create_dir_all(parent)?; 462 465 } 463 - std::fs::write(&mlf_path, mlf_content)?; 466 + std::fs::write(&mlf_path, converted.mlf)?; 464 467 println!(" → Converted to MLF"); 465 468 466 469 Ok(()) ··· 531 534 println!(" → Saved JSON to {}", json_path.display()); 532 535 533 536 // Convert to MLF 534 - let mlf_content = crate::generate::mlf::generate_mlf_from_json(&fetched.lexicon) 537 + let converted = crate::generate::mlf::generate_mlf_from_json(&fetched.lexicon) 535 538 .map_err(|e| FetchError::ConversionError(format!("{:?}", e)))?; 539 + for warning in &converted.warnings { 540 + eprintln!(" ⚠ {}: {}", warning.namespace, warning.message); 541 + } 536 542 537 543 // Save MLF file 538 544 let mut mlf_path = mlf_dir.join("lexicons/mlf"); ··· 544 550 if let Some(parent) = mlf_path.parent() { 545 551 std::fs::create_dir_all(parent)?; 546 552 } 547 - std::fs::write(&mlf_path, mlf_content)?; 553 + std::fs::write(&mlf_path, converted.mlf)?; 548 554 println!(" → Converted to MLF at {}", mlf_path.display()); 549 555 550 556 // Calculate hash and extract dependencies for lockfile
+103 -16
mlf-cli/src/generate/mlf.rs
··· 1 1 use miette::Diagnostic; 2 2 use serde_json::Value; 3 + use std::cell::RefCell; 3 4 use std::path::PathBuf; 4 5 use thiserror::Error; 6 + 7 + /// A non-fatal issue surfaced by the Lexicon→MLF converter. Produced 8 + /// when the source lexicon is malformed in a way we can recover from 9 + /// (typically: missing spec-required fields we coerce to empty / 10 + /// fall-back values). Callers decide whether to print, log, or ignore. 11 + #[derive(Debug, Clone, PartialEq, Eq)] 12 + pub struct ConversionWarning { 13 + /// Namespace of the lexicon the warning came from. 14 + pub namespace: String, 15 + /// Human-readable description of what we coerced and why. 16 + pub message: String, 17 + } 18 + 19 + /// Output of [`generate_mlf_from_json`]. Carries both the rendered MLF 20 + /// and any non-fatal warnings accumulated during conversion. 21 + #[derive(Debug, Clone)] 22 + pub struct MlfGenerateOutput { 23 + pub mlf: String, 24 + pub warnings: Vec<ConversionWarning>, 25 + } 5 26 6 27 #[derive(Error, Debug, Diagnostic)] 7 28 pub enum MlfGenerateError { ··· 131 152 } 132 153 }; 133 154 134 - let mlf_content = match generate_mlf_from_json(&json) { 135 - Ok(content) => content, 155 + let output = match generate_mlf_from_json(&json) { 156 + Ok(output) => output, 136 157 Err(e) => { 137 158 errors.push((file_path.display().to_string(), format!("{:?}", e))); 138 159 continue; 139 160 } 140 161 }; 162 + for warning in &output.warnings { 163 + eprintln!( 164 + "warning ({}): {}", 165 + warning.namespace, warning.message 166 + ); 167 + } 168 + let mlf_content = output.mlf; 141 169 142 170 // Extract namespace from JSON "id" field 143 171 let namespace = json ··· 197 225 Ok(()) 198 226 } 199 227 200 - pub fn generate_mlf_from_json(json: &Value) -> Result<String, MlfGenerateError> { 228 + pub fn generate_mlf_from_json(json: &Value) -> Result<MlfGenerateOutput, MlfGenerateError> { 201 229 let mut output = String::new(); 202 230 203 231 // Extract NSID to get the last segment for "main" definitions ··· 219 247 // Create a context to pass the current namespace to type generation 220 248 let ctx = ConversionContext { 221 249 current_namespace: nsid.to_string(), 250 + warnings: RefCell::new(Vec::new()), 222 251 }; 223 252 224 253 // Process all definitions ··· 264 293 } 265 294 } 266 295 267 - Ok(output) 296 + Ok(MlfGenerateOutput { 297 + mlf: output, 298 + warnings: ctx.warnings.into_inner(), 299 + }) 268 300 } 269 301 270 302 struct ConversionContext { 271 303 current_namespace: String, 304 + /// Non-fatal issues accumulated during conversion. Callers receive 305 + /// these via [`MlfGenerateOutput`] and decide what to do with them 306 + /// (print to stderr, collect for a summary, suppress). 307 + warnings: RefCell<Vec<ConversionWarning>>, 308 + } 309 + 310 + impl ConversionContext { 311 + fn warn(&self, message: impl Into<String>) { 312 + self.warnings.borrow_mut().push(ConversionWarning { 313 + namespace: self.current_namespace.clone(), 314 + message: message.into(), 315 + }); 316 + } 272 317 } 273 318 274 319 /// Reserved words in MLF that need to be escaped ··· 877 922 ctx: &ConversionContext, 878 923 indent_level: usize, 879 924 ) -> Result<Rendered, MlfGenerateError> { 880 - let items = type_def 881 - .get("items") 882 - .ok_or_else(|| MlfGenerateError::InvalidLexicon { 883 - message: "Missing 'items' in array type".to_string(), 884 - })?; 925 + // `items` is spec-required on `array` types. Handle a missing field 926 + // leniently — fall back to `unknown` as the item type, warn — to 927 + // match the lenient handling of `object` without `properties` and 928 + // empty-refs unions. Malformed-but-publishable lexicons stay 929 + // convertible instead of blocking the whole authority. 930 + let fallback_items = Value::Object(serde_json::Map::from_iter([( 931 + "type".to_string(), 932 + Value::String("unknown".to_string()), 933 + )])); 934 + let items = match type_def.get("items") { 935 + Some(v) => v, 936 + None => { 937 + ctx.warn( 938 + "array type is missing `items` field; \ 939 + treating item type as `unknown` (ATProto spec lists `items` as required)", 940 + ); 941 + &fallback_items 942 + } 943 + }; 885 944 let item = generate_type(items, ctx, indent_level)?; 886 945 let base = format!("{}[]", item.into_array_base()); 887 946 Ok(Rendered::with_constraints(&base, type_def, indent_level)) ··· 895 954 let obj = type_def.as_object().ok_or_else(|| MlfGenerateError::InvalidLexicon { 896 955 message: "Object type definition is not a JSON object".to_string(), 897 956 })?; 898 - let properties = obj 899 - .get("properties") 900 - .and_then(|v| v.as_object()) 901 - .ok_or_else(|| MlfGenerateError::InvalidLexicon { 902 - message: "Missing 'properties' in object type".to_string(), 903 - })?; 957 + 958 + // The spec lists `properties` as required on object types, but real- 959 + // world lexicons (e.g. blog.pckt.richtext.facet marker defs) publish 960 + // empty objects with no `properties` field at all. Accept that 961 + // leniently — semantically missing `properties` is equivalent to 962 + // `properties: {}` — but surface a warning so authors know their 963 + // lexicon isn't strictly spec-compliant. 964 + let empty_map = serde_json::Map::new(); 965 + let properties = match obj.get("properties") { 966 + Some(v) => v.as_object().ok_or_else(|| MlfGenerateError::InvalidLexicon { 967 + message: "`properties` in object type must be a JSON object".to_string(), 968 + })?, 969 + None => { 970 + ctx.warn( 971 + "object type is missing `properties` field; \ 972 + treating as empty (ATProto spec lists `properties` as required)", 973 + ); 974 + &empty_map 975 + } 976 + }; 904 977 let required = string_array(obj, "required"); 905 978 let nullable = string_array(obj, "nullable"); 906 979 ··· 977 1050 message: "Missing 'refs' in union type".to_string(), 978 1051 })?; 979 1052 1053 + let closed = type_def.get("closed").and_then(|v| v.as_bool()).unwrap_or(false); 1054 + 1055 + // An open union with zero refs is malformed per the ATProto spec — it 1056 + // names no valid types at all. Real-world lexicons (e.g. 1057 + // blog.pckt.content) publish this shape anyway; rather than refusing 1058 + // to convert we fall back to `unknown` with a warning, matching the 1059 + // lenient handling of `object` types without `properties`. 1060 + if refs.is_empty() && !closed { 1061 + ctx.warn( 1062 + "open union has no `refs`; emitting `unknown` as a placeholder \ 1063 + (ATProto spec lists `refs` as required on union types)", 1064 + ); 1065 + return Ok(Rendered::atom("unknown")); 1066 + } 1067 + 980 1068 // Each entry in `refs` is a string (local `#defName` or external 981 1069 // `namespace#defName`), not a nested type object. 982 1070 let parts: Vec<String> = refs ··· 988 1076 }) 989 1077 .collect(); 990 1078 991 - let closed = type_def.get("closed").and_then(|v| v.as_bool()).unwrap_or(false); 992 1079 let mut text = parts.join(" | "); 993 1080 if closed { 994 1081 text.push_str(" | !");
+2
tests/lexicon_to_mlf/lenient_array_without_items/expected.mlf
··· 1 + def type tags = unknown[]; 2 +
+1
tests/lexicon_to_mlf/lenient_array_without_items/expected_warnings.txt
··· 1 + com.example.arraynoitems: array type is missing `items` field; treating item type as `unknown` (ATProto spec lists `items` as required)
+9
tests/lexicon_to_mlf/lenient_array_without_items/input.json
··· 1 + { 2 + "lexicon": 1, 3 + "id": "com.example.arraynoitems", 4 + "defs": { 5 + "tags": { 6 + "type": "array" 7 + } 8 + } 9 + }
+2
tests/lexicon_to_mlf/lenient_empty_union_refs/expected.mlf
··· 1 + def type items = unknown[]; 2 +
+1
tests/lexicon_to_mlf/lenient_empty_union_refs/expected_warnings.txt
··· 1 + com.example.emptyunion: open union has no `refs`; emitting `unknown` as a placeholder (ATProto spec lists `refs` as required on union types)
+14
tests/lexicon_to_mlf/lenient_empty_union_refs/input.json
··· 1 + { 2 + "lexicon": 1, 3 + "id": "com.example.emptyunion", 4 + "defs": { 5 + "items": { 6 + "type": "array", 7 + "items": { 8 + "type": "union", 9 + "refs": [], 10 + "closed": false 11 + } 12 + } 13 + } 14 + }
+4
tests/lexicon_to_mlf/lenient_object_without_properties/expected.mlf
··· 1 + /// Marker for bold text 2 + def type bold = { 3 + }; 4 +
+1
tests/lexicon_to_mlf/lenient_object_without_properties/expected_warnings.txt
··· 1 + com.example.marker: object type is missing `properties` field; treating as empty (ATProto spec lists `properties` as required)
+10
tests/lexicon_to_mlf/lenient_object_without_properties/input.json
··· 1 + { 2 + "lexicon": 1, 3 + "id": "com.example.marker", 4 + "defs": { 5 + "bold": { 6 + "type": "object", 7 + "description": "Marker for bold text" 8 + } 9 + } 10 + }
+30 -3
tests/lexicon_to_mlf_integration.rs
··· 3 3 // Each subdirectory of `lexicon_to_mlf/` is a single test case containing: 4 4 // - `input.json`: the Lexicon JSON to convert 5 5 // - `expected.mlf`: the expected MLF source produced by the converter 6 + // - `expected_warnings.txt` (optional): the expected warnings, one 7 + // per line in `<namespace>: <message>` form. When absent, the test 8 + // asserts no warnings were produced. 6 9 7 10 use mlf_cli::generate::mlf::generate_mlf_from_json; 8 11 use serde_json::Value; ··· 18 21 let output = generate_mlf_from_json(&json).map_err(|e| format!("{:?}", e))?; 19 22 20 23 let expected = fs::read_to_string(test_dir.join("expected.mlf"))?; 24 + if output.mlf != expected { 25 + return Err(format!( 26 + "MLF mismatch:\n--- expected ---\n{}\n--- got ---\n{}", 27 + expected, output.mlf 28 + ) 29 + .into()); 30 + } 21 31 22 - if output != expected { 32 + let warnings_path = test_dir.join("expected_warnings.txt"); 33 + let expected_warnings = if warnings_path.exists() { 34 + fs::read_to_string(&warnings_path)? 35 + } else { 36 + String::new() 37 + }; 38 + let actual_warnings = output 39 + .warnings 40 + .iter() 41 + .map(|w| format!("{}: {}", w.namespace, w.message)) 42 + .collect::<Vec<_>>() 43 + .join("\n"); 44 + let actual_warnings = if actual_warnings.is_empty() { 45 + String::new() 46 + } else { 47 + format!("{}\n", actual_warnings) 48 + }; 49 + if actual_warnings != expected_warnings { 23 50 return Err(format!( 24 - "Output mismatch:\n--- expected ---\n{}\n--- got ---\n{}", 25 - expected, output 51 + "Warnings mismatch:\n--- expected ---\n{}\n--- got ---\n{}", 52 + expected_warnings, actual_warnings 26 53 ) 27 54 .into()); 28 55 }