···448448 println!(" → Saved JSON (checksum verified)");
449449450450 // Convert to MLF
451451- let mlf_content = crate::generate::mlf::generate_mlf_from_json(&fetched.lexicon)
451451+ let converted = crate::generate::mlf::generate_mlf_from_json(&fetched.lexicon)
452452 .map_err(|e| FetchError::ConversionError(format!("{:?}", e)))?;
453453+ for warning in &converted.warnings {
454454+ eprintln!(" ⚠ {}: {}", warning.namespace, warning.message);
455455+ }
453456454457 let mut mlf_path = mlf_dir.join("lexicons/mlf");
455458 for segment in nsid.split('.') {
···460463 if let Some(parent) = mlf_path.parent() {
461464 std::fs::create_dir_all(parent)?;
462465 }
463463- std::fs::write(&mlf_path, mlf_content)?;
466466+ std::fs::write(&mlf_path, converted.mlf)?;
464467 println!(" → Converted to MLF");
465468466469 Ok(())
···531534 println!(" → Saved JSON to {}", json_path.display());
532535533536 // Convert to MLF
534534- let mlf_content = crate::generate::mlf::generate_mlf_from_json(&fetched.lexicon)
537537+ let converted = crate::generate::mlf::generate_mlf_from_json(&fetched.lexicon)
535538 .map_err(|e| FetchError::ConversionError(format!("{:?}", e)))?;
539539+ for warning in &converted.warnings {
540540+ eprintln!(" ⚠ {}: {}", warning.namespace, warning.message);
541541+ }
536542537543 // Save MLF file
538544 let mut mlf_path = mlf_dir.join("lexicons/mlf");
···544550 if let Some(parent) = mlf_path.parent() {
545551 std::fs::create_dir_all(parent)?;
546552 }
547547- std::fs::write(&mlf_path, mlf_content)?;
553553+ std::fs::write(&mlf_path, converted.mlf)?;
548554 println!(" → Converted to MLF at {}", mlf_path.display());
549555550556 // Calculate hash and extract dependencies for lockfile
+103-16
mlf-cli/src/generate/mlf.rs
···11use miette::Diagnostic;
22use serde_json::Value;
33+use std::cell::RefCell;
34use std::path::PathBuf;
45use thiserror::Error;
/// A non-fatal issue surfaced by the Lexicon→MLF converter.
///
/// Produced when the source lexicon is malformed in a way we can recover
/// from (typically: missing spec-required fields we coerce to empty /
/// fall-back values). Callers decide whether to print, log, or ignore.
///
/// The [`Display`](std::fmt::Display) form is `"<namespace>: <message>"`,
/// so every caller can render a warning the same way and only add its own
/// prefix (e.g. `eprintln!(" ⚠ {warning}")`).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ConversionWarning {
    /// Namespace of the lexicon the warning came from.
    pub namespace: String,
    /// Human-readable description of what we coerced and why.
    pub message: String,
}

impl std::fmt::Display for ConversionWarning {
    /// Writes the warning as `namespace: message`, with no prefix or
    /// trailing newline.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}: {}", self.namespace, self.message)
    }
}
1818+1919+/// Output of [`generate_mlf_from_json`]. Carries both the rendered MLF
2020+/// and any non-fatal warnings accumulated during conversion.
2121+#[derive(Debug, Clone)]
2222+pub struct MlfGenerateOutput {
2323+ pub mlf: String,
2424+ pub warnings: Vec<ConversionWarning>,
2525+}
526627#[derive(Error, Debug, Diagnostic)]
728pub enum MlfGenerateError {
···131152 }
132153 };
133154134134- let mlf_content = match generate_mlf_from_json(&json) {
135135- Ok(content) => content,
155155+ let output = match generate_mlf_from_json(&json) {
156156+ Ok(output) => output,
136157 Err(e) => {
137158 errors.push((file_path.display().to_string(), format!("{:?}", e)));
138159 continue;
139160 }
140161 };
162162+ for warning in &output.warnings {
163163+ eprintln!(
164164+ "warning ({}): {}",
165165+ warning.namespace, warning.message
166166+ );
167167+ }
168168+ let mlf_content = output.mlf;
141169142170 // Extract namespace from JSON "id" field
143171 let namespace = json
···197225 Ok(())
198226}
199227200200-pub fn generate_mlf_from_json(json: &Value) -> Result<String, MlfGenerateError> {
228228+pub fn generate_mlf_from_json(json: &Value) -> Result<MlfGenerateOutput, MlfGenerateError> {
201229 let mut output = String::new();
202230203231 // Extract NSID to get the last segment for "main" definitions
···219247 // Create a context to pass the current namespace to type generation
220248 let ctx = ConversionContext {
221249 current_namespace: nsid.to_string(),
250250+ warnings: RefCell::new(Vec::new()),
222251 };
223252224253 // Process all definitions
···264293 }
265294 }
266295267267- Ok(output)
296296+ Ok(MlfGenerateOutput {
297297+ mlf: output,
298298+ warnings: ctx.warnings.into_inner(),
299299+ })
268300}
269301270302struct ConversionContext {
271303 current_namespace: String,
304304+ /// Non-fatal issues accumulated during conversion. Callers receive
305305+ /// these via [`MlfGenerateOutput`] and decide what to do with them
306306+ /// (print to stderr, collect for a summary, suppress).
307307+ warnings: RefCell<Vec<ConversionWarning>>,
308308+}
309309+310310+impl ConversionContext {
311311+ fn warn(&self, message: impl Into<String>) {
312312+ self.warnings.borrow_mut().push(ConversionWarning {
313313+ namespace: self.current_namespace.clone(),
314314+ message: message.into(),
315315+ });
316316+ }
272317}
273318274319/// Reserved words in MLF that need to be escaped
···877922 ctx: &ConversionContext,
878923 indent_level: usize,
879924) -> Result<Rendered, MlfGenerateError> {
880880- let items = type_def
881881- .get("items")
882882- .ok_or_else(|| MlfGenerateError::InvalidLexicon {
883883- message: "Missing 'items' in array type".to_string(),
884884- })?;
925925+ // `items` is spec-required on `array` types. Handle a missing field
926926+ // leniently — fall back to `unknown` as the item type, warn — to
927927+ // match the lenient handling of `object` without `properties` and
928928+ // empty-refs unions. Malformed-but-publishable lexicons stay
929929+ // convertible instead of blocking the whole authority.
930930+ let fallback_items = Value::Object(serde_json::Map::from_iter([(
931931+ "type".to_string(),
932932+ Value::String("unknown".to_string()),
933933+ )]));
934934+ let items = match type_def.get("items") {
935935+ Some(v) => v,
936936+ None => {
937937+ ctx.warn(
938938+ "array type is missing `items` field; \
939939+ treating item type as `unknown` (ATProto spec lists `items` as required)",
940940+ );
941941+ &fallback_items
942942+ }
943943+ };
885944 let item = generate_type(items, ctx, indent_level)?;
886945 let base = format!("{}[]", item.into_array_base());
887946 Ok(Rendered::with_constraints(&base, type_def, indent_level))
···895954 let obj = type_def.as_object().ok_or_else(|| MlfGenerateError::InvalidLexicon {
896955 message: "Object type definition is not a JSON object".to_string(),
897956 })?;
898898- let properties = obj
899899- .get("properties")
900900- .and_then(|v| v.as_object())
901901- .ok_or_else(|| MlfGenerateError::InvalidLexicon {
902902- message: "Missing 'properties' in object type".to_string(),
903903- })?;
957957+958958+ // The spec lists `properties` as required on object types, but real-
959959+ // world lexicons (e.g. blog.pckt.richtext.facet marker defs) publish
960960+ // empty objects with no `properties` field at all. Accept that
961961+ // leniently — semantically missing `properties` is equivalent to
962962+ // `properties: {}` — but surface a warning so authors know their
963963+ // lexicon isn't strictly spec-compliant.
964964+ let empty_map = serde_json::Map::new();
965965+ let properties = match obj.get("properties") {
966966+ Some(v) => v.as_object().ok_or_else(|| MlfGenerateError::InvalidLexicon {
967967+ message: "`properties` in object type must be a JSON object".to_string(),
968968+ })?,
969969+ None => {
970970+ ctx.warn(
971971+ "object type is missing `properties` field; \
972972+ treating as empty (ATProto spec lists `properties` as required)",
973973+ );
974974+ &empty_map
975975+ }
976976+ };
904977 let required = string_array(obj, "required");
905978 let nullable = string_array(obj, "nullable");
906979···9771050 message: "Missing 'refs' in union type".to_string(),
9781051 })?;
979105210531053+ let closed = type_def.get("closed").and_then(|v| v.as_bool()).unwrap_or(false);
10541054+10551055+ // An open union with zero refs is malformed per the ATProto spec — it
10561056+ // names no valid types at all. Real-world lexicons (e.g.
10571057+ // blog.pckt.content) publish this shape anyway; rather than refusing
10581058+ // to convert we fall back to `unknown` with a warning, matching the
10591059+ // lenient handling of `object` types without `properties`.
10601060+ if refs.is_empty() && !closed {
10611061+ ctx.warn(
10621062+ "open union has no `refs`; emitting `unknown` as a placeholder \
10631063+ (ATProto spec lists `refs` as required on union types)",
10641064+ );
10651065+ return Ok(Rendered::atom("unknown"));
10661066+ }
10671067+9801068 // Each entry in `refs` is a string (local `#defName` or external
9811069 // `namespace#defName`), not a nested type object.
9821070 let parts: Vec<String> = refs
···9881076 })
9891077 .collect();
9901078991991- let closed = type_def.get("closed").and_then(|v| v.as_bool()).unwrap_or(false);
9921079 let mut text = parts.join(" | ");
9931080 if closed {
9941081 text.push_str(" | !");