A human-friendly DSL for ATProto Lexicons
27
fork

Configure Feed

Select the types of activity you want to include in your feed.

Treat required and nullable as unordered sets in tests

authored by stavola.xyz and committed by

Tangled 23681144 1e59547a

+54 -36
+54 -36
tests/real_world/roundtrip.rs
··· 285 285 ) -> ComparisonResult { 286 286 let mut acceptable_diffs = Vec::new(); 287 287 288 - // Lexicon authors use any of three syntactic forms for references 289 - // (`#foo`, `ns.path#foo`, bare `ns.path` for implicit-main), so we 290 - // rewrite both sides to a single canonical form before comparing. 291 - // This folds away C7/C8-style diffs that are purely stylistic. 288 + // Fold away purely-stylistic differences before comparing: the three 289 + // ATProto-equivalent ref forms, and set-shaped fields like `required` 290 + // and `nullable` whose element order carries no semantic meaning. 292 291 let lexicon_id = original 293 292 .get("id") 294 293 .and_then(|v| v.as_str()) ··· 296 295 .to_string(); 297 296 let mut original = original.clone(); 298 297 let mut generated = generated.clone(); 299 - canonicalize_refs(&mut original, &lexicon_id); 300 - canonicalize_refs(&mut generated, &lexicon_id); 298 + canonicalize_lexicon(&mut original, &lexicon_id); 299 + canonicalize_lexicon(&mut generated, &lexicon_id); 301 300 302 301 let original_stripped = strip_dollar_type(&original); 303 302 let generated_stripped = strip_dollar_type(&generated); ··· 316 315 ComparisonResult::Failure("Structural differences detected".to_string()) 317 316 } 318 317 319 - /// Rewrite every reference string inside `value` to its canonical 320 - /// `authority#defName` form, so the three syntactic variants compare 321 - /// equal: 318 + /// Walk `value` and rewrite every node that carries semantic-equivalence 319 + /// noise into a single canonical form. This exists because the 320 + /// authoritative lexicons we roundtrip against exercise author-chosen 321 + /// styles that ATProto treats as equivalent; byte comparison would flag 322 + /// them as diffs even though the lexicons are identical in meaning. 322 323 /// 323 - /// * `#foo` → `<lexicon_id>#foo` 324 - /// * `ns.path#foo` → `ns.path#foo` (unchanged) 325 - /// * `ns.path` (bare NSID) → `ns.path#main` (ATProto spec shorthand) 324 + /// Currently folds: 326 325 /// 327 - /// The walk descends into every value, but only rewrites strings found 328 - /// at well-known ref positions (`{"type":"ref", "ref": ...}` and the 329 - /// `refs` array of a union). 330 - fn canonicalize_refs(value: &mut serde_json::Value, lexicon_id: &str) { 326 + /// * The three ATProto reference forms — local `#foo`, explicit 327 + /// `ns#foo`, bare `ns` (= `ns#main`) — into `ns#foo` at every ref site. 328 + /// * Set-shaped string arrays (`required`, `nullable`) — ATProto defines 329 + /// these as sets, so their element order carries no meaning — into a 330 + /// sorted order at every object. 331 + fn canonicalize_lexicon(value: &mut serde_json::Value, lexicon_id: &str) { 331 332 match value { 332 333 serde_json::Value::Object(obj) => { 333 - let kind = obj.get("type").and_then(|v| v.as_str()).map(str::to_owned); 334 - match kind.as_deref() { 335 - Some("ref") => { 336 - if let Some(serde_json::Value::String(s)) = obj.get_mut("ref") { 337 - *s = canonicalize_ref_string(s, lexicon_id); 338 - } 339 - } 340 - Some("union") => { 341 - if let Some(serde_json::Value::Array(arr)) = obj.get_mut("refs") { 342 - for item in arr.iter_mut() { 343 - if let serde_json::Value::String(s) = item { 344 - *s = canonicalize_ref_string(s, lexicon_id); 345 - } 346 - } 347 - } 348 - } 349 - _ => {} 350 - } 334 + canonicalize_ref_site(obj, lexicon_id); 335 + sort_set_arrays(obj); 351 336 for (_, v) in obj.iter_mut() { 352 - canonicalize_refs(v, lexicon_id); 337 + canonicalize_lexicon(v, lexicon_id); 353 338 } 354 339 } 355 340 serde_json::Value::Array(arr) => { 356 341 for v in arr.iter_mut() { 357 - canonicalize_refs(v, lexicon_id); 342 + canonicalize_lexicon(v, lexicon_id); 343 + } 344 + } 345 + _ => {} 346 + } 347 + } 348 + 349 + /// If `obj` is a ref or union node, rewrite its ref strings to canonical 350 + /// `authority#defName` form. 351 + fn canonicalize_ref_site(obj: &mut serde_json::Map<String, serde_json::Value>, lexicon_id: &str) { 352 + match obj.get("type").and_then(|v| v.as_str()) { 353 + Some("ref") => { 354 + if let Some(serde_json::Value::String(s)) = obj.get_mut("ref") { 355 + *s = canonicalize_ref_string(s, lexicon_id); 356 + } 357 + } 358 + Some("union") => { 359 + if let Some(serde_json::Value::Array(arr)) = obj.get_mut("refs") { 360 + for item in arr.iter_mut() { 361 + if let serde_json::Value::String(s) = item { 362 + *s = canonicalize_ref_string(s, lexicon_id); 363 + } 364 + } 358 365 } 359 366 } 360 367 _ => {} 368 + } 369 + } 370 + 371 + /// ATProto defines `required` and `nullable` as sets of field names. 372 + /// Sort them in place so two lexicons that list the same field names in 373 + /// different orders compare equal. 374 + fn sort_set_arrays(obj: &mut serde_json::Map<String, serde_json::Value>) { 375 + for key in ["required", "nullable"] { 376 + if let Some(serde_json::Value::Array(arr)) = obj.get_mut(key) { 377 + arr.sort_by(|a, b| a.as_str().unwrap_or("").cmp(b.as_str().unwrap_or(""))); 378 + } 361 379 } 362 380 } 363 381