···285285) -> ComparisonResult {
286286 let mut acceptable_diffs = Vec::new();
287287288288- // Lexicon authors use any of three syntactic forms for references
289289- // (`#foo`, `ns.path#foo`, bare `ns.path` for implicit-main), so we
290290- // rewrite both sides to a single canonical form before comparing.
291291- // This folds away C7/C8-style diffs that are purely stylistic.
288288+ // Fold away purely-stylistic differences before comparing: the three
289289+ // ATProto-equivalent ref forms, and set-shaped fields like `required`
290290+ // and `nullable` whose element order carries no semantic meaning.
292291 let lexicon_id = original
293292 .get("id")
294293 .and_then(|v| v.as_str())
···296295 .to_string();
297296 let mut original = original.clone();
298297 let mut generated = generated.clone();
299299- canonicalize_refs(&mut original, &lexicon_id);
300300- canonicalize_refs(&mut generated, &lexicon_id);
298298+ canonicalize_lexicon(&mut original, &lexicon_id);
299299+ canonicalize_lexicon(&mut generated, &lexicon_id);
301300302301 let original_stripped = strip_dollar_type(&original);
303302 let generated_stripped = strip_dollar_type(&generated);
···316315 ComparisonResult::Failure("Structural differences detected".to_string())
317316}
318317319319-/// Rewrite every reference string inside `value` to its canonical
320320-/// `authority#defName` form, so the three syntactic variants compare
321321-/// equal:
318318+/// Walk `value` and rewrite every node that carries semantic-equivalence
319319+/// noise into a single canonical form. This exists because the
320320+/// authoritative lexicons we roundtrip against exercise author-chosen
321321+/// styles that ATProto treats as equivalent; byte comparison would flag
322322+/// them as diffs even though the lexicons are identical in meaning.
322323///
323323-/// * `#foo` → `<lexicon_id>#foo`
324324-/// * `ns.path#foo` → `ns.path#foo` (unchanged)
325325-/// * `ns.path` (bare NSID) → `ns.path#main` (ATProto spec shorthand)
324324+/// Currently folds:
326325///
327327-/// The walk descends into every value, but only rewrites strings found
328328-/// at well-known ref positions (`{"type":"ref", "ref": ...}` and the
329329-/// `refs` array of a union).
330330-fn canonicalize_refs(value: &mut serde_json::Value, lexicon_id: &str) {
326326+/// * The three ATProto reference forms — local `#foo`, explicit
327327+/// `ns#foo`, bare `ns` (= `ns#main`) — into `ns#foo` at every ref site.
328328+/// * Set-shaped string arrays (`required`, `nullable`) — ATProto defines
329329+/// these as sets, so their element order carries no meaning — into a
330330+/// sorted order at every object.
331331+fn canonicalize_lexicon(value: &mut serde_json::Value, lexicon_id: &str) {
331332 match value {
332333 serde_json::Value::Object(obj) => {
333333- let kind = obj.get("type").and_then(|v| v.as_str()).map(str::to_owned);
334334- match kind.as_deref() {
335335- Some("ref") => {
336336- if let Some(serde_json::Value::String(s)) = obj.get_mut("ref") {
337337- *s = canonicalize_ref_string(s, lexicon_id);
338338- }
339339- }
340340- Some("union") => {
341341- if let Some(serde_json::Value::Array(arr)) = obj.get_mut("refs") {
342342- for item in arr.iter_mut() {
343343- if let serde_json::Value::String(s) = item {
344344- *s = canonicalize_ref_string(s, lexicon_id);
345345- }
346346- }
347347- }
348348- }
349349- _ => {}
350350- }
334334+ canonicalize_ref_site(obj, lexicon_id);
335335+ sort_set_arrays(obj);
351336 for (_, v) in obj.iter_mut() {
352352- canonicalize_refs(v, lexicon_id);
337337+ canonicalize_lexicon(v, lexicon_id);
353338 }
354339 }
355340 serde_json::Value::Array(arr) => {
356341 for v in arr.iter_mut() {
357357- canonicalize_refs(v, lexicon_id);
342342+ canonicalize_lexicon(v, lexicon_id);
343343+ }
344344+ }
345345+ _ => {}
346346+ }
347347+}
348348+349349+/// If `obj` is a ref or union node, rewrite its ref strings to canonical
350350+/// `authority#defName` form.
351351+fn canonicalize_ref_site(obj: &mut serde_json::Map<String, serde_json::Value>, lexicon_id: &str) {
352352+ match obj.get("type").and_then(|v| v.as_str()) {
353353+ Some("ref") => {
354354+ if let Some(serde_json::Value::String(s)) = obj.get_mut("ref") {
355355+ *s = canonicalize_ref_string(s, lexicon_id);
356356+ }
357357+ }
358358+ Some("union") => {
359359+ if let Some(serde_json::Value::Array(arr)) = obj.get_mut("refs") {
360360+ for item in arr.iter_mut() {
361361+ if let serde_json::Value::String(s) = item {
362362+ *s = canonicalize_ref_string(s, lexicon_id);
363363+ }
364364+ }
358365 }
359366 }
360367 _ => {}
368368+ }
369369+}
370370+371371+/// ATProto defines `required` and `nullable` as sets of field names.
372372+/// Sort them in place so two lexicons that list the same field names in
373373+/// different orders compare equal.
374374+fn sort_set_arrays(obj: &mut serde_json::Map<String, serde_json::Value>) {
375375+ for key in ["required", "nullable"] {
376376+ if let Some(serde_json::Value::Array(arr)) = obj.get_mut(key) {
377377+ arr.sort_by(|a, b| a.as_str().unwrap_or("").cmp(b.as_str().unwrap_or("")));
378378+ }
361379 }
362380}
363381