use crate::config::{find_project_root, get_mlf_cache_dir, init_mlf_cache, ConfigError, MlfConfig, LockFile};
use mlf_lexicon_fetcher::{optimize_fetch_patterns, ProductionLexiconFetcher};
use miette::Diagnostic;
use sha2::{Digest, Sha256};
use std::collections::HashSet;
use thiserror::Error;

#[derive(Error, Debug, Diagnostic)]
pub enum FetchError {
    #[error("Failed to find project root")]
    #[diagnostic(code(mlf::fetch::no_project_root))]
    NoProjectRoot(#[from] ConfigError),

    #[error("Failed to create .mlf directory: {0}")]
    #[diagnostic(code(mlf::fetch::init_failed))]
    InitFailed(#[source] std::io::Error),

    #[error("Failed to fetch lexicon from ATProto repo: {0}")]
    #[diagnostic(code(mlf::fetch::http_error))]
    HttpError(String),

    #[error("Failed to parse lexicon JSON: {0}")]
    #[diagnostic(code(mlf::fetch::parse_error))]
    ParseError(#[from] serde_json::Error),

    #[error("Failed to convert lexicon to MLF: {0}")]
    #[diagnostic(code(mlf::fetch::conversion_error))]
    ConversionError(String),

    #[error("IO error: {0}")]
    #[diagnostic(code(mlf::fetch::io_error))]
    IoError(#[from] std::io::Error),

    #[error("Invalid NSID format: {0}")]
    #[diagnostic(code(mlf::fetch::invalid_nsid))]
    InvalidNsid(String),
}
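// Hedged sketch: how one of these variants surfaces through miette, assuming
// the binary's entry point returns `miette::Result<()>` so the
// `#[diagnostic(code(...))]` tags above get rendered (the actual CLI wiring
// lives outside this file):
//
//     fn main() -> miette::Result<()> {
//         Err(miette::Report::new(FetchError::InvalidNsid("foo".into())))
//     }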
/// Main entry point for fetch command
pub async fn run_fetch(nsid: Option<String>, save: bool, update: bool, locked: bool) -> Result<(), FetchError> {
    // Validate flags
    if update && locked {
        return Err(FetchError::HttpError(
            "Cannot use --update and --locked together".to_string()
        ));
    }

    // Find project root
    let current_dir = std::env::current_dir()?;
    let project_root = ensure_project_root(&current_dir)?;

    match nsid {
        Some(namespace) => {
            // Fetch single namespace with transitive dependencies
            let lockfile_path = project_root.join("mlf-lock.toml");
            let mut lockfile = LockFile::load(&lockfile_path).unwrap_or_else(|_| LockFile::new());

            // Load config to check if transitive deps are enabled
            let config_path = project_root.join("mlf.toml");
            let config = MlfConfig::load(&config_path).map_err(FetchError::NoProjectRoot)?;

            fetch_lexicon_with_lock(&namespace, &project_root, &mut lockfile).await?;

            // Handle transitive dependencies if enabled
            if config.dependencies.allow_transitive_deps {
                println!("\n→ Checking for transitive dependencies...");
                fetch_transitive_dependencies(
                    &project_root,
                    &mut lockfile,
                    config.dependencies.optimize_transitive_fetches
                ).await?;
            }

            // Save lockfile
            lockfile.save(&lockfile_path).map_err(FetchError::NoProjectRoot)?;
            println!("\n→ Updated mlf-lock.toml");

            // Save to mlf.toml if --save flag is provided
            if save {
                save_dependency(&project_root, &namespace)?;
            }

            Ok(())
        }
        None => {
            // Fetch all dependencies from mlf.toml
            fetch_all_dependencies(&project_root, update, locked).await
        }
    }
}
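// Hedged sketch of how this is driven from the CLI. The flag names mirror the
// checks above; the argument parser and async runtime (tokio is assumed here)
// are not part of this file:
//
//     #[tokio::main]
//     async fn main() -> Result<(), FetchError> {
//         // `mlf fetch com.atproto.repo.strongRef --save`
//         run_fetch(Some("com.atproto.repo.strongRef".into()), true, false, false).await
//     }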

fn ensure_project_root(current_dir: &std::path::Path) -> Result<std::path::PathBuf, FetchError> {
    match find_project_root(current_dir) {
        Ok(root) => Ok(root),
        Err(ConfigError::NotFound) => {
            // Ask user if they want to create mlf.toml
            eprintln!("No mlf.toml found in current or parent directories.");
            eprintln!("Would you like to create one in the current directory? (y/n)");

            let mut input = String::new();
            std::io::stdin()
                .read_line(&mut input)
                .map_err(FetchError::InitFailed)?;

            if input.trim().to_lowercase() == "y" {
                let config_path = current_dir.join("mlf.toml");
                MlfConfig::create_default(&config_path).map_err(FetchError::NoProjectRoot)?;
                println!("Created mlf.toml in {}", current_dir.display());
                Ok(current_dir.to_path_buf())
            } else {
                Err(FetchError::NoProjectRoot(ConfigError::NotFound))
            }
        }
        Err(e) => Err(FetchError::NoProjectRoot(e)),
    }
}

async fn fetch_all_dependencies(project_root: &std::path::Path, update: bool, locked: bool) -> Result<(), FetchError> {
    // Load mlf.toml
    let config_path = project_root.join("mlf.toml");
    let config = MlfConfig::load(&config_path).map_err(FetchError::NoProjectRoot)?;

    if config.dependencies.dependencies.is_empty() {
        println!("No dependencies found in mlf.toml");
        return Ok(());
    }

    let allow_transitive = config.dependencies.allow_transitive_deps;

    // Load or create lockfile
    let lockfile_path = project_root.join("mlf-lock.toml");
    let existing_lockfile = LockFile::load(&lockfile_path).map_err(FetchError::NoProjectRoot)?;
    let has_existing_lockfile = lockfile_path.exists() && !existing_lockfile.lexicons.is_empty();

    // Handle --locked mode
    if locked {
        if !has_existing_lockfile {
            return Err(FetchError::HttpError(
                "No lockfile found. Run `mlf fetch` first to create mlf-lock.toml".to_string()
            ));
        }

        // In locked mode, we use the lockfile and verify nothing needs updating.
        // For now, we'll just use the lockfile - verification can be enhanced later.
        println!("Using locked dependencies from mlf-lock.toml");
        return fetch_from_lockfile(project_root, &existing_lockfile).await;
    }

    // Determine fetch mode
    let mode = if update {
        "update (ignoring lockfile)"
    } else if has_existing_lockfile {
        "lockfile"
    } else {
        "fresh"
    };

    println!("Fetching {} dependencies... (mode: {}, transitive deps: {})",
        config.dependencies.dependencies.len(),
        mode,
        if allow_transitive { "enabled" } else { "disabled" });

    // In update mode or if no lockfile, do full fetch
    // In normal mode with lockfile, use lockfile for cached entries
    let mut lockfile = if update || !has_existing_lockfile {
        LockFile::new()
    } else {
        existing_lockfile
    };

    let mut errors = Vec::new();
    let mut success_count = 0;

    // Fetch initial dependencies
    for dep in &config.dependencies.dependencies {
        println!("\nFetching: {}", dep);
        match fetch_lexicon_with_lock(dep, project_root, &mut lockfile).await {
            Ok(()) => {
                success_count += 1;
            }
            Err(e) => {
                errors.push((dep.clone(), format!("{}", e)));
            }
        }
    }

    // If transitive dependencies are enabled, fetch them
    if allow_transitive {
        fetch_transitive_dependencies(project_root, &mut lockfile, config.dependencies.optimize_transitive_fetches).await?;
    }

    // Save the lockfile
    lockfile.save(&lockfile_path).map_err(FetchError::NoProjectRoot)?;
    println!("\n→ Updated mlf-lock.toml");

    if !errors.is_empty() {
        eprintln!(
            "\n{} dependency(ies) fetched successfully, {} error(s):",
            success_count,
            errors.len()
        );
        for (dep, error) in &errors {
            eprintln!("  {} - {}", dep, error);
        }
        return Err(FetchError::HttpError(format!(
            "Failed to fetch {} dependencies",
            errors.len()
        )));
    }

    println!("\n✓ Successfully fetched all {} dependencies", success_count);
    Ok(())
}
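// Illustrative shape of the `[dependencies]` table in mlf.toml that the
// function above iterates over. Field names follow the `config.dependencies.*`
// accesses in this file; the exact TOML layout is an assumption:
//
//     [dependencies]
//     dependencies = ["com.atproto.repo.strongRef", "app.bsky.feed.post"]
//     allow_transitive_deps = true
//     optimize_transitive_fetches = true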

/// Fetch transitive dependencies by iteratively resolving unresolved references
async fn fetch_transitive_dependencies(
    project_root: &std::path::Path,
    lockfile: &mut LockFile,
    optimize_fetches: bool
) -> Result<(), FetchError> {
    let mut fetched_nsids = HashSet::new();
    // Track NSIDs from lockfile as already fetched
    for nsid in lockfile.lexicons.keys() {
        fetched_nsids.insert(nsid.clone());
    }

    let mut iteration = 0;
    const MAX_ITERATIONS: usize = 10;

    loop {
        iteration += 1;
        if iteration > MAX_ITERATIONS {
            eprintln!("\nWarning: Reached maximum iteration limit for transitive dependencies");
            break;
        }

        // Collect unresolved references
        let unresolved = match collect_unresolved_references(project_root) {
            Ok(refs) => refs,
            Err(e) => {
                eprintln!("\nWarning: Failed to analyze dependencies: {}", e);
                break;
            }
        };

        // Filter out NSIDs we've already fetched or tried to fetch
        let new_deps: HashSet<String> = unresolved
            .into_iter()
            .filter(|nsid| !fetched_nsids.contains(nsid))
            .collect();

        if new_deps.is_empty() {
            break;
        }

        if optimize_fetches {
            // Optimize the fetch patterns to reduce number of fetches
            let optimized_patterns = optimize_fetch_patterns(&new_deps);

            println!("\n→ Found {} unresolved reference(s), fetching {} optimized pattern(s)...",
                new_deps.len(), optimized_patterns.len());

            // Track which patterns are wildcards and their constituent NSIDs
            let mut wildcard_failures: Vec<(String, Vec<String>)> = Vec::new();

            for pattern in optimized_patterns {
                let is_wildcard = pattern.ends_with(".*");
                println!("\nFetching transitive dependency: {}", pattern);
                fetched_nsids.insert(pattern.clone());

                match fetch_lexicon_with_lock(&pattern, project_root, lockfile).await {
                    Ok(()) => {}
                    Err(e) => {
                        eprintln!("  Warning: Failed to fetch {}: {}", pattern, e);

                        // If this was a wildcard that failed, collect the individual NSIDs for retry
                        if is_wildcard {
                            let pattern_prefix = pattern.strip_suffix(".*").unwrap();
                            let matching_nsids: Vec<String> = new_deps.iter()
                                .filter(|nsid| nsid.starts_with(pattern_prefix))
                                .cloned()
                                .collect();

                            if !matching_nsids.is_empty() {
                                wildcard_failures.push((pattern.clone(), matching_nsids));
                            }
                        }
                    }
                }
            }

            // Retry failed wildcards with individual NSIDs
            if !wildcard_failures.is_empty() {
                println!("\n→ Retrying failed wildcard patterns with individual NSIDs...");

                for (failed_pattern, nsids) in wildcard_failures {
                    println!("  Retrying {} NSIDs from failed pattern: {}", nsids.len(), failed_pattern);

                    for nsid in nsids {
                        if !fetched_nsids.contains(&nsid) {
                            println!("    Fetching: {}", nsid);
                            fetched_nsids.insert(nsid.clone());

                            match fetch_lexicon_with_lock(&nsid, project_root, lockfile).await {
                                Ok(()) => {}
                                Err(e) => {
                                    eprintln!("    Warning: Failed to fetch {}: {}", nsid, e);
                                }
                            }
                        }
                    }
                }
            }
        } else {
            // Fetch individually without optimization (safer, more predictable)
            println!("\n→ Found {} unresolved reference(s), fetching individually...",
                new_deps.len());

            for nsid in &new_deps {
                println!("\nFetching transitive dependency: {}", nsid);
                fetched_nsids.insert(nsid.clone());

                match fetch_lexicon_with_lock(nsid, project_root, lockfile).await {
                    Ok(()) => {}
                    Err(e) => {
                        // Don't fail the entire fetch for transitive deps
                        eprintln!("  Warning: Failed to fetch {}: {}", nsid, e);
                    }
                }
            }
        }
    }

    Ok(())
}
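// Illustrative (assumed) behavior of `optimize_fetch_patterns` as used above:
// many sibling NSIDs collapse into one wildcard fetch. The actual grouping
// rules live in `mlf_lexicon_fetcher` and may differ:
//
//     input:  {"app.bsky.actor.defs", "app.bsky.actor.profile"}
//     output: ["app.bsky.actor.*"]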

/// Fetch dependencies using the lockfile
/// This refetches each lexicon from its recorded DID and verifies the checksum
async fn fetch_from_lockfile(project_root: &std::path::Path, lockfile: &LockFile) -> Result<(), FetchError> {
    if lockfile.lexicons.is_empty() {
        println!("Lockfile is empty");
        return Ok(());
    }

    println!("Fetching {} lexicon(s) from lockfile...", lockfile.lexicons.len());

    let mut errors = Vec::new();
    let mut success_count = 0;

    // Fetch each lexicon from its DID
    for (nsid, locked) in &lockfile.lexicons {
        println!("\nRefetching: {}", nsid);

        // Fetch the lexicon using the DID from lockfile
        match fetch_specific_lexicon(nsid, &locked.did, &locked.checksum, project_root).await {
            Ok(()) => {
                success_count += 1;
            }
            Err(e) => {
                errors.push((nsid.clone(), format!("{}", e)));
            }
        }
    }

    if !errors.is_empty() {
        eprintln!(
            "\n{} lexicon(s) fetched successfully, {} error(s):",
            success_count,
            errors.len()
        );
        for (nsid, error) in &errors {
            eprintln!("  {} - {}", nsid, error);
        }
        return Err(FetchError::HttpError(format!(
            "Failed to fetch {} lexicons",
            errors.len()
        )));
    }

    println!("\n✓ Successfully fetched all {} lexicons", success_count);
    Ok(())
}
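// Illustrative mlf-lock.toml entry carrying the fields read above
// (`locked.did`, `locked.checksum`) and written by
// `LockFile::add_lexicon(nsid, did, checksum, dependencies)`. The TOML layout
// itself is an assumption:
//
//     [lexicons."com.atproto.repo.strongRef"]
//     did = "did:plc:example"
//     checksum = "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
//     dependencies = []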

/// Fetch a specific lexicon by NSID from a known DID, verifying checksum
async fn fetch_specific_lexicon(
    nsid: &str,
    did: &str,
    expected_checksum: &str,
    project_root: &std::path::Path,
) -> Result<(), FetchError> {
    // Initialize .mlf directory
    init_mlf_cache(project_root).map_err(FetchError::InitFailed)?;
    let mlf_dir = get_mlf_cache_dir(project_root);

    // Create fetcher and fetch from known DID (bypassing DNS)
    let fetcher = ProductionLexiconFetcher::production()
        .await
        .map_err(|e| FetchError::HttpError(format!("Failed to create fetcher: {}", e)))?;

    let result = fetcher
        .fetch_from_did_with_metadata(did, nsid)
        .await
        .map_err(|e| FetchError::HttpError(format!("Failed to fetch from DID: {}", e)))?;

    if result.lexicons.is_empty() {
        return Err(FetchError::HttpError(format!(
            "Lexicon {} not found in repo {}",
            nsid, did
        )));
    }

    // We should only get one lexicon for an exact NSID match
    let fetched = &result.lexicons[0];

    if fetched.nsid != nsid {
        return Err(FetchError::HttpError(format!(
            "Expected lexicon {}, but got {}",
            nsid, fetched.nsid
        )));
    }

    // Verify checksum
    let json_str = serde_json::to_string_pretty(&fetched.lexicon)?;
    let hash = calculate_hash(&json_str);

    if hash != expected_checksum {
        return Err(FetchError::HttpError(format!(
            "Checksum mismatch for {}: expected {}, got {}",
            nsid, expected_checksum, hash
        )));
    }

    // Save JSON
    let mut json_path = mlf_dir.join("lexicons/json");
    for segment in nsid.split('.') {
        json_path.push(segment);
    }
    json_path.set_extension("json");

    if let Some(parent) = json_path.parent() {
        std::fs::create_dir_all(parent)?;
    }
    std::fs::write(&json_path, &json_str)?;
    println!("  → Saved JSON (checksum verified)");

    // Convert to MLF
    let mlf_content = crate::generate::mlf::generate_mlf_from_json(&fetched.lexicon)
        .map_err(|e| FetchError::ConversionError(format!("{:?}", e)))?;

    let mut mlf_path = mlf_dir.join("lexicons/mlf");
    for segment in nsid.split('.') {
        mlf_path.push(segment);
    }
    mlf_path.set_extension("mlf");

    if let Some(parent) = mlf_path.parent() {
        std::fs::create_dir_all(parent)?;
    }
    std::fs::write(&mlf_path, mlf_content)?;
    println!("  → Converted to MLF");

    Ok(())
}

fn save_dependency(project_root: &std::path::Path, nsid: &str) -> Result<(), FetchError> {
    let config_path = project_root.join("mlf.toml");
    let mut config = MlfConfig::load(&config_path).map_err(FetchError::NoProjectRoot)?;

    if config.dependencies.dependencies.contains(&nsid.to_string()) {
        println!("Dependency '{}' already in mlf.toml", nsid);
        return Ok(());
    }

    config.dependencies.dependencies.push(nsid.to_string());
    config.save(&config_path).map_err(FetchError::NoProjectRoot)?;

    println!("Added '{}' to dependencies in mlf.toml", nsid);
    Ok(())
}

async fn fetch_lexicon_with_lock(nsid: &str, project_root: &std::path::Path, lockfile: &mut LockFile) -> Result<(), FetchError> {
    // Initialize .mlf directory
    init_mlf_cache(project_root).map_err(FetchError::InitFailed)?;
    let mlf_dir = get_mlf_cache_dir(project_root);

    // Validate NSID format
    validate_nsid_format(nsid)?;

    println!("Fetching lexicons for pattern: {}", nsid);

    // Create the lexicon fetcher (encapsulates all DNS and HTTP logic)
    let fetcher = ProductionLexiconFetcher::production()
        .await
        .map_err(|e| FetchError::HttpError(format!("Failed to create fetcher: {}", e)))?;

    // Fetch lexicons with metadata
    let result = fetcher
        .fetch_with_metadata(nsid)
        .await
        .map_err(|e| FetchError::HttpError(format!("Failed to fetch: {}", e)))?;

    if result.lexicons.is_empty() {
        return Err(FetchError::HttpError(format!(
            "No lexicons matched pattern: {}",
            nsid
        )));
    }

    println!("  → Found {} lexicon record(s)", result.lexicons.len());

    // Process each fetched lexicon
    for fetched in &result.lexicons {
        println!("  Processing: {}", fetched.nsid);

        // Save JSON file
        let json_str = serde_json::to_string_pretty(&fetched.lexicon)?;
        let mut json_path = mlf_dir.join("lexicons/json");
        for segment in fetched.nsid.split('.') {
            json_path.push(segment);
        }
        json_path.set_extension("json");

        if let Some(parent) = json_path.parent() {
            std::fs::create_dir_all(parent)?;
        }
        std::fs::write(&json_path, &json_str)?;
        println!("    → Saved JSON to {}", json_path.display());

        // Convert to MLF
        let mlf_content = crate::generate::mlf::generate_mlf_from_json(&fetched.lexicon)
            .map_err(|e| FetchError::ConversionError(format!("{:?}", e)))?;

        // Save MLF file
        let mut mlf_path = mlf_dir.join("lexicons/mlf");
        for segment in fetched.nsid.split('.') {
            mlf_path.push(segment);
        }
        mlf_path.set_extension("mlf");

        if let Some(parent) = mlf_path.parent() {
            std::fs::create_dir_all(parent)?;
        }
        std::fs::write(&mlf_path, mlf_content)?;
        println!("    → Converted to MLF at {}", mlf_path.display());

        // Calculate hash and extract dependencies for lockfile
        let hash = calculate_hash(&json_str);
        let dependencies = extract_dependencies_from_json(&fetched.lexicon);

        // Update lockfile with DID from fetcher metadata
        lockfile.add_lexicon(fetched.nsid.clone(), fetched.did.clone(), hash, dependencies);
    }

    println!("✓ Successfully fetched {} lexicon(s) for {}", result.lexicons.len(), nsid);
    Ok(())
}
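// Illustrative on-disk layout produced above for `com.atproto.repo.strongRef`,
// relative to the `.mlf` cache directory:
//
//     lexicons/json/com/atproto/repo/strongRef.json
//     lexicons/mlf/com/atproto/repo/strongRef.mlf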

fn validate_nsid_format(nsid: &str) -> Result<(), FetchError> {
    // Remove wildcard suffix for validation (both .* and ._)
    let nsid_base = nsid
        .strip_suffix(".*")
        .or_else(|| nsid.strip_suffix("._"))
        .unwrap_or(nsid);

    let parts: Vec<&str> = nsid_base.split('.').collect();

    // NSID must have at least 2 segments (authority),
    // e.g. "place.stream", "place.stream.key", "place.stream.*", or "place.stream._"
    if parts.len() < 2 {
        return Err(FetchError::InvalidNsid(format!(
            "NSID must have at least 2 segments (e.g., 'place.stream' or 'com.atproto.repo.strongRef'): {}",
            nsid
        )));
    }

    Ok(())
}
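// A few checked examples of the rule above, runnable with `cargo test`.
#[cfg(test)]
mod validate_nsid_format_tests {
    use super::*;

    #[test]
    fn accepts_namespaces_and_wildcard_patterns() {
        assert!(validate_nsid_format("place.stream").is_ok());
        assert!(validate_nsid_format("place.stream.*").is_ok());
        assert!(validate_nsid_format("com.atproto.repo.strongRef").is_ok());
    }

    #[test]
    fn rejects_a_single_segment() {
        assert!(validate_nsid_format("foo").is_err());
    }
}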

/// Calculate SHA-256 hash of content
fn calculate_hash(content: &str) -> String {
    let mut hasher = Sha256::new();
    hasher.update(content.as_bytes());
    format!("sha256:{:x}", hasher.finalize())
}
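// Sanity check pinning the digest format: the well-known SHA-256 of the empty
// string, carrying the "sha256:" scheme tag used throughout the lockfile.
#[cfg(test)]
mod calculate_hash_tests {
    use super::*;

    #[test]
    fn empty_string_digest() {
        assert_eq!(
            calculate_hash(""),
            "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
        );
    }
}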

/// Extract external references from a lexicon JSON
/// Returns a sorted list of NSIDs that this lexicon depends on
fn extract_dependencies_from_json(json: &serde_json::Value) -> Vec<String> {
    let mut deps = HashSet::new();

    fn visit_value(value: &serde_json::Value, deps: &mut HashSet<String>) {
        match value {
            serde_json::Value::Object(map) => {
                // Check if this is a ref object
                if let Some(ref_str) = map.get("ref").and_then(|v| v.as_str()) {
                    // External refs look like "nsid" or "nsid#fragment", while
                    // local refs are "#fragment" only. Keep the NSID part and
                    // drop any fragment; multi-segment NSIDs contain a '.'
                    let nsid_part = ref_str.split('#').next().unwrap_or("");
                    if nsid_part.contains('.') {
                        deps.insert(nsid_part.to_string());
                    }
                }

                // Recurse into all values
                for val in map.values() {
                    visit_value(val, deps);
                }
            }
            serde_json::Value::Array(arr) => {
                for val in arr {
                    visit_value(val, deps);
                }
            }
            _ => {}
        }
    }

    visit_value(json, &mut deps);
    let mut result: Vec<String> = deps.into_iter().collect();
    result.sort();
    result
}
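// Checked example of the traversal above: external refs are collected with any
// "#fragment" stripped, and purely local "#fragment" refs are skipped. The
// JSON shape is illustrative, not a complete lexicon document.
#[cfg(test)]
mod extract_dependencies_tests {
    use super::*;

    #[test]
    fn collects_external_refs_and_skips_local_fragments() {
        let json = serde_json::json!({
            "defs": { "main": { "properties": {
                "subject": { "type": "ref", "ref": "com.atproto.repo.strongRef" },
                "view":    { "type": "ref", "ref": "app.bsky.actor.defs#profileView" },
                "local":   { "type": "ref", "ref": "#viewer" }
            }}}
        });
        assert_eq!(
            extract_dependencies_from_json(&json),
            vec![
                "app.bsky.actor.defs".to_string(),
                "com.atproto.repo.strongRef".to_string(),
            ]
        );
    }
}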

/// Extract external references from MLF files that need to be resolved
/// Returns a set of namespace patterns (not full NSIDs) that need to be fetched
fn collect_unresolved_references(project_root: &std::path::Path) -> Result<HashSet<String>, FetchError> {
    use mlf_lang::{parser, workspace::Workspace};

    let mlf_dir = get_mlf_cache_dir(project_root);
    let mlf_lexicons_dir = mlf_dir.join("lexicons/mlf");

    if !mlf_lexicons_dir.exists() {
        return Ok(HashSet::new());
    }

    // Build a workspace with std library to avoid fetching std types
    let mut workspace = Workspace::with_std()
        .map_err(|e| FetchError::IoError(std::io::Error::new(
            std::io::ErrorKind::Other,
            format!("Failed to load standard library: {:?}", e)
        )))?;
    let mut unresolved = HashSet::new();

    // Recursively find all .mlf files
    fn collect_mlf_files(dir: &std::path::Path, files: &mut Vec<std::path::PathBuf>) -> std::io::Result<()> {
        if dir.is_dir() {
            for entry in std::fs::read_dir(dir)? {
                let entry = entry?;
                let path = entry.path();
                if path.is_dir() {
                    collect_mlf_files(&path, files)?;
                } else if path.extension().and_then(|s| s.to_str()) == Some("mlf") {
                    files.push(path);
                }
            }
        }
        Ok(())
    }

    let mut mlf_files = Vec::new();
    collect_mlf_files(&mlf_lexicons_dir, &mut mlf_files)?;

    // Parse each MLF file and add to workspace
    for mlf_file in mlf_files {
        let content = std::fs::read_to_string(&mlf_file)?;

        // Extract namespace from file path,
        // e.g. ".mlf/lexicons/mlf/place/stream/key.mlf" -> "place.stream.key"
        let relative_path = mlf_file.strip_prefix(&mlf_lexicons_dir)
            .map_err(|_| FetchError::IoError(std::io::Error::new(
                std::io::ErrorKind::Other,
                "Failed to compute relative path"
            )))?;

        let namespace = relative_path
            .with_extension("")
            .to_string_lossy()
            .replace(std::path::MAIN_SEPARATOR, ".");

        // Parse the lexicon
        if let Ok(lexicon) = parser::parse_lexicon(&content) {
            let _ = workspace.add_module(namespace, lexicon);
        }
    }

    // Resolve to find undefined references
    if let Err(errors) = workspace.resolve() {
        for error in errors.errors {
            if let mlf_lang::error::ValidationError::UndefinedReference { name, .. } = error {
                // Only collect multi-segment NSIDs (external references);
                // single-segment names are likely local typos
                if name.contains('.') {
                    // Convert type reference to namespace pattern,
                    // e.g. "app.bsky.actor.defs.profileViewBasic" -> "app.bsky.actor.*".
                    // We fetch the whole namespace since we don't know which specific
                    // lexicon file contains the type definition.
                    let namespace_pattern = extract_namespace_pattern(&name);
                    unresolved.insert(namespace_pattern);
                }
            }
        }
    }

    Ok(unresolved)
}

/// Extract the namespace pattern from a type reference
/// For "app.bsky.actor.defs.profileViewBasic" returns "app.bsky.actor.*"
/// This handles the common ATProto pattern where defs are in a separate namespace
fn extract_namespace_pattern(type_ref: &str) -> String {
    let parts: Vec<&str> = type_ref.split('.').collect();

    // For references with 3+ segments, use the first 3 segments as the namespace,
    // e.g. "app.bsky.actor.defs.profileViewBasic" -> "app.bsky.actor.*"
    //      "com.atproto.repo.strongRef" -> "com.atproto.repo.*"
    if parts.len() >= 3 {
        format!("{}.{}.{}.*", parts[0], parts[1], parts[2])
    } else if parts.len() == 2 {
        // For 2-segment refs like "place.stream", fetch everything under that authority
        format!("{}.*", type_ref)
    } else {
        // Single segment or empty, just return as-is (shouldn't happen)
        type_ref.to_string()
    }
}
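// Checked examples of the collapsing rule above.
#[cfg(test)]
mod extract_namespace_pattern_tests {
    use super::*;

    #[test]
    fn collapses_to_three_segment_wildcards() {
        assert_eq!(
            extract_namespace_pattern("app.bsky.actor.defs.profileViewBasic"),
            "app.bsky.actor.*"
        );
        assert_eq!(
            extract_namespace_pattern("com.atproto.repo.strongRef"),
            "com.atproto.repo.*"
        );
    }

    #[test]
    fn two_segment_refs_fetch_the_whole_authority() {
        assert_eq!(extract_namespace_pattern("place.stream"), "place.stream.*");
    }
}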