use crate::config::{find_project_root, get_mlf_cache_dir, init_mlf_cache, ConfigError, MlfConfig, LockFile};
use mlf_lexicon_fetcher::{optimize_fetch_patterns, ProductionLexiconFetcher};
use miette::Diagnostic;
use sha2::{Digest, Sha256};
use std::collections::HashSet;
use thiserror::Error;

#[derive(Error, Debug, Diagnostic)]
pub enum FetchError {
    #[error("Failed to find project root")]
    #[diagnostic(code(mlf::fetch::no_project_root))]
    NoProjectRoot(#[from] ConfigError),

    #[error("Failed to create .mlf directory: {0}")]
    #[diagnostic(code(mlf::fetch::init_failed))]
    InitFailed(#[source] std::io::Error),

    #[error("Failed to fetch lexicon from ATProto repo: {0}")]
    #[diagnostic(code(mlf::fetch::http_error))]
    HttpError(String),

    #[error("Failed to parse lexicon JSON: {0}")]
    #[diagnostic(code(mlf::fetch::parse_error))]
    ParseError(#[from] serde_json::Error),

    #[error("Failed to convert lexicon to MLF: {0}")]
    #[diagnostic(code(mlf::fetch::conversion_error))]
    ConversionError(String),

    #[error("IO error: {0}")]
    #[diagnostic(code(mlf::fetch::io_error))]
    IoError(#[from] std::io::Error),

    #[error("Invalid NSID format: {0}")]
    #[diagnostic(code(mlf::fetch::invalid_nsid))]
    InvalidNsid(String),
}
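// Hedged sketch: how one of these variants surfaces through miette, assuming
// the binary's entry point returns `miette::Result<()>` so the
// `#[diagnostic(code(...))]` tags above get rendered (the actual CLI wiring
// lives outside this file):
//
//     fn main() -> miette::Result<()> {
//         Err(miette::Report::new(FetchError::InvalidNsid("foo".into())))
//     }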
/// Main entry point for fetch command
pub async fn run_fetch(nsid: Option<String>, save: bool, update: bool, locked: bool) -> Result<(), FetchError> {
    // Validate flags
    if update && locked {
        return Err(FetchError::HttpError(
            "Cannot use --update and --locked together".to_string()
        ));
    }

    // Find project root
    let current_dir = std::env::current_dir()?;
    let project_root = ensure_project_root(&current_dir)?;

    match nsid {
        Some(namespace) => {
            // Fetch single namespace with transitive dependencies
            let lockfile_path = project_root.join("mlf-lock.toml");
            let mut lockfile = LockFile::load(&lockfile_path).unwrap_or_else(|_| LockFile::new());

            // Load config to check if transitive deps are enabled
            let config_path = project_root.join("mlf.toml");
            let config = MlfConfig::load(&config_path).map_err(FetchError::NoProjectRoot)?;

            fetch_lexicon_with_lock(&namespace, &project_root, &mut lockfile).await?;

            // Handle transitive dependencies if enabled
            if config.dependencies.allow_transitive_deps {
                println!("\n→ Checking for transitive dependencies...");
                fetch_transitive_dependencies(
                    &project_root,
                    &mut lockfile,
                    config.dependencies.optimize_transitive_fetches
                ).await?;
            }

            // Save lockfile
            lockfile.save(&lockfile_path).map_err(FetchError::NoProjectRoot)?;
            println!("\n→ Updated mlf-lock.toml");

            // Save to mlf.toml if --save flag is provided
            if save {
                save_dependency(&project_root, &namespace)?;
            }

            Ok(())
        }
        None => {
            // Fetch all dependencies from mlf.toml
            fetch_all_dependencies(&project_root, update, locked).await
        }
    }
}
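// Hedged sketch of how this is driven from the CLI. The flag names mirror the
// checks above; the argument parser and async runtime (tokio is assumed here)
// are not part of this file:
//
//     #[tokio::main]
//     async fn main() -> Result<(), FetchError> {
//         // `mlf fetch com.atproto.repo.strongRef --save`
//         run_fetch(Some("com.atproto.repo.strongRef".into()), true, false, false).await
//     }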

fn ensure_project_root(current_dir: &std::path::Path) -> Result<std::path::PathBuf, FetchError> {
    match find_project_root(current_dir) {
        Ok(root) => Ok(root),
        Err(ConfigError::NotFound) => {
            // Ask user if they want to create mlf.toml
            eprintln!("No mlf.toml found in current or parent directories.");
            eprintln!("Would you like to create one in the current directory? (y/n)");

            let mut input = String::new();
            std::io::stdin()
                .read_line(&mut input)
                .map_err(FetchError::InitFailed)?;

            if input.trim().to_lowercase() == "y" {
                let config_path = current_dir.join("mlf.toml");
                MlfConfig::create_default(&config_path).map_err(FetchError::NoProjectRoot)?;
                println!("Created mlf.toml in {}", current_dir.display());
                Ok(current_dir.to_path_buf())
            } else {
                Err(FetchError::NoProjectRoot(ConfigError::NotFound))
            }
        }
        Err(e) => Err(FetchError::NoProjectRoot(e)),
    }
}

async fn fetch_all_dependencies(project_root: &std::path::Path, update: bool, locked: bool) -> Result<(), FetchError> {
    // Load mlf.toml
    let config_path = project_root.join("mlf.toml");
    let config = MlfConfig::load(&config_path).map_err(FetchError::NoProjectRoot)?;

    if config.dependencies.dependencies.is_empty() {
        println!("No dependencies found in mlf.toml");
        return Ok(());
    }

    let allow_transitive = config.dependencies.allow_transitive_deps;

    // Load or create lockfile
    let lockfile_path = project_root.join("mlf-lock.toml");
    let existing_lockfile = LockFile::load(&lockfile_path).map_err(FetchError::NoProjectRoot)?;
    let has_existing_lockfile = lockfile_path.exists() && !existing_lockfile.lexicons.is_empty();

    // Handle --locked mode
    if locked {
        if !has_existing_lockfile {
            return Err(FetchError::HttpError(
                "No lockfile found. Run `mlf fetch` first to create mlf-lock.toml".to_string()
            ));
        }

        // In locked mode, we use the lockfile and verify nothing needs updating.
        // For now, we'll just use the lockfile - verification can be enhanced later.
        println!("Using locked dependencies from mlf-lock.toml");
        return fetch_from_lockfile(project_root, &existing_lockfile).await;
    }

    // Determine fetch mode
    let mode = if update {
        "update (ignoring lockfile)"
    } else if has_existing_lockfile {
        "lockfile"
    } else {
        "fresh"
    };

    println!("Fetching {} dependencies... (mode: {}, transitive deps: {})",
        config.dependencies.dependencies.len(),
        mode,
        if allow_transitive { "enabled" } else { "disabled" });

    // In update mode or if no lockfile, do full fetch
    // In normal mode with lockfile, use lockfile for cached entries
    let mut lockfile = if update || !has_existing_lockfile {
        LockFile::new()
    } else {
        existing_lockfile
    };

    let mut errors = Vec::new();
    let mut success_count = 0;

    // Fetch initial dependencies
    for dep in &config.dependencies.dependencies {
        println!("\nFetching: {}", dep);
        match fetch_lexicon_with_lock(dep, project_root, &mut lockfile).await {
            Ok(()) => {
                success_count += 1;
            }
            Err(e) => {
                errors.push((dep.clone(), format!("{}", e)));
            }
        }
    }

    // If transitive dependencies are enabled, fetch them
    if allow_transitive {
        fetch_transitive_dependencies(project_root, &mut lockfile, config.dependencies.optimize_transitive_fetches).await?;
    }

    // Save the lockfile
    lockfile.save(&lockfile_path).map_err(FetchError::NoProjectRoot)?;
    println!("\n→ Updated mlf-lock.toml");

    if !errors.is_empty() {
        eprintln!(
            "\n{} dependency(ies) fetched successfully, {} error(s):",
            success_count,
            errors.len()
        );
        for (dep, error) in &errors {
            eprintln!("  {} - {}", dep, error);
        }
        return Err(FetchError::HttpError(format!(
            "Failed to fetch {} dependencies",
            errors.len()
        )));
    }

    println!("\n✓ Successfully fetched all {} dependencies", success_count);
    Ok(())
}
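// Illustrative shape of the `[dependencies]` table in mlf.toml that the
// function above iterates over. Field names follow the `config.dependencies.*`
// accesses in this file; the exact TOML layout is an assumption:
//
//     [dependencies]
//     dependencies = ["com.atproto.repo.strongRef", "app.bsky.feed.post"]
//     allow_transitive_deps = true
//     optimize_transitive_fetches = true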

/// Fetch transitive dependencies by iteratively resolving unresolved references
async fn fetch_transitive_dependencies(
    project_root: &std::path::Path,
    lockfile: &mut LockFile,
    optimize_fetches: bool
) -> Result<(), FetchError> {
    let mut fetched_nsids = HashSet::new();
    // Track NSIDs from lockfile as already fetched
    for nsid in lockfile.lexicons.keys() {
        fetched_nsids.insert(nsid.clone());
    }

    let mut iteration = 0;
    const MAX_ITERATIONS: usize = 10;

    loop {
        iteration += 1;
        if iteration > MAX_ITERATIONS {
            eprintln!("\nWarning: Reached maximum iteration limit for transitive dependencies");
            break;
        }

        // Collect unresolved references
        let unresolved = match collect_unresolved_references(project_root) {
            Ok(refs) => refs,
            Err(e) => {
                eprintln!("\nWarning: Failed to analyze dependencies: {}", e);
                break;
            }
        };

        // Filter out NSIDs we've already fetched or tried to fetch
        let new_deps: HashSet<String> = unresolved
            .into_iter()
            .filter(|nsid| !fetched_nsids.contains(nsid))
            .collect();

        if new_deps.is_empty() {
            break;
        }

        if optimize_fetches {
            // Optimize the fetch patterns to reduce number of fetches
            let optimized_patterns = optimize_fetch_patterns(&new_deps);

            println!("\n→ Found {} unresolved reference(s), fetching {} optimized pattern(s)...",
                new_deps.len(), optimized_patterns.len());

            // Track which patterns are wildcards and their constituent NSIDs
            let mut wildcard_failures: Vec<(String, Vec<String>)> = Vec::new();

            for pattern in optimized_patterns {
                let is_wildcard = pattern.ends_with(".*");
                println!("\nFetching transitive dependency: {}", pattern);
                fetched_nsids.insert(pattern.clone());

                match fetch_lexicon_with_lock(&pattern, project_root, lockfile).await {
                    Ok(()) => {}
                    Err(e) => {
                        eprintln!("  Warning: Failed to fetch {}: {}", pattern, e);

                        // If this was a wildcard that failed, collect the individual NSIDs for retry
                        if is_wildcard {
                            let pattern_prefix = pattern.strip_suffix(".*").unwrap();
                            let matching_nsids: Vec<String> = new_deps.iter()
                                .filter(|nsid| nsid.starts_with(pattern_prefix))
                                .cloned()
                                .collect();

                            if !matching_nsids.is_empty() {
                                wildcard_failures.push((pattern.clone(), matching_nsids));
                            }
                        }
                    }
                }
            }

            // Retry failed wildcards with individual NSIDs
            if !wildcard_failures.is_empty() {
                println!("\n→ Retrying failed wildcard patterns with individual NSIDs...");

                for (failed_pattern, nsids) in wildcard_failures {
                    println!("  Retrying {} NSIDs from failed pattern: {}", nsids.len(), failed_pattern);

                    for nsid in nsids {
                        if !fetched_nsids.contains(&nsid) {
                            println!("    Fetching: {}", nsid);
                            fetched_nsids.insert(nsid.clone());

                            match fetch_lexicon_with_lock(&nsid, project_root, lockfile).await {
                                Ok(()) => {}
                                Err(e) => {
                                    eprintln!("    Warning: Failed to fetch {}: {}", nsid, e);
                                }
                            }
                        }
                    }
                }
            }
        } else {
            // Fetch individually without optimization (safer, more predictable)
            println!("\n→ Found {} unresolved reference(s), fetching individually...",
                new_deps.len());

            for nsid in &new_deps {
                println!("\nFetching transitive dependency: {}", nsid);
                fetched_nsids.insert(nsid.clone());

                match fetch_lexicon_with_lock(nsid, project_root, lockfile).await {
                    Ok(()) => {}
                    Err(e) => {
                        // Don't fail the entire fetch for transitive deps
                        eprintln!("  Warning: Failed to fetch {}: {}", nsid, e);
                    }
                }
            }
        }
    }

    Ok(())
}
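// Illustrative (assumed) behavior of `optimize_fetch_patterns` as used above:
// many sibling NSIDs collapse into one wildcard fetch. The actual grouping
// rules live in `mlf_lexicon_fetcher` and may differ:
//
//     input:  {"app.bsky.actor.defs", "app.bsky.actor.profile"}
//     output: ["app.bsky.actor.*"]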

/// Fetch dependencies using the lockfile
/// This refetches each lexicon from its recorded DID and verifies the checksum
async fn fetch_from_lockfile(project_root: &std::path::Path, lockfile: &LockFile) -> Result<(), FetchError> {
    if lockfile.lexicons.is_empty() {
        println!("Lockfile is empty");
        return Ok(());
    }

    println!("Fetching {} lexicon(s) from lockfile...", lockfile.lexicons.len());

    let mut errors = Vec::new();
    let mut success_count = 0;

    // Fetch each lexicon from its DID
    for (nsid, locked) in &lockfile.lexicons {
        println!("\nRefetching: {}", nsid);

        // Fetch the lexicon using the DID from lockfile
        match fetch_specific_lexicon(nsid, &locked.did, &locked.checksum, project_root).await {
            Ok(()) => {
                success_count += 1;
            }
            Err(e) => {
                errors.push((nsid.clone(), format!("{}", e)));
            }
        }
    }

    if !errors.is_empty() {
        eprintln!(
            "\n{} lexicon(s) fetched successfully, {} error(s):",
            success_count,
            errors.len()
        );
        for (nsid, error) in &errors {
            eprintln!("  {} - {}", nsid, error);
        }
        return Err(FetchError::HttpError(format!(
            "Failed to fetch {} lexicons",
            errors.len()
        )));
    }

    println!("\n✓ Successfully fetched all {} lexicons", success_count);
    Ok(())
}
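// Illustrative mlf-lock.toml entry carrying the fields read above
// (`locked.did`, `locked.checksum`) and written by
// `LockFile::add_lexicon(nsid, did, checksum, dependencies)`. The TOML layout
// itself is an assumption:
//
//     [lexicons."com.atproto.repo.strongRef"]
//     did = "did:plc:example"
//     checksum = "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
//     dependencies = []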

/// Fetch a specific lexicon by NSID from a known DID, verifying checksum
async fn fetch_specific_lexicon(
    nsid: &str,
    did: &str,
    expected_checksum: &str,
    project_root: &std::path::Path,
) -> Result<(), FetchError> {
    // Initialize .mlf directory
    init_mlf_cache(project_root).map_err(FetchError::InitFailed)?;
    let mlf_dir = get_mlf_cache_dir(project_root);

    // Create fetcher and fetch from known DID (bypassing DNS)
    let fetcher = ProductionLexiconFetcher::production()
        .await
        .map_err(|e| FetchError::HttpError(format!("Failed to create fetcher: {}", e)))?;

    let result = fetcher
        .fetch_from_did_with_metadata(did, nsid)
        .await
        .map_err(|e| FetchError::HttpError(format!("Failed to fetch from DID: {}", e)))?;

    if result.lexicons.is_empty() {
        return Err(FetchError::HttpError(format!(
            "Lexicon {} not found in repo {}",
            nsid, did
        )));
    }

    // We should only get one lexicon for an exact NSID match
    let fetched = &result.lexicons[0];

    if fetched.nsid != nsid {
        return Err(FetchError::HttpError(format!(
            "Expected lexicon {}, but got {}",
            nsid, fetched.nsid
        )));
    }

    // Verify checksum
    let json_str = serde_json::to_string_pretty(&fetched.lexicon)?;
    let hash = calculate_hash(&json_str);

    if hash != expected_checksum {
        return Err(FetchError::HttpError(format!(
            "Checksum mismatch for {}: expected {}, got {}",
            nsid, expected_checksum, hash
        )));
    }

    // Save JSON
    let mut json_path = mlf_dir.join("lexicons/json");
    for segment in nsid.split('.') {
        json_path.push(segment);
    }
    json_path.set_extension("json");

    if let Some(parent) = json_path.parent() {
        std::fs::create_dir_all(parent)?;
    }
    std::fs::write(&json_path, &json_str)?;
    println!("  → Saved JSON (checksum verified)");

    // Convert to MLF
    let mlf_content = crate::generate::mlf::generate_mlf_from_json(&fetched.lexicon)
        .map_err(|e| FetchError::ConversionError(format!("{:?}", e)))?;

    let mut mlf_path = mlf_dir.join("lexicons/mlf");
    for segment in nsid.split('.') {
        mlf_path.push(segment);
    }
    mlf_path.set_extension("mlf");

    if let Some(parent) = mlf_path.parent() {
        std::fs::create_dir_all(parent)?;
    }
    std::fs::write(&mlf_path, mlf_content)?;
    println!("  → Converted to MLF");

    Ok(())
}

fn save_dependency(project_root: &std::path::Path, nsid: &str) -> Result<(), FetchError> {
    let config_path = project_root.join("mlf.toml");
    let mut config = MlfConfig::load(&config_path).map_err(FetchError::NoProjectRoot)?;

    if config.dependencies.dependencies.contains(&nsid.to_string()) {
        println!("Dependency '{}' already in mlf.toml", nsid);
        return Ok(());
    }

    config.dependencies.dependencies.push(nsid.to_string());
    config.save(&config_path).map_err(FetchError::NoProjectRoot)?;

    println!("Added '{}' to dependencies in mlf.toml", nsid);
    Ok(())
}

async fn fetch_lexicon_with_lock(nsid: &str, project_root: &std::path::Path, lockfile: &mut LockFile) -> Result<(), FetchError> {
    // Initialize .mlf directory
    init_mlf_cache(project_root).map_err(FetchError::InitFailed)?;
    let mlf_dir = get_mlf_cache_dir(project_root);

    // Validate NSID format
    validate_nsid_format(nsid)?;

    println!("Fetching lexicons for pattern: {}", nsid);

    // Create the lexicon fetcher (encapsulates all DNS and HTTP logic)
    let fetcher = ProductionLexiconFetcher::production()
        .await
        .map_err(|e| FetchError::HttpError(format!("Failed to create fetcher: {}", e)))?;

    // Fetch lexicons with metadata
    let result = fetcher
        .fetch_with_metadata(nsid)
        .await
        .map_err(|e| FetchError::HttpError(format!("Failed to fetch: {}", e)))?;

    if result.lexicons.is_empty() {
        return Err(FetchError::HttpError(format!(
            "No lexicons matched pattern: {}",
            nsid
        )));
    }

    println!("  → Found {} lexicon record(s)", result.lexicons.len());

    // Process each fetched lexicon
    for fetched in &result.lexicons {
        println!("  Processing: {}", fetched.nsid);

        // Save JSON file
        let json_str = serde_json::to_string_pretty(&fetched.lexicon)?;
        let mut json_path = mlf_dir.join("lexicons/json");
        for segment in fetched.nsid.split('.') {
            json_path.push(segment);
        }
        json_path.set_extension("json");

        if let Some(parent) = json_path.parent() {
            std::fs::create_dir_all(parent)?;
        }
        std::fs::write(&json_path, &json_str)?;
        println!("    → Saved JSON to {}", json_path.display());

        // Convert to MLF
        let mlf_content = crate::generate::mlf::generate_mlf_from_json(&fetched.lexicon)
            .map_err(|e| FetchError::ConversionError(format!("{:?}", e)))?;

        // Save MLF file
        let mut mlf_path = mlf_dir.join("lexicons/mlf");
        for segment in fetched.nsid.split('.') {
            mlf_path.push(segment);
        }
        mlf_path.set_extension("mlf");

        if let Some(parent) = mlf_path.parent() {
            std::fs::create_dir_all(parent)?;
        }
        std::fs::write(&mlf_path, mlf_content)?;
        println!("    → Converted to MLF at {}", mlf_path.display());

        // Calculate hash and extract dependencies for lockfile
        let hash = calculate_hash(&json_str);
        let dependencies = extract_dependencies_from_json(&fetched.lexicon);

        // Update lockfile with DID from fetcher metadata
        lockfile.add_lexicon(fetched.nsid.clone(), fetched.did.clone(), hash, dependencies);
    }

    println!("✓ Successfully fetched {} lexicon(s) for {}", result.lexicons.len(), nsid);
    Ok(())
}
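// Illustrative on-disk layout produced above for `com.atproto.repo.strongRef`,
// relative to the `.mlf` cache directory:
//
//     lexicons/json/com/atproto/repo/strongRef.json
//     lexicons/mlf/com/atproto/repo/strongRef.mlf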

fn validate_nsid_format(nsid: &str) -> Result<(), FetchError> {
    // Remove wildcard suffix for validation (both .* and ._)
    let nsid_base = nsid
        .strip_suffix(".*")
        .or_else(|| nsid.strip_suffix("._"))
        .unwrap_or(nsid);

    let parts: Vec<&str> = nsid_base.split('.').collect();

    // NSID must have at least 2 segments (authority),
    // e.g. "place.stream", "place.stream.key", "place.stream.*", or "place.stream._"
    if parts.len() < 2 {
        return Err(FetchError::InvalidNsid(format!(
            "NSID must have at least 2 segments (e.g., 'place.stream' or 'com.atproto.repo.strongRef'): {}",
            nsid
        )));
    }

    Ok(())
}
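// A few checked examples of the rule above, runnable with `cargo test`.
#[cfg(test)]
mod validate_nsid_format_tests {
    use super::*;

    #[test]
    fn accepts_namespaces_and_wildcard_patterns() {
        assert!(validate_nsid_format("place.stream").is_ok());
        assert!(validate_nsid_format("place.stream.*").is_ok());
        assert!(validate_nsid_format("com.atproto.repo.strongRef").is_ok());
    }

    #[test]
    fn rejects_a_single_segment() {
        assert!(validate_nsid_format("foo").is_err());
    }
}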

/// Calculate SHA-256 hash of content
fn calculate_hash(content: &str) -> String {
    let mut hasher = Sha256::new();
    hasher.update(content.as_bytes());
    format!("sha256:{:x}", hasher.finalize())
}
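// Sanity check pinning the digest format: the well-known SHA-256 of the empty
// string, carrying the "sha256:" scheme tag used throughout the lockfile.
#[cfg(test)]
mod calculate_hash_tests {
    use super::*;

    #[test]
    fn empty_string_digest() {
        assert_eq!(
            calculate_hash(""),
            "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
        );
    }
}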

/// Extract external references from a lexicon JSON
/// Returns a sorted list of NSIDs that this lexicon depends on
fn extract_dependencies_from_json(json: &serde_json::Value) -> Vec<String> {
    let mut deps = HashSet::new();

    fn visit_value(value: &serde_json::Value, deps: &mut HashSet<String>) {
        match value {
            serde_json::Value::Object(map) => {
                // Check if this is a ref object
                if let Some(ref_str) = map.get("ref").and_then(|v| v.as_str()) {
                    // External refs look like "nsid" or "nsid#fragment", while
                    // local refs are "#fragment" only. Keep the NSID part and
                    // drop any fragment; multi-segment NSIDs contain a '.'
                    let nsid_part = ref_str.split('#').next().unwrap_or("");
                    if nsid_part.contains('.') {
                        deps.insert(nsid_part.to_string());
                    }
                }

                // Recurse into all values
                for val in map.values() {
                    visit_value(val, deps);
                }
            }
            serde_json::Value::Array(arr) => {
                for val in arr {
                    visit_value(val, deps);
                }
            }
            _ => {}
        }
    }

    visit_value(json, &mut deps);
    let mut result: Vec<String> = deps.into_iter().collect();
    result.sort();
    result
}
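// Checked example of the traversal above: external refs are collected with any
// "#fragment" stripped, and purely local "#fragment" refs are skipped. The
// JSON shape is illustrative, not a complete lexicon document.
#[cfg(test)]
mod extract_dependencies_tests {
    use super::*;

    #[test]
    fn collects_external_refs_and_skips_local_fragments() {
        let json = serde_json::json!({
            "defs": { "main": { "properties": {
                "subject": { "type": "ref", "ref": "com.atproto.repo.strongRef" },
                "view":    { "type": "ref", "ref": "app.bsky.actor.defs#profileView" },
                "local":   { "type": "ref", "ref": "#viewer" }
            }}}
        });
        assert_eq!(
            extract_dependencies_from_json(&json),
            vec![
                "app.bsky.actor.defs".to_string(),
                "com.atproto.repo.strongRef".to_string(),
            ]
        );
    }
}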

/// Extract external references from MLF files that need to be resolved
/// Returns a set of namespace patterns (not full NSIDs) that need to be fetched
fn collect_unresolved_references(project_root: &std::path::Path) -> Result<HashSet<String>, FetchError> {
    use mlf_lang::{parser, workspace::Workspace};

    let mlf_dir = get_mlf_cache_dir(project_root);
    let mlf_lexicons_dir = mlf_dir.join("lexicons/mlf");

    if !mlf_lexicons_dir.exists() {
        return Ok(HashSet::new());
    }

    // Build a workspace with std library to avoid fetching std types
    let mut workspace = Workspace::with_std()
        .map_err(|e| FetchError::IoError(std::io::Error::new(
            std::io::ErrorKind::Other,
            format!("Failed to load standard library: {:?}", e)
        )))?;
    let mut unresolved = HashSet::new();

    // Recursively find all .mlf files
    fn collect_mlf_files(dir: &std::path::Path, files: &mut Vec<std::path::PathBuf>) -> std::io::Result<()> {
        if dir.is_dir() {
            for entry in std::fs::read_dir(dir)? {
                let entry = entry?;
                let path = entry.path();
                if path.is_dir() {
                    collect_mlf_files(&path, files)?;
                } else if path.extension().and_then(|s| s.to_str()) == Some("mlf") {
                    files.push(path);
                }
            }
        }
        Ok(())
    }

    let mut mlf_files = Vec::new();
    collect_mlf_files(&mlf_lexicons_dir, &mut mlf_files)?;

    // Parse each MLF file and add to workspace
    for mlf_file in mlf_files {
        let content = std::fs::read_to_string(&mlf_file)?;

        // Extract namespace from file path,
        // e.g. ".mlf/lexicons/mlf/place/stream/key.mlf" -> "place.stream.key"
        let relative_path = mlf_file.strip_prefix(&mlf_lexicons_dir)
            .map_err(|_| FetchError::IoError(std::io::Error::new(
                std::io::ErrorKind::Other,
                "Failed to compute relative path"
            )))?;

        let namespace = relative_path
            .with_extension("")
            .to_string_lossy()
            .replace(std::path::MAIN_SEPARATOR, ".");

        // Parse the lexicon
        if let Ok(lexicon) = parser::parse_lexicon(&content) {
            let _ = workspace.add_module(namespace, lexicon);
        }
    }

    // Resolve to find undefined references
    if let Err(errors) = workspace.resolve() {
        for error in errors.errors {
            if let mlf_lang::error::ValidationError::UndefinedReference { name, .. } = error {
                // Only collect multi-segment NSIDs (external references);
                // single-segment names are likely local typos
                if name.contains('.') {
                    // Convert type reference to namespace pattern,
                    // e.g. "app.bsky.actor.defs.profileViewBasic" -> "app.bsky.actor.*".
                    // We fetch the whole namespace since we don't know which specific
                    // lexicon file contains the type definition.
                    let namespace_pattern = extract_namespace_pattern(&name);
                    unresolved.insert(namespace_pattern);
                }
            }
        }
    }

    Ok(unresolved)
}

/// Extract the namespace pattern from a type reference
/// For "app.bsky.actor.defs.profileViewBasic" returns "app.bsky.actor.*"
/// This handles the common ATProto pattern where defs are in a separate namespace
fn extract_namespace_pattern(type_ref: &str) -> String {
    let parts: Vec<&str> = type_ref.split('.').collect();

    // For references with 3+ segments, use the first 3 segments as the namespace,
    // e.g. "app.bsky.actor.defs.profileViewBasic" -> "app.bsky.actor.*"
    //      "com.atproto.repo.strongRef" -> "com.atproto.repo.*"
    if parts.len() >= 3 {
        format!("{}.{}.{}.*", parts[0], parts[1], parts[2])
    } else if parts.len() == 2 {
        // For 2-segment refs like "place.stream", fetch everything under that authority
        format!("{}.*", type_ref)
    } else {
        // Single segment or empty, just return as-is (shouldn't happen)
        type_ref.to_string()
    }
}
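// Checked examples of the collapsing rule above.
#[cfg(test)]
mod extract_namespace_pattern_tests {
    use super::*;

    #[test]
    fn collapses_to_three_segment_wildcards() {
        assert_eq!(
            extract_namespace_pattern("app.bsky.actor.defs.profileViewBasic"),
            "app.bsky.actor.*"
        );
        assert_eq!(
            extract_namespace_pattern("com.atproto.repo.strongRef"),
            "com.atproto.repo.*"
        );
    }

    #[test]
    fn two_segment_refs_fetch_the_whole_authority() {
        assert_eq!(extract_namespace_pattern("place.stream"), "place.stream.*");
    }
}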