use crate::config::{find_project_root, get_mlf_cache_dir, init_mlf_cache, ConfigError, MlfConfig, LockFile}; use mlf_lexicon_fetcher::{optimize_fetch_patterns, ProductionLexiconFetcher}; use miette::Diagnostic; use sha2::{Digest, Sha256}; use std::collections::HashSet; use thiserror::Error; #[derive(Error, Debug, Diagnostic)] pub enum FetchError { #[error("Failed to find project root")] #[diagnostic(code(mlf::fetch::no_project_root))] NoProjectRoot(#[from] ConfigError), #[error("Failed to create .mlf directory: {0}")] #[diagnostic(code(mlf::fetch::init_failed))] InitFailed(#[source] std::io::Error), #[error("Failed to fetch lexicon from ATProto repo: {0}")] #[diagnostic(code(mlf::fetch::http_error))] HttpError(String), #[error("Failed to parse lexicon JSON: {0}")] #[diagnostic(code(mlf::fetch::parse_error))] ParseError(#[from] serde_json::Error), #[error("Failed to convert lexicon to MLF: {0}")] #[diagnostic(code(mlf::fetch::conversion_error))] ConversionError(String), #[error("IO error: {0}")] #[diagnostic(code(mlf::fetch::io_error))] IoError(#[from] std::io::Error), #[error("Invalid NSID format: {0}")] #[diagnostic(code(mlf::fetch::invalid_nsid))] InvalidNsid(String), } /// Main entry point for fetch command pub async fn run_fetch(nsid: Option, save: bool, update: bool, locked: bool) -> Result<(), FetchError> { // Validate flags if update && locked { return Err(FetchError::HttpError( "Cannot use --update and --locked together".to_string() )); } // Find project root let current_dir = std::env::current_dir()?; let project_root = ensure_project_root(¤t_dir)?; match nsid { Some(namespace) => { // Fetch single namespace with transitive dependencies let lockfile_path = project_root.join("mlf-lock.toml"); let mut lockfile = LockFile::load(&lockfile_path).unwrap_or_else(|_| LockFile::new()); // Load config to check if transitive deps are enabled let config_path = project_root.join("mlf.toml"); let config = MlfConfig::load(&config_path).map_err(FetchError::NoProjectRoot)?; fetch_lexicon_with_lock(&namespace, &project_root, &mut lockfile).await?; // Handle transitive dependencies if enabled if config.dependencies.allow_transitive_deps { println!("\n→ Checking for transitive dependencies..."); fetch_transitive_dependencies( &project_root, &mut lockfile, config.dependencies.optimize_transitive_fetches ).await?; } // Save lockfile lockfile.save(&lockfile_path).map_err(FetchError::NoProjectRoot)?; println!("\n→ Updated mlf-lock.toml"); // Save to mlf.toml if --save flag is provided if save { save_dependency(&project_root, &namespace)?; } Ok(()) } None => { // Fetch all dependencies from mlf.toml fetch_all_dependencies(&project_root, update, locked).await } } } fn ensure_project_root(current_dir: &std::path::Path) -> Result { match find_project_root(current_dir) { Ok(root) => Ok(root), Err(ConfigError::NotFound) => { // Ask user if they want to create mlf.toml eprintln!("No mlf.toml found in current or parent directories."); eprintln!("Would you like to create one in the current directory? (y/n)"); let mut input = String::new(); std::io::stdin() .read_line(&mut input) .map_err(FetchError::InitFailed)?; if input.trim().to_lowercase() == "y" { let config_path = current_dir.join("mlf.toml"); MlfConfig::create_default(&config_path).map_err(FetchError::NoProjectRoot)?; println!("Created mlf.toml in {}", current_dir.display()); Ok(current_dir.to_path_buf()) } else { Err(FetchError::NoProjectRoot(ConfigError::NotFound)) } } Err(e) => Err(FetchError::NoProjectRoot(e)), } } async fn fetch_all_dependencies(project_root: &std::path::Path, update: bool, locked: bool) -> Result<(), FetchError> { // Load mlf.toml let config_path = project_root.join("mlf.toml"); let config = MlfConfig::load(&config_path).map_err(FetchError::NoProjectRoot)?; if config.dependencies.dependencies.is_empty() { println!("No dependencies found in mlf.toml"); return Ok(()); } let allow_transitive = config.dependencies.allow_transitive_deps; // Load or create lockfile let lockfile_path = project_root.join("mlf-lock.toml"); let existing_lockfile = LockFile::load(&lockfile_path).map_err(FetchError::NoProjectRoot)?; let has_existing_lockfile = lockfile_path.exists() && !existing_lockfile.lexicons.is_empty(); // Handle --locked mode if locked { if !has_existing_lockfile { return Err(FetchError::HttpError( "No lockfile found. Run `mlf fetch` first to create mlf-lock.toml".to_string() )); } // In locked mode, we use the lockfile and verify nothing needs updating // For now, we'll just use the lockfile - verification can be enhanced later println!("Using locked dependencies from mlf-lock.toml"); return fetch_from_lockfile(project_root, &existing_lockfile).await; } // Determine fetch mode let mode = if update { "update (ignoring lockfile)" } else if has_existing_lockfile { "lockfile" } else { "fresh" }; println!("Fetching {} dependencies... (mode: {}, transitive deps: {})", config.dependencies.dependencies.len(), mode, if allow_transitive { "enabled" } else { "disabled" }); // In update mode or if no lockfile, do full fetch // In normal mode with lockfile, use lockfile for cached entries let mut lockfile = if update || !has_existing_lockfile { LockFile::new() } else { existing_lockfile }; let mut errors = Vec::new(); let mut success_count = 0; let mut fetched_nsids = HashSet::new(); // Fetch initial dependencies for dep in &config.dependencies.dependencies { println!("\nFetching: {}", dep); match fetch_lexicon_with_lock(dep, project_root, &mut lockfile).await { Ok(()) => { success_count += 1; fetched_nsids.insert(dep.clone()); } Err(e) => { errors.push((dep.clone(), format!("{}", e))); } } } // If transitive dependencies are enabled, fetch them if allow_transitive { fetch_transitive_dependencies(&project_root, &mut lockfile, config.dependencies.optimize_transitive_fetches).await?; } // Save the lockfile lockfile.save(&lockfile_path).map_err(FetchError::NoProjectRoot)?; println!("\n→ Updated mlf-lock.toml"); if !errors.is_empty() { eprintln!( "\n{} dependency(ies) fetched successfully, {} error(s):", success_count, errors.len() ); for (dep, error) in &errors { eprintln!(" {} - {}", dep, error); } return Err(FetchError::HttpError(format!( "Failed to fetch {} dependencies", errors.len() ))); } println!("\n✓ Successfully fetched all {} dependencies", success_count); Ok(()) } /// Fetch transitive dependencies by iteratively resolving unresolved references async fn fetch_transitive_dependencies( project_root: &std::path::Path, lockfile: &mut LockFile, optimize_fetches: bool ) -> Result<(), FetchError> { let mut fetched_nsids = HashSet::new(); // Track NSIDs from lockfile as already fetched for nsid in lockfile.lexicons.keys() { fetched_nsids.insert(nsid.clone()); } let mut iteration = 0; const MAX_ITERATIONS: usize = 10; loop { iteration += 1; if iteration > MAX_ITERATIONS { eprintln!("\nWarning: Reached maximum iteration limit for transitive dependencies"); break; } // Collect unresolved references let unresolved = match collect_unresolved_references(project_root) { Ok(refs) => refs, Err(e) => { eprintln!("\nWarning: Failed to analyze dependencies: {}", e); break; } }; // Filter out NSIDs we've already fetched or tried to fetch let new_deps: HashSet = unresolved .into_iter() .filter(|nsid| !fetched_nsids.contains(nsid)) .collect(); if new_deps.is_empty() { break; } if optimize_fetches { // Optimize the fetch patterns to reduce number of fetches let optimized_patterns = optimize_fetch_patterns(&new_deps); println!("\n→ Found {} unresolved reference(s), fetching {} optimized pattern(s)...", new_deps.len(), optimized_patterns.len()); // Track which patterns are wildcards and their constituent NSIDs let mut wildcard_failures: Vec<(String, Vec)> = Vec::new(); for pattern in optimized_patterns { let is_wildcard = pattern.ends_with(".*"); println!("\nFetching transitive dependency: {}", pattern); fetched_nsids.insert(pattern.clone()); match fetch_lexicon_with_lock(&pattern, project_root, lockfile).await { Ok(()) => {} Err(e) => { eprintln!(" Warning: Failed to fetch {}: {}", pattern, e); // If this was a wildcard that failed, collect the individual NSIDs for retry if is_wildcard { let pattern_prefix = pattern.strip_suffix(".*").unwrap(); let matching_nsids: Vec = new_deps.iter() .filter(|nsid| nsid.starts_with(pattern_prefix)) .cloned() .collect(); if !matching_nsids.is_empty() { wildcard_failures.push((pattern.clone(), matching_nsids)); } } } } } // Retry failed wildcards with individual NSIDs if !wildcard_failures.is_empty() { println!("\n→ Retrying failed wildcard patterns with individual NSIDs..."); for (failed_pattern, nsids) in wildcard_failures { println!(" Retrying {} NSIDs from failed pattern: {}", nsids.len(), failed_pattern); for nsid in nsids { if !fetched_nsids.contains(&nsid) { println!(" Fetching: {}", nsid); fetched_nsids.insert(nsid.clone()); match fetch_lexicon_with_lock(&nsid, project_root, lockfile).await { Ok(()) => {} Err(e) => { eprintln!(" Warning: Failed to fetch {}: {}", nsid, e); } } } } } } } else { // Fetch individually without optimization (safer, more predictable) println!("\n→ Found {} unresolved reference(s), fetching individually...", new_deps.len()); for nsid in &new_deps { println!("\nFetching transitive dependency: {}", nsid); fetched_nsids.insert(nsid.clone()); match fetch_lexicon_with_lock(nsid, project_root, lockfile).await { Ok(()) => {} Err(e) => { // Don't fail the entire fetch for transitive deps eprintln!(" Warning: Failed to fetch {}: {}", nsid, e); } } } } } Ok(()) } /// Fetch dependencies using the lockfile /// This refetches each lexicon from its recorded DID and verifies the checksum async fn fetch_from_lockfile(project_root: &std::path::Path, lockfile: &LockFile) -> Result<(), FetchError> { if lockfile.lexicons.is_empty() { println!("Lockfile is empty"); return Ok(()); } println!("Fetching {} lexicon(s) from lockfile...", lockfile.lexicons.len()); let mut errors = Vec::new(); let mut success_count = 0; // Fetch each lexicon from its DID for (nsid, locked) in &lockfile.lexicons { println!("\nRefetching: {}", nsid); // Fetch the lexicon using the DID from lockfile match fetch_specific_lexicon(nsid, &locked.did, &locked.checksum, project_root).await { Ok(()) => { success_count += 1; } Err(e) => { errors.push((nsid.clone(), format!("{}", e))); } } } if !errors.is_empty() { eprintln!( "\n{} lexicon(s) fetched successfully, {} error(s):", success_count, errors.len() ); for (nsid, error) in &errors { eprintln!(" {} - {}", nsid, error); } return Err(FetchError::HttpError(format!( "Failed to fetch {} lexicons", errors.len() ))); } println!("\n✓ Successfully fetched all {} lexicons", success_count); Ok(()) } /// Fetch a specific lexicon by NSID from a known DID, verifying checksum async fn fetch_specific_lexicon( nsid: &str, did: &str, expected_checksum: &str, project_root: &std::path::Path, ) -> Result<(), FetchError> { // Initialize .mlf directory init_mlf_cache(project_root).map_err(FetchError::InitFailed)?; let mlf_dir = get_mlf_cache_dir(project_root); // Create fetcher and fetch from known DID (bypassing DNS) let fetcher = ProductionLexiconFetcher::production() .await .map_err(|e| FetchError::HttpError(format!("Failed to create fetcher: {}", e)))?; let result = fetcher .fetch_from_did_with_metadata(did, nsid) .await .map_err(|e| FetchError::HttpError(format!("Failed to fetch from DID: {}", e)))?; if result.lexicons.is_empty() { return Err(FetchError::HttpError(format!( "Lexicon {} not found in repo {}", nsid, did ))); } // We should only get one lexicon for an exact NSID match let fetched = &result.lexicons[0]; if fetched.nsid != nsid { return Err(FetchError::HttpError(format!( "Expected lexicon {}, but got {}", nsid, fetched.nsid ))); } // Verify checksum let json_str = serde_json::to_string_pretty(&fetched.lexicon)?; let hash = calculate_hash(&json_str); if hash != expected_checksum { return Err(FetchError::HttpError(format!( "Checksum mismatch for {}: expected {}, got {}", nsid, expected_checksum, hash ))); } // Save JSON let mut json_path = mlf_dir.join("lexicons/json"); for segment in nsid.split('.') { json_path.push(segment); } json_path.set_extension("json"); if let Some(parent) = json_path.parent() { std::fs::create_dir_all(parent)?; } std::fs::write(&json_path, &json_str)?; println!(" → Saved JSON (checksum verified)"); // Convert to MLF let mlf_content = crate::generate::mlf::generate_mlf_from_json(&fetched.lexicon) .map_err(|e| FetchError::ConversionError(format!("{:?}", e)))?; let mut mlf_path = mlf_dir.join("lexicons/mlf"); for segment in nsid.split('.') { mlf_path.push(segment); } mlf_path.set_extension("mlf"); if let Some(parent) = mlf_path.parent() { std::fs::create_dir_all(parent)?; } std::fs::write(&mlf_path, mlf_content)?; println!(" → Converted to MLF"); Ok(()) } fn save_dependency(project_root: &std::path::Path, nsid: &str) -> Result<(), FetchError> { let config_path = project_root.join("mlf.toml"); let mut config = MlfConfig::load(&config_path).map_err(FetchError::NoProjectRoot)?; if config.dependencies.dependencies.contains(&nsid.to_string()) { println!("Dependency '{}' already in mlf.toml", nsid); return Ok(()); } config.dependencies.dependencies.push(nsid.to_string()); config.save(&config_path).map_err(FetchError::NoProjectRoot)?; println!("Added '{}' to dependencies in mlf.toml", nsid); Ok(()) } async fn fetch_lexicon_with_lock(nsid: &str, project_root: &std::path::Path, lockfile: &mut LockFile) -> Result<(), FetchError> { // Initialize .mlf directory init_mlf_cache(project_root).map_err(FetchError::InitFailed)?; let mlf_dir = get_mlf_cache_dir(&project_root); // Validate NSID format validate_nsid_format(nsid)?; println!("Fetching lexicons for pattern: {}", nsid); // Create the lexicon fetcher (encapsulates all DNS and HTTP logic) let fetcher = ProductionLexiconFetcher::production() .await .map_err(|e| FetchError::HttpError(format!("Failed to create fetcher: {}", e)))?; // Fetch lexicons with metadata let result = fetcher .fetch_with_metadata(nsid) .await .map_err(|e| FetchError::HttpError(format!("Failed to fetch: {}", e)))?; if result.lexicons.is_empty() { return Err(FetchError::HttpError(format!( "No lexicons matched pattern: {}", nsid ))); } println!(" → Found {} lexicon record(s)", result.lexicons.len()); // Process each fetched lexicon for fetched in &result.lexicons { println!(" Processing: {}", fetched.nsid); // Save JSON file let json_str = serde_json::to_string_pretty(&fetched.lexicon)?; let mut json_path = mlf_dir.join("lexicons/json"); for segment in fetched.nsid.split('.') { json_path.push(segment); } json_path.set_extension("json"); if let Some(parent) = json_path.parent() { std::fs::create_dir_all(parent)?; } std::fs::write(&json_path, &json_str)?; println!(" → Saved JSON to {}", json_path.display()); // Convert to MLF let mlf_content = crate::generate::mlf::generate_mlf_from_json(&fetched.lexicon) .map_err(|e| FetchError::ConversionError(format!("{:?}", e)))?; // Save MLF file let mut mlf_path = mlf_dir.join("lexicons/mlf"); for segment in fetched.nsid.split('.') { mlf_path.push(segment); } mlf_path.set_extension("mlf"); if let Some(parent) = mlf_path.parent() { std::fs::create_dir_all(parent)?; } std::fs::write(&mlf_path, mlf_content)?; println!(" → Converted to MLF at {}", mlf_path.display()); // Calculate hash and extract dependencies for lockfile let hash = calculate_hash(&json_str); let dependencies = extract_dependencies_from_json(&fetched.lexicon); // Update lockfile with DID from fetcher metadata lockfile.add_lexicon(fetched.nsid.clone(), fetched.did.clone(), hash, dependencies); } println!("✓ Successfully fetched {} lexicon(s) for {}", result.lexicons.len(), nsid); Ok(()) } fn validate_nsid_format(nsid: &str) -> Result<(), FetchError> { // Remove wildcard suffix for validation (both .* and ._) let nsid_base = nsid .strip_suffix(".*") .or_else(|| nsid.strip_suffix("._")) .unwrap_or(nsid); let parts: Vec<&str> = nsid_base.split('.').collect(); // NSID must have at least 2 segments (authority) // e.g., "place.stream", "place.stream.key", "place.stream.*", or "place.stream._" if parts.len() < 2 { return Err(FetchError::InvalidNsid(format!( "NSID must have at least 2 segments (e.g., 'place.stream' or 'com.atproto.repo.strongRef'): {}", nsid ))); } Ok(()) } /// Calculate SHA-256 hash of content fn calculate_hash(content: &str) -> String { let mut hasher = Sha256::new(); hasher.update(content.as_bytes()); format!("sha256:{:x}", hasher.finalize()) } /// Extract external references from a lexicon JSON /// Returns a list of NSIDs that this lexicon depends on fn extract_dependencies_from_json(json: &serde_json::Value) -> Vec { let mut deps = HashSet::new(); fn visit_value(value: &serde_json::Value, deps: &mut HashSet) { match value { serde_json::Value::Object(map) => { // Check if this is a ref object if let Some(ref_val) = map.get("ref") { if let Some(ref_str) = ref_val.as_str() { // External refs are multi-segment NSIDs if ref_str.contains('.') { deps.insert(ref_str.to_string()); } } } // Recurse into all values for val in map.values() { visit_value(val, deps); } } serde_json::Value::Array(arr) => { for val in arr { visit_value(val, deps); } } _ => {} } } visit_value(json, &mut deps); let mut result: Vec = deps.into_iter().collect(); result.sort(); result } /// Extract external references from MLF files that need to be resolved /// Returns a set of namespace patterns (not full NSIDs) that need to be fetched fn collect_unresolved_references(project_root: &std::path::Path) -> Result, FetchError> { use mlf_lang::{parser, workspace::Workspace}; let mlf_dir = get_mlf_cache_dir(project_root); let mlf_lexicons_dir = mlf_dir.join("lexicons/mlf"); if !mlf_lexicons_dir.exists() { return Ok(HashSet::new()); } // Build a workspace with std library to avoid fetching std types let mut workspace = Workspace::with_std() .map_err(|e| FetchError::IoError(std::io::Error::new( std::io::ErrorKind::Other, format!("Failed to load standard library: {:?}", e) )))?; let mut unresolved = HashSet::new(); // Recursively find all .mlf files fn collect_mlf_files(dir: &std::path::Path, files: &mut Vec) -> std::io::Result<()> { if dir.is_dir() { for entry in std::fs::read_dir(dir)? { let entry = entry?; let path = entry.path(); if path.is_dir() { collect_mlf_files(&path, files)?; } else if path.extension().and_then(|s| s.to_str()) == Some("mlf") { files.push(path); } } } Ok(()) } let mut mlf_files = Vec::new(); collect_mlf_files(&mlf_lexicons_dir, &mut mlf_files)?; // Parse each MLF file and add to workspace for mlf_file in mlf_files { let content = std::fs::read_to_string(&mlf_file)?; // Extract namespace from file path // e.g., ".mlf/lexicons/mlf/place/stream/key.mlf" -> "place.stream.key" let relative_path = mlf_file.strip_prefix(&mlf_lexicons_dir) .map_err(|_| FetchError::IoError(std::io::Error::new( std::io::ErrorKind::Other, "Failed to compute relative path" )))?; let namespace = relative_path .with_extension("") .to_string_lossy() .replace(std::path::MAIN_SEPARATOR, "."); // Parse the lexicon if let Ok(lexicon) = parser::parse_lexicon(&content) { let _ = workspace.add_module(namespace, lexicon); } } // Resolve to find undefined references if let Err(errors) = workspace.resolve() { for error in errors.errors { if let mlf_lang::error::ValidationError::UndefinedReference { name, .. } = error { // Only collect multi-segment NSIDs (external references) // Single-segment names are likely local typos if name.contains('.') { // Convert type reference to namespace pattern // e.g., "app.bsky.actor.defs.profileViewBasic" -> "app.bsky.actor.*" // We fetch the whole namespace since we don't know which specific // lexicon file contains the type definition let namespace_pattern = extract_namespace_pattern(&name); unresolved.insert(namespace_pattern); } } } } Ok(unresolved) } /// Extract the namespace pattern from a type reference /// For "app.bsky.actor.defs.profileViewBasic" returns "app.bsky.actor.*" /// This handles the common ATProto pattern where defs are in a separate namespace fn extract_namespace_pattern(type_ref: &str) -> String { let parts: Vec<&str> = type_ref.split('.').collect(); // For references with 3+ segments, use the first 3 segments as the namespace // e.g., "app.bsky.actor.defs.profileViewBasic" -> "app.bsky.actor.*" // e.g., "com.atproto.repo.strongRef" -> "com.atproto.repo.*" if parts.len() >= 3 { format!("{}.{}.{}.*", parts[0], parts[1], parts[2]) } else if parts.len() == 2 { // For 2-segment refs like "place.stream", fetch everything under that authority format!("{}.*", type_ref) } else { // Single segment or empty, just return as-is (shouldn't happen) type_ref.to_string() } }