this repo has no description
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at main 407 lines 13 kB view raw
1use color_eyre::eyre::Result; 2use sha2::{Digest, Sha256}; 3use std::path::Path; 4 5#[derive(Debug)] 6pub struct NixpkgsRelease { 7 pub url: String, 8 pub hash: String, 9} 10 11#[derive(Debug, Clone)] 12struct Package { 13 name: String, 14 pname: Option<String>, 15 version: Option<String>, 16 display_name: Option<String>, 17 system: Option<String>, 18 output_name: Option<String>, 19 available: i32, 20 broken: i32, 21 description: Option<String>, 22 homepage: Option<String>, 23 insecure: i32, 24 unfree: i32, 25 unsupported: i32, 26 position: Option<String>, 27 long_description: Option<String>, 28 main_program: Option<String>, 29 license_spdx_id: Option<String>, 30 license_full_name: Option<String>, 31 license_free: i32, 32 license_url: Option<String>, 33} 34 35#[derive(Debug, Clone)] 36struct Maintainer { 37 package_name: String, 38 name: Option<String>, 39 email: Option<String>, 40 github: Option<String>, 41 github_id: Option<i64>, 42 matrix: Option<String>, 43} 44 45pub async fn get_latest_nixpkgs_release() -> Result<NixpkgsRelease> { 46 let base_url = crate::config::get().nixpkgs_channel.clone(); 47 48 let response = reqwest::get(&base_url).await?; 49 let html = response.text().await?; 50 51 let url_regex = 52 regex::Regex::new(r"<a href='([^']+/packages\.json\.br)'>packages\.json\.br</a>")?; 53 let hash_regex = regex::Regex::new( 54 r"packages\.json\.br</a></td><td align='right'>\d+</td><td><tt>([a-f0-9]{64})</tt>", 55 )?; 56 57 let url = url_regex 58 .captures(&html) 59 .and_then(|cap| cap.get(1)) 60 .map(|m| { 61 let path = m.as_str(); 62 if path.starts_with("http") { 63 path.to_string() 64 } else if path.starts_with('/') { 65 format!("https://releases.nixos.org{path}") 66 } else { 67 format!("https://releases.nixos.org/{path}") 68 } 69 }) 70 .ok_or_else(|| color_eyre::eyre::eyre!("Could not find packages.json.br URL"))?; 71 72 let hash = hash_regex 73 .captures(&html) 74 .and_then(|cap| cap.get(1)) 75 .map(|m| m.as_str().to_string()) 76 .ok_or_else(|| color_eyre::eyre::eyre!("Could not find packages.json.br hash"))?; 77 78 Ok(NixpkgsRelease { url, hash }) 79} 80 81fn get_stored_hash() -> Option<String> { 82 let hash_path = crate::utils::get_data_dir().join("nixpkgs.hash"); 83 std::fs::read_to_string(hash_path).ok() 84} 85 86fn store_hash(hash: &str) -> Result<()> { 87 let hash_path = crate::utils::get_data_dir().join("nixpkgs.hash"); 88 std::fs::write(hash_path, hash)?; 89 Ok(()) 90} 91 92#[allow(clippy::too_many_lines, clippy::items_after_statements)] 93pub async fn ensure_nixpkgs_database() -> Result<()> { 94 let db_path = crate::utils::get_data_dir().join("packages.db"); 95 96 println!("Checking for nixpkgs updates..."); 97 let release = get_latest_nixpkgs_release().await?; 98 let stored_hash = get_stored_hash(); 99 100 if Path::new(&db_path).exists() && stored_hash.as_deref() == Some(&release.hash) { 101 println!("nixpkgs database is up to date"); 102 return Ok(()); 103 } 104 105 if Path::new(&db_path).exists() { 106 println!("New nixpkgs release detected, updating database..."); 107 std::fs::remove_file(&db_path)?; 108 } else { 109 println!("nixpkgs database not found, building..."); 110 } 111 112 println!("Downloading from {}...", release.url); 113 let response = reqwest::get(&release.url).await?; 114 let compressed = response.bytes().await?; 115 116 let mut hasher = Sha256::new(); 117 hasher.update(&compressed); 118 let computed_hash = format!("{:x}", hasher.finalize()); 119 120 if computed_hash != release.hash { 121 return Err(color_eyre::eyre::eyre!( 122 "Hash mismatch! Expected {}, got {}", 123 release.hash, 124 computed_hash 125 )); 126 } 127 128 println!("Hash verified, decompressing..."); 129 let mut decompressed = Vec::new(); 130 let mut decoder = brotli::Decompressor::new(compressed.as_ref(), 4096); 131 std::io::copy(&mut decoder, &mut decompressed)?; 132 133 println!("Parsing JSON..."); 134 let json_data: serde_json::Value = serde_json::from_slice(&decompressed)?; 135 136 let packages = json_data["packages"] 137 .as_object() 138 .ok_or_else(|| color_eyre::eyre::eyre!("Invalid packages.json format"))?; 139 140 println!("Creating database with {} packages...", packages.len()); 141 142 const BATCH_SIZE: usize = 5000; // Increased from 1000 for better performance 143 144 let mut conn = rusqlite::Connection::open(&db_path)?; 145 146 conn.execute( 147 "CREATE TABLE packages ( 148 package_name TEXT PRIMARY KEY, 149 pname TEXT, 150 version TEXT, 151 name TEXT, 152 system TEXT, 153 output_name TEXT, 154 available INTEGER, 155 broken INTEGER, 156 description TEXT, 157 homepage TEXT, 158 insecure INTEGER, 159 unfree INTEGER, 160 unsupported INTEGER, 161 position TEXT, 162 long_description TEXT, 163 main_program TEXT, 164 license_spdx_id TEXT, 165 license_full_name TEXT, 166 license_free INTEGER, 167 license_url TEXT 168 )", 169 [], 170 )?; 171 172 conn.execute( 173 "CREATE TABLE maintainers ( 174 id INTEGER PRIMARY KEY AUTOINCREMENT, 175 package_name TEXT, 176 name TEXT, 177 email TEXT, 178 github TEXT, 179 github_id INTEGER, 180 matrix TEXT, 181 FOREIGN KEY (package_name) REFERENCES packages(package_name) 182 )", 183 [], 184 )?; 185 186 conn.execute( 187 "CREATE INDEX idx_package_name ON packages(package_name)", 188 [], 189 )?; 190 conn.execute("CREATE INDEX idx_pname ON packages(pname)", [])?; 191 conn.execute( 192 "CREATE INDEX idx_maintainers_package ON maintainers(package_name)", 193 [], 194 )?; 195 196 // Enable WAL mode for better concurrent access 197 conn.pragma_update(None, "journal_mode", "WAL")?; 198 199 // Increase cache size for better performance 200 conn.pragma_update(None, "cache_size", "-64000")?; 201 202 let total = packages.len(); 203 let mut count = 0; 204 205 let mut package_batch = Vec::with_capacity(BATCH_SIZE); 206 let mut maintainer_batch = Vec::with_capacity(BATCH_SIZE * 4); // Estimate 4 maintainers per package 207 208 for (pkg_name, pkg_data) in packages { 209 let meta = &pkg_data["meta"]; 210 let license_data = &meta["license"]; 211 212 let license_spdx = extract_license(license_data); 213 let homepage = extract_homepage(meta); 214 215 package_batch.push(Package { 216 name: pkg_name.clone(), 217 pname: extract_string(pkg_data, "pname"), 218 version: extract_string(pkg_data, "version"), 219 display_name: extract_string(pkg_data, "name"), 220 system: extract_string(pkg_data, "system"), 221 output_name: extract_string(pkg_data, "outputName"), 222 available: i32::from( 223 meta.get("available") 224 .and_then(serde_json::Value::as_bool) 225 .unwrap_or(false), 226 ), 227 broken: i32::from( 228 meta.get("broken") 229 .and_then(serde_json::Value::as_bool) 230 .unwrap_or(false), 231 ), 232 description: extract_string(meta, "description"), 233 homepage, 234 insecure: i32::from( 235 meta.get("insecure") 236 .and_then(serde_json::Value::as_bool) 237 .unwrap_or(false), 238 ), 239 unfree: i32::from( 240 meta.get("unfree") 241 .and_then(serde_json::Value::as_bool) 242 .unwrap_or(false), 243 ), 244 unsupported: i32::from( 245 meta.get("unsupported") 246 .and_then(serde_json::Value::as_bool) 247 .unwrap_or(false), 248 ), 249 position: extract_string(meta, "position"), 250 long_description: extract_string(meta, "longDescription"), 251 main_program: extract_string(meta, "mainProgram"), 252 license_spdx_id: license_spdx, 253 license_full_name: None, 254 license_free: 0, 255 license_url: None, 256 }); 257 258 if let Some(maintainers) = meta.get("maintainers").and_then(|v| v.as_array()) { 259 for m in maintainers { 260 if let Some(obj) = m.as_object() { 261 maintainer_batch.push(Maintainer { 262 package_name: pkg_name.clone(), 263 name: obj 264 .get("name") 265 .and_then(|v| v.as_str()) 266 .map(std::string::ToString::to_string), 267 email: obj 268 .get("email") 269 .and_then(|v| v.as_str()) 270 .map(std::string::ToString::to_string), 271 github: obj 272 .get("github") 273 .and_then(|v| v.as_str()) 274 .map(std::string::ToString::to_string), 275 github_id: obj.get("githubId").and_then(serde_json::Value::as_i64), 276 matrix: obj 277 .get("matrix") 278 .and_then(|v| v.as_str()) 279 .map(std::string::ToString::to_string), 280 }); 281 } 282 } 283 } 284 285 count += 1; 286 287 if package_batch.len() >= BATCH_SIZE { 288 insert_batch(&mut conn, &package_batch, &maintainer_batch)?; 289 print_progress(count, total); 290 package_batch.clear(); 291 maintainer_batch.clear(); 292 } 293 } 294 295 if !package_batch.is_empty() { 296 insert_batch(&mut conn, &package_batch, &maintainer_batch)?; 297 } 298 299 println!("Vacuuming..."); 300 conn.execute("VACUUM", [])?; 301 302 store_hash(&release.hash)?; 303 304 println!("Database created successfully: {}", db_path.display()); 305 Ok(()) 306} 307 308/// Extract a string value from a JSON object 309fn extract_string(obj: &serde_json::Value, key: &str) -> Option<String> { 310 obj.get(key) 311 .and_then(serde_json::Value::as_str) 312 .map(std::string::ToString::to_string) 313} 314 315/// Extract license information from license data 316fn extract_license(license_data: &serde_json::Value) -> Option<String> { 317 match license_data { 318 serde_json::Value::Object(obj) => obj 319 .get("spdxId") 320 .and_then(|v| v.as_str()) 321 .map(std::string::ToString::to_string), 322 serde_json::Value::Array(arr) => { 323 let ids: Vec<&str> = arr 324 .iter() 325 .filter_map(|v| v.get("spdxId")) 326 .filter_map(|v| v.as_str()) 327 .collect(); 328 if ids.is_empty() { 329 None 330 } else { 331 Some(ids.join(", ")) 332 } 333 } 334 serde_json::Value::String(s) => Some(s.clone()), 335 _ => None, 336 } 337} 338 339/// Extract homepage from metadata 340fn extract_homepage(meta: &serde_json::Value) -> Option<String> { 341 meta.get("homepage").and_then(|h| match h { 342 serde_json::Value::String(s) => Some(s.clone()), 343 serde_json::Value::Array(arr) => arr 344 .first() 345 .and_then(|v| v.as_str()) 346 .map(std::string::ToString::to_string), 347 _ => None, 348 }) 349} 350 351/// Insert a batch of packages and maintainers into the database 352fn insert_batch( 353 conn: &mut rusqlite::Connection, 354 package_batch: &[Package], 355 maintainer_batch: &[Maintainer], 356) -> Result<()> { 357 let tx = conn.transaction()?; 358 { 359 let mut stmt = tx.prepare_cached("INSERT INTO packages VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)")?; 360 for p in package_batch { 361 stmt.execute(rusqlite::params![ 362 p.name, 363 p.pname, 364 p.version, 365 p.display_name, 366 p.system, 367 p.output_name, 368 p.available, 369 p.broken, 370 p.description, 371 p.homepage, 372 p.insecure, 373 p.unfree, 374 p.unsupported, 375 p.position, 376 p.long_description, 377 p.main_program, 378 p.license_spdx_id, 379 p.license_full_name, 380 p.license_free, 381 p.license_url, 382 ])?; 383 } 384 } 385 { 386 let mut stmt = tx.prepare_cached("INSERT INTO maintainers (package_name, name, email, github, github_id, matrix) VALUES (?, ?, ?, ?, ?, ?)")?; 387 for m in maintainer_batch { 388 stmt.execute(rusqlite::params![ 389 m.package_name, 390 m.name, 391 m.email, 392 m.github, 393 m.github_id, 394 m.matrix, 395 ])?; 396 } 397 } 398 tx.commit()?; 399 Ok(()) 400} 401 402/// Print progress information 403fn print_progress(count: usize, total: usize) { 404 #[allow(clippy::cast_precision_loss)] 405 let progress = (count as f64 / total as f64) * 100.0; 406 println!("Progress: {count}/{total} ({progress:.1}%)"); 407}