A public mirror for the whole atmosphere — hubble.microcosm.blue
27
fork

Configure Feed

Select the types of activity you want to include in your feed.

sample inputs

so that a run doesn't take 100 years

phil 1b6b9d94 1852e6fd

+77 -3
+59
Cargo.lock
··· 940 940 checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" 941 941 942 942 [[package]] 943 + name = "ppv-lite86" 944 + version = "0.2.21" 945 + source = "registry+https://github.com/rust-lang/crates.io-index" 946 + checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" 947 + dependencies = [ 948 + "zerocopy", 949 + ] 950 + 951 + [[package]] 943 952 name = "proc-macro2" 944 953 version = "1.0.106" 945 954 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 974 983 checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" 975 984 976 985 [[package]] 986 + name = "rand" 987 + version = "0.9.2" 988 + source = "registry+https://github.com/rust-lang/crates.io-index" 989 + checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" 990 + dependencies = [ 991 + "rand_chacha", 992 + "rand_core", 993 + ] 994 + 995 + [[package]] 996 + name = "rand_chacha" 997 + version = "0.9.0" 998 + source = "registry+https://github.com/rust-lang/crates.io-index" 999 + checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" 1000 + dependencies = [ 1001 + "ppv-lite86", 1002 + "rand_core", 1003 + ] 1004 + 1005 + [[package]] 1006 + name = "rand_core" 1007 + version = "0.9.5" 1008 + source = "registry+https://github.com/rust-lang/crates.io-index" 1009 + checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" 1010 + dependencies = [ 1011 + "getrandom", 1012 + ] 1013 + 1014 + [[package]] 977 1015 name = "redox_syscall" 978 1016 version = "0.5.18" 979 1017 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1204 1242 "async-channel", 1205 1243 "clap", 1206 1244 "csv", 1245 + "rand", 1207 1246 "repo-stream", 1208 1247 "rocksdb", 1209 1248 "serde", ··· 1485 1524 version = "0.8.15" 1486 1525 source = "registry+https://github.com/rust-lang/crates.io-index" 1487 1526 checksum = 
"fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3" 1527 + 1528 + [[package]] 1529 + name = "zerocopy" 1530 + version = "0.8.48" 1531 + source = "registry+https://github.com/rust-lang/crates.io-index" 1532 + checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" 1533 + dependencies = [ 1534 + "zerocopy-derive", 1535 + ] 1536 + 1537 + [[package]] 1538 + name = "zerocopy-derive" 1539 + version = "0.8.48" 1540 + source = "registry+https://github.com/rust-lang/crates.io-index" 1541 + checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" 1542 + dependencies = [ 1543 + "proc-macro2", 1544 + "quote", 1545 + "syn", 1546 + ] 1488 1547 1489 1548 [[package]] 1490 1549 name = "zstd-sys"
+1
space-efficiency-check/Cargo.toml
··· 13 13 tracing = { workspace = true } 14 14 tracing-subscriber = { workspace = true } 15 15 csv = "1" 16 + rand = "0.9" 16 17 serde = { version = "1", features = ["derive"] } 17 18 18 19 [profile.release]
+5 -1
space-efficiency-check/src/main.rs
··· 27 27 /// skip import and just do manual compact 28 28 #[arg(long, action)] 29 29 just_compact: bool, 30 + 31 + /// sample fraction of DIDs to import (0.0–1.0, default: all) 32 + #[arg(long)] 33 + sample: Option<f64>, 30 34 } 31 35 32 36 fn open_db(path: &Path) -> Result<DB, rocksdb::Error> { ··· 77 81 return Ok(()) 78 82 } 79 83 80 - let stats = run_workers(&args.car_dir, db.clone(), args.workers, args.mem_limit_mb).await?; 84 + let stats = run_workers(&args.car_dir, db.clone(), args.workers, args.mem_limit_mb, args.sample).await?; 81 85 82 86 let repos = stats.repos.load(Ordering::Relaxed); 83 87 let empty = stats.empty_repos.load(Ordering::Relaxed);
+12 -2
space-efficiency-check/src/work.rs
··· 46 46 db: Arc<DB>, 47 47 workers: usize, 48 48 mem_limit_mb: usize, 49 + sample: Option<f64>, 49 50 ) -> Result<Stats, ProcessError> { 50 51 let stats = Arc::new(Stats::default()); 51 52 let (tx, rx) = async_channel::bounded(1024); ··· 56 57 let rx = rx.clone(); 57 58 let db = db.clone(); 58 59 let stats = stats.clone(); 59 - set.spawn(worker(rx, db, stats, mem_limit_mb)); 60 + set.spawn(worker(rx, db, stats, mem_limit_mb, sample)); 60 61 } 61 62 62 63 let mut file_count = 0; ··· 96 97 db: Arc<DB>, 97 98 stats: Arc<Stats>, 98 99 mem_limit_mb: usize, 100 + sample: Option<f64>, 99 101 ) { 100 102 while let Ok(path) = rx.recv().await { 101 - let p = process_car(&path, mem_limit_mb, db.clone(), &stats); 103 + let p = process_car(&path, mem_limit_mb, db.clone(), &stats, sample); 102 104 match tokio::time::timeout(Duration::from_secs(30), p).await { 103 105 Ok(Ok(_)) => {} 104 106 Ok(Err(e)) => { ··· 118 120 mem_limit_mb: usize, 119 121 db: Arc<DB>, 120 122 stats: &Stats, 123 + sample: Option<f64>, 121 124 ) -> Result<(), ProcessError> { 122 125 let file = tokio::fs::File::open(path).await?; 123 126 let reader = BufReader::new(file); ··· 131 134 132 135 let did = car.commit.did.clone(); 133 136 let cid = car.commit.data.to_bytes(); 137 + 138 + // skip this DID if it doesn't fall within the sample fraction 139 + if let Some(frac) = sample { 140 + if rand::random::<f64>() >= frac { 141 + return Ok(()); 142 + } 143 + } 134 144 135 145 if false { 136 146 let mut account_key = Vec::with_capacity(1 + did.len());