tracks lexicons and how many times they appeared on the jetstream

fix(server): encode_block takes correct number of items, improve rate limiter usage

dusk d0e611bc 7503870a

+61 -57
+26 -18
server/src/db/handle.rs
···
         self.eps.rate() as usize * 60
     }

-    pub fn queue(&self, event: EventRecord) {
-        self.buf.lock().push(event);
+    pub fn queue(&self, events: impl IntoIterator<Item = EventRecord>) {
+        let mut count = 0;
+        self.buf.lock().extend(events.into_iter().inspect(|_| {
+            count += 1;
+        }));
         self.last_insert.store(CLOCK.raw(), AtomicOrdering::Relaxed);
-        self.eps.observe();
+        self.eps.observe(count);
     }

     pub fn compact(
···
         items: impl IntoIterator<Item = Item>,
         count: usize,
     ) -> AppResult<Block> {
+        if count == 0 {
+            return Err(std::io::Error::new(
+                std::io::ErrorKind::InvalidInput,
+                "no items requested",
+            )
+            .into());
+        }
         let mut writer = ItemEncoder::new(
             WritableByteView::with_size(ItemEncoder::encoded_len(count)),
             count,
···
         let mut start_timestamp = None;
         let mut end_timestamp = None;
         let mut written = 0_usize;
-        for item in items {
+        for item in items.into_iter().take(count) {
             writer.encode(&item)?;
             if start_timestamp.is_none() {
                 start_timestamp = Some(item.timestamp);
             }
             end_timestamp = Some(item.timestamp);
-            if written >= count {
-                break;
-            }
             written += 1;
         }
+        if written != count {
+            return Err(std::io::Error::new(
+                std::io::ErrorKind::InvalidData,
+                "unexpected number of items, invalid data?",
+            )
+            .into());
+        }
         if let (Some(start_timestamp), Some(end_timestamp)) = (start_timestamp, end_timestamp) {
             let value = writer.finish()?;
             let key = varints_unsigned_encoded([start_timestamp, end_timestamp]);
···
     }

     pub fn encode_block(&self, item_count: usize) -> AppResult<Block> {
-        let block = Self::encode_block_from_items(
-            self.buf.lock().drain(..).map(|event| {
+        let mut buf = self.buf.lock();
+        let end = item_count.min(buf.len());
+        Self::encode_block_from_items(
+            buf.drain(..end).map(|event| {
                 Item::new(
                     event.timestamp,
                     &NsidHit {
···
                 )
             }),
             item_count,
-        )?;
-        if block.written != item_count {
-            return Err(std::io::Error::new(
-                std::io::ErrorKind::InvalidData,
-                "unexpected number of items, invalid data?",
-            )
-            .into());
-        }
-        Ok(block)
+        )
     }
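For context on the commit title: the old loop encoded each item before checking written, so a source with more than count items pending would push count + 1 encodes into a writer sized for count. The take(count) bound plus the written != count check make the contract exact at both ends. A minimal standalone sketch of the off-by-one (old_take/new_take are illustrative stand-ins, not the real ItemEncoder):

// Sketch of the off-by-one fixed above; names here are illustrative.
fn old_take(items: &[u32], count: usize) -> Vec<u32> {
    let mut out = Vec::new();
    let mut written = 0;
    for item in items {
        out.push(*item); // "encode" first...
        if written >= count {
            break; // ...check afterwards: one item too late
        }
        written += 1;
    }
    out
}

fn new_take(items: &[u32], count: usize) -> Vec<u32> {
    // bounding the iterator up front can never overshoot
    items.iter().copied().take(count).collect()
}

fn main() {
    let items = [1, 2, 3, 4, 5];
    assert_eq!(old_take(&items, 3).len(), 4); // overshoots by one
    assert_eq!(new_take(&items, 3).len(), 3); // exact
}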
+8 -18
server/src/db/mod.rs
···
         // if we disconnect for a long time, we want to sync all of what we
         // have to avoid having many small blocks (even if we run compaction
         // later, it reduces work until we run compaction)
-        let block_size = is_too_old
+        let block_size = (is_too_old || all)
             .then_some(self.max_block_size)
             .unwrap_or_else(|| {
                 self.max_block_size
···
     pub fn ingest_events(&self, events: impl Iterator<Item = EventRecord>) -> AppResult<()> {
         for (key, chunk) in events.chunk_by(|event| event.nsid.clone()).into_iter() {
             let mut counts = self.get_count(&key)?;
-            let handle = self.ensure_handle(&key);
-            for event in chunk {
-                let EventRecord {
-                    timestamp, deleted, ..
-                } = event.clone();
-
-                handle.queue(event);
-
+            let mut count = 0;
+            self.ensure_handle(&key).queue(chunk.inspect(|e| {
                 // increment count
-                counts.last_seen = timestamp;
-                if deleted {
+                counts.last_seen = e.timestamp;
+                if e.deleted {
                     counts.deleted_count += 1;
                 } else {
                     counts.count += 1;
                 }
-
-                self.eps.observe();
-            }
+                count += 1;
+            }));
+            self.eps.observe(count);
             self.insert_count(&key, &counts)?;
             if self.event_broadcaster.receiver_count() > 0 {
                 let _ = self.event_broadcaster.send((key, counts));
             }
         }
         Ok(())
-    }
-
-    pub fn record_event(&self, e: EventRecord) -> AppResult<()> {
-        self.ingest_events(std::iter::once(e))
     }

     #[inline(always)]
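The per-event bookkeeping now rides along in the Iterator::inspect closure handed to queue, so the chunk is only walked once; inspect fires lazily, as queue's extend pulls each item. A small sketch of that idiom, std only:

// inspect-while-draining: side effects run lazily, once per item
// the consumer actually pulls (here, Vec::extend).
fn main() {
    let events = vec![10u64, 20, 30];
    let mut count = 0u64;
    let mut buf: Vec<u64> = Vec::new();
    buf.extend(events.into_iter().inspect(|_| {
        count += 1; // runs during extend, not when inspect is called
    }));
    assert_eq!(count, 3);
    assert_eq!(buf, [10, 20, 30]);
}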
+22 -10
server/src/main.rs
···
 use std::{ops::Deref, time::Duration};

+use itertools::Itertools;
 use rclite::Arc;
 use smol_str::ToSmolStr;
 use tokio_util::sync::CancellationToken;
···
         Arc::new(Db::new(".fjall_data_to", cancel_token.child_token()).expect("couldnt create db"));

     let nsids = from.get_nsids().collect::<Vec<_>>();
+    let eps_thread = std::thread::spawn({
+        let to = to.clone();
+        move || {
+            loop {
+                std::thread::sleep(Duration::from_secs(3));
+                tracing::info!("{} rps", to.eps());
+            }
+        }
+    });
     let mut threads = Vec::with_capacity(nsids.len());
     let start = CLOCK.now();
     for nsid in nsids {
         let from = from.clone();
         let to = to.clone();
         threads.push(std::thread::spawn(move || {
-            tracing::info!("migrating {} ...", nsid.deref());
+            tracing::info!("{}: migrating...", nsid.deref());
             let mut count = 0_u64;
-            for hit in from.get_hits(&nsid, ..) {
-                let hit = hit.expect("cant read event");
-                let data = hit.access();
-                to.record_event(EventRecord {
-                    nsid: nsid.to_smolstr(),
-                    timestamp: hit.timestamp,
-                    deleted: data.deleted,
-                })
+            for hits in from.get_hits(&nsid, ..).chunks(100000).into_iter() {
+                to.ingest_events(hits.map(|hit| {
+                    count += 1;
+                    let hit = hit.expect("cant decode hit");
+                    EventRecord {
+                        nsid: nsid.to_smolstr(),
+                        timestamp: hit.timestamp,
+                        deleted: hit.access().deleted,
+                    }
+                }))
                 .expect("cant record event");
-                count += 1;
             }
+            tracing::info!("{}: ingested {} events...", nsid.deref(), count);
             count
         }));
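The migration loop batches reads with Itertools::chunks (hence the new itertools import). Each chunk is itself a lazy sub-iterator, so a 100000-hit batch streams straight into ingest_events instead of being collected up front. Rough illustration, assuming only the itertools crate:

// Itertools::chunks on a plain iterator: chunks are lazy sub-iterators,
// consumed in order; collected here only to show the grouping.
use itertools::Itertools;

fn main() {
    let batches: Vec<Vec<u32>> = (0..7u32)
        .chunks(3)
        .into_iter()
        .map(|chunk| chunk.collect())
        .collect();
    assert_eq!(batches, vec![vec![0, 1, 2], vec![3, 4, 5], vec![6]]);
}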
+5 -11
server/src/utils.rs
···
     }

     /// record an event
-    pub fn observe(&self) {
+    pub fn observe(&self, count: u64) {
         self.maybe_advance_buckets();

         let bucket_index = self.get_current_bucket_index();
-        self.buckets[bucket_index].fetch_add(1, Ordering::Relaxed);
+        self.buckets[bucket_index].fetch_add(count, Ordering::Relaxed);
     }

     /// get the current rate in events per second
···
         let tracker = DefaultRateTracker::new(Duration::from_secs(2));

         // record some events
-        tracker.observe();
-        tracker.observe();
-        tracker.observe();
+        tracker.observe(3);

         let rate = tracker.rate();
         assert_eq!(rate, 1.5); // 3 events over 2 seconds = 1.5 events/sec
···
         let tracker = DefaultRateTracker::new(Duration::from_secs(1));

         // record a lot of events
-        for _ in 0..1000 {
-            tracker.observe();
-        }
+        tracker.observe(1000);

         let rate = tracker.rate();
         assert_eq!(rate, 1000.0); // 1000 events in 1 second
···
         for _ in 0..4 {
             let tracker_clone = Arc::clone(&tracker);
             let handle = thread::spawn(move || {
-                for _ in 0..10 {
-                    tracker_clone.observe();
-                }
+                tracker_clone.observe(10);
             });
             handles.push(handle);
         }
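With the count parameter, a batch of n events costs one atomic fetch_add instead of n; the counter ends up in the same state either way, there are just fewer read-modify-write operations on the hot path. Trivial equivalence check, std only:

// One fetch_add(n) leaves the counter where n fetch_add(1) calls would,
// with a single atomic RMW instead of n of them.
use std::sync::atomic::{AtomicU64, Ordering};

fn main() {
    let a = AtomicU64::new(0);
    let b = AtomicU64::new(0);
    for _ in 0..10 {
        a.fetch_add(1, Ordering::Relaxed);
    }
    b.fetch_add(10, Ordering::Relaxed);
    assert_eq!(a.load(Ordering::Relaxed), b.load(Ordering::Relaxed));
}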