Constellation, Spacedust, Slingshot, UFOs: atproto crates and services for microcosm
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

merging iterator, maybe working

still only actually using the single AllTime iter, but seems to work with that.

phil 548e6de7 426e8184

+108 -39
+1
ufos/src/db_types.rs
··· 276 276 } 277 277 } 278 278 279 + // BUG: this needs to use the null-terminating string thing!!!!!!!!!!!!!! the whole point of all of this!!!! 279 280 impl DbBytes for Nsid { 280 281 fn from_db_bytes(bytes: &[u8]) -> Result<(Self, usize), EncodingError> { 281 282 let (s, n) = decode_from_slice(bytes, bincode_conf())?;
+103 -39
ufos/src/storage_fjall.rs
··· 2 2 use crate::error::StorageError; 3 3 use crate::storage::{StorageResult, StorageWhatever, StoreBackground, StoreReader, StoreWriter}; 4 4 use crate::store_types::{ 5 - AllTimeDidsKey, AllTimeRecordsKey, AllTimeRollupKey, CountsValue, DeleteAccountQueueKey, 6 - DeleteAccountQueueVal, HourTruncatedCursor, HourlyDidsKey, HourlyRecordsKey, HourlyRollupKey, 7 - JetstreamCursorKey, JetstreamCursorValue, JetstreamEndpointKey, JetstreamEndpointValue, 8 - LiveCountsKey, NewRollupCursorKey, NewRollupCursorValue, NsidRecordFeedKey, NsidRecordFeedVal, 5 + AllTimeDidsKey, AllTimeRecordsKey, AllTimeRollupKey, CountsValue, CursorBucket, 6 + DeleteAccountQueueKey, DeleteAccountQueueVal, HourTruncatedCursor, HourlyDidsKey, 7 + HourlyRecordsKey, HourlyRollupKey, HourlyRollupStaticPrefix, JetstreamCursorKey, 8 + JetstreamCursorValue, JetstreamEndpointKey, JetstreamEndpointValue, LiveCountsKey, 9 + NewRollupCursorKey, NewRollupCursorValue, NsidRecordFeedKey, NsidRecordFeedVal, 9 10 RecordLocationKey, RecordLocationMeta, RecordLocationVal, RecordRawValue, SketchSecretKey, 10 11 SketchSecretPrefix, TakeoffKey, TakeoffValue, TrimCollectionCursorKey, WeekTruncatedCursor, 11 12 WeeklyDidsKey, WeeklyRecordsKey, WeeklyRollupKey, ··· 15 16 UFOsRecord, 16 17 }; 17 18 use async_trait::async_trait; 19 + use fjall::Snapshot; 18 20 use fjall::{Batch as FjallBatch, Config, Keyspace, PartitionCreateOptions, PartitionHandle}; 19 21 use jetstream::events::Cursor; 20 22 use std::collections::{HashMap, HashSet}; ··· 373 375 }) 374 376 } 375 377 378 + fn get_earliest_hour(&self, rollups: Option<&Snapshot>) -> StorageResult<HourTruncatedCursor> { 379 + let cursor = rollups 380 + .unwrap_or(&self.rollups.snapshot()) 381 + .prefix(HourlyRollupStaticPrefix::default().to_db_bytes()?) 382 + .next() 383 + .transpose()? 384 + .map(|(key_bytes, _)| db_complete::<HourlyRollupKey>(&key_bytes)) 385 + .transpose()? 386 + .map(|key| key.cursor()) 387 + .unwrap_or_else(|| Cursor::from_start().into()); 388 + Ok(cursor) 389 + } 390 + 376 391 fn get_all_collections( 377 392 &self, 378 393 limit: usize, 379 394 cursor: Option<Vec<u8>>, 380 - _since: Option<HourTruncatedCursor>, 381 - _until: Option<HourTruncatedCursor>, 395 + since: Option<HourTruncatedCursor>, 396 + until: Option<HourTruncatedCursor>, 382 397 ) -> StorageResult<(Vec<NsidCount>, Option<Vec<u8>>)> { 383 - Ok(if true { 384 - let snapshot = self.rollups.snapshot(); 398 + let snapshot = self.rollups.snapshot(); 385 399 386 - let start = if let Some(cursor_bytes) = cursor { 387 - let nsid = db_complete::<Nsid>(&cursor_bytes)?; // TODO: bubble a *client* error type 388 - Bound::Excluded( 389 - AllTimeRollupKey::from_pair(Default::default(), nsid).to_db_bytes()?, 390 - ) 391 - } else { 392 - Bound::Included(AllTimeRollupKey::from_prefix_to_db_bytes( 393 - &Default::default(), 394 - )?) 395 - }; 400 + let buckets = if let (None, None) = (since, until) { 401 + vec![CursorBucket::AllTime] 402 + } else { 403 + let mut lower = self.get_earliest_hour(Some(&snapshot))?; 404 + if let Some(specified) = since { 405 + if specified > lower { 406 + lower = specified; 407 + } 408 + } 409 + let upper = until.unwrap_or_else(|| Cursor::at(SystemTime::now()).into()); 410 + CursorBucket::buckets_spanning(lower, upper) 411 + }; 396 412 397 - let end_bytes = AllTimeRollupKey::prefix_range_end(&Default::default())?; 398 - let end = Bound::Excluded(end_bytes.clone()); 413 + let cursor_nsid = cursor.as_deref().map(db_complete::<Nsid>).transpose()?; // TODO: bubble a *client* error type 399 414 400 - let mut out = Vec::new(); 401 - let mut next_cursor = None; 402 - for (i, kv) in snapshot.range((start, end)).take(limit).enumerate() { 403 - let (key_bytes, val_bytes) = kv?; 404 - let key = db_complete::<AllTimeRollupKey>(&key_bytes)?; 405 - let db_counts = db_complete::<CountsValue>(&val_bytes)?; 406 - out.push(NsidCount { 407 - nsid: key.collection().to_string(), 408 - records: db_counts.records(), 409 - dids_estimate: db_counts.dids().estimate() as u64, 410 - }); 411 - if i == limit - 1 { 412 - log::warn!("reached limit, setting next cursor"); 413 - let nsid_bytes = key.collection().to_db_bytes()?; 414 - next_cursor = Some(nsid_bytes); 415 + type Item = StorageResult<(Nsid, fjall::Slice)>; 416 + let mut iters = Vec::with_capacity(buckets.len()); 417 + for bucket in buckets { 418 + match bucket { 419 + CursorBucket::Hour(_hour) => todo!(), 420 + CursorBucket::Week(_week) => todo!(), 421 + CursorBucket::AllTime => { 422 + let start = if let Some(ref nsid) = cursor_nsid { 423 + Bound::Excluded( 424 + AllTimeRollupKey::from_pair(Default::default(), nsid.clone()) 425 + .to_db_bytes()?, 426 + ) 427 + } else { 428 + Bound::Included(AllTimeRollupKey::from_prefix_to_db_bytes( 429 + &Default::default(), 430 + )?) 431 + }; 432 + let end = 433 + Bound::Excluded(AllTimeRollupKey::prefix_range_end(&Default::default())?); 434 + let it = snapshot.range((start, end)).map(|kv| match kv { 435 + Ok((k_bytes, v_bytes)) => db_complete::<AllTimeRollupKey>(&k_bytes) 436 + .map(|key| (key.collection().clone(), v_bytes)) 437 + .map_err(|e| e.into()), 438 + Err(e) => Err(e.into()), // lsm-tree error into fjall error 439 + }); 440 + let boxed: Box<dyn Iterator<Item = Item>> = Box::new(it); 441 + iters.push(boxed.peekable()); 415 442 } 416 443 } 444 + } 417 445 418 - (out, next_cursor) 419 - } else { 420 - todo!() 421 - }) 446 + let mut out = Vec::new(); 447 + let mut current_nsid = None; 448 + for _ in 0..limit { 449 + // double-scan the iters for each element: this could be eliminated but we're starting simple. 450 + // first scan: find the lowest nsid 451 + // second scan: take + merge, and advance all iters with lowest nsid 452 + let mut lowest: Option<Nsid> = None; 453 + for iter in &mut iters { 454 + if let Some(bla) = iter.peek_mut() { 455 + let (nsid, _) = match bla { 456 + Ok(v) => v, 457 + Err(e) => Err(std::mem::replace(e, StorageError::Stolen))?, 458 + }; 459 + lowest = match lowest { 460 + Some(ref current) if nsid.as_str() > current.as_str() => lowest, 461 + _ => Some(nsid.clone()), 462 + }; 463 + } 464 + } 465 + current_nsid = lowest.clone(); 466 + let Some(nsid) = lowest else { break }; 467 + 468 + let mut merged = CountsValue::default(); 469 + for iter in &mut iters { 470 + // unwrap: potential fjall error was already checked & bailed over when peeking in the first loop 471 + if let Some(Ok((_, count_bytes))) = iter.next_if(|v| v.as_ref().unwrap().0 == nsid) 472 + { 473 + let counts = db_complete::<CountsValue>(&count_bytes)?; 474 + merged.merge(&counts); 475 + } 476 + } 477 + out.push(NsidCount { 478 + nsid: nsid.to_string(), 479 + records: merged.records(), 480 + dids_estimate: merged.dids().estimate() as u64, 481 + }); 482 + } 483 + 484 + let next_cursor = current_nsid.map(|s| s.to_db_bytes()).transpose()?; 485 + Ok((out, next_cursor)) 422 486 } 423 487 424 488 fn get_top_collections_by_count(
+4
ufos/src/store_types.rs
··· 318 318 nsid.clone(), 319 319 ) 320 320 } 321 + pub fn cursor(&self) -> HourTruncatedCursor { 322 + self.prefix.suffix 323 + } 321 324 } 322 325 pub type HourlyRollupVal = CountsValue; 323 326 ··· 457 460 pub enum CursorBucket { 458 461 Hour(HourTruncatedCursor), 459 462 Week(WeekTruncatedCursor), 463 + AllTime, 460 464 } 461 465 462 466 impl CursorBucket {