tracks lexicons and how many times they appeared on the jetstream
3
fork

Configure Feed

Select the types of activity you want to include in your feed.

refactor(server): implement WritableByteView so we don't have to copy from a Vec while constructing fjall::Slice

dusk a820ae13 925dbc95

+75 -17
+1
server/Cargo.lock
··· 1493 1493 "async-trait", 1494 1494 "axum", 1495 1495 "axum-tws", 1496 + "byteview", 1496 1497 "fjall", 1497 1498 "futures-util", 1498 1499 "itertools",
+1
server/Cargo.toml
··· 27 27 snmalloc-rs = "0.3.8" 28 28 quanta = "0.12.6" 29 29 itertools = "0.14.0" 30 + byteview = "0.6.1"
+8 -8
server/src/db/block.rs
··· 49 49 } 50 50 } 51 51 52 - pub fn encode(&mut self, item: &Item<T>) -> AppResult<()> { 52 + pub fn encode(&mut self, item: &Item<T>) -> io::Result<()> { 53 53 if self.prev_timestamp == 0 { 54 54 // self.writer.write_varint(item.timestamp)?; 55 55 self.prev_timestamp = item.timestamp; ··· 68 68 Ok(()) 69 69 } 70 70 71 - fn write_data(&mut self, data: &[u8]) -> AppResult<()> { 71 + fn write_data(&mut self, data: &[u8]) -> io::Result<()> { 72 72 self.writer.write_varint(data.len())?; 73 73 self.writer.write_all(data)?; 74 74 Ok(()) 75 75 } 76 76 77 - pub fn finish(mut self) -> AppResult<W> { 77 + pub fn finish(mut self) -> io::Result<W> { 78 78 self.writer.flush()?; 79 79 Ok(self.writer) 80 80 } ··· 89 89 } 90 90 91 91 impl<R: Read, T: Archive> ItemDecoder<R, T> { 92 - pub fn new(reader: R, start_timestamp: u64) -> AppResult<Self> { 92 + pub fn new(reader: R, start_timestamp: u64) -> io::Result<Self> { 93 93 Ok(ItemDecoder { 94 94 reader, 95 95 current_timestamp: start_timestamp, ··· 99 99 }) 100 100 } 101 101 102 - pub fn decode(&mut self) -> AppResult<Option<Item<T>>> { 102 + pub fn decode(&mut self) -> io::Result<Option<Item<T>>> { 103 103 if self.first_item { 104 104 // read the first timestamp 105 105 // let timestamp = match self.reader.read_varint::<u64>() { ··· 144 144 } 145 145 146 146 // [10, 11, 12, 14] -> [1, 1, 2] -> [0, 1] 147 - fn read_timestamp(&mut self) -> AppResult<Option<u64>> { 147 + fn read_timestamp(&mut self) -> io::Result<Option<u64>> { 148 148 let delta = match self.reader.read_varint::<i64>() { 149 149 Ok(delta) => delta, 150 150 Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => return Ok(None), ··· 156 156 Ok(Some(self.current_timestamp)) 157 157 } 158 158 159 - fn read_item(&mut self) -> AppResult<Option<AlignedVec>> { 159 + fn read_item(&mut self) -> io::Result<Option<AlignedVec>> { 160 160 let data_len = match self.reader.read_varint::<usize>() { 161 161 Ok(data_len) => data_len, 162 162 Err(e) if e.kind() == 
io::ErrorKind::UnexpectedEof => return Ok(None), ··· 172 172 } 173 173 174 174 impl<R: Read, T: Archive> Iterator for ItemDecoder<R, T> { 175 - type Item = AppResult<Item<T>>; 175 + type Item = io::Result<Item<T>>; 176 176 177 177 fn next(&mut self) -> Option<Self::Item> { 178 178 self.decode().transpose()
+65 -9
server/src/db/mod.rs
··· 1 1 use std::{ 2 - io::Cursor, 2 + io::{self, Cursor, Write}, 3 + marker::PhantomData, 3 4 ops::{Bound, Deref, RangeBounds}, 4 5 path::Path, 5 6 sync::{ ··· 9 10 time::Duration, 10 11 }; 11 12 13 + use byteview::ByteView; 12 14 use fjall::{Config, Keyspace, Partition, PartitionCreateOptions, Slice}; 13 15 use itertools::{Either, Itertools}; 14 16 use ordered_varint::Variable; 15 17 use rkyv::{Archive, Deserialize, Serialize, rancor::Error}; 16 18 use smol_str::SmolStr; 17 19 use tokio::sync::broadcast; 20 + use tokio_util::bytes::{self, BufMut}; 18 21 19 22 use crate::{ 20 23 db::block::{ReadVariableExt, WriteVariableExt}, ··· 69 72 } 70 73 71 74 type ItemDecoder = block::ItemDecoder<Cursor<Slice>, NsidHit>; 72 - type ItemEncoder = block::ItemEncoder<Vec<u8>, NsidHit>; 75 + type ItemEncoder = block::ItemEncoder<WritableByteView, NsidHit>; 73 76 type Item = block::Item<NsidHit>; 74 77 78 + struct WritableByteView { 79 + view: ByteView, 80 + written: usize, 81 + } 82 + 83 + impl WritableByteView { 84 + // returns None if the view already has a reference to it 85 + fn with_size(capacity: usize) -> Self { 86 + Self { 87 + view: ByteView::with_size(capacity), 88 + written: 0, 89 + } 90 + } 91 + 92 + #[inline(always)] 93 + fn into_inner(self) -> ByteView { 94 + self.view 95 + } 96 + } 97 + 98 + impl Write for WritableByteView { 99 + fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> { 100 + let len = buf.len(); 101 + if len > self.view.len() - self.written { 102 + return Err(std::io::Error::new( 103 + std::io::ErrorKind::StorageFull, 104 + "buffer full", 105 + )); 106 + } 107 + // SAFETY: this is safe because we have checked that the buffer is not full 108 + // SAFETY: we own the mutator so no other references to the view exist 109 + unsafe { 110 + std::ptr::copy_nonoverlapping( 111 + buf.as_ptr(), 112 + self.view 113 + .get_mut() 114 + .unwrap_unchecked() 115 + .as_mut_ptr() 116 + .add(self.written), 117 + len, 118 + ); 119 + self.written += len; 120 + } 121 
+ Ok(len) 122 + } 123 + 124 + #[inline(always)] 125 + fn flush(&mut self) -> std::io::Result<()> { 126 + Ok(()) 127 + } 128 + } 129 + 75 130 pub struct LexiconHandle { 76 131 tree: Partition, 77 132 buf: Arc<scc::Queue<EventRecord>>, ··· 113 168 } 114 169 115 170 fn sync(&self, max_block_size: usize) -> AppResult<usize> { 116 - let mut writer = ItemEncoder::new(Vec::with_capacity( 117 - size_of::<u64>() + self.item_count().min(max_block_size) * size_of::<(u64, NsidHit)>(), 118 - )); 171 + let buf_size = 172 + size_of::<u64>() + self.item_count().min(max_block_size) * size_of::<(u64, NsidHit)>(); 173 + let mut writer = ItemEncoder::new(WritableByteView::with_size(buf_size)); 119 174 let mut start_timestamp = None; 120 175 let mut end_timestamp = None; 121 176 let mut written = 0_usize; ··· 142 197 let mut key = Vec::with_capacity(size_of::<u64>() * 2); 143 198 key.write_varint(start_timestamp)?; 144 199 key.write_varint(end_timestamp)?; 145 - self.tree.insert(key, value)?; 200 + self.tree.insert(key, value.into_inner())?; 146 201 } 147 202 Ok(written) 148 203 } ··· 384 439 let map_block = move |(key, val)| { 385 440 let mut key_reader = Cursor::new(key); 386 441 let start_timestamp = key_reader.read_varint::<u64>()?; 387 - let items = 388 - ItemDecoder::new(Cursor::new(val), start_timestamp)?.take_while(move |item| { 442 + let items = ItemDecoder::new(Cursor::new(val), start_timestamp)? 443 + .take_while(move |item| { 389 444 item.as_ref().map_or(true, |item| item.timestamp <= limit) 390 - }); 445 + }) 446 + .map(|res| res.map_err(AppError::from)); 391 447 Ok(items) 392 448 }; 393 449