Fast and robust atproto CAR file processing in Rust
15
fork

Configure Feed

Select the types of activity you want to include in your feed.

clean up error handling

phil fbca2232 7537ed69

+33 -28
+5 -3
src/disk.rs
··· 1 + use crate::drive::DriveError; 1 2 use rusqlite::OptionalExtension; 2 3 use std::path::PathBuf; 3 4 ··· 81 82 } 82 83 pub fn put_many( 83 84 &mut self, 84 - kv: impl Iterator<Item = (Vec<u8>, Vec<u8>)>, 85 - ) -> rusqlite::Result<()> { 85 + kv: impl Iterator<Item = Result<(Vec<u8>, Vec<u8>), DriveError>>, 86 + ) -> Result<(), DriveError> { 86 87 let tx = self.tx.as_ref().unwrap(); 87 88 let mut insert_stmt = tx.prepare_cached("INSERT INTO blocks (key, val) VALUES (?1, ?2)")?; 88 - for (k, v) in kv { 89 + for pair in kv { 90 + let (k, v) = pair?; 89 91 insert_stmt.execute((k, v))?; 90 92 } 91 93 Ok(())
+16 -12
src/drive.rs
··· 10 10 use tokio::io::AsyncRead; 11 11 12 12 use crate::mst::{Commit, Node}; 13 - use crate::walk::{Step, Trip, Walker}; 13 + use crate::walk::{Step, WalkError, Walker}; 14 14 15 15 /// Errors that can happen while consuming and emitting blocks and records 16 16 #[derive(Debug, thiserror::Error)] ··· 24 24 #[error("The MST block {0} could not be found")] 25 25 MissingBlock(Cid), 26 26 #[error("Failed to walk the mst tree: {0}")] 27 - Tripped(#[from] Trip), 27 + WalkError(#[from] WalkError), 28 28 #[error("CAR file had no roots")] 29 29 MissingRoot, 30 30 #[error("Storage error")] ··· 33 33 BincodeEncodeError(#[from] bincode::error::EncodeError), 34 34 #[error("Decode error: {0}")] 35 35 BincodeDecodeError(#[from] bincode::error::DecodeError), 36 + #[error("Tried to send on a closed channel")] 37 + ChannelSendError, // SendError takes <T> which we don't need 38 + #[error("Failed to join a task: {0}")] 39 + JoinError(#[from] tokio::task::JoinError), 36 40 } 37 41 38 42 pub trait Processable: Clone + Serialize + DeserializeOwned { ··· 118 122 } 119 123 120 124 // remaining possible types: node, record, other. 
optimistically process 121 - // TODO: get the actual in-memory size to compute disk spill 122 125 let maybe_processed = if Node::could_be(&data) { 123 126 MaybeProcessedBlock::Raw(data) 124 127 } else { ··· 191 194 let kvs = self 192 195 .mem_blocks 193 196 .into_iter() 194 - .map(|(k, v)| (k.to_bytes(), encode(v).unwrap())); 197 + .map(|(k, v)| Ok(encode(v).map(|v| (k.to_bytes(), v))?)); 195 198 196 199 writer.put_many(kvs)?; 197 200 198 201 drop(writer); // cannot outlive access 199 202 Ok::<_, DriveError>(access) 200 203 }) 201 - .await 202 - .unwrap()?; 204 + .await??; 203 205 204 206 let (tx, mut rx) = tokio::sync::mpsc::channel::<Vec<(Cid, MaybeProcessedBlock<T>)>>(2); 205 207 ··· 209 211 while let Some(chunk) = rx.blocking_recv() { 210 212 let kvs = chunk 211 213 .into_iter() 212 - .map(|(k, v)| (k.to_bytes(), encode(v).unwrap())); 214 + .map(|(k, v)| Ok(encode(v).map(|v| (k.to_bytes(), v))?)); 213 215 writer.put_many(kvs)?; 214 216 } 215 217 ··· 251 253 if chunk.is_empty() { 252 254 break; 253 255 } 254 - tx.send(chunk).await.unwrap(); 256 + tx.send(chunk) 257 + .await 258 + .map_err(|_| DriveError::ChannelSendError)?; 255 259 } 256 260 drop(tx); 257 261 log::debug!("done. waiting for worker to finish..."); 258 262 259 - access = access_worker.await.unwrap()?; 263 + access = access_worker.await??; 260 264 261 265 log::debug!("worker finished."); 262 266 ··· 307 311 self.access = access; 308 312 Ok::<_, DriveError>((self, out)) 309 313 }) 310 - .await 311 - .unwrap()?; // TODO 314 + .await??; 312 315 313 316 if out.is_empty() { 314 317 Ok((self, None)) ··· 353 356 if out.is_empty() { 354 357 break; 355 358 } 356 - tx.blocking_send(out).unwrap(); 359 + tx.blocking_send(out) 360 + .map_err(|_| DriveError::ChannelSendError)?; 357 361 } 358 362 359 363 drop(reader); // cannot outlive access
+12 -13
src/walk.rs
··· 10 10 11 11 /// Errors that can happen while walking 12 12 #[derive(Debug, thiserror::Error)] 13 - pub enum Trip { 13 + pub enum WalkError { 14 14 #[error("Failed to fingerprint commit block")] 15 15 BadCommitFingerprint, 16 16 #[error("Failed to decode commit block: {0}")] ··· 176 176 &mut self, 177 177 blocks: &mut HashMap<Cid, MaybeProcessedBlock<T>>, 178 178 process: impl Fn(Vec<u8>) -> T, 179 - ) -> Result<Step<T>, Trip> { 179 + ) -> Result<Step<T>, WalkError> { 180 180 loop { 181 181 let Some(need) = self.stack.last_mut() else { 182 182 log::trace!("tried to walk but we're actually done."); ··· 192 192 }; 193 193 194 194 let MaybeProcessedBlock::Raw(data) = block else { 195 - return Err(Trip::BadCommitFingerprint); 195 + return Err(WalkError::BadCommitFingerprint); 196 196 }; 197 - let node = 198 - serde_ipld_dagcbor::from_slice::<Node>(&data).map_err(Trip::BadCommit)?; 197 + let node = serde_ipld_dagcbor::from_slice::<Node>(&data) 198 + .map_err(WalkError::BadCommit)?; 199 199 200 200 // found node, make sure we remember 201 201 self.stack.pop(); ··· 205 205 } 206 206 Need::Record { rkey, cid } => { 207 207 log::trace!("need record {cid:?}"); 208 + // note that we cannot *remove* a record block, sadly, since 209 + // there can be multiple rkeys pointing to the same cid. 208 210 let Some(data) = blocks.get_mut(cid) else { 209 - log::trace!("record block not found, resting"); 210 211 return Ok(Step::Missing(*cid)); 211 212 }; 212 213 let rkey = rkey.clone(); ··· 218 219 // found node, make sure we remember 219 220 self.stack.pop(); 220 221 221 - log::trace!("emitting a block as a step. 
depth={}", self.stack.len()); 222 - 223 222 // rkeys *must* be in order or else the tree is invalid (or 224 223 // we have a bug) 225 224 if rkey <= self.prev { ··· 238 237 &mut self, 239 238 reader: &mut SqliteReader, 240 239 process: impl Fn(Vec<u8>) -> T, 241 - ) -> Result<Step<T>, Trip> { 240 + ) -> Result<Step<T>, WalkError> { 242 241 loop { 243 242 let Some(need) = self.stack.last_mut() else { 244 243 log::trace!("tried to walk but we're actually done."); ··· 257 256 let block: MaybeProcessedBlock<T> = crate::drive::decode(&block_bytes)?; 258 257 259 258 let MaybeProcessedBlock::Raw(data) = block else { 260 - return Err(Trip::BadCommitFingerprint); 259 + return Err(WalkError::BadCommitFingerprint); 261 260 }; 262 - let node = 263 - serde_ipld_dagcbor::from_slice::<Node>(&data).map_err(Trip::BadCommit)?; 261 + let node = serde_ipld_dagcbor::from_slice::<Node>(&data) 262 + .map_err(WalkError::BadCommit)?; 264 263 265 264 // found node, make sure we remember 266 265 self.stack.pop(); 267 266 268 267 // queue up work on the found node next 269 - push_from_node(&mut self.stack, &node, depth).map_err(Trip::MstError)?; 268 + push_from_node(&mut self.stack, &node, depth).map_err(WalkError::MstError)?; 270 269 } 271 270 Need::Record { rkey, cid } => { 272 271 log::trace!("need record {cid:?}");