···4455use crate::disk_walk::{Step, Trip, Walker};
66use crate::mst::Commit;
77-use crate::mst::Node;
8798use ipld_core::cid::Cid;
1010-use serde::{Deserialize, Serialize, de::DeserializeOwned};
99+use serde::{Serialize, de::DeserializeOwned};
11101211/// Errors that can happen while consuming and emitting blocks and records
1312#[derive(Debug, thiserror::Error)]
···2625 Tripped(#[from] Trip),
2726}
28272929-#[derive(Debug, Clone, Serialize, Deserialize)]
3030-pub enum MaybeProcessedBlock<T: Clone + Serialize> {
3131- /// A block that's *probably* a Node (but we can't know yet)
3232- ///
3333- /// It *can be* a record that suspiciously looks a lot like a node, so we
3434- /// cannot eagerly turn it into a Node. We only know for sure what it is
3535- /// when we actually walk down the MST
3636- Raw(Vec<u8>),
3737- /// A processed record from a block that was definitely not a Node
3838- ///
3939- /// If we _never_ needed this block, then we may have wasted a bit of effort
4040- /// trying to process it. Oh well.
4141- ///
4242- /// Processing has to be fallible because the CAR can have totally-unused
4343- /// blocks, which can just be garbage. since we're eagerly trying to process
4444- /// record blocks without knowing for sure that they *are* records, we
4545- /// discard any definitely-not-nodes that fail processing and keep their
4646- /// error in the buffer for them. if we later try to retreive them as a
4747- /// record, then we can surface the error.
4848- ///
4949- /// The error type is `String` because we don't really want to put
5050- /// any constraints like `Serialize` on the error type, and `Error`
5151- /// at least requires `Display`. It's a compromise.
5252- ProcessedOk(T),
5353- Unprocessable(String),
5454-}
5555-5628pub trait BlockStore<MPB: Serialize + DeserializeOwned> {
5757- fn put(&self, key: Cid, value: MPB); // unwraps for now
2929+ fn put_batch(&self, blocks: Vec<(Cid, MPB)>); // unwraps for now
5830 fn get(&self, key: Cid) -> Option<MPB>;
5931}
6032···6638 SE: Error + 'static,
6739 S: Stream<Item = CarBlock<SE>>,
6840 T: Clone + Serialize + DeserializeOwned,
6969- BS: BlockStore<MaybeProcessedBlock<T>>,
4141+ BS: BlockStore<Vec<u8>>,
7042 P: Fn(&[u8]) -> Result<T, PE>,
7143 PE: Error,
7244{
7345 #[allow(dead_code)]
7474- block_stream: S,
4646+ block_stream: Option<S>,
7547 block_store: BS,
7648 walker: Walker,
7749 process: P,
···8254 SE: Error + 'static,
8355 S: Stream<Item = CarBlock<SE>> + Unpin,
8456 T: Clone + Serialize + DeserializeOwned,
8585- BS: BlockStore<MaybeProcessedBlock<T>>,
5757+ BS: BlockStore<Vec<u8>>,
8658 P: Fn(&[u8]) -> Result<T, PE>,
8759 PE: Error,
8860{
···10577 /// memory usage.
10678 pub async fn init(
10779 root: Cid,
108108- mut block_stream: S,
8080+ block_stream: S,
10981 block_store: BS,
11082 process: P,
11183 ) -> Result<(Commit, Self), DriveError> {
11284 let mut commit = None;
1138511486 log::warn!("init: load blocks");
8787+8888+ let mut chunked = block_stream.try_chunks(4096);
1158911690 // go ahead and put all blocks in the block store
117117- while let Some((cid, data)) = block_stream
9191+ while let Some(chunk) = chunked
11892 .try_next()
11993 .await
12094 .map_err(|e| DriveError::CarBlockError(e.into()))?
12195 {
122122- if cid == root {
123123- let c: Commit = serde_ipld_dagcbor::from_slice(&data)
124124- .map_err(|e| DriveError::BadCommit(e.into()))?;
125125- commit = Some(c);
126126- } else {
127127- block_store.put(
128128- cid,
129129- if Node::could_be(&data) {
130130- MaybeProcessedBlock::Raw(data)
131131- } else {
132132- match process(&data) {
133133- Ok(t) => MaybeProcessedBlock::ProcessedOk(t),
134134- Err(e) => MaybeProcessedBlock::Unprocessable(e.to_string()),
135135- }
136136- },
137137- );
9696+ let mut to_insert = Vec::with_capacity(chunk.len());
9797+ for (cid, data) in chunk {
9898+ if cid == root {
9999+ let c: Commit = serde_ipld_dagcbor::from_slice(&data)
100100+ .map_err(|e| DriveError::BadCommit(e.into()))?;
101101+ commit = Some(c);
102102+ } else {
103103+ to_insert.push((cid, data));
104104+ }
138105 }
106106+ block_store.put_batch(to_insert)
139107 }
140108141109 log::warn!("init: got commit?");
···148116 log::warn!("init: wrapping up");
149117150118 let me = Self {
151151- block_stream,
119119+ block_stream: None,
152120 block_store,
153121 walker,
154122 process,
+22-14
src/disk_redb.rs
···11use crate::disk_drive::BlockStore;
22use ipld_core::cid::Cid;
33-use redb::{Database, Error, ReadableTable, TableDefinition, WriteTransaction};
33+use redb::{Database, Durability, Error, ReadableDatabase, TableDefinition};
44use serde::{Serialize, de::DeserializeOwned};
55use std::path::Path;
66···99pub struct RedbStore {
1010 #[allow(dead_code)]
1111 db: Database,
1212- tx: Option<WriteTransaction>,
1312}
14131514impl RedbStore {
···1716 log::warn!("redb new");
1817 let db = Database::create(path)?;
1918 log::warn!("db created");
2020- let mut tx = db.begin_write()?;
2121- tx.set_durability(redb::Durability::None).unwrap();
2222- log::warn!("transaction begun");
2323- Ok(Self { db, tx: Some(tx) })
1919+ Ok(Self { db })
2420 }
2521}
26222723impl Drop for RedbStore {
2824 fn drop(&mut self) {
2929- let tx = self.tx.take();
3030- tx.unwrap().abort().unwrap();
2525+ let mut tx = self.db.begin_write().unwrap();
2626+ tx.set_durability(Durability::None).unwrap();
2727+ tx.delete_table(TABLE).unwrap();
2828+ tx.commit().unwrap();
3129 }
3230}
33313432impl<MPB: Serialize + DeserializeOwned> BlockStore<MPB> for RedbStore {
3535- fn put(&self, c: Cid, t: MPB) {
3636- let key_bytes = c.to_bytes();
3737- let val_bytes = bincode::serde::encode_to_vec(t, bincode::config::standard()).unwrap();
3333+ fn put_batch(&self, blocks: Vec<(Cid, MPB)>) {
3434+ let mut tx = self.db.begin_write().unwrap();
3535+ tx.set_durability(Durability::None).unwrap();
3636+3837 {
3939- let mut table = self.tx.as_ref().unwrap().open_table(TABLE).unwrap();
4040- table.insert(&*key_bytes, &*val_bytes).unwrap();
3838+ let mut table = tx.open_table(TABLE).unwrap();
3939+ for (cid, t) in blocks {
4040+ let key_bytes = cid.to_bytes();
4141+ let val_bytes =
4242+ bincode::serde::encode_to_vec(t, bincode::config::standard()).unwrap();
4343+ table.insert(&*key_bytes, &*val_bytes).unwrap();
4444+ }
4145 }
4646+4747+ tx.commit().unwrap();
4248 }
4949+4350 fn get(&self, c: Cid) -> Option<MPB> {
4451 let key_bytes = c.to_bytes();
4545- let table = self.tx.as_ref().unwrap().open_table(TABLE).unwrap();
5252+ let tx = self.db.begin_read().unwrap();
5353+ let table = tx.open_table(TABLE).unwrap();
4654 let maybe_val_bytes = table.get(&*key_bytes).unwrap()?;
4755 let (t, n): (MPB, usize) =
4856 bincode::serde::decode_from_slice(maybe_val_bytes.value(), bincode::config::standard())