···11//! Consume an MST block stream, producing an ordered stream of records
2233-use futures::{Stream, TryStreamExt};
33+use crate::disk::{DiskAccess, DiskStore, DiskWriter, StorageErrorBase};
44use ipld_core::cid::Cid;
55+use iroh_car::CarReader;
66+use serde::de::DeserializeOwned;
77+use serde::{Deserialize, Serialize};
58use std::collections::HashMap;
66-use std::error::Error;
99+use std::convert::Infallible;
1010+use tokio::io::AsyncRead;
711812use crate::mst::{Commit, Node};
99-use crate::walk::{Step, Trip, Walker};
1313+use crate::walk::{DiskTrip, Step, Trip, Walker};
10141115/// Errors that can happen while consuming and emitting blocks and records
1216#[derive(Debug, thiserror::Error)]
1313-pub enum DriveError<E: Error> {
1414- #[error("Failed to initialize CarReader: {0}")]
1717+pub enum DriveError {
1818+ #[error("Error from iroh_car: {0}")]
1519 CarReader(#[from] iroh_car::Error),
1616- #[error("Car block stream error: {0}")]
1717- CarBlockError(Box<dyn Error>),
1820 #[error("Failed to decode commit block: {0}")]
1919- BadCommit(Box<dyn Error>),
2121+ BadBlock(#[from] serde_ipld_dagcbor::DecodeError<Infallible>),
2022 #[error("The Commit block reference by the root was not found")]
2123 MissingCommit,
2224 #[error("The MST block {0} could not be found")]
2325 MissingBlock(Cid),
2426 #[error("Failed to walk the mst tree: {0}")]
2525- Tripped(#[from] Trip<E>),
2727+ Tripped(#[from] Trip),
2828+ #[error("CAR file had no roots")]
2929+ MissingRoot,
3030+}
3131+3232+#[derive(Debug, thiserror::Error)]
3333+pub enum DiskDriveError<E: StorageErrorBase> {
3434+ #[error("Error from iroh_car: {0}")]
3535+ CarReader(#[from] iroh_car::Error),
3636+ #[error("Failed to decode commit block: {0}")]
3737+ BadBlock(#[from] serde_ipld_dagcbor::DecodeError<Infallible>),
3838+ #[error("Storage error")]
3939+ StorageError(#[from] E),
4040+ #[error("The Commit block reference by the root was not found")]
4141+ MissingCommit,
4242+ #[error("The MST block {0} could not be found")]
4343+ MissingBlock(Cid),
4444+ #[error("Encode error: {0}")]
4545+ BincodeEncodeError(#[from] bincode::error::EncodeError),
4646+ #[error("Decode error: {0}")]
4747+ BincodeDecodeError(#[from] bincode::error::DecodeError),
4848+ #[error("disk tripped: {0}")]
4949+ DiskTripped(#[from] DiskTrip<E>),
2650}
27512828-type CarBlock<E> = Result<(Cid, Vec<u8>), E>;
5252+// #[derive(Debug, thiserror::Error)]
5353+// pub enum Boooooo<E: StorageErrorBase> {
5454+// #[error("disk tripped: {0}")]
5555+// DiskTripped(#[from] DiskTrip<E>),
5656+// #[error("dde whatever: {0}")]
5757+// DiskDriveError(#[from] DiskDriveError<E>),
5858+// }
5959+6060+pub trait Processable: Clone + Serialize + DeserializeOwned {}
29613030-#[derive(Debug)]
3131-pub enum MaybeProcessedBlock<T, E> {
6262+#[derive(Debug, Clone, Serialize, Deserialize)]
6363+pub enum MaybeProcessedBlock<T> {
3264 /// A block that's *probably* a Node (but we can't know yet)
3365 ///
3466 /// It *can be* a record that suspiciously looks a lot like a node, so we
···5082 /// There's an alternative here, which would be to kick unprocessable blocks
5183 /// back to Raw, or maybe even a new RawUnprocessable variant. Then we could
5284 /// surface the typed error later if needed by trying to reprocess.
5353- Processed(Result<T, E>),
8585+ Processed(T),
5486}
55875656-/// The core driver between the block stream and MST walker
5757-pub struct Vehicle<SE, S, T, P, PE>
5858-where
5959- S: Stream<Item = CarBlock<SE>>,
6060- P: Fn(&[u8]) -> Result<T, PE>,
6161- PE: Error,
6262-{
6363- block_stream: S,
6464- blocks: HashMap<Cid, MaybeProcessedBlock<T, PE>>,
6565- walker: Walker,
6666- process: P,
8888+impl<T: Processable> Processable for MaybeProcessedBlock<T> {}
8989+9090+pub enum Vehicle<R: AsyncRead + Unpin, T: Processable> {
9191+ Lil(Commit, MemDriver<T>),
9292+ Big(BigCar<R, T>),
6793}
68946969-impl<SE, S, T: Clone, P, PE> Vehicle<SE, S, T, P, PE>
7070-where
7171- SE: Error + 'static,
7272- S: Stream<Item = CarBlock<SE>> + Unpin,
7373- P: Fn(&[u8]) -> Result<T, PE>,
7474- PE: Error,
7575-{
7676- /// Set up the stream
7777- ///
7878- /// This will eagerly consume blocks until the `Commit` object is found.
7979- /// *Usually* the it's the first block, but there is no guarantee.
8080- ///
8181- /// ### Parameters
8282- ///
8383- /// `root`: CID of the commit object that is the root of the MST
8484- ///
8585- /// `block_stream`: Input stream of raw CAR blocks
8686- ///
8787- /// `process`: record-transforming callback:
8888- ///
8989- /// For tasks where records can be quickly processed into a *smaller*
9090- /// useful representation, you can do that eagerly as blocks come in by
9191- /// passing the processor as a callback here. This can reduce overall
9292- /// memory usage.
9393- pub async fn init(
9494- root: Cid,
9595- mut block_stream: S,
9696- process: P,
9797- ) -> Result<(Commit, Self), DriveError<PE>> {
9898- let mut blocks = HashMap::new();
9595+pub async fn load_car<R: AsyncRead + Unpin, T: Processable>(
9696+ reader: R,
9797+ process: fn(&[u8]) -> T,
9898+ max_size: usize,
9999+) -> Result<Vehicle<R, T>, DriveError> {
100100+ let mut mem_blocks = HashMap::new();
101101+102102+ let mut car = CarReader::new(reader).await?;
103103+104104+ let root = *car
105105+ .header()
106106+ .roots()
107107+ .first()
108108+ .ok_or(DriveError::MissingRoot)?;
109109+ log::debug!("root: {root:?}");
110110+111111+ let mut commit = None;
112112+113113+ // try to load all the blocks into memory
114114+ while let Some((cid, data)) = car.next_block().await? {
115115+ // the root commit is a Special Third Kind of block that we need to make
116116+ // sure not to optimistically send to the processing function
117117+ if cid == root {
118118+ let c: Commit = serde_ipld_dagcbor::from_slice(&data)?;
119119+ commit = Some(c);
120120+ continue;
121121+ }
99122100100- let mut commit = None;
123123+ // remaining possible types: node, record, other. optimistically process
124124+ // TODO: get the actual in-memory size to compute disk spill
125125+ let maybe_processed = if Node::could_be(&data) {
126126+ MaybeProcessedBlock::Raw(data)
127127+ } else {
128128+ MaybeProcessedBlock::Processed(process(&data))
129129+ };
101130102102- while let Some((cid, data)) = block_stream
103103- .try_next()
104104- .await
105105- .map_err(|e| DriveError::CarBlockError(e.into()))?
106106- {
107107- if cid == root {
108108- let c: Commit = serde_ipld_dagcbor::from_slice(&data)
109109- .map_err(|e| DriveError::BadCommit(e.into()))?;
110110- commit = Some(c);
111111- break;
112112- } else {
113113- blocks.insert(
114114- cid,
115115- if Node::could_be(&data) {
116116- MaybeProcessedBlock::Raw(data)
117117- } else {
118118- MaybeProcessedBlock::Processed(process(&data))
119119- },
120120- );
121121- }
131131+ // stash (maybe processed) blocks in memory as long as we have room
132132+ mem_blocks.insert(cid, maybe_processed);
133133+ if mem_blocks.len() >= max_size {
134134+ return Ok(Vehicle::Big(BigCar {
135135+ car,
136136+ root,
137137+ process,
138138+ max_size,
139139+ mem_blocks,
140140+ commit,
141141+ }));
122142 }
143143+ }
123144124124- // we either broke out or read all the blocks without finding the commit...
125125- let commit = commit.ok_or(DriveError::MissingCommit)?;
145145+ // all blocks loaded and we fit in memory! hopefully we found the commit...
146146+ let commit = commit.ok_or(DriveError::MissingCommit)?;
126147127127- let walker = Walker::new(commit.data);
148148+ let walker = Walker::new(commit.data);
128149129129- let me = Self {
130130- block_stream,
131131- blocks,
150150+ Ok(Vehicle::Lil(
151151+ commit,
152152+ MemDriver {
153153+ blocks: mem_blocks,
132154 walker,
133155 process,
134134- };
135135- Ok((commit, me))
136136- }
156156+ },
157157+ ))
158158+}
137159138138- async fn drive_until(&mut self, cid_needed: Cid) -> Result<(), DriveError<PE>> {
139139- while let Some((cid, data)) = self
140140- .block_stream
141141- .try_next()
142142- .await
143143- .map_err(|e| DriveError::CarBlockError(e.into()))?
144144- {
145145- self.blocks.insert(
146146- cid,
147147- if Node::could_be(&data) {
160160+/// a partially memory-loaded car file that needs disk spillover to continue
161161+pub struct BigCar<R: AsyncRead + Unpin, T: Processable> {
162162+ car: CarReader<R>,
163163+ root: Cid,
164164+ process: fn(&[u8]) -> T,
165165+ max_size: usize,
166166+ mem_blocks: HashMap<Cid, MaybeProcessedBlock<T>>,
167167+ pub commit: Option<Commit>,
168168+}
169169+170170+fn encode(v: impl Serialize) -> Result<Vec<u8>, bincode::error::EncodeError> {
171171+ bincode::serde::encode_to_vec(v, bincode::config::standard())
172172+}
173173+174174+pub fn decode<T: Processable>(bytes: &[u8]) -> Result<T, bincode::error::DecodeError> {
175175+ let (t, n) = bincode::serde::decode_from_slice(bytes, bincode::config::standard())?;
176176+ assert_eq!(n, bytes.len(), "expected to decode all bytes"); // TODO
177177+ Ok(t)
178178+}
179179+180180+impl<R: AsyncRead + Unpin, T: Processable + Send + 'static> BigCar<R, T> {
181181+ pub async fn finish_loading<S: DiskStore>(
182182+ mut self,
183183+ mut store: S,
184184+ ) -> Result<(Commit, BigCarReady<T, S::Access>), DiskDriveError<S::StorageError>>
185185+ where
186186+ S::Access: Send + 'static,
187187+ S::StorageError: 'static,
188188+ {
189189+ // set up access for real
190190+ let mut access = store.get_access().await?;
191191+192192+ // move access in and back out so we can manage lifetimes
193193+ // dump mem blocks into the store
194194+ access = tokio::task::spawn(async move {
195195+ let mut writer = access.get_writer()?;
196196+ for (k, v) in self.mem_blocks {
197197+ let key_bytes = k.to_bytes();
198198+ let val_bytes = encode(v)?; // TODO
199199+ writer.put(key_bytes, val_bytes)?;
200200+ }
201201+ drop(writer); // cannot outlive access
202202+ Ok::<_, DiskDriveError<S::StorageError>>(access)
203203+ })
204204+ .await
205205+ .unwrap()?;
206206+207207+ // dump the rest to disk (in chunks)
208208+ loop {
209209+ let mut chunk = vec![];
210210+ loop {
211211+ let Some((cid, data)) = self.car.next_block().await? else {
212212+ break;
213213+ };
214214+ // we still gotta keep checking for the root since we might not have it
215215+ if cid == self.root {
216216+ let c: Commit = serde_ipld_dagcbor::from_slice(&data)?;
217217+ self.commit = Some(c);
218218+ continue;
219219+ }
220220+ // remaining possible types: node, record, other. optimistically process
221221+ // TODO: get the actual in-memory size to compute disk spill
222222+ let maybe_processed = if Node::could_be(&data) {
148223 MaybeProcessedBlock::Raw(data)
149224 } else {
150225 MaybeProcessedBlock::Processed((self.process)(&data))
151151- },
152152- );
153153- if cid == cid_needed {
154154- return Ok(());
226226+ };
227227+ chunk.push((cid, maybe_processed));
228228+ if chunk.len() >= self.max_size {
229229+ // eventually this won't be .len()
230230+ break;
231231+ }
155232 }
233233+ if chunk.is_empty() {
234234+ break;
235235+ }
236236+237237+ // move access in and back out so we can manage lifetimes
238238+ // dump this chunk of blocks into the store
239239+ access = tokio::task::spawn_blocking(move || {
240240+ let mut writer = access.get_writer()?;
241241+ for (k, v) in chunk {
242242+ let key_bytes = k.to_bytes();
243243+ let val_bytes = encode(v)?; // TODO
244244+ writer.put(key_bytes, val_bytes)?;
245245+ }
246246+ drop(writer); // cannot outlive access
247247+ Ok::<_, DiskDriveError<S::StorageError>>(access)
248248+ })
249249+ .await
250250+ .unwrap()?; // TODO
156251 }
157252158158- // if we never found the block
159159- Err(DriveError::MissingBlock(cid_needed))
253253+ let commit = self.commit.ok_or(DiskDriveError::MissingCommit)?;
254254+255255+ let walker = Walker::new(commit.data);
256256+257257+ Ok((
258258+ commit,
259259+ BigCarReady {
260260+ process: self.process,
261261+ access,
262262+ walker,
263263+ },
264264+ ))
160265 }
266266+}
161267268268+pub struct BigCarReady<T: Clone, A: DiskAccess> {
269269+ process: fn(&[u8]) -> T,
270270+ access: A,
271271+ walker: Walker,
272272+}
273273+274274+impl<T: Processable + Send + 'static, A: DiskAccess + Send + 'static> BigCarReady<T, A> {
275275+ pub async fn next_chunk(
276276+ mut self,
277277+ n: usize,
278278+ ) -> Result<(Self, Option<Vec<(String, T)>>), DiskDriveError<A::StorageError>>
279279+ where
280280+ A::StorageError: Send,
281281+ {
282282+ let mut out = Vec::with_capacity(n);
283283+ (self, out) = tokio::task::spawn_blocking(move || {
284284+ let access = self.access;
285285+ let mut reader = access.get_reader()?;
286286+287287+ for _ in 0..n {
288288+ // walk as far as we can until we run out of blocks or find a record
289289+ match self.walker.disk_step(&mut reader, self.process)? {
290290+ Step::Missing(cid) => return Err(DiskDriveError::MissingBlock(cid)),
291291+ Step::Finish => break,
292292+ Step::Step { rkey, data } => {
293293+ out.push((rkey, data));
294294+ continue;
295295+ }
296296+ };
297297+ }
298298+299299+ drop(reader); // cannot outlive access
300300+ self.access = access;
301301+ Ok::<_, DiskDriveError<A::StorageError>>((self, out))
302302+ })
303303+ .await
304304+ .unwrap()?; // TODO
305305+306306+ if out.is_empty() {
307307+ Ok((self, None))
308308+ } else {
309309+ Ok((self, Some(out)))
310310+ }
311311+ }
312312+}
313313+314314+/// The core driver between the block stream and MST walker
315315+///
316316+/// In the future, PDSs will export CARs in a stream-friendly order that will
317317+/// enable processing them with tiny memory overhead. But that future is not
318318+/// here yet.
319319+///
320320+/// CARs are almost always in a stream-unfriendly order, so I'm reverting the
321321+/// optimistic stream features: we load all blocks first, then walk the MST.
322322+///
323323+/// This makes things much simpler: we only need to worry about spilling to disk
324324+/// in one place, and we always have a reasonable expectation about how much
325325+/// work the init function will do. We can drop the CAR reader before walking,
326326+/// so the sync/async boundaries become a little easier to work around.
327327+#[derive(Debug)]
328328+pub struct MemDriver<T: Processable> {
329329+ blocks: HashMap<Cid, MaybeProcessedBlock<T>>,
330330+ walker: Walker,
331331+ process: fn(&[u8]) -> T,
332332+}
333333+334334+impl<T: Processable> MemDriver<T> {
162335 /// Manually step through the record outputs
163163- pub async fn next_record(&mut self) -> Result<Option<(String, T)>, DriveError<PE>> {
164164- loop {
336336+ pub async fn next_chunk(&mut self, n: usize) -> Result<Option<Vec<(String, T)>>, DriveError> {
337337+ let mut out = Vec::with_capacity(n);
338338+ for _ in 0..n {
165339 // walk as far as we can until we run out of blocks or find a record
166166- let cid_needed = match self.walker.step(&mut self.blocks, &self.process)? {
167167- Step::Rest(cid) => cid,
168168- Step::Finish => return Ok(None),
169169- Step::Step { rkey, data } => return Ok(Some((rkey, data))),
340340+ match self.walker.step(&mut self.blocks, self.process)? {
341341+ Step::Missing(cid) => return Err(DriveError::MissingBlock(cid)),
342342+ Step::Finish => break,
343343+ Step::Step { rkey, data } => {
344344+ out.push((rkey, data));
345345+ continue;
346346+ }
170347 };
348348+ }
171349172172- // load blocks until we reach that cid
173173- self.drive_until(cid_needed).await?;
350350+ if out.is_empty() {
351351+ Ok(None)
352352+ } else {
353353+ Ok(Some(out))
174354 }
175175- }
176176-177177- /// Convert to a futures::stream of record outputs
178178- pub fn stream(self) -> impl Stream<Item = Result<(String, T), DriveError<PE>>> {
179179- futures::stream::try_unfold(self, |mut this| async move {
180180- let maybe_record = this.next_record().await?;
181181- Ok(maybe_record.map(|b| (b, this)))
182182- })
183355 }
184356}
+1
src/lib.rs
···22//!
33//! For now see the [examples](https://tangled.org/@microcosm.blue/repo-stream/tree/main/examples)
4455+pub mod disk;
56pub mod disk_drive;
67pub mod disk_redb;
78pub mod disk_sqlite;
+97-31
src/walk.rs
···11//! Depth-first MST traversal
2233-use crate::drive::MaybeProcessedBlock;
33+use crate::disk::{DiskReader, StorageErrorBase};
44+use crate::drive::{MaybeProcessedBlock, Processable};
45use crate::mst::Node;
56use ipld_core::cid::Cid;
67use std::collections::HashMap;
77-use std::error::Error;
88+use std::convert::Infallible;
89910/// Errors that can happen while walking
1011#[derive(Debug, thiserror::Error)]
1111-pub enum Trip<E: Error> {
1212+pub enum Trip {
1213 #[error("empty mst nodes are not allowed")]
1314 NodeEmpty,
1515+ #[error("Failed to fingerprint commit block")]
1616+ BadCommitFingerprint,
1417 #[error("Failed to decode commit block: {0}")]
1515- BadCommit(Box<dyn std::error::Error>),
1818+ BadCommit(#[from] serde_ipld_dagcbor::DecodeError<Infallible>),
1619 #[error("Action node error: {0}")]
1720 RkeyError(#[from] RkeyError),
1818- #[error("Process failed: {0}")]
1919- ProcessFailed(E),
2021 #[error("Encountered an rkey out of order while walking the MST")]
2122 RkeyOutOfOrder,
2223}
2525+/// Errors that can happen while walking with a disk-backed block store
2626+#[derive(Debug, thiserror::Error)]
2727+pub enum DiskTrip<E: StorageErrorBase> {
2828+ #[error("tripped: {0}")]
2929+ Trip(#[from] Trip),
3030+ #[error("storage error: {0}")]
3131+ StorageError(#[from] E),
3232+ #[error("Decode error: {0}")]
3333+ BincodeDecodeError(#[from] bincode::error::DecodeError),
3434+}
3535+2436/// Errors from invalid Rkeys
2537#[derive(Debug, thiserror::Error)]
2638pub enum RkeyError {
···3345/// Walker outputs
3446#[derive(Debug)]
3547pub enum Step<T> {
3636- /// We need a CID but it's not in the block store
3737- ///
3838- /// Give the needed CID to the driver so it can load blocks until it's found
3939- Rest(Cid),
4848+ /// We needed this CID but it's not in the block store
4949+ Missing(Cid),
4050 /// Reached the end of the MST! yay!
4151 Finish,
4252 /// A record was found!
···98108 }
99109100110 /// Advance through nodes until we find a record or can't go further
101101- pub fn step<T: Clone, E: Error>(
111111+ pub fn step<T: Processable>(
102112 &mut self,
103103- blocks: &mut HashMap<Cid, MaybeProcessedBlock<T, E>>,
104104- process: impl Fn(&[u8]) -> Result<T, E>,
105105- ) -> Result<Step<T>, Trip<E>> {
113113+ blocks: &mut HashMap<Cid, MaybeProcessedBlock<T>>,
114114+ process: impl Fn(&[u8]) -> T,
115115+ ) -> Result<Step<T>, Trip> {
106116 loop {
107117 let Some(mut need) = self.stack.last() else {
108118 log::trace!("tried to walk but we're actually done.");
···114124 log::trace!("need node {cid:?}");
115125 let Some(block) = blocks.remove(cid) else {
116126 log::trace!("node not found, resting");
117117- return Ok(Step::Rest(*cid));
127127+ return Ok(Step::Missing(*cid));
118128 };
119129120130 let MaybeProcessedBlock::Raw(data) = block else {
121121- return Err(Trip::BadCommit("failed commit fingerprint".into()));
131131+ return Err(Trip::BadCommitFingerprint);
122132 };
123123- let node = serde_ipld_dagcbor::from_slice::<Node>(&data)
124124- .map_err(|e| Trip::BadCommit(e.into()))?;
133133+ let node =
134134+ serde_ipld_dagcbor::from_slice::<Node>(&data).map_err(Trip::BadCommit)?;
125135126136 // found node, make sure we remember
127137 self.stack.pop();
···133143 log::trace!("need record {cid:?}");
134144 let Some(data) = blocks.get_mut(cid) else {
135145 log::trace!("record block not found, resting");
136136- return Ok(Step::Rest(*cid));
146146+ return Ok(Step::Missing(*cid));
137147 };
138148 let rkey = rkey.clone();
139149 let data = match data {
140150 MaybeProcessedBlock::Raw(data) => process(data),
141141- MaybeProcessedBlock::Processed(Ok(t)) => Ok(t.clone()),
142142- bad => {
143143- // big hack to pull the error out -- this corrupts
144144- // a block, so we should not continue trying to work
145145- let mut steal = MaybeProcessedBlock::Raw(vec![]);
146146- std::mem::swap(&mut steal, bad);
147147- let MaybeProcessedBlock::Processed(Err(e)) = steal else {
148148- unreachable!();
149149- };
150150- return Err(Trip::ProcessFailed(e));
151151- }
151151+ MaybeProcessedBlock::Processed(t) => t.clone(),
152152 };
153153154154 // found node, make sure we remember
155155 self.stack.pop();
156156157157 log::trace!("emitting a block as a step. depth={}", self.stack.len());
158158- let data = data.map_err(Trip::ProcessFailed)?;
159158160159 // rkeys *must* be in order or else the tree is invalid (or
161160 // we have a bug)
162161 if rkey <= self.prev {
163162 return Err(Trip::RkeyOutOfOrder);
163163+ }
164164+ self.prev = rkey.clone();
165165+166166+ return Ok(Step::Step { rkey, data });
167167+ }
168168+ }
169169+ }
170170+ }
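172172+ /// Blocking! Reads blocks synchronously through the `DiskReader`, so call this off the async runtime (e.g. inside `spawn_blocking`).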
173173+ pub fn disk_step<T: Processable, R: DiskReader>(
174174+ &mut self,
175175+ reader: &mut R,
176176+ process: impl Fn(&[u8]) -> T,
177177+ ) -> Result<Step<T>, DiskTrip<R::StorageError>> {
178178+ loop {
179179+ let Some(mut need) = self.stack.last() else {
180180+ log::trace!("tried to walk but we're actually done.");
181181+ return Ok(Step::Finish);
182182+ };
183183+184184+ match &mut need {
185185+ Need::Node(cid) => {
186186+ let cid_bytes = cid.to_bytes();
187187+ log::trace!("need node {cid:?}");
188188+ let Some(block_bytes) = reader.get(cid_bytes)? else {
189189+ log::trace!("node not found, resting");
190190+ return Ok(Step::Missing(*cid));
191191+ };
192192+193193+ let block: MaybeProcessedBlock<T> = crate::drive::decode(&block_bytes)?;
194194+195195+ let MaybeProcessedBlock::Raw(data) = block else {
196196+ return Err(Trip::BadCommitFingerprint.into());
197197+ };
198198+ let node =
199199+ serde_ipld_dagcbor::from_slice::<Node>(&data).map_err(Trip::BadCommit)?;
200200+201201+ // found node, make sure we remember
202202+ self.stack.pop();
203203+204204+ // queue up work on the found node next
205205+ push_from_node(&mut self.stack, &node).map_err(Trip::RkeyError)?;
206206+ }
207207+ Need::Record { rkey, cid } => {
208208+ log::trace!("need record {cid:?}");
209209+ let cid_bytes = cid.to_bytes();
210210+ let Some(data_bytes) = reader.get(cid_bytes)? else {
211211+ log::trace!("record block not found, resting");
212212+ return Ok(Step::Missing(*cid));
213213+ };
214214+ let data: MaybeProcessedBlock<T> = crate::drive::decode(&data_bytes)?;
215215+ let rkey = rkey.clone();
216216+ let data = match data {
217217+ MaybeProcessedBlock::Raw(data) => process(&data),
218218+ MaybeProcessedBlock::Processed(t) => t.clone(),
219219+ };
220220+221221+ // found node, make sure we remember
222222+ self.stack.pop();
223223+224224+ log::trace!("emitting a block as a step. depth={}", self.stack.len());
225225+226226+ // rkeys *must* be in order or else the tree is invalid (or
227227+ // we have a bug)
228228+ if rkey <= self.prev {
229229+ return Err(DiskTrip::Trip(Trip::RkeyOutOfOrder));
164230 }
165231 self.prev = rkey.clone();
166232