···3344use chrono::{DateTime, Utc};
55use fjall::OwnedWriteBatch;
66+use futures::TryFutureExt;
67use jacquard_common::cowstr::ToCowStr;
78use jacquard_common::types::cid::{Cid, IpldCid};
89use jacquard_common::types::ident::AtIdentifier;
···3435 /// the revision of the root commit of this repository.
3536 #[serde(skip_serializing_if = "Option::is_none")]
3637 pub rev: Option<Tid>,
3737- /// the CID of the root commit of this repository.
3838+ /// the CID of the MST root of this repository.
3839 #[serde(serialize_with = "crate::util::opt_cid_serialize_str")]
3940 #[serde(skip_serializing_if = "Option::is_none")]
4041 pub data: Option<IpldCid>,
···690691 Rkey::new_cow(CowStr::Owned(rkey.to_smolstr())).expect("that rkey is validated")
691692 }),
692693 })
694694+ }
695695+696696+ /// generates a streaming CAR v1 response body for this repository.
697697+ ///
698698+ /// returns `None` if the repo has no commit yet (still backfilling) or is an
699699+ /// unmigrated repo that does not have the necessary data to reconstruct the
700700+ /// root commit from.
701701+ ///
702702+ /// ## notes
703703+ /// - calling this if you are using collection allowlist will always result
704704+ /// in an error since the commit root won't match the reconstructed CID.
705705+ /// - calling this for big repositories will incur more resource cost due to
706706+ /// hydrant's structure, the whole MST is always reconstructed.
707707+ pub async fn generate_car(
708708+ &self,
709709+ ) -> Result<Option<impl futures::Stream<Item = std::io::Result<bytes::Bytes>> + Send + 'static>>
710710+ {
711711+ use iroh_car::{CarHeader, CarWriter};
712712+ use jacquard_repo::{BlockStore, MemoryBlockStore, Mst};
713713+ use miette::WrapErr;
714714+ use std::sync::Arc;
715715+716716+ let commit = match self.state().await? {
717717+ Some(state) => match state.root {
718718+ Some(c) => c,
719719+ None => return Ok(None),
720720+ },
721721+ None => return Ok(None),
722722+ };
723723+724724+ let atp_commit = match commit.into_atp_commit(self.did.clone().into_static()) {
725725+ Some(c) => c,
726726+ None => return Ok(None),
727727+ };
728728+ let commit_cid = atp_commit.to_cid().into_diagnostic()?;
729729+ let commit_cbor = atp_commit.to_cbor().into_diagnostic()?;
730730+731731+ let did = self.did.clone().into_static();
732732+ let app_state = self.state.clone();
733733+734734+ // build mst and populate the block store in a single blocking pass
735735+ let store = Arc::new(MemoryBlockStore::new());
736736+ let mst = Mst::new(store.clone());
737737+ let handle = tokio::runtime::Handle::current();
738738+739739+ let mst = tokio::task::spawn_blocking(move || -> Result<_> {
740740+ let mut mst = mst;
741741+ let prefix = keys::record_prefix_did(&did);
742742+743743+ for guard in app_state.db.records.prefix(&prefix) {
744744+ let (key, cid_bytes) = guard.into_inner().into_diagnostic()?;
745745+746746+ let rest = &key[prefix.len()..];
747747+ let mut parts = rest.splitn(2, |b: &u8| *b == keys::SEP);
748748+ let collection_raw = parts
749749+ .next()
750750+ .ok_or_else(|| miette::miette!("missing collection in record key"))?;
751751+ let rkey_raw = parts
752752+ .next()
753753+ .ok_or_else(|| miette::miette!("missing rkey in record key"))?;
754754+755755+ let collection = std::str::from_utf8(collection_raw)
756756+ .into_diagnostic()
757757+ .wrap_err("collection is not valid utf8")?;
758758+ let rkey = keys::parse_rkey(rkey_raw)?;
759759+ let mst_key = format!("{collection}/{rkey}");
760760+761761+ let ipld_cid = cid::Cid::read_bytes(cid_bytes.as_ref())
762762+ .into_diagnostic()
763763+ .wrap_err_with(|| format!("invalid cid bytes for record {mst_key}"))?;
764764+765765+ let block_key = keys::block_key(collection, cid_bytes.as_ref());
766766+ let block_bytes = app_state
767767+ .db
768768+ .blocks
769769+ .get(&block_key)
770770+ .into_diagnostic()?
771771+ .ok_or_else(|| miette::miette!("block missing for record {mst_key}"))?;
772772+773773+ handle
774774+ .block_on(mst.add_mut(&mst_key, ipld_cid))
775775+ .into_diagnostic()?;
776776+ // we use put_many here to skip calculating the CID again
777777+ handle
778778+ .block_on(mst.storage().put_many([(
779779+ ipld_cid,
780780+ bytes::Bytes::copy_from_slice(block_bytes.as_ref()),
781781+ )]))
782782+ .into_diagnostic()?;
783783+ }
784784+785785+ handle.block_on(mst.persist()).into_diagnostic()?;
786786+787787+ Result::<_>::Ok(mst)
788788+ })
789789+ .await
790790+ .into_diagnostic()??;
791791+792792+ // sanity check: rebuilt root should match stored commit data in full-index mode
793793+ let computed_root = mst.get_pointer().await.into_diagnostic()?;
794794+ if computed_root != atp_commit.data {
795795+ tracing::warn!(
796796+ computed = %computed_root,
797797+ stored = %atp_commit.data,
798798+ did = %self.did,
799799+ "mst root mismatch (expected in filter mode)",
800800+ );
801801+ }
802802+803803+ store
804804+ .put_many([(commit_cid, bytes::Bytes::from(commit_cbor))])
805805+ .await
806806+ .into_diagnostic()?;
807807+808808+ // stream the car directly to the response
809809+ let (reader, writer) = tokio::io::duplex(64 * 1024);
810810+ tokio::spawn(
811811+ async move {
812812+ let header = CarHeader::new_v1(vec![commit_cid]);
813813+ let mut car_writer = CarWriter::new(header, writer);
814814+815815+ // write commit first, then mst nodes + leaf blocks
816816+ let commit_data = store.get(&commit_cid).await?;
817817+ if let Some(data) = commit_data {
818818+ car_writer
819819+ .write(commit_cid, &data)
820820+ .await
821821+ .into_diagnostic()?;
822822+ }
823823+ mst.write_blocks_to_car(&mut car_writer).await?;
824824+ car_writer.finish().await.into_diagnostic()?;
825825+826826+ Result::<_, miette::Report>::Ok(())
827827+ }
828828+ .inspect_err(|e| tracing::error!("can't generate car: {e}")),
829829+ );
830830+831831+ Ok(Some(tokio_util::io::ReaderStream::new(reader)))
693832 }
694833695834 /// gets how many records of a collection this repository has.