···11+//! Stream utilities.
22+33+mod hash_reader;
44+55+use std::collections::VecDeque;
66+use std::future::Future;
77+use std::marker::Unpin;
88+use std::pin::Pin;
99+1010+use async_stream::try_stream;
1111+use bytes::{Bytes, BytesMut};
1212+use futures::stream::{BoxStream, Stream, StreamExt};
1313+use tokio::io::{AsyncRead, AsyncReadExt};
1414+use tokio::task::spawn;
1515+1616+pub use hash_reader::HashReader;
1717+1818+/// Merge chunks lazily into a continuous stream.
1919+///
2020+/// For each chunk, a function is called to transform it into a
2121+/// `Stream<Item = Result<Bytes>>`. This function does something like
2222+/// opening the local file or sending a request to S3.
2323+///
2424+/// We call this function some time before the start of the chunk
2525+/// is reached to eliminate delays between chunks so the merged
2626+/// stream is smooth. We don't want to start streaming all chunks
2727+/// at once as it's a waste of resources.
2828+///
2929+/// ```text
3030+/// | S3 GET | Chunk | S3 GET | ... | S3 GET | Chunk
3131+/// ```
3232+///
3333+/// ```text
3434+/// | S3 GET | Chunk | Chunk | Chunk | Chunk
3535+/// | S3 GET |-----------^ ^ ^
3636+/// | S3 GET |------| |
3737+/// | S3 GET |--------------|
3838+///
3939+/// ```
4040+///
4141+/// TODO: Support range requests so we can have seekable NARs.
4242+pub fn merge_chunks<C, F, S, Fut, E>(
4343+ mut chunks: VecDeque<C>,
4444+ streamer: F,
4545+ streamer_arg: S,
4646+ num_prefetch: usize,
4747+) -> Pin<Box<impl Stream<Item = Result<Bytes, E>>>>
4848+where
4949+ F: Fn(C, S) -> Fut,
5050+ S: Clone,
5151+ Fut: Future<Output = Result<BoxStream<'static, Result<Bytes, E>>, E>> + Send + 'static,
5252+ E: Send + 'static,
5353+{
5454+ let s = try_stream! {
5555+ let mut streams = VecDeque::new(); // a queue of JoinHandles
5656+5757+ // otherwise type inference gets confused :/
5858+ if false {
5959+ let chunk = chunks.pop_front().unwrap();
6060+ let stream = spawn(streamer(chunk, streamer_arg.clone()));
6161+ streams.push_back(stream);
6262+ }
6363+6464+ loop {
6565+ if let Some(stream) = streams.pop_front() {
6666+ let mut stream = stream.await.unwrap()?;
6767+ while let Some(item) = stream.next().await {
6868+ let item = item?;
6969+ yield item;
7070+ }
7171+ }
7272+7373+ while streams.len() < num_prefetch {
7474+ if let Some(chunk) = chunks.pop_front() {
7575+ let stream = spawn(streamer(chunk, streamer_arg.clone()));
7676+ streams.push_back(stream);
7777+ } else {
7878+ break;
7979+ }
8080+ }
8181+8282+ if chunks.is_empty() && streams.is_empty() {
8383+ // we are done!
8484+ break;
8585+ }
8686+ }
8787+ };
8888+ Box::pin(s)
8989+}
9090+9191+/// Greedily reads from a stream to fill a buffer.
9292+pub async fn read_chunk_async<S: AsyncRead + Unpin + Send>(
9393+ stream: &mut S,
9494+ mut chunk: BytesMut,
9595+) -> std::io::Result<Bytes> {
9696+ while chunk.len() < chunk.capacity() {
9797+ let read = stream.read_buf(&mut chunk).await?;
9898+9999+ if read == 0 {
100100+ break;
101101+ }
102102+ }
103103+104104+ Ok(chunk.freeze())
105105+}
106106+107107+#[cfg(test)]
108108+mod tests {
109109+ use super::*;
110110+111111+ use async_stream::stream;
112112+ use bytes::{BufMut, BytesMut};
113113+ use futures::future;
114114+115115+ #[tokio::test]
116116+ async fn test_merge_chunks() {
117117+ let chunk_a: BoxStream<Result<Bytes, ()>> = {
118118+ let s = stream! {
119119+ yield Ok(Bytes::from_static(b"Hello"));
120120+ };
121121+ Box::pin(s)
122122+ };
123123+124124+ let chunk_b: BoxStream<Result<Bytes, ()>> = {
125125+ let s = stream! {
126126+ yield Ok(Bytes::from_static(b", "));
127127+ yield Ok(Bytes::from_static(b"world"));
128128+ };
129129+ Box::pin(s)
130130+ };
131131+132132+ let chunk_c: BoxStream<Result<Bytes, ()>> = {
133133+ let s = stream! {
134134+ yield Ok(Bytes::from_static(b"!"));
135135+ };
136136+ Box::pin(s)
137137+ };
138138+139139+ let chunks: VecDeque<BoxStream<'static, Result<Bytes, ()>>> =
140140+ [chunk_a, chunk_b, chunk_c].into_iter().collect();
141141+142142+ let streamer = |c, _| future::ok(c);
143143+ let mut merged = merge_chunks(chunks, streamer, (), 2);
144144+145145+ let mut bytes = BytesMut::with_capacity(100);
146146+ while let Some(item) = merged.next().await {
147147+ bytes.put(item.unwrap());
148148+ }
149149+ let bytes = bytes.freeze();
150150+151151+ assert_eq!(&*bytes, b"Hello, world!");
152152+ }
153153+}
+2-2
attic/src/lib.rs
···2121pub mod chunking;
2222pub mod error;
2323pub mod hash;
2424+#[cfg(feature = "io")]
2525+pub mod io;
2426pub mod mime;
2527pub mod nix_store;
2628pub mod signing;
2727-#[cfg(feature = "stream")]
2828-pub mod stream;
2929#[cfg(target_family = "unix")]
3030pub mod testing;
3131#[cfg(feature = "tokio")]
-258
attic/src/stream.rs
···11-//! Stream utilities.
22-33-use std::collections::VecDeque;
44-use std::future::Future;
55-use std::marker::Unpin;
66-use std::pin::Pin;
77-use std::sync::Arc;
88-use std::task::{Context, Poll};
99-1010-use async_stream::try_stream;
1111-use bytes::{Bytes, BytesMut};
1212-use digest::{Digest, Output as DigestOutput};
1313-use futures::stream::{BoxStream, Stream, StreamExt};
1414-use tokio::io::{AsyncRead, AsyncReadExt, ReadBuf};
1515-use tokio::sync::OnceCell;
1616-use tokio::task::spawn;
1717-1818-/// Stream filter that hashes the bytes that have been read.
1919-///
2020-/// The hash is finalized when EOF is reached.
2121-pub struct StreamHasher<R: AsyncRead + Unpin, D: Digest + Unpin> {
2222- inner: R,
2323- digest: Option<D>,
2424- bytes_read: usize,
2525- finalized: Arc<OnceCell<(DigestOutput<D>, usize)>>,
2626-}
2727-2828-/// Merge chunks lazily into a continuous stream.
2929-///
3030-/// For each chunk, a function is called to transform it into a
3131-/// `Stream<Item = Result<Bytes>>`. This function does something like
3232-/// opening the local file or sending a request to S3.
3333-///
3434-/// We call this function some time before the start of the chunk
3535-/// is reached to eliminate delays between chunks so the merged
3636-/// stream is smooth. We don't want to start streaming all chunks
3737-/// at once as it's a waste of resources.
3838-///
3939-/// ```text
4040-/// | S3 GET | Chunk | S3 GET | ... | S3 GET | Chunk
4141-/// ```
4242-///
4343-/// ```text
4444-/// | S3 GET | Chunk | Chunk | Chunk | Chunk
4545-/// | S3 GET |-----------^ ^ ^
4646-/// | S3 GET |------| |
4747-/// | S3 GET |--------------|
4848-///
4949-/// ```
5050-///
5151-/// TODO: Support range requests so we can have seekable NARs.
5252-pub fn merge_chunks<C, F, S, Fut, E>(
5353- mut chunks: VecDeque<C>,
5454- streamer: F,
5555- streamer_arg: S,
5656- num_prefetch: usize,
5757-) -> Pin<Box<impl Stream<Item = Result<Bytes, E>>>>
5858-where
5959- F: Fn(C, S) -> Fut,
6060- S: Clone,
6161- Fut: Future<Output = Result<BoxStream<'static, Result<Bytes, E>>, E>> + Send + 'static,
6262- E: Send + 'static,
6363-{
6464- let s = try_stream! {
6565- let mut streams = VecDeque::new(); // a queue of JoinHandles
6666-6767- // otherwise type inference gets confused :/
6868- if false {
6969- let chunk = chunks.pop_front().unwrap();
7070- let stream = spawn(streamer(chunk, streamer_arg.clone()));
7171- streams.push_back(stream);
7272- }
7373-7474- loop {
7575- if let Some(stream) = streams.pop_front() {
7676- let mut stream = stream.await.unwrap()?;
7777- while let Some(item) = stream.next().await {
7878- let item = item?;
7979- yield item;
8080- }
8181- }
8282-8383- while streams.len() < num_prefetch {
8484- if let Some(chunk) = chunks.pop_front() {
8585- let stream = spawn(streamer(chunk, streamer_arg.clone()));
8686- streams.push_back(stream);
8787- } else {
8888- break;
8989- }
9090- }
9191-9292- if chunks.is_empty() && streams.is_empty() {
9393- // we are done!
9494- break;
9595- }
9696- }
9797- };
9898- Box::pin(s)
9999-}
100100-101101-impl<R: AsyncRead + Unpin, D: Digest + Unpin> StreamHasher<R, D> {
102102- pub fn new(inner: R, digest: D) -> (Self, Arc<OnceCell<(DigestOutput<D>, usize)>>) {
103103- let finalized = Arc::new(OnceCell::new());
104104-105105- (
106106- Self {
107107- inner,
108108- digest: Some(digest),
109109- bytes_read: 0,
110110- finalized: finalized.clone(),
111111- },
112112- finalized,
113113- )
114114- }
115115-}
116116-117117-impl<R: AsyncRead + Unpin, D: Digest + Unpin> AsyncRead for StreamHasher<R, D> {
118118- fn poll_read(
119119- mut self: Pin<&mut Self>,
120120- cx: &mut Context<'_>,
121121- buf: &mut ReadBuf<'_>,
122122- ) -> Poll<tokio::io::Result<()>> {
123123- let old_filled = buf.filled().len();
124124- let r = Pin::new(&mut self.inner).poll_read(cx, buf);
125125- let read_len = buf.filled().len() - old_filled;
126126-127127- match r {
128128- Poll::Ready(Ok(())) => {
129129- if read_len == 0 {
130130- // EOF
131131- if let Some(digest) = self.digest.take() {
132132- self.finalized
133133- .set((digest.finalize(), self.bytes_read))
134134- .expect("Hash has already been finalized");
135135- }
136136- } else {
137137- // Read something
138138- let digest = self.digest.as_mut().expect("Stream has data after EOF");
139139-140140- let filled = buf.filled();
141141- digest.update(&filled[filled.len() - read_len..]);
142142- self.bytes_read += read_len;
143143- }
144144- }
145145- Poll::Ready(Err(_)) => {
146146- assert!(read_len == 0);
147147- }
148148- Poll::Pending => {}
149149- }
150150-151151- r
152152- }
153153-}
154154-155155-/// Greedily reads from a stream to fill a buffer.
156156-pub async fn read_chunk_async<S: AsyncRead + Unpin + Send>(
157157- stream: &mut S,
158158- mut chunk: BytesMut,
159159-) -> std::io::Result<Bytes> {
160160- while chunk.len() < chunk.capacity() {
161161- let read = stream.read_buf(&mut chunk).await?;
162162-163163- if read == 0 {
164164- break;
165165- }
166166- }
167167-168168- Ok(chunk.freeze())
169169-}
170170-171171-#[cfg(test)]
172172-mod tests {
173173- use super::*;
174174-175175- use async_stream::stream;
176176- use bytes::{BufMut, BytesMut};
177177- use futures::future;
178178- use tokio::io::AsyncReadExt;
179179-180180- #[tokio::test]
181181- async fn test_stream_hasher() {
182182- let expected = b"hello world";
183183- let expected_sha256 =
184184- hex::decode("b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9")
185185- .unwrap();
186186-187187- let (mut read, finalized) = StreamHasher::new(expected.as_slice(), sha2::Sha256::new());
188188- assert!(finalized.get().is_none());
189189-190190- // force multiple reads
191191- let mut buf = vec![0u8; 100];
192192- let mut bytes_read = 0;
193193- bytes_read += read
194194- .read(&mut buf[bytes_read..bytes_read + 5])
195195- .await
196196- .unwrap();
197197- bytes_read += read
198198- .read(&mut buf[bytes_read..bytes_read + 5])
199199- .await
200200- .unwrap();
201201- bytes_read += read
202202- .read(&mut buf[bytes_read..bytes_read + 5])
203203- .await
204204- .unwrap();
205205- bytes_read += read
206206- .read(&mut buf[bytes_read..bytes_read + 5])
207207- .await
208208- .unwrap();
209209-210210- assert_eq!(expected.len(), bytes_read);
211211- assert_eq!(expected, &buf[..bytes_read]);
212212-213213- let (hash, count) = finalized.get().expect("Hash wasn't finalized");
214214-215215- assert_eq!(expected_sha256.as_slice(), hash.as_slice());
216216- assert_eq!(expected.len(), *count);
217217- eprintln!("finalized = {:x?}", finalized);
218218- }
219219-220220- #[tokio::test]
221221- async fn test_merge_chunks() {
222222- let chunk_a: BoxStream<Result<Bytes, ()>> = {
223223- let s = stream! {
224224- yield Ok(Bytes::from_static(b"Hello"));
225225- };
226226- Box::pin(s)
227227- };
228228-229229- let chunk_b: BoxStream<Result<Bytes, ()>> = {
230230- let s = stream! {
231231- yield Ok(Bytes::from_static(b", "));
232232- yield Ok(Bytes::from_static(b"world"));
233233- };
234234- Box::pin(s)
235235- };
236236-237237- let chunk_c: BoxStream<Result<Bytes, ()>> = {
238238- let s = stream! {
239239- yield Ok(Bytes::from_static(b"!"));
240240- };
241241- Box::pin(s)
242242- };
243243-244244- let chunks: VecDeque<BoxStream<'static, Result<Bytes, ()>>> =
245245- [chunk_a, chunk_b, chunk_c].into_iter().collect();
246246-247247- let streamer = |c, _| future::ok(c);
248248- let mut merged = merge_chunks(chunks, streamer, (), 2);
249249-250250- let mut bytes = BytesMut::with_capacity(100);
251251- while let Some(item) = merged.next().await {
252252- bytes.put(item.unwrap());
253253- }
254254- let bytes = bytes.freeze();
255255-256256- assert_eq!(&*bytes, b"Hello, world!");
257257- }
258258-}
+2-2
client/src/api/mod.rs
···3344use anyhow::Result;
55use bytes::Bytes;
66-use const_format::concatcp;
66+use const_format::formatcp;
77use displaydoc::Display;
88use futures::{
99 future,
···27272828/// The User-Agent string of Attic.
2929const ATTIC_USER_AGENT: &str =
3030- concatcp!("Attic/{} ({})", env!("CARGO_PKG_NAME"), ATTIC_DISTRIBUTOR);
3030+ formatcp!("Attic/{} ({})", env!("CARGO_PKG_NAME"), ATTIC_DISTRIBUTOR);
31313232/// The size threshold to send the upload info as part of the PUT body.
3333const NAR_INFO_PREAMBLE_THRESHOLD: usize = 4 * 1024; // 4 KiB
···1414};
1515use bytes::{Bytes, BytesMut};
1616use chrono::Utc;
1717-use digest::Output as DigestOutput;
1817use futures::future::join_all;
1918use futures::StreamExt;
2019use sea_orm::entity::prelude::*;
···2221use sea_orm::ActiveValue::Set;
2322use sea_orm::{QuerySelect, TransactionTrait};
2423use sha2::{Digest, Sha256};
2525-use tokio::io::{AsyncBufRead, AsyncRead, AsyncReadExt, BufReader};
2626-use tokio::sync::{OnceCell, Semaphore};
2424+use tokio::io::{AsyncBufRead, AsyncReadExt};
2525+use tokio::sync::Semaphore;
2726use tokio::task::spawn;
2827use tokio_util::io::StreamReader;
2928use tracing::instrument;
3029use uuid::Uuid;
31303131+use crate::compression::{CompressionStream, CompressorFn};
3232use crate::config::CompressionType;
3333use crate::error::{ErrorKind, ServerError, ServerResult};
3434use crate::narinfo::Compression;
···3939};
4040use attic::chunking::chunk_stream;
4141use attic::hash::Hash;
4242-use attic::stream::{read_chunk_async, StreamHasher};
4242+use attic::io::{read_chunk_async, HashReader};
4343use attic::util::Finally;
44444545use crate::database::entity::cache;
···5555/// TODO: Make this configurable
5656const CONCURRENT_CHUNK_UPLOADS: usize = 10;
57575858-type CompressorFn<C> = Box<dyn FnOnce(C) -> Box<dyn AsyncRead + Unpin + Send> + Send>;
5959-6058/// Data of a chunk.
6159enum ChunkData {
6260 /// Some bytes in memory.
6361 Bytes(Bytes),
64626563 /// A stream with a user-claimed hash and size that are potentially incorrect.
6666- Stream(Box<dyn AsyncRead + Send + Unpin + 'static>, Hash, usize),
6464+ Stream(Box<dyn AsyncBufRead + Send + Unpin + 'static>, Hash, usize),
6765}
68666967/// Result of a chunk upload.
···7270 deduplicated: bool,
7371}
74727575-/// Applies compression to a stream, computing hashes along the way.
7676-///
7777-/// Our strategy is to stream directly onto a UUID-keyed file on the
7878-/// storage backend, performing compression and computing the hashes
7979-/// along the way. We delete the file if the hashes do not match.
8080-///
8181-/// ```text
8282-/// ┌───────────────────────────────────►NAR Hash
8383-/// │
8484-/// │
8585-/// ├───────────────────────────────────►NAR Size
8686-/// │
8787-/// ┌─────┴────┐ ┌──────────┐ ┌───────────┐
8888-/// NAR Stream──►│NAR Hasher├─►│Compressor├─►│File Hasher├─►File Stream
8989-/// └──────────┘ └──────────┘ └─────┬─────┘
9090-/// │
9191-/// ├───────►File Hash
9292-/// │
9393-/// │
9494-/// └───────►File Size
9595-/// ```
9696-struct CompressionStream {
9797- stream: Box<dyn AsyncRead + Unpin + Send>,
9898- nar_compute: Arc<OnceCell<(DigestOutput<Sha256>, usize)>>,
9999- file_compute: Arc<OnceCell<(DigestOutput<Sha256>, usize)>>,
100100-}
101101-10273trait UploadPathNarInfoExt {
10374 fn to_active_model(&self) -> object::ActiveModel;
10475}
···180151 let username = req_state.auth.username().map(str::to_string);
181152182153 // Try to acquire a lock on an existing NAR
183183- let existing_nar = database.find_and_lock_nar(&upload_info.nar_hash).await?;
184184- match existing_nar {
185185- Some(existing_nar) => {
186186- // Deduplicate?
187187- let missing_chunk = ChunkRef::find()
188188- .filter(chunkref::Column::NarId.eq(existing_nar.id))
189189- .filter(chunkref::Column::ChunkId.is_null())
190190- .limit(1)
191191- .one(database)
192192- .await
193193- .map_err(ServerError::database_error)?;
154154+ if let Some(existing_nar) = database.find_and_lock_nar(&upload_info.nar_hash).await? {
155155+ // Deduplicate?
156156+ let missing_chunk = ChunkRef::find()
157157+ .filter(chunkref::Column::NarId.eq(existing_nar.id))
158158+ .filter(chunkref::Column::ChunkId.is_null())
159159+ .limit(1)
160160+ .one(database)
161161+ .await
162162+ .map_err(ServerError::database_error)?;
194163195195- if missing_chunk.is_some() {
196196- // Need to repair
197197- upload_path_new(username, cache, upload_info, stream, database, &state).await
198198- } else {
199199- // Can actually be deduplicated
200200- upload_path_dedup(
201201- username,
202202- cache,
203203- upload_info,
204204- stream,
205205- database,
206206- &state,
207207- existing_nar,
208208- )
209209- .await
210210- }
211211- }
212212- None => {
213213- // New NAR
214214- upload_path_new(username, cache, upload_info, stream, database, &state).await
164164+ if missing_chunk.is_none() {
165165+ // Can actually be deduplicated
166166+ return upload_path_dedup(
167167+ username,
168168+ cache,
169169+ upload_info,
170170+ stream,
171171+ database,
172172+ &state,
173173+ existing_nar,
174174+ )
175175+ .await;
215176 }
216177 }
178178+179179+ // New NAR or need to repair
180180+ upload_path_new(username, cache, upload_info, stream, database, &state).await
217181}
218182219183/// Uploads a path when there is already a matching NAR in the global cache.
···221185 username: Option<String>,
222186 cache: cache::Model,
223187 upload_info: UploadPathNarInfo,
224224- stream: impl AsyncRead + Unpin,
188188+ stream: impl AsyncBufRead + Unpin,
225189 database: &DatabaseConnection,
226190 state: &State,
227191 existing_nar: NarGuard,
228192) -> ServerResult<Json<UploadPathResult>> {
229193 if state.config.require_proof_of_possession {
230230- let (mut stream, nar_compute) = StreamHasher::new(stream, Sha256::new());
194194+ let (mut stream, nar_compute) = HashReader::new(stream, Sha256::new());
231195 tokio::io::copy(&mut stream, &mut tokio::io::sink())
232196 .await
233197 .map_err(ServerError::request_error)?;
···301265 username: Option<String>,
302266 cache: cache::Model,
303267 upload_info: UploadPathNarInfo,
304304- stream: impl AsyncRead + Send + Unpin + 'static,
268268+ stream: impl AsyncBufRead + Send + Unpin + 'static,
305269 database: &DatabaseConnection,
306270 state: &State,
307271) -> ServerResult<Json<UploadPathResult>> {
···319283 username: Option<String>,
320284 cache: cache::Model,
321285 upload_info: UploadPathNarInfo,
322322- stream: impl AsyncRead + Send + Unpin + 'static,
286286+ stream: impl AsyncBufRead + Send + Unpin + 'static,
323287 database: &DatabaseConnection,
324288 state: &State,
325289) -> ServerResult<Json<UploadPathResult>> {
···371335 });
372336373337 let stream = stream.take(upload_info.nar_size as u64);
374374- let (stream, nar_compute) = StreamHasher::new(stream, Sha256::new());
338338+ let (stream, nar_compute) = HashReader::new(stream, Sha256::new());
375339 let mut chunks = chunk_stream(
376340 stream,
377341 chunking_config.min_size,
···510474 username: Option<String>,
511475 cache: cache::Model,
512476 upload_info: UploadPathNarInfo,
513513- stream: impl AsyncRead + Send + Unpin + 'static,
477477+ stream: impl AsyncBufRead + Send + Unpin + 'static,
514478 database: &DatabaseConnection,
515479 state: &State,
516480) -> ServerResult<Json<UploadPathResult>> {
···623587 {
624588 // There's an existing chunk matching the hash
625589 if require_proof_of_possession && !data.is_hash_trusted() {
626626- let stream = data.into_async_read();
590590+ let stream = data.into_async_buf_read();
627591628628- let (mut stream, nar_compute) = StreamHasher::new(stream, Sha256::new());
592592+ let (mut stream, nar_compute) = HashReader::new(stream, Sha256::new());
629593 tokio::io::copy(&mut stream, &mut tokio::io::sink())
630594 .await
631595 .map_err(ServerError::request_error)?;
···705669706670 // Compress and stream to the storage backend
707671 let compressor = get_compressor_fn(compression_type, compression_level);
708708- let mut stream = CompressionStream::new(data.into_async_read(), compressor);
672672+ let mut stream = CompressionStream::new(data.into_async_buf_read(), compressor);
709673710674 backend
711675 .upload_file(key, stream.stream())
···809773 matches!(self, ChunkData::Bytes(_))
810774 }
811775812812- /// Turns the data into a stream.
813813- fn into_async_read(self) -> Box<dyn AsyncRead + Unpin + Send> {
776776+ /// Turns the data into an AsyncBufRead.
777777+ fn into_async_buf_read(self) -> Box<dyn AsyncBufRead + Unpin + Send> {
814778 match self {
815779 Self::Bytes(bytes) => Box::new(Cursor::new(bytes)),
816780 Self::Stream(stream, _, _) => stream,
817781 }
818818- }
819819-}
820820-821821-impl CompressionStream {
822822- /// Creates a new compression stream.
823823- fn new<R>(stream: R, compressor: CompressorFn<BufReader<StreamHasher<R, Sha256>>>) -> Self
824824- where
825825- R: AsyncRead + Unpin + Send + 'static,
826826- {
827827- // compute NAR hash and size
828828- let (stream, nar_compute) = StreamHasher::new(stream, Sha256::new());
829829-830830- // compress NAR
831831- let stream = compressor(BufReader::new(stream));
832832-833833- // compute file hash and size
834834- let (stream, file_compute) = StreamHasher::new(stream, Sha256::new());
835835-836836- Self {
837837- stream: Box::new(stream),
838838- nar_compute,
839839- file_compute,
840840- }
841841- }
842842-843843- /*
844844- /// Creates a compression stream without compute the uncompressed hash/size.
845845- ///
846846- /// This is useful if you already know the hash. `nar_hash_and_size` will
847847- /// always return `None`.
848848- fn new_without_nar_hash<R>(stream: R, compressor: CompressorFn<BufReader<R>>) -> Self
849849- where
850850- R: AsyncRead + Unpin + Send + 'static,
851851- {
852852- // compress NAR
853853- let stream = compressor(BufReader::new(stream));
854854-855855- // compute file hash and size
856856- let (stream, file_compute) = StreamHasher::new(stream, Sha256::new());
857857-858858- Self {
859859- stream: Box::new(stream),
860860- nar_compute: Arc::new(OnceCell::new()),
861861- file_compute,
862862- }
863863- }
864864- */
865865-866866- /// Returns the stream of the compressed object.
867867- fn stream(&mut self) -> &mut (impl AsyncRead + Unpin) {
868868- &mut self.stream
869869- }
870870-871871- /// Returns the NAR hash and size.
872872- ///
873873- /// The hash is only finalized when the stream is fully read.
874874- /// Otherwise, returns `None`.
875875- fn nar_hash_and_size(&self) -> Option<&(DigestOutput<Sha256>, usize)> {
876876- self.nar_compute.get()
877877- }
878878-879879- /// Returns the file hash and size.
880880- ///
881881- /// The hash is only finalized when the stream is fully read.
882882- /// Otherwise, returns `None`.
883883- fn file_hash_and_size(&self) -> Option<&(DigestOutput<Sha256>, usize)> {
884884- self.file_compute.get()
885782 }
886783}
887784
+81
server/src/compression.rs
···11+use std::sync::Arc;
22+33+use digest::Output as DigestOutput;
44+use sha2::{Digest, Sha256};
55+use tokio::io::{AsyncBufRead, AsyncRead};
66+use tokio::sync::OnceCell;
77+88+use attic::io::HashReader;
99+1010+pub type CompressorFn<C> = Box<dyn FnOnce(C) -> Box<dyn AsyncRead + Unpin + Send> + Send>;
1111+1212+/// Applies compression to a stream, computing hashes along the way.
1313+///
1414+/// Our strategy is to stream directly onto a UUID-keyed file on the
1515+/// storage backend, performing compression and computing the hashes
1616+/// along the way. We delete the file if the hashes do not match.
1717+///
1818+/// ```text
1919+/// ┌───────────────────────────────────►NAR Hash
2020+/// │
2121+/// │
2222+/// ├───────────────────────────────────►NAR Size
2323+/// │
2424+/// ┌─────┴────┐ ┌──────────┐ ┌───────────┐
2525+/// NAR Stream──►│NAR Hasher├─►│Compressor├─►│File Hasher├─►File Stream
2626+/// └──────────┘ └──────────┘ └─────┬─────┘
2727+/// │
2828+/// ├───────►File Hash
2929+/// │
3030+/// │
3131+/// └───────►File Size
3232+/// ```
3333+pub struct CompressionStream {
3434+ stream: Box<dyn AsyncRead + Unpin + Send>,
3535+ nar_compute: Arc<OnceCell<(DigestOutput<Sha256>, usize)>>,
3636+ file_compute: Arc<OnceCell<(DigestOutput<Sha256>, usize)>>,
3737+}
3838+3939+impl CompressionStream {
4040+ /// Creates a new compression stream.
4141+ pub fn new<R>(stream: R, compressor: CompressorFn<HashReader<R, Sha256>>) -> Self
4242+ where
4343+ R: AsyncBufRead + Unpin + Send + 'static,
4444+ {
4545+ // compute NAR hash and size
4646+ let (stream, nar_compute) = HashReader::new(stream, Sha256::new());
4747+4848+ // compress NAR
4949+ let stream = compressor(stream);
5050+5151+ // compute file hash and size
5252+ let (stream, file_compute) = HashReader::new(stream, Sha256::new());
5353+5454+ Self {
5555+ stream: Box::new(stream),
5656+ nar_compute,
5757+ file_compute,
5858+ }
5959+ }
6060+6161+ /// Returns the stream of the compressed object.
6262+ pub fn stream(&mut self) -> &mut (impl AsyncRead + Unpin) {
6363+ &mut self.stream
6464+ }
6565+6666+ /// Returns the NAR hash and size.
6767+ ///
6868+ /// The hash is only finalized when the stream is fully read.
6969+ /// Otherwise, returns `None`.
7070+ pub fn nar_hash_and_size(&self) -> Option<&(DigestOutput<Sha256>, usize)> {
7171+ self.nar_compute.get()
7272+ }
7373+7474+ /// Returns the file hash and size.
7575+ ///
7676+ /// The hash is only finalized when the stream is fully read.
7777+ /// Otherwise, returns `None`.
7878+ pub fn file_hash_and_size(&self) -> Option<&(DigestOutput<Sha256>, usize)> {
7979+ self.file_compute.get()
8080+ }
8181+}
+1
server/src/lib.rs
···15151616pub mod access;
1717mod api;
1818+mod compression;
1819pub mod config;
1920pub mod database;
2021pub mod error;
+1-1
server/src/storage/s3.rs
···19192020use super::{Download, RemoteFile, StorageBackend};
2121use crate::error::{ErrorKind, ServerError, ServerResult};
2222-use attic::stream::read_chunk_async;
2222+use attic::io::read_chunk_async;
2323use attic::util::Finally;
24242525/// The chunk size for each part in a multipart upload.