···1313 compressors: Vec<Box<dyn Compressor>>,
1414}
/// Which method intermediate (threaded) stages should invoke. The final stage
/// always runs on the calling thread and is handled by a caller-supplied
/// closure — only the intermediate layers need this dispatch.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum StageAction {
    /// Intermediate stages call `compress` on their compressor.
    Compress,
    /// Intermediate stages call `extract` on their compressor.
    Extract,
}
2424+1625impl Pipeline {
1726 /// Create a new Pipeline with the given compressors
1827 pub fn new(compressors: Vec<Box<dyn Compressor>>) -> Self {
···2635 .map(|c| c.extension())
2736 .collect::<Vec<&str>>()
2837 .join(".")
3838+ }
3939+4040+ /// Run an ordered chain of compressor stages, with each non-final stage
4141+ /// in its own thread linked by an in-memory pipe. The final (last) stage
4242+ /// runs on the calling thread via `finalize`. Intermediate stages all
4343+ /// invoke the same method — `compress` going outward through a
4444+ /// compression pipeline, `extract` unwrapping layers on the way in.
4545+ fn run_threaded<F>(
4646+ stages: Vec<Box<dyn Compressor>>,
4747+ initial_input: CmprssInput,
4848+ intermediate: StageAction,
4949+ finalize: F,
5050+ ) -> Result
5151+ where
5252+ F: FnOnce(Box<dyn Compressor>, CmprssInput) -> Result,
5353+ {
5454+ debug_assert!(!stages.is_empty(), "pipeline is never empty");
5555+ let mut stages = stages;
5656+ let last = stages.pop().expect("pipeline is never empty");
5757+ let buffer_size = 64 * 1024;
5858+ let mut current_input = initial_input;
5959+ let mut handles = Vec::new();
6060+6161+ for stage in stages {
6262+ let (sender, receiver) = channel::<Vec<u8>>();
6363+ let stage_output =
6464+ CmprssOutput::Writer(WriteWrapper(Box::new(PipeWriter::new(sender, buffer_size))));
6565+ let next_input = CmprssInput::Reader(ReadWrapper(Box::new(PipeReader::new(receiver))));
6666+ let stage_input = std::mem::replace(&mut current_input, next_input);
6767+6868+ let handle = thread::spawn(move || match intermediate {
6969+ StageAction::Compress => stage.compress(stage_input, stage_output),
7070+ StageAction::Extract => stage.extract(stage_input, stage_output),
7171+ });
7272+ handles.push(handle);
7373+ }
7474+7575+ finalize(last, current_input)?;
7676+7777+ for handle in handles {
7878+ handle
7979+ .join()
8080+ .map_err(|_| anyhow!("Pipeline stage thread panicked"))??;
8181+ }
8282+ Ok(())
2983 }
3084}
3185···201255202256 fn compress(&self, input: CmprssInput, output: CmprssOutput) -> Result {
203257 debug_assert!(!self.compressors.is_empty(), "pipeline is never empty");
204204-205258 if self.compressors.len() == 1 {
206259 return self.compressors[0].compress(input, output);
207260 }
208208-209209- let mut op_compressors: Vec<Box<dyn Compressor>> =
210210- self.compressors.iter().map(|c| c.clone_boxed()).collect();
211211-212212- let mut handles = Vec::new();
213213- let mut current_thread_input = input; // Consumed by the first (innermost) compressor
214214- let buffer_size = 64 * 1024;
215215-216216- // Process all but the last (outermost) compressor in separate threads
217217- for _ in 0..op_compressors.len() - 1 {
218218- let compressor_for_this_stage = op_compressors.remove(0);
219219- let (sender, receiver) = channel::<Vec<u8>>();
220220- let pipe_writer = PipeWriter::new(sender, buffer_size);
221221- let input_for_next_stage =
222222- CmprssInput::Reader(ReadWrapper(Box::new(PipeReader::new(receiver))));
223223-224224- let actual_input_for_thread = current_thread_input; // Move current input to thread
225225- current_thread_input = input_for_next_stage; // Set up input for the *next* stage or final compressor
226226-227227- let handle = thread::spawn(move || {
228228- compressor_for_this_stage.compress(
229229- actual_input_for_thread,
230230- CmprssOutput::Writer(WriteWrapper(Box::new(pipe_writer))),
231231- )
232232- });
233233- handles.push(handle);
234234- }
235235-236236- // The last (outermost) compressor runs in the current thread and writes to the final output
237237- let last_compressor = op_compressors.remove(0);
238238- // current_thread_input here is the Reader from the penultimate stage
239239- last_compressor.compress(current_thread_input, output)?;
240240-241241- for handle in handles {
242242- handle
243243- .join()
244244- .map_err(|_| anyhow!("Compression thread panicked"))??;
245245- }
246246- Ok(())
261261+ // Innermost → outermost: the outermost compressor runs on the main
262262+ // thread and writes to the user-supplied output.
263263+ let stages = self.compressors.iter().map(|c| c.clone_boxed()).collect();
264264+ Self::run_threaded(stages, input, StageAction::Compress, |last, input| {
265265+ last.compress(input, output)
266266+ })
247267 }
248268249269 fn extract(&self, input: CmprssInput, output: CmprssOutput) -> Result {
250270 debug_assert!(!self.compressors.is_empty(), "pipeline is never empty");
251251-252271 if self.compressors.len() == 1 {
253272 return self.compressors[0].extract(input, output);
254273 }
255255-256256- let mut op_extractors: Vec<Box<dyn Compressor>> = self
274274+ // Outermost → innermost: the innermost extractor (typically the
275275+ // container format like tar/zip) runs on the main thread so it can
276276+ // unpack into the user-supplied output.
277277+ let stages = self
257278 .compressors
258279 .iter()
259280 .rev()
260281 .map(|c| c.clone_boxed())
261282 .collect();
262262-263263- let mut handles = Vec::new();
264264- let mut current_thread_input = input; // Consumed by the first (outermost) extractor
265265- let buffer_size = 64 * 1024;
266266-267267- // Process all but the last (innermost) extractor in separate threads.
268268- for _ in 0..op_extractors.len() - 1 {
269269- let extractor_for_this_stage = op_extractors.remove(0);
270270- let (sender, receiver) = channel::<Vec<u8>>();
271271- let pipe_writer = PipeWriter::new(sender, buffer_size);
272272- let intermediate_output_for_thread =
273273- CmprssOutput::Writer(WriteWrapper(Box::new(pipe_writer)));
274274- let input_for_next_stage =
275275- CmprssInput::Reader(ReadWrapper(Box::new(PipeReader::new(receiver))));
276276-277277- let actual_input_for_thread = current_thread_input; // Move current input to thread
278278- current_thread_input = input_for_next_stage; // Set up input for the *next* stage or final extractor
279279-280280- let handle = thread::spawn(move || {
281281- extractor_for_this_stage
282282- .extract(actual_input_for_thread, intermediate_output_for_thread)
283283- });
284284- handles.push(handle);
285285- }
286286-287287- // The last (innermost) extractor runs in the current thread and writes to the final output
288288- let last_extractor = op_extractors.remove(0);
289289- // current_thread_input here is the Reader from the penultimate stage
290290-291291- let final_output = match output {
292292- CmprssOutput::Path(ref p) => {
293293- if last_extractor.default_extracted_target() == ExtractedTarget::Directory
294294- && !p.exists()
295295- {
296296- std::fs::create_dir_all(p)?;
283283+ Self::run_threaded(stages, input, StageAction::Extract, |last, input| {
284284+ let final_output = match output {
285285+ CmprssOutput::Path(ref p) => {
286286+ // If the innermost extractor wants a directory and the
287287+ // user's output path doesn't exist yet, create it so
288288+ // e.g. tar::unpack has somewhere to write.
289289+ if last.default_extracted_target() == ExtractedTarget::Directory && !p.exists()
290290+ {
291291+ std::fs::create_dir_all(p)?;
292292+ }
293293+ CmprssOutput::Path(p.clone())
297294 }
298298- // If it's a directory, the tar extractor (usually innermost) will handle it.
299299- // The path provided should be the target directory.
300300- // Always pass the path; the backend decides how to use it.
301301- CmprssOutput::Path(p.clone())
302302- }
303303- CmprssOutput::Pipe(_) => output,
304304- CmprssOutput::Writer(_) => output,
305305- };
306306-307307- last_extractor.extract(current_thread_input, final_output)?;
308308-309309- for handle in handles {
310310- handle
311311- .join()
312312- .map_err(|_| anyhow!("Extraction thread panicked"))??;
313313- }
314314- Ok(())
295295+ CmprssOutput::Pipe(_) | CmprssOutput::Writer(_) => output,
296296+ };
297297+ last.extract(input, final_output)
298298+ })
315299 }
316300317301 fn list(&self, input: CmprssInput) -> Result {
318302 debug_assert!(!self.compressors.is_empty(), "pipeline is never empty");
319319-320303 if self.compressors.len() == 1 {
321304 return self.compressors[0].list(input);
322305 }
323323-324306 // Same plumbing as `extract`, except the innermost compressor lists
325325- // its entries to stdout instead of unpacking to an output path. Outer
326326- // layers still need to decompress into an in-memory pipe so that the
327327- // innermost container format sees plain archive bytes.
328328- let mut op_extractors: Vec<Box<dyn Compressor>> = self
307307+ // its entries to stdout instead of unpacking. Outer layers still
308308+ // decompress into the in-memory pipe so the innermost container sees
309309+ // plain archive bytes.
310310+ let stages = self
329311 .compressors
330312 .iter()
331313 .rev()
332314 .map(|c| c.clone_boxed())
333315 .collect();
334334-335335- let mut handles = Vec::new();
336336- let mut current_thread_input = input;
337337- let buffer_size = 64 * 1024;
338338-339339- for _ in 0..op_extractors.len() - 1 {
340340- let extractor = op_extractors.remove(0);
341341- let (sender, receiver) = channel::<Vec<u8>>();
342342- let pipe_writer = PipeWriter::new(sender, buffer_size);
343343- let stage_output = CmprssOutput::Writer(WriteWrapper(Box::new(pipe_writer)));
344344- let next_stage_input =
345345- CmprssInput::Reader(ReadWrapper(Box::new(PipeReader::new(receiver))));
346346-347347- let stage_input = current_thread_input;
348348- current_thread_input = next_stage_input;
349349-350350- let handle = thread::spawn(move || extractor.extract(stage_input, stage_output));
351351- handles.push(handle);
352352- }
353353-354354- let innermost = op_extractors.remove(0);
355355- innermost.list(current_thread_input)?;
356356-357357- for handle in handles {
358358- handle
359359- .join()
360360- .map_err(|_| anyhow!("Extraction thread panicked"))??;
361361- }
362362- Ok(())
316316+ Self::run_threaded(stages, input, StageAction::Extract, |innermost, input| {
317317+ innermost.list(input)
318318+ })
363319 }
364320}
365321