···1313var rootCmd = &cobra.Command{
1414 Use: "chunk",
1515 Short: "Download tool for slow and unstable servers",
1616- Long: `The idea of the project emerged as it was difficult for Minha Receita to handle the download of 37 files that adds up to just approx. 5Gb. Most of the download solutions out there (e.g. got) seem to be prepared for downloading large files, not for downloading from slow and unstable servers — which is the case at hand.`,
1616+ Long: "Download tool for slow and unstable servers using HTTP range requests, retries per HTTP request (not by file), prevents re-downloading the same content range and supports wait time to give servers time to recover.",
1717 Run: func(cmd *cobra.Command, args []string) {
1818 chunk := chunk.DefaultDownloader()
1919- chunk.TimeoutPerChunk = timeoutChunk
2020- chunk.MaxParallelDownloadsPerServer = concurrencyPerServer
2121- chunk.MaxRetriesPerChunk = maxRetriesChunk
2222- chunk.WaitBetweenRetries = waitBetweenRetries
1919+ chunk.Timeout = timeoutChunk
2020+ chunk.ConcurrencyPerServer = concurrencyPerServer
2121+ chunk.MaxRetries = maxRetriesChunk
2222+ chunk.WaitRetry = waitBetweenRetries
2323 chunk.ChunkSize = chunkSize
2424 prog := newProgress()
2525 for status := range chunk.Download(os.Args[1:len(os.Args)]...) {
···4242)
43434444func init() {
4545- rootCmd.Flags().DurationVarP(&timeoutChunk, "timeout", "t", chunk.DefaultTimeoutPerChunk, "timeout for the download of each chunk from each URL.")
4646- rootCmd.Flags().UintVarP(&maxRetriesChunk, "max-retries", "r", chunk.DefaultMaxRetriesPerChunk, "maximum number of retries for each chunk.")
4747- rootCmd.Flags().DurationVarP(&waitBetweenRetries, "wait-between-retries", "w", chunk.DefaultWaitBetweenRetries, "pause before retrying an HTTP request that has failed.")
4545+ rootCmd.Flags().DurationVarP(&timeoutChunk, "timeout", "t", chunk.DefaultTimeout, "timeout for the download of each chunk from each URL.")
4646+ rootCmd.Flags().UintVarP(&maxRetriesChunk, "max-retries", "r", chunk.DefaultMaxRetries, "maximum number of retries for each chunk.")
4747+ rootCmd.Flags().DurationVarP(&waitBetweenRetries, "wait-retry", "w", chunk.DefaultWaitRetry, "pause before retrying an HTTP request that has failed.")
4848 rootCmd.Flags().Int64VarP(&chunkSize, "chunk-size", "s", chunk.DefaultChunkSize, "maximum size of each HTTP request done using the content range header.")
4949- rootCmd.Flags().IntVarP(&concurrencyPerServer, "concurrency-per-server", "c", chunk.DefaultMaxParallelDownloadsPerServer, "controls the max number of concurrent connections opened to the same server.")
4949+ rootCmd.Flags().IntVarP(&concurrencyPerServer, "concurrency-per-server", "c", chunk.DefaultConcurrencyPerServer, "controls the max number of concurrent connections opened to the same server.")
5050}
51515252func main() {
+21-21
downloader.go
···1515)
// Default values used by DefaultDownloader and by the command-line flags.
const (
	// DefaultTimeout is the default timeout for the download of each chunk
	// from each URL.
	DefaultTimeout = 90 * time.Second

	// DefaultWaitRetry is the default pause before retrying an HTTP request
	// that has failed.
	DefaultWaitRetry = 1 * time.Second

	// DefaultMaxRetries is the default maximum number of retries for each
	// chunk.
	DefaultMaxRetries = 5

	// DefaultConcurrencyPerServer is the default maximum number of
	// concurrent connections opened to the same server.
	DefaultConcurrencyPerServer = 8

	// DefaultChunkSize is the default maximum size of each HTTP request done
	// using the content range header.
	DefaultChunkSize = 8192
)
24242525// DownloadStatus is the data propagated via the channel sent back to the user
···6262 // URL. A chunk is a part of a file requested using the content range HTTP
6363 // header. Thus, this timeout is not the timeout for each file or for
6464 // the download of every file).
6565- TimeoutPerChunk time.Duration
6565+ Timeout time.Duration
66666767 // ConcurrencyPerServer controls the max number of concurrent
6868 // connections opened to the same server. If all the URLs are from the same
6969 // server this is the total of concurrent connections. If the user is downloading
7070 // files from different servers, this limit is applied to each server independently.
7171- MaxParallelDownloadsPerServer int
7171+ ConcurrencyPerServer int
72727373 // MaxRetries is the maximum amount of retries for each HTTP request
7474 // using the content range header that fails.
7575- MaxRetriesPerChunk uint
7575+ MaxRetries uint
76767777 // ChunkSize is the maximum size of each HTTP request done using the
7878 // content range header. There is no way to specify how many chunks a
···82828383 // WaitRetry is an optional pause before retrying an HTTP request
8484 // that has failed.
8585- WaitBetweenRetries time.Duration
8585+ WaitRetry time.Duration
8686}
87878888type chunk struct {
···116116}
117117118118func (d *Downloader) downloadChunkWithTimeout(userCtx context.Context, u string, c chunk) ([]byte, error) {
119119- ctx, cancel := context.WithTimeout(userCtx, d.TimeoutPerChunk) // need to propagate context, which might contain app-specific data.
119119+ ctx, cancel := context.WithTimeout(userCtx, d.Timeout) // need to propagate context, which might contain app-specific data.
120120 defer cancel()
121121 ch := make(chan []byte)
122122 errs := make(chan error)
···160160 ch <- resp
161161 return nil
162162 },
163163- retry.Attempts(d.MaxRetriesPerChunk),
164164- retry.MaxDelay(d.WaitBetweenRetries),
163163+ retry.Attempts(d.MaxRetries),
164164+ retry.MaxDelay(d.WaitRetry),
165165 )
166166 if err != nil {
167167 return 0, fmt.Errorf("error sending get http request to %s: %w", u, err)
···193193 ch <- b
194194 return nil
195195 },
196196- retry.Attempts(d.MaxRetriesPerChunk),
197197- retry.MaxDelay(d.WaitBetweenRetries),
196196+ retry.Attempts(d.MaxRetries),
197197+ retry.MaxDelay(d.WaitRetry),
198198 )
199199 if err != nil {
200200 return nil, fmt.Errorf("error downloading %s: %w", u, err)
···274274// context can be used to stop all downloads in progress.
275275func (d *Downloader) DownloadWithContext(ctx context.Context, urls ...string) <-chan DownloadStatus {
276276 if d.client == nil {
277277- d.client = newClient(d.MaxParallelDownloadsPerServer, d.TimeoutPerChunk)
277277+ d.client = newClient(d.ConcurrencyPerServer, d.Timeout)
278278 }
279279 ch := make(chan DownloadStatus, 2*len(urls)) // the first status will be the total file size (and or an error creating/trucating the file).
280280 var wg sync.WaitGroup // this wait group is used to wait for all chunks (from all downloads) to finish.
···303303// the constants in this package for their values.
304304func DefaultDownloader() *Downloader {
305305 return &Downloader{
306306- TimeoutPerChunk: DefaultTimeoutPerChunk,
307307- MaxParallelDownloadsPerServer: DefaultMaxParallelDownloadsPerServer,
308308- MaxRetriesPerChunk: DefaultMaxRetriesPerChunk,
309309- ChunkSize: DefaultChunkSize,
310310- WaitBetweenRetries: DefaultWaitBetweenRetries,
311311- client: newClient(DefaultMaxRetriesPerChunk, DefaultTimeoutPerChunk),
306306+ Timeout: DefaultTimeout,
307307+ ConcurrencyPerServer: DefaultConcurrencyPerServer,
308308+ MaxRetries: DefaultMaxRetries,
309309+ ChunkSize: DefaultChunkSize,
310310+ WaitRetry: DefaultWaitRetry,
311311+ client: newClient(DefaultMaxRetries, DefaultTimeout),
312312 }
313313}
314314