🧱 Chunk is a download manager for slow and unstable servers
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Adjusting field names to be in line with command-line params.

+45 -45
+9 -9
cmd/chunk/main.go
··· 13 13 var rootCmd = &cobra.Command{ 14 14 Use: "chunk", 15 15 Short: "Download tool for slow and unstable servers", 16 - Long: `The idea of the project emerged as it was difficult for Minha Receita to handle the download of 37 files that adds up to just approx. 5Gb. Most of the download solutions out there (e.g. got) seem to be prepared for downloading large files, not for downloading from slow and unstable servers — which is the case at hand.`, 16 + Long: "Download tool for slow and unstable servers using HTTP range requests, retries per HTTP request (not by file), prevents re-downloading the same content range and supports wait time to give servers time to recover.", 17 17 Run: func(cmd *cobra.Command, args []string) { 18 18 chunk := chunk.DefaultDownloader() 19 - chunk.TimeoutPerChunk = timeoutChunk 20 - chunk.MaxParallelDownloadsPerServer = concurrencyPerServer 21 - chunk.MaxRetriesPerChunk = maxRetriesChunk 22 - chunk.WaitBetweenRetries = waitBetweenRetries 19 + chunk.Timeout = timeoutChunk 20 + chunk.ConcurrencyPerServer = concurrencyPerServer 21 + chunk.MaxRetries = maxRetriesChunk 22 + chunk.WaitRetry = waitBetweenRetries 23 23 chunk.ChunkSize = chunkSize 24 24 prog := newProgress() 25 25 for status := range chunk.Download(os.Args[1:len(os.Args)]...) 
{ ··· 42 42 ) 43 43 44 44 func init() { 45 - rootCmd.Flags().DurationVarP(&timeoutChunk, "timeout", "t", chunk.DefaultTimeoutPerChunk, "timeout for the download of each chunk from each URL.") 46 - rootCmd.Flags().UintVarP(&maxRetriesChunk, "max-retries", "r", chunk.DefaultMaxRetriesPerChunk, "maximum number of retries for each chunk.") 47 - rootCmd.Flags().DurationVarP(&waitBetweenRetries, "wait-between-retries", "w", chunk.DefaultWaitBetweenRetries, "pause before retrying an HTTP request that has failed.") 45 + rootCmd.Flags().DurationVarP(&timeoutChunk, "timeout", "t", chunk.DefaultTimeout, "timeout for the download of each chunk from each URL.") 46 + rootCmd.Flags().UintVarP(&maxRetriesChunk, "max-retries", "r", chunk.DefaultMaxRetries, "maximum number of retries for each chunk.") 47 + rootCmd.Flags().DurationVarP(&waitBetweenRetries, "wait-retry", "w", chunk.DefaultWaitRetry, "pause before retrying an HTTP request that has failed.") 48 48 rootCmd.Flags().Int64VarP(&chunkSize, "chunk-size", "s", chunk.DefaultChunkSize, "maximum size of each HTTP request done using the content range header.") 49 - rootCmd.Flags().IntVarP(&concurrencyPerServer, "concurrency-per-server", "c", chunk.DefaultMaxParallelDownloadsPerServer, "controls the max number of concurrent connections opened to the same server.") 49 + rootCmd.Flags().IntVarP(&concurrencyPerServer, "concurrency-per-server", "c", chunk.DefaultConcurrencyPerServer, "controls the max number of concurrent connections opened to the same server.") 50 50 } 51 51 52 52 func main() {
+21 -21
downloader.go
··· 15 15 ) 16 16 17 17 const ( 18 - DefaultTimeoutPerChunk = 90 * time.Second 19 - DefaultMaxParallelDownloadsPerServer = 8 20 - DefaultMaxRetriesPerChunk = 5 21 - DefaultChunkSize = 8192 22 - DefaultWaitBetweenRetries = 1 * time.Second 18 + DefaultTimeout = 90 * time.Second 19 + DefaultConcurrencyPerServer = 8 20 + DefaultMaxRetries = 5 21 + DefaultChunkSize = 8192 22 + DefaultWaitRetry = 1 * time.Second 23 23 ) 24 24 25 25 // DownloadStatus is the data propagated via the channel sent back to the user ··· 62 62 // URL. A chunk is a part of a file requested using the content range HTTP 63 63 // header. Thus, this timeout is not the timeout for the each file or for 64 64 // the the download of every file). 65 - TimeoutPerChunk time.Duration 65 + Timeout time.Duration 66 66 67 67 // MaxParallelDownloadsPerServer controls the max number of concurrent 68 68 // connections opened to the same server. If all the URLs are from the same 69 69 // server this is the total of concurrent connections. If the user is downloading 70 70 // files from different servers, this limit is applied to each server idependently. 71 - MaxParallelDownloadsPerServer int 71 + ConcurrencyPerServer int 72 72 73 73 // MaxRetriesPerChunk is the maximum amount of retries for each HTTP request 74 74 // using the content range header that fails. 75 - MaxRetriesPerChunk uint 75 + MaxRetries uint 76 76 77 77 // ChunkSize is the maximum size of each HTTP request done using the 78 78 // content range header. There is no way to specify how many chunks a ··· 82 82 83 83 // WaitBetweenRetries is an optional pause before retrying an HTTP request 84 84 // that has failed. 
85 - WaitBetweenRetries time.Duration 85 + WaitRetry time.Duration 86 86 } 87 87 88 88 type chunk struct { ··· 116 116 } 117 117 118 118 func (d *Downloader) downloadChunkWithTimeout(userCtx context.Context, u string, c chunk) ([]byte, error) { 119 - ctx, cancel := context.WithTimeout(userCtx, d.TimeoutPerChunk) // need to propagate context, which might contain app-specific data. 119 + ctx, cancel := context.WithTimeout(userCtx, d.Timeout) // need to propagate context, which might contain app-specific data. 120 120 defer cancel() 121 121 ch := make(chan []byte) 122 122 errs := make(chan error) ··· 160 160 ch <- resp 161 161 return nil 162 162 }, 163 - retry.Attempts(d.MaxRetriesPerChunk), 164 - retry.MaxDelay(d.WaitBetweenRetries), 163 + retry.Attempts(d.MaxRetries), 164 + retry.MaxDelay(d.WaitRetry), 165 165 ) 166 166 if err != nil { 167 167 return 0, fmt.Errorf("error sending get http request to %s: %w", u, err) ··· 193 193 ch <- b 194 194 return nil 195 195 }, 196 - retry.Attempts(d.MaxRetriesPerChunk), 197 - retry.MaxDelay(d.WaitBetweenRetries), 196 + retry.Attempts(d.MaxRetries), 197 + retry.MaxDelay(d.WaitRetry), 198 198 ) 199 199 if err != nil { 200 200 return nil, fmt.Errorf("error downloading %s: %w", u, err) ··· 274 274 // context can be used to stop all downloads in progress. 275 275 func (d *Downloader) DownloadWithContext(ctx context.Context, urls ...string) <-chan DownloadStatus { 276 276 if d.client == nil { 277 - d.client = newClient(d.MaxParallelDownloadsPerServer, d.TimeoutPerChunk) 277 + d.client = newClient(d.ConcurrencyPerServer, d.Timeout) 278 278 } 279 279 ch := make(chan DownloadStatus, 2*len(urls)) // the first status will be the total file size (and or an error creating/trucating the file). 280 280 var wg sync.WaitGroup // this wait group is used to wait for all chunks (from all downloads) to finish. ··· 303 303 // the constants in this package for their values. 
304 304 func DefaultDownloader() *Downloader { 305 305 return &Downloader{ 306 - TimeoutPerChunk: DefaultTimeoutPerChunk, 307 - MaxParallelDownloadsPerServer: DefaultMaxParallelDownloadsPerServer, 308 - MaxRetriesPerChunk: DefaultMaxRetriesPerChunk, 309 - ChunkSize: DefaultChunkSize, 310 - WaitBetweenRetries: DefaultWaitBetweenRetries, 311 - client: newClient(DefaultMaxRetriesPerChunk, DefaultTimeoutPerChunk), 306 + Timeout: DefaultTimeout, 307 + ConcurrencyPerServer: DefaultConcurrencyPerServer, 308 + MaxRetries: DefaultMaxRetries, 309 + ChunkSize: DefaultChunkSize, 310 + WaitRetry: DefaultWaitRetry, 311 + client: newClient(DefaultMaxRetries, DefaultTimeout), 312 312 } 313 313 } 314 314
+15 -15
downloader_test.go
··· 38 38 )) 39 39 defer s.Close() 40 40 d := Downloader{ 41 - TimeoutPerChunk: timeout, 42 - MaxRetriesPerChunk: 4, 43 - MaxParallelDownloadsPerServer: 1, 44 - ChunkSize: 1024, 45 - WaitBetweenRetries: 0 * time.Second, 41 + Timeout: timeout, 42 + MaxRetries: 4, 43 + ConcurrencyPerServer: 1, 44 + ChunkSize: 1024, 45 + WaitRetry: 0 * time.Second, 46 46 } 47 47 ch := d.Download(s.URL) 48 48 <-ch // discard the first got (just the file size) ··· 191 191 defer s.Close() 192 192 193 193 d := Downloader{ 194 - TimeoutPerChunk: timeout, 195 - MaxRetriesPerChunk: 4, 196 - MaxParallelDownloadsPerServer: 1, 197 - ChunkSize: 1024, 198 - WaitBetweenRetries: 0 * time.Second, 194 + Timeout: timeout, 195 + MaxRetries: 4, 196 + ConcurrencyPerServer: 1, 197 + ChunkSize: 1024, 198 + WaitRetry: 0 * time.Second, 199 199 } 200 200 ch := d.Download(s.URL) 201 201 <-ch // discard the first status (just the file size) ··· 243 243 )) 244 244 defer s.Close() 245 245 d := Downloader{ 246 - TimeoutPerChunk: timeout, 247 - MaxRetriesPerChunk: 4, 248 - MaxParallelDownloadsPerServer: 1, 249 - ChunkSize: 1024, 250 - WaitBetweenRetries: 0 * time.Second, 246 + Timeout: timeout, 247 + MaxRetries: 4, 248 + ConcurrencyPerServer: 1, 249 + ChunkSize: 1024, 250 + WaitRetry: 0 * time.Second, 251 251 } 252 252 userCtx, cancFunc := context.WithTimeout(context.Background(), userTimeout) 253 253 defer cancFunc()