···65656666## Prototype
67676868-The prototype is a CLI that wraps a GET HTTP request in a 45s timeout independent of the HTTP client's timeout. It also includes 3 retries.
6868+The prototype is a CLI that wraps a GET HTTP request in a 45s timeout independent of the HTTP client's timeout. It also includes 3 retries by default.
69697070```console
7171$ go run main.go <URL> # e.g. go run main.go https://github.com/cuducos/chunk
7272```
7373+7474+The API should work like this:
7575+7676+```go
7777+// simple use case
7878+d := NewDownloader()
7979+ch := d.Download(urls)
8080+8181+// partial customization
8282+d := NewDownloader()
8383+d.MaxRetriesPerChunk = 42
8484+ch := d.Download(urls)
8585+8686+// full control
8787+d := chunk.Downloader{...}
8888+ch := d.Download(urls)
8989+```
9090+9191+The resulting channel will transmit data about each download:
9292+9393+```go
9494+type DownloadStatus struct {
9595+ URL string
9696+ DownloadedFilePath string
9797+ FileSizeBytes uint64
9898+ DownloadedFileBytes uint64
9999+ Error error
100100+}
101101+```
+151-19
main.go
···77 "log"
88 "net/http"
99 "os"
1010+ "sync"
1011 "time"
11121213 "github.com/avast/retry-go"
1314)
// Default values for the Downloader fields of the same name (minus the
// Default prefix). NewDownloader fills a new Downloader with them, and also
// uses DefaultTimeoutPerChunk as the timeout of the HTTP client it creates.
14151516const (
1616- defaultRetries = 3
1717- defaultTimeout = 45 * time.Second
1717+ DefaultTimeoutPerChunk = 90 * time.Second
1818+ DefaultMaxParallelDownloadsPerServer = 8
1919+ DefaultMaxRetriesPerChunk = 5
2020+ DefaultChunkSize = 8192
2121+ DefaultWaitBetweenRetries = 0 * time.Minute
1822)
19232020-type downloader struct {
2121- client *http.Client
2222- retries uint
// DownloadStatus is the data propagated via the channel sent back to the user
// and it contains information about the download from each URL.
type DownloadStatus struct {
	// URL this status refers to.
	URL string

	// DownloadedFilePath is the path of the downloaded file in the user's
	// local system.
	DownloadedFilePath string

	// FileSizeBytes is the total size of the file as informed by the server.
	FileSizeBytes uint64

	// DownloadedFileBytes is the number of bytes already downloaded from this
	// URL.
	DownloadedFileBytes uint64

	// Error is any non-recoverable error captured during the download. Some
	// errors are not propagated here because the download is retried instead.
	Error error
}

// IsFinished informs the user whether a download is done (successfully or
// with error). It returns true when Error is set or when DownloadedFileBytes
// equals FileSizeBytes; that includes a status whose two counters are both
// still zero.
//
// The receiver is a value (not a pointer) so the method can be called on
// non-addressable statuses, such as one received straight from a channel.
// Calls through a *DownloadStatus keep working.
func (s DownloadStatus) IsFinished() bool {
	return s.Error != nil || s.DownloadedFileBytes == s.FileSizeBytes
}
5050+5151+// Downloader can be configured by the user before starting the download using
5252+// the following fields. This configuration impacts how the download will be
5353+// handled, including retries, amount of requests, and size of each request, for
5454+// example.
5555+type Downloader struct {
5656+ // Client is the HTTP client used for every request needed to download all
5757+ // the files.
5858+ Client *http.Client
5959+6060+ // TimeoutPerChunk is the timeout for the download of each chunk from each
6161+ // URL. A chunk is a part of a file requested using the content range HTTP
6262+ // header. Thus, this timeout is not the timeout for each file or for
6363+ // the download of every file.
6464+ TimeoutPerChunk time.Duration
6565+6666+ // MaxParallelDownloadsPerServer controls how many requests are sent in
6767+ // parallel to the same server. If all the URLs are from the same server
6868+ // this is the total of parallel requests. If the user is downloading files
6969+ // from different servers (including different subdomains), this limit is
7070+ // applied to each server independently.
7171+ MaxParallelDownloadsPerServer uint
7272+7373+ // MaxRetriesPerChunk is the maximum amount of retries for each HTTP request
7474+ // using the content range header that fails.
7575+ MaxRetriesPerChunk uint
7676+7777+ // ChunkSize is the maximum size of each HTTP request done using the
7878+ // content range header. There is no way to specify how many chunks a
7979+ // download will need, the focus is on slicing it in smaller chunks so slow
8080+ // and unstable servers can respond before dropping it.
8181+ ChunkSize uint64
8282+8383+ // WaitBetweenRetries is an optional pause before retrying an HTTP request
8484+ // that has failed.
8585+ WaitBetweenRetries time.Duration
2386}
24872525-func (d *downloader) downloadWithContext(ctx context.Context, u string) ([]byte, error) {
8888+func (d *Downloader) downloadFileWithContext(ctx context.Context, u string) ([]byte, error) {
2689 req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil)
2790 if err != nil {
2891 return nil, fmt.Errorf("error creating the request for %s: %w", u, err)
2992 }
3093 req = req.WithContext(ctx)
3131- resp, err := d.client.Do(req)
9494+ resp, err := d.Client.Do(req)
3295 if err != nil {
3396 return nil, fmt.Errorf("error sending a get http request to %s: %w", u, err)
3497 }
···44107 return b.Bytes(), nil
45108}
461094747-func (d *downloader) downloadWithTimeout(u string) ([]byte, error) {
4848- ctx, cancel := context.WithTimeout(context.Background(), d.client.Timeout)
110110+func (d *Downloader) downloadFileWithTimeout(userCtx context.Context, u string) ([]byte, error) {
111111+ ctx, cancel := context.WithTimeout(context.Background(), d.Client.Timeout)
49112 defer cancel()
50113 ch := make(chan []byte)
51114 errs := make(chan error)
52115 go func() {
5353- b, err := d.downloadWithContext(ctx, u)
116116+ b, err := d.downloadFileWithContext(ctx, u)
54117 if err != nil {
55118 errs <- err
56119 return
···58121 ch <- b
59122 }()
60123 select {
124124+ case <-userCtx.Done():
125125+ cancel()
126126+ return nil, userCtx.Err()
61127 case <-ctx.Done():
62128 return nil, fmt.Errorf("request to %s ended due to timeout: %w", u, ctx.Err())
63129 case err := <-errs:
···67133 }
68134}
691357070-func (d *downloader) download(u string) ([]byte, error) {
136136+func (d *Downloader) downloadFile(ctx context.Context, u string) ([]byte, error) {
71137 ch := make(chan []byte, 1)
72138 defer close(ch)
73139 err := retry.Do(
74140 func() error {
7575- b, err := d.downloadWithTimeout(u)
141141+ b, err := d.downloadFileWithTimeout(ctx, u)
76142 if err != nil {
77143 return err
78144 }
79145 ch <- b
80146 return nil
81147 },
8282- retry.Attempts(d.retries),
8383- retry.MaxDelay(d.client.Timeout),
148148+ retry.Attempts(d.MaxRetriesPerChunk),
149149+ retry.MaxDelay(d.Client.Timeout),
84150 )
85151 if err != nil {
86152 return nil, fmt.Errorf("error downloading %s: %w", u, err)
···89155 return b, nil
90156}
91157158158+// DownloadWithContext is a version of Download that takes a context. The
159159+// context can be used to stop all downloads in progress.
160160+func (d *Downloader) DownloadWithContext(ctx context.Context, urls ...string) <-chan DownloadStatus {
161161+ ch := make(chan DownloadStatus)
162162+ var wg sync.WaitGroup
163163+ for _, u := range urls {
164164+ wg.Add(1)
165165+ go func(u string) {
166166+ defer wg.Done()
167167+ s := DownloadStatus{URL: u}
168168+ defer func() { ch <- s }()
169169+ f, err := os.CreateTemp("", "chunk-download-")
170170+ if err != nil {
171171+ s.Error = err
172172+ return
173173+ }
174174+ s.DownloadedFilePath = f.Name()
175175+ b, err := d.downloadFile(ctx, u)
176176+ if err != nil {
177177+ s.Error = err
178178+ return
179179+ }
180180+ if err := os.WriteFile(f.Name(), b, 0655); err != nil {
181181+ s.Error = err
182182+ return
183183+ }
184184+ s.DownloadedFileBytes = uint64(len(b))
185185+ s.FileSizeBytes = uint64(len(b))
186186+ }(u)
187187+ }
188188+ go func() {
189189+ wg.Wait()
190190+ close(ch)
191191+ }()
192192+ return ch
193193+}
194194+195195+// Download from all URLs slicing each in a series of chunks, of small HTTP
196196+// requests using the content range header.
197197+func (d *Downloader) Download(urls ...string) <-chan DownloadStatus {
198198+ return d.DownloadWithContext(context.Background(), urls...)
199199+}
200200+201201+// NewDownloader creates a downloader with the defalt configuration. Check
202202+// the constants in this package for their values.
203203+func NewDownloader() *Downloader {
204204+ return &Downloader{
205205+ &http.Client{Timeout: DefaultTimeoutPerChunk},
206206+ DefaultTimeoutPerChunk,
207207+ DefaultMaxParallelDownloadsPerServer,
208208+ DefaultMaxRetriesPerChunk,
209209+ DefaultChunkSize,
210210+ DefaultWaitBetweenRetries,
211211+ }
212212+}
213213+92214func main() {
9393- d := downloader{&http.Client{Timeout: defaultTimeout}, uint(defaultRetries)}
9494- b, err := d.download(os.Args[1])
9595- if err != nil {
9696- log.Fatal(err)
215215+ d := NewDownloader()
216216+ for s := range d.Download(os.Args[1]) {
217217+ if s.Error != nil {
218218+ log.Fatal(s.Error)
219219+ }
220220+ if s.IsFinished() {
221221+ b, err := os.ReadFile(s.DownloadedFilePath)
222222+ if err != nil {
223223+ log.Fatal(err)
224224+ }
225225+ fmt.Print(string(b))
226226+ if err := os.Remove(s.DownloadedFilePath); err != nil {
227227+ log.Fatal(err)
228228+ }
229229+ }
97230 }
9898- fmt.Print(string(b))
99231}