🧱 Chunk is a download manager for slow and unstable servers
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Suggests the public API

+203 -28
+30 -1
README.md
··· 65 65 66 66 ## Prototype 67 67 68 - The prototype is a CLI that wraps a GET HTTP request in a 45s timeout independent of the HTTP client's timeout. It also includes 3 retries. 68 + The prototype is a CLI that wraps a GET HTTP request in a 45s timeout independent of the HTTP client's timeout. It also includes 3 retries by default. 69 69 70 70 ```console 71 71 $ go run main.go <URL> # e.g. go run main.go https://github.com/cuducos/chunk 72 72 ``` 73 + 74 + The API should work like this: 75 + 76 + ```go 77 + // simple use case 78 + d := NewDownloader() 79 + ch := d.Download(urls) 80 + 81 + // partial customization 82 + d := NewDownloader() 83 + d.MaxRetriesPerChunk = 42 84 + ch := d.Download(urls) 85 + 86 + // full control 87 + d := chunk.Downloader{...} 88 + ch := d.Download(urls) 89 + ``` 90 + 91 + The resulting channel will transmit data about each download: 92 + 93 + ```go 94 + type DownloadStatus struct { 95 + URL string 96 + DownloadedFilePath string 97 + FileSizeBytes uint64 98 + DownloadedFileBytes uint64 99 + Error error 100 + } 101 + ```
+151 -19
main.go
··· 7 7 "log" 8 8 "net/http" 9 9 "os" 10 + "sync" 10 11 "time" 11 12 12 13 "github.com/avast/retry-go" 13 14 ) 14 15 15 16 const ( 16 - defaultRetries = 3 17 - defaultTimeout = 45 * time.Second 17 + DefaultTimeoutPerChunk = 90 * time.Second 18 + DefaultMaxParallelDownloadsPerServer = 8 19 + DefaultMaxRetriesPerChunk = 5 20 + DefaultChunkSize = 8192 21 + DefaultWaitBetweenRetries = 0 * time.Minute 18 22 ) 19 23 20 - type downloader struct { 21 - client *http.Client 22 - retries uint 24 + // DownloadStatus is the data propagated via the channel sent back to the user 25 + // and it contains information about the download from each URL. 26 + type DownloadStatus struct { 27 + // URL this status refers to 28 + URL string 29 + 30 + // DownloadedFilePath in the user local system 31 + DownloadedFilePath string 32 + 33 + // FileSizeBytes is the total size of the file as informed by the server 34 + FileSizeBytes uint64 35 + 36 + // DownloadedFileBytes already downloaded from this URL 37 + DownloadedFileBytes uint64 38 + 39 + // Any non-recoverable error captured during the download (this means that 40 + // some errors are ignored and the download is retried instead of propagating 41 + // the error). 42 + Error error 43 + } 44 + 45 + // IsFinished informs the user whether a download is done (successfully or 46 + // with error). 47 + func (s *DownloadStatus) IsFinished() bool { 48 + return s.Error != nil || s.DownloadedFileBytes == s.FileSizeBytes 49 + } 50 + 51 + // Downloader can be configured by the user before starting the download using 52 + // the following fields. These configurations impact how the download will be 53 + // handled, including retries, amount of requests, and size of each request, for 54 + // example. 55 + type Downloader struct { 56 + // Client is the HTTP client used for every request needed to download all 57 + // the files. 58 + Client *http.Client 59 + 60 + // TimeoutPerChunk is the timeout for the download of each chunk from each 61 + // URL. 
A chunk is a part of a file requested using the content range HTTP 62 + // header. Thus, this timeout is not the timeout for each file or for 63 + // the download of every file. 64 + TimeoutPerChunk time.Duration 65 + 66 + // MaxParallelDownloadsPerServer controls how many requests are sent in 67 + // parallel to the same server. If all the URLs are from the same server 68 + // this is the total of parallel requests. If the user is downloading files 69 + // from different servers (including different subdomains), this limit is 70 + // applied to each server independently. 71 + MaxParallelDownloadsPerServer uint 72 + 73 + // MaxRetriesPerChunk is the maximum amount of retries for each HTTP request 74 + // using the content range header that fails. 75 + MaxRetriesPerChunk uint 76 + 77 + // ChunkSize is the maximum size of each HTTP request done using the 78 + // content range header. There is no way to specify how many chunks a 79 + // download will need, the focus is on slicing it in smaller chunks so slow 80 + // and unstable servers can respond before dropping it. 81 + ChunkSize uint64 82 + 83 + // WaitBetweenRetries is an optional pause before retrying an HTTP request 84 + // that has failed. 
85 + WaitBetweenRetries time.Duration 23 86 } 24 87 25 - func (d *downloader) downloadWithContext(ctx context.Context, u string) ([]byte, error) { 88 + func (d *Downloader) downloadFileWithContext(ctx context.Context, u string) ([]byte, error) { 26 89 req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil) 27 90 if err != nil { 28 91 return nil, fmt.Errorf("error creating the request for %s: %w", u, err) 29 92 } 30 93 req = req.WithContext(ctx) 31 - resp, err := d.client.Do(req) 94 + resp, err := d.Client.Do(req) 32 95 if err != nil { 33 96 return nil, fmt.Errorf("error sending a get http request to %s: %w", u, err) 34 97 } ··· 44 107 return b.Bytes(), nil 45 108 } 46 109 47 - func (d *downloader) downloadWithTimeout(u string) ([]byte, error) { 48 - ctx, cancel := context.WithTimeout(context.Background(), d.client.Timeout) 110 + func (d *Downloader) downloadFileWithTimeout(userCtx context.Context, u string) ([]byte, error) { 111 + ctx, cancel := context.WithTimeout(context.Background(), d.Client.Timeout) 49 112 defer cancel() 50 113 ch := make(chan []byte) 51 114 errs := make(chan error) 52 115 go func() { 53 - b, err := d.downloadWithContext(ctx, u) 116 + b, err := d.downloadFileWithContext(ctx, u) 54 117 if err != nil { 55 118 errs <- err 56 119 return ··· 58 121 ch <- b 59 122 }() 60 123 select { 124 + case <-userCtx.Done(): 125 + cancel() 126 + return nil, userCtx.Err() 61 127 case <-ctx.Done(): 62 128 return nil, fmt.Errorf("request to %s ended due to timeout: %w", u, ctx.Err()) 63 129 case err := <-errs: ··· 67 133 } 68 134 } 69 135 70 - func (d *downloader) download(u string) ([]byte, error) { 136 + func (d *Downloader) downloadFile(ctx context.Context, u string) ([]byte, error) { 71 137 ch := make(chan []byte, 1) 72 138 defer close(ch) 73 139 err := retry.Do( 74 140 func() error { 75 - b, err := d.downloadWithTimeout(u) 141 + b, err := d.downloadFileWithTimeout(ctx, u) 76 142 if err != nil { 77 143 return err 78 144 } 79 145 ch <- b 80 146 
return nil 81 147 }, 82 - retry.Attempts(d.retries), 83 - retry.MaxDelay(d.client.Timeout), 148 + retry.Attempts(d.MaxRetriesPerChunk), 149 + retry.MaxDelay(d.Client.Timeout), 84 150 ) 85 151 if err != nil { 86 152 return nil, fmt.Errorf("error downloading %s: %w", u, err) ··· 89 155 return b, nil 90 156 } 91 157 158 + // DownloadWithContext is a version of Download that takes a context. The 159 + // context can be used to stop all downloads in progress. 160 + func (d *Downloader) DownloadWithContext(ctx context.Context, urls ...string) <-chan DownloadStatus { 161 + ch := make(chan DownloadStatus) 162 + var wg sync.WaitGroup 163 + for _, u := range urls { 164 + wg.Add(1) 165 + go func(u string) { 166 + defer wg.Done() 167 + s := DownloadStatus{URL: u} 168 + defer func() { ch <- s }() 169 + f, err := os.CreateTemp("", "chunk-download-") 170 + if err != nil { 171 + s.Error = err 172 + return 173 + } 174 + s.DownloadedFilePath = f.Name() 175 + b, err := d.downloadFile(ctx, u) 176 + if err != nil { 177 + s.Error = err 178 + return 179 + } 180 + if err := os.WriteFile(f.Name(), b, 0655); err != nil { 181 + s.Error = err 182 + return 183 + } 184 + s.DownloadedFileBytes = uint64(len(b)) 185 + s.FileSizeBytes = uint64(len(b)) 186 + }(u) 187 + } 188 + go func() { 189 + wg.Wait() 190 + close(ch) 191 + }() 192 + return ch 193 + } 194 + 195 + // Download from all URLs slicing each in a series of chunks, of small HTTP 196 + // requests using the content range header. 197 + func (d *Downloader) Download(urls ...string) <-chan DownloadStatus { 198 + return d.DownloadWithContext(context.Background(), urls...) 199 + } 200 + 201 + // NewDownloader creates a downloader with the default configuration. Check 202 + // the constants in this package for their values. 
203 + func NewDownloader() *Downloader { 204 + return &Downloader{ 205 + &http.Client{Timeout: DefaultTimeoutPerChunk}, 206 + DefaultTimeoutPerChunk, 207 + DefaultMaxParallelDownloadsPerServer, 208 + DefaultMaxRetriesPerChunk, 209 + DefaultChunkSize, 210 + DefaultWaitBetweenRetries, 211 + } 212 + } 213 + 92 214 func main() { 93 - d := downloader{&http.Client{Timeout: defaultTimeout}, uint(defaultRetries)} 94 - b, err := d.download(os.Args[1]) 95 - if err != nil { 96 - log.Fatal(err) 215 + d := NewDownloader() 216 + for s := range d.Download(os.Args[1]) { 217 + if s.Error != nil { 218 + log.Fatal(s.Error) 219 + } 220 + if s.IsFinished() { 221 + b, err := os.ReadFile(s.DownloadedFilePath) 222 + if err != nil { 223 + log.Fatal(err) 224 + } 225 + fmt.Print(string(b)) 226 + if err := os.Remove(s.DownloadedFilePath); err != nil { 227 + log.Fatal(err) 228 + } 229 + } 97 230 } 98 - fmt.Print(string(b)) 99 231 }
+22 -8
main_test.go
··· 4 4 "fmt" 5 5 "net/http" 6 6 "net/http/httptest" 7 + "os" 7 8 "sync/atomic" 8 9 "testing" 9 10 "time" ··· 39 40 ) 40 41 } 41 42 42 - func TestGet(t *testing.T) { 43 + func TestDownload(t *testing.T) { 43 44 s := testServer(t) 44 45 defer s.Close() 45 46 for _, tc := range []struct { ··· 52 53 {"timeout", "/slow", nil}, 53 54 } { 54 55 t.Run(tc.desc, func(t *testing.T) { 55 - d := downloader{&http.Client{Timeout: 250 * time.Millisecond}, 3} 56 - got, err := d.download(s.URL + tc.path) 57 - if string(got) != string(tc.expected) { 58 - t.Errorf("expected %s, got %s", string(tc.expected), string(got)) 56 + d := NewDownloader() 57 + d.TimeoutPerChunk = 250 * time.Millisecond 58 + d.Client.Timeout = 250 * time.Millisecond 59 + d.MaxRetriesPerChunk = 3 60 + ch := d.Download(s.URL + tc.path) 61 + got := <-ch 62 + var body []byte 63 + if got.Error == nil { 64 + var err error 65 + body, err = os.ReadFile(got.DownloadedFilePath) 66 + os.Remove(got.DownloadedFilePath) 67 + if err != nil { 68 + t.Errorf("could not read dowloaded file %s", got.DownloadedFilePath) 69 + } 59 70 } 60 - if tc.expected == nil && err == nil { 71 + if string(body) != string(tc.expected) { 72 + t.Errorf("expected %s, got %s", string(tc.expected), string(body)) 73 + } 74 + if tc.expected == nil && got.Error == nil { 61 75 t.Error("expected an error, but got nil") 62 76 } 63 - if tc.expected != nil && err != nil { 64 - t.Errorf("expected no error, but got %s", err) 77 + if tc.expected != nil && got.Error != nil { 78 + t.Errorf("expected no error, but got %s", got.Error) 65 79 } 66 80 }) 67 81 }