🧱 Chunk is a download manager for slow and unstable servers
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge pull request #22 from cuducos/retry-head

Uses retries (if needed) for the HEAD requests to get the download file size

Authored by Eduardo Cuducos and committed by GitHub
aa0553d7 4e96cd29

+70 -32
+36 -22
main.go
··· 33 33 DownloadedFilePath string 34 34 35 35 // FileSizeBytes is the total size of the file as informed by the server 36 - FileSizeBytes uint64 36 + FileSizeBytes int64 37 37 38 38 // DownloadedFileBytes already downloaded from this URL 39 - DownloadedFileBytes uint64 39 + DownloadedFileBytes int64 40 40 41 41 // Any non-recoerable error captured during the download (this means that 42 42 // some errors are ignored the download is retried instead of propagating ··· 80 80 // content range header. There is no way to specify how many chunks a 81 81 // download will need, the focus is on slicing it in smaller chunks so slow 82 82 // and unstable servers can respond before dropping it. 83 - ChunkSize uint64 83 + ChunkSize int64 84 84 85 85 // WaitBetweenRetries is an optional pause before retrying an HTTP request 86 86 // that has failed. ··· 134 134 } 135 135 } 136 136 137 - func (d *Downloader) getDownloadSize(ctx context.Context, u string) (uint64, error) { 137 + func (d *Downloader) getDownloadSize(ctx context.Context, u string) (int64, error) { 138 138 req, err := http.NewRequestWithContext(ctx, http.MethodHead, u, nil) 139 139 if err != nil { 140 140 return 0, fmt.Errorf("creating the request for %s: %w", u, err) 141 141 } 142 - resp, err := d.client.Do(req) 142 + ch := make(chan *http.Response, 1) 143 + defer close(ch) 144 + err = retry.Do( 145 + func() error { 146 + resp, err := d.client.Do(req) 147 + if err != nil { 148 + return err 149 + } 150 + if resp.StatusCode != 200 { 151 + return fmt.Errorf("got unexpected http response status for %s: %s", u, resp.Status) 152 + } 153 + ch <- resp 154 + return nil 155 + }, 156 + retry.Attempts(d.MaxParallelDownloadsPerServer), 157 + retry.MaxDelay(d.WaitBetweenRetries), 158 + ) 143 159 if err != nil { 144 - return 0, fmt.Errorf("sending get http request to %s: %w", u, err) 160 + return 0, fmt.Errorf("error sending get http request to %s: %w", u, err) 145 161 } 162 + resp := <-ch 146 163 defer resp.Body.Close() 147 - if 
resp.StatusCode != 200 { 148 - return 0, fmt.Errorf("got unexpected http response status for %s: %s", u, resp.Status) 149 - } 150 164 if resp.ContentLength <= 0 { 151 - if resp.Header.Get("Content-Range") == "" { 152 - // TODO: find a way to throw an error on no-content with keeping the tests run as usual 153 - return 0, nil 165 + var s int64 166 + r := strings.TrimSpace(resp.Header.Get("Content-Range")) 167 + if r == "" { 168 + return 0, fmt.Errorf("could not get content length for %s", u) 154 169 } 155 - var s uint64 156 - p := strings.Split(resp.Header.Get("Content-Range"), "/") 170 + p := strings.Split(r, "/") 157 171 fmt.Sscan(p[len(p)-1], &s) 158 172 return s, nil 159 173 } 160 - return uint64(resp.ContentLength), nil 174 + return resp.ContentLength, nil 161 175 } 162 176 163 177 func (d *Downloader) downloadFile(ctx context.Context, u string) ([]byte, error) { ··· 183 197 } 184 198 185 199 type chunk struct { 186 - start uint64 187 - end uint64 200 + start int64 201 + end int64 188 202 } 189 203 190 - func (c chunk) size() uint64 { return (c.end + 1) - c.start } 204 + func (c chunk) size() int64 { return (c.end + 1) - c.start } 191 205 func (c chunk) rangeHeader() string { return fmt.Sprintf("bytes=%d-%d", c.start, c.end) } 192 206 193 - func (d *Downloader) chunks(t uint64) []chunk { 194 - var start uint64 207 + func (d *Downloader) chunks(t int64) []chunk { 208 + var start int64 195 209 last := t - 1 196 210 var c []chunk 197 211 for { ··· 223 237 path := filepath.Join(os.TempDir(), filepath.Base(u)) 224 238 s := DownloadStatus{URL: u, DownloadedFilePath: path} 225 239 defer func() { ch <- s }() 226 - t, err := d.getDownloadSize(ctx, u) // TODO: retry 240 + t, err := d.getDownloadSize(ctx, u) 227 241 if err != nil { 228 242 s.Error = fmt.Errorf("error getting file size: %w", err) 229 243 return ··· 239 253 s.Error = err 240 254 return 241 255 } 242 - s.DownloadedFileBytes = uint64(len(b)) 256 + s.DownloadedFileBytes = int64(len(b)) 243 257 }(u) 244 258 } 
245 259 go func() {
+32 -8
main_test.go
··· 25 25 s := httptest.NewServer(http.HandlerFunc( 26 26 func(w http.ResponseWriter, r *http.Request) { 27 27 if r.Method == http.MethodHead { 28 + w.Header().Add("Content-Length", "2") 28 29 return 29 30 } 30 31 tc.proc(w) ··· 161 162 s := httptest.NewServer(http.HandlerFunc( 162 163 func(w http.ResponseWriter, r *http.Request) { 163 164 if r.Method == http.MethodHead { 165 + w.Header().Add("Content-Length", "2") 164 166 return 165 167 } 166 168 time.Sleep(2 * userTimeout) // this time is greater than the user timeout, but shorter than the timeout per chunk. ··· 196 198 d.ChunkSize = 5 197 199 got := d.chunks(12) 198 200 chunks := []chunk{{0, 4}, {5, 9}, {10, 11}} 199 - sizes := []uint64{5, 5, 2} 201 + sizes := []int64{5, 5, 2} 200 202 headers := []string{"bytes=0-4", "bytes=5-9", "bytes=10-11"} 201 203 if len(got) != len(chunks) { 202 204 t.Errorf("expected %d chunks, got %d", len(chunks), len(got)) ··· 236 238 } 237 239 } 238 240 241 + func TestGetDownloadSize_WithRetry(t *testing.T) { 242 + attempts := int32(0) 243 + s := httptest.NewServer(http.HandlerFunc( 244 + func(w http.ResponseWriter, r *http.Request) { 245 + fmt.Printf("attempts = %d\n", atomic.LoadInt32(&attempts)) // TODO: remove 246 + if atomic.CompareAndSwapInt32(&attempts, 0, 1) { 247 + w.WriteHeader(http.StatusTooManyRequests) 248 + return 249 + } 250 + fmt.Fprint(w, "Test") 251 + }, 252 + )) 253 + defer s.Close() 254 + 255 + d := DefaultDownloader() 256 + fmt.Printf("d.MaxRetriesPerChunk = %d\n", d.MaxRetriesPerChunk) // TODO: remove 257 + fmt.Printf("d.WaitBetweenRetries = %v\n", d.WaitBetweenRetries) // TODO: remove 258 + got, err := d.getDownloadSize(context.Background(), s.URL) 259 + 260 + if err != nil { 261 + t.Errorf("expected no error getting the file size, got %s", err) 262 + } 263 + if got != 4 { 264 + t.Errorf("invalid size, expected 4, got: %d", got) 265 + } 266 + } 267 + 239 268 func TestGetDownloadSize_ContentRange(t *testing.T) { 240 269 s := httptest.NewServer(http.HandlerFunc( 
241 270 func(w http.ResponseWriter, r *http.Request) { ··· 277 306 defer s.Close() 278 307 279 308 d := DefaultDownloader() 280 - got, err := d.getDownloadSize(context.Background(), s.URL) 281 - 282 - if err != nil { 283 - t.Errorf("expected no error getting the file size, got %s", err) 284 - } 285 - if got != 0 { 286 - t.Errorf("invalid size, expected 0, got: %d", got) 309 + if _, err := d.getDownloadSize(context.Background(), s.URL); err == nil { 310 + t.Error("expected error getting the file size, got nil") 287 311 } 288 312 }
+2 -2
progress.go
··· 19 19 // download fields so the encoder/decoder has access to them 20 20 URL string 21 21 Path string 22 - ChunkSize uint64 22 + ChunkSize int64 23 23 Chunks []uint32 24 24 } 25 25 ··· 123 123 return nil // Either not empty or error, suits both cases 124 124 } 125 125 126 - func newProgress(path, url string, chunkSize uint64, chunks int, restart bool) (*progress, error) { 126 + func newProgress(path, url string, chunkSize int64, chunks int, restart bool) (*progress, error) { 127 127 absPath, err := filepath.Abs(path) 128 128 if err != nil { 129 129 return nil, fmt.Errorf("error getting absolute path for %s: %w", path, err)