A social RSS reader built on the AT Protocol. glean.at
glean atproto atmosphere rss feed social app
14
fork

Configure Feed

Select the types of activity you want to include in your feed.

Add retry logic and centralized HTTP client configuration

+184 -17
+9 -1
internal/feed/discover.go
··· 8 8 "regexp" 9 9 "strings" 10 10 "time" 11 + 12 + "pkg.rbrt.fr/glean/internal/httpclient" 11 13 ) 12 14 13 15 type imageContentTypePrefixes []string ··· 37 39 baseHrefRe = regexp.MustCompile(`<base[^>]+href="([^"]*)"`) 38 40 39 41 faviconPaths = []string{"/favicon.ico", "/favicon.png", "/apple-touch-icon.png"} 40 - discoverClient = &http.Client{Timeout: 15 * time.Second} 42 + discoverClient = &http.Client{ 43 + Timeout: 15 * time.Second, 44 + Transport: httpclient.NewTransport(), 45 + } 41 46 ) 42 47 43 48 func Discover(ctx context.Context, siteURL string) (*DiscoveryResult, error) { ··· 129 134 if err != nil { 130 135 return 131 136 } 137 + httpclient.SetDefaultHeaders(req) 132 138 resp, err := discoverClient.Do(req) 133 139 if err != nil { 134 140 return ··· 158 164 if err != nil { 159 165 return false 160 166 } 167 + httpclient.SetDefaultHeaders(req) 161 168 resp, err := discoverClient.Do(req) 162 169 if err != nil { 163 170 return false ··· 179 186 if err != nil { 180 187 return nil, "" 181 188 } 189 + httpclient.SetDefaultHeaders(req) 182 190 req.Header.Set("Accept", "text/html") 183 191 184 192 resp, err := discoverClient.Do(req)
+64 -8
internal/feed/fetcher.go
··· 8 8 "sync" 9 9 "time" 10 10 11 + "pkg.rbrt.fr/glean/internal/httpclient" 11 12 "pkg.rbrt.fr/glean/internal/metrics" 13 + ) 14 + 15 + const ( 16 + maxRetries = 3 17 + baseRetryDelay = 1 * time.Second 12 18 ) 13 19 14 20 type Fetcher struct { ··· 18 24 func NewFetcher() *Fetcher { 19 25 return &Fetcher{ 20 26 httpClient: &http.Client{ 21 - Timeout: 30 * time.Second, 27 + Timeout: 30 * time.Second, 28 + Transport: httpclient.NewTransport(), 22 29 CheckRedirect: func(req *http.Request, via []*http.Request) error { 23 30 if len(via) >= 10 { 24 31 return fmt.Errorf("too many redirects") ··· 30 37 } 31 38 32 39 func (f *Fetcher) Fetch(ctx context.Context, feedURL, etag, lastModified string) (*ParseResult, string, string, error) { 40 + var lastResp *http.Response 41 + var lastErr error 42 + 43 + for attempt := range maxRetries + 1 { 44 + if attempt > 0 { 45 + backoff := retryBackoff(attempt, lastResp) 46 + if err := httpclient.SleepWithContext(ctx, backoff); err != nil { 47 + return nil, "", "", err 48 + } 49 + } 50 + 51 + result, newEtag, newLastModified, resp, err := f.executeRequest(ctx, feedURL, etag, lastModified) 52 + lastResp = resp 53 + if err == nil { 54 + return result, newEtag, newLastModified, nil 55 + } 56 + 57 + if resp == nil || !httpclient.IsRetryable(resp.StatusCode) { 58 + return nil, "", "", err 59 + } 60 + 61 + lastErr = err 62 + } 63 + 64 + return nil, "", "", lastErr 65 + } 66 + 67 + func (f *Fetcher) executeRequest(ctx context.Context, feedURL, etag, lastModified string) (*ParseResult, string, string, *http.Response, error) { 33 68 req, err := http.NewRequestWithContext(ctx, http.MethodGet, feedURL, nil) 34 69 if err != nil { 35 - return nil, "", "", fmt.Errorf("creating request: %w", err) 70 + return nil, "", "", nil, fmt.Errorf("creating request: %w", err) 36 71 } 37 72 73 + httpclient.SetDefaultHeaders(req) 74 + req.Header.Set("Accept", httpclient.AcceptFeed) 75 + 38 76 if etag != "" { 39 77 req.Header.Set("If-None-Match", etag) 40 78 } ··· 44 
82 45 83 resp, err := f.httpClient.Do(req) 46 84 if err != nil { 47 - return nil, "", "", fmt.Errorf("fetching feed: %w", err) 85 + return nil, "", "", nil, fmt.Errorf("fetching feed: %w", err) 48 86 } 49 87 defer resp.Body.Close() 50 88 51 89 if resp.StatusCode == http.StatusNotModified { 52 - return nil, "", "", nil 90 + return nil, "", "", resp, nil 91 + } 92 + 93 + if resp.StatusCode == http.StatusTooManyRequests { 94 + return nil, "", "", resp, fmt.Errorf("rate limited (retry-after: %s)", resp.Header.Get("Retry-After")) 95 + } 96 + 97 + if resp.StatusCode >= 500 { 98 + return nil, "", "", resp, fmt.Errorf("server error: %d", resp.StatusCode) 53 99 } 54 100 55 101 if resp.StatusCode < 200 || resp.StatusCode >= 300 { 56 - return nil, "", "", fmt.Errorf("unexpected status: %d", resp.StatusCode) 102 + return nil, "", "", resp, fmt.Errorf("unexpected status: %d", resp.StatusCode) 57 103 } 58 104 59 105 newEtag := resp.Header.Get("ETag") ··· 61 107 62 108 result, err := Parse(resp.Body, feedURL) 63 109 if err != nil { 64 - return nil, "", "", fmt.Errorf("parsing feed: %w", err) 110 + return nil, "", "", nil, fmt.Errorf("parsing feed: %w", err) 65 111 } 66 112 67 - return result, newEtag, newLastModified, nil 113 + return result, newEtag, newLastModified, resp, nil 114 + } 115 + 116 + func retryBackoff(attempt int, lastResp *http.Response) time.Duration { 117 + if lastResp != nil && lastResp.StatusCode == http.StatusTooManyRequests { 118 + if v := lastResp.Header.Get("Retry-After"); v != "" { 119 + if d := httpclient.ParseRetryAfter(v); d > 0 { 120 + return min(d, 10*time.Second) 121 + } 122 + } 123 + } 124 + return baseRetryDelay * time.Duration(1<<(attempt-1)) 68 125 } 69 126 70 127 type FeedStore interface { ··· 101 158 ticker := time.NewTicker(s.tickInterval) 102 159 defer ticker.Stop() 103 160 104 - // fetch all at startup 105 161 s.fetchAll(ctx) 106 162 107 163 for {
+70
internal/httpclient/httpclient.go
// httpclient.go (package httpclient) centralizes the HTTP settings shared by
// the feed discovery, feed fetching and scraping code: transport tuning,
// default request headers and the helpers used for retry handling.

import (
	"context"
	"fmt"
	"net"
	"net/http"
	"strconv"
	"time"
)

const (
	// UserAgent is the User-Agent value sent with every outbound request.
	UserAgent = "Glean/1.0 (RSS Reader)"

	// AcceptFeed is the Accept header value used when requesting a feed.
	AcceptFeed = "application/xml,application/atom+xml,application/rss+xml,application/rdf+xml,application/feed+json,text/html;q=0.9"
	// AcceptHTML is the Accept header value used when requesting a web page.
	AcceptHTML = "text/html,application/xhtml+xml;q=0.9"
)

// NewTransport returns a fresh *http.Transport with explicit dial, TLS
// handshake and idle-connection limits. Each call builds an independent
// transport, so connection pools are not shared between callers.
func NewTransport() *http.Transport {
	dialer := &net.Dialer{
		Timeout:   10 * time.Second,
		KeepAlive: 15 * time.Second,
	}
	return &http.Transport{
		DialContext:         dialer.DialContext,
		MaxIdleConns:        50,
		IdleConnTimeout:     10 * time.Second,
		TLSHandshakeTimeout: 10 * time.Second,
		// A custom DialContext disables HTTP/2 by default; opt back in.
		ForceAttemptHTTP2: true,
	}
}

// SetDefaultHeaders applies the headers every outbound request should carry.
// Callers set Accept themselves (see AcceptFeed and AcceptHTML).
//
// Accept-Encoding is deliberately left unset. net/http only decodes gzip
// transparently when the Transport added the header itself; requesting
// "br,gzip" by hand would hand compressed bytes to callers that read
// resp.Body directly, and the standard library has no Brotli decoder.
func SetDefaultHeaders(req *http.Request) {
	req.Header.Set("User-Agent", UserAgent)
	// Ask for the connection to be closed after the response instead of being
	// kept alive. NOTE(review): this limits connection reuse despite the
	// idle-pool settings in NewTransport — confirm it is intended.
	req.Header.Set("Connection", "close")
}

// ParseRetryAfter interprets a Retry-After header value, which is either a
// number of seconds or an HTTP date. It returns the time to wait, or 0 when
// the value cannot be parsed, is negative, or names a moment in the past.
func ParseRetryAfter(v string) time.Duration {
	var wait time.Duration
	if secs, err := strconv.Atoi(v); err == nil {
		wait = time.Duration(secs) * time.Second
	} else if when, err := http.ParseTime(v); err == nil {
		wait = time.Until(when)
	}
	if wait < 0 {
		// Never report a negative delay; callers treat 0 as "no hint".
		return 0
	}
	return wait
}

// SleepWithContext blocks for d or until ctx is done, whichever comes first.
// It returns ctx.Err() when the context ends the wait and nil otherwise.
func SleepWithContext(ctx context.Context, d time.Duration) error {
	timer := time.NewTimer(d)
	defer timer.Stop()
	select {
	case <-ctx.Done():
		return ctx.Err()
	case <-timer.C:
		return nil
	}
}

// StatusError reports a response whose HTTP status was not acceptable.
type StatusError struct {
	// StatusCode is the HTTP status code of the response.
	StatusCode int
	// RetryAfter is the delay taken from the Retry-After header; zero when
	// the caller did not record one.
	RetryAfter time.Duration
}

// Error implements the error interface.
func (e *StatusError) Error() string {
	return fmt.Sprintf("HTTP %d", e.StatusCode)
}

// IsRetryable reports whether a request that ended with statusCode is worth
// retrying: 429 Too Many Requests or any status of 500 and above.
func IsRetryable(statusCode int) bool {
	return statusCode == http.StatusTooManyRequests || statusCode >= 500
}
+41 -8
internal/scraper/scraper.go
··· 3 3 import ( 4 4 "bytes" 5 5 "context" 6 + "errors" 6 7 "fmt" 7 8 "io" 8 9 "log/slog" 9 10 "net/http" 10 11 "strings" 11 12 "time" 13 + 14 + "pkg.rbrt.fr/glean/internal/httpclient" 12 15 13 16 "golang.org/x/net/html" 14 17 ) ··· 22 25 func New(logger *slog.Logger) *Scraper { 23 26 return &Scraper{ 24 27 client: &http.Client{ 25 - Timeout: 15 * time.Second, 28 + Timeout: 15 * time.Second, 29 + Transport: httpclient.NewTransport(), 26 30 CheckRedirect: func(req *http.Request, via []*http.Request) error { 27 31 if len(via) >= 10 { 28 32 return fmt.Errorf("too many redirects") ··· 66 70 } 67 71 68 72 func (s *Scraper) fetch(ctx context.Context, url string) (io.Reader, error) { 73 + reader, err := s.doFetch(ctx, url) 74 + if err == nil { 75 + return reader, nil 76 + } 77 + 78 + var se *httpclient.StatusError 79 + if !errors.As(err, &se) || !httpclient.IsRetryable(se.StatusCode) { 80 + return nil, err 81 + } 82 + 83 + backoff := 2 * time.Second 84 + if se.RetryAfter > 0 { 85 + backoff = min(se.RetryAfter, 5*time.Second) 86 + } 87 + 88 + if err := httpclient.SleepWithContext(ctx, backoff); err != nil { 89 + return nil, err 90 + } 91 + 92 + return s.doFetch(ctx, url) 93 + } 94 + 95 + func (s *Scraper) doFetch(ctx context.Context, url string) (io.Reader, error) { 69 96 req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) 70 97 if err != nil { 71 98 return nil, err 72 99 } 73 - req.Header.Set("User-Agent", "Glean/1.0 (RSS Reader)") 100 + 101 + httpclient.SetDefaultHeaders(req) 102 + req.Header.Set("Accept", httpclient.AcceptHTML) 74 103 75 104 resp, err := s.client.Do(req) 76 105 if err != nil { ··· 78 107 } 79 108 defer resp.Body.Close() 80 109 81 - if resp.StatusCode != http.StatusOK { 82 - return nil, fmt.Errorf("unexpected status: %d", resp.StatusCode) 110 + if resp.StatusCode == http.StatusOK { 111 + data, err := io.ReadAll(io.LimitReader(resp.Body, 2*1024*1024)) 112 + if err != nil { 113 + return nil, fmt.Errorf("reading body: %w", err) 114 + } 115 
+ return bytes.NewReader(data), nil 83 116 } 84 117 85 - data, err := io.ReadAll(io.LimitReader(resp.Body, 2*1024*1024)) 86 - if err != nil { 87 - return nil, fmt.Errorf("reading body: %w", err) 118 + se := &httpclient.StatusError{StatusCode: resp.StatusCode} 119 + if resp.StatusCode == http.StatusTooManyRequests { 120 + se.RetryAfter = httpclient.ParseRetryAfter(resp.Header.Get("Retry-After")) 88 121 } 89 - return bytes.NewReader(data), nil 122 + return nil, se 90 123 } 91 124 92 125 func extractContent(r io.Reader) (string, error) {