A social RSS reader built on the AT Protocol. Website: glean.at
Topics: glean, atproto, atmosphere, rss, feed, social, app
14
fork

Configure Feed

Select the types of activity you want to include in your feed.

Remove feed caching via ETag and Last-Modified headers

+42 -111
+1 -36
docs/specs.md
··· 292 292 293 293 ### 4.1 Feed Fetching 294 294 295 - A background scheduler polls subscribed feeds on a fixed 5-minute tick. Feeds are fetched at most once per cycle regardless of how many users share them. 296 - 297 - ``` 298 - ┌─────────────────────────┐ 299 - │ Feed Scheduler │ 300 - │ (background goroutine) │ 301 - └────────┬────────────────┘ 302 - │ every 5 min 303 - ┌────────▼────────────────┐ 304 - │ Feed Fetcher │ 305 - │ │ 306 - │ 1. SELECT feeds where │ 307 - │ subscriber_count > 0 │ 308 - │ AND not fetched in │ 309 - │ last 30 min │ 310 - │ 2. Dedup in-flight: │ 311 - │ skip if already │ 312 - │ being fetched │ 313 - │ 3. Respect ETag/If-None│ 314 - │ Match / Last-Modified│ 315 - │ 4. GET feed URL │ 316 - │ 5. Parse XML/JSON │ 317 - │ 6. Upsert articles │ 318 - │ 7. Update feed metadata│ 319 - └────────┬────────────────┘ 320 - 321 - ┌──────────────┼──────────────┐ 322 - │ │ │ 323 - ┌────▼────┐ ┌─────▼─────┐ ┌─────▼─────┐ ┌─────▼─────┐ 324 - │RSS/XML │ │RSS1/RDF │ │Atom/XML │ │JSON Feed │ 325 - │Parser │ │Parser │ │Parser │ │Parser │ 326 - └─────────┘ └───────────┘ └───────────┘ └───────────┘ 327 - ``` 295 + A background scheduler polls subscribed feeds on a configurable tick. Feeds are fetched at most once per cycle regardless of how many users share them. 328 296 329 297 ### 4.2 Fetch Schedule 330 298 ··· 334 302 - **Staleness threshold**: Feeds not fetched in the last 30 minutes are eligible 335 303 - **Subscriber filter**: Only feeds with `subscriber_count > 0` are fetched 336 304 - **In-flight dedup**: If a feed is already being fetched (e.g., manual refresh and background scheduler overlap), the second caller waits for the first to complete rather than fetching again 337 - - **HTTP cache**: Honor `ETag` and `Last-Modified` headers to skip parsing when nothing changed (304 Not Modified) 338 305 - **Error tracking**: `error_count` increments on failure, resets to 0 on success. Feeds with high error counts are surfaced as "dead feeds" to the user. 
339 306 340 307 ```sql ··· 534 501 last_fetched_at DATETIME, 535 502 last_error TEXT, 536 503 subscriber_count INTEGER NOT NULL DEFAULT 0, 537 - etag TEXT, 538 - last_modified TEXT, 539 504 consecutive_empty_fetches INTEGER NOT NULL DEFAULT 0, 540 505 error_count INTEGER NOT NULL DEFAULT 0, 541 506 favicon_url TEXT
+1 -2
internal/atproto/stream_handler.go
··· 75 75 return err 76 76 77 77 case actionDelete: 78 - parsed, ok := ParseRecordURI(event.URI) 78 + _, ok := ParseRecordURI(event.URI) 79 79 if !ok { 80 80 return nil 81 81 } ··· 83 83 if err == nil && sub != nil { 84 84 return h.articles.DeleteSubscription(ctx, event.DID, sub.FeedURL) 85 85 } 86 - _ = parsed 87 86 } 88 87 return nil 89 88 }
-2
internal/db/db.go
··· 173 173 last_fetched_at DATETIME, 174 174 last_error TEXT, 175 175 subscriber_count INTEGER NOT NULL DEFAULT 0, 176 - etag TEXT, 177 - last_modified TEXT, 178 176 consecutive_empty_fetches INTEGER NOT NULL DEFAULT 0, 179 177 error_count INTEGER NOT NULL DEFAULT 0, 180 178 favicon_url TEXT
+11 -18
internal/db/feed.go
··· 21 21 LastFetchedAt sql.NullTime 22 22 LastError sql.NullString 23 23 SubscriberCount int 24 - Etag sql.NullString 25 - LastModified sql.NullString 26 24 ConsecutiveEmptyFetches int 27 25 ErrorCount int 28 26 FaviconURL sql.NullString ··· 30 28 31 29 func (f *Feed) ToFeed() *feed.Feed { 32 30 return &feed.Feed{ 33 - URL: f.FeedURL, 34 - Title: f.Title.String, 35 - SiteURL: f.SiteURL.String, 36 - Description: f.Description.String, 37 - Type: f.FeedType.String, 38 - FaviconURL: f.FaviconURL.String, 39 - ETag: f.Etag.String, 40 - LastModified: f.LastModified.String, 31 + URL: f.FeedURL, 32 + Title: f.Title.String, 33 + SiteURL: f.SiteURL.String, 34 + Description: f.Description.String, 35 + Type: f.FeedType.String, 36 + FaviconURL: f.FaviconURL.String, 41 37 } 42 38 } 43 39 ··· 54 50 FaviconURL sql.NullString 55 51 } 56 52 57 - 58 53 func scanFeed(scanner interface{ Scan(...any) error }) (*Feed, error) { 59 54 f := &Feed{} 60 55 if err := scanner.Scan(&f.FeedURL, &f.Title, &f.SiteURL, &f.Description, &f.FeedType, 61 - &f.LastFetchedAt, &f.LastError, &f.SubscriberCount, &f.Etag, &f.LastModified, 56 + &f.LastFetchedAt, &f.LastError, &f.SubscriberCount, 62 57 &f.ConsecutiveEmptyFetches, &f.ErrorCount, &f.FaviconURL); err != nil { 63 58 return nil, err 64 59 } ··· 95 90 return feeds, rows.Err() 96 91 } 97 92 98 - func (s *ArticleStore) MarkFeedFetched(ctx context.Context, feedURL, etag, lastModified string) error { 93 + func (s *ArticleStore) MarkFeedFetched(ctx context.Context, feedURL string) error { 99 94 _, err := s.db.ExecContext(ctx, ` 100 95 UPDATE articles.feeds SET 101 - etag = ?, 102 - last_modified = ?, 103 96 error_count = 0, 104 97 last_error = '', 105 98 last_fetched_at = CURRENT_TIMESTAMP 106 99 WHERE feed_url = ? 
107 - `, etag, lastModified, feedURL) 100 + `, feedURL) 108 101 return err 109 102 } 110 103 ··· 226 219 sub := &Subscription{} 227 220 err := s.db.QueryRowContext(ctx, ` 228 221 SELECT s.id, s.user_did, s.feed_url, COALESCE(s.title, f.title, ''), s.category, s.added_at, 229 - s.uri, s.cid 222 + s.uri, s.cid, f.favicon_url 230 223 FROM articles.subscriptions s 231 224 LEFT JOIN articles.feeds f ON s.feed_url = f.feed_url 232 225 WHERE s.user_did = ? AND s.feed_url = ? 233 - `, userDID, feedURL).Scan(&sub.ID, &sub.UserDID, &sub.FeedURL, &sub.FeedTitle, &sub.Category, &sub.AddedAt, &sub.URI, &sub.CID) 226 + `, userDID, feedURL).Scan(&sub.ID, &sub.UserDID, &sub.FeedURL, &sub.FeedTitle, &sub.Category, &sub.AddedAt, &sub.URI, &sub.CID, &sub.FaviconURL) 234 227 if err != nil { 235 228 return nil, err 236 229 }
+2 -2
internal/db/store.go
··· 31 31 return a.store.MarkFeedFetchError(ctx, feedURL, lastError) 32 32 } 33 33 34 - func (a *FeedStoreAdapter) StoreFetchResult(ctx context.Context, feedURL, etag, lastModified string, articles []feed.Article, faviconURL string) error { 35 - if err := a.store.MarkFeedFetched(ctx, feedURL, etag, lastModified); err != nil { 34 + func (a *FeedStoreAdapter) StoreFetchResult(ctx context.Context, feedURL string, articles []feed.Article, faviconURL string) error { 35 + if err := a.store.MarkFeedFetched(ctx, feedURL); err != nil { 36 36 return err 37 37 } 38 38 if len(articles) > 0 {
+19 -41
internal/feed/fetcher.go
··· 37 37 } 38 38 } 39 39 40 - func (f *Fetcher) Fetch(ctx context.Context, feedURL, etag, lastModified string) (*ParseResult, string, string, error) { 40 + func (f *Fetcher) Fetch(ctx context.Context, feedURL string) (*ParseResult, error) { 41 41 var lastResp *http.Response 42 42 var lastErr error 43 43 ··· 45 45 if attempt > 0 { 46 46 backoff := retryBackoff(attempt, lastResp) 47 47 if err := httpclient.SleepWithContext(ctx, backoff); err != nil { 48 - return nil, "", "", err 48 + return nil, err 49 49 } 50 50 } 51 51 52 - result, newEtag, newLastModified, resp, err := f.executeRequest(ctx, feedURL, etag, lastModified) 52 + result, resp, err := f.executeRequest(ctx, feedURL) 53 53 lastResp = resp 54 54 if err == nil { 55 - return result, newEtag, newLastModified, nil 55 + return result, nil 56 56 } 57 57 58 58 if resp != nil && !httpclient.IsRetryable(resp.StatusCode) { 59 - return nil, "", "", err 59 + return nil, err 60 60 } 61 61 62 62 lastErr = err 63 63 } 64 64 65 - return nil, "", "", lastErr 65 + return nil, lastErr 66 66 } 67 67 68 - func (f *Fetcher) executeRequest(ctx context.Context, feedURL, etag, lastModified string) (*ParseResult, string, string, *http.Response, error) { 68 + func (f *Fetcher) executeRequest(ctx context.Context, feedURL string) (*ParseResult, *http.Response, error) { 69 69 req, err := http.NewRequestWithContext(ctx, http.MethodGet, feedURL, nil) 70 70 if err != nil { 71 - return nil, "", "", nil, fmt.Errorf("creating request: %w", err) 71 + return nil, nil, fmt.Errorf("creating request: %w", err) 72 72 } 73 73 74 74 httpclient.SetDefaultHeaders(req) 75 75 req.Header.Set("Accept", httpclient.AcceptFeed) 76 - 77 - if etag != "" { 78 - req.Header.Set("If-None-Match", etag) 79 - } 80 - if lastModified != "" { 81 - req.Header.Set("If-Modified-Since", lastModified) 82 - } 83 76 84 77 resp, err := f.httpClient.Do(req) 85 78 if err != nil { 86 - return nil, "", "", nil, fmt.Errorf("fetching feed: %w", err) 79 + return nil, nil, 
fmt.Errorf("fetching feed: %w", err) 87 80 } 88 81 defer resp.Body.Close() 89 82 90 - if resp.StatusCode == http.StatusNotModified { 91 - newEtag := resp.Header.Get("ETag") 92 - if newEtag == "" { 93 - newEtag = etag 94 - } 95 - newLastModified := resp.Header.Get("Last-Modified") 96 - if newLastModified == "" { 97 - newLastModified = lastModified 98 - } 99 - return nil, newEtag, newLastModified, resp, nil 100 - } 101 - 102 83 if resp.StatusCode == http.StatusTooManyRequests { 103 - return nil, "", "", resp, fmt.Errorf("rate limited (retry-after: %s)", resp.Header.Get("Retry-After")) 84 + return nil, resp, fmt.Errorf("rate limited (retry-after: %s)", resp.Header.Get("Retry-After")) 104 85 } 105 86 106 87 if resp.StatusCode >= 500 { 107 - return nil, "", "", resp, fmt.Errorf("server error: %d", resp.StatusCode) 88 + return nil, resp, fmt.Errorf("server error: %d", resp.StatusCode) 108 89 } 109 90 110 91 if resp.StatusCode < 200 || resp.StatusCode >= 300 { 111 - return nil, "", "", resp, fmt.Errorf("unexpected status: %d", resp.StatusCode) 92 + return nil, resp, fmt.Errorf("unexpected status: %d", resp.StatusCode) 112 93 } 113 94 114 - newEtag := resp.Header.Get("ETag") 115 - newLastModified := resp.Header.Get("Last-Modified") 116 - 117 95 result, err := Parse(resp.Body, feedURL) 118 96 if err != nil { 119 - return nil, "", "", nil, fmt.Errorf("parsing feed: %w", err) 97 + return nil, nil, fmt.Errorf("parsing feed: %w", err) 120 98 } 121 99 122 - return result, newEtag, newLastModified, resp, nil 100 + return result, resp, nil 123 101 } 124 102 125 103 func retryBackoff(attempt int, lastResp *http.Response) time.Duration { ··· 135 113 136 114 type FeedStore interface { 137 115 GetFeedsToFetch(ctx context.Context, olderThan time.Duration, limit int) ([]*Feed, error) 138 - StoreFetchResult(ctx context.Context, feedURL, etag, lastModified string, articles []Article, faviconURL string) error 116 + StoreFetchResult(ctx context.Context, feedURL string, articles []Article, 
faviconURL string) error 139 117 RecordFetchError(ctx context.Context, feedURL, lastError string) error 140 118 } 141 119 ··· 165 143 166 144 func (s *Scheduler) Run(ctx context.Context) error { 167 145 s.logger.Info("starting initial feed refresh") 168 - s.fetchAll(ctx, 0) 146 + s.fetchAll(ctx, s.staleInterval) 169 147 170 148 ticker := time.NewTicker(s.tickInterval) 171 149 defer ticker.Stop() ··· 215 193 }() 216 194 217 195 start := time.Now() 218 - result, newEtag, newLastModified, err := s.fetcher.Fetch(ctx, feed.URL, feed.ETag, feed.LastModified) 196 + result, err := s.fetcher.Fetch(ctx, feed.URL) 219 197 metrics.FeedsFetchedDuration.Observe(time.Since(start).Seconds()) 220 198 metrics.FeedsFetched.Inc() 221 199 metrics.FeedsFetchedLast.Set(float64(time.Now().Unix())) ··· 227 205 228 206 if result == nil { 229 207 s.logger.Info("fetched articles", "feed", feed.URL, "count", 0) 230 - if err := s.store.StoreFetchResult(ctx, feed.URL, newEtag, newLastModified, nil, ""); err != nil { 208 + if err := s.store.StoreFetchResult(ctx, feed.URL, nil, ""); err != nil { 231 209 s.logger.Error("failed to store feed fetch result", "error", err, "feed", feed.URL) 232 210 } 233 211 return ··· 238 216 faviconURL = ResolveFavicon(context.Background(), feed.URL, feed.SiteURL) 239 217 } 240 218 241 - if err := s.store.StoreFetchResult(ctx, feed.URL, newEtag, newLastModified, result.Articles, faviconURL); err != nil { 219 + if err := s.store.StoreFetchResult(ctx, feed.URL, result.Articles, faviconURL); err != nil { 242 220 s.logger.Error("failed to store feed fetch result", "error", err, "feed", feed.URL) 243 221 } else { 244 222 articleCount := len(result.Articles)
+6 -8
internal/feed/parser.go
··· 13 13 ) 14 14 15 15 type Feed struct { 16 - URL string 17 - Title string 18 - SiteURL string 19 - Description string 20 - Type string 21 - FaviconURL string 22 - ETag string 23 - LastModified string 16 + URL string 17 + Title string 18 + SiteURL string 19 + Description string 20 + Type string 21 + FaviconURL string 24 22 } 25 23 26 24 type Article struct {
+2 -2
internal/server/feeds_handler.go
··· 93 93 return 94 94 } 95 95 96 - result, _, _, err := s.fetcher.Fetch(r.Context(), feedURL, "", "") 96 + result, err := s.fetcher.Fetch(r.Context(), feedURL) 97 97 if err != nil { 98 98 result, feedURL, err = s.discoverFeed(r.Context(), feedURL) 99 99 } ··· 463 463 } 464 464 465 465 for _, candidate := range discovered.FeedURLs { 466 - result, _, _, fetchErr := s.fetcher.Fetch(ctx, candidate, "", "") 466 + result, fetchErr := s.fetcher.Fetch(ctx, candidate) 467 467 if fetchErr == nil && result != nil { 468 468 return result, candidate, nil 469 469 }