feat: rework cache warming · dunkirk.sh/hn-alerts@16b0541

+31 -23

src/index.ts

··· 5 5 import { version, name } from "../package.json"; 6 6 import { preloadCaches, invalidateAndRefreshCaches } from "./libs/cacheWarming"; 7 7 import { 8 - QueryCache, 9 8 queryCache, 10 9 compressResponse, 11 10 createCachedEndpoint, 11 + createCacheHeaders, 12 12 } from "./libs/cache"; 13 13 import { handleCORS } from "./libs/cors"; 14 14 import root from "../public/index.html"; ··· 99 99 }, 100 100 where: (stories, { eq }) => eq(stories.isOnLeaderboard, true), 101 101 orderBy: (stories, { asc }) => [asc(stories.position)], 102 - limit: 30, // Reduced from 100 to 30 for better performance 102 + limit: 30, 103 103 }); 104 104 105 105 // Pre-calculate the time multiplier to optimize date transformations ··· 202 202 } 203 203 204 204 // Create a cached endpoint handler dynamically based on the story ID 205 - const handler = createCachedEndpoint( 206 - `story_snapshots_${storyId}`, 207 - async () => { 208 - // Get snapshots for the story 209 - const snapshots = await db.query.leaderboardSnapshots.findMany({ 210 - where: (snapshots, { eq }) => eq(snapshots.storyId, storyId), 211 - orderBy: (snapshots, { asc }) => [asc(snapshots.timestamp)], 212 - }); 213 - 214 - // Transform snapshot data for frontend 215 - return snapshots.map((snapshot) => ({ 216 - timestamp: snapshot.timestamp, 217 - position: snapshot.position, 218 - score: snapshot.score, 219 - date: new Date(snapshot.timestamp * 1000).toISOString(), 220 - })); 221 - }, 222 - 3600, // Cache story snapshots for 1 hour as they change less frequently 223 - ); 205 + const cacheKey = `story_snapshots_${storyId}`; 206 + const queryFn = async () => { 207 + // Get snapshots for the story 208 + const snapshots = await db.query.leaderboardSnapshots.findMany({ 209 + where: (snapshots, { eq }) => eq(snapshots.storyId, storyId), 210 + orderBy: (snapshots, { asc }) => [asc(snapshots.timestamp)], 211 + }); 224 212 225 - // Execute the cached handler 226 - return handler(req); 213 + // Transform snapshot data for frontend 214 + return snapshots.map((snapshot) => ({ 215 + timestamp: snapshot.timestamp, 216 + position: snapshot.position, 217 + score: snapshot.score, 218 + date: new Date(snapshot.timestamp * 1000).toISOString(), 219 + })); 220 + }; 221 + 222 + // Register this dynamic query for potential cache warming 223 + queryCache.register(cacheKey, queryFn, 3600); 224 + 225 + // Execute the query with caching 226 + const data = await queryCache.get(cacheKey, queryFn, 3600); 227 + 228 + // Return formatted response 229 + const response = new Response(JSON.stringify(data), { 230 + headers: createCacheHeaders(cacheKey, 3600), 231 + }); 232 + 233 + return compressResponse(req, response); 227 234 } catch (error) { 228 235 console.error("Failed to fetch snapshots for story:", error); 236 + Sentry.captureException(error); 229 237 return new Response( 230 238 JSON.stringify({ error: "Failed to fetch snapshots" }), 231 239 {

+89 -57

src/libs/cache.ts

··· 1 1 import { version } from "../../package.json"; 2 + import * as Sentry from "@sentry/bun"; 2 3 3 4 /** 4 5 * Creates consistent cache headers with stable ETags ··· 81 82 expiresAt: number; 82 83 }; 83 84 85 + // Type for registered query functions 86 + export type QueryFunction<T> = () => Promise<T>; 87 + 84 88 export class QueryCache { 85 89 private cache: Map<string, CacheItem<unknown>> = new Map(); 86 90 private defaultTTL: number = 60 * 5; // 5 minutes in seconds 87 91 private prefetchQueue: Set<string> = new Set(); 88 92 private maxItems = 500; // Maximum cache entries 89 - private highLoadMode = false; // Track high load mode 90 - private highLoadThreshold = 200; // Request threshold for high load 91 93 private requestCounter = 0; // Counter for recent requests 92 94 private lastCounterReset: number = Date.now(); // Last time counter was reset 93 95 96 + // Registry to store query functions for reuse during cache warming 97 + private queryRegistry: Map< 98 + string, 99 + { fn: QueryFunction<unknown>; ttl: number } 100 + > = new Map(); 101 + 94 102 constructor(defaultTTL?: number, maxItems?: number) { 95 103 if (defaultTTL) { 96 104 this.defaultTTL = defaultTTL; ··· 102 110 `Initialized query cache with ${this.defaultTTL}s TTL and max ${this.maxItems} items`, 103 111 ); 104 112 105 - // Set up periodic counter reset for load detection 113 + // Set up periodic counter reset for monitoring 106 114 setInterval(() => { 107 - this.highLoadMode = this.requestCounter > this.highLoadThreshold; 108 115 this.requestCounter = 0; 109 116 this.lastCounterReset = Date.now(); 110 117 }, 10000); // Reset every 10 seconds 111 118 } 112 119 120 + /** 121 + * Register a query function for later use in cache warming 122 + * @param key Cache key 123 + * @param queryFn Function that performs the actual query 124 + * @param ttl Cache TTL in seconds 125 + */ 126 + register<T>( 127 + key: string, 128 + queryFn: QueryFunction<T>, 129 + ttl: number = this.defaultTTL, 130 + ): void { 131 + this.queryRegistry.set(key, { fn: queryFn as QueryFunction<unknown>, ttl }); 132 + console.log(`Registered query function for key: ${key} with TTL: ${ttl}s`); 133 + } 134 + 135 + /** 136 + * Get all registered cache keys 137 + * @returns Array of registered cache keys 138 + */ 139 + getRegisteredKeys(): string[] { 140 + return Array.from(this.queryRegistry.keys()); 141 + } 142 + 143 + /** 144 + * Get data from cache or execute the query function 145 + * @param key Cache key 146 + * @param queryFn Function that performs the actual query 147 + * @param ttl Cache TTL in seconds 148 + * @returns Query result 149 + */ 113 150 async get<T>( 114 151 key: string, 115 - queryFn: () => Promise<T>, 152 + queryFn: QueryFunction<T>, 116 153 ttl: number = this.defaultTTL, 117 154 ): Promise<T> { 118 155 // Track request load ··· 123 160 124 161 // Return cached value if it exists and is not expired 125 162 if (cached && cached.expiresAt > now) { 126 - // Reduce logging in high load scenarios 127 - if (!this.highLoadMode) { 128 - console.log( 129 - `Cache hit for ${key} (expires in ${cached.expiresAt - now}s)`, 130 - ); 131 - } 163 + console.log( 164 + `Cache hit for ${key} (expires in ${cached.expiresAt - now}s)`, 165 + ); 132 166 133 167 // Prefetch if approaching expiration (last 10% of TTL) 134 - // Don't prefetch during high load to reduce DB pressure 135 - if ( 136 - !this.highLoadMode && 137 - cached.expiresAt - now < ttl * 0.1 && 138 - !this.prefetchQueue.has(key) 139 - ) { 168 + if (cached.expiresAt - now < ttl * 0.1 && !this.prefetchQueue.has(key)) { 140 169 this.prefetch(key, queryFn, ttl); 141 170 } 142 171 ··· 144 173 } 145 174 146 175 // Execute the query 147 - if (!this.highLoadMode) { 148 - console.log(`Cache miss for ${key}, fetching from database...`); 149 - } 176 + console.log(`Cache miss for ${key}, fetching from database...`); 150 177 const data = await queryFn(); 151 178 152 179 // Cache the result ··· 165 192 // Background prefetch to refresh cache before expiration 166 193 private prefetch<T>( 167 194 key: string, 168 - queryFn: () => Promise<T>, 195 + queryFn: QueryFunction<T>, 169 196 ttl: number, 170 197 ): void { 171 198 this.prefetchQueue.add(key); ··· 186 213 console.log(`Successfully prefetched ${key}`); 187 214 } catch (error) { 188 215 console.error(`Error prefetching ${key}:`, error); 216 + Sentry.captureException(error); 189 217 } finally { 190 218 this.prefetchQueue.delete(key); 191 219 } 192 220 }, 0); 193 221 } 194 222 223 + /** 224 + * Warm a specific cache entry using its registered query function 225 + * @param key Cache key to warm 226 + * @returns Promise resolving to the cached data or null if key not registered 227 + */ 228 + async warmCache<T>(key: string): Promise<T | null> { 229 + const registration = this.queryRegistry.get(key); 230 + if (!registration) { 231 + console.warn( 232 + `Cannot warm cache for ${key}: No registered query function`, 233 + ); 234 + return null; 235 + } 236 + 237 + try { 238 + console.log(`Warming cache for ${key} using registered function`); 239 + const data = await this.get( 240 + key, 241 + registration.fn as QueryFunction<T>, 242 + registration.ttl, 243 + ); 244 + return data; 245 + } catch (error) { 246 + console.error(`Error warming cache for ${key}:`, error); 247 + Sentry.captureException(error); 248 + return null; 249 + } 250 + } 251 + 195 252 invalidate(key: string): void { 196 253 if (this.cache.has(key)) { 197 254 console.log(`Invalidating cache for ${key}`); ··· 224 281 } 225 282 } 226 283 227 - if (!this.highLoadMode) { 228 - console.log(`Pruned ${removeCount} oldest items from cache`); 229 - } 284 + console.log(`Pruned ${removeCount} oldest items from cache`); 230 285 } 231 286 232 287 // Get cache stats for monitoring 233 288 getStats(): { 234 289 size: number; 235 290 keys: string[]; 236 - highLoad: boolean; 291 + registeredKeys: string[]; 237 292 requestRate: number; 238 293 } { 239 294 const elapsedSeconds = (Date.now() - this.lastCounterReset) / 1000; ··· 243 298 return { 244 299 size: this.cache.size, 245 300 keys: Array.from(this.cache.keys()), 246 - highLoad: this.highLoadMode, 301 + registeredKeys: Array.from(this.queryRegistry.keys()), 247 302 requestRate: Math.round(requestRate * 100) / 100, 248 303 }; 249 304 } ··· 258 313 */ 259 314 export function createCachedEndpoint<T>( 260 315 cacheKey: string, 261 - queryFn: (() => Promise<T>) & { highLoad?: () => Promise<T> }, 316 + queryFn: () => Promise<T>, 262 317 ttl = 300, 263 318 ) { 319 + // Register the query function for later use in cache warming 320 + queryCache.register(cacheKey, queryFn, ttl); 321 + 264 322 return async (request: Request) => { 265 323 try { 266 - // Check for high load indicators in headers 267 - const isHighLoad = 268 - queryCache.getStats().highLoad || 269 - request.headers.get("x-high-load") === "true"; 270 - 271 - // Use a different cache key under high load if needed 272 - const effectiveCacheKey = isHighLoad ? `${cacheKey}_lite` : cacheKey; 273 - 274 - // Execute optimized query function during high load, or regular one otherwise 275 - const effectiveQueryFn = 276 - isHighLoad && queryFn.highLoad !== undefined 277 - ? queryFn.highLoad 278 - : queryFn; 279 - 280 324 // Get data from cache or execute query 281 - const data = await queryCache.get( 282 - effectiveCacheKey, 283 - effectiveQueryFn, 284 - ttl, 285 - ); 325 + const data = await queryCache.get(cacheKey, queryFn, ttl); 286 326 287 327 // Create response with proper caching headers 288 328 const response = new Response(JSON.stringify(data), { 289 - headers: { 290 - ...createCacheHeaders(effectiveCacheKey, ttl), 291 - "X-High-Load": isHighLoad ? "true" : "false", 292 - }, 329 + headers: createCacheHeaders(cacheKey, ttl), 293 330 }); 294 331 295 332 // Apply compression and return ··· 298 335 // Log the error with context 299 336 console.error(`Error in endpoint ${cacheKey}:`, error); 300 337 301 - // Capture with Sentry if available 302 - if (typeof Sentry !== "undefined" && Sentry.captureException) { 303 - Sentry.captureException(error); 304 - } 338 + // Capture with Sentry 339 + Sentry.captureException(error); 305 340 306 341 // Return consistent error response 307 342 return new Response( ··· 320 355 321 356 // Create a global cache instance 322 357 export const queryCache = new QueryCache(); 323 - 324 - // Import Sentry for error reporting 325 - import * as Sentry from "@sentry/bun";

+39 -154

src/libs/cacheWarming.ts

··· 1 1 import * as Sentry from "@sentry/bun"; 2 2 import { db } from "./db"; 3 - import { count } from "drizzle-orm"; 4 - import { stories, users } from "./schema"; 5 3 import { queryCache } from "./cache"; 6 4 7 5 /** 8 - * Proactively warms the cache by loading commonly accessed data 6 + * Proactively warms the cache by loading commonly accessed data using registered query functions 9 7 * Call this after cron jobs update the database or at server startup 10 8 */ 11 9 export async function preloadCaches(): Promise<void> { 12 10 console.log("Preloading all caches for optimal performance..."); 13 - 11 + 14 12 try { 15 - // Load critical caches sequentially to avoid database contention 16 - 17 - // 1. Leaderboard stories (most frequently accessed) 18 - console.log("Preloading leaderboard stories cache..."); 19 - await queryCache.get('leaderboard_stories', async () => { 20 - // Only select the specific columns we need for better performance 21 - const storyAlerts = await db.query.stories.findMany({ 22 - columns: { 23 - id: true, 24 - title: true, 25 - url: true, 26 - position: true, 27 - peakPosition: true, 28 - score: true, 29 - peakScore: true, 30 - descendants: true, 31 - enteredLeaderboardAt: true, 32 - firstSeenAt: true, 33 - by: true, 34 - isFromMonitoredUser: true, 35 - }, 36 - where: (stories, { eq }) => eq(stories.isOnLeaderboard, true), 37 - orderBy: (stories, { asc }) => [asc(stories.position)], 38 - limit: 30, // Reduced from 100 to 30 for better performance 39 - }); 40 - 41 - // Pre-calculate the time multiplier to optimize date transformations 42 - const timeMultiplier = 1000; 43 - 44 - // Transform for frontend 45 - return storyAlerts.map((story) => { 46 - // Calculate timestamp only once per story 47 - const timestamp = story.enteredLeaderboardAt 48 - ? new Date(story.enteredLeaderboardAt * timeMultiplier).toISOString() 49 - : new Date(story.firstSeenAt * timeMultiplier).toISOString(); 50 - 51 - return { 52 - id: story.id, 53 - title: story.title, 54 - url: story.url || `https://news.ycombinator.com/item?id=${story.id}`, 55 - rank: story.position, 56 - peakRank: story.peakPosition, 57 - points: story.score, 58 - peakPoints: story.peakScore, 59 - comments: story.descendants, 60 - timestamp, 61 - by: story.by, 62 - isFromMonitoredUser: story.isFromMonitoredUser, 63 - }; 64 - }); 65 - }); 66 - 67 - // 1.1 Leaderboard stories lite version for high load scenarios 68 - console.log("Preloading leaderboard stories lite cache..."); 69 - await queryCache.get('leaderboard_stories_lite', async () => { 70 - // Even more optimized for high load - fewer fields, fewer records 71 - const storyAlerts = await db.query.stories.findMany({ 72 - columns: { 73 - id: true, 74 - title: true, 75 - url: true, 76 - position: true, 77 - score: true, 78 - descendants: true, 79 - by: true, 80 - isFromMonitoredUser: true, 81 - }, 82 - where: (stories, { eq }) => eq(stories.isOnLeaderboard, true), 83 - orderBy: (stories, { asc }) => [asc(stories.position)], 84 - limit: 20, // Even fewer for extreme load scenarios 85 - }); 86 - 87 - const timeMultiplier = 1000; 88 - 89 - return storyAlerts.map((story) => ({ 90 - id: story.id, 91 - title: story.title, 92 - url: story.url || `https://news.ycombinator.com/item?id=${story.id}`, 93 - rank: story.position, 94 - points: story.score, 95 - comments: story.descendants, 96 - by: story.by, 97 - isFromMonitoredUser: story.isFromMonitoredUser, 98 - })); 99 - }); 100 - 101 - // 2. Total stories count 102 - console.log("Preloading story count cache..."); 103 - await queryCache.get('total_stories_count', async () => { 104 - const result = await db.select({ count: count() }).from(stories); 105 - return Number(result[0]?.count); 106 - }); 107 - 108 - // 3. Verified users stats 109 - console.log("Preloading verified users stats cache..."); 110 - await queryCache.get('verified_users_stats', async () => { 111 - // Get stats for verified user stories 112 - const verifiedStories = await db.query.stories.findMany({ 113 - where: (stories, { eq }) => eq(stories.isFromMonitoredUser, true), 114 - }); 115 - 116 - // Get count of verified users in the system 117 - const verifiedUsersCount = await db.query.users 118 - .findMany({ 119 - where: (users, { eq }) => eq(users.verified, true), 120 - }) 121 - .then((users) => users.length); 13 + // Get all registered cache keys 14 + const registeredKeys = queryCache.getRegisteredKeys(); 15 + 16 + if (registeredKeys.length === 0) { 17 + console.warn("No registered cache keys found. Cache warming skipped."); 18 + return; 19 + } 20 + 21 + console.log(`Found ${registeredKeys.length} registered cache keys to warm`); 122 22 123 - // Count stories on front page (rank <= 30) 124 - const frontPageCount = verifiedStories.filter( 125 - (s) => s.isOnLeaderboard, 126 - ).length; 23 + // Prioritize the most critical endpoints first 24 + const priorityKeys = [ 25 + "leaderboard_stories", 26 + "total_stories_count", 27 + "verified_users_stats", 28 + ]; 127 29 128 - // Calculate average peak points for verified users 129 - let totalPeakPoints = 0; 130 - for (const s of verifiedStories) { 131 - if (s.peakScore) totalPeakPoints += s.peakScore; 132 - } 133 - const avgPeakPoints = verifiedStories.length 134 - ? Math.round(totalPeakPoints / verifiedStories.length) 135 - : 0; 30 + // Sort keys by priority (known critical keys first, then others) 31 + const sortedKeys = [ 32 + ...priorityKeys.filter((key) => registeredKeys.includes(key)), 33 + ...registeredKeys.filter((key) => !priorityKeys.includes(key)), 34 + ]; 136 35 137 - return { 138 - totalCount: verifiedUsersCount, 139 - frontPageCount: frontPageCount, 140 - avgPeakPoints: avgPeakPoints, 141 - }; 142 - }); 36 + // Warm each cache using its registered query function 37 + for (const key of sortedKeys) { 38 + console.log(`Warming cache for ${key}...`); 39 + await queryCache.warmCache(key); 40 + } 143 41 144 - // 4. Optional: Warm up top 3 story snapshots (preload most accessed story graphs) 145 - // This is done with lower priority as it's less critical 42 + // Preload snapshots for top stories - this requires custom handling 43 + // since these use dynamic keys (story_snapshots_{id}) 146 44 console.log("Preloading top story snapshots (limited to 3)..."); 147 - 45 + 148 46 // Get IDs of top 3 stories to warm their snapshots 149 47 const topStories = await db.query.stories.findMany({ 150 48 columns: { id: true }, // Only retrieve the ID field to minimize memory use 151 49 where: (stories, { eq }) => eq(stories.isOnLeaderboard, true), 152 50 orderBy: (stories, { asc }) => [asc(stories.position)], 153 - limit: 3, // Further reduced from 5 to 3 to minimize initial load 51 + limit: 3, 154 52 }); 155 53 156 - // Preload snapshots for these stories sequentially 54 + // Check if any dynamic story snapshot keys are registered 157 55 for (const story of topStories) { 158 - await queryCache.get(`story_snapshots_${story.id}`, async () => { 159 - // Get snapshots for the story 160 - const snapshots = await db.query.leaderboardSnapshots.findMany({ 161 - where: (snapshots, { eq }) => eq(snapshots.storyId, story.id), 162 - orderBy: (snapshots, { asc }) => [asc(snapshots.timestamp)], 163 - }); 164 - 165 - // Transform snapshot data for frontend 166 - return snapshots.map((snapshot) => ({ 167 - timestamp: snapshot.timestamp, 168 - position: snapshot.position, 169 - score: snapshot.score, 170 - date: new Date(snapshot.timestamp * 1000).toISOString(), 171 - })); 172 - }, 3600); // Cache story snapshots for 1 hour 56 + const snapshotKey = `story_snapshots_${story.id}`; 57 + await queryCache.warmCache(snapshotKey); 173 58 } 174 - 59 + 175 60 console.log("Cache preloading completed successfully"); 176 61 } catch (error) { 177 62 console.error("Error during cache preloading:", error); ··· 186 71 export function invalidateAndRefreshCaches(): void { 187 72 console.log("Invalidating all query caches and refreshing data"); 188 73 queryCache.invalidateAll(); 189 - 190 - // Immediately refill the cache 74 + 75 + // Immediately refill the cache using registered query functions 191 76 setTimeout(() => { 192 - preloadCaches().catch(err => { 77 + preloadCaches().catch((err) => { 193 78 console.error("Error during cache preloading after invalidation:", err); 194 79 Sentry.captureException(err); 195 80 }); 196 81 }, 100); // Small delay to let any pending requests complete 197 - } 82 + }

Configure Feed

Configure Feed