ai cooking
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

see if we can scrape albertsons locations (#352)

* see if we can scrape albertsons locations

* okay what do we have now

* kind of progress

* kind of close

* url map

* more chains

* revert unnecessary touches

* fix up some logging

* one url map to rule them all

* remove rebuild now and bad test

* throw out redundants

* rip out unused

* share some code and actually test it

* remove more dupe

* shared max distance

---------

Co-authored-by: paul miller <paul.miller>

authored by

Paul Miller
paul miller
and committed by
GitHub
0970ca74 e71d7915

+1483 -75
+141
cmd/albertsons/main.go
··· 1 + package main 2 + 3 + import ( 4 + "careme/internal/albertsons" 5 + "careme/internal/cache" 6 + "context" 7 + "errors" 8 + "flag" 9 + "fmt" 10 + "log" 11 + "log/slog" 12 + "net/http" 13 + "strings" 14 + "time" 15 + ) 16 + 17 + func main() { 18 + var ( 19 + brands string 20 + timeoutSec int 21 + delayMS int 22 + ) 23 + 24 + flag.StringVar(&brands, "brands", "", "comma-separated brand keys to sync (default: all configured chains)") 25 + flag.IntVar(&timeoutSec, "timeout", 20, "HTTP timeout in seconds") 26 + flag.IntVar(&delayMS, "delay-ms", 1000, "delay between store page requests in milliseconds") 27 + flag.Parse() 28 + 29 + chains, err := selectedChains(brands) 30 + if err != nil { 31 + log.Fatalf("failed to parse brands: %v", err) 32 + } 33 + 34 + cacheStore, err := cache.EnsureCache(albertsons.Container) 35 + if err != nil { 36 + log.Fatalf("failed to create cache: %v", err) 37 + } 38 + 39 + httpClient := &http.Client{Timeout: time.Duration(timeoutSec) * time.Second} 40 + ctx := context.Background() 41 + delay := time.Duration(delayMS) * time.Millisecond 42 + 43 + var synced int 44 + for _, chain := range chains { 45 + chainSynced, err := syncChainFromSitemap(ctx, cacheStore, httpClient, chain, chain.SitemapURL(), delay) 46 + if err != nil { 47 + slog.Warn("failed to sync albertsons-family chain", "brand", chain.Brand, "domain", chain.Domain, "error", err) 48 + continue 49 + } 50 + synced += chainSynced 51 + } 52 + 53 + fmt.Printf("synced %d Albertsons-family store summaries\n", synced) 54 + } 55 + 56 + // not concurrent safe because url map is shared. Could fix that with etags or seperate maps. 
57 + func syncChainFromSitemap(ctx context.Context, cacheStore cache.ListCache, httpClient *http.Client, chain albertsons.Chain, sitemapURL string, delay time.Duration) (int, error) { 58 + urlMap, err := albertsons.LoadStoreURLMap(ctx, cacheStore) 59 + if err != nil && !errors.Is(err, cache.ErrNotFound) { 60 + return 0, err 61 + } 62 + 63 + urls, err := albertsons.FetchSitemap(ctx, httpClient, sitemapURL) 64 + if err != nil { 65 + return 0, err 66 + } 67 + 68 + pages := albertsons.FilterStorePages(urls, chain) 69 + slog.Info("syncing albertsons-family chain", "brand", chain.Brand, "domain", chain.Domain, "count", len(pages)) 70 + 71 + if urlMap == nil { 72 + urlMap = make(map[string]string, len(pages)) 73 + } 74 + 75 + var synced int 76 + var updated bool 77 + for _, page := range pages { 78 + locationID := strings.TrimSpace(urlMap[page.URL]) 79 + if locationID != "" { 80 + //exists, err := cacheStore.Exists(ctx, albertsons.StoreCachePrefix+locationID) 81 + //if err == nil && exists { 82 + continue 83 + // } 84 + } 85 + slog.Info("fetching albertsons store summary", "brand", chain.Brand, "url", page.URL) 86 + summary, err := albertsons.FetchStoreSummary(ctx, httpClient, page.URL, chain) 87 + if err != nil { 88 + slog.Warn("failed to fetch albertsons store summary", "brand", chain.Brand, "url", page.URL, "error", err) 89 + continue 90 + } 91 + if err := albertsons.CacheStoreSummary(ctx, cacheStore, summary); err != nil { 92 + slog.Warn("failed to cache albertsons store summary", "brand", chain.Brand, "location_id", summary.ID, "error", err) 93 + continue 94 + } 95 + 96 + if urlMap[page.URL] != summary.ID { 97 + urlMap[page.URL] = summary.ID 98 + updated = true 99 + } 100 + synced++ 101 + time.Sleep(delay) 102 + } 103 + 104 + if updated { 105 + if err := albertsons.SaveStoreURLMap(ctx, cacheStore, urlMap); err != nil { 106 + return synced, err 107 + } 108 + } 109 + return synced, nil 110 + } 111 + 112 + func selectedChains(raw string) ([]albertsons.Chain, error) { 
113 + all := albertsons.DefaultChains() 114 + if strings.TrimSpace(raw) == "" { 115 + return all, nil 116 + } 117 + 118 + allowed := make(map[string]albertsons.Chain, len(all)) 119 + for _, chain := range all { 120 + allowed[chain.Brand] = chain 121 + } 122 + 123 + selected := make([]albertsons.Chain, 0, len(all)) 124 + for _, part := range strings.Split(raw, ",") { 125 + brand := strings.TrimSpace(strings.ToLower(part)) 126 + if brand == "" { 127 + continue 128 + } 129 + 130 + chain, ok := allowed[brand] 131 + if !ok { 132 + return nil, fmt.Errorf("unknown brand %q", brand) 133 + } 134 + selected = append(selected, chain) 135 + } 136 + 137 + if len(selected) == 0 { 138 + return nil, fmt.Errorf("no brands selected") 139 + } 140 + return selected, nil 141 + }
+176
cmd/albertsons/main_test.go
··· 1 + package main 2 + 3 + import ( 4 + "careme/internal/albertsons" 5 + "careme/internal/cache" 6 + "context" 7 + "fmt" 8 + "io" 9 + "net/http" 10 + "strings" 11 + "sync/atomic" 12 + "testing" 13 + "time" 14 + ) 15 + 16 + func TestSelectedChainsDefaultsToAll(t *testing.T) { 17 + t.Parallel() 18 + 19 + chains, err := selectedChains("") 20 + if err != nil { 21 + t.Fatalf("selectedChains returned error: %v", err) 22 + } 23 + 24 + seen := make(map[string]bool, len(chains)) 25 + for _, chain := range chains { 26 + seen[chain.Brand] = true 27 + } 28 + for _, brand := range []string{"albertsons", "safeway", "starmarket", "haggen", "acmemarkets"} { 29 + if !seen[brand] { 30 + t.Fatalf("expected brand %q in selected chains", brand) 31 + } 32 + } 33 + } 34 + 35 + func TestSelectedChainsRejectsUnknownBrand(t *testing.T) { 36 + t.Parallel() 37 + 38 + if _, err := selectedChains("unknown"); err == nil { 39 + t.Fatal("expected unknown brand error") 40 + } 41 + } 42 + 43 + func TestSyncChainFromSitemapSkipsKnownURLsWithCachedSummaries(t *testing.T) { 44 + t.Parallel() 45 + 46 + cacheStore := cache.NewInMemoryCache() 47 + var pageRequests atomic.Int32 48 + baseURL := "https://local.albertsons.test" 49 + 50 + httpClient := &http.Client{ 51 + Transport: roundTripperFunc(func(req *http.Request) (*http.Response, error) { 52 + switch req.URL.String() { 53 + case baseURL + "/sitemap.xml": 54 + body := fmt.Sprintf(`<?xml version="1.0" encoding="UTF-8"?><urlset><url><loc>%s/az/lake-havasu-city/1980-mcculloch-blvd.html</loc></url></urlset>`, baseURL) 55 + return responseWithBody(http.StatusOK, body), nil 56 + case baseURL + "/az/lake-havasu-city/1980-mcculloch-blvd.html": 57 + pageRequests.Add(1) 58 + return responseWithBody(http.StatusOK, `<html></html>`), nil 59 + default: 60 + return responseWithBody(http.StatusNotFound, "not found"), nil 61 + } 62 + }), 63 + } 64 + 65 + lat := 34.4839 66 + lon := -114.3225 67 + if err := albertsons.CacheStoreSummary(context.Background(), cacheStore, 
&albertsons.StoreSummary{ 68 + ID: "albertsons_3204", 69 + Brand: "albertsons", 70 + Domain: "local.albertsons.com", 71 + StoreID: "3204", 72 + Name: "Albertsons 1980 Mcculloch Blvd", 73 + Address: "1980 Mcculloch Blvd", 74 + State: "AZ", 75 + ZipCode: "86403", 76 + Lat: &lat, 77 + Lon: &lon, 78 + URL: baseURL + "/az/lake-havasu-city/1980-mcculloch-blvd.html", 79 + }); err != nil { 80 + t.Fatalf("CacheStoreSummary returned error: %v", err) 81 + } 82 + if err := albertsons.SaveStoreURLMap(context.Background(), cacheStore, map[string]string{ 83 + baseURL + "/az/lake-havasu-city/1980-mcculloch-blvd.html": "albertsons_3204", 84 + }); err != nil { 85 + t.Fatalf("SaveStoreURLMap returned error: %v", err) 86 + } 87 + 88 + chain := albertsons.Chain{ 89 + Brand: "albertsons", 90 + DisplayName: "Albertsons", 91 + Domain: strings.TrimPrefix(baseURL, "https://"), 92 + IDPrefix: "albertsons_", 93 + } 94 + 95 + synced, err := syncChainFromSitemap(context.Background(), cacheStore, httpClient, chain, baseURL+"/sitemap.xml", 0*time.Millisecond) 96 + if err != nil { 97 + t.Fatalf("syncChainFromSitemap returned error: %v", err) 98 + } 99 + if synced != 0 { 100 + t.Fatalf("expected 0 synced summaries, got %d", synced) 101 + } 102 + if pageRequests.Load() != 0 { 103 + t.Fatalf("expected no page requests for cached url, got %d", pageRequests.Load()) 104 + } 105 + } 106 + 107 + func TestSyncChainFromSitemapPreservesOtherChainURLMappings(t *testing.T) { 108 + t.Parallel() 109 + 110 + cacheStore := cache.NewInMemoryCache() 111 + baseURL := "https://local.albertsons.test" 112 + 113 + httpClient := &http.Client{ 114 + Transport: roundTripperFunc(func(req *http.Request) (*http.Response, error) { 115 + switch req.URL.String() { 116 + case baseURL + "/sitemap.xml": 117 + body := fmt.Sprintf(`<?xml version="1.0" encoding="UTF-8"?><urlset><url><loc>%s/ar/texarkana/3710-state-line-ave.html</loc></url></urlset>`, baseURL) 118 + return responseWithBody(http.StatusOK, body), nil 119 + case baseURL + 
"/ar/texarkana/3710-state-line-ave.html": 120 + return responseWithBody(http.StatusOK, `<!doctype html><html><head><script>window.Yext = (function(Yext){Yext.Profile = {"meta":{"id":"611"},"name":"Albertsons","address":{"city":"Texarkana","line1":"3710 State Line Ave","postalCode":"71854","region":"AR"}}; return Yext;})(window.Yext || {});</script></head><body></body></html>`), nil 121 + default: 122 + return responseWithBody(http.StatusNotFound, "not found"), nil 123 + } 124 + }), 125 + } 126 + 127 + if err := albertsons.SaveStoreURLMap(context.Background(), cacheStore, map[string]string{ 128 + "https://local.safeway.com/safeway/wa/bellevue/15100-se-38th-st.html": "safeway_1444", 129 + }); err != nil { 130 + t.Fatalf("SaveStoreURLMapEntries returned error: %v", err) 131 + } 132 + 133 + chain := albertsons.Chain{ 134 + Brand: "albertsons", 135 + DisplayName: "Albertsons", 136 + Domain: strings.TrimPrefix(baseURL, "https://"), 137 + IDPrefix: "albertsons_", 138 + } 139 + 140 + synced, err := syncChainFromSitemap(context.Background(), cacheStore, httpClient, chain, baseURL+"/sitemap.xml", 0*time.Millisecond) 141 + if err != nil { 142 + t.Fatalf("syncChainFromSitemap returned error: %v", err) 143 + } 144 + if synced != 1 { 145 + t.Fatalf("expected 1 synced summary, got %d", synced) 146 + } 147 + 148 + urlMap, err := albertsons.LoadStoreURLMap(context.Background(), cacheStore) 149 + if err != nil { 150 + t.Fatalf("LoadStoreURLMap returned error: %v", err) 151 + } 152 + if len(urlMap) != 2 { 153 + t.Fatalf("expected 2 mappings, got %d", len(urlMap)) 154 + } 155 + if got := urlMap["https://local.safeway.com/safeway/wa/bellevue/15100-se-38th-st.html"]; got != "safeway_1444" { 156 + t.Fatalf("expected safeway mapping to be preserved, got %q", got) 157 + } 158 + if got := urlMap[baseURL+"/ar/texarkana/3710-state-line-ave.html"]; got != "albertsons_611" { 159 + t.Fatalf("expected albertsons mapping to be added, got %q", got) 160 + } 161 + } 162 + 163 + type roundTripperFunc 
func(*http.Request) (*http.Response, error) 164 + 165 + func (f roundTripperFunc) RoundTrip(req *http.Request) (*http.Response, error) { 166 + return f(req) 167 + } 168 + 169 + func responseWithBody(status int, body string) *http.Response { 170 + return &http.Response{ 171 + StatusCode: status, 172 + Status: fmt.Sprintf("%d %s", status, http.StatusText(status)), 173 + Header: make(http.Header), 174 + Body: io.NopCloser(strings.NewReader(body)), 175 + } 176 + }
+6 -1
docs/cache-layout.md
··· 2 2 3 3 This project stores cache entries in: 4 4 - Local filesystem under `cache/` (default app cache) 5 + - Local filesystem under `albertsons/` (Albertsons-family cache) 5 6 - Local filesystem under `wholefoods/` (Whole Foods cache) 6 7 - Azure Blob container `recipes` (default app cache when `AZURE_STORAGE_ACCOUNT_NAME` is set) 8 + - Azure Blob container `albertsons` (Albertsons-family cache when `AZURE_STORAGE_ACCOUNT_NAME` is set) 7 9 - Azure Blob container `wholefoods` (Whole Foods cache when `AZURE_STORAGE_ACCOUNT_NAME` is set) 8 10 9 11 Within a given cache backend, keys with `/` become subdirectories (filesystem) or blob prefixes (Azure). ··· 22 24 | `recipe_feedback/` | JSON `RecipeFeedback` (`cooked`, `stars`, `comment`, `updated_at`) per recipe hash | `internal/recipes/feedback.go` (`SaveFeedback`) via `internal/recipes/server.go` (`handleFeedback`) | `internal/recipes/feedback.go` (`FeedbackFromCache`) and `internal/recipes/server.go` (`handleSingle`, `handleFeedback`) | 23 25 | `users/` | JSON `users/types.User` by user ID | `internal/users/storage.go` (`Update`) | `internal/users/storage.go` (`GetByID`, `List`) | 24 26 | `email2user/` | Plain text user ID keyed by normalized email | `internal/users/storage.go` (`FindOrCreateFromClerk`) | `internal/users/storage.go` (`GetByEmail`) | 27 + | `albertsons/stores/` | JSON `albertsons.StoreSummary` keyed by prefixed Albertsons-family location ID | `cmd/albertsons` and `internal/albertsons` cache helpers | `internal/albertsons` location backend | 28 + | `albertsons/store_url_map.json` | JSON object mapping store URL to prefixed Albertsons-family location ID | `cmd/albertsons` and `internal/albertsons` cache helpers | `cmd/albertsons` incremental sync | 25 29 | `wholefoods/stores/` | JSON `wholefoods.StoreSummaryResponse` keyed by Whole Foods store ID | `cmd/wholefoods` and `internal/wholefoods` cache helpers | `internal/wholefoods` location backend | 26 30 | `wholefoods/store_url_map.json` | JSON object 
mapping store URL to Whole Foods store ID | `cmd/wholefoods` and `internal/wholefoods` cache helpers | `cmd/wholefoods` when `-stores` is not provided | 27 31 ··· 29 33 30 34 - Cache backend selection is in `internal/cache/azure.go` (`MakeCache`). 31 35 - Most app caches use the default cache created via `cache.MakeCache()` / `cache.EnsureCache("recipes")`. 36 + - Albertsons-family locations use a separate cache created via `cache.EnsureCache("albertsons")`. 32 37 - Whole Foods uses a separate cache created via `cache.EnsureCache("wholefoods")`; it does not share the `recipes` container/directory. 33 - - Local cache paths are `recipes/` for most app data and `wholefoods/` for Whole Foods data when filesystem backend is used. 38 + - Local cache paths are `recipes/` for most app data, `albertsons/` for Albertsons-family data, and `wholefoods/` for Whole Foods data when filesystem backend is used. 34 39 - Blob names in Azure match the same key strings listed above inside their respective containers. 35 40 - Staple `ingredients/` cache keys derive from location ID, date, and a versioned backend staple signature (for example `kroger-staples-v1` or `wholefoods-staples-v1`), so Kroger and Whole Foods locations do not share staple caches and staple-definition changes can invalidate caches intentionally. 36 41 - Do not create nested keys under `recipe/<hash>` (for example `recipe/<hash>/wine`) because `FileCache` stores `recipe/<hash>` as a file path.
+109
internal/albertsons/cache.go
··· 1 + package albertsons 2 + 3 + import ( 4 + "careme/internal/cache" 5 + locationtypes "careme/internal/locations/types" 6 + "context" 7 + "encoding/json" 8 + "errors" 9 + "fmt" 10 + "log/slog" 11 + 12 + "github.com/samber/lo" 13 + lop "github.com/samber/lo/parallel" 14 + ) 15 + 16 + const ( 17 + Container = "albertsons" 18 + StoreCachePrefix = "albertsons/stores/" 19 + StoreURLMapCacheKey = "albertsons/store_url_map.json" 20 + ) 21 + 22 + func SaveStoreURLMap(ctx context.Context, c cache.Cache, urlMap map[string]string) error { 23 + raw, err := json.Marshal(urlMap) 24 + if err != nil { 25 + return fmt.Errorf("marshal store url map: %w", err) 26 + } 27 + if err := c.Put(ctx, StoreURLMapCacheKey, string(raw), cache.Unconditional()); err != nil { 28 + return fmt.Errorf("write store url map cache: %w", err) 29 + } 30 + return nil 31 + } 32 + 33 + func LoadStoreURLMap(ctx context.Context, c cache.Cache) (map[string]string, error) { 34 + reader, err := c.Get(ctx, StoreURLMapCacheKey) 35 + if err != nil { 36 + return nil, err 37 + } 38 + defer func() { 39 + _ = reader.Close() 40 + }() 41 + 42 + var urlMap map[string]string 43 + if err := json.NewDecoder(reader).Decode(&urlMap); err != nil { 44 + return nil, fmt.Errorf("decode store url map cache: %w", err) 45 + } 46 + return urlMap, nil 47 + } 48 + 49 + func CacheStoreSummary(ctx context.Context, c cache.Cache, summary *StoreSummary) error { 50 + if summary == nil { 51 + return errors.New("store summary is required") 52 + } 53 + 54 + raw, err := json.Marshal(summary) 55 + if err != nil { 56 + return fmt.Errorf("marshal store summary: %w", err) 57 + } 58 + 59 + if err := c.Put(ctx, StoreCachePrefix+summary.ID, string(raw), cache.Unconditional()); err != nil { 60 + return fmt.Errorf("write store summary cache: %w", err) 61 + } 62 + return nil 63 + } 64 + 65 + func loadCachedStoreSummaries(ctx context.Context, c cache.ListCache) ([]*StoreSummary, error) { 66 + keys, err := c.List(ctx, StoreCachePrefix, "") 67 + if err != 
nil { 68 + return nil, fmt.Errorf("list cached store summaries: %w", err) 69 + } 70 + 71 + //expensive. Just save a smaller map of centroids 72 + summaries := lop.Map(keys, func(key string, _ int) *StoreSummary { 73 + reader, err := c.Get(ctx, StoreCachePrefix+key) 74 + if err != nil { 75 + slog.WarnContext(ctx, "failed to read cached albertsons store summary", "key", key, "error", err) 76 + return nil 77 + } 78 + defer func() { 79 + _ = reader.Close() 80 + }() 81 + 82 + var summary StoreSummary 83 + if err := json.NewDecoder(reader).Decode(&summary); err != nil { 84 + slog.WarnContext(ctx, "failed to decode cached albertsons store summary", "key", key, "error", err) 85 + return nil 86 + } 87 + return &summary 88 + }) 89 + 90 + summaries = lo.Compact(summaries) 91 + if len(summaries) == 0 { 92 + return nil, fmt.Errorf("failed to load albertsons locations") 93 + } 94 + slog.InfoContext(ctx, "loaded albertsons locations", "count", len(summaries)) 95 + 96 + return summaries, nil 97 + } 98 + 99 + func storeSummaryToLocation(summary StoreSummary) locationtypes.Location { 100 + return locationtypes.Location{ 101 + ID: summary.ID, 102 + Name: summary.Name, 103 + Address: summary.Address, 104 + State: summary.State, 105 + ZipCode: summary.ZipCode, 106 + Lat: summary.Lat, 107 + Lon: summary.Lon, 108 + } 109 + }
+35
internal/albertsons/cache_test.go
··· 1 + package albertsons 2 + 3 + import ( 4 + "careme/internal/cache" 5 + "context" 6 + "testing" 7 + ) 8 + 9 + func TestSaveStoreURLMapRoundTrip(t *testing.T) { 10 + t.Parallel() 11 + 12 + cacheStore := cache.NewInMemoryCache() 13 + urlMap := map[string]string{ 14 + "https://local.albertsons.com/ar/texarkana/3710-state-line-ave.html": "albertsons_611", 15 + "https://local.safeway.com/safeway/wa/bellevue/15100-se-38th-st.html": "safeway_1444", 16 + } 17 + 18 + if err := SaveStoreURLMap(context.Background(), cacheStore, urlMap); err != nil { 19 + t.Fatalf("SaveStoreURLMapEntries returned error: %v", err) 20 + } 21 + 22 + got, err := LoadStoreURLMap(context.Background(), cacheStore) 23 + if err != nil { 24 + t.Fatalf("LoadStoreURLMap returned error: %v", err) 25 + } 26 + if len(got) != 2 { 27 + t.Fatalf("expected 2 url mappings, got %d", len(got)) 28 + } 29 + if got["https://local.albertsons.com/ar/texarkana/3710-state-line-ave.html"] != "albertsons_611" { 30 + t.Fatalf("unexpected albertsons mapping: %+v", got) 31 + } 32 + if got["https://local.safeway.com/safeway/wa/bellevue/15100-se-38th-st.html"] != "safeway_1444" { 33 + t.Fatalf("unexpected safeway mapping: %+v", got) 34 + } 35 + }
+400
internal/albertsons/discovery.go
··· 1 + package albertsons 2 + 3 + import ( 4 + "bytes" 5 + "careme/internal/sitemapfetch" 6 + "context" 7 + "encoding/json" 8 + "fmt" 9 + "io" 10 + "net/http" 11 + "net/url" 12 + "regexp" 13 + "slices" 14 + "strconv" 15 + "strings" 16 + ) 17 + 18 + type Chain struct { 19 + Brand string 20 + DisplayName string 21 + Domain string 22 + PathPrefix string 23 + IDPrefix string 24 + } 25 + 26 + func (c Chain) SitemapURL() string { 27 + return "https://" + c.Domain + "/sitemap.xml" 28 + } 29 + 30 + /* 31 + Acme Markets: 162 locations (CT, DE, MD, NJ, NY and PA)[158][159] 32 + Albertsons: 381 locations (AZ, AR, CA, CO, ID, LA, MT, NV, NM, ND, OK, OR, TX, UT, WA and WY)[160] 33 + Albertsons Market: 23 locations (NM)[161] 34 + Amigos: 4 locations (TX)[162] 35 + Andronico's: 7 locations (CA)[163] 36 + Balducci's: 8 locations (CT, MD, NY, VA)[164] 37 + Carrs: 11 locations (AK)[165] 38 + Haggen: 15 locations (WA)[166] 39 + Jewel-Osco: 188 locations (IL, IA, and IN)[167] 40 + Kings Food Markets: 19 locations (CT, NJ, NY)[168] 41 + Lucky: 4 locations (UT)[169] 42 + Market Street: 19 locations (NM and TX)[170] 43 + Pak 'n Save: 2 locations (CA)[171] 44 + Pavilions: 27 locations (Southern California)[172] 45 + Randalls: 28[173] locations (Greater Houston and Greater Austin, TX)[174] 46 + Safeway: 914 locations (AK, AZ, CA, CO, DC, DE, HI, ID, MD, MT, NE, NV, NM, OR, SD, VA, WA, WY)[175] 47 + Shaw's: 127 locations (MA, ME, NH, RI and VT)[176] 48 + Star Market: 21 locations (MA)[177] 49 + Tom Thumb: 65[173] locations (Dallas–Fort Worth metroplex, TX)[178] 50 + United Supermarkets: 97 locations (Texas Panhandle) plus 39 United Express locations (NM and TX)[179] 51 + Vons: 194 locations (Southern California and Southern Nevada)[180] 52 + */ 53 + var defaultChains = []Chain{ 54 + { 55 + Brand: "albertsons", 56 + DisplayName: "Albertsons", 57 + Domain: "local.albertsons.com", 58 + IDPrefix: "albertsons_", 59 + }, 60 + { 61 + Brand: "shaws", 62 + DisplayName: "Shaw's", 63 + Domain: 
"local.shaws.com", 64 + IDPrefix: "shaws_", 65 + }, 66 + { 67 + Brand: "starmarket", 68 + DisplayName: "Star Market", 69 + Domain: "local.starmarket.com", 70 + IDPrefix: "starmarket_", 71 + }, 72 + { 73 + Brand: "safeway", 74 + DisplayName: "Safeway", 75 + Domain: "local.safeway.com", 76 + PathPrefix: "safeway", 77 + IDPrefix: "safeway_", 78 + }, 79 + { 80 + Brand: "haggen", 81 + DisplayName: "Haggen", 82 + Domain: "local.haggen.com", 83 + IDPrefix: "haggen_", 84 + }, 85 + { 86 + Brand: "acmemarkets", 87 + DisplayName: "ACME Markets", 88 + Domain: "local.acmemarkets.com", 89 + IDPrefix: "acmemarkets_", 90 + }, 91 + { 92 + Brand: "vons", 93 + DisplayName: "Vons", 94 + Domain: "local.vons.com", 95 + IDPrefix: "vons_", 96 + }, 97 + { 98 + Brand: "jewelosco", 99 + DisplayName: "Jewel-Osco", 100 + Domain: "local.jewelosco.com", 101 + IDPrefix: "jewelosco_", 102 + }, 103 + { 104 + Brand: "unitedsupermarkets", 105 + DisplayName: "United Supermarkets", 106 + Domain: "local.unitedsupermarkets.com", 107 + IDPrefix: "unitedsupermarkets_", 108 + }, 109 + { 110 + Brand: "tomthumb", 111 + DisplayName: "Tom Thumb", 112 + Domain: "local.tomthumb.com", 113 + IDPrefix: "tomthumb_", 114 + }, 115 + { 116 + Brand: "randalls", 117 + DisplayName: "Randalls", 118 + Domain: "local.randalls.com", 119 + IDPrefix: "randalls_", 120 + }, 121 + { 122 + Brand: "pavilions", 123 + DisplayName: "Pavilions", 124 + Domain: "local.pavilions.com", 125 + IDPrefix: "pavilions_", 126 + }, 127 + { 128 + Brand: "kingsfoodmarkets", 129 + DisplayName: "Kings Food Markets", 130 + Domain: "local.kingsfoodmarkets.com", 131 + IDPrefix: "kingsfoodmarkets_", 132 + }, 133 + } 134 + 135 + type StorePage struct { 136 + Chain Chain 137 + URL string 138 + State string 139 + City string 140 + AddressSlug string 141 + } 142 + 143 + type StoreSummary struct { 144 + ID string `json:"id"` 145 + Brand string `json:"brand"` 146 + Domain string `json:"domain"` 147 + StoreID string `json:"store_id"` 148 + Name string 
`json:"name"` 149 + Address string `json:"address"` 150 + City string `json:"city"` 151 + State string `json:"state"` 152 + ZipCode string `json:"zip_code"` 153 + URL string `json:"url"` 154 + Lat *float64 `json:"lat,omitempty"` 155 + Lon *float64 `json:"lon,omitempty"` 156 + } 157 + 158 + type yextProfile struct { 159 + ID string `json:"id"` 160 + Name string `json:"name"` 161 + Meta yextMeta `json:"meta"` 162 + Address yextAddress `json:"address"` 163 + } 164 + 165 + type yextAddress struct { 166 + City string `json:"city"` 167 + Line1 string `json:"line1"` 168 + PostalCode string `json:"postalCode"` 169 + Region string `json:"region"` 170 + } 171 + 172 + type yextMeta struct { 173 + ID string `json:"id"` 174 + } 175 + 176 + var ( 177 + yextProfilePrefix = []byte(`Yext.Profile = `) 178 + yextProfileSuffix = []byte(`; return Yext;`) 179 + geoPositionRe = regexp.MustCompile(`meta name="geo\.position" content="([0-9.-]+);([0-9.-]+)"`) 180 + ) 181 + 182 + func DefaultChains() []Chain { 183 + return slices.Clone(defaultChains) 184 + } 185 + 186 + func IsID(locationID string) bool { 187 + locationID = strings.TrimSpace(locationID) 188 + for _, chain := range defaultChains { 189 + storeID := strings.TrimPrefix(locationID, chain.IDPrefix) 190 + if storeID != "" && storeID != locationID { 191 + return true 192 + } 193 + } 194 + return false 195 + } 196 + 197 + func FetchSitemap(ctx context.Context, client *http.Client, sitemapURL string) ([]string, error) { 198 + return sitemapfetch.FetchURLs(ctx, client, sitemapURL) 199 + } 200 + 201 + func FilterStorePages(urls []string, chain Chain) []StorePage { 202 + pages := make([]StorePage, 0, len(urls)) 203 + seen := make(map[string]struct{}, len(urls)) 204 + for _, rawURL := range urls { 205 + page, ok := ParseStorePageURL(rawURL, chain) 206 + if !ok { 207 + continue 208 + } 209 + if _, exists := seen[page.URL]; exists { 210 + continue 211 + } 212 + seen[page.URL] = struct{}{} 213 + pages = append(pages, page) 214 + } 215 + 
return pages 216 + } 217 + 218 + // ParseStorePageURL accepts only store landing pages. 219 + // Expected path shapes are: 220 + // 221 + // /<state>/<city>/<address>.html 222 + // /<brand>/<state>/<city>/<address>.html 223 + // 224 + // The final segment must be the store address page; category/service pages like 225 + // /produce.html or /bakery.html add an extra segment and are rejected. 226 + func ParseStorePageURL(rawURL string, chain Chain) (StorePage, bool) { 227 + u, err := url.Parse(strings.TrimSpace(rawURL)) 228 + if err != nil { 229 + return StorePage{}, false 230 + } 231 + 232 + if !strings.EqualFold(u.Host, chain.Domain) { 233 + return StorePage{}, false 234 + } 235 + 236 + segments := strings.Split(strings.Trim(u.Path, "/"), "/") 237 + if len(segments) == 1 && segments[0] == "" { 238 + return StorePage{}, false 239 + } 240 + 241 + offset := 0 242 + if chain.PathPrefix != "" { 243 + if len(segments) != 4 || !strings.EqualFold(segments[0], chain.PathPrefix) { 244 + return StorePage{}, false 245 + } 246 + offset = 1 247 + } else if len(segments) != 3 { 248 + return StorePage{}, false 249 + } 250 + 251 + address := strings.TrimSuffix(segments[offset+2], ".html") 252 + if address == segments[offset+2] || address == "" { 253 + return StorePage{}, false 254 + } 255 + 256 + state := strings.TrimSpace(segments[offset]) 257 + city := strings.TrimSpace(segments[offset+1]) 258 + if state == "" || city == "" { 259 + return StorePage{}, false 260 + } 261 + 262 + return StorePage{ 263 + Chain: chain, 264 + URL: rawURL, 265 + State: strings.ToUpper(state), 266 + City: city, 267 + AddressSlug: address, 268 + }, true 269 + } 270 + 271 + func FetchStoreSummary(ctx context.Context, client *http.Client, pageURL string, chain Chain) (*StoreSummary, error) { 272 + req, err := http.NewRequestWithContext(ctx, http.MethodGet, pageURL, nil) 273 + if err != nil { 274 + return nil, fmt.Errorf("build store page request: %w", err) 275 + } 276 + req.Header.Set("User-Agent", 
"Mozilla/5.0") 277 + 278 + resp, err := client.Do(req) 279 + if err != nil { 280 + return nil, fmt.Errorf("get store page: %w", err) 281 + } 282 + defer func() { 283 + _ = resp.Body.Close() 284 + }() 285 + 286 + if resp.StatusCode != http.StatusOK { 287 + return nil, fmt.Errorf("get store page: status %s", resp.Status) 288 + } 289 + 290 + body, err := io.ReadAll(io.LimitReader(resp.Body, 4*1024*1024)) 291 + if err != nil { 292 + return nil, fmt.Errorf("read store page: %w", err) 293 + } 294 + 295 + return ExtractStoreSummary(pageURL, body, chain) 296 + } 297 + 298 + func ExtractStoreSummary(pageURL string, body []byte, chain Chain) (*StoreSummary, error) { 299 + page, ok := ParseStorePageURL(pageURL, chain) 300 + if !ok { 301 + return nil, fmt.Errorf("store page URL %q is invalid", pageURL) 302 + } 303 + 304 + profileJSON, err := extractProfileJSON(body) 305 + if err != nil { 306 + return nil, err 307 + } 308 + 309 + var profile yextProfile 310 + if err := json.Unmarshal(profileJSON, &profile); err != nil { 311 + return nil, fmt.Errorf("decode yext profile: %w", err) 312 + } 313 + 314 + storeID := strings.TrimSpace(profile.ID) 315 + if storeID == "" { 316 + storeID = strings.TrimSpace(profile.Meta.ID) 317 + } 318 + if storeID == "" { 319 + return nil, fmt.Errorf("store id not found in yext profile") 320 + } 321 + 322 + address := strings.TrimSpace(profile.Address.Line1) 323 + city := strings.TrimSpace(profile.Address.City) 324 + state := strings.ToUpper(strings.TrimSpace(profile.Address.Region)) 325 + zipCode := strings.TrimSpace(profile.Address.PostalCode) 326 + 327 + if address == "" { 328 + address = page.AddressSlug 329 + } 330 + if city == "" { 331 + city = page.City 332 + } 333 + if state == "" { 334 + state = page.State 335 + } 336 + 337 + // These pages often embed only the banner name ("Safeway", "Albertsons"), 338 + // so build a store-specific display name from the address when needed. 
339 + name := strings.TrimSpace(profile.Name) 340 + if name == "" || strings.EqualFold(name, page.Chain.DisplayName) { 341 + switch { 342 + case address != "": 343 + name = page.Chain.DisplayName + " " + address 344 + case city != "": 345 + name = page.Chain.DisplayName + " " + city 346 + default: 347 + name = page.Chain.DisplayName 348 + } 349 + } 350 + 351 + //seems fragile? 352 + lat, lon := extractGeoPosition(body) 353 + 354 + return &StoreSummary{ 355 + ID: page.Chain.IDPrefix + storeID, 356 + Brand: page.Chain.Brand, 357 + Domain: page.Chain.Domain, 358 + StoreID: storeID, 359 + Name: name, 360 + Address: address, 361 + City: city, 362 + State: state, 363 + ZipCode: zipCode, 364 + URL: pageURL, 365 + Lat: lat, 366 + Lon: lon, 367 + }, nil 368 + } 369 + 370 + func extractProfileJSON(body []byte) ([]byte, error) { 371 + start := bytes.Index(body, yextProfilePrefix) 372 + if start < 0 { 373 + return nil, fmt.Errorf("yext profile not found") 374 + } 375 + start += len(yextProfilePrefix) 376 + 377 + end := bytes.Index(body[start:], yextProfileSuffix) 378 + if end < 0 { 379 + return nil, fmt.Errorf("yext profile terminator not found") 380 + } 381 + 382 + return body[start : start+end], nil 383 + } 384 + 385 + func extractGeoPosition(body []byte) (*float64, *float64) { 386 + matches := geoPositionRe.FindSubmatch(body) 387 + if len(matches) != 3 { 388 + return nil, nil 389 + } 390 + 391 + lat, err := strconv.ParseFloat(string(matches[1]), 64) 392 + if err != nil { 393 + return nil, nil 394 + } 395 + lon, err := strconv.ParseFloat(string(matches[2]), 64) 396 + if err != nil { 397 + return nil, nil 398 + } 399 + return &lat, &lon 400 + }
+154
internal/albertsons/discovery_test.go
··· 1 + package albertsons 2 + 3 + import ( 4 + "strings" 5 + "testing" 6 + ) 7 + 8 + func chainByBrand(t *testing.T, brand string) Chain { 9 + t.Helper() 10 + 11 + for _, chain := range DefaultChains() { 12 + if chain.Brand == brand { 13 + return chain 14 + } 15 + } 16 + t.Fatalf("brand %q not found in DefaultChains", brand) 17 + return Chain{} 18 + } 19 + 20 + func TestParseStorePageURL(t *testing.T) { 21 + t.Parallel() 22 + 23 + tests := []struct { 24 + name string 25 + chain Chain 26 + url string 27 + want bool 28 + }{ 29 + { 30 + name: "safeway store page", 31 + chain: chainByBrand(t, "safeway"), 32 + url: "https://local.safeway.com/safeway/wa/bellevue/15100-se-38th-st.html", 33 + want: true, 34 + }, 35 + { 36 + name: "albertsons store page", 37 + chain: chainByBrand(t, "albertsons"), 38 + url: "https://local.albertsons.com/ar/texarkana/3710-state-line-ave.html", 39 + want: true, 40 + }, 41 + { 42 + name: "category page", 43 + chain: chainByBrand(t, "acmemarkets"), 44 + url: "https://local.acmemarkets.com/ct/new-canaan/288-elm-st/produce.html", 45 + want: false, 46 + }, 47 + { 48 + name: "city page", 49 + chain: chainByBrand(t, "haggen"), 50 + url: "https://local.haggen.com/wa/bellingham.html", 51 + want: false, 52 + }, 53 + { 54 + name: "other brand under safeway host", 55 + chain: chainByBrand(t, "safeway"), 56 + url: "https://local.safeway.com/pak-n-save/ca/emeryville/3889-san-pablo-ave.html", 57 + want: false, 58 + }, 59 + } 60 + 61 + for _, tc := range tests { 62 + tc := tc 63 + t.Run(tc.name, func(t *testing.T) { 64 + t.Parallel() 65 + 66 + _, got := ParseStorePageURL(tc.url, tc.chain) 67 + if got != tc.want { 68 + t.Fatalf("ParseStorePageURL(%q) = %v, want %v", tc.url, got, tc.want) 69 + } 70 + }) 71 + } 72 + } 73 + 74 + func TestFilterStorePagesDeduplicatesAndSkipsNonStores(t *testing.T) { 75 + t.Parallel() 76 + 77 + safeway := chainByBrand(t, "safeway") 78 + pages := FilterStorePages([]string{ 79 + 
"https://local.safeway.com/safeway/wa/bellevue/15100-se-38th-st.html", 80 + "https://local.safeway.com/safeway/wa/bellevue/15100-se-38th-st.html", 81 + "https://local.safeway.com/safeway/wa/bellevue/15100-se-38th-st/produce.html", 82 + "https://local.starmarket.com/search.html", 83 + }, safeway) 84 + 85 + if len(pages) != 1 { 86 + t.Fatalf("expected 1 store page, got %d", len(pages)) 87 + } 88 + if pages[0].Chain.Brand != "safeway" { 89 + t.Fatalf("unexpected brand: %+v", pages[0]) 90 + } 91 + } 92 + 93 + func TestExtractStoreSummary(t *testing.T) { 94 + t.Parallel() 95 + 96 + pageURL := "https://local.safeway.com/safeway/wa/bellevue/15100-se-38th-st.html" 97 + html := strings.Join([]string{ 98 + `<!doctype html><html><head>`, 99 + `<meta name="geo.position" content="47.5765527;-122.1381125">`, 100 + `<script type="text/javascript">window.Yext = (function(Yext){Yext.Profile = {"id":"1444","name":"Safeway","address":{"city":"Bellevue","line1":"15100 SE 38th St","postalCode":"98006","region":"WA"}}; return Yext;})(window.Yext || {});</script>`, 101 + `</head><body></body></html>`, 102 + }, "") 103 + 104 + summary, err := ExtractStoreSummary(pageURL, []byte(html), chainByBrand(t, "safeway")) 105 + if err != nil { 106 + t.Fatalf("ExtractStoreSummary returned error: %v", err) 107 + } 108 + 109 + if summary.ID != "safeway_1444" { 110 + t.Fatalf("unexpected id: %+v", summary) 111 + } 112 + if summary.StoreID != "1444" || summary.Brand != "safeway" || summary.Domain != "local.safeway.com" { 113 + t.Fatalf("unexpected summary identifiers: %+v", summary) 114 + } 115 + if summary.Name != "Safeway 15100 SE 38th St" { 116 + t.Fatalf("unexpected name: %q", summary.Name) 117 + } 118 + if summary.Address != "15100 SE 38th St" || summary.State != "WA" || summary.ZipCode != "98006" { 119 + t.Fatalf("unexpected address fields: %+v", summary) 120 + } 121 + if summary.Lat == nil || summary.Lon == nil { 122 + t.Fatalf("expected coordinates, got %+v", summary) 123 + } 124 + } 125 + 126 + 
func TestExtractStoreSummaryRequiresEmbeddedStoreID(t *testing.T) { 127 + t.Parallel() 128 + 129 + pageURL := "https://local.albertsons.com/ar/texarkana/3710-state-line-ave.html" 130 + html := `<!doctype html><html><head><script>window.Yext = (function(Yext){Yext.Profile = {"name":"Albertsons","address":{"city":"Texarkana","line1":"3710 State Line Ave","postalCode":"71854","region":"AR"}}; return Yext;})(window.Yext || {});</script></head><body></body></html>` 131 + 132 + _, err := ExtractStoreSummary(pageURL, []byte(html), chainByBrand(t, "albertsons")) 133 + if err == nil { 134 + t.Fatal("expected missing store id error") 135 + } 136 + if got, want := err.Error(), "store id not found in yext profile"; got != want { 137 + t.Fatalf("unexpected error: got %q want %q", got, want) 138 + } 139 + } 140 + 141 + func TestExtractStoreSummaryFallsBackToMetaID(t *testing.T) { 142 + t.Parallel() 143 + 144 + pageURL := "https://local.albertsons.com/az/lake-havasu-city/1980-mcculloch-blvd.html" 145 + html := `<!doctype html><html><head><script>window.Yext = (function(Yext){Yext.Profile = {"name":"Albertsons","meta":{"id":"3204"},"address":{"city":"Lake Havasu City","line1":"1980 Mcculloch Blvd","postalCode":"86403","region":"AZ"}}; return Yext;})(window.Yext || {});</script></head><body></body></html>` 146 + 147 + summary, err := ExtractStoreSummary(pageURL, []byte(html), chainByBrand(t, "albertsons")) 148 + if err != nil { 149 + t.Fatalf("ExtractStoreSummary returned error: %v", err) 150 + } 151 + if summary.ID != "albertsons_3204" || summary.StoreID != "3204" { 152 + t.Fatalf("unexpected summary: %+v", summary) 153 + } 154 + }
+71
internal/albertsons/locations.go
··· 1 + package albertsons 2 + 3 + import ( 4 + "careme/internal/cache" 5 + "careme/internal/locations/nearby" 6 + locationtypes "careme/internal/locations/types" 7 + "context" 8 + "fmt" 9 + "strings" 10 + ) 11 + 12 + type centroidByZip interface { 13 + ZipCentroidByZIP(zip string) (locationtypes.ZipCentroid, bool) 14 + } 15 + 16 + type LocationBackend struct { 17 + zipLookup centroidByZip 18 + byID map[string]locationtypes.Location 19 + } 20 + 21 + func NewLocationBackend(ctx context.Context, c cache.ListCache, zipLookup centroidByZip) (*LocationBackend, error) { 22 + if c == nil { 23 + return nil, fmt.Errorf("list cache is required") 24 + } 25 + if zipLookup == nil { 26 + return nil, fmt.Errorf("zip centroid lookup is required") 27 + } 28 + 29 + summaries, err := loadCachedStoreSummaries(ctx, c) 30 + if err != nil { 31 + return nil, err 32 + } 33 + 34 + byID := make(map[string]locationtypes.Location, len(summaries)) 35 + for _, summary := range summaries { 36 + loc := storeSummaryToLocation(*summary) 37 + byID[loc.ID] = loc 38 + } 39 + 40 + return &LocationBackend{ 41 + zipLookup: zipLookup, 42 + byID: byID, 43 + }, nil 44 + } 45 + 46 + func (b *LocationBackend) IsID(locationID string) bool { 47 + return IsID(locationID) 48 + } 49 + 50 + func (b *LocationBackend) GetLocationByID(_ context.Context, locationID string) (*locationtypes.Location, error) { 51 + locationID = strings.TrimSpace(locationID) 52 + if !IsID(locationID) { 53 + return nil, fmt.Errorf("albertsons location id %q is invalid", locationID) 54 + } 55 + 56 + loc, exists := b.byID[locationID] 57 + if !exists { 58 + return nil, fmt.Errorf("albertsons location %q not found", locationID) 59 + } 60 + 61 + copy := loc 62 + return &copy, nil 63 + } 64 + 65 + func (b *LocationBackend) GetLocationsByZip(ctx context.Context, zipcode string) ([]locationtypes.Location, error) { 66 + candidates := make([]locationtypes.Location, 0, len(b.byID)) 67 + for _, loc := range b.byID { 68 + candidates = append(candidates, 
loc) 69 + } 70 + return nearby.FilterAndSortByZip(ctx, b.zipLookup, zipcode, candidates, nearby.MaxLocationDistanceMiles), nil 71 + }
+133
internal/albertsons/locations_test.go
··· 1 + package albertsons 2 + 3 + import ( 4 + "careme/internal/cache" 5 + locationtypes "careme/internal/locations/types" 6 + "context" 7 + "strings" 8 + "testing" 9 + ) 10 + 11 + func TestNewLocationBackendBuildsIndexAndLookup(t *testing.T) { 12 + t.Parallel() 13 + 14 + cacheStore := cache.NewInMemoryCache() 15 + if err := CacheStoreSummary(context.Background(), cacheStore, nearbySummary()); err != nil { 16 + t.Fatalf("CacheStoreSummary returned error: %v", err) 17 + } 18 + 19 + backend, err := NewLocationBackend(context.Background(), cacheStore, staticZIPLookup{ 20 + "98006": {Lat: 47.5750, Lon: -122.1400}, 21 + }) 22 + if err != nil { 23 + t.Fatalf("NewLocationBackend returned error: %v", err) 24 + } 25 + 26 + if !backend.IsID("safeway_1444") { 27 + t.Fatalf("expected safeway id to be recognized") 28 + } 29 + if !backend.IsID("albertsons_611") { 30 + t.Fatalf("expected albertsons id to be recognized") 31 + } 32 + 33 + loc, err := backend.GetLocationByID(context.Background(), "safeway_1444") 34 + if err != nil { 35 + t.Fatalf("GetLocationByID returned error: %v", err) 36 + } 37 + if loc.Name != "Safeway 15100 SE 38th St" || loc.ZipCode != "98006" { 38 + t.Fatalf("unexpected location: %+v", loc) 39 + } 40 + } 41 + 42 + func TestLocationBackendGetLocationsByZipUsesDistance(t *testing.T) { 43 + t.Parallel() 44 + 45 + cacheStore := cache.NewInMemoryCache() 46 + if err := CacheStoreSummary(context.Background(), cacheStore, nearbySummary()); err != nil { 47 + t.Fatalf("cache nearby summary: %v", err) 48 + } 49 + if err := CacheStoreSummary(context.Background(), cacheStore, farSummary()); err != nil { 50 + t.Fatalf("cache far summary: %v", err) 51 + } 52 + 53 + backend, err := NewLocationBackend(context.Background(), cacheStore, staticZIPLookup{ 54 + "98006": {Lat: 47.5750, Lon: -122.1400}, 55 + }) 56 + if err != nil { 57 + t.Fatalf("NewLocationBackend returned error: %v", err) 58 + } 59 + 60 + locs, err := backend.GetLocationsByZip(context.Background(), "98006") 61 + 
if err != nil { 62 + t.Fatalf("GetLocationsByZip returned error: %v", err) 63 + } 64 + if len(locs) != 1 { 65 + t.Fatalf("expected 1 nearby location, got %d", len(locs)) 66 + } 67 + if locs[0].ID != "safeway_1444" { 68 + t.Fatalf("unexpected location id: %q", locs[0].ID) 69 + } 70 + } 71 + 72 + func TestNewLocationBackendErrorsWhenNoCachedSummaries(t *testing.T) { 73 + t.Parallel() 74 + 75 + cacheStore := cache.NewInMemoryCache() 76 + 77 + _, err := NewLocationBackend(context.Background(), cacheStore, staticZIPLookup{}) 78 + if err == nil { 79 + t.Fatal("expected NewLocationBackend to return an error") 80 + } 81 + if !strings.Contains(err.Error(), "failed to load albertsons locations") { 82 + t.Fatalf("expected missing summaries error, got %v", err) 83 + } 84 + } 85 + 86 + type staticZIPLookup map[string]coords 87 + 88 + type coords struct { 89 + Lat float64 90 + Lon float64 91 + } 92 + 93 + func (s staticZIPLookup) ZipCentroidByZIP(zip string) (locationtypes.ZipCentroid, bool) { 94 + coord, ok := s[zip] 95 + if !ok { 96 + return locationtypes.ZipCentroid{}, false 97 + } 98 + return locationtypes.ZipCentroid{Lat: coord.Lat, Lon: coord.Lon}, true 99 + } 100 + 101 + func nearbySummary() *StoreSummary { 102 + lat := 47.5765527 103 + lon := -122.1381125 104 + return &StoreSummary{ 105 + ID: "safeway_1444", 106 + Brand: "safeway", 107 + Domain: "local.safeway.com", 108 + StoreID: "1444", 109 + Name: "Safeway 15100 SE 38th St", 110 + Address: "15100 SE 38th St", 111 + State: "WA", 112 + ZipCode: "98006", 113 + Lat: &lat, 114 + Lon: &lon, 115 + } 116 + } 117 + 118 + func farSummary() *StoreSummary { 119 + lat := 33.4593747 120 + lon := -94.0419186 121 + return &StoreSummary{ 122 + ID: "albertsons_611", 123 + Brand: "albertsons", 124 + Domain: "local.albertsons.com", 125 + StoreID: "611", 126 + Name: "Albertsons 3710 State Line Ave", 127 + Address: "3710 State Line Ave", 128 + State: "AR", 129 + ZipCode: "71854", 130 + Lat: &lat, 131 + Lon: &lon, 132 + } 133 + }
+2 -1
internal/cache/azure.go
··· 5 5 "fmt" 6 6 "io" 7 7 "log" 8 + "log/slog" 8 9 "os" 9 10 "strings" 10 11 ··· 140 141 func EnsureCache(container string) (ListCache, error) { 141 142 _, ok := os.LookupEnv("AZURE_STORAGE_ACCOUNT_NAME") 142 143 if ok { 143 - log.Println("Using Azure Blob Storage for cache") 144 + slog.Info("Using Azure Blob Storage for cache", "container", container) 144 145 return NewBlobCache(container) 145 146 } 146 147 return NewFileCache(container), nil
+12
internal/config/config.go
··· 12 12 Kroger KrogerConfig `json:"kroger"` 13 13 Walmart WalmartConfig `json:"walmart"` 14 14 WholeFoods WholeFoodsConfig `json:"wholefoods"` 15 + Albertsons AlbertsonsConfig `json:"albertsons"` 15 16 Mocks MockConfig `json:"mocks"` 16 17 Clerk ClerkConfig `json:"clerk"` 17 18 Admin AdminConfig `json:"admin"` ··· 51 52 } 52 53 53 54 func (c *WholeFoodsConfig) IsEnabled() bool { 55 + return c.Enable 56 + } 57 + 58 + type AlbertsonsConfig struct { 59 + Enable bool `json:"enable"` 60 + } 61 + 62 + func (c *AlbertsonsConfig) IsEnabled() bool { 54 63 return c.Enable 55 64 } 56 65 ··· 110 119 }, 111 120 WholeFoods: WholeFoodsConfig{ 112 121 Enable: os.Getenv("WHOLEFOODS_ENABLE") != "", 122 + }, 123 + Albertsons: AlbertsonsConfig{ 124 + Enable: os.Getenv("ALBERTSONS_ENABLE") != "", 113 125 }, 114 126 Walmart: WalmartConfig{ 115 127 ConsumerID: os.Getenv("WALMART_CONSUMER_ID"),
+73
internal/locations/albertsons_test.go
··· 1 + package locations 2 + 3 + import ( 4 + "careme/internal/albertsons" 5 + "careme/internal/cache" 6 + "careme/internal/config" 7 + "context" 8 + "os" 9 + "testing" 10 + ) 11 + 12 + func TestNewAddsAlbertsonsBackendWhenEnabled(t *testing.T) { 13 + cacheStore := cache.NewInMemoryCache() 14 + oldWD, err := os.Getwd() 15 + if err != nil { 16 + t.Fatalf("Getwd returned error: %v", err) 17 + } 18 + tempDir := t.TempDir() 19 + if err := os.Chdir(tempDir); err != nil { 20 + t.Fatalf("Chdir returned error: %v", err) 21 + } 22 + t.Cleanup(func() { 23 + _ = os.Chdir(oldWD) 24 + }) 25 + 26 + unsetEnvForTest(t, "AZURE_STORAGE_ACCOUNT_NAME") 27 + unsetEnvForTest(t, "AZURE_STORAGE_PRIMARY_ACCOUNT_KEY") 28 + 29 + listCache, err := cache.EnsureCache("albertsons") 30 + if err != nil { 31 + t.Fatalf("EnsureCache returned error: %v", err) 32 + } 33 + 34 + lat := 47.5765527 35 + lon := -122.1381125 36 + if err := albertsons.CacheStoreSummary(context.Background(), listCache, &albertsons.StoreSummary{ 37 + ID: "safeway_1444", 38 + Brand: "safeway", 39 + Domain: "local.safeway.com", 40 + StoreID: "1444", 41 + Name: "Safeway 15100 SE 38th St", 42 + Address: "15100 SE 38th St", 43 + State: "WA", 44 + ZipCode: "98006", 45 + Lat: &lat, 46 + Lon: &lon, 47 + }); err != nil { 48 + t.Fatalf("CacheStoreSummary returned error: %v", err) 49 + } 50 + 51 + storage, err := New(&config.Config{ 52 + Albertsons: config.AlbertsonsConfig{Enable: true}, 53 + }, cacheStore, LoadCentroids()) 54 + if err != nil { 55 + t.Fatalf("New returned error: %v", err) 56 + } 57 + 58 + locStorage, ok := storage.(*locationStorage) 59 + if !ok { 60 + t.Fatalf("expected *locationStorage, got %T", storage) 61 + } 62 + 63 + var found bool 64 + for _, backend := range locStorage.client { 65 + if _, ok := backend.(*albertsons.LocationBackend); ok { 66 + found = true 67 + break 68 + } 69 + } 70 + if !found { 71 + t.Fatalf("expected Albertsons backend to be registered") 72 + } 73 + }
+14
internal/locations/locations.go
··· 1 1 package locations 2 2 3 3 import ( 4 + "careme/internal/albertsons" 4 5 "careme/internal/auth" 5 6 "careme/internal/cache" 6 7 "careme/internal/config" ··· 104 105 return nil, fmt.Errorf("failed to create Whole Foods backend: %w", err) 105 106 } 106 107 backends = append(backends, wfBackend) 108 + } 109 + if cfg.Albertsons.IsEnabled() { 110 + slog.Info("initializing Albertsons location backend") 111 + listCache, err := cache.EnsureCache(albertsons.Container) 112 + if err != nil { 113 + return nil, fmt.Errorf("failed to create Albertsons list cache: %w", err) 114 + } 115 + 116 + albertsonsBackend, err := albertsons.NewLocationBackend(context.Background(), listCache, centroids) 117 + if err != nil { 118 + return nil, fmt.Errorf("failed to create Albertsons backend: %w", err) 119 + } 120 + backends = append(backends, albertsonsBackend) 107 121 } 108 122 return &locationStorage{ 109 123 client: backends,
+52
internal/locations/nearby/nearby.go
··· 1 + package nearby 2 + 3 + import ( 4 + "careme/internal/locations/geo" 5 + locationtypes "careme/internal/locations/types" 6 + "context" 7 + "log/slog" 8 + "sort" 9 + "strings" 10 + ) 11 + 12 + type CentroidLookup interface { 13 + ZipCentroidByZIP(zip string) (locationtypes.ZipCentroid, bool) 14 + } 15 + 16 + const MaxLocationDistanceMiles = 20.0 17 + 18 + func FilterAndSortByZip(ctx context.Context, zipLookup CentroidLookup, zipcode string, candidates []locationtypes.Location, maxDistanceMiles float64) []locationtypes.Location { 19 + centroid, ok := zipLookup.ZipCentroidByZIP(strings.TrimSpace(zipcode)) 20 + if !ok { 21 + slog.WarnContext(ctx, "requested zip has no centroid; returning unsorted locations without distance filter", "zip", zipcode) 22 + return nil 23 + } 24 + 25 + type ranked struct { 26 + location locationtypes.Location 27 + distance float64 28 + } 29 + 30 + var rankedLocations []ranked 31 + for _, loc := range candidates { 32 + if loc.Lat == nil || loc.Lon == nil { 33 + continue 34 + } 35 + 36 + distance := geo.HaversineMiles(centroid.Lat, centroid.Lon, *loc.Lat, *loc.Lon) 37 + if distance > maxDistanceMiles { 38 + continue 39 + } 40 + rankedLocations = append(rankedLocations, ranked{location: loc, distance: distance}) 41 + } 42 + 43 + sort.SliceStable(rankedLocations, func(i, j int) bool { 44 + return rankedLocations[i].distance < rankedLocations[j].distance 45 + }) 46 + 47 + out := make([]locationtypes.Location, 0, len(rankedLocations)) 48 + for _, item := range rankedLocations { 49 + out = append(out, item.location) 50 + } 51 + return out 52 + }
+50
internal/sitemapfetch/fetch.go
// urlSet is the subset of a sitemap document this package reads: the <loc>
// text of each <url> element directly under the document root.
type urlSet struct {
	URLs []struct {
		Loc string `xml:"loc"`
	} `xml:"url"`
}

// FetchURLs downloads the sitemap at sitemapURL with client and returns the
// non-empty, whitespace-trimmed <loc> values of its <url> entries, in
// document order.
//
// It returns an error when client is nil, when the request cannot be built
// or sent, when the response status is not 200 OK, or when the body is not
// well-formed XML. Only <url> children of the root are read, so a document
// without any (for example a sitemap index) yields an empty, non-nil slice.
func FetchURLs(ctx context.Context, client *http.Client, sitemapURL string) ([]string, error) {
	// Calling Do on a nil *http.Client panics; report it as an error.
	if client == nil {
		return nil, fmt.Errorf("get sitemap: http client is required")
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, sitemapURL, nil)
	if err != nil {
		return nil, fmt.Errorf("build sitemap request: %w", err)
	}
	req.Header.Set("User-Agent", "Mozilla/5.0")

	resp, err := client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("get sitemap: %w", err)
	}
	defer func() {
		// Nothing useful can be done with a close error on a body that
		// was only read.
		_ = resp.Body.Close()
	}()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("get sitemap: status %s", resp.Status)
	}

	var sitemap urlSet
	if err := xml.NewDecoder(resp.Body).Decode(&sitemap); err != nil {
		return nil, fmt.Errorf("decode sitemap: %w", err)
	}

	urls := make([]string, 0, len(sitemap.URLs))
	for _, item := range sitemap.URLs {
		loc := strings.TrimSpace(item.Loc)
		if loc == "" {
			continue
		}
		urls = append(urls, loc)
	}
	return urls, nil
}
+49
internal/sitemapfetch/fetch_test.go
··· 1 + package sitemapfetch 2 + 3 + import ( 4 + "context" 5 + "fmt" 6 + "io" 7 + "net/http" 8 + "strings" 9 + "testing" 10 + ) 11 + 12 + func TestFetchURLs(t *testing.T) { 13 + t.Parallel() 14 + 15 + client := &http.Client{ 16 + Transport: roundTripperFunc(func(req *http.Request) (*http.Response, error) { 17 + if req.URL.String() != "https://example.com/sitemap.xml" { 18 + return responseWithBody(http.StatusNotFound, "not found"), nil 19 + } 20 + return responseWithBody(http.StatusOK, `<?xml version="1.0" encoding="UTF-8"?><urlset><url><loc>https://example.com/store-1</loc></url><url><loc> https://example.com/store-2 </loc></url><url><loc></loc></url></urlset>`), nil 21 + }), 22 + } 23 + 24 + got, err := FetchURLs(context.Background(), client, "https://example.com/sitemap.xml") 25 + if err != nil { 26 + t.Fatalf("FetchURLs returned error: %v", err) 27 + } 28 + if len(got) != 2 { 29 + t.Fatalf("expected 2 urls, got %d", len(got)) 30 + } 31 + if got[0] != "https://example.com/store-1" || got[1] != "https://example.com/store-2" { 32 + t.Fatalf("unexpected urls: %+v", got) 33 + } 34 + } 35 + 36 + type roundTripperFunc func(*http.Request) (*http.Response, error) 37 + 38 + func (f roundTripperFunc) RoundTrip(req *http.Request) (*http.Response, error) { 39 + return f(req) 40 + } 41 + 42 + func responseWithBody(status int, body string) *http.Response { 43 + return &http.Response{ 44 + StatusCode: status, 45 + Status: fmt.Sprintf("%d %s", status, http.StatusText(status)), 46 + Header: make(http.Header), 47 + Body: io.NopCloser(strings.NewReader(body)), 48 + } 49 + }
+2 -38
internal/wholefoods/discovery.go
··· 1 1 package wholefoods 2 2 3 3 import ( 4 + "careme/internal/sitemapfetch" 4 5 "context" 5 - "encoding/xml" 6 6 "fmt" 7 7 "io" 8 8 "net/http" 9 9 "regexp" 10 - "strings" 11 10 ) 12 11 13 - type sitemapURLSet struct { 14 - URLs []struct { 15 - Loc string `xml:"loc"` 16 - } `xml:"url"` 17 - } 18 - 19 12 var storeIDRe = regexp.MustCompile(`store-id="(\d+)"`) 20 13 21 14 func FetchSitemap(ctx context.Context, client *http.Client, sitemapURL string) ([]string, error) { 22 - req, err := http.NewRequestWithContext(ctx, http.MethodGet, sitemapURL, nil) 23 - if err != nil { 24 - return nil, fmt.Errorf("build sitemap request: %w", err) 25 - } 26 - req.Header.Set("User-Agent", "Mozilla/5.0") 27 - 28 - resp, err := client.Do(req) 29 - if err != nil { 30 - return nil, fmt.Errorf("get sitemap: %w", err) 31 - } 32 - defer func() { 33 - _ = resp.Body.Close() 34 - }() 35 - 36 - if resp.StatusCode != http.StatusOK { 37 - return nil, fmt.Errorf("get sitemap: status %s", resp.Status) 38 - } 39 - 40 - var sitemap sitemapURLSet 41 - if err := xml.NewDecoder(resp.Body).Decode(&sitemap); err != nil { 42 - return nil, fmt.Errorf("decode sitemap: %w", err) 43 - } 44 - 45 - urls := make([]string, 0, len(sitemap.URLs)) 46 - for _, item := range sitemap.URLs { 47 - if loc := strings.TrimSpace(item.Loc); loc != "" { 48 - urls = append(urls, loc) 49 - } 50 - } 51 - return urls, nil 15 + return sitemapfetch.FetchURLs(ctx, client, sitemapURL) 52 16 } 53 17 54 18 func FetchStoreIDFromPage(ctx context.Context, client *http.Client, pageURL string) (string, error) {
+4 -35
internal/wholefoods/locations.go
··· 2 2 3 3 import ( 4 4 "careme/internal/cache" 5 - "careme/internal/locations/geo" 5 + "careme/internal/locations/nearby" 6 6 locationtypes "careme/internal/locations/types" 7 7 "context" 8 8 "fmt" 9 - "log/slog" 10 - "sort" 11 9 "strings" 12 - 13 - "github.com/samber/lo" 14 10 ) 15 - 16 - const maxLocationDistanceMiles = 20.0 17 11 18 12 type centroidByZip interface { 19 13 ZipCentroidByZIP(zip string) (locationtypes.ZipCentroid, bool) ··· 71 65 } 72 66 73 67 func (b *LocationBackend) GetLocationsByZip(ctx context.Context, zipcode string) ([]locationtypes.Location, error) { 74 - centroid, ok := b.zipLookup.ZipCentroidByZIP(strings.TrimSpace(zipcode)) 75 - if !ok { 76 - slog.WarnContext(ctx, "requested zip has no centroid; returning unsorted locations without distance filter", "zip", zipcode) 77 - //fall back to sort by zip? 78 - return nil, nil 79 - } 80 - 81 - type ranked struct { 82 - location locationtypes.Location 83 - distance float64 84 - } 85 - var rankedLocations []ranked 68 + candidates := make([]locationtypes.Location, 0, len(b.byID)) 86 69 for _, loc := range b.byID { 87 - if loc.Lat == nil || loc.Lon == nil { 88 - continue 89 - } 90 - distance := geo.HaversineMiles(centroid.Lat, centroid.Lon, *loc.Lat, *loc.Lon) 91 - if distance > maxLocationDistanceMiles { 92 - continue 93 - } 94 - rankedLocations = append(rankedLocations, ranked{location: loc, distance: distance}) 70 + candidates = append(candidates, loc) 95 71 } 96 - 97 - sort.SliceStable(rankedLocations, func(i, j int) bool { 98 - return rankedLocations[i].distance < rankedLocations[j].distance 99 - }) 100 - 101 - return lo.Map(rankedLocations, func(r ranked, _ int) locationtypes.Location { 102 - return r.location 103 - }), nil 72 + return nearby.FilterAndSortByZip(ctx, b.zipLookup, zipcode, candidates, nearby.MaxLocationDistanceMiles), nil 104 73 } 105 74 106 75 func parseLocationID(locationID string) (string, bool) {