ai cooking
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

log errors in albertsons/wholefoods scrape (#465)

* log errors in albertsons scrape

* set up logger for scrapers so we can capture errors, including parsing errors

* fumpt

authored by

Paul Miller and committed by
GitHub
dae7edff 82eb279e

+99 -37
+28 -16
cmd/albertsons/main.go
··· 5 5 "errors" 6 6 "flag" 7 7 "fmt" 8 - "log" 9 8 "log/slog" 10 9 "net/http" 10 + "os" 11 11 "strings" 12 12 "time" 13 13 ··· 18 18 ) 19 19 20 20 func main() { 21 + ctx := context.Background() 22 + closeLogger, err := logsetup.Configure(ctx) 23 + if err != nil { 24 + slog.ErrorContext(ctx, "can't set up logger", "error", err) 25 + os.Exit(1) 26 + } 27 + 28 + if err := run(ctx); err != nil { 29 + slog.Error("albertsons scrape failed", "error", err) 30 + closeLogger() 31 + os.Exit(1) 32 + } 33 + closeLogger() 34 + } 35 + 36 + func run(ctx context.Context) error { 21 37 var ( 22 38 brands string 23 39 timeoutSec int 24 40 delayMS int 25 41 ) 26 - 27 - flag.StringVar(&brands, "brands", "", "comma-separated brand keys to sync (default: all configured chains)") 28 - flag.IntVar(&timeoutSec, "timeout", 20, "HTTP timeout in seconds") 29 - flag.IntVar(&delayMS, "delay-ms", 1000, "delay between store page requests in milliseconds") 30 - flag.Parse() 31 - 32 - ctx := context.Background() 33 - closeLogger, err := logsetup.Configure(ctx) 34 - if err != nil { 35 - log.Fatalf("failed to configure logging: %v", err) 42 + fs := flag.NewFlagSet("alberrtsons", flag.ContinueOnError) 43 + fs.StringVar(&brands, "brands", "", "comma-separated brand keys to sync (default: all configured chains)") 44 + fs.IntVar(&timeoutSec, "timeout", 20, "HTTP timeout in seconds") 45 + fs.IntVar(&delayMS, "delay-ms", 1000, "delay between store page requests in milliseconds") 46 + if err := fs.Parse(os.Args[1:]); err != nil { 47 + return fmt.Errorf("can't parse %s", err) 36 48 } 37 - defer closeLogger() 38 49 39 50 chains, err := selectedChains(brands) 40 51 if err != nil { 41 - log.Fatalf("failed to parse brands: %v", err) 52 + return fmt.Errorf("parse brands: %w", err) 42 53 } 43 54 44 55 cacheStore, err := cache.EnsureCache(albertsons.Container) 45 56 if err != nil { 46 - log.Fatalf("failed to create cache: %v", err) 57 + return fmt.Errorf("create cache: %w", err) 47 58 } 48 59 49 60 httpClient := 
&http.Client{Timeout: time.Duration(timeoutSec) * time.Second} ··· 51 62 52 63 synced, err := syncChains(ctx, cacheStore, httpClient, chains, delay) 53 64 if err != nil { 54 - log.Fatalf("failed to sync Albertsons-family store summaries: %v", err) 65 + return fmt.Errorf("sync Albertsons-family store summaries: %w", err) 55 66 } 56 67 57 - fmt.Printf("synced %d Albertsons-family store summaries\n", synced) 68 + slog.InfoContext(ctx, "synced Albertsons-family store summaries", "count", synced) 69 + return nil 58 70 } 59 71 60 72 func syncChains(ctx context.Context, cacheStore cache.ListCache, httpClient *http.Client, chains []albertsons.Chain, delay time.Duration) (int, error) {
+28 -21
cmd/wholefoods/main.go
··· 18 18 ) 19 19 20 20 func main() { 21 + ctx := context.Background() 22 + closeLogger, err := logsetup.Configure(ctx) 23 + if err != nil { 24 + log.Fatalf("failed to configure logging: %v", err) 25 + } 26 + if err := run(ctx); err != nil { 27 + slog.Error("failed abertson scrape", "error", err) 28 + closeLogger() 29 + os.Exit(1) 30 + } 31 + closeLogger() 32 + } 33 + 34 + func run(ctx context.Context) error { 21 35 var ( 22 36 baseURL string 23 37 sitemapURL string 24 38 timeoutSec int 25 39 ) 26 40 27 - flag.StringVar(&baseURL, "base-url", wholefoods.DefaultBaseURL, "Whole Foods base URL") 28 - flag.StringVar(&sitemapURL, "sitemap-url", wholefoods.DefaultStoreSitemapURL, "Whole Foods store sitemap URL") 29 - flag.IntVar(&timeoutSec, "timeout", 20, "HTTP timeout in seconds") 30 - flag.Parse() 31 - 32 - ctx := context.Background() 33 - closeLogger, err := logsetup.Configure(ctx) 34 - if err != nil { 35 - log.Fatalf("failed to configure logging: %v", err) 41 + fs := flag.NewFlagSet("alberrtsons", flag.ContinueOnError) 42 + fs.StringVar(&baseURL, "base-url", wholefoods.DefaultBaseURL, "Whole Foods base URL") 43 + fs.StringVar(&sitemapURL, "sitemap-url", wholefoods.DefaultStoreSitemapURL, "Whole Foods store sitemap URL") 44 + fs.IntVar(&timeoutSec, "timeout", 20, "HTTP timeout in seconds") 45 + if err := fs.Parse(os.Args[1:]); err != nil { 46 + return fmt.Errorf("can't parse %s", err) 36 47 } 37 - defer closeLogger() 38 48 39 49 cacheStore, err := cache.EnsureCache(wholefoods.Container) 40 50 if err != nil { 41 - slog.ErrorContext(ctx, "failed to create cache", "error", err) 42 - os.Exit(1) 51 + return err 43 52 } 44 53 45 54 httpClient := &http.Client{Timeout: time.Duration(timeoutSec) * time.Second} ··· 47 56 48 57 refs, err := resolveStoreReferences(ctx, cacheStore, httpClient, sitemapURL) 49 58 if err != nil { 50 - slog.ErrorContext(ctx, "failed to resolve store references", "error", err) 51 - os.Exit(1) 59 + return fmt.Errorf("failed to resolve store references: 
%w", err) 52 60 } 53 61 if len(refs) == 0 { 54 - slog.ErrorContext(ctx, "no Whole Foods store references found", "error", err) 55 - os.Exit(1) 62 + return fmt.Errorf("no Whole Foods store references found: %w", err) 56 63 } 57 64 58 65 slog.Info("syncing Whole Foods store summaries", "count", len(refs)) ··· 62 69 if err != nil { 63 70 if !errors.Is(err, wholefoods.ErrNotFound) { 64 71 slog.ErrorContext(ctx, "failed to fetch Whole Foods store summary", "store_id", ref.ID, "url", ref.URL, "error", err) 72 + // return error early? 65 73 } else { 66 74 slog.InfoContext(ctx, err.Error(), "store_id", ref.ID, "url", ref.URL) 67 75 } 68 76 continue 69 77 } 70 78 if err := wholefoods.CacheStoreSummary(ctx, cacheStore, summary); err != nil { 71 - slog.Warn("failed to cache Whole Foods store summary", "store_id", ref.ID, "error", err) 72 - continue 79 + return fmt.Errorf("faield to cache store %d, %w", summary.StoreID, err) 73 80 } 74 81 time.Sleep(5 * time.Second) // be nice to the server no rush here 75 82 synced++ 76 83 } 77 84 78 85 if err := wholefoods.RebuildLocationIndex(ctx, cacheStore, locations.LoadCentroids()); err != nil { 79 - slog.ErrorContext(ctx, "failed to rebuild Whole Foods location index", "error", err) 80 - os.Exit(1) 86 + return fmt.Errorf("failed to build index: %w", err) 81 87 } 82 88 83 - fmt.Printf("synced %d Whole Foods store summaries\n", synced) 89 + slog.InfoContext(ctx, "synced Whole Foods store summaries", "count", synced) 90 + return nil 84 91 } 85 92 86 93 func resolveStoreReferences(ctx context.Context, cacheStore cache.ListCache, httpClient *http.Client, sitemapURL string) ([]wholefoods.StoreReference, error) {
+42
deploy/cronjob-albertsons-scrape.yaml
··· 1 + apiVersion: batch/v1 2 + kind: CronJob 3 + metadata: 4 + name: albertsons-scrape 5 + labels: 6 + app: albertsons-scrape 7 + spec: 8 + schedule: "0 6 * * 0" 9 + concurrencyPolicy: Forbid 10 + successfulJobsHistoryLimit: 2 11 + failedJobsHistoryLimit: 3 12 + jobTemplate: 13 + spec: 14 + backoffLimit: 1 15 + template: 16 + metadata: 17 + labels: 18 + app: albertsons-scrape 19 + job: albertsons-scrape 20 + spec: 21 + restartPolicy: Never 22 + securityContext: 23 + runAsNonRoot: true 24 + runAsUser: 65532 25 + runAsGroup: 65532 26 + containers: 27 + - name: albertsons 28 + image: ghcr.io/paulgmiller/careme-albertsons:${IMAGE_TAG} 29 + imagePullPolicy: IfNotPresent 30 + envFrom: 31 + - secretRef: 32 + name: storage 33 + env: 34 + - name: APPLICATIONINSIGHTS_CONNECTION_STRING 35 + value: "InstrumentationKey=a532fcc7-5098-4f44-8dde-ff2f32d6a59b;IngestionEndpoint=https://westus3-1.in.applicationinsights.azure.com/;LiveEndpoint=https://westus3.livediagnostics.monitor.azure.com/;ApplicationId=fdc94780-6135-4a29-980e-ab114a402e58" 36 + resources: 37 + requests: 38 + cpu: 50m 39 + memory: 64Mi 40 + limits: 41 + cpu: 500m 42 + memory: 256Mi
+1
deploy/deploy.sh
··· 6 6 manifest_files=( 7 7 "${deploy_dir}/deploy.yaml" 8 8 "${deploy_dir}/cronjob-careme-mail.yaml" 9 + "${deploy_dir}/cronjob-albertsons-scrape.yaml" 9 10 "${deploy_dir}/cronjob-albertsons-reese84.yaml" 10 11 "${deploy_dir}/cronjob-wholefoods-scrape.yaml" 11 12 )