nice clean recipes pear.dunkirk.sh
recipes
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: add better image parsing

+70 -2
+70 -2
internal/extract/schema/jsonld.go
@@ -4,6 +4,7 @@
 	"encoding/json"
 	"fmt"
 	"regexp"
+	"strconv"
 	"strings"
 
 	"tangled.org/dunkirk.sh/pare/internal/models"
@@ -25,9 +26,12 @@
 	if recipe.Description == "" {
 		recipe.Description = findMetaDescription(doc)
 	}
+	if ogImg := findMetaImage(doc); ogImg != "" {
+		recipe.ImageURL = ogImg
+	}
 	if recipe.ImageURL == "" || looksSmall(recipe.ImageURL) {
-		if ogImg := findMetaImage(doc); ogImg != "" {
-			recipe.ImageURL = ogImg
+		if largeImg := findLargestImage(doc); largeImg != "" {
+			recipe.ImageURL = largeImg
 		}
 	}
 	return recipe, true
@@ -258,6 +262,70 @@
 	}
 	f(n)
 	return result
+}
+
+type imgCandidate struct {
+	src    string
+	width  int
+	height int
+}
+
+func findLargestImage(n *html.Node) string {
+	var candidates []imgCandidate
+	var f func(*html.Node)
+	f = func(n *html.Node) {
+		if n.Type == html.ElementNode && n.Data == "img" {
+			var src, dataSrc string
+			var w, h int
+			for _, a := range n.Attr {
+				switch a.Key {
+				case "src":
+					src = a.Val
+				case "data-lazy-src", "data-src":
+					dataSrc = a.Val
+				case "width":
+					w, _ = strconv.Atoi(a.Val)
+				case "height":
+					h, _ = strconv.Atoi(a.Val)
+				}
+			}
+			u := dataSrc
+			if u == "" {
+				u = src
+			}
+			if u == "" || strings.HasPrefix(u, "data:") {
+				return
+			}
+			candidates = append(candidates, imgCandidate{src: u, width: w, height: h})
+		}
+		for c := n.FirstChild; c != nil; c = c.NextSibling {
+			f(c)
+		}
+	}
+	f(n)
+	if len(candidates) == 0 {
+		return ""
+	}
+	// Pick the largest by area (width * height), preferring ones with explicit dimensions
+	best := candidates[0]
+	bestArea := best.width * best.height
+	for _, c := range candidates[1:] {
+		area := c.width * c.height
+		if area > bestArea {
+			best = c
+			bestArea = area
+		}
+	}
+	// If no candidate had dimensions, pick the first non-small URL
+	if bestArea == 0 {
+		for _, c := range candidates {
+			if !looksSmall(c.src) {
+				return c.src
+			}
+		}
+		return candidates[0].src
+	}
+	return best.src
 }
 
 func extractIngredients(m map[string]interface{}) []models.Ingredient {