nice clean recipes pear.dunkirk.sh
recipes
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: support French recipes

+407 -6
+39
internal/cooklang/export.go
··· 88 88 var timeRangeExportRe = regexp.MustCompile(`(?i)\b(\d+-\d+)\s*(seconds?|minutes?|mins?|hours?|hrs?|h)\b`) 89 89 var timeRe = regexp.MustCompile(`(?i)(^|[^0-9-])(\d+)\s*(seconds?|minutes?|mins?|hours?|hrs?|h)\b`) 90 90 91 + var frTimeRangeExportRe = regexp.MustCompile(`(?i)\b(\d+-\d+)\s*(secondes?|minutes?|mins?|heures?|h)\b`) 92 + var frTimeRe = regexp.MustCompile(`(?i)(^|[^0-9-])(\d+)\s*(secondes?|minutes?|mins?|heures?|h)\b`) 93 + 94 + func AnnotateTimersOnly(text string, lang string) string { 95 + rangeRe, timeReLang := timeRangeExportRe, timeRe 96 + if strings.HasPrefix(lang, "fr") { 97 + rangeRe, timeReLang = frTimeRangeExportRe, frTimeRe 98 + } 99 + 100 + annotated := rangeRe.ReplaceAllStringFunc(text, func(matchStr string) string { 101 + parts := rangeRe.FindStringSubmatch(matchStr) 102 + if len(parts) >= 3 { 103 + qty := parts[1] 104 + unit := parts[2] 105 + unit = normalizeTimeUnit(unit) 106 + return fmt.Sprintf("~{%s%%%s}", qty, unit) 107 + } 108 + return matchStr 109 + }) 110 + 111 + annotated = timeReLang.ReplaceAllStringFunc(annotated, func(matchStr string) string { 112 + parts := timeReLang.FindStringSubmatch(matchStr) 113 + if len(parts) >= 4 { 114 + leading := parts[1] 115 + qty := parts[2] 116 + unit := parts[3] 117 + unit = normalizeTimeUnit(unit) 118 + return leading + fmt.Sprintf("~{%s%%%s}", qty, unit) 119 + } 120 + return matchStr 121 + }) 122 + 123 + return annotated 124 + } 125 + 91 126 func AnnotateStepForDisplay(text string, ingredients []models.Ingredient) string { 92 127 index := buildIngredientIndex(ingredients) 93 128 annotated, _ := annotateStep(text, index) ··· 177 212 case "min", "mins": 178 213 return "minute" 179 214 case "hr", "hrs", "h": 215 + return "hour" 216 + case "seconde", "secondes": 217 + return "second" 218 + case "heure", "heures": 180 219 return "hour" 181 220 default: 182 221 return unit
+334
internal/extract/marmiton/marmiton.go
··· 1 + package marmiton 2 + 3 + import ( 4 + "fmt" 5 + "strings" 6 + 7 + "tangled.org/dunkirk.sh/pear/internal/models" 8 + 9 + "golang.org/x/net/html" 10 + ) 11 + 12 + func Extract(body string) (*models.Recipe, bool) { 13 + doc, err := html.Parse(strings.NewReader(body)) 14 + if err != nil { 15 + return nil, false 16 + } 17 + 18 + container := findByClass(doc, "recipeV2-container") 19 + if container == nil { 20 + return nil, false 21 + } 22 + 23 + recipe := &models.Recipe{ExtractionMethod: "marmiton"} 24 + 25 + if h1 := findFirstByTag(container, "h1"); h1 != "" { 26 + recipe.Name = h1 27 + } 28 + 29 + if img := findRecipeImage(container); img != "" { 30 + recipe.ImageURL = img 31 + } 32 + 33 + if yield, unit := findServings(container); yield != "" { 34 + if unit != "" { 35 + recipe.Yield = yield + " " + unit 36 + } else { 37 + recipe.Yield = yield 38 + } 39 + fmt.Sscanf(yield, "%d", &recipe.Servings) 40 + } 41 + 42 + recipe.PrepTime = findPrepTime(container) 43 + recipe.CookTime = findCookTime(container) 44 + 45 + recipe.Ingredients = extractIngredients(container) 46 + recipe.Instructions = extractInstructions(container) 47 + 48 + if recipe.Name == "" { 49 + return nil, false 50 + } 51 + 52 + return recipe, true 53 + } 54 + 55 + func extractIngredients(n *html.Node) []models.Ingredient { 56 + var ingredients []models.Ingredient 57 + var f func(*html.Node) 58 + f = func(n *html.Node) { 59 + if n.Type == html.ElementNode && hasClass(n, "card-ingredient") { 60 + ing := parseCardIngredient(n) 61 + if ing.Name != "" { 62 + ingredients = append(ingredients, ing) 63 + } 64 + return 65 + } 66 + for c := n.FirstChild; c != nil; c = c.NextSibling { 67 + f(c) 68 + } 69 + } 70 + f(n) 71 + return ingredients 72 + } 73 + 74 + func parseCardIngredient(n *html.Node) models.Ingredient { 75 + var quantity, unit, name, complement string 76 + var f func(*html.Node) 77 + f = func(n *html.Node) { 78 + if n.Type == html.ElementNode { 79 + for _, attr := range n.Attr { 80 + if attr.Key 
== "data-ingredientquantity" && quantity == "" { 81 + quantity = attr.Val 82 + } 83 + if attr.Key == "data-unitsingular" && unit == "" { 84 + unit = attr.Val 85 + } 86 + if attr.Key == "data-ingredientnamesingular" && name == "" { 87 + name = attr.Val 88 + } 89 + if attr.Key == "data-ingredientcomplementsingular" && complement == "" { 90 + complement = attr.Val 91 + } 92 + } 93 + } 94 + for c := n.FirstChild; c != nil; c = c.NextSibling { 95 + f(c) 96 + } 97 + } 98 + f(n) 99 + 100 + rawText := quantity 101 + if unit != "" { 102 + rawText += " " + unit 103 + } 104 + if name != "" { 105 + rawText += " " + name 106 + } 107 + if complement != "" { 108 + rawText += " " + complement 109 + } 110 + 111 + return models.Ingredient{ 112 + RawText: rawText, 113 + Quantity: quantity, 114 + Unit: unit, 115 + Name: buildIngredientName(name, complement), 116 + } 117 + } 118 + 119 + func buildIngredientName(name, complement string) string { 120 + if complement != "" { 121 + return name + " " + complement 122 + } 123 + return name 124 + } 125 + 126 + func extractInstructions(n *html.Node) []models.Instruction { 127 + var steps []models.Instruction 128 + var f func(*html.Node) 129 + f = func(n *html.Node) { 130 + if n.Type == html.ElementNode && hasClass(n, "recipe-step-list__container") { 131 + p := findFirst(n, "p") 132 + if p != nil { 133 + text := textContent(p) 134 + if text != "" { 135 + steps = append(steps, models.Instruction{Text: text}) 136 + } 137 + } 138 + return 139 + } 140 + for c := n.FirstChild; c != nil; c = c.NextSibling { 141 + f(c) 142 + } 143 + } 144 + f(n) 145 + return steps 146 + } 147 + 148 + func findServings(n *html.Node) (string, string) { 149 + counter := findByClass(n, "mrtn-recette_ingredients-counter") 150 + if counter == nil { 151 + return "", "" 152 + } 153 + var nb, unit string 154 + for _, attr := range counter.Attr { 155 + if attr.Key == "data-servingsnb" { 156 + nb = attr.Val 157 + } 158 + if attr.Key == "data-servingsunit" { 159 + unit = attr.Val 
160 + } 161 + } 162 + return nb, unit 163 + } 164 + 165 + func findPrepTime(n *html.Node) string { 166 + _, _, prep, cook := findTimes(n) 167 + _ = cook 168 + return prep 169 + } 170 + 171 + func findCookTime(n *html.Node) string { 172 + _, _, _, cook := findTimes(n) 173 + return cook 174 + } 175 + 176 + func findTimes(n *html.Node) (total, rest, prep, cook string) { 177 + container := findByClass(n, "recipe-preparation__time") 178 + if container == nil { 179 + return 180 + } 181 + if t := findByClass(container, "time__total"); t != nil { 182 + total = textContent(t) 183 + } 184 + details := findByClass(container, "time__details") 185 + if details == nil { 186 + return 187 + } 188 + for c := details.FirstChild; c != nil; c = c.NextSibling { 189 + if c.Type != html.ElementNode || c.Data != "div" { 190 + continue 191 + } 192 + children := getChildElements(c) 193 + if len(children) == 2 { 194 + label := strings.ToLower(textContent(children[0])) 195 + value := textContent(children[1]) 196 + if strings.Contains(label, "préparation") || strings.Contains(label, "prep") { 197 + prep = value 198 + } else if strings.Contains(label, "cuisson") || strings.Contains(label, "cook") { 199 + cook = value 200 + } else if strings.Contains(label, "repos") || strings.Contains(label, "rest") { 201 + rest = value 202 + } 203 + } 204 + } 205 + return 206 + } 207 + 208 + func getChildElements(n *html.Node) []*html.Node { 209 + var children []*html.Node 210 + for c := n.FirstChild; c != nil; c = c.NextSibling { 211 + if c.Type == html.ElementNode { 212 + children = append(children, c) 213 + } 214 + } 215 + return children 216 + } 217 + 218 + func findRecipeImage(n *html.Node) string { 219 + viewer := findByClass(n, "recipe-media-viewer") 220 + if viewer == nil { 221 + return "" 222 + } 223 + var best string 224 + bestScore := 0 225 + var f func(*html.Node) 226 + f = func(n *html.Node) { 227 + if n.Type == html.ElementNode && n.Data == "img" { 228 + src := getAttr(n, "data-src") 229 + if src 
== "" { 230 + src = getAttr(n, "src") 231 + } 232 + if src == "" || strings.Contains(src, "lazyload") || strings.Contains(src, "w40h40") || strings.Contains(src, "w79h79") || strings.Contains(src, "w157h157") || strings.Contains(src, "w75h75") || strings.Contains(src, "w150h150") { 233 + for c := n.FirstChild; c != nil; c = c.NextSibling { 234 + f(c) 235 + } 236 + return 237 + } 238 + score := 1 239 + if strings.Contains(src, "_origin") { 240 + score = 100 241 + } else if strings.Contains(src, "_w1024") { 242 + score = 90 243 + } else if strings.Contains(src, "_w648") { 244 + score = 80 245 + } else if strings.Contains(src, "_w324") { 246 + score = 70 247 + } else if strings.Contains(src, "_w300") { 248 + score = 60 249 + } 250 + if score > bestScore { 251 + bestScore = score 252 + best = src 253 + } 254 + } 255 + for c := n.FirstChild; c != nil; c = c.NextSibling { 256 + f(c) 257 + } 258 + } 259 + f(viewer) 260 + return best 261 + } 262 + 263 + func findByClass(n *html.Node, class string) *html.Node { 264 + if hasClass(n, class) { 265 + return n 266 + } 267 + for c := n.FirstChild; c != nil; c = c.NextSibling { 268 + if found := findByClass(c, class); found != nil { 269 + return found 270 + } 271 + } 272 + return nil 273 + } 274 + 275 + func hasClass(n *html.Node, class string) bool { 276 + for _, attr := range n.Attr { 277 + if attr.Key == "class" { 278 + for _, c := range strings.Fields(attr.Val) { 279 + if c == class { 280 + return true 281 + } 282 + } 283 + } 284 + } 285 + return false 286 + } 287 + 288 + func findTextByClass(n *html.Node, class string) string { 289 + found := findByClass(n, class) 290 + if found == nil { 291 + return "" 292 + } 293 + return textContent(found) 294 + } 295 + 296 + func findFirstByTag(n *html.Node, tag string) string { 297 + found := findFirst(n, tag) 298 + if found == nil { 299 + return "" 300 + } 301 + return textContent(found) 302 + } 303 + 304 + func findFirst(n *html.Node, tag string) *html.Node { 305 + if n.Type == 
html.ElementNode && n.Data == tag { 306 + return n 307 + } 308 + for c := n.FirstChild; c != nil; c = c.NextSibling { 309 + if found := findFirst(c, tag); found != nil { 310 + return found 311 + } 312 + } 313 + return nil 314 + } 315 + 316 + func textContent(n *html.Node) string { 317 + if n.Type == html.TextNode { 318 + return n.Data 319 + } 320 + var sb strings.Builder 321 + for c := n.FirstChild; c != nil; c = c.NextSibling { 322 + sb.WriteString(textContent(c)) 323 + } 324 + return strings.TrimSpace(sb.String()) 325 + } 326 + 327 + func getAttr(n *html.Node, key string) string { 328 + for _, attr := range n.Attr { 329 + if attr.Key == key { 330 + return attr.Val 331 + } 332 + } 333 + return "" 334 + }
+24
internal/extract/pipeline.go
// Context from the same diff hunk (definitions only partially visible, so not
// rewritten here):
//
//   - the import block gains "regexp" and
//     "tangled.org/dunkirk.sh/pear/internal/extract/marmiton";
//   - the extraction method computes lang := detectLanguage(body), tries
//     marmiton.Extract(body) before the other extractors, and sets
//     recipe.Language = lang on the recipe returned by the marmiton, wprm,
//     schema, schema microdata and hrecipe branches.

// htmlLangRe captures the value of the lang (or xml:lang) attribute of the
// opening <html> tag. The value may be double-quoted, single-quoted or
// unquoted, may be surrounded by whitespace around "=", and may contain digit
// subtags such as "es-419". Requiring whitespace or ":" in front of "lang"
// keeps attributes like data-lang from matching.
var htmlLangRe = regexp.MustCompile(`(?i)<html\b[^>]*[\s:]lang\s*=\s*["']?([a-zA-Z0-9-]+)`)

// detectLanguage returns the lower-cased language tag declared on the <html>
// element of body, or "" when no such declaration is found.
func detectLanguage(body string) string {
	m := htmlLangRe.FindStringSubmatch(body)
	if len(m) < 2 {
		return ""
	}
	return strings.ToLower(m[1])
}
+1 -1
internal/extract/schema/jsonld.go
// unitList is a regular-expression alternation of ingredient unit names. It
// holds the English units (cups, tablespoons, oz, ...) followed by French
// ones (cuillères à soupe/café, cl, dl, verres, pincées, sachets, ...).
// NOTE(review): the place where this fragment is compiled is not visible
// here. Short alternatives such as "c", "g" and "l" come before longer ones
// that start with the same letter ("cl", "gousses"), so confirm the
// surrounding pattern forces a complete unit match.
var unitList = `cups?|tablespoons?|teaspoons?|tbsp|tsp|c|oz|lbs?|pounds?|grams?|g|kg|ml|liters?|l|pinch(?:es)?|dash(?:es)?|cloves?|slices?|pieces?|heads?|sprigs?|bunch(?:es)?|cans?|bottles?|packages?|sticks?|quarts?|pints?|gallons?|cuill[eè]res?\s*[aà]\s+(?:soupe|café)|cl|dl|mg|cuill[eè]re\s+(?:de\s+)?(?:soupe|café)|verres?|pinc[eé]es?|sachets?|bo[iî]tes?|tranches?|morceaux?|tiges?|gousses?|botte(?:s)?|paquets?|brisures?|feuilles?`

// numPat matches a quantity written as a mixed fraction ("1 1/2"), a simple
// fraction ("1/2") or an integer or decimal number ("2", "2.5").
var numPat = `(\d+\s+\d/\d+|\d+/\d+|\d+\.?\d*)`
+1
internal/models/recipe.go
··· 15 15 Servings int 16 16 Ingredients []Ingredient 17 17 Instructions []Instruction 18 + Language string 18 19 ExtractionMethod string 19 20 } 20 21
+6 -3
main.go
··· 499 499 return s[i+len(sep):] 500 500 } 501 501 502 - func renderStep(text string, ingredients []models.Ingredient) template.HTML { 503 - annotated := cooklang.AnnotateStepForDisplay(text, ingredients) 504 - return cooklang.ParseAndRender(annotated) 502 + func renderStep(text string, ingredients []models.Ingredient, lang string) template.HTML { 503 + if lang == "" || lang == "en" || strings.HasPrefix(lang, "en-") { 504 + annotated := cooklang.AnnotateStepForDisplay(text, ingredients) 505 + return cooklang.ParseAndRender(annotated) 506 + } 507 + return cooklang.ParseAndRender(cooklang.AnnotateTimersOnly(text, lang)) 505 508 } 506 509 507 510 func groupIngredients(ings []models.Ingredient) []ingredientGroup {
+2 -2
ui/templates/recipe.html
··· 1 1 {{define "recipe_page"}} 2 2 <!DOCTYPE html> 3 - <html lang="en"> 3 + <html lang="{{if .Recipe.Language}}{{.Recipe.Language}}{{else}}en{{end}}"> 4 4 <head> 5 5 <meta charset="utf-8"> 6 6 <meta name="viewport" content="width=device-width,initial-scale=1"> ··· 76 76 <h3>Instructions</h3> 77 77 <ol class="instruction-list"> 78 78 {{range .Recipe.Instructions}} 79 - <li>{{renderStep .Text $.Recipe.Ingredients}}</li> 79 + <li>{{renderStep .Text $.Recipe.Ingredients $.Recipe.Language}}</li> 80 80 {{end}} 81 81 </ol> 82 82 </div>